From 42eadce9443277c481693dc335a5b5e2e7a2fa0d Mon Sep 17 00:00:00 2001 From: Kenza Chaabouni Date: Thu, 25 Dec 2025 01:31:46 +0100 Subject: [PATCH 1/4] NumPy and sklearn questions --- numpy_questions.py | 29 ++++++++++++-- sklearn_questions.py | 95 +++++++++++++++++++++++++++++++++++++------- 2 files changed, 105 insertions(+), 19 deletions(-) diff --git a/numpy_questions.py b/numpy_questions.py index 21fcec4b..370f974a 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -37,10 +37,17 @@ def max_index(X): If the input is not a numpy array or if the shape is not 2D. """ - i = 0 - j = 0 - # TODO + if not isinstance(X, np.ndarray): + raise ValueError("Input must be a numpy array.") + + if X.ndim != 2: + raise ValueError("Input array's shape must be 2D.") + + # find which row contains the maximum, then get its column + row = np.argmax(np.max(X, axis=1)) + col = np.argmax(X[row]) + i, j = row, col return i, j @@ -64,4 +71,18 @@ def wallis_product(n_terms): """ # XXX : The n_terms is an int that corresponds to the number of # terms in the product. For example 10000. - return 0. + + # n_terms cannot be negative (follows by definition) + if not isinstance(n_terms, int) or n_terms < 0: + raise ValueError("n_terms must be a non-negative integer.") + + if n_terms == 0: + return 1.0 + + # k runs from 1 to n_terms + k = np.arange(1, n_terms + 1, dtype=float) + + terms = (4 * k ** 2) / (4 * k ** 2 - 1) + product = np.prod(terms) + + return 2 * product diff --git a/sklearn_questions.py b/sklearn_questions.py index f65038c6..86cf7a23 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -29,46 +29,111 @@ class OneNearestNeighbor(BaseEstimator, ClassifierMixin): - "OneNearestNeighbor classifier." + """ OneNearestNeighbor classifier predicts the label of a sample + as the label of the closest training sample using Euclidean distance. + """ def __init__(self): # noqa: D107 pass def fit(self, X, y): - """Write docstring. + + """ + Fit the one-nearest-neighbor classifier + + Parameters + ---------- + X : ndarray of shape (n_samples, n_features) + Training input + y : ndarray of shape (n_samples,) + Target labels + + Returns + ------- + self : OneNearestNeighbor fitted estimator + + Raises + ------ + ValueError + If X and y don't have compatible shapes or if y is not suitable for + classification. - And describe parameters """ + X, y = check_X_y(X, y) check_classification_targets(y) self.classes_ = np.unique(y) self.n_features_in_ = X.shape[1] - # XXX fix + self.X_ = X + self.y_ = y return self def predict(self, X): - """Write docstring. - - And describe parameters + """ + Predict class labels for samples in X. + + Parameters + ---------- + X : ndarray of shape (n_samples, n_features) + Input samples + + Returns + ------- + y_pred : ndarray of shape (n_samples,) + Predicted labels + + Raises + ------ + ValueError + If the estimator not fitted or X has incorrect shape. + """ + check_is_fitted(self) X = check_array(X) + y_pred = np.full( - shape=len(X), fill_value=self.classes_[0], + shape=len(X), + fill_value=self.classes_[0], dtype=self.classes_.dtype - ) + ) + + # nearest neighbor for each sample using Euclidean distances + dists = np.sum((X[:, np.newaxis, :] - self.X_[np.newaxis, :, :]) ** 2, axis=2) + + # closest training point + nn_idx = np.argmin(dists, axis=1) + + # pred + y_pred[:] = self.y_[nn_idx] - # XXX fix return y_pred def score(self, X, y): - """Write docstring. - - And describe parameters + + """Compute the mean accuracy on the given test data and labels. + + Parameters + ---------- + X : ndarray of shape (n_samples, n_features) + Test input + y : ndarray of shape (n_samples,) + True labels test + + Returns + ------- + score : float + Mean accuracy of the classifier on the test dataset + + Raises + ------ + ValueError + If X and y have incompatible shapes. """ X, y = check_X_y(X, y) y_pred = self.predict(X) + + # return accuracy + return float(np.mean(y_pred == y)) - # XXX fix - return y_pred.sum() From b12432bdd7ceb2b3349ce5e8741174e4a4f66a01 Mon Sep 17 00:00:00 2001 From: Kenza Chaabouni Date: Thu, 25 Dec 2025 02:13:53 +0100 Subject: [PATCH 2/4] NumPy and sklearn questions --- numpy_questions.py | 12 ++++-------- sklearn_questions.py | 37 +++++++++++++++++-------------------- 2 files changed, 21 insertions(+), 28 deletions(-) diff --git a/numpy_questions.py b/numpy_questions.py index 370f974a..fa9fc8bf 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -62,7 +62,7 @@ def wallis_product(n_terms): ---------- n_terms : int Number of steps in the Wallis product. Note that `n_terms=0` will - consider the product to be `1`. + consider the procduct to be `1`. Returns ------- @@ -71,18 +71,14 @@ def wallis_product(n_terms): """ # XXX : The n_terms is an int that corresponds to the number of # terms in the product. For example 10000. - # n_terms cannot be negative (follows by definition) if not isinstance(n_terms, int) or n_terms < 0: - raise ValueError("n_terms must be a non-negative integer.") - + raise ValueError("n_terms must be a non-negative integer.") if n_terms == 0: - return 1.0 - + return 1.0 # k runs from 1 to n_terms k = np.arange(1, n_terms + 1, dtype=float) - - terms = (4 * k ** 2) / (4 * k ** 2 - 1) + terms = (4 * k**2) / (4 * k**2 - 1) product = np.prod(terms) return 2 * product diff --git a/sklearn_questions.py b/sklearn_questions.py index 86cf7a23..5f41fcb1 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -29,21 +29,18 @@ class OneNearestNeighbor(BaseEstimator, ClassifierMixin): - """ OneNearestNeighbor classifier predicts the label of a sample - as the label of the closest training sample using Euclidean distance. - """ + """One-nearest-neighbor classifier using Euclidean distance.""" def __init__(self): # noqa: D107 pass def fit(self, X, y): - - """ - Fit the one-nearest-neighbor classifier + + """Fit the one-nearest-neighbor classifier Parameters ---------- - X : ndarray of shape (n_samples, n_features) + X : ndarray of shape (n_samples, n_features) Training input y : ndarray of shape (n_samples,) Target labels @@ -51,15 +48,15 @@ def fit(self, X, y): Returns ------- self : OneNearestNeighbor fitted estimator - + Raises ------ ValueError - If X and y don't have compatible shapes or if y is not suitable for - classification. + If X and y don't have compatible shapes + or if y is not suitable for classification. """ - + X, y = check_X_y(X, y) check_classification_targets(y) self.classes_ = np.unique(y) @@ -70,7 +67,7 @@ def fit(self, X, y): return self def predict(self, X): - + """ Predict class labels for samples in X. @@ -89,7 +86,7 @@ def predict(self, X): ValueError If the estimator not fitted or X has incorrect shape. """ - + check_is_fitted(self) X = check_array(X) @@ -97,10 +94,11 @@ def predict(self, X): shape=len(X), fill_value=self.classes_[0], dtype=self.classes_.dtype - ) + ) # nearest neighbor for each sample using Euclidean distances - dists = np.sum((X[:, np.newaxis, :] - self.X_[np.newaxis, :, :]) ** 2, axis=2) + dist = X[:, np.newaxis, :] - self.X_[np.newaxis, :, :] + dists = np.sum(dist ** 2, axis=2) # closest training point nn_idx = np.argmin(dists, axis=1) @@ -111,7 +109,7 @@ def predict(self, X): return y_pred def score(self, X, y): - + """Compute the mean accuracy on the given test data and labels. Parameters @@ -120,12 +118,12 @@ def score(self, X, y): Test input y : ndarray of shape (n_samples,) True labels test - + Returns ------- score : float Mean accuracy of the classifier on the test dataset - + Raises ------ ValueError @@ -133,7 +131,6 @@ def score(self, X, y): """ X, y = check_X_y(X, y) y_pred = self.predict(X) - + # return accuracy return float(np.mean(y_pred == y)) - From 104c205b137e27a6eac66c6a96fe751d98da6967 Mon Sep 17 00:00:00 2001 From: Kenza Chaabouni Date: Thu, 25 Dec 2025 02:27:18 +0100 Subject: [PATCH 3/4] Kenza Chaabouni NumPy and sklearn questions --- numpy_questions.py | 1 - sklearn_questions.py | 26 ++++++++++---------------- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/numpy_questions.py b/numpy_questions.py index fa9fc8bf..a933c24a 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -37,7 +37,6 @@ def max_index(X): If the input is not a numpy array or if the shape is not 2D. """ - if not isinstance(X, np.ndarray): raise ValueError("Input must be a numpy array.") diff --git a/sklearn_questions.py b/sklearn_questions.py index 5f41fcb1..01e7e8ae 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -35,39 +35,35 @@ def __init__(self): # noqa: D107 pass def fit(self, X, y): - - """Fit the one-nearest-neighbor classifier - + """Fit the one-nearest-neighbor classifier. + Parameters ---------- X : ndarray of shape (n_samples, n_features) - Training input + Training input. y : ndarray of shape (n_samples,) - Target labels - + Target labels. + Returns ------- - self : OneNearestNeighbor fitted estimator - + self : OneNearestNeighbor + Fitted estimator. + Raises ------ ValueError - If X and y don't have compatible shapes - or if y is not suitable for classification. - + If X and y do not have compatible shapes or if y is not suitable + for classification. """ - X, y = check_X_y(X, y) check_classification_targets(y) self.classes_ = np.unique(y) self.n_features_in_ = X.shape[1] - self.X_ = X self.y_ = y return self def predict(self, X): - """ Predict class labels for samples in X. @@ -86,7 +82,6 @@ def predict(self, X): ValueError If the estimator not fitted or X has incorrect shape. """ - check_is_fitted(self) X = check_array(X) @@ -109,7 +104,6 @@ def predict(self, X): return y_pred def score(self, X, y): - """Compute the mean accuracy on the given test data and labels. Parameters From 3b177091283c7406fa1c9b89326ba5abf3ee75d0 Mon Sep 17 00:00:00 2001 From: Kenza Chaabouni Date: Thu, 25 Dec 2025 02:31:16 +0100 Subject: [PATCH 4/4] Kenza Chaabouni NumPy and sklearn questions --- sklearn_questions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn_questions.py b/sklearn_questions.py index 01e7e8ae..c88eaa29 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -36,19 +36,19 @@ def __init__(self): # noqa: D107 def fit(self, X, y): """Fit the one-nearest-neighbor classifier. - + Parameters ---------- X : ndarray of shape (n_samples, n_features) Training input. y : ndarray of shape (n_samples,) Target labels. - + Returns ------- self : OneNearestNeighbor Fitted estimator. - + Raises ------ ValueError