From dff98a91a381e4bd1de3c193280b7da83442467d Mon Sep 17 00:00:00 2001 From: "samy.elbkr" Date: Sun, 16 Nov 2025 23:01:50 +0100 Subject: [PATCH 1/7] questions answered --- .DS_Store | Bin 0 -> 8196 bytes github-assignment | 1 + numpy_questions.py | 13 +++++--- sklearn_questions.py | 74 ++++++++++++++++++++++++++++++++----------- 4 files changed, 65 insertions(+), 23 deletions(-) create mode 100644 .DS_Store create mode 160000 github-assignment diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..1e4eccc42a954a61cdf2109478d236667c6460d0 GIT binary patch literal 8196 zcmeHM%We}f6g_TJ5(JBYK$Hd2ELb5CsbYxi8Sg0QE_i$AIH8gACK&M0>Es|dmF$yfCh_T^9rj4BI8m@ zvf_Jg5)JJ!8f<49f~8;!Zc9N+)4A`?Od54l-hTHUQ?70#R%lSgjm z72<_#0NIP`mN|y%Uu1rDJjM>T(Z*AB(9W{~c^+B;8`sI$oXkz}277qLcAtFC$Y{!u zBd$J23O+s-d|bveM%zS!C&U)L_3{>W4Z^5Ryk;DUHnc6jayH%!$E7uVSBQ9nnlCY; zkA2!ts%CC}7W`bbbDg8-o}6XzR|W;UTV%-bw092vWDUY6@^;|uT=Rmg$eG8C-*^P` zV=F!Xg$Na8-~uwR>T+I}`+s}+`~M3#Gfi0;PzI_pU}{Mx*`l^R)fZUFDIc*sXAu#- k%;Ca+1lW1{gCW}yRmCRum^ow*MPCG18njXd{wf2%0bkV(%K!iX literal 0 HcmV?d00001 diff --git a/github-assignment b/github-assignment new file mode 160000 index 00000000..769c2dd0 --- /dev/null +++ b/github-assignment @@ -0,0 +1 @@ +Subproject commit 769c2dd03019961072c98a6b9802aef4e14cfa67 diff --git a/numpy_questions.py b/numpy_questions.py index 21fcec4b..26446d36 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -40,7 +40,8 @@ def max_index(X): i = 0 j = 0 - # TODO + flat_index = np.argmax(X) + i, j = np.unravel_index(flat_index, X.shape) return i, j @@ -62,6 +63,10 @@ def wallis_product(n_terms): pi : float The approximation of order `n_terms` of pi using the Wallis product. """ - # XXX : The n_terms is an int that corresponds to the number of - # terms in the product. For example 10000. - return 0. + product = 1.0 + + for n in range(1, n_terms + 1): + product *= (4 * n**2) / (4 * n**2 - 1) + + pi = 2 * product + return pi diff --git a/sklearn_questions.py b/sklearn_questions.py index f65038c6..3bf9d9fa 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -29,46 +29,82 @@ class OneNearestNeighbor(BaseEstimator, ClassifierMixin): - "OneNearestNeighbor classifier." + """One Nearest Neighbor classifier. - def __init__(self): # noqa: D107 + This classifier predicts the label of the closest training sample + using the Euclidean distance. + """ + + def __init__(self): + """Initialize the OneNearestNeighbor classifier.""" pass def fit(self, X, y): - """Write docstring. + """Fit the classifier. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Training data. + + y : array-like of shape (n_samples,) + Class labels. - And describe parameters + Returns + ------- + self : object + The fitted classifier. """ X, y = check_X_y(X, y) check_classification_targets(y) + + self.X_ = X + self.y_ = y self.classes_ = np.unique(y) self.n_features_in_ = X.shape[1] - # XXX fix return self def predict(self, X): - """Write docstring. + """Predict the closest label for each sample. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Input samples. - And describe parameters + Returns + ------- + y_pred : ndarray of shape (n_samples,) + Predicted class labels. """ - check_is_fitted(self) + check_is_fitted(self, ["X_", "y_"]) X = check_array(X) - y_pred = np.full( - shape=len(X), fill_value=self.classes_[0], - dtype=self.classes_.dtype - ) - # XXX fix - return y_pred + # Compute distances to all training points + distances = np.sqrt(((self.X_[None, :, :] - X[:, None, :]) ** 2).sum(axis=2)) + + # Index of nearest neighbor + nn_index = np.argmin(distances, axis=1) + + return self.y_[nn_index] def score(self, X, y): - """Write docstring. + """Return accuracy of the classifier. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Test samples. - And describe parameters + y : array-like of shape (n_samples,) + True labels. + + Returns + ------- + score : float + Accuracy (fraction of correct predictions). """ X, y = check_X_y(X, y) y_pred = self.predict(X) - - # XXX fix - return y_pred.sum() + return np.mean(y_pred == y) \ No newline at end of file From bbd4f4b98c7a724ed43c7b56f7af5cef0bf17f9f Mon Sep 17 00:00:00 2001 From: "samy.elbkr" Date: Mon, 17 Nov 2025 11:35:50 +0100 Subject: [PATCH 2/7] questions answered --- .DS_Store | Bin 8196 -> 8196 bytes numpy_questions.py | 1 + sklearn_questions.py | 1 + 3 files changed, 2 insertions(+) diff --git a/.DS_Store b/.DS_Store index 1e4eccc42a954a61cdf2109478d236667c6460d0..e9d4ff11b713ea5f642a21bbf60b8ff2f86fffa2 100644 GIT binary patch delta 106 zcmZp1XmQw}CJ?)C0RsaA3xgg*IzuKyNp8N2OHxjL5>SkTfuUigL+WuyRQVLV@&y@& U!O8i#1wcJO69P6j3pDZr0ET86rvLx| delta 106 zcmZp1XmQw}CJ?*Mn1O+Tg+Y%YogtH Date: Mon, 17 Nov 2025 11:43:51 +0100 Subject: [PATCH 3/7] style fixes --- numpy_questions.py | 5 ++--- sklearn_questions.py | 22 +++++++++++++++------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/numpy_questions.py b/numpy_questions.py index 9824d99c..8e3a2ab4 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -64,10 +64,9 @@ def wallis_product(n_terms): The approximation of order `n_terms` of pi using the Wallis product. """ product = 1.0 - + for n in range(1, n_terms + 1): product *= (4 * n**2) / (4 * n**2 - 1) - + pi = 2 * product - #completion return pi diff --git a/sklearn_questions.py b/sklearn_questions.py index ef29eeec..1601a96e 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -78,16 +78,25 @@ def predict(self, X): y_pred : ndarray of shape (n_samples,) Predicted class labels. """ - check_is_fitted(self, ["X_", "y_"]) + check_is_fitted(self, ["X_", "y_", "n_features_in_"]) X = check_array(X) - # Compute distances to all training points - distances = np.sqrt(((self.X_[None, :, :] - X[:, None, :]) ** 2).sum(axis=2)) + if X.shape[1] != self.n_features_in_: + raise ValueError( + "X has {} features, but OneNearestNeighbor was fitted with " + "{} features.".format(X.shape[1], self.n_features_in_) + ) - # Index of nearest neighbor - nn_index = np.argmin(distances, axis=1) + n_samples = X.shape[0] + y_pred = np.empty(n_samples, dtype=self.y_.dtype) - return self.y_[nn_index] + for i in range(n_samples): + # Compute Euclidean distances to all training samples + distances = np.linalg.norm(self.X_ - X[i], axis=1) + nn_index = np.argmin(distances) + y_pred[i] = self.y_[nn_index] + + return y_pred def score(self, X, y): """Return accuracy of the classifier. @@ -107,5 +116,4 @@ def score(self, X, y): """ X, y = check_X_y(X, y) y_pred = self.predict(X) - #completion return np.mean(y_pred == y) \ No newline at end of file From c30181057f36cc44eb88ddb047bc98db73f077b0 Mon Sep 17 00:00:00 2001 From: "samy.elbkr" Date: Mon, 17 Nov 2025 11:47:07 +0100 Subject: [PATCH 4/7] style fixes-2 --- numpy_questions.py | 4 ++++ sklearn_questions.py | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/numpy_questions.py b/numpy_questions.py index 8e3a2ab4..a158478d 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -37,6 +37,10 @@ def max_index(X): If the input is not a numpy array or if the shape is not 2D. """ + if not isinstance(X, np.ndarray): + raise ValueError("X must be a numpy array.") + if X.ndim != 2: + raise ValueError("X must be a 2D array.") i = 0 j = 0 diff --git a/sklearn_questions.py b/sklearn_questions.py index 1601a96e..921fb5c9 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -116,4 +116,5 @@ def score(self, X, y): """ X, y = check_X_y(X, y) y_pred = self.predict(X) - return np.mean(y_pred == y) \ No newline at end of file + return np.mean(y_pred == y) + \ No newline at end of file From 546547f8d391bf5768bfe07f8598494349cc2581 Mon Sep 17 00:00:00 2001 From: "samy.elbkr" Date: Mon, 17 Nov 2025 11:53:23 +0100 Subject: [PATCH 5/7] style fixes-2 --- numpy_questions.py | 4 +++- sklearn_questions.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/numpy_questions.py b/numpy_questions.py index a158478d..d3a2377f 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -15,7 +15,7 @@ This will be enforced with `flake8`. You can check that there is no flake8 errors by calling `flake8` at the root of the repo. """ -import numpy as np +#import numpy as np def max_index(X): @@ -67,6 +67,8 @@ def wallis_product(n_terms): pi : float The approximation of order `n_terms` of pi using the Wallis product. """ + if n_terms == 0: + return 1.0 product = 1.0 for n in range(1, n_terms + 1): diff --git a/sklearn_questions.py b/sklearn_questions.py index 921fb5c9..8cdcdbb0 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -117,4 +117,4 @@ def score(self, X, y): X, y = check_X_y(X, y) y_pred = self.predict(X) return np.mean(y_pred == y) - \ No newline at end of file + From 0b6ae5f13d546d01d4c1adfa72628dbd40856e1e Mon Sep 17 00:00:00 2001 From: "samy.elbkr" Date: Mon, 17 Nov 2025 11:54:27 +0100 Subject: [PATCH 6/7] style fixes-2 --- numpy_questions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy_questions.py b/numpy_questions.py index d3a2377f..c16ec3dd 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -15,7 +15,7 @@ This will be enforced with `flake8`. You can check that there is no flake8 errors by calling `flake8` at the root of the repo. """ -#import numpy as np +import numpy as np def max_index(X): From 9b6ad3ee90ce73bfe07f8bde8f4b8a67ace62f46 Mon Sep 17 00:00:00 2001 From: "samy.elbkr" Date: Mon, 17 Nov 2025 11:55:38 +0100 Subject: [PATCH 7/7] style fixes-2 --- sklearn_questions.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn_questions.py b/sklearn_questions.py index 8cdcdbb0..b8370f2a 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -117,4 +117,3 @@ def score(self, X, y): X, y = check_X_y(X, y) y_pred = self.predict(X) return np.mean(y_pred == y) -