Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file not shown.
10 changes: 10 additions & 0 deletions questions/189_mahalanobis-distance/description.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Write a Python function that calculates the Mahalanobis distance between a point and a probability distribution. The function should take a point, the mean vector of the distribution, and the covariance matrix as inputs and return the Mahalanobis distance as a scalar value. The Mahalanobis distance is a measure of how many standard deviations away a point is from the mean, considering the correlations in the data (represented by the covariance matrix).

The function inputs should be in NumPy format (numpy.ndarray):
- `point`: A 1D NumPy array representing the point in the feature space
- `mean`: A 1D NumPy array representing the mean of the distribution
- `cov_matrix`: A 2D NumPy array representing the covariance matrix of the distribution

The function should return a float representing the Mahalanobis distance computed as: $D_M = \sqrt{(x - \mu)^T \Sigma^{-1} (x - \mu)}$

**Note:** The return value should be rounded to 4 decimal places.
5 changes: 5 additions & 0 deletions questions/189_mahalanobis-distance/example.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"input": "point = np.array([3.0, 4.0, 2.0]),\nmean = np.array([0.0, 0.0, 0.0]),\ncov_matrix = np.array([[1.0, 0.0, 0.0],\n [0.0, 1.0, 0.0],\n [0.0, 0.0, 1.0]])",
  "output": "5.7446",
"reasoning": "With an identity covariance matrix, the Mahalanobis distance reduces to the Euclidean distance. The point [3, 4, 2] is at distance sqrt(3^2 + 4^2 + 2^2) = sqrt(9 + 16 + 4) = sqrt(29) ≈ 5.745 from the origin [0, 0, 0]. The covariance matrix being identity means there is no correlation between dimensions and all dimensions have unit variance, so the standardized distance equals the Euclidean distance."
}
21 changes: 21 additions & 0 deletions questions/189_mahalanobis-distance/example.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Example: Mahalanobis Distance

## Example Input

```python
point = np.array([3.0, 4.0, 2.0])
mean = np.array([0.0, 0.0, 0.0])
cov_matrix = np.array([[1.0, 0.0, 0.0],
[0.0, 1.0, 0.0],
[0.0, 0.0, 1.0]])
```

## Example Output

```
5.7446
```

## Reasoning

With an identity covariance matrix, the Mahalanobis distance reduces to the Euclidean distance. The point [3, 4, 2] is at distance sqrt(3^2 + 4^2 + 2^2) = sqrt(9 + 16 + 4) = sqrt(29) ≈ 5.745 from the origin [0, 0, 0]. The covariance matrix being identity means there is no correlation between dimensions and all dimensions have unit variance, so the standardized distance equals the Euclidean distance.
125 changes: 125 additions & 0 deletions questions/189_mahalanobis-distance/generate_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
"""
Generate test cases for the Mahalanobis distance problem.
This script creates comprehensive test cases and writes them to tests.json.
"""
import json
import numpy as np
from pathlib import Path


def mahalanobis_distance_reference(point, mean, cov_matrix):
    """Reference implementation for generating expected outputs.

    Computes sqrt((point - mean)^T * cov_matrix^-1 * (point - mean)).
    """
    delta = point - mean
    try:
        precision = np.linalg.inv(cov_matrix)
    except np.linalg.LinAlgError:
        # Singular covariance matrix: fall back to the Moore-Penrose pseudo-inverse.
        precision = np.linalg.pinv(cov_matrix)
    # Quadratic form delta^T * precision * delta, then square root.
    return float(np.sqrt(delta @ precision @ delta))


def generate_tests():
    """Generate diverse test cases for Mahalanobis distance.

    Each case is stored exactly once as NumPy source text; the same text is
    both evaluated to compute the expected output and embedded verbatim in
    the generated test call, so the arrays and the printed call string can
    never drift apart (the original hand-duplicated both).

    Returns:
        list[dict]: Dicts with "test" (a print(...) call string) and
        "expected_output" (the distance rounded to 4 decimal places, as str).
    """
    # (point_src, mean_src, cov_src) snippets, one tuple per case:
    #   1. identity covariance (reduces to Euclidean distance)
    #   2. scaled diagonal covariance (2D)
    #   3. non-diagonal covariance (with correlation)
    #   4. 3D case with non-zero mean
    #   5. 3D with scaled covariance
    #   6. point at the mean (distance must be 0)
    #   7. 4D identity case
    #   8. negative coordinates with correlated covariance
    cases = [
        ("np.array([3.0, 4.0])", "np.array([0.0, 0.0])",
         "np.array([[1.0, 0.0], [0.0, 1.0]])"),
        ("np.array([2.0, 2.0])", "np.array([0.0, 0.0])",
         "np.array([[4.0, 0.0], [0.0, 1.0]])"),
        ("np.array([1.0, 1.0])", "np.array([0.0, 0.0])",
         "np.array([[2.0, 1.0], [1.0, 2.0]])"),
        ("np.array([5.0, 3.0, 2.0])", "np.array([1.0, 1.0, 1.0])",
         "np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])"),
        ("np.array([2.0, 2.0, 2.0])", "np.array([0.0, 0.0, 0.0])",
         "np.array([[2.0, 0.0, 0.0], [0.0, 2.0, 0.0], [0.0, 0.0, 2.0]])"),
        ("np.array([1.0, 2.0])", "np.array([1.0, 2.0])",
         "np.array([[1.0, 0.0], [0.0, 1.0]])"),
        ("np.array([1.0, 2.0, 3.0, 4.0])", "np.array([0.0, 0.0, 0.0, 0.0])",
         "np.eye(4)"),
        ("np.array([-2.0, 3.0])", "np.array([1.0, 1.0])",
         "np.array([[1.0, 0.5], [0.5, 1.0]])"),
    ]

    tests = []
    for point_src, mean_src, cov_src in cases:
        # eval is safe here: the snippets are hard-coded literals above,
        # never user input.
        point = eval(point_src, {"np": np})
        mean = eval(mean_src, {"np": np})
        cov = eval(cov_src, {"np": np})
        expected = round(mahalanobis_distance_reference(point, mean, cov), 4)
        tests.append({
            "test": f"print(mahalanobis_distance({point_src}, {mean_src}, {cov_src}))",
            "expected_output": str(expected),
        })

    return tests


def main():
    """Generate the test suite and write it to tests.json next to this script."""
    cases = generate_tests()

    # tests.json lives in the same directory as this generator script.
    destination = Path(__file__).parent / "tests.json"
    destination.write_text(json.dumps(cases, indent=2))

    print(f"✓ Generated {len(cases)} test cases")
    print(f"✓ Written to {destination}")


# Script entry point: regenerate tests.json when run directly.
if __name__ == "__main__":
    main()
44 changes: 44 additions & 0 deletions questions/189_mahalanobis-distance/learn.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
## Mahalanobis Distance

The Mahalanobis distance is a fundamental metric in multivariate statistics and machine learning used to measure the distance between a point and a probability distribution. Unlike Euclidean distance, which treats all dimensions equally, the Mahalanobis distance accounts for the correlations and scales of different features via the covariance matrix.

### Formula

The Mahalanobis distance from a point $x$ to a distribution with mean $\mu$ and covariance matrix $\Sigma$ is defined as:

$$D_M = \sqrt{(x - \mu)^T \Sigma^{-1} (x - \mu)}$$

### Components

- **$(x - \mu)$**: The deviation vector from the point to the mean
- **$\Sigma^{-1}$**: The inverse of the covariance matrix, which accounts for variance and correlation
- **$(x - \mu)^T \Sigma^{-1} (x - \mu)$**: A quadratic form that measures standardized distance

### Properties

1. **Scale-Invariant**: Takes into account the variance along each dimension
2. **Correlation-Aware**: Accounts for correlations between features through the covariance matrix
3. **Special Case**: When the covariance matrix is the identity matrix, Mahalanobis distance reduces to Euclidean distance
4. **Outlier Detection**: Points with large Mahalanobis distance are statistical outliers relative to the distribution

### Applications

- **Outlier Detection**: Identifying observations that deviate significantly from the distribution
- **Clustering**: Measuring distances in multivariate Gaussian mixture models
- **Classification**: Computing distances in discriminant analysis and Mahalanobis distance classifiers
- **Data Quality**: Detecting anomalies and unusual patterns in multivariate datasets

### Computational Steps

1. Compute the deviation vector: $d = x - \mu$
2. Compute the inverse of the covariance matrix: $\Sigma^{-1}$
3. Compute the quadratic form: $d^T \Sigma^{-1} d$
4. Take the square root to obtain the distance: $\sqrt{d^T \Sigma^{-1} d}$

### Example

For a 2D distribution with mean $\mu = [0, 0]$ and covariance matrix $\Sigma = \begin{bmatrix} 1 & 0 \\ 0 & 1 \end{bmatrix}$ (identity matrix), a point $x = [3, 4]$ has Mahalanobis distance:

$$D_M = \sqrt{[3, 4] \begin{bmatrix} 1 & 0 \\ 0 & 1 \end{bmatrix} [3, 4]^T} = \sqrt{9 + 16} = 5$$

This is equivalent to the Euclidean distance since the covariance is identity.
12 changes: 12 additions & 0 deletions questions/189_mahalanobis-distance/meta.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"id": "189",
"title": "Calculating Mahalanobis Distance",
"difficulty": "medium",
"category": "Machine Learning",
"video": "",
"likes": "0",
"dislikes": "0",
"contributor": [],
"tinygrad_difficulty": "medium",
"pytorch_difficulty": "medium"
}
34 changes: 34 additions & 0 deletions questions/189_mahalanobis-distance/solution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import numpy as np

def mahalanobis_distance(point: np.ndarray, mean: np.ndarray, cov_matrix: np.ndarray) -> float:
    """
    Calculate the Mahalanobis distance between a point and a probability distribution.

    The distance is sqrt((x - mu)^T * Sigma^-1 * (x - mu)): the number of
    standard deviations the point lies from the mean, accounting for the
    variance of and correlation between features via the covariance matrix.

    Args:
        point (np.ndarray): 1D array for the point, shape (n_features,).
        mean (np.ndarray): 1D array for the distribution mean, shape (n_features,).
        cov_matrix (np.ndarray): 2D covariance matrix, shape (n_features, n_features).

    Returns:
        float: The Mahalanobis distance, rounded to 4 decimal places.
    """
    # Deviation of the point from the distribution mean.
    deviation = point - mean

    # Invert the covariance matrix; a singular matrix gets the
    # Moore-Penrose pseudo-inverse instead.
    try:
        precision = np.linalg.inv(cov_matrix)
    except np.linalg.LinAlgError:
        precision = np.linalg.pinv(cov_matrix)

    # Quadratic form (x - mu)^T * Sigma^-1 * (x - mu), then square root.
    squared_distance = deviation @ precision @ deviation

    return round(float(np.sqrt(squared_distance)), 4)
23 changes: 23 additions & 0 deletions questions/189_mahalanobis-distance/starter_code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import numpy as np

def mahalanobis_distance(point: np.ndarray, mean: np.ndarray, cov_matrix: np.ndarray) -> float:
    """
    Calculate the Mahalanobis distance between a point and a probability distribution.

    Args:
        point (np.ndarray): A 1D numpy array representing the point in the feature space.
                           Shape: (n_features,)
        mean (np.ndarray): A 1D numpy array representing the mean of the distribution.
                          Shape: (n_features,)
        cov_matrix (np.ndarray): A 2D numpy array representing the covariance matrix.
                                Shape: (n_features, n_features)

    Returns:
        float: The Mahalanobis distance, rounded to 4 decimal places.
               Computed as sqrt((point - mean)^T * cov_matrix^(-1) * (point - mean)).
               A scalar representing how many standard deviations the point is from the mean,
               accounting for correlation between features.

    """
    # Your code here
    # Suggested steps:
    #   1. Compute the deviation vector: point - mean
    #   2. Invert cov_matrix (np.linalg.inv; fall back to np.linalg.pinv if singular)
    #   3. Return sqrt(deviation^T @ inverse @ deviation), rounded to 4 decimals
    pass
34 changes: 34 additions & 0 deletions questions/189_mahalanobis-distance/tests.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
[
{
"test": "print(mahalanobis_distance(np.array([3.0, 4.0]), np.array([0.0, 0.0]), np.array([[1.0, 0.0], [0.0, 1.0]])))",
"expected_output": "5.0"
},
{
"test": "print(mahalanobis_distance(np.array([2.0, 2.0]), np.array([0.0, 0.0]), np.array([[4.0, 0.0], [0.0, 1.0]])))",
"expected_output": "2.2361"
},
{
"test": "print(mahalanobis_distance(np.array([1.0, 1.0]), np.array([0.0, 0.0]), np.array([[2.0, 1.0], [1.0, 2.0]])))",
"expected_output": "0.8165"
},
{
"test": "print(mahalanobis_distance(np.array([5.0, 3.0, 2.0]), np.array([1.0, 1.0, 1.0]), np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])))",
"expected_output": "4.5826"
},
{
"test": "print(mahalanobis_distance(np.array([2.0, 2.0, 2.0]), np.array([0.0, 0.0, 0.0]), np.array([[2.0, 0.0, 0.0], [0.0, 2.0, 0.0], [0.0, 0.0, 2.0]])))",
"expected_output": "2.4495"
},
{
"test": "print(mahalanobis_distance(np.array([1.0, 2.0]), np.array([1.0, 2.0]), np.array([[1.0, 0.0], [0.0, 1.0]])))",
"expected_output": "0.0"
},
{
"test": "print(mahalanobis_distance(np.array([1.0, 2.0, 3.0, 4.0]), np.array([0.0, 0.0, 0.0, 0.0]), np.eye(4)))",
"expected_output": "5.4772"
},
{
"test": "print(mahalanobis_distance(np.array([-2.0, 3.0]), np.array([1.0, 1.0]), np.array([[1.0, 0.5], [0.5, 1.0]])))",
"expected_output": "5.0332"
}
]