# main.py -- K-means clustering demo on the 'xclara.csv' data set.
#
# Recovered from a git diff that added two new files:
#   * jaineel.txt, whose entire content is the two lines
#         just for hactober fest
#         please merge this commit
#   * main.py (this script).
"""Cluster the two columns V1/V2 of ``xclara.csv`` with K-means and plot them."""
from copy import deepcopy

import numpy as np
import pandas as pd

# Colours used for the clusters in the final plot, cycled when k exceeds 6.
COLORS = ['r', 'g', 'b', 'y', 'c', 'm']


def dist(a, b, ax=1):
    """Return the Euclidean norm of ``a - b``.

    Args:
        a: Array-like operand.
        b: Array-like operand broadcastable against ``a``.
        ax: Axis handed to ``np.linalg.norm``. The default ``1`` yields one
            distance per row; ``None`` yields a single norm over all entries.
    """
    return np.linalg.norm(a - b, axis=ax)


def random_centroids(X, k):
    """Draw ``k`` initial 2-D centroids with integer coordinates.

    Each coordinate is sampled from ``[0, int(max(X) - 20))`` using the
    global ``np.random`` state, and the result is stored as ``float32``.

    Raises:
        ValueError: Propagated from ``np.random.randint`` when
            ``int(max(X) - 20)`` is not positive.
    """
    # randint needs an integer bound; np.max(X) is a float for float data.
    upper = int(np.max(X) - 20)
    C_x = np.random.randint(0, upper, size=k)
    C_y = np.random.randint(0, upper, size=k)
    return np.array(list(zip(C_x, C_y)), dtype=np.float32)


def kmeans(X, C, tol=0.0, max_iter=300):
    """Run Lloyd's K-means iterations starting from the centroids ``C``.

    Args:
        X: ``(n, d)`` array of points.
        C: ``(k, d)`` array of initial centroids. It is copied, never mutated;
            a floating dtype is preserved, any other dtype becomes float64.
        tol: Stop once the norm of the centroid shift is ``<= tol``. The
            default ``0.0`` means "stop when the centroids no longer move".
        max_iter: Hard cap on the number of iterations.

    Returns:
        ``(centroids, clusters)`` where ``clusters[j]`` is the integer index
        of the centroid that point ``j`` was assigned to.

    Raises:
        ValueError: If ``C`` contains no centroids.
    """
    X = np.asarray(X)
    centroids = np.array(C)  # np.array copies, so the caller's C is untouched
    if not np.issubdtype(centroids.dtype, np.floating):
        centroids = centroids.astype(float)
    k = len(centroids)
    if k == 0:
        raise ValueError("kmeans needs at least one centroid")

    clusters = np.zeros(len(X), dtype=int)
    if len(X) == 0:
        return centroids, clusters

    for _ in range(max_iter):
        # (n, k) matrix of point-to-centroid distances; argmin picks the
        # nearest centroid and resolves ties in favour of the lowest index.
        distances = np.linalg.norm(
            X[:, None, :] - centroids[None, :, :], axis=2
        )
        clusters = np.argmin(distances, axis=1)

        previous = deepcopy(centroids)
        for i in range(k):
            members = X[clusters == i]
            # An empty cluster keeps its previous centroid. Taking the mean
            # of zero points would yield NaN, and a NaN shift never compares
            # as converged.
            if len(members):
                centroids[i] = members.mean(axis=0)

        if dist(centroids, previous, None) <= tol:
            break

    return centroids, clusters


def main(csv_path='xclara.csv', k=2):
    """Load the CSV, cluster columns V1/V2 into ``k`` groups and plot them."""
    # Imported here so the clustering helpers stay importable on machines
    # without a plotting backend.
    from matplotlib import pyplot as plt

    plt.rcParams['figure.figsize'] = (16, 9)
    plt.style.use('ggplot')

    data = pd.read_csv(csv_path)
    print("Input Data and Shape")
    print(data.shape)
    print(data.head())

    f1 = data['V1'].values
    f2 = data['V2'].values
    X = np.column_stack((f1, f2))

    C = random_centroids(X, k)
    print("Initial Centroids")
    print(C)

    # First figure: raw data with the initial centroids.
    plt.scatter(f1, f2, c='#050505', s=7)
    plt.scatter(C[:, 0], C[:, 1], marker='*', s=200, c='g')

    C, clusters = kmeans(X, C)

    # Second figure: points coloured by cluster plus the final centroids.
    fig, ax = plt.subplots()
    for i in range(k):
        points = X[clusters == i]
        if len(points) == 0:
            continue  # nothing to draw for an empty cluster
        ax.scatter(points[:, 0], points[:, 1], s=7, c=COLORS[i % len(COLORS)])
    ax.scatter(C[:, 0], C[:, 1], marker='*', s=200, c='#050505')

    # Without this call nothing is displayed when the file runs as a script.
    plt.show()


if __name__ == "__main__":
    main()