OpenXLab-Edu · CuriseJia · Sep 29, 2022 · Sep 28, 2022
diff --git a/.idea/.gitignore b/.idea/.gitignore
diff --git a/.idea/OpenBaseLab-Edu.iml b/.idea/OpenBaseLab-Edu.iml
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
diff --git a/.idea/misc.xml b/.idea/misc.xml
diff --git a/.idea/modules.xml b/.idea/modules.xml
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
diff --git a/BaseML/Classification.py b/BaseML/Classification.py
@@ -8,10 +8,12 @@
 from sklearn.neighbors import KNeighborsClassifier
 from sklearn.model_selection import train_test_split
 from sklearn.svm import SVC
+from sklearn.cluster import KMeans
+from sklearn.metrics import silhouette_score, silhouette_samples
 import joblib
 
 class cls:
-    def __init__(self, algorithm='KNN', n_neighbors=5, n_estimators=100, ):
+    def __init__(self, algorithm='KNN', n_neighbors=5, n_estimators=100, N_CLUSTERS=5):
         self.algorithm = algorithm
         self.cwd = os.path.dirname(os.getcwd())  # 获取当前文件的绝对路径
         self.file_dirname = os.path.dirname(os.path.abspath(__file__))
@@ -29,6 +31,9 @@ def __init__(self, algorithm='KNN', n_neighbors=5, n_estimators=100, ):
             self.model = DecisionTreeClassifier()
         elif self.algorithm == 'AdaBoost':
             self.model = AdaBoostClassifier(n_estimators=n_estimators, random_state=0)
+        elif self.algorithm == 'Kmeans':
+            self.n = N_CLUSTERS
+            self.model = KMeans(self.n)
 
     def train(self, seed=0, data_type='csv'):
         if self.algorithm == 'AdaBoost' or 'SVM' or 'NaiveBayes':
@@ -57,21 +62,38 @@ def train(self, seed=0, data_type='csv'):
             self.model.fit(self.x_train, self.y_train)
             acc = self.model.score(self.x_test, self.y_test)
             print('准确率为：{}%'.format(acc * 100))
+
+        elif self.algorithm == 'Kmeans':
+            self.dataset = pd.read_csv(self.dataset_path)
+            self.x_train = self.dataset.drop('省级行政区', axis=1)
+            self.x_train = self.x_train.drop("城市", axis=1)
+            self.x_np = np.array(self.x_train)
+            self.model.fit(self.x_np)
 
     def inference(self, data):
         if self.algorithm == 'AdaBoost' or 'SVM' or 'NaiveBayes':
             pred = self.model.predict(self.test_set['data'])
             acc = accuracy_score(self.test_set['label'], pred)
             print('准确率为：{}%'.format(acc * 100))
+
         elif self.algorithm == 'KNN':
             result = self.model.predict(data)
             print(result)
             print("分类结果：{}".format(self.dataset['target_names'][result]))
+
         elif self.algorithm == 'CART':
             self.model.fit_transform(data)
             print(self.model.n_features_)
             print(self.model.n_samples_)
 
+        elif self.algorithm == 'Kmeans':
+            labels = self.model.labels_      # 获取聚类标签
+            print(silhouette_score(self.x_np, labels))      # 获取聚类结果总的轮廓系数
+            print(self.model.cluster_centers_)	# 输出类簇中心
+            for i in range(self.n):
+                print(f" CLUSTER-{i+1} ".center(60, '='))
+                print(self.dataset[labels == i])
+
     def load_dataset(self, path, test_size=0.2, dataset=''):
         self.dataset_path = path
         self.test_size = test_size

diff --git a/BaseML/__init__.py b/BaseML/__init__.py
@@ -5,6 +5,8 @@
 from .AdaBoost import AdaBoost
 from .GaussianNB import GaussianNB
 from .SVM import SVM
+from .Classification import cls
+from .Regression import reg
 
 
 __all__ = [
@@ -14,4 +16,6 @@
     'GaussianNB',
     'KNN',
     'PCA',
-    'Perceptron']
+    'Perceptron',
+    'cls',
+    'reg']
diff --git a/BaseML/__pycache__/AdaBoost.cpython-39.pyc b/BaseML/__pycache__/AdaBoost.cpython-39.pyc
diff --git a/BaseML/__pycache__/CART.cpython-39.pyc b/BaseML/__pycache__/CART.cpython-39.pyc
diff --git a/BaseML/__pycache__/Classification.cpython-39.pyc b/BaseML/__pycache__/Classification.cpython-39.pyc
diff --git a/BaseML/__pycache__/GaussianNB.cpython-39.pyc b/BaseML/__pycache__/GaussianNB.cpython-39.pyc
diff --git a/BaseML/__pycache__/KNNClassifier.cpython-39.pyc b/BaseML/__pycache__/KNNClassifier.cpython-39.pyc
diff --git a/BaseML/__pycache__/PCA.cpython-39.pyc b/BaseML/__pycache__/PCA.cpython-39.pyc
diff --git a/BaseML/__pycache__/Perceptron.cpython-39.pyc b/BaseML/__pycache__/Perceptron.cpython-39.pyc
diff --git a/BaseML/__pycache__/Regression.cpython-39.pyc b/BaseML/__pycache__/Regression.cpython-39.pyc
diff --git a/BaseML/__pycache__/SVM.cpython-39.pyc b/BaseML/__pycache__/SVM.cpython-39.pyc
diff --git a/BaseML/__pycache__/__init__.cpython-39.pyc b/BaseML/__pycache__/__init__.cpython-39.pyc
diff --git a/demo/kemans.py b/demo/kemans.py
@@ -0,0 +1,12 @@
+from BaseML import Classification
+
+N_CLUSTERS = 7                                     # 类簇的数量
+DATA_PATH = '~/Downloads/China_cities.csv'              # 数据集路径
+
+
+def city():
+    """Cluster the cities in DATA_PATH with K-means and report the result.
+
+    Builds the model wrapper, loads the CSV, then runs train, inference
+    and save in that order.
+    """
+    # `Classification` is the imported module; the model class inside it is
+    # `cls`, so calling the module itself would raise TypeError.
+    # The cluster count comes from the N_CLUSTERS constant, not a literal.
+    model = Classification.cls(algorithm='Kmeans', N_CLUSTERS=N_CLUSTERS)
+    model.load_dataset(path=DATA_PATH)
+    model.train()
+    # inference() declares a `data` parameter; the K-means branch does not
+    # read it, so None is passed explicitly.
+    model.inference(None)
+    model.save()
+
+
+if __name__ == '__main__':
+    city()