diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/OpenBaseLab-Edu.iml b/.idea/OpenBaseLab-Edu.iml new file mode 100644 index 0000000..08fc73f --- /dev/null +++ b/.idea/OpenBaseLab-Edu.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..45d5172 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..29a39d1 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/BaseML/Classification.py b/BaseML/Classification.py index ecac105..1137280 100644 --- a/BaseML/Classification.py +++ b/BaseML/Classification.py @@ -8,10 +8,12 @@ from sklearn.neighbors import KNeighborsClassifier from sklearn.model_selection import train_test_split from sklearn.svm import SVC +from sklearn.cluster import KMeans +from sklearn.metrics import silhouette_score, silhouette_samples import joblib class cls: - def __init__(self, algorithm='KNN', n_neighbors=5, n_estimators=100, ): + def __init__(self, algorithm='KNN', n_neighbors=5, n_estimators=100, N_CLUSTERS=5): self.algorithm = algorithm self.cwd = 
os.path.dirname(os.getcwd()) # 获取当前文件的绝对路径 self.file_dirname = os.path.dirname(os.path.abspath(__file__)) @@ -29,6 +31,9 @@ def __init__(self, algorithm='KNN', n_neighbors=5, n_estimators=100, ): self.model = DecisionTreeClassifier() elif self.algorithm == 'AdaBoost': self.model = AdaBoostClassifier(n_estimators=n_estimators, random_state=0) + elif self.algorithm == 'Kmeans': + self.n = N_CLUSTERS + self.model = KMeans(self.n) def train(self, seed=0, data_type='csv'): if self.algorithm == 'AdaBoost' or 'SVM' or 'NaiveBayes': @@ -57,21 +62,38 @@ def train(self, seed=0, data_type='csv'): self.model.fit(self.x_train, self.y_train) acc = self.model.score(self.x_test, self.y_test) print('准确率为:{}%'.format(acc * 100)) + + elif self.algorithm == 'Kmeans': + self.dataset = pd.read_csv(self.dataset_path) + self.x_train = self.dataset.drop('省级行政区', axis=1) + self.x_train = self.x_train.drop("城市", axis=1) + self.x_np = np.array(self.x_train) + self.model.fit(self.x_np) def inference(self, data): if self.algorithm == 'AdaBoost' or 'SVM' or 'NaiveBayes': pred = self.model.predict(self.test_set['data']) acc = accuracy_score(self.test_set['label'], pred) print('准确率为:{}%'.format(acc * 100)) + elif self.algorithm == 'KNN': result = self.model.predict(data) print(result) print("分类结果:{}".format(self.dataset['target_names'][result])) + elif self.algorithm == 'CART': self.model.fit_transform(data) print(self.model.n_features_) print(self.model.n_samples_) + elif self.algorithm == 'Kmeans': + labels = self.model.labels_ # 获取聚类标签 + print(silhouette_score(self.x_np, labels)) # 获取聚类结果总的轮廓系数 + print(self.model.cluster_centers_) # 输出类簇中心 + for i in range(self.n): + print(f" CLUSTER-{i+1} ".center(60, '=')) + print(self.dataset[labels == i]) + def load_dataset(self, path, test_size=0.2, dataset=''): self.dataset_path = path self.test_size = test_size diff --git a/BaseML/__init__.py b/BaseML/__init__.py index 412a05e..3cffca0 100644 --- a/BaseML/__init__.py +++ b/BaseML/__init__.py @@ -5,6 +5,8 
@@ from .AdaBoost import AdaBoost from .GaussianNB import GaussianNB from .SVM import SVM +from .Classification import cls +from .Regression import reg __all__ = [ @@ -14,4 +16,6 @@ 'GaussianNB', 'KNN', 'PCA', - 'Perceptron'] + 'Perceptron', + 'cls', + 'reg'] diff --git a/BaseML/__pycache__/AdaBoost.cpython-39.pyc b/BaseML/__pycache__/AdaBoost.cpython-39.pyc new file mode 100644 index 0000000..6d40a48 Binary files /dev/null and b/BaseML/__pycache__/AdaBoost.cpython-39.pyc differ diff --git a/BaseML/__pycache__/CART.cpython-39.pyc b/BaseML/__pycache__/CART.cpython-39.pyc new file mode 100644 index 0000000..31318c1 Binary files /dev/null and b/BaseML/__pycache__/CART.cpython-39.pyc differ diff --git a/BaseML/__pycache__/Classification.cpython-39.pyc b/BaseML/__pycache__/Classification.cpython-39.pyc new file mode 100644 index 0000000..ae19f0a Binary files /dev/null and b/BaseML/__pycache__/Classification.cpython-39.pyc differ diff --git a/BaseML/__pycache__/GaussianNB.cpython-39.pyc b/BaseML/__pycache__/GaussianNB.cpython-39.pyc new file mode 100644 index 0000000..b7246da Binary files /dev/null and b/BaseML/__pycache__/GaussianNB.cpython-39.pyc differ diff --git a/BaseML/__pycache__/KNNClassifier.cpython-39.pyc b/BaseML/__pycache__/KNNClassifier.cpython-39.pyc new file mode 100644 index 0000000..1feab1f Binary files /dev/null and b/BaseML/__pycache__/KNNClassifier.cpython-39.pyc differ diff --git a/BaseML/__pycache__/PCA.cpython-39.pyc b/BaseML/__pycache__/PCA.cpython-39.pyc new file mode 100644 index 0000000..dcfbf14 Binary files /dev/null and b/BaseML/__pycache__/PCA.cpython-39.pyc differ diff --git a/BaseML/__pycache__/Perceptron.cpython-39.pyc b/BaseML/__pycache__/Perceptron.cpython-39.pyc new file mode 100644 index 0000000..ac062ae Binary files /dev/null and b/BaseML/__pycache__/Perceptron.cpython-39.pyc differ diff --git a/BaseML/__pycache__/Regression.cpython-39.pyc b/BaseML/__pycache__/Regression.cpython-39.pyc new file mode 100644 index 0000000..2935c9c 
Binary files /dev/null and b/BaseML/__pycache__/Regression.cpython-39.pyc differ
diff --git a/BaseML/__pycache__/SVM.cpython-39.pyc b/BaseML/__pycache__/SVM.cpython-39.pyc
new file mode 100644
index 0000000..0364a92
Binary files /dev/null and b/BaseML/__pycache__/SVM.cpython-39.pyc differ
diff --git a/BaseML/__pycache__/__init__.cpython-39.pyc b/BaseML/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000..f339be7
Binary files /dev/null and b/BaseML/__pycache__/__init__.cpython-39.pyc differ
diff --git a/demo/kemans.py b/demo/kemans.py
new file mode 100644
index 0000000..f34a7b6
--- /dev/null
+++ b/demo/kemans.py
@@ -0,0 +1,27 @@
+from BaseML import Classification
+
+N_CLUSTERS = 7  # number of clusters
+DATA_PATH = '~/Downloads/China_cities.csv'  # path to the dataset
+
+
+def city():
+    """Run the K-means clustering demo on the cities dataset.
+
+    Builds the 'Kmeans' model with N_CLUSTERS clusters, loads DATA_PATH,
+    then calls train(), inference() and save() in that order.
+    """
+    # `Classification` is the imported module; the model class is `cls`.
+    model = Classification.cls(algorithm='Kmeans', N_CLUSTERS=N_CLUSTERS)
+    model.load_dataset(path=DATA_PATH)
+    # NOTE(review): train()/inference() open with
+    # `self.algorithm == 'AdaBoost' or 'SVM' or 'NaiveBayes'`, which is
+    # always truthy, so their 'Kmeans' branches look unreachable - confirm.
+    model.train()
+    # inference() declares a required `data` parameter that the 'Kmeans'
+    # branch never reads, so None is passed.
+    model.inference(None)
+    model.save()
+
+
+if __name__ == '__main__':
+    city()