diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/OpenBaseLab-Edu.iml b/.idea/OpenBaseLab-Edu.iml
new file mode 100644
index 0000000..08fc73f
--- /dev/null
+++ b/.idea/OpenBaseLab-Edu.iml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..45d5172
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..29a39d1
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/BaseML/Classification.py b/BaseML/Classification.py
index ecac105..1137280 100644
--- a/BaseML/Classification.py
+++ b/BaseML/Classification.py
@@ -8,10 +8,12 @@
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
+from sklearn.cluster import KMeans
+from sklearn.metrics import silhouette_score, silhouette_samples
import joblib
class cls:
- def __init__(self, algorithm='KNN', n_neighbors=5, n_estimators=100, ):
+ def __init__(self, algorithm='KNN', n_neighbors=5, n_estimators=100, N_CLUSTERS=5):
self.algorithm = algorithm
self.cwd = os.path.dirname(os.getcwd()) # 获取当前文件的绝对路径
self.file_dirname = os.path.dirname(os.path.abspath(__file__))
@@ -29,6 +31,9 @@ def __init__(self, algorithm='KNN', n_neighbors=5, n_estimators=100, ):
self.model = DecisionTreeClassifier()
elif self.algorithm == 'AdaBoost':
self.model = AdaBoostClassifier(n_estimators=n_estimators, random_state=0)
+ elif self.algorithm == 'Kmeans':
+ self.n = N_CLUSTERS
+ self.model = KMeans(self.n)
def train(self, seed=0, data_type='csv'):
+ # Fit self.model; which branch runs is selected by self.algorithm.
- if self.algorithm == 'AdaBoost' or 'SVM' or 'NaiveBayes':
+ # The chained `== 'AdaBoost' or 'SVM' or 'NaiveBayes'` was always truthy
+ # (a non-empty string is true), so no later elif branch could ever run.
+ # A membership test expresses the intended three-way match.
+ if self.algorithm in ('AdaBoost', 'SVM', 'NaiveBayes'):
@@ -57,21 +62,38 @@ def train(self, seed=0, data_type='csv'):
self.model.fit(self.x_train, self.y_train)
acc = self.model.score(self.x_test, self.y_test)
print('准确率为:{}%'.format(acc * 100))
+
+ elif self.algorithm == 'Kmeans':
+ # Clustering uses every row of the CSV; no labels or split are read here.
+ self.dataset = pd.read_csv(self.dataset_path)
+ # Drop the '省级行政区' and '城市' columns before clustering
+ # (presumably the only non-numeric columns -- confirm against the dataset).
+ self.x_train = self.dataset.drop('省级行政区', axis=1)
+ self.x_train = self.x_train.drop("城市", axis=1)
+ # Kept on the instance because inference() scores the clustering on it.
+ self.x_np = np.array(self.x_train)
+ self.model.fit(self.x_np)
- def inference(self, data):
+ # Print the evaluation output for the fitted model.
+ # data: samples to predict on; only the KNN and CART branches read it, so it
+ # defaults to None and callers of the other algorithms may omit it.
+ def inference(self, data=None):
- if self.algorithm == 'AdaBoost' or 'SVM' or 'NaiveBayes':
+ # Same always-truthy `or` chain as in train(); replaced by a membership test
+ # so the KNN / CART / Kmeans branches below become reachable.
+ if self.algorithm in ('AdaBoost', 'SVM', 'NaiveBayes'):
pred = self.model.predict(self.test_set['data'])
acc = accuracy_score(self.test_set['label'], pred)
print('准确率为:{}%'.format(acc * 100))
+
elif self.algorithm == 'KNN':
result = self.model.predict(data)
print(result)
print("分类结果:{}".format(self.dataset['target_names'][result]))
+
elif self.algorithm == 'CART':
+ # NOTE(review): this branch was unreachable before the condition fix. If
+ # self.model is a DecisionTreeClassifier (as __init__ appears to set), it has
+ # no fit_transform / n_samples_ in current scikit-learn -- confirm and repair.
self.model.fit_transform(data)
print(self.model.n_features_)
print(self.model.n_samples_)
+ elif self.algorithm == 'Kmeans':
+ labels = self.model.labels_ # cluster index assigned to each training row
+ print(silhouette_score(self.x_np, labels)) # mean silhouette coefficient over all samples
+ print(self.model.cluster_centers_) # coordinates of the cluster centres
+ for i in range(self.n):
+ print(f" CLUSTER-{i+1} ".center(60, '='))
+ print(self.dataset[labels == i])
+
def load_dataset(self, path, test_size=0.2, dataset=''):
self.dataset_path = path
self.test_size = test_size
diff --git a/BaseML/__init__.py b/BaseML/__init__.py
index 412a05e..3cffca0 100644
--- a/BaseML/__init__.py
+++ b/BaseML/__init__.py
@@ -5,6 +5,8 @@
from .AdaBoost import AdaBoost
from .GaussianNB import GaussianNB
from .SVM import SVM
+from .Classification import cls
+from .Regression import reg
__all__ = [
@@ -14,4 +16,6 @@
'GaussianNB',
'KNN',
'PCA',
- 'Perceptron']
+ 'Perceptron',
+ 'cls',
+ 'reg']
diff --git a/BaseML/__pycache__/AdaBoost.cpython-39.pyc b/BaseML/__pycache__/AdaBoost.cpython-39.pyc
new file mode 100644
index 0000000..6d40a48
Binary files /dev/null and b/BaseML/__pycache__/AdaBoost.cpython-39.pyc differ
diff --git a/BaseML/__pycache__/CART.cpython-39.pyc b/BaseML/__pycache__/CART.cpython-39.pyc
new file mode 100644
index 0000000..31318c1
Binary files /dev/null and b/BaseML/__pycache__/CART.cpython-39.pyc differ
diff --git a/BaseML/__pycache__/Classification.cpython-39.pyc b/BaseML/__pycache__/Classification.cpython-39.pyc
new file mode 100644
index 0000000..ae19f0a
Binary files /dev/null and b/BaseML/__pycache__/Classification.cpython-39.pyc differ
diff --git a/BaseML/__pycache__/GaussianNB.cpython-39.pyc b/BaseML/__pycache__/GaussianNB.cpython-39.pyc
new file mode 100644
index 0000000..b7246da
Binary files /dev/null and b/BaseML/__pycache__/GaussianNB.cpython-39.pyc differ
diff --git a/BaseML/__pycache__/KNNClassifier.cpython-39.pyc b/BaseML/__pycache__/KNNClassifier.cpython-39.pyc
new file mode 100644
index 0000000..1feab1f
Binary files /dev/null and b/BaseML/__pycache__/KNNClassifier.cpython-39.pyc differ
diff --git a/BaseML/__pycache__/PCA.cpython-39.pyc b/BaseML/__pycache__/PCA.cpython-39.pyc
new file mode 100644
index 0000000..dcfbf14
Binary files /dev/null and b/BaseML/__pycache__/PCA.cpython-39.pyc differ
diff --git a/BaseML/__pycache__/Perceptron.cpython-39.pyc b/BaseML/__pycache__/Perceptron.cpython-39.pyc
new file mode 100644
index 0000000..ac062ae
Binary files /dev/null and b/BaseML/__pycache__/Perceptron.cpython-39.pyc differ
diff --git a/BaseML/__pycache__/Regression.cpython-39.pyc b/BaseML/__pycache__/Regression.cpython-39.pyc
new file mode 100644
index 0000000..2935c9c
Binary files /dev/null and b/BaseML/__pycache__/Regression.cpython-39.pyc differ
diff --git a/BaseML/__pycache__/SVM.cpython-39.pyc b/BaseML/__pycache__/SVM.cpython-39.pyc
new file mode 100644
index 0000000..0364a92
Binary files /dev/null and b/BaseML/__pycache__/SVM.cpython-39.pyc differ
diff --git a/BaseML/__pycache__/__init__.cpython-39.pyc b/BaseML/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000..f339be7
Binary files /dev/null and b/BaseML/__pycache__/__init__.cpython-39.pyc differ
diff --git a/demo/kemans.py b/demo/kemans.py
new file mode 100644
index 0000000..f34a7b6
--- /dev/null
+++ b/demo/kemans.py
@@ -0,0 +1,12 @@
+"""Demo: K-means clustering of the China_cities CSV through the BaseML `cls` wrapper."""
+# `BaseML.Classification` is a module, not a class; the class it defines is `cls`,
+# which this patch also re-exports from the BaseML package.
+from BaseML import cls
+
+N_CLUSTERS = 7  # number of clusters
+DATA_PATH = '~/Downloads/China_cities.csv'  # path to the dataset CSV
+
+
+def city():
+    """Fit K-means on DATA_PATH with N_CLUSTERS clusters and print the result."""
+    # Use the module constant instead of a second hard-coded cluster count.
+    model = cls(algorithm='Kmeans', N_CLUSTERS=N_CLUSTERS)
+    model.load_dataset(path=DATA_PATH)
+    model.train()
+    # inference() declares a `data` parameter that the Kmeans branch does not
+    # read; pass it explicitly so the call matches the signature.
+    model.inference(data=None)
+    # NOTE(review): save() is not visible in this patch -- confirm it takes no arguments.
+    model.save()
+
+
+if __name__ == '__main__':
+    city()