Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/OpenBaseLab-Edu.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 23 additions & 1 deletion BaseML/Classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, silhouette_samples
import joblib

class cls:
def __init__(self, algorithm='KNN', n_neighbors=5, n_estimators=100, ):
def __init__(self, algorithm='KNN', n_neighbors=5, n_estimators=100, N_CLUSTERS=5):
self.algorithm = algorithm
self.cwd = os.path.dirname(os.getcwd()) # 获取当前文件的绝对路径
self.file_dirname = os.path.dirname(os.path.abspath(__file__))
Expand All @@ -29,6 +31,9 @@ def __init__(self, algorithm='KNN', n_neighbors=5, n_estimators=100, ):
self.model = DecisionTreeClassifier()
elif self.algorithm == 'AdaBoost':
self.model = AdaBoostClassifier(n_estimators=n_estimators, random_state=0)
elif self.algorithm == 'Kmeans':
self.n = N_CLUSTERS
self.model = KMeans(self.n)

def train(self, seed=0, data_type='csv'):
if self.algorithm == 'AdaBoost' or 'SVM' or 'NaiveBayes':
Expand Down Expand Up @@ -57,21 +62,38 @@ def train(self, seed=0, data_type='csv'):
self.model.fit(self.x_train, self.y_train)
acc = self.model.score(self.x_test, self.y_test)
print('准确率为:{}%'.format(acc * 100))

elif self.algorithm == 'Kmeans':
self.dataset = pd.read_csv(self.dataset_path)
self.x_train = self.dataset.drop('省级行政区', axis=1)
self.x_train = self.x_train.drop("城市", axis=1)
self.x_np = np.array(self.x_train)
self.model.fit(self.x_np)

def inference(self, data=None):
    """Run the inference / reporting step for the configured algorithm.

    The behaviour depends on ``self.algorithm``:

    * ``'AdaBoost'``, ``'SVM'``, ``'NaiveBayes'``: predict on
      ``self.test_set['data']`` and print the accuracy against
      ``self.test_set['label']``; ``data`` is not read.
    * ``'KNN'``: predict on ``data`` and print the raw prediction and the
      matching entry of ``self.dataset['target_names']``.
    * ``'CART'``: pass ``data`` to ``self.model.fit_transform`` and print
      two model attributes.
    * ``'Kmeans'``: print the silhouette score of the fitted clustering,
      the cluster centres, and the rows of ``self.dataset`` belonging to
      each cluster; ``data`` is not read.

    Args:
        data: Samples to run the model on. Only the KNN and CART branches
            use it, so it may be omitted for the other algorithms.
    """
    # A membership test is required here: the previous form
    # ``self.algorithm == 'AdaBoost' or 'SVM' or 'NaiveBayes'`` is always
    # truthy (a non-empty string is truthy), which made every other branch
    # unreachable.
    if self.algorithm in ('AdaBoost', 'SVM', 'NaiveBayes'):
        pred = self.model.predict(self.test_set['data'])
        acc = accuracy_score(self.test_set['label'], pred)
        print('准确率为:{}%'.format(acc * 100))

    elif self.algorithm == 'KNN':
        result = self.model.predict(data)
        print(result)
        print("分类结果:{}".format(self.dataset['target_names'][result]))

    elif self.algorithm == 'CART':
        # NOTE(review): confirm that the CART model actually exposes
        # fit_transform / n_features_ / n_samples_ -- not verifiable from
        # the visible code.
        self.model.fit_transform(data)
        print(self.model.n_features_)
        print(self.model.n_samples_)

    elif self.algorithm == 'Kmeans':
        labels = self.model.labels_  # cluster label assigned to each training row
        print(silhouette_score(self.x_np, labels))  # overall silhouette coefficient
        print(self.model.cluster_centers_)  # cluster centres
        for i in range(self.n):
            print(f" CLUSTER-{i+1} ".center(60, '='))
            print(self.dataset[labels == i])

def load_dataset(self, path, test_size=0.2, dataset=''):
self.dataset_path = path
self.test_size = test_size
Expand Down
6 changes: 5 additions & 1 deletion BaseML/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from .AdaBoost import AdaBoost
from .GaussianNB import GaussianNB
from .SVM import SVM
from .Classification import cls
from .Regression import reg


__all__ = [
Expand All @@ -14,4 +16,6 @@
'GaussianNB',
'KNN',
'PCA',
'Perceptron']
'Perceptron',
'cls',
'reg']
Binary file added BaseML/__pycache__/AdaBoost.cpython-39.pyc
Binary file not shown.
Binary file added BaseML/__pycache__/CART.cpython-39.pyc
Binary file not shown.
Binary file added BaseML/__pycache__/Classification.cpython-39.pyc
Binary file not shown.
Binary file added BaseML/__pycache__/GaussianNB.cpython-39.pyc
Binary file not shown.
Binary file added BaseML/__pycache__/KNNClassifier.cpython-39.pyc
Binary file not shown.
Binary file added BaseML/__pycache__/PCA.cpython-39.pyc
Binary file not shown.
Binary file added BaseML/__pycache__/Perceptron.cpython-39.pyc
Binary file not shown.
Binary file added BaseML/__pycache__/Regression.cpython-39.pyc
Binary file not shown.
Binary file added BaseML/__pycache__/SVM.cpython-39.pyc
Binary file not shown.
Binary file added BaseML/__pycache__/__init__.cpython-39.pyc
Binary file not shown.
12 changes: 12 additions & 0 deletions demo/kemans.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from BaseML import Classification

N_CLUSTERS = 7 # number of clusters
DATA_PATH = '~/Downloads/China_cities.csv' # path to the dataset


def city():
    """Cluster the cities listed in DATA_PATH with K-means and print the report.

    Builds the K-means wrapper with N_CLUSTERS clusters, loads the CSV at
    DATA_PATH, fits the model, prints the clustering report and then calls
    the model's ``save`` method.
    """
    # ``BaseML.Classification`` is a module and cannot be called; the
    # wrapper class it defines is ``cls``, which the BaseML package exports.
    from BaseML import cls

    # Use the module-level N_CLUSTERS constant instead of a separate literal
    # so the cluster count is configured in exactly one place.
    model = cls(algorithm='Kmeans', N_CLUSTERS=N_CLUSTERS)
    model.load_dataset(path=DATA_PATH)
    model.train()
    # The K-means report is computed from the fitted training data and does
    # not read the ``data`` argument, so pass None explicitly.
    model.inference(None)
    model.save()