diff --git a/KNN.py b/KNN.py
new file mode 100644
--- /dev/null
+++ b/KNN.py
@@ -0,0 +1,70 @@
+"""Thin teaching wrapper around scikit-learn's k-nearest-neighbours classifier."""
+import os
+
+from sklearn.model_selection import train_test_split
+from sklearn.neighbors import KNeighborsClassifier
+
+
+class KNNClassifier:
+    """Load a dataset, fit a ``KNeighborsClassifier`` and classify new samples."""
+
+    def __init__(self,
+                 backbone='KNNClassifier',
+                 n_neighbors=10,
+                 ):
+        """Create an unfitted classifier.
+
+        Args:
+            backbone: Label stored on the instance; nothing in this class reads it.
+            n_neighbors: Forwarded to ``KNeighborsClassifier(n_neighbors=...)``.
+        """
+        self.backbone = backbone
+        # Parent directory of the current working directory.
+        self.cwd = os.path.dirname(os.getcwd())
+        # Directory that contains this source file.
+        self.file_dirname = os.path.dirname(os.path.abspath(__file__))
+        # Filled in by load_dataset(); None means nothing has been loaded yet.
+        self.dataset = None
+        self.x_train, self.x_test, self.y_train, self.y_test = None, None, None, None
+        self.model = KNeighborsClassifier(n_neighbors=n_neighbors)
+
+    def train(self):
+        """Fit on the training split and print accuracy on the test split.
+
+        Returns:
+            The value returned by ``self.model.score`` on the test split.
+
+        Raises:
+            ValueError: If load_dataset() has not been called.
+        """
+        if self.x_train is None:
+            raise ValueError('No dataset loaded; call load_dataset() before train().')
+        self.model.fit(self.x_train, self.y_train)
+        acc = self.model.score(self.x_test, self.y_test)
+        print('准确率为:{}%'.format(acc * 100))
+        return acc
+
+    def inference(self, data):
+        """Predict labels for ``data`` and print them with their class names.
+
+        Args:
+            data: Samples to classify, passed straight to ``self.model.predict``.
+
+        Returns:
+            The array returned by ``self.model.predict``.
+        """
+        result = self.model.predict(data)
+        print(result)
+        print("分类结果:{}".format(self.dataset['target_names'][result]))
+        return result
+
+    def load_dataset(self, dataset):
+        """Store ``dataset`` and split it into train and test sets (test_size=0.2).
+
+        Args:
+            dataset: Object indexable by ``'data'``, ``'target'`` and
+                ``'target_names'`` (ml_demo.py passes ``load_wine()``).
+        """
+        self.dataset = dataset
+        self.x_train, self.x_test, self.y_train, self.y_test = train_test_split(
+            self.dataset['data'], self.dataset['target'], test_size=0.2, random_state=0)
diff --git a/Mlearing_Edu.py b/Mlearing_Edu.py
new file mode 100644
--- /dev/null
+++ b/Mlearing_Edu.py
@@ -0,0 +1,86 @@
+"""Random-forest classification demo driven by a header-less CSV file."""
+import os
+
+import numpy as np
+import pandas as pd
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import accuracy_score
+from sklearn.model_selection import train_test_split
+
+
+class MMMlearing:
+    """Train a ``RandomForestClassifier`` on a CSV whose last column is the label."""
+
+    def __init__(self,
+                 backbone='RandomForest'
+                 ):
+        """Create an unfitted model.
+
+        Args:
+            backbone: Label stored on the instance; nothing in this class reads it.
+        """
+        self.backbone = backbone
+        # Parent directory of the current working directory.
+        self.cwd = os.path.dirname(os.getcwd())
+        # Directory that contains this source file.
+        self.file_dirname = os.path.dirname(os.path.abspath(__file__))
+        self.model = RandomForestClassifier()
+        # Defined up front so that train()/inference() called too early fail
+        # with a clear message instead of an AttributeError.
+        self.dataset_path = None
+        self.test_size = 0.2
+        self.test_set = None
+
+    def train(self, seed=0):
+        """Shuffle the CSV rows, hold out the tail as a test set and fit the model.
+
+        Args:
+            seed: Seed for NumPy's global random state.
+
+        Raises:
+            ValueError: If load_dataset() has not been called.
+        """
+        if self.dataset_path is None:
+            raise ValueError('No dataset path set; call load_dataset() before train().')
+        # The seed is applied to the global NumPy RNG on purpose.
+        # NOTE(review): self.model is built without random_state, so it presumably
+        # draws from this same global state; a local generator here would then
+        # make the fit non-reproducible. Confirm before changing.
+        np.random.seed(seed)
+        dataset = pd.read_csv(self.dataset_path, sep=',', header=None).values
+        np.random.shuffle(dataset)
+        data, label = dataset[:, :-1], dataset[:, -1]
+        # The first (1 - test_size) share of the shuffled rows is the training set.
+        train_index = int((1 - self.test_size) * len(dataset))
+        train_data, train_label = data[:train_index, :], label[:train_index]
+        self.test_set = {
+            'data': data[train_index:, :],
+            'label': label[train_index:],
+        }
+        self.model.fit(train_data, train_label)
+
+    def inference(self):
+        """Score the fitted model on the held-out rows and print the accuracy.
+
+        Returns:
+            The value returned by ``accuracy_score`` for the held-out rows.
+
+        Raises:
+            ValueError: If train() has not been called.
+        """
+        if self.test_set is None:
+            raise ValueError('No test set available; call train() before inference().')
+        pred = self.model.predict(self.test_set['data'])
+        acc = accuracy_score(self.test_set['label'], pred)
+        print('准确率为:{}%'.format(acc * 100))
+        return acc
+
+    def load_dataset(self, path, test_size=0.2):
+        """Remember where the CSV lives and how much of it to hold out.
+
+        Args:
+            path: Location of the CSV file, passed to ``pandas.read_csv``.
+            test_size: Fraction of rows kept for testing.
+        """
+        self.dataset_path = path
+        self.test_size = test_size
diff --git a/PCA.py b/PCA.py
new file mode 100644
--- /dev/null
+++ b/PCA.py
@@ -0,0 +1,77 @@
+"""Thin teaching wrapper around scikit-learn's PCA."""
+import os
+
+# Imported under an alias: the wrapper class below is also named PCA, and
+# without the alias it shadows the estimator and ends up constructing itself.
+from sklearn.decomposition import PCA as _SklearnPCA
+
+
+class PCA:
+    """Fit a scikit-learn PCA on a dataset and project samples with it."""
+
+    def __init__(self,
+                 backbone='KNNClassifier',
+                 n_components='mle',
+                 ):
+        """Create an unfitted PCA model.
+
+        Args:
+            backbone: Label stored on the instance; nothing in this class reads it.
+                NOTE(review): the default looks copied from KNN.py; it is kept so
+                the signature stays unchanged.
+            n_components: Forwarded to scikit-learn's ``PCA(n_components=...)``.
+        """
+        self.backbone = backbone
+        # Parent directory of the current working directory.
+        self.cwd = os.path.dirname(os.getcwd())
+        # Directory that contains this source file.
+        self.file_dirname = os.path.dirname(os.path.abspath(__file__))
+        # Filled in by load_dataset(); None means nothing has been loaded yet.
+        self.dataset = None
+        self.x_train, self.x_test = 0, 0
+        self.model = _SklearnPCA(n_components=n_components)
+
+    def train(self):
+        """Fit the model on the loaded dataset and print the variance ratios.
+
+        Raises:
+            ValueError: If load_dataset() has not been called.
+        """
+        if self.dataset is None:
+            raise ValueError('No dataset loaded; call load_dataset() before train().')
+        self.model.fit(self.dataset)
+        # Share of the total variance explained by each retained component.
+        print(self.model.explained_variance_ratio_)
+
+    def inference(self, data):
+        """Project ``data`` onto the principal components and print model sizes.
+
+        A model already fitted by train() is reused as is; only an unfitted
+        model is fitted on ``data``, so calling inference() on its own still works.
+
+        Args:
+            data: Samples to project.
+
+        Returns:
+            The projected samples.
+        """
+        if hasattr(self.model, 'components_'):
+            projected = self.model.transform(data)
+        else:
+            projected = self.model.fit_transform(data)
+        # n_features_ was deprecated in scikit-learn 1.2 in favour of
+        # n_features_in_; fall back to the old name on older releases.
+        n_features = getattr(self.model, 'n_features_in_', None)
+        if n_features is None:
+            n_features = self.model.n_features_
+        print(n_features)
+        print(self.model.n_samples_)
+        return projected
+
+    def load_dataset(self, dataset):
+        """Store the data that train() will fit on.
+
+        Args:
+            dataset: Samples accepted by scikit-learn's ``PCA.fit``.
+        """
+        self.dataset = dataset
diff --git a/ml_demo.py b/ml_demo.py
new file mode 100644
--- /dev/null
+++ b/ml_demo.py
@@ -0,0 +1,31 @@
+"""Runnable examples for the KNN and PCA wrappers."""
+import numpy as np
+from sklearn.datasets import load_wine
+
+from KNN import KNNClassifier
+from PCA import PCA
+
+
+def knn_demo():
+    """Fit the KNN wrapper on the wine dataset and classify one sample."""
+    model = KNNClassifier(n_neighbors=10)
+    model.load_dataset(dataset=load_wine())
+    model.train()
+    # A single sample with 13 feature values.
+    test_data = np.array(
+        [[11.8, 4.39, 2.39, 29, 82, 2.86, 3.53, 0.21, 2.85, 2.8, 0.75, 3.78, 490]])
+    model.inference(test_data)
+
+
+def pca_demo():
+    """Fit the PCA wrapper on six 2-D points and project the same points."""
+    data = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
+    model = PCA(n_components='mle')
+    model.load_dataset(dataset=data)
+    model.train()
+    model.inference(data)
+
+
+if __name__ == '__main__':
+    knn_demo()
+    pca_demo()