diff --git a/BaseML/Classification.py b/BaseML/Classification.py index 1137280..75fa2b5 100644 --- a/BaseML/Classification.py +++ b/BaseML/Classification.py @@ -6,11 +6,13 @@ from sklearn.ensemble import AdaBoostClassifier from sklearn.tree import DecisionTreeClassifier from sklearn.neighbors import KNeighborsClassifier +from sklearn.neural_network import MLPClassifier from sklearn.model_selection import train_test_split from sklearn.svm import SVC from sklearn.cluster import KMeans from sklearn.metrics import silhouette_score, silhouette_samples import joblib +import random class cls: def __init__(self, algorithm='KNN', n_neighbors=5, n_estimators=100, N_CLUSTERS=5): @@ -18,7 +20,7 @@ def __init__(self, algorithm='KNN', n_neighbors=5, n_estimators=100, N_CLUSTERS= self.cwd = os.path.dirname(os.getcwd()) # 获取当前文件的绝对路径 self.file_dirname = os.path.dirname(os.path.abspath(__file__)) self.dataset_path = ' ' - self.test_size = ' ' + self.test_size = 0.2 self.test_set = ' ' self.x_train, self.x_test, self.y_train, self.y_test = 0, 0, 0, 0 if self.algorithm == 'KNN': @@ -34,75 +36,133 @@ def __init__(self, algorithm='KNN', n_neighbors=5, n_estimators=100, N_CLUSTERS= elif self.algorithm == 'Kmeans': self.n = N_CLUSTERS self.model = KMeans(self.n) + elif self.algorithm == 'MLP': + self.model = MLPClassifier(solver='lbfgs') def train(self, seed=0, data_type='csv'): - if self.algorithm == 'AdaBoost' or 'SVM' or 'NaiveBayes': + if self.algorithm in ['AdaBoost','SVM','NaiveBayes', 'MLP']: np.random.seed(seed) - if data_type == 'csv': - dataset = pd.read_csv(self.dataset_path, sep=',', header=None).values - np.random.shuffle(dataset) - - data, label = dataset[:, :-1], dataset[:, -1] - train_index = int((1 - self.test_size) * len(dataset)) - train_data, train_label = data[:train_index, ], label[:train_index] - self.test_set = { - 'data': data[train_index:, ], - 'label': label[train_index:] - } - self.model.fit(train_data, train_label) + np.random.shuffle(self.dataset) + + self.x_train, self.x_test, self.y_train, self.y_test = \ + train_test_split(self.x_train, self.y_train, test_size=0.2, random_state=0) + self.model.fit(self.x_train, self.y_train) elif self.algorithm == 'CART': - self.model.fit(self.dataset) - print(self.model.explained_variance_ratio_) + self.model.fit(self.x_train, self.y_train) + # print(self.model.explained_variance_ratio_) # 返回所保留的n个成分各自的方差百分比,这里可以理解为单个变量方差贡献率。 elif self.algorithm == 'KNN': self.x_train, self.x_test, self.y_train, self.y_test = \ - train_test_split(self.dataset['data'], self.dataset['target'], test_size=0.2, random_state=0) + train_test_split(self.x_train, self.y_train, test_size=0.2, random_state=0) self.model.fit(self.x_train, self.y_train) acc = self.model.score(self.x_test, self.y_test) print('准确率为:{}%'.format(acc * 100)) elif self.algorithm == 'Kmeans': - self.dataset = pd.read_csv(self.dataset_path) - self.x_train = self.dataset.drop('省级行政区', axis=1) - self.x_train = self.x_train.drop("城市", axis=1) - self.x_np = np.array(self.x_train) - self.model.fit(self.x_np) - - def inference(self, data): - if self.algorithm == 'AdaBoost' or 'SVM' or 'NaiveBayes': - pred = self.model.predict(self.test_set['data']) - acc = accuracy_score(self.test_set['label'], pred) - print('准确率为:{}%'.format(acc * 100)) + # 对列数据进行文本过滤,只抽取有数据的列 + delete_list = [] + if self.x_train.ndim >= 2: + for col_idx in range(self.x_train.shape[1]): + col = self.x_train[:,col_idx] + # 随机取一个元素,查看其type + if isinstance(random.choice(col),str): + delete_list.append(col_idx) - elif self.algorithm == 'KNN': - result = self.model.predict(data) - print(result) - print("分类结果:{}".format(self.dataset['target_names'][result])) + self.x_train = np.delete(self.x_train, delete_list, axis=1) + self.model.fit(self.x_train) - elif self.algorithm == 'CART': - self.model.fit_transform(data) - print(self.model.n_features_) - print(self.model.n_samples_) + def inference(self, data = np.nan): + if data is not np.nan: # 对data进行了指定 + self.x_test = data + + if self.algorithm in ['AdaBoost','SVM','NaiveBayes', 'MLP','KNN','CART']: + pred = self.model.predict(self.x_test) + return pred elif self.algorithm == 'Kmeans': labels = self.model.labels_ # 获取聚类标签 - print(silhouette_score(self.x_np, labels)) # 获取聚类结果总的轮廓系数 + print(silhouette_score(self.x_train, labels)) # 获取聚类结果总的轮廓系数 print(self.model.cluster_centers_) # 输出类簇中心 for i in range(self.n): print(f" CLUSTER-{i+1} ".center(60, '=')) print(self.dataset[labels == i]) - def load_dataset(self, path, test_size=0.2, dataset=''): - self.dataset_path = path - self.test_size = test_size - self.dataset=dataset + pred = self.model.predict(self.x_test) + return pred - def save(self): + + # 从文件加载数据集,支持csv文件和txt文件 + def load_dataset_from_file(self, path, x_column = [], y_column = []): + if type == 'csv': + self.dataset = pd.read_csv(path).values # .values就转成numpy格式了 + X = self.dataset[:,x_column] + y = self.dataset[:,y_column] + self.get_data(X,y,x_column,y_column) + elif type == 'txt': + self.dataset = np.loadtxt(path) + X = self.dataset[:,x_column] + y = self.dataset[:,y_column] + X = X.values + y = y.values + self.get_data(X,y,x_column,y_column) + + # 从数据加载数据集,支持['numpy','list','DataFrame'] + def load_dataset_from_data(self, X, y = None, x_column = [], y_column = []): + if type(X) != type(y): + raise TypeError("数据格式不同,无法加载") + if isinstance(X,list): + X = np.array(X) + y = np.array(y) + self.get_data(X,y,x_column,y_column) + elif isinstance(X,np.ndarray): + self.get_data(X,y,x_column,y_column) + elif isinstance(X,pd.DataFrame): + X = X.values + y = y.values + self.get_data(X,y,x_column,y_column) + + + + # 支持的type有['csv', 'numpy','pandas','list','txt],后面一律转为numpy格式 + def load_dataset(self, X, y = None, type = None, x_column = [], y_column = []): + if len(x_column) == 0: + raise ValueError("请传入数据列号") + if type == 'csv': + self.dataset = pd.read_csv(X).values # .values就转成numpy格式了 + self.get_data(self.dataset,self.dataset,x_column,y_column) + elif type == 'numpy': # 统一转成numpy格式 + self.get_data(X,y,x_column,y_column) + elif type == 'pandas': + X = X.values + y = y.values + self.get_data(X,y,x_column,y_column) + elif type == 'list': + X = np.array(X) + y = np.array(y) + self.get_data(X,y,x_column,y_column) + elif type == 'txt': + self.dataset = np.loadtxt(X) + self.dataset = self.dataset.values + self.get_data(self.dataset,self.dataset,x_column,y_column) + + + def save(self,path="checkpoint.pkl"): print("Saving model checkpoints...") - joblib.dump(self.model, '../checkpoint.pkl', compress=3) + joblib.dump(self.model, path, compress=3) + print("Saved successfully!") def load(self, path): - joblib.load(path) \ No newline at end of file + self.model = joblib.load(path) + + def get_data(self,X,y,x_column,y_column): + if len(X): + self.x_train = X[:,x_column] + if len(y): # + if y.ndim == 1: + y = y.reshape(-1,1) + self.y_train = y[:,y_column] + if self.y_train.shape[0]: + self.dataset = np.concatenate((self.x_train,self.y_train),axis=1) # 按列进行拼接 diff --git a/BaseML/Regression.py b/BaseML/Regression.py index ef918ad..5fb9496 100644 --- a/BaseML/Regression.py +++ b/BaseML/Regression.py @@ -52,10 +52,66 @@ def inference(self, data): print(self.model.n_features_) print(self.model.n_samples_) - def load_dataset(self,path,test_size=0.2, dataset=''): - self.dataset_path = path - self.test_size = test_size - self.dataset = dataset + # 从文件加载数据集,支持csv文件和txt文件 + def load_dataset_from_file(self, path, x_column = [], y_column = []): + if type == 'csv': + self.dataset = pd.read_csv(path).values # .values就转成numpy格式了 + self.get_data(X,y,x_column,y_column) + elif type == 'txt': + self.dataset = np.loadtxt(path) + X = X.values + y = y.values + self.get_data(X,y,x_column,y_column) + + # 从数据加载数据集,支持['numpy','list','DataFrame'] + def load_dataset_from_data(self, X, y = None, x_column = [], y_column = []): + if type(X) != type(y): + raise TypeError("数据格式不同,无法加载") + if isinstance(X,list): + X = np.array(X) + y = np.array(y) + self.get_data(X,y,x_column,y_column) + elif isinstance(X,np.ndarray): + self.get_data(X,y,x_column,y_column) + elif isinstance(X,pd.DataFrame): + X = X.values + y = y.values + self.get_data(X,y,x_column,y_column) + + + + # 支持的type有['csv', 'numpy','pandas','list','txt],后面一律转为numpy格式 + def load_dataset(self, X, y = None, type = None, x_column = [], y_column = []): + if len(x_column) == 0: + raise ValueError("请传入数据列号") + if type == 'csv': + self.dataset = pd.read_csv(X).values # .values就转成numpy格式了 + self.get_data(X,y,x_column,y_column) + elif type == 'numpy': # 统一转成numpy格式 + self.get_data(X,y,x_column,y_column) + elif type == 'pandas': + X = X.values + y = y.values + self.get_data(X,y,x_column,y_column) + elif type == 'list': + X = np.array(X) + y = np.array(y) + self.get_data(X,y,x_column,y_column) + elif type == 'txt': + self.dataset = np.loadtxt(X) + X = X.values + y = y.values + self.get_data(X,y,x_column,y_column) + + def get_data(self,X,y,x_column,y_column): + if len(X): + self.x_train = X[:,x_column] + if len(y): # + if y.ndim == 1: + y = y.reshape(-1,1) + self.y_train = y[:,y_column] + if self.y_train.shape[0]: + self.dataset = np.concatenate((self.x_train,self.y_train),axis=1) # 按列进行拼接 def save(self): diff --git a/BaseML/__pycache__/AdaBoost.cpython-37.pyc b/BaseML/__pycache__/AdaBoost.cpython-37.pyc new file mode 100644 index 0000000..d5ff5e1 Binary files /dev/null and b/BaseML/__pycache__/AdaBoost.cpython-37.pyc differ diff --git a/BaseML/__pycache__/CART.cpython-37.pyc b/BaseML/__pycache__/CART.cpython-37.pyc new file mode 100644 index 0000000..51b93c1 Binary files /dev/null and b/BaseML/__pycache__/CART.cpython-37.pyc differ diff --git a/BaseML/__pycache__/Classification.cpython-37.pyc b/BaseML/__pycache__/Classification.cpython-37.pyc new file mode 100644 index 0000000..d2e2e40 Binary files /dev/null and b/BaseML/__pycache__/Classification.cpython-37.pyc differ diff --git a/BaseML/__pycache__/GaussianNB.cpython-37.pyc b/BaseML/__pycache__/GaussianNB.cpython-37.pyc new file mode 100644 index 0000000..5e8c4d1 Binary files /dev/null and b/BaseML/__pycache__/GaussianNB.cpython-37.pyc differ diff --git a/BaseML/__pycache__/KNNClassifier.cpython-37.pyc b/BaseML/__pycache__/KNNClassifier.cpython-37.pyc new file mode 100644 index 0000000..977597a Binary files /dev/null and b/BaseML/__pycache__/KNNClassifier.cpython-37.pyc differ diff --git a/BaseML/__pycache__/PCA.cpython-37.pyc b/BaseML/__pycache__/PCA.cpython-37.pyc new file mode 100644 index 0000000..6ede3ed Binary files /dev/null and b/BaseML/__pycache__/PCA.cpython-37.pyc differ diff --git a/BaseML/__pycache__/Perceptron.cpython-37.pyc b/BaseML/__pycache__/Perceptron.cpython-37.pyc new file mode 100644 index 0000000..2e17068 Binary files /dev/null and b/BaseML/__pycache__/Perceptron.cpython-37.pyc differ diff --git a/BaseML/__pycache__/Regression.cpython-37.pyc b/BaseML/__pycache__/Regression.cpython-37.pyc new file mode 100644 index 0000000..b0976b0 Binary files /dev/null and b/BaseML/__pycache__/Regression.cpython-37.pyc differ diff --git a/BaseML/__pycache__/SVM.cpython-37.pyc b/BaseML/__pycache__/SVM.cpython-37.pyc new file mode 100644 index 0000000..2e354bd Binary files /dev/null and b/BaseML/__pycache__/SVM.cpython-37.pyc differ diff --git a/BaseML/__pycache__/__init__.cpython-37.pyc b/BaseML/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..162d437 Binary files /dev/null and b/BaseML/__pycache__/__init__.cpython-37.pyc differ diff --git a/Downloads/China_cities.csv b/Downloads/China_cities.csv new file mode 100644 index 0000000..a8bb95e --- /dev/null +++ b/Downloads/China_cities.csv @@ -0,0 +1,352 @@ +省级行政区,城市,北纬,东经 +北京,北京市,39.904690,116.407170 +天津,天津市,39.085100,117.199370 +上海,上海市,31.230370,121.473700 +重庆,重庆市,29.564710,106.550730 +香港特别行政区,九龙,22.327115,114.17495 +香港特别行政区,新界,22.341766,114.202408 +香港特别行政区,香港岛,22.266416,114.177314 +澳门特别行政区,路环岛,22.116226,113.564857 +澳门特别行政区,澳门半岛,22.198751,113.549134 +澳门特别行政区,氹仔岛,22.156838,113.577669 +台湾省,台中市,24.138620,120.679510 +台湾省,台北市,25.037798,121.565170 +台湾省,台南市,23.172478,120.279363 +台湾省,嘉义市,23.481568,120.452538 +台湾省,高雄市,22.620856,120.286795 +台湾省,基隆市,25.130741,121.746248 +台湾省,新北市,25.012366,121.465746 +河北省,石家庄市,38.042760,114.514300 +河北省,唐山市,39.630480,118.180580 +河北省,秦皇岛市,39.935450,119.599640 +河北省,邯郸市,36.625560,114.539180 +河北省,邢台市,37.070550,114.504430 +河北省,保定市,38.873960,115.464590 +河北省,张家口市,40.824440,114.887550 +河北省,承德市,40.951500,117.963400 +河北省,沧州市,38.304410,116.838690 +河北省,廊坊市,39.537750,116.683760 +河北省,衡水市,37.738860,115.670540 +河南省,郑州市,34.747250,113.624930 +河南省,开封市,34.797260,114.307310 +河南省,洛阳市,34.618120,112.453610 +河南省,平顶山市,33.766090,113.192410 +河南省,安阳市,36.097710,114.393100 +河南省,鹤壁市,35.747000,114.297450 +河南省,新乡市,35.303230,113.926750 +河南省,焦作市,35.215630,113.242010 +河南省,濮阳市,35.761890,115.029320 +河南省,许昌市,34.035700,113.852330 +河南省,漯河市,33.581490,114.016810 +河南省,三门峡市,34.772610,111.200300 +河南省,南阳市,32.990730,112.528510 +河南省,商丘市,34.414270,115.656350 +河南省,信阳市,32.147140,114.092790 +河南省,周口市,33.625830,114.696950 +河南省,驻马店市,33.011420,114.022990 +山东省,济南市,36.651840,117.120090 +山东省,青岛市,36.066230,120.382990 +山东省,淄博市,36.813100,118.054800 +山东省,枣庄市,34.810710,117.321960 +山东省,东营市,37.433650,118.674660 +山东省,烟台市,37.463530,121.448010 +山东省,潍坊市,36.706860,119.161760 +山东省,济宁市,35.414590,116.587240 +山东省,泰安市,36.199940,117.088400 +山东省,威海市,37.513480,122.121710 +山东省,日照市,35.416460,119.527190 +山东省,莱芜市,36.213590,117.676670 +山东省,临沂市,35.104650,118.356460 +山东省,德州市,37.435500,116.359270 +山东省,聊城市,36.457020,115.985490 +山东省,滨州市,37.382110,117.972790 +山东省,菏泽市,35.233630,115.481150 +山西省,太原市,37.876876,112.556252 +山西省,大同市,40.081863,113.304424 +山西省,阳泉市,37.856680,113.580470 +山西省,长治市,36.195810,113.116490 +山西省,晋城市,35.490390,112.851130 +山西省,朔州市,39.357422,112.439374 +山西省,晋中市,37.687020,112.752780 +山西省,运城市,35.026280,111.006990 +山西省,忻州市,38.416700,112.734180 +山西省,临汾市,36.088220,111.519620 +山西省,吕梁市,37.519340,111.141650 +辽宁省,沈阳市,41.677180,123.463100 +辽宁省,大连市,38.913690,121.614760 +辽宁省,鞍山市,41.107770,122.994600 +辽宁省,抚顺市,41.879710,123.957220 +辽宁省,本溪市,41.294130,123.766860 +辽宁省,丹东市,39.999800,124.356010 +辽宁省,锦州市,41.095150,121.127030 +辽宁省,营口市,40.666830,122.234900 +辽宁省,阜新市,42.021660,121.670110 +辽宁省,辽阳市,41.268090,123.237360 +辽宁省,盘锦市,41.119960,122.070780 +辽宁省,铁岭市,42.286200,123.842410 +辽宁省,朝阳市,41.573470,120.450800 +辽宁省,葫芦岛市,40.711000,120.836990 +吉林省,长春市,43.816020,125.323570 +吉林省,吉林市,43.837840,126.549440 +吉林省,四平市,43.166460,124.350360 +吉林省,辽源市,42.888050,125.143680 +吉林省,通化市,41.728290,125.939900 +吉林省,白山市,41.940800,126.424430 +吉林省,松原市,45.141100,124.825150 +吉林省,白城市,45.619600,122.838710 +吉林省,延边朝鲜族自治州,42.891190,129.509100 +黑龙江省,哈尔滨市,45.802160,126.535800 +黑龙江省,齐齐哈尔市,47.354310,123.917960 +黑龙江省,鸡西市,45.295240,130.969540 +黑龙江省,鹤岗市,47.349890,130.297850 +黑龙江省,双鸭山市,46.646580,131.159100 +黑龙江省,大庆市,46.595319,125.110961 +黑龙江省,伊春市,47.727520,128.840490 +黑龙江省,佳木斯市,46.799770,130.318820 +黑龙江省,七台河市,45.770650,131.003060 +黑龙江省,牡丹江市,44.552690,129.632440 +黑龙江省,黑河市,50.245230,127.528520 +黑龙江省,绥化市,46.652460,126.969320 +黑龙江省,大兴安岭地区,51.923980,124.592160 +江苏省,南京市,32.058380,118.796470 +江苏省,无锡市,31.490990,120.312370 +江苏省,徐州市,34.204400,117.285770 +江苏省,常州市,31.810720,119.973650 +江苏省,苏州市,31.298340,120.583190 +江苏省,南通市,31.979580,120.893710 +江苏省,连云港市,34.596690,119.222950 +江苏省,淮安市,33.610160,119.015950 +江苏省,盐城市,33.349510,120.161640 +江苏省,扬州市,32.393580,119.412690 +江苏省,镇江市,32.189590,119.425000 +江苏省,泰州市,32.455460,119.925540 +江苏省,宿迁市,33.961930,118.275490 +浙江省,杭州市,30.274150,120.155150 +浙江省,宁波市,29.873860,121.550270 +浙江省,温州市,27.994920,120.699390 +浙江省,嘉兴市,30.745010,120.755500 +浙江省,湖州市,30.893050,120.088050 +浙江省,绍兴市,30.030330,120.580200 +浙江省,金华市,29.078120,119.647590 +浙江省,衢州市,28.935920,118.874190 +浙江省,舟山市,29.985390,122.207780 +浙江省,台州市,28.656110,121.420560 +浙江省,丽水市,28.467200,119.922930 +安徽省,合肥市,31.820570,117.229010 +安徽省,芜湖市,31.352460,118.433130 +安徽省,蚌埠市,32.915480,117.389320 +安徽省,淮南市,32.625490,116.999800 +安徽省,马鞍山市,31.670670,118.506110 +安徽省,淮北市,33.954790,116.798340 +安徽省,铜陵市,30.944860,117.812320 +安徽省,安庆市,30.542940,117.063540 +安徽省,黄山市,29.715170,118.338660 +安徽省,滁州市,32.301810,118.316830 +安徽省,阜阳市,32.889630,115.814950 +安徽省,宿州市,33.646140,116.963910 +安徽省,六安市,31.734880,116.523240 +安徽省,亳州市,33.844610,115.779310 +安徽省,池州市,30.664690,117.491420 +安徽省,宣城市,30.940780,118.758660 +福建省,福州市,26.074210,119.296470 +福建省,厦门市,24.479510,118.089480 +福建省,莆田市,25.454000,119.007710 +福建省,三明市,26.263850,117.639220 +福建省,泉州市,24.873890,118.675870 +福建省,漳州市,24.513470,117.647250 +福建省,南平市,27.331750,118.120430 +福建省,龙岩市,25.075040,117.017220 +福建省,宁德市,26.665710,119.548190 +江西省,南昌市,28.682020,115.857940 +江西省,景德镇市,29.268690,117.178390 +江西省,萍乡市,27.622890,113.854270 +江西省,九江市,29.705480,116.001460 +江西省,新余市,27.817760,114.917130 +江西省,鹰潭市,28.260190,117.069190 +江西省,赣州市,25.831090,114.934760 +江西省,吉安市,27.113820,114.993760 +江西省,宜春市,27.814430,114.416120 +江西省,抚州市,27.947810,116.358090 +江西省,上饶市,28.454630,117.943570 +湖北省,武汉市,30.592760,114.305250 +湖北省,黄石市,30.199530,115.038900 +湖北省,十堰市,32.629180,110.798010 +湖北省,宜昌市,30.691860,111.286420 +湖北省,襄阳市,32.009000,112.122550 +湖北省,鄂州市,30.390850,114.894950 +湖北省,荆门市,31.035460,112.199450 +湖北省,孝感市,30.924830,113.916450 +湖北省,荆州市,30.334790,112.240690 +湖北省,黄冈市,30.453470,114.872380 +湖北省,咸宁市,29.841260,114.322450 +湖北省,随州市,31.690130,113.382620 +湖北省,恩施土家族苗族自治州,30.272170,109.488170 +湖南省,长沙市,28.227780,112.938860 +湖南省,株洲市,27.827670,113.133960 +湖南省,湘潭市,27.829750,112.944110 +湖南省,衡阳市,26.893240,112.571950 +湖南省,邵阳市,27.238900,111.467700 +湖南省,岳阳市,29.357280,113.129190 +湖南省,常德市,29.031580,111.698540 +湖南省,张家界市,29.116670,110.478390 +湖南省,益阳市,28.553910,112.355160 +湖南省,郴州市,25.770630,113.014850 +湖南省,永州市,26.420340,111.612250 +湖南省,怀化市,27.569740,110.001600 +湖南省,娄底市,27.697280,111.994580 +湖南省,湘西土家族苗族自治州,28.311730,109.738930 +广东省,广州市,23.129080,113.264360 +广东省,韶关市,24.810390,113.597230 +广东省,深圳市,22.542860,114.059560 +广东省,珠海市,22.270730,113.576680 +广东省,汕头市,23.353500,116.682210 +广东省,佛山市,23.021850,113.121920 +广东省,江门市,22.578650,113.081610 +广东省,湛江市,21.271340,110.358940 +广东省,茂名市,21.663290,110.925230 +广东省,肇庆市,23.046900,112.465280 +广东省,惠州市,23.110750,114.416790 +广东省,梅州市,24.288440,116.122640 +广东省,汕尾市,22.785660,115.375140 +广东省,河源市,23.743650,114.700650 +广东省,阳江市,21.858290,111.982560 +广东省,清远市,23.682010,113.056150 +广东省,东莞市,23.020670,113.751790 +广东省,中山市,22.515950,113.392600 +广东省,潮州市,23.656700,116.622960 +广东省,揭阳市,23.549720,116.372710 +广东省,云浮市,22.915250,112.044530 +海南省,海口市,20.044220,110.199890 +海南省,三亚市,18.252480,109.512090 +海南省,三沙市,16.832720,112.333560 +海南省,儋州市,19.520930,109.580690 +四川省,成都市,30.570200,104.064760 +四川省,自贡市,29.339200,104.778440 +四川省,攀枝花市,26.582280,101.718720 +四川省,泸州市,28.871700,105.442570 +四川省,德阳市,31.126790,104.397900 +四川省,绵阳市,31.467510,104.679600 +四川省,广元市,32.435490,105.843570 +四川省,遂宁市,30.532860,105.592730 +四川省,内江市,29.580150,105.058440 +四川省,乐山市,29.552210,103.765390 +四川省,南充市,30.837310,106.110730 +四川省,眉山市,30.075630,103.848510 +四川省,宜宾市,28.751300,104.641700 +四川省,广安市,30.455960,106.633220 +四川省,达州市,31.208640,107.467910 +四川省,雅安市,30.010530,103.042400 +四川省,巴中市,31.867150,106.747330 +四川省,资阳市,30.128590,104.627980 +四川省,阿坝藏族羌族自治州,31.899400,102.224770 +四川省,甘孜藏族自治州,30.049320,101.962540 +四川省,凉山彝族自治州,27.881640,102.267460 +贵州省,贵阳市,26.647020,106.630240 +贵州省,六盘水市,26.593360,104.830230 +贵州省,遵义市,27.725450,106.927230 +贵州省,安顺市,26.253670,105.946200 +贵州省,毕节市,27.298470,105.305040 +贵州省,铜仁市,27.690660,109.180990 +贵州省,黔西南布依族苗族自治州,25.089880,104.904370 +贵州省,黔东南苗族侗族自治州,26.583640,107.984160 +贵州省,黔南布依族苗族自治州,26.254270,107.522260 +云南省,昆明市,24.879660,102.833220 +云南省,曲靖市,25.490020,103.796250 +云南省,玉溪市,24.351800,102.547140 +云南省,保山市,25.112050,99.161810 +云南省,昭通市,27.338170,103.716800 +云南省,丽江市,26.856480,100.227100 +云南省,普洱市,22.825210,100.966240 +云南省,临沧市,23.884260,100.088840 +云南省,楚雄彝族自治州,25.044950,101.527670 +云南省,红河哈尼族彝族自治州,23.364220,103.375600 +云南省,文山壮族苗族自治州,23.398490,104.215040 +云南省,西双版纳傣族自治州,22.007490,100.797390 +云南省,大理白族自治州,25.606480,100.267640 +云南省,德宏傣族景颇族自治州,24.432320,98.584860 +云南省,怒江傈僳族自治州,25.817630,98.856700 +云南省,迪庆藏族自治州,27.819080,99.703050 +陕西省,西安市,34.341270,108.939840 +陕西省,铜川市,34.896730,108.945150 +陕西省,宝鸡市,34.361940,107.237320 +陕西省,咸阳市,34.329320,108.709290 +陕西省,渭南市,34.499970,109.510150 +陕西省,延安市,36.585290,109.489780 +陕西省,汉中市,33.067610,107.023770 +陕西省,榆林市,38.285200,109.734580 +陕西省,安康市,32.684860,109.029320 +陕西省,商洛市,33.870360,109.940410 +甘肃省,兰州市,36.061380,103.834170 +甘肃省,嘉峪关市,39.772010,98.290110 +甘肃省,金昌市,38.520060,102.187590 +甘肃省,白银市,36.544700,104.137730 +甘肃省,天水市,34.580850,105.724860 +甘肃省,武威市,37.928200,102.637970 +甘肃省,张掖市,38.925920,100.449810 +甘肃省,平凉市,35.543030,106.665300 +甘肃省,酒泉市,39.732550,98.493940 +甘肃省,庆阳市,35.709780,107.642920 +甘肃省,定西市,35.581130,104.625240 +甘肃省,陇南市,33.401000,104.921660 +甘肃省,临夏回族自治州,35.601220,103.210910 +甘肃省,甘南藏族自治州,34.983270,102.911020 +青海省,西宁市,36.617290,101.777820 +青海省,海东市,36.482090,102.401730 +青海省,海北藏族自治州,36.954540,100.900960 +青海省,黄南藏族自治州,35.519910,102.015070 +青海省,海南藏族自治州,36.286630,100.620370 +青海省,果洛藏族自治州,34.471410,100.244750 +青海省,玉树藏族自治州,33.005280,97.006500 +青海省,海西蒙古族藏族自治州,37.377100,97.371220 +广西壮族自治区,南宁市,22.816730,108.366900 +广西壮族自治区,柳州市,24.325430,109.415520 +广西壮族自治区,桂林市,25.273610,110.290020 +广西壮族自治区,梧州市,23.476910,111.279170 +广西壮族自治区,北海市,21.481120,109.120080 +广西壮族自治区,防城港市,21.687130,108.354720 +广西壮族自治区,钦州市,21.979700,108.654310 +广西壮族自治区,贵港市,23.113060,109.597640 +广西壮族自治区,玉林市,22.654510,110.180980 +广西壮族自治区,百色市,23.902160,106.618380 +广西壮族自治区,贺州市,24.403460,111.566550 +广西壮族自治区,河池市,24.692910,108.085400 +广西壮族自治区,来宾市,23.752100,109.222380 +广西壮族自治区,崇左市,22.378950,107.364850 +内蒙古自治区,呼和浩特市,40.841490,111.751990 +内蒙古自治区,包头市,40.657810,109.840210 +内蒙古自治区,乌海市,39.653840,106.795460 +内蒙古自治区,赤峰市,42.258600,118.888940 +内蒙古自治区,通辽市,43.652470,122.244690 +内蒙古自治区,鄂尔多斯市,39.608450,109.780870 +内蒙古自治区,呼伦贝尔市,49.211630,119.765840 +内蒙古自治区,巴彦淖尔市,40.743170,107.387730 +内蒙古自治区,乌兰察布市,40.993910,113.133760 +内蒙古自治区,兴安盟,46.082080,122.038180 +内蒙古自治区,锡林郭勒盟,43.933200,116.047750 +内蒙古自治区,阿拉善盟,38.851530,105.728980 +宁夏回族自治区,银川市,38.486440,106.232480 +宁夏回族自治区,石嘴山市,38.984100,106.384180 +宁夏回族自治区,吴忠市,37.997550,106.198790 +宁夏回族自治区,固原市,36.015800,106.242590 +宁夏回族自治区,中卫市,37.500260,105.196760 +西藏自治区,拉萨市,29.644150,91.114500 +西藏自治区,日喀则市,29.267050,88.881160 +西藏自治区,昌都市,31.140730,97.172250 +西藏自治区,林芝市,29.648950,94.361550 +西藏自治区,山南市,29.237050,91.773130 +西藏自治区,那曲市,31.476140,92.051360 +西藏自治区,阿里地区,30.400510,81.145400 +新疆维吾尔自治区,乌鲁木齐市,43.826630,87.616880 +新疆维吾尔自治区,克拉玛依市,45.579990,84.889270 +新疆维吾尔自治区,吐鲁番市,42.951300,89.189540 +新疆维吾尔自治区,哈密市,42.818550,93.515380 +新疆维吾尔自治区,昌吉回族自治州,44.011170,87.308220 +新疆维吾尔自治区,博尔塔拉蒙古自治州,44.905970,82.066650 +新疆维吾尔自治区,巴音郭楞蒙古自治州,41.764040,86.145170 +新疆维吾尔自治区,阿克苏地区,41.168420,80.260080 +新疆维吾尔自治区,克孜勒苏柯尔克孜自治州,39.715300,76.166610 +新疆维吾尔自治区,喀什地区,39.470420,75.989760 +新疆维吾尔自治区,和田地区,37.114310,79.922470 +新疆维吾尔自治区,伊犁哈萨克自治州,43.916890,81.324160 +新疆维吾尔自治区,塔城地区,46.745320,82.980460 +新疆维吾尔自治区,阿勒泰地区,47.845640,88.140230 diff --git a/Downloads/lenses.csv b/Downloads/lenses.csv new file mode 100644 index 0000000..c8b324d --- /dev/null +++ b/Downloads/lenses.csv @@ -0,0 +1,24 @@ +1,1,1,1,1,3 +2,1,1,1,2,2 +3,1,1,2,1,3 +4,1,1,2,2,1 +5,1,2,1,1,3 +6,1,2,1,2,2 +7,1,2,2,1,3 +8,1,2,2,2,1 +9,2,1,1,1,3 +10,2,1,1,2,2 +11,2,1,2,1,3 +12,2,1,2,2,1 +13,2,2,1,1,3 +14,2,2,1,2,2 +15,2,2,2,1,3 +16,2,2,2,2,3 +17,3,1,1,1,3 +18,3,1,1,2,3 +19,3,1,2,1,3 +20,3,1,2,2,1 +21,3,2,1,1,3 +22,3,2,1,2,2 +23,3,2,2,1,3 +24,3,2,2,2,3 diff --git a/checkpoint.pkl b/checkpoint.pkl new file mode 100644 index 0000000..085cc15 Binary files /dev/null and b/checkpoint.pkl differ diff --git a/demo/.vscode/launch.json b/demo/.vscode/launch.json new file mode 100644 index 0000000..95c55cf --- /dev/null +++ b/demo/.vscode/launch.json @@ -0,0 +1,16 @@ +{ + // 使用 IntelliSense 了解相关属性。 + // 悬停以查看现有属性的描述。 + // 欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python: Current File", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true + } + ] +} \ No newline at end of file diff --git a/demo/__init__.py b/demo/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/demo/cart_demo.py b/demo/cart_demo.py new file mode 100644 index 0000000..f1d6d0b --- /dev/null +++ b/demo/cart_demo.py @@ -0,0 +1,13 @@ + +from BaseML import cls + +model=cls('CART') +model.load_dataset('lenses.csv', type ='csv', x_column = [0,1,2,3,4],y_column=[5]) +model.train() +model.save('mymodel.pkl') + +y=model.inference([[1, 1, 1, 1, 1]]) +m=cls('CART') +m.load('mymodel.pkl') +y=m.inference([[1, 1, 1, 1, 1]]) +print(y) \ No newline at end of file diff --git a/demo/iris_cls_demo.py b/demo/iris_cls_demo.py new file mode 100644 index 0000000..354ec42 --- /dev/null +++ b/demo/iris_cls_demo.py @@ -0,0 +1,27 @@ + +from BaseML import Classification +import numpy as np +from sklearn import datasets + +# 导入sklearn内置的iris数据集进行测试 +X = datasets.load_iris().data +y = datasets.load_iris().target + +def iris_cls(algorithm = 'MLP'): # path指定模型保存的路径 + # 实例化模型 + model = Classification.cls(algorithm = algorithm) + # 指定数据集格式 + model.load_dataset(X,y,type = 'numpy',x_column=[0,1,2,3], y_column=[0]) + # 开始训练 + model.train() + # 构建测试数据 + test_data = [[0.2,0.4,3.2,5.6], + [2.3,1.8,0.4,2.3]] + test_data = np.asarray(test_data) + result = model.inference(test_data) + print(result) + + model.save() + +if __name__ == '__main__': + iris_cls(algorithm='Kmeans') \ No newline at end of file diff --git a/demo/kemans.py b/demo/kemans.py deleted file mode 100644 index f34a7b6..0000000 --- a/demo/kemans.py +++ /dev/null @@ -1,12 +0,0 @@ -from BaseML import Classification - -N_CLUSTERS = 7 # 类簇的数量 -DATA_PATH = '~/Downloads/China_cities.csv' # 数据集路径 - - -def city(): - model = Classification(algorithm='Kmeans', N_CLUSTERS=5) - model.load_dataset(path=DATA_PATH) - model.train() - model.inference() - model.save() diff --git a/demo/kmeans.py b/demo/kmeans.py new file mode 100644 index 0000000..94f3d45 --- /dev/null +++ b/demo/kmeans.py @@ -0,0 +1,45 @@ +from BaseML import Classification +import numpy as np + +N_CLUSTERS = 7 # 类簇的数量 +DATA_PATH = 'Downloads/China_cities.csv' # 数据集路径 + + +def city(): + # 实例化模型 + model = Classification.cls(algorithm = 'Kmeans', N_CLUSTERS=5) + # 指定数据集的路径 + model.load_dataset(path=DATA_PATH) + # 开始训练 + model.train() + # kmeans输出聚类结果,不需要输入数据 + model.inference() + # 模型保留 + model.save() + + +def kmeans_train(num_cluster,model_path): + # 实例化模型 + model = Classification.cls(algorithm = 'Kmeans', N_CLUSTERS=num_cluster) + # 指定数据集的路径 + model.load_dataset(DATA_PATH, type='csv', x_column=[2,3], y_column=[0]) + # 开始训练 + model.train() + # 模型保存 + model.save(model_path) + +def kmeans_inference(num_cluster,model_path): + # 实例化模型 + model = Classification.cls(algorithm = 'Kmeans', N_CLUSTERS=num_cluster) + # 加载模型数据集 + model.load_dataset(DATA_PATH, type='csv', x_column=[2,3], y_column=[0]) + # 加载模型权重文件 + model.load(model_path) + # 进行推理 + model.inference() + + +if __name__ == '__main__': + # city() + kmeans_train(5,'checkpoint.pkl') + kmeans_inference(5,'checkpoint.pkl') \ No newline at end of file diff --git a/demo/mymodel.pkl b/demo/mymodel.pkl new file mode 100644 index 0000000..307b2fb Binary files /dev/null and b/demo/mymodel.pkl differ diff --git a/mymodel.pkl b/mymodel.pkl new file mode 100644 index 0000000..67a3394 Binary files /dev/null and b/mymodel.pkl differ