From 24abdc8ff91683abfa174437a02f671695d49f1d Mon Sep 17 00:00:00 2001 From: JiaYanhao <2474840061@qq.com> Date: Tue, 19 Jul 2022 21:52:07 +0800 Subject: [PATCH] 0.0.3 --- .DS_Store | Bin 6148 -> 6148 bytes .idea/.gitignore | 8 +++ .idea/OpenBaseLab-Edu.iml | 8 +++ .../inspectionProfiles/profiles_settings.xml | 6 ++ .idea/misc.xml | 4 ++ .idea/modules.xml | 8 +++ .idea/vcs.xml | 6 ++ BaseML/AdaBoost.py | 55 ++++++++++++++ BaseML/CART.py | 2 - BaseML/GaussianNB.py | 55 ++++++++++++++ BaseML/KNN.py | 2 - LR.py => BaseML/LR.py | 38 +++++----- BaseML/PCA.py | 2 - BaseML/Perceptron.py | 4 +- BaseML/SVM.py | 54 ++++++++++++++ Classifer.py | 67 ------------------ __init__.py | 12 +++- demo_cls.py | 8 --- demo_img.py | 9 --- demo_reg.py | 8 --- mmImage.py => tools/mmImage.py | 41 ++++++----- 21 files changed, 251 insertions(+), 146 deletions(-) create mode 100644 .idea/.gitignore create mode 100644 .idea/OpenBaseLab-Edu.iml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 BaseML/AdaBoost.py create mode 100644 BaseML/GaussianNB.py rename LR.py => BaseML/LR.py (51%) create mode 100644 BaseML/SVM.py delete mode 100644 Classifer.py delete mode 100644 demo_cls.py delete mode 100644 demo_img.py delete mode 100644 demo_reg.py rename mmImage.py => tools/mmImage.py (82%) diff --git a/.DS_Store b/.DS_Store index 6ec67e066cb641a83a7481fcadabc62fa0018939..603d9d9c6a88b4b3cf3d1cf39dc66499c2dba59c 100644 GIT binary patch delta 82 zcmZoMXfc=|#>B`mF;Q%yo+2aL#DLw5Y?FCdY9~Kt(b@c*Re@=<0tY+G!~*Tj>>T_Y fK$V*XIlePb<`=OPU;qL}1_mZ5&9XT{WDPR_U>p&| delta 369 zcmZoMXfc=|#>B!kF;Q%yo+2a9#DLw4n3x%PCi5`XhBz}gG6XS%Fz7K9FjRWxTP zvOFbDv#BM|HT2Lm9Bfq{*|o52qVv8i2;q;_&MqftE|J*9*Xk<7(m_uJI@czFCh0JE9PYg z09sVUkPIXX7)pSwe4vF`e8wmX(z$sRqc+oKb`E|HU_fsE$oQRkGQWtW03!nf5P=CM L5WP7 + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..45d5172 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..29a39d1 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/BaseML/AdaBoost.py b/BaseML/AdaBoost.py new file mode 100644 index 0000000..3e30856 --- /dev/null +++ b/BaseML/AdaBoost.py @@ -0,0 +1,55 @@ +import pandas as pd +import numpy as np +import os +from sklearn.metrics import accuracy_score, mean_squared_error +from sklearn.ensemble import AdaBoostClassifier + + +class AdaBoost: + def __init__(self + ): + self.cwd = os.path.dirname(os.getcwd()) # 获取当前文件的绝对路径 + self.file_dirname = os.path.dirname(os.path.abspath(__file__)) + self.model = AdaBoostClassifier(n_estimators=100, random_state=0) + self.dataset_path = ' ' + self.test_size = ' ' + self.test_set = ' ' + + def train(self, seed=0, data_type='csv'): + np.random.seed(seed) + if data_type == 'csv': + dataset = pd.read_csv(self.dataset_path, sep=',', header=None).values + elif data_type == 'pandas': + dataset = self.load_pd() + elif data_type == 'list': + dataset = self.load_list() + np.random.shuffle(dataset) + + data, label = dataset[:, :-1], dataset[:, -1] + train_index = int((1 - self.test_size) * len(dataset)) + train_data, train_label = data[:train_index, ], label[:train_index] + self.test_set = { + 'data': data[train_index:, ], + 'label': label[train_index:] + } + self.model.fit(train_data, train_label) + + def inference(self, mode='cls'): + pred = self.model.predict(self.test_set['data']) + if mode == 'cls': + acc = accuracy_score(self.test_set['label'], pred) + print('准确率为:{}%'.format(acc * 100)) + elif mode == 'reg': + loss = mean_squared_error(self.test_set['label'], pred) + print('Loss: {}'.format(loss)) + + def load_dataset(self, path, test_size=0.2): + self.dataset_path = path + self.test_size = test_size + + def load_pd(self): + pass + + def load_list(self): + pass + diff --git a/BaseML/CART.py b/BaseML/CART.py index c4db114..e480a74 100644 --- a/BaseML/CART.py +++ b/BaseML/CART.py @@ -4,9 +4,7 @@ class CART: def __init__(self, - backbone='KNNClassifier', ): - self.backbone = backbone # 获取外部运行py的绝对路径 self.cwd = os.path.dirname(os.getcwd()) # 获取当前文件的绝对路径 diff --git a/BaseML/GaussianNB.py b/BaseML/GaussianNB.py new file mode 100644 index 0000000..0b281ea --- /dev/null +++ b/BaseML/GaussianNB.py @@ -0,0 +1,55 @@ +import pandas as pd +import numpy as np +import os + +from sklearn.metrics import accuracy_score, mean_squared_error +from sklearn.naive_bayes import GaussianNB + + +class GaussianNB: + def __init__(self + ): + self.cwd = os.path.dirname(os.getcwd()) # 获取当前文件的绝对路径 + self.file_dirname = os.path.dirname(os.path.abspath(__file__)) + self.model = GaussianNB() + self.dataset_path = ' ' + self.test_size = ' ' + self.test_set = ' ' + + def train(self, seed=0, data_type='csv'): + np.random.seed(seed) + if data_type == 'csv': + dataset = pd.read_csv(self.dataset_path, sep=',', header=None).values + elif data_type == 'pandas': + dataset = self.load_pd() + elif data_type == 'list': + dataset = self.load_list() + np.random.shuffle(dataset) + + data, label = dataset[:, :-1], dataset[:, -1] + train_index = int((1 - self.test_size) * len(dataset)) + train_data, train_label = data[:train_index, ], label[:train_index] + self.test_set = { + 'data': data[train_index:, ], + 'label': label[train_index:] + } + self.model.fit(train_data, train_label) + + def inference(self, mode='cls'): + pred = self.model.predict(self.test_set['data']) + if mode == 'cls': + acc = accuracy_score(self.test_set['label'], pred) + print('准确率为:{}%'.format(acc * 100)) + elif mode == 'reg': + loss = mean_squared_error(self.test_set['label'], pred) + print('Loss: {}'.format(loss)) + + def load_dataset(self, path, test_size=0.2): + self.dataset_path = path + self.test_size = test_size + + def load_pd(self): + pass + + def load_list(self): + pass diff --git a/BaseML/KNN.py b/BaseML/KNN.py index 7d9df2a..db9d01f 100644 --- a/BaseML/KNN.py +++ b/BaseML/KNN.py @@ -5,10 +5,8 @@ class KNN: def __init__(self, - backbone='KNN', n_neighbors=10, ): - self.backbone = backbone # 获取外部运行py的绝对路径 self.cwd = os.path.dirname(os.getcwd()) # 获取当前文件的绝对路径 diff --git a/LR.py b/BaseML/LR.py similarity index 51% rename from LR.py rename to BaseML/LR.py index 28f1291..21d127c 100644 --- a/LR.py +++ b/BaseML/LR.py @@ -1,22 +1,19 @@ from turtle import back import pandas as pd import numpy as np -import os -import cv2 import os -from sklearn.metrics import accuracy_score ,mean_squared_error, r2_score +from sklearn.metrics import accuracy_score, mean_squared_error, r2_score from sklearn import linear_model class LR: - def __init__ (self, - backbone='LR' - ): - self.backbone = backbone #获取外部运行py的绝对路径 - self.cwd = os.path.dirname(os.getcwd()) #获取当前文件的绝对路径 - self.file_dirname = os.path.dirname(os.path.abspath(__file__)) + def __init__(self,): + self.cwd = os.path.dirname(os.getcwd()) #获取当前文件的绝对路径 + self.file_dirname = os.path.dirname(os.path.abspath(__file__)) self.model = linear_model.LinearRegression() + self.dataset_path = ' ' + self.test_size = ' ' - def train(self,seed=0,data_type='csv'): + def train(self, seed=0, data_type='csv'): np.random.seed(seed) if data_type == 'csv': dataset = pd.read_csv(self.dataset_path,sep=',',header=None).values @@ -26,27 +23,26 @@ def train(self,seed=0,data_type='csv'): dataset = self.load_list() np.random.shuffle(dataset) - data,label = dataset[:,:-1],dataset[:,-1] - train_index = int((1-self.test_size)*len(dataset)) - train_data,train_label = data[:train_index,],label[:train_index] + data, label = dataset[:,:-1],dataset[:,-1] + train_index = int((1-self.test_size)*len(dataset)) + train_data, train_label = data[:train_index,],label[:train_index] self.test_set = { - 'data':data[train_index:,], - 'label':label[train_index:] + 'data': data[train_index:,], + 'label': label[train_index:] } self.model.fit(train_data,train_label) - def inference(self,mode = 'cls'): + def inference(self, mode='cls'): pred = self.model.predict(self.test_set['data']) loss = mean_squared_error(self.test_set['label'],pred) print('Loss: {}'.format(loss)) - def load_dataset(self,path,test_size=0.2): self.dataset_path = path - self.test_size=test_size + self.test_size = test_size - def load_pd(): + def load_pd(self): pass - def load_list(): - pass \ No newline at end of file + def load_list(self): + pass diff --git a/BaseML/PCA.py b/BaseML/PCA.py index d227739..2d685ed 100644 --- a/BaseML/PCA.py +++ b/BaseML/PCA.py @@ -4,10 +4,8 @@ class PCA: def __init__(self, - backbone='KNNClassifier', n_components='mle', ): - self.backbone = backbone # 获取外部运行py的绝对路径 self.cwd = os.path.dirname(os.getcwd()) # 获取当前文件的绝对路径 diff --git a/BaseML/Perceptron.py b/BaseML/Perceptron.py index 5bf6b0a..88472bf 100644 --- a/BaseML/Perceptron.py +++ b/BaseML/Perceptron.py @@ -3,10 +3,8 @@ class Perceptron: - def __init__(self, - backbone='KNNClassifier', + def __init__(self ): - self.backbone = backbone # 获取外部运行py的绝对路径 self.cwd = os.path.dirname(os.getcwd()) # 获取当前文件的绝对路径 diff --git a/BaseML/SVM.py b/BaseML/SVM.py new file mode 100644 index 0000000..54adc7b --- /dev/null +++ b/BaseML/SVM.py @@ -0,0 +1,54 @@ +import pandas as pd +import numpy as np +import os +from sklearn.metrics import accuracy_score, mean_squared_error +from sklearn.svm import SVC + + +class SVM: + def __init__(self, + ): + self.cwd = os.path.dirname(os.getcwd()) # 获取当前文件的绝对路径 + self.file_dirname = os.path.dirname(os.path.abspath(__file__)) + self.model = SVC() + self.dataset_path = ' ' + self.test_size = ' ' + self.test_set = ' ' + + def train(self, seed=0, data_type='csv'): + np.random.seed(seed) + if data_type == 'csv': + dataset = pd.read_csv(self.dataset_path, sep=',', header=None).values + elif data_type == 'pandas': + dataset = self.load_pd() + elif data_type == 'list': + dataset = self.load_list() + np.random.shuffle(dataset) + + data, label = dataset[:, :-1], dataset[:, -1] + train_index = int((1 - self.test_size) * len(dataset)) + train_data, train_label = data[:train_index, ], label[:train_index] + self.test_set = { + 'data': data[train_index:, ], + 'label': label[train_index:] + } + self.model.fit(train_data, train_label) + + def inference(self, mode='cls'): + pred = self.model.predict(self.test_set['data']) + if mode == 'cls': + acc = accuracy_score(self.test_set['label'], pred) + print('准确率为:{}%'.format(acc * 100)) + elif mode == 'reg': + loss = mean_squared_error(self.test_set['label'], pred) + print('Loss: {}'.format(loss)) + + def load_dataset(self, path, test_size=0.2): + self.dataset_path = path + self.test_size = test_size + + def load_pd(self): + pass + + def load_list(self): + pass \ No newline at end of file diff --git a/Classifer.py b/Classifer.py deleted file mode 100644 index 5c66594..0000000 --- a/Classifer.py +++ /dev/null @@ -1,67 +0,0 @@ -import pandas as pd -import numpy as np -import os - -from sklearn.metrics import accuracy_score ,mean_squared_error, r2_score -from sklearn.svm import SVC -from sklearn import linear_model -from sklearn.naive_bayes import GaussianNB -from sklearn.ensemble import BaggingClassifier,AdaBoostClassifier -from sklearn.neighbors import KNeighborsClassifier - -class Classifer: - def __init__ (self, - backbone='RandomForest' - ): - self.backbone = backbone #获取外部运行py的绝对路径 - self.cwd = os.path.dirname(os.getcwd()) #获取当前文件的绝对路径 - self.file_dirname = os.path.dirname(os.path.abspath(__file__)) - if backbone == 'SVM': - self.model = SVC() - elif backbone == 'NB': - self.model = GaussianNB() - elif backbone == 'bagging': - self.model = BaggingClassifier(KNeighborsClassifier()) - elif backbone == 'boosting': - self.model = AdaBoostClassifier(n_estimators=100, random_state=0) - - def train(self,seed=0,data_type='csv'): - np.random.seed(seed) - if data_type == 'csv': - dataset = pd.read_csv(self.dataset_path,sep=',',header=None).values - elif data_type == 'pandas': - dataset = self.load_pd() - elif data_type == 'list': - dataset = self.load_list() - np.random.shuffle(dataset) - - data,label = dataset[:,:-1],dataset[:,-1] - train_index = int((1-self.test_size)*len(dataset)) - train_data,train_label = data[:train_index,],label[:train_index] - self.test_set = { - 'data':data[train_index:,], - 'label':label[train_index:] - } - self.model.fit(train_data,train_label) - - def inference(self,mode = 'cls'): - pred = self.model.predict(self.test_set['data']) - if mode == 'cls': - acc = accuracy_score(self.test_set['label'],pred) - print('准确率为:{}%'.format(acc*100)) - elif mode == 'reg': - loss = mean_squared_error(self.test_set['label'],pred) - print('Loss: {}'.format(loss)) - - - - - def load_dataset(self,path,test_size=0.2): - self.dataset_path = path - self.test_size=test_size - - def load_pd(): - pass - - def load_list(): - pass \ No newline at end of file diff --git a/__init__.py b/__init__.py index 40375e1..afc1369 100644 --- a/__init__.py +++ b/__init__.py @@ -2,12 +2,18 @@ from .BaseML import KNN from .BaseML import PCA from .BaseML import Perceptron +from .BaseML import AdaBoost +from .BaseML import GaussianNB +from .BaseML import SVM from .BaseNet import BaseNet -__all__ = [ + +__all__ = [ 'BaseNet', 'CART', - # 'KNNClassifier', + 'SVM', + 'AdaBoost', + 'GaussianNB', 'KNN', 'PCA', - 'Perceptron'] \ No newline at end of file + 'Perceptron'] diff --git a/demo_cls.py b/demo_cls.py deleted file mode 100644 index 31b319b..0000000 --- a/demo_cls.py +++ /dev/null @@ -1,8 +0,0 @@ -# from base import * -from Classifer import * -# from MMEdu import MMMlearing -dataset_path = "./test.csv" -model = Classifer(backbone ='SVM') -model.load_dataset(dataset_path) -model.train() -acc = model.inference() diff --git a/demo_img.py b/demo_img.py deleted file mode 100644 index d0faa58..0000000 --- a/demo_img.py +++ /dev/null @@ -1,9 +0,0 @@ -from mmImage import * -image_path = "6.png" -# model = MMImage(method = 'contour') -# model = MMImage(method = 'canny') -# model = MMImage(method = 'blur') #para = ['mean',3] -model = MMImage(method = 'corner') # para = 0.01) -model.load_image(image_path) -image_out = model.process(save_path = './save.png',para = 0.1) -# diff --git a/demo_reg.py b/demo_reg.py deleted file mode 100644 index 9cea765..0000000 --- a/demo_reg.py +++ /dev/null @@ -1,8 +0,0 @@ -# from base import * -from LR import * -# from MMEdu import MMMlearing -dataset_path = "./test.csv" -model = LR() -model.load_dataset(dataset_path) -model.train() -acc = model.inference() diff --git a/mmImage.py b/tools/mmImage.py similarity index 82% rename from mmImage.py rename to tools/mmImage.py index ae7a945..4e4d4b6 100644 --- a/mmImage.py +++ b/tools/mmImage.py @@ -4,52 +4,52 @@ import numpy as np class MMImage: - def __init__ (self,method='blur'): + def __init__(self, method='blur'): self.method = method self.img = None - def process(self,save_path = '',para = []): + def process(self, save_path='', para=[]): if save_path != '': save_path = save_path img_out = getattr(self, "_"+self.method)(para) - cv2.imwrite(save_path,img_out) + cv2.imwrite(save_path, img_out) - def load_image(self,image_path): - img = cv2.imread(image_path,cv2.IMREAD_UNCHANGED) + def load_image(self, image_path): + img = cv2.imread(image_path, cv2.IMREAD_UNCHANGED) self.img = img - def _blur(self,para = []): - type_method,kernal = para + def _blur(self, para=[]): + type_method, kernal = para img_out = cv2.blur(self.img, (kernal, kernal)) #sum(square)/25 return img_out def _contour(self,para=[]): - gray_img=cv2.cvtColor(self.img,cv2.COLOR_BGR2GRAY) - dep,img_bin=cv2.threshold(gray_img,128,255,cv2.THRESH_BINARY) - image_,contours=cv2.findContours(img_bin, mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_SIMPLE) + gray_img = cv2.cvtColor(self.img, cv2.COLOR_BGR2GRAY) + dep, img_bin = cv2.threshold(gray_img, 128, 255, cv2.THRESH_BINARY) + image_, contours = cv2.findContours(img_bin, mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_SIMPLE) to_write = self.img.copy() # cv2.drawContours(img,contours,0,(0,0,255),3) ret = cv2.drawContours(to_write,image_,-1,(0,0,255),3) return ret - def _hist(self,para=[]): - gray_img=cv2.cvtColor(self.img,cv2.COLOR_BGR2GRAY) - img = gray_img.astype(np.uint8) + def _hist(self, para=[]): + gray_img = cv2.cvtColor(self.img, cv2.COLOR_BGR2GRAY) + img = gray_img.astype(np.uint8) return cv2.equalizeHist(img) - def _watershed_contour(self,para=[]): + def _watershed_contour(self, para=[]): gray = cv2.cvtColor(self.img, cv2.COLOR_BGR2GRAY) # 转为灰度图像 # 查找和绘制图像轮廓 - Gauss = cv2.GaussianBlur(gray, (5,5), sigmaX=4.0) - grad = cv2.Canny(Gauss,50,150) + Gauss = cv2.GaussianBlur(gray, (5, 5), sigmaX=4.0) + grad = cv2.Canny(Gauss, 50, 150) grad, contours = cv2.findContours(grad, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) # 查找图像轮廓 markers = np.zeros(self.img.shape[:2], np.int32) # 生成标识图像,所有轮廓区域标识为索引号 (index) for index in range(len(contours)): # 用轮廓的索引号 index 标识轮廓区域 markers = cv2.drawContours(markers, grad, index, (index, index, index), 1, 8, contours) ContoursMarkers = np.zeros(self.img.shape[:2], np.uint8) - ContoursMarkers[markers>0] = 255 + ContoursMarkers[markers > 0] = 255 # 分水岭算法 markers = cv2.watershed(self.img, markers) # 所有轮廓的像素点被标注为 -1 @@ -58,7 +58,7 @@ def _watershed_contour(self,para=[]): bgrMarkers = np.zeros_like(self.img) for i in range(len(contours)): colorKind = [np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)] - bgrMarkers[markers==i] = colorKind + bgrMarkers[markers == i] = colorKind bgrFilled = cv2.addWeighted(self.img, 0.67, bgrMarkers, 0.33, 0) return cv2.cvtColor(bgrFilled, cv2.COLOR_BGR2RGB) @@ -88,7 +88,7 @@ def _watershed(self,para): for i in range(kinds): if (i!=maxKind): colorKind = [np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)] - markersBGR[markers==i] = colorKind + markersBGR[markers == i] = colorKind # 去除连通域中的背景区域部分 unknown = cv2.subtract(sure_bg, sure_fg) # 待定区域,前景与背景的重合区域 markers[unknown == 255] = 0 # 去掉属于背景的区域 (置零) @@ -102,9 +102,8 @@ def _watershed(self,para): # print(self.img.shape, markers.shape, markers.max(), markers.min(), ret) return cv2.cvtColor(markersBGR, cv2.COLOR_BGR2RGB) - def _canny(self,para=[100,200]): - return cv2.Canny(self.img,para[0],para[1]) + return cv2.Canny(self.img, para[0], para[1]) def _corner(self,para = 0.01): gray_img = cv2.cvtColor(self.img, cv2.COLOR_BGR2GRAY)