diff --git a/.DS_Store b/.DS_Store
index 6ec67e0..603d9d9 100644
Binary files a/.DS_Store and b/.DS_Store differ
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/OpenBaseLab-Edu.iml b/.idea/OpenBaseLab-Edu.iml
new file mode 100644
index 0000000..08fc73f
--- /dev/null
+++ b/.idea/OpenBaseLab-Edu.iml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..45d5172
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..29a39d1
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/BaseML/AdaBoost.py b/BaseML/AdaBoost.py
new file mode 100644
index 0000000..3e30856
--- /dev/null
+++ b/BaseML/AdaBoost.py
@@ -0,0 +1,55 @@
+import pandas as pd
+import numpy as np
+import os
+from sklearn.metrics import accuracy_score, mean_squared_error
+from sklearn.ensemble import AdaBoostClassifier
+
+
+class AdaBoost:
+ def __init__(self
+ ):
+ self.cwd = os.path.dirname(os.getcwd()) # 获取当前文件的绝对路径
+ self.file_dirname = os.path.dirname(os.path.abspath(__file__))
+ self.model = AdaBoostClassifier(n_estimators=100, random_state=0)
+ self.dataset_path = ' '
+ self.test_size = ' '
+ self.test_set = ' '
+
+ def train(self, seed=0, data_type='csv'):
+ np.random.seed(seed)
+ if data_type == 'csv':
+ dataset = pd.read_csv(self.dataset_path, sep=',', header=None).values
+ elif data_type == 'pandas':
+ dataset = self.load_pd()
+ elif data_type == 'list':
+ dataset = self.load_list()
+ np.random.shuffle(dataset)
+
+ data, label = dataset[:, :-1], dataset[:, -1]
+ train_index = int((1 - self.test_size) * len(dataset))
+ train_data, train_label = data[:train_index, ], label[:train_index]
+ self.test_set = {
+ 'data': data[train_index:, ],
+ 'label': label[train_index:]
+ }
+ self.model.fit(train_data, train_label)
+
+ def inference(self, mode='cls'):
+ pred = self.model.predict(self.test_set['data'])
+ if mode == 'cls':
+ acc = accuracy_score(self.test_set['label'], pred)
+ print('准确率为:{}%'.format(acc * 100))
+ elif mode == 'reg':
+ loss = mean_squared_error(self.test_set['label'], pred)
+ print('Loss: {}'.format(loss))
+
+ def load_dataset(self, path, test_size=0.2):
+ self.dataset_path = path
+ self.test_size = test_size
+
+ def load_pd(self):
+ pass
+
+ def load_list(self):
+ pass
+
diff --git a/BaseML/CART.py b/BaseML/CART.py
index c4db114..e480a74 100644
--- a/BaseML/CART.py
+++ b/BaseML/CART.py
@@ -4,9 +4,7 @@
class CART:
def __init__(self,
- backbone='KNNClassifier',
):
- self.backbone = backbone
# 获取外部运行py的绝对路径
self.cwd = os.path.dirname(os.getcwd())
# 获取当前文件的绝对路径
diff --git a/BaseML/GaussianNB.py b/BaseML/GaussianNB.py
new file mode 100644
index 0000000..0b281ea
--- /dev/null
+++ b/BaseML/GaussianNB.py
@@ -0,0 +1,55 @@
+import pandas as pd
+import numpy as np
+import os
+
+from sklearn.metrics import accuracy_score, mean_squared_error
+from sklearn.naive_bayes import GaussianNB as SkGaussianNB
+
+
+class GaussianNB:
+    def __init__(self
+                 ):
+        self.cwd = os.path.dirname(os.getcwd())  # 获取当前文件的绝对路径
+        self.file_dirname = os.path.dirname(os.path.abspath(__file__))
+        self.model = SkGaussianNB()
+ self.dataset_path = ' '
+ self.test_size = ' '
+ self.test_set = ' '
+
+ def train(self, seed=0, data_type='csv'):
+ np.random.seed(seed)
+ if data_type == 'csv':
+ dataset = pd.read_csv(self.dataset_path, sep=',', header=None).values
+ elif data_type == 'pandas':
+ dataset = self.load_pd()
+ elif data_type == 'list':
+ dataset = self.load_list()
+ np.random.shuffle(dataset)
+
+ data, label = dataset[:, :-1], dataset[:, -1]
+ train_index = int((1 - self.test_size) * len(dataset))
+ train_data, train_label = data[:train_index, ], label[:train_index]
+ self.test_set = {
+ 'data': data[train_index:, ],
+ 'label': label[train_index:]
+ }
+ self.model.fit(train_data, train_label)
+
+ def inference(self, mode='cls'):
+ pred = self.model.predict(self.test_set['data'])
+ if mode == 'cls':
+ acc = accuracy_score(self.test_set['label'], pred)
+ print('准确率为:{}%'.format(acc * 100))
+ elif mode == 'reg':
+ loss = mean_squared_error(self.test_set['label'], pred)
+ print('Loss: {}'.format(loss))
+
+ def load_dataset(self, path, test_size=0.2):
+ self.dataset_path = path
+ self.test_size = test_size
+
+ def load_pd(self):
+ pass
+
+ def load_list(self):
+ pass
diff --git a/BaseML/KNN.py b/BaseML/KNN.py
index 7d9df2a..db9d01f 100644
--- a/BaseML/KNN.py
+++ b/BaseML/KNN.py
@@ -5,10 +5,8 @@
class KNN:
def __init__(self,
- backbone='KNN',
n_neighbors=10,
):
- self.backbone = backbone
# 获取外部运行py的绝对路径
self.cwd = os.path.dirname(os.getcwd())
# 获取当前文件的绝对路径
diff --git a/LR.py b/BaseML/LR.py
similarity index 51%
rename from LR.py
rename to BaseML/LR.py
index 28f1291..21d127c 100644
--- a/LR.py
+++ b/BaseML/LR.py
@@ -1,22 +1,19 @@
from turtle import back
import pandas as pd
import numpy as np
-import os
-import cv2
import os
-from sklearn.metrics import accuracy_score ,mean_squared_error, r2_score
+from sklearn.metrics import accuracy_score, mean_squared_error, r2_score
from sklearn import linear_model
class LR:
- def __init__ (self,
- backbone='LR'
- ):
- self.backbone = backbone #获取外部运行py的绝对路径
- self.cwd = os.path.dirname(os.getcwd()) #获取当前文件的绝对路径
- self.file_dirname = os.path.dirname(os.path.abspath(__file__))
+ def __init__(self,):
+ self.cwd = os.path.dirname(os.getcwd()) #获取当前文件的绝对路径
+ self.file_dirname = os.path.dirname(os.path.abspath(__file__))
self.model = linear_model.LinearRegression()
+ self.dataset_path = ' '
+ self.test_size = ' '
- def train(self,seed=0,data_type='csv'):
+ def train(self, seed=0, data_type='csv'):
np.random.seed(seed)
if data_type == 'csv':
dataset = pd.read_csv(self.dataset_path,sep=',',header=None).values
@@ -26,27 +23,26 @@ def train(self,seed=0,data_type='csv'):
dataset = self.load_list()
np.random.shuffle(dataset)
- data,label = dataset[:,:-1],dataset[:,-1]
- train_index = int((1-self.test_size)*len(dataset))
- train_data,train_label = data[:train_index,],label[:train_index]
+ data, label = dataset[:,:-1],dataset[:,-1]
+ train_index = int((1-self.test_size)*len(dataset))
+ train_data, train_label = data[:train_index,],label[:train_index]
self.test_set = {
- 'data':data[train_index:,],
- 'label':label[train_index:]
+ 'data': data[train_index:,],
+ 'label': label[train_index:]
}
self.model.fit(train_data,train_label)
- def inference(self,mode = 'cls'):
+ def inference(self, mode='cls'):
pred = self.model.predict(self.test_set['data'])
loss = mean_squared_error(self.test_set['label'],pred)
print('Loss: {}'.format(loss))
-
def load_dataset(self,path,test_size=0.2):
self.dataset_path = path
- self.test_size=test_size
+ self.test_size = test_size
- def load_pd():
+ def load_pd(self):
pass
- def load_list():
- pass
\ No newline at end of file
+ def load_list(self):
+ pass
diff --git a/BaseML/PCA.py b/BaseML/PCA.py
index d227739..2d685ed 100644
--- a/BaseML/PCA.py
+++ b/BaseML/PCA.py
@@ -4,10 +4,8 @@
class PCA:
def __init__(self,
- backbone='KNNClassifier',
n_components='mle',
):
- self.backbone = backbone
# 获取外部运行py的绝对路径
self.cwd = os.path.dirname(os.getcwd())
# 获取当前文件的绝对路径
diff --git a/BaseML/Perceptron.py b/BaseML/Perceptron.py
index 5bf6b0a..88472bf 100644
--- a/BaseML/Perceptron.py
+++ b/BaseML/Perceptron.py
@@ -3,10 +3,8 @@
class Perceptron:
- def __init__(self,
- backbone='KNNClassifier',
+ def __init__(self
):
- self.backbone = backbone
# 获取外部运行py的绝对路径
self.cwd = os.path.dirname(os.getcwd())
# 获取当前文件的绝对路径
diff --git a/BaseML/SVM.py b/BaseML/SVM.py
new file mode 100644
index 0000000..54adc7b
--- /dev/null
+++ b/BaseML/SVM.py
@@ -0,0 +1,54 @@
+import pandas as pd
+import numpy as np
+import os
+from sklearn.metrics import accuracy_score, mean_squared_error
+from sklearn.svm import SVC
+
+
+class SVM:
+ def __init__(self,
+ ):
+ self.cwd = os.path.dirname(os.getcwd()) # 获取当前文件的绝对路径
+ self.file_dirname = os.path.dirname(os.path.abspath(__file__))
+ self.model = SVC()
+ self.dataset_path = ' '
+ self.test_size = ' '
+ self.test_set = ' '
+
+ def train(self, seed=0, data_type='csv'):
+ np.random.seed(seed)
+ if data_type == 'csv':
+ dataset = pd.read_csv(self.dataset_path, sep=',', header=None).values
+ elif data_type == 'pandas':
+ dataset = self.load_pd()
+ elif data_type == 'list':
+ dataset = self.load_list()
+ np.random.shuffle(dataset)
+
+ data, label = dataset[:, :-1], dataset[:, -1]
+ train_index = int((1 - self.test_size) * len(dataset))
+ train_data, train_label = data[:train_index, ], label[:train_index]
+ self.test_set = {
+ 'data': data[train_index:, ],
+ 'label': label[train_index:]
+ }
+ self.model.fit(train_data, train_label)
+
+ def inference(self, mode='cls'):
+ pred = self.model.predict(self.test_set['data'])
+ if mode == 'cls':
+ acc = accuracy_score(self.test_set['label'], pred)
+ print('准确率为:{}%'.format(acc * 100))
+ elif mode == 'reg':
+ loss = mean_squared_error(self.test_set['label'], pred)
+ print('Loss: {}'.format(loss))
+
+ def load_dataset(self, path, test_size=0.2):
+ self.dataset_path = path
+ self.test_size = test_size
+
+ def load_pd(self):
+ pass
+
+ def load_list(self):
+ pass
\ No newline at end of file
diff --git a/Classifer.py b/Classifer.py
deleted file mode 100644
index 5c66594..0000000
--- a/Classifer.py
+++ /dev/null
@@ -1,67 +0,0 @@
-import pandas as pd
-import numpy as np
-import os
-
-from sklearn.metrics import accuracy_score ,mean_squared_error, r2_score
-from sklearn.svm import SVC
-from sklearn import linear_model
-from sklearn.naive_bayes import GaussianNB
-from sklearn.ensemble import BaggingClassifier,AdaBoostClassifier
-from sklearn.neighbors import KNeighborsClassifier
-
-class Classifer:
- def __init__ (self,
- backbone='RandomForest'
- ):
- self.backbone = backbone #获取外部运行py的绝对路径
- self.cwd = os.path.dirname(os.getcwd()) #获取当前文件的绝对路径
- self.file_dirname = os.path.dirname(os.path.abspath(__file__))
- if backbone == 'SVM':
- self.model = SVC()
- elif backbone == 'NB':
- self.model = GaussianNB()
- elif backbone == 'bagging':
- self.model = BaggingClassifier(KNeighborsClassifier())
- elif backbone == 'boosting':
- self.model = AdaBoostClassifier(n_estimators=100, random_state=0)
-
- def train(self,seed=0,data_type='csv'):
- np.random.seed(seed)
- if data_type == 'csv':
- dataset = pd.read_csv(self.dataset_path,sep=',',header=None).values
- elif data_type == 'pandas':
- dataset = self.load_pd()
- elif data_type == 'list':
- dataset = self.load_list()
- np.random.shuffle(dataset)
-
- data,label = dataset[:,:-1],dataset[:,-1]
- train_index = int((1-self.test_size)*len(dataset))
- train_data,train_label = data[:train_index,],label[:train_index]
- self.test_set = {
- 'data':data[train_index:,],
- 'label':label[train_index:]
- }
- self.model.fit(train_data,train_label)
-
- def inference(self,mode = 'cls'):
- pred = self.model.predict(self.test_set['data'])
- if mode == 'cls':
- acc = accuracy_score(self.test_set['label'],pred)
- print('准确率为:{}%'.format(acc*100))
- elif mode == 'reg':
- loss = mean_squared_error(self.test_set['label'],pred)
- print('Loss: {}'.format(loss))
-
-
-
-
- def load_dataset(self,path,test_size=0.2):
- self.dataset_path = path
- self.test_size=test_size
-
- def load_pd():
- pass
-
- def load_list():
- pass
\ No newline at end of file
diff --git a/__init__.py b/__init__.py
index 40375e1..afc1369 100644
--- a/__init__.py
+++ b/__init__.py
@@ -2,12 +2,18 @@
from .BaseML import KNN
from .BaseML import PCA
from .BaseML import Perceptron
+from .BaseML import AdaBoost
+from .BaseML import GaussianNB
+from .BaseML import SVM
from .BaseNet import BaseNet
-__all__ = [
+
+__all__ = [
'BaseNet',
'CART',
- # 'KNNClassifier',
+ 'SVM',
+ 'AdaBoost',
+ 'GaussianNB',
'KNN',
'PCA',
- 'Perceptron']
\ No newline at end of file
+ 'Perceptron']
diff --git a/demo_cls.py b/demo_cls.py
deleted file mode 100644
index 31b319b..0000000
--- a/demo_cls.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# from base import *
-from Classifer import *
-# from MMEdu import MMMlearing
-dataset_path = "./test.csv"
-model = Classifer(backbone ='SVM')
-model.load_dataset(dataset_path)
-model.train()
-acc = model.inference()
diff --git a/demo_img.py b/demo_img.py
deleted file mode 100644
index d0faa58..0000000
--- a/demo_img.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from mmImage import *
-image_path = "6.png"
-# model = MMImage(method = 'contour')
-# model = MMImage(method = 'canny')
-# model = MMImage(method = 'blur') #para = ['mean',3]
-model = MMImage(method = 'corner') # para = 0.01)
-model.load_image(image_path)
-image_out = model.process(save_path = './save.png',para = 0.1)
-#
diff --git a/demo_reg.py b/demo_reg.py
deleted file mode 100644
index 9cea765..0000000
--- a/demo_reg.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# from base import *
-from LR import *
-# from MMEdu import MMMlearing
-dataset_path = "./test.csv"
-model = LR()
-model.load_dataset(dataset_path)
-model.train()
-acc = model.inference()
diff --git a/mmImage.py b/tools/mmImage.py
similarity index 82%
rename from mmImage.py
rename to tools/mmImage.py
index ae7a945..4e4d4b6 100644
--- a/mmImage.py
+++ b/tools/mmImage.py
@@ -4,52 +4,52 @@
import numpy as np
class MMImage:
- def __init__ (self,method='blur'):
+ def __init__(self, method='blur'):
self.method = method
self.img = None
- def process(self,save_path = '',para = []):
+ def process(self, save_path='', para=[]):
if save_path != '':
save_path = save_path
img_out = getattr(self, "_"+self.method)(para)
- cv2.imwrite(save_path,img_out)
+ cv2.imwrite(save_path, img_out)
- def load_image(self,image_path):
- img = cv2.imread(image_path,cv2.IMREAD_UNCHANGED)
+ def load_image(self, image_path):
+ img = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
self.img = img
- def _blur(self,para = []):
- type_method,kernal = para
+ def _blur(self, para=[]):
+ type_method, kernal = para
img_out = cv2.blur(self.img, (kernal, kernal)) #sum(square)/25
return img_out
def _contour(self,para=[]):
- gray_img=cv2.cvtColor(self.img,cv2.COLOR_BGR2GRAY)
- dep,img_bin=cv2.threshold(gray_img,128,255,cv2.THRESH_BINARY)
- image_,contours=cv2.findContours(img_bin, mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_SIMPLE)
+ gray_img = cv2.cvtColor(self.img, cv2.COLOR_BGR2GRAY)
+ dep, img_bin = cv2.threshold(gray_img, 128, 255, cv2.THRESH_BINARY)
+ image_, contours = cv2.findContours(img_bin, mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_SIMPLE)
to_write = self.img.copy()
# cv2.drawContours(img,contours,0,(0,0,255),3)
ret = cv2.drawContours(to_write,image_,-1,(0,0,255),3)
return ret
- def _hist(self,para=[]):
- gray_img=cv2.cvtColor(self.img,cv2.COLOR_BGR2GRAY)
- img = gray_img.astype(np.uint8)
+ def _hist(self, para=[]):
+ gray_img = cv2.cvtColor(self.img, cv2.COLOR_BGR2GRAY)
+ img = gray_img.astype(np.uint8)
return cv2.equalizeHist(img)
- def _watershed_contour(self,para=[]):
+ def _watershed_contour(self, para=[]):
gray = cv2.cvtColor(self.img, cv2.COLOR_BGR2GRAY) # 转为灰度图像
# 查找和绘制图像轮廓
- Gauss = cv2.GaussianBlur(gray, (5,5), sigmaX=4.0)
- grad = cv2.Canny(Gauss,50,150)
+ Gauss = cv2.GaussianBlur(gray, (5, 5), sigmaX=4.0)
+ grad = cv2.Canny(Gauss, 50, 150)
grad, contours = cv2.findContours(grad, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) # 查找图像轮廓
markers = np.zeros(self.img.shape[:2], np.int32) # 生成标识图像,所有轮廓区域标识为索引号 (index)
for index in range(len(contours)): # 用轮廓的索引号 index 标识轮廓区域
markers = cv2.drawContours(markers, grad, index, (index, index, index), 1, 8, contours)
ContoursMarkers = np.zeros(self.img.shape[:2], np.uint8)
- ContoursMarkers[markers>0] = 255
+ ContoursMarkers[markers > 0] = 255
# 分水岭算法
markers = cv2.watershed(self.img, markers) # 所有轮廓的像素点被标注为 -1
@@ -58,7 +58,7 @@ def _watershed_contour(self,para=[]):
bgrMarkers = np.zeros_like(self.img)
for i in range(len(contours)):
colorKind = [np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)]
- bgrMarkers[markers==i] = colorKind
+ bgrMarkers[markers == i] = colorKind
bgrFilled = cv2.addWeighted(self.img, 0.67, bgrMarkers, 0.33, 0)
return cv2.cvtColor(bgrFilled, cv2.COLOR_BGR2RGB)
@@ -88,7 +88,7 @@ def _watershed(self,para):
for i in range(kinds):
if (i!=maxKind):
colorKind = [np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)]
- markersBGR[markers==i] = colorKind
+ markersBGR[markers == i] = colorKind
# 去除连通域中的背景区域部分
unknown = cv2.subtract(sure_bg, sure_fg) # 待定区域,前景与背景的重合区域
markers[unknown == 255] = 0 # 去掉属于背景的区域 (置零)
@@ -102,9 +102,8 @@ def _watershed(self,para):
# print(self.img.shape, markers.shape, markers.max(), markers.min(), ret)
return cv2.cvtColor(markersBGR, cv2.COLOR_BGR2RGB)
-
def _canny(self,para=[100,200]):
- return cv2.Canny(self.img,para[0],para[1])
+ return cv2.Canny(self.img, para[0], para[1])
def _corner(self,para = 0.01):
gray_img = cv2.cvtColor(self.img, cv2.COLOR_BGR2GRAY)