Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
[flake8]
max-line-length = 120
extend-ignore =
E203,
W503
select =
W,C
B,B9
exclude =
.git,
__pycache__,
.venv,
build,
dist,
migrations
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ name: CI
on:
push:
branches:
- '*'
- main
pull_request:
branches:
Expand Down
21 changes: 21 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
name: Lint and Format

on: [push, pull_request]

jobs:
check:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
with:
python-version: "3.11"

- name: Install dependencies
run: pip install black flake8 flake8-bugbear

- name: Run Black
run: black --check .

- name: Run flake8
run: flake8 .
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
*.pyc
.vscode/
.idea/
.env
.pytest_cache/
venv
*.nix
29 changes: 20 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,24 @@
## 📋 Общее описание
[![CI](https://github.com/maxGrigorenko/DistributionClassifier/actions/workflows/ci.yml/badge.svg)](https://github.com/maxGrigorenko/DistributionClassifier/actions/workflows/ci.yml)

## Общее описание
Проект посвящен анализу и сравнению статистических распределений, а также созданию классификатора распределений:
- **Нормальное vs Лапласа** в папке `/src/normal_laplace`
- **Экспоненциальное vs Парето** в папке `/src/exp_pareto`

Структура проекта:
.
├── src/
│ ├── normal_laplace/ Анализ Normal/Laplace
│ └── exp_pareto/ # Анализ Exponential/Pareto
├── tests/
├── report/ # Итоговый отчет в формате PDF
└── requirements.txt # Список используемых библиотек
## Описание инфраструктуры

- Язык программирования: `Python 3.11`
- Форматтер: `black`
- Модульное тестирование: `pytest`

## Отчет по экспериментам

Файл [report/report.pdf](https://github.com/maxGrigorenko/DistributionClassifier/blob/main/report/report.pdf)

## Структура репозитория

- `/report` -- папка с технической документацией, отчетом и исходниками отчета
- `/src` -- папка с исходным кодом функций и папками с экспериментами конкретных распределений
- `/src/exp_pareto` -- Exp vs Pareto
- `/src/normal_laplace` -- Normal vs Laplace
- `/tests` -- папка с модульными тестами
12 changes: 12 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[tool.black]
target-version = ["py310"] # Минимальная версия Python
include = '\.pyi?$' # Проверять только .py/.pyi файлы
exclude = '''
/(
\.git
| \.venv
| __pycache__
| build
| dist
)/
'''
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import networkx as nx
from networkx.algorithms.dominating import dominating_set

from graphs import *


def get_max_degree(graph):
    """Return the largest vertex degree occurring in *graph*."""
    degree_by_node = dict(graph.degree())
    return max(degree_by_node.values())
def get_min_degree(graph):
    """Return the smallest vertex degree occurring in *graph*.

    Note: the original line was fused with diff metadata ("Expand All @@ …")
    by the page scrape; this restores the valid definition.
    """
    return min(dict(graph.degree()).values())


def get_mean_degree(graph):
    """Average vertex degree: twice the edge count over the node count."""
    edge_total = graph.number_of_edges()
    node_total = graph.number_of_nodes()
    return 2 * edge_total / node_total


def get_components(graph):
    """Number of connected components of *graph* (delegates to networkx)."""
    component_count = nx.number_connected_components(graph)
    return component_count

Expand All @@ -27,13 +33,7 @@ def get_number_of_triangles(graph):

def get_chromatic(graph):
    """Greedy upper bound on the chromatic number of *graph*.

    Uses networkx's largest-first greedy coloring; color labels start at 0,
    so the number of colors used is the maximum label plus one.

    Removes diff residue present in the span: a dead ``ans = 0`` assignment,
    a commented-out try/except block, and an unreachable duplicate return.
    """
    coloring = nx.coloring.greedy_color(graph, strategy="largest_first")
    return max(coloring.values()) + 1


def get_max_independent_set_size(graph):
Expand All @@ -51,6 +51,37 @@ def get_minimum_dominating_set_size(graph, n_trials=5):
return res


def get_minimum_dominating_set_size_for_dist(graph: Distance_Graph):
    """Size of a dominating set for a 1-D distance graph, via a greedy sweep.

    Vertices are real numbers; two vertices are adjacent when they lie
    strictly closer than ``graph.d_distance``.  The sweep repeatedly takes
    the rightmost vertex adjacent to the leftmost uncovered one, then skips
    every vertex that choice dominates (intended to be exact for this
    interval structure).
    """
    d = graph.d_distance
    values = sorted(graph.get_numbers())
    n = graph.n_vertexes

    chosen = 0
    left = 0
    while left < n:
        reach = values[left] + d

        # Advance to the first vertex at or beyond the reach of values[left].
        right = left
        while right < n and values[right] < reach:
            right += 1

        # The vertex just inside the boundary dominates the widest span.
        dominator = values[right - 1]
        chosen += 1

        # Skip every vertex already dominated by the chosen one.
        left = right
        while left < n and values[left] < dominator + d:
            left += 1

    return chosen


@dataclass
class CharacteristicsSingle:
max_degree: int
Expand Down
11 changes: 11 additions & 0 deletions src/exp_pareto/graphs.py → src/common_tools/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@
class KNN_Graph(nx.Graph):
n_vertexes: int
k_neighbours: int
numbers: np.ndarray

def __init__(self, incoming_graph_data=None, **attr):
    """Initialise an empty k-NN graph shell.

    ``attr`` must supply ``n`` (vertex count) and ``k_neighbours``.
    """
    super().__init__(incoming_graph_data, **attr)
    self.n_vertexes = attr["n"]
    self.k_neighbours = attr["k_neighbours"]
    # Sample values are attached later by build_from_numbers().
    self.numbers = None

def build_from_numbers(self, numbers: np.ndarray):
assert numbers.size == self.n_vertexes
Expand All @@ -28,6 +30,8 @@ def build_from_numbers(self, numbers: np.ndarray):
for j in indices[i][1:]:
self.add_edge(i, j)

self.numbers = numbers.copy()

def draw(self):
plt.figure(figsize=(8, 6))
nx.draw(
Expand All @@ -47,11 +51,13 @@ def draw(self):
class Distance_Graph(nx.Graph):
n_vertexes: int
d_distance: float
numbers: np.ndarray

def __init__(self, incoming_graph_data=None, **attr):
    """Initialise an empty distance graph shell.

    ``attr`` must supply ``n`` (vertex count) and ``d_distance``
    (the adjacency threshold).
    """
    super().__init__(incoming_graph_data, **attr)
    self.n_vertexes = attr["n"]
    self.d_distance = attr["d_distance"]
    # Sample values are attached later by build_from_numbers().
    self.numbers = None

def build_from_numbers(self, numbers: np.ndarray):
assert numbers.size == self.n_vertexes
Expand All @@ -66,6 +72,11 @@ def build_from_numbers(self, numbers: np.ndarray):
if distances[i][j] < self.d_distance:
self.add_edge(i, j)

self.numbers = numbers.copy()

def get_numbers(self):
    """Return a defensive copy of the sample values the graph was built from."""
    stored = self.numbers
    return stored.copy()

def draw(self):
pos = nx.spring_layout(self)

Expand Down
13 changes: 11 additions & 2 deletions src/exp_pareto/characteristics_experimental.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
import os
import sys
from dataclasses import dataclass

import networkx as nx
import numpy as np

sys.path.append(
os.path.abspath(os.path.join(os.path.dirname(__file__), "../common_tools"))
)
from graphs import Distance_Graph, KNN_Graph


Expand Down Expand Up @@ -81,13 +86,17 @@ def get_characteristics(lambda_param, alpha_param, n, k, d, distrib_type=None):
distance_graph_pareto.build_from_numbers(numbers_pareto)

return create_characteristics(
knn_graph_exp, distance_graph_exp, knn_graph_pareto, distance_graph_pareto
knn_graph_exp,
distance_graph_exp,
knn_graph_pareto,
distance_graph_pareto,
distrib_type=distrib_type,
)


def get_average_characteristics(lambda_param, alpha_param, n, k, d, distrib_type=None):
characteristics_list = []
for trial in range(5):
for _trial in range(5):
characteristics = get_characteristics(
lambda_param, alpha_param, n, k, d, distrib_type
)
Expand Down
15 changes: 11 additions & 4 deletions src/exp_pareto/classifier.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.spatial import ConvexHull, Delaunay
from tqdm import tqdm

from characteristics_applied import *
from graphs import Distance_Graph
from metrics import *
from visualisations import *

sys.path.append(
os.path.abspath(os.path.join(os.path.dirname(__file__), "../common_tools"))
)
from characteristics_applied import *
from graphs import Distance_Graph


class ConvexHullWrapper:
def __init__(self, points_df):
Expand Down Expand Up @@ -125,7 +132,7 @@ def fit(self, exp_points_for_test, pareto_points_for_test, verbose=False):

I_errors = []
powers = []
for i in tqdm(range(self.A.shape[0])):
for _ in tqdm(range(self.A.shape[0])):
points_powers = {}

for exp_point_to_remove in self.A.values:
Expand Down Expand Up @@ -175,7 +182,7 @@ def predict_item(self, point: np.array):

def predict_items(self, points: pd.DataFrame):
    """Classify every row of *points*; returns an array of per-row predictions."""
    predictions = [self.predict_item(row.values) for _, row in points.iterrows()]
    return np.array(predictions)
Expand Down
33 changes: 20 additions & 13 deletions src/exp_pareto/experiments_first_part_1.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,13 @@
"from dataclasses import dataclass\n",
"from tqdm import tqdm\n",
"from itertools import product\n",
"from characteristics_experimental import *\n",
"import sys\n",
"import os\n",
"\n",
"from graphs import KNN_Graph, Distance_Graph\n",
"from characteristics_experimental import *"
"current_dir = os.getcwd()\n",
"sys.path.append(os.path.abspath(os.path.join(current_dir, '../common_tools')))\n",
"from graphs import KNN_Graph, Distance_Graph"
]
},
{
Expand Down Expand Up @@ -91,26 +95,28 @@
],
"source": [
"for lambda_param in tqdm(lambdas):\n",
" average_characteristics = get_average_characteristics(lambda_param, 1, n, k, d)\n",
" average_characteristics_knn = get_average_characteristics(lambda_param, 1, n, k, d, \"knn\")\n",
" average_characteristics_dist = get_average_characteristics(lambda_param, 1, n, k, d, \"dist\")\n",
"\n",
" results.loc[len(results)] = [\"exp\", lambda_param, \"knn\", \"components\", average_characteristics.knn_exp_components]\n",
" results.loc[len(results)] = [\"exp\", lambda_param, \"knn\", \"components\", average_characteristics_knn.knn_exp_components]\n",
"\n",
" results.loc[len(results)] = [\"exp\", lambda_param, \"knn\", \"chromatic\", average_characteristics.knn_exp_chromatic]\n",
" results.loc[len(results)] = [\"exp\", lambda_param, \"knn\", \"chromatic\", average_characteristics_knn.knn_exp_chromatic]\n",
"\n",
" results.loc[len(results)] = [\"exp\", lambda_param, \"dist\", \"components\", average_characteristics.distance_exp_components]\n",
" results.loc[len(results)] = [\"exp\", lambda_param, \"dist\", \"components\", average_characteristics_dist.distance_exp_components]\n",
"\n",
" results.loc[len(results)] = [\"exp\", lambda_param, \"dist\", \"chromatic\", average_characteristics.distance_exp_chromatic]\n",
" results.loc[len(results)] = [\"exp\", lambda_param, \"dist\", \"chromatic\", average_characteristics_dist.distance_exp_chromatic]\n",
"\n",
"for alpha_param in tqdm(alphas):\n",
" average_characteristics = get_average_characteristics(1, alpha_param, n, k, d)\n",
" average_characteristics_knn = get_average_characteristics(1, alpha_param, n, k, d, \"knn\")\n",
" average_characteristics_dist = get_average_characteristics(1, alpha_param, n, k, d, \"dist\")\n",
"\n",
" results.loc[len(results)] = [\"pareto\", alpha_param, \"knn\", \"components\", average_characteristics.knn_pareto_components]\n",
" results.loc[len(results)] = [\"pareto\", alpha_param, \"knn\", \"components\", average_characteristics_knn.knn_pareto_components]\n",
"\n",
" results.loc[len(results)] = [\"pareto\", alpha_param, \"knn\", \"chromatic\", average_characteristics.knn_pareto_chromatic]\n",
" results.loc[len(results)] = [\"pareto\", alpha_param, \"knn\", \"chromatic\", average_characteristics_knn.knn_pareto_chromatic]\n",
"\n",
" results.loc[len(results)] = [\"pareto\", alpha_param, \"dist\", \"components\", average_characteristics.distance_pareto_components]\n",
" results.loc[len(results)] = [\"pareto\", alpha_param, \"dist\", \"components\", average_characteristics_dist.distance_pareto_components]\n",
"\n",
" results.loc[len(results)] = [\"pareto\", alpha_param, \"dist\", \"chromatic\", average_characteristics.distance_pareto_chromatic]\n"
" results.loc[len(results)] = [\"pareto\", alpha_param, \"dist\", \"chromatic\", average_characteristics_dist.distance_pareto_chromatic]\n"
]
},
{
Expand Down Expand Up @@ -193,7 +199,8 @@
"name": "python3"
},
"language_info": {
"name": "python"
"name": "python",
"version": "3.12.8"
}
},
"nbformat": 4,
Expand Down
Loading