Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
[flake8]
max-line-length = 120
extend-ignore =
E203,
W503
select =
W,C
B,B9
exclude =
.git,
__pycache__,
.venv,
build,
dist,
migrations
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ name: CI
on:
push:
branches:
- '*'
- main
pull_request:
branches:
Expand Down
21 changes: 21 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
name: Lint and Format

on: [push, pull_request]

jobs:
check:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
with:
python-version: "3.11"

- name: Install dependencies
run: pip install black flake8 flake8-bugbear

- name: Run Black
run: black --check .

- name: Run flake8
run: flake8 .
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
*.pyc
.vscode/
.idea/
.env
.pytest_cache/
venv
*.nix
29 changes: 20 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,24 @@
## 📋 Общее описание
[![CI](https://github.com/maxGrigorenko/DistributionClassifier/actions/workflows/ci.yml/badge.svg)](https://github.com/maxGrigorenko/DistributionClassifier/actions/workflows/ci.yml)

## Общее описание
Проект посвящен анализу и сравнению статистических распределений, а также созданию классификатора распределений:
- **Нормальное vs Лапласа** в папке `/src/normal_laplace`
- **Экспоненциальное vs Парето** в папке `/src/exp_pareto`

Структура проекта:
.
├── src/
│ ├── normal_laplace/ Анализ Normal/Laplace
│ └── exp_pareto/ # Анализ Exponential/Pareto
├── tests/
├── report/ # Итоговый отчет в формате PDF
└── requirements.txt # Список используемых библиотек
## Описание инфраструктуры

- Язык программирования: `Python 3.11`
- Форматтер: `black`
- Модульное тестирование: `pytest`

## Отчет по экспериментам

Файл [report/report.pdf](https://github.com/maxGrigorenko/DistributionClassifier/blob/main/report/report.pdf)

## Структура репозитория

- `/report` -- папка с технической документацией, отчетом и исходниками отчета
- `/src` -- папка с исходным кодом функций и папками с экспериментами конкретных распределений
- `/src/exp_pareto` -- Exp vs Pareto
- `/src/normal_laplace` -- Normal vs Laplace
- `/tests` -- папка с модульными тестами
12 changes: 12 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[tool.black]
target-version = ["py310"] # Минимальная версия Python
include = '\.pyi?$' # Проверять только .py/.pyi файлы
exclude = '''
/(
\.git
| \.venv
| __pycache__
| build
| dist
)/
'''
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import networkx as nx
from networkx.algorithms.dominating import dominating_set

from graphs import *


def get_max_degree(graph):
    """Return the largest vertex degree occurring in *graph*."""
    degree_by_node = dict(graph.degree())
    return max(degree_by_node.values())
def get_min_degree(graph):
    """Return the smallest vertex degree occurring in *graph*.

    Note: the original line was fused with diff metadata ("Expand All @@ …")
    by the page scrape; this restores the valid definition.
    """
    return min(dict(graph.degree()).values())


def get_mean_degree(graph):
    """Average vertex degree: twice the edge count over the node count."""
    edge_total = graph.number_of_edges()
    node_total = graph.number_of_nodes()
    return 2 * edge_total / node_total


def get_components(graph):
    """Number of connected components of *graph* (delegates to networkx)."""
    component_count = nx.number_connected_components(graph)
    return component_count

Expand All @@ -27,13 +33,7 @@ def get_number_of_triangles(graph):

def get_chromatic(graph):
    """Greedy upper bound on the chromatic number of *graph*.

    Uses networkx's largest-first greedy coloring; color labels start at 0,
    so the number of colors used is the maximum label plus one.

    Removes diff residue present in the span: a dead ``ans = 0`` assignment,
    a commented-out try/except block, and an unreachable duplicate return.
    """
    coloring = nx.coloring.greedy_color(graph, strategy="largest_first")
    return max(coloring.values()) + 1


def get_max_independent_set_size(graph):
Expand All @@ -51,6 +51,37 @@ def get_minimum_dominating_set_size(graph, n_trials=5):
return res


def get_minimum_dominating_set_size_for_dist(graph: Distance_Graph):
    """Size of a dominating set for a 1-D distance graph, via a greedy sweep.

    Vertices are real numbers; two vertices are adjacent when they lie
    strictly closer than ``graph.d_distance``.  The sweep repeatedly takes
    the rightmost vertex adjacent to the leftmost uncovered one, then skips
    every vertex that choice dominates (intended to be exact for this
    interval structure).
    """
    d = graph.d_distance
    values = sorted(graph.get_numbers())
    n = graph.n_vertexes

    chosen = 0
    left = 0
    while left < n:
        reach = values[left] + d

        # Advance to the first vertex at or beyond the reach of values[left].
        right = left
        while right < n and values[right] < reach:
            right += 1

        # The vertex just inside the boundary dominates the widest span.
        dominator = values[right - 1]
        chosen += 1

        # Skip every vertex already dominated by the chosen one.
        left = right
        while left < n and values[left] < dominator + d:
            left += 1

    return chosen


@dataclass
class CharacteristicsSingle:
max_degree: int
Expand Down
11 changes: 11 additions & 0 deletions src/exp_pareto/graphs.py → src/common_tools/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@
class KNN_Graph(nx.Graph):
n_vertexes: int
k_neighbours: int
numbers: np.ndarray

def __init__(self, incoming_graph_data=None, **attr):
    """Initialise an empty k-NN graph shell.

    ``attr`` must supply ``n`` (vertex count) and ``k_neighbours``.
    """
    super().__init__(incoming_graph_data, **attr)
    self.n_vertexes = attr["n"]
    self.k_neighbours = attr["k_neighbours"]
    # Sample values are attached later by build_from_numbers().
    self.numbers = None

def build_from_numbers(self, numbers: np.ndarray):
assert numbers.size == self.n_vertexes
Expand All @@ -28,6 +30,8 @@ def build_from_numbers(self, numbers: np.ndarray):
for j in indices[i][1:]:
self.add_edge(i, j)

self.numbers = numbers.copy()

def draw(self):
plt.figure(figsize=(8, 6))
nx.draw(
Expand All @@ -47,11 +51,13 @@ def draw(self):
class Distance_Graph(nx.Graph):
n_vertexes: int
d_distance: float
numbers: np.ndarray

def __init__(self, incoming_graph_data=None, **attr):
    """Initialise an empty distance graph shell.

    ``attr`` must supply ``n`` (vertex count) and ``d_distance``
    (the adjacency threshold).
    """
    super().__init__(incoming_graph_data, **attr)
    self.n_vertexes = attr["n"]
    self.d_distance = attr["d_distance"]
    # Sample values are attached later by build_from_numbers().
    self.numbers = None

def build_from_numbers(self, numbers: np.ndarray):
assert numbers.size == self.n_vertexes
Expand All @@ -66,6 +72,11 @@ def build_from_numbers(self, numbers: np.ndarray):
if distances[i][j] < self.d_distance:
self.add_edge(i, j)

self.numbers = numbers.copy()

def get_numbers(self):
    """Return a defensive copy of the sample values the graph was built from."""
    stored = self.numbers
    return stored.copy()

def draw(self):
pos = nx.spring_layout(self)

Expand Down
13 changes: 11 additions & 2 deletions src/exp_pareto/characteristics_experimental.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
import os
import sys
from dataclasses import dataclass

import networkx as nx
import numpy as np

sys.path.append(
os.path.abspath(os.path.join(os.path.dirname(__file__), "../common_tools"))
)
from graphs import Distance_Graph, KNN_Graph


Expand Down Expand Up @@ -81,13 +86,17 @@ def get_characteristics(lambda_param, alpha_param, n, k, d, distrib_type=None):
distance_graph_pareto.build_from_numbers(numbers_pareto)

return create_characteristics(
knn_graph_exp, distance_graph_exp, knn_graph_pareto, distance_graph_pareto
knn_graph_exp,
distance_graph_exp,
knn_graph_pareto,
distance_graph_pareto,
distrib_type=distrib_type,
)


def get_average_characteristics(lambda_param, alpha_param, n, k, d, distrib_type=None):
characteristics_list = []
for trial in range(5):
for _trial in range(5):
characteristics = get_characteristics(
lambda_param, alpha_param, n, k, d, distrib_type
)
Expand Down
15 changes: 11 additions & 4 deletions src/exp_pareto/classifier.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.spatial import ConvexHull, Delaunay
from tqdm import tqdm

from characteristics_applied import *
from graphs import Distance_Graph
from metrics import *
from visualisations import *

sys.path.append(
os.path.abspath(os.path.join(os.path.dirname(__file__), "../common_tools"))
)
from characteristics_applied import *
from graphs import Distance_Graph


class ConvexHullWrapper:
def __init__(self, points_df):
Expand Down Expand Up @@ -125,7 +132,7 @@ def fit(self, exp_points_for_test, pareto_points_for_test, verbose=False):

I_errors = []
powers = []
for i in tqdm(range(self.A.shape[0])):
for _ in tqdm(range(self.A.shape[0])):
points_powers = {}

for exp_point_to_remove in self.A.values:
Expand Down Expand Up @@ -175,7 +182,7 @@ def predict_item(self, point: np.array):

def predict_items(self, points: pd.DataFrame):
    """Classify every row of *points*; returns an array of per-row predictions."""
    predictions = [self.predict_item(row.values) for _, row in points.iterrows()]
    return np.array(predictions)
Expand Down
33 changes: 20 additions & 13 deletions src/exp_pareto/experiments_first_part_1.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,13 @@
"from dataclasses import dataclass\n",
"from tqdm import tqdm\n",
"from itertools import product\n",
"from characteristics_experimental import *\n",
"import sys\n",
"import os\n",
"\n",
"from graphs import KNN_Graph, Distance_Graph\n",
"from characteristics_experimental import *"
"current_dir = os.getcwd()\n",
"sys.path.append(os.path.abspath(os.path.join(current_dir, '../common_tools')))\n",
"from graphs import KNN_Graph, Distance_Graph"
]
},
{
Expand Down Expand Up @@ -91,26 +95,28 @@
],
"source": [
"for lambda_param in tqdm(lambdas):\n",
" average_characteristics = get_average_characteristics(lambda_param, 1, n, k, d)\n",
" average_characteristics_knn = get_average_characteristics(lambda_param, 1, n, k, d, \"knn\")\n",
" average_characteristics_dist = get_average_characteristics(lambda_param, 1, n, k, d, \"dist\")\n",
"\n",
" results.loc[len(results)] = [\"exp\", lambda_param, \"knn\", \"components\", average_characteristics.knn_exp_components]\n",
" results.loc[len(results)] = [\"exp\", lambda_param, \"knn\", \"components\", average_characteristics_knn.knn_exp_components]\n",
"\n",
" results.loc[len(results)] = [\"exp\", lambda_param, \"knn\", \"chromatic\", average_characteristics.knn_exp_chromatic]\n",
" results.loc[len(results)] = [\"exp\", lambda_param, \"knn\", \"chromatic\", average_characteristics_knn.knn_exp_chromatic]\n",
"\n",
" results.loc[len(results)] = [\"exp\", lambda_param, \"dist\", \"components\", average_characteristics.distance_exp_components]\n",
" results.loc[len(results)] = [\"exp\", lambda_param, \"dist\", \"components\", average_characteristics_dist.distance_exp_components]\n",
"\n",
" results.loc[len(results)] = [\"exp\", lambda_param, \"dist\", \"chromatic\", average_characteristics.distance_exp_chromatic]\n",
" results.loc[len(results)] = [\"exp\", lambda_param, \"dist\", \"chromatic\", average_characteristics_dist.distance_exp_chromatic]\n",
"\n",
"for alpha_param in tqdm(alphas):\n",
" average_characteristics = get_average_characteristics(1, alpha_param, n, k, d)\n",
" average_characteristics_knn = get_average_characteristics(1, alpha_param, n, k, d, \"knn\")\n",
" average_characteristics_dist = get_average_characteristics(1, alpha_param, n, k, d, \"dist\")\n",
"\n",
" results.loc[len(results)] = [\"pareto\", alpha_param, \"knn\", \"components\", average_characteristics.knn_pareto_components]\n",
" results.loc[len(results)] = [\"pareto\", alpha_param, \"knn\", \"components\", average_characteristics_knn.knn_pareto_components]\n",
"\n",
" results.loc[len(results)] = [\"pareto\", alpha_param, \"knn\", \"chromatic\", average_characteristics.knn_pareto_chromatic]\n",
" results.loc[len(results)] = [\"pareto\", alpha_param, \"knn\", \"chromatic\", average_characteristics_knn.knn_pareto_chromatic]\n",
"\n",
" results.loc[len(results)] = [\"pareto\", alpha_param, \"dist\", \"components\", average_characteristics.distance_pareto_components]\n",
" results.loc[len(results)] = [\"pareto\", alpha_param, \"dist\", \"components\", average_characteristics_dist.distance_pareto_components]\n",
"\n",
" results.loc[len(results)] = [\"pareto\", alpha_param, \"dist\", \"chromatic\", average_characteristics.distance_pareto_chromatic]\n"
" results.loc[len(results)] = [\"pareto\", alpha_param, \"dist\", \"chromatic\", average_characteristics_dist.distance_pareto_chromatic]\n"
]
},
{
Expand Down Expand Up @@ -193,7 +199,8 @@
"name": "python3"
},
"language_info": {
"name": "python"
"name": "python",
"version": "3.12.8"
}
},
"nbformat": 4,
Expand Down
Loading