From 8d07f0a436b7f0e189efa10be5ccb7f97431d191 Mon Sep 17 00:00:00 2001 From: lpetrov02 Date: Fri, 15 Dec 2023 09:56:59 +0300 Subject: [PATCH 01/23] Started migration from Data2dLayer to DataLayer --- server/api/GraphBuilder.cpp | 22 +++++++++++++--------- server/api/Parser.cpp | 20 +++++++++++--------- server/api/Parser.h | 2 +- server/core/Layer.cpp | 5 ++--- server/core/Layer.h | 6 +++--- server/core/Parameters.h | 5 ----- 6 files changed, 30 insertions(+), 30 deletions(-) diff --git a/server/api/GraphBuilder.cpp b/server/api/GraphBuilder.cpp index d52f41bf..1bec78d0 100644 --- a/server/api/GraphBuilder.cpp +++ b/server/api/GraphBuilder.cpp @@ -127,12 +127,14 @@ void Graph::Initialize(crow::json::rvalue modelJson, layers_.emplace(layer_id, new ReLULayer{prevLayers}); } else if (type == "Data" || type == "Target") { CHECK_HAS_FIELD(layerDicts[layer_id], "parameters"); - auto params = ParseData2d(layerDicts[layer_id]["parameters"]); - if (dataDicts[layer_id].size() % params.width != 0) { - throw std::invalid_argument("Sizes mismatch!"); + Shape shape = ParseData(layerDicts[layer_id]["parameters"]); + if (shape.size() == 0 || shape.size() % dataDicts[layer_id].size() != 0) { + std::string message = "Object of size " + + std::to_string(dataDicts[layer_id].size()) + + " can not have shape Nx" + shape.toString(); + throw std::runtime_error(message); } - params.height = dataDicts[layer_id].size() / params.width; - layers_.emplace(layer_id, new Data2dLayer{params, dataDicts[layer_id]}); + layers_.emplace(layer_id, new DataLayer{shape, dataDicts[layer_id]}); } else if (type == "Output") { for (auto prevLayerId : reversedEdges[layer_id]) { lastPredictIds_.push_back(prevLayerId); @@ -149,15 +151,17 @@ void Graph::Initialize(crow::json::rvalue modelJson, void Graph::ChangeInputData(std::vector data) { // All data goes to every data layer. Should be changed? 
for (int id : dataIds_) { - Data2dLayer* layer = reinterpret_cast(layers_[id]); + DataLayer* layer = reinterpret_cast(layers_[id]); size_t width = layer->result->output->shape.cols(); if (data.size() % width != 0) { throw std::invalid_argument("Sizes mismatch!"); } - Shape expectedShape = layer->result->output->shape; - data.resize(expectedShape.size(), 0); - layer->result->output.emplace(Blob::constBlob(expectedShape, data.data())); + Shape expected_shape = layer->result->output->shape; + size_t new_size = (data.size() + expected_shape.size() - 1) / expected_shape.size() * + expected_shape.size(); + data.resize(new_size, 0); + layer->result->output.emplace(Blob::constBlob(expected_shape, data.data())); } } diff --git a/server/api/Parser.cpp b/server/api/Parser.cpp index e1496665..532deec6 100644 --- a/server/api/Parser.cpp +++ b/server/api/Parser.cpp @@ -7,12 +7,11 @@ void CHECK_HAS_FIELD(const crow::json::rvalue& layer, const std::string& field) } void ParseCsvData(const std::vector>& data, std::vector& instances, std::vector& answers) { - // Think about optimization via reservation + instances.reserve(data.size()); + answers.reserve(data.size()); for (auto& instance : data) { answers.push_back(instance.back()); - for (int i = 0; i < instance.size() - 1; ++i) { - instances.push_back(instance[i]); - } + instances.emplace_back(instance.begin(), std::prev(instance.end())); } } @@ -33,11 +32,14 @@ LinearLayerParameters ParseLinear(const crow::json::rvalue& parameters) { return LinearLayerParameters{inFeatures, outFeatures, bias}; } -Data2dLayerParameters ParseData2d(const crow::json::rvalue& parameters) { - size_t width; +Shape ParseData(const crow::json::rvalue& parameters) { + std::vector shape; + shape.reserve(3); - CHECK_HAS_FIELD(parameters, "width"); + CHECK_HAS_FIELD(parameters, "shape"); - width = static_cast(parameters["width"].i()); - return Data2dLayerParameters{.width = width}; + for (auto dim : parameters["shape"]) { + shape.push_back(dim.i()); + } + 
return Shape{std::move(shape)}; } diff --git a/server/api/Parser.h b/server/api/Parser.h index 23662921..a58ccfa4 100644 --- a/server/api/Parser.h +++ b/server/api/Parser.h @@ -21,4 +21,4 @@ void CHECK_HAS_FIELD(const crow::json::rvalue& layer, const std::string& field); void ParseCsvData(const std::vector>& data, std::vector& instances, std::vector& answers); LinearLayerParameters ParseLinear(const crow::json::rvalue& parameters); -Data2dLayerParameters ParseData2d(const crow::json::rvalue& parameters); +Shape ParseData(const crow::json::rvalue& parameters); diff --git a/server/core/Layer.cpp b/server/core/Layer.cpp index 7f143a42..9a9562d1 100644 --- a/server/core/Layer.cpp +++ b/server/core/Layer.cpp @@ -32,9 +32,8 @@ ReLULayer::ReLULayer(const std::vector& args) { result = Tensor(relu, {args[0]}); } -Data2dLayer::Data2dLayer(const Data2dLayerParameters& params, const std::vector& values) - : width(params.width) { - result = Tensor(Blob::constBlob({{params.height, width}}, values.data())); +DataLayer::DataLayer(const Shape& shape, const std::vector& values) { + result = Tensor(Blob::constBlob(shape, values.data())); } MSELoss::MSELoss(const std::vector& args) : mean({0, 1, 2, 3}) { diff --git a/server/core/Layer.h b/server/core/Layer.h index 8d9cbb50..e9e6e0ca 100644 --- a/server/core/Layer.h +++ b/server/core/Layer.h @@ -5,6 +5,7 @@ #include "RandomInit.h" #include "Tensor.h" +#include "Shape.h" #include "Parameters.h" @@ -16,10 +17,9 @@ class Layer { std::vector layerOperationParams; }; -class Data2dLayer: public Layer { +class DataLayer: public Layer { public: - size_t width; - Data2dLayer(const Data2dLayerParameters& params, const std::vector& values); + DataLayer(const Shape& params, const std::vector& values); }; class LinearLayer: public Layer { diff --git a/server/core/Parameters.h b/server/core/Parameters.h index bb20cc20..27fce33a 100644 --- a/server/core/Parameters.h +++ b/server/core/Parameters.h @@ -5,8 +5,3 @@ struct LinearLayerParameters { 
std::size_t outFeatures; bool bias; }; - -struct Data2dLayerParameters { - std::size_t width; - std::size_t height; -}; From 02c6ae625a2b9241bce57cbc5156638aa7444653 Mon Sep 17 00:00:00 2001 From: lpetrov02 Date: Tue, 28 Nov 2023 13:56:42 +0300 Subject: [PATCH 02/23] Makes preparations for metrics logging on python Functionality for c++ http added, but not working yet Adds saving train metrics Adds saving train metrics and responding with PNG --- .gitignore | 3 +- py_server/Makefile | 2 +- py_server/mlcraft/db.py | 75 +++++++- py_server/mlcraft/server.py | 74 +++++++- py_server/mlcraft/static/swagger.yaml | 252 +++++++++++++++++++++++++- py_server/mlcraft/utils.py | 17 ++ py_server/pyproject.toml | 1 + server/Makefile | 5 +- server/api/GraphBuilder.cpp | 39 ++-- server/api/GraphBuilder.h | 14 +- server/api/server.cpp | 100 +++++++++- 11 files changed, 542 insertions(+), 40 deletions(-) diff --git a/.gitignore b/.gitignore index 901ef702..48b197a5 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,7 @@ __pycache__/ *$py.class .cache .venv +images/ # Flask stuff: instance/ @@ -49,4 +50,4 @@ dmypy.json # Makefile install bin -*.patch \ No newline at end of file +*.patch diff --git a/py_server/Makefile b/py_server/Makefile index 22184a56..c30780e5 100644 --- a/py_server/Makefile +++ b/py_server/Makefile @@ -83,4 +83,4 @@ $(dirEnv): clean: $(RM) install - $(RM) -r instance + $(RM) -r instance \ No newline at end of file diff --git a/py_server/mlcraft/db.py b/py_server/mlcraft/db.py index 92cd2776..0b07dca3 100644 --- a/py_server/mlcraft/db.py +++ b/py_server/mlcraft/db.py @@ -1,4 +1,5 @@ from threading import Lock # fuck it, just using old robust methods +import datetime import json from sqlite3 import IntegrityError from flask import current_app @@ -31,6 +32,18 @@ class Model(db.Model): # type: ignore raw = db.Column(db.Text) +class Metrics(db.Model): # type: ignore + __tablename__ = "metrics_table" + + id = db.Column(db.Integer, primary_key=True) + model = 
db.Column(db.Integer, db.ForeignKey("users_table.id"), nullable=False) + label = db.Column(db.Text) + values = db.Column(db.Text) + begin_time = db.Column(db.DateTime) + end_time = db.Column(db.DateTime) + protected = db.Column(db.Boolean) + + class SQLWorker: """ Important: whenever you work with 'content' column of the model, @@ -301,9 +314,6 @@ def verify_connection( ) layer1 = layer1_candidates[0] layer2 = layer2_candidates[0] - # TO BE DONE later - # if not self.check_dimensions(layer1, layer2): - # return LayersConnectionStatus.DimensionsMismatch if layer2["type"] == "Data" or layer1["type"] == "Output": raise Error( "Wrong direction in data or output layer", @@ -340,6 +350,65 @@ def is_model_trained(self, model_id: int): model = self.get_model(model_id) return model.is_trained + def update_metrics(self, model_id, values: list[float], label: str, rewrite: bool): + with current_app.app_context(), self.content_lock: + metrics = Metrics.query.filter_by(model=model_id, label=label)\ + .order_by(Metrics.id.desc()).first() + if not metrics: + metrics = Metrics() + metrics.model = model_id + metrics.label = label + metrics.values = " ".join(list(map(str, values))) + metrics.begin_time = datetime.datetime.now() + metrics.end_time = datetime.datetime.now() + metrics.protected = False + if rewrite: + metrics.values = "" + else: + metrics.values += " " + metrics.values += " ".join(list(map(str, values))) + metrics.end_time = datetime.datetime.now() + db.session.add(metrics) + db.session.commit() + return 0 + + def protect_metrics(self, model_id, label: str, protected: bool): + with current_app.app_context(), self.content_lock: + metrics = Metrics.query.filter_by(model=model_id, label=label)\ + .order_by(Metrics.id.desc()).first() + if not metrics: + raise Error( + f"No recordings found for model with id {model_id} and label {label}.", + HTTPStatus.NOT_FOUND, + ) + metrics.protected = protected + db.session.add(metrics) + db.session.commit() + return 0 + + def 
get_metrics(self, model_id, label: str) -> str: + with current_app.app_context(), self.content_lock: + metrics = Metrics.query.filter_by( + model=model_id, label=label + ).order_by(Metrics.id.desc()).first() + if not metrics: + raise Error( + f"No recordings found for model with id {model_id} and label {label}.", + HTTPStatus.NOT_FOUND, + ) + return metrics.values + + # Пока без ручки, просто как напоминание о том, что метрики нужно чистить + def delete_old_metrics(): + with current_app.app_context(), self.content_lock: + Metrics.query.filter( + Metrics.end_time < datetime.datetime.now() - datetime.timedelta(days=30) + ).delete() + Metrics.query.filter( + Metrics.end_time < datetime.datetime.now() - datetime.timedelta(days=1) + ).filter_by(protected=False).delete() + db.session.commit() + sql_worker: SQLWorker = None # type: ignore diff --git a/py_server/mlcraft/server.py b/py_server/mlcraft/server.py index 8aa33f7e..45835d91 100644 --- a/py_server/mlcraft/server.py +++ b/py_server/mlcraft/server.py @@ -1,8 +1,16 @@ +from json import dumps +from sqlite3 import IntegrityError +import datetime import requests from http import HTTPStatus -from flask import Blueprint, request, current_app - -from .utils import convert_model_parameters, is_valid_model, convert_model +from flask import Blueprint, request, current_app, send_file +import numpy as np +import os + +from .utils import ( + convert_model_parameters, is_valid_model, convert_model, + plot_metrics, delete_file, +) from .check_dimensions import assert_dimensions_match from .errors import Error @@ -159,7 +167,7 @@ def train_model( model = {"graph": model, "dataset": dataset} response = requests.post( - current_app.config["CPP_SERVER"] + f"/train/{model_id}", + current_app.config["CPP_SERVER"] + f"/train/{user_id}/{model_id}", json=model, timeout=3, ) @@ -183,9 +191,65 @@ def predict(user_id: int, model_id: int): raise Error("Not trained", HTTPStatus.PRECONDITION_FAILED) response = requests.post( - 
current_app.config["CPP_SERVER"] + f"/predict/{model_id}", + current_app.config["CPP_SERVER"] + f"/predict/{user_id}/{model_id}", json=json_data, timeout=3, ) return response.text, response.status_code + + +@app.route("/update_metrics//", methods=["PUT"]) +def update_metrics(user_id: int, model_id: int): + sql_worker.verify_access(user_id, model_id) + + json = request.json + outputs = np.array(json["outputs"]) + targets = np.array(json["targets"]) + metrics = np.mean((targets - outputs) ** 2, axis=1) + sql_worker.update_metrics( + model_id, list(metrics), + json.get("label", "default"), json.get("rewrite", False), + ) + return "", HTTPStatus.OK + + +@app.route("/protect_metrics//", methods=["PUT"]) +def protect_metrics(user_id: int, model_id: int): + sql_worker.verify_access(user_id, model_id) + + json = request.json + sql_worker.protect_metrics( + model_id, json.get("label", "default"), json.get("protected", True), + ) + return "", HTTPStatus.OK + + +@app.route("/get_metrics//", methods=["PUT"]) +def get_metircs(user_id: int, model_id: int): + sql_worker.verify_access(user_id, model_id) + + json = request.json + values = sql_worker.get_metrics( + model_id, json.get("label", "default"), + ) + return {"values": list(map(float, values.split()))}, HTTPStatus.OK + + +# Add swagger description +@app.route("/get_plots//", methods=["PUT"]) +def get_plots(user_id: int, model_id: int): + sql_worker.verify_access(user_id, model_id) + + json = request.json + label = json.get("label", "default") + values = sql_worker.get_metrics( + model_id, label, + ) + + plot_path = plot_metrics(list(map(float, values.split())), user_id, model_id, label) + current_dir = os.getcwd() + print(current_dir) + response = send_file(os.path.join(current_dir, "images", plot_path)) + delete_file(os.path.join(current_dir, "images", plot_path)) + return response diff --git a/py_server/mlcraft/static/swagger.yaml b/py_server/mlcraft/static/swagger.yaml index 6522c0f3..6b891f9c 100644 --- 
a/py_server/mlcraft/static/swagger.yaml +++ b/py_server/mlcraft/static/swagger.yaml @@ -173,7 +173,7 @@ paths: '403': $ref: '#/components/responses/ModelForbidden' '404': - $ref: '#components/responses/ModelNotFound' + $ref: '#/components/responses/ModelNotFound' put: tags: - model @@ -568,6 +568,213 @@ paths: application/json: schema: $ref: '#/components/schemas/Error' + /update_metrics/{user_id}/{model_id}: + put: + tags: + - use + summary: "Writes new values of a metric" + description: "WRITES metric values" + operationId: "updates_metrics" + parameters: + - name: user_id + in: path + description: model owner's ID + required: true + schema: + type: integer + format: int64 + - name: model_id + in: path + description: model's ID + required: true + schema: + type: integer + format: int64 + requestBody: + description: "Information about new metric's values" + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/WriteMetricValues' + responses: + '200': + description: "Metrics updated successfully" + '400': + description: "Json isn't provided or required fields missing" + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + '403': + description: "The model does not exist or you have no rights for changing it" + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + '404': + description: "Model / layer does not exist" + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + /protect_metrics/{user_id}/{model_id}: + put: + tags: + - use + summary: "Protects model's metrics from auto-deletion" + description: "PROTECTS metric values" + operationId: "protect_metrics" + parameters: + - name: user_id + in: path + description: model owner's ID + required: true + schema: + type: integer + format: int64 + - name: model_id + in: path + description: model's ID + required: true + schema: + type: integer + format: int64 + requestBody: + description: "Whar record to protect" + 
required: true + content: + application/json: + schema: + $ref: '#/components/schemas/ProtectMetricValues' + responses: + '200': + description: "Settings updated successfully" + '400': + description: "Json isn't provided or required fields missing" + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + '403': + description: "The model does not exist or you have no rights for changing it" + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + '404': + description: "Model / layer does not exist" + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + /get_metrics/{user_id}/{model_id}: + put: + tags: + - use + summary: "Reads metric's values from database" + description: "READS metric values" + operationId: "get_metrics" + parameters: + - name: user_id + in: path + description: model owner's ID + required: true + schema: + type: integer + format: int64 + - name: model_id + in: path + description: model's ID + required: true + schema: + type: integer + format: int64 + requestBody: + description: "Information about metric's values to read" + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/ReadMetricValues' + responses: + '200': + description: "Successful read" + content: + application/json: + schema: + $ref: '#/components/schemas/ReturnMetricValues' + '400': + description: "Json isn't provided or required fields missing" + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + '403': + description: "The model does not exist or you have no rights for changing it" + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + '404': + description: "Model / layer does not exist" + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + /get_plots/{user_id}/{model_id}: + put: + tags: + - use + summary: "Get plot of metrics from the database" + description: "PLOTS metric values" + operationId: 
"get_plots" + parameters: + - name: user_id + in: path + description: model owner's ID + required: true + schema: + type: integer + format: int64 + - name: model_id + in: path + description: model's ID + required: true + schema: + type: integer + format: int64 + requestBody: + description: "Information about metric's values to read" + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/ReadMetricValues' + responses: + '200': + description: "Returns PNG" + schema: + type: file + '400': + description: "Json isn't provided or required fields missing" + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + '403': + description: "The model does not exist or you have no rights for changing it" + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + '404': + description: "Model / layer does not exist" + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + components: schemas: User: @@ -640,6 +847,37 @@ components: type: integer format: int64 example: 2 + WriteMetricValues: + type: object + properties: + values: + type: array + items: + type: string + example: [0.25, 0.112, 0.787, 0.238, 0.030, 0.0003] + label: + type: string + example: "train" + new_iteration: + type: boolean + example: true + required: + - values + ProtectMetricValues: + type: object + properties: + label: + type: string + example: "train" + protected: + type: boolean + example: true + ReadMetricValues: + type: object + properties: + label: + type: string + example: "train" Data: type: object properties: @@ -665,11 +903,16 @@ components: 1,1 PredictResult: type: array - descriptoin: Array with 1 element for now items: type: number format: float example: [1.0] + ReturnMetricValues: + type: array + items: + type: number + format: float + example: [0.25, 0.112, 0.787, 0.238, 0.030, 0.0003] Error: type: object properties: @@ -684,7 +927,7 @@ components: example: "Something whent wrong" problemPart: 
type: string - example: "The problem is in the Retrograde Mercury" + example: "The problem is here: ..." parameters: UserId: name: user_id @@ -743,9 +986,6 @@ components: $ref: '#/components/schemas/Error' NoJson: description: "Json not provided" - content: - text/html: - type: string NoField: description: "Required field is missing" content: diff --git a/py_server/mlcraft/utils.py b/py_server/mlcraft/utils.py index 3228066f..6316b8ac 100644 --- a/py_server/mlcraft/utils.py +++ b/py_server/mlcraft/utils.py @@ -1,7 +1,10 @@ import re +import os +import datetime import typing as tp from enum import Enum from collections import deque, defaultdict +import matplotlib.pyplot as plt def get_edges_from_model(model_dict): @@ -123,3 +126,17 @@ def convert_model(model): model["layers"], ) ) + + +def plot_metrics(values: list[float], user_id: int, model_id: int, label: str) -> str: + dt = datetime.datetime.now() + plt.plot(list(range(1, len(values) + 1)), values) + isoformat_dt = re.sub(r'[:\.\-]', '_', dt.isoformat()) + path = f"{user_id}_{model_id}_{label}_{isoformat_dt}.png" + plt.savefig(os.path.join("images", path)) + return path + + +def delete_file(path: str): + if os.path.exists(path): + os.remove(path) diff --git a/py_server/pyproject.toml b/py_server/pyproject.toml index 50c0d46f..efbc3eb8 100644 --- a/py_server/pyproject.toml +++ b/py_server/pyproject.toml @@ -14,6 +14,7 @@ dependencies = [ "black == 23.10.1", "pytest == 7.4.2", "mypy == 1.6.1", + "matplotlib == 3.8.2", ] [build-system] diff --git a/server/Makefile b/server/Makefile index 35cbc71c..87733243 100644 --- a/server/Makefile +++ b/server/Makefile @@ -14,7 +14,7 @@ CPPFLAGS = -I $(dirCore) -isystem $(dirApi) maxErrors = 4 # number of errors compiler will print to the console # LDFLAGS = -lX11 CXXFLAGS = -std=c++20 -pthread -CXXFLAGS += -Wall -Wextra -pedantic +CXXFLAGS += -Wall -Wextra -pedantic CXXFLAGS += -fmax-errors=$(maxErrors) # for g++ # CXXFLAGS += -lX11 -lm # CXXFLAGS += -ferror-limit=$(maxErrors) 
# uncomment for clang @@ -117,6 +117,7 @@ getField = $(shell node -p "require('$(CONFIG_PATH)').cpp_server.$1") port = $(call getField,PORT) host = $(call getField,HOST) boost = $(call getField,BOOST_ROOT) +cpprest = /lib/x86_64-linux-gnu/libcpprest.so.2.10 CPPFLAGS += -isystem $(boost)/include @@ -128,7 +129,7 @@ serve: $(server) | $(dirTrain) $(dirPredict) $< $(host) $(port) $(server): $(serverMain) $(serverObjects) $(coreArchive) | checkboost $(dirBin) - $(LINK.cpp) -L $(boost)/lib $^ -o $@ -lX11 -lpng + $(LINK.cpp) -L $(boost)/lib -L $(cpprest) $^ -o $@ -lX11 -lcpprest -lssl -lcrypto -lpng ### TESTS ### diff --git a/server/api/GraphBuilder.cpp b/server/api/GraphBuilder.cpp index 1bec78d0..244421be 100644 --- a/server/api/GraphBuilder.cpp +++ b/server/api/GraphBuilder.cpp @@ -28,9 +28,9 @@ void Graph::OverviewLayers(const crow::json::rvalue& layers, } void Graph::GetEdges(const crow::json::rvalue& connections, - std::unordered_map>& straightEdges, - std::unordered_map>& reversedEdges, - std::unordered_set& entryNodes) { + std::unordered_map>& straightEdges, + std::unordered_map>& reversedEdges, + std::unordered_set& entryNodes) { std::unordered_set nodesWithParents; for (auto& connection : connections) { if (!connection.has("layer_from") || !connection.has("layer_to")) { @@ -56,8 +56,8 @@ void Graph::GetEdges(const crow::json::rvalue& connections, } void Graph::TopologySort(std::unordered_map>& edges, - std::unordered_set& entryNodes, - std::vector& layersOrder) { + std::unordered_set& entryNodes, + std::vector& layersOrder) { std::unordered_set closed; std::stack dfsStack; bool isFinal; @@ -126,6 +126,12 @@ void Graph::Initialize(crow::json::rvalue modelJson, } else if (type == "ReLU") { layers_.emplace(layer_id, new ReLULayer{prevLayers}); } else if (type == "Data" || type == "Target") { + if (type == "Data") { + dataIds_.push_back(layer_id); + } + if (type == "Target") { + targetsIds_.push_back(layer_id); + } CHECK_HAS_FIELD(layerDicts[layer_id], 
"parameters"); Shape shape = ParseData(layerDicts[layer_id]["parameters"]); if (shape.size() == 0 || shape.size() % dataDicts[layer_id].size() != 0) { @@ -172,23 +178,26 @@ Graph::~Graph() { Allocator::end(); } -std::vector Graph::getLastTrainLayers() const { +std::vector Graph::getLayers(BaseLayerType type) const { std::vector result; - result.reserve(lastTrainIds_.size()); - for (int id : lastTrainIds_) { - result.push_back(layers_.at(id)); + std::vector* layers_ids = nullptr; + if (type == BaseLayerType::Data) { + layers_ids = const_cast*>(&dataIds_); + } else if (type == BaseLayerType::Targets) { + layers_ids = const_cast*>(&targetsIds_); + } else if (type == BaseLayerType::TrainOut) { + layers_ids = const_cast*>(&lastTrainIds_); + } else { + layers_ids = const_cast*>(&lastPredictIds_); } - return result; -} -std::vector Graph::getLastPredictLayers() const { - std::vector result; - result.reserve(lastPredictIds_.size()); - for (int id : lastPredictIds_) { + result.reserve(layers_ids->size()); + for (int id : *layers_ids) { result.push_back(layers_.at(id)); } return result; } + const Layer& Graph::operator[](int i) const { return *layers_.at(i); } diff --git a/server/api/GraphBuilder.h b/server/api/GraphBuilder.h index 76a8bcf2..41c02327 100644 --- a/server/api/GraphBuilder.h +++ b/server/api/GraphBuilder.h @@ -1,10 +1,20 @@ #include #include +#include #include #include "Parser.h" #include "Layer.h" + +enum class BaseLayerType : int { + Data = 0, + Targets = 1, + TrainOut = 2, + PredictOut = 3, +}; + + class Graph { private: std::unordered_map layers_ = {}; @@ -12,6 +22,7 @@ class Graph { std::vector lastTrainIds_ = {}; std::vector lastPredictIds_ = {}; std::vector dataIds_ = {}; + std::vector targetsIds_ = {}; public: Graph() = default; @@ -36,8 +47,7 @@ class Graph { void ChangeInputData(std::vector data); - std::vector getLastTrainLayers() const; - std::vector getLastPredictLayers() const; + std::vector getLayers(BaseLayerType type) const; const Layer& 
operator[](int i) const; }; diff --git a/server/api/server.cpp b/server/api/server.cpp index 151044ca..1ef65e18 100644 --- a/server/api/server.cpp +++ b/server/api/server.cpp @@ -2,6 +2,14 @@ #include #include #include +#include + +#include + +#include +#include +#include +#include #include #include "GraphBuilder.h" @@ -14,18 +22,35 @@ std::string getDataPath(int id) { return "./model_data/data/" + std::to_string(id) + ".csv"; } +<<<<<<< HEAD std::string getPredictPath(int id) { return "./model_data/predict/" + std::to_string(id) + ".csv"; } void train(json::rvalue& json, Graph** graph, int model_id) { RandomObject initObject(0, 1, 42); +======= +web::json::value GetLogs(const std::optional& node) { + std::vector values; + values.reserve(node.value().rows * node.value().cols); + for (size_t sample_index = 0; sample_index < node.value().rows; ++sample_index) { + for (size_t feature_index = 0; feature_index < node.value().cols; ++feature_index) { + values.push_back(web::json::value::number(node.value()[sample_index][feature_index])); + } + } + return web::json::value::array(values); +} + +void train(json::rvalue& json, Graph** graph, int user_id, int model_id) { + RandomObject initObject(0, 1, 17); +>>>>>>> 6b9ee4f (Makes preparations for metrics logging on python) OptimizerBase SGD = OptimizerBase(0.1); std::vector> data = CsvLoader::load_csv(getDataPath(model_id)); *graph = new Graph(); (*graph)->Initialize(json, data, &initObject, SGD); std::cout << "Graph is ready!" << std::endl; +<<<<<<< HEAD auto& lastNode = (*graph)->getLastTrainLayers()[0]->result.value(); // Пока не думаем о нескольких выходах (!) 
Hard-coded lastNode.forward(); @@ -41,16 +66,66 @@ void train(json::rvalue& json, Graph** graph, int model_id) { // lastNode.gradient = result; lastNode.gradient = Blob::ones({{1}}); lastNode.backward(); +======= + Blob result {1, 1}; + + auto& lastTrainNode = (*graph)->getLayers(BaseLayerType::TrainOut)[0]->result.value(); + auto& lastPredictNode = (*graph)->getLayers(BaseLayerType::PredictOut)[0]->result.value().output; + auto& targetsNode = (*graph)->getLayers(BaseLayerType::Targets)[0]->result.value().output; + size_t buffer_size = 5; + std::vector targets, outputs; + targets.reserve(buffer_size); + outputs.reserve(buffer_size); + + size_t max_epochs = 1000; + for (size_t epoch = 0; epoch < max_epochs; ++epoch) { + result = lastTrainNode.forward(); + printf("%ld: %f\n", epoch, result[0][0]); + + outputs.push_back(GetLogs(lastPredictNode)); + targets.push_back(GetLogs(targetsNode)); + + if ((epoch == max_epochs - 1 && outputs.size() > 0) || + outputs.size() == buffer_size) { + + web::json::value json; + json["targets"] = web::json::value::array(targets); + json["outputs"] = web::json::value::array(outputs); + json["label"] = web::json::value::string("train"); + if (epoch < buffer_size) { + json["rewrite"] = web::json::value::boolean(true); + } + targets.clear(); + outputs.clear(); + + std::ostringstream request_url; + request_url << "/update_metrics/" << user_id << "/" << model_id; + + web::http::client::http_client client(U("http://localhost:3000")); + client.request(web::http::methods::PUT, U(request_url.str()), json); + } + + // lastTrainNode.gradient = result; + lastTrainNode.gradient.value()[0][0] = 1; + lastTrainNode.backward(); +>>>>>>> 6b9ee4f (Makes preparations for metrics logging on python) SGD.step(); - lastNode.clear(); + lastTrainNode.clear(); } } +<<<<<<< HEAD void predict(int model_id, Graph* graph, std::vector& answer) { std::vector> predict_data = CsvLoader::load_csv(getPredictPath(model_id)); graph->ChangeInputData(predict_data[0]); - - auto& 
lastNode = graph->getLastPredictLayers()[0]->result.value(); // Пока не думаем о нескольких выходах (!) Hard-coded +======= +void predict(json::rvalue& json, Graph* graph, int user_id, int model_id, + std::vector& answer) { + graph->ChangeInputData(json); +>>>>>>> 6b9ee4f (Makes preparations for metrics logging on python) + + // Пока не думаем о нескольких выходах (!) Hard-coded + auto& lastNode = graph->getLayers(BaseLayerType::PredictOut)[0]->result.value(); lastNode.clear(); const Blob& result = lastNode.forward(); @@ -74,12 +149,23 @@ int main(int argc, char *argv[]) { std::map sessions; +<<<<<<< HEAD CROW_ROUTE(app, "/predict/").methods(HTTPMethod::POST) ([&](const request& req, int model_id) -> response { if (sessions.find(model_id) == sessions.end()) return response(status::METHOD_NOT_ALLOWED, "Not trained"); std::vector answer; try { predict(model_id, sessions[model_id], answer); +======= + CROW_ROUTE(app, "/predict//").methods(HTTPMethod::POST) + ([&](const request& req, int user_id, int model_id) -> response { + auto body = json::load(req.body); + if (!body) return response(status::BAD_REQUEST, "No model provided"); + if (sessions.find(model_id) == sessions.end()) return response(status::METHOD_NOT_ALLOWED, "Not trained"); + std::vector answer; + try { + predict(body, sessions[model_id], user_id, model_id, answer); +>>>>>>> 6b9ee4f (Makes preparations for metrics logging on python) } catch (const std::runtime_error &err) { return response(status::BAD_REQUEST, "Invalid body"); } @@ -90,8 +176,8 @@ int main(int argc, char *argv[]) { return crow::response(status::OK, response); }); - CROW_ROUTE(app, "/train/").methods(HTTPMethod::POST) - ([&](const request& req, int model_id) -> response { + CROW_ROUTE(app, "/train//").methods(HTTPMethod::POST) + ([&](const request& req, int user_id, int model_id) -> response { auto body = json::load(req.body); std::cout << "Checking json!" 
<< std::endl; if (!body) return response(status::BAD_REQUEST, "Invalid body"); @@ -100,7 +186,11 @@ int main(int argc, char *argv[]) { delete sessions[model_id]; } Graph* g = nullptr; +<<<<<<< HEAD train(body, &g, model_id); +======= + train(body, &g, user_id, model_id); +>>>>>>> 6b9ee4f (Makes preparations for metrics logging on python) sessions[model_id] = g; return response(status::OK, "done"); }); From aea60ad50145b5f76d87a308125341921900100a Mon Sep 17 00:00:00 2001 From: lpetrov02 Date: Mon, 11 Dec 2023 19:06:11 +0300 Subject: [PATCH 03/23] Adaptates code for new 4D blob --- py_server/mlcraft/db.py | 26 +++++++++----- py_server/mlcraft/server.py | 42 ++++++++++++++-------- py_server/mlcraft/utils.py | 2 +- server/api/server.cpp | 72 ++++++++++--------------------------- 4 files changed, 64 insertions(+), 78 deletions(-) diff --git a/py_server/mlcraft/db.py b/py_server/mlcraft/db.py index 0b07dca3..5220a7cd 100644 --- a/py_server/mlcraft/db.py +++ b/py_server/mlcraft/db.py @@ -352,8 +352,11 @@ def is_model_trained(self, model_id: int): def update_metrics(self, model_id, values: list[float], label: str, rewrite: bool): with current_app.app_context(), self.content_lock: - metrics = Metrics.query.filter_by(model=model_id, label=label)\ - .order_by(Metrics.id.desc()).first() + metrics = ( + Metrics.query.filter_by(model=model_id, label=label) + .order_by(Metrics.id.desc()) + .first() + ) if not metrics: metrics = Metrics() metrics.model = model_id @@ -374,8 +377,11 @@ def update_metrics(self, model_id, values: list[float], label: str, rewrite: boo def protect_metrics(self, model_id, label: str, protected: bool): with current_app.app_context(), self.content_lock: - metrics = Metrics.query.filter_by(model=model_id, label=label)\ - .order_by(Metrics.id.desc()).first() + metrics = ( + Metrics.query.filter_by(model=model_id, label=label) + .order_by(Metrics.id.desc()) + .first() + ) if not metrics: raise Error( f"No recordings found for model with id {model_id} and 
label {label}.", @@ -388,18 +394,20 @@ def protect_metrics(self, model_id, label: str, protected: bool): def get_metrics(self, model_id, label: str) -> str: with current_app.app_context(), self.content_lock: - metrics = Metrics.query.filter_by( - model=model_id, label=label - ).order_by(Metrics.id.desc()).first() + metrics = ( + Metrics.query.filter_by(model=model_id, label=label) + .order_by(Metrics.id.desc()) + .first() + ) if not metrics: raise Error( f"No recordings found for model with id {model_id} and label {label}.", - HTTPStatus.NOT_FOUND, + HTTPStatus.NOT_FOUND, ) return metrics.values # Пока без ручки, просто как напоминание о том, что метрики нужно чистить - def delete_old_metrics(): + def delete_old_metrics(self): with current_app.app_context(), self.content_lock: Metrics.query.filter( Metrics.end_time < datetime.datetime.now() - datetime.timedelta(days=30) diff --git a/py_server/mlcraft/server.py b/py_server/mlcraft/server.py index 45835d91..d81137d3 100644 --- a/py_server/mlcraft/server.py +++ b/py_server/mlcraft/server.py @@ -8,8 +8,11 @@ import os from .utils import ( - convert_model_parameters, is_valid_model, convert_model, - plot_metrics, delete_file, + convert_model_parameters, + is_valid_model, + convert_model, + plot_metrics, + delete_file, ) from .check_dimensions import assert_dimensions_match @@ -202,14 +205,21 @@ def predict(user_id: int, model_id: int): @app.route("/update_metrics//", methods=["PUT"]) def update_metrics(user_id: int, model_id: int): sql_worker.verify_access(user_id, model_id) - - json = request.json - outputs = np.array(json["outputs"]) + + json = request.json or {} targets = np.array(json["targets"]) + n_epochs, n_samples = targets.shape + outputs = np.array(json["outputs"]) + if targets.shape != outputs.shape: + outputs = outputs.reshape(n_epochs, n_samples, -1) + + assert targets.shape == outputs.shape # Это временное metrics = np.mean((targets - outputs) ** 2, axis=1) sql_worker.update_metrics( - model_id, 
list(metrics), - json.get("label", "default"), json.get("rewrite", False), + model_id, + list(metrics), + json.get("label", "default"), + json.get("rewrite", False), ) return "", HTTPStatus.OK @@ -217,10 +227,12 @@ def update_metrics(user_id: int, model_id: int): @app.route("/protect_metrics//", methods=["PUT"]) def protect_metrics(user_id: int, model_id: int): sql_worker.verify_access(user_id, model_id) - - json = request.json + + json = request.json or {} sql_worker.protect_metrics( - model_id, json.get("label", "default"), json.get("protected", True), + model_id, + json.get("label", "default"), + json.get("protected", True), ) return "", HTTPStatus.OK @@ -229,9 +241,10 @@ def protect_metrics(user_id: int, model_id: int): def get_metircs(user_id: int, model_id: int): sql_worker.verify_access(user_id, model_id) - json = request.json + json = request.json or {} values = sql_worker.get_metrics( - model_id, json.get("label", "default"), + model_id, + json.get("label", "default"), ) return {"values": list(map(float, values.split()))}, HTTPStatus.OK @@ -241,10 +254,11 @@ def get_metircs(user_id: int, model_id: int): def get_plots(user_id: int, model_id: int): sql_worker.verify_access(user_id, model_id) - json = request.json + json = request.json or {} label = json.get("label", "default") values = sql_worker.get_metrics( - model_id, label, + model_id, + label, ) plot_path = plot_metrics(list(map(float, values.split())), user_id, model_id, label) diff --git a/py_server/mlcraft/utils.py b/py_server/mlcraft/utils.py index 6316b8ac..0dec6e2a 100644 --- a/py_server/mlcraft/utils.py +++ b/py_server/mlcraft/utils.py @@ -131,7 +131,7 @@ def convert_model(model): def plot_metrics(values: list[float], user_id: int, model_id: int, label: str) -> str: dt = datetime.datetime.now() plt.plot(list(range(1, len(values) + 1)), values) - isoformat_dt = re.sub(r'[:\.\-]', '_', dt.isoformat()) + isoformat_dt = re.sub(r"[:\.\-]", "_", dt.isoformat()) path = 
f"{user_id}_{model_id}_{label}_{isoformat_dt}.png" plt.savefig(os.path.join("images", path)) return path diff --git a/server/api/server.cpp b/server/api/server.cpp index 1ef65e18..5950b7a3 100644 --- a/server/api/server.cpp +++ b/server/api/server.cpp @@ -22,56 +22,41 @@ std::string getDataPath(int id) { return "./model_data/data/" + std::to_string(id) + ".csv"; } -<<<<<<< HEAD std::string getPredictPath(int id) { return "./model_data/predict/" + std::to_string(id) + ".csv"; } -void train(json::rvalue& json, Graph** graph, int model_id) { - RandomObject initObject(0, 1, 42); -======= web::json::value GetLogs(const std::optional& node) { + assert(node.has_value() && node.value().shape.dimsCount <= 2); std::vector values; - values.reserve(node.value().rows * node.value().cols); - for (size_t sample_index = 0; sample_index < node.value().rows; ++sample_index) { - for (size_t feature_index = 0; feature_index < node.value().cols; ++feature_index) { - values.push_back(web::json::value::number(node.value()[sample_index][feature_index])); + values.reserve(node.value().shape.size()); + for (size_t sample_index = 0; sample_index < node.value().shape.rows(); ++sample_index) { + for (size_t feature_index = 0; feature_index < node.value().shape.cols(); ++feature_index) { + values.push_back(web::json::value::number(node.value()(0, 0, sample_index, feature_index))); } } return web::json::value::array(values); } -void train(json::rvalue& json, Graph** graph, int user_id, int model_id) { - RandomObject initObject(0, 1, 17); ->>>>>>> 6b9ee4f (Makes preparations for metrics logging on python) +void train(json::rvalue& json, Graph** graph, int model_id, int user_id) { + RandomObject initObject(0, 1, 42); OptimizerBase SGD = OptimizerBase(0.1); std::vector> data = CsvLoader::load_csv(getDataPath(model_id)); *graph = new Graph(); (*graph)->Initialize(json, data, &initObject, SGD); std::cout << "Graph is ready!" 
<< std::endl; -<<<<<<< HEAD - auto& lastNode = (*graph)->getLastTrainLayers()[0]->result.value(); // Пока не думаем о нескольких выходах (!) Hard-coded + auto& lastTrainNode = (*graph)->getLayers(BaseLayerType::TrainOut)[0]->result.value(); + auto& lastPredictNode = (*graph)->getLayers(BaseLayerType::PredictOut)[0]->result.value().output; + auto& targetsNode = (*graph)->getLayers(BaseLayerType::Targets)[0]->result.value().output; - lastNode.forward(); - lastNode.gradient = Blob::ones({{1}}); - lastNode.backward(); + lastTrainNode.forward(); + lastTrainNode.gradient = Blob::ones({{1}}); + lastTrainNode.backward(); Allocator::endSession(); - lastNode.clear(); + lastTrainNode.clear(); Allocator::endVirtualMode(); - for (int j = 0; j < 1000; ++j) { - auto& result = lastNode.forward(); - printf("%d: %f\n", j, result(0, 0, 0, 0)); - // lastNode.gradient = result; - lastNode.gradient = Blob::ones({{1}}); - lastNode.backward(); -======= - Blob result {1, 1}; - - auto& lastTrainNode = (*graph)->getLayers(BaseLayerType::TrainOut)[0]->result.value(); - auto& lastPredictNode = (*graph)->getLayers(BaseLayerType::PredictOut)[0]->result.value().output; - auto& targetsNode = (*graph)->getLayers(BaseLayerType::Targets)[0]->result.value().output; size_t buffer_size = 5; std::vector targets, outputs; targets.reserve(buffer_size); @@ -79,8 +64,8 @@ void train(json::rvalue& json, Graph** graph, int user_id, int model_id) { size_t max_epochs = 1000; for (size_t epoch = 0; epoch < max_epochs; ++epoch) { - result = lastTrainNode.forward(); - printf("%ld: %f\n", epoch, result[0][0]); + auto& result = lastTrainNode.forward(); + // printf("%ld: %f\n", epoch, result[0][0]); outputs.push_back(GetLogs(lastPredictNode)); targets.push_back(GetLogs(targetsNode)); @@ -106,23 +91,16 @@ void train(json::rvalue& json, Graph** graph, int user_id, int model_id) { } // lastTrainNode.gradient = result; - lastTrainNode.gradient.value()[0][0] = 1; + lastTrainNode.gradient = Blob::ones({{1}}); 
lastTrainNode.backward(); ->>>>>>> 6b9ee4f (Makes preparations for metrics logging on python) SGD.step(); lastTrainNode.clear(); } } -<<<<<<< HEAD void predict(int model_id, Graph* graph, std::vector& answer) { std::vector> predict_data = CsvLoader::load_csv(getPredictPath(model_id)); graph->ChangeInputData(predict_data[0]); -======= -void predict(json::rvalue& json, Graph* graph, int user_id, int model_id, - std::vector& answer) { - graph->ChangeInputData(json); ->>>>>>> 6b9ee4f (Makes preparations for metrics logging on python) // Пока не думаем о нескольких выходах (!) Hard-coded auto& lastNode = graph->getLayers(BaseLayerType::PredictOut)[0]->result.value(); @@ -149,23 +127,12 @@ int main(int argc, char *argv[]) { std::map sessions; -<<<<<<< HEAD CROW_ROUTE(app, "/predict/").methods(HTTPMethod::POST) ([&](const request& req, int model_id) -> response { if (sessions.find(model_id) == sessions.end()) return response(status::METHOD_NOT_ALLOWED, "Not trained"); std::vector answer; try { predict(model_id, sessions[model_id], answer); -======= - CROW_ROUTE(app, "/predict//").methods(HTTPMethod::POST) - ([&](const request& req, int user_id, int model_id) -> response { - auto body = json::load(req.body); - if (!body) return response(status::BAD_REQUEST, "No model provided"); - if (sessions.find(model_id) == sessions.end()) return response(status::METHOD_NOT_ALLOWED, "Not trained"); - std::vector answer; - try { - predict(body, sessions[model_id], user_id, model_id, answer); ->>>>>>> 6b9ee4f (Makes preparations for metrics logging on python) } catch (const std::runtime_error &err) { return response(status::BAD_REQUEST, "Invalid body"); } @@ -186,11 +153,8 @@ int main(int argc, char *argv[]) { delete sessions[model_id]; } Graph* g = nullptr; -<<<<<<< HEAD - train(body, &g, model_id); -======= + train(body, &g, user_id, model_id); ->>>>>>> 6b9ee4f (Makes preparations for metrics logging on python) sessions[model_id] = g; return response(status::OK, "done"); }); From 
9c416dbcb79d6e280563a1a9c2b68e2ac0948168 Mon Sep 17 00:00:00 2001 From: Artem Goldenberg Date: Tue, 12 Dec 2023 00:19:01 +0300 Subject: [PATCH 04/23] cpprest CI support --- .github/workflows/CI.yml | 3 +++ README.md | 3 +++ server/Makefile | 7 ++----- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 9798a4d5..adea88ef 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -52,6 +52,9 @@ jobs: sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 90 sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 90 + - name: install-cpprest + run: sudo apt install libcpprest-dev + - name: install-boost uses: MarkusJx/install-boost@v2.4.4 id: install-boost diff --git a/README.md b/README.md index 019427b8..97c2050c 100644 --- a/README.md +++ b/README.md @@ -85,6 +85,9 @@ Other targets available: > **Important:** If you want to build a c++ server, you need to install [Boost](https://www.boost.org/users/download/). > Then, **in the `config.json` file** add the path to the boost root (folder with `include` and `lib` inside). > For example: `"BOOST_ROOT": "/usr/local/Cellar/boost/1.81.0_1"` + +> Also you need `cpprest`: on MacOS: `brew install cpprestsdk` +> on Linux: `sudo apt-get install libcpprest-dev` > After that you should be able to build everything just fine... 
There are 3(4) main targets available to build: diff --git a/server/Makefile b/server/Makefile index 87733243..02af06b4 100644 --- a/server/Makefile +++ b/server/Makefile @@ -12,11 +12,9 @@ CPPFLAGS = -I $(dirCore) -isystem $(dirApi) # C++ compiler flags maxErrors = 4 # number of errors compiler will print to the console -# LDFLAGS = -lX11 CXXFLAGS = -std=c++20 -pthread CXXFLAGS += -Wall -Wextra -pedantic CXXFLAGS += -fmax-errors=$(maxErrors) # for g++ -# CXXFLAGS += -lX11 -lm # CXXFLAGS += -ferror-limit=$(maxErrors) # uncomment for clang CXXFLAGS += -Wno-sign-compare CXXFLAGS += -Wno-unused-parameter @@ -117,7 +115,6 @@ getField = $(shell node -p "require('$(CONFIG_PATH)').cpp_server.$1") port = $(call getField,PORT) host = $(call getField,HOST) boost = $(call getField,BOOST_ROOT) -cpprest = /lib/x86_64-linux-gnu/libcpprest.so.2.10 CPPFLAGS += -isystem $(boost)/include @@ -129,7 +126,7 @@ serve: $(server) | $(dirTrain) $(dirPredict) $< $(host) $(port) $(server): $(serverMain) $(serverObjects) $(coreArchive) | checkboost $(dirBin) - $(LINK.cpp) -L $(boost)/lib -L $(cpprest) $^ -o $@ -lX11 -lcpprest -lssl -lcrypto -lpng + $(LINK.cpp) -L $(boost)/lib $^ -o $@ -lX11 -lpng -lcpprest -lssl -lcrypto ### TESTS ### @@ -165,7 +162,7 @@ checkboost: fi; \ fi -$(dirPredict) $(dirTrain): $(dirModelData) ; mkdir $@ +$(dirPredict) $(dirTrain): $(dirModelData) ; mkdir -p $@ $(dirModelData) $(dirBuild) $(dirBin): ; mkdir $@ From 2ff406667a21d65db4d8d2de8f3afe295bf9de83 Mon Sep 17 00:00:00 2001 From: MaxVorosh Date: Sun, 10 Dec 2023 09:42:56 -0500 Subject: [PATCH 05/23] Add load possibility for zip --- server/Makefile | 3 ++- server/api/server.cpp | 55 ++++++++++++++++++++++++++++++++++++++----- 2 files changed, 51 insertions(+), 7 deletions(-) diff --git a/server/Makefile b/server/Makefile index 02af06b4..4f915b2a 100644 --- a/server/Makefile +++ b/server/Makefile @@ -126,7 +126,8 @@ serve: $(server) | $(dirTrain) $(dirPredict) $< $(host) $(port) $(server): $(serverMain) 
$(serverObjects) $(coreArchive) | checkboost $(dirBin) - $(LINK.cpp) -L $(boost)/lib $^ -o $@ -lX11 -lpng -lcpprest -lssl -lcrypto +$(LINK.cpp) -L $(boost)/lib $^ -o $@ -lX11 -lpng -lcpprest -lssl -lcrypto -lzip +$(LINK.cpp) -L $(boost)/lib $^ -o $@ -lX11 -lpng -lzip ### TESTS ### diff --git a/server/api/server.cpp b/server/api/server.cpp index 5950b7a3..8cc1f8d3 100644 --- a/server/api/server.cpp +++ b/server/api/server.cpp @@ -10,8 +10,10 @@ #include #include #include +#include #include +#include "zip.h" #include "GraphBuilder.h" #include "CsvLoader.h" @@ -19,11 +21,11 @@ using namespace std; using namespace crow; std::string getDataPath(int id) { - return "./model_data/data/" + std::to_string(id) + ".csv"; + return "./model_data/data/" + std::to_string(id); } std::string getPredictPath(int id) { - return "./model_data/predict/" + std::to_string(id) + ".csv"; + return "./model_data/predict/" + std::to_string(id); } web::json::value GetLogs(const std::optional& node) { @@ -116,6 +118,27 @@ void predict(int model_id, Graph* graph, std::vector& answer) { } } +void extract_from_zip(std::string path, std::string root) { + zip_t* z; + int err; + z = zip_open(path.c_str(), 0, &err); + if (z == nullptr) { + throw std::runtime_error("File doesn't exist"); + } + zip_stat_t info; + for (int i = 0; i < zip_get_num_files(z); ++i) { + if (zip_stat_index(z, i, 0, &info) == 0) { + ofstream fout(root + "/" + info.name, ios::binary); + zip_file* file = zip_fopen_index(z, i, 0); + char file_data[info.size]; + zip_fread(file, file_data, info.size); + fout.write(file_data, info.size); + fout.close(); + } + } + std::filesystem::remove(path); +} + void invalidArgs() { cout << "Usage: ./server " << endl; exit(1); @@ -159,17 +182,29 @@ int main(int argc, char *argv[]) { return response(status::OK, "done"); }); - //curl -X POST -F "InputFile=@filename" http://0.0.0.0:2000/upload_data/1/0 (last can be 1) - CROW_ROUTE(app, "/upload_data//").methods(HTTPMethod::Post) - ([&](const request& 
req, int model_id, int type) -> response { + // curl -X POST -F "InputFile=@filename" http://0.0.0.0:2000/upload_data/1/0/0 + // Second argument is for request type (train or predict), third - for file type (csv or zip) + CROW_ROUTE(app, "/upload_data///").methods(HTTPMethod::Post) + ([&](const request& req, int model_id, int type, int file_type) -> response { crow::multipart::message file_message(req); - std::string path; + std::string path, root; if (type == 0) { path = getDataPath(model_id); } else { path = getPredictPath(model_id); } + root = path; + if (std::filesystem::exists(path)) { + std::filesystem::remove_all(path); + } + std::filesystem::create_directory(path); + if (file_type == 0) { + path += "/1.csv"; + } + else { + path += "/1.zip"; + } std::ofstream out_file(path); if (!out_file) { return response(status::INTERNAL_SERVER_ERROR, "Failed to open file for storage"); @@ -180,6 +215,14 @@ int main(int argc, char *argv[]) { } out_file << (*content).second.body; out_file.close(); + if (file_type != 0) { + try { + extract_from_zip(path, root); + } + catch (...) 
{ + return response(status::INTERNAL_SERVER_ERROR, "Error in extracting from zip"); + } + } return crow::response(status::OK, "done"); }); From 60ec524053497eb25c1a02995426e6f1627235cb Mon Sep 17 00:00:00 2001 From: MaxVorosh Date: Thu, 14 Dec 2023 10:16:38 -0500 Subject: [PATCH 06/23] Add load possibility for png on predict --- server/api/server.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/server/api/server.cpp b/server/api/server.cpp index 8cc1f8d3..fa835dfe 100644 --- a/server/api/server.cpp +++ b/server/api/server.cpp @@ -202,9 +202,12 @@ int main(int argc, char *argv[]) { if (file_type == 0) { path += "/1.csv"; } - else { + else if (type == 0) { path += "/1.zip"; } + else { + path += "/1.png"; + } std::ofstream out_file(path); if (!out_file) { return response(status::INTERNAL_SERVER_ERROR, "Failed to open file for storage"); @@ -215,7 +218,7 @@ int main(int argc, char *argv[]) { } out_file << (*content).second.body; out_file.close(); - if (file_type != 0) { + if (file_type != 0 && type == 0) { try { extract_from_zip(path, root); } From b16e7af81604a391e0cff3873962a989208a5f19 Mon Sep 17 00:00:00 2001 From: Voroshilov Maksim <47945698+MaxVorosh@users.noreply.github.com> Date: Wed, 13 Dec 2023 22:00:17 +0300 Subject: [PATCH 07/23] GRA-122: Data loader implementation (#67) Data loader implementation --- server/api/CsvLoader.cpp | 20 +++++++ server/api/CsvLoader.h | 1 + server/api/DataLoader.cpp | 78 ++++++++++++++++++++++++++ server/api/DataLoader.h | 23 ++++++++ server/api/DataMarker.cpp | 56 ++++++++++++++++++ server/api/DataMarker.h | 21 +++++++ server/api/ImageLoader.cpp | 7 ++- server/api/ImageLoader.h | 3 +- server/api/UnshuffledCsvLoader.cpp | 33 +++++++++++ server/api/UnshuffledCsvLoader.h | 18 ++++++ server/api/UnshuffledDataLoader.h | 16 ++++++ server/api/UnshuffledImgLoader.cpp | 42 ++++++++++++++ server/api/UnshuffledImgLoader.h | 18 ++++++ server/tests/DataMarkerTests.cpp | 76 +++++++++++++++++++++++++ 
server/tests/data/1/black_pixel.png | Bin 0 -> 120 bytes server/tests/data/1/labels.csv | 5 ++ server/tests/data/1/lazure_pixel.png | Bin 0 -> 120 bytes server/tests/data/1/picture.png | Bin 0 -> 143 bytes server/tests/data/1/traffic_light.png | Bin 0 -> 126 bytes server/tests/data/1/white_pixel.png | Bin 0 -> 120 bytes 20 files changed, 415 insertions(+), 2 deletions(-) create mode 100644 server/api/DataLoader.cpp create mode 100644 server/api/DataLoader.h create mode 100644 server/api/DataMarker.cpp create mode 100644 server/api/DataMarker.h create mode 100644 server/api/UnshuffledCsvLoader.cpp create mode 100644 server/api/UnshuffledCsvLoader.h create mode 100644 server/api/UnshuffledDataLoader.h create mode 100644 server/api/UnshuffledImgLoader.cpp create mode 100644 server/api/UnshuffledImgLoader.h create mode 100644 server/tests/DataMarkerTests.cpp create mode 100644 server/tests/data/1/black_pixel.png create mode 100644 server/tests/data/1/labels.csv create mode 100644 server/tests/data/1/lazure_pixel.png create mode 100644 server/tests/data/1/picture.png create mode 100644 server/tests/data/1/traffic_light.png create mode 100644 server/tests/data/1/white_pixel.png diff --git a/server/api/CsvLoader.cpp b/server/api/CsvLoader.cpp index bd5970b3..98f75724 100644 --- a/server/api/CsvLoader.cpp +++ b/server/api/CsvLoader.cpp @@ -21,3 +21,23 @@ std::vector> CsvLoader::load_csv(std::string path) { } return result; } + +std::vector> CsvLoader::load_labels(std::string path) { + std::ifstream fin(path); + if (!fin) { + throw std::runtime_error("No such csv file in directory"); + } + std::string line; + std::vector> result; + getline(fin, line); + while (!line.empty()) { + std::stringstream line_stream(line); + std::string file; + std::string label; + getline(line_stream, file, ','); + getline(line_stream, label, ','); + result.push_back({file, std::stof(label)}); + getline(fin, line); + } + return result; +} \ No newline at end of file diff --git 
a/server/api/CsvLoader.h b/server/api/CsvLoader.h index a9890d89..1d597ef1 100644 --- a/server/api/CsvLoader.h +++ b/server/api/CsvLoader.h @@ -6,4 +6,5 @@ class CsvLoader { public: static std::vector> load_csv(std::string path); + static std::vector> load_labels(std::string path); }; diff --git a/server/api/DataLoader.cpp b/server/api/DataLoader.cpp new file mode 100644 index 00000000..9c643ed4 --- /dev/null +++ b/server/api/DataLoader.cpp @@ -0,0 +1,78 @@ +#include +#include +#include +#include "DataLoader.h" +#include "Blob.h" +#include "Allocator.h" + +void generate_rearrangement(std::vector& rearrangement, std::size_t size) { + rearrangement.resize(size); + for (int i = 0; i < rearrangement.size(); ++i) { + rearrangement[i] = i; + } + // Some shuffle magic from StackOverflow + auto rng = std::default_random_engine { 32 }; + std::shuffle(rearrangement.begin(), rearrangement.end(), rng); +} + +DataLoader::DataLoader(UnshuffledDataLoader* _loader, std::size_t _batch_size): loader(_loader), batch_size(_batch_size) { + generate_rearrangement(rearrangement, loader->size()); +} + +DataLoader::DataLoader(UnshuffledDataLoader* _loader, std::size_t _batch_size, std::string path): loader(_loader), batch_size(_batch_size) { + loader->load_data(path); + generate_rearrangement(rearrangement, loader->size()); +} + +void DataLoader::load_data(std::string path) { + loader->load_data(path); +} + +std::pair> DataLoader::operator[](std::size_t index) const { // batch_size lines from index + if (index >= loader->size()) { + throw std::out_of_range("Index out of range"); + } + auto data = get_raw(index); + Shape shape = loader->get_appropriate_shape(index, batch_size); + return {Blob::constBlob(shape, data.first.data()), data.second}; +} + +std::size_t DataLoader::size() const { + return loader->size(); +} + +void DataLoader::add_data(const DataLoader& other, int index) { + loader->add_data(other.loader, index); +} + +std::pair, std::vector> DataLoader::get_raw(std::size_t index) 
const { // batch_size lines from index + if (index >= loader->size()) { + throw std::out_of_range("Index out of range"); + } + std::vector data; + std::vector res(batch_size, 0); + Shape shape = loader->get_appropriate_shape(rearrangement[index], batch_size); + auto dims = shape.getDims(); + int data_size = 1; + for (int i = 0; i < dims.size(); ++i) { + data_size *= dims[i]; + } + data.resize(data_size, 0); + int cur_data = 0; + for (int i = index; i < index + batch_size; ++i) { + if (i >= loader->size()) { + break; + } + auto line = loader->get_raw(rearrangement[i]); + res[i - index] = line.second; + for (int j = 0; j < line.first.size(); ++j) { + data[cur_data] = line.first[j]; + cur_data++; + } + } + return {data, res}; +} + +void DataLoader::shuffle() { + generate_rearrangement(rearrangement, loader->size()); +} diff --git a/server/api/DataLoader.h b/server/api/DataLoader.h new file mode 100644 index 00000000..a10582cb --- /dev/null +++ b/server/api/DataLoader.h @@ -0,0 +1,23 @@ +#pragma once + +#include "UnshuffledDataLoader.h" +#include + +void generate_rearrangement(std::vector& rearrangement, std::size_t size); + +class DataLoader { +private: + UnshuffledDataLoader* loader; + std::vector rearrangement; + std::size_t batch_size; +public: + DataLoader() = default; + DataLoader(UnshuffledDataLoader* _loader, std::size_t _batch_size); + DataLoader(UnshuffledDataLoader* _loader, std::size_t _batch_size, std::string path); + void load_data(std::string path); + std::pair> operator[](std::size_t index) const; + void add_data(const DataLoader& other, int index); + std::size_t size() const; + std::pair, std::vector> get_raw(std::size_t index) const; + void shuffle(); +}; diff --git a/server/api/DataMarker.cpp b/server/api/DataMarker.cpp new file mode 100644 index 00000000..be37d5cd --- /dev/null +++ b/server/api/DataMarker.cpp @@ -0,0 +1,56 @@ +#include +#include "DataMarker.h" +#include "UnshuffledCsvLoader.h" +#include "UnshuffledImgLoader.h" +#include "Blob.h" + 
+DataMarker::DataMarker(std::string path, FileExtension type, int percentage_for_train, std::size_t batch_size) { + if (percentage_for_train > 100 || percentage_for_train < 0) { + throw std::logic_error("Wrong percentage"); + } + DataLoader file_loader; + UnshuffledDataLoader* file_unshuffled_loader; + if (type == FileExtension::Csv) { + file_unshuffled_loader = new UnshuffledCsvLoader; + train_unshuffled_loader = new UnshuffledCsvLoader; + check_unshuffled_loader = new UnshuffledCsvLoader; + } + else if (type == FileExtension::Png) { + file_unshuffled_loader = new UnshuffledImgLoader; + train_unshuffled_loader = new UnshuffledImgLoader; + check_unshuffled_loader = new UnshuffledImgLoader; + } + else { + throw std::logic_error("Unsupported type"); + } + file_loader = DataLoader(file_unshuffled_loader, batch_size, path); + std::vector rearrangement; + generate_rearrangement(rearrangement, file_loader.size()); + train_loader = DataLoader(train_unshuffled_loader, batch_size); + check_loader = DataLoader(check_unshuffled_loader, batch_size); + int instances_for_train = percentage_for_train * (file_loader.size()) / 100; + for (int i = 0; i < file_loader.size(); ++i) { + if (i < instances_for_train) { + train_loader.add_data(file_loader, rearrangement[i]); + } + else { + check_loader.add_data(file_loader, rearrangement[i]); + } + } + train_loader.shuffle(); + check_loader.shuffle(); + delete file_unshuffled_loader; +} + +DataMarker::~DataMarker() { + delete train_unshuffled_loader; + delete check_unshuffled_loader; +} + +DataLoader DataMarker::get_check_loader() { + return check_loader; +} + +DataLoader DataMarker::get_train_loader() { + return train_loader; +} \ No newline at end of file diff --git a/server/api/DataMarker.h b/server/api/DataMarker.h new file mode 100644 index 00000000..4beff113 --- /dev/null +++ b/server/api/DataMarker.h @@ -0,0 +1,21 @@ +#pragma once + +#include +#include "UnshuffledDataLoader.h" +#include "DataLoader.h" + +enum class FileExtension 
{Csv, Png}; + +class DataMarker { +private: + UnshuffledDataLoader* train_unshuffled_loader; + DataLoader train_loader; + UnshuffledDataLoader* check_unshuffled_loader; + DataLoader check_loader; +public: + DataMarker() = default; + DataMarker(std::string path, FileExtension file_type, int percentage_for_train, std::size_t batch_size); + ~DataMarker(); + DataLoader get_train_loader(); + DataLoader get_check_loader(); +}; diff --git a/server/api/ImageLoader.cpp b/server/api/ImageLoader.cpp index a6201421..2aaa6ac9 100644 --- a/server/api/ImageLoader.cpp +++ b/server/api/ImageLoader.cpp @@ -1,6 +1,6 @@ #include "ImageLoader.h" -std::vector ImageLoader::load_image(char* path) { +std::vector ImageLoader::load_image(const char* path) { cimg_library::CImg image(path); return get_pixels(image); } @@ -22,4 +22,9 @@ std::vector ImageLoader::get_pixels(cimg_library::CImg img } } return ans; +} + +std::pair ImageLoader::get_size(const char *path) { + cimg_library::CImg image(path); + return {image.width(), image.height()}; } \ No newline at end of file diff --git a/server/api/ImageLoader.h b/server/api/ImageLoader.h index ca57561b..8fd09733 100644 --- a/server/api/ImageLoader.h +++ b/server/api/ImageLoader.h @@ -7,6 +7,7 @@ class ImageLoader { public: - static std::vector load_image(char* path); + static std::vector load_image(const char* path); static std::vector get_pixels(cimg_library::CImg); + static std::pair get_size(const char* path); }; diff --git a/server/api/UnshuffledCsvLoader.cpp b/server/api/UnshuffledCsvLoader.cpp new file mode 100644 index 00000000..68c6a3f7 --- /dev/null +++ b/server/api/UnshuffledCsvLoader.cpp @@ -0,0 +1,33 @@ +#include +#include "UnshuffledCsvLoader.h" +#include "CsvLoader.h" + +void UnshuffledCsvLoader::load_data(std::string path) { + data.clear(); + auto file_data = CsvLoader::load_csv(path); + data.resize(file_data.size()); + for (int i = 0; i < file_data.size(); ++i) { + float result = file_data[i].back(); + file_data[i].pop_back(); + 
data[i] = {file_data[i], result}; + } +} + +void UnshuffledCsvLoader::add_data(const UnshuffledDataLoader* other, int index) { + data.push_back(other->get_raw(index)); +} + +std::size_t UnshuffledCsvLoader::size() const { + return data.size(); +} + +std::pair, float> UnshuffledCsvLoader::get_raw(std::size_t index) const { + if (index >= data.size()) { + throw std::out_of_range("Index out of range"); + } + return {data[index].first, data[index].second}; +} + +Shape UnshuffledCsvLoader::get_appropriate_shape(std::size_t index, std::size_t batch_size) const { + return Shape({batch_size, data[index].first.size()}); +} \ No newline at end of file diff --git a/server/api/UnshuffledCsvLoader.h b/server/api/UnshuffledCsvLoader.h new file mode 100644 index 00000000..64b63279 --- /dev/null +++ b/server/api/UnshuffledCsvLoader.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include +#include "UnshuffledDataLoader.h" + + +class UnshuffledCsvLoader: public UnshuffledDataLoader { +private: + std::vector, float>> data; +public: + UnshuffledCsvLoader() = default; + void load_data(std::string path) override; + void add_data(const UnshuffledDataLoader* other, int index) override; + std::size_t size() const override; + std::pair, float> get_raw(std::size_t index) const override; + Shape get_appropriate_shape(std::size_t index, std::size_t batch_size) const override; +}; diff --git a/server/api/UnshuffledDataLoader.h b/server/api/UnshuffledDataLoader.h new file mode 100644 index 00000000..6e938111 --- /dev/null +++ b/server/api/UnshuffledDataLoader.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include +#include "Allocator.h" + +class UnshuffledDataLoader { +public: + UnshuffledDataLoader() = default; + virtual ~UnshuffledDataLoader() = default; + virtual void load_data(std::string path) = 0; + virtual void add_data(const UnshuffledDataLoader* other, int index) = 0; + virtual std::size_t size() const = 0; + virtual std::pair, float> get_raw(std::size_t index) const = 0; + virtual Shape 
get_appropriate_shape(std::size_t index, std::size_t batch_size) const = 0; +}; diff --git a/server/api/UnshuffledImgLoader.cpp b/server/api/UnshuffledImgLoader.cpp new file mode 100644 index 00000000..772bf959 --- /dev/null +++ b/server/api/UnshuffledImgLoader.cpp @@ -0,0 +1,42 @@ +#include "UnshuffledImgLoader.h" +#include "CsvLoader.h" +#include "Blob.h" +#include "Allocator.h" +#include "ImageLoader.h" +#include +#include + +void UnshuffledImgLoader::load_data(std::string path) { + for (auto const& dir_entry : std::filesystem::recursive_directory_iterator(path)) { + std::string file_path = dir_entry.path(); + if (file_path.size() >= 4 && file_path.substr(file_path.size() - 4, 4) == ".csv") { + data = CsvLoader::load_labels(file_path.c_str()); + break; + } + } + for (int i = 0; i < data.size(); ++i) { + data[i].first = path + "/" + data[i].first; + } +} + +Shape UnshuffledImgLoader::get_appropriate_shape(std::size_t index, std::size_t batch_size) const { + auto img_size = ImageLoader::get_size(data[index].first.c_str()); + return Shape({batch_size, 3, img_size.first, img_size.second}); +} + +std::pair, float> UnshuffledImgLoader::get_raw(std::size_t index) const { + if (index >= data.size()) { + throw std::out_of_range("Index out of range"); + } + std::string file_path = data[index].first; + float ans = data[index].second; + return {ImageLoader::load_image(file_path.c_str()), ans}; +} + +std::size_t UnshuffledImgLoader::size() const { + return data.size(); +} + +void UnshuffledImgLoader::add_data(const UnshuffledDataLoader* other, int index) { + data.push_back(reinterpret_cast(other)->data[index]); +} \ No newline at end of file diff --git a/server/api/UnshuffledImgLoader.h b/server/api/UnshuffledImgLoader.h new file mode 100644 index 00000000..2eb0753c --- /dev/null +++ b/server/api/UnshuffledImgLoader.h @@ -0,0 +1,18 @@ +#pragma once + +#include "UnshuffledDataLoader.h" +#include "Blob.h" +#include +#include + +class UnshuffledImgLoader: public 
UnshuffledDataLoader { +private: + std::vector> data; +public: + UnshuffledImgLoader() = default; + void load_data(std::string path) override; // path to folder + void add_data(const UnshuffledDataLoader* other, int index) override; + std::size_t size() const override; + std::pair, float> get_raw(std::size_t index) const override; + Shape get_appropriate_shape(std::size_t index, std::size_t batch_size) const override; +}; diff --git a/server/tests/DataMarkerTests.cpp b/server/tests/DataMarkerTests.cpp new file mode 100644 index 00000000..e4ba27a8 --- /dev/null +++ b/server/tests/DataMarkerTests.cpp @@ -0,0 +1,76 @@ +#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN +#include "doctest.h" +#include "DataMarker.h" +#include + +void check_vectors(std::vector, float>>& ans, std::vector, float>>& res) { + CHECK(ans.size() == res.size()); + for (int i = 0; i < ans.size(); ++i) { + CHECK(ans[i].first.size() == res[i].first.size()); + CHECK(ans[i].second == res[i].second); + for (int j = 0; j < ans[i].first.size(); ++j) { + CHECK(ans[i].first[j] == res[i].first[j]); + } + } +} + +TEST_CASE("Csv-test") { + SUBCASE("and-train") { + DataMarker loader = DataMarker("./tests/data/and-train.csv", FileExtension::Csv, 50, 1); + DataLoader for_train = loader.get_train_loader(); + DataLoader for_check = loader.get_check_loader(); + std::vector, float>> ans = {{{0, 0}, 0}, {{0, 1}, 0}, {{1, 0}, 0}, {{1, 1}, 1}}; + std::vector, float>> res; + CHECK(for_train.size() == 2); + CHECK(for_check.size() == 2); + for (int i = 0; i < 2; ++i) { + auto line1 = for_train.get_raw(i); + CHECK(line1.second.size() == 1); + res.push_back({line1.first, line1.second[0]}); + auto line2 = for_check.get_raw(i); + CHECK(line2.second.size() == 1); + res.push_back({line2.first, line2.second[0]}); + } + sort(res.begin(), res.end()); + check_vectors(ans, res); + } + SUBCASE("xor-train") { + DataMarker loader = DataMarker("./tests/data/xor-train.csv", FileExtension::Csv, 50, 1); + DataLoader for_train = 
loader.get_train_loader(); + DataLoader for_check = loader.get_check_loader(); + std::vector, float>> ans = {{{0, 0}, 0}, {{0, 1}, 1}, {{1, 0}, 1}, {{1, 1}, 0}}; + std::vector, float>> res; + CHECK(for_train.size() == 2); + CHECK(for_check.size() == 2); + for (int i = 0; i < 2; ++i) { + auto line1 = for_train.get_raw(i); + CHECK(line1.second.size() == 1); + res.push_back({line1.first, line1.second[0]}); + auto line2 = for_check.get_raw(i); + CHECK(line2.second.size() == 1); + res.push_back({line2.first, line2.second[0]}); + } + sort(res.begin(), res.end()); + check_vectors(ans, res); + } +} + +TEST_CASE("Image-test") { + DataMarker loader = DataMarker("./tests/data/1", FileExtension::Png, 80, 1); + DataLoader for_train = loader.get_train_loader(); + DataLoader for_check = loader.get_check_loader(); + std::vector, float>> ans = {{{255, 255, 255}, 0}, {{0, 0, 0}, 0}, {{159, 252, 253}, 0}, {{255, 255, 0, 0, 255, 255, 0, 0, 0}, 1}, {{0, 255, 100, 153, 136, 255, 0, 174, 100, 217, 0, 255, 0, 201, 100, 234, 21, 255}, 1}}; + std::vector, float>> res; + CHECK(for_train.size() == 4); + CHECK(for_check.size() == 1); + for (int i = 0; i < 4; ++i) { + auto line = for_train.get_raw(i); + CHECK(line.second.size() == 1); + res.push_back({line.first, line.second[0]}); + } + CHECK(for_check.get_raw(0).second.size() == 1); + res.push_back({for_check.get_raw(0).first, for_check.get_raw(0).second[0]}); + sort(res.begin(), res.end()); + sort(ans.begin(), ans.end()); + check_vectors(ans, res); +} \ No newline at end of file diff --git a/server/tests/data/1/black_pixel.png b/server/tests/data/1/black_pixel.png new file mode 100644 index 0000000000000000000000000000000000000000..0279819e92a4b9651ea106eaaf6ad25f15455f18 GIT binary patch literal 120 zcmeAS@N?(olHy`uVBq!ia0vp^j3CUx1|;Q0k92}K#X;^)4C~IxyaaMs(j9#r85lP9 zbN@+X1@buyJR*x37=%hdnDJhkd<9UD*VDx@L?S#nAtB)hKLaBRBSV2gDmPGq!PC{x JWt~$(697xK7`p%f literal 0 HcmV?d00001 diff --git a/server/tests/data/1/labels.csv 
b/server/tests/data/1/labels.csv new file mode 100644 index 00000000..4d1e5d6a --- /dev/null +++ b/server/tests/data/1/labels.csv @@ -0,0 +1,5 @@ +picture.png, 1 +traffic_light.png, 1 +black_pixel.png, 0 +white_pixel.png, 0 +lazure_pixel.png, 0 diff --git a/server/tests/data/1/lazure_pixel.png b/server/tests/data/1/lazure_pixel.png new file mode 100644 index 0000000000000000000000000000000000000000..e914eb77d2514e2943aec9dd0fc22b5d07bda7d1 GIT binary patch literal 120 zcmeAS@N?(olHy`uVBq!ia0vp^j3CUx1|;Q0k92}K#X;^)4C~IxyaaMs(j9#r85lP9 zbN@+X1@buyJR*x37=%hdnDJhkd<9UD*VDx@L?S$S#{cK<>lrxmm}ex-sCNcRFnGH9 KxvXa~60+7Besim4Gngy)^j>prDGUi(`mHcydZY!Vmk;&$QW={eSIev?u&} hT7rkC=g=jJdFRaGZ^1y`?a}{ R`zBDD!PC{xWt~$(699r_A5s7S literal 0 HcmV?d00001 diff --git a/server/tests/data/1/white_pixel.png b/server/tests/data/1/white_pixel.png new file mode 100644 index 0000000000000000000000000000000000000000..b201b72e55464f60720eb1d283852b691b7de327 GIT binary patch literal 120 zcmeAS@N?(olHy`uVBq!ia0vp^j3CUx1|;Q0k92}K#X;^)4C~IxyaaMs(j9#r85lP9 zbN@+X1@buyJR*x37=%hdnDJhkd<9UD*VDx@L?S%-$N&HT>lrwIGyi3Ec0C1@VDNPH Kb6Mw<&;$Uh3mshm literal 0 HcmV?d00001 From 1621b6d7a2271bdf44ded5dc3a6cf27f1358e080 Mon Sep 17 00:00:00 2001 From: Ivan Shanygin <88805084+AntoxaBarin@users.noreply.github.com> Date: Thu, 14 Dec 2023 12:20:32 +0300 Subject: [PATCH 08/23] ID-154: Loss type selection (#70) * Add loss type selection * Add loss type selection * Remove layer-class loss * Clean up Loss type * Make format --- client/templates/main.html | 124 ++++++++++++++++++++++++++++--------- 1 file changed, 95 insertions(+), 29 deletions(-) diff --git a/client/templates/main.html b/client/templates/main.html index b243b9da..f7fcc3a0 100644 --- a/client/templates/main.html +++ b/client/templates/main.html @@ -180,7 +180,10 @@

- @@ -562,7 +579,6 @@

linear_bias_selector.addEventListener("change", () => { setModelView("irrelevant") const parameters = getParametersFromNode(last_selected_node_id) - console.log("last id: " + last_selected_layer_id) const new_value = document.getElementById("bias-parameter").value for (i in parameters) { @@ -584,7 +600,6 @@

linear_inFeatures_input.addEventListener("change", () => { setModelView("irrelevant") const parameters = getParametersFromNode(last_changed_parameters_layer_id) - console.log("last id: " + last_selected_layer_id) const new_value = document.getElementById("linear-inFeatures-parameter").value for (i in parameters) { @@ -604,7 +619,6 @@

linear_outFeatures_input.addEventListener("change", () => { setModelView("irrelevant") const parameters = getParametersFromNode(last_changed_parameters_layer_id) - console.log("last id: " + last_selected_layer_id) const new_value = document.getElementById("linear-outFeatures-parameter").value for (i in parameters) { @@ -624,7 +638,6 @@

special_width_input.addEventListener("change", () => { setModelView("irrelevant") const parameters = getParametersFromNode(last_changed_parameters_layer_id) - console.log("last id: " + last_selected_layer_id) const new_value = document.getElementById("special-width-parameter").value for (i in parameters) { @@ -640,6 +653,21 @@

last_changed_parameters_layer_id = last_selected_node_id }) + const loss_type_selector = document.getElementById("loss-type-parameter") + loss_type_selector.addEventListener("change", () => { + setModelView("irrelevant") + const parameters = getParametersFromNode(last_selected_node_id) + const new_value = loss_type_selector.value + + for (i in parameters) { + if (parameters[i][0] == "type") { + parameters[i][1] = new_value + joinUpdatedLayerParameters(parameters, last_selected_node_id) + break + } + } + }) + /* DRAG EVENT */ /* Mouse and Touch Actions */ let elements = document.getElementsByClassName("drag-drawflow") @@ -1142,13 +1170,16 @@

pathsWithWrittenID = [] } - function getSortableInputList(layerClass) { - switch (layerClass) { + function getSortableInputList(layer) { + switch (layer.class) { case "linear": return document.querySelector("#linear-inputs-sortable-list") case "relu": return document.querySelector("#relu-inputs-sortable-list") case "special": + if (layer.name == "Loss") { + return document.querySelector("#loss-inputs-sortable-list") + } return document.querySelector("#special-inputs-sortable-list") } } @@ -1168,7 +1199,7 @@

// Input selection writeIdOnConnections(correct_id) - let sortable_list = getSortableInputList(node.class) + let sortable_list = getSortableInputList(node) let child = sortable_list.lastElementChild while (child) { @@ -1230,42 +1261,77 @@

}) document.getElementById("layer-data").style.display = "block" - document.getElementById("layer-id").innerHTML = "ID: " + layer_dbid.toString() + setupLayerParameters(node, correct_id) + } - const layer_parameters = getParametersFromNode(correct_id) + function setupLayerParameters(layer, layer_id) { + let layer_parameters = getParametersFromNode(layer_id) - if (node.class == "linear") { - document.querySelector("#linear-layer-parameters").style.display = "block" + switch (layer.class) { + case "linear": + document.querySelector("#linear-layer-parameters").style.display = "block" - for (i in layer_parameters) { - if (layer_parameters[i][0] == "bias") { - document.querySelector("#bias-parameter").value = layer_parameters[i][1] == "1" - } - if (layer_parameters[i][0] == "inFeatures") { - document.querySelector("#linear-inFeatures-parameter").value = layer_parameters[i][1] + for (i in layer_parameters) { + switch (layer_parameters[i][0]) { + case "bias": + document.querySelector("#bias-parameter").value = layer_parameters[i][1] == "1" + break + case "inFeatures": + document.querySelector("#linear-inFeatures-parameter").value = layer_parameters[i][1] + break + case "outFeatures": + document.querySelector("#linear-outFeatures-parameter").value = layer_parameters[i][1] + break + } } - if (layer_parameters[i][0] == "outFeatures") { - document.querySelector("#linear-outFeatures-parameter").value = layer_parameters[i][1] + break + + case "special": + if (layer.name == "Loss") { + document.querySelector("#loss-layer-parameters").style.display = "block" + for (i in layer_parameters) { + switch (layer_parameters[i][0]) { + case "width": + document.querySelector("#loss-width-parameter").value = layer_parameters[i][1] + break + case "type": + document.querySelector("#loss-type-parameter").value = layer_parameters[i][1] + break + } + } + } else { + document.querySelector("#special-layer-parameters").style.display = "block" + for (i in layer_parameters) { + switch 
(layer_parameters[i][0]) { + case "width": + document.querySelector("#special-width-parameter").value = layer_parameters[i][1] + break + case "type": + document.querySelector("#loss-type-parameter").value = layer_parameters[i][1] + break + } + } } - } - } else if (node.class == "special") { - document.querySelector("#special-layer-parameters").style.display = "block" + break - for (i in layer_parameters) { - if (layer_parameters[i][0] == "width") { - document.querySelector("#special-width-parameter").value = layer_parameters[i][1] + case "relu": + document.querySelector("#relu-layer-parameters").style.display = "block" + for (i in layer_parameters) { + if (layer_parameters[i][0] == "width") { + document.querySelector("#relu-width-parameter").value = layer_parameters[i][1] + } } - } - } else if (node.class == "relu") { - document.querySelector("#relu-layer-parameters").style.display = "block" + break } } + function hideLayerInfo() { document.getElementById("layer-data").style.display = "none" document.querySelector("#linear-layer-parameters").style.display = "none" document.querySelector("#special-layer-parameters").style.display = "none" document.querySelector("#relu-layer-parameters").style.display = "none" + document.querySelector("#loss-layer-parameters").style.display = "none" deleteIdOnConnections() } From aefce3adc7de94b6c54da2e2a03ead18d453f8e4 Mon Sep 17 00:00:00 2001 From: Ivan Shanygin <88805084+AntoxaBarin@users.noreply.github.com> Date: Thu, 14 Dec 2023 12:47:18 +0300 Subject: [PATCH 09/23] ID-171: Fix input selection (#69) * Fix input selection * Clean up fix input selection --- client/scripts/storage.js | 4 ++ client/templates/main.html | 76 +++++++++++++++++++++++--------------- 2 files changed, 50 insertions(+), 30 deletions(-) diff --git a/client/scripts/storage.js b/client/scripts/storage.js index df5f10c4..bb149e5d 100644 --- a/client/scripts/storage.js +++ b/client/scripts/storage.js @@ -14,3 +14,7 @@ let last_selected_node_id // layer's 
drawflow id let last_changed_parameters_layer_id let layer_drag_offset_X = 0 let layer_drag_offset_Y = 0 + +let is_id_on_paths = false // Is there written id's on connections +let input_selection_layer_id +let input_selection_layer_dbid diff --git a/client/templates/main.html b/client/templates/main.html index f7fcc3a0..e66c979c 100644 --- a/client/templates/main.html +++ b/client/templates/main.html @@ -440,7 +440,6 @@

} } click(e) { - deleteIdOnConnections() hideLayerInfo() super.click(e) } @@ -538,7 +537,7 @@

if (response.ok) { editor.updateNodeDataFromId(connection.input_id, { DBID: data2.DBID, - Connections: data2.Connections.concat(`${data1.DBID};${connection.output_id}`), // "db_id;drawflow_id" + Connections: [].concat(`${data1.DBID};${connection.output_id}`, data2.Connections), // "db_id;drawflow_id" Parameters: data2.Parameters, }) } else { @@ -1160,14 +1159,20 @@

const tmp = addLabelText(htmlConnection, connection.split(";")[0]) // Writes id on connection pathsWithWrittenID.push(tmp) } + is_id_on_paths = true } // Remove id written on connection function deleteIdOnConnections() { for (let path of pathsWithWrittenID) { - path.setAttribute("display", "none") + try { + path.setAttribute("display", "none") + } catch (e) { + console.error(e) + } } pathsWithWrittenID = [] + is_id_on_paths = false } function getSortableInputList(layer) { @@ -1187,7 +1192,36 @@

function hideResult() { document.getElementById("predict-result-wrapper").style.display = "none" } + + function matchConnectionsAfterSelection(new_input_order) { + let newConnections = [] + let connections = editor.getNodeFromId(input_selection_layer_id).data.Connections + for (let input of new_input_order) { + for (let connection of connections) { + if (input == connection.split(";")[0]) { + newConnections.push(connection) + } + } + } + let new_inputs_object = { + new_parents: new_input_order, + layer_id: input_selection_layer_dbid, + } + updateParentOrder(new_inputs_object).then(response => { + if (response.ok) { + editor.updateNodeDataFromId(input_selection_layer_id, { + DBID: input_selection_layer_dbid, + Connections: newConnections, + Parameters: editor.getNodeFromId(input_selection_layer_id).data.Parameters, + }) + } else { + errorNotification("Failed to update input order.\n Server is not responding now.") + } + }) + } + function showLayerInfo(id) { + hideLayerInfo() // id format "node-" let correct_id = id if (id.includes("node")) { @@ -1196,10 +1230,13 @@

const node = editor.getNodeFromId(correct_id) const connections = editor.getNodeFromId(correct_id).data.Connections const layer_dbid = editor.getNodeFromId(correct_id).data.DBID + input_selection_layer_id = correct_id + input_selection_layer_dbid = layer_dbid // Input selection - writeIdOnConnections(correct_id) - let sortable_list = getSortableInputList(node) + writeIdOnConnections(input_selection_layer_id) + let sortable_list = getSortableInputList(node.class) + let child = sortable_list.lastElementChild while (child) { @@ -1233,30 +1270,7 @@

continue } } - - let newConnections = [] - for (let i = 0; i < new_input_order.length; i++) { - for (let connection of connections) { - if (new_input_order[i] == connection.split(";")[0]) { - newConnections[i] = connection - } - } - } - let new_inputs_object = { - new_parents: new_input_order, - layer_id: layer_dbid, - } - updateParentOrder(new_inputs_object).then(response => { - if (response.ok) { - editor.updateNodeDataFromId(correct_id, { - DBID: layer_dbid, - Connections: newConnections, - Parameters: editor.getNodeFromId(correct_id).data.Parameters, - }) - } else { - errorNotification("Failed to update input order.\n Server is not responding now.") - } - }) + matchConnectionsAfterSelection(new_input_order) }, }) @@ -1333,7 +1347,9 @@

document.querySelector("#relu-layer-parameters").style.display = "none" document.querySelector("#loss-layer-parameters").style.display = "none" - deleteIdOnConnections() + if (is_id_on_paths) { + deleteIdOnConnections() + } } function addLabelText(path, labelText) { From 8303c909d15e5539c74acb024ddb664a1e00c20e Mon Sep 17 00:00:00 2001 From: MaxVorosh Date: Thu, 14 Dec 2023 11:55:41 -0500 Subject: [PATCH 10/23] Change train and predict for zip file case --- server/api/server.cpp | 58 +++++++++++++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 13 deletions(-) diff --git a/server/api/server.cpp b/server/api/server.cpp index fa835dfe..e64707fb 100644 --- a/server/api/server.cpp +++ b/server/api/server.cpp @@ -16,6 +16,8 @@ #include "zip.h" #include "GraphBuilder.h" #include "CsvLoader.h" +#include "ImageLoader.h" +#include "DataMarker.h" using namespace std; using namespace crow; @@ -40,10 +42,28 @@ web::json::value GetLogs(const std::optional& node) { return web::json::value::array(values); } -void train(json::rvalue& json, Graph** graph, int model_id, int user_id) { +void train(json::rvalue& json, Graph** graph, int model_id, int user_id, FileExtension extension) { RandomObject initObject(0, 1, 42); OptimizerBase SGD = OptimizerBase(0.1); - std::vector> data = CsvLoader::load_csv(getDataPath(model_id)); + + // Should be adopted for DataLoader possibilities + std::string path = getDataPath(model_id); + if (extension == FileExtension::Csv) { + path += "/1.csv"; + } + DataMarker dataMarker = DataMarker(path, extension, 100, 1); + DataLoader dataLoader = dataMarker.get_train_loader(); + std::vector> data; + for (int i = 0; i < dataLoader.size(); ++i) { + auto p = dataLoader.get_raw(i); + p.first.push_back(p.second[0]); + for (auto x: p.first) { + std::cout << x << ' '; + } + std::cout << std::endl; + data.push_back(p.first); + } + *graph = new Graph(); (*graph)->Initialize(json, data, &initObject, SGD); std::cout << "Graph is ready!" 
<< std::endl; @@ -100,8 +120,14 @@ void train(json::rvalue& json, Graph** graph, int model_id, int user_id) { } } -void predict(int model_id, Graph* graph, std::vector& answer) { - std::vector> predict_data = CsvLoader::load_csv(getPredictPath(model_id)); +void predict(int model_id, Graph* graph, std::vector& answer, FileExtension extension) { + std::vector> predict_data; + if (extension == FileExtension::Csv) { + predict_data = CsvLoader::load_csv(getPredictPath(model_id) + "/1.csv"); + } + else { + predict_data = {ImageLoader::load_image((getPredictPath(model_id) + "/1.png").c_str())}; + } graph->ChangeInputData(predict_data[0]); // Пока не думаем о нескольких выходах (!) Hard-coded @@ -149,20 +175,18 @@ int main(int argc, char *argv[]) { SimpleApp app; std::map sessions; + std::map file_types; CROW_ROUTE(app, "/predict/").methods(HTTPMethod::POST) ([&](const request& req, int model_id) -> response { if (sessions.find(model_id) == sessions.end()) return response(status::METHOD_NOT_ALLOWED, "Not trained"); std::vector answer; try { - predict(model_id, sessions[model_id], answer); + predict(model_id, sessions[model_id], answer, file_types[model_id]); } catch (const std::runtime_error &err) { return response(status::BAD_REQUEST, "Invalid body"); } - json::wvalue response; - for (int i = 0; i < answer.size(); ++i) { - response[i] = answer[i]; - } + json::wvalue response = answer[0]; return crow::response(status::OK, response); }); @@ -176,8 +200,12 @@ int main(int argc, char *argv[]) { delete sessions[model_id]; } Graph* g = nullptr; +<<<<<<< HEAD train(body, &g, user_id, model_id); +======= + train(body, &g, model_id, file_types[model_id]); +>>>>>>> 105c02e (Change train and predict for zip file case) sessions[model_id] = g; return response(status::OK, "done"); }); @@ -201,12 +229,16 @@ int main(int argc, char *argv[]) { std::filesystem::create_directory(path); if (file_type == 0) { path += "/1.csv"; - } - else if (type == 0) { - path += "/1.zip"; + 
file_types[model_id] = FileExtension::Csv; } else { - path += "/1.png"; + file_types[model_id] = FileExtension::Png; + if (type == 0) { + path += "/1.zip"; + } + else { + path += "/1.png"; + } } std::ofstream out_file(path); if (!out_file) { From 796559112b259df5b231625e67ed8d830ef47e75 Mon Sep 17 00:00:00 2001 From: lpetrov02 Date: Fri, 15 Dec 2023 11:10:28 +0300 Subject: [PATCH 11/23] Starts fixing train --- server/api/DataLoader.cpp | 23 +++++++++-------------- server/api/server.cpp | 8 ++------ 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/server/api/DataLoader.cpp b/server/api/DataLoader.cpp index 9c643ed4..23cad28e 100644 --- a/server/api/DataLoader.cpp +++ b/server/api/DataLoader.cpp @@ -38,39 +38,34 @@ std::pair> DataLoader::operator[](std::size_t index) co } std::size_t DataLoader::size() const { - return loader->size(); + return (loader->size() + batch_size - 1) / batch_size; } void DataLoader::add_data(const DataLoader& other, int index) { loader->add_data(other.loader, index); } -std::pair, std::vector> DataLoader::get_raw(std::size_t index) const { // batch_size lines from index - if (index >= loader->size()) { +std::pair, std::vector> DataLoader::get_raw(std::size_t batch_index) const { // batch_size lines from index + if (batch_index >= loader->size()) { throw std::out_of_range("Index out of range"); } std::vector data; std::vector res(batch_size, 0); - Shape shape = loader->get_appropriate_shape(rearrangement[index], batch_size); + Shape shape = loader->get_appropriate_shape(rearrangement[batch_index], batch_size); auto dims = shape.getDims(); - int data_size = 1; - for (int i = 0; i < dims.size(); ++i) { - data_size *= dims[i]; - } - data.resize(data_size, 0); + data.resize(dims.size(), 0); int cur_data = 0; - for (int i = index; i < index + batch_size; ++i) { + for (int i = batch_size * batch_index; i < (batch_size + 1) * batch_index; ++i) { if (i >= loader->size()) { break; } auto line = loader->get_raw(rearrangement[i]); - 
res[i - index] = line.second; + res[i - batch_size * batch_index] = line.second; for (int j = 0; j < line.first.size(); ++j) { - data[cur_data] = line.first[j]; - cur_data++; + data[cur_data++] = line.first[j]; } } - return {data, res}; + return {std::move(data), std::move(res)}; } void DataLoader::shuffle() { diff --git a/server/api/server.cpp b/server/api/server.cpp index e64707fb..2f96b234 100644 --- a/server/api/server.cpp +++ b/server/api/server.cpp @@ -51,7 +51,7 @@ void train(json::rvalue& json, Graph** graph, int model_id, int user_id, FileExt if (extension == FileExtension::Csv) { path += "/1.csv"; } - DataMarker dataMarker = DataMarker(path, extension, 100, 1); + DataMarker dataMarker = DataMarker(path, extension, 100, 4); DataLoader dataLoader = dataMarker.get_train_loader(); std::vector> data; for (int i = 0; i < dataLoader.size(); ++i) { @@ -200,12 +200,8 @@ int main(int argc, char *argv[]) { delete sessions[model_id]; } Graph* g = nullptr; -<<<<<<< HEAD - train(body, &g, user_id, model_id); -======= - train(body, &g, model_id, file_types[model_id]); ->>>>>>> 105c02e (Change train and predict for zip file case) + train(body, &g, model_id, user_id, file_types[model_id]); sessions[model_id] = g; return response(status::OK, "done"); }); From c1835c131a953703f8b89565d63ef8695f05df47 Mon Sep 17 00:00:00 2001 From: lpetrov02 Date: Fri, 15 Dec 2023 15:04:28 +0300 Subject: [PATCH 12/23] Fixes train with dataloader --- server/Makefile | 3 +- server/api/GraphBuilder.cpp | 59 ++++++++++++++++--------------------- server/api/GraphBuilder.h | 13 ++++---- server/api/Parser.cpp | 16 +++++----- server/api/Parser.h | 2 +- server/api/server.cpp | 38 +++++++++++++++++------- server/core/Layer.cpp | 7 +++++ server/core/Layer.h | 1 + 8 files changed, 76 insertions(+), 63 deletions(-) diff --git a/server/Makefile b/server/Makefile index 4f915b2a..b3f865fa 100644 --- a/server/Makefile +++ b/server/Makefile @@ -126,8 +126,7 @@ serve: $(server) | $(dirTrain) $(dirPredict) $< 
$(host) $(port) $(server): $(serverMain) $(serverObjects) $(coreArchive) | checkboost $(dirBin) -$(LINK.cpp) -L $(boost)/lib $^ -o $@ -lX11 -lpng -lcpprest -lssl -lcrypto -lzip -$(LINK.cpp) -L $(boost)/lib $^ -o $@ -lX11 -lpng -lzip + $(LINK.cpp) -L $(boost)/lib $^ -o $@ -lX11 -lpng -lcpprest -lssl -lcrypto -lzip ### TESTS ### diff --git a/server/api/GraphBuilder.cpp b/server/api/GraphBuilder.cpp index 244421be..d8eaf49a 100644 --- a/server/api/GraphBuilder.cpp +++ b/server/api/GraphBuilder.cpp @@ -2,12 +2,7 @@ #include "Allocator.h" void Graph::OverviewLayers(const crow::json::rvalue& layers, - const std::vector>& data, - std::unordered_map& layer_dicts, - std::unordered_map>& data_dicts) { - std::vector instances; - std::vector answers; - ParseCsvData(data, instances, answers); + std::unordered_map& layer_dicts) { for (auto& layer : layers) { CHECK_HAS_FIELD(layer, "id"); CHECK_HAS_FIELD(layer, "type"); @@ -16,14 +11,6 @@ void Graph::OverviewLayers(const crow::json::rvalue& layers, layerTypes_[id] = type; layer_dicts[id] = layer; - - if (type == "Data") { - data_dicts[id] = instances; // All data goes for all data layers. 
May be should be splitted by them - dataIds_.push_back(id); - } - if (type == "Target") { - data_dicts[id] = answers; - } } } @@ -88,9 +75,9 @@ void Graph::TopologySort(std::unordered_map>& edges, } void Graph::Initialize(crow::json::rvalue modelJson, - const std::vector>& data, - RandomObject* randomInit, - OptimizerBase& SGD) { + RandomObject* randomInit, + OptimizerBase& SGD, + size_t batch_size) { Allocator::startVirtualMode(); CHECK_HAS_FIELD(modelJson, "graph"); CHECK_HAS_FIELD(modelJson["graph"], "layers"); @@ -101,8 +88,7 @@ void Graph::Initialize(crow::json::rvalue modelJson, // Parse Jsons into dicts of Jsons std::unordered_map layerDicts; - std::unordered_map> dataDicts; - OverviewLayers(layersJson, data, layerDicts, dataDicts); + OverviewLayers(layersJson, layerDicts); std::unordered_map> straightEdges, reversedEdges; std::unordered_set entryNodes; @@ -134,13 +120,7 @@ void Graph::Initialize(crow::json::rvalue modelJson, } CHECK_HAS_FIELD(layerDicts[layer_id], "parameters"); Shape shape = ParseData(layerDicts[layer_id]["parameters"]); - if (shape.size() == 0 || shape.size() % dataDicts[layer_id].size() != 0) { - std::string message = "Object of size " + - std::to_string(dataDicts[layer_id].size()) + - " can not have shape Nx" + shape.toString(); - throw std::runtime_error(message); - } - layers_.emplace(layer_id, new DataLayer{shape, dataDicts[layer_id]}); + layers_.emplace(layer_id, new DataLayer{shape, batch_size}); } else if (type == "Output") { for (auto prevLayerId : reversedEdges[layer_id]) { lastPredictIds_.push_back(prevLayerId); @@ -154,19 +134,30 @@ void Graph::Initialize(crow::json::rvalue modelJson, } } -void Graph::ChangeInputData(std::vector data) { +void Graph::ChangeLayersData(std::vector data, BaseLayerType type) { // All data goes to every data layer. Should be changed? 
- for (int id : dataIds_) { + std::vector* layers = nullptr; + if (type == BaseLayerType::Data) { + layers = &dataIds_; + } else if (type == BaseLayerType::Targets) { + layers = &targetsIds_; + } else { + throw std::invalid_argument("Can change data only in 'Data' or 'Target' layers"); + } + for (int id : *layers) { DataLayer* layer = reinterpret_cast(layers_[id]); - size_t width = layer->result->output->shape.cols(); - if (data.size() % width != 0) { + Shape expected_shape = layer->result->output->shape; + std::vector dims = expected_shape.getDims(); + size_t sample_size = 1; + for (auto it = std::next(dims.begin()); it != dims.end(); ++it) { + sample_size *= *it; + } + + if (data.size() % sample_size != 0 || data.size() > expected_shape.size()) { throw std::invalid_argument("Sizes mismatch!"); } - Shape expected_shape = layer->result->output->shape; - size_t new_size = (data.size() + expected_shape.size() - 1) / expected_shape.size() * - expected_shape.size(); - data.resize(new_size, 0); + data.resize(expected_shape.size(), 0); layer->result->output.emplace(Blob::constBlob(expected_shape, data.data())); } } diff --git a/server/api/GraphBuilder.h b/server/api/GraphBuilder.h index 41c02327..cbdca510 100644 --- a/server/api/GraphBuilder.h +++ b/server/api/GraphBuilder.h @@ -27,14 +27,13 @@ class Graph { public: Graph() = default; void Initialize(crow::json::rvalue modelJson, - const std::vector>& data, - RandomObject* randomInit, - OptimizerBase& SGD); + RandomObject* randomInit, + OptimizerBase& SGD, + size_t batch_size); ~Graph(); - void OverviewLayers(const crow::json::rvalue& layers, const std::vector>& data, - std::unordered_map& layer_dicts, - std::unordered_map>& data_dicts); + void OverviewLayers(const crow::json::rvalue& layers, + std::unordered_map& layer_dicts); void GetEdges(const crow::json::rvalue& connections, std::unordered_map>& straightEdges, @@ -45,7 +44,7 @@ class Graph { std::unordered_set& entryNodes, std::vector& layersOrder); - void 
ChangeInputData(std::vector data); + void ChangeLayersData(std::vector data, BaseLayerType type); std::vector getLayers(BaseLayerType type) const; diff --git a/server/api/Parser.cpp b/server/api/Parser.cpp index 532deec6..a1f60b6a 100644 --- a/server/api/Parser.cpp +++ b/server/api/Parser.cpp @@ -6,14 +6,14 @@ void CHECK_HAS_FIELD(const crow::json::rvalue& layer, const std::string& field) } } -void ParseCsvData(const std::vector>& data, std::vector& instances, std::vector& answers) { - instances.reserve(data.size()); - answers.reserve(data.size()); - for (auto& instance : data) { - answers.push_back(instance.back()); - instances.emplace_back(instance.begin(), std::prev(instance.end())); - } -} +// void ParseCsvData(const std::vector>& data, std::vector& instances, std::vector& answers) { +// instances.reserve(data.size()); +// answers.reserve(data.size()); +// for (auto& instance : data) { +// answers.push_back(instance.back()); +// instances.emplace_back(instance.begin(), std::prev(instance.end())); +// } +// } LinearLayerParameters ParseLinear(const crow::json::rvalue& parameters) { size_t inFeatures, outFeatures; diff --git a/server/api/Parser.h b/server/api/Parser.h index a58ccfa4..a742689c 100644 --- a/server/api/Parser.h +++ b/server/api/Parser.h @@ -18,7 +18,7 @@ void CHECK_HAS_FIELD(const crow::json::rvalue& layer, const std::string& field); -void ParseCsvData(const std::vector>& data, std::vector& instances, std::vector& answers); +// void ParseCsvData(const std::vector>& data, std::vector& instances, std::vector& answers); LinearLayerParameters ParseLinear(const crow::json::rvalue& parameters); Shape ParseData(const crow::json::rvalue& parameters); diff --git a/server/api/server.cpp b/server/api/server.cpp index 2f96b234..e60bd745 100644 --- a/server/api/server.cpp +++ b/server/api/server.cpp @@ -6,18 +6,25 @@ #include +#include + +#include + + #include #include #include #include -#include -#include -#include "zip.h" +#pragma push_macro("U") +#undef U 
+#include "ImageLoader.h" +#pragma pop_macro("U") + #include "GraphBuilder.h" #include "CsvLoader.h" -#include "ImageLoader.h" #include "DataMarker.h" +#include using namespace std; using namespace crow; @@ -64,8 +71,9 @@ void train(json::rvalue& json, Graph** graph, int model_id, int user_id, FileExt data.push_back(p.first); } + size_t batch_size = 4; // hard-coded *graph = new Graph(); - (*graph)->Initialize(json, data, &initObject, SGD); + (*graph)->Initialize(json, &initObject, SGD, batch_size); std::cout << "Graph is ready!" << std::endl; auto& lastTrainNode = (*graph)->getLayers(BaseLayerType::TrainOut)[0]->result.value(); @@ -85,8 +93,14 @@ void train(json::rvalue& json, Graph** graph, int model_id, int user_id, FileExt outputs.reserve(buffer_size); size_t max_epochs = 1000; + std::pair, std::vector> batch; for (size_t epoch = 0; epoch < max_epochs; ++epoch) { - auto& result = lastTrainNode.forward(); + for (size_t batch_index = 0; batch_index < dataLoader.size(); ++batch_index) { + batch = dataLoader.get_raw(batch_index); + (*graph)->ChangeLayersData(batch.first, BaseLayerType::Data); + (*graph)->ChangeLayersData(batch.second, BaseLayerType::Targets); + } + lastTrainNode.forward(); // printf("%ld: %f\n", epoch, result[0][0]); outputs.push_back(GetLogs(lastPredictNode)); @@ -128,12 +142,14 @@ void predict(int model_id, Graph* graph, std::vector& answer, FileExtensi else { predict_data = {ImageLoader::load_image((getPredictPath(model_id) + "/1.png").c_str())}; } - graph->ChangeInputData(predict_data[0]); + graph->ChangeLayersData(predict_data[0], BaseLayerType::Targets); // Пока не думаем о нескольких выходах (!) 
Hard-coded auto& lastNode = graph->getLayers(BaseLayerType::PredictOut)[0]->result.value(); lastNode.clear(); - const Blob& result = lastNode.forward(); + lastNode.forward(); + + auto& result = lastNode.output.value(); answer.reserve(result.shape.rows() * result.shape.cols()); for (size_t j = 0; j < result.shape.rows(); ++j) { @@ -156,9 +172,9 @@ void extract_from_zip(std::string path, std::string root) { if (zip_stat_index(z, i, 0, &info) == 0) { ofstream fout(root + "/" + info.name, ios::binary); zip_file* file = zip_fopen_index(z, i, 0); - char file_data[info.size]; - zip_fread(file, file_data, info.size); - fout.write(file_data, info.size); + std::vector file_data(info.size); + zip_fread(file, file_data.data(), info.size); + fout.write(file_data.data(), info.size); fout.close(); } } diff --git a/server/core/Layer.cpp b/server/core/Layer.cpp index 9a9562d1..7db81779 100644 --- a/server/core/Layer.cpp +++ b/server/core/Layer.cpp @@ -36,6 +36,13 @@ DataLayer::DataLayer(const Shape& shape, const std::vector& values) { result = Tensor(Blob::constBlob(shape, values.data())); } +DataLayer::DataLayer(const Shape& shape, size_t batch_size) { + std::vector dims = shape.getDims(); + dims.insert(dims.begin(), batch_size); + Shape batch_shape(dims); + result = Tensor(Blob::constRandomBlob(batch_shape, nullptr)); +} + MSELoss::MSELoss(const std::vector& args) : mean({0, 1, 2, 3}) { pipeline.reserve(2); diff --git a/server/core/Layer.h b/server/core/Layer.h index e9e6e0ca..bdca7c95 100644 --- a/server/core/Layer.h +++ b/server/core/Layer.h @@ -20,6 +20,7 @@ class Layer { class DataLayer: public Layer { public: DataLayer(const Shape& params, const std::vector& values); + DataLayer(const Shape& params, size_t batch_size); }; class LinearLayer: public Layer { From 41e75f0e52163d972c8041a51df40f5eeafe769f Mon Sep 17 00:00:00 2001 From: lpetrov02 Date: Fri, 15 Dec 2023 19:07:35 +0300 Subject: [PATCH 13/23] It's not fucking working :( --- py_server/mlcraft/check_dimensions.py | 6 
++-- py_server/mlcraft/dataset.py | 14 ++++---- py_server/mlcraft/server.py | 2 +- py_server/tests/test_check_dimensions.py | 16 ++++----- server/api/DataLoader.cpp | 4 +-- server/api/Parser.cpp | 8 +++-- server/api/server.cpp | 41 ++++++++++-------------- server/core/Allocator.cpp | 2 +- server/core/Operation.h | 5 +++ server/core/Shape.cpp | 2 +- server/tests/linear_relu_linear_mse.json | 4 +-- 11 files changed, 53 insertions(+), 51 deletions(-) diff --git a/py_server/mlcraft/check_dimensions.py b/py_server/mlcraft/check_dimensions.py index 772e928d..5147ff80 100644 --- a/py_server/mlcraft/check_dimensions.py +++ b/py_server/mlcraft/check_dimensions.py @@ -6,8 +6,8 @@ class Data2dChecker: - def __init__(self, width): - self.output_shape = [-1, width] + def __init__(self, shape: list[int]): + self.output_shape = [-1] + shape def __call__(self): return True @@ -60,7 +60,7 @@ def __call__(self, input_shape: list[int], targets_shape: list[int]): def create_checker(layer: dict): match layer["type"]: case "Data" | "Target": - return Data2dChecker(layer["parameters"]["width"]) + return Data2dChecker(layer["parameters"]["shape"]) case "Linear": return LinearChecker( layer["parameters"]["inFeatures"], layer["parameters"]["outFeatures"] diff --git a/py_server/mlcraft/dataset.py b/py_server/mlcraft/dataset.py index 79fcc2ab..5fe8bc99 100644 --- a/py_server/mlcraft/dataset.py +++ b/py_server/mlcraft/dataset.py @@ -1,4 +1,6 @@ import csv +import math + from http import HTTPStatus from io import StringIO from .errors import Error @@ -19,10 +21,10 @@ def extract_train_data(bytes: bytes, model: dict) -> dict: data: list[float] = [] target: list[float] = [] + dims = list(map(int, data_layer["parameters"]["shape"])) + dims_total = math.prod(dims) for row in reader: - if ( - int(data_layer["parameters"]["width"]) != len(row) - 1 - ): # columns = features + 1 for target + if dims_total != len(row) - 1: # columns = features + 1 for target raise Error("Input csv column count doesn't 
match data's feature count") data.extend(map(float, row[:-1])) @@ -36,11 +38,11 @@ def extract_predict_data(bytes: bytes, model: dict) -> dict: reader = csv.reader(StringIO(bytes.decode())) row = next(reader, None) + dims = list(map(int, data_layer["parameters"]["shape"])) + dims_total = math.prod(dims) if row is None: raise Error("Bad csv format") - if int(data_layer["parameters"]["width"]) != len( - row - ): # predict request - one row with data only + if dims_total != len(row): # predict request - one row with data only raise Error("Input csv column count doesn't match data's feature count") return {data_layer["id"]: list(map(float, row))} diff --git a/py_server/mlcraft/server.py b/py_server/mlcraft/server.py index d81137d3..3972053b 100644 --- a/py_server/mlcraft/server.py +++ b/py_server/mlcraft/server.py @@ -265,5 +265,5 @@ def get_plots(user_id: int, model_id: int): current_dir = os.getcwd() print(current_dir) response = send_file(os.path.join(current_dir, "images", plot_path)) - delete_file(os.path.join(current_dir, "images", plot_path)) + # delete_file(os.path.join(current_dir, "images", plot_path)) return response diff --git a/py_server/tests/test_check_dimensions.py b/py_server/tests/test_check_dimensions.py index 705f5e36..35cdbf20 100644 --- a/py_server/tests/test_check_dimensions.py +++ b/py_server/tests/test_check_dimensions.py @@ -5,8 +5,8 @@ def test_correct_simple(): layers = [ - {"id": 0, "type": "Target", "parameters": {"width": 1}, "parents": []}, - {"id": 1, "type": "Data", "parameters": {"width": 2}, "parents": []}, + {"id": 0, "type": "Target", "parameters": {"shape": [1]}, "parents": []}, + {"id": 1, "type": "Data", "parameters": {"shape": [2]}, "parents": []}, { "id": 2, "type": "Linear", @@ -23,8 +23,8 @@ def test_correct_simple(): def test_correct_harder(): layers = [ - {"id": 0, "type": "Target", "parameters": {"width": 1}, "parents": []}, - {"id": 1, "type": "Data", "parameters": {"width": 2}, "parents": []}, + {"id": 0, "type": 
"Target", "parameters": {"shape": [1]}, "parents": []}, + {"id": 1, "type": "Data", "parameters": {"shape": [2]}, "parents": []}, { "id": 2, "type": "Linear", @@ -55,8 +55,8 @@ def test_correct_harder(): def test_mismatch_simple(): layers = [ - {"id": 0, "type": "Target", "parameters": {"width": 1}, "parents": []}, - {"id": 1, "type": "Data", "parameters": {"width": 2}, "parents": []}, + {"id": 0, "type": "Target", "parameters": {"shape": [1]}, "parents": []}, + {"id": 1, "type": "Data", "parameters": {"shape": [2]}, "parents": []}, { "id": 2, "type": "Linear", @@ -77,8 +77,8 @@ def test_mismatch_simple(): def test_mismatch_harder(): layers = [ - {"id": 0, "type": "Target", "parameters": {"width": 1}, "parents": []}, - {"id": 1, "type": "Data", "parameters": {"width": 2}, "parents": []}, + {"id": 0, "type": "Target", "parameters": {"shape": [1]}, "parents": []}, + {"id": 1, "type": "Data", "parameters": {"shape": [2]}, "parents": []}, { "id": 2, "type": "Linear", diff --git a/server/api/DataLoader.cpp b/server/api/DataLoader.cpp index 23cad28e..b226ae6e 100644 --- a/server/api/DataLoader.cpp +++ b/server/api/DataLoader.cpp @@ -46,14 +46,14 @@ void DataLoader::add_data(const DataLoader& other, int index) { } std::pair, std::vector> DataLoader::get_raw(std::size_t batch_index) const { // batch_size lines from index - if (batch_index >= loader->size()) { + if (batch_index >= size()) { throw std::out_of_range("Index out of range"); } std::vector data; std::vector res(batch_size, 0); Shape shape = loader->get_appropriate_shape(rearrangement[batch_index], batch_size); auto dims = shape.getDims(); - data.resize(dims.size(), 0); + data.resize(shape.size(), 0); int cur_data = 0; for (int i = batch_size * batch_index; i < (batch_size + 1) * batch_index; ++i) { if (i >= loader->size()) { diff --git a/server/api/Parser.cpp b/server/api/Parser.cpp index a1f60b6a..4462f529 100644 --- a/server/api/Parser.cpp +++ b/server/api/Parser.cpp @@ -25,9 +25,11 @@ LinearLayerParameters 
ParseLinear(const crow::json::rvalue& parameters) { inFeatures = static_cast(parameters["inFeatures"].i()); outFeatures = static_cast(parameters["outFeatures"].i()); - if (parameters.has("bias")) { - bias = parameters["bias"].b(); - } + // if (parameters.has("bias")) { + // std::cout << parameters["bias"] << std::endl; + // bias = parameters["bias"].b(); + // std::cout << "done" << std::endl; + // } return LinearLayerParameters{inFeatures, outFeatures, bias}; } diff --git a/server/api/server.cpp b/server/api/server.cpp index e60bd745..cb9519fd 100644 --- a/server/api/server.cpp +++ b/server/api/server.cpp @@ -37,13 +37,13 @@ std::string getPredictPath(int id) { return "./model_data/predict/" + std::to_string(id); } -web::json::value GetLogs(const std::optional& node) { - assert(node.has_value() && node.value().shape.dimsCount <= 2); +web::json::value GetLogs(const Blob& node) { + assert(node.shape.dimsCount <= 2); std::vector values; - values.reserve(node.value().shape.size()); - for (size_t sample_index = 0; sample_index < node.value().shape.rows(); ++sample_index) { - for (size_t feature_index = 0; feature_index < node.value().shape.cols(); ++feature_index) { - values.push_back(web::json::value::number(node.value()(0, 0, sample_index, feature_index))); + values.reserve(node.shape.size()); + for (size_t sample_index = 0; sample_index < node.shape.rows(); ++sample_index) { + for (size_t feature_index = 0; feature_index < node.shape.cols(); ++feature_index) { + values.push_back(web::json::value::number(node(0, 0, sample_index, feature_index))); } } return web::json::value::array(values); @@ -60,16 +60,6 @@ void train(json::rvalue& json, Graph** graph, int model_id, int user_id, FileExt } DataMarker dataMarker = DataMarker(path, extension, 100, 4); DataLoader dataLoader = dataMarker.get_train_loader(); - std::vector> data; - for (int i = 0; i < dataLoader.size(); ++i) { - auto p = dataLoader.get_raw(i); - p.first.push_back(p.second[0]); - for (auto x: p.first) { - 
std::cout << x << ' '; - } - std::cout << std::endl; - data.push_back(p.first); - } size_t batch_size = 4; // hard-coded *graph = new Graph(); @@ -92,8 +82,13 @@ void train(json::rvalue& json, Graph** graph, int model_id, int user_id, FileExt targets.reserve(buffer_size); outputs.reserve(buffer_size); - size_t max_epochs = 1000; + size_t max_epochs = 30; std::pair, std::vector> batch; + + web::http::client::http_client client(U("http://localhost:3000")); + std::ostringstream request_url; + request_url << "/update_metrics/" << user_id << "/" << model_id; + for (size_t epoch = 0; epoch < max_epochs; ++epoch) { for (size_t batch_index = 0; batch_index < dataLoader.size(); ++batch_index) { batch = dataLoader.get_raw(batch_index); @@ -103,8 +98,10 @@ void train(json::rvalue& json, Graph** graph, int model_id, int user_id, FileExt lastTrainNode.forward(); // printf("%ld: %f\n", epoch, result[0][0]); - outputs.push_back(GetLogs(lastPredictNode)); - targets.push_back(GetLogs(targetsNode)); + outputs.push_back(GetLogs(lastPredictNode.value())); + targets.push_back(GetLogs(targetsNode.value())); + // outputs.push_back(web::json::value::array({web::json::value::number(1.0)})); + // targets.push_back(web::json::value::array({web::json::value::number(1.0)})); if ((epoch == max_epochs - 1 && outputs.size() > 0) || outputs.size() == buffer_size) { @@ -119,17 +116,13 @@ void train(json::rvalue& json, Graph** graph, int model_id, int user_id, FileExt targets.clear(); outputs.clear(); - std::ostringstream request_url; - request_url << "/update_metrics/" << user_id << "/" << model_id; - - web::http::client::http_client client(U("http://localhost:3000")); client.request(web::http::methods::PUT, U(request_url.str()), json); } - // lastTrainNode.gradient = result; lastTrainNode.gradient = Blob::ones({{1}}); lastTrainNode.backward(); SGD.step(); + // Allocator::endSession(); lastTrainNode.clear(); } } diff --git a/server/core/Allocator.cpp b/server/core/Allocator.cpp index 
2ae4a83c..10ce9c8a 100644 --- a/server/core/Allocator.cpp +++ b/server/core/Allocator.cpp @@ -9,7 +9,7 @@ using namespace std; Allocator* Allocator::instance = NULL; -#define TMP_BUFF_SIZE 2048 +#define TMP_BUFF_SIZE 16384 void Allocator::startVirtualMode() { assert(!instance); diff --git a/server/core/Operation.h b/server/core/Operation.h index ad8b6e79..f178506d 100644 --- a/server/core/Operation.h +++ b/server/core/Operation.h @@ -14,6 +14,11 @@ struct Operation { virtual std::vector grad(const Blob& gradient, const std::vector& args) const = 0; virtual Shape computeDim(const std::vector& args) const = 0; const Operation& operator=(const Operation& other) const; + std::string name = ""; + + virtual std::string getName() const { + return name; + } }; struct Noop: Operation { diff --git a/server/core/Shape.cpp b/server/core/Shape.cpp index b154009a..74ff8d32 100644 --- a/server/core/Shape.cpp +++ b/server/core/Shape.cpp @@ -48,7 +48,7 @@ string Shape::toString() const { vector Shape::getDims() const { vector dims; - for (int i = 0; i < 4; i++) { + for (int i = 4 - dimsCount; i < 4; i++) { dims.push_back(this->dims[i]); } return dims; diff --git a/server/tests/linear_relu_linear_mse.json b/server/tests/linear_relu_linear_mse.json index f1d872ac..f89468be 100644 --- a/server/tests/linear_relu_linear_mse.json +++ b/server/tests/linear_relu_linear_mse.json @@ -31,7 +31,7 @@ "id": 0, "type": "Data", "parameters": { - "width": 2 + "shape": [2] } }, { @@ -60,7 +60,7 @@ "id": 4, "type": "Target", "parameters": { - "width": 1 + "shape": [1] } }, { From e2d19147f7b5c231afefb1ebe04d351f552f8199 Mon Sep 17 00:00:00 2001 From: lpetrov02 Date: Mon, 18 Dec 2023 08:05:45 +0300 Subject: [PATCH 14/23] It's not fucking working :( --- server/api/server.cpp | 53 +++++++++++++++++++++------------------ server/core/Optimizer.cpp | 11 +++++--- server/core/Optimizer.h | 12 ++++----- 3 files changed, 42 insertions(+), 34 deletions(-) diff --git a/server/api/server.cpp 
b/server/api/server.cpp index cb9519fd..c21b7d19 100644 --- a/server/api/server.cpp +++ b/server/api/server.cpp @@ -39,14 +39,14 @@ std::string getPredictPath(int id) { web::json::value GetLogs(const Blob& node) { assert(node.shape.dimsCount <= 2); - std::vector values; - values.reserve(node.shape.size()); + web::json::value values; + size_t elements_stored = 0; for (size_t sample_index = 0; sample_index < node.shape.rows(); ++sample_index) { for (size_t feature_index = 0; feature_index < node.shape.cols(); ++feature_index) { - values.push_back(web::json::value::number(node(0, 0, sample_index, feature_index))); + values[elements_stored++] = web::json::value::number(node(0, 0, sample_index, feature_index)); } } - return web::json::value::array(values); + return values; } void train(json::rvalue& json, Graph** graph, int model_id, int user_id, FileExtension extension) { @@ -67,8 +67,8 @@ void train(json::rvalue& json, Graph** graph, int model_id, int user_id, FileExt std::cout << "Graph is ready!" 
<< std::endl; auto& lastTrainNode = (*graph)->getLayers(BaseLayerType::TrainOut)[0]->result.value(); - auto& lastPredictNode = (*graph)->getLayers(BaseLayerType::PredictOut)[0]->result.value().output; - auto& targetsNode = (*graph)->getLayers(BaseLayerType::Targets)[0]->result.value().output; + // auto& lastPredictNode = (*graph)->getLayers(BaseLayerType::PredictOut)[0]->result.value().output; + // auto& targetsNode = (*graph)->getLayers(BaseLayerType::Targets)[0]->result.value().output; lastTrainNode.forward(); lastTrainNode.gradient = Blob::ones({{1}}); @@ -77,10 +77,9 @@ void train(json::rvalue& json, Graph** graph, int model_id, int user_id, FileExt lastTrainNode.clear(); Allocator::endVirtualMode(); - size_t buffer_size = 5; - std::vector targets, outputs; - targets.reserve(buffer_size); - outputs.reserve(buffer_size); + size_t buffer_size = 5, actual_size = 0; + web::json::value request; + request[U("rewrite")] = web::json::value::boolean(true); size_t max_epochs = 30; std::pair, std::vector> batch; @@ -90,6 +89,7 @@ void train(json::rvalue& json, Graph** graph, int model_id, int user_id, FileExt request_url << "/update_metrics/" << user_id << "/" << model_id; for (size_t epoch = 0; epoch < max_epochs; ++epoch) { + std::cerr << epoch << " Start" << std::endl; for (size_t batch_index = 0; batch_index < dataLoader.size(); ++batch_index) { batch = dataLoader.get_raw(batch_index); (*graph)->ChangeLayersData(batch.first, BaseLayerType::Data); @@ -98,31 +98,34 @@ void train(json::rvalue& json, Graph** graph, int model_id, int user_id, FileExt lastTrainNode.forward(); // printf("%ld: %f\n", epoch, result[0][0]); - outputs.push_back(GetLogs(lastPredictNode.value())); - targets.push_back(GetLogs(targetsNode.value())); - // outputs.push_back(web::json::value::array({web::json::value::number(1.0)})); - // targets.push_back(web::json::value::array({web::json::value::number(1.0)})); + auto& lastPredictNode = 
(*graph)->getLayers(BaseLayerType::PredictOut)[0]->result.value().output.value(); + auto& targetsNode = (*graph)->getLayers(BaseLayerType::Targets)[0]->result.value().output.value(); + request[U("targets")][actual_size] = web::json::value(GetLogs(lastPredictNode)); + request[U("outputs")][actual_size] = web::json::value(GetLogs(targetsNode)); + ++actual_size; - if ((epoch == max_epochs - 1 && outputs.size() > 0) || - outputs.size() == buffer_size) { + if ((epoch == max_epochs - 1 && actual_size > 0) || + actual_size == buffer_size) { - web::json::value json; - json["targets"] = web::json::value::array(targets); - json["outputs"] = web::json::value::array(outputs); - json["label"] = web::json::value::string("train"); + request[U("label")] = web::json::value::string("train"); if (epoch < buffer_size) { - json["rewrite"] = web::json::value::boolean(true); + request[U("rewrite")] = web::json::value::boolean(true); } - targets.clear(); - outputs.clear(); - client.request(web::http::methods::PUT, U(request_url.str()), json); + client.request(web::http::methods::PUT, U(request_url.str()), request); + request = web::json::value(); + actual_size = 0; } + std::cerr << epoch << " OK" << std::endl; + lastTrainNode.gradient = Blob::ones({{1}}); + std::cerr << epoch << " Gradient vanished" << std::endl; lastTrainNode.backward(); + std::cerr << epoch << " Backwarded" << std::endl; SGD.step(); - // Allocator::endSession(); + std::cerr << epoch << " Made SGD step" << std::endl; + Allocator::endSession(); lastTrainNode.clear(); } } diff --git a/server/core/Optimizer.cpp b/server/core/Optimizer.cpp index 213f5745..7c57eafe 100644 --- a/server/core/Optimizer.cpp +++ b/server/core/Optimizer.cpp @@ -5,13 +5,18 @@ #include "Tensor.h" #include "Optimizer.h" +OptimizerBase::OptimizerBase(float lr) : lr(lr) { +} + void OptimizerBase::append(std::vector& newParams) { - params.reserve(params.size() + newParams.size() * sizeof(TensorRef)); - 
params.insert(params.end(),newParams.begin(),newParams.end()); + params.reserve(params.size() + newParams.size()); + params.insert(params.end(), newParams.begin(), newParams.end()); } void OptimizerBase::step() { for (int i = 0; i < params.size(); i++) { - *params[i].get().output -= lr * *params[i].get().gradient; + if (params[i].get().gradient.has_value() && params[i].get().output.has_value()) { + params[i].get().output.value() -= lr * params[i].get().gradient.value(); + } } } \ No newline at end of file diff --git a/server/core/Optimizer.h b/server/core/Optimizer.h index f5cb0e75..50843a79 100644 --- a/server/core/Optimizer.h +++ b/server/core/Optimizer.h @@ -5,12 +5,12 @@ #include "Tensor.h" class OptimizerBase { - -const float lr; -std::vector params; - public: - OptimizerBase(float lr) : lr(lr), params() {}; + OptimizerBase(float lr); void append(std::vector& newParams); void step(); -}; \ No newline at end of file + +private: + const float lr; + std::vector params; +}; From 985402174fb5b802f5196370f621d6fcfdcf0399 Mon Sep 17 00:00:00 2001 From: Ivan Shanygin Date: Mon, 18 Dec 2023 15:53:54 +0300 Subject: [PATCH 15/23] Input selection bug fix --- client/templates/main.html | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/client/templates/main.html b/client/templates/main.html index e66c979c..e3a87c3f 100644 --- a/client/templates/main.html +++ b/client/templates/main.html @@ -1263,8 +1263,9 @@

new_input_order = [] for (let i in sortable_list.childNodes) { try { - if (sortable_list.childNodes[i].textContent.includes("input")) { - new_input_order.push(Number(sortable_list.childNodes[i].textContent.split(" ")[3])) + if (sortable_list.childNodes[i].textContent.length > 0) { + console.log(sortable_list.childNodes[i].textContent) + new_input_order.push(Number(sortable_list.childNodes[i].textContent)) } } catch (e) { continue From 47b519cb5aa9a33792b2433d7dbe6c16884ffd67 Mon Sep 17 00:00:00 2001 From: lpetrov02 Date: Fri, 15 Dec 2023 09:56:59 +0300 Subject: [PATCH 16/23] Started migration from Data2dLayer to DataLayer Makes preparations for metrics logging on python Functionality for c++ http added, but not working yet Adds saving train metrics Adds saving train metrics and responding with PNG Adapts code for new 4D blob cpprest CI support Add load possibility for zip Add load possibility for png on predict ------- GRA-122: Data loader implementation (#67) Data loader implementation ------- ID-154: Loss type selection (#70) * Add loss type selection * Add loss type selection * Remove layer-class loss * Clean up Loss type * Make format ------- ID-171: Fix input selection (#69) * Fix input selection * Clean up fix input selection ------- Change train and predict for zip file case Starts fixing train Fixes train with dataloader It's not fucking working :( (x3) server train fix Fixes train and predict --- .github/workflows/CI.yml | 3 + .gitignore | 3 +- README.md | 3 + client/templates/main.html | 18 +- py_server/Makefile | 2 +- py_server/mlcraft/check_dimensions.py | 6 +- py_server/mlcraft/dataset.py | 14 +- py_server/mlcraft/db.py | 83 ++++++- py_server/mlcraft/server.py | 99 ++++++-- py_server/mlcraft/static/swagger.yaml | 283 ++++++++++++++++++++--- py_server/mlcraft/utils.py | 17 ++ py_server/pyproject.toml | 1 + py_server/tests/test_check_dimensions.py | 16 +- server/Makefile | 8 +- server/api/DataLoader.cpp | 26 +-- server/api/DataLoader.h | 1 +
server/api/GraphBuilder.cpp | 97 ++++---- server/api/GraphBuilder.h | 27 ++- server/api/Parser.cpp | 38 +-- server/api/Parser.h | 4 +- server/api/server.cpp | 211 +++++++++++++---- server/core/Allocator.cpp | 2 +- server/core/Layer.cpp | 13 +- server/core/Layer.h | 7 +- server/core/LazyBlob.cpp | 6 +- server/core/Operation.h | 5 + server/core/Optimizer.cpp | 11 +- server/core/Optimizer.h | 12 +- server/core/Parameters.h | 5 - server/tests/linear_relu_linear_mse.json | 4 +- 30 files changed, 781 insertions(+), 244 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 9798a4d5..adea88ef 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -52,6 +52,9 @@ jobs: sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 90 sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 90 + - name: install-cpprest + run: sudo apt install libcpprest-dev + - name: install-boost uses: MarkusJx/install-boost@v2.4.4 id: install-boost diff --git a/.gitignore b/.gitignore index 901ef702..48b197a5 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,7 @@ __pycache__/ *$py.class .cache .venv +images/ # Flask stuff: instance/ @@ -49,4 +50,4 @@ dmypy.json # Makefile install bin -*.patch \ No newline at end of file +*.patch diff --git a/README.md b/README.md index 019427b8..97c2050c 100644 --- a/README.md +++ b/README.md @@ -85,6 +85,9 @@ Other targets available: > **Important:** If you want to build a c++ server, you need to install [Boost](https://www.boost.org/users/download/). > Then, **in the `config.json` file** add the path to the boost root (folder with `include` and `lib` inside). > For example: `"BOOST_ROOT": "/usr/local/Cellar/boost/1.81.0_1"` + +> Also you need `cpprest`: on MacOS: `brew install cpprestsdk` +> on Linux: `sudo apt-get install libcpprest-dev` > After that you should be able to build everything just fine... 
There are 3(4) main targets available to build: diff --git a/client/templates/main.html b/client/templates/main.html index c62663e7..01a7dfd3 100644 --- a/client/templates/main.html +++ b/client/templates/main.html @@ -174,7 +174,7 @@

- - - -