From 0c0d4d9625eefefa8ec29e2a67d10db60b34ac5d Mon Sep 17 00:00:00 2001 From: chreman Date: Thu, 6 May 2021 15:57:57 +0200 Subject: [PATCH 01/54] change dockerfile structure --- .../{base.docker => workers/base/Dockerfile} | 0 server/workers/{ => base}/run_base.py | 0 .../dataprocessing/Dockerfile} | 14 +++++++------- .../{ => dataprocessing}/run_dataprocessing.py | 0 .../gsheets/Dockerfile} | 10 +++++----- server/workers/{ => gsheets}/run_gsheets.py | 0 .../openaire/Dockerfile} | 14 +++++++------- server/workers/{ => openaire}/run_openaire.py | 0 .../pubmed/Dockerfile} | 18 +++++++++--------- server/workers/{ => pubmed}/run_pubmed.py | 0 .../services/Dockerfile} | 6 +++--- .../triple/Dockerfile} | 10 +++++----- server/workers/{ => triple}/run_triple.py | 0 13 files changed, 36 insertions(+), 36 deletions(-) rename server/{base.docker => workers/base/Dockerfile} (100%) rename server/workers/{ => base}/run_base.py (100%) rename server/{dataprocessing.docker => workers/dataprocessing/Dockerfile} (95%) rename server/workers/{ => dataprocessing}/run_dataprocessing.py (100%) rename server/{search_gsheets.docker => workers/gsheets/Dockerfile} (56%) rename server/workers/{ => gsheets}/run_gsheets.py (100%) rename server/{openaire.docker => workers/openaire/Dockerfile} (95%) rename server/workers/{ => openaire}/run_openaire.py (100%) rename server/{pubmed.docker => workers/pubmed/Dockerfile} (94%) rename server/workers/{ => pubmed}/run_pubmed.py (100%) rename server/{services.docker => workers/services/Dockerfile} (80%) rename server/{search_triple.docker => workers/triple/Dockerfile} (62%) rename server/workers/{ => triple}/run_triple.py (100%) diff --git a/server/base.docker b/server/workers/base/Dockerfile similarity index 100% rename from server/base.docker rename to server/workers/base/Dockerfile diff --git a/server/workers/run_base.py b/server/workers/base/run_base.py similarity index 100% rename from server/workers/run_base.py rename to server/workers/base/run_base.py diff --git a/server/dataprocessing.docker b/server/workers/dataprocessing/Dockerfile similarity index 95% rename from server/dataprocessing.docker rename to server/workers/dataprocessing/Dockerfile index 53db85737..78abf305a 100644 --- a/server/dataprocessing.docker +++ b/server/workers/dataprocessing/Dockerfile @@ -144,22 +144,22 @@ RUN R -e 'options(repos="https://cran.wu.ac.at")' && \ R -e 'install.packages("renv", version="0.11.0-5")' WORKDIR /headstart -COPY workers/dataprocessing/requirements.txt . +COPY dataprocessing/requirements.txt . RUN pip3 install --no-cache-dir -r requirements.txt -COPY workers/dataprocessing/renv.lock . -COPY workers/dataprocessing/activate.R . +COPY dataprocessing/renv.lock . +COPY dataprocessing/activate.R . RUN R -e 'renv::consent(provided = TRUE)' && \ R -e 'setwd("./"); renv::activate(); renv::restore(lockfile = "./renv.lock")' -COPY workers/common ./common -COPY workers/dataprocessing ./dataprocessing +COPY ../common ./common +COPY ./dataprocessing ./dataprocessing COPY preprocessing/resources ./resources COPY preprocessing/other-scripts ./other-scripts RUN mkdir -p /var/log/headstart && touch /var/log/headstart/headstart.log -COPY workers/redis_config.json . +COPY redis_config.json . -COPY workers/*.py ./ +COPY *.py ./ ENTRYPOINT python3 run_dataprocessing.py diff --git a/server/workers/run_dataprocessing.py b/server/workers/dataprocessing/run_dataprocessing.py similarity index 100% rename from server/workers/run_dataprocessing.py rename to server/workers/dataprocessing/run_dataprocessing.py diff --git a/server/search_gsheets.docker b/server/workers/gsheets/Dockerfile similarity index 56% rename from server/search_gsheets.docker rename to server/workers/gsheets/Dockerfile index c80187617..c4057cc60 100644 --- a/server/search_gsheets.docker +++ b/server/workers/gsheets/Dockerfile @@ -6,11 +6,11 @@ RUN apk update RUN apk add build-base gcc WORKDIR /headstart -COPY workers/gsheets/requirements.txt . +COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt -COPY workers/gsheets/src/ ./gsheets/src -COPY workers/run_gsheets.py . -COPY workers/gsheets/token.pickle ./gsheets -COPY workers/redis_config.json . +COPY src/ ./gsheets/src +COPY run_gsheets.py . +COPY token.pickle ./gsheets +COPY redis_config.json . ENTRYPOINT python run_gsheets.py diff --git a/server/workers/run_gsheets.py b/server/workers/gsheets/run_gsheets.py similarity index 100% rename from server/workers/run_gsheets.py rename to server/workers/gsheets/run_gsheets.py diff --git a/server/openaire.docker b/server/workers/openaire/Dockerfile similarity index 95% rename from server/openaire.docker rename to server/workers/openaire/Dockerfile index 4a15cc2f5..eab60e18e 100644 --- a/server/openaire.docker +++ b/server/workers/openaire/Dockerfile @@ -141,22 +141,22 @@ RUN R -e 'options(repos="https://cran.wu.ac.at")' && \ R -e 'install.packages("renv", version="0.11.0-5")' WORKDIR /headstart -COPY workers/openaire/requirements.txt . +COPY requirements.txt . RUN pip3 install --no-cache-dir -r requirements.txt -COPY workers/openaire/renv.lock . -COPY workers/openaire/activate.R . +COPY openaire/renv.lock . +COPY openaire/activate.R . RUN R -e 'renv::consent(provided = TRUE)' && \ R -e 'setwd("./"); renv::activate(); renv::restore(lockfile = "./renv.lock")' -COPY workers/common ./common -COPY workers/openaire ./openaire +COPY ../common ./common +COPY ./openaire ./openaire COPY preprocessing/resources ./resources COPY preprocessing/other-scripts ./other-scripts RUN mkdir -p /var/log/headstart && touch /var/log/headstart/headstart.log -COPY workers/redis_config.json . +COPY redis_config.json . -COPY workers/*.py ./ +COPY *.py ./ ENTRYPOINT python3 run_openaire.py diff --git a/server/workers/run_openaire.py b/server/workers/openaire/run_openaire.py similarity index 100% rename from server/workers/run_openaire.py rename to server/workers/openaire/run_openaire.py diff --git a/server/pubmed.docker b/server/workers/pubmed/Dockerfile similarity index 94% rename from server/pubmed.docker rename to server/workers/pubmed/Dockerfile index ec3311706..3a02b6e1e 100644 --- a/server/pubmed.docker +++ b/server/workers/pubmed/Dockerfile @@ -141,22 +141,22 @@ RUN R -e 'options(repos="https://cran.wu.ac.at")' && \ R -e 'install.packages("renv", version="0.11.0-5")' WORKDIR /headstart -COPY workers/pubmed/requirements.txt . +COPY requirements.txt . RUN pip3 install --no-cache-dir -r requirements.txt -COPY workers/pubmed/renv.lock . -COPY workers/pubmed/activate.R . +COPY renv.lock . +COPY activate.R . RUN R -e 'renv::consent(provided = TRUE)' && \ R -e 'setwd("./"); renv::activate(); renv::restore(lockfile = "./renv.lock")' -COPY workers/common ./common -COPY workers/pubmed ./pubmed -COPY preprocessing/resources ./resources -COPY preprocessing/other-scripts ./other-scripts +COPY ../common ./common +COPY ./pubmed ./pubmed +COPY ../../preprocessing/resources ./resources +COPY ../../preprocessing/other-scripts ./other-scripts RUN mkdir -p /var/log/headstart && touch /var/log/headstart/headstart.log -COPY workers/redis_config.json . +COPY redis_config.json . -COPY workers/*.py ./ +COPY *.py ./ ENTRYPOINT python3 run_pubmed.py diff --git a/server/workers/run_pubmed.py b/server/workers/pubmed/run_pubmed.py similarity index 100% rename from server/workers/run_pubmed.py rename to server/workers/pubmed/run_pubmed.py diff --git a/server/services.docker b/server/workers/services/Dockerfile similarity index 80% rename from server/services.docker rename to server/workers/services/Dockerfile index 07e014cdb..73c58a1b3 100644 --- a/server/services.docker +++ b/server/workers/services/Dockerfile @@ -7,10 +7,10 @@ RUN apt-get install -y --no-install-recommends gcc RUN apt-get install -y --no-install-recommends git WORKDIR /headstart -COPY workers/services/requirements.txt . +COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt RUN pip install git+https://github.com/python-restx/flask-restx -COPY workers/services/src/ ./ -COPY workers/redis_config.json . +COPY src/ ./ +COPY redis_config.json . CMD gunicorn --workers 10 --threads 2 -b 127.0.0.1:5001 'app:app' --timeout 300 diff --git a/server/search_triple.docker b/server/workers/triple/Dockerfile similarity index 62% rename from server/search_triple.docker rename to server/workers/triple/Dockerfile index 50e49454e..e72bcd71e 100644 --- a/server/search_triple.docker +++ b/server/workers/triple/Dockerfile @@ -6,12 +6,12 @@ RUN apk update RUN apk add build-base gcc WORKDIR /headstart -COPY workers/triple/requirements.txt . +COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt RUN python -m spacy download xx_ent_wiki_sm -COPY workers/triple/src/ ./triple/src -COPY workers/run_triple.py . -COPY workers/triple/es_config.json . -COPY workers/redis_config.json . +COPY src/ ./triple/src +COPY run_triple.py . +COPY es_config.json . +COPY redis_config.json . ENTRYPOINT python run_triple.py diff --git a/server/workers/run_triple.py b/server/workers/triple/run_triple.py similarity index 100% rename from server/workers/run_triple.py rename to server/workers/triple/run_triple.py From 90675cb91caac1a496951189e4ca2118e72753ef Mon Sep 17 00:00:00 2001 From: chreman Date: Thu, 6 May 2021 18:23:49 +0200 Subject: [PATCH 02/54] wip --- docker-compose.yml | 51 +++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index c5187ce42..fce153e51 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -22,31 +22,35 @@ services: env_file: - .env - api: - build: - context: server - dockerfile: services.docker - restart: always - depends_on: - - redis - network_mode: "host" - depends_on: - - redis - redis: image: 'redis:4.0-alpine' restart: always + environment: + REDIS_HOST: "${REDIS_HOST}" + REDIS_PORT: "${REDIS_PORT}" command: ["redis-server", "/etc/redis/redis.conf", "--appendonly", "yes"] volumes: - - 'redis:/var/lib/redis/data' - - ./server/workers/redis.conf:/etc/redis/redis.conf + - 'redis:/var/lib/redis/data' + - ./server/workers/redis.conf:/etc/redis/redis.conf restart: always - network_mode: "host" + + api: + build: + context: server/workers/services + restart: always + depends_on: + - redis + environment: + REDIS_HOST: "${REDIS_HOST}" + REDIS_PORT: "${REDIS_PORT}" + ports: + - '127.0.0.1:${REDIS_PORT}:${REDIS_PORT}' + depends_on: + - redis search_triple: build: - context: server - dockerfile: search_triple.docker + context: server/workers/triple env_file: - server/workers/triple/triple.env restart: always @@ -56,8 +60,7 @@ services: search_gsheets: build: - context: server - dockerfile: search_gsheets.docker + context: server/workers/gsheets env_file: - server/workers/gsheets/gsheets.env restart: always @@ -67,8 +70,7 @@ services: dataprocessing: build: - context: server - dockerfile: dataprocessing.docker + context: server/workers/dataprocessing env_file: - server/workers/dataprocessing/dataprocessing.env restart: always @@ -81,8 +83,7 @@ services: search_base: build: - context: server - dockerfile: base.docker + context: server/workers/base env_file: - server/workers/base/base.env restart: always @@ -95,8 +96,7 @@ services: search_pubmed: build: - context: server - dockerfile: pubmed.docker + context: server/workers/pubmed env_file: - server/workers/pubmed/pubmed.env restart: always @@ -109,8 +109,7 @@ services: search_openaire: build: - context: server - dockerfile: openaire.docker + context: server/workers/openaire env_file: - server/workers/openaire/openaire.env restart: always From 5e719d8096c615a6e21075c81d93038951cd7fb5 Mon Sep 17 00:00:00 2001 From: chreman Date: Fri, 7 May 2021 15:15:08 +0200 Subject: [PATCH 03/54] config locations and routing changes --- docker-compose.yml | 73 ++++++++++++++----- server/workers/{services => api}/Dockerfile | 0 .../{services => api}/requirements.txt | 0 .../workers/{services => api}/src/__init__.py | 0 .../{services => api}/src/apis/__init__.py | 0 .../{services => api}/src/apis/base.py | 0 .../{services => api}/src/apis/gsheets.py | 0 .../{services => api}/src/apis/openaire.py | 0 .../{services => api}/src/apis/persistence.py | 0 .../{services => api}/src/apis/pubmed.py | 0 .../src/apis/request_validators.py | 0 .../{services => api}/src/apis/triple.py | 0 .../{services => api}/src/apis/utils.py | 0 server/workers/{services => api}/src/app.py | 0 .../{services => api}/src/config/__init__.py | 0 .../src/config/example_settings.py | 0 .../{services => api}/src/config/swagger.json | 0 .../workers/{services => api}/src/database.py | 0 .../workers/{services => api}/src/manage.py | 0 .../workers/{services => api}/src/models.py | 0 .../src/templates/tables.html | 0 .../{services => api}/src/utils/__init__.py | 0 .../src/utils/monkeypatches.py | 0 server/workers/base/Dockerfile | 4 +- server/workers/base/run_base.py | 1 + server/workers/dataprocessing/Dockerfile | 14 ++-- .../dataprocessing/run_dataprocessing.py | 2 + .../workers/dataprocessing/src/headstart.py | 2 +- server/workers/gsheets/Dockerfile | 10 +-- server/workers/gsheets/run_gsheets.py | 1 + server/workers/openaire/Dockerfile | 14 ++-- server/workers/openaire/run_openaire.py | 1 + server/workers/pubmed/Dockerfile | 18 ++--- server/workers/pubmed/run_pubmed.py | 1 + server/workers/triple/Dockerfile | 10 +-- server/workers/triple/run_triple.py | 1 + 36 files changed, 97 insertions(+), 55 deletions(-) rename server/workers/{services => api}/Dockerfile (100%) rename server/workers/{services => api}/requirements.txt (100%) rename server/workers/{services => api}/src/__init__.py (100%) rename server/workers/{services => api}/src/apis/__init__.py (100%) rename server/workers/{services => api}/src/apis/base.py (100%) rename server/workers/{services => api}/src/apis/gsheets.py (100%) rename server/workers/{services => api}/src/apis/openaire.py (100%) rename server/workers/{services => api}/src/apis/persistence.py (100%) rename server/workers/{services => api}/src/apis/pubmed.py (100%) rename server/workers/{services => api}/src/apis/request_validators.py (100%) rename server/workers/{services => api}/src/apis/triple.py (100%) rename server/workers/{services => api}/src/apis/utils.py (100%) rename server/workers/{services => api}/src/app.py (100%) rename server/workers/{services => api}/src/config/__init__.py (100%) rename server/workers/{services => api}/src/config/example_settings.py (100%) rename server/workers/{services => api}/src/config/swagger.json (100%) rename server/workers/{services => api}/src/database.py (100%) rename server/workers/{services => api}/src/manage.py (100%) rename server/workers/{services => api}/src/models.py (100%) rename server/workers/{services => api}/src/templates/tables.html (100%) rename server/workers/{services => api}/src/utils/__init__.py (100%) rename server/workers/{services => api}/src/utils/monkeypatches.py (100%) diff --git a/docker-compose.yml b/docker-compose.yml index fce153e51..2f2b21981 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -19,12 +19,11 @@ services: image: 'dpage/pgadmin4' ports: - '127.0.0.1:54323:80' - env_file: - - .env redis: image: 'redis:4.0-alpine' restart: always + hostname: "${REDIS_HOST}" environment: REDIS_HOST: "${REDIS_HOST}" REDIS_PORT: "${REDIS_PORT}" @@ -33,94 +32,130 @@ services: - 'redis:/var/lib/redis/data' - ./server/workers/redis.conf:/etc/redis/redis.conf restart: always + networks: + - headstart api: build: - context: server/workers/services + context: server + dockerfile: workers/api/Dockerfile restart: always - depends_on: - - redis environment: REDIS_HOST: "${REDIS_HOST}" REDIS_PORT: "${REDIS_PORT}" ports: - - '127.0.0.1:${REDIS_PORT}:${REDIS_PORT}' + - '5001:5001' depends_on: - redis + networks: + - headstart search_triple: build: - context: server/workers/triple + context: server + dockerfile: workers/triple/Dockerfile env_file: - server/workers/triple/triple.env + environment: + REDIS_HOST: "${REDIS_HOST}" + REDIS_PORT: "${REDIS_PORT}" restart: always - network_mode: "host" depends_on: - redis + networks: + - headstart search_gsheets: build: - context: server/workers/gsheets + context: server + dockerfile: workers/gsheets/Dockerfile env_file: - server/workers/gsheets/gsheets.env + environment: + REDIS_HOST: "${REDIS_HOST}" + REDIS_PORT: "${REDIS_PORT}" restart: always - network_mode: "host" depends_on: - redis + networks: + - headstart dataprocessing: build: - context: server/workers/dataprocessing + context: server + dockerfile: workers/dataprocessing/Dockerfile env_file: - - server/workers/dataprocessing/dataprocessing.env + - server/workers/dataprocessing/dataprocessing.env + environment: + REDIS_HOST: "${REDIS_HOST}" + REDIS_PORT: "${REDIS_PORT}" restart: always - network_mode: "host" volumes: - /opt/local/renv/cache:/renv/cache - /var/log/headstart:/var/log/headstart depends_on: - redis + networks: + - headstart search_base: build: - context: server/workers/base + context: server + dockerfile: workers/base/Dockerfile env_file: - server/workers/base/base.env + environment: + REDIS_HOST: "${REDIS_HOST}" + REDIS_PORT: "${REDIS_PORT}" restart: always - network_mode: "host" volumes: - /opt/local/renv/cache:/renv/cache - /var/log/headstart:/var/log/headstart depends_on: - redis + networks: + - headstart search_pubmed: build: - context: server/workers/pubmed + context: server + dockerfile: workers/pubmed/Dockerfile env_file: - server/workers/pubmed/pubmed.env + environment: + REDIS_HOST: "${REDIS_HOST}" + REDIS_PORT: "${REDIS_PORT}" restart: always - network_mode: "host" volumes: - /opt/local/renv/cache:/renv/cache - /var/log/headstart:/var/log/headstart depends_on: - redis + networks: + - headstart search_openaire: build: - context: server/workers/openaire + context: server + dockerfile: workers/openaire/Dockerfile env_file: - server/workers/openaire/openaire.env + environment: + REDIS_HOST: "${REDIS_HOST}" + REDIS_PORT: "${REDIS_PORT}" restart: always - network_mode: "host" volumes: - /opt/local/renv/cache:/renv/cache - /var/log/headstart:/var/log/headstart depends_on: - redis + networks: + - headstart volumes: redis: db-data: driver: local + +networks: + headstart: \ No newline at end of file diff --git a/server/workers/services/Dockerfile b/server/workers/api/Dockerfile similarity index 100% rename from server/workers/services/Dockerfile rename to server/workers/api/Dockerfile diff --git a/server/workers/services/requirements.txt b/server/workers/api/requirements.txt similarity index 100% rename from server/workers/services/requirements.txt rename to server/workers/api/requirements.txt diff --git a/server/workers/services/src/__init__.py b/server/workers/api/src/__init__.py similarity index 100% rename from server/workers/services/src/__init__.py rename to server/workers/api/src/__init__.py diff --git a/server/workers/services/src/apis/__init__.py b/server/workers/api/src/apis/__init__.py similarity index 100% rename from server/workers/services/src/apis/__init__.py rename to server/workers/api/src/apis/__init__.py diff --git a/server/workers/services/src/apis/base.py b/server/workers/api/src/apis/base.py similarity index 100% rename from server/workers/services/src/apis/base.py rename to server/workers/api/src/apis/base.py diff --git a/server/workers/services/src/apis/gsheets.py b/server/workers/api/src/apis/gsheets.py similarity index 100% rename from server/workers/services/src/apis/gsheets.py rename to server/workers/api/src/apis/gsheets.py diff --git a/server/workers/services/src/apis/openaire.py b/server/workers/api/src/apis/openaire.py similarity index 100% rename from server/workers/services/src/apis/openaire.py rename to server/workers/api/src/apis/openaire.py diff --git a/server/workers/services/src/apis/persistence.py b/server/workers/api/src/apis/persistence.py similarity index 100% rename from server/workers/services/src/apis/persistence.py rename to server/workers/api/src/apis/persistence.py diff --git a/server/workers/services/src/apis/pubmed.py b/server/workers/api/src/apis/pubmed.py similarity index 100% rename from server/workers/services/src/apis/pubmed.py rename to server/workers/api/src/apis/pubmed.py diff --git a/server/workers/services/src/apis/request_validators.py b/server/workers/api/src/apis/request_validators.py similarity index 100% rename from server/workers/services/src/apis/request_validators.py rename to server/workers/api/src/apis/request_validators.py diff --git a/server/workers/services/src/apis/triple.py b/server/workers/api/src/apis/triple.py similarity index 100% rename from server/workers/services/src/apis/triple.py rename to server/workers/api/src/apis/triple.py diff --git a/server/workers/services/src/apis/utils.py b/server/workers/api/src/apis/utils.py similarity index 100% rename from server/workers/services/src/apis/utils.py rename to server/workers/api/src/apis/utils.py diff --git a/server/workers/services/src/app.py b/server/workers/api/src/app.py similarity index 100% rename from server/workers/services/src/app.py rename to server/workers/api/src/app.py diff --git a/server/workers/services/src/config/__init__.py b/server/workers/api/src/config/__init__.py similarity index 100% rename from server/workers/services/src/config/__init__.py rename to server/workers/api/src/config/__init__.py diff --git a/server/workers/services/src/config/example_settings.py b/server/workers/api/src/config/example_settings.py similarity index 100% rename from server/workers/services/src/config/example_settings.py rename to server/workers/api/src/config/example_settings.py diff --git a/server/workers/services/src/config/swagger.json b/server/workers/api/src/config/swagger.json similarity index 100% rename from server/workers/services/src/config/swagger.json rename to server/workers/api/src/config/swagger.json diff --git a/server/workers/services/src/database.py b/server/workers/api/src/database.py similarity index 100% rename from server/workers/services/src/database.py rename to server/workers/api/src/database.py diff --git a/server/workers/services/src/manage.py b/server/workers/api/src/manage.py similarity index 100% rename from server/workers/services/src/manage.py rename to server/workers/api/src/manage.py diff --git a/server/workers/services/src/models.py b/server/workers/api/src/models.py similarity index 100% rename from server/workers/services/src/models.py rename to server/workers/api/src/models.py diff --git a/server/workers/services/src/templates/tables.html b/server/workers/api/src/templates/tables.html similarity index 100% rename from server/workers/services/src/templates/tables.html rename to server/workers/api/src/templates/tables.html diff --git a/server/workers/services/src/utils/__init__.py b/server/workers/api/src/utils/__init__.py similarity index 100% rename from server/workers/services/src/utils/__init__.py rename to server/workers/api/src/utils/__init__.py diff --git a/server/workers/services/src/utils/monkeypatches.py b/server/workers/api/src/utils/monkeypatches.py similarity index 100% rename from server/workers/services/src/utils/monkeypatches.py rename to server/workers/api/src/utils/monkeypatches.py diff --git a/server/workers/base/Dockerfile b/server/workers/base/Dockerfile index 02e03c0a8..b1bfeded1 100644 --- a/server/workers/base/Dockerfile +++ b/server/workers/base/Dockerfile @@ -156,7 +156,7 @@ COPY preprocessing/resources ./resources COPY preprocessing/other-scripts ./other-scripts RUN mkdir -p /var/log/headstart && touch /var/log/headstart/headstart.log -COPY workers/redis_config.json . +COPY workers/base/redis_config.json . -COPY workers/*.py ./ +COPY workers/base/*.py ./ ENTRYPOINT python3 run_base.py diff --git a/server/workers/base/run_base.py b/server/workers/base/run_base.py index 2dcd4c7bf..3b5680574 100644 --- a/server/workers/base/run_base.py +++ b/server/workers/base/run_base.py @@ -7,6 +7,7 @@ if __name__ == '__main__': with open("redis_config.json") as infile: redis_config = json.load(infile) + redis_config["host"] = os.getenv("REDIS_HOST") redis_store = redis.StrictRedis(**redis_config) wrapper = BaseClient("./other-scripts", "run_base.R", redis_store, diff --git a/server/workers/dataprocessing/Dockerfile b/server/workers/dataprocessing/Dockerfile index 78abf305a..6b732ed53 100644 --- a/server/workers/dataprocessing/Dockerfile +++ b/server/workers/dataprocessing/Dockerfile @@ -144,22 +144,22 @@ RUN R -e 'options(repos="https://cran.wu.ac.at")' && \ R -e 'install.packages("renv", version="0.11.0-5")' WORKDIR /headstart -COPY dataprocessing/requirements.txt . +COPY workers/dataprocessing/requirements.txt . RUN pip3 install --no-cache-dir -r requirements.txt -COPY dataprocessing/renv.lock . -COPY dataprocessing/activate.R . +COPY workers/dataprocessing/renv.lock . +COPY workers/dataprocessing/activate.R . RUN R -e 'renv::consent(provided = TRUE)' && \ R -e 'setwd("./"); renv::activate(); renv::restore(lockfile = "./renv.lock")' -COPY ../common ./common -COPY ./dataprocessing ./dataprocessing +COPY workers/common ./common +COPY workers/dataprocessing ./dataprocessing COPY preprocessing/resources ./resources COPY preprocessing/other-scripts ./other-scripts RUN mkdir -p /var/log/headstart && touch /var/log/headstart/headstart.log -COPY redis_config.json . +COPY workers/dataprocessing/redis_config.json . -COPY *.py ./ +COPY workers/dataprocessing/*.py ./ ENTRYPOINT python3 run_dataprocessing.py diff --git a/server/workers/dataprocessing/run_dataprocessing.py b/server/workers/dataprocessing/run_dataprocessing.py index 3ac7eb2e9..c69e44179 100644 --- a/server/workers/dataprocessing/run_dataprocessing.py +++ b/server/workers/dataprocessing/run_dataprocessing.py @@ -7,6 +7,8 @@ if __name__ == '__main__': with open("redis_config.json") as infile: redis_config = json.load(infile) + redis_config["host"] = os.getenv("REDIS_HOST") + print(redis_config) redis_store = redis.StrictRedis(**redis_config) dp = Dataprocessing("./other-scripts", "run_vis_layout.R", diff --git a/server/workers/dataprocessing/src/headstart.py b/server/workers/dataprocessing/src/headstart.py index 1568e460e..dbeddcc86 100644 --- a/server/workers/dataprocessing/src/headstart.py +++ b/server/workers/dataprocessing/src/headstart.py @@ -84,8 +84,8 @@ def run(self): res = self.create_map(params, input_data) self.redis_store.set(k+"_output", json.dumps(res)) except Exception as e: - self.logger.error(e) self.logger.error(params) + self.logger.error(e, exc_info=True) res = {} res["id"] = k res["params"] = params diff --git a/server/workers/gsheets/Dockerfile b/server/workers/gsheets/Dockerfile index c4057cc60..5b620e259 100644 --- a/server/workers/gsheets/Dockerfile +++ b/server/workers/gsheets/Dockerfile @@ -6,11 +6,11 @@ RUN apk update RUN apk add build-base gcc WORKDIR /headstart -COPY requirements.txt . +COPY workers/gsheets/requirements.txt . RUN pip install --no-cache-dir -r requirements.txt -COPY src/ ./gsheets/src -COPY run_gsheets.py . -COPY token.pickle ./gsheets -COPY redis_config.json . +COPY workers/gsheets/src/ ./gsheets/src +COPY workers/gsheets/run_gsheets.py . +COPY workers/gsheets/token.pickle ./gsheets +COPY workers/gsheets/redis_config.json . ENTRYPOINT python run_gsheets.py diff --git a/server/workers/gsheets/run_gsheets.py b/server/workers/gsheets/run_gsheets.py index e7077afb7..a874fc2dd 100644 --- a/server/workers/gsheets/run_gsheets.py +++ b/server/workers/gsheets/run_gsheets.py @@ -7,6 +7,7 @@ if __name__ == '__main__': with open("redis_config.json") as infile: redis_config = json.load(infile) + redis_config["host"] = os.getenv("REDIS_HOST") redis_store = redis.StrictRedis(**redis_config) gc = GSheetsClient(redis_store, os.environ.get("GSHEETS_LOGLEVEL", "INFO")) diff --git a/server/workers/openaire/Dockerfile b/server/workers/openaire/Dockerfile index eab60e18e..45e660816 100644 --- a/server/workers/openaire/Dockerfile +++ b/server/workers/openaire/Dockerfile @@ -141,22 +141,22 @@ RUN R -e 'options(repos="https://cran.wu.ac.at")' && \ R -e 'install.packages("renv", version="0.11.0-5")' WORKDIR /headstart -COPY requirements.txt . +COPY workers/openaire/requirements.txt . RUN pip3 install --no-cache-dir -r requirements.txt -COPY openaire/renv.lock . -COPY openaire/activate.R . +COPY workers/openaire/renv.lock . +COPY workers/openaire/activate.R . RUN R -e 'renv::consent(provided = TRUE)' && \ R -e 'setwd("./"); renv::activate(); renv::restore(lockfile = "./renv.lock")' -COPY ../common ./common -COPY ./openaire ./openaire +COPY workers/common ./common +COPY workers/openaire ./openaire COPY preprocessing/resources ./resources COPY preprocessing/other-scripts ./other-scripts RUN mkdir -p /var/log/headstart && touch /var/log/headstart/headstart.log -COPY redis_config.json . +COPY workers/openaire/redis_config.json . -COPY *.py ./ +COPY workers/openaire/*.py ./ ENTRYPOINT python3 run_openaire.py diff --git a/server/workers/openaire/run_openaire.py b/server/workers/openaire/run_openaire.py index 2f17fe605..d92cd28b5 100644 --- a/server/workers/openaire/run_openaire.py +++ b/server/workers/openaire/run_openaire.py @@ -7,6 +7,7 @@ if __name__ == '__main__': with open("redis_config.json") as infile: redis_config = json.load(infile) + redis_config["host"] = os.getenv("REDIS_HOST") redis_store = redis.StrictRedis(**redis_config) wrapper = OpenAIREClient("./other-scripts", "run_openaire.R", redis_store, diff --git a/server/workers/pubmed/Dockerfile b/server/workers/pubmed/Dockerfile index 3a02b6e1e..b141e35bb 100644 --- a/server/workers/pubmed/Dockerfile +++ b/server/workers/pubmed/Dockerfile @@ -141,22 +141,22 @@ RUN R -e 'options(repos="https://cran.wu.ac.at")' && \ R -e 'install.packages("renv", version="0.11.0-5")' WORKDIR /headstart -COPY requirements.txt . +COPY workers/pubmed/requirements.txt . RUN pip3 install --no-cache-dir -r requirements.txt -COPY renv.lock . -COPY activate.R . +COPY workers/pubmed/renv.lock . +COPY workers/pubmed/activate.R . RUN R -e 'renv::consent(provided = TRUE)' && \ R -e 'setwd("./"); renv::activate(); renv::restore(lockfile = "./renv.lock")' -COPY ../common ./common -COPY ./pubmed ./pubmed -COPY ../../preprocessing/resources ./resources -COPY ../../preprocessing/other-scripts ./other-scripts +COPY workers/common ./common +COPY workers/pubmed ./pubmed +COPY preprocessing/resources ./resources +COPY preprocessing/other-scripts ./other-scripts RUN mkdir -p /var/log/headstart && touch /var/log/headstart/headstart.log -COPY redis_config.json . +COPY workers/pubmed/redis_config.json . -COPY *.py ./ +COPY workers/pubmed/*.py ./ ENTRYPOINT python3 run_pubmed.py diff --git a/server/workers/pubmed/run_pubmed.py b/server/workers/pubmed/run_pubmed.py index f2d6da2de..6674b4180 100644 --- a/server/workers/pubmed/run_pubmed.py +++ b/server/workers/pubmed/run_pubmed.py @@ -7,6 +7,7 @@ if __name__ == '__main__': with open("redis_config.json") as infile: redis_config = json.load(infile) + redis_config["host"] = os.getenv("REDIS_HOST") redis_store = redis.StrictRedis(**redis_config) wrapper = PubMedClient("./other-scripts", "run_pubmed.R", redis_store, diff --git a/server/workers/triple/Dockerfile b/server/workers/triple/Dockerfile index e72bcd71e..28393a262 100644 --- a/server/workers/triple/Dockerfile +++ b/server/workers/triple/Dockerfile @@ -6,12 +6,12 @@ RUN apk update RUN apk add build-base gcc WORKDIR /headstart -COPY requirements.txt . +COPY workers/triple/requirements.txt . RUN pip install --no-cache-dir -r requirements.txt RUN python -m spacy download xx_ent_wiki_sm -COPY src/ ./triple/src -COPY run_triple.py . -COPY es_config.json . -COPY redis_config.json . +COPY workers/triple/src/ ./triple/src +COPY workers/triple/run_triple.py . +COPY workers/triple/es_config.json . +COPY workers/triple/redis_config.json . ENTRYPOINT python run_triple.py diff --git a/server/workers/triple/run_triple.py b/server/workers/triple/run_triple.py index 9b8700615..62a55d197 100644 --- a/server/workers/triple/run_triple.py +++ b/server/workers/triple/run_triple.py @@ -9,6 +9,7 @@ es_config = json.load(infile) with open("redis_config.json") as infile: redis_config = json.load(infile) + redis_config["host"] = os.getenv("REDIS_HOST") redis_store = redis.StrictRedis(**redis_config) tc = TripleClient(es_config, redis_store, os.environ.get("TRIPLE_LOGLEVEL", "INFO")) From 93ebedca3d0a6a8c12618e426200a8c978468d42 Mon Sep 17 00:00:00 2001 From: chreman Date: Mon, 10 May 2021 21:42:46 +0200 Subject: [PATCH 04/54] move CMD for api container to compose --- docker-compose.yml | 6 +++++- server/workers/api/Dockerfile | 10 +++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 2f2b21981..db177d61c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,6 +14,8 @@ services: - ./server/workers/postgresql.conf:/var/lib/postgresql/data/postgresql.conf ports: - '127.0.0.1:5432:5432' + networks: + - headstart pgadmin: image: 'dpage/pgadmin4' @@ -43,6 +45,7 @@ services: environment: REDIS_HOST: "${REDIS_HOST}" REDIS_PORT: "${REDIS_PORT}" + command: ["gunicorn", "--workers", "10", "--threads", "2", "-b", "127.0.0.1:${API_PORT}", "app:app", "--timeout", "300"] ports: - '5001:5001' depends_on: @@ -158,4 +161,5 @@ volumes: driver: local networks: - headstart: \ No newline at end of file + headstart: + driver: bridge \ No newline at end of file diff --git a/server/workers/api/Dockerfile b/server/workers/api/Dockerfile index 73c58a1b3..6cb9a621a 100644 --- a/server/workers/api/Dockerfile +++ b/server/workers/api/Dockerfile @@ -6,11 +6,11 @@ RUN apt-get update RUN apt-get install -y --no-install-recommends gcc RUN apt-get install -y --no-install-recommends git -WORKDIR /headstart -COPY requirements.txt . +WORKDIR /api +COPY workers/api/requirements.txt . RUN pip install --no-cache-dir -r requirements.txt RUN pip install git+https://github.com/python-restx/flask-restx -COPY src/ ./ -COPY redis_config.json . +COPY workers/api/src/ ./ +COPY workers/api/redis_config.json . + -CMD gunicorn --workers 10 --threads 2 -b 127.0.0.1:5001 'app:app' --timeout 300 From 55bb05f153e3d4aecd4484f5f0ae25b60945b0df Mon Sep 17 00:00:00 2001 From: chreman Date: Mon, 10 May 2021 22:59:32 +0200 Subject: [PATCH 05/54] move redis_config to .env --- server/workers/api/Dockerfile | 1 - server/workers/api/src/apis/base.py | 12 +++++++----- server/workers/api/src/apis/gsheets.py | 10 ++++++---- server/workers/api/src/apis/openaire.py | 9 ++++++--- server/workers/api/src/apis/pubmed.py | 9 ++++++--- server/workers/api/src/apis/triple.py | 9 ++++++--- server/workers/base/Dockerfile | 2 -- server/workers/base/run_base.py | 9 ++++++--- server/workers/dataprocessing/Dockerfile | 2 -- server/workers/dataprocessing/run_dataprocessing.py | 10 ++++++---- server/workers/gsheets/Dockerfile | 1 - server/workers/gsheets/run_gsheets.py | 10 ++++++---- server/workers/openaire/run_openaire.py | 10 ++++++---- server/workers/pubmed/Dockerfile | 2 -- server/workers/pubmed/run_pubmed.py | 10 ++++++---- server/workers/tests/test_helpers.py | 8 ++++++-- server/workers/triple/Dockerfile | 1 - server/workers/triple/run_triple.py | 9 ++++++--- 18 files changed, 73 insertions(+), 51 deletions(-) diff --git a/server/workers/api/Dockerfile b/server/workers/api/Dockerfile index 6cb9a621a..9e5271193 100644 --- a/server/workers/api/Dockerfile +++ b/server/workers/api/Dockerfile @@ -11,6 +11,5 @@ COPY workers/api/requirements.txt . RUN pip install --no-cache-dir -r requirements.txt RUN pip install git+https://github.com/python-restx/flask-restx COPY workers/api/src/ ./ -COPY workers/api/redis_config.json . diff --git a/server/workers/api/src/apis/base.py b/server/workers/api/src/apis/base.py index e4e4e9bc5..c2a309555 100644 --- a/server/workers/api/src/apis/base.py +++ b/server/workers/api/src/apis/base.py @@ -13,13 +13,15 @@ from apis.utils import get_key -with open("redis_config.json") as infile: - redis_config = json.load(infile) - -redis_store = redis.StrictRedis(**redis_config) - base_ns = Namespace("base", description="BASE API operations") +redis_config = { + "host": os.getenv("REDIS_HOST"), + "port": os.getenv("REDIS_PORT"), + "db": os.getenv("REDIS_DB"), + "password": os.getenv("REDIS_PASSWORD") +} +redis_store = redis.StrictRedis(**redis_config) search_param_schema = SearchParamSchema() diff --git a/server/workers/api/src/apis/gsheets.py b/server/workers/api/src/apis/gsheets.py index 360dd5bfb..f9e4cca26 100644 --- a/server/workers/api/src/apis/gsheets.py +++ b/server/workers/api/src/apis/gsheets.py @@ -8,10 +8,12 @@ from flask_restx import Namespace, Resource, fields from apis.utils import get_key - -with open("redis_config.json") as infile: - redis_config = json.load(infile) - +redis_config = { + "host": os.getenv("REDIS_HOST"), + "port": os.getenv("REDIS_PORT"), + "db": os.getenv("REDIS_DB"), + "password": os.getenv("REDIS_PASSWORD") +} redis_store = redis.StrictRedis(**redis_config) gsheets_ns = Namespace("google_sheets", description="Google Sheets API operations") diff --git a/server/workers/api/src/apis/openaire.py b/server/workers/api/src/apis/openaire.py index 13eaea1aa..e4c511d85 100644 --- a/server/workers/api/src/apis/openaire.py +++ b/server/workers/api/src/apis/openaire.py @@ -13,9 +13,12 @@ from apis.utils import get_key -with open("redis_config.json") as infile: - redis_config = json.load(infile) - +redis_config = { + "host": os.getenv("REDIS_HOST"), + "port": os.getenv("REDIS_PORT"), + "db": os.getenv("REDIS_DB"), + "password": os.getenv("REDIS_PASSWORD") +} redis_store = redis.StrictRedis(**redis_config) openaire_ns = Namespace("openaire", description="OpenAIRE API operations") diff --git a/server/workers/api/src/apis/pubmed.py b/server/workers/api/src/apis/pubmed.py index fa04bc870..ea6d3fc0d 100644 --- a/server/workers/api/src/apis/pubmed.py +++ b/server/workers/api/src/apis/pubmed.py @@ -13,9 +13,12 @@ from apis.utils import get_key -with open("redis_config.json") as infile: - redis_config = json.load(infile) - +redis_config = { + "host": os.getenv("REDIS_HOST"), + "port": os.getenv("REDIS_PORT"), + "db": os.getenv("REDIS_DB"), + "password": os.getenv("REDIS_PASSWORD") +} redis_store = redis.StrictRedis(**redis_config) pubmed_ns = Namespace("pubmed", description="PubMed API operations") diff --git a/server/workers/api/src/apis/triple.py b/server/workers/api/src/apis/triple.py index c31bd59d8..95ded9fdd 100644 --- a/server/workers/api/src/apis/triple.py +++ b/server/workers/api/src/apis/triple.py @@ -13,9 +13,12 @@ from apis.utils import get_key, detect_error -with open("redis_config.json") as infile: - redis_config = json.load(infile) - +redis_config = { + "host": os.getenv("REDIS_HOST"), + "port": os.getenv("REDIS_PORT"), + "db": os.getenv("REDIS_DB"), + "password": os.getenv("REDIS_PASSWORD") +} redis_store = redis.StrictRedis(**redis_config) triple_ns = Namespace("triple", description="TRIPLE API operations") diff --git a/server/workers/base/Dockerfile b/server/workers/base/Dockerfile index b1bfeded1..e968afa22 100644 --- a/server/workers/base/Dockerfile +++ b/server/workers/base/Dockerfile @@ -156,7 +156,5 @@ COPY preprocessing/resources ./resources COPY preprocessing/other-scripts ./other-scripts RUN mkdir -p /var/log/headstart && touch /var/log/headstart/headstart.log -COPY workers/base/redis_config.json . - COPY workers/base/*.py ./ ENTRYPOINT python3 run_base.py diff --git a/server/workers/base/run_base.py b/server/workers/base/run_base.py index 3b5680574..1c212c6e1 100644 --- a/server/workers/base/run_base.py +++ b/server/workers/base/run_base.py @@ -5,9 +5,12 @@ if __name__ == '__main__': - with open("redis_config.json") as infile: - redis_config = json.load(infile) - redis_config["host"] = os.getenv("REDIS_HOST") + redis_config = { + "host": os.getenv("REDIS_HOST"), + "port": os.getenv("REDIS_PORT"), + "db": os.getenv("REDIS_DB"), + "password": os.getenv("REDIS_PASSWORD") + } redis_store = redis.StrictRedis(**redis_config) wrapper = BaseClient("./other-scripts", "run_base.R", redis_store, diff --git a/server/workers/dataprocessing/Dockerfile b/server/workers/dataprocessing/Dockerfile index 6b732ed53..c527eb800 100644 --- a/server/workers/dataprocessing/Dockerfile +++ b/server/workers/dataprocessing/Dockerfile @@ -159,7 +159,5 @@ COPY preprocessing/resources ./resources COPY preprocessing/other-scripts ./other-scripts RUN mkdir -p /var/log/headstart && touch /var/log/headstart/headstart.log -COPY workers/dataprocessing/redis_config.json . - COPY workers/dataprocessing/*.py ./ ENTRYPOINT python3 run_dataprocessing.py diff --git a/server/workers/dataprocessing/run_dataprocessing.py b/server/workers/dataprocessing/run_dataprocessing.py index c69e44179..7f299c873 100644 --- a/server/workers/dataprocessing/run_dataprocessing.py +++ b/server/workers/dataprocessing/run_dataprocessing.py @@ -5,10 +5,12 @@ if __name__ == '__main__': - with open("redis_config.json") as infile: - redis_config = json.load(infile) - redis_config["host"] = os.getenv("REDIS_HOST") - print(redis_config) + redis_config = { + "host": os.getenv("REDIS_HOST"), + "port": os.getenv("REDIS_PORT"), + "db": os.getenv("REDIS_DB"), + "password": os.getenv("REDIS_PASSWORD") + } redis_store = redis.StrictRedis(**redis_config) dp = Dataprocessing("./other-scripts", "run_vis_layout.R", diff --git a/server/workers/gsheets/Dockerfile b/server/workers/gsheets/Dockerfile index 5b620e259..5ad41de7f 100644 --- a/server/workers/gsheets/Dockerfile +++ b/server/workers/gsheets/Dockerfile @@ -11,6 +11,5 @@ RUN pip install --no-cache-dir -r requirements.txt COPY workers/gsheets/src/ ./gsheets/src COPY workers/gsheets/run_gsheets.py . COPY workers/gsheets/token.pickle ./gsheets -COPY workers/gsheets/redis_config.json . ENTRYPOINT python run_gsheets.py diff --git a/server/workers/gsheets/run_gsheets.py b/server/workers/gsheets/run_gsheets.py index a874fc2dd..66f93c098 100644 --- a/server/workers/gsheets/run_gsheets.py +++ b/server/workers/gsheets/run_gsheets.py @@ -5,10 +5,12 @@ if __name__ == '__main__': - with open("redis_config.json") as infile: - redis_config = json.load(infile) - redis_config["host"] = os.getenv("REDIS_HOST") - + redis_config = { + "host": os.getenv("REDIS_HOST"), + "port": os.getenv("REDIS_PORT"), + "db": os.getenv("REDIS_DB"), + "password": os.getenv("REDIS_PASSWORD") + } redis_store = redis.StrictRedis(**redis_config) gc = GSheetsClient(redis_store, os.environ.get("GSHEETS_LOGLEVEL", "INFO")) gc.run() diff --git a/server/workers/openaire/run_openaire.py b/server/workers/openaire/run_openaire.py index d92cd28b5..45f9059ef 100644 --- a/server/workers/openaire/run_openaire.py +++ b/server/workers/openaire/run_openaire.py @@ -5,10 +5,12 @@ if __name__ == '__main__': - with open("redis_config.json") as infile: - redis_config = json.load(infile) - redis_config["host"] = os.getenv("REDIS_HOST") - + redis_config = { + "host": os.getenv("REDIS_HOST"), + "port": os.getenv("REDIS_PORT"), + "db": os.getenv("REDIS_DB"), + "password": os.getenv("REDIS_PASSWORD") + } redis_store = redis.StrictRedis(**redis_config) wrapper = OpenAIREClient("./other-scripts", "run_openaire.R", redis_store, "english", diff --git a/server/workers/pubmed/Dockerfile b/server/workers/pubmed/Dockerfile index b141e35bb..1b3fca43e 100644 --- a/server/workers/pubmed/Dockerfile +++ b/server/workers/pubmed/Dockerfile @@ -156,7 +156,5 @@ COPY preprocessing/resources ./resources COPY preprocessing/other-scripts ./other-scripts RUN mkdir -p /var/log/headstart && touch /var/log/headstart/headstart.log -COPY workers/pubmed/redis_config.json . - COPY workers/pubmed/*.py ./ ENTRYPOINT python3 run_pubmed.py diff --git a/server/workers/pubmed/run_pubmed.py b/server/workers/pubmed/run_pubmed.py index 6674b4180..3662d7158 100644 --- a/server/workers/pubmed/run_pubmed.py +++ b/server/workers/pubmed/run_pubmed.py @@ -5,10 +5,12 @@ if __name__ == '__main__': - with open("redis_config.json") as infile: - redis_config = json.load(infile) - redis_config["host"] = os.getenv("REDIS_HOST") - + redis_config = { + "host": os.getenv("REDIS_HOST"), + "port": os.getenv("REDIS_PORT"), + "db": os.getenv("REDIS_DB"), + "password": os.getenv("REDIS_PASSWORD") + } redis_store = redis.StrictRedis(**redis_config) wrapper = PubMedClient("./other-scripts", "run_pubmed.R", redis_store, "english", diff --git a/server/workers/tests/test_helpers.py b/server/workers/tests/test_helpers.py index bba33d324..3da202c9b 100644 --- a/server/workers/tests/test_helpers.py +++ b/server/workers/tests/test_helpers.py @@ -10,8 +10,12 @@ from .conftest import RANDOM from ..services.src.apis.utils import get_key -with open("redis_config.json") as infile: - redis_config = json.load(infile) +redis_config = { + "host": os.getenv("REDIS_HOST"), + "port": os.getenv("REDIS_PORT"), + "db": os.getenv("REDIS_DB"), + "password": os.getenv("REDIS_PASSWORD") +} redis_store = redis.StrictRedis(**redis_config) diff --git a/server/workers/triple/Dockerfile b/server/workers/triple/Dockerfile index 28393a262..ba6331afd 100644 --- a/server/workers/triple/Dockerfile +++ b/server/workers/triple/Dockerfile @@ -12,6 +12,5 @@ RUN python -m spacy download xx_ent_wiki_sm COPY workers/triple/src/ ./triple/src COPY workers/triple/run_triple.py . COPY workers/triple/es_config.json . -COPY workers/triple/redis_config.json . ENTRYPOINT python run_triple.py diff --git a/server/workers/triple/run_triple.py b/server/workers/triple/run_triple.py index 62a55d197..1f080277b 100644 --- a/server/workers/triple/run_triple.py +++ b/server/workers/triple/run_triple.py @@ -7,9 +7,12 @@ if __name__ == '__main__': with open("es_config.json") as infile: es_config = json.load(infile) - with open("redis_config.json") as infile: - redis_config = json.load(infile) - redis_config["host"] = os.getenv("REDIS_HOST") + redis_config = { + "host": os.getenv("REDIS_HOST"), + "port": os.getenv("REDIS_PORT"), + "db": os.getenv("REDIS_DB"), + "password": os.getenv("REDIS_PASSWORD") + } redis_store = redis.StrictRedis(**redis_config) tc = TripleClient(es_config, redis_store, os.environ.get("TRIPLE_LOGLEVEL", "INFO")) From 241a3adedbb86a66e27ee1612830be38bf8d3062 Mon Sep 17 00:00:00 2001 From: chreman Date: Tue, 11 May 2021 23:20:42 +0200 Subject: [PATCH 06/54] remove docker-compose.yml for windows; change postgresql container --- docker-compose_win.yml | 42 ------------------------------------------ 1 file changed, 42 deletions(-) delete mode 100644 docker-compose_win.yml diff --git a/docker-compose_win.yml b/docker-compose_win.yml deleted file mode 100644 index 8cd7f6e45..000000000 --- a/docker-compose_win.yml +++ /dev/null @@ -1,42 +0,0 @@ -version: '3.7' - -services: - - api: - build: - context: server - dockerfile: services.docker - restart: always - ports: - - '127.0.0.1:5001:5001' - depends_on: - - redis - - redis: - image: 'redis:4.0-alpine' - restart: always - command: ["redis-server", "/etc/redis/redis.conf", "--appendonly", "yes"] - volumes: - - ./server/workers/redis.conf:/etc/redis/redis.conf - restart: always - ports: - - '127.0.0.1:6379:6379' - - search_triple: - build: - context: server - dockerfile: search_triple.docker - env_file: - - server/workers/triple/triple.env - restart: always - - dataprocessing: - build: - context: server - dockerfile: dataprocessing.docker - env_file: - - server/workers/dataprocessing/dataprocessing.env - restart: always - -volumes: - redis: From 2c133a20847ae71dd8793c0b65ca0e144e252e43 Mon Sep 17 00:00:00 2001 From: chreman Date: Thu, 13 May 2021 12:36:01 +0200 Subject: [PATCH 07/54] move persistence config to env --- docker-compose.yml | 20 +++++++++++------ .../api/src/config/example_settings.py | 22 +++---------------- server/workers/api/src/database.py | 20 ++++++++++++++--- 3 files changed, 33 insertions(+), 29 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index db177d61c..ecaa61176 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,18 +2,19 @@ version: '3.7' services: - pgsql: + db: image: 'postgres:12.2-alpine' restart: always environment: POSTGRES_USER: "${POSTGRES_USER}" POSTGRES_PASSWORD: "${POSTGRES_PASSWORD}" + command: postgres -c config_file=/etc/postgresql.conf -c hba_file=/etc/pg_hba.conf volumes: - - db-data:/var/lib/postgresql/data - - ./server/workers/pg_hba.conf:/var/lib/postgresql/data/pg_hba.conf - - ./server/workers/postgresql.conf:/var/lib/postgresql/data/postgresql.conf + - ~/data/OKMaps/postgresql/data:/var/lib/postgresql/data + - ./server/workers/pg_hba.conf:/etc/pg_hba.conf + - ./server/workers/postgresql.conf:/etc/postgresql.conf ports: - - '127.0.0.1:5432:5432' + - "${POSTGRES_PORT}:${POSTGRES_PORT}" networks: - headstart @@ -21,6 +22,8 @@ services: image: 'dpage/pgadmin4' ports: - '127.0.0.1:54323:80' + networks: + - headstart redis: image: 'redis:4.0-alpine' @@ -45,6 +48,10 @@ services: environment: REDIS_HOST: "${REDIS_HOST}" REDIS_PORT: "${REDIS_PORT}" + POSTGRES_USER: "${POSTGRES_USER}" + POSTGRES_PASSWORD: "${POSTGRES_PASSWORD}" + POSTGRES_HOST: "${POSTGRES_HOST}" + POSTGRES_PORT: "${POSTGRES_PORT}" command: ["gunicorn", "--workers", "10", "--threads", "2", "-b", "127.0.0.1:${API_PORT}", "app:app", "--timeout", "300"] ports: - '5001:5001' @@ -161,5 +168,4 @@ volumes: driver: local networks: - headstart: - driver: bridge \ No newline at end of file + headstart: \ No newline at end of file diff --git a/server/workers/api/src/config/example_settings.py b/server/workers/api/src/config/example_settings.py index ab4375309..423d54eef 100644 --- a/server/workers/api/src/config/example_settings.py +++ b/server/workers/api/src/config/example_settings.py @@ -1,22 +1,6 @@ BEHIND_PROXY = True -DEFAULT = { - 'user': 'user', - 'pw': 'pw', - 'db': 'dev', - 'host': '127.0.0.1', - 'port': '5432', -} -TEST = { - 'user': 'testuser', - 'pw': 'testpassword', - 'db': 'test', - 'host': '127.0.0.1', - 'port': '5432', -} -SQLALCHEMY_DATABASE_URI = 'postgresql://%(user)s:%(pw)s@%(host)s:%(port)s/%(db)s' % DEFAULT -SQLALCHEMY_BINDS = { - 'test': 'postgresql://%(user)s:%(pw)s@%(host)s:%(port)s/%(db)s' % TEST -} -SQLALCHEMY_TRACK_MODIFICATIONS = False +SWAGGER_BASEPATH = "" +DEFAULT_DATABASE = "dev" +DATABASES = ["test"] ENV = "development" DEBUG = True diff --git a/server/workers/api/src/database.py b/server/workers/api/src/database.py index 694f7ea2c..0ffaf58b1 100644 --- a/server/workers/api/src/database.py +++ b/server/workers/api/src/database.py @@ -1,13 +1,27 @@ +import os from sqlalchemy.orm import sessionmaker from sqlalchemy import create_engine from sqlalchemy.ext.declarative import declarative_base from config import settings +bind_params = { + "user": os.getenv("POSTGRES_USER"), + "pw": os.getenv("POSTGRES_PASSWORD"), + "host": os.getenv("POSTGRES_HOST"), + "port": os.getenv("POSTGRES_PORT"), + "db": settings.DEFAULT_DATABASE +} + sessions = {} -sessions[settings.DEFAULT["db"]] = sessionmaker(bind=create_engine(settings.SQLALCHEMY_DATABASE_URI)) -for data_integration, database in settings.SQLALCHEMY_BINDS.items(): - sessions[data_integration] = sessionmaker(bind=create_engine(database, +sessions[settings.DEFAULT_DATABASE] = sessionmaker(bind=create_engine('postgresql://%(user)s:%(pw)s@%(host)s:%(port)s/%(db)s' % bind_params, + max_overflow=15, + pool_pre_ping=True, + pool_recycle=3600, + pool_size=30)) +for database in settings.DATABASES: + bind_params["db"] = database + sessions[database] = sessionmaker(bind=create_engine('postgresql://%(user)s:%(pw)s@%(host)s:%(port)s/%(db)s' % bind_params, max_overflow=15, pool_pre_ping=True, pool_recycle=3600, From f2868306a230654b32e65c141dc9a84ef1971c77 Mon Sep 17 00:00:00 2001 From: chreman Date: Thu, 13 May 2021 13:07:01 +0200 Subject: [PATCH 08/54] port cleanup --- docker-compose.yml | 6 ++++-- server/workers/openaire/Dockerfile | 2 -- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index ecaa61176..b7f703eab 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,8 +13,8 @@ services: - ~/data/OKMaps/postgresql/data:/var/lib/postgresql/data - ./server/workers/pg_hba.conf:/etc/pg_hba.conf - ./server/workers/postgresql.conf:/etc/postgresql.conf - ports: - - "${POSTGRES_PORT}:${POSTGRES_PORT}" + expose: + - "${POSTGRES_PORT}" networks: - headstart @@ -29,6 +29,8 @@ services: image: 'redis:4.0-alpine' restart: always hostname: "${REDIS_HOST}" + expose: + - "${REDIS_PORT}" environment: REDIS_HOST: "${REDIS_HOST}" REDIS_PORT: "${REDIS_PORT}" diff --git a/server/workers/openaire/Dockerfile b/server/workers/openaire/Dockerfile index 45e660816..ffac40e61 100644 --- a/server/workers/openaire/Dockerfile +++ b/server/workers/openaire/Dockerfile @@ -156,7 +156,5 @@ COPY preprocessing/resources ./resources COPY preprocessing/other-scripts ./other-scripts RUN mkdir -p /var/log/headstart && touch /var/log/headstart/headstart.log -COPY workers/openaire/redis_config.json . - COPY workers/openaire/*.py ./ ENTRYPOINT python3 run_openaire.py From ab9d127081977c27ffce22ccdd9bacf8c55fec1e Mon Sep 17 00:00:00 2001 From: chreman Date: Thu, 13 May 2021 17:40:40 +0200 Subject: [PATCH 09/54] add missing redis configs to envs --- docker-compose.yml | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index b7f703eab..12aa2abfd 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -29,12 +29,12 @@ services: image: 'redis:4.0-alpine' restart: always hostname: "${REDIS_HOST}" - expose: - - "${REDIS_PORT}" + ports: + - "${REDIS_PORT}:${REDIS_PORT}" environment: REDIS_HOST: "${REDIS_HOST}" REDIS_PORT: "${REDIS_PORT}" - command: ["redis-server", "/etc/redis/redis.conf", "--appendonly", "yes"] + command: ["redis-server", "/etc/redis/redis.conf", "--bind", "${REDIS_HOST}", "--appendonly", "yes", "--port", "${REDIS_PORT}"] volumes: - 'redis:/var/lib/redis/data' - ./server/workers/redis.conf:/etc/redis/redis.conf @@ -71,6 +71,8 @@ services: environment: REDIS_HOST: "${REDIS_HOST}" REDIS_PORT: "${REDIS_PORT}" + REDIS_DB: "${REDIS_DB}" + REDIS_PASSWORD: "${REDIS_PASSWORD}" restart: always depends_on: - redis @@ -86,6 +88,8 @@ services: environment: REDIS_HOST: "${REDIS_HOST}" REDIS_PORT: "${REDIS_PORT}" + REDIS_DB: "${REDIS_DB}" + REDIS_PASSWORD: "${REDIS_PASSWORD}" restart: always depends_on: - redis @@ -101,6 +105,8 @@ services: environment: REDIS_HOST: "${REDIS_HOST}" REDIS_PORT: "${REDIS_PORT}" + REDIS_DB: "${REDIS_DB}" + REDIS_PASSWORD: "${REDIS_PASSWORD}" restart: always volumes: - /opt/local/renv/cache:/renv/cache @@ -119,6 +125,8 @@ services: environment: REDIS_HOST: "${REDIS_HOST}" REDIS_PORT: "${REDIS_PORT}" + REDIS_DB: "${REDIS_DB}" + REDIS_PASSWORD: "${REDIS_PASSWORD}" restart: always volumes: - /opt/local/renv/cache:/renv/cache @@ -137,6 +145,8 @@ services: environment: REDIS_HOST: "${REDIS_HOST}" REDIS_PORT: "${REDIS_PORT}" + REDIS_DB: "${REDIS_DB}" + REDIS_PASSWORD: "${REDIS_PASSWORD}" restart: always volumes: - /opt/local/renv/cache:/renv/cache @@ -155,6 +165,8 @@ services: environment: REDIS_HOST: "${REDIS_HOST}" REDIS_PORT: "${REDIS_PORT}" + REDIS_DB: "${REDIS_DB}" + REDIS_PASSWORD: "${REDIS_PASSWORD}" restart: always volumes: - /opt/local/renv/cache:/renv/cache From d47a0d65495493e341d51d68e85af1927aa76e2f Mon Sep 17 00:00:00 2001 From: chreman Date: Fri, 14 May 2021 12:38:35 +0200 Subject: [PATCH 10/54] add script to build docker images --- .gitignore | 1 + docker-compose.yml | 44 ++++++++++----------------- example.env | 17 ++++++++--- server/workers/build_docker_images.sh | 7 +++++ 4 files changed, 37 insertions(+), 32 deletions(-) create mode 100755 server/workers/build_docker_images.sh diff --git a/.gitignore b/.gitignore index b3d43a6f7..ec317c828 100644 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,4 @@ coverage/ /lc_browseview_cache.json /lc_cache.json /linkedcat.sqlite +.env \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 12aa2abfd..442bcfcae 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -43,29 +43,27 @@ services: - headstart api: - build: - context: server - dockerfile: workers/api/Dockerfile + image: api:${SERVICE_VERSION} restart: always environment: REDIS_HOST: "${REDIS_HOST}" REDIS_PORT: "${REDIS_PORT}" + REDIS_PASSWORD: "${REDIS_PASSWORD}" + REDIS_DB: "${REDIS_DB}" POSTGRES_USER: "${POSTGRES_USER}" POSTGRES_PASSWORD: "${POSTGRES_PASSWORD}" POSTGRES_HOST: "${POSTGRES_HOST}" POSTGRES_PORT: "${POSTGRES_PORT}" - command: ["gunicorn", "--workers", "10", "--threads", "2", "-b", "127.0.0.1:${API_PORT}", "app:app", "--timeout", "300"] + command: ["gunicorn", "--workers", "10", "--threads", "2", "-b", "0.0.0.0:${API_PORT}", "app:app", "--timeout", "300"] ports: - - '5001:5001' + - "${API_PORT}:${API_PORT}" depends_on: - redis networks: - headstart - search_triple: - build: - context: server - dockerfile: workers/triple/Dockerfile + triple: + image: triple:${SERVICE_VERSION} env_file: - server/workers/triple/triple.env environment: @@ -79,10 +77,8 @@ services: networks: - headstart - search_gsheets: - build: - context: server - dockerfile: workers/gsheets/Dockerfile + gsheets: + image: gsheets:${SERVICE_VERSION} env_file: - server/workers/gsheets/gsheets.env environment: @@ -97,9 +93,7 @@ services: - headstart dataprocessing: - build: - context: server - dockerfile: workers/dataprocessing/Dockerfile + image: dataprocessing:${SERVICE_VERSION} env_file: - server/workers/dataprocessing/dataprocessing.env environment: @@ -116,10 +110,8 @@ services: networks: - headstart - search_base: - build: - context: server - dockerfile: workers/base/Dockerfile + base: + image: base:${SERVICE_VERSION} env_file: - server/workers/base/base.env environment: @@ -136,10 +128,8 @@ services: networks: - headstart - search_pubmed: - build: - context: server - dockerfile: workers/pubmed/Dockerfile + pubmed: + image: pubmed:${SERVICE_VERSION} env_file: - server/workers/pubmed/pubmed.env environment: @@ -156,10 +146,8 @@ services: networks: - headstart - search_openaire: - build: - context: server - dockerfile: workers/openaire/Dockerfile + openaire: + image: openaire:${SERVICE_VERSION} env_file: - server/workers/openaire/openaire.env environment: diff --git a/example.env b/example.env index ee2241b1d..4e2e5ad45 100644 --- a/example.env +++ b/example.env @@ -1,6 +1,15 @@ +COMPOSE_PROJECT_NAME=dockerrefactoring +SERVICE_VERSION=ab9d127081977c27ffce22ccdd9bacf8c55fec1e +NETWORK=dockerrefactoring POSTGRES_DB=postgres POSTGRES_USER=headstart -POSTGRES_PASSWORD=password -PGADMIN_DEFAULT_EMAIL=email@domain.org -PGADMIN_DEFAULT_PASSWORD=password -PGADMIN_VOLUME=/path/to/pgadminworkingdir +POSTGRES_PASSWORD=testpassword +POSTGRES_HOST=dockerrefactoring_db_1 +POSTGRES_PORT=5432 +PGADMIN_DEFAULT_EMAIL=christopher.kittel@openknowledgemaps.org +PGADMIN_DEFAULT_PASSWORD=testpassword +API_PORT=5001 +REDIS_HOST=dockerrefactoring_redis_1 +REDIS_PORT=6379 +REDIS_DB=0 +REDIS_PASSWORD=testpassword \ No newline at end of file diff --git a/server/workers/build_docker_images.sh b/server/workers/build_docker_images.sh new file mode 100755 index 000000000..e9147bf92 --- /dev/null +++ b/server/workers/build_docker_images.sh @@ -0,0 +1,7 @@ +#!/bin/bash +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +services=("api" "triple" "gsheets" "dataprocessing" "base" "pubmed" "openaire") +for service in ${services[@]}; do + docker build -f "$SCRIPT_DIR/../workers/$service/Dockerfile" -t "$service:`git rev-parse HEAD`" "$SCRIPT_DIR/../" +done + From 41b44663c6cc3498210cb7db38de7bee9ccab583 Mon Sep 17 00:00:00 2001 From: chreman Date: Fri, 14 May 2021 12:45:30 +0200 Subject: [PATCH 11/54] add healthcheck endpoint to triple --- server/workers/api/src/apis/triple.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/server/workers/api/src/apis/triple.py b/server/workers/api/src/apis/triple.py index 95ded9fdd..4e75f225e 100644 --- a/server/workers/api/src/apis/triple.py +++ b/server/workers/api/src/apis/triple.py @@ -127,3 +127,9 @@ def get(self): return make_response(result, 200, headers) + +@triple_ns.route('/healthcheck') +class Healthcheck(Resource): + def get(self): + result = {"status": "I'm good"} + return make_response(result, 200, {"Content-Type": "application/json"}) \ No newline at end of file From 16347a34739bedc752836e1e3456131dc75cc9d1 Mon Sep 17 00:00:00 2001 From: chreman Date: Fri, 14 May 2021 13:20:45 +0200 Subject: [PATCH 12/54] add flavor configs --- server/workers/flavorconfigs/v1.env | 15 +++++++++++++++ server/workers/flavorconfigs/v2.env | 15 +++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 server/workers/flavorconfigs/v1.env create mode 100644 server/workers/flavorconfigs/v2.env diff --git a/server/workers/flavorconfigs/v1.env b/server/workers/flavorconfigs/v1.env new file mode 100644 index 000000000..6d910440d --- /dev/null +++ b/server/workers/flavorconfigs/v1.env @@ -0,0 +1,15 @@ +COMPOSE_PROJECT_NAME=v1 +SERVICE_VERSION=d47a0d65495493e341d51d68e85af1927aa76e2f +NETWORK=dockerrefactoring +POSTGRES_DB=postgres +POSTGRES_USER=headstart +POSTGRES_PASSWORD=testpassword +POSTGRES_HOST=dockerrefactoring_db_1 +POSTGRES_PORT=5432 +PGADMIN_DEFAULT_EMAIL=christopher.kittel@openknowledgemaps.org +PGADMIN_DEFAULT_PASSWORD=testpassword +API_PORT=5001 +REDIS_HOST=dockerrefactoring_redis_1 +REDIS_PORT=6379 +REDIS_DB=0 +REDIS_PASSWORD=testpassword \ No newline at end of file diff --git a/server/workers/flavorconfigs/v2.env b/server/workers/flavorconfigs/v2.env new file mode 100644 index 000000000..2f3ce15bd --- /dev/null +++ b/server/workers/flavorconfigs/v2.env @@ -0,0 +1,15 @@ +COMPOSE_PROJECT_NAME=v2 +SERVICE_VERSION=41b44663c6cc3498210cb7db38de7bee9ccab583 +NETWORK=dockerrefactoring +POSTGRES_DB=postgres +POSTGRES_USER=headstart +POSTGRES_PASSWORD=testpassword +POSTGRES_HOST=dockerrefactoring_db_1 +POSTGRES_PORT=5433 +PGADMIN_DEFAULT_EMAIL=christopher.kittel@openknowledgemaps.org +PGADMIN_DEFAULT_PASSWORD=testpassword +API_PORT=5002 +REDIS_HOST=dockerrefactoring_redis_1 +REDIS_PORT=6380 +REDIS_DB=0 +REDIS_PASSWORD=testpassword \ No newline at end of file From b35449cf3c5a907767ae9e2a56e7b7dd0a1e1e82 Mon Sep 17 00:00:00 2001 From: chreman Date: Fri, 14 May 2021 17:07:48 +0200 Subject: [PATCH 13/54] added nginx as proxy; separate volumes per flavor --- docker-compose.yml | 5 ++-- .../workers/flavorconfigs/{v2.env => dev.env} | 8 +++---- .../flavorconfigs/{v1.env => stable.env} | 8 +++---- server/workers/proxy/docker-compose.yml | 23 +++++++++++++++++++ .../proxy/templates/default.conf.template | 11 +++++++++ 5 files changed, 45 insertions(+), 10 deletions(-) rename server/workers/flavorconfigs/{v2.env => dev.env} (59%) rename server/workers/flavorconfigs/{v1.env => stable.env} (58%) create mode 100644 server/workers/proxy/docker-compose.yml create mode 100644 server/workers/proxy/templates/default.conf.template diff --git a/docker-compose.yml b/docker-compose.yml index 442bcfcae..462cf9367 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,7 +10,8 @@ services: POSTGRES_PASSWORD: "${POSTGRES_PASSWORD}" command: postgres -c config_file=/etc/postgresql.conf -c hba_file=/etc/pg_hba.conf volumes: - - ~/data/OKMaps/postgresql/data:/var/lib/postgresql/data + # - ~/data/OKMaps/${COMPOSE_PROJECT_NAME}/postgresql/data:/var/lib/postgresql/data + - db_data:/var/lib/postgresql/data - ./server/workers/pg_hba.conf:/etc/pg_hba.conf - ./server/workers/postgresql.conf:/etc/postgresql.conf expose: @@ -166,7 +167,7 @@ services: volumes: redis: - db-data: + db_data: driver: local networks: diff --git a/server/workers/flavorconfigs/v2.env b/server/workers/flavorconfigs/dev.env similarity index 59% rename from server/workers/flavorconfigs/v2.env rename to server/workers/flavorconfigs/dev.env index 2f3ce15bd..4d0f78daf 100644 --- a/server/workers/flavorconfigs/v2.env +++ b/server/workers/flavorconfigs/dev.env @@ -1,15 +1,15 @@ -COMPOSE_PROJECT_NAME=v2 +COMPOSE_PROJECT_NAME=dev SERVICE_VERSION=41b44663c6cc3498210cb7db38de7bee9ccab583 NETWORK=dockerrefactoring POSTGRES_DB=postgres POSTGRES_USER=headstart POSTGRES_PASSWORD=testpassword -POSTGRES_HOST=dockerrefactoring_db_1 +POSTGRES_HOST=dev_db_1 POSTGRES_PORT=5433 PGADMIN_DEFAULT_EMAIL=christopher.kittel@openknowledgemaps.org PGADMIN_DEFAULT_PASSWORD=testpassword API_PORT=5002 -REDIS_HOST=dockerrefactoring_redis_1 +REDIS_HOST=dev_redis_1 REDIS_PORT=6380 REDIS_DB=0 -REDIS_PASSWORD=testpassword \ No newline at end of file +REDIS_PASSWORD=3tUrLK8a9kDo6m6sx7qKQaM5RLjLsfd4kkbq39LmxawMucvktRQZ9GfGeL3rNasSBFJyaYsQLFXHPAP9cWpnSSoB3ioppQh4mcMGaRWCe42tfWTQw7gLVX6XPs9LCdan \ No newline at end of file diff --git a/server/workers/flavorconfigs/v1.env b/server/workers/flavorconfigs/stable.env similarity index 58% rename from server/workers/flavorconfigs/v1.env rename to server/workers/flavorconfigs/stable.env index 6d910440d..2ca52db6a 100644 --- a/server/workers/flavorconfigs/v1.env +++ b/server/workers/flavorconfigs/stable.env @@ -1,15 +1,15 @@ -COMPOSE_PROJECT_NAME=v1 +COMPOSE_PROJECT_NAME=stable SERVICE_VERSION=d47a0d65495493e341d51d68e85af1927aa76e2f NETWORK=dockerrefactoring POSTGRES_DB=postgres POSTGRES_USER=headstart POSTGRES_PASSWORD=testpassword -POSTGRES_HOST=dockerrefactoring_db_1 +POSTGRES_HOST=stable_db_1 POSTGRES_PORT=5432 PGADMIN_DEFAULT_EMAIL=christopher.kittel@openknowledgemaps.org PGADMIN_DEFAULT_PASSWORD=testpassword API_PORT=5001 -REDIS_HOST=dockerrefactoring_redis_1 +REDIS_HOST=stable_redis_1 REDIS_PORT=6379 REDIS_DB=0 -REDIS_PASSWORD=testpassword \ No newline at end of file +REDIS_PASSWORD=3tUrLK8a9kDo6m6sx7qKQaM5RLjLsfd4kkbq39LmxawMucvktRQZ9GfGeL3rNasSBFJyaYsQLFXHPAP9cWpnSSoB3ioppQh4mcMGaRWCe42tfWTQw7gLVX6XPs9LCdan \ No newline at end of file diff --git a/server/workers/proxy/docker-compose.yml b/server/workers/proxy/docker-compose.yml new file mode 100644 index 000000000..15a61d378 --- /dev/null +++ b/server/workers/proxy/docker-compose.yml @@ -0,0 +1,23 @@ +version: '3.7' + +services: + + proxy: + image: 'nginx' + volumes: + - ./templates:/etc/nginx/templates + environment: + - NGINX_PORT=80 + ports: + - '8080:80' + networks: + - stable_headstart + - dev_headstart + +networks: + stable_headstart: + external: true + name: stable_headstart + dev_headstart: + external: true + name: dev_headstart \ No newline at end of file diff --git a/server/workers/proxy/templates/default.conf.template b/server/workers/proxy/templates/default.conf.template new file mode 100644 index 000000000..8c9e62265 --- /dev/null +++ b/server/workers/proxy/templates/default.conf.template @@ -0,0 +1,11 @@ +server { + listen ${NGINX_PORT}; + + location /stable/ { + proxy_pass http://stable_api_1:5001/api/; + } + + location /dev/ { + proxy_pass http://dev_api_1:5002/api/; + } +} \ No newline at end of file From 053621d7a57b0fa032276b13b725f504058ba324 Mon Sep 17 00:00:00 2001 From: chreman Date: Fri, 14 May 2021 17:11:09 +0200 Subject: [PATCH 14/54] clean up exposed ports --- docker-compose.yml | 6 ------ server/workers/flavorconfigs/dev.env | 2 +- server/workers/proxy/templates/default.conf.template | 2 +- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 462cf9367..6b581af78 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,8 +14,6 @@ services: - db_data:/var/lib/postgresql/data - ./server/workers/pg_hba.conf:/etc/pg_hba.conf - ./server/workers/postgresql.conf:/etc/postgresql.conf - expose: - - "${POSTGRES_PORT}" networks: - headstart @@ -30,8 +28,6 @@ services: image: 'redis:4.0-alpine' restart: always hostname: "${REDIS_HOST}" - ports: - - "${REDIS_PORT}:${REDIS_PORT}" environment: REDIS_HOST: "${REDIS_HOST}" REDIS_PORT: "${REDIS_PORT}" @@ -56,8 +52,6 @@ services: POSTGRES_HOST: "${POSTGRES_HOST}" POSTGRES_PORT: "${POSTGRES_PORT}" command: ["gunicorn", "--workers", "10", "--threads", "2", "-b", "0.0.0.0:${API_PORT}", "app:app", "--timeout", "300"] - ports: - - "${API_PORT}:${API_PORT}" depends_on: - redis networks: diff --git a/server/workers/flavorconfigs/dev.env b/server/workers/flavorconfigs/dev.env index 4d0f78daf..32a97b88e 100644 --- a/server/workers/flavorconfigs/dev.env +++ b/server/workers/flavorconfigs/dev.env @@ -8,7 +8,7 @@ POSTGRES_HOST=dev_db_1 POSTGRES_PORT=5433 PGADMIN_DEFAULT_EMAIL=christopher.kittel@openknowledgemaps.org PGADMIN_DEFAULT_PASSWORD=testpassword -API_PORT=5002 +API_PORT=5001 REDIS_HOST=dev_redis_1 REDIS_PORT=6380 REDIS_DB=0 diff --git a/server/workers/proxy/templates/default.conf.template b/server/workers/proxy/templates/default.conf.template index 8c9e62265..6490bde72 100644 --- a/server/workers/proxy/templates/default.conf.template +++ b/server/workers/proxy/templates/default.conf.template @@ -6,6 +6,6 @@ server { } location /dev/ { - proxy_pass http://dev_api_1:5002/api/; + proxy_pass http://dev_api_1:5001/api/; } } \ No newline at end of file From fb916a1f6b93a3da2eaae828a6e866e6e26279e2 Mon Sep 17 00:00:00 2001 From: chreman Date: Wed, 19 May 2021 21:06:31 +0200 Subject: [PATCH 15/54] clean param --- server/services/search.php | 2 +- server/services/searchBASE.php | 3 +-- server/services/searchDOAJ.php | 3 +-- server/services/searchLinkedCat.php | 4 ++-- server/services/searchLinkedCatAuthorview.php | 3 +-- server/services/searchLinkedCatBrowseview.php | 4 ++-- server/services/searchOpenAire.php | 3 +-- server/services/searchPLOS.php | 4 ++-- server/services/searchPubmed.php | 3 +-- 9 files changed, 12 insertions(+), 17 deletions(-) diff --git a/server/services/search.php b/server/services/search.php index f2b522366..20ce54b48 100644 --- a/server/services/search.php +++ b/server/services/search.php @@ -57,7 +57,7 @@ function search($service_integration, $dirty_query , $retrieve_cached_map = true, $params_for_id = null, $num_labels = 3 , $id = "area_uri", $subjects = "subject" , $precomputed_id = null, $do_clean_query = true - , $processing_backend = "legacy") { + , $api_flavor = "stable") { $INI_DIR = dirname(__FILE__) . "/../preprocessing/conf/"; $ini_array = library\Toolkit::loadIni($INI_DIR); $repo2snapshot = array("plos" => "PLOS" diff --git a/server/services/searchBASE.php b/server/services/searchBASE.php index 28b92a489..ff980cd8f 100644 --- a/server/services/searchBASE.php +++ b/server/services/searchBASE.php @@ -30,8 +30,7 @@ , ";", null, true , true, null, 3 , "area_uri", "subject" - , $precomputed_id, false - , "legacy", "legacy"); + , $precomputed_id, false); echo $result diff --git a/server/services/searchDOAJ.php b/server/services/searchDOAJ.php index 47e4cc7b0..55d631cdf 100644 --- a/server/services/searchDOAJ.php +++ b/server/services/searchDOAJ.php @@ -17,8 +17,7 @@ , ";", null, true , true, null, 3 , "area_uri", "subject" - , $precomputed_id, false - , "legacy", "legacy"); + , $precomputed_id, false); echo $result diff --git a/server/services/searchLinkedCat.php b/server/services/searchLinkedCat.php index 7595a92de..395f7b223 100644 --- a/server/services/searchLinkedCat.php +++ b/server/services/searchLinkedCat.php @@ -17,8 +17,8 @@ array("from", "to", "include_content_type", "today", "vis_type"), ";", null, $transform_query_tolowercase=false, true, null, 3, - "area_uri", "subject", $precomputed_id, true, - "legacy"); + "area_uri", "subject", + $precomputed_id, true); echo $result diff --git a/server/services/searchLinkedCatAuthorview.php b/server/services/searchLinkedCatAuthorview.php index a61e24518..0d1ccf526 100644 --- a/server/services/searchLinkedCatAuthorview.php +++ b/server/services/searchLinkedCatAuthorview.php @@ -19,8 +19,7 @@ ";", null, $transform_query_tolowercase = false, true, null, 3, - "area_uri", "subject", $precomputed_id, true, - "legacy" + "area_uri", "subject", $precomputed_id, true ); echo $result diff --git a/server/services/searchLinkedCatBrowseview.php b/server/services/searchLinkedCatBrowseview.php index 41c5e9221..e2caf715e 100644 --- a/server/services/searchLinkedCatBrowseview.php +++ b/server/services/searchLinkedCatBrowseview.php @@ -19,8 +19,8 @@ ";", null, $transform_query_tolowercase = false, true, null, 3, - "area_uri", "subject", $precomputed_id, true, - "legacy" + "area_uri", "subject", + $precomputed_id, true ); echo $result diff --git a/server/services/searchOpenAire.php b/server/services/searchOpenAire.php index eb9fce663..fad905ae5 100644 --- a/server/services/searchOpenAire.php +++ b/server/services/searchOpenAire.php @@ -28,8 +28,7 @@ , ";", null, false , true, array("project_id", "funder"), 3 , "area_uri", "subject" - , null, true, - "legacy", "legacy"); + , null, true); echo $result diff --git a/server/services/searchPLOS.php b/server/services/searchPLOS.php index 70bcccb25..e6efe2350 100644 --- a/server/services/searchPLOS.php +++ b/server/services/searchPLOS.php @@ -15,8 +15,8 @@ $result = search("plos", $dirty_query, $post_params , array("article_types", "journals", "from", "to", "sorting") , ";", "/", true, true, null, 3 - , "area_uri", "subject", $precomputed_id, false - , "legacy"); + , "area_uri", "subject" + , $precomputed_id, false); echo $result diff --git a/server/services/searchPubmed.php b/server/services/searchPubmed.php index c3f535862..8fc72f482 100644 --- a/server/services/searchPubmed.php +++ b/server/services/searchPubmed.php @@ -21,8 +21,7 @@ , ";", null, true , true, null, 3 , "area_uri", "subject" - , $precomputed_id, false - , "legacy", "legacy"); + , $precomputed_id, false); echo $result From 4e6c9cc7c47cdf4837b62ae960cca11e1a0571b0 Mon Sep 17 00:00:00 2001 From: chreman Date: Wed, 19 May 2021 21:35:31 +0200 Subject: [PATCH 16/54] clean up params --- server/services/search.php | 2 +- server/services/searchBASE.php | 2 +- server/services/searchDOAJ.php | 2 +- server/services/searchLinkedCat.php | 1 - server/services/searchLinkedCatAuthorview.php | 2 -- server/services/searchLinkedCatBrowseview.php | 2 -- server/services/searchOpenAire.php | 2 +- server/services/searchPLOS.php | 2 +- server/services/searchPubmed.php | 2 +- server/services/searchTRIPLE.php | 2 +- 10 files changed, 7 insertions(+), 12 deletions(-) diff --git a/server/services/search.php b/server/services/search.php index 20ce54b48..2a1d5ec37 100644 --- a/server/services/search.php +++ b/server/services/search.php @@ -53,7 +53,7 @@ function cleanQuery($dirty_query, $transform_query_tolowercase) { function search($service_integration, $dirty_query , $post_params, $param_types - , $keyword_separator, $taxonomy_separator, $transform_query_tolowercase = true + , $transform_query_tolowercase = true , $retrieve_cached_map = true, $params_for_id = null, $num_labels = 3 , $id = "area_uri", $subjects = "subject" , $precomputed_id = null, $do_clean_query = true diff --git a/server/services/searchBASE.php b/server/services/searchBASE.php index ff980cd8f..9c73362a4 100644 --- a/server/services/searchBASE.php +++ b/server/services/searchBASE.php @@ -27,7 +27,7 @@ $result = search("base", $dirty_query , $post_params, $params_array - , ";", null, true + , true , true, null, 3 , "area_uri", "subject" , $precomputed_id, false); diff --git a/server/services/searchDOAJ.php b/server/services/searchDOAJ.php index 55d631cdf..e2000ac04 100644 --- a/server/services/searchDOAJ.php +++ b/server/services/searchDOAJ.php @@ -14,7 +14,7 @@ $result = search("doaj", $dirty_query , $post_params, array("from", "to", "today", "sorting") - , ";", null, true + , true , true, null, 3 , "area_uri", "subject" , $precomputed_id, false); diff --git a/server/services/searchLinkedCat.php b/server/services/searchLinkedCat.php index 395f7b223..a0aa59126 100644 --- a/server/services/searchLinkedCat.php +++ b/server/services/searchLinkedCat.php @@ -15,7 +15,6 @@ $result = search("linkedcat", $dirty_query, $post_params, array("from", "to", "include_content_type", "today", "vis_type"), - ";", null, $transform_query_tolowercase=false, true, null, 3, "area_uri", "subject", $precomputed_id, true); diff --git a/server/services/searchLinkedCatAuthorview.php b/server/services/searchLinkedCatAuthorview.php index 0d1ccf526..0c7ea8e83 100644 --- a/server/services/searchLinkedCatAuthorview.php +++ b/server/services/searchLinkedCatAuthorview.php @@ -16,8 +16,6 @@ $dirty_query, $post_params, array("today", "author_id", "doc_count", "living_dates", "image_link", "vis_type"), - ";", - null, $transform_query_tolowercase = false, true, null, 3, "area_uri", "subject", $precomputed_id, true ); diff --git a/server/services/searchLinkedCatBrowseview.php b/server/services/searchLinkedCatBrowseview.php index e2caf715e..1b47d3971 100644 --- a/server/services/searchLinkedCatBrowseview.php +++ b/server/services/searchLinkedCatBrowseview.php @@ -16,8 +16,6 @@ $dirty_query, $post_params, array("today", "bkl_level", "bkl_list", "doc_count", "bkl_top_caption", "from", "to", "include_content_type"), - ";", - null, $transform_query_tolowercase = false, true, null, 3, "area_uri", "subject", $precomputed_id, true diff --git a/server/services/searchOpenAire.php b/server/services/searchOpenAire.php index fad905ae5..4a8953a68 100644 --- a/server/services/searchOpenAire.php +++ b/server/services/searchOpenAire.php @@ -25,7 +25,7 @@ "openaire_link", "obj_id", "acronym") - , ";", null, false + , false , true, array("project_id", "funder"), 3 , "area_uri", "subject" , null, true); diff --git a/server/services/searchPLOS.php b/server/services/searchPLOS.php index e6efe2350..8ef5c0c37 100644 --- a/server/services/searchPLOS.php +++ b/server/services/searchPLOS.php @@ -14,7 +14,7 @@ $result = search("plos", $dirty_query, $post_params , array("article_types", "journals", "from", "to", "sorting") - , ";", "/", true, true, null, 3 + , true, true, null, 3 , "area_uri", "subject" , $precomputed_id, false); diff --git a/server/services/searchPubmed.php b/server/services/searchPubmed.php index 8fc72f482..405310700 100644 --- a/server/services/searchPubmed.php +++ b/server/services/searchPubmed.php @@ -18,7 +18,7 @@ $result = search("pubmed", $dirty_query , $post_params, $query_params - , ";", null, true + , true , true, null, 3 , "area_uri", "subject" , $precomputed_id, false); diff --git a/server/services/searchTRIPLE.php b/server/services/searchTRIPLE.php index 345ba2018..e229af78b 100644 --- a/server/services/searchTRIPLE.php +++ b/server/services/searchTRIPLE.php @@ -27,7 +27,7 @@ $result = search($service_integration, $dirty_query , $post_params, $param_types - , ";", null, true + , true , true, null, 3 , "area_uri", "subject" , $precomputed_id, true); From 5ec2c7725c24fd3546a19c16131c3bf22e9c702f Mon Sep 17 00:00:00 2001 From: chreman Date: Wed, 19 May 2021 21:36:33 +0200 Subject: [PATCH 17/54] split off persistence api --- server/workers/api/src/app.py | 4 +- server/workers/api/src/manage.py | 16 ------- server/workers/persistence/requirements.txt | 14 ++++++ .../src/apis/persistence.py | 0 server/workers/persistence/src/app.py | 48 +++++++++++++++++++ .../persistence/src/config/__init__.py | 0 .../src/config/example_settings.py | 6 +++ .../{api => persistence}/src/database.py | 0 server/workers/persistence/src/migrate.py | 16 +++++++ .../{api => persistence}/src/models.py | 0 10 files changed, 85 insertions(+), 19 deletions(-) delete mode 100644 server/workers/api/src/manage.py create mode 100644 server/workers/persistence/requirements.txt rename server/workers/{api => persistence}/src/apis/persistence.py (100%) create mode 100644 server/workers/persistence/src/app.py create mode 100644 server/workers/persistence/src/config/__init__.py create mode 100644 server/workers/persistence/src/config/example_settings.py rename server/workers/{api => persistence}/src/database.py (100%) create mode 100644 server/workers/persistence/src/migrate.py rename server/workers/{api => persistence}/src/models.py (100%) diff --git a/server/workers/api/src/app.py b/server/workers/api/src/app.py index 8127daaff..a64197b4a 100644 --- a/server/workers/api/src/app.py +++ b/server/workers/api/src/app.py @@ -10,7 +10,6 @@ from apis.base import base_ns from apis.pubmed import pubmed_ns from apis.openaire import openaire_ns -from apis.persistence import persistence_ns from config import settings from utils.monkeypatches import ReverseProxied, __schema__, specs_url, _register_apidoc, inject_flasgger @@ -28,7 +27,7 @@ def api_patches(app, settings): description="Head Start API demo", version="0.1", prefix='/api', - doc="/api/docs") + doc="/docs") if settings.BEHIND_PROXY: api_fixed.behind_proxy = True return api_fixed @@ -49,7 +48,6 @@ def api_patches(app, settings): api.add_namespace(base_ns, path='/base') api.add_namespace(pubmed_ns, path='/pubmed') api.add_namespace(openaire_ns, path='/openaire') -api.add_namespace(persistence_ns, path='/persistence') app.logger.debug(app.config) app.logger.debug(app.url_map) diff --git a/server/workers/api/src/manage.py b/server/workers/api/src/manage.py deleted file mode 100644 index f3de767c2..000000000 --- a/server/workers/api/src/manage.py +++ /dev/null @@ -1,16 +0,0 @@ -from app import app -from models import Visualizations, Revisions -from database import Base, sessions - - -if __name__ == '__main__': - with app.app_context(): - for database, Session in sessions.items(): - try: - session = Session() - engine = session.get_bind() - for name, table in Base.metadata.tables.items(): - if not engine.dialect.has_table(engine, name): - table.create(engine) - except Exception as e: - print(database, e) diff --git a/server/workers/persistence/requirements.txt b/server/workers/persistence/requirements.txt new file mode 100644 index 000000000..744213065 --- /dev/null +++ b/server/workers/persistence/requirements.txt @@ -0,0 +1,14 @@ +flask +flask-cors +flask_sqlalchemy +flask_restx +Werkzeug +marshmallow +gunicorn +redis +hiredis +aioredis +pandas +pyyaml +flasgger +psycopg2-binary diff --git a/server/workers/api/src/apis/persistence.py b/server/workers/persistence/src/apis/persistence.py similarity index 100% rename from server/workers/api/src/apis/persistence.py rename to server/workers/persistence/src/apis/persistence.py diff --git a/server/workers/persistence/src/app.py b/server/workers/persistence/src/app.py new file mode 100644 index 000000000..2de230576 --- /dev/null +++ b/server/workers/persistence/src/app.py @@ -0,0 +1,48 @@ +import os +import sys +from flask import Flask +from flask_restx import Api +from flask_cors import CORS +from werkzeug.middleware.proxy_fix import ProxyFix + +from apis.persistence import persistence_ns + +from config import settings +from utils.monkeypatches import ReverseProxied, __schema__, specs_url, _register_apidoc, inject_flasgger +import logging + + +def api_patches(app, settings): + Api._register_apidoc = _register_apidoc + Api.__schema__ = __schema__ + Api.specs_url = specs_url + + api_fixed = Api( + app, + title="Head Start API", + description="Head Start API demo", + version="0.1", + prefix='/api', + doc="/docs") + if settings.BEHIND_PROXY: + api_fixed.behind_proxy = True + return api_fixed + + +app = Flask('v1', instance_relative_config=True) +app.config.from_object('config.settings') +handler = logging.StreamHandler(sys.stdout) +handler.setLevel(app.logger.level) +app = inject_flasgger(app) +app.wsgi_app = ProxyFix(app.wsgi_app, x_proto=1, x_port=1, x_for=1, x_host=1, x_prefix=1) +app.wsgi_app = ReverseProxied(app.wsgi_app) +CORS(app, expose_headers=["Content-Disposition", "Access-Control-Allow-Origin"]) + +api = api_patches(app, settings) +api.add_namespace(persistence_ns, path='/persistence') +app.logger.debug(app.config) +app.logger.debug(app.url_map) + + +if __name__ == '__main__': + app.run(host="127.0.0.1", port=5001, debug=True) diff --git a/server/workers/persistence/src/config/__init__.py b/server/workers/persistence/src/config/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/server/workers/persistence/src/config/example_settings.py b/server/workers/persistence/src/config/example_settings.py new file mode 100644 index 000000000..423d54eef --- /dev/null +++ b/server/workers/persistence/src/config/example_settings.py @@ -0,0 +1,6 @@ +BEHIND_PROXY = True +SWAGGER_BASEPATH = "" +DEFAULT_DATABASE = "dev" +DATABASES = ["test"] +ENV = "development" +DEBUG = True diff --git a/server/workers/api/src/database.py b/server/workers/persistence/src/database.py similarity index 100% rename from server/workers/api/src/database.py rename to server/workers/persistence/src/database.py diff --git a/server/workers/persistence/src/migrate.py b/server/workers/persistence/src/migrate.py new file mode 100644 index 000000000..de87134ef --- /dev/null +++ b/server/workers/persistence/src/migrate.py @@ -0,0 +1,16 @@ +from sqlalchemy import create_engine, select +from models import Visualizations, Revisions +from config import settings + +engine_source = create_engine('sqlite:////home/chris/data/OKMaps/TRIPLE/triple.sqlite') +# engine_target = create_engine('postgresql+psycopg2://headstart:testpassword@172.18.0.2:5432/dev') +engine_target = create_engine('postgresql+psycopg2://%(user)s:%(pw)s@%(host)s:%(port)s/%(db)s' % settings.TRIPLE) + +with engine_source.connect() as conn_source: + with engine_target.connect() as conn_target: + for table in Visualizations.metadata.sorted_tables: + for row in conn_source.execute(select(table.c)): + try: + conn_target.execute(table.insert().values(dict(row))) + except Exception as e: + print(e) diff --git a/server/workers/api/src/models.py b/server/workers/persistence/src/models.py similarity index 100% rename from server/workers/api/src/models.py rename to server/workers/persistence/src/models.py From 1552bfa273cec0f96b13ca737da52b47f1abdcbc Mon Sep 17 00:00:00 2001 From: chreman Date: Wed, 19 May 2021 21:38:07 +0200 Subject: [PATCH 18/54] move last config json to env --- server/workers/triple/example_triple.env | 7 +++++++ server/workers/triple/run_triple.py | 8 ++++++-- server/workers/triple/src/search_triple.py | 11 ++++++----- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/server/workers/triple/example_triple.env b/server/workers/triple/example_triple.env index 8462b8340..e055198c4 100644 --- a/server/workers/triple/example_triple.env +++ b/server/workers/triple/example_triple.env @@ -1 +1,8 @@ TRIPLE_LOGLEVEL=DEBUG +TRIPLE_USER=username +TRIPLE_PASS=password +TRIPLE_HOST=host.name +TRIPLE_PORT=9200 +TRIPLE_DOCUMENTS_INDEX=string +TRIPLE_PROJECTS_INDEX=string +TRIPLE_AUTHORS_INDEX=string \ No newline at end of file diff --git a/server/workers/triple/run_triple.py b/server/workers/triple/run_triple.py index 1f080277b..227663fea 100644 --- a/server/workers/triple/run_triple.py +++ b/server/workers/triple/run_triple.py @@ -5,8 +5,12 @@ if __name__ == '__main__': - with open("es_config.json") as infile: - es_config = json.load(infile) + es_config = { + "user": os.getenv("TRIPLE_USER"), + "pass": os.getenv("TRIPLE_PASS"), + "host": os.getenv("TRIPLE_HOST"), + "port": os.getenv("TRIPLE_PORT") + } redis_config = { "host": os.getenv("REDIS_HOST"), "port": os.getenv("REDIS_PORT"), diff --git a/server/workers/triple/src/search_triple.py b/server/workers/triple/src/search_triple.py index 0f11dc6be..b19dec233 100644 --- a/server/workers/triple/src/search_triple.py +++ b/server/workers/triple/src/search_triple.py @@ -1,3 +1,4 @@ +import os import sys import re import json @@ -101,8 +102,8 @@ def build_body(self, parameters): body["query"]["bool"]["must"].append({"term": {"language": parameters.get('language')}}) return body - def search(self, parameters): - index = "triple-poc-document27032021" + def search_documents(self, parameters): + index = os.getenv("TRIPLE_DOCUMENTS_INDEX") fields = ["headline.text", "abstract.text"] s = Search(using=self.es, index=index) # TODO: replace from parameters @@ -125,9 +126,9 @@ def search(self, parameters): if parameters.get('raw') is True: return result.to_dict() else: - return self.process_result(result, parameters) + return self.process_documents(result, parameters) - def process_result(self, result, parameters): + def process_documents(self, result, parameters): """ # * "id": a unique ID, preferably the DOI # * "title": the title @@ -248,7 +249,7 @@ def run(self): try: res = {} res["id"] = k - res["input_data"] = self.search(parameters) + res["input_data"] = self.search_documents(parameters) res["params"] = parameters res["status"] = "success" if parameters.get('raw') is True: From ff497de0d123c5af3f3c332bf2b5f2dbd6fa6695 Mon Sep 17 00:00:00 2001 From: chreman Date: Thu, 20 May 2021 14:07:17 +0200 Subject: [PATCH 19/54] clean up PHP files; add API flavor as config --- .../classes/headstart/library/Inflector.php | 374 ----------- .../DBConnectionPersonalization.php | 119 ---- .../classes/headstart/preprocessing/Readme.md | 50 -- .../preprocessing/connection/Connection.php | 26 - .../preprocessing/connection/DBConnection.php | 226 ------- .../connection/DBConnectionTopics.php | 114 ---- .../connection/DBConnectionWordnet.php | 75 --- .../classes/headstart/preprocessing/main.php | 22 - .../headstart/preprocessing/main_topics.php | 64 -- .../preprocessing/naming/ApiNaming.php | 616 ------------------ .../preprocessing/naming/KeywordNaming.php | 129 ---- .../headstart/preprocessing/naming/Naming.php | 24 - .../headstart/preprocessing/test_rplos.php | 32 - server/preprocessing/conf/config.ini | 22 - server/services/search.php | 19 +- 15 files changed, 11 insertions(+), 1901 deletions(-) delete mode 100644 server/classes/headstart/library/Inflector.php delete mode 100644 server/classes/headstart/personalization/DBConnectionPersonalization.php delete mode 100644 server/classes/headstart/preprocessing/Readme.md delete mode 100644 server/classes/headstart/preprocessing/connection/Connection.php delete mode 100644 server/classes/headstart/preprocessing/connection/DBConnection.php delete mode 100644 server/classes/headstart/preprocessing/connection/DBConnectionTopics.php delete mode 100644 server/classes/headstart/preprocessing/connection/DBConnectionWordnet.php delete mode 100644 server/classes/headstart/preprocessing/main.php delete mode 100644 server/classes/headstart/preprocessing/main_topics.php delete mode 100644 server/classes/headstart/preprocessing/naming/ApiNaming.php delete mode 100644 server/classes/headstart/preprocessing/naming/KeywordNaming.php delete mode 100644 server/classes/headstart/preprocessing/naming/Naming.php delete mode 100644 server/classes/headstart/preprocessing/test_rplos.php diff --git a/server/classes/headstart/library/Inflector.php b/server/classes/headstart/library/Inflector.php deleted file mode 100644 index b2f3c0915..000000000 --- a/server/classes/headstart/library/Inflector.php +++ /dev/null @@ -1,374 +0,0 @@ - '\1zes', - '/^(ox)$/i' => '\1en', - '/([m|l])ouse$/i' => '\1ice', - '/(matr|vert|ind)ix|ex$/i' => '\1ices', - '/(x|ch|ss|sh)$/i' => '\1es', - '/([^aeiouy]|qu)ies$/i' => '\1y', - '/([^aeiouy]|qu)y$/i' => '\1ies', - '/(hive)$/i' => '\1s', - '/(?:([^f])fe|([lr])f)$/i' => '\1\2ves', - '/sis$/i' => 'ses', - '/([ti])um$/i' => '\1a', - '/(buffal|tomat)o$/i' => '\1oes', - '/(bu)s$/i' => '\1ses', - '/(alias|status)/i'=> '\1es', - '/(octop|vir)us$/i'=> '\1i', - '/(ax|test)is$/i'=> '\1es', - '/s$/i'=> 's', - '/$/'=> 's'); - - $uncountable = array('equipment', 'information', 'rice', 'money', 'species', 'series', 'fish', 'sheep'); - - $irregular = array( - 'person' => 'people', - 'man' => 'men', - 'child' => 'children', - 'sex' => 'sexes', - 'move' => 'moves'); - - $lowercased_word = strtolower($word); - - foreach ($uncountable as $_uncountable){ - if(substr($lowercased_word,(-1*strlen($_uncountable))) == $_uncountable){ - return $word; - } - } - - foreach ($irregular as $_plural=> $_singular){ - if (preg_match('/('.$_plural.')$/i', $word, $arr)) { - return preg_replace('/('.$_plural.')$/i', substr($arr[0],0,1).substr($_singular,1), $word); - } - } - - foreach ($plural as $rule => $replacement) { - if (preg_match($rule, $word)) { - return preg_replace($rule, $replacement, $word); - } - } - return false; - - } - - // }}} - // {{{ singularize() - - /** - * Singularizes English nouns. - * - * @access public - * @static - * @param string $word English noun to singularize - * @return string Singular noun. - */ - static function singularize($word) - { - $singular = array ( - '/(quiz)zes$/i' => '\1', - '/(matr)ices$/i' => '\1ix', - '/(vert|ind)ices$/i' => '\1ex', - '/^(ox)en/i' => '\1', - '/(alias|status)es$/i' => '\1', - '/([octop|vir])i$/i' => '\1us', - '/(cris|ax|test)es$/i' => '\1is', - '/(shoe)s$/i' => '\1', - '/(o)es$/i' => '\1', - '/(bus)es$/i' => '\1', - '/([m|l])ice$/i' => '\1ouse', - '/(x|ch|ss|sh)es$/i' => '\1', - '/(m)ovies$/i' => '\1ovie', - '/(s)eries$/i' => '\1eries', - '/([^aeiouy]|qu)ies$/i' => '\1y', - '/([lr])ves$/i' => '\1f', - '/(tive)s$/i' => '\1', - '/(hive)s$/i' => '\1', - '/([^f])ves$/i' => '\1fe', - '/(^analy)ses$/i' => '\1sis', - '/((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$/i' => '\1\2sis', - '/([ti])a$/i' => '\1um', - '/(n)ews$/i' => '\1ews', - '/s$/i' => '', - ); - - $uncountable = array('equipment', 'information', 'rice', 'money', 'species', 'series', 'fish', 'sheep'); - - $irregular = array( - 'person' => 'people', - 'man' => 'men', - 'child' => 'children', - 'sex' => 'sexes', - 'move' => 'moves'); - - $lowercased_word = strtolower($word); - foreach ($uncountable as $_uncountable){ - if(substr($lowercased_word,(-1*strlen($_uncountable))) == $_uncountable){ - return $word; - } - } - - foreach ($irregular as $_plural=> $_singular){ - if (preg_match('/('.$_singular.')$/i', $word, $arr)) { - return preg_replace('/('.$_singular.')$/i', substr($arr[0],0,1).substr($_plural,1), $word); - } - } - - foreach ($singular as $rule => $replacement) { - if (preg_match($rule, $word)) { - return preg_replace($rule, $replacement, $word); - } - } - - return $word; - } - - // }}} - // {{{ titleize() - - /** - * Converts an underscored or CamelCase word into a English - * sentence. - * - * The titleize function converts text like "WelcomePage", - * "welcome_page" or "welcome page" to this "Welcome - * Page". - * If second parameter is set to 'first' it will only - * capitalize the first character of the title. - * - * @access public - * @static - * @param string $word Word to format as tile - * @param string $uppercase If set to 'first' it will only uppercase the - * first character. Otherwise it will uppercase all - * the words in the title. - * @return string Text formatted as title - */ - function titleize($word, $uppercase = '') - { - $uppercase = $uppercase == 'first' ? 'ucfirst' : 'ucwords'; - return $uppercase(Inflector::humanize(Inflector::underscore($word))); - } - - // }}} - // {{{ camelize() - - /** - * Returns given word as CamelCased - * - * Converts a word like "send_email" to "SendEmail". It - * will remove non alphanumeric character from the word, so - * "who's online" will be converted to "WhoSOnline" - * - * @access public - * @static - * @see variablize - * @param string $word Word to convert to camel case - * @return string UpperCamelCasedWord - */ - function camelize($word) - { - return str_replace(' ','',ucwords(preg_replace('/[^A-Z^a-z^0-9]+/',' ',$word))); - } - - // }}} - // {{{ underscore() - - /** - * Converts a word "into_it_s_underscored_version" - * - * Convert any "CamelCased" or "ordinary Word" into an - * "underscored_word". - * - * This can be really useful for creating friendly URLs. - * - * @access public - * @static - * @param string $word Word to underscore - * @return string Underscored word - */ - function underscore($word) - { - return strtolower(preg_replace('/[^A-Z^a-z^0-9]+/','_', - preg_replace('/([a-zd])([A-Z])/','1_2', - preg_replace('/([A-Z]+)([A-Z][a-z])/','1_2',$word)))); - } - - // }}} - // {{{ humanize() - - /** - * Returns a human-readable string from $word - * - * Returns a human-readable string from $word, by replacing - * underscores with a space, and by upper-casing the initial - * character by default. - * - * If you need to uppercase all the words you just have to - * pass 'all' as a second parameter. - * - * @access public - * @static - * @param string $word String to "humanize" - * @param string $uppercase If set to 'all' it will uppercase all the words - * instead of just the first one. - * @return string Human-readable word - */ - function humanize($word, $uppercase = '') - { - $uppercase = $uppercase == 'all' ? 'ucwords' : 'ucfirst'; - return $uppercase(str_replace('_',' ',preg_replace('/_id$/', '',$word))); - } - - // }}} - // {{{ variablize() - - /** - * Same as camelize but first char is underscored - * - * Converts a word like "send_email" to "sendEmail". It - * will remove non alphanumeric character from the word, so - * "who's online" will be converted to "whoSOnline" - * - * @access public - * @static - * @see camelize - * @param string $word Word to lowerCamelCase - * @return string Returns a lowerCamelCasedWord - */ - function variablize($word) - { - $word = Inflector::camelize($word); - return strtolower($word[0]).substr($word,1); - } - - // }}} - // {{{ tableize() - - /** - * Converts a class name to its table name according to rails - * naming conventions. - * - * Converts "Person" to "people" - * - * @access public - * @static - * @see classify - * @param string $class_name Class name for getting related table_name. - * @return string plural_table_name - */ - function tableize($class_name) - { - return Inflector::pluralize(Inflector::underscore($class_name)); - } - - // }}} - // {{{ classify() - - /** - * Converts a table name to its class name according to rails - * naming conventions. - * - * Converts "people" to "Person" - * - * @access public - * @static - * @see tableize - * @param string $table_name Table name for getting related ClassName. - * @return string SingularClassName - */ - function classify($table_name) - { - return Inflector::camelize(Inflector::singularize($table_name)); - } - - // }}} - // {{{ ordinalize() - - /** - * Converts number to its ordinal English form. - * - * This method converts 13 to 13th, 2 to 2nd ... - * - * @access public - * @static - * @param integer $number Number to get its ordinal value - * @return string Ordinal representation of given string. - */ - function ordinalize($number) - { - if (in_array(($number % 100),range(11,13))){ - return $number.'th'; - }else{ - switch (($number % 10)) { - case 1: - return $number.'st'; - break; - case 2: - return $number.'nd'; - break; - case 3: - return $number.'rd'; - default: - return $number.'th'; - break; - } - } - } - - // }}} - -} - -?> diff --git a/server/classes/headstart/personalization/DBConnectionPersonalization.php b/server/classes/headstart/personalization/DBConnectionPersonalization.php deleted file mode 100644 index 5ecc90cff..000000000 --- a/server/classes/headstart/personalization/DBConnectionPersonalization.php +++ /dev/null @@ -1,119 +0,0 @@ -db); - - $data = array(); - - - while($row = mysql_fetch_assoc($result)) { - $data[] = $row; - } - - return $data; - - } - - public function getPersonalRecommendations($user_id, $event_id, $max_recommendations) { - - $query = sprintf("SELECT DISTINCT content.contentID - FROM predictedscore, content - WHERE (predictedscore.contentID=content.contentID) - AND (predictedscore.userID = %d)", mysql_real_escape_string($user_id)); - - $query .= $this->createConferenceIDString($event_id); - - $query .= sprintf("order by score desc limit 0, %d", mysql_real_escape_string($max_recommendations)); - - $result = mysql_query($query, $this->db); - - $data = array(); - - if($result) { - while($row = mysql_fetch_assoc($result)) { - $data[] = $row; - } - } - - return $data; - - } - - public function addPersonalBookmark($user_id, $content_id) { - - $query = sprintf("INSERT INTO bookmarking(contentID, userID, created) - VALUES(%d, %d, now())", mysql_real_escape_string($content_id), mysql_real_escape_string($user_id)); - - - $result = mysql_query($query, $this->db); - - return ($result != false)?(true):(false); - - } - - public function removePersonalBookmark($user_id, $content_id) { - - $query = sprintf("DELETE FROM bookmarking - WHERE contentID=%d AND userID=%d", mysql_real_escape_string($content_id), mysql_real_escape_string($user_id)); - - - $result = mysql_query($query, $this->db); - - return ($result != false)?(true):(false); - - } - - public function getConferenceBookmarks($conference_id) { - - $query = sprintf("SELECT bookmarking.bookmarkingID, bookmarking.userID, bookmarking.contentID - FROM bookmarking, content - WHERE (bookmarking.contentID=content.contentID) AND (content.contentType <> %s)" - ,"\"no-paper\""); - - $query .= $this->createConferenceIDString($conference_id); - - $result = mysql_query($query); - - if (!$result) { - $message = 'Invalid Query: ' . mysql_error() . "\n"; - $message .= 'Full query: ' . $query; - die($message); - } - - $bookmark_array = array(); - - while ($row = mysql_fetch_assoc($result)) { - library\Toolkit::addOrInitiatlizeArrayKeyNumerical($bookmark_array, $row['contentID']); - } - - $return_array = array(); - - foreach ($bookmark_array as $id => $num) { - $return_array[] = array("id" => $id, "num" => $num); - } - - return $return_array; - } - -} diff --git a/server/classes/headstart/preprocessing/Readme.md b/server/classes/headstart/preprocessing/Readme.md deleted file mode 100644 index 2ad43d2c3..000000000 --- a/server/classes/headstart/preprocessing/Readme.md +++ /dev/null @@ -1,50 +0,0 @@ -Pre-processing scripts -====================== - -The pre-processing scripts are used to generate a data file for the visualization. This data file contains all the necessary information for Head Start. This includes - -* Metadata for a paper -* Position of a paper -* Information about which cluster a paper belongs to -* Cluster names - -Authors -------- -Peter Kraker (peter.kraker@tugraz.at) - - -Background ----------- - -You can find more on the background of the pre-processing steps in my dissertation (chapter 5) which can be found [here](http://media.obvsg.at/p-AC11312305-2001). - - -Requirements ------------- - -To get started on the pre-processing, you need two things: - -1. A file containing all the metadata of your documents. You can find a template for this file in output/metadata.csv. -2. A file containing similarity values between the documents. A template can be found in cooc.csv. Originally, the similarity values were based on readership co-occurrence, but there are many other measures that you can use (e.g. the number of keywords or tags that two papers have in common). - -If you want to write an adapter for your favorite data source, you can create a derived class from the base class at headstart\preprocessing\connection\Connection. - -Procedure ---------- - -There are at least two steps involved in the pre-processing pipe (see main.php for an example pipe): - -* Caluclation: this step takes care of multidimensional scaling (i.e. the ordination of the papers) and clustering (i.e. splitting the papers into research areas). See headstart\preprocessing\calculation\RCalculation for an example. - -* Naming: introduces names for each area. See headstart\preprocessing\naming\ApiNaming. - -Before you get started, please create a copy of the file /server/preprocessing/conf/config.ini and rename it to config\_local.ini. Now you need to set a few variables: - -* general/preprocessing_dir: Full path of the preprocessing directory. -* calculation/binary: Full path to your R binary. -* naming/api\_key\_zemanta and naming/api\_key\_calais: The naming the clusters is done with the help of the APIs of Zemanta and OpenCalais. Before you can get started you need to acquire (free) API keys for both of them and enter them in the config file. - -Then you can run the pipeline found in main.php. - - - diff --git a/server/classes/headstart/preprocessing/connection/Connection.php b/server/classes/headstart/preprocessing/connection/Connection.php deleted file mode 100644 index 45f9362a9..000000000 --- a/server/classes/headstart/preprocessing/connection/Connection.php +++ /dev/null @@ -1,26 +0,0 @@ -ini_array = $ini_array; - } - - public function establishConnection() { - - $ini = $this->ini_array["connection"]; - - $this->db = mysql_connect( $ini["host"] . ":" . $ini["port"], - $ini["username"], $ini["password"] ); - - if(!$this->db) - throw new \Exception("Failed to connect to MySQL: " . mysql_error()); - - mysql_set_charset('utf8',$this->db); - - mysql_select_db($ini["db"]); - } - - public function writeCoocFile($conference_id, $cut_off, $file_path) { - - $libraries = $this->getBookmarks($conference_id, $cut_off); - - $count = 0; - - $cooc = array(); - - foreach($libraries as $line) { - fwrite(STDOUT, "Processing line #".$count."\n"); - foreach ($line as $article) { - - if (!array_key_exists($article, $this->numBookmarks)) - continue; - - if($article != "") { - for($row=0; $row < count($line); $row++) { - $coarticle = $line[$row]; - - if (!array_key_exists($coarticle, $this->numBookmarks)) - continue; - - if($coarticle != "") { - if(!isset($cooc[$article.",".$coarticle])) { - $cooc[$article.",".$coarticle] = 1; - } else { - $cooc[$article.",".$coarticle] += 1; - fwrite(STDOUT, "Added to ".$article.",".$coarticle.": ".$cooc[$article.",".$coarticle]."\n"); - } - } - } - } - } - $count++; - } - - arsort($cooc, SORT_NUMERIC); - - - $file_out = library\Toolkit::openOrCreateFile($file_path); - foreach($cooc as $entry=>$count) { - if(intval($count) >= 1) { - fwrite($file_out, $entry.",".$count."\n"); - } - } - fclose($file_out); - } - - public function writeMetadataFile($conference_id, $file, $cut_off) { - - $query_authors = sprintf("SELECT DISTINCT content.contentID, author.name - FROM content, authorpresenter, author - WHERE (content.contentID=authorpresenter.contentID) AND (authorpresenter.authorID=author.authorID) - AND (content.contentType <> %s)" ,"\"no-paper\""); - - $query_authors .= $this->createConferenceIDString($conference_id); - - $result_authors = mysql_query($query_authors); - - $paper_authors = array(); - - while ($row = mysql_fetch_assoc($result_authors)) { - library\Toolkit::addOrInitiatlizeArrayKey($paper_authors, $row['contentID'], $row['name']); - } - - $query = sprintf("SELECT DISTINCT content.contentID, content.title, content.abstract, content.contentType, content.contentTrack, content.contentLink - FROM content - WHERE (content.contentType <> %s)" ,"\"no-paper\""); - - $query .= $this->createConferenceIDString($conference_id); - - $result = mysql_query($query); - - $file = fopen($file, "w+"); - - fputcsv($file, array('id', 'title', 'paper_abstract', 'published_in', 'year', 'url', 'readers', 'authors')); - - while ($row = mysql_fetch_assoc($result)) { - - if (!array_key_exists($row['contentID'], $this->numBookmarks)) { - $row[] = 0; - } else { - $row[] = $this->numBookmarks[$row['contentID']]; - } - - $author_string = ""; - - if(isset($paper_authors[$row['contentID']])) { - foreach($paper_authors[$row['contentID']] as $author) { - $author_string .= $author . ";"; - } - } - - $row[] = $author_string; - - fputcsv($file, $row); - } - - } - - protected function getBookmarks($conference_id, $cut_off) { - - $query = sprintf("SELECT bookmarking.bookmarkingID, bookmarking.userID, bookmarking.contentID - FROM bookmarking, presentation, eventsession, content - WHERE (bookmarking.contentID=presentation.contentID) AND (bookmarking.contentID = content.contentID) - AND (presentation.eventSessionID=eventsession.eventSessionID) AND (content.contentType <> %s)" - ,"\"no-paper\""); - - $query .= $this->createConferenceIDString($conference_id); - - $result = mysql_query($query); - - if (!$result) { - $message = 'Invalid Query: ' . mysql_error() . "\n"; - $message .= 'Full query: ' . $query; - die($message); - } - - $bookmark_array = array(); - - while ($row = mysql_fetch_assoc($result)) { - $bookmark_array[] = $row; - library\Toolkit::addOrInitiatlizeArrayKeyNumerical($this->numBookmarks, $row['contentID']); - } - - if(count($this->numBookmarks) > 0) { - - $this->numBookmarks = array_filter($this->numBookmarks, - function($item) use($cut_off) { - return $item >= $cut_off; - }); - } - - $library_array = array(); - - foreach($bookmark_array as $line) { - if(!isset($library_array[$line['userID']])) - $library_array[$line['userID']] = array($line['contentID']); - else - $library_array[$line['userID']][] = $line['contentID']; - } - - return $library_array; - - } - - protected function createConferenceIDString($conference_id) { - - $query = " AND ("; - - if(is_array($conference_id)) { - foreach($conference_id as $id) { - $query .= sprintf("(content.conferenceID=%d) OR ", mysql_real_escape_string($id)); - } - - $query = substr($query, 0, strlen($query)-4); - $query .= ")"; - - } else { - $query .= sprintf("content.conferenceID=%d)", mysql_real_escape_string($conference_id)); - } - - return $query; - - } - - protected function createApiString($api) { - - $query = " AND ("; - - if(is_array($api)) { - foreach($api as $value) { - $query .= sprintf("(concepts.conceptAPI=\"%s\") OR ", mysql_real_escape_string($value)); - } - - $query = substr($query, 0, strlen($query)-4); - $query .= ")"; - - } else { - $query .= sprintf("concepts.conceptAPI=\"%s\")", mysql_real_escape_string($api)); - } - - return $query; - - } -} diff --git a/server/classes/headstart/preprocessing/connection/DBConnectionTopics.php b/server/classes/headstart/preprocessing/connection/DBConnectionTopics.php deleted file mode 100644 index dbad697c9..000000000 --- a/server/classes/headstart/preprocessing/connection/DBConnectionTopics.php +++ /dev/null @@ -1,114 +0,0 @@ - $values) { - $rank = 1; - - foreach($values["topics"] as $value) { - - $query = sprintf("INSERT INTO concepts (contentID,conceptName,conceptAPI,conceptRank) VALUES(%d, \"%s\", \"%s\", %d)", - mysql_real_escape_string($id), mysql_real_escape_string($value), mysql_real_escape_string($api), - mysql_real_escape_string($rank)); - - $result = mysql_query($query, $this->db); - - if(!$result) { - echo mysql_error(); - } - - $rank++; - } - } - } - - public function writeKeywordsToDB($conference_id) { - - $query = "SELECT content.contentID, content.keywords - FROM content - WHERE content.contentID=content.contentID"; - - $query .= $this->createConferenceIDString($conference_id); - - $result = mysql_query($query); - - $keyword_array = array(); - - while ($row = mysql_fetch_assoc($result)) { - - $keyword_array[$row['contentID']]["topics"] = explode(", ", $row['keywords']); - - } - - $this->writeTopicsToDB($keyword_array, "keywords"); - - } - - public function writeCoocFile($conference_id, $cut_off, $file_path, $api, $normalizeStrings=false) { - $query = "SELECT DISTINCT concepts.contentID, concepts.conceptName, concepts.conceptAPI - FROM concepts, content - WHERE concepts.contentID = content.contentID"; - - $query .= $this->createConferenceIDString($conference_id); - - if($api != null) { - $query .= $this->createApiString($api); - } - - $result = mysql_query($query); - - if($result == false) { - throw new \Exception("Query failed! " . mysql_error()); - } - - $topic_array = array(); - - while ($row = mysql_fetch_assoc($result)) { - - $topic_array[$row['contentID']][] = $row['conceptName']; - - } - - $file = library\Toolkit::openOrCreateFile($file_path); - - foreach($topic_array as $id1 => $terms1) { - - if($normalizeStrings) { - $terms1 = array_map('headstart\library\Toolkit::normalizeString', $terms1); - } - - foreach($topic_array as $id2 => $terms2) { - if($id1 == $id2) - continue; - - if($normalizeStrings) { - $terms2 = array_map('headstart\library\Toolkit::normalizeString', $terms2); - } - - $terms1_unique = array_unique($terms1); - $terms2_unique = array_unique($terms2); - - $out = array($id1, $id2, count(array_intersect($terms1_unique, $terms2_unique))); - fputcsv($file, $out); - } - } - - fclose($file); - } - - protected function formatStrings($string) { - - } -} diff --git a/server/classes/headstart/preprocessing/connection/DBConnectionWordnet.php b/server/classes/headstart/preprocessing/connection/DBConnectionWordnet.php deleted file mode 100644 index b8b825e05..000000000 --- a/server/classes/headstart/preprocessing/connection/DBConnectionWordnet.php +++ /dev/null @@ -1,75 +0,0 @@ -queryDB($conference_id); - - $this->writeToFile($file_path, $result); - - } - - public function returnContents($conference_id, $limit_from = null, $limit_to = null) { - - $result = $this->queryDB($conference_id, $limit_from, $limit_to); - - $contents = array(); - - while ($row = mysql_fetch_assoc($result)) { - $contents[$row['contentID']] = $row['title'] . " " . $row['abstract']; - } - - return $contents; - } - - - protected function queryDB($conference_id, $limit_from = null, $limit_to = null) { - - $query = sprintf("SELECT DISTINCT content.contentID, content.title, content.abstract - FROM content - WHERE (content.contentType <> %s)" - ,"\"no-paper\""); - - $query .= $this->createConferenceIDString($conference_id); - - if(!is_null($limit_from) && !is_null($limit_to)) { - $query .= " LIMIT " . $limit_from . "," . $limit_to; - } - - $result = mysql_query($query, $this->db); - - return $result; - } - - protected function writeToFile($file_path, $result) { - - $file = library\Toolkit::openOrCreateFile($file_path); - - fputcsv($file, array('id', 'content')); - - while ($row = mysql_fetch_assoc($result)) { - - $merged_string = $row['title'] . " " . $row['abstract']; - - $merged_array = array($row['contentID'], $merged_string); - - fputcsv($file, $merged_array); - } - fclose($file); - - } - -} diff --git a/server/classes/headstart/preprocessing/main.php b/server/classes/headstart/preprocessing/main.php deleted file mode 100644 index 7e4467eea..000000000 --- a/server/classes/headstart/preprocessing/main.php +++ /dev/null @@ -1,22 +0,0 @@ -performCalculationAndWriteOutputToFile($WORKING_DIR); - -$naming = new naming\ApiNaming($ini_array); -$naming->performNaming($WORKING_DIR); diff --git a/server/classes/headstart/preprocessing/main_topics.php b/server/classes/headstart/preprocessing/main_topics.php deleted file mode 100644 index fa68a471d..000000000 --- a/server/classes/headstart/preprocessing/main_topics.php +++ /dev/null @@ -1,64 +0,0 @@ -establishConnection(); - -//$dbconnect->writeMetadataFile($ini_array["general"]["event_id"], -// $WORKING_DIR . $ini_array["output"]["metadata"], -// $ini_array["general"]["cut_off"]); -// -//$contents = $dbconnect->returnContents($ini_array["general"]["event_id"]); -// -$naming = new naming\ApiNaming($ini_array); -//$topics = $naming->executeCurlSensium($contents); -// -//foreach($topics as $api => $topic) { -// $cluster_names = $topics[$api]; -// $dbconnect->writeTopicsToDB($cluster_names, $api); -//} -// -//$topics = $naming->executeCurl($contents); -// -//foreach($topics as $api => $topic) { -// $cluster_names = $topics[$api]; -// $dbconnect->writeTopicsToDB($cluster_names, $api); -//} - -$dbconnect->writeCoocFile($ini_array["general"]["event_id"], - $ini_array["general"]["cut_off"], - $WORKING_DIR . $ini_array["output"]["cooc"], - array("calais", "zemanta", "sensium"), - false); - -$calculation = new calculation\RCalculation($ini_array); -$calculation->performCalculationAndWriteOutputToFile($WORKING_DIR); - -$naming->performNaming($WORKING_DIR); - diff --git a/server/classes/headstart/preprocessing/naming/ApiNaming.php b/server/classes/headstart/preprocessing/naming/ApiNaming.php deleted file mode 100644 index bf474e406..000000000 --- a/server/classes/headstart/preprocessing/naming/ApiNaming.php +++ /dev/null @@ -1,616 +0,0 @@ -ini_array["naming"]; - $ini_general = $this->ini_array["general"]; - $ini_output = $this->ini_array["output"]; - $ini_connection = $this->ini_array["connection"]; - - $WORKING_DIR = $working_dir; - - //Output of scaling and clustering script - $CLUSTERS = $WORKING_DIR . $ini_output["output_scaling_clustering"]; - - //Output file - $OUTPUT_FILE = $WORKING_DIR . $ini_output["output_naming"]; - - //Output file for the full API responses - $FULL_ZEMANTA = $WORKING_DIR . "full_responses/zemanta/"; - $FULL_CALAIS = $WORKING_DIR . "full_responses/calais/"; - - $cluster = array(); - $cluster_details = array("title" => array(), "abstracts" => array()); - $counts = array(); - $stop_words = array(); - $output = array(); - - $cluster_text_file = library\Toolkit::openFileForReading($CLUSTERS); - $stop_words_file = library\Toolkit::openFileForReading($ini_general["preprocessing_dir"] . $ini["stop_words"]); - - while (($line = fgetcsv($stop_words_file, null, "\t")) !== false) { - $this->stop_words[] = $line[0]; - } - - $row = 0; - - while (($line = fgetcsv($cluster_text_file, null)) !== false) { - if ($row == 0) { - $output[] = $line; - $row++; - continue; - } - - $output[] = $line; - - $line_cluster_id = intval($ini["line_cluster_id"]); - $line_title = intval($ini["line_title"]); - $line_abstract = intval($ini["line_abstract"]); - - if(!isset($line[$line_cluster_id])) { - throw new \Exception("Error in line: " . $line[0]); - } - - if (!isset($cluster[$line[$line_cluster_id]])) { - $cluster[$line[$line_cluster_id]] = $line[$line_title] . ". " . $line[$line_abstract]; - $cluster_details[$line[$line_cluster_id]]["title"] = $line[$line_title] . "."; - $cluster_details[$line[$line_cluster_id]]["abstracts"] = $line[$line_abstract]; - $counts[$line[$line_cluster_id]] = 1; - } else { - $cluster[$line[$line_cluster_id]] .= "\n" . $line[$line_title] . ". " . $line[$line_abstract]; - $cluster_details[$line[$line_cluster_id]]["title"] .= "\n" . $line[$line_title] . "."; - $cluster_details[$line[$line_cluster_id]]["abstracts"] .= "\n" . $line[$line_abstract]; - $counts[$line[$line_cluster_id]]++; - } - } - - $topics = $this->executeCurl($cluster); - - $cluster_names = array(); - - foreach ($cluster as $id => $text) { - - library\Toolkit::info($text); - - $categories = array("topics" => array(), "topics_title" => array()); - $categories_one = array("topics" => array()); - - //get 1-grams - $response_object_one = $this->getNgrams($cluster[$id], 1); - - $this->processNgrams($response_object_one, "topics_title", $categories_one, $ini["threshold_single_words"]); - - //get 2-, 3-, and 4-grams - for ($n = 4; $n >= 2; $n--) { - - $response_object = $this->getNgrams($cluster[$id], $n); - $response_object_title = $this->getNgrams($cluster_details[$id]["title"], $n); - - arsort($response_object); - - $this->processNgrams($response_object_title, "topics_title", $categories, $ini["threshold_title_ngrams"]); - - $this->processNgrams($response_object, "topics", $categories, $ini["threshold_title_abstract_ngrams"]); - - } - - library\Toolkit::info($id . ": " . print_r($categories, true)); - - $cluster_names_calais = $topics["calais"][$id]; - $cluster_names_zemanta = $topics["zemanta"][$id]; - - $cluster_name = ""; - - //Search for 4-, 3-, and 2-title-grams in Calais concepts - $cluster_name = $this->compareConcepts($cluster_names_calais, $categories, "topics_title"); - - //If that fails, search for 4-, 3-, and 2-title-grams in Zemanta concepts - if($cluster_name == "") { - $cluster_name = $this->compareConcepts($cluster_names_zemanta, $categories, "topics_title"); - } - - //If that fails, search for 4-, 3-, and 2-grams in Zemanta concepts - if($cluster_name == "") { - $cluster_name = $this->compareConcepts($cluster_names_zemanta, $categories, "topics"); - } - - //If that fails, search for 1-grams in Zemanta concepts - if ($cluster_name == "") { - - $count_new = 0; - - $filtered_array = array_filter($categories_one["topics_title"], function ($item) { - return !in_array($item, $this->ini_array["naming"]["forbidden_names"]); - }); - - library\Toolkit::info("Filtered Array: " . print_r($filtered_array, true)); - - - foreach ($cluster_names_zemanta["topics_format"] as $name) { - $key = array_search($name, $filtered_array); - if ($key !== false) { - $cluster_name = $cluster_names_zemanta["topics"][$count_new]; - break; - } - $count_new++; - } - } - - //If everything above fails, name the cluster after the most important concept - //returned by (1) Zemanta or (2) Calais. Finally, name the cluster - //"Miscellaneous" - if ($cluster_name == "") { - if(isset($cluster_names_zemanta["topics"][0])) { - $cluster_name = $cluster_names_zemanta["topics"][0]; - } elseif (isset($cluster_names_calais["topics"][0])) { - $cluster_name = $cluster_names_calais["topics"][0]; - } else { - $cluster_name = "Miscellaneous"; - } - } - - $cluster_id = library\Toolkit::generateUriFromString($cluster_name); - - $cluster_temp = $cluster_id; - $count = 1; - - foreach($cluster_names as $attributes) { - if($attributes["uri"] == $cluster_id) { - $cluster_id = $cluster_temp . "-" . $count; - $count++; - } - } - - $cluster_names[$id] = array("name" => $cluster_name, "uri" => $cluster_id); - - library\Toolkit::info("*** CLUSTER NAME: " . $cluster_name . "\n"); - - //Write full response for later consultation - - $this->getFullResponseZemanta($text, $cluster_id, $FULL_ZEMANTA); - - $this->getFullResponseCalais($text, $cluster_id, $FULL_CALAIS); - } - - //add areas to output array - array_push($output[0], "area_uri", "area"); - - library\Toolkit::info(sizeof($output) . "\n"); - $size = sizeof($output); - - for($counter = 1; $counter < $size; $counter++) { - - $cluster_id = $output[$counter][$line_cluster_id]; - array_push($output[$counter], $cluster_names[$cluster_id]["uri"], $cluster_names[$cluster_id]["name"]); - - library\Toolkit::info("$counter\n"); - } - - $output_handle = library\Toolkit::openOrCreateFile($OUTPUT_FILE); - - foreach ($output as $line) { - fputcsv($output_handle, $line); - } - - fclose($output_handle); - - $UNIQUE_ID = $ini_output["unique_id"]; - $TITLE = $ini_output["title"]; - $persistence = new persistence\SQLitePersistence($ini_connection["sqlite_db"]); - - $header = array_shift($output); - $json_array = array(); - foreach ($output as $row) { - $json_array[] = array_combine($header, $row); - } - - $json = json_encode($json_array); - - $persistence->createVisualization($UNIQUE_ID, $TITLE, $json); - } - - public function executeCurl($clusters) { - - //Initialize cURL multi - $mh_calais_array = array(); - $counter = 0; - $mh_calais_array_counter = 0; - - $mh_zemanta = curl_multi_init(); - $curl_calais_array = array(); - $curl_zemanta_array = array(); - - foreach ($clusters as $id => $text) { - - //Open Calais only allows only for 4 requests at a given time - if($counter % 4 == 0) { - $mh_calais_array_counter++; - $mh_calais_array[$mh_calais_array_counter] = curl_multi_init(); - } - $counter++; - - $curl_calais_array[$id] = $this->createNewCurlHandleCalais($text, "application/json"); - $curl_zemanta_array[$id] = $this->createNewCurlHandleZemanta($text, "json"); - - curl_multi_add_handle($mh_calais_array[$mh_calais_array_counter], $curl_calais_array[$id]); - curl_multi_add_handle($mh_zemanta, $curl_zemanta_array[$id]); - - } - - $active1 = null; - $active2 = null; - - // Run cURL handles - foreach($mh_calais_array as $mh_calais) { - do { - usleep(100000); - $status = curl_multi_exec($mh_calais, $active1); - - } while ($status === CURLM_CALL_MULTI_PERFORM || $active1 > 0); - - $active1 = null; - } - - do { - - usleep(10000); - $status = curl_multi_exec($mh_zemanta, $active2); - $info = curl_multi_info_read($mh_zemanta); - - } while ($status === CURLM_CALL_MULTI_PERFORM || $active2 > 0); - - $topics = array("calais" => array(), "zemanta" => array()); - - foreach($clusters as $id => $cluster) { - - $result_calais = curl_multi_getcontent($curl_calais_array[$id]); - $topics["calais"][$id] = $this->getClusterNamesCalais($result_calais); - curl_multi_remove_handle($mh_calais, $curl_calais_array[$id]); - - $result_zemanta = curl_multi_getcontent($curl_zemanta_array[$id]); - $topics["zemanta"][$id] = $this->getClusterNamesZemanta($result_zemanta); - curl_multi_remove_handle($mh_zemanta, $curl_zemanta_array[$id]); - } - - curl_multi_close($mh_calais); - curl_multi_close($mh_zemanta); - - return $topics; - - } - - public function executeCurlSensium($clusters) { - //Initialize cURL multi - $mh_sensium_array = array(); - $counter = 0; - $mh_sensium_array_counter = 0; - - foreach ($clusters as $id => $text) { - - //Open Calais only allows only for 4 requests at a given time - if($counter % 2 == 0) { - $mh_sensium_array_counter++; - $mh_sensium_array[$mh_sensium_array_counter] = curl_multi_init(); - } - $counter++; - - $curl_sensium_array[$id] = $this->createNewCurlHandleSensium($text); - - curl_multi_add_handle($mh_sensium_array[$mh_sensium_array_counter], $curl_sensium_array[$id]); - - } - - $active1 = null; - - // Run cURL handles - foreach($mh_sensium_array as $mh_sensium) { - do { - usleep(100000); - $status = curl_multi_exec($mh_sensium, $active1); - - } while ($status === CURLM_CALL_MULTI_PERFORM || $active1 > 0); - - $active1 = null; - } - - $topics = array("sensium" => array()); - - foreach($clusters as $id => $cluster) { - - $result_sensium = curl_multi_getcontent($curl_sensium_array[$id]); - $topics["sensium"][$id] = $this->getClusterNamesSensium($result_sensium); - curl_multi_remove_handle($mh_sensium, $curl_sensium_array[$id]); - - } - - curl_multi_close($mh_sensium); - - return $topics; - } - - private function compareConcepts($cluster_names, $categories, $categories_part) { - - $cluster_name = ""; - $count = 0; - - foreach ($cluster_names["topics_format"] as $name) { - - $key = array_search($name, $categories[$categories_part]); - if (!$key === false) { - $key = array_search($name, $categories["topics"]); - } - - if ($key !== false) { - $cluster_name = $cluster_names["topics"][$count]; - break; - } - $count++; - } - - return $cluster_name; - - } - - private function processNgrams($response_object, $category_title, &$categories_object, $threshold) { - - $categories_object[$category_title] = array(); - - foreach ($response_object as $name => $count) { - $name_array = preg_split('/ /', $name); - - //first and last word should not be a stop word - if (!in_array($name_array[0], $this->stop_words) - && !in_array($name_array[count($name_array) - 1], $this->stop_words) - && $count >= $threshold) { - $categories_object[$category_title][] = $name; - } - } - } - - private function getNgrams($text, $n = 3) { - - $ngrams = array(); - $new_string = preg_replace('/[^a-zA-Z0-9\s]/', '', $text); - $new_string = strtolower($new_string); - $tokens = preg_split('/\s/', $new_string); - $len = count($tokens); - for ($i = 0; $i < $len - $n; $i++) { - $ng = ''; - for ($j = $i; $j < $i + $n; $j++) { - $ng .= \Inflector::singularize(trim($tokens[$j])) . " "; - } - - $ng = trim($ng); - - if (isset($ngrams[$ng])) - $ngrams[$ng]++; - else - $ngrams[$ng] = 1; - } - return $ngrams; - } - - private function createNewCurlHandleCalais($text, $format) { - - $apiKey = $this->ini_array["naming"]["api_key_calais"]; - - $contentType = "text/xml"; - $outputFormat = $format; - - $metaDataType = "GenericRelations,SocialTags"; - - $restURL = "http://api.opencalais.com/enlighten/rest/"; - $paramsXML = " " . - " " . - " " . - "Educational Technology" . - " - "; - - - $data = "licenseID=" . urlencode($apiKey); - $data .= "¶msXML=" . urlencode($paramsXML); - $data .= "&content=" . urlencode($text); - - $ch = curl_init(); - curl_setopt($ch, CURLOPT_URL, $restURL); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); - curl_setopt($ch, CURLOPT_HEADER, 0); - curl_setopt($ch, CURLOPT_POSTFIELDS, $data); - curl_setopt($ch, CURLOPT_POST, 1); - curl_setopt($ch, CURLOPT_TIMEOUT, 60); - - return $ch; - - } - - private function createNewCurlHandleZemanta($text, $format) { - - $url = 'http://api.zemanta.com/services/rest/0.0/'; - $key = $this->ini_array["naming"]["api_key_zemanta"]; - $method = "zemanta.suggest"; - $categories = "dmoz"; - - /* It is easier to deal with arrays */ - $args = array( - 'method' => $method, - 'api_key' => $key, - 'text' => $text, - 'format' => $format, - 'return_rdf_links' => 1 - , 'return_categories' => 1 - , 'return_keywords' => 1 - , 'return_images' => 0 - , 'return_categories' => $categories - ); - - - $ch = curl_init(); - curl_setopt($ch, CURLOPT_URL, $url); - curl_setopt($ch, CURLOPT_POST, 1); - curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($args, '', '&')); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - - return $ch; - - } - - private function createNewCurlHandleSensium($text) { - - $url = 'https://api.sensium.io/v1/extract'; - $key = $this->ini_array["naming"]["api_key_sensium"]; - - $args = array( - 'apiKey' => $key, - 'text' => $text, - "extractors" => array("Summary") - ); - - $header_args = array( - 'Content-Type: application/json' - , 'Accept: application/json' - //, 'Accept-encoding: \'gzip\'' - ); - - $json_args = json_encode($args); - - $ch = curl_init(); - curl_setopt($ch, CURLOPT_URL, $url); - curl_setopt($ch, CURLOPT_HTTPHEADER, $header_args); - curl_setopt($ch, CURLOPT_POST, 1); - //curl_setopt($ch, CURLOPT_PROXY, '127.0.0.1:8888'); - //curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($args, '', '&')); - curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); - curl_setopt($ch, CURLOPT_POSTFIELDS, $json_args); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - - return $ch; - - } - - private function getClusterNamesCalais($response) { - - $response_object = json_decode($response); - - $categories = array("topics" => array(), "topics_format" => array()); - - foreach ($response_object as $entity => $attributes) { - if ($entity == "doc") { - continue; - } - - switch ($attributes->_typeGroup) { - case "socialTag" : { - $categories["topics"][] = $attributes->name; - - $final_string = library\Toolkit::normalizeString($attributes->name); - - $categories["topics_format"][] = $final_string; - break; - } - - default : - continue; - } - } - - library\Toolkit::info("Calais: " . print_r($categories["topics_format"], true)); - - return $categories; - } - - private function getClusterNamesZemanta($response) { - - $response_object = json_decode($response); - - $categories = array("topics" => array(), "topics_format" => array()); - - $links = $response_object->markup->links; - $count = 0; - - foreach ($links as $link) { - foreach ($link->target as $site) { - if (stristr($site->url, "http://dbpedia.org")) { - if ($link->entity_type != NULL) { - foreach ($link->entity_type as $entity_type) { - if (isset($categories[$entity_type])) { - $categories[$entity_type][] = $site->title; - } else { - $categories[$entity_type] = array($site->title); - } - } - } else { - $categories["topics"][] = $site->title; - $categories["topics_format"][] = library\Toolkit::normalizeString($site->title); - $count++; - } - } - } - } - - library\Toolkit::info("Zemanta: " . print_r($categories["topics_format"], true)); - - return $categories; - } - - private function getClusterNamesSensium($response) { - - $response_object = json_decode($response); - - $categories = array("topics" => array(), "topics_format" => array()); - - foreach ($response_object->summary->keyPhrases as $phrase) { - - $categories["topics"][] = $phrase->text; - - $final_string = library\Toolkit::normalizeString($phrase->text); - - $categories["topics_format"][] = $final_string; - } - - library\Toolkit::info("Sensium: " . print_r($categories["topics_format"], true)); - - return $categories; - } - - private function getFullResponseCalais($text, $uri, $dir) { - - $ch = $this->createNewCurlHandleCalais($text, "XML/RDF"); - $response = curl_exec($ch); - curl_close($ch); - - library\Toolkit::putContentsToFile($dir . $uri . ".rdf", $response); - } - - private function getFullResponseZemanta($text, $uri, $dir) { - - $ch = $this->createNewCurlHandleZemanta($text, "rdfxml"); - $response = curl_exec($ch); - curl_close($ch); - - library\Toolkit::putContentsToFile($dir . $uri . ".rdf", $response); - } -} diff --git a/server/classes/headstart/preprocessing/naming/KeywordNaming.php b/server/classes/headstart/preprocessing/naming/KeywordNaming.php deleted file mode 100644 index 3f91df2c1..000000000 --- a/server/classes/headstart/preprocessing/naming/KeywordNaming.php +++ /dev/null @@ -1,129 +0,0 @@ - $current_array) { - $counted_sorted_array = array_count_values($current_array); - arsort($counted_sorted_array); - $important_terms = array_keys(array_slice($counted_sorted_array, 0, $num_keywords)); - $final_string = implode(", ", $important_terms); - $result_array[$key] = $final_string; - } - - foreach ($array as $key => $entry) { - $array[$key]["area"] = $result_array[$entry[$id]]; - } - } - - public function performNamingTfIdf(&$array, $num_keywords, $keyword_separator, $taxonomy_separator, $id = "area_uri", $subjects = "subject") { - - $working_array = array(); - - foreach ($array as $entry) { - $uri = $entry[$id]; - $keywords = explode($keyword_separator, $entry[$subjects]); - foreach ($keywords as &$keyword) { - $keyword = preg_replace("/\/", "&#x$1;", $keyword); - if ($taxonomy_separator != null) { - $keyword = substr($keyword, strrpos($keyword, $taxonomy_separator) + 1); - } - } - - //$working_array[$uri] = array(); - - if (isset($working_array[$uri]["all_terms"])) { - $working_array[$uri]["all_terms"] = array_merge($working_array[$uri]["all_terms"], $keywords); - } else { - $working_array[$uri]["all_terms"] = $keywords; - } - } - - $num_docs_per_term = array(); - - foreach ($working_array as $uri => $current_array) { - $current_array["all_terms"] = array_filter($current_array["all_terms"]); - $current_array["all_terms"] = array_map('trim', $current_array["all_terms"]); - array_walk($current_array["all_terms"], function(&$value, &$key) { - $value = ucfirst($value); - }); - - $unique_terms = array_unique($current_array["all_terms"]); - $working_array[$uri]["unique_terms"] = $unique_terms; - - foreach ($unique_terms as $term) { - if (!isset($num_docs_per_term[$term])) - $num_docs_per_term[$term] = 1; - else - $num_docs_per_term[$term] += 1; - } - } - - $result_array = array(); - $totalDocs = count($working_array); - - foreach ($working_array as $uri => $current_array) { - - $current_array["all_terms"] = array_replace($current_array["all_terms"], array_fill_keys(array_keys($current_array["all_terms"], null), '')); - - $num_keywords_per = array_count_values($current_array["all_terms"]); - $wordCount = count($current_array["all_terms"]); - $current_result_array = array(); - - foreach ($current_array["unique_terms"] as $term) { - $termCount = isset($num_keywords_per[$term]) ? ($num_keywords_per[$term]) : (0); - $docsWithTerm = $num_docs_per_term[$term]; - - $tf = $termCount / $wordCount; - $idf = log($totalDocs / $docsWithTerm, 2); - $tfidf = $tf * $idf; - - //$tfidf_short = round($tfidf,2); - //$current_result_array[$term. " " . $tfidf_short] = $tfidf; - - $current_result_array[$term] = $tfidf; - } - - arsort($current_result_array); - - $important_terms = array_keys(array_slice($current_result_array, 0, $num_keywords)); - - $final_string = implode(", ", $important_terms); - $result_array[$uri] = $final_string; - } - - foreach ($array as $uri => $entry) { - $array[$uri]["area"] = $result_array[$entry[$id]]; - } - } - -} diff --git a/server/classes/headstart/preprocessing/naming/Naming.php b/server/classes/headstart/preprocessing/naming/Naming.php deleted file mode 100644 index 7c73c0437..000000000 --- a/server/classes/headstart/preprocessing/naming/Naming.php +++ /dev/null @@ -1,24 +0,0 @@ -ini_array = $ini_array; - - } - - public function performNaming(&$array, $num_keywords) { - - } - -} diff --git a/server/classes/headstart/preprocessing/test_rplos.php b/server/classes/headstart/preprocessing/test_rplos.php deleted file mode 100644 index 3ca817d9c..000000000 --- a/server/classes/headstart/preprocessing/test_rplos.php +++ /dev/null @@ -1,32 +0,0 @@ -performCalculationAndReturnOutputAsJSON($WORKING_DIR, "dna"); - -$output_json = end($output); - -//echo $output_json; - -$persistence = new persistence\SQLitePersistence($ini_array["connection"]["sqlite_db"]); -//$persistence->createVisualization("test_rplos", "My Test RPLOS", $output_json); - -$persistence->writeRevision("test_rplos", $output_json); - -//$naming = new naming\ApiNaming($ini_array); -//$naming->performNaming($WORKING_DIR); - diff --git a/server/preprocessing/conf/config.ini b/server/preprocessing/conf/config.ini index 5ad9b1364..8fe88b82f 100644 --- a/server/preprocessing/conf/config.ini +++ b/server/preprocessing/conf/config.ini @@ -64,25 +64,3 @@ binary = "/usr/bin/Rscript" # Relative path from preprocessing_dir to the R script script = "other-scripts/text_similarity.R" mode = "bookmarks" - -[naming] -api_key_zemanta = "" -api_key_calais = "" - -# Constants for column numbers in the scaling and clustering output -line_cluster_id = 10 -line_title = 1 -line_abstract = 2 - -# English stop word file -stop_words = "resources/english.stop"; - -# Thresholds for n-grams -threshold_title_ngrams = 2; -threshold_title_abstract_ngrams = 3; -threshold_single_words = 4; - -forbidden_names[] = "research" -forbidden_names[] = "science" -forbidden_names[] = "inquiry" -forbidden_names[] = "learning" diff --git a/server/services/search.php b/server/services/search.php index 2a1d5ec37..e2b927da7 100644 --- a/server/services/search.php +++ b/server/services/search.php @@ -56,8 +56,7 @@ function search($service_integration, $dirty_query , $transform_query_tolowercase = true , $retrieve_cached_map = true, $params_for_id = null, $num_labels = 3 , $id = "area_uri", $subjects = "subject" - , $precomputed_id = null, $do_clean_query = true - , $api_flavor = "stable") { + , $precomputed_id = null, $do_clean_query = true) { $INI_DIR = dirname(__FILE__) . "/../preprocessing/conf/"; $ini_array = library\Toolkit::loadIni($INI_DIR); $repo2snapshot = array("plos" => "PLOS" @@ -80,6 +79,10 @@ function search($service_integration, $dirty_query $persistence_backend = isset($ini_array["general"]["persistence_backend"]) ? ($ini_array["general"]["persistence_backend"]) : "legacy"; + $api_url = $ini_array["general"]["api_url"]; + $api_flavor = isset($ini_array["general"]["api_flavor"]) + ? ($ini_array["general"]["api_flavor"]) + : "stable"; $query = ($do_clean_query === true) ?(cleanQuery($dirty_query, $transform_query_tolowercase)) @@ -96,7 +99,7 @@ function search($service_integration, $dirty_query $params_for_id_creation = ($params_for_id === null)?($params_json):(packParamsJSON($params_for_id, $post_params)); if ($persistence_backend === "api") { - $route = $ini_array["general"]["api_url"] . "persistence/" . "createID"; + $route = $api_url . $api_flavor . "/" . "persistence/" . "createID"; $payload = json_encode(array("params" => $post_params, "param_types" => $param_types)); $res = library\CommUtils::call_api($route, $payload); @@ -115,7 +118,7 @@ function search($service_integration, $dirty_query if($retrieve_cached_map) { if ($persistence_backend === "api") { - $route = $ini_array["general"]["api_url"] . "persistence/" . "getLastVersion/" . $database; + $route = $api_url . $api_flavor . "/" . "persistence/" . "getLastVersion/" . $database; $payload = json_encode(array("vis_id" => $unique_id, "details" => false, "context" => false)); @@ -142,7 +145,7 @@ function search($service_integration, $dirty_query $WORKING_DIR = $ini_array["general"]["preprocessing_dir"] . $ini_array["output"]["output_dir"]; if ($processing_backend === "api") { - $route = $ini_array["general"]["api_url"] . $endpoint . "/search"; + $route = $api_url . $api_flavor . "/" . $endpoint . "/search"; $payload = json_encode($post_params); $res = library\CommUtils::call_api($route, $payload); if ($res["httpcode"] != 200) { @@ -179,7 +182,7 @@ function search($service_integration, $dirty_query $vis_title = $service_integration; if ($persistence_backend === "api") { - $route = $ini_array["general"]["api_url"] . "persistence/" . "existsVisualization/" . $database; + $route = $api_url . $api_flavor . "/" . "persistence/" . "existsVisualization/" . $database; $payload = json_encode(array("vis_id" => $unique_id)); $res = library\CommUtils::call_api($route, $payload); if ($res["httpcode"] != 200) { @@ -194,7 +197,7 @@ function search($service_integration, $dirty_query if (!$exists) { if ($persistence_backend === "api") { - $route = $ini_array["general"]["api_url"] . "persistence/" . "createVisualization/" . $database; + $route = $api_url . $api_flavor . "/" . "persistence/" . "createVisualization/" . $database; $payload = json_encode(array("vis_id" => $unique_id, "vis_title" => $vis_title, "data" => $input_json, @@ -210,7 +213,7 @@ function search($service_integration, $dirty_query } } else { if ($persistence_backend === "api") { - $route = $ini_array["general"]["api_url"] . "persistence/" . "writeRevision/" . $database; + $route = $api_url . $api_flavor . "/" . "persistence/" . "writeRevision/" . $database; $payload = json_encode(array("vis_id" => $unique_id, "data" => $input_json)); $res = library\CommUtils::call_api($route, $payload); From 1d75bff522da8e675017a4da9c753525256bdeca Mon Sep 17 00:00:00 2001 From: chreman Date: Thu, 20 May 2021 14:23:25 +0200 Subject: [PATCH 20/54] finish spinning off persistence API --- docker-compose.yml | 12 ++++++-- server/workers/persistence/Dockerfile | 15 +++++++++ server/workers/persistence/requirements.txt | 7 ----- server/workers/persistence/src/app.py | 34 +++++++++++++++++++-- 4 files changed, 57 insertions(+), 11 deletions(-) create mode 100644 server/workers/persistence/Dockerfile diff --git a/docker-compose.yml b/docker-compose.yml index 6b581af78..f4e0b0e8f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -47,13 +47,21 @@ services: REDIS_PORT: "${REDIS_PORT}" REDIS_PASSWORD: "${REDIS_PASSWORD}" REDIS_DB: "${REDIS_DB}" + command: ["gunicorn", "--workers", "10", "--threads", "2", "-b", "0.0.0.0:${API_PORT}", "app:app", "--timeout", "300"] + depends_on: + - redis + networks: + - headstart + + persistence: + image: persistence:${SERVICE_VERSION} + restart: always + environment: POSTGRES_USER: "${POSTGRES_USER}" POSTGRES_PASSWORD: "${POSTGRES_PASSWORD}" POSTGRES_HOST: "${POSTGRES_HOST}" POSTGRES_PORT: "${POSTGRES_PORT}" command: ["gunicorn", "--workers", "10", "--threads", "2", "-b", "0.0.0.0:${API_PORT}", "app:app", "--timeout", "300"] - depends_on: - - redis networks: - headstart diff --git a/server/workers/persistence/Dockerfile b/server/workers/persistence/Dockerfile new file mode 100644 index 000000000..9d552ec68 --- /dev/null +++ b/server/workers/persistence/Dockerfile @@ -0,0 +1,15 @@ +FROM python:3.6.10-slim + +MAINTAINER Chris Kittel "christopher.kittel@openknowledgemaps.org" + +RUN apt-get update +RUN apt-get install -y --no-install-recommends gcc +RUN apt-get install -y --no-install-recommends git + +WORKDIR /persistence +COPY workers/persistence/requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt +RUN pip install git+https://github.com/python-restx/flask-restx +COPY workers/persistence/src/ ./ + + diff --git a/server/workers/persistence/requirements.txt b/server/workers/persistence/requirements.txt index 744213065..78e0d05ad 100644 --- a/server/workers/persistence/requirements.txt +++ b/server/workers/persistence/requirements.txt @@ -3,12 +3,5 @@ flask-cors flask_sqlalchemy flask_restx Werkzeug -marshmallow gunicorn -redis -hiredis -aioredis -pandas -pyyaml -flasgger psycopg2-binary diff --git a/server/workers/persistence/src/app.py b/server/workers/persistence/src/app.py index 2de230576..580adf4d6 100644 --- a/server/workers/persistence/src/app.py +++ b/server/workers/persistence/src/app.py @@ -8,9 +8,40 @@ from apis.persistence import persistence_ns from config import settings -from utils.monkeypatches import ReverseProxied, __schema__, specs_url, _register_apidoc, inject_flasgger import logging +class ReverseProxied(object): + '''Wrap the application in this middleware and configure the + front-end server to add these headers, to let you quietly bind + this to a URL other than / and to an HTTP scheme that is + different than what is used locally. + + location /myprefix { + proxy_pass http://192.168.0.1:5001; + proxy_set_header Host $host; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Scheme $scheme; + proxy_set_header X-Script-Name /myprefix; + } + + :param app: the WSGI application + ''' + def __init__(self, app): + self.app = app + + def __call__(self, environ, start_response): + script_name = environ.get('HTTP_X_SCRIPT_NAME', '') + if script_name: + environ['SCRIPT_NAME'] = script_name + path_info = environ['PATH_INFO'] + if path_info.startswith(script_name): + environ['PATH_INFO'] = path_info[len(script_name):] + + scheme = environ.get('HTTP_X_SCHEME', '') + if scheme: + environ['wsgi.url_scheme'] = scheme + return self.app(environ, start_response) + def api_patches(app, settings): Api._register_apidoc = _register_apidoc @@ -33,7 +64,6 @@ def api_patches(app, settings): app.config.from_object('config.settings') handler = logging.StreamHandler(sys.stdout) handler.setLevel(app.logger.level) -app = inject_flasgger(app) app.wsgi_app = ProxyFix(app.wsgi_app, x_proto=1, x_port=1, x_for=1, x_host=1, x_prefix=1) app.wsgi_app = ReverseProxied(app.wsgi_app) CORS(app, expose_headers=["Content-Disposition", "Access-Control-Allow-Origin"]) From 0d614f7d9804570fd6b4cf786c6a26efc5e285a2 Mon Sep 17 00:00:00 2001 From: chreman Date: Thu, 20 May 2021 16:26:33 +0200 Subject: [PATCH 21/54] add example env --- .../flavorconfigs/{dev.env => example.env} | 2 +- server/workers/flavorconfigs/stable.env | 15 --------------- 2 files changed, 1 insertion(+), 16 deletions(-) rename server/workers/flavorconfigs/{dev.env => example.env} (73%) delete mode 100644 server/workers/flavorconfigs/stable.env diff --git a/server/workers/flavorconfigs/dev.env b/server/workers/flavorconfigs/example.env similarity index 73% rename from server/workers/flavorconfigs/dev.env rename to server/workers/flavorconfigs/example.env index 32a97b88e..cea9a0a59 100644 --- a/server/workers/flavorconfigs/dev.env +++ b/server/workers/flavorconfigs/example.env @@ -12,4 +12,4 @@ API_PORT=5001 REDIS_HOST=dev_redis_1 REDIS_PORT=6380 REDIS_DB=0 -REDIS_PASSWORD=3tUrLK8a9kDo6m6sx7qKQaM5RLjLsfd4kkbq39LmxawMucvktRQZ9GfGeL3rNasSBFJyaYsQLFXHPAP9cWpnSSoB3ioppQh4mcMGaRWCe42tfWTQw7gLVX6XPs9LCdan \ No newline at end of file +REDIS_PASSWORD=redis_password \ No newline at end of file diff --git a/server/workers/flavorconfigs/stable.env b/server/workers/flavorconfigs/stable.env deleted file mode 100644 index 2ca52db6a..000000000 --- a/server/workers/flavorconfigs/stable.env +++ /dev/null @@ -1,15 +0,0 @@ -COMPOSE_PROJECT_NAME=stable -SERVICE_VERSION=d47a0d65495493e341d51d68e85af1927aa76e2f -NETWORK=dockerrefactoring -POSTGRES_DB=postgres -POSTGRES_USER=headstart -POSTGRES_PASSWORD=testpassword -POSTGRES_HOST=stable_db_1 -POSTGRES_PORT=5432 -PGADMIN_DEFAULT_EMAIL=christopher.kittel@openknowledgemaps.org -PGADMIN_DEFAULT_PASSWORD=testpassword -API_PORT=5001 -REDIS_HOST=stable_redis_1 -REDIS_PORT=6379 -REDIS_DB=0 -REDIS_PASSWORD=3tUrLK8a9kDo6m6sx7qKQaM5RLjLsfd4kkbq39LmxawMucvktRQZ9GfGeL3rNasSBFJyaYsQLFXHPAP9cWpnSSoB3ioppQh4mcMGaRWCe42tfWTQw7gLVX6XPs9LCdan \ No newline at end of file From d9dfd800b2d5defadb3f783bca0613f950124048 Mon Sep 17 00:00:00 2001 From: chreman Date: Thu, 20 May 2021 16:39:38 +0200 Subject: [PATCH 22/54] add persistence image --- server/workers/build_docker_images.sh | 2 +- server/workers/persistence/src/app.py | 4 ---- server/workers/triple/Dockerfile | 1 - 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/server/workers/build_docker_images.sh b/server/workers/build_docker_images.sh index e9147bf92..b32b015b7 100755 --- a/server/workers/build_docker_images.sh +++ b/server/workers/build_docker_images.sh @@ -1,6 +1,6 @@ #!/bin/bash SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -services=("api" "triple" "gsheets" "dataprocessing" "base" "pubmed" "openaire") +services=("api" "persistence" "triple" "gsheets" "dataprocessing" "base" "pubmed" "openaire") for service in ${services[@]}; do docker build -f "$SCRIPT_DIR/../workers/$service/Dockerfile" -t "$service:`git rev-parse HEAD`" "$SCRIPT_DIR/../" done diff --git a/server/workers/persistence/src/app.py b/server/workers/persistence/src/app.py index 580adf4d6..5a92babfa 100644 --- a/server/workers/persistence/src/app.py +++ b/server/workers/persistence/src/app.py @@ -44,10 +44,6 @@ def __call__(self, environ, start_response): def api_patches(app, settings): - Api._register_apidoc = _register_apidoc - Api.__schema__ = __schema__ - Api.specs_url = specs_url - api_fixed = Api( app, title="Head Start API", diff --git a/server/workers/triple/Dockerfile b/server/workers/triple/Dockerfile index ba6331afd..df9cef1d3 100644 --- a/server/workers/triple/Dockerfile +++ b/server/workers/triple/Dockerfile @@ -11,6 +11,5 @@ RUN pip install --no-cache-dir -r requirements.txt RUN python -m spacy download xx_ent_wiki_sm COPY workers/triple/src/ ./triple/src COPY workers/triple/run_triple.py . -COPY workers/triple/es_config.json . ENTRYPOINT python run_triple.py From 91c7fe475e1e80efc01d0de07912ac9088fb861a Mon Sep 17 00:00:00 2001 From: chreman Date: Fri, 21 May 2021 11:23:42 +0200 Subject: [PATCH 23/54] add flavor to example config --- server/preprocessing/conf/config.ini | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/preprocessing/conf/config.ini b/server/preprocessing/conf/config.ini index 8fe88b82f..efea09612 100644 --- a/server/preprocessing/conf/config.ini +++ b/server/preprocessing/conf/config.ini @@ -14,6 +14,8 @@ vis_path = "path/to/vis" services_path = "server/services/" # URL to OKMaps API api_url = "http://127.0.0.1/api/" +# flavor of API, default: "stable" +api_flavor = "stable" # The persistence backend to use - either api or legacy persistence_backend = "legacy" # The processing backend to use - either api or legacy From e02d716fc9611b71491e42ac8a061b4173857a4e Mon Sep 17 00:00:00 2001 From: chreman Date: Fri, 21 May 2021 18:18:47 +0200 Subject: [PATCH 24/54] add service_version route --- docker-compose.yml | 11 ++++++++++- server/workers/api/src/apis/base.py | 8 ++++++++ server/workers/api/src/apis/gsheets.py | 6 ++++++ server/workers/api/src/apis/openaire.py | 6 ++++++ server/workers/api/src/apis/persistence.py | 22 ++++++++++++++++++++++ server/workers/api/src/apis/pubmed.py | 6 ++++++ server/workers/api/src/apis/triple.py | 12 ++++++++++++ server/workers/flavorconfigs/example.env | 3 ++- 8 files changed, 72 insertions(+), 2 deletions(-) create mode 100644 server/workers/api/src/apis/persistence.py diff --git a/docker-compose.yml b/docker-compose.yml index f4e0b0e8f..0841c0bf1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -43,10 +43,12 @@ services: image: api:${SERVICE_VERSION} restart: always environment: + SERVICE_VERSION: "${SERVICE_VERSION}" REDIS_HOST: "${REDIS_HOST}" REDIS_PORT: "${REDIS_PORT}" REDIS_PASSWORD: "${REDIS_PASSWORD}" REDIS_DB: "${REDIS_DB}" + PERSISTENCE_PORT: "${PERSISTENCE_PORT}" command: ["gunicorn", "--workers", "10", "--threads", "2", "-b", "0.0.0.0:${API_PORT}", "app:app", "--timeout", "300"] depends_on: - redis @@ -57,11 +59,12 @@ services: image: persistence:${SERVICE_VERSION} restart: always environment: + SERVICE_VERSION: "${SERVICE_VERSION}" POSTGRES_USER: "${POSTGRES_USER}" POSTGRES_PASSWORD: "${POSTGRES_PASSWORD}" POSTGRES_HOST: "${POSTGRES_HOST}" POSTGRES_PORT: "${POSTGRES_PORT}" - command: ["gunicorn", "--workers", "10", "--threads", "2", "-b", "0.0.0.0:${API_PORT}", "app:app", "--timeout", "300"] + command: ["gunicorn", "--workers", "10", "--threads", "2", "-b", "0.0.0.0:${PERSISTENCE_PORT}", "app:app", "--timeout", "300"] networks: - headstart @@ -70,6 +73,7 @@ services: env_file: - server/workers/triple/triple.env environment: + SERVICE_VERSION: "${SERVICE_VERSION}" REDIS_HOST: "${REDIS_HOST}" REDIS_PORT: "${REDIS_PORT}" REDIS_DB: "${REDIS_DB}" @@ -85,6 +89,7 @@ services: env_file: - server/workers/gsheets/gsheets.env environment: + SERVICE_VERSION: "${SERVICE_VERSION}" REDIS_HOST: "${REDIS_HOST}" REDIS_PORT: "${REDIS_PORT}" REDIS_DB: "${REDIS_DB}" @@ -100,6 +105,7 @@ services: env_file: - server/workers/dataprocessing/dataprocessing.env environment: + SERVICE_VERSION: "${SERVICE_VERSION}" REDIS_HOST: "${REDIS_HOST}" REDIS_PORT: "${REDIS_PORT}" REDIS_DB: "${REDIS_DB}" @@ -118,6 +124,7 @@ services: env_file: - server/workers/base/base.env environment: + SERVICE_VERSION: "${SERVICE_VERSION}" REDIS_HOST: "${REDIS_HOST}" REDIS_PORT: "${REDIS_PORT}" REDIS_DB: "${REDIS_DB}" @@ -136,6 +143,7 @@ services: env_file: - server/workers/pubmed/pubmed.env environment: + SERVICE_VERSION: "${SERVICE_VERSION}" REDIS_HOST: "${REDIS_HOST}" REDIS_PORT: "${REDIS_PORT}" REDIS_DB: "${REDIS_DB}" @@ -154,6 +162,7 @@ services: env_file: - server/workers/openaire/openaire.env environment: + SERVICE_VERSION: "${SERVICE_VERSION}" REDIS_HOST: "${REDIS_HOST}" REDIS_PORT: "${REDIS_PORT}" REDIS_DB: "${REDIS_DB}" diff --git a/server/workers/api/src/apis/base.py b/server/workers/api/src/apis/base.py index c2a309555..3cbad3426 100644 --- a/server/workers/api/src/apis/base.py +++ b/server/workers/api/src/apis/base.py @@ -96,3 +96,11 @@ def post(self): except Exception as e: base_ns.logger.error(e) abort(500, "Problem encountered, check logs.") + + + +@base_ns.route('/service_version') +class ServiceVersion(Resource): + def get(self): + result = {"service_version": os.getenv("SERVICE_VERSION")} + return make_response(result, 200, {"Content-Type": "application/json"}) \ No newline at end of file diff --git a/server/workers/api/src/apis/gsheets.py b/server/workers/api/src/apis/gsheets.py index f9e4cca26..bcc9baa5c 100644 --- a/server/workers/api/src/apis/gsheets.py +++ b/server/workers/api/src/apis/gsheets.py @@ -102,3 +102,9 @@ def post(self): except Exception as e: gsheets_ns.logger.error(e) abort(500, "Problem encountered during processing, sorry.") + +@gsheets_ns.route('/service_version') +class ServiceVersion(Resource): + def get(self): + result = {"service_version": os.getenv("SERVICE_VERSION")} + return make_response(result, 200, {"Content-Type": "application/json"}) \ No newline at end of file diff --git a/server/workers/api/src/apis/openaire.py b/server/workers/api/src/apis/openaire.py index e4c511d85..4ed1e9a48 100644 --- a/server/workers/api/src/apis/openaire.py +++ b/server/workers/api/src/apis/openaire.py @@ -95,3 +95,9 @@ def post(self): except Exception as e: openaire_ns.logger.error(e) abort(500, "Problem encountered, check logs.") + +@openaire_ns.route('/service_version') +class ServiceVersion(Resource): + def get(self): + result = {"service_version": os.getenv("SERVICE_VERSION")} + return make_response(result, 200, {"Content-Type": "application/json"}) \ No newline at end of file diff --git a/server/workers/api/src/apis/persistence.py b/server/workers/api/src/apis/persistence.py new file mode 100644 index 000000000..3ca33c600 --- /dev/null +++ b/server/workers/api/src/apis/persistence.py @@ -0,0 +1,22 @@ +import os +import json +import uuid +import time +import redis +import asyncio +import aioredis +import pandas as pd + +from flask import Blueprint, request, make_response, jsonify, abort, redirect +from flask_restx import Namespace, Resource, fields +from .request_validators import SearchParamSchema +from apis.utils import get_key, detect_error + + +persistence_ns = Namespace("persistence", description="Persistence API operations redirect") + +persistence_host = + +@persistence_ns.route('/') +def persistence(): + return redirect("stable_persistence_1:5101/api/persistence") \ No newline at end of file diff --git a/server/workers/api/src/apis/pubmed.py b/server/workers/api/src/apis/pubmed.py index ea6d3fc0d..06dd0b1c5 100644 --- a/server/workers/api/src/apis/pubmed.py +++ b/server/workers/api/src/apis/pubmed.py @@ -97,3 +97,9 @@ def post(self): except Exception as e: pubmed_ns.logger.error(e) abort(500, "Problem encountered, check logs.") + +@pubmed_ns.route('/service_version') +class ServiceVersion(Resource): + def get(self): + result = {"service_version": os.getenv("SERVICE_VERSION")} + return make_response(result, 200, {"Content-Type": "application/json"}) \ No newline at end of file diff --git a/server/workers/api/src/apis/triple.py b/server/workers/api/src/apis/triple.py index 4e75f225e..09038af36 100644 --- a/server/workers/api/src/apis/triple.py +++ b/server/workers/api/src/apis/triple.py @@ -72,6 +72,11 @@ def post(self): d = {"id": k, "params": params, "endpoint": "search"} triple_ns.logger.debug(d) + # if length of queue > ?? + # make_response with "wait later" and 503+headers + # have this handled by lightweight-client in search.php + # which then is handled by search-flow under new processing-timeout error + # add to logging redis_store.rpush("triple", json.dumps(d)) result = get_key(redis_store, k) headers = {} @@ -128,6 +133,13 @@ def get(self): 200, headers) + +@triple_ns.route('/service_version') +class ServiceVersion(Resource): + def get(self): + result = {"service_version": os.getenv("SERVICE_VERSION")} + return make_response(result, 200, {"Content-Type": "application/json"}) + @triple_ns.route('/healthcheck') class Healthcheck(Resource): def get(self): diff --git a/server/workers/flavorconfigs/example.env b/server/workers/flavorconfigs/example.env index cea9a0a59..4b353c151 100644 --- a/server/workers/flavorconfigs/example.env +++ b/server/workers/flavorconfigs/example.env @@ -9,7 +9,8 @@ POSTGRES_PORT=5433 PGADMIN_DEFAULT_EMAIL=christopher.kittel@openknowledgemaps.org PGADMIN_DEFAULT_PASSWORD=testpassword API_PORT=5001 +PERSISTENCE_PORT=5101 REDIS_HOST=dev_redis_1 -REDIS_PORT=6380 +REDIS_PORT=6379 REDIS_DB=0 REDIS_PASSWORD=redis_password \ No newline at end of file From a11bac1cf9f22398c4d37620fabb503c393fcfd7 Mon Sep 17 00:00:00 2001 From: chreman Date: Fri, 21 May 2021 18:32:13 +0200 Subject: [PATCH 25/54] add service_version endpoint to persistence --- server/workers/persistence/src/apis/persistence.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/server/workers/persistence/src/apis/persistence.py b/server/workers/persistence/src/apis/persistence.py index 355af9bbf..9fb039f76 100644 --- a/server/workers/persistence/src/apis/persistence.py +++ b/server/workers/persistence/src/apis/persistence.py @@ -1,3 +1,4 @@ +import os from hashlib import md5 from datetime import datetime import json @@ -316,3 +317,10 @@ def post(self): result = {'success': False, 'reason': e} headers = {'ContentType': 'application/json'} return make_response(jsonify(result), 500, headers) + + +@persistence_ns.route('/service_version') +class ServiceVersion(Resource): + def get(self): + result = {"service_version": os.getenv("SERVICE_VERSION")} + return make_response(result, 200, {"Content-Type": "application/json"}) \ No newline at end of file From d47034c6e78534e80b15b7916e7ed222342b97dd Mon Sep 17 00:00:00 2001 From: chreman Date: Tue, 25 May 2021 19:09:45 +0200 Subject: [PATCH 26/54] add new dataprocessing rate limit error --- server/classes/headstart/library/toolkit.php | 15 --------------- .../preprocessing/calculation/RCalculation.php | 13 ++++++++++--- server/services/search.php | 13 +++++-------- 3 files changed, 15 insertions(+), 26 deletions(-) diff --git a/server/classes/headstart/library/toolkit.php b/server/classes/headstart/library/toolkit.php index 81dadccc6..3482798df 100644 --- a/server/classes/headstart/library/toolkit.php +++ b/server/classes/headstart/library/toolkit.php @@ -2,8 +2,6 @@ namespace headstart\library; -include "Inflector.php"; - class Toolkit { private static $initialized = false; @@ -137,19 +135,6 @@ public static function loadIni($path) { return $ini_array; } - public static function normalizeString($old_string) { - self::initialize(); - - $new_string = preg_replace('/[^a-zA-Z0-9\s]/', '', strtolower($old_string)); - $new_string_array = preg_split('/ /', $new_string); - $final_string = ""; - foreach ($new_string_array as $string) { - $final_string .= \Inflector::singularize($string) . " "; - } - - return trim($final_string); - } - public static function isJson($string) { self::initialize(); diff --git a/server/classes/headstart/preprocessing/calculation/RCalculation.php b/server/classes/headstart/preprocessing/calculation/RCalculation.php index 7c75f0c94..2221941a0 100644 --- a/server/classes/headstart/preprocessing/calculation/RCalculation.php +++ b/server/classes/headstart/preprocessing/calculation/RCalculation.php @@ -44,9 +44,16 @@ public function performCalculationAndReturnOutputAsJSON($working_dir, $query, $p } //library\Toolkit::info($path); - exec($path, $output_r); - - return $output_r; + // exec(ps(if [ $(ps -ef | grep -v grep | grep scrape_data.php | wc -l) -lt 1 ])) if n > 5 + // return $try_later with error handling + if(exec("ps -u www-data | grep R | wc -l") <= 12) { + exec($path, $output_r); + return $output_r; + } else { + $output = array(json_encode(array("status" => "error", + "reason" => "dataprocessing rate limit"))); + return $output; + } } public function performStreamgraphCalculation($working_dir, $service, $output_json) { diff --git a/server/services/search.php b/server/services/search.php index e2b927da7..816dd60f5 100644 --- a/server/services/search.php +++ b/server/services/search.php @@ -1,7 +1,6 @@ performCalculationAndReturnOutputAsJSON($WORKING_DIR, $query, $params_filename, $endpoint); From 59a5a279aae79a7a17409cec3a96ea6cdc6697d9 Mon Sep 17 00:00:00 2001 From: chreman Date: Wed, 26 May 2021 13:26:51 +0200 Subject: [PATCH 27/54] add rate limit to modern backend --- server/workers/api/src/apis/triple.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/server/workers/api/src/apis/triple.py b/server/workers/api/src/apis/triple.py index 09038af36..d1a6c9a2e 100644 --- a/server/workers/api/src/apis/triple.py +++ b/server/workers/api/src/apis/triple.py @@ -61,6 +61,10 @@ class Search(Resource): def post(self): """ """ + if redis_store.llen("input_data") > 10: + result = {"status": "error", + "reason": "dataprocessing rate limit"} + return jsonify(result) params = request.get_json() triple_ns.logger.debug(params) errors = search_param_schema.validate(params, partial=True) From a16f99eb0181e494c1e52e66b7e57129484c3f35 Mon Sep 17 00:00:00 2001 From: chreman Date: Wed, 26 May 2021 13:41:11 +0200 Subject: [PATCH 28/54] finish new persistence routing --- docker-compose.yml | 1 + server/workers/api/src/apis/persistence.py | 5 +++-- server/workers/flavorconfigs/example.env | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 0841c0bf1..5c9a215ac 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -48,6 +48,7 @@ services: REDIS_PORT: "${REDIS_PORT}" REDIS_PASSWORD: "${REDIS_PASSWORD}" REDIS_DB: "${REDIS_DB}" + PERSISTENCE_HOST: "${PERSISTENCE_HOST}" PERSISTENCE_PORT: "${PERSISTENCE_PORT}" command: ["gunicorn", "--workers", "10", "--threads", "2", "-b", "0.0.0.0:${API_PORT}", "app:app", "--timeout", "300"] depends_on: diff --git a/server/workers/api/src/apis/persistence.py b/server/workers/api/src/apis/persistence.py index 3ca33c600..3128bf7ec 100644 --- a/server/workers/api/src/apis/persistence.py +++ b/server/workers/api/src/apis/persistence.py @@ -15,8 +15,9 @@ persistence_ns = Namespace("persistence", description="Persistence API operations redirect") -persistence_host = +persistence_uri = "%s:%s" %(os.getenv("PERISTENCE_HOST"), + os.getenv("PERISTENCE_PORT")) @persistence_ns.route('/') def persistence(): - return redirect("stable_persistence_1:5101/api/persistence") \ No newline at end of file + return redirect(persistence_uri + "/api/persistence") \ No newline at end of file diff --git a/server/workers/flavorconfigs/example.env b/server/workers/flavorconfigs/example.env index 4b353c151..b026e28e6 100644 --- a/server/workers/flavorconfigs/example.env +++ b/server/workers/flavorconfigs/example.env @@ -9,6 +9,7 @@ POSTGRES_PORT=5433 PGADMIN_DEFAULT_EMAIL=christopher.kittel@openknowledgemaps.org PGADMIN_DEFAULT_PASSWORD=testpassword API_PORT=5001 +PERSISTENCE_HOST=dev_persistence_1 PERSISTENCE_PORT=5101 REDIS_HOST=dev_redis_1 REDIS_PORT=6379 From 2c766edf97e3a4aece260e68461572702c08e5f4 Mon Sep 17 00:00:00 2001 From: chreman Date: Fri, 28 May 2021 10:16:43 +0200 Subject: [PATCH 29/54] update documentation --- example.env | 15 ------ server/workers/README.md | 112 ++++++++++++++++++--------------------- 2 files changed, 51 insertions(+), 76 deletions(-) delete mode 100644 example.env diff --git a/example.env b/example.env deleted file mode 100644 index 4e2e5ad45..000000000 --- a/example.env +++ /dev/null @@ -1,15 +0,0 @@ -COMPOSE_PROJECT_NAME=dockerrefactoring -SERVICE_VERSION=ab9d127081977c27ffce22ccdd9bacf8c55fec1e -NETWORK=dockerrefactoring -POSTGRES_DB=postgres -POSTGRES_USER=headstart -POSTGRES_PASSWORD=testpassword -POSTGRES_HOST=dockerrefactoring_db_1 -POSTGRES_PORT=5432 -PGADMIN_DEFAULT_EMAIL=christopher.kittel@openknowledgemaps.org -PGADMIN_DEFAULT_PASSWORD=testpassword -API_PORT=5001 -REDIS_HOST=dockerrefactoring_redis_1 -REDIS_PORT=6379 -REDIS_DB=0 -REDIS_PASSWORD=testpassword \ No newline at end of file diff --git a/server/workers/README.md b/server/workers/README.md index 0d9e4159a..46f58d182 100644 --- a/server/workers/README.md +++ b/server/workers/README.md @@ -14,21 +14,11 @@ Each comes with a docker file (ending on `.docker`), which is used for creating Please follow the install instructions for your OS: -* Windows: https://docs.docker.com/docker-for-windows/install/ * Mac: https://docs.docker.com/docker-for-mac/install/ * Ubuntu: https://docs.docker.com/docker-for-mac/install/ (also available for other Linux) Please follow the install instructions for docker-compose for your OS: https://docs.docker.com/compose/install/ -### Windows - -It is recommended to install the latest version of [Docker for Windows](https://hub.docker.com/editions/community/docker-ce-desktop-windows). -Additionally, following settings may need to be activated: - -* [Volume Sharing](https://docs.microsoft.com/en-us/visualstudio/containers/troubleshooting-docker-errors?view=vs-2019) - -(In case Docker for Windows does not seem to start, it may be already running in the background and hiding in the task bar menu in the lower right corner.) - ### Setting up the Apache2 reverse proxy Following Apache2 mods have to be installed and enabled: @@ -49,11 +39,12 @@ The following lines have to be added to the appropriate sites-available config o # other config # Proxy server settings for Head Start API - ProxyPass /api http://localhost:5001/api connectiontimeout=120 timeout=120 - ProxyPassReverse /api http://localhost:5001/api - ProxyPass /swaggerui http://localhost:5001/swaggerui - ProxyPassReverse /swaggerui http://localhost:5001/swaggerui - + + Deny from all + Allow from 127.0.0.1 + ProxyPass http://127.0.0.1:8080/ + ProxyPassReverse http://127.0.0.1/api + ``` @@ -71,10 +62,8 @@ Services: * In `server/workers/services/src/config` copy `example_settings.py` to `settings.py` and change the values for `ENV` (`development` or `production`) and `DEBUG` (`TRUE` or `FALSE`). * In `settings.py` you can also configure databases. - TRIPLE ElasticSearch core service: -* In `server/workers/services/triple/` copy `example_es_config.json` to `es_config.json` and fill in the fields. -* In `server/workers/services/triple/` copy `example_triple.env` to `triple.env` and change the values if necessary. +* In `server/workers/services/triple/` copy `example_triple.env` to `triple.env` and edit the values regarding the ElasticSearch access accordingly. GSheets Google API client authentication credentials:: * In `server/workers/services/gsheets/` copy `example_gsheets.env` to `gsheets.env` and change the values if necessary. @@ -82,12 +71,19 @@ GSheets Google API client authentication credentials:: Secure Redis: -* In `server/workers` copy `example_redis_config.json` to `redis_config.json` and `example_redis.conf` to `redis.conf` and in both files replace "long_secure_password" with a long, secure password (Line 507 in redis.conf, parameter `requirepass`). +* In `server/workers` copy `example_redis.conf` to `redis.conf` and replace "long_secure_password" with a long, secure password (Line 507 in redis.conf, parameter `requirepass`). +Secure Postgres: +* In `server/workers` duplicate `example_pg_hba.conf` to `pg_hba.conf` and review the settings. The default values should be ok for a default deployment (host connections are only allowed for user "headstart" with an md5-hashed password), but you may want to change access rights. + +Overall deployment environment variables: PostgreSQL service: -* In root folder create `.env` from the `example.env` and fill in the environment variables with the correct login data. -* Manual database creation: +* In `server/workers/flavorconfigs` folder create a new `flavorname.env` from the `example.env` and fill in the environment variables with the correct login data. + * This includes Postgresql and redis settings + + +* Manual database creation for Postgres: Enter container: `docker exec -it VARYINGNAME_pgsql_1 psql -U headstart` @@ -95,66 +91,60 @@ Execute command: `CREATE DATABASE databasename;` * In `preprocessing/conf/config_local.ini` change "databasename" to the dev/production database name for the specific integration. This should be in line with the database names provided in `settings.py` -Secure Postgres: -* In `server/workers` duplicate `example_pg_hba.conf` to `pg_hba.conf` and review the settings. The default values should be ok for a default deployment (host connections are only allowed for user "headstart" with an md5-hashed password), but you may want to change access rights. -### Starting the backend services with docker-compose +### Adding a new versioned "flavor" of the backend -Following commands have to be executed from the root folder of the repository, where `docker-compose.yml` is located. -**Build images** +1. Make changes to code in `server/workers` (any API /integration, …) +1. Commit changes +1. Checkout commit (make note of commit hash) +1. Run `server/workers/build_docker_images.sh` +1. Create new {flavor}.env in `server/workers/flavorconfigs/` using `example.env` as template. Set the “COMPOSE_PROJECT_NAME={flavor}” and the SERVICE_VERSION={commit hash} to the values from step 3. +1. Run `docker-compose up --env-file server/workers/flavorconfigs/flavor.env -d` to start the services +1. Add new entry to `server/workers/proxy/templates/default.conf.templates` +1. Add flavored networks to `server/workers/proxy/docker-compose.yml` so that the Nginx-proxy knows where to find the specific versioned services +1. Down and up the proxy service from `server/workers/proxy` working directory +1. Test by e.g. `curl -vvvv localhost/api/{flavor}/triple/service_version` -* on Linux: -``` -docker-compose build -``` -* on Windows: -``` -docker-compose -f docker-compose_win.yml build -``` +### Starting a specific versioned "flavor" of the backend services with docker-compose + +Following commands have to be executed from the root folder of the repository, where `docker-compose.yml` is located. **Start services and send them to the docker daemon** -* on Linux: ``` -docker-compose up -d +docker-compose up --env-file server/workers/flavorconfigs/flavor.env -d ``` -* on Windows: -``` -docker-compose -f docker-compose_win.yml up -d -``` -**All in one:** +**Shutting service down** -* on Linux: ``` -docker-compose up -d --build +docker-compose down --env-file server/workers/flavorconfigs/flavor.env ``` -* shut service down -* on Linux: -``` -docker-compose down -``` - -* on Windows: -``` -docker-compose -f docker-compose_win.yml down -``` +### Adding a new service to the backend -### Deploying the example: +1. Add service configuration in docker-compose.yml + 1. Add required environment variables that need to be passed from .env to container in docker-compose.yml +1. Add service related changes in build-docker-images.sh + 1. Add service to build list +1. Add service source code and Dockerfile in a new folder in `server/workers` +1. Add new env variables to .env files -Use a deployment script, or manually deploy an example (currently only TRIPLE is integrated in this way) as described in [HOWTO: search repos](../../doc/howto_search_repos.md): -Additionally, the `config_local.ini` now requires an additional parameter under `[general]`: +### Integrating with clients +In `server/preprocessing/conf/config_local.ini` change the following configs: ``` # URL to OKMaps API -api_url = "" - -``` - -where `api_url` is the full URL to the API endpoint. +api_url = "http://127.0.0.1/api/" +# flavor of API, default: "stable" +api_flavor = "stable" +# The persistence backend to use - either api or legacy +persistence_backend = "api" +# The processing backend to use - either api or legacy +processing_backend = "api" +``` \ No newline at end of file From ea7fafa6f9d738b4be5887bfb6b711a473c9c34c Mon Sep 17 00:00:00 2001 From: chreman Date: Fri, 28 May 2021 14:23:12 +0200 Subject: [PATCH 30/54] introduce lightweight API client --- .../classes/headstart/library/APIClient.php | 45 +++++++++++++++++++ server/services/GSheetUpdateAvailable.php | 10 ++--- server/services/createNewGSheet.php | 6 ++- server/services/getContext.php | 6 +-- server/services/getGSheetsMap.php | 9 ++-- server/services/getLastVersion.php | 9 ++-- server/services/getLatestRevision.php | 24 +++++----- server/services/search.php | 31 ++++--------- 8 files changed, 82 insertions(+), 58 deletions(-) create mode 100644 server/classes/headstart/library/APIClient.php diff --git a/server/classes/headstart/library/APIClient.php b/server/classes/headstart/library/APIClient.php new file mode 100644 index 000000000..d494947a9 --- /dev/null +++ b/server/classes/headstart/library/APIClient.php @@ -0,0 +1,45 @@ +load_configs(); + } + + public function load_configs() { + $INI_DIR = dirname(__FILE__) . "/../preprocessing/conf/"; + $this->ini_array = Toolkit::loadIni($INI_DIR); + $this->settings = $this->ini_array["general"]; + $this->processing_backend = isset($this->ini_array["general"]["processing_backend"]) + ? ($this->ini_array["general"]["processing_backend"]) + : "legacy"; + $this->persistence_backend = isset($this->ini_array["general"]["persistence_backend"]) + ? ($this->ini_array["general"]["persistence_backend"]) + : "legacy"; + $this->database = $this->ini_array["connection"]["database"]; + $this->WORKING_DIR = $this->ini_array["general"]["preprocessing_dir"] . $this->ini_array["output"]["output_dir"]; + $api_url = $ini_array["general"]["api_url"]; + $api_flavor = isset($ini_array["general"]["api_flavor"]) + ? ($ini_array["general"]["api_flavor"]) + : "stable"; + $this->base_route = $api_url . $api_flavor . "/"; + } + + public function call_api($endpoint, $payload) { + $route = $base_route . $endpoint . "/" . $this->database; + $res = library\CommUtils::call_api($route, $payload); + return $res; + } + + public function call_persistence($endpoint, $payload) { + $route = $base_route . "persistence/" . $endpoint . "/" . $this->database; + $res = library\CommUtils::call_api($route, $payload); + return $res; + } + +} \ No newline at end of file diff --git a/server/services/GSheetUpdateAvailable.php b/server/services/GSheetUpdateAvailable.php index dd0722c1b..c934c52fa 100644 --- a/server/services/GSheetUpdateAvailable.php +++ b/server/services/GSheetUpdateAvailable.php @@ -5,23 +5,21 @@ require_once dirname(__FILE__) . '/../classes/headstart/library/CommUtils.php'; require_once dirname(__FILE__) . '/../classes/headstart/library/toolkit.php'; require dirname(__FILE__) . '/../classes/headstart/persistence/SQLitePersistence.php'; +require_once dirname(__FILE__) . '/../classes/headstart/library/APIClient.php'; use headstart\library; $INI_DIR = dirname(__FILE__) . "/../preprocessing/conf/"; - $ini_array = library\Toolkit::loadIni($INI_DIR); +$apiclient = new \library\APIClient(); +$persistence = new headstart\persistence\SQLitePersistence($ini_array["connection"]["sqlite_db"]); $vis_id = library\CommUtils::getParameter($_GET, "vis_id"); $gsheet_last_updated = library\CommUtils::getParameter($_GET, "gsheet_last_updated"); -$database = $ini_array["connection"]["database"]; - -$persistence = new headstart\persistence\SQLitePersistence($ini_array["connection"]["sqlite_db"]); $persistence_backend = $ini_array["general"]["persistence_backend"]; if ($persistence_backend == "api") { - $route = $ini_array["general"]["api_url"] . "persistence/" . "getLastVersion/" . $database; $payload = json_encode(array("vis_id" => $vis_id, "details" => false, "context" => true)); - $res = library\CommUtils::call_api($route, $payload); + $res = $apiclient->call_persistence("getLastVersion", $payload); if ($res["httpcode"] != 200) { library\CommUtils::echoOrCallback($res, $_GET); } else { diff --git a/server/services/createNewGSheet.php b/server/services/createNewGSheet.php index 69bf5ff7a..9cad7f731 100644 --- a/server/services/createNewGSheet.php +++ b/server/services/createNewGSheet.php @@ -3,8 +3,11 @@ header('Content-type: application/json'); require_once dirname(__FILE__) . '/../classes/headstart/library/CommUtils.php'; +require_once dirname(__FILE__) . '/../classes/headstart/library/APIClient.php'; use headstart\library; +$apiclient = new \library\APIClient(); + if(php_sapi_name() == 'cli') { // Called from command-line, maybe cron # parse options @@ -24,12 +27,11 @@ echo "Call not accepted."; } -$route = $ini_array["general"]["api_url"] . "/gsheets" . "/createKnowledgebase"; $payload = json_encode(array("sheet_name" => $sheet_name, "project_name" => $project_name, "main_curator_email" => $main_curator_email, "knowledge_base_template_id" => $knowledge_base_template_id)); -$res = library\CommUtils::call_api($route, $payload); +$res = $apiclient->call_api("/gsheets" . "/createKnowledgebase", $payload); if ($res["httpcode"] != 200) { echo json_encode($res); } else { diff --git a/server/services/getContext.php b/server/services/getContext.php index f9bec52a8..ea373177b 100644 --- a/server/services/getContext.php +++ b/server/services/getContext.php @@ -5,6 +5,7 @@ require dirname(__FILE__) . '/../classes/headstart/persistence/SQLitePersistence.php'; require_once dirname(__FILE__) . '/../classes/headstart/library/CommUtils.php'; require_once dirname(__FILE__) . '/../classes/headstart/library/toolkit.php'; +require_once dirname(__FILE__) . '/../classes/headstart/library/APIClient.php'; use headstart\library; @@ -15,14 +16,13 @@ $vis_id = library\CommUtils::getParameter($_GET, "vis_id"); $revision_context = isset($_GET["revision_context"]) ? library\CommUtils::getParameter($_GET, "revision_context") : false; +$apiclient = new \library\APIClient(); $persistence = new headstart\persistence\SQLitePersistence($ini_array["connection"]["sqlite_db"]); -$database = $ini_array["connection"]["database"]; $persistence_backend = $ini_array["general"]["persistence_backend"]; if ($persistence_backend === "api") { - $route = $ini_array["general"]["api_url"] . "persistence/" . "getContext/" . $database; $payload = json_encode(array("vis_id" => $vis_id, "revision_context" => $revision_context)); - $res = library\CommUtils::call_api($route, $payload); + $res = $apiclient->call_persistence("getContext", $payload); if ($res["httpcode"] != 200) { library\CommUtils::echoOrCallback($res, $_GET); } else { diff --git a/server/services/getGSheetsMap.php b/server/services/getGSheetsMap.php index 9061f00a5..9b0928d40 100644 --- a/server/services/getGSheetsMap.php +++ b/server/services/getGSheetsMap.php @@ -4,6 +4,7 @@ require_once dirname(__FILE__) . '/../classes/headstart/library/CommUtils.php'; require_once dirname(__FILE__) . '/../classes/headstart/library/toolkit.php'; +require_once dirname(__FILE__) . '/../classes/headstart/library/APIClient.php'; require 'search.php'; use headstart\library; @@ -11,17 +12,15 @@ $INI_DIR = dirname(__FILE__) . "/../preprocessing/conf/"; $ini_array = library\Toolkit::loadIni($INI_DIR); +$apiclient = new \library\APIClient(); +$persistence = new headstart\persistence\SQLitePersistence($ini_array["connection"]["sqlite_db"]); $vis_id = library\CommUtils::getParameter($_GET, "vis_id"); - -$persistence = new headstart\persistence\SQLitePersistence($ini_array["connection"]["sqlite_db"]); $persistence_backend = $ini_array["general"]["persistence_backend"]; -$database = $ini_array["connection"]["database"]; if ($persistence_backend === "api") { - $route = $ini_array["general"]["api_url"] . "persistence/" . "getLastVersion/" . $database; $payload = json_encode(array("vis_id" => $vis_id, "details" => false, "context" => true)); - $res = library\CommUtils::call_api($route, $payload); + $res = $apiclient->call_persistence("getLastVersion", $payload); if ($res["httpcode"] != 200) { library\CommUtils::echoOrCallback($res, $_GET); } else { diff --git a/server/services/getLastVersion.php b/server/services/getLastVersion.php index e2744bf61..ada276216 100644 --- a/server/services/getLastVersion.php +++ b/server/services/getLastVersion.php @@ -6,25 +6,24 @@ require dirname(__FILE__) . '/../classes/headstart/persistence/SQLitePersistence.php'; require_once dirname(__FILE__) . '/../classes/headstart/library/CommUtils.php'; require_once dirname(__FILE__) . '/../classes/headstart/library/toolkit.php'; +require_once dirname(__FILE__) . '/../classes/headstart/library/APIClient.php'; use headstart\library; $INI_DIR = dirname(__FILE__) . "/../preprocessing/conf/"; $ini_array = library\Toolkit::loadIni($INI_DIR); +$apiclient = new \library\APIClient(); +$persistence = new headstart\persistence\SQLitePersistence($ini_array["connection"]["sqlite_db"]); $vis_id = library\CommUtils::getParameter($_GET, "vis_id"); -$database = $ini_array["connection"]["database"]; - -$persistence = new headstart\persistence\SQLitePersistence($ini_array["connection"]["sqlite_db"]); $persistence_backend = $ini_array["general"]["persistence_backend"]; if ($persistence_backend === "api") { - $route = $ini_array["general"]["api_url"] . "persistence/" . "getLastVersion/" . $database; $payload = json_encode(array("vis_id" => $vis_id, "details" => false, "context" => false)); - $res = library\CommUtils::call_api($route, $payload); + $res = $apiclient->call_persistence("getLastVersion", $payload); if ($res["httpcode"] != 200) { echo json_encode($res); } else { diff --git a/server/services/getLatestRevision.php b/server/services/getLatestRevision.php index d5ca3bd5e..9e6468834 100644 --- a/server/services/getLatestRevision.php +++ b/server/services/getLatestRevision.php @@ -6,23 +6,23 @@ require dirname(__FILE__) . '/../classes/headstart/persistence/SQLitePersistence.php'; require_once dirname(__FILE__) . '/../classes/headstart/library/CommUtils.php'; require_once dirname(__FILE__) . '/../classes/headstart/library/toolkit.php'; +require_once dirname(__FILE__) . '/../classes/headstart/library/APIClient.php'; use headstart\library; $INI_DIR = dirname(__FILE__) . "/../preprocessing/conf/"; - $ini_array = library\Toolkit::loadIni($INI_DIR); +$apiclient = new \library\APIClient(); +$persistence = new headstart\persistence\SQLitePersistence($ini_array["connection"]["sqlite_db"]); + +$persistence_backend = $ini_array["general"]["persistence_backend"]; +$processing_backend = $ini_array["general"]["processing_backend"]; $vis_id = library\CommUtils::getParameter($_GET, "vis_id"); $context = filter_input(INPUT_GET, "context", FILTER_VALIDATE_BOOLEAN, array("flags" => FILTER_NULL_ON_FAILURE)); $streamgraph = filter_input(INPUT_GET, "streamgraph", FILTER_VALIDATE_BOOLEAN, array("flags" => FILTER_NULL_ON_FAILURE)); -$database = $ini_array["connection"]["database"]; - -$persistence = new headstart\persistence\SQLitePersistence($ini_array["connection"]["sqlite_db"]); -$persistence_backend = $ini_array["general"]["persistence_backend"]; -$processing_backend = $ini_array["general"]["processing_backend"]; if ($processing_backend == "api") { # case of streamgraph calculation in backend @@ -30,9 +30,8 @@ # context data true start if ($persistence_backend === "api") { # get data + context from api - $route = $ini_array["general"]["api_url"] . "persistence/" . "getLastVersion/" . $database; $payload = json_encode(array("vis_id" => $vis_id, "details" => false, "context" => true)); - $res = library\CommUtils::call_api($route, $payload); + $res = $apiclient->call_persistence("getLastVersion", $payload); if ($res["httpcode"] != 200) { library\CommUtils::echoOrCallback($res, $_GET); } else { @@ -62,9 +61,8 @@ } else { if ($persistence_backend === "api") { # return data without context from api - $route = $ini_array["general"]["api_url"] . "persistence/" . "getLastVersion/" . $database; $payload = json_encode(array("vis_id" => $vis_id, "details" => false, "context" => false)); - $res = library\CommUtils::call_api($route, $payload); + $res = $apiclient->call_persistence("getLastVersion", $payload); if ($res["httpcode"] != 200) { library\CommUtils::echoOrCallback($res, $_GET); } else { @@ -84,9 +82,8 @@ if ($context === true) { if ($persistence_backend === "api") { # get data + context from api - $route = $ini_array["general"]["api_url"] . "persistence/" . "getLastVersion/" . $database; $payload = json_encode(array("vis_id" => $vis_id, "details" => false, "context" => true)); - $res = library\CommUtils::call_api($route, $payload); + $res = $apiclient->call_persistence("getLastVersion", $payload); if ($res["httpcode"] != 200) { library\CommUtils::echoOrCallback($res, $_GET); } else { @@ -117,9 +114,8 @@ } else { if ($persistence_backend === "api") { # get data without context from api - $route = $ini_array["general"]["api_url"] . "persistence/" . "getLastVersion/" . $database; $payload = json_encode(array("vis_id" => $vis_id, "details" => false, "context" => false)); - $res = library\CommUtils::call_api($route, $payload); + $res = $apiclient->call_persistence("getLastVersion", $payload); if ($res["httpcode"] != 200) { library\CommUtils::echoOrCallback($res, $_GET); } else { diff --git a/server/services/search.php b/server/services/search.php index 816dd60f5..09e2d3f58 100644 --- a/server/services/search.php +++ b/server/services/search.php @@ -4,6 +4,7 @@ require dirname(__FILE__) . '/../classes/headstart/persistence/SQLitePersistence.php'; require_once dirname(__FILE__) . '/../classes/headstart/preprocessing/Snapshot.php'; require_once dirname(__FILE__) . '/../classes/headstart/library/CommUtils.php'; +require_once dirname(__FILE__) . '/../classes/headstart/library/APIClient.php'; require_once dirname(__FILE__) . '/../classes/headstart/library/toolkit.php'; require 'helper.php'; @@ -58,6 +59,7 @@ function search($service_integration, $dirty_query , $precomputed_id = null, $do_clean_query = true) { $INI_DIR = dirname(__FILE__) . "/../preprocessing/conf/"; $ini_array = library\Toolkit::loadIni($INI_DIR); + $apiclient = new \library\APIClient(); $repo2snapshot = array("plos" => "PLOS" , "pubmed" => "PubMed" , "doaj" => "DOAJ" @@ -72,17 +74,6 @@ function search($service_integration, $dirty_query $service2endpoint = array("triple_km" => "triple", "triple_sg" => "triple"); - $processing_backend = isset($ini_array["general"]["processing_backend"]) - ? ($ini_array["general"]["processing_backend"]) - : "legacy"; - $persistence_backend = isset($ini_array["general"]["persistence_backend"]) - ? ($ini_array["general"]["persistence_backend"]) - : "legacy"; - $api_url = $ini_array["general"]["api_url"]; - $api_flavor = isset($ini_array["general"]["api_flavor"]) - ? ($ini_array["general"]["api_flavor"]) - : "stable"; - $query = ($do_clean_query === true) ?(cleanQuery($dirty_query, $transform_query_tolowercase)) :($dirty_query); @@ -98,10 +89,9 @@ function search($service_integration, $dirty_query $params_for_id_creation = ($params_for_id === null)?($params_json):(packParamsJSON($params_for_id, $post_params)); if ($persistence_backend === "api") { - $route = $api_url . $api_flavor . "/" . "persistence/" . "createID"; $payload = json_encode(array("params" => $post_params, "param_types" => $param_types)); - $res = library\CommUtils::call_api($route, $payload); + $res = $apiclient->call_persistence("createID", $payload); if ($res["httpcode"] != 200) { echo json_encode($res); } else { @@ -117,11 +107,10 @@ function search($service_integration, $dirty_query if($retrieve_cached_map) { if ($persistence_backend === "api") { - $route = $api_url . $api_flavor . "/" . "persistence/" . "getLastVersion/" . $database; $payload = json_encode(array("vis_id" => $unique_id, "details" => false, "context" => false)); - $res = library\CommUtils::call_api($route, $payload); + $res = $apiclient->call_persistence("getLastVersion", $payload); if ($res["httpcode"] != 200) { echo json_encode($res); } else { @@ -137,9 +126,8 @@ function search($service_integration, $dirty_query } if ($processing_backend === "api") { - $route = $api_url . $api_flavor . "/" . $endpoint . "/search"; $payload = json_encode($post_params); - $res = library\CommUtils::call_api($route, $payload); + $res = $apiclient->call_api($endpoint . "/search", $payload); if ($res["httpcode"] != 200) { return $res; } else { @@ -179,9 +167,8 @@ function search($service_integration, $dirty_query $vis_title = $service_integration; if ($persistence_backend === "api") { - $route = $api_url . $api_flavor . "/" . "persistence/" . "existsVisualization/" . $database; $payload = json_encode(array("vis_id" => $unique_id)); - $res = library\CommUtils::call_api($route, $payload); + $res = $apiclient->call_persistence("existsVisualization", $payload); if ($res["httpcode"] != 200) { return $res; } else { @@ -194,14 +181,13 @@ function search($service_integration, $dirty_query if (!$exists) { if ($persistence_backend === "api") { - $route = $api_url . $api_flavor . "/" . "persistence/" . "createVisualization/" . $database; $payload = json_encode(array("vis_id" => $unique_id, "vis_title" => $vis_title, "data" => $input_json, "vis_clean_query" => $query, "vis_query" => $dirty_query, "vis_params" => $params_json)); - $res = library\CommUtils::call_api($route, $payload); + $res = $apiclient->call_persistence("createVisualization", $payload); if ($res["httpcode"] != 200) { return $res; } @@ -210,10 +196,9 @@ function search($service_integration, $dirty_query } } else { if ($persistence_backend === "api") { - $route = $api_url . $api_flavor . "/" . "persistence/" . "writeRevision/" . $database; $payload = json_encode(array("vis_id" => $unique_id, "data" => $input_json)); - $res = library\CommUtils::call_api($route, $payload); + $res = $apiclient->call_persistence("writeRevision", $payload); if ($res["httpcode"] != 200) { return $res; } From 2e39a4f0b35a88921b31ce09af9f04d222e2bccd Mon Sep 17 00:00:00 2001 From: chreman Date: Mon, 31 May 2021 02:37:28 +0200 Subject: [PATCH 31/54] cleanup --- server/workers/persistence/src/migrate.py | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 server/workers/persistence/src/migrate.py diff --git a/server/workers/persistence/src/migrate.py b/server/workers/persistence/src/migrate.py deleted file mode 100644 index de87134ef..000000000 --- a/server/workers/persistence/src/migrate.py +++ /dev/null @@ -1,16 +0,0 @@ -from sqlalchemy import create_engine, select -from models import Visualizations, Revisions -from config import settings - -engine_source = create_engine('sqlite:////home/chris/data/OKMaps/TRIPLE/triple.sqlite') -# engine_target = create_engine('postgresql+psycopg2://headstart:testpassword@172.18.0.2:5432/dev') -engine_target = create_engine('postgresql+psycopg2://%(user)s:%(pw)s@%(host)s:%(port)s/%(db)s' % settings.TRIPLE) - -with engine_source.connect() as conn_source: - with engine_target.connect() as conn_target: - for table in Visualizations.metadata.sorted_tables: - for row in conn_source.execute(select(table.c)): - try: - conn_target.execute(table.insert().values(dict(row))) - except Exception as e: - print(e) From dfbace0abbe8257675c26f9432523d7eac3a9943 Mon Sep 17 00:00:00 2001 From: chreman Date: Mon, 31 May 2021 02:43:01 +0200 Subject: [PATCH 32/54] cleanup --- server/workers/README.md | 4 ++-- server/workers/api/src/app.py | 2 +- server/workers/api/src/config/__init__.py | 0 server/workers/api/src/config/swagger.json | 1 - server/workers/api/src/{config => }/example_settings.py | 0 server/workers/persistence/src/config/__init__.py | 0 .../workers/persistence/src/{config => }/example_settings.py | 0 7 files changed, 3 insertions(+), 4 deletions(-) delete mode 100644 server/workers/api/src/config/__init__.py delete mode 100644 server/workers/api/src/config/swagger.json rename server/workers/api/src/{config => }/example_settings.py (100%) delete mode 100644 server/workers/persistence/src/config/__init__.py rename server/workers/persistence/src/{config => }/example_settings.py (100%) diff --git a/server/workers/README.md b/server/workers/README.md index 46f58d182..20ada13b3 100644 --- a/server/workers/README.md +++ b/server/workers/README.md @@ -114,14 +114,14 @@ Following commands have to be executed from the root folder of the repository, w **Start services and send them to the docker daemon** ``` -docker-compose up --env-file server/workers/flavorconfigs/flavor.env -d +docker-compose --env-file server/workers/flavorconfigs/flavor.env up -d ``` **Shutting service down** ``` -docker-compose down --env-file server/workers/flavorconfigs/flavor.env +docker-compose --env-file server/workers/flavorconfigs/flavor.env down ``` diff --git a/server/workers/api/src/app.py b/server/workers/api/src/app.py index a64197b4a..5a398900a 100644 --- a/server/workers/api/src/app.py +++ b/server/workers/api/src/app.py @@ -11,7 +11,7 @@ from apis.pubmed import pubmed_ns from apis.openaire import openaire_ns -from config import settings +import settings from utils.monkeypatches import ReverseProxied, __schema__, specs_url, _register_apidoc, inject_flasgger import logging diff --git a/server/workers/api/src/config/__init__.py b/server/workers/api/src/config/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/server/workers/api/src/config/swagger.json b/server/workers/api/src/config/swagger.json deleted file mode 100644 index 034adf72a..000000000 --- a/server/workers/api/src/config/swagger.json +++ /dev/null @@ -1 +0,0 @@ -{"swagger": "2.0", "basePath": "/api", "paths": {"/triple/mappings": {"get": {"responses": {"200": {"description": "OK"}, "400": {"description": "Invalid search parameters"}}, "operationId": "get_mappings", "parameters": [{"description": "Specify the ElasticSearch index to get the mapping of, currently either 'isidore-sources-triple' or 'isidore-documents-triple'", "name": "index", "type": "string", "in": "query"}], "tags": ["triple"]}}, "/triple/search": {"post": {"responses": {"200": {"description": "OK"}, "400": {"description": "Invalid search parameters"}}, "operationId": "post_search", "parameters": [{"name": "payload", "required": true, "in": "body", "schema": {"$ref": "#/definitions/SearchQuery"}}], "produces": ["application/json", "text/csv"], "tags": ["triple"]}}}, "info": {"title": "Head Start API", "version": "0.1", "description": "Head Start API demo"}, "produces": ["application/json"], "consumes": ["application/json"], "tags": [{"name": "triple", "description": "TRIPLE API operations"}], "definitions": {"SearchQuery": {"required": ["from", "q", "sorting", "to", "vis_type"], "properties": {"q": {"type": "string", "description": "query string", "example": "feminicide"}, "sorting": {"type": "string", "description": "most-relevant or most-recent", "example": "most-recent"}, "from": {"type": "string", "description": "yyyy-MM-dd", "example": "2019-01-01"}, "to": {"type": "string", "description": "yyyy-MM-dd", "example": "2019-12-31"}, "vis_type": {"type": "string", "description": "overview or timeline", "example": "overview"}, "raw": {"type": "boolean", "description": "raw results from ElasticSearch", "example": "false"}}, "type": "object"}}, "responses": {"ParseError": {"description": "When a mask can't be parsed"}, "MaskError": {"description": "When any error occurs on mask"}}} diff --git a/server/workers/api/src/config/example_settings.py b/server/workers/api/src/example_settings.py similarity index 100% rename from server/workers/api/src/config/example_settings.py rename to server/workers/api/src/example_settings.py diff --git a/server/workers/persistence/src/config/__init__.py b/server/workers/persistence/src/config/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/server/workers/persistence/src/config/example_settings.py b/server/workers/persistence/src/example_settings.py similarity index 100% rename from server/workers/persistence/src/config/example_settings.py rename to server/workers/persistence/src/example_settings.py From 7d173ae7bede1654551fdcd392eb071c71497262 Mon Sep 17 00:00:00 2001 From: chreman Date: Mon, 31 May 2021 02:48:48 +0200 Subject: [PATCH 33/54] bugfix --- server/workers/api/src/app.py | 2 +- server/workers/persistence/src/app.py | 2 +- server/workers/persistence/src/database.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/server/workers/api/src/app.py b/server/workers/api/src/app.py index 5a398900a..5dded8f2c 100644 --- a/server/workers/api/src/app.py +++ b/server/workers/api/src/app.py @@ -34,7 +34,7 @@ def api_patches(app, settings): app = Flask('v1', instance_relative_config=True) -app.config.from_object('config.settings') +app.config.from_object('settings') handler = logging.StreamHandler(sys.stdout) handler.setLevel(app.logger.level) app = inject_flasgger(app) diff --git a/server/workers/persistence/src/app.py b/server/workers/persistence/src/app.py index 5a92babfa..40b9a0327 100644 --- a/server/workers/persistence/src/app.py +++ b/server/workers/persistence/src/app.py @@ -57,7 +57,7 @@ def api_patches(app, settings): app = Flask('v1', instance_relative_config=True) -app.config.from_object('config.settings') +app.config.from_object('settings') handler = logging.StreamHandler(sys.stdout) handler.setLevel(app.logger.level) app.wsgi_app = ProxyFix(app.wsgi_app, x_proto=1, x_port=1, x_for=1, x_host=1, x_prefix=1) diff --git a/server/workers/persistence/src/database.py b/server/workers/persistence/src/database.py index 0ffaf58b1..68681344a 100644 --- a/server/workers/persistence/src/database.py +++ b/server/workers/persistence/src/database.py @@ -2,7 +2,7 @@ from sqlalchemy.orm import sessionmaker from sqlalchemy import create_engine from sqlalchemy.ext.declarative import declarative_base -from config import settings +import settings bind_params = { From 5f4fdaedf77c33aa5f56244da448cd0b9720f000 Mon Sep 17 00:00:00 2001 From: chreman Date: Mon, 31 May 2021 02:50:46 +0200 Subject: [PATCH 34/54] cleanup --- server/workers/api/src/app.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/server/workers/api/src/app.py b/server/workers/api/src/app.py index 5dded8f2c..175cec747 100644 --- a/server/workers/api/src/app.py +++ b/server/workers/api/src/app.py @@ -12,7 +12,7 @@ from apis.openaire import openaire_ns import settings -from utils.monkeypatches import ReverseProxied, __schema__, specs_url, _register_apidoc, inject_flasgger +from utils.monkeypatches import ReverseProxied, __schema__, specs_url, _register_apidoc import logging @@ -37,7 +37,6 @@ def api_patches(app, settings): app.config.from_object('settings') handler = logging.StreamHandler(sys.stdout) handler.setLevel(app.logger.level) -app = inject_flasgger(app) app.wsgi_app = ProxyFix(app.wsgi_app, x_proto=1, x_port=1, x_for=1, x_host=1, x_prefix=1) app.wsgi_app = ReverseProxied(app.wsgi_app) CORS(app, expose_headers=["Content-Disposition", "Access-Control-Allow-Origin"]) From d83e4d746de5881fcca3abe55a5d5a4c99135cf4 Mon Sep 17 00:00:00 2001 From: chreman Date: Mon, 31 May 2021 02:52:22 +0200 Subject: [PATCH 35/54] bugfix --- server/workers/persistence/src/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/workers/persistence/src/app.py b/server/workers/persistence/src/app.py index 40b9a0327..4a5bd7b9b 100644 --- a/server/workers/persistence/src/app.py +++ b/server/workers/persistence/src/app.py @@ -7,7 +7,7 @@ from apis.persistence import persistence_ns -from config import settings +import settings import logging class ReverseProxied(object): From 6300d4a4158031f89262942367d87884b044740d Mon Sep 17 00:00:00 2001 From: chreman Date: Mon, 31 May 2021 02:55:54 +0200 Subject: [PATCH 36/54] cleanup --- server/workers/persistence/src/app.py | 1 - 1 file changed, 1 deletion(-) diff --git a/server/workers/persistence/src/app.py b/server/workers/persistence/src/app.py index 4a5bd7b9b..1968d1d10 100644 --- a/server/workers/persistence/src/app.py +++ b/server/workers/persistence/src/app.py @@ -6,7 +6,6 @@ from werkzeug.middleware.proxy_fix import ProxyFix from apis.persistence import persistence_ns - import settings import logging From 1f3d1dde632bf49a4cd8565cc146f682bfcbfdac Mon Sep 17 00:00:00 2001 From: chreman Date: Mon, 31 May 2021 02:57:38 +0200 Subject: [PATCH 37/54] bugfix --- server/workers/persistence/src/apis/persistence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/workers/persistence/src/apis/persistence.py b/server/workers/persistence/src/apis/persistence.py index 9fb039f76..914fb52fa 100644 --- a/server/workers/persistence/src/apis/persistence.py +++ b/server/workers/persistence/src/apis/persistence.py @@ -8,7 +8,7 @@ from models import Revisions, Visualizations from database import sessions -from config import settings +import settings persistence_ns = Namespace("persistence", description="OKMAps persistence operations") From c607506909e29d0265a037ad39750fc8d7f136f6 Mon Sep 17 00:00:00 2001 From: chreman Date: Mon, 31 May 2021 13:03:23 +0200 Subject: [PATCH 38/54] bugfix --- .../classes/headstart/library/APIClient.php | 26 +++++++++---------- server/services/search.php | 8 +++++- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/server/classes/headstart/library/APIClient.php b/server/classes/headstart/library/APIClient.php index d494947a9..0a03bac65 100644 --- a/server/classes/headstart/library/APIClient.php +++ b/server/classes/headstart/library/APIClient.php @@ -1,19 +1,17 @@ load_configs(); + $this->load_configs($ini_array); } - public function load_configs() { - $INI_DIR = dirname(__FILE__) . "/../preprocessing/conf/"; - $this->ini_array = Toolkit::loadIni($INI_DIR); + public function load_configs($ini_array) { + $this->ini_array = $ini_array; $this->settings = $this->ini_array["general"]; $this->processing_backend = isset($this->ini_array["general"]["processing_backend"]) ? ($this->ini_array["general"]["processing_backend"]) @@ -23,22 +21,22 @@ public function load_configs() { : "legacy"; $this->database = $this->ini_array["connection"]["database"]; $this->WORKING_DIR = $this->ini_array["general"]["preprocessing_dir"] . $this->ini_array["output"]["output_dir"]; - $api_url = $ini_array["general"]["api_url"]; - $api_flavor = isset($ini_array["general"]["api_flavor"]) - ? ($ini_array["general"]["api_flavor"]) + $api_url = $this->ini_array["general"]["api_url"]; + $api_flavor = isset($this->ini_array["general"]["api_flavor"]) + ? ($this->ini_array["general"]["api_flavor"]) : "stable"; $this->base_route = $api_url . $api_flavor . "/"; } public function call_api($endpoint, $payload) { - $route = $base_route . $endpoint . "/" . $this->database; - $res = library\CommUtils::call_api($route, $payload); + $route = $this->base_route . $endpoint . "/" . $this->database; + $res = CommUtils::call_api($route, $payload); return $res; } public function call_persistence($endpoint, $payload) { - $route = $base_route . "persistence/" . $endpoint . "/" . $this->database; - $res = library\CommUtils::call_api($route, $payload); + $route = $this->base_route . "persistence/" . $endpoint . "/" . $this->database; + $res = CommUtils::call_api($route, $payload); return $res; } diff --git a/server/services/search.php b/server/services/search.php index 09e2d3f58..8996028bf 100644 --- a/server/services/search.php +++ b/server/services/search.php @@ -59,7 +59,13 @@ function search($service_integration, $dirty_query , $precomputed_id = null, $do_clean_query = true) { $INI_DIR = dirname(__FILE__) . "/../preprocessing/conf/"; $ini_array = library\Toolkit::loadIni($INI_DIR); - $apiclient = new \library\APIClient(); + $processing_backend = isset($ini_array["general"]["processing_backend"]) + ? ($ini_array["general"]["processing_backend"]) + : "legacy"; + $persistence_backend = isset($ini_array["general"]["persistence_backend"]) + ? ($ini_array["general"]["persistence_backend"]) + : "legacy"; + $apiclient = new \headstart\library\APIClient($service_integration, $ini_array); $repo2snapshot = array("plos" => "PLOS" , "pubmed" => "PubMed" , "doaj" => "DOAJ" From 063a97e97a6bb400f3b7b711f2d1f8eb1f1e63ca Mon Sep 17 00:00:00 2001 From: chreman Date: Mon, 31 May 2021 14:45:15 +0200 Subject: [PATCH 39/54] add persistence forwarder --- server/workers/api/src/app.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/workers/api/src/app.py b/server/workers/api/src/app.py index 175cec747..89edad27a 100644 --- a/server/workers/api/src/app.py +++ b/server/workers/api/src/app.py @@ -10,6 +10,7 @@ from apis.base import base_ns from apis.pubmed import pubmed_ns from apis.openaire import openaire_ns +from apis.persistence import persistence_ns import settings from utils.monkeypatches import ReverseProxied, __schema__, specs_url, _register_apidoc @@ -47,6 +48,7 @@ def api_patches(app, settings): api.add_namespace(base_ns, path='/base') api.add_namespace(pubmed_ns, path='/pubmed') api.add_namespace(openaire_ns, path='/openaire') +api.add_namespace(openaire_ns, path='/persistence') app.logger.debug(app.config) app.logger.debug(app.url_map) From 1c8c118aab475a557fa2be8f739c6a74a65e9e24 Mon Sep 17 00:00:00 2001 From: chreman Date: Mon, 31 May 2021 14:52:54 +0200 Subject: [PATCH 40/54] bugfix --- server/workers/api/src/apis/persistence.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/workers/api/src/apis/persistence.py b/server/workers/api/src/apis/persistence.py index 3128bf7ec..452b72b95 100644 --- a/server/workers/api/src/apis/persistence.py +++ b/server/workers/api/src/apis/persistence.py @@ -15,8 +15,8 @@ persistence_ns = Namespace("persistence", description="Persistence API operations redirect") -persistence_uri = "%s:%s" %(os.getenv("PERISTENCE_HOST"), - os.getenv("PERISTENCE_PORT")) +persistence_uri = "%s:%s" %(os.getenv("PERSISTENCE_HOST"), + os.getenv("PERSISTENCE_PORT")) @persistence_ns.route('/') def persistence(): From f66099d2b8f267c2f3201f2d2e92c17e6abc0c96 Mon Sep 17 00:00:00 2001 From: chreman Date: Sun, 6 Jun 2021 20:05:16 +0200 Subject: [PATCH 41/54] wip --- server/services/search.php | 1 - 1 file changed, 1 deletion(-) diff --git a/server/services/search.php b/server/services/search.php index 8996028bf..537be6edf 100644 --- a/server/services/search.php +++ b/server/services/search.php @@ -6,7 +6,6 @@ require_once dirname(__FILE__) . '/../classes/headstart/library/CommUtils.php'; require_once dirname(__FILE__) . '/../classes/headstart/library/APIClient.php'; require_once dirname(__FILE__) . '/../classes/headstart/library/toolkit.php'; - require 'helper.php'; use headstart\library; From dbb57949866b8341040c26972a76d30fc9e7911c Mon Sep 17 00:00:00 2001 From: chreman Date: Sun, 6 Jun 2021 20:06:48 +0200 Subject: [PATCH 42/54] fix routing --- server/workers/api/src/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/workers/api/src/app.py b/server/workers/api/src/app.py index 89edad27a..46cc61e13 100644 --- a/server/workers/api/src/app.py +++ b/server/workers/api/src/app.py @@ -48,7 +48,7 @@ def api_patches(app, settings): api.add_namespace(base_ns, path='/base') api.add_namespace(pubmed_ns, path='/pubmed') api.add_namespace(openaire_ns, path='/openaire') -api.add_namespace(openaire_ns, path='/persistence') +api.add_namespace(persistence_ns, path='/persistence') app.logger.debug(app.config) app.logger.debug(app.url_map) From 1fee61a611e3fc226e2b14eaf848988f23950bb0 Mon Sep 17 00:00:00 2001 From: chreman Date: Sun, 6 Jun 2021 20:14:11 +0200 Subject: [PATCH 43/54] fix persistence redirect --- server/workers/api/src/apis/persistence.py | 23 ---------------------- server/workers/api/src/app.py | 11 ++++++++--- 2 files changed, 8 insertions(+), 26 deletions(-) delete mode 100644 server/workers/api/src/apis/persistence.py diff --git a/server/workers/api/src/apis/persistence.py b/server/workers/api/src/apis/persistence.py deleted file mode 100644 index 452b72b95..000000000 --- a/server/workers/api/src/apis/persistence.py +++ /dev/null @@ -1,23 +0,0 @@ -import os -import json -import uuid -import time -import redis -import asyncio -import aioredis -import pandas as pd - -from flask import Blueprint, request, make_response, jsonify, abort, redirect -from flask_restx import Namespace, Resource, fields -from .request_validators import SearchParamSchema -from apis.utils import get_key, detect_error - - -persistence_ns = Namespace("persistence", description="Persistence API operations redirect") - -persistence_uri = "%s:%s" %(os.getenv("PERSISTENCE_HOST"), - os.getenv("PERSISTENCE_PORT")) - -@persistence_ns.route('/') -def persistence(): - return redirect(persistence_uri + "/api/persistence") \ No newline at end of file diff --git a/server/workers/api/src/app.py b/server/workers/api/src/app.py index 46cc61e13..8a440c267 100644 --- a/server/workers/api/src/app.py +++ b/server/workers/api/src/app.py @@ -1,6 +1,6 @@ import os import sys -from flask import Flask +from flask import Flask, redirect from flask_restx import Api from flask_cors import CORS from werkzeug.middleware.proxy_fix import ProxyFix @@ -10,7 +10,6 @@ from apis.base import base_ns from apis.pubmed import pubmed_ns from apis.openaire import openaire_ns -from apis.persistence import persistence_ns import settings from utils.monkeypatches import ReverseProxied, __schema__, specs_url, _register_apidoc @@ -48,7 +47,13 @@ def api_patches(app, settings): api.add_namespace(base_ns, path='/base') api.add_namespace(pubmed_ns, path='/pubmed') api.add_namespace(openaire_ns, path='/openaire') -api.add_namespace(persistence_ns, path='/persistence') + +persistence_uri = "%s:%s" %(os.getenv("PERSISTENCE_HOST"), + os.getenv("PERSISTENCE_PORT")) +@app.route('/persistence') +def persistence(): + return redirect(persistence_uri + "/api/persistence") + app.logger.debug(app.config) app.logger.debug(app.url_map) From a5429ec9cc2ee3811f1131e7ff756179f9473f83 Mon Sep 17 00:00:00 2001 From: chreman Date: Sun, 6 Jun 2021 20:15:25 +0200 Subject: [PATCH 44/54] fix persistence redirect --- server/workers/api/src/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/workers/api/src/app.py b/server/workers/api/src/app.py index 8a440c267..038c2a9ac 100644 --- a/server/workers/api/src/app.py +++ b/server/workers/api/src/app.py @@ -50,7 +50,7 @@ def api_patches(app, settings): persistence_uri = "%s:%s" %(os.getenv("PERSISTENCE_HOST"), os.getenv("PERSISTENCE_PORT")) -@app.route('/persistence') +@app.route('/api/persistence') def persistence(): return redirect(persistence_uri + "/api/persistence") From 5ab37b553f14f4b0f4a7c6cea4476a0354c72d3a Mon Sep 17 00:00:00 2001 From: chreman Date: Sun, 6 Jun 2021 21:56:49 +0200 Subject: [PATCH 45/54] fix redirect --- server/workers/api/src/app.py | 11 ++++++----- server/workers/persistence/src/app.py | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/server/workers/api/src/app.py b/server/workers/api/src/app.py index 038c2a9ac..2b1f421a7 100644 --- a/server/workers/api/src/app.py +++ b/server/workers/api/src/app.py @@ -1,6 +1,6 @@ import os import sys -from flask import Flask, redirect +from flask import Flask, redirect, url_for from flask_restx import Api from flask_cors import CORS from werkzeug.middleware.proxy_fix import ProxyFix @@ -48,11 +48,12 @@ def api_patches(app, settings): api.add_namespace(pubmed_ns, path='/pubmed') api.add_namespace(openaire_ns, path='/openaire') -persistence_uri = "%s:%s" %(os.getenv("PERSISTENCE_HOST"), +persistence_uri = "%s:%s/" %(os.getenv("PERSISTENCE_HOST"), os.getenv("PERSISTENCE_PORT")) -@app.route('/api/persistence') -def persistence(): - return redirect(persistence_uri + "/api/persistence") +app.logger.debug(persistence_uri) +@app.route('/api/persistence/') +def persistence_redirect(endpoint): + return redirect(persistence_uri + endpoint, 303) app.logger.debug(app.config) app.logger.debug(app.url_map) diff --git a/server/workers/persistence/src/app.py b/server/workers/persistence/src/app.py index 1968d1d10..f74efbf91 100644 --- a/server/workers/persistence/src/app.py +++ b/server/workers/persistence/src/app.py @@ -70,4 +70,4 @@ def api_patches(app, settings): if __name__ == '__main__': - app.run(host="127.0.0.1", port=5001, debug=True) + app.run(host="127.0.0.1", port=5101, debug=True) From 18eebe08d253f0723c4ec973103a6f2741cae8f4 Mon Sep 17 00:00:00 2001 From: chreman Date: Sun, 6 Jun 2021 22:18:59 +0200 Subject: [PATCH 46/54] fix persistence routing --- docker-compose.yml | 4 +--- server/workers/api/src/app.py | 7 ------- server/workers/persistence/src/app.py | 2 +- 3 files changed, 2 insertions(+), 11 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 5c9a215ac..327535784 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -48,8 +48,6 @@ services: REDIS_PORT: "${REDIS_PORT}" REDIS_PASSWORD: "${REDIS_PASSWORD}" REDIS_DB: "${REDIS_DB}" - PERSISTENCE_HOST: "${PERSISTENCE_HOST}" - PERSISTENCE_PORT: "${PERSISTENCE_PORT}" command: ["gunicorn", "--workers", "10", "--threads", "2", "-b", "0.0.0.0:${API_PORT}", "app:app", "--timeout", "300"] depends_on: - redis @@ -65,7 +63,7 @@ services: POSTGRES_PASSWORD: "${POSTGRES_PASSWORD}" POSTGRES_HOST: "${POSTGRES_HOST}" POSTGRES_PORT: "${POSTGRES_PORT}" - command: ["gunicorn", "--workers", "10", "--threads", "2", "-b", "0.0.0.0:${PERSISTENCE_PORT}", "app:app", "--timeout", "300"] + command: ["gunicorn", "--workers", "10", "--threads", "2", "-b", "0.0.0.0:${API_PORT}", "app:app", "--timeout", "300"] networks: - headstart diff --git a/server/workers/api/src/app.py b/server/workers/api/src/app.py index 2b1f421a7..300c8f430 100644 --- a/server/workers/api/src/app.py +++ b/server/workers/api/src/app.py @@ -48,13 +48,6 @@ def api_patches(app, settings): api.add_namespace(pubmed_ns, path='/pubmed') api.add_namespace(openaire_ns, path='/openaire') -persistence_uri = "%s:%s/" %(os.getenv("PERSISTENCE_HOST"), - os.getenv("PERSISTENCE_PORT")) -app.logger.debug(persistence_uri) -@app.route('/api/persistence/') -def persistence_redirect(endpoint): - return redirect(persistence_uri + endpoint, 303) - app.logger.debug(app.config) app.logger.debug(app.url_map) diff --git a/server/workers/persistence/src/app.py b/server/workers/persistence/src/app.py index f74efbf91..1968d1d10 100644 --- a/server/workers/persistence/src/app.py +++ b/server/workers/persistence/src/app.py @@ -70,4 +70,4 @@ def api_patches(app, settings): if __name__ == '__main__': - app.run(host="127.0.0.1", port=5101, debug=True) + app.run(host="127.0.0.1", port=5001, debug=True) From 8232790ea38d385193c0aaf35b1866fed5c7781f Mon Sep 17 00:00:00 2001 From: chreman Date: Sun, 6 Jun 2021 22:54:46 +0200 Subject: [PATCH 47/54] fix persistence routing --- server/workers/flavorconfigs/example.env | 4 +--- server/workers/proxy/docker-compose.yml | 10 +++------- server/workers/proxy/templates/default.conf.template | 10 +++++----- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/server/workers/flavorconfigs/example.env b/server/workers/flavorconfigs/example.env index b026e28e6..837336bd2 100644 --- a/server/workers/flavorconfigs/example.env +++ b/server/workers/flavorconfigs/example.env @@ -5,12 +5,10 @@ POSTGRES_DB=postgres POSTGRES_USER=headstart POSTGRES_PASSWORD=testpassword POSTGRES_HOST=dev_db_1 -POSTGRES_PORT=5433 +POSTGRES_PORT=5432 PGADMIN_DEFAULT_EMAIL=christopher.kittel@openknowledgemaps.org PGADMIN_DEFAULT_PASSWORD=testpassword API_PORT=5001 -PERSISTENCE_HOST=dev_persistence_1 -PERSISTENCE_PORT=5101 REDIS_HOST=dev_redis_1 REDIS_PORT=6379 REDIS_DB=0 diff --git a/server/workers/proxy/docker-compose.yml b/server/workers/proxy/docker-compose.yml index 15a61d378..6f1121642 100644 --- a/server/workers/proxy/docker-compose.yml +++ b/server/workers/proxy/docker-compose.yml @@ -11,13 +11,9 @@ services: ports: - '8080:80' networks: - - stable_headstart - - dev_headstart + - triple_staging_headstart networks: - stable_headstart: + triple_staging_headstart: external: true - name: stable_headstart - dev_headstart: - external: true - name: dev_headstart \ No newline at end of file + name: triple_staging_headstart \ No newline at end of file diff --git a/server/workers/proxy/templates/default.conf.template b/server/workers/proxy/templates/default.conf.template index 6490bde72..63986aa33 100644 --- a/server/workers/proxy/templates/default.conf.template +++ b/server/workers/proxy/templates/default.conf.template @@ -1,11 +1,11 @@ server { listen ${NGINX_PORT}; - location /stable/ { - proxy_pass http://stable_api_1:5001/api/; - } + location /triple_staging/ { + proxy_pass http://triple_staging_api_1:5001/api/; - location /dev/ { - proxy_pass http://dev_api_1:5001/api/; + location /triple_staging/persistence/ { + proxy_pass http://triple_staging_persistence_1:5001/api/persistence/; + } } } \ No newline at end of file From e22782cdd88dd7fc22246168501079b8289ad3fa Mon Sep 17 00:00:00 2001 From: chreman Date: Sun, 6 Jun 2021 23:13:43 +0200 Subject: [PATCH 48/54] fix postgres routing --- docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/docker-compose.yml b/docker-compose.yml index 327535784..39fe13302 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,6 +4,7 @@ services: db: image: 'postgres:12.2-alpine' + hostname: "${POSTGRES_HOSTNAME}" restart: always environment: POSTGRES_USER: "${POSTGRES_USER}" From 50fec0f3402a6d7cb035e66093d03b6c2936e157 Mon Sep 17 00:00:00 2001 From: chreman Date: Sun, 6 Jun 2021 23:43:21 +0200 Subject: [PATCH 49/54] fix routing in APIClient --- server/classes/headstart/library/APIClient.php | 2 +- server/workers/README.md | 2 +- server/workers/flavorconfigs/example.env | 1 + server/workers/persistence/Dockerfile | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/server/classes/headstart/library/APIClient.php b/server/classes/headstart/library/APIClient.php index 0a03bac65..5eccefec0 100644 --- a/server/classes/headstart/library/APIClient.php +++ b/server/classes/headstart/library/APIClient.php @@ -29,7 +29,7 @@ public function load_configs($ini_array) { } public function call_api($endpoint, $payload) { - $route = $this->base_route . $endpoint . "/" . $this->database; + $route = $this->base_route . $endpoint; $res = CommUtils::call_api($route, $payload); return $res; } diff --git a/server/workers/README.md b/server/workers/README.md index 20ada13b3..bb2fbb067 100644 --- a/server/workers/README.md +++ b/server/workers/README.md @@ -85,7 +85,7 @@ PostgreSQL service: * Manual database creation for Postgres: -Enter container: `docker exec -it VARYINGNAME_pgsql_1 psql -U headstart` +Enter container: `docker exec -it VARYINGNAME_db_1 psql -U headstart` Execute command: `CREATE DATABASE databasename;` diff --git a/server/workers/flavorconfigs/example.env b/server/workers/flavorconfigs/example.env index 837336bd2..2c0f395ad 100644 --- a/server/workers/flavorconfigs/example.env +++ b/server/workers/flavorconfigs/example.env @@ -6,6 +6,7 @@ POSTGRES_USER=headstart POSTGRES_PASSWORD=testpassword POSTGRES_HOST=dev_db_1 POSTGRES_PORT=5432 +POSTGRES_HOSTNAME=headstart_pgsql_1 PGADMIN_DEFAULT_EMAIL=christopher.kittel@openknowledgemaps.org PGADMIN_DEFAULT_PASSWORD=testpassword API_PORT=5001 diff --git a/server/workers/persistence/Dockerfile b/server/workers/persistence/Dockerfile index 9d552ec68..9e8234d5d 100644 --- a/server/workers/persistence/Dockerfile +++ b/server/workers/persistence/Dockerfile @@ -12,4 +12,4 @@ RUN pip install --no-cache-dir -r requirements.txt RUN pip install git+https://github.com/python-restx/flask-restx COPY workers/persistence/src/ ./ - +RUN python manage.py \ No newline at end of file From eb5586bd76178ae1efbc22241ae43758d79594b5 Mon Sep 17 00:00:00 2001 From: chreman Date: Mon, 7 Jun 2021 22:07:31 +0200 Subject: [PATCH 50/54] fix routing --- server/classes/headstart/library/APIClient.php | 6 ++++++ server/services/search.php | 11 +++++------ server/services/searchTRIPLE.php | 3 +-- server/workers/persistence/src/apis/persistence.py | 4 ++-- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/server/classes/headstart/library/APIClient.php b/server/classes/headstart/library/APIClient.php index 5eccefec0..4cb33976b 100644 --- a/server/classes/headstart/library/APIClient.php +++ b/server/classes/headstart/library/APIClient.php @@ -31,12 +31,18 @@ public function load_configs($ini_array) { public function call_api($endpoint, $payload) { $route = $this->base_route . $endpoint; $res = CommUtils::call_api($route, $payload); + if ($res["httpcode"] != 200) { + $res["route"] = $route; + } return $res; } public function call_persistence($endpoint, $payload) { $route = $this->base_route . "persistence/" . $endpoint . "/" . $this->database; $res = CommUtils::call_api($route, $payload); + if ($res["httpcode"] != 200) { + $res["route"] = $route; + } return $res; } diff --git a/server/services/search.php b/server/services/search.php index 537be6edf..e3e97a733 100644 --- a/server/services/search.php +++ b/server/services/search.php @@ -53,8 +53,7 @@ function cleanQuery($dirty_query, $transform_query_tolowercase) { function search($service_integration, $dirty_query , $post_params, $param_types , $transform_query_tolowercase = true - , $retrieve_cached_map = true, $params_for_id = null, $num_labels = 3 - , $id = "area_uri", $subjects = "subject" + , $retrieve_cached_map = true, $params_for_id = null , $precomputed_id = null, $do_clean_query = true) { $INI_DIR = dirname(__FILE__) . "/../preprocessing/conf/"; $ini_array = library\Toolkit::loadIni($INI_DIR); @@ -134,7 +133,7 @@ function search($service_integration, $dirty_query $payload = json_encode($post_params); $res = $apiclient->call_api($endpoint . "/search", $payload); if ($res["httpcode"] != 200) { - return $res; + return json_encode($res); } else { $output_json = $res["result"]; } @@ -175,7 +174,7 @@ function search($service_integration, $dirty_query $payload = json_encode(array("vis_id" => $unique_id)); $res = $apiclient->call_persistence("existsVisualization", $payload); if ($res["httpcode"] != 200) { - return $res; + return json_encode($res); } else { $result = json_decode($res["result"], true); $exists = $result["exists"]; @@ -194,7 +193,7 @@ function search($service_integration, $dirty_query "vis_params" => $params_json)); $res = $apiclient->call_persistence("createVisualization", $payload); if ($res["httpcode"] != 200) { - return $res; + return json_encode($res); } } else { $persistence->createVisualization($unique_id, $vis_title, $input_json, $query, $dirty_query, $params_json); @@ -205,7 +204,7 @@ function search($service_integration, $dirty_query "data" => $input_json)); $res = $apiclient->call_persistence("writeRevision", $payload); if ($res["httpcode"] != 200) { - return $res; + return json_encode($res); } } else { $persistence->writeRevision($unique_id, $input_json); diff --git a/server/services/searchTRIPLE.php b/server/services/searchTRIPLE.php index e229af78b..48a12b342 100644 --- a/server/services/searchTRIPLE.php +++ b/server/services/searchTRIPLE.php @@ -28,8 +28,7 @@ $result = search($service_integration, $dirty_query , $post_params, $param_types , true - , true, null, 3 - , "area_uri", "subject" + , true, null , $precomputed_id, true); echo $result diff --git a/server/workers/persistence/src/apis/persistence.py b/server/workers/persistence/src/apis/persistence.py index 914fb52fa..639c3f28b 100644 --- a/server/workers/persistence/src/apis/persistence.py +++ b/server/workers/persistence/src/apis/persistence.py @@ -294,11 +294,11 @@ def post(self, database): headers) -@persistence_ns.route('/createID') +@persistence_ns.route('/createID/') class createID(Resource): @persistence_ns.produces(["application/json"]) - def post(self): + def post(self, database): try: persistence_ns.logger.debug("createID") payload = request.get_json() From 465384da56957f62600794fe84f34e5de68fc501 Mon Sep 17 00:00:00 2001 From: chreman Date: Mon, 7 Jun 2021 22:20:10 +0200 Subject: [PATCH 51/54] fix APIclient initiation --- server/classes/headstart/library/APIClient.php | 2 +- server/services/GSheetUpdateAvailable.php | 2 +- server/services/createNewGSheet.php | 5 ++++- server/services/getGSheetsMap.php | 2 +- server/services/getLastVersion.php | 2 +- server/services/getLatestRevision.php | 2 +- server/services/search.php | 2 +- server/workers/README.md | 4 ++++ server/workers/persistence/Dockerfile | 4 +--- 9 files changed, 15 insertions(+), 10 deletions(-) diff --git a/server/classes/headstart/library/APIClient.php b/server/classes/headstart/library/APIClient.php index 4cb33976b..01fd6ddcd 100644 --- a/server/classes/headstart/library/APIClient.php +++ b/server/classes/headstart/library/APIClient.php @@ -5,7 +5,7 @@ class APIClient { - public function __construct($service_integration, $ini_array) { + public function __construct($ini_array) { $this->load_configs($ini_array); } diff --git a/server/services/GSheetUpdateAvailable.php b/server/services/GSheetUpdateAvailable.php index c934c52fa..bae8758f9 100644 --- a/server/services/GSheetUpdateAvailable.php +++ b/server/services/GSheetUpdateAvailable.php @@ -10,7 +10,7 @@ $INI_DIR = dirname(__FILE__) . "/../preprocessing/conf/"; $ini_array = library\Toolkit::loadIni($INI_DIR); -$apiclient = new \library\APIClient(); +$apiclient = new \headstart\library\APIClient($ini_array); $persistence = new headstart\persistence\SQLitePersistence($ini_array["connection"]["sqlite_db"]); $vis_id = library\CommUtils::getParameter($_GET, "vis_id"); diff --git a/server/services/createNewGSheet.php b/server/services/createNewGSheet.php index 9cad7f731..289c2a90e 100644 --- a/server/services/createNewGSheet.php +++ b/server/services/createNewGSheet.php @@ -4,9 +4,12 @@ require_once dirname(__FILE__) . '/../classes/headstart/library/CommUtils.php'; require_once dirname(__FILE__) . '/../classes/headstart/library/APIClient.php'; +require_once dirname(__FILE__) . '/../classes/headstart/library/toolkit.php'; use headstart\library; -$apiclient = new \library\APIClient(); +$INI_DIR = dirname(__FILE__) . "/../preprocessing/conf/"; +$ini_array = library\Toolkit::loadIni($INI_DIR); +$apiclient = new \headstart\library\APIClient($ini_array); if(php_sapi_name() == 'cli') { // Called from command-line, maybe cron diff --git a/server/services/getGSheetsMap.php b/server/services/getGSheetsMap.php index 9b0928d40..777ee5495 100644 --- a/server/services/getGSheetsMap.php +++ b/server/services/getGSheetsMap.php @@ -12,7 +12,7 @@ $INI_DIR = dirname(__FILE__) . "/../preprocessing/conf/"; $ini_array = library\Toolkit::loadIni($INI_DIR); -$apiclient = new \library\APIClient(); +$apiclient = new \headstart\library\APIClient($ini_array); $persistence = new headstart\persistence\SQLitePersistence($ini_array["connection"]["sqlite_db"]); $vis_id = library\CommUtils::getParameter($_GET, "vis_id"); diff --git a/server/services/getLastVersion.php b/server/services/getLastVersion.php index ada276216..ce96726d7 100644 --- a/server/services/getLastVersion.php +++ b/server/services/getLastVersion.php @@ -13,7 +13,7 @@ $INI_DIR = dirname(__FILE__) . "/../preprocessing/conf/"; $ini_array = library\Toolkit::loadIni($INI_DIR); -$apiclient = new \library\APIClient(); +$apiclient = new \headstart\library\APIClient($ini_array); $persistence = new headstart\persistence\SQLitePersistence($ini_array["connection"]["sqlite_db"]); $vis_id = library\CommUtils::getParameter($_GET, "vis_id"); diff --git a/server/services/getLatestRevision.php b/server/services/getLatestRevision.php index 9e6468834..320c456c8 100644 --- a/server/services/getLatestRevision.php +++ b/server/services/getLatestRevision.php @@ -12,7 +12,7 @@ $INI_DIR = dirname(__FILE__) . "/../preprocessing/conf/"; $ini_array = library\Toolkit::loadIni($INI_DIR); -$apiclient = new \library\APIClient(); +$apiclient = new \headstart\library\APIClient($ini_array); $persistence = new headstart\persistence\SQLitePersistence($ini_array["connection"]["sqlite_db"]); $persistence_backend = $ini_array["general"]["persistence_backend"]; diff --git a/server/services/search.php b/server/services/search.php index e3e97a733..adf34b333 100644 --- a/server/services/search.php +++ b/server/services/search.php @@ -63,7 +63,7 @@ function search($service_integration, $dirty_query $persistence_backend = isset($ini_array["general"]["persistence_backend"]) ? ($ini_array["general"]["persistence_backend"]) : "legacy"; - $apiclient = new \headstart\library\APIClient($service_integration, $ini_array); + $apiclient = new \headstart\library\APIClient($ini_array); $repo2snapshot = array("plos" => "PLOS" , "pubmed" => "PubMed" , "doaj" => "DOAJ" diff --git a/server/workers/README.md b/server/workers/README.md index bb2fbb067..760a706cf 100644 --- a/server/workers/README.md +++ b/server/workers/README.md @@ -89,6 +89,10 @@ Enter container: `docker exec -it VARYINGNAME_db_1 psql -U headstart` Execute command: `CREATE DATABASE databasename;` +Exit the container and re-enter it as normal user: `docker exec -it VARYINGNAME_db_1 /bin/bash` + +Execute command: `python manage.py` + * In `preprocessing/conf/config_local.ini` change "databasename" to the dev/production database name for the specific integration. This should be in line with the database names provided in `settings.py` diff --git a/server/workers/persistence/Dockerfile b/server/workers/persistence/Dockerfile index 9e8234d5d..158008010 100644 --- a/server/workers/persistence/Dockerfile +++ b/server/workers/persistence/Dockerfile @@ -10,6 +10,4 @@ WORKDIR /persistence COPY workers/persistence/requirements.txt . RUN pip install --no-cache-dir -r requirements.txt RUN pip install git+https://github.com/python-restx/flask-restx -COPY workers/persistence/src/ ./ - -RUN python manage.py \ No newline at end of file +COPY workers/persistence/src/ ./ \ No newline at end of file From 9babf3e4dc8d143454f1a2f4dd40732b2c4f4f38 Mon Sep 17 00:00:00 2001 From: chreman Date: Wed, 9 Jun 2021 10:49:40 +0200 Subject: [PATCH 52/54] fix integration --- .../headstart/preprocessing/calculation/RCalculation.php | 2 +- server/services/getContext.php | 2 +- server/workers/example_redis_config.json | 6 ------ server/workers/proxy/docker-compose.yml | 6 +++--- server/workers/proxy/templates/default.conf.template | 8 ++++---- server/workers/triple/example_es_config.json | 7 ------- 6 files changed, 9 insertions(+), 22 deletions(-) delete mode 100644 server/workers/example_redis_config.json delete mode 100644 server/workers/triple/example_es_config.json diff --git a/server/classes/headstart/preprocessing/calculation/RCalculation.php b/server/classes/headstart/preprocessing/calculation/RCalculation.php index 2221941a0..7449e7406 100644 --- a/server/classes/headstart/preprocessing/calculation/RCalculation.php +++ b/server/classes/headstart/preprocessing/calculation/RCalculation.php @@ -46,7 +46,7 @@ public function performCalculationAndReturnOutputAsJSON($working_dir, $query, $p //library\Toolkit::info($path); // exec(ps(if [ $(ps -ef | grep -v grep | grep scrape_data.php | wc -l) -lt 1 ])) if n > 5 // return $try_later with error handling - if(exec("ps -u www-data | grep R | wc -l") <= 12) { + if(exec("ps -u www-data | grep R | wc -l") <= 8) { exec($path, $output_r); return $output_r; } else { diff --git a/server/services/getContext.php b/server/services/getContext.php index ea373177b..e96870ce4 100644 --- a/server/services/getContext.php +++ b/server/services/getContext.php @@ -16,7 +16,7 @@ $vis_id = library\CommUtils::getParameter($_GET, "vis_id"); $revision_context = isset($_GET["revision_context"]) ? library\CommUtils::getParameter($_GET, "revision_context") : false; -$apiclient = new \library\APIClient(); +$apiclient = new headstart\library\APIClient(); $persistence = new headstart\persistence\SQLitePersistence($ini_array["connection"]["sqlite_db"]); $persistence_backend = $ini_array["general"]["persistence_backend"]; diff --git a/server/workers/example_redis_config.json b/server/workers/example_redis_config.json deleted file mode 100644 index a4ede8678..000000000 --- a/server/workers/example_redis_config.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "host":"127.0.0.1", - "port": 6379, - "db":0, - "password":"long_secure_password" -} diff --git a/server/workers/proxy/docker-compose.yml b/server/workers/proxy/docker-compose.yml index 6f1121642..41b99533a 100644 --- a/server/workers/proxy/docker-compose.yml +++ b/server/workers/proxy/docker-compose.yml @@ -11,9 +11,9 @@ services: ports: - '8080:80' networks: - - triple_staging_headstart + - stable_headstart networks: - triple_staging_headstart: + stable_headstart: external: true - name: triple_staging_headstart \ No newline at end of file + name: stable_headstart \ No newline at end of file diff --git a/server/workers/proxy/templates/default.conf.template b/server/workers/proxy/templates/default.conf.template index 63986aa33..1e459c450 100644 --- a/server/workers/proxy/templates/default.conf.template +++ b/server/workers/proxy/templates/default.conf.template @@ -1,11 +1,11 @@ server { listen ${NGINX_PORT}; - location /triple_staging/ { - proxy_pass http://triple_staging_api_1:5001/api/; + location /stable/ { + proxy_pass http://stable_api_1:5001/api/; - location /triple_staging/persistence/ { - proxy_pass http://triple_staging_persistence_1:5001/api/persistence/; + location /stable/persistence/ { + proxy_pass http://stable_persistence_1:5001/api/persistence/; } } } \ No newline at end of file diff --git a/server/workers/triple/example_es_config.json b/server/workers/triple/example_es_config.json deleted file mode 100644 index 81a0fc96b..000000000 --- a/server/workers/triple/example_es_config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "user": "string", - "pass": "string", - "host": "string", - "port": 9200, - "indices": {"string":"string"} -} From 08d7e2a18e47d9f678bcbb9f28fa4819e12e655f Mon Sep 17 00:00:00 2001 From: chreman Date: Wed, 9 Jun 2021 15:05:59 +0200 Subject: [PATCH 53/54] env update --- server/workers/flavorconfigs/example.env | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/server/workers/flavorconfigs/example.env b/server/workers/flavorconfigs/example.env index 2c0f395ad..1e27b993a 100644 --- a/server/workers/flavorconfigs/example.env +++ b/server/workers/flavorconfigs/example.env @@ -1,16 +1,16 @@ -COMPOSE_PROJECT_NAME=dev -SERVICE_VERSION=41b44663c6cc3498210cb7db38de7bee9ccab583 +COMPOSE_PROJECT_NAME=stable +SERVICE_VERSION=9babf3e4dc8d143454f1a2f4dd40732b2c4f4f38 NETWORK=dockerrefactoring POSTGRES_DB=postgres POSTGRES_USER=headstart POSTGRES_PASSWORD=testpassword -POSTGRES_HOST=dev_db_1 +POSTGRES_HOST=stable_db_1 POSTGRES_PORT=5432 POSTGRES_HOSTNAME=headstart_pgsql_1 PGADMIN_DEFAULT_EMAIL=christopher.kittel@openknowledgemaps.org PGADMIN_DEFAULT_PASSWORD=testpassword API_PORT=5001 -REDIS_HOST=dev_redis_1 +REDIS_HOST=stable_redis_1 REDIS_PORT=6379 REDIS_DB=0 REDIS_PASSWORD=redis_password \ No newline at end of file From ec172e1c0f7a3bf46b743c2c12f29dbc6280361c Mon Sep 17 00:00:00 2001 From: chreman Date: Wed, 9 Jun 2021 15:51:08 +0200 Subject: [PATCH 54/54] bugfix --- server/services/getContext.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/services/getContext.php b/server/services/getContext.php index e96870ce4..672c26042 100644 --- a/server/services/getContext.php +++ b/server/services/getContext.php @@ -16,7 +16,7 @@ $vis_id = library\CommUtils::getParameter($_GET, "vis_id"); $revision_context = isset($_GET["revision_context"]) ? library\CommUtils::getParameter($_GET, "revision_context") : false; -$apiclient = new headstart\library\APIClient(); +$apiclient = new headstart\library\APIClient($ini_array); $persistence = new headstart\persistence\SQLitePersistence($ini_array["connection"]["sqlite_db"]); $persistence_backend = $ini_array["general"]["persistence_backend"];