From d58014e9f81f86bcd943b71eef2eaf06ddd7ce45 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Tue, 3 Mar 2020 10:10:22 +0100 Subject: [PATCH 01/99] containerization structure --- examples/triple/docker-compose.yml | 29 ++ server/headstart_backend.docker | 13 + server/workers/backend.env | 3 + server/workers/backend/headstart.py | 16 + server/workers/renv.lock | 598 ++++++++++++++++++++++++++++ 5 files changed, 659 insertions(+) create mode 100644 examples/triple/docker-compose.yml create mode 100644 server/headstart_backend.docker create mode 100644 server/workers/backend.env create mode 100644 server/workers/backend/headstart.py create mode 100644 server/workers/renv.lock diff --git a/examples/triple/docker-compose.yml b/examples/triple/docker-compose.yml new file mode 100644 index 000000000..145cc5eb3 --- /dev/null +++ b/examples/triple/docker-compose.yml @@ -0,0 +1,29 @@ +version: '3.7' + +services: + + redis: + image: 'redis:4.0-alpine' + restart: always + command: redis-server + volumes: + - 'redis:/var/lib/redis/data' + ports: + - '6379:6379' + + backend: + build: + context: ../server + dockerfile: headstart_backend.docker + env_file: + - workers/backend/backend.env + restart: always + ports: + - '8019:8019' + volumes: + - type: bind + source: /renv/cache + target: /var/cache/renv/cache + +volumes: + redis: diff --git a/server/headstart_backend.docker b/server/headstart_backend.docker new file mode 100644 index 000000000..06e042c16 --- /dev/null +++ b/server/headstart_backend.docker @@ -0,0 +1,13 @@ +FROM rocker/r-ver:3.5.1 + +MAINTAINER Chris Kittel "christopher.kittel@openknowledgemaps.org" +RUN apt-get update +RUN apt-get -y install libssl-dev +RUN apt-get -y install libxml2-dev + +RUN R -e 'install.packages("remotes", repos = c(CRAN = "https://cran.rstudio.com"))' + +WORKDIR /headstart +COPY workers/backend/renv.lock ./ +RUN R -e 'options(repos="https://cran.wu.ac.at"); renv::restore()' +COPY preprocessing/other-scripts ./backend diff --git 
a/server/workers/backend.env b/server/workers/backend.env new file mode 100644 index 000000000..8078d6292 --- /dev/null +++ b/server/workers/backend.env @@ -0,0 +1,3 @@ +RENV_PATHS_CACHE=/renv/cache +RENV_VERSION="0.6.0-98" +CRAN_REPOS="https://cran.wu.ac.at" diff --git a/server/workers/backend/headstart.py b/server/workers/backend/headstart.py new file mode 100644 index 000000000..8a7e001e2 --- /dev/null +++ b/server/workers/backend/headstart.py @@ -0,0 +1,16 @@ +import os +import time +import json +import subprocess +import redis + +redis_store = redis.StrictRedis(host="localhost", port=6379, db=0) + + +class Backend(object): + + def __init__(self): + # path should be to where in the docker container the Rscript are + self.wd = "headstart" + self.command = 'Rscript' + self.hs = os.path.join(self.wd, "run_vis_layout.R") diff --git a/server/workers/renv.lock b/server/workers/renv.lock new file mode 100644 index 000000000..997f79476 --- /dev/null +++ b/server/workers/renv.lock @@ -0,0 +1,598 @@ +{ + "renv": { + "Version": "0.6.0-98" + }, + "R": { + "Version": "3.5.3", + "Repositories": [ + { + "Name": "CRAN", + "URL": "https://cloud.r-project.org" + } + ] + }, + "Bioconductor": { + "Repositories": [ + { + "Name": "BioCsoft", + "URL": "https://bioconductor.org/packages/3.8/bioc" + }, + { + "Name": "BioCann", + "URL": "https://bioconductor.org/packages/3.8/data/annotation" + }, + { + "Name": "BioCexp", + "URL": "https://bioconductor.org/packages/3.8/data/experiment" + }, + { + "Name": "BioCworkflows", + "URL": "https://bioconductor.org/packages/3.8/workflows" + }, + { + "Name": "CRAN", + "URL": "https://cloud.r-project.org" + } + ] + }, + "Packages": { + "base64enc": { + "Package": "base64enc", + "Version": "0.1-3", + "Source": "CRAN", + "Hash": "eec0d710cee1455ba24eb6ab622a60e9" + }, + "BH": { + "Package": "BH", + "Version": "1.69.0-1", + "Source": "CRAN", + "Hash": "88e64b38758666b85d283617231bb766" + }, + "cli": { + "Package": "cli", + "Version": "1.1.0", + 
"Source": "CRAN", + "Hash": "c0a5f242aa2259cf7e40da04bb89f5d8" + }, + "crayon": { + "Package": "crayon", + "Version": "1.3.4", + "Source": "CRAN", + "Hash": "1e93bda8f2b60a0defc85c82a39e2891" + }, + "curl": { + "Package": "curl", + "Version": "3.3", + "Source": "CRAN", + "Hash": "921ecc2db7bca3844daaec11fce12e76" + }, + "digest": { + "Package": "digest", + "Version": "0.6.18", + "Source": "CRAN", + "Hash": "50c07175e40eda4f3f1deb0d26a3f9dc" + }, + "evaluate": { + "Package": "evaluate", + "Version": "0.13", + "Source": "CRAN", + "Hash": "1a4182c08eece9c529eec110119ac6a7" + }, + "highr": { + "Package": "highr", + "Version": "0.8", + "Source": "CRAN", + "Hash": "2f5651aef7832e8eea60d3f3add2ffaa" + }, + "htmltools": { + "Package": "htmltools", + "Version": "0.3.6", + "Source": "CRAN", + "Hash": "881e91b8b764a550de68ad18a98c05b5" + }, + "jsonlite": { + "Package": "jsonlite", + "Version": "1.6", + "Source": "CRAN", + "Hash": "9ec2b2fe0e874a66d0cb54d8ced5618e" + }, + "knitr": { + "Package": "knitr", + "Version": "1.22", + "Source": "CRAN", + "Hash": "b1ef4af5ce7c39dcba8da6ff39bfd7a1" + }, + "magrittr": { + "Package": "magrittr", + "Version": "1.5", + "Source": "CRAN", + "Hash": "cb6367fec3fd68ff41424fe9797b8eaf" + }, + "markdown": { + "Package": "markdown", + "Version": "0.9", + "Source": "CRAN", + "Hash": "5e5d7e3b37fbdb3b06e3f9484f80db0e" + }, + "mime": { + "Package": "mime", + "Version": "0.6", + "Source": "CRAN", + "Hash": "217bc37b46b42e1e0636460ae3b96a7d" + }, + "praise": { + "Package": "praise", + "Version": "1.0.0", + "Source": "CRAN", + "Hash": "20ffbe3f59f58529a6786101392fa476" + }, + "processx": { + "Package": "processx", + "Version": "3.3.1.9000", + "Source": "GitHub", + "RemoteType": "github", + "RemoteHost": "api.github.com", + "RemoteRepo": "processx", + "RemoteUsername": "r-lib", + "RemoteRef": "master", + "RemoteSha": "505cb1f8dd1f4b9ff35ed2f64ce0e86dc7855789", + "Hash": "37c3fe67b4a210d04262bcab0c7eb551" + }, + "R6": { + "Package": "R6", + "Version": 
"2.4.0", + "Source": "CRAN", + "Hash": "d42941d973f7f28b32f694bc89fb9d5f" + }, + "Rcpp": { + "Package": "Rcpp", + "Version": "1.0.1", + "Source": "CRAN", + "Hash": "542a8dbcf371718e4dee591341d28c43" + }, + "RCurl": { + "Package": "RCurl", + "Version": "1.95-4.12", + "Source": "CRAN", + "Hash": "23866c3a35f74fd999e83d6ee38ee5c7" + }, + "rlang": { + "Package": "rlang", + "Version": "0.3.4", + "Source": "CRAN", + "Hash": "1367935e1052d25fd616368dea6d8bf5" + }, + "rmarkdown": { + "Package": "rmarkdown", + "Version": "1.12", + "Source": "CRAN", + "Hash": "6adbc9e47370f441b5ab2a4a4bd066df" + }, + "stringr": { + "Package": "stringr", + "Version": "1.4.0", + "Source": "CRAN", + "Hash": "01fb6c8262ac804d9bafa60cea8773da" + }, + "testthat": { + "Package": "testthat", + "Version": "2.1.1", + "Source": "CRAN", + "Hash": "d1d096420c20ce540736c4e780d3c9bb" + }, + "tinytex": { + "Package": "tinytex", + "Version": "0.13", + "Source": "CRAN", + "Hash": "b866f0a0298dc94dc2e63252bb08ab0c" + }, + "withr": { + "Package": "withr", + "Version": "2.1.2", + "Source": "CRAN", + "Hash": "ce9ffa6d865ecaf6a56cacc811565e07" + }, + "xfun": { + "Package": "xfun", + "Version": "0.7", + "Source": "CRAN", + "Hash": "0821bdcb5e455983cf2daae1c88038c1" + }, + "xml2": { + "Package": "xml2", + "Version": "1.2.0", + "Source": "CRAN", + "Hash": "fe28830040ea8a0cd9d41749b8f9f715" + }, + "yaml": { + "Package": "yaml", + "Version": "2.2.0", + "Source": "CRAN", + "Hash": "e75dfa28ca59a73adfabf6899ce04472" + }, + "arsenal": { + "Package": "arsenal", + "Version": "3.1.0", + "Source": "CRAN", + "Hash": "506ae2edc7d8c64e8a992115b3336184" + }, + "askpass": { + "Package": "askpass", + "Version": "1.1", + "Source": "CRAN", + "Hash": "ff8e30ba3e542c07372fca6de28bce3b" + }, + "assertthat": { + "Package": "assertthat", + "Version": "0.2.1", + "Source": "CRAN", + "Hash": "263effaa484b1dc3013cf1c1e8f6624e" + }, + "backports": { + "Package": "backports", + "Version": "1.1.4", + "Source": "CRAN", + "Hash": 
"8f863a45c5b6c9ccd5a71988138a48c2" + }, + "bitops": { + "Package": "bitops", + "Version": "1.0-6", + "Source": "CRAN", + "Hash": "f72db5de0feaf15f856ccc2aefdf91de" + }, + "broom": { + "Package": "broom", + "Version": "0.5.2", + "Source": "CRAN", + "Hash": "a9d79e24f3ce764752c892f0d5f7f61c" + }, + "callr": { + "Package": "callr", + "Version": "3.2.0", + "Source": "CRAN", + "Hash": "7af03bdfa915fefcf1e1abcfad81755f" + }, + "cellranger": { + "Package": "cellranger", + "Version": "1.1.0", + "Source": "CRAN", + "Hash": "a6279d19fb8783f899f8dc3edb9a71b0" + }, + "clipr": { + "Package": "clipr", + "Version": "0.6.0", + "Source": "CRAN", + "Hash": "b5c8e6072ef59bb5d2e755f4b249dae0" + }, + "colorspace": { + "Package": "colorspace", + "Version": "1.4-1", + "Source": "CRAN", + "Hash": "70941d6476af18c149b5b6916f87a7c5" + }, + "DBI": { + "Package": "DBI", + "Version": "1.0.0", + "Source": "CRAN", + "Hash": "5608e5aeeba9ae979e5bfd26ca0835a5" + }, + "dbplyr": { + "Package": "dbplyr", + "Version": "1.4.0", + "Source": "CRAN", + "Hash": "3bae66836ad0369d925be9c0b58bdb36" + }, + "dplyr": { + "Package": "dplyr", + "Version": "0.8.1", + "Source": "CRAN", + "Hash": "8cdcbbb5b14d9f9aea8ba9b9fa06729b" + }, + "ellipsis": { + "Package": "ellipsis", + "Version": "0.1.0", + "Source": "CRAN", + "Hash": "6d1d4612d4cfcaa05e67a534c3b5e470" + }, + "fansi": { + "Package": "fansi", + "Version": "0.4.0", + "Source": "CRAN", + "Hash": "7e8ca3a2d400cbdaf72a602e9833c920" + }, + "forcats": { + "Package": "forcats", + "Version": "0.4.0", + "Source": "CRAN", + "Hash": "c5d5b0c24161479679cfbf94fba339f4" + }, + "fs": { + "Package": "fs", + "Version": "1.3.1", + "Source": "CRAN", + "Hash": "110467e48d2be4583fe18d7bdbbc3c54" + }, + "generics": { + "Package": "generics", + "Version": "0.0.2", + "Source": "CRAN", + "Hash": "c2a574a14f5f45a84ead80391c3075d6" + }, + "ggplot2": { + "Package": "ggplot2", + "Version": "3.1.1", + "Source": "CRAN", + "Hash": "4afc1cad89d1a1acc0ed18e69ae71323" + }, + "glue": { + 
"Package": "glue", + "Version": "1.3.1", + "Source": "CRAN", + "Hash": "156f76da413ebe13f4d7e65ae6a5d19f" + }, + "gtable": { + "Package": "gtable", + "Version": "0.3.0", + "Source": "CRAN", + "Hash": "120444406cc884baa3a2ff5c0566045b" + }, + "haven": { + "Package": "haven", + "Version": "2.1.0", + "Source": "CRAN", + "Hash": "a5147d91e024a58883490a0300cd0534" + }, + "hms": { + "Package": "hms", + "Version": "0.4.2", + "Source": "CRAN", + "Hash": "c1a7ac1b51eb0f5ef51a4ea256eb9cbe" + }, + "httr": { + "Package": "httr", + "Version": "1.4.0", + "Source": "CRAN", + "Hash": "f5dc99b1972d4e9eaf6f5c452339db08" + }, + "labeling": { + "Package": "labeling", + "Version": "0.3", + "Source": "CRAN", + "Hash": "379ec196f3f3bb312727d52b022fa8f9" + }, + "lazyeval": { + "Package": "lazyeval", + "Version": "0.2.2", + "Source": "CRAN", + "Hash": "8c343e48c3f58de2c70a641592abc0f1" + }, + "lubridate": { + "Package": "lubridate", + "Version": "1.7.4", + "Source": "CRAN", + "Hash": "4af93df0d50bb3919de67f36e7d3d00b" + }, + "MASS": { + "Package": "MASS", + "Version": "7.3-51.4", + "Source": "CRAN", + "Hash": "a670f645948409d325eddb0b199ed968" + }, + "modelr": { + "Package": "modelr", + "Version": "0.1.4", + "Source": "CRAN", + "Hash": "5eae0bb48971866c0bf67a6d2b493f1b" + }, + "munsell": { + "Package": "munsell", + "Version": "0.5.0", + "Source": "CRAN", + "Hash": "381047af4d6b44ca4d9285dbdea42434" + }, + "openssl": { + "Package": "openssl", + "Version": "1.3", + "Source": "CRAN", + "Hash": "ca742ffe11d603c1b7ea425d7d85a0ef" + }, + "pillar": { + "Package": "pillar", + "Version": "1.4.0", + "Source": "CRAN", + "Hash": "0012ae5d08cab261e929647763ba6226" + }, + "pkgconfig": { + "Package": "pkgconfig", + "Version": "2.0.2", + "Source": "CRAN", + "Hash": "47f492c929356d52b15018b5827cf857" + }, + "plogr": { + "Package": "plogr", + "Version": "0.2.0", + "Source": "CRAN", + "Hash": "4f3b95dcac0fc64e36666ac8e6430fc7" + }, + "plyr": { + "Package": "plyr", + "Version": "1.8.4", + "Source": "CRAN", + 
"Hash": "48b2c2a9c4a504ea9167e9252fd9fade" + }, + "prettyunits": { + "Package": "prettyunits", + "Version": "1.0.2", + "Source": "CRAN", + "Hash": "47d4646a3be22893c04093e8db164989" + }, + "progress": { + "Package": "progress", + "Version": "1.2.2", + "Source": "CRAN", + "Hash": "819ec59e511e26ceed4ebdf272b6671f" + }, + "ps": { + "Package": "ps", + "Version": "1.3.0", + "Source": "CRAN", + "Hash": "12c6a7e02239401d47599ddc44a484f4" + }, + "purrr": { + "Package": "purrr", + "Version": "0.3.2", + "Source": "CRAN", + "Hash": "19767ff705a04566e2f396e15ccc1b39" + }, + "RColorBrewer": { + "Package": "RColorBrewer", + "Version": "1.1-2", + "Source": "CRAN", + "Hash": "cdfb92174501c241a0d4e13fcac600a4" + }, + "readr": { + "Package": "readr", + "Version": "1.3.1", + "Source": "CRAN", + "Hash": "19e0ffd8bcaff3476349d8787fc8bd59" + }, + "readxl": { + "Package": "readxl", + "Version": "1.3.1", + "Source": "CRAN", + "Hash": "b0467b5406ee2c29709c4a57cab692d1" + }, + "rematch": { + "Package": "rematch", + "Version": "1.0.1", + "Source": "CRAN", + "Hash": "d80f39f4db9c537a1def235969d648ea" + }, + "reprex": { + "Package": "reprex", + "Version": "0.3.0", + "Source": "CRAN", + "Hash": "99df2b9f9ef2946d5af0910afaf56198" + }, + "reshape2": { + "Package": "reshape2", + "Version": "1.4.3", + "Source": "CRAN", + "Hash": "a0b8854199dbfa29a71f514b3ffcddf0" + }, + "rstudioapi": { + "Package": "rstudioapi", + "Version": "0.10", + "Source": "CRAN", + "Hash": "6c03ab57831cbdae14d599151e832e18" + }, + "rvest": { + "Package": "rvest", + "Version": "0.3.4", + "Source": "CRAN", + "Hash": "a40342e9ea5fd36819a8c8baac506d43" + }, + "scales": { + "Package": "scales", + "Version": "1.0.0", + "Source": "CRAN", + "Hash": "17cfbe1a4fcc8d0440a2af68d9e07bf4" + }, + "selectr": { + "Package": "selectr", + "Version": "0.4-1", + "Source": "CRAN", + "Hash": "cc537e333408bb2f0088bc7b20f0e821" + }, + "stringi": { + "Package": "stringi", + "Version": "1.4.3", + "Source": "CRAN", + "Hash": 
"0472e71f2347e13cd398d5225ff3b0ea" + }, + "sys": { + "Package": "sys", + "Version": "3.2", + "Source": "CRAN", + "Hash": "22bf912f5ebfddff8992fdd4d01e9bb4" + }, + "tibble": { + "Package": "tibble", + "Version": "2.1.1", + "Source": "CRAN", + "Hash": "12ce38efdfe72eb34c31873431337e35" + }, + "tidyr": { + "Package": "tidyr", + "Version": "0.8.3", + "Source": "CRAN", + "Hash": "d50a808d7d4c6ef63190fdcf452977ae" + }, + "tidyselect": { + "Package": "tidyselect", + "Version": "0.2.5", + "Source": "CRAN", + "Hash": "66c1a6de7f98266b76503698f2ca111d" + }, + "tidyverse": { + "Package": "tidyverse", + "Version": "1.2.1", + "Source": "CRAN", + "Hash": "ea055dfa5ed1e736f1ebacb553cbddbd" + }, + "utf8": { + "Package": "utf8", + "Version": "1.1.4", + "Source": "CRAN", + "Hash": "557d1cc1d7b4ff362db5990acc00f2a0" + }, + "vctrs": { + "Package": "vctrs", + "Version": "0.1.0", + "Source": "CRAN", + "Hash": "a57267520709009929b09af57d668a2a" + }, + "viridisLite": { + "Package": "viridisLite", + "Version": "0.3.0", + "Source": "CRAN", + "Hash": "e62dbc33079a4e2ff7429032f0e46efd" + }, + "whisker": { + "Package": "whisker", + "Version": "0.3-2", + "Source": "CRAN", + "Hash": "1e74aefd2c67890f504a1fa2d0834d70" + }, + "zeallot": { + "Package": "zeallot", + "Version": "0.1.0", + "Source": "CRAN", + "Hash": "c33bb7353728bd4547a079b2c351a021" + }, + "lattice": { + "Package": "lattice", + "Version": "0.20-38", + "Source": "CRAN", + "Hash": "16dfe2407fe485b6a9fb098120008721" + }, + "Matrix": { + "Package": "Matrix", + "Version": "1.2-17", + "Source": "CRAN", + "Hash": "b2eae6a1a2e206ecc478f3ff9dc98a5e" + }, + "mgcv": { + "Package": "mgcv", + "Version": "1.8-28", + "Source": "CRAN", + "Hash": "7b78c0b314871340c4f56d38e6b66dd5" + }, + "nlme": { + "Package": "nlme", + "Version": "3.1-139", + "Source": "CRAN", + "Hash": "120db5776a27efebe1cca6e75cd460a7" + } + } +} From d6e3b7a235d446a76a9b1695d6a8af94e06b1b9b Mon Sep 17 00:00:00 2001 From: chreman Date: Tue, 3 Mar 2020 11:07:08 +0100 Subject: 
[PATCH 02/99] move --- server/workers/{ => backend}/backend.env | 0 server/workers/{ => backend}/renv.lock | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename server/workers/{ => backend}/backend.env (100%) rename server/workers/{ => backend}/renv.lock (100%) diff --git a/server/workers/backend.env b/server/workers/backend/backend.env similarity index 100% rename from server/workers/backend.env rename to server/workers/backend/backend.env diff --git a/server/workers/renv.lock b/server/workers/backend/renv.lock similarity index 100% rename from server/workers/renv.lock rename to server/workers/backend/renv.lock From cf9106103abd28ca255f02ef80a05c55b41ba7c5 Mon Sep 17 00:00:00 2001 From: chreman Date: Tue, 3 Mar 2020 13:19:05 +0100 Subject: [PATCH 03/99] move --- examples/triple/docker-compose.yml => docker-compose.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename examples/triple/docker-compose.yml => docker-compose.yml (100%) diff --git a/examples/triple/docker-compose.yml b/docker-compose.yml similarity index 100% rename from examples/triple/docker-compose.yml rename to docker-compose.yml From bc959d2e9429d0b0dd0340ba5fd4a1fe614fa73c Mon Sep 17 00:00:00 2001 From: chreman Date: Tue, 3 Mar 2020 14:33:03 +0100 Subject: [PATCH 04/99] basic container build --- docker-compose.yml | 23 +++++++++++++---------- server/headstart_backend.docker | 5 +++-- server/search_triple.docker | 7 +++++++ server/workers/triple/config_example.json | 8 ++++++++ server/workers/triple/requirements.txt | 1 + server/workers/triple/search_triple.py | 12 ++++++++++++ 6 files changed, 44 insertions(+), 12 deletions(-) create mode 100644 server/search_triple.docker create mode 100644 server/workers/triple/config_example.json create mode 100644 server/workers/triple/requirements.txt create mode 100644 server/workers/triple/search_triple.py diff --git a/docker-compose.yml b/docker-compose.yml index 145cc5eb3..cc9270d5d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ 
-8,22 +8,25 @@ services: command: redis-server volumes: - 'redis:/var/lib/redis/data' + restart: always ports: - '6379:6379' + search_triple: + build: + context: server + dockerfile: search_triple.docker + restart: always + backend: build: - context: ../server + context: server dockerfile: headstart_backend.docker - env_file: - - workers/backend/backend.env - restart: always - ports: - - '8019:8019' - volumes: - - type: bind - source: /renv/cache - target: /var/cache/renv/cache + env_file: + - server/workers/backend/backend.env + restart: always + ports: + - '8019:8019' volumes: redis: diff --git a/server/headstart_backend.docker b/server/headstart_backend.docker index 06e042c16..55d1e01e5 100644 --- a/server/headstart_backend.docker +++ b/server/headstart_backend.docker @@ -2,10 +2,11 @@ FROM rocker/r-ver:3.5.1 MAINTAINER Chris Kittel "christopher.kittel@openknowledgemaps.org" RUN apt-get update -RUN apt-get -y install libssl-dev -RUN apt-get -y install libxml2-dev +RUN apt-get -y install libssl-dev libcurl4-openssl-dev +RUN apt-get -y install libxml2-dev libz-dev libpoppler-cpp-dev RUN R -e 'install.packages("remotes", repos = c(CRAN = "https://cran.rstudio.com"))' +RUN R -e 'install.packages("renv", repos = c(CRAN = "https://cran.rstudio.com"))' WORKDIR /headstart COPY workers/backend/renv.lock ./ diff --git a/server/search_triple.docker b/server/search_triple.docker new file mode 100644 index 000000000..9959544bb --- /dev/null +++ b/server/search_triple.docker @@ -0,0 +1,7 @@ +FROM python:3.6.10-alpine3.10 + +MAINTAINER Chris Kittel "christopher.kittel@openknowledgemaps.org" + +WORKDIR /headstart +COPY workers/triple/requirements.txt ./ +RUN pip install --no-cache-dir -r requirements.txt diff --git a/server/workers/triple/config_example.json b/server/workers/triple/config_example.json new file mode 100644 index 000000000..c3ae9d168 --- /dev/null +++ b/server/workers/triple/config_example.json @@ -0,0 +1,8 @@ +{ + "user": "string", + "pass": "string", + 
"protocol": "https", + "url": "url", + "port": "int", + "index": "string" +} diff --git a/server/workers/triple/requirements.txt b/server/workers/triple/requirements.txt new file mode 100644 index 000000000..174c3f8b3 --- /dev/null +++ b/server/workers/triple/requirements.txt @@ -0,0 +1 @@ +elasticsearch diff --git a/server/workers/triple/search_triple.py b/server/workers/triple/search_triple.py new file mode 100644 index 000000000..baee02152 --- /dev/null +++ b/server/workers/triple/search_triple.py @@ -0,0 +1,12 @@ +import os +import json + +from elasticsearch import Elasticsearch + + +with open("config.json") as infile: + config = json.load(infile) + +es = Elasticsearch( + ['{protocol}://{user}:{pass}@{url}:{port}'.format(**config)] +) From c092a48e350e1517c46ba728b0a5c1e196945800 Mon Sep 17 00:00:00 2001 From: chreman Date: Tue, 3 Mar 2020 16:30:31 +0100 Subject: [PATCH 05/99] wip --- server/workers/triple/config_example.json | 3 +-- server/workers/triple/search_triple.py | 7 ++++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/server/workers/triple/config_example.json b/server/workers/triple/config_example.json index c3ae9d168..2a9d30889 100644 --- a/server/workers/triple/config_example.json +++ b/server/workers/triple/config_example.json @@ -1,8 +1,7 @@ { "user": "string", "pass": "string", - "protocol": "https", - "url": "url", + "host": "string", "port": "int", "index": "string" } diff --git a/server/workers/triple/search_triple.py b/server/workers/triple/search_triple.py index baee02152..911e49902 100644 --- a/server/workers/triple/search_triple.py +++ b/server/workers/triple/search_triple.py @@ -8,5 +8,10 @@ config = json.load(infile) es = Elasticsearch( - ['{protocol}://{user}:{pass}@{url}:{port}'.format(**config)] + [config.get('host')], + http_auth=(config.get('user', config.get('pass'))), + scheme = "https", + port = config.get('port'), + send_get_body_as='POST', + http_compress=True ) From 03d94162407f0e990f69c4f1f3e717a249cda866 Mon 
Sep 17 00:00:00 2001 From: chreman Date: Wed, 4 Mar 2020 14:19:48 +0100 Subject: [PATCH 06/99] wip --- server/search_triple.docker | 2 +- server/workers/services.docker | 7 +++++++ server/workers/services/app.py | 13 +++++++++++++ server/workers/services/blueprints/__init__.py | 0 server/workers/services/blueprints/v2/__init__.py | 0 server/workers/services/blueprints/v2/views.py | 8 ++++++++ server/workers/services/config/__init__.py | 0 server/workers/services/config/settings_example.py | 1 + server/workers/services/requirements.txt | 3 +++ server/workers/triple/requirements.txt | 1 + 10 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 server/workers/services.docker create mode 100644 server/workers/services/app.py create mode 100644 server/workers/services/blueprints/__init__.py create mode 100644 server/workers/services/blueprints/v2/__init__.py create mode 100644 server/workers/services/blueprints/v2/views.py create mode 100644 server/workers/services/config/__init__.py create mode 100644 server/workers/services/config/settings_example.py create mode 100644 server/workers/services/requirements.txt diff --git a/server/search_triple.docker b/server/search_triple.docker index 9959544bb..d8deb4d78 100644 --- a/server/search_triple.docker +++ b/server/search_triple.docker @@ -3,5 +3,5 @@ FROM python:3.6.10-alpine3.10 MAINTAINER Chris Kittel "christopher.kittel@openknowledgemaps.org" WORKDIR /headstart -COPY workers/triple/requirements.txt ./ +COPY workers/triple/ ./ RUN pip install --no-cache-dir -r requirements.txt diff --git a/server/workers/services.docker b/server/workers/services.docker new file mode 100644 index 000000000..a7c945622 --- /dev/null +++ b/server/workers/services.docker @@ -0,0 +1,7 @@ +FROM python:3.6.10-alpine3.10 + +MAINTAINER Chris Kittel "christopher.kittel@openknowledgemaps.org" + +WORKDIR /headstart +COPY workers/services/ ./ +RUN pip install --no-cache-dir -r requirements.txt diff --git a/server/workers/services/app.py 
b/server/workers/services/app.py new file mode 100644 index 000000000..bb64fca0f --- /dev/null +++ b/server/workers/services/app.py @@ -0,0 +1,13 @@ +from flask import Flask + + +def new_services_app(settings_override=None): + from blueprints.v2 import app as v2 + app = Flask('v2', instance_relative_config=True) + + app.config.from_object('config.settings') + app.config.from_pyfile('settings.py', silent=True) + + app.register_blueprint(v2) + + return app diff --git a/server/workers/services/blueprints/__init__.py b/server/workers/services/blueprints/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/server/workers/services/blueprints/v2/__init__.py b/server/workers/services/blueprints/v2/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/server/workers/services/blueprints/v2/views.py b/server/workers/services/blueprints/v2/views.py new file mode 100644 index 000000000..0cc799307 --- /dev/null +++ b/server/workers/services/blueprints/v2/views.py @@ -0,0 +1,8 @@ +import os +import redis + +from flask import Blueprint, make_response + + +redis_store = redis.StrictRedis(host="localhost", port=6379, db=0) +app = Blueprint('v2', __name__) diff --git a/server/workers/services/config/__init__.py b/server/workers/services/config/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/server/workers/services/config/settings_example.py b/server/workers/services/config/settings_example.py new file mode 100644 index 000000000..d4efebf3c --- /dev/null +++ b/server/workers/services/config/settings_example.py @@ -0,0 +1 @@ +REDIS_URL = "redis://:password@localhost:6379/0" diff --git a/server/workers/services/requirements.txt b/server/workers/services/requirements.txt new file mode 100644 index 000000000..c6ca1f63e --- /dev/null +++ b/server/workers/services/requirements.txt @@ -0,0 +1,3 @@ +flask +gunicorn +redis diff --git a/server/workers/triple/requirements.txt b/server/workers/triple/requirements.txt index 
174c3f8b3..bf68c7804 100644 --- a/server/workers/triple/requirements.txt +++ b/server/workers/triple/requirements.txt @@ -1 +1,2 @@ elasticsearch +redis From 5a276dfa7f6348eb3d4f36ae2acffc975e1ddb9b Mon Sep 17 00:00:00 2001 From: chreman Date: Wed, 4 Mar 2020 14:20:03 +0100 Subject: [PATCH 07/99] wip --- server/workers/triple/config_example.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/workers/triple/config_example.json b/server/workers/triple/config_example.json index 2a9d30889..81a0fc96b 100644 --- a/server/workers/triple/config_example.json +++ b/server/workers/triple/config_example.json @@ -2,6 +2,6 @@ "user": "string", "pass": "string", "host": "string", - "port": "int", - "index": "string" + "port": 9200, + "indices": {"string":"string"} } From 6dcf2864c254f9b42b24b7fa4d32d6ac4db1ee53 Mon Sep 17 00:00:00 2001 From: chreman Date: Wed, 4 Mar 2020 14:29:48 +0100 Subject: [PATCH 08/99] wip --- server/{workers => }/services.docker | 0 server/workers/services/blueprints/v2/views.py | 16 ++++++++++++++++ 2 files changed, 16 insertions(+) rename server/{workers => }/services.docker (100%) diff --git a/server/workers/services.docker b/server/services.docker similarity index 100% rename from server/workers/services.docker rename to server/services.docker diff --git a/server/workers/services/blueprints/v2/views.py b/server/workers/services/blueprints/v2/views.py index 0cc799307..b59e0a467 100644 --- a/server/workers/services/blueprints/v2/views.py +++ b/server/workers/services/blueprints/v2/views.py @@ -6,3 +6,19 @@ redis_store = redis.StrictRedis(host="localhost", port=6379, db=0) app = Blueprint('v2', __name__) + + +@app.route('/api/v2/search_triple', methods=['GET', 'POST']) +def search_triple(): + """ + """ + + + +@app.route('/api/v2/example_data', methods=['GET', 'POST']) +def example_data(): + headers = {"Content-Type": "application/json"} + data = {"test": "document string"} + return make_response(data, + 200, + 
headers=headers) From f6e47cb70f030c2a9686f07d56f4fd6e0b07d9b3 Mon Sep 17 00:00:00 2001 From: chreman Date: Wed, 4 Mar 2020 14:54:05 +0100 Subject: [PATCH 09/99] api test works --- docker-compose.yml | 16 ++++++++++++++-- server/services.docker | 2 ++ server/workers/services/app.py | 2 +- server/workers/services/blueprints/v2/views.py | 2 +- 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index cc9270d5d..18ed1edef 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,6 +2,17 @@ version: '3.7' services: + api: + build: + context: server + dockerfile: services.docker + restart: always + ports: + - '5001:5001' + depends_on: + - redis + network_mode: "host" + redis: image: 'redis:4.0-alpine' restart: always @@ -11,12 +22,14 @@ services: restart: always ports: - '6379:6379' + network_mode: "host" search_triple: build: context: server dockerfile: search_triple.docker restart: always + network_mode: "host" backend: build: @@ -25,8 +38,7 @@ services: env_file: - server/workers/backend/backend.env restart: always - ports: - - '8019:8019' + network_mode: "host" volumes: redis: diff --git a/server/services.docker b/server/services.docker index a7c945622..910702986 100644 --- a/server/services.docker +++ b/server/services.docker @@ -5,3 +5,5 @@ MAINTAINER Chris Kittel "christopher.kittel@openknowledgemaps.org" WORKDIR /headstart COPY workers/services/ ./ RUN pip install --no-cache-dir -r requirements.txt + +CMD gunicorn -b 0.0.0.0:5001 'app:new_services_app()' diff --git a/server/workers/services/app.py b/server/workers/services/app.py index bb64fca0f..87bf28847 100644 --- a/server/workers/services/app.py +++ b/server/workers/services/app.py @@ -2,7 +2,7 @@ def new_services_app(settings_override=None): - from blueprints.v2 import app as v2 + from blueprints.v2.views import app as v2 app = Flask('v2', instance_relative_config=True) app.config.from_object('config.settings') diff --git 
a/server/workers/services/blueprints/v2/views.py b/server/workers/services/blueprints/v2/views.py index b59e0a467..bd74d8eea 100644 --- a/server/workers/services/blueprints/v2/views.py +++ b/server/workers/services/blueprints/v2/views.py @@ -21,4 +21,4 @@ def example_data(): data = {"test": "document string"} return make_response(data, 200, - headers=headers) + headers) From f626c790f4ac226d5111f4eb54ce1874810398ce Mon Sep 17 00:00:00 2001 From: chreman Date: Wed, 4 Mar 2020 15:31:11 +0100 Subject: [PATCH 10/99] backend wip --- server/headstart_backend.docker | 3 +++ server/workers/backend/headstart.py | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/server/headstart_backend.docker b/server/headstart_backend.docker index 55d1e01e5..b8aaca174 100644 --- a/server/headstart_backend.docker +++ b/server/headstart_backend.docker @@ -12,3 +12,6 @@ WORKDIR /headstart COPY workers/backend/renv.lock ./ RUN R -e 'options(repos="https://cran.wu.ac.at"); renv::restore()' COPY preprocessing/other-scripts ./backend + +ENTRYPOINT python +CMD headstart.py diff --git a/server/workers/backend/headstart.py b/server/workers/backend/headstart.py index 8a7e001e2..8ae66ed0e 100644 --- a/server/workers/backend/headstart.py +++ b/server/workers/backend/headstart.py @@ -14,3 +14,15 @@ def __init__(self): self.wd = "headstart" self.command = 'Rscript' self.hs = os.path.join(self.wd, "run_vis_layout.R") + + def create_map(self, input_data): + pass + + def run(self): + while True: + input_data = redis_store.blpop('input_data') + self.create_map(input_data) + +if __name__ == '__main__': + hsb = Backend() + hsb.run() From f6861fd3a7aa66f90ac588bfa8c3392e2d2e2f8b Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Wed, 4 Mar 2020 21:46:55 +0100 Subject: [PATCH 11/99] package structure --- server/workers/backend/{ => src}/backend.env | 0 server/workers/backend/{ => src}/headstart.py | 1 + server/workers/backend/{ => src}/renv.lock | 0 server/workers/services/{ => src}/app.py | 0 
.../services/{ => src}/blueprints/__init__.py | 0 .../{ => src}/blueprints/v2/__init__.py | 0 .../services/{ => src}/blueprints/v2/views.py | 0 .../services/{ => src}/config/__init__.py | 0 .../{ => src}/config/settings_example.py | 0 .../services/{ => src}/requirements.txt | 0 server/workers/triple/__init__.py | 0 server/workers/triple/search_triple.py | 17 -------------- .../triple/{ => src}/config_example.json | 0 .../workers/triple/{ => src}/requirements.txt | 0 server/workers/triple/src/search_triple.py | 22 +++++++++++++++++++ server/workers/triple/tests/__init__.py | 0 16 files changed, 23 insertions(+), 17 deletions(-) rename server/workers/backend/{ => src}/backend.env (100%) rename server/workers/backend/{ => src}/headstart.py (99%) rename server/workers/backend/{ => src}/renv.lock (100%) rename server/workers/services/{ => src}/app.py (100%) rename server/workers/services/{ => src}/blueprints/__init__.py (100%) rename server/workers/services/{ => src}/blueprints/v2/__init__.py (100%) rename server/workers/services/{ => src}/blueprints/v2/views.py (100%) rename server/workers/services/{ => src}/config/__init__.py (100%) rename server/workers/services/{ => src}/config/settings_example.py (100%) rename server/workers/services/{ => src}/requirements.txt (100%) create mode 100644 server/workers/triple/__init__.py delete mode 100644 server/workers/triple/search_triple.py rename server/workers/triple/{ => src}/config_example.json (100%) rename server/workers/triple/{ => src}/requirements.txt (100%) create mode 100644 server/workers/triple/src/search_triple.py create mode 100644 server/workers/triple/tests/__init__.py diff --git a/server/workers/backend/backend.env b/server/workers/backend/src/backend.env similarity index 100% rename from server/workers/backend/backend.env rename to server/workers/backend/src/backend.env diff --git a/server/workers/backend/headstart.py b/server/workers/backend/src/headstart.py similarity index 99% rename from 
server/workers/backend/headstart.py rename to server/workers/backend/src/headstart.py index 8ae66ed0e..3f01d8f6a 100644 --- a/server/workers/backend/headstart.py +++ b/server/workers/backend/src/headstart.py @@ -23,6 +23,7 @@ def run(self): input_data = redis_store.blpop('input_data') self.create_map(input_data) + if __name__ == '__main__': hsb = Backend() hsb.run() diff --git a/server/workers/backend/renv.lock b/server/workers/backend/src/renv.lock similarity index 100% rename from server/workers/backend/renv.lock rename to server/workers/backend/src/renv.lock diff --git a/server/workers/services/app.py b/server/workers/services/src/app.py similarity index 100% rename from server/workers/services/app.py rename to server/workers/services/src/app.py diff --git a/server/workers/services/blueprints/__init__.py b/server/workers/services/src/blueprints/__init__.py similarity index 100% rename from server/workers/services/blueprints/__init__.py rename to server/workers/services/src/blueprints/__init__.py diff --git a/server/workers/services/blueprints/v2/__init__.py b/server/workers/services/src/blueprints/v2/__init__.py similarity index 100% rename from server/workers/services/blueprints/v2/__init__.py rename to server/workers/services/src/blueprints/v2/__init__.py diff --git a/server/workers/services/blueprints/v2/views.py b/server/workers/services/src/blueprints/v2/views.py similarity index 100% rename from server/workers/services/blueprints/v2/views.py rename to server/workers/services/src/blueprints/v2/views.py diff --git a/server/workers/services/config/__init__.py b/server/workers/services/src/config/__init__.py similarity index 100% rename from server/workers/services/config/__init__.py rename to server/workers/services/src/config/__init__.py diff --git a/server/workers/services/config/settings_example.py b/server/workers/services/src/config/settings_example.py similarity index 100% rename from server/workers/services/config/settings_example.py rename to 
server/workers/services/src/config/settings_example.py diff --git a/server/workers/services/requirements.txt b/server/workers/services/src/requirements.txt similarity index 100% rename from server/workers/services/requirements.txt rename to server/workers/services/src/requirements.txt diff --git a/server/workers/triple/__init__.py b/server/workers/triple/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/server/workers/triple/search_triple.py b/server/workers/triple/search_triple.py deleted file mode 100644 index 911e49902..000000000 --- a/server/workers/triple/search_triple.py +++ /dev/null @@ -1,17 +0,0 @@ -import os -import json - -from elasticsearch import Elasticsearch - - -with open("config.json") as infile: - config = json.load(infile) - -es = Elasticsearch( - [config.get('host')], - http_auth=(config.get('user', config.get('pass'))), - scheme = "https", - port = config.get('port'), - send_get_body_as='POST', - http_compress=True -) diff --git a/server/workers/triple/config_example.json b/server/workers/triple/src/config_example.json similarity index 100% rename from server/workers/triple/config_example.json rename to server/workers/triple/src/config_example.json diff --git a/server/workers/triple/requirements.txt b/server/workers/triple/src/requirements.txt similarity index 100% rename from server/workers/triple/requirements.txt rename to server/workers/triple/src/requirements.txt diff --git a/server/workers/triple/src/search_triple.py b/server/workers/triple/src/search_triple.py new file mode 100644 index 000000000..ccd2cdd0b --- /dev/null +++ b/server/workers/triple/src/search_triple.py @@ -0,0 +1,22 @@ +import os +import json +import redis +from elasticsearch import Elasticsearch + + +with open("config.json") as infile: + config = json.load(infile) + +redis_store = redis.StrictRedis(host="localhost", port=6379, db=0) + + +class TripleClient(object): + + def __init__(self, config): + self.es = Elasticsearch( + [config.get('host')], + 
scheme="http" if config.get('host') == 'localhost' else "https", + port=config.get('port'), + send_get_body_as='POST', + http_compress=True + ) diff --git a/server/workers/triple/tests/__init__.py b/server/workers/triple/tests/__init__.py new file mode 100644 index 000000000..e69de29bb From 33110be170c1d4ad1a0c90c7c497735ab8e55002 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Wed, 4 Mar 2020 23:06:32 +0100 Subject: [PATCH 12/99] entry wip --- server/services/search.php | 15 +++++++++------ server/services/searchTRIPLE.php | 18 ++++++++++++++++++ .../services/src/blueprints/v2/views.py | 7 +++++-- server/workers/triple/src/search_triple.py | 8 ++++++++ 4 files changed, 40 insertions(+), 8 deletions(-) create mode 100644 server/services/searchTRIPLE.php diff --git a/server/services/search.php b/server/services/search.php index f24c2c39d..5bb936bd0 100644 --- a/server/services/search.php +++ b/server/services/search.php @@ -38,8 +38,9 @@ function utf8_converter($array) return $array; } -function search($repository, $dirty_query, $post_params, $param_types, $keyword_separator, $taxonomy_separator, $transform_query_tolowercase = true - , $retrieve_cached_map = true, $params_for_id = null, $num_labels = 3, $id = "area_uri", $subjects = "subject") { +function search($repository, $dirty_query, $post_params, $param_types, $keyword_separator, $taxonomy_separator, $transform_query_tolowercase = true, + $retrieve_cached_map = true, $params_for_id = null, $num_labels = 3, $id = "area_uri", $subjects = "subject", + $backend = null) { $INI_DIR = dirname(__FILE__) . "/../preprocessing/conf/"; $ini_array = library\Toolkit::loadIni($INI_DIR); $query = strip_tags($dirty_query); @@ -76,11 +77,13 @@ function search($repository, $dirty_query, $post_params, $param_types, $keyword_ $WORKING_DIR = $ini_array["general"]["preprocessing_dir"] . 
$ini_array["output"]["output_dir"]; - $calculation = new \headstart\preprocessing\calculation\RCalculation($ini_array); - $output = $calculation->performCalculationAndReturnOutputAsJSON($WORKING_DIR, $query, $params_filename, $repository); + if ($backend == null) { + $calculation = new \headstart\preprocessing\calculation\RCalculation($ini_array); + $output = $calculation->performCalculationAndReturnOutputAsJSON($WORKING_DIR, $query, $params_filename, $repository); - $output_json = end($output); - $output_json = mb_convert_encoding($output_json, "UTF-8"); + $output_json = end($output); + $output_json = mb_convert_encoding($output_json, "UTF-8"); + } if (!library\Toolkit::isJSON($output_json) || $output_json == "null" || $output_json == null) { diff --git a/server/services/searchTRIPLE.php b/server/services/searchTRIPLE.php new file mode 100644 index 000000000..d397c4863 --- /dev/null +++ b/server/services/searchTRIPLE.php @@ -0,0 +1,18 @@ + diff --git a/server/workers/services/src/blueprints/v2/views.py b/server/workers/services/src/blueprints/v2/views.py index bd74d8eea..2c7d26dcf 100644 --- a/server/workers/services/src/blueprints/v2/views.py +++ b/server/workers/services/src/blueprints/v2/views.py @@ -1,7 +1,8 @@ import os import redis +import asyncio -from flask import Blueprint, make_response +from flask import Blueprint, request, make_response, jsonify redis_store = redis.StrictRedis(host="localhost", port=6379, db=0) @@ -12,7 +13,9 @@ def search_triple(): """ """ - + if request.method == "POST": + redis_store.rpush("search_triple") + result = await redis_store.get(k) @app.route('/api/v2/example_data', methods=['GET', 'POST']) diff --git a/server/workers/triple/src/search_triple.py b/server/workers/triple/src/search_triple.py index ccd2cdd0b..4f133999e 100644 --- a/server/workers/triple/src/search_triple.py +++ b/server/workers/triple/src/search_triple.py @@ -20,3 +20,11 @@ def __init__(self, config): send_get_body_as='POST', http_compress=True ) + + def 
build_query(self, raw_query): + query = {} + return query + + def run(self): + while True: + q = redis_store.blpop("search_triple") From 74550c47c6867ebde2f2780da7a48df163c5858e Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Sat, 7 Mar 2020 15:44:23 +0100 Subject: [PATCH 13/99] triple minimal example --- docker-compose.yml | 2 +- examples/triple/data-config_server.js | 1 + examples/triple/data-config_triple.js | 32 + examples/triple/headstart.php | 28 + examples/triple/options.css | 127 ++++ examples/triple/search.js | 151 +++++ examples/triple/search_options.js | 579 ++++++++++++++++++ examples/triple/search_triple.html | 46 ++ server/headstart_backend.docker | 2 +- server/search_triple.docker | 2 +- server/services.docker | 5 +- server/services/search.php | 17 +- server/services/searchTRIPLE.php | 2 +- server/workers/services/src/app.py | 5 + .../services/src/blueprints/v2/views.py | 15 +- server/workers/services/src/requirements.txt | 2 + 16 files changed, 1007 insertions(+), 9 deletions(-) create mode 100644 examples/triple/data-config_server.js create mode 100644 examples/triple/data-config_triple.js create mode 100644 examples/triple/headstart.php create mode 100644 examples/triple/options.css create mode 100644 examples/triple/search.js create mode 100644 examples/triple/search_options.js create mode 100644 examples/triple/search_triple.html diff --git a/docker-compose.yml b/docker-compose.yml index 18ed1edef..2281f89ee 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -36,7 +36,7 @@ services: context: server dockerfile: headstart_backend.docker env_file: - - server/workers/backend/backend.env + - server/workers/backend/src/backend.env restart: always network_mode: "host" diff --git a/examples/triple/data-config_server.js b/examples/triple/data-config_server.js new file mode 100644 index 000000000..95d53c082 --- /dev/null +++ b/examples/triple/data-config_server.js @@ -0,0 +1 @@ +data_config.server_url = 
window.location.href.replace(/[^/]*$/, '') + "headstart/server/"; diff --git a/examples/triple/data-config_triple.js b/examples/triple/data-config_triple.js new file mode 100644 index 000000000..73ab641c7 --- /dev/null +++ b/examples/triple/data-config_triple.js @@ -0,0 +1,32 @@ +var data_config = { + tag: "visualization", + mode: "search_repos", + + service: "triple", + + title: "", + base_unit: "citations", + use_area_uri: true, + show_multiples: false, + show_dropdown: false, + preview_type: "pdf", + sort_options: ["relevance", "title", "authors", "year"], + is_force_areas: true, + language: "eng_pubmed", + area_force_alpha: 0.015, + show_list: true, + content_based: true, + url_prefix: "", + + show_context: true, + create_title_from_context: true, + context_most_relevant_tooltip: true, + + doi_outlink: true, + filter_menu_dropdown: true, + sort_menu_dropdown: true, + filter_options: ["all", "open_access"], + + embed_modal: true, + share_modal: false, +}; diff --git a/examples/triple/headstart.php b/examples/triple/headstart.php new file mode 100644 index 000000000..eef156324 --- /dev/null +++ b/examples/triple/headstart.php @@ -0,0 +1,28 @@ + + + + + + + +
+ + + + + + + +
Built with Headstart and rplos. All content retrieved from TRIPLE. +
+ + diff --git a/examples/triple/options.css b/examples/triple/options.css new file mode 100644 index 000000000..39bc1d9cf --- /dev/null +++ b/examples/triple/options.css @@ -0,0 +1,127 @@ +* { + box-sizing: content-box; +} + +.divity { + display: inline-block; +} + +.frontend-hidden { + display: none; +} + +.visible { + display: block; +} + +label { + display: inline; + font-weight: normal; +} + +#filter-container { + margin-top: 15px; +} + +#filter-btn { + margin-bottom: 23px; + margin-right: 20px; + margin-top: 5px; +} + +#filters { + margin-bottom: 10px; +} + +#stats { + margin-bottom: 30px; + margin-top: 30px; +} + +.dropdown { + padding: 5px; +} + +.frontend-btn { + -webkit-border-radius: 5; + -moz-border-radius: 5; + border-radius: 5px; + color: #636363; + font-size: 12px; + background: #ffffff; + padding: 8px 10px 8px 10px; + text-decoration: none; + border: solid #636363 1px; +} + +.frontend-btn:hover { + background: #f5f2f5; + /*background-image: -webkit-linear-gradient(top, #f5f2f5, #c9c9c9); + background-image: -moz-linear-gradient(top, #f5f2f5, #c9c9c9); + background-image: -ms-linear-gradient(top, #f5f2f5, #c9c9c9); + background-image: -o-linear-gradient(top, #f5f2f5, #c9c9c9); + background-image: linear-gradient(to bottom, #f5f2f5, #c9c9c9);*/ + text-decoration: none; +} + +.div { + display: block; +} + +.hidden { + display: none; +} + +.visible { + display: block; +} + +#input-container { + margin-bottom: 30px; +} + +.submit-btn { + padding: 3px 12px; +} + +.caret { + border-left: 1px solid #AAA; + border-radius: 0px 4px 4px 0px; + background: #CCC linear-gradient(to top, #CCC 0%, #EEE 60%) repeat scroll 0% 0%; +} + +.multiselect { + background-color: #fff; + /*background-image: -webkit-gradient(linear, left bottom, left top, color-stop(0, #eee), color-stop(0.5, #fff)); + background-image: -webkit-linear-gradient(center bottom, #eee 0%, #fff 50%); + background-image: -moz-linear-gradient(center bottom, #eee 0%, #fff 50%); + filter: 
progid:DXImageTransform.Microsoft.gradient(startColorstr = '#ffffff', endColorstr = '#eeeeee', GradientType = 0); + background-image: linear-gradient(to top, #eee 0%, #fff 50%);*/ + + height: 18px +} + +.btn-group { + margin-right: 30px; +} + +.submit-btn { + display: inline-block; + padding-top: 10px; + margin-left: 20px; +} + +#download { + margin-top: 510px; + margin-left: 30px; +} + +.input_items { + display: inline-block; + vertical-align: middle; +} + +.btn { + margin-left: 10px; + width: 100px; +} \ No newline at end of file diff --git a/examples/triple/search.js b/examples/triple/search.js new file mode 100644 index 000000000..e8e6de916 --- /dev/null +++ b/examples/triple/search.js @@ -0,0 +1,151 @@ +var service_url; +var service_name; +var options; + +switch (data_config.service) { + case 'plos': + service_url = data_config.server_url + "services/searchPLOS.php" + service_name = "PLOS"; + options = options_plos; + break; + case 'pubmed': + service_url = data_config.server_url + "services/searchPubmed.php" + service_name = "PubMed"; + options = options_pubmed; + break; + case 'doaj': + service_url = data_config.server_url + "services/searchDOAJ.php" + service_name = "DOAJ"; + options = options_doaj; + break; + + case 'base': + service_url = data_config.server_url + "services/searchBASE.php" + service_name = "BASE"; + options = options_base; + break; + + case 'openaire': + service_url = data_config.server_url + "services/searchOpenAire.php" + service_name = "OpenAire"; + options = options_base; + break; + + case 'triple': + service_url = data_config.server_url + "services/searchTRIPLE.php" + service_name = "Triple"; + options = options_triple; + break; +} + +$(window).bind("pageshow", function () { + $(".btn").attr("disabled", false); +}); + +$("#searchform").validate({ + submitHandler: function (form) { + $(".btn").attr("disabled", true); + $("#progress").html(""); + + d3.select("#progress").append("p") + .text("Please be patient, this can take a while...") 
+ .append("div") + .attr("id", "progressbar") + + $("#progressbar").progressbar(); + var tick_interval = 2; + var tick_increment = 1; + var tick_function = function () { + var value = $("#progressbar").progressbar("option", "value"); + value += tick_increment; + $("#progressbar").progressbar("option", "value", value); + if (value < 100) { + window.setTimeout(tick_function, tick_interval * 1000); + } else { + //alert("Done"); + } + }; + window.setTimeout(tick_function, tick_interval * 1000); + + var data = $("#searchform").serialize(); + + doSubmit(data) + } +}); + +var doSubmit = function (data, newWindow, callback) { + data += "&today=" + new Date().toLocaleDateString("en-US"); + + var openInNewWindow= function(data) { + if (data.status === "success") { + var file = data.id; + window.open("headstart.php?query=" + + data.query + + "&file=" + + file + + "&service=" + + data_config.service + + "&service_name=" + + service_name, '_blank') + console.log('opening') + callback(true) + return false; + } else { + callback(false) + } + } + + var openInThisWindow = function(data) { + if (data.status === "success") { + var file = data.id; + window.location = + "headstart.php?query=" + + data.query + + "&file=" + + file + + "&service=" + + data_config.service + + "&service_name=" + + service_name; + return false; + } else { + $("#progress").html( + "Sorry! Something went wrong. Most likely, we did not get enough results for your search. Please try again with a different query." + ); + $(".btn").prop("disabled", false); + } + } + + $.ajax({ + // make an AJAX request + type: "POST", + url: service_url, + data: data, + success: newWindow ? 
openInNewWindow : openInThisWindow + }); +}; + +$(document).ready(function () { + var search_options = SearchOptions; + + search_options.init("#filter-container", options); + + options.dropdowns.forEach(function (entry) { + search_options.select_multi('.dropdown_multi_' + entry.id, entry.name) + }) + + var valueExists = function (key, value) { + var find = options.dropdowns.filter( + function (data) { + return data[key] == value + } + ); + + return (find.length > 0) ? (true) : (false); + } + if (valueExists("id", "time_range")) { + search_options.addDatePickerFromTo("#from", "#to", "any-time"); + } else if (valueExists("id", "year_range")) { + search_options.setDateRangeFromPreset("#from", "#to", "any-time-years", "1809"); + } +}); diff --git a/examples/triple/search_options.js b/examples/triple/search_options.js new file mode 100644 index 000000000..d14397598 --- /dev/null +++ b/examples/triple/search_options.js @@ -0,0 +1,579 @@ +var options_triple = { + dropdowns: [ + {id: "year_range", multiple: false, name: "Time Range", type: "dropdown" + , fields: [ + {id: "any-time-years", text: "Any time"} + , {id: "this-year", text: "This year"} + , {id: "last-year-years", text: "Last year"} + , {id: "user-defined", text: "Custom range", class: "user-defined", + inputs: [ + {id: "from", label: "From: ", class: "time_input"} + , {id: "to", label: "To: ", class: "time_input"} + ]} + ]}, + {id: "sorting", multiple: false, name: "Sorting", type: "dropdown" + , fields: [ + {id: "most-relevant", text: "Most relevant"} + , {id: "most-recent", text: "Most recent"} + ]} + ]} + +var options_plos = { + dropdowns: [ + {id: "time_range", multiple: false, name: "Time Range", type: "dropdown" + , fields: [ + {id: "any-time", text: "Any time"} + , {id: "last-month", text: "Last month"} + , {id: "last-year", text: "Last year"} + , {id: "user-defined", text: "Custom range", class: "user-defined", + inputs: [ + {id: "from", label: "From: ", class: "time_input"} + , {id: "to", label: "To: ", 
class: "time_input"} + ]} + ]}, + {id: "sorting", multiple: false, name: "Sorting", type: "dropdown" + , fields: [ + {id: "most-relevant", text: "Most relevant"} + , {id: "most-recent", text: "Most recent"} + ]}, + {id: "article_types", multiple: true, name: "Article types", type: "dropdown" + , fields: [ + {id: "Research Article", text: "Research Article", selected: true} + , {id: "Review", text: "Review", selected: true} + , {id: "Best Practice", text: "Best Practice", selected: true} + , {id: "Book Review", text: "Book Review", selected: true} + , {id: "Book Review/Science in the Media", text: "Book Review/Science in the Media", selected: true} + , {id: "Case Report", text: "Case Report", selected: true} + , {id: "Collection Review", text: "Collection Review", selected: true} + , {id: "Community Page", text: "Community Page", selected: true} + , {id: "Correction", text: "Correction", selected: false} + , {id: "Correspondence", text: "Correspondence", selected: true} + , {id: "Correspondence and Other Communications", text: "Correspondence and Other Communications", selected: true} + , {id: "Deep Reads", text: "Deep Reads", selected: true} + , {id: "Editorial", text: "Editorial", selected: true} + , {id: "Education", text: "Education", selected: true} + , {id: "Essay", text: "Essay", selected: true} + , {id: "Expert Commentary", text: "Expert Commentary", selected: true} + , {id: "Expression of Concern", text: "Expression of Concern", selected: true} + , {id: "Feature", text: "Feature", selected: true} + , {id: "Formal Comment", text: "Formal Comment", selected: true} + , {id: "From Innovation to Application", text: "From Innovation to Application", selected: true} + , {id: "Guidelines and Guidance", text: "Guidelines and Guidance", selected: true} + , {id: "Health in Action", text: "Health in Action", selected: true} + , {id: "Historical and Philosophical Perspectives", text: "Historical and Philosophical Perspectives", selected: true} + , {id: "Historical 
Profiles and Perspectives", text: "Historical Profiles and Perspectives", selected: true} + , {id: "Interview", text: "Interview", selected: true} + , {id: "Journal Club", text: "Journal Club", selected: true} + , {id: "Learning Forum ", text: "Learning Forum ", selected: true} + , {id: "Message from ISCB", text: "Message from ISCB", selected: true} + , {id: "Neglected Diseases", text: "Neglected Diseases", selected: true} + , {id: "Obituary", text: "Obituary", selected: true} + , {id: "Opinion", text: "Opinion", selected: true} + , {id: "Overview", text: "Overview", selected: true} + , {id: "Pearls", text: "Pearls", selected: true} + , {id: "Perspective", text: "Perspective", selected: true} + , {id: "Photo Quiz", text: "Photo Quiz", selected: false} + , {id: "Policy Forum", text: "Policy Forum", selected: true} + , {id: "Policy Platform", text: "Policy Platform", selected: true} + , {id: "Primer", text: "Primer", selected: true} + , {id: "Quiz", text: "Quiz", selected: false} + , {id: "Research in Translation", text: "Research in Translation", selected: true} + , {id: "Research Matters", text: "Research Matters", selected: true} + , {id: "Retraction", text: "Retraction", selected: false} + , {id: "Special Report", text: "Special Report", selected: true} + , {id: "Student Forum", text: "Student Forum", selected: true} + , {id: "Symposium", text: "Symposium", selected: true} + , {id: "Synopsis", text: "Synopsis", selected: false} + , {id: "The PLoS Medicine Debate", text: "The PLoS Medicine Debate", selected: true} + , {id: "Topic Page", text: "Topic Page", selected: true} + , {id: "Unsolved Mystery", text: "Unsolved Mystery", selected: true} + , {id: "Viewpoints ", text: "Viewpoints ", selected: true} + + ]}, + , {id: "journals", multiple: true, name: "Journals", type: "dropdown" + , fields: [ + {id: "PLoSONE", text: "PLOS ONE", selected: true} + , {id: "PLoSGenetics", text: "PLOS Genetics", selected: true} + , {id: "PLoSPathogens", text: "PLOS Pathogens", 
selected: true} + , {id: "PLoSCompBiol", text: "PLOS Computational Biology", selected: true} + , {id: "PLoSNTD", text: "PLOS Neglected Tropical Diseases", selected: true} + , {id: "PLoSBiology", text: "PLOS Biology", selected: true} + , {id: "PLoSMedicine", text: "PLOS Medicine", selected: true} + , {id: "PLoSClinicalTrials", text: "PLOS Hub for Clinical Trials", selected: true} + ]} + ]} + +var options_pubmed = { + dropdowns: [ + {id: "time_range", multiple: false, name: "Time Range", type: "dropdown" + , fields: [ + {id: "any-time", text: "Any time"} + , {id: "last-month", text: "Last month"} + , {id: "last-year", text: "Last year"} + , {id: "user-defined", text: "Custom range", class: "user-defined", + inputs: [ + {id: "from", label: "From: ", class: "time_input"} + , {id: "to", label: "To: ", class: "time_input"} + ]} + ]}, + {id: "sorting", multiple: false, name: "Sorting", type: "dropdown" + , fields: [ + {id: "most-relevant", text: "Most relevant"} + , {id: "most-recent", text: "Most recent"} + ]}, + {id: "article_types", multiple: true, name: "Article types", type: "dropdown" + , fields: [ + {id: "autobiography", text: "Autobiography", selected: true} + , {id: "bibliography", text: "Bibliography", selected: true} + , {id: "biography", text: "Biography", selected: true} + , {id: "book illustrations", text: "Book Illustrations", selected: true} + , {id: "case reports", text: "Case Reports", selected: true} + , {id: "classical article", text: "Classical Article", selected: true} + , {id: "clinical conference", text: "Clinical Conference", selected: true} + , {id: "clinical study", text: "Clinical Study", selected: true} + , {id: "clinical trial", text: "Clinical Trial", selected: true} + , {id: "clinical trial, phase i", text: "Clinical Trial, Phase I", selected: true} + , {id: "clinical trial, phase ii", text: "Clinical Trial, Phase II", selected: true} + , {id: "clinical trial, phase iii", text: "Clinical Trial, Phase III", selected: true} + , {id: "clinical 
trial, phase iv", text: "Clinical Trial, Phase IV", selected: true} + , {id: "collected works", text: "Collected Works", selected: true} + , {id: "comment", text: "Comment", selected: true} + , {id: "comparative study", text: "Comparative Study", selected: true} + , {id: "congresses", text: "Congresses", selected: true} + , {id: "consensus development conference", text: "Consensus Development Conference", selected: true} + , {id: "consensus development conference, nih", text: "Consensus Development Conference, NIH", selected: true} + , {id: "controlled clinical trial", text: "Controlled Clinical Trial", selected: true} + , {id: "corrected and republished article", text: "Corrected and Republished Article", selected: true} + , {id: "dataset", text: "Dataset", selected: true} + , {id: "dictionary", text: "Dictionary", selected: true} + , {id: "directory", text: "Directory", selected: true} + , {id: "duplicate publication", text: "Duplicate publication", selected: true} + , {id: "editorial", text: "Editorial", selected: true} + , {id: "electronic supplementary materials", text: "Electronic Supplementary Materials", selected: true} + , {id: "english abstract", text: "English Abstract", selected: true} + , {id: "ephemera", text: "Ephemera", selected: true} + , {id: "evaluation studies", text: "Evaluation Studies", selected: true} + , {id: "festschrift", text: "Festschrift", selected: true} + , {id: "government publications", text: "Government Publications", selected: true} + , {id: "guideline", text: "Guideline", selected: true} + , {id: "historical article", text: "Historical Article", selected: true} + , {id: "interactive tutorial", text: "Interactive Tutorial", selected: true} + , {id: "interview", text: "Interview", selected: true} + , {id: "introductory journal article", text: "Introductory Journal Article", selected: true} + , {id: "journal article", text: "Journal Article", selected: true} + , {id: "lectures", text: "Lectures", selected: true} + , {id: "legal 
cases", text: "Legal Cases", selected: true} + , {id: "legislation", text: "Legislation", selected: true} + , {id: "letter", text: "Letter", selected: true} + , {id: "meta analysis", text: "Meta Analysis", selected: true} + , {id: "multicenter study", text: "Multicenter Study", selected: true} + , {id: "news", text: "News", selected: true} + , {id: "newspaper article", text: "Newspaper Article", selected: true} + , {id: "observational study", text: "Observational Study", selected: true} + , {id: "overall", text: "Overall", selected: true} + , {id: "patient education handout", text: "Patient Education Handout", selected: true} + , {id: "periodical index", text: "Periodical Index", selected: true} + , {id: "personal narratives", text: "Personal Narratives", selected: true} + , {id: "pictorial works", text: "Pictorial Works", selected: true} + , {id: "popular works", text: "Popular Works", selected: true} + , {id: "portraits", text: "Portraits", selected: true} + , {id: "practice guideline", text: "Practice Guideline", selected: true} + , {id: "pragmatic clinical trial", text: "Pragmatic Clinical Trial", selected: true} + , {id: "publication components", text: "Publication Components", selected: true} + , {id: "publication formats", text: "Publication Formats", selected: true} + , {id: "publication type category", text: "Publication Type Category", selected: true} + , {id: "published erratum", text: "Published Erratum", selected: true} + , {id: "randomized controlled trial", text: "Randomized Controlled Trial", selected: true} + , {id: "research support, american recovery and reinvestment act", text: "Research Support, American Recovery and Reinvestment Act", selected: true} + , {id: "research support, n i h, extramural", text: "Research Support, NIH Extramural", selected: true} + , {id: "research support, n i h, intramural", text: "Research Support, NIH Intramural", selected: true} + , {id: "research support, non u s gov't", text: "Research Support, U.S. 
Gov't", selected: true} + , {id: "research support, u s gov't, non p h s", text: "Research Support, U.S. Gov't, Non P.H.S", selected: true} + , {id: "research support, u s gov't, p h s", text: "Research Support, U.S. Gov't, P.H.S", selected: true} + , {id: "research support, u s government", text: "Research Support, U.S. Government", selected: true} + , {id: "retracted publication", text: "Retracted Publication", selected: true} + , {id: "retraction of publication", text: "Retraction of Publication", selected: true} + , {id: "review", text: "Review", selected: true} + , {id: "scientific integrity review", text: "Scientific Integrity Review", selected: true} + , {id: "study characteristics", text: "Study Characteristics", selected: true} + , {id: "support of research", text: "Support of Research", selected: true} + , {id: "technical report", text: "Technical Report", selected: true} + , {id: "twin study", text: "Twin Study", selected: true} + , {id: "validation studies", text: "Validation Studies", selected: true} + , {id: "video audio media", text: "Video Audio Media", selected: true} + , {id: "webcasts", text: "Webcasts", selected: true}]} + ]} + +var options_doaj = { + dropdowns: [ + {id: "year_range", multiple: false, name: "Time Range", type: "dropdown" + , fields: [ + {id: "any-time-years", text: "Any time"} + , {id: "this-year", text: "This year"} + , {id: "last-year-years", text: "Last year"} + , {id: "user-defined", text: "Custom range", class: "user-defined", + inputs: [ + {id: "from", label: "From: ", class: "time_input"} + , {id: "to", label: "To: ", class: "time_input"} + ]} + ]}, + {id: "sorting", multiple: false, name: "Sorting", type: "dropdown" + , fields: [ + {id: "most-relevant", text: "Most relevant"} + , {id: "most-recent", text: "Most recent"} + ]} + ]} + +var options_base = { + dropdowns: [ + {id: "time_range", multiple: false, name: "Time Range", type: "dropdown" + , fields: [ + {id: "any-time", text: "Any time"} + , {id: "last-month", text: 
"Last month"} + , {id: "last-year", text: "Last year"} + , {id: "user-defined", text: "Custom range", class: "user-defined", + inputs: [ + {id: "from", label: "From: ", class: "time_input"} + , {id: "to", label: "To: ", class: "time_input"} + ]} + ]}, + {id: "sorting", multiple: false, name: "Sorting", type: "dropdown" + , fields: [ + {id: "most-relevant", text: "Most relevant"} + , {id: "most-recent", text: "Most recent"} + ]}, + {id: "document_types", multiple: true, name: "Document types", type: "dropdown" + , fields: [ + {id: "4", text: "Audio", selected: false} + , {id: "11", text: "Book", selected: false} + , {id: "111", text: "Book part", selected: false} + , {id: "13", text: "Conference object", selected: false} + , {id: "16", text: "Course material", selected: false} + , {id: "7", text: "Dataset", selected: false} + , {id: "5", text: "Image/video", selected: false} + , {id: "121", text: "Journal/newspaper article", selected: true} + , {id: "122", text: "Journal/newspaper other content", selected: false} + , {id: "17", text: "Lecture", selected: false} + , {id: "19", text: "Manuscript", selected: false} + , {id: "3", text: "Map", selected: false} + , {id: "2", text: "Musical notation", selected: false} + , {id: "F", text: "Other/Unknown material", selected: false} + , {id: "1A", text: "Patent", selected: false} + , {id: "14", text: "Report", selected: false} + , {id: "15", text: "Review", selected: false} + , {id: "6", text: "Software", selected: false} + , {id: "51", text: "Still image", selected: false} + , {id: "1", text: "Text", selected: false} + , {id: "181", text: "Thesis: bachelor", selected: false} + , {id: "183", text: "Thesis: doctoral and postdoctoral", selected: false} + , {id: "182", text: "Thesis: master", selected: false} + , {id: "52", text: "Video/moving image", selected: false} + ]}, + {id: "lang_id", multiple: false, name: "Language", type: "dropdown" + , fields: [ + {id: "all", text: "All languages"} + , {id: "eng", text: "English"} + , 
{"id": "fre", "text": "French (français)"}, + {"id": "spa", "text": "Spanish (español)"}, + {"id": "ger", "text": "German (Deutsch)"}, + {"id": "por", "text": "Portuguese (português)"}, + {"id": "pol", "text": "Polish (Jezyk polski)"}, + {"id": "jpn", "text": "Japanese (???)"}, + {"id": "ita", "text": "Italian (italiano)"}, + {"id": "chi", "text": "Chinese (??)"}, + {"id": "rus", "text": "Russian (??????? ????)"}, + {"id": "ind", "text": "Indonesian (bahasa Indonesia)"}, + {"id": "ukr", "text": "Ukrainian (?????????? ????)"}, + {"id": "gre", "text": "Modern Greek (??a ????????)"}, + {"id": "cze", "text": "Czech (ceština)"}, + {"id": "fin", "text": "Finnish (suomen kieli)"}, + {"id": "swe", "text": "Swedish (svenska)"}, + {"id": "hun", "text": "Hungarian (magyar nyelv)"}, + {"id": "tur", "text": "Turkish (Türkçe)"}, + {"id": "hrv", "text": "Croatian (hrvatski)"}, + {"id": "geo", "text": "Georgian (???????)"}, + {"id": "grc", "text": "Ancient Greek (????????)"}, + {"id": "kor", "text": "Korean (???)"}, + {"id": "slv", "text": "Slovenian (slovenšcina)"}, + {"id": "sux", "text": "Sumerian (????)"}, + {"id": "nob", "text": "Norwegian Bokmal (bokmål)"}, + {"id": "rum", "text": "Romanian (limba româna)"}, + {"id": "ara", "text": "Arabic (????????????)"}, + {"id": "tha", "text": "Thai (???????)"}, + {"id": "nor", "text": "Norwegian (norsk)"}, + {"id": "lat", "text": "Latin (Lingua latina)"}, + {"id": "dut", "text": "Dutch (Nederlands)"}, + {"id": "ice", "text": "Icelandic (íslenska)"}, + {"id": "lit", "text": "Lithuanian (lietuviu kalba)"}, + {"id": "srp", "text": "Serbian (??????)"}, + {"id": "baq", "text": "Basque (euskara)"}, + {"id": "gle", "text": "Irish (Gaeilge)"}, + {"id": "afr", "text": "Afrikaans (Afrikaans)"}, + {"id": "heb", "text": "Hebrew (?????)"}, + {"id": "dan", "text": "Danish (dansk)"}, + {"id": "akk", "text": "Akkadian (????????)"}, + {"id": "slo", "text": "Slovak (slovencina)"}, + {"id": "nau", "text": "Nauru (dorerin Naoero)"}, + {"id": "est", "text": 
"Estonian (eesti keel)"}, + {"id": "vie", "text": "Vietnamese (Ti?ng Vi?t)"}, + {"id": "bel", "text": "Belarusian (?????????? ????)"}, + {"id": "glg", "text": "Galician (galego)"}, + {"id": "ota", "text": "Ottoman Turkish (???? ??????)"}, + {"id": "per", "text": "Persian (?????)"} + ]}, + ] +} + +var SearchOptions = { + user_defined_date: false, + init: function (tag, data) { + + var self = this; + + self.drawOptions(tag, data); + + }, + drawOptions: function (tag, data) { + var self = this; + + var div = d3.select(tag).append('div') + .attr("id", "filter-btn") + .attr("class", "divity") + + div.append('a') + .attr("href", "#") + .attr("class", "frontend-btn") + .text("Options") + + var filters = d3.select(tag).append('div') + .attr('id', 'filters') + .attr('class', 'divity frontend-hidden') + + d3.select(tag).append('div') + .attr('id', 'input-container') + .attr('class', 'divity frontend-hidden') + + data.dropdowns.forEach(function (entry) { + + if (entry.type == "input") { + var new_input = filters.insert("div", "#input-container") + .attr("class", entry.class) + + new_input.append("label") + .attr("for", entry.id) + .text(entry.label) + .style("margin-left", "8px") + + new_input.append("input") + .attr("id", entry.id) + .attr("name", entry.id) + .attr("type", "text") + .attr("size", "5") + .attr("value", entry.value) + + } else if (entry.type = "dropdown") { + + var new_select = filters + .insert('select', "#input-container") + .attr("id", entry.id) + .style("width", "350px") + .style("overflow", "auto") + .attr("class", "dropdown_multi_" + entry.id) + .style("vertical-align", "top") + .attr("name", entry.id) + + if (entry.multiple) { + new_select.attr("name", entry.id + "[]") + new_select.attr("multiple", "multiple") + } + + entry.fields.forEach(function (option) { + var current_option = new_select + .append('option') + .attr("value", option.id) + .text(option.text); + + if (option.selected) { + current_option.attr("selected", ""); + } + + if (option.inputs != 
null) { + option.inputs.forEach(function (input) { + d3.select("#input-container") + .append("label") + .attr("for", input.id) + .text(input.label) + .style("margin-left", "8px") + + d3.select("#input-container") + .append("input") + .attr("id", input.id) + .attr("name", input.id) + .attr("class", input.class) + .attr("type", "text") + .attr("size", "18") + }) + } + }) + } + }) + /*filters.append("div") + .attr("class", "submit-btn") + .append("a") + .attr("id", "submit-btn") + .attr("href", "#") + .attr("class", "frontend-btn") + .style("vertical-align", "middle") + .text("Submit"); + + /*d3.select(tag).append("div") + .attr("id", "stats") + .attr("class", "divity") + .html("

Loading...

")*/ + + $("#filter-btn").click(function () { + $("#filters").toggleClass("frontend-hidden"); + //$("#stats").toggleClass("frontend-hidden"); + + var closed = $("#filters").css("display") == "none"; + + if (closed) { + $("#input-container").css("display", "none"); + } else if (self.user_defined_date) { + $("#input-container").css("display", "block"); + } + + }); + + }, + select_multi: function (dropdown_class, entity) { + + var self = this; + + $(function () { + $(dropdown_class).multiselect({ + allSelectedText: "All " + entity + , nonSelectedText: "No " + entity + , nSelectedText: entity + , buttonWidth: '150px' + , numberDisplayed: 2 + , maxHeight: 250 + , includeSelectAllOption: true + , onChange: function (element, checked) { + if (checked === true) { + + if (element.val() !== "user-defined") { + self.user_defined_date = false; + d3.select("#input-container").style("display", "none"); + } else { + self.user_defined_date = true; + d3.select("#input-container").style("display", "block"); + } + + self.setDateRangeFromPreset("#from", "#to", element.val()); + } + } + }); + + }) + }, + setDateRangeFromPreset: function (from, to, val, start_date) { + var current_date = new Date(); + var current_year = current_date.getFullYear(); + + var start = new Date(); + var end = new Date(); + end.setHours(start.getHours() + (start.getTimezoneOffset() / 60) * -1); + + switch (val) { + + case "user-defined": + self.user_defined_date = true; + d3.select("#input-container").style("display", "block"); + break; + + //full date + case "any-time": + if(typeof start_date === "undefined") { + start.setTime(0); + } else { + start.setTime(Date.parse(start_date)) + } + this.setDateFields(from, to, start, end); + break; + + case "last-month": + start.setMonth(end.getMonth() - 1); + this.setDateFields(from, to, start, end); + break; + + case "last-year": + start.setFullYear(end.getFullYear() - 1); + this.setDateFields(from, to, start, end); + break; + + //years only + case "any-time-years": + 
if(typeof start_date === "undefined") { + $(from).val("1809"); + } else { + $(from).val(start_date); + } + $(to).val(current_year); + break; + + case "this-year": + $(from).val(current_year); + $(to).val(current_year); + break; + + case "last-year-years": + $(from).val(current_year - 1); + $(to).val(current_year - 1); + break; + + default: + break; + } + }, + setDateFields: function (from, to, start, end) { + Date.prototype.yyyymmdd = function () { + var yyyy = this.getFullYear().toString(); + var mm = (this.getMonth() + 1).toString(); // getMonth() is zero-based + var dd = this.getDate().toString(); + return yyyy + "-" + (mm[1] ? mm : "0" + mm[0]) + "-" + (dd[1] ? dd : "0" + dd[0]); // padding + }; + + $(from).datepicker("setDate", start); + $(to).datepicker("setDate", end); + }, + initDateFields: function (from, to) { + setDateFields(from, to); + }, + addDatePickerFromTo: function (from, to, init_time_range, start_date) { + + var self = this; + + $(function () { + $(from).datepicker({ + changeMonth: true, + numberOfMonths: 3, + dateFormat: 'yy-mm-dd', + onClose: function (selectedDate) { + $(to).datepicker("option", "minDate", selectedDate); + } + }); + $(to).datepicker({ + changeMonth: true, + numberOfMonths: 3, + dateFormat: 'yy-mm-dd', + onClose: function (selectedDate) { + $(from).datepicker("option", "maxDate", selectedDate); + } + }); + + self.setDateRangeFromPreset("#from", "#to", init_time_range, start_date); + + }); + } +}; diff --git a/examples/triple/search_triple.html b/examples/triple/search_triple.html new file mode 100644 index 000000000..1656b86a9 --- /dev/null +++ b/examples/triple/search_triple.html @@ -0,0 +1,46 @@ + + + + + + Search BASE and turn it into a visualization + + + + + + + + + + + + + + + +
+

Search TRIPLE and turn it into a visualization

+
+ + + +
+
+
+
+
+
+
Built with Headstart and rbace. All content retrieved from TRIPLE. +
+ + + + + + + diff --git a/server/headstart_backend.docker b/server/headstart_backend.docker index b8aaca174..dd18527b7 100644 --- a/server/headstart_backend.docker +++ b/server/headstart_backend.docker @@ -9,7 +9,7 @@ RUN R -e 'install.packages("remotes", repos = c(CRAN = "https://cran.rstudio.com RUN R -e 'install.packages("renv", repos = c(CRAN = "https://cran.rstudio.com"))' WORKDIR /headstart -COPY workers/backend/renv.lock ./ +COPY workers/backend/src/renv.lock ./ RUN R -e 'options(repos="https://cran.wu.ac.at"); renv::restore()' COPY preprocessing/other-scripts ./backend diff --git a/server/search_triple.docker b/server/search_triple.docker index d8deb4d78..6ce4956d4 100644 --- a/server/search_triple.docker +++ b/server/search_triple.docker @@ -3,5 +3,5 @@ FROM python:3.6.10-alpine3.10 MAINTAINER Chris Kittel "christopher.kittel@openknowledgemaps.org" WORKDIR /headstart -COPY workers/triple/ ./ +COPY workers/triple/src/ ./ RUN pip install --no-cache-dir -r requirements.txt diff --git a/server/services.docker b/server/services.docker index 910702986..ec9bec175 100644 --- a/server/services.docker +++ b/server/services.docker @@ -2,8 +2,11 @@ FROM python:3.6.10-alpine3.10 MAINTAINER Chris Kittel "christopher.kittel@openknowledgemaps.org" +RUN apk update +RUN apk add build-base gcc + WORKDIR /headstart -COPY workers/services/ ./ +COPY workers/services/src/ ./ RUN pip install --no-cache-dir -r requirements.txt CMD gunicorn -b 0.0.0.0:5001 'app:new_services_app()' diff --git a/server/services/search.php b/server/services/search.php index 5bb936bd0..b530ade33 100644 --- a/server/services/search.php +++ b/server/services/search.php @@ -77,7 +77,19 @@ function search($repository, $dirty_query, $post_params, $param_types, $keyword_ $WORKING_DIR = $ini_array["general"]["preprocessing_dir"] . $ini_array["output"]["output_dir"]; - if ($backend == null) { + if ($repository == "triple") { + $url = "http://localhost/api/v2/search_" . 
$repository; + $payload = json_encode($post_params); + $ch = curl_init(); + curl_setopt($ch, CURLOPT_URL, $url); + curl_setopt($ch, CURLOPT_HTTPHEADER, array('Content-Type:application/json')); + curl_setopt($ch, CURLOPT_POST, true); + curl_setopt($ch, CURLOPT_POSTFIELDS, $payload); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + $output_json = curl_exec($ch); + var_dump($output_json); + $output_json = NULL; + } else { $calculation = new \headstart\preprocessing\calculation\RCalculation($ini_array); $output = $calculation->performCalculationAndReturnOutputAsJSON($WORKING_DIR, $query, $params_filename, $repository); @@ -115,7 +127,8 @@ function search($repository, $dirty_query, $post_params, $param_types, $keyword_ , "doaj" => "DOAJ" , "base" => "BASE" , "openaire" => "OpenAire" - , "linkedcat" => "LinkedCat"); + , "linkedcat" => "LinkedCat" + , "triple" => "TRIPLE"); if(!isset($ini_array["snapshot"]["snapshot_enabled"]) || $ini_array["snapshot"]["snapshot_enabled"] > 0) { $snapshot = new \headstart\preprocessing\Snapshot($ini_array, $query, $unique_id, $repository, $repo_mapping[$repository]); diff --git a/server/services/searchTRIPLE.php b/server/services/searchTRIPLE.php index d397c4863..5de28f23e 100644 --- a/server/services/searchTRIPLE.php +++ b/server/services/searchTRIPLE.php @@ -11,7 +11,7 @@ $post_params = $_POST; -$result = search("triple", $dirty_query, $post_params, array("from", "to", "document_types", "sorting"), ";", null); +$result = search("triple", $dirty_query, $post_params, array("from", "to", "sorting"), ";", null); echo $result diff --git a/server/workers/services/src/app.py b/server/workers/services/src/app.py index 87bf28847..38812c4d2 100644 --- a/server/workers/services/src/app.py +++ b/server/workers/services/src/app.py @@ -11,3 +11,8 @@ def new_services_app(settings_override=None): app.register_blueprint(v2) return app + + +if __name__ == '__main__': + app = new_services_app() + app.run(port=5001, debug=True) diff --git 
a/server/workers/services/src/blueprints/v2/views.py b/server/workers/services/src/blueprints/v2/views.py index 2c7d26dcf..82e1d6562 100644 --- a/server/workers/services/src/blueprints/v2/views.py +++ b/server/workers/services/src/blueprints/v2/views.py @@ -1,6 +1,9 @@ import os +import json +import uuid import redis import asyncio +import aioredis from flask import Blueprint, request, make_response, jsonify @@ -9,13 +12,21 @@ app = Blueprint('v2', __name__) +async def get_key(store, key): + result = await redis_store.get(key) + return result + + @app.route('/api/v2/search_triple', methods=['GET', 'POST']) def search_triple(): """ """ if request.method == "POST": - redis_store.rpush("search_triple") - result = await redis_store.get(k) + data = request.get_json() + k = str(uuid.uuid4()) + d = {"id": k, "data": data} + redis_store.rpush("search_triple", json.dumps(d)) + result = get_key(redis_store, k) @app.route('/api/v2/example_data', methods=['GET', 'POST']) diff --git a/server/workers/services/src/requirements.txt b/server/workers/services/src/requirements.txt index c6ca1f63e..cb1a39062 100644 --- a/server/workers/services/src/requirements.txt +++ b/server/workers/services/src/requirements.txt @@ -1,3 +1,5 @@ flask gunicorn redis +hiredis +aioredis From df150af5e71e274459aa14c6bfcf7ad096083d5e Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Sun, 8 Mar 2020 13:24:46 +0100 Subject: [PATCH 14/99] mappings endpoint --- docker-compose.yml | 38 +++++++------- server/search_triple.docker | 2 + server/services/search.php | 2 +- .../services/src/blueprints/v2/views.py | 49 +++++++++++++++---- server/workers/triple/src/requirements.txt | 1 + server/workers/triple/src/search_triple.py | 32 +++++++++++- 6 files changed, 92 insertions(+), 32 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 2281f89ee..b9d144fa9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,16 +2,16 @@ version: '3.7' services: - api: - build: - context: server - 
dockerfile: services.docker - restart: always - ports: - - '5001:5001' - depends_on: - - redis - network_mode: "host" + # api: + # build: + # context: server + # dockerfile: services.docker + # restart: always + # ports: + # - '5001:5001' + # depends_on: + # - redis + # network_mode: "host" redis: image: 'redis:4.0-alpine' @@ -30,15 +30,15 @@ services: dockerfile: search_triple.docker restart: always network_mode: "host" - - backend: - build: - context: server - dockerfile: headstart_backend.docker - env_file: - - server/workers/backend/src/backend.env - restart: always - network_mode: "host" + # + # backend: + # build: + # context: server + # dockerfile: headstart_backend.docker + # env_file: + # - server/workers/backend/src/backend.env + # restart: always + # network_mode: "host" volumes: redis: diff --git a/server/search_triple.docker b/server/search_triple.docker index 6ce4956d4..ee40dc1ee 100644 --- a/server/search_triple.docker +++ b/server/search_triple.docker @@ -5,3 +5,5 @@ MAINTAINER Chris Kittel "christopher.kittel@openknowledgemaps.org" WORKDIR /headstart COPY workers/triple/src/ ./ RUN pip install --no-cache-dir -r requirements.txt + +ENTRYPOINT python search_triple.py diff --git a/server/services/search.php b/server/services/search.php index b530ade33..d70942e1e 100644 --- a/server/services/search.php +++ b/server/services/search.php @@ -78,7 +78,7 @@ function search($repository, $dirty_query, $post_params, $param_types, $keyword_ $WORKING_DIR = $ini_array["general"]["preprocessing_dir"] . $ini_array["output"]["output_dir"]; if ($repository == "triple") { - $url = "http://localhost/api/v2/search_" . $repository; + $url = "http://localhost/api/v2/" . $repository . 
"/search"; $payload = json_encode($post_params); $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $url); diff --git a/server/workers/services/src/blueprints/v2/views.py b/server/workers/services/src/blueprints/v2/views.py index 82e1d6562..89ea046b8 100644 --- a/server/workers/services/src/blueprints/v2/views.py +++ b/server/workers/services/src/blueprints/v2/views.py @@ -1,6 +1,7 @@ import os import json import uuid +import time import redis import asyncio import aioredis @@ -12,21 +13,49 @@ app = Blueprint('v2', __name__) -async def get_key(store, key): - result = await redis_store.get(key) +def get_key(store, key): + while True: + res = redis_store.get(key) + if res is None: + time.sleep(0.5) + else: + result = json.loads(res.decode('utf-8')) + redis_store.delete(key) + break return result -@app.route('/api/v2/search_triple', methods=['GET', 'POST']) -def search_triple(): +@app.route('/api/v2/triple/search', methods=['POST']) +def triple_search(): """ """ - if request.method == "POST": - data = request.get_json() - k = str(uuid.uuid4()) - d = {"id": k, "data": data} - redis_store.rpush("search_triple", json.dumps(d)) - result = get_key(redis_store, k) + data = request.get_json() + print(data) + k = str(uuid.uuid4()) + d = {"id": k, "data": data, "endpoint": "search"} + redis_store.rpush("triple", json.dumps(d)) + result = get_key(redis_store, k) + + headers = {"Content-Type": "application/json"} + return make_response(result, + 200, + headers) + + +@app.route('/api/v2/triple/mappings', methods=['GET']) +def triple_mappings(): + """ + """ + data = {"index": request.args.get('index')} + k = str(uuid.uuid4()) + d = {"id": k, "data": data, "endpoint": "mappings"} + redis_store.rpush("triple", json.dumps(d)) + result = get_key(redis_store, k) + + headers = {"Content-Type": "application/json"} + return make_response(result, + 200, + headers) @app.route('/api/v2/example_data', methods=['GET', 'POST']) diff --git a/server/workers/triple/src/requirements.txt 
b/server/workers/triple/src/requirements.txt index bf68c7804..f155cbf2e 100644 --- a/server/workers/triple/src/requirements.txt +++ b/server/workers/triple/src/requirements.txt @@ -1,2 +1,3 @@ elasticsearch redis +certifi diff --git a/server/workers/triple/src/search_triple.py b/server/workers/triple/src/search_triple.py index 4f133999e..5015ce974 100644 --- a/server/workers/triple/src/search_triple.py +++ b/server/workers/triple/src/search_triple.py @@ -15,16 +15,44 @@ class TripleClient(object): def __init__(self, config): self.es = Elasticsearch( [config.get('host')], + http_auth=(config.get('user'), config.get('pass')), scheme="http" if config.get('host') == 'localhost' else "https", port=config.get('port'), send_get_body_as='POST', http_compress=True ) - def build_query(self, raw_query): + def next_item(self): + queue, msg = redis_store.blpop("triple") + msg = json.loads(msg) + k = msg.get('id') + params = msg.get('data') + endpoint = msg.get('endpoint') + return k, params, endpoint + + def get_mappings(self, index): + return self.es.indices.get_mapping(index) + + def search(self, parameters): + index = "isidore-documents-triple" + body = {"query": { + "bool": { + "must": [ + {"match": {}} + ] + } + }} query = {} return query def run(self): while True: - q = redis_store.blpop("search_triple") + k, params, endpoint = self.next_item() + if endpoint == "mappings": + res = self.get_mappings(params.get('index')) + redis_store.set(k, json.dumps(res)) + + +if __name__ == '__main__': + tc = TripleClient(config) + tc.run() From 1a223f47c80b5cfe5b9a2459efc6be37c9915463 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 9 Mar 2020 17:52:59 +0100 Subject: [PATCH 15/99] wip --- examples/triple/search_triple.html | 2 +- .../services/src/blueprints/v2/views.py | 7 +-- server/workers/triple/src/search_triple.py | 62 ++++++++++++++++--- 3 files changed, 57 insertions(+), 14 deletions(-) diff --git a/examples/triple/search_triple.html 
b/examples/triple/search_triple.html index 1656b86a9..a48898b9d 100644 --- a/examples/triple/search_triple.html +++ b/examples/triple/search_triple.html @@ -35,7 +35,7 @@

Search TRIPLE and turn it into a visualization

-
Built with Headstart and rbace. All content retrieved from TRIPLE. +
Built with Head Start. All content retrieved from TRIPLE.
diff --git a/server/workers/services/src/blueprints/v2/views.py b/server/workers/services/src/blueprints/v2/views.py index 89ea046b8..2756b651d 100644 --- a/server/workers/services/src/blueprints/v2/views.py +++ b/server/workers/services/src/blueprints/v2/views.py @@ -6,7 +6,7 @@ import asyncio import aioredis -from flask import Blueprint, request, make_response, jsonify +from flask import Blueprint, request, make_response, jsonify, abort redis_store = redis.StrictRedis(host="localhost", port=6379, db=0) @@ -30,9 +30,8 @@ def triple_search(): """ """ data = request.get_json() - print(data) k = str(uuid.uuid4()) - d = {"id": k, "data": data, "endpoint": "search"} + d = {"id": k, "params": data, "endpoint": "search"} redis_store.rpush("triple", json.dumps(d)) result = get_key(redis_store, k) @@ -48,7 +47,7 @@ def triple_mappings(): """ data = {"index": request.args.get('index')} k = str(uuid.uuid4()) - d = {"id": k, "data": data, "endpoint": "mappings"} + d = {"id": k, "params": data, "endpoint": "mappings"} redis_store.rpush("triple", json.dumps(d)) result = get_key(redis_store, k) diff --git a/server/workers/triple/src/search_triple.py b/server/workers/triple/src/search_triple.py index 5015ce974..54b0b6d26 100644 --- a/server/workers/triple/src/search_triple.py +++ b/server/workers/triple/src/search_triple.py @@ -2,6 +2,7 @@ import json import redis from elasticsearch import Elasticsearch +import pandas as pd with open("config.json") as infile: @@ -26,24 +27,64 @@ def next_item(self): queue, msg = redis_store.blpop("triple") msg = json.loads(msg) k = msg.get('id') - params = msg.get('data') + params = msg.get('params') endpoint = msg.get('endpoint') return k, params, endpoint def get_mappings(self, index): return self.es.indices.get_mapping(index) + def build_date_field(self, _from, _to): + date = {} + if len(_from) == 4: + _from += "-01-01" + if len(_to) == 4: + _to += "-12-31" + date["gte"] = _from + date["lte"] = _to + return date + + def build_sort_order(self, 
parameters): + sort = [] + if parameters.get('sorting') == "most-relevant": + sort.append("_score:desc") + if parameters.get('sorting') == "most-recent": + sort.append("date:desc") + return sort + def search(self, parameters): index = "isidore-documents-triple" body = {"query": { - "bool": { - "must": [ - {"match": {}} - ] - } - }} - query = {} - return query + "bool": { + "must": [ + {"multi_match": { + "query": parameters.get('q'), + "fields": ["title", "abstract"] + }}, + {"range": { + "date": self.build_date_field( + parameters.get('from'), + parameters.get('to')) + }} + ] + } + }} + res = self.es.search( + index=index, + body=body, + size=100, + sort=self.build_sort_order(parameters)) + if parameters.get('raw'): + return res + else: + return self.process_result(res) + + def process_result(self, result): + df = pd.DataFrame(result.get('hits').get('hits')) + df = pd.concat([df.drop(["_source"], axis=1), + df["_source"].apply(pd.Series)], + axis=1) + return df def run(self): while True: @@ -51,6 +92,9 @@ def run(self): if endpoint == "mappings": res = self.get_mappings(params.get('index')) redis_store.set(k, json.dumps(res)) + if endpoint == "search": + res = self.search(params) + redis_store.set(k, json.dumps(res)) if __name__ == '__main__': From 6980ecb521b00f14e6097736b2b2e9c54e078e0c Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 9 Mar 2020 19:39:54 +0100 Subject: [PATCH 16/99] swagger+restx added --- server/search_triple.docker | 3 + server/services/search.php | 2 +- .../services/src/{blueprints => }/__init__.py | 0 server/workers/services/src/apis/__init__.py | 6 ++ .../services/src/apis/request_validators.py | 19 ++++ server/workers/services/src/apis/triple.py | 100 ++++++++++++++++++ server/workers/services/src/app.py | 13 ++- .../services/src/blueprints/v2/__init__.py | 0 .../services/src/blueprints/v2/views.py | 66 ------------ server/workers/services/src/requirements.txt | 1 + server/workers/triple/src/requirements.txt | 1 + 11 files 
changed, 137 insertions(+), 74 deletions(-) rename server/workers/services/src/{blueprints => }/__init__.py (100%) create mode 100644 server/workers/services/src/apis/__init__.py create mode 100644 server/workers/services/src/apis/request_validators.py create mode 100644 server/workers/services/src/apis/triple.py delete mode 100644 server/workers/services/src/blueprints/v2/__init__.py delete mode 100644 server/workers/services/src/blueprints/v2/views.py diff --git a/server/search_triple.docker b/server/search_triple.docker index ee40dc1ee..1d6da3479 100644 --- a/server/search_triple.docker +++ b/server/search_triple.docker @@ -2,6 +2,9 @@ FROM python:3.6.10-alpine3.10 MAINTAINER Chris Kittel "christopher.kittel@openknowledgemaps.org" +RUN apk update +RUN apk add build-base gcc + WORKDIR /headstart COPY workers/triple/src/ ./ RUN pip install --no-cache-dir -r requirements.txt diff --git a/server/services/search.php b/server/services/search.php index d70942e1e..152b21a1d 100644 --- a/server/services/search.php +++ b/server/services/search.php @@ -78,7 +78,7 @@ function search($repository, $dirty_query, $post_params, $param_types, $keyword_ $WORKING_DIR = $ini_array["general"]["preprocessing_dir"] . $ini_array["output"]["output_dir"]; if ($repository == "triple") { - $url = "http://localhost/api/v2/" . $repository . "/search"; + $url = "http://localhost/api/" . $repository . 
"/search"; $payload = json_encode($post_params); $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $url); diff --git a/server/workers/services/src/blueprints/__init__.py b/server/workers/services/src/__init__.py similarity index 100% rename from server/workers/services/src/blueprints/__init__.py rename to server/workers/services/src/__init__.py diff --git a/server/workers/services/src/apis/__init__.py b/server/workers/services/src/apis/__init__.py new file mode 100644 index 000000000..3f7e6262c --- /dev/null +++ b/server/workers/services/src/apis/__init__.py @@ -0,0 +1,6 @@ +from flask_restx import Api +from .triple import api as triple_api + +api = Api(title="Head Start API", version="0.1", + description="Head Start API demo") +api.add_namespace(triple_api, path='/triple') diff --git a/server/workers/services/src/apis/request_validators.py b/server/workers/services/src/apis/request_validators.py new file mode 100644 index 000000000..12b3f9cb6 --- /dev/null +++ b/server/workers/services/src/apis/request_validators.py @@ -0,0 +1,19 @@ +from marshmallow import Schema, fields, pre_load + + +class SearchParamSchema(Schema): + q = fields.Str(required=True) + sorting = fields.Str(required=True) + from_ = fields.Date(required=True, data_key="from", + format="%Y-%m-%d") + to = fields.Date(required=True, + format="%Y-%m-%d") + + @pre_load + def fix_years(self, in_data, **kwargs): + if len(in_data.get('from')) == 4: + in_data["from"] = in_data["from"]+"-01-01" + if len(in_data.get('to')) == 4: + in_data["to"] = in_data["to"]+"-12-31" + return in_data + #@validates('from_') diff --git a/server/workers/services/src/apis/triple.py b/server/workers/services/src/apis/triple.py new file mode 100644 index 000000000..fe6472f45 --- /dev/null +++ b/server/workers/services/src/apis/triple.py @@ -0,0 +1,100 @@ +import os +import json +import uuid +import time +import redis +import asyncio +import aioredis + +from flask import Blueprint, request, make_response, jsonify, abort +from 
flask_restx import Namespace, Resource, fields +from .request_validators import SearchParamSchema + +redis_store = redis.StrictRedis(host="localhost", port=6379, db=0) +api = Namespace("triple", description="TRIPLE API") +search_param_schema = SearchParamSchema() + + +def get_key(store, key): + while True: + res = redis_store.get(key) + if res is None: + time.sleep(0.5) + else: + result = json.loads(res.decode('utf-8')) + redis_store.delete(key) + break + return result + + +search_fields = api.model('Search', { + "q": fields.String, + "sorting": fields.String, + "from": fields.DateTime, + "to": fields.DateTime, +}) + +mappings = api.model('Mappings', { + "index": fields.String +}) + + +@api.route('/search') +class Search(Resource): + @api.doc(responses={200: 'OK', + 400: 'Invalid search parameters'}, + params={"q": "string, query term", + "sorting": "string, most-relevant or most-recent", + "from": "yyyy-MM-dd", + "to": "yyyy-MM-dd"}) + # @api.marshal_with(search_fields) + def post(self): + """ + """ + data = request.get_json() + data = {k: data.get(k) + for k + in ["q", "sorting", "from", "to"]} + errors = search_param_schema.validate(data, partial=True) + if errors: + abort(400, str(errors)) + k = str(uuid.uuid4()) + d = {"id": k, "params": data, "endpoint": "search"} + redis_store.rpush("triple", json.dumps(d)) + result = get_key(redis_store, k) + + headers = {"Content-Type": "application/json"} + return make_response(result, + 200, + headers) + + +@api.route('/example_data') +class ExampleData(Resource): + def get(self): + headers = {"Content-Type": "application/json"} + data = {"test": "document string"} + return make_response(data, + 200, + headers) + + +@api.route('/mappings') +class Mappings(Resource): + @api.doc(responses={200: 'OK', + 400: 'Invalid search parameters'}, + params={"index": "Specify the ElasticSearch index to get the mapping of."}) + # @api.marshal_with(mappings) + def get(self): + """ + """ + data = {"index": request.args.get('index')} + k 
= str(uuid.uuid4()) + d = {"id": k, "params": data, "endpoint": "mappings"} + redis_store.rpush("triple", json.dumps(d)) + result = get_key(redis_store, k) + + headers = {"Content-Type": "application/json"} + return make_response(result, + 200, + headers) diff --git a/server/workers/services/src/app.py b/server/workers/services/src/app.py index 38812c4d2..c9d178122 100644 --- a/server/workers/services/src/app.py +++ b/server/workers/services/src/app.py @@ -1,16 +1,15 @@ from flask import Flask +from apis import api def new_services_app(settings_override=None): - from blueprints.v2.views import app as v2 - app = Flask('v2', instance_relative_config=True) + flask_app = Flask('v2', instance_relative_config=True) - app.config.from_object('config.settings') - app.config.from_pyfile('settings.py', silent=True) + flask_app.config.from_object('config.settings') + flask_app.config.from_pyfile('settings.py', silent=True) - app.register_blueprint(v2) - - return app + api.init_app(flask_app) + return flask_app if __name__ == '__main__': diff --git a/server/workers/services/src/blueprints/v2/__init__.py b/server/workers/services/src/blueprints/v2/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/server/workers/services/src/blueprints/v2/views.py b/server/workers/services/src/blueprints/v2/views.py deleted file mode 100644 index 2756b651d..000000000 --- a/server/workers/services/src/blueprints/v2/views.py +++ /dev/null @@ -1,66 +0,0 @@ -import os -import json -import uuid -import time -import redis -import asyncio -import aioredis - -from flask import Blueprint, request, make_response, jsonify, abort - - -redis_store = redis.StrictRedis(host="localhost", port=6379, db=0) -app = Blueprint('v2', __name__) - - -def get_key(store, key): - while True: - res = redis_store.get(key) - if res is None: - time.sleep(0.5) - else: - result = json.loads(res.decode('utf-8')) - redis_store.delete(key) - break - return result - - -@app.route('/api/v2/triple/search', 
methods=['POST']) -def triple_search(): - """ - """ - data = request.get_json() - k = str(uuid.uuid4()) - d = {"id": k, "params": data, "endpoint": "search"} - redis_store.rpush("triple", json.dumps(d)) - result = get_key(redis_store, k) - - headers = {"Content-Type": "application/json"} - return make_response(result, - 200, - headers) - - -@app.route('/api/v2/triple/mappings', methods=['GET']) -def triple_mappings(): - """ - """ - data = {"index": request.args.get('index')} - k = str(uuid.uuid4()) - d = {"id": k, "params": data, "endpoint": "mappings"} - redis_store.rpush("triple", json.dumps(d)) - result = get_key(redis_store, k) - - headers = {"Content-Type": "application/json"} - return make_response(result, - 200, - headers) - - -@app.route('/api/v2/example_data', methods=['GET', 'POST']) -def example_data(): - headers = {"Content-Type": "application/json"} - data = {"test": "document string"} - return make_response(data, - 200, - headers) diff --git a/server/workers/services/src/requirements.txt b/server/workers/services/src/requirements.txt index cb1a39062..cf48ee5cc 100644 --- a/server/workers/services/src/requirements.txt +++ b/server/workers/services/src/requirements.txt @@ -1,4 +1,5 @@ flask +flask-restx gunicorn redis hiredis diff --git a/server/workers/triple/src/requirements.txt b/server/workers/triple/src/requirements.txt index f155cbf2e..3e2eadb89 100644 --- a/server/workers/triple/src/requirements.txt +++ b/server/workers/triple/src/requirements.txt @@ -1,3 +1,4 @@ elasticsearch redis certifi +pandas From 9deae73ce1334db66a3b2f9be576238ce30501b9 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 9 Mar 2020 19:39:54 +0100 Subject: [PATCH 17/99] swagger+restx added --- server/search_triple.docker | 3 + server/services/search.php | 2 +- .../services/src/{blueprints => }/__init__.py | 0 server/workers/services/src/apis/__init__.py | 6 ++ .../services/src/apis/request_validators.py | 19 ++++ server/workers/services/src/apis/triple.py | 100 
++++++++++++++++++ server/workers/services/src/app.py | 13 ++- .../services/src/blueprints/v2/__init__.py | 0 .../services/src/blueprints/v2/views.py | 66 ------------ server/workers/services/src/requirements.txt | 1 + server/workers/triple/src/requirements.txt | 1 + server/workers/triple/src/search_triple.py | 2 +- 12 files changed, 138 insertions(+), 75 deletions(-) rename server/workers/services/src/{blueprints => }/__init__.py (100%) create mode 100644 server/workers/services/src/apis/__init__.py create mode 100644 server/workers/services/src/apis/request_validators.py create mode 100644 server/workers/services/src/apis/triple.py delete mode 100644 server/workers/services/src/blueprints/v2/__init__.py delete mode 100644 server/workers/services/src/blueprints/v2/views.py diff --git a/server/search_triple.docker b/server/search_triple.docker index ee40dc1ee..1d6da3479 100644 --- a/server/search_triple.docker +++ b/server/search_triple.docker @@ -2,6 +2,9 @@ FROM python:3.6.10-alpine3.10 MAINTAINER Chris Kittel "christopher.kittel@openknowledgemaps.org" +RUN apk update +RUN apk add build-base gcc + WORKDIR /headstart COPY workers/triple/src/ ./ RUN pip install --no-cache-dir -r requirements.txt diff --git a/server/services/search.php b/server/services/search.php index d70942e1e..152b21a1d 100644 --- a/server/services/search.php +++ b/server/services/search.php @@ -78,7 +78,7 @@ function search($repository, $dirty_query, $post_params, $param_types, $keyword_ $WORKING_DIR = $ini_array["general"]["preprocessing_dir"] . $ini_array["output"]["output_dir"]; if ($repository == "triple") { - $url = "http://localhost/api/v2/" . $repository . "/search"; + $url = "http://localhost/api/" . $repository . 
"/search"; $payload = json_encode($post_params); $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $url); diff --git a/server/workers/services/src/blueprints/__init__.py b/server/workers/services/src/__init__.py similarity index 100% rename from server/workers/services/src/blueprints/__init__.py rename to server/workers/services/src/__init__.py diff --git a/server/workers/services/src/apis/__init__.py b/server/workers/services/src/apis/__init__.py new file mode 100644 index 000000000..3f7e6262c --- /dev/null +++ b/server/workers/services/src/apis/__init__.py @@ -0,0 +1,6 @@ +from flask_restx import Api +from .triple import api as triple_api + +api = Api(title="Head Start API", version="0.1", + description="Head Start API demo") +api.add_namespace(triple_api, path='/triple') diff --git a/server/workers/services/src/apis/request_validators.py b/server/workers/services/src/apis/request_validators.py new file mode 100644 index 000000000..12b3f9cb6 --- /dev/null +++ b/server/workers/services/src/apis/request_validators.py @@ -0,0 +1,19 @@ +from marshmallow import Schema, fields, pre_load + + +class SearchParamSchema(Schema): + q = fields.Str(required=True) + sorting = fields.Str(required=True) + from_ = fields.Date(required=True, data_key="from", + format="%Y-%m-%d") + to = fields.Date(required=True, + format="%Y-%m-%d") + + @pre_load + def fix_years(self, in_data, **kwargs): + if len(in_data.get('from')) == 4: + in_data["from"] = in_data["from"]+"-01-01" + if len(in_data.get('to')) == 4: + in_data["to"] = in_data["to"]+"-12-31" + return in_data + #@validates('from_') diff --git a/server/workers/services/src/apis/triple.py b/server/workers/services/src/apis/triple.py new file mode 100644 index 000000000..fe6472f45 --- /dev/null +++ b/server/workers/services/src/apis/triple.py @@ -0,0 +1,100 @@ +import os +import json +import uuid +import time +import redis +import asyncio +import aioredis + +from flask import Blueprint, request, make_response, jsonify, abort +from 
flask_restx import Namespace, Resource, fields +from .request_validators import SearchParamSchema + +redis_store = redis.StrictRedis(host="localhost", port=6379, db=0) +api = Namespace("triple", description="TRIPLE API") +search_param_schema = SearchParamSchema() + + +def get_key(store, key): + while True: + res = redis_store.get(key) + if res is None: + time.sleep(0.5) + else: + result = json.loads(res.decode('utf-8')) + redis_store.delete(key) + break + return result + + +search_fields = api.model('Search', { + "q": fields.String, + "sorting": fields.String, + "from": fields.DateTime, + "to": fields.DateTime, +}) + +mappings = api.model('Mappings', { + "index": fields.String +}) + + +@api.route('/search') +class Search(Resource): + @api.doc(responses={200: 'OK', + 400: 'Invalid search parameters'}, + params={"q": "string, query term", + "sorting": "string, most-relevant or most-recent", + "from": "yyyy-MM-dd", + "to": "yyyy-MM-dd"}) + # @api.marshal_with(search_fields) + def post(self): + """ + """ + data = request.get_json() + data = {k: data.get(k) + for k + in ["q", "sorting", "from", "to"]} + errors = search_param_schema.validate(data, partial=True) + if errors: + abort(400, str(errors)) + k = str(uuid.uuid4()) + d = {"id": k, "params": data, "endpoint": "search"} + redis_store.rpush("triple", json.dumps(d)) + result = get_key(redis_store, k) + + headers = {"Content-Type": "application/json"} + return make_response(result, + 200, + headers) + + +@api.route('/example_data') +class ExampleData(Resource): + def get(self): + headers = {"Content-Type": "application/json"} + data = {"test": "document string"} + return make_response(data, + 200, + headers) + + +@api.route('/mappings') +class Mappings(Resource): + @api.doc(responses={200: 'OK', + 400: 'Invalid search parameters'}, + params={"index": "Specify the ElasticSearch index to get the mapping of."}) + # @api.marshal_with(mappings) + def get(self): + """ + """ + data = {"index": request.args.get('index')} + k 
= str(uuid.uuid4()) + d = {"id": k, "params": data, "endpoint": "mappings"} + redis_store.rpush("triple", json.dumps(d)) + result = get_key(redis_store, k) + + headers = {"Content-Type": "application/json"} + return make_response(result, + 200, + headers) diff --git a/server/workers/services/src/app.py b/server/workers/services/src/app.py index 38812c4d2..c9d178122 100644 --- a/server/workers/services/src/app.py +++ b/server/workers/services/src/app.py @@ -1,16 +1,15 @@ from flask import Flask +from apis import api def new_services_app(settings_override=None): - from blueprints.v2.views import app as v2 - app = Flask('v2', instance_relative_config=True) + flask_app = Flask('v2', instance_relative_config=True) - app.config.from_object('config.settings') - app.config.from_pyfile('settings.py', silent=True) + flask_app.config.from_object('config.settings') + flask_app.config.from_pyfile('settings.py', silent=True) - app.register_blueprint(v2) - - return app + api.init_app(flask_app) + return flask_app if __name__ == '__main__': diff --git a/server/workers/services/src/blueprints/v2/__init__.py b/server/workers/services/src/blueprints/v2/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/server/workers/services/src/blueprints/v2/views.py b/server/workers/services/src/blueprints/v2/views.py deleted file mode 100644 index 2756b651d..000000000 --- a/server/workers/services/src/blueprints/v2/views.py +++ /dev/null @@ -1,66 +0,0 @@ -import os -import json -import uuid -import time -import redis -import asyncio -import aioredis - -from flask import Blueprint, request, make_response, jsonify, abort - - -redis_store = redis.StrictRedis(host="localhost", port=6379, db=0) -app = Blueprint('v2', __name__) - - -def get_key(store, key): - while True: - res = redis_store.get(key) - if res is None: - time.sleep(0.5) - else: - result = json.loads(res.decode('utf-8')) - redis_store.delete(key) - break - return result - - -@app.route('/api/v2/triple/search', 
methods=['POST']) -def triple_search(): - """ - """ - data = request.get_json() - k = str(uuid.uuid4()) - d = {"id": k, "params": data, "endpoint": "search"} - redis_store.rpush("triple", json.dumps(d)) - result = get_key(redis_store, k) - - headers = {"Content-Type": "application/json"} - return make_response(result, - 200, - headers) - - -@app.route('/api/v2/triple/mappings', methods=['GET']) -def triple_mappings(): - """ - """ - data = {"index": request.args.get('index')} - k = str(uuid.uuid4()) - d = {"id": k, "params": data, "endpoint": "mappings"} - redis_store.rpush("triple", json.dumps(d)) - result = get_key(redis_store, k) - - headers = {"Content-Type": "application/json"} - return make_response(result, - 200, - headers) - - -@app.route('/api/v2/example_data', methods=['GET', 'POST']) -def example_data(): - headers = {"Content-Type": "application/json"} - data = {"test": "document string"} - return make_response(data, - 200, - headers) diff --git a/server/workers/services/src/requirements.txt b/server/workers/services/src/requirements.txt index cb1a39062..cf48ee5cc 100644 --- a/server/workers/services/src/requirements.txt +++ b/server/workers/services/src/requirements.txt @@ -1,4 +1,5 @@ flask +flask-restx gunicorn redis hiredis diff --git a/server/workers/triple/src/requirements.txt b/server/workers/triple/src/requirements.txt index f155cbf2e..3e2eadb89 100644 --- a/server/workers/triple/src/requirements.txt +++ b/server/workers/triple/src/requirements.txt @@ -1,3 +1,4 @@ elasticsearch redis certifi +pandas diff --git a/server/workers/triple/src/search_triple.py b/server/workers/triple/src/search_triple.py index 54b0b6d26..4254449d4 100644 --- a/server/workers/triple/src/search_triple.py +++ b/server/workers/triple/src/search_triple.py @@ -84,7 +84,7 @@ def process_result(self, result): df = pd.concat([df.drop(["_source"], axis=1), df["_source"].apply(pd.Series)], axis=1) - return df + return df.to_json() def run(self): while True: From 
269182e33460c324ed27ce87f2b260dd9c480b4d Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Tue, 10 Mar 2020 00:40:28 +0100 Subject: [PATCH 18/99] data cleaning --- server/workers/backend/src/headstart.py | 19 ++++++++-- .../services/src/apis/request_validators.py | 9 +++-- server/workers/services/src/apis/triple.py | 3 +- server/workers/triple/src/search_triple.py | 36 ++++++++++++++++--- 4 files changed, 56 insertions(+), 11 deletions(-) diff --git a/server/workers/backend/src/headstart.py b/server/workers/backend/src/headstart.py index 3f01d8f6a..7abc2aa1e 100644 --- a/server/workers/backend/src/headstart.py +++ b/server/workers/backend/src/headstart.py @@ -2,6 +2,7 @@ import time import json import subprocess +from tempfile import TemporaryFile import redis redis_store = redis.StrictRedis(host="localhost", port=6379, db=0) @@ -15,13 +16,25 @@ def __init__(self): self.command = 'Rscript' self.hs = os.path.join(self.wd, "run_vis_layout.R") + def next_item(self): + queue, msg = redis_store.blpop("input_data") + msg = json.loads(msg) + k = msg.get('id') + input_data = msg.get('input_data') + return k, input_data + def create_map(self, input_data): - pass + with TemporaryFile() as input: + cmd = [self.command, self.hs, input] + output = subprocess.check_output(cmd) + output = [o for o in output.decode('utf-8').split('\n') if len(o) > 0] + return output[-1] def run(self): while True: - input_data = redis_store.blpop('input_data') - self.create_map(input_data) + k, input_data = self.next_item() + output = self.create_map(input_data) + redis_store.set(k+"_output", output) if __name__ == '__main__': diff --git a/server/workers/services/src/apis/request_validators.py b/server/workers/services/src/apis/request_validators.py index 12b3f9cb6..3fb1a63b0 100644 --- a/server/workers/services/src/apis/request_validators.py +++ b/server/workers/services/src/apis/request_validators.py @@ -1,4 +1,5 @@ -from marshmallow import Schema, fields, pre_load +from datetime import 
datetime +from marshmallow import Schema, fields, pre_load, validates, ValidationError class SearchParamSchema(Schema): @@ -16,4 +17,8 @@ def fix_years(self, in_data, **kwargs): if len(in_data.get('to')) == 4: in_data["to"] = in_data["to"]+"-12-31" return in_data - #@validates('from_') + + @validates('from_') + def is_not_in_future(self, date): + if date > datetime.today(): + raise ValidationError("Starting date can't be in the future.") diff --git a/server/workers/services/src/apis/triple.py b/server/workers/services/src/apis/triple.py index fe6472f45..3ca137ab3 100644 --- a/server/workers/services/src/apis/triple.py +++ b/server/workers/services/src/apis/triple.py @@ -17,12 +17,13 @@ def get_key(store, key): while True: - res = redis_store.get(key) + res = redis_store.get(key+"_output") if res is None: time.sleep(0.5) else: result = json.loads(res.decode('utf-8')) redis_store.delete(key) + redis_store.delete(key+"_output") break return result diff --git a/server/workers/triple/src/search_triple.py b/server/workers/triple/src/search_triple.py index 4254449d4..b5cf56385 100644 --- a/server/workers/triple/src/search_triple.py +++ b/server/workers/triple/src/search_triple.py @@ -36,10 +36,6 @@ def get_mappings(self, index): def build_date_field(self, _from, _to): date = {} - if len(_from) == 4: - _from += "-01-01" - if len(_to) == 4: - _to += "-12-31" date["gte"] = _from date["lte"] = _to return date @@ -80,11 +76,41 @@ def search(self, parameters): return self.process_result(res) def process_result(self, result): + """ + # * "id": a unique ID, preferably the DOI + # * "title": the title + # * "authors": authors, preferably in the format "LASTNAME1, FIRSTNAME1;LASTNAME2, FIRSTNAME2" + # * "paper_abstract": the abstract + # * "published_in": name of the journal or venue + # * "year": publication date + # * "url": URL to the landing page + # * "readers": an indicator of the paper's popularity, e.g. number of readers, views, downloads etc. 
+ # * "subject": keywords or classification, split by ; + # * "oa_state": open access status of the item; has the following possible states: 0 for no, 1 for yes, 2 for unknown + # * "link": link to the PDF; if this is not available, a list of candidate URLs that may contain a link to the PDF + """ df = pd.DataFrame(result.get('hits').get('hits')) df = pd.concat([df.drop(["_source"], axis=1), df["_source"].apply(pd.Series)], axis=1) - return df.to_json() + metadata = pd.DataFrame() + metadata["id"] = df.identifier.map(lambda x: x[0] if x else "") + metadata["title"] = df.title.map(lambda x: x[0] if x else "") + metadata["authors"] = df.author.map(lambda x: self.get_authors(x) if x else "") + metadata["abstract"] = df.abstract.map(lambda x: x[0] if x else "") + metadata["published_in"] = df.publisher.map(lambda x: x[0].get('name') if x else "") + metadata["year"] = df.datestamp.map(lambda x: x if x else "") + metadata["url"] = df.url.map(lambda x: x[0] if x else "") + metadata["readers"] = 0 + metadata["subject"] = df.keyword.map(lambda x: "; ".join(x) if x else "") + input_data = {} + input_data["metadata"] = metadata + input_data["text"] = metadata.apply(lambda x: ". 
".join(x[["title", "abstract"]]), axis=1) + return input_data.to_json() + + @staticmethod + def get_authors(authorlist): + return "; ".join([", ".join([a.get('lastname')[0], a.get('firstname')[0]]) for a in authorlist if a]) def run(self): while True: From 6640852cb7dc8ff66f84acd6b1a8016c44359a8a Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Tue, 10 Mar 2020 14:10:42 +0100 Subject: [PATCH 19/99] activate backend container --- docker-compose.yml | 42 ++++++++------- server/headstart_backend.docker | 18 +++++-- .../other-scripts/run_vis_layout.R | 53 +++++++++++++++++++ server/search_triple.docker | 3 +- server/services.docker | 3 +- server/workers/backend/{src => }/renv.lock | 0 server/workers/backend/requirements.txt | 1 + server/workers/backend/src/headstart.py | 43 ++++++++++----- .../services/{src => }/requirements.txt | 2 + .../services/src/apis/request_validators.py | 2 +- server/workers/services/src/apis/triple.py | 3 +- .../workers/triple/{src => }/requirements.txt | 0 server/workers/triple/src/search_triple.py | 44 +++++++++------ 13 files changed, 158 insertions(+), 56 deletions(-) create mode 100644 server/preprocessing/other-scripts/run_vis_layout.R rename server/workers/backend/{src => }/renv.lock (100%) create mode 100644 server/workers/backend/requirements.txt rename server/workers/services/{src => }/requirements.txt (63%) rename server/workers/triple/{src => }/requirements.txt (100%) diff --git a/docker-compose.yml b/docker-compose.yml index b9d144fa9..a3023fd94 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,16 +2,16 @@ version: '3.7' services: - # api: - # build: - # context: server - # dockerfile: services.docker - # restart: always - # ports: - # - '5001:5001' - # depends_on: - # - redis - # network_mode: "host" + api: + build: + context: server + dockerfile: services.docker + restart: always + ports: + - '5001:5001' + depends_on: + - redis + network_mode: "host" redis: image: 'redis:4.0-alpine' @@ -30,15 +30,19 @@ 
services: dockerfile: search_triple.docker restart: always network_mode: "host" - # - # backend: - # build: - # context: server - # dockerfile: headstart_backend.docker - # env_file: - # - server/workers/backend/src/backend.env - # restart: always - # network_mode: "host" + + backend: + build: + context: server + dockerfile: headstart_backend.docker + env_file: + - server/workers/backend/src/backend.env + restart: always + network_mode: "host" + volumes: + - type: bind + source: /var/opt/renv + target: /root/.local/share/renv volumes: redis: diff --git a/server/headstart_backend.docker b/server/headstart_backend.docker index dd18527b7..579d3b83d 100644 --- a/server/headstart_backend.docker +++ b/server/headstart_backend.docker @@ -4,14 +4,22 @@ MAINTAINER Chris Kittel "christopher.kittel@openknowledgemaps.org" RUN apt-get update RUN apt-get -y install libssl-dev libcurl4-openssl-dev RUN apt-get -y install libxml2-dev libz-dev libpoppler-cpp-dev +RUN apt-get -y install python3 python3-pip +RUN R -e 'options(repos="https://cran.wu.ac.at")' RUN R -e 'install.packages("remotes", repos = c(CRAN = "https://cran.rstudio.com"))' RUN R -e 'install.packages("renv", repos = c(CRAN = "https://cran.rstudio.com"))' WORKDIR /headstart -COPY workers/backend/src/renv.lock ./ -RUN R -e 'options(repos="https://cran.wu.ac.at"); renv::restore()' -COPY preprocessing/other-scripts ./backend +COPY workers/backend/requirements.txt . +RUN pip3 install --no-cache-dir -r requirements.txt -ENTRYPOINT python -CMD headstart.py +COPY workers/backend/renv.lock . +RUN R -e 'renv::consent(provided = TRUE)' +RUN R -e 'renv::restore()' + +COPY workers/backend/src/ ./ +COPY preprocessing/resources . +COPY preprocessing/other-scripts . 
+ +ENTRYPOINT python3 headstart.py diff --git a/server/preprocessing/other-scripts/run_vis_layout.R b/server/preprocessing/other-scripts/run_vis_layout.R new file mode 100644 index 000000000..f3bac58e5 --- /dev/null +++ b/server/preprocessing/other-scripts/run_vis_layout.R @@ -0,0 +1,53 @@ +rm(list = ls()) + +args <- commandArgs(TRUE) +wd <- args[1] +query <- args[2] +service <- args[3] +params_file <- args[4] +input_file <- args[5] + + +print(wd) +print(query) +print(service) +print(params_file) + +setwd(wd) #Don't forget to set your working directory + +renv::activate() +renv::restore( lockfile = './renv.lock') + +source('utils.R') +source("vis_layout.R") +DEBUG = FALSE + +params <- fromJSON(params_file) + +if (DEBUG==TRUE){ + setup_logging('DEBUG') +} else { + setup_logging('INFO') +} + +tslog <- getLogger('ts') + +tryCatch({ + output_json = vis_layout(text, metadata, + service, + max_clusters = params$MAX_CLUSTERS, + add_stop_words = params$language, + lang = params$language, + taxonomy_separator = params$taxonomy_separator, + list_size = params$list_size) +}, error=function(err){ + tslog$error(gsub("\n", " ", paste("Processing failed", query, paste(params, collapse=" "), err, sep="||"))) + failed$query <<- query + failed$processing_reason <<- err$message +}) + +if (!exists('output_json')) { + output_json <- detect_error(failed) +} + +print(output_json) diff --git a/server/search_triple.docker b/server/search_triple.docker index 1d6da3479..a0414ebfa 100644 --- a/server/search_triple.docker +++ b/server/search_triple.docker @@ -6,7 +6,8 @@ RUN apk update RUN apk add build-base gcc WORKDIR /headstart -COPY workers/triple/src/ ./ +COPY workers/triple/requirements.txt . 
RUN pip install --no-cache-dir -r requirements.txt +COPY workers/triple/src/ ./ ENTRYPOINT python search_triple.py diff --git a/server/services.docker b/server/services.docker index ec9bec175..7ad8e026c 100644 --- a/server/services.docker +++ b/server/services.docker @@ -6,7 +6,8 @@ RUN apk update RUN apk add build-base gcc WORKDIR /headstart -COPY workers/services/src/ ./ +COPY workers/services/requirements.txt . RUN pip install --no-cache-dir -r requirements.txt +COPY workers/services/src/ ./ CMD gunicorn -b 0.0.0.0:5001 'app:new_services_app()' diff --git a/server/workers/backend/src/renv.lock b/server/workers/backend/renv.lock similarity index 100% rename from server/workers/backend/src/renv.lock rename to server/workers/backend/renv.lock diff --git a/server/workers/backend/requirements.txt b/server/workers/backend/requirements.txt new file mode 100644 index 000000000..7800f0fad --- /dev/null +++ b/server/workers/backend/requirements.txt @@ -0,0 +1 @@ +redis diff --git a/server/workers/backend/src/headstart.py b/server/workers/backend/src/headstart.py index 7abc2aa1e..c4f4c242f 100644 --- a/server/workers/backend/src/headstart.py +++ b/server/workers/backend/src/headstart.py @@ -1,8 +1,8 @@ import os -import time +import copy import json import subprocess -from tempfile import TemporaryFile +from tempfile import NamedTemporaryFile import redis redis_store = redis.StrictRedis(host="localhost", port=6379, db=0) @@ -12,28 +12,47 @@ class Backend(object): def __init__(self): # path should be to where in the docker container the Rscript are - self.wd = "headstart" + self.wd = "headstart/backend" self.command = 'Rscript' self.hs = os.path.join(self.wd, "run_vis_layout.R") + self.default_params = {} + self.default_params["MAX_CLUSTERS"] = 15 + self.default_params["language"] = "english" + self.default_params["taxonomy_separator"] = ";" + self.default_params["list_size"] = 100 + + def add_default_params(self, params): + default_params = 
copy.deepcopy(self.default_params) + default_params.update(params) + return default_params def next_item(self): queue, msg = redis_store.blpop("input_data") - msg = json.loads(msg) + msg = json.loads(msg.decode('utf-8')) k = msg.get('id') + params = self.add_default_params(msg.get('params')) input_data = msg.get('input_data') - return k, input_data - - def create_map(self, input_data): - with TemporaryFile() as input: - cmd = [self.command, self.hs, input] - output = subprocess.check_output(cmd) + return k, params, input_data + + def create_map(self, params, input_data): + with NamedTemporaryFile(mode='w+', suffix='.json') as param_file: + with NamedTemporaryFile(mode='w+', suffix='.json') as input_file: + json.dump(params, param_file) + param_file.flush() + json.dump(input_data, input_file) + input_file.flush() + cmd = [self.command, self.hs, self.wd, + params.get('q'), params.get('service'), + param_file.name, input_file.name] + print(cmd) + output = subprocess.check_output(cmd) output = [o for o in output.decode('utf-8').split('\n') if len(o) > 0] return output[-1] def run(self): while True: - k, input_data = self.next_item() - output = self.create_map(input_data) + k, params, input_data = self.next_item() + output = self.create_map(params, input_data) redis_store.set(k+"_output", output) diff --git a/server/workers/services/src/requirements.txt b/server/workers/services/requirements.txt similarity index 63% rename from server/workers/services/src/requirements.txt rename to server/workers/services/requirements.txt index cf48ee5cc..698c4e3b8 100644 --- a/server/workers/services/src/requirements.txt +++ b/server/workers/services/requirements.txt @@ -1,5 +1,7 @@ flask flask-restx +Werkzeug==0.16.1 +marshmallow gunicorn redis hiredis diff --git a/server/workers/services/src/apis/request_validators.py b/server/workers/services/src/apis/request_validators.py index 3fb1a63b0..acfc0c618 100644 --- a/server/workers/services/src/apis/request_validators.py +++ 
b/server/workers/services/src/apis/request_validators.py @@ -20,5 +20,5 @@ def fix_years(self, in_data, **kwargs): @validates('from_') def is_not_in_future(self, date): - if date > datetime.today(): + if date > datetime.today().date(): raise ValidationError("Starting date can't be in the future.") diff --git a/server/workers/services/src/apis/triple.py b/server/workers/services/src/apis/triple.py index 3ca137ab3..b03b2648a 100644 --- a/server/workers/services/src/apis/triple.py +++ b/server/workers/services/src/apis/triple.py @@ -60,7 +60,8 @@ def post(self): if errors: abort(400, str(errors)) k = str(uuid.uuid4()) - d = {"id": k, "params": data, "endpoint": "search"} + d = {"id": k, "params": data, + "endpoint": "search"} redis_store.rpush("triple", json.dumps(d)) result = get_key(redis_store, k) diff --git a/server/workers/triple/src/requirements.txt b/server/workers/triple/requirements.txt similarity index 100% rename from server/workers/triple/src/requirements.txt rename to server/workers/triple/requirements.txt diff --git a/server/workers/triple/src/search_triple.py b/server/workers/triple/src/search_triple.py index b5cf56385..ab8bc487c 100644 --- a/server/workers/triple/src/search_triple.py +++ b/server/workers/triple/src/search_triple.py @@ -28,6 +28,7 @@ def next_item(self): msg = json.loads(msg) k = msg.get('id') params = msg.get('params') + params["service"] = "triple" endpoint = msg.get('endpoint') return k, params, endpoint @@ -65,11 +66,12 @@ def search(self, parameters): ] } }} + sort = self.build_sort_order(parameters) res = self.es.search( index=index, body=body, size=100, - sort=self.build_sort_order(parameters)) + sort=sort) if parameters.get('raw'): return res else: @@ -94,33 +96,43 @@ def process_result(self, result): df["_source"].apply(pd.Series)], axis=1) metadata = pd.DataFrame() - metadata["id"] = df.identifier.map(lambda x: x[0] if x else "") - metadata["title"] = df.title.map(lambda x: x[0] if x else "") - metadata["authors"] = 
df.author.map(lambda x: self.get_authors(x) if x else "") - metadata["abstract"] = df.abstract.map(lambda x: x[0] if x else "") - metadata["published_in"] = df.publisher.map(lambda x: x[0].get('name') if x else "") - metadata["year"] = df.datestamp.map(lambda x: x if x else "") - metadata["url"] = df.url.map(lambda x: x[0] if x else "") + metadata["id"] = df.identifier.map(lambda x: x[0] if isinstance(x, list) else "") + metadata["title"] = df.title.map(lambda x: x[0] if isinstance(x, list) else "") + metadata["authors"] = df.author.map(lambda x: self.get_authors(x) if isinstance(x, list) else "") + metadata["abstract"] = df.abstract.map(lambda x: x[0] if isinstance(x, list) else "") + metadata["published_in"] = df.publisher.map(lambda x: x[0].get('name') if isinstance(x, list) else "") + metadata["year"] = df.datestamp.map(lambda x: x if isinstance(x, str) else "") + metadata["url"] = df.url.map(lambda x: x[0] if isinstance(x, list) else "") metadata["readers"] = 0 - metadata["subject"] = df.keyword.map(lambda x: "; ".join(x) if x else "") + metadata["subject"] = df.keyword.map(lambda x: "; ".join(x) if isinstance(x, list) else "") input_data = {} - input_data["metadata"] = metadata - input_data["text"] = metadata.apply(lambda x: ". ".join(x[["title", "abstract"]]), axis=1) - return input_data.to_json() + input_data["metadata"] = metadata.to_json() + input_data["text"] = metadata.apply(lambda x: ". 
".join(x[["title", "abstract"]]), axis=1).to_json() + return input_data @staticmethod def get_authors(authorlist): - return "; ".join([", ".join([a.get('lastname')[0], a.get('firstname')[0]]) for a in authorlist if a]) + authors = [] + for a in authorlist: + if a: + author = [] + for n in ['lastname', 'firstname']: + if a.get(n, [None])[0]: + author.append(a.get(n)[0]) + authors.append(", ".join(author)) + return "; ".join(authors) def run(self): while True: k, params, endpoint = self.next_item() if endpoint == "mappings": res = self.get_mappings(params.get('index')) - redis_store.set(k, json.dumps(res)) + redis_store.set(k+"_output", json.dumps(res)) if endpoint == "search": - res = self.search(params) - redis_store.set(k, json.dumps(res)) + res = {} + res["input_data"] = self.search(params) + res["params"] = params + redis_store.rpush("input_data", json.dumps(res)) if __name__ == '__main__': From aefb1426ccdaba19298b6101e01494fbceff17bf Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Tue, 10 Mar 2020 20:24:46 +0100 Subject: [PATCH 20/99] subprocess works --- docker-compose.yml | 4 +- .../other-scripts/run_vis_layout.R | 91 ++- server/workers/backend/renv.lock | 770 +++++++++++------- server/workers/backend/src/headstart.py | 4 +- server/workers/services/src/apis/triple.py | 3 +- server/workers/triple/src/search_triple.py | 10 +- 6 files changed, 551 insertions(+), 331 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index a3023fd94..ebbb877cf 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -40,9 +40,7 @@ services: restart: always network_mode: "host" volumes: - - type: bind - source: /var/opt/renv - target: /root/.local/share/renv + - "/var/opt/renv:/root/.local/share/renv" volumes: redis: diff --git a/server/preprocessing/other-scripts/run_vis_layout.R b/server/preprocessing/other-scripts/run_vis_layout.R index f3bac58e5..e9162ec0a 100644 --- a/server/preprocessing/other-scripts/run_vis_layout.R +++ 
b/server/preprocessing/other-scripts/run_vis_layout.R @@ -7,22 +7,34 @@ service <- args[3] params_file <- args[4] input_file <- args[5] - -print(wd) -print(query) -print(service) -print(params_file) - setwd(wd) #Don't forget to set your working directory renv::activate() -renv::restore( lockfile = './renv.lock') +renv::restore(lockfile = './renv.lock') +library(tibble) +library(tidyr) source('utils.R') source("vis_layout.R") +registerDoParallel(5) DEBUG = FALSE params <- fromJSON(params_file) +input_data <- fromJSON(input_file) +text <- unnest(as_tibble(fromJSON(input_data$text))) +metadata <- unnest(as_tibble(fromJSON(input_data$metadata))) + +if (!is.null(params$lang_id)) { + lang_id <- params$lang_id +} else { + lang_id <- 'all' +} + +if (!is.null(params$vis_type)) { + vis_type <- params$vis_type +} else { + vis_type <- 'overview' +} if (DEBUG==TRUE){ setup_logging('DEBUG') @@ -30,16 +42,73 @@ if (DEBUG==TRUE){ setup_logging('INFO') } +valid_langs <- list( + 'afr'='afrikaans', + 'akk'='akkadian', + 'ara'='arabic', + 'baq'='basque', + 'bel'='belarusian', + 'chi'='chinese', + 'cze'='czech', + 'dan'='danish', + 'dut'='dutch', + 'eng'='english', + 'est'='estonian', + 'fin'='finnish', + 'fre'='french', + 'geo'='georgian', + 'ger'='german', + 'gle'='irish', + 'glg'='galician', + 'grc'='greek', + 'gre'='greek', + 'heb'='hebrew', + 'hrv'='croatian', + 'hun'='hungarian', + 'ice'='icelandic', + 'ind'='indonesian', + 'ita'='italian', + 'jpn'='japanese', + 'kor'='korean', + 'lat'='latin', + 'lit'='lithuanian', + 'nau'='nauru', + 'nob'='norwegian', + 'nor'='norwegian', + 'ota'='turkish', + 'per'='persian', + 'pol'='polish', + 'por'='portuguese', + 'rum'='romanian', + 'rus'='russian', + 'slo'='slovak', + 'slv'='slovenian', + 'spa'='spanish', + 'srp'='serbian', + 'sux'='sumerian', + 'swe'='swedish', + 'tha'='thai', + 'tur'='turkish', + 'ukr'='ukrainian', + 'vie'='vietnamese' +) + +MAX_CLUSTERS = params$MAX_CLUSTERS +LANGUAGE <- get_service_lang(lang_id, valid_langs, service) 
+ADDITIONAL_STOP_WORDS = LANGUAGE$name + tslog <- getLogger('ts') +failed <- list(params=params) tryCatch({ output_json = vis_layout(text, metadata, service, - max_clusters = params$MAX_CLUSTERS, - add_stop_words = params$language, - lang = params$language, + max_clusters = MAX_CLUSTERS, + add_stop_words = ADDITIONAL_STOP_WORDS, + lang = LANGUAGE$name, taxonomy_separator = params$taxonomy_separator, - list_size = params$list_size) + list_size = params$list_size, + vis_type=vis_type) }, error=function(err){ tslog$error(gsub("\n", " ", paste("Processing failed", query, paste(params, collapse=" "), err, sep="||"))) failed$query <<- query diff --git a/server/workers/backend/renv.lock b/server/workers/backend/renv.lock index 997f79476..734ae671a 100644 --- a/server/workers/backend/renv.lock +++ b/server/workers/backend/renv.lock @@ -11,211 +11,7 @@ } ] }, - "Bioconductor": { - "Repositories": [ - { - "Name": "BioCsoft", - "URL": "https://bioconductor.org/packages/3.8/bioc" - }, - { - "Name": "BioCann", - "URL": "https://bioconductor.org/packages/3.8/data/annotation" - }, - { - "Name": "BioCexp", - "URL": "https://bioconductor.org/packages/3.8/data/experiment" - }, - { - "Name": "BioCworkflows", - "URL": "https://bioconductor.org/packages/3.8/workflows" - }, - { - "Name": "CRAN", - "URL": "https://cloud.r-project.org" - } - ] - }, "Packages": { - "base64enc": { - "Package": "base64enc", - "Version": "0.1-3", - "Source": "CRAN", - "Hash": "eec0d710cee1455ba24eb6ab622a60e9" - }, - "BH": { - "Package": "BH", - "Version": "1.69.0-1", - "Source": "CRAN", - "Hash": "88e64b38758666b85d283617231bb766" - }, - "cli": { - "Package": "cli", - "Version": "1.1.0", - "Source": "CRAN", - "Hash": "c0a5f242aa2259cf7e40da04bb89f5d8" - }, - "crayon": { - "Package": "crayon", - "Version": "1.3.4", - "Source": "CRAN", - "Hash": "1e93bda8f2b60a0defc85c82a39e2891" - }, - "curl": { - "Package": "curl", - "Version": "3.3", - "Source": "CRAN", - "Hash": "921ecc2db7bca3844daaec11fce12e76" - }, - 
"digest": { - "Package": "digest", - "Version": "0.6.18", - "Source": "CRAN", - "Hash": "50c07175e40eda4f3f1deb0d26a3f9dc" - }, - "evaluate": { - "Package": "evaluate", - "Version": "0.13", - "Source": "CRAN", - "Hash": "1a4182c08eece9c529eec110119ac6a7" - }, - "highr": { - "Package": "highr", - "Version": "0.8", - "Source": "CRAN", - "Hash": "2f5651aef7832e8eea60d3f3add2ffaa" - }, - "htmltools": { - "Package": "htmltools", - "Version": "0.3.6", - "Source": "CRAN", - "Hash": "881e91b8b764a550de68ad18a98c05b5" - }, - "jsonlite": { - "Package": "jsonlite", - "Version": "1.6", - "Source": "CRAN", - "Hash": "9ec2b2fe0e874a66d0cb54d8ced5618e" - }, - "knitr": { - "Package": "knitr", - "Version": "1.22", - "Source": "CRAN", - "Hash": "b1ef4af5ce7c39dcba8da6ff39bfd7a1" - }, - "magrittr": { - "Package": "magrittr", - "Version": "1.5", - "Source": "CRAN", - "Hash": "cb6367fec3fd68ff41424fe9797b8eaf" - }, - "markdown": { - "Package": "markdown", - "Version": "0.9", - "Source": "CRAN", - "Hash": "5e5d7e3b37fbdb3b06e3f9484f80db0e" - }, - "mime": { - "Package": "mime", - "Version": "0.6", - "Source": "CRAN", - "Hash": "217bc37b46b42e1e0636460ae3b96a7d" - }, - "praise": { - "Package": "praise", - "Version": "1.0.0", - "Source": "CRAN", - "Hash": "20ffbe3f59f58529a6786101392fa476" - }, - "processx": { - "Package": "processx", - "Version": "3.3.1.9000", - "Source": "GitHub", - "RemoteType": "github", - "RemoteHost": "api.github.com", - "RemoteRepo": "processx", - "RemoteUsername": "r-lib", - "RemoteRef": "master", - "RemoteSha": "505cb1f8dd1f4b9ff35ed2f64ce0e86dc7855789", - "Hash": "37c3fe67b4a210d04262bcab0c7eb551" - }, - "R6": { - "Package": "R6", - "Version": "2.4.0", - "Source": "CRAN", - "Hash": "d42941d973f7f28b32f694bc89fb9d5f" - }, - "Rcpp": { - "Package": "Rcpp", - "Version": "1.0.1", - "Source": "CRAN", - "Hash": "542a8dbcf371718e4dee591341d28c43" - }, - "RCurl": { - "Package": "RCurl", - "Version": "1.95-4.12", - "Source": "CRAN", - "Hash": 
"23866c3a35f74fd999e83d6ee38ee5c7" - }, - "rlang": { - "Package": "rlang", - "Version": "0.3.4", - "Source": "CRAN", - "Hash": "1367935e1052d25fd616368dea6d8bf5" - }, - "rmarkdown": { - "Package": "rmarkdown", - "Version": "1.12", - "Source": "CRAN", - "Hash": "6adbc9e47370f441b5ab2a4a4bd066df" - }, - "stringr": { - "Package": "stringr", - "Version": "1.4.0", - "Source": "CRAN", - "Hash": "01fb6c8262ac804d9bafa60cea8773da" - }, - "testthat": { - "Package": "testthat", - "Version": "2.1.1", - "Source": "CRAN", - "Hash": "d1d096420c20ce540736c4e780d3c9bb" - }, - "tinytex": { - "Package": "tinytex", - "Version": "0.13", - "Source": "CRAN", - "Hash": "b866f0a0298dc94dc2e63252bb08ab0c" - }, - "withr": { - "Package": "withr", - "Version": "2.1.2", - "Source": "CRAN", - "Hash": "ce9ffa6d865ecaf6a56cacc811565e07" - }, - "xfun": { - "Package": "xfun", - "Version": "0.7", - "Source": "CRAN", - "Hash": "0821bdcb5e455983cf2daae1c88038c1" - }, - "xml2": { - "Package": "xml2", - "Version": "1.2.0", - "Source": "CRAN", - "Hash": "fe28830040ea8a0cd9d41749b8f9f715" - }, - "yaml": { - "Package": "yaml", - "Version": "2.2.0", - "Source": "CRAN", - "Hash": "e75dfa28ca59a73adfabf6899ce04472" - }, - "arsenal": { - "Package": "arsenal", - "Version": "3.1.0", - "Source": "CRAN", - "Hash": "506ae2edc7d8c64e8a992115b3336184" - }, "askpass": { "Package": "askpass", "Version": "1.1", @@ -234,29 +30,35 @@ "Source": "CRAN", "Hash": "8f863a45c5b6c9ccd5a71988138a48c2" }, + "BH": { + "Package": "BH", + "Version": "1.69.0-1", + "Source": "CRAN", + "Hash": "88e64b38758666b85d283617231bb766" + }, + "bibtex": { + "Package": "bibtex", + "Version": "0.4.2", + "Source": "CRAN", + "Hash": "be04c345083991c18a66f167682e0eaf" + }, "bitops": { "Package": "bitops", "Version": "1.0-6", "Source": "CRAN", "Hash": "f72db5de0feaf15f856ccc2aefdf91de" }, - "broom": { - "Package": "broom", - "Version": "0.5.2", - "Source": "CRAN", - "Hash": "a9d79e24f3ce764752c892f0d5f7f61c" - }, - "callr": { - "Package": "callr", - 
"Version": "3.2.0", + "caTools": { + "Package": "caTools", + "Version": "1.17.1.2", "Source": "CRAN", - "Hash": "7af03bdfa915fefcf1e1abcfad81755f" + "Hash": "20eca4011f39828a8109a027e0e70ee2" }, - "cellranger": { - "Package": "cellranger", + "cli": { + "Package": "cli", "Version": "1.1.0", "Source": "CRAN", - "Hash": "a6279d19fb8783f899f8dc3edb9a71b0" + "Hash": "c0a5f242aa2259cf7e40da04bb89f5d8" }, "clipr": { "Package": "clipr", @@ -264,23 +66,65 @@ "Source": "CRAN", "Hash": "b5c8e6072ef59bb5d2e755f4b249dae0" }, + "cluster": { + "Package": "cluster", + "Version": "2.0.7-1", + "Source": "CRAN", + "Hash": "c555ac45354b45aee2025cc4e0e63559" + }, + "codetools": { + "Package": "codetools", + "Version": "0.2-15", + "Source": "CRAN", + "Hash": "71fa8e1fac5dc255507f5ed253c6a367" + }, "colorspace": { "Package": "colorspace", "Version": "1.4-1", "Source": "CRAN", "Hash": "70941d6476af18c149b5b6916f87a7c5" }, - "DBI": { - "Package": "DBI", + "crayon": { + "Package": "crayon", + "Version": "1.3.4", + "Source": "CRAN", + "Hash": "1e93bda8f2b60a0defc85c82a39e2891" + }, + "crosstalk": { + "Package": "crosstalk", "Version": "1.0.0", "Source": "CRAN", - "Hash": "5608e5aeeba9ae979e5bfd26ca0835a5" + "Hash": "751f5746f428e5c0e453a53dac3c85af" }, - "dbplyr": { - "Package": "dbplyr", - "Version": "1.4.0", + "crul": { + "Package": "crul", + "Version": "0.7.4", "Source": "CRAN", - "Hash": "3bae66836ad0369d925be9c0b58bdb36" + "Hash": "ab3847b3be70aa3e545acf2dbc5ee049" + }, + "curl": { + "Package": "curl", + "Version": "3.3", + "Source": "CRAN", + "Hash": "921ecc2db7bca3844daaec11fce12e76" + }, + "data.table": { + "Package": "data.table", + "Version": "1.12.2", + "Source": "CRAN", + "Hash": "cb5169933d3cb61b8ddc01998858c87e" + }, + "digest": { + "Package": "digest", + "Version": "0.6.19", + "Source": "CRAN", + "Hash": "b0ffa68c2a452ea6dafda0f7810b3e7d" + }, + "doParallel": { + "Package": "doParallel", + "Version": "1.0.14", + "Source": "CRAN", + "Hash": "0ea05ab6556ff10f3bed2a4f6caf23e2" }, 
"dplyr": { "Package": "dplyr", @@ -288,6 +132,18 @@ "Source": "CRAN", "Hash": "8cdcbbb5b14d9f9aea8ba9b9fa06729b" }, + "DT": { + "Package": "DT", + "Version": "0.6", + "Source": "CRAN", + "Hash": "c986d4a1754a2129802672fc5c26fc00" + }, + "ecodist": { + "Package": "ecodist", + "Version": "2.0.1", + "Source": "CRAN", + "Hash": "e15d1b21493046010c14d41c28d588c9" + }, "ellipsis": { "Package": "ellipsis", "Version": "0.1.0", @@ -306,17 +162,29 @@ "Source": "CRAN", "Hash": "c5d5b0c24161479679cfbf94fba339f4" }, - "fs": { - "Package": "fs", - "Version": "1.3.1", + "foreach": { + "Package": "foreach", + "Version": "1.4.4", + "Source": "CRAN", + "Hash": "bd821384f7261ac6b892012ce2b82226" + }, + "foreign": { + "Package": "foreign", + "Version": "0.8-70", + "Source": "CRAN", + "Hash": "054c310c9f6a781ae41c0030ed796db8" + }, + "fossil": { + "Package": "fossil", + "Version": "0.3.7", "Source": "CRAN", - "Hash": "110467e48d2be4583fe18d7bdbbc3c54" + "Hash": "0e793b74a4d561a18aeb2e3077c70a88" }, - "generics": { - "Package": "generics", - "Version": "0.0.2", + "gdata": { + "Package": "gdata", + "Version": "2.18.0", "Source": "CRAN", - "Hash": "c2a574a14f5f45a84ead80391c3075d6" + "Hash": "e7c9dd5450cfbb01dd80e4b873ef9cd9" }, "ggplot2": { "Package": "ggplot2", @@ -330,17 +198,31 @@ "Source": "CRAN", "Hash": "156f76da413ebe13f4d7e65ae6a5d19f" }, + "GMD": { + "Package": "GMD", + "Version": "0.3.3", + "Source": "CRAN", + "RemoteType": "url", + "RemoteUrl": "https://cran.r-project.org/src/contrib/Archive/GMD/GMD_0.3.3.tar.gz", + "Hash": "f51cade21ec66181e2070c5606bdd995" + }, + "gplots": { + "Package": "gplots", + "Version": "3.0.1.1", + "Source": "CRAN", + "Hash": "3fab29041c244204a2e61a53cd728420" + }, "gtable": { "Package": "gtable", "Version": "0.3.0", "Source": "CRAN", "Hash": "120444406cc884baa3a2ff5c0566045b" }, - "haven": { - "Package": "haven", - "Version": "2.1.0", + "gtools": { + "Package": "gtools", + "Version": "3.8.1", "Source": "CRAN", - "Hash": 
"a5147d91e024a58883490a0300cd0534" + "Hash": "a7fe2106d673bc8ebc7529e33d25c2ea" }, "hms": { "Package": "hms", @@ -348,41 +230,137 @@ "Source": "CRAN", "Hash": "c1a7ac1b51eb0f5ef51a4ea256eb9cbe" }, + "htmltools": { + "Package": "htmltools", + "Version": "0.3.6", + "Source": "CRAN", + "Hash": "881e91b8b764a550de68ad18a98c05b5" + }, + "htmlwidgets": { + "Package": "htmlwidgets", + "Version": "1.3", + "Source": "CRAN", + "Hash": "f9779405095eb05656a9aa75e62606cb" + }, + "httpcode": { + "Package": "httpcode", + "Version": "0.2.0", + "Source": "CRAN", + "Hash": "4220672b59d79cab3b3adb8bd65fe577" + }, + "httpuv": { + "Package": "httpuv", + "Version": "1.5.1", + "Source": "CRAN", + "Hash": "8e337f2d75bda94fc57a21619f9249fa" + }, "httr": { "Package": "httr", "Version": "1.4.0", "Source": "CRAN", "Hash": "f5dc99b1972d4e9eaf6f5c452339db08" }, + "iterators": { + "Package": "iterators", + "Version": "1.0.10", + "Source": "CRAN", + "Hash": "14bee32d7116523ad5083d2147844208" + }, + "jaod": { + "Package": "jaod", + "Version": "0.1.0", + "Source": "CRAN", + "Hash": "7ef3bd015b582ae7e08bfb1e69395dfb" + }, + "jsonlite": { + "Package": "jsonlite", + "Version": "1.6", + "Source": "CRAN", + "Hash": "9ec2b2fe0e874a66d0cb54d8ced5618e" + }, + "KernSmooth": { + "Package": "KernSmooth", + "Version": "2.23-15", + "Source": "CRAN", + "Hash": "582d281b0945e11da4f99f22ae9a3044" + }, "labeling": { "Package": "labeling", "Version": "0.3", "Source": "CRAN", "Hash": "379ec196f3f3bb312727d52b022fa8f9" }, + "later": { + "Package": "later", + "Version": "0.8.0", + "Source": "CRAN", + "Hash": "718545eb0b5b354929864914c818ae53" + }, + "lattice": { + "Package": "lattice", + "Version": "0.20-35", + "Source": "CRAN", + "Hash": "96238e972e6d8c631c273d91b13a90c3" + }, "lazyeval": { "Package": "lazyeval", "Version": "0.2.2", "Source": "CRAN", "Hash": "8c343e48c3f58de2c70a641592abc0f1" }, + "logging": { + "Package": "logging", + "Version": "0.9-107", + "Source": "CRAN", + "Hash": 
"2d4de7886c2dc4a35006039cd7d4f242" + }, "lubridate": { "Package": "lubridate", "Version": "1.7.4", "Source": "CRAN", "Hash": "4af93df0d50bb3919de67f36e7d3d00b" }, + "magrittr": { + "Package": "magrittr", + "Version": "1.5", + "Source": "CRAN", + "Hash": "cb6367fec3fd68ff41424fe9797b8eaf" + }, + "maps": { + "Package": "maps", + "Version": "3.3.0", + "Source": "CRAN", + "Hash": "9c69c5ef1c12a013cefe7eb88ae4cac4" + }, "MASS": { "Package": "MASS", "Version": "7.3-51.4", "Source": "CRAN", "Hash": "a670f645948409d325eddb0b199ed968" }, - "modelr": { - "Package": "modelr", - "Version": "0.1.4", + "Matrix": { + "Package": "Matrix", + "Version": "1.2-14", + "Source": "CRAN", + "Hash": "b411f6d86aaa6ed87bd13ef69b32b7a6" + }, + "mgcv": { + "Package": "mgcv", + "Version": "1.8-24", "Source": "CRAN", - "Hash": "5eae0bb48971866c0bf67a6d2b493f1b" + "Hash": "04a660f17e16de5d2081a9e06212fc14" + }, + "mime": { + "Package": "mime", + "Version": "0.6", + "Source": "CRAN", + "Hash": "217bc37b46b42e1e0636460ae3b96a7d" + }, + "miniUI": { + "Package": "miniUI", + "Version": "0.1.1.1", + "Source": "CRAN", + "Hash": "78d83c530ce013174ce0cabbe3d743e1" }, "munsell": { "Package": "munsell", @@ -390,17 +368,47 @@ "Source": "CRAN", "Hash": "381047af4d6b44ca4d9285dbdea42434" }, + "nlme": { + "Package": "nlme", + "Version": "3.1-137", + "Source": "CRAN", + "Hash": "55f627741007cdb86b124a16183e6a9e" + }, + "NLP": { + "Package": "NLP", + "Version": "0.2-0", + "Source": "CRAN", + "Hash": "a34a7fbfe4c11bf41e085fd2ee1cb752" + }, + "onehot": { + "Package": "onehot", + "Version": "0.1.1", + "Source": "CRAN", + "Hash": "88704001d294ed36bfc54a8d6deacf49" + }, "openssl": { "Package": "openssl", - "Version": "1.3", + "Version": "1.4", + "Source": "CRAN", + "Hash": "4557b03dff7c01f4e7cd10020b0667bc" + }, + "parfossil": { + "Package": "parfossil", + "Version": "0.2.0", + "Source": "CRAN", + "Hash": "fa6f0d351cc11e726f37a48b2f84efeb" + }, + "permute": { + "Package": "permute", + "Version": "0.9-5", "Source": 
"CRAN", - "Hash": "ca742ffe11d603c1b7ea425d7d85a0ef" + "Hash": "851e9678af3f5882feda908a6dbfc757" }, "pillar": { "Package": "pillar", - "Version": "1.4.0", + "Version": "1.4.1", "Source": "CRAN", - "Hash": "0012ae5d08cab261e929647763ba6226" + "Hash": "6606112a90dbe4d2306acf618072af31" }, "pkgconfig": { "Package": "pkgconfig", @@ -420,23 +428,17 @@ "Source": "CRAN", "Hash": "48b2c2a9c4a504ea9167e9252fd9fade" }, - "prettyunits": { - "Package": "prettyunits", - "Version": "1.0.2", - "Source": "CRAN", - "Hash": "47d4646a3be22893c04093e8db164989" - }, - "progress": { - "Package": "progress", - "Version": "1.2.2", + "promises": { + "Package": "promises", + "Version": "1.0.1", "Source": "CRAN", - "Hash": "819ec59e511e26ceed4ebdf272b6671f" + "Hash": "30a46ce3d9fe3958f266181f8c6f541f" }, - "ps": { - "Package": "ps", - "Version": "1.3.0", + "proxy": { + "Package": "proxy", + "Version": "0.4-23", "Source": "CRAN", - "Hash": "12c6a7e02239401d47599ddc44a484f4" + "Hash": "e5a3c4d16846b0da0fa9073e1a63d946" }, "purrr": { "Package": "purrr", @@ -444,35 +446,71 @@ "Source": "CRAN", "Hash": "19767ff705a04566e2f396e15ccc1b39" }, + "R6": { + "Package": "R6", + "Version": "2.4.0", + "Source": "CRAN", + "Hash": "d42941d973f7f28b32f694bc89fb9d5f" + }, + "rAltmetric": { + "Package": "rAltmetric", + "Version": "0.7.9000", + "Source": "GitHub", + "RemoteType": "github", + "RemoteHost": "api.github.com", + "RemoteRepo": "rAltmetric", + "RemoteUsername": "ropensci", + "RemoteRef": "master", + "RemoteSha": "01457412d96a7f7fc420b06c50c7ffea936cfe20", + "Hash": "d5cbfac197fda42f8eb84dfa8c002240" + }, + "rbace": { + "Package": "rbace", + "Version": "0.0.6.9510", + "Source": "GitHub", + "RemoteType": "github", + "RemoteHost": "api.github.com", + "RemoteRepo": "rbace", + "RemoteUsername": "ropensci", + "RemoteRef": "master", + "RemoteSha": "f3f565fd102a359f34e16aec9b60ea9e108a4547", + "Hash": "8bc1241abc9cbb314f5ec2387cebe529" + }, "RColorBrewer": { "Package": "RColorBrewer", "Version": "1.1-2", 
"Source": "CRAN", "Hash": "cdfb92174501c241a0d4e13fcac600a4" }, + "Rcpp": { + "Package": "Rcpp", + "Version": "1.0.1", + "Source": "CRAN", + "Hash": "542a8dbcf371718e4dee591341d28c43" + }, + "rcrossref": { + "Package": "rcrossref", + "Version": "0.8.6.9100", + "Source": "GitHub", + "RemoteType": "github", + "RemoteHost": "api.github.com", + "RemoteRepo": "rcrossref", + "RemoteUsername": "ropensci", + "RemoteRef": "async", + "RemoteSha": "9b1223f79dffa9280b77cba8b476f3210cdaeccb", + "Hash": "1024c9aba037a12265d7e07679f5712f" + }, "readr": { "Package": "readr", "Version": "1.3.1", "Source": "CRAN", "Hash": "19e0ffd8bcaff3476349d8787fc8bd59" }, - "readxl": { - "Package": "readxl", - "Version": "1.3.1", - "Source": "CRAN", - "Hash": "b0467b5406ee2c29709c4a57cab692d1" - }, - "rematch": { - "Package": "rematch", - "Version": "1.0.1", - "Source": "CRAN", - "Hash": "d80f39f4db9c537a1def235969d648ea" - }, - "reprex": { - "Package": "reprex", - "Version": "0.3.0", + "rentrez": { + "Package": "rentrez", + "Version": "1.2.2", "Source": "CRAN", - "Hash": "99df2b9f9ef2946d5af0910afaf56198" + "Hash": "2dd2187a1718e2d89a28471542b137be" }, "reshape2": { "Package": "reshape2", @@ -480,17 +518,35 @@ "Source": "CRAN", "Hash": "a0b8854199dbfa29a71f514b3ffcddf0" }, - "rstudioapi": { - "Package": "rstudioapi", - "Version": "0.10", + "rlang": { + "Package": "rlang", + "Version": "0.3.4", "Source": "CRAN", - "Hash": "6c03ab57831cbdae14d599151e832e18" + "Hash": "1367935e1052d25fd616368dea6d8bf5" }, - "rvest": { - "Package": "rvest", - "Version": "0.3.4", + "rlist": { + "Package": "rlist", + "Version": "0.4.6.1", "Source": "CRAN", - "Hash": "a40342e9ea5fd36819a8c8baac506d43" + "Hash": "c41c1a1ade8ced1887350214cec755bd" + }, + "ropenaire": { + "Package": "ropenaire", + "Version": "0.1.7.9210", + "Source": "GitHub", + "RemoteType": "github", + "RemoteHost": "api.github.com", + "RemoteRepo": "ropenaire", + "RemoteUsername": "sckott", + "RemoteRef": "master", + "RemoteSha": 
"64ed2560741e53b850fccd70ecf379721ca244fd", + "Hash": "d69c0d5e5c613f7a5e02c2622088303f" + }, + "rplos": { + "Package": "rplos", + "Version": "0.8.6", + "Source": "CRAN", + "Hash": "7522c5d653143d57dd538aa54a8c0692" }, "scales": { "Package": "scales", @@ -498,11 +554,59 @@ "Source": "CRAN", "Hash": "17cfbe1a4fcc8d0440a2af68d9e07bf4" }, - "selectr": { - "Package": "selectr", - "Version": "0.4-1", + "shapefiles": { + "Package": "shapefiles", + "Version": "0.7", + "Source": "CRAN", + "Hash": "1f7f87edb340c0d22168c204c974b091" + }, + "shiny": { + "Package": "shiny", + "Version": "1.3.2", + "Source": "CRAN", + "Hash": "f7dd1b66cae0857dd3e07727d5eb7e29" + }, + "slam": { + "Package": "slam", + "Version": "0.1-45", + "Source": "CRAN", + "Hash": "39cf3085b06f29a48d58bd1778fc52e8" + }, + "SnowballC": { + "Package": "SnowballC", + "Version": "0.6.0", "Source": "CRAN", - "Hash": "cc537e333408bb2f0088bc7b20f0e821" + "Hash": "2481de1ebb76bfc6e48e1636055ef2b2" + }, + "solrium": { + "Package": "solrium", + "Version": "1.0.2.9100", + "Source": "GitHub", + "RemoteType": "github", + "RemoteHost": "api.github.com", + "RemoteRepo": "solrium", + "RemoteUsername": "chreman", + "RemoteRef": "master", + "RemoteSha": "0477cb7d69624620ab153da7cf379da18ee10f24", + "Hash": "b4340c273ba40c276b5e7c0c4b0f9876" + }, + "sourcetools": { + "Package": "sourcetools", + "Version": "0.1.7", + "Source": "CRAN", + "Hash": "729278659af88ed36db42f46ccca0ebd" + }, + "sp": { + "Package": "sp", + "Version": "1.3-1", + "Source": "CRAN", + "Hash": "1a01377f70fbc6c380a712c61af85b0b" + }, + "stringdist": { + "Package": "stringdist", + "Version": "0.9.5.1", + "Source": "CRAN", + "Hash": "2ff4e084ce6d1c4e4b43c7159a231ad6" }, "stringi": { "Package": "stringi", @@ -510,17 +614,35 @@ "Source": "CRAN", "Hash": "0472e71f2347e13cd398d5225ff3b0ea" }, + "stringr": { + "Package": "stringr", + "Version": "1.4.0", + "Source": "CRAN", + "Hash": "01fb6c8262ac804d9bafa60cea8773da" + }, "sys": { "Package": "sys", "Version": "3.2", 
"Source": "CRAN", "Hash": "22bf912f5ebfddff8992fdd4d01e9bb4" }, + "tau": { + "Package": "tau", + "Version": "0.0-21", + "Source": "CRAN", + "Hash": "89df6485218ede79e86c9c25fa2fdf34" + }, + "textcat": { + "Package": "textcat", + "Version": "1.0-6", + "Source": "CRAN", + "Hash": "f7a10232fbee1891f74aa5ee338e40cc" + }, "tibble": { "Package": "tibble", - "Version": "2.1.1", + "Version": "2.1.3", "Source": "CRAN", - "Hash": "12ce38efdfe72eb34c31873431337e35" + "Hash": "b28f74376db8b145e247e29e20bb7deb" }, "tidyr": { "Package": "tidyr", @@ -534,11 +656,23 @@ "Source": "CRAN", "Hash": "66c1a6de7f98266b76503698f2ca111d" }, - "tidyverse": { - "Package": "tidyverse", - "Version": "1.2.1", + "tm": { + "Package": "tm", + "Version": "0.6", + "Source": "CRAN", + "Hash": "4ca6eadf6f48359dd771358026ec0dd4" + }, + "triebeard": { + "Package": "triebeard", + "Version": "0.3.0", + "Source": "CRAN", + "Hash": "b8269394feccda4dbc3548cfcccfe06f" + }, + "urltools": { + "Package": "urltools", + "Version": "1.7.3", "Source": "CRAN", - "Hash": "ea055dfa5ed1e736f1ebacb553cbddbd" + "Hash": "d3b3d098d854f2b358ed3f63ab453d79" }, "utf8": { "Package": "utf8", @@ -552,6 +686,12 @@ "Source": "CRAN", "Hash": "a57267520709009929b09af57d668a2a" }, + "vegan": { + "Package": "vegan", + "Version": "2.5-5", + "Source": "CRAN", + "Hash": "444828ac7cea37e64fb7edfbe61198bf" + }, "viridisLite": { "Package": "viridisLite", "Version": "0.3.0", @@ -564,35 +704,41 @@ "Source": "CRAN", "Hash": "1e74aefd2c67890f504a1fa2d0834d70" }, - "zeallot": { - "Package": "zeallot", - "Version": "0.1.0", + "withr": { + "Package": "withr", + "Version": "2.1.2", "Source": "CRAN", - "Hash": "c33bb7353728bd4547a079b2c351a021" + "Hash": "ce9ffa6d865ecaf6a56cacc811565e07" }, - "lattice": { - "Package": "lattice", - "Version": "0.20-38", + "XML": { + "Package": "XML", + "Version": "3.98-1.20", "Source": "CRAN", - "Hash": "16dfe2407fe485b6a9fb098120008721" + "Hash": "d184b33d55cd9984103f1f6a91050cfd" }, - "Matrix": { - "Package": 
"Matrix", - "Version": "1.2-17", + "xml2": { + "Package": "xml2", + "Version": "1.2.0", "Source": "CRAN", - "Hash": "b2eae6a1a2e206ecc478f3ff9dc98a5e" + "Hash": "fe28830040ea8a0cd9d41749b8f9f715" }, - "mgcv": { - "Package": "mgcv", - "Version": "1.8-28", + "xtable": { + "Package": "xtable", + "Version": "1.8-4", "Source": "CRAN", - "Hash": "7b78c0b314871340c4f56d38e6b66dd5" + "Hash": "f0d359dd5e96d72ea5384d7194f54843" }, - "nlme": { - "Package": "nlme", - "Version": "3.1-139", + "yaml": { + "Package": "yaml", + "Version": "2.2.0", "Source": "CRAN", - "Hash": "120db5776a27efebe1cca6e75cd460a7" + "Hash": "e75dfa28ca59a73adfabf6899ce04472" + }, + "zeallot": { + "Package": "zeallot", + "Version": "0.1.0", + "Source": "CRAN", + "Hash": "c33bb7353728bd4547a079b2c351a021" } } } diff --git a/server/workers/backend/src/headstart.py b/server/workers/backend/src/headstart.py index c4f4c242f..90b41d6e4 100644 --- a/server/workers/backend/src/headstart.py +++ b/server/workers/backend/src/headstart.py @@ -12,9 +12,9 @@ class Backend(object): def __init__(self): # path should be to where in the docker container the Rscript are - self.wd = "headstart/backend" + self.wd = "./" self.command = 'Rscript' - self.hs = os.path.join(self.wd, "run_vis_layout.R") + self.hs = os.path.abspath(os.path.join(self.wd, "run_vis_layout.R")) self.default_params = {} self.default_params["MAX_CLUSTERS"] = 15 self.default_params["language"] = "english" diff --git a/server/workers/services/src/apis/triple.py b/server/workers/services/src/apis/triple.py index b03b2648a..baa84e00d 100644 --- a/server/workers/services/src/apis/triple.py +++ b/server/workers/services/src/apis/triple.py @@ -47,7 +47,8 @@ class Search(Resource): params={"q": "string, query term", "sorting": "string, most-relevant or most-recent", "from": "yyyy-MM-dd", - "to": "yyyy-MM-dd"}) + "to": "yyyy-MM-dd", + "vis_type": "string, overview or streamgraph"}) # @api.marshal_with(search_fields) def post(self): """ diff --git 
a/server/workers/triple/src/search_triple.py b/server/workers/triple/src/search_triple.py index ab8bc487c..b00fd1155 100644 --- a/server/workers/triple/src/search_triple.py +++ b/server/workers/triple/src/search_triple.py @@ -99,15 +99,20 @@ def process_result(self, result): metadata["id"] = df.identifier.map(lambda x: x[0] if isinstance(x, list) else "") metadata["title"] = df.title.map(lambda x: x[0] if isinstance(x, list) else "") metadata["authors"] = df.author.map(lambda x: self.get_authors(x) if isinstance(x, list) else "") - metadata["abstract"] = df.abstract.map(lambda x: x[0] if isinstance(x, list) else "") + metadata["paper_abstract"] = df.abstract.map(lambda x: x[0] if isinstance(x, list) else "") metadata["published_in"] = df.publisher.map(lambda x: x[0].get('name') if isinstance(x, list) else "") metadata["year"] = df.datestamp.map(lambda x: x if isinstance(x, str) else "") metadata["url"] = df.url.map(lambda x: x[0] if isinstance(x, list) else "") metadata["readers"] = 0 metadata["subject"] = df.keyword.map(lambda x: "; ".join(x) if isinstance(x, list) else "") + metadata["oa_state"] = 2 + metadata["link"] = "" + text = pd.DataFrame() + text["id"] = metadata["id"] + text["content"] = metadata.apply(lambda x: ". ".join(x[["title", "paper_abstract"]]), axis=1).to_json() input_data = {} input_data["metadata"] = metadata.to_json() - input_data["text"] = metadata.apply(lambda x: ". 
".join(x[["title", "abstract"]]), axis=1).to_json() + input_data["text"] = text.to_json() return input_data @staticmethod @@ -130,6 +135,7 @@ def run(self): redis_store.set(k+"_output", json.dumps(res)) if endpoint == "search": res = {} + res["id"] = k res["input_data"] = self.search(params) res["params"] = params redis_store.rpush("input_data", json.dumps(res)) From cef5880a403d85cbeb82fb6a9fc4cdf93f094036 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Wed, 11 Mar 2020 01:19:54 +0100 Subject: [PATCH 21/99] backend tweaks --- server/headstart_backend.docker | 151 ++++++++++++++++-- server/preprocessing/other-scripts/cluster.R | 4 +- .../other-scripts/run_vis_layout.R | 1 - .../preprocessing/other-scripts/vis_layout.R | 2 +- 4 files changed, 145 insertions(+), 13 deletions(-) diff --git a/server/headstart_backend.docker b/server/headstart_backend.docker index 579d3b83d..abc0d5baa 100644 --- a/server/headstart_backend.docker +++ b/server/headstart_backend.docker @@ -1,22 +1,155 @@ -FROM rocker/r-ver:3.5.1 +FROM ubuntu:18.04 MAINTAINER Chris Kittel "christopher.kittel@openknowledgemaps.org" -RUN apt-get update -RUN apt-get -y install libssl-dev libcurl4-openssl-dev -RUN apt-get -y install libxml2-dev libz-dev libpoppler-cpp-dev + +ENV DEBIAN_FRONTEND=noninteractive + +ARG R_VERSION +ARG BUILD_DATE +ARG CRAN +## Setting a BUILD_DATE will set CRAN to the matching MRAN date +## No BUILD_DATE means that CRAN will default to latest +ENV R_VERSION=${R_VERSION:-3.5.1} \ + CRAN=${CRAN:-https://cran.rstudio.com} \ + LC_ALL=en_US.UTF-8 \ + LANG=en_US.UTF-8 \ + TERM=xterm + + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + bash-completion \ + ca-certificates \ + file \ + fonts-texgyre \ + g++ \ + gfortran \ + gsfonts \ + libblas-dev \ + libbz2-1.0 \ + libcurl4 \ + libjpeg-turbo8-dev \ + libopenblas-dev \ + libpangocairo-1.0-0 \ + libpcre3 \ + libpng16-16 \ + libreadline7 \ + libtiff5 \ + liblzma5 \ + locales \ + make \ + unzip \ + zip \ + 
zlib1g \ + && echo "en_US.UTF-8 UTF-8" >> /etc/locale.gen \ + && locale-gen en_US.utf8 \ + && /usr/sbin/update-locale LANG=en_US.UTF-8 \ + && BUILDDEPS="curl \ + default-jdk \ + libbz2-dev \ + libcairo2-dev \ + libcurl4-openssl-dev \ + libpango1.0-dev \ + libjpeg-dev \ + libpcre3-dev \ + libpng-dev \ + libreadline-dev \ + libtiff5-dev \ + liblzma-dev \ + libx11-dev \ + libxt-dev \ + perl \ + tcl8.6-dev \ + tk8.6-dev \ + texinfo \ + texlive-extra-utils \ + texlive-fonts-recommended \ + texlive-fonts-extra \ + texlive-latex-recommended \ + x11proto-core-dev \ + xauth \ + xfonts-base \ + xvfb \ + zlib1g-dev" \ + && apt-get install -y --no-install-recommends $BUILDDEPS \ + && cd tmp/ \ + ## Download source code + && curl -O https://cran.r-project.org/src/base/R-3/R-${R_VERSION}.tar.gz \ + ## Extract source code + && tar -xf R-${R_VERSION}.tar.gz \ + && cd R-${R_VERSION} \ + ## Set compiler flags + && R_PAPERSIZE=letter \ + R_BATCHSAVE="--no-save --no-restore" \ + R_BROWSER=xdg-open \ + PAGER=/usr/bin/pager \ + PERL=/usr/bin/perl \ + R_UNZIPCMD=/usr/bin/unzip \ + R_ZIPCMD=/usr/bin/zip \ + R_PRINTCMD=/usr/bin/lpr \ + LIBnn=lib \ + AWK=/usr/bin/awk \ + CFLAGS="-g -O2 -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -g" \ + CXXFLAGS="-g -O2 -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -g" \ + ## Configure options + ./configure --enable-R-shlib \ + --enable-memory-profiling \ + --with-readline \ + --with-blas \ + --with-tcltk \ + --disable-nls \ + --with-recommended-packages \ + ## Build and install + && make \ + && make install \ + ## Add a library directory (for user-installed packages) + && mkdir -p /usr/local/lib/R/site-library \ + && chown root:staff /usr/local/lib/R/site-library \ + && chmod g+ws /usr/local/lib/R/site-library \ + ## Fix library path + && sed -i '/^R_LIBS_USER=.*$/d' /usr/local/lib/R/etc/Renviron \ + && echo 
"R_LIBS_USER=\${R_LIBS_USER-'/usr/local/lib/R/site-library'}" >> /usr/local/lib/R/etc/Renviron \ + && echo "R_LIBS=\${R_LIBS-'/usr/local/lib/R/site-library:/usr/local/lib/R/library:/usr/lib/R/library'}" >> /usr/local/lib/R/etc/Renviron \ + ## Set configured CRAN mirror + && if [ -z "$BUILD_DATE" ]; then MRAN=$CRAN; \ + else MRAN=https://mran.microsoft.com/snapshot/${BUILD_DATE}; fi \ + && echo MRAN=$MRAN >> /etc/environment \ + && echo "options(repos = c(CRAN='$MRAN'), download.file.method = 'libcurl')" >> /usr/local/lib/R/etc/Rprofile.site \ + ## Use littler installation scripts + && Rscript -e "install.packages(c('littler', 'docopt'), repo = '$CRAN')" \ + && ln -s /usr/local/lib/R/site-library/littler/examples/install2.r /usr/local/bin/install2.r \ + && ln -s /usr/local/lib/R/site-library/littler/examples/installGithub.r /usr/local/bin/installGithub.r \ + && ln -s /usr/local/lib/R/site-library/littler/bin/r /usr/local/bin/r \ + ## Clean up from R source install + && cd / \ + && rm -rf /tmp/* \ + && apt-get remove --purge -y $BUILDDEPS \ + && apt-get autoremove -y \ + && apt-get autoclean -y \ + && rm -rf /var/lib/apt/lists/* + +RUN apt update && apt full-upgrade -y && \ + apt install -y links curl vim libcurl4-openssl-dev \ + libxml2-dev libz-dev libpoppler-cpp-dev \ + libssl1.1 libssl-dev && \ + apt clean && \ + rm -f /etc/localtime && \ + ln -s /usr/share/zoneinfo/Europe/Vienna /etc/localtime && \ + dpkg --configure -a + RUN apt-get -y install python3 python3-pip -RUN R -e 'options(repos="https://cran.wu.ac.at")' -RUN R -e 'install.packages("remotes", repos = c(CRAN = "https://cran.rstudio.com"))' -RUN R -e 'install.packages("renv", repos = c(CRAN = "https://cran.rstudio.com"))' +RUN R -e 'options(repos="https://cran.wu.ac.at")' && \ + R -e 'install.packages("remotes", repos = c(CRAN = "https://cran.rstudio.com"))' && \ + R -e 'install.packages("renv", repos = c(CRAN = "https://cran.rstudio.com"))' WORKDIR /headstart COPY workers/backend/requirements.txt . 
RUN pip3 install --no-cache-dir -r requirements.txt COPY workers/backend/renv.lock . -RUN R -e 'renv::consent(provided = TRUE)' -RUN R -e 'renv::restore()' +RUN R -e 'renv::consent(provided = TRUE)' && \ + R -e 'renv::restore()' COPY workers/backend/src/ ./ COPY preprocessing/resources . diff --git a/server/preprocessing/other-scripts/cluster.R b/server/preprocessing/other-scripts/cluster.R index bf8a1732a..85464618a 100644 --- a/server/preprocessing/other-scripts/cluster.R +++ b/server/preprocessing/other-scripts/cluster.R @@ -97,7 +97,7 @@ get_ndms <- function(distance_matrix, mindim=2, maxdim=2, maxit=500) { # nm.nmin = nmds.min(nm) if (nrow(distance_matrix) <= 2) { points <- tryCatch({ - ord <- metaMDS(distance_matrix, k = 2, parallel = 3, trymax=30, + ord <- metaMDS(distance_matrix, k = 2, parallel = 7, trymax=30, engine="monoMDS", distance='cao', threshold = 0.19, nthreshold=10, model = "linear", @@ -115,7 +115,7 @@ get_ndms <- function(distance_matrix, mindim=2, maxdim=2, maxit=500) { points <- cbind(0, 0) } else { points <- tryCatch({ - ord <- metaMDS(distance_matrix, k = 2, parallel = 3, trymax=30, + ord <- metaMDS(distance_matrix, k = 2, parallel = 7, trymax=30, engine="monoMDS", distance='cao', threshold = 0.19, nthreshold=10, model = "linear", diff --git a/server/preprocessing/other-scripts/run_vis_layout.R b/server/preprocessing/other-scripts/run_vis_layout.R index e9162ec0a..ee72a1b65 100644 --- a/server/preprocessing/other-scripts/run_vis_layout.R +++ b/server/preprocessing/other-scripts/run_vis_layout.R @@ -16,7 +16,6 @@ library(tibble) library(tidyr) source('utils.R') source("vis_layout.R") -registerDoParallel(5) DEBUG = FALSE params <- fromJSON(params_file) diff --git a/server/preprocessing/other-scripts/vis_layout.R b/server/preprocessing/other-scripts/vis_layout.R index b992ec8a6..42c484b92 100644 --- a/server/preprocessing/other-scripts/vis_layout.R +++ b/server/preprocessing/other-scripts/vis_layout.R @@ -13,7 +13,7 @@ library(stringi) 
library(stringdist) library(plyr) library(onehot) -registerDoParallel(3) +registerDoParallel(7) vlog <- getLogger('vis') From 77c946b1df373e5c6190f0bc2e9e68721e108419 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Wed, 11 Mar 2020 18:54:47 +0100 Subject: [PATCH 22/99] update gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index dd5d85cea..305e732d5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ server/preprocessing/other-scripts/.Rhistory +server/preprocessing/other-scripts/renv /nbproject/private/ /server/nbproject/private/ *_local.* From 045522b87dbe0f518bf29621026b46b18b612bfd Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Wed, 11 Mar 2020 18:57:27 +0100 Subject: [PATCH 23/99] external logging --- docker-compose.yml | 9 +++++++-- server/headstart_backend.docker | 2 ++ server/preprocessing/other-scripts/utils.R | 3 +++ server/workers/backend/backend.env | 3 +++ server/workers/backend/src/backend.env | 3 --- 5 files changed, 15 insertions(+), 5 deletions(-) create mode 100644 server/workers/backend/backend.env delete mode 100644 server/workers/backend/src/backend.env diff --git a/docker-compose.yml b/docker-compose.yml index ebbb877cf..989f22d41 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -36,11 +36,16 @@ services: context: server dockerfile: headstart_backend.docker env_file: - - server/workers/backend/src/backend.env + - server/workers/backend/backend.env restart: always network_mode: "host" volumes: - - "/var/opt/renv:/root/.local/share/renv" + - type: bind + source: /var/opt/renv + target: /root/.local/share/renv + - type: bind + source: /var/log/headstart + target: /var/log/headstart volumes: redis: diff --git a/server/headstart_backend.docker b/server/headstart_backend.docker index abc0d5baa..60e96984d 100644 --- a/server/headstart_backend.docker +++ b/server/headstart_backend.docker @@ -131,6 +131,7 @@ RUN apt-get update \ RUN apt update && apt full-upgrade -y && 
\ apt install -y links curl vim libcurl4-openssl-dev \ libxml2-dev libz-dev libpoppler-cpp-dev \ + libopenmpi-dev libzmq3-dev \ libssl1.1 libssl-dev && \ apt clean && \ rm -f /etc/localtime && \ @@ -154,5 +155,6 @@ RUN R -e 'renv::consent(provided = TRUE)' && \ COPY workers/backend/src/ ./ COPY preprocessing/resources . COPY preprocessing/other-scripts . +RUN mkdir -p /var/log/headstart && touch /var/log/headstart/headstart.log ENTRYPOINT python3 headstart.py diff --git a/server/preprocessing/other-scripts/utils.R b/server/preprocessing/other-scripts/utils.R index 3ea0cff3e..1dc6ff0c4 100644 --- a/server/preprocessing/other-scripts/utils.R +++ b/server/preprocessing/other-scripts/utils.R @@ -56,6 +56,9 @@ setup_logging <- function(loglevel) { removeHandler('basic.stdout') addHandler(writeToConsole) } else { + if (!file.exists(Sys.getenv("HEADSTART_LOGFILE"))) { + file.create(Sys.getenv("HEADSTART_LOGFILE")) + } getLogger(loglevel) removeHandler('basic.stdout') addHandler(writeToFile, file=Sys.getenv("HEADSTART_LOGFILE")) diff --git a/server/workers/backend/backend.env b/server/workers/backend/backend.env new file mode 100644 index 000000000..6ea3e0991 --- /dev/null +++ b/server/workers/backend/backend.env @@ -0,0 +1,3 @@ +HEADSTART_LOGFILE=/var/log/headstart/headstart.log +RENV_VERSION=0.6.0-98 +CRAN_REPOS=https://cran.wu.ac.at diff --git a/server/workers/backend/src/backend.env b/server/workers/backend/src/backend.env deleted file mode 100644 index 8078d6292..000000000 --- a/server/workers/backend/src/backend.env +++ /dev/null @@ -1,3 +0,0 @@ -RENV_PATHS_CACHE=/renv/cache -RENV_VERSION="0.6.0-98" -CRAN_REPOS="https://cran.wu.ac.at" From a9f5cda15cac522c7f8e90094b0ee412a0249127 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Wed, 11 Mar 2020 21:34:38 +0100 Subject: [PATCH 24/99] close loop --- server/services/search.php | 2 -- 1 file changed, 2 deletions(-) diff --git a/server/services/search.php b/server/services/search.php index 152b21a1d..d8b5b4301 
100644 --- a/server/services/search.php +++ b/server/services/search.php @@ -87,8 +87,6 @@ function search($repository, $dirty_query, $post_params, $param_types, $keyword_ curl_setopt($ch, CURLOPT_POSTFIELDS, $payload); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); $output_json = curl_exec($ch); - var_dump($output_json); - $output_json = NULL; } else { $calculation = new \headstart\preprocessing\calculation\RCalculation($ini_array); $output = $calculation->performCalculationAndReturnOutputAsJSON($WORKING_DIR, $query, $params_filename, $repository); From 994d307a6a7238624e84f165de40b5d011efdf71 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Wed, 11 Mar 2020 23:01:55 +0100 Subject: [PATCH 25/99] maybe a break condition would be nice --- server/preprocessing/other-scripts/cluster.R | 1 + 1 file changed, 1 insertion(+) diff --git a/server/preprocessing/other-scripts/cluster.R b/server/preprocessing/other-scripts/cluster.R index 85464618a..1305bb0bb 100644 --- a/server/preprocessing/other-scripts/cluster.R +++ b/server/preprocessing/other-scripts/cluster.R @@ -33,6 +33,7 @@ create_clusters <- function(distance_matrix, max_clusters=-1, method="ward.D") { cut_off <- get_cut_off(css_cluster, attempt) attempt <- attempt+1 cut_off$k + if (attempt > 50) break }, error = function(err){ vclog$warn(err$message) return (NA) From d9c1bc987564304efb580ce96799045b46e045b2 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Wed, 11 Mar 2020 23:41:00 +0100 Subject: [PATCH 26/99] data cleaning --- server/workers/triple/src/search_triple.py | 25 +++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/server/workers/triple/src/search_triple.py b/server/workers/triple/src/search_triple.py index b00fd1155..80af1ef02 100644 --- a/server/workers/triple/src/search_triple.py +++ b/server/workers/triple/src/search_triple.py @@ -1,4 +1,5 @@ import os +import re import json import redis from elasticsearch import Elasticsearch @@ -104,7 +105,7 @@ def 
@staticmethod
def clean_subject(subject):
    """Strip classification codes and normalise separators in one keyword.

    The keyword is passed through an ordered series of regular-expression
    substitutions. Each step works on the output of the previous one, so
    the order of the steps is significant. Classification residue (DOAJ,
    atira, Dewey Decimal, LOC, WHO, "(General)" / "(all)" headings, ...)
    is removed, and inconsistent keyword separators are rewritten to "; ".

    Args:
        subject: A single raw keyword string.

    Returns:
        The cleaned keyword. This is an empty string when the whole input
        consisted of classification residue.
    """
    # (pattern, replacement) pairs, applied strictly in this order.
    # NOTE(review): the original step list also contained a substitution
    # with an empty pattern and a truncated "# remove" comment. As written
    # it was a no-op, so it is omitted here. Presumably some markup was
    # lost from that pattern -- TODO confirm and restore if so.
    steps = (
        (r"DOAJ:[^;]*(;|$)?", ""),  # remove DOAJ classification
        (r"/dk/atira[^;]*(;|$)?", ""),  # remove atira classification
        (r"ddc:[0-9]+(;|$)?", ""),  # remove Dewey Decimal Classification
        # remove Dewey Decimal Classification in URI form
        (r"([\w\/\:-])*?\/ddc\/([\/0-9\.])*", ""),
        (r"[A-Z,0-9]{2,}-[A-Z,0-9\.]{2,}(;|$)?", ""),  # remove LOC classification
        (r"[^\(;]+\(General\)(;|$)?", ""),  # remove general subjects
        (r"[^\(;]+\(all\)(;|$)?", ""),  # remove general subjects
        (r"[^:;]+ ?:: ?[^;]+(;|$)?", ""),  # remove classification with separator ::
        (r"[^\[;]+\[[A-Z,0-9]+\](;|$)?", ""),  # remove WHO classification
        (r"\[[^\[]+\][^\;]+(;|$)?", ""),  # remove classification
        (r"[0-9]{2,} [A-Z]+[^;]*(;|$)?", ""),  # remove classification
        (r" -- ", "; "),  # replace inconsistent keyword separation
        (r" \( ", "; "),  # replace inconsistent keyword separation
        # remove overly broad keywords separated by .
        (r"(\w* \w*(\.)( \w* \w*)?)", "; "),
        (r"\. ", "; "),  # replace inconsistent keyword separation
        # replace residuals like 5:621.313.323 or '5-76.95'
        (r" ?\d[:?-?]?(\d+.)+", ""),
        # replace residuals like Info:eu-repo/classification/
        (r"\w+:\w+-(\w+\/)+", ""),
    )

    subject_cleaned = subject
    for pattern, replacement in steps:
        subject_cleaned = re.sub(pattern, replacement, subject_cleaned)

    # Bug fix: the original returned the untouched `subject`, which
    # discarded every substitution above.
    return subject_cleaned
RUN mkdir -p /var/log/headstart && touch /var/log/headstart/headstart.log diff --git a/server/preprocessing/other-scripts/cluster.R b/server/preprocessing/other-scripts/cluster.R index 1305bb0bb..d08b509aa 100644 --- a/server/preprocessing/other-scripts/cluster.R +++ b/server/preprocessing/other-scripts/cluster.R @@ -32,8 +32,7 @@ create_clusters <- function(distance_matrix, max_clusters=-1, method="ward.D") { num_clusters <- tryCatch({ cut_off <- get_cut_off(css_cluster, attempt) attempt <- attempt+1 - cut_off$k - if (attempt > 50) break + if (attempt > 200) cut_off$k else NA }, error = function(err){ vclog$warn(err$message) return (NA) @@ -91,7 +90,7 @@ create_clusters <- function(distance_matrix, max_clusters=-1, method="ward.D") { } -get_ndms <- function(distance_matrix, mindim=2, maxdim=2, maxit=500) { +get_ndms <- function(distance_matrix, mindim=2, maxdim=2) { # Perform non-metric multidimensional scaling # nm <- par.nmds(distance_matrix, mindim=mindim, maxdim=maxdim, maxit=maxit) diff --git a/server/preprocessing/other-scripts/utils.R b/server/preprocessing/other-scripts/utils.R index 1dc6ff0c4..d2fd2764e 100644 --- a/server/preprocessing/other-scripts/utils.R +++ b/server/preprocessing/other-scripts/utils.R @@ -29,6 +29,10 @@ get_stopwords <- function(lang, testing) { add_stop_path <- paste0("../resources/", lang, ".stop") additional_stops <- scan(add_stop_path, what="", sep="\n") stops = c(stops, additional_stops) + } else if (dir.exists("./resources")) { + add_stop_path <- paste0("./resources/", lang, ".stop") + additional_stops <- scan(add_stop_path, what="", sep="\n") + stops = c(stops, additional_stops) } else { add_stop_path <- paste0("../../resources/", lang, ".stop") additional_stops <- scan(add_stop_path, what="", sep="\n") diff --git a/server/preprocessing/other-scripts/vis_layout.R b/server/preprocessing/other-scripts/vis_layout.R index 42c484b92..f2ff7fd52 100644 --- a/server/preprocessing/other-scripts/vis_layout.R +++ 
b/server/preprocessing/other-scripts/vis_layout.R @@ -74,7 +74,7 @@ vis_layout <- function(text, metadata, service, features <- concatenate_features(distance_matrix) vlog$debug("get clusters") clusters <- create_clusters(as.dist(features), max_clusters=max_clusters) - layout <- get_ndms(as.dist(features), maxit=500, mindim=2, maxdim=2) + layout <- get_ndms(as.dist(features), mindim=2, maxdim=2) vlog$debug("get cluster summaries") metadata = replace_keywords_if_empty(metadata, stops, service) diff --git a/server/services/search.php b/server/services/search.php index d8b5b4301..cf9a9eba0 100644 --- a/server/services/search.php +++ b/server/services/search.php @@ -39,8 +39,7 @@ function utf8_converter($array) } function search($repository, $dirty_query, $post_params, $param_types, $keyword_separator, $taxonomy_separator, $transform_query_tolowercase = true, - $retrieve_cached_map = true, $params_for_id = null, $num_labels = 3, $id = "area_uri", $subjects = "subject", - $backend = null) { + $retrieve_cached_map = true, $params_for_id = null, $num_labels = 3, $id = "area_uri", $subjects = "subject") { $INI_DIR = dirname(__FILE__) . 
"/../preprocessing/conf/"; $ini_array = library\Toolkit::loadIni($INI_DIR); $query = strip_tags($dirty_query); diff --git a/server/workers/backend/src/headstart.py b/server/workers/backend/src/headstart.py index 90b41d6e4..b60f520f7 100644 --- a/server/workers/backend/src/headstart.py +++ b/server/workers/backend/src/headstart.py @@ -2,6 +2,7 @@ import copy import json import subprocess +import asyncio from tempfile import NamedTemporaryFile import redis @@ -44,16 +45,14 @@ def create_map(self, params, input_data): cmd = [self.command, self.hs, self.wd, params.get('q'), params.get('service'), param_file.name, input_file.name] - print(cmd) output = subprocess.check_output(cmd) output = [o for o in output.decode('utf-8').split('\n') if len(o) > 0] - return output[-1] + return json.loads(output[-1]) def run(self): - while True: - k, params, input_data = self.next_item() - output = self.create_map(params, input_data) - redis_store.set(k+"_output", output) + k, params, input_data = self.next_item() + result = self.create_map(params, input_data) + redis_store.set(k+"_output", json.dumps(result)) if __name__ == '__main__': From 66da9291fe71eb1b5ef2e06e21ae6888fa30cb0a Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Thu, 12 Mar 2020 15:39:39 +0100 Subject: [PATCH 28/99] testing --- examples/triple/headstart.php | 4 +- server/preprocessing/other-scripts/cluster.R | 2 +- .../other-scripts/run_vis_layout.R | 4 +- .../other-scripts/test/params_triple.json | 3 + .../other-scripts/test/triple-test.R | 118 ++++++++++++++++++ .../test/triple_test_input_data.json | 1 + server/workers/backend/requirements.txt | 1 + server/workers/backend/src/headstart.py | 3 +- .../services/src/apis/request_validators.py | 4 + server/workers/services/src/apis/triple.py | 18 +-- server/workers/triple/src/search_triple.py | 11 +- 11 files changed, 143 insertions(+), 26 deletions(-) create mode 100644 server/preprocessing/other-scripts/test/params_triple.json create mode 100644 
server/preprocessing/other-scripts/test/triple-test.R create mode 100644 server/preprocessing/other-scripts/test/triple_test_input_data.json diff --git a/examples/triple/headstart.php b/examples/triple/headstart.php index eef156324..738ce70df 100644 --- a/examples/triple/headstart.php +++ b/examples/triple/headstart.php @@ -17,8 +17,8 @@ }]; data_config.options = options_.dropdowns; - - + + diff --git a/server/preprocessing/other-scripts/cluster.R b/server/preprocessing/other-scripts/cluster.R index d08b509aa..3b8c9a61e 100644 --- a/server/preprocessing/other-scripts/cluster.R +++ b/server/preprocessing/other-scripts/cluster.R @@ -32,7 +32,7 @@ create_clusters <- function(distance_matrix, max_clusters=-1, method="ward.D") { num_clusters <- tryCatch({ cut_off <- get_cut_off(css_cluster, attempt) attempt <- attempt+1 - if (attempt > 200) cut_off$k else NA + if (attempt > 500) cut_off$k else NA }, error = function(err){ vclog$warn(err$message) return (NA) diff --git a/server/preprocessing/other-scripts/run_vis_layout.R b/server/preprocessing/other-scripts/run_vis_layout.R index ee72a1b65..f35e64399 100644 --- a/server/preprocessing/other-scripts/run_vis_layout.R +++ b/server/preprocessing/other-scripts/run_vis_layout.R @@ -20,8 +20,8 @@ DEBUG = FALSE params <- fromJSON(params_file) input_data <- fromJSON(input_file) -text <- unnest(as_tibble(fromJSON(input_data$text))) -metadata <- unnest(as_tibble(fromJSON(input_data$metadata))) +text <- fromJSON(input_data$text) +metadata <- fromJSON(input_data$metadata) if (!is.null(params$lang_id)) { lang_id <- params$lang_id diff --git a/server/preprocessing/other-scripts/test/params_triple.json b/server/preprocessing/other-scripts/test/params_triple.json new file mode 100644 index 000000000..1ea4012ad --- /dev/null +++ b/server/preprocessing/other-scripts/test/params_triple.json @@ -0,0 +1,3 @@ +{"list_size": 100, + "MAX_CLUSTERS": 15, + "taxonomy_separator": ";"} diff --git 
a/server/preprocessing/other-scripts/test/triple-test.R b/server/preprocessing/other-scripts/test/triple-test.R new file mode 100644 index 000000000..dd5274c5d --- /dev/null +++ b/server/preprocessing/other-scripts/test/triple-test.R @@ -0,0 +1,118 @@ +rm(list = ls()) + +library(rstudioapi) + +options(warn=1) + +wd <- dirname(rstudioapi::getActiveDocumentContext()$path) +setwd(wd) + +input_file <- "triple_test_input_data.json" +params_file <- "params_triple.json" +query <- "machine learning" +service <- "triple" + +source('../utils.R') + +DEBUG <- TRUE +if (DEBUG==TRUE){ + setup_logging('DEBUG') +} else { + setup_logging('INFO') +} + +source("../vis_layout.R") + +tslog <- getLogger('ts') + +params <- fromJSON(params_file) +input_data <- fromJSON(input_file) +text <- fromJSON(input_data$text) +metadata <- fromJSON(input_data$metadata) + +if (!is.null(params$lang_id)) { + lang_id <- params$lang_id +} else { + lang_id <- 'all' +} + +if (!is.null(params$vis_type)) { + vis_type <- params$vis_type +} else { + vis_type <- 'overview' +} + + +valid_langs <- list( + 'afr'='afrikaans', + 'akk'='akkadian', + 'ara'='arabic', + 'baq'='basque', + 'bel'='belarusian', + 'chi'='chinese', + 'cze'='czech', + 'dan'='danish', + 'dut'='dutch', + 'eng'='english', + 'est'='estonian', + 'fin'='finnish', + 'fre'='french', + 'geo'='georgian', + 'ger'='german', + 'gle'='irish', + 'glg'='galician', + 'grc'='greek', + 'gre'='greek', + 'heb'='hebrew', + 'hrv'='croatian', + 'hun'='hungarian', + 'ice'='icelandic', + 'ind'='indonesian', + 'ita'='italian', + 'jpn'='japanese', + 'kor'='korean', + 'lat'='latin', + 'lit'='lithuanian', + 'nau'='nauru', + 'nob'='norwegian', + 'nor'='norwegian', + 'ota'='turkish', + 'per'='persian', + 'pol'='polish', + 'por'='portuguese', + 'rum'='romanian', + 'rus'='russian', + 'slo'='slovak', + 'slv'='slovenian', + 'spa'='spanish', + 'srp'='serbian', + 'sux'='sumerian', + 'swe'='swedish', + 'tha'='thai', + 'tur'='turkish', + 'ukr'='ukrainian', + 'vie'='vietnamese' +) + 
+MAX_CLUSTERS = params$MAX_CLUSTERS +LANGUAGE <- get_service_lang(lang_id, valid_langs, service) +ADDITIONAL_STOP_WORDS = LANGUAGE$name + +tslog <- getLogger('ts') + +failed <- list(params=params) +tryCatch({ + output_json = vis_layout(text, metadata, + service, + max_clusters = MAX_CLUSTERS, + add_stop_words = ADDITIONAL_STOP_WORDS, + testing=TRUE, + lang = LANGUAGE$name, + taxonomy_separator = params$taxonomy_separator, + list_size = params$list_size, + vis_type=vis_type) +}, error=function(err){ + tslog$error(gsub("\n", " ", paste("Processing failed", query, paste(params, collapse=" "), err, sep="||"))) + failed$query <<- query + failed$processing_reason <<- err$message +}) diff --git a/server/preprocessing/other-scripts/test/triple_test_input_data.json b/server/preprocessing/other-scripts/test/triple_test_input_data.json new file mode 100644 index 000000000..c5289a877 --- /dev/null +++ b/server/preprocessing/other-scripts/test/triple_test_input_data.json @@ -0,0 +1 @@ +{"metadata": "[{\"id\":\"10670\\/1.cdg5dl\",\"title\":\"Big Data and Machine Learning in Quantitative Investment\",\"authors\":\"Guida, Tony\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2019-09-27\",\"url\":\"https:\\/\\/hal.archives-ouvertes.fr\\/hal-02298299\",\"readers\":0,\"subject\":\"[SHS.ECO]Humanities and Social Sciences\\/Economics and Finance; [SHS.GESTION]Humanities and Social Sciences\\/Business administration\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.zb9ibf\",\"title\":\"Sequence-to-sequence learning for machine translation and automatic differentiation for machine learning software tools\",\"authors\":\"van Merri\\u00ebnboer, Bart\",\"paper_abstract\":\"\",\"published_in\":\"Biblioth\\u00e8que de l'Universit\\u00e9 de Montr\\u00e9al\",\"year\":\"2019-05-14\",\"url\":\"http:\\/\\/hdl.handle.net\\/1866\\/21743\",\"readers\":0,\"subject\":\"Deep learning; Machine learning; Machine 
translation; Automatic differentiation; Aprentissage automatique; Diff\\u00e9rentiation automatique; Traduction automatique; Apprentissage profond; Neural networks; Natural language processing; Traitement automatique du langage naturel; R\\u00e9seaux de neurones; Applied Sciences - Artificial Intelligence \\/ Sciences appliqu\\u00e9s et technologie - Intelligence artificielle (UMI : 0800)\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.favjnn\",\"title\":\"Rumour Veracity Estimation with Deep Learning for Twitter\",\"authors\":\"Singh, Jyoti,; Rana, Nripendra,; Dwivedi, Yogesh,\",\"paper_abstract\":\"Part 4: Security, Privacy, Ethics and Misinformation\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2019-09-25\",\"url\":\"https:\\/\\/hal.inria.fr\\/hal-02294696\",\"readers\":0,\"subject\":\"Rumour veracity; Deep learning; Twitter; Neural network; Machine learning; [INFO]Computer Science [cs]; [SHS.INFO]Humanities and Social Sciences\\/Library and information sciences\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.bm7sxk\",\"title\":\"Automated Machine Learning : Methods, Systems, Challenges\",\"authors\":\"\",\"paper_abstract\":\"This open access book presents the first comprehensive overview of general methods in Automated Machine Learning (AutoML), collects descriptions of existing systems based on these methods, and discusses the first series of international challenges of AutoML systems. The recent success of commercial ML applications and the rapid growth of the field has created a high demand for off-the-shelf ML methods that can be used easily and without expert knowledge. However, many of the recent machine learning successes crucially rely on human experts, who manually select appropriate ML architectures (deep learning architectures or more traditional ML workflows) and their hyperparameters. 
To overcome this problem, the field of AutoML targets a progressive automation of machine learning, based on principles from optimization and machine learning itself. This book serves as a point of entry into this quickly-developing field for researchers and advanced students alike, as well as providing a reference for practitioners aiming to use AutoML in their work.\",\"published_in\":\"OAPEN\",\"year\":\"2020-02-04\",\"url\":\"http:\\/\\/www.oapen.org\\/search?identifier=1007149\",\"readers\":0,\"subject\":\"Computer science; Artificial intelligence; Optical data processing; Pattern recognition; UYQ; UYQP; UYT\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.qpk049\",\"title\":\"Machine Learning for Cyber Physical Systems : Selected papers from the International Conference ML4CPS 2018\",\"authors\":\"\",\"paper_abstract\":\"This Open Access proceedings presents new approaches to Machine Learning for Cyber Physical Systems, experiences and visions. It contains some selected papers from the international Conference ML4CPS \\u2013 Machine Learning for Cyber Physical Systems, which was held in Karlsruhe, October 23-24, 2018. Cyber Physical Systems are characterized by their ability to adapt and to learn: They analyze their environment and, based on observations, they learn patterns, correlations and predictive models. Typical applications are condition monitoring, predictive maintenance, image processing and diagnosis. 
Machine Learning is the key technology for these developments.\",\"published_in\":\"OAPEN\",\"year\":\"2020-02-04\",\"url\":\"http:\\/\\/www.oapen.org\\/search?identifier=1006862\",\"readers\":0,\"subject\":\"Engineering; Computational intelligence; Computer organization; Electrical engineering; Data mining; TJK; UNF; UT; UYQ\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.fc349p\",\"title\":\"Fondations of Machine Learning, part 3\",\"authors\":\"Charpentier, Arthur\",\"paper_abstract\":\"This post is the seventh one of our series on the history and foundations of econometric and machine learning models. The first fours were on econometrics techniques. Part 6 is online here. Boosting and sequential learning As we have seen before, modelling here is based on solving an optimization problem, and solving the problem described by equation [latex](6) [\\/latex] is all the more complex because the functional space [latex]\\\\mathcal{M}[\\/latex] is large. The idea of boosting, as introduce...\",\"published_in\":\"OpenEdition\",\"year\":\"2019-04-02\",\"url\":\"http:\\/\\/freakonometrics.hypotheses.org\\/57782\",\"readers\":0,\"subject\":\"\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.pkmp66\",\"title\":\"Fondations of Machine Learning, part 2\",\"authors\":\"Charpentier, Arthur\",\"paper_abstract\":\"This post is the sixth one of our series on the history and foundations of econometric and machine learning models. The first fours were on econometrics techniques. Part 5 is online here. The probabilistic formalism in the 80\\u2019s We have a training sample, with observations [latex](\\\\mathbf{x}_i,y_i)[\\/latex] where the variables [latex]y[\\/latex] are in a set [latex]\\\\mathcal{Y}[\\/latex]. 
In the case of classification, [latex]\\\\mathcal{Y}=\\\\{-1,+1\\\\}[\\/latex], but a relatively general set can be conside...\",\"published_in\":\"OpenEdition\",\"year\":\"2019-04-02\",\"url\":\"http:\\/\\/freakonometrics.hypotheses.org\\/57745\",\"readers\":0,\"subject\":\"\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.9cfd2p\",\"title\":\"References on Econometrics and Machine Learning\",\"authors\":\"Charpentier, Arthur\",\"paper_abstract\":\"In our series of posts on the history and foundations of econometric and machine learning models, a lot of references where given. Here they are. Ahamada, I. & E. Flachaire (2011). Non-Parametric Econometrics. Oxford University Press. Aigner, D., Lovell, C.A.J & Schmidt, P. (1977). Formulation and estimation of stochastic frontier production function models. Journal of Econometrics, 6, 21\\u201337. Aldrich, J. (2010). The Econometricians\\u2019 Statisticians, 1895-1945. History of Political Economy, 42...\",\"published_in\":\"OpenEdition\",\"year\":\"2019-04-02\",\"url\":\"http:\\/\\/freakonometrics.hypotheses.org\\/57737\",\"readers\":0,\"subject\":\"\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.y96bc7\",\"title\":\"Fondations of Machine Learning, part 5\",\"authors\":\"Charpentier, Arthur\",\"paper_abstract\":\"This post is the nineth (and probably last) one of our series on the history and foundations of econometric and machine learning models. The first fours were on econometrics techniques. Part 8 is online here. Optimization and algorithmic aspects In econometrics, (numerical) optimization became omnipresent as soon as we left the Gaussian model. 
We briefly mentioned it in the section on the exponential family, and the use of the Fisher score (gradient descent) to solve the first order condition...\",\"published_in\":\"OpenEdition\",\"year\":\"2019-04-02\",\"url\":\"http:\\/\\/freakonometrics.hypotheses.org\\/57813\",\"readers\":0,\"subject\":\"\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.b09jss\",\"title\":\"Fondations of Machine Learning, part 1\",\"authors\":\"Charpentier, Arthur\",\"paper_abstract\":\"This post is the fifth one of our series on the history and foundations of econometric and machine learning models. The first fours were on econometrics techniques. Part 4 is online here. In parallel with these tools developed by, and for economists, a whole literature has been developed on similar issues, centered on the problems of prediction and forecasting. For Breiman (2001a), a first difference comes from the fact that the statistic has developed around the principle of inference (or to...\",\"published_in\":\"OpenEdition\",\"year\":\"2019-04-02\",\"url\":\"http:\\/\\/freakonometrics.hypotheses.org\\/57705\",\"readers\":0,\"subject\":\"\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.ei13yx\",\"title\":\"Fondations of Machine Learning, part 4\",\"authors\":\"Charpentier, Arthur\",\"paper_abstract\":\"This post is the eighth one of our series on the history and foundations of econometric and machine learning models. The first fours were on econometrics techniques. Part 7 is online here. Penalization and variables selection One important concept in econometrics is Ockham's razor \\u2013 also known as the law of parsimony (lex parsimoniae) \\u2013 which can be related to abductive reasoning. 
Akaike's criterion was based on a penalty of likelihood taking into account the complexity of the model (the numb...\",\"published_in\":\"OpenEdition\",\"year\":\"2019-04-02\",\"url\":\"http:\\/\\/freakonometrics.hypotheses.org\\/57790\",\"readers\":0,\"subject\":\"\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.ki26w9\",\"title\":\"Machine learning methods for genotype assignment\",\"authors\":\"Georges-Filteau, Jeremy\",\"paper_abstract\":\"Invasive species are an ongoing concern for countries in which natural resources play a vital economic and social role. In Canada, species such as the Asian long-horned beetle, Dutch elm disease, sudden oak death and the Asian gypsy moth threaten forests and the sectors of industry that profit from them. The economic risk is estimated at up to $800M annually. Machine learning methods that quickly and accurately determine the taxon, geographic origin, and pathogenic fitness of biological samples from genomics data would constitute a valuable tool for risk reduction. In this thesis, we reviewed concepts of population genetics, phylogenetic networks, genotype data and current methods for genetic population assignment. Having identified a number of the shortcomings of current methods, we propose a new machine learning approach called Mycorrhiza aimed at predicting the geographical origin of a sample from its genotype in which phylogenetic networks are used as feature engineering tools, followed by a Random Forests classifier. The classification accuracy of our method was compared to widely used assessment tests or mixture analysis methods in population genetics such as STRUCTURE and Admixture, as well as a variant where a PCA is used in place of the phylogenetic network. Multiple published SNP, microsatellite or consensus sequence datasets with wide ranges in size, geographical distribution and populations were used for this purpose. 
The phylogenetic network and PCA methods show a marked improvement in classification accuracy and definable advantages compared to the existing approaches. As is to be expected, STRUCTURE and Admixture fall short on almost all datasets with a considerable deviation from the Hardy Weinberg equilibrium. The same can be said for Admixture on datasets with a large expected heterozygosity. Moreover, Mycorrhiza consistently estimates mixture proportions more accurately than the PCA variant. Our approach will be useful in the rapid and accurate prediction of geographical origin from genotype samples without the restrictions inherent to currently used methods.\",\"published_in\":\"McGill Library\",\"year\":\"2019-03-29\",\"url\":\"http:\\/\\/digitool.Library.McGill.CA:80\\/R\\/?func=dbin-jump-full&object_id=163873\",\"readers\":0,\"subject\":\"Computer Science\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"http:\\/\\/hdl.handle.net\\/10251\\/127574\",\"title\":\"M\\u00e9todos de machine learning en estudios biom\\u00e9dicos.\",\"authors\":\"Gonz\\u00e1lez Vilanova, Arturo\",\"paper_abstract\":\"[ES] El desarrollo de las tecnolog\\u00edas de alto rendimiento en biolog\\u00eda molecular e imagen m\\u00e9dica ha permitido el acceso a grandes cantidades de informaci\\u00f3n de diverso tipo, lo que ahora se conoce como big data. Dicha informaci\\u00f3n es de tal complejidad que resulta muy dif\\u00edcil el poder extraer conclusiones fiables y \\u00fatiles de ella. Se requiere la utilizaci\\u00f3n de m\\u00e9todos de estad\\u00edstica multivariante y un gran poder de computaci\\u00f3n para vislumbrar los patrones, modelos o normas que siguen los datos. 
En este contexto nace el machine learning o aprendizaje autom\\u00e1tico, una disciplina que fusiona m\\u00e9todos estad\\u00edsticos con inform\\u00e1tica para elaborar algoritmos capaces de clasificar muestras, predecir resultados y realizar inferencias en base a la informaci\\u00f3n que se les proporciona previamente como entrenamiento. Estos m\\u00e9todos aplicados a la biomedicina pueden extraer el sentido de datos de gen\\u00f3mica, transcript\\u00f3mica, imagen m\\u00e9dica, entre otros, lo cual permitir\\u00eda el avance de la medicina a una forma m\\u00e1s personalizada, precisa y efectiva de atenci\\u00f3n m\\u00e9dica. En este trabajo se aplican tres de los modelos de aprendizaje autom\\u00e1tico m\\u00e1s populares en el contexto de la clasificaci\\u00f3n: k-vecinos m\\u00e1s pr\\u00f3ximos, m\\u00e1quinas de soporte vectorial y bosques aleatorios. Los datos utilizados provienen de la extracci\\u00f3n de caracter\\u00edsticas radi\\u00f3micas de imagen m\\u00e9dica y la extracci\\u00f3n de caracter\\u00edsticas morfol\\u00f3gicas de n\\u00facleos celulares. El objetivo es evaluar el desempe\\u00f1o de estos modelos sobre informaci\\u00f3n potencialmente relevante en la cl\\u00ednica. En primer lugar, se hizo un an\\u00e1lisis exploratorio de los datos consistente en el an\\u00e1lisis de componentes principales y an\\u00e1lisis de agrupamiento. El cuerpo principal del trabajo consta de seis pasos: procesado de los datos, estandarizaci\\u00f3n, partici\\u00f3n de los datos, selecci\\u00f3n de caracter\\u00edsticas, entrenamiento y validaci\\u00f3n. El procesado consisti\\u00f3 en la eliminaci\\u00f3n de todas aquellas muestras y variables que por alg\\u00fan motivo no eran adecuadas para su inclusi\\u00f3n en an\\u00e1lisis posteriores. A continuaci\\u00f3n, se transformaron los datos por centrado y escalado. Los datos se dividieron en dos subconjuntos, de los cuales uno sirvi\\u00f3 para el entrenamiento y otro para la validaci\\u00f3n. 
Durante la selecci\\u00f3n de caracter\\u00edsticas se redujo todav\\u00eda m\\u00e1s el n\\u00famero de variables a tener en cuenta para los modelos hasta tener solo aquellas m\\u00e1s relevantes. Se entrenaron los modelos y se realizaron predicciones sobre las observaciones que no se usaron en el entrenamiento. Con los resultados obtenidos de las predicciones, se calcularon y analizaron m\\u00e9tricas de precisi\\u00f3n. Los resultados obtenidos revelan que la calidad y abundancia de los datos es fundamental para el desarrollo de un buen modelo predictivo. Diferentes modelos pueden ser perfectamente funcionales para un mismo problema de clasificaci\\u00f3n. Los an\\u00e1lisis demuestran una clara relaci\\u00f3n entre algunas de las caracter\\u00edsticas y el resultado cl\\u00ednico.\",\"published_in\":\"Universitat Polit\\u00e8cnica de Val\\u00e8ncia\",\"year\":\"2019-10-07\",\"url\":\"http:\\/\\/hdl.handle.net\\/10251\\/127574\",\"readers\":0,\"subject\":\"Aprendizaje autom\\u00e1tico; Radi\\u00f3mica; Imagen m\\u00e9dica; Diagn\\u00f3stico; Modelo predictivo; Machine learning; Radiomics; Medical imaging; Diagnosis; Predictive model.; BIOQUIMICA Y BIOLOGIA MOLECULAR; ESTADISTICA E INVESTIGACION OPERATIVA; Grado en Biotecnolog\\u00eda-Grau en Biotecnologia\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.oapbao\",\"title\":\"Text analysis, data quality and machine learning\",\"authors\":\"Kyriacopoulou, Tita; Martineau, Claude\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2019-12-03\",\"url\":\"https:\\/\\/hal-upec-upem.archives-ouvertes.fr\\/hal-02378767\",\"readers\":0,\"subject\":\"[SCCO.LING]Cognitive science\\/Linguistics; [INFO]Computer Science [cs]\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.l0zng0\",\"title\":\"Machine learning for end-users: exploring learning goals and pedagogical content knowledge\",\"authors\":\"Sulmont, 
Elisabeth\",\"paper_abstract\":\"Machine learning (ML) has rapidly become a sought after skill across numerous domains and sectors. Indeed, the need for those who understand ML is rising faster than can be met. This is reflected in the current state of ML education research, where there is little empirical work, and is mostly on courseware and about computer science students. To expand this research area, this thesis focuses on how to teach ML, specifically to adults with a non-technical background. We conducted ten interviews with university instructors to elicit pedagogical content knowledge (PCK) -- an instructor's knowledge on how to teach a particular topic. In the qualitative analysis, we identified aspects of PCK, such as student preconceptions, student barriers, and pedagogical tactics adopted by instructors. These included students overestimating the abilities of ML, student difficulty with math, and instructors strategically choosing instructional datasets. Furthermore, specific learning goals emerged in our analysis. We classified them into the Structure of Observed Learning Outcomes (SOLO) learning taxonomy, wherein a pattern emerged. Learning goals described as easy to teach, such as tracing algorithms, were consistent with lower levels of the SOLO Taxonomy. Learning goals described as hard to teach, such as evaluating and tuning models, were consistent with higher levels of the SOLO taxonomy. This indicates that the algorithms themselves are not the difficult part about learning ML. Our PCK findings were then organized into this taxonomy to present a list of student barriers and teaching strategies related to each SOLO stage. This organization provides a useful guide for supporting student development through better anticipating student difficulty and providing pertinent pedagogical tactics. 
We conclude with practical uses for educators today through critical course design questions and several promising directions for innovation in teaching ML, including visualization tools and end-user programming.\",\"published_in\":\"McGill Library\",\"year\":\"2019-11-28\",\"url\":\"http:\\/\\/digitool.Library.McGill.CA:80\\/R\\/?func=dbin-jump-full&object_id=169210\",\"readers\":0,\"subject\":\"Computer Science\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.bscrqj\",\"title\":\"Probabilistic Fondations of Econometrics, part 1\",\"authors\":\"Charpentier, Arthur\",\"paper_abstract\":\"In a series of posts, I wanted to get into details of the history and foundations of econometric and machine learning models. I will be some sort of online version of our joint paper with Emmanuel Flachaire and Antoine Ly, Econometrics and Machine Learning (initially writen in French), that will actually appear soon in the journal Economics and Statistics. This is the first one... The importance of probabilistic models in economics is rooted in Working's (1927) questions and the attempts to ...\",\"published_in\":\"OpenEdition\",\"year\":\"2019-04-02\",\"url\":\"http:\\/\\/freakonometrics.hypotheses.org\\/57649\",\"readers\":0,\"subject\":\"\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.ncmac2\",\"title\":\"Machine \\u00e0 vecteurs de support hyperbolique et ing\\u00e9nierie du noyau\",\"authors\":\"El Dakdouki, Aya\",\"paper_abstract\":\"La th\\u00e9orie statistique de l\\u2019apprentissage est un domaine de la statistique inf\\u00e9rentielle dont les fondements ont \\u00e9t\\u00e9 pos\\u00e9s par Vapnik \\u00e0 la fin des ann\\u00e9es 60. Il est consid\\u00e9r\\u00e9 comme un sous-domaine de l\\u2019intelligence artificielle. 
Dans l\\u2019apprentissage automatique, les machines \\u00e0 vecteurs de support (SVM) sont un ensemble de techniques d\\u2019apprentissage supervis\\u00e9 destin\\u00e9es \\u00e0 r\\u00e9soudre des probl\\u00e8mes de discrimination et de r\\u00e9gression. Dans cette th\\u00e8se, notre objectif est de proposer deux nouveaux probl\\u00e8mes d\\u2019aprentissagestatistique: Un portant sur la conception et l\\u2019\\u00e9valuation d\\u2019une extension des SVM multiclasses et un autre sur la conception d\\u2019un nouveau noyau pour les machines \\u00e0 vecteurs de support. Dans un premier temps, nous avons introduit une nouvelle machine \\u00e0 noyau pour la reconnaissance de mod\\u00e8le multi-classe: la machine \\u00e0 vecteur de support hyperbolique. G\\u00e9ometriquement, il est caract\\u00e9ris\\u00e9 par le fait que ses surfaces de d\\u00e9cision dans l\\u2019espace de redescription sont d\\u00e9finies par des fonctions hyperboliques. Nous avons ensuite \\u00e9tabli ses principales propri\\u00e9t\\u00e9s statistiques. Parmi ces propri\\u00e9t\\u00e9s nous avons montr\\u00e9 que les classes de fonctions composantes sont des classes de Glivenko-Cantelli uniforme, ceci en \\u00e9tablissant un majorant de la complexit\\u00e9 de Rademacher. Enfin, nous \\u00e9tablissons un risque garanti pour notre classifieur.Dans un second temps, nous avons cr\\u00e9er un nouveau noyau s\\u2019appuyant sur la transformation de Fourier d\\u2019un mod\\u00e8le de m\\u00e9lange gaussien. Nous proc\\u00e9dons de la mani\\u00e8re suivante: d\\u2019abord, chaque classe est fragment\\u00e9e en un nombre de sous-classes pertinentes, ensuite on consid\\u00e8re les directions donn\\u00e9es par les vecteurs obtenus en prenant toutes les paires de centres de sous-classes d\\u2019une m\\u00eame classe. Parmi celles-ci, sont exclues celles permettant de connecter deux sous-classes de deux classes diff\\u00e9rentes. 
On peut aussi voir cela comme la recherche d\\u2019invariance par translation dans chaque classe. Nous l\\u2019avons appliqu\\u00e9 avec succ\\u00e8s sur plusieurs jeux de donn\\u00e9es dans le contexte d\\u2019un apprentissage automatique utilisant des machines \\u00e0 vecteurs support multi-classes.\",\"published_in\":\"ABES\",\"year\":\"2019-12-02\",\"url\":\"http:\\/\\/www.theses.fr\\/2019LIL1I046\\/document\",\"readers\":0,\"subject\":\"Classe de Glivenko-Cantelli; Classifieur multi-Classe; Risque garanti (apprentissage automatique); Complexit\\u00e9 de Rademacher; 519.52\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.jodv82\",\"title\":\"Data mining and machine learning for reverse engineering\",\"authors\":\"Ding, Honghui\",\"paper_abstract\":\"Reverse engineering is fundamental for understanding the inner workings of new malware, exploring new vulnerabilities in existing systems, and identifying patent infringements in the distributed executables. It is the process of getting an in-depth understanding of a given binary executable without its corresponding source code. Reverse engineering is a manually intensive and time-consuming process that relies on a thorough understanding of the full development stack from hardware to applications. It requires a much steeper learning curve than programming. Given the unprecedentedly vast amount of data to be analyzed and the significance of reverse engineering, the overall question that drives the studies in this thesis is how can data mining and machine learning technologies make cybersecurity practitioners more productive to uncover the provenance, understand the intention, and discover the issues behind the data in a scalable way. In this thesis, I focus on two data-driven solutions to help reverse engineers analyzing binary data: assembly clone search and behavioral summarization. Assembly code clone search is emerging as an Information Retrieval (IR) technique that helps address security problems. 
It has been used for differing binaries to locate the changed parts, identifying known library functions such as encryption, searching for known programming bugs or zero-day vulnerabilities in existing software or Internet of Things (IoT) devices firmware, as well as detecting software plagiarism or GNU license infringements when the source code is unavailable. However, designing an effective search engine is difficult, due to varieties of compiler optimization and obfuscation techniques that make logically similar assembly functions appear to be dramatically different. By working closely with reverse engineers, I identify three different scenarios of reverse engineering and develop novel data mining and machine learning models for assembly clone search to address the respective challenges. By developing an intelligent assembly clone search platform, I optimize the process of reverse engineering by addressing the information needs of reverse engineers. Experimental results suggest that Kam1n0 is accurate, efficient, and scalable for handling a large volume of data.The second part of the thesis goes beyond optimizing an information retrieval process for reverse engineering. I propose to automatically and statically characterize the behaviors of a given binary executable. Behavioral indicators denote those potentially high-risk malicious behaviors exhibited by malware, such as unintended network communications, file encryption, keystroke logging, abnormal registry modifications, sandbox evasion, and camera manipulation. I design a novel neural network architecture that models the different aspects of an executable. It is able to predict over 139 suspicious and malicious behavioral indicators, without running the executable. The resulting system can be used as an additional binary analytic layer to mitigate the issues of polymorphism, metamorphism, and evasive techniques. 
It also provides another behavioral abstraction of malware to security analysts and reverse engineers. Therefore, it can reduce the data to be manually analyzed, and the reverse engineers can focus on the binaries that are of their interest. In summary, this thesis presents four original research projects that not only advance the knowledge in reverse engineering and data mining, but also contribute to the overall safety of our cyber world by providing open-source award-winning binary analysis systems that empower cybersecurity practitioners.\",\"published_in\":\"McGill Library\",\"year\":\"2019-11-28\",\"url\":\"http:\\/\\/digitool.Library.McGill.CA:80\\/R\\/?func=dbin-jump-full&object_id=169373\",\"readers\":0,\"subject\":\"Information Studies\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"http:\\/\\/hdl.handle.net\\/10251\\/127856\",\"title\":\"Dise\\u00f1o de algoritmos de clasificaci\\u00f3n de cultivos mediante t\\u00e9cnicas de Machine Learning\",\"authors\":\"P\\u00e9rez Navas, Lidia\",\"paper_abstract\":\"[ES] Dise\\u00f1o de algoritmos de clasificaci\\u00f3n de cultivos usando t\\u00e9cnicas de Machine Learning en entorno MATLAB. Para ello se usar\\u00e1n diferentes m\\u00e9todos como Random Forest o SVM (Support Vector Machine). Los datos de entrada ser\\u00e1n medidas recogidas en el \\u00e1rea piloto de Barrax (Albacete) donde se recogen datos del tipo de cultivo asociado a diferentes coordenadas. La mitad de las muestras se usar\\u00e1n para el aprendizaje y la otra mitad para validar. 
Para el aprendizaje, las coordenadas seleccionadas se localizar\\u00e1n en im\\u00e1genes de sat\\u00e9lite Sentinel-2 para extraer los datos en las diferentes bandas espectrales que ofrece el sat\\u00e9lite.\",\"published_in\":\"Universitat Polit\\u00e8cnica de Val\\u00e8ncia\",\"year\":\"2019-10-09\",\"url\":\"http:\\/\\/hdl.handle.net\\/10251\\/127856\",\"readers\":0,\"subject\":\"Teledetecci\\u00f3n; Clasificaci\\u00f3n; Machine Learning; Sat\\u00e9lite; Observaci\\u00f3n de la Tierra; Cultivos; TEORIA DE LA SE\\u00d1AL Y COMUNICACIONES; M\\u00e1ster Universitario en Ingenier\\u00eda de Telecomunicaci\\u00f3n-M\\u00e0ster Universitari en Enginyeria de Telecomunicaci\\u00f3\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"http:\\/\\/hdl.handle.net\\/10251\\/124351\",\"title\":\"Machine Learning for Glaucoma Assessment using Fundus Images\",\"authors\":\"D\\u00edaz Pinto, Andr\\u00e9s Yesid\",\"paper_abstract\":\"[ES] Las im\\u00e1genes de fondo de ojo son muy utilizadas por los oftalm\\u00f3logos para la evaluaci\\u00f3n de la retina y la detecci\\u00f3n de glaucoma. Esta patolog\\u00eda es la segunda causa de ceguera en el mundo, seg\\u00fan estudios de la Organizaci\\u00f3n Mundial de la Salud (OMS). En esta tesis doctoral, se estudian algoritmos de aprendizaje autom\\u00e1tico (machine learning) para la evaluaci\\u00f3n autom\\u00e1tica del glaucoma usando im\\u00e1genes de fondo de ojo. En primer lugar, se proponen dos m\\u00e9todos para la segmentaci\\u00f3n autom\\u00e1tica. El primer m\\u00e9todo utiliza la transformaci\\u00f3n Watershed Estoc\\u00e1stica para segmentar la copa \\u00f3ptica y posteriormente medir caracter\\u00edsticas cl\\u00ednicas como la relaci\\u00f3n Copa\\/Disco y la regla ISNT. El segundo m\\u00e9todo es una arquitectura U-Net que se usa espec\\u00edficamente para la segmentaci\\u00f3n del disco \\u00f3ptico y la copa \\u00f3ptica. 
A continuaci\\u00f3n, se presentan sistemas autom\\u00e1ticos de evaluaci\\u00f3n del glaucoma basados en redes neuronales convolucionales (CNN por sus siglas en ingl\\u00e9s). En este enfoque se utilizan diferentes modelos entrenados en ImageNet como clasificadores autom\\u00e1ticos de glaucoma, usando fine-tuning. Esta nueva t\\u00e9cnica permite detectar el glaucoma sin segmentaci\\u00f3n previa o extracci\\u00f3n de caracter\\u00edsticas. Adem\\u00e1s, este enfoque presenta una mejora considerable del rendimiento comparado con otros trabajos del estado del arte. En tercer lugar, dada la dificultad de obtener grandes cantidades de im\\u00e1genes etiquetadas (glaucoma\\/no glaucoma), esta tesis tambi\\u00e9n aborda el problema de la s\\u00edntesis de im\\u00e1genes de la retina. En concreto se analizaron dos arquitecturas diferentes para la s\\u00edntesis de im\\u00e1genes, las arquitecturas Variational Autoencoder (VAE) y la Generative Adversarial Networks (GAN). Con estas arquitecturas se generaron im\\u00e1genes sint\\u00e9ticas que se analizaron cualitativa y cuantitativamente, obteniendo un rendimiento similar a otros trabajos en la literatura. Finalmente, en esta tesis se plantea la utilizaci\\u00f3n de un tipo de GAN (DCGAN) como alternativa a los sistemas autom\\u00e1ticos de evaluaci\\u00f3n del glaucoma presentados anteriormente. 
Para alcanzar este objetivo se implement\\u00f3 un algoritmo de aprendizaje semi-supervisado.\",\"published_in\":\"Universitat Polit\\u00e8cnica de Val\\u00e8ncia\",\"year\":\"2019-07-29\",\"url\":\"http:\\/\\/hdl.handle.net\\/10251\\/124351\",\"readers\":0,\"subject\":\"Glaucoma; Fundus Images; Automatic Screening; Ophthalmic Pathologies; Machine Learning; Deep Learning; Computer Vision; Segmentation; Watershed; U-Net; Classification; CNN; GAN; Image Synthesis; DCGAN; VAE; Semi-supervised Learning; TEORIA DE LA SE\\u00d1AL Y COMUNICACIONES\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"http:\\/\\/hdl.handle.net\\/10251\\/127875\",\"title\":\"Clasificaci\\u00f3n ac\\u00fastica de salas mediante algoritmos de Machine Learning\",\"authors\":\"Fraga Domingo, Juan\",\"paper_abstract\":\"[ES] El presente Trabajo Fin de Grado (TFG) tiene como objetivo construir un clasificador de salas mediante algoritmos de Machine Learning (ML). Para ello usar\\u00e1 ciertos par\\u00e1metros ac\\u00fasticos de la sala como son el tiempo de reverberaci\\u00f3n (T60) y el Direct-To-Reverberant Ratio (DRR) de la sala. En principio se pretende seguir el esquema expuesto en [Xiong2018] para la realizaci\\u00f3n del clasificador basado en ML. Posteriormente se probar\\u00e1n nuevas features no usadas en dicha referencia pero que tienen que ver con la percepci\\u00f3n sonora, como por ejemplo los coeficientes Mel Frequency Cepstral (MFC). Al finalizar el TFG, el estudiante habr\\u00e1 adquirido un conocimiento avanzado sobre el uso de algoritmos supervisados de machine learning y sobre la caracterizaci\\u00f3n ac\\u00fastica de salas. [Xiong2018] F. Xiong et al., Exploring Auditory-Inspired Acoustic Features for Room Acoustic Parameter Estimation From Monaural Speech , IEEE\\/ACM Trans. Audio, Speech, Lang. Process., vol. 26, no. 10, pp. 1809-1820, Oct. 
2018.\",\"published_in\":\"Universitat Polit\\u00e8cnica de Val\\u00e8ncia\",\"year\":\"2019-10-09\",\"url\":\"http:\\/\\/hdl.handle.net\\/10251\\/127875\",\"readers\":0,\"subject\":\"Clasificaci\\u00f3n salas; Tiempo de reverberaci\\u00f3n; Machine learning; TEORIA DE LA SE\\u00d1AL Y COMUNICACIONES; Grado en Ingenier\\u00eda de Tecnolog\\u00edas y Servicios de Telecomunicaci\\u00f3n-Grau en Enginyeria de Tecnologies i Serveis de Telecomunicaci\\u00f3\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.umcqr5\",\"title\":\"The Summer School is now over...\",\"authors\":\"Charpentier, Arthur\",\"paper_abstract\":\"After a great (and long) week, the SIdE (Italian Econometric Association) Summer School, on Machine Learning Algorithms for Econometricians\\u00a0is now over...\",\"published_in\":\"OpenEdition\",\"year\":\"2019-07-24\",\"url\":\"http:\\/\\/freakonometrics.hypotheses.org\\/58268\",\"readers\":0,\"subject\":\"\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"http:\\/\\/hdl.handle.net\\/10251\\/134687\",\"title\":\"Predictive analysis of urban waste generation for the city of Bogota , Colombia, through the implementation of decision trees-based machine learning, support vector machines and artifficial neural networks\",\"authors\":\"Solano-Meza, Johanna; Orjuela Yepes, David; Rodrigo-Ilarri, Javier; Cassiraga, Eduardo Fabi\\u00e1n\",\"paper_abstract\":\"[EN] This study presents an analysis of three models associated with artificial intelligence as tools to forecast the generation of urban solid waste in the city of Bogota, in order to learn about this type of waste's behavior. The analysis was carried out in such a manner that different efficient alternatives are presented. In this paper, a possible decision-making strategy was explored and implemented to plan and design technologies for the stages of collection, transport and final disposal of waste in cities, while taking into account their particular characteristics. 
The first model used to analyze data was the decision tree which employed machine learning as a non-parametric algorithm that models data separation limitations based on the learning decision rules on the input characteristics of the model. Support vector machines were the second method implemented as a forecasting model. The primary advantage of support vector machines is their proper adjustment to data despite its variable nature or when faced with problems with a small amount of training data. Lastly, recurrent neural network models to forecast data were implemented, which yielded positive results. Their architectural design is useful in exploring temporal correlations among the same. Distribution by collection zone in the city, socio-economic stratification, population, and quantity of solid waste generated in a determined period of time were factors considered in the analysis of this forecast. The results found that support vector machines are the most appropriate model for this type of analysis.\",\"published_in\":\"Universitat Polit\\u00e8cnica de Val\\u00e8ncia\",\"year\":\"2020-01-16\",\"url\":\"http:\\/\\/hdl.handle.net\\/10251\\/134687\",\"readers\":0,\"subject\":\"Environmental science; Waste treatment; Water treatment; Green engineering; Environmental chemical engineering; Waste; Urban solid waste; Artificial intelligence; Urban solid waste management; Tree through machine learning; Support vector machines; Artifi Cial neural network; INGENIERIA HIDRAULICA\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.pyirzt\",\"title\":\"Credit Risk Analysis using Machine and Deep Learning Models\",\"authors\":\"Guegan, Dominique\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2020-01-19\",\"url\":\"https:\\/\\/halshs.archives-ouvertes.fr\\/halshs-02125631\",\"readers\":0,\"subject\":\"[SHS.ECO]Humanities and Social Sciences\\/Economics and 
Finance\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.9dpgei\",\"title\":\"Unsupervised machine learning to analyse city logistics through Twitter\",\"authors\":\"Tamayo, Simon; Combes, Fran\\u00e7ois; Arthur, Gaudron\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2020-02-10\",\"url\":\"https:\\/\\/hal.archives-ouvertes.fr\\/hal-02156076\",\"readers\":0,\"subject\":\"Machine Learning; Natural Language Processing; Social Media Mining; Sentiment Analysis; City Logistics; ACM: I.: Computing Methodologies\\/I.5: PATTERN RECOGNITION\\/I.5.3: Clustering; [INFO.INFO-LG]Computer Science [cs]\\/Machine Learning [cs.LG]; [INFO.INFO-SI]Computer Science [cs]\\/Social and Information Networks [cs.SI]; [INFO.INFO-AI]Computer Science [cs]\\/Artificial Intelligence [cs.AI]; [SDE.ES]Environmental Sciences\\/Environmental and Society\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.dfcv7m\",\"title\":\"Investing Through Economic Cycles with Ensemble Machine Learning Algorithms\",\"authors\":\"Raffinot, Thomas; Benoit, Sylvain\",\"paper_abstract\":\"Ensemble machine learning algorithms (random forest and boosting) are applied to quickly and accurately detect economic turning points in the United States and in the Eurozone over the past three decades. The two key features of those algorithms are their abilities (i) to entertain a large number of predictors and (ii) to perform both variable selection and estimation simultaneously. The real-time ability to nowcast economic turning points is gauged by using investment strategies based on economic regimes induced by our models. When comparing predictive accuracy and profit measures, the model confidence set procedure is applied to avoid data snooping. 
We show that such investment strategies achieve impressive risk-adjusted returns: timing the market is thus possible.\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2019-09-20\",\"url\":\"https:\\/\\/hal.archives-ouvertes.fr\\/hal-02292317\",\"readers\":0,\"subject\":\"Random Forest; Boosting; Economic cycles; Profit maximization measures; Model Confidence Set; Machine Learning; Turning Points Detection; JEL: C - Mathematical and Quantitative Methods\\/C.C5 - Econometric Modeling\\/C.C5.C53 - Forecasting and Prediction Methods \\u2022 Simulation Methods; JEL: E - Macroeconomics and Monetary Economics\\/E.E3 - Prices, Business Fluctuations, and Cycles\\/E.E3.E32 - Business Fluctuations \\u2022 Cycles; JEL: E - Macroeconomics and Monetary Economics\\/E.E3 - Prices, Business Fluctuations, and Cycles\\/E.E3.E37 - Forecasting and Simulation: Models and Applications; JEL: G - Financial Economics\\/G.G1 - General Financial Markets\\/G.G1.G10 - General; [QFIN]Quantitative Finance [q-fin]\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"http:\\/\\/hdl.handle.net\\/10251\\/124001\",\"title\":\"Deep Teaching: Materials for Teaching Machine and Deep Learning\",\"authors\":\"Herta, Christian; Voigt, Benjamin; Baumann, Patrick; Strohmenger, Klaus; Jansen, Christoph; Fischer, Oliver; Zhang, Gefei; Hufnagel, Peter\",\"paper_abstract\":\"[EN] Machine learning (ML) is considered to be hard because it is relatively complicated in comparison to other topics of computer science. The reason is that machine learning is based heavily on mathematics and abstract concepts. This results in an entry barrier for students: Most students want to avoid such difficult topics in elective courses or self-study. In the project Deep.Teaching we address these issues: We motivate by selected applications and support courses as well as self-study by giving practical exercises for different topics in machine learning. 
The teaching material, provided as jupyter notebooks, consists of theoretical and programming sections. For didactical reasons, we designed programming exercises such that the students have to deeply understand the concepts and principles before they can start to implement a solution. We provide all necessary boilerplate code such that the students can primarily focus on the educational objectives of the exercises. We used different ways to give feedback for self-study: obscured solutions for mathematical results, software tests with assert statements, and graphical illustrations of sample solutions. All of the material is published under a permissive license. Developing jupyter notebooks collaboratively for educational purposes poses some problems. We address these issues and provide solutions\\/best practices.\",\"published_in\":\"Universitat Polit\\u00e8cnica de Val\\u00e8ncia\",\"year\":\"2019-12-02\",\"url\":\"http:\\/\\/hdl.handle.net\\/10251\\/124001\",\"readers\":0,\"subject\":\"Higher Education; Learning; Educational systems; Teaching; Machine learning; Jupyter notebook; Programming exercise; Collaborative development\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.0ei7fv\",\"title\":\"Graph-based machine learning algorithms for predicting disease outcomes\",\"authors\":\"Valenchon, Juliette\",\"paper_abstract\":\"Improving disease outcome prediction can greatly aid in the strategic deployment of secondary prevention approaches. We develop two methods to predict the evolution of diseases by taking into account personal attributes of the subjects and their relationships with medical examination results. Our approaches build upon a recent formulation of this problem as a graph-based geometric matrix completion task. The primary innovation is the introduction of multiple graphs, each relying on a different combination of subject attributes. Via statistical significance tests, we determine the relevant graph(s) for each medically-derived feature. 
In the first approach, we then employ a multiple-graph recurrent graph convolutional neural network architecture to predict the disease outcomes. In the second approach, we use a multiple-graph graph auto-encoder architecture to predict the disease outcomes. We demonstrate the efficacy of the two techniques by addressing the task of predicting the development of Alzheimer's disease for patients exhibiting mild cognitive impairment, showing that the incorporation of multiple graphs improves predictive capability. Moreover, in the second approach, the use of a graph auto-encoder also helps in increasing predictive capability.\",\"published_in\":\"McGill Library\",\"year\":\"2019-11-28\",\"url\":\"http:\\/\\/digitool.Library.McGill.CA:80\\/R\\/?func=dbin-jump-full&object_id=169666\",\"readers\":0,\"subject\":\"Electrical and Computer Engineering\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.42i4dd\",\"title\":\"Prediction of CO 2 absorption by physical solvents using a chemoinformatics-based machine learning model\",\"authors\":\"Li, Hao; Yan, Dan; Zhang, Zhien; Lichtfouse, Eric\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2019-11-28\",\"url\":\"https:\\/\\/hal.archives-ouvertes.fr\\/hal-02267200\",\"readers\":0,\"subject\":\"[CHIM.COOR]Chemical Sciences\\/Coordination chemistry; [CHIM.ORGA]Chemical Sciences\\/Organic chemistry; [CHIM.THEO]Chemical Sciences\\/Theoretical and\\/or physical chemistry; [SDE.MCG]Environmental Sciences\\/Global Changes; [SDE.IE]Environmental Sciences\\/Environmental Engineering; [SPI.FLUID]Engineering Sciences [physics]\\/Reactive fluid environment\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.n9glyt\",\"title\":\"Performing Deep Recurrent Double Q-Learning for Atari Games\",\"authors\":\"Moreno-Vera, Felipe\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique 
directe\",\"year\":\"2019-08-10\",\"url\":\"https:\\/\\/hal.archives-ouvertes.fr\\/hal-02217800\",\"readers\":0,\"subject\":\"Double Q-Learning; Reinforcement Learning; Video Games; Atari; Recurrent Networks; Convolutional Networks; [SHS.STAT]Humanities and Social Sciences\\/Methods and statistics; [INFO]Computer Science [cs]; [INFO.INFO-AI]Computer Science [cs]\\/Artificial Intelligence [cs.AI]; [INFO.INFO-CV]Computer Science [cs]\\/Computer Vision and Pattern Recognition [cs.CV]\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.fxa0rv\",\"title\":\"Analyzing semantic trace links using network science and machine learning\",\"authors\":\"Nicholson, Alexander\",\"paper_abstract\":\"Traceability is a useful tool for reasoning about many aspects of software by means of the connections between the artifacts that constitute the software. For example, stakeholders can use traceability to appraise the software quality along many different dimensions including regulatory compliance. The semantic content of these trace links or what they communicate to a stakeholder however, remains significantly unexplored. Additionally, issue tracking systems provide a wealth of data about traceability events and their context, particularly in the case of open source and agile software projects. In this work we investigate semantic traceability from two perspectives using trace links found in the issue trackers of open source software projects. Firstly, most of the state-of-the-art traceability solutions rely on a pair-wise comparison of artifact attributes. This approach ignores the characteristics that result from the entire connected structure of the trace links. To address this, we investigate the use of network science techniques on the issue trackers of sixty-six open source projects. From this perspective we show that two properties of networks, namely scale free degree distributions and triadic closure manifest in traceability networks. 
We then use these properties to showcase the high level reasoning that can be done by observing the traceability problem from a network perspective and in doing so, motivate the trace link semantic recovery problem. Secondly, we attempt to approach the trace link semantic recovery problem by using machine learning and the attributes of the issue trace links. We experiment with a number of models, tuning approaches and formulations of these attributes including the textual content of each issue, as well as other metadata items. We observe promising results from this task specification and show even higher performance when incorporating data both internal and external to the project's issue tracker.\",\"published_in\":\"McGill Library\",\"year\":\"2019-11-28\",\"url\":\"http:\\/\\/digitool.Library.McGill.CA:80\\/R\\/?func=dbin-jump-full&object_id=169614\",\"readers\":0,\"subject\":\"Computer Science\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.djjczz\",\"title\":\"On the prediction of mRNA subcellular localization with machine learning\",\"authors\":\"Yan, Zichao\",\"paper_abstract\":\"Cells are the basic units of life, and yet they are regulated by many delicate and to some extent, fragile, subcellular processes that are crucial to their survival. A simple genetic mutation could possibly clog up some important regulatory processes, or perturb the function of the product it encodes, which might ultimately bring the demise of the entire system. 
Therefore, it is important to gain more insights into the many control processes of cell and the regulatory factors associated with them, one prominent example of which would be the mechanism related to the RNA subcellular localization that we would focus on almost exclusively in this study from a computational perspective.RNA subcellular localization mechanism is one of the most important, yet under-appreciated, facets of the broader gene regulatory process, which helps with the cellular organization and regulation on gene expression, via transporting the RNA transcripts to their designated locations where their function, structure or translated proteins are needed. It is generally accepted as a fact that RNA trafficking mechanism is mediated between the trans-regulatory factors such as the RNA binding proteins, and the cis-acting elements \\u2014 short snippets of the transcript that contain the RBP binding sites \\u2014 which we call zipcode as they are considered to contain information on its address of delivery.The release of new RNA subcellular localization dataset has enabled us to build the first computational tool using state-of-the-art deep learning techniques, to predict the localization outcome for the protein-coding RNA from mere transcript sequence, and subsequently to identify the zipcode elements thereof. 
Our proposed method has achieved good accuracy compared to the baseline methods based on the k-mers features, despite the intrinsic difficulty that arise from the complex and stochastic interactions during trafficking events, as well as the limitations imposed by the available dataset.\",\"published_in\":\"McGill Library\",\"year\":\"2019-11-28\",\"url\":\"http:\\/\\/digitool.Library.McGill.CA:80\\/R\\/?func=dbin-jump-full&object_id=169662\",\"readers\":0,\"subject\":\"Computer Science\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.zqg0hf\",\"title\":\"Fast and slow machine learning\",\"authors\":\"Montiel Lo\\u0301pez, Jacob\",\"paper_abstract\":\"L'\\u00e8re du Big Data a r\\u00e9volutionn\\u00e9 la mani\\u00e8re dont les donn\\u00e9es sont cr\\u00e9\\u00e9es et trait\\u00e9es. Dans ce contexte, de nombreux d\\u00e9fis se posent, compte tenu de la quantit\\u00e9 \\u00e9norme de donn\\u00e9es disponibles qui doivent \\u00eatre efficacement g\\u00e9r\\u00e9es et trait\\u00e9es afin d\\u2019extraire des connaissances. Cette th\\u00e8se explore la symbiose de l'apprentissage en mode batch et en flux, traditionnellement consid\\u00e9r\\u00e9s dans la litt\\u00e9rature comme antagonistes, sur le probl\\u00e8me de la classification \\u00e0 partir de flux de donn\\u00e9es en \\u00e9volution. L'apprentissage en mode batch est une approche bien \\u00e9tablie bas\\u00e9e sur une s\\u00e9quence finie: d'abord les donn\\u00e9es sont collect\\u00e9es, puis les mod\\u00e8les pr\\u00e9dictifs sont cr\\u00e9\\u00e9s, finalement le mod\\u00e8le est appliqu\\u00e9. Par contre, l\\u2019apprentissage par flux consid\\u00e8re les donn\\u00e9es comme infinies, rendant le probl\\u00e8me d\\u2019apprentissage comme une t\\u00e2che continue (sans fin). De plus, les flux de donn\\u00e9es peuvent \\u00e9voluer dans le temps, ce qui signifie que la relation entre les caract\\u00e9ristiques et la r\\u00e9ponse correspondante peut changer. 
Nous proposons un cadre syst\\u00e9matique pour pr\\u00e9voir le surendettement, un probl\\u00e8me du monde r\\u00e9el ayant des implications importantes dans la soci\\u00e9t\\u00e9 moderne. Les deux versions du m\\u00e9canisme d'alerte pr\\u00e9coce (batch et flux) surpassent les performances de base de la solution mise en \\u0153uvre par le Groupe BPCE, la deuxi\\u00e8me institution bancaire en France. De plus, nous introduisons une m\\u00e9thode d'imputation \\u00e9volutive bas\\u00e9e sur un mod\\u00e8le pour les donn\\u00e9es manquantes dans la classification. Cette m\\u00e9thode pr\\u00e9sente le probl\\u00e8me d'imputation sous la forme d'un ensemble de t\\u00e2ches de classification \\/ r\\u00e9gression r\\u00e9solues progressivement.Nous pr\\u00e9sentons un cadre unifi\\u00e9 qui sert de plate-forme d'apprentissage commune o\\u00f9 les m\\u00e9thodes de traitement par batch et par flux peuvent interagir de mani\\u00e8re positive. Nous montrons que les m\\u00e9thodes batch peuvent \\u00eatre efficacement form\\u00e9es sur le r\\u00e9glage du flux dans des conditions sp\\u00e9cifiques. Nous proposons \\u00e9galement une adaptation de l'Extreme Gradient Boosting algorithme aux flux de donn\\u00e9es en \\u00e9volution. La m\\u00e9thode adaptative propos\\u00e9e g\\u00e9n\\u00e8re et met \\u00e0 jour l'ensemble de mani\\u00e8re incr\\u00e9mentielle \\u00e0 l'aide de mini-lots de donn\\u00e9es. 
Enfin, nous pr\\u00e9sentons scikit-multiflow, un framework open source en Python qui comble le vide en Python pour une plate-forme de d\\u00e9veloppement\\/recherche pour l'apprentissage \\u00e0 partir de flux de donn\\u00e9es en \\u00e9volution.\",\"published_in\":\"ABES\",\"year\":\"2020-02-02\",\"url\":\"http:\\/\\/www.theses.fr\\/2019SACLT014\\/document\",\"readers\":0,\"subject\":\"Apprentissage automatique; Flux de donn\\u00e9es; Classification; Donn\\u00e9es manquantes; D\\u00e9rive de concept; Big data; Machine learning; Data stream; Classification; Missing data; Concept drift; Big data; \",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.icgmw4\",\"title\":\"Machine learning methods and classification of vegetation in Brest, France\",\"authors\":\"Xie, Guanyao; Niculescu, Simona; Lam, Chinguyen; S\\u00e9veno, Elise\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2020-01-09\",\"url\":\"https:\\/\\/hal.archives-ouvertes.fr\\/hal-02431379\",\"readers\":0,\"subject\":\"[SHS.GEO]Humanities and Social Sciences\\/Geography; [SDE]Environmental Sciences; [SDE.IE]Environmental Sciences\\/Environmental Engineering\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.v7aojz\",\"title\":\"Word Embeddings in Sentiment Analysis\",\"authors\":\"Petrolito, Ruggero; Dell\\u2019Orletta, Felice\",\"paper_abstract\":\"In the late years sentiment analysis and its applications have reached growing popularity. Concerning this field of research, in the very late years machine learning and word representation learning derived from distributional semantics field (i.e. word embeddings) have proven to be very successful in performing sentiment analysis tasks. 
In this paper we describe a set of experiments, with the aim of evaluating the impact of word embedding-based features in sentiment analysis tasks.\",\"published_in\":\"OpenEdition\",\"year\":\"2019-04-08\",\"url\":\"http:\\/\\/books.openedition.org\\/aaccademia\\/3589\",\"readers\":0,\"subject\":\"linguistica computazionale; elaborazione del linguaggio naturale; analisi semantica; Gurevych (Iryna); Bos (Johan); Computational Linguistics; Natural Language Processing; semantic parsing; Gurevych (Iryna); Bos (Johan); History & Philosophy Of Science; LAN000000; CBX\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.0ecnbf\",\"title\":\"Le dialogue homme-machine : Intelligence artificielle \\/ intelligence humaine\\u00a0: manipulation et \\u00e9valuation\",\"authors\":\"Devillers, Laurence\",\"paper_abstract\":\"Apr\\u00e8s un premier dossier \\u00ab\\u00a0Cerveau et apprentissages\\u00a0\\u00bb (n\\u00b0 428), suivi d\\u2019un deuxi\\u00e8me opus consacr\\u00e9 \\u00e0 la plasticit\\u00e9 du cerveau (n\\u00b0 431), Futuribles ouvre un troisi\\u00e8me volet dans la s\\u00e9rie \\u00ab\\u00a0Cerveau\\u00a0\\u00bb, portant cette fois sur les interactions homme \\/ machine(s) et sur l\\u2019impact des \\u00e9crans sur le d\\u00e9veloppement des jeunes. Sp\\u00e9cialiste des interactions homme \\/ machine, Laurence Devillers pr\\u00e9sente ici les enjeux inh\\u00e9rents au d\\u00e9veloppement des agents conversationnels et autres robots dot\\u00e9s d\\u2019intelligence artificielle, qui interagissent de plus en plus souvent avec les individus, dans diff\\u00e9rents contextes.Apr\\u00e8s avoir rappel\\u00e9 comment fonctionnent ces syst\\u00e8mes (auto)apprenants, elle insiste sur la vigilance n\\u00e9cessaire \\u00e0 l\\u2019\\u00e9gard de possibles manipulations des individus par ce type d\\u2019interfaces (par le biais des nudges, notamment, techniques d\\u2019incitation douce). 
Elle montre aussi comment sont utilis\\u00e9es les \\u00e9motions dans les interactions homme-machine (ressorts affectifs, humour\\u2026) et pr\\u00e9sente les outils dont on dispose aujourd\\u2019hui pour \\u00e9valuer l\\u2019intelligence artificielle, voire la comparer \\u00e0 celle des humains (en particulier le test de Turing et ses limites). Compte tenu des progr\\u00e8s rapides de l\\u2019apprentissage machine, Laurence Devillers appelle au d\\u00e9veloppement de nouveaux tests d\\u2019\\u00e9valuation des capacit\\u00e9s des machines, visant en particulier \\u00e0 surveiller leur facult\\u00e9 \\u00e0 manipuler les individus. Car si les progr\\u00e8s techniques sont exponentiels, la fa\\u00e7on dont on encadre leur application dans la soci\\u00e9t\\u00e9 et dans le monde r\\u00e9el ne rel\\u00e8ve, pour l\\u2019heure encore, que des citoyens\\u00a0: c\\u2019est aux individus de d\\u00e9terminer d\\u00e8s \\u00e0 pr\\u00e9sent les limites \\u00e9thiques, r\\u00e9glementaires\\u2026, qui doivent encadrer les interfaces homme-machine.\\u00a0S.D.\",\"published_in\":\"Cairn\",\"year\":\"2019-11-26\",\"url\":\"http:\\/\\/www.cairn.info\\/article.php?ID_ARTICLE=FUTUR_433_0051\",\"readers\":0,\"subject\":\"\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.rsjznk\",\"title\":\"Machine learning methods and classification of vegetation in Brest, France\",\"authors\":\"Xie,, Guanyao; Niculescu, Simona; Lam, Nguyen Chi; S\\u00e9veno, Elise\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2019-12-21\",\"url\":\"https:\\/\\/hal.archives-ouvertes.fr\\/hal-02298535\",\"readers\":0,\"subject\":\"[SHS.GEO]Humanities and Social Sciences\\/Geography; [SPI.SIGNAL]Engineering Sciences [physics]\\/Signal and Image processing; [SDE]Environmental Sciences\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.qp9pag\",\"title\":\"A machine learning approach to pattern discovery in symbolic 
music\",\"authors\":\"de Reuse, Timothy\",\"paper_abstract\":\"The aim of a musical pattern discovery algorithm is to find all instances of repetition in a given input of music, allowing for a user-specifiable amount of variation between identified repetitions. Naive algorithms tend to return many identifications of repeated material that no human listener would readily identify as musically significant, rendering the results technically accurate but useless to the music analyst. Heuristics are often used to filter this set of patterns down to only those which are most \\\"significant\\\" for a given application. This thesis develops an alternative way to address this issue, by training machine learning-based classifiers on human annotations of repeated musical patterns with the goal of replicating the human annotator's judgment as to which patterns are significant. Three different classification methods are tested, based on k-nearest neighbor, k-nearest neighbor with genetic algorithm-optimized feature selection, and a feed-forward neural network. Only monophonic music in symbolic format is examined. 
Also discussed are previous approaches towards defining musical pattern significance, and the theoretical difficulties inherent to the task.\",\"published_in\":\"McGill Library\",\"year\":\"2019-03-29\",\"url\":\"http:\\/\\/digitool.Library.McGill.CA:80\\/R\\/?func=dbin-jump-full&object_id=163851\",\"readers\":0,\"subject\":\"Music\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.3ei261\",\"title\":\"Large data sets and machine learning: Applications to statistical arbitrage\",\"authors\":\"Huck, Nicolas\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2019-07-26\",\"url\":\"https:\\/\\/hal.archives-ouvertes.fr\\/hal-02143971\",\"readers\":0,\"subject\":\"Finance; Big data; Machine learning; Statistical arbitrage; [QFIN]Quantitative Finance [q-fin]\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.89q8un\",\"title\":\"Metashape -Photoscan\",\"authors\":\"Chayani, Mehdi\",\"paper_abstract\":\"Agisoft \\u00e0 mis \\u00e0 jour une nouvelle version de son logiciel de photogramm\\u00e9trie Photoscan renomm\\u00e9\\u00a0Metashape. De nouvelles \\u00e9volutions sont disponibles: Acc\\u00e9l\\u00e9ration du calcul par GPU Syst\\u00e8me de classification de donn\\u00e9e par machine learning Autres ....\",\"published_in\":\"OpenEdition\",\"year\":\"2019-01-31\",\"url\":\"http:\\/\\/shs3d.hypotheses.org\\/5507\",\"readers\":0,\"subject\":\"\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.ks30rm\",\"title\":\"Le printemps des machines\",\"authors\":\"Perriquet, Olivier\",\"paper_abstract\":\"L\\u2019intelligence artificielle a subi une s\\u00e9rie de phases d\\u2019hibernation, dont l\\u2019une des plus importantes a fait suite aux pr\\u00e9dictions pessimistes de Marvin Minsky quant \\u00e0 la capacit\\u00e9 des r\\u00e9seaux de neurones artificiels \\u00e0 rivaliser avec des m\\u00e9thodes algorithmiques. 
Mais les succ\\u00e8s r\\u00e9cents du paradigme connexionniste, illustr\\u00e9 par \\u00ab\\u00a0l\\u2019apprentissage profond\\u00a0\\u00bb (deep learning, machine learning), semblent augurer du retour d\\u2019un printemps ensoleill\\u00e9 pour une telle approche de l\\u2019intelligence artificiell...\",\"published_in\":\"OpenEdition\",\"year\":\"2019-06-03\",\"url\":\"http:\\/\\/books.openedition.org\\/ugaeditions\\/10470\",\"readers\":0,\"subject\":\"changement social; communication; m\\u00e9dias; syst\\u00e8mes m\\u00e9diatiques; Communication; Information Science & Library Science; SOC052000; JFD\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.w3i7zv\",\"title\":\"SIDE Summer School, day 2\",\"authors\":\"Charpentier, Arthur\",\"paper_abstract\":\"Tomorrow morning, it will be the second day of the SIdE (Italian Econometric Association) Summer School, on Machine Learning Algorithms for Econometricians. The third series of slides are online. and the fourth one are also online.\",\"published_in\":\"OpenEdition\",\"year\":\"2019-07-15\",\"url\":\"http:\\/\\/freakonometrics.hypotheses.org\\/58240\",\"readers\":0,\"subject\":\"\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.9d039g\",\"title\":\"On my way to Bertinoro (Forl\\u00ec-Cesena, Italia)\",\"authors\":\"Charpentier, Arthur\",\"paper_abstract\":\"At the end of this week, I will be flying to Europe, since I will be giving a series of lectures with Emmanuel Flachaire at the SIdE (Italian Econometric Association) Summer School, on Machine Learning Algorithms for Econometricians.\",\"published_in\":\"OpenEdition\",\"year\":\"2019-07-15\",\"url\":\"http:\\/\\/freakonometrics.hypotheses.org\\/57185\",\"readers\":0,\"subject\":\"\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.epec3l\",\"title\":\"SIDE Summer School, days 4 and 5\",\"authors\":\"Charpentier, Arthur\",\"paper_abstract\":\"Almost the final set of slides for the SIdE (Italian Econometric Association) Summer School, on Machine Learning 
Algorithms for Econometricians. The eighth series of slides are online, as well as the nineth series (also online) and the twelweth (online)\",\"published_in\":\"OpenEdition\",\"year\":\"2019-07-19\",\"url\":\"http:\\/\\/freakonometrics.hypotheses.org\\/58249\",\"readers\":0,\"subject\":\"\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.2ms4vc\",\"title\":\"Learning Edit Cost Estimation Models for Graph Edit Distance\",\"authors\":\"Cort\\u00e9s, Xavier; Conte, Donatello; Cardot, Hubert\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2019-09-14\",\"url\":\"https:\\/\\/hal.archives-ouvertes.fr\\/hal-02285793\",\"readers\":0,\"subject\":\"[INFO.INFO-CV]Computer Science [cs]\\/Computer Vision and Pattern Recognition [cs.CV]; [SCCO.COMP]Cognitive science\\/Computer science; [INFO.INFO-TI]Computer Science [cs]\\/Image Processing [eess.IV]; [INFO.INFO-TS]Computer Science [cs]\\/Signal and Image Processing\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.jg8ess\",\"title\":\"Statistical analysis and machine learning algorithms for RF breast cancer screening\",\"authors\":\"Joseph, Collin\",\"paper_abstract\":\"The work of this thesis explores statistical and machine learning methods for anomaly detection in a novel low-power microwave breast cancer screening system. Reported dielectric contrast in the microwave frequency range between healthy and malign breast tissue is the main motivator behind the effort to design a time-domain radar-based prototype for safe breast screening. The microwave radar does not strive to yield a three-dimensional image of the breast interior. Instead, its aimed use wouldbe for frequent monthly screenings which have the potential to detect a departure from the normal, hence increasing the chance of early detection and, in turn, successful treatment. 
The data used for the development of the algorithms was obtained either in controlled laboratory experiments on tissue-mimicking phantoms or in a clinical setting. Since the data is preliminary and scarce, the conclusions may be limited, but in the process of the algorithmic development, this work strives to takeinto account the nature of the signals and how they have been generated in this very new application. The following methods were adapted and applied to the data sets: simple statistical analysis to illustrate the differences in the data sets investigated in this work; discrete Fourier transform, short-time Fourier transform, empirical mode decomposition and ad hoc time domain analysis to derive effective featureextraction strategies for the radio-frequency radar scans; high-dimensional statistical hypothesis tests to investigate the characteristics of time-frequency features extracted; random search, random walk, simulated annealing, genetic algorithm and particle swarm derivative-free optimization algorithms to improve the computational efficiency of an ensemble cost-sensitive support vector machine classifier based on previous literature; and a forward step-wise ensemble selection algorithm to improvethe predictive performance of the classifier. For each of the methods, the results were discussed in the light of the limitations of the collected data sets. Older data sets were found to have high signal amplitudes on average. Statistically significant differences between features extracted from scans with anomalies and scans without anomalies were only observed for scans of subjects with higher average permittivity. The time-frequency analysis features yielded superior predictive performance thanfeature extraction using dimensionality reduction by principal component analysis. 
The computational efficiency of the classifier was improved by a factor of at least 3.8 when optimization algorithms were used for hyperparameter selection, instead of an exhaustive grid search. With the data available, the forward step-wise selection algorithm did not improve the predictive performance as was anticipated.\",\"published_in\":\"McGill Library\",\"year\":\"2019-11-28\",\"url\":\"http:\\/\\/digitool.Library.McGill.CA:80\\/R\\/?func=dbin-jump-full&object_id=169676\",\"readers\":0,\"subject\":\"Electrical and Computer Engineering\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.g0sb2u\",\"title\":\"Machine Learning in Amyotrophic Lateral Sclerosis: Achievements, Pitfalls, and Future Directions\",\"authors\":\"Grollemund, Vincent; Pradat, Pierre-Fran\\u00e7ois; Querin, Giorgia; Delbot, Fran\\u00e7ois; Le Chat, Ga\\u00e9tan; Pradat-Peyre, Jean-Fran\\u00e7ois; Bede, Peter\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2019-11-19\",\"url\":\"https:\\/\\/hal.sorbonne-universite.fr\\/hal-02063464\",\"readers\":0,\"subject\":\"amyotrophic lateral sclerosis; motor neuron disease; clustering; risk stratification; prognosis; diagnosis; machine learning; [SCCO.NEUR]Cognitive science\\/Neuroscience\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"http:\\/\\/hdl.handle.net\\/20.500.11794\\/34741\",\"title\":\"A Machine Learning Approach for the Smart Charging of Electric Vehicles\",\"authors\":\"Lopez, Karol Lina\",\"paper_abstract\":\"Avec l\\u2019adoption croissante des v\\u00e9hicules \\u00e9lectriques, il y a un int\\u00e9r\\u00eat pour utiliser des tarifs dynamiques dont le prix d\\u00e9pend de la demande actuelle, pour encourager les utilisateurs \\u00e0 recharger leurs v\\u00e9hicules en p\\u00e9riode de faible demande \\u00e9vitant les pics d\\u2019\\u00e9lectricit\\u00e9 pouvant d\\u00e9passer la capacit\\u00e9 install\\u00e9e. 
Le probl\\u00e8me que devaient affronter les utilisateurs de v\\u00e9hicules \\u00e9lectriques est qu\\u2019ils doivent s\\u2019assurer que l\\u2019\\u00e9nergie \\u00e9lectrique pr\\u00e9sente dans les batteries est suffisante pour les d\\u00e9placements et que les p\\u00e9riodes de recharge correspondent \\u00e0 des p\\u00e9riodes o\\u00f9 le prix de l\\u2019\\u00e9lectricit\\u00e9 est bas. La plupart des approches actuelles de planification de recharge supposent une connaissance parfaite des futurs prix de l\\u2019\\u00e9lectricit\\u00e9 et de l\\u2019utilisation du v\\u00e9hicule, ce qui nuit \\u00e0 leur applicabilit\\u00e9 dans la pratique. Cette th\\u00e8se consid\\u00e8re la mod\\u00e9lisation de la recharge intelligente des v\\u00e9hicules \\u00e9lectriques pour d\\u00e9terminer, lors des sessions de connexion, les moments o\\u00f9 le v\\u00e9hicule doit se recharger afin de minimiser le co\\u00fbt pay\\u00e9 pour l\\u2019\\u00e9nergie de ses d\\u00e9placements. La th\\u00e8se comporte quatre principales contributions: 1) Mod\\u00e8le de recharge optimale des v\\u00e9hicules \\u00e9lectriques pour g\\u00e9n\\u00e9rer une s\\u00e9rie de d\\u00e9cisions en utilisant la connaissance a priori du prix de l\\u2019\\u00e9lectricit\\u00e9 et de l\\u2019\\u00e9nergie utilis\\u00e9e, en utilisant la programmation dynamique comme m\\u00e9thode d\\u2019optimisation. 2) Cr\\u00e9ation d\\u2019un mod\\u00e8le de syst\\u00e8me d\\u2019information incluant des variables connexes au mod\\u00e8le de recharge des v\\u00e9hicules \\u00e9lectriques dans un cadre guid\\u00e9 par des donn\\u00e9es. 3) M\\u00e9thode de s\\u00e9lection des donn\\u00e9es pertinentes utilisant la stratification de donn\\u00e9es pouvant r\\u00e9duire significativement le temps requis pour entra\\u00eener les mod\\u00e8les de pr\\u00e9vision avec des r\\u00e9sultats proches de ceux obtenus en utilisant l\\u2019ensemble de donn\\u00e9es complet. 
4) Mod\\u00e8le de classification en ligne qui permet de d\\u00e9terminer s\\u2019il faut charger ou non le v\\u00e9hicule \\u00e0 l\\u2019aide de mod\\u00e8les d\\u2019apprentissage automatique qui peuvent g\\u00e9n\\u00e9rer, en temps r\\u00e9el, une d\\u00e9cision de recharge quasi-optimale sans tenir compte d\\u2019une connaissance de l\\u2019information future. Nous d\\u00e9montrons comment la combinaison d\\u2019une m\\u00e9thode d\\u2019optimisation hors ligne, telle que la programmation dynamique, avec des mod\\u00e8les d\\u2019apprentissage automatique et un syst\\u00e8me d\\u2019information ad\\u00e9quat peut fournir une solution tr\\u00e8s proche de l\\u2019optimum global, sans perte d\\u2019applicabilit\\u00e9 dans le monde r\\u00e9el. De plus, la polyvalence de l\\u2019approche propos\\u00e9e permet d\\u2019envisager l\\u2019int\\u00e9gration d\\u2019un plus grand nombre de variables \\u00e0 l\\u2019entr\\u00e9e du mod\\u00e8le, ainsi que d\\u2019autres actions comme par exemple fournir d\\u2019\\u00e9nergie au r\\u00e9seau \\u00e9lectrique pour aider \\u00e0 r\\u00e9duire les pics de demande ce qui pourrait \\u00eatre utile dans un contexte de vehicle-to-grid (V2G).\",\"published_in\":\"Universit\\u00e9 Laval\",\"year\":\"2019-05-08\",\"url\":\"http:\\/\\/hdl.handle.net\\/20.500.11794\\/34741\",\"readers\":0,\"subject\":\"TK 7.5 UL 2019; Apprentissage automatique; V\\u00e9hicules \\u00e9lectriques -- Alimentation en \\u00e9nergie -- Mod\\u00e8les math\\u00e9matiques; Chargeurs (G\\u00e9nie \\u00e9lectrique)\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.lv11y2\",\"title\":\"Computational and Robotic Models of Early Language Development: A Review\",\"authors\":\"Oudeyer, Pierre-Yves; Kachergis, George; Schueller, William\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2020-01-15\",\"url\":\"https:\\/\\/hal.inria.fr\\/hal-02371233\",\"readers\":0,\"subject\":\"Social 
interaction; Complexity; Dynamical systems; Self-organization; Intrinsic motivation; Early language learning; Computational and robotic models; Machine learning; Development; Embodiment; [SCCO.COMP]Cognitive science\\/Computer science; [INFO.INFO-AI]Computer Science [cs]\\/Artificial Intelligence [cs.AI]\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.3w729f\",\"title\":\"Easily implementable time series forecasting techniques for resource provisioning in cloud computing\",\"authors\":\"Fliess, Michel; Join, C\\u00e9dric; Bekcheva, Maria; Moradi, Alireza; Mounier, Hugues\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2019-12-20\",\"url\":\"https:\\/\\/hal-polytechnique.archives-ouvertes.fr\\/hal-02024835\",\"readers\":0,\"subject\":\"trend; quick fluctuation; machine learning; nonstandard analysis; time series; forecasting; computing resources; virtual machines; Cloud computing; seasonality; estimation; [INFO.INFO-PF]Computer Science [cs]\\/Performance [cs.PF]; [INFO.INFO-ET]Computer Science [cs]\\/Emerging Technologies [cs.ET]; [INFO.INFO-CE]Computer Science [cs]\\/Computational Engineering, Finance, and Science [cs.CE]; [INFO.INFO-AU]Computer Science [cs]\\/Automatic Control Engineering; [STAT.ME]Statistics [stat]\\/Methodology [stat.ME]; [MATH.MATH-LO]Mathematics [math]\\/Logic [math.LO]; [QFIN.PR]Quantitative Finance [q-fin]\\/Pricing of Securities [q-fin.PR]\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.lb625e\",\"title\":\"SIDE Summer School, day 6\",\"authors\":\"Charpentier, Arthur\",\"paper_abstract\":\"Saturday morning, final set of slides for the SIdE (Italian Econometric Association) Summer School, on Machine Learning Algorithms for Econometricians. 
The first part is now online, on time series as well as the second part, on causal inference (with just a broad overview), which is also\",\"published_in\":\"OpenEdition\",\"year\":\"2019-07-19\",\"url\":\"http:\\/\\/freakonometrics.hypotheses.org\\/58253\",\"readers\":0,\"subject\":\"\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.45wtnz\",\"title\":\"Police pr\\u00e9dictive et fichiers de police\",\"authors\":\"Piotrowicz, Cyril\",\"paper_abstract\":\"National audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2020-02-10\",\"url\":\"https:\\/\\/hal-univ-lyon3.archives-ouvertes.fr\\/hal-02078051\",\"readers\":0,\"subject\":\"Analyse pr\\u00e9dictive; Predpol; Police pr\\u00e9dictive; Fichiers de police; Donn\\u00e9es personnelles; RGPD GDPR; [SHS.DROIT]Humanities and Social Sciences\\/Law\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.ee9cd8\",\"title\":\"SIDE Summer School, day 3\",\"authors\":\"Charpentier, Arthur\",\"paper_abstract\":\"Tomorrow morning, it will be the third day of the SIdE (Italian Econometric Association) Summer School, on Machine Learning Algorithms for Econometricians. The fifth series of slides are online. 
and so is the sixth series, that are online and the seventh series of slides, that are online\",\"published_in\":\"OpenEdition\",\"year\":\"2019-07-16\",\"url\":\"http:\\/\\/freakonometrics.hypotheses.org\\/58245\",\"readers\":0,\"subject\":\"\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.kg26w4\",\"title\":\"Toward Dialogue Modeling: A Semantic Annotation Scheme for Questions and Answers\",\"authors\":\"Cruz-Bland\\u00f3n, Maria-Andrea; Minnema, Gosse; Nourbakhsh, Aria; Boritchev, Maria; Amblard, Maxime\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2020-01-06\",\"url\":\"https:\\/\\/hal.inria.fr\\/hal-02269613\",\"readers\":0,\"subject\":\"[SCCO.LING]Cognitive science\\/Linguistics; [SCCO.COMP]Cognitive science\\/Computer science; [INFO.INFO-TT]Computer Science [cs]\\/Document and Text Processing; [INFO.INFO-AI]Computer Science [cs]\\/Artificial Intelligence [cs.AI]\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.zmn1oz\",\"title\":\"The challenge of responsible AI\",\"authors\":\"Balagu\\u00e9, Christine\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2020-02-10\",\"url\":\"https:\\/\\/hal.archives-ouvertes.fr\\/hal-01984671\",\"readers\":0,\"subject\":\"Artificial intelligence; Ethics; Responsibility; Algorithm; [SHS.GESTION]Humanities and Social Sciences\\/Business administration\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.fok7n3\",\"title\":\"ML-Based Feature Importance Estimation for Predicting Unethical Behaviour under Pressure\",\"authors\":\"Rivas, Pablo; Harper, Pamela,; Cary, John,; Brown, William,\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2019-08-10\",\"url\":\"https:\\/\\/hal.archives-ouvertes.fr\\/hal-02264914\",\"readers\":0,\"subject\":\"[SDV.ETH]Life Sciences [q-bio]\\/Ethics; 
[STAT.ML]Statistics [stat]\\/Machine Learning [stat.ML]; [INFO.INFO-LG]Computer Science [cs]\\/Machine Learning [cs.LG]; [INFO.INFO-AI]Computer Science [cs]\\/Artificial Intelligence [cs.AI]; [INFO.INFO-CY]Computer Science [cs]\\/Computers and Society [cs.CY]; [SDE.MCG]Environmental Sciences\\/Global Changes\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.8nu0uf\",\"title\":\"Semantic Systems. The Power of AI and Knowledge Graphs : 15th International Conference, SEMANTiCS 2019, Karlsruhe, Germany, September 9\\u201312, 2019, Proceedings\",\"authors\":\"\",\"paper_abstract\":\"This open access book constitutes the refereed proceedings of the 15th International Conference on Semantic Systems, SEMANTiCS 2019, held in Karlsruhe, Germany, in September 2019. The 20 full papers and 8 short papers presented in this volume were carefully reviewed and selected from 88 submissions. They cover topics such as: web semantics and linked (open) data; machine learning and deep learning techniques; semantic information management and knowledge integration; terminology, thesaurus and ontology management; data mining and knowledge discovery; semantics in blockchain and distributed ledger technologies.\",\"published_in\":\"OAPEN\",\"year\":\"2020-02-04\",\"url\":\"http:\\/\\/www.oapen.org\\/search?identifier=1006835\",\"readers\":0,\"subject\":\"Computer science; Knowledge representation (Information theory)\\u00a0; Special purpose computers; Computer communication systems; User interfaces (Computer systems); Application software; UKN; UNH; UYQE; UYZG\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.tdnvft\",\"title\":\"Entropy-regularized Optimal Transport for Machine Learning\",\"authors\":\"Genevay, Aude\",\"paper_abstract\":\"Le Transport Optimal r\\u00e9gularis\\u00e9 par l\\u2019Entropie (TOE) permet de d\\u00e9\\ufb01nir les Divergences de Sinkhorn (DS), une nouvelle classe de distance entre mesures de probabilit\\u00e9s bas\\u00e9es sur le TOE. 
Celles-ci permettentd\\u2019interpolerentredeuxautresdistancesconnues: leTransport Optimal(TO)etl\\u2019EcartMoyenMaximal(EMM).LesDSpeuvent\\u00eatre utilis\\u00e9es pour apprendre des mod\\u00e8les probabilistes avec de meilleures performances que les algorithmes existants pour une r\\u00e9gularisation ad\\u00e9quate. Ceci est justi\\ufb01\\u00e9 par un th\\u00e9or\\u00e8me sur l\\u2019approximation des SDpardes\\u00e9chantillons, prouvantqu\\u2019uner\\u00e9gularisationsusantepermet de se d\\u00e9barrasser de la mal\\u00e9diction de la dimension du TO, et l\\u2019on retrouve \\u00e0 l\\u2019in\\ufb01ni le taux de convergence des EMM. En\\ufb01n, nous pr\\u00e9sentons de nouveaux algorithmes de r\\u00e9solution pour le TOE bas\\u00e9s surl\\u2019optimisationstochastique\\u2018en-ligne\\u2019qui,contrairement\\u00e0l\\u2019\\u00e9tatde l\\u2019art, ne se restreignent pas aux mesures discr\\u00e8tes et s\\u2019adaptent bien aux probl\\u00e8mes de grande dimension.\",\"published_in\":\"ABES\",\"year\":\"2020-01-31\",\"url\":\"http:\\/\\/www.theses.fr\\/2019PSLED002\\/document\",\"readers\":0,\"subject\":\"Transport Optimal; Apprentissage Statistique; Optimal Transport; Machine Learning; 006.3\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.fp0hqv\",\"title\":\"Spectral Graph Wavelet Transform as Feature Extractor for Machine Learning in Neuroimaging\",\"authors\":\"Pilavci, Yusuf,; Farrugia, Nicolas\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2020-01-29\",\"url\":\"https:\\/\\/hal-imt-atlantique.archives-ouvertes.fr\\/hal-02052244\",\"readers\":0,\"subject\":\"graph signal processing; wavelets; neuroimaging; regression; [SCCO.NEUR]Cognitive science\\/Neuroscience; [INFO.INFO-CV]Computer Science [cs]\\/Computer Vision and Pattern Recognition [cs.CV]; [INFO.INFO-AI]Computer Science [cs]\\/Artificial Intelligence 
[cs.AI]\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.dc5rg5\",\"title\":\"Cuarta revoluci\\u00f3n industrial (4.0.) o ciberindustria en el proceso penal: revoluci\\u00f3n digital, inteligencia artificial y el camino hacia la robotizaci\\u00f3n de la justicia\",\"authors\":\"Barona Vilar, Silvia\",\"paper_abstract\":\"This paper aims to analyze the irruption of the fourth industrial revolution (4.0.) and its consequences, especially Artificial Intelligence, in Justice, particularly in Criminal Justice. The incorporation of expert systems, algorithms and computational models in counseling is now a reality, as well as in predicting and in legal decisions. The incorporation of judicial robotic or robot-judge (machine learning) represents a disturbing situation. The replacement of the human-judge creates an artificial cognition that raises doubts about ethics, reliability, the individualization of the sanction, the possible responsibility of the machine, the sensitivity of especially vulnerable victims or depressed groups, the violation of the people's rights, etc.\",\"published_in\":\"RODERIC - Universitat de Val\\u00e8ncia\",\"year\":\"2020-01-09\",\"url\":\"https:\\/\\/hdl.handle.net\\/10550\\/72535\",\"readers\":0,\"subject\":\"Dret penal\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.fbxvrs\",\"title\":\"Du code dans ma th\\u00e8se, 3 interventions autour de l\\u2019auto-instrumentation en th\\u00e8se de doctorat\",\"authors\":\"Pastorelli, Sabrina\",\"paper_abstract\":\"Trois doctorant.e.s se sont confront\\u00e9.e.s aux questions de la recherche m\\u00e9thodologique \\u00e9tant \\u00e0 la fois chercheurs et promoteurs de leur propres outils et de leurs m\\u00e9thodes dans des disciplines diff\\u00e9rentes : science politique, machine learning, Arts & Design. 
Le podcast du s\\u00e9minaire du MetSem - S\\u00e9minaire M\\u00e9thodologie de Sciences Po est en ligne, \\u00e0 \\u00e9couter sur le site MetSem\",\"published_in\":\"OpenEdition\",\"year\":\"2019-09-30\",\"url\":\"http:\\/\\/qualiquanti.hypotheses.org\\/19\",\"readers\":0,\"subject\":\"\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"http:\\/\\/hdl.handle.net\\/10251\\/126095\",\"title\":\"An\\u00e1lisis de im\\u00e1genes m\\u00e9dicas de cerebro para dar soporte al diagn\\u00f3stico\",\"authors\":\"Nadal Almela, Silvia\",\"paper_abstract\":\"[CA] Durant aquest projecte s\\u2019ha treballat amb imatges m\\u00e8diques disponibles gr\\u00e0cies a la col\\u00b7laboraci\\u00f3 entre el centre d\\u2019investigaci\\u00f3 Pattern Recognition and Human Language Technology \\u2013PRHLT\\u2013 i la Fundaci\\u00f3 per al Foment de la Investigaci\\u00f3 Sanit\\u00e0ria i Biom\\u00e8dica de la Comunitat Valenciana \\u2013FISABIO\\u2013. La finalitat \\u00faltima d\\u2019aquest projecte ha sigut construir un classificador per a cada tipus d\\u2019imatge segons l\\u2019etapa de la malaltia Alzh\\u00e8imer en qu\\u00e8 es troba el pacient. Els classificadors han estat basats tant en Deep Learning, alguns utilitzant Convolutional Neural Networks, com en m\\u00e8todes cl\\u00e0ssics de Machine Learning. Abans d\\u2019entrenar els classificadors, s\\u2019ha avaluat la qualitat dels conjunts de dades i s\\u2019ha fet un estudi de les seues caracter\\u00edstiques. 
Finalment, s\\u2019han comparat els resultats obtinguts amb les diferents t\\u00e8cniques emprades\",\"published_in\":\"Universitat Polit\\u00e8cnica de Val\\u00e8ncia\",\"year\":\"2019-09-19\",\"url\":\"http:\\/\\/hdl.handle.net\\/10251\\/126095\",\"readers\":0,\"subject\":\"Xarxes neuronals; Imatge m\\u00e8dica; Processament d'imatge; Classificador; Aprenentatge autom\\u00e0tic; Redes neuronales; Imagen m\\u00e9dica; Procesamiento de imagen; Clasificador; Aprendizaje autom\\u00e1tico; Neural networks; Medical image; Image processing; Classifier; Machine learning; LENGUAJES Y SISTEMAS INFORMATICOS; Grado en Ingenier\\u00eda Inform\\u00e1tica-Grau en Enginyeria Inform\\u00e0tica\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.ssfroa\",\"title\":\"OM-AI: A Toolkit to Support AI-Based Computer-Assisted Composition Workflows in OpenMusic\",\"authors\":\"Vinjar, Anders; Bresson, Jean\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2019-05-24\",\"url\":\"https:\\/\\/hal.archives-ouvertes.fr\\/hal-02126847\",\"readers\":0,\"subject\":\"[INFO.INFO-PL]Computer Science [cs]\\/Programming Languages [cs.PL]; [INFO.INFO-AI]Computer Science [cs]\\/Artificial Intelligence [cs.AI]; [SHS.MUSIQ]Humanities and Social Sciences\\/Musicology and performing arts\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"http:\\/\\/hdl.handle.net\\/10251\\/124304\",\"title\":\"First-Stage Prostate Cancer Identification on Histopathological Images: Hand-Driven versus Automatic Learning\",\"authors\":\"Garc\\u00eda-Pardo, Jos\\u00e9 Gabriel; Colomer, Adri\\u00e1n; Naranjo Ornedo, Valeriana\",\"paper_abstract\":\"[EN] Analysis of histopathological image supposes the most reliable procedure to identify prostate cancer. Most studies try to develop computer aid-systems to face the Gleason grading problem. 
On the contrary, we delve into the discrimination between healthy and cancerous tissues in its earliest stage, only focusing on the information contained in the automatically segmented gland candidates. We propose a hand-driven learning approach, in which we perform an exhaustive hand-crafted feature extraction stage combining in a novel way descriptors of morphology, texture, fractals and contextual information of the candidates under study. Then, we carry out an in-depth statistical analysis to select the most relevant features that constitute the inputs to the optimised machine-learning classifiers. Additionally, we apply for the first time on prostate segmented glands, deep-learning algorithms modifying the popular VGG19 neural network. We fine-tuned the last convolutional block of the architecture to provide the model specific knowledge about the gland images. The hand-driven learning approach, using a nonlinear Support Vector Machine, reports a slight outperforming over the rest of experiments with a final multi-class accuracy of 0.876 +\\/- 0.026<\\/mml:semantics> in the discrimination between false glands (artefacts), benign glands and Gleason grade 3 glands.\",\"published_in\":\"Universitat Polit\\u00e8cnica de Val\\u00e8ncia\",\"year\":\"2019-07-29\",\"url\":\"http:\\/\\/hdl.handle.net\\/10251\\/124304\",\"readers\":0,\"subject\":\"Gland classification; Hand-crafted feature extraction; Feature selection; Hand-driven learning; Deep learning; Prostate cancer; Histological image; TEORIA DE LA SE\\u00d1AL Y COMUNICACIONES\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.0qijb7\",\"title\":\"Knowledge Models and Image Processing Analysis in Remote Sensing: Examples of Yakutsk (Russia) and Kaunas (Lithuania)\",\"authors\":\"Gadal, S\\u00e9bastien; Ouerghemmi, Walid\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique 
directe\",\"year\":\"2020-01-14\",\"url\":\"https:\\/\\/hal-amu.archives-ouvertes.fr\\/hal-02120100\",\"readers\":0,\"subject\":\"Arctic; Lithuania; Russia; Kaunas; Yakutsk; Remote Sensing; Geographic Knowledge; Artificial Intelligence; Spectral Databases; Morphometric attributes; Geographic Ontologies; Temporal Analysis; [INFO.INFO-TI]Computer Science [cs]\\/Image Processing [eess.IV]; [SHS.STAT]Humanities and Social Sciences\\/Methods and statistics; [SDE.ES]Environmental Sciences\\/Environmental and Society; [SHS.GEO]Humanities and Social Sciences\\/Geography; [INFO.INFO-MO]Computer Science [cs]\\/Modeling and Simulation\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.w0wd5u\",\"title\":\"Leveraging machine learning for efficient mobility management and data transmission in fog computing\",\"authors\":\"Memon, Salman\",\"paper_abstract\":\"Fog computing is a new proposed architecture that complements the existing cloud computing through one or more layers of intermediate computing servers. These servers, called fogs, are deployed at the edge of the network and bridge the gap between end-user devices and the cloud. They provide compute and storage resources to devices in a similar way to the cloud but in a more distributed fashion. This new computing paradigm offers a set of new challenges that we aim to address in this research. We leverage the use of machine learning, particularly deep learning algorithms capable of taking advantage of the large volumes of data generated in smart city scenarios, to improve the efficiency of the fog computing middleware JAMScript. The first part of our work focuses on optimizing the handover procedure for mobile devices in a fog computing environment using a set of fog and cost predictors. These predictors are used to reduce the service interruptionsexperienced while transitioning from one fog node to another. We simulate a city level fog network with real-world data derived from taxi traces in Shanghai city. 
We then model the fog associations for vehicles using a feedforward neural network as well as the cost (latency) of interacting with a particular fog server using an recurrent neural network (RNN) with long short-term memory (LSTM) cells. We present a system architecture that describes the components of this predictive system as well as a smarter request routing scheme that can be implemented using it. The second part of our research introduces a learning logger architecture that utilizes an ensemble of LSTMs to model data streams derived from devices at the fog servers. We show how predictions from the learning model can be used to partially replace actual data, thereby saving on valuable bandwidth without compromising the integrity and usability of the data. Finally, we conduct a series of experiments that showcase the performance of our predictive systems and quantify their benefit in a fog computing environment. In these experiments, we use a Docker-container based emulator for a vehicular fog network created using JAMScript to evaluate the fog and cost predictors. For the learning logger experiments, we use a set of environmental sensor data streams.Experimental results show that these systems can yield a considerable reduction in resource usage and gain in transmission efficiency in a fog computing environment.\",\"published_in\":\"McGill Library\",\"year\":\"2019-03-29\",\"url\":\"http:\\/\\/digitool.Library.McGill.CA:80\\/R\\/?func=dbin-jump-full&object_id=163916\",\"readers\":0,\"subject\":\"Electrical and Computer Engineering\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.pyqgsr\",\"title\":\"Modelling world agriculture as a learning machine? 
From mainstream models to Agribiom 1.0\",\"authors\":\"Dorin, Bruno; Joly, Pierre-Beno\\u00eet\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2020-02-11\",\"url\":\"https:\\/\\/hal.archives-ouvertes.fr\\/hal-02106267\",\"readers\":0,\"subject\":\"Global modelling; Agriculture; Science and technology studies; Learning machine; Agrimonde; [SHS]Humanities and Social Sciences; [QFIN]Quantitative Finance [q-fin]\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.8sw3mq\",\"title\":\"Th\\u00e9orie des matrices al\\u00e9atoires pour l'apprentissage automatique en grande dimension et les r\\u00e9seaux de neurones\",\"authors\":\"Liao, Zhenyu\",\"paper_abstract\":\"Le \\\"Big Data'' et les grands syst\\u00e8mes d'apprentissage sont omnipr\\u00e9sents dans les probl\\u00e8mes d'apprentissage automatique aujourd\\u2019hui. Contrairement \\u00e0 l'apprentissage de petite dimension, les algorithmes d'apprentissage en grande dimension sont sujets \\u00e0 divers ph\\u00e9nom\\u00e8nes contre-intuitifs et se comportent de mani\\u00e8re tr\\u00e8s diff\\u00e9rente des intuitions de petite dimension sur lesquelles ils sont construits. Cependant, en supposant que la dimension et le nombre des donn\\u00e9es sont \\u00e0 la fois grands et comparables, la th\\u00e9orie des matrices al\\u00e9atoires (RMT) fournit une approche syst\\u00e9matique pour \\u00e9valuer le comportement statistique de ces grands syst\\u00e8mes d'apprentissage, lorsqu'ils sont appliqu\\u00e9s \\u00e0 des donn\\u00e9es de grande dimension. 
L\\u2019objectif principal de cette th\\u00e8se est de proposer un sch\\u00e9ma d'analyse bas\\u00e9 sur la RMT, pour une grande famille de syst\\u00e8mes d\\u2019apprentissage automatique: d'\\u00e9valuer leurs performances, de mieux les comprendre et finalement les am\\u00e9liorer, afin de mieux g\\u00e9rer les probl\\u00e8mes de grandes dimensions aujourd'hui.Pr\\u00e9cis\\u00e9ment, nous commen\\u00e7ons par exploiter la connexion entre les grandes matrices \\u00e0 noyau, les projection al\\u00e9atoires non-lin\\u00e9aires et les r\\u00e9seaux de neurones al\\u00e9atoires simples. En consid\\u00e9rant que les donn\\u00e9es sont tir\\u00e9es ind\\u00e9pendamment d'un mod\\u00e8le de m\\u00e9lange gaussien, nous fournissons une caract\\u00e9risation pr\\u00e9cise des performances de ces syst\\u00e8mes d'apprentissage en grande dimension, exprim\\u00e9e en fonction des statistiques de donn\\u00e9es, de la dimensionnalit\\u00e9 et, surtout, des hyper-param\\u00e8tres du probl\\u00e8me. Lorsque des algorithmes d'apprentissage plus complexes sont consid\\u00e9r\\u00e9s, ce sch\\u00e9ma d'analyse peut \\u00eatre \\u00e9tendu pour acc\\u00e9der \\u00e0 de syst\\u00e8mes d'apprentissage qui sont d\\u00e9finis (implicitement) par des probl\\u00e8mes d'optimisation convexes, lorsque des points optimaux sont atteints. Pour trouver ces points, des m\\u00e9thodes d'optimisation telles que la descente de gradient sont r\\u00e9guli\\u00e8rement utilis\\u00e9es. 
\\u00c0 cet \\u00e9gard, dans le but d'avoir une meilleur compr\\u00e9hension th\\u00e9orique des m\\u00e9canismes internes de ces m\\u00e9thodes d'optimisation et, en particulier, leur impact sur le mod\\u00e8le d'apprentissage, nous \\u00e9valuons aussi la dynamique de descente de gradient dans les probl\\u00e8mes d'optimisation convexes et non convexes.Ces \\u00e9tudes pr\\u00e9liminaires fournissent une premi\\u00e8re compr\\u00e9hension quantitative des algorithmes d'apprentissage pour le traitement de donn\\u00e9es en grandes dimensions, ce qui permet de proposer de meilleurs crit\\u00e8res de conception pour les grands syst\\u00e8mes d\\u2019apprentissage et, par cons\\u00e9quent, d'avoir un gain de performance remarquable lorsqu'il est appliqu\\u00e9 \\u00e0 des jeux de donn\\u00e9es r\\u00e9els. Profond\\u00e9ment ancr\\u00e9 dans l'id\\u00e9e d'exploiter des donn\\u00e9es de grandes dimensions avec des informations r\\u00e9p\\u00e9t\\u00e9es \\u00e0 un niveau \\\"global'' plut\\u00f4t qu'\\u00e0 un niveau \\\"local'', ce sch\\u00e9ma d'analyse RMT permet une compr\\u00e9hension renouvel\\u00e9e et la possibilit\\u00e9 de contr\\u00f4ler et d'am\\u00e9liorer une famille beaucoup plus large de m\\u00e9thodes d'apprentissage automatique, ouvrant ainsi la porte \\u00e0 un nouveau sch\\u00e9ma d'apprentissage automatique pour l'intelligence artificielle.\",\"published_in\":\"ABES\",\"year\":\"2020-02-02\",\"url\":\"http:\\/\\/www.theses.fr\\/2019SACLC068\\/document\",\"readers\":0,\"subject\":\"Apprentissage automatique; Th\\u00e9orie des matrices al\\u00e9atoires; R\\u00e9seaux de neurones; Machine learning; Random matrix theory; Neural networks; \",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.9snvqs\",\"title\":\"Minerva and Virgil-a program (Notes on Karpathy et al. 
2014)\",\"authors\":\"Akmut, Camille\",\"paper_abstract\":\"Recent advances in machine learning and computer vision have resulted in the accurate and consistent identification of objects in images and videos. In this paper, a program named \\\"Minerva and Virgil\\\" is described whose purpose it is to counter State brutality, a term that we propose as the generalization of the already established \\\"police brutality\\\", by making use of these technologies.\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2019-04-15\",\"url\":\"https:\\/\\/hal.archives-ouvertes.fr\\/hal-02054664\",\"readers\":0,\"subject\":\"[INFO.INFO-AI]Computer Science [cs]\\/Artificial Intelligence [cs.AI]; [INFO.INFO-CY]Computer Science [cs]\\/Computers and Society [cs.CY]; [INFO.INFO-CV]Computer Science [cs]\\/Computer Vision and Pattern Recognition [cs.CV]; [INFO.INFO-LG]Computer Science [cs]\\/Machine Learning [cs.LG]; [SHS.PHIL]Humanities and Social Sciences\\/Philosophy\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.b5sy6l\",\"title\":\"Towards incorporating ethics in recommendation systems\",\"authors\":\"Balagu\\u00e9, Christine; Rochd, El Mehdi\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2020-02-10\",\"url\":\"https:\\/\\/hal.archives-ouvertes.fr\\/hal-02093828\",\"readers\":0,\"subject\":\"Ethique; Intelligence artificielle; Responsabilit\\u00e9; Algorithmes; [SHS.GESTION]Humanities and Social Sciences\\/Business administration\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.9m3tmj\",\"title\":\"Learning by stochastic serializations\",\"authors\":\"Strasser, Pablo Ramon; Armand, St\\u00e9phane; Marchand-Maillet, St\\u00e9phane; Kalousis, Alexandros\",\"paper_abstract\":\"Complex structures are typical in machine learning. 
Tailoring learning algorithms for every structure requires an effort that may be saved by defining a generic learning procedure adaptive to any complex structure. In this paper, we propose to map any complex structure onto a generic form, called serialization, over which we can apply any sequence-based density estimator. We then show how to transfer the learned density back onto the space of original structures. To expose the learning procedure to the structural particularities of the original structures, we take care that the serializations reflect accurately the structures\\u2019 properties. Enumerating all serializations is infeasible. We propose an effective way to sample representative serializations from the complete set of serializations which preserves the statistics of the complete set. Our method is competitive or better than state of the art learning algorithms that have been specifically designed for given structures. In addition, since the serialization involves sampling from a combinatorial process it provides considerable protection from overfitting, which we clearly demonstrate on a number of experiments.\",\"published_in\":\"Universit\\u00e9 de Gen\\u00e8ve\",\"year\":\"2020-02-03\",\"url\":\"https:\\/\\/archive-ouverte.unige.ch\\/unige:129387\",\"readers\":0,\"subject\":\"info:eu-repo\\/classification\\/ddc\\/025.063; info:eu-repo\\/classification\\/ddc\\/025.063; info:eu-repo\\/classification\\/ddc\\/617\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.46oemr\",\"title\":\"Deuxi\\u00e8me s\\u00e9minaire Quaresmi\",\"authors\":\"Engelbeen, C\\u00e9line\",\"paper_abstract\":\"Ce jeudi 28 mars \\u00e0 14h, le laboratoire Quaresmi aura le plaisir d'accueillir pour son deuxi\\u00e8me s\\u00e9minaire Sylvie Vande Velde, physicienne et biologiste, doctorante au sein du machine learning group de l'ULB. Elle viendra nous pr\\u00e9senter un expos\\u00e9 intitul\\u00e9 \\\"Cancer et nouvelles th\\u00e9rapies\\\". 
\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0 Dans nos soci\\u00e9t\\u00e9s modernes, de nombreuses personnes sont malheureusement \\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0 chaque ann\\u00e9e atteintes par le cancer. Face \\u00e0\\u00a0 cette maladie, la \\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0 recherche m\\u00e9dicale a d\\u00e9j\\u00e0 connu quelques gr...\",\"published_in\":\"OpenEdition\",\"year\":\"2019-03-21\",\"url\":\"http:\\/\\/quaresmi.hypotheses.org\\/2460\",\"readers\":0,\"subject\":\"\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.2mcjne\",\"title\":\"SIDE Summer School, day 1\",\"authors\":\"Charpentier, Arthur\",\"paper_abstract\":\"This morning, we start the SIdE (Italian Econometric Association) Summer School, on Machine Learning Algorithms for Econometricians. Emmanuel Flachaire will start with a presentation of nonparametric econometric techniques. I will then get back to the geometry of (standard) econometric techniques, to introduce kernels. The first series of slides are online. I will then spend more time on the (popular) idea of \\\"least squares\\\" and mention other loss functions. Slides are online.\",\"published_in\":\"OpenEdition\",\"year\":\"2019-07-15\",\"url\":\"http:\\/\\/freakonometrics.hypotheses.org\\/58236\",\"readers\":0,\"subject\":\"\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"http:\\/\\/hdl.handle.net\\/10251\\/127157\",\"title\":\"Aplicaci\\u00f3n de m\\u00e9todos de machine learning a la espectroscop\\u00eda de protones acelerados por l\\u00e1ser\",\"authors\":\"Calatayud Giner, Javier\",\"paper_abstract\":\"[ES] En este trabajo de fin de grado se ha desarrollado un sistema de clasificaci\\u00f3n autom\\u00e1tica de im\\u00e1genes microsc\\u00f3picas basado en tecnolog\\u00eda de Deep Learning. 
Estas im\\u00e1genes son el resultado de un experimento en el que se hace colisionar protones acelerados contra un material detector, y consisten en un fondo gris con trazas con forma circular. La mayor\\u00eda de las im\\u00e1genes que se obtienen con este experimento no sirven debido a distintos factores como la falta de nitidez, por lo que es necesario clasificarlas. A su vez se ha implementado un sistema que extrae el radio y posici\\u00f3n de las trazas usando t\\u00e9cnicas de visi\\u00f3n artificial.\",\"published_in\":\"Universitat Polit\\u00e8cnica de Val\\u00e8ncia\",\"year\":\"2019-10-03\",\"url\":\"http:\\/\\/hdl.handle.net\\/10251\\/127157\",\"readers\":0,\"subject\":\"Inteligencia Artificial; Machine Learning; Deep Learning; Redes Neuronales Convolucionales; Keras; Vision Artificial; Transformada de Hough; Protones; Laser; Artificial Intelligence; Convolutional Neural Networks; Computer Vision; Hough\\u2019s transform; Protons; MATEMATICA APLICADA; Grado en Ingenier\\u00eda Inform\\u00e1tica-Grau en Enginyeria Inform\\u00e0tica\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.qa08nc\",\"title\":\"Des machines et des hommes. La guerre n\\u2019aura pas lieu\",\"authors\":\"Azoulay, Warren\",\"paper_abstract\":\"M\\u00e9decine, biologie, \\u00e9conomie, finance, sociologie, sport, les domaines \\u00e0 mobiliser l\\u2019apprentissage machine sont de plus en plus nombreux. Si leur int\\u00e9r\\u00eat n\\u2019a cess\\u00e9 de s\\u2019intensifier ces derni\\u00e8res ann\\u00e9es, c\\u2019est qu\\u2019ils ont pr\\u00e9f\\u00e9r\\u00e9 percevoir les m\\u00e9thodes d\\u2019\\u00ab\\u00a0intelligence artificielle\\u00a0\\u00bb comme des outils techniques permettant de traiter de grandes bases de donn\\u00e9es avec un haut degr\\u00e9 de pr\\u00e9cision plut\\u00f4t qu\\u2019en tant que science concurrentielle. De fa\\u00e7on tr\\u00e8s tardive, les juristes l\\u2019explorent d\\u00e9sormais, non sans r\\u00e9fractaires. 
Tant\\u00f4t consid\\u00e9r\\u00e9e comme la n\\u00e9cessit\\u00e9 d\\u2019une nouvelle \\u00e8re \\u00ab\\u00a0Big Data\\u00a0\\u00bb, l\\u2019intelligence artificielle est aussi d\\u00e9nonc\\u00e9e par d\\u2019autres qui y voient la volont\\u00e9 d\\u2019une m\\u00e9canisation du droit o\\u00f9 la machine pr\\u00e9tendrait supplanter l\\u2019humain. Pourtant, comme pour toutes les autres sciences, les algorithmes seront de puissants outils pour la connaissance. La coop\\u00e9ration entre l\\u2019intelligence artificielle machine et l\\u2019intelligence naturelle humaine permettra de renforcer la compr\\u00e9hension que nous avons des m\\u00e9canismes juridiques et de leur application par les professionnels du droit.\",\"published_in\":\"Cairn\",\"year\":\"2019-12-12\",\"url\":\"http:\\/\\/www.cairn.info\\/article.php?ID_ARTICLE=DRS1_103_0595\",\"readers\":0,\"subject\":\"Algorithmes; Apprentissage automatis\\u00e9; Apprentissage machine; Big Data; Intelligence artificielle; Justice pr\\u00e9dictive; Ouverture des donn\\u00e9es publiques; Algorithms; Artificial intelligence; Automated learning; Big data; Machine learning; Open data; Predictive justice\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.zzivgc\",\"title\":\"Online Platforms and the Labour Market: Learning (with Machines) from an Experiment in France\",\"authors\":\"Hazard, Yagan\",\"paper_abstract\":\"I study the effect of an online job search assistance program taking advantage of a previousexperiment made by the French public employment services, which provides some exogenousvariation in the use of this platform. I focus on the heterogeneity analysis of this treatment,using two main different approaches.The first one is theory-driven, and focus on the analysis of the heterogeneity of thetreatment with respect to various different labour market tightness indicators. Two mainassessments can be made based on this analysis. 
(i) Tightness indicators are (surprisingly)decorrelated, making it difficult to corroborate the rare significant results obtained. (ii) Theset of significant results obtained suggest that the treatment effect isincreasingin labourmarket tightness. I suggest competing ways of modelling the treatment consistent withthose results. I also document some evidence of a larger treatment effect for individuals withweaker employment prospects. This is in line with other empirical evidence in the literatureevaluating job search assistance programs.The second approach is more data-driven, and resorts to the new machine learning (ML)techniques developed for heterogeneity analysis. I focus on tree-based techniques and forests,which have been central in the development of these techniques. The results of this analysisshed light on the limits of ML in the exploration of treatment effect heterogeneity, especiallyas the main ML-specific test for treatment effect heterogeneity developed by Chernozhukovet al. (2018a) concludes that ML is unable to detect any heterogeneity \\u2014 yet this mightbe not that surprising after all given the lack of statistical power (low take-up) and theprobably low order of magnitude of the treatment effect studied. 
Still, I provide applicationsof a large part of the existing ML techniques for treatment effect heterogeneity, trying totake advantage of each of them to document which are the dimensions that are likely to beimportant to study treatment effect heterogeneity in my setting.\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2019-12-18\",\"url\":\"https:\\/\\/dumas.ccsd.cnrs.fr\\/dumas-02407555\",\"readers\":0,\"subject\":\"Online job search; Mismatch; Market tightness; Vacancies; Machine learning; JEL: E - Macroeconomics and Monetary Economics\\/E.E2 - Consumption, Saving, Production, Investment, Labor Markets, and Informal Economy\\/E.E2.E24 - Employment \\u2022 Unemployment \\u2022 Wages \\u2022 Intergenerational Income Distribution \\u2022 Aggregate Human Capital \\u2022 Aggregate Labor Productivity; JEL: J - Labor and Demographic Economics\\/J.J6 - Mobility, Unemployment, Vacancies, and Immigrant Workers\\/J.J6.J62 - Job, Occupational, and Intergenerational Mobility; JEL: J - Labor and Demographic Economics\\/J.J6 - Mobility, Unemployment, Vacancies, and Immigrant Workers\\/J.J6.J64 - Unemployment: Models, Duration, Incidence, and Job Search; JEL: C - Mathematical and Quantitative Methods\\/C.C1 - Econometric and Statistical Methods and Methodology: General\\/C.C1.C18 - Methodological Issues: General; [SHS.ECO]Humanities and Social Sciences\\/Economics and Finance\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.dn9l78\",\"title\":\"Using social media, machine learning and natural language processing to map multiple recreational beneficiaries\",\"authors\":\"Gosal, Arjan,; Geijzendorffer, Ilse; V\\u00e1clav\\u00edk, Tom\\u00e1\\u0161; Poulin, Brigitte; Ziv, Guy\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2019-07-24\",\"url\":\"https:\\/\\/hal.archives-ouvertes.fr\\/hal-02190742\",\"readers\":0,\"subject\":\"[SDE.ES]Environmental 
Sciences\\/Environmental and Society; [SDE.BE]Environmental Sciences\\/Biodiversity and Ecology; [SDE.MCG]Environmental Sciences\\/Global Changes\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.7yg7f1\",\"title\":\"Large-scale Nonlinear Variable Selection via Kernel Random Features\",\"authors\":\"Gregorova, Magda; Ramapuram, Jason Emmanuel; Kalousis, Alexandros; Marchand-Maillet, St\\u00e9phane\",\"paper_abstract\":\"We propose a new method for input variable selection in nonlinear regression. The method is embedded into a kernel regression machine that can model general nonlinear functions, not being a priori limited to additive models. This is the first kernel-based variable selection method applicable to large datasets. It sidesteps the typical poor scaling properties of kernel methods by mapping the inputs into a relatively low-dimensional space of random features. The algorithm discovers the variables relevant for the regression task together with learning the prediction model through learning the appropriate nonlinear random feature maps. We demonstrate the outstanding performance of our method on a set of large-scale synthetic and real datasets.\",\"published_in\":\"Universit\\u00e9 de Gen\\u00e8ve\",\"year\":\"2020-01-17\",\"url\":\"https:\\/\\/archive-ouverte.unige.ch\\/unige:129030\",\"readers\":0,\"subject\":\"info:eu-repo\\/classification\\/ddc\\/025.063; info:eu-repo\\/classification\\/ddc\\/025.063\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.ktrx8d\",\"title\":\"Open data for humanists: big differences in small steps\",\"authors\":\"T\\u00f3th-Czifra, Erzs\\u00e9bet\",\"paper_abstract\":\"\\u201cPhilologists must for at least two reasons open up the textual data upon which they base their work. 
First, researchers need to be able to download, modify and redistribute their textual data if they are to fully exploit both new methods that center around algorithmic analysis (e.g., corpus linguistics, computational linguistics, text mining, and various applications of machine learning) and new scholarly products and practices that computational methods enable [...]. Second, open data is es...\",\"published_in\":\"OpenEdition\",\"year\":\"2019-05-12\",\"url\":\"http:\\/\\/dariahopen.hypotheses.org\\/550\",\"readers\":0,\"subject\":\"\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.3djx67\",\"title\":\"The Socio-Legal Relevance of Artificial Intelligence\",\"authors\":\"Larsson, Stefan\",\"paper_abstract\":\"L\\u2019article propose une analyse sociojuridique des questions d\\u2019\\u00e9quit\\u00e9, de responsabilit\\u00e9 et de transparence pos\\u00e9es par les applications d\\u2019intelligence artificielle\\u00a0(IA) employ\\u00e9es actuellement dans nos soci\\u00e9t\\u00e9s et de machine learning. Pour rendre compte de ces d\\u00e9fis juridiques et normatifs, nous analysons des cas probl\\u00e9matiques, comme la reconnaissance d\\u2019images fond\\u00e9e sur des bases de donn\\u00e9es qui pr\\u00e9sentent des biais de genre. Nous envisageons ensuite sept aspects de la transparence qui permettent de compl\\u00e9ter les notions d\\u2019explainable\\u00a0AI\\u00a0(XAI) dans la recherche en sciences informatiques. L\\u2019article examine aussi l\\u2019effet de miroir normatif provoqu\\u00e9 par l\\u2019usage des valeurs humaines et des structures soci\\u00e9tales comme donn\\u00e9es d\\u2019entra\\u00eenement pour les technologies d\\u2019apprentissage. 
Enfin, nous plaidons pour une approche multidisciplinaire dans la recherche, le d\\u00e9veloppement et la gouvernance en mati\\u00e8re d\\u2019IA.\",\"published_in\":\"Cairn\",\"year\":\"2019-12-12\",\"url\":\"http:\\/\\/www.cairn.info\\/article.php?ID_ARTICLE=DRS1_103_0573\",\"readers\":0,\"subject\":\"Conception normative; Explainable AI et transparence des algorithmes; Intelligence artificielle appliqu\\u00e9e; Machine learning et droit; Responsabilit\\u00e9 algorithmique; Technologie et changement social; Algorithmic accountability and normative design; Applied artificial intelligence; Explainable AI and algorithmic transparency; Machine learning and law; Technology and Social change\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.ms7s8f\",\"title\":\"OpenMethods introduction to: Analyzing Documents with TF-IDF | Programming Historian\",\"authors\":\"Stapel, Rombert\",\"paper_abstract\":\"Introduction: The indispensable Programming Historian\\u00a0comes with an introduction to Term Frequency - Inverse Document Frequency (tf-idf) provided by Matthew J. Lavin. The procedure, concerned with specificity of terms in a document, has its origins in information retrieval, but can be applied as an exploratory tool, finding textual similarity, or as a pre-processing tool for machine learning. It is therefore not only useful for textual scholars, but also for historians working with large collections of text.\",\"published_in\":\"DARIAH\",\"year\":\"2019-09-27\",\"url\":\"https:\\/\\/openmethods.dariah.eu\\/2019\\/09\\/15\\/analyzing-documents-with-tf-idf-programming-historian\\/\",\"readers\":0,\"subject\":\"Barry Warsaw; Discovering; Capture; Content Analysis; Bartolomeo Vanzetti; Research Objects; stopword; The New York Times; Latent Dirichlet Allocation; stop word; tokenizer; New York City; African American; lemmatization; Python; Andrew Y. Ng; Term Frequency-Inverse Document Frequency; Analysis; style guide; Karen Sp\\u00e4rck Jones; Scikit-Learn; W.E.B. 
Du Bois; Automatic Text Summarization; csv; Research Techniques; Journal of Machine Learning Research; topic modeling; n-grams; Sparse matrices; machine learning; text file; tf-idf; Research Activities; natural logarithm; investigative journalism; Relational Analysis; programming language; Path class; Cochrane, Wisconsin; Standard Oil; sparse matrix; information retrieval; Ida M. Tarbell; Topic Modeling; Document Clustering; Michael I. Jordan; tokenize; Fivethirtyeight.com; non-governmental organization; Information Retrieval; digital humanities; Text; Willa Cather; object-oriented programming; Jupyter; nom-de-plume; Text summarization; Cochrane Collaboration; Iraq War Logs; Upton Sinclair; Ida Tarbell; Natural language processing; sklearn; Nellie Bly; Abraham Lincoln; Nicola Sacco; Text Bearing Objects; New York Times; inverse document frequency\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.lhasgg\",\"title\":\"OpenMethods introduction to: Analyzing Documents with TF-IDF | Programming Historian\",\"authors\":\"Stapel, Rombert\",\"paper_abstract\":\"Introduction: The indispensable Programming Historian\\u00a0comes with an introduction to Term Frequency - Inverse Document Frequency (tf-idf) provided by Matthew J. Lavin. The procedure, concerned with specificity of terms in a document, has its origins in information retrieval, but can be applied as an exploratory tool, finding textual similarity, or as a pre-processing tool for machine learning. It is therefore not only useful for textual scholars, but also for historians working with large collections of text.\",\"published_in\":\"DARIAH\",\"year\":\"2019-09-11\",\"url\":\"https:\\/\\/openmethods.dariah.eu\\/2019\\/07\\/31\\/analyzing-documents-with-tf-idf-programming-historian\\/\",\"readers\":0,\"subject\":\"programming language; Ida M. Tarbell; object-oriented programming; Term Frequency-Inverse Document Frequency; Natural language processing; Michael I. 
Jordan; Automatic Text Summarization; digital humanities; Ida Tarbell; Topic Modeling; Nicola Sacco; Research Objects; Relational Analysis; W.E.B. Du Bois; New York Times; Karen Sp\\u00e4rck Jones; Jupyter; stop word; n-grams; MacOS; The New York Times; Information Retrieval; text file; information retrieval; Willa Cather; Sparse matrices; csv; Abraham Lincoln; Capture; Fivethirtyeight.com; Python; sklearn; nom-de-plume; Discovering; Journal of Machine Learning Research; Text summarization; Bartolomeo Vanzetti; machine learning; New York City; Upton Sinclair; Latent Dirichlet Allocation; Content Analysis; Barry Warsaw; African American; Path class; sparse matrix; Document Clustering; Text; natural logarithm; investigative journalism; Analysis; inverse document frequency; Research Activities; non-governmental organization; stopword; Research Techniques; Andrew Y. Ng; Nellie Bly; via bookmarklet; tokenizer; Iraq War Logs; Scikit-Learn; tf-idf; style guide; Text Bearing Objects; lemmatization; Standard Oil; topic modeling; Cochrane, Wisconsin; Cochrane Collaboration; tokenize\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.f7bvpv\",\"title\":\"Identification of metabolites from tandem mass spectra with a machine learning approach utilizing structural features\",\"authors\":\"Li, Yuanyue; Kuhn, Michael; Gavin Perrin, Anne-Claude; Bork, Peer\",\"paper_abstract\":\"Untargeted mass spectrometry is a powerful method for detecting metabolites in biological samples. 
However, fast and accurate identification of the metabolites' structures from MS\\/MS spectra is still a great challenge.\",\"published_in\":\"Universit\\u00e9 de Gen\\u00e8ve\",\"year\":\"2019-10-22\",\"url\":\"https:\\/\\/archive-ouverte.unige.ch\\/unige:124756\",\"readers\":0,\"subject\":\"\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.x80hl3\",\"title\":\"Probabilistic Fondations of Econometrics, part 3\",\"authors\":\"Charpentier, Arthur\",\"paper_abstract\":\"This post is the third one of our series on the history and foundations of econometric and machine learning models. Part 2 is online here. Exponential family and linear models The Gaussian linear model is a special case of a large family of linear models, obtained when the conditional distribution of [latex]Y[\\/latex] (given the covariates) belongs to the exponential family[latex display=\\\"true\\\"] f(y_i|\\\\theta_i,\\\\phi)=\\\\exp\\\\left(\\\\frac{y_i\\\\theta_i-b(\\\\theta_i)}{a(\\\\phi)}+c(y_i,\\\\phi)\\\\right) [\\/latex] ...\",\"published_in\":\"OpenEdition\",\"year\":\"2019-04-02\",\"url\":\"http:\\/\\/freakonometrics.hypotheses.org\\/57693\",\"readers\":0,\"subject\":\"\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.rvntbi\",\"title\":\"Probabilistic Fondations of Econometrics, part 2\",\"authors\":\"Charpentier, Arthur\",\"paper_abstract\":\"This post is the second one of our series on the history and foundations of econometric and machine learning models. Part 1 is online here. 
Geometric Properties of this Linear Model Let's define the scalar product in [latex]\\\\mathbb{R}^n[\\/latex], [latex]\\u27e8\\\\mathbf{a},\\\\mathbf{b}\\u27e9=\\\\mathbf{a}^T\\\\mathbf{b}[\\/latex], and let's note [latex]\\\\|\\\\cdot\\\\|[\\/latex] the associated Euclidean standard, [latex]\\\\|\\\\mathbf{a}\\\\|=\\\\sqrt{\\\\mathbf{a}^T\\\\mathbf{a}}[\\/latex] (denoted [latex]\\\\|\\\\cdot\\\\|_{\\\\ell_2}[\\/latex] in the nex...\",\"published_in\":\"OpenEdition\",\"year\":\"2019-04-02\",\"url\":\"http:\\/\\/freakonometrics.hypotheses.org\\/57674\",\"readers\":0,\"subject\":\"\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.8og9dc\",\"title\":\"Combining domain filling with a self-organizing map to analyze multi-species hydrocarbon signatures on a regional scale\",\"authors\":\"Nathan, Brian; Lary, David,\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2020-01-14\",\"url\":\"https:\\/\\/hal-amu.archives-ouvertes.fr\\/hal-02176642\",\"readers\":0,\"subject\":\"Volatile organic compounds; Regional signatures; Self-organizing maps; Machine learning; Environmental health impacts; [SDE]Environmental Sciences\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"http:\\/\\/hdl.handle.net\\/10251\\/123881\",\"title\":\"Active learning and social commitment projects as a teaching-learning intervention in engineering degrees\",\"authors\":\"Pern\\u00eda-Espinoza, Alpha; Sanz-Garcia, Andres; Martinez-de-Pison-Ascacibar, F. Javier; Peci\\u00f1a-Marqueta, Sergio; Blanco-Fernandez, Julio\",\"paper_abstract\":\"[EN] The purpose of universities, apart from produce qualified professionals with problem-solving capabilities and soft-skills, should be to develop the social responsibility sense on their students. 
In this context, our proposal combines project based learning (PBL) and service based learning (SBL) along with gamming and the use of open-source machines, with the aim to increase student\\u2019s motivation and their social commitment with an affordable budget. The strategy, from now on named OS-PBL-SR (Open-Source-based PBL projects with Social Responsibility), mainly includes three important aspects: (i) assignment with projects orientated towards a social benefit; (ii) development of the projects using open-source Do It Yourself desktop machines (DIY-DkM); and (iii) include gamming in the evaluation method. The strategy was applied in the subject Manufacturing Technology but it might be easily exportable to other technical subjects. The results from the last academic year are presented. Also, a new OS-PBL-SR proposal aimed to the design and fabrication of autonomy-oriented products for people in a dependency situation is presented. The results showed the beneficial impact on undergraduate students by keeping high levels of motivation reflected on excellent success rates and scores. In addition, essential advantages in the use of DIY-DkM were found regarding the implementation of this kind of PBL strategy.\",\"published_in\":\"Universitat Polit\\u00e8cnica de Val\\u00e8ncia\",\"year\":\"2019-12-02\",\"url\":\"http:\\/\\/hdl.handle.net\\/10251\\/123881\",\"readers\":0,\"subject\":\"Higher Education; Learning; Educational systems; Teaching; Project Based Learning (PBL); Service Based Learning (SBL); Motivation; Open-source machine\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.j5n6vo\",\"title\":\"M\\u00e9thodes d'apprentissage statistique pour le criblage virtuel de m\\u00e9dicament\",\"authors\":\"Playe, Benoit\",\"paper_abstract\":\"Le processus de d\\u00e9couverte de m\\u00e9dicaments a un succ\\u00e8s limit\\u00e9 malgr\\u00e9 tous les progr\\u00e8s r\\u00e9alis\\u00e9s. 
En effet, on estime actuellement que le d\\u00e9veloppement d'un m\\u00e9dicament n\\u00e9cessite environ 1,8 milliard de dollars am\\u00e9ricains sur environ 13 ans. Nous nous concentrons dans cette th\\u00e8se sur des approches statistiques qui criblent virtuellement un grand ensemble de compos\\u00e9s chimique contre un grand nombre de prot\\u00e9ines. Leurs applications sont polyvalentes : elles permettent d\\u2019identifier des candidats m\\u00e9dicaments pour des cibles th\\u00e9rapeutiques connues, d\\u2019anticiper des effets secondaires potentiels, ou de proposer de nouvelles indications th\\u00e9rapeutiques pour des m\\u00e9dicaments connus. Cette th\\u00e8se est con\\u00e7ue selon deux cadres d'approches de criblage virtuel : les approches dans lesquelles les donn\\u00e9es sont d\\u00e9crites num\\u00e9riquement sur la base des connaissances des experts, et les approches bas\\u00e9es sur l'apprentissage automatique de la repr\\u00e9sentation num\\u00e9rique \\u00e0 partir du graphe mol\\u00e9culaire et de la s\\u00e9quence prot\\u00e9ique. 
Nous discutons ces approches et les appliquons pour guider la d\\u00e9couverte de m\\u00e9dicaments.\",\"published_in\":\"ABES\",\"year\":\"2019-07-17\",\"url\":\"http:\\/\\/www.theses.fr\\/2019PSLEM010\\/document\",\"readers\":0,\"subject\":\"Criblage virtuel de m\\u00e9dicament; Bio-informatique; Apprentissage statistique; Drug virtual screening; Bioinformatics; Machine learning; 570.15\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"http:\\/\\/hdl.handle.net\\/20.500.11794\\/34944\",\"title\":\"Inferring phenotypes from genotypes with machine learning : an application to the global problem of antibiotic resistance\",\"authors\":\"Drouin, Alexandre\",\"paper_abstract\":\"Tableau d\\u2019honneur de la Facult\\u00e9 des \\u00e9tudes sup\\u00e9rieures et postdoctorales, 2018-2019\",\"published_in\":\"Universit\\u00e9 Laval\",\"year\":\"2019-05-24\",\"url\":\"http:\\/\\/hdl.handle.net\\/20.500.11794\\/34944\",\"readers\":0,\"subject\":\"QA 76.05 UL 2019; Ph\\u00e9notypes; G\\u00e9notypes; Apprentissage automatique; Algorithmes d'apprentissage\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.b5iz8l\",\"title\":\"Machine learning for predicting psychotic relapse at 2\\u202fyears in schizophrenia in the national FACE-SZ cohort\",\"authors\":\"Fond, G.; Bulzacka, E.; Boucekine, M.; Sch\\u00fcrhoff, F.; Berna, F.; Godin, O.; Aouizerate, B.; Capdevielle, D.; Chereau, I.; d'Amato, T.; Dubertret, C.; Dubreucq, J.; Faget, C.; Leignier, S.; Lan\\u00e7on, C.; Mallet, J.; Misdrahi, D.; Passerieux, C.; Rey, R.; Schandrin, A.; Urbach, M.; Vidailhet, P.; Leboyer, M.; Boyer, L.; Llorca, M.\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2020-02-06\",\"url\":\"https:\\/\\/hal.uca.fr\\/hal-02094189\",\"readers\":0,\"subject\":\"[SCCO.NEUR]Cognitive science\\/Neuroscience; [SDV.NEU.NB]Life Sciences [q-bio]\\/Neurons and Cognition [q-bio.NC]\\/Neurobiology; [SDV.NEU.PC]Life Sciences [q-bio]\\/Neurons and 
Cognition [q-bio.NC]\\/Psychology and behavior; [SDV.NEU.SC]Life Sciences [q-bio]\\/Neurons and Cognition [q-bio.NC]\\/Cognitive Sciences\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.iqnl7v\",\"title\":\"Motorized individual mobility in commuting trips: modal preference or constrained mode choice? A machine learning approach\",\"authors\":\"Le Boennec, R\\u00e9my; Hadj Selem, Fouad; Khodabandelou, Ghazaleh\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2019-11-21\",\"url\":\"https:\\/\\/halshs.archives-ouvertes.fr\\/halshs-02288207\",\"readers\":0,\"subject\":\"Modal shift; Artificial intelligence; Mobility flow inference; Commuting trip; Mode choice; [SHS.ECO]Humanities and Social Sciences\\/Economics and Finance; [INFO]Computer Science [cs]; [INFO.INFO-AI]Computer Science [cs]\\/Artificial Intelligence [cs.AI]\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.sjsygu\",\"title\":\"Probabilistic Fondations of Econometrics, part 4\",\"authors\":\"Charpentier, Arthur\",\"paper_abstract\":\"This post is the fourth one of our series on the history and foundations of econometric and machine learning models. Part 3 is online here. Goodness of Fit, and Model In the Gaussian linear model, the determination coefficient - noted [latex]R^2[\\/latex] - is often used as a measure of fit quality. 
It is based on the variance decomposition formula [latex display=\\\"true\\\"]\\\\underbrace{\\\\frac{1}{n}\\\\sum_{i=1}^n (y_i-\\\\bar{y})^2}_{\\\\text{total variance}}=\\\\underbrace{\\\\frac{1}{n}\\\\sum_{i=1}^n (y_i-\\\\widehat...\",\"published_in\":\"OpenEdition\",\"year\":\"2019-04-02\",\"url\":\"http:\\/\\/freakonometrics.hypotheses.org\\/57703\",\"readers\":0,\"subject\":\"\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.ikxpbg\",\"title\":\"Contribution au d\\u00e9veloppement de l\\u2019apprentissage profond dans les syst\\u00e8mes distribu\\u00e9s\",\"authors\":\"Hardy, Corentin\",\"paper_abstract\":\"L'apprentissage profond permet de d\\u00e9velopper un nombre de services de plus en plus important. Il n\\u00e9cessite cependant de grandes bases de donn\\u00e9es d'apprentissage et beaucoup de puissance de calcul. Afin de r\\u00e9duire les co\\u00fbts de cet apprentissage profond, nous proposons la mise en \\u0153uvre d'un apprentissage collaboratif. Les futures utilisateurs des services permis par l'apprentissage profond peuvent ainsi participer \\u00e0 celui-ci en mettant \\u00e0 disposition leurs machines ainsi que leurs donn\\u00e9es sans d\\u00e9placer ces derni\\u00e8res sur le cloud. 
Nous proposons diff\\u00e9rentes m\\u00e9thodes afin d'apprendre des r\\u00e9seaux de neurones profonds dans ce contexte de syst\\u00e8me distribu\\u00e9.\",\"published_in\":\"ABES\",\"year\":\"2019-09-12\",\"url\":\"http:\\/\\/www.theses.fr\\/2019REN1S020\\/document\",\"readers\":0,\"subject\":\"R\\u00e9seaux de neurones profonds; Apprentissage automatique; Calcul distribu\\u00e9; Deep neural networks; Machine learning; Distributed computing; \",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.o1kjua\",\"title\":\"Intelligence artificielle, apprentissage machine et explicabilit\\u00e9 des mod\\u00e8les\",\"authors\":\"Varenne, Franck\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2020-01-21\",\"url\":\"https:\\/\\/hal.archives-ouvertes.fr\\/hal-02327227\",\"readers\":0,\"subject\":\"intelligence artificielle; explicabilit\\u00e9; apprentissage machine; interpr\\u00e9tabilit\\u00e9; \\u00e9pist\\u00e9mologie des mod\\u00e8les; mod\\u00e8les pr\\u00e9dictifs; mod\\u00e8les explicatifs; [INFO.INFO-AI]Computer Science [cs]\\/Artificial Intelligence [cs.AI]; [INFO.INFO-LG]Computer Science [cs]\\/Machine Learning [cs.LG]; [SHS.PHIL]Humanities and Social Sciences\\/Philosophy; [SHS.HISPHILSO]Humanities and Social Sciences\\/History, Philosophy and Sociology of Sciences; [INFO.INFO-MO]Computer Science [cs]\\/Modeling and Simulation\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"http:\\/\\/hdl.handle.net\\/10251\\/124236\",\"title\":\"DEVELOPMENT OF AN ARTIFICIAL INTELLIGENCE IMPLEMENTATION PLAN FOR THE SPANISH PUBLIC SECTOR\",\"authors\":\"Murgui Herrero, Miguel Guillermo\",\"paper_abstract\":\"[EN] The present work proposes an AI implementation plan for the Spanish Public Sector as a guideline for accomplishing three main goals: (1) to acquire a better understanding of the AI framework in the context of the Public Administration, (2) to identify the principal actors of this process of radical 
transformation that all administrations will have to face in the near future, and (3) to provide a feasible prioritization strategy for determining which AI systems should be applied first, and in which particular areas of the Public Sector. The first step for developing this strategy has been to conduct a study of the current situation of AI in both scenarios Europe and Spain. Secondly, for determining the priorities among the AI systems subjected to study -Machine Learning, Deep Learning, Bid Data analytics, Natural Language Processing, Computer Vision, Content Generation and Reinforcement Learning-, a quantitative analysis based on the PACE prioritization matrix has been carried out. Finally, the order of public areas to approach with the implementation of AI systems -Education, Healthcare, Smart cities, Citizen-Public Administration interaction and Security-, has been based on a qualitative analysis of the current situation of the considered public areas.\",\"published_in\":\"Universitat Polit\\u00e8cnica de Val\\u00e8ncia\",\"year\":\"2019-07-26\",\"url\":\"http:\\/\\/hdl.handle.net\\/10251\\/124236\",\"readers\":0,\"subject\":\"Citizen participation; Process improvement; Public processes; Smart government; Citizens; Public government; e-government processes; Public administration; Public sector; Artificial intelligence; Sector p\\u00fablico; Gobierno p\\u00fablico; Inteligencia artificial; Administraci\\u00f3n p\\u00fablica; Ciudadanos; Procesos electr\\u00f3nicos gubernamentales; Procesos p\\u00fablicos; Mejora de procesos; Participaci\\u00f3n ciudadana; Gobierno inteligente; ORGANIZACION DE EMPRESAS; M\\u00e1ster Universitario en Gesti\\u00f3n de Empresas, Productos y Servicios-M\\u00e0ster Universitari en Gesti\\u00f3 d'Empreses, Productes i Serveis\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.emo6ff\",\"title\":\"Goal-oriented dialogue systems : state-of-the-art and future works\",\"authors\":\"Schaub, L\\u00e9on-Paul; Vaudapiviz, 
Cyndel\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2020-02-10\",\"url\":\"https:\\/\\/hal.archives-ouvertes.fr\\/hal-02180287\",\"readers\":0,\"subject\":\"Human-machine dialogue; adversarial learning; goal-oriented systems; memory model; reinforcement learning; dialog systems; [INFO.INFO-TT]Computer Science [cs]\\/Document and Text Processing; [INFO.INFO-AI]Computer Science [cs]\\/Artificial Intelligence [cs.AI]; [SCCO.COMP]Cognitive science\\/Computer science; [SCCO.LING]Cognitive science\\/Linguistics; [SCCO.NEUR]Cognitive science\\/Neuroscience; [INFO.INFO-HC]Computer Science [cs]\\/Human-Computer Interaction [cs.HC]\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.6fxo42\",\"title\":\"A Machine Learning Approach to Study the Relationship between Features of the Urban Environment and Street Value\",\"authors\":\"Venerandi, Alessandro; Fusco, Giovanni; Tettamanzi, Andrea G. B.; Emsellem, David\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2020-01-14\",\"url\":\"https:\\/\\/hal.inria.fr\\/hal-02319539\",\"readers\":0,\"subject\":\"French Riviera; Ensemble method; Urban environment; Street value; Machine learning; [INFO.INFO-AI]Computer Science [cs]\\/Artificial Intelligence [cs.AI]; [SHS.GEO]Humanities and Social Sciences\\/Geography\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.bt9f1h\",\"title\":\"'Whatever it Takes' to Change Belief: Evidence from Twitter\",\"authors\":\"Stiefel, Michael; Viv\\u00e8s, R\\u00e9mi\",\"paper_abstract\":\"The sovereign debt literature emphasizes the possibility of avoiding a self-fulfilling default crisis if markets anticipate the central bank to act as lender of last resort. 
This paper investigates the extent to which changes in belief about an intervention of the European Central Bank (ECB) explain the sudden reduction of government bond spreads for the distressed countries in summer 2012. We study Twitter data and extract belief using machine learning techniques. We find evidence of strong increases in the perceived likelihood of ECB intervention and show that those increases explain subsequent decreases in the bond spreads of the distressed countries.\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2020-01-23\",\"url\":\"https:\\/\\/halshs.archives-ouvertes.fr\\/halshs-02053429\",\"readers\":0,\"subject\":\"Twitter data; unconventional monetary policy; self-fulfilling default crisis; JEL: E - Macroeconomics and Monetary Economics\\/E.E4 - Money and Interest Rates\\/E.E4.E44 - Financial Markets and the Macroeconomy; JEL: E - Macroeconomics and Monetary Economics\\/E.E5 - Monetary Policy, Central Banking, and the Supply of Money and Credit\\/E.E5.E58 - Central Banks and Their Policies; JEL: D - Microeconomics\\/D.D8 - Information, Knowledge, and Uncertainty\\/D.D8.D83 - Search \\u2022 Learning \\u2022 Information and Knowledge \\u2022 Communication \\u2022 Belief \\u2022 Unawareness; JEL: F - International Economics\\/F.F3 - International Finance\\/F.F3.F34 - International Lending and Debt Problems; [SHS.ECO]Humanities and Social Sciences\\/Economics and Finance\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.i8fbmj\",\"title\":\"Exploring dynamic hamiltonian Monte Carlo for bayesian neural networks\",\"authors\":\"Lau, Yiu Sing\",\"paper_abstract\":\"Neural network models have seen tremendous success in predictive tasks in machine learning and artificial intelligence, with some attributing their success to implicit use of Bayesian inference. 
Stan is a state-of-the-art software for Bayesian statistical computing used mainly in the statistical community, however, it is not optimized for use with neural network models. In this thesis, we replicated much of Stan's No U-Turn sampler in PyTorch and explored its use for sampling from Bayesian neural network models. We were able to explore different samplers, model structures and their sampling and predictive performances on a benchmark classification task. We found that Bayesian inference gives more robust predictive performance compared to their frequentist counterparts in general, but care is needed with the choice of prior and the MCMC sampler.\",\"published_in\":\"McGill Library\",\"year\":\"2019-03-29\",\"url\":\"http:\\/\\/digitool.Library.McGill.CA:80\\/R\\/?func=dbin-jump-full&object_id=163435\",\"readers\":0,\"subject\":\"Mathematics and Statistics\",\"oa_state\":2,\"link\":\"\"},{\"id\":\"10670\\/1.7pxxly\",\"title\":\"The Next Generation Cognitive Security Operations Center: Adaptive Analytic Lambda Architecture for Efficient Defense against Adversarial Attacks\",\"authors\":\"Demertzis, Konstantinos; Tziritas, Nikos; Kikiras, Panayiotis; Sanchez, Salvador Llopis; Iliadis, Lazaros\",\"paper_abstract\":\"International audience\",\"published_in\":\"Centre pour la communication scientifique directe\",\"year\":\"2019-08-09\",\"url\":\"https:\\/\\/hal.archives-ouvertes.fr\\/hal-02157104\",\"readers\":0,\"subject\":\"lambda architecture; cognitive cybersecurity intelligence; network flow forensics; adversarial attacks; malware traffic analysis; security operations center; [INFO]Computer Science [cs]; [SCCO]Cognitive science\",\"oa_state\":2,\"link\":\"\"}]", "text": "[{\"id\":\"10670\\/1.cdg5dl\",\"content\":\"Big Data and Machine Learning in Quantitative Investment. International audience\"},{\"id\":\"10670\\/1.zb9ibf\",\"content\":\"Sequence-to-sequence learning for machine translation and automatic differentiation for machine learning software tools. 
\"},{\"id\":\"10670\\/1.favjnn\",\"content\":\"Rumour Veracity Estimation with Deep Learning for Twitter. Part 4: Security, Privacy, Ethics and Misinformation\"},{\"id\":\"10670\\/1.bm7sxk\",\"content\":\"Automated Machine Learning : Methods, Systems, Challenges. This open access book presents the first comprehensive overview of general methods in Automated Machine Learning (AutoML), collects descriptions of existing systems based on these methods, and discusses the first series of international challenges of AutoML systems. The recent success of commercial ML applications and the rapid growth of the field has created a high demand for off-the-shelf ML methods that can be used easily and without expert knowledge. However, many of the recent machine learning successes crucially rely on human experts, who manually select appropriate ML architectures (deep learning architectures or more traditional ML workflows) and their hyperparameters. To overcome this problem, the field of AutoML targets a progressive automation of machine learning, based on principles from optimization and machine learning itself. This book serves as a point of entry into this quickly-developing field for researchers and advanced students alike, as well as providing a reference for practitioners aiming to use AutoML in their work.\"},{\"id\":\"10670\\/1.qpk049\",\"content\":\"Machine Learning for Cyber Physical Systems : Selected papers from the International Conference ML4CPS 2018. This Open Access proceedings presents new approaches to Machine Learning for Cyber Physical Systems, experiences and visions. It contains some selected papers from the international Conference ML4CPS \\u2013 Machine Learning for Cyber Physical Systems, which was held in Karlsruhe, October 23-24, 2018. Cyber Physical Systems are characterized by their ability to adapt and to learn: They analyze their environment and, based on observations, they learn patterns, correlations and predictive models. 
Typical applications are condition monitoring, predictive maintenance, image processing and diagnosis. Machine Learning is the key technology for these developments.\"},{\"id\":\"10670\\/1.fc349p\",\"content\":\"Fondations of Machine Learning, part 3. This post is the seventh one of our series on the history and foundations of econometric and machine learning models. The first fours were on econometrics techniques. Part 6 is online here. Boosting and sequential learning As we have seen before, modelling here is based on solving an optimization problem, and solving the problem described by equation [latex](6) [\\/latex] is all the more complex because the functional space [latex]\\\\mathcal{M}[\\/latex] is large. The idea of boosting, as introduce...\"},{\"id\":\"10670\\/1.pkmp66\",\"content\":\"Fondations of Machine Learning, part 2. This post is the sixth one of our series on the history and foundations of econometric and machine learning models. The first fours were on econometrics techniques. Part 5 is online here. The probabilistic formalism in the 80\\u2019s We have a training sample, with observations [latex](\\\\mathbf{x}_i,y_i)[\\/latex] where the variables [latex]y[\\/latex] are in a set [latex]\\\\mathcal{Y}[\\/latex]. In the case of classification, [latex]\\\\mathcal{Y}=\\\\{-1,+1\\\\}[\\/latex], but a relatively general set can be conside...\"},{\"id\":\"10670\\/1.9cfd2p\",\"content\":\"References on Econometrics and Machine Learning. In our series of posts on the history and foundations of econometric and machine learning models, a lot of references where given. Here they are. Ahamada, I. & E. Flachaire (2011). Non-Parametric Econometrics. Oxford University Press. Aigner, D., Lovell, C.A.J & Schmidt, P. (1977). Formulation and estimation of stochastic frontier production function models. Journal of Econometrics, 6, 21\\u201337. Aldrich, J. (2010). The Econometricians\\u2019 Statisticians, 1895-1945. 
History of Political Economy, 42...\"},{\"id\":\"10670\\/1.y96bc7\",\"content\":\"Fondations of Machine Learning, part 5. This post is the nineth (and probably last) one of our series on the history and foundations of econometric and machine learning models. The first fours were on econometrics techniques. Part 8 is online here. Optimization and algorithmic aspects In econometrics, (numerical) optimization became omnipresent as soon as we left the Gaussian model. We briefly mentioned it in the section on the exponential family, and the use of the Fisher score (gradient descent) to solve the first order condition...\"},{\"id\":\"10670\\/1.b09jss\",\"content\":\"Fondations of Machine Learning, part 1. This post is the fifth one of our series on the history and foundations of econometric and machine learning models. The first fours were on econometrics techniques. Part 4 is online here. In parallel with these tools developed by, and for economists, a whole literature has been developed on similar issues, centered on the problems of prediction and forecasting. For Breiman (2001a), a first difference comes from the fact that the statistic has developed around the principle of inference (or to...\"},{\"id\":\"10670\\/1.ei13yx\",\"content\":\"Fondations of Machine Learning, part 4. This post is the eighth one of our series on the history and foundations of econometric and machine learning models. The first fours were on econometrics techniques. Part 7 is online here. Penalization and variables selection One important concept in econometrics is Ockham's razor \\u2013 also known as the law of parsimony (lex parsimoniae) \\u2013 which can be related to abductive reasoning. Akaike's criterion was based on a penalty of likelihood taking into account the complexity of the model (the numb...\"},{\"id\":\"10670\\/1.ki26w9\",\"content\":\"Machine learning methods for genotype assignment. 
Invasive species are an ongoing concern for countries in which natural resources play a vital economic and social role. In Canada, species such as the Asian long-horned beetle, Dutch elm disease, sudden oak death and the Asian gypsy moth threaten forests and the sectors of industry that profit from them. The economic risk is estimated at up to $800M annually. Machine learning methods that quickly and accurately determine the taxon, geographic origin, and pathogenic fitness of biological samples from genomics data would constitute a valuable tool for risk reduction. In this thesis, we reviewed concepts of population genetics, phylogenetic networks, genotype data and current methods for genetic population assignment. Having identified a number of the shortcomings of current methods, we propose a new machine learning approach called Mycorrhiza aimed at predicting the geographical origin of a sample from its genotype in which phylogenetic networks are used as feature engineering tools, followed by a Random Forests classifier. The classification accuracy of our method was compared to widely used assessment tests or mixture analysis methods in population genetics such as STRUCTURE and Admixture, as well as a variant where a PCA is used in place of the phylogenetic network. Multiple published SNP, microsatellite or consensus sequence datasets with wide ranges in size, geographical distribution and populations were used for this purpose. The phylogenetic network and PCA methods show a marked improvement in classification accuracy and definable advantages compared to the existing approaches. As is to be expected, STRUCTURE and Admixture fall short on almost all datasets with a considerable deviation from the Hardy Weinberg equilibrium. The same can be said for Admixture on datasets with a large expected heterozygosity. Moreover, Mycorrhiza consistently estimates mixture proportions more accurately than the PCA variant. 
Our approach will be useful in the rapid and accurate prediction of geographical origin from genotype samples without the restrictions inherent to currently used methods.\"},{\"id\":\"http:\\/\\/hdl.handle.net\\/10251\\/127574\",\"content\":\"M\\u00e9todos de machine learning en estudios biom\\u00e9dicos.. [ES] El desarrollo de las tecnolog\\u00edas de alto rendimiento en biolog\\u00eda molecular e imagen m\\u00e9dica ha permitido el acceso a grandes cantidades de informaci\\u00f3n de diverso tipo, lo que ahora se conoce como big data. Dicha informaci\\u00f3n es de tal complejidad que resulta muy dif\\u00edcil el poder extraer conclusiones fiables y \\u00fatiles de ella. Se requiere la utilizaci\\u00f3n de m\\u00e9todos de estad\\u00edstica multivariante y un gran poder de computaci\\u00f3n para vislumbrar los patrones, modelos o normas que siguen los datos. En este contexto nace el machine learning o aprendizaje autom\\u00e1tico, una disciplina que fusiona m\\u00e9todos estad\\u00edsticos con inform\\u00e1tica para elaborar algoritmos capaces de clasificar muestras, predecir resultados y realizar inferencias en base a la informaci\\u00f3n que se les proporciona previamente como entrenamiento. Estos m\\u00e9todos aplicados a la biomedicina pueden extraer el sentido de datos de gen\\u00f3mica, transcript\\u00f3mica, imagen m\\u00e9dica, entre otros, lo cual permitir\\u00eda el avance de la medicina a una forma m\\u00e1s personalizada, precisa y efectiva de atenci\\u00f3n m\\u00e9dica. En este trabajo se aplican tres de los modelos de aprendizaje autom\\u00e1tico m\\u00e1s populares en el contexto de la clasificaci\\u00f3n: k-vecinos m\\u00e1s pr\\u00f3ximos, m\\u00e1quinas de soporte vectorial y bosques aleatorios. Los datos utilizados provienen de la extracci\\u00f3n de caracter\\u00edsticas radi\\u00f3micas de imagen m\\u00e9dica y la extracci\\u00f3n de caracter\\u00edsticas morfol\\u00f3gicas de n\\u00facleos celulares. 
El objetivo es evaluar el desempe\\u00f1o de estos modelos sobre informaci\\u00f3n potencialmente relevante en la cl\\u00ednica. En primer lugar, se hizo un an\\u00e1lisis exploratorio de los datos consistente en el an\\u00e1lisis de componentes principales y an\\u00e1lisis de agrupamiento. El cuerpo principal del trabajo consta de seis pasos: procesado de los datos, estandarizaci\\u00f3n, partici\\u00f3n de los datos, selecci\\u00f3n de caracter\\u00edsticas, entrenamiento y validaci\\u00f3n. El procesado consisti\\u00f3 en la eliminaci\\u00f3n de todas aquellas muestras y variables que por alg\\u00fan motivo no eran adecuadas para su inclusi\\u00f3n en an\\u00e1lisis posteriores. A continuaci\\u00f3n, se transformaron los datos por centrado y escalado. Los datos se dividieron en dos subconjuntos, de los cuales uno sirvi\\u00f3 para el entrenamiento y otro para la validaci\\u00f3n. Durante la selecci\\u00f3n de caracter\\u00edsticas se redujo todav\\u00eda m\\u00e1s el n\\u00famero de variables a tener en cuenta para los modelos hasta tener solo aquellas m\\u00e1s relevantes. Se entrenaron los modelos y se realizaron predicciones sobre las observaciones que no se usaron en el entrenamiento. Con los resultados obtenidos de las predicciones, se calcularon y analizaron m\\u00e9tricas de precisi\\u00f3n. Los resultados obtenidos revelan que la calidad y abundancia de los datos es fundamental para el desarrollo de un buen modelo predictivo. Diferentes modelos pueden ser perfectamente funcionales para un mismo problema de clasificaci\\u00f3n. Los an\\u00e1lisis demuestran una clara relaci\\u00f3n entre algunas de las caracter\\u00edsticas y el resultado cl\\u00ednico.\"},{\"id\":\"10670\\/1.oapbao\",\"content\":\"Text analysis, data quality and machine learning. International audience\"},{\"id\":\"10670\\/1.l0zng0\",\"content\":\"Machine learning for end-users: exploring learning goals and pedagogical content knowledge. 
Machine learning (ML) has rapidly become a sought after skill across numerous domains and sectors. Indeed, the need for those who understand ML is rising faster than can be met. This is reflected in the current state of ML education research, where there is little empirical work, and is mostly on courseware and about computer science students. To expand this research area, this thesis focuses on how to teach ML, specifically to adults with a non-technical background. We conducted ten interviews with university instructors to elicit pedagogical content knowledge (PCK) -- an instructor's knowledge on how to teach a particular topic. In the qualitative analysis, we identified aspects of PCK, such as student preconceptions, student barriers, and pedagogical tactics adopted by instructors. These included students overestimating the abilities of ML, student difficulty with math, and instructors strategically choosing instructional datasets. Furthermore, specific learning goals emerged in our analysis. We classified them into the Structure of Observed Learning Outcomes (SOLO) learning taxonomy, wherein a pattern emerged. Learning goals described as easy to teach, such as tracing algorithms, were consistent with lower levels of the SOLO Taxonomy. Learning goals described as hard to teach, such as evaluating and tuning models, were consistent with higher levels of the SOLO taxonomy. This indicates that the algorithms themselves are not the difficult part about learning ML. Our PCK findings were then organized into this taxonomy to present a list of student barriers and teaching strategies related to each SOLO stage. This organization provides a useful guide for supporting student development through better anticipating student difficulty and providing pertinent pedagogical tactics. 
We conclude with practical uses for educators today through critical course design questions and several promising directions for innovation in teaching ML, including visualization tools and end-user programming.\"},{\"id\":\"10670\\/1.bscrqj\",\"content\":\"Probabilistic Fondations of Econometrics, part 1. In a series of posts, I wanted to get into details of the history and foundations of econometric and machine learning models. I will be some sort of online version of our joint paper with Emmanuel Flachaire and Antoine Ly, Econometrics and Machine Learning (initially writen in French), that will actually appear soon in the journal Economics and Statistics. This is the first one... The importance of probabilistic models in economics is rooted in Working's (1927) questions and the attempts to ...\"},{\"id\":\"10670\\/1.ncmac2\",\"content\":\"Machine \\u00e0 vecteurs de support hyperbolique et ing\\u00e9nierie du noyau. La th\\u00e9orie statistique de l\\u2019apprentissage est un domaine de la statistique inf\\u00e9rentielle dont les fondements ont \\u00e9t\\u00e9 pos\\u00e9s par Vapnik \\u00e0 la fin des ann\\u00e9es 60. Il est consid\\u00e9r\\u00e9 comme un sous-domaine de l\\u2019intelligence artificielle. Dans l\\u2019apprentissage automatique, les machines \\u00e0 vecteurs de support (SVM) sont un ensemble de techniques d\\u2019apprentissage supervis\\u00e9 destin\\u00e9es \\u00e0 r\\u00e9soudre des probl\\u00e8mes de discrimination et de r\\u00e9gression. Dans cette th\\u00e8se, notre objectif est de proposer deux nouveaux probl\\u00e8mes d\\u2019aprentissagestatistique: Un portant sur la conception et l\\u2019\\u00e9valuation d\\u2019une extension des SVM multiclasses et un autre sur la conception d\\u2019un nouveau noyau pour les machines \\u00e0 vecteurs de support. Dans un premier temps, nous avons introduit une nouvelle machine \\u00e0 noyau pour la reconnaissance de mod\\u00e8le multi-classe: la machine \\u00e0 vecteur de support hyperbolique. 
G\\u00e9ometriquement, il est caract\\u00e9ris\\u00e9 par le fait que ses surfaces de d\\u00e9cision dans l\\u2019espace de redescription sont d\\u00e9finies par des fonctions hyperboliques. Nous avons ensuite \\u00e9tabli ses principales propri\\u00e9t\\u00e9s statistiques. Parmi ces propri\\u00e9t\\u00e9s nous avons montr\\u00e9 que les classes de fonctions composantes sont des classes de Glivenko-Cantelli uniforme, ceci en \\u00e9tablissant un majorant de la complexit\\u00e9 de Rademacher. Enfin, nous \\u00e9tablissons un risque garanti pour notre classifieur.Dans un second temps, nous avons cr\\u00e9er un nouveau noyau s\\u2019appuyant sur la transformation de Fourier d\\u2019un mod\\u00e8le de m\\u00e9lange gaussien. Nous proc\\u00e9dons de la mani\\u00e8re suivante: d\\u2019abord, chaque classe est fragment\\u00e9e en un nombre de sous-classes pertinentes, ensuite on consid\\u00e8re les directions donn\\u00e9es par les vecteurs obtenus en prenant toutes les paires de centres de sous-classes d\\u2019une m\\u00eame classe. Parmi celles-ci, sont exclues celles permettant de connecter deux sous-classes de deux classes diff\\u00e9rentes. On peut aussi voir cela comme la recherche d\\u2019invariance par translation dans chaque classe. Nous l\\u2019avons appliqu\\u00e9 avec succ\\u00e8s sur plusieurs jeux de donn\\u00e9es dans le contexte d\\u2019un apprentissage automatique utilisant des machines \\u00e0 vecteurs support multi-classes.\"},{\"id\":\"10670\\/1.jodv82\",\"content\":\"Data mining and machine learning for reverse engineering. Reverse engineering is fundamental for understanding the inner workings of new malware, exploring new vulnerabilities in existing systems, and identifying patent infringements in the distributed executables. It is the process of getting an in-depth understanding of a given binary executable without its corresponding source code. 
Reverse engineering is a manually intensive and time-consuming process that relies on a thorough understanding of the full development stack from hardware to applications. It requires a much steeper learning curve than programming. Given the unprecedentedly vast amount of data to be analyzed and the significance of reverse engineering, the overall question that drives the studies in this thesis is how can data mining and machine learning technologies make cybersecurity practitioners more productive to uncover the provenance, understand the intention, and discover the issues behind the data in a scalable way. In this thesis, I focus on two data-driven solutions to help reverse engineers analyzing binary data: assembly clone search and behavioral summarization. Assembly code clone search is emerging as an Information Retrieval (IR) technique that helps address security problems. It has been used for differing binaries to locate the changed parts, identifying known library functions such as encryption, searching for known programming bugs or zero-day vulnerabilities in existing software or Internet of Things (IoT) devices firmware, as well as detecting software plagiarism or GNU license infringements when the source code is unavailable. However, designing an effective search engine is difficult, due to varieties of compiler optimization and obfuscation techniques that make logically similar assembly functions appear to be dramatically different. By working closely with reverse engineers, I identify three different scenarios of reverse engineering and develop novel data mining and machine learning models for assembly clone search to address the respective challenges. By developing an intelligent assembly clone search platform, I optimize the process of reverse engineering by addressing the information needs of reverse engineers. 
Experimental results suggest that Kam1n0 is accurate, efficient, and scalable for handling a large volume of data.The second part of the thesis goes beyond optimizing an information retrieval process for reverse engineering. I propose to automatically and statically characterize the behaviors of a given binary executable. Behavioral indicators denote those potentially high-risk malicious behaviors exhibited by malware, such as unintended network communications, file encryption, keystroke logging, abnormal registry modifications, sandbox evasion, and camera manipulation. I design a novel neural network architecture that models the different aspects of an executable. It is able to predict over 139 suspicious and malicious behavioral indicators, without running the executable. The resulting system can be used as an additional binary analytic layer to mitigate the issues of polymorphism, metamorphism, and evasive techniques. It also provides another behavioral abstraction of malware to security analysts and reverse engineers. Therefore, it can reduce the data to be manually analyzed, and the reverse engineers can focus on the binaries that are of their interest. In summary, this thesis presents four original research projects that not only advance the knowledge in reverse engineering and data mining, but also contribute to the overall safety of our cyber world by providing open-source award-winning binary analysis systems that empower cybersecurity practitioners.\"},{\"id\":\"http:\\/\\/hdl.handle.net\\/10251\\/127856\",\"content\":\"Dise\\u00f1o de algoritmos de clasificaci\\u00f3n de cultivos mediante t\\u00e9cnicas de Machine Learning. [ES] Dise\\u00f1o de algoritmos de clasificaci\\u00f3n de cultivos usando t\\u00e9cnicas de Machine Learning en entorno MATLAB. Para ello se usar\\u00e1n diferentes m\\u00e9todos como Random Forest o SVM (Support Vector Machine). 
Los datos de entrada ser\\u00e1n medidas recogidas en el \\u00e1rea piloto de Barrax (Albacete) donde se recogen datos del tipo de cultivo asociado a diferentes coordenadas. La mitad de las muestras se usar\\u00e1n para el aprendizaje y la otra mitad para validar. Para el aprendizaje, las coordenadas seleccionadas se localizar\\u00e1n en im\\u00e1genes de sat\\u00e9lite Sentinel-2 para extraer los datos en las diferentes bandas espectrales que ofrece el sat\\u00e9lite.\"},{\"id\":\"http:\\/\\/hdl.handle.net\\/10251\\/124351\",\"content\":\"Machine Learning for Glaucoma Assessment using Fundus Images. [ES] Las im\\u00e1genes de fondo de ojo son muy utilizadas por los oftalm\\u00f3logos para la evaluaci\\u00f3n de la retina y la detecci\\u00f3n de glaucoma. Esta patolog\\u00eda es la segunda causa de ceguera en el mundo, seg\\u00fan estudios de la Organizaci\\u00f3n Mundial de la Salud (OMS). En esta tesis doctoral, se estudian algoritmos de aprendizaje autom\\u00e1tico (machine learning) para la evaluaci\\u00f3n autom\\u00e1tica del glaucoma usando im\\u00e1genes de fondo de ojo. En primer lugar, se proponen dos m\\u00e9todos para la segmentaci\\u00f3n autom\\u00e1tica. El primer m\\u00e9todo utiliza la transformaci\\u00f3n Watershed Estoc\\u00e1stica para segmentar la copa \\u00f3ptica y posteriormente medir caracter\\u00edsticas cl\\u00ednicas como la relaci\\u00f3n Copa\\/Disco y la regla ISNT. El segundo m\\u00e9todo es una arquitectura U-Net que se usa espec\\u00edficamente para la segmentaci\\u00f3n del disco \\u00f3ptico y la copa \\u00f3ptica. A continuaci\\u00f3n, se presentan sistemas autom\\u00e1ticos de evaluaci\\u00f3n del glaucoma basados en redes neuronales convolucionales (CNN por sus siglas en ingl\\u00e9s). En este enfoque se utilizan diferentes modelos entrenados en ImageNet como clasificadores autom\\u00e1ticos de glaucoma, usando fine-tuning. 
Esta nueva t\\u00e9cnica permite detectar el glaucoma sin segmentaci\\u00f3n previa o extracci\\u00f3n de caracter\\u00edsticas. Adem\\u00e1s, este enfoque presenta una mejora considerable del rendimiento comparado con otros trabajos del estado del arte. En tercer lugar, dada la dificultad de obtener grandes cantidades de im\\u00e1genes etiquetadas (glaucoma\\/no glaucoma), esta tesis tambi\\u00e9n aborda el problema de la s\\u00edntesis de im\\u00e1genes de la retina. En concreto se analizaron dos arquitecturas diferentes para la s\\u00edntesis de im\\u00e1genes, las arquitecturas Variational Autoencoder (VAE) y la Generative Adversarial Networks (GAN). Con estas arquitecturas se generaron im\\u00e1genes sint\\u00e9ticas que se analizaron cualitativa y cuantitativamente, obteniendo un rendimiento similar a otros trabajos en la literatura. Finalmente, en esta tesis se plantea la utilizaci\\u00f3n de un tipo de GAN (DCGAN) como alternativa a los sistemas autom\\u00e1ticos de evaluaci\\u00f3n del glaucoma presentados anteriormente. Para alcanzar este objetivo se implement\\u00f3 un algoritmo de aprendizaje semi-supervisado.\"},{\"id\":\"http:\\/\\/hdl.handle.net\\/10251\\/127875\",\"content\":\"Clasificaci\\u00f3n ac\\u00fastica de salas mediante algoritmos de Machine Learning. [ES] El presente Trabajo Fin de Grado (TFG) tiene como objetivo construir un clasificador de salas mediante algoritmos de Machine Learning (ML). Para ello usar\\u00e1 ciertos par\\u00e1metros ac\\u00fasticos de la sala como son el tiempo de reverberaci\\u00f3n (T60) y el Direct-To-Reverberant Ratio (DRR) de la sala. En principio se pretende seguir el esquema expuesto en [Xiong2018] para la realizaci\\u00f3n del clasificador basado en ML. Posteriormente se probar\\u00e1n nuevas features no usadas en dicha referencia pero que tienen que ver con la percepci\\u00f3n sonora, como por ejemplo los coeficientes Mel Frequency Cepstral (MFC). 
Al finalizar el TFG, el estudiante habr\\u00e1 adquirido un conocimiento avanzado sobre el uso de algoritmos supervisados de machine learning y sobre la caracterizaci\\u00f3n ac\\u00fastica de salas. [Xiong2018] F. Xiong et al., Exploring Auditory-Inspired Acoustic Features for Room Acoustic Parameter Estimation From Monaural Speech , IEEE\\/ACM Trans. Audio, Speech, Lang. Process., vol. 26, no. 10, pp. 1809-1820, Oct. 2018.\"},{\"id\":\"10670\\/1.umcqr5\",\"content\":\"The Summer School is now over.... After a great (and long) week, the SIdE (Italian Econometric Association) Summer School, on Machine Learning Algorithms for Econometricians\\u00a0is now over...\"},{\"id\":\"http:\\/\\/hdl.handle.net\\/10251\\/134687\",\"content\":\"Predictive analysis of urban waste generation for the city of Bogota , Colombia, through the implementation of decision trees-based machine learning, support vector machines and artifficial neural networks. [EN] This study presents an analysis of three models associated with artificial intelligence as tools to forecast the generation of urban solid waste in the city of Bogota, in order to learn about this type of waste's behavior. The analysis was carried out in such a manner that different efficient alternatives are presented. In this paper, a possible decision-making strategy was explored and implemented to plan and design technologies for the stages of collection, transport and final disposal of waste in cities, while taking into account their particular characteristics. The first model used to analyze data was the decision tree which employed machine learning as a non-parametric algorithm that models data separation limitations based on the learning decision rules on the input characteristics of the model. Support vector machines were the second method implemented as a forecasting model. 
The primary advantage of support vector machines is their proper adjustment to data despite its variable nature or when faced with problems with a small amount of training data. Lastly, recurrent neural network models to forecast data were implemented, which yielded positive results. Their architectural design is useful in exploring temporal correlations among the same. Distribution by collection zone in the city, socio-economic stratification, population, and quantity of solid waste generated in a determined period of time were factors considered in the analysis of this forecast. The results found that support vector machines are the most appropriate model for this type of analysis.\"},{\"id\":\"10670\\/1.pyirzt\",\"content\":\"Credit Risk Analysis using Machine and Deep Learning Models. International audience\"},{\"id\":\"10670\\/1.9dpgei\",\"content\":\"Unsupervised machine learning to analyse city logistics through Twitter. International audience\"},{\"id\":\"10670\\/1.dfcv7m\",\"content\":\"Investing Through Economic Cycles with Ensemble Machine Learning Algorithms. Ensemble machine learning algorithms (random forest and boosting) are applied to quickly and accurately detect economic turning points in the United States and in the Eurozone over the past three decades. The two key features of those algorithms are their abilities (i) to entertain a large number of predictors and (ii) to perform both variable selection and estimation simultaneously. The real-time ability to nowcast economic turning points is gauged by using investment strategies based on economic regimes induced by our models. When comparing predictive accuracy and profit measures, the model confidence set procedure is applied to avoid data snooping. 
We show that such investment strategies achieve impressive risk-adjusted returns: timing the market is thus possible.\"},{\"id\":\"http:\\/\\/hdl.handle.net\\/10251\\/124001\",\"content\":\"Deep Teaching: Materials for Teaching Machine and Deep Learning. [EN] Machine learning (ML) is considered to be hard because it is relatively complicated in comparison to other topics of computer science. The reason is that machine learning is based heavily on mathematics and abstract concepts. This results in an entry barrier for students: Most students want to avoid such difficult topics in elective courses or self-study. In the project Deep.Teaching we address these issues: We motivate by selected applications and support courses as well as self-study by giving practical exercises for different topics in machine learning. The teaching material, provided as jupyter notebooks, consists of theoretical and programming sections. For didactical reasons, we designed programming exercises such that the students have to deeply understand the concepts and principles before they can start to implement a solution. We provide all necessary boilerplate code such that the students can primarily focus on the educational objectives of the exercises. We used different ways to give feedback for self-study: obscured solutions for mathematical results, software tests with assert statements, and graphical illustrations of sample solutions. All of the material is published under a permissive license. Developing jupyter notebooks collaboratively for educational purposes poses some problems. We address these issues and provide solutions\\/best practices.\"},{\"id\":\"10670\\/1.0ei7fv\",\"content\":\"Graph-based machine learning algorithms for predicting disease outcomes. Improving disease outcome prediction can greatly aid in the strategic deployment of secondary prevention approaches. 
We develop two methods to predict the evolution of diseases by taking into account personal attributes of the subjects and their relationships with medical examination results. Our approaches build upon a recent formulation of this problem as a graph-based geometric matrix completion task. The primary innovation is the introduction of multiple graphs, each relying on a different combination of subject attributes. Via statistical significance tests, we determine the relevant graph(s) for each medically-derived feature. In the first approach, we then employ a multiple-graph recurrent graph convolutional neural network architecture to predict the disease outcomes. In the second approach, we use a multiple-graph graph auto-encoder architecture to predict the disease outcomes. We demonstrate the efficacy of the two techniques by addressing the task of predicting the development of Alzheimer's disease for patients exhibiting mild cognitive impairment, showing that the incorporation of multiple graphs improves predictive capability. Moreover, in the second approach, the use of a graph auto-encoder also helps in increasing predictive capability.\"},{\"id\":\"10670\\/1.42i4dd\",\"content\":\"Prediction of CO 2 absorption by physical solvents using a chemoinformatics-based machine learning model. International audience\"},{\"id\":\"10670\\/1.n9glyt\",\"content\":\"Performing Deep Recurrent Double Q-Learning for Atari Games. International audience\"},{\"id\":\"10670\\/1.fxa0rv\",\"content\":\"Analyzing semantic trace links using network science and machine learning. Traceability is a useful tool for reasoning about many aspects of software by means of the connections between the artifacts that constitute the software. For example, stakeholders can use traceability to appraise the software quality along many different dimensions including regulatory compliance. 
The semantic content of these trace links or what they communicate to a stakeholder however, remains significantly unexplored. Additionally, issue tracking systems provide a wealth of data about traceability events and their context, particularly in the case of open source and agile software projects. In this work we investigate semantic traceability from two perspectives using trace links found in the issue trackers of open source software projects. Firstly, most of the state-of-the-art traceability solutions rely on a pair-wise comparison of artifact attributes. This approach ignores the characteristics that result from the entire connected structure of the trace links. To address this, we investigate the use of network science techniques on the issue trackers of sixty-six open source projects. From this perspective we show that two properties of networks, namely scale free degree distributions and triadic closure manifest in traceability networks. We then use these properties to showcase the high level reasoning that can be done by observing the traceability problem from a network perspective and in doing so, motivate the trace link semantic recovery problem. Secondly, we attempt to approach the trace link semantic recovery problem by using machine learning and the attributes of the issue trace links. We experiment with a number of models, tuning approaches and formulations of these attributes including the textual content of each issue, as well as other metadata items. We observe promising results from this task specification and show even higher performance when incorporating data both internal and external to the project's issue tracker.\"},{\"id\":\"10670\\/1.djjczz\",\"content\":\"On the prediction of mRNA subcellular localization with machine learning. Cells are the basic units of life, and yet they are regulated by many delicate and to some extent, fragile, subcellular processes that are crucial to their survival. 
A simple genetic mutation could possibly clog up some important regulatory processes, or perturb the function of the product it encodes, which might ultimately bring the demise of the entire system. Therefore, it is important to gain more insights into the many control processes of cell and the regulatory factors associated with them, one prominent example of which would be the mechanism related to the RNA subcellular localization that we would focus on almost exclusively in this study from a computational perspective.RNA subcellular localization mechanism is one of the most important, yet under-appreciated, facets of the broader gene regulatory process, which helps with the cellular organization and regulation on gene expression, via transporting the RNA transcripts to their designated locations where their function, structure or translated proteins are needed. It is generally accepted as a fact that RNA trafficking mechanism is mediated between the trans-regulatory factors such as the RNA binding proteins, and the cis-acting elements \\u2014 short snippets of the transcript that contain the RBP binding sites \\u2014 which we call zipcode as they are considered to contain information on its address of delivery.The release of new RNA subcellular localization dataset has enabled us to build the first computational tool using state-of-the-art deep learning techniques, to predict the localization outcome for the protein-coding RNA from mere transcript sequence, and subsequently to identify the zipcode elements thereof. Our proposed method has achieved good accuracy compared to the baseline methods based on the k-mers features, despite the intrinsic difficulty that arise from the complex and stochastic interactions during trafficking events, as well as the limitations imposed by the available dataset.\"},{\"id\":\"10670\\/1.zqg0hf\",\"content\":\"Fast and slow machine learning. 
L'\\u00e8re du Big Data a r\\u00e9volutionn\\u00e9 la mani\\u00e8re dont les donn\\u00e9es sont cr\\u00e9\\u00e9es et trait\\u00e9es. Dans ce contexte, de nombreux d\\u00e9fis se posent, compte tenu de la quantit\\u00e9 \\u00e9norme de donn\\u00e9es disponibles qui doivent \\u00eatre efficacement g\\u00e9r\\u00e9es et trait\\u00e9es afin d\\u2019extraire des connaissances. Cette th\\u00e8se explore la symbiose de l'apprentissage en mode batch et en flux, traditionnellement consid\\u00e9r\\u00e9s dans la litt\\u00e9rature comme antagonistes, sur le probl\\u00e8me de la classification \\u00e0 partir de flux de donn\\u00e9es en \\u00e9volution. L'apprentissage en mode batch est une approche bien \\u00e9tablie bas\\u00e9e sur une s\\u00e9quence finie: d'abord les donn\\u00e9es sont collect\\u00e9es, puis les mod\\u00e8les pr\\u00e9dictifs sont cr\\u00e9\\u00e9s, finalement le mod\\u00e8le est appliqu\\u00e9. Par contre, l\\u2019apprentissage par flux consid\\u00e8re les donn\\u00e9es comme infinies, rendant le probl\\u00e8me d\\u2019apprentissage comme une t\\u00e2che continue (sans fin). De plus, les flux de donn\\u00e9es peuvent \\u00e9voluer dans le temps, ce qui signifie que la relation entre les caract\\u00e9ristiques et la r\\u00e9ponse correspondante peut changer. Nous proposons un cadre syst\\u00e9matique pour pr\\u00e9voir le surendettement, un probl\\u00e8me du monde r\\u00e9el ayant des implications importantes dans la soci\\u00e9t\\u00e9 moderne. Les deux versions du m\\u00e9canisme d'alerte pr\\u00e9coce (batch et flux) surpassent les performances de base de la solution mise en \\u0153uvre par le Groupe BPCE, la deuxi\\u00e8me institution bancaire en France. De plus, nous introduisons une m\\u00e9thode d'imputation \\u00e9volutive bas\\u00e9e sur un mod\\u00e8le pour les donn\\u00e9es manquantes dans la classification. 
Cette m\\u00e9thode pr\\u00e9sente le probl\\u00e8me d'imputation sous la forme d'un ensemble de t\\u00e2ches de classification \\/ r\\u00e9gression r\\u00e9solues progressivement.Nous pr\\u00e9sentons un cadre unifi\\u00e9 qui sert de plate-forme d'apprentissage commune o\\u00f9 les m\\u00e9thodes de traitement par batch et par flux peuvent interagir de mani\\u00e8re positive. Nous montrons que les m\\u00e9thodes batch peuvent \\u00eatre efficacement form\\u00e9es sur le r\\u00e9glage du flux dans des conditions sp\\u00e9cifiques. Nous proposons \\u00e9galement une adaptation de l'Extreme Gradient Boosting algorithme aux flux de donn\\u00e9es en \\u00e9volution. La m\\u00e9thode adaptative propos\\u00e9e g\\u00e9n\\u00e8re et met \\u00e0 jour l'ensemble de mani\\u00e8re incr\\u00e9mentielle \\u00e0 l'aide de mini-lots de donn\\u00e9es. Enfin, nous pr\\u00e9sentons scikit-multiflow, un framework open source en Python qui comble le vide en Python pour une plate-forme de d\\u00e9veloppement\\/recherche pour l'apprentissage \\u00e0 partir de flux de donn\\u00e9es en \\u00e9volution.\"},{\"id\":\"10670\\/1.icgmw4\",\"content\":\"Machine learning methods and classification of vegetation in Brest, France. International audience\"},{\"id\":\"10670\\/1.v7aojz\",\"content\":\"Word Embeddings in Sentiment Analysis. In the late years sentiment analysis and its applications have reached growing popularity. Concerning this field of research, in the very late years machine learning and word representation learning derived from distributional semantics field (i.e. word embeddings) have proven to be very successful in performing sentiment analysis tasks. In this paper we describe a set of experiments, with the aim of evaluating the impact of word embedding-based features in sentiment analysis tasks.\"},{\"id\":\"10670\\/1.0ecnbf\",\"content\":\"Le dialogue homme-machine : Intelligence artificielle \\/ intelligence humaine\\u00a0: manipulation et \\u00e9valuation. 
Apr\\u00e8s un premier dossier \\u00ab\\u00a0Cerveau et apprentissages\\u00a0\\u00bb (n\\u00b0 428), suivi d\\u2019un deuxi\\u00e8me opus consacr\\u00e9 \\u00e0 la plasticit\\u00e9 du cerveau (n\\u00b0 431), Futuribles ouvre un troisi\\u00e8me volet dans la s\\u00e9rie \\u00ab\\u00a0Cerveau\\u00a0\\u00bb, portant cette fois sur les interactions homme \\/ machine(s) et sur l\\u2019impact des \\u00e9crans sur le d\\u00e9veloppement des jeunes. Sp\\u00e9cialiste des interactions homme \\/ machine, Laurence Devillers pr\\u00e9sente ici les enjeux inh\\u00e9rents au d\\u00e9veloppement des agents conversationnels et autres robots dot\\u00e9s d\\u2019intelligence artificielle, qui interagissent de plus en plus souvent avec les individus, dans diff\\u00e9rents contextes.Apr\\u00e8s avoir rappel\\u00e9 comment fonctionnent ces syst\\u00e8mes (auto)apprenants, elle insiste sur la vigilance n\\u00e9cessaire \\u00e0 l\\u2019\\u00e9gard de possibles manipulations des individus par ce type d\\u2019interfaces (par le biais des nudges, notamment, techniques d\\u2019incitation douce). Elle montre aussi comment sont utilis\\u00e9es les \\u00e9motions dans les interactions homme-machine (ressorts affectifs, humour\\u2026) et pr\\u00e9sente les outils dont on dispose aujourd\\u2019hui pour \\u00e9valuer l\\u2019intelligence artificielle, voire la comparer \\u00e0 celle des humains (en particulier le test de Turing et ses limites). Compte tenu des progr\\u00e8s rapides de l\\u2019apprentissage machine, Laurence Devillers appelle au d\\u00e9veloppement de nouveaux tests d\\u2019\\u00e9valuation des capacit\\u00e9s des machines, visant en particulier \\u00e0 surveiller leur facult\\u00e9 \\u00e0 manipuler les individus. 
Car si les progr\\u00e8s techniques sont exponentiels, la fa\\u00e7on dont on encadre leur application dans la soci\\u00e9t\\u00e9 et dans le monde r\\u00e9el ne rel\\u00e8ve, pour l\\u2019heure encore, que des citoyens\\u00a0: c\\u2019est aux individus de d\\u00e9terminer d\\u00e8s \\u00e0 pr\\u00e9sent les limites \\u00e9thiques, r\\u00e9glementaires\\u2026, qui doivent encadrer les interfaces homme-machine.\\u00a0S.D.\"},{\"id\":\"10670\\/1.rsjznk\",\"content\":\"Machine learning methods and classification of vegetation in Brest, France. International audience\"},{\"id\":\"10670\\/1.qp9pag\",\"content\":\"A machine learning approach to pattern discovery in symbolic music. The aim of a musical pattern discovery algorithm is to find all instances of repetition in a given input of music, allowing for a user-specifiable amount of variation between identified repetitions. Naive algorithms tend to return many identifications of repeated material that no human listener would readily identify as musically significant, rendering the results technically accurate but useless to the music analyst. Heuristics are often used to filter this set of patterns down to only those which are most \\\"significant\\\" for a given application. This thesis develops an alternative way to address this issue, by training machine learning-based classifiers on human annotations of repeated musical patterns with the goal of replicating the human annotator's judgment as to which patterns are significant. Three different classification methods are tested, based on k-nearest neighbor, k-nearest neighbor with genetic algorithm-optimized feature selection, and a feed-forward neural network. Only monophonic music in symbolic format is examined. 
Also discussed are previous approaches towards defining musical pattern significance, and the theoretical difficulties inherent to the task.\"},{\"id\":\"10670\\/1.3ei261\",\"content\":\"Large data sets and machine learning: Applications to statistical arbitrage. International audience\"},{\"id\":\"10670\\/1.89q8un\",\"content\":\"Metashape -Photoscan. Agisoft \\u00e0 mis \\u00e0 jour une nouvelle version de son logiciel de photogramm\\u00e9trie Photoscan renomm\\u00e9\\u00a0Metashape. De nouvelles \\u00e9volutions sont disponibles: Acc\\u00e9l\\u00e9ration du calcul par GPU Syst\\u00e8me de classification de donn\\u00e9e par machine learning Autres ....\"},{\"id\":\"10670\\/1.ks30rm\",\"content\":\"Le printemps des machines. L\\u2019intelligence artificielle a subi une s\\u00e9rie de phases d\\u2019hibernation, dont l\\u2019une des plus importantes a fait suite aux pr\\u00e9dictions pessimistes de Marvin Minsky quant \\u00e0 la capacit\\u00e9 des r\\u00e9seaux de neurones artificiels \\u00e0 rivaliser avec des m\\u00e9thodes algorithmiques. Mais les succ\\u00e8s r\\u00e9cents du paradigme connexionniste, illustr\\u00e9 par \\u00ab\\u00a0l\\u2019apprentissage profond\\u00a0\\u00bb (deep learning, machine learning), semblent augurer du retour d\\u2019un printemps ensoleill\\u00e9 pour une telle approche de l\\u2019intelligence artificiell...\"},{\"id\":\"10670\\/1.w3i7zv\",\"content\":\"SIDE Summer School, day 2. Tomorrow morning, it will be the second day of the SIdE (Italian Econometric Association) Summer School, on Machine Learning Algorithms for Econometricians. The third series of slides are online. and the fourth one are also online.\"},{\"id\":\"10670\\/1.9d039g\",\"content\":\"On my way to Bertinoro (Forl\\u00ec-Cesena, Italia). 
At the end of this week, I will be flying to Europe, since I will be giving a series of lectures with Emmanuel Flachaire at the SIdE (Italian Econometric Association) Summer School, on Machine Learning Algorithms for Econometricians.\"},{\"id\":\"10670\\/1.epec3l\",\"content\":\"SIDE Summer School, days 4 and 5. Almost the final set of slides for the SIdE (Italian Econometric Association) Summer School, on Machine Learning Algorithms for Econometricians. The eighth series of slides are online, as well as the nineth series (also online) and the twelweth (online)\"},{\"id\":\"10670\\/1.2ms4vc\",\"content\":\"Learning Edit Cost Estimation Models for Graph Edit Distance. International audience\"},{\"id\":\"10670\\/1.jg8ess\",\"content\":\"Statistical analysis and machine learning algorithms for RF breast cancer screening. The work of this thesis explores statistical and machine learning methods for anomaly detection in a novel low-power microwave breast cancer screening system. Reported dielectric contrast in the microwave frequency range between healthy and malign breast tissue is the main motivator behind the effort to design a time-domain radar-based prototype for safe breast screening. The microwave radar does not strive to yield a three-dimensional image of the breast interior. Instead, its aimed use wouldbe for frequent monthly screenings which have the potential to detect a departure from the normal, hence increasing the chance of early detection and, in turn, successful treatment. The data used for the development of the algorithms was obtained either in controlled laboratory experiments on tissue-mimicking phantoms or in a clinical setting. Since the data is preliminary and scarce, the conclusions may be limited, but in the process of the algorithmic development, this work strives to takeinto account the nature of the signals and how they have been generated in this very new application. 
The following methods were adapted and applied to the data sets: simple statistical analysis to illustrate the differences in the data sets investigated in this work; discrete Fourier transform, short-time Fourier transform, empirical mode decomposition and ad hoc time domain analysis to derive effective featureextraction strategies for the radio-frequency radar scans; high-dimensional statistical hypothesis tests to investigate the characteristics of time-frequency features extracted; random search, random walk, simulated annealing, genetic algorithm and particle swarm derivative-free optimization algorithms to improve the computational efficiency of an ensemble cost-sensitive support vector machine classifier based on previous literature; and a forward step-wise ensemble selection algorithm to improvethe predictive performance of the classifier. For each of the methods, the results were discussed in the light of the limitations of the collected data sets. Older data sets were found to have high signal amplitudes on average. Statistically significant differences between features extracted from scans with anomalies and scans without anomalies were only observed for scans of subjects with higher average permittivity. The time-frequency analysis features yielded superior predictive performance thanfeature extraction using dimensionality reduction by principal component analysis. The computational efficiency of the classifier was improved by a factor of at least 3.8 when optimization algorithms were used for hyperparameter selection, instead of an exhaustive grid search. With the data available, the forward step-wise selection algorithm did not improve the predictive performance as was anticipated.\"},{\"id\":\"10670\\/1.g0sb2u\",\"content\":\"Machine Learning in Amyotrophic Lateral Sclerosis: Achievements, Pitfalls, and Future Directions. 
International audience\"},{\"id\":\"http:\\/\\/hdl.handle.net\\/20.500.11794\\/34741\",\"content\":\"A Machine Learning Approach for the Smart Charging of Electric Vehicles. Avec l\\u2019adoption croissante des v\\u00e9hicules \\u00e9lectriques, il y a un int\\u00e9r\\u00eat pour utiliser des tarifs dynamiques dont le prix d\\u00e9pend de la demande actuelle, pour encourager les utilisateurs \\u00e0 recharger leurs v\\u00e9hicules en p\\u00e9riode de faible demande \\u00e9vitant les pics d\\u2019\\u00e9lectricit\\u00e9 pouvant d\\u00e9passer la capacit\\u00e9 install\\u00e9e. Le probl\\u00e8me que devaient affronter les utilisateurs de v\\u00e9hicules \\u00e9lectriques est qu\\u2019ils doivent s\\u2019assurer que l\\u2019\\u00e9nergie \\u00e9lectrique pr\\u00e9sente dans les batteries est suffisante pour les d\\u00e9placements et que les p\\u00e9riodes de recharge correspondent \\u00e0 des p\\u00e9riodes o\\u00f9 le prix de l\\u2019\\u00e9lectricit\\u00e9 est bas. La plupart des approches actuelles de planification de recharge supposent une connaissance parfaite des futurs prix de l\\u2019\\u00e9lectricit\\u00e9 et de l\\u2019utilisation du v\\u00e9hicule, ce qui nuit \\u00e0 leur applicabilit\\u00e9 dans la pratique. Cette th\\u00e8se consid\\u00e8re la mod\\u00e9lisation de la recharge intelligente des v\\u00e9hicules \\u00e9lectriques pour d\\u00e9terminer, lors des sessions de connexion, les moments o\\u00f9 le v\\u00e9hicule doit se recharger afin de minimiser le co\\u00fbt pay\\u00e9 pour l\\u2019\\u00e9nergie de ses d\\u00e9placements. La th\\u00e8se comporte quatre principales contributions: 1) Mod\\u00e8le de recharge optimale des v\\u00e9hicules \\u00e9lectriques pour g\\u00e9n\\u00e9rer une s\\u00e9rie de d\\u00e9cisions en utilisant la connaissance a priori du prix de l\\u2019\\u00e9lectricit\\u00e9 et de l\\u2019\\u00e9nergie utilis\\u00e9e, en utilisant la programmation dynamique comme m\\u00e9thode d\\u2019optimisation. 
2) Cr\\u00e9ation d\\u2019un mod\\u00e8le de syst\\u00e8me d\\u2019information incluant des variables connexes au mod\\u00e8le de recharge des v\\u00e9hicules \\u00e9lectriques dans un cadre guid\\u00e9 par des donn\\u00e9es. 3) M\\u00e9thode de s\\u00e9lection des donn\\u00e9es pertinentes utilisant la stratification de donn\\u00e9es pouvant r\\u00e9duire significativement le temps requis pour entra\\u00eener les mod\\u00e8les de pr\\u00e9vision avec des r\\u00e9sultats proches de ceux obtenus en utilisant l\\u2019ensemble de donn\\u00e9es complet. 4) Mod\\u00e8le de classification en ligne qui permet de d\\u00e9terminer s\\u2019il faut charger ou non le v\\u00e9hicule \\u00e0 l\\u2019aide de mod\\u00e8les d\\u2019apprentissage automatique qui peuvent g\\u00e9n\\u00e9rer, en temps r\\u00e9el, une d\\u00e9cision de recharge quasi-optimale sans tenir compte d\\u2019une connaissance de l\\u2019information future. Nous d\\u00e9montrons comment la combinaison d\\u2019une m\\u00e9thode d\\u2019optimisation hors ligne, telle que la programmation dynamique, avec des mod\\u00e8les d\\u2019apprentissage automatique et un syst\\u00e8me d\\u2019information ad\\u00e9quat peut fournir une solution tr\\u00e8s proche de l\\u2019optimum global, sans perte d\\u2019applicabilit\\u00e9 dans le monde r\\u00e9el. De plus, la polyvalence de l\\u2019approche propos\\u00e9e permet d\\u2019envisager l\\u2019int\\u00e9gration d\\u2019un plus grand nombre de variables \\u00e0 l\\u2019entr\\u00e9e du mod\\u00e8le, ainsi que d\\u2019autres actions comme par exemple fournir d\\u2019\\u00e9nergie au r\\u00e9seau \\u00e9lectrique pour aider \\u00e0 r\\u00e9duire les pics de demande ce qui pourrait \\u00eatre utile dans un contexte de vehicle-to-grid (V2G).\"},{\"id\":\"10670\\/1.lv11y2\",\"content\":\"Computational and Robotic Models of Early Language Development: A Review. 
International audience\"},{\"id\":\"10670\\/1.3w729f\",\"content\":\"Easily implementable time series forecasting techniques for resource provisioning in cloud computing. International audience\"},{\"id\":\"10670\\/1.lb625e\",\"content\":\"SIDE Summer School, day 6. Saturday morning, final set of slides for the SIdE (Italian Econometric Association) Summer School, on Machine Learning Algorithms for Econometricians. The first part is now online, on time series as well as the second part, on causal inference (with just a broad overview), which is also\"},{\"id\":\"10670\\/1.45wtnz\",\"content\":\"Police pr\\u00e9dictive et fichiers de police. National audience\"},{\"id\":\"10670\\/1.ee9cd8\",\"content\":\"SIDE Summer School, day 3. Tomorrow morning, it will be the third day of the SIdE (Italian Econometric Association) Summer School, on Machine Learning Algorithms for Econometricians. The fifth series of slides are online. and so is the sixth series, that are online and the seventh series of slides, that are online\"},{\"id\":\"10670\\/1.kg26w4\",\"content\":\"Toward Dialogue Modeling: A Semantic Annotation Scheme for Questions and Answers. International audience\"},{\"id\":\"10670\\/1.zmn1oz\",\"content\":\"The challenge of responsible AI. International audience\"},{\"id\":\"10670\\/1.fok7n3\",\"content\":\"ML-Based Feature Importance Estimation for Predicting Unethical Behaviour under Pressure. International audience\"},{\"id\":\"10670\\/1.8nu0uf\",\"content\":\"Semantic Systems. The Power of AI and Knowledge Graphs : 15th International Conference, SEMANTiCS 2019, Karlsruhe, Germany, September 9\\u201312, 2019, Proceedings. This open access book constitutes the refereed proceedings of the 15th International Conference on Semantic Systems, SEMANTiCS 2019, held in Karlsruhe, Germany, in September 2019. The 20 full papers and 8 short papers presented in this volume were carefully reviewed and selected from 88 submissions. 
They cover topics such as: web semantics and linked (open) data; machine learning and deep learning techniques; semantic information management and knowledge integration; terminology, thesaurus and ontology management; data mining and knowledge discovery; semantics in blockchain and distributed ledger technologies.\"},{\"id\":\"10670\\/1.tdnvft\",\"content\":\"Entropy-regularized Optimal Transport for Machine Learning. Le Transport Optimal r\\u00e9gularis\\u00e9 par l\\u2019Entropie (TOE) permet de d\\u00e9\\ufb01nir les Divergences de Sinkhorn (DS), une nouvelle classe de distance entre mesures de probabilit\\u00e9s bas\\u00e9es sur le TOE. Celles-ci permettentd\\u2019interpolerentredeuxautresdistancesconnues: leTransport Optimal(TO)etl\\u2019EcartMoyenMaximal(EMM).LesDSpeuvent\\u00eatre utilis\\u00e9es pour apprendre des mod\\u00e8les probabilistes avec de meilleures performances que les algorithmes existants pour une r\\u00e9gularisation ad\\u00e9quate. Ceci est justi\\ufb01\\u00e9 par un th\\u00e9or\\u00e8me sur l\\u2019approximation des SDpardes\\u00e9chantillons, prouvantqu\\u2019uner\\u00e9gularisationsusantepermet de se d\\u00e9barrasser de la mal\\u00e9diction de la dimension du TO, et l\\u2019on retrouve \\u00e0 l\\u2019in\\ufb01ni le taux de convergence des EMM. En\\ufb01n, nous pr\\u00e9sentons de nouveaux algorithmes de r\\u00e9solution pour le TOE bas\\u00e9s surl\\u2019optimisationstochastique\\u2018en-ligne\\u2019qui,contrairement\\u00e0l\\u2019\\u00e9tatde l\\u2019art, ne se restreignent pas aux mesures discr\\u00e8tes et s\\u2019adaptent bien aux probl\\u00e8mes de grande dimension.\"},{\"id\":\"10670\\/1.fp0hqv\",\"content\":\"Spectral Graph Wavelet Transform as Feature Extractor for Machine Learning in Neuroimaging. International audience\"},{\"id\":\"10670\\/1.dc5rg5\",\"content\":\"Cuarta revoluci\\u00f3n industrial (4.0.) 
o ciberindustria en el proceso penal: revoluci\\u00f3n digital, inteligencia artificial y el camino hacia la robotizaci\\u00f3n de la justicia. This paper aims to analyze the irruption of the fourth industrial revolution (4.0.) and its consequences, especially Artificial Intelligence, in Justice, particularly in Criminal Justice. The incorporation of expert systems, algorithms and computational models in counseling is now a reality, as well as in predicting and in legal decisions. The incorporation of judicial robotic or robot-judge (machine learning) represents a disturbing situation. The replacement of the human-judge creates an artificial cognition that raises doubts about ethics, reliability, the individualization of the sanction, the possible responsibility of the machine, the sensitivity of especially vulnerable victims or depressed groups, the violation of the people's rights, etc.\"},{\"id\":\"10670\\/1.fbxvrs\",\"content\":\"Du code dans ma th\\u00e8se, 3 interventions autour de l\\u2019auto-instrumentation en th\\u00e8se de doctorat. Trois doctorant.e.s se sont confront\\u00e9.e.s aux questions de la recherche m\\u00e9thodologique \\u00e9tant \\u00e0 la fois chercheurs et promoteurs de leur propres outils et de leurs m\\u00e9thodes dans des disciplines diff\\u00e9rentes : science politique, machine learning, Arts & Design. Le podcast du s\\u00e9minaire du MetSem - S\\u00e9minaire M\\u00e9thodologie de Sciences Po est en ligne, \\u00e0 \\u00e9couter sur le site MetSem\"},{\"id\":\"http:\\/\\/hdl.handle.net\\/10251\\/126095\",\"content\":\"An\\u00e1lisis de im\\u00e1genes m\\u00e9dicas de cerebro para dar soporte al diagn\\u00f3stico. 
[CA] Durant aquest projecte s\\u2019ha treballat amb imatges m\\u00e8diques disponibles gr\\u00e0cies a la col\\u00b7laboraci\\u00f3 entre el centre d\\u2019investigaci\\u00f3 Pattern Recognition and Human Language Technology \\u2013PRHLT\\u2013 i la Fundaci\\u00f3 per al Foment de la Investigaci\\u00f3 Sanit\\u00e0ria i Biom\\u00e8dica de la Comunitat Valenciana \\u2013FISABIO\\u2013. La finalitat \\u00faltima d\\u2019aquest projecte ha sigut construir un classificador per a cada tipus d\\u2019imatge segons l\\u2019etapa de la malaltia Alzh\\u00e8imer en qu\\u00e8 es troba el pacient. Els classificadors han estat basats tant en Deep Learning, alguns utilitzant Convolutional Neural Networks, com en m\\u00e8todes cl\\u00e0ssics de Machine Learning. Abans d\\u2019entrenar els classificadors, s\\u2019ha avaluat la qualitat dels conjunts de dades i s\\u2019ha fet un estudi de les seues caracter\\u00edstiques. Finalment, s\\u2019han comparat els resultats obtinguts amb les diferents t\\u00e8cniques emprades\"},{\"id\":\"10670\\/1.ssfroa\",\"content\":\"OM-AI: A Toolkit to Support AI-Based Computer-Assisted Composition Workflows in OpenMusic. International audience\"},{\"id\":\"http:\\/\\/hdl.handle.net\\/10251\\/124304\",\"content\":\"First-Stage Prostate Cancer Identification on Histopathological Images: Hand-Driven versus Automatic Learning. [EN] Analysis of histopathological image supposes the most reliable procedure to identify prostate cancer. Most studies try to develop computer aid-systems to face the Gleason grading problem. On the contrary, we delve into the discrimination between healthy and cancerous tissues in its earliest stage, only focusing on the information contained in the automatically segmented gland candidates. 
We propose a hand-driven learning approach, in which we perform an exhaustive hand-crafted feature extraction stage combining in a novel way descriptors of morphology, texture, fractals and contextual information of the candidates under study. Then, we carry out an in-depth statistical analysis to select the most relevant features that constitute the inputs to the optimised machine-learning classifiers. Additionally, we apply for the first time on prostate segmented glands, deep-learning algorithms modifying the popular VGG19 neural network. We fine-tuned the last convolutional block of the architecture to provide the model specific knowledge about the gland images. The hand-driven learning approach, using a nonlinear Support Vector Machine, reports a slight outperforming over the rest of experiments with a final multi-class accuracy of 0.876 +\\/- 0.026<\\/mml:semantics> in the discrimination between false glands (artefacts), benign glands and Gleason grade 3 glands.\"},{\"id\":\"10670\\/1.0qijb7\",\"content\":\"Knowledge Models and Image Processing Analysis in Remote Sensing: Examples of Yakutsk (Russia) and Kaunas (Lithuania). International audience\"},{\"id\":\"10670\\/1.w0wd5u\",\"content\":\"Leveraging machine learning for efficient mobility management and data transmission in fog computing. Fog computing is a new proposed architecture that complements the existing cloud computing through one or more layers of intermediate computing servers. These servers, called fogs, are deployed at the edge of the network and bridge the gap between end-user devices and the cloud. They provide compute and storage resources to devices in a similar way to the cloud but in a more distributed fashion. This new computing paradigm offers a set of new challenges that we aim to address in this research. 
We leverage the use of machine learning, particularly deep learning algorithms capable of taking advantage of the large volumes of data generated in smart city scenarios, to improve the efficiency of the fog computing middleware JAMScript. The first part of our work focuses on optimizing the handover procedure for mobile devices in a fog computing environment using a set of fog and cost predictors. These predictors are used to reduce the service interruptionsexperienced while transitioning from one fog node to another. We simulate a city level fog network with real-world data derived from taxi traces in Shanghai city. We then model the fog associations for vehicles using a feedforward neural network as well as the cost (latency) of interacting with a particular fog server using an recurrent neural network (RNN) with long short-term memory (LSTM) cells. We present a system architecture that describes the components of this predictive system as well as a smarter request routing scheme that can be implemented using it. The second part of our research introduces a learning logger architecture that utilizes an ensemble of LSTMs to model data streams derived from devices at the fog servers. We show how predictions from the learning model can be used to partially replace actual data, thereby saving on valuable bandwidth without compromising the integrity and usability of the data. Finally, we conduct a series of experiments that showcase the performance of our predictive systems and quantify their benefit in a fog computing environment. In these experiments, we use a Docker-container based emulator for a vehicular fog network created using JAMScript to evaluate the fog and cost predictors. 
For the learning logger experiments, we use a set of environmental sensor data streams.Experimental results show that these systems can yield a considerable reduction in resource usage and gain in transmission efficiency in a fog computing environment.\"},{\"id\":\"10670\\/1.pyqgsr\",\"content\":\"Modelling world agriculture as a learning machine? From mainstream models to Agribiom 1.0. International audience\"},{\"id\":\"10670\\/1.8sw3mq\",\"content\":\"Th\\u00e9orie des matrices al\\u00e9atoires pour l'apprentissage automatique en grande dimension et les r\\u00e9seaux de neurones. Le \\\"Big Data'' et les grands syst\\u00e8mes d'apprentissage sont omnipr\\u00e9sents dans les probl\\u00e8mes d'apprentissage automatique aujourd\\u2019hui. Contrairement \\u00e0 l'apprentissage de petite dimension, les algorithmes d'apprentissage en grande dimension sont sujets \\u00e0 divers ph\\u00e9nom\\u00e8nes contre-intuitifs et se comportent de mani\\u00e8re tr\\u00e8s diff\\u00e9rente des intuitions de petite dimension sur lesquelles ils sont construits. Cependant, en supposant que la dimension et le nombre des donn\\u00e9es sont \\u00e0 la fois grands et comparables, la th\\u00e9orie des matrices al\\u00e9atoires (RMT) fournit une approche syst\\u00e9matique pour \\u00e9valuer le comportement statistique de ces grands syst\\u00e8mes d'apprentissage, lorsqu'ils sont appliqu\\u00e9s \\u00e0 des donn\\u00e9es de grande dimension. 
L\\u2019objectif principal de cette th\\u00e8se est de proposer un sch\\u00e9ma d'analyse bas\\u00e9 sur la RMT, pour une grande famille de syst\\u00e8mes d\\u2019apprentissage automatique: d'\\u00e9valuer leurs performances, de mieux les comprendre et finalement les am\\u00e9liorer, afin de mieux g\\u00e9rer les probl\\u00e8mes de grandes dimensions aujourd'hui.Pr\\u00e9cis\\u00e9ment, nous commen\\u00e7ons par exploiter la connexion entre les grandes matrices \\u00e0 noyau, les projection al\\u00e9atoires non-lin\\u00e9aires et les r\\u00e9seaux de neurones al\\u00e9atoires simples. En consid\\u00e9rant que les donn\\u00e9es sont tir\\u00e9es ind\\u00e9pendamment d'un mod\\u00e8le de m\\u00e9lange gaussien, nous fournissons une caract\\u00e9risation pr\\u00e9cise des performances de ces syst\\u00e8mes d'apprentissage en grande dimension, exprim\\u00e9e en fonction des statistiques de donn\\u00e9es, de la dimensionnalit\\u00e9 et, surtout, des hyper-param\\u00e8tres du probl\\u00e8me. Lorsque des algorithmes d'apprentissage plus complexes sont consid\\u00e9r\\u00e9s, ce sch\\u00e9ma d'analyse peut \\u00eatre \\u00e9tendu pour acc\\u00e9der \\u00e0 de syst\\u00e8mes d'apprentissage qui sont d\\u00e9finis (implicitement) par des probl\\u00e8mes d'optimisation convexes, lorsque des points optimaux sont atteints. Pour trouver ces points, des m\\u00e9thodes d'optimisation telles que la descente de gradient sont r\\u00e9guli\\u00e8rement utilis\\u00e9es. 
\\u00c0 cet \\u00e9gard, dans le but d'avoir une meilleur compr\\u00e9hension th\\u00e9orique des m\\u00e9canismes internes de ces m\\u00e9thodes d'optimisation et, en particulier, leur impact sur le mod\\u00e8le d'apprentissage, nous \\u00e9valuons aussi la dynamique de descente de gradient dans les probl\\u00e8mes d'optimisation convexes et non convexes.Ces \\u00e9tudes pr\\u00e9liminaires fournissent une premi\\u00e8re compr\\u00e9hension quantitative des algorithmes d'apprentissage pour le traitement de donn\\u00e9es en grandes dimensions, ce qui permet de proposer de meilleurs crit\\u00e8res de conception pour les grands syst\\u00e8mes d\\u2019apprentissage et, par cons\\u00e9quent, d'avoir un gain de performance remarquable lorsqu'il est appliqu\\u00e9 \\u00e0 des jeux de donn\\u00e9es r\\u00e9els. Profond\\u00e9ment ancr\\u00e9 dans l'id\\u00e9e d'exploiter des donn\\u00e9es de grandes dimensions avec des informations r\\u00e9p\\u00e9t\\u00e9es \\u00e0 un niveau \\\"global'' plut\\u00f4t qu'\\u00e0 un niveau \\\"local'', ce sch\\u00e9ma d'analyse RMT permet une compr\\u00e9hension renouvel\\u00e9e et la possibilit\\u00e9 de contr\\u00f4ler et d'am\\u00e9liorer une famille beaucoup plus large de m\\u00e9thodes d'apprentissage automatique, ouvrant ainsi la porte \\u00e0 un nouveau sch\\u00e9ma d'apprentissage automatique pour l'intelligence artificielle.\"},{\"id\":\"10670\\/1.9snvqs\",\"content\":\"Minerva and Virgil-a program (Notes on Karpathy et al. 2014). Recent advances in machine learning and computer vision have resulted in the accurate and consistent identification of objects in images and videos. In this paper, a program named \\\"Minerva and Virgil\\\" is described whose purpose it is to counter State brutality, a term that we propose as the generalization of the already established \\\"police brutality\\\", by making use of these technologies.\"},{\"id\":\"10670\\/1.b5sy6l\",\"content\":\"Towards incorporating ethics in recommendation systems. 
International audience\"},{\"id\":\"10670\\/1.9m3tmj\",\"content\":\"Learning by stochastic serializations. Complex structures are typical in machine learning. Tailoring learning algorithms for every structure requires an effort that may be saved by defining a generic learning procedure adaptive to any complex structure. In this paper, we propose to map any complex structure onto a generic form, called serialization, over which we can apply any sequence-based density estimator. We then show how to transfer the learned density back onto the space of original structures. To expose the learning procedure to the structural particularities of the original structures, we take care that the serializations reflect accurately the structures\\u2019 properties. Enumerating all serializations is infeasible. We propose an effective way to sample representative serializations from the complete set of serializations which preserves the statistics of the complete set. Our method is competitive or better than state of the art learning algorithms that have been specifically designed for given structures. In addition, since the serialization involves sampling from a combinatorial process it provides considerable protection from overfitting, which we clearly demonstrate on a number of experiments.\"},{\"id\":\"10670\\/1.46oemr\",\"content\":\"Deuxi\\u00e8me s\\u00e9minaire Quaresmi. Ce jeudi 28 mars \\u00e0 14h, le laboratoire Quaresmi aura le plaisir d'accueillir pour son deuxi\\u00e8me s\\u00e9minaire Sylvie Vande Velde, physicienne et biologiste, doctorante au sein du machine learning group de l'ULB. Elle viendra nous pr\\u00e9senter un expos\\u00e9 intitul\\u00e9 \\\"Cancer et nouvelles th\\u00e9rapies\\\". \\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0 Dans nos soci\\u00e9t\\u00e9s modernes, de nombreuses personnes sont malheureusement \\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0 chaque ann\\u00e9e atteintes par le cancer. 
Face \\u00e0\\u00a0 cette maladie, la \\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0 recherche m\\u00e9dicale a d\\u00e9j\\u00e0 connu quelques gr...\"},{\"id\":\"10670\\/1.2mcjne\",\"content\":\"SIDE Summer School, day 1. This morning, we start the SIdE (Italian Econometric Association) Summer School, on Machine Learning Algorithms for Econometricians. Emmanuel Flachaire will start with a presentation of nonparametric econometric techniques. I will then get back to the geometry of (standard) econometric techniques, to introduce kernels. The first series of slides are online. I will then spend more time on the (popular) idea of \\\"least squares\\\" and mention other loss functions. Slides are online.\"},{\"id\":\"http:\\/\\/hdl.handle.net\\/10251\\/127157\",\"content\":\"Aplicaci\\u00f3n de m\\u00e9todos de machine learning a la espectroscop\\u00eda de protones acelerados por l\\u00e1ser. [ES] En este trabajo de fin de grado se ha desarrollado un sistema de clasificaci\\u00f3n autom\\u00e1tica de im\\u00e1genes microsc\\u00f3picas basado en tecnolog\\u00eda de Deep Learning. Estas im\\u00e1genes son el resultado de un experimento en el que se hace colisionar protones acelerados contra un material detector, y consisten en un fondo gris con trazas con forma circular. La mayor\\u00eda de las im\\u00e1genes que se obtienen con este experimento no sirven debido a distintos factores como la falta de nitidez, por lo que es necesario clasificarlas. A su vez se ha implementado un sistema que extrae el radio y posici\\u00f3n de las trazas usando t\\u00e9cnicas de visi\\u00f3n artificial.\"},{\"id\":\"10670\\/1.qa08nc\",\"content\":\"Des machines et des hommes. La guerre n\\u2019aura pas lieu. M\\u00e9decine, biologie, \\u00e9conomie, finance, sociologie, sport, les domaines \\u00e0 mobiliser l\\u2019apprentissage machine sont de plus en plus nombreux. 
Si leur int\\u00e9r\\u00eat n\\u2019a cess\\u00e9 de s\\u2019intensifier ces derni\\u00e8res ann\\u00e9es, c\\u2019est qu\\u2019ils ont pr\\u00e9f\\u00e9r\\u00e9 percevoir les m\\u00e9thodes d\\u2019\\u00ab\\u00a0intelligence artificielle\\u00a0\\u00bb comme des outils techniques permettant de traiter de grandes bases de donn\\u00e9es avec un haut degr\\u00e9 de pr\\u00e9cision plut\\u00f4t qu\\u2019en tant que science concurrentielle. De fa\\u00e7on tr\\u00e8s tardive, les juristes l\\u2019explorent d\\u00e9sormais, non sans r\\u00e9fractaires. Tant\\u00f4t consid\\u00e9r\\u00e9e comme la n\\u00e9cessit\\u00e9 d\\u2019une nouvelle \\u00e8re \\u00ab\\u00a0Big Data\\u00a0\\u00bb, l\\u2019intelligence artificielle est aussi d\\u00e9nonc\\u00e9e par d\\u2019autres qui y voient la volont\\u00e9 d\\u2019une m\\u00e9canisation du droit o\\u00f9 la machine pr\\u00e9tendrait supplanter l\\u2019humain. Pourtant, comme pour toutes les autres sciences, les algorithmes seront de puissants outils pour la connaissance. La coop\\u00e9ration entre l\\u2019intelligence artificielle machine et l\\u2019intelligence naturelle humaine permettra de renforcer la compr\\u00e9hension que nous avons des m\\u00e9canismes juridiques et de leur application par les professionnels du droit.\"},{\"id\":\"10670\\/1.zzivgc\",\"content\":\"Online Platforms and the Labour Market: Learning (with Machines) from an Experiment in France. I study the effect of an online job search assistance program taking advantage of a previousexperiment made by the French public employment services, which provides some exogenousvariation in the use of this platform. I focus on the heterogeneity analysis of this treatment,using two main different approaches.The first one is theory-driven, and focus on the analysis of the heterogeneity of thetreatment with respect to various different labour market tightness indicators. Two mainassessments can be made based on this analysis. 
(i) Tightness indicators are (surprisingly)decorrelated, making it difficult to corroborate the rare significant results obtained. (ii) Theset of significant results obtained suggest that the treatment effect isincreasingin labourmarket tightness. I suggest competing ways of modelling the treatment consistent withthose results. I also document some evidence of a larger treatment effect for individuals withweaker employment prospects. This is in line with other empirical evidence in the literatureevaluating job search assistance programs.The second approach is more data-driven, and resorts to the new machine learning (ML)techniques developed for heterogeneity analysis. I focus on tree-based techniques and forests,which have been central in the development of these techniques. The results of this analysisshed light on the limits of ML in the exploration of treatment effect heterogeneity, especiallyas the main ML-specific test for treatment effect heterogeneity developed by Chernozhukovet al. (2018a) concludes that ML is unable to detect any heterogeneity \\u2014 yet this mightbe not that surprising after all given the lack of statistical power (low take-up) and theprobably low order of magnitude of the treatment effect studied. Still, I provide applicationsof a large part of the existing ML techniques for treatment effect heterogeneity, trying totake advantage of each of them to document which are the dimensions that are likely to beimportant to study treatment effect heterogeneity in my setting.\"},{\"id\":\"10670\\/1.dn9l78\",\"content\":\"Using social media, machine learning and natural language processing to map multiple recreational beneficiaries. International audience\"},{\"id\":\"10670\\/1.7yg7f1\",\"content\":\"Large-scale Nonlinear Variable Selection via Kernel Random Features. We propose a new method for input variable selection in nonlinear regression. 
The method is embedded into a kernel regression machine that can model general nonlinear functions, not being a priori limited to additive models. This is the first kernel-based variable selection method applicable to large datasets. It sidesteps the typical poor scaling properties of kernel methods by mapping the inputs into a relatively low-dimensional space of random features. The algorithm discovers the variables relevant for the regression task together with learning the prediction model through learning the appropriate nonlinear random feature maps. We demonstrate the outstanding performance of our method on a set of large-scale synthetic and real datasets.\"},{\"id\":\"10670\\/1.ktrx8d\",\"content\":\"Open data for humanists: big differences in small steps. \\u201cPhilologists must for at least two reasons open up the textual data upon which they base their work. First, researchers need to be able to download, modify and redistribute their textual data if they are to fully exploit both new methods that center around algorithmic analysis (e.g., corpus linguistics, computational linguistics, text mining, and various applications of machine learning) and new scholarly products and practices that computational methods enable [...]. Second, open data is es...\"},{\"id\":\"10670\\/1.3djx67\",\"content\":\"The Socio-Legal Relevance of Artificial Intelligence. L\\u2019article propose une analyse sociojuridique des questions d\\u2019\\u00e9quit\\u00e9, de responsabilit\\u00e9 et de transparence pos\\u00e9es par les applications d\\u2019intelligence artificielle\\u00a0(IA) employ\\u00e9es actuellement dans nos soci\\u00e9t\\u00e9s et de machine learning. Pour rendre compte de ces d\\u00e9fis juridiques et normatifs, nous analysons des cas probl\\u00e9matiques, comme la reconnaissance d\\u2019images fond\\u00e9e sur des bases de donn\\u00e9es qui pr\\u00e9sentent des biais de genre. 
Nous envisageons ensuite sept aspects de la transparence qui permettent de compl\\u00e9ter les notions d\\u2019explainable\\u00a0AI\\u00a0(XAI) dans la recherche en sciences informatiques. L\\u2019article examine aussi l\\u2019effet de miroir normatif provoqu\\u00e9 par l\\u2019usage des valeurs humaines et des structures soci\\u00e9tales comme donn\\u00e9es d\\u2019entra\\u00eenement pour les technologies d\\u2019apprentissage. Enfin, nous plaidons pour une approche multidisciplinaire dans la recherche, le d\\u00e9veloppement et la gouvernance en mati\\u00e8re d\\u2019IA.\"},{\"id\":\"10670\\/1.ms7s8f\",\"content\":\"OpenMethods introduction to: Analyzing Documents with TF-IDF | Programming Historian. Introduction: The indispensable Programming Historian\\u00a0comes with an introduction to Term Frequency - Inverse Document Frequency (tf-idf) provided by Matthew J. Lavin. The procedure, concerned with specificity of terms in a document, has its origins in information retrieval, but can be applied as an exploratory tool, finding textual similarity, or as a pre-processing tool for machine learning. It is therefore not only useful for textual scholars, but also for historians working with large collections of text.\"},{\"id\":\"10670\\/1.lhasgg\",\"content\":\"OpenMethods introduction to: Analyzing Documents with TF-IDF | Programming Historian. Introduction: The indispensable Programming Historian\\u00a0comes with an introduction to Term Frequency - Inverse Document Frequency (tf-idf) provided by Matthew J. Lavin. The procedure, concerned with specificity of terms in a document, has its origins in information retrieval, but can be applied as an exploratory tool, finding textual similarity, or as a pre-processing tool for machine learning. 
It is therefore not only useful for textual scholars, but also for historians working with large collections of text.\"},{\"id\":\"10670\\/1.f7bvpv\",\"content\":\"Identification of metabolites from tandem mass spectra with a machine learning approach utilizing structural features. Untargeted mass spectrometry is a powerful method for detecting metabolites in biological samples. However, fast and accurate identification of the metabolites' structures from MS\\/MS spectra is still a great challenge.\"},{\"id\":\"10670\\/1.x80hl3\",\"content\":\"Probabilistic Fondations of Econometrics, part 3. This post is the third one of our series on the history and foundations of econometric and machine learning models. Part 2 is online here. Exponential family and linear models The Gaussian linear model is a special case of a large family of linear models, obtained when the conditional distribution of [latex]Y[\\/latex] (given the covariates) belongs to the exponential family[latex display=\\\"true\\\"] f(y_i|\\\\theta_i,\\\\phi)=\\\\exp\\\\left(\\\\frac{y_i\\\\theta_i-b(\\\\theta_i)}{a(\\\\phi)}+c(y_i,\\\\phi)\\\\right) [\\/latex] ...\"},{\"id\":\"10670\\/1.rvntbi\",\"content\":\"Probabilistic Fondations of Econometrics, part 2. This post is the second one of our series on the history and foundations of econometric and machine learning models. Part 1 is online here. Geometric Properties of this Linear Model Let's define the scalar product in [latex]\\\\mathbb{R}^n[\\/latex], [latex]\\u27e8\\\\mathbf{a},\\\\mathbf{b}\\u27e9=\\\\mathbf{a}^T\\\\mathbf{b}[\\/latex], and let's note [latex]\\\\|\\\\cdot\\\\|[\\/latex] the associated Euclidean standard, [latex]\\\\|\\\\mathbf{a}\\\\|=\\\\sqrt{\\\\mathbf{a}^T\\\\mathbf{a}}[\\/latex] (denoted [latex]\\\\|\\\\cdot\\\\|_{\\\\ell_2}[\\/latex] in the nex...\"},{\"id\":\"10670\\/1.8og9dc\",\"content\":\"Combining domain filling with a self-organizing map to analyze multi-species hydrocarbon signatures on a regional scale. 
International audience\"},{\"id\":\"http:\\/\\/hdl.handle.net\\/10251\\/123881\",\"content\":\"Active learning and social commitment projects as a teaching-learning intervention in engineering degrees. [EN] The purpose of universities, apart from produce qualified professionals with problem-solving capabilities and soft-skills, should be to develop the social responsibility sense on their students. In this context, our proposal combines project based learning (PBL) and service based learning (SBL) along with gamming and the use of open-source machines, with the aim to increase student\\u2019s motivation and their social commitment with an affordable budget. The strategy, from now on named OS-PBL-SR (Open-Source-based PBL projects with Social Responsibility), mainly includes three important aspects: (i) assignment with projects orientated towards a social benefit; (ii) development of the projects using open-source Do It Yourself desktop machines (DIY-DkM); and (iii) include gamming in the evaluation method. The strategy was applied in the subject Manufacturing Technology but it might be easily exportable to other technical subjects. The results from the last academic year are presented. Also, a new OS-PBL-SR proposal aimed to the design and fabrication of autonomy-oriented products for people in a dependency situation is presented. The results showed the beneficial impact on undergraduate students by keeping high levels of motivation reflected on excellent success rates and scores. In addition, essential advantages in the use of DIY-DkM were found regarding the implementation of this kind of PBL strategy.\"},{\"id\":\"10670\\/1.j5n6vo\",\"content\":\"M\\u00e9thodes d'apprentissage statistique pour le criblage virtuel de m\\u00e9dicament. Le processus de d\\u00e9couverte de m\\u00e9dicaments a un succ\\u00e8s limit\\u00e9 malgr\\u00e9 tous les progr\\u00e8s r\\u00e9alis\\u00e9s. 
En effet, on estime actuellement que le d\\u00e9veloppement d'un m\\u00e9dicament n\\u00e9cessite environ 1,8 milliard de dollars am\\u00e9ricains sur environ 13 ans. Nous nous concentrons dans cette th\\u00e8se sur des approches statistiques qui criblent virtuellement un grand ensemble de compos\\u00e9s chimique contre un grand nombre de prot\\u00e9ines. Leurs applications sont polyvalentes : elles permettent d\\u2019identifier des candidats m\\u00e9dicaments pour des cibles th\\u00e9rapeutiques connues, d\\u2019anticiper des effets secondaires potentiels, ou de proposer de nouvelles indications th\\u00e9rapeutiques pour des m\\u00e9dicaments connus. Cette th\\u00e8se est con\\u00e7ue selon deux cadres d'approches de criblage virtuel : les approches dans lesquelles les donn\\u00e9es sont d\\u00e9crites num\\u00e9riquement sur la base des connaissances des experts, et les approches bas\\u00e9es sur l'apprentissage automatique de la repr\\u00e9sentation num\\u00e9rique \\u00e0 partir du graphe mol\\u00e9culaire et de la s\\u00e9quence prot\\u00e9ique. Nous discutons ces approches et les appliquons pour guider la d\\u00e9couverte de m\\u00e9dicaments.\"},{\"id\":\"http:\\/\\/hdl.handle.net\\/20.500.11794\\/34944\",\"content\":\"Inferring phenotypes from genotypes with machine learning : an application to the global problem of antibiotic resistance. Tableau d\\u2019honneur de la Facult\\u00e9 des \\u00e9tudes sup\\u00e9rieures et postdoctorales, 2018-2019\"},{\"id\":\"10670\\/1.b5iz8l\",\"content\":\"Machine learning for predicting psychotic relapse at 2\\u202fyears in schizophrenia in the national FACE-SZ cohort. International audience\"},{\"id\":\"10670\\/1.iqnl7v\",\"content\":\"Motorized individual mobility in commuting trips: modal preference or constrained mode choice? A machine learning approach. International audience\"},{\"id\":\"10670\\/1.sjsygu\",\"content\":\"Probabilistic Fondations of Econometrics, part 4. 
This post is the fourth one of our series on the history and foundations of econometric and machine learning models. Part 3 is online here. Goodness of Fit, and Model In the Gaussian linear model, the determination coefficient - noted [latex]R^2[\\/latex] - is often used as a measure of fit quality. It is based on the variance decomposition formula [latex display=\\\"true\\\"]\\\\underbrace{\\\\frac{1}{n}\\\\sum_{i=1}^n (y_i-\\\\bar{y})^2}_{\\\\text{total variance}}=\\\\underbrace{\\\\frac{1}{n}\\\\sum_{i=1}^n (y_i-\\\\widehat...\"},{\"id\":\"10670\\/1.ikxpbg\",\"content\":\"Contribution au d\\u00e9veloppement de l\\u2019apprentissage profond dans les syst\\u00e8mes distribu\\u00e9s. L'apprentissage profond permet de d\\u00e9velopper un nombre de services de plus en plus important. Il n\\u00e9cessite cependant de grandes bases de donn\\u00e9es d'apprentissage et beaucoup de puissance de calcul. Afin de r\\u00e9duire les co\\u00fbts de cet apprentissage profond, nous proposons la mise en \\u0153uvre d'un apprentissage collaboratif. Les futures utilisateurs des services permis par l'apprentissage profond peuvent ainsi participer \\u00e0 celui-ci en mettant \\u00e0 disposition leurs machines ainsi que leurs donn\\u00e9es sans d\\u00e9placer ces derni\\u00e8res sur le cloud. Nous proposons diff\\u00e9rentes m\\u00e9thodes afin d'apprendre des r\\u00e9seaux de neurones profonds dans ce contexte de syst\\u00e8me distribu\\u00e9.\"},{\"id\":\"10670\\/1.o1kjua\",\"content\":\"Intelligence artificielle, apprentissage machine et explicabilit\\u00e9 des mod\\u00e8les. International audience\"},{\"id\":\"http:\\/\\/hdl.handle.net\\/10251\\/124236\",\"content\":\"DEVELOPMENT OF AN ARTIFICIAL INTELLIGENCE IMPLEMENTATION PLAN FOR THE SPANISH PUBLIC SECTOR. 
[EN] The present work proposes an AI implementation plan for the Spanish Public Sector as a guideline for accomplishing three main goals: (1) to acquire a better understanding of the AI framework in the context of the Public Administration, (2) to identify the principal actors of this process of radical transformation that all administrations will have to face in the near future, and (3) to provide a feasible prioritization strategy for determining which AI systems should be applied first, and in which particular areas of the Public Sector. The first step for developing this strategy has been to conduct a study of the current situation of AI in both scenarios Europe and Spain. Secondly, for determining the priorities among the AI systems subjected to study -Machine Learning, Deep Learning, Bid Data analytics, Natural Language Processing, Computer Vision, Content Generation and Reinforcement Learning-, a quantitative analysis based on the PACE prioritization matrix has been carried out. Finally, the order of public areas to approach with the implementation of AI systems -Education, Healthcare, Smart cities, Citizen-Public Administration interaction and Security-, has been based on a qualitative analysis of the current situation of the considered public areas.\"},{\"id\":\"10670\\/1.emo6ff\",\"content\":\"Goal-oriented dialogue systems : state-of-the-art and future works. International audience\"},{\"id\":\"10670\\/1.6fxo42\",\"content\":\"A Machine Learning Approach to Study the Relationship between Features of the Urban Environment and Street Value. International audience\"},{\"id\":\"10670\\/1.bt9f1h\",\"content\":\"'Whatever it Takes' to Change Belief: Evidence from Twitter. The sovereign debt literature emphasizes the possibility of avoiding a self-fulfilling default crisis if markets anticipate the central bank to act as lender of last resort. 
This paper investigates the extent to which changes in belief about an intervention of the European Central Bank (ECB) explain the sudden reduction of government bond spreads for the distressed countries in summer 2012. We study Twitter data and extract belief using machine learning techniques. We find evidence of strong increases in the perceived likelihood of ECB intervention and show that those increases explain subsequent decreases in the bond spreads of the distressed countries.\"},{\"id\":\"10670\\/1.i8fbmj\",\"content\":\"Exploring dynamic hamiltonian Monte Carlo for bayesian neural networks. Neural network models have seen tremendous success in predictive tasks in machine learning and artificial intelligence, with some attributing their success to implicit use of Bayesian inference. Stan is a state-of-the-art software for Bayesian statistical computing used mainly in the statistical community, however, it is not optimized for use with neural network models. In this thesis, we replicated much of Stan's No U-Turn sampler in PyTorch and explored its use for sampling from Bayesian neural network models. We were able to explore different samplers, model structures and their sampling and predictive performances on a benchmark classification task. We found that Bayesian inference gives more robust predictive performance compared to their frequentist counterparts in general, but care is needed with the choice of prior and the MCMC sampler.\"},{\"id\":\"10670\\/1.7pxxly\",\"content\":\"The Next Generation Cognitive Security Operations Center: Adaptive Analytic Lambda Architecture for Efficient Defense against Adversarial Attacks. 
International audience\"}]"} \ No newline at end of file diff --git a/server/workers/backend/requirements.txt b/server/workers/backend/requirements.txt index 7800f0fad..e6e171bc6 100644 --- a/server/workers/backend/requirements.txt +++ b/server/workers/backend/requirements.txt @@ -1 +1,2 @@ redis +pandas diff --git a/server/workers/backend/src/headstart.py b/server/workers/backend/src/headstart.py index b60f520f7..e7d51ca96 100644 --- a/server/workers/backend/src/headstart.py +++ b/server/workers/backend/src/headstart.py @@ -5,6 +5,7 @@ import asyncio from tempfile import NamedTemporaryFile import redis +import pandas as pd redis_store = redis.StrictRedis(host="localhost", port=6379, db=0) @@ -47,7 +48,7 @@ def create_map(self, params, input_data): param_file.name, input_file.name] output = subprocess.check_output(cmd) output = [o for o in output.decode('utf-8').split('\n') if len(o) > 0] - return json.loads(output[-1]) + return pd.DataFrame(json.loads(output[-1])).to_json(orient="records") def run(self): k, params, input_data = self.next_item() diff --git a/server/workers/services/src/apis/request_validators.py b/server/workers/services/src/apis/request_validators.py index acfc0c618..d8aed6237 100644 --- a/server/workers/services/src/apis/request_validators.py +++ b/server/workers/services/src/apis/request_validators.py @@ -9,6 +9,10 @@ class SearchParamSchema(Schema): format="%Y-%m-%d") to = fields.Date(required=True, format="%Y-%m-%d") + vis_type = fields.Str(require=True) + year_range = fields.Str() + today = fields.Str() + raw = fields.Boolean() @pre_load def fix_years(self, in_data, **kwargs): diff --git a/server/workers/services/src/apis/triple.py b/server/workers/services/src/apis/triple.py index baa84e00d..a5a2c1dd7 100644 --- a/server/workers/services/src/apis/triple.py +++ b/server/workers/services/src/apis/triple.py @@ -28,18 +28,6 @@ def get_key(store, key): return result -search_fields = api.model('Search', { - "q": fields.String, - "sorting": 
fields.String, - "from": fields.DateTime, - "to": fields.DateTime, -}) - -mappings = api.model('Mappings', { - "index": fields.String -}) - - @api.route('/search') class Search(Resource): @api.doc(responses={200: 'OK', @@ -48,15 +36,13 @@ class Search(Resource): "sorting": "string, most-relevant or most-recent", "from": "yyyy-MM-dd", "to": "yyyy-MM-dd", - "vis_type": "string, overview or streamgraph"}) + "vis_type": "string, overview or streamgraph", + "raw": "boolean"}) # @api.marshal_with(search_fields) def post(self): """ """ data = request.get_json() - data = {k: data.get(k) - for k - in ["q", "sorting", "from", "to"]} errors = search_param_schema.validate(data, partial=True) if errors: abort(400, str(errors)) diff --git a/server/workers/triple/src/search_triple.py b/server/workers/triple/src/search_triple.py index 80af1ef02..d2c797c26 100644 --- a/server/workers/triple/src/search_triple.py +++ b/server/workers/triple/src/search_triple.py @@ -110,10 +110,10 @@ def process_result(self, result): metadata["link"] = "" text = pd.DataFrame() text["id"] = metadata["id"] - text["content"] = metadata.apply(lambda x: ". ".join(x[["title", "paper_abstract"]]), axis=1).to_json() + text["content"] = metadata.apply(lambda x: ". 
".join(x[["title", "paper_abstract"]]), axis=1) input_data = {} - input_data["metadata"] = metadata.to_json() - input_data["text"] = text.to_json() + input_data["metadata"] = metadata.to_json(orient='records') + input_data["text"] = text.to_json(orient='records') return input_data @staticmethod @@ -161,7 +161,10 @@ def run(self): res["id"] = k res["input_data"] = self.search(params) res["params"] = params - redis_store.rpush("input_data", json.dumps(res)) + if params.get('raw') is True: + redis_store.set(k+"_output", json.dumps(res)) + else: + redis_store.rpush("input_data", json.dumps(res)) if __name__ == '__main__': From 0ee7e8aed5b4bcc53127dab03ef7dcf720e394ef Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Thu, 12 Mar 2020 20:45:58 +0100 Subject: [PATCH 29/99] data cleaning --- server/workers/triple/src/search_triple.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/server/workers/triple/src/search_triple.py b/server/workers/triple/src/search_triple.py index d2c797c26..9a187dcfd 100644 --- a/server/workers/triple/src/search_triple.py +++ b/server/workers/triple/src/search_triple.py @@ -108,6 +108,7 @@ def process_result(self, result): metadata["subject"] = df.keyword.map(lambda x: "; ".join([self.clean_subject(s) for s in x]) if isinstance(x, list) else "") metadata["oa_state"] = 2 metadata["link"] = "" + metadata["relevance"] = df.index text = pd.DataFrame() text["id"] = metadata["id"] text["content"] = metadata.apply(lambda x: ". ".join(x[["title", "paper_abstract"]]), axis=1) @@ -136,7 +137,8 @@ def clean_subject(subject): subject_cleaned = re.sub(r"\. 
", "; ", subject_cleaned) # replace inconsistent keyword separation subject_cleaned = re.sub(r" ?\d[:?-?]?(\d+.)+", "", subject_cleaned) # replace residuals like 5:621.313.323 or '5-76.95' subject_cleaned = re.sub(r"\w+:\w+-(\w+\/)+", "", subject_cleaned) # replace residuals like Info:eu-repo/classification/ - return subject + subject_cleaned = re.sub(r"\[\w+\.?\w+\]", "", subject_cleaned) # replace residuals like [shs.hisphilso] + return subject_cleaned @staticmethod def get_authors(authorlist): From 4ed2785e4fd0e762b2d8f07d3d348deb957f7105 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Sun, 15 Mar 2020 19:32:27 +0100 Subject: [PATCH 30/99] wip --- server/workers/services/src/apis/triple.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/server/workers/services/src/apis/triple.py b/server/workers/services/src/apis/triple.py index a5a2c1dd7..f3177afd3 100644 --- a/server/workers/services/src/apis/triple.py +++ b/server/workers/services/src/apis/triple.py @@ -28,6 +28,22 @@ def get_key(store, key): return result +@api.model(fields={"q": fields.String, + "sorting": fields.String, + "from_": fields.String, + "to": fields.String, + "vis_type": fields.String, + "raw": fields.Boolean}) +class SearchQuery(fields.Raw): + def format(self, value): + return {"q": value.q, + "sorting": value.sorting, + "from": getattr(value, "from"), + "to": value.to, + "vis_type": value.vis_type, + "raw": value.raw} + + @api.route('/search') class Search(Resource): @api.doc(responses={200: 'OK', @@ -38,7 +54,6 @@ class Search(Resource): "to": "yyyy-MM-dd", "vis_type": "string, overview or streamgraph", "raw": "boolean"}) - # @api.marshal_with(search_fields) def post(self): """ """ From dcb8fb1c1cadfb476850b1a18596dc8914accdc2 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Tue, 17 Mar 2020 16:07:33 +0100 Subject: [PATCH 31/99] remove blueprints; simplify structure; new content type --- server/workers/services/requirements.txt | 1 + 
server/workers/services/src/apis/__init__.py | 6 -- .../services/src/apis/request_validators.py | 7 +- server/workers/services/src/apis/triple.py | 98 ++++++++++++------- server/workers/services/src/app.py | 20 ++-- server/workers/services/src/requirements.txt | 6 -- server/workers/triple/src/search_triple.py | 4 +- 7 files changed, 83 insertions(+), 59 deletions(-) delete mode 100644 server/workers/services/src/requirements.txt diff --git a/server/workers/services/requirements.txt b/server/workers/services/requirements.txt index 698c4e3b8..fb4357158 100644 --- a/server/workers/services/requirements.txt +++ b/server/workers/services/requirements.txt @@ -6,3 +6,4 @@ gunicorn redis hiredis aioredis +pandas diff --git a/server/workers/services/src/apis/__init__.py b/server/workers/services/src/apis/__init__.py index 3f7e6262c..e69de29bb 100644 --- a/server/workers/services/src/apis/__init__.py +++ b/server/workers/services/src/apis/__init__.py @@ -1,6 +0,0 @@ -from flask_restx import Api -from .triple import api as triple_api - -api = Api(title="Head Start API", version="0.1", - description="Head Start API demo") -api.add_namespace(triple_api, path='/triple') diff --git a/server/workers/services/src/apis/request_validators.py b/server/workers/services/src/apis/request_validators.py index d8aed6237..95453acfb 100644 --- a/server/workers/services/src/apis/request_validators.py +++ b/server/workers/services/src/apis/request_validators.py @@ -12,7 +12,7 @@ class SearchParamSchema(Schema): vis_type = fields.Str(require=True) year_range = fields.Str() today = fields.Str() - raw = fields.Boolean() + result_type = fields.Str() @pre_load def fix_years(self, in_data, **kwargs): @@ -26,3 +26,8 @@ def fix_years(self, in_data, **kwargs): def is_not_in_future(self, date): if date > datetime.today().date(): raise ValidationError("Starting date can't be in the future.") + + @validates('result_type') + def validate_result_type(self, result_type): + if result_type not in ["json", 
"csv", "raw"]: + raise ValidationError("Result type must be one of ['json', 'csv', 'raw'].") diff --git a/server/workers/services/src/apis/triple.py b/server/workers/services/src/apis/triple.py index f3177afd3..bb45b777e 100644 --- a/server/workers/services/src/apis/triple.py +++ b/server/workers/services/src/apis/triple.py @@ -5,13 +5,16 @@ import redis import asyncio import aioredis +import pandas as pd from flask import Blueprint, request, make_response, jsonify, abort from flask_restx import Namespace, Resource, fields from .request_validators import SearchParamSchema redis_store = redis.StrictRedis(host="localhost", port=6379, db=0) -api = Namespace("triple", description="TRIPLE API") +triple_ns = Namespace("triple", description="TRIPLE API operations") + + search_param_schema = SearchParamSchema() @@ -28,32 +31,28 @@ def get_key(store, key): return result -@api.model(fields={"q": fields.String, - "sorting": fields.String, - "from_": fields.String, - "to": fields.String, - "vis_type": fields.String, - "raw": fields.Boolean}) -class SearchQuery(fields.Raw): - def format(self, value): - return {"q": value.q, - "sorting": value.sorting, - "from": getattr(value, "from"), - "to": value.to, - "vis_type": value.vis_type, - "raw": value.raw} +search_query = triple_ns.model("Search Query", + {"q": fields.String(required=True), + "sorting": fields.String(required=True), + "from": fields.String(required=True), + "to": fields.String(required=True), + "vis_type": fields.String(required=True), + "result_type": fields.String(required=True)}) +# class SearchQuery(fields.Raw): +# def format(self, value): +# return {"q": value.q, +# "sorting": value.sorting, +# "from": getattr(value, "from"), +# "to": value.to, +# "vis_type": value.vis_type, +# "raw": value.raw} -@api.route('/search') +@triple_ns.route('/search') class Search(Resource): - @api.doc(responses={200: 'OK', - 400: 'Invalid search parameters'}, - params={"q": "string, query term", - "sorting": "string, most-relevant 
or most-recent", - "from": "yyyy-MM-dd", - "to": "yyyy-MM-dd", - "vis_type": "string, overview or streamgraph", - "raw": "boolean"}) + @triple_ns.doc(responses={200: 'OK', + 400: 'Invalid search parameters'}) + @triple_ns.expect(search_query) def post(self): """ """ @@ -67,28 +66,59 @@ def post(self): redis_store.rpush("triple", json.dumps(d)) result = get_key(redis_store, k) - headers = {"Content-Type": "application/json"} + headers = {} + if data.get("result_type") == "json": + headers["Content-Type"] = "application/json" + if data.get("result_type") == "csv": + result = pd.read_json(result).to_csv() + headers["Content-Type"] = "text/csv" + headers["Content-disposition"] = "attachment; filename={0}.csv".format(k) + if data.get("result_type") == "raw": + headers["Content-Type"] = "application/json" return make_response(result, 200, headers) -@api.route('/example_data') +@triple_ns.route('/example_data') class ExampleData(Resource): + @triple_ns.doc(description="Returns example map data for the following parameters:\n" + "q=feminicide, sorting=most-relevant, from=2019-01-01, " + "to=2019-12-31, result_type=csv", + responses={200: 'OK'}) def get(self): - headers = {"Content-Type": "application/json"} - data = {"test": "document string"} - return make_response(data, + k = str(uuid.uuid4()) + data = {"q": "feminicide", + "sorting": "most-relevant", + "from": "2019-01-01", + "to": "2019-12-31", + "vis_type": "overview", + "result_type": "csv"} + d = {"id": k, "params": data, + "endpoint": "search"} + redis_store.rpush("triple", json.dumps(d)) + result = get_key(redis_store, k) + + headers = {} + if data.get("result_type") == "json": + headers["Content-Type"] = "application/json" + if data.get("result_type") == "csv": + result = pd.read_json(result).to_csv() + headers["Content-Type"] = "text/csv" + headers["Content-disposition"] = "attachment; filename={0}.csv".format(k) + if data.get("result_type") == "raw": + headers["Content-Type"] = "application/json" + return 
make_response(result, 200, headers) -@api.route('/mappings') +@triple_ns.route('/mappings') class Mappings(Resource): - @api.doc(responses={200: 'OK', - 400: 'Invalid search parameters'}, - params={"index": "Specify the ElasticSearch index to get the mapping of."}) - # @api.marshal_with(mappings) + @triple_ns.doc(responses={200: 'OK', + 400: 'Invalid search parameters'}, + params={"index": "Specify the ElasticSearch index to get the mapping of, currently either 'isidore-sources-triple' or 'isidore-documents-triple'"}) + # @triple_ns.marshal_with(mappings) def get(self): """ """ diff --git a/server/workers/services/src/app.py b/server/workers/services/src/app.py index c9d178122..298f60c43 100644 --- a/server/workers/services/src/app.py +++ b/server/workers/services/src/app.py @@ -1,17 +1,17 @@ from flask import Flask -from apis import api +from flask_restx import Api +from apis.triple import triple_ns +app = Flask('v1', instance_relative_config=True) +app.config.from_object('config.settings') +app.config.from_pyfile('settings.py', silent=True) -def new_services_app(settings_override=None): - flask_app = Flask('v2', instance_relative_config=True) - - flask_app.config.from_object('config.settings') - flask_app.config.from_pyfile('settings.py', silent=True) - - api.init_app(flask_app) - return flask_app +api = Api(app=app, title="Head Start API", version="0.1", + description="Head Start API demo", + endpoint='/api', + doc='/api/docs') +api.add_namespace(triple_ns, path='/triple') if __name__ == '__main__': - app = new_services_app() app.run(port=5001, debug=True) diff --git a/server/workers/services/src/requirements.txt b/server/workers/services/src/requirements.txt deleted file mode 100644 index cf48ee5cc..000000000 --- a/server/workers/services/src/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -flask -flask-restx -gunicorn -redis -hiredis -aioredis diff --git a/server/workers/triple/src/search_triple.py b/server/workers/triple/src/search_triple.py index 
9a187dcfd..5e247d09e 100644 --- a/server/workers/triple/src/search_triple.py +++ b/server/workers/triple/src/search_triple.py @@ -73,7 +73,7 @@ def search(self, parameters): body=body, size=100, sort=sort) - if parameters.get('raw'): + if parameters.get('result_type') == 'raw': return res else: return self.process_result(res) @@ -163,7 +163,7 @@ def run(self): res["id"] = k res["input_data"] = self.search(params) res["params"] = params - if params.get('raw') is True: + if params.get('result_type') == 'raw': redis_store.set(k+"_output", json.dumps(res)) else: redis_store.rpush("input_data", json.dumps(res)) From b6dbbad4857b25ac7f7dd9b2730b4494d9a8a8df Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Tue, 17 Mar 2020 23:55:00 +0100 Subject: [PATCH 32/99] wip --- examples/triple/README.md | 57 +++++++++++++++ server/services.docker | 2 +- server/workers/services/requirements.txt | 1 + .../services/src/apis/request_validators.py | 7 +- server/workers/services/src/apis/triple.py | 73 ++++++------------- server/workers/services/src/app.py | 4 +- server/workers/triple/src/search_triple.py | 4 +- 7 files changed, 87 insertions(+), 61 deletions(-) create mode 100644 examples/triple/README.md diff --git a/examples/triple/README.md b/examples/triple/README.md new file mode 100644 index 000000000..e9c2c0524 --- /dev/null +++ b/examples/triple/README.md @@ -0,0 +1,57 @@ +## Setup + +### Install docker and docker-compose + +Please follow the install instructions for your OS: + +* Windows: https://docs.docker.com/docker-for-windows/install/ +* Mac: https://docs.docker.com/docker-for-mac/install/ +* Ubuntu: https://docs.docker.com/docker-for-mac/install/ (also available for other Linux) + +Please follow the install instructions for docker-compose for your OS: https://docs.docker.com/compose/install/ + +## Usage + +### Setting up the Apache2 reverse proxy + +Following Apache2 mods have to be installted and enabled: + +* ssl +* proxy +* proxy_balancer +* proxy_http + +The 
following lines have to be added to the appropriate sites-available config of Apache2 webserver: + +``` + + # + # other config + + ProxyPass /api http://localhost:5001/api + ProxyPassReverse /api http://localhost:5001/api + ProxyPass /swaggerui http://localhost:5001/swaggerui + ProxyPassReverse /swaggerui http://localhost:5001/swaggerui + +``` + +After that, restart the Apache2 service. + +### Starting the backend service with docker-compose + +Following commands have to be executed from the root folder of the repository, where `docker-compose.yml` is located. + +* build images +``` +docker-compose build +``` + +* start services and send them to the docker daemon +``` +docker-compose up -d +``` + +* shut service down +``` +docker-compose down +``` diff --git a/server/services.docker b/server/services.docker index 7ad8e026c..17ce475c7 100644 --- a/server/services.docker +++ b/server/services.docker @@ -10,4 +10,4 @@ COPY workers/services/requirements.txt . RUN pip install --no-cache-dir -r requirements.txt COPY workers/services/src/ ./ -CMD gunicorn -b 0.0.0.0:5001 'app:new_services_app()' +CMD gunicorn -b 0.0.0.0:5001 'app:app' diff --git a/server/workers/services/requirements.txt b/server/workers/services/requirements.txt index fb4357158..20421809b 100644 --- a/server/workers/services/requirements.txt +++ b/server/workers/services/requirements.txt @@ -1,5 +1,6 @@ flask flask-restx +flask-cors Werkzeug==0.16.1 marshmallow gunicorn diff --git a/server/workers/services/src/apis/request_validators.py b/server/workers/services/src/apis/request_validators.py index 95453acfb..d8aed6237 100644 --- a/server/workers/services/src/apis/request_validators.py +++ b/server/workers/services/src/apis/request_validators.py @@ -12,7 +12,7 @@ class SearchParamSchema(Schema): vis_type = fields.Str(require=True) year_range = fields.Str() today = fields.Str() - result_type = fields.Str() + raw = fields.Boolean() @pre_load def fix_years(self, in_data, **kwargs): @@ -26,8 +26,3 @@ def 
fix_years(self, in_data, **kwargs): def is_not_in_future(self, date): if date > datetime.today().date(): raise ValidationError("Starting date can't be in the future.") - - @validates('result_type') - def validate_result_type(self, result_type): - if result_type not in ["json", "csv", "raw"]: - raise ValidationError("Result type must be one of ['json', 'csv', 'raw'].") diff --git a/server/workers/services/src/apis/triple.py b/server/workers/services/src/apis/triple.py index bb45b777e..f8c0240d1 100644 --- a/server/workers/services/src/apis/triple.py +++ b/server/workers/services/src/apis/triple.py @@ -31,21 +31,24 @@ def get_key(store, key): return result -search_query = triple_ns.model("Search Query", - {"q": fields.String(required=True), - "sorting": fields.String(required=True), - "from": fields.String(required=True), - "to": fields.String(required=True), - "vis_type": fields.String(required=True), - "result_type": fields.String(required=True)}) -# class SearchQuery(fields.Raw): -# def format(self, value): -# return {"q": value.q, -# "sorting": value.sorting, -# "from": getattr(value, "from"), -# "to": value.to, -# "vis_type": value.vis_type, -# "raw": value.raw} +search_query = triple_ns.model("SearchQuery", + {"q": fields.String(example='feminicide', + description='query string', + required=True), + "sorting": fields.String(example='most-recent', + description='most-relevant or most-recent', + required=True), + "from": fields.String(example='2019-01-01', + description='yyyy-MM-dd', + required=True), + "to": fields.String(example='2019-12-31', + description='yyyy-MM-dd', + required=True), + "vis_type": fields.String(example='overview', + description='overview or timeline', + required=True), + "raw": fields.Boolean(example="false", + description='raw results from ElasticSearch')}) @triple_ns.route('/search') @@ -53,6 +56,7 @@ class Search(Resource): @triple_ns.doc(responses={200: 'OK', 400: 'Invalid search parameters'}) @triple_ns.expect(search_query) + 
@triple_ns.produces(["application/json", "text/csv"]) def post(self): """ """ @@ -67,45 +71,12 @@ def post(self): result = get_key(redis_store, k) headers = {} - if data.get("result_type") == "json": + if request.headers["Accept"] == "application/json": headers["Content-Type"] = "application/json" - if data.get("result_type") == "csv": + if request.headers["Accept"] == "text/csv": result = pd.read_json(result).to_csv() headers["Content-Type"] = "text/csv" - headers["Content-disposition"] = "attachment; filename={0}.csv".format(k) - if data.get("result_type") == "raw": - headers["Content-Type"] = "application/json" - return make_response(result, - 200, - headers) - - -@triple_ns.route('/example_data') -class ExampleData(Resource): - @triple_ns.doc(description="Returns example map data for the following parameters:\n" - "q=feminicide, sorting=most-relevant, from=2019-01-01, " - "to=2019-12-31, result_type=csv", - responses={200: 'OK'}) - def get(self): - k = str(uuid.uuid4()) - data = {"q": "feminicide", - "sorting": "most-relevant", - "from": "2019-01-01", - "to": "2019-12-31", - "vis_type": "overview", - "result_type": "csv"} - d = {"id": k, "params": data, - "endpoint": "search"} - redis_store.rpush("triple", json.dumps(d)) - result = get_key(redis_store, k) - - headers = {} - if data.get("result_type") == "json": - headers["Content-Type"] = "application/json" - if data.get("result_type") == "csv": - result = pd.read_json(result).to_csv() - headers["Content-Type"] = "text/csv" - headers["Content-disposition"] = "attachment; filename={0}.csv".format(k) + headers["Content-Disposition"] = "attachment; filename={0}.csv".format(k) if data.get("result_type") == "raw": headers["Content-Type"] = "application/json" return make_response(result, diff --git a/server/workers/services/src/app.py b/server/workers/services/src/app.py index 298f60c43..39df3a587 100644 --- a/server/workers/services/src/app.py +++ b/server/workers/services/src/app.py @@ -1,14 +1,16 @@ from flask 
import Flask from flask_restx import Api +from flask_cors import CORS from apis.triple import triple_ns app = Flask('v1', instance_relative_config=True) app.config.from_object('config.settings') app.config.from_pyfile('settings.py', silent=True) +CORS(app, expose_headers=["Content-Disposition"]) api = Api(app=app, title="Head Start API", version="0.1", description="Head Start API demo", - endpoint='/api', + prefix='/api', doc='/api/docs') api.add_namespace(triple_ns, path='/triple') diff --git a/server/workers/triple/src/search_triple.py b/server/workers/triple/src/search_triple.py index 5e247d09e..bbd99569b 100644 --- a/server/workers/triple/src/search_triple.py +++ b/server/workers/triple/src/search_triple.py @@ -73,7 +73,7 @@ def search(self, parameters): body=body, size=100, sort=sort) - if parameters.get('result_type') == 'raw': + if parameters.get('raw') is True: return res else: return self.process_result(res) @@ -163,7 +163,7 @@ def run(self): res["id"] = k res["input_data"] = self.search(params) res["params"] = params - if params.get('result_type') == 'raw': + if params.get('raw') is True: redis_store.set(k+"_output", json.dumps(res)) else: redis_store.rpush("input_data", json.dumps(res)) From 985a4f8258093acc56e02c5910e1d1436068df94 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Fri, 20 Mar 2020 10:25:27 +0100 Subject: [PATCH 33/99] mv config --- server/search_triple.docker | 1 + server/workers/triple/{src => }/config_example.json | 0 2 files changed, 1 insertion(+) rename server/workers/triple/{src => }/config_example.json (100%) diff --git a/server/search_triple.docker b/server/search_triple.docker index a0414ebfa..5fd649045 100644 --- a/server/search_triple.docker +++ b/server/search_triple.docker @@ -9,5 +9,6 @@ WORKDIR /headstart COPY workers/triple/requirements.txt . RUN pip install --no-cache-dir -r requirements.txt COPY workers/triple/src/ ./ +COPY workers/triple/config.json . 
ENTRYPOINT python search_triple.py diff --git a/server/workers/triple/src/config_example.json b/server/workers/triple/config_example.json similarity index 100% rename from server/workers/triple/src/config_example.json rename to server/workers/triple/config_example.json From 2a28d86a5b69438f3ef3e28e2a5a58135c6183d5 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Fri, 20 Mar 2020 10:36:27 +0100 Subject: [PATCH 34/99] get raw csv --- examples/triple/README.md | 7 ++++++- server/workers/services/src/apis/triple.py | 11 +++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/examples/triple/README.md b/examples/triple/README.md index e9c2c0524..2c0480c72 100644 --- a/examples/triple/README.md +++ b/examples/triple/README.md @@ -14,7 +14,7 @@ Please follow the install instructions for docker-compose for your OS: https://d ### Setting up the Apache2 reverse proxy -Following Apache2 mods have to be installted and enabled: +Following Apache2 mods have to be installed and enabled: * ssl * proxy @@ -39,6 +39,11 @@ After that, restart the Apache2 service. ### Starting the backend service with docker-compose +Setting up configs: + +* ElasticSearch core service: In `server/workers/services/triple/` copy `config_example.json` to `config.json` and fill in the fields. + + Following commands have to be executed from the root folder of the repository, where `docker-compose.yml` is located. 
* build images diff --git a/server/workers/services/src/apis/triple.py b/server/workers/services/src/apis/triple.py index f8c0240d1..d42eddaca 100644 --- a/server/workers/services/src/apis/triple.py +++ b/server/workers/services/src/apis/triple.py @@ -74,10 +74,17 @@ def post(self): if request.headers["Accept"] == "application/json": headers["Content-Type"] = "application/json" if request.headers["Accept"] == "text/csv": - result = pd.read_json(result).to_csv() + if data.get("raw") is True: + df = pd.DataFrame(result.get('input_data').get('hits').get('hits')) + df = pd.concat([df.drop(["_source"], axis=1), + df["_source"].apply(pd.Series)], + axis=1) + result = df.to_csv() + else: + result = pd.read_json(result).to_csv() headers["Content-Type"] = "text/csv" headers["Content-Disposition"] = "attachment; filename={0}.csv".format(k) - if data.get("result_type") == "raw": + if data.get("raw") is True: headers["Content-Type"] = "application/json" return make_response(result, 200, From 82475845e4f999a7bd19ec30b2a2c88b1b65e7de Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Fri, 20 Mar 2020 11:36:12 +0100 Subject: [PATCH 35/99] example configs added --- example_config.ini | 74 +++++++++++++++++++++++++++++++++++++++ example_config.js | 4 +++ examples/triple/README.md | 7 +++- 3 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 example_config.ini create mode 100644 example_config.js diff --git a/example_config.ini b/example_config.ini new file mode 100644 index 000000000..27eab5304 --- /dev/null +++ b/example_config.ini @@ -0,0 +1,74 @@ +; This is a sample configuration file for Headstart Preprocessing Scripts. +; Copy this file to config_local.php and enter your API keys here. + +[general] +# Full path to the preprocessing directory +preprocessing_dir = "/var/www/html/servicename/headstart/server/preprocessing/" +# Full path to the images directory for the client. Needs to be in the public_html/www directory. 
Make sure that your webserver has write access to this directory. +images_path = "/var/www/html/servicename/headstart/server/paper_preview/" +# Host of the client visualization +host = "http://localhost/" +# Relative path to the client visualization. Needs to be in the public_html/www directory. +vis_path = "headstart" +# Relative path to the client REST services. Needs to be in the public_html/www directory. +services_path = "headstart/server/services/" + +[snapshot] +# Set to 1 to enable snapshot feature, 0 to disable +snapshot_enabled = 0 +# Absolute path to node binary +node_path = "/home/shared-executables/v10.17.0www-data/bin/node" +nodemodules_path = "/home/shared-executables/v10.17.0www-data/lib/node_modules" +# Absolute path to getChartSVG.js +getsvg_path = "/var/www/html/servicename/headstart/server/services/getChartSVG.js" +# Absolute path to the directory, where the snapshots are stored. Webserver must have write access to this directory +storage_path = "/var/www/html/servicename/headstart/server/storage/" +# PHP File responsible for rendering the bubble in a way to be snapshotted. Relative path to general host +snapshot_php = "servicename/headstart/server/services/snapshot/headstart_snapshot.php" +# Thumbnail width +snapshot_width = "1200px" +# snapshot_local_protocol fallback for non-server environments +snapshot_local_protocol = "http://" + +[output] +# Relative paths for offline calculation +output_dir = "other-scripts/" +cooc = "cooc.csv"; +metadata = "metadata.csv" +output_scaling_clustering = "output_scaling_clustering.csv" +output_naming = "output_naming.csv" +unique_id = "vis_id2" +title = "Visualization" + +[connection] +# Full path to the sqlite database file. Make sure that your webserver has write access to this file. For development purposes, duplicate headstart.sqlite in server/storage/ and rename it to a filename of your choice. Enter the path to this file here.
+sqlite_db = "/var/www/html/servicename/headstart/server/storage/servicename.sqlite" + +[calculation] +# Path to the RScript binary +binary = "/usr/bin/Rscript" +# Relative path from preprocessing_dir to the R script +script = "other-scripts/text_similarity.R" +mode = "bookmarks" + +[naming] +api_key_zemanta = "" +api_key_calais = "" + +# Constants for column numbers in the scaling and clustering output +line_cluster_id = 10 +line_title = 1 +line_abstract = 2 + +# English stop word file +stop_words = "resources/english.stop"; + +# Thresholds for n-grams +threshold_title_ngrams = 2; +threshold_title_abstract_ngrams = 3; +threshold_single_words = 4; + +forbidden_names[] = "research" +forbidden_names[] = "science" +forbidden_names[] = "inquiry" +forbidden_names[] = "learning" diff --git a/example_config.js b/example_config.js new file mode 100644 index 000000000..88685d043 --- /dev/null +++ b/example_config.js @@ -0,0 +1,4 @@ +module.exports = { + publicPath : "http://localhost/example/dist/", + skin : "" +}; diff --git a/examples/triple/README.md b/examples/triple/README.md index 2c0480c72..19a314ebe 100644 --- a/examples/triple/README.md +++ b/examples/triple/README.md @@ -43,7 +43,6 @@ Setting up configs: * ElasticSearch core service: In `server/workers/services/triple/` copy `config_example.json` to `config.json` and fill in the fields. - Following commands have to be executed from the root folder of the repository, where `docker-compose.yml` is located. * build images @@ -60,3 +59,9 @@ docker-compose up -d ``` docker-compose down ``` + +### Deploying the example: + +Use a deployment script, or manually perform following actions: + +* use a script. 
From 4c65bb99f4667439bdd877d4268a8163d2e5fc4f Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Fri, 20 Mar 2020 23:36:41 +0100 Subject: [PATCH 36/99] proxy fix --- server/workers/services/src/app.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/server/workers/services/src/app.py b/server/workers/services/src/app.py index 39df3a587..c38a39bfb 100644 --- a/server/workers/services/src/app.py +++ b/server/workers/services/src/app.py @@ -2,11 +2,14 @@ from flask_restx import Api from flask_cors import CORS from apis.triple import triple_ns +from werkzeug.contrib.fixers import ProxyFix + app = Flask('v1', instance_relative_config=True) app.config.from_object('config.settings') app.config.from_pyfile('settings.py', silent=True) CORS(app, expose_headers=["Content-Disposition"]) +app = ProxyFix(app) api = Api(app=app, title="Head Start API", version="0.1", description="Head Start API demo", From ffaa4320886fafcb0be73c27005f6860e0391f55 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Sat, 21 Mar 2020 00:02:20 +0100 Subject: [PATCH 37/99] redis conf --- docker-compose.yml | 2 ++ server/workers/services/src/config/redis.conf | 9 +++++++++ 2 files changed, 11 insertions(+) create mode 100644 server/workers/services/src/config/redis.conf diff --git a/docker-compose.yml b/docker-compose.yml index 989f22d41..75cbfe24c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,6 +12,8 @@ services: depends_on: - redis network_mode: "host" + volumes: + - workers/services/src/config/redis.conf:/redis.conf redis: image: 'redis:4.0-alpine' diff --git a/server/workers/services/src/config/redis.conf b/server/workers/services/src/config/redis.conf new file mode 100644 index 000000000..61e1d5b1c --- /dev/null +++ b/server/workers/services/src/config/redis.conf @@ -0,0 +1,9 @@ +# The working directory. +# +# The DB will be written inside this directory, with the filename specified +# above using the 'dbfilename' configuration directive. 
+# +# The Append Only File will also be created inside this directory. +# +# Note that you must specify a directory here, not a file name. +dir /var/lib/redis From 1eceb4dcad4abb3fdb9ece3cacda6e0c3735deeb Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Sat, 21 Mar 2020 00:11:15 +0100 Subject: [PATCH 38/99] revert proxy fix --- server/workers/services/src/app.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/server/workers/services/src/app.py b/server/workers/services/src/app.py index c38a39bfb..7c03c61cf 100644 --- a/server/workers/services/src/app.py +++ b/server/workers/services/src/app.py @@ -2,14 +2,12 @@ from flask_restx import Api from flask_cors import CORS from apis.triple import triple_ns -from werkzeug.contrib.fixers import ProxyFix app = Flask('v1', instance_relative_config=True) app.config.from_object('config.settings') app.config.from_pyfile('settings.py', silent=True) CORS(app, expose_headers=["Content-Disposition"]) -app = ProxyFix(app) api = Api(app=app, title="Head Start API", version="0.1", description="Head Start API demo", From 738fd56b5a7e2579671dc8cfa908106751299145 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Sat, 21 Mar 2020 00:21:13 +0100 Subject: [PATCH 39/99] fix redis volume --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 75cbfe24c..fcace2a5b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,7 +13,7 @@ services: - redis network_mode: "host" volumes: - - workers/services/src/config/redis.conf:/redis.conf + - redis:var/lib/redis redis: image: 'redis:4.0-alpine' From bcd7364a8e7599e3354d177fde34e70cbb9ec3dc Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Sat, 21 Mar 2020 00:59:22 +0100 Subject: [PATCH 40/99] deployment issues --- docker-compose.yml | 2 +- server/services.docker | 1 + server/workers/services/requirements.txt | 3 +-- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker-compose.yml 
b/docker-compose.yml index fcace2a5b..2b6c2d65f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,7 +13,7 @@ services: - redis network_mode: "host" volumes: - - redis:var/lib/redis + - redis:/var/lib/redis redis: image: 'redis:4.0-alpine' diff --git a/server/services.docker b/server/services.docker index 17ce475c7..4458bf2d0 100644 --- a/server/services.docker +++ b/server/services.docker @@ -8,6 +8,7 @@ RUN apk add build-base gcc WORKDIR /headstart COPY workers/services/requirements.txt . RUN pip install --no-cache-dir -r requirements.txt +RUN https://github.com/python-restx/flask-restx COPY workers/services/src/ ./ CMD gunicorn -b 0.0.0.0:5001 'app:app' diff --git a/server/workers/services/requirements.txt b/server/workers/services/requirements.txt index 20421809b..a1a387bbb 100644 --- a/server/workers/services/requirements.txt +++ b/server/workers/services/requirements.txt @@ -1,7 +1,6 @@ flask -flask-restx flask-cors -Werkzeug==0.16.1 +Werkzeug marshmallow gunicorn redis From 0e505bfbdcef7ef36b51362e945e4191e377329a Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Sat, 21 Mar 2020 01:25:44 +0100 Subject: [PATCH 41/99] fix proxy and flask-restx hotfix version --- server/services.docker | 3 ++- server/workers/services/src/app.py | 5 +++-- server/workers/services/src/config/redis.conf | 9 --------- 3 files changed, 5 insertions(+), 12 deletions(-) delete mode 100644 server/workers/services/src/config/redis.conf diff --git a/server/services.docker b/server/services.docker index 4458bf2d0..9f22a22a9 100644 --- a/server/services.docker +++ b/server/services.docker @@ -8,7 +8,8 @@ RUN apk add build-base gcc WORKDIR /headstart COPY workers/services/requirements.txt . 
RUN pip install --no-cache-dir -r requirements.txt -RUN https://github.com/python-restx/flask-restx +RUN apk add git +RUN pip install git+https://github.com/python-restx/flask-restx COPY workers/services/src/ ./ CMD gunicorn -b 0.0.0.0:5001 'app:app' diff --git a/server/workers/services/src/app.py b/server/workers/services/src/app.py index 7c03c61cf..72caecbbf 100644 --- a/server/workers/services/src/app.py +++ b/server/workers/services/src/app.py @@ -2,6 +2,7 @@ from flask_restx import Api from flask_cors import CORS from apis.triple import triple_ns +from werkzeug.middleware.proxy_fix import ProxyFix app = Flask('v1', instance_relative_config=True) @@ -11,9 +12,9 @@ api = Api(app=app, title="Head Start API", version="0.1", description="Head Start API demo", - prefix='/api', - doc='/api/docs') + prefix='/api', doc='/api/docs') api.add_namespace(triple_ns, path='/triple') +api = ProxyFix(api, x_proto=1, x_host=1) if __name__ == '__main__': diff --git a/server/workers/services/src/config/redis.conf b/server/workers/services/src/config/redis.conf deleted file mode 100644 index 61e1d5b1c..000000000 --- a/server/workers/services/src/config/redis.conf +++ /dev/null @@ -1,9 +0,0 @@ -# The working directory. -# -# The DB will be written inside this directory, with the filename specified -# above using the 'dbfilename' configuration directive. -# -# The Append Only File will also be created inside this directory. -# -# Note that you must specify a directory here, not a file name. 
-dir /var/lib/redis From 814e1aa986427c52946d07b0f04594dd8462a37b Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Sat, 21 Mar 2020 13:01:02 +0100 Subject: [PATCH 42/99] doesnt belong here --- docker-compose.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 2b6c2d65f..989f22d41 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,8 +12,6 @@ services: depends_on: - redis network_mode: "host" - volumes: - - redis:/var/lib/redis redis: image: 'redis:4.0-alpine' From b949a39d1848ee2d49da47c32392164ef817fee3 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Sun, 22 Mar 2020 00:34:31 +0100 Subject: [PATCH 43/99] monkeypatching 2nd attempt --- server/workers/services/src/app.py | 70 +++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 2 deletions(-) diff --git a/server/workers/services/src/app.py b/server/workers/services/src/app.py index 72caecbbf..ae5bbba87 100644 --- a/server/workers/services/src/app.py +++ b/server/workers/services/src/app.py @@ -1,8 +1,74 @@ -from flask import Flask -from flask_restx import Api +from flask import Flask, url_for +from flask_restx import Api, apidoc from flask_cors import CORS from apis.triple import triple_ns from werkzeug.middleware.proxy_fix import ProxyFix +from werkzeug import cached_property +from config import settings +import logging +from flask_restx.swagger import Swagger + + +log = logging.getLogger(__name__) + + +def _register_apidoc(self, app): + conf = app.extensions.setdefault('restx', {}) + custom_apidoc = apidoc.Apidoc('restx_doc', 'flask_restx.apidoc', + template_folder='templates', + static_folder='static', + static_url_path="/api/docs") + + @custom_apidoc.add_app_template_global + def swagger_static(filename): + return url_for('restx_doc.static', filename=filename) + + if not conf.get('apidoc_registered', False): + app.register_blueprint(custom_apidoc) + conf['apidoc_registered'] = True + + +@cached_property +def __schema__(self): 
+ ''' + The Swagger specifications/schema for this API + :returns dict: the schema as a serializable dict + ''' + if not self._schema: + try: + self._schema = Swagger(self).as_dict() + if self.behind_proxy and "host" in self._schema: + del self._schema["host"] + except Exception: + # Log the source exception for debugging purpose + # and return an error message + msg = 'Unable to render schema' + log.exception(msg) # This will provide a full traceback + return {'error': msg} + return self._schema + + +def api_patches(app): + Api._register_apidoc = _register_apidoc + Api.__schema__ = __schema__ + + @property + def fix_specs_url(self): + if settings.BEHIND_PROXY: + return url_for(self.endpoint('specs'), _external=False) + else: + return url_for(self.endpoint('specs'), _external=True) + Api.specs_url = fix_specs_url + + api_fixed = Api( + app, + title="Head Start API", + description="Head Start API demo", + version="0.1", + prefix='/api', + doc="/api/docs") + + return api_fixed app = Flask('v1', instance_relative_config=True) From 0a0dac88e4a4ac82acfb4f4a189db6fae500d128 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Sun, 22 Mar 2020 21:40:09 +0100 Subject: [PATCH 44/99] behind proxy patches --- server/workers/services/src/app.py | 74 +------------ server/workers/services/src/utils/__init__.py | 0 .../services/src/utils/monkeypatches.py | 103 ++++++++++++++++++ 3 files changed, 107 insertions(+), 70 deletions(-) create mode 100644 server/workers/services/src/utils/__init__.py create mode 100644 server/workers/services/src/utils/monkeypatches.py diff --git a/server/workers/services/src/app.py b/server/workers/services/src/app.py index ae5bbba87..ec6e99ad0 100644 --- a/server/workers/services/src/app.py +++ b/server/workers/services/src/app.py @@ -1,74 +1,10 @@ from flask import Flask, url_for -from flask_restx import Api, apidoc +from flask_restx import Api from flask_cors import CORS from apis.triple import triple_ns from werkzeug.middleware.proxy_fix import 
ProxyFix -from werkzeug import cached_property from config import settings -import logging -from flask_restx.swagger import Swagger - - -log = logging.getLogger(__name__) - - -def _register_apidoc(self, app): - conf = app.extensions.setdefault('restx', {}) - custom_apidoc = apidoc.Apidoc('restx_doc', 'flask_restx.apidoc', - template_folder='templates', - static_folder='static', - static_url_path="/api/docs") - - @custom_apidoc.add_app_template_global - def swagger_static(filename): - return url_for('restx_doc.static', filename=filename) - - if not conf.get('apidoc_registered', False): - app.register_blueprint(custom_apidoc) - conf['apidoc_registered'] = True - - -@cached_property -def __schema__(self): - ''' - The Swagger specifications/schema for this API - :returns dict: the schema as a serializable dict - ''' - if not self._schema: - try: - self._schema = Swagger(self).as_dict() - if self.behind_proxy and "host" in self._schema: - del self._schema["host"] - except Exception: - # Log the source exception for debugging purpose - # and return an error message - msg = 'Unable to render schema' - log.exception(msg) # This will provide a full traceback - return {'error': msg} - return self._schema - - -def api_patches(app): - Api._register_apidoc = _register_apidoc - Api.__schema__ = __schema__ - - @property - def fix_specs_url(self): - if settings.BEHIND_PROXY: - return url_for(self.endpoint('specs'), _external=False) - else: - return url_for(self.endpoint('specs'), _external=True) - Api.specs_url = fix_specs_url - - api_fixed = Api( - app, - title="Head Start API", - description="Head Start API demo", - version="0.1", - prefix='/api', - doc="/api/docs") - - return api_fixed +from utils.monkeypatches import api_patches app = Flask('v1', instance_relative_config=True) @@ -76,11 +12,9 @@ def fix_specs_url(self): app.config.from_pyfile('settings.py', silent=True) CORS(app, expose_headers=["Content-Disposition"]) -api = Api(app=app, title="Head Start API", version="0.1", 
- description="Head Start API demo", - prefix='/api', doc='/api/docs') +api = api_patches(app, settings) api.add_namespace(triple_ns, path='/triple') -api = ProxyFix(api, x_proto=1, x_host=1) +api.app.wsgi_app = ProxyFix(api.app.wsgi_app, x_host=1, x_for=1, x_proto=1) if __name__ == '__main__': diff --git a/server/workers/services/src/utils/__init__.py b/server/workers/services/src/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/server/workers/services/src/utils/monkeypatches.py b/server/workers/services/src/utils/monkeypatches.py new file mode 100644 index 000000000..96c526ab1 --- /dev/null +++ b/server/workers/services/src/utils/monkeypatches.py @@ -0,0 +1,103 @@ +from flask import url_for +from flask_restx import Api, apidoc +from flask_restx.swagger import Swagger +from werkzeug.utils import cached_property +import logging + +log = logging.getLogger(__name__) + + +class ReverseProxied(object): + '''Wrap the application in this middleware and configure the + front-end server to add these headers, to let you quietly bind + this to a URL other than / and to an HTTP scheme that is + different than what is used locally. 
+ + :param app: the WSGI application + ''' + + def __init__(self, app): + self.app = app + + def __call__(self, settings, environ, start_response): + PREFIX = settings.PROXY_PREFIX + environ['SCRIPT_NAME'] = PREFIX + path_info = environ['PATH_INFO'] + if path_info.startswith(PREFIX): + environ['PATH_INFO'] = path_info[len(PREFIX):] + + scheme = environ.get('HTTP_X_SCHEME', '') + if scheme: + environ['wsgi.url_scheme'] = scheme + return self.app(environ, start_response) + + +def _register_apidoc(self, app): + conf = app.extensions.setdefault('restx', {}) + custom_apidoc = apidoc.Apidoc('restx_doc', 'flask_restx.apidoc', + template_folder='templates', + static_folder='static', + static_url_path="/swaggerui") + + @custom_apidoc.add_app_template_global + def swagger_static(filename): + return url_for('restx_doc.static', filename=filename) + + if not conf.get('apidoc_registered', False): + app.register_blueprint(custom_apidoc) + conf['apidoc_registered'] = True + + +# from https://github.com/noirbizarre/flask-restplus/pull/596/files +# make swagger work behind reverse proxy + +@cached_property +def __schema__(self): + ''' + The Swagger specifications/schema for this API + :returns dict: the schema as a serializable dict + ''' + if not self._schema: + try: + self._schema = Swagger(self).as_dict() + if self.behind_proxy and "host" in self._schema: + del self._schema["host"] + except Exception: + # Log the source exception for debugging purpose + # and return an error message + msg = 'Unable to render schema' + log.exception(msg) # This will provide a full traceback + return {'error': msg} + return self._schema + + +@property +def specs_url(self): + ''' + The Swagger specifications absolute url (ie. `swagger.json`) + Use a relative url when behind a proxy. + :rtype: str + ''' + if self.behind_proxy: + # Use relative URL. 
+ external = False + else: + external = True + return url_for(self.endpoint('specs'), _external=external) + + +def api_patches(app, settings): + Api._register_apidoc = _register_apidoc + Api.__schema__ = __schema__ + Api.specs_url = specs_url + + api_fixed = Api( + app, + title="Head Start API", + description="Head Start API demo", + version="0.1", + prefix='/api', + doc="/api/docs") + if settings.BEHIND_PROXY: + api_fixed.behind_proxy = True + return api_fixed From a1efe6c985dbeab89b12dcbb4f37de56dd3403ab Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Sun, 22 Mar 2020 22:00:42 +0100 Subject: [PATCH 45/99] deployment --- server/services.docker | 2 +- server/workers/services/src/app.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/server/services.docker b/server/services.docker index 9f22a22a9..3005caa5d 100644 --- a/server/services.docker +++ b/server/services.docker @@ -12,4 +12,4 @@ RUN apk add git RUN pip install git+https://github.com/python-restx/flask-restx COPY workers/services/src/ ./ -CMD gunicorn -b 0.0.0.0:5001 'app:app' +CMD gunicorn -b 127.0.0.1:5001 'app:app' diff --git a/server/workers/services/src/app.py b/server/workers/services/src/app.py index ec6e99ad0..2fea274ea 100644 --- a/server/workers/services/src/app.py +++ b/server/workers/services/src/app.py @@ -18,4 +18,4 @@ if __name__ == '__main__': - app.run(port=5001, debug=True) + app.run(host="localhost", port=5001, debug=True) From 97a63a98fb244ba93a39335d7822460f9c615404 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 23 Mar 2020 01:43:38 +0100 Subject: [PATCH 46/99] attempt with flasgger --- server/workers/services/requirements.txt | 2 + server/workers/services/src/app.py | 25 +++++- .../services/src/utils/monkeypatches.py | 86 +++++++++++++------ 3 files changed, 83 insertions(+), 30 deletions(-) diff --git a/server/workers/services/requirements.txt b/server/workers/services/requirements.txt index a1a387bbb..06918500e 100644 --- 
a/server/workers/services/requirements.txt +++ b/server/workers/services/requirements.txt @@ -7,3 +7,5 @@ redis hiredis aioredis pandas +pyyaml +flasgger diff --git a/server/workers/services/src/app.py b/server/workers/services/src/app.py index 2fea274ea..6c5a01f3c 100644 --- a/server/workers/services/src/app.py +++ b/server/workers/services/src/app.py @@ -1,20 +1,39 @@ -from flask import Flask, url_for +from flask import Flask from flask_restx import Api from flask_cors import CORS from apis.triple import triple_ns from werkzeug.middleware.proxy_fix import ProxyFix from config import settings -from utils.monkeypatches import api_patches +from utils.monkeypatches import ReverseProxied, __schema__, specs_url, _register_apidoc, inject_flasgger + + +def api_patches(app, settings): + Api._register_apidoc = _register_apidoc + Api.__schema__ = __schema__ + Api.specs_url = specs_url + + api_fixed = Api( + app, + title="Head Start API", + description="Head Start API demo", + version="0.1", + prefix='/api', + doc="/api/docs") + if settings.BEHIND_PROXY: + api_fixed.behind_proxy = True + return api_fixed app = Flask('v1', instance_relative_config=True) +app = inject_flasgger(app) +app.wsgi_app = ProxyFix(app.wsgi_app, x_proto=1, x_port=1, x_for=1, x_host=1, x_prefix=1) +app.wsgi_app = ReverseProxied(app.wsgi_app) app.config.from_object('config.settings') app.config.from_pyfile('settings.py', silent=True) CORS(app, expose_headers=["Content-Disposition"]) api = api_patches(app, settings) api.add_namespace(triple_ns, path='/triple') -api.app.wsgi_app = ProxyFix(api.app.wsgi_app, x_host=1, x_for=1, x_proto=1) if __name__ == '__main__': diff --git a/server/workers/services/src/utils/monkeypatches.py b/server/workers/services/src/utils/monkeypatches.py index 96c526ab1..74cedcf65 100644 --- a/server/workers/services/src/utils/monkeypatches.py +++ b/server/workers/services/src/utils/monkeypatches.py @@ -1,8 +1,11 @@ +import os +import json +import yaml +import logging from flask 
import url_for -from flask_restx import Api, apidoc +from flask_restx import apidoc from flask_restx.swagger import Swagger from werkzeug.utils import cached_property -import logging log = logging.getLogger(__name__) @@ -13,18 +16,26 @@ class ReverseProxied(object): this to a URL other than / and to an HTTP scheme that is different than what is used locally. + location /myprefix { + proxy_pass http://192.168.0.1:5001; + proxy_set_header Host $host; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Scheme $scheme; + proxy_set_header X-Script-Name /myprefix; + } + :param app: the WSGI application ''' - def __init__(self, app): self.app = app - def __call__(self, settings, environ, start_response): - PREFIX = settings.PROXY_PREFIX - environ['SCRIPT_NAME'] = PREFIX - path_info = environ['PATH_INFO'] - if path_info.startswith(PREFIX): - environ['PATH_INFO'] = path_info[len(PREFIX):] + def __call__(self, environ, start_response): + script_name = environ.get('HTTP_X_SCRIPT_NAME', '') + if script_name: + environ['SCRIPT_NAME'] = script_name + path_info = environ['PATH_INFO'] + if path_info.startswith(script_name): + environ['PATH_INFO'] = path_info[len(script_name):] scheme = environ.get('HTTP_X_SCHEME', '') if scheme: @@ -32,6 +43,7 @@ def __call__(self, settings, environ, start_response): return self.app(environ, start_response) +# from https://github.com/noirbizarre/flask-restplus/issues/517 def _register_apidoc(self, app): conf = app.extensions.setdefault('restx', {}) custom_apidoc = apidoc.Apidoc('restx_doc', 'flask_restx.apidoc', @@ -83,21 +95,41 @@ def specs_url(self): external = False else: external = True - return url_for(self.endpoint('specs'), _external=external) - - -def api_patches(app, settings): - Api._register_apidoc = _register_apidoc - Api.__schema__ = __schema__ - Api.specs_url = specs_url - - api_fixed = Api( - app, - title="Head Start API", - description="Head Start API demo", - version="0.1", - prefix='/api', - 
doc="/api/docs") - if settings.BEHIND_PROXY: - api_fixed.behind_proxy = True - return api_fixed + url = url_for(self.endpoint('specs'), _external=external) + # from https://github.com/noirbizarre/flask-restplus/pull/226/files + if self.app.config.get('SWAGGER_BASEPATH', ''): + prefix = url.split('/swagger.json')[0] + url = prefix + self.app.config.get('SWAGGER_BASEPATH', '') + '/swagger.json' + return url + return url + + +def inject_flasgger(app): + from flasgger import Swagger + with open("config/swagger.json") as infile: + specs = json.load(infile) + swagger = yaml.load(json.dumps(specs)) + swagger["host"] = os.getenv("HOST_IP", "localhost:5001") + if swagger["host"] == "localhost:5001": + swagger["schemes"] = ["http"] + Swagger(app, template=swagger, config=getSwaggerConfig()) + return app + + +def getSwaggerConfig(): + return { + "headers": [ + ], + "specs": [ + { + "endpoint": 'apispec', + "route": '/apispec.json', + "rule_filter": lambda rule: True, # all in + "model_filter": lambda tag: True, # all in + } + ], + "static_url_path": "/flasgger_static", + # "static_folder": "static", # must be set by user + "swagger_ui": True, + "specs_route": "/api/" + } From 0d357b41c0c16eb2a53b74eb40ccb3b193f020a4 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 23 Mar 2020 14:33:24 +0100 Subject: [PATCH 47/99] secure redis --- docker-compose.yml | 3 +- examples/triple/README.md | 1 + server/headstart_backend.docker | 2 + server/search_triple.docker | 1 + server/services.docker | 1 + server/workers/backend/src/headstart.py | 6 +- server/workers/redis_config_example.json | 6 + server/workers/redis_example.conf | 1371 ++++++++++++++++++++ server/workers/services/src/apis/triple.py | 7 +- server/workers/triple/src/search_triple.py | 4 +- 10 files changed, 1398 insertions(+), 4 deletions(-) create mode 100644 server/workers/redis_config_example.json create mode 100644 server/workers/redis_example.conf diff --git a/docker-compose.yml b/docker-compose.yml index 
989f22d41..9bc638622 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -16,9 +16,10 @@ services: redis: image: 'redis:4.0-alpine' restart: always - command: redis-server + command: ["redis-server", "/etc/redis/redis.conf", "--appendonly", "yes"] volumes: - 'redis:/var/lib/redis/data' + - ./server/workers/redis.conf:/etc/redis/redis.conf restart: always ports: - '6379:6379' diff --git a/examples/triple/README.md b/examples/triple/README.md index 19a314ebe..0c5593397 100644 --- a/examples/triple/README.md +++ b/examples/triple/README.md @@ -42,6 +42,7 @@ After that, restart the Apache2 service. Setting up configs: * ElasticSearch core service: In `server/workers/services/triple/` copy `config_example.json` to `config.json` and fill in the fields. +* Secure Redis: In `server/workers` copy `redis_config_example.json` to `redis_config.json` and `redis_example.conf` to `redis.conf` and in both files replace "long_secure_password" with a long, secure password (Line 507 in redis.conf, parameter `requirepass`). Following commands have to be executed from the root folder of the repository, where `docker-compose.yml` is located. diff --git a/server/headstart_backend.docker b/server/headstart_backend.docker index 21dc48783..58a689107 100644 --- a/server/headstart_backend.docker +++ b/server/headstart_backend.docker @@ -157,4 +157,6 @@ COPY preprocessing/resources ./resources COPY preprocessing/other-scripts . RUN mkdir -p /var/log/headstart && touch /var/log/headstart/headstart.log +COPY workers/redis_config.json . + ENTRYPOINT python3 headstart.py diff --git a/server/search_triple.docker b/server/search_triple.docker index 5fd649045..07b9de5d4 100644 --- a/server/search_triple.docker +++ b/server/search_triple.docker @@ -10,5 +10,6 @@ COPY workers/triple/requirements.txt . RUN pip install --no-cache-dir -r requirements.txt COPY workers/triple/src/ ./ COPY workers/triple/config.json . +COPY workers/redis_config.json . 
ENTRYPOINT python search_triple.py diff --git a/server/services.docker b/server/services.docker index 3005caa5d..29ed24a13 100644 --- a/server/services.docker +++ b/server/services.docker @@ -11,5 +11,6 @@ RUN pip install --no-cache-dir -r requirements.txt RUN apk add git RUN pip install git+https://github.com/python-restx/flask-restx COPY workers/services/src/ ./ +COPY workers/redis_config.json . CMD gunicorn -b 127.0.0.1:5001 'app:app' diff --git a/server/workers/backend/src/headstart.py b/server/workers/backend/src/headstart.py index e7d51ca96..00f1fdab5 100644 --- a/server/workers/backend/src/headstart.py +++ b/server/workers/backend/src/headstart.py @@ -7,7 +7,11 @@ import redis import pandas as pd -redis_store = redis.StrictRedis(host="localhost", port=6379, db=0) + +with open("redis_config.json") as infile: + redis_config = json.load(infile) + +redis_store = redis.StrictRedis(**redis_config) class Backend(object): diff --git a/server/workers/redis_config_example.json b/server/workers/redis_config_example.json new file mode 100644 index 000000000..a4ede8678 --- /dev/null +++ b/server/workers/redis_config_example.json @@ -0,0 +1,6 @@ +{ + "host":"127.0.0.1", + "port": 6379, + "db":0, + "password":"long_secure_password" +} diff --git a/server/workers/redis_example.conf b/server/workers/redis_example.conf new file mode 100644 index 000000000..7957a3b46 --- /dev/null +++ b/server/workers/redis_example.conf @@ -0,0 +1,1371 @@ +# Redis configuration file example. +# +# Note that in order to read the configuration file, Redis must be +# started with the file path as first argument: +# +# ./redis-server /path/to/redis.conf + +# Note on units: when memory size is needed, it is possible to specify +# it in the usual form of 1k 5GB 4M and so forth: +# +# 1k => 1000 bytes +# 1kb => 1024 bytes +# 1m => 1000000 bytes +# 1mb => 1024*1024 bytes +# 1g => 1000000000 bytes +# 1gb => 1024*1024*1024 bytes +# +# units are case insensitive so 1GB 1Gb 1gB are all the same. 
+ +################################## INCLUDES ################################### + +# Include one or more other config files here. This is useful if you +# have a standard template that goes to all Redis servers but also need +# to customize a few per-server settings. Include files can include +# other files, so use this wisely. +# +# Notice option "include" won't be rewritten by command "CONFIG REWRITE" +# from admin or Redis Sentinel. Since Redis always uses the last processed +# line as value of a configuration directive, you'd better put includes +# at the beginning of this file to avoid overwriting config change at runtime. +# +# If instead you are interested in using includes to override configuration +# options, it is better to use include as the last line. +# +# include /path/to/local.conf +# include /path/to/other.conf + +################################## MODULES ##################################### + +# Load modules at startup. If the server is not able to load modules +# it will abort. It is possible to use multiple loadmodule directives. +# +# loadmodule /path/to/my_module.so +# loadmodule /path/to/other_module.so + +################################## NETWORK ##################################### + +# By default, if no "bind" configuration directive is specified, Redis listens +# for connections from all the network interfaces available on the server. +# It is possible to listen to just one or multiple selected interfaces using +# the "bind" configuration directive, followed by one or more IP addresses. +# +# Examples: +# +# bind 192.168.1.100 10.0.0.1 +# bind 127.0.0.1 ::1 +# +# ~~~ WARNING ~~~ If the computer running Redis is directly exposed to the +# internet, binding to all the interfaces is dangerous and will expose the +# instance to everybody on the internet. 
So by default we uncomment the +# following bind directive, that will force Redis to listen only into +# the IPv4 loopback interface address (this means Redis will be able to +# accept connections only from clients running into the same computer it +# is running). +# +# IF YOU ARE SURE YOU WANT YOUR INSTANCE TO LISTEN TO ALL THE INTERFACES +# JUST COMMENT THE FOLLOWING LINE. +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +bind 127.0.0.1 + +# Protected mode is a layer of security protection, in order to avoid that +# Redis instances left open on the internet are accessed and exploited. +# +# When protected mode is on and if: +# +# 1) The server is not binding explicitly to a set of addresses using the +# "bind" directive. +# 2) No password is configured. +# +# The server only accepts connections from clients connecting from the +# IPv4 and IPv6 loopback addresses 127.0.0.1 and ::1, and from Unix domain +# sockets. +# +# By default protected mode is enabled. You should disable it only if +# you are sure you want clients from other hosts to connect to Redis +# even if no authentication is configured, nor a specific set of interfaces +# are explicitly listed using the "bind" directive. +protected-mode yes + +# Accept connections on the specified port, default is 6379 (IANA #815344). +# If port 0 is specified Redis will not listen on a TCP socket. +port 6379 + +# TCP listen() backlog. +# +# In high requests-per-second environments you need an high backlog in order +# to avoid slow clients connections issues. Note that the Linux kernel +# will silently truncate it to the value of /proc/sys/net/core/somaxconn so +# make sure to raise both the value of somaxconn and tcp_max_syn_backlog +# in order to get the desired effect. +tcp-backlog 511 + +# Unix socket. +# +# Specify the path for the Unix socket that will be used to listen for +# incoming connections. There is no default, so Redis will not listen +# on a unix socket when not specified. 
+# +# unixsocket /tmp/redis.sock +# unixsocketperm 700 + +# Close the connection after a client is idle for N seconds (0 to disable) +timeout 0 + +# TCP keepalive. +# +# If non-zero, use SO_KEEPALIVE to send TCP ACKs to clients in absence +# of communication. This is useful for two reasons: +# +# 1) Detect dead peers. +# 2) Take the connection alive from the point of view of network +# equipment in the middle. +# +# On Linux, the specified value (in seconds) is the period used to send ACKs. +# Note that to close the connection the double of the time is needed. +# On other kernels the period depends on the kernel configuration. +# +# A reasonable value for this option is 300 seconds, which is the new +# Redis default starting with Redis 3.2.1. +tcp-keepalive 300 + +################################# GENERAL ##################################### + +# By default Redis does not run as a daemon. Use 'yes' if you need it. +# Note that Redis will write a pid file in /var/run/redis.pid when daemonized. +daemonize no + +# If you run Redis from upstart or systemd, Redis can interact with your +# supervision tree. Options: +# supervised no - no supervision interaction +# supervised upstart - signal upstart by putting Redis into SIGSTOP mode +# supervised systemd - signal systemd by writing READY=1 to $NOTIFY_SOCKET +# supervised auto - detect upstart or systemd method based on +# UPSTART_JOB or NOTIFY_SOCKET environment variables +# Note: these supervision methods only signal "process is ready." +# They do not enable continuous liveness pings back to your supervisor. +supervised no + +# If a pid file is specified, Redis writes it where specified at startup +# and removes it at exit. +# +# When the server runs non daemonized, no pid file is created if none is +# specified in the configuration. When the server is daemonized, the pid file +# is used even if not specified, defaulting to "/var/run/redis.pid". 
+# +# Creating a pid file is best effort: if Redis is not able to create it +# nothing bad happens, the server will start and run normally. +pidfile /var/run/redis_6379.pid + +# Specify the server verbosity level. +# This can be one of: +# debug (a lot of information, useful for development/testing) +# verbose (many rarely useful info, but not a mess like the debug level) +# notice (moderately verbose, what you want in production probably) +# warning (only very important / critical messages are logged) +loglevel notice + +# Specify the log file name. Also the empty string can be used to force +# Redis to log on the standard output. Note that if you use standard +# output for logging but daemonize, logs will be sent to /dev/null +logfile "" + +# To enable logging to the system logger, just set 'syslog-enabled' to yes, +# and optionally update the other syslog parameters to suit your needs. +# syslog-enabled no + +# Specify the syslog identity. +# syslog-ident redis + +# Specify the syslog facility. Must be USER or between LOCAL0-LOCAL7. +# syslog-facility local0 + +# Set the number of databases. The default database is DB 0, you can select +# a different one on a per-connection basis using SELECT where +# dbid is a number between 0 and 'databases'-1 +databases 16 + +# By default Redis shows an ASCII art logo only when started to log to the +# standard output and if the standard output is a TTY. Basically this means +# that normally a logo is displayed only in interactive sessions. +# +# However it is possible to force the pre-4.0 behavior and always show a +# ASCII art logo in startup logs by setting the following option to yes. +always-show-logo yes + +################################ SNAPSHOTTING ################################ +# +# Save the DB on disk: +# +# save +# +# Will save the DB if both the given number of seconds and the given +# number of write operations against the DB occurred. 
+# +# In the example below the behaviour will be to save: +# after 900 sec (15 min) if at least 1 key changed +# after 300 sec (5 min) if at least 10 keys changed +# after 60 sec if at least 10000 keys changed +# +# Note: you can disable saving completely by commenting out all "save" lines. +# +# It is also possible to remove all the previously configured save +# points by adding a save directive with a single empty string argument +# like in the following example: +# +# save "" + +save 900 1 +save 300 10 +save 60 10000 + +# By default Redis will stop accepting writes if RDB snapshots are enabled +# (at least one save point) and the latest background save failed. +# This will make the user aware (in a hard way) that data is not persisting +# on disk properly, otherwise chances are that no one will notice and some +# disaster will happen. +# +# If the background saving process will start working again Redis will +# automatically allow writes again. +# +# However if you have setup your proper monitoring of the Redis server +# and persistence, you may want to disable this feature so that Redis will +# continue to work as usual even if there are problems with disk, +# permissions, and so forth. +stop-writes-on-bgsave-error yes + +# Compress string objects using LZF when dump .rdb databases? +# For default that's set to 'yes' as it's almost always a win. +# If you want to save some CPU in the saving child set it to 'no' but +# the dataset will likely be bigger if you have compressible values or keys. +rdbcompression yes + +# Since version 5 of RDB a CRC64 checksum is placed at the end of the file. +# This makes the format more resistant to corruption but there is a performance +# hit to pay (around 10%) when saving and loading RDB files, so you can disable it +# for maximum performances. +# +# RDB files created with checksum disabled have a checksum of zero that will +# tell the loading code to skip the check. 
+rdbchecksum yes + +# The filename where to dump the DB +dbfilename dump.rdb + +# The working directory. +# +# The DB will be written inside this directory, with the filename specified +# above using the 'dbfilename' configuration directive. +# +# The Append Only File will also be created inside this directory. +# +# Note that you must specify a directory here, not a file name. +dir ./ + +################################# REPLICATION ################################# + +# Master-Replica replication. Use replicaof to make a Redis instance a copy of +# another Redis server. A few things to understand ASAP about Redis replication. +# +# +------------------+ +---------------+ +# | Master | ---> | Replica | +# | (receive writes) | | (exact copy) | +# +------------------+ +---------------+ +# +# 1) Redis replication is asynchronous, but you can configure a master to +# stop accepting writes if it appears to be not connected with at least +# a given number of replicas. +# 2) Redis replicas are able to perform a partial resynchronization with the +# master if the replication link is lost for a relatively small amount of +# time. You may want to configure the replication backlog size (see the next +# sections of this file) with a sensible value depending on your needs. +# 3) Replication is automatic and does not need user intervention. After a +# network partition replicas automatically try to reconnect to masters +# and resynchronize with them. +# +# replicaof + +# If the master is password protected (using the "requirepass" configuration +# directive below) it is possible to tell the replica to authenticate before +# starting the replication synchronization process, otherwise the master will +# refuse the replica request. 
+# +# masterauth + +# When a replica loses its connection with the master, or when the replication +# is still in progress, the replica can act in two different ways: +# +# 1) if replica-serve-stale-data is set to 'yes' (the default) the replica will +# still reply to client requests, possibly with out of date data, or the +# data set may just be empty if this is the first synchronization. +# +# 2) if replica-serve-stale-data is set to 'no' the replica will reply with +# an error "SYNC with master in progress" to all the kind of commands +# but to INFO, replicaOF, AUTH, PING, SHUTDOWN, REPLCONF, ROLE, CONFIG, +# SUBSCRIBE, UNSUBSCRIBE, PSUBSCRIBE, PUNSUBSCRIBE, PUBLISH, PUBSUB, +# COMMAND, POST, HOST: and LATENCY. +# +#replica-serve-stale-data yes + +# You can configure a replica instance to accept writes or not. Writing against +# a replica instance may be useful to store some ephemeral data (because data +# written on a replica will be easily deleted after resync with the master) but +# may also cause problems if clients are writing to it because of a +# misconfiguration. +# +# Since Redis 2.6 by default replicas are read-only. +# +# Note: read only replicas are not designed to be exposed to untrusted clients +# on the internet. It's just a protection layer against misuse of the instance. +# Still a read only replica exports by default all the administrative commands +# such as CONFIG, DEBUG, and so forth. To a limited extent you can improve +# security of read only replicas using 'rename-command' to shadow all the +# administrative / dangerous commands. +#replica-read-only yes + +# Replication SYNC strategy: disk or socket. 
+# +# ------------------------------------------------------- +# WARNING: DISKLESS REPLICATION IS EXPERIMENTAL CURRENTLY +# ------------------------------------------------------- +# +# New replicas and reconnecting replicas that are not able to continue the replication +# process just receiving differences, need to do what is called a "full +# synchronization". An RDB file is transmitted from the master to the replicas. +# The transmission can happen in two different ways: +# +# 1) Disk-backed: The Redis master creates a new process that writes the RDB +# file on disk. Later the file is transferred by the parent +# process to the replicas incrementally. +# 2) Diskless: The Redis master creates a new process that directly writes the +# RDB file to replica sockets, without touching the disk at all. +# +# With disk-backed replication, while the RDB file is generated, more replicas +# can be queued and served with the RDB file as soon as the current child producing +# the RDB file finishes its work. With diskless replication instead once +# the transfer starts, new replicas arriving will be queued and a new transfer +# will start when the current one terminates. +# +# When diskless replication is used, the master waits a configurable amount of +# time (in seconds) before starting the transfer in the hope that multiple replicas +# will arrive and the transfer can be parallelized. +# +# With slow disks and fast (large bandwidth) networks, diskless replication +# works better. +repl-diskless-sync no + +# When diskless replication is enabled, it is possible to configure the delay +# the server waits in order to spawn the child that transfers the RDB via socket +# to the replicas. +# +# This is important since once the transfer starts, it is not possible to serve +# new replicas arriving, that will be queued for the next RDB transfer, so the server +# waits a delay in order to let more replicas arrive. +# +# The delay is specified in seconds, and by default is 5 seconds. 
To disable +# it entirely just set it to 0 seconds and the transfer will start ASAP. +repl-diskless-sync-delay 5 + +# Replicas send PINGs to server in a predefined interval. It's possible to change +# this interval with the repl_ping_replica_period option. The default value is 10 +# seconds. +# +# repl-ping-replica-period 10 + +# The following option sets the replication timeout for: +# +# 1) Bulk transfer I/O during SYNC, from the point of view of replica. +# 2) Master timeout from the point of view of replicas (data, pings). +# 3) Replica timeout from the point of view of masters (REPLCONF ACK pings). +# +# It is important to make sure that this value is greater than the value +# specified for repl-ping-replica-period otherwise a timeout will be detected +# every time there is low traffic between the master and the replica. +# +# repl-timeout 60 + +# Disable TCP_NODELAY on the replica socket after SYNC? +# +# If you select "yes" Redis will use a smaller number of TCP packets and +# less bandwidth to send data to replicas. But this can add a delay for +# the data to appear on the replica side, up to 40 milliseconds with +# Linux kernels using a default configuration. +# +# If you select "no" the delay for data to appear on the replica side will +# be reduced but more bandwidth will be used for replication. +# +# By default we optimize for low latency, but in very high traffic conditions +# or when the master and replicas are many hops away, turning this to "yes" may +# be a good idea. +repl-disable-tcp-nodelay no + +# Set the replication backlog size. The backlog is a buffer that accumulates +# replica data when replicas are disconnected for some time, so that when a replica +# wants to reconnect again, often a full resync is not needed, but a partial +# resync is enough, just passing the portion of data the replica missed while +# disconnected. 
+# +# The bigger the replication backlog, the longer the time the replica can be +# disconnected and later be able to perform a partial resynchronization. +# +# The backlog is only allocated once there is at least a replica connected. +# +# repl-backlog-size 1mb + +# After a master has no longer connected replicas for some time, the backlog +# will be freed. The following option configures the amount of seconds that +# need to elapse, starting from the time the last replica disconnected, for +# the backlog buffer to be freed. +# +# Note that replicas never free the backlog for timeout, since they may be +# promoted to masters later, and should be able to correctly "partially +# resynchronize" with the replicas: hence they should always accumulate backlog. +# +# A value of 0 means to never release the backlog. +# +# repl-backlog-ttl 3600 + +# The replica priority is an integer number published by Redis in the INFO output. +# It is used by Redis Sentinel in order to select a replica to promote into a +# master if the master is no longer working correctly. +# +# A replica with a low priority number is considered better for promotion, so +# for instance if there are three replicas with priority 10, 100, 25 Sentinel will +# pick the one with priority 10, that is the lowest. +# +# However a special priority of 0 marks the replica as not able to perform the +# role of master, so a replica with priority of 0 will never be selected by +# Redis Sentinel for promotion. +# +# By default the priority is 100. +#replica-priority 100 + +# It is possible for a master to stop accepting writes if there are less than +# N replicas connected, having a lag less or equal than M seconds. +# +# The N replicas need to be in "online" state. +# +# The lag in seconds, that must be <= the specified value, is calculated from +# the last ping received from the replica, that is usually sent every second. 
+# +# This option does not GUARANTEE that N replicas will accept the write, but +# will limit the window of exposure for lost writes in case not enough replicas +# are available, to the specified number of seconds. +# +# For example to require at least 3 replicas with a lag <= 10 seconds use: +# +# min-replicas-to-write 3 +# min-replicas-max-lag 10 +# +# Setting one or the other to 0 disables the feature. +# +# By default min-replicas-to-write is set to 0 (feature disabled) and +# min-replicas-max-lag is set to 10. + +# A Redis master is able to list the address and port of the attached +# replicas in different ways. For example the "INFO replication" section +# offers this information, which is used, among other tools, by +# Redis Sentinel in order to discover replica instances. +# Another place where this info is available is in the output of the +# "ROLE" command of a master. +# +# The listed IP and address normally reported by a replica is obtained +# in the following way: +# +# IP: The address is auto detected by checking the peer address +# of the socket used by the replica to connect with the master. +# +# Port: The port is communicated by the replica during the replication +# handshake, and is normally the port that the replica is using to +# listen for connections. +# +# However when port forwarding or Network Address Translation (NAT) is +# used, the replica may be actually reachable via different IP and port +# pairs. The following two options can be used by a replica in order to +# report to its master a specific set of IP and port, so that both INFO +# and ROLE will report those values. +# +# There is no need to use both the options if you need to override just +# the port or the IP address. +# +# replica-announce-ip 5.5.5.5 +# replica-announce-port 1234 + +################################## SECURITY ################################### + +# Require clients to issue AUTH before processing any other +# commands. 
This might be useful in environments in which you do not trust +# others with access to the host running redis-server. +# +# This should stay commented out for backward compatibility and because most +# people do not need auth (e.g. they run their own servers). +# +# Warning: since Redis is pretty fast an outside user can try up to +# 150k passwords per second against a good box. This means that you should +# use a very strong password otherwise it will be very easy to break. +# +requirepass long_secure_password + +# Command renaming. +# +# It is possible to change the name of dangerous commands in a shared +# environment. For instance the CONFIG command may be renamed into something +# hard to guess so that it will still be available for internal-use tools +# but not available for general clients. +# +# Example: +# +# rename-command CONFIG b840fc02d524045429941cc15f59e41cb7be6c52 +# +# It is also possible to completely kill a command by renaming it into +# an empty string: +# +# rename-command CONFIG "" +# +# Please note that changing the name of commands that are logged into the +# AOF file or transmitted to replicas may cause problems. + +################################### CLIENTS #################################### + +# Set the max number of connected clients at the same time. By default +# this limit is set to 10000 clients, however if the Redis server is not +# able to configure the process file limit to allow for the specified limit +# the max number of allowed clients is set to the current file limit +# minus 32 (as Redis reserves a few file descriptors for internal uses). +# +# Once the limit is reached Redis will close all the new connections sending +# an error 'max number of clients reached'. +# +# maxclients 10000 + +############################## MEMORY MANAGEMENT ################################ + +# Set a memory usage limit to the specified amount of bytes. 
+# When the memory limit is reached Redis will try to remove keys +# according to the eviction policy selected (see maxmemory-policy). +# +# If Redis can't remove keys according to the policy, or if the policy is +# set to 'noeviction', Redis will start to reply with errors to commands +# that would use more memory, like SET, LPUSH, and so on, and will continue +# to reply to read-only commands like GET. +# +# This option is usually useful when using Redis as an LRU or LFU cache, or to +# set a hard memory limit for an instance (using the 'noeviction' policy). +# +# WARNING: If you have replicas attached to an instance with maxmemory on, +# the size of the output buffers needed to feed the replicas are subtracted +# from the used memory count, so that network problems / resyncs will +# not trigger a loop where keys are evicted, and in turn the output +# buffer of replicas is full with DELs of keys evicted triggering the deletion +# of more keys, and so forth until the database is completely emptied. +# +# In short... if you have replicas attached it is suggested that you set a lower +# limit for maxmemory so that there is some free RAM on the system for replica +# output buffers (but this is not needed if the policy is 'noeviction'). +# +# maxmemory + +# MAXMEMORY POLICY: how Redis will select what to remove when maxmemory +# is reached. You can select among five behaviors: +# +# volatile-lru -> Evict using approximated LRU among the keys with an expire set. +# allkeys-lru -> Evict any key using approximated LRU. +# volatile-lfu -> Evict using approximated LFU among the keys with an expire set. +# allkeys-lfu -> Evict any key using approximated LFU. +# volatile-random -> Remove a random key among the ones with an expire set. +# allkeys-random -> Remove a random key, any key. +# volatile-ttl -> Remove the key with the nearest expire time (minor TTL) +# noeviction -> Don't evict anything, just return an error on write operations. 
+# +# LRU means Least Recently Used +# LFU means Least Frequently Used +# +# LRU, LFU and volatile-ttl are all implemented using approximated +# randomized algorithms. +# +# Note: with any of the above policies, Redis will return an error on write +# operations, when there are no suitable keys for eviction. +# +# At the date of writing these commands are: set setnx setex append +# incr decr rpush lpush rpushx lpushx linsert lset rpoplpush sadd +# sinter sinterstore sunion sunionstore sdiff sdiffstore zadd zincrby +# zunionstore zinterstore hset hsetnx hmset hincrby incrby decrby +# getset mset msetnx exec sort +# +# The default is: +# +# maxmemory-policy noeviction + +# LRU, LFU and minimal TTL algorithms are not precise algorithms but approximated +# algorithms (in order to save memory), so you can tune it for speed or +# accuracy. By default Redis will check five keys and pick the one that was +# used least recently; you can change the sample size using the following +# configuration directive. +# +# The default of 5 produces good enough results. 10 Approximates very closely +# true LRU but costs more CPU. 3 is faster but not very accurate. +# +# maxmemory-samples 5 + +# Starting from Redis 5, by default a replica will ignore its maxmemory setting +# (unless it is promoted to master after a failover or manually). It means +# that the eviction of keys will be just handled by the master, sending the +# DEL commands to the replica as keys evict in the master side. +# +# This behavior ensures that masters and replicas stay consistent, and is usually +# what you want, however if your replica is writable, or you want the replica to have +# a different memory setting, and you are sure all the writes performed to the +# replica are idempotent, then you may change this default (but be sure to understand +# what you are doing).
+# +# Note that since the replica by default does not evict, it may end using more +# memory than the one set via maxmemory (there are certain buffers that may +# be larger on the replica, or data structures may sometimes take more memory and so +# forth). So make sure you monitor your replicas and make sure they have enough +# memory to never hit a real out-of-memory condition before the master hits +# the configured maxmemory setting. +# +# replica-ignore-maxmemory yes + +############################# LAZY FREEING #################################### + +# Redis has two primitives to delete keys. One is called DEL and is a blocking +# deletion of the object. It means that the server stops processing new commands +# in order to reclaim all the memory associated with an object in a synchronous +# way. If the key deleted is associated with a small object, the time needed +# in order to execute the DEL command is very small and comparable to most other +# O(1) or O(log_N) commands in Redis. However if the key is associated with an +# aggregated value containing millions of elements, the server can block for +# a long time (even seconds) in order to complete the operation. +# +# For the above reasons Redis also offers non blocking deletion primitives +# such as UNLINK (non blocking DEL) and the ASYNC option of FLUSHALL and +# FLUSHDB commands, in order to reclaim memory in background. Those commands +# are executed in constant time. Another thread will incrementally free the +# object in the background as fast as possible. +# +# DEL, UNLINK and ASYNC option of FLUSHALL and FLUSHDB are user-controlled. +# It's up to the design of the application to understand when it is a good +# idea to use one or the other. However the Redis server sometimes has to +# delete keys or flush the whole database as a side effect of other operations. 
+# Specifically Redis deletes objects independently of a user call in the +# following scenarios: +# +# 1) On eviction, because of the maxmemory and maxmemory policy configurations, +# in order to make room for new data, without going over the specified +# memory limit. +# 2) Because of expire: when a key with an associated time to live (see the +# EXPIRE command) must be deleted from memory. +# 3) Because of a side effect of a command that stores data on a key that may +# already exist. For example the RENAME command may delete the old key +# content when it is replaced with another one. Similarly SUNIONSTORE +# or SORT with STORE option may delete existing keys. The SET command +# itself removes any old content of the specified key in order to replace +# it with the specified string. +# 4) During replication, when a replica performs a full resynchronization with +# its master, the content of the whole database is removed in order to +# load the RDB file just transferred. +# +# In all the above cases the default is to delete objects in a blocking way, +# like if DEL was called. However you can configure each case specifically +# in order to instead release memory in a non-blocking way like if UNLINK +# was called, using the following configuration directives: + +lazyfree-lazy-eviction no +lazyfree-lazy-expire no +lazyfree-lazy-server-del no +#replica-lazy-flush no + +############################## APPEND ONLY MODE ############################### + +# By default Redis asynchronously dumps the dataset on disk. This mode is +# good enough in many applications, but an issue with the Redis process or +# a power outage may result into a few minutes of writes lost (depending on +# the configured save points). +# +# The Append Only File is an alternative persistence mode that provides +# much better durability. 
For instance using the default data fsync policy +# (see later in the config file) Redis can lose just one second of writes in a +# dramatic event like a server power outage, or a single write if something +# wrong with the Redis process itself happens, but the operating system is +# still running correctly. +# +# AOF and RDB persistence can be enabled at the same time without problems. +# If the AOF is enabled on startup Redis will load the AOF, that is the file +# with the better durability guarantees. +# +# Please check http://redis.io/topics/persistence for more information. + +appendonly no + +# The name of the append only file (default: "appendonly.aof") + +appendfilename "appendonly.aof" + +# The fsync() call tells the Operating System to actually write data on disk +# instead of waiting for more data in the output buffer. Some OS will really flush +# data on disk, some other OS will just try to do it ASAP. +# +# Redis supports three different modes: +# +# no: don't fsync, just let the OS flush the data when it wants. Faster. +# always: fsync after every write to the append only log. Slow, Safest. +# everysec: fsync only one time every second. Compromise. +# +# The default is "everysec", as that's usually the right compromise between +# speed and data safety. It's up to you to understand if you can relax this to +# "no" that will let the operating system flush the output buffer when +# it wants, for better performances (but if you can live with the idea of +# some data loss consider the default persistence mode that's snapshotting), +# or on the contrary, use "always" that's very slow but a bit safer than +# everysec. +# +# More details please check the following article: +# http://antirez.com/post/redis-persistence-demystified.html +# +# If unsure, use "everysec". 
+ +# appendfsync always +appendfsync everysec +# appendfsync no + +# When the AOF fsync policy is set to always or everysec, and a background +# saving process (a background save or AOF log background rewriting) is +# performing a lot of I/O against the disk, in some Linux configurations +# Redis may block too long on the fsync() call. Note that there is no fix for +# this currently, as even performing fsync in a different thread will block +# our synchronous write(2) call. +# +# In order to mitigate this problem it's possible to use the following option +# that will prevent fsync() from being called in the main process while a +# BGSAVE or BGREWRITEAOF is in progress. +# +# This means that while another child is saving, the durability of Redis is +# the same as "appendfsync none". In practical terms, this means that it is +# possible to lose up to 30 seconds of log in the worst scenario (with the +# default Linux settings). +# +# If you have latency problems turn this to "yes". Otherwise leave it as +# "no" that is the safest pick from the point of view of durability. + +no-appendfsync-on-rewrite no + +# Automatic rewrite of the append only file. +# Redis is able to automatically rewrite the log file implicitly calling +# BGREWRITEAOF when the AOF log size grows by the specified percentage. +# +# This is how it works: Redis remembers the size of the AOF file after the +# latest rewrite (if no rewrite has happened since the restart, the size of +# the AOF at startup is used). +# +# This base size is compared to the current size. If the current size is +# bigger than the specified percentage, the rewrite is triggered. Also +# you need to specify a minimal size for the AOF file to be rewritten, this +# is useful to avoid rewriting the AOF file even if the percentage increase +# is reached but it is still pretty small. +# +# Specify a percentage of zero in order to disable the automatic AOF +# rewrite feature. 
+ +auto-aof-rewrite-percentage 100 +auto-aof-rewrite-min-size 64mb + +# An AOF file may be found to be truncated at the end during the Redis +# startup process, when the AOF data gets loaded back into memory. +# This may happen when the system where Redis is running +# crashes, especially when an ext4 filesystem is mounted without the +# data=ordered option (however this can't happen when Redis itself +# crashes or aborts but the operating system still works correctly). +# +# Redis can either exit with an error when this happens, or load as much +# data as possible (the default now) and start if the AOF file is found +# to be truncated at the end. The following option controls this behavior. +# +# If aof-load-truncated is set to yes, a truncated AOF file is loaded and +# the Redis server starts emitting a log to inform the user of the event. +# Otherwise if the option is set to no, the server aborts with an error +# and refuses to start. When the option is set to no, the user requires +# to fix the AOF file using the "redis-check-aof" utility before to restart +# the server. +# +# Note that if the AOF file will be found to be corrupted in the middle +# the server will still exit with an error. This option only applies when +# Redis will try to read more data from the AOF file but not enough bytes +# will be found. +aof-load-truncated yes + +# When rewriting the AOF file, Redis is able to use an RDB preamble in the +# AOF file for faster rewrites and recoveries. When this option is turned +# on the rewritten AOF file is composed of two different stanzas: +# +# [RDB file][AOF tail] +# +# When loading Redis recognizes that the AOF file starts with the "REDIS" +# string and loads the prefixed RDB file, and continues loading the AOF +# tail. +aof-use-rdb-preamble yes + +################################ LUA SCRIPTING ############################### + +# Max execution time of a Lua script in milliseconds. 
+# +# If the maximum execution time is reached Redis will log that a script is +# still in execution after the maximum allowed time and will start to +# reply to queries with an error. +# +# When a long running script exceeds the maximum execution time only the +# SCRIPT KILL and SHUTDOWN NOSAVE commands are available. The first can be +# used to stop a script that has not yet called write commands. The second +# is the only way to shut down the server in the case a write command was +# already issued by the script but the user doesn't want to wait for the natural +# termination of the script. +# +# Set it to 0 or a negative value for unlimited execution without warnings. +lua-time-limit 5000 + +################################ REDIS CLUSTER ############################### + +# Normal Redis instances can't be part of a Redis Cluster; only nodes that are +# started as cluster nodes can. In order to start a Redis instance as a +# cluster node enable the cluster support uncommenting the following: +# +# cluster-enabled yes + +# Every cluster node has a cluster configuration file. This file is not +# intended to be edited by hand. It is created and updated by Redis nodes. +# Every Redis Cluster node requires a different cluster configuration file. +# Make sure that instances running in the same system do not have +# overlapping cluster configuration file names. +# +# cluster-config-file nodes-6379.conf + +# Cluster node timeout is the amount of milliseconds a node must be unreachable +# for it to be considered in failure state. +# Most other internal time limits are multiples of the node timeout. +# +# cluster-node-timeout 15000 + +# A replica of a failing master will avoid starting a failover if its data +# looks too old.
+# +# There is no simple way for a replica to actually have an exact measure of +# its "data age", so the following two checks are performed: +# +# 1) If there are multiple replicas able to failover, they exchange messages +# in order to try to give an advantage to the replica with the best +# replication offset (more data from the master processed). +# Replicas will try to get their rank by offset, and apply to the start +# of the failover a delay proportional to their rank. +# +# 2) Every single replica computes the time of the last interaction with +# its master. This can be the last ping or command received (if the master +# is still in the "connected" state), or the time that elapsed since the +# disconnection with the master (if the replication link is currently down). +# If the last interaction is too old, the replica will not try to failover +# at all. +# +# The point "2" can be tuned by user. Specifically a replica will not perform +# the failover if, since the last interaction with the master, the time +# elapsed is greater than: +# +# (node-timeout * replica-validity-factor) + repl-ping-replica-period +# +# So for example if node-timeout is 30 seconds, and the replica-validity-factor +# is 10, and assuming a default repl-ping-replica-period of 10 seconds, the +# replica will not try to failover if it was not able to talk with the master +# for longer than 310 seconds. +# +# A large replica-validity-factor may allow replicas with too old data to failover +# a master, while a too small value may prevent the cluster from being able to +# elect a replica at all. +# +# For maximum availability, it is possible to set the replica-validity-factor +# to a value of 0, which means, that replicas will always try to failover the +# master regardless of the last time they interacted with the master. +# (However they'll always try to apply a delay proportional to their +# offset rank). 
+# +# Zero is the only value able to guarantee that when all the partitions heal +# the cluster will always be able to continue. +# +# cluster-replica-validity-factor 10 + +# Cluster replicas are able to migrate to orphaned masters, that are masters +# that are left without working replicas. This improves the cluster ability +# to resist to failures as otherwise an orphaned master can't be failed over +# in case of failure if it has no working replicas. +# +# Replicas migrate to orphaned masters only if there are still at least a +# given number of other working replicas for their old master. This number +# is the "migration barrier". A migration barrier of 1 means that a replica +# will migrate only if there is at least 1 other working replica for its master +# and so forth. It usually reflects the number of replicas you want for every +# master in your cluster. +# +# Default is 1 (replicas migrate only if their masters remain with at least +# one replica). To disable migration just set it to a very large value. +# A value of 0 can be set but is useful only for debugging and dangerous +# in production. +# +# cluster-migration-barrier 1 + +# By default Redis Cluster nodes stop accepting queries if they detect there +# is at least an hash slot uncovered (no available node is serving it). +# This way if the cluster is partially down (for example a range of hash slots +# are no longer covered) all the cluster becomes, eventually, unavailable. +# It automatically returns available as soon as all the slots are covered again. +# +# However sometimes you want the subset of the cluster which is working, +# to continue to accept queries for the part of the key space that is still +# covered. In order to do so, just set the cluster-require-full-coverage +# option to no. +# +# cluster-require-full-coverage yes + +# This option, when set to yes, prevents replicas from trying to failover its +# master during master failures. 
However the master can still perform a +# manual failover, if forced to do so. +# +# This is useful in different scenarios, especially in the case of multiple +# data center operations, where we want one side to never be promoted if not +# in the case of a total DC failure. +# +# cluster-replica-no-failover no + +# In order to setup your cluster make sure to read the documentation +# available at http://redis.io web site. + +########################## CLUSTER DOCKER/NAT support ######################## + +# In certain deployments, Redis Cluster nodes address discovery fails, because +# addresses are NAT-ted or because ports are forwarded (the typical case is +# Docker and other containers). +# +# In order to make Redis Cluster working in such environments, a static +# configuration where each node knows its public address is needed. The +# following two options are used for this scope, and are: +# +# * cluster-announce-ip +# * cluster-announce-port +# * cluster-announce-bus-port +# +# Each instruct the node about its address, client port, and cluster message +# bus port. The information is then published in the header of the bus packets +# so that other nodes will be able to correctly map the address of the node +# publishing the information. +# +# If the above options are not used, the normal Redis Cluster auto-detection +# will be used instead. +# +# Note that when remapped, the bus port may not be at the fixed offset of +# clients port + 10000, so you can specify any port and bus-port depending +# on how they get remapped. If the bus-port is not set, a fixed offset of +# 10000 will be used as usually. +# +# Example: +# +# cluster-announce-ip 10.1.1.5 +# cluster-announce-port 6379 +# cluster-announce-bus-port 6380 + +################################## SLOW LOG ################################### + +# The Redis Slow Log is a system to log queries that exceeded a specified +# execution time. 
The execution time does not include the I/O operations +# like talking with the client, sending the reply and so forth, +# but just the time needed to actually execute the command (this is the only +# stage of command execution where the thread is blocked and can not serve +# other requests in the meantime). +# +# You can configure the slow log with two parameters: one tells Redis +# what is the execution time, in microseconds, to exceed in order for the +# command to get logged, and the other parameter is the length of the +# slow log. When a new command is logged the oldest one is removed from the +# queue of logged commands. + +# The following time is expressed in microseconds, so 1000000 is equivalent +# to one second. Note that a negative number disables the slow log, while +# a value of zero forces the logging of every command. +slowlog-log-slower-than 10000 + +# There is no limit to this length. Just be aware that it will consume memory. +# You can reclaim memory used by the slow log with SLOWLOG RESET. +slowlog-max-len 128 + +################################ LATENCY MONITOR ############################## + +# The Redis latency monitoring subsystem samples different operations +# at runtime in order to collect data related to possible sources of +# latency of a Redis instance. +# +# Via the LATENCY command this information is available to the user that can +# print graphs and obtain reports. +# +# The system only logs operations that were performed in a time equal or +# greater than the amount of milliseconds specified via the +# latency-monitor-threshold configuration directive. When its value is set +# to zero, the latency monitor is turned off. +# +# By default latency monitoring is disabled since it is mostly not needed +# if you don't have latency issues, and collecting data has a performance +# impact, that while very small, can be measured under big load. 
Latency +# monitoring can easily be enabled at runtime using the command +# "CONFIG SET latency-monitor-threshold <milliseconds>" if needed. +latency-monitor-threshold 0 + +############################# EVENT NOTIFICATION ############################## + +# Redis can notify Pub/Sub clients about events happening in the key space. +# This feature is documented at http://redis.io/topics/notifications +# +# For instance if keyspace events notification is enabled, and a client +# performs a DEL operation on key "foo" stored in the Database 0, two +# messages will be published via Pub/Sub: +# +# PUBLISH __keyspace@0__:foo del +# PUBLISH __keyevent@0__:del foo +# +# It is possible to select the events that Redis will notify among a set +# of classes. Every class is identified by a single character: +# +# K Keyspace events, published with __keyspace@<db>__ prefix. +# E Keyevent events, published with __keyevent@<db>__ prefix. +# g Generic commands (non-type specific) like DEL, EXPIRE, RENAME, ... +# $ String commands +# l List commands +# s Set commands +# h Hash commands +# z Sorted set commands +# x Expired events (events generated every time a key expires) +# e Evicted events (events generated when a key is evicted for maxmemory) +# A Alias for g$lshzxe, so that the "AKE" string means all the events. +# +# The "notify-keyspace-events" takes as argument a string that is composed +# of zero or multiple characters. The empty string means that notifications +# are disabled. +# +# Example: to enable list and generic events, from the point of view of the +# event name, use: +# +# notify-keyspace-events Elg +# +# Example 2: to get the stream of the expired keys subscribing to channel +# name __keyevent@0__:expired use: +# +# notify-keyspace-events Ex +# +# By default all notifications are disabled because most users don't need +# this feature and the feature has some overhead. Note that if you don't +# specify at least one of K or E, no events will be delivered.
+notify-keyspace-events "" + +############################### ADVANCED CONFIG ############################### + +# Hashes are encoded using a memory efficient data structure when they have a +# small number of entries, and the biggest entry does not exceed a given +# threshold. These thresholds can be configured using the following directives. +hash-max-ziplist-entries 512 +hash-max-ziplist-value 64 + +# Lists are also encoded in a special way to save a lot of space. +# The number of entries allowed per internal list node can be specified +# as a fixed maximum size or a maximum number of elements. +# For a fixed maximum size, use -5 through -1, meaning: +# -5: max size: 64 Kb <-- not recommended for normal workloads +# -4: max size: 32 Kb <-- not recommended +# -3: max size: 16 Kb <-- probably not recommended +# -2: max size: 8 Kb <-- good +# -1: max size: 4 Kb <-- good +# Positive numbers mean store up to _exactly_ that number of elements +# per list node. +# The highest performing option is usually -2 (8 Kb size) or -1 (4 Kb size), +# but if your use case is unique, adjust the settings as necessary. +list-max-ziplist-size -2 + +# Lists may also be compressed. +# Compress depth is the number of quicklist ziplist nodes from *each* side of +# the list to *exclude* from compression. The head and tail of the list +# are always uncompressed for fast push/pop operations. Settings are: +# 0: disable all list compression +# 1: depth 1 means "don't start compressing until after 1 node into the list, +# going from either the head or tail" +# So: [head]->node->node->...->node->[tail] +# [head], [tail] will always be uncompressed; inner nodes will compress. +# 2: [head]->[next]->node->node->...->node->[prev]->[tail] +# 2 here means: don't compress head or head->next or tail->prev or tail, +# but compress all nodes between them. +# 3: [head]->[next]->[next]->node->node->...->node->[prev]->[prev]->[tail] +# etc. 
+list-compress-depth 0 + +# Sets have a special encoding in just one case: when a set is composed +# of just strings that happen to be integers in radix 10 in the range +# of 64 bit signed integers. +# The following configuration setting sets the limit in the size of the +# set in order to use this special memory saving encoding. +set-max-intset-entries 512 + +# Similarly to hashes and lists, sorted sets are also specially encoded in +# order to save a lot of space. This encoding is only used when the length and +# elements of a sorted set are below the following limits: +zset-max-ziplist-entries 128 +zset-max-ziplist-value 64 + +# HyperLogLog sparse representation bytes limit. The limit includes the +# 16 bytes header. When an HyperLogLog using the sparse representation crosses +# this limit, it is converted into the dense representation. +# +# A value greater than 16000 is totally useless, since at that point the +# dense representation is more memory efficient. +# +# The suggested value is ~ 3000 in order to have the benefits of +# the space efficient encoding without slowing down too much PFADD, +# which is O(N) with the sparse encoding. The value can be raised to +# ~ 10000 when CPU is not a concern, but space is, and the data set is +# composed of many HyperLogLogs with cardinality in the 0 - 15000 range. +hll-sparse-max-bytes 3000 + +# Streams macro node max size / items. The stream data structure is a radix +# tree of big nodes that encode multiple items inside. Using this configuration +# it is possible to configure how big a single node can be in bytes, and the +# maximum number of items it may contain before switching to a new node when +# appending new stream entries. If any of the following settings are set to +# zero, the limit is ignored, so for instance it is possible to set just a +# max entries limit by setting max-bytes to 0 and max-entries to the desired +# value.
+#stream-node-max-bytes 4096 +#stream-node-max-entries 100 + +# Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in +# order to help rehashing the main Redis hash table (the one mapping top-level +# keys to values). The hash table implementation Redis uses (see dict.c) +# performs a lazy rehashing: the more operations you run into a hash table +# that is rehashing, the more rehashing "steps" are performed, so if the +# server is idle the rehashing is never complete and some more memory is used +# by the hash table. +# +# The default is to use this millisecond 10 times every second in order to +# actively rehash the main dictionaries, freeing memory when possible. +# +# If unsure: +# use "activerehashing no" if you have hard latency requirements and it is +# not a good thing in your environment that Redis can reply from time to time +# to queries with 2 milliseconds delay. +# +# use "activerehashing yes" if you don't have such hard requirements but +# want to free memory asap when possible. +activerehashing yes + +# The client output buffer limits can be used to force disconnection of clients +# that are not reading data from the server fast enough for some reason (a +# common reason is that a Pub/Sub client can't consume messages as fast as the +# publisher can produce them). +# +# The limit can be set differently for the three different classes of clients: +# +# normal -> normal clients including MONITOR clients +# replica -> replica clients +# pubsub -> clients subscribed to at least one pubsub channel or pattern +# +# The syntax of every client-output-buffer-limit directive is the following: +# +# client-output-buffer-limit <class> <hard limit> <soft limit> <soft seconds> +# +# A client is immediately disconnected once the hard limit is reached, or if +# the soft limit is reached and remains reached for the specified number of +# seconds (continuously).
+# So for instance if the hard limit is 32 megabytes and the soft limit is +# 16 megabytes / 10 seconds, the client will get disconnected immediately +# if the size of the output buffers reach 32 megabytes, but will also get +# disconnected if the client reaches 16 megabytes and continuously overcomes +# the limit for 10 seconds. +# +# By default normal clients are not limited because they don't receive data +# without asking (in a push way), but just after a request, so only +# asynchronous clients may create a scenario where data is requested faster +# than it can read. +# +# Instead there is a default limit for pubsub and replica clients, since +# subscribers and replicas receive data in a push fashion. +# +# Either the hard or the soft limit can be disabled by setting them to zero. +client-output-buffer-limit normal 0 0 0 +#client-output-buffer-limit replica 256mb 64mb 60 +client-output-buffer-limit pubsub 32mb 8mb 60 + +# Client query buffers accumulate new commands. They are limited to a fixed +# amount by default in order to avoid that a protocol desynchronization (for +# instance due to a bug in the client) will lead to unbound memory usage in +# the query buffer. However you can configure it here if you have very special +# needs, such as huge multi/exec requests or alike. +# +# client-query-buffer-limit 1gb + +# In the Redis protocol, bulk requests, that are, elements representing single +# strings, are normally limited to 512 mb. However you can change this limit +# here. +# +# proto-max-bulk-len 512mb + +# Redis calls an internal function to perform many background tasks, like +# closing connections of clients in timeout, purging expired keys that are +# never requested, and so forth. +# +# Not all tasks are performed with the same frequency, but Redis checks for +# tasks to perform according to the specified "hz" value. +# +# By default "hz" is set to 10.
Raising the value will use more CPU when +# Redis is idle, but at the same time will make Redis more responsive when +# there are many keys expiring at the same time, and timeouts may be +# handled with more precision. +# +# The range is between 1 and 500, however a value over 100 is usually not +# a good idea. Most users should use the default of 10 and raise this up to +# 100 only in environments where very low latency is required. +hz 10 + +# Normally it is useful to have an HZ value which is proportional to the +# number of clients connected. This is useful in order, for instance, to +# avoid processing too many clients for each background task invocation +# in order to avoid latency spikes. +# +# Since the default HZ value by default is conservatively set to 10, Redis +# offers, and enables by default, the ability to use an adaptive HZ value +# which will temporarily rise when there are many connected clients. +# +# When dynamic HZ is enabled, the actual configured HZ will be used +# as a baseline, but multiples of the configured HZ value will be actually +# used as needed once more clients are connected. In this way an idle +# instance will use very little CPU time while a busy instance will be +# more responsive. +#dynamic-hz yes + +# When a child rewrites the AOF file, if the following option is enabled +# the file will be fsync-ed every 32 MB of data generated. This is useful +# in order to commit the file to the disk more incrementally and avoid +# big latency spikes. +aof-rewrite-incremental-fsync yes + +# When redis saves RDB file, if the following option is enabled +# the file will be fsync-ed every 32 MB of data generated. This is useful +# in order to commit the file to the disk more incrementally and avoid +# big latency spikes. +#rdb-save-incremental-fsync yes + +# Redis LFU eviction (see maxmemory setting) can be tuned.
However it is a good +# idea to start with the default settings and only change them after investigating +# how to improve the performances and how the keys LFU change over time, which +# is possible to inspect via the OBJECT FREQ command. +# +# There are two tunable parameters in the Redis LFU implementation: the +# counter logarithm factor and the counter decay time. It is important to +# understand what the two parameters mean before changing them. +# +# The LFU counter is just 8 bits per key, it's maximum value is 255, so Redis +# uses a probabilistic increment with logarithmic behavior. Given the value +# of the old counter, when a key is accessed, the counter is incremented in +# this way: +# +# 1. A random number R between 0 and 1 is extracted. +# 2. A probability P is calculated as 1/(old_value*lfu_log_factor+1). +# 3. The counter is incremented only if R < P. +# +# The default lfu-log-factor is 10. This is a table of how the frequency +# counter changes with a different number of accesses with different +# logarithmic factors: +# +# +--------+------------+------------+------------+------------+------------+ +# | factor | 100 hits | 1000 hits | 100K hits | 1M hits | 10M hits | +# +--------+------------+------------+------------+------------+------------+ +# | 0 | 104 | 255 | 255 | 255 | 255 | +# +--------+------------+------------+------------+------------+------------+ +# | 1 | 18 | 49 | 255 | 255 | 255 | +# +--------+------------+------------+------------+------------+------------+ +# | 10 | 10 | 18 | 142 | 255 | 255 | +# +--------+------------+------------+------------+------------+------------+ +# | 100 | 8 | 11 | 49 | 143 | 255 | +# +--------+------------+------------+------------+------------+------------+ +# +# NOTE: The above table was obtained by running the following commands: +# +# redis-benchmark -n 1000000 incr foo +# redis-cli object freq foo +# +# NOTE 2: The counter initial value is 5 in order to give new objects a chance +# to accumulate 
hits. +# +# The counter decay time is the time, in minutes, that must elapse in order +# for the key counter to be divided by two (or decremented if it has a value +# <= 10). +# +# The default value for the lfu-decay-time is 1. A special value of 0 means to +# decay the counter every time it happens to be scanned. +# +# lfu-log-factor 10 +# lfu-decay-time 1 + +########################### ACTIVE DEFRAGMENTATION ####################### +# +# WARNING THIS FEATURE IS EXPERIMENTAL. However it was stress tested +# even in production and manually tested by multiple engineers for some +# time. +# +# What is active defragmentation? +# ------------------------------- +# +# Active (online) defragmentation allows a Redis server to compact the +# spaces left between small allocations and deallocations of data in memory, +# thus allowing memory to be reclaimed. +# +# Fragmentation is a natural process that happens with every allocator (but +# less so with Jemalloc, fortunately) and certain workloads. Normally a server +# restart is needed in order to lower the fragmentation, or at least to flush +# away all the data and create it again. However thanks to this feature +# implemented by Oran Agra for Redis 4.0 this process can happen at runtime +# in a "hot" way, while the server is running. +# +# Basically when the fragmentation is over a certain level (see the +# configuration options below) Redis will start to create new copies of the +# values in contiguous memory regions by exploiting certain specific Jemalloc +# features (in order to understand if an allocation is causing fragmentation +# and to allocate it in a better place), and at the same time, will release the +# old copies of the data. This process, repeated incrementally for all the keys +# will cause the fragmentation to drop back to normal values. +# +# Important things to understand: +# +# 1.
This feature is disabled by default, and only works if you compiled Redis +# to use the copy of Jemalloc we ship with the source code of Redis. +# This is the default with Linux builds. +# +# 2. You never need to enable this feature if you don't have fragmentation +# issues. +# +# 3. Once you experience fragmentation, you can enable this feature when +# needed with the command "CONFIG SET activedefrag yes". +# +# The configuration parameters are able to fine tune the behavior of the +# defragmentation process. If you are not sure about what they mean it is +# a good idea to leave the defaults untouched. + +# Enabled active defragmentation +# activedefrag yes + +# Minimum amount of fragmentation waste to start active defrag +# active-defrag-ignore-bytes 100mb + +# Minimum percentage of fragmentation to start active defrag +# active-defrag-threshold-lower 10 + +# Maximum percentage of fragmentation at which we use maximum effort +# active-defrag-threshold-upper 100 + +# Minimal effort for defrag in CPU percentage +# active-defrag-cycle-min 5 + +# Maximal effort for defrag in CPU percentage +# active-defrag-cycle-max 75 + +# Maximum number of set/hash/zset/list fields that will be processed from +# the main dictionary scan +# active-defrag-max-scan-fields 1000 diff --git a/server/workers/services/src/apis/triple.py b/server/workers/services/src/apis/triple.py index d42eddaca..cfc99f9f1 100644 --- a/server/workers/services/src/apis/triple.py +++ b/server/workers/services/src/apis/triple.py @@ -11,7 +11,12 @@ from flask_restx import Namespace, Resource, fields from .request_validators import SearchParamSchema -redis_store = redis.StrictRedis(host="localhost", port=6379, db=0) + +with open("redis_config.json") as infile: + redis_config = json.load(infile) + +redis_store = redis.StrictRedis(**redis_config) + triple_ns = Namespace("triple", description="TRIPLE API operations") diff --git a/server/workers/triple/src/search_triple.py 
b/server/workers/triple/src/search_triple.py index bbd99569b..09c47497d 100644 --- a/server/workers/triple/src/search_triple.py +++ b/server/workers/triple/src/search_triple.py @@ -8,8 +8,10 @@ with open("config.json") as infile: config = json.load(infile) +with open("redis_config.json") as infile: + redis_config = json.load(infile) -redis_store = redis.StrictRedis(host="localhost", port=6379, db=0) +redis_store = redis.StrictRedis(**redis_config) class TripleClient(object): From 5889a4b399344cb1a3ed44e49d8e696285d95ab2 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 23 Mar 2020 15:02:48 +0100 Subject: [PATCH 48/99] update proxy readme --- examples/triple/README.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/examples/triple/README.md b/examples/triple/README.md index 0c5593397..556a2594e 100644 --- a/examples/triple/README.md +++ b/examples/triple/README.md @@ -28,10 +28,13 @@ The following lines have to be added to the appropriate sites-available config o # # other config - ProxyPass /api http://localhost:5001/api - ProxyPassReverse /api http://localhost:5001/api - ProxyPass /swaggerui http://localhost:5001/swaggerui - ProxyPassReverse /swaggerui http://localhost:5001/swaggerui + + ProxyPass http://127.0.0.1:5001/api + + + ProxyPass http://127.0.0.1:5001/swaggerui/ + + ``` From 26e036241de75133c7c6e000dd787915b47afc8b Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 23 Mar 2020 23:49:10 +0100 Subject: [PATCH 49/99] new parameter: limit --- server/workers/backend/src/headstart.py | 2 +- server/workers/services/src/apis/triple.py | 2 ++ server/workers/triple/src/search_triple.py | 14 ++++++++------ 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/server/workers/backend/src/headstart.py b/server/workers/backend/src/headstart.py index 00f1fdab5..816a15414 100644 --- a/server/workers/backend/src/headstart.py +++ b/server/workers/backend/src/headstart.py @@ -25,7 +25,7 @@ def __init__(self): 
self.default_params["MAX_CLUSTERS"] = 15 self.default_params["language"] = "english" self.default_params["taxonomy_separator"] = ";" - self.default_params["list_size"] = 100 + self.default_params["list_size"] = -1 def add_default_params(self, params): default_params = copy.deepcopy(self.default_params) diff --git a/server/workers/services/src/apis/triple.py b/server/workers/services/src/apis/triple.py index cfc99f9f1..12566fc12 100644 --- a/server/workers/services/src/apis/triple.py +++ b/server/workers/services/src/apis/triple.py @@ -52,6 +52,8 @@ def get_key(store, key): "vis_type": fields.String(example='overview', description='overview or timeline', required=True), + "limit": fields.Integer(example=100, + description='max. number of results'), "raw": fields.Boolean(example="false", description='raw results from ElasticSearch')}) diff --git a/server/workers/triple/src/search_triple.py b/server/workers/triple/src/search_triple.py index 09c47497d..600018d34 100644 --- a/server/workers/triple/src/search_triple.py +++ b/server/workers/triple/src/search_triple.py @@ -52,8 +52,7 @@ def build_sort_order(self, parameters): sort.append("date:desc") return sort - def search(self, parameters): - index = "isidore-documents-triple" + def build_body(self, parameters): body = {"query": { "bool": { "must": [ @@ -69,12 +68,15 @@ def search(self, parameters): ] } }} - sort = self.build_sort_order(parameters) + return body + + def search(self, parameters): + index = "isidore-documents-triple" res = self.es.search( index=index, - body=body, - size=100, - sort=sort) + body=self.build_body(parameters), + size=parameters.get('limit', 100), + sort=self.build_sort_order(parameters)) if parameters.get('raw') is True: return res else: From 70578383dddc57b38920c30d2a869a2ca94b4956 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 23 Mar 2020 23:49:19 +0100 Subject: [PATCH 50/99] swagger --- server/workers/services/src/utils/monkeypatches.py | 2 +- 1 file changed, 1 insertion(+), 
1 deletion(-) diff --git a/server/workers/services/src/utils/monkeypatches.py b/server/workers/services/src/utils/monkeypatches.py index 74cedcf65..fb3694b7c 100644 --- a/server/workers/services/src/utils/monkeypatches.py +++ b/server/workers/services/src/utils/monkeypatches.py @@ -129,7 +129,7 @@ def getSwaggerConfig(): } ], "static_url_path": "/flasgger_static", - # "static_folder": "static", # must be set by user + "static_folder": "static", # must be set by user "swagger_ui": True, "specs_route": "/api/" } From 759c77a40666ac417c71eb562241c1dd07ca97b3 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 23 Mar 2020 23:49:19 +0100 Subject: [PATCH 51/99] swagger --- server/workers/services/src/utils/monkeypatches.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/workers/services/src/utils/monkeypatches.py b/server/workers/services/src/utils/monkeypatches.py index 74cedcf65..4d2edecd5 100644 --- a/server/workers/services/src/utils/monkeypatches.py +++ b/server/workers/services/src/utils/monkeypatches.py @@ -129,7 +129,7 @@ def getSwaggerConfig(): } ], "static_url_path": "/flasgger_static", - # "static_folder": "static", # must be set by user + "static_folder": "static", # must be set by user "swagger_ui": True, - "specs_route": "/api/" + "specs_route": "/" } From c27b2276795063391167fe58883b65b88f4a1947 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Tue, 24 Mar 2020 11:00:44 +0100 Subject: [PATCH 52/99] new parameter: limit --- server/workers/services/src/apis/request_validators.py | 1 + 1 file changed, 1 insertion(+) diff --git a/server/workers/services/src/apis/request_validators.py b/server/workers/services/src/apis/request_validators.py index d8aed6237..7adbd37e6 100644 --- a/server/workers/services/src/apis/request_validators.py +++ b/server/workers/services/src/apis/request_validators.py @@ -10,6 +10,7 @@ class SearchParamSchema(Schema): to = fields.Date(required=True, format="%Y-%m-%d") vis_type = 
fields.Str(require=True) + limit = fields.Int() year_range = fields.Str() today = fields.Str() raw = fields.Boolean() From cbc148d8e176e4757d40d27ee4295aafaebd5698 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Tue, 24 Mar 2020 11:00:53 +0100 Subject: [PATCH 53/99] correct scope --- server/workers/triple/src/search_triple.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/server/workers/triple/src/search_triple.py b/server/workers/triple/src/search_triple.py index 600018d34..10cb0e1c3 100644 --- a/server/workers/triple/src/search_triple.py +++ b/server/workers/triple/src/search_triple.py @@ -6,14 +6,6 @@ import pandas as pd -with open("config.json") as infile: - config = json.load(infile) -with open("redis_config.json") as infile: - redis_config = json.load(infile) - -redis_store = redis.StrictRedis(**redis_config) - - class TripleClient(object): def __init__(self, config): @@ -174,5 +166,11 @@ def run(self): if __name__ == '__main__': + with open("config.json") as infile: + config = json.load(infile) + with open("redis_config.json") as infile: + redis_config = json.load(infile) + + redis_store = redis.StrictRedis(**redis_config) tc = TripleClient(config) tc.run() From dac792c6bed901fe0d8bcf02ae507bdcab031669 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Tue, 24 Mar 2020 13:02:54 +0100 Subject: [PATCH 54/99] tests & gitignore update --- .gitignore | 6 + .../{triple/tests => backend/src}/__init__.py | 0 server/workers/backend/src/tests/__init__.py | 0 .../backend/src/tests/test_clustering.py | 0 .../backend/src/tests/test_summarization.py | 12 + .../tests/testdata/expected_output_data.json | 203 + server/workers/triple/src/tests/__init__.py | 0 .../triple/src/tests/test_build_body.py | 0 .../triple/src/tests/test_clean_subject.py | 0 .../triple/src/tests/test_process_result.py | 51 + .../workers/triple/src/tests/test_search.py | 0 .../triple/src/tests/test_triple_client.py | 0 
.../triple/src/tests/testdata/raw_data.json | 7107 +++++++++++++++++ 13 files changed, 7379 insertions(+) rename server/workers/{triple/tests => backend/src}/__init__.py (100%) create mode 100644 server/workers/backend/src/tests/__init__.py create mode 100644 server/workers/backend/src/tests/test_clustering.py create mode 100644 server/workers/backend/src/tests/test_summarization.py create mode 100644 server/workers/backend/src/tests/testdata/expected_output_data.json create mode 100644 server/workers/triple/src/tests/__init__.py create mode 100644 server/workers/triple/src/tests/test_build_body.py create mode 100644 server/workers/triple/src/tests/test_clean_subject.py create mode 100644 server/workers/triple/src/tests/test_process_result.py create mode 100644 server/workers/triple/src/tests/test_search.py create mode 100644 server/workers/triple/src/tests/test_triple_client.py create mode 100644 server/workers/triple/src/tests/testdata/raw_data.json diff --git a/.gitignore b/.gitignore index 305e732d5..0f13d05b3 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,9 @@ vis/stylesheets/*.css dist/ .idea/ config.js +*__pycache__* +.pytest_cache +.cache +.Rhistory +.ipynb_checkpoints +.Rprofile diff --git a/server/workers/triple/tests/__init__.py b/server/workers/backend/src/__init__.py similarity index 100% rename from server/workers/triple/tests/__init__.py rename to server/workers/backend/src/__init__.py diff --git a/server/workers/backend/src/tests/__init__.py b/server/workers/backend/src/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/server/workers/backend/src/tests/test_clustering.py b/server/workers/backend/src/tests/test_clustering.py new file mode 100644 index 000000000..e69de29bb diff --git a/server/workers/backend/src/tests/test_summarization.py b/server/workers/backend/src/tests/test_summarization.py new file mode 100644 index 000000000..bd4d8ecbe --- /dev/null +++ b/server/workers/backend/src/tests/test_summarization.py 
@@ -0,0 +1,12 @@ +from headstart import Backend +import pytest + +import pandas as pd + +@pytest.fixture +def input_data(): + pass + + +def test_bubble_titles_empty_keywords(): + pass diff --git a/server/workers/backend/src/tests/testdata/expected_output_data.json b/server/workers/backend/src/tests/testdata/expected_output_data.json new file mode 100644 index 000000000..e6e41458d --- /dev/null +++ b/server/workers/backend/src/tests/testdata/expected_output_data.json @@ -0,0 +1,203 @@ +{"input_data": [ + { + "id": "10670/1.2qtip2", + "title": "Brésil(s) n° 16 - 2019 : Género y justicia", + "authors": "CNRS, REGARDS", + "paper_abstract": "Contenido : • Dossier - Genre et justice Guita Grin Debert et Maria Filomena Gregori Violence de genre et justice Violência de gênero e justiça Gender Violence and Justice Lia Zanotta Machado Féminicide : nommer pour exister Feminicídio: nomear para existir Femicide: naming it into existence Luis Roberto Cardoso de Oliveira et Daniel Schroeter Simião « J’ai parlé à Dieu » : violences conjugales et impasses du système judiciaire brésilien Conversar com Deus: Violência Doméstica e Dilemas do J...", + "published_in": "OpenEdition", + "year": "2019-12-16", + "url": "http://rediceisal.hypotheses.org/28507", + "readers": 0, + "subject": "brésil s; género y; y justicia", + "oa_state": 2, + "link": "", + "relevance": 0, + "lang_detected": "catalan", + "cluster_labels": "Féminicide, La violence, Ouest France", + "x": "-0.182277973763649", + "y": "-0.321379842430639", + "area_uri": 1, + "area": "Féminicide, La violence, Ouest France" + }, + { + "id": "10670/1.4o5vxs", + "title": "La race de la violence de genre : analyse de la loi brésilienne contre la violence domestique", + "authors": "Bernardes, Márcia Nina", + "paper_abstract": "Cet article analyse la façon dont le racisme structurel affecte la construction des hiérarchies de genre au Brésil, en se concentrant sur le problème de la violence domestique dans ce pays. 
En s'appuyant sur les analyses féministes décoloniales et noires, l’article examine comment le racisme a eu des effets sur les résultats de la loi « Maria da Penha », importante loi rédigée par des féministes et considéré comme un point d’accomplissement au Brésil. Pourtant, alors que le nombre de féminicides commis contre les femmes blanches au Brésil a diminué au cours de la dernière décennie, le nombre de féminicides commis contre les femmes noires a augmenté. En examinant les diverses stratégies des féministes « hégémoniques » contre la violence sexiste, l’article soutient que le racisme a produit une invisibilité des femmes noires comme actrices de leur propre émancipation.", + "published_in": "OpenEdition", + "year": "2019-09-27", + "url": "http://journals.openedition.org/revdh/7078", + "readers": 0, + "subject": "racisme; blanchitude; violence domestique; racism; whiteness; domestic violence", + "oa_state": 2, + "link": "", + "relevance": 9, + "lang_detected": "french", + "cluster_labels": "Féminicide, La violence, Ouest France", + "x": "-0.0974353124894983", + "y": "-0.0130734148971714", + "area_uri": 1, + "area": "Féminicide, La violence, Ouest France" + }, + { + "id": "10670/1.5di7vf", + "title": "« S’émanciper sans se faire tuer ». 
Perspectives historiques de féminicides en France (1789-1860)", + "authors": "Giacinti, Margot", + "paper_abstract": "Communication réalisée dans le panel \"Violences de genre au quotidien\", discuté par Fabrice Virgili", + "published_in": "Institut des sciences humaines et sociales du CNRS", + "year": "2019-11-20", + "url": "https://hal.archives-ouvertes.fr/hal-02297823", + "readers": 0, + "subject": "féminicide; émancipation; ; ; ", + "oa_state": 2, + "link": "", + "relevance": 7, + "lang_detected": "french", + "cluster_labels": "Féminicide, La violence, Ouest France", + "x": "-0.361404278254434", + "y": "-0.159780730983399", + "area_uri": 1, + "area": "Féminicide, La violence, Ouest France" + }, + { + "id": "10670/1.c1x51g", + "title": "La violence contre les femmes au Guatemala : du génocide au féminicide", + "authors": "Jahan, Sébastien", + "paper_abstract": "« Il y a quelque chose d’important chez les femmes du Guatemala, surtout la femme indigène, quelque chose d’important qui est sa relation avec la terre ; entre la terre et la mère. La terre nourrit et la femme donne vie. Face à ça, la femme elle-même doit garder ça comme son secret à elle, un respect vis-à-vis de la terre. C’est comme une relation entre mari et femme, la relation entre la mère et la terre. Il y a un dialogue constant entre la terre et la femme » (Rigoberta Menchú Tum). 
Durant...", + "published_in": "OpenEdition", + "year": "2019-10-14", + "url": "http://books.openedition.org/pur/98967", + "readers": 0, + "subject": "corps; violence; sociologie de la violence; History; HIS; HBJD", + "oa_state": 2, + "link": "", + "relevance": 4, + "lang_detected": "french", + "cluster_labels": "Con Ruth, Ruth Fierro", + "x": "-0.0367226004351766", + "y": "0.294596909125972", + "area_uri": 3, + "area": "Con Ruth, Ruth Fierro" + }, + { + "id": "10670/1.chnjw7", + "title": "La circulación y la gestión de las emociones en interacción : estudio sobre la risa en un debate informal sobre el feminismo en Medellín, Colombia", + "authors": "Acosta Córdoba, Luisa", + "paper_abstract": "International audience", + "published_in": "Centre pour la communication scientifique directe", + "year": "2019-11-19", + "url": "https://hal.archives-ouvertes.fr/hal-02308138", + "readers": 0, + "subject": "analyse de l'interaction; rire; débat; féminisme; ; ; ", + "oa_state": 2, + "link": "", + "relevance": 6, + "lang_detected": "spanish", + "cluster_labels": "Analyse de l'interaction, Débat, Féminisme", + "x": "0.342227945771559", + "y": "-0.437951580479582", + "area_uri": 4, + "area": "Analyse de l'interaction, Débat, Féminisme" + }, + { + "id": "10670/1.o7stqs", + "title": "Ouest France - novembre 2019", + "authors": "Palmieri, Joelle", + "paper_abstract": "Du « drame de la jalousie » au « crime passionnel » : comment, au fil du temps, a-t-on évité de parler de féminicide ? Par Nawal Lyamini (avec le service documentation d'Ouest-France), le 23/11/2019. 
https://www.ouest-france.fr/faits-divers/feminicide/du-drame-de-la-jalousie-au-crime-passionnel-comment-au-fil-du-temps-t-eviter-de-parler-de-feminicide-6620759  Lire l'article", + "published_in": "OpenEdition", + "year": "2019-12-09", + "url": "http://domination.hypotheses.org/2099", + "readers": 0, + "subject": "france novembre; ouest france", + "oa_state": 2, + "link": "", + "relevance": 2, + "lang_detected": "french", + "cluster_labels": "Féminicide, La violence, Ouest France", + "x": "-0.429086596117543", + "y": "0.198502563564122", + "area_uri": 1, + "area": "Féminicide, La violence, Ouest France" + }, + { + "id": "10670/1.sjz58l", + "title": "México: mujeres en peligro con Ruth Fierro", + "authors": "CNRS, REGARDS", + "paper_abstract": "Paris : 15 de octubre de 2019 18h30-21h00 : Mexique : femmes en danger aveec Ruth Fierro : Feminicides au Mexique. Mardi 15 octobre 2019 entre 18H30 et 21H00 sur le Campus Condorcet au 5, cours des Humanités (salle à confirmer) Mardi 15 octobre 2019, la conférence, Mexique : femmes en danger, donne la parole à Ruth Fierro Pineda, avocate de formation pour l’État de Chihuahua (nord du Mexique) et coordinatrice Générale du Centro de Derechos Humanos de las Mujeres (CEDEHM), l'association lau...", + "published_in": "OpenEdition", + "year": "2019-10-14", + "url": "http://rediceisal.hypotheses.org/22150", + "readers": 0, + "subject": "con ruth; en peligro; ruth fierro", + "oa_state": 2, + "link": "", + "relevance": 5, + "lang_detected": "french", + "cluster_labels": "Con Ruth, Ruth Fierro", + "x": "0.141991505290715", + "y": "0.41034370246043", + "area_uri": 3, + "area": "Con Ruth, Ruth Fierro" + }, + { + "id": "10670/1.xozprv", + "title": "Féminicide : nommer pour exister", + "authors": "Machado, Lia Zanotta", + "paper_abstract": "Cet article analyse, d’un point de vue anthropologique et féministe, les propositions de classification des féminicides, telles qu’elles ont été discutées au Brésil, et qui ont donné lieu à une nouvelle 
qualification juridique inscrite dans le code pénal. L’auteur démontre l’importance d’une dénomination féministe de la violence de genre et du féminicide ainsi que du contre-discours qui s’oppose au crime passionnel comme argument dominant. Introduit dans le vocabulaire du droit, ce terme a entraîné une reconfiguration des débats dans la sphère juridique et a modifié la perception sociale et culturelle de la violence de genre, rendant possible d’autres formes de prévention du féminicide et de lutte contre sa banalisation.", + "published_in": "OpenEdition", + "year": "2019-12-13", + "url": "http://journals.openedition.org/bresils/5576", + "readers": 0, + "subject": "féminicide; violence de genre; pratiques juridiques; loi sur les violences domestiques; masculinité hégémonique; législation et pouvoir; Brésil; XXIe siècle; feminicide; gender violence; legal practices; domestic violence law; hegemonic masculinity; legislation and power; Brazil; t century; feminicídio; violência de gênero; práticas jurídicas; lei de violência doméstica; masculinidade hegemônica; legislação e poder; Brasil; século XXI", + "oa_state": 2, + "link": "", + "relevance": 1, + "lang_detected": "french", + "cluster_labels": "Féminicide, La violence, Ouest France", + "x": "-0.166289810747204", + "y": "-0.0775570208251076", + "area_uri": 1, + "area": "Féminicide, La violence, Ouest France" + }, + { + "id": "http://hdl.handle.net/10251/123963", + "title": "QUI LLANÇA LA PRIMERA PEDRA? VIOLÈNCIA FEMINICIDA, UN CRIM D¿ESTAT. Interpretacions des dels processos litogràfics, el dibuix i el llibre alternatiu.", + "authors": "García García, Gema", + "paper_abstract": "Les violències contra les dones han estat històricament espentades cap a la intimitat, han sigut llegides com a comportaments tolerats i legitimats, dinàmiques de poder persistents i assumides com alienes i particulars. 
La violència i l'agressió, manifestacions extremes del control dels cossos llegits com femenins, s'han entès com a pràctiques aïllades i allunyades del context polític i social. Existeix una evident vinculació entre les Violències Feminicides, les dinàmiques de poder i l'ordre legal de l'Estat, a més, l'estreta complicitat dels mitjans de comunicació que qüestionen, culpabilitzen a la víctima i converteixen la violència exercida sobre el seu cos en un espectacle de la crueltat i la morbositat. A partir de l'experimentació amb diferents processos litogràfics, del dibuix directe i del llibre alternatiu, aquest projecte pretén trobar connexions (lapidàries) entre les primerenques i aparentment innocents pràctiques feminicides des de la infància fins l'edat adulta i la producció activa al taller.", + "published_in": "Universitat Politècnica de València", + "year": "2019-07-22", + "url": "http://hdl.handle.net/10251/123963", + "readers": 0, + "subject": "Libro de artista; gráfica; feminismo; género; libro objeto; grabado; instalación; voz; mensajes.; Artist's book; graphic; feminism; gender; object book; engraving; installation; voice; messages.; DIBUJO; Máster Universitario en Producción Artística-Màster Universitari en Producció Artística", + "oa_state": 2, + "link": "", + "relevance": 8, + "lang_detected": "catalan", + "cluster_labels": "Dibujo", + "x": "0.309686346602382", + "y": "-0.0363154544596824", + "area_uri": 2, + "area": "Dibujo" + }, + { + "id": "http://hdl.handle.net/10251/130234", + "title": "Palimpsestos mexicanos", + "authors": "Navarro Rodríguez, Miguel Ángel", + "paper_abstract": "[ES] Palimpsestos mexicanos surgió tras la obtención de la beca Santander con destino en la Facultad de Arte y Diseño, UNAM, CDMX, México. 
La propuesta se concibe como un proyecto expositivo multidisciplinar constituido por un conjunto de subproyectos heterogéneos, en sinergia, uniendo fuerzas, realizados con disciplinas y técnicas diversas tales como el collage, la gráfica y la escultura. El planteamiento general de la obra gira alrededor de las problemáticas que afronta la sociedad mexicana: la corrupción política, las grandes desigualdades sociales, el crimen organizado, la discriminación que sufren los pueblos indígenas, la situación de indefensión de la mujer ante un entorno preeminentemente machista y los intolerables índices de feminicidios. En definitiva, un conjunto de factores que desestabilizan el país y crean inseguridad en la población. Palimpsestos mexicanos también visibiliza la lucha por los derechos civiles de los sectores más desfavorecidos de la sociedad, y empodera y convierte a la mujer mexicana en símbolo de esperanza de un futuro mejor. Proponiendo un dibujo de México alejado de los estereotipos y del folclorismo que presuntamente define su identidad nacional. Aportando una visión del país diferente al imaginario conocido, desde un punto de vista subjetivo, de carácter político pero no dogmático. Con la intención de mostrar, a través del arte contemporáneo, un México en construcción con unas problemáticas a resolver de dimensiones estratosféricas y fascinante en sus retos de futuro. 
En un país, el más poblado de habla hispana, que está llamado a ser una potencia económica y un referente cultural a escala global.", + "published_in": "Universitat Politècnica de València", + "year": "2019-11-05", + "url": "http://hdl.handle.net/10251/130234", + "readers": 0, + "subject": "POLÍTICA; DESIGUALDAD; CORRUPCIÓN; VIOLENCIA; SUBJETIVISMO; HETEROGÉNEA; SINERGIA.; POLITICS; INEQUALITY; CORRUPTION; VIOLENCE; SUBJETIVISM; HETEROGENEOUS; SYNERGY.; DIBUJO; Grado en Bellas Artes-Grau en Belles Arts", + "oa_state": 2, + "link": "", + "relevance": 3, + "lang_detected": "spanish", + "cluster_labels": "Dibujo", + "x": "0.479310774142849", + "y": "0.142614868925058", + "area_uri": 2, + "area": "Dibujo" + } +] +} diff --git a/server/workers/triple/src/tests/__init__.py b/server/workers/triple/src/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/server/workers/triple/src/tests/test_build_body.py b/server/workers/triple/src/tests/test_build_body.py new file mode 100644 index 000000000..e69de29bb diff --git a/server/workers/triple/src/tests/test_clean_subject.py b/server/workers/triple/src/tests/test_clean_subject.py new file mode 100644 index 000000000..e69de29bb diff --git a/server/workers/triple/src/tests/test_process_result.py b/server/workers/triple/src/tests/test_process_result.py new file mode 100644 index 000000000..5b05000fb --- /dev/null +++ b/server/workers/triple/src/tests/test_process_result.py @@ -0,0 +1,51 @@ +from ..search_triple import TripleClient +import json +import pytest +import unittest +from pandas.testing import assert_frame_equal +import pandas as pd + + +@pytest.fixture +def raw_data(): + with open("testdata/raw_data.json") as infile: + return json.load(infile) + + +@pytest.fixture +def triple_client(): + return TripleClient({ + "host": "localhost", + "user": "", + "pass": "", + "port": 9200}) + + +def test_process_result_keys(triple_client, raw_data): + result = triple_client.process_result(raw_data) + assert 
"metadata" in result + assert "text" in result + + +def test_process_result_dtypes(triple_client, raw_data): + result = triple_client.process_result(raw_data) + assert isinstance(json.loads(result.get('metadata')), list) + assert isinstance(json.loads(result.get('text')), list) + + +def test_metadata_keys(triple_client, raw_data): + result = triple_client.process_result(raw_data) + metadata = json.loads(result.get('metadata')) + for entry in metadata: + for k in ["id", "title", "authors", "paper_abstract", "published_in", + "year", "url", "readers", "subject", "oa_state", + "link", "relevance"]: + assert k in entry + + +def test_metadata_dtypes(triple_client, raw_data): + result = triple_client.process_result(raw_data) + metadata = json.loads(result.get('metadata')) + for entry in metadata: + for k, v in entry.items(): + assert isinstance(k, str) diff --git a/server/workers/triple/src/tests/test_search.py b/server/workers/triple/src/tests/test_search.py new file mode 100644 index 000000000..e69de29bb diff --git a/server/workers/triple/src/tests/test_triple_client.py b/server/workers/triple/src/tests/test_triple_client.py new file mode 100644 index 000000000..e69de29bb diff --git a/server/workers/triple/src/tests/testdata/raw_data.json b/server/workers/triple/src/tests/testdata/raw_data.json new file mode 100644 index 000000000..a30343d75 --- /dev/null +++ b/server/workers/triple/src/tests/testdata/raw_data.json @@ -0,0 +1,7107 @@ +{ + "_shards": { + "failed": 0, + "skipped": 0, + "successful": 5, + "total": 5 + }, + "hits": { + "hits": [ + { + "_id": "afj_jHABhI997ZgagPzb", + "_index": "isidore-documents-triple", + "_score": null, + "_source": { + "abstract": [ + "Contenido : • Dossier - Genre et justice Guita Grin Debert et Maria Filomena Gregori Violence de genre et justice Violência de gênero e justiça Gender Violence and Justice Lia Zanotta Machado Féminicide : nommer pour exister Feminicídio: nomear para existir Femicide: naming it into existence Luis Roberto 
Cardoso de Oliveira et Daniel Schroeter Simião « J’ai parlé à Dieu » : violences conjugales et impasses du système judiciaire brésilien Conversar com Deus: Violência Doméstica e Dilemas do J..." + ], + "author": [ + { + "firstname": [ + "REGARDS" + ], + "id": "cnrs_regards", + "lastname": [ + "CNRS" + ] + } + ], + "collection": [ + { + "id": "10670/2.raw0zt", + "name": "REDIAL & CEISAL" + } + ], + "date": [ + "2019-12-16" + ], + "datestamp": "2019-12-16", + "identifier": [ + "10670/1.2qtip2" + ], + "language": [ + { + "id": "http://lexvo.org/id/iso639-3/fra", + "label": "French" + } + ], + "publisher": [ + { + "contact": "contact@openedition.org", + "name": "OpenEdition", + "url": "http://www.openedition.org" + } + ], + "scope": [ + { + "id": "http://isidore.science/subject/publications", + "label": "Publications" + } + ], + "subject": [ + { + "id": "http://data.bnf.fr/ark:/12148/cb12346455m", + "label": [ + "Justicia", + "Raphidosperma", + "Campylostemon", + "Adhatoda", + "Sarotheca", + "Hemichoriste", + "Petalanthera", + "Adeloda", + "Raphidospora" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85071143", + "label": [ + "Justicia" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/63ce2558-0c0f-47d9-b408-bc3648c95b74", + "label": [ + "Étude de genre", + "Gender studies", + "Genre" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtZ5ajnsPcXB", + "label": [ + "justice", + "justice", + "justicia" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb133184141", + "label": [ + "Justice", + "Justice et droit", + "Injustice (science politique)", + "Justice (science politique)", + "Droit et justice", + "Justice (droit)", + "Injustice (droit)" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85071118", + "label": [ + "Justice", + "Injustice" + ] + }, + { + "id": "http://datos.bne.es/resource/XX526881", + "label": [ + "Justicia" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/11583", + "label": 
[ + "justice", + "justicia", + "justice" + ] + }, + { + "id": "http://GeoEthno#GUITA", + "label": [ + "Guita", + "Gita" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119337991", + "label": [ + "Violence", + "Brutalité", + "Violences", + "Brutalités" + ] + }, + { + "id": "http://datos.bne.es/resource/XX526747", + "label": [ + "Violencia" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85143514", + "label": [ + "Violence", + "Violent behavior" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtaXHZy6qadS", + "label": [ + "violence", + "violence", + "violencia" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb169817173", + "label": [ + "Gendèr", + "Gendèr panerus", + "Gender", + "Gendèr barung", + "Gèndèr", + "Gendèr panembung" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85053730", + "label": [ + "Gender (Musical instrument)" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb16654044n", + "label": [ + "Féminicide", + "Gynécide" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85061743", + "label": [ + "Homicide", + "Femicide" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtteqAxnS8SF", + "label": [ + "dieu", + "dios", + "god" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11975724f", + "label": [ + "Dieu", + "Idée de Dieu", + "Dieu (philosophie)", + "Être suprême" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85055517", + "label": [ + "God" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb124092496", + "label": [ + "Violence entre conjoints", + "Violence entre époux", + "Violences conjugales", + "Violence conjugale", + "Voies de fait entre époux", + "Conflits conjugaux", + "Querelles conjugales" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/13288", + "label": [ + "legal system", + "sistema judicial", + "système judiciaire" + ] + } + ], + "title": [ + "Brésil(s) n° 16 - 2019 : Género y justicia" + ], + "type": [ + { + "id": 
"http://isidore.science/ontology#blogPost", + "label": "Billets de blog" + } + ], + "url": [ + "http://rediceisal.hypotheses.org/28507" + ] + }, + "_type": "_doc", + "sort": [ + 1576454400000 + ] + }, + { + "_id": "bN9gi3ABhI997ZgaXz2U", + "_index": "isidore-documents-triple", + "_score": null, + "_source": { + "abstract": [ + "Cet article analyse, d’un point de vue anthropologique et féministe, les propositions de classification des féminicides, telles qu’elles ont été discutées au Brésil, et qui ont donné lieu à une nouvelle qualification juridique inscrite dans le code pénal. L’auteur démontre l’importance d’une dénomination féministe de la violence de genre et du féminicide ainsi que du contre-discours qui s’oppose au crime passionnel comme argument dominant. Introduit dans le vocabulaire du droit, ce terme a entraîné une reconfiguration des débats dans la sphère juridique et a modifié la perception sociale et culturelle de la violence de genre, rendant possible d’autres formes de prévention du féminicide et de lutte contre sa banalisation.", + "Este artigo analisa desde uma perspectiva teórica antropológica e feminista, as propostas de tipificação do feminicídio, tal como debatidas no Brasil e que resultaram na nova qualificadora incluída no seu Código Penal. O artigo relata e argumenta sobre a importância da nomeação feminista da violência de gênero e do feminicídio. Esta nomeação produziu uma narrativa contra-hegemônica ao entendimento de crimes passionais. Ao se introduzir na linguagem jurídica, estabeleceu novas configurações de embates jurídicos e introduziu alterações na percepção social e cultural de formas de prevenir a violência de gênero e de afastar a banalização dos feminicídios.", + "From an anthropological and feminist theoretical perspective, this article analyzes proposals for the typification of feminicide, as debated in Brazil, which resulted in the new qualifier included in the country’s Penal Code. 
The article reports and argues about the importance of explicit feminist identification of gender violence and femicide. Such identification produces a narrative that runs counter-hegemonically to the understanding of crimes of passion. By introducing itself into legal language, this terminology established new configurations of legal clashes and introduced changes in social and cultural perception of ways to how to prevent gender-based violence and reverse the banalization of femicide." + ], + "author": [ + { + "firstname": [ + "Lia Zanotta" + ], + "id": "machado_lia_zanotta", + "lastname": [ + "Machado" + ] + } + ], + "collection": [ + { + "id": "10670/2.ttiij2", + "name": "Brésil(s)" + } + ], + "date": [ + "2019-12-11" + ], + "datestamp": "2019-12-13", + "identifier": [ + "10670/1.xozprv", + "urn:doi:10.4000/bresils.5576" + ], + "keyword": [ + "féminicide", + "violence de genre", + "pratiques juridiques", + "loi sur les violences domestiques", + "masculinité hégémonique", + "législation et pouvoir", + "Brésil", + "XXIe siècle", + "feminicide", + "gender violence", + "legal practices", + "domestic violence law", + "hegemonic masculinity", + "legislation and power", + "Brazil", + "21st century", + "feminicídio", + "violência de gênero", + "práticas jurídicas", + "lei de violência doméstica", + "masculinidade hegemônica", + "legislação e poder", + "Brasil", + "século XXI" + ], + "language": [ + { + "id": "http://lexvo.org/id/iso639-3/fra", + "label": "French" + } + ], + "publisher": [ + { + "contact": "contact@openedition.org", + "name": "OpenEdition", + "url": "http://www.openedition.org" + } + ], + "scope": [ + { + "id": "http://isidore.science/subject/publications", + "label": "Publications" + } + ], + "subject": [ + { + "id": "http://www.eionet.europa.eu/gemet/concept/13077", + "label": [ + "assay", + "ensayo", + "analyse" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-76", + "label": [ + "Analyse" + ] + }, + { + "id": 
"http://www.eionet.europa.eu/gemet/concept/397", + "label": [ + "analysis", + "análisis", + "analyse" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119756783", + "label": [ + "Analyse", + "Analyse chimique", + "Dosage" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh2002006221", + "label": [ + "Analysis", + "Methods of analysis", + "Analysis and chemistry", + "Analytical methods", + "Chemical analysis", + "Analysis methods", + "Analysis and examination" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh99002428", + "label": [ + "Assaying" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-352", + "label": [ + "Belvédère", + "Point de vue" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/1447", + "label": [ + "classification", + "clasificación", + "classification" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrt9ldy0REXtf", + "label": [ + "clasificación", + "classification", + "classification" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb133183064", + "label": [ + "Classification", + "Connaissance, Classification de la", + "Classification de la connaissance" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh2002012033", + "label": [ + "Classification" + ] + }, + { + "id": "http://GeoEthno#BRESIL", + "label": [ + "Brésil", + "Brasil", + "Brazil", + "Etats-Unis du Brésil", + "République fédérative du Brésil", + "Federative Republic of Brazil" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtPZB1uLhkJZ", + "label": [ + "lieu", + "lugar", + "place" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtWpzy4rkNUg", + "label": [ + "nouvelle", + "novela corta", + "short story" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb12048699r", + "label": [ + "Qualification (droit)", + "Qualification juridique" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85075127", + "label": [ + "Law--Classification", + "Classification--Law" + ] + 
}, + { + "id": "http://www.eionet.europa.eu/gemet/concept/1542", + "label": [ + "code", + "códigos", + "code" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb133211330", + "label": [ + "Dénomination", + "Appellation", + "Désignation (lexicologie)" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119337991", + "label": [ + "Violence", + "Brutalité", + "Violences", + "Brutalités" + ] + }, + { + "id": "http://datos.bne.es/resource/XX526747", + "label": [ + "Violencia" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85143514", + "label": [ + "Violence", + "Violent behavior" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtaXHZy6qadS", + "label": [ + "violence", + "violence", + "violencia" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/63ce2558-0c0f-47d9-b408-bc3648c95b74", + "label": [ + "Étude de genre", + "Gender studies", + "Genre" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb16654044n", + "label": [ + "Féminicide", + "Gynécide" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85061743", + "label": [ + "Homicide", + "Femicide" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtVZNOkmee7s", + "label": [ + "crime", + "crime", + "crimen" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/11540", + "label": [ + "crime", + "delitos", + "crime" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb137343962", + "label": [ + "Crimes et criminels", + "Crimes violents", + "Délinquance", + "Criminalité", + "Criminels", + "Crime", + "Crimes" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85036577", + "label": [ + "Criminals", + "Offenders", + "Delinquents", + "Crime and criminals" + ] + }, + { + "id": "http://datos.bne.es/resource/XX526832", + "label": [ + "Delitos y faltas", + "Acciones delictivas", + "Acto punible", + "Crímenes", + "Faltas", + "Hechos delictivos", + "Infracciones" + ] + }, + { + "id": "http://datos.bne.es/resource/XX4576381", + "label": 
[ + "Delincuencia", + "Conducta delictiva", + "Criminalidad" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85033993", + "label": [ + "Crime", + "Crimes", + "Felonies", + "Misdemeanors", + "Urban crime", + "City crime", + "Crime--Social aspects", + "Crime and criminals", + "Delinquency" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85143536", + "label": [ + "Violent crimes", + "Crimes, Violent", + "Crimes of violence" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-705", + "label": [ + "Dictionnaire", + "Lexique", + "Vocabulaire" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/8990", + "label": [ + "vocabulary", + "vocabulario", + "vocabulaire" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrts9HqGPZ8e6", + "label": [ + "vocabulaire", + "vocabulario", + "vocabulary", + "glossaire", + "lexique" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb13162695n", + "label": [ + "Vocabulaire", + "Lexique" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh99005170", + "label": [ + "Vocabulary" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb13318687s", + "label": [ + "Droit", + "Sciences juridiques", + "Aspect juridique", + "Droit positif", + "Régime juridique", + "Systèmes juridiques", + "Système juridique", + "Science du droit", + "Ordre juridique" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh99004848", + "label": [ + "Law and legislation" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-719", + "label": [ + "Droit" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtgx5yOalcpT", + "label": [ + "derecho", + "droit", + "law" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtUpINzuzVc6", + "label": [ + "esfera", + "sphere", + "sphère" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119812876", + "label": [ + "Sphère" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85126590", + 
"label": [ + "Sphere" + ] + }, + { + "id": "http://datos.bne.es/resource/XX543469", + "label": [ + "Esfera" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb12651298v", + "label": [ + "Perception sociale", + "Cognition sociale", + "Perception interpersonnelle" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85123982", + "label": [ + "Social perception", + "Social cognition", + "Interpersonal perception", + "Cognition, Social" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119814612", + "label": [ + "Possibilité", + "Possibilité (logique)", + "Possible", + "Impossible", + "Possibilité (philosophie)", + "Impossibilité" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85105408", + "label": [ + "Possibility" + ] + }, + { + "id": "http://datos.bne.es/resource/XX531645", + "label": [ + "Posibilidad" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-1652", + "label": [ + "Prévention" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119329891", + "label": [ + "Prévention", + "Prévention et mesures de lutte", + "Programmes de prévention" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh99002416", + "label": [ + "Prevention" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb12067061k", + "label": [ + "Lutte contre" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh99004902", + "label": [ + "Control" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11981119f", + "label": [ + "Novae", + "Étoiles nouvelles", + "Novas", + "Étoiles temporaires" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85127463", + "label": [ + "Stars, New", + "New stars", + "Novae" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb123825501", + "label": [ + "Dos", + "Dorsum" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85010785", + "label": [ + "Back", + "Dorsum" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11932949s", + "label": [ + "Perspective", + 
"Perspective (art)", + "Perspective (architecture)", + "Perspective linéaire", + "Perspective aérienne (peinture)" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85100171", + "label": [ + "Perspective", + "Mechanical perspective", + "Linear perspective", + "Architectural perspective" + ] + }, + { + "id": "http://datos.bne.es/resource/XX526251", + "label": [ + "Perspectiva", + "Perspectiva arquitectónica", + "Perspectiva lineal", + "Perspectiva mecánica" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtIB6g7W9PYD", + "label": [ + "perspectiva", + "perspective", + "perspective" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-1523", + "label": [ + "Perspective" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb16960606h", + "label": [ + "Bancs à étirer", + "Orfèvres, Bancs d'", + "Bancs d'orfèvres", + "Argues" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb144661727", + "label": [ + "Explicit", + "Fin de manuscrit", + "Fin de texte" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtnh5uphFf23", + "label": [ + "identificación", + "identification", + "identification" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11961842g", + "label": [ + "Identification" + ] + }, + { + "id": "http://datos.bne.es/resource/XX534033", + "label": [ + "Identificación" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh99001565", + "label": [ + "Identification", + "Field guides", + "Keys (Identification guides)", + "Identification guides" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh2002011456", + "label": [ + "Identification" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb169817173", + "label": [ + "Gendèr", + "Gendèr panerus", + "Gender", + "Gendèr barung", + "Gèndèr", + "Gendèr panembung" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85053730", + "label": [ + "Gender (Musical instrument)" + ] + }, + { + "id": 
"http://ark.frantiq.fr/ark:/26678/pcrtGQGSz7IFZE", + "label": [ + "pasión", + "passion", + "passion" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11933068t", + "label": [ + "Perception", + "Psychologie de la perception", + "Perception (psychologie)" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85099708", + "label": [ + "Perception", + "Supraliminal perception" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-1511", + "label": [ + "Perception" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb120485802", + "label": [ + "Lois", + "Loi", + "Lois et règlements", + "Loi (droit)", + "Lois écrites", + "Actes législatifs", + "Statuts (lois)" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85127611", + "label": [ + "Statutes", + "Legislative acts", + "Legislative enactments", + "Acts, Legislative", + "Enactments, Legislative", + "Laws (Statutes)" + ] + }, + { + "id": "http://datos.bne.es/resource/XX528757", + "label": [ + "Leyes", + "Ley (Derecho)", + "Leyes y normas legales" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrt2XKbjOmPcT", + "label": [ + "law text", + "ley", + "loi" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/4707", + "label": [ + "law (individual)", + "ley (individual)", + "loi" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-1201", + "label": [ + "Législation", + "Décret", + "Circulaire", + "Arrêté", + "Texte Réglementaire", + "Loi" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119743301", + "label": [ + "Employés de maison", + "Valets", + "Serviteurs", + "Personnel domestique", + "Service domestique", + "Gens de maison", + "Domestiques" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85038950", + "label": [ + "Household employees", + "Household staff", + "Household workers", + "Domestics", + "Domestic employees", + "Servants" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11974329t", + 
"label": [ + "Employées de maison", + "Domestiques", + "Personnel domestique", + "Femmes de ménage", + "Femmes employées de maison", + "Gens de maison", + "Bonnes", + "Femmes de chambre" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh98000555", + "label": [ + "Lady's maids" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85147520", + "label": [ + "Women household employees", + "Housemaids", + "Women servants", + "Women domestics", + "Maids, House" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119462329", + "label": [ + "Masculinité", + "Virilité", + "Masculinité (psychologie)" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85081797", + "label": [ + "Masculinity", + "Masculinity (Psychology)" + ] + }, + { + "id": "http://datos.bne.es/resource/XX546447", + "label": [ + "Masculinidad", + "Virilidad" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/4749", + "label": [ + "legislation", + "legislación", + "législation" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtwTMLkQ5qo9", + "label": [ + "legislación", + "legislation", + "législation" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb13319331k", + "label": [ + "Législation", + "Procédure législative", + "Processus législatif", + "Science législative", + "Technique législative", + "Activité législative", + "Travail législatif" + ] + }, + { + "id": "http://datos.bne.es/resource/XX548810", + "label": [ + "Legislación", + "Actividad legislativa", + "Formulación de las leyes", + "Leyes--Elaboración", + "Procedimiento legislativo", + "Proceso legislativo", + "Técnica legislativa", + "Trabajo legislativo" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtkvcoSA0U1y", + "label": [ + "poder", + "pouvoir", + "power", + "pouvoir (concept de)" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11930812p", + "label": [ + "Vingt et unième siècle", + "21e siècle", + "Vingt-et-unième siècle", + "XXIe siècle" + ] + }, + { + "id": 
"http://id.loc.gov/authorities/subjects/sh85139024", + "label": [ + "Twenty-first century", + "21st century" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrteByKqYsfRI", + "label": [ + "21st century", + "siglo XXI", + "XXIe siècle" + ] + } + ], + "title": [ + "Féminicide : nommer pour exister" + ], + "type": [ + { + "id": "http://isidore.science/ontology#article", + "label": "Articles" + } + ], + "url": [ + "http://journals.openedition.org/bresils/5576" + ] + }, + "_type": "_doc", + "sort": [ + 1576022400000 + ] + }, + { + "_id": "re9fjHABhI997Zgarx0L", + "_index": "isidore-documents-triple", + "_score": null, + "_source": { + "abstract": [ + "Du « drame de la jalousie » au « crime passionnel » : comment, au fil du temps, a-t-on évité de parler de féminicide ? Par Nawal Lyamini (avec le service documentation d'Ouest-France), le 23/11/2019. https://www.ouest-france.fr/faits-divers/feminicide/du-drame-de-la-jalousie-au-crime-passionnel-comment-au-fil-du-temps-t-eviter-de-parler-de-feminicide-6620759  Lire l'article" + ], + "author": [ + { + "firstname": [ + "Joelle" + ], + "id": "palmieri_joelle", + "lastname": [ + "Palmieri" + ], + "pid": [ + "0000-0002-0625-8702", + "160306779", + "joelle-palmieri" + ] + } + ], + "collection": [ + { + "id": "10670/2.s0oid6", + "name": "Colonialité : enjeux, paradoxes" + } + ], + "date": [ + "2019-12-09" + ], + "datestamp": "2019-12-09", + "identifier": [ + "10670/1.o7stqs" + ], + "language": [ + { + "id": "http://lexvo.org/id/iso639-3/fra", + "label": "French" + } + ], + "publisher": [ + { + "contact": "contact@openedition.org", + "name": "OpenEdition", + "url": "http://www.openedition.org" + } + ], + "scope": [ + { + "id": "http://isidore.science/subject/publications", + "label": "Publications" + } + ], + "subject": [ + { + "id": "http://GeoEthno#FRANCE", + "label": [ + "France", + "France", + "République française", + "French Republic", + "Royaume de France", + "Royaume de France (987-1791)" + ] + }, + { + "id": 
"http://data.bnf.fr/ark:/12148/cb131873556", + "label": [ + "Novembre" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrt63hJdBieYy", + "label": [ + "drama", + "drama", + "drame" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11939701x", + "label": [ + "Théâtre (genre littéraire)", + "Littérature dramatique", + "Littérature théâtrale", + "Drame", + "Drames" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85039316", + "label": [ + "Drama", + "Stage", + "Drama, Modern", + "Plays", + "Drama--Philosophy" + ] + }, + { + "id": "http://datos.bne.es/resource/XX539770", + "label": [ + "Teatro (Género literario)", + "Drama", + "Obras de teatro" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtlWzPvv9a6N", + "label": [ + "celos", + "jalousie", + "jealousy" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119473037", + "label": [ + "Jalousie" + ] + }, + { + "id": "http://datos.bne.es/resource/XX525020", + "label": [ + "Celos", + "Celotipia" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85069852", + "label": [ + "Jealousy" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtVZNOkmee7s", + "label": [ + "crime", + "crime", + "crimen" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/11540", + "label": [ + "crime", + "delitos", + "crime" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb137343962", + "label": [ + "Crimes et criminels", + "Crimes violents", + "Délinquance", + "Criminalité", + "Criminels", + "Crime", + "Crimes" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85036577", + "label": [ + "Criminals", + "Offenders", + "Delinquents", + "Crime and criminals" + ] + }, + { + "id": "http://datos.bne.es/resource/XX526832", + "label": [ + "Delitos y faltas", + "Acciones delictivas", + "Acto punible", + "Crímenes", + "Faltas", + "Hechos delictivos", + "Infracciones" + ] + }, + { + "id": "http://datos.bne.es/resource/XX4576381", + "label": [ + "Delincuencia", + "Conducta 
delictiva", + "Criminalidad" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85033993", + "label": [ + "Crime", + "Crimes", + "Felonies", + "Misdemeanors", + "Urban crime", + "City crime", + "Crime--Social aspects", + "Crime and criminals", + "Delinquency" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85143536", + "label": [ + "Violent crimes", + "Crimes, Violent", + "Crimes of violence" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119782672", + "label": [ + "Fil", + "Fils (textile)", + "Fil à coudre", + "Fil (textile)", + "Bobines de fil" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85135003", + "label": [ + "Thread" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85149022", + "label": [ + "Yarn" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-1943", + "label": [ + "Temps" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb133189074", + "label": [ + "Temps" + ] + }, + { + "id": "http://datos.bne.es/resource/XX524393", + "label": [ + "Tiempo", + "Duración", + "Horas (Tiempo)" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85135395", + "label": [ + "Time", + "Hours (Time)" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb16654044n", + "label": [ + "Féminicide", + "Gynécide" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85061743", + "label": [ + "Homicide", + "Femicide" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/2284", + "label": [ + "documentation", + "documentación", + "documentation" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb133184637", + "label": [ + "Documentation" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh99005687", + "label": [ + "Documentation" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-712", + "label": [ + "Documentation" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtSKcTSyd4bv", + "label": [ + "documentación", 
+ "documentation", + "source material" + ] + } + ], + "title": [ + "Ouest France - novembre 2019" + ], + "topic": [ + { + "id": "http://aurehal.archives-ouvertes.fr/subject/shs.hist", + "label": "Histoire", + "proba": "1.000" + } + ], + "type": [ + { + "id": "http://isidore.science/ontology#blogPost", + "label": "Billets de blog" + } + ], + "url": [ + "http://domination.hypotheses.org/2099" + ] + }, + "_type": "_doc", + "sort": [ + 1575849600000 + ] + }, + { + "_id": "xtw5i3ABhI997ZgaQcpM", + "_index": "isidore-documents-triple", + "_score": null, + "_source": { + "abstract": [ + "[ES] Palimpsestos mexicanos surgió tras la obtención de la beca Santander con destino en la Facultad de Arte y Diseño, UNAM, CDMX, México. La propuesta se concibe como un proyecto expositivo multidisciplinar constituido por un conjunto de subproyectos heterogéneos, en sinergia, uniendo fuerzas, realizados con disciplinas y técnicas diversas tales como el collage, la gráfica y la escultura. El planteamiento general de la obra gira alrededor de las problemáticas que afronta la sociedad mexicana: la corrupción política, las grandes desigualdades sociales, el crimen organizado, la discriminación que sufren los pueblos indígenas, la situación de indefensión de la mujer ante un entorno preeminentemente machista y los intolerables índices de feminicidios. En definitiva, un conjunto de factores que desestabilizan el país y crean inseguridad en la población. Palimpsestos mexicanos también visibiliza la lucha por los derechos civiles de los sectores más desfavorecidos de la sociedad, y empodera y convierte a la mujer mexicana en símbolo de esperanza de un futuro mejor. Proponiendo un dibujo de México alejado de los estereotipos y del folclorismo que presuntamente define su identidad nacional. Aportando una visión del país diferente al imaginario conocido, desde un punto de vista subjetivo, de carácter político pero no dogmático. 
Con la intención de mostrar, a través del arte contemporáneo, un México en construcción con unas problemáticas a resolver de dimensiones estratosféricas y fascinante en sus retos de futuro. En un país, el más poblado de habla hispana, que está llamado a ser una potencia económica y un referente cultural a escala global.", + "[EN] Mexican Palimpsests emerged after obtaining the Santander scholarship in the Faculty of Art and Design, UNAM, CDMX, Mexico. The proposal is conceived as a multidisciplinary exhibition project constituted by a set of heterogeneous subprojects, in synergy, joining forces, made with different disciplines and techniques such as collage, graphics and sculpture. The general approach of the work revolves around the problems faced by Mexican society: political corruption, large social inequalities, organized crime, discrimination suffered by indigenous peoples, the situation of women's helplessness in a pre-eminently macho environment and the intolerable indexes of feminicides. In short, a set of factors that destabilize the country and create insecurity in the population. Mexican Palimpsests also highlights the struggle for civil rights of the most disadvantaged sectors of society, and empowers and turns Mexican women into a symbol of hope for a better future. Proposing a drawing of Mexico away from the stereotypes and folklore that presumably defines their national identity. Providing a vision of the country different from the known imaginary, from a subjective point of view, of a political nature but not dogmatic. With the intention of showing, through contemporary art, a Mexico in construction with some problems to solve of stratospheric dimensions and fascinating in its future challenges. In a country, the most populous Spanish-speaking, which is called to be an economic power and a cultural reference on a global scale.", + "Navarro Rodríguez, MÁ. (2019). Palimpsestos mexicanos. 
http://hdl.handle.net/10251/130234", + "TFGM" + ], + "author": [ + { + "firstname": [ + "Miguel Ángel" + ], + "id": "navarro_rodriguez_miguel_angel", + "lastname": [ + "Navarro Rodríguez" + ] + } + ], + "collection": [ + { + "id": "10670/2.p88wns", + "name": "Repositorio Institucional de la Universitat Politècnica de València" + } + ], + "date": [ + "2019-11-05" + ], + "datestamp": "2019-11-05", + "identifier": [ + "http://hdl.handle.net/10251/130234" + ], + "keyword": [ + "POLÍTICA", + "DESIGUALDAD", + "CORRUPCIÓN", + "VIOLENCIA", + "SUBJETIVISMO", + "HETEROGÉNEA", + "SINERGIA.", + "POLITICS", + "INEQUALITY", + "CORRUPTION", + "VIOLENCE", + "SUBJETIVISM", + "HETEROGENEOUS", + "SYNERGY.", + "DIBUJO", + "Grado en Bellas Artes-Grau en Belles Arts" + ], + "language": [ + { + "id": "http://lexvo.org/id/iso639-3/spa", + "label": "Spanish" + } + ], + "publisher": [ + { + "contact": "riunet@bib.upv.es", + "name": "Universitat Politècnica de València", + "url": "http://www.upv.es" + } + ], + "scope": [ + { + "id": "http://isidore.science/subject/publications", + "label": "Publications" + } + ], + "subject": [ + { + "id": "http://datos.bne.es/resource/XX524633", + "label": [ + "Palimpsestos", + "Manuscritos (Palimpsestos)" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85080780", + "label": [ + "Palimpsests", + "Manuscripts (Palimpsests)" + ] + }, + { + "id": "http://datos.bne.es/resource/XX529611", + "label": [ + "Mexicanos", + "Mejicanos", + "México--Gentilicio" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85084533", + "label": [ + "Mexicans" + ] + }, + { + "id": "http://datos.bne.es/resource/XX532829", + "label": [ + "Fatalismo", + "Destino" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85047439", + "label": [ + "Fate and fatalism", + "Fatalism", + "Destiny" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtkpbNgKDbAx", + "label": [ + "destin", + "destino", + "fate", + "destinée" + ] + }, + { + "id": 
"http://www.eionet.europa.eu/gemet/concept/568", + "label": [ + "art", + "arte", + "art" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrttq9gp2ZMuc", + "label": [ + "art", + "art", + "arte", + "Arts" + ] + }, + { + "id": "http://datos.bne.es/resource/XX525289", + "label": [ + "Arte", + "Bellas artes" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85007461", + "label": [ + "Art", + "Western art (Western countries)", + "Fine arts", + "Iconography", + "Arts, Fine", + "Art, Occidental", + "Arts, Visual", + "Visual arts", + "Occidental art", + "Art, Visual", + "Art, Western (Western countries)" + ] + }, + { + "id": "http://datos.bne.es/resource/XX527044", + "label": [ + "Diseño" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85037197", + "label": [ + "Design" + ] + }, + { + "id": "http://GeoEthno#MEXICO", + "label": [ + "Mexico", + "Ciudad de México", + "Mexico City", + "District Fédéral", + "México", + "México D.F." + ] + }, + { + "id": "http://GeoEthno#MEXIQUE", + "label": [ + "Mexique", + "México", + "Mexico", + "Méjico", + "Estados Unidos Mexicanos", + "Etats-Unis du Mexique", + "United Mexican States", + "Nouvelle-Espagne", + "Nouvelle Espagne", + "Nueva España", + "Vice-Royauté de la Nouvelle-Espagne (1535-1821)", + "Nouvelle-Espagne (1535-1821)" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/6710", + "label": [ + "project", + "proyecto", + "projet" + ] + }, + { + "id": "http://datos.bne.es/resource/XX533512", + "label": [ + "Collage", + "Colage", + "Collages" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85027965", + "label": [ + "Collage", + "Collages" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrt4rR2kyMQuW", + "label": [ + "escultura", + "sculpture", + "sculpture" + ] + }, + { + "id": "http://datos.bne.es/resource/XX525481", + "label": [ + "Escultura" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85119004", + "label": [ + "Sculpture", + "Stonework, 
Decorative" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtZeR0GRIi3f", + "label": [ + "general", + "general", + "général" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/1052", + "label": [ + "building site", + "obra", + "chantier" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/13286", + "label": [ + "organisation (law)", + "sociedad", + "société (organisme)" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrt5Vyzd3D9vw", + "label": [ + "sociedad", + "société", + "society" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/7823", + "label": [ + "society", + "sociedad", + "société (humaine)" + ] + }, + { + "id": "http://datos.bne.es/resource/XX4659801", + "label": [ + "Corrupción política", + "Política--Corrupción", + "Politica--Prácticas corruptas", + "Políticos--Corrupción", + "Políticos--Prácticas corruptas" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85033057", + "label": [ + "Political corruption", + "Malversation", + "Corruption (in politics)", + "Politics, Practical--Corrupt practices", + "Graft in politics", + "Political scandals", + "Boss rule" + ] + }, + { + "id": "http://datos.bne.es/resource/XX542170", + "label": [ + "Crimen organizado", + "Gangsterismo" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85095528", + "label": [ + "Organized crime", + "Crime syndicates", + "Organised crime" + ] + }, + { + "id": "http://datos.bne.es/resource/XX525409", + "label": [ + "Discriminación" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85038376", + "label": [ + "Discrimination", + "Bias" + ] + }, + { + "id": "http://datos.bne.es/resource/XX4576821", + "label": [ + "Pueblos indígenas", + "Aborígenes", + "Indígenas", + "Nativos", + "Razas autóctonas", + "Razas indígenas" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85090174", + "label": [ + "Indigenous peoples", + "Native races", + "Aboriginal peoples", + "Aborigines", + "Indigenous 
populations", + "Native peoples" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/9344", + "label": [ + "woman", + "mujer", + "femme" + ] + }, + { + "id": "http://datos.bne.es/resource/XX4576329", + "label": [ + "Mujeres", + "Mujer" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85147274", + "label": [ + "Women", + "Womyn", + "Wimmin", + "Woman", + "Womon", + "Human females" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrthVHcfmv1xQ", + "label": [ + "femme", + "mujer", + "woman" + ] + }, + { + "id": "http://datos.bne.es/resource/XX528221", + "label": [ + "Índices" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85064865", + "label": [ + "Indexes", + "Indices" + ] + }, + { + "id": "http://datos.bne.es/resource/XX537623", + "label": [ + "Factores" + ] + }, + { + "id": "http://datos.bne.es/resource/XX526190", + "label": [ + "Población", + "Población humana" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85104910", + "label": [ + "Population", + "Populations, Human", + "Human populations", + "Population growth", + "Human population" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrt7vLkeZro2h", + "label": [ + "población", + "population", + "population" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtz9K8Omavxa", + "label": [ + "lucha", + "lutte", + "wrestling" + ] + }, + { + "id": "http://datos.bne.es/resource/XX526332", + "label": [ + "Derechos políticos y civiles", + "Derechos civiles", + "Derechos fundamentales", + "Libertades políticas", + "Libertades públicas" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85026371", + "label": [ + "Civil rights", + "Basic rights", + "Civil rights--Law and legislation", + "Civil liberties", + "Constitutional rights", + "Rights, Civil", + "Fundamental rights" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtr7EH9tyyDH", + "label": [ + "símbolo", + "symbol", + "symbole" + ] + }, + { + "id": 
"http://datos.bne.es/resource/XX527300", + "label": [ + "Esperanza" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85061922", + "label": [ + "Hope" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtzmgVgArOJh", + "label": [ + "dessin", + "dibujo", + "drawing" + ] + }, + { + "id": "http://datos.bne.es/resource/XX525302", + "label": [ + "Dibujo", + "Dibujos" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85039408", + "label": [ + "Drawing", + "Drawings", + "Sketching" + ] + }, + { + "id": "http://datos.bne.es/resource/XX525410", + "label": [ + "Nacionalismo", + "Conciencia nacional", + "Identidad nacional", + "Separatismo" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85090150", + "label": [ + "Nationalism", + "Identity, National", + "National identity", + "Consciousness, National", + "National consciousness" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtIebqI21aZk", + "label": [ + "vision", + "vision", + "visión", + "sight" + ] + }, + { + "id": "http://datos.bne.es/resource/XX525877", + "label": [ + "Vista", + "Visión" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85143872", + "label": [ + "Vision", + "Eyesight", + "Seeing", + "Sight" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtgWd5ryyeW5", + "label": [ + "imaginaire", + "imaginario", + "imaginings" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/14919", + "label": [ + "point", + "punto", + "point" + ] + }, + { + "id": "http://datos.bne.es/resource/XX526047", + "label": [ + "Carácter", + "Caracteres (Psicología)", + "Caracteriología" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85022615", + "label": [ + "Character", + "Ethology" + ] + }, + { + "id": "http://datos.bne.es/resource/XX555328", + "label": [ + "Intención" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85067189", + "label": [ + "Intention" + ] + }, + { + "id": "http://datos.bne.es/resource/XX560596", + 
"label": [ + "Arte--S.XX", + "Arte contemporáneo", + "Arte moderno--S.XX" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85007805", + "label": [ + "Art, Modern--20th century" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrt2f8QaSD6TS", + "label": [ + "block of flats", + "construcción", + "immeuble", + "apartment building", + "habitation-HLM", + "insula", + "insula" + ] + }, + { + "id": "http://datos.bne.es/resource/XX524879", + "label": [ + "Construcción", + "Edificación", + "Edificios--Construcción", + "Industria de la construcción", + "Sector de la construcción" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85017693", + "label": [ + "Building", + "Architectural engineering", + "Construction", + "Buildings--Design and construction", + "Engineering, Architectural" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtM9HMWQTGJV", + "label": [ + "building (process of)", + "construcción", + "construction" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtGsxH9sgu4T", + "label": [ + "poblado", + "poblado", + "poblado" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/12985", + "label": [ + "speech", + "habla", + "discours" + ] + }, + { + "id": "http://datos.bne.es/resource/XX526923", + "label": [ + "Lenguaje", + "Habla" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85074518", + "label": [ + "Language and languages", + "Foreign languages", + "Languages" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtD9Jt6A2Jia", + "label": [ + "potencia", + "power (strength)", + "puissance" + ] + }, + { + "id": "http://datos.bne.es/resource/XX536714", + "label": [ + "Country", + "Country (Música)", + "Hillbilly (Música)", + "Música country", + "Música hillbilly" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85033470", + "label": [ + "Country music", + "Western and country music", + "Country and western music", + "Country music--United States", + "Hillbilly music" + ] + 
}, + { + "id": "http://datos.bne.es/resource/XX529442", + "label": [ + "Folklore", + "Tradiciones populares" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85050104", + "label": [ + "Folklore", + "Folk beliefs", + "Folk-lore", + "Traditions" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/10891", + "label": [ + "folklore", + "folklore", + "folklore" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtoPEnUJFP1m", + "label": [ + "folklore", + "folklore", + "folklore" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/6371", + "label": [ + "politics", + "política", + "politique" + ] + }, + { + "id": "http://datos.bne.es/resource/XX4576319", + "label": [ + "Política", + "Ciencias políticas", + "Política--Teorías" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85104440", + "label": [ + "Political science", + "Political thought", + "Government", + "Political theory", + "Civil government", + "Commonwealth, The", + "Science, Political", + "Administration", + "Politics" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/6370", + "label": [ + "policy", + "política", + "politique" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtQdSP02tK3w", + "label": [ + "política", + "politics", + "politique" + ] + }, + { + "id": "http://datos.bne.es/resource/XX4662968", + "label": [ + "Corrupción", + "Delitos de corrupción", + "Prácticas corruptas" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh94002193", + "label": [ + "Corruption", + "Corrupt practices" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtNaA75fox5w", + "label": [ + "corrupción", + "corruption", + "corruption" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtaXHZy6qadS", + "label": [ + "violence", + "violence", + "violencia" + ] + }, + { + "id": "http://datos.bne.es/resource/XX526747", + "label": [ + "Violencia" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85143514", + "label": [ 
+ "Violence", + "Violent behavior" + ] + }, + { + "id": "http://datos.bne.es/resource/XX534266", + "label": [ + "Subjetividad", + "Subjetivismo" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85129469", + "label": [ + "Subjectivity", + "Subjectivism" + ] + } + ], + "title": [ + "Palimpsestos mexicanos" + ], + "type": [ + { + "id": "http://isidore.science/ontology#thesis", + "label": "Mémoires, Thèses et HDR" + } + ], + "url": [ + "http://hdl.handle.net/10251/130234" + ] + }, + "_type": "_doc", + "sort": [ + 1572912000000 + ] + }, + { + "_id": "qwj5jXABhI997ZgaSHWV", + "_index": "isidore-documents-triple", + "_score": null, + "_source": { + "abstract": [ + "« Il y a quelque chose d’important chez les femmes du Guatemala, surtout la femme indigène, quelque chose d’important qui est sa relation avec la terre ; entre la terre et la mère. La terre nourrit et la femme donne vie. Face à ça, la femme elle-même doit garder ça comme son secret à elle, un respect vis-à-vis de la terre. C’est comme une relation entre mari et femme, la relation entre la mère et la terre. Il y a un dialogue constant entre la terre et la femme » (Rigoberta Menchú Tum). Durant..." 
+ ], + "author": [ + { + "firstname": [ + "Sébastien" + ], + "id": "jahan_sebastien", + "lastname": [ + "Jahan" + ], + "pid": [ + "048920932" + ] + } + ], + "collection": [ + { + "id": "10670/2.v8a41w", + "name": "Presses universitaires de Rennes" + } + ], + "date": [ + "2019-10-14" + ], + "datestamp": "2019-10-14", + "identifier": [ + "10670/1.c1x51g", + "urn:doi:10.4000/books.pur.98967", + "urn:isbn:9782753508200", + "urn:eisbn:9782753566606" + ], + "keyword": [ + "corps", + "violence", + "sociologie de la violence", + "History", + "HIS010000", + "HBJD" + ], + "language": [ + { + "id": "http://lexvo.org/id/iso639-3/fra", + "label": "French" + } + ], + "publisher": [ + { + "contact": "contact@revues.org", + "name": "OpenEdition", + "url": "http://www.openedition.org" + } + ], + "scope": [ + { + "id": "http://isidore.science/subject/publications", + "label": "Publications" + } + ], + "subject": [ + { + "id": "http://data.bnf.fr/ark:/12148/cb119337991", + "label": [ + "Violence", + "Brutalité", + "Violences", + "Brutalités" + ] + }, + { + "id": "http://datos.bne.es/resource/XX526747", + "label": [ + "Violencia" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85143514", + "label": [ + "Violence", + "Violent behavior" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtaXHZy6qadS", + "label": [ + "violence", + "violence", + "violencia" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119556551", + "label": [ + "Femmes", + "Femme" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh2002006249", + "label": [ + "Women", + "Women's work" + ] + }, + { + "id": "http://datos.bne.es/resource/XX4576329", + "label": [ + "Mujeres", + "Mujer" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11967012s", + "label": [ + "Génocide", + "Extermination d'un peuple" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85053923", + "label": [ + "Genocide", + "Ethnic purification", + "Ethnic cleansing", + "Purification, Ethnic", + 
"Ethnocide", + "Cleansing, Ethnic" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb16654044n", + "label": [ + "Féminicide", + "Gynécide" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85061743", + "label": [ + "Homicide", + "Femicide" + ] + }, + { + "id": "http://GeoEthno#GUATEMALA", + "label": [ + "Guatemala", + "Guatemala", + "Guatemala", + "República de Guatemala", + "République du Guatemala", + "Republic of Guatemala" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrthVHcfmv1xQ", + "label": [ + "femme", + "mujer", + "woman" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/9344", + "label": [ + "woman", + "mujer", + "femme" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-921", + "label": [ + "Femme" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtpm08AzTZai", + "label": [ + "indígena", + "indigène", + "native" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11975911n", + "label": [ + "Terre", + "Globe terrestre" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85040427", + "label": [ + "Earth (Planet)", + "Earth" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtYJe0Kp7hmm", + "label": [ + "earth", + "terre", + "tierra" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtEo3JbgULtV", + "label": [ + "madre", + "mère", + "mother" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtmkxcYMqVKc", + "label": [ + "life", + "vida", + "vie" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11933780m", + "label": [ + "Vie", + "Sens de la vie", + "Philosophie de la vie", + "Vie, Sens de la", + "Vie (philosophie)" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85076807", + "label": [ + "Life", + "Life--Philosophy" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11935546c", + "label": [ + "Face", + "Facies (anatomie)" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85046785", + "label": [ + "Face", + 
"Human face" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11960513t", + "label": [ + "Secret" + ] + }, + { + "id": "http://datos.bne.es/resource/XX547040", + "label": [ + "Secreto", + "Confidencialidad" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85119431", + "label": [ + "Secrecy", + "Concealment" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb12321516c", + "label": [ + "Respect", + "Considération", + "Déférence", + "Égard" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85113175", + "label": [ + "Respect", + "Esteem", + "Deference" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb133184699", + "label": [ + "Époux", + "Relations époux-épouses", + "Relations conjugales", + "Mari et femme" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85063204", + "label": [ + "Husband and wife", + "Husband and wife--Law and legislation", + "Wife and husband", + "Man and wife", + "Matrimonial regime", + "Spouses--Legal status, laws, etc" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb13318335q", + "label": [ + "Couple", + "Mari et femme", + "Gens mariés", + "Relations conjugales", + "Personnes mariées", + "Couples", + "Vie conjugale", + "Relations mari-femme", + "Mariés" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh96009430", + "label": [ + "Couples" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb12481481z", + "label": [ + "Dialogue", + "Dialogue (communication interpersonnelle)", + "Dialogue, Analyse du", + "Analyse du dialogue" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh94003592", + "label": [ + "Dialogue analysis", + "Analysis of dialogue", + "DA (Interpersonal communication)" + ] + }, + { + "id": "http://datos.bne.es/resource/XX5080451", + "label": [ + "Análisis del diálogo", + "Análisis conversacional", + "Análisis de la conversación", + "Análisis del discurso dialógico", + "Conversación--Análisis del discurso", + "Diálogo, Análisis del", + "Diálogo 
(Comunicación interpersonal)" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtxindNEQjtS", + "label": [ + "diálogo", + "dialogue", + "dialogue" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb16505206p", + "label": [ + "Corps", + "Corps (anatomie)", + "Parties du corps" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrta44Qt67PN7", + "label": [ + "body", + "corps", + "cuerpo" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-1858", + "label": [ + "Sociologie" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/7831", + "label": [ + "sociology", + "sociología", + "sociologie" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb133408573", + "label": [ + "Sociologie", + "Aspect sociologique", + "Doctrines sociales" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh2001008870", + "label": [ + "Sociological aspects" + ] + }, + { + "id": "http://datos.bne.es/resource/XX525151", + "label": [ + "Sociología" + ] + } + ], + "title": [ + "La violence contre les femmes au Guatemala : du génocide au féminicide" + ], + "type": [ + { + "id": "http://isidore.science/ontology#book", + "label": "Livres et chapitres d'ouvrages" + } + ], + "url": [ + "http://books.openedition.org/pur/98967" + ] + }, + "_type": "_doc", + "sort": [ + 1571011200000 + ] + }, + { + "_id": "-uF8i3ABhI997ZgaqwLy", + "_index": "isidore-documents-triple", + "_score": null, + "_source": { + "abstract": [ + "Paris : 15 de octubre de 2019 18h30-21h00 : Mexique : femmes en danger aveec Ruth Fierro : Feminicides au Mexique. Mardi 15 octobre 2019 entre 18H30 et 21H00 sur le Campus Condorcet au 5, cours des Humanités (salle à confirmer) Mardi 15 octobre 2019, la conférence, Mexique : femmes en danger, donne la parole à Ruth Fierro Pineda, avocate de formation pour l’État de Chihuahua (nord du Mexique) et coordinatrice Générale du Centro de Derechos Humanos de las Mujeres (CEDEHM), l'association lau..." 
+ ], + "author": [ + { + "firstname": [ + "REGARDS" + ], + "id": "cnrs_regards", + "lastname": [ + "CNRS" + ] + } + ], + "collection": [ + { + "id": "10670/2.raw0zt", + "name": "REDIAL & CEISAL" + } + ], + "date": [ + "2019-10-14" + ], + "datestamp": "2019-10-14", + "identifier": [ + "10670/1.sjz58l" + ], + "language": [ + { + "id": "http://lexvo.org/id/iso639-3/fra", + "label": "French" + } + ], + "publisher": [ + { + "contact": "contact@openedition.org", + "name": "OpenEdition", + "url": "http://www.openedition.org" + } + ], + "scope": [ + { + "id": "http://isidore.science/subject/publications", + "label": "Publications" + } + ], + "subject": [ + { + "id": "http://GeoEthno#PARIS", + "label": [ + "Paris", + "Lutèce" + ] + }, + { + "id": "http://GeoEthno#MEXIQUE", + "label": [ + "Mexique", + "México", + "Mexico", + "Méjico", + "Estados Unidos Mexicanos", + "Etats-Unis du Mexique", + "United Mexican States", + "Nouvelle-Espagne", + "Nouvelle Espagne", + "Nueva España", + "Vice-Royauté de la Nouvelle-Espagne (1535-1821)", + "Nouvelle-Espagne (1535-1821)" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119556551", + "label": [ + "Femmes", + "Femme" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh2002006249", + "label": [ + "Women", + "Women's work" + ] + }, + { + "id": "http://datos.bne.es/resource/XX4576329", + "label": [ + "Mujeres", + "Mujer" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/3852", + "label": [ + "hazard", + "peligros", + "danger" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb13187354v", + "label": [ + "Octobre" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb13319847b", + "label": [ + "Campus", + "Cités universitaires", + "Campus universitaires" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh2006002824", + "label": [ + "College campuses", + "Campuses, College", + "University campuses" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-410", + "label": [ + 
"Campus", + "Cité universitaire" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-2093", + "label": [ + "Voie urbaine", + "Avenue", + "Boulevard", + "Cours" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119753052", + "label": [ + "Enseignement classique", + "Humanités" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85026700", + "label": [ + "Classical education", + "Education, Classical" + ] + }, + { + "id": "http://datos.bne.es/resource/XX543132", + "label": [ + "Educación clásica" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb13189833r", + "label": [ + "Lettres classiques (enseignement supérieur)", + "Humanités gréco-latines", + "Humanités" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85062913", + "label": [ + "Humanities" + ] + }, + { + "id": "http://datos.bne.es/resource/XX527126", + "label": [ + "Humanidades" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-572", + "label": [ + "Congrès", + "Conférence", + "Colloque" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11933029x", + "label": [ + "Parole", + "Phonation", + "Langage oral", + "Langage articulé" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85126434", + "label": [ + "Speech", + "Talking" + ] + }, + { + "id": "http://datos.bne.es/resource/XX526839", + "label": [ + "Libertad condicional", + "Excarcelación sujeta a condición", + "Puesta en libertad bajo condición" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11975771z", + "label": [ + "Formation" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh00005618", + "label": [ + "Training of" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/8607", + "label": [ + "training", + "enseñanza", + "formation" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-886", + "label": [ + "État" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/8062", + "label": [ + 
"state", + "estado", + "état" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11937050q", + "label": [ + "État", + "État, Théorie de l'", + "Théorie de l'État", + "Autorités publiques", + "Autorité publique", + "Puissance publique", + "Pouvoirs publics" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85127474", + "label": [ + "State, The", + "Commonwealth, The", + "Administration" + ] + }, + { + "id": "http://GeoEthno#CHIHUAHUA", + "label": [ + "Chihuahua", + "Estado de Chihuahua", + "Chihuahua" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb123228482", + "label": [ + "Nord (point cardinal)", + "Nord" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85092449", + "label": [ + "North (The word)" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrt0bOnESvRsW", + "label": [ + "groupe associatif", + "grupo asociativo", + "sodality", + "association" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/607", + "label": [ + "association", + "asociaciones", + "association" + ] + }, + { + "id": "http://GeoEthno#LAU", + "label": [ + "Lau", + "Groupe de Lau", + "Lau Islands", + "Eastern Group" + ] + } + ], + "title": [ + "México: mujeres en peligro con Ruth Fierro" + ], + "type": [ + { + "id": "http://isidore.science/ontology#blogPost", + "label": "Billets de blog" + } + ], + "url": [ + "http://rediceisal.hypotheses.org/22150" + ] + }, + "_type": "_doc", + "sort": [ + 1571011200000 + ] + }, + { + "_id": "5Bsoj3ABhI997Zga0lr5", + "_index": "isidore-documents-triple", + "_score": null, + "_source": { + "abstract": [ + "International audience", + "Dans le cadre d'un projet de terrain pour la constitution d'un corpus d'espagnol parlé, nous avons enregistré, en 2018, un débat d'une heure et demie à Medellin sur le féminisme, intitulé ¿Será que soy feminista? (« Suis-je féministe ? »). 
Ce débat est le premier d'une série d'évènements organisés par un nouveau collectif féministe, Hiedras, qui souhaite créer des espaces de discussion sur les questions du genre en Amérique Latine, la région du monde la plus violente envers les femmes en dehors d'un contexte de guerre, selon l'ONU.Malgré les acquis récents en matière du droit des femmes en Colombie (la légalisation partielle de l'IVG ; la typification des crimes de genre : le féminicide – loi Rosa Elvira Cely – et l'attaque à l'acide – loi Natalia Ponce –) et la nomination de femmes à des postes politiques notables (dans le gouvernement actuel, neuf ministères sur seize sont occupés par des femmes), la vie quotidienne des Colombiennes est loin d'être facile. Trois indices statistiques le révèlent : le taux de grossesse chez les adolescentes (près de 20% des jeunes filles de 19 ans et moins ont déjà eu au moins une grossesse), le chômage (les femmes connaissent des taux de chômage plus élevés, avec 10.9%, 15.6% et 6.8% respectivement pour les niveaux Bac+5, Bac et sans études, contre 8.5%, 9.4% et 3.5% pour les hommes) et la violence domestique (entre 2002 et 2009, 11976 femmes ont été assassinées dans un contexte domestique). La ville de Medellin est particulièrement affectée par ce phénomène avec 70 féminicides dans les cinq premiers mois de 2018. Face à cette réalité, se réunir pour parler de féminisme à Medellin relève d'enjeux sociétaux graves, voire tragiques.Notre corpus présente des caractéristiques particulières qui en font, à nos yeux, un objet d'étude pertinent pour l'analyse conversationnelle. En effet, bien qu'elle soit organisée sous la forme de débat, notamment avec la gestion des tours de parole par une modératrice, l'interaction ne correspond pas au cadre traditionnel du débat politique ou académique, et cela principalement pour deux raisons. 
Premièrement, le public qui suit cet évènement est familiarisé en amont avec la thématique féministe ; en conséquence, les intervenantes ne sont pas soumises aux contraintes de la dissuasion politique. Il ne s'agit pas, pour autant, du regroupement d'une communauté déjà établie : les débats de ce type sont rares à Medellin et la question du genre en Colombie, plutôt restreinte aux milieux académiques, est peu repandue dans la population (Estrada, 1997). De plus, s'agissant du premier évènement de Hiedras, il n'existe pas une routinisation au préalable pour ce type d'interaction. Deuxièmement, l'évènement a lieu le soir dans un bar qui sert habituellement à des concerts de tango et de salsa ; ainsi, les intervenantes et le public consomment de l'alcool pendant la présentation. Dans ce contexte, les intervenantes se permettent d'utiliser un langage hybride entre, d'une part, des expressions techniques dont l'usage confirme leur statut épistémique en tant qu'expertes, et, d'autre part, des formules d'adresse informelles, visant directement le public. Celui-ci devient un autre interactant dans la communication, qui se réaffirme au fur et à mesure que le débat avance.Dans cette communication, nous nous intéresserons principalement au public, dont le rire est la manifestation la plus saisissable du point de vue de l'analyse de données. Les études sur le rire en analyse conversationnelle ont démontré son rôle à part entière dans l'interaction (Jefferson, Sacks, & Schegloff, 1987). Ici, le rire est une manifestation collective qui porte des caractéristiques particulières dues à la nature même de l'interaction. L'analyse séquentielle des moments suscitant le rire du public nous montre que ce rire n'obéit pas à une manifestation fortuite, bien au contraire : les intervenantes cherchent à placer certaines émotions dans leur discours dans le but de le susciter. Ainsi, plusieurs ressources multimodales sont mobilisées, qui vont de la grimace à la dérision. 
Cette analyse nous demande donc une vision multidimensionnelle du débat (Martel, 2000). En somme, nous voulons analyser les mécanismes par lesquels les intervenantes cherchent à provoquer cette réaction, tout en gardant les codes qu'exigent une activité communicative comme le débat et, surtout, la discussion sur un sujet aussi sensible en Colombie.Enfin, nous ne nous contenterons pas de l'analyse de séquences isolées. Nous porterons aussi un regard longitudinal sur l'émergence d'un espace de partage, voire de confidence, qui se crée entre les intervenantes et le public tout au long de l'interaction et qui se traduit par l'apparition, vers la fin, de récits intimes et personnels (Traverso, 2000). Nous voulons montrer que le rire est l'un des mécanismes privilégiés dans la création de cet espace de confidence, en prenant en compte l'identité de genre (Greco, 2014) comme un facteur décisif pour l'analyse de cette interaction." + ], + "author": [ + { + "firstname": [ + "Luisa" + ], + "id": "acosta_cordoba_luisa", + "lastname": [ + "Acosta Córdoba" + ], + "pid": [ + "22011823X", + "luisa-acosta-cordoba" + ] + } + ], + "collection": [ + { + "id": "10670/2.q0dtzi", + "name": "Hyper Article en Ligne - Sciences de l'Homme et de la Société" + } + ], + "date": [ + "2019-10-07" + ], + "datestamp": "2019-11-19", + "identifier": [ + "10670/1.chnjw7", + "hal-02308138" + ], + "keyword": [ + "analyse de l'interaction", + "rire", + "débat", + "féminisme", + "[SHS]Humanities and Social Sciences", + "[SHS.LANGUE]Humanities and Social Sciences/Linguistics", + "[SHS.GENRE]Humanities and Social Sciences/Gender studies" + ], + "language": [ + { + "id": "http://lexvo.org/id/iso639-3/fra", + "label": "French" + } + ], + "publisher": [ + { + "contact": "halshs@ccsd.cnrs.fr", + "name": "Centre pour la communication scientifique directe", + "url": "http://www.ccsd.cnrs.fr" + } + ], + "scope": [ + { + "id": "http://isidore.science/subject/publications", + "label": "Publications" + } + ], + 
"subject": [ + { + "id": "http://data.bnf.fr/ark:/12148/cb165905243", + "label": [ + "Revêtements d'icônes", + "Oklad", + "Revêtements d'icônes en argent", + "Revêtements d'icônes en or", + "Rizza", + "Risa", + "Icônes, Revêtements d'", + "Riza", + "Basma" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh98008109", + "label": [ + "Icon covers", + "Covers, Icon", + "Icon mountings" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh98008111", + "label": [ + "Silver icon covers" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh98008110", + "label": [ + "Gold icon covers" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrt45NtAFS36q", + "label": [ + "transport", + "transport", + "transporte", + "circulation" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119384324", + "label": [ + "Circulation", + "Trafic", + "Écoulement du trafic", + "Circulation routière", + "Circulation automobile", + "Trafic routier" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85136769", + "label": [ + "Traffic flow", + "Traffic volume" + ] + }, + { + "id": "http://datos.bne.es/resource/XX527108", + "label": [ + "Tráfico", + "Tránsito", + "Volumen de tráfico" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/4985", + "label": [ + "management", + "gestión", + "gestion" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-1005", + "label": [ + "Gestion" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11950206h", + "label": [ + "Gestion", + "Sciences de la gestion", + "Gestion, Sciences de la", + "Sciences de gestion", + "Techniques de gestion", + "Direction", + "Management" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh2002007911", + "label": [ + "Management" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11944030k", + "label": [ + "Émotions", + "Contrôle émotionnel", + "Régulation des émotions", + "Régulation émotionnelle", + "Expression des émotions", + 
"Contrôle des émotions", + "Émotivité" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85042818", + "label": [ + "Emotions", + "Human emotions", + "Passions", + "Feelings" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119327733", + "label": [ + "Rire" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85075047", + "label": [ + "Laughter", + "Laughing" + ] + }, + { + "id": "http://datos.bne.es/resource/XX532537", + "label": [ + "Risa" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtMf8WI76KPM", + "label": [ + "laugh", + "rire", + "risa" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11938848m", + "label": [ + "Féminisme", + "Émancipation de la femme", + "Cause féminine", + "Mouvement de libération des femmes", + "Mouvements de libération des femmes", + "Libération de la femme", + "Mouvement féministe" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85047741", + "label": [ + "Feminism", + "Women's liberation movement", + "Women--Emancipation", + "Women's lib", + "Emancipation of women", + "Women's movement", + "Feminist movement", + "Women's liberation" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/fb2c2351-7e1c-485d-9a31-41352b7973b1", + "label": [ + "Féminisme" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtukZlm2RM1p", + "label": [ + "feminism", + "féminisme", + "feminismo" + ] + }, + { + "id": "http://GeoEthno#MEDELLIN", + "label": [ + "Medellin" + ] + }, + { + "id": "http://GeoEthno#COLOMBIE", + "label": [ + "Colombie", + "Colombia", + "République de Colombie", + "Republic of Colombia" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb13516396s", + "label": [ + "Publics", + "Spectateurs", + "Public", + "Auditoire", + "Assistance (public)", + "Visiteurs", + "Fréquentation", + "Audience" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/6710", + "label": [ + "project", + "proyecto", + "projet" + ] + }, + { + "id": 
"http://data.bnf.fr/ark:/12148/cb123646313", + "label": [ + "Projet", + "Projet de vie", + "Projet d'avenir", + "Projets", + "Projet (psychologie)", + "Projet collectif", + "Projet personnel", + "Projet individuel" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85055484", + "label": [ + "Goal (Psychology)", + "Goal setting", + "Setting of goals" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtlqFAeJrGfj", + "label": [ + "constitución", + "constitution", + "constitution" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrt0SEeMmB4DY", + "label": [ + "corpus", + "corpus", + "corpus" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119341539", + "label": [ + "Discussion", + "Palabres", + "Délibération", + "Art de la discussion" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85038407", + "label": [ + "Discussion", + "Group discussion" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/63ce2558-0c0f-47d9-b408-bc3648c95b74", + "label": [ + "Étude de genre", + "Gender studies", + "Genre" + ] + }, + { + "id": "http://GeoEthno#AMERIQUE_LATINE", + "label": [ + "Amérique latine", + "América Latina", + "Latin America", + "Latinoamérica" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11930876x", + "label": [ + "Amérique latine", + "Amérique espagnole" + ] + }, + { + "id": "http://GeoEthno#AMERIQUE_CENTRALE", + "label": [ + "Amérique centrale", + "América Central", + "Central America", + "Amérique latine", + "Latin America" + ] + }, + { + "id": "http://GeoEthno#AMERIQUE_DU_SUD", + "label": [ + "Amérique du Sud", + "América del Sur", + "South America", + "Amérique latine", + "Suramérica", + "Sudamérica", + "Latin America" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/7059", + "label": [ + "region", + "regiones", + "région" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtzuzR7iTq5A", + "label": [ + "region", + "région", + "región" + ] + }, + { + "id": 
"http://data.culture.fr/thesaurus/resource/ark:/67717/T990-538", + "label": [ + "Collectivité territoriale", + "Collectivité locale", + "Municipalité", + "Commune", + "Conseil régional", + "Conseil général", + "Région", + "Département" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11932363p", + "label": [ + "Monde" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtxhEN68d3Ra", + "label": [ + "cosmos", + "cosmos", + "cosmos", + "monde", + "univers" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/9380", + "label": [ + "world", + "mundo", + "monde" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119556551", + "label": [ + "Femmes", + "Femme" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh2002006249", + "label": [ + "Women", + "Women's work" + ] + }, + { + "id": "http://datos.bne.es/resource/XX4576329", + "label": [ + "Mujeres", + "Mujer" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb13318436b", + "label": [ + "Guerre", + "Polémologie", + "Conflits armés", + "Hostilités" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85145114", + "label": [ + "War", + "Hostilities", + "Conflict, Armed (War)", + "Wars", + "Armed conflict (War)", + "Fighting" + ] + }, + { + "id": "http://datos.bne.es/resource/XX525638", + "label": [ + "Guerra", + "Conflictos bélicos", + "Operaciones bélicas" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-570", + "label": [ + "Conflit", + "Guerre" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/9028", + "label": [ + "war", + "guerra", + "guerre" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtf1yEbB24G1", + "label": [ + "guerra", + "guerre", + "war" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtHDkQbAT5WW", + "label": [ + "materia", + "matière", + "matter" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11975809t", + "label": [ + "Matière" + ] + }, + { + "id": "http://datos.bne.es/resource/XX524811", 
+ "label": [ + "Materia", + "Materia (Física)" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85082246", + "label": [ + "Matter" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb13318687s", + "label": [ + "Droit", + "Sciences juridiques", + "Aspect juridique", + "Droit positif", + "Régime juridique", + "Systèmes juridiques", + "Système juridique", + "Science du droit", + "Ordre juridique" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh99004848", + "label": [ + "Law and legislation" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-719", + "label": [ + "Droit" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtgx5yOalcpT", + "label": [ + "derecho", + "droit", + "law" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb125003662", + "label": [ + "Légalisation", + "Certification de signature" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85075773", + "label": [ + "Legalization" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11931009t", + "label": [ + "Avortement", + "Avortements", + "IVG", + "Interruption de grossesse", + "Interruption volontaire de grossesse", + "Avortement provoqué", + "Avortement légal", + "Avortement criminel" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85000196", + "label": [ + "Abortion", + "Feticide", + "Termination of pregnancy", + "Induced abortion", + "Pregnancy termination", + "Abortion, Induced", + "Foeticide" + ] + }, + { + "id": "http://datos.bne.es/resource/XX524423", + "label": [ + "Aborto", + "Aborto inducido", + "Embarazo--Interrupción" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb137343962", + "label": [ + "Crimes et criminels", + "Crimes violents", + "Délinquance", + "Criminalité", + "Criminels", + "Crime", + "Crimes" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85036577", + "label": [ + "Criminals", + "Offenders", + "Delinquents", + "Crime and criminals" + ] + }, + { + "id": 
"http://datos.bne.es/resource/XX526832", + "label": [ + "Delitos y faltas", + "Acciones delictivas", + "Acto punible", + "Crímenes", + "Faltas", + "Hechos delictivos", + "Infracciones" + ] + }, + { + "id": "http://datos.bne.es/resource/XX4576381", + "label": [ + "Delincuencia", + "Conducta delictiva", + "Criminalidad" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85033993", + "label": [ + "Crime", + "Crimes", + "Felonies", + "Misdemeanors", + "Urban crime", + "City crime", + "Crime--Social aspects", + "Crime and criminals", + "Delinquency" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85143536", + "label": [ + "Violent crimes", + "Crimes, Violent", + "Crimes of violence" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb16654044n", + "label": [ + "Féminicide", + "Gynécide" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85061743", + "label": [ + "Homicide", + "Femicide" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb120485802", + "label": [ + "Lois", + "Loi", + "Lois et règlements", + "Loi (droit)", + "Lois écrites", + "Actes législatifs", + "Statuts (lois)" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85127611", + "label": [ + "Statutes", + "Legislative acts", + "Legislative enactments", + "Acts, Legislative", + "Enactments, Legislative", + "Laws (Statutes)" + ] + }, + { + "id": "http://datos.bne.es/resource/XX528757", + "label": [ + "Leyes", + "Ley (Derecho)", + "Leyes y normas legales" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrt2XKbjOmPcT", + "label": [ + "law text", + "ley", + "loi" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/4707", + "label": [ + "law (individual)", + "ley (individual)", + "loi" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-1201", + "label": [ + "Législation", + "Décret", + "Circulaire", + "Arrêté", + "Texte Réglementaire", + "Loi" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119758790", + 
"label": [ + "Rosiers", + "Rosa" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85115443", + "label": [ + "Roses", + "Rose", + "Rosa" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/52", + "label": [ + "acid", + "acidos", + "acide" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11934544f", + "label": [ + "Postes", + "Services postaux", + "Poste", + "Livraison postale", + "Poste à relais", + "Service postal", + "Distribution postale", + "Acheminement du courrier" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85105472", + "label": [ + "Postal service", + "Mail", + "Mail service", + "Post-office" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11931211x", + "label": [ + "Classes dirigeantes", + "Élite politique", + "Notables", + "Classe dirigeante" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11934226p", + "label": [ + "Célébrités", + "Star system", + "Personnages célèbres", + "Personnes illustres", + "VIP", + "Personnalités", + "Jet-set", + "Femmes illustres", + "Vedettes", + "Notables", + "Gotha (personnes)", + "Superstars", + "Personnes célèbres", + "Sommités", + "Hommes illustres", + "Femmes célèbres", + "Gens célèbres", + "Grands personnages", + "Jet society", + "Personnages illustres", + "Hommes célèbres", + "Stars", + "People (personnes)" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh92004131", + "label": [ + "Socialites" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85021618", + "label": [ + "Celebrities", + "Famous people", + "Celebs", + "Cult of celebrity", + "Celebrity culture", + "Well-known people", + "Illustrious people", + "Famous persons" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119313476", + "label": [ + "Élite (sciences sociales)", + "Haute société", + "Establishment", + "Société, Haute", + "Notables", + "Élites (sciences sociales)" + ] + }, + { + "id": "http://datos.bne.es/resource/XX525263", + "label": [ + "Élites", + "Clases 
dirigentes", + "Élite (Ciencias sociales)" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85042580", + "label": [ + "Elite (Social sciences)", + "Elites (Social sciences)" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/3715", + "label": [ + "government (cabinet)", + "Gobierno (gabinete)", + "gouvernement" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11975971h", + "label": [ + "Gouvernement" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh00005941", + "label": [ + "Government" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtgU5LqkI2d1", + "label": [ + "gobierno", + "gouvernement", + "government" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb12264189v", + "label": [ + "Ministères", + "Secrétariats d'État", + "Administration centrale", + "Départements ministériels", + "Administrations centrales", + "Portefeuilles ministériels", + "Ministères d'État" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85046283", + "label": [ + "Executive departments", + "State ministries", + "Ministries, Government", + "Government ministries", + "Ministries, State", + "Departments, Executive" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtvWHPMsEEqO", + "label": [ + "everyday life", + "vida diaria", + "vie quotidienne", + "culture matérielle" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-1347", + "label": [ + "Mode de vie", + "Vie quotidienne" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11932512b", + "label": [ + "Moeurs et coutumes", + "Usages", + "Traditions", + "Coutumes", + "Vie sociale", + "Vie quotidienne", + "Us et coutumes" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh2001008851", + "label": [ + "Social life and customs", + "Customs" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119470173", + "label": [ + "Vie pratique", + "Vie quotidienne" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb12042412v", + 
"label": [ + "Statistiques", + "Données statistiques" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh99001414", + "label": [ + "Statistics", + "Statistical data", + "Cases, clinical reports, statistics" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh2003003690", + "label": [ + "Statistics" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/8076", + "label": [ + "statistics", + "estadísticas", + "statistiques" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/11130", + "label": [ + "rate", + "índice", + "taux" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11975959n", + "label": [ + "Tarifs", + "Barèmes de prix", + "Taux", + "Tarification" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh00007527", + "label": [ + "Rates" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtamhmSNBzoJ", + "label": [ + "embarazo", + "grossesse", + "pregnancy" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11941368c", + "label": [ + "Grossesse" + ] + }, + { + "id": "http://datos.bne.es/resource/XX526131", + "label": [ + "Embarazo", + "Gestación" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85106276", + "label": [ + "Pregnancy", + "Gestation" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119312279", + "label": [ + "Adolescentes", + "Jeunes filles" + ] + }, + { + "id": "http://datos.bne.es/resource/XX4667227", + "label": [ + "Mujeres adolescentes", + "Niñas adolescentes", + "Chicas adolescentes" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85000957", + "label": [ + "Teenage girls", + "Adolescent girls", + "Female adolescents" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11955267x", + "label": [ + "Jeunes femmes", + "Jeunes filles" + ] + }, + { + "id": "http://datos.bne.es/resource/XX5050280", + "label": [ + "Mujeres jóvenes", + "Chicas jóvenes" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85149296", + "label": [ + "Young 
women" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-501", + "label": [ + "Chômage" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119500644", + "label": [ + "Chômage", + "Pertes d'emplois", + "Chômage involontaire", + "Taux de chômage", + "Niveau de chômage" + ] + }, + { + "id": "http://datos.bne.es/resource/XX526110", + "label": [ + "Paro", + "Desempleo" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85139643", + "label": [ + "Unemployment", + "Joblessness" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/8768", + "label": [ + "unemployment", + "desempleo", + "chômage" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtj0rgSLp9Lc", + "label": [ + "chômage", + "desempleo", + "unemployment" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtO1lSQSNbHE", + "label": [ + "bac", + "ferry-boat" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11976108s", + "label": [ + "Hommes" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85083510", + "label": [ + "Men", + "Human males" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb14525166d", + "label": [ + "Et la violence" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtXFkSqqlD6k", + "label": [ + "domestic staff", + "emploi domestique", + "servidumbre", + "domesticité", + "domestique" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11933792m", + "label": [ + "Villes", + "Milieu urbain", + "Espace urbain", + "Régions urbaines", + "Communes urbaines", + "Environnement urbain", + "Monde urbain", + "Cités", + "Ville", + "Zones urbaines", + "Grandes villes", + "Centres urbains" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85026130", + "label": [ + "Cities and towns", + "Global cities", + "Towns", + "Urban systems", + "Municipalities", + "Urban areas" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-2075", + "label": [ + "Ville" + ] + }, + { + "id": 
"http://ark.frantiq.fr/ark:/26678/pcrtN5zGcqx0YR", + "label": [ + "ciudad", + "town", + "ville", + "agglomération", + "agglomération urbaine" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/1421", + "label": [ + "city", + "ciudades", + "ville" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtoASMcUbb46", + "label": [ + "mes", + "mois", + "months" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb13318617m", + "label": [ + "Mois" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85087066", + "label": [ + "Months" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11935546c", + "label": [ + "Face", + "Facies (anatomie)" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85046785", + "label": [ + "Face", + "Human face" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119579482", + "label": [ + "Réalité", + "Réel" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85111773", + "label": [ + "Reality" + ] + }, + { + "id": "http://datos.bne.es/resource/XX528102", + "label": [ + "Realidad" + ] + }, + { + "id": "http://GeoEthno#GRAVES", + "label": [ + "Graves" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11938400h", + "label": [ + "Oeil", + "Globe oculaire", + "Oculus (anatomie)", + "Yeux" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85046642", + "label": [ + "Eye", + "Eyes", + "Eyeball", + "Visual system" + ] + }, + { + "id": "http://datos.bne.es/resource/XX525876", + "label": [ + "Ojos" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtZhnbHzuSzG", + "label": [ + "object", + "objet", + "objeto", + "objets", + "petit objet" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb124811129", + "label": [ + "Analyse de la conversation", + "Conversation, Analyse de la", + "Analyse conversationnelle", + "Analyse du discours conversationnel", + "Analyse des conversations", + "Analyse de conversation", + "Analyse des interactions verbales" + ] + }, + { + "id": 
"http://id.loc.gov/authorities/subjects/sh94001018", + "label": [ + "Conversation analysis", + "Analysis of conversation", + "Conversational analysis", + "CA (Interpersonal communication)" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/2562", + "label": [ + "effect", + "efectos", + "effet" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtR5ppaggJ0o", + "label": [ + "bien", + "bien", + "good", + "honestum" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-1365", + "label": [ + "Morphologie", + "Forme" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11965217k", + "label": [ + "Tours (constructions)", + "Tours", + "Constructions de grande hauteur", + "Tours d'horloge" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85136296", + "label": [ + "Towers", + "Clock-towers", + "Church towers" + ] + }, + { + "id": "http://GeoEthno#TOURS", + "label": [ + "Tours" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11933029x", + "label": [ + "Parole", + "Phonation", + "Langage oral", + "Langage articulé" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85126434", + "label": [ + "Speech", + "Talking" + ] + }, + { + "id": "http://datos.bne.es/resource/XX526839", + "label": [ + "Libertad condicional", + "Excarcelación sujeta a condición", + "Puesta en libertad bajo condición" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/6371", + "label": [ + "politics", + "política", + "politique" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/6370", + "label": [ + "policy", + "política", + "politique" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtQdSP02tK3w", + "label": [ + "política", + "politics", + "politique" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb13319064q", + "label": [ + "Science politique", + "Politique, Science", + "Statologie", + "Politologie", + "Sciences politiques", + "Politique" + ] + }, + { + "id": 
"http://id.loc.gov/authorities/subjects/sh85104440", + "label": [ + "Political science", + "Political thought", + "Government", + "Political theory", + "Civil government", + "Commonwealth, The", + "Science, Political", + "Administration", + "Politics" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11975806s", + "label": [ + "Pensée politique et sociale", + "Pensée sociale", + "Et le pouvoir", + "Et la politique", + "Politique" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh2002011435", + "label": [ + "Political and social views", + "Social views" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-1597", + "label": [ + "Politique" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb13553871x", + "label": [ + "Pratiques politiques", + "Politique, Pratiques de la", + "Comportement politique", + "Moeurs politiques", + "Politique", + "Pratiques de la politique" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85104466", + "label": [ + "Politics, Practical", + "Electoral politics", + "Political behavior", + "Politics", + "Mass political behavior", + "Practical politics" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/10904", + "label": [ + "public", + "público", + "public" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtwV5F3Zewnp", + "label": [ + "public", + "public", + "público" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/14950", + "label": [ + "deterrence", + "disuasión", + "dissuasion" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb13319114f", + "label": [ + "Communauté", + "Communauté (philosophie)", + "Collectivité" + ] + }, + { + "id": "http://datos.bne.es/resource/XX525928", + "label": [ + "Comunidad", + "Colectividad" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85029195", + "label": [ + "Communities", + "Community" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-550", + "label": [ + 
"Communauté" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtKPKSRRMdbB", + "label": [ + "communauté", + "community", + "comunidad" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrt7vLkeZro2h", + "label": [ + "población", + "population", + "population" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-1625", + "label": [ + "Population" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/4056", + "label": [ + "human population", + "población humana", + "population" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119563454", + "label": [ + "Population", + "Natalité", + "Dimension de la population", + "Surpeuplement", + "Surpopulation", + "Accroissement de population" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh00006969", + "label": [ + "Population", + "Demography", + "Demographics" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtPZB1uLhkJZ", + "label": [ + "lieu", + "lugar", + "place" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-401", + "label": [ + "Café", + "Pub", + "Bar", + "Salon de thé" + ] + }, + { + "id": "http://GeoEthno#BAR", + "label": [ + "Bar" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11978058j", + "label": [ + "Concerts", + "Récitals (musique)", + "Auditions (concerts)" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119338383", + "label": [ + "Alcool", + "Alcool éthylique", + "Éthylique, Alcool", + "RN 64-17-5", + "Éthanol" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85003268", + "label": [ + "Alcohol", + "Grain alcohol", + "Drinking alcohol", + "Intoxicants", + "Potable alcohol" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/298", + "label": [ + "alcohol", + "alcohol", + "alcool" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-1196", + "label": [ + "Langage" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtVvp1ATO4pe", + 
"label": [ + "langage", + "language", + "lenguaje" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb120423179", + "label": [ + "Langage", + "Langue et langage", + "Langage et langues", + "Langues et langage" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh99005366", + "label": [ + "Language", + "Language (New words, slang, etc.)" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrttrEq4Rcpkr", + "label": [ + "technologie", + "technology", + "tecnología", + "technique", + "technique de fabrication", + "techniques" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11933639w", + "label": [ + "Technique", + "Modes opératoires", + "Techniques", + "Mode opératoire" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh2002007892", + "label": [ + "Technique" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11975752p", + "label": [ + "Usage", + "Bon usage (linguistique)", + "Usage (linguistique)" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh2002006425", + "label": [ + "Usage", + "Idioms, corrections, errors" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh2002006426", + "label": [ + "Use" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11938923j", + "label": [ + "Communication", + "Communication humaine", + "Moyens de communication" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh99002303", + "label": [ + "Communication" + ] + }, + { + "id": "http://datos.bne.es/resource/XX525191", + "label": [ + "Comunicación" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/1605", + "label": [ + "communications", + "comunicaciones", + "communication" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-552", + "label": [ + "Communication" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtaJkXATOYwB", + "label": [ + "metrología", + "métrologie", + "metrology", + "mesure", + "pondérologie" + ] + }, + { + "id": 
"http://data.bnf.fr/ark:/12148/cb119384533", + "label": [ + "Mesure", + "Mesurage", + "Mesures industrielles", + "Métrologie", + "Mesure industrielle" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh2002006406", + "label": [ + "Measurement" + ] + }, + { + "id": "http://datos.bne.es/resource/XX525006", + "label": [ + "Medición", + "Medida", + "Metrología" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-352", + "label": [ + "Belvédère", + "Point de vue" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtnQsh3FimJb", + "label": [ + "análisis de datos", + "analyse des données", + "data processing", + "analyse de données" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11940346t", + "label": [ + "Analyse des données", + "Analyse de données" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrt4otVkR3bqD", + "label": [ + "door", + "porte", + "puerta" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-1630", + "label": [ + "Porte", + "Portail" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtLXP4RGpOzE", + "label": [ + "naturaleza", + "nature", + "nature" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-1400", + "label": [ + "Nature" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11933874z", + "label": [ + "Nature" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85090277", + "label": [ + "Nature" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11934805c", + "label": [ + "Analyse séquentielle", + "MSC 62L10 (2000)", + "Analyse progressive", + "Méthodes séquentielles (statistique mathématique)" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85120148", + "label": [ + "Sequential analysis" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/12985", + "label": [ + "speech", + "habla", + "discours" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11948542x", + "label": [ + 
"Discours", + "Parties du discours (rhétorique)", + "Péroraisons", + "Speeches", + "Allocutions", + "Oraisons", + "Discours (genre littéraire)", + "Philippiques", + "Proclamations", + "Exortations", + "Exordes", + "Discours (rhétorique)", + "Harangues" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85126460", + "label": [ + "Speeches, addresses, etc", + "Addresses", + "Papers, Collected (Anthologies)", + "Orations", + "Collected papers (Anthologies)", + "Discourses" + ] + }, + { + "id": "http://datos.bne.es/resource/XX528471", + "label": [ + "Discursos", + "Alocuciones" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtSlc5i5N1Zb", + "label": [ + "discours", + "discurso", + "speech", + "discours politique" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/7168", + "label": [ + "resource", + "recursos", + "ressources" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb13323058j", + "label": [ + "Raillerie", + "Moquerie", + "Quolibet", + "Dérision", + "Sarcasme", + "Plaisanterie", + "Persiflage", + "Risée" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85114024", + "label": [ + "Ridicule" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/13077", + "label": [ + "assay", + "ensayo", + "analyse" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-76", + "label": [ + "Analyse" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/397", + "label": [ + "analysis", + "análisis", + "analyse" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119756783", + "label": [ + "Analyse", + "Analyse chimique", + "Dosage" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh2002006221", + "label": [ + "Analysis", + "Methods of analysis", + "Analysis and chemistry", + "Analytical methods", + "Chemical analysis", + "Analysis methods", + "Analysis and examination" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh99002428", + "label": [ + "Assaying" + ] + 
}, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtIebqI21aZk", + "label": [ + "vision", + "vision", + "visión", + "sight" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11947019s", + "label": [ + "Vision", + "Vue" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85143872", + "label": [ + "Vision", + "Eyesight", + "Seeing", + "Sight" + ] + }, + { + "id": "http://datos.bne.es/resource/XX525877", + "label": [ + "Vista", + "Visión" + ] + }, + { + "id": "http://GeoEthno#SOMME", + "label": [ + "Somme" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11979893h", + "label": [ + "Mécanismes" + ] + }, + { + "id": "http://datos.bne.es/resource/XX532849", + "label": [ + "Mecanismos", + "Movimientos mecánicos" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85082763", + "label": [ + "Mechanical movements", + "Mechanisms (Machinery)" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11950069v", + "label": [ + "Codes" + ] + }, + { + "id": "http://datos.bne.es/resource/XX526383", + "label": [ + "Códigos y claves", + "Cifras (Criptografía)", + "Cifras y claves", + "Claves", + "Códigos cifrados" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/14848", + "label": [ + "subject", + "materia (sujeto, en derecho)", + "sujet" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11987635h", + "label": [ + "Regard" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85053590", + "label": [ + "Gaze" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtXXK0nOUwSw", + "label": [ + "espace", + "espacio", + "space", + "notion d'espace" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb12011791z", + "label": [ + "Espace" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh2006003964", + "label": [ + "Space" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtYXZG1V9Vz3", + "label": [ + "partage", + "reparto", + "sharing" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb12204555d", + 
"label": [ + "Partage" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85121133", + "label": [ + "Sharing" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb155584254", + "label": [ + "Confidence", + "Confidences" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85119736", + "label": [ + "Self-disclosure", + "Revelation of self", + "Self-disclosing behavior", + "Self-revelation", + "Disclosure of self" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb12293950d", + "label": [ + "Vers", + "Animaux vermiformes", + "Vermes" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85148550", + "label": [ + "Worms", + "Vermes", + "Vermiform animals" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11940505s", + "label": [ + "Roman", + "Genre romanesque", + "Fiction (littérature)", + "Littérature romanesque", + "Récits", + "Romans" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh99001562", + "label": [ + "Fiction", + "Legends and stories", + "Novels", + "Stories" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11932861c", + "label": [ + "Nouvelles", + "Nouvelles (littérature)", + "Récits" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh99001562", + "label": [ + "Fiction", + "Legends and stories", + "Novels", + "Stories" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119757771", + "label": [ + "Personnel", + "Effectifs", + "Personnels", + "Personnel salarié" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh00002753", + "label": [ + "Employees", + "Relations with employees", + "Servants" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11931807b", + "label": [ + "Création", + "Création du monde", + "Création divine" + ] + }, + { + "id": "http://datos.bne.es/resource/XX526689", + "label": [ + "Creación", + "Creación (Teología)" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85033823", + "label": [ + "Creation", + "Biblical cosmogony" + ] + }, + { + 
"id": "http://data.bnf.fr/ark:/12148/cb123864365", + "label": [ + "Identité sexuelle", + "Genre, Identité de", + "Identité sexuée", + "Identification au genre", + "Identité de genre" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh91003756", + "label": [ + "Gender identity", + "Sex identity (Gender identity)", + "Sexual identity (Gender identity)" + ] + }, + { + "id": "http://datos.bne.es/resource/XX546631", + "label": [ + "Identidad sexual", + "Identidad de género" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtUtKbZy9yLK", + "label": [ + "ciencias", + "sciences", + "sciences" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11933232c", + "label": [ + "Sciences", + "Sciences pures", + "Sciences exactes", + "Sciences fondamentales" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh00007934", + "label": [ + "Science" + ] + }, + { + "id": "http://datos.bne.es/resource/XX527870", + "label": [ + "Ciencias" + ] + }, + { + "id": "http://GeoEthno#LYON", + "label": [ + "Lyon", + "Lugdunum" + ] + }, + { + "id": "http://GeoEthno#FRANCE", + "label": [ + "France", + "France", + "République française", + "French Republic", + "Royaume de France", + "Royaume de France (987-1791)" + ] + } + ], + "title": [ + "La circulación y la gestión de las emociones en interacción : estudio sobre la risa en un debate informal sobre el feminismo en Medellín, Colombia", + "La circulation et la gestion des émotions en interaction : étude sur le rire dans un débat informel sur le féminisme, à Medellin, Colombie" + ], + "type": [ + { + "id": "http://isidore.science/ontology#conference", + "label": "Colloques et conférences" + } + ], + "url": [ + "https://hal.archives-ouvertes.fr/hal-02308138" + ] + }, + "_type": "_doc", + "sort": [ + 1570406400000 + ] + }, + { + "_id": "2ABzjXABhI997ZgaRiXz", + "_index": "isidore-documents-triple", + "_score": null, + "_source": { + "abstract": [ + "Communication réalisée dans le panel \"Violences de genre au 
quotidien\", discuté par Fabrice Virgili", + "International audience", + "Cette communication se propose d’explorer, à partir des archivesjudiciaires issues de la cour d’assises du Rhône sur la période 1789-1860, le lien entre recherche d’émancipation et féminicide à partirde la question suivante : le féminicide est-il une réaction/réponsemasculine aux tentatives d’émancipation féminine ? Si le féminiciden’a été défini que récemment, dans les années 1990, comme le meurtred’une femme parce qu’elle est une femme, le fait social qu’il désignen’a pourtant pas attendu d’être nommé pour exister. D’autres sociétés,à d’autres époques, font état de meurtres de femmes en tant quefemmes, lesquels se caractérisent par une violence léthale sur unepersonne dont la féminité joue un rôle dans la réalisation de l’acte.La féminité désigne ici le fait d’être considérée comme vulnérable,appropriable, contrôlable, ou encore punissable en dehors du cadrede la loi. Le féminicide est, par ailleurs, souvent le fait d’hommes quis’octroient le droit et/ou le pouvoir de mettre à mort une personneprétendant échapper à leur contrôle ; il y aurait donc un phénomènede tentative d’émancipation sous-jacent au féminicide." + ], + "author": [ + { + "firstname": [ + "Margot" + ], + "id": "giacinti_margot", + "lastname": [ + "Giacinti" + ], + "pid": [ + "0000-0002-8852-6589", + "margot-giacinti" + ] + } + ], + "collection": [ + { + "id": "10670/2.plzuxu", + "name": "Triangle. 
Action, discours, pensée politique et économique" + } + ], + "date": [ + "2019-08-29" + ], + "datestamp": "2019-11-20", + "identifier": [ + "10670/1.5di7vf", + "hal-02297823" + ], + "keyword": [ + "féminicide", + "émancipation", + "[SHS.GENRE]Humanities and Social Sciences/Gender studies", + "[SHS.SCIPO]Humanities and Social Sciences/Political science", + "[SHS.HIST]Humanities and Social Sciences/History" + ], + "language": [ + { + "id": "http://lexvo.org/id/iso639-3/fra", + "label": "French" + } + ], + "publisher": [ + { + "contact": "", + "name": "Institut des sciences humaines et sociales du CNRS", + "url": "http://www.cnrs.fr/inshs" + } + ], + "scope": [ + { + "id": "http://isidore.science/subject/publications", + "label": "Publications" + } + ], + "subject": [ + { + "id": "http://GeoEthno#FRANCE", + "label": [ + "France", + "France", + "République française", + "French Republic", + "Royaume de France", + "Royaume de France (987-1791)" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11938923j", + "label": [ + "Communication", + "Communication humaine", + "Moyens de communication" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh99002303", + "label": [ + "Communication" + ] + }, + { + "id": "http://datos.bne.es/resource/XX525191", + "label": [ + "Comunicación" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/1605", + "label": [ + "communications", + "comunicaciones", + "communication" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-552", + "label": [ + "Communication" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119337991", + "label": [ + "Violence", + "Brutalité", + "Violences", + "Brutalités" + ] + }, + { + "id": "http://datos.bne.es/resource/XX526747", + "label": [ + "Violencia" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85143514", + "label": [ + "Violence", + "Violent behavior" + ] + }, + { + "id": 
"http://data.culture.fr/thesaurus/resource/ark:/67717/63ce2558-0c0f-47d9-b408-bc3648c95b74", + "label": [ + "Étude de genre", + "Gender studies", + "Genre" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb13516396s", + "label": [ + "Publics", + "Spectateurs", + "Public", + "Auditoire", + "Assistance (public)", + "Visiteurs", + "Fréquentation", + "Audience" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11948204j", + "label": [ + "Portes", + "Portes d'entrée", + "Issues", + "Portes de sortie" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh99005689", + "label": [ + "Doors" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-626", + "label": [ + "Cour" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtbFJDzRUZx0", + "label": [ + "cour", + "courtyard", + "patio", + "farmyard", + "yard" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11932772r", + "label": [ + "Rhône (cours d'eau)", + "Rhône" + ] + }, + { + "id": "http://GeoEthno#RHONE", + "label": [ + "Rhône", + "Ródano" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11938467s", + "label": [ + "Recherche", + "Activités de recherche", + "Recherche fondamentale", + "Projets de recherche", + "Programmes de recherche", + "Recherche scientifique", + "Recherche pure" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh2002006576", + "label": [ + "Research" + ] + }, + { + "id": "http://datos.bne.es/resource/XX525653", + "label": [ + "Investigación", + "Ciencia--Investigación", + "Investigación científica", + "Investigación y desarrollo", + "I+D" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-1738", + "label": [ + "Recherche", + "Recherche scientifique", + "Projet de recherche" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/7127", + "label": [ + "research", + "investigación", + "recherche" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtfi327GenVq", + "label": [ + 
"investigación", + "recherche", + "research" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/2651", + "label": [ + "emancipation", + "emancipación", + "émancipation" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb12571870c", + "label": [ + "Émancipation", + "Libération", + "Émancipation politique", + "Émancipation sociale" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrt1V0a6R9AjM", + "label": [ + "emancipación", + "emancipation", + "émancipation" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb16654044n", + "label": [ + "Féminicide", + "Gynécide" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85061743", + "label": [ + "Homicide", + "Femicide" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-108", + "label": [ + "Années 1990" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb12543155d", + "label": [ + "Années 1990", + "Nineties", + "Années nonante (vingtième siècle)", + "Années quatre-vingt-dix (vingtième siècle)", + "1990 (décennie)", + "Nonante, Années (vingtième siècle)", + "Quatre-vingt-dix, Années (vingtième siècle)", + "Décennie 1990", + "Années 90 (vingtième siècle)" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh96010818", + "label": [ + "Nineteen nineties", + "Nineties (Twentieth century decade)", + "1990s", + "90s (Twentieth century decade)" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrthVHcfmv1xQ", + "label": [ + "femme", + "mujer", + "woman" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119556551", + "label": [ + "Femmes", + "Femme" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh2002006249", + "label": [ + "Women", + "Women's work" + ] + }, + { + "id": "http://datos.bne.es/resource/XX4576329", + "label": [ + "Mujeres", + "Mujer" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/9344", + "label": [ + "woman", + "mujer", + "femme" + ] + }, + { + "id": 
"http://data.culture.fr/thesaurus/resource/ark:/67717/T990-921", + "label": [ + "Femme" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11978236s", + "label": [ + "Fait social" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-886", + "label": [ + "État" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/8062", + "label": [ + "state", + "estado", + "état" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11937050q", + "label": [ + "État", + "État, Théorie de l'", + "Théorie de l'État", + "Autorités publiques", + "Autorité publique", + "Puissance publique", + "Pouvoirs publics" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85127474", + "label": [ + "State, The", + "Commonwealth, The", + "Administration" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtaXHZy6qadS", + "label": [ + "violence", + "violence", + "violencia" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb16672499z", + "label": [ + "Joue", + "Bucca", + "Mala (anatomie)", + "Région génienne" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85022839", + "label": [ + "Cheek" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb120485802", + "label": [ + "Lois", + "Loi", + "Lois et règlements", + "Loi (droit)", + "Lois écrites", + "Actes législatifs", + "Statuts (lois)" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85127611", + "label": [ + "Statutes", + "Legislative acts", + "Legislative enactments", + "Acts, Legislative", + "Enactments, Legislative", + "Laws (Statutes)" + ] + }, + { + "id": "http://datos.bne.es/resource/XX528757", + "label": [ + "Leyes", + "Ley (Derecho)", + "Leyes y normas legales" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrt2XKbjOmPcT", + "label": [ + "law text", + "ley", + "loi" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/4707", + "label": [ + "law (individual)", + "ley (individual)", + "loi" + ] + }, + { + "id": 
"http://data.culture.fr/thesaurus/resource/ark:/67717/T990-1201", + "label": [ + "Législation", + "Décret", + "Circulaire", + "Arrêté", + "Texte Réglementaire", + "Loi" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11976108s", + "label": [ + "Hommes" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85083510", + "label": [ + "Men", + "Human males" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb13318687s", + "label": [ + "Droit", + "Sciences juridiques", + "Aspect juridique", + "Droit positif", + "Régime juridique", + "Systèmes juridiques", + "Système juridique", + "Science du droit", + "Ordre juridique" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh99004848", + "label": [ + "Law and legislation" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-719", + "label": [ + "Droit" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtgx5yOalcpT", + "label": [ + "derecho", + "droit", + "law" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtkvcoSA0U1y", + "label": [ + "poder", + "pouvoir", + "power", + "pouvoir (concept de)" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119325308", + "label": [ + "Mort", + "Mort (philosophie)", + "Philosophie de la mort", + "Décès" + ] + }, + { + "id": "http://datos.bne.es/resource/XX525535", + "label": [ + "Muerte", + "Muerte--Filosofía" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh99002024", + "label": [ + "Death and burial", + "Funeral and memorial services", + "Biography--Last years and death", + "Last illness", + "Burial", + "Interment", + "Funeral", + "Biography--Death and burial" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh99002023", + "label": [ + "Death" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-1368", + "label": [ + "Mort" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtNb90Egda4H", + "label": [ + "death", + "mort", + "muerte" + ] + }, + { + "id": 
"http://data.bnf.fr/ark:/12148/cb12447752t", + "label": [ + "Contrôle" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtgEqPFxaYUJ", + "label": [ + "ciencia", + "science", + "science" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/7472", + "label": [ + "science", + "ciencia", + "science" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb121155321", + "label": [ + "Science" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85118553", + "label": [ + "Science", + "Natural science", + "Sciences", + "Science of science" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11931619s", + "label": [ + "Angers (Maine-et-Loire)", + "Angers", + "Juliomagus (ville ancienne)" + ] + }, + { + "id": "http://GeoEthno#ANGERS", + "label": [ + "Angers" + ] + } + ], + "title": [ + "« S’émanciper sans se faire tuer ». Perspectives historiques de féminicides en France (1789-1860)" + ], + "topic": [ + { + "id": "http://aurehal.archives-ouvertes.fr/subject/shs.hist", + "label": "Histoire", + "proba": "1.000" + } + ], + "type": [ + { + "id": "http://isidore.science/ontology#conference", + "label": "Colloques et conférences" + } + ], + "url": [ + "https://hal.archives-ouvertes.fr/hal-02297823" + ] + }, + "_type": "_doc", + "sort": [ + 1567036800000 + ] + }, + { + "_id": "ZOkAjHABhI997ZgafDcy", + "_index": "isidore-documents-triple", + "_score": null, + "_source": { + "abstract": [ + "Les violències contra les dones han estat històricament espentades cap a la intimitat, han sigut llegides com a comportaments tolerats i legitimats, dinàmiques de poder persistents i assumides com alienes i particulars. La violència i l'agressió, manifestacions extremes del control dels cossos llegits com femenins, s'han entès com a pràctiques aïllades i allunyades del context polític i social. 
Existeix una evident vinculació entre les Violències Feminicides, les dinàmiques de poder i l'ordre legal de l'Estat, a més, l'estreta complicitat dels mitjans de comunicació que qüestionen, culpabilitzen a la víctima i converteixen la violència exercida sobre el seu cos en un espectacle de la crueltat i la morbositat. A partir de l'experimentació amb diferents processos litogràfics, del dibuix directe i del llibre alternatiu, aquest projecte pretén trobar connexions (lapidàries) entre les primerenques i aparentment innocents pràctiques feminicides des de la infància fins l'edat adulta i la producció activa al taller.", + "Violence against women has historically been pushed towards intimacy, read as tolerated and legitimized behaviours, persistent dynamics of Power and assumed as alien and private. Violence and aggression, extreme manifestations of bodies control read as feminine ones, have been understood as isolated practices, away from the political and social context. There is an obvious link between the Feminicide Violence, the power dynamics and the legal order of the State, in addition, the close complicity of the Mass Media which questions, blames the victim and turns the violence on her body in a spectacle of cruelty and morbidity. From a phase of experimentation with different lithographic processes, from direct drawing and from alternative book, this project aims to find (stoning) connections between the early and seemingly innocent feminicide practices from childhood to adulthood and the active production at the Graphic Workshop.", + "García García, G. (2019). QUI LLANÇA LA PRIMERA PEDRA? VIOLÈNCIA FEMINICIDA, UN CRIM D¿ESTAT. Interpretacions des dels processos litogràfics, el dibuix i el llibre alternatiu. 
http://hdl.handle.net/10251/123963", + "TFGM" + ], + "author": [ + { + "firstname": [ + "Gema" + ], + "id": "garcia_garcia_gema", + "lastname": [ + "García García" + ] + } + ], + "collection": [ + { + "id": "10670/2.p88wns", + "name": "Repositorio Institucional de la Universitat Politècnica de València" + } + ], + "date": [ + "2019-07-22" + ], + "datestamp": "2019-07-22", + "identifier": [ + "http://hdl.handle.net/10251/123963" + ], + "keyword": [ + "Libro de artista", + "gráfica", + "feminismo", + "género", + "libro objeto", + "grabado", + "instalación", + "voz", + "mensajes.", + "Artist's book", + "graphic", + "feminism", + "gender", + "object book", + "engraving", + "installation", + "voice", + "messages.", + "DIBUJO", + "Máster Universitario en Producción Artística-Màster Universitari en Producció Artística" + ], + "language": [ + { + "id": "http://lexvo.org/id/iso639-3/cat", + "label": "Catalan" + } + ], + "publisher": [ + { + "contact": "riunet@bib.upv.es", + "name": "Universitat Politècnica de València", + "url": "http://www.upv.es" + } + ], + "scope": [ + { + "id": "http://isidore.science/subject/publications", + "label": "Publications" + } + ], + "subject": [ + { + "id": "http://datos.bne.es/resource/XX717277", + "label": [ + "Ordenadores--Salida en microfilme", + "COM", + "Salida de ordenador a microfilme", + "Sistema COM" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85029516", + "label": [ + "Computer output microfilm devices", + "Microfilm devices, Computer output" + ] + }, + { + "id": "http://datos.bne.es/resource/XX4576470", + "label": [ + "Poder" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtkvcoSA0U1y", + "label": [ + "poder", + "pouvoir", + "power", + "pouvoir (concept de)" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtRxLPuVsO8f", + "label": [ + "atelier", + "taller", + "workshop", + "centre de production", + "manufacture" + ] + }, + { + "id": "http://datos.bne.es/resource/XX4576276", + "label": [ + 
"Medios de comunicación social", + "Mass media", + "Medios de comunicación de masas" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85081863", + "label": [ + "Mass media", + "Mass communication", + "Media, The", + "Media, Mass" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtK3Ric1GReJ", + "label": [ + "book", + "libro", + "livre" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/959", + "label": [ + "book", + "libro", + "livre" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtm2h0nxwEpA", + "label": [ + "artist", + "artista", + "artiste" + ] + }, + { + "id": "http://datos.bne.es/resource/XX525610", + "label": [ + "Feminismo", + "Liberación de la mujer", + "Movimiento feminista", + "Movimientos de liberación de la mujer" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85047741", + "label": [ + "Feminism", + "Women's liberation movement", + "Women--Emancipation", + "Women's lib", + "Emancipation of women", + "Women's movement", + "Feminist movement", + "Women's liberation" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtukZlm2RM1p", + "label": [ + "feminism", + "féminisme", + "feminismo" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtZhnbHzuSzG", + "label": [ + "object", + "objet", + "objeto", + "objets", + "petit objet" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtgI8DSt7eam", + "label": [ + "engraving", + "grabado", + "gravure", + "carving", + "grabamiento" + ] + }, + { + "id": "http://datos.bne.es/resource/XX524817", + "label": [ + "Grabado", + "Grabados" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85043995", + "label": [ + "Engraving", + "Engravings", + "Siderography", + "Copper engraving", + "Line-engraving", + "Steel-engraving" + ] + }, + { + "id": "http://datos.bne.es/resource/XX529061", + "label": [ + "Voz" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtzmgVgArOJh", + "label": [ + "dessin", + "dibujo", + "drawing" + ] + }, + { 
+ "id": "http://datos.bne.es/resource/XX525302", + "label": [ + "Dibujo", + "Dibujos" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85039408", + "label": [ + "Drawing", + "Drawings", + "Sketching" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/10044", + "label": [ + "plant production", + "producción", + "production végétale" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtg9XR0wFYKT", + "label": [ + "producción", + "production", + "production" + ] + }, + { + "id": "http://datos.bne.es/resource/XX525864", + "label": [ + "Producción", + "Producción (Teoría económica)" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85107209", + "label": [ + "Production (Economic theory)" + ] + } + ], + "title": [ + "QUI LLANÇA LA PRIMERA PEDRA? VIOLÈNCIA FEMINICIDA, UN CRIM D¿ESTAT. Interpretacions des dels processos litogràfics, el dibuix i el llibre alternatiu." + ], + "type": [ + { + "id": "http://isidore.science/ontology#thesis", + "label": "Mémoires, Thèses et HDR" + } + ], + "url": [ + "http://hdl.handle.net/10251/123963" + ] + }, + "_type": "_doc", + "sort": [ + 1563753600000 + ] + }, + { + "_id": "-efri3ABhI997ZgaBeFF", + "_index": "isidore-documents-triple", + "_score": null, + "_source": { + "abstract": [ + "Cet article analyse la façon dont le racisme structurel affecte la construction des hiérarchies de genre au Brésil, en se concentrant sur le problème de la violence domestique dans ce pays. En s'appuyant sur les analyses féministes décoloniales et noires, l’article examine comment le racisme a eu des effets sur les résultats de la loi « Maria da Penha », importante loi rédigée par des féministes et considéré comme un point d’accomplissement au Brésil. Pourtant, alors que le nombre de féminicides commis contre les femmes blanches au Brésil a diminué au cours de la dernière décennie, le nombre de féminicides commis contre les femmes noires a augmenté. 
En examinant les diverses stratégies des féministes « hégémoniques » contre la violence sexiste, l’article soutient que le racisme a produit une invisibilité des femmes noires comme actrices de leur propre émancipation.", + "This article discusses ways in which structural racism affects the construction of gender hierarchies in Brazil, focusing on the problem of domestic violence in the country. Resorting to decolonial and black feminist insights, it reflects on how racism impacts the results of the Maria da Penha Law, an important piece of legislation drafted by feminists, which is considered to be an important achievement in the contry. While the number of feminicides committed against white women in brazil has decreased in the past decade, the number of feminicides committed against black women has increased. Reviewing hegemonic feminist strategies against gender violence, it argues that racism has produced an invisibilty of black women as agents of their emancipation." + ], + "author": [ + { + "firstname": [ + "Márcia Nina" + ], + "id": "bernardes_marcia_nina", + "lastname": [ + "Bernardes" + ] + } + ], + "collection": [ + { + "id": "10670/2.j7686f", + "name": "La Revue des droits de l’homme" + } + ], + "date": [ + "2019-07-09" + ], + "datestamp": "2019-09-27", + "identifier": [ + "10670/1.4o5vxs", + "urn:doi:10.4000/revdh.7078" + ], + "keyword": [ + "racisme", + "blanchitude", + "violence domestique", + "racism", + "whiteness", + "domestic violence" + ], + "language": [ + { + "id": "http://lexvo.org/id/iso639-3/fra", + "label": "French" + } + ], + "publisher": [ + { + "contact": "contact@openedition.org", + "name": "OpenEdition", + "url": "http://www.openedition.org" + } + ], + "scope": [ + { + "id": "http://isidore.science/subject/publications", + "label": "Publications" + } + ], + "subject": [ + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtUVE1eP3TwN", + "label": [ + "human race", + "race", + "raza" + ] + }, + { + "id": 
"http://data.bnf.fr/ark:/12148/cb11956358g", + "label": [ + "Race", + "Races humaines", + "Race humaine" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85110232", + "label": [ + "Race" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119337991", + "label": [ + "Violence", + "Brutalité", + "Violences", + "Brutalités" + ] + }, + { + "id": "http://datos.bne.es/resource/XX526747", + "label": [ + "Violencia" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85143514", + "label": [ + "Violence", + "Violent behavior" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtaXHZy6qadS", + "label": [ + "violence", + "violence", + "violencia" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/13077", + "label": [ + "assay", + "ensayo", + "analyse" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-76", + "label": [ + "Analyse" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/397", + "label": [ + "analysis", + "análisis", + "analyse" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119756783", + "label": [ + "Analyse", + "Analyse chimique", + "Dosage" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh2002006221", + "label": [ + "Analysis", + "Methods of analysis", + "Analysis and chemistry", + "Analytical methods", + "Chemical analysis", + "Analysis methods", + "Analysis and examination" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh99002428", + "label": [ + "Assaying" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb120485802", + "label": [ + "Lois", + "Loi", + "Lois et règlements", + "Loi (droit)", + "Lois écrites", + "Actes législatifs", + "Statuts (lois)" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85127611", + "label": [ + "Statutes", + "Legislative acts", + "Legislative enactments", + "Acts, Legislative", + "Enactments, Legislative", + "Laws (Statutes)" + ] + }, + { + "id": "http://datos.bne.es/resource/XX528757", + 
"label": [ + "Leyes", + "Ley (Derecho)", + "Leyes y normas legales" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrt2XKbjOmPcT", + "label": [ + "law text", + "ley", + "loi" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/4707", + "label": [ + "law (individual)", + "ley (individual)", + "loi" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-1201", + "label": [ + "Législation", + "Décret", + "Circulaire", + "Arrêté", + "Texte Réglementaire", + "Loi" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb12004921v", + "label": [ + "Violence familiale", + "Violence dans la famille", + "Violence domestique" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85047071", + "label": [ + "Family violence", + "Domestic violence", + "Household violence", + "Intrafamily violence", + "Interparental violence" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtvIIi58hmrH", + "label": [ + "racism", + "racisme", + "racismo" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/2bf94cc8-d3c1-4db8-89ce-4ec8d298002e", + "label": [ + "Discrimination", + "Xénophobie", + "Racisme" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11940470z", + "label": [ + "Racisme", + "Haine raciale" + ] + }, + { + "id": "http://datos.bne.es/resource/XX4576282", + "label": [ + "Racismo", + "Prejuicio racial" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85110266", + "label": [ + "Racism", + "Race prejudice", + "Racial bias", + "Race bias", + "Bias, Racial" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119361544", + "label": [ + "Construction", + "Génie architectural" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85017693", + "label": [ + "Building", + "Architectural engineering", + "Construction", + "Buildings--Design and construction", + "Engineering, Architectural" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtM9HMWQTGJV", + "label": [ + "building 
(process of)", + "construcción", + "construction" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-576", + "label": [ + "Construction", + "Appareillage" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb123778307", + "label": [ + "Hiérarchie", + "Hiérarchies" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85060646", + "label": [ + "Hierarchies" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/63ce2558-0c0f-47d9-b408-bc3648c95b74", + "label": [ + "Étude de genre", + "Gender studies", + "Genre" + ] + }, + { + "id": "http://GeoEthno#BRESIL", + "label": [ + "Brésil", + "Brasil", + "Brazil", + "Etats-Unis du Brésil", + "République fédérative du Brésil", + "Federative Republic of Brazil" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/4599", + "label": [ + "land", + "tierra", + "pays" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb16570175g", + "label": [ + "États", + "États souverains", + "Pays", + "Puissances" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb119313921", + "label": [ + "Féministes" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85047761", + "label": [ + "Feminists" + ] + }, + { + "id": "http://datos.bne.es/resource/XX529695", + "label": [ + "Feministas" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11975863m", + "label": [ + "Noires", + "Femmes noires" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85147364", + "label": [ + "Women, Black", + "Black women", + "Women, Negro" + ] + }, + { + "id": "http://datos.bne.es/resource/XX5238537", + "label": [ + "Mujeres negras", + "Mujeres afrodescendientes", + "Mujeres de ascendencia africana", + "Negras" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/14919", + "label": [ + "point", + "punto", + "point" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrtkopriOiyE4", + "label": [ + "nombre", + "number", + "número", + "chiffre" + ] + }, + { + "id": 
"http://data.bnf.fr/ark:/12148/cb12269548k", + "label": [ + "Blanches", + "Femmes blanches" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh89006763", + "label": [ + "Women, White", + "White women" + ] + }, + { + "id": "http://data.culture.fr/thesaurus/resource/ark:/67717/T990-2093", + "label": [ + "Voie urbaine", + "Avenue", + "Boulevard", + "Cours" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/6660", + "label": [ + "product", + "producto", + "produit" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb16770627b", + "label": [ + "Invisibilité", + "Homme invisible" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh2011000325", + "label": [ + "Invisibility" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11930823b", + "label": [ + "Actrices", + "Comédiennes", + "Actrices de théâtre", + "Artistes dramatiques" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85000755", + "label": [ + "Actresses", + "Female actors", + "Women actors" + ] + }, + { + "id": "http://datos.bne.es/resource/XX524525", + "label": [ + "Actrices" + ] + }, + { + "id": "http://www.eionet.europa.eu/gemet/concept/2651", + "label": [ + "emancipation", + "emancipación", + "émancipation" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb12571870c", + "label": [ + "Émancipation", + "Libération", + "Émancipation politique", + "Émancipation sociale" + ] + }, + { + "id": "http://ark.frantiq.fr/ark:/26678/pcrt1V0a6R9AjM", + "label": [ + "emancipación", + "emancipation", + "émancipation" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb169817173", + "label": [ + "Gendèr", + "Gendèr panerus", + "Gender", + "Gendèr barung", + "Gèndèr", + "Gendèr panembung" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85053730", + "label": [ + "Gender (Musical instrument)" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb11965103m", + "label": [ + "Country", + "Western music", + "Musique country", + "Western and country music", + 
"Musique hillbilly", + "Chansons country", + "Country music", + "Country (musique)", + "Hillbilly music", + "Musique western", + "Country and western music" + ] + }, + { + "id": "http://datos.bne.es/resource/XX536714", + "label": [ + "Country", + "Country (Música)", + "Hillbilly (Música)", + "Música country", + "Música hillbilly" + ] + }, + { + "id": "http://id.loc.gov/authorities/subjects/sh85033470", + "label": [ + "Country music", + "Western and country music", + "Country and western music", + "Country music--United States", + "Hillbilly music" + ] + }, + { + "id": "http://data.bnf.fr/ark:/12148/cb16960606h", + "label": [ + "Bancs à étirer", + "Orfèvres, Bancs d'", + "Bancs d'orfèvres", + "Argues" + ] + } + ], + "title": [ + "La race de la violence de genre : analyse de la loi brésilienne contre la violence domestique" + ], + "topic": [ + { + "id": "http://aurehal.archives-ouvertes.fr/subject/shs.socio", + "label": "Sociologie", + "proba": "1.000" + }, + { + "id": "http://aurehal.archives-ouvertes.fr/subject/shs.genre", + "label": "Estudios sobre el género", + "proba": "1.000" + } + ], + "type": [ + { + "id": "http://isidore.science/ontology#article", + "label": "Articles" + } + ], + "url": [ + "http://journals.openedition.org/revdh/7078" + ] + }, + "_type": "_doc", + "sort": [ + 1562630400000 + ] + } + ], + "max_score": null, + "total": { + "relation": "eq", + "value": 16 + } + }, + "timed_out": false, + "took": 15 +} From df5fff65c03991f02c9548a2335d5b1da5733490 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Wed, 25 Mar 2020 13:29:47 +0100 Subject: [PATCH 55/99] wip --- server/workers/triple/src/search_triple.py | 38 +++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/server/workers/triple/src/search_triple.py b/server/workers/triple/src/search_triple.py index 10cb0e1c3..5508834ab 100644 --- a/server/workers/triple/src/search_triple.py +++ b/server/workers/triple/src/search_triple.py @@ -3,6 +3,7 @@ import json import 
redis from elasticsearch import Elasticsearch +from elasticsearch_dsl import Search import pandas as pd @@ -44,6 +45,19 @@ def build_sort_order(self, parameters): sort.append("date:desc") return sort + @staticmethod + def parse_query(querystring): + parsed_query = {} + pos_phrases = [] + neg_phrases = [] + pos_keywords = [] + neg_keywords = [] + parsed_query["pos_phrases"] = pos_phrases + parsed_query["neg_phrases"] = neg_phrases + parsed_query["pos_keywords"] = pos_keywords + parsed_query["neg_keywords"] = neg_keywords + return parsed_query + def build_body(self, parameters): body = {"query": { "bool": { @@ -63,10 +77,32 @@ def build_body(self, parameters): return body def search(self, parameters): + s = Search(using=self.es) + parsed_query = self.parse_query(parameters.get('q')) + for q in parsed_query.get('pos_phrases'): + if q: + s = s.query("multi_match", query=q, fields=['title', 'body']) + for q in parsed_query.get('pos_keywords'): + if q: + s = s.query("multi_match", query=q, fields=['title', 'body']) + for q in parsed_query.get('neg_phrases'): + if q: + s = s.exclude("multi_match", query=q, fields=['title', 'body']) + for q in parsed_query.get('neg_keywords'): + if q: + s = s.exclude("multi_match", query=q, fields=['title', 'body']) + s = s.query("range", date=self.build_date_field( + parameters.get('from'), + parameters.get('to'))) index = "isidore-documents-triple" + # res = self.es.search( + # index=index, + # body=self.build_body(parameters), + # size=parameters.get('limit', 100), + # sort=self.build_sort_order(parameters)) res = self.es.search( index=index, - body=self.build_body(parameters), + body=s.to_dict(), size=parameters.get('limit', 100), sort=self.build_sort_order(parameters)) if parameters.get('raw') is True: From cb8459f389c0f8df83e49818d34153f1d68b3a4f Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Thu, 26 Mar 2020 20:44:57 +0100 Subject: [PATCH 56/99] first set of tests; first improvement of query parsing --- 
examples/triple/README.md | 4 +- server/workers/triple/requirements.txt | 1 + server/workers/triple/src/search_triple.py | 60 +++++++++++-------- .../triple/src/tests/test_build_body.py | 56 +++++++++++++++++ .../triple/src/tests/test_process_result.py | 5 +- 5 files changed, 94 insertions(+), 32 deletions(-) diff --git a/examples/triple/README.md b/examples/triple/README.md index 556a2594e..fee55d48b 100644 --- a/examples/triple/README.md +++ b/examples/triple/README.md @@ -66,6 +66,4 @@ docker-compose down ### Deploying the example: -Use a deployment script, or manually perform following actions: - -* use a script. +Use a deployment script, or manually deploy an example as described in [HOWTO: search repos](../../doc/howto_search_repos.md): diff --git a/server/workers/triple/requirements.txt b/server/workers/triple/requirements.txt index 3e2eadb89..1f04e19bd 100644 --- a/server/workers/triple/requirements.txt +++ b/server/workers/triple/requirements.txt @@ -1,4 +1,5 @@ elasticsearch +elasticsearch_dsl redis certifi pandas diff --git a/server/workers/triple/src/search_triple.py b/server/workers/triple/src/search_triple.py index 5508834ab..af02dbfba 100644 --- a/server/workers/triple/src/search_triple.py +++ b/server/workers/triple/src/search_triple.py @@ -3,7 +3,7 @@ import json import redis from elasticsearch import Elasticsearch -from elasticsearch_dsl import Search +from elasticsearch_dsl import Search, Q import pandas as pd @@ -46,17 +46,33 @@ def build_sort_order(self, parameters): return sort @staticmethod - def parse_query(querystring): - parsed_query = {} - pos_phrases = [] - neg_phrases = [] - pos_keywords = [] - neg_keywords = [] - parsed_query["pos_phrases"] = pos_phrases - parsed_query["neg_phrases"] = neg_phrases - parsed_query["pos_keywords"] = pos_keywords - parsed_query["neg_keywords"] = neg_keywords - return parsed_query + def parse_query(query, fields): + qq = {} + # must_groups = list(re.finditer('([\w\-"]* +(?:and|&&|\+) *[\w\-"]*)+', query, 
re.MULTILINE)) + must_groups = [m.string for m in (re.finditer('([\w\-"]* +(?:and|&&|\+) *[\w\-"]*)+', query, re.I))] + must = [] + for g in must_groups: + terms = [t.strip() for t in re.split(r' and|&&|\+ +', g) if t.strip()] + for term in terms: + if term: + must.append(Q('multi_match', query=term, fields=fields)) + query = re.sub(r'([\w\-"]* +(?:and|&&|\+) *[\w\-"]*)+', '', query, re.M, re.I) + should_groups = [m.string for m in re.finditer('([\w\-"]+ +(?:or|\|+) *[\w\-"]*)+', query, re.I)] + query = re.sub(r'([\w\-"]+ +(?:or|\|+) *[\w\-"]*)+', '', query, re.I) + should_terms = re.split(" +", query) + should = [] + for g in should_groups + should_terms: + terms = [t.strip() for t in re.split(r' or|\|+', g) if t.strip()] + for term in terms: + if term: + should.append(Q('multi_match', query=term, fields=fields)) + exclude = [] + if must: + qq["must"] = must + if should: + qq["should"] = should + q = Q('bool', **qq) + return q def build_body(self, parameters): body = {"query": { @@ -78,23 +94,15 @@ def build_body(self, parameters): def search(self, parameters): s = Search(using=self.es) - parsed_query = self.parse_query(parameters.get('q')) - for q in parsed_query.get('pos_phrases'): - if q: - s = s.query("multi_match", query=q, fields=['title', 'body']) - for q in parsed_query.get('pos_keywords'): - if q: - s = s.query("multi_match", query=q, fields=['title', 'body']) - for q in parsed_query.get('neg_phrases'): - if q: - s = s.exclude("multi_match", query=q, fields=['title', 'body']) - for q in parsed_query.get('neg_keywords'): - if q: - s = s.exclude("multi_match", query=q, fields=['title', 'body']) + # TODO: replace from parameters + fields = ["title", "abstract"] + q = self.parse_query(parameters.get('q'), fields) + s = s.query(q) s = s.query("range", date=self.build_date_field( parameters.get('from'), parameters.get('to'))) index = "isidore-documents-triple" + sorting = self.build_sort_order(parameters) # res = self.es.search( # index=index, # 
body=self.build_body(parameters), @@ -104,7 +112,7 @@ def search(self, parameters): index=index, body=s.to_dict(), size=parameters.get('limit', 100), - sort=self.build_sort_order(parameters)) + sort=sorting) if parameters.get('raw') is True: return res else: diff --git a/server/workers/triple/src/tests/test_build_body.py b/server/workers/triple/src/tests/test_build_body.py index e69de29bb..804b6cda9 100644 --- a/server/workers/triple/src/tests/test_build_body.py +++ b/server/workers/triple/src/tests/test_build_body.py @@ -0,0 +1,56 @@ +from search_triple import TripleClient +import json +import pytest + + +@pytest.fixture +def triple_client(): + return TripleClient({ + "host": "localhost", + "user": "", + "pass": "", + "port": 9200}) + + +def test_query_parsing(triple_client): + queries = {'2019-ncov and sars-cov-2': {'bool': {'must': [{'multi_match': {'title': '2019-ncov', + 'fields': ['title', 'abstract']}}, + {'multi_match': {'title': 'sars-cov-2', + 'fields': ['title', 'abstract']}}]}}, + 'term1 term2 term3': {'bool': {'should': [{'multi_match': {'title': 'term1', + 'fields': ['title', 'abstract']}}, + {'multi_match': {'title': 'term2', 'fields': ['title', 'abstract']}}, + {'multi_match': {'title': 'term3', 'fields': ['title', 'abstract']}}]}} + # '(dog and cat) or (sun and moon)': '(textus:dog and textus:cat) or (textus:sun and textus:moon)', + # 'cats and "mice and cheese"': 'textus:cats and textus:"mice and cheese"', + # 'cats or (sun and moon)': 'textus:cats or (textus:sun and textus:moon)', + # '(parentheses or ("cats and dogs"))': '(textus:parentheses or (textus:"cats and dogs"))', + # '-2019-ncov': '-textus:2019-ncov', + # '--2019-ncov': '-textus:2019-ncov', + # '(a and b -"c")': '(textus:a and textus:b -textus:"c")', + # '2019-ncov or sars-cov-2': 'textus:2019-ncov or textus:sars-cov-2', + # '"2019-ncov" or "sars-cov-2"': 'textus:"2019-ncov" or textus:"sars-cov-2"', + # '"2019-ncov"+"sars-cov-2"': 'textus:"2019-ncov" textus:"sars-cov-2"', + # 
'"2019-ncov" + "sars-cov-2"': 'textus:"2019-ncov" textus:"sars-cov-2"', + # '"2019-ncov"+"sars-cov-2"': 'textus:"2019-ncov" textus:"sars-cov-2"', + # '(cats + dogs) or (sun - moon)': '(textus:cats textus:dogs) or (textus:sun -textus:moon)', + # 'science -(research or knowledge or theory)': 'textus:science -(textus:research or textus:knowledge or textus:theory)', + # 'orandor or andorand': 'textus:orandor or textus:andorand', + # 'science -research -knowledge -theory': 'textus:science -textus:research -textus:knowledge -textus:theory', + # 'a+b': 'textus:a textus:b', + # 'and or not': 'and or textus:not', + # '-(dogs+cats)': '-(textus:dogs textus:cats)', + # '((cats) and dogs)': '((textus:cats) and textus:dogs)', + # '""knowledge - and +domain visualization""': 'textus:""knowledge - and +domain visualization""', + # '((-""hello"") or test)': '((-textus:""hello"") or textus:test)', + # 'cats - dogs': 'textus:cats -textus:dogs', + # 'cats --- dogs': 'textus:cats -textus:dogs', + # 'cats +++ dogs': 'textus:cats textus:dogs', + # '\'\'test\'\'': "textus: ''test''", + # '+++++++++++++++science': 'textus:science', + # '+a -b': 'textus:a -textus:b', + # 'sars-cov-5 or 2019-ncov and sars-cov-2 && sars-cov-3 or sars-cov-4 + sars-cov-6' + } + for q, expected in queries.items(): + fields = ["title", "abstract"] + assert triple_client.parse_query(q, fields) == expected diff --git a/server/workers/triple/src/tests/test_process_result.py b/server/workers/triple/src/tests/test_process_result.py index 5b05000fb..aaf3fc44d 100644 --- a/server/workers/triple/src/tests/test_process_result.py +++ b/server/workers/triple/src/tests/test_process_result.py @@ -1,14 +1,13 @@ -from ..search_triple import TripleClient +from search_triple import TripleClient import json import pytest -import unittest from pandas.testing import assert_frame_equal import pandas as pd @pytest.fixture def raw_data(): - with open("testdata/raw_data.json") as infile: + with open("src/tests/testdata/raw_data.json") 
as infile: return json.load(infile) From 94af5fc58cc9347760ccbb273108e5024bc8c0e0 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Fri, 27 Mar 2020 13:35:01 +0100 Subject: [PATCH 57/99] first backend tests; fix backend redis scope --- examples/triple/README.md | 5 +++ server/workers/backend/src/headstart.py | 14 ++++---- server/workers/backend/src/tests/test_data.py | 36 +++++++++++++++++++ .../backend/src/tests/test_summarization.py | 4 +-- .../tests/testdata/expected_output_data.json | 2 +- .../triple/src/tests/test_process_result.py | 2 +- 6 files changed, 51 insertions(+), 12 deletions(-) create mode 100644 server/workers/backend/src/tests/test_data.py diff --git a/examples/triple/README.md b/examples/triple/README.md index fee55d48b..89458608f 100644 --- a/examples/triple/README.md +++ b/examples/triple/README.md @@ -59,6 +59,11 @@ docker-compose build docker-compose up -d ``` +* all in one: +``` +docker-compose up -d --build +``` + * shut service down ``` docker-compose down diff --git a/server/workers/backend/src/headstart.py b/server/workers/backend/src/headstart.py index 816a15414..b42e91f6b 100644 --- a/server/workers/backend/src/headstart.py +++ b/server/workers/backend/src/headstart.py @@ -8,17 +8,11 @@ import pandas as pd -with open("redis_config.json") as infile: - redis_config = json.load(infile) - -redis_store = redis.StrictRedis(**redis_config) - - class Backend(object): - def __init__(self): + def __init__(self, wd="./"): # path should be to where in the docker container the Rscript are - self.wd = "./" + self.wd = wd self.command = 'Rscript' self.hs = os.path.abspath(os.path.join(self.wd, "run_vis_layout.R")) self.default_params = {} @@ -61,5 +55,9 @@ def run(self): if __name__ == '__main__': + with open("redis_config.json") as infile: + redis_config = json.load(infile) + + redis_store = redis.StrictRedis(**redis_config) hsb = Backend() hsb.run() diff --git a/server/workers/backend/src/tests/test_data.py 
b/server/workers/backend/src/tests/test_data.py new file mode 100644 index 000000000..ae474d563 --- /dev/null +++ b/server/workers/backend/src/tests/test_data.py @@ -0,0 +1,36 @@ +import json +import pytest +from src.headstart import Backend +import pandas as pd +import pandas.api.types as ptypes + + +@pytest.fixture +def map_data(): + with open("tests/testdata/expected_output_data.json") as infile: + return json.load(infile).get('map_data') + + +@pytest.fixture +def input_data(): + with open("tests/testdata/input_data.json") as infile: + return json.load(infile) + + +@pytest.fixture +def backend(): + return Backend("../../../preprocessing/other-scripts") + + +@pytest.fixture +def params(): + return {"service": "triple", "q": "femicide"} + + +def test_map_data_dtypes(backend, input_data, params): + params = backend.add_default_params(params) + map_data = json.loads(backend.create_map(params, input_data)) + assert isinstance(map_data, list) + df = pd.DataFrame.from_records(map_data) + for col in df.columns: + assert ptypes.is_string_dtype(df[col]) diff --git a/server/workers/backend/src/tests/test_summarization.py b/server/workers/backend/src/tests/test_summarization.py index bd4d8ecbe..a970929f8 100644 --- a/server/workers/backend/src/tests/test_summarization.py +++ b/server/workers/backend/src/tests/test_summarization.py @@ -1,8 +1,8 @@ -from headstart import Backend +from src.headstart import Backend import pytest - import pandas as pd + @pytest.fixture def input_data(): pass diff --git a/server/workers/backend/src/tests/testdata/expected_output_data.json b/server/workers/backend/src/tests/testdata/expected_output_data.json index e6e41458d..0831c609a 100644 --- a/server/workers/backend/src/tests/testdata/expected_output_data.json +++ b/server/workers/backend/src/tests/testdata/expected_output_data.json @@ -1,4 +1,4 @@ -{"input_data": [ +{"map_data": [ { "id": "10670/1.2qtip2", "title": "Brésil(s) n° 16 - 2019 : Género y justicia", diff --git 
a/server/workers/triple/src/tests/test_process_result.py b/server/workers/triple/src/tests/test_process_result.py index aaf3fc44d..d3106d3aa 100644 --- a/server/workers/triple/src/tests/test_process_result.py +++ b/server/workers/triple/src/tests/test_process_result.py @@ -7,7 +7,7 @@ @pytest.fixture def raw_data(): - with open("src/tests/testdata/raw_data.json") as infile: + with open("tests/testdata/raw_data.json") as infile: return json.load(infile) From 539fd1b5e21cbe791a25aaa0de875fb46ac2f047 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Sat, 28 Mar 2020 14:29:50 +0100 Subject: [PATCH 58/99] collab map WIP --- docker-compose.yml | 8 + server/workers/services/requirements.txt | 3 + server/workers/services/src/apis/gsheets.py | 192 ++++++++++++++++++++ server/workers/services/src/apis/triple.py | 14 +- server/workers/services/src/apis/utils.py | 15 ++ server/workers/services/src/app.py | 4 + 6 files changed, 223 insertions(+), 13 deletions(-) create mode 100644 server/workers/services/src/apis/gsheets.py create mode 100644 server/workers/services/src/apis/utils.py diff --git a/docker-compose.yml b/docker-compose.yml index 9bc638622..876c79f23 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -48,5 +48,13 @@ services: source: /var/log/headstart target: /var/log/headstart + hsdb: + image: postgres + volumes: + - db:/var/lib/postgresql/data + ports: + - 5432:5432 + volumes: redis: + db: diff --git a/server/workers/services/requirements.txt b/server/workers/services/requirements.txt index 06918500e..1a26a4260 100644 --- a/server/workers/services/requirements.txt +++ b/server/workers/services/requirements.txt @@ -9,3 +9,6 @@ aioredis pandas pyyaml flasgger +google-api-python-client +google-auth-httplib2 +google-auth-oauthlib diff --git a/server/workers/services/src/apis/gsheets.py b/server/workers/services/src/apis/gsheets.py new file mode 100644 index 000000000..22a9fc373 --- /dev/null +++ b/server/workers/services/src/apis/gsheets.py @@ -0,0 
+1,192 @@ +import os +import json +import uuid +import pickle +import redis +from datetime import datetime + +from googleapiclient.discovery import build +from google_auth_oauthlib.flow import InstalledAppFlow +from google.auth.transport.requests import Request + +from flask import Blueprint, request, make_response, jsonify, abort +from flask_restx import Namespace, Resource, fields +from utils import get_key +from models import Revisions, Visualizations +from app import db +import pandas as pd + +with open("redis_config.json") as infile: + redis_config = json.load(infile) + +redis_store = redis.StrictRedis(**redis_config) + +gsheets_ns = Namespace("google_sheets", description="Google Sheets API operations") + +# If modifying these scopes, delete the file token.pickle. +SCOPES = ['https://www.googleapis.com/auth/spreadsheets.readonly'] + + +# search_param_schema = SearchParamSchema() + +def authenticate(): + creds = None + if os.path.exists('token.pickle'): + with open('token.pickle', 'rb') as token: + creds = pickle.load(token) + if not creds or not creds.valid: + if creds and creds.expired and creds.refresh_token: + creds.refresh(Request()) + else: + flow = InstalledAppFlow.from_client_secrets_file( + 'config/credentials.json', SCOPES) + creds = flow.run_local_server(port=0) + # Save the credentials for the next run + with open('token.pickle', 'wb') as token: + pickle.dump(creds, token) + return creds + + +gsheets_service = build('sheets', 'v4', credentials=authenticate()) +sheet = gsheets_service.spreadsheets() + + +# Call the Sheets API + + +def get_sheet_content(sheet_id, sheet_range): + res = sheet.values().get(spreadsheetId=sheet_id, + range=sheet_range).execute() + return res + + +def validate_data(df): + df.columns = df.iloc[0] + df.drop([0, 1], inplace=True) + # add column: Valid Bool + return df + + +def preprocess_data(df): + metadata = pd.DataFrame() + metadata["id"] = df.ID + metadata["title"] = df.Title + metadata["authors"] = df.Authors + 
metadata["paper_abstract"] = df.Abstract + metadata["published_in"] = df["Publication Venue"] + metadata["year"] = df["Publication Date"] + metadata["url"] = df["Link to PDF"] + metadata["readers"] = 0 + metadata["subject"] = df.Keywords + metadata["oa_state"] = df["Open Access"] + metadata["link"] = df["Link to PDF"] + metadata["relevance"] = df.index + metadata["comments"] = df.index + metadata["tags"] = df.Tags + text = pd.DataFrame() + text["id"] = metadata["id"] + text["content"] = metadata.apply(lambda x: ". ".join(x[["title", + "paper_abstract", + "subject", + "comments"]]), axis=1) + input_data = {} + input_data["metadata"] = metadata.to_json(orient='records') + input_data["text"] = text.to_json(orient='records') + return input_data + + +def get_sheet_id(vis_id): + # mock functionality + mock_db = {"covid19": "1csxG23x99DcxoEud782Bji76C7mGxKkAVMBz8gdf_0A"} + # replace with e.g. + # sheet_id = Visualizations.query.filter_by(vis_id=vis_id).first().??? + return mock_db.get(vis_id) + + +search_query = gsheets_ns.model("SearchQuery", + {"vis_id": fields.String(example='covid19', + description='hardcoded vis_id', + required=True)}) + + +@gsheets_ns.route('/search') +class Search(Resource): + @gsheets_ns.doc(responses={200: 'OK', + 400: 'Invalid search parameters'}) + @gsheets_ns.expect(search_query) + @gsheets_ns.produces(["application/json"]) + def post(self): + """ + """ + params = request.get_json() + # fill default params + params["q"] = params["vis_id"] + params["vis_type"] = "overview" + sheet_id = get_sheet_id(params.get('vis_id')) + covid19_range = "Resources!A1:N200" + sheet_content = get_sheet_content(sheet_id, covid19_range) + raw = pd.DataFrame(sheet_content.get('values')) + input_data = preprocess_data(raw) + + k = str(uuid.uuid4()) + res = {} + res["id"] = k + res["input_data"] = input_data + res["params"] = params + redis_store.rpush("input_data", json.dumps(res)) + result = get_key(redis_store, k) + + # headers = {} + # headers["Content-Type"] = 
"application/json" + # return make_response(result, + # 200, + # headers) + return pd.DataFrame. + + +def writeRevision(vis_id, data, rev_id=None): + + vis = Visualizations.query.filter_by(vis_id=vis_id).first() + + rev = rev_id + if rev is None: + r_id = vis.vis_latest + rev = r_id + 1 + + query = vis.vis_clean_query + + new_rev = Revisions({ + "rev_id": rev, + "rev_vis": vis_id, + "rev_user": "System", + "rev_timestamp": datetime.utcnow(), + "rev_comment": "Visualization created", + "rev_data": data, + "vis_query": query + }) + db.session.add(new_rev) + db.session.commit() + + +@gsheets_ns.route('/createVisualization') +class createVisualization(Resource): + def post(self, vis_id): + # param: map_id + # get map context for map ID + # get latest revision data via sheets ID from context + # if not assert equal + # add revision number to map context + # get latest revision + pass + + +@gsheets_ns.route('/existsVisualization') +class existsVisualization(Resource): + def get(self, vis_id): + map = Visualizations.query.filter_by(vis_id=vis_id).first() + exists = True if map else False + make_response(exists, + 200) + + +# @gsheets_ns.route('/get') diff --git a/server/workers/services/src/apis/triple.py b/server/workers/services/src/apis/triple.py index 12566fc12..f13f5aea0 100644 --- a/server/workers/services/src/apis/triple.py +++ b/server/workers/services/src/apis/triple.py @@ -10,6 +10,7 @@ from flask import Blueprint, request, make_response, jsonify, abort from flask_restx import Namespace, Resource, fields from .request_validators import SearchParamSchema +from utils import get_key with open("redis_config.json") as infile: @@ -23,19 +24,6 @@ search_param_schema = SearchParamSchema() -def get_key(store, key): - while True: - res = redis_store.get(key+"_output") - if res is None: - time.sleep(0.5) - else: - result = json.loads(res.decode('utf-8')) - redis_store.delete(key) - redis_store.delete(key+"_output") - break - return result - - search_query = 
triple_ns.model("SearchQuery", {"q": fields.String(example='feminicide', description='query string', diff --git a/server/workers/services/src/apis/utils.py b/server/workers/services/src/apis/utils.py new file mode 100644 index 000000000..b2475c0ba --- /dev/null +++ b/server/workers/services/src/apis/utils.py @@ -0,0 +1,15 @@ +import json +import time + + +def get_key(store, key): + while True: + res = store.get(key+"_output") + if res is None: + time.sleep(0.5) + else: + result = json.loads(res.decode('utf-8')) + store.delete(key) + store.delete(key+"_output") + break + return result diff --git a/server/workers/services/src/app.py b/server/workers/services/src/app.py index 6c5a01f3c..388a6699f 100644 --- a/server/workers/services/src/app.py +++ b/server/workers/services/src/app.py @@ -1,7 +1,9 @@ from flask import Flask from flask_restx import Api from flask_cors import CORS +from flask_sqlalchemy import SQLAlchemy from apis.triple import triple_ns +from apis.triple import gsheets_ns from werkzeug.middleware.proxy_fix import ProxyFix from config import settings from utils.monkeypatches import ReverseProxied, __schema__, specs_url, _register_apidoc, inject_flasgger @@ -26,6 +28,7 @@ def api_patches(app, settings): app = Flask('v1', instance_relative_config=True) app = inject_flasgger(app) +db = SQLAlchemy(app) app.wsgi_app = ProxyFix(app.wsgi_app, x_proto=1, x_port=1, x_for=1, x_host=1, x_prefix=1) app.wsgi_app = ReverseProxied(app.wsgi_app) app.config.from_object('config.settings') @@ -34,6 +37,7 @@ def api_patches(app, settings): api = api_patches(app, settings) api.add_namespace(triple_ns, path='/triple') +api.add_namespace(gsheets_ns, path='/gsheets') if __name__ == '__main__': From 7dcb1aac6083c89fbe5fe60b0dd6ebc29d13156f Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Sat, 28 Mar 2020 20:30:21 +0100 Subject: [PATCH 59/99] more WIP --- server/workers/services/requirements.txt | 1 + server/workers/services/src/apis/gsheets.py | 120 ++++++++++++------ 
server/workers/services/src/apis/triple.py | 2 +- server/workers/services/src/app.py | 37 ++++-- .../services/src/templates/tables.html | 18 +++ 5 files changed, 123 insertions(+), 55 deletions(-) create mode 100644 server/workers/services/src/templates/tables.html diff --git a/server/workers/services/requirements.txt b/server/workers/services/requirements.txt index 1a26a4260..78aab49d7 100644 --- a/server/workers/services/requirements.txt +++ b/server/workers/services/requirements.txt @@ -7,6 +7,7 @@ redis hiredis aioredis pandas +pandas_schema pyyaml flasgger google-api-python-client diff --git a/server/workers/services/src/apis/gsheets.py b/server/workers/services/src/apis/gsheets.py index 22a9fc373..40d2a16d7 100644 --- a/server/workers/services/src/apis/gsheets.py +++ b/server/workers/services/src/apis/gsheets.py @@ -9,12 +9,22 @@ from google_auth_oauthlib.flow import InstalledAppFlow from google.auth.transport.requests import Request -from flask import Blueprint, request, make_response, jsonify, abort +from flask import Blueprint, request, make_response, jsonify, abort, render_template from flask_restx import Namespace, Resource, fields -from utils import get_key +from apis.utils import get_key from models import Revisions, Visualizations -from app import db +from database import db import pandas as pd +from pandas_schema import Column, Schema +from pandas_schema.validation import (LeadingWhitespaceValidation, + TrailingWhitespaceValidation, + CanConvertValidation, + MatchesPatternValidation, + InRangeValidation, + InListValidation, + DateFormatValidation, + CustomSeriesValidation) + with open("redis_config.json") as infile: redis_config = json.load(infile) @@ -22,7 +32,7 @@ redis_store = redis.StrictRedis(**redis_config) gsheets_ns = Namespace("google_sheets", description="Google Sheets API operations") - +app = Blueprint('googlesheets', __name__) # If modifying these scopes, delete the file token.pickle. 
SCOPES = ['https://www.googleapis.com/auth/spreadsheets.readonly'] @@ -61,10 +71,31 @@ def get_sheet_content(sheet_id, sheet_range): def validate_data(df): + schema = Schema([ + Column('ID', []), + Column('Title', []), + Column('Authors', []), + Column('Abstract', [CustomSeriesValidation(lambda s: ~s.str.len() < 10, 'Abstract not long enough')]), + Column('Publication Venue', []), + Column('Publication Date', [DateFormatValidation("%Y-%m-%d")]), + Column('Link to PDF', []), + Column('Keywords', []), + Column('Open access', [InListValidation(["yes", "no", "unknown"], case_sensitive=False)]), + Column('Comments', []), + Column('Tags', []), + Column('Include in map?', [InListValidation(["yes", "no"])]), + Column('Type', []), + Column('Area', []) + ]) df.columns = df.iloc[0] df.drop([0, 1], inplace=True) + df.reset_index(drop=True, inplace=True) # add column: Valid Bool - return df + errors = schema.validate(df) + errors_index_rows = [e.row for e in errors] + df_clean = df.drop(index=errors_index_rows) + df_errors = df.iloc[errors_index_rows] + return df_clean, errors, df_errors def preprocess_data(df): @@ -78,11 +109,12 @@ def preprocess_data(df): metadata["url"] = df["Link to PDF"] metadata["readers"] = 0 metadata["subject"] = df.Keywords - metadata["oa_state"] = df["Open Access"] + metadata["oa_state"] = df["Open access"] metadata["link"] = df["Link to PDF"] metadata["relevance"] = df.index - metadata["comments"] = df.index + metadata["comments"] = df.Comments metadata["tags"] = df.Tags + metadata["type"] = df.Type text = pd.DataFrame() text["id"] = metadata["id"] text["content"] = metadata.apply(lambda x: ". 
".join(x[["title", @@ -109,39 +141,47 @@ def get_sheet_id(vis_id): required=True)}) -@gsheets_ns.route('/search') -class Search(Resource): - @gsheets_ns.doc(responses={200: 'OK', - 400: 'Invalid search parameters'}) - @gsheets_ns.expect(search_query) - @gsheets_ns.produces(["application/json"]) - def post(self): - """ - """ - params = request.get_json() - # fill default params - params["q"] = params["vis_id"] - params["vis_type"] = "overview" - sheet_id = get_sheet_id(params.get('vis_id')) - covid19_range = "Resources!A1:N200" - sheet_content = get_sheet_content(sheet_id, covid19_range) - raw = pd.DataFrame(sheet_content.get('values')) - input_data = preprocess_data(raw) - - k = str(uuid.uuid4()) - res = {} - res["id"] = k - res["input_data"] = input_data - res["params"] = params - redis_store.rpush("input_data", json.dumps(res)) - result = get_key(redis_store, k) - - # headers = {} - # headers["Content-Type"] = "application/json" - # return make_response(result, - # 200, - # headers) - return pd.DataFrame. 
+@app.route('/api/gsheets/search') +# class Search(Resource): +# @gsheets_ns.doc(responses={200: 'OK', +# 400: 'Invalid search parameters'}) +# @gsheets_ns.expect(search_query) +# # @gsheets_ns.produces(["application/json"]) +def search(): + """ + """ + params = request.args.to_dict() + # params = request.get_json() + # fill default params + params["q"] = params["vis_id"] + params["vis_type"] = "overview" + sheet_id = get_sheet_id(params.get('vis_id')) + covid19_range = "Resources!A1:N200" + sheet_content = get_sheet_content(sheet_id, covid19_range) + raw = pd.DataFrame(sheet_content.get('values')) + df_clean, errors, df_errors = validate_data(raw) + input_data = preprocess_data(df_clean) + + # k = str(uuid.uuid4()) + # res = {} + # res["id"] = k + # res["input_data"] = input_data + # res["params"] = params + # redis_store.rpush("input_data", json.dumps(res)) + # result = get_key(redis_store, k) + + # headers = {} + # headers["Content-Type"] = "application/json" + # return make_response(result, + # 200, + # headers) + # pd.DataFrame(json.loads(input_data["metadata"])).to_html(header=True) + return render_template("tables.html", + df_clean=df_clean.to_html(header=True), + errors="
".join([str(e) + for e in errors]), + df_errors=df_errors.to_html(header=True) + ) def writeRevision(vis_id, data, rev_id=None): diff --git a/server/workers/services/src/apis/triple.py b/server/workers/services/src/apis/triple.py index f13f5aea0..798f54884 100644 --- a/server/workers/services/src/apis/triple.py +++ b/server/workers/services/src/apis/triple.py @@ -10,7 +10,7 @@ from flask import Blueprint, request, make_response, jsonify, abort from flask_restx import Namespace, Resource, fields from .request_validators import SearchParamSchema -from utils import get_key +from apis.utils import get_key with open("redis_config.json") as infile: diff --git a/server/workers/services/src/app.py b/server/workers/services/src/app.py index 388a6699f..be3e23624 100644 --- a/server/workers/services/src/app.py +++ b/server/workers/services/src/app.py @@ -2,9 +2,13 @@ from flask_restx import Api from flask_cors import CORS from flask_sqlalchemy import SQLAlchemy -from apis.triple import triple_ns -from apis.triple import gsheets_ns from werkzeug.middleware.proxy_fix import ProxyFix + +from apis.triple import triple_ns +from apis.gsheets import gsheets_ns +from apis.gsheets import app as gsheets_bp + +from database import db from config import settings from utils.monkeypatches import ReverseProxied, __schema__, specs_url, _register_apidoc, inject_flasgger @@ -26,19 +30,24 @@ def api_patches(app, settings): return api_fixed -app = Flask('v1', instance_relative_config=True) -app = inject_flasgger(app) -db = SQLAlchemy(app) -app.wsgi_app = ProxyFix(app.wsgi_app, x_proto=1, x_port=1, x_for=1, x_host=1, x_prefix=1) -app.wsgi_app = ReverseProxied(app.wsgi_app) -app.config.from_object('config.settings') -app.config.from_pyfile('settings.py', silent=True) -CORS(app, expose_headers=["Content-Disposition"]) - -api = api_patches(app, settings) -api.add_namespace(triple_ns, path='/triple') -api.add_namespace(gsheets_ns, path='/gsheets') +def create_app(): + app = Flask('v1', 
instance_relative_config=True) + app = inject_flasgger(app) + app.config.from_object('config.settings') + app.config.from_pyfile('settings.py', silent=True) + db.init_app(app) + app.register_blueprint(gsheets_bp) + app.wsgi_app = ProxyFix(app.wsgi_app, x_proto=1, x_port=1, x_for=1, x_host=1, x_prefix=1) + app.wsgi_app = ReverseProxied(app.wsgi_app) + CORS(app, expose_headers=["Content-Disposition"]) + return app if __name__ == '__main__': + app = create_app() + + api = api_patches(app, settings) + api.add_namespace(triple_ns, path='/triple') + api.add_namespace(gsheets_ns, path='/gsheets') + app.run(host="localhost", port=5001, debug=True) diff --git a/server/workers/services/src/templates/tables.html b/server/workers/services/src/templates/tables.html new file mode 100644 index 000000000..9e2ac6a57 --- /dev/null +++ b/server/workers/services/src/templates/tables.html @@ -0,0 +1,18 @@ + + + + + Demo + + + +{{ df_clean|safe }} +
+{{ errors|safe }} +
+{{ df_errors|safe }} + +
+ + + From ff34c67986c94f4d7345ec27e9b8a1efdb2d3b02 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Sat, 28 Mar 2020 22:24:02 +0100 Subject: [PATCH 60/99] more WIP --- server/services.docker | 2 +- server/workers/services/requirements.txt | 2 + server/workers/services/src/apis/gsheets.py | 62 +++++++++++++-------- server/workers/services/src/app.py | 8 +-- server/workers/services/src/database.py | 4 ++ server/workers/services/src/manage.py | 11 ++++ server/workers/services/src/models.py | 26 +++++++++ 7 files changed, 87 insertions(+), 28 deletions(-) create mode 100644 server/workers/services/src/database.py create mode 100644 server/workers/services/src/manage.py create mode 100644 server/workers/services/src/models.py diff --git a/server/services.docker b/server/services.docker index 29ed24a13..7b33f6b37 100644 --- a/server/services.docker +++ b/server/services.docker @@ -13,4 +13,4 @@ RUN pip install git+https://github.com/python-restx/flask-restx COPY workers/services/src/ ./ COPY workers/redis_config.json . 
-CMD gunicorn -b 127.0.0.1:5001 'app:app' +CMD gunicorn -b 127.0.0.1:5001 'app:create_app()' diff --git a/server/workers/services/requirements.txt b/server/workers/services/requirements.txt index 78aab49d7..15b42ab22 100644 --- a/server/workers/services/requirements.txt +++ b/server/workers/services/requirements.txt @@ -1,5 +1,6 @@ flask flask-cors +flask_sqlalchemy Werkzeug marshmallow gunicorn @@ -13,3 +14,4 @@ flasgger google-api-python-client google-auth-httplib2 google-auth-oauthlib +psycopg2 diff --git a/server/workers/services/src/apis/gsheets.py b/server/workers/services/src/apis/gsheets.py index 40d2a16d7..e08fed2b7 100644 --- a/server/workers/services/src/apis/gsheets.py +++ b/server/workers/services/src/apis/gsheets.py @@ -99,6 +99,7 @@ def validate_data(df): def preprocess_data(df): + df = df[df["Include in map?"] == "yes"] metadata = pd.DataFrame() metadata["id"] = df.ID metadata["title"] = df.Title @@ -141,13 +142,8 @@ def get_sheet_id(vis_id): required=True)}) -@app.route('/api/gsheets/search') -# class Search(Resource): -# @gsheets_ns.doc(responses={200: 'OK', -# 400: 'Invalid search parameters'}) -# @gsheets_ns.expect(search_query) -# # @gsheets_ns.produces(["application/json"]) -def search(): +@app.route('/api/gsheets/raw') +def raw_exampe(): """ """ params = request.args.to_dict() @@ -161,29 +157,49 @@ def search(): raw = pd.DataFrame(sheet_content.get('values')) df_clean, errors, df_errors = validate_data(raw) input_data = preprocess_data(df_clean) - - # k = str(uuid.uuid4()) - # res = {} - # res["id"] = k - # res["input_data"] = input_data - # res["params"] = params - # redis_store.rpush("input_data", json.dumps(res)) - # result = get_key(redis_store, k) - - # headers = {} - # headers["Content-Type"] = "application/json" - # return make_response(result, - # 200, - # headers) - # pd.DataFrame(json.loads(input_data["metadata"])).to_html(header=True) return render_template("tables.html", df_clean=df_clean.to_html(header=True), errors="
".join([str(e) - for e in errors]), + for e in errors]), df_errors=df_errors.to_html(header=True) ) +@gsheets_ns.route('/search') +class Search(Resource): + @gsheets_ns.doc(responses={200: 'OK', + 400: 'Invalid search parameters'}) + @gsheets_ns.expect(search_query) + @gsheets_ns.produces(["application/json"]) + def search(): + """ + """ + params = request.get_json() + # fill default params + params["q"] = params["vis_id"] + params["vis_type"] = "overview" + sheet_id = get_sheet_id(params.get('vis_id')) + covid19_range = "Resources!A1:N200" + sheet_content = get_sheet_content(sheet_id, covid19_range) + raw = pd.DataFrame(sheet_content.get('values')) + df_clean, errors, df_errors = validate_data(raw) + input_data = preprocess_data(df_clean) + + k = str(uuid.uuid4()) + res = {} + res["id"] = k + res["input_data"] = input_data + res["params"] = params + redis_store.rpush("input_data", json.dumps(res)) + result = get_key(redis_store, k) + + headers = {} + headers["Content-Type"] = "application/json" + return make_response(result, + 200, + headers) + + def writeRevision(vis_id, data, rev_id=None): vis = Visualizations.query.filter_by(vis_id=vis_id).first() diff --git a/server/workers/services/src/app.py b/server/workers/services/src/app.py index be3e23624..5226f4373 100644 --- a/server/workers/services/src/app.py +++ b/server/workers/services/src/app.py @@ -40,14 +40,14 @@ def create_app(): app.wsgi_app = ProxyFix(app.wsgi_app, x_proto=1, x_port=1, x_for=1, x_host=1, x_prefix=1) app.wsgi_app = ReverseProxied(app.wsgi_app) CORS(app, expose_headers=["Content-Disposition"]) + + api = api_patches(app, settings) + api.add_namespace(triple_ns, path='/triple') + api.add_namespace(gsheets_ns, path='/gsheets') return app if __name__ == '__main__': app = create_app() - api = api_patches(app, settings) - api.add_namespace(triple_ns, path='/triple') - api.add_namespace(gsheets_ns, path='/gsheets') - app.run(host="localhost", port=5001, debug=True) diff --git 
a/server/workers/services/src/database.py b/server/workers/services/src/database.py new file mode 100644 index 000000000..f606adc00 --- /dev/null +++ b/server/workers/services/src/database.py @@ -0,0 +1,4 @@ +from flask_sqlalchemy import SQLAlchemy + + +db = SQLAlchemy() diff --git a/server/workers/services/src/manage.py b/server/workers/services/src/manage.py new file mode 100644 index 000000000..2aea3c2c2 --- /dev/null +++ b/server/workers/services/src/manage.py @@ -0,0 +1,11 @@ +from flask_sqlalchemy import SQLAlchemy +from app import create_app +from models import Visualizations, Revisions + + +app = create_app() +db = SQLAlchemy(app) + + +if __name__ == '__main__': + db.create_all() diff --git a/server/workers/services/src/models.py b/server/workers/services/src/models.py new file mode 100644 index 000000000..d7f9e8dd0 --- /dev/null +++ b/server/workers/services/src/models.py @@ -0,0 +1,26 @@ +from database import db + + +class Revisions(db.Model): + rev_id = db.Column(db.Integer, + db.ForeignKey('visualizations.vis_latest'), + nullable=False, + primary_key=True) + rev_vis = db.Column(db.Text, nullable=False, + primary_key=True) + vis_query = db.Column(db.Text, + db.ForeignKey('visualizations.vis_clean_query')) + rev_user = db.Column(db.Text) + rev_timestamp = db.Column(db.DateTime) + rev_comment = db.Column(db.Text) + rev_data = db.Column(db.Text) + + +class Visualizations(db.Model): + vis_id = db.Column(db.Text, nullable=False, unique=True, + primary_key=True) + vis_query = db.Column(db.Text) + vis_clean_query = db.Column(db.Text) + vis_title = db.Column(db.Text) + vis_latest = db.Column(db.Integer) + vis_params = db.Column(db.Text) From c9abcc943b7907f08afbeecf6dfeedf690688707 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Sun, 29 Mar 2020 00:37:46 +0100 Subject: [PATCH 61/99] finished for first sprint --- docker-compose.yml | 7 +++++-- server/workers/services/requirements.txt | 1 - server/workers/services/src/apis/gsheets.py | 10 +++------- 3 
files changed, 8 insertions(+), 10 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 876c79f23..009192bb9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -49,11 +49,14 @@ services: target: /var/log/headstart hsdb: - image: postgres + image: postgres:11 + restart: always + environment: + POSTGRES_PASSWORD: "password" volumes: - db:/var/lib/postgresql/data ports: - - 5432:5432 + - 54321:5432 volumes: redis: diff --git a/server/workers/services/requirements.txt b/server/workers/services/requirements.txt index 15b42ab22..93b25db08 100644 --- a/server/workers/services/requirements.txt +++ b/server/workers/services/requirements.txt @@ -14,4 +14,3 @@ flasgger google-api-python-client google-auth-httplib2 google-auth-oauthlib -psycopg2 diff --git a/server/workers/services/src/apis/gsheets.py b/server/workers/services/src/apis/gsheets.py index e08fed2b7..5ec79e91a 100644 --- a/server/workers/services/src/apis/gsheets.py +++ b/server/workers/services/src/apis/gsheets.py @@ -16,14 +16,10 @@ from database import db import pandas as pd from pandas_schema import Column, Schema -from pandas_schema.validation import (LeadingWhitespaceValidation, - TrailingWhitespaceValidation, - CanConvertValidation, - MatchesPatternValidation, - InRangeValidation, +from pandas_schema.validation import (MatchesPatternValidation, InListValidation, DateFormatValidation, - CustomSeriesValidation) + CustomElementValidation) with open("redis_config.json") as infile: @@ -75,7 +71,7 @@ def validate_data(df): Column('ID', []), Column('Title', []), Column('Authors', []), - Column('Abstract', [CustomSeriesValidation(lambda s: ~s.str.len() < 10, 'Abstract not long enough')]), + Column('Abstract', [CustomElementValidation(lambda s: len(s) > 5, 'Abstract not long enough')]), Column('Publication Venue', []), Column('Publication Date', [DateFormatValidation("%Y-%m-%d")]), Column('Link to PDF', []), From b745e5760110f3a0dbd4492f80144c3fc37b6f11 Mon Sep 17 00:00:00 2001 From: 
Christopher Kittel Date: Mon, 30 Mar 2020 00:29:53 +0200 Subject: [PATCH 62/99] basic query --- server/workers/triple/src/search_triple.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/server/workers/triple/src/search_triple.py b/server/workers/triple/src/search_triple.py index af02dbfba..8694b2b77 100644 --- a/server/workers/triple/src/search_triple.py +++ b/server/workers/triple/src/search_triple.py @@ -80,7 +80,9 @@ def build_body(self, parameters): "must": [ {"multi_match": { "query": parameters.get('q'), - "fields": ["title", "abstract"] + "fields": ["title", "abstract"], + "type": "cross_fields", + "operator": "and", }}, {"range": { "date": self.build_date_field( @@ -103,16 +105,16 @@ def search(self, parameters): parameters.get('to'))) index = "isidore-documents-triple" sorting = self.build_sort_order(parameters) - # res = self.es.search( - # index=index, - # body=self.build_body(parameters), - # size=parameters.get('limit', 100), - # sort=self.build_sort_order(parameters)) res = self.es.search( index=index, - body=s.to_dict(), + body=self.build_body(parameters), size=parameters.get('limit', 100), sort=sorting) + # res = self.es.search( + # index=index, + # body=s.to_dict(), + # size=parameters.get('limit', 100), + # sort=sorting) if parameters.get('raw') is True: return res else: From e4261a64e7bb26de667d6f73da2d826f69c6c86d Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 30 Mar 2020 21:40:42 +0200 Subject: [PATCH 63/99] add logging and debug mode --- docker-compose.yml | 2 + server/search_triple.docker | 2 +- .../{backend.env => example_backend.env} | 1 + server/workers/backend/src/headstart.py | 8 ++++ ...{redis_example.conf => example_redis.conf} | 0 ...example.json => example_redis_config.json} | 0 server/workers/services/src/apis/triple.py | 3 ++ server/workers/services/src/app.py | 9 ++++- .../services/src/config/example_settings.py | 8 ++++ .../services/src/config/settings_example.py | 1 - 
...ig_example.json => example_es_config.json} | 0 server/workers/triple/src/search_triple.py | 40 +++++++++++++------ 12 files changed, 58 insertions(+), 16 deletions(-) rename server/workers/backend/{backend.env => example_backend.env} (80%) rename server/workers/{redis_example.conf => example_redis.conf} (100%) rename server/workers/{redis_config_example.json => example_redis_config.json} (100%) create mode 100644 server/workers/services/src/config/example_settings.py delete mode 100644 server/workers/services/src/config/settings_example.py rename server/workers/triple/{config_example.json => example_es_config.json} (100%) diff --git a/docker-compose.yml b/docker-compose.yml index 9bc638622..a23ff1950 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -29,6 +29,8 @@ services: build: context: server dockerfile: search_triple.docker + env_file: + - server/workers/triple/triple.env restart: always network_mode: "host" diff --git a/server/search_triple.docker b/server/search_triple.docker index 07b9de5d4..89e3ff44c 100644 --- a/server/search_triple.docker +++ b/server/search_triple.docker @@ -9,7 +9,7 @@ WORKDIR /headstart COPY workers/triple/requirements.txt . RUN pip install --no-cache-dir -r requirements.txt COPY workers/triple/src/ ./ -COPY workers/triple/config.json . +COPY workers/triple/es_config.json . COPY workers/redis_config.json . 
ENTRYPOINT python search_triple.py diff --git a/server/workers/backend/backend.env b/server/workers/backend/example_backend.env similarity index 80% rename from server/workers/backend/backend.env rename to server/workers/backend/example_backend.env index 6ea3e0991..a6a69208e 100644 --- a/server/workers/backend/backend.env +++ b/server/workers/backend/example_backend.env @@ -1,3 +1,4 @@ HEADSTART_LOGFILE=/var/log/headstart/headstart.log RENV_VERSION=0.6.0-98 CRAN_REPOS=https://cran.wu.ac.at +HEADSTART_LOGLEVEL=DEBUG diff --git a/server/workers/backend/src/headstart.py b/server/workers/backend/src/headstart.py index b42e91f6b..e12307324 100644 --- a/server/workers/backend/src/headstart.py +++ b/server/workers/backend/src/headstart.py @@ -1,4 +1,5 @@ import os +import sys import copy import json import subprocess @@ -6,6 +7,7 @@ from tempfile import NamedTemporaryFile import redis import pandas as pd +import logging class Backend(object): @@ -20,6 +22,11 @@ def __init__(self, wd="./"): self.default_params["language"] = "english" self.default_params["taxonomy_separator"] = ";" self.default_params["list_size"] = -1 + self.logger = logging.getLogger(__name__) + self.logger.setLevel(os.environ["HEADSTART_LOGLEVEL"]) + handler = logging.StreamHandler(sys.stdout) + handler.setLevel(os.environ["HEADSTART_LOGLEVEL"]) + self.logger.addHandler(handler) def add_default_params(self, params): default_params = copy.deepcopy(self.default_params) @@ -50,6 +57,7 @@ def create_map(self, params, input_data): def run(self): k, params, input_data = self.next_item() + self.logger.debug(params) result = self.create_map(params, input_data) redis_store.set(k+"_output", json.dumps(result)) diff --git a/server/workers/redis_example.conf b/server/workers/example_redis.conf similarity index 100% rename from server/workers/redis_example.conf rename to server/workers/example_redis.conf diff --git a/server/workers/redis_config_example.json b/server/workers/example_redis_config.json similarity index 
100% rename from server/workers/redis_config_example.json rename to server/workers/example_redis_config.json diff --git a/server/workers/services/src/apis/triple.py b/server/workers/services/src/apis/triple.py index 12566fc12..a4b193c3d 100644 --- a/server/workers/services/src/apis/triple.py +++ b/server/workers/services/src/apis/triple.py @@ -68,12 +68,15 @@ def post(self): """ """ data = request.get_json() + triple_ns.logger.debug(data) errors = search_param_schema.validate(data, partial=True) + triple_ns.logger.debug(errors) if errors: abort(400, str(errors)) k = str(uuid.uuid4()) d = {"id": k, "params": data, "endpoint": "search"} + triple_ns.logger.debug(d) redis_store.rpush("triple", json.dumps(d)) result = get_key(redis_store, k) diff --git a/server/workers/services/src/app.py b/server/workers/services/src/app.py index 6c5a01f3c..2d71cb5cb 100644 --- a/server/workers/services/src/app.py +++ b/server/workers/services/src/app.py @@ -1,3 +1,5 @@ +import os +import sys from flask import Flask from flask_restx import Api from flask_cors import CORS @@ -5,6 +7,7 @@ from werkzeug.middleware.proxy_fix import ProxyFix from config import settings from utils.monkeypatches import ReverseProxied, __schema__, specs_url, _register_apidoc, inject_flasgger +import logging def api_patches(app, settings): @@ -25,11 +28,13 @@ def api_patches(app, settings): app = Flask('v1', instance_relative_config=True) +app.config.from_object('config.settings') +handler = logging.StreamHandler(sys.stdout) +handler.setLevel(app.logger.level) +app.logger.addHandler(handler) app = inject_flasgger(app) app.wsgi_app = ProxyFix(app.wsgi_app, x_proto=1, x_port=1, x_for=1, x_host=1, x_prefix=1) app.wsgi_app = ReverseProxied(app.wsgi_app) -app.config.from_object('config.settings') -app.config.from_pyfile('settings.py', silent=True) CORS(app, expose_headers=["Content-Disposition"]) api = api_patches(app, settings) diff --git a/server/workers/services/src/config/example_settings.py 
b/server/workers/services/src/config/example_settings.py new file mode 100644 index 000000000..acaa449c9 --- /dev/null +++ b/server/workers/services/src/config/example_settings.py @@ -0,0 +1,8 @@ +BEHIND_PROXY = True +SWAGGER_BASEPATH = "" +# change to appropriate domain +SERVER_NAME = "localhost" +# change to "production" +ENV = "development" +# change to False +DEBUG = True diff --git a/server/workers/services/src/config/settings_example.py b/server/workers/services/src/config/settings_example.py deleted file mode 100644 index d4efebf3c..000000000 --- a/server/workers/services/src/config/settings_example.py +++ /dev/null @@ -1 +0,0 @@ -REDIS_URL = "redis://:password@localhost:6379/0" diff --git a/server/workers/triple/config_example.json b/server/workers/triple/example_es_config.json similarity index 100% rename from server/workers/triple/config_example.json rename to server/workers/triple/example_es_config.json diff --git a/server/workers/triple/src/search_triple.py b/server/workers/triple/src/search_triple.py index 8694b2b77..0c26ec923 100644 --- a/server/workers/triple/src/search_triple.py +++ b/server/workers/triple/src/search_triple.py @@ -1,10 +1,12 @@ import os +import sys import re import json import redis from elasticsearch import Elasticsearch from elasticsearch_dsl import Search, Q import pandas as pd +import logging class TripleClient(object): @@ -18,6 +20,11 @@ def __init__(self, config): send_get_body_as='POST', http_compress=True ) + self.logger = logging.getLogger(__name__) + self.logger.setLevel(os.environ["TRIPLE_LOGLEVEL"]) + handler = logging.StreamHandler(sys.stdout) + handler.setLevel(os.environ["TRIPLE_LOGLEVEL"]) + self.logger.addHandler(handler) def next_item(self): queue, msg = redis_store.blpop("triple") @@ -105,9 +112,13 @@ def search(self, parameters): parameters.get('to'))) index = "isidore-documents-triple" sorting = self.build_sort_order(parameters) + body = self.build_body(parameters) + self.logger.debug(index) + 
self.logger.debug(sorting) + self.logger.debug(body) res = self.es.search( index=index, - body=self.build_body(parameters), + body=body, size=parameters.get('limit', 100), sort=sorting) # res = self.es.search( @@ -197,26 +208,31 @@ def get_authors(authorlist): def run(self): while True: k, params, endpoint = self.next_item() + self.logger.debug(params) if endpoint == "mappings": res = self.get_mappings(params.get('index')) redis_store.set(k+"_output", json.dumps(res)) if endpoint == "search": - res = {} - res["id"] = k - res["input_data"] = self.search(params) - res["params"] = params - if params.get('raw') is True: - redis_store.set(k+"_output", json.dumps(res)) - else: - redis_store.rpush("input_data", json.dumps(res)) + try: + res = {} + res["id"] = k + res["input_data"] = self.search(params) + res["params"] = params + if params.get('raw') is True: + redis_store.set(k+"_output", json.dumps(res)) + else: + redis_store.rpush("input_data", json.dumps(res)) + except Exception as e: + self.logger.error(e) + self.logger.error(params) if __name__ == '__main__': - with open("config.json") as infile: - config = json.load(infile) + with open("es_config.json") as infile: + es_config = json.load(infile) with open("redis_config.json") as infile: redis_config = json.load(infile) redis_store = redis.StrictRedis(**redis_config) - tc = TripleClient(config) + tc = TripleClient(es_config) tc.run() From 668d6d6c674002b6ef517244d124eb47ef462154 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 30 Mar 2020 21:59:46 +0200 Subject: [PATCH 64/99] more logging --- server/workers/triple/example_triple.env | 1 + 1 file changed, 1 insertion(+) create mode 100644 server/workers/triple/example_triple.env diff --git a/server/workers/triple/example_triple.env b/server/workers/triple/example_triple.env new file mode 100644 index 000000000..8462b8340 --- /dev/null +++ b/server/workers/triple/example_triple.env @@ -0,0 +1 @@ +TRIPLE_LOGLEVEL=DEBUG From 
1eefd6312a40e4853c9d884b76d11a2bb250d6e8 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Tue, 31 Mar 2020 18:48:47 +0200 Subject: [PATCH 65/99] deployment polishing --- examples/triple/README.md | 12 +++---- server/services.docker | 2 +- server/services/search.php | 2 +- server/workers/services/src/apis/triple.py | 41 ++++++++++++---------- server/workers/services/src/app.py | 2 +- 5 files changed, 31 insertions(+), 28 deletions(-) diff --git a/examples/triple/README.md b/examples/triple/README.md index 89458608f..25f3a9a7b 100644 --- a/examples/triple/README.md +++ b/examples/triple/README.md @@ -28,12 +28,12 @@ The following lines have to be added to the appropriate sites-available config o # # other config - - ProxyPass http://127.0.0.1:5001/api - - - ProxyPass http://127.0.0.1:5001/swaggerui/ - + # Proxy server settings for Head Start API + ProxyPass /api http://localhost:5001/api connectiontimeout=120 timeout=120 + ProxyPassReverse /api http://localhost:5001/api + ProxyPass /swaggerui http://localhost:5001/swaggerui + ProxyPassReverse /swaggerui http://localhost:5001/swaggerui + ``` diff --git a/server/services.docker b/server/services.docker index 29ed24a13..3a27f6922 100644 --- a/server/services.docker +++ b/server/services.docker @@ -13,4 +13,4 @@ RUN pip install git+https://github.com/python-restx/flask-restx COPY workers/services/src/ ./ COPY workers/redis_config.json . -CMD gunicorn -b 127.0.0.1:5001 'app:app' +CMD gunicorn -b 127.0.0.1:5001 'app:app' --timeout 120 diff --git a/server/services/search.php b/server/services/search.php index d72d4f3bf..d9fea5370 100644 --- a/server/services/search.php +++ b/server/services/search.php @@ -78,7 +78,7 @@ function search($repository, $dirty_query, $post_params, $param_types, $keyword_ $WORKING_DIR = $ini_array["general"]["preprocessing_dir"] . $ini_array["output"]["output_dir"]; if ($repository == "triple") { - $url = "http://localhost/api/" . $repository . "/search"; + $url = "http://127.0.0.1/api/" . 
$repository . "/search"; $payload = json_encode($post_params); $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $url); diff --git a/server/workers/services/src/apis/triple.py b/server/workers/services/src/apis/triple.py index a4b193c3d..aaa378431 100644 --- a/server/workers/services/src/apis/triple.py +++ b/server/workers/services/src/apis/triple.py @@ -79,26 +79,29 @@ def post(self): triple_ns.logger.debug(d) redis_store.rpush("triple", json.dumps(d)) result = get_key(redis_store, k) - - headers = {} - if request.headers["Accept"] == "application/json": - headers["Content-Type"] = "application/json" - if request.headers["Accept"] == "text/csv": + try: + headers = {} + if request.headers["Accept"] == "application/json": + headers["Content-Type"] = "application/json" + if request.headers["Accept"] == "text/csv": + if data.get("raw") is True: + df = pd.DataFrame(result.get('input_data').get('hits').get('hits')) + df = pd.concat([df.drop(["_source"], axis=1), + df["_source"].apply(pd.Series)], + axis=1) + result = df.to_csv() + else: + result = pd.read_json(result).to_csv() + headers["Content-Type"] = "text/csv" + headers["Content-Disposition"] = "attachment; filename={0}.csv".format(k) if data.get("raw") is True: - df = pd.DataFrame(result.get('input_data').get('hits').get('hits')) - df = pd.concat([df.drop(["_source"], axis=1), - df["_source"].apply(pd.Series)], - axis=1) - result = df.to_csv() - else: - result = pd.read_json(result).to_csv() - headers["Content-Type"] = "text/csv" - headers["Content-Disposition"] = "attachment; filename={0}.csv".format(k) - if data.get("raw") is True: - headers["Content-Type"] = "application/json" - return make_response(result, - 200, - headers) + headers["Content-Type"] = "application/json" + return make_response(result, + 200, + headers) + except Exception as e: + triple_ns.logger.error(e) + abort(500, "Problem encountered, check logs.") @triple_ns.route('/mappings') diff --git a/server/workers/services/src/app.py 
b/server/workers/services/src/app.py index 2d71cb5cb..95f9a7f3a 100644 --- a/server/workers/services/src/app.py +++ b/server/workers/services/src/app.py @@ -31,7 +31,6 @@ def api_patches(app, settings): app.config.from_object('config.settings') handler = logging.StreamHandler(sys.stdout) handler.setLevel(app.logger.level) -app.logger.addHandler(handler) app = inject_flasgger(app) app.wsgi_app = ProxyFix(app.wsgi_app, x_proto=1, x_port=1, x_for=1, x_host=1, x_prefix=1) app.wsgi_app = ReverseProxied(app.wsgi_app) @@ -39,6 +38,7 @@ def api_patches(app, settings): api = api_patches(app, settings) api.add_namespace(triple_ns, path='/triple') +app.logger.debug(app.config) if __name__ == '__main__': From 9fbfac8a307fe03708886c5d3ae04948d8726657 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Wed, 1 Apr 2020 16:49:38 +0200 Subject: [PATCH 66/99] update windows docs --- examples/triple/README.md | 18 ++++++++++++++++-- .../workers/services/src/config/swagger.json | 1 + 2 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 server/workers/services/src/config/swagger.json diff --git a/examples/triple/README.md b/examples/triple/README.md index 25f3a9a7b..5e473c772 100644 --- a/examples/triple/README.md +++ b/examples/triple/README.md @@ -10,6 +10,15 @@ Please follow the install instructions for your OS: Please follow the install instructions for docker-compose for your OS: https://docs.docker.com/compose/install/ +### Windows + +It is recommended to install the latest version of [Docker for Windows](https://hub.docker.com/editions/community/docker-ce-desktop-windows) +Additionally, following settings may need to be activated: + +* [Volume Sharing](https://docs.microsoft.com/en-us/visualstudio/containers/troubleshooting-docker-errors?view=vs-2019) + +(In case Docker for Windows does not seem to start, it may be already running in the background and hiding in the task bar menu in the lower right corner.) 
+ ## Usage ### Setting up the Apache2 reverse proxy @@ -44,8 +53,8 @@ After that, restart the Apache2 service. Setting up configs: -* ElasticSearch core service: In `server/workers/services/triple/` copy `config_example.json` to `config.json` and fill in the fields. -* Secure Redis: In `server/workers` copy `redis_config_example.json` to `redis_config.json` and `redis_example.conf` to `redis_example.conf` and in both files replace "long_secure_password" with a long, secure password (Line 507 in redis.conf, parameter `requirepass`). +* ElasticSearch core service: In `server/workers/services/triple/` copy `example_config.json` to `config.json` and fill in the fields. +* Secure Redis: In `server/workers` copy `example_redis_config.json` to `redis_config.json` and `example_redis.conf` to `redis.conf` and in both files replace "long_secure_password" with a long, secure password (Line 507 in redis.conf, parameter `requirepass`). Following commands have to be executed from the root folder of the repository, where `docker-compose.yml` is located. 
@@ -54,6 +63,11 @@ Following commands have to be executed from the root folder of the repository, w docker-compose build ``` +* on Windows: +``` +docker-compose -f docker-compose-win.yml build +``` + * start services and send them to the docker daemon ``` docker-compose up -d diff --git a/server/workers/services/src/config/swagger.json b/server/workers/services/src/config/swagger.json new file mode 100644 index 000000000..034adf72a --- /dev/null +++ b/server/workers/services/src/config/swagger.json @@ -0,0 +1 @@ +{"swagger": "2.0", "basePath": "/api", "paths": {"/triple/mappings": {"get": {"responses": {"200": {"description": "OK"}, "400": {"description": "Invalid search parameters"}}, "operationId": "get_mappings", "parameters": [{"description": "Specify the ElasticSearch index to get the mapping of, currently either 'isidore-sources-triple' or 'isidore-documents-triple'", "name": "index", "type": "string", "in": "query"}], "tags": ["triple"]}}, "/triple/search": {"post": {"responses": {"200": {"description": "OK"}, "400": {"description": "Invalid search parameters"}}, "operationId": "post_search", "parameters": [{"name": "payload", "required": true, "in": "body", "schema": {"$ref": "#/definitions/SearchQuery"}}], "produces": ["application/json", "text/csv"], "tags": ["triple"]}}}, "info": {"title": "Head Start API", "version": "0.1", "description": "Head Start API demo"}, "produces": ["application/json"], "consumes": ["application/json"], "tags": [{"name": "triple", "description": "TRIPLE API operations"}], "definitions": {"SearchQuery": {"required": ["from", "q", "sorting", "to", "vis_type"], "properties": {"q": {"type": "string", "description": "query string", "example": "feminicide"}, "sorting": {"type": "string", "description": "most-relevant or most-recent", "example": "most-recent"}, "from": {"type": "string", "description": "yyyy-MM-dd", "example": "2019-01-01"}, "to": {"type": "string", "description": "yyyy-MM-dd", "example": "2019-12-31"}, "vis_type": 
{"type": "string", "description": "overview or timeline", "example": "overview"}, "raw": {"type": "boolean", "description": "raw results from ElasticSearch", "example": "false"}}, "type": "object"}}, "responses": {"ParseError": {"description": "When a mask can't be parsed"}, "MaskError": {"description": "When any error occurs on mask"}}} From c9f0b424ec17235050b44bb274a343e723270013 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Thu, 2 Apr 2020 11:08:39 +0200 Subject: [PATCH 67/99] README and naming convention --- docker-compose.yml | 6 +- docker-compose_win.yml | 45 +++++++ example_config.ini | 74 ----------- example_config.js | 4 - examples/triple/README.md | 88 ------------- ...t_backend.docker => dataprocessing.docker} | 6 +- server/workers/README.md | 120 ++++++++++++++++++ .../example_dataprocessing.env} | 0 .../{backend => dataprocessing}/renv.lock | 0 .../requirements.txt | 0 .../src/__init__.py | 0 .../src/headstart.py | 4 +- .../src/tests/__init__.py | 0 .../src/tests/test_clustering.py | 0 .../src/tests/test_data.py | 12 +- .../src/tests/test_summarization.py | 2 +- .../tests/testdata/expected_output_data.json | 0 17 files changed, 180 insertions(+), 181 deletions(-) create mode 100644 docker-compose_win.yml delete mode 100644 example_config.ini delete mode 100644 example_config.js rename server/{headstart_backend.docker => dataprocessing.docker} (97%) create mode 100644 server/workers/README.md rename server/workers/{backend/example_backend.env => dataprocessing/example_dataprocessing.env} (100%) rename server/workers/{backend => dataprocessing}/renv.lock (100%) rename server/workers/{backend => dataprocessing}/requirements.txt (100%) rename server/workers/{backend => dataprocessing}/src/__init__.py (100%) rename server/workers/{backend => dataprocessing}/src/headstart.py (97%) rename server/workers/{backend => dataprocessing}/src/tests/__init__.py (100%) rename server/workers/{backend => dataprocessing}/src/tests/test_clustering.py (100%) 
rename server/workers/{backend => dataprocessing}/src/tests/test_data.py (66%) rename server/workers/{backend => dataprocessing}/src/tests/test_summarization.py (76%) rename server/workers/{backend => dataprocessing}/src/tests/testdata/expected_output_data.json (100%) diff --git a/docker-compose.yml b/docker-compose.yml index a23ff1950..261a4b276 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -34,12 +34,12 @@ services: restart: always network_mode: "host" - backend: + dataprocessing: build: context: server - dockerfile: headstart_backend.docker + dockerfile: dataprocessing.docker env_file: - - server/workers/backend/backend.env + - server/workers/dataprocessing/dataprocessing.env restart: always network_mode: "host" volumes: diff --git a/docker-compose_win.yml b/docker-compose_win.yml new file mode 100644 index 000000000..d853c304a --- /dev/null +++ b/docker-compose_win.yml @@ -0,0 +1,45 @@ +version: '3.7' + +services: + + api: + build: + context: server + dockerfile: services.docker + restart: always + ports: + - '5001:5001' + depends_on: + - redis + network_mode: "host" + + redis: + image: 'redis:4.0-alpine' + restart: always + command: ["redis-server", "/etc/redis/redis.conf", "--appendonly", "yes"] + - ./server/workers/redis.conf:/etc/redis/redis.conf + restart: always + ports: + - '6379:6379' + network_mode: "host" + + search_triple: + build: + context: server + dockerfile: search_triple.docker + env_file: + - server/workers/triple/triple.env + restart: always + network_mode: "host" + + dataprocessing: + build: + context: server + dockerfile: dataprocessing.docker + env_file: + - server/workers/dataprocessing/dataprocessing.env + restart: always + network_mode: "host" + +volumes: + redis: diff --git a/example_config.ini b/example_config.ini deleted file mode 100644 index 27eab5304..000000000 --- a/example_config.ini +++ /dev/null @@ -1,74 +0,0 @@ -; This is a sample configuration file for Headstart Preprocessing Scripts. 
-; Copy this file to config_local.php and enter your API keys here. - -[general] -# Full path to the preprocessing directory -preprocessing_dir = "/var/www/html/servicename/headstart/server/preprocessing/" -# Full path to the images directory for the client. Needs to be in the public_html/www directory. Make sure that your webserver has write access to this directory. -images_path = "/var/www/html/servicename/headstart/server/paper_preview/" -# Host of the client visualization -host = "http://localhost/" -# Relative path to the client visualization. Needs to be in the public_html/www directory. -vis_path = "headstart" -# Relative path to the client REST services. Needs to be in the public_html/www directory. -services_path = "headstart/server/services/" - -[snapshot] -# Set to 1 to enable snapshot feature, 0 to disable -snapshot_enabled = 0 -# Absolute path to node binary -node_path = "/home/shared-executables/v10.17.0www-data/bin/node" -nodemodules_path = "/home/shared-executables/v10.17.0www-data/lib/node_modules" -# Absolute path to getChartSVG.js -getsvg_path = "/var/www/html/servicename/headstart/server/services/getChartSVG.js" -# Absolute path to the directory, where the snapshots are stored. Webserver must have write access to this directory -storage_path = "/var/www/html/servicename/headstart/server/storage/" -# PHP File responsible for rendering the bubble in a way to be snapshotted. 
Relative path to general host -snapshot_php = "servicename/headstart/server/services/snapshot/headstart_snapshot.php" -# Thumbnail width -snapshot_width = "1200px" -# snapshot_local_protocol fallback for non-server environments -snapshot_local_protocol = "http://" - -[output] -# Relative paths for offline calculation -output_dir = "other-scripts/" -cooc = "cooc.csv"; -metadata = "metadata.csv" -output_scaling_clustering = "output_scaling_clustering.csv" -output_naming = "output_naming.csv" -unique_id = "vis_id2" -title = "Visualization" - -[connection] -# Full path to the sqlite datatabase file. Make sure that your webserver has write access to this file. For development purposes, duplicate headstart.sqlite in server/storage/ and rename it to a filename of your choice. Enter the path to this file here. -sqlite_db = "/var/www/html/servicename/headstart/server/storage/servicename.sqlite" - -[calculation] -# Path to the RScript binary -binary = "/usr/bin/Rscript" -# Relative path from preprocessing_dir to the R script -script = "other-scripts/text_similarity.R" -mode = "bookmarks" - -[naming] -api_key_zemanta = "" -api_key_calais = "" - -# Constants for column numbers in the scaling and clustering output -line_cluster_id = 10 -line_title = 1 -line_abstract = 2 - -# English stop word file -stop_words = "resources/english.stop"; - -# Thresholds for n-grams -threshold_title_ngrams = 2; -threshold_title_abstract_ngrams = 3; -threshold_single_words = 4; - -forbidden_names[] = "research" -forbidden_names[] = "science" -forbidden_names[] = "inquiry" -forbidden_names[] = "learning" diff --git a/example_config.js b/example_config.js deleted file mode 100644 index 88685d043..000000000 --- a/example_config.js +++ /dev/null @@ -1,4 +0,0 @@ -module.exports = { - publicPath : "http://localhost/example/dist/", - skin : "" -}; diff --git a/examples/triple/README.md b/examples/triple/README.md index 5e473c772..e69de29bb 100644 --- a/examples/triple/README.md +++ 
b/examples/triple/README.md @@ -1,88 +0,0 @@ -## Setup - -### Install docker and docker-compose - -Please follow the install instructions for your OS: - -* Windows: https://docs.docker.com/docker-for-windows/install/ -* Mac: https://docs.docker.com/docker-for-mac/install/ -* Ubuntu: https://docs.docker.com/docker-for-mac/install/ (also available for other Linux) - -Please follow the install instructions for docker-compose for your OS: https://docs.docker.com/compose/install/ - -### Windows - -It is recommended to install the latest version of [Docker for Windows](https://hub.docker.com/editions/community/docker-ce-desktop-windows) -Additionally, following settings may need to be activated: - -* [Volume Sharing](https://docs.microsoft.com/en-us/visualstudio/containers/troubleshooting-docker-errors?view=vs-2019) - -(In case Docker for Windows does not seem to start, it may be already running in the background and hiding in the task bar menu in the lower right corner.) - -## Usage - -### Setting up the Apache2 reverse proxy - -Following Apache2 mods have to be installed and enabled: - -* ssl -* proxy -* proxy_balancer -* proxy_http - -The following lines have to be added to the appropriate sites-available config of Apache2 webserver: - -``` - - # - # other config - - # Proxy server settings for Head Start API - ProxyPass /api http://localhost:5001/api connectiontimeout=120 timeout=120 - ProxyPassReverse /api http://localhost:5001/api - ProxyPass /swaggerui http://localhost:5001/swaggerui - ProxyPassReverse /swaggerui http://localhost:5001/swaggerui - - - -``` - -After that, restart the Apache2 service. - -### Starting the backend service with docker-compose - -Setting up configs: - -* ElasticSearch core service: In `server/workers/services/triple/` copy `example_config.json` to `config.json` and fill in the fields. 
-* Secure Redis: In `server/workers` copy `example_redis_config.json` to `redis_config.json` and `example_redis.conf` to `redis.conf` and in both files replace "long_secure_password" with a long, secure password (Line 507 in redis.conf, parameter `requirepass`). - -Following commands have to be executed from the root folder of the repository, where `docker-compose.yml` is located. - -* build images -``` -docker-compose build -``` - -* on Windows: -``` -docker-compose -f docker-compose-win.yml build -``` - -* start services and send them to the docker daemon -``` -docker-compose up -d -``` - -* all in one: -``` -docker-compose up -d --build -``` - -* shut service down -``` -docker-compose down -``` - -### Deploying the example: - -Use a deployment script, or manually deploy an example as described in [HOWTO: search repos](../../doc/howto_search_repos.md): diff --git a/server/headstart_backend.docker b/server/dataprocessing.docker similarity index 97% rename from server/headstart_backend.docker rename to server/dataprocessing.docker index 58a689107..18c91c801 100644 --- a/server/headstart_backend.docker +++ b/server/dataprocessing.docker @@ -145,14 +145,14 @@ RUN R -e 'options(repos="https://cran.wu.ac.at")' && \ R -e 'install.packages("renv", repos = c(CRAN = "https://cran.rstudio.com"))' WORKDIR /headstart -COPY workers/backend/requirements.txt . +COPY workers/dataprocessing/requirements.txt . RUN pip3 install --no-cache-dir -r requirements.txt -COPY workers/backend/renv.lock . +COPY workers/dataprocessing/renv.lock . RUN R -e 'renv::consent(provided = TRUE)' && \ R -e 'renv::restore()' -COPY workers/backend/src/ ./ +COPY workers/dataprocessing/src/ ./ COPY preprocessing/resources ./resources COPY preprocessing/other-scripts . 
RUN mkdir -p /var/log/headstart && touch /var/log/headstart/headstart.log diff --git a/server/workers/README.md b/server/workers/README.md new file mode 100644 index 000000000..241fe2489 --- /dev/null +++ b/server/workers/README.md @@ -0,0 +1,120 @@ +## Folder structure + +Following backend component containers are currently in `workers`: + +* dataprocessing: Executing the machine learning and natural language processing +* services: a Flask-based API, providing endpoints for each integrated data source (e.g. TRIPLE) +* triple: the elasticsearch-connector to TRIPLE + +Each comes with a docker file (ending on `.docker`), which is used for creating a container, and a source code folder. + +## Setup + +### Install docker and docker-compose + +Please follow the install instructions for your OS: + +* Windows: https://docs.docker.com/docker-for-windows/install/ +* Mac: https://docs.docker.com/docker-for-mac/install/ +* Ubuntu: https://docs.docker.com/engine/install/ubuntu/ (also available for other Linux) + +Please follow the install instructions for docker-compose for your OS: https://docs.docker.com/compose/install/ + +### Windows + +It is recommended to install the latest version of [Docker for Windows](https://hub.docker.com/editions/community/docker-ce-desktop-windows). +Additionally, following settings may need to be activated: + +* [Volume Sharing](https://docs.microsoft.com/en-us/visualstudio/containers/troubleshooting-docker-errors?view=vs-2019) + +(In case Docker for Windows does not seem to start, it may be already running in the background and hiding in the task bar menu in the lower right corner.) 
+ +### Setting up the Apache2 reverse proxy + +Following Apache2 mods have to be installed and enabled: + +* ssl +* proxy +* proxy_balancer +* proxy_http + +The following lines have to be added to the appropriate sites-available config of Apache2 webserver: + +``` + + # + # other config + + # Proxy server settings for Head Start API + ProxyPass /api http://localhost:5001/api connectiontimeout=120 timeout=120 + ProxyPassReverse /api http://localhost:5001/api + ProxyPass /swaggerui http://localhost:5001/swaggerui + ProxyPassReverse /swaggerui http://localhost:5001/swaggerui + + + +``` + +After that, restart the Apache2 service. + +## Usage + +Setting up configurations for each backend service: + +Dataprocessing: +* In `server/workers/dataprocessing` copy `example_dataprocessing.env` to `dataprocessing.env` and set the desired loglevel. + +Services: +* In `server/workers/services/config` copy `example_settings.py` to `settings.py` and change the values for `ENV` (`development` or `production`) and `DEBUG` (`TRUE` or `FALSE`). + + +TRIPLE ElasticSearch core service: +* In `server/workers/services/triple/` copy `example_config.json` to `config.json` and fill in the fields. + + +Secure Redis: +* In `server/workers` copy `example_redis_config.json` to `redis_config.json` and `example_redis.conf` to `redis.conf` and in both files replace "long_secure_password" with a long, secure password (Line 507 in redis.conf, parameter `requirepass`). + + +### Starting the backend services with docker-compose + +Following commands have to be executed from the root folder of the repository, where `docker-compose.yml` is located. 
+ +* build images +``` +docker-compose build +``` + +* on Windows: +``` +docker-compose -f docker-compose-win.yml build +``` + +* start services and send them to the docker daemon +``` +docker-compose up -d +``` + +* on Windows: +``` +docker-compose -f docker-compose-win.yml up -d +``` + +* all in one: +``` +docker-compose up -d --build +``` + +* shut service down +``` +docker-compose down +``` + +* on Windows: +``` +docker-compose -f docker-compose-win.yml down +``` + +### Deploying the example: + +Use a deployment script, or manually deploy an example (currently only TRIPLE is integrated in this way) as described in [HOWTO: search repos](../../doc/howto_search_repos.md): diff --git a/server/workers/backend/example_backend.env b/server/workers/dataprocessing/example_dataprocessing.env similarity index 100% rename from server/workers/backend/example_backend.env rename to server/workers/dataprocessing/example_dataprocessing.env diff --git a/server/workers/backend/renv.lock b/server/workers/dataprocessing/renv.lock similarity index 100% rename from server/workers/backend/renv.lock rename to server/workers/dataprocessing/renv.lock diff --git a/server/workers/backend/requirements.txt b/server/workers/dataprocessing/requirements.txt similarity index 100% rename from server/workers/backend/requirements.txt rename to server/workers/dataprocessing/requirements.txt diff --git a/server/workers/backend/src/__init__.py b/server/workers/dataprocessing/src/__init__.py similarity index 100% rename from server/workers/backend/src/__init__.py rename to server/workers/dataprocessing/src/__init__.py diff --git a/server/workers/backend/src/headstart.py b/server/workers/dataprocessing/src/headstart.py similarity index 97% rename from server/workers/backend/src/headstart.py rename to server/workers/dataprocessing/src/headstart.py index e12307324..af55b6a32 100644 --- a/server/workers/backend/src/headstart.py +++ b/server/workers/dataprocessing/src/headstart.py @@ -10,7 +10,7 @@ import 
logging -class Backend(object): +class Dataprocessing(object): def __init__(self, wd="./"): # path should be to where in the docker container the Rscript are @@ -67,5 +67,5 @@ def run(self): redis_config = json.load(infile) redis_store = redis.StrictRedis(**redis_config) - hsb = Backend() + hsb = Dataprocessing() hsb.run() diff --git a/server/workers/backend/src/tests/__init__.py b/server/workers/dataprocessing/src/tests/__init__.py similarity index 100% rename from server/workers/backend/src/tests/__init__.py rename to server/workers/dataprocessing/src/tests/__init__.py diff --git a/server/workers/backend/src/tests/test_clustering.py b/server/workers/dataprocessing/src/tests/test_clustering.py similarity index 100% rename from server/workers/backend/src/tests/test_clustering.py rename to server/workers/dataprocessing/src/tests/test_clustering.py diff --git a/server/workers/backend/src/tests/test_data.py b/server/workers/dataprocessing/src/tests/test_data.py similarity index 66% rename from server/workers/backend/src/tests/test_data.py rename to server/workers/dataprocessing/src/tests/test_data.py index ae474d563..a0d68f55a 100644 --- a/server/workers/backend/src/tests/test_data.py +++ b/server/workers/dataprocessing/src/tests/test_data.py @@ -1,6 +1,6 @@ import json import pytest -from src.headstart import Backend +from src.headstart import Dataprocessing import pandas as pd import pandas.api.types as ptypes @@ -18,8 +18,8 @@ def input_data(): @pytest.fixture -def backend(): - return Backend("../../../preprocessing/other-scripts") +def dataprocessing(): + return Dataprocessing("../../../preprocessing/other-scripts") @pytest.fixture @@ -27,9 +27,9 @@ def params(): return {"service": "triple", "q": "femicide"} -def test_map_data_dtypes(backend, input_data, params): - params = backend.add_default_params(params) - map_data = json.loads(backend.create_map(params, input_data)) +def test_map_data_dtypes(dataprocessing, input_data, params): + params = 
dataprocessing.add_default_params(params) + map_data = json.loads(dataprocessing.create_map(params, input_data)) assert isinstance(map_data, list) df = pd.DataFrame.from_records(map_data) for col in df.columns: diff --git a/server/workers/backend/src/tests/test_summarization.py b/server/workers/dataprocessing/src/tests/test_summarization.py similarity index 76% rename from server/workers/backend/src/tests/test_summarization.py rename to server/workers/dataprocessing/src/tests/test_summarization.py index a970929f8..a78b0abf9 100644 --- a/server/workers/backend/src/tests/test_summarization.py +++ b/server/workers/dataprocessing/src/tests/test_summarization.py @@ -1,4 +1,4 @@ -from src.headstart import Backend +from src.headstart import Dataprocessing import pytest import pandas as pd diff --git a/server/workers/backend/src/tests/testdata/expected_output_data.json b/server/workers/dataprocessing/src/tests/testdata/expected_output_data.json similarity index 100% rename from server/workers/backend/src/tests/testdata/expected_output_data.json rename to server/workers/dataprocessing/src/tests/testdata/expected_output_data.json From c94e1e3f1977aba0584dcec49a5fd567b4a20b81 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Thu, 2 Apr 2020 13:46:40 +0200 Subject: [PATCH 68/99] clean up --- server/preprocessing/other-scripts/vis_layout.R | 4 ++-- server/workers/README.md | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/server/preprocessing/other-scripts/vis_layout.R b/server/preprocessing/other-scripts/vis_layout.R index f2ff7fd52..b992ec8a6 100644 --- a/server/preprocessing/other-scripts/vis_layout.R +++ b/server/preprocessing/other-scripts/vis_layout.R @@ -13,7 +13,7 @@ library(stringi) library(stringdist) library(plyr) library(onehot) -registerDoParallel(7) +registerDoParallel(3) vlog <- getLogger('vis') @@ -74,7 +74,7 @@ vis_layout <- function(text, metadata, service, features <- concatenate_features(distance_matrix) vlog$debug("get 
clusters") clusters <- create_clusters(as.dist(features), max_clusters=max_clusters) - layout <- get_ndms(as.dist(features), mindim=2, maxdim=2) + layout <- get_ndms(as.dist(features), maxit=500, mindim=2, maxdim=2) vlog$debug("get cluster summaries") metadata = replace_keywords_if_empty(metadata, stops, service) diff --git a/server/workers/README.md b/server/workers/README.md index 241fe2489..11f2624c5 100644 --- a/server/workers/README.md +++ b/server/workers/README.md @@ -57,7 +57,7 @@ The following lines have to be added to the appropriate sites-available config o After that, restart the Apache2 service. -## Usage +## Configuration Setting up configurations for each backend service: @@ -80,7 +80,7 @@ Secure Redis: Following commands have to be executed from the root folder of the repository, where `docker-compose.yml` is located. -* build images +**Build images** ``` docker-compose build ``` @@ -90,7 +90,7 @@ docker-compose build docker-compose -f docker-compose-win.yml build ``` -* start services and send them to the docker daemon +**Start services and send them to the docker daemon** ``` docker-compose up -d ``` @@ -100,7 +100,7 @@ docker-compose up -d docker-compose -f docker-compose-win.yml up -d ``` -* all in one: +**All in one:** ``` docker-compose up -d --build ``` From 7561268573b353cf46af83308eac9db1027e4be9 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Thu, 2 Apr 2020 14:06:25 +0200 Subject: [PATCH 69/99] formatting --- docker-compose_win.yml | 44 +++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/docker-compose_win.yml b/docker-compose_win.yml index d853c304a..aa9b06bfd 100644 --- a/docker-compose_win.yml +++ b/docker-compose_win.yml @@ -14,32 +14,32 @@ services: network_mode: "host" redis: - image: 'redis:4.0-alpine' - restart: always - command: ["redis-server", "/etc/redis/redis.conf", "--appendonly", "yes"] - - ./server/workers/redis.conf:/etc/redis/redis.conf - restart: always - 
ports: - - '6379:6379' - network_mode: "host" + image: 'redis:4.0-alpine' + restart: always + command: ["redis-server", "/etc/redis/redis.conf", "--appendonly", "yes"] + - ./server/workers/redis.conf:/etc/redis/redis.conf + restart: always + ports: + - '6379:6379' + network_mode: "host" search_triple: - build: - context: server - dockerfile: search_triple.docker - env_file: - - server/workers/triple/triple.env - restart: always - network_mode: "host" + build: + context: server + dockerfile: search_triple.docker + env_file: + - server/workers/triple/triple.env + restart: always + network_mode: "host" dataprocessing: - build: - context: server - dockerfile: dataprocessing.docker - env_file: - - server/workers/dataprocessing/dataprocessing.env - restart: always - network_mode: "host" + build: + context: server + dockerfile: dataprocessing.docker + env_file: + - server/workers/dataprocessing/dataprocessing.env + restart: always + network_mode: "host" volumes: redis: From 3f204b7be56fecff4b321546dd39312b0e8740db Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Thu, 2 Apr 2020 14:08:11 +0200 Subject: [PATCH 70/99] formatting --- server/workers/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/workers/README.md b/server/workers/README.md index 11f2624c5..5566e83f8 100644 --- a/server/workers/README.md +++ b/server/workers/README.md @@ -87,7 +87,7 @@ docker-compose build * on Windows: ``` -docker-compose -f docker-compose-win.yml build +docker-compose -f docker-compose_win.yml build ``` **Start services and send them to the docker daemon** @@ -97,7 +97,7 @@ docker-compose up -d * on Windows: ``` -docker-compose -f docker-compose-win.yml up -d +docker-compose -f docker-compose_win.yml up -d ``` **All in one:** @@ -112,7 +112,7 @@ docker-compose down * on Windows: ``` -docker-compose -f docker-compose-win.yml down +docker-compose -f docker-compose_win.yml down ``` ### Deploying the example: From 
48156bea10c04298e0a1152b474240f6ff0dc992 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Thu, 2 Apr 2020 14:12:48 +0200 Subject: [PATCH 71/99] formatting --- docker-compose_win.yml | 63 +++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/docker-compose_win.yml b/docker-compose_win.yml index aa9b06bfd..cb63dc6be 100644 --- a/docker-compose_win.yml +++ b/docker-compose_win.yml @@ -3,43 +3,44 @@ version: '3.7' services: api: - build: - context: server - dockerfile: services.docker - restart: always - ports: - - '5001:5001' - depends_on: - - redis - network_mode: "host" + build: + context: server + dockerfile: services.docker + restart: always + ports: + - '5001:5001' + depends_on: + - redis + network_mode: "host" redis: - image: 'redis:4.0-alpine' - restart: always - command: ["redis-server", "/etc/redis/redis.conf", "--appendonly", "yes"] - - ./server/workers/redis.conf:/etc/redis/redis.conf - restart: always - ports: - - '6379:6379' - network_mode: "host" + image: 'redis:4.0-alpine' + restart: always + command: ["redis-server", "/etc/redis/redis.conf", "--appendonly", "yes"] + volumes: + - ./server/workers/redis.conf:/etc/redis/redis.conf + restart: always + ports: + - '6379:6379' + network_mode: "host" search_triple: - build: - context: server - dockerfile: search_triple.docker - env_file: - - server/workers/triple/triple.env - restart: always - network_mode: "host" + build: + context: server + dockerfile: search_triple.docker + env_file: + - server/workers/triple/triple.env + restart: always + network_mode: "host" dataprocessing: - build: - context: server - dockerfile: dataprocessing.docker - env_file: - - server/workers/dataprocessing/dataprocessing.env - restart: always - network_mode: "host" + build: + context: server + dockerfile: dataprocessing.docker + env_file: + - server/workers/dataprocessing/dataprocessing.env + restart: always + network_mode: "host" volumes: redis: From 
cf3991bd7cda2c69678e46f2602a38465b4debe7 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Thu, 2 Apr 2020 14:23:40 +0200 Subject: [PATCH 72/99] Update README.md --- server/workers/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/workers/README.md b/server/workers/README.md index 11f2624c5..d19e6309f 100644 --- a/server/workers/README.md +++ b/server/workers/README.md @@ -69,7 +69,8 @@ Services: TRIPLE ElasticSearch core service: -* In `server/workers/services/triple/` copy `example_config.json` to `config.json` and fill in the fields. +* In `server/workers/services/triple/` copy `example_es_config.json` to `es_config.json` and fill in the fields. +* In `server/workers/services/triple/` copy `example_triple.env` to `triple.env` and fill in the fields. Secure Redis: From 00ae54556c78ba523ccca6ebd55a3f2672586d74 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Thu, 2 Apr 2020 18:33:15 +0200 Subject: [PATCH 73/99] Update README.md --- server/workers/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/workers/README.md b/server/workers/README.md index d19e6309f..fe1e30f39 100644 --- a/server/workers/README.md +++ b/server/workers/README.md @@ -88,7 +88,7 @@ docker-compose build * on Windows: ``` -docker-compose -f docker-compose-win.yml build +docker-compose -f docker-compose_win.yml build ``` **Start services and send them to the docker daemon** @@ -98,7 +98,7 @@ docker-compose up -d * on Windows: ``` -docker-compose -f docker-compose-win.yml up -d +docker-compose -f docker-compose_win.yml up -d ``` **All in one:** @@ -113,7 +113,7 @@ docker-compose down * on Windows: ``` -docker-compose -f docker-compose-win.yml down +docker-compose -f docker-compose_win.yml down ``` ### Deploying the example: From d43866c2c08184a0353fd5a9ce72f99caef1602e Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Thu, 2 Apr 2020 19:20:29 +0200 Subject: [PATCH 74/99] windows networking --- 
docker-compose_win.yml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/docker-compose_win.yml b/docker-compose_win.yml index cb63dc6be..8cd7f6e45 100644 --- a/docker-compose_win.yml +++ b/docker-compose_win.yml @@ -8,10 +8,9 @@ services: dockerfile: services.docker restart: always ports: - - '5001:5001' + - '127.0.0.1:5001:5001' depends_on: - redis - network_mode: "host" redis: image: 'redis:4.0-alpine' @@ -21,8 +20,7 @@ services: - ./server/workers/redis.conf:/etc/redis/redis.conf restart: always ports: - - '6379:6379' - network_mode: "host" + - '127.0.0.1:6379:6379' search_triple: build: @@ -31,7 +29,6 @@ services: env_file: - server/workers/triple/triple.env restart: always - network_mode: "host" dataprocessing: build: @@ -40,7 +37,6 @@ services: env_file: - server/workers/dataprocessing/dataprocessing.env restart: always - network_mode: "host" volumes: redis: From 6375ca506a748c36dccf28373c058d83a9cd7a5c Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Thu, 2 Apr 2020 19:24:11 +0200 Subject: [PATCH 75/99] Update README.md --- server/workers/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/workers/README.md b/server/workers/README.md index fe1e30f39..d035a5202 100644 --- a/server/workers/README.md +++ b/server/workers/README.md @@ -65,7 +65,7 @@ Dataprocessing: * In `server/workers/dataprocessing` copy `example_dataprocessing.env` to `dataprocessing.env` and set the desired loglevel. Services: -* In `server/workers/services/config` copy `example_settings.py` to `settings.py` and change the values for `ENV` (`development` or `production`) and `DEBUG` (`TRUE` or `FALSE`). +* In `server/workers/services/src/config` copy `example_settings.py` to `settings.py` and change the values for `ENV` (`development` or `production`) and `DEBUG` (`TRUE` or `FALSE`). 
TRIPLE ElasticSearch core service: From cc6a0045d6bab253ba99fb2e8f1ecbee73ca0b3c Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Thu, 2 Apr 2020 19:27:28 +0200 Subject: [PATCH 76/99] Update README.md --- server/workers/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/workers/README.md b/server/workers/README.md index d035a5202..8e1e47415 100644 --- a/server/workers/README.md +++ b/server/workers/README.md @@ -70,7 +70,7 @@ Services: TRIPLE ElasticSearch core service: * In `server/workers/services/triple/` copy `example_es_config.json` to `es_config.json` and fill in the fields. -* In `server/workers/services/triple/` copy `example_triple.env` to `triple.env` and fill in the fields. +* In `server/workers/services/triple/` copy `example_triple.env` to `triple.env` and change the values if necessary. Secure Redis: From 3b1484c708b5f4caf12d37300235af72329f7390 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Thu, 2 Apr 2020 19:33:35 +0200 Subject: [PATCH 77/99] Update README.md --- server/workers/README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/server/workers/README.md b/server/workers/README.md index 8e1e47415..848dc50ab 100644 --- a/server/workers/README.md +++ b/server/workers/README.md @@ -82,6 +82,8 @@ Secure Redis: Following commands have to be executed from the root folder of the repository, where `docker-compose.yml` is located. 
**Build images** + +* on Linux: ``` docker-compose build ``` @@ -92,6 +94,8 @@ docker-compose -f docker-compose_win.yml build ``` **Start services and send them to the docker daemon** + +* on Linux: ``` docker-compose up -d ``` @@ -102,11 +106,15 @@ docker-compose -f docker-compose_win.yml up -d ``` **All in one:** + +* on Linux: ``` docker-compose up -d --build ``` * shut service down + +* on Linux: ``` docker-compose down ``` From 0a0f858954180e6002b7180e42d549ed4b293264 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Thu, 2 Apr 2020 19:36:09 +0200 Subject: [PATCH 78/99] Update README.md --- server/workers/README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/server/workers/README.md b/server/workers/README.md index 848dc50ab..f1da8b475 100644 --- a/server/workers/README.md +++ b/server/workers/README.md @@ -38,6 +38,9 @@ Following Apache2 mods have to be installed and enabled: * proxy_balancer * proxy_http +Possibly also following modules need to be installed and enabled: +* mod_slotmem_shm + The following lines have to be added to the appropriate sites-available config of Apache2 webserver: ``` From 88c0153072562da9a671a9552c4d17c66946736f Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 6 Apr 2020 14:40:02 +0200 Subject: [PATCH 79/99] update searches --- server/services/search.php | 5 +++-- server/services/searchBASE.php | 3 ++- server/services/searchDOAJ.php | 3 ++- server/services/searchLinkedCat.php | 3 ++- server/services/searchLinkedCatAuthorview.php | 3 ++- server/services/searchLinkedCatBrowseview.php | 3 ++- server/services/searchOpenAire.php | 3 ++- server/services/searchPLOS.php | 3 ++- server/services/searchPubmed.php | 3 ++- server/services/searchTRIPLE.php | 6 +++++- 10 files changed, 24 insertions(+), 11 deletions(-) diff --git a/server/services/search.php b/server/services/search.php index 48a516c3e..a12ee529e 100644 --- a/server/services/search.php +++ b/server/services/search.php @@ -52,7 +52,8 @@ function 
cleanQuery($dirty_query, $transform_query_tolowercase) { } function search($repository, $dirty_query, $post_params, $param_types, $keyword_separator, $taxonomy_separator, $transform_query_tolowercase = true - , $retrieve_cached_map = true, $params_for_id = null, $num_labels = 3, $id = "area_uri", $subjects = "subject", $precomputed_id = null, $do_clean_query = true) { + , $retrieve_cached_map = true, $params_for_id = null, $num_labels = 3, $id = "area_uri", $subjects = "subject", $precomputed_id = null, $do_clean_query = true + , $backend = "legacy") { $INI_DIR = dirname(__FILE__) . "/../preprocessing/conf/"; $ini_array = library\Toolkit::loadIni($INI_DIR); @@ -86,7 +87,7 @@ function search($repository, $dirty_query, $post_params, $param_types, $keyword_ $WORKING_DIR = $ini_array["general"]["preprocessing_dir"] . $ini_array["output"]["output_dir"]; - if ($repository == "triple") { + if ($backend == "api") { $url = "http://127.0.0.1/api/" . $repository . "/search"; $payload = json_encode($post_params); $ch = curl_init(); diff --git a/server/services/searchBASE.php b/server/services/searchBASE.php index d9c0f426c..3b0bb63cb 100644 --- a/server/services/searchBASE.php +++ b/server/services/searchBASE.php @@ -15,7 +15,8 @@ $result = search("base", $dirty_query, $post_params , array("from", "to", "document_types", "sorting") , ";", null, true, true, null, 3 - , "area_uri", "subject", $precomputed_id, false); + , "area_uri", "subject", $precomputed_id, false + , "legacy"); echo $result diff --git a/server/services/searchDOAJ.php b/server/services/searchDOAJ.php index be37b76f6..3867df3fc 100644 --- a/server/services/searchDOAJ.php +++ b/server/services/searchDOAJ.php @@ -15,7 +15,8 @@ $result = search("doaj", $dirty_query, $post_params , array("from", "to", "today", "sorting") , ";", null, true, true, null, 3 - , "area_uri", "subject", $precomputed_id, false); + , "area_uri", "subject", $precomputed_id, false + , "legacy"); echo $result diff --git 
a/server/services/searchLinkedCat.php b/server/services/searchLinkedCat.php index 69154f405..7595a92de 100644 --- a/server/services/searchLinkedCat.php +++ b/server/services/searchLinkedCat.php @@ -17,7 +17,8 @@ array("from", "to", "include_content_type", "today", "vis_type"), ";", null, $transform_query_tolowercase=false, true, null, 3, - "area_uri", "subject", $precomputed_id, true); + "area_uri", "subject", $precomputed_id, true, + "legacy"); echo $result diff --git a/server/services/searchLinkedCatAuthorview.php b/server/services/searchLinkedCatAuthorview.php index 0d1ccf526..a61e24518 100644 --- a/server/services/searchLinkedCatAuthorview.php +++ b/server/services/searchLinkedCatAuthorview.php @@ -19,7 +19,8 @@ ";", null, $transform_query_tolowercase = false, true, null, 3, - "area_uri", "subject", $precomputed_id, true + "area_uri", "subject", $precomputed_id, true, + "legacy" ); echo $result diff --git a/server/services/searchLinkedCatBrowseview.php b/server/services/searchLinkedCatBrowseview.php index b80cb5651..41c5e9221 100644 --- a/server/services/searchLinkedCatBrowseview.php +++ b/server/services/searchLinkedCatBrowseview.php @@ -19,7 +19,8 @@ ";", null, $transform_query_tolowercase = false, true, null, 3, - "area_uri", "subject", $precomputed_id, true + "area_uri", "subject", $precomputed_id, true, + "legacy" ); echo $result diff --git a/server/services/searchOpenAire.php b/server/services/searchOpenAire.php index 249dbbd6e..fab9340c5 100644 --- a/server/services/searchOpenAire.php +++ b/server/services/searchOpenAire.php @@ -24,7 +24,8 @@ "openaire_link", "obj_id", "acronym"), - ";", null, false, true, array("project_id", "funder")); + ";", null, false, true, array("project_id", "funder") + , "legacy"); echo $result diff --git a/server/services/searchPLOS.php b/server/services/searchPLOS.php index e490190b2..70bcccb25 100644 --- a/server/services/searchPLOS.php +++ b/server/services/searchPLOS.php @@ -15,7 +15,8 @@ $result = search("plos", 
$dirty_query, $post_params , array("article_types", "journals", "from", "to", "sorting") , ";", "/", true, true, null, 3 - , "area_uri", "subject", $precomputed_id, false); + , "area_uri", "subject", $precomputed_id, false + , "legacy"); echo $result diff --git a/server/services/searchPubmed.php b/server/services/searchPubmed.php index 7a5066d6c..7390abc86 100644 --- a/server/services/searchPubmed.php +++ b/server/services/searchPubmed.php @@ -17,7 +17,8 @@ $result = search("pubmed", $dirty_query , $post_params, array("from", "to", "sorting") , ";", null, true, true, null, 3 - , "area_uri", "subject", $precomputed_id, false); + , "area_uri", "subject", $precomputed_id, false + , "legacy"); echo $result diff --git a/server/services/searchTRIPLE.php b/server/services/searchTRIPLE.php index 5de28f23e..ac5976ae8 100644 --- a/server/services/searchTRIPLE.php +++ b/server/services/searchTRIPLE.php @@ -11,7 +11,11 @@ $post_params = $_POST; -$result = search("triple", $dirty_query, $post_params, array("from", "to", "sorting"), ";", null); +$result = search("triple", $dirty_query + , $post_params, array("from", "to", "sorting") + , ";", null, true, true, null, 3 + , "area_uri", "subject", $precomputed_id, false + , "api"); echo $result From b25d9b4ed39c5eec0b4d410e2d29e0bb98c011d3 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 6 Apr 2020 14:40:02 +0200 Subject: [PATCH 80/99] update searches --- server/services/search.php | 5 +++-- server/services/searchBASE.php | 3 ++- server/services/searchDOAJ.php | 3 ++- server/services/searchLinkedCat.php | 3 ++- server/services/searchLinkedCatAuthorview.php | 3 ++- server/services/searchLinkedCatBrowseview.php | 3 ++- server/services/searchOpenAire.php | 3 ++- server/services/searchPLOS.php | 3 ++- server/services/searchPubmed.php | 3 ++- server/services/searchTRIPLE.php | 7 ++++++- 10 files changed, 25 insertions(+), 11 deletions(-) diff --git a/server/services/search.php b/server/services/search.php index 
48a516c3e..a12ee529e 100644 --- a/server/services/search.php +++ b/server/services/search.php @@ -52,7 +52,8 @@ function cleanQuery($dirty_query, $transform_query_tolowercase) { } function search($repository, $dirty_query, $post_params, $param_types, $keyword_separator, $taxonomy_separator, $transform_query_tolowercase = true - , $retrieve_cached_map = true, $params_for_id = null, $num_labels = 3, $id = "area_uri", $subjects = "subject", $precomputed_id = null, $do_clean_query = true) { + , $retrieve_cached_map = true, $params_for_id = null, $num_labels = 3, $id = "area_uri", $subjects = "subject", $precomputed_id = null, $do_clean_query = true + , $backend = "legacy") { $INI_DIR = dirname(__FILE__) . "/../preprocessing/conf/"; $ini_array = library\Toolkit::loadIni($INI_DIR); @@ -86,7 +87,7 @@ function search($repository, $dirty_query, $post_params, $param_types, $keyword_ $WORKING_DIR = $ini_array["general"]["preprocessing_dir"] . $ini_array["output"]["output_dir"]; - if ($repository == "triple") { + if ($backend == "api") { $url = "http://127.0.0.1/api/" . $repository . 
"/search"; $payload = json_encode($post_params); $ch = curl_init(); diff --git a/server/services/searchBASE.php b/server/services/searchBASE.php index d9c0f426c..3b0bb63cb 100644 --- a/server/services/searchBASE.php +++ b/server/services/searchBASE.php @@ -15,7 +15,8 @@ $result = search("base", $dirty_query, $post_params , array("from", "to", "document_types", "sorting") , ";", null, true, true, null, 3 - , "area_uri", "subject", $precomputed_id, false); + , "area_uri", "subject", $precomputed_id, false + , "legacy"); echo $result diff --git a/server/services/searchDOAJ.php b/server/services/searchDOAJ.php index be37b76f6..3867df3fc 100644 --- a/server/services/searchDOAJ.php +++ b/server/services/searchDOAJ.php @@ -15,7 +15,8 @@ $result = search("doaj", $dirty_query, $post_params , array("from", "to", "today", "sorting") , ";", null, true, true, null, 3 - , "area_uri", "subject", $precomputed_id, false); + , "area_uri", "subject", $precomputed_id, false + , "legacy"); echo $result diff --git a/server/services/searchLinkedCat.php b/server/services/searchLinkedCat.php index 69154f405..7595a92de 100644 --- a/server/services/searchLinkedCat.php +++ b/server/services/searchLinkedCat.php @@ -17,7 +17,8 @@ array("from", "to", "include_content_type", "today", "vis_type"), ";", null, $transform_query_tolowercase=false, true, null, 3, - "area_uri", "subject", $precomputed_id, true); + "area_uri", "subject", $precomputed_id, true, + "legacy"); echo $result diff --git a/server/services/searchLinkedCatAuthorview.php b/server/services/searchLinkedCatAuthorview.php index 0d1ccf526..a61e24518 100644 --- a/server/services/searchLinkedCatAuthorview.php +++ b/server/services/searchLinkedCatAuthorview.php @@ -19,7 +19,8 @@ ";", null, $transform_query_tolowercase = false, true, null, 3, - "area_uri", "subject", $precomputed_id, true + "area_uri", "subject", $precomputed_id, true, + "legacy" ); echo $result diff --git a/server/services/searchLinkedCatBrowseview.php 
b/server/services/searchLinkedCatBrowseview.php index b80cb5651..41c5e9221 100644 --- a/server/services/searchLinkedCatBrowseview.php +++ b/server/services/searchLinkedCatBrowseview.php @@ -19,7 +19,8 @@ ";", null, $transform_query_tolowercase = false, true, null, 3, - "area_uri", "subject", $precomputed_id, true + "area_uri", "subject", $precomputed_id, true, + "legacy" ); echo $result diff --git a/server/services/searchOpenAire.php b/server/services/searchOpenAire.php index 249dbbd6e..fab9340c5 100644 --- a/server/services/searchOpenAire.php +++ b/server/services/searchOpenAire.php @@ -24,7 +24,8 @@ "openaire_link", "obj_id", "acronym"), - ";", null, false, true, array("project_id", "funder")); + ";", null, false, true, array("project_id", "funder") + , "legacy"); echo $result diff --git a/server/services/searchPLOS.php b/server/services/searchPLOS.php index e490190b2..70bcccb25 100644 --- a/server/services/searchPLOS.php +++ b/server/services/searchPLOS.php @@ -15,7 +15,8 @@ $result = search("plos", $dirty_query, $post_params , array("article_types", "journals", "from", "to", "sorting") , ";", "/", true, true, null, 3 - , "area_uri", "subject", $precomputed_id, false); + , "area_uri", "subject", $precomputed_id, false + , "legacy"); echo $result diff --git a/server/services/searchPubmed.php b/server/services/searchPubmed.php index 7a5066d6c..7390abc86 100644 --- a/server/services/searchPubmed.php +++ b/server/services/searchPubmed.php @@ -17,7 +17,8 @@ $result = search("pubmed", $dirty_query , $post_params, array("from", "to", "sorting") , ";", null, true, true, null, 3 - , "area_uri", "subject", $precomputed_id, false); + , "area_uri", "subject", $precomputed_id, false + , "legacy"); echo $result diff --git a/server/services/searchTRIPLE.php b/server/services/searchTRIPLE.php index 5de28f23e..0d83c05e4 100644 --- a/server/services/searchTRIPLE.php +++ b/server/services/searchTRIPLE.php @@ -8,10 +8,15 @@ use headstart\library; $dirty_query = 
library\CommUtils::getParameter($_POST, "q"); +$precomputed_id = (isset($_POST["unique_id"]))?($_POST["unique_id"]):(null); $post_params = $_POST; -$result = search("triple", $dirty_query, $post_params, array("from", "to", "sorting"), ";", null); +$result = search("triple", $dirty_query + , $post_params, array("from", "to", "sorting") + , ";", null, true, true, null, 3 + , "area_uri", "subject", $precomputed_id, false + , "api"); echo $result From 0a94e9b84d5950d16a23fba769cef8517ceb9baf Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 6 Apr 2020 15:04:28 +0200 Subject: [PATCH 81/99] logging --- .../workers/dataprocessing/src/headstart.py | 1 + server/workers/services/src/apis/gsheets.py | 60 +++++++++++-------- 2 files changed, 37 insertions(+), 24 deletions(-) diff --git a/server/workers/dataprocessing/src/headstart.py b/server/workers/dataprocessing/src/headstart.py index af55b6a32..bb118c864 100644 --- a/server/workers/dataprocessing/src/headstart.py +++ b/server/workers/dataprocessing/src/headstart.py @@ -51,6 +51,7 @@ def create_map(self, params, input_data): cmd = [self.command, self.hs, self.wd, params.get('q'), params.get('service'), param_file.name, input_file.name] + self.logger.debug(cmd) output = subprocess.check_output(cmd) output = [o for o in output.decode('utf-8').split('\n') if len(o) > 0] return pd.DataFrame(json.loads(output[-1])).to_json(orient="records") diff --git a/server/workers/services/src/apis/gsheets.py b/server/workers/services/src/apis/gsheets.py index 5ec79e91a..e0288e3e6 100644 --- a/server/workers/services/src/apis/gsheets.py +++ b/server/workers/services/src/apis/gsheets.py @@ -79,7 +79,7 @@ def validate_data(df): Column('Open access', [InListValidation(["yes", "no", "unknown"], case_sensitive=False)]), Column('Comments', []), Column('Tags', []), - Column('Include in map?', [InListValidation(["yes", "no"])]), + Column('Ready for inclusion in map?', [InListValidation(["yes", "no"])]), Column('Type', []), Column('Area', 
[]) ]) @@ -89,13 +89,17 @@ def validate_data(df): # add column: Valid Bool errors = schema.validate(df) errors_index_rows = [e.row for e in errors] - df_clean = df.drop(index=errors_index_rows) - df_errors = df.iloc[errors_index_rows] + if errors_index_rows != [-1]: + df_clean = df.drop(index=errors_index_rows) + df_errors = df.iloc[errors_index_rows] + else: + df_clean = df + df_errors = pd.DataFrame() return df_clean, errors, df_errors def preprocess_data(df): - df = df[df["Include in map?"] == "yes"] + df = df[df["Ready for inclusion in map?"] == "yes"] metadata = pd.DataFrame() metadata["id"] = df.ID metadata["title"] = df.Title @@ -106,7 +110,7 @@ def preprocess_data(df): metadata["url"] = df["Link to PDF"] metadata["readers"] = 0 metadata["subject"] = df.Keywords - metadata["oa_state"] = df["Open access"] + metadata["oa_state"] = df.Access metadata["link"] = df["Link to PDF"] metadata["relevance"] = df.index metadata["comments"] = df.Comments @@ -167,33 +171,41 @@ class Search(Resource): 400: 'Invalid search parameters'}) @gsheets_ns.expect(search_query) @gsheets_ns.produces(["application/json"]) - def search(): + def post(self): """ """ params = request.get_json() # fill default params params["q"] = params["vis_id"] params["vis_type"] = "overview" + gsheets_ns.logger.debug(params) sheet_id = get_sheet_id(params.get('vis_id')) covid19_range = "Resources!A1:N200" - sheet_content = get_sheet_content(sheet_id, covid19_range) - raw = pd.DataFrame(sheet_content.get('values')) - df_clean, errors, df_errors = validate_data(raw) - input_data = preprocess_data(df_clean) - - k = str(uuid.uuid4()) - res = {} - res["id"] = k - res["input_data"] = input_data - res["params"] = params - redis_store.rpush("input_data", json.dumps(res)) - result = get_key(redis_store, k) - - headers = {} - headers["Content-Type"] = "application/json" - return make_response(result, - 200, - headers) + try: + sheet_content = get_sheet_content(sheet_id, covid19_range) + raw = 
pd.DataFrame(sheet_content.get('values')) + df_clean, errors, df_errors = validate_data(raw) + input_data = preprocess_data(df_clean) + except Exception as e: + gsheets_ns.logger.error(e) + abort(500, "Problem encountered during data collection, sorry") + + try: + k = str(uuid.uuid4()) + res = {} + res["id"] = k + res["input_data"] = input_data + res["params"] = params + redis_store.rpush("input_data", json.dumps(res)) + result = get_key(redis_store, k) + headers = {} + headers["Content-Type"] = "application/json" + return make_response(result, + 200, + headers) + except Exception as e: + gsheets_ns.logger.error(e) + abort(500, "Problem encountered during processing, sorry.") def writeRevision(vis_id, data, rev_id=None): From 2c4367eac31c1b45dbca5bf17daeadb230a18614 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 6 Apr 2020 15:47:52 +0200 Subject: [PATCH 82/99] now works up to map data generation --- server/workers/services/src/apis/gsheets.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/workers/services/src/apis/gsheets.py b/server/workers/services/src/apis/gsheets.py index e0288e3e6..a1e94090f 100644 --- a/server/workers/services/src/apis/gsheets.py +++ b/server/workers/services/src/apis/gsheets.py @@ -151,6 +151,7 @@ def raw_exampe(): # fill default params params["q"] = params["vis_id"] params["vis_type"] = "overview" + params["service"] = "gsheets" sheet_id = get_sheet_id(params.get('vis_id')) covid19_range = "Resources!A1:N200" sheet_content = get_sheet_content(sheet_id, covid19_range) @@ -178,6 +179,7 @@ def post(self): # fill default params params["q"] = params["vis_id"] params["vis_type"] = "overview" + params["service"] = "gsheets" gsheets_ns.logger.debug(params) sheet_id = get_sheet_id(params.get('vis_id')) covid19_range = "Resources!A1:N200" From e55b723df380d291847265402f230811dde9bd9a Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 6 Apr 2020 17:46:04 +0200 Subject: [PATCH 83/99] wip --- 
server/services/getLatestRevision.php | 48 +++++++++++---------- server/workers/services/src/apis/gsheets.py | 7 ++- 2 files changed, 32 insertions(+), 23 deletions(-) diff --git a/server/services/getLatestRevision.php b/server/services/getLatestRevision.php index f524ff5fd..c54e078d6 100644 --- a/server/services/getLatestRevision.php +++ b/server/services/getLatestRevision.php @@ -18,30 +18,34 @@ array("flags" => FILTER_NULL_ON_FAILURE)); $streamgraph = filter_input(INPUT_GET, "streamgraph", FILTER_VALIDATE_BOOLEAN, array("flags" => FILTER_NULL_ON_FAILURE)); +$backend = isset($_GET["vis_id"]) ? library\CommUtils::getParameter($_GET, "vis_id") : "legacy"; $persistence = new headstart\persistence\SQLitePersistence($ini_array["connection"]["sqlite_db"]); -if ($context === true) { - $data = $persistence->getLastVersion($vis_id, $details = false, $context = true)[0]; - $return_data = array("context" => array("id" => $data["rev_vis"], "query" => $data["vis_query"], "service" => $data["vis_title"] - , "timestamp" => $data["rev_timestamp"], "params" => $data["vis_params"]), - "data" => $data["rev_data"]); - if ($streamgraph === true) { - $calculation = new headstart\preprocessing\calculation\RCalculation($ini_array); - $working_dir = $ini_array["general"]["preprocessing_dir"] . 
$ini_array["output"]["output_dir"]; - $sg_output = $calculation->performStreamgraphCalculation($working_dir, $return_data["context"]["service"], $return_data["data"]); - $sg_output_json = end($sg_output); - $sg_output_json = mb_convert_encoding($sg_output_json, "UTF-8"); - - if (!library\Toolkit::isJSON($sg_output_json) || $sg_output_json == "null" || $sg_output_json == null) { - - $sg_output_json = json_encode(array("status" => "error")); - } - $return_data["streamgraph"] = $sg_output_json; - } - $jsonData = json_encode($return_data); - library\CommUtils::echoOrCallback($jsonData, $_GET); +if ($backend == "api") { } else { - $jsonData = $persistence->getLastVersion($vis_id); - library\CommUtils::echoOrCallback($jsonData[0], $_GET); + if ($context === true) { + $data = $persistence->getLastVersion($vis_id, $details = false, $context = true)[0]; + $return_data = array("context" => array("id" => $data["rev_vis"], "query" => $data["vis_query"], "service" => $data["vis_title"] + , "timestamp" => $data["rev_timestamp"], "params" => $data["vis_params"]), + "data" => $data["rev_data"]); + if ($streamgraph === true) { + $calculation = new headstart\preprocessing\calculation\RCalculation($ini_array); + $working_dir = $ini_array["general"]["preprocessing_dir"] . 
$ini_array["output"]["output_dir"]; + $sg_output = $calculation->performStreamgraphCalculation($working_dir, $return_data["context"]["service"], $return_data["data"]); + $sg_output_json = end($sg_output); + $sg_output_json = mb_convert_encoding($sg_output_json, "UTF-8"); + + if (!library\Toolkit::isJSON($sg_output_json) || $sg_output_json == "null" || $sg_output_json == null) { + + $sg_output_json = json_encode(array("status" => "error")); + } + $return_data["streamgraph"] = $sg_output_json; + } + $jsonData = json_encode($return_data); + library\CommUtils::echoOrCallback($jsonData, $_GET); + } else { + $jsonData = $persistence->getLastVersion($vis_id); + library\CommUtils::echoOrCallback($jsonData[0], $_GET); + } } diff --git a/server/workers/services/src/apis/gsheets.py b/server/workers/services/src/apis/gsheets.py index a1e94090f..9b873a193 100644 --- a/server/workers/services/src/apis/gsheets.py +++ b/server/workers/services/src/apis/gsheets.py @@ -200,9 +200,14 @@ def post(self): res["params"] = params redis_store.rpush("input_data", json.dumps(res)) result = get_key(redis_store, k) + result_df = pd.DataFrame.from_records(json.loads(result)) + result_df.sort_index(inplace=True) + result_df["area"] = df_clean["Area"] + uris = {a: i for i, a in enumerate(result_df.area.unique())} + result_df["area_uri"] = result_df.area.map(lambda x: uris.get(x)) headers = {} headers["Content-Type"] = "application/json" - return make_response(result, + return make_response(result_df.to_dict(), 200, headers) except Exception as e: From 4d67311311831ffa383364c73ad94364184ada3d Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 6 Apr 2020 17:52:52 +0200 Subject: [PATCH 84/99] fix serialization --- server/workers/dataprocessing/src/headstart.py | 2 +- server/workers/services/src/apis/triple.py | 2 +- server/workers/triple/src/search_triple.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/server/workers/dataprocessing/src/headstart.py 
b/server/workers/dataprocessing/src/headstart.py index af55b6a32..ce9931f1d 100644 --- a/server/workers/dataprocessing/src/headstart.py +++ b/server/workers/dataprocessing/src/headstart.py @@ -53,7 +53,7 @@ def create_map(self, params, input_data): param_file.name, input_file.name] output = subprocess.check_output(cmd) output = [o for o in output.decode('utf-8').split('\n') if len(o) > 0] - return pd.DataFrame(json.loads(output[-1])).to_json(orient="records") + return pd.DataFrame(json.loads(output[-1])).to_dict(orient="records") def run(self): k, params, input_data = self.next_item() diff --git a/server/workers/services/src/apis/triple.py b/server/workers/services/src/apis/triple.py index aaa378431..187b88bcf 100644 --- a/server/workers/services/src/apis/triple.py +++ b/server/workers/services/src/apis/triple.py @@ -91,7 +91,7 @@ def post(self): axis=1) result = df.to_csv() else: - result = pd.read_json(result).to_csv() + result = pd.DataFrame.from_records(result).to_csv() headers["Content-Type"] = "text/csv" headers["Content-Disposition"] = "attachment; filename={0}.csv".format(k) if data.get("raw") is True: diff --git a/server/workers/triple/src/search_triple.py b/server/workers/triple/src/search_triple.py index 0c26ec923..3162c1456 100644 --- a/server/workers/triple/src/search_triple.py +++ b/server/workers/triple/src/search_triple.py @@ -166,8 +166,8 @@ def process_result(self, result): text["id"] = metadata["id"] text["content"] = metadata.apply(lambda x: ". 
".join(x[["title", "paper_abstract"]]), axis=1) input_data = {} - input_data["metadata"] = metadata.to_json(orient='records') - input_data["text"] = text.to_json(orient='records') + input_data["metadata"] = metadata.to_dict(orient='records') + input_data["text"] = text.to_dict(orient='records') return input_data @staticmethod From e2f4ca915021c4db473923326ad04aa8b4e2749a Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 6 Apr 2020 19:22:15 +0200 Subject: [PATCH 85/99] good wip --- server/workers/services/src/apis/gsheets.py | 98 +++++++++++---------- 1 file changed, 53 insertions(+), 45 deletions(-) diff --git a/server/workers/services/src/apis/gsheets.py b/server/workers/services/src/apis/gsheets.py index 9b873a193..a30a39237 100644 --- a/server/workers/services/src/apis/gsheets.py +++ b/server/workers/services/src/apis/gsheets.py @@ -63,7 +63,8 @@ def authenticate(): def get_sheet_content(sheet_id, sheet_range): res = sheet.values().get(spreadsheetId=sheet_id, range=sheet_range).execute() - return res + raw = pd.DataFrame(res.get('values')) + return raw def validate_data(df): @@ -71,12 +72,12 @@ def validate_data(df): Column('ID', []), Column('Title', []), Column('Authors', []), - Column('Abstract', [CustomElementValidation(lambda s: len(s) > 5, 'Abstract not long enough')]), + Column('Abstract', []), Column('Publication Venue', []), Column('Publication Date', [DateFormatValidation("%Y-%m-%d")]), Column('Link to PDF', []), Column('Keywords', []), - Column('Open access', [InListValidation(["yes", "no", "unknown"], case_sensitive=False)]), + Column('Access', [InListValidation(["open", "closed", "unknown", "free"], case_sensitive=False)]), Column('Comments', []), Column('Tags', []), Column('Ready for inclusion in map?', [InListValidation(["yes", "no"])]), @@ -85,21 +86,20 @@ def validate_data(df): ]) df.columns = df.iloc[0] df.drop([0, 1], inplace=True) - df.reset_index(drop=True, inplace=True) # add column: Valid Bool + df = df[df["Ready for inclusion in 
map?"] == "yes"] errors = schema.validate(df) errors_index_rows = [e.row for e in errors] - if errors_index_rows != [-1]: - df_clean = df.drop(index=errors_index_rows) - df_errors = df.iloc[errors_index_rows] + if errors_index_rows == [-1]: + clean_df = df + errors_df = pd.DataFrame() else: - df_clean = df - df_errors = pd.DataFrame() - return df_clean, errors, df_errors + clean_df = df.drop(index=errors_index_rows) + errors_df = df.loc[errors_index_rows] + return clean_df, errors, errors_df -def preprocess_data(df): - df = df[df["Ready for inclusion in map?"] == "yes"] +def create_input_data(df): metadata = pd.DataFrame() metadata["id"] = df.ID metadata["title"] = df.Title @@ -128,6 +128,19 @@ def preprocess_data(df): return input_data +def post_process(clean_df, result_df): + sorter = clean_df["ID"] + sorterIndex = dict(zip(sorter, range(len(sorter)))) + result_df["orig_order"] = result_df["id"].map(sorterIndex) + result_df.sort_values(["orig_order"], ascending=[True], inplace=True) + result_df.drop("orig_order", axis=1, inplace=True) + result_df.index = clean_df.index + result_df["area"] = clean_df.Area + uris = {a: i for i, a in enumerate(result_df.area.unique())} + result_df["area_uri"] = result_df.area.map(lambda x: uris.get(x)) + return result_df + + def get_sheet_id(vis_id): # mock functionality mock_db = {"covid19": "1csxG23x99DcxoEud782Bji76C7mGxKkAVMBz8gdf_0A"} @@ -142,30 +155,6 @@ def get_sheet_id(vis_id): required=True)}) -@app.route('/api/gsheets/raw') -def raw_exampe(): - """ - """ - params = request.args.to_dict() - # params = request.get_json() - # fill default params - params["q"] = params["vis_id"] - params["vis_type"] = "overview" - params["service"] = "gsheets" - sheet_id = get_sheet_id(params.get('vis_id')) - covid19_range = "Resources!A1:N200" - sheet_content = get_sheet_content(sheet_id, covid19_range) - raw = pd.DataFrame(sheet_content.get('values')) - df_clean, errors, df_errors = validate_data(raw) - input_data = preprocess_data(df_clean) 
- return render_template("tables.html", - df_clean=df_clean.to_html(header=True), - errors="
".join([str(e) - for e in errors]), - df_errors=df_errors.to_html(header=True) - ) - - @gsheets_ns.route('/search') class Search(Resource): @gsheets_ns.doc(responses={200: 'OK', @@ -184,10 +173,9 @@ def post(self): sheet_id = get_sheet_id(params.get('vis_id')) covid19_range = "Resources!A1:N200" try: - sheet_content = get_sheet_content(sheet_id, covid19_range) - raw = pd.DataFrame(sheet_content.get('values')) - df_clean, errors, df_errors = validate_data(raw) - input_data = preprocess_data(df_clean) + raw = get_sheet_content(sheet_id, covid19_range) + clean_df, errors, errors_df = validate_data(raw) + input_data = create_input_data(clean_df) except Exception as e: gsheets_ns.logger.error(e) abort(500, "Problem encountered during data collection, sorry") @@ -200,11 +188,9 @@ def post(self): res["params"] = params redis_store.rpush("input_data", json.dumps(res)) result = get_key(redis_store, k) - result_df = pd.DataFrame.from_records(json.loads(result)) - result_df.sort_index(inplace=True) - result_df["area"] = df_clean["Area"] - uris = {a: i for i, a in enumerate(result_df.area.unique())} - result_df["area_uri"] = result_df.area.map(lambda x: uris.get(x)) + result_df = post_process(clean_df, pd.DataFrame(result)) + # result_df.index = result_df.index.astype(int) + # result_df.sort_index(inplace=True) headers = {} headers["Content-Type"] = "application/json" return make_response(result_df.to_dict(), @@ -260,4 +246,26 @@ def get(self, vis_id): 200) +@app.route('/api/gsheets/raw') +def raw_exampe(): + """ + """ + params = request.args.to_dict() + # params = request.get_json() + # fill default params + params["q"] = params["vis_id"] + params["vis_type"] = "overview" + params["service"] = "gsheets" + sheet_id = get_sheet_id(params.get('vis_id')) + covid19_range = "Resources!A1:N200" + sheet_content = get_sheet_content(sheet_id, covid19_range) + raw = pd.DataFrame(sheet_content.get('values')) + clean_df, errors, errors_df = validate_data(raw) + input_data = 
create_input_data(clean_df) + return render_template("tables.html", + clean_df=clean_df.to_html(header=True), + errors="
".join([str(e) + for e in errors]), + errors_df=errors_df.to_html(header=True) + ) # @gsheets_ns.route('/get') From 6b930559e8768d0accd9468a060bb987917749f8 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 6 Apr 2020 20:43:32 +0200 Subject: [PATCH 86/99] fix serialization --- server/workers/triple/src/search_triple.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/workers/triple/src/search_triple.py b/server/workers/triple/src/search_triple.py index 3162c1456..0c26ec923 100644 --- a/server/workers/triple/src/search_triple.py +++ b/server/workers/triple/src/search_triple.py @@ -166,8 +166,8 @@ def process_result(self, result): text["id"] = metadata["id"] text["content"] = metadata.apply(lambda x: ". ".join(x[["title", "paper_abstract"]]), axis=1) input_data = {} - input_data["metadata"] = metadata.to_dict(orient='records') - input_data["text"] = text.to_dict(orient='records') + input_data["metadata"] = metadata.to_json(orient='records') + input_data["text"] = text.to_json(orient='records') return input_data @staticmethod From 15996b61df1dd6476f8935f9ef623df498695acb Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 6 Apr 2020 20:43:48 +0200 Subject: [PATCH 87/99] wip --- server/workers/services/src/apis/gsheets.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/server/workers/services/src/apis/gsheets.py b/server/workers/services/src/apis/gsheets.py index a30a39237..e38bde2a3 100644 --- a/server/workers/services/src/apis/gsheets.py +++ b/server/workers/services/src/apis/gsheets.py @@ -90,12 +90,16 @@ def validate_data(df): df = df[df["Ready for inclusion in map?"] == "yes"] errors = schema.validate(df) errors_index_rows = [e.row for e in errors] + for e in errors: + e.row += 1 + error_messages = [str(e) for e in errors] if errors_index_rows == [-1]: clean_df = df errors_df = pd.DataFrame() else: clean_df = df.drop(index=errors_index_rows) errors_df = df.loc[errors_index_rows] + 
errors_df["reason"] = error_messages return clean_df, errors, errors_df @@ -193,7 +197,15 @@ def post(self): # result_df.sort_index(inplace=True) headers = {} headers["Content-Type"] = "application/json" - return make_response(result_df.to_dict(), + result = {} + result["data"] = result_df.to_dict(orient="records") + result["context"] = {"id": "covid19", + "query": "covid19", + "service": "gsheets", + "timestamp": datetime.utcnow(), + "params": params} + result["errors"] = errors_df.to_dict(orient="records") + return make_response(result, 200, headers) except Exception as e: From 8209be54f08a77f1793f93f7b83abfb8f98f4225 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 6 Apr 2020 20:57:40 +0200 Subject: [PATCH 88/99] no persisting if error --- server/services/search.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/server/services/search.php b/server/services/search.php index a12ee529e..2ad3d13a0 100644 --- a/server/services/search.php +++ b/server/services/search.php @@ -97,6 +97,10 @@ function search($repository, $dirty_query, $post_params, $param_types, $keyword_ curl_setopt($ch, CURLOPT_POSTFIELDS, $payload); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); $output_json = curl_exec($ch); + $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE); + if ($httpcode != 200) { + $output_json = NULL; + } } else { $calculation = new \headstart\preprocessing\calculation\RCalculation($ini_array); $output = $calculation->performCalculationAndReturnOutputAsJSON($WORKING_DIR, $query, $params_filename, $repository); From d91b57c8fe1aab1756cf61281d61ff746ffceca2 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 6 Apr 2020 21:55:05 +0200 Subject: [PATCH 89/99] fix search --- server/services/search.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/services/search.php b/server/services/search.php index 2ad3d13a0..6e5249b8f 100644 --- a/server/services/search.php +++ b/server/services/search.php @@ -87,7 +87,7 @@ function 
search($repository, $dirty_query, $post_params, $param_types, $keyword_ $WORKING_DIR = $ini_array["general"]["preprocessing_dir"] . $ini_array["output"]["output_dir"]; - if ($backend == "api") { + if ($backend === "api") { $url = "http://127.0.0.1/api/" . $repository . "/search"; $payload = json_encode($post_params); $ch = curl_init(); From 8cc60955fcd66c4fd292ab009ad9ea57ebb829af Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 6 Apr 2020 22:06:11 +0200 Subject: [PATCH 90/99] cleanup --- server/workers/services/src/app.py | 6 ++---- server/workers/services/src/config/example_settings.py | 2 -- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/server/workers/services/src/app.py b/server/workers/services/src/app.py index 08aa2ac26..1b45fb788 100644 --- a/server/workers/services/src/app.py +++ b/server/workers/services/src/app.py @@ -3,14 +3,11 @@ from flask import Flask from flask_restx import Api from flask_cors import CORS -from flask_sqlalchemy import SQLAlchemy from werkzeug.middleware.proxy_fix import ProxyFix from apis.triple import triple_ns from apis.gsheets import gsheets_ns -from apis.gsheets import app as gsheets_bp -from database import db from config import settings from utils.monkeypatches import ReverseProxied, __schema__, specs_url, _register_apidoc, inject_flasgger import logging @@ -46,7 +43,8 @@ def api_patches(app, settings): api.add_namespace(triple_ns, path='/triple') api.add_namespace(gsheets_ns, path='/gsheets') app.logger.debug(app.config) +app.logger.debug(app.url_map) if __name__ == '__main__': - app.run(host="localhost", port=5001, debug=True) + app.run(host="127.0.0.1", port=5001, debug=True) diff --git a/server/workers/services/src/config/example_settings.py b/server/workers/services/src/config/example_settings.py index acaa449c9..d602840e2 100644 --- a/server/workers/services/src/config/example_settings.py +++ b/server/workers/services/src/config/example_settings.py @@ -1,7 +1,5 @@ BEHIND_PROXY = True 
SWAGGER_BASEPATH = "" -# change to appropriate domain -SERVER_NAME = "localhost" # change to "production" ENV = "development" # change to False From bfeab8672f953f8a207702fe1e1f7497e5c719cf Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 6 Apr 2020 22:07:34 +0200 Subject: [PATCH 91/99] fix serialization --- server/workers/dataprocessing/src/headstart.py | 2 +- server/workers/services/src/apis/triple.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/server/workers/dataprocessing/src/headstart.py b/server/workers/dataprocessing/src/headstart.py index 6c0848c39..bb118c864 100644 --- a/server/workers/dataprocessing/src/headstart.py +++ b/server/workers/dataprocessing/src/headstart.py @@ -54,7 +54,7 @@ def create_map(self, params, input_data): self.logger.debug(cmd) output = subprocess.check_output(cmd) output = [o for o in output.decode('utf-8').split('\n') if len(o) > 0] - return pd.DataFrame(json.loads(output[-1])).to_dict(orient="records") + return pd.DataFrame(json.loads(output[-1])).to_json(orient="records") def run(self): k, params, input_data = self.next_item() diff --git a/server/workers/services/src/apis/triple.py b/server/workers/services/src/apis/triple.py index f42edd522..2fc75b123 100644 --- a/server/workers/services/src/apis/triple.py +++ b/server/workers/services/src/apis/triple.py @@ -79,7 +79,7 @@ def post(self): axis=1) result = df.to_csv() else: - result = pd.DataFrame.from_records(result).to_csv() + result = pd.read_json(json.loads(result)).to_csv() headers["Content-Type"] = "text/csv" headers["Content-Disposition"] = "attachment; filename={0}.csv".format(k) if data.get("raw") is True: From c76c1a45f89956505b0a493f4fd6e3d9764b06c7 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 6 Apr 2020 22:07:34 +0200 Subject: [PATCH 92/99] fix serialization --- server/workers/dataprocessing/src/headstart.py | 2 +- server/workers/services/src/apis/gsheets.py | 2 +- server/workers/services/src/apis/triple.py | 2 +- 3 
files changed, 3 insertions(+), 3 deletions(-) diff --git a/server/workers/dataprocessing/src/headstart.py b/server/workers/dataprocessing/src/headstart.py index 6c0848c39..bb118c864 100644 --- a/server/workers/dataprocessing/src/headstart.py +++ b/server/workers/dataprocessing/src/headstart.py @@ -54,7 +54,7 @@ def create_map(self, params, input_data): self.logger.debug(cmd) output = subprocess.check_output(cmd) output = [o for o in output.decode('utf-8').split('\n') if len(o) > 0] - return pd.DataFrame(json.loads(output[-1])).to_dict(orient="records") + return pd.DataFrame(json.loads(output[-1])).to_json(orient="records") def run(self): k, params, input_data = self.next_item() diff --git a/server/workers/services/src/apis/gsheets.py b/server/workers/services/src/apis/gsheets.py index e38bde2a3..204b18b04 100644 --- a/server/workers/services/src/apis/gsheets.py +++ b/server/workers/services/src/apis/gsheets.py @@ -192,7 +192,7 @@ def post(self): res["params"] = params redis_store.rpush("input_data", json.dumps(res)) result = get_key(redis_store, k) - result_df = post_process(clean_df, pd.DataFrame(result)) + result_df = post_process(clean_df, pd.read_json(json.loads(result))) # result_df.index = result_df.index.astype(int) # result_df.sort_index(inplace=True) headers = {} diff --git a/server/workers/services/src/apis/triple.py b/server/workers/services/src/apis/triple.py index f42edd522..2fc75b123 100644 --- a/server/workers/services/src/apis/triple.py +++ b/server/workers/services/src/apis/triple.py @@ -79,7 +79,7 @@ def post(self): axis=1) result = df.to_csv() else: - result = pd.DataFrame.from_records(result).to_csv() + result = pd.read_json(json.loads(result)).to_csv() headers["Content-Type"] = "text/csv" headers["Content-Disposition"] = "attachment; filename={0}.csv".format(k) if data.get("raw") is True: From a82e0abed200f3621c84ac432001173d0ecfdd5e Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 6 Apr 2020 22:39:27 +0200 Subject: [PATCH 93/99] 
more serialization fixing --- server/workers/services/src/apis/gsheets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/workers/services/src/apis/gsheets.py b/server/workers/services/src/apis/gsheets.py index 204b18b04..85adee714 100644 --- a/server/workers/services/src/apis/gsheets.py +++ b/server/workers/services/src/apis/gsheets.py @@ -192,7 +192,7 @@ def post(self): res["params"] = params redis_store.rpush("input_data", json.dumps(res)) result = get_key(redis_store, k) - result_df = post_process(clean_df, pd.read_json(json.loads(result))) + result_df = post_process(clean_df, pd.DataFrame.from_records(json.loads(result))) # result_df.index = result_df.index.astype(int) # result_df.sort_index(inplace=True) headers = {} From c72fe98c9f06b3489e7992c689592c66c6179ea6 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Tue, 7 Apr 2020 16:08:28 +0200 Subject: [PATCH 94/99] result fields update --- server/workers/services/src/apis/gsheets.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/server/workers/services/src/apis/gsheets.py b/server/workers/services/src/apis/gsheets.py index 85adee714..a7b04b8c8 100644 --- a/server/workers/services/src/apis/gsheets.py +++ b/server/workers/services/src/apis/gsheets.py @@ -119,7 +119,7 @@ def create_input_data(df): metadata["relevance"] = df.index metadata["comments"] = df.Comments metadata["tags"] = df.Tags - metadata["type"] = df.Type + metadata["resulttype"] = df.Type text = pd.DataFrame() text["id"] = metadata["id"] text["content"] = metadata.apply(lambda x: ". 
".join(x[["title", @@ -142,6 +142,11 @@ def post_process(clean_df, result_df): result_df["area"] = clean_df.Area uris = {a: i for i, a in enumerate(result_df.area.unique())} result_df["area_uri"] = result_df.area.map(lambda x: uris.get(x)) + oa_mapper = {"closed": 0, + "open": 1, + "unknown": 2, + "free": 3} + result_df["oa_state"] = result_df["oa_state"].map(lambda x: oa_mapper.get(x)) return result_df From bb1d3eba8594fcdd567495c4bbf293ff514e5f73 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Tue, 7 Apr 2020 21:57:20 +0200 Subject: [PATCH 95/99] first attempt at integration --- server/services/getCovisMap.php | 39 ++++++++++ server/services/updateCovis.php | 22 ++++++ server/workers/services/src/apis/gsheets.py | 80 +-------------------- 3 files changed, 64 insertions(+), 77 deletions(-) create mode 100644 server/services/getCovisMap.php create mode 100644 server/services/updateCovis.php diff --git a/server/services/getCovisMap.php b/server/services/getCovisMap.php new file mode 100644 index 000000000..1ba58f929 --- /dev/null +++ b/server/services/getCovisMap.php @@ -0,0 +1,39 @@ + FILTER_NULL_ON_FAILURE)); +$streamgraph = filter_input(INPUT_GET, "streamgraph", FILTER_VALIDATE_BOOLEAN, + array("flags" => FILTER_NULL_ON_FAILURE)); +$backend = isset($_GET["vis_id"]) ? 
library\CommUtils::getParameter($_GET, "vis_id") : "legacy"; + +$persistence = new headstart\persistence\SQLitePersistence($ini_array["connection"]["sqlite_db"]); + +if ($backend == "api") { +} else { + $data = $persistence->getLastVersion($vis_id, $details = false, $context = true)[0]; + $return_data = array("context" => array("id" => $data["rev_vis"], + "query" => $data["vis_query"], + "service" => $data["vis_title"], + "timestamp" => $data["rev_timestamp"], + "params" => $data["vis_params"], + "sheet_id" => $data["rev_data"]["sheet_id"], + "last_update" => $data["rev_data"]["last_update"]), + "data" => $data["rev_data"]["data"], + "errors" => $data["rev_data"]["errors"]); + $jsonData = json_encode($return_data); + library\CommUtils::echoOrCallback($jsonData[0], $_GET); +} diff --git a/server/services/updateCovis.php b/server/services/updateCovis.php new file mode 100644 index 000000000..9f674604f --- /dev/null +++ b/server/services/updateCovis.php @@ -0,0 +1,22 @@ + diff --git a/server/workers/services/src/apis/gsheets.py b/server/workers/services/src/apis/gsheets.py index a7b04b8c8..78123411c 100644 --- a/server/workers/services/src/apis/gsheets.py +++ b/server/workers/services/src/apis/gsheets.py @@ -12,8 +12,6 @@ from flask import Blueprint, request, make_response, jsonify, abort, render_template from flask_restx import Namespace, Resource, fields from apis.utils import get_key -from models import Revisions, Visualizations -from database import db import pandas as pd from pandas_schema import Column, Schema from pandas_schema.validation import (MatchesPatternValidation, @@ -204,85 +202,13 @@ def post(self): headers["Content-Type"] = "application/json" result = {} result["data"] = result_df.to_dict(orient="records") - result["context"] = {"id": "covid19", - "query": "covid19", - "service": "gsheets", - "timestamp": datetime.utcnow(), - "params": params} result["errors"] = errors_df.to_dict(orient="records") + result["sheet_id"] = sheet_id + # TODO: get 
last_update from spreadsheet + result["last_update"] = datetime.utcnow() return make_response(result, 200, headers) except Exception as e: gsheets_ns.logger.error(e) abort(500, "Problem encountered during processing, sorry.") - - -def writeRevision(vis_id, data, rev_id=None): - - vis = Visualizations.query.filter_by(vis_id=vis_id).first() - - rev = rev_id - if rev is None: - r_id = vis.vis_latest - rev = r_id + 1 - - query = vis.vis_clean_query - - new_rev = Revisions({ - "rev_id": rev, - "rev_vis": vis_id, - "rev_user": "System", - "rev_timestamp": datetime.utcnow(), - "rev_comment": "Visualization created", - "rev_data": data, - "vis_query": query - }) - db.session.add(new_rev) - db.session.commit() - - -@gsheets_ns.route('/createVisualization') -class createVisualization(Resource): - def post(self, vis_id): - # param: map_id - # get map context for map ID - # get latest revision data via sheets ID from context - # if not assert equal - # add revision number to map context - # get latest revision - pass - - -@gsheets_ns.route('/existsVisualization') -class existsVisualization(Resource): - def get(self, vis_id): - map = Visualizations.query.filter_by(vis_id=vis_id).first() - exists = True if map else False - make_response(exists, - 200) - - -@app.route('/api/gsheets/raw') -def raw_exampe(): - """ - """ - params = request.args.to_dict() - # params = request.get_json() - # fill default params - params["q"] = params["vis_id"] - params["vis_type"] = "overview" - params["service"] = "gsheets" - sheet_id = get_sheet_id(params.get('vis_id')) - covid19_range = "Resources!A1:N200" - sheet_content = get_sheet_content(sheet_id, covid19_range) - raw = pd.DataFrame(sheet_content.get('values')) - clean_df, errors, errors_df = validate_data(raw) - input_data = create_input_data(clean_df) - return render_template("tables.html", - clean_df=clean_df.to_html(header=True), - errors="
".join([str(e) - for e in errors]), - errors_df=errors_df.to_html(header=True) - ) -# @gsheets_ns.route('/get') From b1a3caa0d098fbd7dbdde9e959b9b5ec009397b3 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Tue, 7 Apr 2020 23:00:24 +0200 Subject: [PATCH 96/99] more integration wip --- server/services/getCovisMap.php | 9 +++++---- server/services/search.php | 2 +- server/services/updateCovis.php | 6 +----- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/server/services/getCovisMap.php b/server/services/getCovisMap.php index 1ba58f929..6975b2d41 100644 --- a/server/services/getCovisMap.php +++ b/server/services/getCovisMap.php @@ -25,15 +25,16 @@ if ($backend == "api") { } else { $data = $persistence->getLastVersion($vis_id, $details = false, $context = true)[0]; + $rev_data = json_decode($data["rev_data"], true); $return_data = array("context" => array("id" => $data["rev_vis"], "query" => $data["vis_query"], "service" => $data["vis_title"], "timestamp" => $data["rev_timestamp"], "params" => $data["vis_params"], - "sheet_id" => $data["rev_data"]["sheet_id"], - "last_update" => $data["rev_data"]["last_update"]), - "data" => $data["rev_data"]["data"], - "errors" => $data["rev_data"]["errors"]); + "sheet_id" => $rev_data["sheet_id"], + "last_update" => $rev_data["last_update"]), + "data" => $rev_data["data"], + "errors" => $rev_data["errors"]); $jsonData = json_encode($return_data); library\CommUtils::echoOrCallback($jsonData[0], $_GET); } diff --git a/server/services/search.php b/server/services/search.php index 6e5249b8f..e3e7d8e3b 100644 --- a/server/services/search.php +++ b/server/services/search.php @@ -88,7 +88,7 @@ function search($repository, $dirty_query, $post_params, $param_types, $keyword_ $WORKING_DIR = $ini_array["general"]["preprocessing_dir"] . $ini_array["output"]["output_dir"]; if ($backend === "api") { - $url = "http://127.0.0.1/api/" . $repository . "/search"; + $url = $ini_array["general"]["api_url"] . $repository . 
"/search"; $payload = json_encode($post_params); $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $url); diff --git a/server/services/updateCovis.php b/server/services/updateCovis.php index 9f674604f..95c022e24 100644 --- a/server/services/updateCovis.php +++ b/server/services/updateCovis.php @@ -7,11 +7,7 @@ use headstart\library; -$dirty_query = library\CommUtils::getParameter($_POST, "q"); - -$post_params = $_POST; - -$result = search("gsheets", $dirty_query, $post_params +$result = search("gsheets", "covid19", array("vis_id" => "covid19") , array("vis_id") , ";", null, true, false, null, 3 , "area_uri", "subject", "covid19", false From f1fbfd4b785316c0a94d4845fa21bd243b577274 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Wed, 8 Apr 2020 16:38:42 +0200 Subject: [PATCH 97/99] cleanup --- server/workers/services/src/app.py | 4 +++- server/workers/services/src/manage.py | 11 +++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/server/workers/services/src/app.py b/server/workers/services/src/app.py index 1b45fb788..817f06b43 100644 --- a/server/workers/services/src/app.py +++ b/server/workers/services/src/app.py @@ -7,6 +7,7 @@ from apis.triple import triple_ns from apis.gsheets import gsheets_ns +from database import db from config import settings from utils.monkeypatches import ReverseProxied, __schema__, specs_url, _register_apidoc, inject_flasgger @@ -35,9 +36,10 @@ def api_patches(app, settings): handler = logging.StreamHandler(sys.stdout) handler.setLevel(app.logger.level) app = inject_flasgger(app) +db.init_app(app) app.wsgi_app = ProxyFix(app.wsgi_app, x_proto=1, x_port=1, x_for=1, x_host=1, x_prefix=1) app.wsgi_app = ReverseProxied(app.wsgi_app) -CORS(app, expose_headers=["Content-Disposition"]) +CORS(app, expose_headers=["Content-Disposition", "Access-Control-Allow-Origin"]) api = api_patches(app, settings) api.add_namespace(triple_ns, path='/triple') diff --git a/server/workers/services/src/manage.py 
b/server/workers/services/src/manage.py index 2aea3c2c2..23a19b74c 100644 --- a/server/workers/services/src/manage.py +++ b/server/workers/services/src/manage.py @@ -1,11 +1,10 @@ from flask_sqlalchemy import SQLAlchemy -from app import create_app +from app import app from models import Visualizations, Revisions - - -app = create_app() -db = SQLAlchemy(app) +from database import db if __name__ == '__main__': - db.create_all() + db.init_app(app) + with app.app_context(): + db.create_all() From 668d63bed0065b19716f12794ea0ac1a06593e11 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Wed, 8 Apr 2020 16:41:49 +0200 Subject: [PATCH 98/99] cleanup --- docker-compose.yml | 11 ----------- server/workers/README.md | 10 ++++++++++ 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index d2dc44a77..261a4b276 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -50,16 +50,5 @@ services: source: /var/log/headstart target: /var/log/headstart - hsdb: - image: postgres:11 - restart: always - environment: - POSTGRES_PASSWORD: "password" - volumes: - - db:/var/lib/postgresql/data - ports: - - 54321:5432 - volumes: redis: - db: diff --git a/server/workers/README.md b/server/workers/README.md index f1da8b475..5a7b23e84 100644 --- a/server/workers/README.md +++ b/server/workers/README.md @@ -130,3 +130,13 @@ docker-compose -f docker-compose_win.yml down ### Deploying the example: Use a deployment script, or manually deploy an example (currently only TRIPLE is integrated in this way) as described in [HOWTO: search repos](../../doc/howto_search_repos.md): + +Additionally, the `config_local.ini` now requires an additional parameter under `[general]`: + +``` +# URL to OKMaps API +api_url = "" + +``` + +where `api_url` is the full URL to the API endpoint. 
From 6589f63aa73ada8e25fd7dd58e1a29532c181844 Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Thu, 9 Apr 2020 16:30:50 +0200 Subject: [PATCH 99/99] harmonise headstart paths in triple example --- examples/triple/data-config_server.js | 2 +- examples/triple/headstart.php | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/triple/data-config_server.js b/examples/triple/data-config_server.js index 95d53c082..1d636b8d9 100644 --- a/examples/triple/data-config_server.js +++ b/examples/triple/data-config_server.js @@ -1 +1 @@ -data_config.server_url = window.location.href.replace(/[^/]*$/, '') + "headstart/server/"; +data_config.server_url = window.location.href.replace(/[^/]*$/, '') + "../../server/"; diff --git a/examples/triple/headstart.php b/examples/triple/headstart.php index 738ce70df..89afef469 100644 --- a/examples/triple/headstart.php +++ b/examples/triple/headstart.php @@ -17,8 +17,8 @@ }]; data_config.options = options_.dropdowns; - - + +