From 35285d45487bae3076792d8181f05b0edb5a09df Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 9 Jan 2019 20:10:27 -0500 Subject: [PATCH 1/4] Adding the Metrics API for getting datasets by subject, AND up to the month specified. (#5398) --- doc/sphinx-guides/source/api/metrics.rst | 14 +++++++-- .../edu/harvard/iq/dataverse/api/Metrics.java | 22 +++++++++++++ .../dataverse/metrics/MetricsServiceBean.java | 31 +++++++++++++++++++ 3 files changed, 65 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/api/metrics.rst b/doc/sphinx-guides/source/api/metrics.rst index 821b74b0a96..eca75a2da7f 100755 --- a/doc/sphinx-guides/source/api/metrics.rst +++ b/doc/sphinx-guides/source/api/metrics.rst @@ -24,7 +24,7 @@ Example: ``curl https://demo.dataverse.org/api/info/metrics/downloads`` To-Month -------- -Returns a count of various objects in dataverse up to a specified month ``$YYYY-DD`` in YYYY-MM format (i.e. ``2018-01``):: +Returns a count of various objects in dataverse up to a specified month ``$YYYY-DD`` in YYYY-MM format (e.g. ``2018-01``):: GET https://$SERVER/api/info/metrics/$type/toMonth/$YYYY-DD @@ -36,7 +36,7 @@ Example: ``curl https://demo.dataverse.org/api/info/metrics/dataverses/toMonth/2 Past Days --------- -Returns a count of various objects in dataverse for the past ``$days`` (i.e. ``30``):: +Returns a count of various objects in dataverse for the past ``$days`` (e.g. ``30``):: GET https://$SERVER/api/info/metrics/$type/pastDays/$days @@ -74,6 +74,16 @@ Returns the number of datasets by each subject:: GET https://$SERVER/api/info/metrics/datasets/bySubject + +By Subject, and to Month +~~~~~~~~~~~~~~~~~~~~~~~~ + +Returns the number of datasets by each subject, and up to a specified month ``$YYYY-DD`` in YYYY-MM format (e.g. ``2018-01``):: + + GET https://$SERVER/api/info/metrics/datasets/bySubject/toMonth/$YYYY-DD + +Example: ``curl https://demo.dataverse.org/api/info/metrics/datasets/bySubject/toMonth/2018-01`` + .. |CORS| raw:: html diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Metrics.java b/src/main/java/edu/harvard/iq/dataverse/api/Metrics.java index 6b77f7fa32c..2daa2947441 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Metrics.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Metrics.java @@ -207,7 +207,29 @@ public Response getDatasetsBySubject() { return allowCors(error(BAD_REQUEST, ex.getLocalizedMessage())); } } + + @GET + @Path("datasets/bySubject/toMonth/{yyyymm}") + public Response getDatasetsBySubjectToMonth(@PathParam("yyyymm") String yyyymm) { + String metricName = "datasetsBySubjectToMonth"; + + try { + String sanitizedyyyymm = MetricsUtil.sanitizeYearMonthUserInput(yyyymm); + + String jsonArrayString = metricsSvc.returnUnexpiredCacheMonthly(metricName, sanitizedyyyymm); + + if (null == jsonArrayString) { //run query and save + JsonArrayBuilder jsonArrayBuilder = MetricsUtil.datasetsBySubjectToJson(metricsSvc.datasetsBySubjectToMonth(sanitizedyyyymm)); + jsonArrayString = jsonArrayBuilder.build().toString(); + metricsSvc.save(new Metric(metricName, sanitizedyyyymm, jsonArrayString), false); + } + return allowCors(ok(MetricsUtil.stringToJsonArrayBuilder(jsonArrayString))); + } catch (Exception ex) { + return allowCors(error(BAD_REQUEST, ex.getLocalizedMessage())); + } + } + /** Files */ @GET @Path("files") diff --git a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java index 10f9f7440f2..6e00777aa5e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java @@ -119,6 +119,37 @@ public List datasetsBySubject() { return query.getResultList(); } + public List datasetsBySubjectToMonth(String yyyymm) { + Query query = em.createNativeQuery("" + + "SELECT strvalue, count(dataset.id)\n" + + "FROM datasetfield_controlledvocabularyvalue \n" + + "JOIN controlledvocabularyvalue ON controlledvocabularyvalue.id = datasetfield_controlledvocabularyvalue.controlledvocabularyvalues_id\n" + + "JOIN datasetfield ON datasetfield.id = datasetfield_controlledvocabularyvalue.datasetfield_id\n" + + "JOIN datasetfieldtype ON datasetfieldtype.id = controlledvocabularyvalue.datasetfieldtype_id\n" + + "JOIN datasetversion ON datasetversion.id = datasetfield.datasetversion_id\n" + + "JOIN dvobject ON dvobject.id = datasetversion.dataset_id\n" + + "JOIN dataset ON dataset.id = datasetversion.dataset_id\n" + + "WHERE\n" + + "datasetversion.dataset_id || ':' || datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber) in \n" + + "(\n" + + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max \n" + + "from datasetversion\n" + + "join dataset on dataset.id = datasetversion.dataset_id\n" + + "where versionstate='RELEASED'\n" + + "and dataset.harvestingclient_id is null\n" + + "and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" + + "group by dataset_id \n" + + ")\n" + + "AND datasetfieldtype.name = 'subject'\n" + + "GROUP BY strvalue\n" + + "ORDER BY count(dataset.id) desc;" + ); + logger.info("query: " + query); + + return query.getResultList(); + } + + /** * @param yyyymm Month in YYYY-MM format. */ From ad5f9032f17ce67f6f32eed48616c93ec3f5d7d5 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 10 Jan 2019 11:07:55 -0500 Subject: [PATCH 2/4] added a test for the new "datasets by Subject, by month" metrics API (#5398) --- .../harvard/iq/dataverse/api/MetricsIT.java | 19 +++++++++++++++++++ .../edu/harvard/iq/dataverse/api/UtilIT.java | 5 +++++ 2 files changed, 24 insertions(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java index f0ae408b761..c8601df3452 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java @@ -2,6 +2,7 @@ import com.jayway.restassured.RestAssured; import com.jayway.restassured.response.Response; +import edu.harvard.iq.dataverse.metrics.MetricsUtil; import static javax.ws.rs.core.Response.Status.OK; import org.junit.AfterClass; import static org.junit.Assert.assertEquals; @@ -220,4 +221,22 @@ public void testGetDatasetsBySubject() { assertEquals(precache, postcache); } + @Test + public void testGetDatasetsBySubjectToMonth() { + String thismonth = MetricsUtil.getCurrentMonth(); + Response response = UtilIT.metricsDatasetsBySubjectToMonth(thismonth); + String precache = response.prettyPrint(); + response.then().assertThat() + .statusCode(OK.getStatusCode()); + + //Run each query twice and compare results to tests caching + // See the "TODO" at the beginning of the class; + // ideally, we'll want to have more comoprehensive tests. + response = UtilIT.metricsDatasetsBySubjectToMonth(thismonth); + String postcache = response.prettyPrint(); + response.then().assertThat() + .statusCode(OK.getStatusCode()); + + assertEquals(precache, postcache); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 15dd5538da5..a00acf1d260 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -1770,6 +1770,11 @@ static Response metricsDatasetsBySubject() { return requestSpecification.get("/api/info/metrics/datasets/bySubject"); } + static Response metricsDatasetsBySubjectToMonth(String month) { + RequestSpecification requestSpecification = given(); + return requestSpecification.get("/api/info/metrics/datasets/bySubject/toMonth/" + month); + } + static Response clearMetricCache() { RequestSpecification requestSpecification = given(); return requestSpecification.delete("/api/admin/clearMetricsCache"); From 63153821902442bd4a2968e22eb8cbf6de69b343 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 10 Jan 2019 14:14:56 -0500 Subject: [PATCH 3/4] typo in a comment (#5398) --- src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java index c8601df3452..60c9a4cad53 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java @@ -231,7 +231,7 @@ public void testGetDatasetsBySubjectToMonth() { //Run each query twice and compare results to tests caching // See the "TODO" at the beginning of the class; - // ideally, we'll want to have more comoprehensive tests. + // ideally, we'll want to have more comprehensive tests. response = UtilIT.metricsDatasetsBySubjectToMonth(thismonth); String postcache = response.prettyPrint(); response.then().assertThat() From 8ecf4f39d7bd04d20465d5102c19da1d2552bfe3 Mon Sep 17 00:00:00 2001 From: matthew-a-dunlap Date: Thu, 10 Jan 2019 15:13:35 -0500 Subject: [PATCH 4/4] datasets/bySubject call month code internal #5398 --- .../edu/harvard/iq/dataverse/api/Metrics.java | 16 +--------- .../dataverse/metrics/MetricsServiceBean.java | 29 ------------------- 2 files changed, 1 insertion(+), 44 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Metrics.java b/src/main/java/edu/harvard/iq/dataverse/api/Metrics.java index 2daa2947441..e4367961932 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Metrics.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Metrics.java @@ -191,21 +191,7 @@ public Response getDatasetsPastDays(@PathParam("days") int days) { @GET @Path("datasets/bySubject") public Response getDatasetsBySubject() { - String metricName = "datasetsBySubject"; - - try { - String jsonArrayString = metricsSvc.returnUnexpiredCacheAllTime(metricName); - - if (null == jsonArrayString) { //run query and save - JsonArrayBuilder jsonArrayBuilder = MetricsUtil.datasetsBySubjectToJson(metricsSvc.datasetsBySubject()); - jsonArrayString = jsonArrayBuilder.build().toString(); - metricsSvc.save(new Metric(metricName, jsonArrayString), false); - } - - return allowCors(ok(MetricsUtil.stringToJsonArrayBuilder(jsonArrayString))); - } catch (Exception ex) { - return allowCors(error(BAD_REQUEST, ex.getLocalizedMessage())); - } + return getDatasetsBySubjectToMonth(MetricsUtil.getCurrentMonth()); } @GET diff --git a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java index 6e00777aa5e..ae0a0160ae8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java @@ -89,35 +89,6 @@ public List dataversesBySubject() { } /** Datasets */ - - public List datasetsBySubject() { - Query query = em.createNativeQuery("" - + "SELECT strvalue, count(dataset.id)\n" - + "FROM datasetfield_controlledvocabularyvalue \n" - + "JOIN controlledvocabularyvalue ON controlledvocabularyvalue.id = datasetfield_controlledvocabularyvalue.controlledvocabularyvalues_id\n" - + "JOIN datasetfield ON datasetfield.id = datasetfield_controlledvocabularyvalue.datasetfield_id\n" - + "JOIN datasetfieldtype ON datasetfieldtype.id = controlledvocabularyvalue.datasetfieldtype_id\n" - + "JOIN datasetversion ON datasetversion.id = datasetfield.datasetversion_id\n" - + "JOIN dvobject ON dvobject.id = datasetversion.dataset_id\n" - + "JOIN dataset ON dataset.id = datasetversion.dataset_id\n" - + "WHERE\n" - + "datasetversion.dataset_id || ':' || datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber) in \n" - + "(\n" - + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max \n" - + "from datasetversion\n" - + "join dataset on dataset.id = datasetversion.dataset_id\n" - + "where versionstate='RELEASED'\n" - + "and dataset.harvestingclient_id is null\n" - + "group by dataset_id \n" - + ")\n" - + "AND datasetfieldtype.name = 'subject'\n" - + "GROUP BY strvalue\n" - + "ORDER BY count(dataset.id) desc;" - ); - logger.info("query: " + query); - - return query.getResultList(); - } public List datasetsBySubjectToMonth(String yyyymm) { Query query = em.createNativeQuery(""