From 859ee1de3ccdd97f89c8d9d3236211a7973ff63c Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 30 Jun 2021 15:35:46 +0200 Subject: [PATCH 1/5] fix(jpa): introduce real defaults for DataSourceDefinition via MPCONFIG. #7980 Previously, with Dataverse software 5.3, the option to configure the database connection has been moved into the codebase. Admins can set details via MicroProfile Config. With updating to Payara 5.2021.4, we can provide default values for the connection details. Before, this had been tried with adding them to META-INF/microprofile-config.properties. However, this is not possible due to the timing of resource creation in the application server vs. reading the properties file. IQSS/dataverse#7980 --- .../edu/harvard/iq/dataverse/util/DataSourceProducer.java | 7 +++++-- src/main/resources/META-INF/microprofile-config.properties | 5 +---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java index b393ca4a605..131bd957036 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java @@ -16,9 +16,12 @@ // HINT: PGSimpleDataSource would work too, but as we use a connection pool, go with a javax.sql.ConnectionPoolDataSource // HINT: PGXADataSource is unnecessary (no distributed transactions used) and breaks ingest. className = "org.postgresql.ds.PGConnectionPoolDataSource", - user = "${MPCONFIG=dataverse.db.user}", + + // BEWARE: as this resource is created before defaults are read from META-INF/microprofile-config.properties, + // defaults must be provided in this Payara-proprietary manner. + user = "${MPCONFIG=dataverse.db.user:dataverse}", password = "${MPCONFIG=dataverse.db.password}", - url = "jdbc:postgresql://${MPCONFIG=dataverse.db.host}:${MPCONFIG=dataverse.db.port}/${MPCONFIG=dataverse.db.name}", + url = "jdbc:postgresql://${MPCONFIG=dataverse.db.host:localhost}:${MPCONFIG=dataverse.db.port:5432}/${MPCONFIG=dataverse.db.name:dataverse}", // If we ever need to change these pool settings, we need to remove this class and create the resource // from web.xml. We can use MicroProfile Config in there for these values, impossible to do in the annotation. // diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index 8c475f1c050..9e5d126d305 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -1,4 +1 @@ -dataverse.db.host=localhost -dataverse.db.port=5432 -dataverse.db.user=dataverse -dataverse.db.name=dataverse +# Entries use key=value From 9d3503a5fa9d265a5954ef34c74a505a9aa15001 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 30 Jun 2021 16:49:19 +0200 Subject: [PATCH 2/5] feat(jpa): add advanced pool config options to DataSourceDefinition #7980 --- .../iq/dataverse/util/DataSourceProducer.java | 43 +++++++++++++------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java index 131bd957036..e8e559fc4fe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java @@ -33,18 +33,37 @@ maxPoolSize = 100, // "The number of seconds that a physical connection should remain unused in the pool before the connection is closed for a connection pool. " // Payara DataSourceDefinitionDeployer default value = 300 (seconds) - maxIdleTime = 300) -// It's possible to add additional properties like this... -// -//properties = { -// "fish.payara.log-jdbc-calls=true" -//}) -// -// ... but at this time we don't think we need any. The full list -// of properties can be found at https://docs.payara.fish/community/docs/5.2021.4/documentation/payara-server/jdbc/advanced-connection-pool-properties.html#full-list-of-properties -// -// All these properties cannot be configured via MPCONFIG as Payara doesn't support this (yet). To be enhanced. -// See also https://github.com/payara/Payara/issues/5024 + maxIdleTime = 300, + + // Set more options via MPCONFIG, including defaults where applicable. + // TODO: Future versions of Payara (the one following 5.2021.4) will support setting + // integer properties like pool size, idle times, etc in a Payara-propietary way. + // See https://github.com/payara/Payara/pull/5272 + properties = { + // The following options are documented here: + // https://docs.payara.fish/community/docs/documentation/payara-server/jdbc/advanced-connection-pool-properties.html + "fish.payara.is-connection-validation-required=${MPCONFIG=dataverse.db.is-connection-validation-required:false}", + "fish.payara.connection-validation-method=${MPCONFIG=dataverse.db.connection-validation-method}", + "fish.payara.validation-table-name=${MPCONFIG=dataverse.db.validation-table-name}", + "fish.payara.validation-classname=${MPCONFIG=dataverse.db.validation-classname}", + "fish.payara.validate-atmost-once-period-in-seconds=${MPCONFIG=dataverse.db.validate-atmost-once-period-in-seconds:0}", + "fish.payara.connection-leak-timeout-in-seconds=${MPCONFIG=dataverse.db.connection-leak-timeout-in-seconds:0}", + "fish.payara.connection-leak-reclaim=${MPCONFIG=dataverse.db.connection-leak-reclaim:false}", + "fish.payara.connection-creation-retry-attempts=${MPCONFIG=dataverse.db.connection-creation-retry-attempts:0}", + "fish.payara.connection-creation-retry-interval-in-seconds=${MPCONFIG=dataverse.db.connection-creation-retry-interval-in-seconds:10}", + "fish.payara.statement-timeout-in-seconds=${MPCONFIG=dataverse.db.statement-timeout-in-seconds:-1}", + "fish.payara.lazy-connection-enlistment=${MPCONFIG=dataverse.db.lazy-connection-enlistment:false}", + "fish.payara.lazy-connection-association=${MPCONFIG=dataverse.db.lazy-connection-association:false}", + "fish.payara.pooling=${MPCONFIG=dataverse.db.pooling:true}", + "fish.payara.statement-cache-size=${MPCONFIG=dataverse.db.statement-cache-size:0}", + "fish.payara.match-connections=${MPCONFIG=dataverse.db.match-connections:true}", + "fish.payara.max-connection-usage-count=${MPCONFIG=dataverse.db.max-connection-usage-count:0}", + "fish.payara.statement-leak-timeout-in-seconds=${MPCONFIG=dataverse.db.statement-leak-timeout-in-seconds:0}", + "fish.payara.statement-leak-reclaim=${MPCONFIG=dataverse.db.statement-leak-reclaim:false}", + "fish.payara.statement-cache-type=${MPCONFIG=dataverse.db.statement-cache-type}", + "fish.payara.slow-query-threshold-in-seconds=${MPCONFIG=dataverse.db.slow-query-threshold-in-seconds:-1}", + "fish.payara.log-jdbc-calls=${MPCONFIG=dataverse.db.log-jdbc-calls:false}" + }) public class DataSourceProducer { @Resource(lookup = "java:app/jdbc/dataverse") From 5da1d76e7b58cb79e8a237019e9cd54d4315965b Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 30 Jun 2021 19:57:25 +0200 Subject: [PATCH 3/5] docs(jpa): add extensive docs about database connection configuration. #7980 --- .../source/installation/config.rst | 194 +++++++++++++----- 1 file changed, 143 insertions(+), 51 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 2a7d200e52c..ecbf3f5adfc 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -239,6 +239,149 @@ As for the "Remote only" authentication mode, it means that: - ``:DefaultAuthProvider`` has been set to use the desired authentication provider - The "builtin" authentication provider has been disabled (:ref:`api-toggle-auth-provider`). Note that disabling the "builtin" authentication provider means that the API endpoint for converting an account from a remote auth provider will not work. Converting directly from one remote authentication provider to another (i.e. from GitHub to Google) is not supported. Conversion from remote is always to "builtin". Then the user initiates a conversion from "builtin" to remote. Note that longer term, the plan is to permit multiple login options to the same Dataverse installation account per https://github.com/IQSS/dataverse/issues/3487 (so all this talk of conversion will be moot) but for now users can only use a single login option, as explained in the :doc:`/user/account` section of the User Guide. In short, "remote only" might work for you if you only plan to use a single remote authentication provider such that no conversion between remote authentication providers will be necessary. + + + +Database Persistence +-------------------- + +The Dataverse software uses a PostgreSQL server and a Solr Search Index to store objects users create. +You can configure basic and advanced settings of the PostgreSQL database connection with the help of +MicroProfile Config API. + +Basic Database Settings ++++++++++++++++++++++++ + +1. Any of these settings can be set via system properties (see :ref:`jvm-options`), environment variables or other + MicroProfile Config mechanisms supported by the appserver. + `See Payara docs for supported sources `_. +2. Remember to protect your secrets. For passwords, use an environment variable (bare minimum), a password alias named the same + as the key (OK) or use the "dir config source" of Payara (best). + + Alias creation example: + + .. code-block:: shell + + echo "AS_ADMIN_ALIASPASSWORD=changeme" > /tmp/p.txt + asadmin create-password-alias --passwordfile /tmp/p.txt dataverse.db.password + rm /tmp/p.txt + +3. Environment variables follow the key, replacing any dot, colon, dash, etc into an underscore "_" and all uppercase + letters. Example: ``dataverse.db.host`` -> ``DATAVERSE_DB_HOST`` + +.. list-table:: + :widths: 15 60 25 + :header-rows: 1 + :align: left + + * - MPCONFIG Key + - Description + - Default + * - dataverse.db.host + - The PostgreSQL server to connect to. + - ``localhost`` + * - dataverse.db.port + - The PostgreSQL server port to connect to. + - ``5432`` + * - dataverse.db.user + - The PostgreSQL user name to connect with. + - | ``dataverse`` + | (installer sets to ``dvnapp``) + * - dataverse.db.password + - The PostgreSQL users password to connect with. + + **Please note the safety advisory above.** + - *No default* + * - dataverse.db.name + - The PostgreSQL database name to use for the Dataverse installation. + - | ``dataverse`` + | (installer sets to ``dvndb``) + +Advanced Database Settings +++++++++++++++++++++++++++ + +The following options are useful in many scenarios. You might be interested in debug output during development or +monitoring performance in production. + +You can find more details within the +`Payara docs on Advanced Connection Pool Configuration `_. + +.. list-table:: + :widths: 15 60 25 + :header-rows: 1 + :align: left + + * - MPCONFIG Key + - Description + - Default + * - dataverse.db.is-connection-validation-required + - ``true``: Validate connections, allow server to reconnect in case of failure + - false + * - dataverse.db.connection-validation-method + - | The method of connection validation: + | ``table|autocommit|meta-data|custom-validation`` + - *No default* + * - dataverse.db.validation-table-name + - The name of the table used for validation if the validation method is set to ``table`` + - *No default* + * - dataverse.db.validation-classname + - The name of the custom class used for validation if the ``validation-method`` is set to ``custom-validation`` + - *No default* + * - dataverse.db.validate-atmost-once-period-in-seconds + - Specifies the time interval in seconds between successive requests to validate a connection at most once. + - ``0`` (disabled) + * - dataverse.db.connection-leak-timeout-in-seconds + - Specify timeout when connections count as "leaked". + - ``0`` (disabled) + * - dataverse.db.connection-leak-reclaim + - If enabled, leaked connection will be reclaimed by the pool after connection leak timeout occurs. + - ``false`` + * - dataverse.db.connection-creation-retry-attempts + - Number of attempts to create a new connection. + - ``0`` (no retries) + * - dataverse.db.connection-creation-retry-interval-in-seconds + - Time interval between retries while attempting to create a connection. Effective when "Creation Retry Attempts" is ``> 0``. + - ``10`` + * - dataverse.db.statement-timeout-in-seconds + - Timeout property of a connection to enable termination of abnormally long running queries. + - ``-1`` (disabled) + * - dataverse.db.lazy-connection-enlistment + - Enlist a resource to the transaction only when it is actually used in a method + - ``false`` + * - dataverse.db.lazy-connection-association + - Connections are lazily associated when an operation is performed on them + - ``false`` + * - dataverse.db.pooling + - When set to false, disables connection pooling for the pool + - ``true`` (enabled) + * - dataverse.db.statement-cache-size + - Caching is enabled when set to a positive non-zero value (for example, 10) + - ``0`` + * - dataverse.db.match-connections + - Turns connection matching for the pool on or off + - ``true`` + * - dataverse.db.max-connection-usage-count + - Connections will be reused by the pool for the specified number of times, after which they will be closed. + - ``0`` (disabled) + * - dataverse.db.statement-leak-timeout-in-seconds + - Specifiy timeout when statements should be considered to be "leaked" + - ``0`` (disabled) + * - dataverse.db.statement-leak-reclaim + - If enabled, leaked statement will be reclaimed by the pool after statement leak timeout occurs + - ``false`` + * - dataverse.db.statement-cache-type + - + - + * - dataverse.db.slow-query-threshold-in-seconds + - SQL queries that exceed this time in seconds will be logged. + - ``-1`` (disabled) + * - dataverse.db.log-jdbc-calls + - When set to true, all JDBC calls will be logged allowing tracing of all JDBC interactions including SQL + - ``false`` + + + + File Storage: Using a Local Filesystem and/or Swift and/or S3 object stores --------------------------------------------------------------------------- @@ -1037,57 +1180,6 @@ dataverse.auth.password-reset-timeout-in-minutes Users have 60 minutes to change their passwords by default. You can adjust this value here. -dataverse.db.name -+++++++++++++++++ - -The PostgreSQL database name to use for the Dataverse installation. - -Defaults to ``dataverse`` (but the installer sets it to ``dvndb``). - -Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_DB_NAME``. - -dataverse.db.user -+++++++++++++++++ - -The PostgreSQL user name to connect with. - -Defaults to ``dataverse`` (but the installer sets it to ``dvnapp``). - -Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_DB_USER``. - -dataverse.db.password -+++++++++++++++++++++ - -The PostgreSQL users password to connect with. - -Preferrably use a JVM alias, as passwords in environment variables aren't safe. - -.. code-block:: shell - - echo "AS_ADMIN_ALIASPASSWORD=change-me-super-secret" > /tmp/password.txt - asadmin create-password-alias --passwordfile /tmp/password.txt dataverse.db.password - rm /tmp/password.txt - -Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_DB_PASSWORD``. - -dataverse.db.host -+++++++++++++++++ - -The PostgreSQL server to connect to. - -Defaults to ``localhost``. - -Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_DB_HOST``. - -dataverse.db.port -+++++++++++++++++ - -The PostgreSQL server port to connect to. - -Defaults to ``5432``, the default PostgreSQL port. - -Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_DB_PORT``. - dataverse.rserve.host +++++++++++++++++++++ From 02342dcf825a3c8be3b808b5cc84b38dd6c96481 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 30 Jun 2021 20:41:33 +0200 Subject: [PATCH 4/5] feat(jpa): remove exotic database connection options. #7980 As requested by @pdurbin, the long list was quite overwhelming. It's now damped down to 12 options in 3 subsubsections of the docs. --- .../source/installation/config.rst | 68 +++++++++---------- .../iq/dataverse/util/DataSourceProducer.java | 14 ++-- 2 files changed, 38 insertions(+), 44 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index ecbf3f5adfc..f783e58c88a 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -303,8 +303,13 @@ Advanced Database Settings The following options are useful in many scenarios. You might be interested in debug output during development or monitoring performance in production. -You can find more details within the -`Payara docs on Advanced Connection Pool Configuration `_. +You can find more details within the Payara docs: + +- `User Guide: Connection Pool Configuration `_ +- `Tech Doc: Advanced Connection Pool Configuration `_. + +Connection Validation +^^^^^^^^^^^^^^^^^^^^^ .. list-table:: :widths: 15 60 25 @@ -330,48 +335,45 @@ You can find more details within the * - dataverse.db.validate-atmost-once-period-in-seconds - Specifies the time interval in seconds between successive requests to validate a connection at most once. - ``0`` (disabled) + +Connection & Statement Leaks +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 15 60 25 + :header-rows: 1 + :align: left + + * - MPCONFIG Key + - Description + - Default * - dataverse.db.connection-leak-timeout-in-seconds - Specify timeout when connections count as "leaked". - ``0`` (disabled) * - dataverse.db.connection-leak-reclaim - If enabled, leaked connection will be reclaimed by the pool after connection leak timeout occurs. - ``false`` - * - dataverse.db.connection-creation-retry-attempts - - Number of attempts to create a new connection. - - ``0`` (no retries) - * - dataverse.db.connection-creation-retry-interval-in-seconds - - Time interval between retries while attempting to create a connection. Effective when "Creation Retry Attempts" is ``> 0``. - - ``10`` - * - dataverse.db.statement-timeout-in-seconds - - Timeout property of a connection to enable termination of abnormally long running queries. - - ``-1`` (disabled) - * - dataverse.db.lazy-connection-enlistment - - Enlist a resource to the transaction only when it is actually used in a method - - ``false`` - * - dataverse.db.lazy-connection-association - - Connections are lazily associated when an operation is performed on them - - ``false`` - * - dataverse.db.pooling - - When set to false, disables connection pooling for the pool - - ``true`` (enabled) - * - dataverse.db.statement-cache-size - - Caching is enabled when set to a positive non-zero value (for example, 10) - - ``0`` - * - dataverse.db.match-connections - - Turns connection matching for the pool on or off - - ``true`` - * - dataverse.db.max-connection-usage-count - - Connections will be reused by the pool for the specified number of times, after which they will be closed. - - ``0`` (disabled) * - dataverse.db.statement-leak-timeout-in-seconds - Specifiy timeout when statements should be considered to be "leaked" - ``0`` (disabled) * - dataverse.db.statement-leak-reclaim - If enabled, leaked statement will be reclaimed by the pool after statement leak timeout occurs - ``false`` - * - dataverse.db.statement-cache-type - - - - + +Logging & Slow Performance +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 15 60 25 + :header-rows: 1 + :align: left + + * - MPCONFIG Key + - Description + - Default + * - dataverse.db.statement-timeout-in-seconds + - Timeout property of a connection to enable termination of abnormally long running queries. + - ``-1`` (disabled) * - dataverse.db.slow-query-threshold-in-seconds - SQL queries that exceed this time in seconds will be logged. - ``-1`` (disabled) @@ -380,8 +382,6 @@ You can find more details within the - ``false`` - - File Storage: Using a Local Filesystem and/or Swift and/or S3 object stores --------------------------------------------------------------------------- diff --git a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java index e8e559fc4fe..850dfc063fb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java @@ -42,25 +42,19 @@ properties = { // The following options are documented here: // https://docs.payara.fish/community/docs/documentation/payara-server/jdbc/advanced-connection-pool-properties.html + // VALIDATION "fish.payara.is-connection-validation-required=${MPCONFIG=dataverse.db.is-connection-validation-required:false}", "fish.payara.connection-validation-method=${MPCONFIG=dataverse.db.connection-validation-method}", "fish.payara.validation-table-name=${MPCONFIG=dataverse.db.validation-table-name}", "fish.payara.validation-classname=${MPCONFIG=dataverse.db.validation-classname}", "fish.payara.validate-atmost-once-period-in-seconds=${MPCONFIG=dataverse.db.validate-atmost-once-period-in-seconds:0}", + // LEAK DETECTION "fish.payara.connection-leak-timeout-in-seconds=${MPCONFIG=dataverse.db.connection-leak-timeout-in-seconds:0}", "fish.payara.connection-leak-reclaim=${MPCONFIG=dataverse.db.connection-leak-reclaim:false}", - "fish.payara.connection-creation-retry-attempts=${MPCONFIG=dataverse.db.connection-creation-retry-attempts:0}", - "fish.payara.connection-creation-retry-interval-in-seconds=${MPCONFIG=dataverse.db.connection-creation-retry-interval-in-seconds:10}", - "fish.payara.statement-timeout-in-seconds=${MPCONFIG=dataverse.db.statement-timeout-in-seconds:-1}", - "fish.payara.lazy-connection-enlistment=${MPCONFIG=dataverse.db.lazy-connection-enlistment:false}", - "fish.payara.lazy-connection-association=${MPCONFIG=dataverse.db.lazy-connection-association:false}", - "fish.payara.pooling=${MPCONFIG=dataverse.db.pooling:true}", - "fish.payara.statement-cache-size=${MPCONFIG=dataverse.db.statement-cache-size:0}", - "fish.payara.match-connections=${MPCONFIG=dataverse.db.match-connections:true}", - "fish.payara.max-connection-usage-count=${MPCONFIG=dataverse.db.max-connection-usage-count:0}", "fish.payara.statement-leak-timeout-in-seconds=${MPCONFIG=dataverse.db.statement-leak-timeout-in-seconds:0}", "fish.payara.statement-leak-reclaim=${MPCONFIG=dataverse.db.statement-leak-reclaim:false}", - "fish.payara.statement-cache-type=${MPCONFIG=dataverse.db.statement-cache-type}", + // LOGGING, SLOWNESS, PERFORMANCE + "fish.payara.statement-timeout-in-seconds=${MPCONFIG=dataverse.db.statement-timeout-in-seconds:-1}", "fish.payara.slow-query-threshold-in-seconds=${MPCONFIG=dataverse.db.slow-query-threshold-in-seconds:-1}", "fish.payara.log-jdbc-calls=${MPCONFIG=dataverse.db.log-jdbc-calls:false}" }) From 73093be9405592081249ba086a90fbb5b7cfd978 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 30 Jun 2021 20:54:10 +0200 Subject: [PATCH 5/5] docs(jpa): add release note for 5.6 about #7980 --- doc/release-notes/7980-enhanced-dsd.md | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 doc/release-notes/7980-enhanced-dsd.md diff --git a/doc/release-notes/7980-enhanced-dsd.md b/doc/release-notes/7980-enhanced-dsd.md new file mode 100644 index 00000000000..2cade482064 --- /dev/null +++ b/doc/release-notes/7980-enhanced-dsd.md @@ -0,0 +1,10 @@ +### Default Values for Database Connections fixed + +Introduced in Dataverse release 5.3 a regression might have hit you: +the announced default values for the database connection never actually worked. + +With the update to Payara 5.2021.4 it was possible to introduce working +defaults. The documentation has been changed accordingly. + +Together with this change, you can now enable advanced connection pool +configurations useful for debugging and monitoring. See the docs for details. \ No newline at end of file