diff --git a/OPENCandel.md b/OPENCandel.md new file mode 100644 index 0000000..123eb62 --- /dev/null +++ b/OPENCandel.md @@ -0,0 +1,31 @@ +This branch is hacked to work on Datomic Cloud. + +Eventually, it should be merged into the main branch. + +# Configuration + +Built into the code for now, see https://github.com/Candelbio/pret/blob/cloudy/src/org/candelbio/pret/db.clj#L40 + +# Credentials + +Uses AWS credentials, which must have appropriate permissions. Uses the default profile, but you can change that by editing the configuration. + +# Usage + +`WORKING_DIR` is a local working directory +`IMPORT_CONFIG` is a standard Pret config file +`DATABASE` is a database name + +``` +./pret-dev request-db --import-config IMPORT_CONFIG --database DATABASE +./pret-dev prepare --import-config IMPORT_CONFIG --working-directory WORKING_DIR +./pret-dev transact --import-config IMPORT_CONFIG --working-directory WORKING_DIR --database DATABASE +``` + + +# Administration + +The `list-dbs` and `delete-db` commands should work. + +There are some other curation functions in db.clj, not yet hooked to cli. db/print-db-stats is particularly useful. + diff --git a/deps.edn b/deps.edn index 5158240..4779de4 100644 --- a/deps.edn +++ b/deps.edn @@ -2,8 +2,10 @@ com.cognitect/anomalies {:mvn/version "0.1.12"} com.cognitect.aws/endpoints {:mvn/version "1.1.11.969"} com.cognitect.aws/sns {:mvn/version "697.2.391.0"} - com.datomic/datomic-pro {:mvn/version "1.0.6316" - :exclusions [org.slf4j/jul-to-slf4j org.slf4j/slf4j-nop]} + #_ com.datomic/datomic-pro #_ {:mvn/version "1.0.6316" + :exclusions [org.slf4j/jul-to-slf4j org.slf4j/slf4j-nop]} + com.datomic/client-cloud {:mvn/version "1.0.122"} + com.cognitect.aws/api {:mvn/version "0.8.505"} com.cognitect.aws/s3 {:mvn/version "811.2.858.0"} org.clojure/data.csv {:mvn/version "1.0.0"} diff --git a/src/org/candelbio/pret/cli.clj b/src/org/candelbio/pret/cli.clj index aa61fd5..24b72bd 100644 --- a/src/org/candelbio/pret/cli.clj +++ b/src/org/candelbio/pret/cli.clj @@ -42,7 +42,7 @@ " Requires --database arg." 
" prepare Uses an import config file to generate all data needed to run an import." " Requires --import-config and --working-directory args." - " diff Generates all changes required to update an existing dataset to match the target." + #_ " diff Generates all changes required to update an existing dataset to match the target." " Requires --working-directory and --database arguments." " transact Transacts all data (as from prepare) for an import job into Datomic." " Requires --working-directory and --database arguments." @@ -95,11 +95,11 @@ (do (println "Error requesting database" database) (pprint result)) - (let [db-info (db/fetch-info database) - uri (:uri db-info)] - (db/init uri) - (println "Request successful, created database" (:db-name result)))) + (do + (db/init database) + (println "Request successful, created database" database))) :success) + #_ (catch Exception e (exit 1 (str "Error encountered creating candel database " database "\n" (when-let [err-data (ex-data e)] @@ -153,7 +153,7 @@ [{:keys [target-dir resume database datomic-uri skip-annotations update] :as ctx}] (when-not (and datomic-uri database) (exit 1 "ERROR: Transact needs a database to transact to.")) - (print-db-version datomic-uri) + #_ (print-db-version datomic-uri) (when resume (println (str "WARN: Resuming transaction job. This will skip transacting the import job entity. 
" "Transactions may take awhile to restart as previously successful IDs are found."))) @@ -168,6 +168,7 @@ " transactions, entire import job at " target-dir)))) +#_ (defn diff [{:keys [target-dir resume datomic-uri skip-annotations database] :as ctx}] (when-not (and datomic-uri database) @@ -227,7 +228,7 @@ { "request-db" request-db "prepare" prepare - "diff" diff + ;; "diff" diff "transact" transact "validate" validate "crosscheck-reference" crosscheck-reference @@ -280,7 +281,7 @@ [task database-name] (when-not (= task "request-db") (let [datomic-info (db/fetch-info database-name) - datomic-uri (:uri datomic-info)] + datomic-uri database-name #_ (:uri datomic-info)] (when-not datomic-uri (throw (ex-info (str "No such database: " database-name "\nEither name is wrong, or database has been deleted due to inactivity.") @@ -313,7 +314,7 @@ (defn -main [& args] - (try + (do ; try (println "pret version:" (release/version)) (let [argmap (validate-args args) {:keys [exit-message ok?]} argmap] @@ -327,6 +328,7 @@ (if (:errors task-results) (exit 1 (str "Task: " task " failed ")) (exit 0 (str "Task: " task " completed.")))))) + #_ (catch Throwable t (cli.error-handling/report-and-exit t)))) @@ -344,4 +346,4 @@ (def val-args (validate-args '("delete-db" "--database" "mt-test3"))) (def parsed-args (parse-task-args val-args)) - (delete-db (merge parsed-args (:options parsed-args)))) \ No newline at end of file + (delete-db (merge parsed-args (:options parsed-args)))) diff --git a/src/org/candelbio/pret/cli/error_handling.clj b/src/org/candelbio/pret/cli/error_handling.clj index e444ee0..c812469 100644 --- a/src/org/candelbio/pret/cli/error_handling.clj +++ b/src/org/candelbio/pret/cli/error_handling.clj @@ -4,11 +4,15 @@ (:import (java.util.concurrent ExecutionException))) +#_ (defn exit [code msg] (println msg) (shutdown-agents) (System/exit code)) +(defn exit [code msg] + (println :exiting-not code msg)) + (defn report-errors "For each key and value in ex-info, report 
error state." [err-map] diff --git a/src/org/candelbio/pret/db.clj b/src/org/candelbio/pret/db.clj index fe8582b..50c0423 100644 --- a/src/org/candelbio/pret/db.clj +++ b/src/org/candelbio/pret/db.clj @@ -1,5 +1,5 @@ (ns org.candelbio.pret.db - (:require [datomic.api :as d] + (:require [datomic.client.api :as d] [clojure.core.async :as a] [org.candelbio.pret.db.backend :as backend] [org.candelbio.pret.db.query :as dq] @@ -27,16 +27,24 @@ ;; +;;; Only used by diff and validation? + +#_ (defn get-connection [info] - (d/connect (:uri info))) + (backend/connect (:uri info))) (defn latest-db [info] - (d/db (get-connection info))) + (backend/db info)) + + + + + (defn exists? [datomic-uri] (try - (d/connect datomic-uri) + (backend/connect datomic-uri) true (catch RuntimeException e ;; since no ex-info on distributed Datomic, and only get RuntimeException, @@ -70,13 +78,17 @@ expected)) +(defn connect + [uri] + (backend/connect uri)) + (defn apply-schema [datomic-uri] - (let [conn (d/connect datomic-uri) + (let [conn (connect datomic-uri) schema-work schema/schema-txes] (doseq [raw-tx schema-work] ;; if a schema attr is not indexed, we add index true. this allows us to keep ;; schema edn in resources datomic impl agnostic while optimizing on-prem queries. - (let [tx (update-in raw-tx [:tx-data] + (let [tx raw-tx #_ (update-in raw-tx [:tx-data] (fn [tx-data] (mapv (fn [schema-ent] (if (and (:db/valueType schema-ent) @@ -84,13 +96,17 @@ (assoc schema-ent :db/index true) schema-ent)) tx-data)))] - (if (tx-effect? conn tx) + (if true #_ (tx-effect? 
conn tx) (do (log/info ::schema (:name tx) " not in database, transacting.") (db.tx/sync+retry conn (:tx-data tx))) (log/info ::schema "Skipping schema install for: " (:name tx))))))) + + + + (defn version [datomic-uri] - (let [conn (d/connect datomic-uri) + (let [conn (connect datomic-uri) db (d/db conn)] (-> db (d/pull '[:candel.schema/version] :candel/schema) @@ -99,11 +115,10 @@ (defn init "Loads all base schema, enums, and metamodel into database if necessary." [datomic-uri & {:keys [skip-bootstrap seed-data-dir include-proprietary]}] - (let [_ (d/create-database datomic-uri) - _ (do - (log/info "Database created.")) + (let [;; _ (backend/create-database datomic-uri) ;?? shouldn't this be done earlier + ;; _ (do (log/info "Database created.")) ;; db isn't ready yet if it hasn't been created, this timeout seems sufficient - conn (d/connect datomic-uri) + conn (connect datomic-uri) _ (log/info "Connected to database")] (apply-schema datomic-uri) (when-not skip-bootstrap @@ -178,6 +193,12 @@ :where [?e :import/txn-id ?id]] db txn-id)))) +(defn touch + [db id] + #_ (d/touch (d/entity db id)) + nil) ;TODO + + (defn head "Returns metadata about the last transaction: @@ -191,10 +212,10 @@ :where [?tx :db/txInstant]] db)) - txn-data (d/touch (d/entity db (first txn))) + txn-data (touch db (first txn)) import-name (if (contains? 
txn-data :import/import) - (-> (d/touch (d/entity db (-> (:import/import txn-data) - :db/id))) + (-> (touch db (-> (:import/import txn-data) + :db/id)) :import/name) (throw (ex-info "No datasets transacted" {:error :no-imports-on-database})))] @@ -215,7 +236,7 @@ db) (map (fn [[name tx-id]] - (let [tx (d/touch (d/entity db tx-id))] + (let [tx (touch db tx-id)] {:timestamp (:db/txInstant tx) :import-name name :ent-id tx-id}))) diff --git a/src/org/candelbio/pret/db/backend.clj b/src/org/candelbio/pret/db/backend.clj index 9f78a12..f0f2688 100644 --- a/src/org/candelbio/pret/db/backend.clj +++ b/src/org/candelbio/pret/db/backend.clj @@ -1,27 +1,57 @@ (ns org.candelbio.pret.db.backend - (:require [datomic.api :as d] + (:require [datomic.client.api :as d] [org.candelbio.pret.db.config :as db.config])) +;;; MT note: I don't understand why this was carved out of db + +;;; Datomic Cloud config +;;; TODO shouldn't be in code +(def cfg {:server-type :ion + :region "us-east-1" + :system "PublicCANDEL5" + ;; This is from the Datomic Cloud ClientApiGatewayEndpoint output + :endpoint "https://nazpex6ueb.execute-api.us-east-1.amazonaws.com" + ;; :creds-profile "" + }) + +(def client (d/client cfg)) + +(defn connect + [db-name] + #_ (d/connect datomic-uri) + (d/connect client {:db-name db-name}) + ) + +(defn db + [name] + (d/db (connect name))) + + +#_ (defn ddb-base-uri [] (str "datomic:ddb://" (db.config/aws-region) "/" (db.config/ddb-table) "/")) +#_ (defn db-base-uri [] (or (db.config/base-uri) (ddb-base-uri))) (defn request-db [database] - (let [uri (str (db-base-uri) database) - result (d/create-database uri)] + (let [;; uri (str (db-base-uri) database) + result #_ (d/create-database uri) + (d/create-database client {:db-name database}) + ] (if result {:db-name database :database database - :uri uri} + :uri database} ;Not really {:error "Database already exists!"}))) +#_ (defn delete-db [database] (let [uri (str (db-base-uri) database) @@ -33,6 +63,11 @@ :uri uri} {:error 
"Database not deleted!"}))) +(defn delete-db + [db] + (d/delete-database client {:db-name db})) + +#_ (defn database-info "Retrieves the branch database's datomic uri. Returns the uri, {:error ...} or throws an exception if the user doesn't have @@ -40,11 +75,43 @@ [database] {:uri (str (db-base-uri) database)}) +(defn db-stats + [db] + (d/db-stats (d/db (d/connect client {:db-name db})))) + +;;; This is not terribly useful, but cli could pprint it. +(defn database-info + [db] + (db-stats db)) + +;;; Curation functions. + +(defn all-dbs + [] + (d/list-databases client {})) + (defn list-dbs [] - (try - (d/get-database-names (str (db-base-uri) "*")) - (catch Exception e - {:error (.getMessage e)}))) + (all-dbs)) + +(defn all-db-stats + [] + (let [dbs (all-dbs)] + (zipmap dbs (map db-stats dbs)))) + +(defn print-db-stats + [] + (doseq [db (all-dbs)] + (prn :db db) + (clojure.pprint/pprint (db-stats db)))) + + +(defn delete-all-dbs + [] + (doseq [db (all-dbs)] + (delete-db db))) +(defn create-database + [name] + (d/create-database client {:db-name name})) diff --git a/src/org/candelbio/pret/db/config.clj b/src/org/candelbio/pret/db/config.clj index addadd9..2a7051e 100644 --- a/src/org/candelbio/pret/db/config.clj +++ b/src/org/candelbio/pret/db/config.clj @@ -25,7 +25,7 @@ (wrap-config "CANDEL_REFERENCE_DATA_BUCKET" "candel.referenceDataBucket" - "pret-processed-reference-data-prod")) + "pici-pret-processed-reference-data-prod-right")) ;mt: this is where it actually is (def matrix-bucket (wrap-config diff --git a/src/org/candelbio/pret/db/gc_storage_call.clj b/src/org/candelbio/pret/db/gc_storage_call.clj index a229503..3331375 100644 --- a/src/org/candelbio/pret/db/gc_storage_call.clj +++ b/src/org/candelbio/pret/db/gc_storage_call.clj @@ -2,6 +2,9 @@ (:require [datomic.api :as d] [org.candelbio.pret.db.config :as config])) +;;; Note: won't work in Datomic Cloud environment + + (def db-list (d/get-database-names (str (config/base-uri) '*))) diff --git 
a/src/org/candelbio/pret/db/import_coordination.clj b/src/org/candelbio/pret/db/import_coordination.clj index 882dc69..b90fc1f 100644 --- a/src/org/candelbio/pret/db/import_coordination.clj +++ b/src/org/candelbio/pret/db/import_coordination.clj @@ -1,5 +1,5 @@ (ns org.candelbio.pret.db.import-coordination - (:require [datomic.api :as d] + (:require [datomic.client.api :as d] [org.candelbio.pret.db.query :as dq])) (defn import-entity-txn-eid diff --git a/src/org/candelbio/pret/db/query.clj b/src/org/candelbio/pret/db/query.clj index 1f95217..1cfd3dd 100644 --- a/src/org/candelbio/pret/db/query.clj +++ b/src/org/candelbio/pret/db/query.clj @@ -1,5 +1,5 @@ (ns org.candelbio.pret.db.query - (:require [datomic.api :as d])) + (:require [datomic.client.api :as d])) (defn q+retry "Invoke Datomic query wrapped in retry with simple linear retry logic. @@ -10,4 +10,4 @@ for us, but if this is ever refactored to use query in client, we want to constrain it so that all callers use retry, etc. set here." [& args] - (apply d/q args)) \ No newline at end of file + (apply d/q args)) diff --git a/src/org/candelbio/pret/db/transact.clj b/src/org/candelbio/pret/db/transact.clj index 685d64e..b6b5b87 100644 --- a/src/org/candelbio/pret/db/transact.clj +++ b/src/org/candelbio/pret/db/transact.clj @@ -1,5 +1,5 @@ (ns org.candelbio.pret.db.transact - (:require [datomic.api :as d] + (:require [datomic.client.api :as d] [clojure.pprint :refer [pprint]] [org.candelbio.pret.db.query :as db.query] [clojure.edn :as edn] @@ -17,7 +17,7 @@ "print errors when/if they happen - this can be improved with additional chrome" [f] (let [res (try - @(f) + (f) (catch Exception e (let [data (ex-data e)] ;; Some exceptions contain an anomaly via info @@ -30,6 +30,13 @@ ::anom/ex-data (ex-data e)}))))] res)) +;;; Or, not! 
(for debuggability) +#_ +(defn report-retryable + "print errors when/if they happen - this can be improved with additional chrome" + [f] + (f)) + ;; retry functions all here for convenience (defn exp-retry-fn [retry-n backoff] @@ -59,7 +66,7 @@ (log/info "retry> retrying result: " result) (if (<= n max-retries) (do - (Thread/sleep (next-try-fn n backoff)) + (Thread/sleep (int (next-try-fn n backoff))) (log/info "retry> attempt " n) (if-let [skip-result (skip-fn)] skip-result @@ -71,7 +78,9 @@ (defn- raw-tx-fn [conn data] - (d/transact-async conn data)) + #_ (d/transact-async conn data) + (d/transact conn {:tx-data data}) + ) (defn- tx-present? "Returns `nil` (falsy) if tx not present in db, otherwise map with info @@ -121,7 +130,8 @@ 3600 3000 ;;linear-retry-fn - scaled-retry-fn)] + #_ scaled-retry-fn + exp-retry-fn)] (if-not (::anom/category res) res @@ -294,7 +304,8 @@ for tempo-msec, up to max-retries." ([conn tx tempo-msec max-retries] (loop [retry 1] - (let [tx-result @(d/transact conn tx)] + ;; Where the action is! + (let [tx-result (d/transact conn {:tx-data tx})] (cond ;; if transaction succeeds, return result as normal (:db-after tx-result) diff --git a/src/org/candelbio/pret/import.clj b/src/org/candelbio/pret/import.clj index 5b181e9..e2b029e 100644 --- a/src/org/candelbio/pret/import.clj +++ b/src/org/candelbio/pret/import.clj @@ -5,7 +5,7 @@ [clojure.tools.logging :as log] [org.candelbio.pret.db :as db] [org.candelbio.pret.db.schema :as db.schema] - [org.candelbio.pret.import.diff.tx-data :as diff] + #_ [org.candelbio.pret.import.diff.tx-data :as diff] [org.candelbio.pret.import.tx-data :as tx-data] [org.candelbio.pret.import.engine :as engine] [org.candelbio.pret.util.aws :as s3] @@ -65,9 +65,11 @@ ;; linked to available processors, this is ;; an io bound operation, isn't it? ;; does this value even get respected? + #_ (+ 2 (.. 
Runtime getRuntime availableProcessors)) + 1 {:resume resume :skip-annotations skip-annotations :disable-remote-calls disable-remote-calls @@ -82,6 +84,7 @@ {:results (apply merge-with + (concat ref-results data-results))}))) +#_ (defn perform-diff "Performs the update operation: 1. prepared data is transacted to branch with temp dataset uids diff --git a/src/org/candelbio/pret/import/diff/changes.clj b/src/org/candelbio/pret/import/diff/changes.clj index 58363f6..b30d4d0 100644 --- a/src/org/candelbio/pret/import/diff/changes.clj +++ b/src/org/candelbio/pret/import/diff/changes.clj @@ -6,7 +6,7 @@ transacted that contais the changes this namespace collects." (:require [clojure.data :as data] [clojure.pprint :refer [pprint]] - [datomic.api :as d] + [datomic.client.api :as d] [org.candelbio.pret.db.metamodel :as metamodel] [org.candelbio.pret.db :as db] [org.candelbio.pret.db.schema :as db.schema]) diff --git a/src/org/candelbio/pret/import/tx_data.clj b/src/org/candelbio/pret/import/tx_data.clj index b24a330..c4ef205 100644 --- a/src/org/candelbio/pret/import/tx_data.clj +++ b/src/org/candelbio/pret/import/tx_data.clj @@ -3,7 +3,8 @@ [clojure.pprint :refer [pprint]] [clojure.edn :as edn] [clojure.walk :as w] - [datomic.api :as d] + [datomic.client.api :as d] + [org.candelbio.pret.db :as db] [org.candelbio.pret.db.metamodel :as metamodel] [clojure.tools.logging :as log] [clojure.java.io :as io] @@ -269,7 +270,7 @@ (conventions/diff-dir target-dir) target-dir) import-job-name (conventions/import-name target-dir) - conn (d/connect datomic-uri) + conn (db/connect datomic-uri) db (d/db conn) all-dataset-fnames (conventions/dataset-tx-data-filenames transact-dir) all-ref-fnames (if update diff --git a/src/org/candelbio/pret/import/upsert_coordination.clj b/src/org/candelbio/pret/import/upsert_coordination.clj index e0bc09a..0ce5e3a 100644 --- a/src/org/candelbio/pret/import/upsert_coordination.clj +++ b/src/org/candelbio/pret/import/upsert_coordination.clj @@ -2,7 +2,7 
@@ (:require [org.candelbio.pret.db.metamodel :as metamodel] [clojure.java.io :as io] [clojure.tools.logging :as log] - [datomic.api :as d] + [datomic.client.api :as d] [org.candelbio.pret.db.schema :as db.schema] [clojure.edn :as edn] [org.candelbio.pret.db.query :as dq])) diff --git a/src/org/candelbio/pret/validation/post_import/util.clj b/src/org/candelbio/pret/validation/post_import/util.clj index 7d7e630..2f8eea1 100644 --- a/src/org/candelbio/pret/validation/post_import/util.clj +++ b/src/org/candelbio/pret/validation/post_import/util.clj @@ -6,7 +6,7 @@ [clojure.spec-alpha2 :as s] [clojure.walk :as walk] [org.candelbio.pret.validation.specs :as specs] - [datomic.api :as d] + [datomic.client.api :as d] [org.candelbio.pret.db :as db] [org.candelbio.pret.validation.post-import.query :as vquery] [org.candelbio.pret.db.schema :as db.schema]))