diff --git a/.github/workflows/unittests-rust.yml b/.github/workflows/unittests-rust.yml index 2b05cb4d6..0f07005dc 100644 --- a/.github/workflows/unittests-rust.yml +++ b/.github/workflows/unittests-rust.yml @@ -7,6 +7,17 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - name: Get test data + id: cache-test-data + uses: actions/cache@v4 + with: + path: test-data + key: test-data-v1 + - name: Download test data + if: steps.cache-test-data.outputs.cache-hit != 'true' + run: | + cd crates/dekoder/tests/data + ./download.sh - uses: actions/setup-python@v5 - name: Install task runner run: pip install poethepoet diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index dfd0862f5..0686c5254 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ # See https://pre-commit.com/hooks.html for more hooks ci: autofix_prs: false - skip: [fmt] # will be run by a separate CI + skip: [fmt, clippy] # will be run by a separate CI repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.6.0 @@ -59,6 +59,13 @@ repos: language: system files: ^crates/.*\.rs$ args: [] + - id: clippy + name: clippy + description: Check Rust files with cargo clippy. + entry: cargo clippy --all-targets --all-features -- -Dclippy::all + pass_filenames: false + types: [file, rust] + language: system - repo: https://github.com/pre-commit/pre-commit rev: v3.8.0 hooks: diff --git a/Cargo.lock b/Cargo.lock index 80fa02400..2ec2eca5a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + [[package]] name = "ahash" version = "0.8.3" @@ -13,24 +19,180 @@ dependencies = [ "version_check", ] +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + [[package]] name = "allocator-api2" version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" +[[package]] +name = "anstyle" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" + +[[package]] +name = "arraydeque" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d902e3d592a523def97af8f317b08ce16b7ab854c1985a0c671e6f15cebc236" + +[[package]] +name = "assert_fs" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7efdb1fdb47602827a342857666feb372712cbc64b414172bd6b167a02927674" +dependencies = [ + "anstyle", + "doc-comment", + "globwalk", + "predicates", + "predicates-core", + "predicates-tree", + "tempfile", +] + [[package]] name = "autocfg" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "bstr" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c" +dependencies = [ + "memchr", + "serde", +] + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cpufeatures" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51e852e6dc9a5bed1fae92dd2375037bf2b768725bf3be87811edee3249d09ad" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "dekoder" +version = "0.0.1" +dependencies = [ + "assert_fs", + "lz4_flex", + "ndarray", + "ndarray-npy", + "predicates", + "tar", + "thiserror", + "yaml-rust2", +] + +[[package]] +name = "difflib" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "doc-comment" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" + [[package]] name = "eko" version = "0.0.1" @@ -48,6 +210,53 @@ dependencies = [ "num", ] +[[package]] +name = "encoding_rs" +version = "0.8.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "errno" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "fastrand" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" + +[[package]] +name = "filetime" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf401df4a4e3872c4fe8151134cf483738e74b67fc934d6532c882b3d24a4550" 
+dependencies = [ + "cfg-if", + "libc", + "libredox", + "windows-sys 0.59.0", +] + +[[package]] +name = "flate2" +version = "1.0.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f211bbe8e69bbd0cfdea405084f128ae8b4aaa6b0b522fc8f2b009084797920" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "float-cmp" version = "0.9.0" @@ -57,6 +266,40 @@ dependencies = [ "num-traits", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "globset" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57da3b9b5b85bd66f31093f8c408b90a74431672542466497dcbdfdc02034be1" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "globwalk" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf760ebf69878d9fd8f110c89703d90ce35095324d1f1edcb595c63945ee757" +dependencies = [ + "bitflags", + "ignore", + "walkdir", +] + [[package]] name = "hashbrown" version = "0.14.0" @@ -67,6 +310,127 @@ dependencies = [ "allocator-api2", ] +[[package]] +name = "hashlink" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7" +dependencies = [ + "hashbrown", +] + +[[package]] +name = "ignore" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b46810df39e66e925525d6e38ce1e7f6e1d208f72dc39757880fcb66e2c58af1" +dependencies = [ + "crossbeam-deque", + "globset", + "log", + "memchr", + "regex-automata", + "same-file", + "walkdir", + "winapi-util", +] + +[[package]] +name = "libc" +version = "0.2.155" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "libredox" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" +dependencies = [ + "bitflags", + "libc", + "redox_syscall", +] + +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "lz4_flex" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a8cbbb2831780bc3b9c15a41f5b49222ef756b6730a95f3decfdd15903eb5a3" +dependencies = [ + "twox-hash", +] + +[[package]] +name = "matrixmultiply" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9380b911e3e96d10c1f415da0876389aaf1b56759054eeb0de7df940c456ba1a" +dependencies = [ + "autocfg", + "rawpointer", +] + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "miniz_oxide" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" +dependencies = [ + "adler", +] + +[[package]] +name = "ndarray" +version = "0.15.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32" +dependencies = [ + "matrixmultiply", + "num-complex", + "num-integer", + "num-traits", + 
"rawpointer", +] + +[[package]] +name = "ndarray-npy" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f85776816e34becd8bd9540818d7dc77bf28307f3b3dcc51cc82403c6931680c" +dependencies = [ + "byteorder", + "ndarray", + "num-complex", + "num-traits", + "py_literal", + "zip", +] + +[[package]] +name = "normalize-line-endings" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" + [[package]] name = "num" version = "0.4.1" @@ -149,8 +513,440 @@ version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +[[package]] +name = "pest" +version = "2.7.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd53dff83f26735fdc1ca837098ccf133605d794cdae66acfc2bfac3ec809d95" +dependencies = [ + "memchr", + "thiserror", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.7.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a548d2beca6773b1c244554d36fcf8548a8a58e74156968211567250e48e49a" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.7.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c93a82e8d145725dcbaf44e5ea887c8a869efdcc28706df2d08c69e17077183" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "pest_meta" +version = "2.7.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a941429fea7e08bedec25e4f6785b6ffaacc6b755da98df5ef3e7dcf4a124c4f" +dependencies = [ + "once_cell", + "pest", + "sha2", +] + +[[package]] +name = "predicates" +version = "3.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7e9086cc7640c29a356d1a29fd134380bee9d8f79a17410aa76e7ad295f42c97" +dependencies = [ + "anstyle", + "difflib", + "float-cmp", + "normalize-line-endings", + "predicates-core", + "regex", +] + +[[package]] +name = "predicates-core" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae8177bee8e75d6846599c6b9ff679ed51e882816914eec639944d7c9aa11931" + +[[package]] +name = "predicates-tree" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41b740d195ed3166cd147c8047ec98db0e22ec019eb8eeb76d343b795304fb13" +dependencies = [ + "predicates-core", + "termtree", +] + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "py_literal" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "102df7a3d46db9d3891f178dcc826dc270a6746277a9ae6436f8d29fd490a8e1" +dependencies = [ + "num-bigint", + "num-complex", + "num-traits", + "pest", + "pest_derive", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + +[[package]] +name = "redox_syscall" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + +[[package]] +name = "rustix" +version = "0.38.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "serde" +version = "1.0.208" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cff085d2cb684faa248efb494c39b68e522822ac0de72ccf08109abde717cfb2" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.208" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24008e81ff7613ed8e5ba0cfaf24e2c2f1e5b8a0495711e44fcd4882fca62bcf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = "2.0.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fceb41e3d546d0bd83421d3409b1460cc7444cd389341a4c880fe7a042cb3d7" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tar" +version = "0.4.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb797dad5fb5b76fcf519e702f4a589483b5ef06567f160c392832c1f5e44909" +dependencies = [ + "filetime", + "libc", + "xattr", +] + +[[package]] +name = "tempfile" +version = "3.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +dependencies = [ + "cfg-if", + "fastrand", + "rustix", + "windows-sys 0.52.0", +] + +[[package]] +name = "termtree" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" + +[[package]] +name = "thiserror" +version = "1.0.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "twox-hash" +version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "static_assertions", +] + +[[package]] +name = "typenum" +version = "1.17.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "ucd-trie" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + [[package]] name = "version_check" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + 
"windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "xattr" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f" +dependencies = [ + "libc", + "linux-raw-sys", + "rustix", +] + +[[package]] +name = "yaml-rust2" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8902160c4e6f2fb145dbe9d6760a75e3c9522d8bf796ed7047c85919ac7115f8" +dependencies = [ + "arraydeque", + "encoding_rs", + "hashlink", +] + +[[package]] +name = "zip" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93ab48844d61251bb3835145c521d88aa4031d7139e8485990f60ca911fa0815" +dependencies = [ + "byteorder", + "crc32fast", + "flate2", + "thiserror", +] diff --git a/Cargo.toml b/Cargo.toml index f26518890..081204a1f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,5 +18,5 @@ edition = "2021" keywords = ["physics"] license = "GPL-3.0-or-later" repository = "https://github.com/NNPDF/eko" -rust-version = "1.60.0" +rust-version = "1.70.0" version = "0.0.1" diff --git a/crates/dekoder/Cargo.toml b/crates/dekoder/Cargo.toml new file mode 100644 index 000000000..f51bf869c --- /dev/null +++ b/crates/dekoder/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "dekoder" + +authors.workspace = true +description.workspace = true +readme.workspace = true +categories.workspace = true +edition.workspace = true +keywords.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true +version.workspace = true + +[package.metadata.docs.rs] +rustdoc-args = ["--html-in-header", "doc-header.html"] + +[dependencies] +tar = "0.4.41" +yaml-rust2 = "0.8" +lz4_flex = "0.9.2" +ndarray = "0.15.4" +ndarray-npy = "0.8.1" +thiserror = "1.0.63" + +[dev-dependencies] +assert_fs = "1.1.2" +predicates = "3.1.2" diff --git a/crates/dekoder/doc-header.html b/crates/dekoder/doc-header.html new file mode 120000 index 000000000..22282661d --- /dev/null +++ b/crates/dekoder/doc-header.html @@ -0,0 +1 @@ +../doc-header.html \ No newline at end of file diff --git 
a/crates/dekoder/src/eko.rs b/crates/dekoder/src/eko.rs new file mode 100644 index 000000000..1e950ac8a --- /dev/null +++ b/crates/dekoder/src/eko.rs @@ -0,0 +1,136 @@ +//! Utilities for reading and writing an eko output. +use std::fs::remove_dir_all; +use std::fs::File; +use std::io::BufWriter; +use std::path::PathBuf; +use yaml_rust2::Yaml; + +use crate::{EKOError, Operator, Result}; + +/// Default rel. error for the float comparison inside `EvolutionPoint`. +const EP_CMP_RTOL: f64 = 1e-5; +/// Default abs. error for the float comparison inside `EvolutionPoint`. +const EP_CMP_ATOL: f64 = 1e-3; + +/// A reference point in the evolution atlas. +pub struct EvolutionPoint { + /// Evolution scale. + pub scale: f64, + /// Number of flavors + pub nf: i64, +} + +impl TryFrom<&Yaml> for EvolutionPoint { + type Error = EKOError; + + /// Load from yaml. + fn try_from(yml: &Yaml) -> Result { + // work around float representation + let scale = yml["scale"].as_f64(); + let scale = if scale.is_some() { + scale.ok_or(EKOError::KeyError( + "because failed to read scale as float".to_owned(), + ))? + } else { + yml["scale"].as_i64().ok_or(EKOError::KeyError( + "because failed to read scale as float from int".to_owned(), + ))? as f64 + }; + let nf = yml["nf"] + .as_i64() + .ok_or(EKOError::KeyError("because failed to read nf".to_owned()))?; + Ok(Self { scale, nf }) + } +} + +/// Reimplementation of [`np.isclose`](https://numpy.org/doc/stable/reference/generated/numpy.isclose.html#numpy-isclose). +fn is_close(a: f64, b: f64, rtol: f64, atol: f64) -> bool { + (a - b).abs() <= atol + rtol * b.abs() +} + +impl PartialEq for EvolutionPoint { + /// Comparator using default tolerance for float comparisons. 
+ fn eq(&self, other: &Self) -> bool { + self.nf == other.nf && is_close(self.scale, other.scale, EP_CMP_RTOL, EP_CMP_ATOL) + } +} + +impl Eq for EvolutionPoint {} + +/// EKO output +pub struct EKO { + /// Working directory + path: PathBuf, + /// final operators + operators: crate::inventory::Inventory, +} + +/// Operators directory. +const DIR_OPERATORS: &str = "operators/"; +/// Buffer capacity for tar writer +const TAR_WRITER_CAPACITY: usize = 128 * 1024; + +impl EKO { + /// Check our working directory is safe. + fn assert_working_dir(&self) -> Result<()> { + self.path + .exists() + .then_some(()) + .ok_or(EKOError::NoWorkingDir) + } + + /// Remove the working directory. + pub fn destroy(&self) -> Result<()> { + self.assert_working_dir()?; + Ok(remove_dir_all(&self.path)?) + } + + /// Write the content to an archive `dst` and remove the working directory. + pub fn write_and_destroy(&self, dst: PathBuf) -> Result<()> { + self.write(dst)?; + self.destroy() + } + + /// Write the content to an archive `dst`. + pub fn write(&self, dst: PathBuf) -> Result<()> { + self.assert_working_dir()?; + // create writer + let dst_file = File::create(&dst)?; + let dst_file = BufWriter::with_capacity(TAR_WRITER_CAPACITY, dst_file); + let mut ar = tar::Builder::new(dst_file); + // do it! + Ok(ar.append_dir_all(".", &self.path)?) + } + + /// Extract tar file from `src` to `dst`. + pub fn extract(src: PathBuf, dst: PathBuf) -> Result { + let mut ar = tar::Archive::new(File::open(&src)?); + ar.unpack(&dst)?; + Self::load_opened(dst) + } + + /// Load an EKO from a directory `path` (instead of tar). + pub fn load_opened(path: PathBuf) -> Result { + let mut operators = crate::inventory::Inventory::new(path.join(DIR_OPERATORS)); + operators.load_keys()?; + let obj = Self { path, operators }; + obj.assert_working_dir()?; + Ok(obj) + } + + /// List available evolution points. 
+ pub fn available_operators(&self) -> Vec<&EvolutionPoint> { + self.operators.keys() + } + + /// Check if the operator at the evolution point `ep` is available. + pub fn has_operator(&self, ep: &EvolutionPoint) -> bool { + self.operators.has(ep) + } + + /// Load the operator at the evolution point `ep` from disk. + pub fn load_operator(&self, ep: &EvolutionPoint) -> Result { + self.assert_working_dir()?; + self.operators.load(ep) + } +} diff --git a/crates/dekoder/src/inventory.rs b/crates/dekoder/src/inventory.rs new file mode 100644 index 000000000..b9549e3d3 --- /dev/null +++ b/crates/dekoder/src/inventory.rs @@ -0,0 +1,94 @@ +//! Utilities for dealing with the eko assets. +use lz4_flex::frame::FrameDecoder; +use ndarray_npy::NpzReader; +use std::collections::HashMap; +use std::ffi::OsString; +use std::fs::{read_dir, read_to_string, File}; +use std::io::Cursor; +use std::path::PathBuf; +use yaml_rust2::{Yaml, YamlLoader}; + +use crate::{EKOError, Operator, Result}; + +/// Headers are in yaml files. +const HEADER_EXT: &str = "yaml"; + +/// Assets manager. +pub(crate) struct Inventory TryFrom<&'a Yaml, Error = EKOError>> { + /// Working directory + path: PathBuf, + /// Available keys + keys: HashMap, +} + +impl TryFrom<&'a Yaml, Error = EKOError>> Inventory { + /// Construct new manager pointing to `path`. + pub(crate) fn new(path: PathBuf) -> Self { + Self { + path, + keys: HashMap::new(), + } + } + + /// Load all available entries. + pub fn load_keys(&mut self) -> Result<()> { + for entry in read_dir(&self.path)? { + // is header file? + let entry = entry?.path(); + if !entry.extension().is_some_and(|ext| ext == HEADER_EXT) { + continue; + } + // read + let cnt = YamlLoader::load_from_str(&read_to_string(&entry)?) + .map_err(|_| EKOError::KeyError("because failed to read yaml file.".to_owned()))?; + // add to register + self.keys.insert( + entry + .file_name() + .ok_or(EKOError::KeyError( + "because failed to read file name".to_owned(), + ))? 
+ .to_os_string(), + K::try_from(&cnt[0])?, + ); + } + Ok(()) + } + + /// List available keys. + pub fn keys(&self) -> Vec<&K> { + self.keys.values().collect() + } + + /// Check if `k` is available (with given precision). + pub fn has(&self, k: &K) -> bool { + self.keys.iter().any(|it| it.1 == k) + } + + /// Load `k` from disk. + pub fn load(&self, k: &K) -> Result { + // Find key + let k = self + .keys + .iter() + .find(|it| it.1 == k) + .ok_or(EKOError::KeyError("because it was not found".to_owned()))?; + // TODO determine if errors are available + let p = self.path.join(k.0).with_extension("npz.lz4"); + // Read npz.lz4 + let mut reader = FrameDecoder::new(File::open(&p)?); + let mut buffer = Vec::new(); + std::io::copy(&mut reader, &mut buffer)?; + let mut npz = NpzReader::new(Cursor::new(buffer)) + .map_err(|_| EKOError::OperatorLoadError(p.to_owned()))?; + let op = Some( + npz.by_name("operator.npy") + .map_err(|_| EKOError::OperatorLoadError(p.to_owned()))?, + ); + let err = Some( + npz.by_name("error.npy") + .map_err(|_| EKOError::OperatorLoadError(p.to_owned()))?, + ); + Ok(Operator { op, err }) + } +} diff --git a/crates/dekoder/src/lib.rs b/crates/dekoder/src/lib.rs new file mode 100644 index 000000000..b0bad01af --- /dev/null +++ b/crates/dekoder/src/lib.rs @@ -0,0 +1,47 @@ +//! Reading and writing eko outputs. +use ndarray::Array4; +use std::path::PathBuf; +use thiserror::Error; + +pub mod eko; +mod inventory; + +/// The EKO errors. +#[derive(Error, Debug)] +pub enum EKOError { + /// Working directory is not usable. + #[error("No working directory")] + NoWorkingDir, + /// Underlying I/O error. + #[error("I/O error")] + IOError(#[from] std::io::Error), + /// 4D operator is not readable. + #[error("Loading operator from `{0}` failed")] + OperatorLoadError(PathBuf), + /// Lookup error in an inventory. + #[error("Failed to read key(s) `{0}`")] + KeyError(String), +} + +/// A specialized [`Result`] type for EKO manipulation. 
+/// +/// [`Result`]: std::result::Result +pub type Result = std::result::Result; + +/// 4D evolution operator. +pub struct Operator { + /// The actual rank 4 tensor. + pub op: Option>, + /// The associated element-by-element error. + pub err: Option>, +} + +impl Default for Operator { + /// Empty initializer. + fn default() -> Self { + Self { + op: None, + err: None, + } + } +} diff --git a/crates/dekoder/tests/data/.gitignore b/crates/dekoder/tests/data/.gitignore new file mode 100644 index 000000000..d874ad67c --- /dev/null +++ b/crates/dekoder/tests/data/.gitignore @@ -0,0 +1 @@ +*.tar diff --git a/crates/dekoder/tests/data/download.sh b/crates/dekoder/tests/data/download.sh new file mode 100755 index 000000000..afcc880ec --- /dev/null +++ b/crates/dekoder/tests/data/download.sh @@ -0,0 +1,3 @@ +# upload: scp v0.15.tar nnpdf@data.nnpdf.science:WEB/eko/test-data + +curl -s -C - -O 'https://data.nnpdf.science/eko/test-data/v0.15.tar' diff --git a/crates/dekoder/tests/test_load.rs b/crates/dekoder/tests/test_load.rs new file mode 100644 index 000000000..a6db499cd --- /dev/null +++ b/crates/dekoder/tests/test_load.rs @@ -0,0 +1,87 @@ +use assert_fs::prelude::*; +use predicates::prelude::*; +use std::path::PathBuf; + +use dekoder::eko::{EvolutionPoint, EKO}; + +// assert_fs will clean up the directories for us, +// so for the most part we don't need worry about that. + +/// Get v0.15 test object. 
+fn v015tar() -> PathBuf { + let base: PathBuf = [env!("CARGO_MANIFEST_DIR"), "tests"].iter().collect(); + let src = base.join("data").join("v0.15.tar"); + assert!(predicate::path::exists().eval(&src)); + src +} + +#[test] +fn open() { + let src = v015tar(); + let dst = assert_fs::TempDir::new().unwrap(); + // open + let _eko = EKO::extract(src.to_owned(), dst.to_owned()).unwrap(); + let metadata = dst.child("metadata.yaml"); + metadata.assert(predicate::path::exists()); +} + +#[test] +fn destroy() { + let src = v015tar(); + let dst = assert_fs::TempDir::new().unwrap(); + { + // extract + destroy + let eko = EKO::extract(src.to_owned(), dst.to_owned()).unwrap(); + eko.destroy().unwrap(); + } + dst.assert(predicate::path::missing()); +} + +#[test] +fn save_as_other() { + let src = v015tar(); + let dst = assert_fs::TempDir::new().unwrap(); + // open + let eko = EKO::extract(src.to_owned(), dst.to_owned()).unwrap(); + // write to somewhere else + let tarb = assert_fs::NamedTempFile::new("v0.15b.tar").unwrap(); + eko.write(tarb.to_owned()).unwrap(); + tarb.assert(predicate::path::exists()); +} + +#[test] +fn has_operator() { + let src = v015tar(); + let dst = assert_fs::TempDir::new().unwrap(); + // open + let eko = EKO::extract(src.to_owned(), dst.to_owned()).unwrap(); + // check there is only one: + assert!(eko.available_operators().len() == 1); + // ask for one + let ep = EvolutionPoint { + scale: 10000., + nf: 4, + }; + // it is the one + assert!(ep.eq(eko.available_operators()[0])); + assert!(eko.has_operator(&ep)); +} + +#[test] +fn load_operator() { + let src = v015tar(); + let dst = assert_fs::TempDir::new().unwrap(); + // open + let eko = EKO::extract(src.to_owned(), dst.to_owned()).unwrap(); + // load + let ep = EvolutionPoint { + scale: 10000., + nf: 4, + }; + let operator = eko.load_operator(&ep).unwrap(); + assert!(operator.op.is_some()); + assert!(operator.err.is_some()); + let op = operator.op.unwrap(); + assert!(op.dim().0 > 0); + assert!(op.dim().0 == 
operator.err.unwrap().dim().0); +} diff --git a/crates/doc-header.html b/crates/doc-header.html index 3b2f9651b..32f0bba13 100644 --- a/crates/doc-header.html +++ b/crates/doc-header.html @@ -51,7 +51,7 @@ } // do the actual replace function replaceAbbrev(el) { - let txt = el.textContent; + let txt = el.innerHTML; for (k in parsedAbbrevs) txt = txt.replace(k, parsedAbbrevs[k]); el.innerHTML = txt; diff --git a/crates/ekore/refs.bib b/crates/ekore/refs.bib index e8ef02c2b..a8a049ac1 100644 --- a/crates/ekore/refs.bib +++ b/crates/ekore/refs.bib @@ -69,3 +69,28 @@ @article{Vogt2004ns volume = "170", year = "2005" } +@article{Ball2015tna, + author = "Ball, Richard D. and Bertone, Valerio and Bonvini, Marco and Forte, Stefano and Groth Merrild, Patrick and Rojo, Juan and Rottoli, Luca", + title = "{Intrinsic charm in a matched general-mass scheme}", + eprint = "1510.00009", + archivePrefix = "arXiv", + primaryClass = "hep-ph", + reportNumber = "EDINBURGH-2015-06, TIF-UNIMI-2015-11, EDINBURGH-2015-14, CERN-PH-TH-2015-176, OUTP-15-20P", + doi = "10.1016/j.physletb.2015.12.077", + journal = "Phys. Lett. B", + volume = "754", + pages = "49--58", + year = "2016" +} +@article{Buza1996wv, + author = "Buza, M. and Matiounine, Y. and Smith, J. and van Neerven, W. L.", + title = "{Charm electroproduction viewed in the variable flavor number scheme versus fixed order perturbation theory}", + eprint = "hep-ph/9612398", + archivePrefix = "arXiv", + reportNumber = "NIKHEF-96-027, ITP-SB-96-66, DESY-96-258, INLO-PUB-22-96", + doi = "10.1007/BF01245820", + journal = "Eur. Phys. J. C", + volume = "1", + pages = "301--320", + year = "1998" +} diff --git a/crates/ekore/src/bib.rs b/crates/ekore/src/bib.rs index 75da707bb..01b7025f8 100644 --- a/crates/ekore/src/bib.rs +++ b/crates/ekore/src/bib.rs @@ -1,4 +1,4 @@ -//! List of References (autogenerated on 2024-05-30T12:57:15.459698). +//! List of References (autogenerated on 2024-07-18T17:42:47.650964). 
#[allow(non_snake_case)] /// The Three loop splitting functions in QCD: The Nonsinglet case @@ -71,3 +71,27 @@ pub fn MuselliPhD() {} /// /// DOI: [10.1016/j.cpc.2005.03.103](https:dx.doi.org/10.1016/j.cpc.2005.03.103) pub fn Vogt2004ns() {} + +#[allow(non_snake_case)] +/// Intrinsic charm in a matched general-mass scheme +/// +/// Ball, Richard D. and Bertone, Valerio and Bonvini, Marco and Forte, Stefano and Groth Merrild, Patrick and Rojo, Juan and Rottoli, Luca +/// +/// Published in: Phys. Lett. B 754 (2016), 49--58 +/// +/// e-Print: [1510.00009](https://arxiv.org/abs/1510.00009) +/// +/// DOI: [10.1016/j.physletb.2015.12.077](https://dx.doi.org/10.1016/j.physletb.2015.12.077) +pub fn Ball2015tna() {} + +#[allow(non_snake_case)] +/// Charm electroproduction viewed in the variable flavor number scheme versus fixed order perturbation theory +/// +/// Buza, M. and Matiounine, Y. and Smith, J. and van Neerven, W. L. +/// +/// Published in: Eur. Phys. J. C 1 (1998), 301--320 +/// +/// e-Print: [hep-ph/9612398](https://arxiv.org/abs/hep-ph/9612398) +/// +/// DOI: [10.1007/BF01245820](https://dx.doi.org/10.1007/BF01245820) +pub fn Buza1996wv() {} diff --git a/pyproject.toml b/pyproject.toml index 89ef66c71..0f343b160 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -131,7 +131,7 @@ asv = ["asv-run", "asv-publish", "asv-preview"] bump-version = { "shell" = "python crates/bump-versions.py $(git describe --tags)" } compile = "pip install -e crates/eko/" rdocs.cmd = "cargo doc --workspace --no-deps" -rdocs.env = { RUSTDOCFLAGS = "--html-in-header crates/katex-header.html" } +rdocs.env = { RUSTDOCFLAGS = "--html-in-header crates/doc-header.html" } rdocs-view = "xdg-open target/doc/ekors/index.html" rdocs-clean = "rm -rf target/doc/" rtest = "cargo test --workspace"