diff --git a/.gitlab/scripts/check_layer_size.sh b/.gitlab/scripts/check_layer_size.sh index 573d3884a..42095354b 100755 --- a/.gitlab/scripts/check_layer_size.sh +++ b/.gitlab/scripts/check_layer_size.sh @@ -14,8 +14,8 @@ if [ -z "$LAYER_FILE" ]; then exit 1 fi -MAX_LAYER_COMPRESSED_SIZE_KB=$(expr 19 \* 1024) # 19 MB, amd64 is 19, while arm64 is 15 -MAX_LAYER_UNCOMPRESSED_SIZE_KB=$(expr 49 \* 1024) # 49 MB, amd is 49, while arm64 is 48 +MAX_LAYER_COMPRESSED_SIZE_KB=$(expr 20 \* 1024) # 20 MB, amd64 is 19, while arm64 is 18 +MAX_LAYER_UNCOMPRESSED_SIZE_KB=$(expr 51 \* 1024) # 51 MB, amd64 is 50.5, while arm64 is 47 LAYERS_DIR=".layers" diff --git a/bottlecap/Cargo.lock b/bottlecap/Cargo.lock index 3c206bc53..c61ae2695 100644 --- a/bottlecap/Cargo.lock +++ b/bottlecap/Cargo.lock @@ -344,7 +344,7 @@ version = "0.69.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" dependencies = [ - "bitflags", + "bitflags 2.6.0", "cexpr", "clang-sys", "itertools 0.12.1", @@ -376,6 +376,12 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + [[package]] name = "bitflags" version = "2.6.0" @@ -425,9 +431,12 @@ dependencies = [ "hmac", "httpmock", "hyper 0.14.30", + "lazy_static", "log", + "nix", "proptest", "protobuf", + "rand", "regex", "reqwest", "rmp-serde", @@ -436,6 +445,7 @@ dependencies = [ "serde_json", "serial_test", "sha2", + "thiserror", "tokio", "tokio-util", "tracing", @@ -1572,7 +1582,7 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ - 
"bitflags", + "bitflags 2.6.0", "libc", ] @@ -1667,6 +1677,17 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" +[[package]] +name = "nix" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" +dependencies = [ + "bitflags 1.3.2", + "cfg-if", + "libc", +] + [[package]] name = "nom" version = "7.1.3" @@ -1928,7 +1949,7 @@ checksum = "b4c2511913b88df1637da85cc8d96ec8e43a3f8bb8ccb71ee1ac240d6f3df58d" dependencies = [ "bit-set", "bit-vec", - "bitflags", + "bitflags 2.6.0", "lazy_static", "num-traits", "rand", @@ -2176,7 +2197,7 @@ version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" dependencies = [ - "bitflags", + "bitflags 2.6.0", ] [[package]] @@ -2363,7 +2384,7 @@ version = "0.38.37" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" dependencies = [ - "bitflags", + "bitflags 2.6.0", "errno", "libc", "linux-raw-sys", @@ -2542,7 +2563,7 @@ version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags", + "bitflags 2.6.0", "core-foundation", "core-foundation-sys", "libc", diff --git a/bottlecap/Cargo.toml b/bottlecap/Cargo.toml index 5e539298a..7fcba69a5 100644 --- a/bottlecap/Cargo.toml +++ b/bottlecap/Cargo.toml @@ -18,12 +18,15 @@ datadog-trace-obfuscation = { git = "https://github.com/DataDog/libdatadog", rev dogstatsd = { git = "https://github.com/DataDog/libdatadog", rev = "92272e90a7919f07178f3246ef8f82295513cfed" } figment = { version = "0.10", default-features = false, features = ["yaml", "env"] } hyper = { version 
= "0.14", default-features = false, features = ["server"] } +lazy_static = { version = "1.5", default-features = false } log = { version = "0.4", default-features = false } +nix = { version = "0.26", default-features = false, features = ["feature", "fs"] } protobuf = { version = "3.5", default-features = false } regex = { version = "1.10", default-features = false } reqwest = { version = "0.12", features = ["json", "http2", "rustls-tls"], default-features = false } serde = { version = "1.0", default-features = false, features = ["derive"] } serde_json = { version = "1.0", default-features = false, features = ["alloc"] } +thiserror = { version = "1.0", default-features = false} tokio = { version = "1.37", default-features = false, features = ["macros", "rt-multi-thread"] } tokio-util = { version = "0.7", default-features = false } tracing = { version = "0.1", default-features = false } @@ -35,6 +38,7 @@ hex = { version = "0.4", default-features = false, features = ["std"] } base64 = { version = "0.22", default-features = false } rmp-serde = { version = "1.3.0", default-features = false } rustls = { version = "0.23.12", default-features = false, features = ["aws-lc-rs"] } +rand = { version = "0.8", default-features = false } [dev-dependencies] figment = { version = "0.10", default-features = false, features = ["yaml", "env", "test"] } diff --git a/bottlecap/LICENSE-3rdparty.yml b/bottlecap/LICENSE-3rdparty.yml index 83b5f497a..e20352a0e 100644 --- a/bottlecap/LICENSE-3rdparty.yml +++ b/bottlecap/LICENSE-3rdparty.yml @@ -2665,6 +2665,40 @@ third_party_libraries: THE SOFTWARE. - license: Apache-2.0 text: " Apache License\n Version 2.0, January 2004\n http://www.apache.org/licenses/\n\nTERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n1. 
Definitions.\n\n \"License\" shall mean the terms and conditions for use, reproduction,\n and distribution as defined by Sections 1 through 9 of this document.\n\n \"Licensor\" shall mean the copyright owner or entity authorized by\n the copyright owner that is granting the License.\n\n \"Legal Entity\" shall mean the union of the acting entity and all\n other entities that control, are controlled by, or are under common\n control with that entity. For the purposes of this definition,\n \"control\" means (i) the power, direct or indirect, to cause the\n direction or management of such entity, whether by contract or\n otherwise, or (ii) ownership of fifty percent (50%) or more of the\n outstanding shares, or (iii) beneficial ownership of such entity.\n\n \"You\" (or \"Your\") shall mean an individual or Legal Entity\n exercising permissions granted by this License.\n\n \"Source\" form shall mean the preferred form for making modifications,\n including but not limited to software source code, documentation\n source, and configuration files.\n\n \"Object\" form shall mean any form resulting from mechanical\n transformation or translation of a Source form, including but\n not limited to compiled object code, generated documentation,\n and conversions to other media types.\n\n \"Work\" shall mean the work of authorship, whether in Source or\n Object form, made available under the License, as indicated by a\n copyright notice that is included in or attached to the work\n (an example is provided in the Appendix below).\n\n \"Derivative Works\" shall mean any work, whether in Source or Object\n form, that is based on (or derived from) the Work and for which the\n editorial revisions, annotations, elaborations, or other modifications\n represent, as a whole, an original work of authorship. 
For the purposes\n of this License, Derivative Works shall not include works that remain\n separable from, or merely link (or bind by name) to the interfaces of,\n the Work and Derivative Works thereof.\n\n \"Contribution\" shall mean any work of authorship, including\n the original version of the Work and any modifications or additions\n to that Work or Derivative Works thereof, that is intentionally\n submitted to Licensor for inclusion in the Work by the copyright owner\n or by an individual or Legal Entity authorized to submit on behalf of\n the copyright owner. For the purposes of this definition, \"submitted\"\n means any form of electronic, verbal, or written communication sent\n to the Licensor or its representatives, including but not limited to\n communication on electronic mailing lists, source code control systems,\n and issue tracking systems that are managed by, or on behalf of, the\n Licensor for the purpose of discussing and improving the Work, but\n excluding communication that is conspicuously marked or otherwise\n designated in writing by the copyright owner as \"Not a Contribution.\"\n\n \"Contributor\" shall mean Licensor and any individual or Legal Entity\n on behalf of whom a Contribution has been received by Licensor and\n subsequently incorporated within the Work.\n\n2. Grant of Copyright License. Subject to the terms and conditions of\n this License, each Contributor hereby grants to You a perpetual,\n worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n copyright license to reproduce, prepare Derivative Works of,\n publicly display, publicly perform, sublicense, and distribute the\n Work and such Derivative Works in Source or Object form.\n\n3. Grant of Patent License. 
Subject to the terms and conditions of\n this License, each Contributor hereby grants to You a perpetual,\n worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n (except as stated in this section) patent license to make, have made,\n use, offer to sell, sell, import, and otherwise transfer the Work,\n where such license applies only to those patent claims licensable\n by such Contributor that are necessarily infringed by their\n Contribution(s) alone or by combination of their Contribution(s)\n with the Work to which such Contribution(s) was submitted. If You\n institute patent litigation against any entity (including a\n cross-claim or counterclaim in a lawsuit) alleging that the Work\n or a Contribution incorporated within the Work constitutes direct\n or contributory patent infringement, then any patent licenses\n granted to You under this License for that Work shall terminate\n as of the date such litigation is filed.\n\n4. Redistribution. You may reproduce and distribute copies of the\n Work or Derivative Works thereof in any medium, with or without\n modifications, and in Source or Object form, provided that You\n meet the following conditions:\n\n (a) You must give any other recipients of the Work or\n Derivative Works a copy of this License; and\n\n (b) You must cause any modified files to carry prominent notices\n stating that You changed the files; and\n\n (c) You must retain, in the Source form of any Derivative Works\n that You distribute, all copyright, patent, trademark, and\n attribution notices from the Source form of the Work,\n excluding those notices that do not pertain to any part of\n the Derivative Works; and\n\n (d) If the Work includes a \"NOTICE\" text file as part of its\n distribution, then any Derivative Works that You distribute must\n include a readable copy of the attribution notices contained\n within such NOTICE file, excluding those notices that do not\n pertain to any part of the Derivative Works, in at least one\n of 
the following places: within a NOTICE text file distributed\n as part of the Derivative Works; within the Source form or\n documentation, if provided along with the Derivative Works; or,\n within a display generated by the Derivative Works, if and\n wherever such third-party notices normally appear. The contents\n of the NOTICE file are for informational purposes only and\n do not modify the License. You may add Your own attribution\n notices within Derivative Works that You distribute, alongside\n or as an addendum to the NOTICE text from the Work, provided\n that such additional attribution notices cannot be construed\n as modifying the License.\n\n You may add Your own copyright statement to Your modifications and\n may provide additional or different license terms and conditions\n for use, reproduction, or distribution of Your modifications, or\n for any such Derivative Works as a whole, provided Your use,\n reproduction, and distribution of the Work otherwise complies with\n the conditions stated in this License.\n\n5. Submission of Contributions. Unless You explicitly state otherwise,\n any Contribution intentionally submitted for inclusion in the Work\n by You to the Licensor shall be under the terms and conditions of\n this License, without any additional terms or conditions.\n Notwithstanding the above, nothing herein shall supersede or modify\n the terms of any separate license agreement you may have executed\n with Licensor regarding such Contributions.\n\n6. Trademarks. This License does not grant permission to use the trade\n names, trademarks, service marks, or product names of the Licensor,\n except as required for reasonable and customary use in describing the\n origin of the Work and reproducing the content of the NOTICE file.\n\n7. Disclaimer of Warranty. 
Unless required by applicable law or\n agreed to in writing, Licensor provides the Work (and each\n Contributor provides its Contributions) on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n implied, including, without limitation, any warranties or conditions\n of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n PARTICULAR PURPOSE. You are solely responsible for determining the\n appropriateness of using or redistributing the Work and assume any\n risks associated with Your exercise of permissions under this License.\n\n8. Limitation of Liability. In no event and under no legal theory,\n whether in tort (including negligence), contract, or otherwise,\n unless required by applicable law (such as deliberate and grossly\n negligent acts) or agreed to in writing, shall any Contributor be\n liable to You for damages, including any direct, indirect, special,\n incidental, or consequential damages of any character arising as a\n result of this License or out of the use or inability to use the\n Work (including but not limited to damages for loss of goodwill,\n work stoppage, computer failure or malfunction, or any and all\n other commercial damages or losses), even if such Contributor\n has been advised of the possibility of such damages.\n\n9. Accepting Warranty or Additional Liability. While redistributing\n the Work or Derivative Works thereof, You may choose to offer,\n and charge a fee for, acceptance of support, warranty, indemnity,\n or other liability obligations and/or rights consistent with this\n License. 
However, in accepting such obligations, You may act only\n on Your own behalf and on Your sole responsibility, not on behalf\n of any other Contributor, and only if You agree to indemnify,\n defend, and hold each Contributor harmless for any liability\n incurred by, or claims asserted against, such Contributor by reason\n of your accepting any such warranty or additional liability.\n\nEND OF TERMS AND CONDITIONS\n\nAPPENDIX: How to apply the Apache License to your work.\n\n To apply the Apache License to your work, attach the following\n boilerplate notice, with the fields enclosed by brackets \"[]\"\n replaced with your own identifying information. (Don't include\n the brackets!) The text should be enclosed in the appropriate\n comment syntax for the file format. We also recommend that a\n file or class name and description of purpose be included on the\n same \"printed page\" as the copyright notice for easier\n identification within third-party archives.\n\nCopyright [yyyy] [name of copyright owner]\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n\thttp://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n" +- package_name: bitflags + package_version: 1.3.2 + repository: https://github.com/bitflags/bitflags + license: MIT/Apache-2.0 + licenses: + - license: MIT + text: | + Copyright (c) 2014 The Rust Project Developers + + Permission is hereby granted, free of charge, to any + person obtaining a copy of this software and associated + documentation files (the "Software"), to deal in the + Software without restriction, including without + 
limitation the rights to use, copy, modify, merge, + publish, distribute, sublicense, and/or sell copies of + the Software, and to permit persons to whom the Software + is furnished to do so, subject to the following + conditions: + + The above copyright notice and this permission notice + shall be included in all copies or substantial portions + of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF + ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED + TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT + SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR + IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + - license: Apache-2.0 + text: " Apache License\n Version 2.0, January 2004\n http://www.apache.org/licenses/\n\nTERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n1. Definitions.\n\n \"License\" shall mean the terms and conditions for use, reproduction,\n and distribution as defined by Sections 1 through 9 of this document.\n\n \"Licensor\" shall mean the copyright owner or entity authorized by\n the copyright owner that is granting the License.\n\n \"Legal Entity\" shall mean the union of the acting entity and all\n other entities that control, are controlled by, or are under common\n control with that entity. 
For the purposes of this definition,\n \"control\" means (i) the power, direct or indirect, to cause the\n direction or management of such entity, whether by contract or\n otherwise, or (ii) ownership of fifty percent (50%) or more of the\n outstanding shares, or (iii) beneficial ownership of such entity.\n\n \"You\" (or \"Your\") shall mean an individual or Legal Entity\n exercising permissions granted by this License.\n\n \"Source\" form shall mean the preferred form for making modifications,\n including but not limited to software source code, documentation\n source, and configuration files.\n\n \"Object\" form shall mean any form resulting from mechanical\n transformation or translation of a Source form, including but\n not limited to compiled object code, generated documentation,\n and conversions to other media types.\n\n \"Work\" shall mean the work of authorship, whether in Source or\n Object form, made available under the License, as indicated by a\n copyright notice that is included in or attached to the work\n (an example is provided in the Appendix below).\n\n \"Derivative Works\" shall mean any work, whether in Source or Object\n form, that is based on (or derived from) the Work and for which the\n editorial revisions, annotations, elaborations, or other modifications\n represent, as a whole, an original work of authorship. For the purposes\n of this License, Derivative Works shall not include works that remain\n separable from, or merely link (or bind by name) to the interfaces of,\n the Work and Derivative Works thereof.\n\n \"Contribution\" shall mean any work of authorship, including\n the original version of the Work and any modifications or additions\n to that Work or Derivative Works thereof, that is intentionally\n submitted to Licensor for inclusion in the Work by the copyright owner\n or by an individual or Legal Entity authorized to submit on behalf of\n the copyright owner. 
For the purposes of this definition, \"submitted\"\n means any form of electronic, verbal, or written communication sent\n to the Licensor or its representatives, including but not limited to\n communication on electronic mailing lists, source code control systems,\n and issue tracking systems that are managed by, or on behalf of, the\n Licensor for the purpose of discussing and improving the Work, but\n excluding communication that is conspicuously marked or otherwise\n designated in writing by the copyright owner as \"Not a Contribution.\"\n\n \"Contributor\" shall mean Licensor and any individual or Legal Entity\n on behalf of whom a Contribution has been received by Licensor and\n subsequently incorporated within the Work.\n\n2. Grant of Copyright License. Subject to the terms and conditions of\n this License, each Contributor hereby grants to You a perpetual,\n worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n copyright license to reproduce, prepare Derivative Works of,\n publicly display, publicly perform, sublicense, and distribute the\n Work and such Derivative Works in Source or Object form.\n\n3. Grant of Patent License. Subject to the terms and conditions of\n this License, each Contributor hereby grants to You a perpetual,\n worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n (except as stated in this section) patent license to make, have made,\n use, offer to sell, sell, import, and otherwise transfer the Work,\n where such license applies only to those patent claims licensable\n by such Contributor that are necessarily infringed by their\n Contribution(s) alone or by combination of their Contribution(s)\n with the Work to which such Contribution(s) was submitted. 
If You\n institute patent litigation against any entity (including a\n cross-claim or counterclaim in a lawsuit) alleging that the Work\n or a Contribution incorporated within the Work constitutes direct\n or contributory patent infringement, then any patent licenses\n granted to You under this License for that Work shall terminate\n as of the date such litigation is filed.\n\n4. Redistribution. You may reproduce and distribute copies of the\n Work or Derivative Works thereof in any medium, with or without\n modifications, and in Source or Object form, provided that You\n meet the following conditions:\n\n (a) You must give any other recipients of the Work or\n Derivative Works a copy of this License; and\n\n (b) You must cause any modified files to carry prominent notices\n stating that You changed the files; and\n\n (c) You must retain, in the Source form of any Derivative Works\n that You distribute, all copyright, patent, trademark, and\n attribution notices from the Source form of the Work,\n excluding those notices that do not pertain to any part of\n the Derivative Works; and\n\n (d) If the Work includes a \"NOTICE\" text file as part of its\n distribution, then any Derivative Works that You distribute must\n include a readable copy of the attribution notices contained\n within such NOTICE file, excluding those notices that do not\n pertain to any part of the Derivative Works, in at least one\n of the following places: within a NOTICE text file distributed\n as part of the Derivative Works; within the Source form or\n documentation, if provided along with the Derivative Works; or,\n within a display generated by the Derivative Works, if and\n wherever such third-party notices normally appear. The contents\n of the NOTICE file are for informational purposes only and\n do not modify the License. 
You may add Your own attribution\n notices within Derivative Works that You distribute, alongside\n or as an addendum to the NOTICE text from the Work, provided\n that such additional attribution notices cannot be construed\n as modifying the License.\n\n You may add Your own copyright statement to Your modifications and\n may provide additional or different license terms and conditions\n for use, reproduction, or distribution of Your modifications, or\n for any such Derivative Works as a whole, provided Your use,\n reproduction, and distribution of the Work otherwise complies with\n the conditions stated in this License.\n\n5. Submission of Contributions. Unless You explicitly state otherwise,\n any Contribution intentionally submitted for inclusion in the Work\n by You to the Licensor shall be under the terms and conditions of\n this License, without any additional terms or conditions.\n Notwithstanding the above, nothing herein shall supersede or modify\n the terms of any separate license agreement you may have executed\n with Licensor regarding such Contributions.\n\n6. Trademarks. This License does not grant permission to use the trade\n names, trademarks, service marks, or product names of the Licensor,\n except as required for reasonable and customary use in describing the\n origin of the Work and reproducing the content of the NOTICE file.\n\n7. Disclaimer of Warranty. Unless required by applicable law or\n agreed to in writing, Licensor provides the Work (and each\n Contributor provides its Contributions) on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n implied, including, without limitation, any warranties or conditions\n of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n PARTICULAR PURPOSE. You are solely responsible for determining the\n appropriateness of using or redistributing the Work and assume any\n risks associated with Your exercise of permissions under this License.\n\n8. 
Limitation of Liability. In no event and under no legal theory,\n whether in tort (including negligence), contract, or otherwise,\n unless required by applicable law (such as deliberate and grossly\n negligent acts) or agreed to in writing, shall any Contributor be\n liable to You for damages, including any direct, indirect, special,\n incidental, or consequential damages of any character arising as a\n result of this License or out of the use or inability to use the\n Work (including but not limited to damages for loss of goodwill,\n work stoppage, computer failure or malfunction, or any and all\n other commercial damages or losses), even if such Contributor\n has been advised of the possibility of such damages.\n\n9. Accepting Warranty or Additional Liability. While redistributing\n the Work or Derivative Works thereof, You may choose to offer,\n and charge a fee for, acceptance of support, warranty, indemnity,\n or other liability obligations and/or rights consistent with this\n License. However, in accepting such obligations, You may act only\n on Your own behalf and on Your sole responsibility, not on behalf\n of any other Contributor, and only if You agree to indemnify,\n defend, and hold each Contributor harmless for any liability\n incurred by, or claims asserted against, such Contributor by reason\n of your accepting any such warranty or additional liability.\n\nEND OF TERMS AND CONDITIONS\n\nAPPENDIX: How to apply the Apache License to your work.\n\n To apply the Apache License to your work, attach the following\n boilerplate notice, with the fields enclosed by brackets \"[]\"\n replaced with your own identifying information. (Don't include\n the brackets!) The text should be enclosed in the appropriate\n comment syntax for the file format. 
We also recommend that a\n file or class name and description of purpose be included on the\n same \"printed page\" as the copyright notice for easier\n identification within third-party archives.\n\nCopyright [yyyy] [name of copyright owner]\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n\thttp://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n" - package_name: bitflags package_version: 2.6.0 repository: https://github.com/bitflags/bitflags @@ -10259,6 +10293,34 @@ third_party_libraries: licenses: - license: MIT text: NOT FOUND +- package_name: nix + package_version: 0.26.4 + repository: https://github.com/nix-rust/nix + license: MIT + licenses: + - license: MIT + text: | + The MIT License (MIT) + + Copyright (c) 2015 Carl Lerche + nix-rust Authors + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. - package_name: num-traits package_version: 0.2.19 repository: https://github.com/rust-num/num-traits diff --git a/bottlecap/src/bin/bottlecap/main.rs b/bottlecap/src/bin/bottlecap/main.rs index c564d1a8a..b5c4b31e5 100644 --- a/bottlecap/src/bin/bottlecap/main.rs +++ b/bottlecap/src/bin/bottlecap/main.rs @@ -15,25 +15,23 @@ use bottlecap::{ event_bus::bus::EventBus, events::Event, lifecycle::{ - flush_control::FlushControl, - invocation_context::{InvocationContext, InvocationContextBuffer}, + flush_control::FlushControl, invocation::processor::Processor as InvocationProcessor, + listener::Listener as LifecycleListener, }, logger, logs::{ agent::LogsAgent, flusher::{build_fqdn_logs, Flusher as LogsFlusher}, }, - metrics::enhanced::lambda::Lambda as enhanced_metrics, secrets::decrypt, tags::{lambda, provider::Provider as TagProvider}, telemetry::{ self, client::TelemetryApiClient, - events::{Status, TelemetryEvent, TelemetryRecord}, + events::{TelemetryEvent, TelemetryRecord}, listener::TelemetryListener, }, traces::{ - hello_agent, stats_flusher::{self, StatsFlusher}, stats_processor, trace_agent, trace_flusher::{self, TraceFlusher}, @@ -55,15 +53,14 @@ use dogstatsd::{ use reqwest::Client; use serde::Deserialize; use std::{ - collections::hash_map, - collections::HashMap, + collections::{hash_map, HashMap}, env, - io::Error, - io::Result, + io::{Error, Result}, os::unix::process::CommandExt, path::Path, process::Command, sync::{Arc, Mutex}, + time::Instant, }; use telemetry::listener::TelemetryListenerConfig; use tokio::sync::mpsc::Sender; @@ -205,6 +202,7 @@ fn load_configs() -> (AwsConfig, Arc) { aws_secret_access_key: env::var("AWS_SECRET_ACCESS_KEY").unwrap_or_default(), aws_session_token: 
env::var("AWS_SESSION_TOKEN").unwrap_or_default(), function_name: env::var("AWS_LAMBDA_FUNCTION_NAME").unwrap_or_default(), + sandbox_init_time: Instant::now(), }; let lambda_directory = env::var("LAMBDA_TASK_ROOT").unwrap_or_else(|_| "/var/task".to_string()); let config = match config::get_config(Path::new(&lambda_directory)) { @@ -265,11 +263,13 @@ async fn extension_loop_active( ) -> Result<()> { let mut event_bus = EventBus::run(); - let tags_provider = setup_tag_provider( - aws_config, - config, - r.account_id.as_ref().unwrap_or(&"none".to_string()), - ); + let account_id = r + .account_id + .as_ref() + .unwrap_or(&"none".to_string()) + .to_string(); + let tags_provider = setup_tag_provider(aws_config, config, &account_id); + let (logs_agent_channel, logs_flusher) = start_logs_agent( config, resolved_api_key.clone(), @@ -294,6 +294,14 @@ async fn extension_loop_active( let trace_flusher = Arc::new(trace_flusher::ServerlessTraceFlusher { buffer: Arc::new(TokioMutex::new(Vec::new())), }); + + // Lifecycle Invocation Processor + let invocation_processor = Arc::new(TokioMutex::new(InvocationProcessor::new( + Arc::clone(&tags_provider), + Arc::clone(config), + aws_config, + Arc::clone(&metrics_aggr), + ))); let trace_processor = Arc::new(trace_processor::ServerlessTraceProcessor { obfuscation_config: Arc::new( obfuscation_config::ObfuscationConfig::new() @@ -312,37 +320,43 @@ async fn extension_loop_active( let trace_flusher_clone = trace_flusher.clone(); let stats_flusher_clone = stats_flusher.clone(); - let trace_agent = Box::new(trace_agent::TraceAgent { - config: Arc::clone(config), - trace_processor, - trace_flusher: trace_flusher_clone, - stats_processor, - stats_flusher: stats_flusher_clone, - tags_provider, - }); + let trace_agent = Box::new( + trace_agent::TraceAgent::new( + Arc::clone(config), + trace_processor.clone(), + trace_flusher_clone, + stats_processor, + stats_flusher_clone, + Arc::clone(&tags_provider), + ) + .await, + ); + let trace_agent_tx = 
trace_agent.get_sender_copy(); + tokio::spawn(async move { - let res = trace_agent.start_trace_agent().await; + let res = trace_agent.start().await; if let Err(e) = res { error!("Error starting trace agent: {e:?}"); } }); + + let lifecycle_listener = LifecycleListener { + invocation_processor: Arc::clone(&invocation_processor), + }; // TODO(astuyve): deprioritize this task after the first request tokio::spawn(async move { - let res = hello_agent::start_handler().await; + let res = lifecycle_listener.start().await; if let Err(e) = res { error!("Error starting hello agent: {e:?}"); } }); - let lambda_enhanced_metrics = - enhanced_metrics::new(Arc::clone(&metrics_aggr), Arc::clone(config)); let dogstatsd_cancel_token = start_dogstatsd(&metrics_aggr).await; let telemetry_listener_cancel_token = setup_telemetry_client(&r.extension_id, logs_agent_channel).await?; let flush_control = FlushControl::new(config.serverless_flush_strategy); - let mut invocation_context_buffer = InvocationContextBuffer::default(); let mut shutdown = false; let mut flush_interval = flush_control.get_flush_interval(); @@ -360,7 +374,9 @@ async fn extension_loop_active( "Invoke event {}; deadline: {}, invoked_function_arn: {}", request_id, deadline_ms, invoked_function_arn ); - lambda_enhanced_metrics.increment_invocation_metric(); + let mut p = invocation_processor.lock().await; + p.on_invoke_event(request_id); + drop(p); } Ok(NextEventResponse::Shutdown { shutdown_reason, @@ -385,92 +401,90 @@ async fn extension_loop_active( Event::Metric(event) => { debug!("Metric event: {:?}", event); } - Event::Telemetry(event) => match event.record { - TelemetryRecord::PlatformStart { request_id, .. 
} => { - invocation_context_buffer.insert(InvocationContext { - request_id, - runtime_duration_ms: 0.0, - }); - } - TelemetryRecord::PlatformInitReport { - initialization_type, - phase, - metrics, - } => { - debug!("Platform init report for initialization_type: {:?} with phase: {:?} and metrics: {:?}", initialization_type, phase, metrics); - lambda_enhanced_metrics - .set_init_duration_metric(metrics.duration_ms); - } - TelemetryRecord::PlatformRuntimeDone { - request_id, - status, - metrics, - .. - } => { - if let Some(metrics) = metrics { - invocation_context_buffer - .add_runtime_duration(&request_id, metrics.duration_ms); - lambda_enhanced_metrics - .set_runtime_duration_metric(metrics.duration_ms); + Event::Telemetry(event) => + match event.record { + TelemetryRecord::PlatformInitStart { .. } => { + let mut p = invocation_processor.lock().await; + p.on_platform_init_start(event.time); + drop(p); } - - if status != Status::Success { - lambda_enhanced_metrics.increment_errors_metric(); - if status == Status::Timeout { - lambda_enhanced_metrics.increment_timeout_metric(); - } + TelemetryRecord::PlatformInitReport { + initialization_type, + phase, + metrics, + } => { + debug!("Platform init report for initialization_type: {:?} with phase: {:?} and metrics: {:?}", initialization_type, phase, metrics); + let mut p = invocation_processor.lock().await; + p.on_platform_init_report(metrics.duration_ms); + drop(p); } - debug!( - "Runtime done for request_id: {:?} with status: {:?}", - request_id, status - ); - // TODO(astuyve) it'll be easy to - // pass the invocation deadline to - // flush tasks here, so they can - // retry if we have more time - if flush_control.should_flush_end() { - tokio::join!( - logs_flusher.flush(), - metrics_flusher.flush(), - trace_flusher.manual_flush(), - stats_flusher.manual_flush() - ); + TelemetryRecord::PlatformStart { request_id, .. 
} => { + let mut p = invocation_processor.lock().await; + p.on_platform_start(request_id, event.time); + drop(p); } - break; - } - TelemetryRecord::PlatformReport { - request_id, - status, - metrics, - .. - } => { - debug!( - "Platform report for request_id: {:?} with status: {:?}", - request_id, status - ); - lambda_enhanced_metrics.set_report_log_metrics(&metrics); - if let Some(invocation_context) = - invocation_context_buffer.remove(&request_id) - { - if invocation_context.runtime_duration_ms > 0.0 { - let post_runtime_duration_ms = metrics.duration_ms - - invocation_context.runtime_duration_ms; - lambda_enhanced_metrics.set_post_runtime_duration_metric( - post_runtime_duration_ms, + TelemetryRecord::PlatformRuntimeDone { + request_id, + status, + metrics, + .. + } => { + debug!( + "Runtime done for request_id: {:?} with status: {:?}", + request_id, status + ); + + let mut p = invocation_processor.lock().await; + if let Some(metrics) = metrics { + p.on_platform_runtime_done( + &request_id, + metrics.duration_ms, + status, + config.clone(), + tags_provider.clone(), + trace_processor.clone(), + trace_agent_tx.clone() + ).await; + } + drop(p); + + // TODO(astuyve) it'll be easy to + // pass the invocation deadline to + // flush tasks here, so they can + // retry if we have more time + if flush_control.should_flush_end() { + tokio::join!( + logs_flusher.flush(), + metrics_flusher.flush(), + trace_flusher.manual_flush(), + stats_flusher.manual_flush() ); - } else { - debug!("Impossible to compute post runtime duration for request_id: {:?}", request_id); } - } - if shutdown { break; } + TelemetryRecord::PlatformReport { + request_id, + status, + metrics, + .. 
+ } => { + debug!( + "Platform report for request_id: {:?} with status: {:?}", + request_id, status + ); + let mut p = invocation_processor.lock().await; + p.on_platform_report(&request_id, metrics); + drop(p); + + if shutdown { + break; + } + } + _ => { + debug!("Unforwarded Telemetry event: {:?}", event); + } } - _ => { - debug!("Unforwarded Telemetry event: {:?}", event); - } - }, } } _ = flush_interval.tick() => { diff --git a/bottlecap/src/config/mod.rs b/bottlecap/src/config/mod.rs index c2b2655db..54feab25b 100644 --- a/bottlecap/src/config/mod.rs +++ b/bottlecap/src/config/mod.rs @@ -1,16 +1,23 @@ pub mod flush_strategy; pub mod log_level; pub mod processing_rule; +pub mod service_mapping; +pub mod trace_propagation_style; +use std::collections::HashMap; use std::path::Path; +use std::time::Instant; +use std::vec; use figment::providers::{Format, Yaml}; use figment::{providers::Env, Figment}; use serde::Deserialize; +use trace_propagation_style::{deserialize_trace_propagation_style, TracePropagationStyle}; use crate::config::flush_strategy::FlushStrategy; use crate::config::log_level::{deserialize_log_level, LogLevel}; use crate::config::processing_rule::{deserialize_processing_rules, ProcessingRule}; +use crate::config::service_mapping::deserialize_service_mapping; /// `FailoverConfig` is a struct that represents fields that are not supported in the extension yet. 
/// @@ -62,6 +69,17 @@ pub struct Config { pub serverless_flush_strategy: FlushStrategy, pub enhanced_metrics: bool, pub https_proxy: Option, + pub capture_lambda_payload: bool, + pub capture_lambda_payload_max_depth: u32, + #[serde(deserialize_with = "deserialize_service_mapping")] + pub service_mapping: HashMap, + // Trace Propagation + #[serde(deserialize_with = "deserialize_trace_propagation_style")] + pub trace_propagation_style: Vec, + #[serde(deserialize_with = "deserialize_trace_propagation_style")] + pub trace_propagation_style_extract: Vec, + pub trace_propagation_extract_first: bool, + pub trace_propagation_http_baggage_enabled: bool, } impl Default for Config { @@ -83,8 +101,18 @@ impl Default for Config { logs_config_processing_rules: None, // Metrics enhanced_metrics: true, - // Failover https_proxy: None, + capture_lambda_payload: false, + capture_lambda_payload_max_depth: 10, + service_mapping: HashMap::new(), + // Trace Propagation + trace_propagation_style: vec![ + TracePropagationStyle::Datadog, + TracePropagationStyle::TraceContext, + ], + trace_propagation_style_extract: vec![], + trace_propagation_extract_first: false, + trace_propagation_http_baggage_enabled: false, } } } @@ -125,13 +153,6 @@ fn failsover(figment: &Figment) -> Result<(), ConfigError> { )); } - let datadog_wrapper_set = - std::env::var("AWS_LAMBDA_EXEC_WRAPPER").unwrap_or_default() == "/opt/datadog_wrapper"; - if datadog_wrapper_set { - log_failover_reason("datadog_wrapper"); - return Err(ConfigError::UnsupportedField("datadog_wrapper".to_string())); - } - if failover_config.serverless_appsec_enabled || failover_config.appsec_enabled { log_failover_reason("appsec_enabled"); return Err(ConfigError::UnsupportedField("appsec_enabled".to_string())); @@ -192,16 +213,27 @@ pub fn get_config(config_directory: &Path) -> Result { } } + // Trace Propagation + // + // If not set by the user, set defaults + if config.trace_propagation_style_extract.is_empty() { + config + 
.trace_propagation_style_extract + .clone_from(&config.trace_propagation_style); + } + Ok(config) } #[allow(clippy::module_name_repetitions)] +#[derive(Debug, Clone)] pub struct AwsConfig { pub region: String, pub aws_access_key_id: String, pub aws_secret_access_key: String, pub aws_session_token: String, pub function_name: String, + pub sandbox_init_time: Instant, } #[cfg(test)] @@ -224,22 +256,6 @@ pub mod tests { }); } - #[test] - fn test_reject_datadog_wrapper() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env("DD_EXTENSION_VERSION", "next"); - jail.set_env("AWS_LAMBDA_EXEC_WRAPPER", "/opt/datadog_wrapper"); - - let config = get_config(Path::new("")).expect_err("should reject unknown fields"); - assert_eq!( - config, - ConfigError::UnsupportedField("datadog_wrapper".to_string()) - ); - Ok(()) - }); - } - #[test] fn test_allowed_but_disabled() { figment::Jail::expect_with(|jail| { @@ -269,13 +285,7 @@ pub mod tests { )?; jail.set_env("DD_SITE", "datad0g.com"); let config = get_config(Path::new("")).expect("should parse config"); - assert_eq!( - config, - Config { - site: "datad0g.com".to_string(), - ..Config::default() - } - ); + assert_eq!(config.site, "datad0g.com"); Ok(()) }); } @@ -291,13 +301,7 @@ pub mod tests { ", )?; let config = get_config(Path::new("")).expect("should parse config"); - assert_eq!( - config, - Config { - site: "datadoghq.com".to_string(), - ..Config::default() - } - ); + assert_eq!(config.site, "datadoghq.com"); Ok(()) }); } @@ -309,13 +313,7 @@ pub mod tests { jail.set_env("DD_SITE", "datadoghq.eu"); jail.set_env("DD_EXTENSION_VERSION", "next"); let config = get_config(Path::new("")).expect("should parse config"); - assert_eq!( - config, - Config { - site: "datadoghq.eu".to_string(), - ..Config::default() - } - ); + assert_eq!(config.site, "datadoghq.eu"); Ok(()) }); } @@ -327,14 +325,7 @@ pub mod tests { jail.set_env("DD_LOG_LEVEL", "TRACE"); jail.set_env("DD_EXTENSION_VERSION", "next"); let config = 
get_config(Path::new("")).expect("should parse config"); - assert_eq!( - config, - Config { - log_level: LogLevel::Trace, - site: "datadoghq.com".to_string(), - ..Config::default() - } - ); + assert_eq!(config.log_level, LogLevel::Trace); Ok(()) }); } @@ -349,6 +340,10 @@ pub mod tests { config, Config { site: "datadoghq.com".to_string(), + trace_propagation_style_extract: vec![ + TracePropagationStyle::Datadog, + TracePropagationStyle::TraceContext + ], ..Config::default() } ); @@ -363,14 +358,7 @@ pub mod tests { jail.set_env("DD_SERVERLESS_FLUSH_STRATEGY", "end"); jail.set_env("DD_EXTENSION_VERSION", "next"); let config = get_config(Path::new("")).expect("should parse config"); - assert_eq!( - config, - Config { - serverless_flush_strategy: FlushStrategy::End, - site: "datadoghq.com".to_string(), - ..Config::default() - } - ); + assert_eq!(config.serverless_flush_strategy, FlushStrategy::End); Ok(()) }); } @@ -383,14 +371,8 @@ pub mod tests { jail.set_env("DD_EXTENSION_VERSION", "next"); let config = get_config(Path::new("")).expect("should parse config"); assert_eq!( - config, - Config { - serverless_flush_strategy: FlushStrategy::Periodically(PeriodicStrategy { - interval: 100_000 - }), - site: "datadoghq.com".to_string(), - ..Config::default() - } + config.serverless_flush_strategy, + FlushStrategy::Periodically(PeriodicStrategy { interval: 100_000 }) ); Ok(()) }); @@ -403,13 +385,7 @@ pub mod tests { jail.set_env("DD_SERVERLESS_FLUSH_STRATEGY", "invalid_strategy"); jail.set_env("DD_EXTENSION_VERSION", "next"); let config = get_config(Path::new("")).expect("should parse config"); - assert_eq!( - config, - Config { - site: "datadoghq.com".to_string(), - ..Config::default() - } - ); + assert_eq!(config.serverless_flush_strategy, FlushStrategy::Default); Ok(()) }); } @@ -424,13 +400,7 @@ pub mod tests { ); jail.set_env("DD_EXTENSION_VERSION", "next"); let config = get_config(Path::new("")).expect("should parse config"); - assert_eq!( - config, - Config { - site: 
"datadoghq.com".to_string(), - ..Config::default() - } - ); + assert_eq!(config.serverless_flush_strategy, FlushStrategy::Default); Ok(()) }); } @@ -457,17 +427,13 @@ pub mod tests { jail.set_env("DD_EXTENSION_VERSION", "next"); let config = get_config(Path::new("")).expect("should parse config"); assert_eq!( - config, - Config { - logs_config_processing_rules: Some(vec![ProcessingRule { - kind: processing_rule::Kind::ExcludeAtMatch, - name: "exclude".to_string(), - pattern: "exclude".to_string(), - replace_placeholder: None - }]), - site: "datadoghq.com".to_string(), - ..Config::default() - } + config.logs_config_processing_rules, + Some(vec![ProcessingRule { + kind: processing_rule::Kind::ExcludeAtMatch, + name: "exclude".to_string(), + pattern: "exclude".to_string(), + replace_placeholder: None + }]) ); Ok(()) }); @@ -491,39 +457,75 @@ pub mod tests { )?; let config = get_config(Path::new("")).expect("should parse config"); assert_eq!( - config, - Config { - logs_config_processing_rules: Some(vec![ProcessingRule { - kind: processing_rule::Kind::ExcludeAtMatch, - name: "exclude".to_string(), - pattern: "exclude".to_string(), - replace_placeholder: None - }]), - site: "datadoghq.com".to_string(), - ..Config::default() - } + config.logs_config_processing_rules, + Some(vec![ProcessingRule { + kind: processing_rule::Kind::ExcludeAtMatch, + name: "exclude".to_string(), + pattern: "exclude".to_string(), + replace_placeholder: None + }]), ); Ok(()) }); } #[test] - fn test_ignore_apm_replace_tags() { + fn test_parse_trace_propagation_style() { figment::Jail::expect_with(|jail| { jail.clear_env(); jail.set_env( - "DD_APM_REPLACE_TAGS", - r#"[{"name":"resource.name","pattern":"(.*)/(foo[:%].+)","repl":"$1/{foo}"}]"#, + "DD_TRACE_PROPAGATION_STYLE", + "datadog,tracecontext,b3,b3multi", ); jail.set_env("DD_EXTENSION_VERSION", "next"); let config = get_config(Path::new("")).expect("should parse config"); + + let expected_styles = vec![ + TracePropagationStyle::Datadog, + 
TracePropagationStyle::TraceContext, + TracePropagationStyle::B3, + TracePropagationStyle::B3Multi, + ]; + assert_eq!(config.trace_propagation_style, expected_styles); + assert_eq!(config.trace_propagation_style_extract, expected_styles); + Ok(()) + }); + } + + #[test] + fn test_parse_trace_propagation_style_extract() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env("DD_TRACE_PROPAGATION_STYLE_EXTRACT", "datadog"); + jail.set_env("DD_EXTENSION_VERSION", "next"); + let config = get_config(Path::new("")).expect("should parse config"); + assert_eq!( - config, - Config { - site: "datadoghq.com".to_string(), - ..Config::default() - } + config.trace_propagation_style, + vec![ + TracePropagationStyle::Datadog, + TracePropagationStyle::TraceContext, + ] + ); + assert_eq!( + config.trace_propagation_style_extract, + vec![TracePropagationStyle::Datadog] + ); + Ok(()) + }); + } + + #[test] + fn test_ignore_apm_replace_tags() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env( + "DD_APM_REPLACE_TAGS", + r#"[{"name":"resource.name","pattern":"(.*)/(foo[:%].+)","repl":"$1/{foo}"}]"#, ); + jail.set_env("DD_EXTENSION_VERSION", "next"); + let config = get_config(Path::new("")); + assert!(config.is_ok()); Ok(()) }); } diff --git a/bottlecap/src/config/service_mapping.rs b/bottlecap/src/config/service_mapping.rs new file mode 100644 index 000000000..4deda11fd --- /dev/null +++ b/bottlecap/src/config/service_mapping.rs @@ -0,0 +1,35 @@ +use std::collections::HashMap; + +use serde::{Deserialize, Deserializer}; +use tracing::debug; + +#[allow(clippy::module_name_repetitions)] +pub fn deserialize_service_mapping<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let s: String = String::deserialize(deserializer)?; + + let map = s + .split(',') + .map(|pair| { + let mut split = pair.split(':'); + + let service = split.next(); + let to_map = split.next(); + + if let (Some(service), Some(to_map)) = 
(service, to_map) { + Ok((service.trim().to_string(), to_map.trim().to_string())) + } else { + debug!("Ignoring invalid service mapping pair: {pair}"); + Err(serde::de::Error::custom(format!( + "Failed to deserialize service mapping for pair: {pair}" + ))) + } + }) + .collect(); + + map +} diff --git a/bottlecap/src/config/trace_propagation_style.rs b/bottlecap/src/config/trace_propagation_style.rs new file mode 100644 index 000000000..6ebc9dc74 --- /dev/null +++ b/bottlecap/src/config/trace_propagation_style.rs @@ -0,0 +1,58 @@ +use std::{fmt::Display, str::FromStr}; + +use serde::{Deserialize, Deserializer}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum TracePropagationStyle { + Datadog, + B3Multi, + B3, + TraceContext, + None, +} + +impl FromStr for TracePropagationStyle { + type Err = String; + + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "datadog" => Ok(TracePropagationStyle::Datadog), + "b3multi" => Ok(TracePropagationStyle::B3Multi), + "b3" => Ok(TracePropagationStyle::B3), + "tracecontext" => Ok(TracePropagationStyle::TraceContext), + "none" => Ok(TracePropagationStyle::None), + _ => Err(format!("Unknown trace propagation style: {s}")), + } + } +} + +impl Display for TracePropagationStyle { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let style = match self { + TracePropagationStyle::Datadog => "datadog", + TracePropagationStyle::B3Multi => "b3multi", + TracePropagationStyle::B3 => "b3", + TracePropagationStyle::TraceContext => "tracecontext", + TracePropagationStyle::None => "none", + }; + write!(f, "{style}") + } +} + +#[allow(clippy::module_name_repetitions)] +pub fn deserialize_trace_propagation_style<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let s: String = String::deserialize(deserializer)?; + + s.split(',') + .map(|style| { + TracePropagationStyle::from_str(style.trim()).map_err(|e| { + serde::de::Error::custom(format!("Failed to 
deserialize propagation style: {e}")) + }) + }) + .collect() +} diff --git a/bottlecap/src/lib.rs b/bottlecap/src/lib.rs index ce2cb847b..59b445215 100644 --- a/bottlecap/src/lib.rs +++ b/bottlecap/src/lib.rs @@ -25,6 +25,7 @@ pub mod lifecycle; pub mod logger; pub mod logs; pub mod metrics; +pub mod proc; pub mod secrets; pub mod tags; pub mod telemetry; diff --git a/bottlecap/src/lifecycle/invocation/context.rs b/bottlecap/src/lifecycle/invocation/context.rs new file mode 100644 index 000000000..97e6bbf14 --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/context.rs @@ -0,0 +1,348 @@ +use crate::metrics::enhanced::lambda::EnhancedMetricData; +use std::collections::VecDeque; + +use tracing::debug; + +#[derive(Debug, Clone, PartialEq)] +pub struct Context { + pub request_id: String, + pub runtime_duration_ms: f64, + pub init_duration_ms: f64, + pub start_time: i64, + pub enhanced_metric_data: Option, +} + +impl Context { + #[must_use] + pub fn new( + request_id: String, + runtime_duration_ms: f64, + init_duration_ms: f64, + start_time: i64, + enhanced_metric_data: Option, + ) -> Self { + Context { + request_id, + runtime_duration_ms, + init_duration_ms, + start_time, + enhanced_metric_data, + } + } +} + +#[allow(clippy::module_name_repetitions)] +pub struct ContextBuffer { + buffer: VecDeque, +} + +impl Default for ContextBuffer { + /// Creates a new `ContextBuffer` with a default capacity of 5. + /// + fn default() -> Self { + ContextBuffer { + buffer: VecDeque::::with_capacity(5), + } + } +} + +impl ContextBuffer { + #[allow(dead_code)] + fn with_capacity(capacity: usize) -> Self { + ContextBuffer { + buffer: VecDeque::::with_capacity(capacity), + } + } + + /// Inserts a context into the buffer. If the buffer is full, the oldest `Context` is removed. 
+ /// + fn insert(&mut self, context: Context) { + if self.size() == self.buffer.capacity() { + self.buffer.pop_front(); + self.buffer.push_back(context); + } else { + if self.get(&context.request_id).is_some() { + self.remove(&context.request_id); + } + + self.buffer.push_back(context); + } + } + + /// Removes a context from the buffer. Returns the removed `Context` if found. + /// + pub fn remove(&mut self, request_id: &String) -> Option { + if let Some(i) = self + .buffer + .iter() + .position(|context| context.request_id == *request_id) + { + return self.buffer.remove(i); + } + debug!("Context for request_id: {:?} not found", request_id); + + None + } + + /// Returns a reference to a `Context` from the buffer if found. + /// + #[must_use] + pub fn get(&self, request_id: &String) -> Option<&Context> { + self.buffer + .iter() + .find(|context| context.request_id == *request_id) + } + + /// Creates a new `Context` and adds it to the buffer. + /// + pub fn create_context(&mut self, request_id: String) { + self.insert(Context::new(request_id, 0f64, 0f64, 0, None)); + } + + /// Adds the init duration to a `Context` in the buffer. + /// + pub fn add_init_duration(&mut self, request_id: &String, init_duration_ms: f64) { + if let Some(context) = self + .buffer + .iter_mut() + .find(|context| context.request_id == *request_id) + { + context.init_duration_ms = init_duration_ms; + } else { + debug!("Could not add init duration - context not found"); + } + } + + /// Adds the start time to a `Context` in the buffer. + /// + pub fn add_start_time(&mut self, request_id: &String, start_time: i64) { + if let Some(context) = self + .buffer + .iter_mut() + .find(|context| context.request_id == *request_id) + { + context.start_time = start_time; + } else { + debug!("Could not add start time - context not found"); + } + } + + /// Adds the runtime duration to a `Context` in the buffer. 
+ /// + pub fn add_runtime_duration(&mut self, request_id: &String, runtime_duration_ms: f64) { + if let Some(context) = self + .buffer + .iter_mut() + .find(|context| context.request_id == *request_id) + { + context.runtime_duration_ms = runtime_duration_ms; + } else { + debug!("Could not add runtime duration - context not found"); + } + } + + /// Adds the network offset to a `Context` in the buffer. + /// + pub fn add_enhanced_metric_data( + &mut self, + request_id: &String, + enhanced_metric_data: Option, + ) { + if let Some(context) = self + .buffer + .iter_mut() + .find(|context| context.request_id == *request_id) + { + context.enhanced_metric_data = enhanced_metric_data; + } else { + debug!("Could not add network offset - context not found"); + } + } + + /// Returns the size of the buffer. + /// + #[must_use] + pub fn size(&self) -> usize { + self.buffer.len() + } + + /// Returns if the buffer is empty. + /// + #[must_use] + pub fn is_empty(&self) -> bool { + self.buffer.is_empty() + } +} + +#[cfg(test)] +#[allow(clippy::unwrap_used)] +mod tests { + use crate::proc::{CPUData, NetworkData}; + use std::collections::HashMap; + use tokio::sync::watch; + + use super::*; + + #[test] + fn test_insert() { + let mut buffer = ContextBuffer::with_capacity(2); + + let request_id = String::from("1"); + let context = Context::new(request_id.clone(), 0f64, 0f64, 0, None); + buffer.insert(context.clone()); + assert_eq!(buffer.size(), 1); + assert_eq!(buffer.get(&request_id).unwrap(), &context); + + let request_id_2 = String::from("2"); + let context = Context::new(request_id_2.clone(), 0f64, 0f64, 0, None); + buffer.insert(context.clone()); + assert_eq!(buffer.size(), 2); + assert_eq!(buffer.get(&request_id_2).unwrap(), &context); + + // This should replace the first context + let request_id_3 = String::from("3"); + let context = Context::new(request_id_3.clone(), 0f64, 0f64, 0, None); + buffer.insert(context.clone()); + assert_eq!(buffer.size(), 2); + 
assert_eq!(buffer.get(&request_id_3).unwrap(), &context); + + // First context should be None + assert!(buffer.get(&request_id).is_none()); + } + + #[test] + fn test_remove() { + let mut buffer = ContextBuffer::with_capacity(2); + + let request_id = String::from("1"); + let context = Context::new(request_id.clone(), 0f64, 0f64, 0, None); + buffer.insert(context.clone()); + assert_eq!(buffer.size(), 1); + assert_eq!(buffer.get(&request_id).unwrap(), &context); + + let request_id_2 = String::from("2"); + let context = Context::new(request_id_2.clone(), 0f64, 0f64, 0, None); + buffer.insert(context.clone()); + assert_eq!(buffer.size(), 2); + assert_eq!(buffer.get(&request_id_2).unwrap(), &context); + + // Remove the first context + assert_eq!(buffer.remove(&request_id).unwrap().request_id, request_id); + // Size is reduced by 1 + assert_eq!(buffer.size(), 1); + assert!(buffer.get(&request_id).is_none()); + + // Remove a context that doesn't exist + let unexistent_request_id = String::from("unexistent"); + assert!(buffer.remove(&unexistent_request_id).is_none()); + } + + #[test] + fn test_get() { + let mut buffer = ContextBuffer::with_capacity(2); + + let request_id = String::from("1"); + let context = Context::new(request_id.clone(), 0f64, 0f64, 0, None); + buffer.insert(context.clone()); + assert_eq!(buffer.size(), 1); + assert_eq!(buffer.get(&request_id).unwrap(), &context); + + let request_id_2 = String::from("2"); + let context = Context::new(request_id_2.clone(), 0f64, 0f64, 0, None); + buffer.insert(context.clone()); + assert_eq!(buffer.size(), 2); + assert_eq!(buffer.get(&request_id_2).unwrap(), &context); + + // Get a context that doesn't exist + let unexistent_request_id = String::from("unexistent"); + assert!(buffer.get(&unexistent_request_id).is_none()); + } + + #[test] + fn test_add_init_duration() { + let mut buffer = ContextBuffer::with_capacity(2); + + let request_id = String::from("1"); + let context = Context::new(request_id.clone(), 0f64, 0f64, 0, 
None); + buffer.insert(context.clone()); + assert_eq!(buffer.size(), 1); + assert_eq!(buffer.get(&request_id).unwrap(), &context); + + buffer.add_init_duration(&request_id, 100f64); + assert!((buffer.get(&request_id).unwrap().init_duration_ms - 100f64).abs() < f64::EPSILON); + } + + #[test] + fn test_add_start_time() { + let mut buffer = ContextBuffer::with_capacity(2); + + let request_id = String::from("1"); + let context = Context::new(request_id.clone(), 0f64, 0f64, 0, None); + buffer.insert(context.clone()); + assert_eq!(buffer.size(), 1); + assert_eq!(buffer.get(&request_id).unwrap(), &context); + + buffer.add_start_time(&request_id, 100); + assert_eq!(buffer.get(&request_id).unwrap().start_time, 100); + } + + #[test] + fn test_add_runtime_duration() { + let mut buffer = ContextBuffer::with_capacity(2); + + let request_id = String::from("1"); + let context = Context::new(request_id.clone(), 0f64, 0f64, 0, None); + buffer.insert(context.clone()); + assert_eq!(buffer.size(), 1); + assert_eq!(buffer.get(&request_id).unwrap(), &context); + + buffer.add_runtime_duration(&request_id, 100f64); + assert!( + (buffer.get(&request_id).unwrap().runtime_duration_ms - 100f64).abs() < f64::EPSILON + ); + } + + #[test] + fn test_add_enhanced_metric_data() { + let mut buffer = ContextBuffer::with_capacity(2); + + let request_id = String::from("1"); + let context = Context::new(request_id.clone(), 0f64, 0f64, 0, None); + buffer.insert(context.clone()); + assert_eq!(buffer.size(), 1); + assert_eq!(buffer.get(&request_id).unwrap(), &context); + + let network_offset = Some(NetworkData { + rx_bytes: 180f64, + tx_bytes: 254.0, + }); + + let mut individual_cpu_idle_times = HashMap::new(); + individual_cpu_idle_times.insert("cpu0".to_string(), 10f64); + individual_cpu_idle_times.insert("cpu1".to_string(), 20f64); + let cpu_offset = Some(CPUData { + total_user_time_ms: 100f64, + total_system_time_ms: 53.0, + total_idle_time_ms: 20f64, + individual_cpu_idle_times, + }); + + let 
uptime_offset = Some(50f64); + let (tmp_chan_tx, _) = watch::channel(()); + let (process_chan_tx, _) = watch::channel(()); + + let enhanced_metric_data = Some(EnhancedMetricData { + network_offset, + cpu_offset, + uptime_offset, + tmp_chan_tx, + process_chan_tx, + }); + + buffer.add_enhanced_metric_data(&request_id, enhanced_metric_data.clone()); + assert_eq!( + buffer.get(&request_id).unwrap().enhanced_metric_data, + enhanced_metric_data, + ); + } +} diff --git a/bottlecap/src/lifecycle/invocation/mod.rs b/bottlecap/src/lifecycle/invocation/mod.rs new file mode 100644 index 000000000..b62a757d9 --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/mod.rs @@ -0,0 +1,278 @@ +use base64::{engine::general_purpose, DecodeError, Engine}; +use datadog_trace_protobuf::pb::Span; +use rand::{rngs::OsRng, Rng, RngCore}; + +use crate::tags::lambda::tags::{INIT_TYPE, SNAP_START_VALUE}; +use serde_json::Value; +use tracing::debug; + +pub mod context; +pub mod processor; +pub mod span_inferrer; +pub mod triggers; + +const MAX_TAG_CHARS: usize = 4096; +const REDACTABLE_KEYS: [&str; 8] = [ + "password", + "passwd", + "pwd", + "secret", + "token", + "authorization", + "x-authorization", + "api_key", +]; + +pub fn base64_to_string(base64_string: &str) -> Result { + match general_purpose::STANDARD.decode(base64_string) { + Ok(bytes) => Ok(String::from_utf8_lossy(&bytes).to_string()), + Err(e) => Err(e), + } +} + +fn create_empty_span(name: String, resource: String, service: String) -> Span { + Span { + name, + resource, + service, + r#type: String::from("serverless"), + ..Default::default() + } +} + +fn generate_span_id() -> u64 { + if std::env::var(INIT_TYPE).map_or(false, |it| it == SNAP_START_VALUE) { + return OsRng.next_u64(); + } + + let mut rng = rand::thread_rng(); + rng.gen() +} + +pub fn tag_span_from_value(span: &mut Span, key: &str, value: &Value, depth: u32, max_depth: u32) { + // Null scenario + if value.is_null() { + span.meta.insert(key.to_string(), 
value.to_string()); + return; + } + + // Check max depth + if depth >= max_depth { + match serde_json::to_string(value) { + Ok(s) => { + let truncated = s.chars().take(MAX_TAG_CHARS).collect::(); + span.meta.insert(key.to_string(), truncated); + return; + } + Err(e) => { + debug!("Unable to serialize value for tagging {e}"); + return; + } + } + } + + let new_depth = depth + 1; + match value { + // Handle string case + Value::String(s) => { + if let Ok(p) = serde_json::from_str::(s) { + tag_span_from_value(span, key, &p, new_depth, max_depth); + } else { + let truncated = s.chars().take(MAX_TAG_CHARS).collect::(); + span.meta + .insert(key.to_string(), redact_value(key, truncated)); + } + } + + // Handle number case + Value::Number(n) => { + span.meta.insert(key.to_string(), n.to_string()); + } + + // Handle boolean case + Value::Bool(b) => { + span.meta.insert(key.to_string(), b.to_string()); + } + + // Handle object case + Value::Object(map) => { + for (k, v) in map { + let new_key = format!("{key}.{k}"); + tag_span_from_value(span, &new_key, v, new_depth, max_depth); + } + } + + Value::Array(a) => { + if a.is_empty() { + span.meta.insert(key.to_string(), "[]".to_string()); + return; + } + + for (i, v) in a.iter().enumerate() { + let new_key = format!("{key}.{i}"); + tag_span_from_value(span, &new_key, v, new_depth, max_depth); + } + } + Value::Null => {} + } +} + +fn redact_value(key: &str, value: String) -> String { + let split_key = key.split('.').last().unwrap_or_default(); + if REDACTABLE_KEYS.contains(&split_key) { + String::from("redacted") + } else { + value + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use serde_json::json; + + use super::*; + + #[test] + fn test_simple_tagging() { + let mut span = Span::default(); + let value = json!({ "request": { "simple": "value" } }); + + tag_span_from_value(&mut span, "payload", &value, 0, 10); + + let expected = HashMap::from([("payload.request.simple".to_string(), "value".to_string())]); 
+ + assert_eq!(span.meta, expected); + } + + #[test] + fn test_complex_object() { + let mut span = Span::default(); + let value = json!({ + "request": { + "simple": "value", + "obj": { + "arr": ["a", "b", "c"], + "boolean": true, + "nested": { + "value": "nested_value" + } + }, + "empty": null, + "number": 1, + "boolean": true, + } + }); + + tag_span_from_value(&mut span, "payload", &value, 0, 10); + + let expected = HashMap::from([ + ("payload.request.simple".to_string(), "value".to_string()), + ("payload.request.obj.arr.0".to_string(), "a".to_string()), + ("payload.request.obj.arr.1".to_string(), "b".to_string()), + ("payload.request.obj.arr.2".to_string(), "c".to_string()), + ( + "payload.request.obj.boolean".to_string(), + "true".to_string(), + ), + ( + "payload.request.obj.nested.value".to_string(), + "nested_value".to_string(), + ), + ("payload.request.empty".to_string(), "null".to_string()), + ("payload.request.number".to_string(), "1".to_string()), + ("payload.request.boolean".to_string(), "true".to_string()), + ]); + + assert_eq!(span.meta, expected); + } + + #[test] + fn test_array_of_objects() { + let mut span = Span::default(); + let value = json!({ + "request": [ + { "simple": "value" }, + { "simple": "value" }, + { "simple": "value" }, + ] + }); + + tag_span_from_value(&mut span, "payload", &value, 0, 10); + + let expected = HashMap::from([ + ("payload.request.0.simple".to_string(), "value".to_string()), + ("payload.request.1.simple".to_string(), "value".to_string()), + ("payload.request.2.simple".to_string(), "value".to_string()), + ]); + + assert_eq!(span.meta, expected); + } + + #[test] + fn test_reach_max_depth() { + let mut span = Span::default(); + let value = json!({ + "hello": "world", + "empty": null, + "level1": { + "obj": { + "level3": 3 + }, + "arr": [null, true, "great", { "l3": "v3" }], + "boolean": true, + "number": 2, + "empty": null, + "empty_obj": {}, + "empty_arr": [] + }, + "arr": [{ "a": "b" }, { "c": "d" }] + }); + + 
tag_span_from_value(&mut span, "payload", &value, 0, 2); + + let expected = HashMap::from([ + ("payload.hello".to_string(), "world".to_string()), + ("payload.empty".to_string(), "null".to_string()), + ( + "payload.level1.obj".to_string(), + "{\"level3\":3}".to_string(), + ), + ( + "payload.level1.arr".to_string(), + "[null,true,\"great\",{\"l3\":\"v3\"}]".to_string(), + ), + ("payload.level1.boolean".to_string(), "true".to_string()), + ("payload.level1.number".to_string(), "2".to_string()), + ("payload.level1.empty".to_string(), "null".to_string()), + ("payload.level1.empty_obj".to_string(), "{}".to_string()), + ("payload.level1.empty_arr".to_string(), "[]".to_string()), + ("payload.arr.0".to_string(), "{\"a\":\"b\"}".to_string()), + ("payload.arr.1".to_string(), "{\"c\":\"d\"}".to_string()), + ]); + + assert_eq!(span.meta, expected); + } + + #[test] + fn test_tag_redacts_key() { + let mut span = Span::default(); + let value = json!({ + "request": { + "headers": { + "authorization": "secret token", + } + } + }); + + tag_span_from_value(&mut span, "payload", &value, 0, 10); + + let expected = HashMap::from([( + "payload.request.headers.authorization".to_string(), + "redacted".to_string(), + )]); + + assert_eq!(span.meta, expected); + } +} diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs new file mode 100644 index 000000000..b8bd3e40b --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -0,0 +1,575 @@ +use std::{ + collections::HashMap, + sync::{Arc, Mutex}, + time::{Instant, SystemTime, UNIX_EPOCH}, +}; + +use chrono::{DateTime, Utc}; +use datadog_trace_protobuf::pb::Span; +use datadog_trace_utils::{send_data::SendData, tracer_header_tags}; +use dogstatsd::aggregator::Aggregator as MetricsAggregator; +use serde_json::{json, Value}; +use tokio::sync::{mpsc::Sender, watch}; +use tracing::debug; + +use crate::{ + config::{self, AwsConfig}, + lifecycle::invocation::{ + base64_to_string, 
context::ContextBuffer, create_empty_span, generate_span_id, + span_inferrer::SpanInferrer, tag_span_from_value, + }, + metrics::enhanced::lambda::{EnhancedMetricData, Lambda as EnhancedMetrics}, + proc::{self, CPUData, NetworkData}, + tags::provider, + telemetry::events::{ReportMetrics, Status}, + traces::{ + context::SpanContext, + propagation::{ + text_map_propagator::{ + DatadogHeaderPropagator, DATADOG_PARENT_ID_KEY, DATADOG_SPAN_ID_KEY, + DATADOG_TRACE_ID_KEY, + }, + DatadogCompositePropagator, Propagator, + }, + trace_processor, + }, +}; + +pub const MS_TO_NS: f64 = 1_000_000.0; +pub const S_TO_NS: f64 = 1_000_000_000.0; +pub const PROACTIVE_INITIALIZATION_THRESHOLD_MS: u64 = 10_000; + +pub const DATADOG_INVOCATION_ERROR_MESSAGE_KEY: &str = "x-datadog-invocation-error-msg"; +pub const DATADOG_INVOCATION_ERROR_TYPE_KEY: &str = "x-datadog-invocation-error-type"; +pub const DATADOG_INVOCATION_ERROR_STACK_KEY: &str = "x-datadog-invocation-error-stack"; +pub const DATADOG_INVOCATION_ERROR_KEY: &str = "x-datadog-invocation-error"; + +pub struct Processor { + // Buffer containing context of the previous 5 invocations + pub context_buffer: ContextBuffer, + // Helper to infer span information + inferrer: SpanInferrer, + // Current invocation span + pub span: Span, + // Cold start span + cold_start_span: Option, + // Extracted span context from inferred span, headers, or payload + pub extracted_span_context: Option, + // Used to extract the trace context from inferred span, headers, or payload + propagator: DatadogCompositePropagator, + // Helper to send enhanced metrics + enhanced_metrics: EnhancedMetrics, + // AWS configuration from the Lambda environment + aws_config: AwsConfig, + // Flag to determine if a tracer was detected + tracer_detected: bool, + config: Arc, +} + +impl Processor { + #[must_use] + pub fn new( + tags_provider: Arc, + config: Arc, + aws_config: &AwsConfig, + metrics_aggregator: Arc>, + ) -> Self { + let service = 
config.service.clone().unwrap_or(String::from("aws.lambda")); + let resource = tags_provider + .get_canonical_resource_name() + .unwrap_or(String::from("aws.lambda")); + + let propagator = DatadogCompositePropagator::new(Arc::clone(&config)); + + Processor { + context_buffer: ContextBuffer::default(), + inferrer: SpanInferrer::new(config.service_mapping.clone()), + span: create_empty_span(String::from("aws.lambda"), resource, service), + cold_start_span: None, + extracted_span_context: None, + propagator, + enhanced_metrics: EnhancedMetrics::new(metrics_aggregator, Arc::clone(&config)), + aws_config: aws_config.clone(), + tracer_detected: false, + config: Arc::clone(&config), + } + } + + /// Given a `request_id`, creates the context and adds the enhanced metric offsets to the context buffer. + /// + pub fn on_invoke_event(&mut self, request_id: String) { + self.reset_state(); + self.set_init_tags(); + + self.context_buffer.create_context(request_id.clone()); + if self.config.enhanced_metrics { + // Collect offsets for network and cpu metrics + let network_offset: Option = proc::get_network_data().ok(); + let cpu_offset: Option = proc::get_cpu_data().ok(); + let uptime_offset: Option = proc::get_uptime().ok(); + + // Start a channel for monitoring tmp enhanced data + let (tmp_chan_tx, tmp_chan_rx) = watch::channel(()); + self.enhanced_metrics.set_tmp_enhanced_metrics(tmp_chan_rx); + + // Start a channel for monitoring file descriptor and thread count + let (process_chan_tx, process_chan_rx) = watch::channel(()); + self.enhanced_metrics + .set_process_enhanced_metrics(process_chan_rx); + + let enhanced_metric_offsets = Some(EnhancedMetricData { + network_offset, + cpu_offset, + uptime_offset, + tmp_chan_tx, + process_chan_tx, + }); + self.context_buffer + .add_enhanced_metric_data(&request_id, enhanced_metric_offsets); + } + + // Increment the invocation metric + self.enhanced_metrics.increment_invocation_metric(); + } + + /// Resets the state of the processor to 
default values. + /// + fn reset_state(&mut self) { + // Reset Span Context on Span + self.span.trace_id = 0; + self.span.parent_id = 0; + self.span.span_id = 0; + // Error + self.span.error = 0; + // Meta tags + self.span.meta.clear(); + // Extracted Span Context + self.extracted_span_context = None; + // Cold Start Span + self.cold_start_span = None; + } + + /// On the first invocation, determine if it's a cold start or proactive init. + /// + /// For every other invocation, it will always be warm start. + /// + fn set_init_tags(&mut self) { + let mut proactive_initialization = false; + let mut cold_start = false; + + // If it's empty, then we are in a cold start + if self.context_buffer.is_empty() { + let now = Instant::now(); + let time_since_sandbox_init = now.duration_since(self.aws_config.sandbox_init_time); + if time_since_sandbox_init.as_millis() > PROACTIVE_INITIALIZATION_THRESHOLD_MS.into() { + proactive_initialization = true; + } else { + cold_start = true; + } + } + + if proactive_initialization { + self.span.meta.insert( + String::from("proactive_initialization"), + proactive_initialization.to_string(), + ); + } + self.span + .meta + .insert(String::from("cold_start"), cold_start.to_string()); + + self.enhanced_metrics + .set_init_tags(proactive_initialization, cold_start); + } + + pub fn on_platform_init_start(&mut self, time: DateTime) { + // Create a cold start span + let mut cold_start_span = create_empty_span( + String::from("aws.lambda.cold_start"), + self.span.resource.clone(), + self.span.service.clone(), + ); + + let start_time: i64 = SystemTime::from(time) + .duration_since(UNIX_EPOCH) + .expect("time went backwards") + .as_nanos() + .try_into() + .unwrap_or_default(); + + cold_start_span.span_id = generate_span_id(); + cold_start_span.start = start_time; + + self.cold_start_span = Some(cold_start_span); + } + + /// Given the duration of the platform init report, set the init duration metric. 
+    ///
+    /// When a cold start span exists, its duration is set from `duration_ms`
+    /// (milliseconds) converted to a whole number of nanoseconds.
+    ///
+    #[allow(clippy::cast_possible_truncation)]
+    pub fn on_platform_init_report(&mut self, duration_ms: f64) {
+        self.enhanced_metrics.set_init_duration_metric(duration_ms);
+
+        if let Some(cold_start_span) = &mut self.cold_start_span {
+            // `round` is intentional: the duration must be a whole number of
+            // nanoseconds, computed the same way as the invocation span duration
+            cold_start_span.duration = (duration_ms * MS_TO_NS).round() as i64;
+        }
+    }
+
+    /// Given a `request_id` and the time of the platform start, add the start time to the context buffer.
+    ///
+    /// Also, set the start time of the current span.
+    ///
+    /// The start time is expressed in nanoseconds since the Unix epoch; if it does
+    /// not fit in an `i64`, it falls back to `0`.
+    ///
+    pub fn on_platform_start(&mut self, request_id: String, time: DateTime) {
+        let start_time: i64 = SystemTime::from(time)
+            .duration_since(UNIX_EPOCH)
+            .expect("time went backwards")
+            .as_nanos()
+            .try_into()
+            .unwrap_or_default();
+        self.context_buffer.add_start_time(&request_id, start_time);
+        self.span.start = start_time;
+    }
+
+    /// Given a `request_id`, the runtime duration in milliseconds and the status of the
+    /// invocation, finish the invocation span.
+    ///
+    /// - Stores the runtime duration in the context buffer and sets the runtime duration metric.
+    /// - Increments the error metric for any non-success status, and the timeout metric on timeout.
+    /// - Sets the span duration and `request_id` tag, and signals the tmp and process monitors to stop.
+    /// - Adds trigger tags, completes the inferred spans, and links the cold start span to the trace.
+    /// - If a tracer was detected, sends the invocation, inferred, wrapped inferred, and cold start
+    ///   spans as a single trace through `trace_agent_tx`.
+    ///
+    #[allow(clippy::too_many_arguments)]
+    #[allow(clippy::cast_possible_truncation)]
+    pub async fn on_platform_runtime_done(
+        &mut self,
+        request_id: &String,
+        duration_ms: f64,
+        status: Status,
+        config: Arc,
+        tags_provider: Arc,
+        trace_processor: Arc,
+        trace_agent_tx: Sender,
+    ) {
+        self.context_buffer
+            .add_runtime_duration(request_id, duration_ms);
+
+        // Set the runtime duration metric
+        self.enhanced_metrics
+            .set_runtime_duration_metric(duration_ms);
+
+        if status != Status::Success {
+            // Increment the error metric
+            self.enhanced_metrics.increment_errors_metric();
+
+            // Increment the error type metric
+            if status == Status::Timeout {
+                self.enhanced_metrics.increment_timeout_metric();
+            }
+        }
+
+        if let Some(context) = self.context_buffer.get(request_id) {
+            // `round` is intentional: the duration must be a whole number of nanoseconds
+            self.span.duration = (context.runtime_duration_ms * MS_TO_NS).round() as i64;
+            self.span
+                .meta
+                .insert("request_id".to_string(), request_id.clone());
+            // todo(duncanista): add missing tags
+            // - language
+            // - metrics tags (for asm)
+
+            if let Some(offsets) = &context.enhanced_metric_data {
+                self.enhanced_metrics.set_cpu_utilization_enhanced_metrics(
+                    offsets.cpu_offset.clone(),
+                    offsets.uptime_offset,
+                );
+                // Send the signal to stop monitoring tmp
+                _ = offsets.tmp_chan_tx.send(());
+                // Send the signal to stop monitoring file descriptors and threads
+                _ = offsets.process_chan_tx.send(());
+            }
+        }
+
+        if let Some(trigger_tags) = self.inferrer.get_trigger_tags() {
+            self.span.meta.extend(trigger_tags);
+        }
+
+        self.inferrer.complete_inferred_spans(&self.span);
+
+        // The cold start span shares the trace and parent of the invocation span
+        if let Some(cold_start_span) = &mut self.cold_start_span {
+            cold_start_span.trace_id = self.span.trace_id;
+            cold_start_span.parent_id = self.span.parent_id;
+        }
+
+        // Spans are only sent when `on_invocation_start` flagged a tracer
+        if self.tracer_detected {
+            let mut body_size = std::mem::size_of_val(&self.span);
+            let mut traces = vec![self.span.clone()];
+
+            if let Some(inferred_span) = &self.inferrer.inferred_span {
+                body_size += std::mem::size_of_val(inferred_span);
+                traces.push(inferred_span.clone());
+            }
+
+            if let Some(ws) = &self.inferrer.wrapped_inferred_span {
+                body_size += std::mem::size_of_val(ws);
+                traces.push(ws.clone());
+            }
+
+            if let Some(cold_start_span) = &self.cold_start_span {
+                body_size += std::mem::size_of_val(cold_start_span);
+                traces.push(cold_start_span.clone());
+            }
+
+            // todo: figure out what to do here
+            let header_tags = tracer_header_tags::TracerHeaderTags {
+                lang: "",
+                lang_version: "",
+                lang_interpreter: "",
+                lang_vendor: "",
+                tracer_version: "",
+                container_id: "",
+                client_computed_top_level: false,
+                client_computed_stats: false,
+            };
+
+            let send_data = trace_processor.process_traces(
+                config.clone(),
+                tags_provider.clone(),
+                header_tags,
+                vec![traces],
+                body_size,
+            );
+
+            if let Err(e) = trace_agent_tx.send(send_data).await {
+                debug!("Failed to send invocation span to agent: {e}");
+            }
+        }
+    }
+
+    /// Given a `request_id` and the metrics of the platform report, set the report log metrics.
+    ///
+    /// If the `request_id` is found in the context buffer:
+    /// - when a runtime duration has been recorded (non-zero), set the post runtime duration
+    ///   metric to the report duration minus the runtime duration;
+    /// - when enhanced metric offsets were stored, set the network and CPU time metrics.
+    ///
+    /// If the `request_id` is not found, only the report log metrics are set.
+    ///
+    pub fn on_platform_report(&mut self, request_id: &String, metrics: ReportMetrics) {
+        // Set the report log metrics
+        self.enhanced_metrics.set_report_log_metrics(&metrics);
+
+        if let Some(context) = self.context_buffer.get(request_id) {
+            if context.runtime_duration_ms != 0.0 {
+                let post_runtime_duration_ms = metrics.duration_ms - context.runtime_duration_ms;
+
+                // Set the post runtime duration metric
+                self.enhanced_metrics
+                    .set_post_runtime_duration_metric(post_runtime_duration_ms);
+            }
+
+            // Set Network and CPU time metrics
+            if let Some(offsets) = context.enhanced_metric_data.clone() {
+                self.enhanced_metrics
+                    .set_network_enhanced_metrics(offsets.network_offset);
+                self.enhanced_metrics
+                    .set_cpu_time_enhanced_metrics(offsets.cpu_offset);
+            }
+        }
+    }
+
+    /// If this method is called, it means that we are operating in a Universally Instrumented
+    /// runtime. Therefore, we need to set the `tracer_detected` flag to `true`.
+    ///
+    /// The payload is parsed as JSON (falling back to an empty object when it is not
+    /// valid JSON), optionally tagged onto the invocation span, and then used to infer
+    /// a trigger span and to extract the upstream trace context.
+    ///
+    pub fn on_invocation_start(&mut self, headers: HashMap, payload: Vec) {
+        self.tracer_detected = true;
+
+        let payload_value = match serde_json::from_slice::(&payload) {
+            Ok(value) => value,
+            Err(_) => json!({}),
+        };
+
+        // Tag the invocation span with the request payload
+        if self.config.capture_lambda_payload {
+            tag_span_from_value(
+                &mut self.span,
+                "function.request",
+                &payload_value,
+                0,
+                self.config.capture_lambda_payload_max_depth,
+            );
+        }
+
+        self.inferrer.infer_span(&payload_value, &self.aws_config);
+        self.extracted_span_context = self.extract_span_context(&headers, &payload_value);
+
+        // Set the extracted trace context to the spans
+        if let Some(sc) = &self.extracted_span_context {
+            self.span.trace_id = sc.trace_id;
+            self.span.parent_id = sc.span_id;
+
+            // Set the right data to the correct root level span,
+            // If there's an inferred span, then that should be the root.
+            if self.inferrer.inferred_span.is_some() {
+                self.inferrer.set_parent_id(sc.span_id);
+                self.inferrer.extend_meta(sc.tags.clone());
+            } else {
+                self.span.meta.extend(sc.tags.clone());
+            }
+        }
+
+        // If we have an inferred span, set the invocation span parent id
+        // to be the inferred span id, even if we don't have an extracted trace context
+        if let Some(inferred_span) = &self.inferrer.inferred_span {
+            self.span.parent_id = inferred_span.span_id;
+        }
+    }
+
+    /// Looks for a span context, returning the first one found, in this order:
+    /// the span inferrer, the `headers` field of the payload, the invocation headers.
+    ///
+    fn extract_span_context(
+        &mut self,
+        headers: &HashMap,
+        payload_value: &Value,
+    ) -> Option {
+        if let Some(sc) = self.inferrer.get_span_context(&self.propagator) {
+            return Some(sc);
+        }
+
+        if let Some(payload_headers) = payload_value.get("headers") {
+            if let Some(sc) = self.propagator.extract(payload_headers) {
+                debug!("Extracted trace context from event headers");
+                return Some(sc);
+            }
+        }
+
+        if let Some(sc) = self.propagator.extract(headers) {
+            debug!("Extracted trace context from headers");
+            return Some(sc);
+        }
+
+        None
+    }
+
+    /// Given the end invocation headers and the response payload, finish tagging the
+    /// current span: response payload tags, HTTP status code, trace context, and errors.
+    ///
+    /// A 5xx `statusCode` in the response, or error information in the headers, marks
+    /// the span as an error and increments the errors metric.
+    ///
+    pub fn on_invocation_end(&mut self, headers: HashMap, payload: Vec) {
+        let payload_value = match serde_json::from_slice::(&payload) {
+            Ok(value) => value,
+            Err(_) => json!({}),
+        };
+
+        // Tag the invocation span with the response payload
+        if self.config.capture_lambda_payload {
+            tag_span_from_value(
+                &mut self.span,
+                "function.response",
+                &payload_value,
+                0,
+                self.config.capture_lambda_payload_max_depth,
+            );
+        }
+
+        // `statusCode` may be serialized as a JSON number or as a string,
+        // so accept both representations instead of silently dropping numbers.
+        let status_code = match payload_value.get("statusCode") {
+            Some(Value::Number(n)) => Some(n.to_string()),
+            Some(Value::String(s)) => Some(s.clone()),
+            _ => None,
+        };
+
+        if let Some(status_code) = status_code {
+            self.span
+                .meta
+                .insert("http.status_code".to_string(), status_code.clone());
+
+            if status_code.len() == 3 && status_code.starts_with('5') {
+                self.span.error = 1;
+            }
+
+            // If we have an inferred span, set the status code to it
+            self.inferrer.set_status_code(status_code);
+        }
+
+        self.update_span_context_from_headers(&headers);
+        self.set_span_error_from_headers(headers);
+
+        if self.span.error == 1 {
+            self.enhanced_metrics.increment_errors_metric();
+        }
+    }
+
+    /// Sets the trace id, span id, parent id, and propagation tags of the current span.
+    ///
+    /// The previously extracted span context takes precedence; the end invocation
+    /// headers are only used for the trace and parent ids when no trace id is known.
+    /// The span id is always taken from the headers when present.
+    ///
+    fn update_span_context_from_headers(&mut self, headers: &HashMap) {
+        let mut trace_id = 0;
+        let mut parent_id = 0;
+        let mut tags: HashMap = HashMap::new();
+
+        // If we have a trace context, this means we got it from
+        // distributed tracing
+        if let Some(sc) = &self.extracted_span_context {
+            debug!("Trace context was found, not extracting it from incoming headers");
+            trace_id = sc.trace_id;
+            parent_id = sc.span_id;
+            tags.extend(sc.tags.clone());
+        }
+
+        // We are the root span, so we should extract the trace context
+        // from the tracer, which has sent it through end invocation headers
+        if trace_id == 0 {
+            debug!("No trace context found, extracting it from headers");
+            // Extract trace context from headers manually
+            if let Some(header) = headers.get(DATADOG_TRACE_ID_KEY) {
+                trace_id = header.parse::().unwrap_or(0);
+            }
+
+            if let Some(header) = headers.get(DATADOG_PARENT_ID_KEY) {
+                parent_id = header.parse::().unwrap_or(0);
+            }
+
+            // TODO: sampling priority extraction
+
+            // Extract tags from headers
+            // Used for 128 bit trace ids
+            tags = DatadogHeaderPropagator::extract_tags(headers);
+        }
+
+        // We should always use the generated span id from the tracer
+        if let Some(header) = headers.get(DATADOG_SPAN_ID_KEY) {
+            self.span.span_id = header.parse::().unwrap_or(0);
+        }
+
+        self.span.trace_id = trace_id;
+
+        // With an inferred span, the tags belong to it and the invocation span keeps
+        // the parent id it already has
+        if self.inferrer.inferred_span.is_some() {
+            self.inferrer.extend_meta(tags);
+        } else {
+            self.span.parent_id = parent_id;
+            self.span.meta.extend(tags);
+        }
+    }
+
+    /// Given end invocation headers, set error metadata, if present, to the current span.
+    ///
+    /// The span is an error when the error header is `true` (case-insensitive), when any
+    /// of the message, type, or stack headers is present, or when it was already marked.
+    ///
+    fn set_span_error_from_headers(&mut self, headers: HashMap) {
+        let message = headers.get(DATADOG_INVOCATION_ERROR_MESSAGE_KEY);
+        let r#type = headers.get(DATADOG_INVOCATION_ERROR_TYPE_KEY);
+        let stack = headers.get(DATADOG_INVOCATION_ERROR_STACK_KEY);
+
+        let is_error = headers
+            .get(DATADOG_INVOCATION_ERROR_KEY)
+            .map_or(false, |v| v.to_lowercase() == "true")
+            || message.is_some()
+            || stack.is_some()
+            || r#type.is_some()
+            || self.span.error == 1;
+        if is_error {
+            self.span.error = 1;
+
+            if let Some(m) = message {
+                self.span
+                    .meta
+                    .insert(String::from("error.msg"), m.to_string());
+            }
+
+            if let Some(t) = r#type {
+                self.span
+                    .meta
+                    .insert(String::from("error.type"), t.to_string());
+            }
+
+            if let Some(s) = stack {
+                // Fall back to the raw header value when base64 decoding fails
+                let decoded_stack = match base64_to_string(s) {
+                    Ok(decoded) => decoded,
+                    Err(e) => {
+                        debug!("Failed to decode error stack: {e}");
+                        s.to_string()
+                    }
+                };
+
+                self.span
+                    .meta
+                    .insert(String::from("error.stack"), decoded_stack);
+            }
+
+            // todo: handle timeout
+        }
+    }
+}
diff --git a/bottlecap/src/lifecycle/invocation/span_inferrer.rs b/bottlecap/src/lifecycle/invocation/span_inferrer.rs
new file mode 100644
index 000000000..a916305f7
--- /dev/null
+++ b/bottlecap/src/lifecycle/invocation/span_inferrer.rs
@@ -0,0 +1,323 @@
+use std::collections::HashMap;
+
+use datadog_trace_protobuf::pb::Span;
+use serde_json::Value; +use tracing::debug; + +use crate::config::AwsConfig; + +use crate::lifecycle::invocation::{ + generate_span_id, + triggers::{ + api_gateway_http_event::APIGatewayHttpEvent, + api_gateway_rest_event::APIGatewayRestEvent, + dynamodb_event::DynamoDbRecord, + event_bridge_event::EventBridgeEvent, + kinesis_event::KinesisRecord, + lambda_function_url_event::LambdaFunctionUrlEvent, + s3_event::S3Record, + sns_event::{SnsEntity, SnsRecord}, + sqs_event::{extract_trace_context_from_aws_trace_header, SqsRecord}, + step_function_event::StepFunctionEvent, + Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG, + }, +}; +use crate::traces::{context::SpanContext, propagation::Propagator}; + +#[derive(Default)] +pub struct SpanInferrer { + service_mapping: HashMap, + // Span inferred from the Lambda incoming request payload + pub inferred_span: Option, + // Nested span inferred from the Lambda incoming request payload + pub wrapped_inferred_span: Option, + // If the inferred span is async + is_async_span: bool, + // Carrier to extract the span context from + carrier: Option>, + // Generated Span Context from Step Functions or context taken from `AWSTraceHeader` when java->sqs->java + generated_span_context: Option, + // Tags generated from the trigger + trigger_tags: Option>, +} + +impl SpanInferrer { + #[must_use] + pub fn new(service_mapping: HashMap) -> Self { + Self { + service_mapping, + inferred_span: None, + wrapped_inferred_span: None, + is_async_span: false, + carrier: None, + generated_span_context: None, + trigger_tags: None, + } + } + + /// Given a byte payload, try to deserialize it into a `serde_json::Value` + /// and try matching it to a `Trigger` implementation, which will create + /// an inferred span and set it to `self.inferred_span` + /// + #[allow(clippy::too_many_lines)] + pub fn infer_span(&mut self, payload_value: &Value, aws_config: &AwsConfig) { + self.inferred_span = None; + self.wrapped_inferred_span = None; + self.is_async_span = 
false; + self.carrier = None; + self.generated_span_context = None; + self.trigger_tags = None; + + let mut trigger: Option> = None; + let mut inferred_span = Span { + span_id: generate_span_id(), + ..Default::default() + }; + + let mut is_step_function = false; + + if APIGatewayHttpEvent::is_match(payload_value) { + if let Some(t) = APIGatewayHttpEvent::new(payload_value.clone()) { + t.enrich_span(&mut inferred_span, &self.service_mapping); + + trigger = Some(Box::new(t)); + } + } else if APIGatewayRestEvent::is_match(payload_value) { + if let Some(t) = APIGatewayRestEvent::new(payload_value.clone()) { + t.enrich_span(&mut inferred_span, &self.service_mapping); + + trigger = Some(Box::new(t)); + } + } else if LambdaFunctionUrlEvent::is_match(payload_value) { + if let Some(t) = LambdaFunctionUrlEvent::new(payload_value.clone()) { + t.enrich_span(&mut inferred_span, &self.service_mapping); + + trigger = Some(Box::new(t)); + } + } else if SqsRecord::is_match(payload_value) { + if let Some(t) = SqsRecord::new(payload_value.clone()) { + t.enrich_span(&mut inferred_span, &self.service_mapping); + + self.generated_span_context = extract_trace_context_from_aws_trace_header( + t.attributes.aws_trace_header.clone(), + ); + + // Check for SNS event wrapped in the SQS body + if let Ok(sns_entity) = serde_json::from_str::(&t.body) { + debug!("Found an SNS event wrapped in the SQS body"); + let mut wrapped_inferred_span = Span { + span_id: generate_span_id(), + ..Default::default() + }; + + let wt = SnsRecord { + sns: sns_entity, + event_subscription_arn: None, + }; + wt.enrich_span(&mut wrapped_inferred_span, &self.service_mapping); + inferred_span.meta.extend(wt.get_tags()); + + wrapped_inferred_span.duration = + inferred_span.start - wrapped_inferred_span.start; + + self.wrapped_inferred_span = Some(wrapped_inferred_span); + } else if let Ok(event_bridge_entity) = + serde_json::from_str::(&t.body) + { + let mut wrapped_inferred_span = Span { + span_id: generate_span_id(), + 
..Default::default() + }; + + event_bridge_entity + .enrich_span(&mut wrapped_inferred_span, &self.service_mapping); + inferred_span.meta.extend(event_bridge_entity.get_tags()); + + wrapped_inferred_span.duration = + inferred_span.start - wrapped_inferred_span.start; + + self.wrapped_inferred_span = Some(wrapped_inferred_span); + }; + + trigger = Some(Box::new(t)); + } + } else if SnsRecord::is_match(payload_value) { + if let Some(t) = SnsRecord::new(payload_value.clone()) { + t.enrich_span(&mut inferred_span, &self.service_mapping); + + if let Some(message) = &t.sns.message { + if let Ok(event_bridge_wrapper_message) = + serde_json::from_str::(message) + { + let mut wrapped_inferred_span = Span { + span_id: generate_span_id(), + ..Default::default() + }; + + event_bridge_wrapper_message + .enrich_span(&mut wrapped_inferred_span, &self.service_mapping); + inferred_span + .meta + .extend(event_bridge_wrapper_message.get_tags()); + + wrapped_inferred_span.duration = + inferred_span.start - wrapped_inferred_span.start; + + self.wrapped_inferred_span = Some(wrapped_inferred_span); + } + } + + trigger = Some(Box::new(t)); + } + } else if DynamoDbRecord::is_match(payload_value) { + if let Some(t) = DynamoDbRecord::new(payload_value.clone()) { + t.enrich_span(&mut inferred_span, &self.service_mapping); + + trigger = Some(Box::new(t)); + } + } else if S3Record::is_match(payload_value) { + if let Some(t) = S3Record::new(payload_value.clone()) { + t.enrich_span(&mut inferred_span, &self.service_mapping); + + trigger = Some(Box::new(t)); + } + } else if EventBridgeEvent::is_match(payload_value) { + if let Some(t) = EventBridgeEvent::new(payload_value.clone()) { + t.enrich_span(&mut inferred_span, &self.service_mapping); + + trigger = Some(Box::new(t)); + } + } else if KinesisRecord::is_match(payload_value) { + if let Some(t) = KinesisRecord::new(payload_value.clone()) { + t.enrich_span(&mut inferred_span, &self.service_mapping); + + trigger = Some(Box::new(t)); + } + } else 
if StepFunctionEvent::is_match(payload_value) { + if let Some(t) = StepFunctionEvent::new(payload_value.clone()) { + self.generated_span_context = Some(t.get_span_context()); + trigger = Some(Box::new(t)); + is_step_function = true; + } + } else { + debug!("Unable to infer span from payload: no matching trigger found"); + } + + // Inferred a trigger + if let Some(t) = trigger { + let mut trigger_tags = t.get_tags(); + trigger_tags.insert( + FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG.to_string(), + t.get_arn(&aws_config.region), + ); + + self.trigger_tags = Some(trigger_tags); + self.carrier = Some(t.get_carrier()); + self.is_async_span = t.is_async(); + + // For Step Functions, there is no inferred span + if is_step_function && self.generated_span_context.is_some() { + self.inferred_span = None; + } else { + self.inferred_span = Some(inferred_span); + } + } + } + + /// If a `self.inferred_span` exist, set the `parent_id` to + /// the span. + /// + pub fn set_parent_id(&mut self, parent_id: u64) { + if let Some(s) = &mut self.inferred_span { + s.parent_id = parent_id; + } + } + + pub fn extend_meta(&mut self, iter: HashMap) { + if let Some(s) = &mut self.inferred_span { + s.meta.extend(iter); + } + } + + pub fn set_status_code(&mut self, status_code: String) { + if let Some(s) = &mut self.inferred_span { + s.meta.insert("http.status_code".to_string(), status_code); + } + } + + // TODO: add status tag and other info from response + pub fn complete_inferred_spans(&mut self, invocation_span: &Span) { + if let Some(s) = &mut self.inferred_span { + if let Some(ws) = &mut self.wrapped_inferred_span { + // Set correct Parent ID for multiple inferred spans + ws.parent_id = s.parent_id; + s.parent_id = ws.span_id; + + // TODO: clean this logic + if self.is_async_span { + // SNS to SQS span duration will be set + if ws.duration == 0 { + let duration = s.start - ws.start; + ws.duration = duration; + } + } else { + let duration = s.start - ws.start; + ws.duration = duration; + } + + 
// Set error + ws.error = invocation_span.error; + ws.meta + .insert(String::from("peer.service"), s.service.clone()); + + ws.trace_id = invocation_span.trace_id; + } + + if self.is_async_span { + // SNS to SQS span duration will be set + if s.duration == 0 { + let duration = invocation_span.start - s.start; + s.duration = duration; + } + } else { + let duration = (invocation_span.start + invocation_span.duration) - s.start; + s.duration = duration; + } + + // Set error + s.error = invocation_span.error; + s.meta.insert( + String::from("peer.service"), + invocation_span.service.clone(), + ); + + s.trace_id = invocation_span.trace_id; + } + } + + /// Returns a clone of the carrier associated with the inferred span + /// + /// If the carrier is set, it will try to extract the span context, + /// otherwise it will + /// + pub fn get_span_context(&self, propagator: &impl Propagator) -> Option { + // Step Functions `SpanContext` is deterministically generated + if self.generated_span_context.is_some() { + return self.generated_span_context.clone(); + } + + if let Some(sc) = self.carrier.as_ref().and_then(|c| propagator.extract(c)) { + debug!("Extracted trace context from inferred span"); + return Some(sc); + } + + None + } + + /// Returns a clone of the tags associated with the inferred span + /// + #[must_use] + pub fn get_trigger_tags(&self) -> Option> { + self.trigger_tags.clone() + } +} diff --git a/bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs b/bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs new file mode 100644 index 000000000..cdf372001 --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/triggers/api_gateway_http_event.rs @@ -0,0 +1,444 @@ +use datadog_trace_protobuf::pb::Span; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::collections::HashMap; +use tracing::debug; + +use crate::lifecycle::invocation::{ + processor::MS_TO_NS, + triggers::{ + get_aws_partition_by_region, lowercase_key, 
ServiceNameResolver, Trigger, + FUNCTION_TRIGGER_EVENT_SOURCE_TAG, + }, +}; + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct APIGatewayHttpEvent { + #[serde(rename = "routeKey")] + pub route_key: String, + #[serde(serialize_with = "lowercase_key")] + pub headers: HashMap, + #[serde(rename = "requestContext")] + pub request_context: RequestContext, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct RequestContext { + pub stage: String, + #[serde(rename = "requestId")] + pub request_id: String, + #[serde(rename = "apiId")] + pub api_id: String, + #[serde(rename = "domainName")] + pub domain_name: String, + #[serde(rename = "timeEpoch")] + pub time_epoch: i64, + pub http: RequestContextHTTP, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct RequestContextHTTP { + pub method: String, + pub path: String, + pub protocol: String, + #[serde(rename = "sourceIp")] + pub source_ip: String, + #[serde(rename = "userAgent")] + pub user_agent: String, +} + +impl Trigger for APIGatewayHttpEvent { + fn new(payload: Value) -> Option { + serde_json::from_value(payload).ok()? 
+ } + + fn is_match(payload: &Value) -> bool { + let version = payload.get("version"); + let domain_name: Option<&Value> = payload + .get("requestContext") + .and_then(|d| d.get("domainName")); + + version.is_some_and(|v| v == "2.0") + && payload.get("rawQueryString").is_some() + && domain_name.is_some_and(|d| d.as_str().map_or(true, |s| !s.contains("lambda-url"))) + } + + #[allow(clippy::cast_possible_truncation)] + fn enrich_span(&self, span: &mut Span, service_mapping: &HashMap) { + debug!("Enriching an Inferred Span for an API Gateway HTTP Event"); + let resource = if self.route_key.is_empty() { + format!( + "{http_method} {route_key}", + http_method = self.request_context.http.method, + route_key = self.route_key + ) + } else { + self.route_key.clone() + }; + + let http_url = format!( + "https://{domain_name}{path}", + domain_name = self.request_context.domain_name, + path = self.request_context.http.path + ); + let start_time = (self.request_context.time_epoch as f64 * MS_TO_NS) as i64; + + let service_name = + self.resolve_service_name(service_mapping, &self.request_context.domain_name); + + span.name = "aws.httpapi".to_string(); + span.service = service_name; + span.resource.clone_from(&resource); + span.r#type = "http".to_string(); + span.start = start_time; + span.meta.extend(HashMap::from([ + ( + "endpoint".to_string(), + self.request_context.http.path.clone(), + ), + ("http.url".to_string(), http_url), + ( + "http.method".to_string(), + self.request_context.http.method.clone(), + ), + ( + "http.protocol".to_string(), + self.request_context.http.protocol.clone(), + ), + ( + "http.source_ip".to_string(), + self.request_context.http.source_ip.clone(), + ), + ( + "http.user_agent".to_string(), + self.request_context.http.user_agent.clone(), + ), + ("operation_name".to_string(), "aws.httpapi".to_string()), + ( + "request_id".to_string(), + self.request_context.request_id.clone(), + ), + ])); + } + + fn get_tags(&self) -> HashMap { + let mut tags = 
HashMap::from([ + ( + "http.url".to_string(), + format!( + "https://{domain_name}{path}", + domain_name = self.request_context.domain_name.clone(), + path = self.request_context.http.path.clone() + ), + ), + // path and URL are full + // /users/12345/profile + ( + "http.url_details.path".to_string(), + self.request_context.http.path.clone(), + ), + ( + "http.method".to_string(), + self.request_context.http.method.clone(), + ), + ( + FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), + "api-gateway".to_string(), + ), + ]); + // route is parameterized + // /users/{id}/profile + if !self.route_key.is_empty() { + tags.insert( + "http.route".to_string(), + self.route_key + .clone() + .split_whitespace() + .last() + .unwrap_or(&self.route_key.clone()) + .to_string(), + ); + } + + if let Some(referer) = self.headers.get("referer") { + tags.insert("http.referer".to_string(), referer.to_string()); + } + + if let Some(user_agent) = self.headers.get("user-agent") { + tags.insert("http.user_agent".to_string(), user_agent.to_string()); + } + + tags + } + + fn get_arn(&self, region: &str) -> String { + let partition = get_aws_partition_by_region(region); + format!( + "arn:{partition}:apigateway:{region}::/restapis/{api_id}/stages/{stage}", + partition = partition, + region = region, + api_id = self.request_context.api_id, + stage = self.request_context.stage + ) + } + + fn is_async(&self) -> bool { + self.headers + .get("x-amz-invocation-type") + .is_some_and(|v| v == "Event") + } + + fn get_carrier(&self) -> HashMap { + self.headers.clone() + } +} + +impl ServiceNameResolver for APIGatewayHttpEvent { + fn get_specific_identifier(&self) -> String { + self.request_context.api_id.clone() + } + + fn get_generic_identifier(&self) -> &'static str { + "lambda_api_gateway" + } +} +#[cfg(test)] +mod tests { + use super::*; + use crate::lifecycle::invocation::triggers::test_utils::read_json_file; + + #[test] + fn test_new() { + let json = read_json_file("api_gateway_http_event.json"); + let 
payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let result = APIGatewayHttpEvent::new(payload) + .expect("Failed to deserialize into APIGatewayHttpEvent"); + + let expected = APIGatewayHttpEvent { + route_key: "GET /httpapi/get".to_string(), + headers: HashMap::from([ + ("accept".to_string(), "*/*".to_string()), + ("content-length".to_string(), "0".to_string()), + ( + "host".to_string(), + "x02yirxc7a.execute-api.sa-east-1.amazonaws.com".to_string(), + ), + ("user-agent".to_string(), "curl/7.64.1".to_string()), + ( + "x-amzn-trace-id".to_string(), + "Root=1-613a52fb-4c43cfc95e0241c1471bfa05".to_string(), + ), + ("x-forwarded-for".to_string(), "38.122.226.210".to_string()), + ("x-forwarded-port".to_string(), "443".to_string()), + ("x-forwarded-proto".to_string(), "https".to_string()), + ("x-datadog-trace-id".to_string(), "12345".to_string()), + ("x-datadog-parent-id".to_string(), "67890".to_string()), + ("x-datadog-sampling-priority".to_string(), "2".to_string()), + ]), + request_context: RequestContext { + stage: "$default".to_string(), + request_id: "FaHnXjKCGjQEJ7A=".to_string(), + api_id: "x02yirxc7a".to_string(), + domain_name: "x02yirxc7a.execute-api.sa-east-1.amazonaws.com".to_string(), + time_epoch: 1_631_212_283_738, + http: RequestContextHTTP { + method: "GET".to_string(), + path: "/httpapi/get".to_string(), + protocol: "HTTP/1.1".to_string(), + source_ip: "38.122.226.210".to_string(), + user_agent: "curl/7.64.1".to_string(), + }, + }, + }; + + assert_eq!(result, expected); + } + + #[test] + fn test_is_match() { + let json = read_json_file("api_gateway_http_event.json"); + let payload = + serde_json::from_str(&json).expect("Failed to deserialize APIGatewayHttpEvent"); + + assert!(APIGatewayHttpEvent::is_match(&payload)); + } + + #[test] + + fn test_is_not_match() { + let json = read_json_file("api_gateway_proxy_event.json"); + let payload = + serde_json::from_str(&json).expect("Failed to deserialize 
APIGatewayHttpEvent"); + assert!(!APIGatewayHttpEvent::is_match(&payload)); + } + + #[test] + fn test_enrich_span() { + let json = read_json_file("api_gateway_http_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + APIGatewayHttpEvent::new(payload).expect("Failed to deserialize APIGatewayHttpEvent"); + let mut span = Span::default(); + let service_mapping = HashMap::new(); + event.enrich_span(&mut span, &service_mapping); + assert_eq!(span.name, "aws.httpapi"); + assert_eq!( + span.service, + "x02yirxc7a.execute-api.sa-east-1.amazonaws.com" + ); + assert_eq!(span.resource, "GET /httpapi/get"); + assert_eq!(span.r#type, "http"); + assert_eq!( + span.meta, + HashMap::from([ + ("endpoint".to_string(), "/httpapi/get".to_string()), + ( + "http.url".to_string(), + "https://x02yirxc7a.execute-api.sa-east-1.amazonaws.com/httpapi/get" + .to_string() + ), + ("http.method".to_string(), "GET".to_string()), + ("http.protocol".to_string(), "HTTP/1.1".to_string()), + ("http.source_ip".to_string(), "38.122.226.210".to_string()), + ("http.user_agent".to_string(), "curl/7.64.1".to_string()), + ("operation_name".to_string(), "aws.httpapi".to_string()), + ("request_id".to_string(), "FaHnXjKCGjQEJ7A=".to_string()), + ]) + ); + } + + #[test] + fn test_get_tags() { + let json = read_json_file("api_gateway_http_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + APIGatewayHttpEvent::new(payload).expect("Failed to deserialize APIGatewayHttpEvent"); + let tags = event.get_tags(); + let expected = HashMap::from([ + ( + "http.url".to_string(), + "https://x02yirxc7a.execute-api.sa-east-1.amazonaws.com/httpapi/get".to_string(), + ), + ( + "http.url_details.path".to_string(), + "/httpapi/get".to_string(), + ), + ("http.method".to_string(), "GET".to_string()), + ("http.route".to_string(), "/httpapi/get".to_string()), + ("http.user_agent".to_string(), 
"curl/7.64.1".to_string()), + ( + "function_trigger.event_source".to_string(), + "api-gateway".to_string(), + ), + ]); + + assert_eq!(tags, expected); + } + + #[test] + + fn test_enrich_span_parameterized() { + let json = read_json_file("api_gateway_http_event_parameterized.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + APIGatewayHttpEvent::new(payload).expect("Failed to deserialize APIGatewayHttpEvent"); + let mut span = Span::default(); + let service_mapping = HashMap::new(); + event.enrich_span(&mut span, &service_mapping); + assert_eq!(span.name, "aws.httpapi"); + assert_eq!( + span.service, + "9vj54we5ih.execute-api.sa-east-1.amazonaws.com" + ); + assert_eq!(span.resource, "GET /user/{id}"); + assert_eq!(span.r#type, "http"); + assert_eq!( + span.meta, + HashMap::from([ + ("endpoint".to_string(), "/user/42".to_string()), + ( + "http.url".to_string(), + "https://9vj54we5ih.execute-api.sa-east-1.amazonaws.com/user/42".to_string() + ), + ("http.method".to_string(), "GET".to_string()), + ("http.protocol".to_string(), "HTTP/1.1".to_string()), + ("http.source_ip".to_string(), "76.115.124.192".to_string()), + ("http.user_agent".to_string(), "curl/8.1.2".to_string()), + ("operation_name".to_string(), "aws.httpapi".to_string()), + ("request_id".to_string(), "Ur2JtjEfGjQEPOg=".to_string()), + ]) + ); + } + + #[test] + fn test_get_tags_parameterized() { + let json = read_json_file("api_gateway_http_event_parameterized.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + APIGatewayHttpEvent::new(payload).expect("Failed to deserialize APIGatewayHttpEvent"); + let tags = event.get_tags(); + + let expected = HashMap::from([ + ( + "http.url".to_string(), + "https://9vj54we5ih.execute-api.sa-east-1.amazonaws.com/user/42".to_string(), + ), + ("http.url_details.path".to_string(), "/user/42".to_string()), + ("http.method".to_string(), "GET".to_string()), + 
("http.route".to_string(), "/user/{id}".to_string()), + ("http.user_agent".to_string(), "curl/8.1.2".to_string()), + ( + "function_trigger.event_source".to_string(), + "api-gateway".to_string(), + ), + ]); + assert_eq!(tags, expected); + } + + #[test] + fn test_get_arn() { + let json = read_json_file("api_gateway_http_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + APIGatewayHttpEvent::new(payload).expect("Failed to deserialize APIGatewayHttpEvent"); + assert_eq!( + event.get_arn("sa-east-1"), + "arn:aws:apigateway:sa-east-1::/restapis/x02yirxc7a/stages/$default" + ); + } + + #[test] + fn test_resolve_service_name() { + let json = read_json_file("api_gateway_http_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + APIGatewayHttpEvent::new(payload).expect("Failed to deserialize APIGatewayHttpEvent"); + + // Priority is given to the specific key + let specific_service_mapping = HashMap::from([ + ("x02yirxc7a".to_string(), "specific-service".to_string()), + ( + "lambda_api_gateway".to_string(), + "generic-service".to_string(), + ), + ]); + + assert_eq!( + event.resolve_service_name( + &specific_service_mapping, + &event.request_context.domain_name + ), + "specific-service" + ); + + let generic_service_mapping = HashMap::from([( + "lambda_api_gateway".to_string(), + "generic-service".to_string(), + )]); + assert_eq!( + event + .resolve_service_name(&generic_service_mapping, &event.request_context.domain_name), + "generic-service" + ); + } +} diff --git a/bottlecap/src/lifecycle/invocation/triggers/api_gateway_rest_event.rs b/bottlecap/src/lifecycle/invocation/triggers/api_gateway_rest_event.rs new file mode 100644 index 000000000..67a1180be --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/triggers/api_gateway_rest_event.rs @@ -0,0 +1,420 @@ +use datadog_trace_protobuf::pb::Span; +use serde::{Deserialize, Serialize}; +use 
serde_json::Value; +use std::collections::HashMap; +use tracing::debug; + +use crate::lifecycle::invocation::{ + processor::MS_TO_NS, + triggers::{ + get_aws_partition_by_region, lowercase_key, ServiceNameResolver, Trigger, + FUNCTION_TRIGGER_EVENT_SOURCE_TAG, + }, +}; + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct APIGatewayRestEvent { + #[serde(serialize_with = "lowercase_key")] + pub headers: HashMap, + #[serde(rename = "requestContext")] + pub request_context: RequestContext, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct RequestContext { + pub stage: String, + #[serde(rename = "requestId")] + pub request_id: String, + #[serde(rename = "apiId")] + pub api_id: String, + #[serde(rename = "domainName")] + pub domain_name: String, + #[serde(rename = "requestTimeEpoch")] + pub time_epoch: i64, + #[serde(rename = "httpMethod")] + pub method: String, + #[serde(rename = "resourcePath")] + pub resource_path: String, + pub path: String, + pub protocol: String, + pub identity: Identity, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct Identity { + #[serde(rename = "sourceIp")] + pub source_ip: String, + #[serde(rename = "userAgent")] + pub user_agent: String, +} + +impl Trigger for APIGatewayRestEvent { + fn new(payload: Value) -> Option { + match serde_json::from_value(payload) { + Ok(event) => Some(event), + Err(e) => { + debug!("Failed to deserialize APIGatewayRestEvent: {}", e); + None + } + } + } + + fn is_match(payload: &Value) -> bool { + let stage = payload.get("requestContext").and_then(|v| v.get("stage")); + let http_method = payload.get("httpMethod"); + let resource = payload.get("resource"); + stage.is_some() && http_method.is_some() && resource.is_some() + } + + #[allow(clippy::cast_possible_truncation)] + fn enrich_span(&self, span: &mut Span, service_mapping: &HashMap) { + debug!("Enriching an Inferred Span for an API Gateway REST Event"); + let resource = format!( + 
"{http_method} {path}", + http_method = self.request_context.method, + path = self.request_context.resource_path + ); + let http_url = format!( + "https://{domain_name}{path}", + domain_name = self.request_context.domain_name, + path = self.request_context.path + ); + let start_time = (self.request_context.time_epoch as f64 * MS_TO_NS) as i64; + + let service_name = + self.resolve_service_name(service_mapping, &self.request_context.domain_name); + + span.name = "aws.apigateway".to_string(); + span.service = service_name; + span.resource.clone_from(&resource); + span.r#type = "http".to_string(); + span.start = start_time; + span.meta.extend(HashMap::from([ + ("endpoint".to_string(), self.request_context.path.clone()), + ("http.url".to_string(), http_url), + ( + "http.method".to_string(), + self.request_context.method.clone(), + ), + ( + "http.protocol".to_string(), + self.request_context.protocol.clone(), + ), + ( + "http.source_ip".to_string(), + self.request_context.identity.source_ip.clone(), + ), + ( + "http.user_agent".to_string(), + self.request_context.identity.user_agent.clone(), + ), + ("operation_name".to_string(), "aws.apigateway".to_string()), + ( + "request_id".to_string(), + self.request_context.request_id.clone(), + ), + ( + "http.route".to_string(), + self.request_context.resource_path.clone(), + ), + ])); + } + + fn get_tags(&self) -> HashMap { + let mut tags = HashMap::from([ + ( + "http.url".to_string(), + format!( + "https://{domain_name}{path}", + domain_name = self.request_context.domain_name, + path = self.request_context.path + ), + ), + ( + "http.url_details.path".to_string(), + self.request_context.path.clone(), + ), + ( + "http.method".to_string(), + self.request_context.method.clone(), + ), + ( + "http.route".to_string(), + self.request_context.resource_path.clone(), + ), + ( + "http.user_agent".to_string(), + self.request_context.identity.user_agent.to_string(), + ), + ( + FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), + 
"api-gateway".to_string(), + ), + ]); + + if let Some(referer) = self.headers.get("referer") { + tags.insert("http.referer".to_string(), referer.to_string()); + } + + tags + } + + fn get_arn(&self, region: &str) -> String { + let partition = get_aws_partition_by_region(region); + format!( + "arn:{partition}:apigateway:{region}::/restapis/{api_id}/stages/{stage}", + partition = partition, + region = region, + api_id = self.request_context.api_id, + stage = self.request_context.stage + ) + } + + fn is_async(&self) -> bool { + self.headers + .get("x-amz-invocation-type") + .is_some_and(|v| v == "Event") + } + + fn get_carrier(&self) -> HashMap { + self.headers.clone() + } +} + +impl ServiceNameResolver for APIGatewayRestEvent { + fn get_specific_identifier(&self) -> String { + self.request_context.api_id.clone() + } + + fn get_generic_identifier(&self) -> &'static str { + "lambda_api_gateway" + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::lifecycle::invocation::triggers::test_utils::read_json_file; + + #[test] + fn test_new() { + let json = read_json_file("api_gateway_rest_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let result = APIGatewayRestEvent::new(payload) + .expect("Failed to deserialize into APIGatewayRestEvent"); + + let expected = APIGatewayRestEvent { + headers: HashMap::from([ + ("Header1".to_string(), "value1".to_string()), + ("Header2".to_string(), "value2".to_string()), + ]), + request_context: RequestContext { + stage: "$default".to_string(), + request_id: "id=".to_string(), + api_id: "id".to_string(), + domain_name: "id.execute-api.us-east-1.amazonaws.com".to_string(), + time_epoch: 1_583_349_317_135, + method: "GET".to_string(), + path: "/my/path".to_string(), + protocol: "HTTP/1.1".to_string(), + resource_path: "/path".to_string(), + identity: Identity { + source_ip: "IP".to_string(), + user_agent: "user-agent".to_string(), + }, + }, + }; + + assert_eq!(result, expected); 
+ } + + #[test] + fn test_is_match() { + let json = read_json_file("api_gateway_rest_event.json"); + let payload = + serde_json::from_str(&json).expect("Failed to deserialize APIGatewayRestEvent"); + + assert!(APIGatewayRestEvent::is_match(&payload)); + } + + #[test] + fn test_is_not_match() { + let json = read_json_file("api_gateway_http_event.json"); + let payload = + serde_json::from_str(&json).expect("Failed to deserialize APIGatewayRestEvent"); + assert!(!APIGatewayRestEvent::is_match(&payload)); + } + + #[test] + fn test_enrich_span() { + let json = read_json_file("api_gateway_rest_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + APIGatewayRestEvent::new(payload).expect("Failed to deserialize APIGatewayRestEvent"); + let mut span = Span::default(); + let service_mapping = HashMap::new(); + event.enrich_span(&mut span, &service_mapping); + assert_eq!(span.name, "aws.apigateway"); + assert_eq!(span.service, "id.execute-api.us-east-1.amazonaws.com"); + assert_eq!(span.resource, "GET /path"); + assert_eq!(span.r#type, "http"); + + assert_eq!( + span.meta, + HashMap::from([ + ("endpoint".to_string(), "/my/path".to_string()), + ( + "http.url".to_string(), + "https://id.execute-api.us-east-1.amazonaws.com/my/path".to_string() + ), + ("http.method".to_string(), "GET".to_string()), + ("http.protocol".to_string(), "HTTP/1.1".to_string()), + ("http.source_ip".to_string(), "IP".to_string()), + ("http.user_agent".to_string(), "user-agent".to_string()), + ("http.route".to_string(), "/path".to_string()), + ("operation_name".to_string(), "aws.apigateway".to_string()), + ("request_id".to_string(), "id=".to_string()), + ]) + ); + } + + #[test] + fn test_get_tags() { + let json = read_json_file("api_gateway_rest_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + APIGatewayRestEvent::new(payload).expect("Failed to deserialize 
APIGatewayRestEvent"); + let tags = event.get_tags(); + + let expected = HashMap::from([ + ( + "http.url".to_string(), + "https://id.execute-api.us-east-1.amazonaws.com/my/path".to_string(), + ), + ("http.url_details.path".to_string(), "/my/path".to_string()), + ("http.method".to_string(), "GET".to_string()), + ("http.route".to_string(), "/path".to_string()), + ("http.user_agent".to_string(), "user-agent".to_string()), + ( + "function_trigger.event_source".to_string(), + "api-gateway".to_string(), + ), + ]); + + assert_eq!(tags, expected); + } + + #[test] + fn test_enrich_parameterized_span() { + let json = read_json_file("api_gateway_rest_event_parameterized.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + APIGatewayRestEvent::new(payload).expect("Failed to deserialize APIGatewayRestEvent"); + let mut span = Span::default(); + let service_mapping = HashMap::new(); + event.enrich_span(&mut span, &service_mapping); + assert_eq!(span.name, "aws.apigateway"); + assert_eq!( + span.service, + "mcwkra0ya4.execute-api.sa-east-1.amazonaws.com" + ); + assert_eq!(span.resource, "GET /user/{id}"); + assert_eq!(span.r#type, "http"); + let expected = HashMap::from([ + ("endpoint".to_string(), "/dev/user/42".to_string()), + ( + "http.url".to_string(), + "https://mcwkra0ya4.execute-api.sa-east-1.amazonaws.com/dev/user/42".to_string(), + ), + ("http.method".to_string(), "GET".to_string()), + ("http.protocol".to_string(), "HTTP/1.1".to_string()), + ("http.source_ip".to_string(), "76.115.124.192".to_string()), + ("http.user_agent".to_string(), "curl/8.1.2".to_string()), + ("http.route".to_string(), "/user/{id}".to_string()), + ("operation_name".to_string(), "aws.apigateway".to_string()), + ( + "request_id".to_string(), + "e16399f7-e984-463a-9931-745ba021a27f".to_string(), + ), + ]); + assert_eq!(span.meta, expected); + } + + #[test] + fn test_get_tags_parameterized() { + let json = 
read_json_file("api_gateway_rest_event_parameterized.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + APIGatewayRestEvent::new(payload).expect("Failed to deserialize APIGatewayRestEvent"); + let tags = event.get_tags(); + + assert_eq!( + tags, + HashMap::from([ + ( + "http.url".to_string(), + "https://mcwkra0ya4.execute-api.sa-east-1.amazonaws.com/dev/user/42" + .to_string(), + ), + ( + "http.url_details.path".to_string(), + "/dev/user/42".to_string(), + ), + ("http.method".to_string(), "GET".to_string()), + ("http.route".to_string(), "/user/{id}".to_string()), + ("http.user_agent".to_string(), "curl/8.1.2".to_string()), + ( + "function_trigger.event_source".to_string(), + "api-gateway".to_string() + ), + ]) + ); + } + + #[test] + fn test_get_arn() { + let json = read_json_file("api_gateway_rest_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + APIGatewayRestEvent::new(payload).expect("Failed to deserialize APIGatewayRestEvent"); + assert_eq!( + event.get_arn("us-east-1"), + "arn:aws:apigateway:us-east-1::/restapis/id/stages/$default" + ); + } + + #[test] + fn test_resolve_service_name() { + let json = read_json_file("api_gateway_rest_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + APIGatewayRestEvent::new(payload).expect("Failed to deserialize APIGatewayRestEvent"); + + // Priority is given to the specific key + let specific_service_mapping = HashMap::from([ + ("id".to_string(), "specific-service".to_string()), + ( + "lambda_api_gateway".to_string(), + "generic-service".to_string(), + ), + ]); + + assert_eq!( + event.resolve_service_name( + &specific_service_mapping, + &event.request_context.domain_name + ), + "specific-service" + ); + + let generic_service_mapping = HashMap::from([( + "lambda_api_gateway".to_string(), + "generic-service".to_string(), + )]); + 
assert_eq!( + event + .resolve_service_name(&generic_service_mapping, &event.request_context.domain_name), + "generic-service" + ); + } +} diff --git a/bottlecap/src/lifecycle/invocation/triggers/dynamodb_event.rs b/bottlecap/src/lifecycle/invocation/triggers/dynamodb_event.rs new file mode 100644 index 000000000..8503f46c5 --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/triggers/dynamodb_event.rs @@ -0,0 +1,283 @@ +use datadog_trace_protobuf::pb::Span; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::collections::HashMap; +use tracing::debug; + +use crate::lifecycle::invocation::{ + processor::S_TO_NS, + triggers::{ServiceNameResolver, Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_TAG}, +}; + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct DynamoDbEvent { + #[serde(rename = "Records")] + pub records: Vec, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct DynamoDbRecord { + #[serde(rename = "dynamodb")] + pub dynamodb: DynamoDbEntity, + #[serde(rename = "eventID")] + pub event_id: String, + #[serde(rename = "eventName")] + pub event_name: String, + #[serde(rename = "eventVersion")] + pub event_version: String, + #[serde(rename = "eventSourceARN")] + pub event_source_arn: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct DynamoDbEntity { + #[serde(rename = "ApproximateCreationDateTime")] + pub approximate_creation_date_time: f64, + #[serde(rename = "SizeBytes")] + pub size_bytes: i64, + #[serde(rename = "StreamViewType")] + pub stream_view_type: String, +} + +impl Trigger for DynamoDbRecord { + fn new(payload: Value) -> Option + where + Self: Sized, + { + let records = payload.get("Records").and_then(Value::as_array); + match records { + Some(records) => match serde_json::from_value::(records[0].clone()) { + Ok(event) => Some(event), + Err(e) => { + debug!("Failed to deserialize DynamoDB Record: {e}"); + None + } + }, + None => None, + } + } + + fn 
is_match(payload: &Value) -> bool + where + Self: Sized, + { + if let Some(first_record) = payload + .get("Records") + .and_then(Value::as_array) + .and_then(|r| r.first()) + .take() + { + first_record.get("dynamodb").is_some() + } else { + false + } + } + + #[allow(clippy::cast_possible_truncation)] + fn enrich_span(&self, span: &mut Span, service_mapping: &HashMap) { + debug!("Enriching an Inferred Span for a DynamoDB event"); + let table_name = self.get_specific_identifier(); + let resource = format!("{} {}", self.event_name.clone(), table_name); + + let start_time = (self.dynamodb.approximate_creation_date_time * S_TO_NS) as i64; + + let service_name = self.resolve_service_name(service_mapping, "dynamodb"); + + span.name = String::from("aws.dynamodb"); + span.service = service_name.to_string(); + span.resource = resource; + span.r#type = String::from("web"); + span.start = start_time; + span.meta.extend(HashMap::from([ + ("operation_name".to_string(), String::from("aws.dynamodb")), + ("event_id".to_string(), self.event_id.clone()), + ("event_name".to_string(), self.event_name.clone()), + ("event_version".to_string(), self.event_version.clone()), + ( + "event_source_arn".to_string(), + self.event_source_arn.clone(), + ), + ( + "size_bytes".to_string(), + self.dynamodb.size_bytes.to_string(), + ), + ( + "stream_view_type".to_string(), + self.dynamodb.stream_view_type.clone(), + ), + ("table_name".to_string(), table_name.to_string()), + ])); + } + + fn get_tags(&self) -> HashMap { + HashMap::from([( + FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), + "dynamodb".to_string(), + )]) + } + + fn get_arn(&self, _region: &str) -> String { + self.event_source_arn.clone() + } + + fn get_carrier(&self) -> HashMap { + HashMap::new() + } + + fn is_async(&self) -> bool { + true + } +} + +impl ServiceNameResolver for DynamoDbRecord { + fn get_specific_identifier(&self) -> String { + self.event_source_arn + .split('/') + .nth(1) + .unwrap_or_default() + .to_string() + } + + fn 
get_generic_identifier(&self) -> &'static str { + "lambda_dynamodb" + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::lifecycle::invocation::triggers::test_utils::read_json_file; + + #[test] + fn test_new() { + let json = read_json_file("dynamodb_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let result = DynamoDbRecord::new(payload).expect("Failed to deserialize into Record"); + + let expected = DynamoDbRecord { + dynamodb: DynamoDbEntity { + approximate_creation_date_time: 1_428_537_600.0, + size_bytes: 26, + stream_view_type: String::from("NEW_AND_OLD_IMAGES"), + }, + event_id: String::from("c4ca4238a0b923820dcc509a6f75849b"), + event_name: String::from("INSERT"), + event_version: String::from("1.1"), + event_source_arn: String::from("arn:aws:dynamodb:us-east-1:123456789012:table/ExampleTableWithStream/stream/2015-06-27T00:48:05.899"), + }; + + assert_eq!(result, expected); + } + + #[test] + fn test_is_match() { + let json = read_json_file("dynamodb_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize DynamoDbRecord"); + + assert!(DynamoDbRecord::is_match(&payload)); + } + + #[test] + fn test_is_not_match() { + let json = read_json_file("sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize SqsRecord"); + assert!(!DynamoDbRecord::is_match(&payload)); + } + + #[test] + fn test_enrich_span() { + let json = read_json_file("dynamodb_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = DynamoDbRecord::new(payload).expect("Failed to deserialize DynamoDbRecord"); + let mut span = Span::default(); + let service_mapping = HashMap::new(); + event.enrich_span(&mut span, &service_mapping); + assert_eq!(span.name, "aws.dynamodb"); + assert_eq!(span.service, "dynamodb"); + assert_eq!(span.resource, "INSERT ExampleTableWithStream"); + assert_eq!(span.r#type, "web"); + + 
assert_eq!( + span.meta, + HashMap::from([ + ("operation_name".to_string(), "aws.dynamodb".to_string()), + ("event_id".to_string(), "c4ca4238a0b923820dcc509a6f75849b".to_string()), + ("event_name".to_string(), "INSERT".to_string()), + ("event_version".to_string(), "1.1".to_string()), + ( + "event_source_arn".to_string(), + "arn:aws:dynamodb:us-east-1:123456789012:table/ExampleTableWithStream/stream/2015-06-27T00:48:05.899".to_string() + ), + ("size_bytes".to_string(), "26".to_string()), + ("stream_view_type".to_string(), "NEW_AND_OLD_IMAGES".to_string()), + ("table_name".to_string(), "ExampleTableWithStream".to_string()), + ]) + ); + } + + #[test] + fn test_get_tags() { + let json = read_json_file("dynamodb_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = DynamoDbRecord::new(payload).expect("Failed to deserialize DynamoDbRecord"); + let tags = event.get_tags(); + + let expected = HashMap::from([( + "function_trigger.event_source".to_string(), + "dynamodb".to_string(), + )]); + + assert_eq!(tags, expected); + } + + #[test] + fn test_get_arn() { + let json = read_json_file("dynamodb_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = DynamoDbRecord::new(payload).expect("Failed to deserialize DynamoDbRecord"); + assert_eq!( + event.get_arn("us-east-1"), + "arn:aws:dynamodb:us-east-1:123456789012:table/ExampleTableWithStream/stream/2015-06-27T00:48:05.899" + ); + } + + #[test] + fn test_get_carrier() { + let json = read_json_file("dynamodb_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = DynamoDbRecord::new(payload).expect("Failed to deserialize DynamoDbRecord"); + let carrier = event.get_carrier(); + + let expected = HashMap::new(); + + assert_eq!(carrier, expected); + } + + #[test] + fn test_resolve_service_name() { + let json = read_json_file("dynamodb_event.json"); + let 
payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = DynamoDbRecord::new(payload).expect("Failed to deserialize DynamoDbRecord"); + + // Priority is given to the specific key + let specific_service_mapping = HashMap::from([ + ( + "ExampleTableWithStream".to_string(), + "specific-service".to_string(), + ), + ("lambda_dynamodb".to_string(), "generic-service".to_string()), + ]); + + assert_eq!( + event.resolve_service_name(&specific_service_mapping, "dynamodb"), + "specific-service" + ); + + let generic_service_mapping = + HashMap::from([("lambda_dynamodb".to_string(), "generic-service".to_string())]); + assert_eq!( + event.resolve_service_name(&generic_service_mapping, "dynamodb"), + "generic-service" + ); + } +} diff --git a/bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs b/bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs new file mode 100644 index 000000000..f9b1e17b1 --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/triggers/event_bridge_event.rs @@ -0,0 +1,287 @@ +use chrono::{DateTime, Utc}; +use datadog_trace_protobuf::pb::Span; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::collections::HashMap; +use tracing::debug; + +use crate::lifecycle::invocation::{ + processor::{MS_TO_NS, S_TO_NS}, + triggers::{ + ServiceNameResolver, Trigger, DATADOG_CARRIER_KEY, FUNCTION_TRIGGER_EVENT_SOURCE_TAG, + }, +}; + +const DATADOG_START_TIME_KEY: &str = "x-datadog-start-time"; +const DATADOG_RESOURCE_NAME_KEY: &str = "x-datadog-resource-name"; + +#[derive(Serialize, Deserialize, Debug, PartialEq)] +pub struct EventBridgeEvent { + pub id: String, + pub version: String, + pub account: String, + pub time: DateTime, + pub region: String, + pub resources: Vec, + pub source: String, + #[serde(rename = "detail-type")] + pub detail_type: String, + pub detail: Value, + #[serde(rename = "replay-name")] + pub replay_name: Option, +} + +impl Trigger for EventBridgeEvent { + fn 
new(payload: Value) -> Option { + match serde_json::from_value(payload) { + Ok(event) => Some(event), + Err(e) => { + debug!("Failed to deserialize EventBridge Event: {}", e); + None + } + } + } + + fn is_match(payload: &Value) -> bool { + payload.get("detail-type").is_some() + && payload + .get("source") + .and_then(Value::as_str) + .map_or(false, |s| s != "aws.events") + } + + #[allow(clippy::cast_possible_truncation)] + fn enrich_span(&self, span: &mut Span, service_mapping: &HashMap) { + // EventBridge events have a timestamp resolution in seconds + let start_time_seconds = self + .time + .timestamp_nanos_opt() + .unwrap_or((self.time.timestamp_millis() as f64 * S_TO_NS) as i64); + + let carrier = self.get_carrier(); + let resource_name = self.get_specific_identifier(); + let start_time = carrier + .get(DATADOG_START_TIME_KEY) + .and_then(|s| s.parse::().ok()) + .map_or(start_time_seconds, |s| (s * MS_TO_NS) as i64); + + let service_name = self.resolve_service_name(service_mapping, "eventbridge"); + + span.name = String::from("aws.eventbridge"); + span.service = service_name.to_string(); + span.resource = resource_name; + span.r#type = String::from("web"); + span.start = start_time; + span.meta.extend(HashMap::from([ + ("operation_name".to_string(), "aws.eventbridge".to_string()), + ("detail_type".to_string(), self.detail_type.clone()), + ])); + } + + fn get_tags(&self) -> HashMap { + HashMap::from([( + FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), + "eventbridge".to_string(), + )]) + } + + fn get_arn(&self, _region: &str) -> String { + self.source.clone() + } + + fn get_carrier(&self) -> HashMap { + if let Ok(detail) = serde_json::from_value::>(self.detail.clone()) { + if let Some(carrier) = detail.get(DATADOG_CARRIER_KEY) { + return serde_json::from_value(carrier.clone()).unwrap_or_default(); + } + } + HashMap::new() + } + + fn is_async(&self) -> bool { + true + } +} + +impl ServiceNameResolver for EventBridgeEvent { + fn get_specific_identifier(&self) -> 
String { + let carrier = self.get_carrier(); + carrier + .get(DATADOG_RESOURCE_NAME_KEY) + .unwrap_or(&self.source) + .to_string() + } + + fn get_generic_identifier(&self) -> &'static str { + "lambda_eventbridge" + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::lifecycle::invocation::triggers::test_utils::read_json_file; + + #[test] + fn test_new() { + let json = read_json_file("eventbridge_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let result = + EventBridgeEvent::new(payload).expect("Failed to deserialize into EventBridgeEvent"); + + let expected = EventBridgeEvent { + id: "bd3c8258-8d30-007c-2562-64715b2d0ea8".to_string(), + version: "0".to_string(), + account: "601427279990".to_string(), + time: DateTime::parse_from_rfc3339("2024-11-09T08:22:15Z") + .expect("Failed to parse time") + .with_timezone(&Utc), + region: "eu-west-1".to_string(), + resources: vec![], + source: "my.event".to_string(), + detail_type: "UserSignUp".to_string(), + detail: serde_json::json!({ + "hello": "there", + "_datadog": { + "x-datadog-trace-id": "5827606813695714842", + "x-datadog-parent-id": "4726693487091824375", + "x-datadog-sampled": "1", + "x-datadog-sampling-priority": "1", + "x-datadog-resource-name": "testBus", + "x-datadog-start-time": "1731183820135" + } + }), + replay_name: None, + }; + + assert_eq!(result, expected); + } + + #[test] + fn test_is_match() { + let json = read_json_file("eventbridge_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize EventBridgeEvent"); + + assert!(EventBridgeEvent::is_match(&payload)); + } + + #[test] + fn test_is_not_match() { + let json = read_json_file("api_gateway_http_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize EventBridgeEvent"); + assert!(!EventBridgeEvent::is_match(&payload)); + } + + #[test] + fn test_enrich_span() { + let json = read_json_file("eventbridge_event.json"); + let 
payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + EventBridgeEvent::new(payload).expect("Failed to deserialize into EventBridgeEvent"); + + let mut span = Span::default(); + let service_mapping = HashMap::new(); + event.enrich_span(&mut span, &service_mapping); + + let expected = serde_json::from_str(&read_json_file("eventbridge_span.json")) + .expect("Failed to deserialize into Span"); + assert_eq!(span, expected); + } + + #[test] + fn test_enrich_span_no_resource_name() { + let json = read_json_file("eventbridge_no_resource_name_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + EventBridgeEvent::new(payload).expect("Failed to deserialize into EventBridgeEvent"); + + let mut span = Span::default(); + let service_mapping = HashMap::new(); + event.enrich_span(&mut span, &service_mapping); + + assert_eq!(span.resource, "my.event"); + } + + #[test] + fn test_enrich_span_no_timestamp() { + let json = read_json_file("eventbridge_no_timestamp_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + EventBridgeEvent::new(payload).expect("Failed to deserialize into EventBridgeEvent"); + + let mut span = Span::default(); + let service_mapping = HashMap::new(); + event.enrich_span(&mut span, &service_mapping); + + assert_eq!(span.resource, "testBus"); + // Seconds resolution + assert_eq!(span.start, 1_731_140_535_000_000_000); + } + + #[test] + fn test_get_arn() { + let json = read_json_file("eventbridge_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = EventBridgeEvent::new(payload).expect("Failed to deserialize EventBridgeEvent"); + assert_eq!(event.get_arn("us-east-1"), "my.event"); + } + + #[test] + fn test_get_carrier() { + let json = read_json_file("eventbridge_event.json"); + let payload = 
serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = + EventBridgeEvent::new(payload).expect("Failed to deserialize EventBridge Event"); + let carrier = event.get_carrier(); + + let expected = HashMap::from([ + ( + "x-datadog-trace-id".to_string(), + "5827606813695714842".to_string(), + ), + ( + "x-datadog-parent-id".to_string(), + "4726693487091824375".to_string(), + ), + ("x-datadog-sampling-priority".to_string(), "1".to_string()), + ("x-datadog-sampled".to_string(), "1".to_string()), + ("x-datadog-resource-name".to_string(), "testBus".to_string()), + ( + "x-datadog-start-time".to_string(), + "1731183820135".to_string(), + ), + ]); + + assert_eq!(carrier, expected); + } + + #[test] + fn test_resolve_service_name() { + let json = read_json_file("eventbridge_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = EventBridgeEvent::new(payload).expect("Failed to deserialize EventBridgeEvent"); + + // Priority is given to the specific key + let specific_service_mapping = HashMap::from([ + ("testBus".to_string(), "specific-service".to_string()), + ( + "lambda_eventbridge".to_string(), + "generic-service".to_string(), + ), + ]); + + assert_eq!( + event.resolve_service_name(&specific_service_mapping, "eventbridge"), + "specific-service" + ); + + let generic_service_mapping = HashMap::from([( + "lambda_eventbridge".to_string(), + "generic-service".to_string(), + )]); + assert_eq!( + event.resolve_service_name(&generic_service_mapping, "eventbridge"), + "generic-service" + ); + } +} diff --git a/bottlecap/src/lifecycle/invocation/triggers/kinesis_event.rs b/bottlecap/src/lifecycle/invocation/triggers/kinesis_event.rs new file mode 100644 index 000000000..ae55add0c --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/triggers/kinesis_event.rs @@ -0,0 +1,294 @@ +#![allow(clippy::module_name_repetitions)] +use base64::engine::general_purpose; +use base64::Engine; +use 
datadog_trace_protobuf::pb::Span;
+use serde::{Deserialize, Serialize};
+use serde_json::{from_slice, Value};
+use std::collections::HashMap;
+use tracing::debug;
+
+use crate::lifecycle::invocation::{
+    processor::S_TO_NS,
+    triggers::{
+        ServiceNameResolver, Trigger, DATADOG_CARRIER_KEY, FUNCTION_TRIGGER_EVENT_SOURCE_TAG,
+    },
+};
+
+/// Kinesis invocation payload: the top-level `Records` array.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct KinesisEvent {
+    #[serde(rename = "Records")]
+    pub records: Vec,
+}
+
+/// One entry of the `Records` array.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct KinesisRecord {
+    #[serde(rename = "eventID")]
+    pub event_id: String,
+    #[serde(rename = "eventName")]
+    pub event_name: String,
+    #[serde(rename = "eventSourceARN")]
+    pub event_source_arn: String,
+    #[serde(rename = "eventVersion")]
+    pub event_version: String,
+    pub kinesis: KinesisEntity,
+}
+
+/// The `kinesis` object nested in a record.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct KinesisEntity {
+    #[serde(rename = "approximateArrivalTimestamp")]
+    pub approximate_arrival_timestamp: f64,
+    #[serde(rename = "partitionKey")]
+    pub partition_key: String,
+    pub data: String,
+}
+
+impl Trigger for KinesisRecord {
+    /// Builds a `KinesisRecord` from the first entry of the payload's
+    /// `Records` array.
+    ///
+    /// Returns `None` when `Records` is missing, is not an array, is empty,
+    /// or when its first entry cannot be deserialized (the error is logged
+    /// at debug level).
+    fn new(payload: Value) -> Option {
+        // `first()` rather than `records[0]`: an empty `Records` array must
+        // give `None`, not an index-out-of-bounds panic.
+        let first_record = payload
+            .get("Records")
+            .and_then(Value::as_array)
+            .and_then(|records| records.first())?;
+        match serde_json::from_value(first_record.clone()) {
+            Ok(event) => Some(event),
+            Err(e) => {
+                debug!("Failed to deserialize Kinesis Record: {e}");
+                None
+            }
+        }
+    }
+
+    /// Returns `true` when the first entry of the payload's `Records` array
+    /// has a `kinesis` key.
+    fn is_match(payload: &Value) -> bool {
+        if let Some(first_record) = payload
+            .get("Records")
+            .and_then(Value::as_array)
+            .and_then(|r| r.first())
+        {
+            first_record.get("kinesis").is_some()
+        } else {
+            false
+        }
+    }
+
+    #[allow(clippy::cast_possible_truncation)]
+    fn enrich_span(&self, span: &mut Span, service_mapping: &HashMap) {
+        let stream_name = self.get_specific_identifier();
+        let shard_id = self.event_id.split(':').next().unwrap_or_default();
+        let service_name =
self.resolve_service_name(service_mapping, "kinesis"); + + span.name = String::from("aws.kinesis"); + span.service = service_name; + span.start = (self.kinesis.approximate_arrival_timestamp * S_TO_NS) as i64; + span.resource.clone_from(&stream_name); + span.r#type = "web".to_string(); + span.meta = HashMap::from([ + ("operation_name".to_string(), "aws.kinesis".to_string()), + ("stream_name".to_string(), stream_name.to_string()), + ("shard_id".to_string(), shard_id.to_string()), + ( + "event_source_arn".to_string(), + self.event_source_arn.to_string(), + ), + ("event_id".to_string(), self.event_id.to_string()), + ("event_name".to_string(), self.event_name.to_string()), + ("event_version".to_string(), self.event_version.to_string()), + ( + "partition_key".to_string(), + self.kinesis.partition_key.to_string(), + ), + ]); + } + + fn get_tags(&self) -> HashMap { + HashMap::from([( + FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), + "kinesis".to_string(), + )]) + } + + fn get_arn(&self, _region: &str) -> String { + self.event_source_arn.clone() + } + + fn get_carrier(&self) -> HashMap { + if let Ok(decoded_base64) = general_purpose::STANDARD.decode(&self.kinesis.data) { + if let Ok(as_json_map) = from_slice::>(&decoded_base64) { + if let Some(carrier) = as_json_map.get(DATADOG_CARRIER_KEY) { + return serde_json::from_value(carrier.clone()).unwrap_or_default(); + } + } + }; + HashMap::new() + } + + fn is_async(&self) -> bool { + true + } +} + +impl ServiceNameResolver for KinesisRecord { + fn get_specific_identifier(&self) -> String { + self.event_source_arn + .split('/') + .last() + .unwrap_or_default() + .to_string() + } + + fn get_generic_identifier(&self) -> &'static str { + "lambda_kinesis" + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::lifecycle::invocation::triggers::test_utils::read_json_file; + + #[test] + fn test_new() { + let json = read_json_file("kinesis_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize 
into Value"); + let result = KinesisRecord::new(payload).expect("Failed to deserialize into Record"); + + let expected = KinesisRecord { + event_id: + "shardId-000000000002:49624230154685806402418173680709770494154422022871973922" + .to_string(), + event_name: "aws:kinesis:record".to_string(), + event_source_arn: "arn:aws:kinesis:sa-east-1:425362996713:stream/kinesisStream" + .to_string(), + event_version: "1.0".to_string(), + kinesis: KinesisEntity { + approximate_arrival_timestamp: 1_643_638_425.163, + partition_key: "partitionkey".to_string(), + data: "eyJmb28iOiAiYmFyIiwgIl9kYXRhZG9nIjogeyJ4LWRhdGFkb2ctdHJhY2UtaWQiOiAiNDk0ODM3NzMxNjM1NzI5MTQyMSIsICJ4LWRhdGFkb2ctcGFyZW50LWlkIjogIjI4NzYyNTMzODAwMTg2ODEwMjYiLCAieC1kYXRhZG9nLXNhbXBsaW5nLXByaW9yaXR5IjogIjEifX0=".to_string(), + }, + }; + + assert_eq!(result, expected); + } + + #[test] + fn test_is_match() { + let json = read_json_file("kinesis_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize S3Record"); + + assert!(KinesisRecord::is_match(&payload)); + } + + #[test] + fn test_is_not_match() { + let json = read_json_file("sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize SqsRecord"); + assert!(!KinesisRecord::is_match(&payload)); + } + + #[test] + fn test_enrich_span() { + let json = read_json_file("kinesis_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = KinesisRecord::new(payload).expect("Failed to deserialize S3Record"); + let mut span = Span::default(); + let service_mapping = HashMap::new(); + event.enrich_span(&mut span, &service_mapping); + assert_eq!(span.name, "aws.kinesis"); + assert_eq!(span.service, "kinesis"); + assert_eq!(span.resource, "kinesisStream"); + assert_eq!(span.r#type, "web"); + + assert_eq!( + span.meta, + HashMap::from([ + ("operation_name".to_string(), "aws.kinesis".to_string()), + ("stream_name".to_string(), "kinesisStream".to_string()), + 
("shard_id".to_string(), "shardId-000000000002".to_string()), + ( + "event_source_arn".to_string(), + "arn:aws:kinesis:sa-east-1:425362996713:stream/kinesisStream".to_string() + ), + ( + "event_id".to_string(), + "shardId-000000000002:49624230154685806402418173680709770494154422022871973922" + .to_string() + ), + ("event_name".to_string(), "aws:kinesis:record".to_string()), + ("event_version".to_string(), "1.0".to_string()), + ("partition_key".to_string(), "partitionkey".to_string()), + ]) + ); + } + + #[test] + fn test_get_tags() { + let json = read_json_file("kinesis_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = KinesisRecord::new(payload).expect("Failed to deserialize KinesisRecord"); + let tags = event.get_tags(); + + let expected = HashMap::from([( + "function_trigger.event_source".to_string(), + "kinesis".to_string(), + )]); + + assert_eq!(tags, expected); + } + + #[test] + fn test_get_arn() { + let json = read_json_file("kinesis_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = KinesisRecord::new(payload).expect("Failed to deserialize KinesisRecord"); + assert_eq!( + event.get_arn("us-east-1"), + "arn:aws:kinesis:sa-east-1:425362996713:stream/kinesisStream".to_string() + ); + } + + #[test] + fn test_get_carrier() { + let json = read_json_file("kinesis_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = KinesisRecord::new(payload).expect("Failed to deserialize KinesisRecord"); + let carrier = event.get_carrier(); + + let expected = HashMap::from([ + ( + "x-datadog-trace-id".to_string(), + "4948377316357291421".to_string(), + ), + ( + "x-datadog-parent-id".to_string(), + "2876253380018681026".to_string(), + ), + ("x-datadog-sampling-priority".to_string(), "1".to_string()), + ]); + + assert_eq!(carrier, expected); + } + + #[test] + fn test_resolve_service_name() { 
+ let json = read_json_file("kinesis_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = KinesisRecord::new(payload).expect("Failed to deserialize KinesisRecord"); + + // Priority is given to the specific key + let specific_service_mapping = HashMap::from([ + ("kinesisStream".to_string(), "specific-service".to_string()), + ("lambda_kinesis".to_string(), "generic-service".to_string()), + ]); + + assert_eq!( + event.resolve_service_name(&specific_service_mapping, "kinesis"), + "specific-service" + ); + + let generic_service_mapping = + HashMap::from([("lambda_kinesis".to_string(), "generic-service".to_string())]); + assert_eq!( + event.resolve_service_name(&generic_service_mapping, "kinesis"), + "generic-service" + ); + } +} diff --git a/bottlecap/src/lifecycle/invocation/triggers/lambda_function_url_event.rs b/bottlecap/src/lifecycle/invocation/triggers/lambda_function_url_event.rs new file mode 100644 index 000000000..18bdc734d --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/triggers/lambda_function_url_event.rs @@ -0,0 +1,350 @@ +use std::{collections::HashMap, env}; + +use datadog_trace_protobuf::pb::Span; +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +use crate::lifecycle::invocation::{ + processor::MS_TO_NS, + triggers::{lowercase_key, ServiceNameResolver, Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_TAG}, +}; + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct LambdaFunctionUrlEvent { + #[serde(serialize_with = "lowercase_key")] + pub headers: HashMap, + #[serde(rename = "requestContext")] + pub request_context: RequestContext, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct RequestContext { + pub http: Http, + #[serde(rename = "accountId")] + pub account_id: String, + #[serde(rename = "domainName")] + pub domain_name: String, + #[serde(rename = "timeEpoch")] + pub time_epoch: i64, + #[serde(rename = "requestId")] + pub 
request_id: String, + #[serde(rename = "apiId")] + pub api_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct Http { + pub method: String, + pub path: String, + pub protocol: String, + #[serde(rename = "sourceIp")] + pub source_ip: String, + #[serde(rename = "userAgent")] + pub user_agent: String, +} + +impl Trigger for LambdaFunctionUrlEvent { + fn new(payload: serde_json::Value) -> Option + where + Self: Sized, + { + serde_json::from_value(payload).ok()? + } + + fn is_match(payload: &serde_json::Value) -> bool + where + Self: Sized, + { + payload + .get("requestContext") + .and_then(|rc| rc.get("domainName")) + .and_then(Value::as_str) + .map_or(false, |dn| dn.contains("lambda-url")) + } + + #[allow(clippy::cast_possible_truncation)] + fn enrich_span(&self, span: &mut Span, service_mapping: &HashMap) { + let resource = format!( + "{} {}", + self.request_context.http.method, self.request_context.http.path + ); + + let http_url = format!( + "https://{domain_name}{path}", + domain_name = self.request_context.domain_name.clone(), + path = self.request_context.http.path.clone() + ); + + let start_time = (self.request_context.time_epoch as f64 * MS_TO_NS) as i64; + + let service_name = + self.resolve_service_name(service_mapping, &self.request_context.domain_name); + + span.name = String::from("aws.lambda.url"); + span.service = service_name; + span.resource = resource; + span.r#type = String::from("http"); + span.start = start_time; + span.meta.extend([ + ( + "endpoint".to_string(), + self.request_context.http.path.clone(), + ), + ("http.url".to_string(), http_url), + ( + "http.method".to_string(), + self.request_context.http.method.clone(), + ), + ( + "http.user_agent".to_string(), + self.request_context.http.user_agent.clone(), + ), + ( + "http.source_ip".to_string(), + self.request_context.http.source_ip.clone(), + ), + ( + "http.protocol".to_string(), + self.request_context.http.protocol.clone(), + ), + 
("operation_name".to_string(), "aws.lambda.url".to_string()), + ( + "request_id".to_string(), + self.request_context.request_id.clone(), + ), + ]); + } + + fn get_tags(&self) -> HashMap { + let mut tags = HashMap::from([ + ( + "http.url".to_string(), + format!( + "https://{domain_name}{path}", + domain_name = self.request_context.domain_name.clone(), + path = self.request_context.http.path.clone() + ), + ), + // path and URL are full + // /users/12345/profile + ( + "http.url_details.path".to_string(), + self.request_context.http.path.clone(), + ), + ( + "http.method".to_string(), + self.request_context.http.method.clone(), + ), + ( + "http.user_agent".to_string(), + self.request_context.http.user_agent.clone(), + ), + ( + FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), + "lambda-function-url".to_string(), + ), + ]); + + if let Some(referer) = self.headers.get("referer") { + tags.insert("http.referer".to_string(), referer.clone()); + } + + tags + } + + fn get_arn(&self, region: &str) -> String { + let function_name = env::var("AWS_LAMBDA_FUNCTION_NAME").unwrap_or_default(); + format!( + "arn:aws:lambda:{region}:{}:url:{}", + self.request_context.account_id, function_name + ) + } + + fn get_carrier(&self) -> HashMap { + self.headers.clone() + } + + fn is_async(&self) -> bool { + self.headers + .get("x-amz-invocation-type") + .is_some_and(|v| v == "Event") + } +} + +impl ServiceNameResolver for LambdaFunctionUrlEvent { + fn get_specific_identifier(&self) -> String { + self.request_context.api_id.clone() + } + + fn get_generic_identifier(&self) -> &'static str { + "lambda_url" + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::lifecycle::invocation::triggers::test_utils::read_json_file; + + #[test] + fn test_new() { + let json = read_json_file("lambda_function_url_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let result = LambdaFunctionUrlEvent::new(payload) + .expect("Failed to deserialize into 
LambdaFunctionUrlEvent"); + + let expected = LambdaFunctionUrlEvent { + headers: HashMap::from([ + ("accept".to_string(), "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9".to_string()), + ("accept-language".to_string(), "en-US,en;q=0.9".to_string()), + ("accept-encoding".to_string(), "gzip, deflate, br".to_string()), + ("sec-fetch-mode".to_string(), "navigate".to_string()), + ("sec-fetch-site".to_string(), "none".to_string()), + ("sec-fetch-user".to_string(), "?1".to_string()), + ("sec-fetch-dest".to_string(), "document".to_string()), + ("sec-ch-ua".to_string(), "\"Google Chrome\";v=\"95\", \"Chromium\";v=\"95\", \";Not A Brand\";v=\"99\"".to_string()), + ("sec-ch-ua-platform".to_string(), "\"macOS\"".to_string()), + ("sec-ch-ua-mobile".to_string(), "?0".to_string()), + ("upgrade-insecure-requests".to_string(), "1".to_string()), + ( + "accept-language".to_string(), + "en-US,en;q=0.9".to_string(), + ), + ("user-agent".to_string(), "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36".to_string()), + ( + "x-amzn-trace-id".to_string(), + "Root=1-61953929-1ec00c3011062a48477b169e".to_string(), + ), + ("x-forwarded-for".to_string(), "71.195.30.42".to_string()), + ("x-forwarded-port".to_string(), "443".to_string()), + ("x-forwarded-proto".to_string(), "https".to_string()), + ("pragma".to_string(), "no-cache".to_string()), + ("cache-control".to_string(), "no-cache".to_string()), + ("host".to_string(), "a8hyhsshac.lambda-url.eu-south-1.amazonaws.com".to_string()), + + ]), + request_context: RequestContext { + request_id: String::from("ec4d58f8-2b8b-4ceb-a1d5-2be7bff58505"), + time_epoch: 1_637_169_449_721, + http: Http { + method: String::from("GET"), + path: String::from("/"), + protocol: String::from("HTTP/1.1"), + source_ip: String::from("71.195.30.42"), + user_agent: String::from("Mozilla/5.0 (Macintosh; Intel Mac 
OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"), + }, + account_id: String::from("601427279990"), + domain_name: String::from("a8hyhsshac.lambda-url.eu-south-1.amazonaws.com"), + api_id: String::from("a8hyhsshac"), + }, + }; + + assert_eq!(result, expected); + } + + #[test] + fn test_is_match() { + let json = read_json_file("lambda_function_url_event.json"); + let payload = + serde_json::from_str(&json).expect("Failed to deserialize LambdaFunctionUrlEvent"); + + assert!(LambdaFunctionUrlEvent::is_match(&payload)); + } + + #[test] + fn test_is_not_match() { + let json = read_json_file("api_gateway_proxy_event.json"); + let payload = + serde_json::from_str(&json).expect("Failed to deserialize LambdaFunctionUrlEvent"); + assert!(!LambdaFunctionUrlEvent::is_match(&payload)); + } + + #[test] + fn test_enrich_span() { + let json = read_json_file("lambda_function_url_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = LambdaFunctionUrlEvent::new(payload) + .expect("Failed to deserialize LambdaFunctionUrlEvent"); + let mut span = Span::default(); + let service_mapping = HashMap::new(); + event.enrich_span(&mut span, &service_mapping); + assert_eq!(span.name, "aws.lambda.url"); + assert_eq!( + span.service, + "a8hyhsshac.lambda-url.eu-south-1.amazonaws.com" + ); + assert_eq!(span.resource, "GET /"); + assert_eq!(span.r#type, "http"); + assert_eq!( + span.meta, + HashMap::from([ + ("http.protocol".to_string(), "HTTP/1.1".to_string()), + ("http.source_ip".to_string(), "71.195.30.42".to_string()), + ("operation_name".to_string(), "aws.lambda.url".to_string()), + ("request_id".to_string(), "ec4d58f8-2b8b-4ceb-a1d5-2be7bff58505".to_string()), + ("http.url".to_string(), "https://a8hyhsshac.lambda-url.eu-south-1.amazonaws.com/".to_string()), + ("http.method".to_string(), "GET".to_string()), + ("http.user_agent".to_string(), "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) 
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36".to_string()), + ("endpoint".to_string(), "/".to_string()), + ]) + ); + } + + #[test] + fn test_get_tags() { + let json = read_json_file("lambda_function_url_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = LambdaFunctionUrlEvent::new(payload) + .expect("Failed to deserialize LambdaFunctionUrlEvent"); + let tags = event.get_tags(); + let expected = HashMap::from([ + ("function_trigger.event_source".to_string(), "lambda-function-url".to_string()), + ("http.method".to_string(), "GET".to_string()), + ("http.url_details.path".to_string(), "/".to_string()), + ("http.user_agent".to_string(), "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36".to_string()), + ("http.url".to_string(), "https://a8hyhsshac.lambda-url.eu-south-1.amazonaws.com/".to_string()), + ]); + + assert_eq!(tags, expected); + } + + #[test] + fn test_get_arn() { + env::set_var("AWS_LAMBDA_FUNCTION_NAME", "mock-lambda"); + let json = read_json_file("lambda_function_url_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = LambdaFunctionUrlEvent::new(payload) + .expect("Failed to deserialize LambdaFunctionUrlEvent"); + assert_eq!( + event.get_arn("sa-east-1"), + "arn:aws:lambda:sa-east-1:601427279990:url:mock-lambda" + ); + env::remove_var("AWS_LAMBDA_FUNCTION_NAME"); + } + + #[test] + fn test_resolve_service_name() { + let json = read_json_file("lambda_function_url_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = LambdaFunctionUrlEvent::new(payload) + .expect("Failed to deserialize LambdaFunctionUrlEvent"); + + // Priority is given to the specific key + let specific_service_mapping = HashMap::from([ + ("a8hyhsshac".to_string(), "specific-service".to_string()), + 
("lambda_url".to_string(), "generic-service".to_string()), + ]); + + assert_eq!( + event.resolve_service_name(&specific_service_mapping, "domain-name"), + "specific-service" + ); + + let generic_service_mapping = + HashMap::from([("lambda_url".to_string(), "generic-service".to_string())]); + assert_eq!( + event.resolve_service_name(&generic_service_mapping, "domain-name"), + "generic-service" + ); + } +} diff --git a/bottlecap/src/lifecycle/invocation/triggers/mod.rs b/bottlecap/src/lifecycle/invocation/triggers/mod.rs new file mode 100644 index 000000000..2f9a0100a --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/triggers/mod.rs @@ -0,0 +1,98 @@ +use std::{collections::HashMap, hash::BuildHasher}; + +use datadog_trace_protobuf::pb::Span; +use serde::{ser::SerializeMap, Serializer}; +use serde_json::Value; + +pub mod api_gateway_http_event; +pub mod api_gateway_rest_event; +pub mod dynamodb_event; +pub mod event_bridge_event; +pub mod kinesis_event; +pub mod lambda_function_url_event; +pub mod s3_event; +pub mod sns_event; +pub mod sqs_event; +pub mod step_function_event; + +pub const DATADOG_CARRIER_KEY: &str = "_datadog"; +pub const FUNCTION_TRIGGER_EVENT_SOURCE_TAG: &str = "function_trigger.event_source"; +pub const FUNCTION_TRIGGER_EVENT_SOURCE_ARN_TAG: &str = "function_trigger.event_source_arn"; + +/// Resolves the service name for a given trigger depending on +/// service mapping configuration. 
+pub trait ServiceNameResolver { + /// Get the specific service name for this trigger type, it will + /// be used as a key to resolve the service name + fn get_specific_identifier(&self) -> String; + + /// Get the generic service mapping key for the trigger + fn get_generic_identifier(&self) -> &'static str; +} + +pub trait Trigger: ServiceNameResolver { + fn new(payload: Value) -> Option + where + Self: Sized; + fn is_match(payload: &Value) -> bool + where + Self: Sized; + fn enrich_span(&self, span: &mut Span, service_mapping: &HashMap); + fn get_tags(&self) -> HashMap; + fn get_arn(&self, region: &str) -> String; + fn get_carrier(&self) -> HashMap; + fn is_async(&self) -> bool; + + /// Default implementation for service name resolution + fn resolve_service_name( + &self, + service_mapping: &HashMap, + fallback: &str, + ) -> String { + service_mapping + .get(&self.get_specific_identifier()) + .or_else(|| service_mapping.get(self.get_generic_identifier())) + .unwrap_or(&fallback.to_string()) + .to_string() + } +} + +#[must_use] +pub fn get_aws_partition_by_region(region: &str) -> String { + match region { + r if r.starts_with("us-gov-") => "aws-us-gov".to_string(), + r if r.starts_with("cn-") => "aws-cn".to_string(), + _ => "aws".to_string(), + } +} + +/// Serialize a `HashMap` with lowercase keys +/// +pub fn lowercase_key( + map: &HashMap, + serializer: S, +) -> Result +where + S: Serializer, + H: BuildHasher, +{ + let mut map_serializer = serializer.serialize_map(Some(map.len()))?; + for (key, value) in map { + map_serializer.serialize_entry(&key.to_lowercase(), value)?; + } + map_serializer.end() +} + +#[cfg(test)] +pub mod test_utils { + use std::fs; + use std::path::PathBuf; + + #[must_use] + pub fn read_json_file(file_name: &str) -> String { + let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("tests/payloads"); + path.push(file_name); + fs::read_to_string(path).expect("Failed to read file") + } +} diff --git 
a/bottlecap/src/lifecycle/invocation/triggers/s3_event.rs b/bottlecap/src/lifecycle/invocation/triggers/s3_event.rs new file mode 100644 index 000000000..43065cb0f --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/triggers/s3_event.rs @@ -0,0 +1,277 @@ +use std::collections::HashMap; + +use chrono::{DateTime, Utc}; +use datadog_trace_protobuf::pb::Span; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use tracing::debug; + +use crate::lifecycle::invocation::{ + processor::MS_TO_NS, + triggers::{ServiceNameResolver, Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_TAG}, +}; + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct S3Event { + #[serde(rename = "Records")] + pub records: Vec, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct S3Record { + #[serde(rename = "eventSource")] + pub event_source: String, + #[serde(rename = "eventTime")] + pub event_time: DateTime, + #[serde(rename = "eventName")] + pub event_name: String, + pub s3: S3Entity, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct S3Entity { + pub bucket: S3Bucket, + pub object: S3Object, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct S3Bucket { + pub name: String, + pub arn: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct S3Object { + pub key: String, + pub size: i64, + #[serde(rename = "eTag")] + pub e_tag: String, +} + +impl Trigger for S3Record { + fn new(payload: serde_json::Value) -> Option { + let records = payload.get("Records").and_then(Value::as_array); + match records { + Some(records) => match serde_json::from_value::(records[0].clone()) { + Ok(event) => Some(event), + Err(e) => { + debug!("Failed to deserialize S3 Record: {e}"); + None + } + }, + None => None, + } + } + + fn is_match(payload: &serde_json::Value) -> bool { + if let Some(first_record) = payload + .get("Records") + .and_then(Value::as_array) + .and_then(|r| r.first()) + .take() 
+ { + first_record.get("s3").is_some() + } else { + false + } + } + + #[allow(clippy::cast_possible_truncation)] + fn enrich_span(&self, span: &mut Span, service_mapping: &HashMap) { + debug!("Enriching an InferredSpan span with S3 event"); + let bucket_name = self.get_specific_identifier(); + let start_time = self + .event_time + .timestamp_nanos_opt() + .unwrap_or((self.event_time.timestamp_millis() as f64 * MS_TO_NS) as i64); + + let service_name = self.resolve_service_name(service_mapping, "s3"); + + span.name = String::from("aws.s3"); + span.service = service_name.to_string(); + span.resource.clone_from(&bucket_name); + span.r#type = String::from("web"); + span.start = start_time; + span.meta.extend(HashMap::from([ + ("operation_name".to_string(), String::from("aws.s3")), + ("event_name".to_string(), self.event_name.clone()), + ("bucketname".to_string(), bucket_name), + ("bucket_arn".to_string(), self.s3.bucket.arn.clone()), + ("object_key".to_string(), self.s3.object.key.clone()), + ("object_size".to_string(), self.s3.object.size.to_string()), + ("object_etag".to_string(), self.s3.object.e_tag.clone()), + ])); + } + + fn get_tags(&self) -> HashMap { + HashMap::from([( + FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), + "s3".to_string(), + )]) + } + + fn get_arn(&self, _region: &str) -> String { + self.event_source.clone() + } + + fn get_carrier(&self) -> HashMap { + HashMap::new() + } + + fn is_async(&self) -> bool { + true + } +} + +impl ServiceNameResolver for S3Record { + fn get_specific_identifier(&self) -> String { + self.s3.bucket.name.clone() + } + + fn get_generic_identifier(&self) -> &'static str { + "lambda_s3" + } +} + +#[cfg(test)] +#[allow(clippy::unwrap_used)] +mod tests { + use super::*; + use crate::lifecycle::invocation::triggers::test_utils::read_json_file; + + #[test] + fn test_new() { + let json = read_json_file("s3_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let result = 
S3Record::new(payload).expect("Failed to deserialize into Record"); + + let expected = S3Record { + event_source: String::from("aws:s3:sample:event:source"), + event_time: DateTime::parse_from_rfc3339("2023-01-07T00:00:00.000Z") + .unwrap() + .with_timezone(&Utc), + event_name: String::from("ObjectCreated:Put"), + s3: S3Entity { + bucket: S3Bucket { + name: String::from("example-bucket"), + arn: String::from("arn:aws:s3:::example-bucket"), + }, + object: S3Object { + key: String::from("test/key"), + size: 1024, + e_tag: String::from("0123456789abcdef0123456789abcdef"), + }, + }, + }; + + assert_eq!(result, expected); + } + + #[test] + fn test_is_match() { + let json = read_json_file("s3_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize S3Record"); + + assert!(S3Record::is_match(&payload)); + } + + #[test] + fn test_is_not_match() { + let json = read_json_file("sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize SqsRecord"); + assert!(!S3Record::is_match(&payload)); + } + + #[test] + fn test_enrich_span() { + let json = read_json_file("s3_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = S3Record::new(payload).expect("Failed to deserialize S3Record"); + let mut span = Span::default(); + let service_mapping = HashMap::new(); + event.enrich_span(&mut span, &service_mapping); + assert_eq!(span.name, "aws.s3"); + assert_eq!(span.service, "s3"); + assert_eq!(span.resource, "example-bucket"); + assert_eq!(span.r#type, "web"); + + assert_eq!( + span.meta, + HashMap::from([ + ("operation_name".to_string(), "aws.s3".to_string()), + ("event_name".to_string(), "ObjectCreated:Put".to_string()), + ("bucketname".to_string(), "example-bucket".to_string()), + ( + "bucket_arn".to_string(), + "arn:aws:s3:::example-bucket".to_string() + ), + ("object_key".to_string(), "test/key".to_string()), + ("object_size".to_string(), "1024".to_string()), 
+ ( + "object_etag".to_string(), + "0123456789abcdef0123456789abcdef".to_string() + ) + ]) + ); + } + + #[test] + fn test_get_tags() { + let json = read_json_file("s3_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = S3Record::new(payload).expect("Failed to deserialize S3Record"); + let tags = event.get_tags(); + + let expected = HashMap::from([( + "function_trigger.event_source".to_string(), + "s3".to_string(), + )]); + + assert_eq!(tags, expected); + } + + #[test] + fn test_get_arn() { + let json = read_json_file("s3_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = S3Record::new(payload).expect("Failed to deserialize S3Record"); + assert_eq!(event.get_arn("us-east-1"), "aws:s3:sample:event:source"); + } + + #[test] + fn test_get_carrier() { + let json = read_json_file("s3_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = S3Record::new(payload).expect("Failed to deserialize SqsRecord"); + let carrier = event.get_carrier(); + + let expected = HashMap::new(); + + assert_eq!(carrier, expected); + } + + #[test] + fn test_resolve_service_name() { + let json = read_json_file("s3_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = S3Record::new(payload).expect("Failed to deserialize S3Record"); + + // Priority is given to the specific key + let specific_service_mapping = HashMap::from([ + ("example-bucket".to_string(), "specific-service".to_string()), + ("lambda_s3".to_string(), "generic-service".to_string()), + ]); + + assert_eq!( + event.resolve_service_name(&specific_service_mapping, "s3"), + "specific-service" + ); + + let generic_service_mapping = + HashMap::from([("lambda_s3".to_string(), "generic-service".to_string())]); + assert_eq!( + event.resolve_service_name(&generic_service_mapping, "s3"), + 
"generic-service" + ); + } +} diff --git a/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs b/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs new file mode 100644 index 000000000..091dcdf53 --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/triggers/sns_event.rs @@ -0,0 +1,383 @@ +use std::collections::HashMap; + +use chrono::{DateTime, Utc}; +use datadog_trace_protobuf::pb::Span; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use tracing::debug; + +use crate::lifecycle::invocation::{ + base64_to_string, + processor::MS_TO_NS, + triggers::{ + event_bridge_event::EventBridgeEvent, ServiceNameResolver, Trigger, DATADOG_CARRIER_KEY, + FUNCTION_TRIGGER_EVENT_SOURCE_TAG, + }, +}; + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct SnsEvent { + #[serde(rename = "Records")] + pub records: Vec, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct SnsRecord { + #[serde(rename = "Sns")] + pub sns: SnsEntity, + #[serde(rename = "EventSubscriptionArn")] + pub event_subscription_arn: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct SnsEntity { + #[serde(rename = "MessageId")] + pub message_id: String, + #[serde(rename = "Type")] + pub r#type: String, + #[serde(rename = "TopicArn")] + pub topic_arn: String, + #[serde(rename = "MessageAttributes")] + pub message_attributes: HashMap, + #[serde(rename = "Timestamp")] + pub timestamp: DateTime, + #[serde(rename = "Subject")] + pub subject: Option, + #[serde(rename = "Message")] + pub message: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct MessageAttribute { + #[serde(rename = "Type")] + pub r#type: String, + #[serde(rename = "Value")] + pub value: String, +} + +impl Trigger for SnsRecord { + fn new(payload: Value) -> Option { + match payload.get("Records").and_then(Value::as_array) { + Some(records) => match serde_json::from_value::(records[0].clone()) { + Ok(record) => 
Some(record), + Err(e) => { + debug!("Failed to deserialize SNS Record: {e}"); + None + } + }, + None => None, + } + } + + fn is_match(payload: &serde_json::Value) -> bool { + if let Some(first_record) = payload + .get("Records") + .and_then(Value::as_array) + .and_then(|r| r.first()) + .take() + { + return first_record.get("Sns").is_some(); + } + + false + } + + #[allow(clippy::cast_possible_truncation)] + fn enrich_span(&self, span: &mut Span, service_mapping: &HashMap) { + debug!("Enriching an Inferred Span for an SNS Event"); + let resource_name = self.get_specific_identifier(); + + let start_time = self + .sns + .timestamp + .timestamp_nanos_opt() + .unwrap_or((self.sns.timestamp.timestamp_millis() as f64 * MS_TO_NS) as i64); + + let service_name = self.resolve_service_name(service_mapping, "sns"); + + span.name = "aws.sns".to_string(); + span.service = service_name.to_string(); + span.resource.clone_from(&resource_name); + span.r#type = "web".to_string(); + span.start = start_time; + span.meta.extend([ + ("operation_name".to_string(), "aws.sns".to_string()), + ("topicname".to_string(), resource_name), + ("topic_arn".to_string(), self.sns.topic_arn.clone()), + ("message_id".to_string(), self.sns.message_id.clone()), + ("type".to_string(), self.sns.r#type.clone()), + ]); + + if let Some(subject) = &self.sns.subject { + span.meta.insert("subject".to_string(), subject.clone()); + } + + if let Some(event_subscription_arn) = &self.event_subscription_arn { + span.meta.insert( + "event_subscription_arn".to_string(), + event_subscription_arn.clone(), + ); + } + } + + fn get_tags(&self) -> HashMap { + HashMap::from([( + FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), + "sns".to_string(), + )]) + } + + fn get_arn(&self, _region: &str) -> String { + self.sns.topic_arn.clone() + } + + fn get_carrier(&self) -> HashMap { + if let Some(ma) = self.sns.message_attributes.get(DATADOG_CARRIER_KEY) { + match ma.r#type.as_str() { + "String" => return 
serde_json::from_str(&ma.value).unwrap_or_default(), + "Binary" => { + if let Ok(carrier) = base64_to_string(&ma.value) { + return serde_json::from_str(&carrier).unwrap_or_default(); + } + } + _ => { + debug!("Unsupported type in SNS message attribute"); + } + } + } else if let Some(event_bridge_message) = &self.sns.message { + if let Ok(event) = serde_json::from_str::(event_bridge_message) { + return event.get_carrier(); + } + } + + HashMap::new() + } + + fn is_async(&self) -> bool { + true + } +} + +impl ServiceNameResolver for SnsRecord { + fn get_specific_identifier(&self) -> String { + self.sns + .topic_arn + .split(':') + .last() + .unwrap_or_default() + .to_string() + } + + fn get_generic_identifier(&self) -> &'static str { + "lambda_sns" + } +} + +#[cfg(test)] +#[allow(clippy::unwrap_used)] +mod tests { + use datadog_trace_protobuf::pb::Span; + + use super::*; + use crate::lifecycle::invocation::triggers::test_utils::read_json_file; + + #[test] + fn test_new() { + let json = read_json_file("sns_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let result = SnsRecord::new(payload).expect("Failed to deserialize into SnsRecord"); + + let message_attributes = HashMap::::from([ + ("_datadog".to_string(), MessageAttribute { + r#type: "String".to_string(), + value: "{\"x-datadog-trace-id\": \"4948377316357291421\", \"x-datadog-parent-id\": \"6746998015037429512\", \"x-datadog-sampling-priority\": \"1\"}".to_string(), + }) + ]); + + let expected = SnsRecord { + event_subscription_arn: Some("arn:aws:sns:sa-east-1:425362996713:serverlessTracingTopicPy:224b60ba-befc-4830-ad96-f1f0ac94eb04".to_string()), + sns: SnsEntity { + message_id: "87056a47-f506-5d77-908b-303605d3b197".to_string(), + r#type: "Notification".to_string(), + topic_arn: "arn:aws:sns:sa-east-1:425362996713:serverlessTracingTopicPy" + .to_string(), + message_attributes, + timestamp: DateTime::parse_from_rfc3339("2022-01-31T14:13:41.637Z") + .unwrap() 
+ .with_timezone(&Utc), + subject: None, + message: Some("Asynchronously invoking a Lambda function with SNS.".to_string()), + }, + }; + + assert_eq!(result, expected); + } + + #[test] + fn test_is_match() { + let json = read_json_file("sns_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize SnsRecord"); + + assert!(SnsRecord::is_match(&payload)); + } + + #[test] + fn test_is_not_match() { + let json = read_json_file("sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize SqsRecord"); + assert!(!SnsRecord::is_match(&payload)); + } + + #[test] + fn test_enrich_span() { + let json = read_json_file("sns_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SnsRecord::new(payload).expect("Failed to deserialize SnsRecord"); + let mut span = Span::default(); + let service_mapping = HashMap::new(); + event.enrich_span(&mut span, &service_mapping); + assert_eq!(span.name, "aws.sns"); + assert_eq!(span.service, "sns"); + assert_eq!(span.resource, "serverlessTracingTopicPy"); + assert_eq!(span.r#type, "web"); + + assert_eq!( + span.meta, + HashMap::from([ + ("operation_name".to_string(), "aws.sns".to_string()), + ("topicname".to_string(), "serverlessTracingTopicPy".to_string()), + ("topic_arn".to_string(), "arn:aws:sns:sa-east-1:425362996713:serverlessTracingTopicPy".to_string()), + ("message_id".to_string(), "87056a47-f506-5d77-908b-303605d3b197".to_string()), + ("type".to_string(), "Notification".to_string()), + ("event_subscription_arn".to_string(), "arn:aws:sns:sa-east-1:425362996713:serverlessTracingTopicPy:224b60ba-befc-4830-ad96-f1f0ac94eb04".to_string()) + ]) + ); + } + + #[test] + fn test_get_tags() { + let json = read_json_file("sns_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SnsRecord::new(payload).expect("Failed to deserialize SnsRecord"); + let tags = 
event.get_tags(); + + let expected = HashMap::from([( + "function_trigger.event_source".to_string(), + "sns".to_string(), + )]); + + assert_eq!(tags, expected); + } + + #[test] + fn test_get_arn() { + let json = read_json_file("sns_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SnsRecord::new(payload).expect("Failed to deserialize SnsRecord"); + assert_eq!( + event.get_arn("us-east-1"), + "arn:aws:sns:sa-east-1:425362996713:serverlessTracingTopicPy" + ); + } + + #[test] + fn test_get_carrier() { + let json = read_json_file("sns_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SnsRecord::new(payload).expect("Failed to deserialize SnsRecord"); + let carrier = event.get_carrier(); + + let expected = HashMap::from([ + ( + "x-datadog-trace-id".to_string(), + "4948377316357291421".to_string(), + ), + ( + "x-datadog-parent-id".to_string(), + "6746998015037429512".to_string(), + ), + ("x-datadog-sampling-priority".to_string(), "1".to_string()), + ]); + + assert_eq!(carrier, expected); + } + + #[test] + fn test_get_carrier_from_binary_value() { + let json = read_json_file("sns_event_binary.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SnsRecord::new(payload).expect("Failed to deserialize SnsRecord"); + let carrier = event.get_carrier(); + + let expected = HashMap::from([ + ( + "x-datadog-trace-id".to_string(), + "4948377316357291421".to_string(), + ), + ( + "x-datadog-parent-id".to_string(), + "6746998015037429512".to_string(), + ), + ("x-datadog-sampling-priority".to_string(), "1".to_string()), + ]); + + assert_eq!(carrier, expected); + } + + #[test] + fn test_get_carrier_from_event_bridge() { + let json = read_json_file("eventbridge_sns_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + println!("{payload:?}"); + let event 
= SnsRecord::new(payload).expect("Failed to deserialize SnsRecord"); + let carrier = event.get_carrier(); + + let expected = HashMap::from([ + ( + "x-datadog-resource-name".to_string(), + "test-bus".to_string(), + ), + ("x-datadog-trace-id".to_string(), "12345".to_string()), + ( + "x-datadog-start-time".to_string(), + "1726515840997".to_string(), + ), + ("x-datadog-sampling-priority".to_string(), "1".to_string()), + ("x-datadog-parent-id".to_string(), "67890".to_string()), + ( + "x-datadog-tags".to_string(), + "_dd.p.dm=-1,_dd.p.tid=123567890".to_string(), + ), + ]); + + assert_eq!(carrier, expected); + } + + #[test] + fn test_resolve_service_name() { + let json = read_json_file("sns_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SnsRecord::new(payload).expect("Failed to deserialize SnsRecord"); + + // Priority is given to the specific key + let specific_service_mapping = HashMap::from([ + ( + "serverlessTracingTopicPy".to_string(), + "specific-service".to_string(), + ), + ("lambda_sns".to_string(), "generic-service".to_string()), + ]); + + assert_eq!( + event.resolve_service_name(&specific_service_mapping, "sns"), + "specific-service" + ); + + let generic_service_mapping = + HashMap::from([("lambda_sns".to_string(), "generic-service".to_string())]); + assert_eq!( + event.resolve_service_name(&generic_service_mapping, "sns"), + "generic-service" + ); + } +} diff --git a/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs b/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs new file mode 100644 index 000000000..a4bf0e44f --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/triggers/sqs_event.rs @@ -0,0 +1,525 @@ +use datadog_trace_protobuf::pb::Span; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::collections::HashMap; +use tracing::debug; + +use crate::lifecycle::invocation::{ + processor::MS_TO_NS, + triggers::{ + event_bridge_event::EventBridgeEvent, + 
get_aws_partition_by_region, + sns_event::{SnsEntity, SnsRecord}, + ServiceNameResolver, Trigger, DATADOG_CARRIER_KEY, FUNCTION_TRIGGER_EVENT_SOURCE_TAG, + }, +}; +use crate::traces::context::{Sampling, SpanContext}; + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct SqsEvent { + #[serde(rename = "Records")] + pub records: Vec, +} + +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +pub struct SqsRecord { + #[serde(rename = "messageId")] + pub message_id: String, + #[serde(rename = "receiptHandle")] + pub receipt_handle: String, + pub attributes: Attributes, + #[serde(rename = "messageAttributes")] + pub message_attributes: HashMap, + #[serde(rename = "md5OfBody")] + pub md5_of_body: String, + #[serde(rename = "eventSource")] + pub event_source: String, + #[serde(rename = "eventSourceARN")] + pub event_source_arn: String, + #[serde(rename = "awsRegion")] + pub aws_region: String, + pub body: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct MessageAttribute { + #[serde(rename = "stringValue")] + pub string_value: Option, + #[serde(rename = "binaryValue")] + pub binary_value: Option, + #[serde(rename = "stringListValues")] + pub string_list_values: Option>, + #[serde(rename = "binaryListValues")] + pub binary_list_values: Option>, + #[serde(rename = "dataType")] + pub data_type: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct Attributes { + #[serde(rename = "ApproximateFirstReceiveTimestamp")] + pub approximate_first_receive_timestamp: String, + #[serde(rename = "ApproximateReceiveCount")] + pub approximate_receive_count: String, + #[serde(rename = "SentTimestamp")] + pub sent_timestamp: String, + #[serde(rename = "SenderId")] + pub sender_id: String, + #[serde(rename = "AWSTraceHeader")] + pub aws_trace_header: Option, +} + +impl Trigger for SqsRecord { + fn new(payload: Value) -> Option { + let records = payload.get("Records").and_then(Value::as_array); + match 
records { + Some(records) => match serde_json::from_value::(records[0].clone()) { + Ok(event) => Some(event), + Err(e) => { + debug!("Failed to deserialize SQS Record: {e}"); + None + } + }, + None => None, + } + } + + fn is_match(payload: &Value) -> bool { + if let Some(first_record) = payload + .get("Records") + .and_then(Value::as_array) + .and_then(|r| r.first()) + .take() + { + first_record + .get("eventSource") + .and_then(Value::as_str) + .map_or(false, |s| s == "aws:sqs") + } else { + false + } + } + + #[allow(clippy::cast_possible_truncation)] + fn enrich_span(&self, span: &mut Span, service_mapping: &HashMap) { + debug!("Enriching an Inferred Span for an SQS Event"); + let resource = self.get_specific_identifier(); + let start_time = (self + .attributes + .sent_timestamp + .parse::() + .unwrap_or_default() as f64 + * MS_TO_NS) as i64; + + let service_name = self.resolve_service_name(service_mapping, "sqs"); + + span.name = "aws.sqs".to_string(); + span.service = service_name.to_string(); + span.resource = resource; + span.r#type = "web".to_string(); + span.start = start_time; + span.meta.extend(HashMap::from([ + ("operation_name".to_string(), "aws.sqs".to_string()), + ("receipt_handle".to_string(), self.receipt_handle.clone()), + ( + "retry_count".to_string(), + self.attributes.approximate_receive_count.clone(), + ), + ("sender_id".to_string(), self.attributes.sender_id.clone()), + ("source_arn".to_string(), self.event_source_arn.clone()), + ("aws_region".to_string(), self.aws_region.clone()), + ])); + } + + fn get_tags(&self) -> HashMap { + HashMap::from([ + ( + "retry_count".to_string(), + self.attributes.approximate_receive_count.clone(), + ), + ("sender_id".to_string(), self.attributes.sender_id.clone()), + ("source_arn".to_string(), self.event_source_arn.clone()), + ("aws_region".to_string(), self.aws_region.clone()), + ( + FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(), + "sqs".to_string(), + ), + ]) + } + + fn get_arn(&self, region: &str) -> String 
{ + if let [_, _, _, _, account, queue_name] = self + .event_source_arn + .split(':') + .collect::>() + .as_slice() + { + format!( + "arn:{}:sqs:{}:{}:{}", + get_aws_partition_by_region(region), + region, + account, + queue_name + ) + } else { + String::new() + } + } + + fn get_carrier(&self) -> HashMap { + let carrier = HashMap::new(); + + if let Some(ma) = self.message_attributes.get(DATADOG_CARRIER_KEY) { + if let Some(string_value) = &ma.string_value { + return serde_json::from_str(string_value).unwrap_or_default(); + } + } + + // Check for SNS event sent through SQS + if let Ok(sns_entity) = serde_json::from_str::(&self.body) { + let sns_record = SnsRecord { + sns: sns_entity, + event_subscription_arn: None, + }; + + return sns_record.get_carrier(); + } else if let Ok(event) = serde_json::from_str::(&self.body) { + return event.get_carrier(); + } + + // TODO: AWSTraceHeader + carrier + } + + fn is_async(&self) -> bool { + true + } +} + +impl ServiceNameResolver for SqsRecord { + fn get_specific_identifier(&self) -> String { + self.event_source_arn + .split(':') + .last() + .unwrap_or_default() + .to_string() + } + + fn get_generic_identifier(&self) -> &'static str { + "lambda_sqs" + } +} + +// extractTraceContextfromAWSTraceHeader extracts trace context from the +// AWSTraceHeader directly. Unlike the other carriers in this file, it should +// not be passed to the tracer.Propagator, instead extracting context directly. +pub(crate) fn extract_trace_context_from_aws_trace_header( + headers_string: Option, +) -> Option { + let value = headers_string?; + if !value.starts_with("Root=") { + return None; + } + + let mut start_part = 0; + let mut trace_id = String::new(); + let mut parent_id = String::new(); + let mut sampled = String::new(); + + let length = value.len(); + while start_part < length { + let end_part = value[start_part..] 
+ .find(';') + .map_or(length, |i| i + start_part); + let part = &value[start_part..end_part]; + + if part.starts_with("Root=") { + if trace_id.is_empty() { + trace_id = part[24..].to_string(); + } + } else if let Some(parent_part) = part.strip_prefix("Parent=") { + if parent_id.is_empty() { + parent_id = parent_part.to_string(); + } + } else if part.starts_with("Sampled=") && sampled.is_empty() { + sampled = part[8..].to_string(); + } + + if !trace_id.is_empty() && !parent_id.is_empty() && !sampled.is_empty() { + break; + } + start_part = end_part + 1; + } + + let trace_id = u64::from_str_radix(&trace_id, 16).ok()?; + let parent_id = u64::from_str_radix(&parent_id, 16).ok()?; + + if trace_id == 0 || parent_id == 0 { + debug!("awstrace_header contains empty trace or parent ID"); + return None; + } + + let sampling_priority = i8::from(sampled == "1"); + + Some(SpanContext { + // the context from AWS Header is used by Datadog only and does not contain the upper + // 64 bits like other 128 w3c compliant trace ids + trace_id, + span_id: parent_id, + sampling: Some(Sampling { + priority: Some(sampling_priority), + mechanism: None, + }), + origin: None, + tags: HashMap::new(), + links: Vec::new(), + }) +} + +#[cfg(test)] +#[allow(clippy::unwrap_used)] +mod tests { + use super::*; + use crate::lifecycle::invocation::triggers::test_utils::read_json_file; + + #[test] + fn test_new() { + let json = read_json_file("sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let result = SqsRecord::new(payload).expect("Failed to deserialize into Record"); + + let message_attributes = HashMap::::from([ + ("_datadog".to_string(), MessageAttribute { + string_value: Some("{\"x-datadog-trace-id\":\"2684756524522091840\",\"x-datadog-parent-id\":\"7431398482019833808\",\"x-datadog-sampling-priority\":\"1\"}".to_string()), + binary_value: None, + string_list_values: Some(vec![]), + binary_list_values: Some(vec![]), + data_type: 
"String".to_string(), + }) + ]); + + let expected = SqsRecord { + message_id: "19dd0b57-b21e-4ac1-bd88-01bbb068cb78".to_string(), + receipt_handle: "MessageReceiptHandle".to_string(), + attributes: Attributes { + approximate_first_receive_timestamp: "1523232000001".to_string(), + approximate_receive_count: "1".to_string(), + sent_timestamp: "1523232000000".to_string(), + sender_id: "123456789012".to_string(), + aws_trace_header: None, + }, + message_attributes, + md5_of_body: "{{{md5_of_body}}}".to_string(), + event_source: "aws:sqs".to_string(), + event_source_arn: "arn:aws:sqs:us-east-1:123456789012:MyQueue".to_string(), + aws_region: "us-east-1".to_string(), + body: "Hello from SQS!".to_string(), + }; + + assert_eq!(result, expected); + } + + #[test] + fn test_is_match() { + let json = read_json_file("sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize SqsRecord"); + + assert!(SqsRecord::is_match(&payload)); + } + + #[test] + fn test_is_not_match() { + let json = read_json_file("api_gateway_http_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize SqsRecord"); + assert!(!SqsRecord::is_match(&payload)); + } + + #[test] + fn test_enrich_span() { + let json = read_json_file("sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SqsRecord::new(payload).expect("Failed to deserialize SqsRecord"); + let mut span = Span::default(); + let service_mapping = HashMap::new(); + event.enrich_span(&mut span, &service_mapping); + assert_eq!(span.name, "aws.sqs"); + assert_eq!(span.service, "sqs"); + assert_eq!(span.resource, "MyQueue"); + assert_eq!(span.r#type, "web"); + + assert_eq!( + span.meta, + HashMap::from([ + ("operation_name".to_string(), "aws.sqs".to_string()), + ( + "receipt_handle".to_string(), + "MessageReceiptHandle".to_string(), + ), + ("retry_count".to_string(), 1.to_string()), + ("sender_id".to_string(), 
"123456789012".to_string()), + ( + "source_arn".to_string(), + "arn:aws:sqs:us-east-1:123456789012:MyQueue".to_string() + ), + ("aws_region".to_string(), "us-east-1".to_string()), + ]) + ); + } + + #[test] + fn test_get_tags() { + let json = read_json_file("sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SqsRecord::new(payload).expect("Failed to deserialize SqsRecord"); + let tags = event.get_tags(); + + let expected = HashMap::from([ + ("retry_count".to_string(), 1.to_string()), + ("sender_id".to_string(), "123456789012".to_string()), + ( + "source_arn".to_string(), + "arn:aws:sqs:us-east-1:123456789012:MyQueue".to_string(), + ), + ("aws_region".to_string(), "us-east-1".to_string()), + ( + "function_trigger.event_source".to_string(), + "sqs".to_string(), + ), + ]); + + assert_eq!(tags, expected); + } + + #[test] + fn test_get_arn() { + let json = read_json_file("sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SqsRecord::new(payload).expect("Failed to deserialize SqsRecord"); + assert_eq!( + event.get_arn("us-east-1"), + "arn:aws:sqs:us-east-1:123456789012:MyQueue" + ); + } + + #[test] + fn test_get_carrier() { + let json = read_json_file("sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SqsRecord::new(payload).expect("Failed to deserialize SqsRecord"); + let carrier = event.get_carrier(); + + let expected = HashMap::from([ + ( + "x-datadog-trace-id".to_string(), + "2684756524522091840".to_string(), + ), + ( + "x-datadog-parent-id".to_string(), + "7431398482019833808".to_string(), + ), + ("x-datadog-sampling-priority".to_string(), "1".to_string()), + ]); + + assert_eq!(carrier, expected); + } + + #[test] + fn test_get_carrier_from_sns() { + let json = read_json_file("sns_sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to 
deserialize into Value"); + let event = SqsRecord::new(payload).expect("Failed to deserialize SqsRecord"); + let carrier = event.get_carrier(); + + let expected = HashMap::from([ + ( + "x-datadog-trace-id".to_string(), + "2776434475358637757".to_string(), + ), + ( + "x-datadog-parent-id".to_string(), + "4493917105238181843".to_string(), + ), + ("x-datadog-sampling-priority".to_string(), "1".to_string()), + ]); + + assert_eq!(carrier, expected); + } + + #[test] + fn test_get_carrier_from_eventbridge() { + let json = read_json_file("eventbridge_sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SqsRecord::new(payload).expect("Failed to deserialize EventBridgeEvent"); + let carrier = event.get_carrier(); + + let expected = HashMap::from([ + ( + "x-datadog-trace-id".to_string(), + "7379586022458917877".to_string(), + ), + ( + "traceparent".to_string(), + "00-000000000000000066698e63821a03f5-24b17e9b6476c018-01".to_string(), + ), + ("x-datadog-tags".to_string(), "_dd.p.dm=-0".to_string()), + ( + "x-datadog-parent-id".to_string(), + "2644033662113726488".to_string(), + ), + ("tracestate".to_string(), "dd=t.dm:-0;s:1".to_string()), + ("x-datadog-sampling-priority".to_string(), "1".to_string()), + ]); + + assert_eq!(carrier, expected); + } + + #[test] + fn test_resolve_service_name() { + let json = read_json_file("sqs_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SqsRecord::new(payload).expect("Failed to deserialize SqsRecord"); + + // Priority is given to the specific key + let specific_service_mapping = HashMap::from([ + ("MyQueue".to_string(), "specific-service".to_string()), + ("lambda_sqs".to_string(), "generic-service".to_string()), + ]); + + assert_eq!( + event.resolve_service_name(&specific_service_mapping, "sqs"), + "specific-service" + ); + + let generic_service_mapping = + HashMap::from([("lambda_sqs".to_string(), 
"generic-service".to_string())]); + assert_eq!( + event.resolve_service_name(&generic_service_mapping, "sqs"), + "generic-service" + ); + } + + #[test] + fn extract_java_sqs_header_context() { + let json = read_json_file("eventbridge_sqs_java_header_event.json"); + let payload = serde_json::from_str(&json).expect("Failed to deserialize into Value"); + let event = SqsRecord::new(payload).expect("Failed to deserialize EventBridgeEvent"); + + assert_eq!( + extract_trace_context_from_aws_trace_header(Some( + event.attributes.aws_trace_header.unwrap().to_string() + )) + .unwrap(), + SpanContext { + trace_id: 130_944_522_478_755_159, + span_id: 9_032_698_535_745_367_362, + sampling: Some(Sampling { + priority: Some("0".parse().unwrap()), + mechanism: None, + }), + origin: None, + tags: HashMap::new(), + links: Vec::new(), + } + ); + } +} diff --git a/bottlecap/src/lifecycle/invocation/triggers/step_function_event.rs b/bottlecap/src/lifecycle/invocation/triggers/step_function_event.rs new file mode 100644 index 000000000..6169be7d3 --- /dev/null +++ b/bottlecap/src/lifecycle/invocation/triggers/step_function_event.rs @@ -0,0 +1,383 @@ +use std::collections::HashMap; + +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use sha2::{Digest, Sha256}; + +use crate::{ + lifecycle::invocation::triggers::{ + ServiceNameResolver, Trigger, FUNCTION_TRIGGER_EVENT_SOURCE_TAG, + }, + traces::{ + context::{Sampling, SpanContext}, + propagation::text_map_propagator::DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY, + }, +}; + +#[allow(clippy::module_name_repetitions)] +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct LegacyStepFunctionEvent { + #[serde(rename = "Payload")] + pub payload: StepFunctionEvent, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct StepFunctionEvent { + #[serde(rename = "Execution")] + pub execution: Execution, + #[serde(rename = "State")] + pub state: State, + #[serde(rename = "StateMachine")] + pub 
state_machine: Option<StateMachine>,
+}
+
+/// `Execution` section of the payload; its `Id` seeds the trace ID and the
+/// parent span ID.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct Execution {
+    #[serde(rename = "Id")]
+    id: String,
+}
+
+/// `State` section of the payload; `Name` and `EnteredTime` feed the parent
+/// span ID.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct State {
+    #[serde(rename = "Name")]
+    name: String,
+    #[serde(rename = "EnteredTime")]
+    entered_time: String,
+}
+
+/// Optional `StateMachine` section of the payload; its `Id` is what
+/// `get_arn` returns.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct StateMachine {
+    #[serde(rename = "Id")]
+    id: String,
+}
+
+impl Trigger for StepFunctionEvent {
+    /// Builds the event from a raw invocation payload.
+    ///
+    /// Returns `None` (after a debug log) when the payload does not
+    /// deserialize into a `StepFunctionEvent`.
+    fn new(payload: serde_json::Value) -> Option<Self>
+    where
+        Self: Sized,
+    {
+        // Legacy events nest the event under `Payload`; otherwise the root is the event.
+        let event_value = payload.get("Payload").unwrap_or(&payload);
+        match serde_json::from_value::<StepFunctionEvent>(event_value.clone()) {
+            Ok(event) => Some(event),
+            Err(e) => {
+                tracing::debug!("Failed to deserialize Step Function Event: {e}");
+                None
+            }
+        }
+    }
+
+    /// Returns `true` when the payload (or its nested `Payload`) carries
+    /// `Execution.Id`, `State.Name` and `State.EnteredTime`.
+    fn is_match(payload: &serde_json::Value) -> bool
+    where
+        Self: Sized,
+    {
+        // Check first if the payload is a Legacy Step Function event
+        let p = payload.get("Payload").unwrap_or(payload);
+
+        let execution_id = p
+            .get("Execution")
+            .and_then(Value::as_object)
+            .and_then(|e| e.get("Id"));
+        let state = p.get("State").and_then(Value::as_object);
+        let name = state.and_then(|s| s.get("Name"));
+        let entered_time = state.and_then(|s| s.get("EnteredTime"));
+
+        execution_id.is_some() && name.is_some() && entered_time.is_some()
+    }
+
+    /// Intentionally a no-op: this trigger adds nothing to the span.
+    fn enrich_span(
+        &self,
+        _span: &mut datadog_trace_protobuf::pb::Span,
+        _service_mapping: &HashMap<String, String>,
+    ) {
+    }
+
+    /// Only the event-source tag is emitted, with the value `states`.
+    fn get_tags(&self) -> HashMap<String, String> {
+        HashMap::from([(
+            FUNCTION_TRIGGER_EVENT_SOURCE_TAG.to_string(),
+            "states".to_string(),
+        )])
+    }
+
+    /// Returns the state machine `Id`, or an empty string when the payload
+    /// had no `StateMachine` section. The region argument is unused.
+    fn get_arn(&self, _region: &str) -> String {
+        self.state_machine
+            .as_ref()
+            .map(|sm| sm.id.clone())
+            .unwrap_or_default()
+    }
+
+    /// Always empty: the trace context comes from `get_span_context` instead
+    /// of propagated headers.
+    fn get_carrier(&self) -> HashMap<String, String> {
+        HashMap::new()
+    }
+
+    fn is_async(&self) -> bool {
+        true
+    }
+}
+
+impl StepFunctionEvent {
+    /// Builds the `SpanContext` derived from this event.
+    ///
+    /// The low 64 bits of the trace ID go in `trace_id`; the high 64 bits are
+    /// stored as lowercase hex under `DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY`
+    /// in `tags`. Sampling priority is fixed to `1` and origin to `states`.
+    #[must_use]
+    pub fn get_span_context(&self) -> SpanContext {
+        let (lo_tid, hi_tid) = Self::generate_trace_id(self.execution.id.clone());
+        let tags = HashMap::from([(
+            DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY.to_string(),
+            format!("{hi_tid:x}"),
+        )]);
+
+        let parent_id = Self::generate_parent_id(
+            self.execution.id.clone(),
+            self.state.name.clone(),
+            self.state.entered_time.clone(),
+        );
+
+        SpanContext {
+            trace_id: lo_tid,
+            span_id: parent_id,
+            // Priority Auto Keep
+            sampling: Some(Sampling {
+                priority: Some(1),
+                mechanism: None,
+            }),
+            origin: Some("states".to_string()),
+            tags,
+            links: vec![],
+        }
+    }
+
+    /// Derives a deterministic 64 bit ID from the SHA-256 hash of
+    /// `{execution_id}#{state_name}#{state_entered_time}`.
+    ///
+    /// The same inputs always yield the same ID; the first 8 hash bytes are used.
+    fn generate_parent_id(
+        execution_id: String,
+        state_name: String,
+        state_entered_time: String,
+    ) -> u64 {
+        let unique_string = format!("{execution_id}#{state_name}#{state_entered_time}");
+
+        let hash = Sha256::digest(unique_string.as_bytes());
+        Self::get_positive_u64(&hash[0..8])
+    }
+
+    /// Derives a deterministic 128 bit trace ID from the SHA-256 hash of the
+    /// Step Function Execution ARN.
+    ///
+    /// Returns `(lower_order_bits, higher_order_bits)`: hash bytes `8..16`
+    /// form the lower half and bytes `0..8` the higher half.
+    fn generate_trace_id(execution_arn: String) -> (u64, u64) {
+        let hash = Sha256::digest(execution_arn.as_bytes());
+
+        let lower_order_bits = Self::get_positive_u64(&hash[8..16]);
+        let higher_order_bits = Self::get_positive_u64(&hash[0..8]);
+
+        (lower_order_bits, higher_order_bits)
+    }
+
+    /// Converts the first 8 bytes of a byte array to a positive `u64`.
+    ///
+    /// Bytes are folded most-significant first, bit 63 is cleared, and a
+    /// result of `0` is replaced by `1`.
+    fn get_positive_u64(hash_bytes: &[u8]) -> u64 {
+        let mut result: u64 = hash_bytes
+            .iter()
+            .take(8)
+            .fold(0, |acc, &byte| (acc << 8) + u64::from(byte));
+
+        // Ensure the highest bit is always 0
+        result &= !(1u64 << 63);
+
+        // Return 1 if result is 0
+        if result == 0 {
+            1
+        } else {
+            result
+        }
+    }
+}
+
+impl ServiceNameResolver for StepFunctionEvent {
+    /// No per-resource identifier is provided for Step Functions.
+    fn get_specific_identifier(&self) -> String {
+        String::new()
+    }
+
+    fn get_generic_identifier(&self) -> &'static str {
+        "lambda_stepfunction"
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use
super::*;
+    use crate::lifecycle::invocation::triggers::test_utils::read_json_file;
+
+    const EXECUTION_ARN: &str = "arn:aws:states:us-east-1:425362996713:execution:agocsTestSF:bc9f281c-3daa-4e5a-9a60-471a3810bf44";
+    const STATE_MACHINE_ARN: &str =
+        "arn:aws:states:us-east-1:425362996713:stateMachine:agocsTestSF";
+
+    /// Reads a JSON fixture and parses it into a generic `Value`.
+    fn load_payload(file_name: &str) -> Value {
+        let json = read_json_file(file_name);
+        serde_json::from_str(&json).expect("Failed to deserialize into Value")
+    }
+
+    /// Reads a JSON fixture and builds a `StepFunctionEvent` from it.
+    fn load_event(file_name: &str) -> StepFunctionEvent {
+        StepFunctionEvent::new(load_payload(file_name))
+            .expect("Failed to deserialize StepFunctionEvent")
+    }
+
+    /// The event both the regular and the legacy fixtures must deserialize into.
+    fn expected_event() -> StepFunctionEvent {
+        StepFunctionEvent {
+            execution: Execution {
+                id: String::from(EXECUTION_ARN),
+            },
+            state: State {
+                name: String::from("agocsTest1"),
+                entered_time: String::from("2024-07-30T19:55:53.018Z"),
+            },
+            state_machine: Some(StateMachine {
+                id: String::from(STATE_MACHINE_ARN),
+            }),
+        }
+    }
+
+    #[test]
+    fn test_new() {
+        let result = load_event("step_function_event.json");
+
+        assert_eq!(result, expected_event());
+    }
+
+    #[test]
+    fn test_new_legacy_event() {
+        let result = load_event("step_function_legacy_event.json");
+
+        assert_eq!(result, expected_event());
+    }
+
+    #[test]
+    fn test_is_match() {
+        let payload = load_payload("step_function_event.json");
+
+        assert!(StepFunctionEvent::is_match(&payload));
+    }
+
+    #[test]
+    fn test_is_match_legacy_event() {
+        let payload = load_payload("step_function_legacy_event.json");
+
+        assert!(StepFunctionEvent::is_match(&payload));
+    }
+
+    #[test]
+    fn test_is_not_match() {
+        // An SQS payload has none of the Step Function fields.
+        let payload = load_payload("sqs_event.json");
+
+        assert!(!StepFunctionEvent::is_match(&payload));
+    }
+
+    #[test]
+    fn test_get_tags() {
+        let event = load_event("step_function_event.json");
+        let tags = event.get_tags();
+
+        let expected = HashMap::from([(
+            "function_trigger.event_source".to_string(),
+            "states".to_string(),
+        )]);
+
+        assert_eq!(tags, expected);
+    }
+
+    #[test]
+    fn test_get_arn() {
+        let event = load_event("step_function_event.json");
+
+        assert_eq!(event.get_arn("us-east-1"), STATE_MACHINE_ARN);
+    }
+
+    #[test]
+    fn test_get_carrier() {
+        let event = load_event("step_function_event.json");
+        let carrier = event.get_carrier();
+
+        let expected = HashMap::new();
+
+        assert_eq!(carrier, expected);
+    }
+
+    #[test]
+    fn test_get_span_context() {
+        let event = load_event("step_function_event.json");
+
+        let span_context = event.get_span_context();
+
+        let expected = SpanContext {
+            trace_id: 5_744_042_798_732_701_615,
+            span_id: 2_902_498_116_043_018_663,
+            sampling: Some(Sampling {
+                priority: Some(1),
+                mechanism: None,
+            }),
+            origin: Some("states".to_string()),
+            tags: HashMap::from([(
+                DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY.to_string(),
+                "1914fe7789eb32be".to_string(),
+            )]),
+            links: vec![],
+        };
+
+        assert_eq!(span_context, expected);
+    }
+
+    #[test]
+    fn test_generate_parent_id() {
+        let parent_id = StepFunctionEvent::generate_parent_id(
+            String::from("arn:aws:states:sa-east-1:601427271234:express:DatadogStateMachine:acaf1a67-336a-e854-1599-2a627eb2dd8a:c8baf081-31f1-464d-971f-70cb17d01111"),
+            String::from("step-one"),
+            String::from("2022-12-08T21:08:19.224Z")
+        );
+
+        assert_eq!(parent_id, 4_340_734_536_022_949_921);
+
+        // Changing a single character of the entered time must change the ID.
+        let parent_id = StepFunctionEvent::generate_parent_id(
+            String::from("arn:aws:states:sa-east-1:601427271234:express:DatadogStateMachine:acaf1a67-336a-e854-1599-2a627eb2dd8a:c8baf081-31f1-464d-971f-70cb17d01111"),
+            String::from("step-one"),
+            String::from("2022-12-08T21:08:19.224Y")
+        );
+
+        assert_eq!(parent_id, 981_693_280_319_792_699);
+    }
+
+    #[test]
+    fn test_generate_trace_id() {
+        let (lo_tid, hi_tid) = StepFunctionEvent::generate_trace_id(String::from(
+            "arn:aws:states:sa-east-1:425362996713:stateMachine:MyStateMachine-b276uka1j",
+        ));
+        let hex_tid = format!("{hi_tid:x}");
+
+        assert_eq!(lo_tid, 1_680_583_253_837_593_461);
+        assert_eq!(hi_tid, 6_984_552_746_569_958_392);
+
+        assert_eq!(hex_tid, "60ee1db79e4803f8");
+
+        let (lo_tid, hi_tid) = StepFunctionEvent::generate_trace_id(String::from(EXECUTION_ARN));
+        let hex_tid = format!("{hi_tid:x}");
+
+        assert_eq!(lo_tid, 5_744_042_798_732_701_615);
+        assert_eq!(hi_tid, 1_807_349_139_850_867_390);
+
+        assert_eq!(hex_tid, "1914fe7789eb32be");
+    }
+}
diff --git a/bottlecap/src/lifecycle/invocation_context.rs b/bottlecap/src/lifecycle/invocation_context.rs
deleted file mode
100644 index 24e8e4541..000000000 --- a/bottlecap/src/lifecycle/invocation_context.rs +++ /dev/null @@ -1,72 +0,0 @@ -use std::collections::VecDeque; - -use tracing::debug; - -#[derive(Debug, Clone)] -pub struct InvocationContext { - pub request_id: String, - pub runtime_duration_ms: f64, -} - -#[allow(clippy::module_name_repetitions)] -pub struct InvocationContextBuffer { - buffer: VecDeque, -} - -impl Default for InvocationContextBuffer { - fn default() -> Self { - InvocationContextBuffer { - buffer: VecDeque::::with_capacity(5), - } - } -} - -impl InvocationContextBuffer { - pub fn insert(&mut self, invocation_context: InvocationContext) { - if self.buffer.len() == self.buffer.capacity() { - self.buffer.pop_front(); - self.buffer.push_back(invocation_context); - } else { - if self.get(&invocation_context.request_id).is_some() { - self.remove(&invocation_context.request_id); - } - - self.buffer.push_back(invocation_context); - } - } - - pub fn remove(&mut self, request_id: &String) -> Option { - if let Some(i) = self - .buffer - .iter() - .position(|context| context.request_id == *request_id) - { - return self.buffer.remove(i); - } - debug!("Context for request_id: {:?} not found", request_id); - - None - } - - #[must_use] - pub fn get(&self, request_id: &String) -> Option<&InvocationContext> { - self.buffer - .iter() - .find(|context| context.request_id == *request_id) - } - - pub fn add_runtime_duration(&mut self, request_id: &String, runtime_duration_ms: f64) { - if let Some(context) = self - .buffer - .iter_mut() - .find(|context| context.request_id == *request_id) - { - context.runtime_duration_ms = runtime_duration_ms; - } else { - self.insert(InvocationContext { - request_id: request_id.to_string(), - runtime_duration_ms, - }); - } - } -} diff --git a/bottlecap/src/lifecycle/listener.rs b/bottlecap/src/lifecycle/listener.rs new file mode 100644 index 000000000..388fba133 --- /dev/null +++ b/bottlecap/src/lifecycle/listener.rs @@ -0,0 +1,185 @@ +// 
Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/
+// SPDX-License-Identifier: Apache-2.0
+
+use std::collections::HashMap;
+use std::convert::Infallible;
+use std::net::SocketAddr;
+use std::sync::Arc;
+
+use hyper::service::{make_service_fn, service_fn};
+use hyper::{http, Body, Method, Request, Response, StatusCode};
+use serde_json::json;
+use tokio::sync::Mutex;
+use tracing::{debug, error, warn};
+
+use crate::lifecycle::invocation::processor::Processor as InvocationProcessor;
+use crate::traces::propagation::text_map_propagator::{
+    DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY, DATADOG_SAMPLING_PRIORITY_KEY, DATADOG_TAGS_KEY,
+    DATADOG_TRACE_ID_KEY,
+};
+
+const HELLO_PATH: &str = "/lambda/hello";
+const START_INVOCATION_PATH: &str = "/lambda/start-invocation";
+const END_INVOCATION_PATH: &str = "/lambda/end-invocation";
+// Declared as `u16` so the socket address needs no fallible conversion.
+const AGENT_PORT: u16 = 8124;
+
+/// HTTP listener on `127.0.0.1:8124` that forwards start/end invocation
+/// requests to the shared invocation processor.
+pub struct Listener {
+    pub invocation_processor: Arc<Mutex<InvocationProcessor>>,
+}
+
+impl Listener {
+    /// Binds the listener and serves requests until the server stops.
+    ///
+    /// Returns an error when the address cannot be bound or the server fails.
+    // NOTE(review): the generic argument of `Box` was missing from this patch
+    // text; `dyn std::error::Error` is assumed — confirm against the repository.
+    pub async fn start(&self) -> Result<(), Box<dyn std::error::Error>> {
+        let invocation_processor = self.invocation_processor.clone();
+
+        let make_svc = make_service_fn(move |_| {
+            let invocation_processor = invocation_processor.clone();
+
+            let service = service_fn(move |req| Self::handler(req, invocation_processor.clone()));
+
+            async move { Ok::<_, Infallible>(service) }
+        });
+
+        let addr = SocketAddr::from(([127, 0, 0, 1], AGENT_PORT));
+        let server_builder = hyper::Server::try_bind(&addr)?;
+
+        let server = server_builder.serve(make_svc);
+
+        // start hyper http server
+        if let Err(e) = server.await {
+            error!("Failed to start the Lifecycle Listener {e}");
+            return Err(e.into());
+        }
+
+        Ok(())
+    }
+
+    /// Builds an empty-bodied response carrying only `status`.
+    ///
+    /// Unlike the response builder this cannot fail, so the error paths of
+    /// `handler` never panic.
+    fn status_response(status: StatusCode) -> Response<Body> {
+        let mut response = Response::new(Body::empty());
+        *response.status_mut() = status;
+        response
+    }
+
+    /// Routes a request by method and path; anything unknown gets a 404.
+    async fn handler(
+        req: Request<Body>,
+        invocation_processor: Arc<Mutex<InvocationProcessor>>,
+    ) -> http::Result<Response<Body>> {
+        match (req.method(), req.uri().path()) {
+            (&Method::POST, START_INVOCATION_PATH) => {
+                Self::start_invocation_handler(req, invocation_processor).await
+            }
+            (&Method::POST, END_INVOCATION_PATH) => {
+                match Self::end_invocation_handler(req, invocation_processor).await {
+                    Ok(response) => Ok(response),
+                    Err(e) => {
+                        error!("Failed to end invocation {e}");
+                        Ok(Self::status_response(StatusCode::INTERNAL_SERVER_ERROR))
+                    }
+                }
+            }
+            (&Method::GET, HELLO_PATH) => Self::hello_handler(),
+            _ => Ok(Self::status_response(StatusCode::NOT_FOUND)),
+        }
+    }
+
+    /// Handles `POST /lambda/start-invocation`.
+    ///
+    /// Passes headers and body to the processor, then echoes any extracted
+    /// span context back as Datadog response headers. Replies 400 when the
+    /// request body cannot be read.
+    async fn start_invocation_handler(
+        req: Request<Body>,
+        invocation_processor: Arc<Mutex<InvocationProcessor>>,
+    ) -> http::Result<Response<Body>> {
+        debug!("Received start invocation request");
+        let (parts, body) = req.into_parts();
+        match hyper::body::to_bytes(body).await {
+            Ok(b) => {
+                let body = b.to_vec();
+                let mut processor = invocation_processor.lock().await;
+
+                let headers = Self::headers_to_map(parts.headers);
+
+                processor.on_invocation_start(headers, body);
+
+                let mut response = Response::builder().status(200);
+
+                // If a `SpanContext` exists, then tell the tracer to use it.
+                // todo: update this whole code with DatadogHeaderPropagator::inject
+                // since this logic looks messy
+                if let Some(sp) = &processor.extracted_span_context {
+                    response = response.header(DATADOG_TRACE_ID_KEY, sp.trace_id.to_string());
+                    if let Some(priority) = sp.sampling.and_then(|s| s.priority) {
+                        response =
+                            response.header(DATADOG_SAMPLING_PRIORITY_KEY, priority.to_string());
+                    }
+
+                    // Handle 128 bit trace ids
+                    if let Some(trace_id_higher_order_bits) =
+                        sp.tags.get(DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY)
+                    {
+                        response = response.header(
+                            DATADOG_TAGS_KEY,
+                            format!("{DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY}={trace_id_higher_order_bits}"),
+                        );
+                    }
+                }
+
+                // Release the processor lock before building the response body.
+                drop(processor);
+
+                response.body(Body::from(json!({}).to_string()))
+            }
+            Err(e) => {
+                error!("Could not read start invocation request body {e}");
+
+                Response::builder()
+                    .status(400)
+                    .body(Body::from("Could not read start invocation request body"))
+            }
+        }
+    }
+
+    /// Handles `POST /lambda/end-invocation`.
+    ///
+    /// Passes headers and body to the processor and replies with an empty
+    /// JSON object, or 400 when the request body cannot be read.
+    async fn end_invocation_handler(
+        req: Request<Body>,
+        invocation_processor: Arc<Mutex<InvocationProcessor>>,
+    ) -> http::Result<Response<Body>> {
+        debug!("Received end invocation request");
+        let (parts, body) = req.into_parts();
+        match hyper::body::to_bytes(body).await {
+            Ok(b) => {
+                let body = b.to_vec();
+                let mut processor = invocation_processor.lock().await;
+
+                let headers = Self::headers_to_map(parts.headers);
+                processor.on_invocation_end(headers, body);
+                drop(processor);
+
+                Response::builder()
+                    .status(200)
+                    .body(Body::from(json!({}).to_string()))
+            }
+            Err(e) => {
+                error!("Could not read end invocation request body {e}");
+
+                Response::builder()
+                    .status(400)
+                    .body(Body::from("Could not read end invocation request body"))
+            }
+        }
+    }
+
+    /// Handles the deprecated `GET /lambda/hello` route: logs a warning and
+    /// replies with an empty JSON object.
+    fn hello_handler() -> http::Result<Response<Body>> {
+        warn!("[DEPRECATED] Please upgrade your tracing library, the /hello route is deprecated");
+        Response::builder()
+            .status(200)
+            .body(Body::from(json!({}).to_string()))
+    }
+
+    fn headers_to_map(headers: http::HeaderMap) -> HashMap<String, String> {
+        headers
+            .iter()
+            .map(|(k, v)| {
+                (
+                    
k.as_str().to_string(), + v.to_str().unwrap_or_default().to_string(), + ) + }) + .collect() + } +} diff --git a/bottlecap/src/lifecycle/mod.rs b/bottlecap/src/lifecycle/mod.rs index 1c0924d84..a0b3eda68 100644 --- a/bottlecap/src/lifecycle/mod.rs +++ b/bottlecap/src/lifecycle/mod.rs @@ -1,2 +1,3 @@ pub mod flush_control; -pub mod invocation_context; +pub mod invocation; +pub mod listener; diff --git a/bottlecap/src/logs/lambda/processor.rs b/bottlecap/src/logs/lambda/processor.rs index 955872d8f..d80db8912 100644 --- a/bottlecap/src/logs/lambda/processor.rs +++ b/bottlecap/src/logs/lambda/processor.rs @@ -6,7 +6,7 @@ use tracing::error; use crate::config; use crate::events::Event; -use crate::lifecycle::invocation_context::InvocationContext; +use crate::lifecycle::invocation::context::Context as InvocationContext; use crate::logs::aggregator::Aggregator; use crate::logs::processor::{Processor, Rule}; use crate::tags::provider; @@ -53,10 +53,7 @@ impl LambdaProcessor { service, tags, rules, - invocation_context: InvocationContext { - request_id: String::new(), - runtime_duration_ms: 0.0, - }, + invocation_context: InvocationContext::new(String::new(), 0.0, 0.0, 0, None), orphan_logs: Vec::new(), ready_logs: Vec::new(), event_bus, @@ -90,8 +87,13 @@ impl LambdaProcessor { runtime_version_arn, .. 
// TODO: check if we could do something with this metrics: `initialization_type` and `phase` } => { + if let Err(e) = self.event_bus.send(Event::Telemetry(copy)).await { + error!("Failed to send PlatformInitStart to the main event bus: {}", e); + } + let rv = runtime_version.unwrap_or("?".to_string()); // TODO: check what does containers display let rv_arn = runtime_version_arn.unwrap_or("?".to_string()); // TODO: check what do containers display + Ok(Message::new( format!("INIT_START Runtime Version: {rv} Runtime Version ARN: {rv_arn}"), None, @@ -181,7 +183,6 @@ impl LambdaProcessor { )) }, // TODO: PlatformInitRuntimeDone - // TODO: PlatformInitReport // TODO: PlatformExtension // TODO: PlatformTelemetrySubscription // TODO: PlatformLogsDropped diff --git a/bottlecap/src/metrics/enhanced/constants.rs b/bottlecap/src/metrics/enhanced/constants.rs index 5011b7d64..fcd833676 100644 --- a/bottlecap/src/metrics/enhanced/constants.rs +++ b/bottlecap/src/metrics/enhanced/constants.rs @@ -3,7 +3,10 @@ pub const BASE_LAMBDA_INVOCATION_PRICE: f64 = 0.000_000_2; pub const X86_LAMBDA_PRICE_PER_GB_SECOND: f64 = 0.000_016_666_7; pub const ARM_LAMBDA_PRICE_PER_GB_SECOND: f64 = 0.000_013_333_4; pub const MS_TO_SEC: f64 = 0.001; -pub const MB_TO_GB: f64 = 1024.0; +pub const MB_TO_GB: f64 = 1_024.0; + +// tmp directory path +pub const TMP_PATH: &str = "/tmp/"; // Enhanced metrics pub const MAX_MEMORY_USED_METRIC: &str = "aws.lambda.enhanced.max_memory_used"; @@ -21,5 +24,23 @@ pub const OUT_OF_MEMORY_METRIC: &str = "aws.lambda.enhanced.out_of_memory"; pub const TIMEOUTS_METRIC: &str = "aws.lambda.enhanced.timeouts"; pub const ERRORS_METRIC: &str = "aws.lambda.enhanced.errors"; pub const INVOCATIONS_METRIC: &str = "aws.lambda.enhanced.invocations"; +pub const RX_BYTES_METRIC: &str = "aws.lambda.enhanced.rx_bytes"; +pub const TX_BYTES_METRIC: &str = "aws.lambda.enhanced.tx_bytes"; +pub const TOTAL_NETWORK_METRIC: &str = "aws.lambda.enhanced.total_network"; +pub const 
CPU_SYSTEM_TIME_METRIC: &str = "aws.lambda.enhanced.cpu_system_time"; +pub const CPU_USER_TIME_METRIC: &str = "aws.lambda.enhanced.cpu_user_time"; +pub const CPU_TOTAL_TIME_METRIC: &str = "aws.lambda.enhanced.cpu_total_time"; +pub const CPU_TOTAL_UTILIZATION_PCT_METRIC: &str = "aws.lambda.enhanced.cpu_total_utilization_pct"; +pub const CPU_TOTAL_UTILIZATION_METRIC: &str = "aws.lambda.enhanced.cpu_total_utilization"; +pub const NUM_CORES_METRIC: &str = "aws.lambda.enhanced.num_cores"; +pub const CPU_MAX_UTILIZATION_METRIC: &str = "aws.lambda.enhanced.cpu_max_utilization"; +pub const CPU_MIN_UTILIZATION_METRIC: &str = "aws.lambda.enhanced.cpu_min_utilization"; +pub const TMP_MAX_METRIC: &str = "aws.lambda.enhanced.tmp_max"; +pub const TMP_USED_METRIC: &str = "aws.lambda.enhanced.tmp_used"; +pub const TMP_FREE_METRIC: &str = "aws.lambda.enhanced.tmp_free"; +pub const FD_MAX_METRIC: &str = "aws.lambda.enhanced.fd_max"; +pub const FD_USE_METRIC: &str = "aws.lambda.enhanced.fd_use"; +pub const THREADS_MAX_METRIC: &str = "aws.lambda.enhanced.threads_max"; +pub const THREADS_USE_METRIC: &str = "aws.lambda.enhanced.threads_use"; //pub const ASM_INVOCATIONS_METRIC: &str = "aws.lambda.enhanced.asm.invocations"; pub const ENHANCED_METRICS_ENV_VAR: &str = "DD_ENHANCED_METRICS"; diff --git a/bottlecap/src/metrics/enhanced/lambda.rs b/bottlecap/src/metrics/enhanced/lambda.rs index 0db917f31..2129853a2 100644 --- a/bottlecap/src/metrics/enhanced/lambda.rs +++ b/bottlecap/src/metrics/enhanced/lambda.rs @@ -1,21 +1,67 @@ -use super::constants::{self, BASE_LAMBDA_INVOCATION_PRICE}; +use crate::metrics::enhanced::{ + constants::{self, BASE_LAMBDA_INVOCATION_PRICE}, + statfs::statfs_info, +}; +use crate::proc::{self, CPUData, NetworkData}; use crate::telemetry::events::ReportMetrics; -use dogstatsd::aggregator::Aggregator; use dogstatsd::metric; use dogstatsd::metric::{Metric, MetricValue}; +use dogstatsd::{aggregator::Aggregator, metric::SortedTags}; +use std::collections::HashMap; 
use std::env::consts::ARCH; use std::sync::{Arc, Mutex}; +use std::time::Duration; +use tokio::{ + sync::watch::{Receiver, Sender}, + time::interval, +}; +use tracing::debug; use tracing::error; pub struct Lambda { pub aggregator: Arc>, pub config: Arc, + // Dynamic value tags are the ones we cannot obtain statically from the sandbox + dynamic_value_tags: HashMap, } impl Lambda { #[must_use] pub fn new(aggregator: Arc>, config: Arc) -> Lambda { - Lambda { aggregator, config } + Lambda { + aggregator, + config, + dynamic_value_tags: HashMap::new(), + } + } + + /// Set the dynamic value tags that are not available at compile time + pub fn set_init_tags(&mut self, proactive_initialization: bool, cold_start: bool) { + self.dynamic_value_tags.remove("cold_start"); + self.dynamic_value_tags.remove("proactive_initialization"); + + self.dynamic_value_tags + .insert(String::from("cold_start"), cold_start.to_string()); + + // Only set `proactive_initialization` tag if it is true + if proactive_initialization { + self.dynamic_value_tags.insert( + String::from("proactive_initialization"), + String::from("true"), + ); + } + } + + fn get_dynamic_value_tags(&self) -> Option { + let vec_tags: Vec = self + .dynamic_value_tags + .iter() + .map(|(k, v)| format!("{k}:{v}")) + .collect(); + + let string_tags = vec_tags.join(","); + + SortedTags::parse(&string_tags).ok() } pub fn increment_invocation_metric(&self) { @@ -37,7 +83,7 @@ impl Lambda { let metric = Metric::new( constants::INIT_DURATION_METRIC.into(), MetricValue::distribution(init_duration_ms * constants::MS_TO_SEC), - None, + self.get_dynamic_value_tags(), ); if let Err(e) = self @@ -54,7 +100,11 @@ impl Lambda { if !self.config.enhanced_metrics { return; } - let metric = Metric::new(metric_name.into(), MetricValue::distribution(1f64), None); + let metric = Metric::new( + metric_name.into(), + MetricValue::distribution(1f64), + self.get_dynamic_value_tags(), + ); if let Err(e) = self .aggregator .lock() @@ -73,7 +123,7 @@ 
impl Lambda { constants::RUNTIME_DURATION_METRIC.into(), MetricValue::distribution(duration_ms), // Datadog expects this value as milliseconds, not seconds - None, + self.get_dynamic_value_tags(), ); if let Err(e) = self .aggregator @@ -93,7 +143,7 @@ impl Lambda { constants::POST_RUNTIME_DURATION_METRIC.into(), MetricValue::distribution(duration_ms), // Datadog expects this value as milliseconds, not seconds - None, + self.get_dynamic_value_tags(), ); if let Err(e) = self .aggregator @@ -105,6 +155,460 @@ impl Lambda { } } + pub fn generate_network_enhanced_metrics( + network_data_offset: NetworkData, + network_data_end: NetworkData, + aggr: &mut std::sync::MutexGuard, + tags: Option, + ) { + let rx_bytes = network_data_end.rx_bytes - network_data_offset.rx_bytes; + let tx_bytes = network_data_end.tx_bytes - network_data_offset.tx_bytes; + let total_network = rx_bytes + tx_bytes; + + let metric = Metric::new( + constants::RX_BYTES_METRIC.into(), + MetricValue::distribution(rx_bytes), + tags.clone(), + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert rx_bytes metric: {}", e); + } + + let metric = Metric::new( + constants::TX_BYTES_METRIC.into(), + MetricValue::distribution(tx_bytes), + tags.clone(), + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert tx_bytes metric: {}", e); + } + + let metric = Metric::new( + constants::TOTAL_NETWORK_METRIC.into(), + MetricValue::distribution(total_network), + tags.clone(), + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert total_network metric: {}", e); + } + } + + pub fn set_network_enhanced_metrics(&self, network_offset: Option) { + if !self.config.enhanced_metrics { + return; + } + + if let Some(offset) = network_offset { + let mut aggr: std::sync::MutexGuard = + self.aggregator.lock().expect("lock poisoned"); + + match proc::get_network_data() { + Ok(data) => { + Self::generate_network_enhanced_metrics( + offset, + data, + &mut aggr, + 
self.get_dynamic_value_tags(), + ); + } + Err(_e) => { + debug!("Could not find data to generate network enhanced metrics"); + } + } + } else { + debug!("Could not find data to generate network enhanced metrics"); + } + } + + pub(crate) fn generate_cpu_time_enhanced_metrics( + cpu_data_offset: &CPUData, + cpu_data_end: &CPUData, + aggr: &mut std::sync::MutexGuard, + tags: Option, + ) { + let cpu_user_time = cpu_data_end.total_user_time_ms - cpu_data_offset.total_user_time_ms; + let cpu_system_time = + cpu_data_end.total_system_time_ms - cpu_data_offset.total_system_time_ms; + let cpu_total_time = cpu_user_time + cpu_system_time; + + let metric = Metric::new( + constants::CPU_USER_TIME_METRIC.into(), + MetricValue::distribution(cpu_user_time), + tags.clone(), + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert cpu_user_time metric: {}", e); + } + + let metric = Metric::new( + constants::CPU_SYSTEM_TIME_METRIC.into(), + MetricValue::distribution(cpu_system_time), + tags.clone(), + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert cpu_system_time metric: {}", e); + } + + let metric = Metric::new( + constants::CPU_TOTAL_TIME_METRIC.into(), + MetricValue::distribution(cpu_total_time), + tags.clone(), + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert cpu_total_time metric: {}", e); + } + } + + pub fn set_cpu_time_enhanced_metrics(&self, cpu_offset: Option) { + if !self.config.enhanced_metrics { + return; + } + + let mut aggr: std::sync::MutexGuard = + self.aggregator.lock().expect("lock poisoned"); + + let cpu_data = proc::get_cpu_data(); + match (cpu_offset, cpu_data) { + (Some(cpu_offset), Ok(cpu_data)) => { + Self::generate_cpu_time_enhanced_metrics( + &cpu_offset, + &cpu_data, + &mut aggr, + self.get_dynamic_value_tags(), + ); + } + (_, _) => { + debug!("Could not find data to generate cpu time enhanced metrics"); + } + } + } + + pub(crate) fn generate_cpu_utilization_enhanced_metrics( + cpu_data_offset: 
&CPUData, + cpu_data_end: &CPUData, + uptime_data_offset: f64, + uptime_data_end: f64, + aggr: &mut std::sync::MutexGuard, + tags: Option, + ) { + let num_cores = cpu_data_end.individual_cpu_idle_times.len() as f64; + let uptime = uptime_data_end - uptime_data_offset; + let total_idle_time = cpu_data_end.total_idle_time_ms - cpu_data_offset.total_idle_time_ms; + + let mut max_idle_time = 0.0; + let mut min_idle_time = f64::MAX; + + for (cpu_name, cpu_idle_time) in &cpu_data_end.individual_cpu_idle_times { + if let Some(cpu_idle_time_offset) = + cpu_data_offset.individual_cpu_idle_times.get(cpu_name) + { + let idle_time = cpu_idle_time - cpu_idle_time_offset; + if idle_time < min_idle_time { + min_idle_time = idle_time; + } + if idle_time > max_idle_time { + max_idle_time = idle_time; + } + } + } + + // Maximally utilized CPU is the one with the least time spent in the idle process + // Multiply by 100 to report as percentage + let cpu_max_utilization = ((uptime - min_idle_time) / uptime) * 100.0; + + // Minimally utilized CPU is the one with the most time spent in the idle process + // Multiply by 100 to report as percentage + let cpu_min_utilization = ((uptime - max_idle_time) / uptime) * 100.0; + + // CPU total utilization is the proportion of total non-idle time to the total uptime across all cores + let cpu_total_utilization_decimal = + ((uptime * num_cores) - total_idle_time) / (uptime * num_cores); + // Multiply by 100 to report as percentage + let cpu_total_utilization_pct = cpu_total_utilization_decimal * 100.0; + // Multiply by num_cores to report in terms of cores + let cpu_total_utilization = cpu_total_utilization_decimal * num_cores; + + let metric = Metric::new( + constants::CPU_TOTAL_UTILIZATION_PCT_METRIC.into(), + MetricValue::distribution(cpu_total_utilization_pct), + tags.clone(), + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert cpu_total_utilization_pct metric: {}", e); + } + + let metric = Metric::new( + 
constants::CPU_TOTAL_UTILIZATION_METRIC.into(), + MetricValue::distribution(cpu_total_utilization), + tags.clone(), + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert cpu_total_utilization metric: {}", e); + } + + let metric = Metric::new( + constants::NUM_CORES_METRIC.into(), + MetricValue::distribution(num_cores), + tags.clone(), + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert num_cores metric: {}", e); + } + + let metric = Metric::new( + constants::CPU_MAX_UTILIZATION_METRIC.into(), + MetricValue::distribution(cpu_max_utilization), + tags.clone(), + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert cpu_max_utilization metric: {}", e); + } + + let metric = Metric::new( + constants::CPU_MIN_UTILIZATION_METRIC.into(), + MetricValue::distribution(cpu_min_utilization), + tags.clone(), + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert cpu_min_utilization metric: {}", e); + } + } + + pub fn set_cpu_utilization_enhanced_metrics( + &self, + cpu_offset: Option, + uptime_offset: Option, + ) { + if !self.config.enhanced_metrics { + return; + } + + let mut aggr: std::sync::MutexGuard = + self.aggregator.lock().expect("lock poisoned"); + + let cpu_data = proc::get_cpu_data(); + let uptime_data = proc::get_uptime(); + match (cpu_offset, cpu_data, uptime_offset, uptime_data) { + (Some(cpu_offset), Ok(cpu_data), Some(uptime_offset), Ok(uptime_data)) => { + Self::generate_cpu_utilization_enhanced_metrics( + &cpu_offset, + &cpu_data, + uptime_offset, + uptime_data, + &mut aggr, + self.get_dynamic_value_tags(), + ); + } + (_, _, _, _) => { + debug!("Could not find data to generate cpu utilization enhanced metrics"); + } + } + } + + pub fn generate_tmp_enhanced_metrics( + tmp_max: f64, + tmp_used: f64, + aggr: &mut std::sync::MutexGuard, + tags: Option, + ) { + let metric = Metric::new( + constants::TMP_MAX_METRIC.into(), + MetricValue::distribution(tmp_max), + tags.clone(), + ); + if let Err(e) = 
aggr.insert(metric) { + error!("Failed to insert tmp_max metric: {}", e); + } + + let metric = Metric::new( + constants::TMP_USED_METRIC.into(), + MetricValue::distribution(tmp_used), + tags.clone(), + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert tmp_used metric: {}", e); + } + + let tmp_free = tmp_max - tmp_used; + let metric = Metric::new( + constants::TMP_FREE_METRIC.into(), + MetricValue::distribution(tmp_free), + tags.clone(), + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert tmp_free metric: {}", e); + } + } + + #[allow(unreachable_code)] + #[allow(unused_variables)] + #[allow(unused_mut)] + pub fn set_tmp_enhanced_metrics(&self, mut send_metrics: Receiver<()>) { + // Temporarily disabled + return; + if !self.config.enhanced_metrics { + return; + } + + let aggr = Arc::clone(&self.aggregator); + let tags = self.get_dynamic_value_tags(); + + tokio::spawn(async move { + // Set tmp_max and initial value for tmp_used + let (bsize, blocks, bavail) = match statfs_info(constants::TMP_PATH) { + Ok(stats) => stats, + Err(err) => { + debug!("Could not emit tmp enhanced metrics. {:?}", err); + return; + } + }; + let tmp_max = bsize * blocks; + let mut tmp_used = bsize * (blocks - bavail); + + let mut interval = interval(Duration::from_millis(10)); + loop { + tokio::select! { + biased; + // When the stop signal is received, generate final metrics + _ = send_metrics.changed() => { + let mut aggr: std::sync::MutexGuard = + aggr.lock().expect("lock poisoned"); + Self::generate_tmp_enhanced_metrics(tmp_max, tmp_used, &mut aggr, tags); + return; + } + // Otherwise keep monitoring tmp usage periodically + _ = interval.tick() => { + let (bsize, blocks, bavail) = match statfs_info(constants::TMP_PATH) { + Ok(stats) => stats, + Err(err) => { + debug!("Could not emit tmp enhanced metrics. 
{:?}", err); + return; + } + }; + tmp_used = tmp_used.max(bsize * (blocks - bavail)); + } + } + } + }); + } + + pub fn generate_fd_enhanced_metrics( + fd_max: f64, + fd_use: f64, + aggr: &mut std::sync::MutexGuard, + tags: Option, + ) { + let metric = Metric::new( + constants::FD_MAX_METRIC.into(), + MetricValue::distribution(fd_max), + tags.clone(), + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert fd_max metric: {}", e); + } + + // Check if fd_use value is valid before inserting metric + if fd_use > 0.0 { + let metric = Metric::new( + constants::FD_USE_METRIC.into(), + MetricValue::distribution(fd_use), + tags, + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert fd_use metric: {}", e); + } + } + } + + pub fn generate_threads_enhanced_metrics( + threads_max: f64, + threads_use: f64, + aggr: &mut std::sync::MutexGuard, + tags: Option, + ) { + let metric = Metric::new( + constants::THREADS_MAX_METRIC.into(), + MetricValue::distribution(threads_max), + tags.clone(), + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert threads_max metric: {}", e); + } + + // Check if threads_use value is valid before inserting metric + if threads_use > 0.0 { + let metric = Metric::new( + constants::THREADS_USE_METRIC.into(), + MetricValue::distribution(threads_use), + tags, + ); + if let Err(e) = aggr.insert(metric) { + error!("Failed to insert threads_use metric: {}", e); + } + } + } + + #[allow(unreachable_code)] + #[allow(unused_variables)] + #[allow(unused_mut)] + pub fn set_process_enhanced_metrics(&self, mut send_metrics: Receiver<()>) { + // Temporarily disabled + return; + if !self.config.enhanced_metrics { + return; + } + + let aggr = Arc::clone(&self.aggregator); + let tags = self.get_dynamic_value_tags(); + + tokio::spawn(async move { + // get list of all process ids + let pids = proc::get_pid_list(); + + // Set fd_max and initial value for fd_use to -1 + let fd_max = proc::get_fd_max_data(&pids); + let mut fd_use 
= -1_f64; + + // Set threads_max and initial value for threads_use to -1 + let threads_max = proc::get_threads_max_data(&pids); + let mut threads_use = -1_f64; + + let mut interval = interval(Duration::from_millis(1)); + loop { + tokio::select! { + biased; + // When the stop signal is received, generate final metrics + _ = send_metrics.changed() => { + let mut aggr: std::sync::MutexGuard = + aggr.lock().expect("lock poisoned"); + Self::generate_fd_enhanced_metrics(fd_max, fd_use, &mut aggr, tags.clone()); + Self::generate_threads_enhanced_metrics(threads_max, threads_use, &mut aggr, tags); + return; + } + // Otherwise keep monitoring file descriptor and thread usage periodically + _ = interval.tick() => { + match proc::get_fd_use_data(&pids) { + Ok(fd_use_curr) => { + fd_use = fd_use.max(fd_use_curr); + }, + Err(_) => { + debug!("Could not update file descriptor use enhanced metric."); + } + }; + match proc::get_threads_use_data(&pids) { + Ok(threads_use_curr) => { + threads_use = threads_use.max(threads_use_curr); + }, + Err(_) => { + debug!("Could not update threads use enhanced metric."); + } + }; + } + } + } + }); + } + fn calculate_estimated_cost_usd(billed_duration_ms: u64, memory_size_mb: u64) -> f64 { let gb_seconds = (billed_duration_ms as f64 * constants::MS_TO_SEC) * (memory_size_mb as f64 / constants::MB_TO_GB); @@ -130,7 +634,7 @@ impl Lambda { let metric = metric::Metric::new( constants::DURATION_METRIC.into(), MetricValue::distribution(metrics.duration_ms * constants::MS_TO_SEC), - None, + self.get_dynamic_value_tags(), ); if let Err(e) = aggr.insert(metric) { error!("failed to insert duration metric: {}", e); @@ -138,7 +642,7 @@ impl Lambda { let metric = metric::Metric::new( constants::BILLED_DURATION_METRIC.into(), MetricValue::distribution(metrics.billed_duration_ms as f64 * constants::MS_TO_SEC), - None, + self.get_dynamic_value_tags(), ); if let Err(e) = aggr.insert(metric) { error!("failed to insert billed duration metric: {}", e); @@ -146,7 
+650,7 @@ impl Lambda { let metric = metric::Metric::new( constants::MAX_MEMORY_USED_METRIC.into(), MetricValue::distribution(metrics.max_memory_used_mb as f64), - None, + self.get_dynamic_value_tags(), ); if let Err(e) = aggr.insert(metric) { error!("failed to insert max memory used metric: {}", e); @@ -154,7 +658,7 @@ impl Lambda { let metric = metric::Metric::new( constants::MEMORY_SIZE_METRIC.into(), MetricValue::distribution(metrics.memory_size_mb as f64), - None, + self.get_dynamic_value_tags(), ); if let Err(e) = aggr.insert(metric) { error!("failed to insert memory size metric: {}", e); @@ -165,7 +669,7 @@ impl Lambda { let metric = metric::Metric::new( constants::ESTIMATED_COST_METRIC.into(), MetricValue::distribution(cost_usd), - None, + self.get_dynamic_value_tags(), ); if let Err(e) = aggr.insert(metric) { error!("failed to insert estimated cost metric: {}", e); @@ -173,9 +677,28 @@ impl Lambda { } } +#[derive(Clone, Debug)] +pub struct EnhancedMetricData { + pub network_offset: Option, + pub cpu_offset: Option, + pub uptime_offset: Option, + pub tmp_chan_tx: Sender<()>, + pub process_chan_tx: Sender<()>, +} + +impl PartialEq for EnhancedMetricData { + fn eq(&self, other: &Self) -> bool { + self.network_offset == other.network_offset + && self.cpu_offset == other.cpu_offset + && self.uptime_offset == other.uptime_offset + } +} + #[cfg(test)] #[allow(clippy::unwrap_used)] mod tests { + use std::collections::HashMap; + use super::*; use crate::config; use dogstatsd::metric::EMPTY_TAGS; @@ -230,6 +753,7 @@ mod tests { } #[test] + #[allow(clippy::too_many_lines)] fn test_disabled() { let (metrics_aggr, no_config) = setup(); let my_config = Arc::new(config::Config { @@ -285,6 +809,60 @@ mod tests { assert!(aggr .get_entry_by_id(constants::ESTIMATED_COST_METRIC.into(), &None) .is_none()); + assert!(aggr + .get_entry_by_id(constants::RX_BYTES_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::TX_BYTES_METRIC.into(), &None) + 
.is_none()); + assert!(aggr + .get_entry_by_id(constants::TOTAL_NETWORK_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::CPU_USER_TIME_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::CPU_SYSTEM_TIME_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::CPU_TOTAL_TIME_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::CPU_TOTAL_UTILIZATION_PCT_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::CPU_TOTAL_UTILIZATION_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::NUM_CORES_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::CPU_MIN_UTILIZATION_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::CPU_MAX_UTILIZATION_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::TMP_MAX_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::TMP_USED_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::TMP_FREE_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::FD_MAX_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::FD_USE_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::THREADS_MAX_METRIC.into(), &None) + .is_none()); + assert!(aggr + .get_entry_by_id(constants::THREADS_USE_METRIC.into(), &None) + .is_none()); } #[test] @@ -307,4 +885,219 @@ mod tests { assert_sketch(&metrics_aggr, constants::MAX_MEMORY_USED_METRIC, 128.0); assert_sketch(&metrics_aggr, constants::MEMORY_SIZE_METRIC, 256.0); } + + #[test] + fn test_set_network_enhanced_metrics() { + let (metrics_aggr, my_config) = setup(); + let lambda = Lambda::new(metrics_aggr.clone(), my_config); + + let network_offset = NetworkData { + rx_bytes: 180.0, + tx_bytes: 254.0, + }; + let network_data = 
NetworkData { + rx_bytes: 20180.0, + tx_bytes: 75000.0, + }; + + Lambda::generate_network_enhanced_metrics( + network_offset, + network_data, + &mut lambda.aggregator.lock().expect("lock poisoned"), + None, + ); + + assert_sketch(&metrics_aggr, constants::RX_BYTES_METRIC, 20000.0); + assert_sketch(&metrics_aggr, constants::TX_BYTES_METRIC, 74746.0); + assert_sketch(&metrics_aggr, constants::TOTAL_NETWORK_METRIC, 94746.0); + } + + #[test] + fn test_set_cpu_time_enhanced_metrics() { + let (metrics_aggr, my_config) = setup(); + let lambda = Lambda::new(metrics_aggr.clone(), my_config); + + let mut individual_cpu_idle_time_offsets = HashMap::new(); + individual_cpu_idle_time_offsets.insert("cpu0".to_string(), 10.0); + individual_cpu_idle_time_offsets.insert("cpu1".to_string(), 20.0); + let cpu_offset = CPUData { + total_user_time_ms: 100.0, + total_system_time_ms: 3.0, + total_idle_time_ms: 20.0, + individual_cpu_idle_times: individual_cpu_idle_time_offsets, + }; + + let mut individual_cpu_idle_times_end = HashMap::new(); + individual_cpu_idle_times_end.insert("cpu0".to_string(), 30.0); + individual_cpu_idle_times_end.insert("cpu1".to_string(), 80.0); + let cpu_data = CPUData { + total_user_time_ms: 200.0, + total_system_time_ms: 56.0, + total_idle_time_ms: 100.0, + individual_cpu_idle_times: individual_cpu_idle_times_end, + }; + + Lambda::generate_cpu_time_enhanced_metrics( + &cpu_offset, + &cpu_data, + &mut lambda.aggregator.lock().expect("lock poisoned"), + None, + ); + + assert_sketch(&metrics_aggr, constants::CPU_USER_TIME_METRIC, 100.0); + assert_sketch(&metrics_aggr, constants::CPU_SYSTEM_TIME_METRIC, 53.0); + assert_sketch(&metrics_aggr, constants::CPU_TOTAL_TIME_METRIC, 153.0); + } + + #[test] + fn test_set_cpu_utilization_enhanced_metrics() { + let (metrics_aggr, my_config) = setup(); + let lambda = Lambda::new(metrics_aggr.clone(), my_config); + + let mut individual_cpu_idle_time_offsets = HashMap::new(); + 
individual_cpu_idle_time_offsets.insert("cpu0".to_string(), 10.0); + individual_cpu_idle_time_offsets.insert("cpu1".to_string(), 30.0); + let cpu_offset = CPUData { + total_user_time_ms: 50.0, + total_system_time_ms: 10.0, + total_idle_time_ms: 10.0, + individual_cpu_idle_times: individual_cpu_idle_time_offsets, + }; + let uptime_offset = 1_891_100.0; + + let mut individual_cpu_idle_times_end = HashMap::new(); + individual_cpu_idle_times_end.insert("cpu0".to_string(), 570.0); + individual_cpu_idle_times_end.insert("cpu1".to_string(), 600.0); + let cpu_data = CPUData { + total_user_time_ms: 200.0, + total_system_time_ms: 170.0, + total_idle_time_ms: 1130.0, + individual_cpu_idle_times: individual_cpu_idle_times_end, + }; + let uptime_data = 1_891_900.0; + + Lambda::generate_cpu_utilization_enhanced_metrics( + &cpu_offset, + &cpu_data, + uptime_offset, + uptime_data, + &mut lambda.aggregator.lock().expect("lock poisoned"), + None, + ); + + // the differences above and metric values below are from an invocation using the go agent to verify the calculations + assert_sketch( + &metrics_aggr, + constants::CPU_TOTAL_UTILIZATION_PCT_METRIC, + 30.0, + ); + assert_sketch(&metrics_aggr, constants::CPU_TOTAL_UTILIZATION_METRIC, 0.6); + assert_sketch(&metrics_aggr, constants::NUM_CORES_METRIC, 2.0); + assert_sketch(&metrics_aggr, constants::CPU_MAX_UTILIZATION_METRIC, 30.0); + assert_sketch(&metrics_aggr, constants::CPU_MIN_UTILIZATION_METRIC, 28.75); + } + + #[test] + fn test_set_tmp_enhanced_metrics() { + let (metrics_aggr, my_config) = setup(); + let lambda = Lambda::new(metrics_aggr.clone(), my_config); + + let tmp_max = 550_461_440.0; + let tmp_used = 12_165_120.0; + + Lambda::generate_tmp_enhanced_metrics( + tmp_max, + tmp_used, + &mut lambda.aggregator.lock().expect("lock poisoned"), + None, + ); + + assert_sketch(&metrics_aggr, constants::TMP_MAX_METRIC, 550_461_440.0); + assert_sketch(&metrics_aggr, constants::TMP_USED_METRIC, 12_165_120.0); + 
assert_sketch(&metrics_aggr, constants::TMP_FREE_METRIC, 538_296_320.0); + } + + #[test] + fn test_set_fd_enhanced_metrics_valid_fd_use() { + let (metrics_aggr, my_config) = setup(); + let lambda = Lambda::new(metrics_aggr.clone(), my_config); + + let fd_max = 1024.0; + let fd_use = 175.0; + + Lambda::generate_fd_enhanced_metrics( + fd_max, + fd_use, + &mut lambda.aggregator.lock().expect("lock poisoned"), + None, + ); + + assert_sketch(&metrics_aggr, constants::FD_MAX_METRIC, 1024.0); + assert_sketch(&metrics_aggr, constants::FD_USE_METRIC, 175.0); + } + + #[test] + fn test_set_fd_enhanced_metrics_invalid_fd_use() { + let (metrics_aggr, my_config) = setup(); + let lambda = Lambda::new(metrics_aggr.clone(), my_config); + + let fd_max = 1024.0; + let fd_use = -1.0; + + Lambda::generate_fd_enhanced_metrics( + fd_max, + fd_use, + &mut lambda.aggregator.lock().expect("lock poisoned"), + None, + ); + + assert_sketch(&metrics_aggr, constants::FD_MAX_METRIC, 1024.0); + + let aggr = lambda.aggregator.lock().expect("lock poisoned"); + assert!(aggr + .get_entry_by_id(constants::FD_USE_METRIC.into(), &None) + .is_none()); + } + + #[test] + fn test_set_threads_enhanced_metrics_valid_threads_use() { + let (metrics_aggr, my_config) = setup(); + let lambda = Lambda::new(metrics_aggr.clone(), my_config); + + let threads_max = 1024.0; + let threads_use = 40.0; + + Lambda::generate_threads_enhanced_metrics( + threads_max, + threads_use, + &mut lambda.aggregator.lock().expect("lock poisoned"), + None, + ); + + assert_sketch(&metrics_aggr, constants::THREADS_MAX_METRIC, 1024.0); + assert_sketch(&metrics_aggr, constants::THREADS_USE_METRIC, 40.0); + } + + #[test] + fn test_set_threads_enhanced_metrics_invalid_threads_use() { + let (metrics_aggr, my_config) = setup(); + let lambda = Lambda::new(metrics_aggr.clone(), my_config); + + let threads_max = 1024.0; + let threads_use = -1.0; + + Lambda::generate_threads_enhanced_metrics( + threads_max, + threads_use, + &mut 
lambda.aggregator.lock().expect("lock poisoned"), + None, + ); + + assert_sketch(&metrics_aggr, constants::THREADS_MAX_METRIC, 1024.0); + + let aggr = lambda.aggregator.lock().expect("lock poisoned"); + assert!(aggr + .get_entry_by_id(constants::THREADS_USE_METRIC.into(), &None) + .is_none()); + } } diff --git a/bottlecap/src/metrics/enhanced/mod.rs b/bottlecap/src/metrics/enhanced/mod.rs index a2638024e..bca7c1bf0 100644 --- a/bottlecap/src/metrics/enhanced/mod.rs +++ b/bottlecap/src/metrics/enhanced/mod.rs @@ -1,2 +1,3 @@ pub mod constants; pub mod lambda; +pub mod statfs; diff --git a/bottlecap/src/metrics/enhanced/statfs.rs b/bottlecap/src/metrics/enhanced/statfs.rs new file mode 100644 index 000000000..84e7412f1 --- /dev/null +++ b/bottlecap/src/metrics/enhanced/statfs.rs @@ -0,0 +1,26 @@ +#![allow(clippy::module_name_repetitions)] + +use nix::sys::statfs::statfs; +use std::io; +use std::path::Path; + +#[cfg(not(target_os = "windows"))] +#[allow(clippy::cast_lossless)] +/// Returns the block size, total number of blocks, and number of blocks available for the specified directory path. 
+/// +pub fn statfs_info(path: &str) -> Result<(f64, f64, f64), io::Error> { + let stat = statfs(Path::new(path)).map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; + Ok(( + stat.block_size() as f64, + stat.blocks() as f64, + stat.blocks_available() as f64, + )) +} + +#[cfg(target_os = "windows")] +fn statfs_info(path: &str) -> Result<(f64, f64, f64), io::Error> { + Err(io::Error::new( + io::ErrorKind::Other, + "Cannot get tmp data on Windows", + )) +} diff --git a/bottlecap/src/proc/clock.rs b/bottlecap/src/proc/clock.rs new file mode 100644 index 000000000..8c7c9b328 --- /dev/null +++ b/bottlecap/src/proc/clock.rs @@ -0,0 +1,20 @@ +use nix::unistd::{sysconf, SysconfVar}; +use std::io; + +#[allow(clippy::cast_sign_loss)] +#[cfg(not(target_os = "windows"))] +pub fn get_clk_tck() -> Result { + match sysconf(SysconfVar::CLK_TCK) { + Ok(Some(clk_tck)) if clk_tck > 0 => Ok(clk_tck as u64), + _ => Err(io::Error::new( + io::ErrorKind::NotFound, + "Could not find system clock ticks per second", + )), + } +} + +#[cfg(target_os = "windows")] +pub fn get_clk_tck() -> Result { + // Windows does not have this concept + Ok(1) +} diff --git a/bottlecap/src/proc/constants.rs b/bottlecap/src/proc/constants.rs new file mode 100644 index 000000000..452cdf4fb --- /dev/null +++ b/bottlecap/src/proc/constants.rs @@ -0,0 +1,8 @@ +pub const PROC_NET_DEV_PATH: &str = "/proc/net/dev"; +pub const PROC_STAT_PATH: &str = "/proc/stat"; +pub const PROC_UPTIME_PATH: &str = "/proc/uptime"; +pub const PROC_PATH: &str = "/proc"; + +pub const LAMDBA_NETWORK_INTERFACE: &str = "vinternal_1"; +pub const LAMBDA_FILE_DESCRIPTORS_DEFAULT_LIMIT: f64 = 1024.0; +pub const LAMBDA_EXECUTION_PROCESSES_DEFAULT_LIMIT: f64 = 1024.0; diff --git a/bottlecap/src/proc/mod.rs b/bottlecap/src/proc/mod.rs new file mode 100644 index 000000000..f6fa819c2 --- /dev/null +++ b/bottlecap/src/proc/mod.rs @@ -0,0 +1,523 @@ +pub mod clock; +pub mod constants; + +use std::{ + collections::HashMap, + fs::{self, File}, + 
io::{self, BufRead}, +}; + +use constants::{ + LAMDBA_NETWORK_INTERFACE, PROC_NET_DEV_PATH, PROC_PATH, PROC_STAT_PATH, PROC_UPTIME_PATH, +}; +use regex::Regex; +use tracing::debug; + +#[must_use] +pub fn get_pid_list() -> Vec { + get_pid_list_from_path(PROC_PATH) +} + +pub fn get_pid_list_from_path(path: &str) -> Vec { + let mut pids = Vec::::new(); + + let Ok(entries) = fs::read_dir(path) else { + debug!("Could not list /proc files"); + return pids; + }; + + pids.extend(entries.filter_map(|entry| { + entry.ok().and_then(|dir_entry| { + // Check if the entry is a directory + if dir_entry.file_type().ok()?.is_dir() { + // If the directory name can be parsed as an integer, it will be added to the list + dir_entry.file_name().to_str()?.parse::().ok() + } else { + None + } + }) + })); + + pids +} + +#[derive(Copy, Clone, Debug, PartialEq)] +pub struct NetworkData { + pub rx_bytes: f64, + pub tx_bytes: f64, +} + +pub fn get_network_data() -> Result { + get_network_data_from_path(PROC_NET_DEV_PATH) +} + +fn get_network_data_from_path(path: &str) -> Result { + let file = File::open(path)?; + let reader = io::BufReader::new(file); + + for line in reader.lines() { + let line = line?; + let mut values = line.split_whitespace(); + + if values.next().map_or(false, |interface_name| { + interface_name.starts_with(LAMDBA_NETWORK_INTERFACE) + }) { + // Read the value for received bytes if present + let rx_bytes: Option = values.next().and_then(|s| s.parse().ok()); + + // Skip over the next 7 values representing metrics for received data and + // read the value for bytes transmitted if present + let tx_bytes: Option = values.nth(7).and_then(|s| s.parse().ok()); + + match (rx_bytes, tx_bytes) { + (Some(rx_val), Some(tx_val)) => { + return Ok(NetworkData { + rx_bytes: rx_val, + tx_bytes: tx_val, + }) + } + (_, _) => { + return Err(io::Error::new( + io::ErrorKind::NotFound, + "Network data not found", + )) + } + } + } + } + + Err(io::Error::new( + io::ErrorKind::NotFound, + "Network 
data not found", + )) +} + +#[derive(Clone, Debug, PartialEq)] +pub struct CPUData { + pub total_user_time_ms: f64, + pub total_system_time_ms: f64, + pub total_idle_time_ms: f64, + pub individual_cpu_idle_times: HashMap, +} + +pub fn get_cpu_data() -> Result { + get_cpu_data_from_path(PROC_STAT_PATH) +} + +fn get_cpu_data_from_path(path: &str) -> Result { + let file = File::open(path)?; + let reader = io::BufReader::new(file); + + let mut cpu_data = CPUData { + total_user_time_ms: 0.0, + total_system_time_ms: 0.0, + total_idle_time_ms: 0.0, + individual_cpu_idle_times: HashMap::new(), + }; + + // SC_CLK_TCK is the system clock frequency in ticks per second + // We'll use this to convert CPU times from user HZ to milliseconds + let clktck = clock::get_clk_tck()? as f64; + + for line in reader.lines() { + let line = line?; + let mut values = line.split_whitespace(); + + if let Some(label) = values.next() { + if label == "cpu" { + // Parse CPU times for total user, system, and idle + let user: Option = values.next().and_then(|s| s.parse().ok()); + values.next(); // skip "nice" + let system: Option = values.next().and_then(|s| s.parse().ok()); + let idle: Option = values.next().and_then(|s| s.parse().ok()); + + match (user, system, idle) { + (Some(user_val), Some(system_val), Some(idle_val)) => { + // Divide values by clock tick to covert to seconds, then multiply by 1000 to convert to ms + cpu_data.total_user_time_ms = (user_val / clktck) * 1000.0; + cpu_data.total_system_time_ms = (system_val / clktck) * 1000.0; + cpu_data.total_idle_time_ms = (idle_val / clktck) * 1000.0; + } + (_, _, _) => { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "Failed to parse CPU data", + )) + } + } + } else if label.starts_with("cpu") { + // Parse per core (i.e. "cpu0", "cpu1", etc.) 
idle times + // Skip the first three values (user, nice, system) and get the 4th value (idle) + let idle: Option = values.nth(3).and_then(|s| s.parse().ok()); + + match idle { + Some(idle_val) => { + // Divide value by clock tick to covert to seconds, then multiply by 1000 to convert to ms + cpu_data + .individual_cpu_idle_times + .insert(label.to_string(), (idle_val / clktck) * 1000.0); + } + None => { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "Failed to parse per-core CPU data", + )) + } + } + } + } + } + + if cpu_data.individual_cpu_idle_times.is_empty() { + return Err(io::Error::new( + io::ErrorKind::NotFound, + "Per-core CPU data not found", + )); + } + + Ok(cpu_data) +} + +pub fn get_uptime() -> Result { + get_uptime_from_path(PROC_UPTIME_PATH) +} + +fn get_uptime_from_path(path: &str) -> Result { + let file = File::open(path)?; + let reader = io::BufReader::new(file); + + if let Some(line) = reader.lines().next() { + let line = line?; + let mut values = line.split_whitespace(); + + let uptime: Option = values.next().and_then(|s| s.parse().ok()); + let idle: Option = values.next().and_then(|s| s.parse().ok()); + + match (uptime, idle) { + // Check that the file is correctly formatted (i.e. 
has both values) + // Multiply val by 1000 to convert seconds to milliseconds + (Some(uptime_val), Some(_idle_val)) => return Ok(uptime_val * 1000.0), + (_, _) => { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "Failed to parse uptime data", + )); + } + } + } + + Err(io::Error::new( + io::ErrorKind::NotFound, + "Uptime data not found", + )) +} + +#[must_use] +pub fn get_fd_max_data(pids: &[i64]) -> f64 { + get_fd_max_data_from_path(PROC_PATH, pids) +} + +fn get_fd_max_data_from_path(path: &str, pids: &[i64]) -> f64 { + let mut fd_max = constants::LAMBDA_FILE_DESCRIPTORS_DEFAULT_LIMIT; + // regex to capture the soft limit value (first numeric value after the title) + let re = Regex::new(r"^Max open files\s+(\d+)").expect("Failed to create regex"); + + for &pid in pids { + let limits_path = format!("{path}/{pid}/limits"); + let Ok(file) = File::open(&limits_path) else { + continue; + }; + + let reader = io::BufReader::new(file); + for line in reader.lines().map_while(Result::ok) { + if let Some(line_items) = re.captures(&line) { + if let Ok(fd_max_pid) = line_items[1].parse() { + fd_max = fd_max.min(fd_max_pid); + } else { + debug!("File descriptor max data not found in file {}", limits_path); + } + break; + } + } + } + + fd_max +} + +pub fn get_fd_use_data(pids: &[i64]) -> Result { + get_fd_use_data_from_path(PROC_PATH, pids) +} + +fn get_fd_use_data_from_path(path: &str, pids: &[i64]) -> Result { + let mut fd_use = 0; + + for &pid in pids { + let fd_path = format!("{path}/{pid}/fd"); + let Ok(files) = fs::read_dir(fd_path) else { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "File descriptor use data not found", + )); + }; + let count = files.count(); + fd_use += count; + } + + Ok(fd_use as f64) +} + +#[must_use] +pub fn get_threads_max_data(pids: &[i64]) -> f64 { + get_threads_max_data_from_path(PROC_PATH, pids) +} + +fn get_threads_max_data_from_path(path: &str, pids: &[i64]) -> f64 { + let mut threads_max = 
/// Counts the threads in use by the given processes.
///
/// For every pid, the entries of `{path}/{pid}/task` are listed and each entry
/// whose file type is a directory counts as one thread. Entries that cannot be
/// read, or whose file type cannot be determined, are ignored.
///
/// Returns the total across all `pids` as an `f64` (`0.0` for an empty slice).
///
/// # Errors
///
/// Returns `InvalidData` as soon as the `task` directory of any pid cannot be
/// opened; counts gathered so far are discarded.
fn get_threads_use_data_from_path(path: &str, pids: &[i64]) -> Result<f64, io::Error> {
    let mut total_threads: usize = 0;

    for pid in pids {
        let entries = fs::read_dir(format!("{path}/{pid}/task")).map_err(|_| {
            io::Error::new(io::ErrorKind::InvalidData, "Threads use data not found")
        })?;

        for entry in entries {
            // Unreadable directory entries are skipped rather than treated as fatal.
            let Ok(entry) = entry else {
                continue;
            };
            if matches!(entry.file_type(), Ok(kind) if kind.is_dir()) {
                total_threads += 1;
            }
        }
    }

    Ok(total_threads as f64)
}
/// Exercises `get_cpu_data_from_path` against the fixtures under `tests/proc/stat`:
/// one well-formed file whose totals and per-core idle times are checked, followed
/// by a list of inputs that must all be rejected.
///
/// Every fixture is resolved through `path_from_root`, so the negative cases fail
/// for the reason under test (bad content) and not merely because a path relative
/// to the current working directory could not be opened.
#[test]
fn test_get_cpu_data() {
    let valid_stat = path_from_root("./tests/proc/stat/valid_stat");
    let cpu_data_result = get_cpu_data_from_path(valid_stat.as_str());
    assert!(cpu_data_result.is_ok());
    let cpu_data = cpu_data_result.unwrap();

    assert!((cpu_data.total_user_time_ms - 23370.0).abs() < f64::EPSILON);
    assert!((cpu_data.total_system_time_ms - 1880.0).abs() < f64::EPSILON);
    assert!((cpu_data.total_idle_time_ms - 178_380.0).abs() < f64::EPSILON);

    assert_eq!(cpu_data.individual_cpu_idle_times.len(), 2);
    for (label, expected_idle_ms) in [("cpu0", 91880.0), ("cpu1", 86490.0)] {
        let idle_ms = *cpu_data
            .individual_cpu_idle_times
            .get(label)
            .unwrap_or_else(|| panic!("{label} not found"));
        assert!((idle_ms - expected_idle_ms).abs() < f64::EPSILON);
    }

    // The last entry intentionally points at a file that does not exist.
    let rejected_fixtures = [
        "./tests/proc/stat/invalid_stat_non_numerical_value_1",
        "./tests/proc/stat/invalid_stat_non_numerical_value_2",
        "./tests/proc/stat/invalid_stat_malformed_first_line",
        "./tests/proc/stat/invalid_stat_malformed_per_cpu_line",
        "./tests/proc/stat/invalid_stat_missing_cpun_data",
        "./tests/proc/stat/nonexistent_stat",
    ];
    for fixture in rejected_fixtures {
        let fixture_path = path_from_root(fixture);
        let cpu_data_result = get_cpu_data_from_path(fixture_path.as_str());
        assert!(
            cpu_data_result.is_err(),
            "expected an error for fixture {fixture}"
        );
    }
}
/// Checks `get_fd_use_data_from_path` with the `tests/proc/process` fixtures: the
/// `valid` tree must report five open file descriptors, and the `invalid_missing`
/// tree must yield an error.
///
/// The pid list and the function under test are both given the same
/// manifest-anchored root, so the test does not depend on the current working
/// directory.
#[test]
fn test_get_fd_use_data() {
    let valid_root = path_from_root("./tests/proc/process/valid");
    let pids = get_pid_list_from_path(valid_root.as_str());

    let fd_use_result = get_fd_use_data_from_path(valid_root.as_str(), &pids);
    assert!(fd_use_result.is_ok());
    let fd_use = fd_use_result.unwrap();
    assert!((fd_use - 5.0).abs() < f64::EPSILON);

    // Same pids, looked up in a tree that lacks their `fd` directories.
    let missing_root = path_from_root("./tests/proc/process/invalid_missing");
    let fd_use_result = get_fd_use_data_from_path(missing_root.as_str(), &pids);
    assert!(fd_use_result.is_err());
}
threads_use_result.unwrap(); + assert!((threads_use - 5.0).abs() < f64::EPSILON); + + let path = "./tests/proc/process/invalid_missing"; + let threads_use_result = get_threads_use_data_from_path(path, &pids); + assert!(threads_use_result.is_err()); + } +} diff --git a/bottlecap/src/secrets/decrypt.rs b/bottlecap/src/secrets/decrypt.rs index a29e6a39d..615e35284 100644 --- a/bottlecap/src/secrets/decrypt.rs +++ b/bottlecap/src/secrets/decrypt.rs @@ -241,6 +241,7 @@ mod tests { aws_secret_access_key: "wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY".to_string(), aws_session_token: "AQoDYXdzEJr...".to_string(), function_name: "arn:some-function".to_string(), + sandbox_init_time: Instant::now(), }, RequestArgs { service: "secretsmanager".to_string(), diff --git a/bottlecap/src/tags/lambda/tags.rs b/bottlecap/src/tags/lambda/tags.rs index caf0e3f89..ad1fb0c80 100644 --- a/bottlecap/src/tags/lambda/tags.rs +++ b/bottlecap/src/tags/lambda/tags.rs @@ -10,8 +10,10 @@ use tracing::debug; const QUALIFIER_ENV_VAR: &str = "AWS_LAMBDA_FUNCTION_VERSION"; const RUNTIME_VAR: &str = "AWS_EXECUTION_ENV"; const MEMORY_SIZE_VAR: &str = "AWS_LAMBDA_FUNCTION_MEMORY_SIZE"; -const INIT_TYPE: &str = "AWS_LAMBDA_INITIALIZATION_TYPE"; +pub const INIT_TYPE: &str = "AWS_LAMBDA_INITIALIZATION_TYPE"; const INIT_TYPE_KEY: &str = "init_type"; +// Value for INIT_TYPE when the function is using SnapStart +pub const SNAP_START_VALUE: &str = "snap-start"; // FunctionARNKey is the tag key for a function's arn pub const FUNCTION_ARN_KEY: &str = "function_arn"; @@ -239,6 +241,11 @@ impl Lambda { self.tags_map.get(FUNCTION_ARN_KEY) } + #[must_use] + pub fn get_function_name(&self) -> Option<&String> { + self.tags_map.get(FUNCTION_NAME_KEY) + } + #[must_use] pub fn get_tags_map(&self) -> &hash_map::HashMap { &self.tags_map diff --git a/bottlecap/src/tags/provider.rs b/bottlecap/src/tags/provider.rs index a3a6881df..b6e775ac2 100644 --- a/bottlecap/src/tags/provider.rs +++ b/bottlecap/src/tags/provider.rs @@ -47,6 
+47,11 @@ impl Provider { self.tag_provider.get_canonical_id() } + #[must_use] + pub fn get_canonical_resource_name(&self) -> Option { + self.tag_provider.get_canonical_resource_name() + } + #[must_use] pub fn get_tags_map(&self) -> &hash_map::HashMap { self.tag_provider.get_tags_map() @@ -56,6 +61,7 @@ impl Provider { trait GetTags { fn get_tags_vec(&self) -> Vec; fn get_canonical_id(&self) -> Option; + fn get_canonical_resource_name(&self) -> Option; fn get_tags_map(&self) -> &hash_map::HashMap; } @@ -72,6 +78,12 @@ impl GetTags for TagProvider { } } + fn get_canonical_resource_name(&self) -> Option { + match self { + TagProvider::Lambda(lambda_tags) => lambda_tags.get_function_name().cloned(), + } + } + fn get_tags_map(&self) -> &hash_map::HashMap { match self { TagProvider::Lambda(lambda_tags) => lambda_tags.get_tags_map(), diff --git a/bottlecap/src/traces/context.rs b/bottlecap/src/traces/context.rs new file mode 100644 index 000000000..600ae1c39 --- /dev/null +++ b/bottlecap/src/traces/context.rs @@ -0,0 +1,20 @@ +use std::collections::HashMap; + +use datadog_trace_protobuf::pb::SpanLink; + +#[derive(Copy, Clone, Default, Debug, PartialEq)] +pub struct Sampling { + pub priority: Option, + pub mechanism: Option, +} + +#[derive(Clone, Default, Debug, PartialEq)] +#[allow(clippy::module_name_repetitions)] +pub struct SpanContext { + pub trace_id: u64, + pub span_id: u64, + pub sampling: Option, + pub origin: Option, + pub tags: HashMap, + pub links: Vec, +} diff --git a/bottlecap/src/traces/hello_agent.rs b/bottlecap/src/traces/hello_agent.rs deleted file mode 100644 index c3584ccf3..000000000 --- a/bottlecap/src/traces/hello_agent.rs +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ -// SPDX-License-Identifier: Apache-2.0 - -// TODO(Astuyve): Deprecate. -// older clients require the 127.0.0.1:8126/lambda/hello route -// to identify the presence of the extension. 
- -use hyper::service::{make_service_fn, service_fn}; -use hyper::{http, Body, Method, Request, Response, Server, StatusCode}; -use serde_json::json; -use std::convert::Infallible; -use std::net::SocketAddr; -use tracing::{error, warn}; - -const HELLO_PATH: &str = "/lambda/hello"; -const AGENT_PORT: usize = 8124; - -pub async fn start_handler() -> Result<(), Box> { - let make_svc = make_service_fn(move |_| { - let service = service_fn(hello_handler); - - async move { Ok::<_, Infallible>(service) } - }); - - let port = u16::try_from(AGENT_PORT).expect("AGENT_PORT is too large"); - let addr = SocketAddr::from(([127, 0, 0, 1], port)); - let server_builder = Server::try_bind(&addr)?; - - let server = server_builder.serve(make_svc); - - // start hyper http server - if let Err(e) = server.await { - error!("Server error: {e}"); - return Err(e.into()); - } - - Ok(()) -} - -async fn hello_handler(req: Request) -> http::Result> { - if let (&Method::GET, HELLO_PATH) = (req.method(), req.uri().path()) { - warn!("[DEPRECATED] Please upgrade your tracing library, the /hello route is deprecated"); - Response::builder() - .status(200) - .body(Body::from(json!({}).to_string())) - } else { - let mut not_found = Response::default(); - *not_found.status_mut() = StatusCode::NOT_FOUND; - Ok(not_found) - } -} diff --git a/bottlecap/src/traces/mod.rs b/bottlecap/src/traces/mod.rs index b70d26a83..9c87051cf 100644 --- a/bottlecap/src/traces/mod.rs +++ b/bottlecap/src/traces/mod.rs @@ -1,9 +1,31 @@ // Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -pub mod hello_agent; +pub mod context; +pub mod propagation; pub mod stats_flusher; pub mod stats_processor; pub mod trace_agent; pub mod trace_flusher; pub mod trace_processor; + +// URL for a call to the Lambda runtime API. The value may be replaced if `AWS_LAMBDA_RUNTIME_API` is set. 
+const LAMBDA_RUNTIME_URL_PREFIX: &str = "http://127.0.0.1:9001"; + +// URL for a call from the Datadog Lambda Library to the Lambda Extension +const LAMBDA_EXTENSION_URL_PREFIX: &str = "http://127.0.0.1:8124"; + +// the first part of a URL for a call from Statsd +const LAMBDA_STATSD_URL_PREFIX: &str = "http://127.0.0.1:8125"; + +// the first part of a URL from the non-routable address for DNS traces +const DNS_NON_ROUTABLE_ADDRESS_URL_PREFIX: &str = "0.0.0.0"; + +// the first part of a URL from the localhost address for DNS traces +const DNS_LOCAL_HOST_ADDRESS_URL_PREFIX: &str = "127.0.0.1"; + +// URL from the `_AWS_XRAY_DAEMON_ADDRESS` for DNS traces +const AWS_XRAY_DAEMON_ADDRESS_URL_PREFIX: &str = "169.254.79.129"; + +// Name of the placeholder invocation span set by Java and Go tracers +const INVOCATION_SPAN_RESOURCE: &str = "dd-tracer-serverless-span"; diff --git a/bottlecap/src/traces/propagation/carrier.rs b/bottlecap/src/traces/propagation/carrier.rs new file mode 100644 index 000000000..d0f2182fa --- /dev/null +++ b/bottlecap/src/traces/propagation/carrier.rs @@ -0,0 +1,123 @@ +/// Code inspired, and copied, by OpenTelemetry Rust project. +/// +/// +use std::collections::HashMap; + +use serde_json::Value; + +/// Injector provides an interface for a carrier to be used +/// with a Propagator to inject a Context into the carrier. +/// +pub trait Injector { + /// Set a value in the carrier. + fn set(&mut self, key: &str, value: String); +} + +pub trait Extractor { + /// Get a value from the carrier. + fn get(&self, key: &str) -> Option<&str>; + + /// Get all keys from the carrier. + fn keys(&self) -> Vec<&str>; +} + +impl Injector for HashMap { + /// Set a key and value in the `HashMap`. + fn set(&mut self, key: &str, value: String) { + self.insert(key.to_lowercase(), value); + } +} + +impl Extractor for HashMap { + /// Get a value for a key from the `HashMap`. 
+ fn get(&self, key: &str) -> Option<&str> { + self.get(&key.to_lowercase()).map(String::as_str) + } + + /// Collect all the keys from the `HashMap`. + fn keys(&self) -> Vec<&str> { + self.keys().map(String::as_str).collect::>() + } +} + +impl Injector for Value { + /// Set a key and value in the `Value`. + fn set(&mut self, key: &str, value: String) { + if let Value::Object(map) = self { + map.insert(key.to_lowercase(), Value::String(value)); + } + } +} + +impl Extractor for Value { + /// Get a value for a key from the `Value`. + fn get(&self, key: &str) -> Option<&str> { + if let Value::Object(map) = self { + map.get(&key.to_lowercase()).and_then(|v| v.as_str()) + } else { + None + } + } + + /// Collect all the keys from the `Value`. + fn keys(&self) -> Vec<&str> { + if let Value::Object(map) = self { + map.keys().map(String::as_str).collect::>() + } else { + Vec::new() + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn hash_map_get() { + let mut carrier = HashMap::new(); + carrier.set("headerName", "value".to_string()); + + assert_eq!( + Extractor::get(&carrier, "HEADERNAME"), + Some("value"), + "case insensitive extraction" + ); + } + + #[test] + fn hash_map_keys() { + let mut carrier = HashMap::new(); + carrier.set("headerName1", "value1".to_string()); + carrier.set("headerName2", "value2".to_string()); + + let got = Extractor::keys(&carrier); + assert_eq!(got.len(), 2); + assert!(got.contains(&"headername1")); + assert!(got.contains(&"headername2")); + } + + #[test] + fn serde_value_get() { + let mut carrier = Value::Object(serde_json::Map::new()); + carrier.set("headerName", "value".to_string()); + + assert_eq!( + Extractor::get(&carrier, "HEADERNAME"), + Some("value"), + "case insensitive extraction" + ); + } + + #[test] + fn serde_value_keys() { + let mut carrier = Value::Object(serde_json::Map::new()); + carrier.set("headerName1", "value1".to_string()); + carrier.set("headerName2", "value2".to_string()); + + let got = 
Extractor::keys(&carrier); + assert_eq!(got.len(), 2); + assert!(got.contains(&"headername1")); + assert!(got.contains(&"headername2")); + } +} diff --git a/bottlecap/src/traces/propagation/error.rs b/bottlecap/src/traces/propagation/error.rs new file mode 100644 index 000000000..af7a37d9b --- /dev/null +++ b/bottlecap/src/traces/propagation/error.rs @@ -0,0 +1,33 @@ +use thiserror::Error; + +#[derive(Error, Debug, Copy, Clone)] +#[error("Cannot {} from {}, {}", operation, message, propagator_name)] +pub struct Error { + message: &'static str, + // which propagator this error comes from + propagator_name: &'static str, + // what operation was attempted + operation: &'static str, +} + +impl Error { + /// Error when extracting a value from a carrier + #[must_use] + pub fn extract(message: &'static str, propagator_name: &'static str) -> Self { + Self { + message, + propagator_name, + operation: "extract", + } + } + + /// Error when injecting a value into a carrier + #[allow(clippy::must_use_candidate)] + pub fn inject(message: &'static str, propagator_name: &'static str) -> Self { + Self { + message, + propagator_name, + operation: "inject", + } + } +} diff --git a/bottlecap/src/traces/propagation/mod.rs b/bottlecap/src/traces/propagation/mod.rs new file mode 100644 index 000000000..300e6eff9 --- /dev/null +++ b/bottlecap/src/traces/propagation/mod.rs @@ -0,0 +1,885 @@ +use std::{collections::HashMap, sync::Arc}; + +use crate::{ + config::{self, trace_propagation_style::TracePropagationStyle}, + traces::context::SpanContext, +}; +use carrier::{Extractor, Injector}; +use datadog_trace_protobuf::pb::SpanLink; +use text_map_propagator::{ + BAGGAGE_PREFIX, DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY, DATADOG_LAST_PARENT_ID_KEY, + TRACESTATE_KEY, +}; + +pub mod carrier; +pub mod error; +pub mod text_map_propagator; + +pub trait Propagator { + fn extract(&self, carrier: &dyn Extractor) -> Option; + fn inject(&self, context: SpanContext, carrier: &mut dyn Injector); +} + +pub 
struct DatadogCompositePropagator { + propagators: Vec>, + config: Arc, +} + +#[allow(clippy::never_loop)] +impl Propagator for DatadogCompositePropagator { + fn extract(&self, carrier: &dyn Extractor) -> Option { + if self.config.trace_propagation_extract_first { + for propagator in &self.propagators { + let context = propagator.extract(carrier); + + if self.config.trace_propagation_http_baggage_enabled { + if let Some(mut context) = context { + Self::attach_baggage(&mut context, carrier); + return Some(context); + } + } + + return context; + } + } + + let (contexts, styles) = self.extract_available_contexts(carrier); + if contexts.is_empty() { + return None; + } + + let mut context = Self::resolve_contexts(contexts, styles, carrier); + if self.config.trace_propagation_http_baggage_enabled { + Self::attach_baggage(&mut context, carrier); + } + + Some(context) + } + + fn inject(&self, _context: SpanContext, _carrier: &mut dyn Injector) { + todo!() + } +} + +impl DatadogCompositePropagator { + #[must_use] + pub fn new(config: Arc) -> Self { + let propagators: Vec> = config + .trace_propagation_style_extract + .iter() + .filter_map(|style| match style { + TracePropagationStyle::Datadog => { + Some(Box::new(text_map_propagator::DatadogHeaderPropagator) + as Box) + } + TracePropagationStyle::TraceContext => { + Some(Box::new(text_map_propagator::TraceContextPropagator) + as Box) + } + _ => None, + }) + .collect(); + + Self { + propagators, + config, + } + } + + fn extract_available_contexts( + &self, + carrier: &dyn Extractor, + ) -> (Vec, Vec) { + let mut contexts = Vec::::new(); + let mut styles = Vec::::new(); + + for (i, propagator) in self.propagators.iter().enumerate() { + if let Some(context) = propagator.extract(carrier) { + contexts.push(context); + styles.push(self.config.trace_propagation_style_extract[i]); + } + } + + (contexts, styles) + } + + fn resolve_contexts( + contexts: Vec, + styles: Vec, + _carrier: &dyn Extractor, + ) -> SpanContext { + let mut 
primary_context = contexts[0].clone(); + let mut links = Vec::::new(); + + let mut i = 1; + for context in contexts.iter().skip(1) { + let style = styles[i]; + + if context.span_id != 0 + && context.trace_id != 0 + && context.trace_id != primary_context.trace_id + { + let sampling = context.sampling.unwrap_or_default().priority.unwrap_or(0); + let tracestate: Option = match style { + TracePropagationStyle::TraceContext => { + context.tags.get(TRACESTATE_KEY).cloned() + } + _ => None, + }; + let attributes = HashMap::from([ + ("reason".to_string(), "terminated_context".to_string()), + ("context_headers".to_string(), style.to_string()), + ]); + let trace_id_high_str = context + .tags + .get(DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY) + .cloned() + .unwrap_or_default(); + let trace_ig_high = u64::from_str_radix(&trace_id_high_str, 16).unwrap_or_default(); + + links.push(SpanLink { + trace_id: context.trace_id, + trace_id_high: trace_ig_high, + span_id: context.span_id, + flags: u32::from(sampling > 0), + tracestate: tracestate.unwrap_or_default(), + attributes, + }); + } else if style == TracePropagationStyle::TraceContext { + if let Some(tracestate) = context.tags.get(TRACESTATE_KEY) { + primary_context + .tags + .insert(TRACESTATE_KEY.to_string(), tracestate.clone()); + } + + if primary_context.trace_id == context.trace_id + && primary_context.span_id != context.span_id + { + let mut dd_context: Option = None; + if styles.contains(&TracePropagationStyle::Datadog) { + let position = styles + .iter() + .position(|&s| s == TracePropagationStyle::Datadog) + .unwrap_or_default(); + dd_context = contexts.get(position).cloned(); + } + + if let Some(parent_id) = context.tags.get(DATADOG_LAST_PARENT_ID_KEY) { + primary_context + .tags + .insert(DATADOG_LAST_PARENT_ID_KEY.to_string(), parent_id.clone()); + } else if let Some(sc) = dd_context { + primary_context.tags.insert( + DATADOG_LAST_PARENT_ID_KEY.to_string(), + format!("{:016x}", sc.span_id), + ); + } + + 
primary_context.span_id = context.span_id; + } + } + + i += 1; + } + + primary_context.links = links; + + primary_context + } + + fn attach_baggage(context: &mut SpanContext, carrier: &dyn Extractor) { + let keys = carrier.keys(); + + for key in keys { + if let Some(stripped) = key.strip_prefix(BAGGAGE_PREFIX) { + context.tags.insert( + stripped.to_string(), + carrier.get(key).unwrap_or_default().to_string(), + ); + } + } + } +} + +#[cfg(test)] +pub mod tests { + use std::vec; + + use lazy_static::lazy_static; + + use crate::traces::context::Sampling; + + use super::*; + + fn lower_64_bits(value: u128) -> u64 { + (value & 0xFFFF_FFFF_FFFF_FFFF) as u64 + } + + lazy_static! { + static ref TRACE_ID: u128 = 171_395_628_812_617_415_352_188_477_958_425_669_623; + static ref TRACE_ID_LOWER_ORDER_BITS: u64 = lower_64_bits(*TRACE_ID); + static ref TRACE_ID_HEX: String = String::from("80f198ee56343ba864fe8b2a57d3eff7"); + + // TraceContext Headers + static ref VALID_TRACECONTEXT_HEADERS_BASIC: HashMap = HashMap::from([ + ( + "traceparent".to_string(), + format!("00-{}-00f067aa0ba902b7-01", *TRACE_ID_HEX) + ), + ( + "tracestate".to_string(), + "dd=p:00f067aa0ba902b7;s:2;o:rum".to_string() + ), + ]); + static ref VALID_TRACECONTEXT_HEADERS_RUM_NO_SAMPLING_DECISION: HashMap = + HashMap::from([ + ( + "traceparent".to_string(), + format!("00-{}-00f067aa0ba902b7-00", *TRACE_ID_HEX) + ), + ( + "tracestate".to_string(), + "dd=o:rum".to_string() + ), + ]); + static ref VALID_TRACECONTEXT_HEADERS: HashMap = HashMap::from([ + ( + "traceparent".to_string(), + format!("00-{}-00f067aa0ba902b7-01", *TRACE_ID_HEX) + ), + ( + "tracestate".to_string(), + "dd=s:2;o:rum;t.dm:-4;t.usr.id:baz64,congo=t61rcWkgMz".to_string() + ), + ]); + static ref VALID_TRACECONTEXT_HEADERS_VALID_64_BIT_TRACE_ID: HashMap = + HashMap::from([ + ( + "traceparent".to_string(), + "00-000000000000000064fe8b2a57d3eff7-00f067aa0ba902b7-01".to_string() + ), + ( + "tracestate".to_string(), + 
"dd=s:2;o:rum;t.dm:-4;t.usr.id:baz64,congo=t61rcWkgMzE".to_string() + ), + ]); + + // Datadog Headers + static ref VALID_DATADOG_HEADERS: HashMap = HashMap::from([ + ( + "x-datadog-trace-id".to_string(), + "13088165645273925489".to_string(), + ), + ("x-datadog-parent-id".to_string(), "5678".to_string(),), + ("x-datadog-sampling-priority".to_string(), "1".to_string()), + ("x-datadog-origin".to_string(), "synthetics".to_string()), + ]); + static ref VALID_DATADOG_HEADERS_NO_PRIORITY: HashMap = HashMap::from([ + ( + "x-datadog-trace-id".to_string(), + "13088165645273925489".to_string(), + ), + ("x-datadog-parent-id".to_string(), "5678".to_string(),), + ("x-datadog-origin".to_string(), "synthetics".to_string()), + ]); + static ref VALID_DATADOG_HEADERS_MATCHING_TRACE_CONTEXT_VALID_TRACE_ID: HashMap = + HashMap::from([ + ( + "x-datadog-trace-id".to_string(), + TRACE_ID_LOWER_ORDER_BITS.to_string() + ), + ("x-datadog-parent-id".to_string(), "5678".to_string()), + ("x-datadog-origin".to_string(), "synthetics".to_string()), + ("x-datadog-sampling-priority".to_string(), "1".to_string()), + ]); + static ref INVALID_DATADOG_HEADERS: HashMap = HashMap::from([ + ( + "x-datadog-trace-id".to_string(), + "13088165645273925489".to_string(), + ), + ("x-datadog-parent-id".to_string(), "parent_id".to_string(),), + ("x-datadog-sampling-priority".to_string(), "sample".to_string()), + ]); + + // Fixtures + // + static ref ALL_VALID_HEADERS: HashMap = { + let mut h = HashMap::new(); + h.extend(VALID_DATADOG_HEADERS.clone()); + h.extend(VALID_TRACECONTEXT_HEADERS.clone()); + // todo: add b3 + h + }; + static ref DATADOG_TRACECONTEXT_MATCHING_TRACE_ID_HEADERS: HashMap = { + let mut h = HashMap::new(); + h.extend(VALID_DATADOG_HEADERS_MATCHING_TRACE_CONTEXT_VALID_TRACE_ID.clone()); + // We use 64-bit traceparent trace id value here so it can match for + // both 128-bit enabled and disabled + h.extend(VALID_TRACECONTEXT_HEADERS_VALID_64_BIT_TRACE_ID.clone()); + h + }; + // Edge cases + static 
// Generates one `#[test]` function per `name: (styles, carrier, expected)` entry.
//
// * `styles`   - `Some(list)` to use `list` as the extraction styles, or `None` to use
//                the fallback of Datadog followed by TraceContext.
// * `carrier`  - the headers handed to the composite propagator.
// * `expected` - the `SpanContext` the extraction must produce; when nothing is
//                extracted the result is compared against `SpanContext::default()`.
macro_rules! test_propagation_extract {
    ($($name:ident: $value:expr,)*) => {
        $(
            #[test]
            fn $name() {
                let (requested_styles, headers, expected_context) = $value;

                let mut config = config::Config::default();
                config.trace_propagation_style_extract = requested_styles.unwrap_or_else(|| {
                    vec![
                        TracePropagationStyle::Datadog,
                        TracePropagationStyle::TraceContext,
                    ]
                });

                let composite = DatadogCompositePropagator::new(Arc::new(config));
                let extracted = composite.extract(&headers).unwrap_or_default();

                assert_eq!(extracted, expected_context);
            }
        )*
    }
}
{ + // Datadog Headers + valid_datadog_default: ( + None, + VALID_DATADOG_HEADERS.clone(), + SpanContext { + trace_id: 13_088_165_645_273_925_489, + span_id: 5678, + sampling: Some(Sampling { + priority: Some(1), + mechanism: None, + }), + origin: Some("synthetics".to_string()), + tags: HashMap::from([ + ("_dd.p.dm".to_string(), "-3".to_string()) + ]), + links: vec![], + } + ), + valid_datadog_no_priority: ( + None, + VALID_DATADOG_HEADERS_NO_PRIORITY.clone(), + SpanContext { + trace_id: 13_088_165_645_273_925_489, + span_id: 5678, + sampling: Some(Sampling { + priority: Some(2), + mechanism: None, + }), + origin: Some("synthetics".to_string()), + tags: HashMap::from([ + ("_dd.p.dm".to_string(), "-3".to_string()) + ]), + links: vec![], + }, + ), + invalid_datadog: ( + Some(vec![TracePropagationStyle::Datadog]), + INVALID_DATADOG_HEADERS.clone(), + SpanContext::default(), + ), + valid_datadog_explicit_style: ( + Some(vec![TracePropagationStyle::Datadog]), + VALID_DATADOG_HEADERS.clone(), + SpanContext { + trace_id: 13_088_165_645_273_925_489, + span_id: 5678, + sampling: Some(Sampling { + priority: Some(1), + mechanism: None, + }), + origin: Some("synthetics".to_string()), + tags: HashMap::from([ + ("_dd.p.dm".to_string(), "-3".to_string()) + ]), + links: vec![], + }, + ), + invalid_datadog_negative_trace_id: ( + Some(vec![TracePropagationStyle::Datadog]), + HashMap::from([ + ( + "x-datadog-trace-id".to_string(), + "-1".to_string(), + ), + ("x-datadog-parent-id".to_string(), "5678".to_string(),), + ("x-datadog-sampling-priority".to_string(), "1".to_string()), + ("x-datadog-origin".to_string(), "synthetics".to_string()), + ]), + SpanContext::default(), + ), + valid_datadog_no_datadog_style: ( + Some(vec![TracePropagationStyle::TraceContext]), + VALID_DATADOG_HEADERS.clone(), + SpanContext::default(), + ), + // TraceContext Headers + valid_tracecontext_simple: ( + Some(vec![TracePropagationStyle::TraceContext]), + VALID_TRACECONTEXT_HEADERS_BASIC.clone(), + 
SpanContext { + trace_id: 7_277_407_061_855_694_839, + span_id: 67_667_974_448_284_343, + sampling: Some(Sampling { + priority: Some(2), + mechanism: None, + }), + origin: Some("rum".to_string()), + tags: HashMap::from([ + ("tracestate".to_string(), "dd=p:00f067aa0ba902b7;s:2;o:rum".to_string()), + ("_dd.p.tid".to_string(), "9291375655657946024".to_string()), + ("traceparent".to_string(), "00-80f198ee56343ba864fe8b2a57d3eff7-00f067aa0ba902b7-01".to_string()), + ("_dd.parent_id".to_string(), "00f067aa0ba902b7".to_string()), + ]), + links: vec![], + } + ), + valid_tracecontext_rum_no_sampling_decision: ( + Some(vec![TracePropagationStyle::TraceContext]), + VALID_TRACECONTEXT_HEADERS_RUM_NO_SAMPLING_DECISION.clone(), + SpanContext { + trace_id: 7_277_407_061_855_694_839, + span_id: 67_667_974_448_284_343, + sampling: Some(Sampling { + priority: Some(0), + mechanism: None, + }), + origin: Some("rum".to_string()), + tags: HashMap::from([ + ("_dd.p.tid".to_string(), "9291375655657946024".to_string()), + ("tracestate".to_string(), "dd=o:rum".to_string()), + ("traceparent".to_string(), "00-80f198ee56343ba864fe8b2a57d3eff7-00f067aa0ba902b7-00".to_string()), + ]), + links: vec![], + } + ), + // B3 Headers + // todo: all of them + // B3 single Headers + // todo: all of them + // All Headers + valid_all_headers: ( + None, + ALL_VALID_HEADERS.clone(), + SpanContext { + trace_id: 13_088_165_645_273_925_489, + span_id: 5678, + sampling: Some(Sampling { + priority: Some(1), + mechanism: None, + }), + origin: Some("synthetics".to_string()), + tags: HashMap::from([ + ("_dd.p.dm".to_string(), "-3".to_string()) + ]), + links: vec![ + SpanLink { + trace_id: 7_277_407_061_855_694_839, + trace_id_high: 0, + span_id: 67_667_974_448_284_343, + flags: 1, + tracestate: "dd=s:2;o:rum;t.dm:-4;t.usr.id:baz64,congo=t61rcWkgMz".to_string(), + attributes: HashMap::from([ + ("reason".to_string(), "terminated_context".to_string()), + ("context_headers".to_string(), "tracecontext".to_string()), + ]), 
+ } + ], + }, + ), + valid_all_headers_all_styles: ( + Some(vec![TracePropagationStyle::Datadog, TracePropagationStyle::TraceContext]), + ALL_VALID_HEADERS.clone(), + SpanContext { + trace_id: 13_088_165_645_273_925_489, + span_id: 5678, + sampling: Some(Sampling { + priority: Some(1), + mechanism: None, + }), + origin: Some("synthetics".to_string()), + tags: HashMap::from([ + ("_dd.p.dm".to_string(), "-3".to_string()) + ]), + links: vec![ + SpanLink { + trace_id: 7_277_407_061_855_694_839, + trace_id_high: 0, + span_id: 67_667_974_448_284_343, + flags: 1, + tracestate: "dd=s:2;o:rum;t.dm:-4;t.usr.id:baz64,congo=t61rcWkgMz".to_string(), + attributes: HashMap::from([ + ("reason".to_string(), "terminated_context".to_string()), + ("context_headers".to_string(), "tracecontext".to_string()), + ]), + } + // todo: b3 span links + ], + }, + ), + valid_all_headers_datadog_style: ( + Some(vec![TracePropagationStyle::Datadog]), + ALL_VALID_HEADERS.clone(), + SpanContext { + trace_id: 13_088_165_645_273_925_489, + span_id: 5678, + sampling: Some(Sampling { + priority: Some(1), + mechanism: None, + }), + origin: Some("synthetics".to_string()), + tags: HashMap::from([ + ("_dd.p.dm".to_string(), "-3".to_string()) + ]), + links: vec![] + }, + ), + // todo: valid_all_headers_b3_style + // todo: valid_all_headers_both_b3_styles + // todo: valid_all_headers_b3_single_style + none_style: ( + Some(vec![TracePropagationStyle::None]), + ALL_VALID_HEADERS.clone(), + SpanContext::default(), + ), + valid_style_and_none_still_extracts: ( + Some(vec![TracePropagationStyle::Datadog, TracePropagationStyle::None]), + ALL_VALID_HEADERS.clone(), + SpanContext { + trace_id: 13_088_165_645_273_925_489, + span_id: 5678, + sampling: Some(Sampling { + priority: Some(1), + mechanism: None, + }), + origin: Some("synthetics".to_string()), + tags: HashMap::from([ + ("_dd.p.dm".to_string(), "-3".to_string()) + ]), + links: vec![], + } + ), + // Order matters + // todo: order_matters_b3_single_header_first + 
// todo: order_matters_b3_first + // todo: order_matters_b3_second_no_datadog_headers + // Tracestate is still added when TraceContext style comes later and matches + // first style's `trace_id` + additional_tracestate_support_when_present_and_matches_first_style_trace_id: ( + Some(vec![TracePropagationStyle::Datadog, TracePropagationStyle::TraceContext]), + DATADOG_TRACECONTEXT_MATCHING_TRACE_ID_HEADERS.clone(), + SpanContext { + trace_id: 7_277_407_061_855_694_839, + span_id: 67_667_974_448_284_343, + sampling: Some(Sampling { + priority: Some(1), + mechanism: None, + }), + origin: Some("synthetics".to_string()), + tags: HashMap::from([ + ("_dd.p.dm".to_string(), "-3".to_string()), + ("_dd.parent_id".to_string(), "000000000000162e".to_string()), + (TRACESTATE_KEY.to_string(), "dd=s:2;o:rum;t.dm:-4;t.usr.id:baz64,congo=t61rcWkgMzE".to_string()) + ]), + links: vec![], + } + ), + // Tracestate is not added when TraceContext style comes later and does not + // match first style's `trace_id` + no_additional_tracestate_support_when_present_and_trace_id_does_not_match: ( + Some(vec![TracePropagationStyle::Datadog, TracePropagationStyle::TraceContext]), + NO_TRACESTATE_SUPPORT_NOT_MATCHING_TRACE_ID.clone(), + SpanContext { + trace_id: 13_088_165_645_273_925_489, + span_id: 5678, + sampling: Some(Sampling { + priority: Some(1), + mechanism: None, + }), + origin: Some("synthetics".to_string()), + tags: HashMap::from([ + ("_dd.p.dm".to_string(), "-3".to_string()) + ]), + links: vec![ + SpanLink { + trace_id: 7_277_407_061_855_694_839, + trace_id_high: 0, + span_id: 67_667_974_448_284_343, + flags: 0, + tracestate: "dd=o:rum".to_string(), + attributes: HashMap::from([ + ("reason".to_string(), "terminated_context".to_string()), + ("context_headers".to_string(), "tracecontext".to_string()), + ]), + } + ], + } + ), + valid_all_headers_no_style: ( + Some(vec![]), + ALL_VALID_HEADERS.clone(), + SpanContext::default(), + ), + datadog_tracecontext_conflicting_span_ids: ( + 
Some(vec![TracePropagationStyle::Datadog, TracePropagationStyle::TraceContext]), + HashMap::from([ + ( + "x-datadog-trace-id".to_string(), + "9291375655657946024".to_string(), + ), + ("x-datadog-parent-id".to_string(), "15".to_string(),), + ("traceparent".to_string(), "00-000000000000000080f198ee56343ba8-000000000000000a-01".to_string()), + ]), + SpanContext { + trace_id: 9_291_375_655_657_946_024, + span_id: 10, + sampling: Some(Sampling { + priority: Some(2), + mechanism: None, + }), + origin: None, + tags: HashMap::from([ + ("_dd.parent_id".to_string(), "000000000000000f".to_string()), + ("_dd.p.dm".to_string(), "-3".to_string()), + ]), + links: vec![], + } + ), + // todo: all_headers_all_styles_tracecontext_t_id_match_no_span_link + all_headers_all_styles_do_not_create_span_link_for_context_w_out_span_id: ( + Some(vec![TracePropagationStyle::TraceContext, TracePropagationStyle::Datadog]), + ALL_HEADERS_CHAOTIC_2.clone(), + SpanContext { + trace_id: 7_277_407_061_855_694_839, + span_id: 67_667_974_448_284_343, + sampling: Some(Sampling { + priority: Some(2), + mechanism: None, + }), + origin: Some("rum".to_string()), + tags: HashMap::from([ + ("_dd.p.dm".to_string(), "-4".to_string()), + ("_dd.p.tid".to_string(), "0".to_string()), + ("_dd.p.usr.id".to_string(), "baz64".to_string()), + ("traceparent".to_string(), "00-000000000000000064fe8b2a57d3eff7-00f067aa0ba902b7-01".to_string()), + ("tracestate".to_string(), "dd=s:2;o:rum;t.dm:-4;t.usr.id:baz64,congo=t61rcWkgMzE".to_string()), + ]), + links: vec![ + SpanLink { + trace_id: 13_088_165_645_273_925_489, + trace_id_high: 0, + span_id: 5678, + flags: 1, + tracestate: String::new(), + attributes: HashMap::from([ + ("reason".to_string(), "terminated_context".to_string()), + ("context_headers".to_string(), "datadog".to_string()), + ]), + } + ], + } + ), + all_headers_all_styles_tracecontext_primary_only_datadog_t_id_diff: ( + Some(vec![TracePropagationStyle::TraceContext, TracePropagationStyle::Datadog]), + 
ALL_VALID_HEADERS.clone(), + SpanContext { + trace_id: 7_277_407_061_855_694_839, + span_id: 67_667_974_448_284_343, + sampling: Some(Sampling { + priority: Some(2), + mechanism: None, + }), + origin: Some("rum".to_string()), + tags: HashMap::from([ + ("_dd.p.dm".to_string(), "-4".to_string()), + ("_dd.p.tid".to_string(), "9291375655657946024".to_string()), + ("_dd.p.usr.id".to_string(), "baz64".to_string()), + ("traceparent".to_string(), "00-80f198ee56343ba864fe8b2a57d3eff7-00f067aa0ba902b7-01".to_string()), + ("tracestate".to_string(), "dd=s:2;o:rum;t.dm:-4;t.usr.id:baz64,congo=t61rcWkgMz".to_string()), + ]), + links: vec![ + SpanLink { + trace_id: 13_088_165_645_273_925_489, + trace_id_high: 0, + span_id: 5678, + flags: 1, + tracestate: String::new(), + attributes: HashMap::from([ + ("reason".to_string(), "terminated_context".to_string()), + ("context_headers".to_string(), "datadog".to_string()), + ]), + } + ], + } + ), + // todo: fix this test + all_headers_all_styles_datadog_primary_only_datadog_t_id_diff: ( + Some(vec![TracePropagationStyle::Datadog, TracePropagationStyle::TraceContext]), + ALL_VALID_HEADERS.clone(), + SpanContext { + trace_id: 13_088_165_645_273_925_489, + span_id: 5678, + sampling: Some(Sampling { + priority: Some(1), + mechanism: None, + }), + origin: Some("synthetics".to_string()), + tags: HashMap::from([ + ("_dd.p.dm".to_string(), "-3".to_string()) + ]), + links: vec![ + SpanLink { + trace_id: 7_277_407_061_855_694_839, + // this should be `9291375655657946024` not `0`, but we don't have this data + // with the current definition of `SpanContext` + trace_id_high: 0, + span_id: 67_667_974_448_284_343, + flags: 1, + tracestate: "dd=s:2;o:rum;t.dm:-4;t.usr.id:baz64,congo=t61rcWkgMz".to_string(), + attributes: HashMap::from([ + ("reason".to_string(), "terminated_context".to_string()), + ("context_headers".to_string(), "tracecontext".to_string()), + ]), + } + ], + } + ), + // todo: 
datadog_primary_match_tracecontext_dif_from_b3_b3multi_invalid + } + + #[test] + fn test_new_filter_propagators() { + let config = config::Config { + trace_propagation_style_extract: vec![ + TracePropagationStyle::Datadog, + TracePropagationStyle::TraceContext, + TracePropagationStyle::B3, + TracePropagationStyle::B3Multi, + ], + ..Default::default() + }; + + let propagator = DatadogCompositePropagator::new(Arc::new(config)); + + assert_eq!(propagator.propagators.len(), 2); + } + + #[test] + fn test_new_no_propagators() { + let config = config::Config { + trace_propagation_style_extract: vec![TracePropagationStyle::None], + ..Default::default() + }; + let propagator = DatadogCompositePropagator::new(Arc::new(config)); + + assert_eq!(propagator.propagators.len(), 0); + } + + #[test] + fn test_extract_available_contexts() { + let config = config::Config { + trace_propagation_style_extract: vec![ + TracePropagationStyle::Datadog, + TracePropagationStyle::TraceContext, + ], + ..Default::default() + }; + + let propagator = DatadogCompositePropagator::new(Arc::new(config)); + + let carrier = HashMap::from([ + ( + "traceparent".to_string(), + "00-80f198ee56343ba864fe8b2a57d3eff7-00f067aa0ba902b7-01".to_string(), + ), + ( + "tracestate".to_string(), + "dd=p:00f067aa0ba902b7;s:2;o:rum".to_string(), + ), + ( + "x-datadog-trace-id".to_string(), + "7277407061855694839".to_string(), + ), + ( + "x-datadog-parent-id".to_string(), + "67667974448284343".to_string(), + ), + ("x-datadog-sampling-priority".to_string(), "2".to_string()), + ("x-datadog-origin".to_string(), "rum".to_string()), + ( + "x-datadog-tags".to_string(), + "_dd.p.test=value,_dd.p.tid=9291375655657946024,any=tag".to_string(), + ), + ]); + let (contexts, styles) = propagator.extract_available_contexts(&carrier); + + assert_eq!(contexts.len(), 2); + assert_eq!(styles.len(), 2); + } + + #[test] + fn test_extract_available_contexts_no_contexts() { + let config = config::Config { + trace_propagation_style_extract: 
vec![TracePropagationStyle::Datadog], + ..Default::default() + }; + + let propagator = DatadogCompositePropagator::new(Arc::new(config)); + + let carrier = HashMap::from([ + ( + "traceparent".to_string(), + "00-80f198ee56343ba864fe8b2a57d3eff7-00f067aa0ba902b7-01".to_string(), + ), + ( + "tracestate".to_string(), + "dd=p:00f067aa0ba902b7;s:2;o:rum".to_string(), + ), + ]); + let (contexts, styles) = propagator.extract_available_contexts(&carrier); + + assert_eq!(contexts.len(), 0); + assert_eq!(styles.len(), 0); + } + + #[test] + fn test_attach_baggage() { + let mut context = SpanContext::default(); + let carrier = HashMap::from([ + ("x-datadog-trace-id".to_string(), "123".to_string()), + ("x-datadog-parent-id".to_string(), "5678".to_string()), + ("ot-baggage-key1".to_string(), "value1".to_string()), + ]); + + DatadogCompositePropagator::attach_baggage(&mut context, &carrier); + + assert_eq!(context.tags.len(), 1); + assert_eq!(context.tags.get("key1").expect("Missing tag"), "value1"); + } +} diff --git a/bottlecap/src/traces/propagation/text_map_propagator.rs b/bottlecap/src/traces/propagation/text_map_propagator.rs new file mode 100644 index 000000000..520428630 --- /dev/null +++ b/bottlecap/src/traces/propagation/text_map_propagator.rs @@ -0,0 +1,531 @@ +use std::collections::HashMap; + +use lazy_static::lazy_static; +use regex::Regex; +use tracing::{debug, error, warn}; + +use crate::traces::context::{Sampling, SpanContext}; +use crate::traces::propagation::{ + carrier::{Extractor, Injector}, + error::Error, + Propagator, +}; + +// Datadog Keys +pub const DATADOG_TRACE_ID_KEY: &str = "x-datadog-trace-id"; +pub const DATADOG_PARENT_ID_KEY: &str = "x-datadog-parent-id"; +pub const DATADOG_SPAN_ID_KEY: &str = "x-datadog-span-id"; +pub const DATADOG_SAMPLING_PRIORITY_KEY: &str = "x-datadog-sampling-priority"; +const DATADOG_ORIGIN_KEY: &str = "x-datadog-origin"; +pub const DATADOG_TAGS_KEY: &str = "x-datadog-tags"; + +pub const 
DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY: &str = "_dd.p.tid"; +const DATADOG_PROPAGATION_ERROR_KEY: &str = "_dd.propagation_error"; +pub const DATADOG_LAST_PARENT_ID_KEY: &str = "_dd.parent_id"; +const DATADOG_SAMPLING_DECISION_KEY: &str = "_dd.p.dm"; + +// Traceparent Keys +const TRACEPARENT_KEY: &str = "traceparent"; +pub const TRACESTATE_KEY: &str = "tracestate"; + +pub const BAGGAGE_PREFIX: &str = "ot-baggage-"; + +lazy_static! { + static ref TRACEPARENT_REGEX: Regex = + Regex::new(r"(?i)^([a-f0-9]{2})-([a-f0-9]{32})-([a-f0-9]{16})-([a-f0-9]{2})(-.*)?$") + .expect("failed creating regex"); + static ref INVALID_SEGMENT_REGEX: Regex = Regex::new(r"^0+$").expect("failed creating regex"); + static ref VALID_TAG_KEY_REGEX: Regex = + Regex::new(r"^_dd\.p\.[\x21-\x2b\x2d-\x7e]+$").expect("failed creating regex"); + static ref VALID_TAG_VALUE_REGEX: Regex = + Regex::new(r"^[\x20-\x2b\x2d-\x7e]*$").expect("failed creating regex"); + static ref INVALID_ASCII_CHARACTERS_REGEX: Regex = + Regex::new(r"[^\x20-\x7E]+").expect("failed creating regex"); + static ref VALID_SAMPLING_DECISION_REGEX: Regex = + Regex::new(r"^-([0-9])$").expect("failed creating regex"); +} + +#[derive(Clone, Copy)] +pub struct DatadogHeaderPropagator; + +impl Propagator for DatadogHeaderPropagator { + fn extract(&self, carrier: &dyn Extractor) -> Option { + Self::extract_context(carrier) + } + + fn inject(&self, _context: SpanContext, _carrier: &mut dyn Injector) { + todo!(); + } +} + +impl DatadogHeaderPropagator { + fn extract_context(carrier: &dyn Extractor) -> Option { + let trace_id = match Self::extract_trace_id(carrier) { + Ok(trace_id) => trace_id, + Err(e) => { + debug!("{e}"); + return None; + } + }; + + let parent_id = Self::extract_parent_id(carrier).unwrap_or(0); + let sampling_priority = match Self::extract_sampling_priority(carrier) { + Ok(sampling_priority) => sampling_priority, + Err(e) => { + debug!("{e}"); + return None; + } + }; + let origin = Self::extract_origin(carrier); + let 
mut tags = Self::extract_tags(carrier); + Self::validate_sampling_decision(&mut tags); + + Some(SpanContext { + trace_id, + span_id: parent_id, + sampling: Some(Sampling { + priority: Some(sampling_priority), + mechanism: None, + }), + origin, + tags, + links: Vec::new(), + }) + } + + fn validate_sampling_decision(tags: &mut HashMap) { + let should_remove = + tags.get(DATADOG_SAMPLING_DECISION_KEY) + .map_or(false, |sampling_decision| { + let is_invalid = !VALID_SAMPLING_DECISION_REGEX.is_match(sampling_decision); + if is_invalid { + warn!("Failed to decode `_dd.p.dm`: {}", sampling_decision); + } + is_invalid + }); + + if should_remove { + tags.remove(DATADOG_SAMPLING_DECISION_KEY); + tags.insert( + DATADOG_PROPAGATION_ERROR_KEY.to_string(), + "decoding_error".to_string(), + ); + } + + // todo: appsec standalone + } + + fn extract_trace_id(carrier: &dyn Extractor) -> Result { + let trace_id = carrier + .get(DATADOG_TRACE_ID_KEY) + .ok_or(Error::extract("`trace_id` not found", "datadog"))?; + + if INVALID_SEGMENT_REGEX.is_match(trace_id) { + return Err(Error::extract("Invalid `trace_id` found", "datadog")); + } + + trace_id + .parse::() + .map_err(|_| Error::extract("Failed to decode `trace_id`", "datadog")) + } + + fn extract_parent_id(carrier: &dyn Extractor) -> Option { + let parent_id = carrier.get(DATADOG_PARENT_ID_KEY)?; + + parent_id.parse::().ok() + } + + fn extract_sampling_priority(carrier: &dyn Extractor) -> Result { + // todo: enum? 
Default is USER_KEEP=2 + let sampling_priority = carrier.get(DATADOG_SAMPLING_PRIORITY_KEY).unwrap_or("2"); + + sampling_priority + .parse::() + .map_err(|_| Error::extract("Failed to decode `sampling_priority`", "datadog")) + } + + fn extract_origin(carrier: &dyn Extractor) -> Option { + let origin = carrier.get(DATADOG_ORIGIN_KEY)?; + Some(origin.to_string()) + } + + pub fn extract_tags(carrier: &dyn Extractor) -> HashMap { + let carrier_tags = carrier.get(DATADOG_TAGS_KEY).unwrap_or_default(); + let mut tags: HashMap = HashMap::new(); + + // todo: + // - trace propagation disabled + // - trace propagation max lenght + + let pairs = carrier_tags.split(','); + for pair in pairs { + if let Some((k, v)) = pair.split_once('=') { + // todo: reject key on tags extract reject + if k.starts_with("_dd.p.") { + tags.insert(k.to_string(), v.to_string()); + } + } + } + + // Handle 128bit trace ID + if !tags.is_empty() { + if let Some(trace_id_higher_order_bits) = + carrier.get(DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY) + { + if !Self::higher_order_bits_valid(trace_id_higher_order_bits) { + warn!("Malformed Trace ID: {trace_id_higher_order_bits} Failed to decode trace ID from carrier."); + tags.insert( + DATADOG_PROPAGATION_ERROR_KEY.to_string(), + format!("malformed tid {trace_id_higher_order_bits}"), + ); + tags.remove(DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY); + } + } + } + + if !tags.contains_key(DATADOG_SAMPLING_DECISION_KEY) { + tags.insert(DATADOG_SAMPLING_DECISION_KEY.to_string(), "-3".to_string()); + } + + tags + } + + fn higher_order_bits_valid(trace_id_higher_order_bits: &str) -> bool { + if trace_id_higher_order_bits.len() != 16 { + return false; + } + + match u64::from_str_radix(trace_id_higher_order_bits, 16) { + Ok(_) => {} + Err(_) => return false, + } + + true + } +} + +struct Traceparent { + sampling_priority: i8, + trace_id: u128, + span_id: u64, +} + +struct Tracestate { + sampling_priority: Option, + origin: Option, + lower_order_trace_id: Option, +} + 
+#[derive(Clone, Copy)] +pub struct TraceContextPropagator; + +impl Propagator for TraceContextPropagator { + fn extract(&self, carrier: &dyn Extractor) -> Option { + Self::extract_context(carrier) + } + + fn inject(&self, _context: SpanContext, _carrier: &mut dyn Injector) { + todo!() + } +} + +impl TraceContextPropagator { + fn extract_context(carrier: &dyn Extractor) -> Option { + let tp = carrier.get(TRACEPARENT_KEY)?.trim(); + + match Self::extract_traceparent(tp) { + Ok(traceparent) => { + let mut tags = HashMap::new(); + tags.insert(TRACEPARENT_KEY.to_string(), tp.to_string()); + + let mut origin = None; + let mut sampling_priority = traceparent.sampling_priority; + if let Some(ts) = carrier.get(TRACESTATE_KEY) { + if let Some(tracestate) = Self::extract_tracestate(ts, &mut tags) { + if let Some(lpid) = tracestate.lower_order_trace_id { + tags.insert(DATADOG_LAST_PARENT_ID_KEY.to_string(), lpid); + } + + origin = tracestate.origin; + + sampling_priority = Self::define_sampling_priority( + traceparent.sampling_priority, + tracestate.sampling_priority, + ); + } + } else { + debug!("No `dd` value found in tracestate"); + } + + let (trace_id_higher_order_bits, trace_id_lower_order_bits) = + Self::split_trace_id(traceparent.trace_id); + tags.insert( + DATADOG_HIGHER_ORDER_TRACE_ID_BITS_KEY.to_string(), + trace_id_higher_order_bits.to_string(), + ); + + Some(SpanContext { + trace_id: trace_id_lower_order_bits, + span_id: traceparent.span_id, + sampling: Some(Sampling { + priority: Some(sampling_priority), + mechanism: None, + }), + origin, + tags, + links: Vec::new(), + }) + } + Err(e) => { + error!("Failed to extract traceparent: {e}"); + None + } + } + } + + fn extract_tracestate( + tracestate: &str, + tags: &mut HashMap, + ) -> Option { + let ts_v = tracestate.split(',').map(str::trim); + let ts = ts_v.clone().collect::>().join(","); + + if INVALID_ASCII_CHARACTERS_REGEX.is_match(&ts) { + debug!("Received invalid tracestate header {tracestate}"); + return None; 
+ } + + tags.insert(TRACESTATE_KEY.to_string(), ts.to_string()); + + let mut dd: Option> = None; + for v in ts_v.clone() { + if let Some(stripped) = v.strip_prefix("dd=") { + dd = Some( + stripped + .split(';') + .filter_map(|item| { + let mut parts = item.splitn(2, ':'); + Some((parts.next()?.to_string(), parts.next()?.to_string())) + }) + .collect(), + ); + } + } + + if let Some(dd) = dd { + let mut tracestate = Tracestate { + sampling_priority: None, + origin: None, + lower_order_trace_id: None, + }; + + if let Some(ts_sp) = dd.get("s") { + if let Ok(p_sp) = ts_sp.parse::() { + tracestate.sampling_priority = Some(p_sp); + } + } + + if let Some(o) = dd.get("o") { + tracestate.origin = Some(Self::decode_tag_value(o)); + } + + if let Some(lo_tid) = dd.get("p") { + tracestate.lower_order_trace_id = Some(lo_tid.to_string()); + } + + // Convert from `t.` to `_dd.p.` + for (k, v) in &dd { + if let Some(stripped) = k.strip_prefix("t.") { + let nk = format!("_dd.p.{stripped}"); + tags.insert(nk, Self::decode_tag_value(v)); + } + } + + return Some(tracestate); + } + + None + } + + fn decode_tag_value(value: &str) -> String { + value.replace('~', "=") + } + + fn define_sampling_priority( + traceparent_sampling_priority: i8, + tracestate_sampling_priority: Option, + ) -> i8 { + if let Some(ts_sp) = tracestate_sampling_priority { + if (traceparent_sampling_priority == 1 && ts_sp > 0) + || (traceparent_sampling_priority == 0 && ts_sp < 0) + { + return ts_sp; + } + } + + traceparent_sampling_priority + } + + fn extract_traceparent(traceparent: &str) -> Result { + let captures = TRACEPARENT_REGEX + .captures(traceparent) + .ok_or_else(|| Error::extract("invalid traceparent", "traceparent"))?; + + let version = &captures[1]; + let trace_id = &captures[2]; + let span_id = &captures[3]; + let flags = &captures[4]; + let tail = captures.get(5).map_or("", |m| m.as_str()); + + Self::extract_version(version, tail)?; + + let trace_id = Self::extract_trace_id(trace_id)?; + let span_id = 
Self::extract_span_id(span_id)?; + + let trace_flags = Self::extract_trace_flags(flags)?; + let sampling_priority = i8::from(trace_flags & 0x1 != 0); + + Ok(Traceparent { + sampling_priority, + trace_id, + span_id, + }) + } + + fn extract_version(version: &str, tail: &str) -> Result<(), Error> { + match version { + "ff" => { + return Err(Error::extract( + "`ff` is an invalid traceparent version", + "traceparent", + )) + } + "00" => { + if !tail.is_empty() { + return Err(Error::extract("Traceparent with version `00` should contain only 4 values delimited by `-`", "traceparent")); + } + } + _ => { + warn!("Unsupported traceparent version {version}, still atempenting to parse"); + } + } + + Ok(()) + } + + fn extract_trace_id(trace_id: &str) -> Result { + if INVALID_SEGMENT_REGEX.is_match(trace_id) { + return Err(Error::extract( + "`0` value for trace_id is invalid", + "traceparent", + )); + } + + u128::from_str_radix(trace_id, 16) + .map_err(|_| Error::extract("Failed to decode trace_id", "traceparent")) + } + + #[allow(clippy::cast_possible_truncation)] + fn split_trace_id(trace_id: u128) -> (u64, u64) { + let trace_id_lower_order_bits = trace_id as u64; + let trace_id_higher_order_bits = (trace_id >> 64) as u64; + + (trace_id_higher_order_bits, trace_id_lower_order_bits) + } + + fn extract_span_id(span_id: &str) -> Result { + if INVALID_SEGMENT_REGEX.is_match(span_id) { + return Err(Error::extract( + "`0` value for span_id is invalid", + "traceparent", + )); + } + + u64::from_str_radix(span_id, 16) + .map_err(|_| Error::extract("Failed to decode span_id", "traceparent")) + } + + fn extract_trace_flags(flags: &str) -> Result { + if flags.len() != 2 { + return Err(Error::extract("Invalid trace flags length", "traceparent")); + } + + u8::from_str_radix(flags, 16) + .map_err(|_| Error::extract("Failed to decode trace_flags", "traceparent")) + } +} + +#[cfg(test)] +#[allow(clippy::unwrap_used)] +mod test { + use super::*; + + #[test] + fn 
test_extract_datadog_propagator() { + let headers = HashMap::from([ + ("x-datadog-trace-id".to_string(), "1234".to_string()), + ("x-datadog-parent-id".to_string(), "5678".to_string()), + ("x-datadog-sampling-priority".to_string(), "1".to_string()), + ("x-datadog-origin".to_string(), "synthetics".to_string()), + ( + "x-datadog-tags".to_string(), + "_dd.p.test=value,_dd.p.tid=4321,any=tag".to_string(), + ), + ]); + + let propagator = DatadogHeaderPropagator; + + let context = propagator + .extract(&headers) + .expect("couldn't extract trace context"); + + assert_eq!(context.trace_id, 1234); + assert_eq!(context.span_id, 5678); + assert_eq!(context.sampling.unwrap().priority, Some(1)); + assert_eq!(context.origin, Some("synthetics".to_string())); + println!("{:?}", context.tags); + assert_eq!(context.tags.get("_dd.p.test").unwrap(), "value"); + assert_eq!(context.tags.get("_dd.p.tid").unwrap(), "4321"); + assert_eq!(context.tags.get("_dd.p.dm").unwrap(), "-3"); + } + + #[test] + fn test_extract_traceparent_propagator() { + let headers = HashMap::from([ + ( + "traceparent".to_string(), + "00-80f198ee56343ba864fe8b2a57d3eff7-00f067aa0ba902b7-01".to_string(), + ), + ( + "tracestate".to_string(), + "dd=p:00f067aa0ba902b7;s:2;o:rum".to_string(), + ), + ]); + + let propagator = TraceContextPropagator; + let context = propagator + .extract(&headers) + .expect("couldn't extract trace context"); + + assert_eq!(context.trace_id, 7_277_407_061_855_694_839); + assert_eq!(context.span_id, 67_667_974_448_284_343); + assert_eq!(context.sampling.unwrap().priority, Some(2)); + assert_eq!(context.origin, Some("rum".to_string())); + assert_eq!( + context.tags.get("traceparent").unwrap(), + "00-80f198ee56343ba864fe8b2a57d3eff7-00f067aa0ba902b7-01" + ); + assert_eq!( + context.tags.get("tracestate").unwrap(), + "dd=p:00f067aa0ba902b7;s:2;o:rum" + ); + assert_eq!( + context.tags.get("_dd.p.tid").unwrap(), + "9291375655657946024" + ); + assert_eq!( + 
context.tags.get("_dd.parent_id").unwrap(), + "00f067aa0ba902b7" + ); + } +} diff --git a/bottlecap/src/traces/trace_agent.rs b/bottlecap/src/traces/trace_agent.rs index 13ad6a321..c617def85 100644 --- a/bottlecap/src/traces/trace_agent.rs +++ b/bottlecap/src/traces/trace_agent.rs @@ -34,6 +34,7 @@ pub struct TraceAgent { pub stats_processor: Arc, pub stats_flusher: Arc, pub tags_provider: Arc, + tx: Sender, } #[derive(Clone, Copy)] @@ -43,9 +44,15 @@ pub enum ApiVersion { } impl TraceAgent { - pub async fn start_trace_agent(&self) -> Result<(), Box> { - let now = Instant::now(); - + #[must_use] + pub async fn new( + config: Arc, + trace_processor: Arc, + trace_flusher: Arc, + stats_processor: Arc, + stats_flusher: Arc, + tags_provider: Arc, + ) -> TraceAgent { // setup a channel to send processed traces to our flusher. tx is passed through each // endpoint_handler to the trace processor, which uses it to send de-serialized // processed trace payloads to our trace flusher. @@ -54,9 +61,24 @@ impl TraceAgent { // start our trace flusher. receives trace payloads and handles buffering + deciding when to // flush to backend. - let trace_flusher = self.trace_flusher.clone(); + let trace_flusher = trace_flusher.clone(); trace_flusher.start_trace_flusher(trace_rx).await; + TraceAgent { + config, + trace_processor, + trace_flusher, + stats_processor, + stats_flusher, + tags_provider, + tx: trace_tx, + } + } + + pub async fn start(&self) -> Result<(), Box> { + let now = Instant::now(); + let trace_tx = self.tx.clone(); + // channels to send processed stats to our stats flusher. 
let (stats_tx, stats_rx): ( Sender, @@ -197,7 +219,6 @@ impl TraceAgent { tags_provider: Arc, version: ApiVersion, ) -> http::Result> { - debug!("Received traces to process"); let (parts, body) = req.into_parts(); if let Some(response) = http_utils::verify_request_content_length( @@ -267,4 +288,9 @@ impl TraceAgent { .status(200) .body(Body::from(response_json.to_string())) } + + #[must_use] + pub fn get_sender_copy(&self) -> Sender { + self.tx.clone() + } } diff --git a/bottlecap/src/traces/trace_processor.rs b/bottlecap/src/traces/trace_processor.rs index 90f41d8cd..f060dad43 100644 --- a/bottlecap/src/traces/trace_processor.rs +++ b/bottlecap/src/traces/trace_processor.rs @@ -11,10 +11,14 @@ use ddcommon::Endpoint; use std::str::FromStr; use std::sync::Arc; -use tracing::debug; - use crate::config; +use crate::traces::{ + AWS_XRAY_DAEMON_ADDRESS_URL_PREFIX, DNS_LOCAL_HOST_ADDRESS_URL_PREFIX, + DNS_NON_ROUTABLE_ADDRESS_URL_PREFIX, INVOCATION_SPAN_RESOURCE, LAMBDA_EXTENSION_URL_PREFIX, + LAMBDA_RUNTIME_URL_PREFIX, LAMBDA_STATSD_URL_PREFIX, +}; use datadog_trace_obfuscation::obfuscate::obfuscate_span; +use datadog_trace_protobuf::pb::Span; use datadog_trace_utils::trace_utils::SendData; use datadog_trace_utils::trace_utils::{self}; @@ -31,11 +35,10 @@ struct ChunkProcessor { } impl TraceChunkProcessor for ChunkProcessor { - fn process(&mut self, chunk: &mut datadog_trace_protobuf::pb::TraceChunk, _index: usize) { - chunk.spans.retain(|span| { - (span.resource != "127.0.0.1" || span.resource != "0.0.0.0") - && span.name != "dns.lookup" - }); + fn process(&mut self, chunk: &mut pb::TraceChunk, _index: usize) { + chunk + .spans + .retain(|span| !filter_span_from_lambda_library_or_runtime(span)); for span in &mut chunk.spans { self.tags_provider.get_tags_map().iter().for_each(|(k, v)| { span.meta.insert(k.clone(), v.clone()); @@ -49,6 +52,53 @@ impl TraceChunkProcessor for ChunkProcessor { } } +fn filter_span_from_lambda_library_or_runtime(span: &Span) -> bool { + if 
let Some(url) = span.meta.get("http.url") { + if url.starts_with(LAMBDA_RUNTIME_URL_PREFIX) + || url.starts_with(LAMBDA_EXTENSION_URL_PREFIX) + || url.starts_with(LAMBDA_STATSD_URL_PREFIX) + { + return true; + } + } + + if let (Some(tcp_host), Some(tcp_port)) = ( + span.meta.get("tcp.remote.host"), + span.meta.get("tcp.remote.port"), + ) { + { + let tcp_lambda_url_prefix = format!("http://{tcp_host}:{tcp_port}"); + if tcp_lambda_url_prefix.starts_with(LAMBDA_RUNTIME_URL_PREFIX) + || tcp_lambda_url_prefix.starts_with(LAMBDA_EXTENSION_URL_PREFIX) + || tcp_lambda_url_prefix.starts_with(LAMBDA_STATSD_URL_PREFIX) + { + return true; + } + } + } + + if let Some(dns_address) = span.meta.get("dns.address") { + if dns_address.starts_with(DNS_NON_ROUTABLE_ADDRESS_URL_PREFIX) + || dns_address.starts_with(DNS_LOCAL_HOST_ADDRESS_URL_PREFIX) + || dns_address.starts_with(AWS_XRAY_DAEMON_ADDRESS_URL_PREFIX) + { + return true; + } + } + if span.resource == INVOCATION_SPAN_RESOURCE { + return true; + } + + if span.name == "dns.lookup" + || span.resource == DNS_LOCAL_HOST_ADDRESS_URL_PREFIX + || span.resource == DNS_NON_ROUTABLE_ADDRESS_URL_PREFIX + { + return true; + } + + false +} + #[allow(clippy::module_name_repetitions)] pub trait TraceProcessor { fn process_traces( @@ -70,7 +120,6 @@ impl TraceProcessor for ServerlessTraceProcessor { traces: Vec>, body_size: usize, ) -> SendData { - debug!("Received traces to process"); let payload = trace_utils::collect_trace_chunks( V07(traces), &header_tags, diff --git a/bottlecap/tests/payloads/api_gateway_http_event.json b/bottlecap/tests/payloads/api_gateway_http_event.json new file mode 100644 index 000000000..061a02522 --- /dev/null +++ b/bottlecap/tests/payloads/api_gateway_http_event.json @@ -0,0 +1,38 @@ +{ + "version": "2.0", + "routeKey": "GET /httpapi/get", + "rawPath": "/httpapi/get", + "rawQueryString": "", + "headers": { + "accept": "*/*", + "content-length": "0", + "host": "x02yirxc7a.execute-api.sa-east-1.amazonaws.com", + 
"user-agent": "curl/7.64.1", + "x-amzn-trace-id": "Root=1-613a52fb-4c43cfc95e0241c1471bfa05", + "x-forwarded-for": "38.122.226.210", + "x-forwarded-port": "443", + "x-forwarded-proto": "https", + "x-datadog-trace-id": "12345", + "x-datadog-parent-id": "67890", + "x-datadog-sampling-priority": "2" + }, + "requestContext": { + "accountId": "425362996713", + "apiId": "x02yirxc7a", + "domainName": "x02yirxc7a.execute-api.sa-east-1.amazonaws.com", + "domainPrefix": "x02yirxc7a", + "http": { + "method": "GET", + "path": "/httpapi/get", + "protocol": "HTTP/1.1", + "sourceIp": "38.122.226.210", + "userAgent": "curl/7.64.1" + }, + "requestId": "FaHnXjKCGjQEJ7A=", + "routeKey": "GET /httpapi/get", + "stage": "$default", + "time": "09/Sep/2021:18:31:23 +0000", + "timeEpoch": 1631212283738 + }, + "isBase64Encoded": false +} diff --git a/bottlecap/tests/payloads/api_gateway_http_event_parameterized.json b/bottlecap/tests/payloads/api_gateway_http_event_parameterized.json new file mode 100644 index 000000000..89ff72b9c --- /dev/null +++ b/bottlecap/tests/payloads/api_gateway_http_event_parameterized.json @@ -0,0 +1,38 @@ +{ + "version": "2.0", + "routeKey": "GET /user/{id}", + "rawPath": "/user/42", + "rawQueryString": "", + "headers": { + "accept": "*/*", + "content-length": "0", + "host": "9vj54we5ih.execute-api.sa-east-1.amazonaws.com", + "user-agent": "curl/8.1.2", + "x-amzn-trace-id": "Root=1-65f49d71-505edb3b69b8abd513cfa08b", + "x-forwarded-for": "76.115.124.192", + "x-forwarded-port": "443", + "x-forwarded-proto": "https" + }, + "requestContext": { + "accountId": "425362996713", + "apiId": "9vj54we5ih", + "domainName": "9vj54we5ih.execute-api.sa-east-1.amazonaws.com", + "domainPrefix": "9vj54we5ih", + "http": { + "method": "GET", + "path": "/user/42", + "protocol": "HTTP/1.1", + "sourceIp": "76.115.124.192", + "userAgent": "curl/8.1.2" + }, + "requestId": "Ur2JtjEfGjQEPOg=", + "routeKey": "GET /user/{id}", + "stage": "$default", + "time": "15/Mar/2024:19:11:45 +0000", + 
"timeEpoch": 1710529905066 + }, + "pathParameters": { + "id": "42" + }, + "isBase64Encoded": false +} diff --git a/bottlecap/tests/payloads/api_gateway_proxy_event.json b/bottlecap/tests/payloads/api_gateway_proxy_event.json new file mode 100644 index 000000000..de1155eb1 --- /dev/null +++ b/bottlecap/tests/payloads/api_gateway_proxy_event.json @@ -0,0 +1,127 @@ +{ + "body": "eyJ0ZXN0IjoiYm9keSJ9", + "resource": "/{proxy+}", + "path": "/path/to/resource", + "httpMethod": "POST", + "isBase64Encoded": true, + "queryStringParameters": { + "foo": "bar" + }, + "multiValueQueryStringParameters": { + "foo": [ + "bar" + ] + }, + "pathParameters": { + "proxy": "/path/to/resource" + }, + "stageVariables": { + "baz": "qux" + }, + "headers": { + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", + "Accept-Encoding": "gzip, deflate, sdch", + "Accept-Language": "en-US,en;q=0.8", + "Cache-Control": "max-age=0", + "CloudFront-Forwarded-Proto": "https", + "CloudFront-Is-Desktop-Viewer": "true", + "CloudFront-Is-Mobile-Viewer": "false", + "CloudFront-Is-SmartTV-Viewer": "false", + "CloudFront-Is-Tablet-Viewer": "false", + "CloudFront-Viewer-Country": "US", + "Host": "1234567890.execute-api.us-east-1.amazonaws.com", + "Upgrade-Insecure-Requests": "1", + "User-Agent": "Custom User Agent String", + "Via": "1.1 08f323deadbeefa7af34d5feb414ce27.cloudfront.net (CloudFront)", + "X-Amz-Cf-Id": "cDehVQoZnx43VYQb9j2-nvCh-9z396Uhbp027Y2JvkCPNLmGJHqlaA==", + "X-Forwarded-For": "127.0.0.1, 127.0.0.2", + "X-Forwarded-Port": "443", + "X-Forwarded-Proto": "https", + "X-Datadog-Trace-Id": "12345", + "X-Datadog-Parent-Id": "67890", + "x-datadog-sampling-priority": "2" + }, + "multiValueHeaders": { + "Accept": [ + "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8" + ], + "Accept-Encoding": [ + "gzip, deflate, sdch" + ], + "Accept-Language": [ + "en-US,en;q=0.8" + ], + "Cache-Control": [ + "max-age=0" + ], + "CloudFront-Forwarded-Proto": [ 
+ "https" + ], + "CloudFront-Is-Desktop-Viewer": [ + "true" + ], + "CloudFront-Is-Mobile-Viewer": [ + "false" + ], + "CloudFront-Is-SmartTV-Viewer": [ + "false" + ], + "CloudFront-Is-Tablet-Viewer": [ + "false" + ], + "CloudFront-Viewer-Country": [ + "US" + ], + "Host": [ + "0123456789.execute-api.us-east-1.amazonaws.com" + ], + "Upgrade-Insecure-Requests": [ + "1" + ], + "User-Agent": [ + "Custom User Agent String" + ], + "Via": [ + "1.1 08f323deadbeefa7af34d5feb414ce27.cloudfront.net (CloudFront)" + ], + "X-Amz-Cf-Id": [ + "cDehVQoZnx43VYQb9j2-nvCh-9z396Uhbp027Y2JvkCPNLmGJHqlaA==" + ], + "X-Forwarded-For": [ + "127.0.0.1, 127.0.0.2" + ], + "X-Forwarded-Port": [ + "443" + ], + "X-Forwarded-Proto": [ + "https" + ] + }, + "requestContext": { + "accountId": "123456789012", + "resourceId": "123456", + "stage": "prod", + "requestId": "c6af9ac6-7b61-11e6-9a41-93e8deadbeef", + "requestTime": "09/Apr/2015:12:34:56 +0000", + "requestTimeEpoch": 1428582896000, + "identity": { + "cognitoIdentityPoolId": null, + "accountId": null, + "cognitoIdentityId": null, + "caller": null, + "accessKey": null, + "sourceIp": "127.0.0.1", + "cognitoAuthenticationType": null, + "cognitoAuthenticationProvider": null, + "userArn": null, + "userAgent": "Custom User Agent String", + "user": null + }, + "domainName": "70ixmpl4fl.execute-api.us-east-2.amazonaws.com", + "path": "/prod/path/to/resource", + "resourcePath": "/{proxy+}", + "httpMethod": "POST", + "apiId": "1234567890", + "protocol": "HTTP/1.1" + } +} diff --git a/bottlecap/tests/payloads/api_gateway_rest_event.json b/bottlecap/tests/payloads/api_gateway_rest_event.json new file mode 100644 index 000000000..df9c5bb88 --- /dev/null +++ b/bottlecap/tests/payloads/api_gateway_rest_event.json @@ -0,0 +1,80 @@ +{ + "version": "1.0", + "resource": "/my/path", + "path": "/my/path", + "httpMethod": "GET", + "headers": { + "Header1": "value1", + "Header2": "value2" + }, + "multiValueHeaders": { + "Header1": [ + "value1" + ], + "Header2": [ + 
"value1", + "value2" + ] + }, + "queryStringParameters": { + "parameter1": "value1", + "parameter2": "value" + }, + "multiValueQueryStringParameters": { + "parameter1": [ + "value1", + "value2" + ], + "parameter2": [ + "value" + ] + }, + "requestContext": { + "accountId": "123456789012", + "apiId": "id", + "authorizer": { + "claims": null, + "scopes": null + }, + "domainName": "id.execute-api.us-east-1.amazonaws.com", + "domainPrefix": "id", + "extendedRequestId": "request-id", + "httpMethod": "GET", + "identity": { + "accessKey": null, + "accountId": null, + "caller": null, + "cognitoAuthenticationProvider": null, + "cognitoAuthenticationType": null, + "cognitoIdentityId": null, + "cognitoIdentityPoolId": null, + "principalOrgId": null, + "sourceIp": "IP", + "user": null, + "userAgent": "user-agent", + "userArn": null, + "clientCert": { + "clientCertPem": "CERT_CONTENT", + "subjectDN": "www.example.com", + "issuerDN": "Example issuer", + "serialNumber": "a1:a1:a1:a1:a1:a1:a1:a1:a1:a1:a1:a1:a1:a1:a1:a1", + "validity": { + "notBefore": "May 28 12:30:02 2019 GMT", + "notAfter": "Aug 5 09:36:04 2021 GMT" + } + } + }, + "path": "/my/path", + "protocol": "HTTP/1.1", + "requestId": "id=", + "requestTime": "04/Mar/2020:19:15:17 +0000", + "requestTimeEpoch": 1583349317135, + "resourceId": null, + "resourcePath": "/path", + "stage": "$default" + }, + "pathParameters": null, + "stageVariables": null, + "body": "Hello from Lambda!", + "isBase64Encoded": false +} diff --git a/bottlecap/tests/payloads/api_gateway_rest_event_parameterized.json b/bottlecap/tests/payloads/api_gateway_rest_event_parameterized.json new file mode 100644 index 000000000..65527ccb6 --- /dev/null +++ b/bottlecap/tests/payloads/api_gateway_rest_event_parameterized.json @@ -0,0 +1,111 @@ +{ + "resource": "/user/{id}", + "path": "/user/42", + "httpMethod": "GET", + "headers": { + "Accept": "*/*", + "CloudFront-Forwarded-Proto": "https", + "CloudFront-Is-Desktop-Viewer": "true", + 
"CloudFront-Is-Mobile-Viewer": "false", + "CloudFront-Is-SmartTV-Viewer": "false", + "CloudFront-Is-Tablet-Viewer": "false", + "CloudFront-Viewer-ASN": "7922", + "CloudFront-Viewer-Country": "US", + "Host": "mcwkra0ya4.execute-api.sa-east-1.amazonaws.com", + "User-Agent": "curl/8.1.2", + "Via": "2.0 xxx.cloudfront.net (CloudFront)", + "X-Amz-Cf-Id": "Tz3yUVcJkwOhQGqZgKTzrEHqAoOd8ZprYAHpg2S6BNxdd-Ym79pb6g==", + "X-Amzn-Trace-Id": "Root=1-65f49d20-7ba106216238dd0078a5db31", + "X-Forwarded-For": "76.115.124.192, 15.158.54.119", + "X-Forwarded-Port": "443", + "X-Forwarded-Proto": "https" + }, + "multiValueHeaders": { + "Accept": [ + "*/*" + ], + "CloudFront-Forwarded-Proto": [ + "https" + ], + "CloudFront-Is-Desktop-Viewer": [ + "true" + ], + "CloudFront-Is-Mobile-Viewer": [ + "false" + ], + "CloudFront-Is-SmartTV-Viewer": [ + "false" + ], + "CloudFront-Is-Tablet-Viewer": [ + "false" + ], + "CloudFront-Viewer-ASN": [ + "7922" + ], + "CloudFront-Viewer-Country": [ + "US" + ], + "Host": [ + "mcwkra0ya4.execute-api.sa-east-1.amazonaws.com" + ], + "User-Agent": [ + "curl/8.1.2" + ], + "Via": [ + "2.0 xxx.cloudfront.net (CloudFront)" + ], + "X-Amz-Cf-Id": [ + "Tz3yUVcJkwOhQGqZgKTzrEHqAoOd8ZprYAHpg2S6BNxdd-Ym79pb6g==" + ], + "X-Amzn-Trace-Id": [ + "Root=1-65f49d20-7ba106216238dd0078a5db31" + ], + "X-Forwarded-For": [ + "76.115.124.192, 15.158.54.119" + ], + "X-Forwarded-Port": [ + "443" + ], + "X-Forwarded-Proto": [ + "https" + ] + }, + "queryStringParameters": null, + "multiValueQueryStringParameters": null, + "pathParameters": { + "id": "42" + }, + "stageVariables": null, + "requestContext": { + "resourceId": "ojg3nk", + "resourcePath": "/user/{id}", + "httpMethod": "GET", + "extendedRequestId": "Ur19IHYDmjQEU5A=", + "requestTime": "15/Mar/2024:19:10:24 +0000", + "path": "/dev/user/42", + "accountId": "425362996713", + "protocol": "HTTP/1.1", + "stage": "dev", + "domainPrefix": "mcwkra0ya4", + "requestTimeEpoch": 1710529824520, + "requestId": 
"e16399f7-e984-463a-9931-745ba021a27f", + "identity": { + "cognitoIdentityPoolId": null, + "accountId": null, + "cognitoIdentityId": null, + "caller": null, + "sourceIp": "76.115.124.192", + "principalOrgId": null, + "accessKey": null, + "cognitoAuthenticationType": null, + "cognitoAuthenticationProvider": null, + "userArn": null, + "userAgent": "curl/8.1.2", + "user": null + }, + "domainName": "mcwkra0ya4.execute-api.sa-east-1.amazonaws.com", + "apiId": "mcwkra0ya4" + }, + "body": null, + "isBase64Encoded": false +} diff --git a/bottlecap/tests/payloads/dynamodb_event.json b/bottlecap/tests/payloads/dynamodb_event.json new file mode 100644 index 000000000..df0cf7ea4 --- /dev/null +++ b/bottlecap/tests/payloads/dynamodb_event.json @@ -0,0 +1,93 @@ +{ + "Records": [ + { + "eventID": "c4ca4238a0b923820dcc509a6f75849b", + "eventName": "INSERT", + "eventVersion": "1.1", + "eventSource": "aws:dynamodb", + "awsRegion": "us-east-1", + "dynamodb": { + "Keys": { + "Id": { + "N": "101" + } + }, + "NewImage": { + "Message": { + "S": "New item!" + }, + "Id": { + "N": "101" + } + }, + "ApproximateCreationDateTime": 1428537600, + "SequenceNumber": "4421584500000000017450439091", + "SizeBytes": 26, + "StreamViewType": "NEW_AND_OLD_IMAGES" + }, + "eventSourceARN": "arn:aws:dynamodb:us-east-1:123456789012:table/ExampleTableWithStream/stream/2015-06-27T00:48:05.899" + }, + { + "eventID": "c81e728d9d4c2f636f067f89cc14862c", + "eventName": "MODIFY", + "eventVersion": "1.1", + "eventSource": "aws:dynamodb", + "awsRegion": "us-east-1", + "dynamodb": { + "Keys": { + "Id": { + "N": "101" + } + }, + "NewImage": { + "Message": { + "S": "This item has changed" + }, + "Id": { + "N": "101" + } + }, + "OldImage": { + "Message": { + "S": "New item!" 
+ }, + "Id": { + "N": "101" + } + }, + "ApproximateCreationDateTime": 1428537600, + "SequenceNumber": "4421584500000000017450439092", + "SizeBytes": 59, + "StreamViewType": "NEW_AND_OLD_IMAGES" + }, + "eventSourceARN": "arn:aws:dynamodb:us-east-1:123456789012:table/ExampleTableWithStream/stream/2015-06-27T00:48:05.899" + }, + { + "eventID": "eccbc87e4b5ce2fe28308fd9f2a7baf3", + "eventName": "REMOVE", + "eventVersion": "1.1", + "eventSource": "aws:dynamodb", + "awsRegion": "us-east-1", + "dynamodb": { + "Keys": { + "Id": { + "N": "101" + } + }, + "OldImage": { + "Message": { + "S": "This item has changed" + }, + "Id": { + "N": "101" + } + }, + "ApproximateCreationDateTime": 1428537600, + "SequenceNumber": "4421584500000000017450439093", + "SizeBytes": 38, + "StreamViewType": "NEW_AND_OLD_IMAGES" + }, + "eventSourceARN": "arn:aws:dynamodb:us-east-1:123456789012:table/ExampleTableWithStream/stream/2015-06-27T00:48:05.899" + } + ] +} diff --git a/bottlecap/tests/payloads/eventbridge_event.json b/bottlecap/tests/payloads/eventbridge_event.json new file mode 100644 index 000000000..8c9d91d9e --- /dev/null +++ b/bottlecap/tests/payloads/eventbridge_event.json @@ -0,0 +1,21 @@ +{ + "version": "0", + "id": "bd3c8258-8d30-007c-2562-64715b2d0ea8", + "detail-type": "UserSignUp", + "source": "my.event", + "account": "601427279990", + "time": "2024-11-09T08:22:15Z", + "region": "eu-west-1", + "resources": [], + "detail": { + "hello": "there", + "_datadog": { + "x-datadog-trace-id": "5827606813695714842", + "x-datadog-parent-id": "4726693487091824375", + "x-datadog-sampled": "1", + "x-datadog-sampling-priority": "1", + "x-datadog-resource-name": "testBus", + "x-datadog-start-time": "1731183820135" + } + } +} diff --git a/bottlecap/tests/payloads/eventbridge_no_resource_name_event.json b/bottlecap/tests/payloads/eventbridge_no_resource_name_event.json new file mode 100644 index 000000000..778e40628 --- /dev/null +++ 
b/bottlecap/tests/payloads/eventbridge_no_resource_name_event.json @@ -0,0 +1,19 @@ +{ + "version": "0", + "id": "bd3c8258-8d30-007c-2562-64715b2d0ea8", + "detail-type": "UserSignUp", + "source": "my.event", + "account": "601427279990", + "time": "2024-11-09T08:22:15Z", + "region": "eu-west-1", + "resources": [], + "detail": { + "hello": "there", + "_datadog": { + "x-datadog-trace-id": "5827606813695714842", + "x-datadog-parent-id": "4726693487091824375", + "x-datadog-sampling-priority": "1", + "x-datadog-start-time": "1731183820135" + } + } +} diff --git a/bottlecap/tests/payloads/eventbridge_no_timestamp_event.json b/bottlecap/tests/payloads/eventbridge_no_timestamp_event.json new file mode 100644 index 000000000..d5e8d9c6c --- /dev/null +++ b/bottlecap/tests/payloads/eventbridge_no_timestamp_event.json @@ -0,0 +1,19 @@ +{ + "version": "0", + "id": "bd3c8258-8d30-007c-2562-64715b2d0ea8", + "detail-type": "UserSignUp", + "source": "my.event", + "account": "601427279990", + "time": "2024-11-09T08:22:15Z", + "region": "eu-west-1", + "resources": [], + "detail": { + "hello": "there", + "_datadog": { + "x-datadog-trace-id": "5827606813695714842", + "x-datadog-parent-id": "4726693487091824375", + "x-datadog-sampling-priority": "1", + "x-datadog-resource-name": "testBus" + } + } +} diff --git a/bottlecap/tests/payloads/eventbridge_sns_event.json b/bottlecap/tests/payloads/eventbridge_sns_event.json new file mode 100644 index 000000000..176c86021 --- /dev/null +++ b/bottlecap/tests/payloads/eventbridge_sns_event.json @@ -0,0 +1,17 @@ +{ + "Records":[ + { + "Sns":{ + "MessageId":"12345678-90abc-def-1234-567890abcdef", + "Type":"Notification", + "TopicArn":"arn:aws:sns:us-east-1:123456789012:test-notifier", + "MessageAttributes":{ + + }, + "Timestamp":"2024-09-16T19:44:01.713Z", + "Subject":"", + 
"Message":"{\"version\":\"0\",\"id\":\"12345678-90abc-def-1234-567890abcdef\",\"detail-type\":\"TestDetail\",\"source\":\"com.test.source\",\"account\":\"12345667890\",\"time\":\"2024-09-16T19:44:01Z\",\"region\":\"us-east-1\",\"resources\":[],\"detail\":{\"foo\":\"bar\",\"_datadog\":{\"x-datadog-trace-id\":\"12345\",\"x-datadog-parent-id\":\"67890\",\"x-datadog-sampling-priority\":\"1\",\"x-datadog-start-time\":\"1726515840997\",\"x-datadog-resource-name\":\"test-bus\",\"x-datadog-tags\":\"_dd.p.dm=-1,_dd.p.tid=123567890\"}}}" + } + } + ] +} diff --git a/bottlecap/tests/payloads/eventbridge_span.json b/bottlecap/tests/payloads/eventbridge_span.json new file mode 100644 index 000000000..0515abd69 --- /dev/null +++ b/bottlecap/tests/payloads/eventbridge_span.json @@ -0,0 +1,16 @@ +{ + "service": "eventbridge", + "name": "aws.eventbridge", + "resource": "testBus", + "trace_id": 0, + "span_id": 0, + "parent_id": 0, + "start": 1731183820135000064, + "duration": 0, + "meta": { + "operation_name": "aws.eventbridge", + "detail_type": "UserSignUp" + }, + "metrics": {}, + "type": "web" +} \ No newline at end of file diff --git a/bottlecap/tests/payloads/eventbridge_sqs_event.json b/bottlecap/tests/payloads/eventbridge_sqs_event.json new file mode 100644 index 000000000..b3a392a50 --- /dev/null +++ b/bottlecap/tests/payloads/eventbridge_sqs_event.json @@ -0,0 +1,20 @@ +{ + "Records": [ + { + "messageId": "e995e54f-1724-41fa-82c0-8b81821f854e", + "receiptHandle": "AQEB4mIfRcyqtzn1X5Ss+ConhTejVGc+qnAcmu3/Z9ZvbNkaPcpuDLX/bzvPD/ZkAXJUXZcemGSJmd7L3snZHKMP2Ck8runZiyl4mubiLb444pZvdiNPuGRJ6a3FvgS/GQPzho/9nNMyOi66m8Viwh70v4EUCPGO4JmD3TTDAUrrcAnqU4WSObjfC/NAp9bI6wH2CEyAYEfex6Nxplbl/jBf9ZUG0I3m3vQd0Q4l4gd4jIR4oxQUglU2Tldl4Kx5fMUAhTRLAENri6HsY81avBkKd9FAuxONlsITB5uj02kOkvLlRGEcalqsKyPJ7AFaDLrOLaL3U+yReroPEJ5R5nwhLOEbeN5HROlZRXeaAwZOIN8BjqdeooYTIOrtvMEVb7a6OPLMdH1XB+ddevtKAH8K9Tm2ZjpaA7dtBGh1zFVHzBk=", + "body": 
"{\"version\":\"0\",\"id\":\"af718b2a-b987-e8c0-7a2b-a188fad2661a\",\"detail-type\":\"my.Detail\",\"source\":\"my.Source\",\"account\":\"425362996713\",\"time\":\"2023-08-03T22:49:03Z\",\"region\":\"us-east-1\",\"resources\":[],\"detail\":{\"text\":\"Hello, world!\",\"_datadog\":{\"x-datadog-trace-id\":\"7379586022458917877\",\"x-datadog-parent-id\":\"2644033662113726488\",\"x-datadog-sampling-priority\":\"1\",\"x-datadog-tags\":\"_dd.p.dm=-0\",\"traceparent\":\"00-000000000000000066698e63821a03f5-24b17e9b6476c018-01\",\"tracestate\":\"dd=t.dm:-0;s:1\"}}}", + "attributes": { + "ApproximateReceiveCount": "1", + "SentTimestamp": "1691102943638", + "SenderId": "AIDAJXNJGGKNS7OSV23OI", + "ApproximateFirstReceiveTimestamp": "1691102943647" + }, + "messageAttributes": {}, + "md5OfBody": "93d9f0cd8886d1e000a1a0b7007bffc4", + "eventSource": "aws:sqs", + "eventSourceARN": "arn:aws:sqs:us-east-1:425362996713:lambda-eb-sqs-lambda-dev-demo-queue", + "awsRegion": "us-east-1" + } + ] +} diff --git a/bottlecap/tests/payloads/eventbridge_sqs_java_header_event.json b/bottlecap/tests/payloads/eventbridge_sqs_java_header_event.json new file mode 100644 index 000000000..033740244 --- /dev/null +++ b/bottlecap/tests/payloads/eventbridge_sqs_java_header_event.json @@ -0,0 +1,21 @@ +{ + "Records": [ + { + "messageId": "e995e54f-1724-41fa-82c0-8b81821f854e", + "receiptHandle": "AQEB4mIfRcyqtzn1X5Ss+ConhTejVGc+qnAcmu3/Z9ZvbNkaPcpuDLX/bzvPD/ZkAXJUXZcemGSJmd7L3snZHKMP2Ck8runZiyl4mubiLb444pZvdiNPuGRJ6a3FvgS/GQPzho/9nNMyOi66m8Viwh70v4EUCPGO4JmD3TTDAUrrcAnqU4WSObjfC/NAp9bI6wH2CEyAYEfex6Nxplbl/jBf9ZUG0I3m3vQd0Q4l4gd4jIR4oxQUglU2Tldl4Kx5fMUAhTRLAENri6HsY81avBkKd9FAuxONlsITB5uj02kOkvLlRGEcalqsKyPJ7AFaDLrOLaL3U+yReroPEJ5R5nwhLOEbeN5HROlZRXeaAwZOIN8BjqdeooYTIOrtvMEVb7a6OPLMdH1XB+ddevtKAH8K9Tm2ZjpaA7dtBGh1zFVHzBk=", + "body": 
"{\"version\":\"0\",\"id\":\"af718b2a-b987-e8c0-7a2b-a188fad2661a\",\"detail-type\":\"my.Detail\",\"source\":\"my.Source\",\"account\":\"425362996713\",\"time\":\"2023-08-03T22:49:03Z\",\"region\":\"us-east-1\",\"resources\":[],\"detail\":{\"text\":\"Hello, world!\",\"_datadog\":{\"x-datadog-trace-id\":\"7379586022458917877\",\"x-datadog-parent-id\":\"2644033662113726488\",\"x-datadog-sampling-priority\":\"1\",\"x-datadog-tags\":\"_dd.p.dm=-0\",\"traceparent\":\"00-000000000000000066698e63821a03f5-24b17e9b6476c018-01\",\"tracestate\":\"dd=t.dm:-0;s:1\"}}}", + "attributes": { + "ApproximateReceiveCount": "1", + "AWSTraceHeader": "Root=1-64cc2edd-112fbf1701d1355973a11d57;Parent=7d5a9776024b2d42;Sampled=0", + "SentTimestamp": "1691102943638", + "SenderId": "AIDAJXNJGGKNS7OSV23OI", + "ApproximateFirstReceiveTimestamp": "1691102943647" + }, + "messageAttributes": {}, + "md5OfBody": "93d9f0cd8886d1e000a1a0b7007bffc4", + "eventSource": "aws:sqs", + "eventSourceARN": "arn:aws:sqs:us-east-1:425362996713:lambda-eb-sqs-lambda-dev-demo-queue", + "awsRegion": "us-east-1" + } + ] +} diff --git a/bottlecap/tests/payloads/kinesis_event.json b/bottlecap/tests/payloads/kinesis_event.json new file mode 100644 index 000000000..822530822 --- /dev/null +++ b/bottlecap/tests/payloads/kinesis_event.json @@ -0,0 +1,20 @@ +{ + "Records": [ + { + "kinesis": { + "kinesisSchemaVersion": "1.0", + "partitionKey": "partitionkey", + "sequenceNumber": "49624230154685806402418173680709770494154422022871973922", + "data": "eyJmb28iOiAiYmFyIiwgIl9kYXRhZG9nIjogeyJ4LWRhdGFkb2ctdHJhY2UtaWQiOiAiNDk0ODM3NzMxNjM1NzI5MTQyMSIsICJ4LWRhdGFkb2ctcGFyZW50LWlkIjogIjI4NzYyNTMzODAwMTg2ODEwMjYiLCAieC1kYXRhZG9nLXNhbXBsaW5nLXByaW9yaXR5IjogIjEifX0=", + "approximateArrivalTimestamp": 1643638425.163 + }, + "eventSource": "aws:kinesis", + "eventVersion": "1.0", + "eventID": "shardId-000000000002:49624230154685806402418173680709770494154422022871973922", + "eventName": "aws:kinesis:record", + "invokeIdentityArn": 
"arn:aws:iam::425362996713:role/inferred-spans-python-dev-sa-east-1-lambdaRole", + "awsRegion": "sa-east-1", + "eventSourceARN": "arn:aws:kinesis:sa-east-1:425362996713:stream/kinesisStream" + } + ] +} diff --git a/bottlecap/tests/payloads/lambda_function_url_event.json b/bottlecap/tests/payloads/lambda_function_url_event.json new file mode 100644 index 000000000..324dae524 --- /dev/null +++ b/bottlecap/tests/payloads/lambda_function_url_event.json @@ -0,0 +1,46 @@ +{ + "version": "2.0", + "routeKey": "$default", + "rawPath": "/", + "rawQueryString": "", + "headers": { + "sec-fetch-mode": "navigate", + "sec-fetch-site": "none", + "accept-language": "en-US,en;q=0.9", + "x-forwarded-proto": "https", + "x-forwarded-port": "443", + "x-forwarded-for": "71.195.30.42", + "sec-fetch-user": "?1", + "pragma": "no-cache", + "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", + "sec-ch-ua": "\"Google Chrome\";v=\"95\", \"Chromium\";v=\"95\", \";Not A Brand\";v=\"99\"", + "sec-ch-ua-mobile": "?0", + "x-amzn-trace-id": "Root=1-61953929-1ec00c3011062a48477b169e", + "sec-ch-ua-platform": "\"macOS\"", + "host": "a8hyhsshac.lambda-url.eu-south-1.amazonaws.com", + "upgrade-insecure-requests": "1", + "cache-control": "no-cache", + "accept-encoding": "gzip, deflate, br", + "sec-fetch-dest": "document", + "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36" + }, + "requestContext": { + "accountId": "601427279990", + "apiId": "a8hyhsshac", + "domainName": "a8hyhsshac.lambda-url.eu-south-1.amazonaws.com", + "domainPrefix": "a8hyhsshac", + "http": { + "method": "GET", + "path": "/", + "protocol": "HTTP/1.1", + "sourceIp": "71.195.30.42", + "userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36" + }, + "requestId": 
"ec4d58f8-2b8b-4ceb-a1d5-2be7bff58505", + "routeKey": "$default", + "stage": "$default", + "time": "17/Nov/2021:17:17:29 +0000", + "timeEpoch": 1637169449721 + }, + "isBase64Encoded": false +} diff --git a/bottlecap/tests/payloads/s3_event.json b/bottlecap/tests/payloads/s3_event.json new file mode 100644 index 000000000..031dc8c3a --- /dev/null +++ b/bottlecap/tests/payloads/s3_event.json @@ -0,0 +1,38 @@ +{ + "Records": [ + { + "eventVersion": "2.0", + "eventSource": "aws:s3:sample:event:source", + "awsRegion": "us-east-1", + "eventTime": "2023-01-07T00:00:00.000Z", + "eventName": "ObjectCreated:Put", + "userIdentity": { + "principalId": "EXAMPLE" + }, + "requestParameters": { + "sourceIPAddress": "127.0.0.1" + }, + "responseElements": { + "x-amz-request-id": "EXAMPLE123456789", + "x-amz-id-2": "EXAMPLE123/5678abcdefghijklambdaisawesome/mnopqrstuvwxyzABCDEFGH" + }, + "s3": { + "s3SchemaVersion": "1.0", + "configurationId": "testConfigRule", + "bucket": { + "name": "example-bucket", + "ownerIdentity": { + "principalId": "EXAMPLE" + }, + "arn": "arn:aws:s3:::example-bucket" + }, + "object": { + "key": "test/key", + "size": 1024, + "eTag": "0123456789abcdef0123456789abcdef", + "sequencer": "0A1B2C3D4E5F678901" + } + } + } + ] +} diff --git a/bottlecap/tests/payloads/sns_event.json b/bottlecap/tests/payloads/sns_event.json new file mode 100644 index 000000000..ef8062a0e --- /dev/null +++ b/bottlecap/tests/payloads/sns_event.json @@ -0,0 +1,50 @@ +{ + "Records": [ + { + "EventSource": "aws:sns", + "EventVersion": "1.0", + "EventSubscriptionArn": "arn:aws:sns:sa-east-1:425362996713:serverlessTracingTopicPy:224b60ba-befc-4830-ad96-f1f0ac94eb04", + "Sns": { + "Type": "Notification", + "MessageId": "87056a47-f506-5d77-908b-303605d3b197", + "TopicArn": "arn:aws:sns:sa-east-1:425362996713:serverlessTracingTopicPy", + "Subject": null, + "Message": "Asynchronously invoking a Lambda function with SNS.", + "Timestamp": "2022-01-31T14:13:41.637Z", + "SignatureVersion": "1", + 
"Signature": "BmwnJb0Ku2KgQef9QOgaSSTwLyUsbkRq90lzD5Vn4mAcRUOq2ForfMOYbxMB6idljWIWy9t/jK4AIMxPGk/eOGiRcENx3BvAcGcoDayBRFY13+xUGaPn5Lfoht/ZJ7/hmCgFWKRa8ooATZL+AwGAw6Id8qzf0R3M3k2asy5Vxa4ODKiFW9OzWY/zFgsYJhddR3JrQl9YOMRyIobNNHT96o1TwjGsSUTEemrxA6jQtb3QbardEKO+2SuataLEZki7gE2D2sA300WqZecumI339q7la+OIj6VDGDwFoppE2sh8hzJYXAH7oo11giwltE0V3/eLFCVhsE8Y1KD/yDPPsA==", + "SigningCertUrl": "https://sns.sa-east-1.amazonaws.com/SimpleNotificationService-7ff5318490ec183fbaddaa2a969abfda.pem", + "UnsubscribeUrl": "https://sns.sa-east-1.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:sa-east-1:425362996713:serverlessTracingTopicPy:224b60ba-befc-4830-ad96-f1f0ac94eb04", + "MessageAttributes": { + "_datadog": { + "Type": "String", + "Value": "{\"x-datadog-trace-id\": \"4948377316357291421\", \"x-datadog-parent-id\": \"6746998015037429512\", \"x-datadog-sampling-priority\": \"1\"}" + } + } + } + }, + { + "EventSource": "aws:sns", + "EventVersion": "1.0", + "EventSubscriptionArn": "arn:aws:sns:sa-east-1:425362996713:serverlessTracingTopicPy:224b60ba-befc-4830-ad96-f1f0ac94eb04", + "Sns": { + "Type": "Notification", + "MessageId": "87056a47-f506-5d77-908b-303605d3b197", + "TopicArn": "arn:aws:sns:sa-east-1:425362996713:serverlessTracingTopicPy", + "Subject": null, + "Message": "Asynchronously invoking a Lambda function with SNS.", + "Timestamp": "2022-01-31T14:13:41.637Z", + "SignatureVersion": "1", + "Signature": "BmwnJb0Ku2KgQef9QOgaSSTwLyUsbkRq90lzD5Vn4mAcRUOq2ForfMOYbxMB6idljWIWy9t/jK4AIMxPGk/eOGiRcENx3BvAcGcoDayBRFY13+xUGaPn5Lfoht/ZJ7/hmCgFWKRa8ooATZL+AwGAw6Id8qzf0R3M3k2asy5Vxa4ODKiFW9OzWY/zFgsYJhddR3JrQl9YOMRyIobNNHT96o1TwjGsSUTEemrxA6jQtb3QbardEKO+2SuataLEZki7gE2D2sA300WqZecumI339q7la+OIj6VDGDwFoppE2sh8hzJYXAH7oo11giwltE0V3/eLFCVhsE8Y1KD/yDPPsA==", + "SigningCertUrl": "https://sns.sa-east-1.amazonaws.com/SimpleNotificationService-7ff5318490ec183fbaddaa2a969abfda.pem", + "UnsubscribeUrl": 
"https://sns.sa-east-1.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:sa-east-1:425362996713:serverlessTracingTopicPy:224b60ba-befc-4830-ad96-f1f0ac94eb04", + "MessageAttributes": { + "_datadog": { + "Type": "String", + "Value": "{\"x-datadog-trace-id\": \"4948377316357291421\", \"x-datadog-parent-id\": \"6746998015037429512\", \"x-datadog-sampling-priority\": \"1\"}" + } + } + } + } + ] +} diff --git a/bottlecap/tests/payloads/sns_event_binary.json b/bottlecap/tests/payloads/sns_event_binary.json new file mode 100644 index 000000000..4a9a2b500 --- /dev/null +++ b/bottlecap/tests/payloads/sns_event_binary.json @@ -0,0 +1,27 @@ +{ + "Records": [ + { + "EventSource": "aws:sns", + "EventVersion": "1.0", + "EventSubscriptionArn": "arn:aws:sns:eu-west-1:601427279990:serverlessTracingTopicPy:224b60ba-befc-4830-ad96-f1f0ac94eb04", + "Sns": { + "Type": "Notification", + "MessageId": "87056a47-f506-5d77-908b-303605d3b197", + "TopicArn": "arn:aws:sns:eu-west-1:601427279990:serverlessTracingTopicPy", + "Subject": null, + "Message": "Asynchronously invoking a Lambda function with SNS.", + "Timestamp": "2022-01-31T14:13:41.637Z", + "SignatureVersion": "1", + "Signature": "BmwnJb0Ku2KgQef9QOgaSSTwLyUsbkRq90lzD5Vn4mAcRUOq2ForfMOYbxMB6idljWIWy9t/jK4AIMxPGk/eOGiRcENx3BvAcGcoDayBRFY13+xUGaPn5Lfoht/ZJ7/hmCgFWKRa8ooATZL+AwGAw6Id8qzf0R3M3k2asy5Vxa4ODKiFW9OzWY/zFgsYJhddR3JrQl9YOMRyIobNNHT96o1TwjGsSUTEemrxA6jQtb3QbardEKO+2SuataLEZki7gE2D2sA300WqZecumI339q7la+OIj6VDGDwFoppE2sh8hzJYXAH7oo11giwltE0V3/eLFCVhsE8Y1KD/yDPPsA==", + "SigningCertUrl": "https://sns.eu-west-1.amazonaws.com/SimpleNotificationService-7ff5318490ec183fbaddaa2a969abfda.pem", + "UnsubscribeUrl": "https://sns.eu-west-1.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:eu-west-1:601427279990:serverlessTracingTopicPy:224b60ba-befc-4830-ad96-f1f0ac94eb04", + "MessageAttributes": { + "_datadog": { + "Type": "Binary", + "Value": 
"eyJ4LWRhdGFkb2ctdHJhY2UtaWQiOiI0OTQ4Mzc3MzE2MzU3MjkxNDIxIiwieC1kYXRhZG9nLXBhcmVudC1pZCI6IjY3NDY5OTgwMTUwMzc0Mjk1MTIiLCJ4LWRhdGFkb2ctc2FtcGxpbmctcHJpb3JpdHkiOiIxIn0=" + } + } + } + } + ] +} diff --git a/bottlecap/tests/payloads/sns_sqs_event.json b/bottlecap/tests/payloads/sns_sqs_event.json new file mode 100644 index 000000000..c1746d8fb --- /dev/null +++ b/bottlecap/tests/payloads/sns_sqs_event.json @@ -0,0 +1,20 @@ +{ + "Records": [ + { + "messageId": "64812b68-4d9b-4dca-b3fb-9b18f255ee51", + "receiptHandle": "AQEBER6aRkfG8092GvkL7FRwCwbQ7LLDW9Tlk/CembqHe+suS2kfFxXiukomvaIN61QoyQMoRgWuV52SDkiQno2u+5hP64BDbmw+e/KR9ayvIfHJ3M6RfyQLaWNWm3hDFBCKTnBMVIxtdx0N9epZZewyokjKcrNYtmCghFgTCvZzsQkowi5rnoHAVHJ3je1c3bDnQ1KLrZFgajDnootYXDwEPuMq5FIxrf4EzTe0S7S+rnRm+GaQfeBLBVAY6dASL9usV3/AFRqDtaI7GKI+0F2NCgLlqj49VlPRz4ldhkGknYlKTZTluAqALWLJS62/J1GQo53Cs3nneJcmu5ajB2zzmhhRXoXINEkLhCD5ujZfcsw9H4xqW69Or4ECvlqx14bUU2rtMIW0QM2p7pEeXnyocymQv6m1te113eYWTVmaJ4I=", + "body": "{\n \"Type\" : \"Notification\",\n \"MessageId\" : \"0a0ab23e-4861-5447-82b7-e8094ff3e332\",\n \"TopicArn\" : \"arn:aws:sns:eu-west-1:601427279990:js-library-test-dev-demoTopic-15WGUVRCBMPAA\",\n \"Message\" : \"{\\\"hello\\\":\\\"harv\\\",\\\"nice of you to join us\\\":\\\"david\\\",\\\"anotherThing\\\":{\\\"foo\\\":\\\"bar\\\",\\\"blah\\\":null,\\\"harv\\\":123},\\\"vals\\\":[{\\\"thingOne\\\":1},{\\\"thingTwo\\\":2}],\\\"ajTimestamp\\\":1639777617957}\",\n \"Timestamp\" : \"2021-12-17T21:46:58.040Z\",\n \"SignatureVersion\" : \"1\",\n \"Signature\" : \"FR35/7E8C3LHEVk/rC4XxXlXwV/5mNkFNPgDhHSnJ2I6hIoSrTROAm7h5xm1PuBkAeFDvq0zofw91ouk9zZyvhdrMLFIIgrjEyNayRmEffmoEAkzLFUsgtQX7MmTl644r4NuWiM0Oiz7jueRvIcKXcZr7Nc6GJcWV1ymec8oOmuHNMisnPMxI07LIQVYSyAfv6P9r2jEWMVIukRoCzwTnRk4bUUYhPSGHI7OC3AsxxXBbv8snqTrLM/4z2rXCf6jHCKNxWeLlm9/45PphCkEyx5BWS4/71KaoMWUWy8+6CCsy+uF3XTCVmvSEYLyEwTSzOY+vCUjazrRW93498i70g==\",\n \"SigningCertUrl\" : 
\"https://sns.eu-west-1.amazonaws.com/SimpleNotificationService-7ff5318490ec183fbaddaa2a969abfda.pem\",\n \"UnsubscribeUrl\" : \"https://sns.eu-west-1.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:eu-west-1:601427279990:js-library-test-dev-demoTopic-15WGUVRCBMPAA:1290f550-9a8a-4e8f-a900-8f5f96dcddda\",\n \"MessageAttributes\" : {\n \"_datadog\" : {\"Type\":\"String\",\"Value\":\"{\\\"x-datadog-trace-id\\\":\\\"2776434475358637757\\\",\\\"x-datadog-parent-id\\\":\\\"4493917105238181843\\\",\\\"x-datadog-sampling-priority\\\":\\\"1\\\"}\"}\n }\n}", + "attributes": { + "ApproximateReceiveCount": "1", + "SentTimestamp": "1639777618130", + "SenderId": "AIDAIOA2GYWSHW4E2VXIO", + "ApproximateFirstReceiveTimestamp": "1639777618132" + }, + "messageAttributes": {}, + "md5OfBody": "ee19d8b1377919239ad3fd5dabc33739", + "eventSource": "aws:sqs", + "eventSourceARN": "arn:aws:sqs:eu-west-1:601427279990:aj-js-library-test-dev-demo-queue", + "awsRegion": "eu-west-1" + } + ] +} diff --git a/bottlecap/tests/payloads/sqs_event.json b/bottlecap/tests/payloads/sqs_event.json new file mode 100644 index 000000000..5cc7837fd --- /dev/null +++ b/bottlecap/tests/payloads/sqs_event.json @@ -0,0 +1,27 @@ +{ + "Records": [ + { + "messageId": "19dd0b57-b21e-4ac1-bd88-01bbb068cb78", + "receiptHandle": "MessageReceiptHandle", + "body": "Hello from SQS!", + "attributes": { + "ApproximateReceiveCount": "1", + "SentTimestamp": "1523232000000", + "SenderId": "123456789012", + "ApproximateFirstReceiveTimestamp": "1523232000001" + }, + "messageAttributes": { + "_datadog": { + "stringValue": "{\"x-datadog-trace-id\":\"2684756524522091840\",\"x-datadog-parent-id\":\"7431398482019833808\",\"x-datadog-sampling-priority\":\"1\"}", + "stringListValues": [], + "binaryListValues": [], + "dataType": "String" + } + }, + "md5OfBody": "{{{md5_of_body}}}", + "eventSource": "aws:sqs", + "eventSourceARN": "arn:aws:sqs:us-east-1:123456789012:MyQueue", + "awsRegion": "us-east-1" + } + ] +} diff --git 
a/bottlecap/tests/payloads/step_function_event.json b/bottlecap/tests/payloads/step_function_event.json new file mode 100644 index 000000000..1461c7164 --- /dev/null +++ b/bottlecap/tests/payloads/step_function_event.json @@ -0,0 +1,19 @@ +{ + "Execution": { + "Id": "arn:aws:states:us-east-1:425362996713:execution:agocsTestSF:bc9f281c-3daa-4e5a-9a60-471a3810bf44", + "Input": {}, + "StartTime": "2024-07-30T19:55:52.976Z", + "Name": "bc9f281c-3daa-4e5a-9a60-471a3810bf44", + "RoleArn": "arn:aws:iam::425362996713:role/test-serverless-stepfunctions-dev-AgocsTestSFRole-tRkeFXScjyk4", + "RedriveCount": 0 + }, + "StateMachine": { + "Id": "arn:aws:states:us-east-1:425362996713:stateMachine:agocsTestSF", + "Name": "agocsTestSF" + }, + "State": { + "Name": "agocsTest1", + "EnteredTime": "2024-07-30T19:55:53.018Z", + "RetryCount": 0 + } +} diff --git a/bottlecap/tests/payloads/step_function_legacy_event.json b/bottlecap/tests/payloads/step_function_legacy_event.json new file mode 100644 index 000000000..74e4c010a --- /dev/null +++ b/bottlecap/tests/payloads/step_function_legacy_event.json @@ -0,0 +1,21 @@ +{ + "Payload": { + "Execution": { + "Id": "arn:aws:states:us-east-1:425362996713:execution:agocsTestSF:bc9f281c-3daa-4e5a-9a60-471a3810bf44", + "Input": {}, + "StartTime": "2024-07-30T19:55:52.976Z", + "Name": "bc9f281c-3daa-4e5a-9a60-471a3810bf44", + "RoleArn": "arn:aws:iam::425362996713:role/test-serverless-stepfunctions-dev-AgocsTestSFRole-tRkeFXScjyk4", + "RedriveCount": 0 + }, + "StateMachine": { + "Id": "arn:aws:states:us-east-1:425362996713:stateMachine:agocsTestSF", + "Name": "agocsTestSF" + }, + "State": { + "Name": "agocsTest1", + "EnteredTime": "2024-07-30T19:55:53.018Z", + "RetryCount": 0 + } + } +} diff --git a/bottlecap/tests/proc/13/.gitkeep b/bottlecap/tests/proc/13/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/bottlecap/tests/proc/142/.gitkeep b/bottlecap/tests/proc/142/.gitkeep new file mode 100644 index 000000000..e69de29bb diff 
--git a/bottlecap/tests/proc/net/invalid_dev_malformed b/bottlecap/tests/proc/net/invalid_dev_malformed new file mode 100644 index 000000000..5cd9f0ec9 --- /dev/null +++ b/bottlecap/tests/proc/net/invalid_dev_malformed @@ -0,0 +1,5 @@ +Inter-| Receive | Transmit +face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed +lo: 7490 63 0 0 0 0 0 0 7490 63 0 0 0 0 0 0 +vinternal_1: 180 3 0 0 0 ... +telemetry1_sb: 17284 50 0 0 0 0 0 0 15279 78 0 0 0 0 0 0 \ No newline at end of file diff --git a/bottlecap/tests/proc/net/invalid_dev_non_numerical_value b/bottlecap/tests/proc/net/invalid_dev_non_numerical_value new file mode 100644 index 000000000..9aae3404e --- /dev/null +++ b/bottlecap/tests/proc/net/invalid_dev_non_numerical_value @@ -0,0 +1,5 @@ +Inter-| Receive | Transmit +face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed +lo: 7490 63 0 0 0 0 0 0 7490 63 0 0 0 0 0 0 +vinternal_1: INVALID 3 0 0 0 0 0 0 254 4 0 0 0 0 0 0 +telemetry1_sb: 17284 50 0 0 0 0 0 0 15279 78 0 0 0 0 0 0 \ No newline at end of file diff --git a/bottlecap/tests/proc/net/missing_interface_dev b/bottlecap/tests/proc/net/missing_interface_dev new file mode 100644 index 000000000..fb4a0224d --- /dev/null +++ b/bottlecap/tests/proc/net/missing_interface_dev @@ -0,0 +1,4 @@ +Inter-| Receive | Transmit +face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed +lo: 7490 63 0 0 0 0 0 0 7490 63 0 0 0 0 0 0 +telemetry1_sb: 17284 50 0 0 0 0 0 0 15279 78 0 0 0 0 0 0 \ No newline at end of file diff --git a/bottlecap/tests/proc/net/valid_dev b/bottlecap/tests/proc/net/valid_dev new file mode 100644 index 000000000..a20f0cc97 --- /dev/null +++ b/bottlecap/tests/proc/net/valid_dev @@ -0,0 +1,5 @@ +Inter-| Receive | Transmit +face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier 
compressed +lo: 7490 63 0 0 0 0 0 0 7490 63 0 0 0 0 0 0 +vinternal_1: 180 3 0 0 0 0 0 0 254 4 0 0 0 0 0 0 +telemetry1_sb: 17284 50 0 0 0 0 0 0 15279 78 0 0 0 0 0 0 diff --git a/bottlecap/tests/proc/process/invalid_malformed/31/limits b/bottlecap/tests/proc/process/invalid_malformed/31/limits new file mode 100644 index 000000000..2d25ac301 --- /dev/null +++ b/bottlecap/tests/proc/process/invalid_malformed/31/limits @@ -0,0 +1,17 @@ +Limit Soft Limit Hard Limit Units +Max cpu time unlimited unlimited seconds +Max file size unlimited unlimited bytes +Max data size unlimited unlimited bytes +Max stack size 8388608 unlimited bytes +Max core file size unlimited unlimited bytes +Max resident set unlimited unlimited bytes +Max processes 1024 1024 +Max open files 1024 +Max locked memory 65536 65536 bytes +Max address space unlimited unlimited bytes +Max file locks unlimited unlimited locks +Max pending signals 4622 4622 signals +Max msgqueue size 819200 819200 bytes +Max nice priority 0 0 +Max realtime priority 0 0 +Max realtime timeout unlimited unlimited us diff --git a/bottlecap/tests/proc/process/invalid_malformed/9/limits b/bottlecap/tests/proc/process/invalid_malformed/9/limits new file mode 100644 index 000000000..2436ec085 --- /dev/null +++ b/bottlecap/tests/proc/process/invalid_malformed/9/limits @@ -0,0 +1,17 @@ +Limit Soft Limit Hard Limit Units +Max cpu time unlimited unlimited seconds +Max file size unlimited unlimited bytes +Max data size unlimited unlimited bytes +Max stack size 8388608 unlimited bytes +Max core file size unlimited unlimited bytes +Max resident set unlimited unlimited bytes +Max processes 1024 +Max open files 1024 1024 +Max locked memory 65536 65536 bytes +Max address space unlimited unlimited bytes +Max file locks unlimited unlimited locks +Max pending signals 4622 4622 signals +Max msgqueue size 819200 819200 bytes +Max nice priority 0 0 +Max realtime priority 0 0 +Max realtime timeout unlimited unlimited us diff --git 
a/bottlecap/tests/proc/process/invalid_missing/31/limits b/bottlecap/tests/proc/process/invalid_missing/31/limits new file mode 100644 index 000000000..c7dc2c55d --- /dev/null +++ b/bottlecap/tests/proc/process/invalid_missing/31/limits @@ -0,0 +1,15 @@ +Limit Soft Limit Hard Limit Units +Max cpu time unlimited unlimited seconds +Max file size unlimited unlimited bytes +Max data size unlimited unlimited bytes +Max stack size 8388608 unlimited bytes +Max core file size unlimited unlimited bytes +Max resident set unlimited unlimited bytes +Max locked memory 65536 65536 bytes +Max address space unlimited unlimited bytes +Max file locks unlimited unlimited locks +Max pending signals 4622 4622 signals +Max msgqueue size 819200 819200 bytes +Max nice priority 0 0 +Max realtime priority 0 0 +Max realtime timeout unlimited unlimited us diff --git a/bottlecap/tests/proc/process/invalid_missing/9/limits b/bottlecap/tests/proc/process/invalid_missing/9/limits new file mode 100644 index 000000000..07de49ec4 --- /dev/null +++ b/bottlecap/tests/proc/process/invalid_missing/9/limits @@ -0,0 +1,15 @@ +Limit Soft Limit Hard Limit Units +Max cpu time unlimited unlimited seconds +Max file size unlimited unlimited bytes +Max data size unlimited unlimited bytes +Max stack size 8388608 unlimited bytes +Max core file size unlimited unlimited bytes +Max resident set unlimited unlimited bytes +Max locked memory 65536 65536 bytes +Max address space unlimited unlimited bytes +Max file locks unlimited unlimited locks +Max pending signals 4622 4622 signals +Max msgqueue size 819200 819200 bytes +Max nice priority 0 0 +Max realtime priority 0 0 +Max realtime timeout unlimited unlimited us diff --git a/bottlecap/tests/proc/process/valid/31/fd/1 b/bottlecap/tests/proc/process/valid/31/fd/1 new file mode 100644 index 000000000..e69de29bb diff --git a/bottlecap/tests/proc/process/valid/31/fd/2 b/bottlecap/tests/proc/process/valid/31/fd/2 new file mode 100644 index 000000000..e69de29bb diff --git 
a/bottlecap/tests/proc/process/valid/31/limits b/bottlecap/tests/proc/process/valid/31/limits new file mode 100644 index 000000000..75d41eee4 --- /dev/null +++ b/bottlecap/tests/proc/process/valid/31/limits @@ -0,0 +1,17 @@ +Limit Soft Limit Hard Limit Units +Max cpu time unlimited unlimited seconds +Max file size unlimited unlimited bytes +Max data size unlimited unlimited bytes +Max stack size 8388608 unlimited bytes +Max core file size unlimited unlimited bytes +Max resident set unlimited unlimited bytes +Max processes 1024 1024 processes +Max open files 900 1024 files +Max locked memory 65536 65536 bytes +Max address space unlimited unlimited bytes +Max file locks unlimited unlimited locks +Max pending signals 4622 4622 signals +Max msgqueue size 819200 819200 bytes +Max nice priority 0 0 +Max realtime priority 0 0 +Max realtime timeout unlimited unlimited us diff --git a/bottlecap/tests/proc/process/valid/31/task/1/.gitkeep b/bottlecap/tests/proc/process/valid/31/task/1/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/bottlecap/tests/proc/process/valid/31/task/2/.gitkeep b/bottlecap/tests/proc/process/valid/31/task/2/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/bottlecap/tests/proc/process/valid/31/task/3 b/bottlecap/tests/proc/process/valid/31/task/3 new file mode 100644 index 000000000..e69de29bb diff --git a/bottlecap/tests/proc/process/valid/9/fd/1 b/bottlecap/tests/proc/process/valid/9/fd/1 new file mode 100644 index 000000000..e69de29bb diff --git a/bottlecap/tests/proc/process/valid/9/fd/2 b/bottlecap/tests/proc/process/valid/9/fd/2 new file mode 100644 index 000000000..e69de29bb diff --git a/bottlecap/tests/proc/process/valid/9/fd/3 b/bottlecap/tests/proc/process/valid/9/fd/3 new file mode 100644 index 000000000..e69de29bb diff --git a/bottlecap/tests/proc/process/valid/9/limits b/bottlecap/tests/proc/process/valid/9/limits new file mode 100644 index 000000000..664f04c88 --- /dev/null +++ 
b/bottlecap/tests/proc/process/valid/9/limits @@ -0,0 +1,17 @@ +Limit Soft Limit Hard Limit Units +Max cpu time unlimited unlimited seconds +Max file size unlimited unlimited bytes +Max data size unlimited unlimited bytes +Max stack size 8388608 unlimited bytes +Max core file size unlimited unlimited bytes +Max resident set unlimited unlimited bytes +Max processes 1024 1024 processes +Max open files 1024 1024 files +Max locked memory 65536 65536 bytes +Max address space unlimited unlimited bytes +Max file locks unlimited unlimited locks +Max pending signals 4622 4622 signals +Max msgqueue size 819200 819200 bytes +Max nice priority 0 0 +Max realtime priority 0 0 +Max realtime timeout unlimited unlimited us diff --git a/bottlecap/tests/proc/process/valid/9/task/1/.gitkeep b/bottlecap/tests/proc/process/valid/9/task/1/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/bottlecap/tests/proc/process/valid/9/task/2/.gitkeep b/bottlecap/tests/proc/process/valid/9/task/2/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/bottlecap/tests/proc/process/valid/9/task/3/.gitkeep b/bottlecap/tests/proc/process/valid/9/task/3/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/bottlecap/tests/proc/stat/invalid_stat_malformed_first_line b/bottlecap/tests/proc/stat/invalid_stat_malformed_first_line new file mode 100644 index 000000000..7071a126d --- /dev/null +++ b/bottlecap/tests/proc/stat/invalid_stat_malformed_first_line @@ -0,0 +1,2 @@ +cpu 2337 +... diff --git a/bottlecap/tests/proc/stat/invalid_stat_malformed_per_cpu_line b/bottlecap/tests/proc/stat/invalid_stat_malformed_per_cpu_line new file mode 100644 index 000000000..d4dd4badd --- /dev/null +++ b/bottlecap/tests/proc/stat/invalid_stat_malformed_per_cpu_line @@ -0,0 +1,10 @@ +cpu 2337 0 188 17838 8 0 16 181 0 0 +cpu0 1453 0 87 8649 2 0 10 85 0 0 +cpu1 884 0 ... 
+intr 67620 0 0 0 0 354 4356 233 1294 89 759 185 359 8 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +ctxt 135197 +btime 1716225108 +processes 1428 +procs_running 1 +procs_blocked 0 +softirq 27242 0 2425 2 696 6166 0 48 7838 0 10067 diff --git a/bottlecap/tests/proc/stat/invalid_stat_missing_cpun_data b/bottlecap/tests/proc/stat/invalid_stat_missing_cpun_data new file mode 100644 index 000000000..75119c03d --- /dev/null +++ b/bottlecap/tests/proc/stat/invalid_stat_missing_cpun_data @@ -0,0 +1,8 @@ +cpu 2337 0 188 17838 8 0 16 181 0 0 +intr 67620 0 0 0 0 354 4356 233 1294 89 759 185 359 8 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +ctxt 135197 +btime 1716225108 +processes 1428 +procs_running 1 +procs_blocked 0 +softirq 27242 0 2425 2 696 6166 0 48 7838 0 10067 diff --git a/bottlecap/tests/proc/stat/invalid_stat_non_numerical_value_1 b/bottlecap/tests/proc/stat/invalid_stat_non_numerical_value_1 new file mode 100644 index 000000000..d72287175 --- /dev/null +++ b/bottlecap/tests/proc/stat/invalid_stat_non_numerical_value_1 @@ -0,0 +1,2 @@ +cpu 2337 0 INVALID 17838 8 0 16 181 0 0 +... diff --git a/bottlecap/tests/proc/stat/invalid_stat_non_numerical_value_2 b/bottlecap/tests/proc/stat/invalid_stat_non_numerical_value_2 new file mode 100644 index 000000000..816ba9009 --- /dev/null +++ b/bottlecap/tests/proc/stat/invalid_stat_non_numerical_value_2 @@ -0,0 +1,2 @@ +cpu INVALID 0 188 17838 8 0 16 181 0 0 +... 
diff --git a/bottlecap/tests/proc/stat/valid_stat b/bottlecap/tests/proc/stat/valid_stat new file mode 100644 index 000000000..d0a082700 --- /dev/null +++ b/bottlecap/tests/proc/stat/valid_stat @@ -0,0 +1,10 @@ +cpu 2337 0 188 17838 8 0 16 181 0 0 +cpu0 884 0 100 9188 5 0 6 95 0 0 +cpu1 1453 0 87 8649 2 0 10 85 0 0 +intr 67620 0 0 0 0 354 4356 233 1294 89 759 185 359 8 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +ctxt 135197 +btime 1716225108 +processes 1428 +procs_running 1 +procs_blocked 0 +softirq 27242 0 2425 2 696 6166 0 48 7838 0 10067 diff --git a/bottlecap/tests/proc/uptime/invalid_data_uptime b/bottlecap/tests/proc/uptime/invalid_data_uptime new file mode 100644 index 000000000..7fc664612 --- /dev/null +++ b/bottlecap/tests/proc/uptime/invalid_data_uptime @@ -0,0 +1 @@ +3213103123 INVALID diff --git a/bottlecap/tests/proc/uptime/malformed_uptime b/bottlecap/tests/proc/uptime/malformed_uptime new file mode 100644 index 000000000..e75900cd7 --- /dev/null +++ b/bottlecap/tests/proc/uptime/malformed_uptime @@ -0,0 +1 @@ +3213103123 diff --git a/bottlecap/tests/proc/uptime/valid_uptime b/bottlecap/tests/proc/uptime/valid_uptime new file mode 100644 index 000000000..91c626c1b --- /dev/null +++ b/bottlecap/tests/proc/uptime/valid_uptime @@ -0,0 +1 @@ +3213103123 32131