Skip to content

Conversation

@wsmoses
Copy link
Member

@wsmoses wsmoses commented Dec 27, 2025

No description provided.

Copy link
Contributor

@github-actions github-actions bot left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

EnzymeJAX Benchmarks

Details
Benchmark suite Current: 55fa9b8 Previous: 008dac3 Ratio
actmtch / JaXPipe / cpu / Primal 0.000007219600011012517 s 0.000006760500036762096 s 1.07
actmtch / Jax / cpu / Primal 0.000006937700009075343 s 0.000006411220001609764 s 1.08
actmtch / HLOOpt / cpu / Primal 0.000008001980004337384 s 0.000008145720021275339 s 0.98
actmtch / PartOpt / cpu / Primal 0.000006918760018379544 s 0.000006577820022357627 s 1.05
actmtch / IPartOpt / cpu / Primal 0.000007273139972312492 s 0.000006845999960205517 s 1.06
actmtch / DefOpt / cpu / Primal 0.000008002299991858309 s 0.000007159520037021139 s 1.12
actmtch / IDefOpt / cpu / Primal 0.000007745939992673811 s 0.000007819799966455321 s 0.99
actmtch / JaXPipe / cpu / Forward 0.000011593719964366756 s 0.00001100208001844294 s 1.05
actmtch / Jax / cpu / Forward 0.000010134320027646027 s 0.00001000555997052288 s 1.01
actmtch / HLOOpt / cpu / Forward 0.000012213939999128343 s 0.000011567560004550616 s 1.06
actmtch / PartOpt / cpu / Forward 0.000012317599994275952 s 0.000010661280030035412 s 1.16
actmtch / IPartOpt / cpu / Forward 0.000011965199983023922 s 0.000011272220053797356 s 1.06
actmtch / DefOpt / cpu / Forward 0.00001164608001090528 s 0.000010422680006740849 s 1.12
actmtch / IDefOpt / cpu / Forward 0.000011969359975410044 s 0.000010585179988993332 s 1.13
actmtch / JaXPipe / cpu / PreRev 0.000011538819981069535 s 0.00001065782000296167 s 1.08
actmtch / JaXPipe / cpu / PostRev 0.000011596199947234709 s 0.000009895780012811885 s 1.17
actmtch / JaXPipe / cpu / BothRev 0.000013018939980611322 s 0.000011262480038567446 s 1.16
actmtch / Jax / cpu / BothRev 0.000010528179991524669 s 0.00001015358000586275 s 1.04
actmtch / HLOOpt / cpu / PreRev 0.000012500800030466051 s 0.000010812100017574266 s 1.16
actmtch / HLOOpt / cpu / PostRev 0.000013378120002016658 s 0.000012807180000891094 s 1.04
actmtch / HLOOpt / cpu / BothRev 0.00001181432000521454 s 0.00001181797997560352 s 1.00
actmtch / PartOpt / cpu / PreRev 0.00001183654000669776 s 0.000010439159987072345 s 1.13
actmtch / PartOpt / cpu / PostRev 0.000011016879971066374 s 0.000010471360028532217 s 1.05
actmtch / PartOpt / cpu / BothRev 0.000012776619996657246 s 0.000011898939937964317 s 1.07
actmtch / IPartOpt / cpu / PreRev 0.000011382860038793295 s 0.000010801919979712692 s 1.05
actmtch / IPartOpt / cpu / PostRev 0.000010706939992815025 s 0.000009592260012141196 s 1.12
actmtch / IPartOpt / cpu / BothRev 0.000012827480013584136 s 0.000011451419995864853 s 1.12
actmtch / DefOpt / cpu / PreRev 0.000011133059997519012 s 0.000010753459991974524 s 1.04
actmtch / DefOpt / cpu / PostRev 0.000011579539959711838 s 0.00001099387999602186 s 1.05
actmtch / DefOpt / cpu / BothRev 0.000012121499994464102 s 0.000011193439931957982 s 1.08
actmtch / IDefOpt / cpu / PreRev 0.00001178591996904288 s 0.000011068260055253632 s 1.06
actmtch / IDefOpt / cpu / PostRev 0.00001255560002391576 s 0.000011407880001570448 s 1.10
actmtch / IDefOpt / cpu / BothRev 0.000012186220010335091 s 0.000011293160023342352 s 1.08
actmtch / JaXPipe / cuda / Primal 0.000002368 s 0.000002015 s 1.18
actmtch / Jax / cuda / Primal 0.0000024 s 0.000002016 s 1.19
actmtch / HLOOpt / cuda / Primal 0.000002368 s 0.000002016 s 1.17
actmtch / PartOpt / cuda / Primal 0.000002399 s 0.000002016 s 1.19
actmtch / IPartOpt / cuda / Primal 0.000002368 s 0.000002015 s 1.18
actmtch / DefOpt / cuda / Primal 0.0000023670000000000004 s 0.000002016 s 1.17
actmtch / IDefOpt / cuda / Primal 0.000002368 s 0.000002016 s 1.17
actmtch / JaXPipe / cuda / Forward 0.000010656 s 0.000009664 s 1.10
actmtch / Jax / cuda / Forward 0.000010272 s 0.000009888 s 1.04
actmtch / HLOOpt / cuda / Forward 0.000010528 s 0.000009952 s 1.06
actmtch / PartOpt / cuda / Forward 0.000010432 s 0.000009728 s 1.07
actmtch / IPartOpt / cuda / Forward 0.0000104 s 0.000009888 s 1.05
actmtch / DefOpt / cuda / Forward 0.000010432 s 0.000010016 s 1.04
actmtch / IDefOpt / cuda / Forward 0.00001056 s 0.000010144 s 1.04
actmtch / JaXPipe / cuda / PreRev 0.000010784 s 0.000009569 s 1.13
actmtch / JaXPipe / cuda / PostRev 0.000011520000000000002 s 0.00000976 s 1.18
actmtch / JaXPipe / cuda / BothRev 0.000010624 s 0.000009728 s 1.09
actmtch / Jax / cuda / BothRev 0.000011648 s 0.000010271 s 1.13
actmtch / HLOOpt / cuda / PreRev 0.00001056 s 0.000010144 s 1.04
actmtch / HLOOpt / cuda / PostRev 0.000010688 s 0.000009888 s 1.08
actmtch / HLOOpt / cuda / BothRev 0.000010816 s 0.000009985 s 1.08
actmtch / PartOpt / cuda / PreRev 0.000010719 s 0.000010176 s 1.05
actmtch / PartOpt / cuda / PostRev 0.000010496 s 0.000009825 s 1.07
actmtch / PartOpt / cuda / BothRev 0.000010592 s 0.000009408 s 1.13
actmtch / IPartOpt / cuda / PreRev 0.000010944 s 0.000010016 s 1.09
actmtch / IPartOpt / cuda / PostRev 0.000010816 s 0.000009888 s 1.09
actmtch / IPartOpt / cuda / BothRev 0.000010753 s 0.000009696 s 1.11
actmtch / DefOpt / cuda / PreRev 0.000010912 s 0.000010048 s 1.09
actmtch / DefOpt / cuda / PostRev 0.000010815 s 0.000009824 s 1.10
actmtch / DefOpt / cuda / BothRev 0.000010719 s 0.00001024 s 1.05
actmtch / IDefOpt / cuda / PreRev 0.00001056 s 0.000010304 s 1.02
actmtch / IDefOpt / cuda / PostRev 0.000010528 s 0.000009729 s 1.08
actmtch / IDefOpt / cuda / BothRev 0.000010368 s 0.000010111 s 1.03
actmtch / JaXPipe / tpu / Primal 5.63275e-7 s 5.63425e-7 s 1.00
actmtch / Jax / tpu / Primal 5.96975e-7 s 5.966250000000001e-7 s 1.00
actmtch / HLOOpt / tpu / Primal 0.00000209415 s 0.0000021007 s 1.00
actmtch / PartOpt / tpu / Primal 5.964250000000001e-7 s 5.967e-7 s 1.00
actmtch / IPartOpt / tpu / Primal 5.528999999999999e-7 s 5.5285e-7 s 1.00
actmtch / DefOpt / tpu / Primal 0.000002154975 s 0.00000216535 s 1.00
actmtch / IDefOpt / tpu / Primal 0.0000021036 s 0.000002095925 s 1.00
actmtch / JaXPipe / tpu / Forward 0.000003828075000000001 s 0.000003833699999999999 s 1.00
actmtch / Jax / tpu / Forward 0.00000121105 s 0.0000012137 s 1.00
actmtch / HLOOpt / tpu / Forward 0.000003940775 s 0.000003934825 s 1.00
actmtch / PartOpt / tpu / Forward 0.00000391235 s 0.000003914125 s 1.00
actmtch / IPartOpt / tpu / Forward 0.000003945825 s 0.0000039562 s 1.00
actmtch / DefOpt / tpu / Forward 0.000003918575000000001 s 0.0000039197 s 1.00
actmtch / IDefOpt / tpu / Forward 0.0000039444 s 0.000003948875 s 1.00
actmtch / JaXPipe / tpu / PreRev 0.000003477675 s 0.000003474775 s 1.00
actmtch / JaXPipe / tpu / PostRev 0.0000016404750000000002 s 0.000001638225 s 1.00
actmtch / JaXPipe / tpu / BothRev 0.000003493275 s 0.0000034755000000000004 s 1.01
actmtch / Jax / tpu / BothRev 0.000001636525 s 0.0000016347999999999998 s 1.00
actmtch / HLOOpt / tpu / PreRev 0.00000347935 s 0.0000034835 s 1.00
actmtch / HLOOpt / tpu / PostRev 0.000003423075 s 0.0000034127000000000004 s 1.00
actmtch / HLOOpt / tpu / BothRev 0.000003487 s 0.00000346445 s 1.01
actmtch / PartOpt / tpu / PreRev 0.00000341505 s 0.0000034168 s 1.00
actmtch / PartOpt / tpu / PostRev 0.000001585025 s 0.000001595925 s 0.99
actmtch / PartOpt / tpu / BothRev 0.00000341325 s 0.00000340855 s 1.00
actmtch / IPartOpt / tpu / PreRev 0.0000034763500000000006 s 0.000003468525 s 1.00
actmtch / IPartOpt / tpu / PostRev 0.000001633675 s 0.000001630525 s 1.00
actmtch / IPartOpt / tpu / BothRev 0.000003478425 s 0.00000348495 s 1.00
actmtch / DefOpt / tpu / PreRev 0.0000034184999999999995 s 0.000003405575 s 1.00
actmtch / DefOpt / tpu / PostRev 0.0000034131250000000005 s 0.000003419525 s 1.00
actmtch / DefOpt / tpu / BothRev 0.0000034026 s 0.00000341945 s 1.00
actmtch / IDefOpt / tpu / PreRev 0.000003491375 s 0.000003468625 s 1.01
actmtch / IDefOpt / tpu / PostRev 0.0000034063500000000004 s 0.0000034068500000000003 s 1.00
actmtch / IDefOpt / tpu / BothRev 0.000003481775 s 0.000003474975 s 1.00
actmtch / JaXPipe / cpu / Primal 0.000016955000000000003 s 0.000006760500036762096 s 2.51
actmtch / Jax / cpu / Primal 0.000017037 s 0.000006411220001609764 s 2.66
actmtch / HLOOpt / cpu / Primal 0.00001744 s 0.000008145720021275339 s 2.14
actmtch / PartOpt / cpu / Primal 0.000016834 s 0.000006577820022357627 s 2.56
actmtch / IPartOpt / cpu / Primal 0.000016868999999999997 s 0.000006845999960205517 s 2.46
actmtch / DefOpt / cpu / Primal 0.000017491 s 0.000007159520037021139 s 2.44
actmtch / IDefOpt / cpu / Primal 0.000017797 s 0.000007819799966455321 s 2.28
actmtch / JaXPipe / cpu / Forward 0.000023931 s 0.00001100208001844294 s 2.18
actmtch / Jax / cpu / Forward 0.000022711 s 0.00001000555997052288 s 2.27
actmtch / HLOOpt / cpu / Forward 0.000024147 s 0.000011567560004550616 s 2.09
actmtch / PartOpt / cpu / Forward 0.000024114 s 0.000010661280030035412 s 2.26
actmtch / IPartOpt / cpu / Forward 0.000023981 s 0.000011272220053797356 s 2.13
actmtch / DefOpt / cpu / Forward 0.000023467 s 0.000010422680006740849 s 2.25
actmtch / IDefOpt / cpu / Forward 0.000023612 s 0.000010585179988993332 s 2.23
actmtch / JaXPipe / cpu / PreRev 0.000024495 s 0.00001065782000296167 s 2.30
actmtch / JaXPipe / cpu / PostRev 0.000022046 s 0.000009895780012811885 s 2.23
actmtch / JaXPipe / cpu / BothRev 0.000024167 s 0.000011262480038567446 s 2.15
actmtch / Jax / cpu / BothRev 0.000022147 s 0.00001015358000586275 s 2.18
actmtch / HLOOpt / cpu / PreRev 0.000024502 s 0.000010812100017574266 s 2.27
actmtch / HLOOpt / cpu / PostRev 0.000024333 s 0.000012807180000891094 s 1.90
actmtch / HLOOpt / cpu / BothRev 0.000024398 s 0.00001181797997560352 s 2.06
actmtch / PartOpt / cpu / PreRev 0.000024112 s 0.000010439159987072345 s 2.31
actmtch / PartOpt / cpu / PostRev 0.000022053 s 0.000010471360028532217 s 2.11
actmtch / PartOpt / cpu / BothRev 0.000024206 s 0.000011898939937964317 s 2.03
actmtch / IPartOpt / cpu / PreRev 0.000024315 s 0.000010801919979712692 s 2.25
actmtch / IPartOpt / cpu / PostRev 0.0000215 s 0.000009592260012141196 s 2.24
actmtch / IPartOpt / cpu / BothRev 0.000024032 s 0.000011451419995864853 s 2.10
actmtch / DefOpt / cpu / PreRev 0.00002439 s 0.000010753459991974524 s 2.27
actmtch / DefOpt / cpu / PostRev 0.000024632 s 0.00001099387999602186 s 2.24
actmtch / DefOpt / cpu / BothRev 0.000024346 s 0.000011193439931957982 s 2.18
actmtch / IDefOpt / cpu / PreRev 0.000024297 s 0.000011068260055253632 s 2.20
actmtch / IDefOpt / cpu / PostRev 0.000024202 s 0.000011407880001570448 s 2.12
actmtch / IDefOpt / cpu / BothRev 0.000024456 s 0.000011293160023342352 s 2.17
add_one / JaXPipe / cpu / Primal 0.000006766400019841967 s 0.000007061899959808216 s 0.96
add_one / Jax / cpu / Primal 0.00000701046000358474 s 0.000007725300010861246 s 0.91
add_one / HLOOpt / cpu / Primal 0.0000067159000354877205 s 0.000007127159988158383 s 0.94
add_one / PartOpt / cpu / Primal 0.000006456360015363316 s 0.0000064727400240371934 s 1.00
add_one / IPartOpt / cpu / Primal 0.000007295920004253276 s 0.000006855559977338999 s 1.06
add_one / DefOpt / cpu / Primal 0.000006764760000805836 s 0.000006799359998694854 s 0.99
add_one / IDefOpt / cpu / Primal 0.000006472800005212776 s 0.000006532919996971032 s 0.99
add_one / JaXPipe / cpu / Forward 0.000010426539975014748 s 0.000010692179985198891 s 0.98
add_one / Jax / cpu / Forward 0.000010362539997004206 s 0.000009852040057012344 s 1.05
add_one / HLOOpt / cpu / Forward 0.000010711020013332018 s 0.00001061569998455525 s 1.01
add_one / PartOpt / cpu / Forward 0.000010018179991675423 s 0.000010313280026821304 s 0.97
add_one / IPartOpt / cpu / Forward 0.000010749960010798533 s 0.000010236940006507212 s 1.05
add_one / DefOpt / cpu / Forward 0.000010087039981954147 s 0.00001021022000713856 s 0.99
add_one / IDefOpt / cpu / Forward 0.000010709299986046971 s 0.00000989591997495154 s 1.08
add_one / JaXPipe / cpu / PreRev 0.000012811520000468593 s 0.000011687020005410889 s 1.10
add_one / JaXPipe / cpu / PostRev 0.00001250757997695473 s 0.000011235759975534164 s 1.11
add_one / JaXPipe / cpu / BothRev 0.000012652120012717204 s 0.000012070920010955888 s 1.05
add_one / Jax / cpu / BothRev 0.000012484280023272732 s 0.00001110520000111137 s 1.12
add_one / HLOOpt / cpu / PreRev 0.000012529659989013453 s 0.0000117855600456096 s 1.06
add_one / HLOOpt / cpu / PostRev 0.0000147597399973165 s 0.0000174333799895976 s 0.85
add_one / HLOOpt / cpu / BothRev 0.000012448559991753428 s 0.000011470099998405203 s 1.09
add_one / PartOpt / cpu / PreRev 0.000012288619982427918 s 0.000011000640006386676 s 1.12
add_one / PartOpt / cpu / PostRev 0.000013095279991830466 s 0.000011238540000704234 s 1.17
add_one / PartOpt / cpu / BothRev 0.000013286659968798632 s 0.000011834960005217 s 1.12
add_one / IPartOpt / cpu / PreRev 0.000011644140013231665 s 0.000011671399961414864 s 1.00
add_one / IPartOpt / cpu / PostRev 0.000012916999985463915 s 0.000012116419975427562 s 1.07
add_one / IPartOpt / cpu / BothRev 0.00001268416000129946 s 0.000011671220026983064 s 1.09
add_one / DefOpt / cpu / PreRev 0.00001207088002047385 s 0.000011632380019364063 s 1.04
add_one / DefOpt / cpu / PostRev 0.00001316150001002825 s 0.000011632059986368405 s 1.13
add_one / DefOpt / cpu / BothRev 0.000013018680010645767 s 0.00001143328000580368 s 1.14
add_one / IDefOpt / cpu / PreRev 0.000012351220002528862 s 0.00001101984002161771 s 1.12
add_one / IDefOpt / cpu / PostRev 0.000012618619975910406 s 0.000011647559977063794 s 1.08
add_one / IDefOpt / cpu / BothRev 0.00001241630000549776 s 0.00001107572001274093 s 1.12
add_one / JaXPipe / cuda / Primal 0.000002303 s 0.0000019200000000000003 s 1.20
add_one / Jax / cuda / Primal 0.000002303 s 0.0000019200000000000003 s 1.20
add_one / HLOOpt / cuda / Primal 0.000002303 s 0.0000019200000000000003 s 1.20
add_one / PartOpt / cuda / Primal 0.000002303 s 0.0000019200000000000003 s 1.20
add_one / IPartOpt / cuda / Primal 0.000002303 s 0.0000019200000000000003 s 1.20
add_one / DefOpt / cuda / Primal 0.000002303 s 0.0000019200000000000003 s 1.20
add_one / IDefOpt / cuda / Primal 0.000002303 s 0.0000019200000000000003 s 1.20
add_one / JaXPipe / cuda / Forward 0.0000104 s 0.000010144 s 1.03
add_one / Jax / cuda / Forward 0.000010112 s 0.000010112 s 1
add_one / HLOOpt / cuda / Forward 0.000010433 s 0.000011391 s 0.92
add_one / PartOpt / cuda / Forward 0.000010464 s 0.000009824 s 1.07
add_one / IPartOpt / cuda / Forward 0.000010656 s 0.000011392 s 0.94
add_one / DefOpt / cuda / Forward 0.000010496 s 0.000011328 s 0.93
add_one / IDefOpt / cuda / Forward 0.000010271 s 0.000009953 s 1.03
add_one / JaXPipe / cuda / PreRev 0.000025984 s 0.000029536 s 0.88
add_one / JaXPipe / cuda / PostRev 0.000024736 s 0.000029408 s 0.84
add_one / JaXPipe / cuda / BothRev 0.000026304 s 0.000024704 s 1.06
add_one / Jax / cuda / BothRev 0.0000256 s 0.000023936 s 1.07
add_one / HLOOpt / cuda / PreRev 0.000026271 s 0.000029056 s 0.90
add_one / HLOOpt / cuda / PostRev 0.000025024 s 0.000024895 s 1.01
add_one / HLOOpt / cuda / BothRev 0.00002528 s 0.000028992 s 0.87
add_one / PartOpt / cuda / PreRev 0.000025152 s 0.000024801 s 1.01
add_one / PartOpt / cuda / PostRev 0.000025696 s 0.00002464 s 1.04
add_one / PartOpt / cuda / BothRev 0.000025632 s 0.000024736 s 1.04
add_one / IPartOpt / cuda / PreRev 0.000025631 s 0.00002496 s 1.03
add_one / IPartOpt / cuda / PostRev 0.000025664 s 0.000024607 s 1.04
add_one / IPartOpt / cuda / BothRev 0.000025472000000000003 s 0.000024544 s 1.04
add_one / DefOpt / cuda / PreRev 0.00002496 s 0.000024896 s 1.00
add_one / DefOpt / cuda / PostRev 0.000025632 s 0.00002464 s 1.04
add_one / DefOpt / cuda / BothRev 0.000025504 s 0.00002528 s 1.01
add_one / IDefOpt / cuda / PreRev 0.000025824 s 0.000024736 s 1.04
add_one / IDefOpt / cuda / PostRev 0.000025952 s 0.000024896 s 1.04
add_one / IDefOpt / cuda / BothRev 0.000025376 s 0.000024544 s 1.03
add_one / JaXPipe / tpu / Primal 0.000001427175 s 0.000001425475 s 1.00
add_one / Jax / tpu / Primal 0.0000014035 s 0.000001400975 s 1.00
add_one / HLOOpt / tpu / Primal 0.0000014274 s 0.00000143205 s 1.00
add_one / PartOpt / tpu / Primal 0.0000014040749999999998 s 0.0000014018749999999998 s 1.00
add_one / IPartOpt / tpu / Primal 0.00000142675 s 0.00000142615 s 1.00
add_one / DefOpt / tpu / Primal 0.0000014067750000000002 s 0.0000014055 s 1.00
add_one / IDefOpt / tpu / Primal 0.00000142765 s 0.000001431325 s 1.00
add_one / JaXPipe / tpu / Forward 0.0000018545 s 0.0000018029 s 1.03
add_one / Jax / tpu / Forward 0.000001844525 s 0.00000184345 s 1.00
add_one / HLOOpt / tpu / Forward 0.0000018445 s 0.000001803225 s 1.02
add_one / PartOpt / tpu / Forward 0.00000183855 s 0.000001844125 s 1.00
add_one / IPartOpt / tpu / Forward 0.0000018445 s 0.000001798375 s 1.03
add_one / DefOpt / tpu / Forward 0.00000184355 s 0.00000183825 s 1.00
add_one / IDefOpt / tpu / Forward 0.000001852225 s 0.0000018087 s 1.02
add_one / JaXPipe / tpu / PreRev 0.000002239075 s 0.0000022364 s 1.00
add_one / JaXPipe / tpu / PostRev 0.000002232975 s 0.000002194825 s 1.02
add_one / JaXPipe / tpu / BothRev 0.000002230075 s 0.000002238375 s 1.00
add_one / Jax / tpu / BothRev 0.0000022493 s 0.00000217835 s 1.03
add_one / HLOOpt / tpu / PreRev 0.00000223905 s 0.000002233875 s 1.00
add_one / HLOOpt / tpu / PostRev 0.00000223535 s 0.0000021854000000000003 s 1.02
add_one / HLOOpt / tpu / BothRev 0.00000224435 s 0.0000022366 s 1.00
add_one / PartOpt / tpu / PreRev 0.0000022434 s 0.0000021858250000000004 s 1.03
add_one / PartOpt / tpu / PostRev 0.000002232775 s 0.00000223615 s 1.00
add_one / PartOpt / tpu / BothRev 0.00000224575 s 0.0000021889500000000004 s 1.03
add_one / IPartOpt / tpu / PreRev 0.000002230875 s 0.000002251875 s 0.99
add_one / IPartOpt / tpu / PostRev 0.000002239375 s 0.0000021842 s 1.03
add_one / IPartOpt / tpu / BothRev 0.00000223475 s 0.00000223985 s 1.00
add_one / DefOpt / tpu / PreRev 0.000002246575 s 0.000002184575 s 1.03
add_one / DefOpt / tpu / PostRev 0.000002232275 s 0.000002233075 s 1.00
add_one / DefOpt / tpu / BothRev 0.0000022449 s 0.0000021877 s 1.03
add_one / IDefOpt / tpu / PreRev 0.0000022411 s 0.000002241625 s 1.00
add_one / IDefOpt / tpu / PostRev 0.000002244575 s 0.0000021856000000000003 s 1.03
add_one / IDefOpt / tpu / BothRev 0.000002235925 s 0.000002233375 s 1.00
add_one / JaXPipe / cpu / Primal 0.000016287 s 0.000007061899959808216 s 2.31
add_one / Jax / cpu / Primal 0.000016311 s 0.000007725300010861246 s 2.11
add_one / HLOOpt / cpu / Primal 0.00001648 s 0.000007127159988158383 s 2.31
add_one / PartOpt / cpu / Primal 0.000016492 s 0.0000064727400240371934 s 2.55
add_one / IPartOpt / cpu / Primal 0.000016366 s 0.000006855559977338999 s 2.39
add_one / DefOpt / cpu / Primal 0.000016330999999999997 s 0.000006799359998694854 s 2.40
add_one / IDefOpt / cpu / Primal 0.000016204 s 0.000006532919996971032 s 2.48
add_one / JaXPipe / cpu / Forward 0.000022432 s 0.000010692179985198891 s 2.10
add_one / Jax / cpu / Forward 0.000022011 s 0.000009852040057012344 s 2.23
add_one / HLOOpt / cpu / Forward 0.000022103 s 0.00001061569998455525 s 2.08
add_one / PartOpt / cpu / Forward 0.000021894 s 0.000010313280026821304 s 2.12
add_one / IPartOpt / cpu / Forward 0.000022175 s 0.000010236940006507212 s 2.17
add_one / DefOpt / cpu / Forward 0.000022219 s 0.00001021022000713856 s 2.18
add_one / IDefOpt / cpu / Forward 0.000021966 s 0.00000989591997495154 s 2.22
add_one / JaXPipe / cpu / PreRev 0.000024711 s 0.000011687020005410889 s 2.11
add_one / JaXPipe / cpu / PostRev 0.000024140000000000003 s 0.000011235759975534164 s 2.15
add_one / JaXPipe / cpu / BothRev 0.000024108 s 0.000012070920010955888 s 2.00
add_one / Jax / cpu / BothRev 0.000024243 s 0.00001110520000111137 s 2.18
add_one / HLOOpt / cpu / PreRev 0.000024312 s 0.0000117855600456096 s 2.06
add_one / HLOOpt / cpu / PostRev 0.00002469 s 0.0000174333799895976 s 1.42
add_one / HLOOpt / cpu / BothRev 0.000024286 s 0.000011470099998405203 s 2.12
add_one / PartOpt / cpu / PreRev 0.000024633 s 0.000011000640006386676 s 2.24
add_one / PartOpt / cpu / PostRev 0.000024643 s 0.000011238540000704234 s 2.19
add_one / PartOpt / cpu / BothRev 0.000024298 s 0.000011834960005217 s 2.05
add_one / IPartOpt / cpu / PreRev 0.000024282 s 0.000011671399961414864 s 2.08
add_one / IPartOpt / cpu / PostRev 0.00002473 s 0.000012116419975427562 s 2.04
add_one / IPartOpt / cpu / BothRev 0.000024642 s 0.000011671220026983064 s 2.11
add_one / DefOpt / cpu / PreRev 0.000024803 s 0.000011632380019364063 s 2.13
add_one / DefOpt / cpu / PostRev 0.000024186 s 0.000011632059986368405 s 2.08
add_one / DefOpt / cpu / BothRev 0.000024643 s 0.00001143328000580368 s 2.16
add_one / IDefOpt / cpu / PreRev 0.000025039 s 0.00001101984002161771 s 2.27
add_one / IDefOpt / cpu / PostRev 0.000024626 s 0.000011647559977063794 s 2.11
add_one / IDefOpt / cpu / BothRev 0.000024622 s 0.00001107572001274093 s 2.22
add_two / JaXPipe / cpu / Primal 0.000007352780012297444 s 0.000006867799966130406 s 1.07
add_two / Jax / cpu / Primal 0.000007023840007605031 s 0.000006908039995323634 s 1.02
add_two / HLOOpt / cpu / Primal 0.000007302940011868486 s 0.000007006639998508036 s 1.04
add_two / PartOpt / cpu / Primal 0.000007367939979303628 s 0.000007027579995337874 s 1.05
add_two / IPartOpt / cpu / Primal 0.00000707068001247535 s 0.000007588979979118449 s 0.93
add_two / DefOpt / cpu / Primal 0.000006897260036566877 s 0.0000068686600297951375 s 1.00
add_two / IDefOpt / cpu / Primal 0.000007111559971235692 s 0.000006989980029175058 s 1.02
add_two / JaXPipe / cpu / Forward 0.000010896840058194357 s 0.000010030479979832308 s 1.09
add_two / Jax / cpu / Forward 0.000010803940012920066 s 0.000009928719991876278 s 1.09
add_two / HLOOpt / cpu / Forward 0.00001132066002355714 s 0.00001017656001749856 s 1.11
add_two / PartOpt / cpu / Forward 0.000010628779973558268 s 0.000010274779997416772 s 1.03
add_two / IPartOpt / cpu / Forward 0.000010467019992574933 s 0.000010454699986439663 s 1.00
add_two / DefOpt / cpu / Forward 0.00001064430001861183 s 0.000010264359962093295 s 1.04
add_two / IDefOpt / cpu / Forward 0.0000106617399615061 s 0.00001037967996126099 s 1.03
add_two / JaXPipe / cpu / PreRev 0.000015257120030582883 s 0.000013865560013073265 s 1.10
add_two / JaXPipe / cpu / PostRev 0.000014976320026107714 s 0.000014588599997296116 s 1.03
add_two / JaXPipe / cpu / BothRev 0.000014595680013371748 s 0.000014942600009817398 s 0.98
add_two / Jax / cpu / BothRev 0.00001503940003203752 s 0.000013541219987018849 s 1.11
add_two / HLOOpt / cpu / PreRev 0.000015224779999698513 s 0.000014437700037888137 s 1.05
add_two / HLOOpt / cpu / PostRev 0.000017304300035903 s 0.00001636072001019784 s 1.06
add_two / HLOOpt / cpu / BothRev 0.0000158201599970198 s 0.000014554019971910747 s 1.09
add_two / PartOpt / cpu / PreRev 0.00001535404000605922 s 0.0000141802000325697 s 1.08
add_two / PartOpt / cpu / PostRev 0.00001492956002948631 s 0.0000140553999972326 s 1.06
add_two / PartOpt / cpu / BothRev 0.000014828399971520411 s 0.000014440060003835245 s 1.03
add_two / IPartOpt / cpu / PreRev 0.000015484920049857463 s 0.000014146200001050602 s 1.09
add_two / IPartOpt / cpu / PostRev 0.000014987399963501957 s 0.00001453995997508173 s 1.03
add_two / IPartOpt / cpu / BothRev 0.000015284160026567408 s 0.000014311239992821356 s 1.07
add_two / DefOpt / cpu / PreRev 0.00001545057999464916 s 0.000013563660022555269 s 1.14
add_two / DefOpt / cpu / PostRev 0.000014861840045341525 s 0.00001501925997217768 s 0.99
add_two / DefOpt / cpu / BothRev 0.000014815800004726044 s 0.00001491654000346898 s 0.99
add_two / IDefOpt / cpu / PreRev 0.00001569946002746292 s 0.00001355600003080326 s 1.16
add_two / IDefOpt / cpu / PostRev 0.000014131400057522114 s 0.000014451440019911388 s 0.98
add_two / IDefOpt / cpu / BothRev 0.000014751260041521163 s 0.000014129720002529213 s 1.04
add_two / JaXPipe / cuda / Primal 0.0000024 s 0.0000019200000000000003 s 1.25
add_two / Jax / cuda / Primal 0.0000024 s 0.0000019200000000000003 s 1.25
add_two / HLOOpt / cuda / Primal 0.0000024 s 0.0000019200000000000003 s 1.25
add_two / PartOpt / cuda / Primal 0.0000024 s 0.000001919 s 1.25
add_two / IPartOpt / cuda / Primal 0.0000024 s 0.0000019200000000000003 s 1.25
add_two / DefOpt / cuda / Primal 0.0000024 s 0.0000019200000000000003 s 1.25
add_two / IDefOpt / cuda / Primal 0.0000024 s 0.0000019200000000000003 s 1.25
add_two / JaXPipe / cuda / Forward 0.000010752 s 0.000009664 s 1.11
add_two / Jax / cuda / Forward 0.0000104 s 0.000009376 s 1.11
add_two / HLOOpt / cuda / Forward 0.00001008 s 0.000009856 s 1.02
add_two / PartOpt / cuda / Forward 0.000010336 s 0.000009888 s 1.05
add_two / IPartOpt / cuda / Forward 0.000010464 s 0.000009696 s 1.08
add_two / DefOpt / cuda / Forward 0.000010528 s 0.000009792 s 1.08
add_two / IDefOpt / cuda / Forward 0.0000104 s 0.00000976 s 1.07
add_two / JaXPipe / cuda / PreRev 0.000032672 s 0.000032608 s 1.00
add_two / JaXPipe / cuda / PostRev 0.000032896000000000005 s 0.000032352 s 1.02
add_two / JaXPipe / cuda / BothRev 0.000031744 s 0.00003264 s 0.97
add_two / Jax / cuda / BothRev 0.000032736 s 0.000032513 s 1.01
add_two / HLOOpt / cuda / PreRev 0.000033024 s 0.00003264 s 1.01
add_two / HLOOpt / cuda / PostRev 0.000032672 s 0.000031616 s 1.03
add_two / HLOOpt / cuda / BothRev 0.000032416 s 0.000032896000000000005 s 0.99
add_two / PartOpt / cuda / PreRev 0.000032064 s 0.000032767999999999995 s 0.98
add_two / PartOpt / cuda / PostRev 0.000032127999999999995 s 0.000032064 s 1.00
add_two / PartOpt / cuda / BothRev 0.000032 s 0.000032576 s 0.98
add_two / IPartOpt / cuda / PreRev 0.000032767 s 0.000032928 s 1.00
add_two / IPartOpt / cuda / PostRev 0.000032671 s 0.00003168 s 1.03
add_two / IPartOpt / cuda / BothRev 0.000032 s 0.000032576 s 0.98
add_two / DefOpt / cuda / PreRev 0.000032288 s 0.000032671 s 0.99
add_two / DefOpt / cuda / PostRev 0.00003184 s 0.00003168 s 1.01
add_two / DefOpt / cuda / BothRev 0.00003232 s 0.000031647000000000004 s 1.02
add_two / IDefOpt / cuda / PreRev 0.000033759999999999995 s 0.000032064 s 1.05
add_two / IDefOpt / cuda / PostRev 0.000032448 s 0.000032096 s 1.01
add_two / IDefOpt / cuda / BothRev 0.000034624 s 0.000031937 s 1.08
add_two / JaXPipe / tpu / Primal 0.0000014315249999999998 s 0.0000014319 s 1.00
add_two / Jax / tpu / Primal 0.000001472625 s 0.000001423975 s 1.03
add_two / HLOOpt / tpu / Primal 0.0000014293 s 0.0000014321 s 1.00
add_two / PartOpt / tpu / Primal 0.00000147075 s 0.000001420075 s 1.04
add_two / IPartOpt / tpu / Primal 0.0000014236750000000002 s 0.00000142885 s 1.00
add_two / DefOpt / tpu / Primal 0.0000014884 s 0.000001434525 s 1.04
add_two / IDefOpt / tpu / Primal 0.000001426675 s 0.000001431175 s 1.00
add_two / JaXPipe / tpu / Forward 0.000001832375 s 0.0000018271 s 1.00
add_two / Jax / tpu / Forward 0.000001835525 s 0.000001827825 s 1.00
add_two / HLOOpt / tpu / Forward 0.000001826125 s 0.00000183625 s 0.99
add_two / PartOpt / tpu / Forward 0.0000018389 s 0.00000182075 s 1.01
add_two / IPartOpt / tpu / Forward 0.0000018237 s 0.000001831175 s 1.00
add_two / DefOpt / tpu / Forward 0.000001827525 s 0.000001834675 s 1.00
add_two / IDefOpt / tpu / Forward 0.000001829475 s 0.000001831225 s 1.00
add_two / JaXPipe / tpu / PreRev 0.0000028336499999999995 s 0.000002835075 s 1.00
add_two / JaXPipe / tpu / PostRev 0.000002746725 s 0.000002755875 s 1.00
add_two / JaXPipe / tpu / BothRev 0.000002844775 s 0.0000028421000000000003 s 1.00
add_two / Jax / tpu / BothRev 0.000002751675 s 0.000002751975 s 1.00
add_two / HLOOpt / tpu / PreRev 0.00000283765 s 0.0000028349 s 1.00
add_two / HLOOpt / tpu / PostRev 0.000002763925 s 0.0000027513250000000003 s 1.00
add_two / HLOOpt / tpu / BothRev 0.0000028384500000000003 s 0.000002831625 s 1.00
add_two / PartOpt / tpu / PreRev 0.0000027563749999999995 s 0.000002742475 s 1.01
add_two / PartOpt / tpu / PostRev 0.000002841225 s 0.00000283765 s 1.00
add_two / PartOpt / tpu / BothRev 0.000002754725 s 0.000002765825 s 1.00
add_two / IPartOpt / tpu / PreRev 0.000002845525 s 0.000002844575 s 1.00
add_two / IPartOpt / tpu / PostRev 0.000002757075 s 0.000002751 s 1.00
add_two / IPartOpt / tpu / BothRev 0.000002845125 s 0.00000283395 s 1.00
add_two / DefOpt / tpu / PreRev 0.0000027537 s 0.000002757975 s 1.00
add_two / DefOpt / tpu / PostRev 0.0000028369 s 0.0000028295749999999995 s 1.00
add_two / DefOpt / tpu / BothRev 0.000002757175 s 0.00000274965 s 1.00
add_two / IDefOpt / tpu / PreRev 0.0000028262749999999995 s 0.00000283355 s 1.00
add_two / IDefOpt / tpu / PostRev 0.0000027492 s 0.0000027563749999999995 s 1.00
add_two / IDefOpt / tpu / BothRev 0.000002836075 s 0.000002845875 s 1.00
add_two / JaXPipe / cpu / Primal 0.000016893 s 0.000006867799966130406 s 2.46
add_two / Jax / cpu / Primal 0.000016822 s 0.000006908039995323634 s 2.44
add_two / HLOOpt / cpu / Primal 0.000016814 s 0.000007006639998508036 s 2.40
add_two / PartOpt / cpu / Primal 0.000016916000000000002 s 0.000007027579995337874 s 2.41
add_two / IPartOpt / cpu / Primal 0.000016712000000000002 s 0.000007588979979118449 s 2.20
add_two / DefOpt / cpu / Primal 0.000016862 s 0.0000068686600297951375 s 2.45
add_two / IDefOpt / cpu / Primal 0.000016424 s 0.000006989980029175058 s 2.35
add_two / JaXPipe / cpu / Forward 0.000022859 s 0.000010030479979832308 s 2.28
add_two / Jax / cpu / Forward 0.000022533000000000003 s 0.000009928719991876278 s 2.27
add_two / HLOOpt / cpu / Forward 0.00002204 s 0.00001017656001749856 s 2.17
add_two / PartOpt / cpu / Forward 0.000022677 s 0.000010274779997416772 s 2.21
add_two / IPartOpt / cpu / Forward 0.00002264 s 0.000010454699986439663 s 2.17
add_two / DefOpt / cpu / Forward 0.000022732 s 0.000010264359962093295 s 2.21
add_two / IDefOpt / cpu / Forward 0.000022696 s 0.00001037967996126099 s 2.19
add_two / JaXPipe / cpu / PreRev 0.000029123 s 0.000013865560013073265 s 2.10
add_two / JaXPipe / cpu / PostRev 0.000028398 s 0.000014588599997296116 s 1.95
add_two / JaXPipe / cpu / BothRev 0.000028664 s 0.000014942600009817398 s 1.92
add_two / Jax / cpu / BothRev 0.000027973 s 0.000013541219987018849 s 2.07
add_two / HLOOpt / cpu / PreRev 0.000028032 s 0.000014437700037888137 s 1.94
add_two / HLOOpt / cpu / PostRev 0.000028681 s 0.00001636072001019784 s 1.75
add_two / HLOOpt / cpu / BothRev 0.000028315 s 0.000014554019971910747 s 1.95
add_two / PartOpt / cpu / PreRev 0.000028194 s 0.0000141802000325697 s 1.99
add_two / PartOpt / cpu / PostRev 0.000028483 s 0.0000140553999972326 s 2.03
add_two / PartOpt / cpu / BothRev 0.000028486 s 0.000014440060003835245 s 1.97
add_two / IPartOpt / cpu / PreRev 0.000028273 s 0.000014146200001050602 s 2.00
add_two / IPartOpt / cpu / PostRev 0.000028293 s 0.00001453995997508173 s 1.95
add_two / IPartOpt / cpu / BothRev 0.000028342 s 0.000014311239992821356 s 1.98
add_two / DefOpt / cpu / PreRev 0.000028173 s 0.000013563660022555269 s 2.08
add_two / DefOpt / cpu / PostRev 0.000028743 s 0.00001501925997217768 s 1.91
add_two / DefOpt / cpu / BothRev 0.000028375 s 0.00001491654000346898 s 1.90
add_two / IDefOpt / cpu / PreRev 0.000028259 s 0.00001355600003080326 s 2.08
add_two / IDefOpt / cpu / PostRev 0.000029261 s 0.000014451440019911388 s 2.02
add_two / IDefOpt / cpu / BothRev 0.000028729 s 0.000014129720002529213 s 2.03
cache / JaXPipe / cpu / Primal 0.00000703515999703086 s 0.000006572259999302332 s 1.07
cache / Jax / cpu / Primal 0.000006481280006482848 s 0.000006501319994640653 s 1.00
cache / HLOOpt / cpu / Primal 0.000006892579958730493 s 0.00000615048002146068 s 1.12
cache / PartOpt / cpu / Primal 0.000006388960018739454 s 0.000006232679988897871 s 1.03
cache / IPartOpt / cpu / Primal 0.000006422680016839877 s 0.000006268780007303576 s 1.02
cache / DefOpt / cpu / Primal 0.000006636940015596337 s 0.000006588100022781873 s 1.01
cache / IDefOpt / cpu / Primal 0.0000067897400640504205 s 0.000006464800007961458 s 1.05
cache / JaXPipe / cpu / Forward 0.000015402900016852074 s 0.000014711600006194204 s 1.05
cache / Jax / cpu / Forward 0.00001615365999896312 s 0.00001440755996554799 s 1.12
cache / HLOOpt / cpu / Forward 0.00001568711998515937 s 0.00001561347999086138 s 1.00
cache / PartOpt / cpu / Forward 0.000015765419993840625 s 0.000014749180008948317 s 1.07
cache / IPartOpt / cpu / Forward 0.000015741440029160005 s 0.000016127100006997353 s 0.98
cache / DefOpt / cpu / Forward 0.00001535362001050089 s 0.000015200499992715775 s 1.01
cache / IDefOpt / cpu / Forward 0.000016016859999581355 s 0.000015227600015350615 s 1.05
cache / JaXPipe / cpu / PreRev 0.00001720934001241403 s 0.000016161640023710787 s 1.06
cache / JaXPipe / cpu / PostRev 0.00002033172002484207 s 0.000021423939997475828 s 0.95
cache / JaXPipe / cpu / BothRev 0.000017232439959116163 s 0.00001640085999497387 s 1.05
cache / Jax / cpu / BothRev 0.00002290846001415048 s 0.000021007039958931272 s 1.09
cache / HLOOpt / cpu / PreRev 0.00001822302002437937 s 0.000016289080003843993 s 1.12
cache / HLOOpt / cpu / PostRev 0.000018971819981743467 s 0.000019273419984529027 s 0.98
cache / HLOOpt / cpu / BothRev 0.000016825060038172522 s 0.000017125759968621422 s 0.98
cache / PartOpt / cpu / PreRev 0.000016901340004551457 s 0.000016171379975276068 s 1.05
cache / PartOpt / cpu / PostRev 0.00002087683998979628 s 0.000019693940039360317 s 1.06
cache / PartOpt / cpu / BothRev 0.000016831579987410805 s 0.000016042779989220435 s 1.05
cache / IPartOpt / cpu / PreRev 0.00001679199996033276 s 0.000015654539993192884 s 1.07
cache / IPartOpt / cpu / PostRev 0.00002178967999498127 s 0.00001996903996769106 s 1.09
cache / IPartOpt / cpu / BothRev 0.000016785419966254267 s 0.000015126959997360244 s 1.11
cache / DefOpt / cpu / PreRev 0.000016394440026488156 s 0.000015228679976644345 s 1.08
cache / DefOpt / cpu / PostRev 0.000017087640017052764 s 0.000015360740017058562 s 1.11
cache / DefOpt / cpu / BothRev 0.00001771725996150053 s 0.000015114279985937172 s 1.17
cache / IDefOpt / cpu / PreRev 0.000017714879959385144 s 0.000015915160092845325 s 1.11
cache / IDefOpt / cpu / PostRev 0.00001806345996556047 s 0.000016638339993733097 s 1.09
cache / IDefOpt / cpu / BothRev 0.00001745837993439636 s 0.000016164739990927045 s 1.08
cache / JaXPipe / cuda / Primal 0.000002304 s 0.000002303 s 1.00
cache / Jax / cuda / Primal 0.000002335 s 0.000002304 s 1.01
cache / HLOOpt / cuda / Primal 0.000002304 s 0.00000224 s 1.03
cache / PartOpt / cuda / Primal 0.000002304 s 0.00000224 s 1.03
cache / IPartOpt / cuda / Primal 0.000002303 s 0.000002303 s 1
cache / DefOpt / cuda / Primal 0.000002336 s 0.00000224 s 1.04
cache / IDefOpt / cuda / Primal 0.000002304 s 0.000002208 s 1.04
cache / JaXPipe / cuda / Forward 0.000002336 s 0.000002335 s 1.00
cache / Jax / cuda / Forward 0.000002336 s 0.000002304 s 1.01
cache / HLOOpt / cuda / Forward 0.000002336 s 0.000002335 s 1.00
cache / PartOpt / cuda / Forward 0.000002336 s 0.000002335 s 1.00
cache / IPartOpt / cuda / Forward 0.000002336 s 0.000002335 s 1.00
cache / DefOpt / cuda / Forward 0.000002335 s 0.000002272 s 1.03
cache / IDefOpt / cuda / Forward 0.000002336 s 0.000002335 s 1.00
cache / JaXPipe / cuda / PreRev 0.000011104 s 0.000009984 s 1.11
cache / JaXPipe / cuda / PostRev 0.00001056 s 0.000010528 s 1.00
cache / JaXPipe / cuda / BothRev 0.000010816 s 0.0000104 s 1.04
cache / Jax / cuda / BothRev 0.000010656 s 0.000010656 s 1
cache / HLOOpt / cuda / PreRev 0.000013568 s 0.000013504 s 1.00
cache / HLOOpt / cuda / PostRev 0.000013536 s 0.000013536 s 1
cache / HLOOpt / cuda / BothRev 0.000013568 s 0.000013536 s 1.00
cache / PartOpt / cuda / PreRev 0.00001072 s 0.000011745 s 0.91
cache / PartOpt / cuda / PostRev 0.000010976 s 0.000011520000000000002 s 0.95
cache / PartOpt / cuda / BothRev 0.000010496 s 0.000010463 s 1.00
cache / IPartOpt / cuda / PreRev 0.00001056 s 0.000010624 s 0.99
cache / IPartOpt / cuda / PostRev 0.000010656 s 0.000010432 s 1.02
cache / IPartOpt / cuda / BothRev 0.000010912 s 0.0000104 s 1.05
cache / DefOpt / cuda / PreRev 0.00001104 s 0.00001072 s 1.03
cache / DefOpt / cuda / PostRev 0.000010816 s 0.000010336 s 1.05
cache / DefOpt / cuda / BothRev 0.00001072 s 0.000010752 s 1.00
cache / IDefOpt / cuda / PreRev 0.000010784 s 0.000010591 s 1.02
cache / IDefOpt / cuda / PostRev 0.000010528 s 0.000010304 s 1.02
cache / IDefOpt / cuda / BothRev 0.000010944 s 0.000010464 s 1.05
cache / JaXPipe / tpu / Primal 0.000002458025 s 0.0000024717 s 0.99
cache / Jax / tpu / Primal 0.000002463875 s 0.000002463125 s 1.00
cache / HLOOpt / tpu / Primal 0.00000246395 s 0.000002479075 s 0.99
cache / PartOpt / tpu / Primal 0.0000024572 s 0.00000247065 s 0.99
cache / IPartOpt / tpu / Primal 0.000002463925 s 0.0000024608 s 1.00
cache / DefOpt / tpu / Primal 0.0000024608250000000003 s 0.0000024597 s 1.00
cache / IDefOpt / tpu / Primal 0.000002470675 s 0.000002462275 s 1.00
cache / JaXPipe / tpu / Forward 0.0000035389 s 0.000003541175 s 1.00
cache / Jax / tpu / Forward 0.00000352655 s 0.000003548125 s 0.99
cache / HLOOpt / tpu / Forward 0.000003550425 s 0.000003560675 s 1.00
cache / PartOpt / tpu / Forward 0.0000035305 s 0.000003532225 s 1.00
cache / IPartOpt / tpu / Forward 0.0000035556 s 0.00000355105 s 1.00
cache / DefOpt / tpu / Forward 0.0000035295500000000004 s 0.0000035362 s 1.00
cache / IDefOpt / tpu / Forward 0.0000035523250000000003 s 0.000003544875 s 1.00
cache / JaXPipe / tpu / PreRev 0.000004939325 s 0.00000495055 s 1.00
cache / JaXPipe / tpu / PostRev 0.0000049496 s 0.00000496035 s 1.00
cache / JaXPipe / tpu / BothRev 0.000004973425 s 0.000004976425 s 1.00
cache / Jax / tpu / BothRev 0.0000049778 s 0.000004994475 s 1.00
cache / HLOOpt / tpu / PreRev 0.00000393935 s 0.0000039297 s 1.00
cache / HLOOpt / tpu / PostRev 0.000004140825 s 0.000004137525000000001 s 1.00
cache / HLOOpt / tpu / BothRev 0.00000396615 s 0.0000039311 s 1.01
cache / PartOpt / tpu / PreRev 0.000004972625 s 0.0000049712 s 1.00
cache / PartOpt / tpu / PostRev 0.000004962525 s 0.000004976825 s 1.00
cache / PartOpt / tpu / BothRev 0.000004987400000000001 s 0.00000496935 s 1.00
cache / IPartOpt / tpu / PreRev 0.0000049838500000000006 s 0.000004975975 s 1.00
cache / IPartOpt / tpu / PostRev 0.00000497355 s 0.0000049639 s 1.00
cache / IPartOpt / tpu / BothRev 0.00000498515 s 0.0000049710750000000005 s 1.00
cache / DefOpt / tpu / PreRev 0.000004974025 s 0.000004988775 s 1.00
cache / DefOpt / tpu / PostRev 0.00000497755 s 0.00000495615 s 1.00
cache / DefOpt / tpu / BothRev 0.000004964624999999999 s 0.00000498125 s 1.00
cache / IDefOpt / tpu / PreRev 0.000004966125 s 0.000004979 s 1.00
cache / IDefOpt / tpu / PostRev 0.0000049804 s 0.000004971375 s 1.00
cache / IDefOpt / tpu / BothRev 0.000004963425000000001 s 0.000004941475 s 1.00
cache / JaXPipe / cpu / Primal 0.000018746 s 0.000006572259999302332 s 2.85
cache / Jax / cpu / Primal 0.000018664 s 0.000006501319994640653 s 2.87
cache / HLOOpt / cpu / Primal 0.000018886 s 0.00000615048002146068 s 3.07
cache / PartOpt / cpu / Primal 0.0000188 s 0.000006232679988897871 s 3.02
cache / IPartOpt / cpu / Primal 0.000019005 s 0.000006268780007303576 s 3.03
cache / DefOpt / cpu / Primal 0.000018801 s 0.000006588100022781873 s 2.85
cache / IDefOpt / cpu / Primal 0.000018793 s 0.000006464800007961458 s 2.91
cache / JaXPipe / cpu / Forward 0.000021407 s 0.000014711600006194204 s 1.46
cache / Jax / cpu / Forward 0.000021713 s 0.00001440755996554799 s 1.51
cache / HLOOpt / cpu / Forward 0.000021407 s 0.00001561347999086138 s 1.37
cache / PartOpt / cpu / Forward 0.000021528 s 0.000014749180008948317 s 1.46
cache / IPartOpt / cpu / Forward 0.000021912 s 0.000016127100006997353 s 1.36
cache / DefOpt / cpu / Forward 0.000021429 s 0.000015200499992715775 s 1.41
cache / IDefOpt / cpu / Forward 0.000021381 s 0.000015227600015350615 s 1.40
cache / JaXPipe / cpu / PreRev 0.000022426 s 0.000016161640023710787 s 1.39
cache / JaXPipe / cpu / PostRev 0.000024815 s 0.000021423939997475828 s 1.16
cache / JaXPipe / cpu / BothRev 0.000022093 s 0.00001640085999497387 s 1.35
cache / Jax / cpu / BothRev 0.000027524 s 0.000021007039958931272 s 1.31
cache / HLOOpt / cpu / PreRev 0.00002281 s 0.000016289080003843993 s 1.40
cache / HLOOpt / cpu / PostRev 0.000021713 s 0.000019273419984529027 s 1.13
cache / HLOOpt / cpu / BothRev 0.000022537 s 0.000017125759968621422 s 1.32
cache / PartOpt / cpu / PreRev 0.000021972 s 0.000016171379975276068 s 1.36
cache / PartOpt / cpu / PostRev 0.00002734 s 0.000019693940039360317 s 1.39
cache / PartOpt / cpu / BothRev 0.000021219 s 0.000016042779989220435 s 1.32
cache / IPartOpt / cpu / PreRev 0.000022573 s 0.000015654539993192884 s 1.44
cache / IPartOpt / cpu / PostRev 0.00002402 s 0.00001996903996769106 s 1.20
cache / IPartOpt / cpu / BothRev 0.000022776 s 0.000015126959997360244 s 1.51
cache / DefOpt / cpu / PreRev 0.000022344 s 0.000015228679976644345 s 1.47
cache / DefOpt / cpu / PostRev 0.000022585 s 0.000015360740017058562 s 1.47
cache / DefOpt / cpu / BothRev 0.000022190000000000003 s 0.000015114279985937172 s 1.47
cache / IDefOpt / cpu / PreRev 0.000023259 s 0.000015915160092845325 s 1.46
cache / IDefOpt / cpu / PostRev 0.000022371 s 0.000016638339993733097 s 1.34
cache / IDefOpt / cpu / BothRev 0.000022109 s 0.000016164739990927045 s 1.37
Concat / JaXPipe / cpu / Primal 0.0000072837200059439054 s 0.000007459219978045439 s 0.98
Concat / Jax / cpu / Primal 0.000006921380017956835 s 0.0000072401800116495 s 0.96
Concat / HLOOpt / cpu / Primal 0.000006893979998494615 s 0.0000072882999938883585 s 0.95
Concat / PartOpt / cpu / Primal 0.00000673363994792453 s 0.000007322039991777274 s 0.92
Concat / IPartOpt / cpu / Primal 0.000006922380016476382 s 0.000006696399968859623 s 1.03
Concat / DefOpt / cpu / Primal 0.00000726314001440187 s 0.000006468439987656894 s 1.12
Concat / IDefOpt / cpu / Primal 0.000006981500018810039 s 0.000006598119989575935 s 1.06
Concat / JaXPipe / cpu / Forward 0.000010243020024063298 s 0.000009809859993765712 s 1.04
Concat / Jax / cpu / Forward 0.000010658020028131431 s 0.000010438159988552795 s 1.02
Concat / HLOOpt / cpu / Forward 0.000010832239986484638 s 0.000010427059996800382 s 1.04
Concat / PartOpt / cpu / Forward 0.000011084539964940632 s 0.00001027719998091925 s 1.08
Concat / IPartOpt / cpu / Forward 0.000010677139971448924 s 0.000010685560009733309 s 1.00
Concat / DefOpt / cpu / Forward 0.000010531460011407036 s 0.000010202059975199518 s 1.03
Concat / IDefOpt / cpu / Forward 0.00001071437995051383 s 0.00000999803997729032 s 1.07
Concat / JaXPipe / cpu / PreRev 0.000012318640046942165 s 0.000012138919973949667 s 1.01
Concat / JaXPipe / cpu / PostRev 0.000012623739976334035 s 0.00001107852001041465 s 1.14
Concat / JaXPipe / cpu / BothRev 0.000011935380007344063 s 0.000010961260022668284 s 1.09
Concat / Jax / cpu / BothRev 0.000011716140006683418 s 0.00001153500001237262 s 1.02
Concat / HLOOpt / cpu / PreRev 0.000012878919987997504 s 0.000012123960059398087 s 1.06
Concat / HLOOpt / cpu / PostRev 0.000014366100031111272 s 0.00001364110003123642 s 1.05
Concat / HLOOpt / cpu / BothRev 0.000012416499985192784 s 0.000010911959998338716 s 1.14
Concat / PartOpt / cpu / PreRev 0.000012328079947110382 s 0.00001119947999541182 s 1.10
Concat / PartOpt / cpu / PostRev 0.000012449759960873052 s 0.000011387060012566508 s 1.09
Concat / PartOpt / cpu / BothRev 0.000012604939993252628 s 0.000012363559999357677 s 1.02
Concat / IPartOpt / cpu / PreRev 0.000012384740030029206 s 0.000012330119961916352 s 1.00
Concat / IPartOpt / cpu / PostRev 0.00001236557999618526 s 0.000011503100004119916 s 1.07
Concat / IPartOpt / cpu / BothRev 0.00001235948001522047 s 0.000011435539990998222 s 1.08
Concat / DefOpt / cpu / PreRev 0.000012153559955550009 s 0.000011610259998633407 s 1.05
Concat / DefOpt / cpu / PostRev 0.0000125023399687052 s 0.000011580839955058763 s 1.08
Concat / DefOpt / cpu / BothRev 0.000012181199945189292 s 0.000011326979984005448 s 1.08
Concat / IDefOpt / cpu / PreRev 0.00001198115998704452 s 0.000011609920002229044 s 1.03
Concat / IDefOpt / cpu / PostRev 0.000012231559985593776 s 0.000011728899999070565 s 1.04
Concat / IDefOpt / cpu / BothRev 0.000012601960006577427 s 0.0000115970599836146 s 1.09
Concat / JaXPipe / cuda / Primal 0.0000024 s 0.000001888 s 1.27
Concat / Jax / cuda / Primal 0.0000024 s 0.0000019200000000000003 s 1.25
Concat / HLOOpt / cuda / Primal 0.0000024 s 0.0000019200000000000003 s 1.25
Concat / PartOpt / cuda / Primal 0.000002431 s 0.0000019200000000000003 s 1.27
Concat / IPartOpt / cuda / Primal 0.0000024 s 0.0000019200000000000003 s 1.25
Concat / DefOpt / cuda / Primal 0.0000024 s 0.0000019200000000000003 s 1.25
Concat / IDefOpt / cuda / Primal 0.0000024 s 0.0000019200000000000003 s 1.25
Concat / JaXPipe / cuda / Forward 0.000010816 s 0.000009887 s 1.09
Concat / Jax / cuda / Forward 0.000010656 s 0.000010176 s 1.05
Concat / HLOOpt / cuda / Forward 0.00001056 s 0.000009824 s 1.07
Concat / PartOpt / cuda / Forward 0.000010432 s 0.000009631 s 1.08
Concat / IPartOpt / cuda / Forward 0.000010528 s 0.000010304 s 1.02
Concat / DefOpt / cuda / Forward 0.00001056 s 0.000010048 s 1.05
Concat / IDefOpt / cuda / Forward 0.000010433 s 0.00000944 s 1.11
Concat / JaXPipe / cuda / PreRev 0.000017184 s 0.000016545 s 1.04
Concat / JaXPipe / cuda / PostRev 0.00001696 s 0.00001584 s 1.07
Concat / JaXPipe / cuda / BothRev 0.000016672 s 0.00001632 s 1.02
Concat / Jax / cuda / BothRev 0.00001696 s 0.000016032 s 1.06
Concat / HLOOpt / cuda / PreRev 0.000016927999999999998 s 0.000015968 s 1.06
Concat / HLOOpt / cuda / PostRev 0.000016704 s 0.00001584 s 1.05
Concat / HLOOpt / cuda / BothRev 0.000017152 s 0.000016576000000000002 s 1.03
Concat / PartOpt / cuda / PreRev 0.000016607 s 0.000016 s 1.04
Concat / PartOpt / cuda / PostRev 0.000016896000000000002 s 0.000016063999999999997 s 1.05
Concat / PartOpt / cuda / BothRev 0.000016608 s 0.000016288 s 1.02
Concat / IPartOpt / cuda / PreRev 0.000016864 s 0.000016416 s 1.03
Concat / IPartOpt / cuda / PostRev 0.000017088 s 0.00001584 s 1.08
Concat / IPartOpt / cuda / BothRev 0.000016672 s 0.000016 s 1.04
Concat / DefOpt / cuda / PreRev 0.000016672 s 0.000016192 s 1.03
Concat / DefOpt / cuda / PostRev 0.000016544 s 0.000016511 s 1.00
Concat / DefOpt / cuda / BothRev 0.000017056 s 0.00001584 s 1.08
Concat / IDefOpt / cuda / PreRev 0.000016576000000000002 s 0.00001616 s 1.03
Concat / IDefOpt / cuda / PostRev 0.000016863 s 0.000015712 s 1.07
Concat / IDefOpt / cuda / BothRev 0.00001664 s 0.000016608 s 1.00
Concat / JaXPipe / tpu / Primal 0.000001531875 s 0.0000014826 s 1.03
Concat / Jax / tpu / Primal 0.0000015232499999999998 s 0.0000014854999999999998 s 1.03
Concat / HLOOpt / tpu / Primal 0.000001526225 s 0.0000014761 s 1.03
Concat / PartOpt / tpu / Primal 0.00000152175 s 0.0000014868 s 1.02
Concat / IPartOpt / tpu / Primal 0.000001530375 s 0.000001476675 s 1.04
Concat / DefOpt / tpu / Primal 0.000001529075 s 0.0000014921 s 1.02
Concat / IDefOpt / tpu / Primal 0.0000015343 s 0.0000014804 s 1.04
Concat / JaXPipe / tpu / Forward 0.000001567825 s 0.0000015552 s 1.01
Concat / Jax / tpu / Forward 0.0000015580250000000002 s 0.000001504375 s 1.04
Concat / HLOOpt / tpu / Forward 0.000001583725 s 0.00000154035 s 1.03
Concat / PartOpt / tpu / Forward 0.00000156155 s 0.00000151385 s 1.03
Concat / IPartOpt / tpu / Forward 0.0000015799 s 0.000001547175 s 1.02
Concat / DefOpt / tpu / Forward 0.0000015544250000000002 s 0.000001514675 s 1.03
Concat / IDefOpt / tpu / Forward 0.000001575825 s 0.000001551225 s 1.02
Concat / JaXPipe / tpu / PreRev 0.000002000525 s 0.0000019532250000000003 s 1.02
Concat / JaXPipe / tpu / PostRev 0.000002093125 s 0.000002041625 s 1.03
Concat / JaXPipe / tpu / BothRev 0.0000020023750000000005 s 0.0000019443750000000003 s 1.03
Concat / Jax / tpu / BothRev 0.000002086275 s 0.000002032475 s 1.03
Concat / HLOOpt / tpu / PreRev 0.000002006625 s 0.0000019443750000000003 s 1.03
Concat / HLOOpt / tpu / PostRev 0.00000207315 s 0.000002035425 s 1.02
Concat / HLOOpt / tpu / BothRev 0.000002004675 s 0.00000195195 s 1.03
Concat / PartOpt / tpu / PreRev 0.000002073075 s 0.000002037175 s 1.02
Concat / PartOpt / tpu / PostRev 0.000002008475 s 0.000001951725 s 1.03
Concat / PartOpt / tpu / BothRev 0.00000208055 s 0.0000020347 s 1.02
Concat / IPartOpt / tpu / PreRev 0.0000020052 s 0.00000195195 s 1.03
Concat / IPartOpt / tpu / PostRev 0.000002071875 s 0.0000020362 s 1.02
Concat / IPartOpt / tpu / BothRev 0.000002009775 s 0.00000194775 s 1.03
Concat / DefOpt / tpu / PreRev 0.00000206905 s 0.0000020317 s 1.02
Concat / DefOpt / tpu / PostRev 0.00000200295 s 0.000001952 s 1.03
Concat / DefOpt / tpu / BothRev 0.0000020736 s 0.000002034425 s 1.02
Concat / IDefOpt / tpu / PreRev 0.000002004825 s 0.00000194375 s 1.03
Concat / IDefOpt / tpu / PostRev 0.000002073175 s 0.00000203705 s 1.02
Concat / IDefOpt / tpu / BothRev 0.000002019025 s 0.000001943275 s 1.04
Concat / JaXPipe / cpu / Primal 0.000016271 s 0.000007459219978045439 s 2.18
Concat / Jax / cpu / Primal 0.000016385 s 0.0000072401800116495 s 2.26
Concat / HLOOpt / cpu / Primal 0.000016308 s 0.0000072882999938883585 s 2.24
Concat / PartOpt / cpu / Primal 0.000016233999999999997 s 0.000007322039991777274 s 2.22
Concat / IPartOpt / cpu / Primal 0.000016502 s 0.000006696399968859623 s 2.46
Concat / DefOpt / cpu / Primal 0.000016357 s 0.000006468439987656894 s 2.53
Concat / IDefOpt / cpu / Primal 0.000016388 s 0.000006598119989575935 s 2.48
Concat / JaXPipe / cpu / Forward 0.000022094 s 0.000009809859993765712 s 2.25
Concat / Jax / cpu / Forward 0.000021833000000000003 s 0.000010438159988552795 s 2.09
Concat / HLOOpt / cpu / Forward 0.000021861 s 0.000010427059996800382 s 2.10
Concat / PartOpt / cpu / Forward 0.000021752 s 0.00001027719998091925 s 2.12
Concat / IPartOpt / cpu / Forward 0.000021993 s 0.000010685560009733309 s 2.06
Concat / DefOpt / cpu / Forward 0.000021576 s 0.000010202059975199518 s 2.11
Concat / IDefOpt / cpu / Forward 0.000021602 s 0.00000999803997729032 s 2.16
Concat / JaXPipe / cpu / PreRev 0.000024894 s 0.000012138919973949667 s 2.05
Concat / JaXPipe / cpu / PostRev 0.000024047 s 0.00001107852001041465 s 2.17
Concat / JaXPipe / cpu / BothRev 0.00002412 s 0.000010961260022668284 s 2.20
Concat / Jax / cpu / BothRev 0.000025439 s 0.00001153500001237262 s 2.21
Concat / HLOOpt / cpu / PreRev 0.000024853 s 0.000012123960059398087 s 2.05
Concat / HLOOpt / cpu / PostRev 0.000024595 s 0.00001364110003123642 s 1.80
Concat / HLOOpt / cpu / BothRev 0.000023725 s 0.000010911959998338716 s 2.17
Concat / PartOpt / cpu / PreRev 0.00002466 s 0.00001119947999541182 s 2.20
Concat / PartOpt / cpu / PostRev 0.000024136 s 0.000011387060012566508 s 2.12
Concat / PartOpt / cpu / BothRev 0.000024135 s 0.000012363559999357677 s 1.95
Concat / IPartOpt / cpu / PreRev 0.000024518 s 0.000012330119961916352 s 1.99
Concat / IPartOpt / cpu / PostRev 0.000024315 s 0.000011503100004119916 s 2.11
Concat / IPartOpt / cpu / BothRev 0.000024385 s 0.000011435539990998222 s 2.13
Concat / DefOpt / cpu / PreRev 0.000024394 s 0.000011610259998633407 s 2.10
Concat / DefOpt / cpu / PostRev 0.000024824 s 0.000011580839955058763 s 2.14
Concat / DefOpt / cpu / BothRev 0.000024718 s 0.000011326979984005448 s 2.18
Concat / IDefOpt / cpu / PreRev 0.000025113 s 0.000011609920002229044 s 2.16
Concat / IDefOpt / cpu / PostRev 0.000024463 s 0.000011728899999070565 s 2.09
Concat / IDefOpt / cpu / BothRev 0.000024249 s 0.0000115970599836146 s 2.09
const_scatter / JaXPipe / cpu / Primal 0.000006942440004422678 s 0.000006414199997379911 s 1.08
const_scatter / Jax / cpu / Primal 0.000006782720001865527 s 0.000006381759994837921 s 1.06
const_scatter / HLOOpt / cpu / Primal 0.000007485039996026898 s 0.000007092419973560027 s 1.06
const_scatter / PartOpt / cpu / Primal 0.000006834959976913524 s 0.000006388200008586864 s 1.07
const_scatter / IPartOpt / cpu / Primal 0.000006775319989174023 s 0.000007287040007213363 s 0.93
const_scatter / DefOpt / cpu / Primal 0.00000752270002521982 s 0.000006763159999536583 s 1.11
const_scatter / IDefOpt / cpu / Primal 0.000007737359965176438 s 0.000007052119999571005 s 1.10
const_scatter / JaXPipe / cpu / Forward 0.0000114659000064421 s 0.000010790360001919908 s 1.06
const_scatter / Jax / cpu / Forward 0.000010237739979856995 s 0.000009555260021443246 s 1.07
const_scatter / HLOOpt / cpu / Forward 0.000011819380006272697 s 0.000011073819969169565 s 1.07
const_scatter / PartOpt / cpu / Forward 0.00001176101997771184 s 0.000010634000009304144 s 1.11
const_scatter / IPartOpt / cpu / Forward 0.000011280500029897666 s 0.000010531140032981057 s 1.07
const_scatter / DefOpt / cpu / Forward 0.000011436239983595442 s 0.000010720419995777774 s 1.07
const_scatter / IDefOpt / cpu / Forward 0.00001176203997601988 s 0.000010679459992388729 s 1.10
const_scatter / JaXPipe / cpu / PreRev 0.0002922202800436 s 0.0002888223200261 s 1.01
const_scatter / JaXPipe / cpu / PostRev 0.0002837334600189 s 0.0002839717799724 s 1.00
const_scatter / JaXPipe / cpu / BothRev 0.0002878149000389 s 0.0002854160999595 s 1.01
const_scatter / Jax / cpu / BothRev 0.0002857083800063 s 0.0002842025400059 s 1.01
const_scatter / HLOOpt / cpu / PreRev 0.0002875807999771 s 0.0002855643800376 s 1.01
const_scatter / HLOOpt / cpu / PostRev 0.0002913887399972 s 0.0002889120800318 s 1.01
const_scatter / HLOOpt / cpu / BothRev 0.0002873395399456 s 0.0002857153400054 s 1.01
const_scatter / PartOpt / cpu / PreRev 0.0002878320199943 s 0.000283256499988 s 1.02
const_scatter / PartOpt / cpu / PostRev 0.0002853346000392 s 0.0002831990800314 s 1.01
const_scatter / PartOpt / cpu / BothRev 0.0002866487000574 s 0.0002844155800357 s 1.01
const_scatter / IPartOpt / cpu / PreRev 0.0002906409999559 s 0.0002835344200138 s 1.03
const_scatter / IPartOpt / cpu / PostRev 0.0002858707000177 s 0.0002824978799799 s 1.01
const_scatter / IPartOpt / cpu / BothRev 0.0002870949999851 s 0.0002845528399939 s 1.01
const_scatter / DefOpt / cpu / PreRev 0.0002860695399976 s 0.0002848829400136 s 1.00
const_scatter / DefOpt / cpu / PostRev 0.0002878341799987 s 0.0002848792600161 s 1.01
const_scatter / DefOpt / cpu / BothRev 0.0002880138000091 s 0.0002847787200244 s 1.01
const_scatter / IDefOpt / cpu / PreRev 0.0002853387000504 s 0.0002828045599926 s 1.01
const_scatter / IDefOpt / cpu / PostRev 0.0002877711400014 s 0.0002841483999873 s 1.01
const_scatter / IDefOpt / cpu / BothRev 0.0002866851399448 s 0.0002834731599705 s 1.01
const_scatter / JaXPipe / cuda / Primal 0.0000024 s 0.000001887 s 1.27
const_scatter / Jax / cuda / Primal 0.0000024 s 0.000001887 s 1.27
const_scatter / HLOOpt / cuda / Primal 0.0000024 s 0.000001887 s 1.27
const_scatter / PartOpt / cuda / Primal 0.0000024 s 0.000001888 s 1.27
const_scatter / IPartOpt / cuda / Primal 0.000002431 s 0.000001887 s 1.29
const_scatter / DefOpt / cuda / Primal 0.0000024 s 0.000001887 s 1.27
const_scatter / IDefOpt / cuda / Primal 0.0000024 s 0.000001887 s 1.27
const_scatter / JaXPipe / cuda / Forward 0.000010624 s 0.000009632 s 1.10
const_scatter / Jax / cuda / Forward 0.000010528 s 0.00001008 s 1.04
const_scatter / HLOOpt / cuda / Forward 0.000010848 s 0.000010016 s 1.08
const_scatter / PartOpt / cuda / Forward 0.000010752 s 0.000009568 s 1.12
const_scatter / IPartOpt / cuda / Forward 0.000010752 s 0.000010081 s 1.07
const_scatter / DefOpt / cuda / Forward 0.000010592 s 0.000010048 s 1.05
const_scatter / IDefOpt / cuda / Forward 0.000010592 s 0.000010144 s 1.04
const_scatter / JaXPipe / cuda / PreRev 0.000016672 s 0.000016576000000000002 s 1.01
const_scatter / JaXPipe / cuda / PostRev 0.000016672 s 0.000016096 s 1.04
const_scatter / JaXPipe / cuda / BothRev 0.00001648 s 0.000015935999999999998 s 1.03
const_scatter / Jax / cuda / BothRev 0.000016608 s 0.000016576000000000002 s 1.00
const_scatter / HLOOpt / cuda / PreRev 0.000016768000000000003 s 0.000016416 s 1.02
const_scatter / HLOOpt / cuda / PostRev 0.000016672 s 0.000016383999999999998 s 1.02
const_scatter / HLOOpt / cuda / BothRev 0.00001664 s 0.00001616 s 1.03
const_scatter / PartOpt / cuda / PreRev 0.000016927999999999998 s 0.000015744 s 1.08
const_scatter / PartOpt / cuda / PostRev 0.000016864 s 0.00001616 s 1.04
const_scatter / PartOpt / cuda / BothRev 0.000016351 s 0.000016031 s 1.02
const_scatter / IPartOpt / cuda / PreRev 0.000016416 s 0.000016063999999999997 s 1.02
const_scatter / IPartOpt / cuda / PostRev 0.00001648 s 0.00001664 s 0.99
const_scatter / IPartOpt / cuda / BothRev 0.000016544 s 0.000016255999999999998 s 1.02
const_scatter / DefOpt / cuda / PreRev 0.000017056 s 0.000016288 s 1.05
const_scatter / DefOpt / cuda / PostRev 0.000016768000000000003 s 0.000015935999999999998 s 1.05
const_scatter / DefOpt / cuda / BothRev 0.000016704 s 0.000015904000000000002 s 1.05
const_scatter / IDefOpt / cuda / PreRev 0.000016704 s 0.000015808 s 1.06
const_scatter / IDefOpt / cuda / PostRev 0.000016896000000000002 s 0.00001712 s 0.99
const_scatter / IDefOpt / cuda / BothRev 0.000016672 s 0.000015935999999999998 s 1.05
const_scatter / JaXPipe / tpu / Primal 0.00000380255 s 0.000003813725 s 1.00
const_scatter / Jax / tpu / Primal 0.00000380495 s 0.000003810925 s 1.00
const_scatter / HLOOpt / tpu / Primal 0.00000380265 s 0.000003826475 s 0.99
const_scatter / PartOpt / tpu / Primal 0.0000038142 s 0.000003823775 s 1.00
const_scatter / IPartOpt / tpu / Primal 0.000003787525 s 0.000003800625 s 1.00
const_scatter / DefOpt / tpu / Primal 0.000003814275 s 0.0000038115 s 1.00
const_scatter / IDefOpt / tpu / Primal 0.00000380185 s 0.000003787725 s 1.00
const_scatter / JaXPipe / tpu / Forward 0.00000645125 s 0.000006466275 s 1.00
const_scatter / Jax / tpu / Forward 0.00000648315 s 0.0000065071 s 1.00
const_scatter / HLOOpt / tpu / Forward 0.0000064634 s 0.000006460749999999999 s 1.00
const_scatter / PartOpt / tpu / Forward 0.00000648155 s 0.000006514025 s 1.00
const_scatter / IPartOpt / tpu / Forward 0.0000064754 s 0.00000643815 s 1.01
const_scatter / DefOpt / tpu / Forward 0.000006464299999999999 s 0.00000649575 s 1.00
const_scatter / IDefOpt / tpu / Forward 0.00000645075 s 0.000006461475 s 1.00
const_scatter / JaXPipe / tpu / PreRev 0.0000066127 s 0.00000667555 s 0.99
const_scatter / JaXPipe / tpu / PostRev 0.000006620374999999999 s 0.000006664774999999999 s 0.99
const_scatter / JaXPipe / tpu / BothRev 0.00000659845 s 0.0000066576250000000005 s 0.99
const_scatter / Jax / tpu / BothRev 0.000006601825 s 0.0000066598 s 0.99
const_scatter / HLOOpt / tpu / PreRev 0.0000066093 s 0.0000066682 s 0.99
const_scatter / HLOOpt / tpu / PostRev 0.00000660655 s 0.000006652350000000001 s 0.99
const_scatter / HLOOpt / tpu / BothRev 0.0000066264 s 0.000006684425000000001 s 0.99
const_scatter / PartOpt / tpu / PreRev 0.000006608074999999999 s 0.000006659425 s 0.99
const_scatter / PartOpt / tpu / PostRev 0.000006594449999999999 s 0.0000066629 s 0.99
const_scatter / PartOpt / tpu / BothRev 0.00000662995 s 0.000006664474999999999 s 0.99
const_scatter / IPartOpt / tpu / PreRev 0.000006589424999999999 s 0.000006663175 s 0.99
const_scatter / IPartOpt / tpu / PostRev 0.000006604125 s 0.000006668525 s 0.99
const_scatter / IPartOpt / tpu / BothRev 0.0000065919500000000005 s 0.00000667465 s 0.99
const_scatter / DefOpt / tpu / PreRev 0.00000662365 s 0.000006678150000000001 s 0.99
const_scatter / DefOpt / tpu / PostRev 0.0000066049 s 0.0000066493 s 0.99
const_scatter / DefOpt / tpu / BothRev 0.0000066047 s 0.000006677975 s 0.99
const_scatter / IDefOpt / tpu / PreRev 0.000006596575 s 0.000006650975 s 0.99
const_scatter / IDefOpt / tpu / PostRev 0.000006620825 s 0.000006669874999999999 s 0.99
const_scatter / IDefOpt / tpu / BothRev 0.000006612 s 0.00000665875 s 0.99
const_scatter / JaXPipe / cpu / Primal 0.000016258000000000003 s 0.000006414199997379911 s 2.53
const_scatter / Jax / cpu / Primal 0.000015996 s 0.000006381759994837921 s 2.51
const_scatter / HLOOpt / cpu / Primal 0.000016801 s 0.000007092419973560027 s 2.37
const_scatter / PartOpt / cpu / Primal 0.00001609 s 0.000006388200008586864 s 2.52
const_scatter / IPartOpt / cpu / Primal 0.00001604 s 0.000007287040007213363 s 2.20
const_scatter / DefOpt / cpu / Primal 0.000017035999999999997 s 0.000006763159999536583 s 2.52
const_scatter / IDefOpt / cpu / Primal 0.000017076 s 0.000007052119999571005 s 2.42
const_scatter / JaXPipe / cpu / Forward 0.000022700000000000003 s 0.000010790360001919908 s 2.10
const_scatter / Jax / cpu / Forward 0.00002104 s 0.000009555260021443246 s 2.20
const_scatter / HLOOpt / cpu / Forward 0.000022671 s 0.000011073819969169565 s 2.05
const_scatter / PartOpt / cpu / Forward 0.000022578 s 0.000010634000009304144 s 2.12
const_scatter / IPartOpt / cpu / Forward 0.000022519 s 0.000010531140032981057 s 2.14
const_scatter / DefOpt / cpu / Forward 0.000022517 s 0.000010720419995777774 s 2.10
const_scatter / IDefOpt / cpu / Forward 0.000022841 s 0.000010679459992388729 s 2.14
const_scatter / JaXPipe / cpu / PreRev 0.0005289009999999 s 0.0002888223200261 s 1.83
const_scatter / JaXPipe / cpu / PostRev 0.000528158 s 0.0002839717799724 s 1.86
const_scatter / JaXPipe / cpu / BothRev 0.000534416 s 0.0002854160999595 s 1.87
const_scatter / Jax / cpu / BothRev 0.000535907 s 0.0002842025400059 s 1.89
const_scatter / HLOOpt / cpu / PreRev 0.000547216 s 0.0002855643800376 s 1.92
const_scatter / HLOOpt / cpu / PostRev 0.000531717 s 0.0002889120800318 s 1.84
const_scatter / HLOOpt / cpu / BothRev 0.000538897 s 0.0002857153400054 s 1.89
const_scatter / PartOpt / cpu / PreRev 0.000546002 s 0.000283256499988 s 1.93
const_scatter / PartOpt / cpu / PostRev 0.000538957 s 0.0002831990800314 s 1.90
const_scatter / PartOpt / cpu / BothRev 0.00055507 s 0.0002844155800357 s 1.95
const_scatter / IPartOpt / cpu / PreRev 0.000530415 s 0.0002835344200138 s 1.87
const_scatter / IPartOpt / cpu / PostRev 0.0005393409999999 s 0.0002824978799799 s 1.91
const_scatter / IPartOpt / cpu / BothRev 0.00053379 s 0.0002845528399939 s 1.88
const_scatter / DefOpt / cpu / PreRev 0.000542865 s 0.0002848829400136 s 1.91
const_scatter / DefOpt / cpu / PostRev 0.000531501 s 0.0002848792600161 s 1.87
const_scatter / DefOpt / cpu / BothRev 0.000533267 s 0.0002847787200244 s 1.87
const_scatter / IDefOpt / cpu / PreRev 0.000526363 s 0.0002828045599926 s 1.86
const_scatter / IDefOpt / cpu / PostRev 0.000543177 s 0.0002841483999873 s 1.91
const_scatter / IDefOpt / cpu / BothRev 0.00053693 s 0.0002834731599705 s 1.89
GenDot / JaXPipe / cpu / Primal 0.00000785711996286409 s 0.000007298380041902419 s 1.08
GenDot / Jax / cpu / Primal 0.000007979060001161997 s 0.000007048700008454034 s 1.13
GenDot / HLOOpt / cpu / Primal 0.000008108880037980271 s 0.00000794292001046415 s 1.02
GenDot / PartOpt / cpu / Primal 0.000007248740021168487 s 0.000006769920055376133 s 1.07
GenDot / IPartOpt / cpu / Primal 0.000007814359969415819 s 0.000007515740007875138 s 1.04
GenDot / DefOpt / cpu / Primal 0.00000745851998544822 s 0.000007409619984173332 s 1.01
GenDot / IDefOpt / cpu / Primal 0.000007287919979717117 s 0.000007622040011483477 s 0.96
GenDot / JaXPipe / cpu / Forward 0.000011590379999688594 s 0.000011569240014068782 s 1.00
GenDot / Jax / cpu / Forward 0.000010831540021172258 s 0.000009718079991216654 s 1.11
GenDot / HLOOpt / cpu / Forward 0.000011714159973053028 s 0.000011745740011974704 s 1.00
GenDot / PartOpt / cpu / Forward 0.00001143725995461864 s 0.000011508360021252884 s 0.99
GenDot / IPartOpt / cpu / Forward 0.00001174454007013992 s 0.000011128580026706914 s 1.06
GenDot / DefOpt / cpu / Forward 0.000011835579980470355 s 0.000010798399998748209 s 1.10
GenDot / IDefOpt / cpu / Forward 0.00001138744002673775 s 0.00001077956000699487 s 1.06
GenDot / JaXPipe / cpu / PreRev 0.00001151437999396876 s 0.000010790999995151652 s 1.07
GenDot / JaXPipe / cpu / PostRev 0.000010849239997696711 s 0.000010381940037405 s 1.05
GenDot / JaXPipe / cpu / BothRev 0.000011640240018095938 s 0.000011352839983373995 s 1.03
GenDot / Jax / cpu / BothRev 0.00001215170002069499 s 0.000011042040014217492 s 1.10
GenDot / HLOOpt / cpu / PreRev 0.000011743560035029076 s 0.000011735859989130403 s 1.00
GenDot / HLOOpt / cpu / PostRev 0.00001364941998872382 s 0.000013340659997993496 s 1.02
GenDot / HLOOpt / cpu / BothRev 0.000012027779966956586 s 0.000010679819979486638 s 1.13
GenDot / PartOpt / cpu / PreRev 0.000011477460011519725 s 0.000010874159997911192 s 1.06
GenDot / PartOpt / cpu / PostRev 0.00001051305997862073 s 0.000011166739977852555 s 0.94
GenDot / PartOpt / cpu / BothRev 0.000012005600037809929 s 0.000011426359942561248 s 1.05
GenDot / IPartOpt / cpu / PreRev 0.000011795319987868423 s 0.000010923079989879626 s 1.08
GenDot / IPartOpt / cpu / PostRev 0.000010875979996853858 s 0.000010891519996221178 s 1.00
GenDot / IPartOpt / cpu / BothRev 0.000011626440045802157 s 0.000011160020021634409 s 1.04
GenDot / DefOpt / cpu / PreRev 0.00001203732000249147 s 0.000011140939977849483 s 1.08
GenDot / DefOpt / cpu / PostRev 0.000011123419972136616 s 0.000011533419992701966 s 0.96
GenDot / DefOpt / cpu / BothRev 0.000011107980044471332 s 0.000010761060020740842 s 1.03
GenDot / IDefOpt / cpu / PreRev 0.000011620680006672044 s 0.000010950339974442614 s 1.06
GenDot / IDefOpt / cpu / PostRev 0.00001194121996377362 s 0.00001149753998106462 s 1.04
GenDot / IDefOpt / cpu / BothRev 0.000011630720027824282 s 0.000010877040003833827 s 1.07
GenDot / JaXPipe / cuda / Primal 0.000002496 s 0.000002016 s 1.24
GenDot / Jax / cuda / Primal 0.000002496 s 0.000002015 s 1.24
GenDot / HLOOpt / cuda / Primal 0.000002495 s 0.000001984 s 1.26
GenDot / PartOpt / cuda / Primal 0.000002496 s 0.000002016 s 1.24
GenDot / IPartOpt / cuda / Primal 0.000002527 s 0.000002016 s 1.25
GenDot / DefOpt / cuda / Primal 0.000002495 s 0.000002015 s 1.24
GenDot / IDefOpt / cuda / Primal 0.000002495 s 0.000002015 s 1.24
GenDot / JaXPipe / cuda / Forward 0.000010496 s 0.000009409 s 1.12
GenDot / Jax / cuda / Forward 0.000010624 s 0.000009728 s 1.09
GenDot / HLOOpt / cuda / Forward 0.000010624 s 0.000009696 s 1.10
GenDot / PartOpt / cuda / Forward 0.000010528 s 0.000009792 s 1.08
GenDot / IPartOpt / cuda / Forward 0.000010624 s 0.000009024 s 1.18
GenDot / DefOpt / cuda / Forward 0.00001056 s 0.000009984 s 1.06
GenDot / IDefOpt / cuda / Forward 0.000010336 s 0.000009855 s 1.05
GenDot / JaXPipe / cuda / PreRev 0.00001056 s 0.000008703000000000001 s 1.21
GenDot / JaXPipe / cuda / PostRev 0.000010464 s 0.000009856 s 1.06
GenDot / JaXPipe / cuda / BothRev 0.000010656 s 0.000009536 s 1.12
GenDot / Jax / cuda / BothRev 0.000010656 s 0.000009312000000000002 s 1.14
GenDot / HLOOpt / cuda / PreRev 0.000010752 s 0.000009504 s 1.13
GenDot / HLOOpt / cuda / PostRev 0.000010528 s 0.000014752 s 0.71
GenDot / HLOOpt / cuda / BothRev 0.000010816 s 0.00001104 s 0.98
GenDot / PartOpt / cuda / PreRev 0.00001088 s 0.000010912 s 1.00
GenDot / PartOpt / cuda / PostRev 0.000010592 s 0.000011104 s 0.95
GenDot / PartOpt / cuda / BothRev 0.000010464 s 0.000011104 s 0.94
GenDot / IPartOpt / cuda / PreRev 0.000010336 s 0.000010784 s 0.96
GenDot / IPartOpt / cuda / PostRev 0.000010592 s 0.000011072 s 0.96
GenDot / IPartOpt / cuda / BothRev 0.000010624 s 0.00000992 s 1.07
GenDot / DefOpt / cuda / PreRev 0.000010688 s 0.000009856 s 1.08
GenDot / DefOpt / cuda / PostRev 0.000010687 s 0.000009568 s 1.12
GenDot / DefOpt / cuda / BothRev 0.000010592 s 0.000009856 s 1.07
GenDot / IDefOpt / cuda / PreRev 0.000010496 s 0.000009856 s 1.06
GenDot / IDefOpt / cuda / PostRev 0.000010624 s 0.000009536 s 1.11
GenDot / IDefOpt / cuda / BothRev 0.00001056 s 0.000009632 s 1.10
GenDot / JaXPipe / tpu / Primal 9.30175e-7 s 9.29875e-7 s 1.00
GenDot / Jax / tpu / Primal 9.2525e-7 s 9.2665e-7 s 1.00
GenDot / HLOOpt / tpu / Primal 0.000001571525 s 0.00000158335 s 0.99
GenDot / PartOpt / tpu / Primal 9.261e-7 s 9.2605e-7 s 1.00
GenDot / IPartOpt / tpu / Primal 9.30125e-7 s 9.30725e-7 s 1.00
GenDot / DefOpt / tpu / Primal 0.0000014832999999999998 s 0.0000015009749999999998 s 0.99
GenDot / IDefOpt / tpu / Primal 0.0000015776499999999998 s 0.000001590625 s 0.99
GenDot / JaXPipe / tpu / Forward 0.00000316455 s 0.00000317515 s 1.00
GenDot / Jax / tpu / Forward 0.00000232115 s 0.00000233035 s 1.00
GenDot / HLOOpt / tpu / Forward 0.00000310585 s 0.0000031335 s 0.99
GenDot / PartOpt / tpu / Forward 0.0000032151000000000004 s 0.000003231625 s 0.99
GenDot / IPartOpt / tpu / Forward 0.0000031104500000000004 s 0.0000031330000000000003 s 0.99
GenDot / DefOpt / tpu / Forward 0.00000321245 s 0.0000032337 s 0.99
GenDot / IDefOpt / tpu / Forward 0.000003118575 s 0.0000031358 s 0.99
GenDot / JaXPipe / tpu / PreRev 0.000002957375 s 0.000002985475 s 0.99
GenDot / JaXPipe / tpu / PostRev 0.000002415725 s 0.000002399875 s 1.01
GenDot / JaXPipe / tpu / BothRev 0.00000294695 s 0.000002993825 s 0.98
GenDot / Jax / tpu / BothRev 0.000002412525 s 0.00000239935 s 1.01
GenDot / HLOOpt / tpu / PreRev 0.00000295365 s 0.0000029822 s 0.99
GenDot / HLOOpt / tpu / PostRev 0.0000029358250000000004 s 0.0000029412 s 1.00
GenDot / HLOOpt / tpu / BothRev 0.0000029637750000000003 s 0.000003004175 s 0.99
GenDot / PartOpt / tpu / PreRev 0.0000029361 s 0.000002957425 s 0.99
GenDot / PartOpt / tpu / PostRev 0.0000023942 s 0.000002396275 s 1.00
GenDot / PartOpt / tpu / BothRev 0.0000029226500000000003 s 0.000002944225 s 0.99
GenDot / IPartOpt / tpu / PreRev 0.00000295645 s 0.000002986025 s 0.99
GenDot / IPartOpt / tpu / PostRev 0.000002407825 s 0.000002399275 s 1.00
GenDot / IPartOpt / tpu / BothRev 0.0000029629 s 0.0000029928 s 0.99
GenDot / DefOpt / tpu / PreRev 0.000002928875 s 0.00000293195 s 1.00
GenDot / DefOpt / tpu / PostRev 0.0000029557 s 0.000002990275 s 0.99
GenDot / DefOpt / tpu / BothRev 0.000002927825 s 0.00000293015 s 1.00
GenDot / IDefOpt / tpu / PreRev 0.0000029506 s 0.000002993225 s 0.99
GenDot / IDefOpt / tpu / PostRev 0.0000029288 s 0.000002935 s 1.00
GenDot / IDefOpt / tpu / BothRev 0.000002955075 s 0.0000029784500000000003 s 0.99
GenDot / JaXPipe / cpu / Primal 0.000018780000000000003 s 0.000007298380041902419 s 2.57
GenDot / Jax / cpu / Primal 0.000018568 s 0.000007048700008454034 s 2.63
GenDot / HLOOpt / cpu / Primal 0.000017395999999999997 s 0.00000794292001046415 s 2.19
GenDot / PartOpt / cpu / Primal 0.000018348 s 0.000006769920055376133 s 2.71
GenDot / IPartOpt / cpu / Primal 0.00001866 s 0.000007515740007875138 s 2.48
GenDot / DefOpt / cpu / Primal 0.000017811 s 0.000007409619984173332 s 2.40
GenDot / IDefOpt / cpu / Primal 0.000017219000000000002 s 0.000007622040011483477 s 2.26
GenDot / JaXPipe / cpu / Forward 0.000024197 s 0.000011569240014068782 s 2.09
GenDot / Jax / cpu / Forward 0.00002535 s 0.000009718079991216654 s 2.61
GenDot / HLOOpt / cpu / Forward 0.000023621 s 0.000011745740011974704 s 2.01
GenDot / PartOpt / cpu / Forward 0.000023907 s 0.000011508360021252884 s 2.08
GenDot / IPartOpt / cpu / Forward 0.000023977 s 0.000011128580026706914 s 2.15
GenDot / DefOpt / cpu / Forward 0.000023875 s 0.000010798399998748209 s 2.21
GenDot / IDefOpt / cpu / Forward 0.000024121 s 0.00001077956000699487 s 2.24
GenDot / JaXPipe / cpu / PreRev 0.000024632 s 0.000010790999995151652 s 2.28
GenDot / JaXPipe / cpu / PostRev 0.000025074 s 0.000010381940037405 s 2.42
GenDot / JaXPipe / cpu / BothRev 0.000023867 s 0.000011352839983373995 s 2.10
GenDot / Jax / cpu / BothRev 0.000025014 s 0.000011042040014217492 s 2.27
GenDot / HLOOpt / cpu / PreRev 0.00002406 s 0.000011735859989130403 s 2.05
GenDot / HLOOpt / cpu / PostRev 0.000025006 s 0.000013340659997993496 s 1.87
GenDot / HLOOpt / cpu / BothRev 0.00002395 s 0.000010679819979486638 s 2.24
GenDot / PartOpt / cpu / PreRev 0.000024535 s 0.000010874159997911192 s 2.26
GenDot / PartOpt / cpu / PostRev 0.00002498 s 0.000011166739977852555 s 2.24
GenDot / PartOpt / cpu / BothRev 0.000024243 s 0.000011426359942561248 s 2.12
GenDot / IPartOpt / cpu / PreRev 0.000024087 s 0.000010923079989879626 s 2.21
GenDot / IPartOpt / cpu / PostRev 0.00002538 s 0.000010891519996221178 s 2.33
GenDot / IPartOpt / cpu / BothRev 0.000024242 s 0.000011160020021634409 s 2.17
GenDot / DefOpt / cpu / PreRev 0.000024062 s 0.000011140939977849483 s 2.16
GenDot / DefOpt / cpu / PostRev 0.000024387 s 0.000011533419992701966 s 2.11
GenDot / DefOpt / cpu / BothRev 0.000024593 s 0.000010761060020740842 s 2.29
GenDot / IDefOpt / cpu / PreRev 0.00002408 s 0.000010950339974442614 s 2.20
GenDot / IDefOpt / cpu / PostRev 0.000024644 s 0.00001149753998106462 s 2.14
GenDot / IDefOpt / cpu / BothRev 0.000024445 s 0.000010877040003833827 s 2.25
hlo_ffi / JaXPipe / cpu / Primal 0.000010300519998054367 s 0.000010284639974997845 s 1.00
hlo_ffi / Jax / cpu / Primal 0.000010605800007397192 s 0.000009667560016168864 s 1.10
hlo_ffi / HLOOpt / cpu / Primal 0.000010277780011165306 s 0.000010284979998687047 s 1.00
hlo_ffi / PartOpt / cpu / Primal 0.0000097367600119469 s 0.00000951727997744456 s 1.02
hlo_ffi / IPartOpt / cpu / Primal 0.000010110559978784296 s 0.000010334960024920293 s 0.98
hlo_ffi / DefOpt / cpu / Primal 0.000010027260004790151 s 0.000010058319985546404 s 1.00
hlo_ffi / IDefOpt / cpu / Primal 0.000009868300021480535 s 0.000009962120047930512 s 0.99
hlo_ffi / JaXPipe / cpu / Forward 0.000014289099981397158 s 0.000014187380020302954 s 1.01
hlo_ffi / Jax / cpu / Forward 0.000014404420026039589 s 0.000013873439984308789 s 1.04
hlo_ffi / HLOOpt / cpu / Forward 0.00001463523998609162 s 0.00001400121997903625 s 1.05
hlo_ffi / PartOpt / cpu / Forward 0.000014507260029859026 s 0.00001432539997040294 s 1.01
hlo_ffi / IPartOpt / cpu / Forward 0.000014647200068793608 s 0.000014166840010148008 s 1.03
hlo_ffi / DefOpt / cpu / Forward 0.000014413099961529952 s 0.000014209059972927207 s 1.01
hlo_ffi / IDefOpt / cpu / Forward 0.000014777579990550291 s 0.000014338580022013049 s 1.03
hlo_ffi / JaXPipe / cpu / PreRev 0.00001507004002633039 s 0.000014636240020990954 s 1.03
hlo_ffi / JaXPipe / cpu / PostRev 0.000014439260021390507 s 0.000014110460006122592 s 1.02
hlo_ffi / JaXPipe / cpu / BothRev 0.000014131999996607192 s 0.00001390391998029372 s 1.02
hlo_ffi / Jax / cpu / BothRev 0.00001473306000661978 s 0.000014062179998290958 s 1.05
hlo_ffi / HLOOpt / cpu / PreRev 0.00001478114003475639 s 0.000014028819978193496 s 1.05
hlo_ffi / HLOOpt / cpu / PostRev 0.000016831040011311415 s 0.00001594318003299122 s 1.06
hlo_ffi / HLOOpt / cpu / BothRev 0.000014247620019887107 s 0.000014413599992622038 s 0.99
hlo_ffi / PartOpt / cpu / PreRev 0.000014889680014675833 s 0.00001416582001183997 s 1.05
hlo_ffi / PartOpt / cpu / PostRev 0.000014285999986896058 s 0.00001393637999171915 s 1.03
hlo_ffi / PartOpt / cpu / BothRev 0.00001478552000662603 s 0.0000143006199868978 s 1.03
hlo_ffi / IPartOpt / cpu / PreRev 0.00001464822000343702 s 0.000013980779976918712 s 1.05
hlo_ffi / IPartOpt / cpu / PostRev 0.000014256760023272363 s 0.000014213119957275922 s 1.00
hlo_ffi / IPartOpt / cpu / BothRev 0.000014587660007236992 s 0.000014170980020935531 s 1.03
hlo_ffi / DefOpt / cpu / PreRev 0.000015153680014918792 s 0.000014085239999985788 s 1.08
hlo_ffi / DefOpt / cpu / PostRev 0.000014386819957508124 s 0.000014310660035334876 s 1.01
hlo_ffi / DefOpt / cpu / BothRev 0.00001439107999431144 s 0.000013982019991090056 s 1.03
hlo_ffi / IDefOpt / cpu / PreRev 0.0000153047399908246 s 0.000014316020051410303 s 1.07
hlo_ffi / IDefOpt / cpu / PostRev 0.000014281279982242268 s 0.000014057119979042908 s 1.02
hlo_ffi / IDefOpt / cpu / BothRev 0.000014564420007445732 s 0.000014209260025381808 s 1.02
hlo_ffi / JaXPipe / cuda / Primal 0.000002336 s 0.000001983 s 1.18
hlo_ffi / Jax / cuda / Primal 0.000002336 s 0.000001983 s 1.18
hlo_ffi / HLOOpt / cuda / Primal 0.000002335 s 0.000001984 s 1.18
hlo_ffi / PartOpt / cuda / Primal 0.000002336 s 0.000001983 s 1.18
hlo_ffi / IPartOpt / cuda / Primal 0.000002336 s 0.000001984 s 1.18
hlo_ffi / DefOpt / cuda / Primal 0.000002336 s 0.000001983 s 1.18
hlo_ffi / IDefOpt / cuda / Primal 0.000002336 s 0.000001983 s 1.18
hlo_ffi / JaXPipe / cuda / Forward 0.000002431 s 0.000002049 s 1.19
hlo_ffi / Jax / cuda / Forward 0.000002431 s 0.00000208 s 1.17
hlo_ffi / HLOOpt / cuda / Forward 0.000002431 s 0.00000208 s 1.17
hlo_ffi / PartOpt / cuda / Forward 0.000002432 s 0.000002048 s 1.19
hlo_ffi / IPartOpt / cuda / Forward 0.000002431 s 0.00000208 s 1.17
hlo_ffi / DefOpt / cuda / Forward 0.000002431 s 0.00000208 s 1.17
hlo_ffi / IDefOpt / cuda / Forward 0.000002431 s 0.000002048 s 1.19
hlo_ffi / JaXPipe / cuda / PreRev 0.0000024 s 0.000002047 s 1.17
hlo_ffi / JaXPipe / cuda / PostRev 0.000002431 s 0.000002048 s 1.19
hlo_ffi / JaXPipe / cuda / BothRev 0.000002431 s 0.000002048 s 1.19
hlo_ffi / Jax / cuda / BothRev 0.0000024 s 0.000002048 s 1.17
hlo_ffi / HLOOpt / cuda / PreRev 0.0000024 s 0.000002048 s 1.17
hlo_ffi / HLOOpt / cuda / PostRev 0.000002431 s 0.000002048 s 1.19
hlo_ffi / HLOOpt / cuda / BothRev 0.0000024 s 0.000002047 s 1.17
hlo_ffi / PartOpt / cuda / PreRev 0.0000024 s 0.000002048 s 1.17
hlo_ffi / PartOpt / cuda / PostRev 0.0000024 s 0.000002048 s 1.17
hlo_ffi / PartOpt / cuda / BothRev 0.000002431 s 0.000002047 s 1.19
hlo_ffi / IPartOpt / cuda / PreRev 0.000002431 s 0.000002047 s 1.19
hlo_ffi / IPartOpt / cuda / PostRev 0.0000024 s 0.000002048 s 1.17
hlo_ffi / IPartOpt / cuda / BothRev 0.000002431 s 0.000002047 s 1.19
hlo_ffi / DefOpt / cuda / PreRev 0.0000024 s 0.000002047 s 1.17
hlo_ffi / DefOpt / cuda / PostRev 0.000002399 s 0.000002048 s 1.17
hlo_ffi / DefOpt / cuda / BothRev 0.000002431 s 0.000002047 s 1.19
hlo_ffi / IDefOpt / cuda / PreRev 0.0000024 s 0.000002048 s 1.17
hlo_ffi / IDefOpt / cuda / PostRev 0.0000024 s 0.000002048 s 1.17
hlo_ffi / IDefOpt / cuda / BothRev 0.0000024 s 0.000002047 s 1.17
hlo_ffi / JaXPipe / tpu / Primal 9.27275e-7 s 9.186e-7 s 1.01
hlo_ffi / Jax / tpu / Primal 9.519e-7 s 9.53225e-7 s 1.00
hlo_ffi / HLOOpt / tpu / Primal 9.074e-7 s 8.98e-7 s 1.01
hlo_ffi / PartOpt / tpu / Primal 9.55275e-7 s 9.543e-7 s 1.00
hlo_ffi / IPartOpt / tpu / Primal 9.19475e-7 s 8.99475e-7 s 1.02
hlo_ffi / DefOpt / tpu / Primal 9.5195e-7 s 9.518e-7 s 1.00
hlo_ffi / IDefOpt / tpu / Primal 9.0965e-7 s 8.9985e-7 s 1.01
hlo_ffi / JaXPipe / tpu / Forward 9.49775e-7 s 9.4875e-7 s 1.00
hlo_ffi / Jax / tpu / Forward 9.81225e-7 s 9.810750000000002e-7 s 1.00
hlo_ffi / HLOOpt / tpu / Forward 9.74325e-7 s 9.737250000000002e-7 s 1.00
hlo_ffi / PartOpt / tpu / Forward 9.3465e-7 s 9.3345e-7 s 1.00
hlo_ffi / IPartOpt / tpu / Forward 9.74925e-7 s 9.7375e-7 s 1.00
hlo_ffi / DefOpt / tpu / Forward 9.34775e-7 s 9.33125e-7 s 1.00
hlo_ffi / IDefOpt / tpu / Forward 9.75e-7 s 9.739e-7 s 1.00
hlo_ffi / JaXPipe / tpu / PreRev 9.38325e-7 s 9.315e-7 s 1.01
hlo_ffi / JaXPipe / tpu / PostRev 9.65875e-7 s 9.64925e-7 s 1.00
hlo_ffi / JaXPipe / tpu / BothRev 9.6265e-7 s 9.62e-7 s 1.00
hlo_ffi / Jax / tpu / BothRev 9.654e-7 s 9.64325e-7 s 1.00
hlo_ffi / HLOOpt / tpu / PreRev 9.626e-7 s 9.62075e-7 s 1.00
hlo_ffi / HLOOpt / tpu / PostRev 9.65125e-7 s 9.6465e-7 s 1.00
hlo_ffi / HLOOpt / tpu / BothRev 9.63225e-7 s 9.6245e-7 s 1.00
hlo_ffi / PartOpt / tpu / PreRev 9.65575e-7 s 9.64725e-7 s 1.00
hlo_ffi / PartOpt / tpu / PostRev 9.628499999999998e-7 s 9.616e-7 s 1.00
hlo_ffi / PartOpt / tpu / BothRev 9.65275e-7 s 9.6475e-7 s 1.00
hlo_ffi / IPartOpt / tpu / PreRev 9.62775e-7 s 9.62225e-7 s 1.00
hlo_ffi / IPartOpt / tpu / PostRev 9.6545e-7 s 9.649e-7 s 1.00
hlo_ffi / IPartOpt / tpu / BothRev 9.62425e-7 s 9.6195e-7 s 1.00
hlo_ffi / DefOpt / tpu / PreRev 9.652e-7 s 9.648e-7 s 1.00
hlo_ffi / DefOpt / tpu / PostRev 9.628499999999998e-7 s 9.618500000000002e-7 s 1.00
hlo_ffi / DefOpt / tpu / BothRev 9.655e-7 s 9.644e-7 s 1.00
hlo_ffi / IDefOpt / tpu / PreRev 9.625749999999998e-7 s 9.617e-7 s 1.00
hlo_ffi / IDefOpt / tpu / PostRev 9.65575e-7 s 9.64525e-7 s 1.00
hlo_ffi / IDefOpt / tpu / BothRev 9.62e-7 s 9.61725e-7 s 1.00
hlo_ffi / JaXPipe / cpu / Primal 0.000021912 s 0.000010284639974997845 s 2.13
hlo_ffi / Jax / cpu / Primal 0.000021297 s 0.000009667560016168864 s 2.20
hlo_ffi / HLOOpt / cpu / Primal 0.000021895 s 0.000010284979998687047 s 2.13
hlo_ffi / PartOpt / cpu / Primal 0.000021773 s 0.00000951727997744456 s 2.29
hlo_ffi / IPartOpt / cpu / Primal 0.000021533 s 0.000010334960024920293 s 2.08
hlo_ffi / DefOpt / cpu / Primal 0.000021824 s 0.000010058319985546404 s 2.17
hlo_ffi / IDefOpt / cpu / Primal 0.00002156 s 0.000009962120047930512 s 2.16
hlo_ffi / JaXPipe / cpu / Forward 0.000029767 s 0.000014187380020302954 s 2.10
hlo_ffi / Jax / cpu / Forward 0.000028821 s 0.000013873439984308789 s 2.08
hlo_ffi / HLOOpt / cpu / Forward 0.000029477 s 0.00001400121997903625 s 2.11
hlo_ffi / PartOpt / cpu / Forward 0.000029477 s 0.00001432539997040294 s 2.06
hlo_ffi / IPartOpt / cpu / Forward 0.000051235 s 0.000014166840010148008 s 3.62
hlo_ffi / DefOpt / cpu / Forward 0.000029381 s 0.000014209059972927207 s 2.07
hlo_ffi / IDefOpt / cpu / Forward 0.000029461 s 0.000014338580022013049 s 2.05
hlo_ffi / JaXPipe / cpu / PreRev 0.000029691 s 0.000014636240020990954 s 2.03
hlo_ffi / JaXPipe / cpu / PostRev 0.000029516 s 0.000014110460006122592 s 2.09
hlo_ffi / JaXPipe / cpu / BothRev 0.000029423000000000003 s 0.00001390391998029372 s 2.12
hlo_ffi / Jax / cpu / BothRev 0.000029443 s 0.000014062179998290958 s 2.09
hlo_ffi / HLOOpt / cpu / PreRev 0.000029658 s 0.000014028819978193496 s 2.11
hlo_ffi / HLOOpt / cpu / PostRev 0.000029211 s 0.00001594318003299122 s 1.83
hlo_ffi / HLOOpt / cpu / BothRev 0.000029152 s 0.000014413599992622038 s 2.02
hlo_ffi / PartOpt / cpu / PreRev 0.000030429 s 0.00001416582001183997 s 2.15
hlo_ffi / PartOpt / cpu / PostRev 0.000029354 s 0.00001393637999171915 s 2.11
hlo_ffi / PartOpt / cpu / BothRev 0.000029272 s 0.0000143006199868978 s 2.05
hlo_ffi / IPartOpt / cpu / PreRev 0.000029437000000000003 s 0.000013980779976918712 s 2.11
hlo_ffi / IPartOpt / cpu / PostRev 0.000029139 s 0.000014213119957275922 s 2.05
hlo_ffi / IPartOpt / cpu / BothRev 0.000029071 s 0.000014170980020935531 s 2.05
hlo_ffi / DefOpt / cpu / PreRev 0.000028776 s 0.000014085239999985788 s 2.04
hlo_ffi / DefOpt / cpu / PostRev 0.000028943 s 0.000014310660035334876 s 2.02
hlo_ffi / DefOpt / cpu / BothRev 0.000029463 s 0.000013982019991090056 s 2.11
hlo_ffi / IDefOpt / cpu / PreRev 0.000029611 s 0.000014316020051410303 s 2.07
hlo_ffi / IDefOpt / cpu / PostRev 0.000028978 s 0.000014057119979042908 s 2.06
hlo_ffi / IDefOpt / cpu / BothRev 0.000028856 s 0.000014209260025381808 s 2.03
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / JaXPipe / cpu / Primal 0.0010039214000244 s 0.0009100650000618 s 1.10
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / Jax / cpu / Primal 0.0009614745999897 s 0.0009172600000056 s 1.05
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / HLOOpt / cpu / Primal 0.0010102142000505 s 0.0010144106002371 s 1.00
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / PartOpt / cpu / Primal 0.000998028199956 s 0.0008878655999069 s 1.12
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IPartOpt / cpu / Primal 0.0009538141998746 s 0.0009339372000795 s 1.02
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / DefOpt / cpu / Primal 0.0009959525999875 s 0.0009609147998162 s 1.04
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IDefOpt / cpu / Primal 0.0010155622000638 s 0.0009962112000721 s 1.02
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / JaXPipe / cpu / Forward 0.0023812780001208 s 0.0022266109999691 s 1.07
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / Jax / cpu / Forward 0.0024514062001799 s 0.0023936325999784 s 1.02
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / HLOOpt / cpu / Forward 0.002346252999996 s 0.0021506333998331 s 1.09
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / PartOpt / cpu / Forward 0.0023489485999562 s 0.0023729827998067 s 0.99
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IPartOpt / cpu / Forward 0.0023375856000711 s 0.0022126754000964 s 1.06
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / DefOpt / cpu / Forward 0.0023471927999707 s 0.0022777125999709 s 1.03
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IDefOpt / cpu / Forward 0.0023751313999127 s 0.0021534280000196 s 1.10
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / JaXPipe / cpu / PreRev 0.00564026739994 s 0.0053284488000826 s 1.06
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / JaXPipe / cpu / PostRev 0.0062279716000375 s 0.0056835981999938 s 1.10
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / JaXPipe / cpu / BothRev 0.0068182679999154 s 0.0052150422000522 s 1.31
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / Jax / cpu / BothRev 0.0073328499999661 s 0.0060669299999062 s 1.21
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / HLOOpt / cpu / PreRev 0.0058033710000927 s 0.0037044362001324 s 1.57
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / HLOOpt / cpu / PostRev 0.0060785982000197 s 0.0064265009998962 s 0.95
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / HLOOpt / cpu / BothRev 0.0058111185998313 s 0.0037709973999881 s 1.54
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / PartOpt / cpu / PreRev 0.0059775396000986 s 0.0061171409999587 s 0.98
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / PartOpt / cpu / PostRev 0.0059417072001451 s 0.003930622000098 s 1.51
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / PartOpt / cpu / BothRev 0.0065524416000698 s 0.0062224390000665 s 1.05
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IPartOpt / cpu / PreRev 0.0055133678000856 s 0.0037930405999759 s 1.45
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IPartOpt / cpu / PostRev 0.0060521013999277 s 0.0063594317998649 s 0.95
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IPartOpt / cpu / BothRev 0.0063977804000387 s 0.0037310412000806 s 1.71
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / DefOpt / cpu / PreRev 0.0061178789999758 s 0.0063060106000193 s 0.97
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / DefOpt / cpu / PostRev 0.0049902105999535 s 0.0037435317999552 s 1.33
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / DefOpt / cpu / BothRev 0.0054361225999855 s 0.006533680999928 s 0.83
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IDefOpt / cpu / PreRev 0.0055036247999851 s 0.0037383431999842 s 1.47
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IDefOpt / cpu / PostRev 0.0055093387998567 s 0.0064990833999218 s 0.85
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IDefOpt / cpu / BothRev 0.005504897999981 s 0.0041281814000285 s 1.33
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / JaXPipe / cuda / Primal 0.00029491 s 0.000280289 s 1.05
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / Jax / cuda / Primal 0.000295199 s 0.000279648 s 1.06
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / HLOOpt / cuda / Primal 0.000301438 s 0.000287072 s 1.05
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / PartOpt / cuda / Primal 0.000294335 s 0.000278816 s 1.06
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IPartOpt / cuda / Primal 0.000294719 s 0.000279456 s 1.05
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / DefOpt / cuda / Primal 0.000302719 s 0.0002867199999999 s 1.06
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IDefOpt / cuda / Primal 0.000301823 s 0.000287648 s 1.05
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / JaXPipe / cuda / Forward 0.0005823009999999 s 0.0005568 s 1.05
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / Jax / cuda / Forward 0.000567806 s 0.000538881 s 1.05
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / HLOOpt / cuda / Forward 0.0005823009999999 s 0.000557632 s 1.04
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / PartOpt / cuda / Forward 0.000582493 s 0.000557569 s 1.04
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IPartOpt / cuda / Forward 0.0005827489999999 s 0.00055776 s 1.04
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / DefOpt / cuda / Forward 0.00058275 s 0.000557344 s 1.05
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IDefOpt / cuda / Forward 0.000582654 s 0.000558176 s 1.04
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / JaXPipe / cuda / PreRev 0.001055772 s 0.0010261119999999 s 1.03
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / JaXPipe / cuda / PostRev 0.001011708 s 0.00098528 s 1.03
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / JaXPipe / cuda / BothRev 0.001050588 s 0.001025441 s 1.02
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / Jax / cuda / BothRev 0.00100358 s 0.00098752 s 1.02
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / HLOOpt / cuda / PreRev 0.001036988 s 0.001013985 s 1.02
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / HLOOpt / cuda / PostRev 0.001059932 s 0.001037569 s 1.02
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / HLOOpt / cuda / BothRev 0.001036828 s 0.001011681 s 1.02
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / PartOpt / cuda / PreRev 0.0010510679999999 s 0.001027937 s 1.02
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / PartOpt / cuda / PostRev 0.000999292 s 0.000975713 s 1.02
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / PartOpt / cuda / BothRev 0.0010524759999999 s 0.001026113 s 1.03
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IPartOpt / cuda / PreRev 0.001051323 s 0.001026017 s 1.02
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IPartOpt / cuda / PostRev 0.000999612 s 0.000974785 s 1.03
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IPartOpt / cuda / BothRev 0.0010527 s 0.001026593 s 1.03
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / DefOpt / cuda / PreRev 0.001052444 s 0.001022561 s 1.03
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / DefOpt / cuda / PostRev 0.000986972 s 0.000960865 s 1.03
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / DefOpt / cuda / BothRev 0.001052444 s 0.001022273 s 1.03
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IDefOpt / cuda / PreRev 0.0010527 s 0.0010197449999999 s 1.03
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IDefOpt / cuda / PostRev 0.0010538829999999 s 0.001020033 s 1.03
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IDefOpt / cuda / BothRev 0.001053116 s 0.001020737 s 1.03
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / JaXPipe / tpu / Primal 0.00012902825 s 0.00012457525 s 1.04
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / Jax / tpu / Primal 0.000123856 s 0.000126331 s 0.98
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / HLOOpt / tpu / Primal 0.0001572454999999 s 0.00015273475 s 1.03
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / PartOpt / tpu / Primal 0.0001311532499999 s 0.00013435025 s 0.98
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IPartOpt / tpu / Primal 0.00013611 s 0.00013136425 s 1.04
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / DefOpt / tpu / Primal 0.0001448299999999 s 0.000147822 s 0.98
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IDefOpt / tpu / Primal 0.000156234 s 0.0001507285 s 1.04
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / JaXPipe / tpu / Forward 0.0002137427499999 s 0.00021200975 s 1.01
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / Jax / tpu / Forward 0.00026156525 s 0.00026122475 s 1.00
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / HLOOpt / tpu / Forward 0.0002200979999999 s 0.0002122135 s 1.04
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / PartOpt / tpu / Forward 0.0002134945 s 0.00021845075 s 0.98
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IPartOpt / tpu / Forward 0.0002163495 s 0.000212008 s 1.02
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / DefOpt / tpu / Forward 0.0002181427499999 s 0.0002185624999999 s 1.00
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IDefOpt / tpu / Forward 0.000216343 s 0.00021246625 s 1.02
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / JaXPipe / tpu / PreRev 0.00035519525 s 0.00035407075 s 1.00
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / JaXPipe / tpu / PostRev 0.000256345 s 0.0002561415 s 1.00
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / JaXPipe / tpu / BothRev 0.000356134 s 0.00035483825 s 1.00
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / Jax / tpu / BothRev 0.000257403 s 0.0002573317499999 s 1.00
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / HLOOpt / tpu / PreRev 0.00035591675 s 0.00035495725 s 1.00
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / HLOOpt / tpu / PostRev 0.0002921275 s 0.000291605 s 1.00
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / HLOOpt / tpu / BothRev 0.0003558854999999 s 0.0003548719999999 s 1.00
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / PartOpt / tpu / PreRev 0.0003564505 s 0.00035591275 s 1.00
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / PartOpt / tpu / PostRev 0.00027174825 s 0.0002716535 s 1.00
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / PartOpt / tpu / BothRev 0.0003562185 s 0.0003560215 s 1.00
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IPartOpt / tpu / PreRev 0.000356099 s 0.0003549675 s 1.00
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IPartOpt / tpu / PostRev 0.000272676 s 0.00027230025 s 1.00
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IPartOpt / tpu / BothRev 0.0003562935 s 0.00035493875 s 1.00
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / DefOpt / tpu / PreRev 0.0003590067499999 s 0.0003586415 s 1.00
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / DefOpt / tpu / PostRev 0.00028354525 s 0.00028358525 s 1.00
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / DefOpt / tpu / BothRev 0.0003587524999999 s 0.0003578824999999 s 1.00
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IDefOpt / tpu / PreRev 0.0003580875 s 0.00035715925 s 1.00
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IDefOpt / tpu / PostRev 0.0003016697499999 s 0.00030180325 s 1.00
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IDefOpt / tpu / BothRev 0.00035880775 s 0.0003571105 s 1.00
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / JaXPipe / cpu / Primal 0.002625761 s 0.0009100650000618 s 2.89
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / Jax / cpu / Primal 0.002773359 s 0.0009172600000056 s 3.02
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / HLOOpt / cpu / Primal 0.002904247 s 0.0010144106002371 s 2.86
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / PartOpt / cpu / Primal 0.00256553 s 0.0008878655999069 s 2.89
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IPartOpt / cpu / Primal 0.002778128 s 0.0009339372000795 s 2.97
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / DefOpt / cpu / Primal 0.002636032 s 0.0009609147998162 s 2.74
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IDefOpt / cpu / Primal 0.002799902 s 0.0009962112000721 s 2.81
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / JaXPipe / cpu / Forward 0.006817701 s 0.0022266109999691 s 3.06
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / Jax / cpu / Forward 0.0064618149999999 s 0.0023936325999784 s 2.70
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / HLOOpt / cpu / Forward 0.006902682 s 0.0021506333998331 s 3.21
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / PartOpt / cpu / Forward 0.006539366 s 0.0023729827998067 s 2.76
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IPartOpt / cpu / Forward 0.007164774 s 0.0022126754000964 s 3.24
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / DefOpt / cpu / Forward 0.007001921 s 0.0022777125999709 s 3.07
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IDefOpt / cpu / Forward 0.006407242 s 0.0021534280000196 s 2.98
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / JaXPipe / cpu / PreRev 0.011527896 s 0.0053284488000826 s 2.16
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / JaXPipe / cpu / PostRev 0.00996614 s 0.0056835981999938 s 1.75
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / JaXPipe / cpu / BothRev 0.010669916 s 0.0052150422000522 s 2.05
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / Jax / cpu / BothRev 0.010400828 s 0.0060669299999062 s 1.71
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / HLOOpt / cpu / PreRev 0.009659235 s 0.0037044362001324 s 2.61
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / HLOOpt / cpu / PostRev 0.010672925 s 0.0064265009998962 s 1.66
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / HLOOpt / cpu / BothRev 0.00973761 s 0.0037709973999881 s 2.58
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / PartOpt / cpu / PreRev 0.00932807 s 0.0061171409999587 s 1.52
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / PartOpt / cpu / PostRev 0.0097632049999999 s 0.003930622000098 s 2.48
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / PartOpt / cpu / BothRev 0.010051062 s 0.0062224390000665 s 1.62
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IPartOpt / cpu / PreRev 0.009343837 s 0.0037930405999759 s 2.46
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IPartOpt / cpu / PostRev 0.010767216 s 0.0063594317998649 s 1.69
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IPartOpt / cpu / BothRev 0.009076604 s 0.0037310412000806 s 2.43
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / DefOpt / cpu / PreRev 0.009829074 s 0.0063060106000193 s 1.56
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / DefOpt / cpu / PostRev 0.008634329 s 0.0037435317999552 s 2.31
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / DefOpt / cpu / BothRev 0.01011538 s 0.006533680999928 s 1.55
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IDefOpt / cpu / PreRev 0.009421947 s 0.0037383431999842 s 2.52
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IDefOpt / cpu / PostRev 0.009242628 s 0.0064990833999218 s 1.42
llama_dim_288_hidden_dim_768_n_layers_6_n_heads_6_n_kv_heads_6_vocab_size_32000_seq_len_256 / IDefOpt / cpu / BothRev 0.010162697 s 0.0041281814000285 s 2.46
scatter_sum / JaXPipe / cpu / Primal 0.00000891479998244904 s 0.000008388459973502904 s 1.06
scatter_sum / Jax / cpu / Primal 0.000008681560029799584 s 0.000007979300016813795 s 1.09
scatter_sum / HLOOpt / cpu / Primal 0.000008318920044985134 s 0.000008007539981917944 s 1.04
scatter_sum / PartOpt / cpu / Primal 0.000007885479981268872 s 0.000008130419992085081 s 0.97
scatter_sum / IPartOpt / cpu / Primal 0.000008869099983712659 s 0.000007912760020190035 s 1.12
scatter_sum / DefOpt / cpu / Primal 0.000007743220076008583 s 0.000008083239990810397 s 0.96
scatter_sum / IDefOpt / cpu / Primal 0.000008080520001385593 s 0.000007748680000077002 s 1.04
scatter_sum / JaXPipe / cpu / Forward 0.00001307714001086424 s 0.000011205040054846904 s 1.17
scatter_sum / Jax / cpu / Forward 0.000013185999996494502 s 0.000011392160013201649 s 1.16
scatter_sum / HLOOpt / cpu / Forward 0.00001344131996120268 s 0.000012204599997858169 s 1.10
scatter_sum / PartOpt / cpu / Forward 0.00001287555996896117 s 0.00001159305999863136 s 1.11
scatter_sum / IPartOpt / cpu / Forward 0.000012895499967271462 s 0.000012415900000632972 s 1.04
scatter_sum / DefOpt / cpu / Forward 0.00001320730000770709 s 0.000011770320006689872 s 1.12
scatter_sum / IDefOpt / cpu / Forward 0.000013759560006292305 s 0.000012068499972883727 s 1.14
scatter_sum / JaXPipe / cpu / PreRev 0.000012424539963831194 s 0.000011716140024873311 s 1.06
scatter_sum / JaXPipe / cpu / PostRev 0.000012827079990529457 s 0.000011641600021903288 s 1.10
scatter_sum / JaXPipe / cpu / BothRev 0.00001327384000433085 s 0.000011614340000960513 s 1.14
scatter_sum / Jax / cpu / BothRev 0.000012478819962780108 s 0.000012184240022179438 s 1.02
scatter_sum / HLOOpt / cpu / PreRev 0.00001311911996708659 s 0.000013060520022918356 s 1.00
scatter_sum / HLOOpt / cpu / PostRev 0.000015047379993120556 s 0.00001368204001664708 s 1.10
scatter_sum / HLOOpt / cpu / BothRev 0.000012374719999570515 s 0.000011856519995490089 s 1.04
scatter_sum / PartOpt / cpu / PreRev 0.000012537739967228846 s 0.000011116820005554472 s 1.13
scatter_sum / PartOpt / cpu / PostRev 0.000012782399981006163 s 0.00001186079993203748 s 1.08
scatter_sum / PartOpt / cpu / BothRev 0.000013314620009623468 s 0.000012269600038052886 s 1.09
scatter_sum / IPartOpt / cpu / PreRev 0.00001272332004191412 s 0.000011251180003455376 s 1.13
scatter_sum / IPartOpt / cpu / PostRev 0.0000127420800163236 s 0.000011199540022062138 s 1.14
scatter_sum / IPartOpt / cpu / BothRev 0.00001244659999429132 s 0.000011617919954005627 s 1.07
scatter_sum / DefOpt / cpu / PreRev 0.00001303895995079074 s 0.000011557839989109198 s 1.13
scatter_sum / DefOpt / cpu / PostRev 0.000012511300074038443 s 0.000011769799975809292 s 1.06
scatter_sum / DefOpt / cpu / BothRev 0.00001265794002392795 s 0.00001178735999019409 s 1.07
scatter_sum / IDefOpt / cpu / PreRev 0.000012090279988115073 s 0.000012042659973303673 s 1.00
scatter_sum / IDefOpt / cpu / PostRev 0.000012204719960209331 s 0.000011558019987205625 s 1.06
scatter_sum / IDefOpt / cpu / BothRev 0.00001252989997738041 s 0.00001182768003673118 s 1.06
scatter_sum / JaXPipe / cuda / Primal 0.000010464 s 0.000009344 s 1.12
scatter_sum / Jax / cuda / Primal 0.000010624 s 0.000009472 s 1.12
scatter_sum / HLOOpt / cuda / Primal 0.000010431 s 0.0000096 s 1.09
scatter_sum / PartOpt / cuda / Primal 0.000010496 s 0.000009727 s 1.08
scatter_sum / IPartOpt / cuda / Primal 0.000010687 s 0.000010175 s 1.05
scatter_sum / DefOpt / cuda / Primal 0.000010719 s 0.000009952 s 1.08
scatter_sum / IDefOpt / cuda / Primal 0.000010911 s 0.000009375 s 1.16
scatter_sum / JaXPipe / cuda / Forward 0.000016927000000000002 s 0.000016575 s 1.02
scatter_sum / Jax / cuda / Forward 0.000017216 s 0.0000168 s 1.02
scatter_sum / HLOOpt / cuda / Forward 0.000017216 s 0.000016864 s 1.02
scatter_sum / PartOpt / cuda / Forward 0.000017056 s 0.000017088 s 1.00
scatter_sum / IPartOpt / cuda / Forward 0.000018048 s 0.000016736 s 1.08
scatter_sum / DefOpt / cuda / Forward 0.000017344 s 0.000016735 s 1.04
scatter_sum / IDefOpt / cuda / Forward 0.000017344 s 0.000016864 s 1.03
scatter_sum / JaXPipe / cuda / PreRev 0.00001808 s 0.000016544 s 1.09
scatter_sum / JaXPipe / cuda / PostRev 0.00001712 s 0.0000168 s 1.02
scatter_sum / JaXPipe / cuda / BothRev 0.000017184 s 0.000016832 s 1.02
scatter_sum / Jax / cuda / BothRev 0.000017823 s 0.000016672 s 1.07
scatter_sum / HLOOpt / cuda / PreRev 0.000017152 s 0.000017216 s 1.00
scatter_sum / HLOOpt / cuda / PostRev 0.000016768000000000003 s 0.000016672 s 1.01
scatter_sum / HLOOpt / cuda / BothRev 0.000016704 s 0.000016096 s 1.04
scatter_sum / PartOpt / cuda / PreRev 0.000017344 s 0.000017024 s 1.02
scatter_sum / PartOpt / cuda / PostRev 0.000016864 s 0.000016255999999999998 s 1.04
scatter_sum / PartOpt / cuda / BothRev 0.000016895 s 0.000016992 s 0.99
scatter_sum / IPartOpt / cuda / PreRev 0.000017152 s 0.000017024 s 1.01
scatter_sum / IPartOpt / cuda / PostRev 0.000016927999999999998 s 0.000016063999999999997 s 1.05
scatter_sum / IPartOpt / cuda / BothRev 0.000017344 s 0.000016255999999999998 s 1.07
scatter_sum / DefOpt / cuda / PreRev 0.000017056 s 0.000016864 s 1.01
scatter_sum / DefOpt / cuda / PostRev 0.000017023 s 0.000016224 s 1.05
scatter_sum / DefOpt / cuda / BothRev 0.000017216 s 0.000016992 s 1.01
scatter_sum / IDefOpt / cuda / PreRev 0.000016512 s 0.00001712 s 0.96
scatter_sum / IDefOpt / cuda / PostRev 0.000017087 s 0.000016736 s 1.02
scatter_sum / IDefOpt / cuda / BothRev 0.000017056 s 0.000016768000000000003 s 1.02
scatter_sum / JaXPipe / tpu / Primal 0.000001350375 s 0.00000134455 s 1.00
scatter_sum / Jax / tpu / Primal 0.0000014045750000000002 s 0.0000014054 s 1.00
scatter_sum / HLOOpt / tpu / Primal 0.000001350675 s 0.000001344475 s 1.00
scatter_sum / PartOpt / tpu / Primal 0.0000014045 s 0.0000014051749999999998 s 1.00
scatter_sum / IPartOpt / tpu / Primal 0.00000135025 s 0.000001343975 s 1.00
scatter_sum / DefOpt / tpu / Primal 0.000001404225 s 0.0000014051 s 1.00
scatter_sum / IDefOpt / tpu / Primal 0.0000013498 s 0.00000134455 s 1.00
scatter_sum / JaXPipe / tpu / Forward 0.00000270605 s 0.0000027043 s 1.00
scatter_sum / Jax / tpu / Forward 0.000002726925 s 0.0000027198 s 1.00
scatter_sum / HLOOpt / tpu / Forward 0.000002704975 s 0.000002700425 s 1.00
scatter_sum / PartOpt / tpu / Forward 0.00000269205 s 0.000002684225 s 1.00
scatter_sum / IPartOpt / tpu / Forward 0.000002711975 s 0.000002698875 s 1.00
scatter_sum / DefOpt / tpu / Forward 0.0000026949 s 0.0000026813 s 1.01
scatter_sum / IDefOpt / tpu / Forward 0.0000027153 s 0.000002702775 s 1.00
scatter_sum / JaXPipe / tpu / PreRev 0.000002686125 s 0.000002679125 s 1.00
scatter_sum / JaXPipe / tpu / PostRev 0.0000026841 s 0.00000268105 s 1.00
scatter_sum / JaXPipe / tpu / BothRev 0.000002708375 s 0.00000270165 s 1.00
scatter_sum / Jax / tpu / BothRev 0.0000027438249999999995 s 0.000002743075 s 1.00
scatter_sum / HLOOpt / tpu / PreRev 0.0000027030749999999995 s 0.000002693675 s 1.00
scatter_sum / HLOOpt / tpu / PostRev 0.0000027426250000000003 s 0.0000027449 s 1.00
scatter_sum / HLOOpt / tpu / BothRev 0.000002703675 s 0.00000270395 s 1.00
scatter_sum / PartOpt / tpu / PreRev 0.000002751425 s 0.0000027547 s 1.00
scatter_sum / PartOpt / tpu / PostRev 0.000002704425 s 0.000002690425 s 1.01
scatter_sum / PartOpt / tpu / BothRev 0.000002741725 s 0.0000027442 s 1.00
scatter_sum / IPartOpt / tpu / PreRev 0.0000027050000000000004 s 0.000002699325 s 1.00
scatter_sum / IPartOpt / tpu / PostRev 0.000002745 s 0.0000027351249999999995 s 1.00
scatter_sum / IPartOpt / tpu / BothRev 0.0000027031 s 0.0000027066 s 1.00
scatter_sum / DefOpt / tpu / PreRev 0.0000027501750000000003 s 0.0000027368500000000003 s 1.00
scatter_sum / DefOpt / tpu / PostRev 0.00000270405 s 0.000002691025 s 1.00
scatter_sum / DefOpt / tpu / BothRev 0.000002743625 s 0.00000274225 s 1.00
scatter_sum / IDefOpt / tpu / PreRev 0.0000027037 s 0.000002698475 s 1.00
scatter_sum / IDefOpt / tpu / PostRev 0.000002742 s 0.0000027346 s 1.00
scatter_sum / IDefOpt / tpu / BothRev 0.000002708125 s 0.0000027007 s 1.00
scatter_sum / JaXPipe / cpu / Primal 0.000019616 s 0.000008388459973502904 s 2.34
scatter_sum / Jax / cpu / Primal 0.000019654 s 0.000007979300016813795 s 2.46
scatter_sum / HLOOpt / cpu / Primal 0.000019547 s 0.000008007539981917944 s 2.44
scatter_sum / PartOpt / cpu / Primal 0.000019908 s 0.000008130419992085081 s 2.45
scatter_sum / IPartOpt / cpu / Primal 0.000019405 s 0.000007912760020190035 s 2.45
scatter_sum / DefOpt / cpu / Primal 0.000019763 s 0.000008083239990810397 s 2.44
scatter_sum / IDefOpt / cpu / Primal 0.00001952 s 0.000007748680000077002 s 2.52
scatter_sum / JaXPipe / cpu / Forward 0.000028219 s 0.000011205040054846904 s 2.52
scatter_sum / Jax / cpu / Forward 0.000027913 s 0.000011392160013201649 s 2.45
scatter_sum / HLOOpt / cpu / Forward 0.000027952000000000003 s 0.000012204599997858169 s 2.29
scatter_sum / PartOpt / cpu / Forward 0.000027289 s 0.00001159305999863136 s 2.35
scatter_sum / IPartOpt / cpu / Forward 0.000028189 s 0.000012415900000632972 s 2.27
scatter_sum / DefOpt / cpu / Forward 0.000027962 s 0.000011770320006689872 s 2.38
scatter_sum / IDefOpt / cpu / Forward 0.000027716 s 0.000012068499972883727 s 2.30
scatter_sum / JaXPipe / cpu / PreRev 0.000028212 s 0.000011716140024873311 s 2.41
scatter_sum / JaXPipe / cpu / PostRev 0.000028202 s 0.000011641600021903288 s 2.42
scatter_sum / JaXPipe / cpu / BothRev 0.000028132 s 0.000011614340000960513 s 2.42
scatter_sum / Jax / cpu / BothRev 0.000028274 s 0.000012184240022179438 s 2.32
scatter_sum / HLOOpt / cpu / PreRev 0.00002799 s 0.000013060520022918356 s 2.14
scatter_sum / HLOOpt / cpu / PostRev 0.000027682 s 0.00001368204001664708 s 2.02
scatter_sum / HLOOpt / cpu / BothRev 0.000028427 s 0.000011856519995490089 s 2.40
scatter_sum / PartOpt / cpu / PreRev 0.000027622 s 0.000011116820005554472 s 2.48
scatter_sum / PartOpt / cpu / PostRev 0.000028005 s 0.00001186079993203748 s 2.36
scatter_sum / PartOpt / cpu / BothRev 0.000028604000000000003 s 0.000012269600038052886 s 2.33
scatter_sum / IPartOpt / cpu / PreRev 0.000028467 s 0.000011251180003455376 s 2.53
scatter_sum / IPartOpt / cpu / PostRev 0.000028642 s 0.000011199540022062138 s 2.56
scatter_sum / IPartOpt / cpu / BothRev 0.000027965 s 0.000011617919954005627 s 2.41
scatter_sum / DefOpt / cpu / PreRev 0.00002863 s 0.000011557839989109198 s 2.48
scatter_sum / DefOpt / cpu / PostRev 0.000027672 s 0.000011769799975809292 s 2.35
scatter_sum / DefOpt / cpu / BothRev 0.000028244 s 0.00001178735999019409 s 2.40
scatter_sum / IDefOpt / cpu / PreRev 0.000028516 s 0.000012042659973303673 s 2.37
scatter_sum / IDefOpt / cpu / PostRev 0.000028134 s 0.000011558019987205625 s 2.43
scatter_sum / IDefOpt / cpu / BothRev 0.000028133 s 0.00001182768003673118 s 2.38
slicing / JaXPipe / cpu / Primal 0.0000065482799709570825 s 0.000006638120012212312 s 0.99
slicing / Jax / cpu / Primal 0.000006330080004772754 s 0.000006729740034643328 s 0.94
slicing / HLOOpt / cpu / Primal 0.0000066157199671579294 s 0.00000629295998805901 s 1.05
slicing / PartOpt / cpu / Primal 0.000006220999966899399 s 0.000006045859981895773 s 1.03
slicing / IPartOpt / cpu / Primal 0.0000064770999961183404 s 0.000006430899966289871 s 1.01
slicing / DefOpt / cpu / Primal 0.000006425020010283333 s 0.000006045840000297176 s 1.06
slicing / IDefOpt / cpu / Primal 0.000006333220026135677 s 0.000006516900029964745 s 0.97
slicing / JaXPipe / cpu / Forward 0.000010439060006319776 s 0.00000963860000410932 s 1.08
slicing / Jax / cpu / Forward 0.00001004757999908179 s 0.0000093633799951931 s 1.07
slicing / HLOOpt / cpu / Forward 0.000010833739970621537 s 0.000009715839969430815 s 1.12
slicing / PartOpt / cpu / Forward 0.000010022279957411228 s 0.000009247120015061228 s 1.08
slicing / IPartOpt / cpu / Forward 0.000010619779968692456 s 0.000009682860036264171 s 1.10
slicing / DefOpt / cpu / Forward 0.000010489020014574635 s 0.00000980009998784226 s 1.07
slicing / IDefOpt / cpu / Forward 0.000010189799995714566 s 0.00000932759996430832 s 1.09
slicing / JaXPipe / cpu / PreRev 0.000011211060036657728 s 0.000010271939991071124 s 1.09
slicing / JaXPipe / cpu / PostRev 0.000010807120033859974 s 0.000011063819965784204 s 0.98
slicing / JaXPipe / cpu / BothRev 0.00001091166001970123 s 0.00001043989999743644 s 1.05
slicing / Jax / cpu / BothRev 0.000010656760023266543 s 0.000009840040020208107 s 1.08
slicing / HLOOpt / cpu / PreRev 0.000011326640005790978 s 0.000010213960003966348 s 1.11
slicing / HLOOpt / cpu / PostRev 0.000012954499998159009 s 0.00001261207993593416 s 1.03
slicing / HLOOpt / cpu / BothRev 0.000010528479997446991 s 0.000009878719984044436 s 1.07
slicing / PartOpt / cpu / PreRev 0.000010800219997690876 s 0.000010214859985353542 s 1.06
slicing / PartOpt / cpu / PostRev 0.00001103093997699034 s 0.000009956999983842252 s 1.11
slicing / PartOpt / cpu / BothRev 0.000011035020033887122 s 0.000010311199994248454 s 1.07
slicing / IPartOpt / cpu / PreRev 0.000010606920050122426 s 0.000009981699986383318 s 1.06
slicing / IPartOpt / cpu / PostRev 0.000010787960000016027 s 0.00001045855996380851 s 1.03
slicing / IPartOpt / cpu / BothRev 0.000010163059932892791 s 0.000009998220002671588 s 1.02
slicing / DefOpt / cpu / PreRev 0.000010366580008849269 s 0.000009611019986550672 s 1.08
slicing / DefOpt / cpu / PostRev 0.000010809200002768192 s 0.000009998039986385266 s 1.08
slicing / DefOpt / cpu / BothRev 0.000010585219988570317 s 0.00000963705998401565 s 1.10
slicing / IDefOpt / cpu / PreRev 0.00001079602006029745 s 0.000010339760019633103 s 1.04
slicing / IDefOpt / cpu / PostRev 0.000010678720036594312 s 0.000009949260011126173 s 1.07
slicing / IDefOpt / cpu / BothRev 0.000010291880025761202 s 0.000009970660012186271 s 1.03
slicing / JaXPipe / cuda / Primal 0.000002271 s 0.000001887 s 1.20
slicing / Jax / cuda / Primal 0.00000224 s 0.000001887 s 1.19
slicing / HLOOpt / cuda / Primal 0.000002271 s 0.000001887 s 1.20
slicing / PartOpt / cuda / Primal 0.00000224 s 0.000001887 s 1.19
slicing / IPartOpt / cuda / Primal 0.000002271 s 0.000001888 s 1.20
slicing / DefOpt / cuda / Primal 0.000002271 s 0.000001887 s 1.20
slicing / IDefOpt / cuda / Primal 0.000002271 s 0.000001887 s 1.20
slicing / JaXPipe / cuda / Forward 0.000010688 s 0.000011328 s 0.94
slicing / Jax / cuda / Forward 0.000010304 s 0.00001088 s 0.95
slicing / HLOOpt / cuda / Forward 0.000009792 s 0.000009823 s 1.00
slicing / PartOpt / cuda / Forward 0.000010272 s 0.000010048 s 1.02
slicing / IPartOpt / cuda / Forward 0.00001088 s 0.000009824 s 1.11
slicing / DefOpt / cuda / Forward 0.000010145 s 0.000009184 s 1.10
slicing / IDefOpt / cuda / Forward 0.000010303 s 0.000009952 s 1.04
slicing / JaXPipe / cuda / PreRev 0.000010207 s 0.000009632 s 1.06
slicing / JaXPipe / cuda / PostRev 0.000010209 s 0.000010016 s 1.02
slicing / JaXPipe / cuda / BothRev 0.000010592 s 0.0000096 s 1.10
slicing / Jax / cuda / BothRev 0.000010047 s 0.000009728 s 1.03
slicing / HLOOpt / cuda / PreRev 0.00001024 s 0.000010112 s 1.01
slicing / HLOOpt / cuda / PostRev 0.000010272 s 0.000009728 s 1.06
slicing / HLOOpt / cuda / BothRev 0.000010399 s 0.000009472 s 1.10
slicing / PartOpt / cuda / PreRev 0.0000104 s 0.000009887 s 1.05
slicing / PartOpt / cuda / PostRev 0.000010208 s 0.0000096 s 1.06
slicing / PartOpt / cuda / BothRev 0.000010176 s 0.00000912 s 1.12
slicing / IPartOpt / cuda / PreRev 0.00001056 s 0.00000992 s 1.06
slicing / IPartOpt / cuda / PostRev 0.00001024 s 0.0000096 s 1.07
slicing / IPartOpt / cuda / BothRev 0.000010431 s 0.000009504 s 1.10
slicing / DefOpt / cuda / PreRev 0.000011488 s 0.000009696 s 1.18
slicing / DefOpt / cuda / PostRev 0.000011455999999999998 s 0.0000096 s 1.19
slicing / DefOpt / cuda / BothRev 0.000010528 s 0.000010016 s 1.05
slicing / IDefOpt / cuda / PreRev 0.00001072 s 0.000011104 s 0.97
slicing / IDefOpt / cuda / PostRev 0.00000976 s 0.000009504 s 1.03
slicing / IDefOpt / cuda / BothRev 0.000009984 s 0.00000976 s 1.02
slicing / JaXPipe / tpu / Primal 9.64575e-7 s 0.00000102565 s 0.94
slicing / Jax / tpu / Primal 9.7485e-7 s 9.7245e-7 s 1.00
slicing / HLOOpt / tpu / Primal 9.66875e-7 s 0.0000010265 s 0.94
slicing / PartOpt / tpu / Primal 9.7105e-7 s 9.73325e-7 s 1.00
slicing / IPartOpt / tpu / Primal 9.696249999999998e-7 s 0.0000010236000000000002 s 0.95
slicing / DefOpt / tpu / Primal 9.68275e-7 s 9.688499999999998e-7 s 1.00
slicing / IDefOpt / tpu / Primal 9.71725e-7 s 0.0000010255 s 0.95
slicing / JaXPipe / tpu / Forward 0.0000014101749999999998 s 0.000001410875 s 1.00
slicing / Jax / tpu / Forward 0.0000014199999999999995 s 0.0000014758 s 0.96
slicing / HLOOpt / tpu / Forward 0.000001516125 s 0.000001517325 s 1.00
slicing / PartOpt / tpu / Forward 0.0000014393 s 0.00000150075 s 0.96
slicing / IPartOpt / tpu / Forward 0.0000015214 s 0.0000015239249999999998 s 1.00
slicing / DefOpt / tpu / Forward 0.000001438925 s 0.0000014960499999999998 s 0.96
slicing / IDefOpt / tpu / Forward 0.00000151915 s 0.000001517025 s 1.00
slicing / JaXPipe / tpu / PreRev 0.000002377825 s 0.00000257555 s 0.92
slicing / JaXPipe / tpu / PostRev 0.0000025288500000000004 s 0.0000025172250000000003 s 1.00
slicing / JaXPipe / tpu / BothRev 0.000002399275 s 0.00000258475 s 0.93
slicing / Jax / tpu / BothRev 0.0000025425500000000003 s 0.000002545125 s 1.00
slicing / HLOOpt / tpu / PreRev 0.000002404275 s 0.000002599825 s 0.92
slicing / HLOOpt / tpu / PostRev 0.00000253525 s 0.000002543425 s 1.00
slicing / HLOOpt / tpu / BothRev 0.00000239775 s 0.000002587075 s 0.93
slicing / PartOpt / tpu / PreRev 0.0000025392749999999995 s 0.0000025336500000000003 s 1.00
slicing / PartOpt / tpu / PostRev 0.000002402925 s 0.0000025847 s 0.93
slicing / PartOpt / tpu / BothRev 0.0000025355 s 0.000002535825 s 1.00
slicing / IPartOpt / tpu / PreRev 0.000002404025 s 0.0000025883499999999995 s 0.93
slicing / IPartOpt / tpu / PostRev 0.000002539025 s 0.000002543975 s 1.00
slicing / IPartOpt / tpu / BothRev 0.0000024053 s 0.0000025956 s 0.93
slicing / DefOpt / tpu / PreRev 0.000002545075 s 0.0000025313750000000004 s 1.01
slicing / DefOpt / tpu / PostRev 0.0000023991 s 0.000002591925 s 0.93
slicing / DefOpt / tpu / BothRev 0.000002545375 s 0.0000025369750000000004 s 1.00
slicing / IDefOpt / tpu / PreRev 0.00000239115 s 0.000002578725 s 0.93
slicing / IDefOpt / tpu / PostRev 0.000002547625 s 0.0000025319000000000003 s 1.01
slicing / IDefOpt / tpu / BothRev 0.0000024014 s 0.000002579375 s 0.93
slicing / JaXPipe / cpu / Primal 0.000015819 s 0.000006638120012212312 s 2.38
slicing / Jax / cpu / Primal 0.000015948 s 0.000006729740034643328 s 2.37
slicing / HLOOpt / cpu / Primal 0.000015993 s 0.00000629295998805901 s 2.54
slicing / PartOpt / cpu / Primal 0.000015715 s 0.000006045859981895773 s 2.60
slicing / IPartOpt / cpu / Primal 0.000016159 s 0.000006430899966289871 s 2.51
slicing / DefOpt / cpu / Primal 0.000015951 s 0.000006045840000297176 s 2.64
slicing / IDefOpt / cpu / Primal 0.000015929999999999998 s 0.000006516900029964745 s 2.44
slicing / JaXPipe / cpu / Forward 0.000021174 s 0.00000963860000410932 s 2.20
slicing / Jax / cpu / Forward 0.000020963 s 0.0000093633799951931 s 2.24
slicing / HLOOpt / cpu / Forward 0.000020563000000000003 s 0.000009715839969430815 s 2.12
slicing / PartOpt / cpu / Forward 0.00002106 s 0.000009247120015061228 s 2.28
slicing / IPartOpt / cpu / Forward 0.000020651 s 0.000009682860036264171 s 2.13
slicing / DefOpt / cpu / Forward 0.000021087000000000003 s 0.00000980009998784226 s 2.15
slicing / IDefOpt / cpu / Forward 0.000021137 s 0.00000932759996430832 s 2.27
slicing / JaXPipe / cpu / PreRev 0.000022157 s 0.000010271939991071124 s 2.16
slicing / JaXPipe / cpu / PostRev 0.000021512 s 0.000011063819965784204 s 1.94
slicing / JaXPipe / cpu / BothRev 0.000021666 s 0.00001043989999743644 s 2.08
slicing / Jax / cpu / BothRev 0.000021633 s 0.000009840040020208107 s 2.20
slicing / HLOOpt / cpu / PreRev 0.000021822 s 0.000010213960003966348 s 2.14
slicing / HLOOpt / cpu / PostRev 0.000021535 s 0.00001261207993593416 s 1.71
slicing / HLOOpt / cpu / BothRev 0.000021792 s 0.000009878719984044436 s 2.21
slicing / PartOpt / cpu / PreRev 0.000021958 s 0.000010214859985353542 s 2.15
slicing / PartOpt / cpu / PostRev 0.000022592 s 0.000009956999983842252 s 2.27
slicing / PartOpt / cpu / BothRev 0.000022035 s 0.000010311199994248454 s 2.14
slicing / IPartOpt / cpu / PreRev 0.000021929 s 0.000009981699986383318 s 2.20
slicing / IPartOpt / cpu / PostRev 0.000021677 s 0.00001045855996380851 s 2.07
slicing / IPartOpt / cpu / BothRev 0.000022 s 0.000009998220002671588 s 2.20
slicing / DefOpt / cpu / PreRev 0.000021978 s 0.000009611019986550672 s 2.29
slicing / DefOpt / cpu / PostRev 0.000021702 s 0.000009998039986385266 s 2.17
slicing / DefOpt / cpu / BothRev 0.000021582 s 0.00000963705998401565 s 2.24
slicing / IDefOpt / cpu / PreRev 0.000021962 s 0.000010339760019633103 s 2.12
slicing / IDefOpt / cpu / PostRev 0.000021569000000000003 s 0.000009949260011126173 s 2.17
slicing / IDefOpt / cpu / BothRev 0.000021586 s 0.000009970660012186271 s 2.16
sum / JaXPipe / cpu / Primal 0.000009008359993458726 s 0.000007645399964530952 s 1.18
sum / Jax / cpu / Primal 0.000008545260016035172 s 0.000007648059981875122 s 1.12
sum / HLOOpt / cpu / Primal 0.000008168699987436411 s 0.000007830219947209117 s 1.04
sum / PartOpt / cpu / Primal 0.00000808555999356031 s 0.000008145459978550208 s 0.99
sum / IPartOpt / cpu / Primal 0.000008008760005395743 s 0.000008449280003333114 s 0.95
sum / DefOpt / cpu / Primal 0.000008466720028081908 s 0.000007587159980175784 s 1.12
sum / IDefOpt / cpu / Primal 0.000008163700076693203 s 0.0000079674000335217 s 1.02
sum / JaXPipe / cpu / Forward 0.00001200092003273312 s 0.000011516220010889813 s 1.04
sum / Jax / cpu / Forward 0.000011746379968826658 s 0.000011451039999883506 s 1.03
sum / HLOOpt / cpu / Forward 0.000012756340020132484 s 0.000011789640011556912 s 1.08
sum / PartOpt / cpu / Forward 0.00001183330000458227 s 0.000011053859989260672 s 1.07
sum / IPartOpt / cpu / Forward 0.00001191765999465133 s 0.000011822620008388183 s 1.01
sum / DefOpt / cpu / Forward 0.00001159913997980766 s 0.000010920239947154187 s 1.06
sum / IDefOpt / cpu / Forward 0.000012075040012859973 s 0.000011344999993525562 s 1.06
sum / JaXPipe / cpu / PreRev 0.00001152241999079706 s 0.000010926760069196462 s 1.05
sum / JaXPipe / cpu / PostRev 0.000011665639976854437 s 0.000010825520002981649 s 1.08
sum / JaXPipe / cpu / BothRev 0.000011525720001372976 s 0.00001116623997404531 s 1.03
sum / Jax / cpu / BothRev 0.00001180508004836156 s 0.000011224160007259342 s 1.05
sum / HLOOpt / cpu / PreRev 0.000011804760006270954 s 0.000011186540041308036 s 1.06
sum / HLOOpt / cpu / PostRev 0.000013719979988309203 s 0.000012992799984203885 s 1.06
sum / HLOOpt / cpu / BothRev 0.000011332519989082355 s 0.00001103836000766023 s 1.03
sum / PartOpt / cpu / PreRev 0.000011368540008334092 s 0.00001072174000000814 s 1.06
sum / PartOpt / cpu / PostRev 0.000011433200006649713 s 0.000010569659998509453 s 1.08
sum / PartOpt / cpu / BothRev 0.00001228032001563406 s 0.00001155784001639404 s 1.06
sum / IPartOpt / cpu / PreRev 0.000011059839989684406 s 0.000011202260011486942 s 0.99
sum / IPartOpt / cpu / PostRev 0.00001159669999651669 s 0.000011255660010647262 s 1.03
sum / IPartOpt / cpu / BothRev 0.000011579119955058558 s 0.000010993680016326834 s 1.05
sum / DefOpt / cpu / PreRev 0.000011204459970031166 s 0.000011120220005977898 s 1.01
sum / DefOpt / cpu / PostRev 0.000011259659986535551 s 0.000011071139997511637 s 1.02
sum / DefOpt / cpu / BothRev 0.000011220119968129438 s 0.000011151440021421876 s 1.01
sum / IDefOpt / cpu / PreRev 0.000011430999975345911 s 0.000010621079964039382 s 1.08
sum / IDefOpt / cpu / PostRev 0.0000115595999523066 s 0.000011201700044694008 s 1.03
sum / IDefOpt / cpu / BothRev 0.000011434259995439789 s 0.00001039341997966403 s 1.10
sum / JaXPipe / cuda / Primal 0.000002431 s 0.000002048 s 1.19
sum / Jax / cuda / Primal 0.000002431 s 0.000002048 s 1.19
sum / HLOOpt / cuda / Primal 0.000002431 s 0.000002047 s 1.19
sum / PartOpt / cuda / Primal 0.000002431 s 0.000002047 s 1.19
sum / IPartOpt / cuda / Primal 0.000002431 s 0.000002048 s 1.19
sum / DefOpt / cuda / Primal 0.000002432 s 0.000002048 s 1.19
sum / IDefOpt / cuda / Primal 0.000002432 s 0.000002048 s 1.19
sum / JaXPipe / cuda / Forward 0.000010848 s 0.000010272 s 1.06
sum / Jax / cuda / Forward 0.000010592 s 0.000010048 s 1.05
sum / HLOOpt / cuda / Forward 0.000010431 s 0.000010368 s 1.01
sum / PartOpt / cuda / Forward 0.000010656 s 0.00000992 s 1.07
sum / IPartOpt / cuda / Forward 0.00001056 s 0.000010209 s 1.03
sum / DefOpt / cuda / Forward 0.000010816 s 0.000010016 s 1.08
sum / IDefOpt / cuda / Forward 0.0000104 s 0.000010272 s 1.01
sum / JaXPipe / cuda / PreRev 0.000010592 s 0.000010048 s 1.05
sum / JaXPipe / cuda / PostRev 0.00001024 s 0.000009633 s 1.06
sum / JaXPipe / cuda / BothRev 0.000009791 s 0.000009376 s 1.04
sum / Jax / cuda / BothRev 0.000010208 s 0.0000096 s 1.06
sum / HLOOpt / cuda / PreRev 0.00001024 s 0.000009567 s 1.07
sum / HLOOpt / cuda / PostRev 0.000010016 s 0.00000944 s 1.06
sum / HLOOpt / cuda / BothRev 0.000010176 s 0.000009185 s 1.11
sum / PartOpt / cuda / PreRev 0.000010369 s 0.000010208 s 1.02
sum / PartOpt / cuda / PostRev 0.000010273 s 0.000009824 s 1.05
sum / PartOpt / cuda / BothRev 0.000010144 s 0.000009888 s 1.03
sum / IPartOpt / cuda / PreRev 0.000010368 s 0.000010208 s 1.02
sum / IPartOpt / cuda / PostRev 0.000010176 s 0.000009664 s 1.05
sum / IPartOpt / cuda / BothRev 0.00001024 s 0.000009664 s 1.06
sum / DefOpt / cuda / PreRev 0.000010208 s 0.000009984 s 1.02
sum / DefOpt / cuda / PostRev 0.00001024 s 0.000009664 s 1.06
sum / DefOpt / cuda / BothRev 0.000010368 s 0.000009504 s 1.09
sum / IDefOpt / cuda / PreRev 0.000010176 s 0.000009856 s 1.03
sum / IDefOpt / cuda / PostRev 0.000010143 s 0.000009408 s 1.08
sum / IDefOpt / cuda / BothRev 0.000010497 s 0.000009632 s 1.09
sum / JaXPipe / tpu / Primal 5.102e-7 s 5.1015e-7 s 1.00
sum / Jax / tpu / Primal 5.4705e-7 s 5.471999999999999e-7 s 1.00
sum / HLOOpt / tpu / Primal 5.099749999999999e-7 s 5.10525e-7 s 1.00
sum / PartOpt / tpu / Primal 5.467249999999999e-7 s 5.4695e-7 s 1.00
sum / IPartOpt / tpu / Primal 5.10125e-7 s 5.106750000000001e-7 s 1.00
sum / DefOpt / tpu / Primal 5.47375e-7 s 5.473499999999999e-7 s 1.00
sum / IDefOpt / tpu / Primal 5.102750000000001e-7 s 5.10425e-7 s 1.00
sum / JaXPipe / tpu / Forward 0.000001553225 s 0.000001554225 s 1.00
sum / Jax / tpu / Forward 0.0000014973 s 0.0000015011250000000002 s 1.00
sum / HLOOpt / tpu / Forward 0.000001532475 s 0.000001527725 s 1.00
sum / PartOpt / tpu / Forward 0.000001493025 s 0.000001494675 s 1.00
sum / IPartOpt / tpu / Forward 0.0000015319749999999998 s 0.000001532525 s 1.00
sum / DefOpt / tpu / Forward 0.0000014962500000000005 s 0.0000014973 s 1.00
sum / IDefOpt / tpu / Forward 0.00000153685 s 0.00000152915 s 1.01
sum / JaXPipe / tpu / PreRev 9.98275e-7 s 0.00000104985 s 0.95
sum / JaXPipe / tpu / PostRev 0.0000010403 s 0.00000108665 s 0.96
sum / JaXPipe / tpu / BothRev 0.00000100355 s 0.000001049175 s 0.96
sum / Jax / tpu / BothRev 0.000001037 s 0.000001091625 s 0.95
sum / HLOOpt / tpu / PreRev 0.0000010062 s 0.0000010556 s 0.95
sum / HLOOpt / tpu / PostRev 0.000001037925 s 0.0000010847000000000002 s 0.96
sum / HLOOpt / tpu / BothRev 0.0000010008999999999998 s 0.000001045625 s 0.96
sum / PartOpt / tpu / PreRev 0.000001043825 s 0.0000010876 s 0.96
sum / PartOpt / tpu / PostRev 0.0000010074 s 0.00000104685 s 0.96
sum / PartOpt / tpu / BothRev 0.00000104105 s 0.0000010919 s 0.95
sum / IPartOpt / tpu / PreRev 0.0000010023250000000002 s 0.000001051 s 0.95
sum / IPartOpt / tpu / PostRev 0.0000010406 s 0.00000108285 s 0.96
sum / IPartOpt / tpu / BothRev 0.0000010006750000000002 s 0.000001049375 s 0.95
sum / DefOpt / tpu / PreRev 0.000001036575 s 0.00000109035 s 0.95
sum / DefOpt / tpu / PostRev 0.0000010012999999999998 s 0.000001048725 s 0.95
sum / DefOpt / tpu / BothRev 0.0000010409 s 0.0000010904 s 0.95
sum / IDefOpt / tpu / PreRev 0.00000100815 s 0.000001045875 s 0.96
sum / IDefOpt / tpu / PostRev 0.00000104665 s 0.0000010865 s 0.96
sum / IDefOpt / tpu / BothRev 0.000001001575 s 0.000001051825 s 0.95
sum / JaXPipe / cpu / Primal 0.00001857 s 0.000007645399964530952 s 2.43
sum / Jax / cpu / Primal 0.000018236 s 0.000007648059981875122 s 2.38
sum / HLOOpt / cpu / Primal 0.000018365 s 0.000007830219947209117 s 2.35
sum / PartOpt / cpu / Primal 0.000018303 s 0.000008145459978550208 s 2.25
sum / IPartOpt / cpu / Primal 0.000018149 s 0.000008449280003333114 s 2.15
sum / DefOpt / cpu / Primal 0.000018068 s 0.000007587159980175784 s 2.38
sum / IDefOpt / cpu / Primal 0.000017919000000000002 s 0.0000079674000335217 s 2.25
sum / JaXPipe / cpu / Forward 0.000025533 s 0.000011516220010889813 s 2.22
sum / Jax / cpu / Forward 0.000024808 s 0.000011451039999883506 s 2.17
sum / HLOOpt / cpu / Forward 0.000024778 s 0.000011789640011556912 s 2.10
sum / PartOpt / cpu / Forward 0.000024683 s 0.000011053859989260672 s 2.23
sum / IPartOpt / cpu / Forward 0.000025597 s 0.000011822620008388183 s 2.17
sum / DefOpt / cpu / Forward 0.000025044 s 0.000010920239947154187 s 2.29
sum / IDefOpt / cpu / Forward 0.00002502 s 0.000011344999993525562 s 2.21
sum / JaXPipe / cpu / PreRev 0.000024141 s 0.000010926760069196462 s 2.21
sum / JaXPipe / cpu / PostRev 0.000023639 s 0.000010825520002981649 s 2.18
sum / JaXPipe / cpu / BothRev 0.000023534 s 0.00001116623997404531 s 2.11
sum / Jax / cpu / BothRev 0.000023691 s 0.000011224160007259342 s 2.11
sum / HLOOpt / cpu / PreRev 0.000023652 s 0.000011186540041308036 s 2.11
sum / HLOOpt / cpu / PostRev 0.000022948 s 0.000012992799984203885 s 1.77
sum / HLOOpt / cpu / BothRev 0.000023056 s 0.00001103836000766023 s 2.09
sum / PartOpt / cpu / PreRev 0.000023645 s 0.00001072174000000814 s 2.21
sum / PartOpt / cpu / PostRev 0.000023782 s 0.000010569659998509453 s 2.25
sum / PartOpt / cpu / BothRev 0.000023532 s 0.00001155784001639404 s 2.04
sum / IPartOpt / cpu / PreRev 0.000023668 s 0.000011202260011486942 s 2.11
sum / IPartOpt / cpu / PostRev 0.000023593 s 0.000011255660010647262 s 2.10
sum / IPartOpt / cpu / BothRev 0.000023265 s 0.000010993680016326834 s 2.12
sum / DefOpt / cpu / PreRev 0.000023519000000000003 s 0.000011120220005977898 s 2.11
sum / DefOpt / cpu / PostRev 0.000023671 s 0.000011071139997511637 s 2.14
sum / DefOpt / cpu / BothRev 0.000023578 s 0.000011151440021421876 s 2.11
sum / IDefOpt / cpu / PreRev 0.000023501 s 0.000010621079964039382 s 2.21
sum / IDefOpt / cpu / PostRev 0.000023193 s 0.000011201700044694008 s 2.07
sum / IDefOpt / cpu / BothRev 0.000023513 s 0.00001039341997966403 s 2.26
value_and_grad / JaXPipe / cpu / Primal 0.000015732999991087126 s 0.000014406580003196723 s 1.09
value_and_grad / Jax / cpu / Primal 0.000014810059965384426 s 0.00001402247997248196 s 1.06
value_and_grad / HLOOpt / cpu / Primal 0.00001470501998483087 s 0.000013926480032750988 s 1.06
value_and_grad / PartOpt / cpu / Primal 0.000014893719999236057 s 0.000013946120016044003 s 1.07
value_and_grad / IPartOpt / cpu / Primal 0.000014808839996476308 s 0.000013939880000179985 s 1.06
value_and_grad / DefOpt / cpu / Primal 0.000015844200024730526 s 0.000014367180010594894 s 1.10
value_and_grad / IDefOpt / cpu / Primal 0.0000143204399591923 s 0.000013935539982412593 s 1.03
value_and_grad / JaXPipe / cuda / Primal 0.000033696 s 0.000033759999999999995 s 1.00
value_and_grad / Jax / cuda / Primal 0.000032800000000000004 s 0.000033056 s 0.99
value_and_grad / HLOOpt / cuda / Primal 0.000032959 s 0.000032416 s 1.02
value_and_grad / PartOpt / cuda / Primal 0.000033312 s 0.000033056 s 1.01
value_and_grad / IPartOpt / cuda / Primal 0.000033376 s 0.00003264 s 1.02
value_and_grad / DefOpt / cuda / Primal 0.000037408 s 0.000032672 s 1.14
value_and_grad / IDefOpt / cuda / Primal 0.000038304 s 0.000032064 s 1.19
value_and_grad / JaXPipe / tpu / Primal 0 s 0 s 1
value_and_grad / Jax / tpu / Primal 0 s 0 s 1
value_and_grad / HLOOpt / tpu / Primal 0 s 0 s 1
value_and_grad / PartOpt / tpu / Primal 0 s 0 s 1
value_and_grad / IPartOpt / tpu / Primal 0 s 0 s 1
value_and_grad / DefOpt / tpu / Primal 0 s 0 s 1
value_and_grad / IDefOpt / tpu / Primal 0 s 0 s 1
value_and_grad / JaXPipe / cpu / Primal 0.00002832 s 0.000014406580003196723 s 1.97
value_and_grad / Jax / cpu / Primal 0.000027644 s 0.00001402247997248196 s 1.97
value_and_grad / HLOOpt / cpu / Primal 0.000028958 s 0.000013926480032750988 s 2.08
value_and_grad / PartOpt / cpu / Primal 0.000028077 s 0.000013946120016044003 s 2.01
value_and_grad / IPartOpt / cpu / Primal 0.000028116 s 0.000013939880000179985 s 2.02
value_and_grad / DefOpt / cpu / Primal 0.000028348 s 0.000014367180010594894 s 1.97
value_and_grad / IDefOpt / cpu / Primal 0.000028127 s 0.000013935539982412593 s 2.02
jaxmd20 / JaXPipe / cuda / Primal 0.00146473 s 0.001438656 s 1.02
jaxmd20 / Jax / cuda / Primal 0.001486554 s 0.001457794 s 1.02
jaxmd20 / HLOOpt / cuda / Primal 0.00134841 s 0.001319074 s 1.02
jaxmd20 / PartOpt / cuda / Primal 0.002185368 s 0.001330945 s 1.64
jaxmd20 / IPartOpt / cuda / Primal 0.001354778 s 0.001306946 s 1.04
jaxmd20 / DefOpt / cuda / Primal 0.000933276 s 0.0009199049999999 s 1.01
jaxmd20 / IDefOpt / cuda / Primal 0.000962489 s 0.000942625 s 1.02
jaxmd20 / JaXPipe / cuda / Forward 0.001615096 s 0.001553729 s 1.04
jaxmd20 / Jax / cuda / Forward 0.0018402499999999 s 0.001784097 s 1.03
jaxmd20 / HLOOpt / cuda / Forward 0.0017243459999999 s 0.001622465 s 1.06
jaxmd20 / PartOpt / cuda / Forward 0.001693848 s 0.001625025 s 1.04
jaxmd20 / IPartOpt / cuda / Forward 0.00170233 s 0.001625729 s 1.05
jaxmd20 / DefOpt / cuda / Forward 0.001696474 s 0.001640737 s 1.03
jaxmd20 / IDefOpt / cuda / Forward 0.001697848 s 0.001621602 s 1.05
jaxmd20 / JaXPipe / cuda / PreRev 0.002734709 s 0.002683234 s 1.02
jaxmd20 / JaXPipe / cuda / PostRev 0.005466986 s 0.005323364 s 1.03
jaxmd20 / JaXPipe / cuda / BothRev 0.002729614 s 0.00268874 s 1.02
jaxmd20 / Jax / cuda / BothRev 0.00544833 s 0.005367429 s 1.02
jaxmd20 / HLOOpt / cuda / PreRev 0.002818321 s 0.002735042 s 1.03
jaxmd20 / HLOOpt / cuda / PostRev 0.005491684 s 0.0053471399999999 s 1.03
jaxmd20 / HLOOpt / cuda / BothRev 0.002806739 s 0.002724066 s 1.03
jaxmd20 / PartOpt / cuda / PreRev 0.002888757 s 0.002834625 s 1.02
jaxmd20 / PartOpt / cuda / PostRev 0.00561831 s 0.005432037 s 1.03
jaxmd20 / PartOpt / cuda / BothRev 0.002821075 s 0.002772929 s 1.02
jaxmd20 / IPartOpt / cuda / PreRev 0.00289135 s 0.002814114 s 1.03
jaxmd20 / IPartOpt / cuda / PostRev 0.005608123 s 0.005404004 s 1.04
jaxmd20 / IPartOpt / cuda / BothRev 0.002819157 s 0.0027513299999999 s 1.02
jaxmd20 / DefOpt / cuda / PreRev 0.002883508 s 0.002829603 s 1.02
jaxmd20 / DefOpt / cuda / PostRev 0.002809719 s 0.002740963 s 1.03
jaxmd20 / DefOpt / cuda / BothRev 0.002808598 s 0.002774787 s 1.01
jaxmd20 / IDefOpt / cuda / PreRev 0.002899861 s 0.0027995549999999 s 1.04
jaxmd20 / IDefOpt / cuda / PostRev 0.002328951 s 0.002319777 s 1.00
jaxmd20 / IDefOpt / cuda / BothRev 0.002813973 s 0.002749858 s 1.02
jaxmd20 / JaXPipe / tpu / Primal 0.00928997125 s 0.00927314125 s 1.00
jaxmd20 / Jax / tpu / Primal 0.00927781875 s 0.009277410625 s 1.00
jaxmd20 / HLOOpt / tpu / Primal 0.009164066875 s 0.0091523299999999 s 1.00
jaxmd20 / PartOpt / tpu / Primal 0.009197623125 s 0.0092039425 s 1.00
jaxmd20 / IPartOpt / tpu / Primal 0.009202970625 s 0.009203678125 s 1.00
jaxmd20 / DefOpt / tpu / Primal 0.0087961468749999 s 0.008808284375 s 1.00
jaxmd20 / IDefOpt / tpu / Primal 0.00870232375 s 0.0087018675 s 1.00
jaxmd20 / JaXPipe / tpu / Forward 0.01742221625 s 0.017406293125 s 1.00
jaxmd20 / Jax / tpu / Forward 0.018751168125 s 0.018734816875 s 1.00
jaxmd20 / HLOOpt / tpu / Forward 0.0174076375 s 0.017393216875 s 1.00
jaxmd20 / PartOpt / tpu / Forward 0.017422039375 s 0.0174203675 s 1.00
jaxmd20 / IPartOpt / tpu / Forward 0.017415935 s 0.017406431875 s 1.00
jaxmd20 / DefOpt / tpu / Forward 0.017423855 s 0.017418041875 s 1.00
jaxmd20 / IDefOpt / tpu / Forward 0.01741325 s 0.017410129375 s 1.00
jaxmd20 / JaXPipe / tpu / PreRev 0.02546834875 s 0.0254688181249999 s 1.00
jaxmd20 / JaXPipe / tpu / PostRev 0.021871454375 s 0.0218769525 s 1.00
jaxmd20 / JaXPipe / tpu / BothRev 0.025455125 s 0.025468754375 s 1.00
jaxmd20 / Jax / tpu / BothRev 0.0218732125 s 0.021861596875 s 1.00
jaxmd20 / HLOOpt / tpu / PreRev 0.025570619375 s 0.0255831725 s 1.00
jaxmd20 / HLOOpt / tpu / PostRev 0.02071977375 s 0.020709916875 s 1.00
jaxmd20 / HLOOpt / tpu / BothRev 0.0256775375 s 0.02568878875 s 1.00
jaxmd20 / PartOpt / tpu / PreRev 0.02549829125 s 0.0254527875 s 1.00
jaxmd20 / PartOpt / tpu / PostRev 0.021519879375 s 0.02152217 s 1.00
jaxmd20 / PartOpt / tpu / BothRev 0.02558424 s 0.0255547325 s 1.00
jaxmd20 / IPartOpt / tpu / PreRev 0.025462704375 s 0.025477075625 s 1.00
jaxmd20 / IPartOpt / tpu / PostRev 0.0215184693749999 s 0.021248858125 s 1.01
jaxmd20 / IPartOpt / tpu / BothRev 0.025552276875 s 0.0255718356249999 s 1.00
jaxmd20 / DefOpt / tpu / PreRev 0.02549917625 s 0.025456913125 s 1.00
jaxmd20 / DefOpt / tpu / PostRev 0.018810454375 s 0.0188292325 s 1.00
jaxmd20 / DefOpt / tpu / BothRev 0.025578944375 s 0.02555605125 s 1.00
jaxmd20 / IDefOpt / tpu / PreRev 0.025464384375 s 0.02547475125 s 1.00
jaxmd20 / IDefOpt / tpu / PostRev 0.018324838125 s 0.018316490625 s 1.00
jaxmd20 / IDefOpt / tpu / BothRev 0.025549673125 s 0.0255632825 s 1.00
jaxmd40 / JaXPipe / cpu / Primal 0.0747914049999999 s 0.072844339 s 1.03
jaxmd40 / Jax / cpu / Primal 0.069468799 s 0.085290419 s 0.81
jaxmd40 / HLOOpt / cpu / Primal 0.106450839 s 0.089723403 s 1.19
jaxmd40 / PartOpt / cpu / Primal 0.070216272 s 0.074612173 s 0.94
jaxmd40 / IPartOpt / cpu / Primal 0.068599852 s 0.066914133 s 1.03
jaxmd40 / DefOpt / cpu / Primal 0.106511561 s 0.093375959 s 1.14
jaxmd40 / IDefOpt / cpu / Primal 0.091593121 s 0.0910838289999999 s 1.01
jaxmd40 / JaXPipe / cpu / Forward 0.187023589 s 0.170574911 s 1.10
jaxmd40 / Jax / cpu / Forward 0.092947221 s 0.091175984 s 1.02
jaxmd40 / HLOOpt / cpu / Forward 0.185286036 s 0.172308492 s 1.08
jaxmd40 / PartOpt / cpu / Forward 0.180664049 s 0.176623812 s 1.02
jaxmd40 / IPartOpt / cpu / Forward 0.187013622 s 0.174147301 s 1.07
jaxmd40 / DefOpt / cpu / Forward 0.183556958 s 0.173570474 s 1.06
jaxmd40 / IDefOpt / cpu / Forward 0.192778608 s 0.172948483 s 1.11
jaxmd40 / JaXPipe / cpu / PreRev 0.242472471 s 0.246458502 s 0.98
jaxmd40 / JaXPipe / cpu / PostRev 0.153631364 s 0.145292605 s 1.06
jaxmd40 / JaXPipe / cpu / BothRev 0.25967822 s 0.232384171 s 1.12
jaxmd40 / Jax / cpu / BothRev 0.159423095 s 0.144972575 s 1.10
jaxmd40 / HLOOpt / cpu / PreRev 0.265623481 s 0.209976645 s 1.27
jaxmd40 / HLOOpt / cpu / PostRev 0.210901206 s 0.167030558 s 1.26
jaxmd40 / HLOOpt / cpu / BothRev 0.274567512 s 0.240362218 s 1.14
jaxmd40 / PartOpt / cpu / PreRev 0.238989489 s 0.225781603 s 1.06
jaxmd40 / PartOpt / cpu / PostRev 0.160843294 s 0.133505024 s 1.20
jaxmd40 / PartOpt / cpu / BothRev 0.292582825 s 0.2396432289999999 s 1.22
jaxmd40 / IPartOpt / cpu / PreRev 0.255632113 s 0.210954968 s 1.21
jaxmd40 / IPartOpt / cpu / PostRev 0.1516275259999999 s 0.129907668 s 1.17
jaxmd40 / IPartOpt / cpu / BothRev 0.286090633 s 0.2442911049999999 s 1.17
jaxmd40 / DefOpt / cpu / PreRev 0.257849151 s 0.207436634 s 1.24
jaxmd40 / DefOpt / cpu / PostRev 0.206645418 s 0.160227876 s 1.29
jaxmd40 / DefOpt / cpu / BothRev 0.281099629 s 0.266801465 s 1.05
jaxmd40 / IDefOpt / cpu / PreRev 0.255983835 s 0.213239064 s 1.20
jaxmd40 / IDefOpt / cpu / PostRev 0.210175975 s 0.166438509 s 1.26
jaxmd40 / IDefOpt / cpu / BothRev 0.2749302799999999 s 0.2551048099999999 s 1.08
neuralgcm_v1/deterministic_2_8_deg_inner_steps_24_outer_steps_4 / JaXPipe / cuda / Primal 1.6996809780000002 s 1.702317745 s 1.00
neuralgcm_v1/deterministic_2_8_deg_inner_steps_24_outer_steps_4 / Jax / cuda / Primal 1.70232081 s 1.705465397 s 1.00
neuralgcm_v1/deterministic_2_8_deg_inner_steps_24_outer_steps_4 / HLOOpt / cuda / Primal 1.711292707 s 1.716033577 s 1.00
neuralgcm_v1/deterministic_2_8_deg_inner_steps_24_outer_steps_4 / PartOpt / cuda / Primal 1.6942493010000002 s 1.697429808 s 1.00
neuralgcm_v1/deterministic_2_8_deg_inner_steps_24_outer_steps_4 / IPartOpt / cuda / Primal 1.692028909 s 1.694712204 s 1.00
neuralgcm_v1/deterministic_2_8_deg_inner_steps_24_outer_steps_4 / DefOpt / cuda / Primal 1.662351114 s 1.665316659 s 1.00
neuralgcm_v1/deterministic_2_8_deg_inner_steps_24_outer_steps_4 / IDefOpt / cuda / Primal 1.911666814 s 1.915802135 s 1.00
neuralgcm_v1/deterministic_2_8_deg_inner_steps_24_outer_steps_4 / JaXPipe / tpu / Primal 3.038814636875 s 3.03911668 s 1.00
neuralgcm_v1/deterministic_2_8_deg_inner_steps_24_outer_steps_4 / Jax / tpu / Primal 3.039413823125 s 3.0396639975 s 1.00
neuralgcm_v1/deterministic_2_8_deg_inner_steps_24_outer_steps_4 / HLOOpt / tpu / Primal 3.121635935625 s 3.122058054375 s 1.00
neuralgcm_v1/deterministic_2_8_deg_inner_steps_24_outer_steps_4 / PartOpt / tpu / Primal 3.060370549375 s 3.06050589875 s 1.00
neuralgcm_v1/deterministic_2_8_deg_inner_steps_24_outer_steps_4 / IPartOpt / tpu / Primal 3.060577015625 s 3.060716875 s 1.00
neuralgcm_v1/deterministic_2_8_deg_inner_steps_24_outer_steps_4 / DefOpt / tpu / Primal 2.102382259375 s 2.1026504125 s 1.00
neuralgcm_v1/deterministic_2_8_deg_inner_steps_24_outer_steps_4 / IDefOpt / tpu / Primal 2.948189166875 s 2.94873416125 s 1.00
neuralgcm_v1/deterministic_2_8_deg_inner_steps_2_outer_steps_2 / JaXPipe / cpu / Primal 6.840652416 s 6.26335534 s 1.09
neuralgcm_v1/deterministic_2_8_deg_inner_steps_2_outer_steps_2 / Jax / cpu / Primal 6.863899384000001 s 6.275570235999999 s 1.09
neuralgcm_v1/deterministic_2_8_deg_inner_steps_2_outer_steps_2 / HLOOpt / cpu / Primal 6.724595834 s 6.183279134 s 1.09
neuralgcm_v1/deterministic_2_8_deg_inner_steps_2_outer_steps_2 / PartOpt / cpu / Primal 6.896469751 s 6.324766463 s 1.09
neuralgcm_v1/deterministic_2_8_deg_inner_steps_2_outer_steps_2 / IPartOpt / cpu / Primal 6.834958245 s 6.510950545 s 1.05
neuralgcm_v1/deterministic_2_8_deg_inner_steps_2_outer_steps_2 / DefOpt / cpu / Primal 2.729002233 s 2.547048578 s 1.07
neuralgcm_v1/deterministic_2_8_deg_inner_steps_2_outer_steps_2 / IDefOpt / cpu / Primal 7.604181316999999 s 6.79261008 s 1.12

This comment was automatically generated by workflow using github-action-benchmark.

@wsmoses
Copy link
Member Author

wsmoses commented Dec 29, 2025

considering enzyme dus handler of : %dynamic-update-slice.25 = f32[4,6129,12272]{2,1,0} dynamic-update-slice(%pad.159, %slice.340, %constant.199, %constant.199, %constant.199), sharding={devices=[1,8,8]<=[8,8]T(1,0)}, metadata={op_name="dynamic-update-slice.43"}
 + input: %pad.159 = f32[4,6129,12272]{2,1,0} pad(%pad.103, %constant.167), padding=0_0x2_0x0_0, sharding={devices=[1,8,8]<=[8,8]T(1,0)}, metadata={op_name=
"pad.257"}
 + update: %slice.340 = f32[4,2,12272]{2,1,0} slice(%get-tuple-element.9), slice={[8:12], [6:8], [8:12280]}, sharding={devices=[1,8,8]<=[8,8]T(1,0)}, metadata={op_name="slice.630"}
 ** maskOp: %dynamic-slice = pred[4,767,1534]{2,1,0} dynamic-slice(%pad, %constant, %multiply, %multiply), dynamic_slice_sizes={4,767,1534}, metadata={op_name="dynamic-update-slice.43"} 
 ** slicefixer: needs_slice:1 needs_pad:0
 new slice: %slice = f32[4,767,1534]{2,1,0} slice(%dynamic-slice), slice={[8:12], [0:767], [0:1534]}, sharding={devices=[1,8,8]<=[8,8]T(1,0)}, metadata={op_name="dynamic-update-slice.43"}
 ** fallback new pad: %slice = f32[4,767,1534]{2,1,0} slice(%dynamic-slice), slice={[8:12], [0:767], [0:1534]}, sharding={devices=[1,8,8]<=[8,8]T(1,0)}, metadata={op_name="dynamic-update-slice.43"}
 ** result: %select = f32[4,767,1534]{2,1,0} select(%dynamic-slice, %select, %slice), metadata={op_name="dynamic-update-slice.43"}

@wsmoses
Copy link
Member Author

wsmoses commented Dec 29, 2025




./module_0049.reactant_first_t....0005.spmd-partitioner.after_spmd-partitioning.before_call-inliner.txt:  %pad.624 = pred[4,6136,1534]{2,1,0} pad(%broadcast.507, %constant.146), padding=0_0x0_6134x0_0, metadata={op_name="dynamic-update-slice.129"}
./module_0049.reactant_first_t....0005.spmd-partitioner.after_spmd-partitioning.before_call-inliner.txt:  %dynamic-slice.3319 = pred[4,767,1534]{2,1,0} dynamic-slice(%pad.624, %constant.15, %multiply.7297, %multiply.7347), dynamic_slice_sizes={4,767,1534}, metadata={op_name="dynamic-update-slice.129"}
./module_0049.reactant_first_t....0002.spmd-cleanup.after_pipeline-start.before_dce.txt:  %pad.624 = pred[4,6136,1534]{2,1,0} pad(%broadcast.2880, %constant.11390), padding=0_0x0_6134x0_0, metadata={op_name="dynamic-update-slice.129"}
./module_0049.reactant_first_t....0002.spmd-cleanup.after_pipeline-start.before_dce.txt:  %dynamic-slice.3319 = pred[4,767,1534]{2,1,0} dynamic-slice(%pad.624, %constant.11400, %multiply.13487, %multiply.13488), dynamic_slice_sizes={4,767,1534}, metadata={op_name="dynamic-update-slice.129"}
./module_0049.reactant_first_t....0004.spmd-cleanup.after_cse.before_pipeline-end.txt:  %pad.624 = pred[4,6136,1534]{2,1,0} pad(%broadcast.507, %constant.146), padding=0_0x0_6134x0_0, metadata={op_name="dynamic-update-slice.129"}
./module_0049.reactant_first_t....0004.spmd-cleanup.after_cse.before_pipeline-end.txt:  %dynamic-slice.3319 = pred[4,767,1534]{2,1,0} dynamic-slice(%pad.624, %constant.15, %multiply.7297, %multiply.7347), dynamic_slice_sizes={4,767,1534}, metadata={op_name="dynamic-update-slice.129"}
./module_0049.reactant_first_t....0003.spmd-cleanup.after_dce.before_tuple-simplifier.txt:  %pad.624 = pred[4,6136,1534]{2,1,0} pad(%broadcast.2880, %constant.11390), padding=0_0x0_6134x0_0, metadata={op_name="dynamic-update-slice.129"}
./module_0049.reactant_first_t....0003.spmd-cleanup.after_dce.before_tuple-simplifier.txt:  %dynamic-slice.3319 = pred[4,767,1534]{2,1,0} dynamic-slice(%pad.624, %constant.11400, %multiply.13487, %multiply.13488), dynamic_slice_sizes={4,767,1534}, metadata={op_name="dynamic-update-slice.129"}


considering enzyme dus handler of : %dynamic-update-slice.62 = f32[4,6129,12272]{2,1,0} dynamic-update-slice(%pad.203, %slice.777, %constant.163, %constant.163, %constant.163), sharding={devices=[1,8,8]<=[8,8]T(1,0)}, metadata={op_name="dynamic-update-slice.129"}

 + input: %pad.203 = f32[4,6129,12272]{2,1,0} pad(%pad.147, %constant.160), padding=0_0x2_0x0_0, sharding={devices=[1,8,8]<=[8,8]T(1,0)}, metadata={op_name=
"pad.340"}
 + update: %slice.777 = f32[4,2,12272]{2,1,0} slice(%arg1_velocities_v_data_parent__path___args__1__12__2__2__1__.0), slice={[8:12], [6:8], [8:12280]}, sharding={devices=[1,8,8]<=[8,8]T(1,0)}, metadata={op_name="slice.1383"}
 ** maskOp: %dynamic-slice = pred[4,767,1534]{2,1,0} dynamic-slice(%pad, %constant, %multiply, %multiply), dynamic_slice_sizes={4,767,1534}, metadata={op_name="dynamic-update-slice.129"}
 ** slicefixer: needs_slice:1 needs_pad:0
 new slice: %slice = f32[4,767,1534]{2,1,0} slice(%dynamic-slice), slice={[8:12], [0:767], [0:1534]}, sharding={devices=[1,8,8]<=[8,8]T(1,0)}, metadata={op_name="dynamic-update-slice.129"}
 ** fallback new pad: %slice = f32[4,767,1534]{2,1,0} slice(%dynamic-slice), slice={[8:12], [0:767], [0:1534]}, sharding={devices=[1,8,8]<=[8,8]T(1,0)}, metadata={op_name="dynamic-update-slice.129"}
 ** result: %select = f32[4,767,1534]{2,1,0} select(%dynamic-slice, %select, %slice), metadata={op_name="dynamic-update-slice.129"}


│   exception =
│    INTERNAL: during context [hlo verifier]: Expected instruction to have shape equal to pred[4,6135,1534], actual shape is pred[4,6136,1534]:
│    %pad.624 = pred[4,6136,1534]{2,1,0} pad(%broadcast.507, %constant.146), padding=0_0x0_6134x0_0, metadata={op_name="dynamic-update-slice.129"}
│
│    Failed after spmd-partitioning


%broadcast.507 = pred[4,1,1534]{2,1,0} broadcast(%constant.145), dimensions={}, metadata={op_name="dynamic-update-slice.84"}

%pad.624 = pred[4,6136,1534]{2,1,0} pad(%broadcast.507, %constant.146), padding=0_0x0_6134x0_0, metadata={op_name="dynamic-update-slice.129"}
%dynamic-slice.3319 = pred[4,767,1534]{2,1,0} dynamic-slice(%pad.624, %constant.15, %multiply.7297, %multiply.7347), dynamic_slice_sizes={4,767,1534}, metadata={op_name="dynamic-update-slice.129"}

@wsmoses
Copy link
Member Author

wsmoses commented Dec 29, 2025

Okay, I'm very confused, and my attempt to write a reproducing test failed. Relevant dumps:

INTERNAL: during context [hlo verifier]: Expected instruction to have shape equal to pred[4,6135,1534], actual shape is pred[4,6136,1534]:
 %pad.624 = pred[4,6136,1534]{2,1,0} pad(%broadcast.507, %constant.146), padding=0_0x0_6134x0_0, metadata={op_name="dynamic-update-slice.129"}

Failed after spmd-partitioning

xvd.tar.gz

Attempted test I created (but it didn't trigger the failure):

// Draft reproduction test: runs the SPMD partitioner, with the enzyme
// optimization flag enabled, on a dynamic-update-slice whose operand, update
// and result are all sharded as devices=[1,8,8]<=[8,8]T(1,0) across 64
// devices. The update is a slice of a larger f32[20,6144,12288] parameter and
// is written at offset (0, 0, 0) into an f32[4,6129,12272] operand.
//
// The test currently only partitions the module and prints the result; the
// structural expectations further down are disabled until the bug is found.
TEST_P(SpmdPartitioningTest, NonDivisibleDusSliceWithEnzymeOpt) {
  absl::string_view hlo_string = R"hlo(
  HloModule module

  ENTRY entry {  
    %dus_in = f32[4,6129,12272]{2,1,0} parameter(0), sharding={devices=[1,8,8]<=[8,8]T(1,0)}
    %arg1 = f32[20,6144,12288]{2,1,0} parameter(1), sharding={devices=[1,8,8]<=[8,8]T(1,0)}
    %constant.163 = s32[] constant(0)
    %slice = f32[4,2,12272]{2,1,0} slice(%arg1), slice={[8:12], [6:8], [8:12280]}, sharding={devices=[1,8,8]<=[8,8]T(1,0)}
    ROOT %dus_out = f32[4,6129,12272]{2,1,0} dynamic-update-slice(%dus_in, %slice, %constant.163, %constant.163, %constant.163), sharding={devices=[1,8,8]<=[8,8]T(1,0)}
}
)hlo";

  // Partition for 64 devices (matching the 8x8 tiling in the shardings above)
  // with default partitioner options and the enzyme optimization turned on.
  TF_ASSERT_OK_AND_ASSIGN(auto module,
                          PartitionComputation(hlo_string, /*num_devices=*/64,
                                               SpmdPartitionerOptions(),
                                               /*enable_enzyme_opt=*/true));

  const auto root = module->entry_computation()->root_instruction();
  // Debugging aid: dump the whole partitioned module (not just the root) to
  // both the VLOG stream and stderr so it can be inspected by hand.
   VLOG(0) << "root: " << module->ToString() << "\n" << std::flush;
  std::cerr << "root: " << module->ToString() << "\n" << std::flush;

  // TODO: replace with the actual expectations once the bug has been found.
  // The block below is deliberately disabled with `if (false)`; it is compiled
  // but never executed.
  if (false) {
  // NOTE(review): these per-shard shapes use 766 in dimension 1, while the
  // partitioner logs quoted in this thread show 767 (e.g. pred[4,767,1534]);
  // confirm the intended shard size before enabling.
  auto sharded_input = AllOf(op::Parameter(1), op::Shape("f32[20,768,1536]"));
  auto sharded_dus_in = AllOf(op::Parameter(0), op::Shape("f32[4,766,1534]"));

  // Expected root: select(pad mask, sharded operand, pad(slice(dynamic-slice
  // of the sharded input))) with the per-shard result shape.
  EXPECT_THAT(
      root,
      AllOf(op::Select(op::Pad(_, _), sharded_dus_in,
                       op::Pad(op::Slice(

                                   op::DynamicSlice(sharded_input, _, _, _)),
                               _)),
            op::Shape("f32[4,766,1534]")));
                       }
}

@wsmoses wsmoses merged commit 0de15a4 into main Jan 4, 2026
23 of 24 checks passed
@wsmoses wsmoses deleted the extrot branch January 4, 2026 22:42
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants