From bdfd3eebd3cc40b40ec5a8bf5d6934a4131db9c4 Mon Sep 17 00:00:00 2001 From: Ben Sykes Date: Fri, 27 Oct 2023 16:21:34 -0700 Subject: [PATCH 1/7] Add SpectatorHistogram extension --- distribution/pom.xml | 2 + .../spectator-histogram-size-comparison.png | Bin 0 -> 45331 bytes docs/configuration/extensions.md | 1 + .../extensions-contrib/spectator-histogram.md | 386 +++++++++ .../spectator-histogram/pom.xml | 141 ++++ .../histogram/NullableOffsetsHeader.java | 378 +++++++++ .../histogram/SpectatorHistogram.java | 423 ++++++++++ .../SpectatorHistogramAggregator.java | 103 +++ .../SpectatorHistogramAggregatorFactory.java | 373 +++++++++ .../SpectatorHistogramBufferAggregator.java | 131 ++++ .../SpectatorHistogramColumnPartSupplier.java | 41 + .../SpectatorHistogramComplexMetricSerde.java | 109 +++ ...tatorHistogramIndexBasedComplexColumn.java | 63 ++ .../histogram/SpectatorHistogramIndexed.java | 185 +++++ .../SpectatorHistogramJsonSerializer.java | 39 + .../histogram/SpectatorHistogramModule.java | 94 +++ .../SpectatorHistogramObjectStrategy.java | 60 ++ ...atorHistogramPercentilePostAggregator.java | 153 ++++ ...torHistogramPercentilesPostAggregator.java | 156 ++++ .../SpectatorHistogramSerializer.java | 112 +++ ...rg.apache.druid.initialization.DruidModule | 16 + .../histogram/NullableOffsetsHeaderTest.java | 441 +++++++++++ .../SpectatorHistogramAggregatorTest.java | 733 ++++++++++++++++++ .../histogram/SpectatorHistogramTest.java | 451 +++++++++++ .../src/test/resources/input_data.tsv | 12 + .../src/test/resources/pre_agg_data.tsv | 6 + pom.xml | 1 + .../query/aggregation/AggregatorUtil.java | 5 + .../aggregation/post/PostAggregatorIds.java | 2 + website/.spelling | 11 + 30 files changed, 4628 insertions(+) create mode 100644 docs/assets/spectator-histogram-size-comparison.png create mode 100644 docs/development/extensions-contrib/spectator-histogram.md create mode 100644 extensions-contrib/spectator-histogram/pom.xml create mode 100644 
extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/NullableOffsetsHeader.java create mode 100644 extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogram.java create mode 100644 extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregator.java create mode 100644 extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregatorFactory.java create mode 100644 extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramBufferAggregator.java create mode 100644 extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramColumnPartSupplier.java create mode 100644 extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramComplexMetricSerde.java create mode 100644 extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramIndexBasedComplexColumn.java create mode 100644 extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramIndexed.java create mode 100644 extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramJsonSerializer.java create mode 100644 extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramModule.java create mode 100644 extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramObjectStrategy.java create mode 100644 extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramPercentilePostAggregator.java create mode 100644 
extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramPercentilesPostAggregator.java create mode 100644 extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramSerializer.java create mode 100644 extensions-contrib/spectator-histogram/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule create mode 100644 extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/NullableOffsetsHeaderTest.java create mode 100644 extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregatorTest.java create mode 100644 extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramTest.java create mode 100644 extensions-contrib/spectator-histogram/src/test/resources/input_data.tsv create mode 100644 extensions-contrib/spectator-histogram/src/test/resources/pre_agg_data.tsv diff --git a/distribution/pom.xml b/distribution/pom.xml index ab8e082c9ab8..e41fdde64f53 100644 --- a/distribution/pom.xml +++ b/distribution/pom.xml @@ -450,6 +450,8 @@ org.apache.druid.extensions.contrib:opentelemetry-emitter -c org.apache.druid.extensions:druid-iceberg-extensions + -c + org.apache.druid.extensions.contrib:druid-spectator-histogram diff --git a/docs/assets/spectator-histogram-size-comparison.png b/docs/assets/spectator-histogram-size-comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..306f45abd8179ab5ee7cea0423f0fba1f9948b28 GIT binary patch literal 45331 zcmeFZWmH>Vqwb3oXz|igic@HdLvgnj*A{mu?(UETN+|&f6nA$gxI4vK++B+k91=)4 z>HppPo_*eZ#<=5-dq12H_X~r-T63;7*P8Y`&o5ygRpkh9DR9xy&Snc_@`u&U;=5wF zR;`Y)dz8?CzyOtf1?4#@3+G#nau5Gi=^=w^!#=mTq4-{w6&%$I@XIX%B zB@dR)ZQ0UAVwkSeF!c`mM}7^!GI2-HkO!Y7u|0RcJT@dKR>~n@n2ukU8_c&;if^V; z_;QGj|06dzQjNS1TL~LdXqX^XKJ=Uc^+^#Ma>g7yZ#qOb8()SxKE>>H@Vq4IaB)jw 
zN;dMoR5vYb$Xku?7Q~cH>n2E8&ZDQVyH_#zwqUZQ*<1Sg5OY1q<(&gLnx9g3ZmP>v& z+m`h1_4;xf;*s-H;nRdTL^fxqc)2l$N)j&Q64tn*5~d4e5BCqj>`fYYuY(T}gr<5f z%7=bpanX;m7VG1hfAMpWh+IItn}(#@CHlckPo%(`{& zqde*`Y5fc-c$+mSS6Sn%2IqI*4*hPFgU0WH5BQFlOxX2a?XdE?p{2w8nt#ThUs=qb zS{Hbzs|=Y4s{9FPkUyanG<0g?lhUv2FgV#4;NA8a5;_4Z7e=t)f%^&ZNXwXxg|8=& zt$Bw7OP)up)WU15)MGch1;f)##qFD2Crw|GcU}TMK%yb9fPc>4{J#w`J@tvZtf?W#>ZrpriP6)WYZY&If+;&IEIhnq^C3*W0V~#gqTUSB8GGT(S=a~6kT)B|y6E?3=wZIX+!E%jf(a{n}o{&m3 z7XaH}V6=dC(C*wB`Fo=6-fS7u>hj0^o|L^?6_KV8fU-0$e|o!wOuJqx-!XE#@3Spg zYtr}%YRb$9vf!IBSDlkm4viY!8Nz7QLGUFgXgH^LpdQ%ZfG{qdwMp4XyN5p(!#4GM z=!&W2JRp9mKvWfNV6@_S;+I7w=A~XbV@K-Brjd_oqIlPo_(gq_Ln<8KZnCPj+5_kZ zK+0mDZ{=s94yJ%R7hh!0(%W>I`>haoT=~7yt*2#uBxjVG$Td#IyCpKb#Nw=MW3Nn#Id~p6V-Cg`@Z9Wpy_0-aj>2$TIg> z8m%%KC?Mxkgt%l?OlciUuX<%g%kq#t{j9b-uC=^-2lWPr{J>DT53C`yB) zJOKu=ZZ~0+8CEpMSB-z9tTQPIN#~mQOvV(O>5BO7r<7Duf`w}luUuY9AW`B#U2+r{ z;s_vCteTdl^ot=$z>-_{rejm{zWEKcA$1#acWsPyo6+J z7L-}V3tZ_G>tZo$G&C$?H-xay6i>;<4$o+{o>CGpSR5q5Q*U58=O=erEPZ|i9p$oG zkWW!#K;B=8qfyb+5(P2yBu{1v%x0{}sUHonsprP>x~fu%u+Fh&J&$ej$*IteVRJ$LY#jfw|43Mts#3Huk*$QR zn6G$AxorIu3xA?wC1>u#=|Nt3*??coZfNlNID=XXyf|@#R!rF$n(y$B2_<9@J@iyrYqumXnTP5viQbKrL@RnwZQmu`hm5e2eSWrzC^%3S zJ{G7P6Cj@9n_-YD+Da0d`xT!UQTH1Brg=tF9Efy|xhcSgYZ zyF7=rX1StIOh2kR^_W|4j*>7dgDMQ`r4B@H*XPlG#uk*-ZMc>bP&O+3ZgJcxTp}s> zG{xc6sa5mP^)&Kzn}B^K>$qP_rOY8sjqL>?A8}QjGZfqBH9ObJUc^;>>%Sx;fMhiF(iAuf^8aI3WzMpc?xTPjo_vp{WQ}b!< zOwgIeu(OU?uzoFX1;mo8lUK@-hZryPe<37LYO2(mIiOCi< zRXvnZ<8~#BQ%-QT2mm8?Ro+2)pt=#%UC4J#m{un_JWI1nldOJBEq3Xe}8pYU7)U$pq_4H)DR7Nw1Xv zZpQQ4&wLmejsBulJSmNMVR>>TGeY&XAvTtf_!P7YM0~hD5@RxGIF6k?_G6Z3gWEO3 zH4cJ1a7@C+_`&U>mN*b;s{L9rz*WUB_$d!;a%MyX`rFGhDH}baU}SDEk71ycC=ZD) zgaI_qOv^`)g_!^uB%1ht(mkT>F8}<1BkR}Q%r+{QMt$-Uo0j`*CZ*DdI-&XEMM=f- z!ZA><6)9qwzv53g$QozOYLT$=Gd4;``O?2UUM#6z`p-eHVAo@6r7YXSuub{4KcGA=`qvxqey82yB|;a)R-~GO1Px{xP?HJ9Nt;WuogEy~35Vt+WK^a=+_&HyQ@c zb{B{HD5Wc*+dBfY+E88Th3WDo?U4q{v`q4KOz@9h_{uD_KqYil_+kjn?-i#Pi;JU+Uo$jl$H>`Yg!D3v=cAMCx)yo1(I;`=} 
z=r(E20FMa6_{V%MKCq{G{O(6BExl*x5_gD*fQoo!;h0aH2yL>tW-8~5FECOvE$vDz zQVtfSFqowKir)!;jQW(LsNo3yeh`F9cU|Ev?j-<{reuxbOXNC7;|Oze4Zyw55=d}M z5YTidS8&hn&O*Y-=1LlV2hNij`+d+ZU(6;}+H5n|<;w=O9GbGc{E)vHs96~aNS-Xr zjiN#*{1g|^i~LHM!o>08!zI6{$y9uOt`wW+S_`#rv9;%^P+2RIg5D57nWdq zZt}oV=;{6ENXWCA*Wn;*-_O)ze7&e1eahwK8y+8utD~>V#3K(v9A2rZs~;U7-xi>6 zK5}_JPsk4+OH&$_NdLXE;{QWrw~C_OZnm^_(Y?PkhWKvsYl#zGopuHZ(eS;;xpj!! z=#x6`@w6jJMCXH%A$039#bkN?NVR_G)Qu|z*?`3En>3EqIJCPXzg;)CyAd>3uh`S^ z7m@q+p)iXP3*CP#j|aD|pzph8s=yjj{#p>nXuRGW8-Wc05t%8xD&D3GDLH8Mm&osk z?ue2k8bO=vi`U1+GioK8P7iRV5YR*L0DsVp4%F@*DnHwO_pjm7W$8i`!3LNY_+*yBGNnB_`4TWW#_!7~av}TZ}R#=b_ z(3-kpCbfV9f+6Tx0L6g6=-i7|oqvZg)DfaXd}K$5gw4_j)V zqVOySMrO2vb%m=Hsydhr8jrQ*RCaIompLs5ZX|3TMLhDP`HId)GBP{+^f8gQtEjM~ zq+5UKm9SX7)r$AxKKeUBpj*hQ>$>aawk>reiIcSil6$7Vcp!_$ezhf3wcJjal^&l1%~ z!|v?Irw0&ET<{(8i%9rt&N|NL3PquYs3O!R`8##cvt*pdeb(raE&PtHjUoF(*kA=|jk=zVo9=ElG?-_@g2Uv*2@1?Qjt=_y0k)-n6zx#ARm2C)d z`yl_CPvNB589d8VBE^v9qe}k0nA6tHdj%2bM$8kFmkE%;5d=E5dXJUB>3A^p0_c4& zc-D}*pI=C%J~7Ks;quurHU=OZjlN|uIDSF8CFL9fsUgs%z-|djxL8# z^uMVYq9kTwVgX~EormZT)sfe3C%JbXl*!qAr;3yIrVtp&-LhP4ZdppSVfiVOibjxj z)kTaP!D2t$_lI%K+Etr_ z{_v@JV7{7s-*V+6DPl?SYAW%&nXh2Dz~{>j>*^@s&}gbH*?>2^!m20S*MaPwCkHB3 zpY8Re*qVRcX9+z1;jePS5|vT{I{zI2eq$8aP1+#Gn|auiIk;wpD3C0d_;_wSv$I1P zd-ErH_DYAtfh+cB0Lmi$v#9@M;*o7vN&C3bFRLSo8!Rl?;fAxDJZ z?!YuZY#;62=nr(p;DF$HJh%uko&`plq}`n=pq}>J%_7atM4DD%I~_^*F2{tSKTr~_ zIrfn`I9)9Z7K9zhM&b_($A+9rRf$LMp#^(MISF=z@(yQ##EXK7Oi9-;mr26xz|slLOn@I|%U9AT5_S2#mMs?Qd|NTqIH@0t z_}rI}O?MJ&ZvUoBzdQD}V)eMde>Rq~USui+Rl!W@j&;VG=oau@+q;&u~zM{le%xx4y`!mUcPzFF0k-JpYC3B2=ihn0W! z{zr;4DF3NMHaIHKoG16q_Sjy;q1o5o7MBk^GCJDC5#5*`yxgGt(2UQNRVu!6w8%Wy zcjs@&LSMVn2zH~Jo79S@7froz_)v5YpZ4CFTSI=Q2rKBG3^O$sZTn($<`p<&&%ci! 
zL3eHsODC_)H~FsGSc@vv!SrRF;?kuL+}41Q$uvc&ZFK zwWy1wj<68a;DQI1%#SL00kl9-wJcuig?yd0N$vt98m79KtoT`5qD1Fezxhw!rxN$9 zJi3$ujdbQvs>+A>h3N#Wv4y9CqeF_~CkL8|d>J*CA`sJq9|qNH_vU;Y4`+KM{6oh&;G z--TwJzAbsi2%256%gxK-In0+kH>Ifh(MNcevAyKvl8t+JON=+(V>Xcfd)E7C0O4kI zJJ;Qt>55rl;^ETojFz#W_9eBM5F)Jrg6%YZFB%2&a#ci`?_x7e%YAF1bP1_-y&we0 zhvVcHzhjBONZ~#P;5_MMZhxki_D44SJDX0`LV4aGL0W^|33iBz3>sM=*J)3%-p(YY z-RIamEHHQEFU=Qf&hV-xrtNjuR5RYr_7IP8Y0tHoK4hfBsAz6}rxHm%HQFKN0zxc< z!=k?4F~k!#oo7?{{!qp_S^>$6_?~1ut^AK+CZ&Nc>vP{i!0k_5fser7gt<`#J?Xan}Qck;a1= znof8Zt!!V(8}Hh)?YY8z4QA=+@odmJea@In`;G z`N1xeIb7BBD^7NbO1dCS<#!5}*JF(F=n8iP)$aQ7Be|$_)RITq5(^T~fX=okS=AA3 zfg#!SXst&wXilJpkZ^-`OcyO-dcPgiB2{seEFYb6$P~&kiC5T+5eGSQ>7NF z=fRpf(21fn-!ddW>chzY)pen${3SS_$u#&8u@E6T!$~wSDCi~Cz^~hcOH7o+{QTwk zfW1bO!nwzwkmc7s0pa(H_m0bTWtX&q>T#(#e&=}Yq1nT>7PT64KyH`uE#VpcCh0fS zuSndgmb&~mNf{el=^js$x(>y1`dPX)0f7 zj+MP7iEF=^b)%Km9D4f7`HhD~V9=+M@9{0yFSGOLWuH zlPH-wnx#H|m%7ZzhN;HaE}5yGO9yHEKDXgTV!llwKji>CHCK*uV;hw*kHKH0`eprq zl^JUu7VSoIayUPkGNjbD3XQl6(RzzqQ9A5|3|-uSvbN1p3YlJYf+^n(gWjndR0m-iAj=WgQ51-2AJ##G^blgLqB zxX=^;Ja)G0W`qTs90UgmAc+R)jG}G)?xFw%CJ!6%Ktcc|Fx9+%i^3rFW(eycX#Qk-LLuxrb#&dBIuUNvDC21$^G}8iPmo6Y%iJNQM z!n;sf1KLcF;EdB9aJ+~^*F`WNcfFo5&cdJ(W>v4bLBKQQkKNF7x#ilVu@S^mLHfwp z>tAkBG9XC`Mz3$6Z#nr4)xlABQA1Hhu|#? zAP<@rf=*it_$<1LEN7CfNYBg>bQe&)99<5f9=XTOu0}ZO(TZOUdm)zydgffIR&MTH z5mh{}!=|67_BYW(XCL>9yIT|UFLp0ZSOZKnJWwJ{l~YX=?ky3|DQL~9r_mDBsoLg8 zfUp}6IWAEGp=~dvTT_xbOq&$Vd8`6^j=LJ6zjjYgBHS%4c`_Xt4+EXA(>Xo8HWe>| zl?#hmlEwPy*P?oHqM4?hdKnmvJ5!%efbJZJoP%?An^uq0@5Ttt5RSPCq_*-&2DBX# z1jkEzAM!GKpO=)=%^I8}xN7h0xrcU+TE7I*(#56VVgJYv_;E7mf0z@rh&_esLl6V+3Y}=G1NTnxe>h~Csvp>L zp^RFA>f`v+WnlvF!S9RH#LK_%*6|LOgC zkP}C2ZBbX5aO#ynS=y7I_38cD4i4J}3pmew_$De0PepuhL|Kk3BH#j5UrYL`enk!) zIg_~hy|SIySbb;>?Rd*1e?N>x+p&;Ve%u>koKG-N=Jv=8aWkG{1HGBXE`yScq?iAC zKzM^89Cf+9Yi}r7jL5v$H11J1jq({e(9ICy_dgAkM$FYZvQ6AJF}Z*ehn-Nqx@boI z;ZS40Yx4NB5`9p{1K~)7kPEblY<2CIKUbFDA4^1O^n4v3hs5nCSREp_MFw? 
zKXgIvt1w)ZJBE``LPuxAfNMKZu+QA@t8-7Wj#x6n04Xgm;s}YW4(XK5;kH6lohQ3E zez(JPCe_<`@@!jivjI=W9}ZqDeYA>iHF*&b^R6HOAjW@}-zuE{i+RL{sVbX^qK`y3 zZ}gg0uskaT0i{&hvmUy+clkI%eOIx4pn^VCPte3Tbf&t;mF!7pXD~Q3W1YpBs3vZd zt-RgH+%)K%XE{8py<{x`>gv&{y*0&f3_XTXZ08U0T*XZUgmAK+MtU1@^{Imcic*|6 zIZT?337b9*1O>}k9=XTy%wCY(KpTLZ{x{T<(6KR;K+XCM!>SbvFKKx9qH~teeT)ca z`wV5aK+&O#?Iwh@2!+IeT6GB-rCxJQ?4Zv8aqhM}k91$BljLYpmiUIHyS*oGUD#gdqy9ON%?OKkl2E*OF?+03;pwiJ)Xg{ddqdxO4 ztjI)jt=jX>>FN#YIVawofHXUoMvHRiDj!sD`Uk$diieC@VN?G?DPA`t*7Zq%R_#o3 zF)6R3d#Mpp$&!!FB>@}vyU1W;YBr+=Sxm725|1#}vKTedstFg#tKR6&AI>^XP3&wv@KbLvc z&22Jm=_maYkdO(|wQ5ej>fstqY5mw4ygDN!Dj8;q4q%OnEm0|ygK6K5Z;TC>WR2NP zhpv^)I>g?HCo}LJHYA|DzQbQjrvpC~FJ{HQLi<3c8={^+yAp|#q$>rXDD`{0tYkKW5fu-m$3mOGtpsU*DQ~ft+{>cKafJjB;z(Y)L5(1r}fd zjC153p&8VDBr!F62U7nf?i;pzJN=2SaHfi;i<8f)BuzC2jTRS9ITGC*$k6SlQn# z#c}!3I4_=P{x#)NZV!}KevS=n3EC!5D&-X#UFs^=!b{B4la7;5-m3HM)`kdMjvi5o ze>IQt-{6Zh=8_qOg*nVPFDnGoMdMQ!NJRoM!2uaD{!m($-}zI;6R zx%0v0ir?-RtCM7-THX$)TJphxWtF(7LMh=jav85`7=+9N%Ptp<-FI!VFy`+3(tg*~ zH|u}t_c5xKU*!C3*Q448rJSP-2n!X1+a&WhfeDw77B|{BJ7)R)*b9yS;P-JDY{~Xy zSp@h#28#u3fBiP1cfAMZ!Y%zelqB-F;KJYhz<-grNHBRPt^2uPZjIW*eFU%VstO3R z(1b((ZPut!@vgzHtk6JI*7U@16cjZ6O~Jfksk)a>PihX83_=B(m~s0yw^5_qs?cEG z?uD|3=R%xXXYohPUqZUMvVxj${aoQPZD-A0{Ju@h4*i<dThzjv11ciCUKeRe%k zuH`;#bi8v~@Li?-5iIZWI&th!|LR$u3!@p;U}dg5rKh`3*DNmWMVOHJta-)o1z^`BcI4P2+w-qM+ZtGLNaQk3kl6E-SP11zp69tYIP|9Bx@1dgm2mS- zGe2v7LmxpzN6fWfFEE31ddiNtD*P>yuf5`xN~;8aIaS=>OPBcI&XKELv3NCIqMOd^ zB61DW;h{_1QUxL$7rfVhh|_L}T<7%4i4-B?RAcXDKyL2V>f8K%ahXM#*ZwQ=C{IcWz z{R=2Pi2gQsUg_^!!&mSp8B>p@Zu=Ja`kHk> z4;!M#Bm#Dvt!e$%3>)yD3Y;|t(7H10u6g7KG!u_pPM{PskGy2_lctrcWsl;>G|!lx zeim%twqtnwe@;no2fEYkeIAO1{IPziO5<8RT6m60#yi_@vgp2UsmCedNJMleFUbh+ zevy0UbcuanoVWTp+C&mw(tgL~gbAotkya2r{T`UwDQ%Tje^5z0;+jdrXZ$(!l7|M4 zWwwwgx}Z@ss(8~%`?09pxx{r&s*?mE7`pBq1$kw z#qoykPJA_8n-|>AfiB0%IO{(?G8P6v7R-76`0+zi17$l31@@Z z4Gp6Vz(MZkW=|$~Li(7#;okK`ddVxS`?RirzcIesXL5W^4*DlAna@NV;S!)0O~YA4 zHAo_53D452f_fj%p-D}>@V1qP;>@j`V`I{A&`U(`s~f+Weh7+j`*Y-c-hQ|=wBqkG 
zIJFq3D*`5255>TF(w9vRHnpgfyNT1Ap%P*RFDZZ{{x+b zuC&XHMX2LpF-{JN)xAVkj4wWAyi23SX z=yNMUAC|uD3Ob;F)m+%o5%ROwOH&gTZcG?;b5dtTen&zvfObDhIFYU3vHPI;j=@Pb zR@6v==80P8HpZV^1GuO)My*S~S7$$N z^bNILy`U}Y*275LhD|iiq1$nQ=h}@(7BvM$Z0LgQhPhz${$jujdF>*TQFm=emzk818Z z+6OU+w6Owg{g_-@p@azgb-vG!>rJ)8yPXcd-*0{IRu(AQuSPd!grH|vOV1s@wCNv2 zz>7|jr8W@v+ci^G3k?R*=cj*RVGbUuDgk#Rw*j81{@M9#fH zh?6ragN^`S>EMtW;UXu>O#z>)q)wvUKZ0c2Mjw81@%n{CpN*1v1j6UpVw#)GrN1`q8DrII8@aT`39A}1Uim799TgZCVwY8YX?*)n+`Y!*i{#Mc zlTD3VRwc&AQ5>upT^7T({|{p2|Ls=(U$^_tC(AMX2Nd=0q$-a95&YDeDtvf&%5{pQLl@Xshs zs*<{$hJp*70X;-Du^IB;@9*1u?wc~(N80H86(R8|&bHhcI(Q)q`O|Mx^-WP%xRd)| zcmR(bVuR8a+0S8J80;~0<}I21UEgdR_P61Gi*-WL3I7+5AcO(gmLze-!+9C^1gxSB zcKoNT!v9hhYIJWY{oB(q=>JzZq4FQPA#m`&l!ofkW&aQdbpI=HuwVKYS-=JV|Hb}) z+z9GXk`Cl3B;+|dMB(qVJsJ5gPXi@?$|!1TfK@ow41?ZKnJLdT27uWUG{;cSVp!$B z@F)A>bk_}S3X8hff~Od7VG;TE z`Rot}a~!s}Q@%p}^76LDX#NJ7+-`En_{Wrg-<;euOu?Q zGD{h(=qoB;{ZcRvAADhk_o9L>BwUL(#Z4K8o9OA4Km%Dum0Z5pc|oe| z`p^gzecbYqHs(;dn-q!LN!qa-FT(Qi_3yL&kiTayWV@UxzSW2tI>cv@W)nzJ7Gi}v zXP3CR9u5R&<6S*%9(``OQFUs7;%J%~s5PIXibD0{q&vD>1~WtJBKxN4fDPsoMWFq({>nrT~cx?9f$dMrrVpfev!#r zy6vA$tEB!fSH@gOd?=($M6Ad-7h9(oSs))a{HYPo1X!@yhGKu>>xNbGQFu>EC1Zer zhJyz)u|aWlS=kq@eEEG`bzaC9B!_0PVxLzI>iHNsjik{x?uc;SJS<(fRz}S6d8H() z8#J8!EZVUG;9hJespEYQq{C zp@xlE>T4HQA2Q-v8hf!vprd((w1-sTrK0RJ2xRFZX~yrjMz&!h;@v>biL>`{xMNKV zs9{Lsoz~nA^A8pnUXctH4n)GSRx)PrVq0SE+eORP?ZCDVu=P3`;q4&sxaDP^y4CU) zHMu2~o-C6|PF=C(OO6A+y5HYs1wbE0G!Cl+O+r`sEG&uU(*8PHayIQDNhvy&tHQ$q zb;qZm&bE>`pZa567As<<{wQ(*1yqXhlhaTfTQIf+`{698sG3-R$U$IxVuBl)VEZ%y zRM2AzUl!19a9XPvrPhV*#c}3KZH-p;KJIwd@&e@{VQD|3c&k_s5l{?{MmpQxE<7Zr zR^4k1d$H1ogg2pooEVfM&Hx3!^&Up$lP!As_Roj&$xF#hGU=Ij4&~}u=`&=Mv|}RI zj(;#_`6@Vdw15BZkJLi#JQbOZTMkGU9ugj#?o9#$+b&ie+B+Ihc*iOrG+({`&(qWO zk?2JrvVZOz9`8LfWN8dt86msqOq^TwVKhVZOM_hIR)_|{0|N?N-YOwzNAx=dnzcA@ za;7xYo4#Tm8M0KBaY8$)^E?FT{+3eUcRU*e{qVQZY+>P%)7RE*upDLYQvF)3M^vPo zMk*&KJo0ZlQNuHAbl@B%5bk?PG9EG~(VsxuZzNdaOh8KF_db~(&1Id}$2oIf*oi3a z*~O0Tl4zD*4G!{q_RnSv$X&@3*$?>q7z4rg#<&2D>!E 
z8kFgVE%)2t$^|g*;d_{&wk>3Bo>0~Zeu@9lx?=HC6Qq~tmF57)bS4({*<J)5@L33 zD1;=6BEq}PqpG{r-V~snFwDOSmZ%AG1(dh)sf91kJ$zbdefSZtc^#_Mwrp}9jb{FNuBL6!JOLL^~iui>aGAE$NIjt*5KiLv1b$b;0l$Db&{iYKiQscK{+;73;PHD z!*w^76FO_dD0gE{YN*0Db|)lh&E+aUUp!N6O=^a5akm_WGpVZ|_8aN#=A$e+OArcO za@*V&O*`8aCMc@#QG>+oDNh&KtSSiB%7Imn^-IfXP#bXt!+L_r`=)S@gJe6x; ztr<1$bK)tmKXPu{duq6~#vp26>bGh-k0AGzbH#D)HpinPqv4B#sf{N&)Xzs0Uu(~; zb=g(;9p|eC6XKG82$d*{4BJr_;j&z+)G%Sr(42}KHn0g*EZ~{ZtNJ8o1hj6vE3nRz zYWYPQ11D@V?v1jOl-2?as3Koy*s<@rkVEF&=6tIt^^nz2xe#)7T>ZwwHh<>rJ8#5->x036233UeY*1603n}wdTYR1F11)YPyFiF-Elw>dZ8HOXJ=)#DLzn!q zN0_$TeeN3;`vY9h)xO98^1j=5I)x67wtw^bY)j)|-@O=Y=&lQ%Xpnwo)wV5+c}h12 z)vPAF#`9$B*xM69F*5i*d#)wW#m8P>S0+EZW{x-X$+y)QTfd3?et9G&f6@+0Cajr{ zHh*@uWf&+KI(j3F~mfQ#StVz-4S#77H-$VL!Xf0195R+deC zxb~qoaizX}x_IhsUPefGhiT1WK&Lkf8!VU1(Ns9%`J_JJ9Jehe9zVT1D*~>dd6F~e zDJU~M9TR(XXE)uTyK}wT(88e-{(3f+sYIO3@&rUXF1nR=v|^>wM4jWaZ}pJfOc!!GF#f5%7m>fB6bo zO#JkbOsH-Z^}1mahsmKsLDVAVNAlHObB1|KBa{FNO6Podfc+AxPeD1nKSF7I6@f@t zdY$c>!CdH~_FnxsVIqeCgG81XBMKZ&hz&DtH9eJ0#dEoe!^E%}5d}MzcM3&I%qV`n zS*`-vfT(f1&rs{e7+;}aGy2W>X9xnxg=z&d*v>NapmT4{!(@&>D4q5ixfi&bztmxp zc)crgo7xI=tU6kCmz)*6y_vdA<%JdkNbf!sLfdXij+UEsqD|^$m97>pQOnszr#XYs zjJw_h7#j8Vx2#>iPbiK0=l;Nc77Q>#Jv3RlC3e2#46-xYR5SM3C!oIkR2g4VNcsF? 
zk{{egF%0# z^6A~l3eNUG??9cqeI;Q=cttsbmMr@kbd^yqI%C?_9G|7iU~Kv&!=jP@b=e-9H!2H& zVo-I85ohC{LL7v{aj?ZsWaSxoNzS%%wrU*jXmIK)2rQ~{bKjR^wzNR+B_{3sU!eIL zOPi-v+-9lQxao1$uitm%5J&KIaRf=kC#x$RCfV`{6HYG>Lzo_^hYOGgIv~)qcE!bvt)+_cst?G z>Ltz>Y@c@PJ?}UiX^(EY<+2Xr-`yO1IAX7yVlyf%#K5FxcHB9gc$T{O47cW}m+#2) zhJT`7MEG-Vy3CY;uwVu2lZ>pGX)*z8>M$&-6|WtWd(}zf6~+=y$Cx_i(0t7ZDfME> z@E+@f+hcX$q*EiXDqp-)D!fpr$(vep$cjkFM|2&{H#k1)wmrWSAUhmsz}inG|8`kO zf4H+oqmdITDeMU6QlQ~Bhz$iEP}`r=DEI?9W7Ox-WfqDOaLUBx#s%J$R9txw)4J_ne!2WcDZ2{ zK=3X}#XcL*Hm76V@q#XKkY+-ks~^IPrx5JGchnK_WSfNCyyeb z<1-LDI{T}K-A$NfHpCIrAkxc#%J;>=41wRC6S%Q6Gc1bj%q}J1fNCkEAriT?$rlUs zDeqjrIV`p@V44Z>BiA2v+f}YD(9-p8656FI~4)=CDxo(PxP; zDGe7(ZHajPn1Lus91EPW1BG!MgV7!V1BFekg0q7K@OH7pz+KTk4u;DMUTmHr#A&5d z7s^6Ep8f3AUlYQC572^!zA8+SVhlERzlyyZ_BaWclMet}>Vx-L&^!~KQ8TKN6C?eG z6p74?dbih5WT#?D9|c7Td=$M(XJMy6t}1kUdcSsZ#xq+`GZ*4fOpvx*BJ{r8A7xCD zMg_%FuRUasQh_Uss@lI0W~&r^thZ8%^kN1JR$H;Gr%Wijr=T0LqhiM);o6BLq{9kR z1rMQL>;&7aFc#Pa={Fjxe|sii4Nau;0B^)3%qJ>5cXWh9_aR(XLsy! zC*D~ey!RB47b%~^a(UKHudnRC&|n_DnVMS zb1DwptV8+Ir&vY%qee`in%5rGl%!^OLZKP+TW@gUarnLrbEA6L@bv7Ubu+~NwI!3S zG$|i_8vI^X0l3W|=5>IV_QrDF5Fufj#(!M=#kyOx;L5o_T1K&+rgZm_4=eCCDIO3I z^IV~rStjm~^&O+cW#;o=Y3tdr&sZAt&7xnmOOGEbWWnN$l_t)A=l6(MQ}?w1o*fFh zRIm&Yf%dK+R9`jdHvf#}zXHeGA+wQxT1qg_u@SDZU{l&d;*wWDYA@+BLvZ(;pH(h);?~3Y#Yv7w~{t)+d^e`FQvd>d%9?zdtfk%_T<+T zm6X$;LK&Eb1`eeN?2e=LXt^B}4gUP(%v?{#?95)0c&SjwE;mLJL-rCYJxkntJ$r!9 zJEa?r3=SL`atdS9c&Am3K8ts%Kuk*`LP^~05u6j|d7SDHTW;s_c&w*25@n1pDQ zqP*XplUE@gJhz94nXbaCSS$m@O*s0qcpS;FLpE~Y(|(^a9?_(%LUP&O8)T=Six3M{ z;*(F^zi)IvQeylgwwyj~__>)X!|hcnW+b=hy30WjbZlW)QGQ7V9F&^qEJ9IP_5`aH z!SxZM1@|`SH6d`e;j$Y03DPJiu8#JKj5!4LtxZZj>#^wvooSgO@YA#eXW(}i(ei%m z-=#f4d7y~=$~hL(`3=*71JL0Ht&FHP&@HXw@cJ<5l=P(7o8%|fS#b%3*&HF~d1KHh zppGEl#L0eKHzr(O2P9a1HYd2BVpHtS|CAI2pdQ>V3jW^m&liq@Lg9HL}XMEWWIHCD|+4Hd4k9sJXT$AR>AuP zo30AHM4+Vk@q?vYNHySA#%fjO|4!wC*GG^V5q)Q|8}qy=;5m5M+u1v^f7^r6ccps& zr<-_WT6@?S&tqf;nf_~owfbZrCAcGF6S6#e0v5_qpI{@2|tnexx$;g&qPOFi;{IWI}@1iVbbIv^;qm@Wl^zE5G}g= 
z5b%y6{(|`WZ4cKF+|oT_fK9?*obrT*L;Dd_bOaK?GbQHpPWWo`ov*C!mz&qhZ3EW3 z9G$8h$}0j6PnnGrFWckRJg0dEw463`;^)Qa1J^;cKLkU*fJI^1#KQy#4_IggvIJqA zXn#O)ldl?=D309mna1yYp6_FvrY5jp&!2-bYv2FAH{L(6|8ZCYTu3vG#YShVUtC=a z#9ibzbRhc8Hh#t+!(ArU%6aEcr`;}pnJ^`#f?iMZbSo{0_}gpRIroLq{*xur=}UUI z!R<7s;$!LoSEg>cz9wY?p9FsWZbo;-;d;^h5=&ql+~wRoC?3IAEl%_Hzt(j9!XZRf~hcyT&ORtht<6&#tF7@taX-N&fk{{2}uwmWwC?@aGzw^b$Rn z0tQt5V1OmPo8|oUffS?q9or z?pgw$@$q>CQ|U803WK`q<1tv&BvoN2lJ8ywmJ6Hxs>~9Zk-A$b*nJ(uZ8+a5XPe;f zn240)FS%9-|1YD6e6pW|^N->59k~zk9c<6nsgU&ix2eos$DK=e=lcT$NsoO#{_`+- z+smYH{pSo1qLNs!0dP(E^v;(4Y=Qx3uH`C(21S1N!)KhHd-}k5Lj+{A0GI=Kz;4Ql z-fhnrJIUy@nTGzg2=IuwW-cU7RraGSu%Z{)UXO)%*?WP4G{ge4C@Jto3Q9N8oWZu! zoGx5i+o>nOTI2##2ISIcqi_rZiJdMd-Rta1pqM@;p)IcWJ1O*LR*!N5t)}zHnbIa> zNJkiOPtSB=7Fd{JKk3s%+CCN2YuX@yGyc?7tdH;?&O?8Mn+P=m@&EA)+u@v_-~^0h z#sC4f@)DmxOKfwIVe{Gq?p4BTo7V-^NH38w@%=lIOFS_L&W}rT{Z`CBe7ft z+tWdB;puR*tTk43ZRdBx(R=mQ&FKEkGUpHCP&!*8Zu&-oQlqg()Ryz8 zP^kRO0Gdo{w!4WXy8#iwKGQqhw{vary-UrF8bN=fK_qb5RP0B{!qd*}NdzScrW}fqf5_p^EaRY@bg)~<2N}x^Ib30fyZijosm)tt#QC2?KGTYV3Z;^Zm zm~M|Pa+t=i!SzD(mgW;mMLOv`X=K8zW7+f<_rzhzR`M2qe|}DVE9}IWzlJV3!z=@4 zEiG<+gYi_i?vBMRIMm0YTK8D(#G>B|m{FJ6-XXqnp9c7V3GjdUR!?p$mV#+$)B89` zFylCYCg#%#ZNz)t`eW@6vI%(t{;T(O&&(x?|ws>+x0umu6 z8Vk6m5j{gl%@#`3RW0k^{!v(1GK93g-uuwN+xrioI(dnOYG$scELYy{1Z|wKznV4) zXxZL^Ng8NK!RuVOK`F^Ocr1wKTR>g+>5wglDC1nBi4R6}@%5cAIAcz}YwnxJABDl) zA)@1=POnvcUQ0K|g4Rq5#Ow{73j!`V754{=O5d}L(NgB2SeuBQ=*Wdf{(g)j-wh;M zJhol+=l5tn)+>Y%+}q>2G;Wyw+c4U5wuKdkCOm}Zi;r1UjB_-|A->yX8;|U%PmPZ^ zC5P!@H=^$Sh*y7B9l8FT*bKGHBOn%>P@IihrEt^=$8C1$Mv<~}UZ!kaDldo)FHUX=>#2u1ZIrm6OjQ%^I!KLC7UKm?E6vDGmxbL;#SEsPV z@ojlQhZnzO6K*boAJPOk73gMvo9?W=vEOp37*79V8%Pa8CpBd>{k%DiLQ_*gm;Fy< z_pUsK@jk7>HTb0_!ef7JctvKRb13@grOarYwR%&rdl!+o(-wO-&gyK=>0L)-uS&gR zPe=eGN;+t7v)c%w<>LPXc-uNDA=zfLD+2y zCSvsa%!6DCQ4lzpHssQ$mBa6h_Jqy>v~T~WSjXWpImh!yU68W5~=W^b_*n_FF2Z^bium+}l ze3~*>X;Kp5MM7sKBtlcmN1*3QbnwG<)kh;v`V#L=mO~bOF3(VN>uyScZ_JATSVk~^ 
zyyBT@wgA=_PtXU(%?FwJ=q2ZV)&ES{UIdufd2v3accMjsg0fB6V-^Spxt>tv~aJC9cx7U*QA!KxG8`6GQK+h}{vJ0bdHUx8q zdBCk-`wox2_P~QjYq0IM@(9Ac;X53_QZFkm#1~H9e>xz(@}U}Q-gPbmL*M+V&n6OQ zipsTstKhq{v(wBBMZUy--62%WOa@?`5)se`7ArlC6EP?b4vridyJ5WM3-x?6z>kRW zO;9<)SJNXv<#imtd2Ne|2xjn+-R$38gh^Cd6FvGd0bsdkrk4wTOF~ zhTXWK2rJ=|Nn>h6NDaDXA62#`YTot?CucEE z{~%rBGywO-j83ObWYc^EE!t%U&|jTrAM^k4e6gMAqg|}dPLTUkd?0t9)k$-o9{w!TedWIO4o%TlTE-=SVVJ=a=OaKA(Cyf2 z0LAM+a1E7i%tO?aQBqAY>i%%yhOi7bwOvJ8yPIBYt2?CVFDhc6HQ)9S{%%U%ZW`d5 z`gz)R{R&+RtUp{Nr6w4V%bt49s=8V}g?O-8+YWYyFzrr&-#dcoe;h#Mw+m;#+}NwW zTUy%>8G&^32l57Y#zHEMTolvX#Ev7fS}n@la(hpF%?^gg=hmHSH#N%U6-V>xSV{+) z??EwMio$!BFuALq`%$_1TWXm>tN;9FeG1KVEr&NWA)cyq#>V6h7Uvp!{+r{f_Ewe0 zyJ!c!2(WI~dE6|RJw7mV&jwtYv$6|*k3cTGggTaDGXIEnr-MDg(ZL5VM|n25+eHd=tl6oO z^i4;Gvlx&%ftHF${!A!XEF|gh<}JPcr9-*aP=ZRY%1iVGo9Z;G^+A^ug~}rMSid7E zlZ7ix265-UX@K`lRbDT;{0fM1I@)JOp8H`9#GiR{ zxySYWLU0XctXk%C(?{wfZc2(I6-RwBna8@R9_Eu{AK~^o5@F)QZMDK1uAa0q^@;QA z!$+$;qc{P2G^O+?1F9@^a(U6r=(VTdK>9Zqbm}W7a6x8XVflG?bZP}PPjdk~&%j~D zilF1ZpY8bzYn^$?La&Rak>{t3%$aqyUrN1L$8~>}wLWdG+;xRQqe~Fnt_p66$J|tE zyaZl*;^OFXc!^vk>ccm;4DtldvId@(oaMJeZuns{&b&l^sTppvz9sx7UET4?K+yF^ z**aFw8g^LRx94Y#)Muz!l#p&%$gEqV){nZ#fDk7E&AF|C>bX=F3F=*9dii|gV&Vm?w{0X=spCp$+6vm4QtcK3X9 z%KONLAH5|*+Hy{{-~2y!-d?=AGWIU~$5%+AifRa>^Cq~fJ|d`BUN1v7kB+V=c#gOBolk@xqJJOK?Q{VA z`n%(yPs1NHAKR z5%@+CE>c(5>Y}aHzP-5>-AOg!mp<3N*!uUcz7VMCXG>rSfb)C)jpPt3+B3G*cEip1 zBY5bzvu4yVuB{1r6LSF#)8Bl`}f)V zU0`L`{8lbBpK<%Kpr>I6U0QF`LsM}(iqh#Yw$hf$HHj|gpNrnHc;4XF`~1e8p?0+& zy4FCNsqc*HkK5fCk49=Q<;-F#5n$(WX9eqzgXzCdda5@x%x4*BqWh+Aul|7z>!JxV z=fGp@*0Dx385sh9f~YRiW-nbiMaVRo+&!yv#~afv1~vMlN!H%{fw^wywO_m&he7Us zeJn6?olHFMvlF8~O~3dbOmr!^-)HjIPH7TITCfi2_y(y>4h_f2t-Z{(?X0EjYTkg< zEaS$i6)n5&w6aS^tzWt+jc1TP<^q1&4i>PdgmrCMnfU z_8;_>Lx^TOdG_Pfk(L^d0L|42gA>;}ELU(A76?7*{yX?zB6}ebpp#HtLRhue!q5Tu zY*+jLR|wMoDP{hjd8t_=f$}Fyqj<`rgeLwlEWQ8YaqIFvCp8H;^;;;N4IhxQdv$l3 zdXv-b#K6M}BsTqk1#Zp%;)K_;*k=q!8UJtaF`1$aG|}XyUo9aAWbu^2Dhw)Xnw^5B z058UaHSggg{U-Xm6L7!vG>hiR>r_RM!7uHV*qk2o!z>%?3Z8|s-*1CkcoKm 
zADSsfKKEBZ4nMMq%y70aL3;s{)bXD%*UEKrZSNqAt%vnHwTepFk`*mL z4Mr@JX`Ljdivx0O1%U_sv${Dz=?$o`O)kgp4yEiI9V4Bu3E2XE0BTXeB468Ij^7Z+ zat;9+IOUaa%gH0m(SFMO%tU3Ux|r*WC&|I6 z^%P#a`EXfQB|Gpom9POI77gWji2yhjs!?fy-MLvMw~7zV7fe~F4^aci!DExjr4y4g zJ{*-KTwSi`{&5qEWzvz-nTTtV9=oot2B&SDo1A*eo|m{ZfN)k-oKm+YmPV~v8H?qT zrb%1hkh)2wK$sMA`#VvR2`QFxO`J;Qs1HGL0&PFqJxHU&oxb8;;%8jY-G`iM`?0q~ zh3O~d!OQ-fB?}XFBR<=FwJAD~ISXdGqrH!9e|{WdGFpAbw883(#v99w?wNSRb^n(v z9^ z{t@exRe$I%E2`O{=#n?(b=>zXO99>Zs&;>)z$_qb;WFxeoc&9AdvpGLJ43E34g{N- zS1_EE>J5!BU*8NZRlU~II9!WR>2xHbWm-V0Jufk7fC%N%!H#9{*78Ki(BR1Y$<>Lg zpb5Ioeq@@XqvhYPnAh9lEq%CkhWd`FK-?N-Zr5l z_Xi!5DQyZMCM;$)Z;HbBRcLFvQ%N_K_wFn1a!?h*4|KQN)MUf)W(mwO^LeX$ujjoL z-Qi$-Z@PapizhC>7FTHz57WM>tMwvI>W?F-fAQZ^tqu|8lujK~IDOCb%SD5nx+%GR zB~|PCaLG!U=LbQ2&^w-wd6?S|NPDA?WT^tzKbK}#`tWCWdT+!~|ab3CJFDs;G96#-j!1bRlUljn+bz;Ii&^KmOf+Y88*#TgR zE_T7SHqP5SLajy28~qR*)_=MEd6wj&XRUA` z=rd-K+bZU#W=3b5fQGg6s+0eG%3oc@9K#l<7}8mr&+eVxu79ic!90Nf;36V^bRjfk z^EWP68=)B-fTo#O@i4o!3!cN!cMwwlw49y{b%Sc6weA`|o6;iBl6h%F-XyHadTIUT z?X}@zG{vjkiXczk=lBmKiNfVe1g%kDsX(X+N3V)8Wi~$PsHVHuaCldx#(~xH(fZ8# zO~2DL5u#IQHF0!;e?_`_xjpIt!h2* z#N~D_+^{u$mU_aU=$+l%D5Xh}{!{lKmG-lIoxr+_dkwBTY2*f_*yv$(B2KYWC+UJ= zc-AQ$yx;E12hq}_l6tLp!^ds!@S=9_6`zFGa9i?~=4jM#DeGNB7WcAU^)=&qkMJcI z6d_{%qI2!GBdg6JHLcco!}y;q;pHG&>(lNrc?$qsF(*}%oWA~DIv5ffs`pOW{$=*r z(^qRd$0p&_$-FtrMGj02P`-M74I2Aq5)TWK{l6I&GFVcU2HU?>GGi!YH} zV>vx?BmVwbHn^hs7KBlr$IaZ%9lHP47w$W(LpC9gD%6`QqB24)Y}x_&TEf41T9b^f zYW7H_b_bP{%gfZ-5}mVNinP65#Pr5ysh~&<;-^i%dB>k{8Sy7{z_2$tc~CBq&iNtP z&>6u(x5i8<(v649E@IPQ^NwM#vsgAeu}4Vkq@=Uio{Rk{&CyY-w^c zxrW56=KB+G2rzl=%A@L6@H8*y3w6ngWq&kXOskf0q!-tfsWerrHm?d_EV@llROlO+ zt`rroil;w%;s|Fbha9jkX?NKc!?wXiPSa_;Rd5kj-sA>n&%XluRp>(y1Pju3;f2$HaEvX0eomYQkx zEa2D2rHq;9hfZ=nP%X+rwPq6!Tf97&{QPWk#+z1A(d_D~^0?|?+OIq85BM=F#Q3nTaYATjC26Ky5Tkvz?}a%rfke8$5JNM2BAZj(>E zbfE{STiPvDZEzCyN8pVloD(s1`Q!;S^xjVfiAyAR8--V1^4#6_i7x3aj_)s1vxRGS 
zW@2{j_K3uYp$G1x7uCM^)K?m%8Ys^vt8fjB_ce2JBa%tOI6E{9P46T4QRV|>GV_E>k(IRSe-Yu|dCqId;*p1U~b5ltdD)M1_$gAFkQ06v~l0&Vov0y6|^}vlJ*1x08-azS)nFdM>ISdrE zF0Z?eDYJPTaQsH}>1XN$W~}5lLjA?ML54WSIaE3^i9(c~C8I|r^8M4XaK~HdzEL4x z4nvZ#uV4VoR(3oJ`P8ALKD+e>B@BAkp8^c+KF>>U1|8gE0J|_1hHl#?t-|X>t&qv#LNYyt zexJI=9swSJMjn2yF2RgY{pTx0YY86!&Iem6>(;#fSVf#aT>eb zC~+FxEr1eay>xNF5D?#u#O51IpFyCfqZzCuGa{+q}Kq3I$t8f7d6c;;Pem1)llFOGToO z4l9`c@tDDbmf#EGD6ww#uz-H*aKRryXIE}X@IAUa5`%N4YUvwjsb*KA>!M7g+3tWS zwy*GC4(}+&-%FKS3_9lYDqsz#oT7fHa7Tjegx{fVbU)Cjfg&g%Yt8Z`C-lqlZOkVi z3rcIA@QF`<6T(|{c4M1s{sk2hkniyO3AkmA0kl`4rH_rL$*_8%nXGX1hi02qUC|za z{rxwEc>D;vf{zGizi!>Xk8!fCZ)&KA(fxwr#Iwi+J?NjP4AAKkhEMMT$QpsB`->fT ze#;$4Fp|=tt2~SC;Vjk*GPsn6sKF!2{kzA&FCX9(rZwj2*)8T)tAiurdb_u)JsUi$ zL`^!vf+#pf!@C&aruPa9@Xc07#@W__ri#u*=phLS8)$T(rSzk*U6}Lfgi<5Pq4|7| z{*$XPm4{?B;n-GBk}FPFTu=DiAQQ*1{i8!}wBHkX>&cR|Uidd*4r7IwnPnYWhDY+y074VEpw}dNs>vo9#Hn7HrccZX1wNVV&Y7k8Nl*Ny22%DbSRd zW0Ldyra4Uj5?c5UfVj$HY4;C$`S^s1e|WX4g65?RF})3;hG1QV@h{dr`Cl-G#yzbM zztn~hyHX=dalH-@=EJFLlT&YMmOcglLc-{g5X@G9xr|xXu+8RlDLu9L8iu|MR0)fB;!431O(=sE*!r9xHebKh!7+rGVhCP~_RM?cDrk zYaxBaLN~e&GuV?STz>|KtEkz6X>KFv?fQO=?5UxA_VYW!l0${h74s$QzNmiPq=ZV_ z=VL9x!X)uoVj0^EGSX-?lq?d|HbxHx2dLxbI3>$bER@AA-Op14=L2;}3W0C+LsB9^H3D4T@u)9V~B7Ulo3YeQXHO2Ip*)rDj z6Gk!?%I^^zgV?np>|woM>kjE=hUptfDk5VGtq}4fYK}Bcl~bla#A)!TDLwM!=Ko}H z+3OU^Y96!8fcNyJ6D@e0|D~jzClm4U+fkSH7>>^R6F@4E51ozC*S$sIl`x2ruY4VK zLQJ%*V$KrZlMgrT zL_ePUyQ35Ja5EnASYCcQ-zSwW{*O{5Q=+N)#n{=Q(B}%0B(KW__i8TbnAX`K8$q;S zEcF}h)QS^td)X^qp@^9jtAs`LRd^^0S^WJpM#2h&+@N9odsnttyJ6znU?PK?(F$>z z@RE>YqNW(is2Y$U_gWjI-SZ+3R|wVs8jReH7`4;#XZMSji88Gr*%Z1x^2vD%kWvOB zT?9j6j1y+wIRv9oA*;GUc)wKaP+AQZ1){RXrD1X;nc*TG6IA(T!UUs3-K4)>VJ!>H3O1AnxukB%s%3=BP zZ7wQIk*84-BJ|>LelzWHZi+^p%lcT;IW@XZt>bP7A09oj1O2NsyU{z0KNOp-7Aafc-pks zl?o$_&_RNR>?2+G%XnlB|F=`5^f#_)$9h5VCZd{xS4?ai*}Q5nDHbI$%%GXOhz0zs z@I@$AlExbYTvoGFo#>7L0;EFA&9dOPEpN)Qmk{xCP421XJZl@BX_)yAMXX{Z5pjz; z*;!pecR%_Eh2__^@E`Lmbp3PU@q23-~DMNmF-RJ)6Oon=0^067WnMugD 
z!}+>gB>QN@fR+yOt3{WdBojk(3~U6ijte~UT5B7Y=WITX$*sbWB#F=Q9H5)jAMS5T zg@KiKDafV}I(|6+rIHsTMTWaXgb@QuA6obbC3v$5nLS!MhdzJY#cAoAM0>r49}2#X zGQeg0=FmYtnm1+Nr+ia=4H8XlzmV%nk6MduOUC59JDpXBzlvq zbni)0&CZkZR=%9D#(*hHUG_?sr?OSljBw<#myWcC-Yf3_40wn!6g)MH8oLJNO({|; zv_y*P6AsT0oc7Y?<0SUW-jUX51JZqe+q9XSw?5~l#2i5o+DpS8B#hob;i;~@KMxlX)XN!I5L zsYNiD5%SpCNx3=QNY42y@Ejto7L8WGNAN9-PRYM#Z{0i66)u|6D)-)N-VIzCE_Tn*k{fHm>(fNXl#OAMrU;Dbxw6uWQzEROu#TeIEO$Y@M^_bEnkH~ zC?VgPvxmOxt!!rY70)ItGc|@>HY>ilzf6Ooj1>p-VH+HtSCk=UKw-1;loFHP!G zJxnD1EZju7i<2M`gCoqmGkG0Ocvl^^W`n1dl*Q>U* zAGb>wxb{Col{9ff`r|YT_5*%Xj!uVW6=TPF{5%3@sQuakO8p`|o%;%LKZ zo?_TSp@f$+bumYlN@#L&rnIO$z&#<(qeEn?W$p~1{Wq4%OP}^er`L(a8l1l6^=|Dd z5apgZ;rQE*Vh5k0tt4q;JgGM1L(JoP+AdA>tSL?d@yR#AL&)tT!{K=7XA2fw#-qfv zEv3)x?72BUfsrXCOgd6Nu30TOHF7g45U~#IVb^(7=g)d8y$JPV!1+xydwR=|k#fYq z@>}cI^bnnV{AxI1>2C#yCu^pY`O2_gV0I3;E$H7v_IlK65x@C~tbmxSubbFKY;iCj z%Vwmzw-gYtBZt53*)UjbWZ@;WP{Y0l&-qjsB}2VD^5)yG)t{A~*{Jc0bulF+xN*E^ zl{X&B7|;t&2#m?CGlq(!FqQ?GP?`EL<5F3WDM~$<%TdZR=e2Ku=(O3p0vE| zv2G-R*K#gVFh{gCk)bJu`&lkD7Gq5Z1|8>GlzNhe1)*!?JK4*KC=8`pfk7~RbM>*P zZbeZXGE+q*4e)D3mN8(8cOuN>GhF77kwMDbHIeiYr`e<&cGQbO9ep|hDpR)WTzPu=mAQ+a#DhRV%z2)I?fK{ZJvXi2lOzKS2rXQo^ zSyHf(VT_u6`sSyNK0Zx4fAtloDX4cTH7sq|{XC%qi|jB0ch8`0#M^k`BPntkv3RHk zL55)@HQPoTa*tLDZgO0+V*ZeEw1uX0$iN-M?ep`fj)CViB8NiTCM+Jd=?Jn^7SLvY zU(J=$RbjjIY(a|?Pr!{%`yd4mDfXO4)HH7?a86DJx+yN8Vbpu3u2{&|!P0iuqDY+{D57CY{BkdIafs{0589D3M8E zW)zA@U=Wk+c-BOv5M1XajVw4%R$=iY^05{>s3}2E!s1LJ zj!7?D7a1luqI_l`ruReg7co_1(mbWERB7?1GIHwm8+RPrUHxLCVm8*o^hzO!;+P-f zFzFNhbH9A3*FEP(DnD_}#h?+-oRGfya3T_|3i6tJ4c%j2q_E!@&6|MaQwBNdC@&c) zRsH;c-asC;q?+VciR{;kgpzr#@P0N-3R7lTYFj!qZ2@$znV(Qqai(`!954kfj^UMv z#5MKFB4r7pmRl`yj)bJd$M%n?BC<9XBXo?(tr*B7NovOiX3epjhGAT8$?FWNDgMW4 zzz+*XIT#k3`qLE0MsdmTh@gf>@Bq&uG8ndv*Uq(!@$v0e!DB5;7 zj`HTnWm5BdFf)+Qwq#gkIm5+jj1bi_&Y|r#8y!yy4G6;spMX-)f|0?roYj(R4)-G`bEyGyAq%t>;h%-M`OHH=d zlCU9Oru6I26NFRn6NOv#2M?btiA7Um{<6U&;j#RA%{0@R_U&6PsbVN0iC-HK`mgXL 
zRUGF37;&L4=)_cOw}r?+r9P20!**p;plPfks+9=%{zZ^0^P1<$NS~Vz7RSWxtJ|VntDhue0a+Z_;kL7L_ zv6$ShOJsWDtAVDqdi?nLJVAil)P^z#)!|q`O*u z@JIUv{;KmSR5XZCxuB$UL)cazDlG6W%i1(7ROLk&Od-N$uaXgu#~~sr^-66E4veH` zCnn2*GB%AMk^Ef&%hLHVOhvHmwM(MH=#!@AArDSqq$L>(i9iI}03^!BBCO z&t`ubumiA+sb+$yf|vD21%pVBA?4e^w;Qup6=)z2Q-sWhvFJNWDm-1yhEyK1sNG zrdw?l3eO{|JKG@r%N?knRRmMzG1E>`ASlOfoXf z%^+%Jn5h1o`D1aD{6jyxNI@c7Y(vf=8mGa+ z<6_lg$=pc02w&b=_T_sX>dl>OPZU218YULASCGg1#5eL;vqNHgWNv=fz_vP&99<~> zoHHBVkl)N!%a9=@&bD-HFu{5UoYj0MZ*<3XmezkD(>rHQd1!j=xavx9{GqI%a{>bFjwT^2pe(r61?mr_(B0iO zM4%WWFHWN=kqfI}D2p5IJU|n!IqRqZQqZmKdV(W0>uv{I7^;8I+mvETj3hZJrk1jN zuD^ZhKE`PgtkO_acl8H`SBZGUk%qxw2KCQuOIMxUyXW(E--6_CQKp<{&Ai=w-~S+8|rxV;1JH>vLV!|PL*ueNYk{39TiHeL3xf{qXU8~4AA zz5hmxZHQWtpdBx&7e$hq4~pxr?Z(aLrhGuse~kEAgDr-yZ}ug!zTnno3z5AIQ@#fw z@4f-sgqqAt9##fSsOluPGg*z`Atdg!gb;3De(;T)36(afT9RCh!FmP(`3S|n;O@sw=AB|dwk?O^t+At0wdAlbA`Q@4gT`I7YPRoXkd<2_Sh zN^%};Ha&9rS?BCE)07!W&FLpUna#WeisBUBJOnunLnycv90^y( zS}vdOL679a`JN|Qux1{dvz$Bc(qWMV@Xw)I{cMliX}g$S{9qAY0ueR$7bE0?dT5x2Lv`s(bS1R5OIp^@!F zB!e|)BXP`Dydrp2Y$n3+-d!Q4&Q}t&K8ancagJrBTLn;0*J&f)YOYSS==3;WA&D=) zKZIYoqoG=f=xbNo{ISVpf+wf~-F$G0T`>6=8i2{eq!}g}m`qX~&aXo--|j&69((Wj z+p%ZIz4RPU>OCw&wy{T{LDXreYIT&g(*3;7J?-cbEuFH3wP880i|(ngH%i1H3Ag+h zCQk9Wo%4GccV6(+{;kWTGADZcFA5xa`rW#Zsz2WVRuXrpFo}G{cSyQB4y! 
z=N-0kD;bX`s;<-)Ph7to*=5+_E3or1Br~?8Qt%@$WUe^Sb*dI(~QbZeypr zqx^RSa=%bHkye`V>KeR~kU`$XzVFD1<>13P-h0i)?P|tS4b%Ls%j~ zV>5@@xdqA1B7#CSSOJ1WMS=z<*`mIWa{RhJib!J@Oj;uVE~PJolGG1@?w8fC@n%4r zAYL9QBHkCboa0AYllx6d-ybHaUjvnbCbXe?m|P7RTe+Hqjpg47dg9c=zXX!jprJj3 z0f#{+^#_pwmq4e6Y_5<~O|sZal>JM>LcS+%c{+f!rUy6?Ei@|7mfAkbzbgfrN&}i| z{v}Z^;@cai1{3Z9obi8L4{$;Xd7v%5SUKRZ5}>Kyy>ZLiz%gjx&_G)>x8H!v{th%H z12m=eSB4}>Vh99i{k!~bVECOJm>phSb@i}NN$rqn{T(fa&!&B*OZc=ev(*h}XA=bm zDqT{7C665`AVwgS%+0$&Lvw{lXufhrLql_hbm|00KX(ZrJ8uR@!26PQGo08TB$gPD zJcFRAwvQPWM2g))mA>Z=0~6j3{?hhaE$(^k^yU};qRT#yF;aWCj$V^T7jTXMktD`9 z7IPk;FT<+;)fc}1boqbj3*^5N{2zS*KD<1BJqe%?AKm}y@_#P`{=Y8&Ulii&f9mz$ z3*ilXdO*GYmqH-?r(XZP5O2Q!sn>tL82_UX|F16oSyI1cvqMa+04d=59_Q$9R`Sxd zr7{_>({Foz_+qKl37bpI+PhH{1p-*d`xMEha;uZKHM5*B~87oD5@fBhD!-gc5W&%9sB)N-#P z#Fwz(1b79CnL;Y6SZVLSNTpjP6~cqc!?t)^i8R{J@Y%^IJnV(i+NR3|p2pnzdw=#h zuAclJ2UDEb%~l1FmCQG4-*JlJI=4R2Y@*E?@RHLU(>^QRHzrwm@>^X+Be$L-)@`^+ z*l61Q0>qM%;|>%_iu7xR`_sH&nC(zhc!yf(2#gWF%OnMd8Rg2?Bxpu``#s&9$)i=1 zyO}STIH=gnXy>l{cO7`_l8yk8!vfnP^$;mMM!@=t{3$l9R zey-xMdd+*2aRTd4{rFMKQ%|f(2IrQkkX<+1x)gButaD&^vGFO`g{E9KBgB0%H+${L z)Nh9}?!(E3#xCnf58@gvK9kKgCihFlXY*VzOcs{}94-&(-FS1ubdQXumlV@lhZV>s z685g|P)l=D?G)s4+3bdmQOYIjY5;AphUgbXrQCV+`IEpU@egaF{Sm!HTh!DcqP(u~ zz|R;GAt%qSPg?%rE!QWM=idh!Du$gy)`11}8psMv447c;ORxAIWZs|G_II$Kl21=W zs>ta`o-$rcie_|w*-N_)y`}u&K1mvqKf@Ewbj67z6CH`_c$a3bWU`NG@ICHZJpRs@ zJzwv!7I`lYw_R1m)!RLMp>mI4HUy}+IVQ95bh}UgLZjLJmfD43afeb#XOd6nFZ+#} zQv$V@;k2t!hqP@Y2UlJ(w~5%`EZddjT~6<4>ea`%uGQj?w)R_r?vuLBL_;^4zOG!q zuf)l#L}B&Wd2(pie@{B zA?Mn+0+y=>N*-OUQal=8q?lDRo_nni-|kbx3k|iB&hOfkJycRT-6aNH&G!xR-8VH} zoE^)EBzb8jG#nPD%$F>afW28qLt^6#l?4|pgp*zwmIfoz>8{>A6K#t$jL!J>&us1S z53j?8ME_)v7tALNNNvttrA%FUUzfIcr6p9||L%5090!1y=PcLF{3Do`%CpX8D(cNw zVLZ!KLda49c88Ofc8^~U_L*WDM;w32q}<03D(*)^Wwn{IOdGGQ3pUakG4 zKb10d8wJ1SLl%#gO?ArusBu^cR;&_8_nB%|1>T-#hfIymMyQF&M#=vynnb1d(c^pw z6I?Yc6_@CIoZrzGzRvk8JIyRD7s>)yK{TH_`Psv2A{HWyQ-|VovjMUcq&s&iXiwiY z)sLk;jF(Xk%*Rm|*BqNXSP$ERbH3~Ut-8C4it7IY1&)A(NDWF#4<+4=#1I1r(nCn2 
z(hS{52#gHfjg&M3f;7_IEh*hKg!DbX|I>ZHPj{ZrnuoL2IcLXb@2?v(!peNdYfH4{ z_IWIv$*Z~bC7Y2?Z;8Fq(db%(UJVN}M&evzre;Dc2RBho4JNb|qjh&J9 zR#dzW$U#=ybxx+ik66bbr$_3%&( z62CI`G81tIh{M#G9BmA<&m-Mi!UvfEc7l}Aum?K9?Z+^mJc2X1b)1a4wv`!~Owr^|D?rN%!?RZBhm8tM@Ecb@7}YJzhv> z(6}t@xAvyFrCT1|S*3z!*ewFLBt~twn>EKq50O{t_?t z+S@iDNA+hvPz#5vCWjcKm>LxnaoMwqwp$+meysNEH5PpLaKjGEIok~D;#*dCqs<8n zej2f?vpSa{?x9g89{sFb4fGvas9>EBhl-q;j|j<*2lk!(`1A%paMdkp}v0S zA%D!n=iB90KCR+9?@wo6QKHAf&iC0X)e;<|GUK3HTd!B<8*V?J1(pb=u^r2dv_1Z1 zp?Cu?yhM5E`Hy`j>mA$2Wcg8pVdD8#h^onDZ9~6UyB6Bl@9zJfWqDRU9WOa;;T~x{ zEXJAN%g?R6o4W)zvfrpqR?d2| zBKf~016W7rmZ=J8pg!$z)xw>6<%xFH8vvOpZGX?w;+_$l@=RAf;_d0Qvklpt_wU?D zU;kb!2_X7um8JMK0ObkLMwgkkc)$$KJ;bGhk$vUV>Cw3VM8~)_TVmt zDTsk#?I9ab1&Y-(@#*~8(ojbYYO(89bUshl@}{<5_xE%L@X$0)w@PQvj!%=H%B}Ym zwGDIME3fhMr+z)W+z6|T@DNuY@0>1jMj2XX!i1^P=Jo|y-YS9l;5abwF3YEr$Tp*OyohaO@L=&^G@KraQ*7c7qhwATK3dZ-CV z-eDL2`*SavTh*sk`OzdUBDo^Pzalx63nONHWa&m_(?;zX^}9b4!=7C+UK`!XUH%R5 zbnVKEkn^33pxwOJul4A-s!9};N{h_;y|Iy^d_(n7*#auP@!LskE7IOl{IJL{@njJ) z^mdr}RJve*ven%MuT~`4vc`fUEy-WS!hcj0E>Mo@u6u(IeVlwfYvQWU6qV-_WO?rX$L%8wo_d=z!lMB$} zxgC_cX)LF1~nS|tJ!!jg0P-bWQgAr*`~~;(5WbJLZ}8z`6mBG z%6OM}#MIa26=)CYfiK5fK?|0j=|}OwC3Yl;inDx(q}|bicSx7e5jSx`DV6qQTLW{W zjbYfqUdxc<Ywjhz0Eh?pJzULYbN-Jk&;lqy5sO#9M3|J8r4>ZwoISDp;Mwr3ts%n?IAc|>h~IDAN8ptW%@TSurID;GT1}Jy~P?xuM zyg~BuydpRke zkJ$Xm%R>|^=|kAhD`!eb@#Pu(OPQa?W{H5q4D@M*$)g@J=KUB>C$O08!nmP8eJ5Th zl+L92yyXCBg)+KByc#~HyN~Wj-V*u>?q?}zXxY4Lxm-^Z`YpFk(M+~0njlQG2zMcD zUa!P&mo&%GX{28yN6V#p#aLVOxH?XPXJPEBOyUpU4`FWVRI@|5!ff zk3u^fKqE&`saO{id|xt9b`tuTnpJ=fY~vOb;9*u z>lT0pSriSv6RZdTq*iDUY>Xy%RXjSE3ivhGC?rh5J{W~zxZd}0PmIMY_wb*jWS^BZ`H{O1fQ_}s6zNf!x7IJ* zRwMGXqkx2FfMkm@xhwY$AYBJT^QhMr5?9DnmFSe~&_(~dego0c^~+lN=L)^y~z&<^^$LguYdxyma%zjgi=v4n787>Hcrex`cbKj zzIZ%wLFwi^K6K|OZbNTI`A@nc(+tu4$r=KrRrx)CRDzwY(sA+=*EZb0p7*@Kpw*^t z{a2jC20xUd<^2HHtxlZDQVu3*+&~rJTfv*t+(VKhN&|hvaT4ai`N(dqY+-Yq#sJriv9d=B z9TO4`^*&`}yPXl7Y+Z_zIB-FfEE?n@%VY^ixxGx>WC`>MKmaOh$suq)OGgRhVj@mI 
zP{w?*>4}y1sNJ74q!wYQ8N!~Gaw5o%eH+e#Lc8yRn}AfnG2t(Wrz?6=U2N@n&*!j_ z5^QeV2)`1afa^gMmE8tvu?rpST(A8ju{OUIrWE4vuEt)YreFuNrB0o^JTVVH;}t4H zabHZ0w#gE?AMBV3zEBPeMW?mWL!aj|eufw%5c4Ha^;)dj)|aXdW`{q`ojW1Jd#ayq zr;6L#-zNX9d^_>b3$IVsS`1Vwb1BQfs#`R0@FZB*eH^c&^liFX$FF5_sH=U z;A)rq!F!(-Am58Lou=rtg-w3GyOkOQ%uYNkSCy5xxhMapm@<+Mvu3-+9_eUJAq+1( z2rlc|eG;C~V95g}gT^k?!2G^%fYUuDrpRRRd zy7fPct$h5a@)H!8U9mJK_F@3@AC-(*To1E$VrAVS@5X+<8tl(=icgj!^1Gm2Rrag! zx^(a9PDQm(k?$Og$F|DF6E|{R!0e>O{_4Rs#Q|fn^BRK5Fi=y>zeYjAJRBFPU>%|; z_AKIxUKaWHAIii+@J8X5F4GM^wFfU$1diM;NGdK}mj%0DH?a%yp`X&>cR|+tsh*pC z<9Z<XLWHWs+BOmV<5R-689GYRtNd!^zzVwGqE@7CE7av1i ziRBbmas~b^Gga9m^Fr1&`p$S?{Y}AmBa4ZAu)1?U_~ZDh(PKmX@uh|3Kf(3Snf)^6qJ;Vi zJ)gL{(BgnCm>iiZQ|usnmQv|~lDbrun05Njp1qVNFzIOa4!Z(z!_DZe@_-A5aU#2( z|4`D%S_@lngXODVGd&u^V0-eGH7Mw6WB&=1!}6zVFEmbS{U;3}HzhYQTnEfhnCq{W z?kWxP9*O_;s@0e~g>7D^G+Gep46s{pEXY)(R{L$llxgd=&@m&8S6W=CrOO}#d>Qg;%i8&rZ z)t1hR%p|dEJ4pq10Cz~d`znRWI-ZaUWT?TQq8EQYzb#-eczY6sT<_|4aYe#0{Vptl zY7fve$D)Q{CJaS7cHv9Phkm|xKARGiB)pfrs}<(U^MP!$wDKzEh4dY0h5D z>*z{FY`O8N^DZt$j50be7CerU$1T2>EMKp#Pup~j5BKrOkj$V-*Zq(rIx=;qL8li6 z?Mr?SkL9*O627`qxFEYJms!75z&48ITFOf1+uSe3KPN-T3$5_n%Yh|kcF51spOAT! 
z{UBykc$=h?9q_Gg_g_DMV|X$=M;%vzmus#VfCV=0V|vX06Ro?#=s4#T`dBx1Q#-=b zmqPY4`n^ML=&U4+r3$79JFfOv0KD0ZMX$bWZ@SF94KWI(Z-Xm>lYTOU(((m|;mk8= zD&aGbd~p>DAz5>6qpv7^{=^B&(RKC@QRBSR#~TSzAZPgetxFCF_xe_&dwjn6#|!b~ zc&@4bIZLEX{1UOEBj0E`{tR^`Zt&dierq&M>UgXnTLDV_bin3K^DLAy@`9;~Fx8>} zRKmuVE#cO>oP3pwv%Hc&ouH?1Z)>tHD#}(X5V>dro*`mX!t4ss3}d6Xf|!0+HO%qj z=_i-M0cqIXB>8}*>HY}Y_b^08on_^p(j7&)pAGUSlRpi6!nCIRp6V+S z+TmmI$rFoh(+f|JHIRWa#W?E4&F`Tb(?80uZ*OTUb}NW0_#pk`9>?Ty&F=Rf?DH0M zdr~~=}l)3-4R| zM?Z*UncY?}Pv!PD$va?H&-{*O;$#^k{OZ@Jp5qv6ig>`2ac;|4|Ddk3=MN7PtnVlE zhu)9|%Y1fRypw0NtdQU#YjoYOB&PUIz-xQZF$~*MFfV3dIwTBKrM^#0un_|aQWO(_ zbe-yqR!O~HfljO@DqE6x56vjCZp2ojm%-$bwL?pE*0SOG!|UH<+y4UM`gR3c4a$6 zS5ip6CZTObwBNx~mj|f5qNVKHur8^I$>K!8~b= zD22_Eq|zTO+Zps_NeO8sd{r;HJLwx^xGH0)5N>=gqiwT^pEG<`v}bQj`%USc{yAO6 z9Fc3nvm#?op;&DxV%;ZV6nH>@$JXM~%OHLf-i3~NKZw8J3PPKBGqoW@Q@j!gmPrcd z@`Bp6J?Wg5Ht&L(Xg;>{-^Ie1d5QzDXeI)CapxI^MoejiNuq7ZPrf*|u%IH(^0^Xj zG;%ek?(5mOm^mzetC-I_<5eR*61EpW%=1sPK2cdt&>}WlG%{F$t(G@7P1jLAvi{b> z16ZKZTVjpXELL2cP!;o2C;g@2Cg8{shzgBT-mvg-Cmge36vbWotqYIV2D#aKZ^Dis zKRbijQ-y>HboWngTh`FCs%65zdize*o*Lr~iT-)>0ZAtgLDE^Ryz)@uxKf11-7vdt z3gX1TJj{}t#Gh14@7*}Wz}%U_k?iajy7#n9#jRa}RF{CMnPqg|uFOEgF0ArbbYhFA-Z*RoYqRi0%3QpECu^cH7Vvk?it~}~Os4HrHTUdX zJb)3e4`)>VLwHXmk1}C!&01!n^7HVoBuw}ab9#!`7W==7j9RQLk&R9pqfAvXIIpOd zC+VvQ_ZOWrsULcFHylYf4wgJ1TU@YMHg<_@?Ubf3FTi0QENKxF_iw?Z??`T)_~n$y zDENHTMmJqtH)oX1NwAw)PSr)3wMS1JTw z%5&QHDVXm|W+3xd3bqGXnZUf+*dH9rU>tIy(V4JbDu8k0K0 ztId;@j%w+Z+YU>03p*)}Ze|(jW`BJ>%ABNHZ;wi5c2JV21@WuP~Rl4`quG3WS@t`Tv&8^UuMO|XsQgd_78lM8(XqWq|hT(x>1

zCjR$2Evn%>zu(C*)7j>qhSO*ms{Bk2CzYSPE2K+vlFA~?ZXnfsR(uW^{|}Jg|IAJ) z^4mFCQp}p)2Y^Xs2a52DtIKLFF6lLD+K9IBN53sRF|8}%b;wgAo-l_R~IgKu`OBzk$Z)wG38pk*?io?dLiIWb4d zF~Rj8K3mwebhlP52Yg;FM-)iCv^txBGNF-@*^Icdyx=aN6d?wvpF+|V_*tvV ztbgM1%j1fn6fz3?C}+)L5eFjaTyTKUdQ|h;CwiB2arRn|L$Mq-mR#C%cxTU{cDvj0 zMNkr5&W8K3Xwg?rA_P;3Di-F)m@kG%xMi6f7LVpphFp<=OhWVgf`xy#W^86bsu3 z(z#6NaiYn7?{ymI`sb^W37=($$6)pXQx%c=!~@5Ua=;|(VXI04UA5`!%PJX=E-UkT z24)KjIHB+39u+oI9W;QGinYDY7@|CU`WsvqMu@oAKBjs+=P1vfwkV`Ns!>k4l~1&G zwgt-6h6w91!;Y^kJLWtC;jsGt`t|3xlkx4Jfj9g4A_~JqFcify{nTM8k9~IN$-_*U zF8t#x{uZW{CdZJs#ehVjnAmpGW z9S`Bg|9;2CuIG&=IH|RhmqDpR*RYv8Kg^9e|6Knu5H)aviB2|$-;)ElG_hlt5kwzH z?5pUAS6Pkek%<5#p^VZ!R?5BiSrcLyEDiBvMrD5l@#a@nvFz}YQm+i=zZM3myb(J) z9>OOLsYXj0BDFHWWXqyGDfyF${fy;PVPG8PO{#po!;Aq2M`1LDAv*5j?3F$#0U+iW zhVm2V)-XEv9GGO@Grv(4z{w_|U9+QI zF7IH|cO-*Bul7u^$j=5sQILuNA!=wO`9*mG;Yp{v1`$V}BOWU1EzNO$ew?;wv8Coc zN7`)!J#W5$1ItxT0Nvb5K!8rf8CyE}`I=JXi-%!VI?aW1&V?)utU!(HCVD=|S(eao zaMkbqFp((!_qHbR+tdxzX?M!eC=g zsXg{3`Up*V5QnUD@kcA;)v|GdaQ+eWp6otmXT=r%I^@%0ElZMunICYUbvw?f^@PuN zr>*4g=e~5d!+xl9%xW=)XN;3(h+5jsi_?}4sSeXs0aMO$xOk6UfnIVxJen~t_UhjK zAac-*=HN5@xgnX@2F$Oz)G_g+`j$wM4H?E2t(1DN0Av&5<=We=AYjznCRd$@L;?L=9uu*mVzP*qRF#ZoO4yTe=3; z5GW*C+f}IW%&Jd)^oDX=A&U1QDv4c}?_WbZXIR-HE9Nxw8le~~$#J!fJ3-a|DkfbO zF^>9Oa!YFm8LEbjK=`MXmGExtW z%k@jN-0fk$=l`9RQntZd`dim?sGJZfWngVN$5bWa&GD|oE05+8B}gUkP6qm}NFEy_ z7PcI0O7!NON=>M#UD{ZmXo#HXH1wxMI%}zgng!JyCwiuAF2N*KUj-r&* zX(03qAz;Grx$hY*F?`fzU3QB9mCzSAY#zuR@uRB$piagq*sF^``G8p_b$*(*(&|8z z)}T5w8|V^7XfPJYeUpr6_=kk&*Ly8oN;ZBDY2OrABF8*>VqwIl6z1E_q^)H>HAqws z?3q?B7vmIoFQbgh9h<<872n}*LzVj3bIIY5;xJc)ax^ZDI)P^+`V+Gy76UFq(k{w= zuKq3A{OVGTDZ&Nld=^1ce2951Xq)e`qinu@Snk+Ns}6g9LeRRJIVTi+_LO0qm0$wP zvyDjMCc>5PDqkpc2S^~;wLMSmPKUxBKspLCIr;M*#LlBfXy;^E6wAEC@MxI)AwB=1AO88c!Bw2h{=3xPET_{vZf&Y zkJiW2VR+fxbWj4g0JMD0h4ood4IvVTJ9jfpLt$}4c+IKz?OW@A?f8mN%8$=M%CrqW zR*eyxC~RF`mH;sSSsoB!0JeN}<6zgR#uc9_ynh&eG1(c02nOe;y?n7~#AYtm^Na zg=$~3WE54=w=?1QzooossEhXL7V|YcRlTL9S!Iq9Nu)^|IdYoVYYI#o>F(3w zBfy=j6 
z{u~2;He1Gh7MaQP@Anb$oR>|DzR~{9Vus7i|EiO}uPP?`n|VpJZEdPs*B!6;G4>=) zDn~_nJ~s-Mv8p=SGmfK&beI05b1)*T>RxT!wYX|s9AqM5HE>L5Pq*2kCg$6w7!h~j zWN&2q&Fs7#Gzz({Cp(luL%eR<1=7ZO@hF@$AN5~{TwZ|^)y|n$TL#OW4asr|HAjhD zh+;vVRF}+jDb!`@0_`H6Rao_!)Ex_TRwo1BmDtri0@n0wv^Y?C`tl?*UGv;8bn@3o z*SY_d;eP@29V14co!urFYNx9im!_y@EinxuJ3yH6x^Bu|Xox!C-eYSPG?HjyYZ>xFRb5nc4%re;jI~TF&-q!p%}{Lz zPJ1}Xl1rFITPqgf?GdxKE_w`%tS|9c z+=;q+GfBC=5zVi|-F62@56eih`#QfS6}v+9T1q85nfKSZi}p;jQ}pc~^;JDyR=31y ziz6$Yt%Ng;h1yLk`^ju{g;Wl0KY7&-9`>Bc`G~E@QkCo|zivXUSMS$`=vF$S-*Zh8 zzpt|X*Zf}X<06oKzsa!N@WhB9$e|mt?x@!$Rwt(8;k@SEdPM_>PYL=e4?J=ek8GIk z`br5tPB<0s*_TRl0?djlsbE}5i%Ubwb_VoSbZ$gAscesz5lNAK>(%*Fdlb#p{ejQX zVQU20BKMfzyxi$`HS`}@mWhBad#rK{4tFUjuV^_!Y)AnaQC|zJW<5k0u z|9gkVa1_1yuefhM6#DD2sJY`OSHX4!R8q_1>i#&DZ#IN=lDH897lm{lKx0gt|s`kk*x$StUvxr{{Qv8Y`E*uJ8KT5=8;2oC~PQah-H* z&y0aZG%Fg%!^n1+ZPSlvfLq6TMh4K=CxV|tW?I~&NgqXUo`Uv+s_|?rmOv3c)R0$a z@+?HPtg(gcezng7BmZ0<7|Onr?n{>H!}!lX)*FIn){g!|p|WG~Fe-z`UIkV__d=@- z7vGAEo2Yd7)BfMSk?0(_oR;bv1hAvB6#w6O%MM3|s&2*!z`eeZ|H=YA1?kpta2$y# z^Q?ee{2euTeykIx@mvl>8J^_85ZY{@Bte>nxH(yQl8tW+|KZk9Y`osol>!%EM>Dh7 zE2k~Pkz>whSnd+C#07`-PhVC?D^~nxb}IwXNO4vAPg}lxk*8tg(AhQ|Kbgwt?|%1$sp^q7K~0s4t!5_>4fsLj)gTqJ Hra}J)%BOTQ literal 0 HcmV?d00001 diff --git a/docs/configuration/extensions.md b/docs/configuration/extensions.md index 5fbb20e74efe..3c150333c291 100644 --- a/docs/configuration/extensions.md +++ b/docs/configuration/extensions.md @@ -100,6 +100,7 @@ All of these community extensions can be downloaded using [pull-deps](../operati |gce-extensions|GCE Extensions|[link](../development/extensions-contrib/gce-extensions.md)| |prometheus-emitter|Exposes [Druid metrics](../operations/metrics.md) for Prometheus server collection (https://prometheus.io/)|[link](../development/extensions-contrib/prometheus.md)| |kubernetes-overlord-extensions|Support for launching tasks in k8s without Middle Managers|[link](../development/extensions-contrib/k8s-jobs.md)| +|druid-spectator-histogram|Support for efficient 
approximate percentile queries|[link](../development/extensions-contrib/spectator-histogram.md)| ## Promoting community extensions to core extensions diff --git a/docs/development/extensions-contrib/spectator-histogram.md b/docs/development/extensions-contrib/spectator-histogram.md new file mode 100644 index 000000000000..30d5048da377 --- /dev/null +++ b/docs/development/extensions-contrib/spectator-histogram.md @@ -0,0 +1,386 @@ +--- +id: spectator-histogram +title: "Spectator Histogram module" +--- + + + +## Summary +This module provides Apache Druid approximate histogram aggregators and percentile +post-aggregators based on Spectator fixed-bucket histograms. + +Consider using this extension if you need percentile approximations and: +* want fast and accurate queries +* at a lower storage cost +* and have a large dataset +* using only positive measurements + +> The main benefit of this extension over data-sketches is the reduced storage +footprint. Which leads to smaller segment sizes, faster loading from deep storage +and lower memory usage. + +In the Druid instance shown below, the example Wikipedia dataset is loaded 3 times. +* As-is, no rollup applied +* With a single extra metric column of type `spectatorHistogram` ingesting the `added` column +* With a single extra metric column of type `quantilesDoublesSketch` ingesting the `added` column + +Spectator histograms average just 6 extra bytes per row, while the data-sketch +adds 48 bytes per row. This is an 8 x reduction in additional storage size. +![Comparison of datasource sizes in web console](../../assets/spectator-histogram-size-comparison.png) + +As rollup improves, so does the size saving. For example, ingesting the wikipedia data +with day-grain query granularity and removing all dimensions except `countryName`, +we get to a segment that has just 106 rows. 
The base segment is 87 bytes per row, +adding a single `spectatorHistogram` column adds just 27 bytes per row on average vs +`quantilesDoublesSketch` adding 255 bytes per row. This is a 9.4 x reduction in additional storage size. +Storage gains will differ per dataset depending on the variance and rollup of the data. + +## Background +[Spectator](https://netflix.github.io/atlas-docs/spectator/) is a simple library +for instrumenting code to record dimensional time series data. +It was built, primarily, to work with [Atlas](https://netflix.github.io/atlas-docs/). +Atlas was developed by Netflix to manage dimensional time series data for near +real-time operational insight. + +With the [Atlas-Druid](https://github.com/Netflix-Skunkworks/iep-apps/tree/main/atlas-druid) +service, it's possible to use the power of Atlas queries, backed by Druid as a +data store to benefit from high-dimensionality and high-cardinality data. + +SpectatorHistogram is designed for efficient parallel aggregations while still +allowing for filtering and grouping by dimensions. +It provides similar functionality to the built-in data-sketch aggregator, but is +opinionated and optimized for typical measurements of cloud services and web-apps. +Measurements such as page load time, transferred bytes, response time, request latency, etc. +Through some trade-offs we're able to provide a significantly more compact +representation with the same aggregation performance and accuracy as +data-sketches (depending on data-set, see limitations below). + +## Limitations +* Supports positive numeric values within the range of [0, 2^53). Negatives are +coerced to 0. +* Fixed buckets with increasing bucket widths. Relative accuracy is maintained, +but absolute accuracy reduces with larger values. + +> If either of these limitations are a problem, then the data-sketch aggregator +is most likely a better choice. 
+ +## Functionality +The SpectatorHistogram aggregator is capable of generating histograms from raw numeric +values as well as aggregating/combining pre-aggregated histograms generated using +the SpectatorHistogram aggregator itself. +While you can generate histograms on the fly at query time, it is generally more +performant to generate histograms during ingestion and then combine them at +query time. This is especially true where rollup is enabled. It may be misleading or +incorrect to generate histogram from already rolled-up summed data. + +The module provides postAggregators, `percentileSpectatorHistogram` (singular) and +`percentilesSpectatorHistogram` (plural), that can be used to compute approximate +percentiles from histograms generated by the SpectatorHistogram aggregator. +Again, these postAggregators can be used to compute percentiles from raw numeric +values via the SpectatorHistogram aggregator or from pre-aggregated histograms. + +> If you're only using the aggregator to compute percentiles from raw numeric values, +then you can use the built-in data-sketch aggregator instead. The performance +and accuracy are comparable, the data-sketch aggregator supports negative values, +and you don't need to load an additional extension. + +An aggregated SpectatorHistogram can also be queried using a `longSum` or `doubleSum` +aggregator to retrieve the population of the histogram. This is effectively the count +of the number of values that were aggregated into the histogram. This flexibility can +avoid the need to maintain a separate metric for the count of values. + +For high-frequency measurements, you may need to pre-aggregate data at the client prior +to sending into Druid. For example, if you're measuring individual image render times +on an image-heavy website, you may want to aggregate the render times for a page-view +into a single histogram prior to sending to Druid in real-time. 
This can reduce the +amount of data that's needed to send from the client across the wire. + +SpectatorHistogram supports ingesting pre-aggregated histograms in real-time and batch. +They can be sent as a JSON map, keyed by the spectator bucket ID and the value is the +count of values. This is the same format as the serialized JSON representation of the +histogram. The keys need not be ordered or contiguous e.g. + +```json +{ "4": 8, "5": 15, "6": 37, "7": 9, "8": 3, "10": 1, "13": 1 } +``` + +## Loading the extension +To use SpectatorHistogram, make sure you [include](../../configuration/extensions.md#loading-extensions) the extension in your config file: + +``` +druid.extensions.loadList=["druid-spectator-histogram"] +``` + +## Aggregators + +The result of the aggregation is a histogram that is built by ingesting numeric values from +the raw data, or from combining pre-aggregated histograms. The result is represented in +JSON format where the keys are the bucket index and the values are the count of entries +in that bucket. + +The buckets are defined as per the Spectator [PercentileBuckets](https://github.com/Netflix/spectator/blob/main/spectator-api/src/main/java/com/netflix/spectator/api/histogram/PercentileBuckets.java) specification. +See [Appendix](#histogram-bucket-boundaries) for the full list of bucket boundaries. +```js + // The set of buckets is generated by using powers of 4 and incrementing by one-third of the + // previous power of 4 in between as long as the value is less than the next power of 4 minus + // the delta. + // + // Base: 1, 2, 3 + // + // 4 (4^1), delta = 1 (~1/3 of 4) + // 5, 6, 7, ..., 14, + // + // 16 (4^2), delta = 5 (~1/3 of 16) + // 21, 26, 31, ..., 56, + // + // 64 (4^3), delta = 21 (~1/3 of 64) + // ... +``` + +There are multiple aggregator types included, all of which are based on the same +underlying implementation. The different types signal to the Atlas-Druid service (if using) +how to handle the resulting data from a query. 
+ +* spectatorHistogramTimer signals that the histogram is representing +a collection of timer values. It is recommended to normalize timer values to nanoseconds +at, or prior to, ingestion. If queried via the Atlas-Druid service, it will +normalize timers to second resolution at query time as a more natural unit of time +for human consumption. +* spectatorHistogram and spectatorHistogramDistribution are generic histograms that +can be used to represent any measured value without units. No normalization is +required or performed. + +### `spectatorHistogram` aggregator +Alias: `spectatorHistogramDistribution`, `spectatorHistogramTimer` + +To aggregate at query time: +``` +{ + "type" : "spectatorHistogram", + "name" : , + "fieldName" : + } +``` + +| Property | Description | Required? | +|-----------|--------------------------------------------------------------------------------------------------------------|-----------| +| type | This String must be one of "spectatorHistogram", "spectatorHistogramTimer", "spectatorHistogramDistribution" | yes | +| name | A String for the output (result) name of the aggregation. | yes | +| fieldName | A String for the name of the input field containing raw numeric values or pre-aggregated histograms. | yes | + +### `longSum`, `doubleSum` and `floatSum` aggregators +To get the population size (count of events contributing to the histogram): +``` +{ + "type" : "longSum", + "name" : , + "fieldName" : + } +``` + +| Property | Description | Required? | +|-----------|--------------------------------------------------------------------------------|-----------| +| type | Must be "longSum", "doubleSum", or "floatSum". | yes | +| name | A String for the output (result) name of the aggregation. | yes | +| fieldName | A String for the name of the input field containing pre-aggregated histograms. 
| yes | + +## Post Aggregators + +### Percentile (singular) +This returns a single percentile calculation based on the distribution of the values in the aggregated histogram. + +``` +{ + "type": "percentileSpectatorHistogram", + "name": , + "field": { + "type": "fieldAccess", + "fieldName": + }, + "percentile": +} +``` + +| Property | Description | Required? | +|------------|-------------------------------------------------------------|-----------| +| type | This String should always be "percentileSpectatorHistogram" | yes | +| name | A String for the output (result) name of the calculation. | yes | +| field | A field reference pointing to the aggregated histogram. | yes | +| percentile | A single decimal percentile between 0.0 and 100.0 | yes | + +### Percentiles (multiple) +This returns an array of percentiles corresponding to those requested. + +``` +{ + "type": "percentilesSpectatorHistogram", + "name": , + "field": { + "type": "fieldAccess", + "fieldName": + }, + "percentiles": [25, 50, 75, 99.5] +} +``` + +> Note: It's more efficient to request multiple percentiles in a single query +than to request individual percentiles in separate queries. This array-based +helper is provided for convenience and has a marginal performance benefit over +using the singular percentile post-aggregator multiple times within a query. +The more expensive part of the query is the aggregation of the histogram. +The post-aggregation calculations all happen on the same aggregated histogram. + +Results will contain arrays matching the length and order of the requested +array of percentiles. + +``` +"percentilesAdded": [ + 0.5504911679884643, // 25th percentile + 4.013975155279504, // 50th percentile + 78.89518317503394, // 75th percentile + 8580.024999999994 // 99.5th percentile +] +``` + +| Property | Description | Required? 
| +|-------------|--------------------------------------------------------------|-----------| +| type | This String should always be "percentilesSpectatorHistogram" | yes | +| name | A String for the output (result) name of the calculation. | yes | +| field | A field reference pointing to the aggregated histogram. | yes | +| percentiles | Non-empty array of decimal percentiles between 0.0 and 100.0 | yes | + +## Appendix + +### Example Query +Example query using the sample wikipedia dataset: +```json +{ + "queryType": "timeseries", + "dataSource": { + "type": "table", + "name": "wikipedia" + }, + "intervals": { + "type": "intervals", + "intervals": [ + "0000-01-01/9999-12-31" + ] + }, + "granularity": { + "type": "all" + }, + "aggregations": [ + { + "type": "spectatorHistogram", + "name": "histogram_added", + "fieldName": "added" + } + ], + "postAggregations": [ + { + "type": "percentileSpectatorHistogram", + "name": "medianAdded", + "field": { + "type": "fieldAccess", + "fieldName": "histogram_added" + }, + "percentile": "50.0" + } + ] +} +``` +Results in +```json +[ + { + "result": { + "histogram_added": { + "0": 11096, "1": 632, "2": 297, "3": 187, "4": 322, "5": 161, + "6": 174, "7": 127, "8": 125, "9": 162, "10": 123, "11": 106, + "12": 95, "13": 104, "14": 95, "15": 588, "16": 540, "17": 690, + "18": 719, "19": 478, "20": 288, "21": 250, "22": 219, "23": 224, + "24": 737, "25": 424, "26": 343, "27": 266, "28": 232, "29": 217, + "30": 171, "31": 164, "32": 161, "33": 530, "34": 339, "35": 236, + "36": 181, "37": 152, "38": 113, "39": 128, "40": 80, "41": 75, + "42": 289, "43": 145, "44": 138, "45": 83, "46": 45, "47": 46, + "48": 64, "49": 65, "50": 71, "51": 421, "52": 525, "53": 59, + "54": 31, "55": 35, "56": 8, "57": 10, "58": 5, "59": 4, "60": 11, + "61": 10, "62": 5, "63": 2, "64": 2, "65": 1, "67": 1, "68": 1, + "69": 1, "70": 1, "71": 1, "78": 2 + }, + "medianAdded": 4.013975155279504 + }, + "timestamp": "2016-06-27T00:00:00.000Z" + } +] +``` + +### 
Histogram Bucket Boundaries +These are the upper bounds of each bucket index. There are 276 buckets. +The first bucket index is 0 and the last bucket index is 275. +```json +[ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 21, 26, 31, 36, 41, 46, + 51, 56, 64, 85, 106, 127, 148, 169, 190, 211, 232, 256, 341, 426, 511, 596, + 681, 766, 851, 936, 1024, 1365, 1706, 2047, 2388, 2729, 3070, 3411, 3752, + 4096, 5461, 6826, 8191, 9556, 10921, 12286, 13651, 15016, 16384, 21845, + 27306, 32767, 38228, 43689, 49150, 54611, 60072, 65536, 87381, 109226, + 131071, 152916, 174761, 196606, 218451, 240296, 262144, 349525, 436906, + 524287, 611668, 699049, 786430, 873811, 961192, 1048576, 1398101, 1747626, + 2097151, 2446676, 2796201, 3145726, 3495251, 3844776, 4194304, 5592405, + 6990506, 8388607, 9786708, 11184809, 12582910, 13981011, 15379112, 16777216, + 22369621, 27962026, 33554431, 39146836, 44739241, 50331646, 55924051, + 61516456, 67108864, 89478485, 111848106, 134217727, 156587348, 178956969, + 201326590, 223696211, 246065832, 268435456, 357913941, 447392426, 536870911, + 626349396, 715827881, 805306366, 894784851, 984263336, 1073741824, 1431655765, + 1789569706, 2147483647, 2505397588, 2863311529, 3221225470, 3579139411, + 3937053352, 4294967296, 5726623061, 7158278826, 8589934591, 10021590356, + 11453246121, 12884901886, 14316557651, 15748213416, 17179869184, 22906492245, + 28633115306, 34359738367, 40086361428, 45812984489, 51539607550, 57266230611, + 62992853672, 68719476736, 91625968981, 114532461226, 137438953471, + 160345445716, 183251937961, 206158430206, 229064922451, 251971414696, + 274877906944, 366503875925, 458129844906, 549755813887, 641381782868, + 733007751849, 824633720830, 916259689811, 1007885658792, 1099511627776, + 1466015503701, 1832519379626, 2199023255551, 2565527131476, 2932031007401, + 3298534883326, 3665038759251, 4031542635176, 4398046511104, 5864062014805, + 7330077518506, 8796093022207, 10262108525908, 11728124029609, 13194139533310, + 
14660155037011, 16126170540712, 17592186044416, 23456248059221, + 29320310074026, 35184372088831, 41048434103636, 46912496118441, + 52776558133246, 58640620148051, 64504682162856, 70368744177664, + 93824992236885, 117281240296106, 140737488355327, 164193736414548, + 187649984473769, 211106232532990, 234562480592211, 258018728651432, + 281474976710656, 375299968947541, 469124961184426, 562949953421311, + 656774945658196, 750599937895081, 844424930131966, 938249922368851, + 1032074914605736, 1125899906842624, 1501199875790165, 1876499844737706, + 2251799813685247, 2627099782632788, 3002399751580329, 3377699720527870, + 3752999689475411, 4128299658422952, 4503599627370496, 6004799503160661, + 7505999378950826, 9007199254740991, 10508399130531156, 12009599006321321, + 13510798882111486, 15011998757901651, 16513198633691816, 18014398509481984, + 24019198012642645, 30023997515803306, 36028797018963967, 42033596522124628, + 48038396025285289, 54043195528445950, 60047995031606611, 66052794534767272, + 72057594037927936, 96076792050570581, 120095990063213226, 144115188075855871, + 168134386088498516, 192153584101141161, 216172782113783806, 240191980126426451, + 264211178139069096, 288230376151711744, 384307168202282325, 480383960252852906, + 576460752303423487, 672537544353994068, 768614336404564649, 864691128455135230, + 960767920505705811, 1056844712556276392, 1152921504606846976, 1537228672809129301, + 1921535841011411626, 2305843009213693951, 2690150177415976276, 3074457345618258601, + 3458764513820540926, 3843071682022823251, 4227378850225105576, 9223372036854775807 +] +``` diff --git a/extensions-contrib/spectator-histogram/pom.xml b/extensions-contrib/spectator-histogram/pom.xml new file mode 100644 index 000000000000..cf15f4bf0006 --- /dev/null +++ b/extensions-contrib/spectator-histogram/pom.xml @@ -0,0 +1,141 @@ + + + + + + org.apache.druid + druid + 29.0.0-SNAPSHOT + ../../pom.xml + + 4.0.0 + + org.apache.druid.extensions.contrib + druid-spectator-histogram + 
druid-spectator-histogram + Druid extension for generating histograms based on Netflix's Spectator library + + + + com.netflix.spectator + spectator-api + 1.7.0 + + + com.google.guava + guava + ${guava.version} + provided + + + org.apache.druid + druid-processing + ${project.parent.version} + provided + + + com.google.code.findbugs + jsr305 + provided + + + com.google.inject + guice + provided + + + com.fasterxml.jackson.core + jackson-databind + provided + + + it.unimi.dsi + fastutil + provided + + + com.fasterxml.jackson.core + jackson-core + provided + + + com.fasterxml.jackson.core + jackson-annotations + provided + + + it.unimi.dsi + fastutil-core + provided + + + com.google.errorprone + error_prone_annotations + provided + + + org.apache.druid + druid-sql + ${project.parent.version} + provided + + + org.apache.calcite + calcite-core + provided + + + org.apache.druid + druid-server + provided + ${project.parent.version} + + + + + junit + junit + test + + + org.apache.druid + druid-processing + ${project.parent.version} + test-jar + test + + + org.apache.druid + druid-server + ${project.parent.version} + test-jar + test + + + org.apache.druid + druid-sql + ${project.parent.version} + test-jar + test + + + diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/NullableOffsetsHeader.java b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/NullableOffsetsHeader.java new file mode 100644 index 000000000000..90f319ebdd52 --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/NullableOffsetsHeader.java @@ -0,0 +1,378 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram; + +import com.google.common.base.Preconditions; +import org.apache.druid.io.Channels; +import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; +import org.apache.druid.segment.serde.Serializer; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; +import org.apache.druid.segment.writeout.WriteOutBytes; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.IntBuffer; +import java.nio.LongBuffer; +import java.nio.channels.WritableByteChannel; +import java.util.BitSet; +import java.util.Objects; + +public class NullableOffsetsHeader implements Serializer +{ + private final WriteOutBytes offsetsWriter; + private final SegmentWriteOutMedium segmentWriteOutMedium; + private final BitSet valueBitmap; + private int size = 0; + private final IntBuffer offsetsReader; + private final ByteBuffer bitsetBuffer; + private final int[] cumlCardinality; + private final int cardinality; + + private static final int CUML_COUNT_SIZE = Long.SIZE; + private static final int CUML_COUNT_BYTES = Long.BYTES; + + public static NullableOffsetsHeader read(ByteBuffer buffer) + { + // Size + BitmapLength + ValueBitMap + Offsets + final int size = buffer.getInt(); + final int bitmapLength = buffer.getInt(); + final int offsetPosition = buffer.position() + bitmapLength; + + // Grab the 
bitset + final ByteBuffer bitsetBuffer = buffer.slice(); + bitsetBuffer.limit(bitmapLength); + + int[] cumlCardinality = null; + int cardinality = 0; + if (bitmapLength >= CUML_COUNT_BYTES) { + // Create a quick lookup of the cumulative count of set bits up to + // a given int index in the bitset. This is used to quickly get to + // near the offset that we want. + // Tradeoff is memory use vs scanning per get() call. + LongBuffer bitBuffer = bitsetBuffer.asLongBuffer(); + cumlCardinality = new int[bitBuffer.limit()]; + int i = 0; + + while (bitBuffer.hasRemaining()) { + long bits = bitBuffer.get(); + cardinality += Long.bitCount(bits); + cumlCardinality[i++] = cardinality; + } + + // Count any bits in the remaining bytes after the end of the 64-bit chunks + // In cases where bitsetBuffer length doesn't directly divide into 64 + // there will be up to 7 bytes remaining, with at least 1 bit set somewhere + // else the bytes would have been omitted. + // We use cardinality to compute where offsets end, so the full count is important. + int baseByteIndex = i * (CUML_COUNT_SIZE / Byte.SIZE); + for (int byteIndex = baseByteIndex; byteIndex < bitsetBuffer.limit(); byteIndex++) { + // Read the bit set for this byte within the 64 bits that need counting. + int bitset = bitsetBuffer.get(byteIndex) & 0xFF; + cardinality += BYTE_CARDINALITY[bitset]; + } + } else if (bitmapLength > 0) { + while (bitsetBuffer.hasRemaining()) { + int bitset = bitsetBuffer.get() & 0xFF; + cardinality += BYTE_CARDINALITY[bitset]; + } + } else if (buffer.hasRemaining()) { + // The header is "full", so the bitmap was omitted. + // We'll have an offset per entry. 
+ cardinality = size; + } + + // Grab the offsets + buffer.position(offsetPosition); + final int offsetsLength = cardinality * Integer.BYTES; + final ByteBuffer offsetsBuffer = buffer.slice(); + offsetsBuffer.limit(offsetsLength); + + // Set the buffer position to after the offsets + // to mark this whole header as "read" + buffer.position(offsetPosition + offsetsLength); + + return new NullableOffsetsHeader(size, bitsetBuffer, cardinality, cumlCardinality, offsetsBuffer); + } + + public static NullableOffsetsHeader create(SegmentWriteOutMedium segmentWriteOutMedium) throws IOException + { + Preconditions.checkNotNull(segmentWriteOutMedium, "segmentWriteOutMedium"); + return new NullableOffsetsHeader(segmentWriteOutMedium); + } + + // Constructor for reading + private NullableOffsetsHeader(int size, ByteBuffer bitset, int cardinality, int[] cumlCardinality, ByteBuffer offsetsBuffer) + { + this.segmentWriteOutMedium = null; + this.offsetsWriter = null; + this.valueBitmap = null; + + this.size = size; + this.offsetsReader = offsetsBuffer.asIntBuffer(); + this.bitsetBuffer = bitset; + this.cumlCardinality = cumlCardinality; + this.cardinality = cardinality; + } + + // Constructor for writing + private NullableOffsetsHeader(SegmentWriteOutMedium segmentWriteOutMedium) throws IOException + { + this.offsetsReader = null; + this.cumlCardinality = null; + this.cardinality = 0; + this.bitsetBuffer = null; + + this.valueBitmap = new BitSet(); + this.segmentWriteOutMedium = segmentWriteOutMedium; + this.offsetsWriter = this.segmentWriteOutMedium.makeWriteOutBytes(); + } + + public int size() + { + return size; + } + + public int getCardinality() + { + return cardinality; + } + + private void checkWriteable() + { + if (valueBitmap == null) { + throw new NullPointerException("Write during deserialization"); + } + } + + private void checkReadable() + { + if (offsetsReader == null) { + throw new NullPointerException("Read during serialization"); + } + } + + public void 
writeNull() + { + checkWriteable(); + + // Nothing to write, but we need to "store" the null + size++; + } + + public void writeOffset(int offset) throws IOException + { + checkWriteable(); + + int index = size++; + valueBitmap.set(index); + offsetsWriter.writeInt(offset); + } + + @Override + public long getSerializedSize() + { + checkWriteable(); + + // Size + BitmapLength + ValueBitMap + Offsets + int sizeField = Integer.BYTES; + int bitmapLength = Integer.BYTES; + // if all values are set, we omit the bitmap, so bytes taken by the bitmap is zero + // bitset.length returns the highest bit index that's set. + // i.e. the length in bits. Round up to the nearest byte. + int valueBitMap = (size == valueBitmap.cardinality()) ? 0 : (valueBitmap.length() + 7) / 8; + int offsetSize = valueBitmap.cardinality() * Integer.BYTES; + return sizeField + bitmapLength + valueBitMap + offsetSize; + } + + @Override + public void writeTo(WritableByteChannel channel, @Nullable FileSmoosher smoosher) throws IOException + { + checkWriteable(); + + // Size + BitmapLength + ValueBitMap + Offsets + ByteBuffer headerBytes = ByteBuffer.allocate(Integer.BYTES + Integer.BYTES); + + // Size + headerBytes.putInt(size); + + // BitmapLength + byte[] bytes = null; + + // Omit bitmap if all entries are set + if (size == valueBitmap.cardinality()) { + headerBytes.putInt(0); + } else { + bytes = valueBitmap.toByteArray(); + headerBytes.putInt(bytes.length); + } + + // Write the size and length + headerBytes.flip(); + Channels.writeFully(channel, headerBytes); + + // Write the ValueBitmap + if (bytes != null) { + Channels.writeFully(channel, ByteBuffer.wrap(bytes)); + } + + // Write the Offsets + offsetsWriter.writeTo(channel); + } + + @Nullable + public Offset get(int index) + { + checkReadable(); + + // Return null for any out of range indexes + if (this.cardinality == 0 || index < 0 || index >= this.size) { + return null; + } + + // Find the index to the offset for this row index + int offsetIndex 
= getOffsetIndex(index); + if (offsetIndex < 0) { + return null; + } + + // Special case for the first entry + if (offsetIndex == 0) { + return new Offset(0, this.offsetsReader.get(0)); + } + + return new Offset(this.offsetsReader.get(offsetIndex - 1), this.offsetsReader.get(offsetIndex)); + } + + // Exposed for testing + int getOffsetIndex(int index) + { + if (this.cardinality == this.size) { + // If "full" return index + return index; + } + + // Bitset omits trailing nulls, so if index is off the end it's a null. + final int bytePos = index / Byte.SIZE; + if (bytePos >= this.bitsetBuffer.limit()) { + return -1; + } + + final int indexByte = this.bitsetBuffer.get(bytePos) & 0xFF; + // Check for null, i.e. whether our bit is set. + if ((indexByte & (1 << index % Byte.SIZE)) == 0) { + return -1; + } + + // Get the cardinality for the (index/CUML_COUNT_SIZE)th entry. + // Use that to jump to that point in the bitset to add any incremental bit counts + // until we get to index. + // That is then the index position of the offset in the offsets buffer. + final int baseInt = index / CUML_COUNT_SIZE; + int baseByteIndex = baseInt * (CUML_COUNT_SIZE / Byte.SIZE); + int offsetIndex = baseInt == 0 ? 0 : this.cumlCardinality[baseInt - 1]; + + // We always need to count the bits in the byte containing our index. + // So do that here, then go back and fill in the counts for the + // bytes between baseByteIndex and bytePos. + // We need to mask out only the bits up to but not including our index + // to avoid counting later bits. + int mask = (1 << index - (bytePos * Byte.SIZE)) - 1; + int byteCardinality = BYTE_CARDINALITY[indexByte & mask]; + offsetIndex += byteCardinality; + + // After getting the cumulative cardinality up to the 64 bit boundary immediately + // preceding the 64 bits that contain our index, we need to accumulate the + // cardinality up to the byte including our index. 
+ for (int byteIndex = baseByteIndex; byteIndex < bytePos; byteIndex++) { + // Read the bit set for this byte within the 64 bits that need counting. + int bitset = this.bitsetBuffer.get(byteIndex) & 0xFF; + offsetIndex += BYTE_CARDINALITY[bitset]; + } + + return offsetIndex; + } + + public static class Offset + { + private final int start; + private final int end; + + Offset(int start, int end) + { + this.start = start; + this.end = end; + } + + int getStart() + { + return start; + } + + int getEnd() + { + return end; + } + + int getLength() + { + return end - start; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + Offset offset = (Offset) o; + return start == offset.start && end == offset.end; + } + + @Override + public int hashCode() + { + return Objects.hash(start, end); + } + } + + // The count of bits in a byte, keyed by the byte value itself + private static final int[] BYTE_CARDINALITY = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 + }; +} diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogram.java 
b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogram.java new file mode 100644 index 000000000000..2ac7ccfa5ef1 --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogram.java @@ -0,0 +1,423 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.spectator.histogram; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.netflix.spectator.api.histogram.PercentileBuckets; +import it.unimi.dsi.fastutil.shorts.Short2LongMap; +import it.unimi.dsi.fastutil.shorts.Short2LongMaps; +import it.unimi.dsi.fastutil.shorts.Short2LongOpenHashMap; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.jackson.JacksonUtils; +import org.apache.druid.java.util.common.parsers.ParseException; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +// Since queries don't come from SpectatorHistogramAggregator in the case of +// using longSum or doubleSum aggregations, they come from LongSumBufferAggregator. +// Therefore, we extended Number here. +// This will prevent class casting exceptions if trying to query with sum rather +// than explicitly as a SpectatorHistogram +// +// The SpectatorHistogram is a Number. That number is of intValue(), +// which is the count of the number of events in the histogram +// (adding up the counts across all buckets). +// +// There are a few useful aggregators, which Druid Native Queries use: +// type: "longSum" - Aggregates and returns the number of events in the histogram. +// i.e. the sum of all bucket counts. +// type: "spectatorHistogramDistribution" - Aggregates and returns a map (bucketIndex -> bucketCount) +// representing a SpectatorHistogram. The represented data is a distribution. +// type: "spectatorHistogramTimer" - Aggregates and returns a map (bucketIndex -> bucketCount) +// representing a SpectatorHistogram. The represented data is measuring time. 
+public class SpectatorHistogram extends Number +{ + private static final int MAX_ENTRY_BYTES = Short.BYTES + Long.BYTES; + private static final int LOW_COUNT_FLAG = 0x0200; + private static final int BYTE_VALUE = 0x8000; + private static final int SHORT_VALUE = 0x4000; + private static final int INT_VALUE = 0xC000; + private static final int VALUE_SIZE_MASK = 0xFC00; + private static final int KEY_MASK = 0x01FF; + + private static final ObjectMapper JSON_MAPPER = new ObjectMapper(); + + // Values are packed into few bytes depending on the size of the counts + // The bucket index falls in the range 0-276, so we need 9 bits for the bucket index. + // Counts can range from 1 to Long.MAX_VALUE, so we need 1 to 64 bits for the value. + // To optimize storage, we use the remaining top 7 bits of the bucket index short to + // encode the storage type for the count value. + // AAbb bbYx xxxx xxxx + // | +-- 9 bits - The bucket index + // +------------- 1 bit - Low-count flag, set if count <= 63 + // ++++ ++-------------- 6 bits - If low-count flag is set, + // The count value, zero extra bytes used. + // If low-count flag is not set, + // The value length indicator as encoded below + // ++------------------- 2 bits - 00 = 8 bytes used for value + // 10 = 1 byte used for value + // 01 = 2 bytes used for value + // 11 = 4 bytes used for value + // + // Example: + // ------------------------------------------------------------------------------------------ + // Consider the histogram: [10, 30, 40x3, 50x2, 100x256] + // That is, there is one value of 10, and 3 values of 40, etc. As shown in the table below: + // + // Bucket Index | Bucket Range | Bucket Count + // 10 | [10,11) | 1 + // 17 | [26,31) | 1 + // 19 | [36,41) | 3 + // 21 | [46,51) | 2 + // 25 | [85,106) | 256 + // + // See com.netflix.spectator.api.histogram.PercentileBuckets + // for an explanation of how the bucket index is assigned + // to each of the values: (10, 17, 19, 21, 25). 
+ // + // Based on the specification above the histogram is serialized into a + // byte array to minimize storage size: + // In Base 10: [64, 25, 1, 0, 6, 10, 6, 17, 14, 19, 10, 21] + // In Binary: [01000000, 00011001, 00000001, 00000000, 00000110, 00001010, + // 00000110, 00010001, 00001110, 00010011, 00001010, 00010101] + // + // Each groups of bits (which varies in length), represent a histogram bucket index and count + // 01000000000110010000000100000000 + // 01 - Since the low count bit is NOT set, leading 2 bits 01 indicates that the bucket count + // value is encoded in 2 bytes. + // 0000 - Since the low count bit is Not set these bits are unused, the bucket count will + // be encoded in an additional two bytes. + // 0 - Low count bit is NOT set + // 000011001 - These 9 bits represent the bucket index of 25 + // 0000000100000000 - These 16 bits represent the bucket count of 256 + // + // 0000011000001010 + // 000001 - Low count bit IS set, so these 6-bits represent a bucket count of 1 + // 1 - Low count bit IS set + // 000001010 - These 9 bits represent the bucket index of 10 + // + // 0000011000010001 + // 000001 - Bucket count of 1 + // 1 - Low count bit IS set + // 000010001 - Bucket index of 17 + // + // 0000111000010011 + // 000011 - Bucket count of 3 + // 1 - Low count bit IS set + // 000010011 - Bucket index of 19 + // + // 0000101000010101 + // 000010 - Bucket count of 2 + // 1 - Low count bit IS set + // 000010101 - Bucket index of 21 + // ------------------------------------------------------------------------------------------ + private Short2LongOpenHashMap backingMap; + + // The sum of counts in the histogram. + // These are accumulated when an entry is added, or when another histogram is merged into this one. 
+ private long sumOfCounts = 0; + + static int getMaxIntermdiateHistogramSize() + { + return PercentileBuckets.length() * MAX_ENTRY_BYTES; + } + + @Nullable + static SpectatorHistogram deserialize(Object serializedHistogram) + { + if (serializedHistogram == null) { + return null; + } + if (serializedHistogram instanceof byte[]) { + return fromByteBuffer(ByteBuffer.wrap((byte[]) serializedHistogram)); + } + if (serializedHistogram instanceof SpectatorHistogram) { + return (SpectatorHistogram) serializedHistogram; + } + if (serializedHistogram instanceof String) { + // Try parse as JSON into HashMap + try { + HashMap map = JSON_MAPPER.readerFor(HashMap.class).readValue((String) serializedHistogram); + SpectatorHistogram histogram = new SpectatorHistogram(); + for (Map.Entry entry : map.entrySet()) { + histogram.add(entry.getKey(), entry.getValue()); + } + return histogram; + } + catch (JsonProcessingException e) { + throw new ParseException((String) serializedHistogram, e, "String cannot be deserialized as JSON to a Spectator Histogram"); + } + } + if (serializedHistogram instanceof HashMap) { + SpectatorHistogram histogram = new SpectatorHistogram(); + for (Map.Entry entry : ((HashMap) serializedHistogram).entrySet()) { + histogram.add(entry.getKey(), (Number) entry.getValue()); + } + return histogram; + } + throw new ParseException( + null, + "Object cannot be deserialized to a Spectator Histogram " + + serializedHistogram.getClass() + ); + } + + @Nullable + static SpectatorHistogram fromByteBuffer(ByteBuffer buffer) + { + if (buffer == null || !buffer.hasRemaining()) { + return null; + } + SpectatorHistogram histogram = new SpectatorHistogram(); + while (buffer.hasRemaining()) { + short key = buffer.getShort(); + short idx = (short) (key & KEY_MASK); + long val; + if ((key & LOW_COUNT_FLAG) == LOW_COUNT_FLAG) { + // Value/count is encoded in the top 6 bits of the short + val = (key & VALUE_SIZE_MASK) >>> 10; + } else { + switch (key & VALUE_SIZE_MASK) { + case 
BYTE_VALUE: + val = buffer.get() & 0xFF; + break; + + case SHORT_VALUE: + val = buffer.getShort() & 0xFFFF; + break; + + case INT_VALUE: + val = buffer.getInt() & 0xFFFFFFFFL; + break; + + default: + val = buffer.getLong(); + break; + } + } + + histogram.add(idx, val); + } + if (histogram.isEmpty()) { + return null; + } + return histogram; + } + + private Short2LongOpenHashMap writableMap() + { + if (backingMap == null) { + backingMap = new Short2LongOpenHashMap(); + } + return backingMap; + } + + private Short2LongMap readableMap() + { + if (isEmpty()) { + return Short2LongMaps.EMPTY_MAP; + } + return backingMap; + } + + @Nullable + byte[] toBytes() + { + if (isEmpty()) { + return null; + } + ByteBuffer buffer = ByteBuffer.allocate(MAX_ENTRY_BYTES * size()); + for (Short2LongMap.Entry e : Short2LongMaps.fastIterable(readableMap())) { + short key = e.getShortKey(); + long value = e.getLongValue(); + if (value <= 0x3F) { + // Value/count is encoded in the top 6 bits of the key bytes + buffer.putShort((short) ((key | LOW_COUNT_FLAG) | ((int) ((value << 10) & VALUE_SIZE_MASK)))); + } else if (value <= 0xFF) { + buffer.putShort((short) (key | BYTE_VALUE)); + buffer.put((byte) value); + } else if (value <= 0xFFFF) { + buffer.putShort((short) (key | SHORT_VALUE)); + buffer.putShort((short) value); + } else if (value <= 0xFFFFFFFFL) { + buffer.putShort((short) (key | INT_VALUE)); + buffer.putInt((int) value); + } else { + buffer.putShort(key); + buffer.putLong(value); + } + } + return Arrays.copyOf(buffer.array(), buffer.position()); + } + + void insert(Number num) + { + this.add(PercentileBuckets.indexOf(num.longValue()), 1L); + } + + void merge(SpectatorHistogram source) + { + if (source == null) { + return; + } + Short2LongOpenHashMap writableMap = writableMap(); + for (Short2LongMap.Entry entry : Short2LongMaps.fastIterable(source.readableMap())) { + writableMap.addTo(entry.getShortKey(), entry.getLongValue()); + this.sumOfCounts += entry.getLongValue(); + } + } + + 
// Exposed for testing + void add(int bucket, long count) + { + if (bucket >= PercentileBuckets.length() || bucket < 0) { + throw new IAE("Bucket index out of range (0, " + PercentileBuckets.length() + ")"); + } + writableMap().addTo((short) bucket, count); + this.sumOfCounts += count; + } + + private void add(Object key, Number value) + { + if (key instanceof String) { + this.add(Integer.parseInt((String) key), value.longValue()); + return; + } + if (Number.class.isAssignableFrom(key.getClass())) { + this.add(((Number) key).intValue(), value.longValue()); + return; + } + throw new IAE( + "Cannot add " + key.getClass() + "/" + value.getClass() + " to a Spectator Histogram" + ); + } + + // Used for testing + long get(int idx) + { + return readableMap().get((short) idx); + } + + // Accessible for serialization + void serialize(JsonGenerator jsonGenerator, SerializerProvider serializerProvider) throws IOException + { + JacksonUtils.writeObjectUsingSerializerProvider(jsonGenerator, serializerProvider, readableMap()); + } + + public boolean isEmpty() + { + return backingMap == null || backingMap.isEmpty(); + } + + public int size() + { + return readableMap().size(); + } + + public long getSum() + { + return sumOfCounts; + } + + @Override + public String toString() + { + return readableMap().toString(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + SpectatorHistogram that = (SpectatorHistogram) o; + return Objects.equals(readableMap(), that.readableMap()); + } + + @Override + public int hashCode() + { + return readableMap().hashCode(); + } + + @Override + public int intValue() + { + return (int) getSum(); + } + + @Override + public long longValue() + { + return getSum(); + } + + @Override + public float floatValue() + { + return getSum(); + } + + @Override + public double doubleValue() + { + return getSum(); + } + + /** + * Compute approximate percentile 
for the histogram + * @param percentile The percentile to compute + * @return the approximate percentile + */ + public double getPercentileValue(double percentile) + { + double[] pcts = new double[]{percentile}; + return getPercentileValues(pcts)[0]; + } + + /** + * Compute approximate percentiles for the histogram + * @param percentiles The percentiles to compute + * @return an array of approximate percentiles in the order of those provided + */ + public double[] getPercentileValues(double[] percentiles) + { + long[] counts = new long[PercentileBuckets.length()]; + for (Map.Entry e : readableMap().short2LongEntrySet()) { + counts[e.getKey()] = e.getValue(); + } + double[] results = new double[percentiles.length]; + PercentileBuckets.percentiles(counts, percentiles, results); + return results; + } +} diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregator.java b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregator.java new file mode 100644 index 000000000000..200a3204b7ac --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregator.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram; + +import com.google.errorprone.annotations.concurrent.GuardedBy; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.aggregation.Aggregator; +import org.apache.druid.segment.ColumnValueSelector; + +import javax.annotation.Nullable; + + +/** + * Aggregator to build Spectator style histograms. + */ +public class SpectatorHistogramAggregator implements Aggregator +{ + + private final ColumnValueSelector selector; + + @GuardedBy("this") + private final SpectatorHistogram counts; + + + public SpectatorHistogramAggregator(ColumnValueSelector selector) + { + this.selector = selector; + counts = new SpectatorHistogram(); + } + + @Override + public void aggregate() + { + Object obj = selector.getObject(); + if (obj == null) { + return; + } + if (obj instanceof SpectatorHistogram) { + SpectatorHistogram other = (SpectatorHistogram) obj; + synchronized (this) { + counts.merge(other); + } + } else if (obj instanceof Number) { + synchronized (this) { + counts.insert((Number) obj); + } + } else { + throw new IAE( + "Expected a long or a SpectatorHistogramMap, but received [%s] of type [%s]", + obj, + obj.getClass() + ); + } + } + + @Nullable + @Override + public synchronized Object get() + { + return counts.isEmpty() ? 
null : counts; + } + + @Override + public synchronized float getFloat() + { + return counts.getSum(); + } + + @Override + public synchronized long getLong() + { + return counts.getSum(); + } + + @Override + public synchronized boolean isNull() + { + return counts.isEmpty(); + } + + @Override + public synchronized void close() + { + + } +} diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregatorFactory.java b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregatorFactory.java new file mode 100644 index 000000000000..c02a990fe140 --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregatorFactory.java @@ -0,0 +1,373 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.spectator.histogram; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeName; +import org.apache.druid.query.aggregation.AggregateCombiner; +import org.apache.druid.query.aggregation.Aggregator; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.AggregatorFactoryNotMergeableException; +import org.apache.druid.query.aggregation.AggregatorUtil; +import org.apache.druid.query.aggregation.BufferAggregator; +import org.apache.druid.query.aggregation.ObjectAggregateCombiner; +import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.column.ValueType; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Objects; + +@JsonTypeName(SpectatorHistogramAggregatorFactory.TYPE_NAME) +public class SpectatorHistogramAggregatorFactory extends AggregatorFactory +{ + + @Nonnull + private final String name; + @Nonnull + private final String fieldName; + + @Nonnull + private final byte cacheTypeId; + + public static final String TYPE_NAME = "spectatorHistogram"; + + @JsonCreator + public SpectatorHistogramAggregatorFactory( + @JsonProperty("name") final String name, + @JsonProperty("fieldName") final String fieldName + ) + { + this(name, fieldName, AggregatorUtil.SPECTATOR_HISTOGRAM_CACHE_TYPE_ID); + } + + public SpectatorHistogramAggregatorFactory( + final String name, + final String fieldName, + final byte cacheTypeId + ) + { + this.name = Objects.requireNonNull(name, "Must have a valid, non-null aggregator name"); + this.fieldName = Objects.requireNonNull(fieldName, "Parameter fieldName must be specified"); + this.cacheTypeId = 
cacheTypeId; + } + + + @Override + public byte[] getCacheKey() + { + return new CacheKeyBuilder( + cacheTypeId + ).appendString(fieldName).build(); + } + + + @Override + public Aggregator factorize(ColumnSelectorFactory metricFactory) + { + return new SpectatorHistogramAggregator(metricFactory.makeColumnValueSelector(fieldName)); + } + + @Override + public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) + { + return new SpectatorHistogramBufferAggregator(metricFactory.makeColumnValueSelector(fieldName)); + } + + // This is used when writing metrics to segment files to check whether the column is sorted. + // Since there is no sensible way really to compare histograms, compareTo always returns 1. + public static final Comparator COMPARATOR = (o, o1) -> { + if (o == null && o1 == null) { + return 0; + } else if (o != null && o1 == null) { + return -1; + } else if (o == null) { + return 1; + } + return Integer.compare(o.hashCode(), o1.hashCode()); + }; + + @Override + public Comparator getComparator() + { + return COMPARATOR; + } + + @Override + public Object combine(@Nullable Object lhs, @Nullable Object rhs) + { + if (lhs == null) { + return rhs; + } + if (rhs == null) { + return lhs; + } + SpectatorHistogram lhsHisto = (SpectatorHistogram) lhs; + SpectatorHistogram rhsHisto = (SpectatorHistogram) rhs; + lhsHisto.merge(rhsHisto); + return lhsHisto; + } + + @Override + public AggregatorFactory getCombiningFactory() + { + return new SpectatorHistogramAggregatorFactory(name, name); + } + + @Override + public AggregatorFactory getMergingFactory(AggregatorFactory other) throws AggregatorFactoryNotMergeableException + { + if (other.getName().equals(this.getName()) && this.getClass() == other.getClass()) { + return getCombiningFactory(); + } else { + throw new AggregatorFactoryNotMergeableException(this, other); + } + } + + @Override + public List getRequiredColumns() + { + return Collections.singletonList( + new SpectatorHistogramAggregatorFactory( 
+ fieldName, + fieldName + ) + ); + } + + @Override + public Object deserialize(Object serializedHistogram) + { + return SpectatorHistogram.deserialize(serializedHistogram); + } + + @Nullable + @Override + public Object finalizeComputation(@Nullable Object object) + { + return object; + } + + @Override + @JsonProperty + public String getName() + { + return name; + } + + @JsonProperty + public String getFieldName() + { + return fieldName; + } + + @Override + public List requiredFields() + { + return Collections.singletonList(fieldName); + } + + @Override + public String getComplexTypeName() + { + return TYPE_NAME; + } + + @Override + public ValueType getType() + { + return ValueType.COMPLEX; + } + + @Override + public ValueType getFinalizedType() + { + return ValueType.COMPLEX; + } + + @Override + public int getMaxIntermediateSize() + { + return SpectatorHistogram.getMaxIntermdiateHistogramSize(); + } + + @Override + public AggregateCombiner makeAggregateCombiner() + { + return new ObjectAggregateCombiner() + { + private SpectatorHistogram combined = null; + + @Override + public void reset(final ColumnValueSelector selector) + { + combined = null; + fold(selector); + } + + @Override + public void fold(final ColumnValueSelector selector) + { + SpectatorHistogram other = (SpectatorHistogram) selector.getObject(); + if (other == null) { + return; + } + if (combined == null) { + combined = new SpectatorHistogram(); + } + combined.merge(other); + } + + @Nullable + @Override + public SpectatorHistogram getObject() + { + return combined; + } + + @Override + public Class classOfObject() + { + return SpectatorHistogram.class; + } + }; + } + + @Override + public boolean equals(final Object o) + { + if (this == o) { + return true; + } + if (o == null || !getClass().equals(o.getClass())) { + return false; + } + final SpectatorHistogramAggregatorFactory that = (SpectatorHistogramAggregatorFactory) o; + + //TODO: samarth should we check for equality of contents in count arrays? 
+ return Objects.equals(name, that.name) && + Objects.equals(fieldName, that.fieldName); + } + + @Override + public int hashCode() + { + return Objects.hash(name, fieldName); + } + + @Override + public String toString() + { + return getClass().getSimpleName() + "{" + + "name=" + name + + ", fieldName=" + fieldName + + "}"; + } + + @JsonTypeName(SpectatorHistogramAggregatorFactory.Timer.TYPE_NAME) + public static class Timer extends SpectatorHistogramAggregatorFactory + { + public static final String TYPE_NAME = "spectatorHistogramTimer"; + + public Timer( + @JsonProperty("name") final String name, + @JsonProperty("fieldName") final String fieldName + ) + { + super(name, fieldName, AggregatorUtil.SPECTATOR_HISTOGRAM_TIMER_CACHE_TYPE_ID); + } + + public Timer(final String name, final String fieldName, final byte cacheTypeId) + { + super(name, fieldName, cacheTypeId); + } + + @Override + public String getComplexTypeName() + { + return TYPE_NAME; + } + + @Override + public AggregatorFactory getCombiningFactory() + { + return new SpectatorHistogramAggregatorFactory.Timer(getName(), getName()); + } + + @Override + public List getRequiredColumns() + { + return Collections.singletonList( + new SpectatorHistogramAggregatorFactory.Timer( + getFieldName(), + getFieldName() + ) + ); + } + } + + @JsonTypeName(SpectatorHistogramAggregatorFactory.Distribution.TYPE_NAME) + public static class Distribution extends SpectatorHistogramAggregatorFactory + { + public static final String TYPE_NAME = "spectatorHistogramDistribution"; + + public Distribution( + @JsonProperty("name") final String name, + @JsonProperty("fieldName") final String fieldName + ) + { + super(name, fieldName, AggregatorUtil.SPECTATOR_HISTOGRAM_DISTRIBUTION_CACHE_TYPE_ID); + } + + public Distribution(final String name, final String fieldName, final byte cacheTypeId) + { + super(name, fieldName, cacheTypeId); + } + + @Override + public String getComplexTypeName() + { + return TYPE_NAME; + } + + @Override + public 
AggregatorFactory getCombiningFactory() + { + return new SpectatorHistogramAggregatorFactory.Distribution(getName(), getName()); + } + + @Override + public List getRequiredColumns() + { + return Collections.singletonList( + new SpectatorHistogramAggregatorFactory.Distribution( + getFieldName(), + getFieldName() + ) + ); + } + } +} diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramBufferAggregator.java b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramBufferAggregator.java new file mode 100644 index 000000000000..f2a808d44d75 --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramBufferAggregator.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.spectator.histogram;

import com.google.common.base.Preconditions;
import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.query.aggregation.BufferAggregator;
import org.apache.druid.segment.ColumnValueSelector;

import javax.annotation.Nonnull;
import java.nio.ByteBuffer;
import java.util.IdentityHashMap;

/**
 * Aggregator that builds Spectator Histograms over numeric values read from {@link ByteBuffer}.
 *
 * Histograms are kept on-heap, keyed by (buffer identity, position), rather than being serialized
 * into the buffer itself. The buffer is only used as an addressing scheme supplied by the framework.
 */
public class SpectatorHistogramBufferAggregator implements BufferAggregator
{
  @Nonnull
  private final ColumnValueSelector selector;

  // IdentityHashMap: buffers are compared by reference, not content, so concurrent mutation of a
  // buffer's bytes by other aggregators never affects our lookups.
  private final IdentityHashMap<ByteBuffer, Int2ObjectMap<SpectatorHistogram>> histogramCache =
      new IdentityHashMap<>();

  public SpectatorHistogramBufferAggregator(
      final ColumnValueSelector valueSelector
  )
  {
    Preconditions.checkNotNull(valueSelector);
    this.selector = valueSelector;
  }

  @Override
  public void init(ByteBuffer buffer, int position)
  {
    // Each slot starts with an empty histogram; it accumulates via aggregate().
    SpectatorHistogram emptyCounts = new SpectatorHistogram();
    addToCache(buffer, position, emptyCounts);
  }

  @Override
  public void aggregate(ByteBuffer buffer, int position)
  {
    Object obj = selector.getObject();
    if (obj == null) {
      // Nulls contribute nothing to the histogram.
      return;
    }
    SpectatorHistogram counts = histogramCache.get(buffer).get(position);
    if (obj instanceof SpectatorHistogram) {
      // Pre-aggregated input: merge bucket counts.
      SpectatorHistogram other = (SpectatorHistogram) obj;
      counts.merge(other);
    } else if (obj instanceof Number) {
      // Raw numeric input: insert into the appropriate bucket.
      counts.insert((Number) obj);
    } else {
      // Fixed message: the accepted types are Number and SpectatorHistogram (not long[]).
      throw new IAE(
          "Expected a number or a SpectatorHistogram, but received [%s] of type [%s]",
          obj,
          obj.getClass()
      );
    }
  }

  @Override
  public Object get(final ByteBuffer buffer, final int position)
  {
    // histogramCache is an IdentityHashMap where the reference of buffer is used for equality checks.
    // So the returned object isn't impacted by the changes in the buffer object made by concurrent threads.
    SpectatorHistogram spectatorHistogram = histogramCache.get(buffer).get(position);
    if (spectatorHistogram.isEmpty()) {
      // An empty histogram is reported as null, matching the column's null semantics.
      return null;
    }
    return spectatorHistogram;
  }

  @Override
  public float getFloat(final ByteBuffer buffer, final int position)
  {
    throw new UnsupportedOperationException("Not implemented");
  }

  @Override
  public long getLong(final ByteBuffer buffer, final int position)
  {
    throw new UnsupportedOperationException("Not implemented");
  }

  @Override
  public void close()
  {
    histogramCache.clear();
  }

  @Override
  public void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, ByteBuffer newBuffer)
  {
    // Single lookup of the old buffer's map (previously looked up twice).
    final Int2ObjectMap<SpectatorHistogram> oldMap = histogramCache.get(oldBuffer);
    final SpectatorHistogram histogram = oldMap.get(oldPosition);
    addToCache(newBuffer, newPosition, histogram);
    oldMap.remove(oldPosition);
    if (oldMap.isEmpty()) {
      // Drop empty per-buffer maps so retired buffers don't leak.
      histogramCache.remove(oldBuffer);
    }
  }

  private void addToCache(final ByteBuffer buffer, final int position, final SpectatorHistogram histogram)
  {
    Int2ObjectMap<SpectatorHistogram> map = histogramCache.computeIfAbsent(
        buffer,
        b -> new Int2ObjectOpenHashMap<>()
    );
    map.put(position, histogram);
  }
}
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram; + +import com.google.common.base.Supplier; +import org.apache.druid.segment.column.ComplexColumn; + +public class SpectatorHistogramColumnPartSupplier implements Supplier +{ + private final SpectatorHistogramIndexed complexType; + private final String typeName; + + public SpectatorHistogramColumnPartSupplier(final String typeName, final SpectatorHistogramIndexed complexType) + { + this.complexType = complexType; + this.typeName = typeName; + } + + @Override + public ComplexColumn get() + { + return new SpectatorHistogramIndexBasedComplexColumn(typeName, complexType); + } +} diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramComplexMetricSerde.java b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramComplexMetricSerde.java new file mode 100644 index 000000000000..ffad30dd81c0 --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramComplexMetricSerde.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.spectator.histogram;

import org.apache.druid.data.input.InputRow;
import org.apache.druid.segment.GenericColumnSerializer;
import org.apache.druid.segment.column.ColumnBuilder;
import org.apache.druid.segment.data.ObjectStrategy;
import org.apache.druid.segment.serde.ComplexMetricExtractor;
import org.apache.druid.segment.serde.ComplexMetricSerde;
import org.apache.druid.segment.writeout.SegmentWriteOutMedium;

import java.nio.ByteBuffer;

/**
 * ComplexMetricSerde wiring for Spectator histogram columns: input-row extraction,
 * column deserialization, and the serializer used at persist time.
 */
public class SpectatorHistogramComplexMetricSerde extends ComplexMetricSerde
{
  private static final SpectatorHistogramObjectStrategy STRATEGY = new SpectatorHistogramObjectStrategy();

  private final String typeName;

  SpectatorHistogramComplexMetricSerde(String type)
  {
    this.typeName = type;
  }

  @Override
  public String getTypeName()
  {
    return typeName;
  }

  @Override
  public ComplexMetricExtractor getExtractor()
  {
    return new ComplexMetricExtractor()
    {
      @Override
      public Class extractedClass()
      {
        return SpectatorHistogram.class;
      }

      @Override
      public Object extractValue(final InputRow inputRow, final String metricName)
      {
        final Object raw = inputRow.getRaw(metricName);
        // Histograms and numbers pass through untouched; nulls stay null.
        if (raw == null || raw instanceof SpectatorHistogram || raw instanceof Number) {
          return raw;
        }
        if (raw instanceof String) {
          final String str = (String) raw;
          // Blank strings are treated as absent values.
          if (str.trim().isEmpty()) {
            return null;
          }
          // Leading digit => treat as a long.
          // NOTE(review): assumes integral strings only; a value like "1.5" would throw
          // NumberFormatException here — confirm upstream inputs are whole numbers.
          if (Character.isDigit(str.charAt(0))) {
            return Long.parseLong(str);
          }
        }
        // Anything else (e.g. a JSON bucket map) is interpreted by SpectatorHistogram itself.
        return SpectatorHistogram.deserialize(raw);
      }
    };
  }

  @Override
  public void deserializeColumn(ByteBuffer buffer, ColumnBuilder builder)
  {
    final SpectatorHistogramIndexed indexed = SpectatorHistogramIndexed.read(buffer, STRATEGY);
    builder.setComplexColumnSupplier(new SpectatorHistogramColumnPartSupplier(this.typeName, indexed));
  }

  @Override
  public ObjectStrategy getObjectStrategy()
  {
    return STRATEGY;
  }

  @Override
  public GenericColumnSerializer getSerializer(SegmentWriteOutMedium segmentWriteOutMedium, String column)
  {
    return SpectatorHistogramSerializer.create(segmentWriteOutMedium, column, this.getObjectStrategy());
  }
}
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram; + +import org.apache.druid.segment.column.ComplexColumn; + +public class SpectatorHistogramIndexBasedComplexColumn implements ComplexColumn +{ + private final SpectatorHistogramIndexed index; + private final String typeName; + + public SpectatorHistogramIndexBasedComplexColumn(String typeName, SpectatorHistogramIndexed index) + { + this.index = index; + this.typeName = typeName; + } + + @Override + public Class getClazz() + { + return index.getClazz(); + } + + @Override + public String getTypeName() + { + return typeName; + } + + @Override + public Object getRowValue(int rowNum) + { + return index.get(rowNum); + } + + @Override + public int getLength() + { + return index.size(); + } + + @Override + public void close() + { + } +} diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramIndexed.java b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramIndexed.java new file mode 100644 index 000000000000..54b76bb05f32 --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramIndexed.java @@ -0,0 +1,185 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.spectator.histogram;

import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.io.smoosh.FileSmoosher;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.data.CloseableIndexed;
import org.apache.druid.segment.data.IndexedIterable;
import org.apache.druid.segment.data.ObjectStrategy;
import org.apache.druid.segment.serde.Serializer;

import javax.annotation.Nullable;
import java.nio.ByteBuffer;
import java.nio.channels.WritableByteChannel;
import java.util.Iterator;

/**
 * A generic, flat storage mechanism. Use static SpectatorHistogramSerializer.create to construct.
 * Supports efficient storage for sparse columns that contain lots of nulls.
 * <p>
 * Storage Format:
 * <p>
 * byte 1: version (0x1)
 * byte 2: reserved flags
 * bytes 3-6 =&gt; numBytesUsed for header and values
 * bytes 7-some =&gt; header including count, bitmap of present values and offsets to values.
 * bytes (header.serializedSize + 6)-(numBytesUsed + 6): bytes representing the values. If offset is null, then the value is null.
 */
public class SpectatorHistogramIndexed implements CloseableIndexed<SpectatorHistogram>, Serializer
{
  static final byte VERSION_ONE = 0x1;
  static final byte RESERVED_FLAGS = 0x0;

  /**
   * Decodes an indexed column from {@code buffer}, advancing the buffer past the consumed bytes.
   *
   * @throws IAE if the version byte is not {@link #VERSION_ONE}
   */
  public static SpectatorHistogramIndexed read(ByteBuffer buffer, ObjectStrategy<SpectatorHistogram> strategy)
  {
    final byte version = buffer.get();
    if (VERSION_ONE == version) {
      // Reserved flags byte, not currently used.
      buffer.get();
      final int numBytes = buffer.getInt();
      // Carve out a window [position, position + numBytes) for this column's header + values...
      final ByteBuffer window = buffer.slice();
      window.limit(numBytes);
      // ...and leave the caller's buffer positioned just past it.
      buffer.position(buffer.position() + numBytes);
      return new SpectatorHistogramIndexed(window, strategy);
    }
    throw new IAE("Unknown version[%d]", (int) version);
  }

  private final ObjectStrategy<SpectatorHistogram> strategy;
  private final int size;
  private final NullableOffsetsHeader offsetsHeader;
  private final ByteBuffer valueBuffer;

  private SpectatorHistogramIndexed(
      ByteBuffer buffer,
      ObjectStrategy<SpectatorHistogram> strategy
  )
  {
    this.strategy = strategy;
    // Header first: entry count plus a presence bitmap with value offsets.
    this.offsetsHeader = NullableOffsetsHeader.read(buffer);
    this.size = offsetsHeader.size();
    // Everything after the header is the concatenated value bytes.
    this.valueBuffer = buffer.slice();
  }

  /**
   * Checks if {@code index} a valid `element index` in SpectatorHistogramIndexed.
   * Similar to Preconditions.checkElementIndex() except this method throws {@link IAE} with custom error message.
   * <p>
   * Used here to get existing behavior(same error message and exception) of V1 GenericIndexed.
   *
   * @param index index identifying an element of an SpectatorHistogramIndexed.
   */
  private void checkIndex(int index)
  {
    if (index < 0) {
      throw new IAE("Index[%s] < 0", index);
    }
    if (index >= size) {
      throw new IAE("Index[%d] >= size[%d]", index, size);
    }
  }

  public Class<? extends SpectatorHistogram> getClazz()
  {
    return strategy.getClazz();
  }

  @Override
  public int size()
  {
    return size;
  }

  @Nullable
  @Override
  public SpectatorHistogram get(int index)
  {
    checkIndex(index);

    final NullableOffsetsHeader.Offset offset = offsetsHeader.get(index);
    if (offset == null) {
      // No offset recorded => the row is null.
      return null;
    }

    // Read through a read-only view so concurrent readers never race on position/limit.
    final ByteBuffer view = valueBuffer.asReadOnlyBuffer();
    view.position(offset.getStart());
    view.limit(offset.getStart() + offset.getLength());
    return strategy.fromByteBuffer(view, offset.getLength());
  }

  @Override
  public int indexOf(@Nullable SpectatorHistogram value)
  {
    throw new UnsupportedOperationException("Reverse lookup not allowed.");
  }

  @Override
  public Iterator<SpectatorHistogram> iterator()
  {
    return IndexedIterable.create(this).iterator();
  }

  @Override
  public long getSerializedSize()
  {
    throw new UnsupportedOperationException("Serialization not supported here");
  }

  @Override
  public void writeTo(WritableByteChannel channel, FileSmoosher smoosher)
  {
    throw new UnsupportedOperationException("Serialization not supported here");
  }

  @Override
  public void inspectRuntimeShape(RuntimeShapeInspector inspector)
  {
    inspector.visit("headerBuffer", offsetsHeader);
    inspector.visit("firstValueBuffer", valueBuffer);
    inspector.visit("strategy", strategy);
  }

  @Override
  public String toString()
  {
    return "SpectatorHistogramIndexed[" + "size: "
           + size()
           + " cardinality: "
           + offsetsHeader.getCardinality()
           + ']';
  }

  @Override
  public void close()
  {
    // nothing to close
  }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.spectator.histogram;

import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JsonSerializer;
import com.fasterxml.jackson.databind.SerializerProvider;

import java.io.IOException;

/**
 * Jackson serializer for {@link SpectatorHistogram}. The histogram knows its own JSON
 * representation, so serialization is delegated straight to it.
 */
public class SpectatorHistogramJsonSerializer extends JsonSerializer<SpectatorHistogram>
{
  @Override
  public void serialize(
      SpectatorHistogram histogram,
      JsonGenerator generator,
      SerializerProvider provider
  ) throws IOException
  {
    histogram.serialize(generator, provider);
  }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.spectator.histogram;

import com.fasterxml.jackson.databind.Module;
import com.fasterxml.jackson.databind.jsontype.NamedType;
import com.fasterxml.jackson.databind.module.SimpleModule;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import com.google.inject.Binder;
import org.apache.druid.initialization.DruidModule;
import org.apache.druid.segment.serde.ComplexMetrics;

import java.util.List;

/**
 * Module defining various aggregators for Spectator Histograms.
 * Registers the complex-metric serdes and the Jackson subtypes for the
 * aggregator factories and percentile post-aggregators.
 */
public class SpectatorHistogramModule implements DruidModule
{
  @VisibleForTesting
  public static void registerSerde()
  {
    // One serde per complex type name; all share the same implementation.
    ComplexMetrics.registerSerde(
        SpectatorHistogramAggregatorFactory.TYPE_NAME,
        new SpectatorHistogramComplexMetricSerde(SpectatorHistogramAggregatorFactory.TYPE_NAME)
    );
    ComplexMetrics.registerSerde(
        SpectatorHistogramAggregatorFactory.Timer.TYPE_NAME,
        new SpectatorHistogramComplexMetricSerde(SpectatorHistogramAggregatorFactory.Timer.TYPE_NAME)
    );
    ComplexMetrics.registerSerde(
        SpectatorHistogramAggregatorFactory.Distribution.TYPE_NAME,
        new SpectatorHistogramComplexMetricSerde(SpectatorHistogramAggregatorFactory.Distribution.TYPE_NAME)
    );
  }

  @Override
  public List<? extends Module> getJacksonModules()
  {
    final SimpleModule module = new SimpleModule(getClass().getSimpleName());
    module.registerSubtypes(
        new NamedType(
            SpectatorHistogramAggregatorFactory.class,
            SpectatorHistogramAggregatorFactory.TYPE_NAME
        ),
        new NamedType(
            SpectatorHistogramAggregatorFactory.Timer.class,
            SpectatorHistogramAggregatorFactory.Timer.TYPE_NAME
        ),
        new NamedType(
            SpectatorHistogramAggregatorFactory.Distribution.class,
            SpectatorHistogramAggregatorFactory.Distribution.TYPE_NAME
        ),
        new NamedType(
            SpectatorHistogramPercentilePostAggregator.class,
            SpectatorHistogramPercentilePostAggregator.TYPE_NAME
        ),
        new NamedType(
            SpectatorHistogramPercentilesPostAggregator.class,
            SpectatorHistogramPercentilesPostAggregator.TYPE_NAME
        )
    );
    module.addSerializer(SpectatorHistogram.class, new SpectatorHistogramJsonSerializer());
    return ImmutableList.of(module);
  }

  @Override
  public void configure(Binder binder)
  {
    registerSerde();
    // TODO: register SQL bindings (quantile / generate-histogram SQL aggregators) once available.
  }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.spectator.histogram;

import org.apache.druid.segment.data.ObjectStrategy;

import javax.annotation.Nullable;
import java.nio.ByteBuffer;

/**
 * ObjectStrategy mapping {@link SpectatorHistogram} to/from its byte representation.
 * A null histogram serializes to null bytes and zero bytes deserialize to null.
 */
public class SpectatorHistogramObjectStrategy implements ObjectStrategy<SpectatorHistogram>
{
  // Representation of a null histogram (renamed from EMPTY_BYTES: the value is null, not empty).
  private static final byte[] NULL_BYTES = null;

  @Override
  public Class<SpectatorHistogram> getClazz()
  {
    return SpectatorHistogram.class;
  }

  @Override
  public SpectatorHistogram fromByteBuffer(ByteBuffer readOnlyBuffer, int numBytes)
  {
    // Zero-length payload means the row held no histogram.
    return numBytes == 0 ? null : SpectatorHistogram.fromByteBuffer(readOnlyBuffer);
  }

  @Override
  public byte[] toBytes(@Nullable SpectatorHistogram val)
  {
    return val == null ? NULL_BYTES : val.toBytes();
  }

  @Override
  public int compare(SpectatorHistogram o1, SpectatorHistogram o2)
  {
    // Ordering is owned by the aggregator factory's comparator.
    return SpectatorHistogramAggregatorFactory.COMPARATOR.compare(o1, o2);
  }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.spectator.histogram;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.google.common.primitives.Doubles;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.PostAggregator;
import org.apache.druid.query.aggregation.post.PostAggregatorIds;
import org.apache.druid.query.cache.CacheKeyBuilder;
import org.apache.druid.segment.ColumnInspector;
import org.apache.druid.segment.column.ColumnType;

import java.util.Comparator;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

/**
 * Post-aggregator that computes a single percentile (0-100, inclusive) from a
 * {@link SpectatorHistogram} produced by the given field.
 */
public class SpectatorHistogramPercentilePostAggregator implements PostAggregator
{
  public static final String TYPE_NAME = "percentileSpectatorHistogram";

  private final String name;
  private final PostAggregator field;
  private final double percentile;

  /**
   * @param name       output name of this post-aggregation
   * @param field      post-aggregator producing the SpectatorHistogram
   * @param percentile percentile to compute, in [0, 100]
   */
  @JsonCreator
  public SpectatorHistogramPercentilePostAggregator(
      @JsonProperty("name") final String name,
      @JsonProperty("field") final PostAggregator field,
      @JsonProperty("percentile") final double percentile
  )
  {
    this.name = Preconditions.checkNotNull(name, "name is null");
    this.field = Preconditions.checkNotNull(field, "field is null");
    // Fixed message: the check is inclusive, so say [0, 100] rather than (0, 100).
    Preconditions.checkArgument(
        percentile >= 0 && percentile <= 100,
        "Percentile argument not in range [0, 100]"
    );
    this.percentile = percentile;
  }

  @Override
  @JsonProperty
  public String getName()
  {
    return name;
  }

  @Override
  public ColumnType getType(ColumnInspector signature)
  {
    return ColumnType.DOUBLE;
  }

  @JsonProperty
  public PostAggregator getField()
  {
    return field;
  }

  @JsonProperty
  public double getPercentile()
  {
    return percentile;
  }

  @Override
  public Object compute(final Map<String, Object> combinedAggregators)
  {
    final SpectatorHistogram sketch = (SpectatorHistogram) field.compute(combinedAggregators);
    return sketch.getPercentileValue(percentile);
  }

  @Override
  public Comparator getComparator()
  {
    return Doubles::compare;
  }

  @Override
  public Set<String> getDependentFields()
  {
    return field.getDependentFields();
  }

  @Override
  public String toString()
  {
    // Fixed: label the percentile field as "percentile" (previously printed "fraction").
    return getClass().getSimpleName() + "{" +
           "name='" + name + '\'' +
           ", field=" + field +
           ", percentile=" + percentile +
           "}";
  }

  @Override
  public byte[] getCacheKey()
  {
    final CacheKeyBuilder builder = new CacheKeyBuilder(
        PostAggregatorIds.SPECTATOR_HISTOGRAM_SKETCH_PERCENTILE_CACHE_TYPE_ID).appendCacheable(field);
    builder.appendDouble(percentile);
    return builder.build();
  }

  @Override
  public boolean equals(Object o)
  {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    SpectatorHistogramPercentilePostAggregator that = (SpectatorHistogramPercentilePostAggregator) o;
    return Double.compare(that.percentile, percentile) == 0 &&
           Objects.equals(name, that.name) &&
           Objects.equals(field, that.field);
  }

  @Override
  public int hashCode()
  {
    return Objects.hash(name, field, percentile);
  }

  @Override
  public PostAggregator decorate(final Map<String, AggregatorFactory> map)
  {
    // No decoration needed; this post-aggregator is self-contained.
    return this;
  }
}
000000000000..11ce9e0d9bd4 --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramPercentilesPostAggregator.java @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.spectator.histogram;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.google.common.primitives.Doubles;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.PostAggregator;
import org.apache.druid.query.aggregation.post.PostAggregatorIds;
import org.apache.druid.query.cache.CacheKeyBuilder;
import org.apache.druid.segment.ColumnInspector;
import org.apache.druid.segment.column.ColumnType;

import java.util.Arrays;
import java.util.Comparator;
import java.util.Map;
import java.util.Set;

/**
 * Post-aggregator that computes multiple percentiles (each 0-100, inclusive) from a
 * {@link SpectatorHistogram} produced by the given field, returning a double array.
 */
public class SpectatorHistogramPercentilesPostAggregator implements PostAggregator
{
  public static final String TYPE_NAME = "percentilesSpectatorHistogram";

  private final String name;
  private final PostAggregator field;
  private final double[] percentiles;

  /**
   * @param name        output name of this post-aggregation
   * @param field       post-aggregator producing the SpectatorHistogram
   * @param percentiles non-empty array of percentiles, each in [0, 100]
   */
  @JsonCreator
  public SpectatorHistogramPercentilesPostAggregator(
      @JsonProperty("name") final String name,
      @JsonProperty("field") final PostAggregator field,
      @JsonProperty("percentiles") final double[] percentiles
  )
  {
    this.name = Preconditions.checkNotNull(name, "name is null");
    this.field = Preconditions.checkNotNull(field, "field is null");
    // Fixed message terminology: these are percentiles, not fractions.
    this.percentiles = Preconditions.checkNotNull(percentiles, "array of percentiles is null");
    Preconditions.checkArgument(this.percentiles.length >= 1, "Array of percentiles cannot " +
                                                              "be empty");
    // Consistency with the single-percentile post-aggregator: each value must be in [0, 100].
    for (double percentile : this.percentiles) {
      Preconditions.checkArgument(
          percentile >= 0 && percentile <= 100,
          "Percentile argument not in range [0, 100]"
      );
    }
  }

  @Override
  @JsonProperty
  public String getName()
  {
    return name;
  }

  @Override
  public ColumnType getType(ColumnInspector signature)
  {
    return ColumnType.DOUBLE_ARRAY;
  }

  @JsonProperty
  public PostAggregator getField()
  {
    return field;
  }

  @JsonProperty
  public double[] getPercentiles()
  {
    return percentiles;
  }

  @Override
  public Object compute(final Map<String, Object> combinedAggregators)
  {
    final SpectatorHistogram sketch = (SpectatorHistogram) field.compute(combinedAggregators);
    return sketch.getPercentileValues(percentiles);
  }

  @Override
  public Comparator getComparator()
  {
    // NOTE(review): compute() returns a double[], which Doubles::compare cannot order —
    // confirm this comparator is never applied to array results.
    return Doubles::compare;
  }

  @Override
  public Set<String> getDependentFields()
  {
    return field.getDependentFields();
  }

  @Override
  public String toString()
  {
    return getClass().getSimpleName() + "{" +
           "name='" + name + '\'' +
           ", field=" + field +
           ", percentiles=" + Arrays.toString(percentiles) +
           "}";
  }

  @Override
  public byte[] getCacheKey()
  {
    final CacheKeyBuilder builder = new CacheKeyBuilder(
        PostAggregatorIds.SPECTATOR_HISTOGRAM_SKETCH_PERCENTILES_CACHE_TYPE_ID).appendCacheable(field);
    for (final double value : percentiles) {
      builder.appendDouble(value);
    }
    return builder.build();
  }

  @Override
  public boolean equals(Object o)
  {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    final SpectatorHistogramPercentilesPostAggregator that = (SpectatorHistogramPercentilesPostAggregator) o;
    if (!name.equals(that.name)) {
      return false;
    }
    if (!Arrays.equals(percentiles, that.percentiles)) {
      return false;
    }
    return field.equals(that.field);
  }

  @Override
  public int hashCode()
  {
    return (name.hashCode() * 31 + field.hashCode()) * 31 + Arrays.hashCode(percentiles);
  }

  @Override
  public PostAggregator decorate(final Map<String, AggregatorFactory> map)
  {
    // No decoration needed; this post-aggregator is self-contained.
    return this;
  }
}
(ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram; + +import com.google.common.primitives.Ints; +import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.GenericColumnSerializer; +import org.apache.druid.segment.data.ColumnCapacityExceededException; +import org.apache.druid.segment.data.ObjectStrategy; +import org.apache.druid.segment.serde.MetaSerdeHelper; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; +import org.apache.druid.segment.writeout.WriteOutBytes; + +import java.io.IOException; +import java.nio.channels.WritableByteChannel; + +public class SpectatorHistogramSerializer implements GenericColumnSerializer +{ + private static final MetaSerdeHelper META_SERDE_HELPER = MetaSerdeHelper + .firstWriteByte((SpectatorHistogramSerializer x) -> SpectatorHistogramIndexed.VERSION_ONE) + .writeByte(x -> SpectatorHistogramIndexed.RESERVED_FLAGS) + // numBytesUsed field is header + values (i.e. 
all bytes _after_ this) + .writeInt(x -> Ints.checkedCast(x.offsetsHeader.getSerializedSize() + x.valuesBuffer.size())); + + public static SpectatorHistogramSerializer create( + SegmentWriteOutMedium segmentWriteOutMedium, + String columnName, + ObjectStrategy strategy + ) + { + return new SpectatorHistogramSerializer( + columnName, + segmentWriteOutMedium, + strategy + ); + } + + private final String columnName; + private final SegmentWriteOutMedium segmentWriteOutMedium; + private final ObjectStrategy objectStrategy; + private NullableOffsetsHeader offsetsHeader; + private WriteOutBytes valuesBuffer; + + private int rowCount = 0; + + private SpectatorHistogramSerializer( + String columnName, + SegmentWriteOutMedium segmentWriteOutMedium, + ObjectStrategy strategy + ) + { + this.columnName = columnName; + this.segmentWriteOutMedium = segmentWriteOutMedium; + this.objectStrategy = strategy; + } + + @Override + public void open() throws IOException + { + this.offsetsHeader = NullableOffsetsHeader.create(segmentWriteOutMedium); + this.valuesBuffer = segmentWriteOutMedium.makeWriteOutBytes(); + } + + @Override + public void serialize(ColumnValueSelector selector) throws IOException + { + rowCount++; + if (rowCount < 0) { + throw new ColumnCapacityExceededException(columnName); + } + Object value = selector.getObject(); + if (value == null) { + offsetsHeader.writeNull(); + } else { + objectStrategy.writeTo((SpectatorHistogram) value, valuesBuffer); + offsetsHeader.writeOffset(Ints.checkedCast(valuesBuffer.size())); + } + } + + @Override + public long getSerializedSize() + { + // Meta header, Offsets, Values + return META_SERDE_HELPER.size(this) + offsetsHeader.getSerializedSize() + valuesBuffer.size(); + } + + @Override + public void writeTo(WritableByteChannel channel, FileSmoosher smoosher) throws IOException + { + META_SERDE_HELPER.writeTo(channel, this); + offsetsHeader.writeTo(channel, null); + valuesBuffer.writeTo(channel); + } +} diff --git 
a/extensions-contrib/spectator-histogram/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule b/extensions-contrib/spectator-histogram/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule new file mode 100644 index 000000000000..f158b84da3f9 --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.druid.spectator.histogram.SpectatorHistogramModule diff --git a/extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/NullableOffsetsHeaderTest.java b/extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/NullableOffsetsHeaderTest.java new file mode 100644 index 000000000000..add0d88efceb --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/NullableOffsetsHeaderTest.java @@ -0,0 +1,441 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram; + +import com.google.common.collect.ImmutableList; +import org.apache.druid.segment.writeout.OnHeapMemorySegmentWriteOutMedium; +import org.junit.Assert; +import org.junit.Test; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.nio.channels.WritableByteChannel; +import java.util.Arrays; +import java.util.List; + +public class NullableOffsetsHeaderTest +{ + @Test + public void testShouldAcceptNullWrites() throws IOException + { + NullableOffsetsHeader header = NullableOffsetsHeader.create(new OnHeapMemorySegmentWriteOutMedium()); + header.writeNull(); + header.writeNull(); + header.writeNull(); + + Assert.assertEquals("Size should be count of entries", 3, header.size()); + + header = serde(header); + Assert.assertEquals("Size should be count of entries", 3, header.size()); + + Assert.assertNull("Should return null for null entries by index", header.get(0)); + Assert.assertNull("Should return null for null entries by index", header.get(1)); + Assert.assertNull("Should return null for null entries by index", header.get(2)); + } + + @Test + public void testShouldAcceptOffsetWrites() throws IOException + { + NullableOffsetsHeader 
header = NullableOffsetsHeader.create(new OnHeapMemorySegmentWriteOutMedium()); + header.writeOffset(123); + header.writeOffset(456); + + Assert.assertEquals("Size should be count of entries", 2, header.size()); + + header = serde(header); + Assert.assertEquals("Size should be count of entries", 2, header.size()); + + Assert.assertNotNull("Should flag nulls by index", header.get(0)); + Assert.assertNotNull("Should flag nulls by index", header.get(1)); + + Assert.assertEquals("Should return value for entries by index", 0, header.get(0).getStart()); + Assert.assertEquals("Should return value for entries by index", 123, header.get(0).getEnd()); + Assert.assertEquals("Should return value for entries by index", 123, header.get(1).getStart()); + Assert.assertEquals("Should return value for entries by index", 456, header.get(1).getEnd()); + } + + @Test + public void testShouldAcceptMixedWrites() throws IOException + { + NullableOffsetsHeader header = NullableOffsetsHeader.create(new OnHeapMemorySegmentWriteOutMedium()); + header.writeOffset(123); + header.writeNull(); + header.writeNull(); + header.writeOffset(456); + header.writeOffset(789); + header.writeNull(); + + Assert.assertEquals("Size should be count of entries", 6, header.size()); + + header = serde(header); + Assert.assertEquals("Size should be count of entries", 6, header.size()); + + Assert.assertNotNull("Should flag nulls by index", header.get(0)); + Assert.assertNull("Should flag nulls by index", header.get(1)); + Assert.assertNull("Should flag nulls by index", header.get(2)); + Assert.assertNotNull("Should flag nulls by index", header.get(3)); + Assert.assertNotNull("Should flag nulls by index", header.get(4)); + Assert.assertNull("Should flag nulls by index", header.get(5)); + + Assert.assertEquals("Should return value for entries by index", 0, header.get(0).getStart()); + Assert.assertEquals("Should return value for entries by index", 123, header.get(0).getEnd()); + Assert.assertEquals("Should return 
value for entries by index", 123, header.get(3).getStart()); + Assert.assertEquals("Should return value for entries by index", 456, header.get(3).getEnd()); + Assert.assertEquals("Should return value for entries by index", 456, header.get(4).getStart()); + Assert.assertEquals("Should return value for entries by index", 789, header.get(4).getEnd()); + } + + @Test + public void testGiveAccessToOffsets() throws IOException + { + NullableOffsetsHeader header = NullableOffsetsHeader.create(new OnHeapMemorySegmentWriteOutMedium()); + header.writeOffset(123); + header.writeNull(); + header.writeNull(); + header.writeOffset(456); + header.writeOffset(789); + header.writeNull(); + + header = serde(header); + + Assert.assertNull("Should return null for 6", header.get(6)); + + Assert.assertNull("Should return null for 5", header.get(5)); + + Assert.assertEquals("Offset at 4", 789, header.get(4).getEnd()); + Assert.assertEquals("Offset prior to 4", 456, header.get(4).getStart()); + + Assert.assertEquals("Offset at 3", 456, header.get(3).getEnd()); + Assert.assertEquals("Offset prior to 3", 123, header.get(3).getStart()); + + Assert.assertNull("Should return null for 2", header.get(2)); + + Assert.assertNull("Should return null for 1", header.get(1)); + + Assert.assertEquals("Offset at 0", 123, header.get(0).getEnd()); + Assert.assertEquals("Offset prior to 0", 0, header.get(0).getStart()); + } + + @Test + public void testGiveAccessToSingleOffsetNulls() throws IOException + { + NullableOffsetsHeader header = NullableOffsetsHeader.create(new OnHeapMemorySegmentWriteOutMedium()); + header.writeNull(); + header.writeOffset(123); + header.writeNull(); + header.writeNull(); + header.writeNull(); + + header = serde(header); + + Assert.assertEquals("Offset at 1", 123, header.get(1).getEnd()); + Assert.assertEquals("Offset prior to 1", 0, header.get(1).getStart()); + + Assert.assertNull("Nulls for anything not set", header.get(0)); + Assert.assertNull("Nulls for anything not set", 
header.get(-1)); + Assert.assertNull("Nulls for anything not set", header.get(3)); + Assert.assertNull("Nulls for anything not set", header.get(100)); + } + + @Test + public void testShouldSerializeAndDeserialize() throws IOException + { + NullableOffsetsHeader header = NullableOffsetsHeader.create(new OnHeapMemorySegmentWriteOutMedium()); + header.writeOffset(123); + header.writeNull(); + header.writeNull(); + header.writeOffset(456); + header.writeOffset(789); + header.writeNull(); + + // Length + BitmapLength + Bitmap + Offsets + // 4 + 4 + 1 + 12 = 21 bytes + Assert.assertEquals("Serialized size should be minimal", 21, header.getSerializedSize()); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final WritableByteChannel channel = Channels.newChannel(baos); + header.writeTo(channel, null); + channel.close(); + + final ByteBuffer byteBuffer = ByteBuffer.wrap(baos.toByteArray()); + Assert.assertEquals( + "Reported size and actual size should match", + header.getSerializedSize(), + byteBuffer.remaining() + ); + + NullableOffsetsHeader deserialized = NullableOffsetsHeader.read(byteBuffer); + Assert.assertEquals(0, byteBuffer.remaining()); + + Assert.assertEquals("Deserialized should match pre-serialized size", header.size(), deserialized.size()); + + // Nulls should return the previous offset + List expected = Arrays.asList( + new NullableOffsetsHeader.Offset(0, 123), + null, + null, + new NullableOffsetsHeader.Offset(123, 456), + new NullableOffsetsHeader.Offset(456, 789), + null + ); + + for (int i = 0; i < header.size(); i++) { + Assert.assertEquals("Deserialized should match pre-serialized values", expected.get(i), deserialized.get(i)); + } + } + + @Test + public void testShouldSerializeAndDeserializeAllNulls() throws IOException + { + NullableOffsetsHeader header = NullableOffsetsHeader.create(new OnHeapMemorySegmentWriteOutMedium()); + for (int i = 0; i < 10000; i++) { + header.writeNull(); + } + + // Length + BitmapLength + Bitmap + Offsets + 
// 4 + 4 + 0 + 0 = 8 bytes + Assert.assertEquals("Serialized size should be minimal", 8, header.getSerializedSize()); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final WritableByteChannel channel = Channels.newChannel(baos); + header.writeTo(channel, null); + channel.close(); + + final ByteBuffer byteBuffer = ByteBuffer.wrap(baos.toByteArray()); + Assert.assertEquals( + "Reported size and actual size should match", + header.getSerializedSize(), + byteBuffer.remaining() + ); + + NullableOffsetsHeader deserialized = NullableOffsetsHeader.read(byteBuffer); + Assert.assertEquals(0, byteBuffer.remaining()); + + Assert.assertEquals("Deserialized should match pre-serialized size", header.size(), deserialized.size()); + + for (int i = 0; i < header.size(); i++) { + Assert.assertNull("Deserialized should be null", deserialized.get(i)); + } + } + + @Test + public void testShouldSerializeAndDeserializeAllValues() throws IOException + { + NullableOffsetsHeader header = NullableOffsetsHeader.create(new OnHeapMemorySegmentWriteOutMedium()); + for (int i = 0; i < 10000; i++) { + header.writeOffset(i + 1); + } + + // Length + BitmapLength + Bitmap + Offsets + // 4 + 4 + 0 + 40000 = 40008 bytes + // Bitmap is omitted if all values are set + Assert.assertEquals("Serialized size should be minimal", 40008, header.getSerializedSize()); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final WritableByteChannel channel = Channels.newChannel(baos); + header.writeTo(channel, null); + channel.close(); + + final ByteBuffer byteBuffer = ByteBuffer.wrap(baos.toByteArray()); + Assert.assertEquals( + "Reported size and actual size should match", + header.getSerializedSize(), + byteBuffer.remaining() + ); + + NullableOffsetsHeader deserialized = NullableOffsetsHeader.read(byteBuffer); + Assert.assertEquals(0, byteBuffer.remaining()); + + Assert.assertEquals("Deserialized should match pre-serialized size", header.size(), deserialized.size()); + + for (int i = 0; 
i < header.size(); i++) { + Assert.assertNotNull("Deserialized should be set " + i, deserialized.get(i)); + Assert.assertEquals("Deserialized should match pre-serialized nulls " + i, i + 1, deserialized.get(i).getEnd()); + } + } + + @Test + public void testShouldFindOffsetFromIndexSingleWord() throws IOException + { + // Should return the exact index of the offset to read, or negative if not present + List expectedOffsetIndexes = ImmutableList.of(15, 21, 30, 31); + NullableOffsetsHeader header = createHeaderWithIndexesSet(expectedOffsetIndexes); + Assert.assertEquals("Size should be count of entries", 32, header.size()); + header = serde(header); + + for (int i = 0; i < header.size(); i++) { + int offsetIndex = header.getOffsetIndex(i); + int expected = expectedOffsetIndexes.indexOf(i); + Assert.assertEquals("Offset " + i, expected, offsetIndex); + } + } + + @Test + public void testShouldFindOffsetFromIndexMultipleWords() throws IOException + { + // Should return the exact index of the offset to read, or negative if not present + List expectedOffsetIndexes = ImmutableList.of(15, 21, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 70, 100); + NullableOffsetsHeader header = createHeaderWithIndexesSet(expectedOffsetIndexes); + Assert.assertEquals("Size should be count of entries", 101, header.size()); + header = serde(header); + + for (int i = 0; i < header.size(); i++) { + int offsetIndex = header.getOffsetIndex(i); + int expected = expectedOffsetIndexes.indexOf(i); + Assert.assertEquals("Offset " + i, expected, offsetIndex); + } + } + + @Test + public void testShouldFindOffsetFromIndexFull() throws IOException + { + // For a full header, the bitset is omitted. + // The expected index, is the queried index. 
+ final int size = 500; + NullableOffsetsHeader header = NullableOffsetsHeader.create(new OnHeapMemorySegmentWriteOutMedium()); + for (int i = 0; i < size; i++) { + header.writeOffset(i + 1); + } + Assert.assertEquals("Size should be count of entries", size, header.size()); + header = serde(header); + + for (int i = 0; i < size; i++) { + int offsetIndex = header.getOffsetIndex(i); + Assert.assertEquals("Offset " + i, i, offsetIndex); + } + } + + @Test + public void testShouldFindOffsetFromIndexEmpty() throws IOException + { + // For an empty header, the bitset is omitted. + // The expected index, is always -1. + final int size = 500; + NullableOffsetsHeader header = NullableOffsetsHeader.create(new OnHeapMemorySegmentWriteOutMedium()); + for (int i = 0; i < size; i++) { + header.writeNull(); + } + Assert.assertEquals("Size should be count of entries", size, header.size()); + header = serde(header); + + for (int i = 0; i < size; i++) { + int offsetIndex = header.getOffsetIndex(i); + Assert.assertEquals("Offset " + i, -1, offsetIndex); + } + } + + @Test + public void testShouldWorkWithBitsSetAfter64bitBoundary() throws IOException + { + List expectedOffsetIndexes = ImmutableList.of(0, 1, 2, 3, 4, 256, 257); + NullableOffsetsHeader header = createHeaderWithIndexesSet(expectedOffsetIndexes); + Assert.assertEquals("Size should be count of entries", 258, header.size()); + header = serde(header); + Assert.assertEquals("Size should be count of entries", 258, header.size()); + Assert.assertEquals("Cardinality should be count of non-nulls", expectedOffsetIndexes.size(), header.getCardinality()); + + for (int i = 0; i < header.size(); i++) { + int offsetIndex = header.getOffsetIndex(i); + int expectedOffset = expectedOffsetIndexes.indexOf(i); + Assert.assertEquals("Offset " + i, expectedOffset, offsetIndex); + + NullableOffsetsHeader.Offset offset = header.get(i); + if (expectedOffset < 0) { + Assert.assertNull("Null Offset " + i, offset); + } else { + int expectedOffsetStart 
= expectedOffset; + int expectedOffsetEnd = expectedOffset + 1; + Assert.assertEquals("Offset Start " + i, expectedOffsetStart, offset.getStart()); + Assert.assertEquals("Offset End " + i, expectedOffsetEnd, offset.getEnd()); + Assert.assertEquals("Offset Length " + i, 1, offset.getLength()); + } + } + } + + @Test + public void testShouldWorkOnLongByteBoundaries() throws IOException + { + for (int x = 1; x < 24; x++) { + int boundary = ((int) Math.pow(2, x)) - 1; + List expectedOffsetIndexes = ImmutableList.of(boundary - 1); + NullableOffsetsHeader header = createHeaderWithIndexesSet(expectedOffsetIndexes); + Assert.assertEquals("Size should be count of entries", boundary, header.size()); + header = serde(header); + Assert.assertEquals("Size should be count of entries", boundary, header.size()); + Assert.assertEquals( + "Cardinality should be count of non-nulls", + expectedOffsetIndexes.size(), + header.getCardinality() + ); + + for (int i = 0; i < header.size(); i++) { + int offsetIndex = header.getOffsetIndex(i); + int expectedOffset = expectedOffsetIndexes.indexOf(i); + Assert.assertEquals("Offset " + i, expectedOffset, offsetIndex); + + NullableOffsetsHeader.Offset offset = header.get(i); + if (expectedOffset < 0) { + Assert.assertNull("Null Offset " + i, offset); + } else { + int expectedOffsetStart = expectedOffset; + int expectedOffsetEnd = expectedOffset + 1; + Assert.assertEquals("Offset Start " + i, expectedOffsetStart, offset.getStart()); + Assert.assertEquals("Offset End " + i, expectedOffsetEnd, offset.getEnd()); + Assert.assertEquals("Offset Length " + i, 1, offset.getLength()); + } + } + } + } + + /** + * Test helper to serialize and deserialize a NullableOffsetsHeader + * + * @param in The NullableOffsetsHeader to serialize + * @return The deserialized representation of in. 
+ */ + NullableOffsetsHeader serde(NullableOffsetsHeader in) throws IOException + { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final WritableByteChannel channel = Channels.newChannel(baos); + in.writeTo(channel, null); + channel.close(); + + final ByteBuffer byteBuffer = ByteBuffer.wrap(baos.toByteArray()); + return NullableOffsetsHeader.read(byteBuffer); + } + + /** + * Helper to make a header with the provided indexes set + */ + NullableOffsetsHeader createHeaderWithIndexesSet(List indexes) throws IOException + { + NullableOffsetsHeader header = NullableOffsetsHeader.create(new OnHeapMemorySegmentWriteOutMedium()); + int offset = 1; + for (Integer idx : indexes) { + while (header.size() < idx) { + header.writeNull(); + } + header.writeOffset(offset++); + } + return header; + } +} diff --git a/extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregatorTest.java b/extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregatorTest.java new file mode 100644 index 000000000000..1c30cfc05c36 --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregatorTest.java @@ -0,0 +1,733 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.netflix.spectator.api.histogram.PercentileBuckets; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.query.Druids; +import org.apache.druid.query.QueryPlus; +import org.apache.druid.query.QueryRunner; +import org.apache.druid.query.QueryRunnerTestHelper; +import org.apache.druid.query.Result; +import org.apache.druid.query.aggregation.AggregationTestHelper; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.AggregatorUtil; +import org.apache.druid.query.groupby.GroupByQueryConfig; +import org.apache.druid.query.groupby.GroupByQueryRunnerTest; +import org.apache.druid.query.groupby.ResultRow; +import org.apache.druid.query.metadata.SegmentMetadataQueryConfig; +import org.apache.druid.query.metadata.SegmentMetadataQueryQueryToolChest; +import org.apache.druid.query.metadata.SegmentMetadataQueryRunnerFactory; +import org.apache.druid.query.metadata.metadata.ColumnAnalysis; +import org.apache.druid.query.metadata.metadata.SegmentAnalysis; +import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery; +import org.apache.druid.query.timeseries.TimeseriesResultValue; +import org.apache.druid.segment.IndexIO; +import org.apache.druid.segment.QueryableIndex; +import org.apache.druid.segment.QueryableIndexSegment; +import 
org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.column.ColumnConfig; +import org.apache.druid.testing.InitializedNullHandlingTest; +import org.apache.druid.timeline.SegmentId; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.File; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +@RunWith(Parameterized.class) +public class SpectatorHistogramAggregatorTest extends InitializedNullHandlingTest +{ + public static final String INPUT_DATA_PARSE_SPEC = String.join( + "\n", + "{", + " \"type\": \"string\",", + " \"parseSpec\": {", + " \"format\": \"tsv\",", + " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", + " \"dimensionsSpec\": {", + " \"dimensions\": [\"product\"],", + " \"dimensionExclusions\": [],", + " \"spatialDimensions\": []", + " },", + " \"columns\": [\"timestamp\", \"product\", \"cost\"]", + " }", + "}" + ); + @Rule + public final TemporaryFolder tempFolder = new TemporaryFolder(); + + private static final SegmentMetadataQueryRunnerFactory METADATA_QR_FACTORY = new SegmentMetadataQueryRunnerFactory( + new SegmentMetadataQueryQueryToolChest(new SegmentMetadataQueryConfig()), + QueryRunnerTestHelper.NOOP_QUERYWATCHER + ); + private static final Map EXPECTED_HISTOGRAMS = new HashMap<>(); + + static { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 1L); + EXPECTED_HISTOGRAMS.put("A", histogram); + + histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(30 + 40 + 40 + 40 + 50 + 50), 1L); + EXPECTED_HISTOGRAMS.put("B", histogram); + + histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(50 + 20000), 1L); + EXPECTED_HISTOGRAMS.put("C", histogram); + } + + private 
final AggregationTestHelper helper; + private final AggregationTestHelper timeSeriesHelper; + + public SpectatorHistogramAggregatorTest(final GroupByQueryConfig config) + { + SpectatorHistogramModule.registerSerde(); + SpectatorHistogramModule module = new SpectatorHistogramModule(); + helper = AggregationTestHelper.createGroupByQueryAggregationTestHelper( + module.getJacksonModules(), config, tempFolder); + timeSeriesHelper = AggregationTestHelper.createTimeseriesQueryAggregationTestHelper( + module.getJacksonModules(), + tempFolder + ); + } + + @Parameterized.Parameters(name = "{0}") + public static Collection constructorFeeder() + { + final List constructors = new ArrayList<>(); + for (GroupByQueryConfig config : GroupByQueryRunnerTest.testConfigs()) { + constructors.add(new Object[]{config}); + } + return constructors; + } + + // this is to test Json properties and equals + @Test + public void serializeDeserializeFactoryWithFieldName() throws Exception + { + ObjectMapper objectMapper = new DefaultObjectMapper(); + new SpectatorHistogramModule().getJacksonModules().forEach(objectMapper::registerModule); + SpectatorHistogramAggregatorFactory factory = new SpectatorHistogramAggregatorFactory( + "name", + "filedName", + AggregatorUtil.SPECTATOR_HISTOGRAM_CACHE_TYPE_ID + ); + AggregatorFactory other = objectMapper.readValue( + objectMapper.writeValueAsString(factory), + AggregatorFactory.class + ); + + Assert.assertEquals(factory, other); + } + + @Test + public void testBuildingHistogramQueryTime() throws Exception + { + Sequence seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"longSum\", \"name\": \"cost_sum\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": 
\"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimensions\": [\"product\"],", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"cost_histogram\", \"fieldName\": " + + "\"cost_sum\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + List results = seq.toList(); + assertResultsMatch(results, 0, "A"); + assertResultsMatch(results, 1, "B"); + assertResultsMatch(results, 2, "C"); + } + + @Test + public void testBuildingAndMergingHistograms() throws Exception + { + Sequence seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimenions\": [],", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + SpectatorHistogram expected = new SpectatorHistogram(); + expected.add(PercentileBuckets.indexOf(10), 1L); + expected.add(PercentileBuckets.indexOf(30), 1L); + expected.add(PercentileBuckets.indexOf(40), 3L); + expected.add(PercentileBuckets.indexOf(50), 3L); + expected.add(PercentileBuckets.indexOf(20000), 1L); + + List results = seq.toList(); + Assert.assertEquals(1, results.size()); + Assert.assertEquals(expected, results.get(0).get(0)); + } + + @Test + public void testBuildingAndMergingHistogramsTimeseriesQuery() throws Exception + { + Object rawseq = timeSeriesHelper.createIndexAndRunQueryOnSegment( + new 
File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"timeseries\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + SpectatorHistogram expected = new SpectatorHistogram(); + expected.add(PercentileBuckets.indexOf(10), 1L); + expected.add(PercentileBuckets.indexOf(30), 1L); + expected.add(PercentileBuckets.indexOf(40), 3L); + expected.add(PercentileBuckets.indexOf(50), 3L); + expected.add(PercentileBuckets.indexOf(20000), 1L); + + Sequence> seq = (Sequence>) rawseq; + List> results = seq.toList(); + Assert.assertEquals(1, results.size()); + SpectatorHistogram value = (SpectatorHistogram) results.get(0).getValue().getMetric("merged_cost_histogram"); + Assert.assertEquals(expected, value); + } + + @Test + public void testBuildingAndMergingGroupbyHistograms() throws Exception + { + Sequence seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimensions\": [\"product\"],", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"merged_histogram\", 
\"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + + List results = seq.toList(); + Assert.assertEquals(6, results.size()); + + SpectatorHistogram expectedA = new SpectatorHistogram(); + expectedA.add(PercentileBuckets.indexOf(10), 1L); + Assert.assertEquals(expectedA, results.get(0).get(1)); + + SpectatorHistogram expectedB = new SpectatorHistogram(); + expectedB.add(PercentileBuckets.indexOf(30), 1L); + expectedB.add(PercentileBuckets.indexOf(40), 3L); + expectedB.add(PercentileBuckets.indexOf(50), 2L); + Assert.assertEquals(expectedB, results.get(1).get(1)); + + SpectatorHistogram expectedC = new SpectatorHistogram(); + expectedC.add(PercentileBuckets.indexOf(50), 1L); + expectedC.add(PercentileBuckets.indexOf(20000), 1L); + Assert.assertEquals(expectedC, results.get(2).get(1)); + + Assert.assertNull(results.get(3).get(1)); + Assert.assertNull(results.get(4).get(1)); + Assert.assertNull(results.get(5).get(1)); + } + + @Test + public void testBuildingAndCountingHistograms() throws Exception + { + Sequence seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimenions\": [],", + " \"aggregations\": [", + " {\"type\": \"longSum\", \"name\": \"count_histogram\", \"fieldName\": " + + "\"histogram\"},", + " {\"type\": \"doubleSum\", \"name\": \"double_count_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + + List results = seq.toList(); + 
Assert.assertEquals(1, results.size()); + // Check longSum + Assert.assertEquals(9L, results.get(0).get(0)); + // Check doubleSum + Assert.assertEquals(9.0, (Double) results.get(0).get(1), 0.001); + } + + @Test + public void testBuildingAndCountingHistogramsWithNullFilter() throws Exception + { + Sequence seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimenions\": [],", + " \"aggregations\": [", + " {\"type\": \"longSum\", \"name\": \"count_histogram\", \"fieldName\": " + + "\"histogram\"},", + " {\"type\": \"doubleSum\", \"name\": \"double_count_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"],", + " \"filter\": {\n", + " \"fields\": [\n", + " {\n", + " \"field\": {\n", + " \"dimension\": \"histogram\",\n", + " \"value\": \"0\",\n", + " \"type\": \"selector\"\n", + " },\n", + " \"type\": \"not\"\n", + " },\n", + " {\n", + " \"field\": {\n", + " \"dimension\": \"histogram\",\n", + " \"value\": \"\",\n", + " \"type\": \"selector\"\n", + " },\n", + " \"type\": \"not\"\n", + " }\n", + " ],\n", + " \"type\": \"and\"\n", + " }", + "}" + ) + ); + + List results = seq.toList(); + Assert.assertEquals(1, results.size()); + // Check longSum + Assert.assertEquals(9L, results.get(0).get(0)); + // Check doubleSum + Assert.assertEquals(9.0, (Double) results.get(0).get(1), 0.001); + } + + @Test + public void testIngestAsHistogramDistribution() throws Exception + { + Sequence seq = helper.createIndexAndRunQueryOnSegment( + new 
File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogramDistribution\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimenions\": [],", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + SpectatorHistogram expected = new SpectatorHistogram(); + expected.add(PercentileBuckets.indexOf(10), 1L); + expected.add(PercentileBuckets.indexOf(30), 1L); + expected.add(PercentileBuckets.indexOf(40), 3L); + expected.add(PercentileBuckets.indexOf(50), 3L); + expected.add(PercentileBuckets.indexOf(20000), 1L); + + List results = seq.toList(); + Assert.assertEquals(1, results.size()); + Assert.assertEquals(expected, results.get(0).get(0)); + } + + @Test + public void testIngestHistogramsTimer() throws Exception + { + Sequence seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogramTimer\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimenions\": [],", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": 
[\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + SpectatorHistogram expected = new SpectatorHistogram(); + expected.add(PercentileBuckets.indexOf(10), 1L); + expected.add(PercentileBuckets.indexOf(30), 1L); + expected.add(PercentileBuckets.indexOf(40), 3L); + expected.add(PercentileBuckets.indexOf(50), 3L); + expected.add(PercentileBuckets.indexOf(20000), 1L); + + List results = seq.toList(); + Assert.assertEquals(1, results.size()); + Assert.assertEquals(expected, results.get(0).get(0)); + } + + @Test + public void testIngestingPreaggregatedHistograms() throws Exception + { + Object rawseq = timeSeriesHelper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("pre_agg_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"timeseries\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + SpectatorHistogram expected = new SpectatorHistogram(); + expected.add(PercentileBuckets.indexOf(10), 1L); + expected.add(PercentileBuckets.indexOf(30), 1L); + expected.add(PercentileBuckets.indexOf(40), 3L); + expected.add(PercentileBuckets.indexOf(50), 3L); + expected.add(PercentileBuckets.indexOf(20000), 1L); + + Sequence> seq = (Sequence>) rawseq; + List> results = seq.toList(); + Assert.assertEquals(1, results.size()); + SpectatorHistogram value = (SpectatorHistogram) results.get(0).getValue().getMetric("merged_cost_histogram"); + Assert.assertEquals(expected, value); + } + + @Test + public void 
testMetadataQueryTimer() throws Exception + { + File segmentDir = tempFolder.newFolder(); + helper.createIndex( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogramTimer\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + segmentDir, + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + true + ); + + ObjectMapper mapper = (ObjectMapper) TestHelper.makeJsonMapper(); + SpectatorHistogramModule module = new SpectatorHistogramModule(); + module.getJacksonModules().forEach(mod -> mapper.registerModule(mod)); + IndexIO indexIO = new IndexIO( + mapper, + new ColumnConfig() {} + ); + + QueryableIndex index = indexIO.loadIndex(segmentDir); + + SegmentId segmentId = SegmentId.dummy("segmentId"); + QueryRunner runner = QueryRunnerTestHelper.makeQueryRunner( + METADATA_QR_FACTORY, + segmentId, + new QueryableIndexSegment(index, segmentId), + null + ); + + SegmentMetadataQuery segmentMetadataQuery = Druids.newSegmentMetadataQueryBuilder() + .dataSource("test_datasource") + .intervals("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z") + .merge(true) + .build(); + List results = runner.run(QueryPlus.wrap(segmentMetadataQuery)).toList(); + System.out.println(results); + Assert.assertEquals(1, results.size()); + Map columns = results.get(0).getColumns(); + Assert.assertNotNull(columns.get("histogram")); + Assert.assertEquals("spectatorHistogramTimer", columns.get("histogram").getType()); + } + + @Test + public void testMetadataQueryDistribution() throws Exception + { + File segmentDir = tempFolder.newFolder(); + helper.createIndex( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogramDistribution\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + segmentDir, + 0, // minTimestamp + 
Granularities.NONE, + 10, // maxRowCount + true + ); + + ObjectMapper mapper = (ObjectMapper) TestHelper.makeJsonMapper(); + SpectatorHistogramModule module = new SpectatorHistogramModule(); + module.getJacksonModules().forEach(mod -> mapper.registerModule(mod)); + IndexIO indexIO = new IndexIO( + mapper, + new ColumnConfig() { } + ); + + QueryableIndex index = indexIO.loadIndex(segmentDir); + + SegmentId segmentId = SegmentId.dummy("segmentId"); + QueryRunner runner = QueryRunnerTestHelper.makeQueryRunner( + METADATA_QR_FACTORY, + segmentId, + new QueryableIndexSegment(index, segmentId), + null + ); + + SegmentMetadataQuery segmentMetadataQuery = Druids.newSegmentMetadataQueryBuilder() + .dataSource("test_datasource") + .intervals("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z") + .merge(true) + .build(); + List results = runner.run(QueryPlus.wrap(segmentMetadataQuery)).toList(); + System.out.println(results); + Assert.assertEquals(1, results.size()); + Map columns = results.get(0).getColumns(); + Assert.assertNotNull(columns.get("histogram")); + Assert.assertEquals("spectatorHistogramDistribution", columns.get("histogram").getType()); + } + + @Test + public void testPercentilePostAggregator() throws Exception + { + Sequence seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimenions\": [],", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"postAggregations\": [", + " {\"type\": \"percentileSpectatorHistogram\", 
\"name\": \"percentileValue\", \"field\": {\"type\": \"fieldAccess\",\"fieldName\": \"merged_cost_histogram\"}" + + ", \"percentile\": \"50.0\"},", + " {\"type\": \"percentilesSpectatorHistogram\", \"name\": \"percentileValues\", \"field\": {\"type\": \"fieldAccess\",\"fieldName\": \"merged_cost_histogram\"}" + + ", \"percentiles\": [25.0, 50.0, 75.0, 99.0]}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + SpectatorHistogram expected = new SpectatorHistogram(); + expected.add(PercentileBuckets.indexOf(10), 1L); + expected.add(PercentileBuckets.indexOf(30), 1L); + expected.add(PercentileBuckets.indexOf(40), 3L); + expected.add(PercentileBuckets.indexOf(50), 3L); + expected.add(PercentileBuckets.indexOf(20000), 1L); + + List results = seq.toList(); + Assert.assertEquals(1, results.size()); + // Check on Median (true median is 40) + Assert.assertEquals(40.0, (double) results.get(0).get(1), 0.2); + // True percentiles for 25, 50, 75, 99 + double[] expectedPercentiles = new double[]{40.0, 40.0, 50.0, 18404.0}; + double[] resultPercentiles = (double[]) results.get(0).get(2); + + for (int i = 0; i < expectedPercentiles.length; i++) { + double expectedPercentile = expectedPercentiles[i]; + double resultPercentile = resultPercentiles[i]; + double error18pcnt = expectedPercentile * 0.18; + // Should be within 18% + Assert.assertEquals(expectedPercentile, resultPercentile, error18pcnt); + } + } + + private static void assertResultsMatch(List results, int rowNum, String expectedProduct) + { + ResultRow row = results.get(rowNum); + Object product = row.get(0); + Assert.assertTrue("Expected dimension of type String", product instanceof String); + Assert.assertEquals("Product values didn't match", expectedProduct, product); + Object histogram = row.get(1); + Assert.assertTrue( + "Expected histogram metric of type SpectatorHistogramUtils.HistogramMap", + histogram instanceof SpectatorHistogram + ); + 
Assert.assertEquals("Count values didn't match", EXPECTED_HISTOGRAMS.get(product), histogram); + } + +} diff --git a/extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramTest.java b/extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramTest.java new file mode 100644 index 000000000000..fb15ac85e4c4 --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramTest.java @@ -0,0 +1,451 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.spectator.histogram;

import com.netflix.spectator.api.histogram.PercentileBuckets;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.writeout.OnHeapMemorySegmentWriteOutMedium;
import org.apache.druid.segment.writeout.SegmentWriteOutMedium;
import org.junit.Assert;
import org.junit.Test;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.Channels;
import java.nio.channels.WritableByteChannel;

/**
 * Unit tests for {@link SpectatorHistogram}: variable-width byte encoding of
 * bucket counts, bucket-index validation, columnar serialization via
 * {@link SpectatorHistogramSerializer}/{@link SpectatorHistogramIndexed}, and
 * percentile estimation.
 */
public class SpectatorHistogramTest
{
  /**
   * Serializes the histogram, asserts the expected encoded length, and returns
   * the histogram obtained by deserializing those bytes.
   */
  private static SpectatorHistogram roundTrip(SpectatorHistogram histogram, String message, int expectedLength)
  {
    byte[] bytes = histogram.toBytes();
    Assert.assertEquals(message, expectedLength, bytes.length);
    return SpectatorHistogram.deserialize(bytes);
  }

  @Test
  public void testToBytesSmallValues()
  {
    SpectatorHistogram histogram = new SpectatorHistogram();
    histogram.insert(10);
    histogram.insert(30);
    histogram.insert(40);
    histogram.insert(40);
    histogram.insert(40);
    histogram.insert(50);
    histogram.insert(50);
    // Check the full range of bucket IDs still work
    long bigValue = PercentileBuckets.get(270);
    histogram.insert(bigValue);

    Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size());
    Assert.assertEquals("Should have sum matching number entries", 8, histogram.getSum());

    // Counts this small are packed entirely into the key's spare bits,
    // so each bucket costs only the two key bytes.
    int keySize = Short.BYTES;
    int valSize = 0;
    SpectatorHistogram deserialized =
        roundTrip(histogram, "Should compact small values within key bytes", 5 * (keySize + valSize));
    Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(10)));
    Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(30)));
    Assert.assertEquals(3L, deserialized.get(PercentileBuckets.indexOf(40)));
    Assert.assertEquals(2L, deserialized.get(PercentileBuckets.indexOf(50)));
    Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(bigValue)));

    Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size());
    Assert.assertEquals("Should have sum matching number entries", 8, deserialized.getSum());
  }

  @Test
  public void testToBytesSmallishValues()
  {
    SpectatorHistogram histogram = new SpectatorHistogram();
    histogram.add(PercentileBuckets.indexOf(10), 64L);
    histogram.add(PercentileBuckets.indexOf(30), 127L);
    histogram.add(PercentileBuckets.indexOf(40), 111L);
    histogram.add(PercentileBuckets.indexOf(50), 99L);
    histogram.add(270, 100L);

    Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size());
    Assert.assertEquals("Should have sum matching number entries", 501, histogram.getSum());

    // Counts in the 64..255 range need one extra value byte per bucket.
    int keySize = Short.BYTES;
    int valSize = Byte.BYTES;
    SpectatorHistogram deserialized =
        roundTrip(histogram, "Should compact small values to a byte", 5 * (keySize + valSize));
    Assert.assertEquals(64L, deserialized.get(PercentileBuckets.indexOf(10)));
    Assert.assertEquals(127L, deserialized.get(PercentileBuckets.indexOf(30)));
    Assert.assertEquals(111L, deserialized.get(PercentileBuckets.indexOf(40)));
    Assert.assertEquals(99L, deserialized.get(PercentileBuckets.indexOf(50)));
    Assert.assertEquals(100L, deserialized.get(270));

    Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size());
    Assert.assertEquals("Should have sum matching number entries", 501, deserialized.getSum());
  }

  @Test
  public void testToBytesMedValues()
  {
    SpectatorHistogram histogram = new SpectatorHistogram();
    histogram.add(PercentileBuckets.indexOf(10), 512L);
    histogram.add(PercentileBuckets.indexOf(30), 1024L);
    histogram.add(PercentileBuckets.indexOf(40), 2048L);
    histogram.add(PercentileBuckets.indexOf(50), 4096L);
    histogram.add(270, 8192L);

    Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size());
    Assert.assertEquals("Should have sum matching number entries", 15872, histogram.getSum());

    // Counts that fit in 16 bits are stored as shorts.
    int keySize = Short.BYTES;
    int valSize = Short.BYTES;
    SpectatorHistogram deserialized =
        roundTrip(histogram, "Should compact medium values to short", 5 * (keySize + valSize));
    Assert.assertEquals(512L, deserialized.get(PercentileBuckets.indexOf(10)));
    Assert.assertEquals(1024L, deserialized.get(PercentileBuckets.indexOf(30)));
    Assert.assertEquals(2048L, deserialized.get(PercentileBuckets.indexOf(40)));
    Assert.assertEquals(4096L, deserialized.get(PercentileBuckets.indexOf(50)));
    Assert.assertEquals(8192L, deserialized.get(270));

    Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size());
    Assert.assertEquals("Should have sum matching number entries", 15872, deserialized.getSum());
  }

  @Test
  public void testToBytesLargerValues()
  {
    SpectatorHistogram histogram = new SpectatorHistogram();
    histogram.add(PercentileBuckets.indexOf(10), 100000L);
    histogram.add(PercentileBuckets.indexOf(30), 200000L);
    histogram.add(PercentileBuckets.indexOf(40), 500000L);
    histogram.add(PercentileBuckets.indexOf(50), 10000000L);
    histogram.add(270, 50000000L);

    Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size());
    Assert.assertEquals("Should have sum matching number entries", 60800000, histogram.getSum());

    // Counts that fit in 32 bits are stored as ints.
    int keySize = Short.BYTES;
    int valSize = Integer.BYTES;
    SpectatorHistogram deserialized =
        roundTrip(histogram, "Should compact larger values to integer", 5 * (keySize + valSize));
    Assert.assertEquals(100000L, deserialized.get(PercentileBuckets.indexOf(10)));
    Assert.assertEquals(200000L, deserialized.get(PercentileBuckets.indexOf(30)));
    Assert.assertEquals(500000L, deserialized.get(PercentileBuckets.indexOf(40)));
    Assert.assertEquals(10000000L, deserialized.get(PercentileBuckets.indexOf(50)));
    Assert.assertEquals(50000000L, deserialized.get(270));

    Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size());
    Assert.assertEquals("Should have sum matching number entries", 60800000, deserialized.getSum());
  }

  @Test
  public void testToBytesBiggestValues()
  {
    SpectatorHistogram histogram = new SpectatorHistogram();
    histogram.add(PercentileBuckets.indexOf(10), 10000000000L);
    histogram.add(PercentileBuckets.indexOf(30), 20000000000L);
    histogram.add(PercentileBuckets.indexOf(40), 50000000000L);
    histogram.add(PercentileBuckets.indexOf(50), 100000000000L);
    histogram.add(270, 5000000000000L);

    Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size());
    Assert.assertEquals("Should have sum matching number entries", 5180000000000L, histogram.getSum());

    // Counts beyond 32 bits fall back to full longs.
    int keySize = Short.BYTES;
    int valSize = Long.BYTES;
    SpectatorHistogram deserialized =
        roundTrip(histogram, "Should not compact larger values", 5 * (keySize + valSize));
    Assert.assertEquals(10000000000L, deserialized.get(PercentileBuckets.indexOf(10)));
    Assert.assertEquals(20000000000L, deserialized.get(PercentileBuckets.indexOf(30)));
    Assert.assertEquals(50000000000L, deserialized.get(PercentileBuckets.indexOf(40)));
    Assert.assertEquals(100000000000L, deserialized.get(PercentileBuckets.indexOf(50)));
    Assert.assertEquals(5000000000000L, deserialized.get(270));

    Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size());
    Assert.assertEquals("Should have sum matching number entries", 5180000000000L, deserialized.getSum());
  }

  @Test
  public void testToBytesMixedValues()
  {
    SpectatorHistogram histogram = new SpectatorHistogram();
    histogram.add(PercentileBuckets.indexOf(10), 1L);
    histogram.add(PercentileBuckets.indexOf(30), 300L);
    histogram.add(PercentileBuckets.indexOf(40), 200000L);
    histogram.add(PercentileBuckets.indexOf(50), 100000000000L);
    histogram.add(270, 5000000000000L);

    Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size());
    Assert.assertEquals("Should have sum matching number entries", 5100000200301L, histogram.getSum());

    // Each count independently picks the narrowest width it fits:
    // 1 -> in-key, 300 -> short, 200000 -> int, the two largest -> long.
    int keySize = Short.BYTES;
    SpectatorHistogram deserialized =
        roundTrip(histogram, "Should not compact larger values", (5 * keySize) + 0 + 2 + 4 + 8 + 8);
    Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(10)));
    Assert.assertEquals(300L, deserialized.get(PercentileBuckets.indexOf(30)));
    Assert.assertEquals(200000L, deserialized.get(PercentileBuckets.indexOf(40)));
    Assert.assertEquals(100000000000L, deserialized.get(PercentileBuckets.indexOf(50)));
    Assert.assertEquals(5000000000000L, deserialized.get(270));

    Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size());
    Assert.assertEquals("Should have sum matching number entries", 5100000200301L, deserialized.getSum());
  }

  @Test
  public void testToBytesBoundaryValues()
  {
    // Each pair of counts straddles a width boundary:
    // 63/64 (in-key vs byte), 255/256 (byte vs short),
    // 65535/65536 (short vs int), 4294967295/4294967296 (int vs long).
    SpectatorHistogram histogram = new SpectatorHistogram();
    histogram.add(6, 63L);
    histogram.add(7, 64L);
    histogram.add(8, 255L);
    histogram.add(9, 256L);
    histogram.add(16, 65535L);
    histogram.add(17, 65536L);
    histogram.add(32, 4294967295L);
    histogram.add(33, 4294967296L);

    Assert.assertEquals("Should have size matching number of buckets", 8, histogram.size());
    Assert.assertEquals("Should have sum matching number entries", 8590066300L, histogram.getSum());

    int keySize = Short.BYTES;
    SpectatorHistogram deserialized =
        roundTrip(histogram, "Should compact", (8 * keySize) + 0 + 1 + 1 + 2 + 2 + 4 + 4 + 8);
    Assert.assertEquals(63L, deserialized.get(6));
    Assert.assertEquals(64L, deserialized.get(7));
    Assert.assertEquals(255L, deserialized.get(8));
    Assert.assertEquals(256L, deserialized.get(9));
    Assert.assertEquals(65535L, deserialized.get(16));
    Assert.assertEquals(65536L, deserialized.get(17));
    Assert.assertEquals(4294967295L, deserialized.get(32));
    Assert.assertEquals(4294967296L, deserialized.get(33));

    Assert.assertEquals("Should have size matching number of buckets", 8, deserialized.size());
    Assert.assertEquals("Should have sum matching number entries", 8590066300L, deserialized.getSum());
  }

  @Test(expected = IAE.class)
  public void testBucketOutOfRangeMax()
  {
    // Bucket indexes beyond the PercentileBuckets range must be rejected.
    SpectatorHistogram histogram = new SpectatorHistogram();
    histogram.add(500, 1);
  }

  @Test(expected = IAE.class)
  public void testBucketOutOfRangeNegative()
  {
    // Negative bucket indexes must be rejected.
    SpectatorHistogram histogram = new SpectatorHistogram();
    histogram.add(-2, 1);
  }

  @Test
  public void testSerializeAndDeserialize() throws IOException
  {
    SegmentWriteOutMedium medium = new OnHeapMemorySegmentWriteOutMedium();
    SpectatorHistogramObjectStrategy strategy = new SpectatorHistogramObjectStrategy();
    SpectatorHistogramSerializer serializer = SpectatorHistogramSerializer.create(medium, "test", strategy);
    serializer.open();

    SpectatorHistogram histogram = new SpectatorHistogram();
    histogram.add(6, 63L);
    histogram.add(7, 64L);
    histogram.add(8, 255L);
    histogram.add(9, 256L);
    histogram.add(16, 65535L);
    histogram.add(17, 65536L);
    histogram.add(32, 4294967295L);
    histogram.add(33, 4294967296L);

    ColumnValueSelector<SpectatorHistogram> selector = new ColumnValueSelector<SpectatorHistogram>()
    {
      private int callCount = 0;

      @Override
      public boolean isNull()
      {
        return false;
      }

      @Override
      public long getLong()
      {
        return 0;
      }

      @Override
      public float getFloat()
      {
        return 0;
      }

      @Override
      public double getDouble()
      {
        return 0;
      }

      @Override
      public void inspectRuntimeShape(RuntimeShapeInspector inspector)
      {

      }

      @Override
      public SpectatorHistogram getObject()
      {
        // On every 3rd fetch and after 6, we'll return a null.
        // Columns ending with a lot of nulls won't add to the
        // size of the segment at all.
        ++callCount;
        if ((callCount % 3 == 0) || callCount > 6) {
          return null;
        }
        return histogram;
      }

      @Override
      public Class<? extends SpectatorHistogram> classOfObject()
      {
        return histogram.getClass();
      }
    };

    int count = 0;
    // Serialize lots of nulls at the end to ensure
    // we don't waste space on nulls.
    for (int i = 0; i < 125000; i++) {
      serializer.serialize(selector);
      count++;
    }

    long serializedSize = serializer.getSerializedSize();
    // Column header = 6 bytes
    // Offset header (Size + BitmapLength + ValueBitMap + Offsets)
    //   size = 4 bytes
    //   bitmap length = 4 bytes
    //   bitmap = 1 byte
    //   offsets * 4 = 16 bytes (no offset for nulls)
    // Offset header = 25 bytes
    // 4 values = 152 bytes (each value = 38 bytes)
    // Total = 6 + 25 + 152 = 183
    Assert.assertEquals("Expect serialized size", 183L, serializedSize);

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    final WritableByteChannel channel = Channels.newChannel(baos);
    serializer.writeTo(channel, null);
    channel.close();

    final ByteBuffer byteBuffer = ByteBuffer.wrap(baos.toByteArray());
    Assert.assertEquals(serializer.getSerializedSize(), byteBuffer.remaining());
    SpectatorHistogramIndexed indexedDeserialized = SpectatorHistogramIndexed.read(byteBuffer, strategy);
    Assert.assertEquals(0, byteBuffer.remaining());

    Assert.assertEquals("Count of entries should match", count, indexedDeserialized.size());

    for (int i = 0; i < count; i++) {
      SpectatorHistogram deserialized = indexedDeserialized.get(i);
      if ((i + 1) % 3 == 0 || i >= 6) {
        // Expect null, matching the selector's null pattern above.
        Assert.assertNull(deserialized);
      } else {
        Assert.assertEquals(63L, deserialized.get(6));
        Assert.assertEquals(64L, deserialized.get(7));
        Assert.assertEquals(255L, deserialized.get(8));
        Assert.assertEquals(256L, deserialized.get(9));
        Assert.assertEquals(65535L, deserialized.get(16));
        Assert.assertEquals(65536L, deserialized.get(17));
        Assert.assertEquals(4294967295L, deserialized.get(32));
        Assert.assertEquals(4294967296L, deserialized.get(33));
      }
    }
  }

  @Test
  public void testPercentileComputation0()
  {
    // A single value in the lowest bucket: percentiles interpolate within [0, 1).
    SpectatorHistogram h = new SpectatorHistogram();
    h.insert(0);
    Assert.assertEquals(0.1, h.getPercentileValue(10.0), 0.01);
    Assert.assertEquals(0.5, h.getPercentileValue(50.0), 0.01);
    Assert.assertEquals(0.99, h.getPercentileValue(99.0), 0.01);
    Assert.assertEquals(1.0, h.getPercentileValue(100.0), 0.01);
  }

  @Test
  public void testPercentileComputation1_100()
  {
    SpectatorHistogram h = new SpectatorHistogram();
    for (int i = 0; i < 100; i++) {
      h.insert(i);
    }
    // Precision assigned to half of the bucket width
    Assert.assertEquals(10.0, h.getPercentileValue(10.0), 0.5);
    Assert.assertEquals(50.0, h.getPercentileValue(50.0), 2.5);
    Assert.assertEquals(99.0, h.getPercentileValue(99.0), 10.5);
    Assert.assertEquals(100.0, h.getPercentileValue(100.0), 10.5);
  }

  @Test
  public void testPercentileComputation0_Big()
  {
    SpectatorHistogram h = new SpectatorHistogram();
    // one very small value, 99 very big values
    h.add(0, 1);
    h.add(200, 99);
    long upperBoundOfBucket0 = PercentileBuckets.get(0);
    long upperBoundOfBucket200 = PercentileBuckets.get(200);
    long lowerBoundOfBucket200 = PercentileBuckets.get(199);
    long widthOfBucket = upperBoundOfBucket200 - lowerBoundOfBucket200;
    // P1 should be pulled towards the very low value
    // P >1 should be pulled towards the very big value
    Assert.assertEquals(upperBoundOfBucket0, h.getPercentileValue(1.0), 0.01);
    Assert.assertEquals(lowerBoundOfBucket200, h.getPercentileValue(50.0), widthOfBucket / 2.0);
    Assert.assertEquals(upperBoundOfBucket200, h.getPercentileValue(99.0), widthOfBucket / 2.0);
    Assert.assertEquals(upperBoundOfBucket200, h.getPercentileValue(100.0), widthOfBucket / 2.0);
  }

  @Test
  public void testMedianOfSequence()
  {
    int[] nums = new int[]{9, 10, 12, 13, 13, 13, 15, 15, 16, 16, 18, 22, 23, 24, 24, 25};
    SpectatorHistogram h = new SpectatorHistogram();

    for (int num : nums) {
      h.insert(num);
    }

    // Expect middle of the bucket containing 15 (the "15.5" bucket), i.e. 18.0
    int index = PercentileBuckets.indexOf(15);
    long upperBoundOfFifteenPointFiveBucket = PercentileBuckets.get(index);
    long lowerBoundOfFifteenPointFiveBucket = PercentileBuckets.get(index - 1);
    long halfBucketWidth = ((upperBoundOfFifteenPointFiveBucket - lowerBoundOfFifteenPointFiveBucket) / 2);
    long middleOfFifteenPointFiveBucket = lowerBoundOfFifteenPointFiveBucket + halfBucketWidth;

    Assert.assertEquals(middleOfFifteenPointFiveBucket, h.getPercentileValue(50.0), 0.01);
  }
}
+2016010101 F {} diff --git a/pom.xml b/pom.xml index 81cb00bb0cf5..6149c5866db4 100644 --- a/pom.xml +++ b/pom.xml @@ -229,6 +229,7 @@ extensions-contrib/opentelemetry-emitter extensions-contrib/kubernetes-overlord-extensions extensions-contrib/druid-iceberg-extensions + extensions-contrib/spectator-histogram distribution diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorUtil.java b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorUtil.java index 4f82bdcfe69d..93cf75857c30 100755 --- a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorUtil.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorUtil.java @@ -129,6 +129,11 @@ public class AggregatorUtil // TDigest sketch aggregators public static final byte TDIGEST_BUILD_SKETCH_CACHE_TYPE_ID = 0x38; + // Spectator histogram aggregators + public static final byte SPECTATOR_HISTOGRAM_CACHE_TYPE_ID = 0x39; + public static final byte SPECTATOR_HISTOGRAM_DISTRIBUTION_CACHE_TYPE_ID = 0x3A; + public static final byte SPECTATOR_HISTOGRAM_TIMER_CACHE_TYPE_ID = 0x3B; + public static final byte MEAN_CACHE_TYPE_ID = 0x41; // ANY aggregator diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/post/PostAggregatorIds.java b/processing/src/main/java/org/apache/druid/query/aggregation/post/PostAggregatorIds.java index f65208bd9069..ed4bbfdc82b5 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/post/PostAggregatorIds.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/post/PostAggregatorIds.java @@ -66,4 +66,6 @@ public class PostAggregatorIds public static final byte KLL_FLOATS_SKETCH_TO_QUANTILE_CACHE_TYPE_ID = 42; public static final byte KLL_FLOATS_SKETCH_TO_QUANTILES_CACHE_TYPE_ID = 43; public static final byte KLL_FLOATS_SKETCH_TO_STRING_CACHE_TYPE_ID = 44; + public static final byte SPECTATOR_HISTOGRAM_SKETCH_PERCENTILE_CACHE_TYPE_ID = 45; + public static final byte 
SPECTATOR_HISTOGRAM_SKETCH_PERCENTILES_CACHE_TYPE_ID = 46; } diff --git a/website/.spelling b/website/.spelling index 7561bcec965d..175774e4ac24 100644 --- a/website/.spelling +++ b/website/.spelling @@ -430,6 +430,7 @@ pluggable podSpec postgres postgresql +pre-aggregate pre-aggregated pre-aggregates pre-aggregating @@ -948,6 +949,7 @@ prometheus Pushgateway flushPeriod postAggregator +postAggregators quantileFromTDigestSketch quantilesFromTDigestSketch tDigestSketch @@ -2373,3 +2375,12 @@ markUnused markUsed segmentId aggregateMultipleValues + +- ../docs/development/extensions-contrib/spectator-histogram.md +SpectatorHistogram +PercentileBuckets +spectatorHistogram +spectatorHistogramTimer +spectatorHistogramDistribution +percentileSpectatorHistogram +percentilesSpectatorHistogram From 6c9b882b7172a8fb645dc4169a195baf0e3df4e5 Mon Sep 17 00:00:00 2001 From: Ben Sykes Date: Fri, 5 Jan 2024 15:43:11 -0800 Subject: [PATCH 2/7] Clarify documentation Cleanup comments --- .../extensions-contrib/spectator-histogram.md | 75 ++++++++++++++++++- .../histogram/NullableOffsetsHeader.java | 9 +++ .../histogram/SpectatorHistogram.java | 2 +- .../SpectatorHistogramAggregatorFactory.java | 3 +- .../histogram/SpectatorHistogramModule.java | 3 - 5 files changed, 82 insertions(+), 10 deletions(-) diff --git a/docs/development/extensions-contrib/spectator-histogram.md b/docs/development/extensions-contrib/spectator-histogram.md index 30d5048da377..cc6e0d39e527 100644 --- a/docs/development/extensions-contrib/spectator-histogram.md +++ b/docs/development/extensions-contrib/spectator-histogram.md @@ -73,12 +73,14 @@ representation with the same aggregation performance and accuracy as data-sketches (depending on data-set, see limitations below). ## Limitations -* Supports positive numeric values within the range of [0, 2^53). Negatives are +* Supports positive long integer values within the range of [0, 2^53). Negatives are coerced to 0. -* Fixed buckets with increasing bucket widths. 
Relative accuracy is maintained, -but absolute accuracy reduces with larger values. +* Decimals are not supported. +* 276 fixed buckets with increasing bucket widths. In practice, the observed error of computed percentiles is in the range (0.1%, 3%). See [Bucket Boundaries](#histogram-bucket-boundaries) for the full list of bucket boundaries. +* DruidSQL queries are yet not supported. You must use native Druid queries. +* Vectorized queries are yet not supported. -> If either of these limitations are a problem, then the data-sketch aggregator +> If any of these limitations are a problem, then the data-sketch aggregator is most likely a better choice. ## Functionality @@ -268,6 +270,69 @@ array of percentiles. ## Appendix +### Example Ingestion Spec +Example of ingesting the sample wikipedia dataset with a histogram metric column: +```json +{ + "type": "index_parallel", + "spec": { + "ioConfig": { + "type": "index_parallel", + "inputSource": { + "type": "http", + "uris": ["https://druid.apache.org/data/wikipedia.json.gz"] + }, + "inputFormat": { "type": "json" } + }, + "dataSchema": { + "granularitySpec": { + "segmentGranularity": "day", + "queryGranularity": "minute", + "rollup": true + }, + "dataSource": "wikipedia", + "timestampSpec": { "column": "timestamp", "format": "iso" }, + "dimensionsSpec": { + "dimensions": [ + "isRobot", + "channel", + "flags", + "isUnpatrolled", + "page", + "diffUrl", + "comment", + "isNew", + "isMinor", + "isAnonymous", + "user", + "namespace", + "cityName", + "countryName", + "regionIsoCode", + "metroCode", + "countryIsoCode", + "regionName" + ] + }, + "metricsSpec": [ + { "name": "count", "type": "count" }, + { "name": "sum_added", "type": "longSum", "fieldName": "added" }, + { + "name": "hist_added", + "type": "spectatorHistogram", + "fieldName": "added" + } + ] + }, + "tuningConfig": { + "type": "index_parallel", + "partitionsSpec": { "type": "hashed" }, + "forceGuaranteedRollup": true + } + } +} +``` + ### Example Query Example 
query using the sample wikipedia dataset: ```json @@ -335,6 +400,8 @@ Results in ### Histogram Bucket Boundaries These are the upper bounds of each bucket index. There are 276 buckets. The first bucket index is 0 and the last bucket index is 275. +As you can see the bucket widths increase as the bucket index increases. This leads to a greater absolute error for larger values, but maintains a relative error of rough percentage across the number range. +i.e the maximum error at value 10 is 0 since the bucket width is 1. But for a value of 16,000,000,000 the bucket width is 1,431,655,768 giving an error of up to ~8.9%. In practice, the observed error of computed percentiles is in the range (0.1%, 3%). ```json [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 21, 26, 31, 36, 41, 46, diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/NullableOffsetsHeader.java b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/NullableOffsetsHeader.java index 90f319ebdd52..61f839c0d246 100644 --- a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/NullableOffsetsHeader.java +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/NullableOffsetsHeader.java @@ -35,6 +35,15 @@ import java.util.BitSet; import java.util.Objects; +/** + * A header for storing offsets for columns with nullable values. + * Provides fast access to the offset start/end for a given row index, while supporting null values. + * For cases where data is sparse, this can save a lot of space. + * The nulls are stored in a bitset, and the offsets are stored in an int array. + * The cost of the nulls is 1 bit per row, the cost of the non-nulls is 4 bytes per row for the offset. + * In cases where every row is non-null, the bitset is omitted. + * In either case, we need the offsets because the values are variable length. 
+ */ public class NullableOffsetsHeader implements Serializer { private final WriteOutBytes offsetsWriter; diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogram.java b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogram.java index 2ac7ccfa5ef1..8fa2c5044f8f 100644 --- a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogram.java +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogram.java @@ -45,7 +45,7 @@ // This will prevent class casting exceptions if trying to query with sum rather // than explicitly as a SpectatorHistogram // -// The SpectatorHistorgram is a Number. That number is of intValue(), +// The SpectatorHistogram is a Number. That number is of intValue(), // which is the count of the number of events in the histogram // (adding up the counts across all buckets). 
// diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregatorFactory.java b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregatorFactory.java index c02a990fe140..235d4781da42 100644 --- a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregatorFactory.java +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregatorFactory.java @@ -44,9 +44,9 @@ @JsonTypeName(SpectatorHistogramAggregatorFactory.TYPE_NAME) public class SpectatorHistogramAggregatorFactory extends AggregatorFactory { - @Nonnull private final String name; + @Nonnull private final String fieldName; @@ -267,7 +267,6 @@ public boolean equals(final Object o) } final SpectatorHistogramAggregatorFactory that = (SpectatorHistogramAggregatorFactory) o; - //TODO: samarth should we check for equality of contents in count arrays? 
return Objects.equals(name, that.name) && Objects.equals(fieldName, that.fieldName); } diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramModule.java b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramModule.java index 4c0a348560d9..b12c600d6b42 100644 --- a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramModule.java +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramModule.java @@ -87,8 +87,5 @@ public List getJacksonModules() public void configure(Binder binder) { registerSerde(); - //TODO: samarth this probably needs to be added for sql - //SqlBindings.addAggregator(binder, SpectatorHistogramQuantileSqlAggregator.class); - //SqlBindings.addAggregator(binder, GenerateSpectatorHistogramSqlAggregator.class); } } From 0f4aaba45d43e7bf453591260db8a93e7a41b555 Mon Sep 17 00:00:00 2001 From: Ben Sykes Date: Fri, 5 Jan 2024 15:54:21 -0800 Subject: [PATCH 3/7] Use ColumnValueSelector directly so that we support being queried as a Number using longSum or doubleSum aggregators as well as a histogram. When queried as a Number, we're returning the count of entries in the histogram. 
--- ...tatorHistogramIndexBasedComplexColumn.java | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramIndexBasedComplexColumn.java b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramIndexBasedComplexColumn.java index f1f0ffa53040..2e54fcf0d45e 100644 --- a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramIndexBasedComplexColumn.java +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramIndexBasedComplexColumn.java @@ -19,12 +19,18 @@ package org.apache.druid.spectator.histogram; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.column.ComplexColumn; +import org.apache.druid.segment.data.ReadableOffset; + +import javax.annotation.Nullable; public class SpectatorHistogramIndexBasedComplexColumn implements ComplexColumn { private final SpectatorHistogramIndexed index; private final String typeName; + private static final Number ZERO = 0; public SpectatorHistogramIndexBasedComplexColumn(String typeName, SpectatorHistogramIndexed index) { @@ -60,4 +66,65 @@ public int getLength() public void close() { } + + @Override + public ColumnValueSelector makeColumnValueSelector(ReadableOffset offset) + { + // Use ColumnValueSelector directly so that we support being queried as a Number using + // longSum or doubleSum aggregators, the NullableNumericBufferAggregator will call isNull. + // This allows us to behave as a Number or SpectatorHistogram object. + // When queried as a Number, we're returning the count of entries in the histogram. + // As such, we can safely return 0 where the histogram is null. 
+ return new ColumnValueSelector() + { + @Override + public boolean isNull() + { + return getObject() == null; + } + + private Number getOrZero() + { + SpectatorHistogram histogram = getObject(); + return histogram != null ? histogram : ZERO; + } + + @Override + public long getLong() + { + return getOrZero().longValue(); + } + + @Override + public float getFloat() + { + return getOrZero().floatValue(); + } + + @Override + public double getDouble() + { + return getOrZero().doubleValue(); + } + + @Nullable + @Override + public SpectatorHistogram getObject() + { + return (SpectatorHistogram) getRowValue(offset.getOffset()); + } + + @Override + public Class classOfObject() + { + return getClazz(); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("column", SpectatorHistogramIndexBasedComplexColumn.this); + } + }; + } } From e73105930d62e4201580710a30b0ac076b0d916e Mon Sep 17 00:00:00 2001 From: Ben Sykes Date: Mon, 8 Jan 2024 17:06:33 -0800 Subject: [PATCH 4/7] Apply suggestions from code review Co-authored-by: Victoria Lim --- .../extensions-contrib/spectator-histogram.md | 88 +++++++++---------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/docs/development/extensions-contrib/spectator-histogram.md b/docs/development/extensions-contrib/spectator-histogram.md index cc6e0d39e527..01d403fb6ec8 100644 --- a/docs/development/extensions-contrib/spectator-histogram.md +++ b/docs/development/extensions-contrib/spectator-histogram.md @@ -26,30 +26,28 @@ title: "Spectator Histogram module" This module provides Apache Druid approximate histogram aggregators and percentile post-aggregators based on Spectator fixed-bucket histograms. -Consider using this extension if you need percentile approximations and: -* want fast and accurate queries -* at a lower storage cost -* and have a large dataset -* using only positive measurements +Consider SpectatorHistogram to compute percentile approximations. 
This extension has a reduced storage footprint compared to the [DataSketches extension](../extensions-core/datasketches-extension.md), which results in smaller segment sizes, faster loading from deep storage, and lower memory usage. This extension provides fast and accurate queries on large datasets at low storage cost. -> The main benefit of this extension over data-sketches is the reduced storage -footprint. Which leads to smaller segment sizes, faster loading from deep storage -and lower memory usage. +This aggregator only applies when your raw data contains positive long integer values. Do not use this aggregator if you have negative values in your data. In the Druid instance shown below, the example Wikipedia dataset is loaded 3 times. -* As-is, no rollup applied -* With a single extra metric column of type `spectatorHistogram` ingesting the `added` column -* With a single extra metric column of type `quantilesDoublesSketch` ingesting the `added` column +* `wikipedia` contains the dataset ingested as is, without rollup +* `wikipedia_spectator` contains the dataset with a single extra metric column of type `spectatorHistogram` for the `added` column +* `wikipedia_datasketch` contains the dataset with a single extra metric column of type `quantilesDoublesSketch` for the `added` column + +Spectator histograms average just 6 extra bytes per row, while the DataSketch +adds 48 bytes per row. This represents an eightfold reduction in additional storage size for spectator histograms. -Spectator histograms average just 6 extra bytes per row, while the data-sketch -adds 48 bytes per row. This is an 8 x reduction in additional storage size. ![Comparison of datasource sizes in web console](../../assets/spectator-histogram-size-comparison.png) -As rollup improves, so does the size saving. For example, ingesting the wikipedia data -with day-grain query granularity and removing all dimensions except `countryName`, -we get to a segment that has just 106 rows. 
The base segment is 87 bytes per row, -adding a single `spectatorHistogram` column adds just 27 bytes per row on average vs -`quantilesDoublesSketch` adding 255 bytes per row. This is a 9.4 x reduction in additional storage size. +As rollup improves, so does the size savings. For example, when you ingest the Wikipedia dataset +with day-grain query granularity and remove all dimensions except `countryName`, +this results in a segment that has just 106 rows. The base segment has 87 bytes per row. +Compare the following bytes per row for SpectatorHistogram versus DataSketches: +* An additional `spectatorHistogram` column adds 27 bytes per row on average. +* An additional `quantilesDoublesSketch` column adds 255 bytes per row. + +SpectatorHistogram reduces the additional storage size by 9.4 times in this example. Storage gains will differ per dataset depending on the variance and rollup of the data. ## Background @@ -66,41 +64,43 @@ data store to benefit from high-dimensionality and high-cardinality data. SpectatorHistogram is designed for efficient parallel aggregations while still allowing for filtering and grouping by dimensions. It provides similar functionality to the built-in data-sketch aggregator, but is -opinionated and optimized for typical measurements of cloud services and web-apps. -Measurements such as page load time, transferred bytes, response time, request latency, etc. -Through some trade-offs we're able to provide a significantly more compact +opinionated and optimized for typical measurements from cloud services and web apps. +For example, measurements such as page load time, transferred bytes, response time, and request latency. +Through some trade-offs SpectatorHistogram provides a significantly more compact representation with the same aggregation performance and accuracy as -data-sketches (depending on data-set, see limitations below). +data-sketches. Note that results depend on the dataset. 
+Also see the [limitations](#limitations) of this extension. ## Limitations * Supports positive long integer values within the range of [0, 2^53). Negatives are coerced to 0. -* Decimals are not supported. -* 276 fixed buckets with increasing bucket widths. In practice, the observed error of computed percentiles is in the range (0.1%, 3%). See [Bucket Boundaries](#histogram-bucket-boundaries) for the full list of bucket boundaries. -* DruidSQL queries are yet not supported. You must use native Druid queries. -* Vectorized queries are yet not supported. +* Does not support decimals. +* Does not support Druid SQL queries, only native queries. +* Does not support vectorized queries. +* Generates 276 fixed buckets with increasing bucket widths. In practice, the observed error of computed percentiles ranges from 0.1% to 3%, exclusive. See [Bucket boundaries](#histogram-bucket-boundaries) for the full list of bucket boundaries. -> If any of these limitations are a problem, then the data-sketch aggregator -is most likely a better choice. +:::tip +If these limitations don't work for your use case, then use [DataSketches](../extensions-core/datasketches-extension.md) instead. +::: ## Functionality -The SpectatorHistogram aggregator is capable of generating histograms from raw numeric -values as well as aggregating/combining pre-aggregated histograms generated using +The SpectatorHistogram aggregator can generate histograms from raw numeric +values as well as aggregating or combining pre-aggregated histograms generated using the SpectatorHistogram aggregator itself. While you can generate histograms on the fly at query time, it is generally more performant to generate histograms during ingestion and then combine them at query time. This is especially true where rollup is enabled. It may be misleading or -incorrect to generate histograms from already rolled-up summed data. 
The module provides postAggregators, `percentileSpectatorHistogram` (singular) and -`percentilesSpectatorHistogram` (plural), that can be used to compute approximate +`percentilesSpectatorHistogram` (plural), to compute approximate percentiles from histograms generated by the SpectatorHistogram aggregator. Again, these postAggregators can be used to compute percentiles from raw numeric values via the SpectatorHistogram aggregator or from pre-aggregated histograms. > If you're only using the aggregator to compute percentiles from raw numeric values, then you can use the built-in data-sketch aggregator instead. The performance -and accuracy are comparable, the data-sketch aggregator supports negative values, +and accuracy are comparable. However, the DataSketch aggregator supports negative values, and you don't need to load an additional extension. An aggregated SpectatorHistogram can also be queried using a `longSum` or `doubleSum` @@ -117,7 +117,7 @@ amount of data that's needed to send from the client across the wire. SpectatorHistogram supports ingesting pre-aggregated histograms in real-time and batch. They can be sent as a JSON map, keyed by the spectator bucket ID and the value is the count of values. This is the same format as the serialized JSON representation of the -histogram. The keys need not be ordered or contiguous e.g. +histogram. The keys need not be ordered or contiguous. For example: ```json { "4": 8, "5": 15, "6": 37, "7": 9, "8": 3, "10": 1, "13": 1 } @@ -138,7 +138,7 @@ JSON format where the keys are the bucket index and the values are the count of in that bucket. The buckets are defined as per the Spectator [PercentileBuckets](https://github.com/Netflix/spectator/blob/main/spectator-api/src/main/java/com/netflix/spectator/api/histogram/PercentileBuckets.java) specification. -See [Appendix](#histogram-bucket-boundaries) for the full list of bucket boundaries. 
+See [Histogram bucket boundaries](#histogram-bucket-boundaries) for the full list of bucket boundaries. ```js // The set of buckets is generated by using powers of 4 and incrementing by one-third of the // previous power of 4 in between as long as the value is less than the next power of 4 minus @@ -157,8 +157,8 @@ See [Appendix](#histogram-bucket-boundaries) for the full list of bucket boundar ``` There are multiple aggregator types included, all of which are based on the same -underlying implementation. The different types signal to the Atlas-Druid service (if using) -how to handle the resulting data from a query. +underlying implementation. If you use the Atlas-Druid service, the different types +signal the service on how to handle the resulting data from a query. * spectatorHistogramTimer signals that the histogram is representing a collection of timer values. It is recommended to normalize timer values to nanoseconds @@ -242,14 +242,14 @@ This returns an array of percentiles corresponding to those requested. } ``` -> Note: It's more efficient to request multiple percentiles in a single query +> It's more efficient to request multiple percentiles in a single query than to request individual percentiles in separate queries. This array-based helper is provided for convenience and has a marginal performance benefit over using the singular percentile post-aggregator multiple times within a query. The more expensive part of the query is the aggregation of the histogram. The post-aggregation calculations all happen on the same aggregated histogram. -Results will contain arrays matching the length and order of the requested +The results contain arrays matching the length and order of the requested array of percentiles. ``` @@ -268,7 +268,7 @@ array of percentiles. | field | A field reference pointing to the aggregated histogram. 
| yes | | percentiles | Non-empty array of decimal percentiles between 0.0 and 100.0 | yes | -## Appendix +## Examples ### Example Ingestion Spec Example of ingesting the sample wikipedia dataset with a histogram metric column: @@ -397,11 +397,11 @@ Results in ] ``` -### Histogram Bucket Boundaries -These are the upper bounds of each bucket index. There are 276 buckets. +## Histogram bucket boundaries +The following array lists the upper bounds of each bucket index. There are 276 buckets in total. The first bucket index is 0 and the last bucket index is 275. -As you can see the bucket widths increase as the bucket index increases. This leads to a greater absolute error for larger values, but maintains a relative error of rough percentage across the number range. -i.e the maximum error at value 10 is 0 since the bucket width is 1. But for a value of 16,000,000,000 the bucket width is 1,431,655,768 giving an error of up to ~8.9%. In practice, the observed error of computed percentiles is in the range (0.1%, 3%). +The bucket widths increase as the bucket index increases. This leads to a greater absolute error for larger values, but maintains a relative error of rough percentage across the number range. +For example, the maximum error at value 10 is zero since the bucket width is 1 (the difference of `11-10`). For a value of 16,000,000,000, the bucket width is 1,431,655,768 (from `17179869184-15748213416`). This gives an error of up to ~8.9%, from `1,431,655,768/16,000,000,000*100`. In practice, the observed error of computed percentiles is in the range of (0.1%, 3%). 
```json [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 21, 26, 31, 36, 41, 46, From 373348e3bbad867bfe88ebc6fb8492abc767116c Mon Sep 17 00:00:00 2001 From: Ben Sykes Date: Mon, 8 Jan 2024 17:19:55 -0800 Subject: [PATCH 5/7] Fix references --- .../extensions-contrib/spectator-histogram.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/development/extensions-contrib/spectator-histogram.md b/docs/development/extensions-contrib/spectator-histogram.md index 01d403fb6ec8..14f349ee812e 100644 --- a/docs/development/extensions-contrib/spectator-histogram.md +++ b/docs/development/extensions-contrib/spectator-histogram.md @@ -63,12 +63,12 @@ data store to benefit from high-dimensionality and high-cardinality data. SpectatorHistogram is designed for efficient parallel aggregations while still allowing for filtering and grouping by dimensions. -It provides similar functionality to the built-in data-sketch aggregator, but is +It provides similar functionality to the built-in DataSketches `quantilesDoublesSketch` aggregator, but is opinionated and optimized for typical measurements from cloud services and web apps. For example, measurements such as page load time, transferred bytes, response time, and request latency. Through some trade-offs SpectatorHistogram provides a significantly more compact representation with the same aggregation performance and accuracy as -data-sketches. Note that results depend on the dataset. +DataSketches Quantiles Sketch. Note that results depend on the dataset. Also see the [limitations](#limitations) of this extension. ## Limitations @@ -99,9 +99,9 @@ values via the SpectatorHistogram aggregator or from pre-aggregated histograms. > If you're only using the aggregator to compute percentiles from raw numeric values, -then you can use the built-in data-sketch aggregator instead. 
The performance +then you can use the built-in quantilesDoublesSketch aggregator instead. The performance and accuracy are comparable. However, the DataSketch aggregator supports negative values, -and you don't need to load an additional extension. +and you don't need to download an additional extension. An aggregated SpectatorHistogram can also be queried using a `longSum` or `doubleSum` aggregator to retrieve the population of the histogram. This is effectively the count @@ -271,7 +271,7 @@ array of percentiles. ## Examples ### Example Ingestion Spec -Example of ingesting the sample wikipedia dataset with a histogram metric column: +Example of ingesting the sample Wikipedia dataset with a histogram metric column: ```json { "type": "index_parallel", @@ -334,7 +334,7 @@ Example of ingesting the sample wikipedia dataset with a histogram metric column ``` ### Example Query -Example query using the sample wikipedia dataset: +Example query using the sample Wikipedia dataset: ```json { "queryType": "timeseries", From aa77294d28c5aa85ef7b1fd108e07e45503c560c Mon Sep 17 00:00:00 2001 From: Ben Sykes Date: Mon, 8 Jan 2024 19:42:55 -0800 Subject: [PATCH 6/7] Fix spelling --- docs/development/extensions-contrib/spectator-histogram.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/development/extensions-contrib/spectator-histogram.md b/docs/development/extensions-contrib/spectator-histogram.md index 14f349ee812e..c2f2d2b95c07 100644 --- a/docs/development/extensions-contrib/spectator-histogram.md +++ b/docs/development/extensions-contrib/spectator-histogram.md @@ -35,7 +35,7 @@ In the Druid instance shown below, the example Wikipedia dataset is loaded 3 tim * `wikipedia_spectator` contains the dataset with a single extra metric column of type `spectatorHistogram` for the `added` column * `wikipedia_datasketch` contains the dataset with a single extra metric column of type `quantilesDoublesSketch` for the `added` column -Spectator histograms average 
just 6 extra bytes per row, while the DataSketch +Spectator histograms average just 6 extra bytes per row, while the `quantilesDoublesSketch` adds 48 bytes per row. This represents an eightfold reduction in additional storage size for spectator histograms. ![Comparison of datasource sizes in web console](../../assets/spectator-histogram-size-comparison.png) @@ -100,7 +100,7 @@ values via the SpectatorHistogram aggregator or from pre-aggregated histograms. > If you're only using the aggregator to compute percentiles from raw numeric values, then you can use the built-in quantilesDoublesSketch aggregator instead. The performance -and accuracy are comparable. However, the DataSketch aggregator supports negative values, +and accuracy are comparable. However, the DataSketches aggregator supports negative values, and you don't need to download an additional extension. An aggregated SpectatorHistogram can also be queried using a `longSum` or `doubleSum` From 78e023542d7d4a2eff7fe595b87126192d661b06 Mon Sep 17 00:00:00 2001 From: Ben Sykes Date: Tue, 9 Jan 2024 11:08:59 -0800 Subject: [PATCH 7/7] Update docs/development/extensions-contrib/spectator-histogram.md Co-authored-by: Victoria Lim --- .../development/extensions-contrib/spectator-histogram.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/development/extensions-contrib/spectator-histogram.md b/docs/development/extensions-contrib/spectator-histogram.md index c2f2d2b95c07..e6d12517e5cf 100644 --- a/docs/development/extensions-contrib/spectator-histogram.md +++ b/docs/development/extensions-contrib/spectator-histogram.md @@ -64,8 +64,12 @@ data store to benefit from high-dimensionality and high-cardinality data. SpectatorHistogram is designed for efficient parallel aggregations while still allowing for filtering and grouping by dimensions. 
It provides similar functionality to the built-in DataSketches `quantilesDoublesSketch` aggregator, but is -opinionated and optimized for typical measurements from cloud services and web apps. -For example, measurements such as page load time, transferred bytes, response time, and request latency. +opinionated to maintain higher absolute accuracy at smaller values. +Larger values have lower absolute accuracy; however, relative accuracy is maintained across the range. +See [Bucket boundaries](#histogram-bucket-boundaries) for more information. +The SpectatorHistogram is optimized for typical measurements from cloud services and web apps, +such as page load time, transferred bytes, response time, and request latency. + Through some trade-offs SpectatorHistogram provides a significantly more compact representation with the same aggregation performance and accuracy as DataSketches Quantiles Sketch. Note that results depend on the dataset.