From 2a79081b936ba67849ae2ec718e5b022bfc97fad Mon Sep 17 00:00:00 2001 From: mlm483 Date: Thu, 27 Apr 2023 15:29:04 -0400 Subject: [PATCH 1/4] [BI-1760] - added unit tests --- .../utilities/response/FileUtilUnitTest.java | 21 +++++++++++++++++- .../files/fileutil/file_all_null_columns.csv | 4 ++++ .../files/fileutil/file_all_null_columns.xls | Bin 0 -> 36864 bytes 3 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 src/test/resources/files/fileutil/file_all_null_columns.csv create mode 100644 src/test/resources/files/fileutil/file_all_null_columns.xls diff --git a/src/test/java/org/breedinginsight/utilities/response/FileUtilUnitTest.java b/src/test/java/org/breedinginsight/utilities/response/FileUtilUnitTest.java index 14d00a25c..fbf630d8c 100644 --- a/src/test/java/org/breedinginsight/utilities/response/FileUtilUnitTest.java +++ b/src/test/java/org/breedinginsight/utilities/response/FileUtilUnitTest.java @@ -41,7 +41,6 @@ void parseExcelRemoveAllNullRows() { assertEquals(2, resultTable.rowCount(), "Wrong number of rows were parsed"); } - @Test @SneakyThrows void parseCsvNoRemoveSomeNullRows() { @@ -60,6 +59,26 @@ void parseExcelNoRemoveSomeNullRows() { assertEquals(5, resultTable.rowCount(), "Wrong number of rows were parsed"); } + @Test + @SneakyThrows + void parseCsvRemoveAllNullColumns() { + // Columns with no header and no data should be silently dropped. + File file = new File("src/test/resources/files/fileutil/file_all_null_columns.csv"); + InputStream inputStream = new FileInputStream(file); + Table resultTable = FileUtil.parseTableFromCsv(inputStream); + assertEquals(21, resultTable.columnCount(), "Wrong number of columns were parsed"); + } + + @Test + @SneakyThrows + void parseExcelRemoveAllNullColumns() { + // Columns with no header and no data should be silently dropped. + File file = new File("src/test/resources/files/fileutil/file_all_null_columns.xls"); + InputStream inputStream = new FileInputStream(file); + Table resultTable = FileUtil.parseTableFromExcel(inputStream, 0); + assertEquals(21, resultTable.columnCount(), "Wrong number of columns were parsed"); + } + @Test @SneakyThrows void writeExcelCheckColumns() { diff --git a/src/test/resources/files/fileutil/file_all_null_columns.csv b/src/test/resources/files/fileutil/file_all_null_columns.csv new file mode 100644 index 000000000..ceb3dbef3 --- /dev/null +++ b/src/test/resources/files/fileutil/file_all_null_columns.csv @@ -0,0 +1,4 @@ +Germplasm Name,Germplasm GID,Test (T) or Check (C ),Exp Title,Exp Description,Exp Unit,Exp Type,Env,Env Location,Env Year,Exp Unit ID,Exp Replicate #,Exp Block #,Row,Column,Treatment Factors,ObsUnitID,Color,INCAUDPS,INCW10SUM,INCW6SUM,,, +BRG,453,C,KRSP22-3,INSV phenotyping trial BRG x Eruption RIL population and parents,Plot,Disease resistance screening,"Salinas, CA 2022","Salinas, CA",2022,89,1,1,,,,,Green,120,6,0,,, +Eruption,454,C,KRSP22-3,INSV phenotyping trial BRG x Eruption RIL population and parents,Plot,Disease resistance screening,"Salinas, CA 2022","Salinas, CA",2022,90,1,1,,,,,Medium red,110,4,0,,, +BxE16-001,455,,KRSP22-3,INSV phenotyping trial BRG x Eruption RIL population and parents,Plot,Disease resistance screening,"Salinas, CA 2022","Salinas, CA",2022,91,1,1,,,,,Dark red,110,6,0,,, \ No newline at end of file diff --git a/src/test/resources/files/fileutil/file_all_null_columns.xls b/src/test/resources/files/fileutil/file_all_null_columns.xls new file mode 100644 index 0000000000000000000000000000000000000000..7c76e4754b83204a341c02d214380b2c7e70239a GIT binary patch literal 36864 zcmeHw349dA@_%p6O*j&+aKiur639iueVCg7ISH2t7&gg-tZa6}?k0o>F?k#ciim)6 zsvvSJA}Dg-q8ut9frkikh{EGgc`EW0^ZVA!&hG5&W(oeD&;Rf9&*U?^(>2{y)m7D1 z)jhqF6Q9+)wBp@nmzm;tjJdPlD}9)k46ed;f2r)xaKEyWiqdx?t^uk!{trpOM-4@K zd9o+cKk+`o+?bb|vCH`0?1SO;Q+Y5I zY;qak^;lDZxB9|o1L5->q2?~(vmdL^GW>p2RhH>8d!iM?`M6t*Q11odv#s!17oRHj zfzaj)0n&xFV&9{_n$4wVAnvV-XBwtsT9(a>`226yaQlz&;q%bGG<--673<1&ks_*; zhl=?;q=p-7{17S(dUYlB5S5B`cS*(Lp*6mQzFp%W?LX#{5314ip*8wGq=t$`yXZ?N zmJj>uZpBeYVTBw^fh~?>`OIvwNEK|wwT>9`wc=A+bt{h+%o7@7Z3El33u)IbA}hGh z=pd;uI=Gcb8+d>gcK1hR^0{N|E45|btfj3|ScE+ulnGTwu-aBxOM%T&wFm3OA{omf zRyuJKXO`mVAOSEsxQ_+UHnc7H&I*dj0+lcpj6XD$x9h~o8+GFBT12xD*4v(#*aLfd z0&6mo(a|=d-C1Yo!$wqUZ>1|W7D&rmwqtL?_pnicp3u`T`#XLm4_*&t4d3#&$ZOuGHe!SqHgns>k3c?}TXK)qpkSgGv zoWg4Xr-1)Q3wkEvuPwo?@)Yf`js!o=GH}@%xt`E3I582A2oI3pel|GASa}l%xPq>~ zRNpEe(SGVl@Y8H8E}Kbs>Pv7dp9=e2R*l@8P39w#m^8P!T*rK(4m9}+1IBklsT|M3*4)dAb%@7;$ii} z?d$h+Rv*}$!ew+Ve2R4Osh}(N3tCg)5#dc4E3v0*g)8cd_<Pw^<&uJpAqG0zTFP zx06GVz>i~nz@6d;+^IZ8Jc9k$(UsSi%Tvye94@PGheuZ53EYt`>c=qgK>aD|hHQ%3 zQWm~Ou)zA=u9d1oS*abIZo08L$RFHsMH3|?9F%x!WCGSUVKuUQe3KT9LsR6~2M`}O;;E@_oWO$?o6fz#E0mYI>YCu88 zWrJA7c%UfbUiEnJ5C#gLMNXT5P zMnaLU8c4{5t42a@yc$TztgA*sG4!EGj;FZR4yMM{LE>6FNLQ+Z#I<%1?5cyrwRVv3 zR0oM`?KH0*64%|5iHvfs9jrN22Z?L#VD?=dB(AlC%&j^|Tx*A`9un8uX;VEU zuC;?@8d-Nxu#fbjXJgE@cG^`BiEHfyRu745?X<5R64%=4P(38BwbQYBNL*_t$clvW zrDe>k(k>4p?a+o^dQPaYxC@DG2F^lbt z>MrvFWu6@Js0~6NwL#dUHVC&iAZ?QZ?F8$XmPDYM7(J;09);L9kZkPmcXAt3<~CRo zmAMg8sU20a9Rj*`BUlH1IFia}*GQtOgG3o_lzQz7BsN2E=#HZ~cS8f)7>kZBb}q!q z&%&7l)D>AjB=R7l-%7& zOvz!6E(nh&WR~Y3DaQtV>>4OdRaL4UrU(ap(ZHkXxeUo&IaKAtR|*G?ob;ZF;w$<7$=|HLM?0ZFLENnBu+@!?#v!W!j@pvo(@NjqXD%Ib0@-9avXyO z|5c8a11S~^*utg*$6~t1=gG4QQK9zYd6*bMML3*`Q7M%;)cs4P5)THdAr*Q4`R7X| z;lC{vrCJjIY6klM)eI#4)eH>ys~H&hS2Hl^uVx^*It)-4PzD(hW*=niVWM{wVQ4NK zj%h{;;MH(WjxO7tm=uGFQD;nv{? z!@!?8gojm!f&ZkjDrf1#!GMeOeZE_L=8x$Gi;QT`g>tbuIlSK7Wu7{f&7-9|%c1gU zRapmH&>~oS#<5VGu`CCFJktwX?`TOv#cDHe4?geQ<-3)hbWMpj<~Qx>VJMNQrz8{U zNT#g|GUCdIK*p2L`|ay9&dJnQl8JI8)4>H9acx8(1h9$k3XLg^ZfdyKwWKb25#TWV$<&iF83mTtyLt=ELV* z`R)tnWCDmxSJ#%-8f3aw!_tnYym$Cp=VTfy$vCmJb}q=sE$w*9FUQ_?PNs>Hj1x=i z=z@&g(vGK`KY!jinWjoIPAsjn3o>#`JDyVc&0^XM$K7Qg>G6Z$vCmJKo?}>mKGWH z5?m%Zjy%`4qm9G(>AS|hM0E%)LnB*~Gk=~1OcKRn%} z$0#aPmY39aB_F%ed^bP*^Z?tR;`8~O@mxHw)6G+tDfnt!=?lK{kqqXr@kkBhaiznb zoGYj1BXLEG{a#kC#FE67SdzFBOY_}4Sm$#0%6V$eo)u4bbUhv-DZrlw3@}D@L(08S zH%L*}3qR|#)#cbIM1nEaHlgJnXwyN_#-mb=HbwXelu^iibJ!$ni?DKUvNI+d*85E{U4>KiPASU6ei=TF29cBQp z$<^w|ahNM87D{l>P-=m;T?k|T!?R3fkM1i9!3t3WQM25tiKu5#q?g$jq@-oOcR z@`s|v?8GwP&6jn9BH2KFVG&HB$X;b`4bh*0kZd7rk(6#5{`BxvQW`yB{a+4$e*n@1sro45K~W$uqkpZ;*3V{r+B0)^_QNzS_~d0Rn9ORqwINAF_27yr5gnYL8vU! zSK$)g**ac&(s)TTnE83?YJABcM(fWZ{ey?RyADWw*XSa+myw*juSYCRUI2gX3W%G)6tSEYZD2+cWA`3!ZueC>UYL zlGk$PRw;-J5}u=eoXwFflFUW#>fvuFkYo$J9Rijo30JA;J2}H5*j*%~sAa{11=(uI zf63ULjTL^4I|IEsJCdaA#WE9P6H*gr;Ma*OvkBry5ra2s3$R|pC1}i=CF&+@iHsf5 zqQ#8_EAjK@m9l$;U^XmVg}?nOHBL;+|LMklV;B6&a4!xg=1=hkT~UFaBUjHGQ68=_ zYBkw9o*O!;kO#UT`5OxhQM( zLIX2Tw9I@6mtAC*HJ_-}>A1A?q12{8W6sG(6SF~Rk_&ldYfL@i++JZuZ(HtftK&l8b1w)udo);o~ zK8PVJ3E?#QT+RR$8M$0RgCk8l(285Khyx? zC1wcSI>e}7Hd0(rVUe-WV8RGM9ql-M2*;aqg2OOE3Ewci);!g2gi1qAh5|+j$tIR) zFqU{zZw_W}Hih6zqc?|eW=IF6d~Gq0FCY&K{saB9@I%gHfdfA_UZ6!Ys>r#?;Egcm z0*yWgvVafLg}`JCCKK6pw$6|P)tO+eFpHdgjWO5K|3^15q^61b;ycX84R83M6raap zH^q6%tvPsa3JnwR%?qPG7vHo1PUByTYa^g}LUzFkcZvZuqO6D47i)5CB4B3BmMFw< z!WH2n3WWj&t+j3#;L-$zaEP@uxKA?x@;&5v^oYhfa74WH2BZr-QY6kOcqD2=Glg7C zF(~Ju5{LSPEeF@q{LyyB;a(o<5Vy{_*WtdrrG%eDoTG%_$cXDqT$7Zj_hLpkQJ5bR zKRE(Sq$o}eh$lVjO%&)l%7J(ic`p=XME#Khw5^?UeyIG_ylu^fS)R7M`ahC8zfdmo8jy1_W zgm60TcwX=k91v&oMg#}M8bh9$r@$on%o)%PGl0cu8To4%G#K>1vVW?FqhMqxI&H5 zOyndANK}l5qC&1qNMs0wqo_a7NsH#<$b)h1_EphA!NHjvxTatyc>6F;KmazyvW0t7 zA)lkoo5U4R7GQ=2a9u=SiF_g!VcXV!6PK?k<~fa-)A1TaIcvQ~4$<(RnTCm&F%K{p zwSP0{k$>Pch*7Y~7gfAD20)WAkC`4RI?G#}0z%}_fUmslF- z7z{=X8L|ZD%@N_ONkE=8W{N@}3nIdlVNyE83rTaHCI@+qqhLjI&=kV*Ak$(?&WpGw z7(KEJvsS158?qp1eIk$yHj=^{PbL;-hTLYj)Md1J9in}8UeVoqqy*&db^ z+LQ!g7-Z;#u^PsW$j5*yghzpc!We{F#RSc+os_N75Jh$Xd&oszONtP(KOwIx1R?{% zE?k6=_eu#f5;tp)Oj7y`*%+Z< zhKZYGC=xsfF!Q2ogyGy$}1 zVl#!kD>D-Z!Nja)H57(tG9znqhq*~YmGzi2OBKQ_ z^EjGqK)a-cjzHUy6J?G*SfY_+3YKY^qK6W1(71#dvmCN4JvVSjde80=T_g%YBXNYL zriLaY49&N^q@{CyenA0^94Gfe>7Hc@1Fd6rCQJZHPNHLy6h<*haK;7jS4E($!M24$ zOcBHCK~yxtg#q4Q9N@5)L@8OvIt84J!H86JU|M|au!M{uxQ)vZT_T1IOBKN*y5kbA zEG{!ih57&{XAa4Tj0}y6p}IqcbCQTixk5h5p)Oa#B^rwa9dMZgQ{e9kEo=GW2!oLH zW)qf`G7yCDgTDl=3GDOm9wyANgxnShKF`w{1r~&dXms#*a69p_Tx3LKB!YzugpMRp zUCOB~L}4bWn6VWV0AntC6<3njrF&>ZL>JUa&=|)PxRMRRdkz|47dmnu)&`r)o1u#f z(XYAauxA3^Vh2Aw9aEZ8V0d#NW3Qs(m&uG(U|QcXg)t71`6RxN;yXtjfD?!;2WDu( z;l^H3kEs2M>`tB8ZY(3&-GkmSL*O>-#znuqSW00nunvF)0yZihmIVQ2(e%JHCak1D z_ts0t(hf%WC*B+I+q7$;gO+WMQMbP_^`ki-y)k9q$BP34j{IZunqj}*OF1<(ZY>v} z3pp2c@3lTxjk{ZWzkhJivNeO=xf|Cx@MiO*F{{4nk$&#&;j{kUtVv(ZD!)ZP?%1;{ zsK>MA17|JCTs8Td!f`2^`^}kaY(4kQQ~&PicC1@*hw=yIbw4Q@{AH7yvtH_X^+^2L zYwlY&wS2taoon|O1zwr^-j3+*X>SdRS5MorZ{F!!Yw!0z5)c=5bbH62!rtsWYsDuc z4-CEEbkp@G`EDn>t-Z+Itu?#;!A-SiWqme|Ya05^-afC*x^@1{NZqarb2iVJ(*4Qf zY5OZ1j{Pb6dV|l8myIfeL@M#{$X;Hn&aC-%ALP^>`hqF2EF-Tog@@7S^cO#S>R?3e zXTO`Vd)mp;!KV&3XX8|5u4%6xRZ{rO%E{;7&)Su7a%JC7x-?%kU~9z7 z_5Eg^OW&2bF)sJy5&u(z63=(d4lG;q(uAzDtrxx4^iubf&wmQYJlkXJ#uwKfsoZif z;`nRn`^(-5xt8dm@3A07`*rq;wc%HX?4G-pUw^vQiTAhNKl1=xaqQ^$Nv{Mii zQ?H*td+qA99-n^IuwrR)osl8Syz4y2e-bwHgYTz=tZBaXivcY@4AEWwsL?k!N}3#b z?}Y|m4gAod--TRwa(m!Y{hm)6asBgq!B-M*cJAQo*3&uj zX{QQKeufdWsWoFX8f0vXUFbq=-x`^0Xx_trPvh%lkKfw;Ztl%`Of9trY)`2TKvUUyozF!N!b2AUcUMTzWyD7!z?@dqm zK|MI&>W=7x2lnp#*Nu)7Pi(C{VS2y0FMim4{jVz)9Qpb1GffUe*8kw~sX4zC950z3 zIQs6`oI@vrufK5paNdfHb2I&Cm4^4Px3*u47mxT)E?sr%tC2y5A=_rJ%BU58bn>8X zTLZfmcMa||ebKBA*Dm_JQPle9rtg0K*wYefgW$-TQh!cmBPhofdZ5x%lZi%Z-iBZrs}W_Mp}2?k5V>9aFdeqxXiT zpPXtjzWmfXbuYaA)c6hgm+L$j)p^;(WB<&#+%2dg>xumnd#7%CW5(I#4YQuyzp(lH z{g0h_CpLJ`=Be)l6pvc9epS}AxItHr`0mKqsO#2v%_m$=|9i{6e`5RTb^1?V{x)dl zGa)-p2jm1CD|$CzMb6-LxZ6nw+*#mfBV?c$&K!L36!Kk$bi4$YmPaOQ#c@`@gu;j3M}Cjb8LTlWp)!k=8$ zuae*M!EpY_?G-&Eu3fDB<)^Q{D!csPr<;D?Wj#G#zk7D$4!>XcENjoN4_@?J_iTf2 zSC{;Hd&9(cvUWeXvmv_dZ#PctId*d2^m_xR&zM-AzB?&os(bHWuB!GfONdT5G%u#j z_2RYqV<+5N9rrJ~-0UCSFE^+=^04Qw9{sj%`#J8hmk+)BuY(br%f}qfpEhBQs@3}4 zwQJp5GvU(3W$CG3wws6w_B?!Q$liB-4$t|# zU;9Iw$1kkitAQ?L_nJfFxhq`d(9^mox;_d3G8$!>UUoWZCf6N;F?LuR#Ic=WO7)@r{m4UBJmiaE zXV;GZ-u=0=JAU=q|H{tK(mh^mvwg{eyKL~@l-iz3B<%1q8bEc0OnYS=$c=3g6 zhnlv0K|6NF&yO8$`05+=4qv?bYhAB8uRPxUvw_j-HXCkfMvUzjQ~cKYn@wuv&mWq# zWl7t6vo^(Teb#;JGwBnzE-1+hn&Y3f=04k{|6*62VV^D@h=8FACF53x_zYShrh<9mwNO)IlK3! zwO{|Zt--i<`vRxGv;O#guYj;i1CEs}JM!7A+LPN18ryZ~k1x-QIX=w$K=PvUSuZ_V z{LkSfy%RGXXUgup=GTL7PA#9eyS7hG z)Y3z*OiL(wckS#q$L$!~)!V0Z^LWi*)23~2pAH}W{sFhO{F8rA()+bKee>z<9&I^~ zgFW*Gky8eF~ah zxHQYZ)%0&qt{Wa6cXjigjn~57zaBi~`UCDgW3!6$t+OW#%bND3QNR4%#liWvUOn^R z=V4zQb6nf{ zrWfa}y|dx!I3W16&5}_Q8q`0Kl)TYPdo8D(ciIQ9&i*K;?}F9mUueYN__2QElqH>) z?S1Np4bxr@Tsn2<9}!u@P&pEggBf;nj++ZyA4{Qn9h+^X{+3-)@_D;NyS4JmZV@JNa%0 zwzmvc9IiH+0Gg(aQv|H+&_Z&jVp z9sfcG=p=sEIXU-1(Y>Q01d2(E5ENxm+>*qk`W}dAIQ`z#iE4x4^Bw*#b+|C| z#M`Icr+?k8PWR}*!MASL*%+b@dsee&v3BReSC2HviM-e)e|5zxW#IMAp%wcSp&N#y)WHd?~N+Yo0IEQ(KoEsM=k&*0)REi$4dony+W2Z60VQeX$ zWiFLXEpUU)25hzrM)jz&_*!RwyVp9`a%)Jpg#(jm<~IuaC$i~zC%p!_Ha>IlaFwp> z;gex+j+*_p?uS#U*%{GU0=$y|?<{;y6(Hrpq(;Qq9uLqNi&6a@clKk#!{|+=Y?@Ge zy71Wnwey7In`n9qtaKe@#d1A5SazL?SE9Oe)K5BdaY}~-TY+7OD%_#xxhjDMV^PBK zd9?JcV#~0fX_-Pw5+?fYil@Q+sxf86 zeRup-^tY8guqmd(_c7~zXhp?pk>|jH9XWK6d_;Yz-7xVLuD>RPc;$68BN+A5mtDsc z)dr&qzU)oGLdq@msDdwBE3{=QKcOvY!k*s{#FBZ8B7y3qW^ktkyE+UL1=%J7S zmPG(-Ox>~aTwj3M^FkAG54N1dW35LP?12)&i022ONXG!-Q!J@aq6%Wk4SSP_O|j&T zB?n?~4_-$uU}7=WKOdnEwM+yJu?1OJB1XJaa$^Z_uTse!CF%xYQxRXXKM|W7?#2qE zhv>VZR;(2pS!*n9(-P>qfv;GhH-yd2DK_FfPMB~LHrjX8--^v0ZEU=e<;MvlhH6r8 zVrdVBK#6u**tqbpU z=Oxf1T~IrcWvs9OBbMatyzSa~3+>1ns3whqSYhOfC3!owT|2eVjwZuYlVl%Duh_Pf z;QPLE=QjUM^DSbAx-P?IzsOK)(sl;rIw!r0o?5!!KV-q4FK<@5~(J5AIoVqa^Cu;=PMZXDfPaq%v@LYyG&iNe1&gIwSGF9g?6~4)W zPNZC>!WV1VIH*j~L4IGzWn}j~ph5Y4%1h<#y=aWKOd(|o_`Km=*VnU^)v%LJezvj? z?0>zNtxR67Tvo0Hp5*xJP(1M-YQk+CPj(mBhc4 zdy&;JMqXCRy_#6^lX6w6HTP;Fi~m%QC@5iM&~g}!WIECyHE~3e!KYgzGi?RUutrwe z)0>W6>QYS)+>@dp%>dG^v5huErduNx?R`$S#v%HbCF$1af-!@V6f8qKP{C! zaF1RiCD!!ByoIct2(c))q zOMU7my~$`dG_HcbrZM$o41G2cJ{f!}VJ16C#y=7fdqTMw@Qf9ELZi*r6UxPGJ)vC8 z))U$?Z0iaBIS)!wPu!q?TTkd;#n^h{ChtkPv!2jD(MiU?AQ5}wCibL!R1EdRU-*=I zLe`zkV(>{ip*#>hDHMA`e!|APyB+Ve0p7;DyB+WDcD%dW@$N3?eV;Sl-38ub!4YY} z-Hvyg7S6)H&Vc#8cyM%-9c4nif}5huSv;t(j`3=c7^lQ4WRP?`BtnUm{!f1XUWq}E zN9f9fAeUyme)oQQzW;`o)U0#IE$7HdHNlPA_@?`aw}PP;;+xE53Qlr)GjUS7vm7TR z&fdaF;e8uUN`pVfNvY0hoP_HaoRmI#04b$1{`jT=80qZ9j_@p80eZAUYXY>KNoxZ1Flqr#dI+{0XDys- zand|~6Hc1@(;5LiGTVof)&yv+fN+0@vpQ=6&ejUVIl8|<4`{0VZUsh&20a=Se|0Xf zhCTH1pPRGVmTL6mi@GqvU>tAC=XtY9SOp;rShd6{tqpLvhKPivJGvWZDbqE{C@W)N zLRQMawB)$hA&J&r#^R2M3l}v|e3(kzqasx)_|=l1l>p60;xyquob({?7B=+Zo#=HxVIr8zz3!NTSwdX`UfciK)! zTiR*tzr>0MF z{RUtCg@WRPWGjJ22=6!=k2ez$ALAu(wZE|~)=HmvsBXhVtyt3&Tioc~gavp*67_u` zq#(Y@iXhT^qN>Ik1o|r6rtwo<`n17K7(Dp$gw@fTrHBXdO<9k|PtQNlf7-w%(f=Qe CO7^b+ literal 0 HcmV?d00001 From e8319a30aa957cdf424b7300ada4ebbf114b3822 Mon Sep 17 00:00:00 2001 From: mlm483 Date: Fri, 28 Apr 2023 18:17:44 -0400 Subject: [PATCH 2/4] [BI-1760] - drop null columns in file parsing methods --- .../parsers/ParsingExceptionType.java | 1 + .../breedinginsight/utilities/FileUtil.java | 45 ++++++++++++++++++- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/breedinginsight/services/parsers/ParsingExceptionType.java b/src/main/java/org/breedinginsight/services/parsers/ParsingExceptionType.java index fcae51935..a4b516093 100644 --- a/src/main/java/org/breedinginsight/services/parsers/ParsingExceptionType.java +++ b/src/main/java/org/breedinginsight/services/parsers/ParsingExceptionType.java @@ -25,6 +25,7 @@ public enum ParsingExceptionType { DUPLICATE_COLUMN_NAMES("Found duplicate column names"), MISSING_EXPECTED_COLUMNS("Missing expected columns"), ERROR_READING_FILE("Error reading file"), + MISSING_COLUMN_HEADER("Found data in column with no header"), MISSING_SHEET( String.format("Worksheet titled '%s' is missing", FileUtil.EXCEL_DATA_SHEET_NAME) ), EMPTY_ROW("Empty row"), INVALID_TRAIT_STATUS("Invalid trait status value"), diff --git a/src/main/java/org/breedinginsight/utilities/FileUtil.java b/src/main/java/org/breedinginsight/utilities/FileUtil.java index e04a332fc..947bccd36 100644 --- a/src/main/java/org/breedinginsight/utilities/FileUtil.java +++ b/src/main/java/org/breedinginsight/utilities/FileUtil.java @@ -25,6 +25,7 @@ import tech.tablesaw.api.ColumnType; import tech.tablesaw.api.StringColumn; import tech.tablesaw.api.Table; +import tech.tablesaw.columns.Column; import tech.tablesaw.io.csv.CsvReadOptions; import tech.tablesaw.io.json.JsonReadOptions; @@ -106,6 +107,19 @@ public static Table parseTableFromExcel(InputStream inputStream, Integer headerR while (headerIterator.hasNext()) { Cell cell = headerIterator.next(); StringColumn column = StringColumn.create(formatter.formatCellValue(cell), columns.get(formatter.formatCellValue(cell))); + // Drop columns with no data, throw exception if column has data but no header. + if (cell.getCellType() == CellType.BLANK) + { + // If data in column with no header, throw parsing exception, user likely wants to add header. + for (String value : column.asList()) { + if (!value.isBlank()) + { + throw new ParsingException(ParsingExceptionType.MISSING_COLUMN_HEADER); + } + } + // Silently drop columns with neither headers nor data, user likely doesn't know they exist. + continue; + } if (!colNames.add(column.name())) { throw new ParsingException(ParsingExceptionType.DUPLICATE_COLUMN_NAMES); } @@ -127,7 +141,7 @@ public static Table parseTableFromCsv(InputStream inputStream) throws ParsingExc .columnTypesToDetect(acceptedTypes) .separator(',') ); - return removeNullRows(df); + return removeNullColumns(removeNullRows(df)); } catch (IOException e) { log.error(e.getMessage()); throw new ParsingException(ParsingExceptionType.ERROR_READING_FILE); @@ -152,10 +166,11 @@ public static Table removeNullRows(Table table) { List allNullRows = new ArrayList<>(); // Find all null rows table.stream().forEach(row -> { - Boolean allNull = true; + boolean allNull = true; for (String columnName: row.columnNames()) { if (row.getObject(columnName) != null && !row.getObject(columnName).toString().isEmpty()) { allNull = false; + break; } } if (allNull) { @@ -168,4 +183,30 @@ public static Table removeNullRows(Table table) { } return table; } + + /** Removes columns with an empty or null header and no data from a table. */ + public static Table removeNullColumns(Table table) throws ParsingException { + ArrayList toRemove = new ArrayList<>(); + int columnIndex = 0; + for (Column column : table.columns()) { + // Empty/null column headers are replaced with a placeholder by tablesaw, e.g. "C23" for the 23rd column. + // See https://github.com/jtablesaw/tablesaw/blob/42ca803e1a5fff1d4a01f5a3deabc38ced783125/core/src/main/java/tech/tablesaw/io/FileReader.java#L101. + String placeholderName = String.format("C%d", columnIndex); + if (column.name().equals(placeholderName)) { + if (column.countMissing() == column.size()) { + // Silently drop columns with neither headers nor data, user likely doesn't know they exist. + toRemove.add(column); + } + else { + // If data in column with no header, throw parsing exception, user likely wants to add header. + throw new ParsingException(ParsingExceptionType.MISSING_COLUMN_HEADER); + } + } + ++columnIndex; + } + + table.removeColumns(toRemove.toArray(Column[]::new)); + + return table; + } } From aaf011f3307d6ccfcf0bd0d98f8a72a5d65ff6be Mon Sep 17 00:00:00 2001 From: mlm483 Date: Mon, 1 May 2023 09:53:08 -0400 Subject: [PATCH 3/4] [BI-1760] - renamed variable --- src/main/java/org/breedinginsight/utilities/FileUtil.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/breedinginsight/utilities/FileUtil.java b/src/main/java/org/breedinginsight/utilities/FileUtil.java index 947bccd36..54a960e71 100644 --- a/src/main/java/org/breedinginsight/utilities/FileUtil.java +++ b/src/main/java/org/breedinginsight/utilities/FileUtil.java @@ -186,7 +186,7 @@ public static Table removeNullRows(Table table) { /** Removes columns with an empty or null header and no data from a table. */ public static Table removeNullColumns(Table table) throws ParsingException { - ArrayList toRemove = new ArrayList<>(); + ArrayList columnsToRemove = new ArrayList<>(); int columnIndex = 0; for (Column column : table.columns()) { // Empty/null column headers are replaced with a placeholder by tablesaw, e.g. "C23" for the 23rd column. @@ -195,7 +195,7 @@ public static Table removeNullColumns(Table table) throws ParsingException { if (column.name().equals(placeholderName)) { if (column.countMissing() == column.size()) { // Silently drop columns with neither headers nor data, user likely doesn't know they exist. - toRemove.add(column); + columnsToRemove.add(column); } else { // If data in column with no header, throw parsing exception, user likely wants to add header. @@ -205,7 +205,7 @@ public static Table removeNullColumns(Table table) throws ParsingException { ++columnIndex; } - table.removeColumns(toRemove.toArray(Column[]::new)); + table.removeColumns(columnsToRemove.toArray(Column[]::new)); return table; } From 5d6867851c4013776a50c25f2c0fb8e7a8f1739f Mon Sep 17 00:00:00 2001 From: mlm483 Date: Mon, 1 May 2023 14:14:31 -0400 Subject: [PATCH 4/4] [BI-1760] - renamed exception message constants --- .../services/parsers/ParsingExceptionType.java | 4 ++-- .../breedinginsight/services/parsers/excel/ExcelParser.java | 2 +- src/main/java/org/breedinginsight/utilities/FileUtil.java | 5 ++--- .../services/parsers/TraitFileParserUnitTest.java | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/breedinginsight/services/parsers/ParsingExceptionType.java b/src/main/java/org/breedinginsight/services/parsers/ParsingExceptionType.java index a4b516093..31e7356d2 100644 --- a/src/main/java/org/breedinginsight/services/parsers/ParsingExceptionType.java +++ b/src/main/java/org/breedinginsight/services/parsers/ParsingExceptionType.java @@ -20,12 +20,12 @@ public enum ParsingExceptionType { - MISSING_COLUMN_NAMES("Missing column names row"), + MISSING_COLUMN_NAMES_ROW("Missing column names row"), COLUMN_NAME_NOT_STRING("Column name must be string cell"), DUPLICATE_COLUMN_NAMES("Found duplicate column names"), MISSING_EXPECTED_COLUMNS("Missing expected columns"), ERROR_READING_FILE("Error reading file"), - MISSING_COLUMN_HEADER("Found data in column with no header"), + MISSING_COLUMN_NAME("Missing one or more column names"), MISSING_SHEET( String.format("Worksheet titled '%s' is missing", FileUtil.EXCEL_DATA_SHEET_NAME) ), EMPTY_ROW("Empty row"), INVALID_TRAIT_STATUS("Invalid trait status value"), diff --git a/src/main/java/org/breedinginsight/services/parsers/excel/ExcelParser.java b/src/main/java/org/breedinginsight/services/parsers/excel/ExcelParser.java index c51913693..bee8dae82 100644 --- a/src/main/java/org/breedinginsight/services/parsers/excel/ExcelParser.java +++ b/src/main/java/org/breedinginsight/services/parsers/excel/ExcelParser.java @@ -46,7 +46,7 @@ public static List parse(Sheet sheet, Set columns) throws P Row columnNames = sheet.getRow(EXCEL_COLUMN_NAMES_ROW); if (columnNames == null) { - throw new ParsingException(ParsingExceptionType.MISSING_COLUMN_NAMES); + throw new ParsingException(ParsingExceptionType.MISSING_COLUMN_NAMES_ROW); } Map indexColNameMap = new HashMap<>(); diff --git a/src/main/java/org/breedinginsight/utilities/FileUtil.java b/src/main/java/org/breedinginsight/utilities/FileUtil.java index 54a960e71..37f401f76 100644 --- a/src/main/java/org/breedinginsight/utilities/FileUtil.java +++ b/src/main/java/org/breedinginsight/utilities/FileUtil.java @@ -32,7 +32,6 @@ import java.io.*; import java.math.BigDecimal; import java.util.*; -import java.util.stream.Collectors; @Slf4j @@ -114,7 +113,7 @@ public static Table parseTableFromExcel(InputStream inputStream, Integer headerR for (String value : column.asList()) { if (!value.isBlank()) { - throw new ParsingException(ParsingExceptionType.MISSING_COLUMN_HEADER); + throw new ParsingException(ParsingExceptionType.MISSING_COLUMN_NAME); } } // Silently drop columns with neither headers nor data, user likely doesn't know they exist. @@ -199,7 +198,7 @@ public static Table removeNullColumns(Table table) throws ParsingException { } else { // If data in column with no header, throw parsing exception, user likely wants to add header. - throw new ParsingException(ParsingExceptionType.MISSING_COLUMN_HEADER); + throw new ParsingException(ParsingExceptionType.MISSING_COLUMN_NAME); } } ++columnIndex; diff --git a/src/test/java/org/breedinginsight/services/parsers/TraitFileParserUnitTest.java b/src/test/java/org/breedinginsight/services/parsers/TraitFileParserUnitTest.java index 06f8a9e85..17bd289b9 100644 --- a/src/test/java/org/breedinginsight/services/parsers/TraitFileParserUnitTest.java +++ b/src/test/java/org/breedinginsight/services/parsers/TraitFileParserUnitTest.java @@ -81,7 +81,7 @@ void parseCsvEmptyFile() { InputStream inputStream = new FileInputStream(file); ParsingException e = assertThrows(ParsingException.class, () -> parser.parseCsv(inputStream), "expected parsing exception"); - assertEquals(ParsingExceptionType.MISSING_COLUMN_NAMES, e.getType(), "Wrong type"); + assertEquals(ParsingExceptionType.MISSING_COLUMN_NAMES_ROW, e.getType(), "Wrong type"); } @Test