From c5bc60755f22955fcfa935085123d81d438f4423 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 8 Nov 2022 14:58:53 -0500 Subject: [PATCH 1/2] detect NetCDF and HDF5 files based on content #9117 --- doc/release-notes/9117-file-type-detection.md | 1 + modules/dataverse-parent/pom.xml | 5 +++ pom.xml | 8 +++- .../harvard/iq/dataverse/util/FileUtil.java | 33 +++++++++++++++++ .../iq/dataverse/util/FileUtilTest.java | 35 ++++++++++++++++++ src/test/resources/hdf/hdf4/hdf4test | Bin 0 -> 30275 bytes src/test/resources/hdf/hdf5/vlen_string_dset | Bin 0 -> 6304 bytes src/test/resources/netcdf/madis-raob.nc | Bin 0 -> 150612 bytes 8 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 doc/release-notes/9117-file-type-detection.md create mode 100644 src/test/resources/hdf/hdf4/hdf4test create mode 100644 src/test/resources/hdf/hdf5/vlen_string_dset create mode 100644 src/test/resources/netcdf/madis-raob.nc diff --git a/doc/release-notes/9117-file-type-detection.md b/doc/release-notes/9117-file-type-detection.md new file mode 100644 index 00000000000..7901b478acc --- /dev/null +++ b/doc/release-notes/9117-file-type-detection.md @@ -0,0 +1 @@ +NetCDF and HDF5 files are now detected based on their content rather than just their file extension. diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index c1ba693da1b..e36a78b11be 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -299,6 +299,11 @@ true + + unidata-all + Unidata All + https://artifacts.unidata.ucar.edu/repository/unidata-all/ + dvn.private Local repository for hosting jars not available from network repositories. 
diff --git a/pom.xml b/pom.xml index c6459cfc55c..8b6f98c5896 100644 --- a/pom.xml +++ b/pom.xml @@ -25,6 +25,7 @@ 0.8.7 5.2.1 2.4.1 + 5.5.3 org.junit.jupiter diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 339de904f9e..dc4f8b97f9a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -108,6 +108,8 @@ import java.util.Arrays; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; +import ucar.nc2.NetcdfFile; +import ucar.nc2.NetcdfFiles; /** * a 4.0 implementation of the DVN FileUtil; @@ -467,6 +469,11 @@ public static String determineFileType(File f, String fileName) throws IOExcepti fileType = "application/fits"; } } + + // step 3: Check if NetCDF or HDF5 + if (fileType == null) { + fileType = checkNetcdfOrHdf5(f); + } // step 3: check the mime type of this file with Jhove if (fileType == null){ @@ -669,6 +676,32 @@ private static boolean isGraphMLFile(File file) { return isGraphML; } + public static String checkNetcdfOrHdf5(File file) { + try ( NetcdfFile netcdfFile = NetcdfFiles.open(file.getAbsolutePath())) { + if (netcdfFile == null) { + // Can't open as a NetCDF or HDF5 file. 
+ return null; + } + String type = netcdfFile.getFileTypeId(); + if (type == null) { + return null; + } + switch (type) { + case "NETCDF": + return "application/netcdf"; + case "NetCDF-4": + return "application/netcdf"; + case "HDF5": + return "application/x-hdf5"; + default: + break; + } + } catch (IOException ex) { + return null; + } + return null; + } + // from MD5Checksum.java public static String calculateChecksum(String datafile, ChecksumType checksumType) { diff --git a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java index 01fb8aad6cf..e710236e446 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java @@ -373,4 +373,39 @@ public void testIsThumbnailSupported() throws Exception { assertFalse(FileUtil.isThumbnailSupported(filewBogusContentType)); } } + + @Test + public void testNetcdfFile() throws IOException { + // We got madis-raob.nc from https://www.unidata.ucar.edu/software/netcdf/examples/files.html + String path = "src/test/resources/netcdf/"; + String pathAndFile = path + "madis-raob.nc"; + File file = new File(pathAndFile); + String contentType = FileUtil.determineFileType(file, pathAndFile); + assertEquals("application/netcdf", contentType); + } + + @Test + public void testHdf5File() throws IOException { + // We got vlen_string_dset.h5 from https://github.com/h5py/h5py/blob/3.7.0/h5py/tests/data_files/vlen_string_dset.h5 + // and named it "vlen_string_dset" with no file extension for this test. 
+ String path = "src/test/resources/hdf/hdf5/"; + String pathAndFile = path + "vlen_string_dset"; + File file = new File(pathAndFile); + String contentType = FileUtil.determineFileType(file, pathAndFile); + assertEquals("application/x-hdf5", contentType); + } + + @Test + public void testHdf4File() throws IOException { + // We got test.hdf from https://people.sc.fsu.edu/~jburkardt/data/hdf/hdf.html + // and named in "hdf4test" with no file extension for this test. + // HDF4 is the old format, the previous generation before HDF5. + // We can't detect it based on its content. + String path = "src/test/resources/hdf/hdf4/"; + String pathAndFile = path + "hdf4test"; + File file = new File(pathAndFile); + String contentType = FileUtil.determineFileType(file, pathAndFile); + assertEquals("application/octet-stream", contentType); + } + } diff --git a/src/test/resources/hdf/hdf4/hdf4test b/src/test/resources/hdf/hdf4/hdf4test new file mode 100644 index 0000000000000000000000000000000000000000..4674fdde19487d5c28b44f54562fbe5118284e0d GIT binary patch literal 30275 zcmeI532+ou8pprf0b@it1YvblR8$N}IE3IzGLs;|5Rn8#K_Mgq2@DxXCJ;a|;g(C} zlFNX|jUXx_9$>r>FT@*9sC8@C+TGf+T+3p2E!p4K(;0HiWCsbTq-W}v|35FUyI+6r z&Fh}mmG@%9nuo|i(buICBGP0uZfMkYnIuy19zHy3o8dpicZ=E~lSM{1!Q03)1%EBR z9eLEJ;X9M(UVN#MzFqFaFE#RSGyHBd&s5U?YUa6LWK0-dkEK4!$g^F_@JaY+($(AX z-8gPJUiYu*>iL_IJ`KOeOjp0(OrMVb!Aw{GlbJq4#FdC|%YM}N({vdvGx6k;@X&pR zpN(%7dRRh9e*_=HInKgw#ivsq{_gr-^RplIKjJ%+K1XD%#z)JkJ^`=ge*izqOjqwV z)92zhn(69ynCbKI-`&wkW@hS%~h5^=}iwfyQ^;I;e@;rp8D>IazVi}9SNkzYOA82Ojr518rd z51Hw-i*e!jmh4}B8#8?w{x-apM|~E)JLk0=uVvQrQcqcp^I{+4G;Ufi596Q2_oJLE z@IT`t*bja@=Wm?XN_;oGo|pQY@p@jX@E$W=J?+>?UyXmtOjrNBnZ5@9otduwxS38H zDT>2)VE^g|X*%Vvz*A2~`PFNDE~Q=~@H_EZ{5t*}nd$0ZH`6!Z|7)h>JsKa)zY*U9ujN;tW2SGy6JKNh z>M5&HFPrh7;WAa?ytd(KAJUz4^=}*L zJ7hav>oF~Kza&xR7vYad3+X9xaWD1u{pwp^hHpmtP54eCA))v`inJjn-332h7>&=zv=(v1A-GtK-Z_2l^2J8g*_ZJF)~&Z4m+oJF>T9z8l&br|)kUMJ{c=!in3wRT5n()h z7v)Kl)Pk{ta$NCo@kVAXj)c(HM)`6?4J1pAzZ*H(m(e8}rPqCHPfqX7E6VQ?!wO1E 
z{TwdLI9wAk{v3H~hl(-|7aibmlP38&ocHX`<8b`B_Tk)xPWpOY-k60OF$?#_EMj10 zdUlpXrFc9=)dNyFtkT%Z_z@z}8fJc04|g_P6|Yx_5e7MrvkC zzq%(A(jq!UFSEugio2uc)!|N}xttfZgZH+RZy6oBkNb*fX)5PStX#m={zAEkoXy!& zxE7Hk=hUdLUs}yaPgI&^)2cnyY7Tm9f1$1nV(WUUIq0qZ9Xl)EUF4}fL8- zP&M1C*~yxXrdHclhnj6$PtC?iOKBw+(}%T@OUT)lJ}1H`RRdLR*vm&xobXldDOYVu z&|CWpZ!pIWT(!X*JMbqs!(BMamhS(v+tR4vmeg>|U}~5Eo-jTg{;xMphxIOSxwz7l zIQ@L?Tid*?LpyYB9qp?_>u9gv7ixa)xkh)k4adAz zu9NG@-H|GdFp65I1>{w40+B{}^|mF_SKa_kpwV^`sqPZ4=x3@4RK7a9jPQG3{=44# z`;N%ux=pAnO{fMmp{{bi(TCJ%LcU%^?;m0$PC7}vBuJukmM(13RhxzP{OhzMkF(el zmzy`zD7m-b;l^EG^vRZ#ot~YMR=2-P&r7ngrFRbmEv)nkTK@r}xdx zlX1Hg!|+5qxk_$eAnHbT7418tNJnb#>NmLBx1WudgxIH?jpNilhS@Tz-Bnh#i_rZ> zyYP2?NeXa<(Msx7CX$RM^CMaPI;ezWA}IpRzy^HlN=b(PFdW7~8O(u2umaY>7T5)Q zp%MKy9EbnFPw=ydLx48u2!${R2X5jVkwC+AL_-s33gZ0W^aP z;UZ`bEr8oxM=Q7(T0=gV<3^nzqa0Xq;3M=GR2Z@2~eKsxk= zTOkAbK_=V={o!^P09lX?17Q%{0fXUAxC@5BP`DfJfnhKla$p4HLLN9_B#eT5D1gy0 z23#-}3c(HIU_2B7x8e>Dl)waFh}=;MlVCDTfqUUTma#qFd2z=r(jax*dHSeH`6^ z?m%~>Qb8+`_S2Hk`1L7zpRMfak6(dW?T(C5+T z(HGDc&==7c(S7JXbU(TueF=RDtwbx)m(iEeSI}3`SJ79|*U;C{*U{I}1Ly(t4fGB4 zAbJpe6MYkX3w;ZH8+{vn2Ym;97kw8!gdRfQL*GLWqleM=(f830&=1fL(GSs&(2vlM z(T~wj&`;1$(NEDM=n?cY^fUBx^mFtJ^b7P$^h@+B^egmh^lS7edbCWemlXjk0#*d9 z2v`xYB490ts0V@Jl1gr>H5wId)MZk)H6#**(Rs^gF zSP`%yU`4=+fE58N0#*d92v`xYB492`H9)W111pX&d z_@*14R1hZFg@XgW2Z8M*v5qo}Isz`V`K zDI;Z+Rgmjt?-oWS%Y0nd2$Xx8>*L*~(oy!Cqdub3#tBC#T%QOqrhi za;GiVT~b)?DVh`~Qf|H)ZQ#$b6E6#?`8Gak@y2Z6`cW(4bUiZR96RY#JDvJIYW}`5 zcmmD#Su@i5o8(kaJGEk2xvdCT5%@htpla^OhSub!-Y7KmerxZqD%Q2r8_Vx{Z49Yo zMHUT2nUwN#qS{9Miaj|=XYD!8`KBEB%gw}q^OO?I(|Ml2EcxbA0cT|)fGKC|d4hM{ zf%_Wrc|uq)lkwLx&^)a2WW)DA6Rx?8fWJwigE@9R1I1y z;a-`_0(Og7pl-1&VM)1VY`2`Z^?X5_%IUY1U*Y*e1r_guhM>pS%&T)tJ_?)Z%)UQry6jh@04UGw*YB!2rFXn#u~N3t3Hbt!!b9MivnWxM0Bl E0NB#lt^fc4 literal 0 HcmV?d00001 diff --git a/src/test/resources/hdf/hdf5/vlen_string_dset b/src/test/resources/hdf/hdf5/vlen_string_dset new file mode 100644 index 0000000000000000000000000000000000000000..dd20547f8e9a5d0597c76763b2618131c96033cd GIT 
binary patch literal 6304 zcmeHI%}N7749@JfI#k4;lX|KzAo>Q1ySfLl#rEK_B2{~_uv@8*pf~Xedh}6zD?6Da zW$`FT1-}#~nIw~){rL9lYmuF;xAt1Z<~=oNGlsT&sm*pIV|%L)G;r!))PE!rIN~3X z9;Ypx|Glq&mFE;XZN7OOM~4lGyd~iO6#_+m(&ZPv*0(m6ek#uljI4JQL*rv%g&%rH zE0MplFhhmO&dC#$mf^b*OGBM2yZaQG&i9l~VQ!Or+$V{oz4oVWpwvE=LAq{-em0)5 z_I}qVdXD>LExpT5#$U8vmfGpEZi43m0U+?#5m*fuoT*r_M%MQ%9nD&uHLhM~X+J;h zTtxj;!51x+bD>PtbU2&@Z+`=Po~;^}<#>8PCQp_hRqg8O_Ff00e*l5C8%|00{gt0&|>?K0E*b literal 0 HcmV?d00001 diff --git a/src/test/resources/netcdf/madis-raob.nc b/src/test/resources/netcdf/madis-raob.nc new file mode 100644 index 0000000000000000000000000000000000000000..d0cae0d077d12c7dd1279f63274997847c5a627a GIT binary patch literal 150612 zcmeHQZEzjcd0yGrl5A{jQf%{;ED*o}$w(H)HU%X4%CcnqA?vy}rD@DccO`AOU%4L^ zqBLkGGy};@!%RCAoRYYZ8JG#3Ap;Dgp+x~^LOV^GeoRZ!q;;pA3_tk6q(7Qwn7Yq< z_U_f$bI-l^thkb{#e2u=-Mf3v+4nr>Iq!4M*>|sE1A{G=Wzn_{+r`+{=G^fk#T1VG zPg}}KA97E-NqpUctv&Be9uH37`xB*eC#7?TaIQ6VGMy+DbP>L{rHQWvr`>cRlbhM+rI8XJ8ynR%hOKQplS<)w7min9yJ67Fz_bM)bX^N%X=A70m{2=Faz`)WRK6}3vMcJA9BX*T|21k*6~Dgz$v(t zkJ7g7?80YPS2vE=dx;ZH&U40+t}u|Ux0dk1p|zplrQA>ozl_%WO!_!3QJQs0r*J5f zMk>FKRK^CBamXoj(9X9K=_b6SJL2ZNOoFmm)D~#}jhnFFK-c!C6GtZ|@@|2y&R@R- z#}^L_oJwZWlc&;X41A}1bR3J>K+3TbZhk!HWr3NU$|T(6?Y50e`C=;N0L9ZzvM4G? 
zW#rrmH|M6ugEDUIKRG-Sw~xEyQ)%xDMc0mJ-EnWi8%OhI()Nfme!ny6+S`xEM?Pck z+0(hRyK|KI(vGsq1()pk#JwG{j(Eq3j(a+ebsXrhVa`5WvepG@Pu)vjh3y-TWXMJ* zn@xJI4~LYF*x{$!JmTc@ZbH;f^iZPtd)!mbvw6@#L)Eh9SUGKnQoh~YVUHCH_Jm&qs4?m0aYNwLV~6fHTrT6% z;dH^xWiy}{?kW%$*XC_IP)@sbI6a;$CXmic+s^p7n=RN$FE7|MpI$VHXoFDtocnoq zd_Mi=W3f2d7a|?*L^$8<6bqS@FbGX3+(zCW&!h{Em-cV&I^$Ees2>>zKjN1q5r)cH z2RwEPnh3b9OG4>RiI%2xH=b~kUcxKPh^86I*RmN2Agg8%mFD>=M_=w-jPKjOyzi=$puWN&w<~vWh&zCxrJ)OxV;1ediv;c~3 z?U!>Tv(GE!#huW`t-d%FufE|W;ZG3Jo$yKD^ zN*p*uZ5;O|(?Sr^1#p76M0~giM4%3ezTH1BD!y~B#piH((LF_Z-Q8VE*t0O^v>x%PaSdYijfj zU0%6wTvMZO=<>>aV_Rk4C}#;Uk^2S4A(g(=?-v#M*NZca=BMHxtuej%7xW-EH-c|o5@p;ro1=A zi<20w&>&yGGG|s%emj4L1v`J*t~RK z5>?9Eq2z^0zWQUlQC5GcgdqQ|Rq}f@oWd}G#w)e*@2Zj?QwuS8sF(k?QvT}{`==y{ zkjyUEdv__{%}Ty}sXJ6LyPTigBH^}E%`c?FDN99CNJWmMLN=pZ&fZcv*DB>qlVs$D zWQ6jyQ@a)O7%?JaDetZlQ^1+bn0+V4x!$Bl*yG#Hpp8#3iIN~jv6LS=Ax&1Q>xw@c z5jrHL|LA>aJ?ilL*w&>nbPZ;@R55&L{wn6}rv|-b63)A1bZI^(($F!sS0BN?-5(N& z$ueD}_G!l!%=>j^Q5rf=9~{3mg0ddPG3J{2!l(;m6XF`Bp|Wb)bRHk1fjyU;wOSs0 z7sst$`gAaV4enF34CJ8?%D~q0^aI$h%jVqEiY=6;W;ye=h;kl6`gYjjFB4F1zutB0 zNM@!>39yS$uQOL6{W>on zDr6xz&r&(Up5ZhWQSi;jhxln~&n)U?BYit@XTBK0uc+mmrE;cI8T5)^si@3TvS+op zkF-#M@5JRxPaZyMi>`!K1>_uab2y8#G;^V=Ph^tClxz3fnF*WnWs76UU`2u)6F%#- zeXr%q-^8_mqmSTNUdtgn=(84EfL*)1sDkst*1*o;1Zat|Z^>4=b}P6~{ctV%G}d=; zZ7k?FYGKWK*{{8X6NRzooK_rP%5*Et^09JYwKH3;5UbG z)y&p8q@%Xlk2OEa6UkP}1XzQ0Hr3WODHozorB<=^&37zo|Gjf`UFDSy*SC>QUjLJA zr8d&F|AzQXI+LY|Vc>NuF}*eqzn)HDxzob}oxQz#@9wV7?#^zztLt|9x1;MbWTN12 z8`>9ETdg@a8I&m7QLP_d7S9yZ2`@cqkGeF4cN<`jiW&8$#r&LfrV*@mdY@&q&m(o+xU5KuT}S0Enl~B-_~?7br|)*p%Hdh_x6Gc-p{2=$3cn)T_XMt+ z*mn-;O7@+^p%HHN?Gr`SwC@n^Wn$ldmFzn_ZQia|&*OL4dB95*Q?{73N=;kSHp0=w zw)aZ5(bFRw8ew%lXZ&lG4qzy;+&~n@57cg*&H}cR?TykxK~SquNc7!4jv`B zSizj{5XsULwn~4z9*fEa)F2N>LibT^Cv8~OJ8M}3-@|{En5x==e*E+YNY^TC9~huK zL4ONuujU6kwYCdfQ5*P~(9<3&89E{6Q)NG5ZJ_5f_sCCX^uF}+17aL)_ZJE|4-YKx zkRt9+`2BLp=51EXH-@lp6LrBNAdc0#P+RDQ8(}APK(D_bWL7wQ?JSEY3en3y-d9(uZ^kKW5YB5`OmKhy68<1+H0MgnYDWL|m5p37zk$a-N2h 
zy7a-?p;}I`CU|A#T6h@&6lu{tcULKUPwD(kb=SE=MT#bh%;48{%KGsV6jd-hSp1c1t-?t? z=V+YLDr}FY^coHR+X|Z@B<_;yl5_L?_Y2aJxjRj=C&trk69Sc0) z1%I-pkcd1MxM9GZwns7u%d!XDEIbFj)DfCIS6`o(xuG2sz$R;7mi{*x6MXwD(zS{@ zL>m+Aty_m0g9F@CT?1(FI{mkQg?qG#dPN%>?1@q@Wssm-yItw?e|fzxil0~WErQ;^ zK;zwj15xxo5(gCF*KuGa&-K-IY@)TE7yS7F`DX-Hf(udB?{ACZ2NXe9*Yahb&3fLn zdj7&3II?Csx}N)y2m@XaA+DR=tDIP0ihDzxkBi&tpR?12*vs_4iI%@e5%c z5zj{S_SaHzRl^ry9sk8gQKvRhr)WC9)2vR4g6p5ZvWCw;=Qn-sUh6%p_60oGxd=SK zbNCm}N_>c->4tnz)LhR8)^mUD$CL}T-Y@znTo7aYl5dEr?_Fm8A*Af84`1v1a4@op zf9B`&%Db6mI0a_(%zp`<&wpnGcUv=^k33Jm%8wbyx_Ft#MAO?v%=ImZXcpD0q3IOK zS%ouFJUjf|%X3KADr}FY>9OBkKCI=0?PxeIuMzXHMbh$D zXuKM5A&Qn8aDjDv5qPjjI{u13#wY(AeV*P?-!GI!haSDF8m{_mo}V}E{QQ-3Xuq}D zT=3dOIR=+vzG}t5Un|98$}eAtRd(s!3zZvc45oIy-8>ea-f8(RjYSYgOmz@~PPWr0 zr~|E0>EcP{Sa@Xi-!AZNUzS+-C#{z6Jx{j!b@9c#R1fUR^)ZQu*Vac-bKSZs& F z%jNOG0_R@Dh)H{#m{{JKCCTdN$ZFv75;(N^mJNxeK>{!`GX&0D*Za* z{Oghx<2m_Ri&n4AW9G=uy?Y;QM9dJ?A(CDzF>_j~tFNyu_If%r`=_rF=6-!5Yqk;x zrLB`9+uGP?)@{9RFLR6>@%>+Ed|Kv5L|r%Gha&A-ez3MzYWp(B#Su4tF2?(T?^uBL zE3t7}|Dni#P5;i?-?Z9)4sHb6pCH|&Lu2jVQ6=sI1yvmcsPp!^V*oP9EJlua|36tI zM;sB=CDIt6T8x~Q`08K)_f*FNjf|1QSm1x(!9CjjdPN=!REv?))=e1{=;m`gCeV)w zv+K0U#a~|z=03_d4ce8{vtd$5oKOqEnZIR4-}EF z<-b|un^xn`ZvscwPSKAg`@anotp5>jYOTlL5PEO2(?$UrZTHJmxgozNsh!`OzyIFS zWv$vW3tRC%Fx4TX?Li%AuUn(2^8S06?kDw2^NXqLhpbgwUnHCTI#uX;P_JrhvsK=6 z*49grb?sb^b$wZMow&XALzxGPt~ZhgimYpSz`DNVx=tLpyhG-IGG`|a)LXBw@{YUK zA1Jb3F?W}JH|u&+>iYU~)~YK$3tp_9rm!`mv+A{k@Y(q^Q^58Yd3LawDIxpa+&Q5dv}evyt^Q`e?;vzXn)r2=56~u-;&#Y z0lFQIbJzC&nwqWpW}bgHt#0rACvXLE?!iycm!F-}@3&rzt(tCMDaE-xzf zJw}bC)atc)e7m*DTDAXW+z0XPbo-EA2e#ALL${v3LvMSn*VPvF`S?Ys%iCnDUzZBK z4(dbgp{tJ^uP?QQRG$jD*3PqduDvXCZNX&`9}%>_qSqh2{VU{J+x|SyUh;XiX#02N zzOU#twSB#H^PB7XvyfcZ_06o;O{v$`S?C0<$so=>vR;2O9Oo{{L|L*ccu_aD%{;!H zcs}?jY)5>1WWDaLYP%$H6*Fw$r2KuBM*n6EbbIhg>briOBI~viw=cDcR4+xkb#Y9r z+j`wz<`{Ru`9GEU5mDET_@PLqxH?4{ms!ow!R*mmOk0eaSt*!IZ!e|!BpNP<~85KzYf zjf`!_7~nt>^+Ier)hqHCpjzC%)IL((l)(Vqm;ZKK)BMdB;yAs(QRYHKy^qEPMfj_5 
zfwjL<`{idWi}0R<@O85?4;G;9yCeC$u*lc)fLTRcB3ap1!Ji>%{UhhpI*t*D{lYOIC!xjO7IjDw@G>oAQo5C<1o!~J;A zcvr?D zeJKXk@B0hT?apfcSn_A}G+Xu0JpXRm`SVynb!qGu)R%a^)VfV^4v2@Nx>V?P zP@ih?aJLxLhjqKTb^Cp}{T1DAMEkRDH?MAgD7XCpzTiW?i$2u$_2S{=c-X)ntNyH> zZma&8b-QVG`ylB6*3C&Ph$7_}_P|fjUbk+3ogjt$4tciv#2Wg+bKncs&?9U5rvx2>HRymN z)1le8$_aIirU7f{chACp@%N@8>bd{B8LIs%$Cq5o@21!UtfMYS%l+qH_3FerPQUNa z#B1qy|FO)0h&pb@0oL$lZvSt|?Y{sG4?j-{e6#A?YU{V^t9f4DwDa=2>HX}M{YU!a z*z<=GU<4QeMt~7u1Q-EEfDvE>7y(9r5nu!u0Y-okU<4QeMt~7u1Q-EEfDvE>7y(9r z5nu!u0Y-okU<4QeMt~7u1Q-EEfDvE>7y(9r5nu!u0Y-okU<4QeMt~7u1Q-EEfDvE> z7y(9r5nu!u0Y-okU<4QeMt~7u1Q-EEfDvE>7y(9r5nu!u0Y-okU<4QeMt~7u1Q-EE zfDvE>7y(9r5nu!u0Y-okU<4QeMt~7u1Q-EEfDvE>7y(9r5nu!u0Y-okU<4QeMt~7u z1Q-EEfDvE>7y(9r5nu!u0Y-okU<4QeMt~7u1Q-EEfDvE>7y(9r5nu!u0Y-okU<4Qe zMt~7u1Q-EEfDvE>7y(9r5nu!u0Y-okU<4QeMt~7u1Q-EEfDvE>7y(9r5nu!u0Y-ok zU<4QeMt~7u1Q-EEfDvE>7y(9r5nu!u0Y+d2A#k2Qi~u9R2rvSS03*N%FanGKBftnS z0*nA7u&fARnW1F`Ej4ge7y(9r5nu!u0Y-okU<4QeMt~7u1Q>w}6@eC7ARyzHbD?0l z@88{NpKy|1!Yj3N-20G| z&AaxfQ*i5;v8U5M>7^5Pe9Colb<(QMw-;D>DYtXT^(LnZk)c+LGyZ<(Gr#;G=wFt# z`u+V!`s2o#O9}Tq{V(hK@A{9Sethr$(cjNJ;P1ciQ~&y%*j+;TGsyGoU``-dDhzh?^$bb<2}}({f;$w+pIMhpRxvj z?{Q;)6qi1V2e}U4qijm{rTxl0TpAuS&Wtm0AokT?OvJwW)S1|06BDt=E)Owf4O~*lKj&yS8q# zYLydQ>-)=%M)uux@@=E?f^vD^d_J)6x1T!GH}lk)-d{{i^!D`h^uE;7)0f9~4s*Uu z*w8=4z7P8@p06>7n~%fXCM%~+#M;nd%-OQoXsxJN+CN;{2j}SeAI%!i*J9EDNf~mi zMyObXP078)UmL~seOS3ODeFzcbl!9zeRHKT5Qx$LBsQ#B_rF)#pT(ZG7qM-!EJ|xSUt@n1ZX+6Jn(+4fhUw<|tkovv z&AoB{|BTW-Y?iLiG~HRN#pJ%It`RP8oHw(t|7aNZYBu!snw3{AKc6?lIVw)#`P%(+ z*1%YwH2}>RxC7f-9Iu|WVn4%}=q$EB!IsDNIcx{9?ZdVc8`i;N?Rf6?Z`l6E+*g_U zG)`ysEVCJLk{+)Hhva&i;e)YxFb9|e%mL;AbAUO(9AFMG2bcpFHV4G}6v4X`kKw(FFaP9B>><2M z@etmlC}MN5jm=uI6WHQ7{w?h9#rqY%i!^w@BKA8t7x7(-W_*6jHZ}IyyA+>AhqN`z z6DxYQ{UyUqO{;!ymO8N=i~u9B0ugA$_^S8Ky+$#ReTN@2O4s}IKQv4C->;dab8j|F z_ub8A>AwEgX6c@IW7Y)UJAQf2sJy<`f9^9W&ooW%%U?9IuYcpKkDHX)_r;BKCh1Jm z^l$!8v-`f1Gs3U;st1kIncBzcF6_Apw;#73bAUO(9AFMG2bcrQ0phJJ7-zFv)JhOynce;_qt(XqVEZ8INxuVeyidfey;+X_-!vb*M+UG 
zw9zp(ex}^mXUZ2cp217u(%x#44?WXv)Yc?bVlx;4Mqni(&}hBq@^}o%9AFMG2bcrQ z0pFb9|e z%mL;AbKv91feU#ay&Sf$#5o?Fb9|e%z>54f!OCP zD|XrZ@4v-jVjdIx#+G(c)9hG?zx{^4_ZH0UOMlMmOQ;NO+E1-I^{@Whu3FcFYyDR~ zG7=m&dM|#zuWyC5QxV+DuB5AZ9}8W`+t7_ Date: Tue, 22 Nov 2022 11:47:14 -0500 Subject: [PATCH 2/2] fix netcdf "classic" detection, beef up release note #9117 Also fix test so it doesn't rely on the file extension ".nc". --- doc/release-notes/9117-file-type-detection.md | 4 ++++ .../edu/harvard/iq/dataverse/util/FileUtil.java | 13 ++++++++++++- .../harvard/iq/dataverse/util/FileUtilTest.java | 3 ++- .../resources/netcdf/{madis-raob.nc => madis-raob} | Bin 4 files changed, 18 insertions(+), 2 deletions(-) rename src/test/resources/netcdf/{madis-raob.nc => madis-raob} (100%) diff --git a/doc/release-notes/9117-file-type-detection.md b/doc/release-notes/9117-file-type-detection.md index 7901b478acc..462eaace8ed 100644 --- a/doc/release-notes/9117-file-type-detection.md +++ b/doc/release-notes/9117-file-type-detection.md @@ -1 +1,5 @@ NetCDF and HDF5 files are now detected based on their content rather than just their file extension. + +Both "classic" NetCDF 3 files and more modern NetCDF 4 files are detected based on content. + +Detection for HDF4 files is only done through the file extension ".hdf", as before. diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index dc4f8b97f9a..257bc166ea0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -687,7 +687,7 @@ public static String checkNetcdfOrHdf5(File file) { return null; } switch (type) { - case "NETCDF": + case "NetCDF": return "application/netcdf"; case "NetCDF-4": return "application/netcdf"; @@ -697,6 +697,17 @@ public static String checkNetcdfOrHdf5(File file) { break; } } catch (IOException ex) { + /** + * When an HDF4 file is passed, it won't be detected. 
Instead, we've + * seen exceptions like this: + * + * ucar.nc2.internal.iosp.hdf4.H4header makeDimension WARNING: + * **dimension length=0 for TagVGroup= *refno=124 tag= VG (1965) + * Vgroup length=28 class= Dim0.0 name= ixx using data 123 + * + * java.lang.IllegalArgumentException: Dimension length =0 must be > + * 0 + */ return null; } return null; diff --git a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java index e710236e446..5fafb2be479 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java @@ -377,8 +377,9 @@ public void testIsThumbnailSupported() throws Exception { @Test public void testNetcdfFile() throws IOException { // We got madis-raob.nc from https://www.unidata.ucar.edu/software/netcdf/examples/files.html + // and named it "madis-raob" with no file extension for this test. String path = "src/test/resources/netcdf/"; - String pathAndFile = path + "madis-raob.nc"; + String pathAndFile = path + "madis-raob"; File file = new File(pathAndFile); String contentType = FileUtil.determineFileType(file, pathAndFile); assertEquals("application/netcdf", contentType); diff --git a/src/test/resources/netcdf/madis-raob.nc b/src/test/resources/netcdf/madis-raob similarity index 100% rename from src/test/resources/netcdf/madis-raob.nc rename to src/test/resources/netcdf/madis-raob