From 0153e0c5a2313037c38e393037dfab61c29611ad Mon Sep 17 00:00:00 2001 From: Artur Jankowski Date: Tue, 26 Nov 2024 14:30:27 +0100 Subject: [PATCH] fix: Correctly calculate `Content-Length` when reading from a stream --- src/intTest/java/com/box/sdk/BoxFileIT.java | 27 ++++++++++++++-- .../java/com/box/sdk/BinaryBodyUtils.java | 29 ++++++++++++++++-- src/test/resources/sample-files/text.pdf | Bin 0 -> 11323 bytes 3 files changed, 51 insertions(+), 5 deletions(-) create mode 100644 src/test/resources/sample-files/text.pdf diff --git a/src/intTest/java/com/box/sdk/BoxFileIT.java b/src/intTest/java/com/box/sdk/BoxFileIT.java index b4e675d55..6fa76e409 100644 --- a/src/intTest/java/com/box/sdk/BoxFileIT.java +++ b/src/intTest/java/com/box/sdk/BoxFileIT.java @@ -146,6 +146,27 @@ public void getRepresentationContentSucceeds() throws InterruptedException { } } + @Test + public void getRepresentationContentWithExtractedTextSucceeds() throws InterruptedException { + BoxAPIConnection api = jwtApiForServiceAccount(); + String fileName = "text.pdf"; + BoxFile file = null; + try { + file = uploadSampleFileToUniqueFolder(api, fileName); + final String fileId = file.getID(); + String representationHint = "[extracted_text]"; + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + Retry.retry(() -> { + new BoxFile(api, fileId).getRepresentationContent(representationHint, outputStream); + byte[] downloadedRepresentationContent = outputStream.toByteArray(); + String text = new String(downloadedRepresentationContent, StandardCharsets.UTF_8); + assertTrue(text.contains("Lorem ipsum")); + }, 5, 100); + } finally { + deleteFile(file); + } + } + @Test public void uploadFileStreamSucceeds() { BoxAPIConnection api = jwtApiForServiceAccount(); @@ -156,7 +177,7 @@ public void uploadFileStreamSucceeds() { BoxFile uploadedFile = null; try { - InputStream uploadStream = new ByteArrayInputStream(fileContent); + InputStream uploadStream = new ByteArrayInputStream(fileContent); BoxFile.Info uploadedFileInfo = folder.uploadFile(uploadStream, BoxFileIT.generateString()); uploadedFile = uploadedFileInfo.getResource(); @@ -552,11 +573,11 @@ public void canPaginateOverListOfVersions() { byte[] fileBytes = "Version 2".getBytes(StandardCharsets.UTF_8); uploadedFile.uploadNewVersion( - new ByteArrayInputStream(fileBytes), null, fileBytes.length, mock(ProgressListener.class)); + new ByteArrayInputStream(fileBytes), null, fileBytes.length, mock(ProgressListener.class)); fileBytes = "Version 3".getBytes(StandardCharsets.UTF_8); uploadedFile.uploadNewVersion( - new ByteArrayInputStream(fileBytes), null, fileBytes.length, mock(ProgressListener.class)); + new ByteArrayInputStream(fileBytes), null, fileBytes.length, mock(ProgressListener.class)); Collection versionsPart1 = uploadedFile.getVersionsRange(0, 1); assertThat(versionsPart1.size(), is(1)); diff --git a/src/main/java/com/box/sdk/BinaryBodyUtils.java b/src/main/java/com/box/sdk/BinaryBodyUtils.java index 002b3b5a6..da4161f5a 100644 --- a/src/main/java/com/box/sdk/BinaryBodyUtils.java +++ b/src/main/java/com/box/sdk/BinaryBodyUtils.java @@ -9,6 +9,7 @@ */ final class BinaryBodyUtils { private static final int BUFFER_SIZE = 8192; + private static final String X_ORIGINAL_CONTENT_LENGTH = "X-Original-Content-Length"; private BinaryBodyUtils() { // utility class has no public constructor @@ -73,12 +74,36 @@ static void writeStreamWithContentLength(BoxAPIResponse response, OutputStream o } else { input = response.getBody(); } - writeStreamTo(input, output, response.getContentLength()); + writeStreamTo(input, output, getContentLengthFromAPIResponse(response)); } finally { response.close(); } } + /** + * Get the content length from the API response. + * In some cases, the Content-Length is not provided in the response headers. + * This could happen when getting the content representation for a compressed data. + * In that case the API will switch to chunk mode and provide the length in the "X-Original-Content-Length" header. + * + * @param response API response. + * @return Content length. + */ + private static long getContentLengthFromAPIResponse(BoxAPIResponse response) { + long length = response.getContentLength(); + try { + if (length == -1 && response.getHeaders().containsKey(X_ORIGINAL_CONTENT_LENGTH)) { + length = Integer.parseInt(response.getHeaders().get(X_ORIGINAL_CONTENT_LENGTH).get(0)); + } + } catch (NumberFormatException e) { + throw new RuntimeException( + "Invalid content length: " + response.getHeaders().get("X-Original-Content-Length" + ).get(0)); + } + + return length; + } + /** * Writes content of input stream to provided output. * @@ -127,7 +152,7 @@ static void writeStreamTo(InputStream input, OutputStream output, long expectedL } if (totalBytesRead != expectedLength) { throw new IOException("Stream ended prematurely. Expected " + expectedLength - + " bytes, but read " + totalBytesRead + " bytes."); + + " bytes, but read " + totalBytesRead + " bytes."); } } catch (IOException e) { throw new RuntimeException("Error during streaming: " + e.getMessage(), e); diff --git a/src/test/resources/sample-files/text.pdf b/src/test/resources/sample-files/text.pdf new file mode 100644 index 0000000000000000000000000000000000000000..fec4053950fe6a2d575bc5e80adacbc85b4bb85f GIT binary patch literal 11323 zcma)i1z20#)@=)gQna{3u;LOza4GH%E$#t=LvW{9akt{uqQ#246l*E&4uxXHrT7ay z=lu8Fd+z_<+sT*gHP+m7+06H4j5+C4B&1k@Y#>y+o{i&;-47=@BR##SAOHuz-oy%3 zP!PZ_1+{U8IKjVdjGZA85L0_I2!LGyVrSuO3E<)e1B8W9Va`qvV_Q`B$4LqDF%ulm zgAPuZclza+D-s4a9g)s^Arj9mOfS%*Oa#{Zto(#CGm&~87chh}DCwTLxSZkB>j6J) zNteIR6~CA<%1E^H@{yqSmh9*bUFa~kG-ICBU3H(xPe%0c^}}c-`LJ2lJz_n{ed}CF zabQtAqQjHQ`fl@VaNhddJ@p6j@ouDBwh1+s0qo4dG3;3yTI2^OEK>0oxSZqJlmON)IAoEO%^#W8py6*U7v2jP9N8qJi7S{n45xbH11PC9P+LmxS zjp@^~CaQMNtLSuaeLk!($Uc%h=3{fg|02VZ)Dz(JLdzi8P&TRRb;fS6qUemmqV%kC z>(0C{<%U&!|75}8sR@*owlq$=ny4IX7ThzHl!VD>IfrVto_>kmNO-p(1`4+A^|7TiD) zVg@x9vv&vRa=-yF9~i*P#i@@952ZiHd3f%x090|ZH&ur?19ahoNk{?MH6ZTJ0DSlaQ}mEUHkrI0a0vD*If-_j*s10++MrA%3u*u z&$z~Vl@M_!(%z!S2j#h}@qcd+n?QtQ|GKhWtOJdzihV`g+rX0A$iCxNaxIalLli~T zrXW}y=aDo5K|d^>&p-1qkNIF%{NtyO+L+^|W8HDY8dQrJ81XBgt!!o7FgTUtuP6*; zq-u5KpxwVQe4c?h^VlI>rk@UGj`%B#MI<0&{q6HkJQQ)U$ARQQ^$oGscy%Pp!N2>_0sa8B4Bsz#(%4NVcVu90^RZd(Dj+mK(Me}<&Z~E zLTC|AG2Gkg!!P#3|s_Kzi$FL_(&U6o;s=^P84M{2NF+R7w$EsHEje?5le6$;1VXeuw#^w z8Ix>hqg1o2Yw#^qw%>2lSVyLqA>qXcTcjdqm=(T&U=WY;;k=d%>C+f78=;jt>A)oJ zNh-s{DH19J!usbFU$Ghqa_h%+`>ikxrI;(D70!0+Kx$1!#VoNuarERh#yQPB(V;b> zbRsZTYQWYj@~nC*>41VoZAp}n6S8 zdg6h?$sq`z)4OsdRO1;yjM|BfiE4645F`jqVRUu&m;|3i*7Gb**JiwPNAFNaqi=sKG{m8MpP%#{{)r))>WF96L={g*(~QJL0T}r-Rn%>BqEgjEuV0Vc{_dUdYgWGdQ0eZ8XX*h)SZdl zD`O!;v=K?tYtZ|w7u;*w8ylV+ULIZ{R~G)VPy$jo$d>df=?jw(lX%Ho$$80LiEPPi ziM$3+33o}BdTYg%n4;p4+>kV@fnr&mmTmr=XzhvBqLuxG%LMsk;G{*-m&w;f2d_j5 zeWh=MLqb9_rDCO;q;h*vqmZLOBv@brkW1P}1vd2#y`C}%H?RVj4K1;pvvT6HfF@f_ zSyD{3l+9KY=0QH-mwg`BDbg&R%yW^qR?&SuuPE?JK-sJD+~6nLTr0mbxj?20d4N*z zH>FGve~h!EzV)J!e%ro9Q?v8%_W9Q6>E2m`=d9Un@rs->ov|J4%U4iRClpBT8jT*+*&j=JxXS zmPW;ph;UFTkSVSyZl>9Q_$dG8uZ97B5-!G)>(?_Q*S<5`5x8S`73+Y#8k z0MDDb_LeO#bO`nf&Uxu=FIi)tvB^hi4wIViVtY0wp z7;x$zTwt`*~no zcJI^Vb_r;5woiLa?}zsn$;IGxweQu#h4Ygg;JMm`v!dOaXWY1B^krEi!mC2_fthH_ z^WC?n_9Ct`BZ2#@`oAh9G0bMtQ@Dv z$GM?dX@B@;MV-oB#$86UzfdJ}?G0S9RBUFh-D9&@h$=zG&3ja4mAj2bt zfYd7)+B`VKtiJl3J*k0%?L^**^-yD3$phZcUa{&=u( z2779zT7893W@Ii3%cZzcpQCb%^A{iXJfMLE7X zegrdx55?VOo1A%J+3}QGd*OULpzXPzBXqV0lAhdJfunJ&HT3agv2T4w`%NGFV%x&6 zjZpkFX14NEy+&Jn+a$Bep~-;Ki?UGz^`_0yxnk?|sj4Z>@@v@b-Wj3Qi%Fa1oF>t> zTx-+Q-c3dB^jEf?_1d2rYh2}i2C%-@O}6!@)vP?5tIM&CnjflrZSQxT^s97Upv#SG za&~fZuiz)k+~w}aME#$JUVe)z_G*Uu_tSO*`yYj#d41z{2XpO#7n_|ZA=%!B%Xh@r z72deVim-h7^=L%orzKcY zKLj0mtxK&tr*o?l<9%>-yKkD@XBD$cY3Lz#Ch<88i(XzCGb=u8M@=}9EPByjd^>5DbPL^Q z4TxGt~)^Za4c=oAak}&yB#f{>1J?k)8md}OP$N@RxII?Hfmn^?urk7~zgw6LErxx=@ z3oG3z9_*n8nyuWvtv{kJ($DLQ?J2Q_yrli=k1CG*YTIHw{qDt=Lb2|VpJDwn`6tbM z;BgQ14^H`H#l^*pVGuLG17lMI=>J9F9;oJD4Dnw~6!yPy%LlsX0;hyWw`!5ptz$^cK4!>z9ySj^s^KWvj;p77OV{#8` z_@h(w;Syt3gxJ~Gv#LQXTx^UV)D<{im4LuZouCfR_D-mLzbD5oX$O~Epmy*HC>oop z+1nc1{R@$TI>DU9E#VJ?IN?0i_|FEAlLNs18fxZj`LIZC4o=huiv7EnNV{n`HQ_pkd0>%Z>*Kfd8Be6afaQ9kV7KTqag8~n$H zD;e9u>Hhz6AU4JpaJ>Tbhou-?tU%Re1%r73tRNmRfCtD60CI2v;WC6QJVcS@41gCtuBfZU?{oaUICucK|E={9E^uKE!U>-ReAEYV?t z2JJRIeTF@@$=>{dNsp(+|Lm6+K}8nj$Mgu4s(7R%7*m(aSm0$P(#-RZ~b1j&G|dXIfYiI3%M??o1D2inBeT;@9RnN7w# zR`ZqPDI3~~Q{>2-1gc2(zmxyURQtS!f~{<@>`bG!v%*?0xc__<1Bz=^v|dO*SOB~& z3gp7>aKpeZ&g5W`wz4PXe4;Tq`0A%Em9j~MHnVig>9pe*kw_F^a>kt6*PB$9qz!{a zsFhnz`s+-dB}AK)1`1KNep)U$qGMT^_}Fwr7efgBBd8%6`Q9e?h=T$Z@|B3k%jHv z#1P$tb%y>R8w*AM#wVZJe&yeJogOuIjmfKyaR3n|eAQIF!WF1~KvJNNz69=& zMLyu*RsA4Gf7R(H**TBCWc6nK%y*kN&=h^CEQE*XCa#l`h|GsmjK6DzBTyXOz+xoq zl>g`9yJ!3cwkiBXEuNJMp0(r#<=9wt*M)%)2M&h1>(xhrbuUoB(QyXGm6SEN(<>J; z)|7!M%qxYczcPt+Y!7vj*{hA%Z$diJ(6j104UmU_0_BGbJ1Ibd&ALfh zIDbLG8qKTp=u@eyRLG@7f*nR1s0p9O5SYaipA3gHD^%vG>)8cvh#vVOUjVgwjke=W zpio?vViE>c#*mVL%GAzQXyx0^E-ozxjD2Y6B{~za21hE-H0zKlM~YTqqg`ruPDbY< zy7@4dT+NWmtH(7<{>_z6DdgrPkHJknm>?nr3x^NPQcWO?_do`7j-<{+D+UvXQ1Ta` zhRheT^`O)VOx&kkL1G`cjCt{*Yh{C#h_Vk7dqG5@wT}2I{VYNolvZ(Ksc$4aQ?B|yWF^p)78Utvc zX#r+WU#_l@pv(Zw9!nxy;yh;_d0t+$s1SN{@6mtns5;^j((lOnvoOZ=8(b@$1(Zn| z-rllHV6GF1k`m_iZNKn*^rabC$E!3IXJ7RAPA1f}7_prdyLDzx#?{J@r|X9q9U1M1 z{`Bu;$-*Df{Flend*nrWJ-8P$#QibL$a^nHw)}BOluP?Tr;-H?AwgSe)vr4AQo|FM z36xEC=gkoB>S8VW)mUK9rl{`ysIO&&8Wrl$2nQ5cC;`%Olp*`C6M|jiN@buG9CE2K z>(SJ;^aBabbf6O)d>8AeJ^o+Pn23h>iaIC_W znV;{ULQ6Q(GP(|?W1$>q`+=}euTH~CSE!+QD2WM@--9OT=4JEOXrhk2n{-&FeXV2X zNQ{%TlrG{UWMFLGp)TxBb?OPJu|8cI&}uGXNLGxkK&_kUFIL@wc?S$fKRqMo7Oh8o z48tUjII~KwREtj}LShSJ18UgmtP6%#DQ6?vX`vu4syq@BE`1_NSJV@zkdqhws2-|^ z8Sgi2dVT6zXUb#6M5}7O9qK0$fzLc7>iT@Z^jLf!%S zW_^fPU$`!q6jhf_ncE&E4| ze3=F>Zr5F0p{$!=kMd7s%pnpsM0=m(furjFHUtW+M@gPLf-b@ME?Ee}Z}{;0>yu5T zA^K2MITgL?=gZ@AE#;kR(?cc@C}Z|w&-MyEZ_7tKMwXp?|Hi4rv4}uw9C}Y3NuM9Ap~oail$sgXh21{j_6T1RI4YLGm*v3h;2Q&Gk6oNMF9 zn)sD!8hU=%mKT@8CUePr&Yo}Is3Ovd*rA+*gHS#M$b8QiVGuJ(y=-AyS~9>GV zd@DaL9FSIOEtDmSNO9jlXL<^-24Y!r>ED zjEY+UBOzbSVC!l@8|PPOsX!Q~op76TL*T$pBkO4jO}vW%J?6H?5{Y_gcxAz6^H|T$=|W7P9r2y8!xI+cx^}Kb%M-T zkuwgZ5a>UzPH3(eKXP6>+bu1?JAYDY98+1oP))@y?f7j#i`yx#AySVoQrzLGmbiq+ z)7G%M=y!)12Z(P&%8n?vT>!4D&Uh4CnJ)zAyxI?ShM9`3*>$Vy>wVZi_#tn@n3=FG zMrdtlNF{edG&nV+L>;Kq*j6|TV4N1wX!Kr81s|>T=iJVCgz@Qf4rBo$Xwl&u&&>0e z$q%Nskv^})#JK$+iQIRO<2R@i*+sZ!O%ku|}Rp*FOp=v%+d7>jVSVWU{ zb@c_)OGD5_Jyienj50QM_5C)qn==GcCU=0#bjx-ds_NkJYJRVGQB#+&b$+02SKIw+ zX~Uk~a;f?F=<@+t0XC)bL9A=55NGpwn+KZrw+T1Dz|%;vu9}Q+)kXb(BQ9 zTY4PbAPFDDDFmQOv*WX?c7LUueUvk8!k?@i+w3;l)oOZD6Qo+%+NYsO0OAzzGW%Ng z*`d~=)1u->HQebOUvz5uH-XqmS}_Aq)qdX(V@4-aUIr1?4dvZa zdyzRxz+RhJMGp#9*GLMR*009O^5XiRA;M0pPFT{5p0Yz4AltMCk^mz;fbhyQcf?y= z88lmL9dy=SeO1;`)^EwF$yzXN$`zEO!^qVVc5&PnrdyS9T;jUubf#Qu!w>>~soUe% zSgcET)V9{7mrg?`_WJjI_0G4+vzcx~_Uy$X{8s+bGVS)-8`bJhkrj7*+}%DeRvcJ~ z;MMz{Rp#af1r@80*9Ad*$RVtv!}bypcqqO6z+?N$EmE?fWX$PkB9odZ*gG@6Ishg_ zUVe9VXFh7K2Qr90l8e}d`sPOMTdHC$Dw$8HvSR4_sM_7#ovCTNy<0BFz8h0RJwb!f z3^LSbj0c%Q(=J!tHC=(~@-yjs_Ink3Tajx+f;TzeIgZz*_e^EvIHoixN1=3Qz)V#P zBij+TmoS?G3|3#!va{njF1vAF=DlOO-6Z+PA|{1ti0o@Hbr-()JijjX_tg}x(O1R1 z8v_Hux`V#3oZH~zpQfCl=(#jSX~KjRcjz45^G9iTc~cCj702@TNQ$#WBwM*G!Ss%NJyzcPfn}2=QvB*rJ_}9OuP2* zP=AkKKwikO{ld0%`zWfGeJdy=?byOTvfmr2YOo+~LLzW4-O0w8qWMsGr_@tI&RI%}YZ%l!Bog{i~g3k*&26_jmd^J{fyL%(*+JyLpQ=gkU zHRGHbmBvJl@igRk8i-!W+adY3IP~lk_ETADVX~w&ODMuzHF(Ez>NG6|#;3B3Es_#qXfjgFgSf;z zh+!294Glo$OnaW25{2a2cJZk^CCZ$Pm2xe_A()4fU`%s};qAvd$qOMpO4P!A?YkRT z*WJ>yt~=Y9wqx%DfbMuCi~_`UF+Ou@;eX3p-&-p1~=7JL(HwTY}c2Sl*N}UIh$_EeG=ODkh2A zExQ!@(t6z&7qBIY^n)6MtQo6!+*O*HuCHZD!&*oA@;=e)M)27zL|a%GHe@VDNs(&? zHD|6g_!GqdChW%M;{FzlP-X3zJY%?78djh>AL>l~@FIIHg@g;KBcD}WP3 zr1>QIq?u8`sH&)sy}nV7QFfH9tsb^9L{(vJ zKq2i75?=1SNUh9St?DD;7CE2Uj950fJzkqEJv^GdX!@49>v^!Jbk24S5*St*rETGN z7&rSkPHxl0i}7Qlj$dfz;C=Z%2hVu70|VyIQ{Ewy`;Lg!yY}tg8*ePA)A?nE?#*0D z9k<;_k^8lo8}qBPkV@tG%Ek-D*%2+lz}ol@e1U;%jLjf~s+Pqqv_8u%B~u zw(LWUGfX<9Bf~eM)C;gxkHcJBLa~-h0e*?VwMe28NGsE)6KI9am?t?!J^3o63V`** zZjA_^^&{z}$|~A5cJuVqh7bq(D|#&7`|>4qlgma;>+9uSU|s>*nWU62PXC~hN`zbf zb=RV0q!(;`L5X$ECDwC}ac-A9^Bc0J&7qr7KPe%m$AQDZrkG^i#V{oi_s{yEmRGL9 z(#Um|tfyC^Go?uFg|~iWD}y5F4(d-RmND0le$Kmn!h2njb~YHKnBKh1y41dv6*x|z z<0qMNe>yx&k>05Lbs;4~u+V+&Nj9IW#o4}Vb_ETMsR2~6#3*{X`aGrm%&PjM_@2v{ zdIO<)pi)giO^0vZn0V7Ia;9pG1=p+|Zwdm@) z@}2#cbmOo7-ML;VLTb}f$yWr11G0iEERY7Z=#ov*)riT0 zfbq1bqZ+a9$h;{E`A1ssm>==c4&WFt%bHLPydHTW0Rwqal^70A~NWZ+_{hOLuV0SDXCipAf2U2H- z7=y!Y#=BgtHJiG`kS49SwJxL2b*~7>C2)I=m)JeUPhFB}A_+7044RVGzuEH~HXfa+OG+q4MwM-Ijp0~#*1|{6J(^t?U#_)#Z;4cA6s0IszBowzio!h z*h!ky%w>Xi4!ID!_j$-h(>B z^=bxVMtE++$==Mx6kg)VU~6outPXeuae_hZ?EpY98wWs-LCgheV+P>l5(R<4dW-;7 z7h@-9PXN3U?{`t3yE7vyyEwf36P~pIFg%pzNt!{O;a-1Za5oYUC3OrE{G1$|TtFZv zHxLBm1p;+AIOySTxSgWC+5g?-Z_>8A6T}=Y_5acgct~2oD=KB}%