diff --git a/src/intTest/java/com/box/sdk/BoxFileIT.java b/src/intTest/java/com/box/sdk/BoxFileIT.java index b4e675d55..6fa76e409 100644 --- a/src/intTest/java/com/box/sdk/BoxFileIT.java +++ b/src/intTest/java/com/box/sdk/BoxFileIT.java @@ -146,6 +146,27 @@ public void getRepresentationContentSucceeds() throws InterruptedException { } } + @Test + public void getRepresentationContentWithExtractedTextSucceeds() throws InterruptedException { + BoxAPIConnection api = jwtApiForServiceAccount(); + String fileName = "text.pdf"; + BoxFile file = null; + try { + file = uploadSampleFileToUniqueFolder(api, fileName); + final String fileId = file.getID(); + String representationHint = "[extracted_text]"; + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + Retry.retry(() -> { + new BoxFile(api, fileId).getRepresentationContent(representationHint, outputStream); + byte[] downloadedRepresentationContent = outputStream.toByteArray(); + String text = new String(downloadedRepresentationContent, StandardCharsets.UTF_8); + assertTrue(text.contains("Lorem ipsum")); + }, 5, 100); + } finally { + deleteFile(file); + } + } + @Test public void uploadFileStreamSucceeds() { BoxAPIConnection api = jwtApiForServiceAccount(); @@ -156,7 +177,7 @@ public void uploadFileStreamSucceeds() { BoxFile uploadedFile = null; try { - InputStream uploadStream = new ByteArrayInputStream(fileContent); + InputStream uploadStream = new ByteArrayInputStream(fileContent); BoxFile.Info uploadedFileInfo = folder.uploadFile(uploadStream, BoxFileIT.generateString()); uploadedFile = uploadedFileInfo.getResource(); @@ -552,11 +573,11 @@ public void canPaginateOverListOfVersions() { byte[] fileBytes = "Version 2".getBytes(StandardCharsets.UTF_8); uploadedFile.uploadNewVersion( - new ByteArrayInputStream(fileBytes), null, fileBytes.length, mock(ProgressListener.class)); + new ByteArrayInputStream(fileBytes), null, fileBytes.length, mock(ProgressListener.class)); fileBytes = "Version 3".getBytes(StandardCharsets.UTF_8); uploadedFile.uploadNewVersion( - new ByteArrayInputStream(fileBytes), null, fileBytes.length, mock(ProgressListener.class)); + new ByteArrayInputStream(fileBytes), null, fileBytes.length, mock(ProgressListener.class)); Collection versionsPart1 = uploadedFile.getVersionsRange(0, 1); assertThat(versionsPart1.size(), is(1)); diff --git a/src/main/java/com/box/sdk/BinaryBodyUtils.java b/src/main/java/com/box/sdk/BinaryBodyUtils.java index 002b3b5a6..da4161f5a 100644 --- a/src/main/java/com/box/sdk/BinaryBodyUtils.java +++ b/src/main/java/com/box/sdk/BinaryBodyUtils.java @@ -9,6 +9,7 @@ */ final class BinaryBodyUtils { private static final int BUFFER_SIZE = 8192; + private static final String X_ORIGINAL_CONTENT_LENGTH = "X-Original-Content-Length"; private BinaryBodyUtils() { // utility class has no public constructor @@ -73,12 +74,36 @@ static void writeStreamWithContentLength(BoxAPIResponse response, OutputStream o } else { input = response.getBody(); } - writeStreamTo(input, output, response.getContentLength()); + writeStreamTo(input, output, getContentLengthFromAPIResponse(response)); } finally { response.close(); } } + /** + * Get the content length from the API response. + * In some cases, the Content-Length is not provided in the response headers. + * This could happen when getting the content representation for a compressed data. + * In that case the API will switch to chunk mode and provide the length in the "X-Original-Content-Length" header. + * + * @param response API response. + * @return Content length. + */ + private static long getContentLengthFromAPIResponse(BoxAPIResponse response) { + long length = response.getContentLength(); + try { + if (length == -1 && response.getHeaders().containsKey(X_ORIGINAL_CONTENT_LENGTH)) { + length = Integer.parseInt(response.getHeaders().get(X_ORIGINAL_CONTENT_LENGTH).get(0)); + } + } catch (NumberFormatException e) { + throw new RuntimeException( + "Invalid content length: " + response.getHeaders().get("X-Original-Content-Length" + ).get(0)); + } + + return length; + } + /** * Writes content of input stream to provided output. * @@ -127,7 +152,7 @@ static void writeStreamTo(InputStream input, OutputStream output, long expectedL } if (totalBytesRead != expectedLength) { throw new IOException("Stream ended prematurely. Expected " + expectedLength - + " bytes, but read " + totalBytesRead + " bytes."); + + " bytes, but read " + totalBytesRead + " bytes."); } } catch (IOException e) { throw new RuntimeException("Error during streaming: " + e.getMessage(), e); diff --git a/src/test/resources/sample-files/text.pdf b/src/test/resources/sample-files/text.pdf new file mode 100644 index 000000000..fec405395 Binary files /dev/null and b/src/test/resources/sample-files/text.pdf differ