From 8cea20fb770926677f03ef8aa4933edf3a8f2e6c Mon Sep 17 00:00:00 2001
From: bayang
Date: Fri, 20 May 2022 09:28:22 +0200
Subject: [PATCH] fix: try to make metadata grabber work on ARM

---
 .../service/metadata/FetchMetadataService.kt  | 37 ++++++++++++++++++-
 .../metadata/FetchMetadataServiceTest.kt      | 18 +++++++++
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/src/main/kotlin/io/github/bayang/jelu/service/metadata/FetchMetadataService.kt b/src/main/kotlin/io/github/bayang/jelu/service/metadata/FetchMetadataService.kt
index 3c579ce3..39fb8f7b 100644
--- a/src/main/kotlin/io/github/bayang/jelu/service/metadata/FetchMetadataService.kt
+++ b/src/main/kotlin/io/github/bayang/jelu/service/metadata/FetchMetadataService.kt
@@ -82,7 +82,14 @@ class FetchMetadataService(
             val process: Process = builder.start()
             val exitVal = process.waitFor()
             if (exitVal == 0) {
-                val output: String = process.inputStream.bufferedReader().readText()
+                var output: String = process.inputStream.bufferedReader().readText()
+                // on ARM the fetch-ebook-metadata binary outputs a python byte string instead of a regular string
+                // cf test case for a sample string
+                // so we try to clean it ourselves... This is ugly
+                if (! output.startsWith('<')) {
+                    logger.trace { "fetch metadata output is not regular xml : $output" }
+                    output = cleanXml(output)
+                }
                 logger.trace { "fetch metadata output $output" }
                 val parseOpf: MetadataDto = parseOpf(output)
                 if (!isbn.isNullOrBlank()) {
@@ -105,6 +112,34 @@ class FetchMetadataService(
         return MetadataDto()
     }
 
+    /**
+     * Extracts the xml document embedded in [output] by dropping everything located before
+     * the first `<` and after the last `>`.
+     *
+     * Blank input, or input without any `<...>` span, is returned unchanged so that unexpected
+     * output of the external binary does not fail here with an index out of bounds error.
+     */
+    fun removeTrailingAndLeadingChars(output: String): String {
+        val startIndex = output.indexOf('<')
+        val endIndex = output.lastIndexOf('>')
+        // no markup found : there is nothing that can be trimmed safely
+        if (startIndex < 0 || endIndex < startIndex) {
+            return output
+        }
+        return output.substring(startIndex, endIndex + 1)
+    }
+
+    /**
+     * Converts the textual form of a python byte string (`b'...'`), as printed by the
+     * fetch-ebook-metadata binary on ARM, back to plain xml : the characters surrounding the
+     * document are dropped, escaped single quotes are unescaped and the literal `\n`
+     * sequences are removed.
+     */
+    fun cleanXml(input: String): String =
+        removeTrailingAndLeadingChars(input)
+            .replace("\\'", "'")
+            .replace("\\n", "")
+
     fun parseOpf(input: String): MetadataDto {
         val stream = BufferedInputStream(ByteArrayInputStream(input.toByteArray(Charsets.UTF_8)))
         val root: SMHierarchicCursor = factory.rootElementCursor(stream)
diff --git a/src/test/kotlin/io/github/bayang/jelu/service/metadata/FetchMetadataServiceTest.kt b/src/test/kotlin/io/github/bayang/jelu/service/metadata/FetchMetadataServiceTest.kt
index 32bb70a2..5ead5593 100644
--- a/src/test/kotlin/io/github/bayang/jelu/service/metadata/FetchMetadataServiceTest.kt
+++ b/src/test/kotlin/io/github/bayang/jelu/service/metadata/FetchMetadataServiceTest.kt
@@ -70,4 +70,22 @@ class FetchMetadataServiceTest(@Autowired private val fetchMetadataService: Fetc
         Assertions.assertEquals("9782361831523", metadata.isbn13)
         Assertions.assertEquals(2, metadata.authors.size)
     }
+
+    @Test
+    fun testParseOpfExtraCharactersAroundXml() {
+        var input = """
+            |b'\n\n \n daabc6db-820e-43d8-bf8c-2b89bcf07460\n dd53ac75-d6c3-4428-ab42-f054a5358a36\n The Fellowship of the Ring\n J. R. R. Tolkien\n calibre (4.99.4) [https://calibre-ebook.com]\n 1986-08-15T08:20:07.274919+00:00\n One Ring to rule them all, One Ring to find them, One Ring to bring them all and in the darkeness bind them. In ancient times the Rings of Power were crafted by the Elven-smiths, and Sauron, The Dark Lord, forged the One Ring, filling it with his own power so that he could rule all others. But the One Ring was taken from him, and though he sought it throughout Middle-earth, it remained lost to him. After many ages it fell into the hands of Bilbo Baggins, as told in The Hobbit. In a sleepy village in the Shire, young Frodo Baggins finds himself faced with an immense task, as his elderly cousin Bilbo entrusts the Ring to his care. Frodo must leave his home and make a perilous journey across Middle-earth to the Cracks of Doom, there to destroy the Ring and foil the Dark Lord in his evil purpose.\n Ballantine Books\n 3flBjgEACAAJ\n 9780808520764\n 0345339703\n eng\n Fiction\n Classics\n Fantasy\n Epic\n Juvenile Fiction\n Fantasy & Magic\n \n \n \n \n\n'
+        """.trimMargin()
+        if (! input.startsWith('<')) {
+            input = fetchMetadataService.cleanXml(input)
+        }
+        val metadata = fetchMetadataService.parseOpf(input)
+        Assertions.assertEquals("3flBjgEACAAJ", metadata.googleId)
+        Assertions.assertEquals("0345339703", metadata.amazonId)
+        Assertions.assertNull(metadata.goodreadsId)
+        Assertions.assertEquals("9780808520764", metadata.isbn13)
+        Assertions.assertEquals("The Fellowship of the Ring", metadata.title)
+        Assertions.assertEquals(1, metadata.authors.size)
+        Assertions.assertEquals("J. R. R. Tolkien", metadata.authors.first())
+    }
 }