Skip to content

Commit

Permalink
fix: try to make metadata grabber work on ARM
Browse files Browse the repository at this point in the history
  • Loading branch information
bayang committed May 20, 2022
1 parent bd42faa commit 8cea20f
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,14 @@ class FetchMetadataService(
val process: Process = builder.start()
val exitVal = process.waitFor()
if (exitVal == 0) {
val output: String = process.inputStream.bufferedReader().readText()
var output: String = process.inputStream.bufferedReader().readText()
// on ARM the fetch-ebook-metadata binary outputs a python byte string instead of a regular string
// cf test case for a sample string
// so we try to clean it ourselves... This is ugly
if (! output.startsWith('<')) {
logger.trace { "fetch metadata output is not regular xml : $output" }
output = cleanXml(output)
}
logger.trace { "fetch metadata output $output" }
val parseOpf: MetadataDto = parseOpf(output)
if (!isbn.isNullOrBlank()) {
Expand All @@ -105,6 +112,32 @@ class FetchMetadataService(
return MetadataDto()
}

/**
 * Strips everything that precedes the first '<' and everything that follows the last '>' in [output].
 *
 * Used to recover the XML payload from a string that carries extra characters around it,
 * e.g. `b'<package>...</package>\n'` becomes `<package>...</package>`.
 *
 * @param output the raw text to trim
 * @return the substring running from the first '<' to the last '>' (both inclusive), or [output]
 * unchanged when it contains no such span (empty, blank, no '<', no '>', or the last '>' sits
 * before the first '<')
 */
fun removeTrailingAndLeadingChars(output: String): String {
    val startIndex = output.indexOf('<')
    val endIndex = output.lastIndexOf('>')
    // Without a "<...>" span there is nothing to extract: hand the input back untouched instead of
    // walking past the string bounds. This also covers the empty and blank cases.
    if (startIndex == -1 || endIndex < startIndex) {
        return output
    }
    return output.substring(startIndex, endIndex + 1)
}

/**
 * Turns XML that is wrapped in extra characters and contains backslash escapes into plain XML.
 *
 * The text is first cut down with [removeTrailingAndLeadingChars]; then every backslash-escaped
 * single quote (`\'`) is replaced by a plain `'` and every literal backslash-n sequence (the two
 * characters `\` and `n`) is removed. No other escape sequence is touched.
 *
 * @param input the raw text to clean
 * @return the cleaned text
 */
fun cleanXml(input: String): String =
    removeTrailingAndLeadingChars(input)
        .replace("\\'", "'")
        .replace("\\n", "")

fun parseOpf(input: String): MetadataDto {
val stream = BufferedInputStream(ByteArrayInputStream(input.toByteArray(Charsets.UTF_8)))
val root: SMHierarchicCursor = factory.rootElementCursor(stream)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,22 @@ class FetchMetadataServiceTest(@Autowired private val fetchMetadataService: Fetc
Assertions.assertEquals("9782361831523", metadata.isbn13)
Assertions.assertEquals(2, metadata.authors.size)
}

@Test
fun testParseOpfExtraCharactersAroundXml() {
    // Sample OPF document wrapped in b'...' with escaped quotes and literal "\n" sequences.
    // The raw string content is kept at column 0 so lines without a margin prefix stay untouched.
    val rawOutput = """
|b'<?xml version=\'1.0\' encoding=\'utf-8\'?>\n<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="uuid_id" version="2.0">\n <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">\n <dc:identifier opf:scheme="calibre" id="calibre_id">daabc6db-820e-43d8-bf8c-2b89bcf07460</dc:identifier>\n <dc:identifier opf:scheme="uuid" id="uuid_id">dd53ac75-d6c3-4428-ab42-f054a5358a36</dc:identifier>\n <dc:title>The Fellowship of the Ring</dc:title>\n <dc:creator opf:file-as="Unknown" opf:role="aut">J. R. R. Tolkien</dc:creator>\n <dc:contributor opf:file-as="calibre" opf:role="bkp">calibre (4.99.4) [https://calibre-ebook.com]</dc:contributor>\n <dc:date>1986-08-15T08:20:07.274919+00:00</dc:date>\n <dc:description>One Ring to rule them all, One Ring to find them, One Ring to bring them all and in the darkeness bind them. In ancient times the Rings of Power were crafted by the Elven-smiths, and Sauron, The Dark Lord, forged the One Ring, filling it with his own power so that he could rule all others. But the One Ring was taken from him, and though he sought it throughout Middle-earth, it remained lost to him. After many ages it fell into the hands of Bilbo Baggins, as told in The Hobbit. In a sleepy village in the Shire, young Frodo Baggins finds himself faced with an immense task, as his elderly cousin Bilbo entrusts the Ring to his care. 
Frodo must leave his home and make a perilous journey across Middle-earth to the Cracks of Doom, there to destroy the Ring and foil the Dark Lord in his evil purpose.</dc:description>\n <dc:publisher>Ballantine Books</dc:publisher>\n <dc:identifier opf:scheme="GOOGLE">3flBjgEACAAJ</dc:identifier>\n <dc:identifier opf:scheme="ISBN">9780808520764</dc:identifier>\n <dc:identifier opf:scheme="AMAZON">0345339703</dc:identifier>\n <dc:language>eng</dc:language>\n <dc:subject>Fiction</dc:subject>\n <dc:subject>Classics</dc:subject>\n <dc:subject>Fantasy</dc:subject>\n <dc:subject>Epic</dc:subject>\n <dc:subject>Juvenile Fiction</dc:subject>\n <dc:subject>Fantasy &amp; Magic</dc:subject>\n <meta name="calibre:author_link_map" content="{}"/>\n <meta name="calibre:rating" content="5"/>\n </metadata>\n <guide/>\n</package>\n'
""".trimMargin()
    // Only clean the text when it does not already start with an XML tag.
    val xml = if (rawOutput.startsWith('<')) rawOutput else fetchMetadataService.cleanXml(rawOutput)

    val metadata = fetchMetadataService.parseOpf(xml)

    Assertions.assertEquals("3flBjgEACAAJ", metadata.googleId)
    Assertions.assertEquals("0345339703", metadata.amazonId)
    Assertions.assertNull(metadata.goodreadsId)
    Assertions.assertEquals("9780808520764", metadata.isbn13)
    Assertions.assertEquals("The Fellowship of the Ring", metadata.title)
    Assertions.assertEquals(1, metadata.authors.size)
    Assertions.assertEquals("J. R. R. Tolkien", metadata.authors.first())
}
}

0 comments on commit 8cea20f

Please sign in to comment.