From dbde592476c0f66ecacf93559a03bc3753daa457 Mon Sep 17 00:00:00 2001 From: "Hannes R. Brunsch" Date: Wed, 10 Jul 2024 13:44:56 +0200 Subject: [PATCH] feat: treat paragraph ends as new lines --- src/main/java/eu/snik/tag/DocxLoader.java | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/main/java/eu/snik/tag/DocxLoader.java b/src/main/java/eu/snik/tag/DocxLoader.java index 9ba9ac13..6093ea56 100644 --- a/src/main/java/eu/snik/tag/DocxLoader.java +++ b/src/main/java/eu/snik/tag/DocxLoader.java @@ -3,6 +3,7 @@ import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.HashSet; import java.util.LinkedHashSet; @@ -35,16 +36,21 @@ public DocxLoader(InputStream in) throws IOException { } /** from https://stackoverflow.com/questions/19676282/docx4j-find-and-replace */ - static List getAllElementsFromObject(Object obj, Class toSearch) { + static List getAllElementsFromObject(Object obj, Class... toSearch) { List result = new ArrayList(); if (obj instanceof JAXBElement) obj = ((JAXBElement) obj).getValue(); - if (obj.getClass().equals(toSearch)) result.add(obj); else if (obj instanceof ContentAccessor) { + if (obj instanceof ContentAccessor) { List children = ((ContentAccessor) obj).getContent(); for (Object child : children) { result.addAll(getAllElementsFromObject(child, toSearch)); } } + + if (Arrays.asList(toSearch).contains(obj.getClass())) { + result.add(obj); + } + return result; } @@ -57,10 +63,14 @@ public String getText() { var doc = wordMLPackage.getMainDocumentPart(); var parts = new ArrayList(); - List texts = getAllElementsFromObject(doc, org.docx4j.wml.Text.class); + List texts = getAllElementsFromObject(doc, org.docx4j.wml.Text.class, org.docx4j.wml.P.class); for (Object t : texts) { - org.docx4j.wml.Text content = (org.docx4j.wml.Text) t; - parts.add(content.getValue()); + if(t instanceof org.docx4j.wml.P) { + parts.add("\n\n"); + } else { + org.docx4j.wml.Text content = (org.docx4j.wml.Text) t; + parts.add(content.getValue()); + } } return parts .stream() @@ -81,7 +91,7 @@ public String getText() { return a + (b.startsWith(" ") ? b : (" " + b)); } - return a + '\n' + b; + return a + b; } ) .get();