Skip to content

Commit

Permalink
feat: treat paragraph ends as new lines
Browse files Browse the repository at this point in the history
  • Loading branch information
Yagnap committed Jul 10, 2024
1 parent 7ec9a8e commit dbde592
Showing 1 changed file with 16 additions and 6 deletions.
22 changes: 16 additions & 6 deletions src/main/java/eu/snik/tag/DocxLoader.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedHashSet;
Expand Down Expand Up @@ -35,16 +36,21 @@ public DocxLoader(InputStream in) throws IOException {
}

/** from https://stackoverflow.com/questions/19676282/docx4j-find-and-replace */
static List<Object> getAllElementsFromObject(Object obj, Class<?> toSearch) {
static List<Object> getAllElementsFromObject(Object obj, Class<?>... toSearch) {
List<Object> result = new ArrayList<Object>();
if (obj instanceof JAXBElement) obj = ((JAXBElement<?>) obj).getValue();

if (obj.getClass().equals(toSearch)) result.add(obj); else if (obj instanceof ContentAccessor) {
if (obj instanceof ContentAccessor) {
List<?> children = ((ContentAccessor) obj).getContent();
for (Object child : children) {
result.addAll(getAllElementsFromObject(child, toSearch));
}
}

if (Arrays.asList(toSearch).contains(obj.getClass())) {
result.add(obj);
}

return result;
}

Expand All @@ -57,10 +63,14 @@ public String getText() {
var doc = wordMLPackage.getMainDocumentPart();
var parts = new ArrayList<String>();

List<Object> texts = getAllElementsFromObject(doc, org.docx4j.wml.Text.class);
List<Object> texts = getAllElementsFromObject(doc, org.docx4j.wml.Text.class, org.docx4j.wml.P.class);
for (Object t : texts) {
org.docx4j.wml.Text content = (org.docx4j.wml.Text) t;
parts.add(content.getValue());
if(t instanceof org.docx4j.wml.P) {
parts.add("\n\n");
} else {
org.docx4j.wml.Text content = (org.docx4j.wml.Text) t;
parts.add(content.getValue());
}
}
return parts
.stream()
Expand All @@ -81,7 +91,7 @@ public String getText() {
return a + (b.startsWith(" ") ? b : (" " + b));
}

return a + '\n' + b;
return a + b;
}
)
.get();
Expand Down

0 comments on commit dbde592

Please sign in to comment.