Skip to content

Commit

Permalink
Merge pull request #221 from jelovirt/feature/add-tests
Browse files Browse the repository at this point in the history
Add BOM support for Reader input
  • Loading branch information
jelovirt authored May 9, 2024
2 parents 831acd4 + 86ac549 commit f3b40d1
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 30 deletions.
32 changes: 25 additions & 7 deletions src/main/java/com/elovirta/dita/markdown/MarkdownReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.CharBuffer;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
Expand Down Expand Up @@ -351,18 +352,19 @@ Map.Entry<URI, Locator> getSchema(char[] data, InputSource input) throws SAXPars
@VisibleForTesting
char[] getMarkdownContent(final InputSource input) throws IOException {
final CharArrayWriter out = new CharArrayWriter();
final String encoding = input.getEncoding() != null ? input.getEncoding() : StandardCharsets.UTF_8.name();
final boolean isUtf8 = "UTF-8".equalsIgnoreCase(encoding);
if (input.getByteStream() != null) {
final String encoding = input.getEncoding() != null ? input.getEncoding() : "UTF-8";
try (
BufferedInputStream is = "UTF-8".equalsIgnoreCase(encoding)
BufferedInputStream is = isUtf8
? consumeBOM(input.getByteStream())
: new BufferedInputStream(input.getByteStream());
Reader in = new InputStreamReader(is, encoding)
) {
copy(in, out);
}
} else if (input.getCharacterStream() != null) {
try (Reader in = input.getCharacterStream()) {
try (Reader in = isUtf8 ? consumeBOM(input.getCharacterStream()) : input.getCharacterStream()) {
copy(in, out);
}
} else if (input.getSystemId() != null) {
Expand All @@ -372,11 +374,8 @@ char[] getMarkdownContent(final InputSource input) throws IOException {
} catch (final URISyntaxException e) {
throw new IllegalArgumentException(e);
}
final String encoding = input.getEncoding() != null ? input.getEncoding() : "UTF-8";
try (
BufferedInputStream is = "UTF-8".equalsIgnoreCase(encoding)
? consumeBOM(inUrl.openStream())
: new BufferedInputStream(inUrl.openStream());
BufferedInputStream is = isUtf8 ? consumeBOM(inUrl.openStream()) : new BufferedInputStream(inUrl.openStream());
Reader in = new InputStreamReader(is, encoding)
) {
copy(in, out);
Expand All @@ -403,4 +402,23 @@ private BufferedInputStream consumeBOM(final InputStream in) throws IOException
}
return bin;
}

/**
 * Wraps the given reader in a {@link BufferedReader} and drops a leading
 * byte order mark (U+FEFF) when the first character is one.
 *
 * <p>The first character is read after marking the stream. Any other first
 * character, including the end-of-stream value, causes the reader to be
 * reset to the mark so the caller sees the content unchanged.
 *
 * @param in the original reader
 * @return a buffered reader positioned after a possible BOM
 * @throws IOException if resetting the buffered reader to its mark fails
 */
private BufferedReader consumeBOM(final Reader in) throws IOException {
  final char byteOrderMark = '\uFEFF';
  final BufferedReader buffered = new BufferedReader(in);
  // One character of read-ahead is enough to peek at a potential BOM.
  buffered.mark(1);
  try {
    final int firstChar = buffered.read();
    final boolean startsWithBom = firstChar == byteOrderMark;
    if (!startsWithBom) {
      // Not a BOM: rewind so the first character is not lost.
      buffered.reset();
    }
  } catch (final IOException e) {
    // A failed peek is not reported here; the reader is rewound to the mark instead.
    buffered.reset();
  }
  return buffered;
}
}
18 changes: 12 additions & 6 deletions src/test/java/com/elovirta/dita/markdown/MDitaReaderCoreTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
import static org.junit.jupiter.api.Assertions.*;

import com.elovirta.dita.utils.AbstractReaderTest;
import java.io.InputStream;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import org.opentest4j.AssertionFailedError;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

public class MDitaReaderCoreTest extends AbstractReaderTest {
Expand Down Expand Up @@ -81,11 +83,15 @@ public void test_unsupported(String file) {
@ParameterizedTest
@ValueSource(strings = { "header.md", "invalid_header.md", "invalid_header_third.md" })
public void test_fail(String file) {
try {
run(file);
fail();
} catch (Exception e) {
assertEquals(SAXException.class, e.getCause().getClass());
}
assertThrows(
SAXException.class,
() -> {
final String input = "/" + getSrc() + file;
try (final InputStream in = getClass().getResourceAsStream(input)) {
final InputSource i = new InputSource(in);
reader.parse(i);
}
}
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@

import com.elovirta.dita.utils.AbstractReaderTest;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

public class MDitaReaderExtendedTest extends AbstractReaderTest {
Expand Down Expand Up @@ -80,12 +82,16 @@ public void test(String file) throws Exception {
@ParameterizedTest
@ValueSource(strings = { "header.md", "invalid_header.md", "invalid_header_third.md" })
public void test_fail(String file) {
try {
run(file);
fail();
} catch (Exception e) {
assertEquals(SAXException.class, e.getCause().getClass());
}
assertThrows(
SAXException.class,
() -> {
final String input = "/" + getSrc() + file;
try (final InputStream in = getClass().getResourceAsStream(input)) {
final InputSource i = new InputSource(in);
reader.parse(i);
}
}
);
}

@Test
Expand Down
56 changes: 45 additions & 11 deletions src/test/java/com/elovirta/dita/markdown/MarkdownReaderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,16 @@
import com.vladsch.flexmark.util.data.MutableDataSet;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URI;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Map;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.CsvSource;
import org.junit.jupiter.params.provider.ValueSource;
import org.xml.sax.*;
import org.xml.sax.helpers.XMLFilterImpl;
Expand Down Expand Up @@ -183,21 +187,51 @@ public void test_schemaParseFailure_withoutErrorHandler() throws Exception {
@ParameterizedTest
@ValueSource(strings = { "invalid_header.md" })
public void test_fail(String file) {
try {
run(file);
fail();
} catch (Exception e) {
assertEquals(SAXException.class, e.getCause().getClass());
}
assertThrows(
SAXException.class,
() -> {
final String input = "/" + getSrc() + file;
try (final InputStream in = getClass().getResourceAsStream(input)) {
final InputSource i = new InputSource(in);
reader.parse(i);
}
}
);
}

@Test
public void getMarkdownContent_url() throws Exception {
final String input = getSrc() + "testBOM.md";
@ParameterizedTest
@CsvSource({ "markdown/testBOM.md, UTF-8", "markdown/testNoBOM.md, UTF-8", "markdown/testNoBOM.md, ISO-8859-1" })
public void getMarkdownContent_url(String input, String encoding) throws Exception {
final URL in = getClass().getResource("/" + input);
final InputSource i = new InputSource(in.toString());
final char[] content = new MarkdownReader().getMarkdownContent(i);
assertEquals('W', content[0]);
i.setEncoding(encoding);
final char[] act = new MarkdownReader().getMarkdownContent(i);
assertEquals('W', act[0]);
}

/**
 * Checks that content read from an {@link InputSource} backed by a byte
 * stream starts with {@code 'W'} for each resource/encoding pair.
 *
 * @param input classpath resource path of the Markdown file
 * @param encoding encoding name set on the input source
 */
@ParameterizedTest
@CsvSource({ "/markdown/testBOM.md, UTF-8", "/markdown/testNoBOM.md, UTF-8", "/markdown/testNoBOM.md, ISO-8859-1" })
public void getMarkdownContent_byteStream(String input, String encoding) throws Exception {
  try (InputStream resource = getClass().getResourceAsStream(input)) {
    final InputSource source = new InputSource(resource);
    source.setEncoding(encoding);
    final char[] content = new MarkdownReader().getMarkdownContent(source);
    // The first character must be 'W' for every resource/encoding pair.
    assertEquals('W', content[0]);
  }
}

/**
 * Checks that content read from an {@link InputSource} backed by a character
 * stream starts with {@code 'W'} for each resource/encoding pair.
 *
 * <p>The resource is always decoded as UTF-8 by the reader created here; the
 * {@code encoding} argument is only set on the input source.
 *
 * @param input classpath resource path of the Markdown file
 * @param encoding encoding name set on the input source
 */
@ParameterizedTest
@CsvSource({ "/markdown/testBOM.md, UTF-8", "/markdown/testNoBOM.md, UTF-8", "/markdown/testNoBOM.md, ISO-8859-1" })
public void getMarkdownContent_characterStream(String input, String encoding) throws Exception {
  try (
    InputStream resource = getClass().getResourceAsStream(input);
    Reader decoded = new InputStreamReader(resource, StandardCharsets.UTF_8)
  ) {
    final InputSource source = new InputSource(decoded);
    source.setEncoding(encoding);
    final char[] content = new MarkdownReader().getMarkdownContent(source);
    // The first character must be 'W' for every resource/encoding pair.
    assertEquals('W', content[0]);
  }
}

@Test
Expand Down

0 comments on commit f3b40d1

Please sign in to comment.