From 84bcfffa846b441020cbb4d22d74a64e83f9b89c Mon Sep 17 00:00:00 2001 From: Carl Wilson Date: Mon, 28 Oct 2024 14:22:00 +0000 Subject: [PATCH] FIX: Module reporting XML schema errors Fixed minor issues in the XmlHandler, JPEG2000 and HTML modules that meant reporting output was not valid against the schema. - replaced the `XmlHandler:cleanURIString()` implementation with something less radical that doesn't produce invalid XML in testing; - fixed badly formatted dates in the HTML module document details; - fixed bug in the JPEG2000 module where empty `CompositeLayerHeader` lists were created, these fail report schema validation; - bumped the JPEG2000 and HTML module versions and release dates; - added fixes to test scripts for the above; and - removed some commented out code from XML reporting module. --- jhove-bbt/scripts/create-1.33-target.sh | 31 ++++ .../hul/ois/jhove/handler/XmlHandler.java | 78 +++++----- .../hul/ois/jhove/module/HtmlModule.java | 8 +- .../hul/ois/jhove/module/Jpeg2000Module.java | 4 +- .../module/jpeg2000/ComposLayerHdrBox.java | 135 ++++++++---------- 5 files changed, 133 insertions(+), 123 deletions(-) diff --git a/jhove-bbt/scripts/create-1.33-target.sh b/jhove-bbt/scripts/create-1.33-target.sh index 0191c69e3..50acffc85 100755 --- a/jhove-bbt/scripts/create-1.33-target.sh +++ b/jhove-bbt/scripts/create-1.33-target.sh @@ -56,3 +56,34 @@ echo "TEST BASELINE: Creating baseline" echo " - copying ${baselineRoot} baseline to ${targetRoot}" cp -R "${baselineRoot}" "${targetRoot}" +# Update release details for HTML module +find "${targetRoot}" -type f -name "*.html.jhove.xml" -exec sed -i 's/HTML-hul<\/reportingModule>/HTML-hul<\/reportingModule>/' {} \; +find "${targetRoot}" -type f -name "audit.jhove.xml" -exec sed -i 's/HTML-hul<\/module>/HTML-hul<\/module>/' {} \; +find "${targetRoot}" -type f -name "audit-HTML-hul.jhove.xml" -exec sed -i 's/1.4.4<\/release>/1.4.5<\/release>/' {} \; +find "${targetRoot}" -type f -name 
"audit-HTML-hul.jhove.xml" -exec sed -i 's/2024-08-22/2024-11-27/' {} \; +find "${targetRoot}" -type f -name "audit-HTML-hul.jhove.xml" -exec sed -i 's/01-08-2002/2002-08-01/' {} \; +find "${targetRoot}" -type f -name "audit-HTML-hul.jhove.xml" -exec sed -i 's/31-05-2001/2001-05-31/' {} \; + +# Update release details for JPEG 2000 module +find "${targetRoot}" -type f -name "*.jp2.jhove.xml" -exec sed -i 's/JPEG2000-hul<\/reportingModule>/JPEG2000-hul<\/reportingModule>/' {} \; +find "${targetRoot}" -type f -name "*.jpx.jhove.xml" -exec sed -i 's/JPEG2000-hul<\/reportingModule>/JPEG2000-hul<\/reportingModule>/' {} \; +find "${targetRoot}" -type f -name "*.md.jhove.xml" -exec sed -i 's/JPEG2000-hul<\/reportingModule>/JPEG2000-hul<\/reportingModule>/' {} \; +find "${targetRoot}" -type f -name "audit.jhove.xml" -exec sed -i 's/JPEG2000-hul<\/module>/JPEG2000-hul<\/module>/' {} \; +find "${targetRoot}" -type f -name "audit-JPEG2000-hul.jhove.xml" -exec sed -i 's/1.4.4<\/release>/1.4.5<\/release>/' {} \; +find "${targetRoot}" -type f -name "audit-JPEG2000-hul.jhove.xml" -exec sed -i 's/2023-03-16/2024-11-27/' {} \; + +# Copy the files affected by the relative URL output changes to the XML reporting module +if [[ -f "${candidateRoot}/errors/modules/JPEG2000-hul/ランダム日本語テキスト.jp2.jhove.xml" ]]; then + cp "${candidateRoot}/errors/modules/JPEG2000-hul/ランダム日本語テキスト.jp2.jhove.xml" "${targetRoot}/errors/modules/JPEG2000-hul/ランダム日本語テキスト.jp2.jhove.xml" +fi +if [[ -f "${candidateRoot}/errors/modules/JPEG2000-hul/隨機中國文字.jp2.jhove.xml" ]]; then + cp "${candidateRoot}/errors/modules/JPEG2000-hul/隨機中國文字.jp2.jhove.xml" "${targetRoot}/errors/modules/JPEG2000-hul/隨機中國文字.jp2.jhove.xml" +fi + +# Copy the files affected by the change to the JPEG-2000 module that prevents empty CompositeLayerHeader lists from being created +if [[ -f "${candidateRoot}/errors/modules/JPEG2000-hul/is_jpx.jp2.jhove.xml" ]]; then + cp "${candidateRoot}/errors/modules/JPEG2000-hul/is_jpx.jp2.jhove.xml" 
"${targetRoot}/errors/modules/JPEG2000-hul/is_jpx.jp2.jhove.xml" +fi +if [[ -f "${candidateRoot}/examples/modules/JPEG2000-hul/ROITest.jpx.jhove.xml" ]]; then + cp "${candidateRoot}/examples/modules/JPEG2000-hul/ROITest.jpx.jhove.xml" "${targetRoot}/examples/modules/JPEG2000-hul/ROITest.jpx.jhove.xml" +fi diff --git a/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/XmlHandler.java b/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/XmlHandler.java index e84b4fee3..f24daa2dd 100644 --- a/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/XmlHandler.java +++ b/jhove-core/src/main/java/edu/harvard/hul/ois/jhove/handler/XmlHandler.java @@ -20,6 +20,9 @@ package edu.harvard.hul.ois.jhove.handler; +import java.io.File; +import java.net.URI; +import java.net.URISyntaxException; import java.text.NumberFormat; import java.util.Date; import java.util.Iterator; @@ -352,11 +355,6 @@ public void show(RepInfo info) { _writer.println(margn2 + element("reportingModule", attr2, module.getName())); } - /* - * else { String [][] attr2 = { {"severity", "error"} }; _writer.println - * (margn2 + element ("message", attr2, - * "file not found or not readable")); } - */ Date date = info.getCreated(); if (date != null) { _writer.println(margn2 + element("created", toDateTime(date))); @@ -4455,51 +4453,41 @@ private void writeAESTimeRangePart(String indent, String elementName, AESAudioMe element(elementName, attributes, String.valueOf(timeDesc.getSamples()))); } - /* - * Clean up a URI string by escaping forbidden characters. We assume - * (perhaps dangerously) that a % is the start of an already escaped - * hexadecimal sequence. 
+ /** + * Returns a path normalised URI from the presented string path. + * Solution based upon the following post from Eugene Yokota: + * https://eed3si9n.com/encoding-file-path-as-URI-reference/ */ - private String cleanURIString(String uri) { - StringBuffer sb = new StringBuffer(uri.length() * 2); - boolean change = false; - for (int i = 0; i < uri.length(); i++) { - char c = uri.charAt(i); - if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') - || (c >= '0' && c <= '9') || (c == '%') || // assume it's an - // escape - ("-_.!~*'();/?:@=+$,".indexOf(c) >= 0)) { - sb.append(c); - } else { - int cval = c; - - // More significant hex digit - int mshd = (cval >> 4); - if (mshd >= 10) { - mshd += 'A' - 10; - } else { - mshd += '0'; - } - sb.append('%'); - sb.append((char) mshd); - - // Less significant hex digit - int lshd = (cval & 0X0F); - if (lshd >= 10) { - lshd += 'A' - 10; + private static final String cleanURIString(final String path) { + File input = new File(path); + final boolean isWindows = System.getProperty("os.name").toLowerCase(Locale.ENGLISH).contains("windows"); + final String fileScheme = "file"; + try { + if (isWindows && !path.isEmpty() && path.startsWith(Character.toString(File.separatorChar))) { + if (path.startsWith("\\")) { + return new URI(fileScheme, normaliseToSlash(path), null).toString(); } else { - lshd += '0'; + return new URI(fileScheme, "", normaliseToSlash(path), null).toString(); } - sb.append((char) lshd); - change = true; + } else if (input.isAbsolute()) { + return new URI(fileScheme, "", normaliseToSlash(ensureHeadSlash(input.getAbsolutePath())), null) + .toString(); } + return new URI(null, normaliseToSlash(path), null).toString(); + } catch (URISyntaxException e) { + // If this fails simply return the original path + return path; } - // For efficiency, return the original string - // if nothing changed. 
- if (change) { - return sb.toString(); - } - return uri; + } + + private static final String ensureHeadSlash(final String name) { + return (!name.isEmpty() && name.startsWith(Character.toString(File.separatorChar))) + ? Character.toString(File.separatorChar) + name + : name; + } + + private static final String normaliseToSlash(final String name) { + return (File.separatorChar == '/') ? name : name.replace(File.separatorChar, '/'); } /** Appends a Rational value to a StringBuffer */ diff --git a/jhove-modules/html-hul/src/main/java/edu/harvard/hul/ois/jhove/module/HtmlModule.java b/jhove-modules/html-hul/src/main/java/edu/harvard/hul/ois/jhove/module/HtmlModule.java index 9d3f8d727..abe70a23f 100644 --- a/jhove-modules/html-hul/src/main/java/edu/harvard/hul/ois/jhove/module/HtmlModule.java +++ b/jhove-modules/html-hul/src/main/java/edu/harvard/hul/ois/jhove/module/HtmlModule.java @@ -105,8 +105,8 @@ public class HtmlModule extends ModuleBase { private static final String XHTML_1_1_STR = "XHTML 1.1"; private static final String NAME = "HTML-hul"; - private static final String RELEASE = "1.4.4"; - private static final int[] DATE = { 2024, 8, 22 }; + private static final String RELEASE = "1.4.5"; + private static final int[] DATE = { 2024, 11, 27 }; private static final String[] FORMAT = { "HTML" }; private static final String COVERAGE = "HTML 3.2, HTML 4.0 Strict," + "HTML 4.0 Transitional, HTML 4.0 Frameset, " @@ -231,7 +231,7 @@ public HtmlModule() { + "(Second Edition)", DocumentType.REPORT); doc.setPublisher(w3cAgent); - doc.setDate("01-08-2002"); + doc.setDate("2002-08-01"); doc.setIdentifier(new Identifier("http://www.w3.org/TR/xhtml1/", IdentifierType.URL)); _specification.add(doc); @@ -240,7 +240,7 @@ public HtmlModule() { doc = new Document(" XHTML(TM) 1.1 - Module-based XHTML", DocumentType.REPORT); doc.setPublisher(w3cAgent); - doc.setDate("31-05-2001"); + doc.setDate("2001-05-31"); doc.setIdentifier(new Identifier( 
"http://www.w3.org/TR/2001/REC-xhtml11-20010531/", IdentifierType.URL)); diff --git a/jhove-modules/jpeg2000-hul/src/main/java/edu/harvard/hul/ois/jhove/module/Jpeg2000Module.java b/jhove-modules/jpeg2000-hul/src/main/java/edu/harvard/hul/ois/jhove/module/Jpeg2000Module.java index 71b0dfcf3..6b6e8fa52 100644 --- a/jhove-modules/jpeg2000-hul/src/main/java/edu/harvard/hul/ois/jhove/module/Jpeg2000Module.java +++ b/jhove-modules/jpeg2000-hul/src/main/java/edu/harvard/hul/ois/jhove/module/Jpeg2000Module.java @@ -105,8 +105,8 @@ public class Jpeg2000Module extends ModuleBase { ******************************************************************/ private static final String NAME = "JPEG2000-hul"; - private static final String RELEASE = "1.4.4"; - private static final int[] DATE = { 2023, 03, 16 }; + private static final String RELEASE = "1.4.5"; + private static final int[] DATE = { 2024, 11, 27 }; private static final String[] FORMAT = { "JPEG 2000", "JP2", "JPX" }; private static final String COVERAGE = "JP2 (ISO/IEC 15444-1:2000/" + "ITU-T Rec. T.800 (200)), JPX (ISO/IEC 15444-2:2004)"; diff --git a/jhove-modules/jpeg2000-hul/src/main/java/edu/harvard/hul/ois/jhove/module/jpeg2000/ComposLayerHdrBox.java b/jhove-modules/jpeg2000-hul/src/main/java/edu/harvard/hul/ois/jhove/module/jpeg2000/ComposLayerHdrBox.java index 6bb72a170..996441b2f 100644 --- a/jhove-modules/jpeg2000-hul/src/main/java/edu/harvard/hul/ois/jhove/module/jpeg2000/ComposLayerHdrBox.java +++ b/jhove-modules/jpeg2000-hul/src/main/java/edu/harvard/hul/ois/jhove/module/jpeg2000/ComposLayerHdrBox.java @@ -24,138 +24,129 @@ public class ComposLayerHdrBox extends JP2Box { private Property channelDefProp; private Property codestreamRegProp; private List colorSpecs; - - /** - * Constructor with superbox. + * Constructor with superbox. 
* - * @param parent parent superbox of this box + * @param parent parent superbox of this box */ public ComposLayerHdrBox(RandomAccessFile raf, BoxHolder parent) { super(raf, parent); } - /** Reads the box, putting appropriate information in - * the RepInfo object. setModule, setBoxHeader, - * setRepInfo and setDataInputStream must be called - * before readBox is called. - * readBox must completely consume the - * box, so that the next byte to be read by the - * DataInputStream is the FF byte of the next Box. + /** + * Reads the box, putting appropriate information in + * the RepInfo object. setModule, setBoxHeader, + * setRepInfo and setDataInputStream must be called + * before readBox is called. + * readBox must completely consume the + * box, so that the next byte to be read by the + * DataInputStream is the FF byte of the next Box. */ @Override - public boolean readBox() throws IOException { + public boolean readBox() throws IOException { if (_parentBox != null) { // Box must be at top level. wrongBoxContext(); return false; } - initBytesRead (); + initBytesRead(); hasBoxes = true; - colorSpecs = new LinkedList<> (); - + colorSpecs = new LinkedList<>(); + // Unlike some other boxes, compositing layer boxes // are numbered by their order in the file, starting - // with 0. A definite case of design by committee. + // with 0. A definite case of design by committee. 
JP2Box box = null; boolean hasOpacity = false; boolean hasChannelDef = false; - while (hasNext ()) { - box = (JP2Box) next (); + while (hasNext()) { + box = (JP2Box) next(); if (box == null) { break; } if (box instanceof ColorGroupBox || - box instanceof OpacityBox || - box instanceof ChannelDefBox || - box instanceof CodestreamRegBox || - box instanceof IPRBox || - box instanceof ResolutionBox || - box instanceof LabelBox) { - if (!box.readBox ()) { - return false; - } - if (box instanceof OpacityBox) { - hasOpacity = true; - } - else if (box instanceof ChannelDefBox) { - hasChannelDef = true; - } - if (box instanceof LabelBox) { - label = new Property ("Label", - PropertyType.STRING, - ((LabelBox) box).getLabel ()); - } - } - else { - box.skipBox (); + box instanceof OpacityBox || + box instanceof ChannelDefBox || + box instanceof CodestreamRegBox || + box instanceof IPRBox || + box instanceof ResolutionBox || + box instanceof LabelBox) { + if (!box.readBox()) { + return false; + } + if (box instanceof OpacityBox) { + hasOpacity = true; + } else if (box instanceof ChannelDefBox) { + hasChannelDef = true; + } + if (box instanceof LabelBox) { + label = new Property("Label", + PropertyType.STRING, + ((LabelBox) box).getLabel()); + } + } else { + box.skipBox(); } } if (hasOpacity && hasChannelDef) { - _repInfo.setMessage (new ErrorMessage - (MessageConstants.JPEG2000_HUL_12, - _module.getFilePos ())); - _repInfo.setValid (false); + _repInfo.setMessage(new ErrorMessage(MessageConstants.JPEG2000_HUL_12, + _module.getFilePos())); + _repInfo.setValid(false); } - finalizeBytesRead (); - - List propList = new ArrayList (4); + finalizeBytesRead(); + + List propList = new ArrayList(4); if (label != null) { - propList.add (label); + propList.add(label); } - if (!colorSpecs.isEmpty ()) { - propList.add (new Property ("ColorSpecs", + if (!colorSpecs.isEmpty()) { + propList.add(new Property("ColorSpecs", PropertyType.PROPERTY, PropertyArity.LIST, colorSpecs)); } if (opacityProp 
!= null) { - propList.add (opacityProp); + propList.add(opacityProp); } if (channelDefProp != null) { - propList.add (channelDefProp); + propList.add(channelDefProp); } if (codestreamRegProp != null) { - propList.add (codestreamRegProp); + propList.add(codestreamRegProp); } - _module.addComposLayer(new Property - ("CompositeLayerHeader", + if (!propList.isEmpty()) { + _module.addComposLayer(new Property("CompositeLayerHeader", PropertyType.PROPERTY, PropertyArity.LIST, propList)); + } return true; } - /** Add a color specification property. */ - protected void addColorSpec (Property p) - { - colorSpecs.add (p); + protected void addColorSpec(Property p) { + colorSpecs.add(p); } - + /** Add an opacity property. */ - protected void addOpacity (Property p) - { + protected void addOpacity(Property p) { opacityProp = p; } - + /** Add channel definition property. */ - protected void addChannelDef (Property p) - { + protected void addChannelDef(Property p) { channelDefProp = p; } - + /** Add codestream registration property. */ - protected void addCodestreamReg (Property p) - { + protected void addCodestreamReg(Property p) { codestreamRegProp = p; } - /** Returns the name of the Box. */ + /** Returns the name of the Box. */ @Override - protected String getSelfPropName () - { + protected String getSelfPropName() { return "Compositing Layer Header Box"; } }