Skip to content

Commit

Permalink
Formalize extended attribute xml chars (#2)
Browse files Browse the repository at this point in the history
* Created scenario to reproduce xml chars issue.

* Added apache commons text dependency.
Updated XMLUtils to use StringEscapeUtils to escape and unescape XML.
Updated XmlDecomposer to rely on the XML bytes and unescape/escape XML chars for the formalized extended attributes.

* Extended decompose scenario for special characters to all XML special chars.

* Created special character scenario for compose of formalized attributes.
Updated compose of formalize attribute to use getBytes instead of getText, and apply unescape to count characters (just like on the decompose side).

* Added formalized attributes into the CR and CRLF composed and decomposed scenarios.

* Added getLineSeparator method to FileContentAndCharset class.
Refactored compose and decompose to use line separator from file for extended attributes.
Updated new line handling scenarios to adjust length with right new-line chars.

* Moved assembly of executable jar to build phase in workflow.

* Refactored Executor main to catch exception itself and handle System exit code correctly.

* Added steps for Cucumber to run PDC in separate process and check exit code.
Added feature file for testing exit code.
Fixed typo in special characters feature file.

* Updated release notes for version 1.6.1.
  • Loading branch information
harmen-xb authored Mar 8, 2024
1 parent de88a7e commit 9377a93
Show file tree
Hide file tree
Showing 21 changed files with 346 additions and 84 deletions.
8 changes: 1 addition & 7 deletions .github/workflows/build-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:

# Build with Maven
- name: Build
run: mvn --batch-mode install -DskipTests
run: mvn --batch-mode install assembly:single -DskipTests
working-directory: ./PowerDeComposer

# Test with Maven
Expand All @@ -53,12 +53,6 @@ jobs:
path: PowerDeComposer/target/surefire-reports/TEST-*.xml
reporter: java-junit

# Build single jar
- name: Assemble Jar
if: runner.os == 'Linux'
run: mvn --batch-mode assembly:single -DskipTests
working-directory: ./PowerDeComposer

# Publish jars.
- uses: actions/upload-artifact@v4
if: runner.os == 'Linux'
Expand Down
11 changes: 11 additions & 0 deletions Documentation/docs/ReleaseNotes.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,17 @@ Click on the header of a version number to go to the documentation of that speci

## Version 1.6.0

- [ ] 1.6.1 <sup>08-08-2024</sup>
> !!! warning "Bug fixes"
> * [ ] Decompose model
> - [X] Fixed new-line handling so line endings are not accidentally converted.
> - [X] Fixed special characters and new-line handling for formalized attributes.
> - [X] Corrected formalize extended attributes naming in config.
> * [ ] Compose model
> - [X] Fixed special characters and new-line handling for deformalizing attributes.
> * [ ] General
> - [X] Setting correct exit code (1) when an error occurred when running PowerDeComposer.
- [ ] 1.6.0 <sup>02-08-2023</sup>
> !!! success "Enhanced features"
> * [ ] Decompose model
Expand Down
6 changes: 6 additions & 0 deletions PowerDeComposer/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,12 @@
<artifactId>commons-io</artifactId>
<version>2.11.0</version>
</dependency>
<!-- Apache Commons Text for XML escape/unescaping. -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-text</artifactId>
<version>1.11.0</version>
</dependency>
<!-- JAX-B for the config. -->
<dependency>
<groupId>jakarta.xml.bind</groupId>
Expand Down
63 changes: 35 additions & 28 deletions PowerDeComposer/src/main/java/com/xbreeze/xml/Executor.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public class Executor {
* @param args
* @throws Exception
*/
public static void main(String[] args) throws Exception {
public static void main(String[] args) {

// Setup the global LogManager.
LogManager logManager = LogManager.getLogManager();
Expand All @@ -70,34 +70,41 @@ protected synchronized void setOutputStream(OutputStream out) throws SecurityExc
logger.setUseParentHandlers(false);

// Check the passed arguments.
if (args.length == 3 || args.length == 4) {
String operationType = args[0];

// Parse the config.
PowerDeComposerConfig pdcConfig;
if (args.length == 4) {
pdcConfig = PowerDeComposerConfig.fromFile(Paths.get(args[3]).toFile());
}
else {
// Create the default PowerDeComposerConfig.
pdcConfig = PowerDeComposerConfig.GetDefaultConfig();
}

// Perform the operation.
if (operationType.equalsIgnoreCase("decompose")) {
String xmlFilePath = args[1].trim();
String targetDirectory = args[2].trim();
new XmlDecomposer(xmlFilePath, targetDirectory, pdcConfig.getDecomposeConfig());
} else
if (operationType.equalsIgnoreCase("compose")) {
String xmlSourceFile = args[1].trim();
String xmlTargetFile = args[2].trim();
new XmlComposer(xmlSourceFile, xmlTargetFile);
} else {
throw new Exception("First argument should be compose or decompose");
try {
if (args.length == 3 || args.length == 4) {
String operationType = args[0];

// Parse the config.
PowerDeComposerConfig pdcConfig;
if (args.length == 4) {
pdcConfig = PowerDeComposerConfig.fromFile(Paths.get(args[3]).toFile());
}
} else {
throw new Exception("Expecting exactly 3 or 4 arguments: (decompose, xml-file-path, target-directory[, config-file-location]) or (compose, xml-source-file, xml-target-file[, config-file-location]).");
else {
// Create the default PowerDeComposerConfig.
pdcConfig = PowerDeComposerConfig.GetDefaultConfig();
}

// Perform the operation.
if (operationType.equalsIgnoreCase("decompose")) {
String xmlFilePath = args[1].trim();
String targetDirectory = args[2].trim();
new XmlDecomposer(xmlFilePath, targetDirectory, pdcConfig.getDecomposeConfig());
} else
if (operationType.equalsIgnoreCase("compose")) {
String xmlSourceFile = args[1].trim();
String xmlTargetFile = args[2].trim();
new XmlComposer(xmlSourceFile, xmlTargetFile);
} else {
throw new Exception("First argument should be compose or decompose");
}
} else {
throw new Exception("Expecting exactly 3 or 4 arguments: (decompose, xml-file-path, target-directory[, config-file-location]) or (compose, xml-source-file, xml-target-file[, config-file-location]).");
}
} catch (Exception e) {
System.err.println("An error ocurred while running PowerDeComposer: ");
System.err.print(e.getMessage());
// Exit java with exit-code 1, so other processes can detect something went wrong.
System.exit(1);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ private String resolveIncludes(FileContentAndCharset xmlFileContentsAndCharset,

// Create a string buffer for the extended attribute text.
StringBuffer extendedAttributeText = new StringBuffer();
extendedAttributeText.append("\r\n<a:ExtendedAttributesText>");
extendedAttributeText.append(xmlFileContentsAndCharset.getLineSeparator());
extendedAttributeText.append("<a:ExtendedAttributesText>");

// Find the OriginatingExtension elements.
AutoPilot ap_extension = new AutoPilot(nav);
Expand All @@ -142,16 +143,20 @@ private String resolveIncludes(FileContentAndCharset xmlFileContentsAndCharset,
String extAttrObjectID = nav.toString(nav.getAttrVal("ObjectID"));
String extAttrName = nav.toString(nav.getAttrVal("Name"));
// Replace a LF without preceding LF to CRLF (since VTD-NAV removed it during parsing).
String extAttrValue = nav.toString(nav.getText()).replaceAll("(?<!\r)\n", "\r\n");
int extendedAttributeTextIndex = nav.getText();
String extAttrValue = new String(nav.getXML().getBytes(nav.getTokenOffset(extendedAttributeTextIndex), nav.getTokenLength(extendedAttributeTextIndex)));
// Add the current extended attribute to the list for the current extension.
extensionExtAttrTextBuffer.append(String.format("{%s},%s,%d=%s\r\n", extAttrObjectID, extAttrName, extAttrValue.length(), extAttrValue));
// For the length we use the unescaped version of the extended attribute text.
extensionExtAttrTextBuffer.append(String.format("{%s},%s,%d=%s", extAttrObjectID, extAttrName, XMLUtils.unescapeXMLChars(extAttrValue).length(), extAttrValue));
extensionExtAttrTextBuffer.append(xmlFileContentsAndCharset.getLineSeparator());
}
extensionExtAttrTextBuffer.append("\r\n");
extensionExtAttrTextBuffer.append(xmlFileContentsAndCharset.getLineSeparator());
String extensionExtAttrText = extensionExtAttrTextBuffer.toString();

// Add the extension extended attributes to the extended attributes buffer.
// For the length we use the unescaped version of the extended attribute text.
// The length is minus 2, to compensate for the trailing CRLF.
extendedAttributeText.append(String.format("{%s},%s,%d=%s", extObjectID, extName, extensionExtAttrText.length() - 2, extensionExtAttrText));
extendedAttributeText.append(String.format("{%s},%s,%d=%s", extObjectID, extName, XMLUtils.unescapeXMLChars(extensionExtAttrText).length() - 2, extensionExtAttrText));
}
extendedAttributeText.append("</a:ExtendedAttributesText>");
// Insert the ExtendedAttributesText element.
Expand Down Expand Up @@ -209,7 +214,7 @@ private String resolveIncludes(FileContentAndCharset xmlFileContentsAndCharset,
// otherwise return the original one
if (deformalizedExtendedAttributes || includeCount > 0) {
String resolvedXML = XMLUtils.getResultingXml(vm);
logger.fine(String.format("XML file %s with includes resolved:", xmlFile.toString()));
logger.fine(String.format("XML file %s with includes and formalized attributes resolved:", xmlFile.toString()));
logger.fine("**** Begin of XML file ****");
logger.fine(resolvedXML);
logger.fine("**** End of XML file ****");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ else if (decomposedCDValue.equals(composedCDValue)) {

// Transform the ExtendedAttributeText elements to separate XML elements.
if (decomposeConfig.formalizeExtendedAttributes()) {
nv = formalizeExtendedAttributesText(nv);
nv = formalizeExtendedAttributesText(nv, xmlFileContentsAndCharset);
}

// Get the existing list of files in the decomposed model (if it exists). This is needed to track files which are written and which need to be deleted.
Expand Down Expand Up @@ -425,7 +425,7 @@ else if (globalIds.contains(identifierReplacementValue))
return xm.outputAndReparse();
}

private VTDNav formalizeExtendedAttributesText(VTDNav nv) throws Exception {
private VTDNav formalizeExtendedAttributesText(VTDNav nv, FileContentAndCharset xmlFileContentsAndCharset) throws Exception {
logger.info("Formalizing extended attributes in document...");

// We are going to replace all ExtendedAttributesText elements with their formal representation, so we need an XmlModifier.
Expand All @@ -446,8 +446,10 @@ private VTDNav formalizeExtendedAttributesText(VTDNav nv) throws Exception {
// The found token should be a starting tag.
if (nv.getTokenType(extAttrsTextNodeIndex) == VTDNav.TOKEN_STARTING_TAG) {
// Get the extended attribute text.
// Replace LF with CRLF, since VTD-Nav removes the carriage returns in the file (and PowerDesigner always has CRLF).
String extendedAttributesText = nv.toString(nv.getText()).replace("\n", "\r\n");
int extendedAttributeTextIndex = nv.getText();
String extendedAttributesText = new String(nv.getXML().getBytes(nv.getTokenOffset(extendedAttributeTextIndex), nv.getTokenLength(extendedAttributeTextIndex)));
// We unescape the XML characters here, so the length property in the extended attributes can be used (cause it doesn't account for escaped XML characters).
extendedAttributesText = XMLUtils.unescapeXMLChars(extendedAttributesText);
logger.fine(String.format("Found extended attributes text: %s", extendedAttributesText.replaceAll("\n", "[LF]\n").replaceAll("\r", "[CR]")));

// The extended attribute text needs to be parsed so we can create the new XML elements.
Expand All @@ -461,7 +463,8 @@ private VTDNav formalizeExtendedAttributesText(VTDNav nv) throws Exception {
Pattern extensionExtAttrsPattern = Pattern.compile(extAttrRegex);
Matcher extExtAttrsMatcher = extensionExtAttrsPattern.matcher(extendedAttributesText);
StringBuffer extExtAttrsXml = new StringBuffer();
extExtAttrsXml.append("\r\n<ExtendedAttributes>");
extExtAttrsXml.append(xmlFileContentsAndCharset.getLineSeparator());
extExtAttrsXml.append("<ExtendedAttributes>");
int currentExtensionExtAttrEnd = -1;
while (extExtAttrsMatcher.find()) {
String guid = extExtAttrsMatcher.group(1);
Expand All @@ -473,7 +476,8 @@ private VTDNav formalizeExtendedAttributesText(VTDNav nv) throws Exception {
if (extAttrsEnd > extendedAttributesText.length())
throw new Exception("Error while formalizing extended attributes text: The extended attribute length is longer then the contents of the string. This should never happen!");

String extExtAttrContent = extendedAttributesText.substring(extAttrStart, extAttrsEnd);
// Get the extended attribute contents and escape XML chars, so we can make valid XML.
String extExtAttrContent = XMLUtils.escapeXMLChars(extendedAttributesText.substring(extAttrStart, extAttrsEnd));

// If we are inside a extension section, so currentExtensionExtAttrEnd != -1. And we find a match where the the end index is after the end of extension section we have a problem.
// The end of an extension section should always be equal or after any child sections.
Expand All @@ -483,14 +487,16 @@ private VTDNav formalizeExtendedAttributesText(VTDNav nv) throws Exception {
// If we reached the end of a previous extension list, we update the end to -1 so this match is handled as a OriginatingExtension.
if (currentExtensionExtAttrEnd != -1 && extAttrStart >= currentExtensionExtAttrEnd) {
logger.fine("The new match is outside of the extension section, so resetting end index.");
extExtAttrsXml.append("\r\n</OriginatingExtension>");
extExtAttrsXml.append(xmlFileContentsAndCharset.getLineSeparator());
extExtAttrsXml.append("</OriginatingExtension>");
currentExtensionExtAttrEnd = -1;
}

// If we outside of a extension attribute list, a new extension part is started.
if (currentExtensionExtAttrEnd == -1) {
logger.fine(String.format("Found extention [ObjectID=%s;Name=%s;Length=%d;Content='%s'", guid, name, extAttrLength, extExtAttrContent));
extExtAttrsXml.append(String.format("\r\n<OriginatingExtension ObjectID=\"%s\" Name=\"%s\">", guid, name));
extExtAttrsXml.append(xmlFileContentsAndCharset.getLineSeparator());
extExtAttrsXml.append(String.format("<OriginatingExtension ObjectID=\"%s\" Name=\"%s\">", guid, name));
// Now we have added the element for the OriginatingExtension, we need to loop over the matches within the content part of the extension extended attributes.
// For each extended attribute we find, we add a separate XML element.
// Update the end of the extension extended attribute list to the current one. This way in the next loop we know we are handling an extended attribute part.
Expand All @@ -499,16 +505,19 @@ private VTDNav formalizeExtendedAttributesText(VTDNav nv) throws Exception {
// We are inside a extension section, so we treat the match as an extended attribute within the extension.
else {
logger.fine(String.format("Found extended attributes [ObjectID=%s;Name=%s;Length=%d;Value='%s'", guid, name, extAttrLength, extExtAttrContent));
extExtAttrsXml.append(String.format("\r\n<ExtendedAttribute ObjectID=\"%s\" Name=\"%s\">%s</ExtendedAttribute>", guid, name, extExtAttrContent));
extExtAttrsXml.append(xmlFileContentsAndCharset.getLineSeparator());
extExtAttrsXml.append(String.format("<ExtendedAttribute ObjectID=\"%s\" Name=\"%s\">%s</ExtendedAttribute>", guid, name, extExtAttrContent));
// Update the region to scan to after the current extended attribute.
extExtAttrsMatcher.region(extAttrsEnd, extExtAttrsMatcher.regionEnd());
}
}
// If we exited the while loop and the end index is not -1, we need to add the ending tag of the extension element.
if (currentExtensionExtAttrEnd != -1) {
extExtAttrsXml.append("\r\n</OriginatingExtension>");
extExtAttrsXml.append(xmlFileContentsAndCharset.getLineSeparator());
extExtAttrsXml.append("</OriginatingExtension>");
}
extExtAttrsXml.append("\r\n</ExtendedAttributes>");
extExtAttrsXml.append(xmlFileContentsAndCharset.getLineSeparator());
extExtAttrsXml.append("</ExtendedAttributes>");
xm.insertAfterElement(extExtAttrsXml.toString());
// Now we added the replacement of the textual extended attributes, we can remove the ExtendedAttributesText element.
xm.remove(nv.expandWhiteSpaces(nv.getElementFragment(), VTDNav.WS_LEADING));
Expand Down Expand Up @@ -785,7 +794,7 @@ private Path parseAndWriteDocumentParts(VTDNav nv, Charset fileCharset, TargetFi
// Loop through the include attributes to add them in the include tag.
for (String includeAttributeName : includeAttributesWithValues.keySet()) {
// Insert the include sub element in the include tag.
includeElementStringBuffer.append(String.format(" %s=\"%s\"", includeAttributeName, XMLUtils.excapeXMLChars(includeAttributesWithValues.get(includeAttributeName))));
includeElementStringBuffer.append(String.format(" %s=\"%s\"", includeAttributeName, XMLUtils.escapeXMLChars(includeAttributesWithValues.get(includeAttributeName))));
}
includeElementStringBuffer.append(" />");

Expand Down
Original file line number Diff line number Diff line change
@@ -1,22 +1,51 @@
package com.xbreeze.xml.utils;

import java.nio.charset.Charset;
import java.util.logging.Logger;

public class FileContentAndCharset {
private static final Logger logger = Logger.getGlobal();

private String _fileContents;
private Charset _fileCharset;
private String _lineSeparator;

/**
 * Create a holder for the contents of a file together with the charset of that file.
 * @param fileContents The contents of the file.
 * @param fileCharset The charset of the file.
 */
public FileContentAndCharset(String fileContents, Charset fileCharset) {
	// Store both values as-is; nothing else is initialized here.
	this._fileCharset = fileCharset;
	this._fileContents = fileContents;
}

public String getFileContents() {
return _fileContents;
return this._fileContents;
}

public Charset getFileCharset() {
return _fileCharset;
return this._fileCharset;
}

/**
 * Get the line separator of the file.
 * The separator is derived from the first line-feed character in the file contents and then cached,
 * so the file contents are only scanned on the first call.
 * @return The line separator for the file: CRLF when the first line-feed is directly preceded by a carriage return, otherwise LF.
 * @throws Exception If the file contents contain no line-feed character, so the line separator can't be detected.
 */
public String getLineSeparator() throws Exception {
	// If the line separator is not set yet, derive it from the file contents.
	if (this._lineSeparator == null) {
		// Find the first line-feed character.
		int firstNewLineIndex = this._fileContents.indexOf('\n');

		if (firstNewLineIndex == -1)
			throw new Exception("Cannot detect line separator. No line-feed character found in file!");

		// Check whether there is a carriage return before the line-feed character.
		// When the line-feed is the very first character there is no preceding character to inspect,
		// so the index is guarded to avoid a StringIndexOutOfBoundsException on charAt(-1).
		if (firstNewLineIndex > 0 && this._fileContents.charAt(firstNewLineIndex - 1) == '\r') {
			this._lineSeparator = "\r\n";
		} else {
			this._lineSeparator = "\n";
		}
		logger.fine(String.format("Found line separator: %s", this._lineSeparator.replace("\n", "[LF]").replace("\r", "[CR]")));
	}
	return this._lineSeparator;
}

public byte[] getBytes() {
Expand Down
15 changes: 13 additions & 2 deletions PowerDeComposer/src/main/java/com/xbreeze/xml/utils/XMLUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.text.StringEscapeUtils;

import com.ximpleware.AutoPilot;
import com.ximpleware.ModifyException;
import com.ximpleware.NavException;
Expand All @@ -49,8 +51,17 @@ public class XMLUtils {
* @param input The text to escape.
* @return The escaped input.
*/
public static String excapeXMLChars(String input) {
return input.replaceAll("\\<", "&lt;").replaceAll("\\>", "&gt;");
public static String escapeXMLChars(String input) {
return StringEscapeUtils.escapeXml10(input);
}

/**
 * Unescape the XML characters in the given text.
 * The work is delegated to Apache Commons Text (StringEscapeUtils.unescapeXml).
 * @param input The text containing escaped XML characters.
 * @return The text with the escaped XML characters unescaped.
 */
public static String unescapeXMLChars(String input) {
	String unescapedInput = StringEscapeUtils.unescapeXml(input);
	return unescapedInput;
}

/**
Expand Down
Loading

0 comments on commit 9377a93

Please sign in to comment.