Skip to content

Commit

Permalink
Add option to preserve newlines in XML documents (#1345)
Browse files Browse the repository at this point in the history
* Add an option to `pugixml` to create `node_newline` DOM nodes while parsing white space. Stored as CR/LF in its value member.
* Add the `XmlReadOptions.readNewlines` option to the XML read options.
* Add a new `NewlineElement` to store elements corresponding to `node_newline` DOM nodes. Stores
a CR/LF character in its doc string.
* Add Python wrapper `PyNewlineElement`.
  • Loading branch information
kwokcb authored May 12, 2023
1 parent adb9f05 commit cce5379
Show file tree
Hide file tree
Showing 11 changed files with 676 additions and 564 deletions.
1 change: 1 addition & 0 deletions python/Scripts/mxupdate.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def main():
try:
readOptions = mx.XmlReadOptions()
readOptions.readComments = True
readOptions.readNewlines = True
mx.readFromXmlFile(doc, filename, mx.FileSearchPath(), readOptions)
validDocs[filename] = doc
except mx.Exception:
Expand Down
1,110 changes: 555 additions & 555 deletions resources/Materials/Examples/StandardSurface/standard_surface_chess_set.mtlx

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions source/MaterialXCore/Element.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -731,6 +731,7 @@ INSTANTIATE_CONCRETE_SUBCLASS(Look, "look")
INSTANTIATE_CONCRETE_SUBCLASS(LookGroup, "lookgroup")
INSTANTIATE_CONCRETE_SUBCLASS(MaterialAssign, "materialassign")
INSTANTIATE_CONCRETE_SUBCLASS(Member, "member")
INSTANTIATE_CONCRETE_SUBCLASS(NewlineElement, "newline")
INSTANTIATE_CONCRETE_SUBCLASS(Node, "node")
INSTANTIATE_CONCRETE_SUBCLASS(NodeDef, "nodedef")
INSTANTIATE_CONCRETE_SUBCLASS(NodeGraph, "nodegraph")
Expand Down
21 changes: 21 additions & 0 deletions source/MaterialXCore/Element.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class TypedElement;
class ValueElement;
class Token;
class CommentElement;
class NewlineElement;
class GenericElement;
class StringResolver;
class Document;
Expand Down Expand Up @@ -51,6 +52,11 @@ using CommentElementPtr = shared_ptr<CommentElement>;
/// A shared pointer to a const CommentElement
using ConstCommentElementPtr = shared_ptr<const CommentElement>;

/// A shared pointer to a NewlineElement
using NewlineElementPtr = shared_ptr<NewlineElement>;
/// A shared pointer to a const NewlineElement
using ConstNewlineElementPtr = shared_ptr<const NewlineElement>;

/// A shared pointer to a GenericElement
using GenericElementPtr = shared_ptr<GenericElement>;
/// A shared pointer to a const GenericElement
Expand Down Expand Up @@ -1153,6 +1159,21 @@ class MX_CORE_API CommentElement : public Element
static const string CATEGORY;
};

/// @class NewlineElement
/// An element that stands for a newline within a document.
class MX_CORE_API NewlineElement : public Element
{
  public:
    /// Construct a newline element, forwarding the parent, the class
    /// CATEGORY, and the given name to the Element base constructor.
    NewlineElement(ElementPtr parent, const string& name) : Element(parent, CATEGORY, name) { }

    virtual ~NewlineElement() { }

    /// The category string passed to the base class for newline elements.
    static const string CATEGORY;
};

/// @class GenericElement
/// A generic element subclass, for instantiating elements with unrecognized categories.
class MX_CORE_API GenericElement : public Element
Expand Down
39 changes: 36 additions & 3 deletions source/MaterialXFormat/External/PugiXML/pugixml.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3419,7 +3419,31 @@ PUGI__NS_BEGIN
{
mark = s; // Save this offset while searching for a terminator.

PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
// MaterialX: Enable newline tracking when processing whitespace.
if (PUGI__OPTSET(parse_newlines))
{
if (PUGI__IS_CHARTYPE(*s, ct_space))
{
unsigned int lineCount = 0;
while (PUGI__IS_CHARTYPE(*s, ct_space))
{
if (s[0] == '\n')
{
lineCount++;
}
++s;
}
for (size_t i=1; i<lineCount; i++)
{
PUGI__PUSHNODE(node_newline);
PUGI__POPNODE();
}
}
}
else
{
PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
}

if (*s == '<' || !*s)
{
Expand Down Expand Up @@ -4179,6 +4203,11 @@ PUGI__NS_BEGIN
node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
break;

// MaterialX: Handle newline output
case node_newline:
writer.write_string("");
break;

case node_pi:
writer.write('<', '?');
writer.write_string(node->name ? node->name + 0 : default_name);
Expand Down Expand Up @@ -4246,8 +4275,12 @@ PUGI__NS_BEGIN
if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
writer.write('\n');

if ((indent_flags & indent_indent) && indent_length)
text_output_indent(writer, indent, indent_length, depth);
// MaterialX: don't indent new line nodes
if (PUGI__NODETYPE(node) != node_newline)
{
if ((indent_flags & indent_indent) && indent_length)
text_output_indent(writer, indent, indent_length, depth);
}

if (PUGI__NODETYPE(node) == node_element)
{
Expand Down
4 changes: 4 additions & 0 deletions source/MaterialXFormat/External/PugiXML/pugixml.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ namespace pugi
node_pcdata, // Plain character data, i.e. 'text'
node_cdata, // Character data, i.e. '<![CDATA[text]]>'
node_comment, // Comment tag, i.e. '<!-- text -->'
node_newline, // MaterialX: A newline node
node_pi, // Processing instruction, i.e. '<?name?>'
node_declaration, // Document declaration, i.e. '<?xml version="1.0"?>'
node_doctype // Document type declaration, i.e. '<!DOCTYPE doc>'
Expand Down Expand Up @@ -201,6 +202,9 @@ namespace pugi
// This flag is off by default.
const unsigned int parse_embed_pcdata = 0x2000;

// MaterialX: This flag determines if newlines are added to the DOM tree. This flag is off by default.
const unsigned int parse_newlines = 0x4000;

// The default parsing mode.
// Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded,
// End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
Expand Down
35 changes: 29 additions & 6 deletions source/MaterialXFormat/XmlIo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,18 @@ void elementFromXml(const xml_node& xmlNode, ElementPtr elem, const XmlReadOptio
ElementPtr child = elem->addChildOfCategory(category, name);
elementFromXml(xmlChild, child, readOptions);

// Handle the interpretation of XML comments.
if (readOptions && readOptions->readComments && category.empty())
// Handle the interpretation of XML comments and newlines.
if (readOptions && category.empty())
{
child = elem->changeChildCategory(child, CommentElement::CATEGORY);
child->setDocString(xmlChild.value());
if (readOptions->readComments && xmlChild.type() == node_comment)
{
child = elem->changeChildCategory(child, CommentElement::CATEGORY);
child->setDocString(xmlChild.value());
}
else if (readOptions->readNewlines && xmlChild.type() == node_newline)
{
child = elem->changeChildCategory(child, NewlineElement::CATEGORY);
}
}
}
}
Expand Down Expand Up @@ -131,6 +138,14 @@ void elementToXml(ConstElementPtr elem, xml_node& xmlNode, const XmlWriteOptions
continue;
}

// Write XML newlines.
if (child->getCategory() == NewlineElement::CATEGORY)
{
xml_node xmlChild = xmlNode.append_child(node_newline);
xmlChild.set_value("\n");
continue;
}

xml_node xmlChild = xmlNode.append_child(child->getCategory().c_str());
elementToXml(child, xmlChild, writeOptions);
}
Expand Down Expand Up @@ -253,9 +268,16 @@ void validateParseResult(const xml_parse_result& result, const FilePath& filenam
/// Translate MaterialX read options into a pugixml parse-option bitmask.
/// @param readOptions Optional read options. May be null, in which case
///    only the default parse flags are returned.
/// @return parse_default, with parse_comments added when readComments is
///    set and parse_newlines added when readNewlines is set.
unsigned int getParseOptions(const XmlReadOptions* readOptions)
{
    unsigned int parseOptions = parse_default;
    if (readOptions)
    {
        // Each flag is tested on its own, so that enabling newlines does
        // not require comments to be enabled, and vice versa.
        if (readOptions->readComments)
        {
            parseOptions |= parse_comments;
        }
        if (readOptions->readNewlines)
        {
            parseOptions |= parse_newlines;
        }
    }
    return parseOptions;
}
Expand All @@ -268,6 +290,7 @@ unsigned int getParseOptions(const XmlReadOptions* readOptions)

XmlReadOptions::XmlReadOptions() :
readComments(false),
readNewlines(false),
upgradeVersion(true),
readXIncludeFunction(readFromXmlFile)
{
Expand Down
4 changes: 4 additions & 0 deletions source/MaterialXFormat/XmlIo.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ class MX_FORMAT_API XmlReadOptions
/// Defaults to false.
bool readComments;

/// If true, then XML newlines will be read into documents as newline elements.
/// Defaults to false.
bool readNewlines;

/// If true, then documents from earlier versions of MaterialX will be upgraded
/// to the current version. Defaults to true.
bool upgradeVersion;
Expand Down
21 changes: 21 additions & 0 deletions source/MaterialXTest/MaterialXFormat/XmlIo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,27 @@ TEST_CASE("Load content", "[xmlio]")
REQUIRE_THROWS_AS(mx::readFromXmlFile(nonExistentDoc, "NonExistent.mtlx", mx::FileSearchPath(), &readOptions), mx::ExceptionFileMissing);
}

TEST_CASE("Comments and newlines", "[xmlio]")
{
    // Load the raw text of the example document.
    mx::FilePath examplePath("resources/Materials/Examples/StandardSurface/standard_surface_chess_set.mtlx");
    const std::string sourceXml = mx::readFile(examplePath);

    // Request that both comments and newlines are kept while reading.
    mx::XmlReadOptions options;
    options.readNewlines = true;
    options.readComments = true;

    // Parse the text into a fresh document, then serialize it back to a string.
    mx::DocumentPtr roundTripDoc = mx::createDocument();
    mx::readFromXmlString(roundTripDoc, sourceXml, mx::FileSearchPath(), &options);
    const std::string roundTripXml = mx::writeToXmlString(roundTripDoc);

    // The round trip must reproduce the input text exactly.
    REQUIRE(sourceXml == roundTripXml);
}

TEST_CASE("Locale region testing", "[xmlio]")
{
// In the United States, the thousands separator is a comma, while in Germany it is a period.
Expand Down
3 changes: 3 additions & 0 deletions source/PyMaterialX/PyMaterialXCore/PyElement.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,9 @@ void bindPyElement(py::module& mod)
py::class_<mx::CommentElement, mx::CommentElementPtr, mx::Element>(mod, "CommentElement")
.def_readonly_static("CATEGORY", &mx::CommentElement::CATEGORY);

py::class_<mx::NewlineElement, mx::NewlineElementPtr, mx::Element>(mod, "NewlineElement")
.def_readonly_static("CATEGORY", &mx::NewlineElement::CATEGORY);

py::class_<mx::GenericElement, mx::GenericElementPtr, mx::Element>(mod, "GenericElement")
.def_readonly_static("CATEGORY", &mx::GenericElement::CATEGORY);

Expand Down
1 change: 1 addition & 0 deletions source/PyMaterialX/PyMaterialXFormat/PyXmlIo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ void bindPyXmlIo(py::module& mod)
.def(py::init())
.def_readwrite("readXIncludeFunction", &mx::XmlReadOptions::readXIncludeFunction)
.def_readwrite("readComments", &mx::XmlReadOptions::readComments)
.def_readwrite("readNewlines", &mx::XmlReadOptions::readNewlines)
.def_readwrite("upgradeVersion", &mx::XmlReadOptions::upgradeVersion)
.def_readwrite("parentXIncludes", &mx::XmlReadOptions::parentXIncludes);

Expand Down

0 comments on commit cce5379

Please sign in to comment.