diff --git a/apps/routerconsole/java/src/net/i2p/router/news/NewsXMLParser.java b/apps/routerconsole/java/src/net/i2p/router/news/NewsXMLParser.java
index 068b19aa0..5285ceb43 100644
--- a/apps/routerconsole/java/src/net/i2p/router/news/NewsXMLParser.java
+++ b/apps/routerconsole/java/src/net/i2p/router/news/NewsXMLParser.java
@@ -18,7 +18,6 @@ import net.i2p.util.Log;
import org.cybergarage.util.Debug;
import org.cybergarage.xml.Node;
import org.cybergarage.xml.ParserException;
-import org.cybergarage.xml.parser.JaxpParser;
/**
* Parse out the news.xml file which is in Atom format (RFC4287).
@@ -39,7 +38,9 @@ public class NewsXMLParser {
"del", "ins", "em", "strong", "mark", "sub", "sup", "tt", "code", "strike", "s", "u",
"h4", "h5", "h6",
"ol", "ul", "li", "dl", "dt", "dd",
- "table", "tr", "td", "th"
+ "table", "tr", "td", "th",
+ // put in by parser
+ XMLParser.TEXT_NAME
}));
/**
@@ -94,7 +95,7 @@ public class NewsXMLParser {
public void parse(InputStream in) throws IOException {
_entries = null;
_metadata = null;
- JaxpParser parser = new JaxpParser();
+ XMLParser parser = new XMLParser(_context);
try {
Node root = parser.parse(in);
extract(root);
@@ -255,7 +256,7 @@ public class NewsXMLParser {
}
if (e == null)
break;
- buf.append(sn.toString());
+ XMLParser.toString(buf, sn);
}
if (e == null)
continue;
diff --git a/apps/routerconsole/java/src/net/i2p/router/news/XMLParser.java b/apps/routerconsole/java/src/net/i2p/router/news/XMLParser.java
new file mode 100644
index 000000000..2f14aaefa
--- /dev/null
+++ b/apps/routerconsole/java/src/net/i2p/router/news/XMLParser.java
@@ -0,0 +1,174 @@
+package net.i2p.router.news;
+
+/******************************************************************
+* Contains code modified from JaxpParser:
+*
+* CyberXML for Java
+*
+* Copyright (C) Satoshi Konno 2004
+*
+* Author: Markus Thurner (http://thoean.com)
+*
+* Contains code modified from Node:
+*
+* CyberXML for Java
+*
+* Copyright (C) Satoshi Konno 2002
+******************************************************************/
+
+import org.w3c.dom.NamedNodeMap;
+
+import net.i2p.I2PAppContext;
+import net.i2p.util.Log;
+import org.cybergarage.xml.Attribute;
+import org.cybergarage.xml.Node;
+import org.cybergarage.xml.XML;
+import org.cybergarage.xml.parser.JaxpParser;
+
+
+/**
+ * A {@link JaxpParser} that parses XHTML correctly by keeping mixed content,
+ * i.e. text interleaved with child elements, and writing both back out.
+ *
+ * Text found in an element before any child node exists is stored as the
+ * element's value. As soon as a child is added, that value is moved into a
+ * child node named {@link #TEXT_NAME}, and any later text is added as further
+ * {@link #TEXT_NAME} children, so document order is preserved.
+ * Use {@link #toString(StringBuilder, Node)} to serialize the result.
+ *
+ * @since 0.9.17
+ */
+public class XMLParser extends JaxpParser {
+    private final Log _log;
+
+    /**
+     * Name of the child nodes that hold text. The parser names text nodes with
+     * the DOM node name, which the DOM specification defines as "#text".
+     */
+    public static final String TEXT_NAME = "#text";
+
+    /**
+     * @param ctx context used to obtain the logger
+     */
+    public XMLParser(I2PAppContext ctx) {
+        super();
+        _log = ctx.logManager().getLog(XMLParser.class);
+    }
+
+    /**
+     * Modified from UPnP JaxpParser.
+     *
+     * Converts one DOM node, and recursively its children, into the
+     * CyberXML node tree. Only element and text nodes are kept; every other
+     * DOM node type is dropped.
+     *
+     * @param parentNode null if at top
+     * @param domNode the DOM node to convert
+     * @param rank parse level, only for debug
+     * @return the parsed node, or the parent node, unused except at top level
+     */
+    @Override
+    public org.cybergarage.xml.Node parse(Node parentNode, org.w3c.dom.Node domNode, int rank) {
+        int domNodeType = domNode.getNodeType();
+        String domNodeName = domNode.getNodeName();
+        String domNodeValue = domNode.getNodeValue();
+        NamedNodeMap attrs = domNode.getAttributes();
+        int attrsLen = (attrs != null) ? attrs.getLength() : 0;
+
+        if (_log.shouldLog(Log.DEBUG)) {
+            String val = domNodeValue != null ?
+                         " = \"" + domNodeValue.replace("\n", "\\n").replace("\r", "\\r") + '"' :
+                         "";
+            _log.debug("[" + rank + "] ELEM : \"" + domNodeName + '"' + val +
+                       " type = " + domNodeType + " with " + attrsLen + " attrs");
+        }
+
+        // I2P -
+        // If it's only whitespace, skip it altogether.
+        // Only add it to the value if we don't have any other nodes.
+        // Otherwise, add it as a node.
+        boolean isText = domNodeType == org.w3c.dom.Node.TEXT_NODE;
+        if (isText) {
+            // Text with no parent has nowhere to go; previously this threw a NullPointerException.
+            if (parentNode == null || isWhitespaceOnly(domNodeValue)) {
+                return parentNode;
+            }
+            if (!parentNode.hasNodes()) {
+                parentNode.addValue(domNodeValue);
+                return parentNode;
+            }
+            // else we will add it as a node below
+        } else if (domNodeType != org.w3c.dom.Node.ELEMENT_NODE) {
+            return parentNode;
+        }
+
+        Node node = new Node();
+        node.setName(domNodeName);
+        node.setValue(domNodeValue);
+
+        if (parentNode != null) {
+            // I2P - the parent is about to get a child, so any text collected so far
+            // as its value must become a text node to keep the document order.
+            String oldValue = parentNode.getValue();
+            if (oldValue != null && oldValue.length() > 0) {
+                parentNode.setValue("");
+                Node text = new Node();
+                text.setName(TEXT_NAME);
+                text.setValue(oldValue);
+                parentNode.addNode(text);
+                if (_log.shouldLog(Log.DEBUG))
+                    _log.debug("Converted value to node");
+            }
+            parentNode.addNode(node);
+        }
+        // Text nodes have neither attributes nor children.
+        if (isText)
+            return parentNode;
+
+        for (int n = 0; n < attrsLen; n++) {
+            org.w3c.dom.Node attr = attrs.item(n);
+            node.setAttribute(attr.getNodeName(), attr.getNodeValue());
+        }
+
+        org.w3c.dom.Node child = domNode.getFirstChild();
+        if (child == null) {
+            // Empty element: use an empty value rather than the null DOM value.
+            node.setValue("");
+            return node;
+        }
+        for (; child != null; child = child.getNextSibling()) {
+            parse(node, child, rank + 1);
+        }
+
+        return node;
+    }
+
+    /**
+     * @return true if s is null, empty, or contains only
+     *         space, tab, carriage return and newline characters
+     */
+    private static boolean isWhitespaceOnly(String s) {
+        if (s == null)
+            return true;
+        for (int i = 0; i < s.length(); i++) {
+            char c = s.charAt(i);
+            if (c != ' ' && c != '\t' && c != '\r' && c != '\n')
+                return false;
+        }
+        return true;
+    }
+
+    /**
+     * A replacement for Node.toString(), which does not recognize #text.
+     *
+     * @param buf buffer the serialized node is appended to
+     * @param node the node to serialize, including all of its children
+     */
+    public static void toString(StringBuilder buf, Node node) {
+        output(buf, node, 0);
+    }
+
+    /**
+     * A replacement for Node.output(), which does not recognize #text.
+     * Also, we use the empty-element form, so {@code <br/>} does not turn
+     * into {@code <br></br>}.
+     *
+     * @param buf buffer to append to
+     * @param node the node to write
+     * @param indentLevel nesting depth, used for the indent of element tags
+     */
+    private static void output(StringBuilder buf, Node node, int indentLevel) {
+        String name = node.getName();
+        String value = XML.escapeXMLChars(node.getValue());
+        // Constant first, so a node without a name does not throw here.
+        if (TEXT_NAME.equals(name)) {
+            // Text nodes are written as bare (escaped) text, with no tag and no indent.
+            if (value != null)
+                buf.append(value);
+            return;
+        }
+
+        String indentString = node.getIndentLevelString(indentLevel);
+        buf.append(indentString).append('<').append(name);
+        int nAttributes = node.getNAttributes();
+        for (int n = 0; n < nAttributes; n++) {
+            Attribute attr = node.getAttribute(n);
+            buf.append(' ').append(attr.getName()).append("=\"").append(XML.escapeXMLChars(attr.getValue())).append('"');
+        }
+
+        // As in Node, output either the nodes or the value.
+        // If mixed values and nodes, the values must be text nodes. See parser above.
+        if (node.hasNodes()) {
+            buf.append(">\n");
+            int nChildNodes = node.getNNodes();
+            for (int n = 0; n < nChildNodes; n++) {
+                output(buf, node.getNode(n), indentLevel + 1);
+            }
+            // Closing tag; the "</" was missing, which produced malformed markup.
+            buf.append(indentString).append("</").append(name).append(">\n");
+        } else if (value == null || value.length() == 0) {
+            buf.append("/>");
+        } else {
+            buf.append('>').append(value).append("</").append(name).append('>');
+        }
+    }
+}