forked from I2P_Developers/i2p.i2p
SU3 News: Fix parsing of the XHTML nodes
This commit is contained in:
@ -18,7 +18,6 @@ import net.i2p.util.Log;
|
||||
import org.cybergarage.util.Debug;
|
||||
import org.cybergarage.xml.Node;
|
||||
import org.cybergarage.xml.ParserException;
|
||||
import org.cybergarage.xml.parser.JaxpParser;
|
||||
|
||||
/**
|
||||
* Parse out the news.xml file which is in Atom format (RFC4287).
|
||||
@ -39,7 +38,9 @@ public class NewsXMLParser {
|
||||
"del", "ins", "em", "strong", "mark", "sub", "sup", "tt", "code", "strike", "s", "u",
|
||||
"h4", "h5", "h6",
|
||||
"ol", "ul", "li", "dl", "dt", "dd",
|
||||
"table", "tr", "td", "th"
|
||||
"table", "tr", "td", "th",
|
||||
// put in by parser
|
||||
XMLParser.TEXT_NAME
|
||||
}));
|
||||
|
||||
/**
|
||||
@ -94,7 +95,7 @@ public class NewsXMLParser {
|
||||
public void parse(InputStream in) throws IOException {
|
||||
_entries = null;
|
||||
_metadata = null;
|
||||
JaxpParser parser = new JaxpParser();
|
||||
XMLParser parser = new XMLParser(_context);
|
||||
try {
|
||||
Node root = parser.parse(in);
|
||||
extract(root);
|
||||
@ -255,7 +256,7 @@ public class NewsXMLParser {
|
||||
}
|
||||
if (e == null)
|
||||
break;
|
||||
buf.append(sn.toString());
|
||||
XMLParser.toString(buf, sn);
|
||||
}
|
||||
if (e == null)
|
||||
continue;
|
||||
|
174
apps/routerconsole/java/src/net/i2p/router/news/XMLParser.java
Normal file
174
apps/routerconsole/java/src/net/i2p/router/news/XMLParser.java
Normal file
@ -0,0 +1,174 @@
|
||||
package net.i2p.router.news;
|
||||
|
||||
/******************************************************************
|
||||
* Contains code modified from JaxpParser:
|
||||
*
|
||||
* CyberXML for Java
|
||||
*
|
||||
* Copyright (C) Satoshi Konno 2004
|
||||
*
|
||||
* Author: Markus Thurner (http://thoean.com)
|
||||
*
|
||||
* Contains code modified from Node:
|
||||
*
|
||||
* CyberXML for Java
|
||||
*
|
||||
* Copyright (C) Satoshi Konno 2002
|
||||
******************************************************************/
|
||||
|
||||
import org.w3c.dom.NamedNodeMap;
|
||||
|
||||
import net.i2p.I2PAppContext;
|
||||
import net.i2p.util.Log;
|
||||
import org.cybergarage.xml.Attribute;
|
||||
import org.cybergarage.xml.Node;
|
||||
import org.cybergarage.xml.XML;
|
||||
import org.cybergarage.xml.parser.JaxpParser;
|
||||
|
||||
|
||||
/**
|
||||
* Override so that XHTML is parsed correctly.
|
||||
*
|
||||
* This requires us to maintain mixed text and subnodes and output both.
|
||||
*
|
||||
* @since 0.9.17
|
||||
*/
|
||||
public class XMLParser extends JaxpParser {
|
||||
private final Log _log;
|
||||
|
||||
public static final String TEXT_NAME = "#text";
|
||||
|
||||
public XMLParser(I2PAppContext ctx) {
|
||||
super();
|
||||
_log = ctx.logManager().getLog(XMLParser.class);
|
||||
}
|
||||
|
||||
/**
|
||||
* Modified from UPnP JaxpParser
|
||||
*
|
||||
* @param parentNode null if at top
|
||||
* @param rank parse level, only for debug
|
||||
* @return the parsed node, or the parent node, unused except at top level
|
||||
*/
|
||||
@Override
|
||||
public org.cybergarage.xml.Node parse(Node parentNode, org.w3c.dom.Node domNode, int rank) {
|
||||
int domNodeType = domNode.getNodeType();
|
||||
String domNodeName = domNode.getNodeName();
|
||||
String domNodeValue = domNode.getNodeValue();
|
||||
NamedNodeMap attrs = domNode.getAttributes();
|
||||
int arrrsLen = (attrs != null) ? attrs.getLength() : 0;
|
||||
|
||||
if (_log.shouldLog(Log.DEBUG)) {
|
||||
String val = domNodeValue != null ?
|
||||
" = \"" + domNodeValue.replace("\n", "\\n").replace("\r", "\\r") + '"' :
|
||||
"";
|
||||
_log.debug("[" + rank + "] ELEM : \"" + domNodeName + '"' + val +
|
||||
" type = " + domNodeType + " with " + arrrsLen + " attrs");
|
||||
}
|
||||
|
||||
// I2P -
|
||||
// If it's only whitespace, skip it altogether.
|
||||
// Only add it to the value if we don't have any other nodes.
|
||||
// Otherwise, add it as a node.
|
||||
if (domNodeType == org.w3c.dom.Node.TEXT_NODE) {
|
||||
if (domNodeValue.replaceAll("[ \t\r\n]", "").length() == 0) {
|
||||
return parentNode;
|
||||
}
|
||||
if (!parentNode.hasNodes()) {
|
||||
parentNode.addValue(domNodeValue);
|
||||
return parentNode;
|
||||
}
|
||||
// else we will add it as a node below
|
||||
} else if (domNodeType != org.w3c.dom.Node.ELEMENT_NODE) {
|
||||
return parentNode;
|
||||
}
|
||||
|
||||
Node node = new Node();
|
||||
node.setName(domNodeName);
|
||||
node.setValue(domNodeValue);
|
||||
|
||||
if (parentNode != null) {
|
||||
// I2P - take the value and convert it to a text node, if it's not just whitespace
|
||||
String oldValue = parentNode.getValue();
|
||||
if (oldValue != null && oldValue.length() > 0) {
|
||||
parentNode.setValue("");
|
||||
Node text = new Node();
|
||||
text.setName(TEXT_NAME);
|
||||
text.setValue(oldValue);
|
||||
parentNode.addNode(text);
|
||||
if (_log.shouldLog(Log.DEBUG))
|
||||
_log.debug("Converted value to node");
|
||||
}
|
||||
parentNode.addNode(node);
|
||||
}
|
||||
if (domNodeType == org.w3c.dom.Node.TEXT_NODE)
|
||||
return parentNode;
|
||||
|
||||
if (attrs != null) {
|
||||
for (int n = 0; n < arrrsLen; n++) {
|
||||
org.w3c.dom.Node attr = attrs.item(n);
|
||||
String attrName = attr.getNodeName();
|
||||
String attrValue = attr.getNodeValue();
|
||||
node.setAttribute(attrName, attrValue);
|
||||
}
|
||||
}
|
||||
|
||||
org.w3c.dom.Node child = domNode.getFirstChild();
|
||||
if (child == null) {
|
||||
node.setValue("");
|
||||
return node;
|
||||
}
|
||||
do{
|
||||
parse(node, child, rank+1);
|
||||
child = child.getNextSibling();
|
||||
} while (child != null);
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
/**
|
||||
* A replacement for Node.toString(), which does not recognize #text.
|
||||
*/
|
||||
public static void toString(StringBuilder buf, Node node) {
|
||||
output(buf, node, 0);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* A replacement for Node.output(), which does not recognize #text.
|
||||
* Also, we use the empty entity, so <br /> does not turn into <br></br>.
|
||||
*/
|
||||
private static void output(StringBuilder buf, Node node, int indentLevel) {
|
||||
String name = node.getName();
|
||||
String value = XML.escapeXMLChars(node.getValue());
|
||||
if (name.equals(TEXT_NAME)) {
|
||||
buf.append(value);
|
||||
return;
|
||||
}
|
||||
|
||||
String indentString = node.getIndentLevelString(indentLevel);
|
||||
buf.append(indentString).append('<').append(name);
|
||||
int nAttributes = node.getNAttributes();
|
||||
for (int n = 0; n < nAttributes; n++) {
|
||||
Attribute attr = node.getAttribute(n);
|
||||
buf.append(' ').append(attr.getName()).append("=\"").append(XML.escapeXMLChars(attr.getValue())).append('"');
|
||||
}
|
||||
|
||||
// As in Node, output either the nodes or the value.
|
||||
// If mixed values and nodes, the values must be text nodes. See parser above.
|
||||
if (node.hasNodes()) {
|
||||
buf.append(">\n");
|
||||
int nChildNodes = node.getNNodes();
|
||||
for (int n = 0; n < nChildNodes; n++) {
|
||||
Node cnode = node.getNode(n);
|
||||
output(buf, cnode, indentLevel + 1);
|
||||
}
|
||||
buf.append(indentString).append("</").append(name).append(">\n");
|
||||
} else {
|
||||
if (value == null || value.length() == 0)
|
||||
buf.append("/>");
|
||||
else
|
||||
buf.append('>').append(value).append("</").append(name).append('>');
|
||||
}
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user