Browse Source

REPORT-31492 word导出中Html无法解析小于号

bugfix/10.0
Hugh.C 5 years ago
parent
commit
2b0774489a
  1. 9
      fine-itext-old/src/com/fr/third/com/lowagie/text/html/simpleparser/HTMLWorker.java
  2. 29
      fine-itext-old/src/com/fr/third/com/lowagie/text/xml/simpleparser/SimpleXMLParser.java

9
fine-itext-old/src/com/fr/third/com/lowagie/text/html/simpleparser/HTMLWorker.java

@ -775,11 +775,14 @@ public class HTMLWorker implements SimpleXMLDocHandler, DocListener {
+ " h1 h2 h3 h4 h5 h6 img hr";
/** Used as a set: keys are the tag names listed in {@code tagsSupportedString}; every value is {@code null}. */
public static final HashMap tagsSupported = new HashMap();
/** Used as a set: keys are the first character (boxed) of each supported tag name; every value is {@code null}. */
public static final HashMap tagsPrefixSupported = new HashMap();
static {
    // Both maps are filled in a single pass: a StringTokenizer can only be consumed once,
    // so a second loop over the same tokenizer would never execute and would leave
    // tagsPrefixSupported empty.
    StringTokenizer tok = new StringTokenizer(tagsSupportedString);
    while (tok.hasMoreTokens()) {
        String s = tok.nextToken();
        tagsSupported.put(s, null);
        tagsPrefixSupported.put(Character.valueOf(s.charAt(0)), null);
    }
}
}

29
fine-itext-old/src/com/fr/third/com/lowagie/text/xml/simpleparser/SimpleXMLParser.java

@ -75,6 +75,7 @@
*/
package com.fr.third.com.lowagie.text.xml.simpleparser;
import com.fr.third.com.lowagie.text.html.simpleparser.HTMLWorker;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
@ -230,16 +231,13 @@ public final class SimpleXMLParser {
// we are in an unknown state before there's actual content
case UNKNOWN:
if(character == '<') {
saveState(TEXT);
state = TAG_ENCOUNTERED;
beginnOfTag((char) reader.read(), UNKNOWN);
}
break;
// we can encounter any content
case TEXT:
if(character == '<') {
flush();
saveState(state);
state = TAG_ENCOUNTERED;
beginnOfTag((char) reader.read(), UNKNOWN);
} else if(character == '&') {
saveState(state);
entity.setLength(0);
@ -499,6 +497,27 @@ public final class SimpleXMLParser {
private void saveState(int s) {
    // Pushes the given state code onto the state stack.
    // Integer.valueOf replaces the deprecated Integer(int) constructor and reuses cached instances.
    stack.push(Integer.valueOf(s));
}
/**
 * Handles the character that follows a '&lt;'. If that character cannot start a
 * supported tag, the '&lt;' is treated as plain text (e.g. {@code <1111}),
 * imitating the way browsers handle it.
 *
 * @param c    the character read immediately after the '&lt;'; {@code (char) -1}
 *             (U+FFFF) is interpreted as end of input, since that is what the
 *             reader's -1 EOF value becomes once cast to {@code char}
 * @param type the state the caller was in; when it equals {@code TEXT} the
 *             pending text is flushed before the tag state is entered
 */
public void beginnOfTag(char c, int type) {
    // A char never compares equal to the int -1, so EOF has to be detected through its
    // char-cast form. The -1 sentinel is restored instead of storing U+FFFF as a pending
    // character. NOTE(review): assumes previousCharacter is an int using -1 as "nothing
    // pending" -- confirm against the field declaration. A literal U+FFFF in the input is
    // indistinguishable from EOF here.
    if (c == (char) -1) {
        previousCharacter = -1;
        return;
    }
    // Remember the look-ahead character; presumably the main loop re-examines it next.
    previousCharacter = c;
    // '/' starts a closing tag; otherwise only the first character of a supported tag name counts.
    if (c == '/' || HTMLWorker.tagsPrefixSupported.containsKey(Character.valueOf(c))) {
        if (type == TEXT) {
            flush();
        }
        saveState(TEXT);
        state = TAG_ENCOUNTERED;
        return;
    }
    // Not a tag start: keep the current character (the '<' the visible callers matched on) as text.
    text.append((char) character);
    nowhite = true;
}
/**
* Flushes the text that is currently in the buffer.
* The text can be ignored, added to the document

Loading…
Cancel
Save