Pull request #438: REPORT-31492 word导出中Html无法解析小于号

Merge in CORE/base-third from ~HUGH.C/base-third:bugfix/10.0 to bugfix/10.0 * commit '2b0774489afbf24f55859cfb95b40dcea1511909': REPORT-31492 word导出中Html无法解析小于号
5 years ago · c0eff74bec
2 changed files with 30 additions and 8 deletions
--- a/fine-itext-old/src/com/fr/third/com/lowagie/text/html/simpleparser/HTMLWorker.java
+++ b/fine-itext-old/src/com/fr/third/com/lowagie/text/html/simpleparser/HTMLWorker.java
@ -775,11 +775,14 @@ public class HTMLWorker implements SimpleXMLDocHandler, DocListener {
 			+ " h1 h2 h3 h4 h5 h6 img hr";
 	/** Supported tag names, stored as map keys; every value is null, so the map is used as a set. */
 	public static final HashMap tagsSupported = new HashMap();
 	/**
 	 * First character of every supported tag name, stored as map keys (boxed by put); every value
 	 * is null. SimpleXMLParser.beginnOfTag checks the character after a '<' against these keys.
 	 */
 	public static final HashMap tagsPrefixSupported = new HashMap();
 	static {
 		// StringTokenizer with no delimiter argument splits tagsSupportedString on whitespace.
 		StringTokenizer tok = new StringTokenizer(tagsSupportedString);
-		while (tok.hasMoreTokens())
+		while (tok.hasMoreTokens()) {
-			tagsSupported.put(tok.nextToken(), null);
+			String s = tok.nextToken();
 			// Register the full tag name ...
 			tagsSupported.put(s, null);
 			// ... and its first character, exactly as written in the tag list (no case folding).
 			tagsPrefixSupported.put(s.charAt(0), null);
 		}
 	}
 }
--- a/fine-itext-old/src/com/fr/third/com/lowagie/text/xml/simpleparser/SimpleXMLParser.java
+++ b/fine-itext-old/src/com/fr/third/com/lowagie/text/xml/simpleparser/SimpleXMLParser.java
@ -75,6 +75,7 @@
 */
 package com.fr.third.com.lowagie.text.xml.simpleparser;
 import com.fr.third.com.lowagie.text.html.simpleparser.HTMLWorker;
 import java.io.BufferedReader;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
@ -230,16 +231,13 @@ public final class SimpleXMLParser {
            // we are in an unknown state before there's actual content
 			case UNKNOWN:
                if(character == '<') {
-                    saveState(TEXT);
+                    beginnOfTag((char) reader.read(), UNKNOWN);
                    state = TAG_ENCOUNTERED;
                }
                break;
            // we can encounter any content
 			case TEXT:
                if(character == '<') {
-                    flush();
+                    beginnOfTag((char) reader.read(), UNKNOWN);
                    saveState(state);
                    state = TAG_ENCOUNTERED;
                } else if(character == '&') {
                    saveState(state);
                    entity.setLength(0);
@ -499,6 +497,27 @@ public final class SimpleXMLParser {
    private void saveState(int s) {
        // Remember the given parser state on the stack. Integer.valueOf replaces the
        // deprecated Integer(int) constructor and reuses cached boxes for small values.
        stack.push(Integer.valueOf(s));
    }
    /**
     * Handles the character that immediately follows a {@code <}. When that character cannot
     * start a supported tag, the {@code <} is kept as literal text (for example {@code <1111}),
     * imitating how browsers treat such input.
     *
     * <p>The character counts as the start of a tag when it is {@code /} or when it is a key of
     * {@link HTMLWorker#tagsPrefixSupported}. In that case TEXT is saved as the state to return
     * to and the parser moves to TAG_ENCOUNTERED. Otherwise the value held in {@code character}
     * (the {@code <} the callers matched on) is appended to the text buffer.
     *
     * <p>NOTE(review): the lookup uses the character as-is, so upper-case tags and constructs
     * such as {@code <!--} or {@code <?} appear to fall through to the text branch. Confirm
     * whether that is intended.
     *
     * @param c    the character read right after the {@code <}; callers pass
     *             {@code (char) reader.read()}, so end of input arrives as {@code (char) -1}
     * @param type the parser state in which the {@code <} was found; pending text is flushed
     *             only when this is TEXT
     */
    public void beginnOfTag(char c, int type) {
        // A char is unsigned, so "c == -1" can never be true: the -1 that read() returns at end
        // of input becomes '\uFFFF' once cast to char. Compare against that value, and do it
        // before the store below so the bogus character is never kept as pending input.
        if (c == (char) -1) {
            return;
        }
        // Keep the look-ahead character; presumably the main loop re-examines it next
        // (TODO confirm against the loop that consumes previousCharacter).
        previousCharacter = c;
        if (c == '/' || HTMLWorker.tagsPrefixSupported.containsKey(c)) {
            if (type == TEXT) {
                flush();
            }
            saveState(TEXT);
            state = TAG_ENCOUNTERED;
            return;
        }
        // Not a tag start: keep the '<' as ordinary text.
        text.append((char) character);
        nowhite = true;
    }
    /**
     * Flushes the text that is currently in the buffer.
     * The text can be ignored, added to the document