Pull request #438: REPORT-31492 word导出中Html无法解析小于号

Merge in CORE/base-third from ~HUGH.C/base-third:bugfix/10.0 to bugfix/10.0

* commit '2b0774489afbf24f55859cfb95b40dcea1511909':
  REPORT-31492 word导出中Html无法解析小于号
5 years ago · c0eff74bec
2 changed files with 30 additions and 8 deletions
--- a/fine-itext-old/src/com/fr/third/com/lowagie/text/html/simpleparser/HTMLWorker.java
+++ b/fine-itext-old/src/com/fr/third/com/lowagie/text/html/simpleparser/HTMLWorker.java
@ -775,11 +775,14 @@ public class HTMLWorker implements SimpleXMLDocHandler, DocListener {
 			+ " h1 h2 h3 h4 h5 h6 img hr";

 	public static final HashMap tagsSupported = new HashMap();
+	public static final HashMap tagsPrefixSupported = new HashMap();

 	static {
 		StringTokenizer tok = new StringTokenizer(tagsSupportedString);
-		while (tok.hasMoreTokens())
-			tagsSupported.put(tok.nextToken(), null);
+		while (tok.hasMoreTokens()) {
+			String s = tok.nextToken(); // StringTokenizer never yields an empty token, so charAt(0) below is safe
+			tagsSupported.put(s, null); // value is always null; only the key (tag name) carries information
+			tagsPrefixSupported.put(s.charAt(0), null); // key is the autoboxed Character of the tag's first letter; looked up via containsKey(char)
+		}
 	}
-
 }
--- a/fine-itext-old/src/com/fr/third/com/lowagie/text/xml/simpleparser/SimpleXMLParser.java
+++ b/fine-itext-old/src/com/fr/third/com/lowagie/text/xml/simpleparser/SimpleXMLParser.java
@ -75,6 +75,7 @@
 */
 package com.fr.third.com.lowagie.text.xml.simpleparser;

+import com.fr.third.com.lowagie.text.html.simpleparser.HTMLWorker;
 import java.io.BufferedReader;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
@ -230,16 +231,13 @@ public final class SimpleXMLParser {
            // we are in an unknown state before there's actual content
 			case UNKNOWN:
                if(character == '<') {
-                    saveState(TEXT);
-                    state = TAG_ENCOUNTERED;
+                    beginnOfTag((char) reader.read(), UNKNOWN);
                }
                break;
            // we can encounter any content
 			case TEXT:
                if(character == '<') {
-                    flush();
-                    saveState(state);
-                    state = TAG_ENCOUNTERED;
+                    beginnOfTag((char) reader.read(), UNKNOWN);
                } else if(character == '&') {
                    saveState(state);
                    entity.setLength(0);
@ -499,6 +497,27 @@ public final class SimpleXMLParser {
    private void saveState(int s) {
    	stack.push(new Integer(s));
    }
+
+    /**
+     * 处理标签的开头，若不在支持标签范围内，将<符号作为文本处理，例：<1111 (仿造浏览器的处理方式)
+     */
+    public void beginnOfTag(char c, int type) {
+        previousCharacter = c;
+        if (c == -1) {
+            return;
+        }
+        if (c == '/' || HTMLWorker.tagsPrefixSupported.containsKey(c)) {
+            if (type == TEXT) {
+                flush();
+            }
+            saveState(TEXT);
+            state = TAG_ENCOUNTERED;
+            return;
+        }
+        text.append((char) character);
+        nowhite = true;
+    }
+
    /**
     * Flushes the text that is currently in the buffer.
     * The text can be ignored, added to the document