Browse Source

REPORT-25253 Html无法解析小于号

bugfix/10.0
Hugh.C 5 years ago
parent
commit
4f71896be8
  1. 9
      fine-itext/src/com/fr/third/v2/lowagie/text/html/simpleparser/HTMLWorker.java
  2. 30
      fine-itext/src/com/fr/third/v2/lowagie/text/xml/simpleparser/SimpleXMLParser.java

9
fine-itext/src/com/fr/third/v2/lowagie/text/html/simpleparser/HTMLWorker.java

@ -804,10 +804,15 @@ public class HTMLWorker implements SimpleXMLDocHandler, DocListener {
public static final HashMap tagsSupported = new HashMap(); public static final HashMap tagsSupported = new HashMap();
public static final HashMap tagsPrefixSupported = new HashMap();
static { static {
StringTokenizer tok = new StringTokenizer(tagsSupportedString); StringTokenizer tok = new StringTokenizer(tagsSupportedString);
while (tok.hasMoreTokens()) while (tok.hasMoreTokens()){
tagsSupported.put(tok.nextToken(), null); String s = tok.nextToken();
tagsSupported.put(s, null);
tagsPrefixSupported.put(s.charAt(0), null);
}
} }
} }

30
fine-itext/src/com/fr/third/v2/lowagie/text/xml/simpleparser/SimpleXMLParser.java

@ -75,6 +75,8 @@
*/ */
package com.fr.third.v2.lowagie.text.xml.simpleparser; package com.fr.third.v2.lowagie.text.xml.simpleparser;
import com.fr.third.v2.lowagie.text.html.simpleparser.HTMLWorker;
import java.io.BufferedReader; import java.io.BufferedReader;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.IOException; import java.io.IOException;
@ -229,16 +231,13 @@ public final class SimpleXMLParser {
// we are in an unknown state before there's actual content // we are in an unknown state before there's actual content
case UNKNOWN: case UNKNOWN:
if(character == '<') { if(character == '<') {
saveState(TEXT); beginnOfTag((char) reader.read(), UNKNOWN);
state = TAG_ENCOUNTERED;
} }
break; break;
// we can encounter any content // we can encounter any content
case TEXT: case TEXT:
if(character == '<') { if(character == '<') {
flush(); beginnOfTag((char) reader.read(), TEXT);
saveState(state);
state = TAG_ENCOUNTERED;
} else if(character == '&') { } else if(character == '&') {
saveState(state); saveState(state);
entity.setLength(0); entity.setLength(0);
@ -481,6 +480,27 @@ public final class SimpleXMLParser {
} }
} }
/**
 * Decides what to do with a '&lt;' that has just been read, based on the
 * character that follows it.
 * <p>
 * If the following character is '/' or is the first character of a tag name
 * registered in {@link HTMLWorker#tagsPrefixSupported}, the parser switches to
 * {@code TAG_ENCOUNTERED}. Otherwise the '&lt;' is kept as literal text
 * (e.g. {@code <1111}), imitating how browsers treat it.
 * <p>
 * The following character is always handed back through
 * {@code previousCharacter}, unless it is the end-of-stream marker.
 *
 * @param c    the character following the '&lt;'; callers pass
 *             {@code (char) reader.read()}, so end of stream arrives as
 *             {@code (char) -1} rather than {@code -1}
 * @param type the state the parser was in when the '&lt;' was seen
 *             ({@code UNKNOWN} or {@code TEXT} at the visible call sites)
 */
public void beginnOfTag(char c, int type) {
    // A char is unsigned, so "c == -1" can never be true; after the caller's
    // narrowing cast the end-of-stream value -1 is '\uFFFF'. Compare against
    // that, and do not push the marker back as if it were real input.
    if (c == (char) -1) {
        previousCharacter = -1;
        return;
    }
    // Hand the look-ahead character back so it is not lost.
    previousCharacter = c;
    if (c == '/' || HTMLWorker.tagsPrefixSupported.containsKey(c)) {
        if (type == TEXT) {
            // Emit the text collected so far before the tag starts.
            flush();
        }
        saveState(TEXT);
        state = TAG_ENCOUNTERED;
        return;
    }
    // Not a recognised tag start: keep the '<' held in 'character' as text.
    // NOTE(review): 'state' is left unchanged here, including when type is
    // UNKNOWN - confirm that is intended for input that begins with such a '<'.
    text.append((char) character);
    nowhite = true;
}
/** /**
* Gets a state from the stack * Gets a state from the stack
* @return the previous state * @return the previous state

Loading…
Cancel
Save