@ -75,6 +75,7 @@
* /
package com.fr.third.com.lowagie.text.xml.simpleparser ;
import com.fr.third.com.lowagie.text.html.simpleparser.HTMLWorker ;
import java.io.BufferedReader ;
import java.io.ByteArrayOutputStream ;
import java.io.IOException ;
@ -230,16 +231,13 @@ public final class SimpleXMLParser {
// we are in an unknown state before there's actual content
case UNKNOWN :
if ( character = = '<' ) {
saveState ( TEXT ) ;
state = TAG_ENCOUNTERED ;
beginnOfTag ( ( char ) reader . read ( ) , UNKNOWN ) ;
}
break ;
// we can encounter any content
case TEXT :
if ( character = = '<' ) {
flush ( ) ;
saveState ( state ) ;
state = TAG_ENCOUNTERED ;
beginnOfTag ( ( char ) reader . read ( ) , UNKNOWN ) ;
} else if ( character = = '&' ) {
saveState ( state ) ;
entity . setLength ( 0 ) ;
@ -499,6 +497,27 @@ public final class SimpleXMLParser {
/**
 * Pushes the given state value onto the parser's state stack.
 *
 * @param s the state value to push
 */
private void saveState(int s) {
    // Integer.valueOf replaces the deprecated Integer(int) constructor
    // and may reuse cached boxed instances instead of allocating.
    stack.push(Integer.valueOf(s));
}
/ * *
* 处理标签的开头 , 若不在支持标签范围内 , 将 < 符号作为文本处理 , 例 : < 1111 ( 仿造浏览器的处理方式 )
* /
public void beginnOfTag ( char c , int type ) {
previousCharacter = c ;
if ( c = = - 1 ) {
return ;
}
if ( c = = '/' | | HTMLWorker . tagsPrefixSupported . containsKey ( c ) ) {
if ( type = = TEXT ) {
flush ( ) ;
}
saveState ( TEXT ) ;
state = TAG_ENCOUNTERED ;
return ;
}
text . append ( ( char ) character ) ;
nowhite = true ;
}
/ * *
* Flushes the text that is currently in the buffer .
* The text can be ignored , added to the document