|
|
@ -75,6 +75,7 @@ |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
package com.fr.third.com.lowagie.text.xml.simpleparser; |
|
|
|
package com.fr.third.com.lowagie.text.xml.simpleparser; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import com.fr.third.com.lowagie.text.html.simpleparser.HTMLWorker; |
|
|
|
import java.io.BufferedReader; |
|
|
|
import java.io.BufferedReader; |
|
|
|
import java.io.ByteArrayOutputStream; |
|
|
|
import java.io.ByteArrayOutputStream; |
|
|
|
import java.io.IOException; |
|
|
|
import java.io.IOException; |
|
|
@ -230,16 +231,13 @@ public final class SimpleXMLParser { |
|
|
|
// we are in an unknown state before there's actual content
|
|
|
|
// we are in an unknown state before there's actual content
|
|
|
|
case UNKNOWN: |
|
|
|
case UNKNOWN: |
|
|
|
if(character == '<') { |
|
|
|
if(character == '<') { |
|
|
|
saveState(TEXT); |
|
|
|
beginnOfTag((char) reader.read(), UNKNOWN); |
|
|
|
state = TAG_ENCOUNTERED; |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
break; |
|
|
|
break; |
|
|
|
// we can encounter any content
|
|
|
|
// we can encounter any content
|
|
|
|
case TEXT: |
|
|
|
case TEXT: |
|
|
|
if(character == '<') { |
|
|
|
if(character == '<') { |
|
|
|
flush(); |
|
|
|
beginnOfTag((char) reader.read(), TEXT); |
|
|
|
saveState(state); |
|
|
|
|
|
|
|
state = TAG_ENCOUNTERED; |
|
|
|
|
|
|
|
} else if(character == '&') { |
|
|
|
} else if(character == '&') { |
|
|
|
saveState(state); |
|
|
|
saveState(state); |
|
|
|
entity.setLength(0); |
|
|
|
entity.setLength(0); |
|
|
@ -499,6 +497,27 @@ public final class SimpleXMLParser { |
|
|
|
private void saveState(int s) { |
|
|
|
private void saveState(int s) { |
|
|
|
stack.push(new Integer(s)); |
|
|
|
stack.push(new Integer(s)); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** |
|
|
|
|
|
|
|
* 处理标签的开头,若不在支持标签范围内,将<符号作为文本处理,例:<1111 (仿造浏览器的处理方式) |
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
public void beginnOfTag(char c, int type) { |
|
|
|
|
|
|
|
previousCharacter = c; |
|
|
|
|
|
|
|
if (c == -1) { |
|
|
|
|
|
|
|
return; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (c == '/' || HTMLWorker.tagsPrefixSupported.containsKey(c)) { |
|
|
|
|
|
|
|
if (type == TEXT) { |
|
|
|
|
|
|
|
flush(); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
saveState(TEXT); |
|
|
|
|
|
|
|
state = TAG_ENCOUNTERED; |
|
|
|
|
|
|
|
return; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
text.append((char) character); |
|
|
|
|
|
|
|
nowhite = true; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/** |
|
|
|
/** |
|
|
|
* Flushes the text that is currently in the buffer. |
|
|
|
* Flushes the text that is currently in the buffer. |
|
|
|
* The text can be ignored, added to the document |
|
|
|
* The text can be ignored, added to the document |
|
|
|