|
|
@ -75,6 +75,7 @@ |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
package com.fr.third.com.lowagie.text.xml.simpleparser; |
|
|
|
package com.fr.third.com.lowagie.text.xml.simpleparser; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import com.fr.third.com.lowagie.text.html.simpleparser.HTMLWorker; |
|
|
|
import java.io.BufferedReader; |
|
|
|
import java.io.BufferedReader; |
|
|
|
import java.io.ByteArrayOutputStream; |
|
|
|
import java.io.ByteArrayOutputStream; |
|
|
|
import java.io.IOException; |
|
|
|
import java.io.IOException; |
|
|
@ -117,7 +118,7 @@ public final class SimpleXMLParser { |
|
|
|
private final static int ATTRIBUTE_KEY = 12; |
|
|
|
private final static int ATTRIBUTE_KEY = 12; |
|
|
|
private final static int ATTRIBUTE_EQUAL = 13; |
|
|
|
private final static int ATTRIBUTE_EQUAL = 13; |
|
|
|
private final static int ATTRIBUTE_VALUE = 14; |
|
|
|
private final static int ATTRIBUTE_VALUE = 14; |
|
|
|
|
|
|
|
|
|
|
|
/** the state stack */ |
|
|
|
/** the state stack */ |
|
|
|
Stack stack; |
|
|
|
Stack stack; |
|
|
|
/** The current character. */ |
|
|
|
/** The current character. */ |
|
|
@ -161,7 +162,7 @@ public final class SimpleXMLParser { |
|
|
|
String attributekey = null; |
|
|
|
String attributekey = null; |
|
|
|
/** the attribute value. */ |
|
|
|
/** the attribute value. */ |
|
|
|
String attributevalue = null; |
|
|
|
String attributevalue = null; |
|
|
|
|
|
|
|
|
|
|
|
/** |
|
|
|
/** |
|
|
|
* Creates a Simple XML parser object. |
|
|
|
* Creates a Simple XML parser object. |
|
|
|
* Call go(BufferedReader) immediately after creation. |
|
|
|
* Call go(BufferedReader) immediately after creation. |
|
|
@ -207,7 +208,7 @@ public final class SimpleXMLParser { |
|
|
|
} |
|
|
|
} |
|
|
|
return; |
|
|
|
return; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// dealing with \n and \r
|
|
|
|
// dealing with \n and \r
|
|
|
|
if (character == '\n' && eol) { |
|
|
|
if (character == '\n' && eol) { |
|
|
|
eol = false; |
|
|
|
eol = false; |
|
|
@ -225,21 +226,18 @@ public final class SimpleXMLParser { |
|
|
|
} else { |
|
|
|
} else { |
|
|
|
columns++; |
|
|
|
columns++; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
switch(state) { |
|
|
|
switch(state) { |
|
|
|
// we are in an unknown state before there's actual content
|
|
|
|
// we are in an unknown state before there's actual content
|
|
|
|
case UNKNOWN: |
|
|
|
case UNKNOWN: |
|
|
|
if(character == '<') { |
|
|
|
if(character == '<') { |
|
|
|
saveState(TEXT); |
|
|
|
beginnOfTag((char) reader.read(), UNKNOWN); |
|
|
|
state = TAG_ENCOUNTERED; |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
break; |
|
|
|
break; |
|
|
|
// we can encounter any content
|
|
|
|
// we can encounter any content
|
|
|
|
case TEXT: |
|
|
|
case TEXT: |
|
|
|
if(character == '<') { |
|
|
|
if(character == '<') { |
|
|
|
flush(); |
|
|
|
beginnOfTag((char) reader.read(), TEXT); |
|
|
|
saveState(state); |
|
|
|
|
|
|
|
state = TAG_ENCOUNTERED; |
|
|
|
|
|
|
|
} else if(character == '&') { |
|
|
|
} else if(character == '&') { |
|
|
|
saveState(state); |
|
|
|
saveState(state); |
|
|
|
entity.setLength(0); |
|
|
|
entity.setLength(0); |
|
|
@ -499,6 +497,27 @@ public final class SimpleXMLParser { |
|
|
|
private void saveState(int s) { |
|
|
|
private void saveState(int s) { |
|
|
|
stack.push(new Integer(s)); |
|
|
|
stack.push(new Integer(s)); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** |
|
|
|
|
|
|
|
* 处理标签的开头,若不在支持标签范围内,将<符号作为文本处理,例:<1111 (仿造浏览器的处理方式) |
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
public void beginnOfTag(char c, int type) { |
|
|
|
|
|
|
|
previousCharacter = c; |
|
|
|
|
|
|
|
if (c == -1) { |
|
|
|
|
|
|
|
return; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (c == '/' || HTMLWorker.tagsPrefixSupported.containsKey(c)) { |
|
|
|
|
|
|
|
if (type == TEXT) { |
|
|
|
|
|
|
|
flush(); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
saveState(TEXT); |
|
|
|
|
|
|
|
state = TAG_ENCOUNTERED; |
|
|
|
|
|
|
|
return; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
text.append((char) character); |
|
|
|
|
|
|
|
nowhite = true; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/** |
|
|
|
/** |
|
|
|
* Flushes the text that is currently in the buffer. |
|
|
|
* Flushes the text that is currently in the buffer. |
|
|
|
* The text can be ignored, added to the document |
|
|
|
* The text can be ignored, added to the document |
|
|
|