diff --git a/fine-itext-old/src/com/fr/third/com/lowagie/text/html/simpleparser/HTMLWorker.java b/fine-itext-old/src/com/fr/third/com/lowagie/text/html/simpleparser/HTMLWorker.java
deleted file mode 100644
index 11e918722..000000000
--- a/fine-itext-old/src/com/fr/third/com/lowagie/text/html/simpleparser/HTMLWorker.java
+++ /dev/null
@@ -1,788 +0,0 @@
-/*
- * Copyright 2004 Paulo Soares
- *
- * The contents of this file are subject to the Mozilla Public License Version 1.1
- * (the "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the License.
- *
- * The Original Code is 'iText, a free JAVA-PDF library'.
- *
- * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
- * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
- * All Rights Reserved.
- * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
- * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
- *
- * Contributor(s): all the names of the contributors are added in the source code
- * where applicable.
- *
- * Alternatively, the contents of this file may be used under the terms of the
- * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
- * provisions of LGPL are applicable instead of those above. If you wish to
- * allow use of your version of this file only under the terms of the LGPL
- * License and not to allow others to use your version of this file under
- * the MPL, indicate your decision by deleting the provisions above and
- * replace them with the notice and other provisions required by the LGPL.
- * If you do not delete the provisions above, a recipient may use your version
- * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
- *
- * This library is free software; you can redistribute it and/or modify it
- * under the terms of the MPL as stated above or under the terms of the GNU
- * Library General Public License as published by the Free Software Foundation;
- * either version 2 of the License, or any later version.
- *
- * This library is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
- * details.
- *
- * Contributions by:
- * Lubos Strapko
- *
- * If you didn't download this code from the following link, you should check if
- * you aren't using an obsolete version:
- * http://www.lowagie.com/iText/
- */
-
-package com.fr.third.com.lowagie.text.html.simpleparser;
-
-import com.fr.third.com.lowagie.text.Chunk;
-import com.fr.third.com.lowagie.text.DocListener;
-import com.fr.third.com.lowagie.text.DocumentException;
-import com.fr.third.com.lowagie.text.Element;
-import com.fr.third.com.lowagie.text.ElementTags;
-import com.fr.third.com.lowagie.text.ExceptionConverter;
-import com.fr.third.com.lowagie.text.FontFactoryImp;
-import com.fr.third.com.lowagie.text.HeaderFooter;
-import com.fr.third.com.lowagie.text.Image;
-import com.fr.third.com.lowagie.text.List;
-import com.fr.third.com.lowagie.text.ListItem;
-import com.fr.third.com.lowagie.text.Paragraph;
-import com.fr.third.com.lowagie.text.Phrase;
-import com.fr.third.com.lowagie.text.Rectangle;
-import com.fr.third.com.lowagie.text.TextElementArray;
-import com.fr.third.com.lowagie.text.html.CSSUtils;
-import com.fr.third.com.lowagie.text.html.HtmlTags;
-import com.fr.third.com.lowagie.text.html.Markup;
-import com.fr.third.com.lowagie.text.pdf.PdfPTable;
-import com.fr.third.com.lowagie.text.pdf.draw.LineSeparator;
-import com.fr.third.com.lowagie.text.xml.simpleparser.SimpleXMLDocHandler;
-import com.fr.third.com.lowagie.text.xml.simpleparser.SimpleXMLParser;
-import com.fr.third.sun.misc.BASE64Decoder;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.Reader;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Stack;
-import java.util.StringTokenizer;
-
-public class HTMLWorker implements SimpleXMLDocHandler, DocListener {
-
- protected ArrayList objectList;
-
- protected DocListener document;
-
- private Paragraph currentParagraph;
-
- private ChainedProperties cprops = new ChainedProperties();
-
- private Stack stack = new Stack();
-
- private boolean pendingTR = false;
-
- private boolean pendingTD = false;
-
- private boolean pendingLI = false;
-
- private StyleSheet style = new StyleSheet();
-
- private boolean isPRE = false;
-
- private Stack tableState = new Stack();
-
- private boolean skipText = false;
-
- private HashMap interfaceProps;
-
- private FactoryProperties factoryProperties = new FactoryProperties();
-
- /** Creates a new instance of HTMLWorker
- * @param document A class that implements DocListener
- * */
- public HTMLWorker(DocListener document) {
- this.document = document;
- }
-
- public void setStyleSheet(StyleSheet style) {
- this.style = style;
- }
-
- public StyleSheet getStyleSheet() {
- return style;
- }
-
- public void setInterfaceProps(HashMap interfaceProps) {
- this.interfaceProps = interfaceProps;
- FontFactoryImp ff = null;
- if (interfaceProps != null)
- ff = (FontFactoryImp) interfaceProps.get("font_factory");
- if (ff != null)
- factoryProperties.setFontImp(ff);
- }
-
- public HashMap getInterfaceProps() {
- return interfaceProps;
- }
-
- public void parse(Reader reader) throws IOException {
- SimpleXMLParser.parse(this, null, reader, true);
- }
-
- public static ArrayList parseToList(Reader reader, StyleSheet style)
- throws IOException {
- return parseToList(reader, style, null);
- }
-
- public static ArrayList parseToList(Reader reader, StyleSheet style,
- HashMap interfaceProps) throws IOException {
- HTMLWorker worker = new HTMLWorker(null);
- if (style != null)
- worker.style = style;
- worker.document = worker;
- worker.setInterfaceProps(interfaceProps);
- worker.objectList = new ArrayList();
- worker.parse(reader);
- return worker.objectList;
- }
-
- public void endDocument() {
- try {
- for (int k = 0; k < stack.size(); ++k)
- document.add((Element) stack.elementAt(k));
- if (currentParagraph != null)
- document.add(currentParagraph);
- currentParagraph = null;
- } catch (Exception e) {
- throw new ExceptionConverter(e);
- }
- }
-
- public void startDocument() {
- HashMap h = new HashMap();
- style.applyStyle("body", h);
- cprops.addToChain("body", h);
- }
-
-
- public void startElement(String tag, HashMap h) {
- if (!tagsSupported.containsKey(tag))
- return;
- try {
- style.applyStyle(tag, h);
- if(tag.equals("p")){
- h.put(Markup.CSS_KEY_MARGINTOP, "16px");
- h.put(Markup.CSS_KEY_MARGINBOTTOM, "16px");
- }
- String follow = (String) FactoryProperties.followTags.get(tag);
- if (follow != null) {
- HashMap prop = new HashMap();
- prop.put(follow, null);
- FactoryProperties.insertStyle(h, this.cprops);
- prop.putAll(h);
-
- cprops.addToChain(follow, prop);
- return;
- }
- FactoryProperties.insertStyle(h, cprops);
- if (tag.equals(HtmlTags.ANCHOR)) {
- cprops.addToChain(tag, h);
- if (currentParagraph == null) {
- currentParagraph = new Paragraph();
- }
- stack.push(currentParagraph);
- currentParagraph = new Paragraph();
- return;
- }
- if (tag.equals(HtmlTags.NEWLINE)) {
- if (currentParagraph == null) {
- currentParagraph = new Paragraph();
- }
- currentParagraph.add(factoryProperties
- .createChunk("\n", cprops));
- return;
- }
- if (tag.equals(HtmlTags.HORIZONTALRULE)) {
- // Attempting to duplicate the behavior seen on Firefox with
- // http://www.w3schools.com/tags/tryit.asp?filename=tryhtml_hr_test
- // where an initial break is only inserted when the preceding element doesn't
- // end with a break, but a trailing break is always inserted.
- boolean addLeadingBreak = true;
- if (currentParagraph == null) {
- currentParagraph = new Paragraph();
- addLeadingBreak = false;
- }
- if (addLeadingBreak) { // Not a new paragraph
- int numChunks = currentParagraph.getChunks().size();
- if (numChunks == 0 ||
- ((Chunk)(currentParagraph.getChunks().get(numChunks - 1))).getContent().endsWith("\n"))
- addLeadingBreak = false;
- }
- String align = (String) h.get("align");
- int hrAlign = Element.ALIGN_CENTER;
- if (align != null) {
- if (align.equalsIgnoreCase("left"))
- hrAlign = Element.ALIGN_LEFT;
- if (align.equalsIgnoreCase("right"))
- hrAlign = Element.ALIGN_RIGHT;
- }
- String width = (String) h.get("width");
- float hrWidth = 1;
- if (width != null) {
- float tmpWidth = Markup.parseLength(width, Markup.DEFAULT_FONT_SIZE);
- if (tmpWidth > 0) hrWidth = tmpWidth;
- if (!width.endsWith("%"))
- hrWidth = 100; // Treat a pixel width as 100% for now.
- }
- String size = (String) h.get("size");
- float hrSize = 1;
- if (size != null) {
- float tmpSize = Markup.parseLength(size, Markup.DEFAULT_FONT_SIZE);
- if (tmpSize > 0)
- hrSize = tmpSize;
- }
- if (addLeadingBreak)
- currentParagraph.add(Chunk.NEWLINE);
- currentParagraph.add(new LineSeparator(hrSize, hrWidth, null, hrAlign, currentParagraph.getLeading()/2));
- currentParagraph.add(Chunk.NEWLINE);
- return;
- }
- if (tag.equals(HtmlTags.CHUNK) || tag.equals(HtmlTags.SPAN)) {
- cprops.addToChain(tag, h);
- return;
- }
- if (tag.equals(HtmlTags.IMAGE)) {
- String src = (String) h.get(ElementTags.SRC);
- if (src == null)
- return;
- cprops.addToChain(tag, h);
- Image img = null;
- if (interfaceProps != null) {
- ImageProvider ip = (ImageProvider) interfaceProps
- .get("img_provider");
- if (ip != null)
- img = ip.getImage(src, h, cprops, document);
- if (img == null) {
- HashMap images = (HashMap) interfaceProps
- .get("img_static");
- if (images != null) {
- Image tim = (Image) images.get(src);
- if (tim != null)
- img = Image.getInstance(tim);
- } else {
- if (!src.startsWith("http")) { // relative src references only
- String baseurl = (String) interfaceProps
- .get("img_baseurl");
- if (baseurl != null) {
- src = baseurl + src;
- img = Image.getInstance(src);
- }
- }
- }
- }
- }
- //处理base64编码图片
- if(src.startsWith("data")){
- BASE64Decoder decoder = new BASE64Decoder();
- String[] srcArray = src.split(",");
- String base64string = srcArray[srcArray.length -1];
- byte[] bytes = decoder.decodeBuffer(base64string);
- try {
- img = Image.getInstance(bytes);
- }catch (Exception e){
-
- }
-
- }
- if (img == null) {
- if (!src.startsWith("http")) {
- String path = cprops.getProperty("image_path");
- if (path == null)
- path = "";
- src = new File(path, src).getPath();
- }
- img = Image.getInstance(src);
- }
- if(img == null){
- return;
- }
- img.setSrcString(src);
- String align = (String) h.get("align");
- String width = (String) h.get("width");
- String height = (String) h.get("height");
- String before = cprops.getProperty("before");
- String after = cprops.getProperty("after");
- if (before != null)
- img.setSpacingBefore(Float.parseFloat(before));
- if (after != null)
- img.setSpacingAfter(Float.parseFloat(after));
- float actualFontSize = Markup.parseLength(cprops
- .getProperty(ElementTags.SIZE),
- Markup.DEFAULT_FONT_SIZE);
- if (actualFontSize <= 0f)
- actualFontSize = Markup.DEFAULT_FONT_SIZE;
- float widthInPoints = Markup.parseLength(width, actualFontSize);
- float heightInPoints = Markup.parseLength(height,
- actualFontSize);
- if (widthInPoints > 0 && heightInPoints > 0) {
- img.scaleAbsolute(widthInPoints, heightInPoints);
- } else if (widthInPoints > 0) {
- heightInPoints = img.getHeight() * widthInPoints
- / img.getWidth();
- img.scaleAbsolute(widthInPoints, heightInPoints);
- } else if (heightInPoints > 0) {
- widthInPoints = img.getWidth() * heightInPoints
- / img.getHeight();
- img.scaleAbsolute(widthInPoints, heightInPoints);
- }
- img.setWidthPercentage(0);
- if (align != null) {
- endElement("p");
- int ralign = Image.MIDDLE;
- if (align.equalsIgnoreCase("left"))
- ralign = Image.LEFT;
- else if (align.equalsIgnoreCase("right"))
- ralign = Image.RIGHT;
- img.setAlignment(ralign);
- Img i = null;
- boolean skip = false;
- if (interfaceProps != null) {
- i = (Img) interfaceProps.get("img_interface");
- if (i != null)
- skip = i.process(img, h, cprops, document);
- }
- if (!skip)
- document.add(img);
- cprops.removeChain(tag);
- } else {
- Chunk ck = new Chunk(img, 0, 0);
- if(cprops.hasPropertyInChain("img", "padding-left")){
- String ss = cprops.getPropertyFromChain("img", "padding-left");
- ck.setAttribute("padding-left", Float.toString(Markup.parseLength(ss)));
- }
- if(cprops.hasPropertyInChain("img", "padding-right")){
- String ss = cprops.getPropertyFromChain("img", "padding-right");
- ck.setAttribute("padding-right", Float.toString(Markup.parseLength(ss)));
- }
- cprops.removeChain(tag);
- if (currentParagraph == null) {
- currentParagraph = FactoryProperties
- .createParagraph(cprops);
- }
-
- currentParagraph.add(ck);
- }
- return;
- }
- endElement("p");
- if (tag.equals("h1") || tag.equals("h2") || tag.equals("h3")
- || tag.equals("h4") || tag.equals("h5") || tag.equals("h6")) {
- if (!h.containsKey(ElementTags.SIZE)) {
- int v = 7 - Integer.parseInt(tag.substring(1));
- h.put(ElementTags.SIZE, Integer.toString(v));
- }
- cprops.addToChain(tag, h);
- return;
- }
- if (tag.equals(HtmlTags.UNORDEREDLIST)) {
- if (pendingLI)
- endElement(HtmlTags.LISTITEM);
- skipText = true;
- cprops.addToChain(tag, h);
- List list = new List(false);
- try{
- list.setIndentationLeft(new Float(cprops.getProperty("indent")).floatValue());
- }catch (Exception e) {
- list.setAutoindent(true);
- }
- list.setListSymbol("\u2022");
- stack.push(list);
- return;
- }
- if (tag.equals(HtmlTags.ORDEREDLIST)) {
- if (pendingLI)
- endElement(HtmlTags.LISTITEM);
- skipText = true;
- cprops.addToChain(tag, h);
- List list = new List(true);
- try{
- list.setIndentationLeft(new Float(cprops.getProperty("indent")).floatValue());
- }catch (Exception e) {
- list.setAutoindent(true);
- }
- stack.push(list);
- return;
- }
- if (tag.equals(HtmlTags.LISTITEM)) {
- if (pendingLI)
- endElement(HtmlTags.LISTITEM);
- skipText = false;
- pendingLI = true;
- cprops.addToChain(tag, h);
- ListItem item = FactoryProperties.createListItem(cprops);
- stack.push(item);
- return;
- }
- if (tag.equals(HtmlTags.DIV) || tag.equals(HtmlTags.BODY) || tag.equals("p")) {
- cprops.addToChain(tag, h);
- return;
- }
- if (tag.equals(HtmlTags.PRE)) {
- if (!h.containsKey(ElementTags.FACE)) {
- h.put(ElementTags.FACE, "Courier");
- }
- cprops.addToChain(tag, h);
- isPRE = true;
- return;
- }
- if (tag.equals("tr")) {
- if (pendingTR)
- endElement("tr");
- skipText = true;
- pendingTR = true;
- cprops.addToChain("tr", h);
- return;
- }
- if (tag.equals("td") || tag.equals("th")) {
- if (pendingTD)
- endElement(tag);
- skipText = false;
- pendingTD = true;
- cprops.addToChain("td", h);
- stack.push(new IncCell(tag, cprops));
- return;
- }
- if (tag.equals("table")) {
- cprops.addToChain("table", h);
- IncTable table = new IncTable(h);
- stack.push(table);
- tableState.push(new boolean[] { pendingTR, pendingTD });
- pendingTR = pendingTD = false;
- skipText = true;
- return;
- }
- } catch (Exception e) {
- throw new ExceptionConverter(e);
- }
- }
-
-
-
- public void endElement(String tag) {
- if (!tagsSupported.containsKey(tag))
- return;
- try {
- String follow = (String) FactoryProperties.followTags.get(tag);
- if (follow != null) {
- cprops.removeChain(follow);
- return;
- }
- if (tag.equals("font") || tag.equals("span")) {
- cprops.removeChain(tag);
- return;
- }
- if (tag.equals("a")) {
- if (currentParagraph == null) {
- currentParagraph = new Paragraph();
- }
- boolean skip = false;
- if (interfaceProps != null) {
- ALink i = (ALink) interfaceProps.get("alink_interface");
- if (i != null)
- skip = i.process(currentParagraph, cprops);
- }
- if (!skip) {
- String href = cprops.getProperty("href");
- if (href != null) {
- ArrayList chunks = currentParagraph.getChunks();
- int size = chunks.size();
- for (int k = 0; k < size; ++k) {
- Chunk ck = (Chunk) chunks.get(k);
- ck.setAnchor(href);
- }
- }
- }
- Paragraph tmp = (Paragraph) stack.pop();
- Phrase tmp2 = new Phrase();
- tmp2.add(currentParagraph);
- tmp.add(tmp2);
- currentParagraph = tmp;
- cprops.removeChain("a");
- return;
- }
- if (tag.equals("br")) {
- return;
- }
- if (currentParagraph != null) {
- if (stack.empty())
- document.add(currentParagraph);
- else {
- Object obj = stack.pop();
- if (obj instanceof TextElementArray) {
- TextElementArray current = (TextElementArray) obj;
- current.add(currentParagraph);
- }
- stack.push(obj);
- }
- }
- currentParagraph = null;
- if (tag.equals(HtmlTags.UNORDEREDLIST)
- || tag.equals(HtmlTags.ORDEREDLIST)) {
- if (pendingLI)
- endElement(HtmlTags.LISTITEM);
- skipText = false;
- cprops.removeChain(tag);
- if (stack.empty())
- return;
- Object obj = stack.pop();
- if (!(obj instanceof List)) {
- stack.push(obj);
- return;
- }
- if (stack.empty())
- document.add((Element) obj);
- else
- ((TextElementArray) stack.peek()).add(obj);
- return;
- }
- if (tag.equals(HtmlTags.LISTITEM)) {
- pendingLI = false;
- skipText = true;
- cprops.removeChain(tag);
- if (stack.empty())
- return;
- Object obj = stack.pop();
- if (!(obj instanceof ListItem)) {
- stack.push(obj);
- return;
- }
- if (stack.empty()) {
- document.add((Element) obj);
- return;
- }
- Object list = stack.pop();
- if (!(list instanceof List)) {
- stack.push(list);
- return;
- }
- ListItem item = (ListItem) obj;
- ((List) list).add(item);
- ArrayList cks = item.getChunks();
- if (!cks.isEmpty())
- item.getListSymbol()
- .setFont(((Chunk) cks.get(0)).getFont());
- stack.push(list);
- return;
- }
- if (tag.equals("div") || tag.equals("body")) {
- cprops.removeChain(tag);
- return;
- }
- if (tag.equals(HtmlTags.PRE)) {
- cprops.removeChain(tag);
- isPRE = false;
- return;
- }
- if (tag.equals("p")) {
- cprops.removeChain(tag);
- return;
- }
- if (tag.equals("h1") || tag.equals("h2") || tag.equals("h3")
- || tag.equals("h4") || tag.equals("h5") || tag.equals("h6")) {
- cprops.removeChain(tag);
- return;
- }
- if (tag.equals("table")) {
- if (pendingTR)
- endElement("tr");
- cprops.removeChain("table");
- IncTable table = (IncTable) stack.pop();
- PdfPTable tb = table.buildTable();
- tb.setSplitRows(true);
- if (stack.empty())
- document.add(tb);
- else
- ((TextElementArray) stack.peek()).add(tb);
- boolean state[] = (boolean[]) tableState.pop();
- pendingTR = state[0];
- pendingTD = state[1];
- skipText = false;
- return;
- }
- if (tag.equals("tr")) {
- if (pendingTD)
- endElement("td");
- pendingTR = false;
- String rowHeightPx = cprops.getLastChainProperty("height");
-
- cprops.removeChain("tr");
- ArrayList cells = new ArrayList();
- IncTable table = null;
- while (true) {
- Object obj = stack.pop();
- if (obj instanceof IncCell) {
- cells.add(((IncCell) obj).getCell());
- }
- if (obj instanceof IncTable) {
- table = (IncTable) obj;
- break;
- }
- }
- float rowHeight = 0.0f;
- if(rowHeightPx!=null){
- rowHeight = CSSUtils.parseFloat(rowHeightPx);
- }
- table.addCols(cells);
- table.endRow(rowHeight);
-
- stack.push(table);
- skipText = true;
- return;
- }
- if (tag.equals("td") || tag.equals("th")) {
- pendingTD = false;
- cprops.removeChain("td");
- skipText = true;
- return;
- }
- } catch (Exception e) {
- throw new ExceptionConverter(e);
- }
- }
-
- public void text(String str) {
- if (skipText)
- return;
- String content = str;
- if (isPRE) {
- if (currentParagraph == null) {
- currentParagraph = FactoryProperties.createParagraph(cprops);
- }
- Chunk chunk = factoryProperties.createChunk(content, cprops);
- currentParagraph.add(chunk);
- return;
- }
- if (content.trim().length() == 0 && content.indexOf(' ') < 0) {
- return;
- }
-
- StringBuffer buf = new StringBuffer();
- int len = content.length();
- char character;
- boolean newline = false;
- for (int i = 0; i < len; i++) {
- switch (character = content.charAt(i)) {
- case ' ':
- if (!newline) {
- buf.append(character);
- }
- break;
- case '\n':
- if (i > 0) {
- newline = true;
- buf.append(' ');
- }
- break;
- case '\r':
- break;
- case '\t':
- break;
- default:
- newline = false;
- buf.append(character);
- }
- }
- if (currentParagraph == null) {
- currentParagraph = FactoryProperties.createParagraph(cprops);
- }
- Chunk chunk = factoryProperties.createChunk(buf.toString(), cprops);
- currentParagraph.add(chunk);
- }
-
- public boolean add(Element element) throws DocumentException {
- objectList.add(element);
- return true;
- }
-
- public void clearTextWrap() throws DocumentException {
- }
-
- public void close() {
- }
-
- public boolean newPage() {
- return true;
- }
-
- public void open() {
- }
-
- public void resetFooter() {
- }
-
- public void resetHeader() {
- }
-
- public void resetPageCount() {
- }
-
- public void setFooter(HeaderFooter footer) {
- }
-
- public void setHeader(HeaderFooter header) {
- }
-
- public boolean setMarginMirroring(boolean marginMirroring) {
- return false;
- }
-
- /**
- * @see DocListener#setMarginMirroring(boolean)
- * @since 2.1.6
- */
- public boolean setMarginMirroringTopBottom(boolean marginMirroring) {
- return false;
- }
-
- public boolean setMargins(float marginLeft, float marginRight,
- float marginTop, float marginBottom) {
- return true;
- }
-
- public void setPageCount(int pageN) {
- }
-
- public boolean setPageSize(Rectangle pageSize) {
- return true;
- }
-
- public static final String tagsSupportedString = "ol ul li a pre font span br p div body table td th tr i b u sub sup em strong s strike"
- + " h1 h2 h3 h4 h5 h6 img hr";
-
- public static final HashMap tagsSupported = new HashMap();
- public static final HashMap tagsPrefixSupported = new HashMap();
-
- static {
- StringTokenizer tok = new StringTokenizer(tagsSupportedString);
- while (tok.hasMoreTokens()) {
- String s = tok.nextToken();
- tagsSupported.put(s, null);
- tagsPrefixSupported.put(s.charAt(0), null);
- }
- }
-}
diff --git a/fine-itext-old/src/com/fr/third/com/lowagie/text/xml/simpleparser/SimpleXMLParser.java b/fine-itext-old/src/com/fr/third/com/lowagie/text/xml/simpleparser/SimpleXMLParser.java
deleted file mode 100755
index d61de9b2e..000000000
--- a/fine-itext-old/src/com/fr/third/com/lowagie/text/xml/simpleparser/SimpleXMLParser.java
+++ /dev/null
@@ -1,780 +0,0 @@
-/*
- * Copyright 2003 Paulo Soares
- *
- * The contents of this file are subject to the Mozilla Public License Version 1.1
- * (the "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the License.
- *
- * The Original Code is 'iText, a free JAVA-PDF library'.
- *
- * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
- * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
- * All Rights Reserved.
- * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
- * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
- *
- * Contributor(s): all the names of the contributors are added in the source code
- * where applicable.
- *
- * Alternatively, the contents of this file may be used under the terms of the
- * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
- * provisions of LGPL are applicable instead of those above. If you wish to
- * allow use of your version of this file only under the terms of the LGPL
- * License and not to allow others to use your version of this file under
- * the MPL, indicate your decision by deleting the provisions above and
- * replace them with the notice and other provisions required by the LGPL.
- * If you do not delete the provisions above, a recipient may use your version
- * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
- *
- * This library is free software; you can redistribute it and/or modify it
- * under the terms of the MPL as stated above or under the terms of the GNU
- * Library General Public License as published by the Free Software Foundation;
- * either version 2 of the License, or any later version.
- *
- * This library is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
- * details.
- *
- * If you didn't download this code from the following link, you should check if
- * you aren't using an obsolete version:
- * http://www.lowagie.com/iText/
- *
- * The code to recognize the encoding in this class and in the convenience class IanaEncodings was taken from Apache Xerces published under the following license:
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * Part of this code is based on the Quick-and-Dirty XML parser by Steven Brandt.
- * The code for the Quick-and-Dirty parser was published in JavaWorld (java tip 128).
- * Steven Brandt and JavaWorld gave permission to use the code for free.
- * (Bruno Lowagie and Paulo Soares chose to use it under the MPL/LGPL in
- * conformance with the rest of the code).
- * The original code can be found on this url: http://www.javaworld.com/javatips/jw-javatip128_p.html.
- * It was substantially refactored by Bruno Lowagie.
- *
- * The method 'private static String getEncodingName(byte[] b4)' was found
- * in org.apache.xerces.impl.XMLEntityManager, originaly published by the
- * Apache Software Foundation under the Apache Software License; now being
- * used in iText under the MPL.
- */
-package com.fr.third.com.lowagie.text.xml.simpleparser;
-
-import com.fr.third.com.lowagie.text.html.simpleparser.HTMLWorker;
-import java.io.BufferedReader;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.Reader;
-import java.util.HashMap;
-import java.util.Stack;
-
-/**
- * A simple XML and HTML parser. This parser is, like the SAX parser,
- * an event based parser, but with much less functionality.
- *
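- * The parser can:
- * <ul>
- * <li>It recognizes the <code>&lt;[CDATA[ ... ]]&gt;</code> construct
- * <li>It maps lines ending in <code>\r\n</code> and <code>\r</code> to <code>\n</code>
- * on input, in accordance with the XML Specification, Section 2.11
- * </ul>
- *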
- */
-public final class SimpleXMLParser {
- /** possible states */
- private final static int UNKNOWN = 0;
- private final static int TEXT = 1;
- private final static int TAG_ENCOUNTERED = 2;
- private final static int EXAMIN_TAG = 3;
- private final static int TAG_EXAMINED = 4;
- private final static int IN_CLOSETAG = 5;
- private final static int SINGLE_TAG = 6;
- private final static int CDATA = 7;
- private final static int COMMENT = 8;
- private final static int PI = 9;
- private final static int ENTITY = 10;
- private final static int QUOTE = 11;
- private final static int ATTRIBUTE_KEY = 12;
- private final static int ATTRIBUTE_EQUAL = 13;
- private final static int ATTRIBUTE_VALUE = 14;
-
- /** the state stack */
- Stack stack;
- /** The current character. */
- int character = 0;
- /** The previous character. */
- int previousCharacter = -1;
- /** the line we are currently reading */
- int lines = 1;
- /** the column where the current character occurs */
- int columns = 0;
- /** was the last character equivalent to a newline? */
- boolean eol = false;
- /**
- * A boolean indicating if the next character should be taken into account
- * if it's a space character. When nospace is false, the previous character
- * wasn't whitespace.
- * @since 2.1.5
- */
- boolean nowhite = false;
- /** the current state */
- int state;
- /** Are we parsing HTML? */
- boolean html;
- /** current text (whatever is encountered between tags) */
- StringBuffer text = new StringBuffer();
- /** current entity (whatever is encountered between & and ;) */
- StringBuffer entity = new StringBuffer();
- /** current tagname */
- String tag = null;
- /** current attributes */
- HashMap attributes = null;
- /** The handler to which we are going to forward document content */
- SimpleXMLDocHandler doc;
- /** The handler to which we are going to forward comments. */
- SimpleXMLDocHandlerComment comment;
- /** Keeps track of the number of tags that are open. */
- int nested = 0;
- /** the quote character that was used to open the quote. */
- int quoteCharacter = '"';
- /** the attribute key. */
- String attributekey = null;
- /** the attribute value. */
- String attributevalue = null;
-
- /**
- * Creates a Simple XML parser object.
- * Call go(BufferedReader) immediately after creation.
- */
- private SimpleXMLParser(SimpleXMLDocHandler doc, SimpleXMLDocHandlerComment comment, boolean html) {
- this.doc = doc;
- this.comment = comment;
- this.html = html;
- stack = new Stack();
- state = html ? TEXT : UNKNOWN;
- }
-
- /**
- * Does the actual parsing. Perform this immediately
- * after creating the parser object.
- */
- private void go(Reader r) throws IOException {
- BufferedReader reader;
- if (r instanceof BufferedReader)
- reader = (BufferedReader)r;
- else
- reader = new BufferedReader(r);
- doc.startDocument();
- while(true) {
- // read a new character
- if (previousCharacter == -1) {
- character = reader.read();
- }
- // or re-examine the previous character
- else {
- character = previousCharacter;
- previousCharacter = -1;
- }
-
- // the end of the file was reached
- if (character == -1) {
- if (html) {
- if (html && state == TEXT)
- flush();
- doc.endDocument();
- } else {
- throwException("Missing end tag");
- }
- return;
- }
-
- // dealing with \n and \r
- if (character == '\n' && eol) {
- eol = false;
- continue;
- } else if (eol) {
- eol = false;
- } else if (character == '\n') {
- lines++;
- columns = 0;
- } else if (character == '\r') {
- eol = true;
- character = '\n';
- lines++;
- columns = 0;
- } else {
- columns++;
- }
-
- switch(state) {
- // we are in an unknown state before there's actual content
- case UNKNOWN:
- if(character == '<') {
- beginnOfTag((char) reader.read(), UNKNOWN);
- }
- break;
- // we can encounter any content
- case TEXT:
- if(character == '<') {
- beginnOfTag((char) reader.read(), TEXT);
- } else if(character == '&') {
- saveState(state);
- entity.setLength(0);
- state = ENTITY;
- } else if (Character.isWhitespace((char)character) && character != 12288) {
- if (nowhite)
- text.append((char)character);
- nowhite = false;
- } else {
- text.append((char)character);
- nowhite = true;
- }
- break;
- // we have just seen a < and are wondering what we are looking at
- // true
- * @return the escaped string
- */
- public static String escapeXML(String s, boolean onlyASCII) {
- char cc[] = s.toCharArray();
- int len = cc.length;
- StringBuffer sb = new StringBuffer();
- for (int k = 0; k < len; ++k) {
- int c = cc[k];
- switch (c) {
- case '<':
- sb.append("&lt;");
- break;
- case '>':
- sb.append("&gt;");
- break;
- case '&':
- sb.append("&amp;");
- break;
- case '"':
- sb.append("&quot;");
- break;
- case '\'':
- sb.append("&apos;");
- break;
- default:
- if ((c == 0x9) || (c == 0xA) || (c == 0xD)
- || ((c >= 0x20) && (c <= 0xD7FF))
- || ((c >= 0xE000) && (c <= 0xFFFD))
- || ((c >= 0x10000) && (c <= 0x10FFFF))) {
- if (onlyASCII && c > 127)
- sb.append("&#").append(c).append(';');
- else
- sb.append((char)c);
- }
- }
- }
- return sb.toString();
- }
- /**
- * Returns the IANA encoding name that is auto-detected from
- * the bytes specified, with the endian-ness of that encoding where appropriate.
- * (method found in org.apache.xerces.impl.XMLEntityManager, originally published
- * by the Apache Software Foundation under the Apache Software License; now being
- * used in iText under the MPL)
- * @param b4 The first four bytes of the input.
- * @return an IANA-encoding string
- */
- private static String getEncodingName(byte[] b4) {
-
- // UTF-16, with BOM
- int b0 = b4[0] & 0xFF;
- int b1 = b4[1] & 0xFF;
- if (b0 == 0xFE && b1 == 0xFF) {
- // UTF-16, big-endian
- return "UTF-16BE";
- }
- if (b0 == 0xFF && b1 == 0xFE) {
- // UTF-16, little-endian
- return "UTF-16LE";
- }
-
- // UTF-8 with a BOM
- int b2 = b4[2] & 0xFF;
- if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
- return "UTF-8";
- }
-
- // other encodings
- int b3 = b4[3] & 0xFF;
- if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
- // UCS-4, big endian (1234)
- return "ISO-10646-UCS-4";
- }
- if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
- // UCS-4, little endian (4321)
- return "ISO-10646-UCS-4";
- }
- if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
- // UCS-4, unusual octet order (2143)
- // REVISIT: What should this be?
- return "ISO-10646-UCS-4";
- }
- if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
- // UCS-4, unusual octet order (3412)
- // REVISIT: What should this be?
- return "ISO-10646-UCS-4";
- }
- if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
- // UTF-16, big-endian, no BOM
- // (or could turn out to be UCS-2...
- // REVISIT: What should this be?
- return "UTF-16BE";
- }
- if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
- // UTF-16, little-endian, no BOM
- // (or could turn out to be UCS-2...
- return "UTF-16LE";
- }
- if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
- // EBCDIC
- // a la xerces1, return CP037 instead of EBCDIC here
- return "CP037";
- }
-
- // default encoding
- return "UTF-8";
- }
-}
\ No newline at end of file
diff --git a/fine-itext-old/src/main/java/com/fr/third/com/lowagie/text/html/simpleparser/HTMLWorker.java b/fine-itext-old/src/main/java/com/fr/third/com/lowagie/text/html/simpleparser/HTMLWorker.java
index b48594a17..11e918722 100644
--- a/fine-itext-old/src/main/java/com/fr/third/com/lowagie/text/html/simpleparser/HTMLWorker.java
+++ b/fine-itext-old/src/main/java/com/fr/third/com/lowagie/text/html/simpleparser/HTMLWorker.java
@@ -775,11 +775,14 @@ public class HTMLWorker implements SimpleXMLDocHandler, DocListener {
+ " h1 h2 h3 h4 h5 h6 img hr";
public static final HashMap tagsSupported = new HashMap();
+ public static final HashMap tagsPrefixSupported = new HashMap(); // keys: first character of every supported tag name; values are always null
static {
StringTokenizer tok = new StringTokenizer(tagsSupportedString);
- while (tok.hasMoreTokens())
- tagsSupported.put(tok.nextToken(), null);
+ while (tok.hasMoreTokens()) {
+ String s = tok.nextToken(); // one tag name from tagsSupportedString
+ tagsSupported.put(s, null);
+ tagsPrefixSupported.put(s.charAt(0), null); // char key is autoboxed to Character; SimpleXMLParser.beginnOfTag looks it up with containsKey
+ }
}
-
}
diff --git a/fine-itext-old/src/main/java/com/fr/third/com/lowagie/text/xml/simpleparser/SimpleXMLParser.java b/fine-itext-old/src/main/java/com/fr/third/com/lowagie/text/xml/simpleparser/SimpleXMLParser.java
index 0ef41865b..e2873f902 100644
--- a/fine-itext-old/src/main/java/com/fr/third/com/lowagie/text/xml/simpleparser/SimpleXMLParser.java
+++ b/fine-itext-old/src/main/java/com/fr/third/com/lowagie/text/xml/simpleparser/SimpleXMLParser.java
@@ -75,6 +75,7 @@
*/
package com.fr.third.com.lowagie.text.xml.simpleparser;
+import com.fr.third.com.lowagie.text.html.simpleparser.HTMLWorker;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
@@ -117,7 +118,7 @@ public final class SimpleXMLParser {
private final static int ATTRIBUTE_KEY = 12;
private final static int ATTRIBUTE_EQUAL = 13;
private final static int ATTRIBUTE_VALUE = 14;
-
+
/** the state stack */
Stack stack;
/** The current character. */
@@ -161,7 +162,7 @@ public final class SimpleXMLParser {
String attributekey = null;
/** the attribute value. */
String attributevalue = null;
-
+
/**
* Creates a Simple XML parser object.
* Call go(BufferedReader) immediately after creation.
@@ -207,7 +208,7 @@ public final class SimpleXMLParser {
}
return;
}
-
+
// dealing with \n and \r
if (character == '\n' && eol) {
eol = false;
@@ -225,21 +226,18 @@ public final class SimpleXMLParser {
} else {
columns++;
}
-
+
switch(state) {
// we are in an unknown state before there's actual content
case UNKNOWN:
if(character == '<') {
- saveState(TEXT);
- state = TAG_ENCOUNTERED;
+ beginnOfTag((char) reader.read(), UNKNOWN);
}
break;
// we can encounter any content
case TEXT:
if(character == '<') {
- flush();
- saveState(state);
- state = TAG_ENCOUNTERED;
+ beginnOfTag((char) reader.read(), TEXT);
} else if(character == '&') {
saveState(state);
entity.setLength(0);
@@ -499,6 +497,45 @@ public final class SimpleXMLParser {
private void saveState(int s) {
stack.push(new Integer(s));
}
+
+    /**
+     * Decides how to treat a {@code <} once the character following it is known.
+     * <p>
+     * When that character is {@code /} or the first character of a supported tag name
+     * (a key of {@link HTMLWorker#tagsPrefixSupported}), tag parsing starts. Otherwise
+     * the {@code <} is kept as literal text, e.g. {@code <1111}, imitating browsers.
+     * At end of input nothing is recorded and the method simply returns.
+     *
+     * @param c    the character read right after the {@code <}; {@code (char) -1} when
+     *             the reader had no more input
+     * @param type the parser state in which the {@code <} was seen; for {@code TEXT}
+     *             the buffered text is flushed before tag parsing starts
+     */
+    public void beginnOfTag(char c, int type) {
+        // The callers pass (char) reader.read(), so end of input arrives as (char) -1.
+        // A char is never equal to the int -1, so the sentinel must be compared as a char.
+        if (c == (char) -1) {
+            // NOTE(review): previousCharacter is deliberately left untouched here; this
+            // assumes the main loop has already reset it and will see end of input on
+            // its next read - confirm against go().
+            return;
+        }
+        // Hand the look-ahead character back for processing.
+        // NOTE(review): presumably the main loop re-examines previousCharacter - confirm.
+        previousCharacter = c;
+        if (c == '/' || HTMLWorker.tagsPrefixSupported.containsKey(c)) {
+            if (type == TEXT) {
+                flush();
+            }
+            saveState(TEXT);
+            state = TAG_ENCOUNTERED;
+            return;
+        }
+        // Not a tag: keep the '<' (still held in character) as ordinary text.
+        text.append((char) character);
+        nowhite = true;
+    }
+
/**
* Flushes the text that is currently in the buffer.
* The text can be ignored, added to the document