diff --git a/fine-itext-old/src/com/fr/third/com/lowagie/text/html/simpleparser/HTMLWorker.java b/fine-itext-old/src/com/fr/third/com/lowagie/text/html/simpleparser/HTMLWorker.java
new file mode 100644
index 000000000..11e918722
--- /dev/null
+++ b/fine-itext-old/src/com/fr/third/com/lowagie/text/html/simpleparser/HTMLWorker.java
@@ -0,0 +1,788 @@
+/*
+ * Copyright 2004 Paulo Soares
+ *
+ * The contents of this file are subject to the Mozilla Public License Version 1.1
+ * (the "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the License.
+ *
+ * The Original Code is 'iText, a free JAVA-PDF library'.
+ *
+ * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
+ * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
+ * All Rights Reserved.
+ * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
+ * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
+ *
+ * Contributor(s): all the names of the contributors are added in the source code
+ * where applicable.
+ *
+ * Alternatively, the contents of this file may be used under the terms of the
+ * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
+ * provisions of LGPL are applicable instead of those above. If you wish to
+ * allow use of your version of this file only under the terms of the LGPL
+ * License and not to allow others to use your version of this file under
+ * the MPL, indicate your decision by deleting the provisions above and
+ * replace them with the notice and other provisions required by the LGPL.
+ * If you do not delete the provisions above, a recipient may use your version
+ * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the MPL as stated above or under the terms of the GNU
+ * Library General Public License as published by the Free Software Foundation;
+ * either version 2 of the License, or any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
+ * details.
+ *
+ * Contributions by:
+ * Lubos Strapko
+ *
+ * If you didn't download this code from the following link, you should check if
+ * you aren't using an obsolete version:
+ * http://www.lowagie.com/iText/
+ */
+
+package com.fr.third.com.lowagie.text.html.simpleparser;
+
+import com.fr.third.com.lowagie.text.Chunk;
+import com.fr.third.com.lowagie.text.DocListener;
+import com.fr.third.com.lowagie.text.DocumentException;
+import com.fr.third.com.lowagie.text.Element;
+import com.fr.third.com.lowagie.text.ElementTags;
+import com.fr.third.com.lowagie.text.ExceptionConverter;
+import com.fr.third.com.lowagie.text.FontFactoryImp;
+import com.fr.third.com.lowagie.text.HeaderFooter;
+import com.fr.third.com.lowagie.text.Image;
+import com.fr.third.com.lowagie.text.List;
+import com.fr.third.com.lowagie.text.ListItem;
+import com.fr.third.com.lowagie.text.Paragraph;
+import com.fr.third.com.lowagie.text.Phrase;
+import com.fr.third.com.lowagie.text.Rectangle;
+import com.fr.third.com.lowagie.text.TextElementArray;
+import com.fr.third.com.lowagie.text.html.CSSUtils;
+import com.fr.third.com.lowagie.text.html.HtmlTags;
+import com.fr.third.com.lowagie.text.html.Markup;
+import com.fr.third.com.lowagie.text.pdf.PdfPTable;
+import com.fr.third.com.lowagie.text.pdf.draw.LineSeparator;
+import com.fr.third.com.lowagie.text.xml.simpleparser.SimpleXMLDocHandler;
+import com.fr.third.com.lowagie.text.xml.simpleparser.SimpleXMLParser;
+import com.fr.third.sun.misc.BASE64Decoder;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Stack;
+import java.util.StringTokenizer;
+
+public class HTMLWorker implements SimpleXMLDocHandler, DocListener {
+
+ /** Elements collected by {@link #add(Element)}; created by {@code parseToList}, otherwise left unset. */
+ protected ArrayList objectList;
+
+ /** Listener that receives every finished element (paragraphs, lists, tables, images). */
+ protected DocListener document;
+
+ /** Paragraph currently being filled with chunks, or {@code null} when none is open. */
+ private Paragraph currentParagraph;
+
+ /** Chain of per-tag property maps; entries are added on start tags and removed on end tags. */
+ private ChainedProperties cprops = new ChainedProperties();
+
+ /** Open containers: paragraphs saved while inside an anchor, lists, list items, cells and tables. */
+ private Stack stack = new Stack();
+
+ /** Set when a {@code tr} start tag is seen, cleared when the row is closed. */
+ private boolean pendingTR = false;
+
+ /** Set when a {@code td}/{@code th} start tag is seen, cleared when the cell is closed. */
+ private boolean pendingTD = false;
+
+ /** Set when an {@code li} start tag is seen, cleared when the item is closed. */
+ private boolean pendingLI = false;
+
+ /** Style sheet applied to the attribute map of every supported start tag. */
+ private StyleSheet style = new StyleSheet();
+
+ /** True between the start and end of a {@code pre} tag; text is then added without whitespace normalization. */
+ private boolean isPRE = false;
+
+ /** Saved {@code {pendingTR, pendingTD}} pairs, pushed when a table starts and restored when it ends. */
+ private Stack tableState = new Stack();
+
+ /** When true, {@link #text(String)} ignores character data. */
+ private boolean skipText = false;
+
+ /**
+ * Optional hooks supplied by the caller. Keys read by this class: "font_factory",
+ * "img_provider", "img_static", "img_baseurl", "img_interface" and "alink_interface".
+ */
+ private HashMap interfaceProps;
+
+ /** Creates the chunks added to paragraphs; its font factory can be replaced through the "font_factory" hook. */
+ private FactoryProperties factoryProperties = new FactoryProperties();
+
+ /**
+ * Creates a new instance of HTMLWorker.
+ *
+ * @param document a class that implements DocListener; it receives the elements
+ * produced while parsing
+ */
+ public HTMLWorker(DocListener document) {
+ this.document = document;
+ }
+
+ /**
+ * Replaces the style sheet that is applied to the attributes of every start tag.
+ *
+ * @param style the new style sheet; stored as given, without a null check
+ */
+ public void setStyleSheet(StyleSheet style) {
+ this.style = style;
+ }
+
+ /**
+ * Returns the style sheet currently used by this worker.
+ *
+ * @return the style sheet
+ */
+ public StyleSheet getStyleSheet() {
+ return style;
+ }
+
+ /**
+  * Stores the map of caller-supplied hooks and, when it contains a
+  * "font_factory" entry, installs that font factory for chunk creation.
+  *
+  * @param interfaceProps the hook map; may be {@code null}
+  */
+ public void setInterfaceProps(HashMap interfaceProps) {
+     this.interfaceProps = interfaceProps;
+     if (interfaceProps == null) {
+         return;
+     }
+     FontFactoryImp fontFactory = (FontFactoryImp) interfaceProps.get("font_factory");
+     if (fontFactory != null) {
+         factoryProperties.setFontImp(fontFactory);
+     }
+ }
+
+ /**
+ * Returns the hook map last passed to {@link #setInterfaceProps(HashMap)}.
+ *
+ * @return the hook map, or {@code null} if none was set
+ */
+ public HashMap getInterfaceProps() {
+ return interfaceProps;
+ }
+
+ /**
+ * Parses the given input in HTML mode, with this worker as the document
+ * handler and no comment handler.
+ *
+ * @param reader the source of the HTML text
+ * @throws IOException declared by the underlying parser
+ */
+ public void parse(Reader reader) throws IOException {
+ SimpleXMLParser.parse(this, null, reader, true);
+ }
+
+ /**
+ * Parses HTML into a list of elements without any interface properties.
+ * Delegates to {@link #parseToList(Reader, StyleSheet, HashMap)} with a
+ * {@code null} hook map.
+ *
+ * @param reader the source of the HTML text
+ * @param style the style sheet to apply, or {@code null} to keep the default
+ * @return the elements produced by the parse
+ * @throws IOException declared by the underlying parser
+ */
+ public static ArrayList parseToList(Reader reader, StyleSheet style)
+ throws IOException {
+ return parseToList(reader, style, null);
+ }
+
+ /**
+  * Parses HTML into a list of elements. A fresh worker is created that acts
+  * as its own {@link DocListener}, so every finished element ends up in the
+  * returned list.
+  *
+  * @param reader the source of the HTML text
+  * @param style the style sheet to apply, or {@code null} to keep the default
+  * @param interfaceProps optional hook map, may be {@code null}
+  * @return the elements produced by the parse
+  * @throws IOException declared by the underlying parser
+  */
+ public static ArrayList parseToList(Reader reader, StyleSheet style,
+         HashMap interfaceProps) throws IOException {
+     HTMLWorker collector = new HTMLWorker(null);
+     // The worker is its own listener: add(Element) appends to objectList.
+     collector.document = collector;
+     collector.objectList = new ArrayList();
+     if (style != null) {
+         collector.style = style;
+     }
+     collector.setInterfaceProps(interfaceProps);
+     collector.parse(reader);
+     return collector.objectList;
+ }
+
+ public void endDocument() {
+ try {
+ for (int k = 0; k < stack.size(); ++k)
+ document.add((Element) stack.elementAt(k));
+ if (currentParagraph != null)
+ document.add(currentParagraph);
+ currentParagraph = null;
+ } catch (Exception e) {
+ throw new ExceptionConverter(e);
+ }
+ }
+
+ /**
+  * Called at the start of the input. Seeds the property chain with the
+  * style sheet's settings for "body".
+  */
+ public void startDocument() {
+     HashMap bodyProps = new HashMap();
+     style.applyStyle("body", bodyProps);
+     cprops.addToChain("body", bodyProps);
+ }
+
+
+ /**
+  * Handles the start of a tag.
+  * <p>
+  * Tags that are not in {@link #tagsSupported} are ignored. For supported
+  * tags the style sheet is applied to the attribute map, the properties are
+  * pushed on the property chain and, depending on the tag, a chunk, image,
+  * list, list item, cell or table is created.
+  * <p>
+  * Images whose {@code src} is a {@code data:} URI are decoded from their
+  * base64 payload. If the payload cannot be decoded into an image and no
+  * image provider supplied one, the image is skipped.
+  *
+  * @param tag the tag name as reported by the parser
+  * @param h the tag's attributes; the map is modified (style sheet values and
+  *        derived properties are merged into it)
+  * @throws ExceptionConverter wrapping any exception raised while processing
+  */
+ public void startElement(String tag, HashMap h) {
+     if (!tagsSupported.containsKey(tag))
+         return;
+     try {
+         style.applyStyle(tag, h);
+         if (tag.equals("p")) {
+             // Every paragraph gets fixed top and bottom margins.
+             h.put(Markup.CSS_KEY_MARGINTOP, "16px");
+             h.put(Markup.CSS_KEY_MARGINBOTTOM, "16px");
+         }
+         // Tags listed in followTags are stored on the chain under their mapped key.
+         String follow = (String) FactoryProperties.followTags.get(tag);
+         if (follow != null) {
+             HashMap prop = new HashMap();
+             prop.put(follow, null);
+             FactoryProperties.insertStyle(h, this.cprops);
+             prop.putAll(h);
+
+             cprops.addToChain(follow, prop);
+             return;
+         }
+         FactoryProperties.insertStyle(h, cprops);
+         if (tag.equals(HtmlTags.ANCHOR)) {
+             cprops.addToChain(tag, h);
+             if (currentParagraph == null) {
+                 currentParagraph = new Paragraph();
+             }
+             // Park the surrounding paragraph; the anchor content is collected separately.
+             stack.push(currentParagraph);
+             currentParagraph = new Paragraph();
+             return;
+         }
+         if (tag.equals(HtmlTags.NEWLINE)) {
+             if (currentParagraph == null) {
+                 currentParagraph = new Paragraph();
+             }
+             currentParagraph.add(factoryProperties
+                     .createChunk("\n", cprops));
+             return;
+         }
+         if (tag.equals(HtmlTags.HORIZONTALRULE)) {
+             // Attempting to duplicate the behavior seen on Firefox with
+             // http://www.w3schools.com/tags/tryit.asp?filename=tryhtml_hr_test
+             // where an initial break is only inserted when the preceding element doesn't
+             // end with a break, but a trailing break is always inserted.
+             boolean addLeadingBreak = true;
+             if (currentParagraph == null) {
+                 currentParagraph = new Paragraph();
+                 addLeadingBreak = false;
+             }
+             if (addLeadingBreak) { // Not a new paragraph
+                 int numChunks = currentParagraph.getChunks().size();
+                 if (numChunks == 0 ||
+                         ((Chunk) (currentParagraph.getChunks().get(numChunks - 1))).getContent().endsWith("\n"))
+                     addLeadingBreak = false;
+             }
+             String align = (String) h.get("align");
+             int hrAlign = Element.ALIGN_CENTER;
+             if (align != null) {
+                 if (align.equalsIgnoreCase("left"))
+                     hrAlign = Element.ALIGN_LEFT;
+                 if (align.equalsIgnoreCase("right"))
+                     hrAlign = Element.ALIGN_RIGHT;
+             }
+             String width = (String) h.get("width");
+             float hrWidth = 1;
+             if (width != null) {
+                 float tmpWidth = Markup.parseLength(width, Markup.DEFAULT_FONT_SIZE);
+                 if (tmpWidth > 0) hrWidth = tmpWidth;
+                 if (!width.endsWith("%"))
+                     hrWidth = 100; // Treat a pixel width as 100% for now.
+             }
+             String size = (String) h.get("size");
+             float hrSize = 1;
+             if (size != null) {
+                 float tmpSize = Markup.parseLength(size, Markup.DEFAULT_FONT_SIZE);
+                 if (tmpSize > 0)
+                     hrSize = tmpSize;
+             }
+             if (addLeadingBreak)
+                 currentParagraph.add(Chunk.NEWLINE);
+             currentParagraph.add(new LineSeparator(hrSize, hrWidth, null, hrAlign, currentParagraph.getLeading() / 2));
+             currentParagraph.add(Chunk.NEWLINE);
+             return;
+         }
+         if (tag.equals(HtmlTags.CHUNK) || tag.equals(HtmlTags.SPAN)) {
+             cprops.addToChain(tag, h);
+             return;
+         }
+         if (tag.equals(HtmlTags.IMAGE)) {
+             String src = (String) h.get(ElementTags.SRC);
+             if (src == null)
+                 return;
+             cprops.addToChain(tag, h);
+             Image img = null;
+             if (interfaceProps != null) {
+                 ImageProvider ip = (ImageProvider) interfaceProps
+                         .get("img_provider");
+                 if (ip != null)
+                     img = ip.getImage(src, h, cprops, document);
+                 if (img == null) {
+                     HashMap images = (HashMap) interfaceProps
+                             .get("img_static");
+                     if (images != null) {
+                         Image tim = (Image) images.get(src);
+                         if (tim != null)
+                             img = Image.getInstance(tim);
+                     } else {
+                         if (!src.startsWith("http")) { // relative src references only
+                             String baseurl = (String) interfaceProps
+                                     .get("img_baseurl");
+                             if (baseurl != null) {
+                                 src = baseurl + src;
+                                 img = Image.getInstance(src);
+                             }
+                         }
+                     }
+                 }
+             }
+             // Inline images: src is a data URI ("data:<media type>;base64,<payload>").
+             // The scheme is matched with its colon so that ordinary file names such as
+             // "data.png" are not mistaken for inline data.
+             boolean dataUri = src.regionMatches(true, 0, "data:", 0, 5);
+             if (dataUri) {
+                 String[] srcArray = src.split(",");
+                 String base64string = srcArray[srcArray.length - 1];
+                 try {
+                     byte[] bytes = new BASE64Decoder().decodeBuffer(base64string);
+                     img = Image.getInstance(bytes);
+                 } catch (Exception ignored) {
+                     // Best effort: an undecodable payload leaves img as it was;
+                     // if nothing else supplied an image it is skipped below.
+                 }
+             }
+             // A data URI is never a file path or URL, so only other sources fall back to loading by name.
+             if (img == null && !dataUri) {
+                 if (!src.startsWith("http")) {
+                     String path = cprops.getProperty("image_path");
+                     if (path == null)
+                         path = "";
+                     src = new File(path, src).getPath();
+                 }
+                 img = Image.getInstance(src);
+             }
+             if (img == null) {
+                 // Undo the chain entry added above so the skipped image leaves no trace.
+                 cprops.removeChain(tag);
+                 return;
+             }
+             img.setSrcString(src);
+             String align = (String) h.get("align");
+             String width = (String) h.get("width");
+             String height = (String) h.get("height");
+             String before = cprops.getProperty("before");
+             String after = cprops.getProperty("after");
+             if (before != null)
+                 img.setSpacingBefore(Float.parseFloat(before));
+             if (after != null)
+                 img.setSpacingAfter(Float.parseFloat(after));
+             float actualFontSize = Markup.parseLength(cprops
+                     .getProperty(ElementTags.SIZE),
+                     Markup.DEFAULT_FONT_SIZE);
+             if (actualFontSize <= 0f)
+                 actualFontSize = Markup.DEFAULT_FONT_SIZE;
+             float widthInPoints = Markup.parseLength(width, actualFontSize);
+             float heightInPoints = Markup.parseLength(height,
+                     actualFontSize);
+             // When only one dimension is given, the other keeps the image's aspect ratio.
+             if (widthInPoints > 0 && heightInPoints > 0) {
+                 img.scaleAbsolute(widthInPoints, heightInPoints);
+             } else if (widthInPoints > 0) {
+                 heightInPoints = img.getHeight() * widthInPoints
+                         / img.getWidth();
+                 img.scaleAbsolute(widthInPoints, heightInPoints);
+             } else if (heightInPoints > 0) {
+                 widthInPoints = img.getWidth() * heightInPoints
+                         / img.getHeight();
+                 img.scaleAbsolute(widthInPoints, heightInPoints);
+             }
+             img.setWidthPercentage(0);
+             if (align != null) {
+                 // Aligned images are added to the document directly, after closing the open paragraph.
+                 endElement("p");
+                 int ralign = Image.MIDDLE;
+                 if (align.equalsIgnoreCase("left"))
+                     ralign = Image.LEFT;
+                 else if (align.equalsIgnoreCase("right"))
+                     ralign = Image.RIGHT;
+                 img.setAlignment(ralign);
+                 Img i = null;
+                 boolean skip = false;
+                 if (interfaceProps != null) {
+                     i = (Img) interfaceProps.get("img_interface");
+                     if (i != null)
+                         skip = i.process(img, h, cprops, document);
+                 }
+                 if (!skip)
+                     document.add(img);
+                 cprops.removeChain(tag);
+             } else {
+                 // Unaligned images are wrapped in a chunk and flow with the text.
+                 Chunk ck = new Chunk(img, 0, 0);
+                 if (cprops.hasPropertyInChain("img", "padding-left")) {
+                     String ss = cprops.getPropertyFromChain("img", "padding-left");
+                     ck.setAttribute("padding-left", Float.toString(Markup.parseLength(ss)));
+                 }
+                 if (cprops.hasPropertyInChain("img", "padding-right")) {
+                     String ss = cprops.getPropertyFromChain("img", "padding-right");
+                     ck.setAttribute("padding-right", Float.toString(Markup.parseLength(ss)));
+                 }
+                 cprops.removeChain(tag);
+                 if (currentParagraph == null) {
+                     currentParagraph = FactoryProperties
+                             .createParagraph(cprops);
+                 }
+
+                 currentParagraph.add(ck);
+             }
+             return;
+         }
+         // All remaining tags are block-level: close the paragraph that is currently open.
+         endElement("p");
+         if (tag.equals("h1") || tag.equals("h2") || tag.equals("h3")
+                 || tag.equals("h4") || tag.equals("h5") || tag.equals("h6")) {
+             if (!h.containsKey(ElementTags.SIZE)) {
+                 // Default size: h1 -> 6 ... h6 -> 1.
+                 int v = 7 - Integer.parseInt(tag.substring(1));
+                 h.put(ElementTags.SIZE, Integer.toString(v));
+             }
+             cprops.addToChain(tag, h);
+             return;
+         }
+         if (tag.equals(HtmlTags.UNORDEREDLIST)) {
+             if (pendingLI)
+                 endElement(HtmlTags.LISTITEM);
+             skipText = true;
+             cprops.addToChain(tag, h);
+             List list = new List(false);
+             try {
+                 list.setIndentationLeft(new Float(cprops.getProperty("indent")).floatValue());
+             } catch (Exception e) {
+                 // No usable "indent" property: let the list indent itself.
+                 list.setAutoindent(true);
+             }
+             list.setListSymbol("\u2022");
+             stack.push(list);
+             return;
+         }
+         if (tag.equals(HtmlTags.ORDEREDLIST)) {
+             if (pendingLI)
+                 endElement(HtmlTags.LISTITEM);
+             skipText = true;
+             cprops.addToChain(tag, h);
+             List list = new List(true);
+             try {
+                 list.setIndentationLeft(new Float(cprops.getProperty("indent")).floatValue());
+             } catch (Exception e) {
+                 // No usable "indent" property: let the list indent itself.
+                 list.setAutoindent(true);
+             }
+             stack.push(list);
+             return;
+         }
+         if (tag.equals(HtmlTags.LISTITEM)) {
+             if (pendingLI)
+                 endElement(HtmlTags.LISTITEM);
+             skipText = false;
+             pendingLI = true;
+             cprops.addToChain(tag, h);
+             ListItem item = FactoryProperties.createListItem(cprops);
+             stack.push(item);
+             return;
+         }
+         if (tag.equals(HtmlTags.DIV) || tag.equals(HtmlTags.BODY) || tag.equals("p")) {
+             cprops.addToChain(tag, h);
+             return;
+         }
+         if (tag.equals(HtmlTags.PRE)) {
+             if (!h.containsKey(ElementTags.FACE)) {
+                 h.put(ElementTags.FACE, "Courier");
+             }
+             cprops.addToChain(tag, h);
+             isPRE = true;
+             return;
+         }
+         if (tag.equals("tr")) {
+             if (pendingTR)
+                 endElement("tr");
+             skipText = true;
+             pendingTR = true;
+             cprops.addToChain("tr", h);
+             return;
+         }
+         if (tag.equals("td") || tag.equals("th")) {
+             if (pendingTD)
+                 endElement(tag);
+             skipText = false;
+             pendingTD = true;
+             cprops.addToChain("td", h);
+             stack.push(new IncCell(tag, cprops));
+             return;
+         }
+         if (tag.equals("table")) {
+             cprops.addToChain("table", h);
+             IncTable table = new IncTable(h);
+             stack.push(table);
+             // Remember the enclosing table's row/cell state; it is restored when this table ends.
+             tableState.push(new boolean[] { pendingTR, pendingTD });
+             pendingTR = pendingTD = false;
+             skipText = true;
+             return;
+         }
+     } catch (Exception e) {
+         throw new ExceptionConverter(e);
+     }
+ }
+
+
+
+ /**
+ * Handles the end of a tag.
+ * <p>
+ * Tags that are not in {@link #tagsSupported} are ignored. Inline tags only
+ * remove their entry from the property chain. For every other tag the open
+ * paragraph is first flushed to the document or to the container on top of
+ * the stack, and then the list, list item, row or table being closed is
+ * completed.
+ *
+ * @param tag the tag name as reported by the parser
+ * @throws ExceptionConverter wrapping any exception raised while processing
+ */
+ public void endElement(String tag) {
+ if (!tagsSupported.containsKey(tag))
+ return;
+ try {
+ // Tags listed in followTags were chained under their mapped key.
+ String follow = (String) FactoryProperties.followTags.get(tag);
+ if (follow != null) {
+ cprops.removeChain(follow);
+ return;
+ }
+ if (tag.equals("font") || tag.equals("span")) {
+ cprops.removeChain(tag);
+ return;
+ }
+ if (tag.equals("a")) {
+ if (currentParagraph == null) {
+ currentParagraph = new Paragraph();
+ }
+ boolean skip = false;
+ if (interfaceProps != null) {
+ ALink i = (ALink) interfaceProps.get("alink_interface");
+ if (i != null)
+ skip = i.process(currentParagraph, cprops);
+ }
+ if (!skip) {
+ // Default link handling: every chunk collected inside the anchor gets the href.
+ String href = cprops.getProperty("href");
+ if (href != null) {
+ ArrayList chunks = currentParagraph.getChunks();
+ int size = chunks.size();
+ for (int k = 0; k < size; ++k) {
+ Chunk ck = (Chunk) chunks.get(k);
+ ck.setAnchor(href);
+ }
+ }
+ }
+ // Restore the paragraph parked by startElement and append the anchor content to it.
+ Paragraph tmp = (Paragraph) stack.pop();
+ Phrase tmp2 = new Phrase();
+ tmp2.add(currentParagraph);
+ tmp.add(tmp2);
+ currentParagraph = tmp;
+ cprops.removeChain("a");
+ return;
+ }
+ if (tag.equals("br")) {
+ return;
+ }
+ // Block-level end tag: flush the open paragraph to the document, or to the
+ // container on top of the stack when that container accepts text elements.
+ if (currentParagraph != null) {
+ if (stack.empty())
+ document.add(currentParagraph);
+ else {
+ Object obj = stack.pop();
+ if (obj instanceof TextElementArray) {
+ TextElementArray current = (TextElementArray) obj;
+ current.add(currentParagraph);
+ }
+ stack.push(obj);
+ }
+ }
+ currentParagraph = null;
+ if (tag.equals(HtmlTags.UNORDEREDLIST)
+ || tag.equals(HtmlTags.ORDEREDLIST)) {
+ if (pendingLI)
+ endElement(HtmlTags.LISTITEM);
+ skipText = false;
+ cprops.removeChain(tag);
+ if (stack.empty())
+ return;
+ Object obj = stack.pop();
+ // Only a List on top of the stack is closed; anything else is put back untouched.
+ if (!(obj instanceof List)) {
+ stack.push(obj);
+ return;
+ }
+ if (stack.empty())
+ document.add((Element) obj);
+ else
+ ((TextElementArray) stack.peek()).add(obj);
+ return;
+ }
+ if (tag.equals(HtmlTags.LISTITEM)) {
+ pendingLI = false;
+ skipText = true;
+ cprops.removeChain(tag);
+ if (stack.empty())
+ return;
+ Object obj = stack.pop();
+ if (!(obj instanceof ListItem)) {
+ stack.push(obj);
+ return;
+ }
+ if (stack.empty()) {
+ document.add((Element) obj);
+ return;
+ }
+ Object list = stack.pop();
+ // NOTE(review): when the element below the item is not a List, the popped item is not added anywhere.
+ if (!(list instanceof List)) {
+ stack.push(list);
+ return;
+ }
+ ListItem item = (ListItem) obj;
+ ((List) list).add(item);
+ // The list symbol takes the font of the item's first chunk.
+ ArrayList cks = item.getChunks();
+ if (!cks.isEmpty())
+ item.getListSymbol()
+ .setFont(((Chunk) cks.get(0)).getFont());
+ stack.push(list);
+ return;
+ }
+ if (tag.equals("div") || tag.equals("body")) {
+ cprops.removeChain(tag);
+ return;
+ }
+ if (tag.equals(HtmlTags.PRE)) {
+ cprops.removeChain(tag);
+ isPRE = false;
+ return;
+ }
+ if (tag.equals("p")) {
+ cprops.removeChain(tag);
+ return;
+ }
+ if (tag.equals("h1") || tag.equals("h2") || tag.equals("h3")
+ || tag.equals("h4") || tag.equals("h5") || tag.equals("h6")) {
+ cprops.removeChain(tag);
+ return;
+ }
+ if (tag.equals("table")) {
+ if (pendingTR)
+ endElement("tr");
+ cprops.removeChain("table");
+ IncTable table = (IncTable) stack.pop();
+ PdfPTable tb = table.buildTable();
+ tb.setSplitRows(true);
+ if (stack.empty())
+ document.add(tb);
+ else
+ ((TextElementArray) stack.peek()).add(tb);
+ // Restore the row/cell state of the enclosing table saved by startElement.
+ boolean state[] = (boolean[]) tableState.pop();
+ pendingTR = state[0];
+ pendingTD = state[1];
+ skipText = false;
+ return;
+ }
+ if (tag.equals("tr")) {
+ if (pendingTD)
+ endElement("td");
+ pendingTR = false;
+ // Read the row height before the row's properties leave the chain.
+ String rowHeightPx = cprops.getLastChainProperty("height");
+
+ cprops.removeChain("tr");
+ ArrayList cells = new ArrayList();
+ IncTable table = null;
+ // Pop everything down to the enclosing table, keeping the cells (collected last cell first).
+ // If no IncTable is on the stack, pop() throws and the exception is wrapped below.
+ while (true) {
+ Object obj = stack.pop();
+ if (obj instanceof IncCell) {
+ cells.add(((IncCell) obj).getCell());
+ }
+ if (obj instanceof IncTable) {
+ table = (IncTable) obj;
+ break;
+ }
+ }
+ float rowHeight = 0.0f;
+ if(rowHeightPx!=null){
+ rowHeight = CSSUtils.parseFloat(rowHeightPx);
+ }
+ table.addCols(cells);
+ table.endRow(rowHeight);
+
+ stack.push(table);
+ skipText = true;
+ return;
+ }
+ if (tag.equals("td") || tag.equals("th")) {
+ // The IncCell stays on the stack until its row is closed.
+ pendingTD = false;
+ cprops.removeChain("td");
+ skipText = true;
+ return;
+ }
+ } catch (Exception e) {
+ throw new ExceptionConverter(e);
+ }
+ }
+
+ /**
+  * Receives character data and appends it as a chunk to the open paragraph,
+  * creating the paragraph when none is open.
+  * <p>
+  * Inside {@code pre} the text is added unchanged. Otherwise line feeds
+  * (except at index 0) become a single space, spaces that follow a line
+  * feed are dropped, and carriage returns and tabs are removed. Text that
+  * is blank and contains no space character is ignored.
+  *
+  * @param str the character data
+  */
+ public void text(String str) {
+     if (skipText) {
+         return;
+     }
+     if (isPRE) {
+         if (currentParagraph == null) {
+             currentParagraph = FactoryProperties.createParagraph(cprops);
+         }
+         currentParagraph.add(factoryProperties.createChunk(str, cprops));
+         return;
+     }
+     boolean blank = str.trim().length() == 0;
+     if (blank && str.indexOf(' ') < 0) {
+         return;
+     }
+
+     StringBuffer normalized = new StringBuffer();
+     boolean afterLineFeed = false;
+     final int length = str.length();
+     for (int pos = 0; pos < length; pos++) {
+         char ch = str.charAt(pos);
+         if (ch == ' ') {
+             // Indentation after a line feed is not part of the text.
+             if (!afterLineFeed) {
+                 normalized.append(ch);
+             }
+         } else if (ch == '\n') {
+             if (pos > 0) {
+                 afterLineFeed = true;
+                 normalized.append(' ');
+             }
+         } else if (ch == '\r' || ch == '\t') {
+             // dropped
+         } else {
+             afterLineFeed = false;
+             normalized.append(ch);
+         }
+     }
+     if (currentParagraph == null) {
+         currentParagraph = FactoryProperties.createParagraph(cprops);
+     }
+     currentParagraph.add(factoryProperties.createChunk(normalized.toString(), cprops));
+ }
+
+ /**
+ * Collects an element in {@link #objectList}. Used when this worker acts as
+ * its own document listener (see {@code parseToList}).
+ *
+ * @param element the finished element
+ * @return always {@code true}
+ * @throws DocumentException never thrown by this implementation
+ */
+ public boolean add(Element element) throws DocumentException {
+ objectList.add(element);
+ return true;
+ }
+
+ /** Does nothing. */
+ public void clearTextWrap() throws DocumentException {
+ }
+
+ /** Does nothing. */
+ public void close() {
+ }
+
+ /**
+ * Does nothing.
+ *
+ * @return always {@code true}
+ */
+ public boolean newPage() {
+ return true;
+ }
+
+ /** Does nothing. */
+ public void open() {
+ }
+
+ /** Does nothing. */
+ public void resetFooter() {
+ }
+
+ /** Does nothing. */
+ public void resetHeader() {
+ }
+
+ /** Does nothing. */
+ public void resetPageCount() {
+ }
+
+ /** Does nothing; the footer is ignored. */
+ public void setFooter(HeaderFooter footer) {
+ }
+
+ /** Does nothing; the header is ignored. */
+ public void setHeader(HeaderFooter header) {
+ }
+
+ /**
+ * Does nothing; the setting is ignored.
+ *
+ * @return always {@code false}
+ */
+ public boolean setMarginMirroring(boolean marginMirroring) {
+ return false;
+ }
+
+ /**
+ * Does nothing; the setting is ignored.
+ *
+ * @return always {@code false}
+ * @see DocListener#setMarginMirroring(boolean)
+ * @since 2.1.6
+ */
+ public boolean setMarginMirroringTopBottom(boolean marginMirroring) {
+ return false;
+ }
+
+ /**
+ * Does nothing; the margins are ignored.
+ *
+ * @return always {@code true}
+ */
+ public boolean setMargins(float marginLeft, float marginRight,
+ float marginTop, float marginBottom) {
+ return true;
+ }
+
+ /** Does nothing; the page count is ignored. */
+ public void setPageCount(int pageN) {
+ }
+
+ /**
+ * Does nothing; the page size is ignored.
+ *
+ * @return always {@code true}
+ */
+ public boolean setPageSize(Rectangle pageSize) {
+ return true;
+ }
+
+ /** Space-separated names of all tags this worker handles. */
+ public static final String tagsSupportedString = "ol ul li a pre font span br p div body table td th tr i b u sub sup em strong s strike"
+         + " h1 h2 h3 h4 h5 h6 img hr";
+
+ /** Supported tag names as keys; the values are always {@code null}. */
+ public static final HashMap tagsSupported = new HashMap();
+ /** First character of every supported tag name as key; the values are always {@code null}. */
+ public static final HashMap tagsPrefixSupported = new HashMap();
+
+ // Fill both lookup tables from the space-separated tag list.
+ static {
+     for (StringTokenizer names = new StringTokenizer(tagsSupportedString); names.hasMoreTokens();) {
+         String name = names.nextToken();
+         tagsSupported.put(name, null);
+         tagsPrefixSupported.put(Character.valueOf(name.charAt(0)), null);
+     }
+ }
+}
diff --git a/fine-itext-old/src/com/fr/third/com/lowagie/text/xml/simpleparser/SimpleXMLParser.java b/fine-itext-old/src/com/fr/third/com/lowagie/text/xml/simpleparser/SimpleXMLParser.java
new file mode 100755
index 000000000..d61de9b2e
--- /dev/null
+++ b/fine-itext-old/src/com/fr/third/com/lowagie/text/xml/simpleparser/SimpleXMLParser.java
@@ -0,0 +1,780 @@
+/*
+ * Copyright 2003 Paulo Soares
+ *
+ * The contents of this file are subject to the Mozilla Public License Version 1.1
+ * (the "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the License.
+ *
+ * The Original Code is 'iText, a free JAVA-PDF library'.
+ *
+ * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
+ * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
+ * All Rights Reserved.
+ * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
+ * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
+ *
+ * Contributor(s): all the names of the contributors are added in the source code
+ * where applicable.
+ *
+ * Alternatively, the contents of this file may be used under the terms of the
+ * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
+ * provisions of LGPL are applicable instead of those above. If you wish to
+ * allow use of your version of this file only under the terms of the LGPL
+ * License and not to allow others to use your version of this file under
+ * the MPL, indicate your decision by deleting the provisions above and
+ * replace them with the notice and other provisions required by the LGPL.
+ * If you do not delete the provisions above, a recipient may use your version
+ * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the MPL as stated above or under the terms of the GNU
+ * Library General Public License as published by the Free Software Foundation;
+ * either version 2 of the License, or any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
+ * details.
+ *
+ * If you didn't download this code from the following link, you should check if
+ * you aren't using an obsolete version:
+ * http://www.lowagie.com/iText/
+ *
+ * The code to recognize the encoding in this class and in the convenience class IanaEncodings was taken from Apache Xerces published under the following license:
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Part of this code is based on the Quick-and-Dirty XML parser by Steven Brandt.
+ * The code for the Quick-and-Dirty parser was published in JavaWorld (java tip 128).
+ * Steven Brandt and JavaWorld gave permission to use the code for free.
+ * (Bruno Lowagie and Paulo Soares chose to use it under the MPL/LGPL in
+ * conformance with the rest of the code).
+ * The original code can be found on this url: http://www.javaworld.com/javatips/jw-javatip128_p.html.
+ * It was substantially refactored by Bruno Lowagie.
+ *
+ * The method 'private static String getEncodingName(byte[] b4)' was found
+ * in org.apache.xerces.impl.XMLEntityManager, originaly published by the
+ * Apache Software Foundation under the Apache Software License; now being
+ * used in iText under the MPL.
+ */
+package com.fr.third.com.lowagie.text.xml.simpleparser;
+
+import com.fr.third.com.lowagie.text.html.simpleparser.HTMLWorker;
+import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.util.HashMap;
+import java.util.Stack;
+
+/**
+ * A simple XML and HTML parser. This parser is, like the SAX parser,
+ * an event based parser, but with much less functionality.
+ *
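+ * <p>
+ * The parser can:
+ * <ul>
+ * <li>recognize the <code>&lt;[CDATA[ ... ]]&gt;</code> construct</li>
+ * <li>map lines ending in <code>\r\n</code> and <code>\r</code> to <code>\n</code> on input, in accordance with the XML Specification, Section 2.11</li>
+ * </ul>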
+ */
+public final class SimpleXMLParser {
+ /** possible states */
+ private final static int UNKNOWN = 0;
+ private final static int TEXT = 1;
+ private final static int TAG_ENCOUNTERED = 2;
+ private final static int EXAMIN_TAG = 3;
+ private final static int TAG_EXAMINED = 4;
+ private final static int IN_CLOSETAG = 5;
+ private final static int SINGLE_TAG = 6;
+ private final static int CDATA = 7;
+ private final static int COMMENT = 8;
+ private final static int PI = 9;
+ private final static int ENTITY = 10;
+ private final static int QUOTE = 11;
+ private final static int ATTRIBUTE_KEY = 12;
+ private final static int ATTRIBUTE_EQUAL = 13;
+ private final static int ATTRIBUTE_VALUE = 14;
+
+ /** the state stack */
+ Stack stack;
+ /** The current character. */
+ int character = 0;
+ /** The character to re-examine on the next iteration, or -1 to read a new one. */
+ int previousCharacter = -1;
+ /** the line we are currently reading */
+ int lines = 1;
+ /** the column where the current character occurs */
+ int columns = 0;
+ /** was the last character a carriage return (already reported as a newline)? */
+ boolean eol = false;
+ /**
+ * Indicates whether a whitespace character read in the TEXT state is
+ * appended to the text. It is set to true after a non-whitespace character
+ * and to false after a whitespace character, so a run of whitespace keeps
+ * only its first character.
+ * @since 2.1.5
+ */
+ boolean nowhite = false;
+ /** the current state */
+ int state;
+ /** Are we parsing HTML? */
+ boolean html;
+ /** current text (whatever is encountered between tags) */
+ StringBuffer text = new StringBuffer();
+ /** current entity (whatever is encountered between & and ;) */
+ StringBuffer entity = new StringBuffer();
+ /** current tagname */
+ String tag = null;
+ /** current attributes */
+ HashMap attributes = null;
+ /** The handler to which we are going to forward document content */
+ SimpleXMLDocHandler doc;
+ /** The handler to which we are going to forward comments. */
+ SimpleXMLDocHandlerComment comment;
+ /** Keeps track of the number of tags that are open. */
+ int nested = 0;
+ /** the quote character that was used to open the quote. */
+ int quoteCharacter = '"';
+ /** the attribute key. */
+ String attributekey = null;
+ /** the attribute value. */
+ String attributevalue = null;
+
+ /**
+ * Creates a Simple XML parser object.
+ * Call go(Reader) immediately after creation.
+ *
+ * @param doc the handler that receives document content
+ * @param comment the handler that receives comments
+ * @param html true to parse HTML; the parser then starts in the TEXT state
+ * instead of the UNKNOWN state
+ */
+ private SimpleXMLParser(SimpleXMLDocHandler doc, SimpleXMLDocHandlerComment comment, boolean html) {
+ this.doc = doc;
+ this.comment = comment;
+ this.html = html;
+ stack = new Stack();
+ state = html ? TEXT : UNKNOWN;
+ }
+
+ /**
+ * Does the actual parsing. Perform this immediately
+ * after creating the parser object.
+ */
+ private void go(Reader r) throws IOException {
+ BufferedReader reader;
+ if (r instanceof BufferedReader)
+ reader = (BufferedReader)r;
+ else
+ reader = new BufferedReader(r);
+ doc.startDocument();
+ while(true) {
+ // read a new character
+ if (previousCharacter == -1) {
+ character = reader.read();
+ }
+ // or re-examine the previous character
+ else {
+ character = previousCharacter;
+ previousCharacter = -1;
+ }
+
+ // the end of the file was reached
+ if (character == -1) {
+ if (html) {
+ if (html && state == TEXT)
+ flush();
+ doc.endDocument();
+ } else {
+ throwException("Missing end tag");
+ }
+ return;
+ }
+
+ // dealing with \n and \r
+ if (character == '\n' && eol) {
+ eol = false;
+ continue;
+ } else if (eol) {
+ eol = false;
+ } else if (character == '\n') {
+ lines++;
+ columns = 0;
+ } else if (character == '\r') {
+ eol = true;
+ character = '\n';
+ lines++;
+ columns = 0;
+ } else {
+ columns++;
+ }
+
+ switch(state) {
+ // we are in an unknown state before there's actual content
+ case UNKNOWN:
+ if(character == '<') {
+ beginnOfTag((char) reader.read(), UNKNOWN);
+ }
+ break;
+ // we can encounter any content
+ case TEXT:
+ if(character == '<') {
+ beginnOfTag((char) reader.read(), TEXT);
+ } else if(character == '&') {
+ saveState(state);
+ entity.setLength(0);
+ state = ENTITY;
+ } else if (Character.isWhitespace((char)character) && character != 12288) {
+ if (nowhite)
+ text.append((char)character);
+ nowhite = false;
+ } else {
+ text.append((char)character);
+ nowhite = true;
+ }
+ break;
+ // we have just seen a < and are wondering what we are looking at
+ // true
+ /**
+  * Escapes a string so that it can be embedded in XML text or attribute
+  * values.
+  * <p>
+  * The five markup characters are replaced by their predefined entities
+  * ({@code &lt;}, {@code &gt;}, {@code &amp;}, {@code &quot;} and
+  * {@code &apos;}). Code points that are not legal XML 1.0 characters,
+  * including unpaired surrogates, are dropped.
+  *
+  * @param s the string to be escaped
+  * @param onlyASCII if {@code true}, every code point above 127 is written
+  *        as a decimal character reference ({@code &#nnn;})
+  * @return the escaped string
+  */
+ public static String escapeXML(String s, boolean onlyASCII) {
+     int len = s.length();
+     StringBuilder sb = new StringBuilder(len);
+     // Walk by code point so that supplementary characters (surrogate pairs)
+     // reach the range check as one value instead of being dropped half by half.
+     for (int k = 0; k < len;) {
+         int c = s.codePointAt(k);
+         k += Character.charCount(c);
+         switch (c) {
+             case '<':
+                 sb.append("&lt;");
+                 break;
+             case '>':
+                 sb.append("&gt;");
+                 break;
+             case '&':
+                 sb.append("&amp;");
+                 break;
+             case '"':
+                 sb.append("&quot;");
+                 break;
+             case '\'':
+                 sb.append("&apos;");
+                 break;
+             default:
+                 // Only characters allowed by the XML 1.0 Char production are kept.
+                 if ((c == 0x9) || (c == 0xA) || (c == 0xD)
+                         || ((c >= 0x20) && (c <= 0xD7FF))
+                         || ((c >= 0xE000) && (c <= 0xFFFD))
+                         || ((c >= 0x10000) && (c <= 0x10FFFF))) {
+                     if (onlyASCII && c > 127)
+                         sb.append("&#").append(c).append(';');
+                     else
+                         sb.appendCodePoint(c);
+                 }
+         }
+     }
+     return sb.toString();
+ }
+ /**
+ * Returns the IANA encoding name that is auto-detected from
+ * the bytes specified, with the endian-ness of that encoding where appropriate.
+ * (method found in org.apache.xerces.impl.XMLEntityManager, originally published
+ * by the Apache Software Foundation under the Apache Software License; now being
+ * used in iText under the MPL)
+ * @param b4 The first four bytes of the input.
+ * @return an IANA-encoding string
+ */
+ private static String getEncodingName(byte[] b4) {
+
+ // UTF-16, with BOM
+ int b0 = b4[0] & 0xFF;
+ int b1 = b4[1] & 0xFF;
+ if (b0 == 0xFE && b1 == 0xFF) {
+ // UTF-16, big-endian
+ return "UTF-16BE";
+ }
+ if (b0 == 0xFF && b1 == 0xFE) {
+ // UTF-16, little-endian
+ return "UTF-16LE";
+ }
+
+ // UTF-8 with a BOM
+ int b2 = b4[2] & 0xFF;
+ if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
+ return "UTF-8";
+ }
+
+ // other encodings
+ int b3 = b4[3] & 0xFF;
+ if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
+ // UCS-4, big endian (1234)
+ return "ISO-10646-UCS-4";
+ }
+ if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
+ // UCS-4, little endian (4321)
+ return "ISO-10646-UCS-4";
+ }
+ if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
+ // UCS-4, unusual octet order (2143)
+ // REVISIT: What should this be?
+ return "ISO-10646-UCS-4";
+ }
+ if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
+ // UCS-4, unusual octet order (3412)
+ // REVISIT: What should this be?
+ return "ISO-10646-UCS-4";
+ }
+ if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
+ // UTF-16, big-endian, no BOM
+ // (or could turn out to be UCS-2...
+ // REVISIT: What should this be?
+ return "UTF-16BE";
+ }
+ if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
+ // UTF-16, little-endian, no BOM
+ // (or could turn out to be UCS-2...
+ return "UTF-16LE";
+ }
+ if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
+ // EBCDIC
+ // a la xerces1, return CP037 instead of EBCDIC here
+ return "CP037";
+ }
+
+ // default encoding
+ return "UTF-8";
+ }
+}
\ No newline at end of file