From 52c2f5a1e5476349eac2b5e3a132259d2943d13a Mon Sep 17 00:00:00 2001 From: "Hugh.C" Date: Fri, 3 Jan 2020 13:48:21 +0800 Subject: [PATCH 1/4] =?UTF-8?q?REPORT-25774=20=E6=8F=90=E5=8D=87=E6=9D=83?= =?UTF-8?q?=E9=99=90=EF=BC=8C=E4=BE=9B=E5=A4=96=E9=83=A8=E8=B0=83=E7=94=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfFont.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfFont.java b/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfFont.java index d81d3c878..60917613c 100644 --- a/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfFont.java +++ b/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfFont.java @@ -168,7 +168,7 @@ public class PdfFont implements Comparable { return image.getScaledWidth(); } - float width(String s) { + public float width(String s) { double width = 0.0d; java.awt.Font font = this.getAwtFont(); if (font.getSize2D() > 0) { From 2fb9bbc5b119129327e271748a9175d943d9b85b Mon Sep 17 00:00:00 2001 From: "Hugh.C" Date: Thu, 9 Jan 2020 12:35:12 +0800 Subject: [PATCH 2/4] =?UTF-8?q?REPORT-25774=20=E4=BD=BF=E7=94=A8=E5=92=8C?= =?UTF-8?q?=E6=8A=A5=E8=A1=A8=E4=B8=80=E6=A0=B7=E7=9A=84=E6=96=B9=E6=B3=95?= =?UTF-8?q?=E8=BF=9B=E8=A1=8C=E8=AE=A1=E7=AE=97=E5=AD=97=E7=AC=A6=E5=AE=BD?= =?UTF-8?q?=E5=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../fr/third/v2/lowagie/text/pdf/PdfFont.java | 34 +++++++------------ 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfFont.java b/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfFont.java index 60917613c..4b213ac98 100644 --- a/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfFont.java +++ b/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfFont.java @@ -55,8 +55,6 @@ import com.fr.third.v2.lowagie.text.Image; import sun.font.FontDesignMetrics; import java.awt.FontMetrics; -import java.awt.font.FontRenderContext; -import java.awt.geom.AffineTransform; /** * PdfFont is the Pdf Font object. @@ -74,6 +72,7 @@ import java.awt.geom.AffineTransform; */ public class PdfFont implements Comparable { + private static final int ONE_THOUSAND = 1000 ; private Font oriFont; @@ -153,6 +152,8 @@ public class PdfFont implements Comparable { return width(' '); } + private FontMetrics metrics; + /** * Returns the width of a certain character of this font. * @@ -161,30 +162,19 @@ public class PdfFont implements Comparable { */ float width(int character) { - FontMetrics metrics = FontDesignMetrics.getMetrics(getAwtFont()); - if (image == null) - return metrics.charWidth(character) * hScale; - else - return image.getScaledWidth(); + return image == null ? getMetrics().charWidth(character) : image.getScaledWidth(); } - public float width(String s) { - double width = 0.0d; - java.awt.Font font = this.getAwtFont(); - if (font.getSize2D() > 0) { - float scale = ONE_THOUSAND / font.getSize2D(); - java.awt.Font derivedFont = font.deriveFont(AffineTransform.getScaleInstance(scale, scale)); - width = derivedFont.getStringBounds(s, new FontRenderContext(new AffineTransform(), true, true)).getWidth(); - if (derivedFont.isTransformed()){ - width /= scale; - } - } - if (image == null) - return (float) width * hScale; - else - return image.getScaledWidth(); + float width(String s) { + return image == null ? getMetrics().stringWidth(s) : image.getScaledWidth(); } + private FontMetrics getMetrics() { + if (null == metrics) { + metrics = FontDesignMetrics.getMetrics(getAwtFont()); + } + return metrics; + } BaseFont getFont() { return oriFont.getCalculatedBaseFont(false); From e68d07359900e7febd0d2f5703ad10d5395796d8 Mon Sep 17 00:00:00 2001 From: "Hugh.C" Date: Sat, 8 Feb 2020 11:49:55 +0800 Subject: [PATCH 3/4] =?UTF-8?q?REPORT-14598=20pdf=20HTML=E5=AF=BC=E5=87=BA?= =?UTF-8?q?=E6=8D=A2=E8=A1=8C=E4=B8=8E=E6=B5=8F=E8=A7=88=E5=99=A8=E4=B8=8D?= =?UTF-8?q?=E4=B8=80=E8=87=B4=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../fr/third/v2/lowagie/text/pdf/PdfFont.java | 62 ++++++++++++------- .../xml/simpleparser/SimpleXMLParser.java | 9 ++- 2 files changed, 45 insertions(+), 26 deletions(-) diff --git a/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfFont.java b/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfFont.java index d81d3c878..5e5f7b675 100644 --- a/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfFont.java +++ b/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfFont.java @@ -52,6 +52,9 @@ package com.fr.third.v2.lowagie.text.pdf; import com.fr.third.v2.lowagie.text.ExceptionConverter; import com.fr.third.v2.lowagie.text.Font; import com.fr.third.v2.lowagie.text.Image; +import java.awt.font.TextAttribute; +import java.util.HashMap; +import java.util.Map; import sun.font.FontDesignMetrics; import java.awt.FontMetrics; @@ -78,6 +81,8 @@ public class PdfFont implements Comparable { private Font oriFont; + public static int SCALE = 100; + /** the font metrics. */ // private BaseFont font; @@ -153,6 +158,15 @@ public class PdfFont implements Comparable { return width(' '); } + private FontMetrics metrics; + + private FontMetrics getMetrics() { + if (null == metrics) { + metrics = FontDesignMetrics.getMetrics(getAwtFont(SCALE)); + } + return metrics; + } + /** * Returns the width of a certain character of this font. * @@ -161,44 +175,44 @@ public class PdfFont implements Comparable { */ float width(int character) { - FontMetrics metrics = FontDesignMetrics.getMetrics(getAwtFont()); - if (image == null) - return metrics.charWidth(character) * hScale; - else - return image.getScaledWidth(); + return image == null ? getMetrics().charWidth(replaceNbsp(character))/SCALE : image.getScaledWidth(); } float width(String s) { - double width = 0.0d; - java.awt.Font font = this.getAwtFont(); - if (font.getSize2D() > 0) { - float scale = ONE_THOUSAND / font.getSize2D(); - java.awt.Font derivedFont = font.deriveFont(AffineTransform.getScaleInstance(scale, scale)); - width = derivedFont.getStringBounds(s, new FontRenderContext(new AffineTransform(), true, true)).getWidth(); - if (derivedFont.isTransformed()){ - width /= scale; - } - } - if (image == null) - return (float) width * hScale; - else - return image.getScaledWidth(); + return image == null ? getMetrics().stringWidth(replaceNbsp(s))/SCALE : image.getScaledWidth(); + } + + String replaceNbsp(String str) { + return canDisplayNbsp() ? str : str.replaceAll(String.valueOf((char) 160), String.valueOf((char) 32)); + } + + int replaceNbsp(int character) { + return character == 160 ? (canDisplayNbsp() ? character : 32) : character; } + private boolean canDisplayNbsp() { + return getAwtFont().canDisplay((char) 160); + } BaseFont getFont() { return oriFont.getCalculatedBaseFont(false); } public java.awt.Font getAwtFont() { - int style = Font.NORMAL; + return getAwtFont(1); + } + + private java.awt.Font getAwtFont(int scale) { + Map attrMap = new HashMap(4); + attrMap.put(TextAttribute.FAMILY, oriFont.getFontName()); + attrMap.put(TextAttribute.SIZE, new Float(oriFont.getSize() * scale)); if (oriFont.isBold()) { - style |= Font.BOLD; + attrMap.put(TextAttribute.WEIGHT, TextAttribute.WEIGHT_BOLD); } - if(oriFont.isItalic()){ - style |= Font.ITALIC; + if (oriFont.isItalic()) { + attrMap.put(TextAttribute.POSTURE, TextAttribute.POSTURE_OBLIQUE); } - return new java.awt.Font(oriFont.getFontName(), style, (int)oriFont.getSize()); + return new java.awt.Font(attrMap); } public Font getOriFont(){ diff --git a/fine-itext/src/com/fr/third/v2/lowagie/text/xml/simpleparser/SimpleXMLParser.java b/fine-itext/src/com/fr/third/v2/lowagie/text/xml/simpleparser/SimpleXMLParser.java index 2a15f56ff..f74d54011 100644 --- a/fine-itext/src/com/fr/third/v2/lowagie/text/xml/simpleparser/SimpleXMLParser.java +++ b/fine-itext/src/com/fr/third/v2/lowagie/text/xml/simpleparser/SimpleXMLParser.java @@ -335,6 +335,8 @@ public final class SimpleXMLParser { return; } state = restoreState(); + //防止 如
等标签后面的空格键生效 + nowhite = false; break; // we are processing CDATA @@ -377,8 +379,11 @@ public final class SimpleXMLParser { char ce = EntitiesToUnicode.decodeEntity(cent); if (ce == '\0') text.append('&').append(cent).append(';'); - else - text.append(ce); + else { + text.append(ce); + //防止如   等标签后面的空格无效 + nowhite = true; + } } else if ((character != '#' && (character < '0' || character > '9') && (character < 'a' || character > 'z') && (character < 'A' || character > 'Z')) || entity.length() >= 7) { state = restoreState(); From 7255663d3b2d93d62c9482e7eda7555c11358bec Mon Sep 17 00:00:00 2001 From: "Hugh.C" Date: Tue, 11 Feb 2020 11:51:14 +0800 Subject: [PATCH 4/4] =?UTF-8?q?REPORT-14598=20pdf=20HTML=E5=AF=BC=E5=87=BA?= =?UTF-8?q?=E6=8D=A2=E8=A1=8C=E4=B8=8E=E6=B5=8F=E8=A7=88=E5=99=A8=E4=B8=8D?= =?UTF-8?q?=E4=B8=80=E8=87=B4=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../SpaceWithPunctuationBreakIterator.java | 30 +++++++- .../text/html/simpleparser/HTMLWorker.java | 5 ++ .../third/v2/lowagie/text/pdf/PdfChunk.java | 40 +++++++--- .../fr/third/v2/lowagie/text/pdf/PdfFont.java | 74 ++++++++++++++----- 4 files changed, 121 insertions(+), 28 deletions(-) diff --git a/fine-itext/src/com/fr/third/v2/lowagie/text/html/SpaceWithPunctuationBreakIterator.java b/fine-itext/src/com/fr/third/v2/lowagie/text/html/SpaceWithPunctuationBreakIterator.java index 94322266b..1d4c7b035 100644 --- a/fine-itext/src/com/fr/third/v2/lowagie/text/html/SpaceWithPunctuationBreakIterator.java +++ b/fine-itext/src/com/fr/third/v2/lowagie/text/html/SpaceWithPunctuationBreakIterator.java @@ -13,16 +13,23 @@ public class SpaceWithPunctuationBreakIterator extends BreakIterator { private int currentPos = -1; private int currentIndex = -1; private boolean[] spaceIndex; + //不作为break分词的字符 + private boolean[] noSwitchIndex; public SpaceWithPunctuationBreakIterator(String text, BreakIterator iterator){ this.iterator = iterator; iterator.setText(text); this.spaceIndex = new boolean[text.length()]; + this.noSwitchIndex = new boolean[text.length()]; int ilen = text.length() - 1; if(ilen > 0) { for (int i = 0; i < ilen; i++) { char c = text.charAt(i); - spaceIndex[i + 1] = (c == ' ' && isPunctuation(text.charAt(i + 1)) )|| c == '-' || c == '\u2010' || c== '\n'; + //中文的标点符号都是可以直接断开的 + spaceIndex[i + 1] = (c == ' ' && isPunctuation(text.charAt(i + 1)) )|| c == '-' || c == '\u2010' || c== '\n'|| isChinesePunctuation( c); + //需要保证下一个字符不是中文,下一个字符如果是中文的话,允许分行 + char nextC = text.charAt(i+1); + noSwitchIndex[i + 1] = (c=='/' || c == '.' || c == ':' || c == ';') && !isChinese(nextC); } } } @@ -32,6 +39,23 @@ public class SpaceWithPunctuationBreakIterator extends BreakIterator { return code == 24 || code == 20 || code == 21 || code == 22 || code == 23; } + public boolean isChinese(char c){ + return c >= 0x4E00 && c <= 0x9FBF; + } + + // 根据UnicodeBlock方法判断中文标点符号 + public boolean isChinesePunctuation(char c) { + Character.UnicodeBlock ub = Character.UnicodeBlock.of(c); + if (ub == Character.UnicodeBlock.GENERAL_PUNCTUATION + || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION + || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS + || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_FORMS) { + return true; + } else { + return false; + } + } + public int first() { throw new UnsupportedOperationException(); } @@ -48,6 +72,10 @@ public class SpaceWithPunctuationBreakIterator extends BreakIterator { if(currentIndex == currentPos) { currentPos = this.iterator.next(); } + if (currentPos > -1 && currentPos < noSwitchIndex.length && noSwitchIndex[currentPos]) { + currentIndex = currentPos; + return this.next(); + } for(int i = currentIndex + 1; i < currentPos; i++){ if(spaceIndex[i]){ currentIndex = i; diff --git a/fine-itext/src/com/fr/third/v2/lowagie/text/html/simpleparser/HTMLWorker.java b/fine-itext/src/com/fr/third/v2/lowagie/text/html/simpleparser/HTMLWorker.java index 7e9254e71..245155736 100644 --- a/fine-itext/src/com/fr/third/v2/lowagie/text/html/simpleparser/HTMLWorker.java +++ b/fine-itext/src/com/fr/third/v2/lowagie/text/html/simpleparser/HTMLWorker.java @@ -50,6 +50,7 @@ package com.fr.third.v2.lowagie.text.html.simpleparser; +import com.fr.third.v2.lowagie.text.pdf.PdfFont; import java.awt.image.BufferedImage; import java.io.ByteArrayInputStream; import java.io.File; @@ -159,6 +160,10 @@ public class HTMLWorker implements SimpleXMLDocHandler, DocListener { return parseToList(reader, style, null); } + public static void initDefaultFont(String fontName) { + PdfFont.initDefaultFont(fontName); + } + public static ArrayList parseToList(Reader reader, StyleSheet style, HashMap interfaceProps) throws IOException { HTMLWorker worker = new HTMLWorker(null); diff --git a/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfChunk.java b/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfChunk.java index 8d014cd80..8d12c2a14 100644 --- a/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfChunk.java +++ b/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfChunk.java @@ -53,6 +53,7 @@ import java.awt.Color; import java.awt.FontMetrics; import java.util.HashMap; import java.util.Iterator; +import java.util.List; import java.util.Locale; import java.util.Map; @@ -92,6 +93,8 @@ public class PdfChunk { private static final String BREAK_TAG = "
"; + private final static char EMPTY_SYMBOL = ' '; + private boolean breakTag = false; public float getHeight() { @@ -309,7 +312,6 @@ public class PdfChunk { if (splitCharacter == null) splitCharacter = DefaultSplitCharacter.DEFAULT; } - // methods /** Gets the Unicode equivalent to a CID. @@ -362,11 +364,11 @@ public class PdfChunk { // or until the totalWidth is reached int length = value.length(); char valueArray[] = value.toCharArray(); - BreakIterator iterator = BreakIterator.getLineInstance(Locale.getDefault()); - BreakIterator iterator1 = new SpaceWithPunctuationBreakIterator(value, iterator); + BreakIterator iterator = new SpaceWithPunctuationBreakIterator(value, BreakIterator.getLineInstance(Locale.getDefault())); char character = 0; + boolean hasEmptySymbolEndOfLine = false; //行末有空格存在 ps:不存在连续空格键 while (currentPosition < length) { - int next = iterator1.next(); + int next = iterator.next(); if(next < 1){ break; } @@ -386,11 +388,17 @@ public class PdfChunk { } String substring = value.substring(start, next); currentWidth += font.width(substring); - if (currentWidth + indent.getRight() > width){ - currentPosition = start - 1; + if (currentWidth + indent.getRight() > width) { + if (dealWithEmptySymbol(substring, currentWidth + indent.getRight(), width)) { + //行末空格(加上该空格大于限制的行宽,减去则小于限制的行宽)、需要去掉该空格,不然下划线、删除线什么的会变长 + //该空格不能留给下一行 + hasEmptySymbolEndOfLine=true; + start = next; + } else { + currentPosition = start - 1; + } break; } - start = next; } @@ -401,12 +409,26 @@ public class PdfChunk { } // otherwise, the string has to be truncated String returnValue = value.substring(start); - value = value.substring(0, start); + value = value.substring(0, start - (hasEmptySymbolEndOfLine ? 1 : 0)); PdfChunk pc = new PdfChunk(returnValue, this); return pc; } -/** + /** + * @param text 文本 + * @param totalWidth 已经处理过的文本和当前文本的宽度和 + * @param lineWidth 行宽 + * @return true : 去掉text末尾的空格后小于行宽 + */ + private boolean dealWithEmptySymbol(String text, float totalWidth, float lineWidth) { + if (null == text || 0 == text.length()) { + return false; + } + //HTML解析后不存在连续多个空格键存在的情况,因此只需去除末尾的空格键 + return text.charAt(text.length() - 1) == EMPTY_SYMBOL && totalWidth - getFont().width(EMPTY_SYMBOL) < lineWidth; + } + + /** * Truncates this PdfChunk if it's too long for the given width. *

* Returns null if the PdfChunk wasn't truncated. diff --git a/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfFont.java b/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfFont.java index 5e5f7b675..31592eed7 100644 --- a/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfFont.java +++ b/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfFont.java @@ -77,11 +77,12 @@ import java.awt.geom.AffineTransform; */ public class PdfFont implements Comparable { - private static final int ONE_THOUSAND = 1000 ; private Font oriFont; - public static int SCALE = 100; + public static float SCALE = 100; + + private static String DEFAULT_FONT_NAME = ""; /** the font metrics. */ // private BaseFont font; @@ -101,6 +102,12 @@ public class PdfFont implements Comparable { this.oriFont = oriFont; } + public static void initDefaultFont(String fontName) { + if ("" == DEFAULT_FONT_NAME && null != fontName) { + DEFAULT_FONT_NAME = fontName; + } + } + // methods /** @@ -160,9 +167,14 @@ public class PdfFont implements Comparable { private FontMetrics metrics; - private FontMetrics getMetrics() { - if (null == metrics) { - metrics = FontDesignMetrics.getMetrics(getAwtFont(SCALE)); + private java.awt.Font scaleFont; + + private java.awt.Font scaleDefaultFont; + + private FontMetrics getMetrics(java.awt.Font font) { + if (null == metrics || !font.equals(metrics.getFont())) { + metrics = FontDesignMetrics.getMetrics(font); + return metrics; } return metrics; } @@ -174,16 +186,27 @@ public class PdfFont implements Comparable { * @return a width in Text Space */ - float width(int character) { - return image == null ? getMetrics().charWidth(replaceNbsp(character))/SCALE : image.getScaledWidth(); - } - - float width(String s) { - return image == null ? getMetrics().stringWidth(replaceNbsp(s))/SCALE : image.getScaledWidth(); + public float width(int character) { + if (null != image) { + return image.getScaledWidth(); + } + java.awt.Font font = getScaleAwtFont(); + font = font.canDisplay(character) ? font : getScaleDefaultAwtFont(); + return getMetrics(font).charWidth(replaceNbsp(character)) / SCALE; } - String replaceNbsp(String str) { - return canDisplayNbsp() ? str : str.replaceAll(String.valueOf((char) 160), String.valueOf((char) 32)); + public float width(String s) { + if (null != image) { + return image.getScaledWidth(); + } + if (null == s) { + return 0f; + } + float num = 0f; + for (int i = 0; i < s.length(); i++) { + num += width(s.charAt(i)); + } + return num; } int replaceNbsp(int character) { @@ -191,20 +214,35 @@ public class PdfFont implements Comparable { } private boolean canDisplayNbsp() { - return getAwtFont().canDisplay((char) 160); + return getScaleAwtFont().canDisplay((char) 160); } BaseFont getFont() { - return oriFont.getCalculatedBaseFont(false); + return oriFont.getCalculatedBaseFont(false); } public java.awt.Font getAwtFont() { - return getAwtFont(1); + return getAwtFont(oriFont.getFontName(), 1f); + } + + private java.awt.Font getScaleAwtFont() { + if (null == scaleFont) { + scaleFont = getAwtFont(oriFont.getFontName(), SCALE); + } + return scaleFont; } - private java.awt.Font getAwtFont(int scale) { + private java.awt.Font getScaleDefaultAwtFont() { + if (null == scaleDefaultFont) { + scaleDefaultFont = getAwtFont(DEFAULT_FONT_NAME, SCALE); + } + return scaleDefaultFont; + } + + + private java.awt.Font getAwtFont(String fontName,float scale) { Map attrMap = new HashMap(4); - attrMap.put(TextAttribute.FAMILY, oriFont.getFontName()); + attrMap.put(TextAttribute.FAMILY, fontName); attrMap.put(TextAttribute.SIZE, new Float(oriFont.getSize() * scale)); if (oriFont.isBold()) { attrMap.put(TextAttribute.WEIGHT, TextAttribute.WEIGHT_BOLD);