From 7255663d3b2d93d62c9482e7eda7555c11358bec Mon Sep 17 00:00:00 2001 From: "Hugh.C" Date: Tue, 11 Feb 2020 11:51:14 +0800 Subject: [PATCH] =?UTF-8?q?REPORT-14598=20pdf=20HTML=E5=AF=BC=E5=87=BA?= =?UTF-8?q?=E6=8D=A2=E8=A1=8C=E4=B8=8E=E6=B5=8F=E8=A7=88=E5=99=A8=E4=B8=8D?= =?UTF-8?q?=E4=B8=80=E8=87=B4=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../SpaceWithPunctuationBreakIterator.java | 30 +++++++- .../text/html/simpleparser/HTMLWorker.java | 5 ++ .../third/v2/lowagie/text/pdf/PdfChunk.java | 40 +++++++--- .../fr/third/v2/lowagie/text/pdf/PdfFont.java | 74 ++++++++++++++----- 4 files changed, 121 insertions(+), 28 deletions(-) diff --git a/fine-itext/src/com/fr/third/v2/lowagie/text/html/SpaceWithPunctuationBreakIterator.java b/fine-itext/src/com/fr/third/v2/lowagie/text/html/SpaceWithPunctuationBreakIterator.java index 94322266b..1d4c7b035 100644 --- a/fine-itext/src/com/fr/third/v2/lowagie/text/html/SpaceWithPunctuationBreakIterator.java +++ b/fine-itext/src/com/fr/third/v2/lowagie/text/html/SpaceWithPunctuationBreakIterator.java @@ -13,16 +13,23 @@ public class SpaceWithPunctuationBreakIterator extends BreakIterator { private int currentPos = -1; private int currentIndex = -1; private boolean[] spaceIndex; + //不作为break分词的字符 + private boolean[] noSwitchIndex; public SpaceWithPunctuationBreakIterator(String text, BreakIterator iterator){ this.iterator = iterator; iterator.setText(text); this.spaceIndex = new boolean[text.length()]; + this.noSwitchIndex = new boolean[text.length()]; int ilen = text.length() - 1; if(ilen > 0) { for (int i = 0; i < ilen; i++) { char c = text.charAt(i); - spaceIndex[i + 1] = (c == ' ' && isPunctuation(text.charAt(i + 1)) )|| c == '-' || c == '\u2010' || c== '\n'; + //中文的标点符号都是可以直接断开的 + spaceIndex[i + 1] = (c == ' ' && isPunctuation(text.charAt(i + 1)) )|| c == '-' || c == '\u2010' || c== '\n'|| isChinesePunctuation( c); + //需要保证下一个字符不是中文,下一个字符如果是中文的话,允许分行 + char nextC = text.charAt(i+1); + noSwitchIndex[i + 1] = (c=='/' || c == '.' || c == ':' || c == ';') && !isChinese(nextC); } } } @@ -32,6 +39,23 @@ public class SpaceWithPunctuationBreakIterator extends BreakIterator { return code == 24 || code == 20 || code == 21 || code == 22 || code == 23; } + public boolean isChinese(char c){ + return c >= 0x4E00 && c <= 0x9FBF; + } + + // 根据UnicodeBlock方法判断中文标点符号 + public boolean isChinesePunctuation(char c) { + Character.UnicodeBlock ub = Character.UnicodeBlock.of(c); + if (ub == Character.UnicodeBlock.GENERAL_PUNCTUATION + || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION + || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS + || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_FORMS) { + return true; + } else { + return false; + } + } + public int first() { throw new UnsupportedOperationException(); } @@ -48,6 +72,10 @@ public class SpaceWithPunctuationBreakIterator extends BreakIterator { if(currentIndex == currentPos) { currentPos = this.iterator.next(); } + if (currentPos > -1 && currentPos < noSwitchIndex.length && noSwitchIndex[currentPos]) { + currentIndex = currentPos; + return this.next(); + } for(int i = currentIndex + 1; i < currentPos; i++){ if(spaceIndex[i]){ currentIndex = i; diff --git a/fine-itext/src/com/fr/third/v2/lowagie/text/html/simpleparser/HTMLWorker.java b/fine-itext/src/com/fr/third/v2/lowagie/text/html/simpleparser/HTMLWorker.java index 7e9254e71..245155736 100644 --- a/fine-itext/src/com/fr/third/v2/lowagie/text/html/simpleparser/HTMLWorker.java +++ b/fine-itext/src/com/fr/third/v2/lowagie/text/html/simpleparser/HTMLWorker.java @@ -50,6 +50,7 @@ package com.fr.third.v2.lowagie.text.html.simpleparser; +import com.fr.third.v2.lowagie.text.pdf.PdfFont; import java.awt.image.BufferedImage; import java.io.ByteArrayInputStream; import java.io.File; @@ -159,6 +160,10 @@ public class HTMLWorker implements SimpleXMLDocHandler, DocListener { return parseToList(reader, style, null); } + public static void initDefaultFont(String fontName) { + PdfFont.initDefaultFont(fontName); + } + public static ArrayList parseToList(Reader reader, StyleSheet style, HashMap interfaceProps) throws IOException { HTMLWorker worker = new HTMLWorker(null); diff --git a/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfChunk.java b/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfChunk.java index 8d014cd80..8d12c2a14 100644 --- a/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfChunk.java +++ b/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfChunk.java @@ -53,6 +53,7 @@ import java.awt.Color; import java.awt.FontMetrics; import java.util.HashMap; import java.util.Iterator; +import java.util.List; import java.util.Locale; import java.util.Map; @@ -92,6 +93,8 @@ public class PdfChunk { private static final String BREAK_TAG = "
"; + private final static char EMPTY_SYMBOL = ' '; + private boolean breakTag = false; public float getHeight() { @@ -309,7 +312,6 @@ public class PdfChunk { if (splitCharacter == null) splitCharacter = DefaultSplitCharacter.DEFAULT; } - // methods /** Gets the Unicode equivalent to a CID. @@ -362,11 +364,11 @@ public class PdfChunk { // or until the totalWidth is reached int length = value.length(); char valueArray[] = value.toCharArray(); - BreakIterator iterator = BreakIterator.getLineInstance(Locale.getDefault()); - BreakIterator iterator1 = new SpaceWithPunctuationBreakIterator(value, iterator); + BreakIterator iterator = new SpaceWithPunctuationBreakIterator(value, BreakIterator.getLineInstance(Locale.getDefault())); char character = 0; + boolean hasEmptySymbolEndOfLine = false; //行末有空格存在 ps:不存在连续空格键 while (currentPosition < length) { - int next = iterator1.next(); + int next = iterator.next(); if(next < 1){ break; } @@ -386,11 +388,17 @@ public class PdfChunk { } String substring = value.substring(start, next); currentWidth += font.width(substring); - if (currentWidth + indent.getRight() > width){ - currentPosition = start - 1; + if (currentWidth + indent.getRight() > width) { + if (dealWithEmptySymbol(substring, currentWidth + indent.getRight(), width)) { + //行末空格(加上该空格大于限制的行宽,减去则小于限制的行宽)、需要去掉该空格,不然下划线、删除线什么的会变长 + //该空格不能留给下一行 + hasEmptySymbolEndOfLine=true; + start = next; + } else { + currentPosition = start - 1; + } break; } - start = next; } @@ -401,12 +409,26 @@ public class PdfChunk { } // otherwise, the string has to be truncated String returnValue = value.substring(start); - value = value.substring(0, start); + value = value.substring(0, start - (hasEmptySymbolEndOfLine ? 1 : 0)); PdfChunk pc = new PdfChunk(returnValue, this); return pc; } -/** + /** + * @param text 文本 + * @param totalWidth 已经处理过的文本和当前文本的宽度和 + * @param lineWidth 行宽 + * @return true : 去掉text末尾的空格后小于行宽 + */ + private boolean dealWithEmptySymbol(String text, float totalWidth, float lineWidth) { + if (null == text || 0 == text.length()) { + return false; + } + //HTML解析后不存在连续多个空格键存在的情况,因此只需去除末尾的空格键 + return text.charAt(text.length() - 1) == EMPTY_SYMBOL && totalWidth - getFont().width(EMPTY_SYMBOL) < lineWidth; + } + + /** * Truncates this PdfChunk if it's too long for the given width. *

* Returns null if the PdfChunk wasn't truncated. diff --git a/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfFont.java b/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfFont.java index 5e5f7b675..31592eed7 100644 --- a/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfFont.java +++ b/fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfFont.java @@ -77,11 +77,12 @@ import java.awt.geom.AffineTransform; */ public class PdfFont implements Comparable { - private static final int ONE_THOUSAND = 1000 ; private Font oriFont; - public static int SCALE = 100; + public static float SCALE = 100; + + private static String DEFAULT_FONT_NAME = ""; /** the font metrics. */ // private BaseFont font; @@ -101,6 +102,12 @@ public class PdfFont implements Comparable { this.oriFont = oriFont; } + public static void initDefaultFont(String fontName) { + if ("" == DEFAULT_FONT_NAME && null != fontName) { + DEFAULT_FONT_NAME = fontName; + } + } + // methods /** @@ -160,9 +167,14 @@ public class PdfFont implements Comparable { private FontMetrics metrics; - private FontMetrics getMetrics() { - if (null == metrics) { - metrics = FontDesignMetrics.getMetrics(getAwtFont(SCALE)); + private java.awt.Font scaleFont; + + private java.awt.Font scaleDefaultFont; + + private FontMetrics getMetrics(java.awt.Font font) { + if (null == metrics || !font.equals(metrics.getFont())) { + metrics = FontDesignMetrics.getMetrics(font); + return metrics; } return metrics; } @@ -174,16 +186,27 @@ public class PdfFont implements Comparable { * @return a width in Text Space */ - float width(int character) { - return image == null ? getMetrics().charWidth(replaceNbsp(character))/SCALE : image.getScaledWidth(); - } - - float width(String s) { - return image == null ? getMetrics().stringWidth(replaceNbsp(s))/SCALE : image.getScaledWidth(); + public float width(int character) { + if (null != image) { + return image.getScaledWidth(); + } + java.awt.Font font = getScaleAwtFont(); + font = font.canDisplay(character) ? font : getScaleDefaultAwtFont(); + return getMetrics(font).charWidth(replaceNbsp(character)) / SCALE; } - String replaceNbsp(String str) { - return canDisplayNbsp() ? str : str.replaceAll(String.valueOf((char) 160), String.valueOf((char) 32)); + public float width(String s) { + if (null != image) { + return image.getScaledWidth(); + } + if (null == s) { + return 0f; + } + float num = 0f; + for (int i = 0; i < s.length(); i++) { + num += width(s.charAt(i)); + } + return num; } int replaceNbsp(int character) { @@ -191,20 +214,35 @@ public class PdfFont implements Comparable { } private boolean canDisplayNbsp() { - return getAwtFont().canDisplay((char) 160); + return getScaleAwtFont().canDisplay((char) 160); } BaseFont getFont() { - return oriFont.getCalculatedBaseFont(false); + return oriFont.getCalculatedBaseFont(false); } public java.awt.Font getAwtFont() { - return getAwtFont(1); + return getAwtFont(oriFont.getFontName(), 1f); + } + + private java.awt.Font getScaleAwtFont() { + if (null == scaleFont) { + scaleFont = getAwtFont(oriFont.getFontName(), SCALE); + } + return scaleFont; } - private java.awt.Font getAwtFont(int scale) { + private java.awt.Font getScaleDefaultAwtFont() { + if (null == scaleDefaultFont) { + scaleDefaultFont = getAwtFont(DEFAULT_FONT_NAME, SCALE); + } + return scaleDefaultFont; + } + + + private java.awt.Font getAwtFont(String fontName,float scale) { Map attrMap = new HashMap(4); - attrMap.put(TextAttribute.FAMILY, oriFont.getFontName()); + attrMap.put(TextAttribute.FAMILY, fontName); attrMap.put(TextAttribute.SIZE, new Float(oriFont.getSize() * scale)); if (oriFont.isBold()) { attrMap.put(TextAttribute.WEIGHT, TextAttribute.WEIGHT_BOLD);