Browse Source

REPORT-14598 pdf HTML导出换行与浏览器不一致问题

release/10.0
Hugh.C 5 years ago
parent
commit
7255663d3b
  1. 30
      fine-itext/src/com/fr/third/v2/lowagie/text/html/SpaceWithPunctuationBreakIterator.java
  2. 5
      fine-itext/src/com/fr/third/v2/lowagie/text/html/simpleparser/HTMLWorker.java
  3. 38
      fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfChunk.java
  4. 70
      fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfFont.java

30
fine-itext/src/com/fr/third/v2/lowagie/text/html/SpaceWithPunctuationBreakIterator.java

@ -13,16 +13,23 @@ public class SpaceWithPunctuationBreakIterator extends BreakIterator {
private int currentPos = -1; private int currentPos = -1;
private int currentIndex = -1; private int currentIndex = -1;
private boolean[] spaceIndex; private boolean[] spaceIndex;
//不作为break分词的字符
private boolean[] noSwitchIndex;
public SpaceWithPunctuationBreakIterator(String text, BreakIterator iterator){ public SpaceWithPunctuationBreakIterator(String text, BreakIterator iterator){
this.iterator = iterator; this.iterator = iterator;
iterator.setText(text); iterator.setText(text);
this.spaceIndex = new boolean[text.length()]; this.spaceIndex = new boolean[text.length()];
this.noSwitchIndex = new boolean[text.length()];
int ilen = text.length() - 1; int ilen = text.length() - 1;
if(ilen > 0) { if(ilen > 0) {
for (int i = 0; i < ilen; i++) { for (int i = 0; i < ilen; i++) {
char c = text.charAt(i); char c = text.charAt(i);
spaceIndex[i + 1] = (c == ' ' && isPunctuation(text.charAt(i + 1)) )|| c == '-' || c == '\u2010' || c== '\n'; //中文的标点符号都是可以直接断开的
spaceIndex[i + 1] = (c == ' ' && isPunctuation(text.charAt(i + 1)) )|| c == '-' || c == '\u2010' || c== '\n'|| isChinesePunctuation( c);
//需要保证下一个字符不是中文,下一个字符如果是中文的话,允许分行
char nextC = text.charAt(i+1);
noSwitchIndex[i + 1] = (c=='/' || c == '.' || c == ':' || c == ';') && !isChinese(nextC);
} }
} }
} }
@ -32,6 +39,23 @@ public class SpaceWithPunctuationBreakIterator extends BreakIterator {
return code == 24 || code == 20 || code == 21 || code == 22 || code == 23; return code == 24 || code == 20 || code == 21 || code == 22 || code == 23;
} }
/**
 * Tells whether a character lies in the CJK Unified Ideographs block.
 *
 * @param c the character to test
 * @return {@code true} if {@code c} is within U+4E00..U+9FFF, {@code false} otherwise
 */
public boolean isChinese(char c){
    // The block ends at U+9FFF; the former bound 0x9FBF left out its last 64 code points.
    return c >= 0x4E00 && c <= 0x9FFF;
}
/**
 * Decides whether a character counts as Chinese punctuation by looking up its Unicode block.
 *
 * @param c the character to classify
 * @return {@code true} if {@code c} belongs to General Punctuation, CJK Symbols and Punctuation,
 *         Halfwidth and Fullwidth Forms, or CJK Compatibility Forms
 */
public boolean isChinesePunctuation(char c) {
    final Character.UnicodeBlock block = Character.UnicodeBlock.of(c);
    return block == Character.UnicodeBlock.GENERAL_PUNCTUATION
            || block == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
            || block == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS
            || block == Character.UnicodeBlock.CJK_COMPATIBILITY_FORMS;
}
public int first() { public int first() {
throw new UnsupportedOperationException(); throw new UnsupportedOperationException();
} }
@ -48,6 +72,10 @@ public class SpaceWithPunctuationBreakIterator extends BreakIterator {
if(currentIndex == currentPos) { if(currentIndex == currentPos) {
currentPos = this.iterator.next(); currentPos = this.iterator.next();
} }
if (currentPos > -1 && currentPos < noSwitchIndex.length && noSwitchIndex[currentPos]) {
currentIndex = currentPos;
return this.next();
}
for(int i = currentIndex + 1; i < currentPos; i++){ for(int i = currentIndex + 1; i < currentPos; i++){
if(spaceIndex[i]){ if(spaceIndex[i]){
currentIndex = i; currentIndex = i;

5
fine-itext/src/com/fr/third/v2/lowagie/text/html/simpleparser/HTMLWorker.java

@ -50,6 +50,7 @@
package com.fr.third.v2.lowagie.text.html.simpleparser; package com.fr.third.v2.lowagie.text.html.simpleparser;
import com.fr.third.v2.lowagie.text.pdf.PdfFont;
import java.awt.image.BufferedImage; import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.File; import java.io.File;
@ -159,6 +160,10 @@ public class HTMLWorker implements SimpleXMLDocHandler, DocListener {
return parseToList(reader, style, null); return parseToList(reader, style, null);
} }
/**
 * Forwards the given font name to {@link PdfFont#initDefaultFont(String)}.
 *
 * @param fontName the default font name to hand over to {@code PdfFont}
 */
public static void initDefaultFont(String fontName) {
PdfFont.initDefaultFont(fontName);
}
public static ArrayList parseToList(Reader reader, StyleSheet style, public static ArrayList parseToList(Reader reader, StyleSheet style,
HashMap interfaceProps) throws IOException { HashMap interfaceProps) throws IOException {
HTMLWorker worker = new HTMLWorker(null); HTMLWorker worker = new HTMLWorker(null);

38
fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfChunk.java

@ -53,6 +53,7 @@ import java.awt.Color;
import java.awt.FontMetrics; import java.awt.FontMetrics;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Map; import java.util.Map;
@ -92,6 +93,8 @@ public class PdfChunk {
private static final String BREAK_TAG = "<br>"; private static final String BREAK_TAG = "<br>";
private final static char EMPTY_SYMBOL = ' ';
private boolean breakTag = false; private boolean breakTag = false;
public float getHeight() { public float getHeight() {
@ -309,7 +312,6 @@ public class PdfChunk {
if (splitCharacter == null) if (splitCharacter == null)
splitCharacter = DefaultSplitCharacter.DEFAULT; splitCharacter = DefaultSplitCharacter.DEFAULT;
} }
// methods // methods
/** Gets the Unicode equivalent to a CID. /** Gets the Unicode equivalent to a CID.
@ -362,11 +364,11 @@ public class PdfChunk {
// or until the totalWidth is reached // or until the totalWidth is reached
int length = value.length(); int length = value.length();
char valueArray[] = value.toCharArray(); char valueArray[] = value.toCharArray();
BreakIterator iterator = BreakIterator.getLineInstance(Locale.getDefault()); BreakIterator iterator = new SpaceWithPunctuationBreakIterator(value, BreakIterator.getLineInstance(Locale.getDefault()));
BreakIterator iterator1 = new SpaceWithPunctuationBreakIterator(value, iterator);
char character = 0; char character = 0;
boolean hasEmptySymbolEndOfLine = false; //行末有空格存在 ps:不存在连续空格键
while (currentPosition < length) { while (currentPosition < length) {
int next = iterator1.next(); int next = iterator.next();
if(next < 1){ if(next < 1){
break; break;
} }
@ -386,11 +388,17 @@ public class PdfChunk {
} }
String substring = value.substring(start, next); String substring = value.substring(start, next);
currentWidth += font.width(substring); currentWidth += font.width(substring);
if (currentWidth + indent.getRight() > width){ if (currentWidth + indent.getRight() > width) {
if (dealWithEmptySymbol(substring, currentWidth + indent.getRight(), width)) {
//行末空格(加上该空格大于限制的行宽,减去则小于限制的行宽)、需要去掉该空格,不然下划线、删除线什么的会变长
//该空格不能留给下一行
hasEmptySymbolEndOfLine=true;
start = next;
} else {
currentPosition = start - 1; currentPosition = start - 1;
}
break; break;
} }
start = next; start = next;
} }
@ -401,12 +409,26 @@ public class PdfChunk {
} }
// otherwise, the string has to be truncated // otherwise, the string has to be truncated
String returnValue = value.substring(start); String returnValue = value.substring(start);
value = value.substring(0, start); value = value.substring(0, start - (hasEmptySymbolEndOfLine ? 1 : 0));
PdfChunk pc = new PdfChunk(returnValue, this); PdfChunk pc = new PdfChunk(returnValue, this);
return pc; return pc;
} }
/** /**
* @param text 文本
* @param totalWidth 已经处理过的文本和当前文本的宽度和
* @param lineWidth 行宽
* @return true : 去掉text末尾的空格后小于行宽
*/
private boolean dealWithEmptySymbol(String text, float totalWidth, float lineWidth) {
    // A missing or empty segment has no trailing space to drop.
    if (text == null || text.length() == 0) {
        return false;
    }
    // Parsed HTML never contains runs of consecutive spaces, so only the last character matters.
    char lastChar = text.charAt(text.length() - 1);
    if (lastChar != EMPTY_SYMBOL) {
        return false;
    }
    // True when the accumulated width minus one space is below the line width.
    return totalWidth - getFont().width(EMPTY_SYMBOL) < lineWidth;
}
/**
* Truncates this <CODE>PdfChunk</CODE> if it's too long for the given width. * Truncates this <CODE>PdfChunk</CODE> if it's too long for the given width.
* <P> * <P>
* Returns <VAR>null</VAR> if the <CODE>PdfChunk</CODE> wasn't truncated. * Returns <VAR>null</VAR> if the <CODE>PdfChunk</CODE> wasn't truncated.

70
fine-itext/src/com/fr/third/v2/lowagie/text/pdf/PdfFont.java

@ -77,11 +77,12 @@ import java.awt.geom.AffineTransform;
*/ */
public class PdfFont implements Comparable { public class PdfFont implements Comparable {
private static final int ONE_THOUSAND = 1000 ;
private Font oriFont; private Font oriFont;
public static int SCALE = 100; public static float SCALE = 100;
private static String DEFAULT_FONT_NAME = "";
/** the font metrics. */ /** the font metrics. */
// private BaseFont font; // private BaseFont font;
@ -101,6 +102,12 @@ public class PdfFont implements Comparable {
this.oriFont = oriFont; this.oriFont = oriFont;
} }
/**
 * Sets the class-wide default font name, but only while it is still the empty string.
 * A {@code null} argument is ignored, and once a non-empty name has been stored later
 * calls leave it untouched.
 *
 * @param fontName the font name to store as the default; may be {@code null}
 */
public static void initDefaultFont(String fontName) {
    // Compare by content: == on strings tests reference identity and only works
    // here as long as both sides happen to be the same interned literal.
    if (fontName != null && "".equals(DEFAULT_FONT_NAME)) {
        DEFAULT_FONT_NAME = fontName;
    }
}
// methods // methods
/** /**
@ -160,9 +167,14 @@ public class PdfFont implements Comparable {
private FontMetrics metrics; private FontMetrics metrics;
private FontMetrics getMetrics() { private java.awt.Font scaleFont;
if (null == metrics) {
metrics = FontDesignMetrics.getMetrics(getAwtFont(SCALE)); private java.awt.Font scaleDefaultFont;
private FontMetrics getMetrics(java.awt.Font font) {
if (null == metrics || !font.equals(metrics.getFont())) {
metrics = FontDesignMetrics.getMetrics(font);
return metrics;
} }
return metrics; return metrics;
} }
@ -174,16 +186,27 @@ public class PdfFont implements Comparable {
* @return a width in Text Space * @return a width in Text Space
*/ */
float width(int character) { public float width(int character) {
return image == null ? getMetrics().charWidth(replaceNbsp(character))/SCALE : image.getScaledWidth(); if (null != image) {
return image.getScaledWidth();
} }
java.awt.Font font = getScaleAwtFont();
float width(String s) { font = font.canDisplay(character) ? font : getScaleDefaultAwtFont();
return image == null ? getMetrics().stringWidth(replaceNbsp(s))/SCALE : image.getScaledWidth(); return getMetrics(font).charWidth(replaceNbsp(character)) / SCALE;
} }
String replaceNbsp(String str) { public float width(String s) {
return canDisplayNbsp() ? str : str.replaceAll(String.valueOf((char) 160), String.valueOf((char) 32)); if (null != image) {
return image.getScaledWidth();
}
if (null == s) {
return 0f;
}
float num = 0f;
for (int i = 0; i < s.length(); i++) {
num += width(s.charAt(i));
}
return num;
} }
int replaceNbsp(int character) { int replaceNbsp(int character) {
@ -191,7 +214,7 @@ public class PdfFont implements Comparable {
} }
private boolean canDisplayNbsp() { private boolean canDisplayNbsp() {
return getAwtFont().canDisplay((char) 160); return getScaleAwtFont().canDisplay((char) 160);
} }
BaseFont getFont() { BaseFont getFont() {
@ -199,12 +222,27 @@ public class PdfFont implements Comparable {
} }
public java.awt.Font getAwtFont() { public java.awt.Font getAwtFont() {
return getAwtFont(1); return getAwtFont(oriFont.getFontName(), 1f);
}
private java.awt.Font getScaleAwtFont() {
if (null == scaleFont) {
scaleFont = getAwtFont(oriFont.getFontName(), SCALE);
}
return scaleFont;
} }
private java.awt.Font getAwtFont(int scale) { private java.awt.Font getScaleDefaultAwtFont() {
if (null == scaleDefaultFont) {
scaleDefaultFont = getAwtFont(DEFAULT_FONT_NAME, SCALE);
}
return scaleDefaultFont;
}
private java.awt.Font getAwtFont(String fontName,float scale) {
Map attrMap = new HashMap(4); Map attrMap = new HashMap(4);
attrMap.put(TextAttribute.FAMILY, oriFont.getFontName()); attrMap.put(TextAttribute.FAMILY, fontName);
attrMap.put(TextAttribute.SIZE, new Float(oriFont.getSize() * scale)); attrMap.put(TextAttribute.SIZE, new Float(oriFont.getSize() * scale));
if (oriFont.isBold()) { if (oriFont.isBold()) {
attrMap.put(TextAttribute.WEIGHT, TextAttribute.WEIGHT_BOLD); attrMap.put(TextAttribute.WEIGHT, TextAttribute.WEIGHT_BOLD);

Loading…
Cancel
Save