From 8e74c0324a9006a544f1eb74d7fe5c87a6aebe57 Mon Sep 17 00:00:00 2001 From: "Joe.Jiang" Date: Fri, 2 Apr 2021 14:15:53 +0800 Subject: [PATCH] =?UTF-8?q?REPORT-50649=20=E5=AF=BC=E5=87=BA-=E7=94=9F?= =?UTF-8?q?=E5=83=BB=E5=AD=97=E5=AF=BC=E5=87=BA=E9=97=AE=E9=A2=98=20Excel?= =?UTF-8?q?=E5=AF=BC=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../poi/xssf/streaming/SheetDataWriter.java | 16 ++++++- .../org/apache/xmlbeans/impl/store/Saver.java | 46 +++++++------------ 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/fine-poi/src/main/java/com/fr/third/v2/org/apache/poi/xssf/streaming/SheetDataWriter.java b/fine-poi/src/main/java/com/fr/third/v2/org/apache/poi/xssf/streaming/SheetDataWriter.java index 415c4f2e4..7a21b80a5 100644 --- a/fine-poi/src/main/java/com/fr/third/v2/org/apache/poi/xssf/streaming/SheetDataWriter.java +++ b/fine-poi/src/main/java/com/fr/third/v2/org/apache/poi/xssf/streaming/SheetDataWriter.java @@ -377,14 +377,22 @@ public class SheetDataWriter { default: // YK: XmlBeans silently replaces all ISO control characters ( < 32) with question marks. // the same rule applies to unicode surrogates and "not a character" symbols. - if( c < ' ' || Character.isLowSurrogate(c) || Character.isHighSurrogate(c) || - ('\uFFFE' <= c && c <= '\uFFFF')) { + // Excel SXSSF deals with special chars. + // https://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/streaming/SheetDataWriter.java?r1=1791720&r2=1800705&pathrev=1800705&diff_format=f + if (replaceWithQuestionMark(c)) { if (counter > last) { sb.append(chars, last, counter - last); } sb.append('?'); last = counter + 1; } + else if (Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) { + if (counter > last) { + sb.append(chars, last, counter - last); + } + sb.append(c); + last = counter + 1; + } else if (c > 127) { if (counter > last) { sb.append(chars, last, counter - last); @@ -412,4 +420,8 @@ public class SheetDataWriter { _out.close(); return _fd.delete(); } + + private boolean replaceWithQuestionMark(char c) { + return c < ' ' || '\uFFFE' <= c; + } } diff --git a/fine-poi/src/main/java/com/fr/third/v2/org/apache/xmlbeans/impl/store/Saver.java b/fine-poi/src/main/java/com/fr/third/v2/org/apache/xmlbeans/impl/store/Saver.java index 6f02763ca..b20265de0 100644 --- a/fine-poi/src/main/java/com/fr/third/v2/org/apache/xmlbeans/impl/store/Saver.java +++ b/fine-poi/src/main/java/com/fr/third/v2/org/apache/xmlbeans/impl/store/Saver.java @@ -273,6 +273,23 @@ abstract class Saver end.toEnd(); } + /** + * Test if a character is valid in xml character content. See + * http://www.w3.org/TR/REC-xml#NT-Char + * Excel XSSF deals with special chars. + * https://svn.apache.org/viewvc/xmlbeans/trunk/src/store/org/apache/xmlbeans/impl/store/Saver.java?r1=1025773&r2=1832298&pathrev=1832298&diff_format=h + */ + static boolean isBadChar ( char ch ) + { + return ! ( + Character.isHighSurrogate(ch) || + Character.isLowSurrogate(ch) || + (ch >= 0x20 && ch <= 0xD7FF) || + (ch >= 0xE000 && ch <= 0xFFFD) || + (ch == 0x9) || (ch == 0xA) || (ch == 0xD) + ); + } + protected boolean saveNamespacesFirst ( ) { return _saveNamespacesFirst; @@ -1548,21 +1565,6 @@ abstract class Saver } } - /** - * Test if a character is valid in xml character content. See - * http://www.w3.org/TR/REC-xml#NT-Char - */ - - private boolean isBadChar ( char ch ) - { - return ! ( - (ch >= 0x20 && ch <= 0xD7FF ) || - (ch >= 0xE000 && ch <= 0xFFFD) || - (ch >= 0x10000 && ch <= 0x10FFFF) || - (ch == 0x9) || (ch == 0xA) || (ch == 0xD) - ); - } - /** * Test if a character is to be replaced with an escaped value */ @@ -2187,20 +2189,6 @@ abstract class Saver } } - /** - * Test if a character is valid in xml character content. See - * http://www.w3.org/TR/REC-xml#NT-Char - */ - private boolean isBadChar ( char ch ) - { - return ! ( - (ch >= 0x20 && ch <= 0xD7FF ) || - (ch >= 0xE000 && ch <= 0xFFFD) || - (ch >= 0x10000 && ch <= 0x10FFFF) || - (ch == 0x9) || (ch == 0xA) || (ch == 0xD) - ); - } - private void emitLiteral ( String literal ) { // TODO: systemId production http://www.w3.org/TR/REC-xml/#NT-SystemLiteral