diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/analysis/v07/handlers/sax/SharedStringsTableHandler.java b/easyexcel-core/src/main/java/com/alibaba/excel/analysis/v07/handlers/sax/SharedStringsTableHandler.java
index 203db6c7..169429de 100644
--- a/easyexcel-core/src/main/java/com/alibaba/excel/analysis/v07/handlers/sax/SharedStringsTableHandler.java
+++ b/easyexcel-core/src/main/java/com/alibaba/excel/analysis/v07/handlers/sax/SharedStringsTableHandler.java
@@ -1,10 +1,12 @@
package com.alibaba.excel.analysis.v07.handlers.sax;
+import com.alibaba.excel.cache.ReadCache;
+import com.alibaba.excel.constant.ExcelXmlConstants;
import org.xml.sax.Attributes;
import org.xml.sax.helpers.DefaultHandler;
-import com.alibaba.excel.cache.ReadCache;
-import com.alibaba.excel.constant.ExcelXmlConstants;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
/**
* Sax read sharedStringsTable.xml
@@ -13,6 +15,8 @@ import com.alibaba.excel.constant.ExcelXmlConstants;
*/
public class SharedStringsTableHandler extends DefaultHandler {
+ private static final Pattern UTF_PATTTERN = Pattern.compile("_x([0-9A-Fa-f]{4})_");
+
/**
* The final piece of data
*/
@@ -86,7 +90,7 @@ public class SharedStringsTableHandler extends DefaultHandler {
if (currentData == null) {
readCache.put(null);
} else {
- readCache.put(currentData.toString());
+ readCache.put(utfDecode(currentData.toString()));
}
break;
case ExcelXmlConstants.SHAREDSTRINGS_RPH_TAG:
@@ -109,4 +113,51 @@ public class SharedStringsTableHandler extends DefaultHandler {
}
currentElementData.append(ch, start, length);
}
+
+ /**
+  * Decodes OOXML {@code _xHHHH_} escape sequences in a shared string (adapted from POI's
+  * XSSFRichTextString).
+  *
+  * <p>For all characters which cannot be represented in XML as defined by the XML 1.0 specification,
+  * the characters are escaped using the Unicode numerical character representation escape character
+  * format {@code _xHHHH_}, where H represents a hexadecimal character in the character's value.
+  *
+  * <p>Example: The Unicode character 0D is invalid in an XML 1.0 document,
+  * so it shall be escaped as {@code _x000D_}.
+  *
+  * <p>See section 3.18.9 in the OOXML spec.
+  *
+  * @param value the string to decode
+  * @return the decoded string or null if the input string is null
+  * @see org.apache.poi.xssf.usermodel.XSSFRichTextString#utfDecode(String)
+  */
+ static String utfDecode(String value) {
+     // Cheap pre-check: without "_x" there can be no escape, so skip the regex entirely.
+     if (value == null || !value.contains("_x")) {
+         return value;
+     }
+
+     Matcher m = UTF_PATTTERN.matcher(value);
+     if (!m.find()) {
+         // "_x" occurred but never as a complete _xHHHH_ escape. Escapes are very rare,
+         // so return the input as-is before allocating any buffer.
+         return value;
+     }
+
+     StringBuilder buf = new StringBuilder(value.length());
+     int idx = 0;
+     do {
+         // Copy the literal text between the previous escape and this one.
+         buf.append(value, idx, m.start());
+         // Group 1 is exactly four hex digits, so the parsed value always fits in a char.
+         int icode = Integer.parseInt(m.group(1), 16);
+         buf.append((char) icode);
+
+         idx = m.end();
+     } while (m.find());
+
+     // Copy whatever follows the last escape, without an intermediate substring.
+     buf.append(value, idx, value.length());
+     return buf.toString();
+ }
}
diff --git a/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/demo/rare/ReadTest.java b/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/demo/rare/ReadTest.java
new file mode 100644
index 00000000..c1e614ea
--- /dev/null
+++ b/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/demo/rare/ReadTest.java
@@ -0,0 +1,41 @@
+package com.alibaba.easyexcel.test.demo.rare;
+
+import com.alibaba.easyexcel.test.util.TestFileUtil;
+import com.alibaba.excel.EasyExcel;
+import org.apache.poi.xssf.usermodel.XSSFRow;
+import org.apache.poi.xssf.usermodel.XSSFSheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.File;
+import java.util.List;
+import java.util.Map;
+
+/**
+ *
+ * 记录一些不太常见的案例
+ * @author gxz gongxuanzhang@foxmail.com
+ **/
+public class ReadTest {
+
+
+ /**
+ * 当excel有需要转义的 如x005特殊符号时需要通过utf decode解码
+ *
+ **/
+ @Test
+ public void readX005() throws Exception{
+ String fileName = TestFileUtil.pathBuild().sub("temp").sub("utfdecode").sub("demo.xlsx").getPath();
+ XSSFWorkbook xssfWorkbook = new XSSFWorkbook(fileName);
+ XSSFSheet xssfSheet = xssfWorkbook.getSheetAt(0);
+ XSSFRow row = xssfSheet.getRow(0);
+ String poiValue = row.getCell(0).getStringCellValue();
+ List