Browse Source

* xlsx存在隐藏字符时需要忽略,确保和展示看到的一样

pull/3168/head
Jiaju Zhuang 2 years ago
parent
commit
73b65d29a4
  1. 26
      easyexcel-core/src/main/java/com/alibaba/excel/analysis/v07/handlers/sax/SharedStringsTableHandler.java
  2. 14
      easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/compatibility/CompatibilityTest.java
  3. 38
      easyexcel-test/src/test/java/com/alibaba/easyexcel/test/demo/rare/ReadTest.java
  4. 0
      easyexcel-test/src/test/resources/compatibility/t09.xlsx
  5. 1
      update.md

26
easyexcel-core/src/main/java/com/alibaba/excel/analysis/v07/handlers/sax/SharedStringsTableHandler.java

@ -1,13 +1,31 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package com.alibaba.excel.analysis.v07.handlers.sax;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.alibaba.excel.cache.ReadCache;
import com.alibaba.excel.constant.ExcelXmlConstants;
import org.xml.sax.Attributes;
import org.xml.sax.helpers.DefaultHandler;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Sax read sharedStringsTable.xml
*
@ -146,7 +164,7 @@ public class SharedStringsTableHandler extends DefaultHandler {
String code = m.group(1);
int icode = Integer.decode("0x" + code);
buf.append((char) icode);
buf.append((char)icode);
idx = m.end();
}

14
easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/compatibility/CompatibilityTest.java

@ -148,6 +148,20 @@ public class CompatibilityTest {
Assert.assertEquals(10L, list.size());
}
/**
 * Reads {@code compatibility/t09.xlsx} with no header rows and checks the single cell it yields.
 *
 * <p>The workbook's shared strings XML stores the raw text {@code SH_x005f_x000D_Z002}; the reader
 * is expected to hand back {@code SH_x000D_Z002}, i.e. what a user sees when opening the file.
 */
@Test
public void t09() {
    File workbook = TestFileUtil.readFile("compatibility/t09.xlsx");
    List<Map<Integer, Object>> rows = EasyExcel.read(workbook).headRowNumber(0).sheet().doReadSync();
    log.info("data:{}", JSON.toJSONString(rows));

    // Exactly one row is expected, and its first column carries the decoded text.
    Assert.assertEquals(1, rows.size());
    Map<Integer, Object> firstRow = rows.get(0);
    Assert.assertEquals("SH_x000D_Z002", firstRow.get(0));
}
private List<SimpleData> data() {
List<SimpleData> list = new ArrayList<SimpleData>();
for (int i = 0; i < 10; i++) {

38
easyexcel-test/src/test/java/com/alibaba/easyexcel/test/demo/rare/ReadTest.java

@ -1,38 +0,0 @@
package com.alibaba.easyexcel.test.demo.rare;
import com.alibaba.easyexcel.test.util.TestFileUtil;
import com.alibaba.excel.EasyExcel;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.junit.Assert;
import org.junit.Test;
import java.util.List;
import java.util.Map;
/**
 * Records some less common read scenarios.
 *
 * @author gxz gongxuanzhang@foxmail.com
 **/
public class ReadTest {
    /**
     * When a workbook contains text that needs unescaping (for example an {@code x005} sequence),
     * the value read by EasyExcel must match the value Apache POI reports for the same cell.
     *
     * @throws Exception if the workbook cannot be opened or read
     **/
    @Test
    public void readX005() throws Exception {
        String fileName = TestFileUtil.pathBuild().sub("temp").sub("utfdecode").sub("demo.xlsx").getPath();

        // Reference value: first cell of the first row as decoded by POI.
        // NOTE(review): the workbook is never closed. Closing a workbook opened from a path may
        // write the package back to disk in some POI versions -- confirm before adding
        // try-with-resources here, since this file is a checked-in test resource.
        XSSFWorkbook xssfWorkbook = new XSSFWorkbook(fileName);
        XSSFSheet xssfSheet = xssfWorkbook.getSheetAt(0);
        XSSFRow row = xssfSheet.getRow(0);
        String poiValue = row.getCell(0).getStringCellValue();

        // Value under test: the same cell read through EasyExcel with no header rows.
        List<Map<Integer, Object>> list = EasyExcel.read(fileName)
            .sheet(0)
            .headRowNumber(0).doReadSync();
        Map<Integer, Object> easyExcelRow = list.get(0);

        // JUnit's contract is assertEquals(expected, actual); POI is the reference here, so a
        // failure message now labels the two values correctly.
        Assert.assertEquals(poiValue, easyExcelRow.get(0).toString());
    }
}

0
easyexcel-test/src/test/resources/temp/utfdecode/demo.xlsx → easyexcel-test/src/test/resources/compatibility/t09.xlsx

1
update.md

@ -2,6 +2,7 @@
* 读csv会忽略BOM数据 [Issue #3137](https://github.com/alibaba/easyexcel/issues/3137)
* 解决csv用office打开乱码的问题,写csv默认带上BOM数据
* xlsx存在隐藏字符时需要忽略,确保和展示看到的一样
# 3.2.1

Loading…
Cancel
Save