From 544682f146ab3cf71175e0b609c23f61e2d5a813 Mon Sep 17 00:00:00 2001
From: Lilac Coral
Date: Tue, 14 Feb 2023 08:53:18 +0800
Subject: [PATCH 01/53] Update ReadListener.java
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
修改注释拼写
---
.../main/java/com/alibaba/excel/read/listener/ReadListener.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/read/listener/ReadListener.java b/easyexcel-core/src/main/java/com/alibaba/excel/read/listener/ReadListener.java
index b56f8a56..79899153 100644
--- a/easyexcel-core/src/main/java/com/alibaba/excel/read/listener/ReadListener.java
+++ b/easyexcel-core/src/main/java/com/alibaba/excel/read/listener/ReadListener.java
@@ -36,7 +36,7 @@ public interface ReadListener extends Listener {
/**
* When analysis one row trigger invoke function.
*
- * @param data one row value. Is is same as {@link AnalysisContext#readRowHolder()}
+ * @param data one row value. It is same as {@link AnalysisContext#readRowHolder()}
* @param context analysis context
*/
void invoke(T data, AnalysisContext context);
From bed09d026a903d4cacd5001c078ff2e94774002c Mon Sep 17 00:00:00 2001
From: Jiaju Zhuang
Date: Tue, 21 Feb 2023 11:21:42 +0800
Subject: [PATCH 02/53] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=96=87=E4=BB=B6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../alibaba/easyexcel/test/demo/fill/FillTest.java | 13 +------------
1 file changed, 1 insertion(+), 12 deletions(-)
diff --git a/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/demo/fill/FillTest.java b/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/demo/fill/FillTest.java
index 3b3c37fc..7023f31f 100644
--- a/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/demo/fill/FillTest.java
+++ b/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/demo/fill/FillTest.java
@@ -73,18 +73,7 @@ public class FillTest {
// 这里 会填充到第一个sheet, 然后文件流会自动关闭
EasyExcel.write(fileName).withTemplate(templateFileName).sheet().doFill(data());
- // 方案2 分多次 填充 会使用文件缓存(省内存) jdk8
- // since: 3.0.0-beta1
- fileName = TestFileUtil.getPath() + "listFill" + System.currentTimeMillis() + ".xlsx";
- EasyExcel.write(fileName)
- .withTemplate(templateFileName)
- .sheet()
- .doFill(() -> {
- // 分页查询数据
- return data();
- });
-
- // 方案3 分多次 填充 会使用文件缓存(省内存)
+ // 方案2 分多次 填充 会使用文件缓存(省内存)
fileName = TestFileUtil.getPath() + "listFill" + System.currentTimeMillis() + ".xlsx";
try (ExcelWriter excelWriter = EasyExcel.write(fileName).withTemplate(templateFileName).build()) {
WriteSheet writeSheet = EasyExcel.writerSheet().build();
From 93d968ac27321dab2c70d4d3a0912d7f244f0e72 Mon Sep 17 00:00:00 2001
From: gongxuanzhang
Date: Tue, 28 Feb 2023 14:06:55 +0800
Subject: [PATCH 03/53] delete poi same name package, move PoiUtils to excel
utils package. fix Java9 modular bug
---
.../java/com/alibaba/excel/util/PoiUtils.java | 61 +++++++++++++++++++
.../executor/ExcelWriteFillExecutor.java | 2 +-
.../apache/poi/hssf/usermodel/PoiUtils.java | 37 -----------
3 files changed, 62 insertions(+), 38 deletions(-)
create mode 100644 easyexcel-core/src/main/java/com/alibaba/excel/util/PoiUtils.java
delete mode 100644 easyexcel-core/src/main/java/org/apache/poi/hssf/usermodel/PoiUtils.java
diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/util/PoiUtils.java b/easyexcel-core/src/main/java/com/alibaba/excel/util/PoiUtils.java
new file mode 100644
index 00000000..2989b22b
--- /dev/null
+++ b/easyexcel-core/src/main/java/com/alibaba/excel/util/PoiUtils.java
@@ -0,0 +1,61 @@
+package com.alibaba.excel.util;
+
+import com.alibaba.excel.exception.ExcelRuntimeException;
+import org.apache.poi.hssf.record.RowRecord;
+import org.apache.poi.hssf.usermodel.HSSFRow;
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.util.BitField;
+import org.apache.poi.util.BitFieldFactory;
+import org.apache.poi.xssf.usermodel.XSSFRow;
+
+import java.lang.reflect.Field;
+
+/**
+ * Helpers for reading row metadata from POI rows.
+ *
+ * @author Jiaju Zhuang
+ */
+public class PoiUtils {
+
+    /** Bit field applied to the {@link RowRecord} option flags to tell whether the row height is customized. */
+    public static final BitField CUSTOM_HEIGHT = BitFieldFactory.getInstance(0x640);
+
+    /** The {@code row} field declared on {@link HSSFRow}; published only after it has been made accessible. */
+    private static volatile Field ROW_RECORD_FIELD;
+
+    /**
+     * Whether to customize the height
+     * @param row row
+     * @return {@code true} if the row reports a custom height; {@code false} otherwise or for other row types
+     */
+    public static boolean customHeight(Row row) {
+        if (row instanceof XSSFRow) {
+            XSSFRow xssfRow = (XSSFRow) row;
+            return xssfRow.getCTRow().getCustomHeight();
+        }
+        if (row instanceof HSSFRow) {
+            HSSFRow hssfRow = (HSSFRow) row;
+            try {
+                if (ROW_RECORD_FIELD == null) {
+                    initRowRecordField();
+                }
+                RowRecord record = (RowRecord) ROW_RECORD_FIELD.get(hssfRow);
+                return CUSTOM_HEIGHT.getValue(record.getOptionFlags()) == 1;
+            } catch (IllegalAccessException ignore) {
+                // Not expected once the field has been made accessible; fall through and report no custom height.
+            }
+        }
+        return false;
+    }
+
+    private static void initRowRecordField() {
+        try {
+            Field field = HSSFRow.class.getDeclaredField("row");
+            field.setAccessible(true);
+            // Publish last, so no other thread can pick up the field before it is accessible.
+            ROW_RECORD_FIELD = field;
+        } catch (NoSuchFieldException e) {
+            throw new ExcelRuntimeException(e);
+        }
+    }
+}
diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/write/executor/ExcelWriteFillExecutor.java b/easyexcel-core/src/main/java/com/alibaba/excel/write/executor/ExcelWriteFillExecutor.java
index 6779e875..f6dc1955 100644
--- a/easyexcel-core/src/main/java/com/alibaba/excel/write/executor/ExcelWriteFillExecutor.java
+++ b/easyexcel-core/src/main/java/com/alibaba/excel/write/executor/ExcelWriteFillExecutor.java
@@ -37,7 +37,7 @@ import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.Setter;
import org.apache.commons.collections4.CollectionUtils;
-import org.apache.poi.hssf.usermodel.PoiUtils;
+import com.alibaba.excel.util.PoiUtils;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.CellType;
diff --git a/easyexcel-core/src/main/java/org/apache/poi/hssf/usermodel/PoiUtils.java b/easyexcel-core/src/main/java/org/apache/poi/hssf/usermodel/PoiUtils.java
deleted file mode 100644
index 4f1d404f..00000000
--- a/easyexcel-core/src/main/java/org/apache/poi/hssf/usermodel/PoiUtils.java
+++ /dev/null
@@ -1,37 +0,0 @@
-package org.apache.poi.hssf.usermodel;
-
-import org.apache.poi.ss.usermodel.Row;
-import org.apache.poi.util.BitField;
-import org.apache.poi.util.BitFieldFactory;
-import org.apache.poi.xssf.usermodel.XSSFRow;
-
-/**
- * utils
- *
- * @author Jiaju Zhuang
- */
-public class PoiUtils {
-
- /**
- * Whether to customize the height
- */
- public static final BitField CUSTOM_HEIGHT = BitFieldFactory.getInstance(0x640);
-
- /**
- * Whether to customize the height
- *
- * @param row row
- * @return
- */
- public static boolean customHeight(Row row) {
- if (row instanceof XSSFRow) {
- XSSFRow xssfRow = (XSSFRow)row;
- return xssfRow.getCTRow().getCustomHeight();
- }
- if (row instanceof HSSFRow) {
- HSSFRow hssfRow = (HSSFRow)row;
- return CUSTOM_HEIGHT.getValue(hssfRow.getRowRecord().getOptionFlags()) == 1;
- }
- return false;
- }
-}
From 94421ab8a40ed17c0881cafc20f15d3b2f23de34 Mon Sep 17 00:00:00 2001
From: syyyj <3139487747@qq.com>
Date: Sat, 4 Mar 2023 03:02:36 +0800
Subject: [PATCH 04/53] Word error
Word error
---
README_EN.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README_EN.md b/README_EN.md
index 619f4161..10135fed 100644
--- a/README_EN.md
+++ b/README_EN.md
@@ -122,7 +122,7 @@ DEMO:[https://github.com/alibaba/easyexcel/blob/master/src/test/java/com/aliba
* 1. Create an entity object, refer to{@link DownloadData}.
* Each property of the entity object corresponds to a specific field of Excel
* 2. Specify the returned properties
- * 3. Invoke wirte function, then the OutputStream is automatically closed when it ends.
+ * 3. Invoke write function, then the OutputStream is automatically closed when it ends.
*
*/
@GetMapping("download")
From 6dcf356dba3d79c61601648052a19dda5bcad874 Mon Sep 17 00:00:00 2001
From: supalle
Date: Sat, 4 Mar 2023 13:57:03 +0800
Subject: [PATCH 05/53] =?UTF-8?q?=E6=B7=BB=E5=8A=A0CSV=E7=9A=84BOM?=
=?UTF-8?q?=E8=A7=A3=E6=9E=90=E6=94=AF=E6=8C=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../analysis/csv/BomBufferedInputStream.java | 91 ++++++++++++++
.../excel/analysis/csv/ByteOrderMark.java | 112 ++++++++++++++++++
.../holder/csv/CsvReadWorkbookHolder.java | 34 +++++-
.../easyexcel/test/core/bom/BomData.java | 16 +++
.../easyexcel/test/core/bom/BomDataTest.java | 53 +++++++++
.../src/test/resources/bom/bom_none.csv | 11 ++
.../src/test/resources/bom/bom_utf16be.csv | Bin 0 -> 152 bytes
.../src/test/resources/bom/bom_utf16le.csv | Bin 0 -> 152 bytes
.../src/test/resources/bom/bom_utf8.csv | 11 ++
9 files changed, 327 insertions(+), 1 deletion(-)
create mode 100644 easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/BomBufferedInputStream.java
create mode 100644 easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/ByteOrderMark.java
create mode 100644 easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomData.java
create mode 100644 easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomDataTest.java
create mode 100644 easyexcel-test/src/test/resources/bom/bom_none.csv
create mode 100644 easyexcel-test/src/test/resources/bom/bom_utf16be.csv
create mode 100644 easyexcel-test/src/test/resources/bom/bom_utf16le.csv
create mode 100644 easyexcel-test/src/test/resources/bom/bom_utf8.csv
diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/BomBufferedInputStream.java b/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/BomBufferedInputStream.java
new file mode 100644
index 00000000..9a53520d
--- /dev/null
+++ b/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/BomBufferedInputStream.java
@@ -0,0 +1,91 @@
+package com.alibaba.excel.analysis.csv;
+
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * This class is used to wrap a stream that includes an encoded {@link ByteOrderMark} as its first bytes.
+ *
+ * @author supalle
+ * @see "Byte Order Mark (BOM) FAQ"
+ * @see "Apache CommonsIO BOMInputStream"
+ */
+public class BomBufferedInputStream extends BufferedInputStream {
+    /** BOMs probed when the caller passes none; every stream works on its own copy of this list. */
+    public final static List<ByteOrderMark> DEFAULT_BYTE_ORDER_MARKS = new ArrayList<>(Arrays.asList(
+        ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE,
+        ByteOrderMark.UTF_32BE, ByteOrderMark.UTF_32LE));
+
+    private boolean initialized;
+    private ByteOrderMark byteOrderMark;
+    private final List<ByteOrderMark> byteOrderMarks;
+
+    public BomBufferedInputStream(InputStream in, final ByteOrderMark... byteOrderMarks) {
+        super(in);
+        this.byteOrderMarks = applyByteOrderMarks(byteOrderMarks);
+    }
+
+    public BomBufferedInputStream(InputStream in, int size, final ByteOrderMark... byteOrderMarks) {
+        super(in, size);
+        this.byteOrderMarks = applyByteOrderMarks(byteOrderMarks);
+    }
+
+    /** Returns a private copy of the given BOMs (or of the defaults), sorted so the longest BOM is tried first. */
+    private static List<ByteOrderMark> applyByteOrderMarks(ByteOrderMark[] byteOrderMarks) {
+        List<ByteOrderMark> marks = new ArrayList<>(byteOrderMarks == null || byteOrderMarks.length == 0
+            ? DEFAULT_BYTE_ORDER_MARKS : Arrays.asList(byteOrderMarks));
+        marks.sort(ByteOrderMark::compareTo);
+        return marks;
+    }
+
+    public boolean hasByteOrderMark() throws IOException {
+        return getByteOrderMark() != null;
+    }
+
+    /**
+     * Detects the BOM on the first call and leaves the stream just past it; returns {@code null} if none matches.
+     */
+    public ByteOrderMark getByteOrderMark() throws IOException {
+        if (initialized) {
+            return byteOrderMark;
+        }
+        int maxBomLength = byteOrderMarks.get(0).length();
+        mark(maxBomLength);
+        int[] firstBytes = new int[maxBomLength];
+        for (int i = 0; i < maxBomLength; i++) {
+            firstBytes[i] = read();
+            if (firstBytes[i] < 0) {
+                break;
+            }
+        }
+        byteOrderMark = matchByteOrderMark(this.byteOrderMarks, firstBytes);
+
+        reset();
+        if (byteOrderMark != null) {
+            // read(new byte[byteOrderMark.length()]);
+            skip(byteOrderMark.length());
+        }
+        initialized = true;
+        return byteOrderMark;
+    }
+
+    private ByteOrderMark matchByteOrderMark(final List<ByteOrderMark> byteOrderMarks, final int[] firstBytes) {
+        loop:
+        for (ByteOrderMark item : byteOrderMarks) {
+            int[] bytes = item.getBytes();
+            int length = bytes.length;
+            for (int i = 0; i < length; i++) {
+                if (firstBytes[i] != bytes[i]) {
+                    continue loop;
+                }
+            }
+            return item;
+        }
+        return null;
+    }
+}
diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/ByteOrderMark.java b/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/ByteOrderMark.java
new file mode 100644
index 00000000..0c68351b
--- /dev/null
+++ b/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/ByteOrderMark.java
@@ -0,0 +1,112 @@
+package com.alibaba.excel.analysis.csv;
+
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Objects;
+import java.util.stream.Collectors;
+
+/**
+ * Byte Order Mark (BOM)
+ *
+ * Used by {@link BomBufferedInputStream}
+ *
+ * @author supalle
+ * @see "Byte Order Mark (BOM) FAQ"
+ * @see "Apache CommonsIO ByteOrderMark"
+ */
+public class ByteOrderMark implements Comparable<ByteOrderMark> {
+
+    /** UTF-8 BOM. */
+    public static final ByteOrderMark UTF_8 = new ByteOrderMark(StandardCharsets.UTF_8, 0xEF, 0xBB, 0xBF);
+
+    /** UTF-16BE BOM (Big-Endian). */
+    public static final ByteOrderMark UTF_16BE = new ByteOrderMark(StandardCharsets.UTF_16BE, 0xFE, 0xFF);
+
+    /** UTF-16LE BOM (Little-Endian). */
+    public static final ByteOrderMark UTF_16LE = new ByteOrderMark(StandardCharsets.UTF_16LE, 0xFF, 0xFE);
+
+    /**
+     * UTF-32BE BOM (Big-Endian).
+     *
+     * @since 2.2
+     */
+    public static final ByteOrderMark UTF_32BE = new ByteOrderMark(Charset.forName("UTF-32BE"), 0x00, 0x00, 0xFE, 0xFF);
+
+    /**
+     * UTF-32LE BOM (Little-Endian).
+     *
+     * @since 2.2
+     */
+    public static final ByteOrderMark UTF_32LE = new ByteOrderMark(Charset.forName("UTF-32LE"), 0xFF, 0xFE, 0x00, 0x00);
+
+    /**
+     * Unicode BOM character; external form depends on the encoding.
+     *
+     * @see Byte Order Mark (BOM) FAQ
+     * @since 2.5
+     */
+    public static final char UTF_BOM = '\uFEFF';
+
+    private final Charset charset;
+    private final int[] bytes;
+
+    /**
+     * @param charset charset this BOM identifies; must not be null
+     * @param bytes   BOM byte values; must not be empty. Copied, so later changes to the array have no effect
+     */
+    public ByteOrderMark(final Charset charset, final int... bytes) {
+        this.charset = Objects.requireNonNull(charset, "charset must be not null");
+        if (bytes == null || bytes.length == 0) {
+            throw new IllegalArgumentException("bytes must be not empty");
+        }
+        this.bytes = bytes.clone();
+    }
+
+    public Charset getCharset() {
+        return charset;
+    }
+
+    /** Returns a copy of the BOM bytes, so callers cannot alter this (possibly shared) instance. */
+    public int[] getBytes() {
+        return bytes.clone();
+    }
+
+    public int length() {
+        return bytes.length;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        ByteOrderMark that = (ByteOrderMark) o;
+        return Objects.equals(charset, that.charset) && Arrays.equals(bytes, that.bytes);
+    }
+
+    @Override
+    public int hashCode() {
+        int result = Objects.hash(charset);
+        result = 31 * result + Arrays.hashCode(bytes);
+        return result;
+    }
+
+    @Override
+    public String toString() {
+        return "ByteOrderMark{" +
+            "charset=" + charset +
+            ", bytes=["
+            + Arrays.stream(bytes)
+            .mapToObj(Integer::toHexString)
+            .map(String::toUpperCase)
+            .map("0x"::concat)
+            .collect(Collectors.joining(",")) +
+            "]}";
+    }
+
+    /** Orders longer BOMs first; BOMs of equal length compare as 0 even when {@link #equals(Object)} is false. */
+    @Override
+    public int compareTo(ByteOrderMark o) {
+        return Integer.compare(o.length(), length());
+    }
+}
diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/read/metadata/holder/csv/CsvReadWorkbookHolder.java b/easyexcel-core/src/main/java/com/alibaba/excel/read/metadata/holder/csv/CsvReadWorkbookHolder.java
index 90ebe2b5..04aeac4e 100644
--- a/easyexcel-core/src/main/java/com/alibaba/excel/read/metadata/holder/csv/CsvReadWorkbookHolder.java
+++ b/easyexcel-core/src/main/java/com/alibaba/excel/read/metadata/holder/csv/CsvReadWorkbookHolder.java
@@ -1,15 +1,18 @@
package com.alibaba.excel.read.metadata.holder.csv;
+import com.alibaba.excel.analysis.csv.BomBufferedInputStream;
import com.alibaba.excel.read.metadata.ReadWorkbook;
import com.alibaba.excel.read.metadata.holder.ReadWorkbookHolder;
import com.alibaba.excel.support.ExcelTypeEnum;
-
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.Setter;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
+import java.io.IOException;
+import java.nio.file.Files;
+
/**
* Workbook holder
*
@@ -27,5 +30,34 @@ public class CsvReadWorkbookHolder extends ReadWorkbookHolder {
super(readWorkbook);
setExcelType(ExcelTypeEnum.CSV);
this.csvFormat = CSVFormat.DEFAULT;
+ // CSV BOM
+ if (readWorkbook.getCharset() == null) {
+ BomBufferedInputStream bomBufferedInputStream = buildBomBufferedInputStream();
+ setInputStream(bomBufferedInputStream);
+ setMandatoryUseInputStream(Boolean.TRUE);
+ try {
+ if (bomBufferedInputStream.hasByteOrderMark()) {
+ setCharset(bomBufferedInputStream.getByteOrderMark().getCharset());
+ }
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ }
+
+    /**
+     * Wraps the workbook source in a BOM-detecting stream: the file when one is set and use of the input
+     * stream is not mandatory, otherwise the configured input stream.
+     */
+    private BomBufferedInputStream buildBomBufferedInputStream() {
+        try {
+            if (!Boolean.TRUE.equals(getMandatoryUseInputStream()) && getFile() != null) {
+                return new BomBufferedInputStream(Files.newInputStream(getFile().toPath()));
+            }
+            return new BomBufferedInputStream(getInputStream());
+        } catch (IOException e) {
+            // Same message as before, but keep the cause so the original stack trace is not lost.
+            throw new RuntimeException(e.getMessage(), e);
+        }
}
}
diff --git a/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomData.java b/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomData.java
new file mode 100644
index 00000000..24d8ece8
--- /dev/null
+++ b/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomData.java
@@ -0,0 +1,16 @@
+package com.alibaba.easyexcel.test.core.bom;
+
+import com.alibaba.excel.annotation.ExcelProperty;
+import lombok.EqualsAndHashCode;
+import lombok.Getter;
+import lombok.Setter;
+
+@Getter
+@Setter
+@EqualsAndHashCode
+public class BomData {
+ @ExcelProperty("姓名")
+ private String name;
+ @ExcelProperty("年纪")
+ private Integer age;
+}
diff --git a/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomDataTest.java b/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomDataTest.java
new file mode 100644
index 00000000..0c4ceacc
--- /dev/null
+++ b/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomDataTest.java
@@ -0,0 +1,53 @@
+package com.alibaba.easyexcel.test.core.bom;
+
+import com.alibaba.easyexcel.test.util.TestFileUtil;
+import com.alibaba.excel.EasyExcel;
+import com.alibaba.excel.context.AnalysisContext;
+import com.alibaba.excel.metadata.data.ReadCellData;
+import com.alibaba.excel.read.listener.ReadListener;
+import org.apache.commons.compress.utils.Lists;
+import org.junit.Assert;
+import org.junit.FixMethodOrder;
+import org.junit.Test;
+import org.junit.runners.MethodSorters;
+
+import java.io.File;
+import java.util.List;
+import java.util.Map;
+
+@FixMethodOrder(MethodSorters.NAME_ASCENDING)
+public class BomDataTest {
+    @Test
+    public void t01ReadAndWriteCsv() {
+        readCsv(TestFileUtil.readFile("bom" + File.separator + "bom_none.csv"));
+        readCsv(TestFileUtil.readFile("bom" + File.separator + "bom_utf8.csv"));
+        readCsv(TestFileUtil.readFile("bom" + File.separator + "bom_utf16be.csv"));
+        readCsv(TestFileUtil.readFile("bom" + File.separator + "bom_utf16le.csv"));
+    }
+
+    /** Reads {@code file} as CSV and asserts the first header, the row count and the first data row. */
+    private void readCsv(File file) {
+        EasyExcel.read(file, BomData.class, new ReadListener<BomData>() {
+            // Rows collected by invoke() and checked once the whole sheet has been read.
+            private final List<BomData> dataList = Lists.newArrayList();
+
+            @Override
+            public void invokeHead(Map<Integer, ReadCellData<?>> headMap, AnalysisContext context) {
+                Assert.assertEquals("姓名", headMap.get(0).getStringValue());
+            }
+
+            @Override
+            public void invoke(BomData data, AnalysisContext context) {
+                dataList.add(data);
+            }
+
+            @Override
+            public void doAfterAllAnalysed(AnalysisContext context) {
+                Assert.assertEquals(10, dataList.size());
+                BomData bomData = dataList.get(0);
+                Assert.assertEquals("姓名0", bomData.getName());
+                Assert.assertEquals(0, (long) bomData.getAge());
+            }
+        }).sheet().doRead();
+    }
+}
diff --git a/easyexcel-test/src/test/resources/bom/bom_none.csv b/easyexcel-test/src/test/resources/bom/bom_none.csv
new file mode 100644
index 00000000..26d73e1f
--- /dev/null
+++ b/easyexcel-test/src/test/resources/bom/bom_none.csv
@@ -0,0 +1,11 @@
+姓名,年纪
+姓名0,0
+姓名1,1
+姓名2,2
+姓名3,3
+姓名4,4
+姓名5,5
+姓名6,6
+姓名7,7
+姓名8,8
+姓名9,9
\ No newline at end of file
diff --git a/easyexcel-test/src/test/resources/bom/bom_utf16be.csv b/easyexcel-test/src/test/resources/bom/bom_utf16be.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ad13f8716eaf7e8219509eb773a657cadfc3cec0
GIT binary patch
literal 152
zcmX}fxei_@%
literal 0
HcmV?d00001
diff --git a/easyexcel-test/src/test/resources/bom/bom_utf16le.csv b/easyexcel-test/src/test/resources/bom/bom_utf16le.csv
new file mode 100644
index 0000000000000000000000000000000000000000..8e8eba909c4cbf4eafa298a5014930dd2c7a051f
GIT binary patch
literal 152
zcmX}fsS$uM00h9(%%qcu{7)a47^*<0ptM6jz#+JG``p(syTy5|X|Um&gN4IC&v`9y
YL5p0}5|^~hWvy^Ut6bF@*R;-cyk{mG>i_@%
literal 0
HcmV?d00001
diff --git a/easyexcel-test/src/test/resources/bom/bom_utf8.csv b/easyexcel-test/src/test/resources/bom/bom_utf8.csv
new file mode 100644
index 00000000..358d5bac
--- /dev/null
+++ b/easyexcel-test/src/test/resources/bom/bom_utf8.csv
@@ -0,0 +1,11 @@
+姓名,年纪
+姓名0,0
+姓名1,1
+姓名2,2
+姓名3,3
+姓名4,4
+姓名5,5
+姓名6,6
+姓名7,7
+姓名8,8
+姓名9,9
\ No newline at end of file
From bebbb1515f9b8aca9d003102d7bdd4358c3f5f8d Mon Sep 17 00:00:00 2001
From: supalle
Date: Sat, 4 Mar 2023 14:31:35 +0800
Subject: [PATCH 06/53] =?UTF-8?q?=E8=B0=83=E6=95=B4=E4=BB=A3=E7=A0=81?=
=?UTF-8?q?=E6=BB=A1=E8=B6=B3pmd=E7=BC=96=E7=A0=81=E8=A7=84=E8=8C=83?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../analysis/csv/BomBufferedInputStream.java | 1 -
.../excel/analysis/csv/ByteOrderMark.java | 16 ++++++----------
2 files changed, 6 insertions(+), 11 deletions(-)
diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/BomBufferedInputStream.java b/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/BomBufferedInputStream.java
index 9a53520d..c24a6e0c 100644
--- a/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/BomBufferedInputStream.java
+++ b/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/BomBufferedInputStream.java
@@ -66,7 +66,6 @@ public class BomBufferedInputStream extends BufferedInputStream {
reset();
if (byteOrderMark != null) {
- // read(new byte[byteOrderMark.length()]);
skip(byteOrderMark.length());
}
initialized = true;
diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/ByteOrderMark.java b/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/ByteOrderMark.java
index 0c68351b..1ed078cf 100644
--- a/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/ByteOrderMark.java
+++ b/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/ByteOrderMark.java
@@ -46,14 +46,6 @@ public class ByteOrderMark implements Comparable {
*/
public static final ByteOrderMark UTF_32LE = new ByteOrderMark(Charset.forName("UTF-32LE"), 0xFF, 0xFE, 0x00, 0x00);
- /**
- * Unicode BOM character; external form depends on the encoding.
- *
- * @see Byte Order Mark (BOM) FAQ
- * @since 2.5
- */
- public static final char UTF_BOM = '\uFEFF';
-
private final Charset charset;
private final int[] bytes;
@@ -79,8 +71,12 @@ public class ByteOrderMark implements Comparable {
@Override
public boolean equals(Object o) {
- if (this == o) return true;
- if (o == null || getClass() != o.getClass()) return false;
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
ByteOrderMark that = (ByteOrderMark) o;
return Objects.equals(charset, that.charset) && Arrays.equals(bytes, that.bytes);
}
From a5dc38e632ee0abb2f8fbf172e8f16d686a8a643 Mon Sep 17 00:00:00 2001
From: Joyfully <584974245@qq.com>
Date: Mon, 20 Mar 2023 14:11:10 +0800
Subject: [PATCH 07/53] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E6=B3=A8=E8=A7=A3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../main/java/com/alibaba/excel/read/listener/ReadListener.java | 2 +-
.../com/alibaba/easyexcel/test/demo/read/DemoDataListener.java | 2 +-
.../com/alibaba/easyexcel/test/demo/web/UploadDataListener.java | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/read/listener/ReadListener.java b/easyexcel-core/src/main/java/com/alibaba/excel/read/listener/ReadListener.java
index b56f8a56..79899153 100644
--- a/easyexcel-core/src/main/java/com/alibaba/excel/read/listener/ReadListener.java
+++ b/easyexcel-core/src/main/java/com/alibaba/excel/read/listener/ReadListener.java
@@ -36,7 +36,7 @@ public interface ReadListener extends Listener {
/**
* When analysis one row trigger invoke function.
*
- * @param data one row value. Is is same as {@link AnalysisContext#readRowHolder()}
+ * @param data one row value. It is same as {@link AnalysisContext#readRowHolder()}
* @param context analysis context
*/
void invoke(T data, AnalysisContext context);
diff --git a/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/demo/read/DemoDataListener.java b/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/demo/read/DemoDataListener.java
index a6ddaf95..7815d374 100644
--- a/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/demo/read/DemoDataListener.java
+++ b/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/demo/read/DemoDataListener.java
@@ -48,7 +48,7 @@ public class DemoDataListener implements ReadListener {
/**
* 这个每一条数据解析都会来调用
*
- * @param data one row value. Is is same as {@link AnalysisContext#readRowHolder()}
+ * @param data one row value. It is same as {@link AnalysisContext#readRowHolder()}
* @param context
*/
@Override
diff --git a/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/demo/web/UploadDataListener.java b/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/demo/web/UploadDataListener.java
index 2096cd4d..183bc140 100644
--- a/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/demo/web/UploadDataListener.java
+++ b/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/demo/web/UploadDataListener.java
@@ -44,7 +44,7 @@ public class UploadDataListener implements ReadListener {
/**
* 这个每一条数据解析都会来调用
*
- * @param data one row value. Is is same as {@link AnalysisContext#readRowHolder()}
+ * @param data one row value. It is same as {@link AnalysisContext#readRowHolder()}
* @param context
*/
@Override
From 8cb0447f8cff02b7261469e70bf90ffc6b438858 Mon Sep 17 00:00:00 2001
From: zhouyao
Date: Wed, 12 Apr 2023 10:24:28 +0800
Subject: [PATCH 08/53] fix: fix the spring boot Application
EasyexcelApplication cannot start
---
easyexcel-test/pom.xml | 5 +++++
pom.xml | 5 +++++
2 files changed, 10 insertions(+)
diff --git a/easyexcel-test/pom.xml b/easyexcel-test/pom.xml
index 73fc20d7..d59ff594 100644
--- a/easyexcel-test/pom.xml
+++ b/easyexcel-test/pom.xml
@@ -46,6 +46,11 @@
junittest
+        <dependency>
+            <groupId>commons-logging</groupId>
+            <artifactId>commons-logging</artifactId>
+            <scope>test</scope>
+        </dependency>
diff --git a/pom.xml b/pom.xml
index cceb0f94..d495370b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -157,6 +157,11 @@
junit4.13.2
+            <dependency>
+                <groupId>commons-logging</groupId>
+                <artifactId>commons-logging</artifactId>
+                <version>1.2</version>
+            </dependency>
From 5e1e457b9011899523edd8f7fd5e92d917d33fc8 Mon Sep 17 00:00:00 2001
From: ltxlouis
Date: Fri, 14 Apr 2023 16:15:01 +0800
Subject: [PATCH 09/53] remove duplicate BigIntegerStringConverter
---
.../com/alibaba/excel/converters/DefaultConverterLoader.java | 1 -
1 file changed, 1 deletion(-)
diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/converters/DefaultConverterLoader.java b/easyexcel-core/src/main/java/com/alibaba/excel/converters/DefaultConverterLoader.java
index 1b04d240..79236d16 100644
--- a/easyexcel-core/src/main/java/com/alibaba/excel/converters/DefaultConverterLoader.java
+++ b/easyexcel-core/src/main/java/com/alibaba/excel/converters/DefaultConverterLoader.java
@@ -154,7 +154,6 @@ public class DefaultConverterLoader {
putWriteStringConverter(new LongStringConverter());
putWriteStringConverter(new ShortStringConverter());
putWriteStringConverter(new StringStringConverter());
- putWriteStringConverter(new BigIntegerStringConverter());
}
/**
From baf7d0bd5b6049e9306297826d6ed680f8b0fd7b Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 17 Apr 2023 17:38:52 +0000
Subject: [PATCH 10/53] Bump spring-core from 5.3.15 to 5.3.27
Bumps [spring-core](https://github.com/spring-projects/spring-framework) from 5.3.15 to 5.3.27.
- [Release notes](https://github.com/spring-projects/spring-framework/releases)
- [Commits](https://github.com/spring-projects/spring-framework/compare/v5.3.15...v5.3.27)
---
updated-dependencies:
- dependency-name: org.springframework:spring-core
dependency-type: direct:production
...
Signed-off-by: dependabot[bot]
---
pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pom.xml b/pom.xml
index cceb0f94..2ade7961 100644
--- a/pom.xml
+++ b/pom.xml
@@ -133,7 +133,7 @@
org.springframeworkspring-core
-                <version>5.3.15</version>
+                <version>5.3.27</version>
From 1635bb814742786e94a3e0b72e8588aa64617ae6 Mon Sep 17 00:00:00 2001
From: acc8226
Date: Thu, 20 Apr 2023 17:55:29 +0800
Subject: [PATCH 11/53] Update PageReadListener.java
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Incorrect words: Defuault -> Default
---
.../java/com/alibaba/excel/read/listener/PageReadListener.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/read/listener/PageReadListener.java b/easyexcel-core/src/main/java/com/alibaba/excel/read/listener/PageReadListener.java
index b97db357..f2ab4351 100644
--- a/easyexcel-core/src/main/java/com/alibaba/excel/read/listener/PageReadListener.java
+++ b/easyexcel-core/src/main/java/com/alibaba/excel/read/listener/PageReadListener.java
@@ -15,7 +15,7 @@ import org.apache.commons.collections4.CollectionUtils;
*/
public class PageReadListener implements ReadListener {
/**
- * Defuault single handle the amount of data
+ * Default single handle the amount of data
*/
public static int BATCH_COUNT = 100;
/**
From 5dc6196e509db434d30beaaebbd5757ace25e055 Mon Sep 17 00:00:00 2001
From: gongxuanzhang
Date: Fri, 21 Apr 2023 11:47:55 +0800
Subject: [PATCH 12/53] fix utf decode
---
.../sax/SharedStringsTableHandler.java | 57 +++++++++++++++++-
.../easyexcel/test/demo/rare/ReadTest.java | 41 +++++++++++++
.../easyexcel/test/util/TestFileUtil.java | 43 +++++++++++++
.../test/resources/temp/utfdecode/demo.xlsx | Bin 0 -> 8762 bytes
4 files changed, 138 insertions(+), 3 deletions(-)
create mode 100644 easyexcel-test/src/test/java/com/alibaba/easyexcel/test/demo/rare/ReadTest.java
create mode 100644 easyexcel-test/src/test/resources/temp/utfdecode/demo.xlsx
diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/analysis/v07/handlers/sax/SharedStringsTableHandler.java b/easyexcel-core/src/main/java/com/alibaba/excel/analysis/v07/handlers/sax/SharedStringsTableHandler.java
index 203db6c7..169429de 100644
--- a/easyexcel-core/src/main/java/com/alibaba/excel/analysis/v07/handlers/sax/SharedStringsTableHandler.java
+++ b/easyexcel-core/src/main/java/com/alibaba/excel/analysis/v07/handlers/sax/SharedStringsTableHandler.java
@@ -1,10 +1,12 @@
package com.alibaba.excel.analysis.v07.handlers.sax;
+import com.alibaba.excel.cache.ReadCache;
+import com.alibaba.excel.constant.ExcelXmlConstants;
import org.xml.sax.Attributes;
import org.xml.sax.helpers.DefaultHandler;
-import com.alibaba.excel.cache.ReadCache;
-import com.alibaba.excel.constant.ExcelXmlConstants;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
/**
* Sax read sharedStringsTable.xml
@@ -13,6 +15,8 @@ import com.alibaba.excel.constant.ExcelXmlConstants;
*/
public class SharedStringsTableHandler extends DefaultHandler {
+ private static final Pattern UTF_PATTTERN = Pattern.compile("_x([0-9A-Fa-f]{4})_");
+
/**
* The final piece of data
*/
@@ -86,7 +90,7 @@ public class SharedStringsTableHandler extends DefaultHandler {
if (currentData == null) {
readCache.put(null);
} else {
- readCache.put(currentData.toString());
+ readCache.put(utfDecode(currentData.toString()));
}
break;
case ExcelXmlConstants.SHAREDSTRINGS_RPH_TAG:
@@ -109,4 +113,51 @@ public class SharedStringsTableHandler extends DefaultHandler {
}
currentElementData.append(ch, start, length);
}
+
+    /**
+     * Decodes the {@code _xHHHH_} escapes that can appear in shared strings.
+     * Adapted from POI's XSSFRichTextString.
+     * Strings without any escape are returned unchanged (same instance).
+     *
+     * For all characters which cannot be represented in XML as defined by the XML 1.0 specification,
+     * the characters are escaped using the Unicode numerical character representation escape character
+     * format _xHHHH_, where H represents a hexadecimal character in the character's value.
+     *
+     * Example: The Unicode character 0D is invalid in an XML 1.0 document,
+     * so it shall be escaped as _x000D_.
+     *
+     * See section 3.18.9 in the OOXML spec.
+     *
+     * @param value the string to decode
+     * @return the decoded string or null if the input string is null
+     * @see org.apache.poi.xssf.usermodel.XSSFRichTextString#utfDecode(String)
+     */
+    static String utfDecode(String value) {
+        if (value == null || !value.contains("_x")) {
+            return value;
+        }
+
+        Matcher escapes = UTF_PATTTERN.matcher(value);
+        if (!escapes.find()) {
+            // "_x" was present but no complete escape: nothing to decode, so skip the StringBuilder
+            return value;
+        }
+
+        // at least one escape exists: rebuild the string piece by piece
+        StringBuilder decoded = new StringBuilder();
+        int copiedUpTo = 0;
+        do {
+            // literal text between the previous escape and this one
+            decoded.append(value, copiedUpTo, escapes.start());
+            // the four hex digits are the char value of the escaped character
+            String hex = escapes.group(1);
+            int codeUnit = Integer.parseInt(hex, 16);
+            decoded.append((char) codeUnit);
+            copiedUpTo = escapes.end();
+        } while (escapes.find());
+
+        // trailing literal text after the last escape
+        decoded.append(value, copiedUpTo, value.length());
+        return decoded.toString();
+    }
}
diff --git a/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/demo/rare/ReadTest.java b/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/demo/rare/ReadTest.java
new file mode 100644
index 00000000..c1e614ea
--- /dev/null
+++ b/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/demo/rare/ReadTest.java
@@ -0,0 +1,41 @@
+package com.alibaba.easyexcel.test.demo.rare;
+
+import com.alibaba.easyexcel.test.util.TestFileUtil;
+import com.alibaba.excel.EasyExcel;
+import org.apache.poi.xssf.usermodel.XSSFRow;
+import org.apache.poi.xssf.usermodel.XSSFSheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.File;
+import java.util.List;
+import java.util.Map;
+
+/**
+ *
+ * 记录一些不太常见的案例
+ * @author gxz gongxuanzhang@foxmail.com
+ **/
+public class ReadTest {
+
+
+ /**
+ * 当excel有需要转义的 如x005特殊符号时需要通过utf decode解码
+ *
+ **/
+ @Test
+ public void readX005() throws Exception{
+ String fileName = TestFileUtil.pathBuild().sub("temp").sub("utfdecode").sub("demo.xlsx").getPath();
+ XSSFWorkbook xssfWorkbook = new XSSFWorkbook(fileName);
+ XSSFSheet xssfSheet = xssfWorkbook.getSheetAt(0);
+ XSSFRow row = xssfSheet.getRow(0);
+ String poiValue = row.getCell(0).getStringCellValue();
+ List