diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/BomBufferedInputStream.java b/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/BomBufferedInputStream.java
new file mode 100644
index 00000000..c24a6e0c
--- /dev/null
+++ b/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/BomBufferedInputStream.java
@@ -0,0 +1,150 @@
+package com.alibaba.excel.analysis.csv;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * A {@link BufferedInputStream} that detects a {@link ByteOrderMark} encoded in the first bytes of the wrapped
+ * stream and, when one is found, positions the stream directly behind it.
+ * <p>
+ * Detection runs lazily on the first call to {@link #getByteOrderMark()} or {@link #hasByteOrderMark()}; call one
+ * of them before reading, because the read methods themselves do not look for a BOM.
+ *
+ * @author supalle
+ * @see <a href="https://unicode.org/faq/utf_bom.html">Byte Order Mark (BOM) FAQ</a>
+ * @see <a href="https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/input/BOMInputStream.html">Apache CommonsIO BOMInputStream</a>
+ */
+public class BomBufferedInputStream extends BufferedInputStream {
+
+    /**
+     * BOMs probed when the caller supplies none. The list is unmodifiable.
+     */
+    public final static List<ByteOrderMark> DEFAULT_BYTE_ORDER_MARKS = Collections.unmodifiableList(Arrays.asList(
+        ByteOrderMark.UTF_8,
+        ByteOrderMark.UTF_16BE,
+        ByteOrderMark.UTF_16LE,
+        ByteOrderMark.UTF_32BE,
+        ByteOrderMark.UTF_32LE));
+
+    /** Whether detection has already run; its result is cached in {@link #byteOrderMark}. */
+    private boolean initialized;
+    /** The detected BOM, or {@code null} when none matched or detection has not run yet. */
+    private ByteOrderMark byteOrderMark;
+    /** Candidate BOMs, longest first, so a longer BOM wins over a shorter one that is its prefix. */
+    private final List<ByteOrderMark> byteOrderMarks;
+
+    /**
+     * Wraps {@code in} using the default buffer size.
+     *
+     * @param in             the underlying stream
+     * @param byteOrderMarks the BOMs to probe; {@link #DEFAULT_BYTE_ORDER_MARKS} when omitted or empty
+     */
+    public BomBufferedInputStream(InputStream in, final ByteOrderMark... byteOrderMarks) {
+        super(in);
+        this.byteOrderMarks = applyByteOrderMarks(byteOrderMarks);
+    }
+
+    /**
+     * Wraps {@code in} using a buffer of {@code size} bytes.
+     *
+     * @param in             the underlying stream
+     * @param size           the buffer size passed on to {@link BufferedInputStream}
+     * @param byteOrderMarks the BOMs to probe; {@link #DEFAULT_BYTE_ORDER_MARKS} when omitted or empty
+     */
+    public BomBufferedInputStream(InputStream in, int size, final ByteOrderMark... byteOrderMarks) {
+        super(in, size);
+        this.byteOrderMarks = applyByteOrderMarks(byteOrderMarks);
+    }
+
+    /**
+     * Builds the private candidate list: a copy of the requested BOMs (or of the defaults) ordered longest first.
+     * Copying keeps the sort from reordering the caller's array or the shared default list.
+     */
+    private static List<ByteOrderMark> applyByteOrderMarks(ByteOrderMark[] byteOrderMarks) {
+        List<ByteOrderMark> requested = byteOrderMarks == null || byteOrderMarks.length == 0
+            ? DEFAULT_BYTE_ORDER_MARKS : Arrays.asList(byteOrderMarks);
+        List<ByteOrderMark> candidates = new ArrayList<>(requested);
+        // Stable sort, longest BOM first: UTF-32LE (FF FE 00 00) must be tried before UTF-16LE (FF FE).
+        candidates.sort((a, b) -> Integer.compare(b.length(), a.length()));
+        return candidates;
+    }
+
+    /**
+     * Tells whether the stream starts with one of the candidate BOMs.
+     *
+     * @return {@code true} if a BOM was detected
+     * @throws IOException if reading the first bytes fails
+     */
+    public boolean hasByteOrderMark() throws IOException {
+        return getByteOrderMark() != null;
+    }
+
+    /**
+     * Detects the BOM on first use and caches the result. After detection the stream is positioned behind the
+     * BOM, or back where it was when no candidate matched.
+     *
+     * @return the detected BOM, or {@code null} if none of the candidates matched
+     * @throws IOException if reading the first bytes fails
+     */
+    public ByteOrderMark getByteOrderMark() throws IOException {
+        if (initialized) {
+            return byteOrderMark;
+        }
+        int maxBomLength = byteOrderMarks.get(0).length();
+        int[] firstBytes = new int[maxBomLength];
+        // -1 marks "no byte available", a value read() never returns for real data.
+        Arrays.fill(firstBytes, -1);
+        mark(maxBomLength);
+        for (int i = 0; i < maxBomLength; i++) {
+            int next = read();
+            if (next < 0) {
+                break;
+            }
+            firstBytes[i] = next;
+        }
+        reset();
+
+        ByteOrderMark detected = matchByteOrderMark(byteOrderMarks, firstBytes);
+        if (detected != null) {
+            // Consume exactly the BOM; unlike skip(), read() cannot silently advance by fewer bytes.
+            for (int i = 0; i < detected.length(); i++) {
+                read();
+            }
+        }
+        byteOrderMark = detected;
+        initialized = true;
+        return byteOrderMark;
+    }
+
+    /**
+     * Returns the first candidate whose bytes are a prefix of {@code firstBytes}, or {@code null} if none is.
+     */
+    private ByteOrderMark matchByteOrderMark(final List<ByteOrderMark> byteOrderMarks, final int[] firstBytes) {
+        for (ByteOrderMark candidate : byteOrderMarks) {
+            if (startsWith(firstBytes, candidate.getBytes())) {
+                return candidate;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Checks whether {@code data} begins with {@code prefix}.
+     */
+    private static boolean startsWith(int[] data, int[] prefix) {
+        if (prefix.length > data.length) {
+            return false;
+        }
+        for (int i = 0; i < prefix.length; i++) {
+            if (data[i] != prefix[i]) {
+                return false;
+            }
+        }
+        return true;
+    }
+}
diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/ByteOrderMark.java b/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/ByteOrderMark.java
new file mode 100644
index 00000000..1ed078cf
--- /dev/null
+++ b/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/ByteOrderMark.java
@@ -0,0 +1,125 @@
+package com.alibaba.excel.analysis.csv;
+
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Objects;
+import java.util.stream.Collectors;
+
+/**
+ * Byte Order Mark (BOM): a charset together with the byte sequence that announces it at the start of a stream.
+ * <p>
+ * Used in {@link BomBufferedInputStream}. The state of an instance does not change after construction.
+ *
+ * @author supalle
+ * @see <a href="https://unicode.org/faq/utf_bom.html">Byte Order Mark (BOM) FAQ</a>
+ * @see <a href="https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/ByteOrderMark.html">Apache CommonsIO ByteOrderMark</a>
+ */
+public class ByteOrderMark implements Comparable<ByteOrderMark> {
+
+    /**
+     * UTF-8 BOM.
+     */
+    public static final ByteOrderMark UTF_8 = new ByteOrderMark(StandardCharsets.UTF_8, 0xEF, 0xBB, 0xBF);
+
+    /**
+     * UTF-16BE BOM (Big-Endian).
+     */
+    public static final ByteOrderMark UTF_16BE = new ByteOrderMark(StandardCharsets.UTF_16BE, 0xFE, 0xFF);
+
+    /**
+     * UTF-16LE BOM (Little-Endian).
+     */
+    public static final ByteOrderMark UTF_16LE = new ByteOrderMark(StandardCharsets.UTF_16LE, 0xFF, 0xFE);
+
+    /**
+     * UTF-32BE BOM (Big-Endian).
+     */
+    public static final ByteOrderMark UTF_32BE = new ByteOrderMark(Charset.forName("UTF-32BE"), 0x00, 0x00, 0xFE, 0xFF);
+
+    /**
+     * UTF-32LE BOM (Little-Endian).
+     */
+    public static final ByteOrderMark UTF_32LE = new ByteOrderMark(Charset.forName("UTF-32LE"), 0xFF, 0xFE, 0x00, 0x00);
+
+    /** Charset announced by this BOM. */
+    private final Charset charset;
+    /** BOM bytes, one int per byte; only ever handed out as a copy. */
+    private final int[] bytes;
+
+    /**
+     * Creates a BOM definition.
+     *
+     * @param charset the charset the BOM stands for
+     * @param bytes   the BOM bytes, one int per byte
+     * @throws NullPointerException     if {@code charset} is {@code null}
+     * @throws IllegalArgumentException if {@code bytes} is {@code null} or empty
+     */
+    public ByteOrderMark(final Charset charset, final int... bytes) {
+        this.charset = Objects.requireNonNull(charset, "charset must be not null");
+        if (bytes == null || bytes.length == 0) {
+            throw new IllegalArgumentException("bytes must be not empty");
+        }
+        // Defensive copy: the caller may keep and later modify the array it passed in.
+        this.bytes = bytes.clone();
+    }
+
+    /**
+     * @return the charset this BOM stands for
+     */
+    public Charset getCharset() {
+        return charset;
+    }
+
+    /**
+     * @return a copy of the BOM bytes; changing it does not affect this instance
+     */
+    public int[] getBytes() {
+        return bytes.clone();
+    }
+
+    /**
+     * @return the number of bytes in this BOM
+     */
+    public int length() {
+        return bytes.length;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (o == null || getClass() != o.getClass()) {
+            return false;
+        }
+        ByteOrderMark that = (ByteOrderMark) o;
+        return Objects.equals(charset, that.charset) && Arrays.equals(bytes, that.bytes);
+    }
+
+    @Override
+    public int hashCode() {
+        int result = Objects.hash(charset);
+        result = 31 * result + Arrays.hashCode(bytes);
+        return result;
+    }
+
+    @Override
+    public String toString() {
+        String hexBytes = Arrays.stream(bytes)
+            .mapToObj(Integer::toHexString)
+            .map(String::toUpperCase)
+            .map("0x"::concat)
+            .collect(Collectors.joining(","));
+        return "ByteOrderMark{charset=" + charset + ", bytes=[" + hexBytes + "]}";
+    }
+
+    /**
+     * Orders BOMs by length, longest first. BOMs of equal length compare as {@code 0} even when they are not
+     * {@link #equals(Object) equal}, so this ordering is not consistent with equals.
+     */
+    @Override
+    public int compareTo(ByteOrderMark o) {
+        return Integer.compare(o.length(), length());
+    }
+}
diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/read/metadata/holder/csv/CsvReadWorkbookHolder.java b/easyexcel-core/src/main/java/com/alibaba/excel/read/metadata/holder/csv/CsvReadWorkbookHolder.java
index 90ebe2b5..04aeac4e 100644
--- a/easyexcel-core/src/main/java/com/alibaba/excel/read/metadata/holder/csv/CsvReadWorkbookHolder.java
+++ b/easyexcel-core/src/main/java/com/alibaba/excel/read/metadata/holder/csv/CsvReadWorkbookHolder.java
@@ -1,15 +1,18 @@
package com.alibaba.excel.read.metadata.holder.csv;
+import com.alibaba.excel.analysis.csv.BomBufferedInputStream;
import com.alibaba.excel.read.metadata.ReadWorkbook;
import com.alibaba.excel.read.metadata.holder.ReadWorkbookHolder;
import com.alibaba.excel.support.ExcelTypeEnum;
-
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.Setter;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
+import java.io.IOException;
+import java.nio.file.Files;
+
/**
* Workbook holder
*
@@ -27,5 +30,34 @@ public class CsvReadWorkbookHolder extends ReadWorkbookHolder {
super(readWorkbook);
setExcelType(ExcelTypeEnum.CSV);
this.csvFormat = CSVFormat.DEFAULT;
+        // CSV BOM: no charset was configured, so take it from a byte order mark when the input has one.
+        if (readWorkbook.getCharset() == null) {
+            BomBufferedInputStream bomBufferedInputStream = buildBomBufferedInputStream();
+            setInputStream(bomBufferedInputStream);
+            setMandatoryUseInputStream(Boolean.TRUE);
+            try {
+                if (bomBufferedInputStream.hasByteOrderMark()) {
+                    setCharset(bomBufferedInputStream.getByteOrderMark().getCharset());
+                }
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+        }
+    }
+
+    /**
+     * Wraps the file when one is set and the input stream is not mandatory, otherwise the input stream.
+     * An {@link IOException} while opening the file is rethrown unchecked with the original as its cause.
+     */
+    private BomBufferedInputStream buildBomBufferedInputStream() {
+        boolean readFromFile = !Boolean.TRUE.equals(getMandatoryUseInputStream()) && getFile() != null;
+        if (!readFromFile) {
+            return new BomBufferedInputStream(getInputStream());
+        }
+        try {
+            return new BomBufferedInputStream(Files.newInputStream(getFile().toPath()));
+        } catch (IOException e) {
+            throw new RuntimeException(e.getMessage(), e);
+        }
}
}
diff --git a/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomData.java b/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomData.java
new file mode 100644
index 00000000..24d8ece8
--- /dev/null
+++ b/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomData.java
@@ -0,0 +1,23 @@
+package com.alibaba.easyexcel.test.core.bom;
+
+import com.alibaba.excel.annotation.ExcelProperty;
+import lombok.EqualsAndHashCode;
+import lombok.Getter;
+import lombok.Setter;
+
+/**
+ * Row model for the BOM CSV fixtures: one name column and one age column.
+ */
+@EqualsAndHashCode
+@Getter
+@Setter
+public class BomData {
+
+    /** Bound to the column headed "姓名". */
+    @ExcelProperty("姓名")
+    private String name;
+
+    /** Bound to the column headed "年纪". */
+    @ExcelProperty("年纪")
+    private Integer age;
+}
diff --git a/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomDataTest.java b/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomDataTest.java
new file mode 100644
index 00000000..0c4ceacc
--- /dev/null
+++ b/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomDataTest.java
@@ -0,0 +1,66 @@
+package com.alibaba.easyexcel.test.core.bom;
+
+import com.alibaba.easyexcel.test.util.TestFileUtil;
+import com.alibaba.excel.EasyExcel;
+import com.alibaba.excel.context.AnalysisContext;
+import com.alibaba.excel.metadata.data.ReadCellData;
+import com.alibaba.excel.read.listener.ReadListener;
+import org.apache.commons.compress.utils.Lists;
+import org.junit.Assert;
+import org.junit.FixMethodOrder;
+import org.junit.Test;
+import org.junit.runners.MethodSorters;
+
+import java.io.File;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Reads the CSV fixtures stored without a BOM and with UTF-8, UTF-16BE and UTF-16LE BOMs, and checks the first
+ * header cell, the row count and the first data row of each.
+ */
+@FixMethodOrder(MethodSorters.NAME_ASCENDING)
+public class BomDataTest {
+
+    /** Number of data rows expected from each fixture file. */
+    private static final int EXPECTED_ROW_COUNT = 10;
+
+    @Test
+    public void t01ReadAndWriteCsv() {
+        readCsv(TestFileUtil.readFile("bom" + File.separator + "bom_none.csv"));
+        readCsv(TestFileUtil.readFile("bom" + File.separator + "bom_utf8.csv"));
+        readCsv(TestFileUtil.readFile("bom" + File.separator + "bom_utf16be.csv"));
+        readCsv(TestFileUtil.readFile("bom" + File.separator + "bom_utf16le.csv"));
+    }
+
+    /**
+     * Reads {@code file} as CSV without configuring a charset and asserts the header and row contents.
+     */
+    private void readCsv(File file) {
+        EasyExcel.read(file, BomData.class, new ReadListener<BomData>() {
+
+            private final List<BomData> dataList = Lists.newArrayList();
+
+            @Override
+            public void invokeHead(Map<Integer, ReadCellData<?>> headMap, AnalysisContext context) {
+                // Guards against a BOM leaking into the first header cell.
+                String head = headMap.get(0).getStringValue();
+                Assert.assertEquals("姓名", head);
+            }
+
+            @Override
+            public void invoke(BomData data, AnalysisContext context) {
+                dataList.add(data);
+            }
+
+            @Override
+            public void doAfterAllAnalysed(AnalysisContext context) {
+                // JUnit's signature is assertEquals(expected, actual).
+                Assert.assertEquals(EXPECTED_ROW_COUNT, dataList.size());
+                BomData bomData = dataList.get(0);
+                Assert.assertEquals("姓名0", bomData.getName());
+                Assert.assertEquals(0, (long) bomData.getAge());
+            }
+        }).sheet().doRead();
+    }
+}
diff --git a/easyexcel-test/src/test/resources/bom/bom_none.csv b/easyexcel-test/src/test/resources/bom/bom_none.csv
new file mode 100644
index 00000000..26d73e1f
--- /dev/null
+++ b/easyexcel-test/src/test/resources/bom/bom_none.csv
@@ -0,0 +1,11 @@
+姓名,年纪
+姓名0,0
+姓名1,1
+姓名2,2
+姓名3,3
+姓名4,4
+姓名5,5
+姓名6,6
+姓名7,7
+姓名8,8
+姓名9,9
\ No newline at end of file
diff --git a/easyexcel-test/src/test/resources/bom/bom_utf16be.csv b/easyexcel-test/src/test/resources/bom/bom_utf16be.csv
new file mode 100644
index 00000000..ad13f871
Binary files /dev/null and b/easyexcel-test/src/test/resources/bom/bom_utf16be.csv differ
diff --git a/easyexcel-test/src/test/resources/bom/bom_utf16le.csv b/easyexcel-test/src/test/resources/bom/bom_utf16le.csv
new file mode 100644
index 00000000..8e8eba90
Binary files /dev/null and b/easyexcel-test/src/test/resources/bom/bom_utf16le.csv differ
diff --git a/easyexcel-test/src/test/resources/bom/bom_utf8.csv b/easyexcel-test/src/test/resources/bom/bom_utf8.csv
new file mode 100644
index 00000000..358d5bac
--- /dev/null
+++ b/easyexcel-test/src/test/resources/bom/bom_utf8.csv
@@ -0,0 +1,11 @@
+姓名,年纪
+姓名0,0
+姓名1,1
+姓名2,2
+姓名3,3
+姓名4,4
+姓名5,5
+姓名6,6
+姓名7,7
+姓名8,8
+姓名9,9
\ No newline at end of file