Browse Source

* 读csv会忽略BOM数据 [Issue #3137](https://github.com/alibaba/easyexcel/issues/3137)

* 解决csv用office打开乱码的问题,写csv默认带上BOM数据
pull/3168/head
Jiaju Zhuang 2 years ago
parent
commit
8fb759ab1f
  1. 4
      easyexcel-core/pom.xml
  2. 90
      easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/BomBufferedInputStream.java
  3. 108
      easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/ByteOrderMark.java
  4. 24
      easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/CsvExcelReadExecutor.java
  5. 51
      easyexcel-core/src/main/java/com/alibaba/excel/enums/ByteOrderMarkEnum.java
  6. 14
      easyexcel-core/src/main/java/com/alibaba/excel/metadata/csv/CsvSheet.java
  7. 17
      easyexcel-core/src/main/java/com/alibaba/excel/metadata/csv/CsvWorkbook.java
  8. 31
      easyexcel-core/src/main/java/com/alibaba/excel/read/metadata/holder/csv/CsvReadWorkbookHolder.java
  9. 4
      easyexcel-core/src/main/java/com/alibaba/excel/util/WorkBookUtil.java
  10. 9
      easyexcel-core/src/main/java/com/alibaba/excel/write/builder/ExcelWriterBuilder.java
  11. 7
      easyexcel-core/src/main/java/com/alibaba/excel/write/metadata/WriteWorkbook.java
  12. 13
      easyexcel-core/src/main/java/com/alibaba/excel/write/metadata/holder/WriteWorkbookHolder.java
  13. 3
      easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomData.java
  14. 90
      easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomDataTest.java
  15. 11
      easyexcel-test/src/test/resources/bom/bom_none.csv
  16. BIN
      easyexcel-test/src/test/resources/bom/bom_utf16be.csv
  17. BIN
      easyexcel-test/src/test/resources/bom/bom_utf16le.csv
  18. 11
      easyexcel-test/src/test/resources/bom/bom_utf8.csv
  19. 11
      easyexcel-test/src/test/resources/bom/no_bom.csv
  20. 11
      easyexcel-test/src/test/resources/bom/office_bom.csv
  21. 7
      pom.xml
  22. 5
      update.md

4
easyexcel-core/pom.xml

@ -40,5 +40,9 @@
<groupId>org.ehcache</groupId>
<artifactId>ehcache</artifactId>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
</dependencies>
</project>

90
easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/BomBufferedInputStream.java

@ -1,90 +0,0 @@
package com.alibaba.excel.analysis.csv;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* This class is used to wrap a stream that includes an encoded {@link ByteOrderMark} as its first bytes.
*
* @author supalle
* @see <a href="http://unicode.org/faq/utf_bom.html#BOM">Byte Order Mark (BOM) FAQ</a>
* @see <a href="https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/ByteOrderMark.html">Apache CommonsIO BOMInputStream</a>
*/
public class BomBufferedInputStream extends BufferedInputStream {

    /**
     * Byte order marks that are detected when the caller does not supply any.
     * This class only reads the list; every stream works on its own sorted copy.
     */
    public static final List<ByteOrderMark> DEFAULT_BYTE_ORDER_MARKS = new ArrayList<>();

    static {
        DEFAULT_BYTE_ORDER_MARKS.add(ByteOrderMark.UTF_8);
        DEFAULT_BYTE_ORDER_MARKS.add(ByteOrderMark.UTF_16BE);
        DEFAULT_BYTE_ORDER_MARKS.add(ByteOrderMark.UTF_16LE);
        DEFAULT_BYTE_ORDER_MARKS.add(ByteOrderMark.UTF_32BE);
        DEFAULT_BYTE_ORDER_MARKS.add(ByteOrderMark.UTF_32LE);
    }

    /**
     * Set once detection has run, so the stream is only inspected a single time.
     */
    private boolean initialized;

    /**
     * The detected byte order mark, or {@code null} when none matched (or detection has not run yet).
     */
    private ByteOrderMark byteOrderMark;

    /**
     * Private copy of the candidate marks, ordered longest first.
     */
    private final List<ByteOrderMark> byteOrderMarks;

    /**
     * Wraps {@code in} using the default buffer size.
     *
     * @param in             the stream whose leading bytes may contain a byte order mark
     * @param byteOrderMarks the marks to detect; {@link #DEFAULT_BYTE_ORDER_MARKS} is used when null or empty
     */
    public BomBufferedInputStream(InputStream in, final ByteOrderMark... byteOrderMarks) {
        super(in);
        this.byteOrderMarks = applyByteOrderMarks(byteOrderMarks);
    }

    /**
     * Wraps {@code in} using the given buffer size.
     *
     * @param in             the stream whose leading bytes may contain a byte order mark
     * @param size           the buffer size passed to {@link BufferedInputStream}
     * @param byteOrderMarks the marks to detect; {@link #DEFAULT_BYTE_ORDER_MARKS} is used when null or empty
     */
    public BomBufferedInputStream(InputStream in, int size, final ByteOrderMark... byteOrderMarks) {
        super(in, size);
        this.byteOrderMarks = applyByteOrderMarks(byteOrderMarks);
    }

    /**
     * Builds the candidate list for one stream.
     * <p>
     * The result is always a fresh list, so sorting it never reorders the shared default list or the
     * caller's varargs array ({@code Arrays.asList} writes through to its backing array).
     *
     * @param byteOrderMarks the marks supplied by the caller, may be null or empty
     * @return a private copy of the candidates, longest mark first
     */
    private static List<ByteOrderMark> applyByteOrderMarks(ByteOrderMark[] byteOrderMarks) {
        List<ByteOrderMark> candidates;
        if (byteOrderMarks == null || byteOrderMarks.length == 0) {
            candidates = new ArrayList<>(DEFAULT_BYTE_ORDER_MARKS);
        } else {
            candidates = new ArrayList<>(Arrays.asList(byteOrderMarks));
        }
        // Longest first: the UTF-32LE mark starts with the UTF-16LE mark and therefore must be tried earlier.
        candidates.sort((left, right) -> Integer.compare(right.length(), left.length()));
        return candidates;
    }

    /**
     * Tells whether the stream starts with one of the configured byte order marks.
     *
     * @return {@code true} when a mark was detected
     * @throws IOException if reading the leading bytes fails
     */
    public boolean hasByteOrderMark() throws IOException {
        return getByteOrderMark() != null;
    }

    /**
     * Detects the byte order mark at the current position of the stream and consumes it when present.
     * <p>
     * Detection runs only on the first call; later calls return the cached result. When no mark matches,
     * the stream is rewound so no bytes are lost.
     *
     * @return the detected mark, or {@code null} when the stream does not start with a configured mark
     * @throws IOException if reading the leading bytes fails
     */
    public ByteOrderMark getByteOrderMark() throws IOException {
        if (initialized) {
            return byteOrderMark;
        }
        int maxBomLength = byteOrderMarks.get(0).length();
        mark(maxBomLength);
        int[] firstBytes = new int[maxBomLength];
        // Slots that are never read (short stream) hold -1, which can never equal a BOM byte.
        Arrays.fill(firstBytes, -1);
        for (int i = 0; i < maxBomLength; i++) {
            firstBytes[i] = read();
            if (firstBytes[i] < 0) {
                break;
            }
        }
        byteOrderMark = matchByteOrderMark(byteOrderMarks, firstBytes);
        reset();
        if (byteOrderMark != null) {
            // Consume exactly the mark; read() is used because skip() is allowed to skip fewer bytes.
            for (int remaining = byteOrderMark.length(); remaining > 0; remaining--) {
                read();
            }
        }
        initialized = true;
        return byteOrderMark;
    }

    /**
     * Returns the first candidate whose bytes are a prefix of {@code firstBytes}.
     *
     * @param byteOrderMarks the candidates, in the order they should be tried
     * @param firstBytes     the leading bytes of the stream, {@code -1} where the stream ended
     * @return the matching mark, or {@code null} when none matches
     */
    private ByteOrderMark matchByteOrderMark(final List<ByteOrderMark> byteOrderMarks, final int[] firstBytes) {
        for (ByteOrderMark candidate : byteOrderMarks) {
            if (startsWith(firstBytes, candidate.getBytes())) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Checks whether {@code prefix} matches the beginning of {@code firstBytes}.
     */
    private static boolean startsWith(int[] firstBytes, int[] prefix) {
        if (prefix.length > firstBytes.length) {
            return false;
        }
        for (int i = 0; i < prefix.length; i++) {
            if (firstBytes[i] != prefix[i]) {
                return false;
            }
        }
        return true;
    }
}

108
easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/ByteOrderMark.java

@ -1,108 +0,0 @@
package com.alibaba.excel.analysis.csv;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Objects;
import java.util.stream.Collectors;
/**
* Byte Order Mark (BOM)
* <br/>
* Used in {@link BomBufferedInputStream}
*
* @author supalle
* @see <a href="http://unicode.org/faq/utf_bom.html#BOM">Byte Order Mark (BOM) FAQ</a>
* @see <a href="https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/ByteOrderMark.html">Apache CommonsIO ByteOrderMark</a>
*/
public class ByteOrderMark implements Comparable<ByteOrderMark> {

    /**
     * UTF-8 BOM.
     */
    public static final ByteOrderMark UTF_8 = new ByteOrderMark(StandardCharsets.UTF_8, 0xEF, 0xBB, 0xBF);

    /**
     * UTF-16BE BOM (Big-Endian).
     */
    public static final ByteOrderMark UTF_16BE = new ByteOrderMark(StandardCharsets.UTF_16BE, 0xFE, 0xFF);

    /**
     * UTF-16LE BOM (Little-Endian).
     */
    public static final ByteOrderMark UTF_16LE = new ByteOrderMark(StandardCharsets.UTF_16LE, 0xFF, 0xFE);

    /**
     * UTF-32BE BOM (Big-Endian).
     */
    public static final ByteOrderMark UTF_32BE = new ByteOrderMark(Charset.forName("UTF-32BE"), 0x00, 0x00, 0xFE, 0xFF);

    /**
     * UTF-32LE BOM (Little-Endian).
     */
    public static final ByteOrderMark UTF_32LE = new ByteOrderMark(Charset.forName("UTF-32LE"), 0xFF, 0xFE, 0x00, 0x00);

    /**
     * The charset this mark announces.
     */
    private final Charset charset;

    /**
     * The bytes of the mark, one unsigned byte value per element. Never shared with callers.
     */
    private final int[] bytes;

    /**
     * Creates a byte order mark.
     *
     * @param charset the charset the mark stands for
     * @param bytes   the bytes of the mark, in stream order
     * @throws NullPointerException     if {@code charset} is null
     * @throws IllegalArgumentException if {@code bytes} is null or empty
     */
    public ByteOrderMark(final Charset charset, final int... bytes) {
        this.charset = Objects.requireNonNull(charset, "charset must be not null");
        if (bytes == null || bytes.length == 0) {
            throw new IllegalArgumentException("bytes must be not empty");
        }
        // Copy so that later changes to the caller's array cannot alter this mark.
        this.bytes = bytes.clone();
    }

    /**
     * @return the charset the mark stands for
     */
    public Charset getCharset() {
        return charset;
    }

    /**
     * @return a copy of the bytes of the mark; modifying it does not affect this instance
     */
    public int[] getBytes() {
        return bytes.clone();
    }

    /**
     * @return the number of bytes in the mark
     */
    public int length() {
        return bytes.length;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        ByteOrderMark that = (ByteOrderMark) o;
        return Objects.equals(charset, that.charset) && Arrays.equals(bytes, that.bytes);
    }

    @Override
    public int hashCode() {
        int result = Objects.hash(charset);
        result = 31 * result + Arrays.hashCode(bytes);
        return result;
    }

    @Override
    public String toString() {
        return "ByteOrderMark{" +
            "charset=" + charset +
            ", bytes=["
            + Arrays.stream(bytes)
            .mapToObj(Integer::toHexString)
            .map(String::toUpperCase)
            .map("0x"::concat)
            .collect(Collectors.joining(",")) +
            "]}";
    }

    /**
     * Orders marks by length, longest first, so that a longer mark is tried before a shorter mark it
     * starts with. Only the length is compared, so this ordering is not consistent with
     * {@link #equals(Object)}.
     */
    @Override
    public int compareTo(ByteOrderMark o) {
        return Integer.compare(o.length(), length());
    }
}

24
easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/CsvExcelReadExecutor.java

@ -1,6 +1,7 @@
package com.alibaba.excel.analysis.csv;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.file.Files;
import java.util.ArrayList;
@ -11,6 +12,7 @@ import java.util.Map;
import com.alibaba.excel.analysis.ExcelReadExecutor;
import com.alibaba.excel.context.csv.CsvReadContext;
import com.alibaba.excel.enums.ByteOrderMarkEnum;
import com.alibaba.excel.enums.CellDataTypeEnum;
import com.alibaba.excel.enums.RowTypeEnum;
import com.alibaba.excel.exception.ExcelAnalysisException;
@ -27,6 +29,7 @@ import org.apache.commons.collections4.MapUtils;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.input.BOMInputStream;
/**
* read executor
@ -82,17 +85,26 @@ public class CsvExcelReadExecutor implements ExcelReadExecutor {
private CSVParser csvParser() throws IOException {
CsvReadWorkbookHolder csvReadWorkbookHolder = csvReadContext.csvReadWorkbookHolder();
CSVFormat csvFormat = csvReadWorkbookHolder.getCsvFormat();
ByteOrderMarkEnum byteOrderMark = ByteOrderMarkEnum.valueOfByCharsetName(
csvReadContext.csvReadWorkbookHolder().getCharset().name());
if (csvReadWorkbookHolder.getMandatoryUseInputStream()) {
return csvFormat.parse(
new InputStreamReader(csvReadWorkbookHolder.getInputStream(), csvReadWorkbookHolder.getCharset()));
return buildCsvParser(csvFormat, csvReadWorkbookHolder.getInputStream(), byteOrderMark);
}
if (csvReadWorkbookHolder.getFile() != null) {
return csvFormat.parse(new InputStreamReader(Files.newInputStream(csvReadWorkbookHolder.getFile().toPath()),
csvReadWorkbookHolder.getCharset()));
return buildCsvParser(csvFormat, Files.newInputStream(csvReadWorkbookHolder.getFile().toPath()),
byteOrderMark);
}
return buildCsvParser(csvFormat, csvReadWorkbookHolder.getInputStream(), byteOrderMark);
}
private CSVParser buildCsvParser(CSVFormat csvFormat, InputStream inputStream, ByteOrderMarkEnum byteOrderMark)
throws IOException {
if (byteOrderMark == null) {
return csvFormat.parse(
new InputStreamReader(csvReadWorkbookHolder.getInputStream(), csvReadWorkbookHolder.getCharset()));
new InputStreamReader(inputStream, csvReadContext.csvReadWorkbookHolder().getCharset()));
}
return csvFormat.parse(new InputStreamReader(new BOMInputStream(inputStream, byteOrderMark.getByteOrderMark()),
csvReadContext.csvReadWorkbookHolder().getCharset()));
}
private void dealRecord(CSVRecord record, int rowIndex) {

51
easyexcel-core/src/main/java/com/alibaba/excel/enums/ByteOrderMarkEnum.java

@ -0,0 +1,51 @@
package com.alibaba.excel.enums;
import java.nio.charset.Charset;
import java.util.Map;
import com.alibaba.excel.util.MapUtils;
import lombok.Getter;
import org.apache.commons.io.ByteOrderMark;
/**
* byte order mark
*
* @author Jiaju Zhuang
*/
@Getter
public enum ByteOrderMarkEnum {

    UTF_8(ByteOrderMark.UTF_8),
    UTF_16BE(ByteOrderMark.UTF_16BE),
    UTF_16LE(ByteOrderMark.UTF_16LE),
    UTF_32BE(ByteOrderMark.UTF_32BE),
    UTF_32LE(ByteOrderMark.UTF_32LE),

    ;

    /**
     * Lookup table from the charset name reported by the wrapped {@code ByteOrderMark} to its enum constant.
     */
    private static final Map<String, ByteOrderMarkEnum> CHARSET_BYTE_ORDER_MARK_MAP = MapUtils.newHashMap();

    static {
        // Enum constants are created before this initializer runs, so values() is fully populated here.
        for (ByteOrderMarkEnum candidate : values()) {
            CHARSET_BYTE_ORDER_MARK_MAP.put(candidate.byteOrderMark.getCharsetName(), candidate);
        }
    }

    /**
     * The commons-io byte order mark wrapped by this constant.
     */
    final ByteOrderMark byteOrderMark;

    /**
     * The bytes of the mark decoded with the mark's own charset.
     */
    final String stringPrefix;

    ByteOrderMarkEnum(ByteOrderMark byteOrderMark) {
        this.byteOrderMark = byteOrderMark;
        this.stringPrefix = new String(byteOrderMark.getBytes(), Charset.forName(byteOrderMark.getCharsetName()));
    }

    /**
     * Finds the constant registered under the given charset name.
     *
     * @param charsetName the charset name exactly as reported by {@code ByteOrderMark#getCharsetName()}
     * @return the matching constant, or {@code null} when no byte order mark is registered under that name
     */
    public static ByteOrderMarkEnum valueOfByCharsetName(String charsetName) {
        return CHARSET_BYTE_ORDER_MARK_MAP.get(charsetName);
    }
}

14
easyexcel-core/src/main/java/com/alibaba/excel/metadata/csv/CsvSheet.java

@ -3,24 +3,30 @@ package com.alibaba.excel.metadata.csv;
import java.io.Closeable;
import java.io.IOException;
import java.math.BigDecimal;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import com.alibaba.excel.constant.BuiltinFormats;
import com.alibaba.excel.enums.ByteOrderMarkEnum;
import com.alibaba.excel.enums.NumericCellTypeEnum;
import com.alibaba.excel.exception.ExcelGenerateException;
import com.alibaba.excel.util.DateUtils;
import com.alibaba.excel.util.ListUtils;
import com.alibaba.excel.util.MapUtils;
import com.alibaba.excel.util.NumberDataFormatterUtils;
import com.alibaba.excel.util.StringUtils;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.Setter;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.io.ByteOrderMark;
import org.apache.poi.ss.usermodel.AutoFilter;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellRange;
@ -51,6 +57,7 @@ import org.apache.poi.ss.util.PaneInformation;
@Setter
@EqualsAndHashCode
public class CsvSheet implements Sheet, Closeable {
/**
* workbook
*/
@ -109,6 +116,13 @@ public class CsvSheet implements Sheet, Closeable {
}
rowCache = ListUtils.newArrayListWithExpectedSize(rowCacheCount);
try {
if (csvWorkbook.getWithBom()) {
ByteOrderMarkEnum byteOrderMark = ByteOrderMarkEnum.valueOfByCharsetName(
csvWorkbook.getCharset().name());
if (byteOrderMark != null) {
out.append(byteOrderMark.getStringPrefix());
}
}
csvPrinter = csvFormat.print(out);
} catch (IOException e) {
throw new ExcelGenerateException(e);

17
easyexcel-core/src/main/java/com/alibaba/excel/metadata/csv/CsvWorkbook.java

@ -2,6 +2,7 @@ package com.alibaba.excel.metadata.csv;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
@ -71,11 +72,25 @@ public class CsvWorkbook implements Workbook {
*/
private List<CsvCellStyle> csvCellStyleList;
public CsvWorkbook(Appendable out, Locale locale, Boolean use1904windowing, Boolean useScientificFormat) {
/**
* charset.
*/
private Charset charset;
/**
* Set the encoding prefix in the csv file, otherwise the office may open garbled characters.
* Default true.
*/
private Boolean withBom;
/**
* Creates a csv workbook; every argument is stored as-is in the field of the same name.
*
* @param out                 the target the csv content is appended to
* @param locale              the locale kept on the workbook
* @param use1904windowing    stored in {@code use1904windowing}
* @param useScientificFormat stored in {@code useScientificFormat}
* @param charset             the charset of the output; CsvSheet uses its name to look up the byte order mark
* @param withBom             whether CsvSheet writes the byte order mark before the csv content.
*                            NOTE(review): CsvSheet unboxes this value, so null looks unsafe here - confirm
*                            callers always pass a non-null value
*/
public CsvWorkbook(Appendable out, Locale locale, Boolean use1904windowing, Boolean useScientificFormat,
Charset charset, Boolean withBom) {
this.out = out;
this.locale = locale;
this.use1904windowing = use1904windowing;
this.useScientificFormat = useScientificFormat;
this.charset = charset;
this.withBom = withBom;
}
@Override

31
easyexcel-core/src/main/java/com/alibaba/excel/read/metadata/holder/csv/CsvReadWorkbookHolder.java

@ -1,9 +1,9 @@
package com.alibaba.excel.read.metadata.holder.csv;
import com.alibaba.excel.analysis.csv.BomBufferedInputStream;
import com.alibaba.excel.read.metadata.ReadWorkbook;
import com.alibaba.excel.read.metadata.holder.ReadWorkbookHolder;
import com.alibaba.excel.support.ExcelTypeEnum;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.Setter;
@ -30,34 +30,5 @@ public class CsvReadWorkbookHolder extends ReadWorkbookHolder {
super(readWorkbook);
setExcelType(ExcelTypeEnum.CSV);
this.csvFormat = CSVFormat.DEFAULT;
// CSV BOM
if (readWorkbook.getCharset() == null) {
BomBufferedInputStream bomBufferedInputStream = buildBomBufferedInputStream();
setInputStream(bomBufferedInputStream);
setMandatoryUseInputStream(Boolean.TRUE);
try {
if (bomBufferedInputStream.hasByteOrderMark()) {
setCharset(bomBufferedInputStream.getByteOrderMark().getCharset());
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}
/**
 * Wraps the workbook's source in a {@link BomBufferedInputStream}.
 * <p>
 * The configured input stream is used when its use is mandatory or when no file is set;
 * otherwise a new stream is opened on the file.
 *
 * @return the wrapping stream
 * @throws RuntimeException if the file cannot be opened; the original {@link IOException} is kept as the cause
 */
private BomBufferedInputStream buildBomBufferedInputStream() {
    try {
        boolean readFromFile = !Boolean.TRUE.equals(getMandatoryUseInputStream()) && getFile() != null;
        if (readFromFile) {
            return new BomBufferedInputStream(Files.newInputStream(getFile().toPath()));
        }
        return new BomBufferedInputStream(getInputStream());
    } catch (IOException e) {
        // Same message as before, but the cause is preserved so the stack trace shows the real failure.
        throw new RuntimeException(e.getMessage(), e);
    }
}
}

4
easyexcel-core/src/main/java/com/alibaba/excel/util/WorkBookUtil.java

@ -70,7 +70,9 @@ public class WorkBookUtil {
new OutputStreamWriter(writeWorkbookHolder.getOutputStream(), writeWorkbookHolder.getCharset())),
writeWorkbookHolder.getGlobalConfiguration().getLocale(),
writeWorkbookHolder.getGlobalConfiguration().getUse1904windowing(),
writeWorkbookHolder.getGlobalConfiguration().getUseScientificFormat());
writeWorkbookHolder.getGlobalConfiguration().getUseScientificFormat(),
writeWorkbookHolder.getCharset(),
writeWorkbookHolder.getWithBom());
writeWorkbookHolder.setCachedWorkbook(csvWorkbook);
writeWorkbookHolder.setWorkbook(csvWorkbook);
return;

9
easyexcel-core/src/main/java/com/alibaba/excel/write/builder/ExcelWriterBuilder.java

@ -94,6 +94,15 @@ public class ExcelWriterBuilder extends AbstractExcelWriterParameterBuilder<Exce
return this;
}
/**
* Sets whether the byte order mark (BOM) of the charset is written at the start of the csv file.
* Without it, Office may display garbled characters when opening the file.
* Default true.
*
* @param withBom the value handed to the underlying write workbook; may be null
* @return this builder, for chaining
*/
public ExcelWriterBuilder withBom(Boolean withBom) {
writeWorkbook.setWithBom(withBom);
return this;
}
/**
* Template file.
* This file is read into memory, excessive cases can lead to OOM.

7
easyexcel-core/src/main/java/com/alibaba/excel/write/metadata/WriteWorkbook.java

@ -41,6 +41,13 @@ public class WriteWorkbook extends WriteBasicParameter {
* Only work on the CSV file
*/
private Charset charset;
/**
* Whether to write the byte order mark (BOM) of the charset at the start of the csv file.
* Without it, Office may display garbled characters when opening the file. Defaults to true.
*/
private Boolean withBom;
/**
* Template input stream
* <p>

13
easyexcel-core/src/main/java/com/alibaba/excel/write/metadata/holder/WriteWorkbookHolder.java

@ -84,6 +84,13 @@ public class WriteWorkbookHolder extends AbstractWriteHolder {
* Only work on the CSV file
*/
private Charset charset;
/**
* Whether to write the byte order mark (BOM) of the charset at the start of the csv file.
* Without it, Office may display garbled characters when opening the file. Defaults to true.
*/
private Boolean withBom;
/**
* Template input stream
* <p>
@ -176,6 +183,12 @@ public class WriteWorkbookHolder extends AbstractWriteHolder {
this.charset = writeWorkbook.getCharset();
}
if (writeWorkbook.getWithBom() == null) {
this.withBom = Boolean.TRUE;
} else {
this.withBom = writeWorkbook.getWithBom();
}
if (writeWorkbook.getAutoCloseStream() == null) {
this.autoCloseStream = Boolean.TRUE;
} else {

3
easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomData.java

@ -1,6 +1,7 @@
package com.alibaba.easyexcel.test.core.bom;
import com.alibaba.excel.annotation.ExcelProperty;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.Setter;
@ -12,5 +13,5 @@ public class BomData {
@ExcelProperty("姓名")
private String name;
@ExcelProperty("年纪")
private Integer age;
private Long age;
}

90
easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomDataTest.java

@ -1,10 +1,18 @@
package com.alibaba.easyexcel.test.core.bom;
import com.alibaba.easyexcel.test.core.simple.SimpleData;
import com.alibaba.easyexcel.test.demo.read.DemoData;
import com.alibaba.easyexcel.test.util.TestFileUtil;
import com.alibaba.excel.EasyExcel;
import com.alibaba.excel.context.AnalysisContext;
import com.alibaba.excel.metadata.data.ReadCellData;
import com.alibaba.excel.read.listener.PageReadListener;
import com.alibaba.excel.read.listener.ReadListener;
import com.alibaba.excel.support.ExcelTypeEnum;
import com.alibaba.excel.util.ListUtils;
import com.alibaba.fastjson2.JSON;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.compress.utils.Lists;
import org.junit.Assert;
import org.junit.FixMethodOrder;
@ -12,17 +20,76 @@ import org.junit.Test;
import org.junit.runners.MethodSorters;
import java.io.File;
import java.io.FileOutputStream;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
* bom test
*
* @author Jiaju Zhuang
*/
@FixMethodOrder(MethodSorters.NAME_ASCENDING)
@Slf4j
public class BomDataTest {
/**
 * Reads the two bundled csv fixtures: one without a byte order mark and one saved by Office.
 */
@Test
public void t01ReadCsv() {
    String fixtureDir = "bom" + File.separator;
    File withoutBom = TestFileUtil.readFile(fixtureDir + "no_bom.csv");
    readCsv(withoutBom);
    File officeBom = TestFileUtil.readFile(fixtureDir + "office_bom.csv");
    readCsv(officeBom);
}
@Test
public void t01ReadAndWriteCsv() {
readCsv(TestFileUtil.readFile("bom" + File.separator + "bom_none.csv"));
readCsv(TestFileUtil.readFile("bom" + File.separator + "bom_utf8.csv"));
readCsv(TestFileUtil.readFile("bom" + File.separator + "bom_utf16be.csv"));
readCsv(TestFileUtil.readFile("bom" + File.separator + "bom_utf16le.csv"));
public void t02ReadAndWriteCsv() throws Exception {
readAndWriteCsv(TestFileUtil.createNewFile("bom" + File.separator + "bom_default.csv"), null, null);
readAndWriteCsv(TestFileUtil.createNewFile("bom" + File.separator + "bom_utf_8.csv"), "UTF-8", null);
readAndWriteCsv(TestFileUtil.createNewFile("bom" + File.separator + "bom_utf_8_lower_case.csv"), "utf-8", null);
readAndWriteCsv(TestFileUtil.createNewFile("bom" + File.separator + "bom_gbk.csv"), "GBK", null);
readAndWriteCsv(TestFileUtil.createNewFile("bom" + File.separator + "bom_gbk_lower_case.csv"), "gbk", null);
readAndWriteCsv(TestFileUtil.createNewFile("bom" + File.separator + "bom_utf_16be.csv"), "UTF-16BE", null);
readAndWriteCsv(TestFileUtil.createNewFile("bom" + File.separator + "bom_utf_8_not_with_bom.csv"), "UTF-8",
Boolean.FALSE);
}
/**
 * Writes the rows from {@link #data()} to {@code file} as csv and reads the file back with the same charset.
 * <p>
 * The read side checks the first head cell, the number of rows and the content of the first row.
 *
 * @param file        the csv file to write and then read
 * @param charsetName name of the charset for both directions, or null to leave the charset unset
 * @param withBom     passed to the writer's {@code withBom}; may be null
 * @throws Exception if the file cannot be opened for writing
 */
private void readAndWriteCsv(File file, String charsetName, Boolean withBom) throws Exception {
    Charset charset = null;
    if (charsetName != null) {
        charset = Charset.forName(charsetName);
    }
    EasyExcel.write(new FileOutputStream(file), BomData.class)
        .charset(charset)
        .withBom(withBom)
        .excelType(ExcelTypeEnum.CSV)
        .sheet()
        .doWrite(data());

    EasyExcel.read(file, BomData.class, new ReadListener<BomData>() {

            private final List<BomData> dataList = Lists.newArrayList();

            @Override
            public void invokeHead(Map<Integer, ReadCellData<?>> headMap, AnalysisContext context) {
                // A byte order mark that leaked into the content would show up in front of the first head cell.
                String head = headMap.get(0).getStringValue();
                Assert.assertEquals("姓名", head);
            }

            @Override
            public void invoke(BomData data, AnalysisContext context) {
                dataList.add(data);
            }

            @Override
            public void doAfterAllAnalysed(AnalysisContext context) {
                // Expected value first, so a failure message reports the sizes the right way round.
                Assert.assertEquals(10, dataList.size());
                BomData bomData = dataList.get(0);
                Assert.assertEquals("姓名0", bomData.getName());
                Assert.assertEquals(20, (long)bomData.getAge());
            }
        })
        .charset(charset)
        .sheet().doRead();
}
private void readCsv(File file) {
@ -46,8 +113,19 @@ public class BomDataTest {
Assert.assertEquals(dataList.size(), 10);
BomData bomData = dataList.get(0);
Assert.assertEquals("姓名0", bomData.getName());
Assert.assertEquals(0, (long) bomData.getAge());
Assert.assertEquals(20L, (long)bomData.getAge());
}
}).sheet().doRead();
}
/**
 * Builds the ten rows written by the tests: names "姓名0" to "姓名9", each with age 20.
 *
 * @return the rows, in ascending index order
 */
private List<BomData> data() {
    List<BomData> rows = ListUtils.newArrayList();
    int index = 0;
    while (index < 10) {
        BomData row = new BomData();
        row.setName("姓名" + index);
        row.setAge(20L);
        rows.add(row);
        index++;
    }
    return rows;
}
}

11
easyexcel-test/src/test/resources/bom/bom_none.csv

@ -1,11 +0,0 @@
姓名,年纪
姓名0,0
姓名1,1
姓名2,2
姓名3,3
姓名4,4
姓名5,5
姓名6,6
姓名7,7
姓名8,8
姓名9,9
1 姓名 年纪
2 姓名0 0
3 姓名1 1
4 姓名2 2
5 姓名3 3
6 姓名4 4
7 姓名5 5
8 姓名6 6
9 姓名7 7
10 姓名8 8
11 姓名9 9

BIN
easyexcel-test/src/test/resources/bom/bom_utf16be.csv

Binary file not shown.
1 姓名 年纪
2 姓名0 0
3 姓名1 1
4 姓名2 2
5 姓名3 3
6 姓名4 4
7 姓名5 5
8 姓名6 6
9 姓名7 7
10 姓名8 8
11 姓名9 9

BIN
easyexcel-test/src/test/resources/bom/bom_utf16le.csv

Binary file not shown.
1 姓名 年纪
2 姓名0 0
3 姓名1 1
4 姓名2 2
5 姓名3 3
6 姓名4 4
7 姓名5 5
8 姓名6 6
9 姓名7 7
10 姓名8 8
11 姓名9 9

11
easyexcel-test/src/test/resources/bom/bom_utf8.csv

@ -1,11 +0,0 @@
姓名,年纪
姓名0,0
姓名1,1
姓名2,2
姓名3,3
姓名4,4
姓名5,5
姓名6,6
姓名7,7
姓名8,8
姓名9,9
1 姓名 年纪
2 姓名0 0
3 姓名1 1
4 姓名2 2
5 姓名3 3
6 姓名4 4
7 姓名5 5
8 姓名6 6
9 姓名7 7
10 姓名8 8
11 姓名9 9

11
easyexcel-test/src/test/resources/bom/no_bom.csv

@ -0,0 +1,11 @@
姓名,年纪
姓名0,20
姓名1,20
姓名2,20
姓名3,20
姓名4,20
姓名5,20
姓名6,20
姓名7,20
姓名8,20
姓名9,20
1 姓名 年纪
2 姓名0 20
3 姓名1 20
4 姓名2 20
5 姓名3 20
6 姓名4 20
7 姓名5 20
8 姓名6 20
9 姓名7 20
10 姓名8 20
11 姓名9 20

11
easyexcel-test/src/test/resources/bom/office_bom.csv

@ -0,0 +1,11 @@
姓名,年纪
姓名0,20
姓名1,20
姓名2,20
姓名3,20
姓名4,20
姓名5,20
姓名6,20
姓名7,20
姓名8,20
姓名9,20
1 姓名 年纪
2 姓名0 20
3 姓名1 20
4 姓名2 20
5 姓名3 20
6 姓名4 20
7 姓名5 20
8 姓名6 20
9 姓名7 20
10 姓名8 20
11 姓名9 20

7
pom.xml

@ -20,7 +20,7 @@
<properties>
<revision>3.2.1</revision>
<revision>3.3.0</revision>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<jdk.version>1.8</jdk.version>
<gpg.skip>true</gpg.skip>
@ -128,6 +128,11 @@
<artifactId>lombok</artifactId>
<version>1.18.20</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.11.0</version>
</dependency>
<dependency>

5
update.md

@ -1,3 +1,8 @@
# 3.3.0
* 读csv会忽略BOM数据 [Issue #3137](https://github.com/alibaba/easyexcel/issues/3137)
* 解决csv用office打开乱码的问题,写csv默认带上BOM数据
# 3.2.1
* 兼容`LocalDate` [Issue #2908](https://github.com/alibaba/easyexcel/issues/2908)

Loading…
Cancel
Save