diff --git a/easyexcel-core/pom.xml b/easyexcel-core/pom.xml index 9cfabb2a..70c1e6fe 100644 --- a/easyexcel-core/pom.xml +++ b/easyexcel-core/pom.xml @@ -40,5 +40,9 @@ org.ehcache ehcache + + commons-io + commons-io + diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/BomBufferedInputStream.java b/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/BomBufferedInputStream.java deleted file mode 100644 index c24a6e0c..00000000 --- a/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/BomBufferedInputStream.java +++ /dev/null @@ -1,90 +0,0 @@ -package com.alibaba.excel.analysis.csv; - - -import java.io.BufferedInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -/** - * This class is used to wrap a stream that includes an encoded {@link ByteOrderMark} as its first bytes. - * - * @author supalle - * @see Byte Order Mark (BOM) FAQ - * @see Apache CommonsIO BOMInputStream - */ -public class BomBufferedInputStream extends BufferedInputStream { - public final static List DEFAULT_BYTE_ORDER_MARKS = new ArrayList<>(); - - static { - DEFAULT_BYTE_ORDER_MARKS.add(ByteOrderMark.UTF_8); - DEFAULT_BYTE_ORDER_MARKS.add(ByteOrderMark.UTF_16BE); - DEFAULT_BYTE_ORDER_MARKS.add(ByteOrderMark.UTF_16LE); - DEFAULT_BYTE_ORDER_MARKS.add(ByteOrderMark.UTF_32BE); - DEFAULT_BYTE_ORDER_MARKS.add(ByteOrderMark.UTF_32LE); - } - - private boolean initialized; - private ByteOrderMark byteOrderMark; - private final List byteOrderMarks; - - public BomBufferedInputStream(InputStream in, final ByteOrderMark... byteOrderMarks) { - super(in); - this.byteOrderMarks = applyByteOrderMarks(byteOrderMarks); - } - - public BomBufferedInputStream(InputStream in, int size, final ByteOrderMark... byteOrderMarks) { - super(in, size); - this.byteOrderMarks = applyByteOrderMarks(byteOrderMarks); - } - - private static List applyByteOrderMarks(ByteOrderMark[] byteOrderMarks) { - return byteOrderMarks == null || byteOrderMarks.length == 0 ? DEFAULT_BYTE_ORDER_MARKS : Arrays.asList(byteOrderMarks); - } - - public boolean hasByteOrderMark() throws IOException { - return getByteOrderMark() != null; - } - - public ByteOrderMark getByteOrderMark() throws IOException { - if (initialized) { - return byteOrderMark; - } - this.byteOrderMarks.sort(ByteOrderMark::compareTo); - int maxBomLength = byteOrderMarks.get(0).length(); - mark(maxBomLength); - int[] firstBytes = new int[maxBomLength]; - for (int i = 0; i < maxBomLength; i++) { - firstBytes[i] = read(); - if (firstBytes[i] < 0) { - break; - } - } - byteOrderMark = matchByteOrderMark(this.byteOrderMarks, firstBytes); - - reset(); - if (byteOrderMark != null) { - skip(byteOrderMark.length()); - } - initialized = true; - return byteOrderMark; - } - - private ByteOrderMark matchByteOrderMark(final List byteOrderMarks, final int[] firstBytes) { - loop: - for (ByteOrderMark item : byteOrderMarks) { - int[] bytes = item.getBytes(); - int length = bytes.length; - for (int i = 0; i < length; i++) { - if (firstBytes[i] != bytes[i]) { - continue loop; - } - } - return item; - } - return null; - } - -} diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/ByteOrderMark.java b/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/ByteOrderMark.java deleted file mode 100644 index 1ed078cf..00000000 --- a/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/ByteOrderMark.java +++ /dev/null @@ -1,108 +0,0 @@ -package com.alibaba.excel.analysis.csv; - -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.Objects; -import java.util.stream.Collectors; - -/** - * Byte Order Mark (BOM) - *
- * User in {@link BomBufferedInputStream} - * - * @author supalle - * @see Byte Order Mark (BOM) FAQ - * @see Apache CommonsIO ByteOrderMark - */ -public class ByteOrderMark implements Comparable { - - /** - * UTF-8 BOM. - */ - public static final ByteOrderMark UTF_8 = new ByteOrderMark(StandardCharsets.UTF_8, 0xEF, 0xBB, 0xBF); - - /** - * UTF-16BE BOM (Big-Endian). - */ - public static final ByteOrderMark UTF_16BE = new ByteOrderMark(StandardCharsets.UTF_16BE, 0xFE, 0xFF); - - /** - * UTF-16LE BOM (Little-Endian). - */ - public static final ByteOrderMark UTF_16LE = new ByteOrderMark(StandardCharsets.UTF_16LE, 0xFF, 0xFE); - - /** - * UTF-32BE BOM (Big-Endian). - * - * @since 2.2 - */ - public static final ByteOrderMark UTF_32BE = new ByteOrderMark(Charset.forName("UTF-32BE"), 0x00, 0x00, 0xFE, 0xFF); - - /** - * UTF-32LE BOM (Little-Endian). - * - * @since 2.2 - */ - public static final ByteOrderMark UTF_32LE = new ByteOrderMark(Charset.forName("UTF-32LE"), 0xFF, 0xFE, 0x00, 0x00); - - private final Charset charset; - private final int[] bytes; - - public ByteOrderMark(final Charset charset, final int... bytes) { - this.charset = Objects.requireNonNull(charset, "charset must be not null"); - if (bytes == null || bytes.length == 0) { - throw new IllegalArgumentException("bytes must be not empty"); - } - this.bytes = bytes; - } - - public Charset getCharset() { - return charset; - } - - public int[] getBytes() { - return bytes; - } - - public int length() { - return bytes.length; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - ByteOrderMark that = (ByteOrderMark) o; - return Objects.equals(charset, that.charset) && Arrays.equals(bytes, that.bytes); - } - - @Override - public int hashCode() { - int result = Objects.hash(charset); - result = 31 * result + Arrays.hashCode(bytes); - return result; - } - - @Override - public String toString() { - return "ByteOrderMark{" + - "charset=" + charset + - ", bytes=[" - + Arrays.stream(bytes) - .mapToObj(Integer::toHexString) - .map(String::toUpperCase) - .map("0x"::concat) - .collect(Collectors.joining(",")) + - "]}"; - } - - @Override - public int compareTo(ByteOrderMark o) { - return o.length() - length(); - } -} diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/CsvExcelReadExecutor.java b/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/CsvExcelReadExecutor.java index a1b7625d..6afabe34 100644 --- a/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/CsvExcelReadExecutor.java +++ b/easyexcel-core/src/main/java/com/alibaba/excel/analysis/csv/CsvExcelReadExecutor.java @@ -1,6 +1,7 @@ package com.alibaba.excel.analysis.csv; import java.io.IOException; +import java.io.InputStream; import java.io.InputStreamReader; import java.nio.file.Files; import java.util.ArrayList; @@ -11,6 +12,7 @@ import java.util.Map; import com.alibaba.excel.analysis.ExcelReadExecutor; import com.alibaba.excel.context.csv.CsvReadContext; +import com.alibaba.excel.enums.ByteOrderMarkEnum; import com.alibaba.excel.enums.CellDataTypeEnum; import com.alibaba.excel.enums.RowTypeEnum; import com.alibaba.excel.exception.ExcelAnalysisException; @@ -27,6 +29,7 @@ import org.apache.commons.collections4.MapUtils; import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVParser; import org.apache.commons.csv.CSVRecord; +import org.apache.commons.io.input.BOMInputStream; /** * read executor @@ -82,17 +85,26 @@ public class CsvExcelReadExecutor implements ExcelReadExecutor { private CSVParser csvParser() throws IOException { CsvReadWorkbookHolder csvReadWorkbookHolder = csvReadContext.csvReadWorkbookHolder(); CSVFormat csvFormat = csvReadWorkbookHolder.getCsvFormat(); - + ByteOrderMarkEnum byteOrderMark = ByteOrderMarkEnum.valueOfByCharsetName( + csvReadContext.csvReadWorkbookHolder().getCharset().name()); if (csvReadWorkbookHolder.getMandatoryUseInputStream()) { - return csvFormat.parse( - new InputStreamReader(csvReadWorkbookHolder.getInputStream(), csvReadWorkbookHolder.getCharset())); + return buildCsvParser(csvFormat, csvReadWorkbookHolder.getInputStream(), byteOrderMark); } if (csvReadWorkbookHolder.getFile() != null) { - return csvFormat.parse(new InputStreamReader(Files.newInputStream(csvReadWorkbookHolder.getFile().toPath()), - csvReadWorkbookHolder.getCharset())); + return buildCsvParser(csvFormat, Files.newInputStream(csvReadWorkbookHolder.getFile().toPath()), + byteOrderMark); + } + return buildCsvParser(csvFormat, csvReadWorkbookHolder.getInputStream(), byteOrderMark); + } + + private CSVParser buildCsvParser(CSVFormat csvFormat, InputStream inputStream, ByteOrderMarkEnum byteOrderMark) + throws IOException { + if (byteOrderMark == null) { + return csvFormat.parse( + new InputStreamReader(inputStream, csvReadContext.csvReadWorkbookHolder().getCharset())); } - return csvFormat.parse( - new InputStreamReader(csvReadWorkbookHolder.getInputStream(), csvReadWorkbookHolder.getCharset())); + return csvFormat.parse(new InputStreamReader(new BOMInputStream(inputStream, byteOrderMark.getByteOrderMark()), + csvReadContext.csvReadWorkbookHolder().getCharset())); } private void dealRecord(CSVRecord record, int rowIndex) { diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/enums/ByteOrderMarkEnum.java b/easyexcel-core/src/main/java/com/alibaba/excel/enums/ByteOrderMarkEnum.java new file mode 100644 index 00000000..f6b1e8b0 --- /dev/null +++ b/easyexcel-core/src/main/java/com/alibaba/excel/enums/ByteOrderMarkEnum.java @@ -0,0 +1,51 @@ +package com.alibaba.excel.enums; + +import java.nio.charset.Charset; +import java.util.Map; + +import com.alibaba.excel.util.MapUtils; + +import lombok.Getter; +import org.apache.commons.io.ByteOrderMark; + +/** + * byte order mark + * + * @author Jiaju Zhuang + */ +@Getter +public enum ByteOrderMarkEnum { + + UTF_8(ByteOrderMark.UTF_8), + UTF_16BE(ByteOrderMark.UTF_16BE), + UTF_16LE(ByteOrderMark.UTF_16LE), + UTF_32BE(ByteOrderMark.UTF_32BE), + UTF_32LE(ByteOrderMark.UTF_32LE), + + ; + + final ByteOrderMark byteOrderMark; + final String stringPrefix; + + ByteOrderMarkEnum(ByteOrderMark byteOrderMark) { + this.byteOrderMark = byteOrderMark; + Charset charset = Charset.forName(byteOrderMark.getCharsetName()); + this.stringPrefix = new String(byteOrderMark.getBytes(), charset); + } + + /** + * store character aliases corresponding to `ByteOrderMark` prefix + */ + private static final Map CHARSET_BYTE_ORDER_MARK_MAP = MapUtils.newHashMap(); + + static { + for (ByteOrderMarkEnum value : ByteOrderMarkEnum.values()) { + CHARSET_BYTE_ORDER_MARK_MAP.put(value.getByteOrderMark().getCharsetName(), value); + } + } + + public static ByteOrderMarkEnum valueOfByCharsetName(String charsetName) { + return CHARSET_BYTE_ORDER_MARK_MAP.get(charsetName); + } + +} diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/metadata/csv/CsvSheet.java b/easyexcel-core/src/main/java/com/alibaba/excel/metadata/csv/CsvSheet.java index 725addbc..cf2cf8ec 100644 --- a/easyexcel-core/src/main/java/com/alibaba/excel/metadata/csv/CsvSheet.java +++ b/easyexcel-core/src/main/java/com/alibaba/excel/metadata/csv/CsvSheet.java @@ -3,24 +3,30 @@ package com.alibaba.excel.metadata.csv; import java.io.Closeable; import java.io.IOException; import java.math.BigDecimal; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.Map; import com.alibaba.excel.constant.BuiltinFormats; +import com.alibaba.excel.enums.ByteOrderMarkEnum; import com.alibaba.excel.enums.NumericCellTypeEnum; import com.alibaba.excel.exception.ExcelGenerateException; import com.alibaba.excel.util.DateUtils; import com.alibaba.excel.util.ListUtils; +import com.alibaba.excel.util.MapUtils; import com.alibaba.excel.util.NumberDataFormatterUtils; import com.alibaba.excel.util.StringUtils; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.Setter; +import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVPrinter; +import org.apache.commons.io.ByteOrderMark; import org.apache.poi.ss.usermodel.AutoFilter; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.CellRange; @@ -51,6 +57,7 @@ import org.apache.poi.ss.util.PaneInformation; @Setter @EqualsAndHashCode public class CsvSheet implements Sheet, Closeable { + /** * workbook */ @@ -109,6 +116,13 @@ public class CsvSheet implements Sheet, Closeable { } rowCache = ListUtils.newArrayListWithExpectedSize(rowCacheCount); try { + if (csvWorkbook.getWithBom()) { + ByteOrderMarkEnum byteOrderMark = ByteOrderMarkEnum.valueOfByCharsetName( + csvWorkbook.getCharset().name()); + if (byteOrderMark != null) { + out.append(byteOrderMark.getStringPrefix()); + } + } csvPrinter = csvFormat.print(out); } catch (IOException e) { throw new ExcelGenerateException(e); diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/metadata/csv/CsvWorkbook.java b/easyexcel-core/src/main/java/com/alibaba/excel/metadata/csv/CsvWorkbook.java index 4e713509..d2f0e08b 100644 --- a/easyexcel-core/src/main/java/com/alibaba/excel/metadata/csv/CsvWorkbook.java +++ b/easyexcel-core/src/main/java/com/alibaba/excel/metadata/csv/CsvWorkbook.java @@ -2,6 +2,7 @@ package com.alibaba.excel.metadata.csv; import java.io.IOException; import java.io.OutputStream; +import java.nio.charset.Charset; import java.util.Iterator; import java.util.List; import java.util.Locale; @@ -71,11 +72,25 @@ public class CsvWorkbook implements Workbook { */ private List csvCellStyleList; - public CsvWorkbook(Appendable out, Locale locale, Boolean use1904windowing, Boolean useScientificFormat) { + /** + * charset. + */ + private Charset charset; + + /** + * Set the encoding prefix in the csv file, otherwise the office may open garbled characters. + * Default true. + */ + private Boolean withBom; + + public CsvWorkbook(Appendable out, Locale locale, Boolean use1904windowing, Boolean useScientificFormat, + Charset charset, Boolean withBom) { this.out = out; this.locale = locale; this.use1904windowing = use1904windowing; this.useScientificFormat = useScientificFormat; + this.charset = charset; + this.withBom = withBom; } @Override diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/read/metadata/holder/csv/CsvReadWorkbookHolder.java b/easyexcel-core/src/main/java/com/alibaba/excel/read/metadata/holder/csv/CsvReadWorkbookHolder.java index 04aeac4e..a65c2d79 100644 --- a/easyexcel-core/src/main/java/com/alibaba/excel/read/metadata/holder/csv/CsvReadWorkbookHolder.java +++ b/easyexcel-core/src/main/java/com/alibaba/excel/read/metadata/holder/csv/CsvReadWorkbookHolder.java @@ -1,9 +1,9 @@ package com.alibaba.excel.read.metadata.holder.csv; -import com.alibaba.excel.analysis.csv.BomBufferedInputStream; import com.alibaba.excel.read.metadata.ReadWorkbook; import com.alibaba.excel.read.metadata.holder.ReadWorkbookHolder; import com.alibaba.excel.support.ExcelTypeEnum; + import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.Setter; @@ -30,34 +30,5 @@ public class CsvReadWorkbookHolder extends ReadWorkbookHolder { super(readWorkbook); setExcelType(ExcelTypeEnum.CSV); this.csvFormat = CSVFormat.DEFAULT; - // CSV BOM - if (readWorkbook.getCharset() == null) { - BomBufferedInputStream bomBufferedInputStream = buildBomBufferedInputStream(); - setInputStream(bomBufferedInputStream); - setMandatoryUseInputStream(Boolean.TRUE); - try { - if (bomBufferedInputStream.hasByteOrderMark()) { - setCharset(bomBufferedInputStream.getByteOrderMark().getCharset()); - } - } catch (IOException e) { - throw new RuntimeException(e); - } - } - } - - private BomBufferedInputStream buildBomBufferedInputStream() { - BomBufferedInputStream bomBufferedInputStream; - try { - if (Boolean.TRUE.equals(getMandatoryUseInputStream())) { - bomBufferedInputStream = new BomBufferedInputStream(getInputStream()); - } else if (getFile() != null) { - bomBufferedInputStream = new BomBufferedInputStream(Files.newInputStream(getFile().toPath())); - } else { - bomBufferedInputStream = new BomBufferedInputStream(getInputStream()); - } - } catch (IOException e) { - throw new RuntimeException(e.getMessage()); - } - return bomBufferedInputStream; } } diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/util/WorkBookUtil.java b/easyexcel-core/src/main/java/com/alibaba/excel/util/WorkBookUtil.java index 8fc7c210..8ed49e6a 100644 --- a/easyexcel-core/src/main/java/com/alibaba/excel/util/WorkBookUtil.java +++ b/easyexcel-core/src/main/java/com/alibaba/excel/util/WorkBookUtil.java @@ -70,7 +70,9 @@ public class WorkBookUtil { new OutputStreamWriter(writeWorkbookHolder.getOutputStream(), writeWorkbookHolder.getCharset())), writeWorkbookHolder.getGlobalConfiguration().getLocale(), writeWorkbookHolder.getGlobalConfiguration().getUse1904windowing(), - writeWorkbookHolder.getGlobalConfiguration().getUseScientificFormat()); + writeWorkbookHolder.getGlobalConfiguration().getUseScientificFormat(), + writeWorkbookHolder.getCharset(), + writeWorkbookHolder.getWithBom()); writeWorkbookHolder.setCachedWorkbook(csvWorkbook); writeWorkbookHolder.setWorkbook(csvWorkbook); return; diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/write/builder/ExcelWriterBuilder.java b/easyexcel-core/src/main/java/com/alibaba/excel/write/builder/ExcelWriterBuilder.java index 76af6f66..821898f1 100644 --- a/easyexcel-core/src/main/java/com/alibaba/excel/write/builder/ExcelWriterBuilder.java +++ b/easyexcel-core/src/main/java/com/alibaba/excel/write/builder/ExcelWriterBuilder.java @@ -94,6 +94,15 @@ public class ExcelWriterBuilder extends AbstractExcelWriterParameterBuilder diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/write/metadata/holder/WriteWorkbookHolder.java b/easyexcel-core/src/main/java/com/alibaba/excel/write/metadata/holder/WriteWorkbookHolder.java index e64b8e38..7cc23917 100644 --- a/easyexcel-core/src/main/java/com/alibaba/excel/write/metadata/holder/WriteWorkbookHolder.java +++ b/easyexcel-core/src/main/java/com/alibaba/excel/write/metadata/holder/WriteWorkbookHolder.java @@ -84,6 +84,13 @@ public class WriteWorkbookHolder extends AbstractWriteHolder { * Only work on the CSV file */ private Charset charset; + + /** + * Set the encoding prefix in the csv file, otherwise the office may open garbled characters. + * Default true. + */ + private Boolean withBom; + /** * Template input stream *

@@ -176,6 +183,12 @@ public class WriteWorkbookHolder extends AbstractWriteHolder { this.charset = writeWorkbook.getCharset(); } + if (writeWorkbook.getWithBom() == null) { + this.withBom = Boolean.TRUE; + } else { + this.withBom = writeWorkbook.getWithBom(); + } + if (writeWorkbook.getAutoCloseStream() == null) { this.autoCloseStream = Boolean.TRUE; } else { diff --git a/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomData.java b/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomData.java index 24d8ece8..55b58fcd 100644 --- a/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomData.java +++ b/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomData.java @@ -1,6 +1,7 @@ package com.alibaba.easyexcel.test.core.bom; import com.alibaba.excel.annotation.ExcelProperty; + import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.Setter; @@ -12,5 +13,5 @@ public class BomData { @ExcelProperty("姓名") private String name; @ExcelProperty("年纪") - private Integer age; + private Long age; } diff --git a/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomDataTest.java b/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomDataTest.java index 0c4ceacc..693708a3 100644 --- a/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomDataTest.java +++ b/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/bom/BomDataTest.java @@ -1,10 +1,18 @@ package com.alibaba.easyexcel.test.core.bom; +import com.alibaba.easyexcel.test.core.simple.SimpleData; +import com.alibaba.easyexcel.test.demo.read.DemoData; import com.alibaba.easyexcel.test.util.TestFileUtil; import com.alibaba.excel.EasyExcel; import com.alibaba.excel.context.AnalysisContext; import com.alibaba.excel.metadata.data.ReadCellData; +import com.alibaba.excel.read.listener.PageReadListener; import com.alibaba.excel.read.listener.ReadListener; +import com.alibaba.excel.support.ExcelTypeEnum; +import com.alibaba.excel.util.ListUtils; +import com.alibaba.fastjson2.JSON; + +import lombok.extern.slf4j.Slf4j; import org.apache.commons.compress.utils.Lists; import org.junit.Assert; import org.junit.FixMethodOrder; @@ -12,17 +20,76 @@ import org.junit.Test; import org.junit.runners.MethodSorters; import java.io.File; +import java.io.FileOutputStream; +import java.nio.charset.Charset; +import java.util.ArrayList; import java.util.List; import java.util.Map; +/** + * bom test + * + * @author Jiaju Zhuang + */ @FixMethodOrder(MethodSorters.NAME_ASCENDING) +@Slf4j public class BomDataTest { + + @Test + public void t01ReadCsv() { + readCsv(TestFileUtil.readFile("bom" + File.separator + "no_bom.csv")); + readCsv(TestFileUtil.readFile("bom" + File.separator + "office_bom.csv")); + } + @Test - public void t01ReadAndWriteCsv() { - readCsv(TestFileUtil.readFile("bom" + File.separator + "bom_none.csv")); - readCsv(TestFileUtil.readFile("bom" + File.separator + "bom_utf8.csv")); - readCsv(TestFileUtil.readFile("bom" + File.separator + "bom_utf16be.csv")); - readCsv(TestFileUtil.readFile("bom" + File.separator + "bom_utf16le.csv")); + public void t02ReadAndWriteCsv() throws Exception { + readAndWriteCsv(TestFileUtil.createNewFile("bom" + File.separator + "bom_default.csv"), null, null); + readAndWriteCsv(TestFileUtil.createNewFile("bom" + File.separator + "bom_utf_8.csv"), "UTF-8", null); + readAndWriteCsv(TestFileUtil.createNewFile("bom" + File.separator + "bom_utf_8_lower_case.csv"), "utf-8", null); + readAndWriteCsv(TestFileUtil.createNewFile("bom" + File.separator + "bom_gbk.csv"), "GBK", null); + readAndWriteCsv(TestFileUtil.createNewFile("bom" + File.separator + "bom_gbk_lower_case.csv"), "gbk", null); + readAndWriteCsv(TestFileUtil.createNewFile("bom" + File.separator + "bom_utf_16be.csv"), "UTF-16BE", null); + readAndWriteCsv(TestFileUtil.createNewFile("bom" + File.separator + "bom_utf_8_not_with_bom.csv"), "UTF-8", + Boolean.FALSE); + } + + private void readAndWriteCsv(File file, String charsetName, Boolean withBom) throws Exception { + Charset charset = null; + if (charsetName != null) { + charset = Charset.forName(charsetName); + } + EasyExcel.write(new FileOutputStream(file), BomData.class) + .charset(charset) + .withBom(withBom) + .excelType(ExcelTypeEnum.CSV) + .sheet() + .doWrite(data()); + + EasyExcel.read(file, BomData.class, new ReadListener() { + + private final List dataList = Lists.newArrayList(); + + @Override + public void invokeHead(Map> headMap, AnalysisContext context) { + String head = headMap.get(0).getStringValue(); + Assert.assertEquals("姓名", head); + } + + @Override + public void invoke(BomData data, AnalysisContext context) { + dataList.add(data); + } + + @Override + public void doAfterAllAnalysed(AnalysisContext context) { + Assert.assertEquals(dataList.size(), 10); + BomData bomData = dataList.get(0); + Assert.assertEquals("姓名0", bomData.getName()); + Assert.assertEquals(20, (long)bomData.getAge()); + } + }) + .charset(charset) + .sheet().doRead(); } private void readCsv(File file) { @@ -46,8 +113,19 @@ public class BomDataTest { Assert.assertEquals(dataList.size(), 10); BomData bomData = dataList.get(0); Assert.assertEquals("姓名0", bomData.getName()); - Assert.assertEquals(0, (long) bomData.getAge()); + Assert.assertEquals(20L, (long)bomData.getAge()); } }).sheet().doRead(); } + + private List data() { + List list = ListUtils.newArrayList(); + for (int i = 0; i < 10; i++) { + BomData data = new BomData(); + data.setName("姓名" + i); + data.setAge(20L); + list.add(data); + } + return list; + } } diff --git a/easyexcel-test/src/test/resources/bom/bom_none.csv b/easyexcel-test/src/test/resources/bom/bom_none.csv deleted file mode 100644 index 26d73e1f..00000000 --- a/easyexcel-test/src/test/resources/bom/bom_none.csv +++ /dev/null @@ -1,11 +0,0 @@ -姓名,年纪 -姓名0,0 -姓名1,1 -姓名2,2 -姓名3,3 -姓名4,4 -姓名5,5 -姓名6,6 -姓名7,7 -姓名8,8 -姓名9,9 \ No newline at end of file diff --git a/easyexcel-test/src/test/resources/bom/bom_utf16be.csv b/easyexcel-test/src/test/resources/bom/bom_utf16be.csv deleted file mode 100644 index ad13f871..00000000 Binary files a/easyexcel-test/src/test/resources/bom/bom_utf16be.csv and /dev/null differ diff --git a/easyexcel-test/src/test/resources/bom/bom_utf16le.csv b/easyexcel-test/src/test/resources/bom/bom_utf16le.csv deleted file mode 100644 index 8e8eba90..00000000 Binary files a/easyexcel-test/src/test/resources/bom/bom_utf16le.csv and /dev/null differ diff --git a/easyexcel-test/src/test/resources/bom/bom_utf8.csv b/easyexcel-test/src/test/resources/bom/bom_utf8.csv deleted file mode 100644 index 358d5bac..00000000 --- a/easyexcel-test/src/test/resources/bom/bom_utf8.csv +++ /dev/null @@ -1,11 +0,0 @@ -姓名,年纪 -姓名0,0 -姓名1,1 -姓名2,2 -姓名3,3 -姓名4,4 -姓名5,5 -姓名6,6 -姓名7,7 -姓名8,8 -姓名9,9 \ No newline at end of file diff --git a/easyexcel-test/src/test/resources/bom/no_bom.csv b/easyexcel-test/src/test/resources/bom/no_bom.csv new file mode 100644 index 00000000..461c1eb0 --- /dev/null +++ b/easyexcel-test/src/test/resources/bom/no_bom.csv @@ -0,0 +1,11 @@ +姓名,年纪 +姓名0,20 +姓名1,20 +姓名2,20 +姓名3,20 +姓名4,20 +姓名5,20 +姓名6,20 +姓名7,20 +姓名8,20 +姓名9,20 \ No newline at end of file diff --git a/easyexcel-test/src/test/resources/bom/office_bom.csv b/easyexcel-test/src/test/resources/bom/office_bom.csv new file mode 100644 index 00000000..b6a80992 --- /dev/null +++ b/easyexcel-test/src/test/resources/bom/office_bom.csv @@ -0,0 +1,11 @@ +姓名,年纪 +姓名0,20 +姓名1,20 +姓名2,20 +姓名3,20 +姓名4,20 +姓名5,20 +姓名6,20 +姓名7,20 +姓名8,20 +姓名9,20 \ No newline at end of file diff --git a/pom.xml b/pom.xml index cceb0f94..4a73d527 100644 --- a/pom.xml +++ b/pom.xml @@ -20,7 +20,7 @@ - 3.2.1 + 3.3.0 UTF-8 1.8 true @@ -128,6 +128,11 @@ lombok 1.18.20 + + commons-io + commons-io + 2.11.0 + diff --git a/update.md b/update.md index f315c5b3..9d3b04af 100644 --- a/update.md +++ b/update.md @@ -1,3 +1,8 @@ +# 3.3.0 + +* 读csv会忽略BOM数据 [Issue #3137](https://github.com/alibaba/easyexcel/issues/3137) +* 解决csv用office打开乱码的问题,写csv默认带上BOM数据 + # 3.2.1 * 兼容`LocalDate` [Issue #2908](https://github.com/alibaba/easyexcel/issues/2908)