mirror of https://github.com/alibaba/easyexcel
supalle
2 years ago
9 changed files with 327 additions and 1 deletions
@ -0,0 +1,91 @@
|
||||
package com.alibaba.excel.analysis.csv; |
||||
|
||||
|
||||
import java.io.BufferedInputStream; |
||||
import java.io.IOException; |
||||
import java.io.InputStream; |
||||
import java.util.ArrayList; |
||||
import java.util.Arrays; |
||||
import java.util.List; |
||||
|
||||
/** |
||||
* This class is used to wrap a stream that includes an encoded {@link ByteOrderMark} as its first bytes. |
||||
* |
||||
* @author supalle |
||||
* @see <a href="http://unicode.org/faq/utf_bom.html#BOM">Byte Order Mark (BOM) FAQ</a> |
||||
* @see <a href="https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/ByteOrderMark.html">Apache CommonsIO BOMInputStream</a> |
||||
*/ |
||||
public class BomBufferedInputStream extends BufferedInputStream { |
||||
public final static List<ByteOrderMark> DEFAULT_BYTE_ORDER_MARKS = new ArrayList<>(); |
||||
|
||||
static { |
||||
DEFAULT_BYTE_ORDER_MARKS.add(ByteOrderMark.UTF_8); |
||||
DEFAULT_BYTE_ORDER_MARKS.add(ByteOrderMark.UTF_16BE); |
||||
DEFAULT_BYTE_ORDER_MARKS.add(ByteOrderMark.UTF_16LE); |
||||
DEFAULT_BYTE_ORDER_MARKS.add(ByteOrderMark.UTF_32BE); |
||||
DEFAULT_BYTE_ORDER_MARKS.add(ByteOrderMark.UTF_32LE); |
||||
} |
||||
|
||||
private boolean initialized; |
||||
private ByteOrderMark byteOrderMark; |
||||
private final List<ByteOrderMark> byteOrderMarks; |
||||
|
||||
public BomBufferedInputStream(InputStream in, final ByteOrderMark... byteOrderMarks) { |
||||
super(in); |
||||
this.byteOrderMarks = applyByteOrderMarks(byteOrderMarks); |
||||
} |
||||
|
||||
public BomBufferedInputStream(InputStream in, int size, final ByteOrderMark... byteOrderMarks) { |
||||
super(in, size); |
||||
this.byteOrderMarks = applyByteOrderMarks(byteOrderMarks); |
||||
} |
||||
|
||||
private static List<ByteOrderMark> applyByteOrderMarks(ByteOrderMark[] byteOrderMarks) { |
||||
return byteOrderMarks == null || byteOrderMarks.length == 0 ? DEFAULT_BYTE_ORDER_MARKS : Arrays.asList(byteOrderMarks); |
||||
} |
||||
|
||||
public boolean hasByteOrderMark() throws IOException { |
||||
return getByteOrderMark() != null; |
||||
} |
||||
|
||||
public ByteOrderMark getByteOrderMark() throws IOException { |
||||
if (initialized) { |
||||
return byteOrderMark; |
||||
} |
||||
this.byteOrderMarks.sort(ByteOrderMark::compareTo); |
||||
int maxBomLength = byteOrderMarks.get(0).length(); |
||||
mark(maxBomLength); |
||||
int[] firstBytes = new int[maxBomLength]; |
||||
for (int i = 0; i < maxBomLength; i++) { |
||||
firstBytes[i] = read(); |
||||
if (firstBytes[i] < 0) { |
||||
break; |
||||
} |
||||
} |
||||
byteOrderMark = matchByteOrderMark(this.byteOrderMarks, firstBytes); |
||||
|
||||
reset(); |
||||
if (byteOrderMark != null) { |
||||
// read(new byte[byteOrderMark.length()]);
|
||||
skip(byteOrderMark.length()); |
||||
} |
||||
initialized = true; |
||||
return byteOrderMark; |
||||
} |
||||
|
||||
private ByteOrderMark matchByteOrderMark(final List<ByteOrderMark> byteOrderMarks, final int[] firstBytes) { |
||||
loop: |
||||
for (ByteOrderMark item : byteOrderMarks) { |
||||
int[] bytes = item.getBytes(); |
||||
int length = bytes.length; |
||||
for (int i = 0; i < length; i++) { |
||||
if (firstBytes[i] != bytes[i]) { |
||||
continue loop; |
||||
} |
||||
} |
||||
return item; |
||||
} |
||||
return null; |
||||
} |
||||
|
||||
} |
@ -0,0 +1,112 @@
|
||||
package com.alibaba.excel.analysis.csv; |
||||
|
||||
import java.nio.charset.Charset; |
||||
import java.nio.charset.StandardCharsets; |
||||
import java.util.Arrays; |
||||
import java.util.Objects; |
||||
import java.util.stream.Collectors; |
||||
|
||||
/**
 * Byte Order Mark (BOM): a charset together with the byte sequence that identifies it at the start of a stream.
 * <br/>
 * Used in {@link BomBufferedInputStream}
 * <p>
 * Instances do not share their byte array with callers: the constructor and {@link #getBytes()} both copy it.
 *
 * @author supalle
 * @see <a href="http://unicode.org/faq/utf_bom.html#BOM">Byte Order Mark (BOM) FAQ</a>
 * @see <a href="https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/ByteOrderMark.html">Apache CommonsIO ByteOrderMark</a>
 */
public class ByteOrderMark implements Comparable<ByteOrderMark> {

    /**
     * UTF-8 BOM.
     */
    public static final ByteOrderMark UTF_8 = new ByteOrderMark(StandardCharsets.UTF_8, 0xEF, 0xBB, 0xBF);

    /**
     * UTF-16BE BOM (Big-Endian).
     */
    public static final ByteOrderMark UTF_16BE = new ByteOrderMark(StandardCharsets.UTF_16BE, 0xFE, 0xFF);

    /**
     * UTF-16LE BOM (Little-Endian).
     */
    public static final ByteOrderMark UTF_16LE = new ByteOrderMark(StandardCharsets.UTF_16LE, 0xFF, 0xFE);

    /**
     * UTF-32BE BOM (Big-Endian).
     *
     * @since 2.2
     */
    public static final ByteOrderMark UTF_32BE = new ByteOrderMark(Charset.forName("UTF-32BE"), 0x00, 0x00, 0xFE, 0xFF);

    /**
     * UTF-32LE BOM (Little-Endian).
     *
     * @since 2.2
     */
    public static final ByteOrderMark UTF_32LE = new ByteOrderMark(Charset.forName("UTF-32LE"), 0xFF, 0xFE, 0x00, 0x00);

    /**
     * Unicode BOM character; external form depends on the encoding.
     *
     * @see <a href="http://unicode.org/faq/utf_bom.html#BOM">Byte Order Mark (BOM) FAQ</a>
     * @since 2.5
     */
    public static final char UTF_BOM = '\uFEFF';

    private final Charset charset;
    private final int[] bytes;

    /**
     * Creates a byte order mark.
     *
     * @param charset the charset this mark identifies
     * @param bytes   the values that make up the mark, one array element per byte
     * @throws NullPointerException     if {@code charset} is {@code null}
     * @throws IllegalArgumentException if {@code bytes} is {@code null} or empty
     */
    public ByteOrderMark(final Charset charset, final int... bytes) {
        this.charset = Objects.requireNonNull(charset, "charset must be not null");
        if (bytes == null || bytes.length == 0) {
            throw new IllegalArgumentException("bytes must be not empty");
        }
        // Copy, so that later changes to the caller's array cannot alter this instance.
        this.bytes = bytes.clone();
    }

    /**
     * @return the charset this mark identifies
     */
    public Charset getCharset() {
        return charset;
    }

    /**
     * @return a copy of the values that make up this mark; modifying it does not affect this instance
     */
    public int[] getBytes() {
        return bytes.clone();
    }

    /**
     * @return the number of bytes in this mark
     */
    public int length() {
        return bytes.length;
    }

    /**
     * Two marks are equal when they have the same charset and the same byte sequence.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        ByteOrderMark that = (ByteOrderMark) o;
        return Objects.equals(charset, that.charset) && Arrays.equals(bytes, that.bytes);
    }

    @Override
    public int hashCode() {
        int result = Objects.hash(charset);
        result = 31 * result + Arrays.hashCode(bytes);
        return result;
    }

    /**
     * Renders the charset and the bytes as upper-case hexadecimal, e.g. {@code bytes=[0xEF,0xBB,0xBF]}.
     */
    @Override
    public String toString() {
        String hexBytes = Arrays.stream(bytes)
            .mapToObj(Integer::toHexString)
            .map(String::toUpperCase)
            .map("0x"::concat)
            .collect(Collectors.joining(","));
        return "ByteOrderMark{" + "charset=" + charset + ", bytes=[" + hexBytes + "]}";
    }

    /**
     * Orders marks by descending length, so that the longest mark comes first when sorting.
     * <p>
     * Only the length is compared; this ordering is therefore not consistent with {@link #equals(Object)}
     * (two different marks of the same length compare as {@code 0}).
     */
    @Override
    public int compareTo(ByteOrderMark o) {
        return Integer.compare(o.length(), length());
    }
}
@ -0,0 +1,16 @@
|
||||
package com.alibaba.easyexcel.test.core.bom; |
||||
|
||||
import com.alibaba.excel.annotation.ExcelProperty; |
||||
import lombok.EqualsAndHashCode; |
||||
import lombok.Getter; |
||||
import lombok.Setter; |
||||
|
||||
@Getter |
||||
@Setter |
||||
@EqualsAndHashCode |
||||
public class BomData { |
||||
@ExcelProperty("姓名") |
||||
private String name; |
||||
@ExcelProperty("年纪") |
||||
private Integer age; |
||||
} |
@ -0,0 +1,53 @@
|
||||
package com.alibaba.easyexcel.test.core.bom; |
||||
|
||||
import com.alibaba.easyexcel.test.util.TestFileUtil; |
||||
import com.alibaba.excel.EasyExcel; |
||||
import com.alibaba.excel.context.AnalysisContext; |
||||
import com.alibaba.excel.metadata.data.ReadCellData; |
||||
import com.alibaba.excel.read.listener.ReadListener; |
||||
import org.apache.commons.compress.utils.Lists; |
||||
import org.junit.Assert; |
||||
import org.junit.FixMethodOrder; |
||||
import org.junit.Test; |
||||
import org.junit.runners.MethodSorters; |
||||
|
||||
import java.io.File; |
||||
import java.util.List; |
||||
import java.util.Map; |
||||
|
||||
@FixMethodOrder(MethodSorters.NAME_ASCENDING) |
||||
public class BomDataTest { |
||||
@Test |
||||
public void t01ReadAndWriteCsv() { |
||||
readCsv(TestFileUtil.readFile("bom" + File.separator + "bom_none.csv")); |
||||
readCsv(TestFileUtil.readFile("bom" + File.separator + "bom_utf8.csv")); |
||||
readCsv(TestFileUtil.readFile("bom" + File.separator + "bom_utf16be.csv")); |
||||
readCsv(TestFileUtil.readFile("bom" + File.separator + "bom_utf16le.csv")); |
||||
} |
||||
|
||||
private void readCsv(File file) { |
||||
EasyExcel.read(file, BomData.class, new ReadListener<BomData>() { |
||||
|
||||
private final List<BomData> dataList = Lists.newArrayList(); |
||||
|
||||
@Override |
||||
public void invokeHead(Map<Integer, ReadCellData<?>> headMap, AnalysisContext context) { |
||||
String head = headMap.get(0).getStringValue(); |
||||
Assert.assertEquals("姓名", head); |
||||
} |
||||
|
||||
@Override |
||||
public void invoke(BomData data, AnalysisContext context) { |
||||
dataList.add(data); |
||||
} |
||||
|
||||
@Override |
||||
public void doAfterAllAnalysed(AnalysisContext context) { |
||||
Assert.assertEquals(dataList.size(), 10); |
||||
BomData bomData = dataList.get(0); |
||||
Assert.assertEquals("姓名0", bomData.getName()); |
||||
Assert.assertEquals(0, (long) bomData.getAge()); |
||||
} |
||||
}).sheet().doRead(); |
||||
} |
||||
} |
|
|
|
Loading…
Reference in new issue