mirror of https://github.com/alibaba/easyexcel
Jiaju Zhuang
2 years ago
22 changed files with 256 additions and 267 deletions
@ -1,90 +0,0 @@
|
||||
package com.alibaba.excel.analysis.csv; |
||||
|
||||
|
||||
import java.io.BufferedInputStream; |
||||
import java.io.IOException; |
||||
import java.io.InputStream; |
||||
import java.util.ArrayList; |
||||
import java.util.Arrays; |
||||
import java.util.List; |
||||
|
||||
/** |
||||
* This class is used to wrap a stream that includes an encoded {@link ByteOrderMark} as its first bytes. |
||||
* |
||||
* @author supalle |
||||
* @see <a href="http://unicode.org/faq/utf_bom.html#BOM">Byte Order Mark (BOM) FAQ</a> |
||||
* @see <a href="https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/ByteOrderMark.html">Apache CommonsIO BOMInputStream</a> |
||||
*/ |
||||
public class BomBufferedInputStream extends BufferedInputStream { |
||||
public final static List<ByteOrderMark> DEFAULT_BYTE_ORDER_MARKS = new ArrayList<>(); |
||||
|
||||
static { |
||||
DEFAULT_BYTE_ORDER_MARKS.add(ByteOrderMark.UTF_8); |
||||
DEFAULT_BYTE_ORDER_MARKS.add(ByteOrderMark.UTF_16BE); |
||||
DEFAULT_BYTE_ORDER_MARKS.add(ByteOrderMark.UTF_16LE); |
||||
DEFAULT_BYTE_ORDER_MARKS.add(ByteOrderMark.UTF_32BE); |
||||
DEFAULT_BYTE_ORDER_MARKS.add(ByteOrderMark.UTF_32LE); |
||||
} |
||||
|
||||
private boolean initialized; |
||||
private ByteOrderMark byteOrderMark; |
||||
private final List<ByteOrderMark> byteOrderMarks; |
||||
|
||||
public BomBufferedInputStream(InputStream in, final ByteOrderMark... byteOrderMarks) { |
||||
super(in); |
||||
this.byteOrderMarks = applyByteOrderMarks(byteOrderMarks); |
||||
} |
||||
|
||||
public BomBufferedInputStream(InputStream in, int size, final ByteOrderMark... byteOrderMarks) { |
||||
super(in, size); |
||||
this.byteOrderMarks = applyByteOrderMarks(byteOrderMarks); |
||||
} |
||||
|
||||
private static List<ByteOrderMark> applyByteOrderMarks(ByteOrderMark[] byteOrderMarks) { |
||||
return byteOrderMarks == null || byteOrderMarks.length == 0 ? DEFAULT_BYTE_ORDER_MARKS : Arrays.asList(byteOrderMarks); |
||||
} |
||||
|
||||
public boolean hasByteOrderMark() throws IOException { |
||||
return getByteOrderMark() != null; |
||||
} |
||||
|
||||
public ByteOrderMark getByteOrderMark() throws IOException { |
||||
if (initialized) { |
||||
return byteOrderMark; |
||||
} |
||||
this.byteOrderMarks.sort(ByteOrderMark::compareTo); |
||||
int maxBomLength = byteOrderMarks.get(0).length(); |
||||
mark(maxBomLength); |
||||
int[] firstBytes = new int[maxBomLength]; |
||||
for (int i = 0; i < maxBomLength; i++) { |
||||
firstBytes[i] = read(); |
||||
if (firstBytes[i] < 0) { |
||||
break; |
||||
} |
||||
} |
||||
byteOrderMark = matchByteOrderMark(this.byteOrderMarks, firstBytes); |
||||
|
||||
reset(); |
||||
if (byteOrderMark != null) { |
||||
skip(byteOrderMark.length()); |
||||
} |
||||
initialized = true; |
||||
return byteOrderMark; |
||||
} |
||||
|
||||
private ByteOrderMark matchByteOrderMark(final List<ByteOrderMark> byteOrderMarks, final int[] firstBytes) { |
||||
loop: |
||||
for (ByteOrderMark item : byteOrderMarks) { |
||||
int[] bytes = item.getBytes(); |
||||
int length = bytes.length; |
||||
for (int i = 0; i < length; i++) { |
||||
if (firstBytes[i] != bytes[i]) { |
||||
continue loop; |
||||
} |
||||
} |
||||
return item; |
||||
} |
||||
return null; |
||||
} |
||||
|
||||
} |
@ -1,108 +0,0 @@
|
||||
package com.alibaba.excel.analysis.csv; |
||||
|
||||
import java.nio.charset.Charset; |
||||
import java.nio.charset.StandardCharsets; |
||||
import java.util.Arrays; |
||||
import java.util.Objects; |
||||
import java.util.stream.Collectors; |
||||
|
||||
/**
 * Byte Order Mark (BOM): a charset together with the byte sequence that identifies it at the start of a stream.
 * <br/>
 * Used by {@link BomBufferedInputStream}.
 * <p>
 * Instances are immutable: the byte sequence is copied on the way in and on the way out.
 * Note that {@link #compareTo(ByteOrderMark)} orders by length only and is therefore not consistent with
 * {@link #equals(Object)}.
 *
 * @author supalle
 * @see <a href="http://unicode.org/faq/utf_bom.html#BOM">Byte Order Mark (BOM) FAQ</a>
 * @see <a href="https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/ByteOrderMark.html">Apache CommonsIO ByteOrderMark</a>
 */
public class ByteOrderMark implements Comparable<ByteOrderMark> {

    /**
     * UTF-8 BOM.
     */
    public static final ByteOrderMark UTF_8 = new ByteOrderMark(StandardCharsets.UTF_8, 0xEF, 0xBB, 0xBF);

    /**
     * UTF-16BE BOM (Big-Endian).
     */
    public static final ByteOrderMark UTF_16BE = new ByteOrderMark(StandardCharsets.UTF_16BE, 0xFE, 0xFF);

    /**
     * UTF-16LE BOM (Little-Endian).
     */
    public static final ByteOrderMark UTF_16LE = new ByteOrderMark(StandardCharsets.UTF_16LE, 0xFF, 0xFE);

    /**
     * UTF-32BE BOM (Big-Endian).
     *
     * @since 2.2
     */
    public static final ByteOrderMark UTF_32BE = new ByteOrderMark(Charset.forName("UTF-32BE"), 0x00, 0x00, 0xFE, 0xFF);

    /**
     * UTF-32LE BOM (Little-Endian).
     *
     * @since 2.2
     */
    public static final ByteOrderMark UTF_32LE = new ByteOrderMark(Charset.forName("UTF-32LE"), 0xFF, 0xFE, 0x00, 0x00);

    private final Charset charset;
    private final int[] bytes;

    /**
     * Creates a byte order mark.
     *
     * @param charset the charset the mark identifies
     * @param bytes   the mark's byte values, one array element per byte
     * @throws NullPointerException     if {@code charset} is {@code null}
     * @throws IllegalArgumentException if {@code bytes} is {@code null} or empty
     */
    public ByteOrderMark(final Charset charset, final int... bytes) {
        this.charset = Objects.requireNonNull(charset, "charset must be not null");
        if (bytes == null || bytes.length == 0) {
            throw new IllegalArgumentException("bytes must be not empty");
        }
        // Defensive copy: later changes to the caller's array must not alter this mark.
        this.bytes = bytes.clone();
    }

    /**
     * @return the charset this mark identifies
     */
    public Charset getCharset() {
        return charset;
    }

    /**
     * @return a copy of the mark's byte values; modifying it does not affect this instance
     */
    public int[] getBytes() {
        // Hand out a copy so callers cannot corrupt shared constants such as UTF_8.
        return bytes.clone();
    }

    /**
     * @return the number of bytes in this mark
     */
    public int length() {
        return bytes.length;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        ByteOrderMark that = (ByteOrderMark) o;
        return Objects.equals(charset, that.charset) && Arrays.equals(bytes, that.bytes);
    }

    @Override
    public int hashCode() {
        int result = Objects.hash(charset);
        result = 31 * result + Arrays.hashCode(bytes);
        return result;
    }

    @Override
    public String toString() {
        return "ByteOrderMark{" +
            "charset=" + charset +
            ", bytes=["
            + Arrays.stream(bytes)
            .mapToObj(Integer::toHexString)
            .map(String::toUpperCase)
            .map("0x"::concat)
            .collect(Collectors.joining(",")) +
            "]}";
    }

    /**
     * Orders marks by descending length, so that sorting puts the longest mark first.
     *
     * @param o the mark to compare with
     * @return a negative value when this mark is longer than {@code o}, zero when both have the same length,
     * a positive value when this mark is shorter
     */
    @Override
    public int compareTo(ByteOrderMark o) {
        return Integer.compare(o.length(), length());
    }

}
@ -0,0 +1,51 @@
|
||||
package com.alibaba.excel.enums; |
||||
|
||||
import java.nio.charset.Charset; |
||||
import java.util.Map; |
||||
|
||||
import com.alibaba.excel.util.MapUtils; |
||||
|
||||
import lombok.Getter; |
||||
import org.apache.commons.io.ByteOrderMark; |
||||
|
||||
/** |
||||
* byte order mark |
||||
* |
||||
* @author Jiaju Zhuang |
||||
*/ |
||||
@Getter |
||||
public enum ByteOrderMarkEnum { |
||||
|
||||
UTF_8(ByteOrderMark.UTF_8), |
||||
UTF_16BE(ByteOrderMark.UTF_16BE), |
||||
UTF_16LE(ByteOrderMark.UTF_16LE), |
||||
UTF_32BE(ByteOrderMark.UTF_32BE), |
||||
UTF_32LE(ByteOrderMark.UTF_32LE), |
||||
|
||||
; |
||||
|
||||
final ByteOrderMark byteOrderMark; |
||||
final String stringPrefix; |
||||
|
||||
ByteOrderMarkEnum(ByteOrderMark byteOrderMark) { |
||||
this.byteOrderMark = byteOrderMark; |
||||
Charset charset = Charset.forName(byteOrderMark.getCharsetName()); |
||||
this.stringPrefix = new String(byteOrderMark.getBytes(), charset); |
||||
} |
||||
|
||||
/** |
||||
* store character aliases corresponding to `ByteOrderMark` prefix |
||||
*/ |
||||
private static final Map<String, ByteOrderMarkEnum> CHARSET_BYTE_ORDER_MARK_MAP = MapUtils.newHashMap(); |
||||
|
||||
static { |
||||
for (ByteOrderMarkEnum value : ByteOrderMarkEnum.values()) { |
||||
CHARSET_BYTE_ORDER_MARK_MAP.put(value.getByteOrderMark().getCharsetName(), value); |
||||
} |
||||
} |
||||
|
||||
public static ByteOrderMarkEnum valueOfByCharsetName(String charsetName) { |
||||
return CHARSET_BYTE_ORDER_MARK_MAP.get(charsetName); |
||||
} |
||||
|
||||
} |
|
|
|
|
|
|
Loading…
Reference in new issue