mirror of https://github.com/alibaba/easyexcel
Jiaju Zhuang
2 years ago
committed by
GitHub
63 changed files with 1580 additions and 646 deletions
@ -0,0 +1,9 @@ |
|||||||
|
# 建议先去看文档 |
||||||
|
[快速开始](https://easyexcel.opensource.alibaba.com/docs/current/) 、[常见问题](https://easyexcel.opensource.alibaba.com/qa/) |
||||||
|
# 异常代码 |
||||||
|
```java |
||||||
|
这里写你的代码 |
||||||
|
``` |
||||||
|
# 异常提示 |
||||||
|
大家尽量把问题一次性描述清楚,然后贴上全部异常,这样方便把问题一次性解决掉。 |
||||||
|
# 其他描述 |
@ -0,0 +1,55 @@ |
|||||||
|
# |
||||||
|
# Copyright 2009-2021 the original author or authors. |
||||||
|
# |
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
# you may not use this file except in compliance with the License. |
||||||
|
# You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, software |
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
# See the License for the specific language governing permissions and |
||||||
|
# limitations under the License. |
||||||
|
# |
||||||
|
|
||||||
|
name: Publish package to the Maven Central Repository

# Runs once for every GitHub release that is created.
on:
  release:
    types: [created]

jobs:
  publish:
    runs-on: ubuntu-latest
    steps:
      # NOTE(review): `@main` follows a moving branch; consider pinning the
      # checkout and setup-java actions to a release tag or commit SHA.
      - name: Check out Git repository
        uses: actions/checkout@main
      - name: Install Java and Maven
        uses: actions/setup-java@main
        with:
          java-version: 8
          distribution: 'adopt'
          # server-username / server-password are the NAMES of the environment
          # variables that hold the credentials for server `ossrh`; the values
          # are supplied in the `env` of the publish step below.
          server-id: ossrh
          server-username: MAVEN_USERNAME
          server-password: MAVEN_PASSWORD
      - name: Cache local Maven repository
        uses: actions/cache@v3
        with:
          path: ~/.m2/repository
          key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
          restore-keys: |
            ${{ runner.os }}-m2
      - id: install-secret-key
        name: Install GPG secret key
        # The key is passed through the environment instead of being pasted
        # into the script text, so the shell never re-parses its content.
        run: |
          echo -e "${GPG_PRIVATE_KEY}" | gpg --batch --import
        env:
          GPG_PRIVATE_KEY: ${{ secrets.GPG_PRIVATE_KEY }}
      - name: Publish package
        # The passphrase is read from the environment and quoted, so special
        # characters in it cannot split or alter the mvn command line.
        run: |
          mvn --batch-mode -Dgpg.passphrase="${GPG_PASSPHRASE}" clean deploy -Dmaven.test.skip=true -Dmaven.javadoc.skip=false -Dgpg.skip=false
        env:
          MAVEN_USERNAME: ${{ secrets.OSSRH_USERNAME }}
          MAVEN_PASSWORD: ${{ secrets.OSSRH_TOKEN }}
          GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
@ -0,0 +1,26 @@ |
|||||||
|
# Uses a GitHub Action to sync the repository to Gitee automatically after every commit.
name: Mirror the Github organization repos to Gitee

# Only pushes trigger the mirror. Pull requests do not change the repository
# content, and runs started from forked pull requests receive no secrets, so
# the mirror step could not authenticate there.
on: [push]

jobs:
  repo-sync:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@main
        with:
          persist-credentials: false
      - name: Mirror the Github organization repos to Gitee.
        uses: Yikun/hub-mirror-action@master
        with:
          # Required: the GitHub side to sync from. This is the organization or account, not a single project.
          src: github/alibaba
          # Required: the Gitee side to sync to. This is the organization or account, not a single project.
          dst: gitee/easyexcel
          # Required: the private key matching the public key registered at Gitee, https://gitee.com/profile/sshkeys
          dst_key: ${{ secrets.GITEE_PRIVATE_KEY }}
          # Required: the Gitee token used to create repositories, https://gitee.com/profile/personal_access_tokens
          dst_token: ${{ secrets.GITEE_TOKEN }}
          # Set to org when the target is an organization; the default is user.
          account_type: org
          # The projects inside the account that should be synced.
          static_list: "easyexcel"
@ -1,53 +0,0 @@ |
|||||||
# easyexcel要去解决的问题 |
|
||||||
|
|
||||||
## Excel读写时候内存溢出 |
|
||||||
|
|
||||||
虽然POI是目前使用最多的用来做excel解析的框架,但这个框架并不那么完美。大部分使用POI都是使用他的userModel模式。userModel的好处是上手容易使用简单,随便拷贝个代码跑一下,剩下就是写业务转换了,虽然转换也要写上百行代码,相对比较好理解。然而userModel模式最大的问题是在于非常大的内存消耗,一个几兆的文件解析要用掉上百兆的内存。现在很多应用采用这种模式,之所以还正常在跑一定是并发不大,并发上来后一定会OOM或者频繁的full gc。 |
|
||||||
|
|
||||||
## 其他开源框架使用复杂 |
|
||||||
|
|
||||||
对POI有过深入了解的估计才知道原来POI还有SAX模式。但SAX模式相对比较复杂,excel有03和07两种版本,两个版本数据存储方式截然不同,sax解析方式也各不一样。想要了解清楚这两种解析方式,才去写代码测试,估计两天时间是需要的。再加上即使解析完,要转换到自己业务模型还要很多繁琐的代码。总体下来感觉至少需要三天,由于代码复杂,后续维护成本巨大。 |
|
||||||
|
|
||||||
## 其他开源框架存在一些BUG修复不及时 |
|
||||||
|
|
||||||
由于我们的系统大多数都是大并发的情况下运行的,在大并发情况下,我们会发现poi存在一些bug,如果让POI团队修复估计遥遥无期了。所以我们在easyexcel对这些bug做了规避。 |
|
||||||
如下一段报错就是在大并发情况下poi抛的一个异常。 |
|
||||||
``` |
|
||||||
Caused by: java.io.IOException: Could not create temporary directory '/home/admin/dio2o/.default/temp/poifiles' |
|
||||||
at org.apache.poi.util.DefaultTempFileCreationStrategy.createTempDirectory(DefaultTempFileCreationStrategy.java:93) ~[poi-3.15.jar:3.15] |
|
||||||
at org.apache.poi.util.DefaultTempFileCreationStrategy.createPOIFilesDirectory(DefaultTempFileCreationStrategy.java:82) ~[poi-3.15.jar:3.15] |
|
||||||
``` |
|
||||||
报错地方poi源码如下 |
|
||||||
``` |
|
||||||
private void createTempDirectory(File directory) throws IOException { |
|
||||||
if (!(directory.exists() || directory.mkdirs()) || !directory.isDirectory()) { |
|
||||||
throw new IOException("Could not create temporary directory '" + directory + "'"); |
|
||||||
} |
|
||||||
} |
|
||||||
``` |
|
||||||
仔细看代码容易明白:在并发情况下,如果2个线程同时判断directory.exists()都为false,而其中一个线程先执行完directory.mkdirs(),另外一个线程的directory.mkdirs()就会返回false,最终 throw new IOException("Could not create temporary directory '" + directory + "'")。针对这个问题,easyexcel在写文件时候首先创建了该临时目录,避免poi在并发创建时候引起不该有的报错。 |
|
||||||
|
|
||||||
## Excel格式分析 |
|
||||||
|
|
||||||
- xls是Microsoft Excel2007前excel的文件存储格式,实现原理是基于微软的ole db是微软com组件的一种实现,本质上也是一个微型数据库,由于微软的东西很多不开源,另外也已经被淘汰,了解它的细节意义不大,底层的编程都是基于微软的com组件去开发的。 |
|
||||||
- xlsx是Microsoft Excel2007后excel的文件存储格式,实现是基于openXml和zip技术。这种存储简单,安全传输方便,同时处理数据也变的简单。 |
|
||||||
- csv 我们可以理解为纯文本文件,可以被excel打开。他的格式非常简单,解析起来和解析文本文件一样。 |
|
||||||
|
|
||||||
## 核心原理 |
|
||||||
|
|
||||||
写有大量数据的xlsx文件时,POI为我们提供了SXSSFWorkBook类来处理,这个类的处理机制是当内存中的数据条数达到一个极限数量的时候就flush这部分数据,再依次处理余下的数据,这个在大多数场景能够满足需求。 |
|
||||||
读有大量数据的文件时,使用WorkBook处理就不行了,因为POI对文件是先将文件中的cell读入内存,生成一个树的结构(针对Excel中的每个sheet,使用TreeMap存储sheet中的行)。如果数据量比较大,则同样会产生java.lang.OutOfMemoryError: Java heap space错误。POI官方推荐使用“XSSF and SAX(event API)”方式来解决。 |
|
||||||
分析清楚POI后要解决OOM有3个关键。 |
|
||||||
|
|
||||||
### 1、文件解压文件读取通过文件形式 |
|
||||||
|
|
||||||
![屏幕快照 2018-01-22 上午8.52.08.png](http://ata2-img.cn-hangzhou.img-pub.aliyun-inc.com/e3a3500014c95f7118d8c200a51acab4.png) |
|
||||||
|
|
||||||
### 2、避免将全部数据一次加载到内存 |
|
||||||
|
|
||||||
采用sax模式一行一行解析,并将一行的解析结果以观察者的模式通知处理。 |
|
||||||
![基础模板1 (2).png](http://ata2-img.cn-hangzhou.img-pub.aliyun-inc.com/82bb195ac62532963b2364d2e4da23e5.png) |
|
||||||
|
|
||||||
### 3、抛弃不重要的数据 |
|
||||||
|
|
||||||
Excel解析时候会包含样式,字体,宽度等数据,但这些数据是我们不关心的,如果将这部分数据抛弃可以大大降低内存使用。Excel中数据如下Style占了相当大的空间。 |
|
@ -0,0 +1,19 @@ |
|||||||
|
package com.alibaba.excel.constant; |
||||||
|
|
||||||
|
import java.math.MathContext; |
||||||
|
import java.math.RoundingMode; |
||||||
|
|
||||||
|
/**
 * Holds constants shared across EasyExcel.
 *
 * @author Jiaju Zhuang
 */
public class EasyExcelConstants {

    /**
     * Number of significant digits used when rounding numbers for Excel.
     */
    private static final int EXCEL_SIGNIFICANT_DIGITS = 15;

    /**
     * Math context for rounding numeric values to Excel's precision.
     * <p>
     * Excel stores numbers with 15 significant digits by default, while a Java {@code double} can carry 17, which
     * leads to precision problems. Values are therefore rounded to 15 significant digits using
     * {@link RoundingMode#HALF_UP}.
     */
    public static final MathContext EXCEL_MATH_CONTEXT =
        new MathContext(EXCEL_SIGNIFICANT_DIGITS, RoundingMode.HALF_UP);

}
@ -0,0 +1,40 @@ |
|||||||
|
package com.alibaba.excel.enums; |
||||||
|
|
||||||
|
import java.math.BigDecimal; |
||||||
|
import java.time.LocalDateTime; |
||||||
|
import java.util.HashMap; |
||||||
|
import java.util.Map; |
||||||
|
|
||||||
|
import com.alibaba.excel.metadata.data.CellData; |
||||||
|
import com.alibaba.excel.util.StringUtils; |
||||||
|
|
||||||
|
/**
 * Selects the type of value returned by default when a read is performed without a
 * {@code com.alibaba.excel.metadata.BasicParameter#clazz} value.
 *
 * @author Jiaju Zhuang
 */
public enum ReadDefaultReturnEnum {

    /**
     * The default. Cell content is returned as a string, the same as it is shown in Excel.
     */
    STRING,

    /**
     * Returns the actual type of the data.
     * <p>
     * The return type is chosen automatically from the cell content and is one of:
     * <ol>
     *     <li>{@link BigDecimal}</li>
     *     <li>{@link Boolean}</li>
     *     <li>{@link String}</li>
     *     <li>{@link LocalDateTime}</li>
     * </ol>
     */
    ACTUAL_DATA,

    /**
     * Returns a {@link com.alibaba.excel.metadata.data.ReadCellData}, so the caller can decide which field to use.
     */
    READ_CELL_DATA

}
@ -0,0 +1,143 @@ |
|||||||
|
package com.alibaba.easyexcel.test.demo.rare; |
||||||
|
|
||||||
|
import java.io.File; |
||||||
|
import java.util.Date; |
||||||
|
import java.util.List; |
||||||
|
|
||||||
|
import com.alibaba.easyexcel.test.demo.write.DemoData; |
||||||
|
import com.alibaba.easyexcel.test.util.TestFileUtil; |
||||||
|
import com.alibaba.excel.EasyExcel; |
||||||
|
import com.alibaba.excel.ExcelWriter; |
||||||
|
import com.alibaba.excel.util.FileUtils; |
||||||
|
import com.alibaba.excel.util.ListUtils; |
||||||
|
import com.alibaba.excel.write.handler.RowWriteHandler; |
||||||
|
import com.alibaba.excel.write.handler.SheetWriteHandler; |
||||||
|
import com.alibaba.excel.write.handler.WorkbookWriteHandler; |
||||||
|
import com.alibaba.excel.write.handler.context.RowWriteHandlerContext; |
||||||
|
import com.alibaba.excel.write.handler.context.SheetWriteHandlerContext; |
||||||
|
import com.alibaba.excel.write.handler.context.WorkbookWriteHandlerContext; |
||||||
|
import com.alibaba.excel.write.metadata.WriteSheet; |
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j; |
||||||
|
import org.apache.poi.ss.usermodel.Cell; |
||||||
|
import org.apache.poi.ss.usermodel.Row; |
||||||
|
import org.apache.poi.ss.usermodel.Sheet; |
||||||
|
import org.apache.poi.ss.usermodel.Workbook; |
||||||
|
import org.apache.poi.xssf.streaming.SXSSFWorkbook; |
||||||
|
import org.junit.Ignore; |
||||||
|
import org.junit.Test; |
||||||
|
|
||||||
|
/** |
||||||
|
* 记录一些不太常见的案例 |
||||||
|
* |
||||||
|
* @author Jiaju Zhuang |
||||||
|
*/ |
||||||
|
@Ignore |
||||||
|
@Slf4j |
||||||
|
public class WriteTest { |
||||||
|
|
||||||
|
/** |
||||||
|
* 压缩临时文件 |
||||||
|
* 在导出Excel且格式为xlsx的时候会生成一个临时的xml文件,会比较大,再磁盘不太够的情况下,可以压缩。 |
||||||
|
* 当然压缩式耗费性能的 |
||||||
|
*/ |
||||||
|
@Test |
||||||
|
public void compressedTemporaryFile() { |
||||||
|
log.info("临时的xml存储在:{}", FileUtils.getPoiFilesPath()); |
||||||
|
File file = TestFileUtil.createNewFile("rare/compressedTemporaryFile" + System.currentTimeMillis() |
||||||
|
+ ".xlsx"); |
||||||
|
|
||||||
|
// 这里 需要指定写用哪个class去写
|
||||||
|
try (ExcelWriter excelWriter = EasyExcel.write(file, DemoData.class).registerWriteHandler( |
||||||
|
new WorkbookWriteHandler() { |
||||||
|
|
||||||
|
/** |
||||||
|
* 拦截Workbook创建完成事件 |
||||||
|
* @param context |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public void afterWorkbookCreate(WorkbookWriteHandlerContext context) { |
||||||
|
// 获取到Workbook对象
|
||||||
|
Workbook workbook = context.getWriteWorkbookHolder().getWorkbook(); |
||||||
|
// 只有SXSSFWorkbook模式才会生成临时文件
|
||||||
|
if (workbook instanceof SXSSFWorkbook) { |
||||||
|
SXSSFWorkbook sxssfWorkbook = (SXSSFWorkbook)workbook; |
||||||
|
// 设置临时文件压缩,当然这个会浪费cpu性能 但是临时文件会变小
|
||||||
|
sxssfWorkbook.setCompressTempFiles(true); |
||||||
|
} |
||||||
|
} |
||||||
|
}).build()) { |
||||||
|
// 这里注意 如果同一个sheet只要创建一次
|
||||||
|
WriteSheet writeSheet = EasyExcel.writerSheet("模板").build(); |
||||||
|
// 10万数据 确保有足够的空间
|
||||||
|
for (int i = 0; i < 10000; i++) { |
||||||
|
// 分页去数据库查询数据 这里可以去数据库查询每一页的数据
|
||||||
|
List<DemoData> data = data(); |
||||||
|
excelWriter.write(data, writeSheet); |
||||||
|
} |
||||||
|
log.info("写入完毕,开始准备迁移压缩文件。"); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* 在指定单元格写入数据 |
||||||
|
*/ |
||||||
|
@Test |
||||||
|
public void specifiedCellWrite() { |
||||||
|
File file = TestFileUtil.createNewFile("rare/specifiedCellWrite" + System.currentTimeMillis() |
||||||
|
+ ".xlsx"); |
||||||
|
|
||||||
|
// 需要区分是在 最后一行之前 还是之后
|
||||||
|
// 区分的原因是:excel只能一直向前,而且内存里面只存储100条,而afterRowDispose是在每一行写入完成的时候调用,所以修改一行需要拦截这个事件
|
||||||
|
// 如果是在最后一行之后,由于后面不会再有数据了,所以只要拦截afterWorkbookDispose,在整个excel快写完的时候调用,继续写入数据即可
|
||||||
|
|
||||||
|
EasyExcel.write(file, DemoData.class) |
||||||
|
// 写入的值在最后一行之前
|
||||||
|
.registerWriteHandler(new RowWriteHandler() { |
||||||
|
@Override |
||||||
|
public void afterRowDispose(RowWriteHandlerContext context) { |
||||||
|
if (context.getRow().getRowNum() == 2) { |
||||||
|
Cell cell = context.getRow().getCell(2); |
||||||
|
if (cell == null) { |
||||||
|
cell = context.getRow().createCell(2); |
||||||
|
} |
||||||
|
cell.setCellValue("测试的第二行数据呀"); |
||||||
|
} |
||||||
|
} |
||||||
|
}) |
||||||
|
// 写入的值 在最后一一行之后
|
||||||
|
.registerWriteHandler(new WorkbookWriteHandler() { |
||||||
|
@Override |
||||||
|
public void afterWorkbookDispose(WorkbookWriteHandlerContext context) { |
||||||
|
Workbook workbook = context.getWriteWorkbookHolder().getWorkbook(); |
||||||
|
Sheet sheet = workbook.getSheetAt(0); |
||||||
|
Row row = sheet.getRow(99); |
||||||
|
if (row == null) { |
||||||
|
row = sheet.createRow(99); |
||||||
|
} |
||||||
|
Cell cell = row.getCell(2); |
||||||
|
if (cell == null) { |
||||||
|
cell = row.createCell(2); |
||||||
|
} |
||||||
|
cell.setCellValue("测试地99行数据呀"); |
||||||
|
} |
||||||
|
}) |
||||||
|
.sheet("模板") |
||||||
|
.doWrite(data()); |
||||||
|
|
||||||
|
log.info("写入到文件完成:{}", file); |
||||||
|
} |
||||||
|
|
||||||
|
private List<DemoData> data() { |
||||||
|
List<DemoData> list = ListUtils.newArrayList(); |
||||||
|
for (int i = 0; i < 10; i++) { |
||||||
|
DemoData data = new DemoData(); |
||||||
|
data.setString("字符串" + i); |
||||||
|
data.setDate(new Date()); |
||||||
|
data.setDoubleData(0.56); |
||||||
|
list.add(data); |
||||||
|
} |
||||||
|
return list; |
||||||
|
} |
||||||
|
|
||||||
|
} |
@ -0,0 +1,35 @@ |
|||||||
|
package com.alibaba.easyexcel.test.temp; |
||||||
|
|
||||||
|
import java.math.BigDecimal; |
||||||
|
import java.util.Date; |
||||||
|
|
||||||
|
import com.alibaba.excel.annotation.ExcelIgnore; |
||||||
|
import com.alibaba.excel.annotation.ExcelProperty; |
||||||
|
|
||||||
|
import lombok.EqualsAndHashCode; |
||||||
|
import lombok.Getter; |
||||||
|
import lombok.Setter; |
||||||
|
|
||||||
|
/** |
||||||
|
* 基础数据类 |
||||||
|
* |
||||||
|
* @author Jiaju Zhuang |
||||||
|
**/ |
||||||
|
@Getter |
||||||
|
@Setter |
||||||
|
@EqualsAndHashCode |
||||||
|
public class DemoData2 { |
||||||
|
@ExcelProperty("字符串标题") |
||||||
|
private String string; |
||||||
|
@ExcelProperty("日期标题") |
||||||
|
private Date date; |
||||||
|
@ExcelProperty("数字标题") |
||||||
|
private Double doubleData; |
||||||
|
@ExcelProperty("数字标题2") |
||||||
|
private BigDecimal bigDecimal; |
||||||
|
/** |
||||||
|
* 忽略这个字段 |
||||||
|
*/ |
||||||
|
@ExcelIgnore |
||||||
|
private String ignore; |
||||||
|
} |
@ -0,0 +1,24 @@ |
|||||||
|
package com.alibaba.easyexcel.test.temp; |
||||||
|
|
||||||
|
import java.time.LocalDateTime; |
||||||
|
import java.util.Date; |
||||||
|
|
||||||
|
import com.alibaba.excel.annotation.ExcelIgnore; |
||||||
|
import com.alibaba.excel.annotation.ExcelProperty; |
||||||
|
|
||||||
|
import lombok.EqualsAndHashCode; |
||||||
|
import lombok.Getter; |
||||||
|
import lombok.Setter; |
||||||
|
|
||||||
|
/** |
||||||
|
* 基础数据类 |
||||||
|
* |
||||||
|
* @author Jiaju Zhuang |
||||||
|
**/ |
||||||
|
@Getter |
||||||
|
@Setter |
||||||
|
@EqualsAndHashCode |
||||||
|
public class DemoData3 { |
||||||
|
@ExcelProperty("日期时间标题") |
||||||
|
private LocalDateTime localDateTime; |
||||||
|
} |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in new issue