forked from github/easyexcel
Jiaju Zhuang
5 years ago
18 changed files with 389 additions and 207 deletions
@ -0,0 +1,22 @@
|
||||
# 10M以上文件读取说明 |
||||
03版没有办法处理,相对内存占用大很多。excel 07版本有个[共享字符串](https://docs.microsoft.com/zh-cn/office/open-xml/working-with-the-shared-string-table)的概念,这个会非常占用内存,如果全部读取到内存的话,大概是excel文件的大小的3-10倍,所以easyexcel采用先把共享字符串存储到文件,读取时再反序列化的策略来节约内存。当然,通过文件反序列化以后,效率会降低,大概降低30-50%(不一定,也看命中率,可能会超过100%)。 |
||||
## 如果对读取效率感觉还能接受,就用默认的,永久占用(单个excel读取整个过程)一般不会超过50M(大概率就30M),剩下临时的GC会很快回收 |
||||
## 默认大文件处理 |
||||
默认大文件处理会自动判断,共享字符串5M以下会使用内存存储,大概占用15-50M的内存,超过5M则使用文件存储,然后文件存储也要设置多少M内存用来存放临时的共享字符串,默认20M。除了共享字符串占用内存外,其他占用较少,所以可以预估10M,所以默认大概30M就能读取一个超级大的文件。 |
||||
## 根据实际需求配置内存 |
||||
想自定义设置,首先要确定你大概愿意花多少内存来读取一个超级大的excel,比如希望读取excel最多占用100M内存(是读取过程中永久占用,新生代马上回收的不算),那就设置使用文件来存储共享字符串的大小判断为20M(小于20M存内存,大于存临时文件),然后设置文件存储时临时共享字符串占用内存大小90M差不多 |
||||
### 如果最大文件条数也就十几二十万,然后excel也就是十几二十M,而且不会有很高的并发,并且内存也较大 |
||||
```java |
||||
// 强制使用内存存储,这样大概一个20M的excel使用150M(很多临时对象,所以100M会一直GC)的内存 |
||||
// 这样效率会比上面的复杂的策略高很多 |
||||
EasyExcel.read().readCache(new MapCache()); |
||||
``` |
||||
### 对并发要求较高,而且都是经常有超级大文件 |
||||
```java |
||||
// 第一个参数的意思是 多少M共享字符串以后 采用文件存储 单位MB 默认5M |
||||
// 第二个参数 文件存储时,内存存放多少M缓存数据 默认20M |
||||
// 比如 你希望用100M内存(这里说的是解析过程中的永久占用,临时对象不算)来解析excel,前面算过了 大概是 20M+90M 所以设置参数为:20 和 90 |
||||
EasyExcel.read().readCacheSelector(new SimpleReadCacheSelector(20, 90)); |
||||
``` |
||||
### 关于maxCacheActivateSize 也就是前面第二个参数的详细说明 |
||||
easyexcel在使用文件存储的时候,会把共享字符串拆分成1000条一批,然后放到文件存储。然后excel来读取共享字符串大概率是按照顺序的,所以默认20M的1000条的数据放在内存,命中后直接返回,没命中去读文件。所以不能设置太小,太小了,很难命中,一直去读取文件,太大了的话会占用过多的内存。 |
@ -0,0 +1,23 @@
|
||||
package com.alibaba.excel.cache.selector; |
||||
|
||||
import org.apache.poi.openxml4j.opc.PackagePart; |
||||
|
||||
import com.alibaba.excel.cache.ReadCache; |
||||
|
||||
/** |
||||
* Choose a eternal cache |
||||
* |
||||
* @author Jiaju Zhuang |
||||
**/ |
||||
public class EternalReadCacheSelector implements ReadCacheSelector { |
||||
private ReadCache readCache; |
||||
|
||||
public EternalReadCacheSelector(ReadCache readCache) { |
||||
this.readCache = readCache; |
||||
} |
||||
|
||||
@Override |
||||
public ReadCache readCache(PackagePart sharedStringsTablePackagePart) { |
||||
return readCache; |
||||
} |
||||
} |
@ -0,0 +1,21 @@
|
||||
package com.alibaba.excel.cache.selector; |
||||
|
||||
import org.apache.poi.openxml4j.opc.PackagePart; |
||||
|
||||
import com.alibaba.excel.cache.ReadCache; |
||||
|
||||
/** |
||||
* Select the cache |
||||
* |
||||
* @author Jiaju Zhuang |
||||
**/ |
||||
public interface ReadCacheSelector { |
||||
|
||||
/** |
||||
* Select a cache |
||||
* |
||||
* @param sharedStringsTablePackagePart |
||||
* @return |
||||
*/ |
||||
ReadCache readCache(PackagePart sharedStringsTablePackagePart); |
||||
} |
@ -0,0 +1,82 @@
|
||||
package com.alibaba.excel.cache.selector; |
||||
|
||||
import java.io.IOException; |
||||
|
||||
import org.apache.poi.openxml4j.opc.PackagePart; |
||||
import org.slf4j.Logger; |
||||
import org.slf4j.LoggerFactory; |
||||
|
||||
import com.alibaba.excel.cache.Ehcache; |
||||
import com.alibaba.excel.cache.MapCache; |
||||
import com.alibaba.excel.cache.ReadCache; |
||||
|
||||
/** |
||||
* Simple cache selector |
||||
* |
||||
* @author Jiaju Zhuang |
||||
**/ |
||||
public class SimpleReadCacheSelector implements ReadCacheSelector { |
||||
private static final Logger LOGGER = LoggerFactory.getLogger(SimpleReadCacheSelector.class); |
||||
/** |
||||
* Convert bytes to megabytes |
||||
*/ |
||||
private static final long B2M = 1000 * 1000L; |
||||
/** |
||||
* If it's less than 5M, use map cache, or use ehcache.unit MB. |
||||
*/ |
||||
private static final int DEFAULT_MAX_USE_MAP_CACHE_SIZE = 5; |
||||
/** |
||||
* Maximum size of cache activation.unit MB. |
||||
*/ |
||||
private static final int DEFAULT_MAX_EHCACHE_ACTIVATE_SIZE = 20; |
||||
|
||||
/** |
||||
* Shared strings exceeding this value will use {@link Ehcache},or use {@link MapCache}.unit MB. |
||||
*/ |
||||
private long maxUseMapCacheSize; |
||||
|
||||
/** |
||||
* Maximum size of cache activation.unit MB. |
||||
*/ |
||||
private int maxCacheActivateSize; |
||||
|
||||
public SimpleReadCacheSelector() { |
||||
this(DEFAULT_MAX_USE_MAP_CACHE_SIZE, DEFAULT_MAX_EHCACHE_ACTIVATE_SIZE); |
||||
} |
||||
|
||||
public SimpleReadCacheSelector(long maxUseMapCacheSize, int maxCacheActivateSize) { |
||||
if (maxUseMapCacheSize <= 0) { |
||||
this.maxUseMapCacheSize = DEFAULT_MAX_USE_MAP_CACHE_SIZE; |
||||
} else { |
||||
this.maxUseMapCacheSize = maxUseMapCacheSize; |
||||
} |
||||
if (maxCacheActivateSize <= 0) { |
||||
this.maxCacheActivateSize = DEFAULT_MAX_EHCACHE_ACTIVATE_SIZE; |
||||
} else { |
||||
this.maxCacheActivateSize = maxCacheActivateSize; |
||||
} |
||||
} |
||||
|
||||
@Override |
||||
public ReadCache readCache(PackagePart sharedStringsTablePackagePart) { |
||||
long size = sharedStringsTablePackagePart.getSize(); |
||||
if (size < 0) { |
||||
try { |
||||
size = sharedStringsTablePackagePart.getInputStream().available(); |
||||
} catch (IOException e) { |
||||
LOGGER.warn("Unable to get file size, default used MapCache"); |
||||
return new MapCache(); |
||||
} |
||||
} |
||||
if (size < maxUseMapCacheSize * B2M) { |
||||
if (LOGGER.isDebugEnabled()) { |
||||
LOGGER.debug("Use map cache.size:{}", size); |
||||
} |
||||
return new MapCache(); |
||||
} |
||||
if (LOGGER.isDebugEnabled()) { |
||||
LOGGER.debug("Use ehcache.size:{}", size); |
||||
} |
||||
return new Ehcache(maxCacheActivateSize); |
||||
} |
||||
} |
@ -0,0 +1,62 @@
|
||||
package com.alibaba.easyexcel.test.temp.cache; |
||||
|
||||
import java.io.File; |
||||
import java.io.IOException; |
||||
import java.util.HashMap; |
||||
import java.util.Map; |
||||
import java.util.UUID; |
||||
|
||||
import org.apache.poi.xssf.streaming.SXSSFRow; |
||||
import org.apache.poi.xssf.streaming.SXSSFSheet; |
||||
import org.apache.poi.xssf.streaming.SXSSFWorkbook; |
||||
import org.apache.poi.xssf.usermodel.XSSFRow; |
||||
import org.apache.poi.xssf.usermodel.XSSFSheet; |
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook; |
||||
import org.ehcache.Cache; |
||||
import org.ehcache.PersistentCacheManager; |
||||
import org.ehcache.config.builders.CacheConfigurationBuilder; |
||||
import org.ehcache.config.builders.CacheManagerBuilder; |
||||
import org.ehcache.config.builders.ResourcePoolsBuilder; |
||||
import org.ehcache.config.units.MemoryUnit; |
||||
import org.junit.Ignore; |
||||
import org.junit.Test; |
||||
import org.slf4j.Logger; |
||||
import org.slf4j.LoggerFactory; |
||||
|
||||
import com.alibaba.easyexcel.test.temp.poi.Poi2Test; |
||||
import com.alibaba.excel.util.FileUtils; |
||||
import com.alibaba.fastjson.JSON; |
||||
|
||||
/** |
||||
* |
||||
* @author Jiaju Zhuang |
||||
**/ |
||||
@Ignore |
||||
public class CacheTest { |
||||
private static final Logger LOGGER = LoggerFactory.getLogger(Poi2Test.class); |
||||
|
||||
@Test |
||||
public void cache() throws Exception { |
||||
|
||||
File readTempFile = FileUtils.createCacheTmpFile(); |
||||
|
||||
File cacheFile = new File(readTempFile.getPath(), UUID.randomUUID().toString()); |
||||
PersistentCacheManager persistentCacheManager = |
||||
CacheManagerBuilder.newCacheManagerBuilder().with(CacheManagerBuilder.persistence(cacheFile)) |
||||
.withCache("cache", CacheConfigurationBuilder.newCacheConfigurationBuilder(Integer.class, HashMap.class, |
||||
ResourcePoolsBuilder.newResourcePoolsBuilder().disk(10, MemoryUnit.GB))) |
||||
.build(true); |
||||
Cache<Integer, HashMap> cache = persistentCacheManager.getCache("cache", Integer.class, HashMap.class); |
||||
|
||||
HashMap<Integer, String> map = new HashMap<Integer, String>(); |
||||
map.put(1, "test"); |
||||
|
||||
cache.put(1, map); |
||||
LOGGER.info("dd1:{}", JSON.toJSONString(cache.get(1))); |
||||
|
||||
cache.clear(); |
||||
|
||||
LOGGER.info("dd2:{}", JSON.toJSONString(cache.get(1))); |
||||
} |
||||
|
||||
} |
Loading…
Reference in new issue