diff --git a/pom.xml b/pom.xml index 01efe344..b5ae5cbf 100644 --- a/pom.xml +++ b/pom.xml @@ -87,6 +87,13 @@ ehcache 3.8.1 + + org.projectlombok + lombok + 1.18.20 + provided + + ch.qos.logback @@ -100,12 +107,6 @@ 1.2.71 test - - org.projectlombok - lombok - 1.18.8 - test - org.springframework.boot spring-boot @@ -232,6 +233,19 @@ + + org.projectlombok + lombok-maven-plugin + 1.18.20.0 + + + generate-sources + + delombok + + + + diff --git a/src/main/java/com/alibaba/excel/cache/Ehcache.java b/src/main/java/com/alibaba/excel/cache/Ehcache.java index f5bc5ff6..a5250900 100644 --- a/src/main/java/com/alibaba/excel/cache/Ehcache.java +++ b/src/main/java/com/alibaba/excel/cache/Ehcache.java @@ -1,52 +1,49 @@ package com.alibaba.excel.cache; import java.io.File; -import java.util.HashMap; -import java.util.Map; +import java.util.ArrayList; import java.util.UUID; +import com.alibaba.excel.context.AnalysisContext; +import com.alibaba.excel.util.FileUtils; +import com.alibaba.excel.util.ListUtils; + +import lombok.extern.slf4j.Slf4j; import org.apache.commons.collections4.CollectionUtils; -import org.apache.commons.collections4.MapUtils; import org.ehcache.CacheManager; import org.ehcache.config.CacheConfiguration; import org.ehcache.config.builders.CacheConfigurationBuilder; import org.ehcache.config.builders.CacheManagerBuilder; import org.ehcache.config.builders.ResourcePoolsBuilder; import org.ehcache.config.units.MemoryUnit; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.alibaba.excel.context.AnalysisContext; -import com.alibaba.excel.util.FileUtils; /** * Default cache * * @author Jiaju Zhuang */ +@Slf4j public class Ehcache implements ReadCache { - - private static final Logger LOGGER = LoggerFactory.getLogger(Ehcache.class); - private static final int BATCH_COUNT = 1000; - private static final int DEBUG_WRITE_SIZE = 100 * 10000; - private static final int DEBUG_CACHE_MISS_SIZE = 1000; + public static final int BATCH_COUNT = 1000; /** * 
Key index */ - private int index = 0; - private HashMap dataMap = new HashMap<>(BATCH_COUNT * 4 / 3 + 1); + private int activeIndex = 0; + public static final int DEBUG_CACHE_MISS_SIZE = 1000; + public static final int DEBUG_WRITE_SIZE = 100 * 10000; + private ArrayList dataList = ListUtils.newArrayListWithExpectedSize(BATCH_COUNT); private static final CacheManager FILE_CACHE_MANAGER; - private static final CacheConfiguration FILE_CACHE_CONFIGURATION; + private static final CacheConfiguration FILE_CACHE_CONFIGURATION; private static final CacheManager ACTIVE_CACHE_MANAGER; - private final CacheConfiguration activeCacheConfiguration; + private final CacheConfiguration activeCacheConfiguration; /** * Bulk storage data */ - private org.ehcache.Cache fileCache; + private org.ehcache.Cache fileCache; /** * Currently active cache */ - private org.ehcache.Cache activeCache; + private org.ehcache.Cache activeCache; private String cacheAlias; /** * Count the number of cache misses @@ -55,7 +52,7 @@ public class Ehcache implements ReadCache { public Ehcache(int maxCacheActivateSize) { activeCacheConfiguration = CacheConfigurationBuilder - .newCacheConfigurationBuilder(Integer.class, HashMap.class, + .newCacheConfigurationBuilder(Integer.class, ArrayList.class, ResourcePoolsBuilder.newResourcePoolsBuilder().heap(maxCacheActivateSize, MemoryUnit.MB)) .withSizeOfMaxObjectGraph(1000 * 1000L).withSizeOfMaxObjectSize(maxCacheActivateSize, MemoryUnit.MB) .build(); @@ -67,7 +64,7 @@ public class Ehcache implements ReadCache { CacheManagerBuilder.newCacheManagerBuilder().with(CacheManagerBuilder.persistence(cacheFile)).build(true); ACTIVE_CACHE_MANAGER = CacheManagerBuilder.newCacheManagerBuilder().build(true); FILE_CACHE_CONFIGURATION = CacheConfigurationBuilder - .newCacheConfigurationBuilder(Integer.class, HashMap.class, + .newCacheConfigurationBuilder(Integer.class, ArrayList.class, ResourcePoolsBuilder.newResourcePoolsBuilder().disk(10, MemoryUnit.GB)) 
.withSizeOfMaxObjectGraph(1000 * 1000L).withSizeOfMaxObjectSize(10, MemoryUnit.GB).build(); } @@ -81,15 +78,16 @@ public class Ehcache implements ReadCache { @Override public void put(String value) { - dataMap.put(index, value); - if ((index + 1) % BATCH_COUNT == 0) { - fileCache.put(index / BATCH_COUNT, dataMap); - dataMap = new HashMap(BATCH_COUNT * 4 / 3 + 1); + dataList.add(value); + if (dataList.size() >= BATCH_COUNT) { + fileCache.put(activeIndex, dataList); + activeIndex++; + dataList = ListUtils.newArrayListWithExpectedSize(BATCH_COUNT); } - index++; - if (LOGGER.isDebugEnabled()) { - if (index % DEBUG_WRITE_SIZE == 0) { - LOGGER.debug("Already put :{}", index); + if (log.isDebugEnabled()) { + int alreadyPut = activeIndex * BATCH_COUNT + dataList.size(); + if (alreadyPut % DEBUG_WRITE_SIZE == 0) { + log.debug("Already put :{}", alreadyPut); } } } @@ -100,25 +98,25 @@ public class Ehcache implements ReadCache { return null; } int route = key / BATCH_COUNT; - HashMap dataMap = activeCache.get(route); - if (dataMap == null) { - dataMap = fileCache.get(route); - activeCache.put(route, dataMap); - if (LOGGER.isDebugEnabled()) { + ArrayList dataList = activeCache.get(route); + if (dataList == null) { + dataList = fileCache.get(route); + activeCache.put(route, dataList); + if (log.isDebugEnabled()) { if (cacheMiss++ % DEBUG_CACHE_MISS_SIZE == 0) { - LOGGER.debug("Cache misses count:{}", cacheMiss); + log.debug("Cache misses count:{}", cacheMiss); } } } - return dataMap.get(key); + return dataList.get(key % BATCH_COUNT); } @Override public void putFinished() { - if (MapUtils.isEmpty(dataMap)) { + if (CollectionUtils.isEmpty(dataList)) { return; } - fileCache.put(index / BATCH_COUNT, dataMap); + fileCache.put(activeIndex, dataList); } @Override diff --git a/src/main/java/com/alibaba/excel/cache/MapCache.java b/src/main/java/com/alibaba/excel/cache/MapCache.java index ae948fbc..f83a1233 100644 --- a/src/main/java/com/alibaba/excel/cache/MapCache.java +++ 
b/src/main/java/com/alibaba/excel/cache/MapCache.java @@ -1,26 +1,24 @@ package com.alibaba.excel.cache; -import java.util.HashMap; -import java.util.Map; +import java.util.ArrayList; +import java.util.List; import com.alibaba.excel.context.AnalysisContext; /** - * * Putting temporary data directly into a map is a little more efficient but very memory intensive * * @author Jiaju Zhuang */ public class MapCache implements ReadCache { - private Map cache = new HashMap(); - private int index = 0; + private List cache = new ArrayList<>(); @Override public void init(AnalysisContext analysisContext) {} @Override public void put(String value) { - cache.put(index++, value); + cache.add(value); } @Override diff --git a/src/main/java/com/alibaba/excel/util/IntUtils.java b/src/main/java/com/alibaba/excel/util/IntUtils.java new file mode 100644 index 00000000..0237e452 --- /dev/null +++ b/src/main/java/com/alibaba/excel/util/IntUtils.java @@ -0,0 +1,30 @@ +package com.alibaba.excel.util; + +import java.util.ArrayList; +import java.util.List; + +/** + * Int utils + * + * @author Jiaju Zhuang + **/ +public class IntUtils { + private IntUtils() {} + /** + * Returns the {@code int} nearest in value to {@code value}. 
+ * + * @param value any {@code long} value + * @return the same value cast to {@code int} if it is in the range of the {@code int} type, + * {@link Integer#MAX_VALUE} if it is too large, or {@link Integer#MIN_VALUE} if it is too + * small + */ + public static int saturatedCast(long value) { + if (value > Integer.MAX_VALUE) { + return Integer.MAX_VALUE; + } + if (value < Integer.MIN_VALUE) { + return Integer.MIN_VALUE; + } + return (int) value; + } +} diff --git a/src/main/java/com/alibaba/excel/util/ListUtils.java b/src/main/java/com/alibaba/excel/util/ListUtils.java new file mode 100644 index 00000000..6b4d7499 --- /dev/null +++ b/src/main/java/com/alibaba/excel/util/ListUtils.java @@ -0,0 +1,63 @@ +package com.alibaba.excel.util; + +import java.util.ArrayList; +import java.util.List; + +/** + * List utils + * + * @author Jiaju Zhuang + **/ +public class ListUtils { + private ListUtils() {} + + /** + * Creates an {@code ArrayList} instance backed by an array with the specified initial size; + * simply delegates to {@link ArrayList#ArrayList(int)}. + * + *
<p>
Note for Java 7 and later: this method is now unnecessary and should be treated as + * deprecated. Instead, use {@code new }{@link ArrayList#ArrayList(int) ArrayList}{@code <>(int)} + * directly, taking advantage of the new "diamond" syntax. + * (Unlike here, there is no risk of overload ambiguity, since the {@code ArrayList} constructors + * very wisely did not accept varargs.) + * + * @param initialArraySize the exact size of the initial backing array for the returned array list + * ({@code ArrayList} documentation calls this value the "capacity") + * @return a new, empty {@code ArrayList} which is guaranteed not to resize itself unless its size + * reaches {@code initialArraySize + 1} + * @throws IllegalArgumentException if {@code initialArraySize} is negative + */ + public static ArrayList newArrayListWithCapacity(int initialArraySize) { + checkNonnegative(initialArraySize, "initialArraySize"); + return new ArrayList<>(initialArraySize); + } + + /** + * Creates an {@code ArrayList} instance to hold {@code estimatedSize} elements, plus an + * unspecified amount of padding; you almost certainly mean to call {@link + * #newArrayListWithCapacity} (see that method for further advice on usage). + * + *
<p>
Note: This method will soon be deprecated. Even in the rare case that you do want + * some amount of padding, it's best if you choose your desired amount explicitly. + * + * @param estimatedSize an estimate of the eventual {@link List#size()} of the new list + * @return a new, empty {@code ArrayList}, sized appropriately to hold the estimated number of + * elements + * @throws IllegalArgumentException if {@code estimatedSize} is negative + */ + public static ArrayList newArrayListWithExpectedSize(int estimatedSize) { + return new ArrayList<>(computeArrayListCapacity(estimatedSize)); + } + + static int computeArrayListCapacity(int arraySize) { + checkNonnegative(arraySize, "arraySize"); + return IntUtils.saturatedCast(5L + arraySize + (arraySize / 10)); + } + + static int checkNonnegative(int value, String name) { + if (value < 0) { + throw new IllegalArgumentException(name + " cannot be negative but was: " + value); + } + return value; + } +} diff --git a/update.md b/update.md index adfc42c2..2a7bfa83 100644 --- a/update.md +++ b/update.md @@ -5,6 +5,7 @@ * 升级ehcache 到 3.8.1 * 支持非驼峰的字段读写 * 修复`CellData`可能不返回行列号 [Issue #1832](https://github.com/alibaba/easyexcel/issues/1832) +* 优化读取性能 # 2.2.8 * 兼容07在特殊的excel的情况下,读取数据异常