package com.alibaba.excel.analysis.v03; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder; import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener; import org.apache.poi.hssf.eventusermodel.HSSFEventFactory; import org.apache.poi.hssf.eventusermodel.HSSFListener; import org.apache.poi.hssf.eventusermodel.HSSFRequest; import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener; import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord; import org.apache.poi.hssf.record.BOFRecord; import org.apache.poi.hssf.record.BoundSheetRecord; import org.apache.poi.hssf.record.Record; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.alibaba.excel.analysis.ExcelReadExecutor; import com.alibaba.excel.analysis.v03.handlers.BlankOrErrorRecordHandler; import com.alibaba.excel.analysis.v03.handlers.BofRecordHandler; import com.alibaba.excel.analysis.v03.handlers.FormulaRecordHandler; import com.alibaba.excel.analysis.v03.handlers.IndexRecordHandler; import com.alibaba.excel.analysis.v03.handlers.LabelRecordHandler; import com.alibaba.excel.analysis.v03.handlers.MissingCellDummyRecordHandler; import com.alibaba.excel.analysis.v03.handlers.NoteRecordHandler; import com.alibaba.excel.analysis.v03.handlers.NumberRecordHandler; import com.alibaba.excel.analysis.v03.handlers.RkRecordHandler; import com.alibaba.excel.analysis.v03.handlers.SstRecordHandler; import com.alibaba.excel.analysis.v03.handlers.TextObjectRecordHandler; import com.alibaba.excel.context.XlsReadContext; import com.alibaba.excel.enums.CellDataTypeEnum; import com.alibaba.excel.exception.ExcelAnalysisException; import com.alibaba.excel.metadata.CellData; import com.alibaba.excel.read.listener.event.EachRowAnalysisFinishEvent; import com.alibaba.excel.read.metadata.ReadSheet; import com.alibaba.excel.read.metadata.holder.ReadRowHolder; import com.alibaba.excel.util.CollectionUtils; /** * /** * A text extractor for Excel files. * *

* * Returns the textual content of the file, suitable for * indexing by something like Lucene, but not really * * intended for display to the user. * *

* * *

* * To turn an excel file into a CSV or similar, then see * the XLS2CSVmra example * *

* * * @see XLS2CSVmra * * @author jipengfei */ public class XlsSaxAnalyser implements HSSFListener, ExcelReadExecutor { private static final Logger LOGGER = LoggerFactory.getLogger(XlsSaxAnalyser.class); private POIFSFileSystem poifsFileSystem; private Boolean readAll; private List readSheetList; private int lastRowNumber; private int lastColumnNumber; private int ii = 0; /** * For parsing Formulas */ private EventWorkbookBuilder.SheetRecordCollectingListener workbookBuildingListener; private FormatTrackingHSSFListener formatListener; private Map records; private List sheets; private HSSFWorkbook stubWorkbook; private List recordHandlers = new ArrayList(); private XlsReadContext analysisContext; public XlsSaxAnalyser(XlsReadContext context, POIFSFileSystem poifsFileSystem) { this.analysisContext = context; this.records = new LinkedHashMap(); this.poifsFileSystem = poifsFileSystem; analysisContext.readWorkbookHolder().setPoifsFileSystem(poifsFileSystem); } @Override public List sheetList() { if (sheets == null) { LOGGER.warn("Getting the 'sheetList' before reading will cause the file to be read twice."); XlsListSheetListener xlsListSheetListener = new XlsListSheetListener(analysisContext, poifsFileSystem); sheets = xlsListSheetListener.getSheetList(); } return sheets; } @Override public void execute(List readSheetList, Boolean readAll) { this.readAll = readAll; this.readSheetList = readSheetList; MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this); formatListener = new FormatTrackingHSSFListener(listener); workbookBuildingListener = new EventWorkbookBuilder.SheetRecordCollectingListener(formatListener); if (workbookBuildingListener != null && stubWorkbook == null) { stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook(); } init(); HSSFEventFactory factory = new HSSFEventFactory(); HSSFRequest request = new HSSFRequest(); request.addListenerForAllRecords(formatListener); try { factory.processWorkbookEvents(request, poifsFileSystem); } catch (IOException e) { throw new ExcelAnalysisException(e); } // Sometimes tables lack the end record of the last column if (!records.isEmpty()) { endRow(); } } private void init() { lastRowNumber = 0; lastColumnNumber = 0; records = new LinkedHashMap(); buildXlsRecordHandlers(); } @Override public void processRecord(Record record) { // Not data from the current sheet if (ignoreRecord(record)) { return; } int thisRow = -1; int thisColumn = -1; CellData cellData = null; for (XlsRecordHandler handler : this.recordHandlers) { if (handler.support(record)) { handler.processRecord(record); thisRow = handler.getRow(); thisColumn = handler.getColumn(); cellData = handler.getCellData(); if (cellData != null) { cellData.checkEmpty(); if (CellDataTypeEnum.EMPTY != cellData.getType()) { records.put(thisColumn, cellData); } } break; } } // If we got something to print out, do so if (cellData != null && analysisContext.currentReadHolder().globalConfiguration().getAutoTrim() && CellDataTypeEnum.STRING == cellData.getType()) { cellData.setStringValue(cellData.getStringValue().trim()); } // Handle new row if (thisRow != -1 && thisRow != lastRowNumber) { lastColumnNumber = -1; } // Update column and row count if (thisRow > -1) { lastRowNumber = thisRow; } if (thisColumn > -1) { lastColumnNumber = thisColumn; } processLastCellOfRow(record); } private boolean ignoreRecord(Record record) { return analysisContext.readWorkbookHolder().getIgnoreRecord03() && record.getSid() != BoundSheetRecord.sid && record.getSid() != BOFRecord.sid; } private void processLastCellOfRow(Record record) { // Handle end of row if (record instanceof LastCellOfRowDummyRecord) { endRow(); } } private void endRow() { if (lastColumnNumber == -1) { lastColumnNumber = 0; } analysisContext.readRowHolder( new ReadRowHolder(lastRowNumber, analysisContext.readSheetHolder().getGlobalConfiguration())); analysisContext.readSheetHolder().notifyEndOneRow(new EachRowAnalysisFinishEvent(records), analysisContext); this.records = new LinkedHashMap(); lastColumnNumber = -1; } private void buildXlsRecordHandlers() { if (CollectionUtils.isEmpty(recordHandlers)) { recordHandlers.add(new BlankOrErrorRecordHandler(analysisContext)); // The table has been counted and there are no duplicate statistics if (sheets == null) { sheets = new ArrayList(); recordHandlers.add(new BofRecordHandler(analysisContext, sheets, false, true)); } else { recordHandlers.add(new BofRecordHandler(analysisContext, sheets, true, true)); } recordHandlers.add(new FormulaRecordHandler(analysisContext, stubWorkbook, formatListener)); recordHandlers.add(new LabelRecordHandler(analysisContext)); recordHandlers.add(new NoteRecordHandler(analysisContext)); recordHandlers.add(new NumberRecordHandler(analysisContext, formatListener)); recordHandlers.add(new RkRecordHandler(analysisContext)); recordHandlers.add(new SstRecordHandler(analysisContext)); recordHandlers.add(new MissingCellDummyRecordHandler(analysisContext)); recordHandlers.add(new IndexRecordHandler(analysisContext)); recordHandlers.add(new TextObjectRecordHandler(analysisContext)); Collections.sort(recordHandlers); } for (XlsRecordHandler x : recordHandlers) { x.init(); if (x instanceof BofRecordHandler) { BofRecordHandler bofRecordHandler = (BofRecordHandler)x; bofRecordHandler.init(readSheetList, readAll); } } } }