package com.alibaba.excel.analysis.v03; import com.alibaba.excel.analysis.BaseSaxAnalyser; import com.alibaba.excel.analysis.v03.handlers.*; import com.alibaba.excel.context.AnalysisContext; import com.alibaba.excel.event.EachRowAnalysisFinishEvent; import com.alibaba.excel.exception.ExcelAnalysisException; import com.alibaba.excel.metadata.Sheet; import com.alibaba.excel.util.CollectionUtils; import org.apache.poi.hssf.eventusermodel.*; import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord; import org.apache.poi.hssf.record.*; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; /** * /** * A text extractor for Excel files. *

* Returns the textual content of the file, suitable for * indexing by * something like Lucene, but not really * intended for display to the user. *

*

* To turn an excel file into * a CSV or similar, then see * the XLS2CSVmra example *

* * @see * XLS2CSVmra * * @author jipengfei */ public class XlsSaxAnalyser extends BaseSaxAnalyser implements HSSFListener { private boolean outputFormulaValues = true; private POIFSFileSystem fs; private int lastRowNumber; private int lastColumnNumber; /** * For parsing Formulas */ private EventWorkbookBuilder.SheetRecordCollectingListener workbookBuildingListener; private FormatTrackingHSSFListener formatListener; private List records; private boolean notAllEmpty = false; private List sheets = new ArrayList(); private HSSFWorkbook stubWorkbook; private List recordHandlers = new ArrayList(); public XlsSaxAnalyser(AnalysisContext context) throws IOException { this.analysisContext = context; this.records = new ArrayList(); context.setCurrentRowNum(0); this.fs = new POIFSFileSystem(analysisContext.getInputStream()); } @Override public List getSheets() { return sheets; } @Override public void execute() { MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this); formatListener = new FormatTrackingHSSFListener(listener); workbookBuildingListener = new EventWorkbookBuilder.SheetRecordCollectingListener(formatListener); if (workbookBuildingListener != null && stubWorkbook == null) { stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook(); } init(); HSSFEventFactory factory = new HSSFEventFactory(); HSSFRequest request = new HSSFRequest(); if (outputFormulaValues) { request.addListenerForAllRecords(formatListener); } else { request.addListenerForAllRecords(workbookBuildingListener); } try { factory.processWorkbookEvents(request, fs); } catch (IOException e) { throw new ExcelAnalysisException(e); } } private void init() { lastRowNumber = 0; lastColumnNumber = 0; records = new ArrayList(); notAllEmpty = false; sheets = new ArrayList(); buildXlsRecordHandlers(); } public void processRecord(Record record) { int thisRow = -1; int thisColumn = -1; String thisStr = null; for(XlsRecordHandler handler : this.recordHandlers) { if (handler.support(record)) { handler.processRecord(record); thisRow = handler.getRow(); thisColumn = handler.getColumn(); thisStr = handler.getValue(); break; } } // If we got something to print out, do so if (thisStr != null) { if (analysisContext.trim()) { thisStr = thisStr.trim(); } if (!"".equals(thisStr)) { notAllEmpty = true; } records.add(thisStr); } // Handle new row if (thisRow != -1 && thisRow != lastRowNumber) { lastColumnNumber = -1; } // Update column and row count if (thisRow > -1) { lastRowNumber = thisRow; } if (thisColumn > -1) { lastColumnNumber = thisColumn; } processLastCellOfRow(record); } private void processLastCellOfRow(Record record) { // Handle end of row if (record instanceof LastCellOfRowDummyRecord) { int row = ((LastCellOfRowDummyRecord)record).getRow(); if (lastColumnNumber == -1) { lastColumnNumber = 0; } analysisContext.setCurrentRowNum(row); if (notAllEmpty) { notify(new EachRowAnalysisFinishEvent(new ArrayList(records))); } records.clear(); lastColumnNumber = -1; notAllEmpty = false; } } private void buildXlsRecordHandlers() { if (CollectionUtils.isEmpty(recordHandlers)) { recordHandlers.add(new BlankOrErrorRecordHandler()); recordHandlers.add(new BOFRecordHandler(workbookBuildingListener, analysisContext, sheets)); recordHandlers.add(new FormulaRecordHandler(stubWorkbook, formatListener)); recordHandlers.add(new LabelRecordHandler()); recordHandlers.add(new NoteRecordHandler()); recordHandlers.add(new NumberRecordHandler(formatListener)); recordHandlers.add(new RKRecordHandler()); recordHandlers.add(new SSTRecordHandler()); recordHandlers.add(new MissingCellDummyRecordHandler()); Collections.sort(recordHandlers); } for(XlsRecordHandler x : recordHandlers) { x.init(); } } }