From 0ba2778c573fdb417a85d0453316970b158f7088 Mon Sep 17 00:00:00 2001 From: Wing-Kam Wong Date: Tue, 18 Oct 2022 01:02:33 +0800 Subject: [PATCH] wip(nc-gui): csv template adapter with papaparse v5 streaming --- .../utils/parsers/CSVTemplateAdapter.ts | 205 ++++++++++++++++-- 1 file changed, 186 insertions(+), 19 deletions(-) diff --git a/packages/nc-gui/utils/parsers/CSVTemplateAdapter.ts b/packages/nc-gui/utils/parsers/CSVTemplateAdapter.ts index f68c9be56e..f85c759b3e 100644 --- a/packages/nc-gui/utils/parsers/CSVTemplateAdapter.ts +++ b/packages/nc-gui/utils/parsers/CSVTemplateAdapter.ts @@ -1,42 +1,209 @@ import { parse } from 'papaparse' import TemplateGenerator from './TemplateGenerator' +import { UploadFile } from 'ant-design-vue' +import { UITypes } from 'nocodb-sdk' +import { + extractMultiOrSingleSelectProps, + isCheckboxType, + isDecimalType, + isEmailType, + isMultiLineTextType, + isUrlType, +} from '#imports' export default class CSVTemplateAdapter extends TemplateGenerator { + config: Record fileName: string - project: object - data: object - csv: any - csvData: any - columns: object + files: UploadFile[] + detectedColumnTypes: Record> + distinctValues: Record> + headers: Record + project: { + title: string + tables: Record[] + } + columnValues: Record - constructor(name: string, data: object) { + constructor(fileName: string, files: UploadFile[], parserConfig = {}) { super() - this.fileName = name - this.csvData = data + this.config = { + maxRowsToParse: 500, + ...parserConfig, + } + this.fileName = fileName // TODO: check usage + this.files = files this.project = { title: this.fileName, tables: [], } - this.data = {} - this.csv = {} - this.columns = {} - this.csvData = {} + this.detectedColumnTypes = {} + this.distinctValues = {} + this.headers = {} + this.columnValues = {} } - async init() { - this.csv = parse(this.csvData, { header: true }) + async init() {} + + initTemplate(tableIdx: number, fileName: string, columnNames: string[]) { + const columnNameRowExist = +columnNames.every((v: any) => v === null || typeof v === 'string') + const columnNamePrefixRef: Record = { id: 0 } + + this.project.tables.push({ + table_name: fileName, + ref_table_name: fileName, + columns: [], + }) + + this.headers[tableIdx] = [] + + for (const [columnIdx, columnName] of columnNames.entries()) { + let cn: string = ((columnNameRowExist && columnName.toString().trim()) || `field_${columnIdx + 1}`) + .replace(/[` ~!@#$%^&*()_|+\-=?;:'",.<>\{\}\[\]\\\/]/g, '_') + .trim() + while (cn in columnNamePrefixRef) { + cn = `${cn}${++columnNamePrefixRef[cn]}` + } + columnNamePrefixRef[cn] = 0 + + this.detectedColumnTypes[columnIdx] = {} + this.distinctValues[columnIdx] = new Set() + this.columnValues[columnIdx] = [] + + this.project.tables[tableIdx].columns.push({ + column_name: cn, + ref_column_name: cn, + meta: {}, + uidt: UITypes.SingleLineText, + key: columnIdx, + }) + + this.headers[tableIdx].push(cn) + } } - parseData() { - this.columns = this.csv.meta.fields - this.data = this.csv.data + detectInitialUidt(v: string) { + if (!isNaN(Number(v)) && !isNaN(parseFloat(v))) return UITypes.Number + if (!isNaN(new Date(v).getDate())) return UITypes.DateTime + if (['true', 'True', 'false', 'False', '1', '0', 'T', 'F', 'Y', 'N'].includes(v)) return UITypes.Checkbox + return UITypes.SingleLineText + } + + detectColumnType(data: []) { + for (let columnIdx = 0; columnIdx < data.length; columnIdx++) { + const colData: any = [data[columnIdx]] + const colProps = { uidt: this.detectInitialUidt(data[columnIdx]) } + // TODO: centralise + if (colProps.uidt === UITypes.SingleLineText) { + if (isMultiLineTextType(colData)) { + colProps.uidt = UITypes.LongText + } + if (isEmailType(colData)) { + colProps.uidt = UITypes.Email + } + if (isUrlType(colData)) { + colProps.uidt = UITypes.URL + } else { + const checkboxType = isCheckboxType(colData) + if (checkboxType.length === 1) { + colProps.uidt = UITypes.Checkbox + } else { + if (columnIdx < this.config.maxRowsToParse) { + this.columnValues[columnIdx].push(data[columnIdx]) + } + } + } + } else if (colProps.uidt === UITypes.Number) { + if (isDecimalType(colData)) { + colProps.uidt = UITypes.Decimal + } + } else if (colProps.uidt === UITypes.DateTime) { + if (columnIdx < this.config.maxRowsToParse) { + this.columnValues[columnIdx].push(data[columnIdx]) + } + } + if (!(colProps.uidt in this.detectedColumnTypes[columnIdx])) { + this.detectedColumnTypes[columnIdx] = { + ...this.detectedColumnTypes[columnIdx], + [colProps.uidt]: 0, + } + } + this.detectedColumnTypes[columnIdx][colProps.uidt] += 1 + if (data[columnIdx]) { + this.distinctValues[columnIdx].add(data[columnIdx]) + } + } + } + + getMaxPossibleUidt(columnIdx: number) { + let mx = -1 + let key: string = UITypes.SingleLineText + Object.entries(this.detectedColumnTypes[columnIdx]).forEach(([k, v]) => { + if (v > mx) { + mx = v + key = k + } + }) + return key + } + + updateTemplate(tableIdx: number) { + for (let columnIdx = 0; columnIdx < this.headers[tableIdx].length; columnIdx++) { + if (this.columnValues[columnIdx].length > 0) { + if (this.project.tables[tableIdx].columns[columnIdx].uidt === UITypes.DateTime) { + // TODO: handle DateTime + // set meta + } + Object.assign( + this.project.tables[tableIdx].columns[columnIdx], + extractMultiOrSingleSelectProps(this.columnValues[columnIdx]), + ) + } else { + this.project.tables[tableIdx].columns[columnIdx].uidt = this.getMaxPossibleUidt(columnIdx) + } + } + } + + parse() { + const that = this + for (const [tableIdx, file] of this.files.entries()) { + let steppers = 0 + parse(file.originFileObj as File, { + worker: true, + step: function (row) { + steppers += 1 + if (row) { + if (steppers == 1) { + that.initTemplate(tableIdx, file.name!, row.data as []) + } else { + that.detectColumnType(row.data as []) + } + // TODO: remove + if (steppers <= 10) { + console.log('Row:', row.data) + } + } + }, + complete: function () { + console.log('complete') + console.log('steppers: ' + steppers) + that.updateTemplate(tableIdx) + console.log(that.project.tables) + // TODO: enable import button + // TODO: put info.file.originFileObj to list + }, + }) + } } getColumns() { - return this.columns + // return this.columns } getData() { - return this.data + // return this.data + } + + getTemplate() { + return this.project } }