mirror of https://github.com/nocodb/nocodb
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
338 lines
11 KiB
338 lines
11 KiB
import { parse } from 'papaparse' |
|
import type { UploadFile } from 'ant-design-vue' |
|
import { UITypes } from 'nocodb-sdk' |
|
import { getDateFormat, validateDateWithUnknownFormat } from '../../utils/dateTimeUtils' |
|
import { |
|
extractMultiOrSingleSelectProps, |
|
getCheckboxValue, |
|
isCheckboxType, |
|
isDecimalType, |
|
isEmailType, |
|
isMultiLineTextType, |
|
isUrlType, |
|
} from './parserHelpers' |
|
|
|
/**
 * Builds an import template (tables, columns, detected column types) and the
 * row data from one or more CSV sources using papaparse.
 *
 * Each source is parsed twice: a first pass (`_parseTableMeta`) reads the
 * header and samples values to guess a UI type per column, then a second pass
 * (`_parseTableData`) collects the rows when `config.shouldImportData` is set.
 */
export default class CSVTemplateAdapter {
  /** Parser options supplied by the caller (e.g. firstRowAsHeaders, importFromURL, maxRowsToParse). */
  config: Record<string, any>

  /** Either the list of uploaded files or, when `config.importFromURL` is set, a URL string. */
  source: UploadFile[] | string

  /** Per column index: how many sampled values were detected as each UI type. */
  detectedColumnTypes: Record<number, Record<string, number>>

  /** Per column index: the set of distinct non-empty values seen during detection. */
  distinctValues: Record<number, Set<string>>

  /** Per table index: the sanitised, de-duplicated column names. */
  headers: Record<number, string[]>

  /** Per table index: the table template object built by `initTemplate`. */
  tables: Record<number, any>

  /** The resulting template; tables are appended as each source finishes its metadata pass. */
  base: {
    tables: Record<string, any>[]
  }

  /** Parsed rows keyed by table name. */
  data: Record<string, any> = {}

  /** Per column index: sampled raw values kept for date-format / select-option analysis. */
  columnValues: Record<number, string[]>

  private progressCallback?: (msg: string) => void

  /**
   * @param source - uploaded files, or a URL string when importing from URL
   * @param parserConfig - parser options, stored as `config`
   * @param progressCallback - optional sink for human-readable progress messages
   */
  constructor(source: UploadFile[] | string, parserConfig = {}, progressCallback?: (msg: string) => void) {
    this.config = parserConfig
    this.source = source
    this.base = {
      tables: [],
    }
    this.detectedColumnTypes = {}
    this.distinctValues = {}
    this.headers = {}
    this.columnValues = {}
    this.tables = {}
    this.progressCallback = progressCallback
  }

  /** No-op; present so callers can treat all adapters uniformly. */
  async init() {}

  /** Replaces every special character (including spaces and underscores) with `_`, then trims. */
  private sanitizeName(raw: string): string {
    return raw.replace(/[` ~!@#$%^&*()_|+\-=?;:'",.<>\{\}\[\]\\\/]/g, '_').trim()
  }

  /**
   * Creates the table template for `tableIdx` and resets the per-column
   * detection state for every column of that table.
   *
   * @param tableIdx - index of the table within the import
   * @param tn - sanitised table name
   * @param columnNames - header cells; if any cell is neither a string nor null,
   *   all columns receive generated `field_N` names
   */
  initTemplate(tableIdx: number, tn: string, columnNames: string[]) {
    const columnNameRowExist = columnNames.every((v: any) => v === null || typeof v === 'string')

    // A Map (rather than a plain object probed with `in`) so that header names such as
    // "constructor" or "toString" are not mistaken for already-used names.
    // `id` is pre-registered, so a CSV column called "id" is renamed to "id1".
    const usedNames = new Map<string, number>([['id', 0]])

    const tableObj: Record<string, any> = {
      table_name: tn,
      ref_table_name: tn,
      columns: [],
    }

    this.headers[tableIdx] = []
    // Assigned up-front so that a table without any column still gets a proper template object.
    this.tables[tableIdx] = tableObj

    for (const [columnIdx, columnName] of columnNames.entries()) {
      // null header cells are allowed by the check above; treat them as empty names.
      const givenName = columnNameRowExist ? ((columnName as string | null) ?? '').toString().trim() : ''
      let cn = this.sanitizeName(givenName || `field_${columnIdx + 1}`)

      // De-duplicate by appending an increasing counter to the clashing name.
      while (usedNames.has(cn)) {
        const suffix = (usedNames.get(cn) as number) + 1
        usedNames.set(cn, suffix)
        cn = `${cn}${suffix}`
      }
      usedNames.set(cn, 0)

      this.detectedColumnTypes[columnIdx] = {}
      this.distinctValues[columnIdx] = new Set<string>()
      this.columnValues[columnIdx] = []

      tableObj.columns.push({
        column_name: cn,
        ref_column_name: cn,
        meta: {},
        uidt: UITypes.SingleLineText,
        key: columnIdx,
      })

      this.headers[tableIdx].push(cn)
    }
  }

  /**
   * First coarse guess for a single cell: Number, DateTime, Checkbox or SingleLineText.
   * Numeric strings win over everything else, so '1' and '0' are reported as Number here.
   */
  detectInitialUidt(v: string) {
    if (!isNaN(Number(v)) && !isNaN(parseFloat(v))) return UITypes.Number
    if (validateDateWithUnknownFormat(v)) return UITypes.DateTime
    if (['true', 'True', 'false', 'False', '1', '0', 'T', 'F', 'Y', 'N'].includes(v)) return UITypes.Checkbox
    return UITypes.SingleLineText
  }

  /**
   * Refines the type guess for every non-empty cell of one row and records it
   * in `detectedColumnTypes`, `distinctValues` and (for select / date candidates) `columnValues`.
   *
   * @param tableIdx - table whose header defines how many columns are inspected
   * @param data - the cells of one parsed row
   */
  detectColumnType(tableIdx: number, data: string[]) {
    // Cells beyond the header have no column to belong to (ragged rows); ignore them
    // instead of dereferencing uninitialised per-column state.
    const columnCount = Math.min(data.length, this.headers[tableIdx]?.length ?? 0)

    for (let columnIdx = 0; columnIdx < columnCount; columnIdx++) {
      const value = data[columnIdx]
      // skip null data
      if (!value) continue

      const colData: any = [value]
      let uidt = this.detectInitialUidt(value)

      // NOTE(review): this compares the column index (not a row count) with maxRowsToParse.
      // Kept exactly as before; confirm the intended limit with the import settings owner.
      const withinSampleLimit = columnIdx < this.config.maxRowsToParse

      // TODO(import): centralise
      if (isMultiLineTextType(colData)) {
        uidt = UITypes.LongText
      } else if (uidt === UITypes.SingleLineText) {
        // The branches are mutually exclusive: previously a detected Email fell through
        // to the checkbox / single-select branch and was overwritten.
        if (isEmailType(colData)) {
          uidt = UITypes.Email
        } else if (isUrlType(colData)) {
          uidt = UITypes.URL
        } else if (isCheckboxType(colData).length === 1) {
          uidt = UITypes.Checkbox
        } else if (withinSampleLimit) {
          // Candidate option value; the final select / text decision is taken in updateTemplate.
          this.columnValues[columnIdx].push(value)
          uidt = UITypes.SingleSelect
        }
      } else if (uidt === UITypes.Number) {
        if (isDecimalType(colData)) {
          uidt = UITypes.Decimal
        }
      } else if (uidt === UITypes.DateTime) {
        if (withinSampleLimit) {
          // Kept so updateTemplate can tell Date from DateTime and pick a date format.
          this.columnValues[columnIdx].push(value)
        }
      }

      const counts = this.detectedColumnTypes[columnIdx]
      counts[uidt] = (counts[uidt] ?? 0) + 1

      this.distinctValues[columnIdx].add(value)
    }
  }

  /**
   * Collapses the per-value detections of a column into one UI type.
   *
   * @returns SingleLineText when nothing was detected; Decimal for a Number/Decimal mix;
   *   LongText or SingleLineText for any other mix; otherwise the single detected type.
   */
  getPossibleUidt(columnIdx: number) {
    const detectedColTypes = this.detectedColumnTypes[columnIdx] ?? {}
    const detectedNames = Object.keys(detectedColTypes)
    const len = detectedNames.length

    // all records are null
    if (len === 0) {
      return UITypes.SingleLineText
    }

    // handle numeric case
    if (len === 2 && UITypes.Number in detectedColTypes && UITypes.Decimal in detectedColTypes) {
      return UITypes.Decimal
    }

    // if there are multiple detected column types
    // then return either LongText or SingleLineText
    if (len > 1) {
      return UITypes.LongText in detectedColTypes ? UITypes.LongText : UITypes.SingleLineText
    }

    // otherwise, all records have the same column type
    return detectedNames[0]
  }

  /**
   * Writes the final UI type (and date format / select properties where applicable)
   * into every column of the table template, consuming the sampled `columnValues`.
   */
  updateTemplate(tableIdx: number) {
    const columnCount = this.headers[tableIdx]?.length ?? 0

    for (let columnIdx = 0; columnIdx < columnCount; columnIdx++) {
      const uidt = this.getPossibleUidt(columnIdx)
      const column = this.tables[tableIdx].columns[columnIdx]
      const samples = this.columnValues[columnIdx] ?? []

      if (samples.length === 0) {
        column.uidt = uidt
        continue
      }

      if (uidt === UITypes.DateTime) {
        const dateFormat: Record<string, number> = {}

        // Inspect the samples from the very first one: skipping it classified a column
        // holding a single datetime value as a plain Date.
        const onlyDates = samples.slice(0, this.config.maxRowsToParse).every((v: any) => {
          // A value without a space has no time part.
          const isDate = v.split(' ').length === 1
          if (isDate) {
            const format = getDateFormat(v)
            dateFormat[format] = (dateFormat[format] || 0) + 1
          }
          return isDate
        })

        if (onlyDates) {
          column.uidt = UITypes.Date
          // take the date format with the max occurrence
          const objKeys = Object.keys(dateFormat)
          column.meta.date_format = objKeys.length
            ? objKeys.reduce((x, y) => (dateFormat[x] > dateFormat[y] ? x : y))
            : 'YYYY/MM/DD'
        } else {
          // Datetime
          column.uidt = uidt
        }
      } else if (uidt === UITypes.SingleSelect || uidt === UITypes.MultiSelect) {
        // assume it is a SingleLineText first
        column.uidt = UITypes.SingleLineText
        // override with UITypes.SingleSelect or UITypes.MultiSelect if applicable
        Object.assign(column, extractMultiOrSingleSelectProps(samples))
      } else {
        column.uidt = uidt
      }

      // The samples are no longer needed once the column type is final.
      delete this.columnValues[columnIdx]
    }
  }

  /**
   * Second pass: collects the rows of one source into `data[tn]`.
   * Resolves immediately when `config.shouldImportData` is falsy.
   *
   * @param tableIdx - index of the table whose template describes the columns
   * @param source - uploaded file, or URL string when `config.importFromURL` is set
   * @param tn - table name under which rows are stored
   * @returns a promise resolving to `true`, or rejecting with the parser error
   */
  async _parseTableData(tableIdx: number, source: UploadFile | string, tn: string) {
    return new Promise((resolve, reject) => {
      const that = this

      if (!that.config.shouldImportData) {
        resolve(true)
        return
      }

      that.progress(`Processing ${tn} data`)

      let steppers = 0
      // Same truthiness test as the metadata pass, so both passes agree on whether
      // the first row is a header even when the option is missing or not a boolean.
      const firstDataRow = that.config.firstRowAsHeaders ? 2 : 1
      const parseSource = (this.config.importFromURL ? (source as string) : (source as UploadFile).originFileObj)!

      parse(parseSource, {
        download: that.config.importFromURL,
        // worker: true,
        skipEmptyLines: 'greedy',
        step(row) {
          steppers += 1

          if (row && steppers >= firstDataRow) {
            const cells = row.data as any[]
            const rowData: Record<string, any> = {}

            for (let columnIdx = 0; columnIdx < that.headers[tableIdx].length; columnIdx++) {
              const column = that.tables[tableIdx].columns[columnIdx]
              // Empty cells are stored as null.
              const data = cells[columnIdx] === '' ? null : cells[columnIdx]

              if (column.uidt === UITypes.Checkbox) {
                rowData[column.column_name] = getCheckboxValue(data)
              } else if (column.uidt === UITypes.SingleSelect || column.uidt === UITypes.MultiSelect) {
                rowData[column.column_name] = (data || '').toString().trim() || null
              } else {
                // TODO(import): do parsing if necessary based on type
                rowData[column.column_name] = data
              }
            }

            that.data[tn].push(rowData)
          }

          if (steppers % 1000 === 0) {
            that.progress(`Processed ${steppers} rows of ${tn}`)
          }
        },
        complete() {
          that.progress(`Processed ${tn} data`)
          resolve(true)
        },
        error(e: Error) {
          reject(e)
        },
      })
    })
  }

  /**
   * First pass: derives the table name, builds the template from the first row,
   * samples the remaining rows for type detection, then chains the data pass.
   *
   * @param tableIdx - index to store the table under
   * @param source - uploaded file, or URL string when `config.importFromURL` is set
   * @returns a promise resolving to `true` once metadata and data are processed,
   *   or rejecting with the first error encountered
   */
  async _parseTableMeta(tableIdx: number, source: UploadFile | string) {
    return new Promise((resolve, reject) => {
      const that = this
      let steppers = 0

      // Table name: last URL path segment or the uploaded file name, sanitised.
      const rawName = (this.config.importFromURL ? (source as string).split('/').pop() : (source as UploadFile).name) as string
      const tn = this.sanitizeName(rawName)
      this.data[tn] = []

      const parseSource = (this.config.importFromURL ? (source as string) : (source as UploadFile).originFileObj)!

      parse(parseSource, {
        download: that.config.importFromURL,
        skipEmptyLines: 'greedy',
        step(row) {
          steppers += 1
          if (!row) return

          const cells = row.data as string[]
          const isHeaderRow = steppers === 1 && !!that.config.firstRowAsHeaders

          if (steppers === 1) {
            that.initTemplate(
              tableIdx,
              tn,
              // row.data is header, or use dummy column names as header
              isHeaderRow ? cells : Array.from({ length: cells.length }, (_, i) => `field_${i + 1}`),
            )
          }

          // Every row that is not the header row is data.
          if (!isHeaderRow && that.config.autoSelectFieldTypes) {
            that.detectColumnType(tableIdx, cells)
          }
        },
        async complete() {
          // papaparse ignores the promise returned by this callback, so any failure must be
          // forwarded to `reject` explicitly; otherwise the outer promise would never settle.
          try {
            if (steppers === 0) {
              // Source without any row: still produce an (empty) table template.
              that.initTemplate(tableIdx, tn, [])
            }
            that.updateTemplate(tableIdx)
            that.base.tables.push(that.tables[tableIdx])
            that.progress(`Processed ${tn} metadata`)
            await that._parseTableData(tableIdx, source, tn)
            resolve(true)
          } catch (e) {
            reject(e)
          }
        },
        error(e: Error) {
          reject(e)
        },
      })
    })
  }

  /**
   * Parses the configured source(s). Uploaded files are processed one after another:
   * the detection state (`detectedColumnTypes`, `distinctValues`, `columnValues`) is keyed
   * by column index only, so overlapping parses of several files would overwrite each other.
   */
  async parse() {
    if (this.config.importFromURL) {
      await this._parseTableMeta(0, this.source as string)
      return
    }

    for (const [tableIdx, file] of (this.source as UploadFile[]).entries()) {
      this.progress(`Parsing ${file.name}`)
      await this._parseTableMeta(tableIdx, file)
    }
  }

  /** @returns the column definitions of every parsed table, in `base.tables` order */
  getColumns() {
    return this.base.tables.map((t: Record<string, any>) => t.columns)
  }

  /** @returns the parsed rows keyed by table name */
  getData() {
    return this.data
  }

  /** @returns the template object holding all parsed tables */
  getTemplate() {
    return this.base
  }

  /** Forwards a progress message to the callback given at construction, if any. */
  progress(msg: string) {
    this.progressCallback?.(msg)
  }
}
|
|
|