Browse Source

feat(nc-gui): add callback to excel parsing + refactor

pull/4135/head
Wing-Kam Wong 2 years ago
parent
commit
a2e515de5d
  1. 354
      packages/nc-gui/utils/parsers/ExcelTemplateAdapter.ts

354
packages/nc-gui/utils/parsers/ExcelTemplateAdapter.ts

@ -3,12 +3,12 @@ import TemplateGenerator from './TemplateGenerator'
import { import {
extractMultiOrSingleSelectProps, extractMultiOrSingleSelectProps,
getCheckboxValue, getCheckboxValue,
getDateFormat,
isCheckboxType, isCheckboxType,
isEmailType, isEmailType,
isMultiLineTextType, isMultiLineTextType,
isUrlType, isUrlType,
} from './parserHelpers' } from '#imports'
import { getDateFormat } from '~/utils'
const excelTypeToUidt: Record<string, UITypes> = { const excelTypeToUidt: Record<string, UITypes> = {
d: UITypes.DateTime, d: UITypes.DateTime,
@ -58,200 +58,192 @@ export default class ExcelTemplateAdapter extends TemplateGenerator {
cellDates: true, cellDates: true,
} }
// TODO(import): remove later
// if (this.name.slice(-3) === 'csv') {
// this.wb = this.xlsx.read(new TextDecoder().decode(new Uint8Array(this.excelData)), {
// type: 'string',
// ...options,
// })
// } else {
// this.wb = this.xlsx.read(new Uint8Array(this.excelData), {
// type: 'array',
// ...options,
// })
// }
this.wb = this.xlsx.read(new Uint8Array(this.excelData), { this.wb = this.xlsx.read(new Uint8Array(this.excelData), {
type: 'array', type: 'array',
...options, ...options,
}) })
} }
parse() { parse(callback: Function) {
const tableNamePrefixRef: Record<string, any> = {} const tableNamePrefixRef: Record<string, any> = {}
this.wb.SheetNames.reduce((acc: any, sheet: any) => {
for (let i = 0; i < this.wb.SheetNames.length; i++) { return acc.then(
const columnNamePrefixRef: Record<string, any> = { id: 0 } () =>
const sheet: any = this.wb.SheetNames[i] new Promise((resolve) => {
let tn: string = (sheet || 'table').replace(/[` ~!@#$%^&*()_|+\-=?;:'",.<>\{\}\[\]\\\/]/g, '_').trim() const columnNamePrefixRef: Record<string, any> = { id: 0 }
let tn: string = (sheet || 'table').replace(/[` ~!@#$%^&*()_|+\-=?;:'",.<>\{\}\[\]\\\/]/g, '_').trim()
while (tn in tableNamePrefixRef) {
tn = `${tn}${++tableNamePrefixRef[tn]}` while (tn in tableNamePrefixRef) {
} tn = `${tn}${++tableNamePrefixRef[tn]}`
tableNamePrefixRef[tn] = 0
const table = { table_name: tn, ref_table_name: tn, columns: [] as any[] }
this.data[tn] = []
const ws: any = this.wb.Sheets[sheet]
const range = this.xlsx.utils.decode_range(ws['!ref'])
const rows: any = this.xlsx.utils.sheet_to_json(ws, { header: 1, blankrows: false, defval: null })
// TODO(import): remove later
// if (this.name.slice(-3) !== 'csv') {
// // fix precision bug & timezone offset issues introduced by xlsx
// const basedate = new Date(1899, 11, 30, 0, 0, 0)
// // number of milliseconds since base date
// const dnthresh = basedate.getTime() + (new Date().getTimezoneOffset() - basedate.getTimezoneOffset()) * 60000
// // number of milliseconds in a day
// const day_ms = 24 * 60 * 60 * 1000
// // handle date1904 property
// const fixImportedDate = (date: Date) => {
// const parsed = this.xlsx.SSF.parse_date_code((date.getTime() - dnthresh) / day_ms, {
// date1904: this.wb.Workbook.WBProps.date1904,
// })
// return new Date(parsed.y, parsed.m, parsed.d, parsed.H, parsed.M, parsed.S)
// }
// // fix imported date
// rows = rows.map((r: any) =>
// r.map((v: any) => {
// return v instanceof Date ? fixImportedDate(v) : v
// }),
// )
// }
const columnNameRowExist = +rows[0].every((v: any) => v === null || typeof v === 'string')
for (let col = 0; col < rows[0].length; col++) {
let cn: string = ((columnNameRowExist && rows[0] && rows[0][col] && rows[0][col].toString().trim()) || `field_${col + 1}`)
.replace(/[` ~!@#$%^&*()_|+\-=?;:'",.<>\{\}\[\]\\\/]/g, '_')
.trim()
while (cn in columnNamePrefixRef) {
cn = `${cn}${++columnNamePrefixRef[cn]}`
}
columnNamePrefixRef[cn] = 0
const column: Record<string, any> = {
column_name: cn,
ref_column_name: cn,
meta: {},
}
const cellId = this.xlsx.utils.encode_cell({
c: range.s.c + col,
r: columnNameRowExist,
})
const cellProps = ws[cellId] || {}
column.uidt = excelTypeToUidt[cellProps.t] || UITypes.SingleLineText
if (column.uidt === UITypes.SingleLineText) {
// check for long text
if (isMultiLineTextType(rows)) {
column.uidt = UITypes.LongText
}
if (isEmailType(rows)) {
column.uidt = UITypes.Email
}
if (isUrlType(rows)) {
column.uidt = UITypes.URL
} else {
const vals = rows
.slice(columnNameRowExist ? 1 : 0)
.map((r: any) => r[col])
.filter((v: any) => v !== null && v !== undefined && v.toString().trim() !== '')
const checkboxType = isCheckboxType(vals)
if (checkboxType.length === 1) {
column.uidt = UITypes.Checkbox
} else {
// Single Select / Multi Select
Object.assign(column, extractMultiOrSingleSelectProps(vals))
} }
} tableNamePrefixRef[tn] = 0
} else if (column.uidt === UITypes.Number) {
if ( const table = { table_name: tn, ref_table_name: tn, columns: [] as any[] }
rows.slice(1, this.config.maxRowsToParse).some((v: any) => { this.data[tn] = []
return v && v[col] && parseInt(v[col]) !== +v[col] const ws: any = this.wb.Sheets[sheet]
}) const range = this.xlsx.utils.decode_range(ws['!ref'])
) { let rows: any = this.xlsx.utils.sheet_to_json(ws, { header: 1, blankrows: false, defval: null })
column.uidt = UITypes.Decimal
} // fix precision bug & timezone offset issues introduced by xlsx
if ( const basedate = new Date(1899, 11, 30, 0, 0, 0)
rows.slice(1, this.config.maxRowsToParse).every((v: any, i: any) => { // number of milliseconds since base date
const cellId = this.xlsx.utils.encode_cell({ const dnthresh = basedate.getTime() + (new Date().getTimezoneOffset() - basedate.getTimezoneOffset()) * 60000
c: range.s.c + col, // number of milliseconds in a day
r: i + columnNameRowExist, const day_ms = 24 * 60 * 60 * 1000
// handle date1904 property
const fixImportedDate = (date: Date) => {
const parsed = this.xlsx.SSF.parse_date_code((date.getTime() - dnthresh) / day_ms, {
date1904: this.wb.Workbook.WBProps.date1904,
}) })
return new Date(parsed.y, parsed.m, parsed.d, parsed.H, parsed.M, parsed.S)
}
// fix imported date
rows = rows.map((r: any) =>
r.map((v: any) => {
return v instanceof Date ? fixImportedDate(v) : v
}),
)
const columnNameRowExist = +rows[0].every((v: any) => v === null || typeof v === 'string')
for (let col = 0; col < rows[0].length; col++) {
let cn: string = (
(columnNameRowExist && rows[0] && rows[0][col] && rows[0][col].toString().trim()) ||
`field_${col + 1}`
)
.replace(/[` ~!@#$%^&*()_|+\-=?;:'",.<>\{\}\[\]\\\/]/g, '_')
.trim()
while (cn in columnNamePrefixRef) {
cn = `${cn}${++columnNamePrefixRef[cn]}`
}
columnNamePrefixRef[cn] = 0
const column: Record<string, any> = {
column_name: cn,
ref_column_name: cn,
meta: {},
}
const cellObj = ws[cellId]
return !cellObj || (cellObj.w && cellObj.w.startsWith('$'))
})
) {
column.uidt = UITypes.Currency
}
} else if (column.uidt === UITypes.DateTime) {
// TODO(import): centralise
// hold the possible date format found in the date
const dateFormat: Record<string, number> = {}
if (
rows.slice(1, this.config.maxRowsToParse).every((v: any, i: any) => {
const cellId = this.xlsx.utils.encode_cell({ const cellId = this.xlsx.utils.encode_cell({
c: range.s.c + col, c: range.s.c + col,
r: i + columnNameRowExist, r: columnNameRowExist,
}) })
const cellProps = ws[cellId] || {}
column.uidt = excelTypeToUidt[cellProps.t] || UITypes.SingleLineText
if (column.uidt === UITypes.SingleLineText) {
// check for long text
if (isMultiLineTextType(rows)) {
column.uidt = UITypes.LongText
}
if (isEmailType(rows)) {
column.uidt = UITypes.Email
}
if (isUrlType(rows)) {
column.uidt = UITypes.URL
} else {
const vals = rows
.slice(columnNameRowExist ? 1 : 0)
.map((r: any) => r[col])
.filter((v: any) => v !== null && v !== undefined && v.toString().trim() !== '')
const checkboxType = isCheckboxType(vals)
if (checkboxType.length === 1) {
column.uidt = UITypes.Checkbox
} else {
// Single Select / Multi Select
Object.assign(column, extractMultiOrSingleSelectProps(vals))
}
}
} else if (column.uidt === UITypes.Number) {
if (
rows.slice(1, this.config.maxRowsToParse).some((v: any) => {
return v && v[col] && parseInt(v[col]) !== +v[col]
})
) {
column.uidt = UITypes.Decimal
}
if (
rows.slice(1, this.config.maxRowsToParse).every((v: any, i: any) => {
const cellId = this.xlsx.utils.encode_cell({
c: range.s.c + col,
r: i + columnNameRowExist,
})
const cellObj = ws[cellId]
return !cellObj || (cellObj.w && cellObj.w.startsWith('$'))
})
) {
column.uidt = UITypes.Currency
}
} else if (column.uidt === UITypes.DateTime) {
// TODO(import): centralise
// hold the possible date format found in the date
const dateFormat: Record<string, number> = {}
if (
rows.slice(1, this.config.maxRowsToParse).every((v: any, i: any) => {
const cellId = this.xlsx.utils.encode_cell({
c: range.s.c + col,
r: i + columnNameRowExist,
})
const cellObj = ws[cellId]
const isDate = !cellObj || (cellObj.w && cellObj.w.split(' ').length === 1)
if (isDate && cellObj) {
dateFormat[getDateFormat(cellObj.w)] = (dateFormat[getDateFormat(cellObj.w)] || 0) + 1
}
return isDate
})
) {
column.uidt = UITypes.Date
// take the date format with the max occurrence
column.meta.date_format =
Object.keys(dateFormat).reduce((x, y) => (dateFormat[x] > dateFormat[y] ? x : y)) || 'YYYY/MM/DD'
}
}
table.columns.push(column)
}
const cellObj = ws[cellId] let rowIndex = 0
const isDate = !cellObj || (cellObj.w && cellObj.w.split(' ').length === 1) for (const row of rows.slice(1)) {
if (isDate && cellObj) { const rowData: Record<string, any> = {}
dateFormat[getDateFormat(cellObj.w)] = (dateFormat[getDateFormat(cellObj.w)] || 0) + 1 for (let i = 0; i < table.columns.length; i++) {
if (table.columns[i].uidt === UITypes.Checkbox) {
rowData[table.columns[i].column_name] = getCheckboxValue(row[i])
} else if (table.columns[i].uidt === UITypes.Currency) {
const cellId = this.xlsx.utils.encode_cell({
c: range.s.c + i,
r: rowIndex + columnNameRowExist,
})
const cellObj = ws[cellId]
rowData[table.columns[i].column_name] = (cellObj && cellObj.w && cellObj.w.replace(/[^\d.]+/g, '')) || row[i]
} else if (table.columns[i].uidt === UITypes.SingleSelect || table.columns[i].uidt === UITypes.MultiSelect) {
rowData[table.columns[i].column_name] = (row[i] || '').toString().trim() || null
} else if (table.columns[i].uidt === UITypes.Date) {
const cellId = this.xlsx.utils.encode_cell({
c: range.s.c + i,
r: rowIndex + columnNameRowExist,
})
const cellObj = ws[cellId]
rowData[table.columns[i].column_name] = (cellObj && cellObj.w) || row[i]
} else {
// toto: do parsing if necessary based on type
rowData[table.columns[i].column_name] = row[i]
}
} }
return isDate this.data[tn].push(rowData)
}) rowIndex++
) { }
column.uidt = UITypes.Date this.project.tables.push(table)
// take the date format with the max occurrence resolve(true)
column.meta.date_format = }),
Object.keys(dateFormat).reduce((x, y) => (dateFormat[x] > dateFormat[y] ? x : y)) || 'YYYY/MM/DD' )
} }, Promise.resolve()).then(callback)
}
table.columns.push(column)
}
let rowIndex = 0
for (const row of rows.slice(1)) {
const rowData: Record<string, any> = {}
for (let i = 0; i < table.columns.length; i++) {
if (table.columns[i].uidt === UITypes.Checkbox) {
rowData[table.columns[i].column_name] = getCheckboxValue(row[i])
} else if (table.columns[i].uidt === UITypes.Currency) {
const cellId = this.xlsx.utils.encode_cell({
c: range.s.c + i,
r: rowIndex + columnNameRowExist,
})
const cellObj = ws[cellId]
rowData[table.columns[i].column_name] = (cellObj && cellObj.w && cellObj.w.replace(/[^\d.]+/g, '')) || row[i]
} else if (table.columns[i].uidt === UITypes.SingleSelect || table.columns[i].uidt === UITypes.MultiSelect) {
rowData[table.columns[i].column_name] = (row[i] || '').toString().trim() || null
} else if (table.columns[i].uidt === UITypes.Date) {
const cellId = this.xlsx.utils.encode_cell({
c: range.s.c + i,
r: rowIndex + columnNameRowExist,
})
const cellObj = ws[cellId]
rowData[table.columns[i].column_name] = (cellObj && cellObj.w) || row[i]
} else {
// toto: do parsing if necessary based on type
rowData[table.columns[i].column_name] = row[i]
}
}
this.data[tn].push(rowData)
rowIndex++
}
this.project.tables.push(table)
}
} }
getTemplate() { getTemplate() {

Loading…
Cancel
Save