@@ -2,6 +2,7 @@
 import { isLinksOrLTAR, RelationTypes } from 'nocodb-sdk';
 import sizeof from 'object-sizeof';
 import { Logger } from '@nestjs/common';
+import PQueue from 'p-queue';
 import type { BulkDataAliasService } from '~/services/bulk-data-alias.service';
 import type { TablesService } from '~/services/tables.service';
 import type { AirtableBase } from 'airtable/lib/airtable_base';
@@ -10,11 +11,12 @@ import type { Source } from '~/models';
 const logger = new Logger('BaseModelSqlv2');
-const BULK_DATA_BATCH_COUNT = 20; // check size for every 100 records
-const BULK_DATA_BATCH_SIZE = 50 * 1024; // in bytes
-const BULK_LINK_BATCH_COUNT = 1000; // process 1000 links at a time
-const BULK_PARALLEL_PROCESS = 2;
+const BULK_DATA_BATCH_COUNT = 20; // check size for every 20 records
+const BULK_DATA_BATCH_SIZE = 20 * 1024; // in bytes
+const BULK_LINK_BATCH_COUNT = 200; // process 200 links at a time
+const BULK_PARALLEL_PROCESS = 5;
 const STREAM_BUFFER_LIMIT = 200;
+const QUEUE_BUFFER_LIMIT = 50;
 interface AirtableImportContext {
   bulkDataService: BulkDataAliasService;
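A note on the retuned constants above: the size check now runs every 20 records against a roughly 20 KB buffer, link rows are flushed per associative table at 200, and the new `QUEUE_BUFFER_LIMIT` drives the stream pause/resume logic added further down. A condensed sketch of how these thresholds are meant to interact (illustrative helpers only, not code from this change):

```ts
import sizeof from 'object-sizeof';

// Illustrative only; the real checks live inline in the handlers below.
const BULK_DATA_BATCH_COUNT = 20;
const BULK_DATA_BATCH_SIZE = 20 * 1024;
const BULK_LINK_BATCH_COUNT = 200;
const QUEUE_BUFFER_LIMIT = 50;

const timeToCheckSize = (tempCount: number) => tempCount >= BULK_DATA_BATCH_COUNT;
const shouldFlushData = (tempData: unknown[]) => sizeof(tempData) >= BULK_DATA_BATCH_SIZE;
const shouldFlushLinks = (rows: unknown[]) => rows.length >= BULK_LINK_BATCH_COUNT;
const shouldPauseStream = (queueSize: number) => queueSize >= QUEUE_BUFFER_LIMIT;
const shouldResumeStream = (queueSize: number) => queueSize < QUEUE_BUFFER_LIMIT / 2;
```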
@@ -31,6 +33,8 @@ async function readAllData({
   table: { title?: string };
   fields?;
   atBase: AirtableBase;
+  dataStream: Readable;
+  counter?: { streamingCounter: number };
   logBasic?: (string) => void;
   logDetailed?: (string) => void;
   logWarning?: (string) => void;
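`readAllData` now receives the destination stream and an optional counter instead of returning an `EntityMap`. Its body is not part of this hunk, but from the consumer side of this diff (which calls `JSON.parse(record)` and decrements `counter.streamingCounter`) the producer presumably pushes serialized records and bumps the counter so it can throttle Airtable page fetches against `STREAM_BUFFER_LIMIT`. A minimal sketch of that assumed shape:

```ts
import { Readable } from 'stream';

// Assumed producer shape, inferred from the consumer side of this diff;
// not the actual readAllData implementation.
function pushPage(
  dataStream: Readable,
  counter: { streamingCounter: number } | undefined,
  page: Array<{ id: string; fields: Record<string, unknown> }>,
) {
  for (const record of page) {
    dataStream.push(JSON.stringify(record)); // the 'data' handler JSON.parse()s each chunk
    if (counter) counter.streamingCounter++;
  }
}

// Once the last Airtable page has been read, end the stream so 'end' handlers run:
// dataStream.push(null);
```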
@@ -127,18 +131,25 @@ export async function importData({
   services: AirtableImportContext;
 }): Promise<EntityMap> {
   try {
-    // returns EntityMap which allows us to stream data
-    const records: EntityMap = await readAllData({
+    const counter = {
+      streamingCounter: 0,
+    };
+    const dataStream = new Readable({
+      read() {},
+    });
+    dataStream.pause();
+    readAllData({
       table,
       atBase,
       logDetailed,
       logBasic,
     });
-    await new Promise(async (resolve) => {
-      const readable = records.getStream();
-      const allRecordsCount = await records.getCount();
-      const promises = [];
+    return new Promise(async (resolve) => {
+      const queue = new PQueue({ concurrency: BULK_PARALLEL_PROCESS });
       const ltarPromise = importLTARData({
         table,
@@ -150,6 +161,7 @@ export async function importData({
         syncDB,
         source,
         services,
+        queue,
         logBasic,
         logDetailed,
         logWarning,
@@ -163,18 +175,15 @@ export async function importData({
       let importedCount = 0;
       let tempCount = 0;
-      // we keep track of active process to pause and resume the stream as we have async calls within the stream and we don't want to load all data in memory
-      let activeProcess = 0;
-      readable.on('data', async (record) => {
-        promises.push(
+      dataStream.on('data', async (record) => {
+        counter.streamingCounter--;
+        record = JSON.parse(record);
+        queue.add(
+          () =>
             new Promise(async (resolve) => {
               try {
-                activeProcess++;
-                if (activeProcess >= BULK_PARALLEL_PROCESS) readable.pause();
                 const { id: rid, ...fields } = record;
-                const r = await nocoBaseDataProcessing_v2(sDB, table, {
+                const r = await nocoBaseDataProcessing_v2(syncDB, table, {
                   id: rid,
                   fields,
                 });
@@ -183,8 +192,6 @@ export async function importData({
                 if (tempCount >= BULK_DATA_BATCH_COUNT) {
                   if (sizeof(tempData) >= BULK_DATA_BATCH_SIZE) {
-                    readable.pause();
                     let insertArray = tempData.splice(0, tempData.length);
                     await services.bulkDataService.bulkDataInsert({
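The `readable.pause()` dropped here is no longer needed: the shared p-queue already caps how many batches run at once, and stream-level backpressure is applied in the next hunk via `QUEUE_BUFFER_LIMIT`. The flush gate itself is unchanged in spirit: a cheap record counter decides when to run the comparatively expensive `sizeof()` check. A simplified sketch of that gate, as a hypothetical helper:

```ts
import sizeof from 'object-sizeof';

const BULK_DATA_BATCH_COUNT = 20;
const BULK_DATA_BATCH_SIZE = 20 * 1024;

// Hypothetical helper mirroring the inline flush gate above: count records
// cheaply, measure bytes only every BULK_DATA_BATCH_COUNT records, and flush
// once the buffer crosses the byte threshold. Returns the new tempCount.
async function maybeFlush(
  tempData: Record<string, unknown>[],
  tempCount: number,
  insert: (rows: Record<string, unknown>[]) => Promise<void>,
): Promise<number> {
  if (tempCount >= BULK_DATA_BATCH_COUNT) {
    if (sizeof(tempData) >= BULK_DATA_BATCH_SIZE) {
      // splice in place so the outer closure keeps the same array instance
      const insertArray = tempData.splice(0, tempData.length);
      await insert(insertArray);
    }
    return 0; // reset the counter whether or not we flushed
  }
  return tempCount;
}
```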
@@ -199,37 +206,37 @@ export async function importData({
                     logBasic(
                       `:: Importing '${
                         table.title
-                      }' data :: ${importedCount} - ${Math.min(
-                        importedCount + insertArray.length,
-                        allRecordsCount,
-                      )}`,
+                      }' data :: ${importedCount} - ${
+                        importedCount + insertArray.length
+                      }`,
                     );
                     importedCount += insertArray.length;
                     insertArray = [];
-                    readable.resume();
                   }
                   tempCount = 0;
                 }
-                activeProcess--;
-                if (activeProcess < BULK_PARALLEL_PROCESS) readable.resume();
+                if (queue.size < QUEUE_BUFFER_LIMIT / 2) dataStream.resume();
                 resolve(true);
               } catch (e) {
                 logger.error(e);
                 logWarning(
                   `There were errors on importing '${table.title}' data :: ${e}`,
                 );
-                readable.resume();
+                if (queue.size < QUEUE_BUFFER_LIMIT / 2) dataStream.resume();
                 resolve(true);
               }
             }),
         );
+        if (queue.size >= QUEUE_BUFFER_LIMIT) dataStream.pause();
       });
-      readable.on('end', async () => {
+      dataStream.on('end', async () => {
         try {
           // ensure all chunks are processed
-          await Promise.all(promises);
+          await queue.onIdle();
           // insert remaining data
           if (tempData.length > 0) {
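Taken together, this hunk replaces the hand-rolled `activeProcess` accounting with stream-level backpressure: pause the manually driven `Readable` once `QUEUE_BUFFER_LIMIT` callbacks are queued, resume once the queue drains below half of that, and wait on `queue.onIdle()` instead of collecting promises. A generic sketch of the pattern, detached from the import-specific work (`handleRecord` is a placeholder):

```ts
import { Readable } from 'stream';
import PQueue from 'p-queue';

const BULK_PARALLEL_PROCESS = 5;
const QUEUE_BUFFER_LIMIT = 50;

// Generic sketch of the consumer pattern used above.
const dataStream = new Readable({ read() {} }); // producer pushes; no pull logic
dataStream.pause(); // stay paused until the consumer is wired up

const queue = new PQueue({ concurrency: BULK_PARALLEL_PROCESS });

dataStream.on('data', (chunk) => {
  queue.add(async () => {
    const record = JSON.parse(String(chunk));
    await handleRecord(record);
    // resume reading once enough queued work has drained
    if (queue.size < QUEUE_BUFFER_LIMIT / 2) dataStream.resume();
  });
  // backpressure: stop emitting 'data' while too many jobs are waiting
  if (queue.size >= QUEUE_BUFFER_LIMIT) dataStream.pause();
});

dataStream.on('end', async () => {
  await queue.onIdle(); // wait for every queued job, no promise array needed
  // flush whatever is still buffered here
});

async function handleRecord(_record: unknown): Promise<void> {
  /* per-record work goes here */
}

dataStream.resume(); // start the flow
```

Note that p-queue's `size` counts only jobs still waiting to start (running jobs are tracked separately as `pending`), so the pause threshold effectively allows a few extra records in flight on top of the buffered ones.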
@@ -282,6 +289,7 @@ export async function importLTARData({
   syncDB,
   source,
   services,
+  queue,
   logBasic = (_str) => {},
   logDetailed = (_str) => {},
   logWarning = (_str) => {},
@@ -301,6 +309,7 @@ export async function importLTARData({
   syncDB;
   source: Source;
   services: AirtableImportContext;
+  queue: PQueue;
   logBasic: (string) => void;
   logDetailed: (string) => void;
   logWarning: (string) => void;
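Since `importLTARData` now takes the same `PQueue` instance that `importData` creates (see the `queue,` argument added earlier), row processing and link extraction share a single concurrency budget and a single idle signal. Hypothetical illustration of that sharing:

```ts
import PQueue from 'p-queue';

// Hypothetical: two producers enqueueing onto one shared queue, so combined
// in-flight work never exceeds the configured concurrency.
const shared = new PQueue({ concurrency: 5 /* BULK_PARALLEL_PROCESS */ });

const enqueueDataBatch = (work: () => Promise<void>) => shared.add(work);
const enqueueLinkBatch = (work: () => Promise<void>) => shared.add(work);

// Both sides can read shared.size for backpressure and await shared.onIdle()
// to know that all queued work (rows and links alike) has finished.
```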
@@ -366,22 +375,24 @@ export async function importLTARData({
   let nestedLinkCnt = 0;
   let importedCount = 0;
-  let assocTableData = [];
+  const assocTableData = {};
+  // extract link data from records
+  return new Promise((resolve, reject) => {
+    dataStream.on('data', async (record) => {
+      record = JSON.parse(record);
       // Iterate over all related M2M associative table
       for (const assocMeta of assocTableMetas) {
-    // extract link data from records
-    await new Promise((resolve) => {
-      const promises = [];
-      const readable = allData.getStream();
-      readable.on('data', async (record) => {
-        promises.push(
+        if (!assocTableData[assocMeta.modelMeta.id]) {
+          assocTableData[assocMeta.modelMeta.id] = [];
+        }
+        queue.add(
+          () =>
             new Promise(async (resolve) => {
               try {
                 const { id: _atId, ...rec } = record;
                 // todo: use actual alias instead of sanitized
-                assocTableData.push(
+                assocTableData[assocMeta.modelMeta.id].push(
                   ...(
                     rec?.[atNcAliasRef[table.id][assocMeta.colMeta.title]] || []
                   ).map((id) => ({
@@ -390,19 +401,22 @@ export async function importLTARData({
                   })),
                 );
-                if (assocTableData.length >= BULK_LINK_BATCH_COUNT) {
-                  readable.pause();
-                  let insertArray = assocTableData.splice(
-                    0,
-                    assocTableData.length,
-                  );
+                if (
+                  assocTableData[assocMeta.modelMeta.id].length >=
+                  BULK_LINK_BATCH_COUNT
+                ) {
+                  let insertArray = assocTableData[
+                    assocMeta.modelMeta.id
+                  ].splice(0, assocTableData[assocMeta.modelMeta.id].length);
+                  const lastImportedCount = importedCount;
+                  importedCount += insertArray.length;
                   logBasic(
                     `:: Importing '${
                       table.title
-                    }' LTAR data :: ${importedCount} - ${
-                      importedCount + insertArray.length
+                    }' LTAR data :: ${lastImportedCount} - ${
+                      lastImportedCount + insertArray.length
                     }`,
                   );
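Two adjustments land in this hunk: link rows are now buffered per associative table (keyed by `assocMeta.modelMeta.id`) instead of in one flat array, and `importedCount` is bumped synchronously before the awaited insert, with the pre-increment value kept in `lastImportedCount` for the log, which keeps the logged ranges consistent when several batches run concurrently. A reduced sketch of that bookkeeping (hypothetical helper, simplified shapes):

```ts
const BULK_LINK_BATCH_COUNT = 200;

// Hypothetical helper condensing the per-table buffering above.
const assocTableData: Record<string, Record<string, unknown>[]> = {};
let importedCount = 0;

async function addLinkRows(
  modelId: string,
  rows: Record<string, unknown>[],
  insert: (rows: Record<string, unknown>[]) => Promise<void>,
  logBasic: (msg: string) => void,
): Promise<void> {
  (assocTableData[modelId] ??= []).push(...rows);

  if (assocTableData[modelId].length >= BULK_LINK_BATCH_COUNT) {
    const insertArray = assocTableData[modelId].splice(
      0,
      assocTableData[modelId].length,
    );
    const lastImportedCount = importedCount;
    importedCount += insertArray.length; // bump before any await
    logBasic(
      `:: Importing LTAR data :: ${lastImportedCount} - ${
        lastImportedCount + insertArray.length
      }`,
    );
    await insert(insertArray);
  }
}
```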
@@ -415,27 +429,29 @@ export async function importLTARData({
                     foreign_key_checks: !!source.isMeta(),
                   });
-                  importedCount += insertArray.length;
                   insertArray = [];
-                  readable.resume();
                 }
+                if (queue.size < QUEUE_BUFFER_LIMIT / 2) dataStream.resume();
                 resolve(true);
               } catch (e) {
                 logger.error(e);
                 logWarning(
                   `There were errors on importing '${table.title}' LTAR data :: ${e}`,
                 );
-                readable.resume();
+                if (queue.size < QUEUE_BUFFER_LIMIT / 2) dataStream.resume();
                 resolve(true);
               }
             }),
         );
+      }
+      if (queue.size >= QUEUE_BUFFER_LIMIT) dataStream.pause();
     });
-    readable.on('end', async () => {
+    dataStream.on('end', async () => {
       try {
         // ensure all chunks are processed
-        await Promise.all(promises);
+        await queue.onIdle();
         // insert remaining data
         if (assocTableData.length >= 0) {