Browse Source

feat: improved migration logic

nc-feat/attachment-clean-up
mertmit 4 months ago
parent
commit
e6354ad6a7
  1. 529
      packages/nocodb/src/modules/jobs/migration-jobs/nc_job_001_attachment.ts

529
packages/nocodb/src/modules/jobs/migration-jobs/nc_job_001_attachment.ts

@ -152,326 +152,335 @@ export class AttachmentMigration {
.insert(fileReferenceBuffer); .insert(fileReferenceBuffer);
} }
// eslint-disable-next-line no-constant-condition let processedModelsCount = 0;
while (true) {
const modelLimit = 100;
let modelOffset = 0; const processModel = async (modelData) => {
const { fk_workspace_id, base_id, source_id, fk_model_id } = modelData;
const modelsWithAttachmentColumns = []; const context = {
workspace_id: fk_workspace_id,
base_id,
};
// get models that have at least one attachment column, and not processed const source = await Source.get(context, source_id);
// eslint-disable-next-line no-constant-condition if (!source) {
while (true) { this.log(`source not found for ${source_id}`);
const selectFields = [ return;
...(Noco.isEE() ? ['fk_workspace_id'] : []),
'base_id',
'source_id',
'fk_model_id',
];
const models = await ncMeta
.knexConnection(MetaTable.COLUMNS)
.select(selectFields)
.where('uidt', UITypes.Attachment)
.whereNotIn(
'fk_model_id',
ncMeta
.knexConnection(temp_processed_models_table)
.select('fk_model_id')
.where('completed', true),
)
.groupBy(selectFields)
.limit(modelLimit)
.offset(modelOffset);
modelOffset += modelLimit;
if (!models?.length) {
break;
}
modelsWithAttachmentColumns.push(...models);
} }
if (!modelsWithAttachmentColumns?.length) { const model = await Model.get(context, fk_model_id);
break;
}
this.log( if (!model) {
`Found ${modelsWithAttachmentColumns.length} models with attachment columns`, this.log(`model not found for ${fk_model_id}`);
); return;
}
let processedModelsCount = 0; await model.getColumns(context);
for (const modelData of modelsWithAttachmentColumns) { const attachmentColumns = model.columns.filter(
const { fk_workspace_id, base_id, source_id, fk_model_id } = (c) => c.uidt === UITypes.Attachment,
modelData; );
const context = { const dbDriver = await NcConnectionMgrv2.get(source);
workspace_id: fk_workspace_id,
base_id,
};
const source = await Source.get(context, source_id); if (!dbDriver) {
this.log(`connection can't achieved for ${source_id}`);
return;
}
if (!source) { const baseModel = await Model.getBaseModelSQL(context, {
this.log(`source not found for ${source_id}`); model,
continue; dbDriver,
} });
const model = await Model.get(context, fk_model_id); const processedModel = await ncMeta
.knexConnection(temp_processed_models_table)
.where('fk_model_id', fk_model_id)
.first();
if (!model) { const dataLimit = 10;
this.log(`model not found for ${fk_model_id}`); let dataOffset = 0;
continue;
}
await model.getColumns(context); if (!processedModel) {
await ncMeta
.knexConnection(temp_processed_models_table)
.insert({ fk_model_id, offset: 0 });
} else {
dataOffset = processedModel.offset;
}
const attachmentColumns = model.columns.filter( // eslint-disable-next-line no-constant-condition
(c) => c.uidt === UITypes.Attachment, while (true) {
const data = await baseModel.list(
{
fieldsSet: new Set(
model.primaryKeys
.map((c) => c.title)
.concat(attachmentColumns.map((c) => c.title)),
),
sort: model.primaryKeys.map((c) => c.title),
limit: dataLimit,
offset: dataOffset,
},
{
ignoreViewFilterAndSort: true,
},
); );
const dbDriver = await NcConnectionMgrv2.get(source); dataOffset += dataLimit;
if (!dbDriver) { if (!data?.length) {
this.log(`connection can't achieved for ${source_id}`); break;
continue;
} }
const baseModel = await Model.getBaseModelSQL(context, { const updatePayload = [];
model,
dbDriver,
});
const processedModel = await ncMeta for (const row of data) {
.knexConnection(temp_processed_models_table) const updateData = {};
.where('fk_model_id', fk_model_id)
.first();
const dataLimit = 10; let updateRequired = false;
let dataOffset = 0;
if (!processedModel) { for (const column of attachmentColumns) {
await ncMeta let attachmentArr = row[column.title];
.knexConnection(temp_processed_models_table)
.insert({ fk_model_id, offset: 0 });
} else {
dataOffset = processedModel.offset;
}
// eslint-disable-next-line no-constant-condition
while (true) {
const data = await baseModel.list(
{
fieldsSet: new Set(
model.primaryKeys
.map((c) => c.title)
.concat(attachmentColumns.map((c) => c.title)),
),
sort: model.primaryKeys.map((c) => c.title),
limit: dataLimit,
offset: dataOffset,
},
{
ignoreViewFilterAndSort: true,
},
);
dataOffset += dataLimit;
if (!data?.length) {
break;
}
const updatePayload = []; if (!attachmentArr?.length) {
continue;
for (const row of data) { }
const updateData = {};
let updateRequired = false;
for (const column of attachmentColumns) {
let attachmentArr = row[column.title];
if (!attachmentArr?.length) { try {
continue; if (typeof attachmentArr === 'string') {
attachmentArr = JSON.parse(attachmentArr);
} }
} catch (e) {
this.log(`error parsing attachment data ${attachmentArr}`);
continue;
}
try { if (Array.isArray(attachmentArr)) {
if (typeof attachmentArr === 'string') { attachmentArr = attachmentArr.map((a) =>
attachmentArr = JSON.parse(attachmentArr); extractProps(a, [
} 'id',
} catch (e) { 'url',
this.log(`error parsing attachment data ${attachmentArr}`); 'path',
continue; 'title',
} 'mimetype',
'size',
'icon',
'width',
'height',
]),
);
if (Array.isArray(attachmentArr)) { for (const attachment of attachmentArr) {
attachmentArr = attachmentArr.map((a) => try {
extractProps(a, [ if ('path' in attachment || 'url' in attachment) {
'id', const filePath = `nc/uploads/${
'url', attachment.path?.replace(/^download\//, '') ||
'path', this.normalizeUrl(attachment.url)
'title', }`;
'mimetype',
'size', const isReferenced = await ncMeta
'icon', .knexConnection(temp_file_references_table)
'width', .where('file_path', filePath)
'height', .first();
]),
); if (!isReferenced) {
// file is from another storage adapter
for (const attachment of attachmentArr) { this.log(
try { `file not found in file references table ${
if ('path' in attachment || 'url' in attachment) { attachment.path || attachment.url
const filePath = `nc/uploads/${ }, ${filePath}`,
attachment.path?.replace(/^download\//, '') || );
this.normalizeUrl(attachment.url) } else if (isReferenced.referenced === false) {
}`; const fileNameWithExt = path.basename(filePath);
const isReferenced = await ncMeta const mimetype =
attachment.mimetype ||
mimetypes[path.extname(fileNameWithExt).slice(1)];
await ncMeta
.knexConnection(temp_file_references_table) .knexConnection(temp_file_references_table)
.where('file_path', filePath) .where('file_path', filePath)
.update({
mimetype,
referenced: true,
});
// insert file reference if not exists
const fileReference = await ncMeta
.knexConnection(MetaTable.FILE_REFERENCES)
.where('file_url', attachment.path || attachment.url)
.andWhere('storage', storageAdapterType)
.first(); .first();
if (!isReferenced) { if (!fileReference) {
// file is from another storage adapter await FileReference.insert(
this.log( {
`file not found in file references table ${ workspace_id: RootScopes.ROOT,
attachment.path || attachment.url base_id: RootScopes.ROOT,
}, ${filePath}`, },
{
storage: storageAdapterType,
file_url: attachment.path || attachment.url,
file_size: attachment.size,
deleted: true,
},
); );
} else if (isReferenced.referenced === false) {
const fileNameWithExt = path.basename(filePath);
const mimetype =
attachment.mimetype ||
mimetypes[path.extname(fileNameWithExt).slice(1)];
await ncMeta
.knexConnection(temp_file_references_table)
.where('file_path', filePath)
.update({
mimetype,
referenced: true,
});
// insert file reference if not exists
const fileReference = await ncMeta
.knexConnection(MetaTable.FILE_REFERENCES)
.where(
'file_url',
attachment.path || attachment.url,
)
.andWhere('storage', storageAdapterType)
.first();
if (!fileReference) {
await FileReference.insert(
{
workspace_id: RootScopes.ROOT,
base_id: RootScopes.ROOT,
},
{
storage: storageAdapterType,
file_url: attachment.path || attachment.url,
file_size: attachment.size,
deleted: true,
},
);
}
} }
}
if (!('id' in attachment)) { if (!('id' in attachment)) {
attachment.id = await FileReference.insert(context, { attachment.id = await FileReference.insert(context, {
source_id: source.id, source_id: source.id,
fk_model_id, fk_model_id,
fk_column_id: column.id, fk_column_id: column.id,
file_url: attachment.path || attachment.url, file_url: attachment.path || attachment.url,
file_size: attachment.size, file_size: attachment.size,
is_external: !source.isMeta(), is_external: !source.isMeta(),
deleted: false, deleted: false,
}); });
updateRequired = true; updateRequired = true;
}
} }
} catch (e) {
this.log(
`Error processing attachment ${JSON.stringify(
attachment,
)}`,
);
this.log(e);
throw e;
} }
} catch (e) {
this.log(
`Error processing attachment ${JSON.stringify(
attachment,
)}`,
);
this.log(e);
throw e;
} }
} }
if (updateRequired) {
updateData[column.column_name] =
JSON.stringify(attachmentArr);
}
} }
if (Object.keys(updateData).length === 0) { if (updateRequired) {
continue; updateData[column.column_name] = JSON.stringify(attachmentArr);
} }
}
for (const pk of model.primaryKeys) { if (Object.keys(updateData).length === 0) {
updateData[pk.column_name] = row[pk.title]; continue;
} }
updatePayload.push(updateData); for (const pk of model.primaryKeys) {
updateData[pk.column_name] = row[pk.title];
} }
if (updatePayload.length > 0) { updatePayload.push(updateData);
for (const updateData of updatePayload) { }
const wherePk = await baseModel._wherePk(
baseModel._extractPksValues(updateData),
);
if (!wherePk) { if (updatePayload.length > 0) {
this.log(`where pk not found for ${updateData}`); for (const updateData of updatePayload) {
continue; const wherePk = await baseModel._wherePk(
} baseModel._extractPksValues(updateData),
);
await baseModel.execAndParse( if (!wherePk) {
baseModel this.log(`where pk not found for ${updateData}`);
.dbDriver(baseModel.tnPath) continue;
.update(updateData)
.where(wherePk),
null,
{
raw: true,
},
);
} }
}
// update offset await baseModel.execAndParse(
await ncMeta baseModel
.knexConnection(temp_processed_models_table) .dbDriver(baseModel.tnPath)
.where('fk_model_id', fk_model_id) .update(updateData)
.update({ offset: dataOffset }); .where(wherePk),
null,
{
raw: true,
},
);
}
} }
// mark model as processed // update offset
await ncMeta await ncMeta
.knexConnection(temp_processed_models_table) .knexConnection(temp_processed_models_table)
.where('fk_model_id', fk_model_id) .where('fk_model_id', fk_model_id)
.update({ completed: true }); .update({ offset: dataOffset });
}
processedModelsCount += 1; // mark model as processed
await ncMeta
.knexConnection(temp_processed_models_table)
.where('fk_model_id', fk_model_id)
.update({ completed: true });
processedModelsCount += 1;
};
const selectFields = [
...(Noco.isEE() ? ['fk_workspace_id'] : []),
'base_id',
'source_id',
'fk_model_id',
];
const numberOfModelsToBeProcessed = (
await ncMeta.knexConnection
.from(
ncMeta
.knexConnection(MetaTable.COLUMNS)
.where('uidt', UITypes.Attachment)
.whereNotIn(
'fk_model_id',
ncMeta
.knexConnection(temp_processed_models_table)
.select('fk_model_id')
.where('completed', true),
)
.groupBy(selectFields)
.count('*', { as: 'count' })
.as('t'),
)
.sum('count as count')
.first()
)?.count;
const processModelLimit = 100;
// get models that have at least one attachment column, and not processed
this.log( // eslint-disable-next-line no-constant-condition
`Processed ${processedModelsCount} of ${modelsWithAttachmentColumns.length} models`, while (true) {
); // this will return until all models marked as processed
const models = await ncMeta
.knexConnection(MetaTable.COLUMNS)
.select(selectFields)
.where('uidt', UITypes.Attachment)
.whereNotIn(
'fk_model_id',
ncMeta
.knexConnection(temp_processed_models_table)
.select('fk_model_id')
.where('completed', true),
)
.groupBy(selectFields)
.limit(processModelLimit);
if (!models?.length) {
break;
}
for (const model of models) {
try {
await processModel(model);
this.log(
`Processed ${processedModelsCount} of ${numberOfModelsToBeProcessed} models`,
);
} catch (e) {
this.log(`Error processing model ${model.fk_model_id}`);
this.log(e);
}
} }
} }
this.log(
`Processed total of ${numberOfModelsToBeProcessed} models with attachments`,
);
} catch (e) { } catch (e) {
this.log(`There was an error while processing attachment migration job`); this.log(`There was an error while processing attachment migration job`);
this.log(e); this.log(e);

Loading…
Cancel
Save