Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 138 additions & 0 deletions app/client/packages/rts/src/ctl/verify-migration.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import { MongoClient } from "mongodb";
import pg from "pg";
import { transformFields } from "./utils.mjs";

function isArchivedObject(doc) {
return doc.deleted === true || doc.deletedAt != null;
}

/**
* Verifies data integrity between MongoDB and PostgreSQL
* @param {string} mongoUrl - MongoDB connection URL
* @param {string} pgUrl - PostgreSQL connection URL
* @returns {Promise<void>}
*/
// usage node verify-migration.mjs --mongodb-url="mongodb://localhost:27017/dbname" --postgres-url="postgresql://user:password@localhost:5432/dbname"
async function verifyMigration(mongoUrl, pgUrl) {
const mongoClient = new MongoClient(mongoUrl);
const pgClient = new pg.Client(pgUrl);

try {
await mongoClient.connect();
await pgClient.connect();

const mongoDb = mongoClient.db();
const collections = await mongoDb
.listCollections({}, { nameOnly: true })
.toArray();

let hasDiscrepancy = false;
const verificationResults = [];

for (const collection of collections) {
const collectionName = collection.name;

if (collectionName.startsWith('mongock')) {
continue;
}

console.log(`\nVerifying collection: ${collectionName}`);
const pgTableName = collectionName.toLowerCase();

// Get all MongoDB documents and filter out archived ones
const allMongoDocs = await mongoDb
.collection(collectionName)
.find({})
.toArray();

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Optimize data fetching for large collections

Fetching all documents with toArray() can consume a lot of memory for large collections. Consider using cursors or processing documents in batches to handle large datasets efficiently.


const mongoDocs = allMongoDocs.filter(doc => !isArchivedObject(doc));

const missingInPostgres = [];

// Verify each document exists in PostgreSQL
for (const mongoDoc of mongoDocs) {
transformFields(mongoDoc);

const pgRecord = await pgClient.query(
`SELECT id FROM ${pgTableName} WHERE id = $1 AND "deletedAt" IS NULL`,
[mongoDoc.id]
);

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Sanitize table names to prevent SQL injection

Directly interpolating pgTableName into SQL queries can lead to SQL injection if collectionName contains malicious input. Validate collectionName to ensure it contains only safe characters before using it in queries.

Apply this diff to sanitize the table name:

+const sanitizedTableName = collectionName.replace(/[^a-zA-Z0-9_]/g, '').toLowerCase();
-const pgTableName = collectionName.toLowerCase();
+const pgTableName = sanitizedTableName;

Also applies to: 68-70

Comment thread
AnaghHegde marked this conversation as resolved.
Outdated

if (pgRecord.rows.length === 0) {
missingInPostgres.push(mongoDoc.id);
hasDiscrepancy = true;
}
}

// Get PostgreSQL documents not in MongoDB
const pgDocs = await pgClient.query(
`SELECT id FROM ${pgTableName} WHERE "deletedAt" IS NULL`
);
Comment on lines +94 to +96

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Improve PostgreSQL query efficiency

Retrieving all rows at once may cause performance issues with large tables. Use pagination or streaming to handle large datasets without overwhelming memory resources.


const pgIds = new Set(pgDocs.rows.map(row => row.id));
const mongoIds = new Set(mongoDocs.map(doc => doc.id));

const missingInMongo = [...pgIds].filter(id => !mongoIds.has(id));

if (missingInMongo.length > 0 || missingInPostgres.length > 0) {
verificationResults.push({
collectionName,
mongoCount: mongoDocs.length,
pgCount: pgIds.size,
missingInPostgres,
missingInMongo
});
}
}

// Print verification summary
if (verificationResults.length > 0) {
console.error('\n❌ Data verification failed: Discrepancies found');

verificationResults.forEach(result => {
console.error(`\nCollection: ${result.collectionName}`);
console.error(`MongoDB count: ${result.mongoCount}, PostgreSQL count: ${result.pgCount}`);

if (result.missingInPostgres.length > 0) {
console.error('\nDocuments missing in PostgreSQL:');
result.missingInPostgres.forEach(id => console.error(`- ${id}`));
}

if (result.missingInMongo.length > 0) {
console.error('\nDocuments missing in MongoDB:');
result.missingInMongo.forEach(id => console.error(`- ${id}`));
}
});

process.exit(1);
} else {
console.log('\n✓ Data verification successful: All records match');
}

} catch (error) {
console.error('Error during verification:', error);
process.exit(1);
} finally {
await mongoClient.close();
await pgClient.end();
}
}

// Parse command line arguments
const args = process.argv.slice(2);
let mongoUrl, pgUrl;

for (const arg of args) {
if (arg.startsWith('--mongodb-url=')) {
mongoUrl = arg.split('=')[1];
} else if (arg.startsWith('--postgres-url=')) {
pgUrl = arg.split('=')[1];
}
}

if (!mongoUrl || !pgUrl) {
console.error('Usage: node verify-migration.mjs --mongodb-url=<url> --postgres-url=<url>');
process.exit(1);
}

verifyMigration(mongoUrl, pgUrl).catch(console.error);