-
Notifications
You must be signed in to change notification settings - Fork 107
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CUMULUS-2688: Update bulk operations to fetch granules by unique columns #3000
Changes from 25 commits
cbe0e44
c6a12dd
1b519a0
a0d78db
120ced7
3be9fb1
2588d6c
cea0306
15415c3
54734da
720805b
1fdde4b
4804dff
3af1c65
ee8e1f3
f3d62d5
2006805
5a89f58
20c8242
c15136a
393d153
709a18f
886a739
aff0ca6
1c28013
cc0c468
3e445aa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,38 +5,47 @@ const Logger = require('@cumulus/logger'); | |
const { | ||
GranulePgModel, | ||
getKnexClient, | ||
getUniqueGranuleByGranuleId, | ||
translatePostgresGranuleToApiGranule, | ||
getGranuleByUniqueColumns, | ||
CollectionPgModel, | ||
} = require('@cumulus/db'); | ||
const { RecordDoesNotExist } = require('@cumulus/errors'); | ||
|
||
const { deconstructCollectionId } = require('@cumulus/message/Collections'); | ||
const { chooseTargetExecution } = require('../lib/executions'); | ||
const { deleteGranuleAndFiles } = require('../src/lib/granule-delete'); | ||
const { unpublishGranule } = require('../lib/granule-remove-from-cmr'); | ||
const { updateGranuleStatusToQueued } = require('../lib/writeRecords/write-granules'); | ||
const { getGranuleIdsForPayload } = require('../lib/granules'); | ||
const { getGranulesForPayload } = require('../lib/granules'); | ||
const { reingestGranule, applyWorkflow } = require('../lib/ingest'); | ||
|
||
const log = new Logger({ sender: '@cumulus/bulk-operation' }); | ||
|
||
async function applyWorkflowToGranules({ | ||
granuleIds, | ||
granules, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These aren't really granules. They're There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I think it should be something better across the stack, esp at the endpoint level if we're renaming there anyway. I just can't decide on a good name. 🤔 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah that's where I'm getting stuck. It's really We could do something like The more I think about it the more I'm leaning towards keeping There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can't find a convincing reason to disagree. |
||
workflowName, | ||
meta, | ||
queueUrl, | ||
granulePgModel = new GranulePgModel(), | ||
collectionPgModel = new CollectionPgModel(), | ||
granuleTranslateMethod = translatePostgresGranuleToApiGranule, | ||
applyWorkflowHandler = applyWorkflow, | ||
updateGranulesToQueuedMethod = updateGranuleStatusToQueued, | ||
knex, | ||
}) { | ||
return await pMap( | ||
granuleIds, | ||
(async (granuleId) => { | ||
granules, | ||
(async (granule) => { | ||
try { | ||
const pgGranule = await getUniqueGranuleByGranuleId( | ||
const collection = await collectionPgModel.get( | ||
knex, | ||
granuleId, | ||
deconstructCollectionId(granule.collectionId) | ||
); | ||
|
||
const pgGranule = await getGranuleByUniqueColumns( | ||
knex, | ||
granule.granuleId, | ||
collection.cumulus_id, | ||
granulePgModel | ||
); | ||
const apiGranule = await granuleTranslateMethod({ | ||
|
@@ -51,10 +60,10 @@ async function applyWorkflowToGranules({ | |
queueUrl, | ||
asyncOperationId: process.env.asyncOperationId, | ||
}); | ||
return granuleId; | ||
return granule.granuleId; | ||
} catch (error) { | ||
log.error(`Granule ${granuleId} encountered an error`, error); | ||
return { granuleId, err: error }; | ||
log.error(`Granule ${granule.granuleId} encountered an error`, error); | ||
return { granuleId: granule.granuleId, err: error }; | ||
} | ||
}) | ||
); | ||
|
@@ -70,7 +79,8 @@ async function applyWorkflowToGranules({ | |
* @param {Object} [payload.query] - Optional parameter of query to send to ES | ||
* @param {string} [payload.index] - Optional parameter of ES index to query. | ||
* Must exist if payload.query exists. | ||
* @param {Object} [payload.ids] - Optional list of granule IDs to bulk operate on | ||
* @param {Object} [payload.granules] - Optional list of granule unique IDs to bulk operate on | ||
* e.g. { granuleId: xxx, collectionID: xxx } | ||
* @param {Function} [unpublishGranuleFunc] - Optional function to delete the | ||
* granule from CMR. Useful for testing. | ||
* @returns {Promise} | ||
|
@@ -81,16 +91,28 @@ async function bulkGranuleDelete( | |
) { | ||
const deletedGranules = []; | ||
const forceRemoveFromCmr = payload.forceRemoveFromCmr === true; | ||
const granuleIds = await getGranuleIdsForPayload(payload); | ||
const granules = await getGranulesForPayload(payload); | ||
const knex = await getKnexClient(); | ||
|
||
await pMap( | ||
granuleIds, | ||
async (granuleId) => { | ||
granules, | ||
async (granule) => { | ||
let pgGranule; | ||
const granulePgModel = new GranulePgModel(); | ||
const collectionPgModel = new CollectionPgModel(); | ||
|
||
const collection = await collectionPgModel.get( | ||
knex, | ||
deconstructCollectionId(granule.collectionId) | ||
); | ||
|
||
try { | ||
pgGranule = await getUniqueGranuleByGranuleId(knex, granuleId, granulePgModel); | ||
pgGranule = await getGranuleByUniqueColumns( | ||
knex, | ||
granule.granuleId, | ||
collection.cumulus_id, | ||
granulePgModel | ||
); | ||
} catch (error) { | ||
// PG Granule being undefined will be caught by deleteGranulesAndFiles | ||
if (error instanceof RecordDoesNotExist) { | ||
|
@@ -112,7 +134,7 @@ async function bulkGranuleDelete( | |
pgGranule, | ||
}); | ||
|
||
deletedGranules.push(granuleId); | ||
deletedGranules.push(granule.granuleId); | ||
}, | ||
{ | ||
concurrency: 10, // is this necessary? | ||
|
@@ -133,17 +155,18 @@ async function bulkGranuleDelete( | |
* @param {Object} [payload.query] - Optional parameter of query to send to ES | ||
* @param {string} [payload.index] - Optional parameter of ES index to query. | ||
* Must exist if payload.query exists. | ||
* @param {Object} [payload.ids] - Optional list of granule IDs to bulk operate on | ||
* @param {Object} [payload.granules] - Optional list of granule unique IDs to bulk operate on | ||
* e.g. { granuleId: xxx, collectionID: xxx } | ||
* @param {function} [applyWorkflowHandler] - Optional handler for testing | ||
* @returns {Promise} | ||
*/ | ||
async function bulkGranule(payload, applyWorkflowHandler) { | ||
const knex = await getKnexClient(); | ||
const { queueUrl, workflowName, meta } = payload; | ||
const granuleIds = await getGranuleIdsForPayload(payload); | ||
const granules = await getGranulesForPayload(payload); | ||
return await applyWorkflowToGranules({ | ||
knex, | ||
granuleIds, | ||
granules, | ||
meta, | ||
queueUrl, | ||
workflowName, | ||
|
@@ -155,22 +178,38 @@ async function bulkGranuleReingest( | |
payload, | ||
reingestHandler = reingestGranule | ||
) { | ||
const granuleIds = await getGranuleIdsForPayload(payload); | ||
log.info(`Starting bulkGranuleReingest for ${JSON.stringify(granuleIds)}`); | ||
const granules = await getGranulesForPayload(payload); | ||
log.info(`Starting bulkGranuleReingest for ${JSON.stringify(granules)}`); | ||
const knex = await getKnexClient(); | ||
|
||
const workflowName = payload.workflowName; | ||
return await pMap( | ||
granuleIds, | ||
async (granuleId) => { | ||
granules, | ||
async (granule) => { | ||
const granulePgModel = new GranulePgModel(); | ||
const collectionPgModel = new CollectionPgModel(); | ||
|
||
const collection = await collectionPgModel.get( | ||
knex, | ||
deconstructCollectionId(granule.collectionId) | ||
); | ||
|
||
try { | ||
const pgGranule = await getUniqueGranuleByGranuleId(knex, granuleId); | ||
const pgGranule = await getGranuleByUniqueColumns( | ||
knex, | ||
granule.granuleId, | ||
collection.cumulus_id, | ||
granulePgModel | ||
); | ||
const apiGranule = await translatePostgresGranuleToApiGranule({ | ||
granulePgRecord: pgGranule, | ||
knexOrTransaction: knex, | ||
}); | ||
|
||
const targetExecution = await chooseTargetExecution({ granuleId, workflowName }); | ||
const targetExecution = await chooseTargetExecution({ | ||
granuleId: granule.granuleId, | ||
workflowName, | ||
}); | ||
const apiGranuleToReingest = { | ||
...apiGranule, | ||
...(targetExecution && { execution: targetExecution }), | ||
|
@@ -180,10 +219,10 @@ async function bulkGranuleReingest( | |
apiGranule: apiGranuleToReingest, | ||
asyncOperationId: process.env.asyncOperationId, | ||
}); | ||
return granuleId; | ||
return granule.granuleId; | ||
} catch (error) { | ||
log.error(`Granule ${granuleId} encountered an error`, error); | ||
return { granuleId, err: error }; | ||
log.error(`Granule ${granule.granuleId} encountered an error`, error); | ||
return { granuleId: granule.granuleId, err: error }; | ||
} | ||
}, | ||
{ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wondering if there should be migration instructions for this... 🤔.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Definitely requires an update to https://github.com/nasa/cumulus-api/
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, good call. I'll update that repo once all three tickets for 2688 are in as they all have changes that'll need to be noted. It looks like we might need a new
rds-phase-3
branch there that gets released at the same time as the main cumulus phase 3 release too 🤔There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nasa/cumulus-api#321