-
Notifications
You must be signed in to change notification settings - Fork 8.6k
Add unused-urls-cleanup plugin #220038
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add unused-urls-cleanup plugin #220038
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| TODO |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,21 @@ | ||
| { | ||
| "type": "plugin", | ||
| "id": "@kbn/unused-urls-cleanup", | ||
| "owner": [ | ||
| "@elastic/appex-sharedux" | ||
| ], | ||
| "group": "platform", | ||
| "visibility": "private", | ||
| "description": "Background task responsible for deleting saved objects of type 'url' which are unused.", | ||
| "plugin": { | ||
| "id": "unusedUrlsCleanup", | ||
| "browser": false, | ||
| "server": true, | ||
| "requiredPlugins": [ | ||
| "taskManager" | ||
| ], | ||
| "configPath": "unused_urls_cleanup", | ||
| } | ||
| } | ||
|
|
||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the "Elastic License | ||
| * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side | ||
| * Public License v 1"; you may not use this file except in compliance with, at | ||
| * your election, the "Elastic License 2.0", the "GNU Affero General Public | ||
| * License v3.0 only", or the "Server Side Public License, v 1". | ||
| */ | ||
|
|
||
| import { schema, TypeOf } from '@kbn/config-schema'; | ||
| import type { PluginConfigDescriptor } from '@kbn/core/server'; | ||
| import { DEFAULT_MAX_AGE } from '@kbn/unused-urls-cleanup/server/constants'; | ||
|
|
||
/**
 * Matches a complete relative-age expression: one or more digits followed by
 * exactly one unit character (y, M, w, d, h, m or s).
 *
 * The pattern is anchored at both ends so that a value which merely *contains*
 * such a token (for example `abc1yxyz` or `1y OR foo`) is rejected. The value
 * is later interpolated into a saved-objects query filter (`now-<maxAge>`), so
 * anything other than a bare `<number><unit>` must not get through.
 */
const MAX_AGE_PATTERN = /^\d+[yMwdhms]$/;

/**
 * Configuration schema of the unused URLs cleanup plugin.
 *
 * `maxAge` is a string of the form `<number><unit>` and defaults to
 * `DEFAULT_MAX_AGE`.
 */
export const configSchema = schema.object({
  maxAge: schema.string({
    // TODO: Possibly disable this for new installations
    defaultValue: DEFAULT_MAX_AGE,
    // Returning a string reports a validation error; returning nothing accepts the value.
    validate: (value) => {
      if (!MAX_AGE_PATTERN.test(value)) {
        return `Invalid value: ${value}. Expected format: <number><unit>, where unit is one of y, M, w, d, h, m, s.`;
      }
    },
  }),
});

/** Static type of the validated plugin configuration, derived from `configSchema`. */
export type UnusedUrlsCleanupPluginConfig = TypeOf<typeof configSchema>;

/** Config descriptor exposing `configSchema` as the plugin's schema. */
export const config: PluginConfigDescriptor<UnusedUrlsCleanupPluginConfig> = {
  schema: configSchema,
};
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the "Elastic License | ||
| * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side | ||
| * Public License v 1"; you may not use this file except in compliance with, at | ||
| * your election, the "Elastic License 2.0", the "GNU Affero General Public | ||
| * License v3.0 only", or the "Server Side Public License, v 1". | ||
| */ | ||
|
|
||
| import { TaskInstanceWithId } from '@kbn/task-manager-plugin/server/task'; | ||
|
|
||
/** Identifier used both as the id and as the task type of the cleanup task. */
export const TASK_ID = 'unusedUrlsCleanupTask';
/**
 * Interval between two cleanup runs: one week.
 *
 * This used to be a 30 second debugging value; the task is described as
 * running "once a week", so the schedule now matches that.
 */
export const TASK_SCHEDULE_INTERVAL = '7d';
/** Saved object type the cleanup operates on. */
export const SAVED_OBJECT_TYPE = 'url';
/** Keep-alive passed when opening and when paging through the point-in-time. */
export const PIT_KEEP_ALIVE = '10m';
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ideally, the timing configurations should be configurable in the kibana.yml file. This makes them easy to adjust in development or testing scenarios, and it allows for quick changes if real deployments have issues with our defaults. |
||
/** Number of saved objects requested per page while searching for unused URLs. */
export const MAX_PAGE_SIZE = 10_000;

/** Default value of the `maxAge` setting: one year. */
export const DEFAULT_MAX_AGE = '1y';

/**
 * Task instance handed to the task manager when scheduling the cleanup.
 * The same identifier serves as instance id and as task type; the task
 * carries no params and no initial state.
 */
export const DELETE_UNUSED_URLS_TASK: TaskInstanceWithId = {
  id: TASK_ID,
  taskType: TASK_ID,
  params: {},
  state: {},
  schedule: { interval: TASK_SCHEDULE_INTERVAL },
};
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the "Elastic License | ||
| * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side | ||
| * Public License v 1"; you may not use this file except in compliance with, at | ||
| * your election, the "Elastic License 2.0", the "GNU Affero General Public | ||
| * License v3.0 only", or the "Server Side Public License, v 1". | ||
| */ | ||
|
|
||
| import type { PluginInitializerContext } from '@kbn/core/server'; | ||
|
|
||
| export type { UnusedUrlsCleanupPluginSetup, UnusedUrlsCleanupPluginStart } from './types'; | ||
| export type { UnusedUrlsCleanupPluginConfig } from './config'; | ||
| export { config, configSchema } from './config'; | ||
| export { | ||
| TASK_ID, | ||
| TASK_SCHEDULE_INTERVAL, | ||
| SAVED_OBJECT_TYPE, | ||
| PIT_KEEP_ALIVE, | ||
| MAX_PAGE_SIZE, | ||
| DEFAULT_MAX_AGE, | ||
| DELETE_UNUSED_URLS_TASK, | ||
| } from './constants'; | ||
|
|
||
/**
 * Plugin initializer. The plugin module is imported dynamically, so it is only
 * loaded when this function is invoked.
 *
 * @param initializerContext context forwarded to the plugin constructor
 * @returns a new `UnusedUrlsCleanupPlugin` instance
 */
export async function plugin(initializerContext: PluginInitializerContext) {
  const pluginModule = await import('./plugin');
  return new pluginModule.UnusedUrlsCleanupPlugin(initializerContext);
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the "Elastic License | ||
| * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side | ||
| * Public License v 1"; you may not use this file except in compliance with, at | ||
| * your election, the "Elastic License 2.0", the "GNU Affero General Public | ||
| * License v3.0 only", or the "Server Side Public License, v 1". | ||
| */ | ||
| export * from './saved_objects'; |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,119 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the "Elastic License | ||
| * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side | ||
| * Public License v 1"; you may not use this file except in compliance with, at | ||
| * your election, the "Elastic License 2.0", the "GNU Affero General Public | ||
| * License v3.0 only", or the "Server Side Public License, v 1". | ||
| */ | ||
|
|
||
| import { SortResults } from '@elastic/elasticsearch/lib/api/types'; | ||
| import { ISavedObjectsRepository, SavedObjectsFindResult } from '@kbn/core/server'; | ||
| import { Logger } from '@kbn/logging'; | ||
| import { | ||
| MAX_PAGE_SIZE, | ||
| PIT_KEEP_ALIVE, | ||
| SAVED_OBJECT_TYPE, | ||
| } from '@kbn/unused-urls-cleanup/server/constants'; | ||
|
|
||
/**
 * Bulk-deletes the given saved objects and logs the outcome.
 *
 * Failures are logged rather than rethrown, so the returned promise always
 * resolves.
 *
 * @param savedObjectsRepository repository used to issue the bulk delete
 * @param unusedUrls `{ id, type }` references of the objects to delete
 * @param logger receives progress, partial-failure and error messages
 */
export const deleteUnusedUrls = async ({
  savedObjectsRepository,
  unusedUrls,
  logger,
}: {
  savedObjectsRepository: ISavedObjectsRepository;
  unusedUrls: Array<{ id: string; type: string }>;
  logger: Logger;
}) => {
  const total = unusedUrls.length;
  logger.info(`Deleting ${total} unused URL(s)`);

  try {
    const response = await savedObjectsRepository.bulkDelete(unusedUrls, {
      // Resolve only once the deletions are visible to subsequent searches.
      refresh: 'wait_for',
    });

    // A resolved bulk delete can still contain per-object failures, so count
    // them instead of assuming that every object was removed.
    const failedCount = (response?.statuses ?? []).filter(
      (status: { success: boolean }) => !status.success
    ).length;

    if (failedCount > 0) {
      logger.warn(`Failed to delete ${failedCount} of ${total} unused URL(s)`);
    }

    logger.info(`Successfully deleted ${total - failedCount} unused URL(s)`);
  } catch (e) {
    // Anything can be thrown; only Error instances are guaranteed to have a message.
    const reason = e instanceof Error ? e.message : String(e);
    logger.error(`Failed to delete unused URL(s): ${reason}`);
  }
};
|
|
||
/**
 * Collects references to every saved object of type `SAVED_OBJECT_TYPE` that
 * matches `filter`, paging through a point-in-time with `searchAfter`.
 *
 * Search errors are logged rather than rethrown; whatever was collected before
 * the error is still returned. The point-in-time is closed in all cases.
 *
 * @param savedObjectsRepository repository used to open/close the point-in-time and to search
 * @param filter query filter selecting the unused URLs
 * @param logger receives error and warning messages
 * @returns `{ id, type }` references of the matching objects
 */
export const fetchAllUnusedUrls = async ({
  savedObjectsRepository,
  filter,
  logger,
}: {
  savedObjectsRepository: ISavedObjectsRepository;
  filter: string;
  logger: Logger;
}) => {
  // Only the references needed for deletion are retained, never the full
  // saved objects: review feedback pointed out that holding every object in
  // memory is risky given low server memory limits (see #203017).
  // NOTE(review): the reviewer would ideally use deleteByQuery, but it does not
  // appear to be available on the saved object client.
  const results: Array<{ id: string; type: string }> = [];

  const { id: pitId } = await savedObjectsRepository.openPointInTimeForType(SAVED_OBJECT_TYPE, {
    keepAlive: PIT_KEEP_ALIVE,
  });

  try {
    let searchAfter: SortResults | undefined;
    let hasMore = true;

    while (hasMore) {
      const response = await savedObjectsRepository.find({
        type: SAVED_OBJECT_TYPE,
        filter,
        pit: { id: pitId, keepAlive: PIT_KEEP_ALIVE },
        searchAfter,
        perPage: MAX_PAGE_SIZE,
      });

      const page: SavedObjectsFindResult[] = response.saved_objects;
      for (const { id } of page) {
        results.push({ id, type: SAVED_OBJECT_TYPE });
      }

      // A full page means there may be more results after it.
      hasMore = page.length === MAX_PAGE_SIZE;

      if (hasMore) {
        searchAfter = page[page.length - 1]?.sort;

        if (searchAfter === undefined) {
          // Without a cursor the next request would return the first page
          // again, looping forever; stop with what has been collected.
          logger.warn('Stopped fetching unused URLs: last result has no sort value to page from');
          hasMore = false;
        }
      }
    }
  } catch (e) {
    // Anything can be thrown; only Error instances are guaranteed to have a message.
    const reason = e instanceof Error ? e.message : String(e);
    logger.error(`Failed to fetch unused URLs: ${reason}`);
  } finally {
    await savedObjectsRepository.closePointInTime(pitId);
  }

  return results;
};
|
|
||
/**
 * Runs one cleanup pass: looks up the unused URLs matching `filter` and, when
 * there are any, deletes them. Progress is reported through `logger`.
 *
 * Any error thrown along the way is logged and not propagated to the caller.
 *
 * @param savedObjectsRepository repository used for both the lookup and the deletion
 * @param filter query filter selecting the unused URLs
 * @param logger receives progress and error messages
 */
export const runDeleteUnusedUrlsTask = async ({
  savedObjectsRepository,
  filter,
  logger,
}: {
  savedObjectsRepository: ISavedObjectsRepository;
  filter: string;
  logger: Logger;
}) => {
  // NOTE(review): a reviewer was unsure about this try/catch and the best
  // practice here, because the task manager should probably know whether the
  // task succeeded or failed. This is something to investigate.
  try {
    logger.info('Unused URLs cleanup started');

    const staleUrls = await fetchAllUnusedUrls({ savedObjectsRepository, filter, logger });
    const staleCount = staleUrls.length;

    logger.info(`Found ${staleCount} unused URL(s)`);

    // Nothing to delete: skip the bulk delete entirely.
    if (staleCount === 0) {
      return;
    }

    await deleteUnusedUrls({ savedObjectsRepository, unusedUrls: staleUrls, logger });
  } catch (e) {
    logger.error(`Failed to run: ${e.message}`);
  }
};
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,72 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the "Elastic License | ||
| * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side | ||
| * Public License v 1"; you may not use this file except in compliance with, at | ||
| * your election, the "Elastic License 2.0", the "GNU Affero General Public | ||
| * License v3.0 only", or the "Server Side Public License, v 1". | ||
| */ | ||
|
|
||
| import type { | ||
| PluginInitializerContext, | ||
| CoreSetup, | ||
| CoreStart, | ||
| Plugin, | ||
| Logger, | ||
| } from '@kbn/core/server'; | ||
| import { TaskManagerSetupContract } from '@kbn/task-manager-plugin/server'; | ||
| import { TASK_ID, DELETE_UNUSED_URLS_TASK } from '@kbn/unused-urls-cleanup/server/constants'; | ||
| import { runDeleteUnusedUrlsTask } from '@kbn/unused-urls-cleanup/server/lib'; | ||
| import type { UnusedUrlsCleanupPluginSetup, UnusedUrlsCleanupPluginStart } from './types'; | ||
| import type { UnusedUrlsCleanupPluginConfig } from './config'; | ||
|
|
||
/**
 * Server-side plugin that registers the unused URLs cleanup task definition
 * and makes sure the recurring task is scheduled.
 */
export class UnusedUrlsCleanupPlugin implements Plugin {
  private readonly logger: Logger;
  private readonly config: UnusedUrlsCleanupPluginConfig;
  // Captured in setup() because the task definition is registered from start().
  private taskManagerSetup: TaskManagerSetupContract | undefined;

  constructor(initializerContext: PluginInitializerContext) {
    this.logger = initializerContext.logger.get();
    this.config = initializerContext.config.get<UnusedUrlsCleanupPluginConfig>();
  }

  /** Stores the task manager setup contract for later use in `start`. */
  public setup(_core: CoreSetup, { taskManager }: UnusedUrlsCleanupPluginSetup) {
    this.taskManagerSetup = taskManager;
  }

  /**
   * Registers the cleanup task definition and ensures the task is scheduled.
   * Logs an error and does nothing else when `setup` has not provided the
   * task manager setup contract.
   */
  public start(core: CoreStart, { taskManager }: UnusedUrlsCleanupPluginStart) {
    const {
      logger,
      taskManagerSetup,
      config: { maxAge },
    } = this;

    if (!taskManagerSetup) {
      logger.error('taskManagerSetup is not defined');
      return;
    }

    // NOTE(review): the task definition is registered here, during start(),
    // through the setup contract captured earlier — confirm that the task
    // manager permits registration at this point of the lifecycle.

    // NOTE(review): one important thing to test is different spaces. It is not
    // yet clear how URLs from different spaces are stored; the cleanup logic
    // must be double-checked to work globally across all spaces.
    const savedObjectsRepository = core.savedObjects.createInternalRepository();

    // NOTE(review): open question — when a URL is created, is the access date
    // prefilled with the creation date, or can accessDate be null?
    const filter = `url.attributes.accessDate <= now-${maxAge}`;

    taskManagerSetup.registerTaskDefinitions({
      [TASK_ID]: {
        title: 'Unused URLs Cleanup',
        description: `Deletes unused (unaccessed for 1 year - configurable via unused_urls_cleanup.maxAge config) saved objects of type 'url' once a week.`,
        createTaskRunner: () => {
          return {
            async run() {
              // Await the cleanup so the run only completes once the work has
              // finished, instead of leaving a detached promise behind.
              await runDeleteUnusedUrlsTask({
                savedObjectsRepository,
                filter,
                logger,
              });
            },
          };
        },
      },
    });

    // start() is synchronous, so handle a scheduling failure here rather than
    // leaving the returned promise unhandled.
    taskManager.ensureScheduled(DELETE_UNUSED_URLS_TASK).catch((e: unknown) => {
      const reason = e instanceof Error ? e.message : String(e);
      logger.error(`Failed to schedule unused URLs cleanup task: ${reason}`);
    });
  }
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,21 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the "Elastic License | ||
| * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side | ||
| * Public License v 1"; you may not use this file except in compliance with, at | ||
| * your election, the "Elastic License 2.0", the "GNU Affero General Public | ||
| * License v3.0 only", or the "Server Side Public License, v 1". | ||
| */ | ||
|
|
||
| import { | ||
| TaskManagerSetupContract, | ||
| TaskManagerStartContract, | ||
| } from '@kbn/task-manager-plugin/server'; | ||
|
|
||
/** Plugin dependencies received by the plugin's `setup` lifecycle method. */
export interface UnusedUrlsCleanupPluginSetup {
  /** Task manager setup contract. */
  taskManager: TaskManagerSetupContract;
}

/** Plugin dependencies received by the plugin's `start` lifecycle method. */
export interface UnusedUrlsCleanupPluginStart {
  /** Task manager start contract. */
  taskManager: TaskManagerStartContract;
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| { | ||
| "extends": "../../../../../tsconfig.base.json", | ||
| "compilerOptions": { | ||
| "outDir": "target/types" | ||
| }, | ||
| "include": [ | ||
| "server/**/*", | ||
| ], | ||
| "exclude": [ | ||
| "target/**/*", | ||
| ], | ||
| "kbn_references": [ | ||
| "@kbn/core", | ||
| "@kbn/task-manager-plugin", | ||
| "@kbn/config-schema", | ||
| ] | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think something like `schema.duration` should do the trick here.