Skip to content

Commit

Permalink
perf: hash and delete pages in a single scan
Browse files Browse the repository at this point in the history
  • Loading branch information
gotson committed Feb 17, 2022
1 parent b724f20 commit b436e90
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 16 deletions.
11 changes: 11 additions & 0 deletions komga/src/main/kotlin/org/gotson/komga/application/tasks/Task.kt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import java.io.Serializable
// Named task priority levels, ordered from highest (8) down to lowest (0).
// DEFAULT_PRIORITY is the value used when a Task is created without an explicit priority.
const val HIGHEST_PRIORITY = 8
const val HIGH_PRIORITY = 6
const val DEFAULT_PRIORITY = 4
const val LOW_PRIORITY = 2
const val LOWEST_PRIORITY = 0

sealed class Task(priority: Int = DEFAULT_PRIORITY, val groupId: String? = null) : Serializable {
Expand All @@ -25,6 +26,16 @@ sealed class Task(priority: Int = DEFAULT_PRIORITY, val groupId: String? = null)
override fun toString(): String = "FindBooksToConvert(libraryId='$libraryId', priority='$priority')"
}

/**
 * Task scoped to a single library, identified by [libraryId].
 *
 * The handler for this task looks up the library's books that are missing a page hash
 * and queues a page-hashing task for each of them.
 *
 * @param libraryId id of the library to inspect
 * @param priority priority forwarded to [Task]; defaults to [DEFAULT_PRIORITY]
 */
class FindBooksWithMissingPageHash(val libraryId: String, priority: Int = DEFAULT_PRIORITY) : Task(priority) {
  /** Returns an identifier made of a fixed prefix followed by [libraryId]. */
  override fun uniqueId(): String {
    return "FIND_BOOKS_WITH_MISSING_PAGE_HASH_$libraryId"
  }

  /** Returns a readable description including the library id and the priority. */
  override fun toString(): String {
    return "FindBooksWithMissingPageHash(libraryId='$libraryId', priority='$priority')"
  }
}

/**
 * Task scoped to a single library, identified by [libraryId].
 *
 * The handler for this task asks for the book pages that can be deleted automatically
 * and queues a duplicate-page removal task per book.
 *
 * @param libraryId id of the library to inspect
 * @param priority priority forwarded to [Task]; defaults to [DEFAULT_PRIORITY]
 */
class FindDuplicatePagesToDelete(val libraryId: String, priority: Int = DEFAULT_PRIORITY) : Task(priority) {
  /** Returns an identifier made of a fixed prefix followed by [libraryId]. */
  override fun uniqueId(): String {
    return "FIND_DUPLICATE_PAGES_TO_DELETE_$libraryId"
  }

  /** Returns a readable description including the library id and the priority. */
  override fun toString(): String {
    return "FindDuplicatePagesToDelete(libraryId='$libraryId', priority='$priority')"
  }
}

class EmptyTrash(val libraryId: String, priority: Int = DEFAULT_PRIORITY) : Task(priority) {
override fun uniqueId() = "EMPTY_TRASH_$libraryId"
override fun toString(): String = "EmptyTrash(libraryId='$libraryId', priority='$priority')"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import org.gotson.komga.domain.service.BookMetadataLifecycle
import org.gotson.komga.domain.service.BookPageEditor
import org.gotson.komga.domain.service.LibraryContentLifecycle
import org.gotson.komga.domain.service.LocalArtworkLifecycle
import org.gotson.komga.domain.service.PageHashLifecycle
import org.gotson.komga.domain.service.SeriesLifecycle
import org.gotson.komga.domain.service.SeriesMetadataLifecycle
import org.gotson.komga.infrastructure.jms.QUEUE_FACTORY
Expand Down Expand Up @@ -43,6 +44,7 @@ class TaskHandler(
private val bookConverter: BookConverter,
private val bookPageEditor: BookPageEditor,
private val searchIndexLifecycle: SearchIndexLifecycle,
private val pageHashLifecycle: PageHashLifecycle,
private val meterRegistry: MeterRegistry,
) {

Expand All @@ -57,11 +59,11 @@ class TaskHandler(
libraryRepository.findByIdOrNull(task.libraryId)?.let { library ->
libraryContentLifecycle.scanRootFolder(library)
taskReceiver.analyzeUnknownAndOutdatedBooks(library)
taskReceiver.hashBooksWithoutHash(library)
taskReceiver.hashBookPagesWithMissingHash(library)
taskReceiver.repairExtensions(library, LOWEST_PRIORITY)
taskReceiver.repairExtensions(library, LOW_PRIORITY)
taskReceiver.findBooksToConvert(library, LOWEST_PRIORITY)
taskReceiver.removeDuplicatePages(library, LOWEST_PRIORITY)
taskReceiver.findBooksWithMissingPageHash(library, LOWEST_PRIORITY)
taskReceiver.findDuplicatePagesToDelete(library, LOWEST_PRIORITY)
taskReceiver.hashBooksWithoutHash(library)
} ?: logger.warn { "Cannot execute task $task: Library does not exist" }

is Task.FindBooksToConvert ->
Expand All @@ -71,6 +73,20 @@ class TaskHandler(
}
} ?: logger.warn { "Cannot execute task $task: Library does not exist" }

// Resolve the task's library, then queue one page-hashing task per (bookId, seriesId) pair
// reported as missing a page hash, one priority step above this task.
// A warning is logged when the library cannot be found.
is Task.FindBooksWithMissingPageHash ->
  libraryRepository.findByIdOrNull(task.libraryId)?.let { library ->
    val missing = pageHashLifecycle.getBookAndSeriesIdsWithMissingPageHash(library)
    missing.forEach { (bookId, seriesId) ->
      taskReceiver.hashBookPages(bookId, seriesId, task.priority + 1)
    }
  } ?: logger.warn { "Cannot execute task $task: Library does not exist" }

// Resolve the task's library, then queue one duplicate-page removal task per book
// returned by the page hash lifecycle, one priority step above this task.
// A warning is logged when the library cannot be found.
is Task.FindDuplicatePagesToDelete ->
  libraryRepository.findByIdOrNull(task.libraryId)?.let { library ->
    val deletable = pageHashLifecycle.getBookPagesToDeleteAutomatically(library)
    deletable.forEach { (targetBookId, duplicatePages) ->
      taskReceiver.removeDuplicatePages(targetBookId, duplicatePages, task.priority + 1)
    }
  } ?: logger.warn { "Cannot execute task $task: Library does not exist" }

is Task.EmptyTrash ->
libraryRepository.findByIdOrNull(task.libraryId)?.let { library ->
libraryContentLifecycle.emptyTrash(library)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import org.gotson.komga.domain.model.Media
import org.gotson.komga.domain.persistence.BookRepository
import org.gotson.komga.domain.persistence.LibraryRepository
import org.gotson.komga.domain.service.BookConverter
import org.gotson.komga.domain.service.PageHashLifecycle
import org.gotson.komga.infrastructure.jms.JMS_PROPERTY_TYPE
import org.gotson.komga.infrastructure.jms.QUEUE_TASKS
import org.gotson.komga.infrastructure.jms.QUEUE_UNIQUE_ID
Expand All @@ -30,7 +29,6 @@ class TaskReceiver(
private val libraryRepository: LibraryRepository,
private val bookRepository: BookRepository,
private val bookConverter: BookConverter,
private val pageHashLifecycle: PageHashLifecycle,
) {

private val jmsTemplates = (0..9).associateWith {
Expand Down Expand Up @@ -71,11 +69,12 @@ class TaskReceiver(
}
}

fun hashBookPagesWithMissingHash(library: Library) {
if (library.hashPages)
pageHashLifecycle.getBookAndSeriesIdsWithMissingPageHash(library).forEach {
submitTask(Task.HashBookPages(it.first, LOWEST_PRIORITY, it.second))
}
fun findBooksWithMissingPageHash(library: Library, priority: Int = DEFAULT_PRIORITY) {
submitTask(Task.FindBooksWithMissingPageHash(library.id, priority))
}

/**
 * Submits a [Task.HashBookPages] for a single book.
 *
 * @param bookId id of the book whose pages should be hashed
 * @param seriesId id of the book's series; passed as the task's third constructor argument
 *   (presumably the task group id, to be confirmed against [Task.HashBookPages])
 * @param priority priority of the submitted task; defaults to [DEFAULT_PRIORITY]
 */
fun hashBookPages(bookId: String, seriesId: String, priority: Int = DEFAULT_PRIORITY) {
  val hashTask = Task.HashBookPages(bookId, priority, seriesId)
  submitTask(hashTask)
}

fun findBooksToConvert(library: Library, priority: Int = DEFAULT_PRIORITY) {
Expand All @@ -93,10 +92,8 @@ class TaskReceiver(
}
}

fun removeDuplicatePages(library: Library, priority: Int = DEFAULT_PRIORITY) {
pageHashLifecycle.getBookPagesToDeleteAutomatically(library).forEach { (bookId, pages) ->
removeDuplicatePages(bookId, pages, priority)
}
fun findDuplicatePagesToDelete(library: Library, priority: Int = DEFAULT_PRIORITY) {
submitTask(Task.FindDuplicatePagesToDelete(library.id, priority))
}

fun removeDuplicatePages(bookId: String, pages: Collection<BookPageNumbered>, priority: Int = DEFAULT_PRIORITY) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.gotson.komga.domain.service

import mu.KotlinLogging
import org.gotson.komga.domain.model.BookPageContent
import org.gotson.komga.domain.model.BookPageNumbered
import org.gotson.komga.domain.model.Library
Expand All @@ -13,6 +14,8 @@ import org.gotson.komga.infrastructure.configuration.KomgaProperties
import org.springframework.data.domain.Pageable
import org.springframework.stereotype.Service

// File-private logger created through KotlinLogging.
private val logger = KotlinLogging.logger {}

@Service
class PageHashLifecycle(
private val pageHashRepository: PageHashRepository,
Expand All @@ -28,7 +31,13 @@ class PageHashLifecycle(
* @return a Collection of Pair of BookId/SeriesId
*/
fun getBookAndSeriesIdsWithMissingPageHash(library: Library): Collection<Pair<String, String>> =
mediaRepository.findAllBookAndSeriesIdsByLibraryIdAndMediaTypeAndWithMissingPageHash(library.id, hashableMediaTypes, komgaProperties.pageHashing)
if (library.hashPages)
mediaRepository.findAllBookAndSeriesIdsByLibraryIdAndMediaTypeAndWithMissingPageHash(library.id, hashableMediaTypes, komgaProperties.pageHashing)
.also { logger.info { "Found ${it.size} books with missing page hash" } }
else {
logger.info { "Page hashing is not enabled, skipping" }
emptyList()
}

fun getPage(pageHash: PageHash, resizeTo: Int? = null): BookPageContent? {
val match = pageHashRepository.findMatchesByHash(pageHash, null, Pageable.ofSize(1)).firstOrNull() ?: return null
Expand Down

0 comments on commit b436e90

Please sign in to comment.