Skip to content

Commit

Permalink
feat(kobo): handle read progression conversion between kepub and epub
Browse files Browse the repository at this point in the history
  • Loading branch information
gotson committed Sep 25, 2024
1 parent 1d1e3fd commit df11920
Show file tree
Hide file tree
Showing 14 changed files with 251 additions and 122 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ data class R2Locator(
* Textual context of the locator.
*/
val text: Text? = null,
/**
* Komga specific, used to have a mapping between a [R2Locator] and a koboSpan
*/
val koboSpan: String? = null,
) {
@JsonInclude(JsonInclude.Include.NON_EMPTY)
data class Location(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -494,8 +494,14 @@ class BookLifecycle(
newProgression.modified.toLocalDateTime().toCurrentTimeZone(),
newProgression.device.id,
newProgression.device.name,
// use the type we have instead of the one provided
newProgression.locator.copy(type = matchedPosition.type),
newProgression.locator.copy(
// use the type we have instead of the one provided
type = matchedPosition.type,
// if no koboSpan is provided, use the one we matched
koboSpan = newProgression.locator.koboSpan ?: matchedPosition.koboSpan,
// don't trust the provided total progression, the one from Kobo can be wrong
locations = newProgression.locator.locations?.copy(totalProgression = totalProgression),
),
)
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.gotson.komga.infrastructure.configuration

/**
 * Closed set of events identifying which setting has changed.
 *
 * Each member is a stateless singleton, so listeners can subscribe to a specific member type.
 */
sealed class SettingChangedEvent {
  /** Identifies the task pool size setting (presumably published when that setting is modified — confirm against the publisher). */
  data object TaskPoolSize : SettingChangedEvent()

  /** Identifies the kepubify path setting (presumably published when that setting is modified — confirm against the publisher). */
  data object KepubifyPath : SettingChangedEvent()
}
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ class KoboDtoDao(
workId = dr.bookId,
isKepub = mr.epubIsKepub,
isPrePaginated = mediaExtension?.isFixedLayout == true,
fileSize = br.fileSize
fileSize = br.fileSize,
)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,12 @@ class KepubConverter(

/**
 * Configures kepubify once the bean has been constructed.
 *
 * The path stored in the settings takes precedence and is applied with fallback enabled;
 * otherwise the path coming from the configuration is used. When neither is set, the
 * situation is only logged.
 */
@PostConstruct
private fun configureKepubifyOnStartup() {
  // read the setting once so the check and the call see the same value
  val pathFromSettings = settingsProvider.kepubifyPath
  when {
    !pathFromSettings.isNullOrBlank() -> configureKepubify(pathFromSettings, true)
    !kepubifyConfigurationPath.isNullOrBlank() -> configureKepubify(kepubifyConfigurationPath)
    else -> logger.info { "Kepub conversion unavailable. kepubify path is not set" }
  }
}

@EventListener(SettingChangedEvent.KepubifyPath::class)
Expand All @@ -49,7 +52,10 @@ class KepubConverter(
* @param newValue path to kepubify
* @param fallback whether to fall back to configuration properties in case [newValue] is invalid
*/
fun configureKepubify(newValue: String?, fallback: Boolean = false) {
fun configureKepubify(
newValue: String?,
fallback: Boolean = false,
) {
if (newValue.isNullOrBlank()) {
isAvailable = false
kepubifyPath = null
Expand Down Expand Up @@ -94,23 +100,42 @@ class KepubConverter(
* @throws IllegalArgumentException if the source book is not an EPUB, or is already a KEPUB
* @return the [Path] of the converted file in case of success, else null
*/
fun convertEpubToKepub(bookWithMedia: BookWithMedia, destinationDir: Path? = null): Path? {
check(isAvailable) { "Kepub conversion is not available, kepubify path may not be set, or may be invalid" }
fun convertEpubToKepub(
bookWithMedia: BookWithMedia,
destinationDir: Path? = null,
): Path? {
require(bookWithMedia.media.mediaType == MediaType.EPUB.type) { "Cannot convert, not an EPUB: ${bookWithMedia.book.path}" }
require(!bookWithMedia.media.epubIsKepub) { "Cannot convert, EPUB is already a KEPUB: ${bookWithMedia.book.path}" }
require(bookWithMedia.book.path.exists()) { "Source file does not exist: ${bookWithMedia.book.path}" }
if(destinationDir != null) require(destinationDir.isDirectory()) { "Destination directory does not exist: $destinationDir" }

return convertEpubToKepubWithoutChecks(bookWithMedia.book.path, destinationDir)
}

/**
* Converts an EPUB book to KEPUB. The destination filename will be built from the original file name.
* This function does not check whether the file is an EPUB, or is already a KEPUB, or if the source file exists.
*
* This is intended for internal use in the EpubExtractor
*/
fun convertEpubToKepubWithoutChecks(
epub: Path,
destinationDir: Path? = null,
): Path? {
check(isAvailable) { "Kepub conversion is not available, kepubify path may not be set, or may be invalid" }

if (destinationDir != null) require(destinationDir.isDirectory()) { "Destination directory does not exist: $destinationDir" }

// kepubify will only convert when the destination name has the .kepub.epub extension, so we have to force it
val destinationPath = (destinationDir ?: tmpDir).resolve(bookWithMedia.book.path.nameWithoutExtension + ".kepub.epub")
val destinationPath = (destinationDir ?: tmpDir).resolve(epub.nameWithoutExtension + ".kepub.epub")
destinationPath.deleteIfExists()

val command = arrayOf(
kepubifyPath.toString(),
bookWithMedia.book.path.toString(),
"-o",
destinationPath.toString(),
)
val command =
arrayOf(
kepubifyPath.toString(),
epub.toString(),
"-o",
destinationPath.toString(),
)
logger.debug { "Starting conversion with: ${command.joinToString(" ")}" }
val process = Runtime.getRuntime().exec(command)
if (!process.waitFor(10, TimeUnit.SECONDS)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,21 @@ import org.gotson.komga.domain.model.MediaFile
import org.gotson.komga.domain.model.R2Locator
import org.gotson.komga.domain.model.TypedBytes
import org.gotson.komga.infrastructure.image.ImageAnalyzer
import org.gotson.komga.infrastructure.kobo.KepubConverter
import org.gotson.komga.infrastructure.mediacontainer.ContentDetector
import org.gotson.komga.infrastructure.util.getEntryBytes
import org.gotson.komga.infrastructure.util.getEntryInputStream
import org.gotson.komga.infrastructure.util.getZipEntryBytes
import org.jsoup.Jsoup
import org.jsoup.parser.Parser
import org.springframework.beans.factory.annotation.Value
import org.springframework.stereotype.Service
import org.springframework.web.util.UriUtils
import java.nio.file.Path
import kotlin.io.path.Path
import kotlin.io.path.deleteIfExists
import kotlin.io.path.invariantSeparatorsPathString
import kotlin.math.absoluteValue
import kotlin.math.ceil
import kotlin.math.roundToInt

Expand All @@ -28,6 +32,7 @@ private val logger = KotlinLogging.logger {}
class EpubExtractor(
private val contentDetector: ContentDetector,
private val imageAnalyzer: ImageAnalyzer,
private val kepubConverter: KepubConverter,
@Value("#{@komgaProperties.epubDivinaLetterCountThreshold}") private val letterCountThreshold: Int,
) {
/**
Expand Down Expand Up @@ -77,6 +82,7 @@ class EpubExtractor(
val (resources, missingResources) = getResources(epub).partition { it.fileSize != null }
val isFixedLayout = isFixedLayout(epub)
val pageCount = computePageCount(epub)
val isKepub = isKepub(epub, resources)
EpubManifest(
resources = resources,
missingResources = missingResources,
Expand All @@ -85,9 +91,9 @@ class EpubExtractor(
pageList = getPageList(epub),
pageCount = pageCount,
isFixedLayout = isFixedLayout,
positions = computePositions(resources, isFixedLayout),
positions = computePositions(epub, path, resources, isFixedLayout, isKepub),
divinaPages = getDivinaPages(epub, isFixedLayout, pageCount, analyzeDimensions),
isKepub = isKepub(epub, resources)
isKepub = isKepub,
)
}

Expand Down Expand Up @@ -177,14 +183,14 @@ class EpubExtractor(

private fun isKepub(
epub: EpubPackage,
resources: List<MediaFile>
resources: List<MediaFile>,
): Boolean {
try {
val readingOrder = resources.filter { it.subType == MediaFile.SubType.EPUB_PAGE }

readingOrder.forEach { mediaFile ->
val doc = epub.zip.getEntryInputStream(mediaFile.fileName).use { Jsoup.parse(it, null, "") }
if(!doc.getElementsByClass("koboSpan").isNullOrEmpty()) return true
if (!doc.getElementsByClass("koboSpan").isNullOrEmpty()) return true
}
} catch (e: Exception) {
logger.warn(e) { "Error while checking if EPUB is KEPUB" }
Expand All @@ -206,40 +212,105 @@ class EpubExtractor(
epub.opfDoc.selectFirst("metadata > *|meta[name=fixed-layout]")?.attr("content") == "true"

/**
 * Computes the list of positions (locators) of the book, in reading order.
 *
 * Fixed-layout books get one position per page. Other books get one position per 1024 bytes
 * of each resource, and each position is associated with the closest koboSpan when koboSpan
 * data could be computed (directly for a KEPUB, or through a temporary conversion when the
 * converter is available).
 *
 * @param epub the opened EPUB package, used to read resources when the book is already a KEPUB
 * @param path the path of the book file, handed to the converter when the book is not a KEPUB
 * @param resources all the resources of the book; only the `EPUB_PAGE` ones are used
 * @param isFixedLayout whether the book is fixed-layout
 * @param isKepub whether the book already contains koboSpan tags
 * @return the positions, each with its position index, progression and total progression
 */
private fun computePositions(
  epub: EpubPackage,
  path: Path,
  resources: List<MediaFile>,
  isFixedLayout: Boolean,
  isKepub: Boolean,
): List<R2Locator> {
  val readingOrder = resources.filter { it.subType == MediaFile.SubType.EPUB_PAGE }

  // positions are numbered from 1 across the whole book
  var startPosition = 1

  val koboPositions: Map<String, List<Pair<String, Float>>> =
    when {
      isFixedLayout -> emptyMap()
      isKepub -> computePositionsFromKoboSpan(readingOrder) { filename -> epub.zip.getEntryInputStream(filename).use { it.readBytes().decodeToString() } }
      kepubConverter.isAvailable -> {
        try {
          val kepub =
            kepubConverter.convertEpubToKepubWithoutChecks(path)
              // safety net in case the explicit deletion below cannot happen
              ?.also { it.toFile().deleteOnExit() }
              // if the conversion failed, throw an exception that will be caught in the catch block
              ?: throw IllegalStateException("Kepub conversion did not produce a file")
          try {
            computePositionsFromKoboSpan(readingOrder) { filename -> getZipEntryBytes(kepub, filename).decodeToString() }
          } finally {
            // always remove the temporary file, even when reading it failed
            kepub.deleteIfExists()
          }
        } catch (e: Exception) {
          // best effort: without koboSpan data the positions are still computed
          logger.warn(e) { "Could not convert to Kepub to compute positions: $path" }
          emptyMap()
        }
      }

      else -> emptyMap()
    }

  val positions =
    if (isFixedLayout) {
      // for fixed-layout book we create 1 position per page
      readingOrder.map {
        R2Locator(
          href = it.fileName,
          type = it.mediaType ?: "application/octet-stream",
          koboSpan = "kobo.1.1",
          locations = R2Locator.Location(progression = 0F, position = startPosition++),
        )
      }
    } else {
      // this is the Readium algorithm
      // we create 1 position every 1024 bytes
      readingOrder.flatMap { file ->
        val positionCount = maxOf(1, ceil((file.fileSize ?: 0) / 1024.0).roundToInt())
        (0 until positionCount).map { p ->
          val progression = p.toFloat() / positionCount
          val koboSpan =
            if (p == 0)
              // the first position of a resource always maps to the first koboSpan
              "kobo.1.1"
            else
              // otherwise pick the koboSpan whose progression is the closest to this position
              koboPositions[file.fileName]
                ?.minByOrNull { (progression - it.second).absoluteValue }
                ?.first

          R2Locator(
            href = file.fileName,
            type = file.mediaType ?: "application/octet-stream",
            locations = R2Locator.Location(progression = progression, position = startPosition++),
            koboSpan = koboSpan,
          )
        }
      }
    }

  // finally we compute the total progression for each position
  return positions.map { locator ->
    val totalProgression = locator.locations?.position?.let { it.toFloat() / positions.size }
    locator.copy(locations = locator.locations?.copy(totalProgression = totalProgression))
  }
}

/**
 * Builds the positions for a KEPUB book, based on koboSpan tags.
 *
 * Every resource of [readingOrder] is parsed with position tracking enabled, and each
 * `span.koboSpan` element that has a non-blank id yields one entry.
 *
 * NOTE(review): the offset is measured in the text returned by [resourceSupplier], while the
 * divisor is the size recorded on the [MediaFile]. If the supplied content is larger than that
 * size, the progression can exceed 1.
 *
 * @param readingOrder the resources to scan
 * @param resourceSupplier returns the text content of a resource, given its file name
 * @return a [Map] where the key is the resource name, and the value is a [List] of [Pair] containing the koboSpan ID and the progression as a Float.
 * Resources whose size is unknown or zero are mapped to an empty list.
 */
private fun computePositionsFromKoboSpan(
  readingOrder: List<MediaFile>,
  resourceSupplier: (String) -> String,
): Map<String, List<Pair<String, Float>>> =
  readingOrder.associate { file ->
    // without a usable size no progression can be computed, so the resource is not parsed at all
    val fileSize = file.fileSize?.takeIf { it > 0 }?.toFloat()
    val spans: List<Pair<String, Float>> =
      if (fileSize == null) {
        emptyList()
      } else {
        val doc = Jsoup.parse(resourceSupplier(file.fileName), Parser.htmlParser().setTrackPosition(true))
        doc.select("span.koboSpan").mapNotNull { koboSpan ->
          val id = koboSpan.id()
          if (id.isNullOrBlank()) {
            null
          } else {
            // progression is built from the position in the file of each koboSpan, divided by the file size
            Pair(id, koboSpan.sourceRange().endPos().toFloat() / fileSize)
          }
        }
      }
    file.fileName to spans
  }

private fun getToc(epub: EpubPackage): List<EpubTocEntry> {
// Epub 3
epub.getNavResource()?.let { return processNav(it, Epub3Nav.TOC) }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import java.nio.file.Path
/** Gets the [ZipFile] from this builder, runs [block] on it and returns the result, closing the archive afterwards. */
inline fun <R> ZipFile.Builder.use(block: (ZipFile) -> R): R = get().use { zip -> block(zip) }

/** Opens an [InputStream] on the entry named [entryName]; the caller is responsible for closing it. */
fun ZipFile.getEntryInputStream(entryName: String): InputStream {
  val entry = getEntry(entryName)
  return getInputStream(entry)
}

/** Reads the full content of the entry named [entryName] into a byte array, closing the underlying stream afterwards. */
fun ZipFile.getEntryBytes(entryName: String): ByteArray {
  val entry = getEntry(entryName)
  return getInputStream(entry).use { stream -> stream.readBytes() }
}

fun getZipEntryBytes(
Expand Down
Loading

0 comments on commit df11920

Please sign in to comment.