Skip to content

Commit

Permalink
perf: faster zip entry extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
Snd-R authored Aug 26, 2024
1 parent 6794e24 commit eeb5898
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 15 deletions.
2 changes: 1 addition & 1 deletion komga/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ dependencies {
implementation("com.appmattus.crypto:cryptohash:0.10.1")

implementation("org.apache.tika:tika-core:2.9.1")
implementation("org.apache.commons:commons-compress:1.25.0")
implementation("org.apache.commons:commons-compress:1.27.1")
implementation("com.github.junrar:junrar:7.5.5")
implementation("com.github.gotson.nightcompress:nightcompress:0.2.0")
implementation("org.apache.pdfbox:pdfbox:3.0.1")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import org.gotson.komga.domain.model.MediaContainerEntry
import org.gotson.komga.domain.model.MediaType
import org.gotson.komga.infrastructure.image.ImageAnalyzer
import org.gotson.komga.infrastructure.mediacontainer.ContentDetector
import org.gotson.komga.infrastructure.util.getZipEntryBytes
import org.gotson.komga.infrastructure.util.use
import org.springframework.stereotype.Service
import java.nio.file.Path

Expand All @@ -26,7 +28,7 @@ class ZipExtractor(
path: Path,
analyzeDimensions: Boolean,
): List<MediaContainerEntry> =
ZipFile(path.toFile()).use { zip ->
ZipFile.builder().setPath(path).use { zip ->
zip.entries.toList()
.filter { !it.isDirectory }
.map { entry ->
Expand All @@ -52,8 +54,5 @@ class ZipExtractor(
override fun getEntryStream(
path: Path,
entryName: String,
): ByteArray =
ZipFile(path.toFile()).use { zip ->
zip.getInputStream(zip.getEntry(entryName)).use { it.readBytes() }
}
): ByteArray = getZipEntryBytes(path, entryName)
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package org.gotson.komga.infrastructure.mediacontainer.epub

import org.apache.commons.compress.archivers.zip.ZipFile
import org.gotson.komga.domain.model.MediaUnsupportedException
import org.gotson.komga.infrastructure.util.use
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.jsoup.parser.Parser
Expand All @@ -16,7 +17,7 @@ data class EpubPackage(
)

inline fun <R> Path.epub(block: (EpubPackage) -> R): R =
ZipFile(this.toFile()).use { zip ->
ZipFile.builder().setPath(this).use { zip ->
val opfFile = zip.getPackagePath()
val opfDoc = zip.getInputStream(zip.getEntry(opfFile)).use { Jsoup.parse(it, null, "", Parser.xmlParser()) }
val opfDir = Paths.get(opfFile).parent
Expand All @@ -30,7 +31,7 @@ fun ZipFile.getPackagePath(): String =
}

fun getPackageFile(path: Path): String? =
ZipFile(path.toFile()).use { zip ->
ZipFile.builder().setPath(path).use { zip ->
try {
zip.getInputStream(zip.getEntry(zip.getPackagePath())).reader().use { it.readText() }
} catch (e: Exception) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@ package org.gotson.komga.infrastructure.mediacontainer.epub

import io.github.oshai.kotlinlogging.KotlinLogging
import org.apache.commons.compress.archivers.ArchiveEntry
import org.apache.commons.compress.archivers.zip.ZipFile
import org.gotson.komga.domain.model.BookPage
import org.gotson.komga.domain.model.EntryNotFoundException
import org.gotson.komga.domain.model.EpubTocEntry
import org.gotson.komga.domain.model.MediaFile
import org.gotson.komga.domain.model.R2Locator
import org.gotson.komga.domain.model.TypedBytes
import org.gotson.komga.infrastructure.image.ImageAnalyzer
import org.gotson.komga.infrastructure.mediacontainer.ContentDetector
import org.gotson.komga.infrastructure.util.getZipEntryBytes
import org.jsoup.Jsoup
import org.springframework.beans.factory.annotation.Value
import org.springframework.stereotype.Service
Expand All @@ -35,11 +34,7 @@ class EpubExtractor(
fun getEntryStream(
path: Path,
entryName: String,
): ByteArray =
ZipFile(path.toFile()).use { zip ->
zip.getEntry(entryName)?.let { entry -> zip.getInputStream(entry).use { it.readBytes() } }
?: throw EntryNotFoundException("Entry does not exist: $entryName")
}
): ByteArray = getZipEntryBytes(path, entryName)

fun isEpub(path: Path): Boolean =
try {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package org.gotson.komga.infrastructure.util

import org.apache.commons.compress.archivers.zip.ZipFile
import org.gotson.komga.domain.model.EntryNotFoundException
import java.nio.file.Path

/**
 * Builds the [ZipFile] described by this builder, hands it to [block], and closes it
 * once [block] has finished (whether it returned normally or threw).
 *
 * @param block action to run against the opened archive
 * @return the value produced by [block]
 */
inline fun <R> ZipFile.Builder.use(block: (ZipFile) -> R): R {
    val archive = get()
    return archive.use(block)
}

/**
 * Reads the full content of the entry called [entryName] from the zip archive at [path].
 *
 * The archive is opened up to two times:
 * 1. Fast path: local file headers are ignored, so only the central directory record is
 *    read and the entry is looked up there.
 * 2. Slow path: used only if the fast path found nothing. Local file headers are read as
 *    well, so entry names can be taken from the Unicode extra field when present.
 *
 * @param path location of the zip archive
 * @param entryName name of the entry to read
 * @return the entry's bytes
 * @throws EntryNotFoundException if neither lookup finds an entry with that name
 */
fun getZipEntryBytes(
    path: Path,
    entryName: String,
): ByteArray {
    val builder =
        ZipFile.builder()
            .setPath(path)
            .setUseUnicodeExtraFields(true)

    // Fast path: central directory record only.
    builder.setIgnoreLocalFileHeader(true)
    builder.use { zip -> zip.getEntryBytes(entryName) }?.let { return it }

    // Slow path: the name was not in the central directory record, so reopen the archive
    // with local file headers enabled and look the entry up again.
    builder.setIgnoreLocalFileHeader(false)
    val content = builder.use { zip -> zip.getEntryBytes(entryName) }
    return content ?: throw EntryNotFoundException("Entry does not exist: $entryName")
}

/**
 * Reads the whole content of the entry named [entryName] from this archive.
 *
 * The receiver is closed before this function returns, so it must not be reused afterwards.
 *
 * @param entryName name of the entry to look up
 * @return the entry's bytes, or `null` when the archive has no entry with that name
 */
private fun ZipFile.getEntryBytes(entryName: String): ByteArray? =
    use { archive ->
        val entry = archive.getEntry(entryName) ?: return@use null
        archive.getInputStream(entry).use { stream -> stream.readBytes() }
    }

0 comments on commit eeb5898

Please sign in to comment.