Skip to content

Commit

Permalink
Improve file type detection in classifier (#365)
Browse files (browse the repository at this point in the history)
  • Loading branch information
mgdigital authored Jan 8, 2025
1 parent 1d0034c commit 0b51d37
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 5 deletions.
48 changes: 43 additions & 5 deletions internal/classifier/classifier.core.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ workflows:
- if_else:
condition:
and:
- "torrent.files.map(f, f.fileType == fileType.audio ? f.size : - f.size).sum() > 50*mb"
- "torrent.files.map(f, f.extension in extensions.audio ? f.size : - f.size).sum() > 50*mb"
- or:
- "torrent.baseName.matches(keywords.audiobook)"
- "torrent.files.filter(f, f.extension in extensions.audiobook).size() > 0"
Expand All @@ -40,19 +40,22 @@ workflows:
- "torrent.files.map(f, f.extension in extensions.music ? f.size : - f.size).sum() > 0"
- and:
- "torrent.baseName.matches(keywords.music)"
- "torrent.files.map(f, f.fileType == fileType.audio ? f.size : - f.size).sum() > 0"
- "torrent.files.map(f, f.extension in extensions.audio ? f.size : - f.size).sum() > 0"
if_action:
set_content_type: music
else_action: unmatched
# match software:
- if_else:
condition: "torrent.files.map(f, f.fileType == fileType.software ? f.size : - f.size).sum() > 0"
condition: "torrent.files.map(f, f.extension in extensions.software ? f.size : - f.size).sum() > 0"
if_action:
set_content_type: software
else_action: unmatched
# match xxx:
- if_else:
condition: "([torrent.baseName] + torrent.files.map(f, f.basePath)).join(' ').matches(keywords.xxx)"
condition:
and:
- "torrent.files.map(f, (f.extension in extensions.video || f.extension in extensions.image) ? f.size : - f.size).sum() > 0"
- "([torrent.baseName] + torrent.files.map(f, f.basePath)).join(' ').matches(keywords.xxx)"
if_action:
set_content_type: xxx
else_action: unmatched
Expand All @@ -78,7 +81,7 @@ workflows:
condition:
or:
- "result.contentType in [contentType.movie, contentType.tv_show]"
- "torrent.files.map(f, f.fileType == fileType.video ? f.size : - f.size).sum() > 100*mb"
- "torrent.files.map(f, f.extension in extensions.video ? f.size : - f.size).sum() > 100*mb"
if_action:
find_match:
# parse video-related attributes from the torrent name;
Expand All @@ -105,6 +108,16 @@ workflows:
- "flags.delete_xxx && result.contentType == contentType.xxx"
if_action: delete
extensions:
audio:
- mp3
- wav
- flac
- aac
- ogg
- m4a
- m4b
- mid
- dsf
audiobook:
- m4b
comic:
Expand All @@ -128,6 +141,18 @@ extensions:
- odt
- pdf
- rtf
image:
- jpg
- jpeg
- png
- gif
- bmp
- svg
- dds
- psd
- tif
- tiff
- ico
music:
- ape
- dsf
Expand All @@ -149,6 +174,19 @@ extensions:
- pkg
- rpm
- sh
video:
- mp4
- mkv
- avi
- iso
- mov
- wmv
- flv
- m4v
- mpg
- mpeg
- ts
- vob
keywords:
audiobook:
- audiobooks?
Expand Down
1 change: 1 addition & 0 deletions internal/model/file_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ var extensionToFileTypeMap = map[string]FileType{
"m4v": FileTypeVideo,
"mpg": FileTypeVideo,
"mpeg": FileTypeVideo,
"ts": FileTypeVideo,
"vob": FileTypeVideo,
}

Expand Down

0 comments on commit 0b51d37

Please sign in to comment.