From abea240e735775d1bc53601e64ff4ed1f9886b05 Mon Sep 17 00:00:00 2001 From: Daniel Boline Date: Mon, 10 Jun 2024 08:32:40 -0400 Subject: [PATCH] Add subtitle extractor (#22) * subtitle extractor --- Cargo.toml | 2 +- movie_collection_http/Cargo.toml | 2 +- movie_collection_http/src/movie_queue_app.rs | 24 +-- .../src/movie_queue_elements.rs | 46 ++++- .../src/movie_queue_routes.rs | 40 ++++ movie_collection_lib/Cargo.toml | 2 +- movie_collection_lib/src/lib.rs | 1 + movie_collection_lib/src/make_list.rs | 36 +++- movie_collection_lib/src/mkv_utils.rs | 175 ++++++++++++++++++ templates/scripts.js | 14 ++ tests/data/mkvinfo_output.txt | 76 ++++++++ transcode_lib/Cargo.toml | 2 +- 12 files changed, 396 insertions(+), 24 deletions(-) create mode 100644 movie_collection_lib/src/mkv_utils.rs create mode 100644 tests/data/mkvinfo_output.txt diff --git a/Cargo.toml b/Cargo.toml index e3341a9..e810231 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "movie_collection_rust" -version = "0.10.32" +version = "0.10.33" authors = ["Daniel Boline "] edition = "2018" diff --git a/movie_collection_http/Cargo.toml b/movie_collection_http/Cargo.toml index 69f4da6..9e108c4 100644 --- a/movie_collection_http/Cargo.toml +++ b/movie_collection_http/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "movie_collection_http" -version = "0.10.32" +version = "0.10.33" authors = ["Daniel Boline "] edition = "2018" diff --git a/movie_collection_http/src/movie_queue_app.rs b/movie_collection_http/src/movie_queue_app.rs index 1ff9937..e7f8c36 100644 --- a/movie_collection_http/src/movie_queue_app.rs +++ b/movie_collection_http/src/movie_queue_app.rs @@ -30,16 +30,16 @@ use super::{ find_new_episodes, frontpage, imdb_episodes_route, imdb_episodes_update, imdb_ratings_route, imdb_ratings_set_source, imdb_ratings_update, imdb_show, last_modified_route, movie_collection_route, movie_collection_update, movie_queue, - movie_queue_delete, movie_queue_play, movie_queue_remcom_directory_file, - movie_queue_remcom_file, movie_queue_route, movie_queue_show, movie_queue_transcode, - movie_queue_transcode_cleanup, movie_queue_transcode_directory, movie_queue_transcode_file, - movie_queue_transcode_status, movie_queue_transcode_status_file_list, - movie_queue_transcode_status_procs, movie_queue_update, music_collection, - music_collection_update, plex_detail, plex_events, plex_events_update, plex_filename, - plex_filename_update, plex_list, plex_metadata, plex_metadata_update, plex_webhook, - refresh_auth, scripts_js, trakt_auth_url, trakt_cal, trakt_callback, trakt_watched_action, - trakt_watched_list, trakt_watched_seasons, trakt_watchlist, trakt_watchlist_action, - tvshows, user, + movie_queue_delete, movie_queue_extract_subtitle, movie_queue_play, + movie_queue_remcom_directory_file, movie_queue_remcom_file, movie_queue_route, + movie_queue_show, movie_queue_transcode, movie_queue_transcode_cleanup, + movie_queue_transcode_directory, movie_queue_transcode_file, movie_queue_transcode_status, + movie_queue_transcode_status_file_list, movie_queue_transcode_status_procs, + movie_queue_update, music_collection, music_collection_update, plex_detail, plex_events, + plex_events_update, plex_filename, plex_filename_update, plex_list, plex_metadata, + plex_metadata_update, plex_webhook, refresh_auth, scripts_js, trakt_auth_url, trakt_cal, + trakt_callback, trakt_watched_action, trakt_watched_list, trakt_watched_seasons, + trakt_watchlist, trakt_watchlist_action, tvshows, user, }, }; @@ -138,6 +138,7 @@ fn get_full_path(app: &AppState) -> BoxedFilter<(impl Reply,)> { let plex_metadata_update_path = plex_metadata_update(app.clone()).boxed(); let music_collection_path = music_collection(app.clone()).boxed(); let music_collection_update_path = music_collection_update(app.clone()).boxed(); + let movie_queue_extract_subtitle_path = movie_queue_extract_subtitle(app.clone()).boxed(); let list_path = frontpage_path .or(scripts_js_path) @@ -166,7 +167,8 @@ fn get_full_path(app: &AppState) -> BoxedFilter<(impl Reply,)> { .or(plex_metadata_path) .or(plex_metadata_update_path) .or(music_collection_path) - .or(music_collection_update_path); + .or(music_collection_update_path) + .or(movie_queue_extract_subtitle_path); let auth_url_path = trakt_auth_url(app.clone()).boxed(); let trakt_callback_path = trakt_callback(app.clone()).boxed(); let refresh_auth_path = refresh_auth(app.clone()).boxed(); diff --git a/movie_collection_http/src/movie_queue_elements.rs b/movie_collection_http/src/movie_queue_elements.rs index a4858e7..207860d 100644 --- a/movie_collection_http/src/movie_queue_elements.rs +++ b/movie_collection_http/src/movie_queue_elements.rs @@ -2081,11 +2081,14 @@ fn LocalFileElement( .iter() .enumerate() .map(|(idx, f)| { - let f_key = f - .replace(".mkv", "") - .replace(".m4v", "") - .replace(".avi", "") - .replace(".mp4", ""); + let mut f_key = f.clone(); + for suffix in [".mkv", ".m4v", ".avi", ".mp4"] { + if let Some(s) = f_key.strip_suffix(suffix) { + f_key = s.into(); + break; + } + } + let mut subtitle_selector = None; let button = if file_map.contains_key(f_key.as_str()) { rsx! { button { @@ -2134,6 +2137,38 @@ fn LocalFileElement( None => rsx! {"unknown"}, } } else { + if let Some(subtitles) = file_lists.subtitles.get(f.as_str()) { + let nlines = subtitles.iter().find_map(|(_, n)| *n); + let options = subtitles.iter().enumerate().map(|(i, (s, _))| { + let mkv_number = s.number; + let label = + format_sstr!("{} {} {} {}", s.number, s.language, s.codec_id, s.name); + rsx! { + option { + key: "subtitle-key-{f}-{i}", + value: "{mkv_number}", + "{label}" + } + } + }); + let title = if let Some(n) = nlines { + format_sstr!("re-extract subtitles {n}") + } else { + "extract subtitles".into() + }; + subtitle_selector.replace(rsx! { + select { + id: "subtitle-selector-{f}", + {options}, + }, + button { + "type": "submit", + id: "subtitle-button-{f}", + "onclick": "extract_subtitles('{f}')", + "{title}", + } + }); + } rsx! { button { "type": "submit", @@ -2149,6 +2184,7 @@ fn LocalFileElement( key: "flist-key-{idx}", td {"{f}"}, td {{button}}, + td {{subtitle_selector}}, } } }); diff --git a/movie_collection_http/src/movie_queue_routes.rs b/movie_collection_http/src/movie_queue_routes.rs index fd61a93..3ad715e 100644 --- a/movie_collection_http/src/movie_queue_routes.rs +++ b/movie_collection_http/src/movie_queue_routes.rs @@ -29,6 +29,7 @@ use movie_collection_lib::{ imdb_episodes::ImdbEpisodes, imdb_ratings::ImdbRatings, make_list::FileLists, + mkv_utils::MkvTrack, movie_collection::{LastModifiedResponse, MovieCollection, TvShowsResult}, movie_queue::{MovieQueueDB, MovieQueueResult}, music_collection::MusicCollection, @@ -1869,3 +1870,42 @@ pub async fn music_collection_update( task.await.ok(); Ok(HtmlBase::new("Success").into()) } + +#[derive(RwebResponse)] +#[response( + description = "Extract Subtitles", + content = "html", + status = "Created" +)] +struct ExtractSubtitlesResponse(HtmlBase); + +#[post("/list/transcode/subtitle/{filename}/{index}")] +pub async fn movie_queue_extract_subtitle( + filename: StackString, + index: u64, + #[filter = "LoggedUser::filter"] user: LoggedUser, + #[data] state: AppState, +) -> WarpResult { + let task = user + .store_url_task( + state.trakt.get_client(), + &state.config, + &format_sstr!("/list/transcode/subtitle/{filename}/{index}"), + ) + .await; + + let input_path = state + .config + .home_dir + .join("Documents") + .join("movies") + .join(&filename); + + let input_file: StackString = input_path.to_string_lossy().into(); + let output = MkvTrack::extract_subtitles_from_mkv(&input_file, index) + .await + .map_err(Into::::into)?; + task.await.ok(); + + Ok(HtmlBase::new(output).into()) +} diff --git a/movie_collection_lib/Cargo.toml b/movie_collection_lib/Cargo.toml index ac627e0..8129c29 100644 --- a/movie_collection_lib/Cargo.toml +++ b/movie_collection_lib/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "movie_collection_lib" -version = "0.10.32" +version = "0.10.33" authors = ["Daniel Boline "] edition = "2018" diff --git a/movie_collection_lib/src/lib.rs b/movie_collection_lib/src/lib.rs index f6c50f0..fb3248e 100644 --- a/movie_collection_lib/src/lib.rs +++ b/movie_collection_lib/src/lib.rs @@ -15,6 +15,7 @@ pub mod imdb_ratings; pub mod imdb_utils; pub mod make_list; pub mod make_queue; +pub mod mkv_utils; pub mod movie_collection; pub mod movie_queue; pub mod music_collection; diff --git a/movie_collection_lib/src/make_list.rs b/movie_collection_lib/src/make_list.rs index 0b50da4..5eb3950 100644 --- a/movie_collection_lib/src/make_list.rs +++ b/movie_collection_lib/src/make_list.rs @@ -18,6 +18,7 @@ use tokio_stream::{wrappers::ReadDirStream, StreamExt}; use crate::{ config::Config, + mkv_utils::{MkvTrack, TrackType}, movie_collection::MovieCollection, pgpool::PgPool, transcode_service::transcode_status, @@ -28,6 +29,7 @@ use crate::{ pub struct FileLists { pub local_file_list: Vec, pub file_list: Vec, + pub subtitles: HashMap)>>, } impl FileLists { @@ -71,10 +73,12 @@ impl FileLists { let patterns: Vec<_> = local_file_list .iter() .map(|f| { - f.replace(".mkv", "") - .replace(".m4v", "") - .replace(".avi", "") - .replace(".mp4", "") + for suffix in [".mkv", ".m4v", ".avi", ".mp4"] { + if let Some(s) = f.strip_suffix(suffix) { + return s.into(); + } + } + f.clone() }) .collect(); @@ -107,9 +111,33 @@ impl FileLists { file_list? }; + let mut subtitles: HashMap)>> = HashMap::new(); + + for f in &local_file_list { + if f.ends_with(".mkv") { + let movies_dir = config.home_dir.join("Documents").join("movies"); + let full_path = movies_dir.join(f); + let srt_path = full_path.with_extension("srt"); + let mut nlines: Option = None; + if srt_path.exists() { + nlines.replace(fs::read_to_string(srt_path).await?.split('\n').count()); + } + let full_path: StackString = full_path.to_string_lossy().into(); + for track in MkvTrack::get_subtitles_from_mkv(&full_path).await? { + if track.track_type == Some(TrackType::Subtitles) { + subtitles + .entry(f.clone()) + .or_default() + .push((track, nlines)); + } + } + } + } + Ok(Self { local_file_list, file_list, + subtitles, }) } diff --git a/movie_collection_lib/src/mkv_utils.rs b/movie_collection_lib/src/mkv_utils.rs new file mode 100644 index 0000000..15daced --- /dev/null +++ b/movie_collection_lib/src/mkv_utils.rs @@ -0,0 +1,175 @@ +use anyhow::{format_err, Error}; +use stack_string::{format_sstr, StackString}; +use std::fmt; +use tokio::process::Command; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum TrackType { + Video, + Audio, + Subtitles, +} + +impl TrackType { + fn from_str(s: &str) -> Option { + match s { + "video" => Some(Self::Video), + "audio" => Some(Self::Audio), + "subtitles" => Some(Self::Subtitles), + _ => None, + } + } + + fn to_str(self) -> &'static str { + match self { + Self::Video => "video", + Self::Audio => "audio", + Self::Subtitles => "subtitles", + } + } +} + +impl fmt::Display for TrackType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.to_str()) + } +} + +#[derive(Debug, Default, PartialEq, Eq, Clone)] +pub struct MkvTrack { + pub number: u64, + pub track_type: Option, + pub codec_id: StackString, + pub name: StackString, + pub language: StackString, +} + +impl MkvTrack { + fn parse_mkvinfo(input: &str) -> Vec { + let mut tracks = Vec::new(); + let mut current_track: Option = None; + + for line in input.split('\n') { + if line.starts_with("| + Track") { + if let Some(track) = current_track.replace(Self::default()) { + tracks.push(track); + } + } else if line.starts_with("| + Track number: ") { + if let Some(entry) = line + .strip_prefix("| + Track number: ") + .and_then(|l| l.split_ascii_whitespace().next()) + { + if let Some(track) = &mut current_track { + if let Ok(n) = entry.parse() { + track.number = n; + } + } + } + } else if line.starts_with("| + Track type: ") { + if let Some(entry) = line.strip_prefix("| + Track type: ") { + if let Some(track_type) = TrackType::from_str(entry.trim()) { + if let Some(track) = &mut current_track { + track.track_type = Some(track_type); + } + } + } + } else if line.starts_with("| + Codec ID: ") { + if let Some(entry) = line.strip_prefix("| + Codec ID: ") { + if let Some(track) = &mut current_track { + track.codec_id = entry.trim().into(); + } + } + } else if line.starts_with("| + Name: ") { + if let Some(entry) = line.strip_prefix("| + Name: ") { + if let Some(track) = &mut current_track { + track.name = entry.trim().into(); + } + } + } else if line.starts_with("| + Language: ") { + if let Some(entry) = line.strip_prefix("| + Language: ") { + if let Some(track) = &mut current_track { + track.language = entry.trim().into(); + } + } + } + } + if let Some(track) = current_track.take() { + tracks.push(track); + } + + tracks + } + + /// # Errors + /// Return error if fpath doesn't end in mkv, or if output of mkvinfo is not + /// utf8 + pub async fn get_subtitles_from_mkv(fpath: &str) -> Result, Error> { + if !fpath.to_lowercase().ends_with(".mkv") { + return Err(format_err!("Filename must end in mkv")); + } + + let output = Command::new("/usr/bin/mkvinfo") + .args([fpath]) + .output() + .await?; + if !output.status.success() && output.status.code() != Some(1) { + return Err(format_err!( + "Process exited with error {:?}", + output.status.code() + )); + } + let output = StackString::from_utf8_vec(output.stdout)?; + + let tracks = Self::parse_mkvinfo(&output); + + Ok(tracks) + } + + /// # Errors + /// Return error if fpath doesn't end in mkv, or if output of mkvinfo is not + /// utf8 + pub async fn extract_subtitles_from_mkv(fpath: &str, index: u64) -> Result { + let fname = fpath + .strip_suffix(".mkv") + .ok_or_else(|| format_err!("Wrong suffix"))?; + let srt_path = format_sstr!("{fname}.srt"); + if index < 1 { + return Err(format_err!("Index must be greater than 0")); + } + let output = Command::new("/usr/bin/mkvextract") + .args([fpath, "tracks", &format_sstr!("{}:{srt_path}", index - 1)]) + .output() + .await?; + if !output.status.success() && output.status.code() != Some(1) { + return Err(format_err!( + "Process exited with error {:?}", + output.status.code() + )); + } + let output = StackString::from_utf8_vec(output.stdout)?; + Ok(output) + } +} + +#[cfg(test)] +mod tests { + use anyhow::Error; + + use crate::mkv_utils::{MkvTrack, TrackType}; + + #[test] + fn test_parse_mkvinfo() -> Result<(), Error> { + let input = include_str!("../../tests/data/mkvinfo_output.txt"); + let tracks = MkvTrack::parse_mkvinfo(input); + assert_eq!(tracks.len(), 3); + for track in &tracks { + if track.track_type == Some(TrackType::Subtitles) { + assert_eq!(track.number, 3); + assert_eq!(track.codec_id.as_str(), "S_TEXT/UTF8"); + assert_eq!(track.name.as_str(), "Scarface - YIFY"); + assert_eq!(track.language.as_str(), "eng"); + } + } + Ok(()) + } +} diff --git a/templates/scripts.js b/templates/scripts.js index 67ef4c0..a1a58f3 100644 --- a/templates/scripts.js +++ b/templates/scripts.js @@ -241,3 +241,17 @@ function sourceWatchlist(link) { let url = `${link}&source=${source}`; updateMainArticle(url); } +function extract_subtitles(file) { + let index = document.getElementById(`subtitle-selector-${file}`).value; + let url = `/list/transcode/subtitle/${file}/${index}`; + let xmlhttp = new XMLHttpRequest(); + xmlhttp.open("POST", url, true); + xmlhttp.onload = function nothing() { + update_procs(); + update_file_list(); + document.getElementById("remcomoutput").innerHTML = " "; + } + xmlhttp.send(null); + let out = `extract ${index} from ${file}`; + document.getElementById("remcomoutput").innerHTML = out; +} \ No newline at end of file diff --git a/tests/data/mkvinfo_output.txt b/tests/data/mkvinfo_output.txt new file mode 100644 index 0000000..cc20142 --- /dev/null +++ b/tests/data/mkvinfo_output.txt @@ -0,0 +1,76 @@ ++ EBML head +|+ Document type: matroska +|+ Document type version: 1 +|+ Document type read version: 1 ++ Segment: size 733901177 +|+ Seek head (subentries will be skipped) +|+ EBML void: size 4027 +|+ Segment information +| + Timestamp scale: 1000000 +| + Multiplexing application: libebml v0.7.7 + libmatroska v0.8.1 +| + Writing application: mkvmerge v2.2.0 ('Turn It On Again') built on Mar 4 2008 13:20:25 +| + Duration: 02:44:58.368000000 +| + Date: 2010-11-09 21:18:58 UTC +| + Segment UID: 0xad 0xe6 0xc3 0x8a 0xad 0xde 0x51 0xc6 0x92 0x41 0x11 0x0f 0x9e 0x69 0x72 0x17 +|+ Tracks +| + Track +| + Track number: 1 (track ID for mkvmerge & mkvextract: 0) +| + Track UID: 1 +| + Track type: video +| + "Enabled" flag: 1 +| + "Default track" flag: 1 +| + "Forced display" flag: 0 +| + "Lacing" flag: 0 +| + Minimum cache: 1 +| + Timestamp scale: 1 +| + Maximum block additional ID: 0 +| + Codec ID: V_MPEG4/ISO/AVC +| + Codec decode all: 1 +| + Codec's private data: size 40 (H.264 profile: High @L4.0) +| + Default duration: 00:00:00.041708398 (23.976 frames/fields per second for a video track) +| + Language: eng +| + Name: scOrp @ 300mbunited.com +| + Video track +| + Pixel width: 1280 +| + Pixel height: 544 +| + Interlaced: 0 +| + Display width: 40 +| + Display height: 17 +| + Track +| + Track number: 2 (track ID for mkvmerge & mkvextract: 1) +| + Track UID: 1200562060 +| + Track type: audio +| + "Enabled" flag: 1 +| + "Default track" flag: 1 +| + "Forced display" flag: 0 +| + "Lacing" flag: 1 +| + Minimum cache: 0 +| + Timestamp scale: 1 +| + Maximum block additional ID: 0 +| + Codec ID: A_AAC +| + Codec decode all: 1 +| + Codec's private data: size 7 +| + Default duration: 00:00:00.042666666 (23.438 frames/fields per second for a video track) +| + Language: und +| + Name: scOrp @ 300mbunited.com +| + Audio track +| + Sampling frequency: 24000 +| + Channels: 2 +| + Output sampling frequency: 48000 +| + Track +| + Track number: 3 (track ID for mkvmerge & mkvextract: 2) +| + Track UID: 267228297 +| + Track type: subtitles +| + "Enabled" flag: 1 +| + "Default track" flag: 1 +| + "Forced display" flag: 0 +| + "Lacing" flag: 0 +| + Minimum cache: 0 +| + Timestamp scale: 1 +| + Maximum block additional ID: 0 +| + Codec ID: S_TEXT/UTF8 +| + Codec decode all: 1 +| + Language: eng +| + Name: Scarface - YIFY +|+ EBML void: size 1024 +|+ Cluster diff --git a/transcode_lib/Cargo.toml b/transcode_lib/Cargo.toml index 27e12d8..6963bd3 100644 --- a/transcode_lib/Cargo.toml +++ b/transcode_lib/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "transcode_lib" -version = "0.10.32" +version = "0.10.33" authors = ["Daniel Boline "] edition = "2018"