Skip to content

Commit

Permalink
fix: Add filtering logic to exclude certain torrents during DMM scraping
Browse files Browse the repository at this point in the history
- Introduced `WipeSomeTissue` method to filter out torrents with specific criteria.
- Updated both batched and unbatched processing methods to apply the new filtering logic before storing torrent info.
  • Loading branch information
iPromKnight committed Nov 16, 2024
1 parent f710926 commit 50612a7
Showing 1 changed file with 10 additions and 2 deletions.
12 changes: 10 additions & 2 deletions src/Zilean.DmmScraper/Features/Dmm/DmmScraping.cs
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,9 @@ await AnsiConsole.Progress()

logger.LogInformation("Distinct torrents: {Count}", distinctTorrents.Count);

var finalizedTorrents = await parseTorrentNameService.ParseAndPopulateAsync(distinctTorrents);
var parsedTorrents = await parseTorrentNameService.ParseAndPopulateAsync(distinctTorrents);

var finalizedTorrents = parsedTorrents.Where(WipeSomeTissue).ToList();

await torrentInfoService.StoreTorrentInfo(finalizedTorrents);
}
Expand Down Expand Up @@ -150,7 +152,9 @@ await Parallel.ForEachAsync(files, parallelOptions, async (file, ct) =>
{
var distinctTorrents = torrents.DistinctBy(x => x.InfoHash).ToList();

var finalizedTorrents = await parseTorrentNameService.ParseAndPopulateAsync(distinctTorrents);
var parsedTorrents = await parseTorrentNameService.ParseAndPopulateAsync(distinctTorrents);

var finalizedTorrents = parsedTorrents.Where(WipeSomeTissue).ToList();

logger.LogInformation("Parsed {Count} torrents", finalizedTorrents.Count);

Expand Down Expand Up @@ -185,4 +189,8 @@ private static async IAsyncEnumerable<ExtractedDmmEntry> ProcessFileAsync(string
yield return torrent;
}
}

/// <summary>
/// Filter predicate used with <c>Where</c> to decide whether a parsed torrent is kept
/// before it is stored.
/// </summary>
/// <param name="torrent">The parsed torrent whose raw and parsed titles are inspected.</param>
/// <returns>
/// <c>false</c> (drop the torrent) when its raw title contains <c>" XXX "</c> (case-insensitive)
/// while its parsed title does not contain <c>"XXX"</c>; <c>true</c> (keep it) otherwise.
/// </returns>
private static bool WipeSomeTissue(TorrentInfo torrent) =>
    // Where(...) retains elements for which the predicate is true, so the
    // "should be excluded" match is negated to keep everything else.
    !(torrent.RawTitle.Contains(" XXX ", StringComparison.OrdinalIgnoreCase) &&
      !torrent.ParsedTitle.Contains("XXX", StringComparison.OrdinalIgnoreCase));
}

0 comments on commit 50612a7

Please sign in to comment.