-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: service discovery and manual config
- Loading branch information
1 parent
79e12e1
commit b39b6a3
Showing
19 changed files
with
297 additions
and
122 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
2 changes: 1 addition & 1 deletion
2
...Scraper/Features/Dmm/DmmFileDownloader.cs → ...r/Features/Ingestion/DmmFileDownloader.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
4 changes: 1 addition & 3 deletions
4
....Scraper/Features/Dmm/DmmPageProcessor.cs → ...er/Features/Ingestion/DmmPageProcessor.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2 changes: 1 addition & 1 deletion
2
...lean.Scraper/Features/Dmm/DmmSyncState.cs → ...craper/Features/Ingestion/DmmSyncState.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
115 changes: 115 additions & 0 deletions
115
src/Zilean.Scraper/Features/Ingestion/GenericIngestionProcessor.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
namespace Zilean.Scraper.Features.Ingestion; | ||
|
||
public class GenericIngestionProcessor( | ||
IHttpClientFactory clientFactory, | ||
ILogger<GenericIngestionProcessor> logger, | ||
ParseTorrentNameService parseTorrentNameService, | ||
TorrentInfoService torrentInfoService, | ||
ZileanConfiguration configuration) | ||
{ | ||
public async Task ProcessTorrentsAsync(string url, CancellationToken cancellationToken = default) | ||
{ | ||
logger.LogInformation("Processing URL: {Url}", url); | ||
|
||
var channel = Channel.CreateBounded<Task<StreamedEntry>>(new BoundedChannelOptions(configuration.Ingestion.MaxChannelSize) | ||
{ | ||
SingleReader = true, | ||
SingleWriter = false, | ||
FullMode = BoundedChannelFullMode.Wait | ||
}); | ||
|
||
var producerTask = ProduceAsync(url, channel.Writer, cancellationToken); | ||
var consumerTask = ConsumeAsync(channel.Reader, configuration.Ingestion.BatchSize, cancellationToken); | ||
await Task.WhenAll(producerTask, consumerTask); | ||
} | ||
|
||
private async Task ProduceAsync(string url, ChannelWriter<Task<StreamedEntry>> writer, CancellationToken cancellationToken = default) | ||
{ | ||
try | ||
{ | ||
var httpClient = clientFactory.CreateClient(); | ||
var response = await httpClient.GetAsync(url, HttpCompletionOption.ResponseHeadersRead, cancellationToken); | ||
response.EnsureSuccessStatusCode(); | ||
|
||
var stream = await response.Content.ReadAsStreamAsync(cancellationToken); | ||
var options = new JsonSerializerOptions | ||
{ | ||
PropertyNameCaseInsensitive = true | ||
}; | ||
|
||
await foreach (var item in JsonSerializer.DeserializeAsyncEnumerable<StreamedEntry>(stream, options, cancellationToken)) | ||
{ | ||
if (item is not null) | ||
{ | ||
await writer.WriteAsync(Task.FromResult(item), cancellationToken); | ||
} | ||
} | ||
} | ||
catch (Exception) | ||
{ | ||
logger.LogWarning("Error processing item"); | ||
throw; | ||
} | ||
finally | ||
{ | ||
writer.Complete(); | ||
} | ||
} | ||
|
||
private async Task ConsumeAsync(ChannelReader<Task<StreamedEntry>> reader, int batchSize, CancellationToken cancellationToken = default) | ||
{ | ||
var batch = new List<Task<StreamedEntry>>(batchSize); | ||
|
||
await foreach (var task in reader.ReadAllAsync(cancellationToken)) | ||
{ | ||
batch.Add(task); | ||
|
||
if (batch.Count < batchSize) | ||
{ | ||
continue; | ||
} | ||
|
||
await ProcessBatch(batch, cancellationToken); | ||
batch.Clear(); | ||
} | ||
|
||
if (batch.Count > 0) | ||
{ | ||
await ProcessBatch(batch, cancellationToken); | ||
} | ||
} | ||
|
||
private async Task ProcessBatch(List<Task<StreamedEntry>> batch, CancellationToken cancellationToken = default) | ||
{ | ||
try | ||
{ | ||
var torrents = new List<ExtractedDmmEntry>(); | ||
|
||
await foreach (var result in Task.WhenEach(batch).WithCancellation(cancellationToken)) | ||
{ | ||
var current = await result; | ||
torrents.Add(ExtractedDmmEntry.FromStreamedEntry(current)); | ||
} | ||
|
||
if (torrents.Count == 0 || cancellationToken.IsCancellationRequested) | ||
{ | ||
return; | ||
} | ||
|
||
logger.LogInformation("Processing batch of {Count} torrents", torrents.Count); | ||
|
||
if (torrents.Count != 0) | ||
{ | ||
var parsedTorrents = await parseTorrentNameService.ParseAndPopulateAsync(torrents); | ||
var finalizedTorrents = parsedTorrents.Where(torrentInfo => torrentInfo.WipeSomeTissue()).ToList(); | ||
logger.LogInformation("Parsed {Count} torrents", finalizedTorrents.Count); | ||
await torrentInfoService.StoreTorrentInfo(finalizedTorrents); | ||
} | ||
} | ||
catch (Exception) | ||
{ | ||
logger.LogWarning("Error processing batch of torrents. Batch size: {BatchSize}", batch.Count); | ||
throw; | ||
} | ||
} | ||
} |
67 changes: 67 additions & 0 deletions
67
src/Zilean.Scraper/Features/Ingestion/GenericIngestionScraping.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
namespace Zilean.Scraper.Features.Ingestion; | ||
|
||
public class GenericIngestionScraping( | ||
ZileanConfiguration configuration, | ||
GenericIngestionProcessor ingestionProcessor, | ||
ILogger<GenericIngestionScraping> logger, | ||
KubernetesServiceDiscovery kubernetesServiceDiscovery) | ||
{ | ||
public async Task<int> Execute(CancellationToken cancellationToken) | ||
{ | ||
logger.LogInformation("Starting ingestion scraping"); | ||
|
||
List<string> urlsToProcess = []; | ||
|
||
if (configuration.Ingestion.Kubernetes.EnableServiceDiscovery) | ||
{ | ||
logger.LogInformation("Discovering URLs from Kubernetes services"); | ||
var urls = await kubernetesServiceDiscovery.DiscoverUrlsAsync(cancellationToken); | ||
logger.LogInformation("Discovered {Count} URLs from Kubernetes services", urls.Count); | ||
urlsToProcess.AddRange(urls); | ||
} | ||
|
||
if (configuration.Ingestion.EnableZurgIngestion) | ||
{ | ||
logger.LogInformation("Adding Zurg instances to the list of URLs to process"); | ||
urlsToProcess.AddRange(configuration.Ingestion.ZurgInstances); | ||
} | ||
|
||
if (configuration.Ingestion.EnableZileanIngestion) | ||
{ | ||
logger.LogInformation("Adding Zilean instances to the list of URLs to process"); | ||
urlsToProcess.AddRange(configuration.Ingestion.ZileanInstances); | ||
} | ||
|
||
if (urlsToProcess.Count == 0) | ||
{ | ||
logger.LogInformation("No URLs to process, exiting"); | ||
return 0; | ||
} | ||
|
||
var completedCount = 0; | ||
|
||
foreach (var url in urlsToProcess) | ||
{ | ||
cancellationToken.ThrowIfCancellationRequested(); | ||
|
||
try | ||
{ | ||
await ingestionProcessor.ProcessTorrentsAsync(url, cancellationToken); | ||
completedCount++; | ||
} | ||
catch (OperationCanceledException) | ||
{ | ||
logger.LogInformation("Ingestion scraping cancelled"); | ||
break; | ||
} | ||
catch (Exception ex) | ||
{ | ||
logger.LogError(ex, "Error processing URL: {Url}", url); | ||
} | ||
} | ||
|
||
logger.LogInformation("Ingestion scraping completed for {Count} URLs", completedCount); | ||
|
||
return 0; | ||
} | ||
} |
Oops, something went wrong.