tweaks and freaks
ilude committed Apr 18, 2024
1 parent 75971d2 commit 2216527
Showing 2 changed files with 19 additions and 38 deletions.
app/models/feed.py — 34 changes: 10 additions & 24 deletions
@@ -52,12 +52,12 @@ def __init__(self, widget) -> None:
         self.cache_path.parent.mkdir(parents=True, exist_ok=True)

         items = self.load_cache(self.cache_path)
-        self.items = self.filter_removed_objects(items, self.display_limit)
+        self.items = items[:self.display_limit] if items else []
         if self.items:
             self._last_updated = datetime.fromtimestamp(os.path.getmtime(self.cache_path))

-        # logger.debug(f"creating cron job for {self.name}")
-        # self.scheduler.add_job(self.update, 'cron', name=f'{self.id} - {self.name} - cron', hour='*', jitter=20, max_instances=1)
+        logger.debug(f"creating cron job for {self.name}")
+        self.scheduler.add_job(self.update, 'cron', name=f'{self.id} - {self.name} - cron', hour='*', jitter=20, max_instances=1)

         if self.needs_update or self.old_cache_path.exists() or self.name == "Instapundit":
             # schedule job to run right now
@@ -103,28 +103,12 @@ def feed_url(self, url: str):
         self._url = url
         self.id = calculate_sha1_hash(url)

-    def filter_removed_objects(self, articles: list['FeedArticle'], display_limit: int=None):
-        """
-        Filters a list of objects and returns a new list with objects where 'removed' is False.
-        Parameters:
-            objects_list (list): A list of objects with a 'removed' property.
-            display_limit (int, optional): The maximum number of objects to return. If not provided, all objects are returned.
-        Returns:
-            list: A new list with objects where 'removed' is False, up to the specified display limit.
-        """
-        filtered_objects = list(filter(lambda obj: not obj.removed, articles))
-
-        if display_limit is not None:
-            return filtered_objects[:display_limit]
-        else:
-            return filtered_objects
-
     def update(self):
         articles = self.download(self.feed_url)
         articles = self.save_articles(articles)
-        self.items = self.filter_removed_objects(articles, self.display_limit)
+        self.items = articles[:self.display_limit]
         self._last_updated = datetime.now()
         logging.debug(f"Updated {self.name}")

@@ -193,6 +177,7 @@ def processors(self, articles: list[FeedArticle]) -> list[FeedArticle]:
         return articles


+
     def remove_duplicate_articles(self, articles):
         """
         Removes articles with duplicate IDs, keeping the one with the 'processed' attribute set if it exists.
@@ -203,6 +188,10 @@ def remove_duplicate_articles(self, articles):
         Returns:
             list: A new list with articles where duplicate IDs have been removed, keeping the one with 'processed' set.
         """
+
+        # Filters a list of objects and returns a new list with objects where 'removed' is False.
+        articles = list(filter(lambda obj: not obj.removed, articles))
+
         # Create a dictionary to group articles by their ID
         article_dict = defaultdict(list)
         for article in articles:
@@ -226,9 +215,6 @@ def save_articles(self, articles: list[FeedArticle]):

         # using article.id remove duplicates from articles
         all_articles = self.remove_duplicate_articles(all_articles)
-
-        #all_articles = self.apply_filters(all_articles)
-        all_articles = self.filter_removed_objects(all_articles)

         all_articles = self.processors(all_articles)

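Note: the body of remove_duplicate_articles is only partly visible in the hunks above. Below is a minimal sketch of the approach its docstring and the visible defaultdict grouping suggest (drop removed articles, group by id, keep one entry per id, preferring the one whose 'processed' attribute is set). It is illustrative only, not the repository's exact code.

from collections import defaultdict

def remove_duplicate_articles(articles):
    # Drop articles flagged as removed before deduplicating.
    articles = [a for a in articles if not a.removed]

    # Group articles by their id.
    grouped = defaultdict(list)
    for article in articles:
        grouped[article.id].append(article)

    # Keep one article per id, preferring one with 'processed' set.
    deduped = []
    for group in grouped.values():
        processed = [a for a in group if a.processed]
        deduped.append(processed[0] if processed else group[0])
    return deduped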
app/processors/title_editor.py — 23 changes: 9 additions & 14 deletions
@@ -48,26 +48,21 @@ def __init__(self):
         self.chain = chat_prompt | model | parser

         self.script_hash = calculate_sha1_hash(f"{system_prompt.content}{model_name}{model_temp}")
-
-    def set_processed(self, article, script_hash):
-        article.processed = script_hash
-        return article

     def process(self, articles: list[FeedArticle]) -> list[FeedArticle]:
         if self.ollama_url:

             needs_processed = list(filter(lambda article: article.processed != self.script_hash, articles))

             total = len(needs_processed)
-            try:
-                for count, article in enumerate(needs_processed, start=1):
-                    result = self.chain.invoke({"title": article.original_title, "summary": article.description})
-                    article.title = result['title']
-                    logger.debug(f"{count}/{total}: {article.processed != self.script_hash} current hash: {self.script_hash} processed hash: {article.processed}")
-            except Exception as ex:
-                print(f"Error: {ex} for {article.original_title}")
-                needs_processed.remove(article)
-
-            articles = list(map(lambda article: self.set_processed(article, self.script_hash), articles))
+            for count, article in enumerate(needs_processed, start=1):
+                try:
+                    result = self.chain.invoke({"title": article.original_title, "summary": article.description})
+                    article.title = result['title']
+                    article.processed = self.script_hash
+                    logger.debug(f"{count}/{total}: {article.processed != self.script_hash} current hash: {self.script_hash} processed hash: {article.processed}")
+                except Exception as ex:
+                    print(f"Error: {ex} for {article.original_title}")
+                    needs_processed.remove(article)

         return articles
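For context, the construction of chat_prompt, model, and parser above the first visible line (self.chain = chat_prompt | model | parser) is elided from this diff. Below is a minimal sketch of how such a chain is commonly assembled with LangChain; the imports, prompt wording, model name, and parameter values are assumptions for illustration, not the repository's actual code.

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_community.chat_models import ChatOllama

# Hypothetical stand-ins for values referenced in the diff (ollama_url, model_name, model_temp, system_prompt).
model = ChatOllama(base_url="http://localhost:11434", model="mistral", temperature=0.2)
parser = JsonOutputParser()
chat_prompt = ChatPromptTemplate.from_messages([
    ("system", "Rewrite the article title; respond as JSON with a 'title' key."),
    ("human", "title: {title}\nsummary: {summary}"),
])
chain = chat_prompt | model | parser

# Same call shape as in process(): the parser returns a dict, so result['title'] works.
result = chain.invoke({"title": "Original headline", "summary": "Short description"})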
