Skip to content

Commit

Permalink
Merge pull request #13 from skirmer/generator
Browse files Browse the repository at this point in the history
Adding script to pull new articles
  • Loading branch information
skirmer authored Nov 28, 2023
2 parents 14c6fcf + 705182d commit 75fa736
Showing 1 changed file with 85 additions and 0 deletions.
85 changes: 85 additions & 0 deletions generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import feedparser
from mdutils.mdutils import MdUtils
from bs4 import BeautifulSoup
from urllib.request import urlretrieve
from markdownify import markdownify as md
import re
import string
## Fetch the author's Medium RSS feed. The entries parsed here are the input
## for the Hugo pages generated further down in this script.
MEDIUM_FEED_URL = 'https://medium.com/feed/@s.kirmer'

rss_items = feedparser.parse(MEDIUM_FEED_URL)
website_title = rss_items.feed.title  # title of the feed itself
entries = rss_items.entries  # one item per article in the feed

def get_soup(base_item):
    """Parse the HTML of a feed entry's first content element.

    Args:
        base_item: Feed entry; ``base_item['content'][0]['value']`` is read
            as the HTML to parse.

    Returns:
        A ``BeautifulSoup`` tree built with the ``html.parser`` backend.
    """
    article_html = base_item['content'][0]['value']
    return BeautifulSoup(article_html, 'html.parser')

def get_image(soup):
    """Return the image URL and caption of the first ``<figure>`` in ``soup``.

    Args:
        soup: Parsed article HTML. Only ``soup.figure``, its ``img`` and its
            ``figcaption`` are read.

    Returns:
        A ``(img_link, img_caption)`` tuple. ``img_link`` is the ``src``
        attribute of the figure's ``<img>``, or ``""`` when the figure, the
        image or the attribute is missing. ``img_caption`` is the figure's
        ``figcaption`` element, or ``None`` when there is no figure or no
        caption.
    """
    # Look the figure up once; tag-name access yields None when it is absent.
    figure = soup.figure
    if figure is None:
        return "", None

    img = figure.img
    img_link = img.get('src', "") if img is not None else ""
    return img_link, figure.figcaption

def get_subtitle(soup):
    """Return the text of the first ``<h4>`` element found in ``soup``.

    Args:
        soup: Parsed article HTML exposing ``find_all``.

    Raises:
        IndexError: If ``find_all('h4')`` yields no elements.
    """
    h4_elements = soup.find_all('h4')
    first_h4 = h4_elements[0]
    return first_h4.text

def get_date(soup):
    """Split an item's ``published_parsed`` timestamp into year, month, day.

    Args:
        soup: Object exposing ``published_parsed``, an indexable whose first
            three items are year, month and day. Despite the parameter name,
            the script passes the feed entry here, not parsed HTML.

    Returns:
        A ``(year, month, day)`` tuple: ``year`` exactly as stored in the
        timestamp, ``month`` and ``day`` as strings left-padded with zeros
        to at least two characters.
    """
    published = soup.published_parsed
    month, day = (str(part).zfill(2) for part in (published[1], published[2]))
    return published[0], month, day

def get_tags(base_item):
    """Return the ``term`` of every tag attached to a feed entry.

    Args:
        base_item: Feed entry (mapping). Its optional ``'tags'`` value is a
            list of mappings that each carry a ``'term'`` key.

    Returns:
        A list of tag terms in feed order; an empty list when the entry has
        no ``'tags'`` key or the value is empty/``None``.
    """
    # Entries without any tag have no 'tags' key, so fall back to an empty list
    # instead of raising KeyError.
    return [tag['term'] for tag in base_item.get('tags') or []]

def _yaml_quote(value):
    """Render ``value`` as a YAML double-quoted scalar with escapes applied."""
    escaped = (
        str(value)
        .replace('\\', '\\\\')  # must come first so later escapes survive
        .replace('"', '\\"')
        .replace('\n', '\\n')
    )
    return f'"{escaped}"'


def generate_yaml(img_link, tags, base_item, year, month, day):
    """Build the YAML front matter block for one article.

    Args:
        img_link: Value written to ``featured_image``.
        tags: Iterable of tag strings written as a YAML flow sequence.
        base_item: Feed entry (mapping); only ``base_item['title']`` is read.
        year: Year component of the ``date`` field.
        month: Month component of the ``date`` field.
        day: Day component of the ``date`` field.

    Returns:
        A string that starts with a newline, followed by the ``---``
        delimited front matter and a trailing newline. Quotes, backslashes
        and newlines inside the title, tags and image link are escaped so
        the result stays valid YAML; an empty ``tags`` yields ``tags: []``.
    """
    tag_list = ",".join(_yaml_quote(tag) for tag in tags)
    lines = [
        "",  # the block deliberately starts with a blank line
        "---",
        f"date: {year}-{month}-{day}",
        f"featured_image: {_yaml_quote(img_link)}",
        f"tags: [{tag_list}]",
        f"title: {_yaml_quote(base_item['title'])}",
        "disable_share: false",
        "---",
        "",  # produces the trailing newline after the closing delimiter
    ]
    return "\n".join(lines)

def get_body(base_item):
    """Convert ``base_item`` to Markdown with markdownify.

    Args:
        base_item: The markup handed to ``md`` as its first argument.

    Returns:
        Whatever ``md`` returns when called with the ``strip`` option set to
        the ``figure``, ``figcaption``, ``title`` and ``img`` tags.
    """
    stripped_tags = ['figure', 'figcaption', 'title', 'img']
    return md(base_item, strip=stripped_tags)


def get_body2(soup):
    """Convert an article to Markdown after dropping its first ``<figure>``.

    The first figure, if there is one, is removed from ``soup`` in place, so
    read anything needed from it before calling this function.

    Args:
        soup: Parsed article HTML exposing ``figure`` and ``prettify()``.

    Returns:
        The result of ``md`` applied to the prettified remaining HTML, with
        the ``strip`` option set to the ``figure``, ``figcaption``,
        ``title`` and ``img`` tags.
    """
    lead_figure = soup.figure
    # Articles without any figure yield None here; skip the removal rather
    # than failing on None.decompose().
    if lead_figure is not None:
        lead_figure.decompose()

    remaining_html = soup.prettify()
    return md(remaining_html, strip=['figure', 'figcaption', 'title', 'img'])


# Translation table that deletes every ASCII punctuation character; built
# once instead of once per entry.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def _title_to_file_stem(title):
    """Lower-case ``title``, drop ASCII punctuation, join words with ``_``."""
    # Punctuation is stripped BEFORE the underscores are inserted:
    # string.punctuation contains "_", so stripping afterwards would delete
    # the word separators again.
    return "_".join(title.lower().translate(_PUNCTUATION_TABLE).split())


## Write one Markdown file (front matter + body) per feed entry.
for entry in entries:
    print(entry['title'])
    soup = get_soup(entry)
    # Read the image before get_body2 runs: get_body2 removes the first
    # <figure> from `soup`.
    img_link, img_caption = get_image(soup)
    tags = get_tags(entry)
    year, month, day = get_date(entry)
    print(year, month, day)
    body = get_body2(soup)
    yaml_str = generate_yaml(img_link, tags, entry, year, month, day)

    md_file = MdUtils(
        file_name=f"content/writing/{_title_to_file_stem(entry['title'])}"
    )
    md_file.write(yaml_str)
    md_file.new_line()
    md_file.write(body)
    md_file.create_md_file()

0 comments on commit 75fa736

Please sign in to comment.