Skip to content

Commit e64589b

Browse files
author
Remy DeCausemaker
committed
Added script to pull all the bizlegfoss feeds and write to a .txt corpus
1 parent 0523578 commit e64589b

File tree

1 file changed

+36
-0
lines changed

1 file changed

+36
-0
lines changed

bizlegfossfeedpull.py

+36
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,36 @@
1+
import feedparser
2+
from bs4 import BeautifulSoup
3+
from pprint import pprint
4+
# Feed URLs (Atom or RSS) that the script below fetches, in order.
feeds = [
    "http://wyattwinters.com/feeds/bizlegfoss.atom.xml",
    "https://amm4108.github.io/feeds/bizleg.atom.xml",
    "http://beru.co/blog/feeds/bizfoss.rss.xml",
    "https://brendanwhitfield.wordpress.com/feed/",
    "https://calebcoffie.com/bizleg-feed.xml",
    "https://chrisknepper.com/blog/feed/",
    "http://dropofwill.herokuapp.com/bizleg_feed.xml",
    "http://nolski.rocks/rss/",
    # Disabled in the original commit (reason not recorded):
    # "http://blog-fortnight.rhcloud.com/rss/",
    "https://h2g2guy.wordpress.com/feed/",
    "http://fossclassjeid64.blogspot.com/feeds/posts/default?alt=rss",
    "https://kaffys.github.io/feed",
    "http://kocsen-hfoss.blogspot.com/feeds/posts/default?alt=rss",
    "http://gearchicken.com/blog/rss.xml",
    "http://msoucy.me/feeds/tag/bizlegfoss.atom.xml",
    "http://blog-mtubinis.rhcloud.com/rss/",
    "http://pharaskn.blogspot.com/feeds/posts/default",
    "http://aaron.herting.cc/feeds/posts/default/-/RIT-BIZLEGFOSS/",
    "http://bizlegfoss.blogspot.com/feeds/posts/default?alt=rss",
]
26+
# Pull every feed in `feeds` and dump the plain text of each post twice:
# once into a per-feed file named after the feed title, and once into the
# combined corpus file bizlegfossallposts.txt. Both are written as UTF-8.
import io  # io.open is an encoding-aware text open on both Python 2 and 3

with io.open("bizlegfossallposts.txt", "w", encoding="utf-8") as corpus:
    for feed in feeds:
        d = feedparser.parse(feed)
        if not d.entries:
            # A feed that could not be fetched or parsed normally comes back
            # with no entries (and no title); skip it so one dead blog does
            # not abort the whole run.
            print("skipping {}: no entries".format(feed))
            continue

        # Fall back to the URL when the feed has no title, and replace path
        # separators so the name always refers to a file in the current
        # directory.
        title = d.feed.get("title") or feed
        filename = title.replace("/", "_").replace("\\", "_")

        with io.open(filename, "w", encoding="utf-8") as f:
            for item in d.entries:
                pprint(item)
                # Name the parser explicitly so the extracted text does not
                # depend on which optional parsers happen to be installed.
                soup = BeautifulSoup(item.get("summary", ""), "html.parser")
                # Terminate each post with a newline so consecutive posts do
                # not run together in the output files.
                contents = u"\n".join(soup.stripped_strings) + u"\n"
                f.write(contents)
                corpus.write(contents)

0 commit comments

Comments
 (0)