-
Notifications
You must be signed in to change notification settings - Fork 0
/
email_processor.py
103 lines (89 loc) · 3.54 KB
/
email_processor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# email_processor.py
from bs4 import BeautifulSoup
import imaplib
import email
import logging
import datetime
import os
# Credentials are read from environment variables; each is None when its
# variable is unset.
EMAIL_ADDRESS = os.environ.get('EMAIL_ADDRESS')
EMAIL_PASSWORD = os.environ.get('EMAIL_PASSWORD')

# Root logger emits INFO-level records and above.
logging.basicConfig(level=logging.INFO)

# IMAP host to connect to and the mailbox selected after login.
IMAP_SERVER = 'imap.gmail.com'
EMAIL_FOLDER = 'INBOX'
def connect_to_email_server():
    """Open an SSL IMAP session, log in, and select the configured folder.

    Reads the module-level ``IMAP_SERVER``, ``EMAIL_ADDRESS``,
    ``EMAIL_PASSWORD`` and ``EMAIL_FOLDER`` settings.

    Returns:
        The logged-in ``imaplib.IMAP4_SSL`` connection with ``EMAIL_FOLDER``
        selected. The caller is responsible for logging out.

    Raises:
        ValueError: if ``EMAIL_ADDRESS`` or ``EMAIL_PASSWORD`` is not set.
        imaplib.IMAP4.error: if login fails or the folder cannot be selected.
        Exception: any other connection error; every failure is logged and
            then re-raised unchanged.
    """
    mail = None
    try:
        # Fail early with a clear message instead of an obscure error from
        # inside imaplib when a credential is None.
        if not EMAIL_ADDRESS or not EMAIL_PASSWORD:
            raise ValueError(
                "EMAIL_ADDRESS and EMAIL_PASSWORD environment variables must be set"
            )
        mail = imaplib.IMAP4_SSL(IMAP_SERVER)
        mail.login(EMAIL_ADDRESS, EMAIL_PASSWORD)
        # select() reports an unknown/inaccessible mailbox through its status
        # instead of raising, so check it here rather than failing later.
        status, detail = mail.select(EMAIL_FOLDER)
        if status != 'OK':
            raise imaplib.IMAP4.error(
                f"Could not select folder {EMAIL_FOLDER!r}: {detail}"
            )
        return mail
    except Exception as e:
        logging.error(f"Failed to connect to the email server: {e}")
        if mail is not None:
            # Do not leak the open socket when login/select failed.
            try:
                mail.logout()
            except Exception as cleanup_error:
                logging.debug(
                    "Ignoring error while closing failed connection: %s",
                    cleanup_error,
                )
        raise
# def search_for_unread_emails(mail, sender_email):
# """Searches for unread emails from a specific sender."""
# try:
# status, email_ids = mail.search(None, '(UNSEEN FROM "{}")'.format(sender_email))
# if status != 'OK':
# logging.error("No emails found.")
# return []
# return email_ids[0].split()
# except Exception as e:
# logging.error(f"Error searching for emails: {e}")
# raise
def search_for_unread_emails(mail, sender_email, on_date=None):
    """Search the selected mailbox for unread emails from one sender on one day.

    Args:
        mail: Connection object exposing ``search(charset, criteria)`` the way
            ``imaplib.IMAP4`` does.
        sender_email: Address used in the ``FROM`` search key.
        on_date: Optional ``datetime.date`` (or ``datetime.datetime``) to
            search on. Defaults to today's local date.

    Returns:
        A list of message IDs (``bytes``), or an empty list when the search
        does not succeed or nothing matches.

    Raises:
        Exception: any error raised while building or running the search is
            logged and re-raised unchanged.
    """
    # IMAP dates need English month abbreviations; strftime('%b') follows the
    # process locale and would produce an invalid date under a non-English one.
    months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    try:
        day = datetime.date.today() if on_date is None else on_date
        imap_date = f"{day.day:02d}-{months[day.month - 1]}-{day.year:04d}"
        # Escape backslashes and double quotes so the address cannot terminate
        # the quoted string and alter the search criteria.
        safe_sender = sender_email.replace('\\', '\\\\').replace('"', '\\"')
        search_criteria = f'(UNSEEN FROM "{safe_sender}" ON "{imap_date}")'
        status, email_ids = mail.search(None, search_criteria)
    except Exception as e:
        logging.error(f"Error searching for emails: {e}")
        raise
    if status != 'OK':
        # A non-OK status means the search itself failed, not "no matches".
        logging.error("Email search failed with status %s.", status)
        return []
    # An empty result may arrive as [b''] or [None]; both mean no matches.
    if not email_ids or not email_ids[0]:
        return []
    return email_ids[0].split()
def fetch_and_process_emails(mail, email_ids):
    """Fetch each email by ID and collect the links extracted from its body.

    For multipart messages every ``text/html`` part is scanned; for
    single-part messages the whole payload is scanned. Link selection is
    delegated to ``extract_links``.

    Args:
        mail: Connection object exposing ``fetch(id, spec)`` the way
            ``imaplib.IMAP4`` does.
        email_ids: Iterable of message IDs to fetch.

    Returns:
        A flat list of all links found, in fetch order.
    """
    links = []
    for e_id in email_ids:
        status, msg_data = mail.fetch(e_id, '(RFC822)')
        if status != 'OK':
            logging.warning(
                "Could not fetch email %s (status %s); skipping.", e_id, status
            )
            continue
        for response_part in msg_data or ():
            # Only tuple items carry message bytes; other items are skipped.
            if not isinstance(response_part, tuple):
                continue
            msg = email.message_from_bytes(response_part[1])
            if msg.is_multipart():
                parts = [
                    part for part in msg.walk()
                    if part.get_content_type() == "text/html"
                ]
            else:
                parts = [msg]
            for part in parts:
                links.extend(extract_links(_decode_part_text(part)))
    return links


def _decode_part_text(part):
    """Return a message part's payload as text, honouring its declared charset."""
    payload = part.get_payload(decode=True)
    if payload is None:
        return ""
    charset = part.get_content_charset() or "utf-8"
    try:
        # errors="replace" keeps one malformed byte from aborting the run.
        return payload.decode(charset, errors="replace")
    except LookupError:
        # The declared charset is unknown to Python; fall back to UTF-8.
        return payload.decode("utf-8", errors="replace")
def extract_links(html_content):
    """Return the ``href`` of every anchor whose text contains 'minute read)'.

    Args:
        html_content: HTML markup to scan. Empty or ``None`` content yields
            an empty list.

    Returns:
        A list of ``href`` values in document order. Anchors without an
        ``href`` attribute are skipped so the result never contains ``None``.
    """
    if not html_content:
        return []
    soup = BeautifulSoup(html_content, 'html.parser')
    # NOTE(review): 'minute read)' presumably marks article titles such as
    # "Title (5 minute read)" in the newsletter - confirm against real mail.
    return [
        a_tag['href']
        for a_tag in soup.find_all('a', href=True)
        if 'minute read)' in a_tag.text
    ]
def get_article_links(sender_email):
    """Connect, search for unread emails from the sender, and return their links.

    Args:
        sender_email: Address whose unread emails should be scanned.

    Returns:
        The list of links extracted from the matching emails, or an empty
        list when no unread email from the sender is found.

    Raises:
        Exception: errors from connecting, searching or fetching propagate to
            the caller; the connection is logged out in every case.
    """
    mail = connect_to_email_server()
    try:
        email_ids = search_for_unread_emails(mail, sender_email)
        if not email_ids:
            logging.info("No unread emails from the specified sender.")
            return []
        return fetch_and_process_emails(mail, email_ids)
    finally:
        # Always end the session so the server connection is not leaked;
        # a logout failure must not mask the result or the original error.
        try:
            mail.logout()
        except Exception as e:
            logging.warning("Error while closing the IMAP connection: %s", e)