-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Thomi Richards
committed
Feb 17, 2015
0 parents
commit bddda49
Showing
10 changed files
with
414 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
The FreeBSD Copyright | ||
|
||
Copyright 1992-2012 The FreeBSD Project. All rights reserved. | ||
|
||
Redistribution and use in source and binary forms, with or without | ||
modification, are permitted provided that the following conditions are | ||
met: | ||
|
||
1. Redistributions of source code must retain the above copyright notice, | ||
this list of conditions and the following disclaimer. | ||
|
||
2. Redistributions in binary form must reproduce the above copyright notice, | ||
this list of conditions and the following disclaimer in the documentation | ||
and/or other materials provided with the distribution. | ||
|
||
THIS SOFTWARE IS PROVIDED BY THE FREEBSD PROJECT ``AS IS'' AND ANY EXPRESS | ||
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | ||
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN | ||
NO EVENT SHALL THE FREEBSD PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, | ||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | ||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | ||
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
|
||
The views and conclusions contained in the software and documentation | ||
are those of the authors and should not be interpreted as representing | ||
official policies, either expressed or implied, of the FreeBSD | ||
Project. | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
|
||
import logging | ||
import os | ||
import sys | ||
from argparse import ArgumentParser | ||
|
||
from gmailfilter._connection import IMAPServer | ||
|
||
|
||
def run():
    """Main entry point for command line executable.

    Parses the command line, configures logging to stdout (DEBUG when
    --verbose was given, INFO otherwise), locates the rules file and then
    executes it as Python code.
    """
    options = configure_argument_parser()
    if options.verbose:
        verbosity = logging.DEBUG
    else:
        verbosity = logging.INFO
    logging.basicConfig(level=verbosity, stream=sys.stdout)

    rules_file = get_filter_file_or_raise()
    with open(rules_file) as handle:
        # Compile with the real file name so that tracebacks raised from
        # inside the rules point at the rules file.
        compiled_rules = compile(handle.read(), rules_file, 'exec')
        # NOTE: this runs arbitrary Python taken from the rules file. The
        # only name pre-seeded into its namespace is what
        # get_rule_globals_dict() provides.
        exec(compiled_rules, get_rule_globals_dict())
|
||
|
||
def configure_argument_parser():
    """Build the command line parser and return the parsed arguments.

    The only option is -v / --verbose, a boolean flag exposed as the
    'verbose' attribute of the returned namespace.
    """
    cli = ArgumentParser(
        prog="gmailfilter",
        description="Filter IMAP emails the easy way!",
    )
    cli.add_argument(
        '-v',
        '--verbose',
        action='store_true',
        help="Be more verbose",
    )
    parsed = cli.parse_args()
    return parsed
|
||
|
||
def get_filter_file_or_raise():
    """Return the path to the user's rules file.

    The rules file lives at ~/.config/gmailfilter/rules (with '~' expanded
    for the current user).

    :returns: The expanded path, as a string.
    :raises IOError: If the path does not exist, is not a regular file, or
        is not readable by the current user.
    """
    path = os.path.expanduser('~/.config/gmailfilter/rules')
    if not os.path.exists(path):
        raise IOError("Rules file %r does not exist" % path)
    # A directory (or other non-file) at this path would pass the existence
    # check and only fail later, with a less helpful error, when opened.
    if not os.path.isfile(path):
        raise IOError("Rules file %r is not a regular file" % path)
    if not os.access(path, os.R_OK):
        raise IOError("Rules file %r is not readable" % path)
    return path
|
||
|
||
def get_rule_globals_dict():
    """Return a fresh dict of the names made available to the rules file.

    Currently this exposes a single name, 'IMAPServer'.
    """
    return dict(IMAPServer=IMAPServer)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,167 @@ | ||
from contextlib import contextmanager | ||
import logging | ||
|
||
from imapclient import IMAPClient | ||
|
||
from gmailfilter._message import Message | ||
|
||
|
||
# TODO: Accept config from command line, encapsulate in a dict and pass | ||
# in to the connection class. | ||
|
||
|
||
|
||
class IMAPServer(object):

    """A logged-in IMAP connection offering the operations rules need.

    The underlying IMAPClient is created in sequence-number mode
    (use_uid=False); methods that need UIDs switch modes temporarily with
    the use_uid() context manager.
    """

    def __init__(self, server=None, username=None, password=None, port=993, ssl=True):
        """Connect to 'server' and log in.

        :param server: Host name of the IMAP server. Required.
        :param username: Login name. Required.
        :param password: Login password. Required.
        :param port: TCP port to connect to.
        :param ssl: Passed through to IMAPClient's 'ssl' argument.
        :raises ValueError: If server, username or password is None.
        """
        if server is None or username is None or password is None:
            raise ValueError("server and username and password cannot be None")

        self._client = IMAPClient(
            host=server,
            port=port,
            use_uid=False,
            ssl=ssl
        )
        # Set self._client.debug = True here to trace the IMAP conversation.
        self._client.login(username, password)

    def get_messages(self):
        """A generator that yields Message instances, one for every message
        in the users inbox.

        Messages are fetched in chunks of sequence numbers; for each one the
        UID, headers, internal date and flags are retrieved up front.
        """
        # TODO - perhaps the user wants to filter a different folder?
        mbox_details = self._client.select_folder("INBOX")
        total_messages = mbox_details['EXISTS']
        logging.info("Scanning inbox, found %d messages", total_messages)
        # TODO: Research best chunk size - maybe let user tweak this from
        # config file?:
        # NOTE(review): the chunk size is derived from the constant 1000
        # rather than from total_messages - confirm whether that is intended.
        i = 0
        with self.use_sequence():
            for chunk in sequence_chunk(total_messages, optimal_chunk_size(1000)):
                logging.info("Fetching: %s", chunk)
                data = self._client.fetch(
                    chunk,
                    ['UID', 'BODY.PEEK[HEADER]', 'INTERNALDATE', 'FLAGS']
                )
                for msg_seq in data:
                    logging.debug("Processing %d / %d", i, total_messages)
                    proxy = MessageConnectionProxy(self, data[msg_seq])
                    yield Message(proxy)
                    i += 1
                self._do_chunk_cleanup()

    def move_message(self, message, folder):
        """Move a message to a folder, creating the folder if it doesn't exist.

        The move is implemented as a copy followed by delete_message().

        :param message: An instance of gmailfilter.Message
        :param folder: A string describing the folder.
        :raises AssertionError: If the folder had to be created and the
            server's reply was not "success" (compared case-insensitively).
        """
        # TODO: optimise this by trying the copy, and if we get 'NO' with
        # 'TRYCREATE' then, and only then try and create the folder. Removes the
        # overhead of the existence check for every message,
        if not self._client.folder_exists(folder):
            status = self._client.create_folder(folder)
            # Raised explicitly (rather than via 'assert') so the check is
            # not stripped when Python runs with -O. The exception type is
            # kept for backward compatibility.
            if status.lower() != "success":
                raise AssertionError("Unable to create folder %s" % folder)
        with self.use_uid():
            self._client.copy(str(message.uid()), folder)
            self.delete_message(message)

    def delete_message(self, message):
        """Ask the server to delete 'message', addressed by its UID.

        :param message: An instance of gmailfilter.Message
        """
        with self.use_uid():
            uid_string = str(message.uid())
            logging.info("Deleting %s", uid_string)
            self._client.delete_messages(uid_string)

    def _do_chunk_cleanup(self):
        """Hook run after each fetched chunk has been processed.

        Currently a no-op; the expunge call is deliberately disabled.
        """
        # self._client.expunge()
        pass

    @contextmanager
    def use_uid(self):
        """Context manager: address messages by UID inside the block.

        The client's previous use_uid setting is restored on exit, even if
        the block raises.
        """
        old = self._client.use_uid
        self._client.use_uid = True
        try:
            yield
        finally:
            self._client.use_uid = old

    @contextmanager
    def use_sequence(self):
        """Context manager: address messages by sequence number inside the
        block.

        The client's previous use_uid setting is restored on exit, even if
        the block raises.
        """
        old = self._client.use_uid
        self._client.use_uid = False
        try:
            yield
        finally:
            self._client.use_uid = old
|
||
|
||
def sequence_chunk(num_messages, chunk_size):
    """Yield IMAP sequence-set strings that together cover 1..num_messages.

    Each chunk spans at most chunk_size messages. A chunk holding a single
    message is yielded as 'N'. A multi-message chunk is yielded as 'A:B',
    except when it reaches the last message, in which case it is left
    open-ended as 'A:*'.
    """
    assert chunk_size >= 1
    first = 1
    while first <= num_messages:
        last = min(first + chunk_size - 1, num_messages)
        if not last > first:
            spec = '%d' % (first)
        elif last == num_messages:
            spec = '%d:*' % first
        else:
            spec = '%d:%d' % (first, last)
        yield spec
        first += chunk_size
|
||
|
||
def optimal_chunk_size(total_messages):
    """Work out the optimal chunk size for an inbox with total_messages.

    :param total_messages: Number of messages in the inbox.
    :returns: An integer chunk size between 1 and 1000 inclusive.
    """
    # use 1000 (maximum sensible chunk size), or 10 retrieval operations,
    # whichever is smaller. Floor division keeps the result an integer on
    # Python 3, and the lower bound of 1 keeps small inboxes (fewer than 10
    # messages) from producing a chunk size of zero.
    return max(1, min(1000, total_messages // 10))
|
||
class MessageConnectionProxy(object):

    """A class that knows how to retrieve additional message parts.

    Parts already fetched are kept in a dictionary; anything else is
    requested from the server, by UID, the first time it is asked for.
    """

    def __init__(self, connection, initial_data):
        """
        :param connection: The object used to reach the server; its
            use_uid() context manager and _client.fetch() are used.
        :param initial_data: Dict of already-fetched parts for this message.
            Must contain 'UID'.
        :raises AssertionError: If initial_data has no 'UID' entry.
        """
        # Raised explicitly (rather than via 'assert') so the check still
        # runs under python -O; the exception type is unchanged.
        if 'UID' not in initial_data:
            raise AssertionError("initial_data must contain 'UID'")
        self._connection = connection
        self._data = initial_data

    def get_message_part(self, part_name):
        """Get a part of a message, possibly from memory.

        'part_name' will be one of ENVELOPE, RFC822, UID, BODY etc.

        :returns: The stored value for the requested part.
        :raises AssertionError: If, after three attempts, the server has not
            returned data for this message's UID.
        """
        # transform 'BODY.PEEK[HEADER]' into 'BODY[HEADER]'
        if part_name.startswith('BODY.PEEK'):
            retrieve_key = 'BODY' + part_name[len('BODY.PEEK'):]
        else:
            retrieve_key = part_name

        # ask the server for 'part_name', but look in our dictionary with
        # 'retrieve_key'
        if retrieve_key not in self._data:
            msg_uid = self._data['UID']
            data = {}
            with self._connection.use_uid():
                # for some reason, sometimes a fetch call returns an empty
                # dict. until I find out why, I'll simply retry this:
                for _ in range(3):
                    data = self._connection._client.fetch(msg_uid, part_name)
                    # Only accept a response that actually contains our
                    # message, so an unrelated reply cannot cause a bare
                    # KeyError here.
                    if msg_uid in data:
                        self._data.update(data[msg_uid])
                        break
            if msg_uid not in data:
                raise AssertionError(
                    "Server gave us back some other data: %d %r" % (msg_uid, data)
                )
        return self._data[retrieve_key]
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
import email | ||
from email.utils import parseaddr | ||
|
||
|
||
class Message(object):

    """A lazily-populated view of one email message.

    Header access goes through the connection proxy the first time it is
    needed; the parsed headers are then kept on the instance and reused.
    """

    def __init__(self, connection_proxy):
        self._message = None
        self._connection_proxy = connection_proxy

    def _get_email(self):
        """Return the parsed header object, building it on first use."""
        parsed = self._message
        if parsed is None:
            raw_headers = self._connection_proxy.get_message_part(
                'BODY.PEEK[HEADER]'
            )
            parsed = email.message_from_string(raw_headers)
            self._message = parsed
        return parsed

    def subject(self):
        """Return the 'Subject' header, or None when it is absent."""
        headers = self._get_email()
        return headers['Subject']

    def from_(self):
        """Return the 'From' header, or None when it is absent."""
        headers = self._get_email()
        return headers['From']

    def is_list_message(self):
        """Return True when the message carries a 'List-Id' header."""
        headers = self._get_email()
        return 'List-Id' in headers

    def list_id(self):
        """Return the parsed list id, or None if there is no 'List-Id'."""
        # Header lookup yields None for a missing key instead of raising.
        raw_id = self._get_email()['List-Id']
        if raw_id is None:
            return None
        return parse_list_id(raw_id)

    def uid(self):
        """Return the message's 'UID' part from the connection proxy."""
        proxy = self._connection_proxy
        return proxy.get_message_part('UID')

    def get_headers(self):
        """Return the parsed header object for this message."""
        # TODO: the email object behaves like a dictionary of headers, but
        # it also exposes body contents, attachments and so on. Ideally only
        # the headers would be exposed here.
        return self._get_email()

    def get_date(self):
        """Return the message's 'INTERNALDATE' part."""
        proxy = self._connection_proxy
        return proxy.get_message_part('INTERNALDATE')

    def get_flags(self):
        """Return the message's 'FLAGS' part."""
        proxy = self._connection_proxy
        return proxy.get_message_part('FLAGS')

    def __repr__(self):
        subject_text = self.subject()
        return repr(subject_text)
|
||
|
||
def parse_list_id(id_string):
    """Return the address portion of a List-Id header value.

    The value is split with email.utils.parseaddr; any leading description
    is discarded and only the second element of the result is returned.
    """
    _description, list_address = parseaddr(id_string)
    return list_address
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
from unittest import TestCase | ||
|
||
|
||
from gmailfilter import _connection as c | ||
|
||
class SequenceChunkTests(TestCase):

    """Tests for the sequence-set strings produced by sequence_chunk.

    sequence_chunk leaves a multi-message chunk open-ended ('start:*') when
    it reaches the last message; a chunk holding a single message is just
    its number. The expectations below follow that behaviour.
    """

    def assertSequenceChunk(self, messages, chunk_size, expected):
        """Assert that chunking 'messages' messages yields 'expected'."""
        observed = list(c.sequence_chunk(messages, chunk_size))
        self.assertEqual(expected, observed)

    def test_no_messages(self):
        self.assertSequenceChunk(0, 10, [])

    def test_single_message(self):
        self.assertSequenceChunk(1, 10, ['1'])

    def test_two_messages(self):
        # The only chunk is also the final one, so it is open-ended.
        self.assertSequenceChunk(2, 10, ['1:*'])

    def test_one_chunk(self):
        self.assertSequenceChunk(10, 10, ['1:*'])

    def test_one_and_a_bit_chunks(self):
        # The trailing chunk holds a single message, so it is not a range.
        self.assertSequenceChunk(11, 10, ['1:10', '11'])

    def test_two_chunks(self):
        self.assertSequenceChunk(20, 10, ['1:10', '11:*'])

    def test_with_no_chunking(self):
        self.assertSequenceChunk(5, 1, ['1', '2', '3', '4', '5'])
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
|
||
|
||
from unittest import TestCase | ||
|
||
from gmailfilter._message import parse_list_id | ||
|
||
class ListIdParsingTestCase(TestCase):

    """Tests for parse_list_id's handling of List-Id header values."""

    def test_list_id_equality_as_string(self):
        parsed = parse_list_id('mail.asana.com')
        self.assertEqual('mail.asana.com', str(parsed))
        self.assertEqual('mail.asana.com', parsed)

    def test_list_id_inequality_as_string(self):
        parsed = parse_list_id('mail.asana.com')
        self.assertNotEqual('foo.com', str(parsed))
        self.assertNotEqual('foo.com', parsed)

    def test_can_extract_list_id_from_description(self):
        parsed = parse_list_id('Some list description <some.list.id>')
        self.assertEqual('some.list.id', parsed)

    def test_list_ids_with_different_descriptions_are_equal(self):
        first = parse_list_id('some description <list.id>')
        second = parse_list_id('some other description <list.id>')
        self.assertEqual(first, second)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
= What is this? = | ||
|
||
This is the source code to my personal IMAP-based mail filtering tool. I use it every day, and it has proven to be useful for me. You are more than welcome to use and contribute to it. | ||
|
||
= How does it work? = | ||
|
||
You must create a `rules` configuration file which tells `gmailfilter` how to connect to your IMAP mail server, and what to do with all the mail in your inbox. The approach `gmailfilter` takes to mail filtering is that your inbox should be virtually empty at the end of a filter run - only messages which need "active processing" should remain (usually this means "unread and flagged (starred) messages"). `gmailfilter` takes care of iterating over the messages in your inbox, and will run your rules over any new messages that arrive. | ||
|
||
You can use `gmailfilter` to achieve the following: | ||
|
||
* Automatically move mailing list messages to a separate folder. | ||
* Automatically delete spam messages from automated services such as jenkins. | ||
* Move messages that are older than a certain age to a different folder. | ||
* ...much much more! | ||
|
||
Rules are written in python, so you can do pretty much whatever you want! |
Oops, something went wrong.