diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..e275a2d --- /dev/null +++ b/LICENSE @@ -0,0 +1,31 @@ +The FreeBSD Copyright + +Copyright 1992-2012 The FreeBSD Project. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE FREEBSD PROJECT ``AS IS'' AND ANY EXPRESS +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN +NO EVENT SHALL THE FREEBSD PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +The views and conclusions contained in the software and documentation +are those of the authors and should not be interpreted as representing +official policies, either expressed or implied, of the FreeBSD +Project. 
+
diff --git a/gmailfilter/__init__.py b/gmailfilter/__init__.py
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/gmailfilter/__init__.py
@@ -0,0 +1,2 @@
+
+
diff --git a/gmailfilter/_command.py b/gmailfilter/_command.py
new file mode 100644
index 0000000..d91752c
--- /dev/null
+++ b/gmailfilter/_command.py
@@ -0,0 +1,57 @@
+"""Command line entry point for gmailfilter."""
+
+import logging
+import os
+import sys
+from argparse import ArgumentParser
+
+from gmailfilter._connection import IMAPServer
+
+
+def run():
+    """Main entry point for command line executable."""
+    args = configure_argument_parser()
+    log_level = logging.DEBUG if args.verbose else logging.INFO
+    logging.basicConfig(level=log_level, stream=sys.stdout)
+    rules_path = get_filter_file_or_raise()
+
+    with open(rules_path) as f:
+        code = compile(f.read(), rules_path, 'exec')
+    # NOTE: the rules file is executed as ordinary python code with the full
+    # privileges of the current user, so it must come from a trusted source.
+    exec(code, get_rule_globals_dict())
+
+
+def configure_argument_parser():
+    """Parse the command line and return the resulting argparse namespace."""
+    parser = ArgumentParser(
+        prog="gmailfilter",
+        description="Filter IMAP emails the easy way!"
+    )
+    parser.add_argument('-v', '--verbose', action='store_true', help="Be more verbose")
+    return parser.parse_args()
+
+
+def get_filter_file_or_raise():
+    """Return the path to the user's rules file.
+
+    :raises IOError: if the rules file is missing, is not a regular file, or
+        cannot be read by the current user.
+
+    """
+    path = os.path.expanduser('~/.config/gmailfilter/rules')
+    if not os.path.exists(path):
+        raise IOError("Rules file %r does not exist" % path)
+    if not os.path.isfile(path):
+        raise IOError("Rules file %r is not a regular file" % path)
+    if not os.access(path, os.R_OK):
+        raise IOError("Rules file %r is not readable" % path)
+    return path
+
+
+def get_rule_globals_dict():
+    """Return the globals that the rules file is executed with."""
+    rule_globals = {
+        'IMAPServer': IMAPServer
+    }
+    return rule_globals
diff --git a/gmailfilter/_connection.py b/gmailfilter/_connection.py
new file mode 100644
index 0000000..3184cea
--- /dev/null
+++ b/gmailfilter/_connection.py
@@ -0,0 +1,167 @@
+from contextlib import contextmanager
+import logging
+
+from imapclient import IMAPClient
+
+from gmailfilter._message import Message
+
+
+# TODO: Accept config from command line, encapsulate in a dict and pass
+# in to the connection class.
+
+
+class IMAPServer(object):
+
+    """A logged-in connection to an IMAP server."""
+
+    def __init__(self, server=None, username=None, password=None, port=993, ssl=True):
+        if server is None or username is None or password is None:
+            raise ValueError("server and username and password cannot be None")
+
+        self._client = IMAPClient(
+            host=server,
+            port=port,
+            use_uid=False,
+            ssl=ssl
+        )
+        self._client.login(username, password)
+
+    def get_messages(self):
+        """A generator that yields Message instances, one for every message
+        in the user's inbox.
+
+        """
+        # TODO - perhaps the user wants to filter a different folder?
+        mbox_details = self._client.select_folder("INBOX")
+        total_messages = mbox_details['EXISTS']
+        logging.info("Scanning inbox, found %d messages", total_messages)
+        # TODO: Research best chunk size - maybe let user tweak this from
+        # config file?
+        chunk_size = optimal_chunk_size(total_messages)
+        i = 0
+        with self.use_sequence():
+            for chunk in sequence_chunk(total_messages, chunk_size):
+                logging.info("Fetching: %s", chunk)
+                data = self._client.fetch(
+                    chunk,
+                    ['UID', 'BODY.PEEK[HEADER]', 'INTERNALDATE', 'FLAGS']
+                )
+                for msg_seq in data:
+                    logging.debug("Processing %d / %d", i, total_messages)
+                    proxy = MessageConnectionProxy(self, data[msg_seq])
+                    yield Message(proxy)
+                    i += 1
+                self._do_chunk_cleanup()
+
+    def move_message(self, message, folder):
+        """Move a message to a folder, creating the folder if it doesn't exist.
+
+        :param message: An instance of gmailfilter.Message
+        :param folder: A string describing the folder.
+        :raises RuntimeError: if the folder could not be created.
+
+        """
+        # TODO: optimise this by trying the copy, and if we get 'NO' with
+        # 'TRYCREATE' then, and only then try and create the folder. Removes the
+        # overhead of the existence check for every message.
+        if not self._client.folder_exists(folder):
+            status = self._client.create_folder(folder)
+            # raise rather than assert: asserts are stripped by "python -O".
+            if status.lower() != "success":
+                raise RuntimeError("Unable to create folder %s" % folder)
+        with self.use_uid():
+            self._client.copy(str(message.uid()), folder)
+        self.delete_message(message)
+
+    def delete_message(self, message):
+        """Ask the server to delete a message, addressed by its UID."""
+        with self.use_uid():
+            uid_string = str(message.uid())
+            logging.info("Deleting %s", uid_string)
+            self._client.delete_messages(uid_string)
+
+    def _do_chunk_cleanup(self):
+        """Hook called after each fetched chunk; currently does nothing."""
+        # self._client.expunge()
+
+    @contextmanager
+    def use_uid(self):
+        """Set the client's use_uid flag for the length of a 'with' block."""
+        old = self._client.use_uid
+        self._client.use_uid = True
+        try:
+            yield
+        finally:
+            self._client.use_uid = old
+
+    @contextmanager
+    def use_sequence(self):
+        """Clear the client's use_uid flag for the length of a 'with' block."""
+        old = self._client.use_uid
+        self._client.use_uid = False
+        try:
+            yield
+        finally:
+            self._client.use_uid = old
+
+
+def sequence_chunk(num_messages, chunk_size):
+    """Yield sequence strings ('N:M', or 'N' for a single message) that
+    together cover messages 1..num_messages, chunk_size at a time.
+
+    """
+    if chunk_size < 1:
+        raise ValueError("chunk_size must be at least 1")
+    for start in range(1, num_messages + 1, chunk_size):
+        end = min(start + chunk_size - 1, num_messages)
+        if end > start:
+            yield '%d:%d' % (start, end)
+        else:
+            yield '%d' % start
+
+
+def optimal_chunk_size(total_messages):
+    """Work out the optimal chunk size for an inbox with total_messages."""
+    # use 1000 (maximum sensible chunk size), or 10 retrieval operations,
+    # whichever is smaller - but always a whole number of at least 1:
+    return max(1, min(1000, total_messages // 10))
+
+
+class MessageConnectionProxy(object):
+
+    """A class that knows how to retrieve additional message parts."""
+
+    def __init__(self, connection, initial_data):
+        assert 'UID' in initial_data
+        self._connection = connection
+        self._data = initial_data
+
+    def get_message_part(self, part_name):
+        """Get a part of a message, possibly from memory.
+
+        'part_name' will be one of ENVELOPE, RFC822, UID, BODY etc.
+
+        """
+        # transform 'BODY.PEEK[HEADER]' into 'BODY[HEADER]'
+        if part_name.startswith('BODY.PEEK'):
+            retrieve_key = 'BODY' + part_name[9:]
+        else:
+            retrieve_key = part_name
+
+        # ask the server for 'part_name', but look in our dictionary with
+        # 'retrieve_key'
+        if retrieve_key not in self._data:
+            msg_uid = self._data['UID']
+            # for some reason, sometimes a fetch call returns an empty dict.
+            # until I find out why, I'll simply retry this:
+            data = {}
+            with self._connection.use_uid():
+                for _ in range(3):
+                    data = self._connection._client.fetch(msg_uid, part_name)
+                    if data:
+                        break
+            # check before indexing so a bad reply is not a bare KeyError:
+            if msg_uid not in data:
+                raise RuntimeError("Server gave us back some other data: %d %r" % (msg_uid, data))
+            self._data.update(data[msg_uid])
+        return self._data[retrieve_key]
diff --git a/gmailfilter/_message.py b/gmailfilter/_message.py
new file mode 100644
index 0000000..3c675e7
--- /dev/null
+++ b/gmailfilter/_message.py
@@ -0,0 +1,73 @@
+import email
+from email.utils import parseaddr
+
+
+class Message(object):
+
+    """An interface to represent an email message.
+
+    The message is lazily-created. Methods such as 'subject' cause network
+    traffic the first time they're called. After that, the results are cached.
+
+    """
+
+    def __init__(self, connection_proxy):
+        self._connection_proxy = connection_proxy
+        self._message = None
+
+    def _get_email(self):
+        """Return the parsed message headers, parsing them on first use."""
+        if self._message is None:
+            self._message = email.message_from_string(
+                self._connection_proxy.get_message_part('BODY.PEEK[HEADER]')
+            )
+        return self._message
+
+    def subject(self):
+        """Return the 'Subject' header, or None if there isn't one."""
+        return self._get_email()['Subject']
+
+    def from_(self):
+        """Return the 'From' header, or None if there isn't one."""
+        return self._get_email()['From']
+
+    def is_list_message(self):
+        """Return True if the message has a 'List-Id' header."""
+        return 'List-Id' in self._get_email()
+
+    def list_id(self):
+        """Return the mailing list id, or None for non-list messages."""
+        # Returns None if key is not found, does not raise KeyError:
+        list_id = self._get_email()['List-Id']
+        return parse_list_id(list_id) if list_id is not None else None
+
+    def uid(self):
+        """Return the 'UID' part of the message."""
+        return self._connection_proxy.get_message_part('UID')
+
+    def get_headers(self):
+        """Return the object holding the message headers."""
+        # TODO: email objects are dictionaries for the headers, but also expose
+        # the body contents, attachments etc. etc. It'd be nice if we could
+        # *only* expose the headers here...
+        return self._get_email()
+
+    def get_date(self):
+        """Return the 'INTERNALDATE' part of the message."""
+        return self._connection_proxy.get_message_part('INTERNALDATE')
+
+    def get_flags(self):
+        """Return the 'FLAGS' part of the message."""
+        return self._connection_proxy.get_message_part('FLAGS')
+
+    def __repr__(self):
+        return repr(self.subject())
+
+
+def parse_list_id(id_string):
+    """Return the bare list id from a 'List-Id' header value.
+
+    Any description in front of the '<list.id>' part is discarded.
+
+    """
+    return parseaddr(id_string)[1]
diff --git a/gmailfilter/test/__init__.py b/gmailfilter/test/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/gmailfilter/test/test_connection.py b/gmailfilter/test/test_connection.py
new file mode 100644
index 0000000..ff023c1
--- /dev/null
+++ b/gmailfilter/test/test_connection.py
@@ -0,0 +1,44 @@
+from unittest import TestCase
+
+from gmailfilter import _connection as c
+
+
+class SequenceChunkTests(TestCase):
+
+    def assertSequenceChunk(self, messages, chunk_size, expected):
+        observed = list(c.sequence_chunk(messages, chunk_size))
+        self.assertEqual(expected, observed)
+
+    def test_no_messages(self):
+        self.assertSequenceChunk(0, 10, [])
+
+    def test_single_message(self):
+        self.assertSequenceChunk(1, 10, ['1'])
+
+    def test_two_messages(self):
+        self.assertSequenceChunk(2, 10, ['1:2'])
+
+    def test_one_chunk(self):
+        self.assertSequenceChunk(10, 10, ['1:10'])
+
+    def test_one_and_a_bit_chunks(self):
+        self.assertSequenceChunk(11, 10, ['1:10', '11'])
+
+    def test_two_chunks(self):
+        self.assertSequenceChunk(20, 10, ['1:10', '11:20'])
+
+    def test_with_no_chunking(self):
+        self.assertSequenceChunk(5, 1, ['1', '2', '3', '4', '5'])
+
+
+class OptimalChunkSizeTests(TestCase):
+
+    def test_small_inbox_gets_chunk_size_of_one(self):
+        self.assertEqual(1, c.optimal_chunk_size(0))
+        self.assertEqual(1, c.optimal_chunk_size(5))
+
+    def test_chunk_size_is_a_tenth_of_the_inbox(self):
+        self.assertEqual(25, c.optimal_chunk_size(250))
+
+    def test_chunk_size_is_capped(self):
+        self.assertEqual(1000, c.optimal_chunk_size(50000))
diff --git a/gmailfilter/test/test_message.py b/gmailfilter/test/test_message.py
new file mode 100644
index 0000000..c12b377
--- /dev/null
+++ b/gmailfilter/test/test_message.py
@@ -0,0 +1,26 @@
+from unittest import TestCase
+
+from gmailfilter._message import parse_list_id
+
+
+class ListIdParsingTestCase(TestCase):
+
+    def test_list_id_equality_as_string(self):
+        list_id = parse_list_id('mail.asana.com')
+        self.assertEqual('mail.asana.com', str(list_id))
+        self.assertEqual('mail.asana.com', list_id)
+
+    def test_list_id_inequality_as_string(self):
+        list_id = parse_list_id('mail.asana.com')
+        self.assertNotEqual('foo.com', str(list_id))
+        self.assertNotEqual('foo.com', list_id)
+
+    def test_can_extract_list_id_from_description(self):
+        list_id = parse_list_id('Some list description <some.list.id>')
+        self.assertEqual('some.list.id', list_id)
+
+    def test_list_ids_with_different_descriptions_are_equal(self):
+        self.assertEqual(
+            parse_list_id('some description <some.list.id>'),
+            parse_list_id('some other description <some.list.id>'),
+        )
diff --git a/readme.md b/readme.md
new file mode 100644
index 0000000..6b83458
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,16 @@
+# What is this?
+
+This is the source code to my personal IMAP-based mail filtering tool. I use it every day, and it has proven to be useful for me. You are more than welcome to use and contribute to it.
+
+# How does it work?
+
+You must create a `rules` configuration file which tells `gmailfilter` how to connect to your IMAP mail server, and what to do with all the mail in your inbox. The approach `gmailfilter` takes to mail filtering is that your inbox should be virtually empty at the end of a filter run - only messages which need "active processing" should remain (usually this means "unread and flagged (starred) messages"). `gmailfilter` takes care of iterating over the messages in your inbox, and will run your rules over any new messages that arrive.
+
+You can use `gmailfilter` to achieve the following:
+
+ * Automatically move mailing list messages to a separate folder.
+ * Automatically delete spam messages from automated services such as Jenkins.
+ * Move messages that are older than a certain age to a different folder.
+ * ...much much more!
+
+Rules are written in Python, so you can do pretty much whatever you want!
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..cf7450a
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+
+from setuptools.command.test import test as TestCommand
+from setuptools import find_packages, setup
+
+
+class TestDiscoverCommand(TestCommand):
+    """Use unittest discovery to find and run the tests."""
+
+    def finalize_options(self):
+        TestCommand.finalize_options(self)
+        self.test_args = []
+        self.test_suite = True
+
+    def run_tests(self):
+        import unittest
+        # unittest.main() runs the discovered tests and then exits itself.
+        unittest.main(argv=['', 'discover'])
+
+
+setup(
+    name='gmailfilter',
+    version='0.1',
+    author='Thomi Richards',
+    author_email='thomi.richards@canonical.com',
+    url='http://launchpad.net/gmailfilter',
+    packages=['gmailfilter'],
+    # packages=find_packages('gmailfilter'),
+    # test_suite='gmailfilter.tests',
+    install_requires=['IMAPClient'],
+    entry_points={
+        'console_scripts': ['gmailfilter = gmailfilter._command:run']
+    },
+    cmdclass={'test': TestDiscoverCommand},
+)