Skip to content

Commit

Permalink
First commit.
Browse files Browse the repository at this point in the history
  • Loading branch information
Thomi Richards committed Feb 17, 2015
0 parents commit bddda49
Show file tree
Hide file tree
Showing 10 changed files with 414 additions and 0 deletions.
31 changes: 31 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
The FreeBSD Copyright

Copyright 1992-2012 The FreeBSD Project. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE FREEBSD PROJECT ``AS IS'' AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
NO EVENT SHALL THE FREEBSD PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

The views and conclusions contained in the software and documentation
are those of the authors and should not be interpreted as representing
official policies, either expressed or implied, of the FreeBSD
Project.

2 changes: 2 additions & 0 deletions gmailfilter/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@


44 changes: 44 additions & 0 deletions gmailfilter/_command.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@

import logging
import os
import sys
from argparse import ArgumentParser

from gmailfilter._connection import IMAPServer


def run():
    """Main entry point for command line executable.

    Parses the command line, configures logging to stdout (DEBUG when
    verbose was requested, INFO otherwise), locates the rules file and then
    executes it as Python code with the rule globals in scope.
    """
    args = configure_argument_parser()
    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.INFO,
        stream=sys.stdout,
    )
    rules_path = get_filter_file_or_raise()

    with open(rules_path) as rules_file:
        rules_source = rules_file.read()

    # NOTE: the rules file is executed as arbitrary Python code; it is read
    # from the user's own config directory, so it is as trusted as the user.
    exec(compile(rules_source, rules_path, 'exec'), get_rule_globals_dict())


def configure_argument_parser(argv=None):
    """Parse the gmailfilter command line and return the parsed options.

    :param argv: Optional list of argument strings to parse. When left as
        None, argparse falls back to the process arguments (sys.argv[1:]),
        which is what the command line entry point relies on.
    :returns: The argparse namespace. Its 'verbose' attribute is True when
        '-v' / '--verbose' was given and False otherwise.
    """
    parser = ArgumentParser(
        prog="gmailfilter",
        description="Filter IMAP emails the easy way!"
    )
    parser.add_argument(
        '-v', '--verbose', action='store_true', help="Be more verbose"
    )
    return parser.parse_args(argv)


def get_filter_file_or_raise():
    """Return the path of the user's rules file.

    The rules file is expected at '~/.config/gmailfilter/rules' (with '~'
    expanded for the current user).

    :raises IOError: if nothing exists at that path.
    """
    rules_path = os.path.expanduser('~/.config/gmailfilter/rules')
    if os.path.exists(rules_path):
        # TODO: Check for readability?
        return rules_path
    raise IOError("Rules file %r does not exist" % rules_path)


def get_rule_globals_dict():
    """Return the globals dictionary that the rules file is executed with.

    Currently the only name made available to rules is 'IMAPServer'.
    """
    return {'IMAPServer': IMAPServer}

167 changes: 167 additions & 0 deletions gmailfilter/_connection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
from contextlib import contextmanager
import logging

from imapclient import IMAPClient

from gmailfilter._message import Message


# TODO: Accept config from command line, encapsulate in a dict and pass
# in to the connection class.



class IMAPServer(object):

    """A logged-in IMAP connection used to scan, move and delete messages."""

    def __init__(self, server=None, username=None, password=None, port=993, ssl=True):
        """Connect to the IMAP server and log in.

        :param server: Host name of the IMAP server (required).
        :param username: Login name (required).
        :param password: Login password (required).
        :param port: Port to connect to.
        :param ssl: Passed through to the IMAP client's 'ssl' option.
        :raises ValueError: if server, username or password is None.
        """
        required = (server, username, password)
        if any(value is None for value in required):
            raise ValueError("server and username and password cannot be None")

        client = IMAPClient(host=server, port=port, use_uid=False, ssl=ssl)
        client.login(username, password)
        self._client = client

    def get_messages(self):
        """A generator that yields Message instances, one for every message
        in the users inbox.

        Messages are fetched from the server in chunks of sequence numbers;
        each fetch asks for the UID, the headers, the internal date and the
        flags.
        """
        # TODO - perhaps the user wants to filter a different folder?
        folder_status = self._client.select_folder("INBOX")
        total_messages = folder_status['EXISTS']
        logging.info("Scanning inbox, found %d messages" % total_messages)
        # TODO: Research best chunk size - maybe let user tweak this from
        # config file?:
        processed = 0
        with self.use_sequence():
            for chunk in sequence_chunk(total_messages, optimal_chunk_size(1000)):
                logging.info("Fetching: " + chunk)
                fetched = self._client.fetch(
                    chunk,
                    ['UID', 'BODY.PEEK[HEADER]', 'INTERNALDATE', 'FLAGS']
                )
                for msg_seq in fetched:
                    logging.debug("Processing %d / %d", processed, total_messages)
                    yield Message(MessageConnectionProxy(self, fetched[msg_seq]))
                    processed += 1
                self._do_chunk_cleanup()

    def move_message(self, message, folder):
        """Move a message to a folder, creating the folder if it doesn't exist.

        The message is copied to the folder by UID and then deleted from the
        currently selected folder.

        :param message: An instance of gmailfilter.Message
        :param folder: A string describing the folder.
        """
        # TODO: optimise this by trying the copy, and if we get 'NO' with
        # 'TRYCREATE' then, and only then try and create the folder. Removes
        # the overhead of the existence check for every message.
        folder_is_missing = not self._client.folder_exists(folder)
        if folder_is_missing:
            status = self._client.create_folder(folder)
            assert status.lower() == "success", "Unable to create folder %s" % folder
        with self.use_uid():
            self._client.copy(str(message.uid()), folder)
            self.delete_message(message)

    def delete_message(self, message):
        """Ask the server to delete 'message', addressed by its UID.

        :param message: An instance of gmailfilter.Message
        """
        with self.use_uid():
            uid_string = str(message.uid())
            logging.info("Deleting %s" % uid_string)
            self._client.delete_messages(uid_string)

    def _do_chunk_cleanup(self):
        """Hook run after each fetched chunk has been processed.

        Currently a no-op: the expunge call below is disabled.
        """
        # self._client.expunge()
        pass

    @contextmanager
    def _uid_mode(self, enabled):
        """Set the client's 'use_uid' flag to 'enabled' for the duration of
        the with-block, restoring the previous value afterwards (even if the
        block raises).
        """
        previous = self._client.use_uid
        self._client.use_uid = enabled
        try:
            yield
        finally:
            self._client.use_uid = previous

    def use_uid(self):
        """Context manager: address messages by UID inside the block."""
        return self._uid_mode(True)

    def use_sequence(self):
        """Context manager: address messages by sequence number inside the
        block.
        """
        return self._uid_mode(False)


def sequence_chunk(num_messages, chunk_size):
    """Yield sequence-set strings that together cover messages 1 to
    num_messages, at most chunk_size messages per string.

    A chunk holding a single message is yielded as 'N'. A multi-message
    chunk is yielded as 'N:M', except that the chunk reaching num_messages
    is left open-ended as 'N:*'.

    :param num_messages: Number of messages to cover; 0 yields nothing.
    :param chunk_size: Maximum messages per chunk; must be at least 1.
    """
    assert chunk_size >= 1
    first = 1
    while first <= num_messages:
        last = min(first + chunk_size - 1, num_messages)
        if last <= first:
            yield '%d' % (first)
        elif last == num_messages:
            yield '%d:*' % first
        else:
            yield '%d:%d' % (first, last)
        first += chunk_size


def optimal_chunk_size(total_messages):
    """Work out the optimal chunk size for an inbox with total_messages.

    :param total_messages: Number of messages in the inbox.
    :returns: An integer chunk size between 1 and 1000 inclusive.
    """
    # use 1000 (maximum sensible chunk size), or 10 retrieval operations,
    # whichever is smaller. Floor division keeps the result an integer on
    # both Python 2 and 3, and the lower bound of 1 keeps small (or empty)
    # inboxes from producing a chunk size that sequence_chunk rejects.
    return max(1, min(1000, total_messages // 10))

class MessageConnectionProxy(object):

    """A class that knows how to retrieve additional message parts.

    Parts that were already fetched are kept in a per-message dictionary;
    anything missing is requested from the server, by UID, on first use.
    """

    _PEEK_PREFIX = 'BODY.PEEK'

    # for some reason, sometimes a fetch call returns an empty dict. until
    # the cause is found, the fetch is simply retried this many times:
    _FETCH_ATTEMPTS = 3

    def __init__(self, connection, initial_data):
        """
        :param connection: Object providing a 'use_uid()' context manager
            and a '_client' attribute with a 'fetch(uid, part)' method.
        :param initial_data: Dictionary of already-fetched parts for this
            message; must contain 'UID'.
        """
        assert 'UID' in initial_data
        self._connection = connection
        self._data = initial_data

    def get_message_part(self, part_name):
        """Get a part of a message, possibly from memory.

        'part_name' will be one of ENVELOPE, RFC822, UID, BODY etc.

        :returns: The stored value for the requested part.
        :raises AssertionError: if the server never returns data for this
            message's UID.
        """
        # transform 'BODY.PEEK[HEADER]' into 'BODY[HEADER]'
        if part_name.startswith(self._PEEK_PREFIX):
            retrieve_key = 'BODY' + part_name[len(self._PEEK_PREFIX):]
        else:
            retrieve_key = part_name

        # ask the server for 'part_name', but look in our dictionary with
        # 'retrieve_key'
        if retrieve_key not in self._data:
            self._fetch_part(part_name)
        return self._data[retrieve_key]

    def _fetch_part(self, part_name):
        """Fetch 'part_name' for this message by UID and merge the reply
        into the stored parts, retrying when the reply lacks our UID.
        """
        msg_uid = self._data['UID']
        data = {}
        with self._connection.use_uid():
            for _ in range(self._FETCH_ATTEMPTS):
                data = self._connection._client.fetch(msg_uid, part_name)
                # Only trust a reply that actually contains our message;
                # indexing it blindly would raise a bare KeyError.
                if msg_uid in data:
                    self._data.update(data[msg_uid])
                    return
        # Raised explicitly (not via 'assert') so the check survives
        # 'python -O' while keeping the exception type unchanged.
        raise AssertionError(
            "Server gave us back some other data: %d %r" % (msg_uid, data)
        )

58 changes: 58 additions & 0 deletions gmailfilter/_message.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import email
from email.utils import parseaddr


class Message(object):

    """An interface to represent an email message.

    All data is obtained through the connection proxy given at construction
    time. The header block is requested and parsed the first time a header
    is needed; after that, the parsed result is cached on the instance.
    """

    def __init__(self, connection_proxy):
        self._connection_proxy = connection_proxy
        self._message = None

    def _part(self, part_name):
        """Return the named message part from the connection proxy."""
        return self._connection_proxy.get_message_part(part_name)

    def _get_email(self):
        """Return the parsed headers, parsing them on first use."""
        if self._message is not None:
            return self._message
        self._message = email.message_from_string(
            self._part('BODY.PEEK[HEADER]')
        )
        return self._message

    def subject(self):
        """Return the 'Subject' header, or None if it is absent."""
        headers = self._get_email()
        return headers['Subject']

    def from_(self):
        """Return the 'From' header, or None if it is absent."""
        headers = self._get_email()
        return headers['From']

    def is_list_message(self):
        """Return True if the message carries a 'List-Id' header."""
        headers = self._get_email()
        return 'List-Id' in headers

    def list_id(self):
        """Return the parsed list id, or None if there is no 'List-Id'."""
        # Returns None if key is not found, does not raise KeyError:
        raw_list_id = self._get_email()['List-Id']
        if raw_list_id is None:
            return None
        return parse_list_id(raw_list_id)

    def uid(self):
        """Return the message's 'UID' part."""
        return self._part('UID')

    def get_headers(self):
        """Return the parsed email object holding the message headers."""
        # TODO: email objects are dictionaries for the headers, but also
        # expose the body contents, attachments etc. etc. It'd be nice if we
        # could *only* expose the headers here...
        return self._get_email()

    def get_date(self):
        """Return the message's 'INTERNALDATE' part."""
        return self._part('INTERNALDATE')

    def get_flags(self):
        """Return the message's 'FLAGS' part."""
        return self._part('FLAGS')

    def __repr__(self):
        subject = self.subject()
        return repr(subject)


def parse_list_id(id_string):
    """Return the bare list id from a List-Id header value.

    A value such as 'Some description <list.id>' yields 'list.id'; a value
    that is already a bare id is returned unchanged.
    """
    _description, list_id = parseaddr(id_string)
    return list_id
Empty file added gmailfilter/test/__init__.py
Empty file.
33 changes: 33 additions & 0 deletions gmailfilter/test/test_connection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from unittest import TestCase


from gmailfilter import _connection as c

class SequenceChunkTests(TestCase):

    """Tests for the sequence-set strings produced by sequence_chunk.

    When the final chunk spans more than one message, sequence_chunk leaves
    it open-ended ('N:*'); the expectations below therefore use '*' wherever
    the last chunk is a range rather than a single message.
    """

    def assertSequenceChunk(self, messages, chunk_size, expected):
        """Assert that chunking 'messages' messages by 'chunk_size' yields
        exactly the 'expected' list of sequence-set strings.
        """
        observed = list(c.sequence_chunk(messages, chunk_size))
        self.assertEqual(expected, observed)

    def test_no_messages(self):
        self.assertSequenceChunk(0, 10, [])

    def test_single_message(self):
        self.assertSequenceChunk(1, 10, ['1'])

    def test_two_messages(self):
        self.assertSequenceChunk(2, 10, ['1:*'])

    def test_one_chunk(self):
        self.assertSequenceChunk(10, 10, ['1:*'])

    def test_one_and_a_bit_chunks(self):
        # The trailing chunk is a single message, so it is not open-ended.
        self.assertSequenceChunk(11, 10, ['1:10', '11'])

    def test_two_chunks(self):
        self.assertSequenceChunk(20, 10, ['1:10', '11:*'])

    def test_with_no_chunking(self):
        self.assertSequenceChunk(5, 1, ['1', '2', '3', '4', '5'])


27 changes: 27 additions & 0 deletions gmailfilter/test/test_message.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@


from unittest import TestCase

from gmailfilter._message import parse_list_id

class ListIdParsingTestCase(TestCase):

    """Tests for parse_list_id."""

    def test_list_id_equality_as_string(self):
        """A bare list id parses to an equal string."""
        parsed = parse_list_id('mail.asana.com')
        self.assertEqual('mail.asana.com', str(parsed))
        self.assertEqual('mail.asana.com', parsed)

    def test_list_id_inequality_as_string(self):
        """A parsed list id does not compare equal to a different id."""
        parsed = parse_list_id('mail.asana.com')
        self.assertNotEqual('foo.com', str(parsed))
        self.assertNotEqual('foo.com', parsed)

    def test_can_extract_list_id_from_description(self):
        """The id inside angle brackets is extracted from a described id."""
        parsed = parse_list_id('Some list description <some.list.id>')
        self.assertEqual('some.list.id', parsed)

    def test_list_ids_with_different_descriptions_are_equal(self):
        """Only the id matters for equality, not the description."""
        first = parse_list_id('some description <list.id>')
        second = parse_list_id('some other description <list.id>')
        self.assertEqual(first, second)
16 changes: 16 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# What is this?

This is the source code to my personal IMAP-based mail filtering tool. I use it every day, and it has proven to be useful for me. You are more than welcome to use and contribute to it.

# How does it work?

You must create a `rules` configuration file which tells `gmailfilter` how to connect to your IMAP mail server, and what to do with all the mail in your inbox. The approach `gmailfilter` takes to mail filtering is that your inbox should be virtually empty at the end of a filter run - only messages which need "active processing" should remain (usually this means "unread and flagged (starred) messages"). `gmailfilter` takes care of iterating over the messages in your inbox, and will run your rules over any new messages that arrive.

You can use `gmailfilter` to achieve the following:

* Automatically move mailing list messages to a separate folder.
* Automatically delete spam messages from automated services such as Jenkins.
* Move messages that are older than a certain age to a different folder.
* ...much much more!

Rules are written in Python, so you can do pretty much whatever you want!
Loading

0 comments on commit bddda49

Please sign in to comment.