From dfc771e3c21c638b00fd575b828dd2738ede7439 Mon Sep 17 00:00:00 2001 From: Thomi Richards Date: Sun, 14 Jun 2015 10:07:05 +1200 Subject: [PATCH] Various cleanups. --- gmailfilter/_command.py | 48 +++++- gmailfilter/_connection.py | 220 ++++++++++++++++++++++++++- gmailfilter/_rules.py | 2 +- gmailfilter/actions.py | 13 +- gmailfilter/messageutils.py | 7 + gmailfilter/test.py | 68 +++++++++ gmailfilter/tests/test_connection.py | 1 + 7 files changed, 349 insertions(+), 10 deletions(-) create mode 100644 gmailfilter/messageutils.py diff --git a/gmailfilter/_command.py b/gmailfilter/_command.py index 94a90f7..ca0d4c7 100644 --- a/gmailfilter/_command.py +++ b/gmailfilter/_command.py @@ -4,7 +4,13 @@ import sys from argparse import ArgumentParser -from gmailfilter._connection import IMAPServer +from gmailfilter._connection import ( + IMAPServer, + IMAPConnection, + ServerInfo, + default_credentials_file_location, +) +from gmailfilter import _rules def run(): @@ -12,6 +18,13 @@ def run(): args = configure_argument_parser() log_level = logging.DEBUG if args.verbose else logging.INFO logging.basicConfig(level=log_level, stream=sys.stdout) + if not args.dev: + run_old_filter() + else: + run_new_filter() + +def run_old_filter(): + """Run the old, pre v1 filter agent. This will get deleted soon.""" rules_path = get_filter_file_or_raise() with open(rules_path) as f: @@ -19,12 +32,45 @@ def run(): exec(code, get_rule_globals_dict()) +def run_new_filter(): + try: + s = ServerInfo.read_config_file() + except IOError: + ServerInfo.write_template_config_file() + print( + "Could not find server credentials file. A template file has been " + "written to {}. 
Please edit this and re-run.".format( + default_credentials_file_location() + ) + ) + sys.exit(0) + except KeyError as e: + print( + "Could not find required credentials key '{}'.".format(e.args[0]) + ) + sys.exit(1) + try: + rules = _rules.load_rules() + except _rules.RuleLoadError as e: + print(e) + sys.exit(2) + + connection = IMAPConnection(s) + rule_processor = _rules.SimpleRuleProcessor( + rules, + connection.get_connection_proxy() + ) + for message in connection.get_messages(): + rule_processor.process_message(message) + + def configure_argument_parser(): parser = ArgumentParser( prog="gmailfilter", description="Filter IMAP emails the easy way!" ) parser.add_argument('-v', '--verbose', action='store_true', help="Be more verbose") + parser.add_argument('--dev', action='store_true', help="Run new, development code.") return parser.parse_args() diff --git a/gmailfilter/_connection.py b/gmailfilter/_connection.py index 306873f..8cef381 100644 --- a/gmailfilter/_connection.py +++ b/gmailfilter/_connection.py @@ -1,5 +1,10 @@ from contextlib import contextmanager +import configparser import logging +import os +import os.path +import textwrap +import stat from imapclient import IMAPClient @@ -9,10 +14,11 @@ # TODO: Accept config from command line, encapsulate in a dict and pass # in to the connection class. - - class IMAPServer(object): + """The old, pre v1 server object. 
Some of this can be re-used, but most + will be deleted.""" + def __init__(self, server=None, username=None, password=None, port=993, ssl=True): if ( server is None or @@ -128,6 +134,7 @@ def optimal_chunk_size(total_messages): # whichever is smaller: return min(1000, total_messages / 10) + class MessageConnectionProxy(object): """A class that knows how to retrieve additional message parts.""" @@ -165,3 +172,212 @@ def get_message_part(self, part_name): assert msg_uid in data, ("Server gave us back some other data: %d %r" % (msg_uid, data)) return self._data[retrieve_key] + +############################################################################## +# v2 code below here: + +class IMAPConnection(object): + + def __init__(self, server_info): + self._client = IMAPClient( + host=server_info.host, + port=server_info.port, + use_uid=False, + ssl=server_info.use_ssl + ) + # self._client.debug = True + self._client.login( + server_info.username, + server_info.password, + ) + + def get_messages(self): + """A generator that yields Message instances, one for every message + in the users inbox. + + """ + # TODO - perhaps the user wants to filter a different folder? 
+ mbox_details = self._client.select_folder("INBOX") + total_messages = mbox_details['EXISTS'] + logging.info("Scanning inbox, found %d messages" % total_messages) + # TODO: Research best chunk size - maybe let user tweak this from + # config file?: + i = 0 + with self.use_sequence(): + for chunk in sequence_chunk(total_messages, optimal_chunk_size(1000)): + logging.info("Fetching: " + chunk) + data = self._client.fetch( + chunk, + ['UID', 'BODY.PEEK[HEADER]', 'INTERNALDATE', 'FLAGS'] + ) + for msg_seq in data: + logging.debug("Processing %d / %d", i, total_messages) + proxy = MessageConnectionProxy(self, data[msg_seq]) + yield Message(proxy) + i += 1 + + def get_connection_proxy(self): + return ConnectionProxy(self._client) + + @contextmanager + def use_uid(self): + old = self._client.use_uid + self._client.use_uid = True + try: + yield + finally: + self._client.use_uid = old + + @contextmanager + def use_sequence(self): + old = self._client.use_uid + self._client.use_uid = False + try: + yield + finally: + self._client.use_uid = old + + +class ConnectionProxy(object): + + """A class that proxies an IMAPClient object, but hides access to methods + that filter Actions should not call. + + """ + def __init__(self, wrapped): + self._wrapped = wrapped + + def __getattribute__(self, name): + if name == '_wrapped': + return super().__getattribute__(name) + + allowed = ( + 'add_flags', + 'add_gmail_labels', + 'copy', + 'create_folder', + 'delete_folder', + 'delete_messages', + 'folder_exists', + 'get_flags', + 'get_gmail_labels', + 'list_folders', + 'list_sub_folders', + 'remove_flags', + 'remove_gmail_labels', + 'rename_folder', + 'set_flags', + 'set_gmail_labels', + ) + if name in allowed: + return getattr(self._wrapped, name) + raise AttributeError(name) + + +class ServerInfo(object): + """A class that encapsulates information about how to connect to a server. + + Knows how to read from a config file on disk, create a template config + file. 
+ + """ + + _default_options = { + 'port': '993', + 'use_ssl': 'True', + } + + def __init__(self, host, username, password, port, use_ssl): + if not host: + raise KeyError('host') + if not username: + raise KeyError('username') + if not password: + raise KeyError('password') + self.host = host + self.username = username + self.password = password + self.port = port + self.use_ssl = use_ssl + + @classmethod + def read_config_file(cls, path=None): + """Read credentials from a config file, return a ServerInfo instance. + + This function will log a warning if the credentials file exists and is + group or world readable (your imap credentials should be private!), + but will return a valid ServerInfo instance. + + If the path to the credentials file cannot be found, it will raise an + IOError. + + If the path exists, but cannot be parsed, a RuntimeError will be + raised with the parse failure reason. + + If required keys are missing, KeyError is raised. + + """ + path = path or default_credentials_file_location() + if not os.path.exists(path): + raise IOError("Could not read path {}".format(path)) + if os.stat(path).st_mode & (stat.S_IRWXG | stat.S_IRWXO): + logging.warning( + "The credentials file at '{0}' is readable by other users on " + "this system. To eliminate this security risk, run " + "'chmod go-rwx {0}'.".format(path) + ) + parser = configparser.ConfigParser(defaults=cls._default_options) + try: + parser.read(path) + except configparser.ParsingError as e: + raise RuntimeError( + "Could not parse credentials file '{}'. 
Error was:\n{}".format( + path, + str(e) + ) + ) + return cls( + host = parser['server']['host'], + username = parser['server']['username'], + password = parser['server']['password'], + port = parser['server'].getint('port'), + use_ssl = parser['server'].getboolean('use_ssl') + ) + + @classmethod + def write_template_config_file(cls, path=None): + """Write a template config file to disk.""" + path = path or default_credentials_file_location() + with open(path, 'w') as template_file: + template_file.write(textwrap.dedent(''' + # Credentials config file. + # Comments start with a '#'. See comments below for + # detailed information on each option. + + [server] + + # REQUIRED: The domain name or ip address of the IMAP + # server to connect to: + host = + + # REQUIRED: The username to log in to the IMAP server with + username = + + # REQUIRED: The password to log in to the IMAP server with + password = + + # OPTIONAL: Whether or not to connect with SSL. Default is + # to use SSL. Uncomment this and change it to False to + # connect without SSL. + #use_ssl = True + + # OPTIONAL: The port to connect to on the host. 
+ #port = 993 + + ''') + ) + os.chmod(path, stat.S_IRUSR | stat.S_IWUSR) + + +def default_credentials_file_location(): + return os.path.expanduser('~/.config/gmailfilter/credentials.ini') diff --git a/gmailfilter/_rules.py b/gmailfilter/_rules.py index f5e8462..2d602a4 100644 --- a/gmailfilter/_rules.py +++ b/gmailfilter/_rules.py @@ -108,5 +108,5 @@ def process_message(self, message): for test, *actions in self._ruleset: if test.match(message): for action in actions: - action.process(self._connection._client, str(message.uid())) + action.process(self._connection, message) break diff --git a/gmailfilter/actions.py b/gmailfilter/actions.py index 4aaa426..607a559 100644 --- a/gmailfilter/actions.py +++ b/gmailfilter/actions.py @@ -1,4 +1,5 @@ """Classes that manipulate mails.""" +import logging @@ -10,14 +11,14 @@ class Action(object): """ - def process(self, client_conn, message_uid): + def process(self, client_conn, message): """Run the action. 'client_conn' will be an IMAPClient.IMAPClient object, possibly with access to dangerous methods removed (TODO: Document this interface explicitly). - 'message_uid' will be the message uid, as a string. + 'message' will be a message interface object. If this method raises any exceptions, action processing will stop, and an error will be logged (TODO: Actually do that somewhere). @@ -30,7 +31,7 @@ class Move(Action): def __init__(self, target_folder): self._target_folder = target_folder - def process(self, conn, uid): + def process(self, conn, message): # TODO: optimise this by trying the copy, and if we get 'NO' with # 'TRYCREATE' then, and only then try and create the folder. Removes the # overhead of the existance check for every message, @@ -39,7 +40,7 @@ def process(self, conn, uid): assert status.lower() == "success", "Unable to create folder %s" % self._target_folder - conn.copy(uid, self._target_folder) + conn.copy(message.uid(), self._target_folder) # TODO: Maybe provide logging facilities in parent 'Action' class? 
- # logging.info("Deleting %s" % uid) - conn.delete_messages(uid) + conn.delete_messages(message.uid()) + logging.info("Moving message %r to %s" % (message, self._target_folder)) diff --git a/gmailfilter/messageutils.py b/gmailfilter/messageutils.py new file mode 100644 index 0000000..373cf34 --- /dev/null +++ b/gmailfilter/messageutils.py @@ -0,0 +1,7 @@ + +from email.utils import parseaddr + + +def get_list_id(message): + list_id = message.get_headers()['List-Id'] + return parseaddr(list_id)[1] diff --git a/gmailfilter/test.py b/gmailfilter/test.py index a5239c0..87d3a49 100644 --- a/gmailfilter/test.py +++ b/gmailfilter/test.py @@ -6,11 +6,17 @@ """ +import operator +import unicodedata + +from gmailfilter.messageutils import get_list_id + __all__ = [ 'Test', 'And', 'Or', + 'MatchesHeader', ] class Test(object): @@ -104,3 +110,65 @@ def match(self, message): else: return True return False + + +class SubjectContains(Test): + + """Check whether a subject contains a certain phrase. + + Can do both case sensitive, and case insensitive matching. + + By default, matches are case sensitive:: + + >>> SubjectContains("Hello World") + + ...will match for the string 'Hello World' exactly anywhere in the + subject. Searches can be made case-insensitive like so:: + + >>> SubjectContains("welcome to", case_sensitive=False) + + Case sensitivity controls both whether we consider character case, and + whether we consider character accents. + + """ + + def __init__(self, search_string, case_sensitive=True): + self._search_string = search_string + self._case_sensitive = case_sensitive + + def match(self, message): + subject = message.get_headers()['Subject'] + if self._case_sensitive: + return self._search_string in subject + else: + return self._search_string.casefold() in subject.casefold() + + +class ListId(Test): + + """Match for mailinglist messages from a particular list-id. 
+ + """ + + def __init__(self, target_list): + self._target_list = target_list + + def match(self, message): + + return get_list_id(message) == self._target_list + + +# def caseless_comparison(str1, str2, op): +# """Perform probably-correct caseless comparison between two strings. + +# This is surprisingly complex in a unicode world. We need to deal with +# characters that have different case-forms, as well as character accents. + +# 'op' should be a callable that accepts two arguments, and returns True +# or False. Good candidates are those from the 'operator' module... + +# """ +# return op( +# unicodedata.normalize("NFKD", str1.casefold()), +# unicodedata.normalize("NFKD", str2.casefold()), +# ) diff --git a/gmailfilter/tests/test_connection.py b/gmailfilter/tests/test_connection.py index ade4091..042b16b 100644 --- a/gmailfilter/tests/test_connection.py +++ b/gmailfilter/tests/test_connection.py @@ -31,3 +31,4 @@ def test_with_no_chunking(self): self.assertSequenceChunk(5, 1, ['1', '2', '3', '4', '5']) +