diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e62ad983..d45d383be 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,19 @@ # Changelog +## v0.5.9 +Released 05 September 2015 + +Highlights: +* Added: Providers Strike, Jackett, custom Torznabs +* Added: Option to stop post-processing if no good match found (#2343) +* Fixed: Blackhole -> Magnet, limit to torcache +* Fixed: Kat 403 flac error +* Fixed: Last.fm errors +* Fixed: Pushover notifications +* Improved: Rutracker logging, switched to requests lib + +The full list of commits can be found [here](https://github.com/rembo10/headphones/compare/v0.5.8...v0.5.9). + ## v0.5.8 Released 13 July 2015 diff --git a/data/interfaces/default/config.html b/data/interfaces/default/config.html index 7478e5ede..5688ce6d3 100644 --- a/data/interfaces/default/config.html +++ b/data/interfaces/default/config.html @@ -533,22 +533,6 @@

Settings

-
-
- -
-
-
- - -
-
- - -
-
-
-
@@ -626,6 +610,87 @@

Settings

+
+
+ +
+
+
+ + +
+
+
+ +
+
+ +
+
+
+
+ + + e.g. http://localhost:9117/torznab/iptorrents +
+
+ + +
+
+ +
+
+ <% + torznab_number = 2 + %> + %for torznab in config['extra_torznabs']: + <% + if torznab[2] == '1' or torznab[2] == 1: + torznab_enabled = "checked" + else: + torznab_enabled = "" + %> +
+
+ + +
+
+ + +
+
+ +
+
+ +
+
+ <% + torznab_number += 1 + %> + %endfor + +
+
+ +
+
+ +
+
+
+ + +
+
+ + +
+
+
+
@@ -683,7 +748,7 @@

Settings

- Target bitrate: + Target bitrate: kbps
@@ -1393,6 +1458,10 @@

Settings

+
+ +
', r.text) + if regex is None: + logger.debug('Error reading token') + return + + self.session.params = {'token': regex.group(1)} + files = {'torrent_file': ("", data)} + + try: + self.session.post(url, params={'action': 'add-file'}, files=files) + except Exception as e: + logger.exception('Error adding file to utorrent %s', e) + diff --git a/headphones/searcher.py b/headphones/searcher.py index 6af321315..7dbcf5ba5 100644 --- a/headphones/searcher.py +++ b/headphones/searcher.py @@ -36,24 +36,20 @@ from headphones.common import USER_AGENT from headphones import logger, db, helpers, classes, sab, nzbget, request -from headphones import utorrent, transmission, notifiers +from headphones import utorrent, transmission, notifiers, rutracker from bencode import bencode, bdecode -import headphones.searcher_rutracker as rutrackersearch - # Magnet to torrent services, for Black hole. Stolen from CouchPotato. TORRENT_TO_MAGNET_SERVICES = [ - 'https://zoink.it/torrent/%s.torrent', - 'http://torrage.com/torrent/%s.torrent', + #'https://zoink.it/torrent/%s.torrent', + #'http://torrage.com/torrent/%s.torrent', 'https://torcache.net/torrent/%s.torrent', ] # Persistent What.cd API object gazelle = None - -# RUtracker search object -rutracker = rutrackersearch.Rutracker() +ruobj = None def fix_url(s, charset="utf-8"): @@ -168,6 +164,8 @@ def get_seed_ratio(provider): seed_ratio = headphones.CONFIG.WAFFLES_RATIO elif provider == 'Mininova': seed_ratio = headphones.CONFIG.MININOVA_RATIO + elif provider == 'Strike': + seed_ratio = headphones.CONFIG.STRIKE_RATIO else: seed_ratio = None @@ -232,7 +230,7 @@ def do_sorted_search(album, new, losslessOnly, choose_specific_download=False): NZB_PROVIDERS = (headphones.CONFIG.HEADPHONES_INDEXER or headphones.CONFIG.NEWZNAB or headphones.CONFIG.NZBSORG or headphones.CONFIG.OMGWTFNZBS) NZB_DOWNLOADERS = (headphones.CONFIG.SAB_HOST or headphones.CONFIG.BLACKHOLE_DIR or headphones.CONFIG.NZBGET_HOST) - TORRENT_PROVIDERS = (headphones.CONFIG.KAT 
or headphones.CONFIG.PIRATEBAY or headphones.CONFIG.OLDPIRATEBAY or headphones.CONFIG.MININOVA or headphones.CONFIG.WAFFLES or headphones.CONFIG.RUTRACKER or headphones.CONFIG.WHATCD) + TORRENT_PROVIDERS = (headphones.CONFIG.TORZNAB or headphones.CONFIG.KAT or headphones.CONFIG.PIRATEBAY or headphones.CONFIG.OLDPIRATEBAY or headphones.CONFIG.MININOVA or headphones.CONFIG.WAFFLES or headphones.CONFIG.RUTRACKER or headphones.CONFIG.WHATCD or headphones.CONFIG.STRIKE) results = [] myDB = db.DBConnection() @@ -793,10 +791,11 @@ def send_to_downloader(data, bestqual, album): # Randomize list of services services = TORRENT_TO_MAGNET_SERVICES[:] random.shuffle(services) + headers = {'User-Agent': USER_AGENT} for service in services: - data = request.request_content(service % torrent_hash) + data = request.request_content(service % torrent_hash, headers=headers) if data and "torcache" in data: if not torrent_to_file(download_path, data): return @@ -818,15 +817,9 @@ def send_to_downloader(data, bestqual, album): "to open or convert magnet links") return else: - if bestqual[3] == "rutracker.org": - download_path, _ = rutracker.get_torrent(bestqual[2], - headphones.CONFIG.TORRENTBLACKHOLE_DIR) - if not download_path: - return - else: - if not torrent_to_file(download_path, data): - return + if not torrent_to_file(download_path, data): + return # Extract folder name from torrent folder_name = read_torrent_name(download_path, bestqual[0]) @@ -836,13 +829,11 @@ def send_to_downloader(data, bestqual, album): elif headphones.CONFIG.TORRENT_DOWNLOADER == 1: logger.info("Sending torrent to Transmission") - # rutracker needs cookies to be set, pass the .torrent file instead of url + # Add torrent if bestqual[3] == 'rutracker.org': - file_or_url, torrentid = rutracker.get_torrent(bestqual[2]) + torrentid = transmission.addTorrent('', data) else: - file_or_url = bestqual[2] - - torrentid = transmission.addTorrent(file_or_url) + torrentid = transmission.addTorrent(bestqual[2]) if not 
torrentid: logger.error("Error sending torrent to Transmission. Are you sure it's running?") @@ -855,13 +846,6 @@ def send_to_downloader(data, bestqual, album): logger.error('Torrent folder name could not be determined') return - # remove temp .torrent file created above - if bestqual[3] == 'rutracker.org': - try: - shutil.rmtree(os.path.split(file_or_url)[0]) - except Exception as e: - logger.exception("Unhandled exception") - # Set Seed Ratio seed_ratio = get_seed_ratio(bestqual[3]) if seed_ratio is not None: @@ -870,29 +854,29 @@ def send_to_downloader(data, bestqual, album): else:# if headphones.CONFIG.TORRENT_DOWNLOADER == 2: logger.info("Sending torrent to uTorrent") - # rutracker needs cookies to be set, pass the .torrent file instead of url + # Add torrent if bestqual[3] == 'rutracker.org': - file_or_url, torrentid = rutracker.get_torrent(bestqual[2]) - folder_name, cacheid = utorrent.dirTorrent(torrentid) - folder_name = os.path.basename(os.path.normpath(folder_name)) - utorrent.labelTorrent(torrentid) + ruobj.utorrent_add_file(data) else: - file_or_url = bestqual[2] - torrentid = calculate_torrent_hash(file_or_url, data) - folder_name = utorrent.addTorrent(file_or_url, torrentid) + utorrent.addTorrent(bestqual[2]) + # Get hash + torrentid = calculate_torrent_hash(bestqual[2], data) + if not torrentid: + logger.error('Torrent id could not be determined') + return + + # Get folder + folder_name = utorrent.getFolder(torrentid) if folder_name: logger.info('Torrent folder name: %s' % folder_name) else: logger.error('Torrent folder name could not be determined') return - # remove temp .torrent file created above - if bestqual[3] == 'rutracker.org': - try: - shutil.rmtree(os.path.split(file_or_url)[0]) - except Exception as e: - logger.exception("Unhandled exception") + # Set Label + if headphones.CONFIG.UTORRENT_LABEL: + utorrent.labelTorrent(torrentid) # Set Seed Ratio seed_ratio = get_seed_ratio(bestqual[3]) @@ -932,7 +916,7 @@ def send_to_downloader(data, 
bestqual, album): if headphones.CONFIG.PUSHBULLET_ENABLED and headphones.CONFIG.PUSHBULLET_ONSNATCH: logger.info(u"Sending PushBullet notification") pushbullet = notifiers.PUSHBULLET() - pushbullet.notify(name + " has been snatched!", "Download started") + pushbullet.notify(name, "Download started") if headphones.CONFIG.TWITTER_ENABLED and headphones.CONFIG.TWITTER_ONSNATCH: logger.info(u"Sending Twitter notification") twitter = notifiers.TwitterNotifier() @@ -1041,12 +1025,7 @@ def verifyresult(title, artistterm, term, lossless): def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, choose_specific_download=False): global gazelle # persistent what.cd api object to reduce number of login attempts - - # rutracker login - if headphones.CONFIG.RUTRACKER and album: - rulogin = rutracker.login(headphones.CONFIG.RUTRACKER_USER, headphones.CONFIG.RUTRACKER_PASSWORD) - if not rulogin: - logger.info(u'Could not login to rutracker, search results will exclude this provider') + global ruobj # and rutracker albumid = album['AlbumID'] reldate = album['ReleaseDate'] @@ -1110,6 +1089,68 @@ def set_proxy(proxy_url): return proxy_url + if headphones.CONFIG.TORZNAB: + provider = "torznab" + torznab_hosts = [] + + if headphones.CONFIG.TORZNAB_HOST and headphones.CONFIG.TORZNAB_ENABLED: + torznab_hosts.append((headphones.CONFIG.TORZNAB_HOST, headphones.CONFIG.TORZNAB_APIKEY, headphones.CONFIG.TORZNAB_ENABLED)) + + for torznab_host in headphones.CONFIG.get_extra_torznabs(): + if torznab_host[2] == '1' or torznab_host[2] == 1: + torznab_hosts.append(torznab_host) + + if headphones.CONFIG.PREFERRED_QUALITY == 3 or losslessOnly: + categories = "3040" + elif headphones.CONFIG.PREFERRED_QUALITY == 1 or allow_lossless: + categories = "3040,3010" + else: + categories = "3010" + + if album['Type'] == 'Other': + categories = "3030" + logger.info("Album type is audiobook/spokenword. 
Using audiobook category") + + for torznab_host in torznab_hosts: + + provider = torznab_host[0] + + # Request results + logger.info('Parsing results from %s using search term: %s' % (torznab_host[0],term)) + + headers = {'User-Agent': USER_AGENT} + params = { + "t": "search", + "apikey": torznab_host[1], + "cat": categories, + "maxage": headphones.CONFIG.USENET_RETENTION, + "q": term + } + + data = request.request_feed( + url=torznab_host[0] + '/api?', + params=params, headers=headers + ) + + # Process feed + if data: + if not len(data.entries): + logger.info(u"No results found from %s for %s", torznab_host[0], term) + else: + for item in data.entries: + try: + url = item.link + title = item.title + size = int(item.links[1]['length']) + if all(word.lower() in title.lower() for word in term.split()): + logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + resultlist.append((title, size, url, provider, 'torrent', True)) + else: + logger.info('Skipping %s, not all search term words found' % title) + + except Exception as e: + logger.exception("An unknown error occurred trying to parse the feed: %s" % e) + if headphones.CONFIG.KAT: provider = "Kick Ass Torrents" ka_term = term.replace("!", "") @@ -1142,7 +1183,8 @@ def set_proxy(proxy_url): "field": "seeders", "sorder": "desc" } - data = request.request_json(url=providerurl, params=params) + headers = {'User-Agent': USER_AGENT} + data = request.request_json(url=providerurl, params=params, headers=headers) # Process feed if data: @@ -1158,7 +1200,7 @@ def set_proxy(proxy_url): size = int(item['size']) if format == "2": - torrent = request.request_content(url) + torrent = request.request_content(url, headers=headers) if not torrent or (int(torrent.find(".mp3")) > 0 and int(torrent.find(".flac")) < 1): rightformat = False @@ -1239,45 +1281,38 @@ def set_proxy(proxy_url): logger.error(u"An error occurred while trying to parse the response from Waffles.fm: %s", e) # rutracker.org - if 
headphones.CONFIG.RUTRACKER and rulogin: + if headphones.CONFIG.RUTRACKER: provider = "rutracker.org" # Ignore if release date not specified, results too unpredictable if not year and not usersearchterm: - logger.info(u'Release date not specified, ignoring for rutracker.org') + logger.info(u"Release date not specified, ignoring for rutracker.org") else: - if headphones.CONFIG.PREFERRED_QUALITY == 3 or losslessOnly: format = 'lossless' - maxsize = 10000000000 elif headphones.CONFIG.PREFERRED_QUALITY == 1 or allow_lossless: format = 'lossless+mp3' - maxsize = 10000000000 else: format = 'mp3' - maxsize = 300000000 - # build search url based on above - if not usersearchterm: - searchURL = rutracker.searchurl(artistterm, albumterm, year, format) - else: - searchURL = rutracker.searchurl(usersearchterm, ' ', ' ', format) + # Login + if not ruobj or not ruobj.logged_in(): + ruobj = rutracker.Rutracker() + if not ruobj.login(): + ruobj = None - logger.info(u'Parsing results from rutracker.org' % searchURL) + if ruobj and ruobj.logged_in(): - # parse results and get best match - rulist = rutracker.search(searchURL, maxsize, minimumseeders, albumid) + # build search url + if not usersearchterm: + searchURL = ruobj.searchurl(artistterm, albumterm, year, format) + else: + searchURL = ruobj.searchurl(usersearchterm, ' ', ' ', format) - # add best match to overall results list - if rulist: - for ru in rulist: - title = ru[0].decode('utf-8') - size = ru[1] - url = ru[2] - resultlist.append((title, size, url, provider, 'torrent', True)) - logger.info('Found %s. 
Size: %s' % (title, helpers.bytes_to_mb(size))) - else: - logger.info(u"No valid results found from %s" % (provider)) + # parse results + rulist = ruobj.search(searchURL) + if rulist: + resultlist.extend(rulist) if headphones.CONFIG.WHATCD: provider = "What.cd" @@ -1491,6 +1526,57 @@ def set_proxy(proxy_url): except Exception as e: logger.error(u"An unknown error occurred in the Old Pirate Bay parser: %s" % e) + # Strike + if headphones.CONFIG.STRIKE: + provider = "Strike" + s_term = term.replace("!", "") + providerurl = fix_url("https://getstrike.net/api/v2/torrents/search/?phrase=") + + providerurl = providerurl + s_term + "&category=Music" + + if headphones.CONFIG.PREFERRED_QUALITY == 3 or losslessOnly: + format = "2" + providerurl = providerurl + "&subcategory=Lossless" + maxsize = 10000000000 + elif headphones.CONFIG.PREFERRED_QUALITY == 1 or allow_lossless: + format = "10" # MP3 and FLAC + maxsize = 10000000000 + else: + format = "8" # MP3 only + maxsize = 300000000 + + logger.info("Searching %s using term: %s" % (provider, s_term)) + data = request.request_json(url=providerurl) + + if not data or not data.get('torrents'): + logger.info("No results found on %s using search term: %s" % (provider, s_term)) + else: + for item in data['torrents']: + try: + rightformat = True + title = item['torrent_title'] + seeders = item['seeds'] + url = item['magnet_uri'] + size = int(item['size']) + subcategory = item['sub_category'] + + if format == 2: + if subcategory != "Lossless": + rightformat = False + + if rightformat and size < maxsize and minimumseeders < int(seeders): + match = True + logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) + else: + match = False + logger.info( + '%s is larger than the maxsize, the wrong format or has too little seeders for this category, skipping. 
(Size: %i bytes, Seeders: %d, Format: %s)', + title, size, int(seeders), rightformat) + + resultlist.append((title, size, url, provider, 'torrent', match)) + except Exception as e: + logger.exception("Unhandled exception in the Strike parser") + # Mininova if headphones.CONFIG.MININOVA: provider = "Mininova" @@ -1567,12 +1653,14 @@ def preprocess(resultlist): for result in resultlist: if result[4] == 'torrent': + + # rutracker always needs the torrent data + if result[3] == 'rutracker.org': + return ruobj.get_torrent_data(result[2]), result + #Get out of here if we're using Transmission if headphones.CONFIG.TORRENT_DOWNLOADER == 1: ## if not a magnet link still need the .torrent to generate hash... uTorrent support labeling return True, result - # get outta here if rutracker - if result[3] == 'rutracker.org': - return True, result # Get out of here if it's a magnet link if result[2].lower().startswith("magnet:"): return True, result @@ -1581,7 +1669,8 @@ def preprocess(resultlist): headers = {} if result[3] == 'Kick Ass Torrents': - headers['Referer'] = 'http://kat.ph/' + #headers['Referer'] = 'http://kat.ph/' + headers['User-Agent'] = USER_AGENT elif result[3] == 'What.cd': headers['User-Agent'] = 'Headphones' elif result[3] == "The Pirate Bay" or result[3] == "Old Pirate Bay": diff --git a/headphones/searcher_rutracker.py b/headphones/searcher_rutracker.py deleted file mode 100644 index 0817cc501..000000000 --- a/headphones/searcher_rutracker.py +++ /dev/null @@ -1,349 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 - -# Headphones rutracker.org search -# Functions called from searcher.py - -from bencode import bencode as bencode, bdecode -from urlparse import urlparse -from bs4 import BeautifulSoup -from tempfile import mkdtemp -from hashlib import sha1 - -import headphones -import requests -import cookielib -import urllib2 -import urllib -import re -import os - -from headphones import db, logger - - -class Rutracker(): - - logged_in = False - - # Stores a number 
of login attempts to prevent recursion. - #login_counter = 0 - - def __init__(self): - - self.cookiejar = cookielib.CookieJar() - self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookiejar)) - urllib2.install_opener(self.opener) - - def login(self, login, password): - """Implements tracker login procedure.""" - - self.logged_in = False - - if login is None or password is None: - return False - - #self.login_counter += 1 - - # No recursion wanted. - #if self.login_counter > 1: - # return False - - params = urllib.urlencode({"login_username": login, - "login_password": password, - "login": "Вход"}) - - try: - self.opener.open("http://login.rutracker.org/forum/login.php", params) - except Exception: - pass - - # Check if we're logged in - for cookie in self.cookiejar: - if cookie.name == 'bb_data': - self.logged_in = True - - return self.logged_in - - def searchurl(self, artist, album, year, format): - """ - Return the search url - """ - - # Build search url - searchterm = '' - if artist != 'Various Artists': - searchterm = artist - searchterm = searchterm + ' ' - searchterm = searchterm + album - searchterm = searchterm + ' ' - searchterm = searchterm + year - - providerurl = "http://rutracker.org/forum/tracker.php" - - if format == 'lossless': - format = '+lossless' - elif format == 'lossless+mp3': - format = '+lossless||mp3||aac' - else: - format = '+mp3||aac' - - # sort by size, descending. 
- sort = '&o=7&s=2' - - searchurl = "%s?nm=%s%s%s" % (providerurl, urllib.quote(searchterm), format, sort) - - return searchurl - - def search(self, searchurl, maxsize, minseeders, albumid): - """ - Parse the search results and return valid torrent list - """ - - titles = [] - urls = [] - seeders = [] - sizes = [] - torrentlist = [] - rulist = [] - - try: - - page = self.opener.open(searchurl, timeout=60) - soup = BeautifulSoup(page.read()) - - # Debug - #logger.debug (soup.prettify()) - - # Title - for link in soup.find_all('a', attrs={'class': 'med tLink hl-tags bold'}): - title = link.get_text() - titles.append(title) - - # Download URL - for link in soup.find_all('a', attrs={'class': 'small tr-dl dl-stub'}): - url = link.get('href') - urls.append(url) - - # Seeders - for link in soup.find_all('b', attrs={'class': 'seedmed'}): - seeder = link.get_text() - seeders.append(seeder) - - # Size - for link in soup.find_all('td', attrs={'class': 'row4 small nowrap tor-size'}): - size = link.u.string - sizes.append(size) - - except: - pass - - # Combine lists - torrentlist = zip(titles, urls, seeders, sizes) - - # return if nothing found - if not torrentlist: - return False - - # don't bother checking track counts anymore, let searcher filter instead - # leave code in just in case - check_track_count = False - - if check_track_count: - - # get headphones track count for album, return if not found - myDB = db.DBConnection() - tracks = myDB.select('SELECT * from tracks WHERE AlbumID=?', [albumid]) - hptrackcount = len(tracks) - - if not hptrackcount: - logger.info('headphones track info not found, cannot compare to torrent') - return False - - # Return all valid entries, ignored, required words now checked in searcher.py - - #unwantedlist = ['promo', 'vinyl', '[lp]', 'songbook', 'tvrip', 'hdtv', 'dvd'] - - formatlist = ['ape', 'flac', 'ogg', 'm4a', 'aac', 'mp3', 'wav', 'aif'] - deluxelist = ['deluxe', 'edition', 'japanese', 'exclusive'] - - for torrent in torrentlist: - - 
returntitle = torrent[0].encode('utf-8') - url = torrent[1] - seeders = torrent[2] - size = torrent[3] - - if int(size) <= maxsize and int(seeders) >= minseeders: - - #Torrent topic page - torrent_id = dict([part.split('=') for part in urlparse(url)[4].split('&')])['t'] - topicurl = 'http://rutracker.org/forum/viewtopic.php?t=' + torrent_id - - # add to list - if not check_track_count: - valid = True - else: - - # Check torrent info - self.cookiejar.set_cookie(cookielib.Cookie(version=0, name='bb_dl', value=torrent_id, port=None, port_specified=False, domain='.rutracker.org', domain_specified=False, domain_initial_dot=False, path='/', path_specified=True, secure=False, expires=None, discard=True, comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False)) - - # Debug - #for cookie in self.cookiejar: - # logger.debug ('Cookie: %s' % cookie) - - try: - page = self.opener.open(url) - torrent = page.read() - if torrent: - decoded = bdecode(torrent) - metainfo = decoded['info'] - page.close() - except Exception as e: - logger.error('Error getting torrent: %s' % e) - return False - - # get torrent track count and check for cue - trackcount = 0 - cuecount = 0 - - if 'files' in metainfo: # multi - for pathfile in metainfo['files']: - path = pathfile['path'] - for file in path: - if any(file.lower().endswith('.' 
+ x.lower()) for x in formatlist): - trackcount += 1 - if '.cue' in file: - cuecount += 1 - - title = returntitle.lower() - logger.debug('torrent title: %s' % title) - logger.debug('headphones trackcount: %s' % hptrackcount) - logger.debug('rutracker trackcount: %s' % trackcount) - - # If torrent track count less than headphones track count, and there's a cue, then attempt to get track count from log(s) - # This is for the case where we have a single .flac/.wav which can be split by cue - # Not great, but shouldn't be doing this too often - totallogcount = 0 - if trackcount < hptrackcount and cuecount > 0 and cuecount < hptrackcount: - page = self.opener.open(topicurl, timeout=60) - soup = BeautifulSoup(page.read()) - findtoc = soup.find_all(text='TOC of the extracted CD') - if not findtoc: - findtoc = soup.find_all(text='TOC извлечённого CD') - for toc in findtoc: - logcount = 0 - for toccontent in toc.find_all_next(text=True): - cut_string = toccontent.split('|') - new_string = cut_string[0].lstrip().rstrip() - if new_string == '1' or new_string == '01': - logcount = 1 - elif logcount > 0: - if new_string.isdigit(): - logcount += 1 - else: - break - totallogcount = totallogcount + logcount - - if totallogcount > 0: - trackcount = totallogcount - logger.debug('rutracker logtrackcount: %s' % totallogcount) - - # If torrent track count = hp track count then return torrent, - # if greater, check for deluxe/special/foreign editions - # if less, then allow if it's a single track with a cue - valid = False - - if trackcount == hptrackcount: - valid = True - elif trackcount > hptrackcount: - if any(deluxe in title for deluxe in deluxelist): - valid = True - - # Add to list - if valid: - rulist.append((returntitle, size, topicurl)) - else: - if topicurl: - logger.info(u'Torrent found with %s tracks but the selected headphones release has %s tracks, skipping for rutracker.org' % (topicurl, trackcount, hptrackcount)) - else: - logger.info('%s is larger than the maxsize or 
has too little seeders for this category, skipping. (Size: %i bytes, Seeders: %i)' % (returntitle, int(size), int(seeders))) - - return rulist - - def get_torrent(self, url, savelocation=None): - - torrent_id = dict([part.split('=') for part in urlparse(url)[4].split('&')])['t'] - self.cookiejar.set_cookie(cookielib.Cookie(version=0, name='bb_dl', value=torrent_id, port=None, port_specified=False, domain='.rutracker.org', domain_specified=False, domain_initial_dot=False, path='/', path_specified=True, secure=False, expires=None, discard=True, comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False)) - downloadurl = 'http://dl.rutracker.org/forum/dl.php?t=' + torrent_id - torrent_name = torrent_id + '.torrent' - - try: - prev = os.umask(headphones.UMASK) - page = self.opener.open(downloadurl) - torrent = page.read() - decoded = bdecode(torrent) - metainfo = decoded['info'] - tor_hash = sha1(bencode(metainfo)).hexdigest() - if savelocation: - download_path = os.path.join(savelocation, torrent_name) - else: - tempdir = mkdtemp(suffix='_rutracker_torrents') - download_path = os.path.join(tempdir, torrent_name) - - with open(download_path, 'wb') as f: - f.write(torrent) - os.umask(prev) - - # Add file to utorrent - if headphones.CONFIG.TORRENT_DOWNLOADER == 2: - self.utorrent_add_file(download_path) - - except Exception as e: - logger.error('Error getting torrent: %s', e) - return False - - return download_path, tor_hash - - #TODO get this working in utorrent.py - def utorrent_add_file(self, filename): - - host = headphones.CONFIG.UTORRENT_HOST - if not host.startswith('http'): - host = 'http://' + host - if host.endswith('/'): - host = host[:-1] - if host.endswith('/gui'): - host = host[:-4] - - base_url = host - username = headphones.CONFIG.UTORRENT_USERNAME - password = headphones.CONFIG.UTORRENT_PASSWORD - - session = requests.Session() - url = base_url + '/gui/' - session.auth = (username, password) - - try: - r = session.get(url + 'token.html') - 
except Exception: - logger.exception('Error getting token') - return - - if r.status_code == '401': - logger.debug('Error reaching utorrent') - return - - regex = re.search(r'.+>([^<]+)
', r.text) - if regex is None: - logger.debug('Error reading token') - return - - session.params = {'token': regex.group(1)} - - with open(filename, 'rb') as f: - try: - session.post(url, params={'action': 'add-file'}, - files={'torrent_file': f}) - except Exception: - logger.exception('Error adding file to utorrent') - return diff --git a/headphones/transmission.py b/headphones/transmission.py index ec5a9f601..990360d00 100644 --- a/headphones/transmission.py +++ b/headphones/transmission.py @@ -28,12 +28,15 @@ # Store torrent id so we can check up on it -def addTorrent(link): +def addTorrent(link, data=None): method = 'torrent-add' - if link.endswith('.torrent'): - with open(link, 'rb') as f: - metainfo = str(base64.b64encode(f.read())) + if link.endswith('.torrent') or data: + if data: + metainfo = str(base64.b64encode(data)) + else: + with open(link, 'rb') as f: + metainfo = str(base64.b64encode(f.read())) arguments = {'metainfo': metainfo, 'download-dir': headphones.CONFIG.DOWNLOAD_TORRENT_DIR} else: arguments = {'filename': link, 'download-dir': headphones.CONFIG.DOWNLOAD_TORRENT_DIR} diff --git a/headphones/utorrent.py b/headphones/utorrent.py index 352ac72b4..08d20ce15 100644 --- a/headphones/utorrent.py +++ b/headphones/utorrent.py @@ -220,7 +220,7 @@ def dirTorrent(hash, cacheid=None, return_name=None): cacheid = torrentList['torrentc'] for torrent in torrents: - if torrent[0].upper() == hash: + if torrent[0].upper() == hash.upper(): if not return_name: return torrent[26], cacheid else: @@ -228,8 +228,12 @@ def dirTorrent(hash, cacheid=None, return_name=None): return None, None +def addTorrent(link): + uTorrentClient = utorrentclient() + uTorrentClient.add_url(link) + -def addTorrent(link, hash): +def getFolder(hash): uTorrentClient = utorrentclient() # Get Active Directory from settings @@ -239,8 +243,6 @@ def addTorrent(link, hash): logger.error('Could not get "Put new downloads in:" directory from uTorrent settings, please ensure it is set') return 
None - uTorrentClient.add_url(link) - # Get Torrent Folder Name torrent_folder, cacheid = dirTorrent(hash) @@ -254,10 +256,8 @@ def addTorrent(link, hash): if torrent_folder == active_dir or not torrent_folder: torrent_folder, cacheid = dirTorrent(hash, cacheid, return_name=True) - labelTorrent(hash) return torrent_folder else: - labelTorrent(hash) if headphones.SYS_PLATFORM != "win32": torrent_folder = torrent_folder.replace('\\', '/') return os.path.basename(os.path.normpath(torrent_folder)) diff --git a/headphones/webserve.py b/headphones/webserve.py index b247e0675..121e8667c 100644 --- a/headphones/webserve.py +++ b/headphones/webserve.py @@ -232,11 +232,11 @@ def resumeArtist(self, ArtistID): raise cherrypy.HTTPRedirect("artistPage?ArtistID=%s" % ArtistID) def removeArtist(self, ArtistID): - logger.info(u"Deleting all traces of artist: " + ArtistID) myDB = db.DBConnection() namecheck = myDB.select('SELECT ArtistName from artists where ArtistID=?', [ArtistID]) for name in namecheck: artistname = name['ArtistName'] + logger.info(u"Deleting all traces of artist: " + artistname) myDB.action('DELETE from artists WHERE ArtistID=?', [ArtistID]) from headphones import cache @@ -274,7 +274,7 @@ def scanArtist(self, ArtistID): logger.info(u"Scanning artist: %s", artist_name) full_folder_format = headphones.CONFIG.FOLDER_FORMAT - folder_format = re.findall(r'(.*[Aa]rtist?)\.*', full_folder_format)[0] + folder_format = re.findall(r'(.*?[Aa]rtist?)\.*', full_folder_format)[0] acceptable_formats = ["$artist","$sortartist","$first/$artist","$first/$sortartist"] @@ -802,7 +802,7 @@ def forceSearch(self): @cherrypy.expose def forcePostProcess(self, dir=None, album_dir=None, keep_original_folder=False): from headphones import postprocessor - threading.Thread(target=postprocessor.forcePostProcess, kwargs={'dir': dir, 'album_dir': album_dir, 'keep_original_folder':keep_original_folder}).start() + threading.Thread(target=postprocessor.forcePostProcess, kwargs={'dir': dir, 
'album_dir': album_dir, 'keep_original_folder':keep_original_folder == 'True'}).start() raise cherrypy.HTTPRedirect("home") @cherrypy.expose @@ -1065,6 +1065,11 @@ def config(self): "newznab_apikey": headphones.CONFIG.NEWZNAB_APIKEY, "newznab_enabled": checked(headphones.CONFIG.NEWZNAB_ENABLED), "extra_newznabs": headphones.CONFIG.get_extra_newznabs(), + "use_torznab": checked(headphones.CONFIG.TORZNAB), + "torznab_host": headphones.CONFIG.TORZNAB_HOST, + "torznab_apikey": headphones.CONFIG.TORZNAB_APIKEY, + "torznab_enabled": checked(headphones.CONFIG.TORZNAB_ENABLED), + "extra_torznabs": headphones.CONFIG.get_extra_torznabs(), "use_nzbsorg": checked(headphones.CONFIG.NZBSORG), "nzbsorg_uid": headphones.CONFIG.NZBSORG_UID, "nzbsorg_hash": headphones.CONFIG.NZBSORG_HASH, @@ -1101,6 +1106,8 @@ def config(self): "whatcd_username": headphones.CONFIG.WHATCD_USERNAME, "whatcd_password": headphones.CONFIG.WHATCD_PASSWORD, "whatcd_ratio": headphones.CONFIG.WHATCD_RATIO, + "use_strike": checked(headphones.CONFIG.STRIKE), + "strike_ratio": headphones.CONFIG.STRIKE_RATIO, "pref_qual_0": radio(headphones.CONFIG.PREFERRED_QUALITY, 0), "pref_qual_1": radio(headphones.CONFIG.PREFERRED_QUALITY, 1), "pref_qual_2": radio(headphones.CONFIG.PREFERRED_QUALITY, 2), @@ -1138,6 +1145,7 @@ def config(self): "autowant_upcoming": checked(headphones.CONFIG.AUTOWANT_UPCOMING), "autowant_all": checked(headphones.CONFIG.AUTOWANT_ALL), "autowant_manually_added": checked(headphones.CONFIG.AUTOWANT_MANUALLY_ADDED), + "do_not_process_unmatched": checked(headphones.CONFIG.DO_NOT_PROCESS_UNMATCHED), "keep_torrent_files": checked(headphones.CONFIG.KEEP_TORRENT_FILES), "prefer_torrents_0": radio(headphones.CONFIG.PREFER_TORRENTS, 0), "prefer_torrents_1": radio(headphones.CONFIG.PREFER_TORRENTS, 1), @@ -1278,12 +1286,12 @@ def configUpdate(self, **kwargs): # Handle the variable config options. 
Note - keys with False values aren't getting passed checked_configs = [ - "launch_browser", "enable_https", "api_enabled", "use_blackhole", "headphones_indexer", "use_newznab", "newznab_enabled", + "launch_browser", "enable_https", "api_enabled", "use_blackhole", "headphones_indexer", "use_newznab", "newznab_enabled", "use_torznab", "torznab_enabled", "use_nzbsorg", "use_omgwtfnzbs", "use_kat", "use_piratebay", "use_oldpiratebay", "use_mininova", "use_waffles", "use_rutracker", - "use_whatcd", "preferred_bitrate_allow_lossless", "detect_bitrate", "ignore_clean_releases", "freeze_db", "cue_split", "move_files", - "rename_files", "correct_metadata", "cleanup_files", "keep_nfo", "add_album_art", "embed_album_art", "embed_lyrics", - "replace_existing_folders", "keep_original_folder", "file_underscores", "include_extras", "official_releases_only", - "wait_until_release_date", "autowant_upcoming", "autowant_all", "autowant_manually_added", "keep_torrent_files", "music_encoder", + "use_whatcd", "use_strike", "preferred_bitrate_allow_lossless", "detect_bitrate", "ignore_clean_releases", "freeze_db", "cue_split", "move_files", + "rename_files", "correct_metadata", "cleanup_files", "keep_nfo", "add_album_art", "embed_album_art", "embed_lyrics", + "replace_existing_folders", "keep_original_folder", "file_underscores", "include_extras", "official_releases_only", + "wait_until_release_date", "autowant_upcoming", "autowant_all", "autowant_manually_added", "do_not_process_unmatched", "keep_torrent_files", "music_encoder", "encoderlossless", "encoder_multicore", "delete_lossless_files", "growl_enabled", "growl_onsnatch", "prowl_enabled", "prowl_onsnatch", "xbmc_enabled", "xbmc_update", "xbmc_notify", "lms_enabled", "plex_enabled", "plex_update", "plex_notify", "nma_enabled", "nma_onsnatch", "pushalot_enabled", "pushalot_onsnatch", "synoindex_enabled", "pushover_enabled", @@ -1316,6 +1324,21 @@ def configUpdate(self, **kwargs): del kwargs[key] extra_newznabs.append((newznab_host, 
newznab_api, newznab_enabled)) + extra_torznabs = [] + for kwarg in [x for x in kwargs if x.startswith('torznab_host')]: + torznab_host_key = kwarg + torznab_number = kwarg[12:] + if len(torznab_number): + torznab_api_key = 'torznab_api' + torznab_number + torznab_enabled_key = 'torznab_enabled' + torznab_number + torznab_host = kwargs.get(torznab_host_key, '') + torznab_api = kwargs.get(torznab_api_key, '') + torznab_enabled = int(kwargs.get(torznab_enabled_key, 0)) + for key in [torznab_host_key, torznab_api_key, torznab_enabled_key]: + if key in kwargs: + del kwargs[key] + extra_torznabs.append((torznab_host, torznab_api, torznab_enabled)) + # Convert the extras to list then string. Coming in as 0 or 1 (append new extras to the end) temp_extras_list = [] @@ -1341,11 +1364,18 @@ def configUpdate(self, **kwargs): del kwargs[extra] headphones.CONFIG.EXTRAS = ','.join(str(n) for n in temp_extras_list) + headphones.CONFIG.clear_extra_newznabs() + headphones.CONFIG.clear_extra_torznabs() + headphones.CONFIG.process_kwargs(kwargs) + for extra_newznab in extra_newznabs: headphones.CONFIG.add_extra_newznab(extra_newznab) + for extra_torznab in extra_torznabs: + headphones.CONFIG.add_extra_torznab(extra_torznab) + # Sanity checking if headphones.CONFIG.SEARCH_INTERVAL and headphones.CONFIG.SEARCH_INTERVAL < 360: logger.info("Search interval too low. Resetting to 6 hour minimum") diff --git a/lib/bs4/__init__.py b/lib/bs4/__init__.py index 7ba34269a..d35f765bc 100644 --- a/lib/bs4/__init__.py +++ b/lib/bs4/__init__.py @@ -17,8 +17,8 @@ """ __author__ = "Leonard Richardson (leonardr@segfault.org)" -__version__ = "4.3.2" -__copyright__ = "Copyright (c) 2004-2013 Leonard Richardson" +__version__ = "4.4.0" +__copyright__ = "Copyright (c) 2004-2015 Leonard Richardson" __license__ = "MIT" __all__ = ['BeautifulSoup'] @@ -45,7 +45,7 @@ # The very first thing we do is give a useful error if someone is # running this code under Python 3 without converting it. 
-syntax_error = u'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work. You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).' +'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'<>'You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).' class BeautifulSoup(Tag): """ @@ -77,8 +77,11 @@ class BeautifulSoup(Tag): ASCII_SPACES = '\x20\x0a\x09\x0c\x0d' + NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n" + def __init__(self, markup="", features=None, builder=None, - parse_only=None, from_encoding=None, **kwargs): + parse_only=None, from_encoding=None, exclude_encodings=None, + **kwargs): """The Soup object is initialized as the 'root tag', and the provided markup (which can be a string or a file-like object) is fed into the underlying parser.""" @@ -114,9 +117,9 @@ def __init__(self, markup="", features=None, builder=None, del kwargs['isHTML'] warnings.warn( "BS4 does not respect the isHTML argument to the " - "BeautifulSoup constructor. You can pass in features='html' " - "or features='xml' to get a builder capable of handling " - "one or the other.") + "BeautifulSoup constructor. 
Suggest you use " + "features='lxml' for HTML and features='lxml-xml' for " + "XML.") def deprecated_argument(old_name, new_name): if old_name in kwargs: @@ -140,6 +143,7 @@ def deprecated_argument(old_name, new_name): "__init__() got an unexpected keyword argument '%s'" % arg) if builder is None: + original_features = features if isinstance(features, basestring): features = [features] if features is None or len(features) == 0: @@ -151,6 +155,16 @@ def deprecated_argument(old_name, new_name): "requested: %s. Do you need to install a parser library?" % ",".join(features)) builder = builder_class() + if not (original_features == builder.NAME or + original_features in builder.ALTERNATE_NAMES): + if builder.is_xml: + markup_type = "XML" + else: + markup_type = "HTML" + warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict( + parser=builder.NAME, + markup_type=markup_type)) + self.builder = builder self.is_xml = builder.is_xml self.builder.soup = self @@ -178,6 +192,8 @@ def deprecated_argument(old_name, new_name): # system. Just let it go. pass if is_file: + if isinstance(markup, unicode): + markup = markup.encode("utf8") warnings.warn( '"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup) if markup[:5] == "http:" or markup[:6] == "https:": @@ -185,12 +201,15 @@ def deprecated_argument(old_name, new_name): # Python 3 otherwise. if ((isinstance(markup, bytes) and not b' ' in markup) or (isinstance(markup, unicode) and not u' ' in markup)): + if isinstance(markup, unicode): + markup = markup.encode("utf8") warnings.warn( '"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' 
% markup) for (self.markup, self.original_encoding, self.declared_html_encoding, self.contains_replacement_characters) in ( - self.builder.prepare_markup(markup, from_encoding)): + self.builder.prepare_markup( + markup, from_encoding, exclude_encodings=exclude_encodings)): self.reset() try: self._feed() @@ -203,6 +222,16 @@ def deprecated_argument(old_name, new_name): self.markup = None self.builder.soup = None + def __copy__(self): + return type(self)(self.encode(), builder=self.builder) + + def __getstate__(self): + # Frequently a tree builder can't be pickled. + d = dict(self.__dict__) + if 'builder' in d and not self.builder.picklable: + del d['builder'] + return d + def _feed(self): # Convert the document to Unicode. self.builder.reset() @@ -229,9 +258,7 @@ def new_tag(self, name, namespace=None, nsprefix=None, **attrs): def new_string(self, s, subclass=NavigableString): """Create a new NavigableString associated with this soup.""" - navigable = subclass(s) - navigable.setup() - return navigable + return subclass(s) def insert_before(self, successor): raise NotImplementedError("BeautifulSoup objects don't support insert_before().") @@ -290,14 +317,49 @@ def endData(self, containerClass=NavigableString): def object_was_parsed(self, o, parent=None, most_recent_element=None): """Add an object to the parse tree.""" parent = parent or self.currentTag - most_recent_element = most_recent_element or self._most_recent_element - o.setup(parent, most_recent_element) + previous_element = most_recent_element or self._most_recent_element + + next_element = previous_sibling = next_sibling = None + if isinstance(o, Tag): + next_element = o.next_element + next_sibling = o.next_sibling + previous_sibling = o.previous_sibling + if not previous_element: + previous_element = o.previous_element + + o.setup(parent, previous_element, next_element, previous_sibling, next_sibling) - if most_recent_element is not None: - most_recent_element.next_element = o self._most_recent_element = o 
parent.contents.append(o) + if parent.next_sibling: + # This node is being inserted into an element that has + # already been parsed. Deal with any dangling references. + index = parent.contents.index(o) + if index == 0: + previous_element = parent + previous_sibling = None + else: + previous_element = previous_sibling = parent.contents[index-1] + if index == len(parent.contents)-1: + next_element = parent.next_sibling + next_sibling = None + else: + next_element = next_sibling = parent.contents[index+1] + + o.previous_element = previous_element + if previous_element: + previous_element.next_element = o + o.next_element = next_element + if next_element: + next_element.previous_element = o + o.next_sibling = next_sibling + if next_sibling: + next_sibling.previous_sibling = o + o.previous_sibling = previous_sibling + if previous_sibling: + previous_sibling.next_sibling = o + def _popToTag(self, name, nsprefix=None, inclusivePop=True): """Pops the tag stack up to and including the most recent instance of the given tag. If inclusivePop is false, pops the tag diff --git a/lib/bs4/builder/__init__.py b/lib/bs4/builder/__init__.py index 740f5f29c..f8fce5681 100644 --- a/lib/bs4/builder/__init__.py +++ b/lib/bs4/builder/__init__.py @@ -80,9 +80,12 @@ def lookup(self, *features): class TreeBuilder(object): """Turn a document into a Beautiful Soup object tree.""" + NAME = "[Unknown tree builder]" + ALTERNATE_NAMES = [] features = [] is_xml = False + picklable = False preserve_whitespace_tags = set() empty_element_tags = None # A tag will be considered an empty-element # tag when and only when it has no contents. 
diff --git a/lib/bs4/builder/_html5lib.py b/lib/bs4/builder/_html5lib.py index d46b695bd..ab5793c18 100644 --- a/lib/bs4/builder/_html5lib.py +++ b/lib/bs4/builder/_html5lib.py @@ -2,6 +2,7 @@ 'HTML5TreeBuilder', ] +from pdb import set_trace import warnings from bs4.builder import ( PERMISSIVE, @@ -9,7 +10,10 @@ HTML_5, HTMLTreeBuilder, ) -from bs4.element import NamespacedAttribute +from bs4.element import ( + NamespacedAttribute, + whitespace_re, +) import html5lib from html5lib.constants import namespaces from bs4.element import ( @@ -22,11 +26,20 @@ class HTML5TreeBuilder(HTMLTreeBuilder): """Use html5lib to build a tree.""" - features = ['html5lib', PERMISSIVE, HTML_5, HTML] + NAME = "html5lib" + + features = [NAME, PERMISSIVE, HTML_5, HTML] - def prepare_markup(self, markup, user_specified_encoding): + def prepare_markup(self, markup, user_specified_encoding, + document_declared_encoding=None, exclude_encodings=None): # Store the user-specified encoding for use later on. self.user_specified_encoding = user_specified_encoding + + # document_declared_encoding and exclude_encodings aren't used + # ATM because the html5lib TreeBuilder doesn't use + # UnicodeDammit. + if exclude_encodings: + warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.") yield (markup, None, None, False) # These methods are defined by Beautiful Soup. @@ -101,7 +114,13 @@ def __init__(self, element): def __iter__(self): return list(self.attrs.items()).__iter__() def __setitem__(self, name, value): - "set attr", name, value + # If this attribute is a multi-valued attribute for this element, + # turn its value into a list. 
+ list_attr = HTML5TreeBuilder.cdata_list_attributes + if (name in list_attr['*'] + or (self.element.name in list_attr + and name in list_attr[self.element.name])): + value = whitespace_re.split(value) self.element[name] = value def items(self): return list(self.attrs.items()) @@ -161,6 +180,12 @@ def appendChild(self, node): # immediately after the parent, if it has no children.) if self.element.contents: most_recent_element = self.element._last_descendant(False) + elif self.element.next_element is not None: + # Something from further ahead in the parse tree is + # being inserted into this earlier element. This is + # very annoying because it means an expensive search + # for the last element in the tree. + most_recent_element = self.soup._last_descendant() else: most_recent_element = self.element @@ -172,6 +197,7 @@ def getAttributes(self): return AttrList(self.element) def setAttributes(self, attributes): + if attributes is not None and len(attributes) > 0: converted_attributes = [] @@ -218,6 +244,9 @@ def removeChild(self, node): def reparentChildren(self, new_parent): """Move all of this tag's children into another tag.""" + # print "MOVE", self.element.contents + # print "FROM", self.element + # print "TO", new_parent.element element = self.element new_parent_element = new_parent.element # Determine what this tag's next_element will be once all the children @@ -236,17 +265,28 @@ def reparentChildren(self, new_parent): new_parents_last_descendant_next_element = new_parent_element.next_element to_append = element.contents - append_after = new_parent.element.contents + append_after = new_parent_element.contents if len(to_append) > 0: # Set the first child's previous_element and previous_sibling # to elements within the new parent first_child = to_append[0] - first_child.previous_element = new_parents_last_descendant + if new_parents_last_descendant: + first_child.previous_element = new_parents_last_descendant + else: + first_child.previous_element = 
new_parent_element first_child.previous_sibling = new_parents_last_child + if new_parents_last_descendant: + new_parents_last_descendant.next_element = first_child + else: + new_parent_element.next_element = first_child + if new_parents_last_child: + new_parents_last_child.next_sibling = first_child # Fix the last child's next_element and next_sibling last_child = to_append[-1] last_child.next_element = new_parents_last_descendant_next_element + if new_parents_last_descendant_next_element: + new_parents_last_descendant_next_element.previous_element = last_child last_child.next_sibling = None for child in to_append: @@ -257,6 +297,10 @@ def reparentChildren(self, new_parent): element.contents = [] element.next_element = final_next_element + # print "DONE WITH MOVE" + # print "FROM", self.element + # print "TO", new_parent_element + def cloneNode(self): tag = self.soup.new_tag(self.element.name, self.namespace) node = Element(tag, self.soup, self.namespace) @@ -268,7 +312,7 @@ def hasContent(self): return self.element.contents def getNameTuple(self): - if self.namespace is None: + if self.namespace == None: return namespaces["html"], self.name else: return self.namespace, self.name diff --git a/lib/bs4/builder/_htmlparser.py b/lib/bs4/builder/_htmlparser.py index ca8d8b892..0101d647b 100644 --- a/lib/bs4/builder/_htmlparser.py +++ b/lib/bs4/builder/_htmlparser.py @@ -4,10 +4,16 @@ 'HTMLParserTreeBuilder', ] -from HTMLParser import ( - HTMLParser, - HTMLParseError, - ) +from HTMLParser import HTMLParser + +try: + from HTMLParser import HTMLParseError +except ImportError, e: + # HTMLParseError is removed in Python 3.5. Since it can never be + # thrown in 3.5, we can just define our own class as a placeholder. + class HTMLParseError(Exception): + pass + import sys import warnings @@ -19,10 +25,10 @@ # At the end of this file, we monkeypatch HTMLParser so that # strict=True works well on Python 3.2.2. 
major, minor, release = sys.version_info[:3] -CONSTRUCTOR_TAKES_STRICT = ( - major > 3 - or (major == 3 and minor > 2) - or (major == 3 and minor == 2 and release >= 3)) +CONSTRUCTOR_TAKES_STRICT = major == 3 and minor == 2 and release >= 3 +CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3 +CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4 + from bs4.element import ( CData, @@ -63,7 +69,8 @@ def handle_data(self, data): def handle_charref(self, name): # XXX workaround for a bug in HTMLParser. Remove this once - # it's fixed. + # it's fixed in all supported versions. + # http://bugs.python.org/issue13633 if name.startswith('x'): real_name = int(name.lstrip('x'), 16) elif name.startswith('X'): @@ -113,14 +120,6 @@ def unknown_decl(self, data): def handle_pi(self, data): self.soup.endData() - if data.endswith("?") and data.lower().startswith("xml"): - # "An XHTML processing instruction using the trailing '?' - # will cause the '?' to be included in data." - HTMLParser - # docs. - # - # Strip the question mark so we don't end up with two - # question marks. 
- data = data[:-1] self.soup.handle_data(data) self.soup.endData(ProcessingInstruction) @@ -128,15 +127,19 @@ def handle_pi(self, data): class HTMLParserTreeBuilder(HTMLTreeBuilder): is_xml = False - features = [HTML, STRICT, HTMLPARSER] + picklable = True + NAME = HTMLPARSER + features = [NAME, HTML, STRICT] def __init__(self, *args, **kwargs): - if CONSTRUCTOR_TAKES_STRICT: + if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED: kwargs['strict'] = False + if CONSTRUCTOR_TAKES_CONVERT_CHARREFS: + kwargs['convert_charrefs'] = False self.parser_args = (args, kwargs) def prepare_markup(self, markup, user_specified_encoding=None, - document_declared_encoding=None): + document_declared_encoding=None, exclude_encodings=None): """ :return: A 4-tuple (markup, original encoding, encoding declared within markup, whether any characters had to be @@ -147,7 +150,8 @@ def prepare_markup(self, markup, user_specified_encoding=None, return try_encodings = [user_specified_encoding, document_declared_encoding] - dammit = UnicodeDammit(markup, try_encodings, is_html=True) + dammit = UnicodeDammit(markup, try_encodings, is_html=True, + exclude_encodings=exclude_encodings) yield (dammit.markup, dammit.original_encoding, dammit.declared_html_encoding, dammit.contains_replacement_characters) diff --git a/lib/bs4/builder/_lxml.py b/lib/bs4/builder/_lxml.py index fa5d49875..9e8f88fb5 100644 --- a/lib/bs4/builder/_lxml.py +++ b/lib/bs4/builder/_lxml.py @@ -7,7 +7,12 @@ from StringIO import StringIO import collections from lxml import etree -from bs4.element import Comment, Doctype, NamespacedAttribute +from bs4.element import ( + Comment, + Doctype, + NamespacedAttribute, + ProcessingInstruction, +) from bs4.builder import ( FAST, HTML, @@ -25,8 +30,11 @@ class LXMLTreeBuilderForXML(TreeBuilder): is_xml = True + NAME = "lxml-xml" + ALTERNATE_NAMES = ["xml"] + # Well, it's permissive by XML parser standards. 
- features = [LXML, XML, FAST, PERMISSIVE] + features = [NAME, LXML, XML, FAST, PERMISSIVE] CHUNK_SIZE = 512 @@ -70,6 +78,7 @@ def _getNsTag(self, tag): return (None, tag) def prepare_markup(self, markup, user_specified_encoding=None, + exclude_encodings=None, document_declared_encoding=None): """ :yield: A series of 4-tuples. @@ -95,7 +104,8 @@ def prepare_markup(self, markup, user_specified_encoding=None, # the document as each one in turn. is_html = not self.is_xml try_encodings = [user_specified_encoding, document_declared_encoding] - detector = EncodingDetector(markup, try_encodings, is_html) + detector = EncodingDetector( + markup, try_encodings, is_html, exclude_encodings) for encoding in detector.encodings: yield (detector.markup, encoding, document_declared_encoding, False) @@ -189,7 +199,9 @@ def end(self, name): self.nsmaps.pop() def pi(self, target, data): - pass + self.soup.endData() + self.soup.handle_data(target + ' ' + data) + self.soup.endData(ProcessingInstruction) def data(self, content): self.soup.handle_data(content) @@ -212,7 +224,10 @@ def test_fragment_to_document(self, fragment): class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML): - features = [LXML, HTML, FAST, PERMISSIVE] + NAME = LXML + ALTERNATE_NAMES = ["lxml-html"] + + features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE] is_xml = False def default_parser(self, encoding): diff --git a/lib/bs4/dammit.py b/lib/bs4/dammit.py index 32e211dc4..317ad6d74 100644 --- a/lib/bs4/dammit.py +++ b/lib/bs4/dammit.py @@ -3,10 +3,11 @@ This library converts a bytestream to Unicode through any means necessary. It is heavily based on code from Mark Pilgrim's Universal -Feed Parser. It works best on XML and XML, but it does not rewrite the +Feed Parser. It works best on XML and HTML, but it does not rewrite the XML or HTML to reflect a new encoding; that's the tree builder's job. 
""" +from pdb import set_trace import codecs from htmlentitydefs import codepoint2name import re @@ -212,8 +213,11 @@ class EncodingDetector: 5. Windows-1252. """ - def __init__(self, markup, override_encodings=None, is_html=False): + def __init__(self, markup, override_encodings=None, is_html=False, + exclude_encodings=None): self.override_encodings = override_encodings or [] + exclude_encodings = exclude_encodings or [] + self.exclude_encodings = set([x.lower() for x in exclude_encodings]) self.chardet_encoding = None self.is_html = is_html self.declared_encoding = None @@ -224,6 +228,8 @@ def __init__(self, markup, override_encodings=None, is_html=False): def _usable(self, encoding, tried): if encoding is not None: encoding = encoding.lower() + if encoding in self.exclude_encodings: + return False if encoding not in tried: tried.add(encoding) return True @@ -266,6 +272,9 @@ def encodings(self): def strip_byte_order_mark(cls, data): """If a byte-order mark is present, strip it and return the encoding it implies.""" encoding = None + if isinstance(data, unicode): + # Unicode data cannot have a byte-order mark. 
+ return data, encoding if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \ and (data[2:4] != '\x00\x00'): encoding = 'utf-16be' @@ -299,14 +308,14 @@ def find_declared_encoding(cls, markup, is_html=False, search_entire_document=Fa else: xml_endpos = 1024 html_endpos = max(2048, int(len(markup) * 0.05)) - + declared_encoding = None declared_encoding_match = xml_encoding_re.search(markup, endpos=xml_endpos) if not declared_encoding_match and is_html: declared_encoding_match = html_meta_re.search(markup, endpos=html_endpos) if declared_encoding_match is not None: declared_encoding = declared_encoding_match.groups()[0].decode( - 'ascii') + 'ascii', 'replace') if declared_encoding: return declared_encoding.lower() return None @@ -331,13 +340,14 @@ class UnicodeDammit: ] def __init__(self, markup, override_encodings=[], - smart_quotes_to=None, is_html=False): + smart_quotes_to=None, is_html=False, exclude_encodings=[]): self.smart_quotes_to = smart_quotes_to self.tried_encodings = [] self.contains_replacement_characters = False self.is_html = is_html - self.detector = EncodingDetector(markup, override_encodings, is_html) + self.detector = EncodingDetector( + markup, override_encodings, is_html, exclude_encodings) # Short-circuit if the data is in Unicode to begin with. 
if isinstance(markup, unicode) or markup == '': diff --git a/lib/bs4/diagnose.py b/lib/bs4/diagnose.py index b7c99b1c5..1b719830b 100644 --- a/lib/bs4/diagnose.py +++ b/lib/bs4/diagnose.py @@ -33,12 +33,21 @@ def diagnose(data): if 'lxml' in basic_parsers: basic_parsers.append(["lxml", "xml"]) - from lxml import etree - print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)) + try: + from lxml import etree + print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)) + except ImportError, e: + print ( + "lxml is not installed or couldn't be imported.") + if 'html5lib' in basic_parsers: - import html5lib - print "Found html5lib version %s" % html5lib.__version__ + try: + import html5lib + print "Found html5lib version %s" % html5lib.__version__ + except ImportError, e: + print ( + "html5lib is not installed or couldn't be imported.") if hasattr(data, 'read'): data = data.read() @@ -135,7 +144,7 @@ def rword(length=5): def rsentence(length=4): "Generate a random sentence-like string." return " ".join(rword(random.randint(4,9)) for i in range(length)) - + def rdoc(num_elements=1000): """Randomly generate an invalid HTML document.""" tag_names = ['p', 'div', 'span', 'i', 'b', 'script', 'table'] @@ -159,7 +168,7 @@ def benchmark_parsers(num_elements=100000): print "Comparative parser benchmark on Beautiful Soup %s" % __version__ data = rdoc(num_elements) print "Generated a large invalid HTML document (%d bytes)." 
% len(data) - + for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]: success = False try: diff --git a/lib/bs4/element.py b/lib/bs4/element.py index da9afdf48..c70ad5a01 100644 --- a/lib/bs4/element.py +++ b/lib/bs4/element.py @@ -1,3 +1,4 @@ +from pdb import set_trace import collections import re import sys @@ -185,24 +186,40 @@ def _formatter_for_name(self, name): return self.HTML_FORMATTERS.get( name, HTMLAwareEntitySubstitution.substitute_xml) - def setup(self, parent=None, previous_element=None): + def setup(self, parent=None, previous_element=None, next_element=None, + previous_sibling=None, next_sibling=None): """Sets up the initial relations between this element and other elements.""" self.parent = parent + self.previous_element = previous_element if previous_element is not None: self.previous_element.next_element = self - self.next_element = None - self.previous_sibling = None - self.next_sibling = None - if self.parent is not None and self.parent.contents: - self.previous_sibling = self.parent.contents[-1] + + self.next_element = next_element + if self.next_element: + self.next_element.previous_element = self + + self.next_sibling = next_sibling + if self.next_sibling: + self.next_sibling.previous_sibling = self + + if (not previous_sibling + and self.parent is not None and self.parent.contents): + previous_sibling = self.parent.contents[-1] + + self.previous_sibling = previous_sibling + if previous_sibling: self.previous_sibling.next_sibling = self nextSibling = _alias("next_sibling") # BS3 previousSibling = _alias("previous_sibling") # BS3 def replace_with(self, replace_with): + if not self.parent: + raise ValueError( + "Cannot replace one element with another when the" + "element to be replaced is not part of a tree.") if replace_with is self: return if replace_with is self.parent: @@ -216,6 +233,10 @@ def replace_with(self, replace_with): def unwrap(self): my_parent = self.parent + if not self.parent: + raise ValueError( + "Cannot 
replace an element with its contents when that" + "element is not part of a tree.") my_index = self.parent.index(self) self.extract() for child in reversed(self.contents[:]): @@ -240,17 +261,20 @@ def extract(self): last_child = self._last_descendant() next_element = last_child.next_element - if self.previous_element is not None: + if (self.previous_element is not None and + self.previous_element != next_element): self.previous_element.next_element = next_element - if next_element is not None: + if next_element is not None and next_element != self.previous_element: next_element.previous_element = self.previous_element self.previous_element = None last_child.next_element = None self.parent = None - if self.previous_sibling is not None: + if (self.previous_sibling is not None + and self.previous_sibling != self.next_sibling): self.previous_sibling.next_sibling = self.next_sibling - if self.next_sibling is not None: + if (self.next_sibling is not None + and self.next_sibling != self.previous_sibling): self.next_sibling.previous_sibling = self.previous_sibling self.previous_sibling = self.next_sibling = None return self @@ -478,6 +502,10 @@ def _find_one(self, method, name, attrs, text, **kwargs): def _find_all(self, name, attrs, text, limit, generator, **kwargs): "Iterates over a generator looking for things that match." + if text is None and 'string' in kwargs: + text = kwargs['string'] + del kwargs['string'] + if isinstance(name, SoupStrainer): strainer = name else: @@ -548,17 +576,17 @@ def parents(self): # Methods for supporting CSS selectors. 
- tag_name_re = re.compile('^[a-z0-9]+$') + tag_name_re = re.compile('^[a-zA-Z0-9][-.a-zA-Z0-9:_]*$') - # /^(\w+)\[(\w+)([=~\|\^\$\*]?)=?"?([^\]"]*)"?\]$/ - # \---/ \---/\-------------/ \-------/ - # | | | | - # | | | The value - # | | ~,|,^,$,* or = - # | Attribute + # /^([a-zA-Z0-9][-.a-zA-Z0-9:_]*)\[(\w+)([=~\|\^\$\*]?)=?"?([^\]"]*)"?\]$/ + # \---------------------------/ \---/\-------------/ \-------/ + # | | | | + # | | | The value + # | | ~,|,^,$,* or = + # | Attribute # Tag attribselect_re = re.compile( - r'^(?P<tag>\w+)?\[(?P<attribute>\w+)(?P<operator>[=~\|\^\$\*]?)' + + r'^(?P<tag>[a-zA-Z0-9][-.a-zA-Z0-9:_]*)?\[(?P<attribute>[\w-]+)(?P<operator>[=~\|\^\$\*]?)' + r'=?"?(?P<value>[^\]"]*)"?\]$' ) @@ -654,11 +682,17 @@ def __new__(cls, value): how to handle non-ASCII characters. """ if isinstance(value, unicode): - return unicode.__new__(cls, value) - return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) + u = unicode.__new__(cls, value) + else: + u = unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) + u.setup() + return u def __copy__(self): - return self + """A copy of a NavigableString has the same contents and class + as the original, but it is not connected to the parse tree. 
+ """ + return type(self)(self) def __getnewargs__(self): return (unicode(self),) @@ -707,7 +741,7 @@ class CData(PreformattedString): class ProcessingInstruction(PreformattedString): PREFIX = u'<?' - SUFFIX = u'?>' + SUFFIX = u'>' class Comment(PreformattedString): @@ -759,9 +793,12 @@ def __init__(self, parser=None, builder=None, name=None, namespace=None, self.prefix = prefix if attrs is None: attrs = {} - elif attrs and builder.cdata_list_attributes: - attrs = builder._replace_cdata_list_attribute_values( - self.name, attrs) + elif attrs: + if builder is not None and builder.cdata_list_attributes: + attrs = builder._replace_cdata_list_attribute_values( + self.name, attrs) + else: + attrs = dict(attrs) else: attrs = dict(attrs) self.attrs = attrs @@ -778,6 +815,18 @@ def __init__(self, parser=None, builder=None, name=None, namespace=None, parserClass = _alias("parser_class") # BS3 + def __copy__(self): + """A copy of a Tag is a new Tag, unconnected to the parse tree. + Its contents are a copy of the old Tag's contents. + """ + clone = type(self)(None, self.builder, self.name, self.namespace, + self.nsprefix, self.attrs) + for attr in ('can_be_empty_element', 'hidden'): + setattr(clone, attr, getattr(self, attr)) + for child in self.contents: + clone.append(child.__copy__()) + return clone + @property def is_empty_element(self): """Is this tag an empty-element tag? (aka a self-closing tag) @@ -971,15 +1020,25 @@ def __ne__(self, other): as defined in __eq__.""" return not self == other - def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING): + def __repr__(self, encoding="unicode-escape"): """Renders this tag as a string.""" - return self.encode(encoding) + if PY3K: + # "The return value must be a string object", i.e. Unicode + return self.decode() + else: + # "The return value must be a string object", i.e. a bytestring. + # By convention, the return value of __repr__ should also be + # an ASCII string. 
+ return self.encode(encoding) def __unicode__(self): return self.decode() def __str__(self): - return self.encode() + if PY3K: + return self.decode() + else: + return self.encode() if PY3K: __str__ = __repr__ = __unicode__ @@ -1103,12 +1162,18 @@ def decode_contents(self, indent_level=None, formatter="minimal"): """Renders the contents of this tag as a Unicode string. + :param indent_level: Each line of the rendering will be + indented this many spaces. + :param eventual_encoding: The tag is destined to be encoded into this encoding. This method is _not_ responsible for performing that encoding. This information is passed in so that it can be substituted in if the document contains a tag that mentions the document's encoding. + + :param formatter: The output formatter responsible for converting + entities to Unicode characters. """ # First off, turn a string formatter into a function. This # will stop the lookup from happening over and over again. @@ -1137,7 +1202,17 @@ def decode_contents(self, indent_level=None, def encode_contents( self, indent_level=None, encoding=DEFAULT_OUTPUT_ENCODING, formatter="minimal"): - """Renders the contents of this tag as a bytestring.""" + """Renders the contents of this tag as a bytestring. + + :param indent_level: Each line of the rendering will be + indented this many spaces. + + :param eventual_encoding: The bytestring will be in this encoding. + + :param formatter: The output formatter responsible for converting + entities to Unicode characters. 
+ """ + contents = self.decode_contents(indent_level, encoding, formatter) return contents.encode(encoding) @@ -1201,63 +1276,89 @@ def descendants(self): _selector_combinators = ['>', '+', '~'] _select_debug = False - def select(self, selector, _candidate_generator=None): + def select_one(self, selector): """Perform a CSS selection operation on the current element.""" - tokens = selector.split() + value = self.select(selector, limit=1) + if value: + return value[0] + return None + + def select(self, selector, _candidate_generator=None, limit=None): + """Perform a CSS selection operation on the current element.""" + + # Remove whitespace directly after the grouping operator ',' + # then split into tokens. + tokens = re.sub(',[\s]*',',', selector).split() current_context = [self] if tokens[-1] in self._selector_combinators: raise ValueError( 'Final combinator "%s" is missing an argument.' % tokens[-1]) + if self._select_debug: print 'Running CSS selector "%s"' % selector - for index, token in enumerate(tokens): - if self._select_debug: - print ' Considering token "%s"' % token - recursive_candidate_generator = None - tag_name = None + + for index, token_group in enumerate(tokens): + new_context = [] + new_context_ids = set([]) + + # Grouping selectors, ie: p,a + grouped_tokens = token_group.split(',') + if '' in grouped_tokens: + raise ValueError('Invalid group selection syntax: %s' % token_group) + if tokens[index-1] in self._selector_combinators: # This token was consumed by the previous combinator. Skip it. if self._select_debug: print ' Token was consumed by the previous combinator.' continue - # Each operation corresponds to a checker function, a rule - # for determining whether a candidate matches the - # selector. Candidates are generated by the active - # iterator. 
- checker = None - - m = self.attribselect_re.match(token) - if m is not None: - # Attribute selector - tag_name, attribute, operator, value = m.groups() - checker = self._attribute_checker(operator, attribute, value) - - elif '#' in token: - # ID selector - tag_name, tag_id = token.split('#', 1) - def id_matches(tag): - return tag.get('id', None) == tag_id - checker = id_matches - - elif '.' in token: - # Class selector - tag_name, klass = token.split('.', 1) - classes = set(klass.split('.')) - def classes_match(candidate): - return classes.issubset(candidate.get('class', [])) - checker = classes_match - - elif ':' in token: - # Pseudo-class - tag_name, pseudo = token.split(':', 1) - if tag_name == '': - raise ValueError( - "A pseudo-class must be prefixed with a tag name.") - pseudo_attributes = re.match('([a-zA-Z\d-]+)\(([a-zA-Z\d]+)\)', pseudo) - found = [] - if pseudo_attributes is not None: - pseudo_type, pseudo_value = pseudo_attributes.groups() + + for token in grouped_tokens: + if self._select_debug: + print ' Considering token "%s"' % token + recursive_candidate_generator = None + tag_name = None + + # Each operation corresponds to a checker function, a rule + # for determining whether a candidate matches the + # selector. Candidates are generated by the active + # iterator. + checker = None + + m = self.attribselect_re.match(token) + if m is not None: + # Attribute selector + tag_name, attribute, operator, value = m.groups() + checker = self._attribute_checker(operator, attribute, value) + + elif '#' in token: + # ID selector + tag_name, tag_id = token.split('#', 1) + def id_matches(tag): + return tag.get('id', None) == tag_id + checker = id_matches + + elif '.' 
in token: + # Class selector + tag_name, klass = token.split('.', 1) + classes = set(klass.split('.')) + def classes_match(candidate): + return classes.issubset(candidate.get('class', [])) + checker = classes_match + + elif ':' in token: + # Pseudo-class + tag_name, pseudo = token.split(':', 1) + if tag_name == '': + raise ValueError( + "A pseudo-class must be prefixed with a tag name.") + pseudo_attributes = re.match('([a-zA-Z\d-]+)\(([a-zA-Z\d]+)\)', pseudo) + found = [] + if pseudo_attributes is None: + pseudo_type = pseudo + pseudo_value = None + else: + pseudo_type, pseudo_value = pseudo_attributes.groups() if pseudo_type == 'nth-of-type': try: pseudo_value = int(pseudo_value) @@ -1286,109 +1387,110 @@ def nth_child_of_type(self, tag): raise NotImplementedError( 'Only the following pseudo-classes are implemented: nth-of-type.') - elif token == '*': - # Star selector -- matches everything - pass - elif token == '>': - # Run the next token as a CSS selector against the - # direct children of each tag in the current context. - recursive_candidate_generator = lambda tag: tag.children - elif token == '~': - # Run the next token as a CSS selector against the - # siblings of each tag in the current context. - recursive_candidate_generator = lambda tag: tag.next_siblings - elif token == '+': - # For each tag in the current context, run the next - # token as a CSS selector against the tag's next - # sibling that's a tag. - def next_tag_sibling(tag): - yield tag.find_next_sibling(True) - recursive_candidate_generator = next_tag_sibling - - elif self.tag_name_re.match(token): - # Just a tag name. - tag_name = token - else: - raise ValueError( - 'Unsupported or invalid CSS selector: "%s"' % token) - - if recursive_candidate_generator: - # This happens when the selector looks like "> foo". - # - # The generator calls select() recursively on every - # member of the current context, passing in a different - # candidate generator and a different selector. 
- # - # In the case of "> foo", the candidate generator is - # one that yields a tag's direct children (">"), and - # the selector is "foo". - next_token = tokens[index+1] - def recursive_select(tag): - if self._select_debug: - print ' Calling select("%s") recursively on %s %s' % (next_token, tag.name, tag.attrs) - print '-' * 40 - for i in tag.select(next_token, recursive_candidate_generator): + elif token == '*': + # Star selector -- matches everything + pass + elif token == '>': + # Run the next token as a CSS selector against the + # direct children of each tag in the current context. + recursive_candidate_generator = lambda tag: tag.children + elif token == '~': + # Run the next token as a CSS selector against the + # siblings of each tag in the current context. + recursive_candidate_generator = lambda tag: tag.next_siblings + elif token == '+': + # For each tag in the current context, run the next + # token as a CSS selector against the tag's next + # sibling that's a tag. + def next_tag_sibling(tag): + yield tag.find_next_sibling(True) + recursive_candidate_generator = next_tag_sibling + + elif self.tag_name_re.match(token): + # Just a tag name. + tag_name = token + else: + raise ValueError( + 'Unsupported or invalid CSS selector: "%s"' % token) + if recursive_candidate_generator: + # This happens when the selector looks like "> foo". + # + # The generator calls select() recursively on every + # member of the current context, passing in a different + # candidate generator and a different selector. + # + # In the case of "> foo", the candidate generator is + # one that yields a tag's direct children (">"), and + # the selector is "foo". 
+ next_token = tokens[index+1] + def recursive_select(tag): + if self._select_debug: + print ' Calling select("%s") recursively on %s %s' % (next_token, tag.name, tag.attrs) + print '-' * 40 + for i in tag.select(next_token, recursive_candidate_generator): + if self._select_debug: + print '(Recursive select picked up candidate %s %s)' % (i.name, i.attrs) + yield i if self._select_debug: - print '(Recursive select picked up candidate %s %s)' % (i.name, i.attrs) - yield i + print '-' * 40 + _use_candidate_generator = recursive_select + elif _candidate_generator is None: + # By default, a tag's candidates are all of its + # children. If tag_name is defined, only yield tags + # with that name. if self._select_debug: - print '-' * 40 - _use_candidate_generator = recursive_select - elif _candidate_generator is None: - # By default, a tag's candidates are all of its - # children. If tag_name is defined, only yield tags - # with that name. - if self._select_debug: - if tag_name: - check = "[any]" + if tag_name: + check = "[any]" + else: + check = tag_name + print ' Default candidate generator, tag name="%s"' % check + if self._select_debug: + # This is redundant with later code, but it stops + # a bunch of bogus tags from cluttering up the + # debug log. + def default_candidate_generator(tag): + for child in tag.descendants: + if not isinstance(child, Tag): + continue + if tag_name and not child.name == tag_name: + continue + yield child + _use_candidate_generator = default_candidate_generator else: - check = tag_name - print ' Default candidate generator, tag name="%s"' % check - if self._select_debug: - # This is redundant with later code, but it stops - # a bunch of bogus tags from cluttering up the - # debug log. 
- def default_candidate_generator(tag): - for child in tag.descendants: - if not isinstance(child, Tag): - continue - if tag_name and not child.name == tag_name: - continue - yield child - _use_candidate_generator = default_candidate_generator + _use_candidate_generator = lambda tag: tag.descendants else: - _use_candidate_generator = lambda tag: tag.descendants - else: - _use_candidate_generator = _candidate_generator + _use_candidate_generator = _candidate_generator + + count = 0 + for tag in current_context: + if self._select_debug: + print " Running candidate generator on %s %s" % ( + tag.name, repr(tag.attrs)) + for candidate in _use_candidate_generator(tag): + if not isinstance(candidate, Tag): + continue + if tag_name and candidate.name != tag_name: + continue + if checker is not None: + try: + result = checker(candidate) + except StopIteration: + # The checker has decided we should no longer + # run the generator. + break + if checker is None or result: + if self._select_debug: + print " SUCCESS %s %s" % (candidate.name, repr(candidate.attrs)) + if id(candidate) not in new_context_ids: + # If a tag matches a selector more than once, + # don't include it in the context more than once. + new_context.append(candidate) + new_context_ids.add(id(candidate)) + if limit and len(new_context) >= limit: + break + elif self._select_debug: + print " FAILURE %s %s" % (candidate.name, repr(candidate.attrs)) - new_context = [] - new_context_ids = set([]) - for tag in current_context: - if self._select_debug: - print " Running candidate generator on %s %s" % ( - tag.name, repr(tag.attrs)) - for candidate in _use_candidate_generator(tag): - if not isinstance(candidate, Tag): - continue - if tag_name and candidate.name != tag_name: - continue - if checker is not None: - try: - result = checker(candidate) - except StopIteration: - # The checker has decided we should no longer - # run the generator. 
- break - if checker is None or result: - if self._select_debug: - print " SUCCESS %s %s" % (candidate.name, repr(candidate.attrs)) - if id(candidate) not in new_context_ids: - # If a tag matches a selector more than once, - # don't include it in the context more than once. - new_context.append(candidate) - new_context_ids.add(id(candidate)) - elif self._select_debug: - print " FAILURE %s %s" % (candidate.name, repr(candidate.attrs)) current_context = new_context diff --git a/lib/bs4/testing.py b/lib/bs4/testing.py index fd4495ac5..9e5e295ef 100644 --- a/lib/bs4/testing.py +++ b/lib/bs4/testing.py @@ -1,5 +1,6 @@ """Helper classes for tests.""" +import pickle import copy import functools import unittest @@ -43,6 +44,16 @@ def assertSoupEquals(self, to_parse, compare_parsed_to=None): self.assertEqual(obj.decode(), self.document_for(compare_parsed_to)) + def assertConnectedness(self, element): + """Ensure that next_element and previous_element are properly + set for all descendants of the given element. + """ + earlier = None + for e in element.descendants: + if earlier: + self.assertEqual(e, earlier.next_element) + self.assertEqual(earlier, e.previous_element) + earlier = e class HTMLTreeBuilderSmokeTest(object): @@ -54,6 +65,15 @@ class HTMLTreeBuilderSmokeTest(object): markup in these tests, there's not much room for interpretation. """ + def test_pickle_and_unpickle_identity(self): + # Pickling a tree, then unpickling it, yields a tree identical + # to the original. 
+ tree = self.soup("foo") + dumped = pickle.dumps(tree, 2) + loaded = pickle.loads(dumped) + self.assertEqual(loaded.__class__, BeautifulSoup) + self.assertEqual(loaded.decode(), tree.decode()) + def assertDoctypeHandled(self, doctype_fragment): """Assert that a given doctype string is handled correctly.""" doctype_str, soup = self._document_with_doctype(doctype_fragment) @@ -114,6 +134,11 @@ def test_real_xhtml_document(self): soup.encode("utf-8").replace(b"\n", b""), markup.replace(b"\n", b"")) + def test_processing_instruction(self): + markup = b"""""" + soup = self.soup(markup) + self.assertEqual(markup, soup.encode("utf8")) + def test_deepcopy(self): """Make sure you can copy the tree builder. @@ -155,6 +180,23 @@ def test_br_is_always_empty_element_tag(self): def test_nested_formatting_elements(self): self.assertSoupEquals("") + def test_double_head(self): + html = ''' + + +Ordinary HEAD element test + + + +Hello, world! + + +''' + soup = self.soup(html) + self.assertEqual("text/javascript", soup.find('script')['type']) + def test_comment(self): # Comments are represented as Comment objects. markup = "

foobaz

" @@ -221,6 +263,14 @@ def test_deeply_nested_multivalued_attribute(self): soup = self.soup(markup) self.assertEqual(["css"], soup.div.div['class']) + def test_multivalued_attribute_on_html(self): + # html5lib uses a different API to set the attributes ot the + # tag. This has caused problems with multivalued + # attributes. + markup = '' + soup = self.soup(markup) + self.assertEqual(["a", "b"], soup.html['class']) + def test_angle_brackets_in_attribute_values_are_escaped(self): self.assertSoupEquals('', '') @@ -253,6 +303,35 @@ def test_multipart_strings(self): soup = self.soup("

\nfoo

") self.assertEqual("p", soup.h2.string.next_element.name) self.assertEqual("p", soup.p.name) + self.assertConnectedness(soup) + + def test_head_tag_between_head_and_body(self): + "Prevent recurrence of a bug in the html5lib treebuilder." + content = """ + + foo + +""" + soup = self.soup(content) + self.assertNotEqual(None, soup.html.body) + self.assertConnectedness(soup) + + def test_multiple_copies_of_a_tag(self): + "Prevent recurrence of a bug in the html5lib treebuilder." + content = """ + + + + + +""" + soup = self.soup(content) + self.assertConnectedness(soup.article) def test_basic_namespaces(self): """Parsers don't need to *understand* namespaces, but at the @@ -463,6 +542,15 @@ def test_tag_with_no_attributes_can_have_attributes_added(self): class XMLTreeBuilderSmokeTest(object): + def test_pickle_and_unpickle_identity(self): + # Pickling a tree, then unpickling it, yields a tree identical + # to the original. + tree = self.soup("foo") + dumped = pickle.dumps(tree, 2) + loaded = pickle.loads(dumped) + self.assertEqual(loaded.__class__, BeautifulSoup) + self.assertEqual(loaded.decode(), tree.decode()) + def test_docstring_generated(self): soup = self.soup("") self.assertEqual( @@ -485,7 +573,7 @@ def test_formatter_processes_script_tag_for_xml_documents(self): """ - soup = BeautifulSoup(doc, "xml") + soup = BeautifulSoup(doc, "lxml-xml") # lxml would have stripped this while parsing, but we can add # it later. 
soup.script.string = 'console.log("< < hey > > ");' diff --git a/lib/html5lib/__init__.py b/lib/html5lib/__init__.py index 19a4b7d69..3ba1163cf 100644 --- a/lib/html5lib/__init__.py +++ b/lib/html5lib/__init__.py @@ -20,4 +20,6 @@ __all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder", "getTreeWalker", "serialize"] -__version__ = "0.999" + +# this has to be at the top level, see how setup.py parses this +__version__ = "0.999999" diff --git a/lib/html5lib/constants.py b/lib/html5lib/constants.py index e7089846d..d938e0ae6 100644 --- a/lib/html5lib/constants.py +++ b/lib/html5lib/constants.py @@ -1,292 +1,290 @@ from __future__ import absolute_import, division, unicode_literals import string -import gettext -_ = gettext.gettext EOF = None E = { "null-character": - _("Null character in input stream, replaced with U+FFFD."), + "Null character in input stream, replaced with U+FFFD.", "invalid-codepoint": - _("Invalid codepoint in stream."), + "Invalid codepoint in stream.", "incorrectly-placed-solidus": - _("Solidus (/) incorrectly placed in tag."), + "Solidus (/) incorrectly placed in tag.", "incorrect-cr-newline-entity": - _("Incorrect CR newline entity, replaced with LF."), + "Incorrect CR newline entity, replaced with LF.", "illegal-windows-1252-entity": - _("Entity used with illegal number (windows-1252 reference)."), + "Entity used with illegal number (windows-1252 reference).", "cant-convert-numeric-entity": - _("Numeric entity couldn't be converted to character " - "(codepoint U+%(charAsInt)08x)."), + "Numeric entity couldn't be converted to character " + "(codepoint U+%(charAsInt)08x).", "illegal-codepoint-for-numeric-entity": - _("Numeric entity represents an illegal codepoint: " - "U+%(charAsInt)08x."), + "Numeric entity represents an illegal codepoint: " + "U+%(charAsInt)08x.", "numeric-entity-without-semicolon": - _("Numeric entity didn't end with ';'."), + "Numeric entity didn't end with ';'.", "expected-numeric-entity-but-got-eof": - 
_("Numeric entity expected. Got end of file instead."), + "Numeric entity expected. Got end of file instead.", "expected-numeric-entity": - _("Numeric entity expected but none found."), + "Numeric entity expected but none found.", "named-entity-without-semicolon": - _("Named entity didn't end with ';'."), + "Named entity didn't end with ';'.", "expected-named-entity": - _("Named entity expected. Got none."), + "Named entity expected. Got none.", "attributes-in-end-tag": - _("End tag contains unexpected attributes."), + "End tag contains unexpected attributes.", 'self-closing-flag-on-end-tag': - _("End tag contains unexpected self-closing flag."), + "End tag contains unexpected self-closing flag.", "expected-tag-name-but-got-right-bracket": - _("Expected tag name. Got '>' instead."), + "Expected tag name. Got '>' instead.", "expected-tag-name-but-got-question-mark": - _("Expected tag name. Got '?' instead. (HTML doesn't " - "support processing instructions.)"), + "Expected tag name. Got '?' instead. (HTML doesn't " + "support processing instructions.)", "expected-tag-name": - _("Expected tag name. Got something else instead"), + "Expected tag name. Got something else instead", "expected-closing-tag-but-got-right-bracket": - _("Expected closing tag. Got '>' instead. Ignoring ''."), + "Expected closing tag. Got '>' instead. Ignoring ''.", "expected-closing-tag-but-got-eof": - _("Expected closing tag. Unexpected end of file."), + "Expected closing tag. Unexpected end of file.", "expected-closing-tag-but-got-char": - _("Expected closing tag. Unexpected character '%(data)s' found."), + "Expected closing tag. Unexpected character '%(data)s' found.", "eof-in-tag-name": - _("Unexpected end of file in the tag name."), + "Unexpected end of file in the tag name.", "expected-attribute-name-but-got-eof": - _("Unexpected end of file. Expected attribute name instead."), + "Unexpected end of file. 
Expected attribute name instead.", "eof-in-attribute-name": - _("Unexpected end of file in attribute name."), + "Unexpected end of file in attribute name.", "invalid-character-in-attribute-name": - _("Invalid character in attribute name"), + "Invalid character in attribute name", "duplicate-attribute": - _("Dropped duplicate attribute on tag."), + "Dropped duplicate attribute on tag.", "expected-end-of-tag-name-but-got-eof": - _("Unexpected end of file. Expected = or end of tag."), + "Unexpected end of file. Expected = or end of tag.", "expected-attribute-value-but-got-eof": - _("Unexpected end of file. Expected attribute value."), + "Unexpected end of file. Expected attribute value.", "expected-attribute-value-but-got-right-bracket": - _("Expected attribute value. Got '>' instead."), + "Expected attribute value. Got '>' instead.", 'equals-in-unquoted-attribute-value': - _("Unexpected = in unquoted attribute"), + "Unexpected = in unquoted attribute", 'unexpected-character-in-unquoted-attribute-value': - _("Unexpected character in unquoted attribute"), + "Unexpected character in unquoted attribute", "invalid-character-after-attribute-name": - _("Unexpected character after attribute name."), + "Unexpected character after attribute name.", "unexpected-character-after-attribute-value": - _("Unexpected character after attribute value."), + "Unexpected character after attribute value.", "eof-in-attribute-value-double-quote": - _("Unexpected end of file in attribute value (\")."), + "Unexpected end of file in attribute value (\").", "eof-in-attribute-value-single-quote": - _("Unexpected end of file in attribute value (')."), + "Unexpected end of file in attribute value (').", "eof-in-attribute-value-no-quotes": - _("Unexpected end of file in attribute value."), + "Unexpected end of file in attribute value.", "unexpected-EOF-after-solidus-in-tag": - _("Unexpected end of file in tag. Expected >"), + "Unexpected end of file in tag. 
Expected >", "unexpected-character-after-solidus-in-tag": - _("Unexpected character after / in tag. Expected >"), + "Unexpected character after / in tag. Expected >", "expected-dashes-or-doctype": - _("Expected '--' or 'DOCTYPE'. Not found."), + "Expected '--' or 'DOCTYPE'. Not found.", "unexpected-bang-after-double-dash-in-comment": - _("Unexpected ! after -- in comment"), + "Unexpected ! after -- in comment", "unexpected-space-after-double-dash-in-comment": - _("Unexpected space after -- in comment"), + "Unexpected space after -- in comment", "incorrect-comment": - _("Incorrect comment."), + "Incorrect comment.", "eof-in-comment": - _("Unexpected end of file in comment."), + "Unexpected end of file in comment.", "eof-in-comment-end-dash": - _("Unexpected end of file in comment (-)"), + "Unexpected end of file in comment (-)", "unexpected-dash-after-double-dash-in-comment": - _("Unexpected '-' after '--' found in comment."), + "Unexpected '-' after '--' found in comment.", "eof-in-comment-double-dash": - _("Unexpected end of file in comment (--)."), + "Unexpected end of file in comment (--).", "eof-in-comment-end-space-state": - _("Unexpected end of file in comment."), + "Unexpected end of file in comment.", "eof-in-comment-end-bang-state": - _("Unexpected end of file in comment."), + "Unexpected end of file in comment.", "unexpected-char-in-comment": - _("Unexpected character in comment found."), + "Unexpected character in comment found.", "need-space-after-doctype": - _("No space after literal string 'DOCTYPE'."), + "No space after literal string 'DOCTYPE'.", "expected-doctype-name-but-got-right-bracket": - _("Unexpected > character. Expected DOCTYPE name."), + "Unexpected > character. Expected DOCTYPE name.", "expected-doctype-name-but-got-eof": - _("Unexpected end of file. Expected DOCTYPE name."), + "Unexpected end of file. 
Expected DOCTYPE name.", "eof-in-doctype-name": - _("Unexpected end of file in DOCTYPE name."), + "Unexpected end of file in DOCTYPE name.", "eof-in-doctype": - _("Unexpected end of file in DOCTYPE."), + "Unexpected end of file in DOCTYPE.", "expected-space-or-right-bracket-in-doctype": - _("Expected space or '>'. Got '%(data)s'"), + "Expected space or '>'. Got '%(data)s'", "unexpected-end-of-doctype": - _("Unexpected end of DOCTYPE."), + "Unexpected end of DOCTYPE.", "unexpected-char-in-doctype": - _("Unexpected character in DOCTYPE."), + "Unexpected character in DOCTYPE.", "eof-in-innerhtml": - _("XXX innerHTML EOF"), + "XXX innerHTML EOF", "unexpected-doctype": - _("Unexpected DOCTYPE. Ignored."), + "Unexpected DOCTYPE. Ignored.", "non-html-root": - _("html needs to be the first start tag."), + "html needs to be the first start tag.", "expected-doctype-but-got-eof": - _("Unexpected End of file. Expected DOCTYPE."), + "Unexpected End of file. Expected DOCTYPE.", "unknown-doctype": - _("Erroneous DOCTYPE."), + "Erroneous DOCTYPE.", "expected-doctype-but-got-chars": - _("Unexpected non-space characters. Expected DOCTYPE."), + "Unexpected non-space characters. Expected DOCTYPE.", "expected-doctype-but-got-start-tag": - _("Unexpected start tag (%(name)s). Expected DOCTYPE."), + "Unexpected start tag (%(name)s). Expected DOCTYPE.", "expected-doctype-but-got-end-tag": - _("Unexpected end tag (%(name)s). Expected DOCTYPE."), + "Unexpected end tag (%(name)s). Expected DOCTYPE.", "end-tag-after-implied-root": - _("Unexpected end tag (%(name)s) after the (implied) root element."), + "Unexpected end tag (%(name)s) after the (implied) root element.", "expected-named-closing-tag-but-got-eof": - _("Unexpected end of file. Expected end tag (%(name)s)."), + "Unexpected end of file. Expected end tag (%(name)s).", "two-heads-are-not-better-than-one": - _("Unexpected start tag head in existing head. Ignored."), + "Unexpected start tag head in existing head. 
Ignored.", "unexpected-end-tag": - _("Unexpected end tag (%(name)s). Ignored."), + "Unexpected end tag (%(name)s). Ignored.", "unexpected-start-tag-out-of-my-head": - _("Unexpected start tag (%(name)s) that can be in head. Moved."), + "Unexpected start tag (%(name)s) that can be in head. Moved.", "unexpected-start-tag": - _("Unexpected start tag (%(name)s)."), + "Unexpected start tag (%(name)s).", "missing-end-tag": - _("Missing end tag (%(name)s)."), + "Missing end tag (%(name)s).", "missing-end-tags": - _("Missing end tags (%(name)s)."), + "Missing end tags (%(name)s).", "unexpected-start-tag-implies-end-tag": - _("Unexpected start tag (%(startName)s) " - "implies end tag (%(endName)s)."), + "Unexpected start tag (%(startName)s) " + "implies end tag (%(endName)s).", "unexpected-start-tag-treated-as": - _("Unexpected start tag (%(originalName)s). Treated as %(newName)s."), + "Unexpected start tag (%(originalName)s). Treated as %(newName)s.", "deprecated-tag": - _("Unexpected start tag %(name)s. Don't use it!"), + "Unexpected start tag %(name)s. Don't use it!", "unexpected-start-tag-ignored": - _("Unexpected start tag %(name)s. Ignored."), + "Unexpected start tag %(name)s. Ignored.", "expected-one-end-tag-but-got-another": - _("Unexpected end tag (%(gotName)s). " - "Missing end tag (%(expectedName)s)."), + "Unexpected end tag (%(gotName)s). " + "Missing end tag (%(expectedName)s).", "end-tag-too-early": - _("End tag (%(name)s) seen too early. Expected other end tag."), + "End tag (%(name)s) seen too early. Expected other end tag.", "end-tag-too-early-named": - _("Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s)."), + "Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s).", "end-tag-too-early-ignored": - _("End tag (%(name)s) seen too early. Ignored."), + "End tag (%(name)s) seen too early. 
Ignored.", "adoption-agency-1.1": - _("End tag (%(name)s) violates step 1, " - "paragraph 1 of the adoption agency algorithm."), + "End tag (%(name)s) violates step 1, " + "paragraph 1 of the adoption agency algorithm.", "adoption-agency-1.2": - _("End tag (%(name)s) violates step 1, " - "paragraph 2 of the adoption agency algorithm."), + "End tag (%(name)s) violates step 1, " + "paragraph 2 of the adoption agency algorithm.", "adoption-agency-1.3": - _("End tag (%(name)s) violates step 1, " - "paragraph 3 of the adoption agency algorithm."), + "End tag (%(name)s) violates step 1, " + "paragraph 3 of the adoption agency algorithm.", "adoption-agency-4.4": - _("End tag (%(name)s) violates step 4, " - "paragraph 4 of the adoption agency algorithm."), + "End tag (%(name)s) violates step 4, " + "paragraph 4 of the adoption agency algorithm.", "unexpected-end-tag-treated-as": - _("Unexpected end tag (%(originalName)s). Treated as %(newName)s."), + "Unexpected end tag (%(originalName)s). Treated as %(newName)s.", "no-end-tag": - _("This element (%(name)s) has no end tag."), + "This element (%(name)s) has no end tag.", "unexpected-implied-end-tag-in-table": - _("Unexpected implied end tag (%(name)s) in the table phase."), + "Unexpected implied end tag (%(name)s) in the table phase.", "unexpected-implied-end-tag-in-table-body": - _("Unexpected implied end tag (%(name)s) in the table body phase."), + "Unexpected implied end tag (%(name)s) in the table body phase.", "unexpected-char-implies-table-voodoo": - _("Unexpected non-space characters in " - "table context caused voodoo mode."), + "Unexpected non-space characters in " + "table context caused voodoo mode.", "unexpected-hidden-input-in-table": - _("Unexpected input with type hidden in table context."), + "Unexpected input with type hidden in table context.", "unexpected-form-in-table": - _("Unexpected form in table context."), + "Unexpected form in table context.", "unexpected-start-tag-implies-table-voodoo": - 
_("Unexpected start tag (%(name)s) in " - "table context caused voodoo mode."), + "Unexpected start tag (%(name)s) in " + "table context caused voodoo mode.", "unexpected-end-tag-implies-table-voodoo": - _("Unexpected end tag (%(name)s) in " - "table context caused voodoo mode."), + "Unexpected end tag (%(name)s) in " + "table context caused voodoo mode.", "unexpected-cell-in-table-body": - _("Unexpected table cell start tag (%(name)s) " - "in the table body phase."), + "Unexpected table cell start tag (%(name)s) " + "in the table body phase.", "unexpected-cell-end-tag": - _("Got table cell end tag (%(name)s) " - "while required end tags are missing."), + "Got table cell end tag (%(name)s) " + "while required end tags are missing.", "unexpected-end-tag-in-table-body": - _("Unexpected end tag (%(name)s) in the table body phase. Ignored."), + "Unexpected end tag (%(name)s) in the table body phase. Ignored.", "unexpected-implied-end-tag-in-table-row": - _("Unexpected implied end tag (%(name)s) in the table row phase."), + "Unexpected implied end tag (%(name)s) in the table row phase.", "unexpected-end-tag-in-table-row": - _("Unexpected end tag (%(name)s) in the table row phase. Ignored."), + "Unexpected end tag (%(name)s) in the table row phase. Ignored.", "unexpected-select-in-select": - _("Unexpected select start tag in the select phase " - "treated as select end tag."), + "Unexpected select start tag in the select phase " + "treated as select end tag.", "unexpected-input-in-select": - _("Unexpected input start tag in the select phase."), + "Unexpected input start tag in the select phase.", "unexpected-start-tag-in-select": - _("Unexpected start tag token (%(name)s in the select phase. " - "Ignored."), + "Unexpected start tag token (%(name)s in the select phase. " + "Ignored.", "unexpected-end-tag-in-select": - _("Unexpected end tag (%(name)s) in the select phase. Ignored."), + "Unexpected end tag (%(name)s) in the select phase. 
Ignored.", "unexpected-table-element-start-tag-in-select-in-table": - _("Unexpected table element start tag (%(name)s) in the select in table phase."), + "Unexpected table element start tag (%(name)s) in the select in table phase.", "unexpected-table-element-end-tag-in-select-in-table": - _("Unexpected table element end tag (%(name)s) in the select in table phase."), + "Unexpected table element end tag (%(name)s) in the select in table phase.", "unexpected-char-after-body": - _("Unexpected non-space characters in the after body phase."), + "Unexpected non-space characters in the after body phase.", "unexpected-start-tag-after-body": - _("Unexpected start tag token (%(name)s)" - " in the after body phase."), + "Unexpected start tag token (%(name)s)" + " in the after body phase.", "unexpected-end-tag-after-body": - _("Unexpected end tag token (%(name)s)" - " in the after body phase."), + "Unexpected end tag token (%(name)s)" + " in the after body phase.", "unexpected-char-in-frameset": - _("Unexpected characters in the frameset phase. Characters ignored."), + "Unexpected characters in the frameset phase. Characters ignored.", "unexpected-start-tag-in-frameset": - _("Unexpected start tag token (%(name)s)" - " in the frameset phase. Ignored."), + "Unexpected start tag token (%(name)s)" + " in the frameset phase. Ignored.", "unexpected-frameset-in-frameset-innerhtml": - _("Unexpected end tag token (frameset) " - "in the frameset phase (innerHTML)."), + "Unexpected end tag token (frameset) " + "in the frameset phase (innerHTML).", "unexpected-end-tag-in-frameset": - _("Unexpected end tag token (%(name)s)" - " in the frameset phase. Ignored."), + "Unexpected end tag token (%(name)s)" + " in the frameset phase. Ignored.", "unexpected-char-after-frameset": - _("Unexpected non-space characters in the " - "after frameset phase. Ignored."), + "Unexpected non-space characters in the " + "after frameset phase. 
Ignored.", "unexpected-start-tag-after-frameset": - _("Unexpected start tag (%(name)s)" - " in the after frameset phase. Ignored."), + "Unexpected start tag (%(name)s)" + " in the after frameset phase. Ignored.", "unexpected-end-tag-after-frameset": - _("Unexpected end tag (%(name)s)" - " in the after frameset phase. Ignored."), + "Unexpected end tag (%(name)s)" + " in the after frameset phase. Ignored.", "unexpected-end-tag-after-body-innerhtml": - _("Unexpected end tag after body(innerHtml)"), + "Unexpected end tag after body(innerHtml)", "expected-eof-but-got-char": - _("Unexpected non-space characters. Expected end of file."), + "Unexpected non-space characters. Expected end of file.", "expected-eof-but-got-start-tag": - _("Unexpected start tag (%(name)s)" - ". Expected end of file."), + "Unexpected start tag (%(name)s)" + ". Expected end of file.", "expected-eof-but-got-end-tag": - _("Unexpected end tag (%(name)s)" - ". Expected end of file."), + "Unexpected end tag (%(name)s)" + ". Expected end of file.", "eof-in-table": - _("Unexpected end of file. Expected table content."), + "Unexpected end of file. Expected table content.", "eof-in-select": - _("Unexpected end of file. Expected select content."), + "Unexpected end of file. Expected select content.", "eof-in-frameset": - _("Unexpected end of file. Expected frameset content."), + "Unexpected end of file. Expected frameset content.", "eof-in-script-in-script": - _("Unexpected end of file. Expected script content."), + "Unexpected end of file. Expected script content.", "eof-in-foreign-lands": - _("Unexpected end of file. Expected foreign content"), + "Unexpected end of file. 
Expected foreign content", "non-void-element-with-trailing-solidus": - _("Trailing solidus not allowed on element %(name)s"), + "Trailing solidus not allowed on element %(name)s", "unexpected-html-element-in-foreign-content": - _("Element %(name)s not allowed in a non-html context"), + "Element %(name)s not allowed in a non-html context", "unexpected-end-tag-before-html": - _("Unexpected end tag (%(name)s) before html."), + "Unexpected end tag (%(name)s) before html.", "XXX-undefined-error": - _("Undefined error (this sucks and should be fixed)"), + "Undefined error (this sucks and should be fixed)", } namespaces = { @@ -298,7 +296,7 @@ "xmlns": "http://www.w3.org/2000/xmlns/" } -scopingElements = frozenset(( +scopingElements = frozenset([ (namespaces["html"], "applet"), (namespaces["html"], "caption"), (namespaces["html"], "html"), @@ -316,9 +314,9 @@ (namespaces["svg"], "foreignObject"), (namespaces["svg"], "desc"), (namespaces["svg"], "title"), -)) +]) -formattingElements = frozenset(( +formattingElements = frozenset([ (namespaces["html"], "a"), (namespaces["html"], "b"), (namespaces["html"], "big"), @@ -333,9 +331,9 @@ (namespaces["html"], "strong"), (namespaces["html"], "tt"), (namespaces["html"], "u") -)) +]) -specialElements = frozenset(( +specialElements = frozenset([ (namespaces["html"], "address"), (namespaces["html"], "applet"), (namespaces["html"], "area"), @@ -416,22 +414,22 @@ (namespaces["html"], "wbr"), (namespaces["html"], "xmp"), (namespaces["svg"], "foreignObject") -)) +]) -htmlIntegrationPointElements = frozenset(( +htmlIntegrationPointElements = frozenset([ (namespaces["mathml"], "annotaion-xml"), (namespaces["svg"], "foreignObject"), (namespaces["svg"], "desc"), (namespaces["svg"], "title") -)) +]) -mathmlTextIntegrationPointElements = frozenset(( +mathmlTextIntegrationPointElements = frozenset([ (namespaces["mathml"], "mi"), (namespaces["mathml"], "mo"), (namespaces["mathml"], "mn"), (namespaces["mathml"], "ms"), (namespaces["mathml"], 
"mtext") -)) +]) adjustForeignAttributes = { "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]), @@ -451,21 +449,21 @@ unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in adjustForeignAttributes.items()]) -spaceCharacters = frozenset(( +spaceCharacters = frozenset([ "\t", "\n", "\u000C", " ", "\r" -)) +]) -tableInsertModeElements = frozenset(( +tableInsertModeElements = frozenset([ "table", "tbody", "tfoot", "thead", "tr" -)) +]) asciiLowercase = frozenset(string.ascii_lowercase) asciiUppercase = frozenset(string.ascii_uppercase) @@ -486,7 +484,7 @@ "h6" ) -voidElements = frozenset(( +voidElements = frozenset([ "base", "command", "event-source", @@ -502,11 +500,11 @@ "input", "source", "track" -)) +]) -cdataElements = frozenset(('title', 'textarea')) +cdataElements = frozenset(['title', 'textarea']) -rcdataElements = frozenset(( +rcdataElements = frozenset([ 'style', 'script', 'xmp', @@ -514,27 +512,27 @@ 'noembed', 'noframes', 'noscript' -)) +]) booleanAttributes = { - "": frozenset(("irrelevant",)), - "style": frozenset(("scoped",)), - "img": frozenset(("ismap",)), - "audio": frozenset(("autoplay", "controls")), - "video": frozenset(("autoplay", "controls")), - "script": frozenset(("defer", "async")), - "details": frozenset(("open",)), - "datagrid": frozenset(("multiple", "disabled")), - "command": frozenset(("hidden", "disabled", "checked", "default")), - "hr": frozenset(("noshade")), - "menu": frozenset(("autosubmit",)), - "fieldset": frozenset(("disabled", "readonly")), - "option": frozenset(("disabled", "readonly", "selected")), - "optgroup": frozenset(("disabled", "readonly")), - "button": frozenset(("disabled", "autofocus")), - "input": frozenset(("disabled", "readonly", "required", "autofocus", "checked", "ismap")), - "select": frozenset(("disabled", "readonly", "autofocus", "multiple")), - "output": frozenset(("disabled", "readonly")), + "": frozenset(["irrelevant"]), + "style": frozenset(["scoped"]), + "img": 
frozenset(["ismap"]), + "audio": frozenset(["autoplay", "controls"]), + "video": frozenset(["autoplay", "controls"]), + "script": frozenset(["defer", "async"]), + "details": frozenset(["open"]), + "datagrid": frozenset(["multiple", "disabled"]), + "command": frozenset(["hidden", "disabled", "checked", "default"]), + "hr": frozenset(["noshade"]), + "menu": frozenset(["autosubmit"]), + "fieldset": frozenset(["disabled", "readonly"]), + "option": frozenset(["disabled", "readonly", "selected"]), + "optgroup": frozenset(["disabled", "readonly"]), + "button": frozenset(["disabled", "autofocus"]), + "input": frozenset(["disabled", "readonly", "required", "autofocus", "checked", "ismap"]), + "select": frozenset(["disabled", "readonly", "autofocus", "multiple"]), + "output": frozenset(["disabled", "readonly"]), } # entitiesWindows1252 has to be _ordered_ and needs to have an index. It @@ -574,7 +572,7 @@ 376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS ) -xmlEntities = frozenset(('lt;', 'gt;', 'amp;', 'apos;', 'quot;')) +xmlEntities = frozenset(['lt;', 'gt;', 'amp;', 'apos;', 'quot;']) entities = { "AElig": "\xc6", @@ -3088,8 +3086,8 @@ "ParseError": 7 } -tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"], - tokenTypes["EmptyTag"])) +tagTokenTypes = frozenset([tokenTypes["StartTag"], tokenTypes["EndTag"], + tokenTypes["EmptyTag"]]) prefixes = dict([(v, k) for k, v in namespaces.items()]) diff --git a/lib/html5lib/filters/lint.py b/lib/html5lib/filters/lint.py index 7cc99a4ba..8884696dc 100644 --- a/lib/html5lib/filters/lint.py +++ b/lib/html5lib/filters/lint.py @@ -1,8 +1,5 @@ from __future__ import absolute_import, division, unicode_literals -from gettext import gettext -_ = gettext - from . 
import _base from ..constants import cdataElements, rcdataElements, voidElements @@ -23,24 +20,24 @@ def __iter__(self): if type in ("StartTag", "EmptyTag"): name = token["name"] if contentModelFlag != "PCDATA": - raise LintError(_("StartTag not in PCDATA content model flag: %(tag)s") % {"tag": name}) + raise LintError("StartTag not in PCDATA content model flag: %(tag)s" % {"tag": name}) if not isinstance(name, str): - raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name}) + raise LintError("Tag name is not a string: %(tag)r" % {"tag": name}) if not name: - raise LintError(_("Empty tag name")) + raise LintError("Empty tag name") if type == "StartTag" and name in voidElements: - raise LintError(_("Void element reported as StartTag token: %(tag)s") % {"tag": name}) + raise LintError("Void element reported as StartTag token: %(tag)s" % {"tag": name}) elif type == "EmptyTag" and name not in voidElements: - raise LintError(_("Non-void element reported as EmptyTag token: %(tag)s") % {"tag": token["name"]}) + raise LintError("Non-void element reported as EmptyTag token: %(tag)s" % {"tag": token["name"]}) if type == "StartTag": open_elements.append(name) for name, value in token["data"]: if not isinstance(name, str): - raise LintError(_("Attribute name is not a string: %(name)r") % {"name": name}) + raise LintError("Attribute name is not a string: %(name)r" % {"name": name}) if not name: - raise LintError(_("Empty attribute name")) + raise LintError("Empty attribute name") if not isinstance(value, str): - raise LintError(_("Attribute value is not a string: %(value)r") % {"value": value}) + raise LintError("Attribute value is not a string: %(value)r" % {"value": value}) if name in cdataElements: contentModelFlag = "CDATA" elif name in rcdataElements: @@ -51,43 +48,43 @@ def __iter__(self): elif type == "EndTag": name = token["name"] if not isinstance(name, str): - raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name}) + raise LintError("Tag 
name is not a string: %(tag)r" % {"tag": name}) if not name: - raise LintError(_("Empty tag name")) + raise LintError("Empty tag name") if name in voidElements: - raise LintError(_("Void element reported as EndTag token: %(tag)s") % {"tag": name}) + raise LintError("Void element reported as EndTag token: %(tag)s" % {"tag": name}) start_name = open_elements.pop() if start_name != name: - raise LintError(_("EndTag (%(end)s) does not match StartTag (%(start)s)") % {"end": name, "start": start_name}) + raise LintError("EndTag (%(end)s) does not match StartTag (%(start)s)" % {"end": name, "start": start_name}) contentModelFlag = "PCDATA" elif type == "Comment": if contentModelFlag != "PCDATA": - raise LintError(_("Comment not in PCDATA content model flag")) + raise LintError("Comment not in PCDATA content model flag") elif type in ("Characters", "SpaceCharacters"): data = token["data"] if not isinstance(data, str): - raise LintError(_("Attribute name is not a string: %(name)r") % {"name": data}) + raise LintError("Attribute name is not a string: %(name)r" % {"name": data}) if not data: - raise LintError(_("%(type)s token with empty data") % {"type": type}) + raise LintError("%(type)s token with empty data" % {"type": type}) if type == "SpaceCharacters": data = data.strip(spaceCharacters) if data: - raise LintError(_("Non-space character(s) found in SpaceCharacters token: %(token)r") % {"token": data}) + raise LintError("Non-space character(s) found in SpaceCharacters token: %(token)r" % {"token": data}) elif type == "Doctype": name = token["name"] if contentModelFlag != "PCDATA": - raise LintError(_("Doctype not in PCDATA content model flag: %(name)s") % {"name": name}) + raise LintError("Doctype not in PCDATA content model flag: %(name)s" % {"name": name}) if not isinstance(name, str): - raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name}) + raise LintError("Tag name is not a string: %(tag)r" % {"tag": name}) # XXX: what to do with token["data"] ? 
elif type in ("ParseError", "SerializeError"): pass else: - raise LintError(_("Unknown token type: %(type)s") % {"type": type}) + raise LintError("Unknown token type: %(type)s" % {"type": type}) yield token diff --git a/lib/html5lib/html5parser.py b/lib/html5lib/html5parser.py index b0f14f393..12aa6a35e 100644 --- a/lib/html5lib/html5parser.py +++ b/lib/html5lib/html5parser.py @@ -18,6 +18,7 @@ from .constants import tokenTypes, ReparseException, namespaces from .constants import htmlIntegrationPointElements, mathmlTextIntegrationPointElements from .constants import adjustForeignAttributes as adjustForeignAttributesMap +from .constants import E def parse(doc, treebuilder="etree", encoding=None, @@ -129,6 +130,17 @@ def reset(self): self.framesetOK = True + @property + def documentEncoding(self): + """The name of the character encoding + that was used to decode the input stream, + or :obj:`None` if that is not determined yet. + + """ + if not hasattr(self, 'tokenizer'): + return None + return self.tokenizer.stream.charEncoding[0] + def isHTMLIntegrationPoint(self, element): if (element.name == "annotation-xml" and element.namespace == namespaces["mathml"]): @@ -245,7 +257,7 @@ def parseError(self, errorcode="XXX-undefined-error", datavars={}): # XXX The idea is to make errorcode mandatory. 
self.errors.append((self.tokenizer.stream.position(), errorcode, datavars)) if self.strict: - raise ParseError + raise ParseError(E[errorcode] % datavars) def normalizeToken(self, token): """ HTML5 specific normalizations to the token stream """ @@ -868,7 +880,7 @@ def __init__(self, parser, tree): self.startTagHandler = utils.MethodDispatcher([ ("html", self.startTagHtml), (("base", "basefont", "bgsound", "command", "link", "meta", - "noframes", "script", "style", "title"), + "script", "style", "title"), self.startTagProcessInHead), ("body", self.startTagBody), ("frameset", self.startTagFrameset), @@ -1205,8 +1217,7 @@ def startTagIsIndex(self, token): attributes["name"] = "isindex" self.processStartTag(impliedTagToken("input", "StartTag", attributes=attributes, - selfClosing= - token["selfClosing"])) + selfClosing=token["selfClosing"])) self.processEndTag(impliedTagToken("label")) self.processStartTag(impliedTagToken("hr", "StartTag")) self.processEndTag(impliedTagToken("form")) diff --git a/lib/html5lib/inputstream.py b/lib/html5lib/inputstream.py index 9e03b9313..7020aa60f 100644 --- a/lib/html5lib/inputstream.py +++ b/lib/html5lib/inputstream.py @@ -28,7 +28,18 @@ class BufferedIOBase(object): asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase]) spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"]) -invalid_unicode_re = re.compile("[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uD800-\uDFFF\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]") + +invalid_unicode_no_surrogate = 
"[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" + +if utils.supports_lone_surrogates: + # Use one extra step of indirection and create surrogates with + # unichr. Not using this indirection would introduce an illegal + # unicode literal on platforms not supporting such lone + # surrogates. + invalid_unicode_re = re.compile(invalid_unicode_no_surrogate + + eval('"\\uD800-\\uDFFF"')) +else: + invalid_unicode_re = re.compile(invalid_unicode_no_surrogate) non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, @@ -164,13 +175,18 @@ def __init__(self, source): """ - # Craziness - if len("\U0010FFFF") == 1: + if not utils.supports_lone_surrogates: + # Such platforms will have already checked for such + # surrogate errors, so no need to do this checking. + self.reportCharacterErrors = None + self.replaceCharactersRegexp = None + elif len("\U0010FFFF") == 1: self.reportCharacterErrors = self.characterErrorsUCS4 - self.replaceCharactersRegexp = re.compile("[\uD800-\uDFFF]") + self.replaceCharactersRegexp = re.compile(eval('"[\\uD800-\\uDFFF]"')) else: self.reportCharacterErrors = self.characterErrorsUCS2 - self.replaceCharactersRegexp = re.compile("([\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?/ + (?P[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+) + # Match any character set and encoding + (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?) + |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?) 
+ # Assume the rest is data + ,.* + $ + ''', + re.VERBOSE) + + class HTMLSanitizerMixin(object): """ sanitization of XHTML+MathML+SVG and of inline style attributes.""" @@ -100,8 +115,8 @@ class HTMLSanitizerMixin(object): 'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y', 'y1', 'y2', 'zoomAndPan'] - attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'poster', - 'xlink:href', 'xml:base'] + attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'poster', 'background', 'datasrc', + 'dynsrc', 'lowsrc', 'ping', 'poster', 'xlink:href', 'xml:base'] svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill', 'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end', @@ -138,7 +153,9 @@ class HTMLSanitizerMixin(object): acceptable_protocols = ['ed2k', 'ftp', 'http', 'https', 'irc', 'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal', 'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag', - 'ssh', 'sftp', 'rtsp', 'afs'] + 'ssh', 'sftp', 'rtsp', 'afs', 'data'] + + acceptable_content_types = ['image/png', 'image/jpeg', 'image/gif', 'image/webp', 'image/bmp', 'text/plain'] # subclasses may define their own versions of these constants allowed_elements = acceptable_elements + mathml_elements + svg_elements @@ -147,6 +164,7 @@ class HTMLSanitizerMixin(object): allowed_css_keywords = acceptable_css_keywords allowed_svg_properties = acceptable_svg_properties allowed_protocols = acceptable_protocols + allowed_content_types = acceptable_content_types # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and # stripping out all # attributes not in ALLOWED_ATTRIBUTES. 
Style @@ -189,10 +207,17 @@ def allowed_token(self, token, token_type): unescape(attrs[attr])).lower() # remove replacement characters from unescaped characters val_unescaped = val_unescaped.replace("\ufffd", "") - if (re.match("^[a-z0-9][-+.a-z0-9]*:", val_unescaped) and - (val_unescaped.split(':')[0] not in - self.allowed_protocols)): - del attrs[attr] + uri = urlparse.urlparse(val_unescaped) + if uri and uri.scheme: + if uri.scheme not in self.allowed_protocols: + del attrs[attr] + if uri.scheme == 'data': + m = content_type_rgx.match(uri.path) + if not m: + del attrs[attr] + elif m.group('content_type') not in self.allowed_content_types: + del attrs[attr] + for attr in self.svg_attr_val_allows_ref: if attr in attrs: attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', @@ -245,7 +270,7 @@ def sanitize_css(self, style): elif prop.split('-')[0].lower() in ['background', 'border', 'margin', 'padding']: for keyword in value.split(): - if not keyword in self.acceptable_css_keywords and \ + if keyword not in self.acceptable_css_keywords and \ not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): break else: diff --git a/lib/html5lib/serializer/htmlserializer.py b/lib/html5lib/serializer/htmlserializer.py index 412a5a220..be4d63441 100644 --- a/lib/html5lib/serializer/htmlserializer.py +++ b/lib/html5lib/serializer/htmlserializer.py @@ -1,9 +1,6 @@ from __future__ import absolute_import, division, unicode_literals from six import text_type -import gettext -_ = gettext.gettext - try: from functools import reduce except ImportError: @@ -35,7 +32,7 @@ v = utils.surrogatePairToCodepoint(v) else: v = ord(v) - if not v in encode_entity_map or k.islower(): + if v not in encode_entity_map or k.islower(): # prefer < over < and similarly for &, >, etc. 
encode_entity_map[v] = k @@ -208,7 +205,7 @@ def serialize(self, treewalker, encoding=None): if token["systemId"]: if token["systemId"].find('"') >= 0: if token["systemId"].find("'") >= 0: - self.serializeError(_("System identifer contains both single and double quote characters")) + self.serializeError("System identifer contains both single and double quote characters") quote_char = "'" else: quote_char = '"' @@ -220,7 +217,7 @@ def serialize(self, treewalker, encoding=None): elif type in ("Characters", "SpaceCharacters"): if type == "SpaceCharacters" or in_cdata: if in_cdata and token["data"].find("= 0: - self.serializeError(_("Unexpected " % name) elif type == "Comment": data = token["data"] if data.find("--") >= 0: - self.serializeError(_("Comment contains --")) + self.serializeError("Comment contains --") yield self.encodeStrict("" % token["data"]) elif type == "Entity": name = token["name"] key = name + ";" - if not key in entities: - self.serializeError(_("Entity %s not recognized" % name)) + if key not in entities: + self.serializeError("Entity %s not recognized" % name) if self.resolve_entities and key not in xmlEntities: data = entities[key] else: diff --git a/lib/html5lib/treebuilders/dom.py b/lib/html5lib/treebuilders/dom.py index 61e5ed79e..234233b79 100644 --- a/lib/html5lib/treebuilders/dom.py +++ b/lib/html5lib/treebuilders/dom.py @@ -158,7 +158,7 @@ def insertText(self, data, parent=None): else: # HACK: allow text nodes as children of the document node if hasattr(self.dom, '_child_node_types'): - if not Node.TEXT_NODE in self.dom._child_node_types: + if Node.TEXT_NODE not in self.dom._child_node_types: self.dom._child_node_types = list(self.dom._child_node_types) self.dom._child_node_types.append(Node.TEXT_NODE) self.dom.appendChild(self.dom.createTextNode(data)) diff --git a/lib/html5lib/treewalkers/__init__.py b/lib/html5lib/treewalkers/__init__.py index 18124e75f..20b91b114 100644 --- a/lib/html5lib/treewalkers/__init__.py +++ 
b/lib/html5lib/treewalkers/__init__.py @@ -10,8 +10,12 @@ from __future__ import absolute_import, division, unicode_literals +__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshistream", "lxmletree", + "pulldom"] + import sys +from .. import constants from ..utils import default_etree treeWalkerCache = {} @@ -55,3 +59,89 @@ def getTreeWalker(treeType, implementation=None, **kwargs): # XXX: NEVER cache here, caching is done in the etree submodule return etree.getETreeModule(implementation, **kwargs).TreeWalker return treeWalkerCache.get(treeType) + + +def concatenateCharacterTokens(tokens): + pendingCharacters = [] + for token in tokens: + type = token["type"] + if type in ("Characters", "SpaceCharacters"): + pendingCharacters.append(token["data"]) + else: + if pendingCharacters: + yield {"type": "Characters", "data": "".join(pendingCharacters)} + pendingCharacters = [] + yield token + if pendingCharacters: + yield {"type": "Characters", "data": "".join(pendingCharacters)} + + +def pprint(walker): + """Pretty printer for tree walkers""" + output = [] + indent = 0 + for token in concatenateCharacterTokens(walker): + type = token["type"] + if type in ("StartTag", "EmptyTag"): + # tag name + if token["namespace"] and token["namespace"] != constants.namespaces["html"]: + if token["namespace"] in constants.prefixes: + ns = constants.prefixes[token["namespace"]] + else: + ns = token["namespace"] + name = "%s %s" % (ns, token["name"]) + else: + name = token["name"] + output.append("%s<%s>" % (" " * indent, name)) + indent += 2 + # attributes (sorted for consistent ordering) + attrs = token["data"] + for (namespace, localname), value in sorted(attrs.items()): + if namespace: + if namespace in constants.prefixes: + ns = constants.prefixes[namespace] + else: + ns = namespace + name = "%s %s" % (ns, localname) + else: + name = localname + output.append("%s%s=\"%s\"" % (" " * indent, name, value)) + # self-closing + if type == "EmptyTag": + indent -= 2 + + elif type == 
"EndTag": + indent -= 2 + + elif type == "Comment": + output.append("%s" % (" " * indent, token["data"])) + + elif type == "Doctype": + if token["name"]: + if token["publicId"]: + output.append("""%s""" % + (" " * indent, + token["name"], + token["publicId"], + token["systemId"] if token["systemId"] else "")) + elif token["systemId"]: + output.append("""%s""" % + (" " * indent, + token["name"], + token["systemId"])) + else: + output.append("%s" % (" " * indent, + token["name"])) + else: + output.append("%s" % (" " * indent,)) + + elif type == "Characters": + output.append("%s\"%s\"" % (" " * indent, token["data"])) + + elif type == "SpaceCharacters": + assert False, "concatenateCharacterTokens should have got rid of all Space tokens" + + else: + raise ValueError("Unknown token type, %s" % type) + + return "\n".join(output) diff --git a/lib/html5lib/treewalkers/_base.py b/lib/html5lib/treewalkers/_base.py index 34252e50c..4e11cd020 100644 --- a/lib/html5lib/treewalkers/_base.py +++ b/lib/html5lib/treewalkers/_base.py @@ -1,8 +1,8 @@ from __future__ import absolute_import, division, unicode_literals from six import text_type, string_types -import gettext -_ = gettext.gettext +__all__ = ["DOCUMENT", "DOCTYPE", "TEXT", "ELEMENT", "COMMENT", "ENTITY", "UNKNOWN", + "TreeWalker", "NonRecursiveTreeWalker"] from xml.dom import Node @@ -58,7 +58,7 @@ def emptyTag(self, namespace, name, attrs, hasChildren=False): "namespace": to_text(namespace), "data": attrs} if hasChildren: - yield self.error(_("Void element has children")) + yield self.error("Void element has children") def startTag(self, namespace, name, attrs): assert namespace is None or isinstance(namespace, string_types), type(namespace) @@ -122,7 +122,7 @@ def entity(self, name): return {"type": "Entity", "name": text_type(name)} def unknown(self, nodeType): - return self.error(_("Unknown node type: ") + nodeType) + return self.error("Unknown node type: " + nodeType) class NonRecursiveTreeWalker(TreeWalker): diff 
--git a/lib/html5lib/treewalkers/dom.py b/lib/html5lib/treewalkers/dom.py index a01287a94..ac4dcf31b 100644 --- a/lib/html5lib/treewalkers/dom.py +++ b/lib/html5lib/treewalkers/dom.py @@ -2,9 +2,6 @@ from xml.dom import Node -import gettext -_ = gettext.gettext - from . import _base diff --git a/lib/html5lib/treewalkers/etree.py b/lib/html5lib/treewalkers/etree.py index fd8a9cc9b..69840c21e 100644 --- a/lib/html5lib/treewalkers/etree.py +++ b/lib/html5lib/treewalkers/etree.py @@ -7,12 +7,10 @@ from ordereddict import OrderedDict except ImportError: OrderedDict = dict -import gettext -_ = gettext.gettext import re -from six import text_type +from six import string_types from . import _base from ..utils import moduleFactoryFactory @@ -60,7 +58,7 @@ def getNodeDetails(self, node): return _base.COMMENT, node.text else: - assert type(node.tag) == text_type, type(node.tag) + assert isinstance(node.tag, string_types), type(node.tag) # This is assumed to be an ordinary element match = tag_regexp.match(node.tag) if match: diff --git a/lib/html5lib/treewalkers/lxmletree.py b/lib/html5lib/treewalkers/lxmletree.py index bc934ac05..90e116d38 100644 --- a/lib/html5lib/treewalkers/lxmletree.py +++ b/lib/html5lib/treewalkers/lxmletree.py @@ -4,9 +4,6 @@ from lxml import etree from ..treebuilders.etree import tag_regexp -from gettext import gettext -_ = gettext - from . import _base from .. 
import ihatexml @@ -130,7 +127,7 @@ def __init__(self, tree): def getNodeDetails(self, node): if isinstance(node, tuple): # Text node node, key = node - assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key + assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key return _base.TEXT, ensure_str(getattr(node, key)) elif isinstance(node, Root): @@ -169,7 +166,7 @@ def getNodeDetails(self, node): attrs, len(node) > 0 or node.text) def getFirstChild(self, node): - assert not isinstance(node, tuple), _("Text nodes have no children") + assert not isinstance(node, tuple), "Text nodes have no children" assert len(node) or node.text, "Node has no children" if node.text: @@ -180,7 +177,7 @@ def getFirstChild(self, node): def getNextSibling(self, node): if isinstance(node, tuple): # Text node node, key = node - assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key + assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key if key == "text": # XXX: we cannot use a "bool(node) and node[0] or None" construct here # because node[0] might evaluate to False if it has no child element @@ -196,7 +193,7 @@ def getNextSibling(self, node): def getParentNode(self, node): if isinstance(node, tuple): # Text node node, key = node - assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key + assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key if key == "text": return node # else: fallback to "normal" processing diff --git a/lib/html5lib/utils.py b/lib/html5lib/utils.py index 2f41f4dfa..fdc18febb 100644 --- a/lib/html5lib/utils.py +++ b/lib/html5lib/utils.py @@ -2,6 +2,8 @@ from types import ModuleType +from six import text_type + try: import xml.etree.cElementTree as default_etree except ImportError: @@ -9,7 +11,26 @@ __all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair", - "surrogatePairToCodepoint", "moduleFactoryFactory"] + 
"surrogatePairToCodepoint", "moduleFactoryFactory", + "supports_lone_surrogates"] + + +# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be +# caught by the below test. In general this would be any platform +# using UTF-16 as its encoding of unicode strings, such as +# Jython. This is because UTF-16 itself is based on the use of such +# surrogates, and there is no mechanism to further escape such +# escapes. +try: + _x = eval('"\\uD800"') + if not isinstance(_x, text_type): + # We need this with u"" because of http://bugs.jython.org/issue2039 + _x = eval('u"\\uD800"') + assert isinstance(_x, text_type) +except: + supports_lone_surrogates = False +else: + supports_lone_surrogates = True class MethodDispatcher(dict):