Fix to get regex to work on Python > 3.6
nicochidt committed Sep 18, 2019
1 parent 7b0ddf7 commit dc0e608
Showing 1 changed file with 18 additions and 18 deletions.
36 changes: 18 additions & 18 deletions pythonwhois/parse.py
@@ -2,7 +2,7 @@
import re, sys, datetime, csv, pkgutil
from . import net, shared

try:
from io import StringIO
except ImportError:
from cStringIO import StringIO
@@ -25,13 +25,13 @@ def read_dataset(filename, destination, abbrev_key, name_key, is_dict=False):
destination[line[abbrev_key]] = line[name_key]
except IOError as e:
pass

airports = {}
countries = {}
states_au = {}
states_us = {}
states_ca = {}

try:
reader = csv.reader(pkgdata("airports.dat").splitlines())

@@ -50,7 +50,7 @@ def read_dataset(filename, destination, abbrev_key, name_key, is_dict=False):

def precompile_regexes(source, flags=0):
return [re.compile(regex, flags) for regex in source]
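As a quick illustration of the helper above (the pattern list here is hypothetical; the real module precompiles the grammar rules and other regex tables this way at import time):

import re

expiry_regexes = precompile_regexes([r"Expiration Date:\s*(?P<val>.+)"], re.IGNORECASE)
match = expiry_regexes[0].search("expiration date: 2020-01-01")
print(match.group("val"))  # -> 2020-01-01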

grammar = {
"_data": {
'id': ['Domain ID:[ ]*(?P<val>.+)'],
@@ -201,8 +201,8 @@ def precompile_regexes(source, flags=0):
}

def preprocess_regex(regex):
- # Fix for #2; prevents a ridiculous amount of varying size permutations.
- regex = re.sub(r"\\s\*\(\?P<([^>]+)>\.\+\)", r"\s*(?P<\1>\S.*)", regex)
+ # Fix for #2; prevents a ridiculous amount of varying size permutations
+ regex = re.sub(r"\\s\*\(\?P<([^>]+)>\.\+\)", r"\\s*(?P<\1>\\S.*)", regex)
# Experimental fix for #18; removes unnecessary variable-size whitespace
# matching, since we're stripping results anyway.
regex = re.sub(r"\[ \]\*\(\?P<([^>]+)>\.\*\)", r"(?P<\1>.*)", regex)
@@ -553,7 +553,7 @@ def parse_raw_whois(raw_data, normalized=None, never_query_handles=True, handle_
data["nameservers"].append(match.strip())
except KeyError as e:
data["nameservers"] = [match.strip()]


data["contacts"] = parse_registrants(raw_data, never_query_handles, handle_server)

@@ -645,7 +645,7 @@ def normalize_data(data, normalized):
for country, source in (("united states", states_us), ("australia", states_au), ("canada", states_ca)):
if country in contact["country"].lower() and contact["state"] in source:
contact["state"] = source[contact["state"]]

for key in ("email",):
if key in contact and contact[key] is not None and (normalized == True or key in normalized):
if is_string(contact[key]):
@@ -660,7 +660,7 @@ def normalize_data(data, normalized):
for key in ("city", "organization", "state", "country"):
if key in contact and contact[key] is not None and (normalized == True or key in normalized):
contact[key] = normalize_name(contact[key], abbreviation_threshold=3, length_threshold=3)

if "name" in contact and "organization" not in contact:
lines = [x.strip() for x in contact["name"].splitlines()]
new_lines = []
@@ -674,10 +674,10 @@
contact["name"] = "\n".join(lines)
else:
del contact["name"]

if len(new_lines) > 0:
contact["organization"] = "\n".join(new_lines)

if "street" in contact and "organization" not in contact:
lines = [x.strip() for x in contact["street"].splitlines()]
if len(lines) > 1:
@@ -686,7 +686,7 @@ def normalize_data(data, normalized):
contact["organization"] = lines[0]
contact["street"] = "\n".join(lines[1:])
break

for key in list(contact.keys()):
try:
contact[key] = contact[key].strip(", ")
@@ -831,10 +831,10 @@ def remove_suffixes(data):
# Removes everything before and after the first non-whitespace continuous string.
# Used to get rid of IP suffixes for nameservers.
cleaned_list = []

for entry in data:
cleaned_list.append(re.search("([^\s]+)\s*[\s]*", entry).group(1).lstrip())

return cleaned_list
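A toy example of that cleanup (made-up entries; real input comes from the nameserver fields of a WHOIS response):

import re

entries = ["NS1.EXAMPLE.COM 192.0.2.53", "ns2.example.com"]
cleaned_list = []
for entry in entries:
    # Keep only the first whitespace-delimited token, dropping any IP suffix.
    cleaned_list.append(re.search(r"([^\s]+)\s*[\s]*", entry).group(1).lstrip())
print(cleaned_list)  # -> ['NS1.EXAMPLE.COM', 'ns2.example.com']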

def parse_registrants(data, never_query_handles=True, handle_server=""):
@@ -911,7 +911,7 @@ def parse_registrants(data, never_query_handles=True, handle_server=""):
elif category == "admin":
admin_contact = data_reference
break

# Post-processing
for obj in (registrant, tech_contact, billing_contact, admin_contact):
if obj is not None:
@@ -986,18 +986,18 @@ def fetch_nic_contact(handle, lookup_server):
response = net.get_whois_raw(handle, lookup_server)
response = [segment.replace("\r", "") for segment in response] # Carriage returns are the devil
results = parse_nic_contact(response)

if len(results) > 0:
return results[0]
else:
raise shared.WhoisException("No contact data found in the response.")

def parse_nic_contact(data):
handle_contacts = []
for regex in nic_contact_regexes:
for segment in data:
matches = re.finditer(regex, segment)
for match in matches:
handle_contacts.append(match.groupdict())

return handle_contacts
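For the shape of the return value, a toy sketch with a made-up contact regex (the real nic_contact_regexes are precompiled elsewhere in parse.py):

import re

regexes = [re.compile(r"person:\s*(?P<name>\S.*)\nemail:\s*(?P<email>\S.*)")]
segments = ["person: Jane Doe\nemail: jane@example.com\n"]

handle_contacts = []
for regex in regexes:
    for segment in segments:
        # Each match contributes one dict keyed by the named groups.
        for match in re.finditer(regex, segment):
            handle_contacts.append(match.groupdict())
print(handle_contacts)  # -> [{'name': 'Jane Doe', 'email': 'jane@example.com'}]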
