Skip to content

Commit

Permalink
Use ua hash for smaller cache keys
Browse files: browse the repository at this point in the history
  • Loading branch information
thinkwelltwd committed Jan 21, 2019
1 parent af33b18 commit f0fc3f4
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 11 deletions.
15 changes: 8 additions & 7 deletions device_detector/device_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
NameVersionExtractor,
WholeNameExtractor,
)
from .settings import DDCache, WORTHLESS_UA_TYPES
from .settings import DDCache, WORTHLESS_UA_TYPES, ua_hash
from .yaml_loader import RegexLoader

MAC_iOS = {
Expand Down Expand Up @@ -73,7 +73,7 @@ def __init__(self, user_agent, skip_bot_detection=False):

# Holds the useragent that should be parsed
self.user_agent = user_agent

self.ua_hash = ua_hash(self.user_agent)
self.os = None
self.client = None
self.device = None
Expand All @@ -98,14 +98,15 @@ def class_name(self) -> str:
return self.__class__.__name__

def get_parse_cache(self):
if self.user_agent not in DDCache['user_agents']:
if self.ua_hash not in DDCache['user_agents']:
return None
return DDCache['user_agents'][self.user_agent].get('parsed', None)
return DDCache['user_agents'][self.ua_hash].get('parsed', None)

def set_parse_cache(self):
if self.user_agent not in DDCache['user_agents']:
DDCache['user_agents'][self.user_agent] = {}
DDCache['user_agents'][self.user_agent]['parsed'] = self
try:
DDCache['user_agents'][self.ua_hash]['parsed'] = self
except KeyError:
DDCache['user_agents'][self.ua_hash] = {'parsed': self}
return self

# -----------------------------------------------------------------------------
Expand Down
10 changes: 6 additions & 4 deletions device_detector/parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from ..settings import (
DDCache,
ua_hash,
)
from .extractors import (
NameExtractor,
Expand All @@ -30,13 +31,14 @@ def __init__(self, ua):
else:
self.user_agent = ua

self.ua_hash = ua_hash(self.user_agent)
self.ua_data = {}
self.app_name = ''
self.app_name_no_punctuation = ''
self.matched_regex = None
self.known = False
if self.user_agent not in DDCache['user_agents']:
DDCache['user_agents'][self.user_agent] = {}
if self.ua_hash not in DDCache['user_agents']:
DDCache['user_agents'][self.ua_hash] = {}

@property
def cache_name(self) -> str:
Expand All @@ -50,10 +52,10 @@ def dtype(self) -> str:
return self.cache_name.lower()

def get_from_cache(self) -> dict:
return DDCache['user_agents'][self.user_agent].get(self.cache_name, None)
return DDCache['user_agents'][self.ua_hash].get(self.cache_name, None)

def add_to_cache(self) -> dict:
DDCache['user_agents'][self.user_agent][self.cache_name] = self.ua_data
DDCache['user_agents'][self.ua_hash][self.cache_name] = self.ua_data
return self.ua_data

def _check_regex(self, regex):
Expand Down
11 changes: 11 additions & 0 deletions device_detector/settings.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
from collections import OrderedDict
from copy import deepcopy
from hashlib import md5
import os


def ua_hash(user_agent):
    """
    Build a compact cache key for a User Agent string.

    The string is UTF-8 encoded and digested with MD5; only the first
    9 hexadecimal characters (36 bits) of the digest are kept, so the
    key has a fixed, short length regardless of how long the User Agent
    is. Because the digest is truncated, two different User Agent
    strings can in principle map to the same key.
    """
    digest = md5(user_agent.encode('utf-8'))
    hex_digest = digest.hexdigest()
    # Keep just the leading 9 hex characters as the cache key.
    return hex_digest[:9]


# interpolate regex with anchors so
# iPhone / Tiphone are matched correctly
BOUNDED_REGEX = r'(?:^|[^A-Z0-9_\-])(?:{})'
Expand Down Expand Up @@ -85,4 +94,6 @@ def clear(self):
'LRUDict',
'ROOT',
'WORTHLESS_UA_TYPES',
'ua_hash',
)

0 comments on commit f0fc3f4

Please sign in to comment.