Replace the metaclass-based Lexer singleton with an explicit
Lexer.get_default_instance() accessor; Lexer() no longer self-initializes.
The class attribute backing the accessor is spelled `_default_instance`.
NOTE(review): the post-image index hash for sqlparse/lexer.py predates the
attribute rename; whitespace in context lines was reconstructed - verify
against the target tree before applying.

diff --git a/docs/source/extending.rst b/docs/source/extending.rst
index 97b7d389..0c10924b 100644
--- a/docs/source/extending.rst
+++ b/docs/source/extending.rst
@@ -45,7 +45,7 @@ a keyword to the lexer:
     from sqlparse.lexer import Lexer
 
     # get the lexer singleton object to configure it
-    lex = Lexer()
+    lex = Lexer.get_default_instance()
 
     # Clear the default configurations.
     # After this call, reg-exps and keyword dictionaries need to be loaded
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index 6e17fca2..9d25c9e6 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -7,6 +7,7 @@
 
 """SQL Lexer"""
 import re
+
 # This code is based on the SqlLexer in pygments.
 # http://pygments.org/
 # It's separated from the rest of pygments to increase performance
@@ -18,21 +19,39 @@
 from sqlparse.utils import consume
 
 
-class _LexerSingletonMetaclass(type):
-    _lexer_instance = None
-
-    def __call__(cls, *args, **kwargs):
-        if _LexerSingletonMetaclass._lexer_instance is None:
-            _LexerSingletonMetaclass._lexer_instance = super(
-                _LexerSingletonMetaclass, cls
-            ).__call__(*args, **kwargs)
-        return _LexerSingletonMetaclass._lexer_instance
-
-
-class Lexer(metaclass=_LexerSingletonMetaclass):
+class Lexer:
     """The Lexer supports configurable syntax.
     To add support for additional keywords, use the `add_keywords` method."""
 
+    _default_instance = None
+
+    # Development notes:
+    # - This class is prepared to be able to support additional SQL dialects
+    #   in the future by adding additional functions that take the place of
+    #   the function default_initialization()
+    # - The lexer class uses an explicit singleton behavior with the
+    #   instance-getter method get_default_instance(). This mechanism has
+    #   the advantage that the call signature of the entry-points to the
+    #   sqlparse library are not affected. Also, usage of sqlparse in third
+    #   party code does not need to be adapted. On the other hand, singleton
+    #   behavior is not thread safe, and the current implementation does not
+    #   easily allow for multiple SQL dialects to be parsed in the same
+    #   process. Such behavior can be supported in the future by passing a
+    #   suitably initialized lexer object as an additional parameter to the
+    #   entry-point functions (such as `parse`). Code will need to be written
+    #   to pass down and utilize such an object. The current implementation
+    #   is prepared to support this thread safe approach without the
+    #   default_instance part needing to change interface.
+
+    @classmethod
+    def get_default_instance(cls):
+        """Returns the lexer instance used internally
+        by the sqlparse core functions."""
+        if cls._default_instance is None:
+            cls._default_instance = cls()
+            cls._default_instance.default_initialization()
+        return cls._default_instance
+
     def default_initialization(self):
         """Initialize the lexer with default dictionaries.
         Useful if you need to revert custom syntax settings."""
@@ -45,13 +64,10 @@ def default_initialization(self):
         self.add_keywords(keywords.KEYWORDS_MSACCESS)
         self.add_keywords(keywords.KEYWORDS)
 
-    def __init__(self):
-        self.default_initialization()
-
     def clear(self):
         """Clear all syntax configurations.
         Useful if you want to load a reduced set of syntax configurations.
-        After this call, reg-exps and keyword dictionaries need to be loaded
+        After this call, regexps and keyword dictionaries need to be loaded
         to make the lexer functional again."""
         self._SQL_REGEX = []
         self._keywords = []
@@ -73,7 +89,7 @@ def is_keyword(self, value):
         """Checks for a keyword.
 
         If the given value is in one of the KEYWORDS_* dictionary
-        it's considered a keyword. Otherwise tokens.Name is returned.
+        it's considered a keyword. Otherwise, tokens.Name is returned.
         """
         val = value.upper()
         for kwdict in self._keywords:
@@ -136,4 +152,4 @@ def tokenize(sql, encoding=None):
     Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream
     of ``(token type, value)`` items.
     """
-    return Lexer().get_tokens(sql, encoding)
+    return Lexer.get_default_instance().get_tokens(sql, encoding)
diff --git a/tests/test_keywords.py b/tests/test_keywords.py
index 2eddccce..b26e9b45 100644
--- a/tests/test_keywords.py
+++ b/tests/test_keywords.py
@@ -9,5 +9,5 @@ class TestSQLREGEX:
                                         '1.', '-1.',
                                         '.1', '-.1'])
     def test_float_numbers(self, number):
-        ttype = next(tt for action, tt in Lexer()._SQL_REGEX if action(number))
+        ttype = next(tt for action, tt in Lexer.get_default_instance()._SQL_REGEX if action(number))
         assert tokens.Number.Float == ttype
diff --git a/tests/test_parse.py b/tests/test_parse.py
index 33e8541f..5feef5a7 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -509,7 +509,7 @@ def test_configurable_keywords():
         (sqlparse.tokens.Punctuation, ";"),
     ]
 
-    Lexer().add_keywords(
+    Lexer.get_default_instance().add_keywords(
         {
             "BACON": sqlparse.tokens.Name.Builtin,
             "SPAM": sqlparse.tokens.Keyword,
@@ -520,7 +520,7 @@ def test_configurable_keywords():
     tokens = sqlparse.parse(sql)[0]
 
     # reset the syntax for later tests.
-    Lexer().default_initialization()
+    Lexer.get_default_instance().default_initialization()
 
     assert list(
         (t.ttype, t.value)
@@ -539,7 +539,7 @@ def test_configurable_keywords():
 
 
 def test_configurable_regex():
-    lex = Lexer()
+    lex = Lexer.get_default_instance()
     lex.clear()
 
     my_regex = (r"ZORDER\s+BY\b", sqlparse.tokens.Keyword)
@@ -559,7 +559,7 @@ def test_configurable_regex():
     tokens = sqlparse.parse("select * from foo zorder by bar;")[0]
 
     # reset the syntax for later tests.
-    Lexer().default_initialization()
+    Lexer.get_default_instance().default_initialization()
 
     assert list(
         (t.ttype, t.value)