From fb7f85365875100185890a4c3bfeac29c632d7dd Mon Sep 17 00:00:00 2001 From: saichethana28 Date: Sun, 8 Feb 2026 06:58:58 +0000 Subject: [PATCH 1/4] resolve query parser blocker --- backend/apps/common/search/query_parser.py | 32 ++++++++----------- .../apps/common/search/query_parser_test.py | 17 +++++++--- 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/backend/apps/common/search/query_parser.py b/backend/apps/common/search/query_parser.py index 5e4fd4f8e3..3f8d4a2936 100644 --- a/backend/apps/common/search/query_parser.py +++ b/backend/apps/common/search/query_parser.py @@ -170,8 +170,6 @@ def parse(self, query: str) -> list[dict]: """ conditions: list[dict] = [] query = query.strip() - if not self.case_sensitive: - query = query.lower() tokens = self._split_tokens(query) try: @@ -184,12 +182,14 @@ def parse(self, query: str) -> list[dict]: conditions.append(self._create_text_search_condition(raw_value)) continue - if field not in self.field_schema: - self._handle_unknown_field(field) + normalized_field = field.lower() + + if normalized_field not in self.field_schema: + self._handle_unknown_field(normalized_field) continue - field_type = self.field_schema[field] - condition = self.to_dict(field, field_type, raw_value) + field_type = self.field_schema[normalized_field] + condition = self.to_dict(normalized_field, field_type, raw_value) if condition: conditions.append(condition) except QueryParserError as e: @@ -360,23 +360,17 @@ def _split_tokens(query: str) -> list[str]: """ regex_components = { - # Support escaped quotes within quotes. - "key_value": r'\S+:"([^"\\]|\\.)*"', + # Key-value pairs with quoted values: name:"OWASP" + "key_value_quoted": r'[a-zA-Z0-9_]+:"([^"\\]|\\.)*"', + # Standard key-value pairs: stars:>10 + "key_value_plain": r"[a-zA-Z0-9_]+:\S+", + # Standalone quoted strings: "OWASP Foundation" "quoted_string": r'"([^"\\]|\\.)*"', + # Single words "unquoted_word": r"\S+", } - parser = ZeroOrMore( - Regex( - "|".join( - [ - regex_components["key_value"], - regex_components["quoted_string"], - regex_components["unquoted_word"], - ] - ) - ) - ) + parser = ZeroOrMore(Regex("|".join(regex_components.values()))) try: result = parser.parse_string(query, parse_all=True) except ParseException as e: diff --git a/backend/tests/apps/common/search/query_parser_test.py b/backend/tests/apps/common/search/query_parser_test.py index 168ad8304f..a363499715 100644 --- a/backend/tests/apps/common/search/query_parser_test.py +++ b/backend/tests/apps/common/search/query_parser_test.py @@ -43,8 +43,8 @@ def test_invalid_parser_field_validation(self): [ ("case_sensitive_parser", '"John Doe"'), ("case_sensitive_strict_parser", '"John Doe"'), - ("parser", '"john doe"'), - ("strict_parser", '"john doe"'), + ("parser", '"John Doe"'), + ("strict_parser", '"John Doe"'), ], ) def test_basic_field_parsing_all_types(self, parser_type, expected_string): @@ -95,7 +95,7 @@ def test_boolean_variations(self): ( 'author:"John Doe" stars:>100 archived:false some "free text"', [ - {"type": "string", "field": "author", "value": '"john doe"'}, + {"type": "string", "field": "author", "value": '"John Doe"'}, {"type": "number", "field": "stars", "op": ">", "value": 100}, {"type": "boolean", "field": "archived", "value": False}, {"type": "string", "field": "query", "value": "some"}, @@ -106,7 +106,7 @@ def test_boolean_variations(self): 'project:"my-awesome-project" language:"C++"', [ {"type": "string", "field": "project", "value": '"my-awesome-project"'}, - {"type": "string", "field": "language", "value": '"c++"'}, + {"type": "string", "field": "language", "value": '"C++"'}, ], ), ], @@ -208,3 +208,12 @@ def test_overflow_numerical_value(self): self.strict_parser.parse(f"stars:{overflow_number}") assert e.value.error_type == "NUMBER_VALUE_ERROR" + + def test_quoted_multi_word_values(self): + """Test that multi-word values in quotes are parsed correctly without splitting.""" + query = 'project:"OWASP Nest" author:"John Doe"' + results = self.parser.parse(query) + + assert len(results) == 2 + assert results[0]["value"] == '"OWASP Nest"' + assert results[1]["value"] == '"John Doe"' From 37714e89144962569d171ed32dec5b2d97eb4d80 Mon Sep 17 00:00:00 2001 From: saichethana28 Date: Sun, 8 Feb 2026 22:54:29 +0000 Subject: [PATCH 2/4] use case_sensitive flag in QueryParser --- backend/apps/api/rest/v0/structured_search.py | 2 +- backend/tests/apps/common/search/query_parser_test.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/backend/apps/api/rest/v0/structured_search.py b/backend/apps/api/rest/v0/structured_search.py index 5e9f3a87dd..195a7319f3 100644 --- a/backend/apps/api/rest/v0/structured_search.py +++ b/backend/apps/api/rest/v0/structured_search.py @@ -59,7 +59,7 @@ def apply_structured_search( } try: - parser = QueryParser(field_schema=parser_schema, strict=False) + parser = QueryParser(field_schema=parser_schema, strict=False, case_sensitive=True) conditions = parser.parse(query) except QueryParserError: # Fail safely diff --git a/backend/tests/apps/common/search/query_parser_test.py b/backend/tests/apps/common/search/query_parser_test.py index a363499715..0d52d331bc 100644 --- a/backend/tests/apps/common/search/query_parser_test.py +++ b/backend/tests/apps/common/search/query_parser_test.py @@ -217,3 +217,9 @@ def test_quoted_multi_word_values(self): assert len(results) == 2 assert results[0]["value"] == '"OWASP Nest"' assert results[1]["value"] == '"John Doe"' + + def test_case_sensitivity_toggle(self): + """Verify that the case_sensitive flag controls value normalization.""" + query = "Author:OWASP" + cs_result = self.case_sensitive_parser.parse(query) + assert cs_result[0]["value"] == "OWASP" From 2d5ee9f3036764e62f5ec3f7baefdb6ad4a68bc7 Mon Sep 17 00:00:00 2001 From: saichethana28 Date: Sun, 8 Feb 2026 23:52:36 +0000 Subject: [PATCH 3/4] feat: add case_sensitive option to QueryParser and update tests --- backend/apps/common/search/query_parser.py | 4 ++++ .../tests/apps/common/search/query_parser_test.py | 14 +++++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/backend/apps/common/search/query_parser.py b/backend/apps/common/search/query_parser.py index 3f8d4a2936..43466e38b9 100644 --- a/backend/apps/common/search/query_parser.py +++ b/backend/apps/common/search/query_parser.py @@ -178,6 +178,10 @@ def parse(self, query: str) -> list[dict]: continue field, raw_value = self._parse_token(token) + + if not self.case_sensitive and raw_value: + raw_value = raw_value.lower() + if field is None: conditions.append(self._create_text_search_condition(raw_value)) continue diff --git a/backend/tests/apps/common/search/query_parser_test.py b/backend/tests/apps/common/search/query_parser_test.py index 0d52d331bc..dc5aa7e475 100644 --- a/backend/tests/apps/common/search/query_parser_test.py +++ b/backend/tests/apps/common/search/query_parser_test.py @@ -48,13 +48,17 @@ def test_invalid_parser_field_validation(self): ], ) def test_basic_field_parsing_all_types(self, parser_type, expected_string): + """Test parsing across different parser configurations.""" parser = getattr(self, parser_type) + expected_value = ( + expected_string if "case_sensitive" in parser_type else expected_string.lower() + ) assert parser.parse('Author:"John Doe"') == [ { "field": "author", "type": "string", - "value": expected_string, + "value": expected_value, } ] @@ -95,7 +99,7 @@ def test_boolean_variations(self): ( 'author:"John Doe" stars:>100 archived:false some "free text"', [ - {"type": "string", "field": "author", "value": '"John Doe"'}, + {"type": "string", "field": "author", "value": '"john doe"'}, {"type": "number", "field": "stars", "op": ">", "value": 100}, {"type": "boolean", "field": "archived", "value": False}, {"type": "string", "field": "query", "value": "some"}, @@ -106,7 +110,7 @@ def test_boolean_variations(self): 'project:"my-awesome-project" language:"C++"', [ {"type": "string", "field": "project", "value": '"my-awesome-project"'}, - {"type": "string", "field": "language", "value": '"C++"'}, + {"type": "string", "field": "language", "value": '"c++"'}, ], ), ], @@ -215,8 +219,8 @@ def test_quoted_multi_word_values(self): results = self.parser.parse(query) assert len(results) == 2 - assert results[0]["value"] == '"OWASP Nest"' - assert results[1]["value"] == '"John Doe"' + assert results[0]["value"] == '"owasp nest"' + assert results[1]["value"] == '"john doe"' def test_case_sensitivity_toggle(self): """Verify that the case_sensitive flag controls value normalization.""" From af364ad608bacf9bce0651a80759db0c9748c001 Mon Sep 17 00:00:00 2001 From: Arkadii Yakovets Date: Sun, 8 Feb 2026 21:29:04 -0800 Subject: [PATCH 4/4] Update code --- backend/apps/api/rest/v0/structured_search.py | 2 +- backend/apps/common/search/query_parser.py | 36 ++++++++++--------- .../apps/common/search/query_parser_test.py | 12 ++----- 3 files changed, 23 insertions(+), 27 deletions(-) diff --git a/backend/apps/api/rest/v0/structured_search.py b/backend/apps/api/rest/v0/structured_search.py index 195a7319f3..3d39ff2f4c 100644 --- a/backend/apps/api/rest/v0/structured_search.py +++ b/backend/apps/api/rest/v0/structured_search.py @@ -59,7 +59,7 @@ def apply_structured_search( } try: - parser = QueryParser(field_schema=parser_schema, strict=False, case_sensitive=True) + parser = QueryParser(case_sensitive=True, field_schema=parser_schema, strict=False) conditions = parser.parse(query) except QueryParserError: # Fail safely diff --git a/backend/apps/common/search/query_parser.py b/backend/apps/common/search/query_parser.py index 43466e38b9..5e4fd4f8e3 100644 --- a/backend/apps/common/search/query_parser.py +++ b/backend/apps/common/search/query_parser.py @@ -170,6 +170,8 @@ def parse(self, query: str) -> list[dict]: """ conditions: list[dict] = [] query = query.strip() + if not self.case_sensitive: + query = query.lower() tokens = self._split_tokens(query) try: @@ -178,22 +180,16 @@ def parse(self, query: str) -> list[dict]: continue field, raw_value = self._parse_token(token) - - if not self.case_sensitive and raw_value: - raw_value = raw_value.lower() - if field is None: conditions.append(self._create_text_search_condition(raw_value)) continue - normalized_field = field.lower() - - if normalized_field not in self.field_schema: - self._handle_unknown_field(normalized_field) + if field not in self.field_schema: + self._handle_unknown_field(field) continue - field_type = self.field_schema[normalized_field] - condition = self.to_dict(normalized_field, field_type, raw_value) + field_type = self.field_schema[field] + condition = self.to_dict(field, field_type, raw_value) if condition: conditions.append(condition) except QueryParserError as e: @@ -364,17 +360,23 @@ def _split_tokens(query: str) -> list[str]: """ regex_components = { - # Key-value pairs with quoted values: name:"OWASP" - "key_value_quoted": r'[a-zA-Z0-9_]+:"([^"\\]|\\.)*"', - # Standard key-value pairs: stars:>10 - "key_value_plain": r"[a-zA-Z0-9_]+:\S+", - # Standalone quoted strings: "OWASP Foundation" + # Support escaped quotes within quotes. + "key_value": r'\S+:"([^"\\]|\\.)*"', "quoted_string": r'"([^"\\]|\\.)*"', - # Single words "unquoted_word": r"\S+", } - parser = ZeroOrMore(Regex("|".join(regex_components.values()))) + parser = ZeroOrMore( + Regex( + "|".join( + [ + regex_components["key_value"], + regex_components["quoted_string"], + regex_components["unquoted_word"], + ] + ) + ) + ) try: result = parser.parse_string(query, parse_all=True) except ParseException as e: diff --git a/backend/tests/apps/common/search/query_parser_test.py b/backend/tests/apps/common/search/query_parser_test.py index dc5aa7e475..1ad2ea400c 100644 --- a/backend/tests/apps/common/search/query_parser_test.py +++ b/backend/tests/apps/common/search/query_parser_test.py @@ -43,22 +43,18 @@ def test_invalid_parser_field_validation(self): [ ("case_sensitive_parser", '"John Doe"'), ("case_sensitive_strict_parser", '"John Doe"'), - ("parser", '"John Doe"'), - ("strict_parser", '"John Doe"'), + ("parser", '"john doe"'), + ("strict_parser", '"john doe"'), ], ) def test_basic_field_parsing_all_types(self, parser_type, expected_string): - """Test parsing across different parser configurations.""" parser = getattr(self, parser_type) - expected_value = ( - expected_string if "case_sensitive" in parser_type else expected_string.lower() - ) assert parser.parse('Author:"John Doe"') == [ { "field": "author", "type": "string", - "value": expected_value, + "value": expected_string, } ] @@ -214,7 +210,6 @@ def test_overflow_numerical_value(self): assert e.value.error_type == "NUMBER_VALUE_ERROR" def test_quoted_multi_word_values(self): - """Test that multi-word values in quotes are parsed correctly without splitting.""" query = 'project:"OWASP Nest" author:"John Doe"' results = self.parser.parse(query) @@ -223,7 +218,6 @@ def test_quoted_multi_word_values(self): assert results[1]["value"] == '"john doe"' def test_case_sensitivity_toggle(self): - """Verify that the case_sensitive flag controls value normalization.""" query = "Author:OWASP" cs_result = self.case_sensitive_parser.parse(query) assert cs_result[0]["value"] == "OWASP"