From f2c0746c9bbbe19686b59bfaca7b08457deef7f6 Mon Sep 17 00:00:00 2001 From: robfrank Date: Wed, 22 Apr 2026 22:07:58 +0200 Subject: [PATCH 1/3] fix: Cypher -> EXISTS { MATCH (p)-[:WORKS_WITH]->() } returns false when relationship type name embeds a Cypher keyword after underscore Closes #3952 `matchesKeywordAt()` in ExistsExpression used `Character.isLetterOrDigit()` for word-boundary detection. Because underscore is not a letter or digit in Java, "WITH" inside "WORKS_WITH" was falsely matched as the Cypher WITH clause keyword, corrupting the injected subquery and causing a silently-caught parse exception that returned false. Fix: replace the boundary check with `isCypherIdentifierChar()` which also treats `_` as part of an identifier, consistent with Cypher identifier rules. Affects any relationship type whose name ends with a Cypher keyword after an underscore (e.g. _WITH, _WHERE, _RETURN). Co-Authored-By: Claude Sonnet 4.6 --- ...exists-match-subquery-relationship-type.md | 42 ++++++ .../opencypher/ast/ExistsExpression.java | 10 +- ...rExistsUnderscoreRelationshipTypeTest.java | 142 ++++++++++++++++++ 3 files changed, 192 insertions(+), 2 deletions(-) create mode 100644 3952-exists-match-subquery-relationship-type.md create mode 100644 engine/src/test/java/com/arcadedb/query/opencypher/CypherExistsUnderscoreRelationshipTypeTest.java diff --git a/3952-exists-match-subquery-relationship-type.md b/3952-exists-match-subquery-relationship-type.md new file mode 100644 index 0000000000..53b940c078 --- /dev/null +++ b/3952-exists-match-subquery-relationship-type.md @@ -0,0 +1,42 @@ +# Fix #3952: EXISTS { MATCH } subquery returns false for relationship types containing Cypher keyword fragments + +## Summary + +`EXISTS { MATCH (p)-[:WORKS_WITH]->(:Person) }` returned `false` for all rows even when the pattern matched, +while the equivalent `WHERE (p)-[:WORKS_WITH]->()` predicate worked correctly. 
+ +## Root Cause + +`ExistsExpression.matchesKeywordAt()` used `Character.isLetterOrDigit()` to detect word boundaries when +scanning the subquery string for Cypher clause keywords (WITH, WHERE, RETURN, ...). + +Because underscore `_` is NOT a letter or digit in Java, the check incorrectly accepted it as a word +boundary. This caused "WITH" inside "WORKS_WITH" to be falsely recognised as the Cypher `WITH` clause +keyword, making `injectWhereConditions()` split the relationship type name and produce an invalid query +such as: + +``` +MATCH (p), (p)-[:WORKS_WHERE id(p) = $__exists_p WITH]->(:Person) +``` + +The invalid query threw an exception that was silently caught in `evaluate()`, which returned `false`. + +The bug affected any relationship type whose name contains a Cypher keyword delimited by an underscore, +as a suffix (`_WITH`, `_WHERE`, `_RETURN`, `_ORDER`, `_SKIP`, `_LIMIT`, `_UNION`) or as a prefix (e.g. `WITH_`). + +## Fix + +`engine/src/main/java/com/arcadedb/query/opencypher/ast/ExistsExpression.java` + +Replaced `Character.isLetterOrDigit(c)` in the boundary checks of `matchesKeywordAt()` with a new helper +`isCypherIdentifierChar(c)` that also returns `true` for `_`, matching Cypher identifier rules. + +## Tests + +New test class: `engine/src/test/java/com/arcadedb/query/opencypher/CypherExistsUnderscoreRelationshipTypeTest.java` + +- `existsWithUnderscoreKeywordWithInRelationshipType` - reproduces issue #3952 (WORKS_WITH embeds "WITH") +- `existsWithUnderscoreKeywordWhereInRelationshipType` - KNOWS_WHERE embeds "WHERE" +- `existsWithSimpleRelationshipTypeStillWorks` - control: KNOWS (no embedded keyword) still works + +All 3 new tests pass. All 5835 existing Cypher/OpenCypher tests continue to pass. 
diff --git a/engine/src/main/java/com/arcadedb/query/opencypher/ast/ExistsExpression.java b/engine/src/main/java/com/arcadedb/query/opencypher/ast/ExistsExpression.java index 73efdeef87..86a182bd59 100644 --- a/engine/src/main/java/com/arcadedb/query/opencypher/ast/ExistsExpression.java +++ b/engine/src/main/java/com/arcadedb/query/opencypher/ast/ExistsExpression.java @@ -200,6 +200,8 @@ else if (clauseStart < 0 && (matchesKeywordAt(upper, i, "WITH") || matchesKeywor /** * Checks if the uppercase query string has a keyword at the given position, * ensuring it's a word boundary (not part of a longer identifier). + * Underscore is treated as an identifier character so that relationship type names + * such as WORKS_WITH are not falsely split at embedded keyword fragments (e.g. "WITH"). */ private static boolean matchesKeywordAt(final String upper, final int pos, final String keyword) { if (pos + keyword.length() > upper.length()) @@ -207,15 +209,19 @@ private static boolean matchesKeywordAt(final String upper, final int pos, final if (!upper.startsWith(keyword, pos)) return false; // Check word boundary before - if (pos > 0 && Character.isLetterOrDigit(upper.charAt(pos - 1))) + if (pos > 0 && isCypherIdentifierChar(upper.charAt(pos - 1))) return false; // Check word boundary after final int end = pos + keyword.length(); - if (end < upper.length() && Character.isLetterOrDigit(upper.charAt(end))) + if (end < upper.length() && isCypherIdentifierChar(upper.charAt(end))) return false; return true; } + private static boolean isCypherIdentifierChar(final char c) { + return Character.isLetterOrDigit(c) || c == '_'; + } + @Override public boolean isAggregation() { return false; diff --git a/engine/src/test/java/com/arcadedb/query/opencypher/CypherExistsUnderscoreRelationshipTypeTest.java b/engine/src/test/java/com/arcadedb/query/opencypher/CypherExistsUnderscoreRelationshipTypeTest.java new file mode 100644 index 0000000000..a9bb8b714e --- /dev/null +++ 
b/engine/src/test/java/com/arcadedb/query/opencypher/CypherExistsUnderscoreRelationshipTypeTest.java @@ -0,0 +1,142 @@ +/* + * Copyright © 2021-present Arcade Data Ltd (info@arcadedata.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-FileCopyrightText: 2021-present Arcade Data Ltd (info@arcadedata.com) + * SPDX-License-Identifier: Apache-2.0 + */ +package com.arcadedb.query.opencypher; + +import com.arcadedb.database.Database; +import com.arcadedb.database.DatabaseFactory; +import com.arcadedb.graph.MutableVertex; +import com.arcadedb.query.sql.executor.Result; +import com.arcadedb.query.sql.executor.ResultSet; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.util.HashMap; +import java.util.Map; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Regression test for issue #3952: EXISTS { MATCH (p)-[:WORKS_WITH]->() } returned false for all rows because + * the word-boundary check in ExistsExpression.matchesKeywordAt() did not treat underscore as an identifier + * character, so "WITH" inside "WORKS_WITH" was falsely detected as the Cypher WITH clause keyword, + * corrupting the injected subquery and causing a silently-caught exception. 
+ */ +class CypherExistsUnderscoreRelationshipTypeTest { + private Database database; + + @BeforeEach + void setup() { + database = new DatabaseFactory("./target/databases/cypherexistsunderscore").create(); + + database.transaction(() -> { + database.getSchema().createVertexType("Person"); + database.getSchema().createEdgeType("WORKS_WITH"); + database.getSchema().createEdgeType("KNOWS"); + database.getSchema().createEdgeType("KNOWS_WHERE"); + + final MutableVertex alice = database.newVertex("Person").set("name", "Alice").save(); + final MutableVertex bob = database.newVertex("Person").set("name", "Bob").save(); + final MutableVertex charlie = database.newVertex("Person").set("name", "Charlie").save(); + final MutableVertex david = database.newVertex("Person").set("name", "David").save(); + + // Alice -[WORKS_WITH]-> Bob (type name embeds keyword "WITH") + alice.newEdge("WORKS_WITH", bob, new Object[0]).save(); + // Charlie -[KNOWS]-> David (simple type, control) + charlie.newEdge("KNOWS", david, new Object[0]).save(); + // Alice -[KNOWS_WHERE]-> Charlie (type name embeds keyword "WHERE") + alice.newEdge("KNOWS_WHERE", charlie, new Object[0]).save(); + }); + } + + @AfterEach + void teardown() { + if (database != null) + database.drop(); + } + + /** + * Regression for issue #3952: relationship type containing "_WITH" (a Cypher keyword after underscore) + * must not break the EXISTS { MATCH ... } subquery injection. 
+ */ + @Test + void existsWithUnderscoreKeywordWithInRelationshipType() { + final ResultSet results = database.query("opencypher", """ + MATCH (p:Person) + RETURN p.name AS person, + EXISTS { MATCH (p)-[:WORKS_WITH]->(:Person) } AS worksWith + ORDER BY person"""); + + final Map actual = collectBooleanColumn(results, "person", "worksWith"); + + assertThat(actual).containsEntry("Alice", true); + assertThat(actual).containsEntry("Bob", false); + assertThat(actual).containsEntry("Charlie", false); + assertThat(actual).containsEntry("David", false); + } + + /** + * EXISTS with relationship type "KNOWS_WHERE" whose name embeds the keyword "WHERE". + */ + @Test + void existsWithUnderscoreKeywordWhereInRelationshipType() { + final ResultSet results = database.query("opencypher", """ + MATCH (p:Person) + RETURN p.name AS person, + EXISTS { MATCH (p)-[:KNOWS_WHERE]->(:Person) } AS knowsWhere + ORDER BY person"""); + + final Map actual = collectBooleanColumn(results, "person", "knowsWhere"); + + assertThat(actual).containsEntry("Alice", true); + assertThat(actual).containsEntry("Bob", false); + assertThat(actual).containsEntry("Charlie", false); + assertThat(actual).containsEntry("David", false); + } + + /** + * Control: EXISTS with a simple relationship type (no embedded keyword) continues to work correctly. 
+ */ + @Test + void existsWithSimpleRelationshipTypeStillWorks() { + final ResultSet results = database.query("opencypher", """ + MATCH (p:Person) + RETURN p.name AS person, + EXISTS { MATCH (p)-[:KNOWS]->(:Person) } AS knows + ORDER BY person"""); + + final Map actual = collectBooleanColumn(results, "person", "knows"); + + assertThat(actual).containsEntry("Alice", false); + assertThat(actual).containsEntry("Bob", false); + assertThat(actual).containsEntry("Charlie", true); + assertThat(actual).containsEntry("David", false); + } + + private static Map collectBooleanColumn(final ResultSet results, final String keyCol, + final String valueCol) { + final Map map = new HashMap<>(); + while (results.hasNext()) { + final Result row = results.next(); + map.put(row.getProperty(keyCol), row.getProperty(valueCol)); + } + results.close(); + return map; + } +} From ccd1d6bde426c6500b58d2681fd982d2052e17d8 Mon Sep 17 00:00:00 2001 From: robfrank Date: Wed, 22 Apr 2026 22:11:07 +0200 Subject: [PATCH 2/3] simplify if --- .../com/arcadedb/query/opencypher/ast/ExistsExpression.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/engine/src/main/java/com/arcadedb/query/opencypher/ast/ExistsExpression.java b/engine/src/main/java/com/arcadedb/query/opencypher/ast/ExistsExpression.java index 86a182bd59..62873ed396 100644 --- a/engine/src/main/java/com/arcadedb/query/opencypher/ast/ExistsExpression.java +++ b/engine/src/main/java/com/arcadedb/query/opencypher/ast/ExistsExpression.java @@ -26,7 +26,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.regex.Matcher; /** * Expression representing EXISTS predicate. 
@@ -213,9 +212,7 @@ private static boolean matchesKeywordAt(final String upper, final int pos, final return false; // Check word boundary after final int end = pos + keyword.length(); - if (end < upper.length() && isCypherIdentifierChar(upper.charAt(end))) - return false; - return true; + return end >= upper.length() || !isCypherIdentifierChar(upper.charAt(end)); } private static boolean isCypherIdentifierChar(final char c) { From b0dad27897872c619efe1a673f7c478f6e33fc40 Mon Sep 17 00:00:00 2001 From: robfrank Date: Wed, 22 Apr 2026 22:15:14 +0200 Subject: [PATCH 3/3] fix: harden ExistsExpression keyword boundary detection per code review - Apply isCypherIdentifierChar (includes underscore) to variableUsedInSubquery so outer variable 'p' no longer false-matches inside 'p_node' or similar - Extend matchesKeywordAt boundary checks to also reject ':' '.' '$' as preceding/following chars, preventing false keyword matches in label selectors ([:WITH]), property access (n.with), or parameters ($with) Co-Authored-By: Claude Sonnet 4.6 --- .../opencypher/ast/ExistsExpression.java | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/engine/src/main/java/com/arcadedb/query/opencypher/ast/ExistsExpression.java b/engine/src/main/java/com/arcadedb/query/opencypher/ast/ExistsExpression.java index 62873ed396..911bb2c45b 100644 --- a/engine/src/main/java/com/arcadedb/query/opencypher/ast/ExistsExpression.java +++ b/engine/src/main/java/com/arcadedb/query/opencypher/ast/ExistsExpression.java @@ -100,10 +100,10 @@ private static boolean variableUsedInSubquery(final String subquery, final Strin if (idx < 0) return false; // Verify it's a word boundary (not part of a longer identifier) - if (idx > 0 && Character.isLetterOrDigit(subquery.charAt(idx - 1))) + if (idx > 0 && isCypherIdentifierChar(subquery.charAt(idx - 1))) return false; final int end = idx + varName.length(); - if (end < subquery.length() && 
Character.isLetterOrDigit(subquery.charAt(end))) + if (end < subquery.length() && isCypherIdentifierChar(subquery.charAt(end))) return false; return true; } @@ -198,21 +198,31 @@ else if (clauseStart < 0 && (matchesKeywordAt(upper, i, "WITH") || matchesKeywor /** * Checks if the uppercase query string has a keyword at the given position, - * ensuring it's a word boundary (not part of a longer identifier). - * Underscore is treated as an identifier character so that relationship type names - * such as WORKS_WITH are not falsely split at embedded keyword fragments (e.g. "WITH"). + * ensuring it's a word boundary (not part of a longer identifier or Cypher token). + * Underscore is treated as an identifier character, and ':', '.', '$' are treated as + * non-boundary token prefixes, so that patterns like [:WORKS_WITH], n.with, or $with + * do not falsely match a keyword fragment. */ private static boolean matchesKeywordAt(final String upper, final int pos, final String keyword) { if (pos + keyword.length() > upper.length()) return false; if (!upper.startsWith(keyword, pos)) return false; - // Check word boundary before - if (pos > 0 && isCypherIdentifierChar(upper.charAt(pos - 1))) - return false; + // Check word boundary before — reject if preceded by an identifier char or a Cypher token + // prefix (: for labels/types, . for property access, $ for parameters) + if (pos > 0) { + final char before = upper.charAt(pos - 1); + if (isCypherIdentifierChar(before) || before == ':' || before == '.' || before == '$') + return false; + } // Check word boundary after final int end = pos + keyword.length(); - return end >= upper.length() || !isCypherIdentifierChar(upper.charAt(end)); + if (end < upper.length()) { + final char after = upper.charAt(end); + if (isCypherIdentifierChar(after) || after == ':' || after == '.' || after == '$') + return false; + } + return true; } private static boolean isCypherIdentifierChar(final char c) {