-
-
Notifications
You must be signed in to change notification settings - Fork 57
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(admin): Extend /run_clickhouse_system_query to support custom SQL queries #2296
Merged
Merged
Changes from 12 commits
Commits
Show all changes
14 commits
Select commit
Hold shift + click to select a range
62be7a2
v1 of wiring in queries and rendering results
onewland b0340a5
make table widths more dynamic
onewland af8caf8
use Lyn Table class rather than bad home-rolled version
onewland 2c04421
fully switch over to Lyn table class
onewland b3298b7
make null check more canonical TS
onewland 33681af
update bundle
onewland 38ec2db
Merge branch 'master' into wire-in-queries-admin-tool
onewland de4c63e
fix typing from evan PR
onewland 34411b3
style cleanups
onewland a491114
type hint cleanups
onewland e7987ab
feat(admin): Extend /run_clickhouse_system_query API to support custo…
lynnagara 8199172
more error handling
lynnagara 68e3aaa
Merge remote-tracking branch 'origin/master' into system-queries
lynnagara 3cb17de
allow all system tables
lynnagara File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,9 @@ | ||
import re | ||
from dataclasses import dataclass | ||
from typing import Any, Dict, Optional, Sequence, Tuple, Type, cast | ||
from typing import Dict, Optional, Sequence, Type | ||
|
||
from snuba import settings | ||
from snuba.clickhouse.native import ClickhousePool | ||
from snuba.clickhouse.native import ClickhousePool, ClickhouseResult | ||
from snuba.clusters.cluster import ClickhouseClientSettings, ClickhouseCluster | ||
from snuba.datasets.storages import StorageKey | ||
from snuba.datasets.storages.factory import get_storage | ||
|
@@ -21,6 +22,14 @@ class InvalidStorageError(SerializableException): | |
pass | ||
|
||
|
||
class InvalidResultError(SerializableException):
    """Serializable error type for an invalid query result.

    NOTE(review): nothing in the visible part of this module raises it;
    confirm the intended raise site against the callers.
    """
|
||
|
||
class InvalidCustomQuery(SerializableException):
    """Raised when a custom SQL query is malformed or not allowed."""
|
||
|
||
class _QueryRegistry: | ||
"""Keep a mapping of SystemQueries to their names""" | ||
|
||
|
@@ -94,17 +103,12 @@ def _is_valid_node(host: str, port: int, cluster: ClickhouseCluster) -> bool: | |
return host == connection_id.hostname and port == connection_id.tcp_port | ||
|
||
|
||
def run_system_query_on_host_by_name( | ||
clickhouse_host: str, | ||
clickhouse_port: int, | ||
storage_name: str, | ||
system_query_name: str, | ||
) -> Tuple[Sequence[Any], Sequence[Tuple[str, str]]]: | ||
query = SystemQuery.from_name(system_query_name) | ||
|
||
if not query: | ||
raise NonExistentSystemQuery(extra_data={"query_name": system_query_name}) | ||
|
||
def _run_sql_query_on_host( | ||
clickhouse_host: str, clickhouse_port: int, storage_name: str, sql: str | ||
) -> ClickhouseResult: | ||
""" | ||
Run the SQL query. It should be validated before getting to this point | ||
""" | ||
storage_key = None | ||
try: | ||
storage_key = StorageKey(storage_name) | ||
|
@@ -130,6 +134,95 @@ def run_system_query_on_host_by_name( | |
# force read-only | ||
client_settings=ClickhouseClientSettings.QUERY.value.settings, | ||
) | ||
query_result = connection.execute(query=query.sql, with_column_types=True) | ||
query_result = connection.execute(query=sql, with_column_types=True) | ||
connection.close() | ||
return cast(Tuple[Sequence[Any], Sequence[Tuple[str, str]]], query_result,) | ||
|
||
return query_result | ||
|
||
|
||
def run_system_query_on_host_by_name(
    clickhouse_host: str,
    clickhouse_port: int,
    storage_name: str,
    system_query_name: str,
) -> ClickhouseResult:
    """
    Resolve a predefined system query by its name and execute its SQL on
    the given host.

    Raises NonExistentSystemQuery (carrying the requested name in its
    extra data) when no query is found under ``system_query_name``.
    """
    system_query = SystemQuery.from_name(system_query_name)

    if system_query:
        # The SQL comes from the predefined query, not from the caller,
        # so it is handed straight to the runner.
        return _run_sql_query_on_host(
            clickhouse_host=clickhouse_host,
            clickhouse_port=clickhouse_port,
            storage_name=storage_name,
            sql=system_query.sql,
        )

    raise NonExistentSystemQuery(extra_data={"query_name": system_query_name})
|
||
|
||
# Shape of an accepted custom query:
#   SELECT <columns | *> FROM system.<table> [<extra clauses>] [;]
# Named groups:
#   select_statement  - the column list (or "*")
#   system_table_name - the table name following the literal "system." prefix
#   extra             - optional trailing text (WHERE, ORDER BY, ...)
# The dot after "system" is escaped on purpose: a bare "." matches any
# character, which would let "system_parts" or "systemxparts" (tables outside
# the system database) pass as if they were "system.parts".
SYSTEM_QUERY_RE = re.compile(
    r"""
        ^ # Start
        (SELECT|select)
        \s
        (?P<select_statement>[\w\s,\(\)]+|\*)
        \s
        (FROM|from)
        \s
        system\.(?P<system_table_name>\w+)
        (?P<extra>\s[\w\s,=+\(\)']+)?
        ;? # Optional semicolon
        $ # End
    """,
    re.VERBOSE,
)
|
||
# System tables that custom queries are allowed to read from.
# This is an incomplete list.
VALID_SYSTEM_TABLES = ["clusters", "merges", "parts"]
|
||
|
||
def run_system_query_on_host_with_sql(
    clickhouse_host: str, clickhouse_port: int, storage_name: str, system_query_sql: str
) -> ClickhouseResult:
    """
    Validate a caller-supplied SQL string and, if it passes, execute it on
    the given host.

    Raises InvalidCustomQuery (from validate_system_query) when the SQL is
    rejected; nothing is executed in that case.
    """
    # Validation must come first: the runner expects already-vetted SQL.
    validate_system_query(system_query_sql)

    return _run_sql_query_on_host(
        clickhouse_host=clickhouse_host,
        clickhouse_port=clickhouse_port,
        storage_name=storage_name,
        sql=system_query_sql,
    )
|
||
|
||
def validate_system_query(sql_query: str) -> None:
    """
    Lightweight check that a custom query only reads from an allowed system
    table. Intended as a stopgap until an AST parser replaces it.

    The query is whitespace-normalised, matched against SYSTEM_QUERY_RE, and
    then its column list, table name and trailing clauses are inspected in
    that order.

    Raises InvalidCustomQuery if query is invalid or not allowed.
    """
    forbidden_keywords = ("select", "insert", "join")

    def _reject_forbidden_keywords(fragment: str) -> None:
        # Extremely quick and dirty substring scan guarding against a nested
        # select, an insert or a join inside the given piece of the query.
        lowered = fragment.lower()
        for kw in forbidden_keywords:
            if kw in lowered:
                raise InvalidCustomQuery(f"{kw} is not allowed here")

    # Collapse every run of whitespace (including newlines) to one space so
    # the single-\s separators in the pattern line up.
    normalized = " ".join(sql_query.split())

    parsed = SYSTEM_QUERY_RE.match(normalized)
    if parsed is None:
        raise InvalidCustomQuery("Query is invalid")

    _reject_forbidden_keywords(parsed.group("select_statement"))

    if parsed.group("system_table_name") not in VALID_SYSTEM_TABLES:
        raise InvalidCustomQuery("Invalid table")

    # The trailing part of the query is matched very permissively by the
    # pattern, so the keyword scan is the only guard applied to it.
    trailing = parsed.group("extra")
    if trailing is not None:
        _reject_forbidden_keywords(trailing)
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's have a proper AST here asap. |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can allow all of them. There is nothing dangerous there.
But if you plan to keep it, please add: