Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(admin): Extend /run_clickhouse_system_query to support custom SQL queries #2296

Merged
merged 14 commits into from
Dec 17, 2021
Merged
123 changes: 108 additions & 15 deletions snuba/admin/clickhouse/system_queries.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import re
from dataclasses import dataclass
from typing import Any, Dict, Optional, Sequence, Tuple, Type, cast
from typing import Dict, Optional, Sequence, Type

from snuba import settings
from snuba.clickhouse.native import ClickhousePool
from snuba.clickhouse.native import ClickhousePool, ClickhouseResult
from snuba.clusters.cluster import ClickhouseClientSettings, ClickhouseCluster
from snuba.datasets.storages import StorageKey
from snuba.datasets.storages.factory import get_storage
Expand All @@ -21,6 +22,14 @@ class InvalidStorageError(SerializableException):
pass


class InvalidResultError(SerializableException):
pass


class InvalidCustomQuery(SerializableException):
pass


class _QueryRegistry:
"""Keep a mapping of SystemQueries to their names"""

Expand Down Expand Up @@ -94,17 +103,12 @@ def _is_valid_node(host: str, port: int, cluster: ClickhouseCluster) -> bool:
return host == connection_id.hostname and port == connection_id.tcp_port


def run_system_query_on_host_by_name(
clickhouse_host: str,
clickhouse_port: int,
storage_name: str,
system_query_name: str,
) -> Tuple[Sequence[Any], Sequence[Tuple[str, str]]]:
query = SystemQuery.from_name(system_query_name)

if not query:
raise NonExistentSystemQuery(extra_data={"query_name": system_query_name})

def _run_sql_query_on_host(
clickhouse_host: str, clickhouse_port: int, storage_name: str, sql: str
) -> ClickhouseResult:
"""
Run the SQL query. It should be validated before getting to this point
"""
storage_key = None
try:
storage_key = StorageKey(storage_name)
Expand All @@ -130,6 +134,95 @@ def run_system_query_on_host_by_name(
# force read-only
client_settings=ClickhouseClientSettings.QUERY.value.settings,
)
query_result = connection.execute(query=query.sql, with_column_types=True)
query_result = connection.execute(query=sql, with_column_types=True)
connection.close()
return cast(Tuple[Sequence[Any], Sequence[Tuple[str, str]]], query_result,)

return query_result


def run_system_query_on_host_by_name(
clickhouse_host: str,
clickhouse_port: int,
storage_name: str,
system_query_name: str,
) -> ClickhouseResult:
query = SystemQuery.from_name(system_query_name)

if not query:
raise NonExistentSystemQuery(extra_data={"query_name": system_query_name})

return _run_sql_query_on_host(
clickhouse_host, clickhouse_port, storage_name, query.sql
)


SYSTEM_QUERY_RE = re.compile(
r"""
^ # Start
(SELECT|select)
\s
(?P<select_statement>[\w\s,\(\)]+|\*)
\s
(FROM|from)
\s
system.(?P<system_table_name>\w+)
(?P<extra>\s[\w\s,=+\(\)']+)?
;? # Optional semicolon
$ # End
""",
re.VERBOSE,
)

# An incomplete list
VALID_SYSTEM_TABLES = [
"clusters",
"merges",
"parts",
]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can allow all of them. There is nothing dangerous there.
But if you plan to keep it. Please add:

replication_queue
settings
metrics
columns



def run_system_query_on_host_with_sql(
clickhouse_host: str, clickhouse_port: int, storage_name: str, system_query_sql: str
) -> ClickhouseResult:
validate_system_query(system_query_sql)
return _run_sql_query_on_host(
clickhouse_host, clickhouse_port, storage_name, system_query_sql
)


def validate_system_query(sql_query: str) -> None:
"""
Simple validation to ensure query only attempts to access system tables and not
any others. Will be replaced by AST parser eventually.

Raises InvalidCustomQuery if query is invalid or not allowed.
"""
sql_query = " ".join(sql_query.split())

disallowed_keywords = ["select", "insert", "join"]

match = SYSTEM_QUERY_RE.match(sql_query)

if match is None:
raise InvalidCustomQuery("Query is invalid")

select_statement = match.group("select_statement")

# Extremely quick and dirty way of ensuring there is not a nested select, insert or a join
for kw in disallowed_keywords:
if kw in select_statement.lower():
raise InvalidCustomQuery(f"{kw} is not allowed here")

system_table_name = match.group("system_table_name")

if system_table_name not in VALID_SYSTEM_TABLES:
raise InvalidCustomQuery("Invalid table")

extra = match.group("extra")

# Unfortunately "extra" is pretty permissive right now, just ensure
# there is no attempt to do a select, insert or join in there
if extra is not None:
for kw in disallowed_keywords:
if kw in extra.lower():
raise InvalidCustomQuery(f"{kw} is not allowed here")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's have a proper AST here asap.

2 changes: 1 addition & 1 deletion snuba/admin/dist/bundle.js

Large diffs are not rendered by default.

21 changes: 20 additions & 1 deletion snuba/admin/static/api_client.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,20 @@ import {
ConfigChange,
} from "./runtime_config/types";

import { ClickhouseNodeData } from "./clickhouse_queries/types";
import {
ClickhouseCannedQuery,
ClickhouseNodeData,
QueryRequest,
QueryResult,
} from "./clickhouse_queries/types";

interface Client {
getConfigs: () => Promise<Config[]>;
createNewConfig: (key: ConfigKey, value: ConfigValue) => Promise<Config>;
getAuditlog: () => Promise<ConfigChange[]>;
getClickhouseNodes: () => Promise<[ClickhouseNodeData]>;
getClickhouseCannedQueries: () => Promise<[ClickhouseCannedQuery]>;
executeQuery: (req: QueryRequest) => Promise<QueryResult>;
}

function Client() {
Expand Down Expand Up @@ -57,6 +64,18 @@ function Client() {
})
);
},
getClickhouseCannedQueries: () => {
const url = baseUrl + "clickhouse_queries";
return fetch(url).then((resp) => resp.json());
},
executeQuery: (query: QueryRequest) => {
const url = baseUrl + "run_clickhouse_system_query";
return fetch(url, {
headers: { "Content-Type": "application/json" },
method: "POST",
body: JSON.stringify(query),
}).then((resp) => resp.json());
},
};
}

Expand Down
87 changes: 71 additions & 16 deletions snuba/admin/static/clickhouse_queries/index.tsx
Original file line number Diff line number Diff line change
@@ -1,34 +1,41 @@
import React, { useEffect, useState } from "react";
import Client from "../api_client";
import { Table } from "../table";

import { ClickhouseNodeData } from "./types";
import {
ClickhouseNodeData,
ClickhouseCannedQuery,
QueryRequest,
QueryResult,
} from "./types";

type QueryState = {
storage: string | null;
host: string | null;
port: number | null;
};
type QueryState = Partial<QueryRequest>;

function ClickhouseQueries(props: { api: Client }) {
const [nodeData, setNodeData] = useState<ClickhouseNodeData[]>([]);

const [query, setQuery] = useState<QueryState>({
storage: null,
host: null,
port: null,
});
const [cannedQueries, setCannedQueries] = useState<ClickhouseCannedQuery[]>(
[]
);
const [query, setQuery] = useState<QueryState>({});
const [queryResultHistory, setQueryResultHistory] = useState<QueryResult[]>(
[]
);

useEffect(() => {
props.api.getClickhouseNodes().then((res) => {
setNodeData(res);
});
props.api.getClickhouseCannedQueries().then((res) => {
setCannedQueries(res);
});
}, []);

function selectStorage(storage: string) {
setQuery({
storage,
host: null,
port: null,
setQuery((prevQuery) => {
return {
...prevQuery,
storage: storage,
};
});
}

Expand All @@ -44,9 +51,26 @@ function ClickhouseQueries(props: { api: Client }) {
});
}

function selectCannedQuery(queryName: string) {
setQuery((prevQuery) => {
return {
...prevQuery,
query_name: queryName,
};
});
}

function executeQuery() {
props.api.executeQuery(query as QueryRequest).then((result) => {
result.input_query = `${query.query_name}(${query.storage},${query.host}:${query.port})`;
setQueryResultHistory((prevHistory) => [result, ...prevHistory]);
});
}

return (
<div>
<form>
<h2>Construct a query</h2>
<select
value={query.storage || ""}
onChange={(evt) => selectStorage(evt.target.value)}
Expand Down Expand Up @@ -82,7 +106,38 @@ function ClickhouseQueries(props: { api: Client }) {
))}
</select>
)}
{query.storage && query.host && query.port && (
<select
value={query.query_name || ""}
onChange={(evt) => selectCannedQuery(evt.target.value)}
>
<option disabled value="">
Select a query
</option>
{cannedQueries.map((cannedQuery) => (
<option key={`${cannedQuery.name}`} value={`${cannedQuery.name}`}>
{cannedQuery.name}: {cannedQuery.description}
</option>
))}
</select>
)}
{query.storage && query.host && query.port && query.query_name && (
<button onClick={(_) => executeQuery()}>Execute query</button>
)}
</form>
<div>
<h2>Query results</h2>
<Table
headerData={["Query", "Response"]}
rowData={queryResultHistory.map((queryResult) => [
<span>{queryResult.input_query}</span>,
<Table
headerData={queryResult.column_names}
rowData={queryResult.rows}
/>,
])}
/>
</div>
</div>
);
}
Expand Down
25 changes: 24 additions & 1 deletion snuba/admin/static/clickhouse_queries/types.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,27 @@ type ClickhouseNodeData = {
local_nodes: ClickhouseNode[];
};

export { ClickhouseNodeData };
type ClickhouseCannedQuery = {
description: string | null;
name: string;
sql: string;
};

type QueryRequest = {
storage: string;
host: string;
port: number;
query_name: string;
};

type QueryResultColumnMetadata = [string];
type QueryResultRow = [string];

type QueryResult = {
input_query?: string;
timestamp: number;
column_names: QueryResultColumnMetadata;
rows: [QueryResultRow];
};

export { ClickhouseNodeData, ClickhouseCannedQuery, QueryRequest, QueryResult };
10 changes: 7 additions & 3 deletions snuba/admin/static/table.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@ import { COLORS } from "./theme";
type TableProps = {
headerData: ReactNode[];
rowData: ReactNode[][];
columnWidths: number[];
columnWidths?: number[];
};

function Table(props: TableProps) {
const { headerData, rowData, columnWidths } = props;

const sumColumnWidths = columnWidths.reduce((acc, i) => acc + i, 0);
const autoColumnWidths = Array(headerData.length).fill(1);
const notEmptyColumnWidths = columnWidths ?? autoColumnWidths;
const sumColumnWidths = notEmptyColumnWidths.reduce((acc, i) => acc + i, 0);

return (
<table style={tableStyle}>
Expand All @@ -22,7 +24,9 @@ function Table(props: TableProps) {
key={idx}
style={{
...thStyle,
width: `${(columnWidths[idx] * 100) / sumColumnWidths}%`,
width: `${
(notEmptyColumnWidths[idx] * 100) / sumColumnWidths
}%`,
}}
>
{col}
Expand Down
Loading