Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files — browse the repository at this point in the history
  • Branch information:
boonhapus committed Jun 24, 2024
2 parents 462edb7 + 50bca24 commit 06fd7fd
Show file tree
Hide file tree
Showing 32 changed files with 463 additions and 207 deletions.
13 changes: 13 additions & 0 deletions .github/ISSUE_TEMPLATE/report-bug.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,5 +53,18 @@ body:
happen, and what is currently happening.
If we can copy it, run it, and see it right away, there's a much higher chance we'll be able to help you.
validations:
required: true

- type: textarea
id: logs
attributes:
label: Need to upload log files securely?
description: >-
Run the following command.
`cs_tools logs report --latest 5`
Then use the secure file share link below, which routes to the CS Tools team directly.
https://thoughtspot.egnyte.com/ul/1bK0dz05L6
1 change: 1 addition & 0 deletions .github/ISSUE_TEMPLATE/report-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,6 @@ body:
What is wrong with the docs?
Write a short description telling us what the docs are missing, a link to where you found a typo, or if you thought something was generally confusing.
validations:
required: true
2 changes: 1 addition & 1 deletion cs_tools/__project__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "1.5.7"
__version__ = "1.5.8"
__docs__ = "https://thoughtspot.github.io/cs_tools/"
__repo__ = "https://github.com/thoughtspot/cs_tools"
__help__ = f"{__repo__}/discussions/"
Expand Down
17 changes: 13 additions & 4 deletions cs_tools/api/_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,22 @@ def __before_request__(self, request: httpx.Request) -> None:
now = dt.datetime.now(tz=dt.timezone.utc)
request.headers["cs-tools-request-start-utc-timestamp"] = now.isoformat()

log.debug(
f">>> [{now:%H:%M:%S}] {request.method} -> {request.url.path}"
log_msg = (
f">>> [{now:%H:%M:%S}] HTTP {request.method} -> {request.url.path}"
f"\n\t=== HEADERS ===\n{request.headers}"
f"\n\t=== DATA ===\n{_utils.obfuscate_sensitive_data(request.url.params)}"
f"\n",
)

if request.url.params:
log_msg += f"\n\t=== PARAMS ===\n{_utils.obfuscate_sensitive_data(request.url.params)}"

is_sending_files_to_server = request.headers.get("Content-Type", "").startswith("multipart/form-data")

if not is_sending_files_to_server and request.content:
data: str = request.content.decode()
log_msg += f"\n\t=== DATA ===\n{_utils.obfuscate_sensitive_data(httpx.QueryParams(data))}"

log.debug(f"{log_msg}\n")

def request(self, method: Literal["POST", "GET", "PUT", "DELETE"], url: str, **kwargs) -> httpx.Response:
"""Proxy httpx.Session base method on our client."""
# DEV NOTE: @boonhapus, 2024/02/15
Expand Down
139 changes: 91 additions & 48 deletions cs_tools/api/_rest_api_v1.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion cs_tools/api/_rest_api_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def vcs_git_config_update(
*,
username: str,
access_token: str,
org_identifier: Identifier = None,
org_identifier: Optional[Identifier] = None,
branch_names: Optional[list[str]] = None,
commit_branch_name: Optional[str] = None,
enable_guid_mapping: bool = False,
Expand Down
22 changes: 9 additions & 13 deletions cs_tools/api/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@

from typing import Any, Union
import copy
import enum
import json
import uuid

import httpx

UNDEFINED = object()
UndefinedType = enum.Enum("_UndefinedTyped", "SENTINEL")
UNDEFINED = UndefinedType.SENTINEL
SYSTEM_USERS = {"system": "System User", "su": "Administrator Super-User", "tsadmin": "Administrator"}


Expand All @@ -34,7 +36,7 @@ def is_valid_guid(to_test: str) -> bool:
return str(guid) == to_test


def scrub_undefined_sentinel(inp: Any, *, null: Union[type[UNDEFINED], None]) -> Any:
def scrub_undefined_sentinel(inp: Any, *, null: Union[UndefinedType, None]) -> Any:
"""
Remove sentinel values from input parameters.
Expand Down Expand Up @@ -64,17 +66,11 @@ def obfuscate_sensitive_data(request_query: httpx.QueryParams) -> dict[str, Any]
# don't modify the actual keywords we want to build into the request
secure = copy.deepcopy({k: v for k, v in request_query.items() if k not in ("file", "files")})

for keyword in ("params", "data", "json"):
# .params on GET, POST, PUT
# .data, .json on POST, PUT
if secure.get(keyword, None) is None:
continue

for safe_word in SAFEWORDS:
try:
secure[keyword][safe_word] = "[secure]"
except KeyError:
pass
for safe_word in SAFEWORDS:
try:
secure[safe_word] = "[secure]"
except KeyError:
pass

return secure

Expand Down
11 changes: 10 additions & 1 deletion cs_tools/api/middlewares/logical_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,16 @@ def all( # noqa: A003

# BUGFIX: SCAL-199984 .. eta 10.0.0.cl
if not info or info[0]["type"] == "Default":
continue
info.append(
{
"type": "FALCON",
"header": {
"id": connection_guid,
"name": "Default Data Source",
"description": "This is the hidden, default data source for system content.",
},
}
)

table["data_source"] = info[0]["header"]
table["data_source"]["type"] = info[0]["type"]
Expand Down
12 changes: 9 additions & 3 deletions cs_tools/api/middlewares/metadata.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Optional
import functools as ft
import logging

from cs_tools import utils
Expand All @@ -27,6 +28,7 @@ class MetadataMiddleware:
def __init__(self, ts: ThoughtSpot):
self.ts = ts
self._details_cache: dict[GUID, dict] = {}
self._error_cache: set[GUID] = set()

def permissions(
self,
Expand Down Expand Up @@ -227,21 +229,24 @@ def fetch_header_and_extras(self, metadata_type: MetadataObjectType, guids: list
data = []

for guid in guids:
try:
if guid in self._error_cache:
continue

if guid in self._details_cache:
data.append(self._details_cache[guid])
continue
except KeyError:
pass

r = self.ts.api.v1.metadata_details(metadata_type=metadata_type, guids=[guid], show_hidden=True)

if r.is_error:
self._error_cache.add(guid)
log.warning(f"Failed to fetch details for {guid} ({metadata_type})")
continue

j = r.json()

if not j["storables"]:
self._error_cache.add(guid)
log.warning(f"Failed to fetch details for {guid} ({metadata_type})")
continue

Expand All @@ -267,6 +272,7 @@ def fetch_header_and_extras(self, metadata_type: MetadataObjectType, guids: list

return data

@ft.cache
def find_data_source_of_logical_table(self, guid: GUID) -> GUID:
"""
METADATA DETAILS is expensive. Here's our shortcut.
Expand Down
33 changes: 25 additions & 8 deletions cs_tools/api/middlewares/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def _cast(data: list[TableRowsFormat], headers_to_types: dict[str, str]) -> list
"""
TS_TO_PY_TYPES = {
"VARCHAR": str,
"CHAR": str,
"DOUBLE": float,
"FLOAT": float,
"BOOL": bool,
Expand Down Expand Up @@ -110,6 +111,8 @@ def __call__(
table: Optional[str] = None,
view: Optional[str] = None,
sample: bool = -1,
use_logical_column_names: bool = False,
include_dtype_mapping: bool = False,
) -> TableRowsFormat:
"""
Search a data source.
Expand Down Expand Up @@ -193,24 +196,34 @@ def __call__(
guid = d[0]["id"]

log.debug(f"executing search on guid {guid}\n\n{query}\n")
offset = 0
data_types = {}
data = []
offset = 0

while True:
r = self.ts.api.v1.search_data(
query_string=query, data_source_guid=guid, format_type="COMPACT", batchsize=sample, offset=offset
query_string=query,
data_source_guid=guid,
format_type="COMPACT",
batchsize=sample,
offset=offset,
)

d = r.json()

# Add the rows to our dataset
data.extend(d.pop("data"))

# Increment the row offset for the next batch
offset += d["rowCount"]

if d["rowCount"] < d["pageSize"]:
break
is_last_batch = d["rowCount"] < d["pageSize"]
is_exceed_sample_threshold = sample >= 0 and d["rowCount"] == d["pageSize"]

if sample >= 0 and d["rowCount"] == d["pageSize"]:
if is_last_batch or is_exceed_sample_threshold:
break

# Warn the user if the returned data exceeds the 1M row threshold
if offset % 500_000 == 0:
log.warning(
f"using the Data API to extract {offset / 1_000_000: >4,.1f}M+ "
Expand All @@ -219,10 +232,14 @@ def __call__(
)

# Get the data types
r = self.ts.api.v1.metadata_details(metadata_type="LOGICAL_TABLE", guids=[guid])
data_types = {c["header"]["name"]: c["dataType"] for c in r.json()["storables"][0]["columns"]}
column_name = "referenced_column_name" if use_logical_column_names else "name"
data_types = {column[column_name]: column["data_type"] for column in d["columnDetails"]}

# Remap column names based on the desired column state
d["columnNames"] = list(data_types.keys())

# Cleanups
data = _to_records(columns=d["columnNames"], rows=data)
data = _cast(data, headers_to_types=data_types)
return data

return (data, data_types) if include_dtype_mapping else data # type: ignore
1 change: 1 addition & 0 deletions cs_tools/api/middlewares/tql.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def query(
}

r = self.ts.api.v1.dataservice_query(data=data)
r.raise_for_status()
i = [json.loads(_) for _ in r.iter_lines() if _]

out = []
Expand Down
46 changes: 25 additions & 21 deletions cs_tools/api/middlewares/tsload.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from cs_tools.types import (
GUID as CycleID,
GroupPrivilege,
TableRowsFormat,
)
from cs_tools.updater import cs_tools_venv

Expand Down Expand Up @@ -147,7 +146,7 @@ def __init__(self, ts: ThoughtSpot):
self.ts = ts
self._cache_fp = TSLoadNodeRedirectCache()

def _check_for_redirect_auth(self, cycle_id: CycleID) -> None:
def _check_for_redirect_auth(self, *, cycle_id: CycleID) -> None:
"""
Attempt a login.
Expand All @@ -156,13 +155,18 @@ def _check_for_redirect_auth(self, cycle_id: CycleID) -> None:
applicable) to submit file uploads to. If that node is not the main node, then
we will be required to authorize again.
"""
# fmt: off
redirect_url = self.ts.api.v1._redirected_url_due_to_tsload_load_balancer
redirect_info = self._cache_fp.get_for(cycle_id)
# fmt: on

if redirect_info is not None:
is_currently_on_node = redirect_url is not None and redirect_url.host == redirect_info["host"]

if not is_currently_on_node:
redirected = self.ts.api._session.base_url.copy_with(host=redirect_info["host"], port=redirect_info["port"])
self.ts.api.v1._redirected_url_due_to_tsload_load_balancer = redirected

log.debug(f"redirecting to: {redirected}")
log.info(f"The tsload API is redirecting CS Tools to node -> {redirected}")
self.ts.login()

def _check_privileges(self) -> None:
Expand Down Expand Up @@ -199,7 +203,7 @@ def upload(
# not related to Remote TSLOAD API
ignore_node_redirect: bool = False,
http_timeout: float = 60.0,
) -> TableRowsFormat:
) -> CycleID:
"""
Load a file via tsload on a remote server.
Expand Down Expand Up @@ -300,10 +304,22 @@ def upload(
self._cache_fp.set_for(data["cycle_id"], redirect_info=data["node_address"])

if not ignore_node_redirect:
self._check_for_redirect_auth(data["cycle_id"])
self._check_for_redirect_auth(cycle_id=data["cycle_id"])

r = self.ts.api.v1.dataservice_dataload_start(cycle_id=data["cycle_id"], fd=fd, timeout=http_timeout)
log.info(f"{database}.{schema_}.{table} - {r.text}")
r.raise_for_status()

r = self.ts.api.v1.dataservice_dataload_bad_records(cycle_id=data["cycle_id"])

if r.text:
log.info(r.text)
r.raise_for_status()

r = self.ts.api.v1.dataservice_dataload_commit(cycle_id=data["cycle_id"])
log.info(r.text)
r.raise_for_status()

self.ts.api.v1.dataservice_dataload_start(cycle_id=data["cycle_id"], fd=fd, timeout=http_timeout)
self.ts.api.v1.dataservice_dataload_commit(cycle_id=data["cycle_id"])
return data["cycle_id"]

def status(
Expand All @@ -326,7 +342,7 @@ def status(
self._check_privileges()

if not ignore_node_redirect:
self._check_for_redirect_auth(cycle_id)
self._check_for_redirect_auth(cycle_id=cycle_id)

while True:
r = self.ts.api.v1.dataservice_dataload_status(cycle_id=cycle_id)
Expand All @@ -345,15 +361,3 @@ def status(
time.sleep(1)

return data

# @validate_arguments
# def bad_records(self, cycle_id: str) -> TableRowsFormat:
# """
# """
# r = self.ts.api.v1.ts_dataservice.load_params(cycle_id)
# params = r.json()
# print(params)
# raise

# r = self.ts.api.v1.ts_dataservice.bad_records(cycle_id)
# r.text
Loading

0 comments on commit 06fd7fd

Please sign in to comment.