Merged
Commits (70 total; the diff below shows changes from 59 commits)
03aecba compiler cutover (nikhil-zlai, Mar 13, 2025)
422b218 Merge branch 'main' into nikhil/compile_switch (david-zlai, Mar 18, 2025)
77110b9 install rich (david-zlai, Mar 18, 2025)
7c3e882 no emojis (david-zlai, Mar 18, 2025)
6e924ad fix conditional (david-zlai, Mar 18, 2025)
98e7c3b outputNamespace (david-zlai, Mar 19, 2025)
43395b7 fix models compiling, explicitly importlib teams.py via file (david-zlai, Mar 19, 2025)
98a7044 deprecate teams.json (david-zlai, Mar 19, 2025)
6bdba7c Only `add` to the compile tracker if the object successfully writes (david-zlai, Mar 19, 2025)
13dd1eb linting (david-zlai, Mar 19, 2025)
8fb36f6 put this one back (david-zlai, Mar 19, 2025)
9591d90 migrate away from `production` and to `compiled` (david-zlai, Mar 19, 2025)
9bd7989 remove log (david-zlai, Mar 19, 2025)
9aed948 linting (david-zlai, Mar 19, 2025)
6917ecb try to fix sample_bootstrap (david-zlai, Mar 19, 2025)
91b240f move file to fix tox test (david-zlai, Mar 19, 2025)
b7eee2f glom (david-zlai, Mar 19, 2025)
4e59371 Add chronon_root for testing (david-zlai, Mar 19, 2025)
1abac40 Add ConfType (david-zlai, Mar 26, 2025)
89dbeb4 Rename py directory to python (david-zlai, Mar 26, 2025)
12b385a new python dir (david-zlai, Mar 26, 2025)
2b74847 fix test_run.py (david-zlai, Mar 26, 2025)
0f713ab tox passes (david-zlai, Mar 26, 2025)
77ee111 Rename py to python directory because pytest collision with its py di… (david-zlai, Mar 26, 2025)
82fe50f add python (david-zlai, Mar 26, 2025)
a93be8c update .gitignore (david-zlai, Mar 26, 2025)
d062d7e Retrack api files (david-zlai, Mar 26, 2025)
a40851c Merge branch 'davidhan/remane_to_python3' into davidhan/compile_switc… (david-zlai, Mar 26, 2025)
c2a444a update gitignore (david-zlai, Mar 26, 2025)
b109796 lint (david-zlai, Mar 26, 2025)
4e9819c remove teams.json (david-zlai, Mar 26, 2025)
5286601 Merge branch 'main' into davidhan/compile_switch_fork (david-zlai, Mar 27, 2025)
821f58a thrift not needed (david-zlai, Mar 27, 2025)
21a65ad linting (david-zlai, Mar 27, 2025)
5af399a ruff (david-zlai, Mar 27, 2025)
2c14668 switch over integration testing (david-zlai, Mar 27, 2025)
2e74b20 integration testing changes (david-zlai, Mar 27, 2025)
3addab2 Apply suggestions from code review (david-zlai, Mar 27, 2025)
dd7dc3b forgot some files (david-zlai, Mar 27, 2025)
3d4c843 don't track compiled files from canary (david-zlai, Mar 27, 2025)
f1310e5 lint (david-zlai, Mar 27, 2025)
31c2dd0 fix traceback (david-zlai, Mar 28, 2025)
41a5b26 checkpoint (david-zlai, Apr 1, 2025)
778ea9a checkpoint - require_backfill keeps being added (david-zlai, Apr 1, 2025)
bee53e6 do not modify objects. damn i miss scala (david-zlai, Apr 1, 2025)
aa3935a Disable rich for now and just print at the end (david-zlai, Apr 2, 2025)
40020e6 clean up (david-zlai, Apr 2, 2025)
09a89ac Merge branch 'main' into davidhan/compile_switch_fork (david-zlai, Apr 2, 2025)
12be656 format (david-zlai, Apr 2, 2025)
d15d194 add enum to constants (david-zlai, Apr 2, 2025)
516bb91 fix merge mode maps (david-zlai, Apr 3, 2025)
baf1175 Add to run.py (david-zlai, Apr 3, 2025)
d750cd6 python ruff (david-zlai, Apr 3, 2025)
4325df7 fix ruff (david-zlai, Apr 3, 2025)
5f15bbc Merge branch 'main' into davidhan/compile_switch_fork (david-zlai, Apr 3, 2025)
b910c61 scala fix (david-zlai, Apr 3, 2025)
c780b06 update constants (david-zlai, Apr 3, 2025)
3616f4d more comments (david-zlai, Apr 3, 2025)
2eef75a safer compiling (david-zlai, Apr 3, 2025)
20a7806 compile doesn't need chronon root anymore (david-zlai, Apr 3, 2025)
2c3806f common needs to be updated (david-zlai, Apr 3, 2025)
7090133 pr comments (david-zlai, Apr 3, 2025)
352ab9f Merge branch 'main' into davidhan/compile_switch_fork (david-zlai, Apr 3, 2025)
43d58b4 fixes (david-zlai, Apr 3, 2025)
09e69bb Merge branch 'davidhan/compile_switch_fork' of github.com:zipline-ai/… (david-zlai, Apr 3, 2025)
9d0c21e Everything else uses group-by so should this (david-zlai, Apr 4, 2025)
f5bbe08 try to fix (david-zlai, Apr 4, 2025)
db38029 try this again (david-zlai, Apr 4, 2025)
55f8f3a Merge branch 'main' into davidhan/compile_switch_fork (david-zlai, Apr 4, 2025)
0770673 fix constants (david-zlai, Apr 4, 2025)
9 changes: 7 additions & 2 deletions .gitignore
@@ -17,10 +17,15 @@
 **/.vscode/
 **/__pycache__/
 **/.DS_Store
-api/python/ai/chronon/api/
-api/python/ai/chronon/observability/
+api/python/test/canary/compiled/
 api/python/test/canary/production/
 api/python/test/sample/production/
+api/python/ai/chronon/api/
+api/python/ai/chronon/observability/
+api/python/ai/chronon/fetcher/
+api/python/ai/chronon/hub/
+api/python/ai/chronon/lineage/
+api/python/ai/chronon/orchestration/
 api/python/.coverage
 api/python/htmlcov/
 **/derby.log
9 changes: 4 additions & 5 deletions api/python/ai/chronon/cli/compile/compile_context.py
@@ -4,13 +4,12 @@
 
 import ai.chronon.cli.compile.parse_teams as teams
 from ai.chronon.api.common.ttypes import ConfigType
-from ai.chronon.api.ttypes import GroupBy, Join, StagingQuery, Team
+from ai.chronon.api.ttypes import GroupBy, Join, Model, StagingQuery, Team
 from ai.chronon.cli.compile.conf_validator import ConfValidator
 from ai.chronon.cli.compile.display.compile_status import CompileStatus
 from ai.chronon.cli.compile.display.compiled_obj import CompiledObj
 from ai.chronon.cli.compile.serializer import file2thrift
 from ai.chronon.cli.logger import get_logger, require
-from ai.chronon.model import Model
 
 logger = get_logger()
 
@@ -45,7 +44,7 @@ def __init__(self):
             ConfigInfo(folder_name="models", cls=Model, config_type=ConfigType.MODEL),
         ]
 
-        self.compile_status = CompileStatus()
+        self.compile_status = CompileStatus(use_live=False)
 
         self.existing_confs: Dict[Type, Dict[str, Any]] = {}
         for config_info in self.config_infos:
@@ -147,7 +146,7 @@ def _parse_existing_confs(self, obj_class: type) -> Dict[str, object]:
                         name=obj.metaData.name,
                         obj=obj,
                         file=obj.metaData.sourceFile,
-                        error=None,
+                        errors=None,
                         obj_type=obj_class.__name__,
                         tjson=open(full_path).read(),
                     )
@@ -157,7 +156,7 @@ def _parse_existing_confs(self, obj_class: type) -> Dict[str, object]:
                 )
 
             else:
-                logger.error(
+                logger.errors(
                     f"Parsed object from {full_path} has no metaData attribute"
                 )
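The `error` to `errors` rename above means each parsed conf now carries a list of failures rather than a single one. A minimal sketch of the CompiledObj shape implied by the constructor call in this diff; the field names come from the diff, while the types and comments are assumptions:

from dataclasses import dataclass
from typing import Any, List, Optional


@dataclass
class CompiledObj:
    name: str                              # fully qualified conf name, from obj.metaData.name
    obj: Any                               # parsed thrift object (GroupBy, Join, Model, ...)
    file: str                              # source file, from obj.metaData.sourceFile
    errors: Optional[List[BaseException]]  # None on success; renamed from `error` in this PR
    obj_type: str                          # class name, e.g. "GroupBy"
    tjson: str                             # raw thrift-JSON the object was deserialized from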
50 changes: 23 additions & 27 deletions api/python/ai/chronon/cli/compile/compiler.py
@@ -18,7 +18,7 @@
 class CompileResult:
     config_info: ConfigInfo
     obj_dict: Dict[str, Any]
-    error_dict: Dict[str, List[str]]
+    error_dict: Dict[str, List[BaseException]]
 
 
 class Compiler:
@@ -27,19 +27,20 @@ def __init__(self, compile_context: CompileContext):
         self.compile_context = compile_context
 
     def compile(
-        self, compile_context: CompileContext
+        self
     ) -> Dict[ConfigType, CompileResult]:
 
-        config_infos = compile_context.config_infos
+        config_infos = self.compile_context.config_infos
 
         compile_results = {}
 
         for config_info in config_infos:
 
-            compile_results[config_info.config_type] = self._compile_class_configs(
+            configs = self._compile_class_configs(
                 config_info
             )
-
+            compile_results[config_info.config_type] = configs
+
         # check if staging_output_dir exists
         staging_dir = self.compile_context.staging_output_dir()
         if os.path.exists(staging_dir):
@@ -54,6 +55,10 @@ def compile(
                 "Happens when every chronon config fails to compile or when no chronon configs exist."
             )
 
+        # TODO: temporarily just print out the final results of the compile until live fix is implemented:
+        # https://github.com/Textualize/rich/pull/3637
+        print(self.compile_context.compile_status.generate_update_display_text())
+
         return compile_results
 
@@ -83,7 +88,7 @@ def _compile_class_configs(self, config_info: ConfigInfo) -> CompileResult:
     def _write_objects_in_folder(
         self,
         compiled_objects: List[ai.chronon.cli.compile.display.compiled_obj.CompiledObj],
-    ) -> Tuple[Dict[str, Any], Dict[str, List[str]]]:
+    ) -> Tuple[Dict[str, Any], Dict[str, List[BaseException]]]:
 
         error_dict = {}
         object_dict = {}
@@ -92,40 +97,31 @@ def _write_objects_in_folder(
 
             if co.obj:
-
-                errors = self._write_object(co)
-
-                if errors:
-                    error_dict[co.name] = errors
+                if co.errors:
+                    error_dict[co.name] = co.errors
 
-                    for error in errors:
-                        print(f"\nError processing conf {co.name}: {error}")
+                    for error in co.errors:
+                        self.compile_context.compile_status.print_live_console(f"Error processing conf {co.name}: {error}")
                         traceback.print_exception(
-                            type(error), error, error.__traceback__
-                        )
-                        print("\n")
+                            type(error), error, error.__traceback__)
 
                 else:
+                    self._write_object(co)
                     object_dict[co.name] = co.obj
 
             else:
-                error_dict[co.file] = co.error
+                error_dict[co.file] = co.errors
 
-                print(f"\nError processing file {co.file}: {co.error}")
-                traceback.print_exception(
-                    type(co.error), co.error, co.error.__traceback__
-                )
-                print("\n")
+                self.compile_context.compile_status.print_live_console(f"Error processing file {co.file}: {co.errors}")
+                for error in co.errors:
+                    traceback.print_exception(
+                        type(error), error, error.__traceback__)
 
         return object_dict, error_dict
 
-    def _write_object(self, compiled_obj: CompiledObj) -> Optional[List[str]]:
+    def _write_object(self, compiled_obj: CompiledObj) -> Optional[List[BaseException]]:
 
         obj = compiled_obj.obj
 
-        errors = self.compile_context.validator.validate_obj(obj)
-        if errors:
-            return errors
-
         output_path = self.compile_context.staging_output_path(obj)
 
         folder = os.path.dirname(output_path)
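With compile() no longer taking an argument and error_dict now holding exceptions, driving the reworked API might look like the following. A hedged usage sketch, not code from this PR; the CompileContext() construction details are assumptions:

from ai.chronon.cli.compile.compile_context import CompileContext
from ai.chronon.cli.compile.compiler import Compiler

context = CompileContext()             # gathers config_infos, teams, and existing confs
results = Compiler(context).compile()  # compile() now reads everything off the context

for config_type, result in results.items():
    for name, errors in result.error_dict.items():
        # error_dict values are exception objects now, not pre-rendered strings
        print(config_type, name, [type(e).__name__ for e in errors])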
44 changes: 24 additions & 20 deletions api/python/ai/chronon/cli/compile/conf_validator.py
@@ -289,7 +289,7 @@ def can_skip_materialize(self, obj: object) -> List[str]:
             )
         return reasons
 
-    def validate_obj(self, obj: object) -> List[str]:
+    def validate_obj(self, obj: object) -> List[BaseException]:
         """
         Validate Chronon API obj against other entities in the repo.
 
@@ -333,7 +333,7 @@ def safe_to_overwrite(self, obj: object) -> bool:
 
     def _validate_derivations(
         self, pre_derived_cols: List[str], derivations: List[Derivation]
-    ) -> List[str]:
+    ) -> List[BaseException]:
         """
         Validate join/groupBy's derivation is defined correctly.
 
@@ -361,22 +361,23 @@
                 and derivation.expression not in ("ds", "ts")
             ):
                 errors.append(
-                    "Incorrect derivation expression {}, expression not found in pre-derived columns {}".format(
+                    ValueError("Incorrect derivation expression {}, expression not found in pre-derived columns {}"
+                    .format(
                         derivation.expression, pre_derived_cols
-                    )
+                    ))
                 )
             if derivation.name != "*":
                 if derivation.name in derived_columns:
                     errors.append(
-                        "Incorrect derivation name {} due to output column name conflict".format(
+                        ValueError("Incorrect derivation name {} due to output column name conflict".format(
                             derivation.name
                         )
-                    )
+                    ))
                 else:
                     derived_columns.add(derivation.name)
         return errors
 
-    def _validate_join(self, join: Join) -> List[str]:
+    def _validate_join(self, join: Join) -> List[BaseException]:
         """
         Validate join's status with materialized versions of group_bys
         included by the join.
@@ -406,25 +407,25 @@
             self._validate_group_by(group_by) for group_by in included_group_bys
         ]
         errors += [
-            f"join {join.metaData.name}'s underlying {error}"
+            ValueError(f"join {join.metaData.name}'s underlying {error}")
             for errors in group_by_errors
             for error in errors
         ]
         # Check if the production join is using non production groupBy
         if join.metaData.production and non_prod_old_group_bys:
             errors.append(
-                "join {} is production but includes the following non production group_bys: {}".format(
+                ValueError("join {} is production but includes the following non production group_bys: {}".format(
                     join.metaData.name, ", ".join(non_prod_old_group_bys)
                 )
-            )
+            ))
         # Check if the online join is using the offline groupBy
         if join.metaData.online:
             if offline_included_group_bys:
                 errors.append(
-                    "join {} is online but includes the following offline group_bys: {}".format(
+                    ValueError("join {} is online but includes the following offline group_bys: {}".format(
                         join.metaData.name, ", ".join(offline_included_group_bys)
                     )
-                )
+                ))
         # Only validate the join derivation when the underlying groupBy is valid
         group_by_correct = all(not errors for errors in group_by_errors)
         if join.derivations and group_by_correct:
@@ -438,7 +439,7 @@
             errors.extend(self._validate_derivations(columns, join.derivations))
         return errors
 
-    def _validate_group_by(self, group_by: GroupBy) -> List[str]:
+    def _validate_group_by(self, group_by: GroupBy) -> List[BaseException]:
         """
         Validate group_by's status with materialized versions of joins
         including the group_by.
@@ -465,26 +466,29 @@
         # batch features cannot contain hourly windows
         if (no_topic and non_temporal) and has_hourly_windows:
             errors.append(
-                f"group_by {group_by.metaData.name} is defined to be daily refreshed but contains hourly windows. "
+                ValueError(f"group_by {group_by.metaData.name} is defined to be daily refreshed but contains "
+                           f"hourly windows. "
+                )
             )
 
         # group by that are marked explicitly offline should not be present in
        # materialized online joins.
         if group_by.metaData.online is False and online_joins:
             errors.append(
-                "group_by {} is explicitly marked offline but included in "
+                ValueError("group_by {} is explicitly marked offline but included in "
                 "the following online joins: {}".format(
                     group_by.metaData.name, ", ".join(online_joins)
-                )
+                ))
             )
         # group by that are marked explicitly non-production should not be
         # present in materialized production joins.
         if prod_joins:
             if group_by.metaData.production is False:
                 errors.append(
-                    "group_by {} is explicitly marked as non-production but included in the following production "
+                    ValueError(
+                        "group_by {} is explicitly marked as non-production but included in the following production "
                         "joins: {}".format(group_by.metaData.name, ", ".join(prod_joins))
-                )
+                ))
         # if the group by is included in any of materialized production join,
         # set it to production in the materialized output.
         else:
@@ -507,8 +511,8 @@
                     and (src.events.query.timeColumn is None)
                 ):
                     errors.append(
-                        "Please set query.timeColumn for Cumulative Events Table: {}".format(
+                        ValueError("Please set query.timeColumn for Cumulative Events Table: {}".format(
                             src.events.table
-                        )
+                        ))
                     )
         return errors
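The mechanical change throughout this file is wrapping message strings in ValueError so that every validator returns List[BaseException]. One payoff, sketched below (a standalone illustration, not code from the PR): validation failures can flow through the same traceback-printing path the compiler uses for parse failures.

import traceback

def report(errors: list) -> None:
    for error in errors:
        # for an exception that was never raised, __traceback__ is None and this
        # prints just "ValueError: ..."; for a caught one it prints the full stack
        traceback.print_exception(type(error), error, error.__traceback__)

report([ValueError("join team.join_v1 is online but includes the following offline group_bys: gb_a")])  # hypothetical conf names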
16 changes: 9 additions & 7 deletions api/python/ai/chronon/cli/compile/display/class_tracker.py
@@ -15,7 +15,7 @@ class ClassTracker:
     def __init__(self):
         self.existing_objs: Dict[str, CompiledObj] = {}  # name to obj
         self.files_to_obj: Dict[str, List[Any]] = {}
-        self.files_to_errors: Dict[str, Exception] = {}
+        self.files_to_errors: Dict[str, List[Exception]] = {}
         self.new_objs: Dict[str, CompiledObj] = {}  # name to obj
         self.diff_result = DiffResult()
         self.deleted_names: List[str] = []
@@ -25,12 +25,12 @@ def add_existing(self, obj: CompiledObj) -> None:
 
     def add(self, compiled: CompiledObj) -> None:
 
-        if compiled.error:
+        if compiled.errors:
 
             if compiled.file not in self.files_to_errors:
                 self.files_to_errors[compiled.file] = []
 
-            self.files_to_errors[compiled.file].append(compiled.error)
+            self.files_to_errors[compiled.file].extend(compiled.errors)
 
         else:
             if compiled.file not in self.files_to_obj:
@@ -62,15 +62,17 @@ def _update_diff(self, compiled: CompiledObj) -> None:
             self.diff_result.updated.append(compiled.name)
 
         else:
-            self.diff_result.added.append(compiled.name)
+            if not compiled.errors:
+                self.diff_result.added.append(compiled.name)
+
 
     def close(self) -> None:
         self.closed = True
         self.recent_file = None
         self.deleted_names = list(self.existing_objs.keys() - self.new_objs.keys())
 
     def to_status(self) -> Text:
-        text = Text()
+        text = Text(overflow="fold", no_wrap=False)
 
         if self.existing_objs:
             text.append(
@@ -92,12 +94,12 @@ def to_status(self) -> Text:
         return text
 
     def to_errors(self) -> Text:
-        text = Text()
+        text = Text(overflow="fold", no_wrap=False)
 
         if self.files_to_errors:
             for file, error in self.files_to_errors.items():
                 text.append(" ERROR ", style="bold red")
-                text.append(f"- {file}: {error[0]}\n")
+                text.append(f"- {file}: {str(error)}\n")
 
         return text
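The switch to Text(overflow="fold", no_wrap=False) changes how long error lines render in the status output. A small sketch, assuming the rich library that an earlier commit in this PR installs:

from rich.console import Console
from rich.text import Text

text = Text(overflow="fold", no_wrap=False)
text.append(" ERROR ", style="bold red")
# a long message now folds onto continuation lines instead of truncating with an ellipsis
text.append("- group_bys/team/file.py: " + "ValueError: missing timeColumn; " * 20 + "\n")  # hypothetical file path
Console().print(text)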