-
Notifications
You must be signed in to change notification settings - Fork 296
[Experimental feature] enable orjson parser for whole project #2552
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
+201
−2
Merged
Changes from all commits
Commits
Show all changes
17 commits
Select commit
Hold shift + click to select a range
612b52f
Add optional orjson parser for faster dbt manifest loading
corsettigyg 0c275df
Merge pull request #18 from corsettigyg/feat/orjson-manifest-parser
corsettigyg 934b348
Update cosmos/dbt/graph.py
corsettigyg fe2ee15
Update cosmos/dbt/graph.py
corsettigyg 083e445
Update tests/dbt/test_orjson_parser.py
corsettigyg f691e80
🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
pre-commit-ci[bot] 8e202da
Merge branch 'main' into main
corsettigyg b05c9b4
Merge branch 'main' into main
corsettigyg ce404bf
Merge branch 'main' into main
corsettigyg 08eb627
Merge branch 'main' into main
corsettigyg 13dcb08
Fix null manifest handling in orjson parser to preserve backward compatibility
corsettigyg 32495a9
Apply suggestions from code review
corsettigyg c99b7ff
Merge branch 'main' into main
corsettigyg 22861df
Potential fix for pull request finding
corsettigyg 3661a41
🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
pre-commit-ci[bot] 8a6680a
Align orjson missing-dependency test with actual error message
corsettigyg 1040847
Merge branch 'main' into main
tatiana File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Some comments aren't visible on the classic Files Changed page.
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,140 @@ | ||
| """ | ||
| Unit tests for the experimental orjson parser feature. | ||
|
|
||
| Covers: | ||
| - Default setting value | ||
| - Error when orjson is enabled but not installed | ||
| - Standard json is used when setting is disabled | ||
| - orjson produces identical DbtGraph output to standard json | ||
| """ | ||
|
|
||
| from pathlib import Path | ||
| from unittest.mock import patch | ||
|
|
||
| import pytest | ||
|
|
||
| from cosmos import settings | ||
| from cosmos.config import ExecutionConfig, ProjectConfig, RenderConfig | ||
| from cosmos.dbt.graph import CosmosLoadDbtException, DbtGraph | ||
|
|
||
| SAMPLE_MANIFEST = Path(__file__).parent.parent / "sample/manifest.json" | ||
| DBT_PROJECTS_ROOT_DIR = Path(__file__).parent.parent.parent / "dev/dags/dbt" | ||
|
|
||
|
|
||
def _make_dbt_graph(manifest_path: Path = SAMPLE_MANIFEST) -> DbtGraph:
    """Build a ``DbtGraph`` for the ``jaffle_shop`` project.

    Args:
        manifest_path: Manifest file handed to ``ProjectConfig``; defaults to
            ``SAMPLE_MANIFEST``.

    Returns:
        A ``DbtGraph`` wired with a default ``RenderConfig`` and an
        ``ExecutionConfig`` pointing at ``DBT_PROJECTS_ROOT_DIR / "jaffle_shop"``.
    """
    project_config = ProjectConfig(manifest_path=manifest_path, project_name="jaffle_shop")
    execution_config = ExecutionConfig(dbt_project_path=DBT_PROJECTS_ROOT_DIR / "jaffle_shop")
    return DbtGraph(
        project=project_config,
        execution_config=execution_config,
        render_config=RenderConfig(),
    )
|
|
||
|
|
||
class TestOrjsonParserSettings:
    """Checks on the ``enable_orjson_parser`` setting itself."""

    def test_orjson_disabled_by_default(self):
        """The setting reads ``False`` when nothing has patched it."""
        assert settings.enable_orjson_parser is False

    def test_orjson_setting_can_be_overridden(self):
        """Patching the attribute to ``True`` is visible through ``settings``."""
        with patch.object(settings, "enable_orjson_parser", True):
            assert settings.enable_orjson_parser is True
|
|
||
|
|
||
class TestOrjsonParserMissingDependency:
    """Behaviour when the orjson setting is on but ``cosmos.dbt.graph.orjson`` is ``None``."""

    def test_raises_when_orjson_not_installed(self):
        """``load_from_dbt_manifest`` raises with an actionable install hint."""
        with patch("cosmos.dbt.graph.orjson", None), patch.object(settings, "enable_orjson_parser", True):
            graph = _make_dbt_graph()

            with pytest.raises(CosmosLoadDbtException) as raised:
                graph.load_from_dbt_manifest()

            message = str(raised.value)
            lowered = message.lower()
            assert "orjson" in lowered
            assert "not installed" in lowered
            # The install command is checked case-sensitively, as written in the message.
            assert "pip install orjson" in message

    def test_load_manifest_from_file_raises_without_orjson(self, tmp_path):
        """``_load_manifest_from_file`` raises the same install hint for a minimal manifest."""
        with patch("cosmos.dbt.graph.orjson", None), patch.object(settings, "enable_orjson_parser", True):
            manifest_file = tmp_path / "manifest.json"
            manifest_file.write_text('{"nodes": {}, "sources": {}, "exposures": {}}')
            graph = _make_dbt_graph()

            with pytest.raises(CosmosLoadDbtException, match="pip install orjson"):
                graph._load_manifest_from_file(manifest_file)
|
|
||
|
|
||
class TestOrjsonParserEquivalence:
    """Verify orjson and standard json produce identical DbtGraph output."""

    @patch.object(settings, "enable_orjson_parser", False)
    def test_standard_json_loads_manifest(self):
        """With the orjson setting off, loading the sample manifest yields at least one node."""
        dbt_graph = _make_dbt_graph()
        dbt_graph.load_from_dbt_manifest()

        assert len(dbt_graph.nodes) > 0

    def test_orjson_produces_same_nodes_as_standard_json(self):
        """Both parsers yield the same node ids and the same compared node attributes."""
        # Skip at run time when orjson is missing. ``pytest.importorskip`` avoids
        # reaching ``importlib.util`` through ``__import__("importlib")``, which only
        # works if another module has already imported that submodule.
        pytest.importorskip("orjson", reason="orjson not installed")

        graph_std = _make_dbt_graph()
        with patch.object(settings, "enable_orjson_parser", False):
            graph_std.load_from_dbt_manifest()

        graph_orjson = _make_dbt_graph()
        with patch.object(settings, "enable_orjson_parser", True):
            graph_orjson.load_from_dbt_manifest()

        assert graph_std.nodes.keys() == graph_orjson.nodes.keys()

        for node_id in graph_std.nodes:
            std_node = graph_std.nodes[node_id]
            fast_node = graph_orjson.nodes[node_id]
            assert std_node.unique_id == fast_node.unique_id
            assert std_node.resource_type == fast_node.resource_type
            assert std_node.depends_on == fast_node.depends_on
            assert std_node.tags == fast_node.tags

    def test_load_manifest_from_file_returns_same_dict(self, tmp_path):
        """_load_manifest_from_file returns the same structure regardless of parser."""
        # Same run-time skip as above: the comparison needs orjson importable.
        pytest.importorskip("orjson", reason="orjson not installed")

        import json

        data = {"nodes": {"model.test.foo": {"resource_type": "model"}}, "sources": {}, "exposures": {}}
        manifest_file = tmp_path / "manifest.json"
        manifest_file.write_text(json.dumps(data))

        dbt_graph = _make_dbt_graph()

        with patch.object(settings, "enable_orjson_parser", False):
            result_std = dbt_graph._load_manifest_from_file(manifest_file)

        with patch.object(settings, "enable_orjson_parser", True):
            result_orjson = dbt_graph._load_manifest_from_file(manifest_file)

        assert result_std == result_orjson

    def test_load_from_dbt_manifest_handles_null_manifest_root_per_loader_contract(self, tmp_path):
        """A manifest containing JSON ``null`` is treated as an empty dict (backward-compatible)."""
        manifest_file = tmp_path / "manifest.json"
        manifest_file.write_text("null")

        dbt_graph = _make_dbt_graph(manifest_file)

        with patch.object(settings, "enable_orjson_parser", False):
            assert dbt_graph._load_manifest_from_file(manifest_file) == {}

    def test_load_manifest_from_file_raises_on_invalid_root_type(self, tmp_path):
        """Non-dict, non-null roots (e.g. JSON arrays) raise CosmosLoadDbtException."""
        manifest_file = tmp_path / "manifest.json"
        manifest_file.write_text("[1, 2, 3]")

        dbt_graph = _make_dbt_graph(manifest_file)

        with patch.object(settings, "enable_orjson_parser", False):
            with pytest.raises(CosmosLoadDbtException, match="expected top-level JSON object"):
                dbt_graph._load_manifest_from_file(manifest_file)
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.