# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under both the MIT license found in the
# LICENSE-MIT file in the root directory of this source tree and the Apache
# License, Version 2.0 found in the LICENSE-APACHE file in the root directory
# of this source tree.

def _get_artifact(result: "bxl_build_result") -> "artifact":
    """Return the sourcedb JSON artifact of a single build result.

    NOTE: the first artifact is always the source db json.
    T124989384 will make this nicer.
    """
    for built_artifact in result.artifacts():
        return built_artifact
    fail("Sourcedb rule must have at least one artifact")

# Build sourcedb for the given targets, and return a mapping from target names
# to the corresponding sourcedb JSON file location.
def do_build(
        ctx: "bxl_ctx",
        targets: ["configured_target_label"]) -> {"target_label": "artifact"}:
    # Ask for the `source-db-no-deps` sub-target of every requested target.
    sub_targets = [
        configured_sub_target(target, ["source-db-no-deps"])
        for target in targets
    ]
    results = ctx.build(sub_targets)

    # One sourcedb artifact per successfully built target.
    return {
        label.raw_target(): _get_artifact(result)
        for label, result in results.items()
    }
load(":build.bxl", "do_build")
load(":merge.bxl", "do_merge")
load(":query.bxl", "do_query")

def _build_entry_point(ctx: "bxl_ctx") -> None:
    """Entry point: query targets from CLI patterns, build per-target
    sourcedbs, merge them with the legacy merger, and print the result path."""
    cquery = ctx.cquery()
    actions = ctx.bxl_actions.action_factory()

    # Resolve every CLI target pattern before querying.
    resolved_patterns = [cquery.eval(pattern) for pattern in ctx.cli_args.target]
    pyre_targets = do_query(ctx, cquery, actions, resolved_patterns)
    sourcedbs = do_build(ctx, pyre_targets)

    merged = do_merge(
        ctx,
        actions,
        sourcedbs,
        merger_target = "prelude//python/tools/sourcedb_merger:legacy_merge",
        command_category = "pyre_legacy_merge_sourcedb",
    )
    ctx.output.print_json({"db": merged.abs_path()})

build = bxl(
    doc = """Build Python sourcedb for Pyre classic type checking server.

    It takes a list of target patterns (usually obtained from Pyre local configuration
    file), and will build source-db for those targets.
    """,
    impl = _build_entry_point,
    cli_args = {
        "target": cli_args.list(
            cli_args.string(
                doc = "Target pattern to build a source db for",
            ),
        ),
    },
)
load(":build.bxl", "do_build")
load(":merge.bxl", "do_merge")
load(":query.bxl", "do_query")

def _build_entry_point(ctx: "bxl_ctx") -> None:
    """Entry point: find the targets owning the CLI-supplied source files,
    build their sourcedbs, merge them, and print the merged db path."""
    cquery = ctx.cquery()
    actions = ctx.bxl_actions.action_factory()
    repo_root = ctx.root()

    # CLI paths are relative to the source root; absolutize before `owner()`.
    absolute_sources = [
        "{}/{}".format(repo_root, source)
        for source in ctx.cli_args.source
    ]
    owning_targets = do_query(ctx, cquery, actions, cquery.owner(absolute_sources))
    sourcedbs = do_build(ctx, owning_targets)

    merged = do_merge(
        ctx,
        actions,
        sourcedbs,
        merger_target = "prelude//python/tools/sourcedb_merger:merge",
        command_category = "pyre_merge_sourcedb",
    )
    ctx.output.print_json({"db": merged.abs_path()})

build = bxl(
    doc = """Build Python sourcedb for Pyre code navigation server.

    It takes a list of file paths, and will find the owner targets for all
    those files and build source-db for those owning targets.
    """,
    impl = _build_entry_point,
    cli_args = {
        "source": cli_args.list(
            cli_args.string(
                doc = "File to build a source db for (relative to source root)",
            ),
        ),
    },
)
def do_merge(
        ctx: "bxl_ctx",
        actions: "actions",
        built_sourcedbs: {"target_label": "artifact"},
        merger_target: str.type,
        command_category: str.type) -> "ensured_artifact":
    """Run `merger_target` over all built sourcedbs and return the merged
    `merged_db.json` artifact, ensured for output."""
    input_manifest = actions.write_json("merge_input.json", built_sourcedbs)
    output_db = actions.declare_output("merged_db.json")
    merger_run_info = ctx.analysis(
        merger_target,
        target_platform = "prelude//platforms:default",
    ).providers()[RunInfo]

    # Ensure all artifacts so merger is guaranteed to see them.
    ctx.output.ensure_multiple(built_sourcedbs.values())

    command = cmd_args(merger_run_info)
    command.add(input_manifest, "--root", ctx.root(), "--output", output_db.as_output())

    # The merger reads the individual sourcedbs through the manifest, so
    # declare them as hidden inputs of the action.
    command.hidden(built_sourcedbs.values())

    actions.run(command, category = command_category)
    return ctx.output.ensure(output_db)
load("@prelude//python:python.bzl", "PythonLibraryInfo", "PythonLibraryManifestsTSet")

# Buck rule kinds whose targets Pyre type checks.
BUCK_PYTHON_RULE_KINDS = [
    "python_binary",
    "python_library",
    "python_test",
]
BUCK_PYTHON_RULE_KIND_QUERY = "|".join(BUCK_PYTHON_RULE_KINDS)

def _filter_root_targets(
        query: "cqueryctx",
        target_patterns: "_iterable_of_target_pattern") -> "target_set":
    """Expand `target_patterns` into the set of Python targets to check."""
    # Find all Pure-Python targets
    candidate_targets = target_set()
    for pattern in target_patterns:
        candidate_targets += query.kind(
            BUCK_PYTHON_RULE_KIND_QUERY,
            pattern,
        )

    # Don't check generated rules
    filtered_targets = candidate_targets - query.attrfilter(
        "labels",
        "generated",
        candidate_targets,
    )

    # Do include unittest sources, which are marked as generated
    filtered_targets = filtered_targets + query.attrfilter(
        "labels",
        "unittest-library",
        candidate_targets,
    )

    # Provide an opt-out label
    filtered_targets = filtered_targets - query.attrfilter(
        "labels",
        "no_pyre",
        candidate_targets,
    )
    return filtered_targets

def _get_python_library_manifest(
        ctx: "bxl_ctx",
        target: "target_node") -> [PythonLibraryManifestsTSet.type, None]:
    """Return the manifests tset of `target`'s `source-db-no-deps` sub-target,
    or None if the sub-target or its PythonLibraryInfo is missing."""
    providers = ctx.analysis(target).providers()
    sub_target = providers[DefaultInfo].sub_targets.get("source-db-no-deps")
    if sub_target == None:
        return None
    python_library_info = sub_target.get(PythonLibraryInfo)
    if python_library_info == None:
        return None
    return python_library_info.manifests

def _expand_and_filter_dependencies(
        ctx: "bxl_ctx",
        actions: "actions",
        root_targets: "target_set") -> ["configured_target_label"]:
    """Traverse the transitive manifest tsets of `root_targets` and keep the
    configured labels of manifests that carry source types."""
    manifests_of_transitive_dependencies = actions.tset(
        PythonLibraryManifestsTSet,
        children = filter(None, [
            _get_python_library_manifest(ctx, target)
            for target in root_targets
        ]),
    )
    return [
        manifest.label.configured_target()
        for manifest in manifests_of_transitive_dependencies.traverse()
        if manifest.src_types != None
    ]

def do_query(
        ctx: "bxl_ctx",
        query: "cqueryctx",
        actions: "actions",
        target_patterns: "_iterable_of_target_pattern") -> ["configured_target_label"]:
    """Return every configured target Pyre will build for `target_patterns`."""
    root_targets = _filter_root_targets(query, target_patterns)
    return _expand_and_filter_dependencies(ctx, actions, root_targets)

def _do_query_entry_point(ctx: "bxl_ctx") -> None:
    targets = do_query(ctx, ctx.cquery(), ctx.bxl_actions.action_factory(), ctx.cli_args.target)
    ctx.output.print_json([target.raw_target() for target in targets])

query = bxl(
    doc = (
        # BUGFIX: the fragments previously joined as "transitivedependencies";
        # the trailing space keeps the rendered doc readable.
        "Expand target patterns and look for all targets in their transitive " +
        "dependencies that will be built by Pyre."
    ),
    impl = _do_query_entry_point,
    cli_args = {
        "target": cli_args.list(cli_args.target_expr()),
    },
)
import dataclasses
import json
import pathlib
from typing import Dict, Iterable, Mapping


class BuildMapLoadError(Exception):
    """Raised when sourcedb merger input JSON does not have the expected shape."""


@dataclasses.dataclass(frozen=True)
class Target:
    # Buck target label, used verbatim as the identity of a build map.
    name: str


@dataclasses.dataclass(frozen=True)
class PartialBuildMap:
    """Build map of a single target: artifact path -> source path."""

    content: Mapping[str, str] = dataclasses.field(default_factory=dict)

    @staticmethod
    def load_from_json(input_json: object) -> "PartialBuildMap":
        """Validate and parse one build map.

        Entries whose key does not end in `.py`/`.pyi` are silently dropped.
        Raises `BuildMapLoadError` if `input_json` is not a str -> str dict.
        """
        if not isinstance(input_json, dict):
            raise BuildMapLoadError(
                # BUGFIX: the two literals used to concatenate without a
                # space, producing "a dict.Got ...".
                "Input JSON for build map should be a dict. "
                f"Got {type(input_json)} instead"
            )
        result: Dict[str, str] = {}
        for key, value in input_json.items():
            if not isinstance(key, str):
                raise BuildMapLoadError(
                    f"Build map keys are expected to be strings. Got `{key}`."
                )
            if not isinstance(value, str):
                raise BuildMapLoadError(
                    f"Build map values are expected to be strings. Got `{value}`."
                )
            # Only Python sources are relevant for type checking.
            if pathlib.Path(key).suffix not in (".py", ".pyi"):
                continue
            result[key] = value
        return PartialBuildMap(result)

    @staticmethod
    def load_from_path(input_path: pathlib.Path) -> "PartialBuildMap":
        """Read and parse the build map JSON stored at `input_path`."""
        with open(input_path, "r") as input_file:
            return PartialBuildMap.load_from_json(json.load(input_file))


@dataclasses.dataclass(frozen=True)
class TargetEntry:
    # A target paired with its already-loaded build map.
    target: Target
    build_map: PartialBuildMap


def load_targets_and_build_maps_from_json(
    buck_root: pathlib.Path, input_json: object
) -> Iterable[TargetEntry]:
    """Lazily yield a `TargetEntry` per `target name -> sourcedb path` entry.

    Each sourcedb path is resolved against `buck_root` and loaded from disk.
    Raises `BuildMapLoadError` if `input_json` is not a str -> str dict.
    """
    if not isinstance(input_json, dict):
        raise BuildMapLoadError(
            f"Input JSON should be a dict. Got {type(input_json)} instead"
        )
    for key, value in input_json.items():
        if not isinstance(key, str):
            raise BuildMapLoadError(
                f"Target keys are expected to be strings. Got `{key}`."
            )
        if not isinstance(value, str):
            raise BuildMapLoadError(
                f"Sourcedb file paths are expected to be strings. Got `{value}`."
            )
        yield TargetEntry(
            target=Target(key),
            build_map=PartialBuildMap.load_from_path(buck_root / value),
        )


def load_targets_and_build_maps_from_path(
    buck_root: pathlib.Path, input_path: str
) -> Iterable[TargetEntry]:
    """Load the merger input file and yield one `TargetEntry` per listed target."""
    with open(buck_root / input_path, "r") as input_file:
        return load_targets_and_build_maps_from_json(buck_root, json.load(input_file))
import argparse
import pathlib
import sys

from typing import Sequence

# pyre-ignore[21]: This import path only exists in BUCK:
from __legacy_merge__ import inputs, legacy_outputs


def run_merge(root: pathlib.Path, input_file: str, output_file: str) -> None:
    """Merge the per-target build maps listed in `input_file` and write the
    legacy merge result (build map + dropped targets) to `output_file`.

    Both paths are interpreted relative to `root`.
    """
    entries = inputs.load_targets_and_build_maps_from_path(root, input_file)
    merged = legacy_outputs.merge_partial_build_maps(entries)
    merged.write_json_file(root / output_file)


def main(argv: Sequence[str]) -> None:
    """Parse the command line and run the legacy merge."""
    parser = argparse.ArgumentParser()
    parser.add_argument("input", type=str)
    parser.add_argument("-r", "--root", required=True, type=pathlib.Path)
    parser.add_argument("-o", "--output", required=True, type=str)
    args = parser.parse_args(argv[1:])
    run_merge(args.root, args.input, args.output)


if __name__ == "__main__":
    main(sys.argv)
import dataclasses
import json
import pathlib
from typing import Dict, Iterable, Mapping, Optional, Set

from . import inputs, outputs


@dataclasses.dataclass(frozen=True)
class ConflictInfo:
    """Records why a target's build map was dropped from the merged result."""

    conflict_with: inputs.Target
    artifact_path: str
    preserved_source_path: str
    dropped_source_path: str

    def to_json(self) -> Dict[str, str]:
        return {
            "conflict_with": self.conflict_with.name,
            "artifact_path": self.artifact_path,
            "preserved_source_path": self.preserved_source_path,
            "dropped_source_path": self.dropped_source_path,
        }


@dataclasses.dataclass(frozen=True)
class FullBuildMap:
    """Merged build map: artifact path -> `outputs.SourceInfo`."""

    content: Mapping[str, outputs.SourceInfo] = dataclasses.field(default_factory=dict)

    def get_all_targets(self) -> Set[inputs.Target]:
        # Only the values carry targets; no need to iterate items().
        return {source_info.target for source_info in self.content.values()}

    def to_json(self) -> Dict[str, str]:
        return {
            artifact_path: source_info.source_path
            for artifact_path, source_info in self.content.items()
        }


@dataclasses.dataclass(frozen=True)
class ConflictMap:
    """Dropped targets keyed by target, each with its conflict explanation."""

    content: Mapping[inputs.Target, ConflictInfo] = dataclasses.field(
        default_factory=dict
    )

    def to_json(self) -> Dict[str, Dict[str, str]]:
        return {
            target.name: conflict_info.to_json()
            for target, conflict_info in self.content.items()
        }


@dataclasses.dataclass(frozen=True)
class MergeResult:
    """Output of the legacy merge: the merged build map plus dropped targets."""

    build_map: FullBuildMap
    dropped_targets: ConflictMap

    def to_json(self) -> Dict[str, object]:
        return {
            "build_map": self.build_map.to_json(),
            # Count the set directly instead of materializing a list of names.
            "built_targets_count": len(self.build_map.get_all_targets()),
            "dropped_targets": self.dropped_targets.to_json(),
        }

    def write_json_file(self, path: pathlib.Path) -> None:
        with open(path, "w") as output_file:
            json.dump(self.to_json(), output_file, indent=2)


def detect_conflict(
    build_map: Mapping[str, outputs.SourceInfo],
    target: inputs.Target,
    merge_candidate: Mapping[str, str],
) -> Optional[ConflictInfo]:
    """Return the first conflict between `merge_candidate` and `build_map`,
    or None if the candidate can be merged cleanly.

    NOTE(review): `target` is currently unused here but kept for signature
    stability with callers.
    """
    for artifact_path, source_path in merge_candidate.items():
        existing_source_info = build_map.get(artifact_path, None)
        if (
            existing_source_info is not None
            and source_path != existing_source_info.source_path
        ):
            return ConflictInfo(
                conflict_with=existing_source_info.target,
                artifact_path=artifact_path,
                preserved_source_path=existing_source_info.source_path,
                dropped_source_path=source_path,
            )
    return None


def insert_build_map_inplace(
    build_map: Dict[str, outputs.SourceInfo],
    target: inputs.Target,
    merge_candidate: Mapping[str, str],
) -> None:
    """Add `merge_candidate` entries to `build_map`; existing entries win."""
    for artifact_path, source_path in merge_candidate.items():
        build_map.setdefault(
            artifact_path, outputs.SourceInfo(source_path=source_path, target=target)
        )


def merge_partial_build_map_inplace(
    build_map: Dict[str, outputs.SourceInfo],
    dropped_targets: Dict[inputs.Target, ConflictInfo],
    target_entry: inputs.TargetEntry,
) -> None:
    """Merge one target's build map, or record it as dropped on conflict.

    Buck-injected bootstrap modules are excluded before conflict detection,
    since every target carries its own copy of them.
    """
    target = target_entry.target
    filtered_mappings = {
        artifact_path: source_path
        for artifact_path, source_path in target_entry.build_map.content.items()
        if artifact_path
        not in (
            "__manifest__.py",
            "__test_main__.py",
            "__test_modules__.py",
        )
    }
    conflict = detect_conflict(build_map, target, filtered_mappings)
    if conflict is not None:
        dropped_targets[target_entry.target] = conflict
    else:
        insert_build_map_inplace(build_map, target, filtered_mappings)


def merge_partial_build_maps(
    target_entries: Iterable[inputs.TargetEntry],
) -> MergeResult:
    """Merge all build maps deterministically (sorted by target name).

    A target conflicting with the accumulated map is dropped wholesale and
    reported in the resulting `ConflictMap`.
    """
    build_map: Dict[str, outputs.SourceInfo] = {}
    dropped_targets: Dict[inputs.Target, ConflictInfo] = {}
    for target_entry in sorted(target_entries, key=lambda entry: entry.target.name):
        merge_partial_build_map_inplace(
            build_map,
            dropped_targets,
            target_entry,
        )
    return MergeResult(FullBuildMap(build_map), ConflictMap(dropped_targets))
import argparse
import pathlib
import sys

from typing import Sequence

from . import inputs, outputs


def run_merge(root: pathlib.Path, input_file: str, output_file: str) -> None:
    """Merge the per-target build maps listed in `input_file` and write the
    combined build map JSON to `output_file`.

    Both paths are interpreted relative to `root`.
    """
    entries = inputs.load_targets_and_build_maps_from_path(root, input_file)
    merged = outputs.merge_partial_build_maps(entries)
    merged.write_build_map_json_file(root / output_file)


def main(argv: Sequence[str]) -> None:
    """Parse the command line and run the merge."""
    parser = argparse.ArgumentParser()
    parser.add_argument("input", type=str)
    parser.add_argument("-r", "--root", required=True, type=pathlib.Path)
    parser.add_argument("-o", "--output", required=True, type=str)
    args = parser.parse_args(argv[1:])
    run_merge(args.root, args.input, args.output)


if __name__ == "__main__":
    main(sys.argv)
import dataclasses
import json
import pathlib
from typing import Dict, Iterable, Mapping

from . import inputs


@dataclasses.dataclass(frozen=True)
class SourceInfo:
    """A source file path paired with the target that owns it."""

    source_path: str
    target: inputs.Target


@dataclasses.dataclass(frozen=True)
class FullBuildMap:
    """Merged build map: artifact path -> `SourceInfo`."""

    content: Mapping[str, SourceInfo] = dataclasses.field(default_factory=dict)

    def to_build_map_json(self) -> Dict[str, str]:
        """Project the map down to artifact path -> source path."""
        return {
            artifact_path: info.source_path
            for artifact_path, info in self.content.items()
        }

    def write_build_map_json_file(self, path: pathlib.Path) -> None:
        """Serialize `to_build_map_json()` to `path` as indented JSON."""
        with open(path, "w") as output_file:
            json.dump(self.to_build_map_json(), output_file, indent=2)


def merge_partial_build_map_inplace(
    sofar: Dict[str, SourceInfo],
    target_entry: inputs.TargetEntry,
) -> None:
    """Fold one target's build map into `sofar`.

    The first target to claim an artifact path wins; later duplicates are
    ignored.
    """
    for artifact_path, source_path in target_entry.build_map.content.items():
        sofar.setdefault(
            artifact_path,
            SourceInfo(source_path=source_path, target=target_entry.target),
        )


def merge_partial_build_maps(
    target_entries: Iterable[inputs.TargetEntry],
) -> FullBuildMap:
    """Merge all build maps; earlier entries take precedence on duplicates."""
    merged: Dict[str, SourceInfo] = {}
    for entry in target_entries:
        merge_partial_build_map_inplace(merged, entry)
    return FullBuildMap(merged)