From 3dfdcfdd94743a81af908e149bb24d08eb728746 Mon Sep 17 00:00:00 2001 From: Harvey Tuch Date: Mon, 30 Mar 2020 16:32:17 -0400 Subject: [PATCH 1/3] tools: refactor protoxform to distinct transform/pretty-print stages. This patch moves protoxform to a protoc plugin that is solely in the business of transforming FileDescriptorProtos to FileDescriptorProtos, taking into account package status and performing vN -> v(N+1) migrations. A distinct tool, protoprint.py, which has the guts of the former protoxform.py (basically no changes), is then invoked by proto_sync.py during source tree copy-back. The rationale for this is split is that proto_sync.py needs to be able to combine both the v2 next major version candidate and the hand edited v3 protos when generating the v3 shadow, once v3 becomes canonical. This is easiest to do when we operate on FileDescriptorProtos. Risk level: Low Testing: proto_format.sh (no changes). Signed-off-by: Harvey Tuch --- tools/proto_format/proto_format.sh | 2 + tools/proto_format/proto_sync.py | 35 +- tools/protoxform/BUILD | 23 +- tools/protoxform/protoprint.py | 595 ++++++++++++++++++ tools/protoxform/protoxform.py | 572 +---------------- tools/protoxform/protoxform_test.sh | 2 + tools/protoxform/protoxform_test_helper.py | 38 +- .../v2/discovery_service.proto.active.gold | 2 +- ...ajor_version_candidate.envoy_internal.gold | 2 +- ...ce.proto.next_major_version_candidate.gold | 2 +- 10 files changed, 692 insertions(+), 581 deletions(-) create mode 100755 tools/protoxform/protoprint.py diff --git a/tools/proto_format/proto_format.sh b/tools/proto_format/proto_format.sh index 8c3a58fc74a99..f45c4c45e99de 100755 --- a/tools/proto_format/proto_format.sh +++ b/tools/proto_format/proto_format.sh @@ -38,6 +38,8 @@ bazel build ${BAZEL_BUILD_OPTIONS} --//tools/api_proto_plugin:default_type_db_ta TOOLS=$(dirname $(dirname $(realpath $0))) # to satisfy dependency on api_proto_plugin export PYTHONPATH="$TOOLS" +# Build protoprint for use in proto_sync.py. +bazel build ${BAZEL_BUILD_OPTIONS} //tools/protoxform:protoprint ./tools/proto_format/proto_sync.py "--mode=$1" ${PROTO_TARGETS} bazel build ${BAZEL_BUILD_OPTIONS} //tools/type_whisperer:api_build_file diff --git a/tools/proto_format/proto_sync.py b/tools/proto_format/proto_sync.py index 79dc4b25251e8..d1e9cf9d8a535 100755 --- a/tools/proto_format/proto_sync.py +++ b/tools/proto_format/proto_sync.py @@ -3,6 +3,8 @@ # Diff or copy protoxform artifacts from Bazel cache back to the source tree. import argparse +import functools +import multiprocessing as mp import os import pathlib import re @@ -46,7 +48,7 @@ IMPORT_REGEX = re.compile('import "(.*)";') SERVICE_REGEX = re.compile('service \w+ {') -PACKAGE_REGEX = re.compile('\npackage ([^="]*);') +PACKAGE_REGEX = re.compile('\npackage: "([^"]*)"') PREVIOUS_MESSAGE_TYPE_REGEX = re.compile(r'previous_message_type\s+=\s+"([^"]*)";') @@ -87,7 +89,22 @@ def GetDestinationPath(src): matches[0])).joinpath(src_path.name.split('.')[0] + ".proto") -def SyncProtoFile(cmd, src, dst_root): +def ProtoPrint(src, dst): + """Pretty-print FileDescriptorProto to a destination file. + + Args: + src: source path for FileDescriptorProto. + dst: destination path for formatted proto. + """ + print('ProtoPrint %s' % dst) + contents = subprocess.check_output([ + 'bazel-bin/tools/protoxform/protoprint', src, + './bazel-bin/tools/protoxform/protoprint.runfiles/envoy/tools/type_whisperer/api_type_db.pb_text' + ]) + dst.write_bytes(contents) + + +def SyncProtoFile(cmd, dst_root, src): """Diff or in-place update a single proto file from protoxform.py Bazel cache artifacts." Args: @@ -100,7 +117,7 @@ def SyncProtoFile(cmd, src, dst_root): rel_dst_path = GetDestinationPath(src) dst = dst_root.joinpath(rel_dst_path) dst.parent.mkdir(0o755, True, True) - shutil.copyfile(src, str(dst)) + ProtoPrint(src, dst) return ['//%s:pkg' % str(rel_dst_path.parent)] @@ -250,17 +267,17 @@ def GitStatus(path): def Sync(api_root, mode, labels, shadow): - pkg_deps = [] with tempfile.TemporaryDirectory() as tmp: dst_dir = pathlib.Path(tmp).joinpath("b") + paths = [] for label in labels: - pkg_deps += SyncProtoFile(mode, utils.BazelBinPathForOutputArtifact(label, '.active.proto'), - dst_dir) - pkg_deps += SyncProtoFile( - mode, + paths.append(utils.BazelBinPathForOutputArtifact(label, '.active.proto')) + paths.append( utils.BazelBinPathForOutputArtifact( label, '.next_major_version_candidate.envoy_internal.proto' - if shadow else '.next_major_version_candidate.proto'), dst_dir) + if shadow else '.next_major_version_candidate.proto')) + with mp.Pool() as p: + pkg_deps = p.map(functools.partial(SyncProtoFile, mode, dst_dir), paths) SyncBuildFiles(mode, dst_dir) current_api_dir = pathlib.Path(tmp).joinpath("a") diff --git a/tools/protoxform/BUILD b/tools/protoxform/BUILD index 268579f06be62..dec0d2e9b90f8 100644 --- a/tools/protoxform/BUILD +++ b/tools/protoxform/BUILD @@ -8,11 +8,32 @@ py_binary( "protoxform.py", "utils.py", ], - data = ["//:.clang-format"], python_version = "PY3", visibility = ["//visibility:public"], deps = [ "//tools/api_proto_plugin", + "//tools/type_whisperer:api_type_db_proto_py_proto", + "@com_envoyproxy_protoc_gen_validate//validate:validate_py", + "@com_github_cncf_udpa//udpa/annotations:pkg_py_proto", + "@com_google_googleapis//google/api:annotations_py_proto", + "@envoy_api_canonical//envoy/annotations:pkg_py_proto", + ], +) + +py_binary( + name = "protoprint", + srcs = [ + "options.py", + "protoprint.py", + "utils.py", + ], + data = [ + "//:.clang-format", + "//tools/type_whisperer:api_type_db.pb_text", + ], + python_version = "PY3", + visibility = ["//visibility:public"], + deps = [ "//tools/type_whisperer", "//tools/type_whisperer:api_type_db_proto_py_proto", "@com_envoyproxy_protoc_gen_validate//validate:validate_py", diff --git a/tools/protoxform/protoprint.py b/tools/protoxform/protoprint.py new file mode 100755 index 0000000000000..02d2658075de4 --- /dev/null +++ b/tools/protoxform/protoprint.py @@ -0,0 +1,595 @@ +# FileDescriptorProtos pretty-printer tool. +# +# protoprint.py provides the canonical .proto formatting for the Envoy APIs. +# +# See https://github.com/google/protobuf/blob/master/src/google/protobuf/descriptor.proto +# for the underlying protos mentioned in this file. +# +# Usage: protoprint.py + +from collections import deque +import functools +import io +import os +import pathlib +import re +import subprocess +import sys + +from tools.api_proto_plugin import annotations +from tools.api_proto_plugin import plugin +from tools.api_proto_plugin import traverse +from tools.api_proto_plugin import visitor +from tools.protoxform import options as protoxform_options +from tools.protoxform import utils +from tools.type_whisperer import type_whisperer +from tools.type_whisperer.types_pb2 import Types + +from google.protobuf import descriptor_pb2 +from google.protobuf import text_format + +# Note: we have to include those proto definitions to make FormatOptions work, +# this also serves as whitelist of extended options. +from google.api import annotations_pb2 as _ +from validate import validate_pb2 as _ +from envoy.annotations import deprecation_pb2 as _ +from envoy.annotations import resource_pb2 +from udpa.annotations import migrate_pb2 +from udpa.annotations import sensitive_pb2 as _ +from udpa.annotations import status_pb2 + +NEXT_FREE_FIELD_MIN = 5 + + +class ProtoXformError(Exception): + """Base error class for the protoxform module.""" + + +def ExtractClangProtoStyle(clang_format_text): + """Extract a key:value dictionary for proto formatting. + + Args: + clang_format_text: text from a .clang-format file. + + Returns: + key:value dictionary suitable for passing to clang-format --style. + """ + lang = None + format_dict = {} + for line in clang_format_text.split('\n'): + if lang is None or lang != 'Proto': + match = re.match('Language:\s+(\w+)', line) + if match: + lang = match.group(1) + continue + match = re.match('(\w+):\s+(\w+)', line) + if match: + key, value = match.groups() + format_dict[key] = value + else: + break + return str(format_dict) + + +# Ensure we are using the canonical clang-format proto style. +CLANG_FORMAT_STYLE = ExtractClangProtoStyle(pathlib.Path('.clang-format').read_text()) + + +def ClangFormat(contents): + """Run proto-style oriented clang-format over given string. + + Args: + contents: a string with proto contents. + + Returns: + clang-formatted string + """ + return subprocess.run( + ['clang-format', + '--style=%s' % CLANG_FORMAT_STYLE, '--assume-filename=.proto'], + input=contents.encode('utf-8'), + stdout=subprocess.PIPE).stdout + + +def FormatBlock(block): + """Append \n to a .proto section (e.g. + + comment, message definition, etc.) if non-empty. + + Args: + block: a string representing the section. + + Returns: + A string with appropriate whitespace. + """ + if block.strip(): + return block + '\n' + return '' + + +def FormatComments(comments): + """Format a list of comment blocks from SourceCodeInfo. + + Prefixes // to each line, separates blocks by spaces. + + Args: + comments: a list of blocks, each block is a list of strings representing + lines in each block. + + Returns: + A string reprenting the formatted comment blocks. + """ + + # TODO(htuch): not sure why this is needed, but clang-format does some weird + # stuff with // comment indents when we have these trailing \ + def FixupTrailingBackslash(s): + return s[:-1].rstrip() if s.endswith('\\') else s + + comments = '\n\n'.join( + '\n'.join(['//%s' % FixupTrailingBackslash(line) + for line in comment.split('\n')[:-1]]) + for comment in comments) + return FormatBlock(comments) + + +def CreateNextFreeFieldXform(msg_proto): + """Return the next free field number annotation transformer of a message. + + Args: + msg_proto: DescriptorProto for message. + + Returns: + the next free field number annotation transformer. + """ + next_free = max( + sum([ + [f.number + 1 for f in msg_proto.field], + [rr.end for rr in msg_proto.reserved_range], + [ex.end for ex in msg_proto.extension_range], + ], [1])) + return lambda _: next_free if next_free > NEXT_FREE_FIELD_MIN else None + + +def FormatTypeContextComments(type_context, annotation_xforms=None): + """Format the leading/trailing comments in a given TypeContext. + + Args: + type_context: contextual information for message/enum/field. + annotation_xforms: a dict of transformers for annotations in leading + comment. + + Returns: + Tuple of formatted leading and trailing comment blocks. + """ + leading_comment = type_context.leading_comment + if annotation_xforms: + leading_comment = leading_comment.getCommentWithTransforms(annotation_xforms) + leading = FormatComments(list(type_context.leading_detached_comments) + [leading_comment.raw]) + trailing = FormatBlock(FormatComments([type_context.trailing_comment])) + return leading, trailing + + +def FormatHeaderFromFile(source_code_info, file_proto): + """Format proto header. + + Args: + source_code_info: SourceCodeInfo object. + file_proto: FileDescriptorProto for file. + + Returns: + Formatted proto header as a string. + """ + # Load the type database. + typedb = utils.GetTypeDb() + # Figure out type dependencies in this .proto. + types = Types() + text_format.Merge(traverse.TraverseFile(file_proto, type_whisperer.TypeWhispererVisitor()), types) + type_dependencies = sum([list(t.type_dependencies) for t in types.types.values()], []) + for service in file_proto.service: + for m in service.method: + type_dependencies.extend([m.input_type[1:], m.output_type[1:]]) + # Determine the envoy/ import paths from type deps. + envoy_proto_paths = set( + typedb.types[t].proto_path + for t in type_dependencies + if t.startswith('envoy.') and typedb.types[t].proto_path != file_proto.name) + + def CamelCase(s): + return ''.join(t.capitalize() for t in re.split('[\._]', s)) + + package_line = 'package %s;\n' % file_proto.package + file_block = '\n'.join(['syntax = "proto3";\n', package_line]) + + options = descriptor_pb2.FileOptions() + options.java_outer_classname = CamelCase(os.path.basename(file_proto.name)) + options.java_multiple_files = True + options.java_package = 'io.envoyproxy.' + file_proto.package + + # This is a workaround for C#/Ruby namespace conflicts between packages and + # objects, see https://github.com/envoyproxy/envoy/pull/3854. + # TODO(htuch): remove once v3 fixes this naming issue in + # https://github.com/envoyproxy/envoy/issues/8120. + if file_proto.package in ['envoy.api.v2.listener', 'envoy.api.v2.cluster']: + qualified_package = '.'.join(s.capitalize() for s in file_proto.package.split('.')) + 'NS' + options.csharp_namespace = qualified_package + options.ruby_package = qualified_package + + if file_proto.service: + options.java_generic_services = True + + if file_proto.options.HasExtension(migrate_pb2.file_migrate): + options.Extensions[migrate_pb2.file_migrate].CopyFrom( + file_proto.options.Extensions[migrate_pb2.file_migrate]) + + if file_proto.options.HasExtension( + status_pb2.file_status) and file_proto.package.endswith('alpha'): + options.Extensions[status_pb2.file_status].CopyFrom( + file_proto.options.Extensions[status_pb2.file_status]) + + options.Extensions[status_pb2.file_status].package_version_status = file_proto.options.Extensions[ + status_pb2.file_status].package_version_status + + options_block = FormatOptions(options) + + requires_versioning_import = any( + protoxform_options.GetVersioningAnnotation(m.options) for m in file_proto.message_type) + + envoy_imports = list(envoy_proto_paths) + google_imports = [] + infra_imports = [] + misc_imports = [] + public_imports = [] + + for idx, d in enumerate(file_proto.dependency): + if idx in file_proto.public_dependency: + public_imports.append(d) + continue + elif d.startswith('envoy/annotations') or d.startswith('udpa/annotations'): + infra_imports.append(d) + elif d.startswith('envoy/'): + # We ignore existing envoy/ imports, since these are computed explicitly + # from type_dependencies. + pass + elif d.startswith('google/'): + google_imports.append(d) + elif d.startswith('validate/'): + infra_imports.append(d) + elif d in ['udpa/annotations/versioning.proto', 'udpa/annotations/status.proto']: + # Skip, we decide to add this based on requires_versioning_import and options. + pass + else: + misc_imports.append(d) + + if options.HasExtension(status_pb2.file_status): + infra_imports.append('udpa/annotations/status.proto') + + if requires_versioning_import: + infra_imports.append('udpa/annotations/versioning.proto') + + def FormatImportBlock(xs): + if not xs: + return '' + return FormatBlock('\n'.join(sorted('import "%s";' % x for x in set(xs) if x))) + + def FormatPublicImportBlock(xs): + if not xs: + return '' + return FormatBlock('\n'.join(sorted('import public "%s";' % x for x in xs))) + + import_block = '\n'.join( + map(FormatImportBlock, [envoy_imports, google_imports, misc_imports, infra_imports])) + import_block += '\n' + FormatPublicImportBlock(public_imports) + comment_block = FormatComments(source_code_info.file_level_comments) + + return ''.join(map(FormatBlock, [file_block, import_block, options_block, comment_block])) + + +def NormalizeFieldTypeName(type_context, field_fqn): + """Normalize a fully qualified field type name, e.g. + + .envoy.foo.bar is normalized to foo.bar. + + Considers type context to minimize type prefix. + + Args: + field_fqn: a fully qualified type name from FieldDescriptorProto.type_name. + type_context: contextual information for message/enum/field. + + Returns: + Normalized type name as a string. + """ + if field_fqn.startswith('.'): + # Let's say we have type context namespace a.b.c.d.e and the type we're + # trying to normalize is a.b.d.e. We take (from the end) on package fragment + # at a time, and apply the inner-most evaluation that protoc performs to see + # if we evaluate to the fully qualified type. If so, we're done. It's not + # sufficient to compute common prefix and drop that, since in the above + # example the normalized type name would be d.e, which proto resolves inner + # most as a.b.c.d.e (bad) instead of the intended a.b.d.e. + field_fqn_splits = field_fqn[1:].split('.') + type_context_splits = type_context.name.split('.')[:-1] + remaining_field_fqn_splits = deque(field_fqn_splits[:-1]) + normalized_splits = deque([field_fqn_splits[-1]]) + + def EquivalentInTypeContext(splits): + type_context_splits_tmp = deque(type_context_splits) + while type_context_splits_tmp: + # If we're in a.b.c and the FQN is a.d.Foo, we want to return true once + # we have type_context_splits_tmp as [a] and splits as [d, Foo]. + if list(type_context_splits_tmp) + list(splits) == field_fqn_splits: + return True + # If we're in a.b.c.d.e.f and the FQN is a.b.d.e.Foo, we want to return True + # once we have type_context_splits_tmp as [a] and splits as [b, d, e, Foo], but + # not when type_context_splits_tmp is [a, b, c] and FQN is [d, e, Foo]. + if len(splits) > 1 and '.'.join(type_context_splits_tmp).endswith('.'.join( + list(splits)[:-1])): + return False + type_context_splits_tmp.pop() + return False + + while remaining_field_fqn_splits and not EquivalentInTypeContext(normalized_splits): + normalized_splits.appendleft(remaining_field_fqn_splits.pop()) + + # `extensions` is a keyword in proto2, and protoc will throw error if a type name + # starts with `extensions.`. + if normalized_splits[0] == 'extensions': + normalized_splits.appendleft(remaining_field_fqn_splits.pop()) + + return '.'.join(normalized_splits) + return field_fqn + + +def TypeNameFromFQN(fqn): + return fqn[1:] + + +def FormatFieldType(type_context, field): + """Format a FieldDescriptorProto type description. + + Args: + type_context: contextual information for message/enum/field. + field: FieldDescriptor proto. + + Returns: + Formatted proto field type as string. + """ + label = 'repeated ' if field.label == field.LABEL_REPEATED else '' + type_name = label + NormalizeFieldTypeName(type_context, field.type_name) + + if field.type == field.TYPE_MESSAGE: + if type_context.map_typenames and TypeNameFromFQN( + field.type_name) in type_context.map_typenames: + return 'map<%s, %s>' % tuple( + map(functools.partial(FormatFieldType, type_context), + type_context.map_typenames[TypeNameFromFQN(field.type_name)])) + return type_name + elif field.type_name: + return type_name + + pretty_type_names = { + field.TYPE_DOUBLE: 'double', + field.TYPE_FLOAT: 'float', + field.TYPE_INT32: 'int32', + field.TYPE_SFIXED32: 'int32', + field.TYPE_SINT32: 'int32', + field.TYPE_FIXED32: 'uint32', + field.TYPE_UINT32: 'uint32', + field.TYPE_INT64: 'int64', + field.TYPE_SFIXED64: 'int64', + field.TYPE_SINT64: 'int64', + field.TYPE_FIXED64: 'uint64', + field.TYPE_UINT64: 'uint64', + field.TYPE_BOOL: 'bool', + field.TYPE_STRING: 'string', + field.TYPE_BYTES: 'bytes', + } + if field.type in pretty_type_names: + return label + pretty_type_names[field.type] + raise ProtoXformError('Unknown field type ' + str(field.type)) + + +def FormatServiceMethod(type_context, method): + """Format a service MethodDescriptorProto. + + Args: + type_context: contextual information for method. + method: MethodDescriptorProto proto. + + Returns: + Formatted service method as string. + """ + + def FormatStreaming(s): + return 'stream ' if s else '' + + leading_comment, trailing_comment = FormatTypeContextComments(type_context) + return '%srpc %s(%s%s%s) returns (%s%s) {%s}\n' % ( + leading_comment, method.name, trailing_comment, FormatStreaming( + method.client_streaming), NormalizeFieldTypeName( + type_context, method.input_type), FormatStreaming(method.server_streaming), + NormalizeFieldTypeName(type_context, method.output_type), FormatOptions(method.options)) + + +def FormatField(type_context, field): + """Format FieldDescriptorProto as a proto field. + + Args: + type_context: contextual information for message/enum/field. + field: FieldDescriptor proto. + + Returns: + Formatted proto field as a string. + """ + if protoxform_options.HasHideOption(field.options): + return '' + leading_comment, trailing_comment = FormatTypeContextComments(type_context) + + return '%s%s %s = %d%s;\n%s' % (leading_comment, FormatFieldType(type_context, field), field.name, + field.number, FormatOptions(field.options), trailing_comment) + + +def FormatEnumValue(type_context, value): + """Format a EnumValueDescriptorProto as a proto enum value. + + Args: + type_context: contextual information for message/enum/field. + value: EnumValueDescriptorProto. + + Returns: + Formatted proto enum value as a string. + """ + if protoxform_options.HasHideOption(value.options): + return '' + leading_comment, trailing_comment = FormatTypeContextComments(type_context) + formatted_annotations = FormatOptions(value.options) + return '%s%s = %d%s;\n%s' % (leading_comment, value.name, value.number, formatted_annotations, + trailing_comment) + + +def TextFormatValue(field, value): + """Format the value as protobuf text format + + Args: + field: a FieldDescriptor that describes the field + value: the value stored in the field + + Returns: + value in protobuf text format + """ + out = io.StringIO() + text_format.PrintFieldValue(field, value, out) + return out.getvalue() + + +def FormatOptions(options): + """Format *Options (e.g. + + MessageOptions, FieldOptions) message. + + Args: + options: A *Options (e.g. MessageOptions, FieldOptions) message. + + Returns: + Formatted options as a string. + """ + + formatted_options = [] + for option_descriptor, option_value in sorted(options.ListFields(), key=lambda x: x[0].number): + option_name = '({})'.format( + option_descriptor.full_name) if option_descriptor.is_extension else option_descriptor.name + if option_descriptor.message_type and option_descriptor.label != option_descriptor.LABEL_REPEATED: + formatted_options.extend([ + '{}.{} = {}'.format(option_name, subfield.name, TextFormatValue(subfield, value)) + for subfield, value in option_value.ListFields() + ]) + else: + formatted_options.append('{} = {}'.format(option_name, + TextFormatValue(option_descriptor, option_value))) + + if formatted_options: + if options.DESCRIPTOR.name in ('EnumValueOptions', 'FieldOptions'): + return '[{}]'.format(','.join(formatted_options)) + else: + return FormatBlock(''.join( + 'option {};\n'.format(formatted_option) for formatted_option in formatted_options)) + return '' + + +def FormatReserved(enum_or_msg_proto): + """Format reserved values/names in a [Enum]DescriptorProto. + + Args: + enum_or_msg_proto: [Enum]DescriptorProto message. + + Returns: + Formatted enum_or_msg_proto as a string. + """ + reserved_fields = FormatBlock('reserved %s;\n' % ','.join( + map(str, sum([list(range(rr.start, rr.end)) for rr in enum_or_msg_proto.reserved_range], + [])))) if enum_or_msg_proto.reserved_range else '' + if enum_or_msg_proto.reserved_name: + reserved_fields += FormatBlock('reserved %s;\n' % + ', '.join('"%s"' % n for n in enum_or_msg_proto.reserved_name)) + return reserved_fields + + +class ProtoFormatVisitor(visitor.Visitor): + """Visitor to generate a proto representation from a FileDescriptor proto. + + See visitor.Visitor for visitor method docs comments. + """ + + def VisitService(self, service_proto, type_context): + leading_comment, trailing_comment = FormatTypeContextComments(type_context) + methods = '\n'.join( + FormatServiceMethod(type_context.ExtendMethod(index, m.name), m) + for index, m in enumerate(service_proto.method)) + options = FormatBlock(FormatOptions(service_proto.options)) + return '%sservice %s {\n%s%s%s\n}\n' % (leading_comment, service_proto.name, options, + trailing_comment, methods) + + def VisitEnum(self, enum_proto, type_context): + leading_comment, trailing_comment = FormatTypeContextComments(type_context) + formatted_options = FormatOptions(enum_proto.options) + reserved_fields = FormatReserved(enum_proto) + values = [ + FormatEnumValue(type_context.ExtendField(index, value.name), value) + for index, value in enumerate(enum_proto.value) + ] + joined_values = ('\n' if any('//' in v for v in values) else '').join(values) + return '%senum %s {\n%s%s%s%s\n}\n' % (leading_comment, enum_proto.name, trailing_comment, + formatted_options, reserved_fields, joined_values) + + def VisitMessage(self, msg_proto, type_context, nested_msgs, nested_enums): + # Skip messages synthesized to represent map types. + if msg_proto.options.map_entry: + return '' + if protoxform_options.HasHideOption(msg_proto.options): + return '' + annotation_xforms = { + annotations.NEXT_FREE_FIELD_ANNOTATION: CreateNextFreeFieldXform(msg_proto) + } + leading_comment, trailing_comment = FormatTypeContextComments(type_context, annotation_xforms) + formatted_options = FormatOptions(msg_proto.options) + formatted_enums = FormatBlock('\n'.join(nested_enums)) + formatted_msgs = FormatBlock('\n'.join(nested_msgs)) + reserved_fields = FormatReserved(msg_proto) + # Recover the oneof structure. This needs some extra work, since + # DescriptorProto just gives use fields and a oneof_index that can allow + # recovery of the original oneof placement. + fields = '' + oneof_index = None + for index, field in enumerate(msg_proto.field): + if oneof_index is not None: + if not field.HasField('oneof_index') or field.oneof_index != oneof_index: + fields += '}\n\n' + oneof_index = None + if oneof_index is None and field.HasField('oneof_index'): + oneof_index = field.oneof_index + oneof_proto = msg_proto.oneof_decl[oneof_index] + oneof_leading_comment, oneof_trailing_comment = FormatTypeContextComments( + type_context.ExtendOneof(oneof_index, field.name)) + fields += '%soneof %s {\n%s%s' % (oneof_leading_comment, oneof_proto.name, + oneof_trailing_comment, FormatOptions( + oneof_proto.options)) + fields += FormatBlock(FormatField(type_context.ExtendField(index, field.name), field)) + if oneof_index is not None: + fields += '}\n\n' + return '%smessage %s {\n%s%s%s%s%s%s\n}\n' % (leading_comment, msg_proto.name, trailing_comment, + formatted_options, formatted_enums, + formatted_msgs, reserved_fields, fields) + + def VisitFile(self, file_proto, type_context, services, msgs, enums): + header = FormatHeaderFromFile(type_context.source_code_info, file_proto) + formatted_services = FormatBlock('\n'.join(services)) + formatted_enums = FormatBlock('\n'.join(enums)) + formatted_msgs = FormatBlock('\n'.join(msgs)) + return ClangFormat(header + formatted_services + formatted_enums + formatted_msgs) + + +if __name__ == '__main__': + proto_desc_path = sys.argv[1] + file_proto = descriptor_pb2.FileDescriptorProto() + text_format.Merge(pathlib.Path(proto_desc_path).read_text(), file_proto) + utils.LoadTypeDb(sys.argv[2]) + sys.stdout.write(traverse.TraverseFile(file_proto, ProtoFormatVisitor()).decode()) diff --git a/tools/protoxform/protoxform.py b/tools/protoxform/protoxform.py index c207410c5992a..f99649cb8b0d5 100755 --- a/tools/protoxform/protoxform.py +++ b/tools/protoxform/protoxform.py @@ -1,37 +1,18 @@ -# protoc plugin to map from FileDescriptorProtos to a canonicaly formatted -# proto. +# protoc plugin to map from FileDescriptorProtos to intermediate form # -# protoxform is currently only a formatting tool, but will act as the basis for -# vN -> v(N+1) API migration tooling, allowing for things like deprecated field -# removal, package renaming, field movement, providing both .proto and .cc code -# generation to support automation of Envoy API version translation. -# -# See https://github.com/google/protobuf/blob/master/src/google/protobuf/descriptor.proto -# for the underlying protos mentioned in this file. +# protoxform takes a source FileDescriptorProto and generates active/next major +# version candidate FileDescriptorProtos. The resulting FileDescriptorProtos are +# then later processed by proto_sync.py, which invokes protoprint.py to format. -from collections import deque import functools -import io -import os -import pathlib -import re -import subprocess -from tools.api_proto_plugin import annotations from tools.api_proto_plugin import plugin -from tools.api_proto_plugin import traverse from tools.api_proto_plugin import visitor from tools.protoxform import migrate -from tools.protoxform import options as protoxform_options from tools.protoxform import utils -from tools.type_whisperer import type_whisperer -from tools.type_whisperer.types_pb2 import Types - -from google.protobuf import descriptor_pb2 -from google.protobuf import text_format -# Note: we have to include those proto definitions to make FormatOptions work, -# this also serves as whitelist of extended options. +# Note: we have to include those proto definitions to ensure we don't lose these +# during FileDescriptorProto printing. from google.api import annotations_pb2 as _ from validate import validate_pb2 as _ from envoy.annotations import deprecation_pb2 as _ @@ -40,481 +21,6 @@ from udpa.annotations import sensitive_pb2 as _ from udpa.annotations import status_pb2 -NEXT_FREE_FIELD_MIN = 5 - - -class ProtoXformError(Exception): - """Base error class for the protoxform module.""" - - -def ExtractClangProtoStyle(clang_format_text): - """Extract a key:value dictionary for proto formatting. - - Args: - clang_format_text: text from a .clang-format file. - - Returns: - key:value dictionary suitable for passing to clang-format --style. - """ - lang = None - format_dict = {} - for line in clang_format_text.split('\n'): - if lang is None or lang != 'Proto': - match = re.match('Language:\s+(\w+)', line) - if match: - lang = match.group(1) - continue - match = re.match('(\w+):\s+(\w+)', line) - if match: - key, value = match.groups() - format_dict[key] = value - else: - break - return str(format_dict) - - -# Ensure we are using the canonical clang-format proto style. -CLANG_FORMAT_STYLE = ExtractClangProtoStyle( - pathlib.Path(os.getenv('RUNFILES_DIR'), 'envoy/.clang-format').read_text()) - - -def ClangFormat(contents): - """Run proto-style oriented clang-format over given string. - - Args: - contents: a string with proto contents. - - Returns: - clang-formatted string - """ - return subprocess.run( - ['clang-format', - '--style=%s' % CLANG_FORMAT_STYLE, '--assume-filename=.proto'], - input=contents.encode('utf-8'), - stdout=subprocess.PIPE).stdout - - -def FormatBlock(block): - """Append \n to a .proto section (e.g. - - comment, message definition, etc.) if non-empty. - - Args: - block: a string representing the section. - - Returns: - A string with appropriate whitespace. - """ - if block.strip(): - return block + '\n' - return '' - - -def FormatComments(comments): - """Format a list of comment blocks from SourceCodeInfo. - - Prefixes // to each line, separates blocks by spaces. - - Args: - comments: a list of blocks, each block is a list of strings representing - lines in each block. - - Returns: - A string reprenting the formatted comment blocks. - """ - - # TODO(htuch): not sure why this is needed, but clang-format does some weird - # stuff with // comment indents when we have these trailing \ - def FixupTrailingBackslash(s): - return s[:-1].rstrip() if s.endswith('\\') else s - - comments = '\n\n'.join( - '\n'.join(['//%s' % FixupTrailingBackslash(line) - for line in comment.split('\n')[:-1]]) - for comment in comments) - return FormatBlock(comments) - - -def CreateNextFreeFieldXform(msg_proto): - """Return the next free field number annotation transformer of a message. - - Args: - msg_proto: DescriptorProto for message. - - Returns: - the next free field number annotation transformer. - """ - next_free = max( - sum([ - [f.number + 1 for f in msg_proto.field], - [rr.end for rr in msg_proto.reserved_range], - [ex.end for ex in msg_proto.extension_range], - ], [1])) - return lambda _: next_free if next_free > NEXT_FREE_FIELD_MIN else None - - -def FormatTypeContextComments(type_context, annotation_xforms=None): - """Format the leading/trailing comments in a given TypeContext. - - Args: - type_context: contextual information for message/enum/field. - annotation_xforms: a dict of transformers for annotations in leading - comment. - - Returns: - Tuple of formatted leading and trailing comment blocks. - """ - leading_comment = type_context.leading_comment - if annotation_xforms: - leading_comment = leading_comment.getCommentWithTransforms(annotation_xforms) - leading = FormatComments(list(type_context.leading_detached_comments) + [leading_comment.raw]) - trailing = FormatBlock(FormatComments([type_context.trailing_comment])) - return leading, trailing - - -def FormatHeaderFromFile(source_code_info, file_proto, pkg_version_status): - """Format proto header. - - Args: - source_code_info: SourceCodeInfo object. - file_proto: FileDescriptorProto for file. - pkg_version_status: package version status (as per UDPA file status annotations). - - Returns: - Formatted proto header as a string. - """ - # Load the type database. - typedb = utils.GetTypeDb() - # Figure out type dependencies in this .proto. - types = Types() - text_format.Merge(traverse.TraverseFile(file_proto, type_whisperer.TypeWhispererVisitor()), types) - type_dependencies = sum([list(t.type_dependencies) for t in types.types.values()], []) - for service in file_proto.service: - for m in service.method: - type_dependencies.extend([m.input_type[1:], m.output_type[1:]]) - # Determine the envoy/ import paths from type deps. - envoy_proto_paths = set( - typedb.types[t].proto_path - for t in type_dependencies - if t.startswith('envoy.') and typedb.types[t].proto_path != file_proto.name) - - def CamelCase(s): - return ''.join(t.capitalize() for t in re.split('[\._]', s)) - - package_line = 'package %s;\n' % file_proto.package - file_block = '\n'.join(['syntax = "proto3";\n', package_line]) - - options = descriptor_pb2.FileOptions() - options.java_outer_classname = CamelCase(os.path.basename(file_proto.name)) - options.java_multiple_files = True - options.java_package = 'io.envoyproxy.' + file_proto.package - - # This is a workaround for C#/Ruby namespace conflicts between packages and - # objects, see https://github.com/envoyproxy/envoy/pull/3854. - # TODO(htuch): remove once v3 fixes this naming issue in - # https://github.com/envoyproxy/envoy/issues/8120. - if file_proto.package in ['envoy.api.v2.listener', 'envoy.api.v2.cluster']: - qualified_package = '.'.join(s.capitalize() for s in file_proto.package.split('.')) + 'NS' - options.csharp_namespace = qualified_package - options.ruby_package = qualified_package - - if file_proto.service: - options.java_generic_services = True - - if file_proto.options.HasExtension(migrate_pb2.file_migrate): - options.Extensions[migrate_pb2.file_migrate].CopyFrom( - file_proto.options.Extensions[migrate_pb2.file_migrate]) - - if file_proto.options.HasExtension( - status_pb2.file_status) and file_proto.package.endswith('alpha'): - options.Extensions[status_pb2.file_status].CopyFrom( - file_proto.options.Extensions[status_pb2.file_status]) - - options.Extensions[status_pb2.file_status].package_version_status = pkg_version_status - - options_block = FormatOptions(options) - - requires_versioning_import = any( - protoxform_options.GetVersioningAnnotation(m.options) for m in file_proto.message_type) - - envoy_imports = list(envoy_proto_paths) - google_imports = [] - infra_imports = [] - misc_imports = [] - public_imports = [] - - for idx, d in enumerate(file_proto.dependency): - if idx in file_proto.public_dependency: - public_imports.append(d) - continue - elif d.startswith('envoy/annotations') or d.startswith('udpa/annotations'): - infra_imports.append(d) - elif d.startswith('envoy/'): - # We ignore existing envoy/ imports, since these are computed explicitly - # from type_dependencies. - pass - elif d.startswith('google/'): - google_imports.append(d) - elif d.startswith('validate/'): - infra_imports.append(d) - elif d in ['udpa/annotations/versioning.proto', 'udpa/annotations/status.proto']: - # Skip, we decide to add this based on requires_versioning_import and options. - pass - else: - misc_imports.append(d) - - if options.HasExtension(status_pb2.file_status): - infra_imports.append('udpa/annotations/status.proto') - - if requires_versioning_import: - infra_imports.append('udpa/annotations/versioning.proto') - - def FormatImportBlock(xs): - if not xs: - return '' - return FormatBlock('\n'.join(sorted('import "%s";' % x for x in set(xs)))) - - def FormatPublicImportBlock(xs): - if not xs: - return '' - return FormatBlock('\n'.join(sorted('import public "%s";' % x for x in xs))) - - import_block = '\n'.join( - map(FormatImportBlock, [envoy_imports, google_imports, misc_imports, infra_imports])) - import_block += '\n' + FormatPublicImportBlock(public_imports) - comment_block = FormatComments(source_code_info.file_level_comments) - - return ''.join(map(FormatBlock, [file_block, import_block, options_block, comment_block])) - - -def NormalizeFieldTypeName(type_context, field_fqn): - """Normalize a fully qualified field type name, e.g. - - .envoy.foo.bar is normalized to foo.bar. - - Considers type context to minimize type prefix. - - Args: - field_fqn: a fully qualified type name from FieldDescriptorProto.type_name. - type_context: contextual information for message/enum/field. - - Returns: - Normalized type name as a string. - """ - if field_fqn.startswith('.'): - # Let's say we have type context namespace a.b.c.d.e and the type we're - # trying to normalize is a.b.d.e. We take (from the end) on package fragment - # at a time, and apply the inner-most evaluation that protoc performs to see - # if we evaluate to the fully qualified type. If so, we're done. It's not - # sufficient to compute common prefix and drop that, since in the above - # example the normalized type name would be d.e, which proto resolves inner - # most as a.b.c.d.e (bad) instead of the intended a.b.d.e. - field_fqn_splits = field_fqn[1:].split('.') - type_context_splits = type_context.name.split('.')[:-1] - remaining_field_fqn_splits = deque(field_fqn_splits[:-1]) - normalized_splits = deque([field_fqn_splits[-1]]) - - def EquivalentInTypeContext(splits): - type_context_splits_tmp = deque(type_context_splits) - while type_context_splits_tmp: - # If we're in a.b.c and the FQN is a.d.Foo, we want to return true once - # we have type_context_splits_tmp as [a] and splits as [d, Foo]. - if list(type_context_splits_tmp) + list(splits) == field_fqn_splits: - return True - # If we're in a.b.c.d.e.f and the FQN is a.b.d.e.Foo, we want to return True - # once we have type_context_splits_tmp as [a] and splits as [b, d, e, Foo], but - # not when type_context_splits_tmp is [a, b, c] and FQN is [d, e, Foo]. - if len(splits) > 1 and '.'.join(type_context_splits_tmp).endswith('.'.join( - list(splits)[:-1])): - return False - type_context_splits_tmp.pop() - return False - - while remaining_field_fqn_splits and not EquivalentInTypeContext(normalized_splits): - normalized_splits.appendleft(remaining_field_fqn_splits.pop()) - - # `extensions` is a keyword in proto2, and protoc will throw error if a type name - # starts with `extensions.`. - if normalized_splits[0] == 'extensions': - normalized_splits.appendleft(remaining_field_fqn_splits.pop()) - - return '.'.join(normalized_splits) - return field_fqn - - -def TypeNameFromFQN(fqn): - return fqn[1:] - - -def FormatFieldType(type_context, field): - """Format a FieldDescriptorProto type description. - - Args: - type_context: contextual information for message/enum/field. - field: FieldDescriptor proto. - - Returns: - Formatted proto field type as string. - """ - label = 'repeated ' if field.label == field.LABEL_REPEATED else '' - type_name = label + NormalizeFieldTypeName(type_context, field.type_name) - - if field.type == field.TYPE_MESSAGE: - if type_context.map_typenames and TypeNameFromFQN( - field.type_name) in type_context.map_typenames: - return 'map<%s, %s>' % tuple( - map(functools.partial(FormatFieldType, type_context), - type_context.map_typenames[TypeNameFromFQN(field.type_name)])) - return type_name - elif field.type_name: - return type_name - - pretty_type_names = { - field.TYPE_DOUBLE: 'double', - field.TYPE_FLOAT: 'float', - field.TYPE_INT32: 'int32', - field.TYPE_SFIXED32: 'int32', - field.TYPE_SINT32: 'int32', - field.TYPE_FIXED32: 'uint32', - field.TYPE_UINT32: 'uint32', - field.TYPE_INT64: 'int64', - field.TYPE_SFIXED64: 'int64', - field.TYPE_SINT64: 'int64', - field.TYPE_FIXED64: 'uint64', - field.TYPE_UINT64: 'uint64', - field.TYPE_BOOL: 'bool', - field.TYPE_STRING: 'string', - field.TYPE_BYTES: 'bytes', - } - if field.type in pretty_type_names: - return label + pretty_type_names[field.type] - raise ProtoXformError('Unknown field type ' + str(field.type)) - - -def FormatServiceMethod(type_context, method): - """Format a service MethodDescriptorProto. - - Args: - type_context: contextual information for method. - method: MethodDescriptorProto proto. - - Returns: - Formatted service method as string. - """ - - def FormatStreaming(s): - return 'stream ' if s else '' - - leading_comment, trailing_comment = FormatTypeContextComments(type_context) - return '%srpc %s(%s%s%s) returns (%s%s) {%s}\n' % ( - leading_comment, method.name, trailing_comment, FormatStreaming( - method.client_streaming), NormalizeFieldTypeName( - type_context, method.input_type), FormatStreaming(method.server_streaming), - NormalizeFieldTypeName(type_context, method.output_type), FormatOptions(method.options)) - - -def FormatField(type_context, field): - """Format FieldDescriptorProto as a proto field. - - Args: - type_context: contextual information for message/enum/field. - field: FieldDescriptor proto. - - Returns: - Formatted proto field as a string. - """ - if protoxform_options.HasHideOption(field.options): - return '' - leading_comment, trailing_comment = FormatTypeContextComments(type_context) - - return '%s%s %s = %d%s;\n%s' % (leading_comment, FormatFieldType(type_context, field), field.name, - field.number, FormatOptions(field.options), trailing_comment) - - -def FormatEnumValue(type_context, value): - """Format a EnumValueDescriptorProto as a proto enum value. - - Args: - type_context: contextual information for message/enum/field. - value: EnumValueDescriptorProto. - - Returns: - Formatted proto enum value as a string. - """ - if protoxform_options.HasHideOption(value.options): - return '' - leading_comment, trailing_comment = FormatTypeContextComments(type_context) - formatted_annotations = FormatOptions(value.options) - return '%s%s = %d%s;\n%s' % (leading_comment, value.name, value.number, formatted_annotations, - trailing_comment) - - -def TextFormatValue(field, value): - """Format the value as protobuf text format - - Args: - field: a FieldDescriptor that describes the field - value: the value stored in the field - - Returns: - value in protobuf text format - """ - out = io.StringIO() - text_format.PrintFieldValue(field, value, out) - return out.getvalue() - - -def FormatOptions(options): - """Format *Options (e.g. - - MessageOptions, FieldOptions) message. - - Args: - options: A *Options (e.g. MessageOptions, FieldOptions) message. - - Returns: - Formatted options as a string. - """ - - formatted_options = [] - for option_descriptor, option_value in sorted(options.ListFields(), key=lambda x: x[0].number): - option_name = '({})'.format( - option_descriptor.full_name) if option_descriptor.is_extension else option_descriptor.name - if option_descriptor.message_type and option_descriptor.label != option_descriptor.LABEL_REPEATED: - formatted_options.extend([ - '{}.{} = {}'.format(option_name, subfield.name, TextFormatValue(subfield, value)) - for subfield, value in option_value.ListFields() - ]) - else: - formatted_options.append('{} = {}'.format(option_name, - TextFormatValue(option_descriptor, option_value))) - - if formatted_options: - if options.DESCRIPTOR.name in ('EnumValueOptions', 'FieldOptions'): - return '[{}]'.format(','.join(formatted_options)) - else: - return FormatBlock(''.join( - 'option {};\n'.format(formatted_option) for formatted_option in formatted_options)) - return '' - - -def FormatReserved(enum_or_msg_proto): - """Format reserved values/names in a [Enum]DescriptorProto. - - Args: - enum_or_msg_proto: [Enum]DescriptorProto message. - - Returns: - Formatted enum_or_msg_proto as a string. - """ - reserved_fields = FormatBlock('reserved %s;\n' % ','.join( - map(str, sum([list(range(rr.start, rr.end)) for rr in enum_or_msg_proto.reserved_range], - [])))) if enum_or_msg_proto.reserved_range else '' - if enum_or_msg_proto.reserved_name: - reserved_fields += FormatBlock('reserved %s;\n' % - ', '.join('"%s"' % n for n in enum_or_msg_proto.reserved_name)) - return reserved_fields - class ProtoFormatVisitor(visitor.Visitor): """Visitor to generate a proto representation from a FileDescriptor proto. @@ -526,72 +32,18 @@ def __init__(self, pkg_version_status): self._pkg_version_status = pkg_version_status def VisitService(self, service_proto, type_context): - leading_comment, trailing_comment = FormatTypeContextComments(type_context) - methods = '\n'.join( - FormatServiceMethod(type_context.ExtendMethod(index, m.name), m) - for index, m in enumerate(service_proto.method)) - options = FormatBlock(FormatOptions(service_proto.options)) - return '%sservice %s {\n%s%s%s\n}\n' % (leading_comment, service_proto.name, options, - trailing_comment, methods) + return None def VisitEnum(self, enum_proto, type_context): - leading_comment, trailing_comment = FormatTypeContextComments(type_context) - formatted_options = FormatOptions(enum_proto.options) - reserved_fields = FormatReserved(enum_proto) - values = [ - FormatEnumValue(type_context.ExtendField(index, value.name), value) - for index, value in enumerate(enum_proto.value) - ] - joined_values = ('\n' if any('//' in v for v in values) else '').join(values) - return '%senum %s {\n%s%s%s%s\n}\n' % (leading_comment, enum_proto.name, trailing_comment, - formatted_options, reserved_fields, joined_values) + return None def VisitMessage(self, msg_proto, type_context, nested_msgs, nested_enums): - # Skip messages synthesized to represent map types. - if msg_proto.options.map_entry: - return '' - if protoxform_options.HasHideOption(msg_proto.options): - return '' - annotation_xforms = { - annotations.NEXT_FREE_FIELD_ANNOTATION: CreateNextFreeFieldXform(msg_proto) - } - leading_comment, trailing_comment = FormatTypeContextComments(type_context, annotation_xforms) - formatted_options = FormatOptions(msg_proto.options) - formatted_enums = FormatBlock('\n'.join(nested_enums)) - formatted_msgs = FormatBlock('\n'.join(nested_msgs)) - reserved_fields = FormatReserved(msg_proto) - # Recover the oneof structure. This needs some extra work, since - # DescriptorProto just gives use fields and a oneof_index that can allow - # recovery of the original oneof placement. - fields = '' - oneof_index = None - for index, field in enumerate(msg_proto.field): - if oneof_index is not None: - if not field.HasField('oneof_index') or field.oneof_index != oneof_index: - fields += '}\n\n' - oneof_index = None - if oneof_index is None and field.HasField('oneof_index'): - oneof_index = field.oneof_index - oneof_proto = msg_proto.oneof_decl[oneof_index] - oneof_leading_comment, oneof_trailing_comment = FormatTypeContextComments( - type_context.ExtendOneof(oneof_index, field.name)) - fields += '%soneof %s {\n%s%s' % (oneof_leading_comment, oneof_proto.name, - oneof_trailing_comment, FormatOptions( - oneof_proto.options)) - fields += FormatBlock(FormatField(type_context.ExtendField(index, field.name), field)) - if oneof_index is not None: - fields += '}\n\n' - return '%smessage %s {\n%s%s%s%s%s%s\n}\n' % (leading_comment, msg_proto.name, trailing_comment, - formatted_options, formatted_enums, - formatted_msgs, reserved_fields, fields) + return None def VisitFile(self, file_proto, type_context, services, msgs, enums): - header = FormatHeaderFromFile(type_context.source_code_info, file_proto, - self._pkg_version_status) - formatted_services = FormatBlock('\n'.join(services)) - formatted_enums = FormatBlock('\n'.join(enums)) - formatted_msgs = FormatBlock('\n'.join(msgs)) - return ClangFormat(header + formatted_services + formatted_enums + formatted_msgs) + file_proto.options.Extensions[ + status_pb2.file_status].package_version_status = self._pkg_version_status + return str(file_proto) def ParameterCallback(parameter): diff --git a/tools/protoxform/protoxform_test.sh b/tools/protoxform/protoxform_test.sh index b4661db9c3a93..c5503c7ff74d2 100755 --- a/tools/protoxform/protoxform_test.sh +++ b/tools/protoxform/protoxform_test.sh @@ -13,5 +13,7 @@ bazel build ${BAZEL_BUILD_OPTIONS} --//tools/api_proto_plugin:default_type_db_ta TOOLS=$(dirname $(dirname $(realpath $0))) # to satisfy dependency on run_command export PYTHONPATH="$TOOLS" +# Build protoprint for use in protoxform_test_helper.py. +bazel build ${BAZEL_BUILD_OPTIONS} //tools/protoxform:protoprint ./tools/protoxform/protoxform_test_helper.py ${PROTO_TARGETS} diff --git a/tools/protoxform/protoxform_test_helper.py b/tools/protoxform/protoxform_test_helper.py index 1a582003f6538..4a4a55dfcb955 100755 --- a/tools/protoxform/protoxform_test_helper.py +++ b/tools/protoxform/protoxform_test_helper.py @@ -4,8 +4,11 @@ import logging import os +import pathlib import re +import subprocess import sys +import tempfile def PathAndFilename(label): @@ -44,11 +47,27 @@ def GoldenProtoFile(path, filename, version): return os.path.abspath(base) -def ResultProtoFile(path, filename, version): +def ProtoPrint(src, dst): + """Pretty-print FileDescriptorProto to a destination file. + + Args: + src: source path for FileDescriptorProto. + dst: destination path for formatted proto. + """ + print('ProtoPrint %s' % dst) + contents = subprocess.check_output([ + 'bazel-bin/tools/protoxform/protoprint', src, + './bazel-bin/tools/protoxform/protoprint.runfiles/envoy/tools/type_whisperer/api_type_db.pb_text' + ]) + pathlib.Path(dst).write_bytes(contents) + + +def ResultProtoFile(path, tmp, filename, version): """Retrieve result proto file path. In general, those are placed in bazel artifacts. Args: path: target proto path + tmp: temporary directory. filename: target proto filename version: api version to specify target result proto filename @@ -59,7 +78,9 @@ def ResultProtoFile(path, filename, version): base += os.path.join(path, "protos") base += os.path.join(base, path) base += "/{0}.{1}.proto".format(filename, version) - return os.path.abspath(base) + dst = os.path.join(tmp, filename) + ProtoPrint(os.path.abspath(base), dst) + return dst def Diff(result_file, golden_file): @@ -91,15 +112,16 @@ def Run(path, filename, version): result message extracted from diff command """ message = "" - golden_path = GoldenProtoFile(path, filename, version) - test_path = ResultProtoFile(path, filename, version) + with tempfile.TemporaryDirectory() as tmp: + golden_path = GoldenProtoFile(path, filename, version) + test_path = ResultProtoFile(path, tmp, filename, version) - status, stdout, stderr = Diff(test_path, golden_path) + status, stdout, stderr = Diff(golden_path, test_path) - if status != 0: - message = '\n'.join([str(line) for line in stdout + stderr]) + if status != 0: + message = '\n'.join([str(line) for line in stdout + stderr]) - return message + return message if __name__ == "__main__": diff --git a/tools/testdata/protoxform/envoy/v2/discovery_service.proto.active.gold b/tools/testdata/protoxform/envoy/v2/discovery_service.proto.active.gold index 2f6a81cf29f4b..40d597ad8cd21 100644 --- a/tools/testdata/protoxform/envoy/v2/discovery_service.proto.active.gold +++ b/tools/testdata/protoxform/envoy/v2/discovery_service.proto.active.gold @@ -2,7 +2,7 @@ syntax = "proto3"; package envoy.v2; -import ""; +import "envoy/api/v2/discovery.proto"; import "google/api/annotations.proto"; diff --git a/tools/testdata/protoxform/envoy/v2/discovery_service.proto.next_major_version_candidate.envoy_internal.gold b/tools/testdata/protoxform/envoy/v2/discovery_service.proto.next_major_version_candidate.envoy_internal.gold index 69661b1e170f3..cd6b36941d926 100644 --- a/tools/testdata/protoxform/envoy/v2/discovery_service.proto.next_major_version_candidate.envoy_internal.gold +++ b/tools/testdata/protoxform/envoy/v2/discovery_service.proto.next_major_version_candidate.envoy_internal.gold @@ -2,7 +2,7 @@ syntax = "proto3"; package envoy.v3; -import ""; +import "envoy/api/v2/discovery.proto"; import "google/api/annotations.proto"; diff --git a/tools/testdata/protoxform/envoy/v2/discovery_service.proto.next_major_version_candidate.gold b/tools/testdata/protoxform/envoy/v2/discovery_service.proto.next_major_version_candidate.gold index 69661b1e170f3..cd6b36941d926 100644 --- a/tools/testdata/protoxform/envoy/v2/discovery_service.proto.next_major_version_candidate.gold +++ b/tools/testdata/protoxform/envoy/v2/discovery_service.proto.next_major_version_candidate.gold @@ -2,7 +2,7 @@ syntax = "proto3"; package envoy.v3; -import ""; +import "envoy/api/v2/discovery.proto"; import "google/api/annotations.proto"; From 7f6d3ef0e1c5f106829b324424a44db7e8e5640b Mon Sep 17 00:00:00 2001 From: Harvey Tuch Date: Tue, 31 Mar 2020 09:38:57 -0400 Subject: [PATCH 2/3] Fix UTF-8 and mkdir racing that was breaking check_format in CI. Signed-off-by: Harvey Tuch --- tools/proto_format/proto_sync.py | 38 ++++++++++++++-------- tools/protoxform/protoprint.py | 5 +-- tools/protoxform/protoxform_test_helper.py | 5 ++- 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/tools/proto_format/proto_sync.py b/tools/proto_format/proto_sync.py index d1e9cf9d8a535..9959daed50c36 100755 --- a/tools/proto_format/proto_sync.py +++ b/tools/proto_format/proto_sync.py @@ -89,6 +89,21 @@ def GetDestinationPath(src): matches[0])).joinpath(src_path.name.split('.')[0] + ".proto") +def GetAbsDestinationPath(dst_root, src): + """Obtain absolute path from a proto file path combined with destination root. + + Creates the parent directory if necessary. + + Args: + dst_root: destination root path. + src: source path. + """ + rel_dst_path = GetDestinationPath(src) + dst = dst_root.joinpath(rel_dst_path) + dst.parent.mkdir(0o755, parents=True, exist_ok=True) + return dst + + def ProtoPrint(src, dst): """Pretty-print FileDescriptorProto to a destination file. @@ -97,27 +112,21 @@ def ProtoPrint(src, dst): dst: destination path for formatted proto. """ print('ProtoPrint %s' % dst) - contents = subprocess.check_output([ + subprocess.check_output([ 'bazel-bin/tools/protoxform/protoprint', src, + str(dst), './bazel-bin/tools/protoxform/protoprint.runfiles/envoy/tools/type_whisperer/api_type_db.pb_text' ]) - dst.write_bytes(contents) -def SyncProtoFile(cmd, dst_root, src): +def SyncProtoFile(src_dst_pair): """Diff or in-place update a single proto file from protoxform.py Bazel cache artifacts." Args: - cmd: 'check' or 'fix'. - src: source path. + src_dst_pair: source/destination path tuple. """ - # Skip empty files, this indicates this file isn't modified in this version. - if os.stat(src).st_size == 0: - return [] - rel_dst_path = GetDestinationPath(src) - dst = dst_root.joinpath(rel_dst_path) - dst.parent.mkdir(0o755, True, True) - ProtoPrint(src, dst) + rel_dst_path = GetDestinationPath(src_dst_pair[0]) + ProtoPrint(*src_dst_pair) return ['//%s:pkg' % str(rel_dst_path.parent)] @@ -276,8 +285,11 @@ def Sync(api_root, mode, labels, shadow): utils.BazelBinPathForOutputArtifact( label, '.next_major_version_candidate.envoy_internal.proto' if shadow else '.next_major_version_candidate.proto')) + src_dst_paths = [ + (path, GetAbsDestinationPath(dst_dir, path)) for path in paths if os.stat(path).st_size > 0 + ] with mp.Pool() as p: - pkg_deps = p.map(functools.partial(SyncProtoFile, mode, dst_dir), paths) + pkg_deps = p.map(SyncProtoFile, src_dst_paths) SyncBuildFiles(mode, dst_dir) current_api_dir = pathlib.Path(tmp).joinpath("a") diff --git a/tools/protoxform/protoprint.py b/tools/protoxform/protoprint.py index 02d2658075de4..70cf2f1eb1d86 100755 --- a/tools/protoxform/protoprint.py +++ b/tools/protoxform/protoprint.py @@ -591,5 +591,6 @@ def VisitFile(self, file_proto, type_context, services, msgs, enums): proto_desc_path = sys.argv[1] file_proto = descriptor_pb2.FileDescriptorProto() text_format.Merge(pathlib.Path(proto_desc_path).read_text(), file_proto) - utils.LoadTypeDb(sys.argv[2]) - sys.stdout.write(traverse.TraverseFile(file_proto, ProtoFormatVisitor()).decode()) + dst_path = pathlib.Path(sys.argv[2]) + utils.LoadTypeDb(sys.argv[3]) + dst_path.write_bytes(traverse.TraverseFile(file_proto, ProtoFormatVisitor())) diff --git a/tools/protoxform/protoxform_test_helper.py b/tools/protoxform/protoxform_test_helper.py index 4a4a55dfcb955..6e592b59f8606 100755 --- a/tools/protoxform/protoxform_test_helper.py +++ b/tools/protoxform/protoxform_test_helper.py @@ -55,11 +55,10 @@ def ProtoPrint(src, dst): dst: destination path for formatted proto. """ print('ProtoPrint %s' % dst) - contents = subprocess.check_output([ - 'bazel-bin/tools/protoxform/protoprint', src, + subprocess.check_call([ + 'bazel-bin/tools/protoxform/protoprint', src, dst, './bazel-bin/tools/protoxform/protoprint.runfiles/envoy/tools/type_whisperer/api_type_db.pb_text' ]) - pathlib.Path(dst).write_bytes(contents) def ResultProtoFile(path, tmp, filename, version): From ca68c1371a8c36ffbf964c9c2284b17e24600e15 Mon Sep 17 00:00:00 2001 From: Harvey Tuch Date: Wed, 1 Apr 2020 20:34:02 -0400 Subject: [PATCH 3/3] Remove PY3. Signed-off-by: Harvey Tuch --- tools/api_proto_plugin/BUILD | 2 -- tools/protoxform/BUILD | 2 -- tools/type_whisperer/BUILD | 2 -- 3 files changed, 6 deletions(-) diff --git a/tools/api_proto_plugin/BUILD b/tools/api_proto_plugin/BUILD index 4eb5ddaa7832c..0bec15eb6bd32 100644 --- a/tools/api_proto_plugin/BUILD +++ b/tools/api_proto_plugin/BUILD @@ -12,7 +12,6 @@ py_library( "type_context.py", "visitor.py", ], - srcs_version = "PY3", visibility = ["//visibility:public"], deps = [ "@com_google_protobuf//:protobuf_python", @@ -22,7 +21,6 @@ py_library( py_library( name = "utils", srcs = ["utils.py"], - srcs_version = "PY3", visibility = ["//visibility:public"], ) diff --git a/tools/protoxform/BUILD b/tools/protoxform/BUILD index dec0d2e9b90f8..082bce7cd07cf 100644 --- a/tools/protoxform/BUILD +++ b/tools/protoxform/BUILD @@ -8,7 +8,6 @@ py_binary( "protoxform.py", "utils.py", ], - python_version = "PY3", visibility = ["//visibility:public"], deps = [ "//tools/api_proto_plugin", @@ -31,7 +30,6 @@ py_binary( "//:.clang-format", "//tools/type_whisperer:api_type_db.pb_text", ], - python_version = "PY3", visibility = ["//visibility:public"], deps = [ "//tools/type_whisperer", diff --git a/tools/type_whisperer/BUILD b/tools/type_whisperer/BUILD index a7173a3121874..3acb95c8adbea 100644 --- a/tools/type_whisperer/BUILD +++ b/tools/type_whisperer/BUILD @@ -16,7 +16,6 @@ envoy_proto_library( py_binary( name = "type_whisperer", srcs = ["type_whisperer.py"], - python_version = "PY3", visibility = ["//visibility:public"], deps = [ ":types_py_proto", @@ -29,7 +28,6 @@ py_binary( py_binary( name = "typedb_gen", srcs = ["typedb_gen.py"], - python_version = "PY3", visibility = ["//visibility:public"], deps = [ ":api_type_db_proto_py_proto",