astral-sh · dcreager · Apr 17, 2025 · Apr 8, 2025 · Apr 9, 2025 · Apr 10, 2025
diff --git a/crates/ruff_python_ast/ast.toml b/crates/ruff_python_ast/ast.toml
@@ -37,6 +37,10 @@
 # derives:
 # List of derives to add to the syntax node struct. Clone, Debug, PartialEq are added by default.
 #
+# custom_source_order:
+# A boolean that specifies if this node has a custom source order visitor implementation.
+# generation of visit_source_order will be skipped for this node.
+#
 # fields:
 # List of fields in the syntax node struct. Each field is a table with the
 # following keys:
@@ -48,6 +52,10 @@
 #    * `Expr*` - A vector of Expr.
 #    * `&Expr*` - A boxed slice of Expr.
 #    These properties cannot be nested, for example we cannot create a vector of option types.
+# * is_annotation - If this field is a type annotation.
+#
+# source_order:
+# Defines in what order the fields appear in source
 #
 # variant:
 #   The name of the enum variant for this syntax node.  Defaults to the node
@@ -57,9 +65,13 @@
 anynode_is_label = "module"
 doc = "See also [mod](https://docs.python.org/3/library/ast.html#ast.mod)"
 
-[Mod.nodes]
-ModModule = {}
-ModExpression = {}
+[Mod.nodes.ModModule]
+doc = "See also [Module](https://docs.python.org/3/library/ast.html#ast.Module)"
+fields = [{ name = "body", type = "Stmt*" }]
+
+[Mod.nodes.ModExpression]
+doc = "See also [Module](https://docs.python.org/3/library/ast.html#ast.Module)"
+fields = [{ name = "body", type = "Box<Expr>" }]
 
 [Stmt]
 add_suffix_to_is_methods = true
@@ -77,8 +89,7 @@ fields = [
   { name = "name", type = "Identifier" },
   { name = "type_params", type = "Box<crate::TypeParams>?" },
   { name = "parameters", type = "Box<crate::Parameters>" },
-
-  { name = "returns", type = "Expr?" },
+  { name = "returns", type = "Expr?", is_annotation = true },
   { name = "body", type = "Stmt*" },
 ]
 
@@ -127,7 +138,7 @@ fields = [
 doc = "See also [AnnAssign](https://docs.python.org/3/library/ast.html#ast.AnnAssign)"
 fields = [
   { name = "target", type = "Expr" },
-  { name = "annotation", type = "Expr" },
+  { name = "annotation", type = "Expr", is_annotation = true },
   { name = "value", type = "Expr?" },
   { name = "simple", type = "bool" },
 ]
@@ -305,6 +316,7 @@ doc = "See also [expr](https://docs.python.org/3/library/ast.html#ast.expr)"
 [Expr.nodes.ExprBoolOp]
 doc = "See also [BoolOp](https://docs.python.org/3/library/ast.html#ast.BoolOp)"
 fields = [{ name = "op", type = "BoolOp" }, { name = "values", type = "Expr*" }]
+custom_source_order = true
 
 [Expr.nodes.ExprNamed]
 doc = "See also [NamedExpr](https://docs.python.org/3/library/ast.html#ast.NamedExpr)"
@@ -339,10 +351,12 @@ fields = [
   { name = "body", type = "Expr" },
   { name = "orelse", type = "Expr" },
 ]
+source_order = ["body", "test", "orelse"]
 
 [Expr.nodes.ExprDict]
 doc = "See also [Dict](https://docs.python.org/3/library/ast.html#ast.Dict)"
 fields = [{ name = "items", type = "DictItem*" }]
+custom_source_order = true
 
 [Expr.nodes.ExprSet]
 doc = "See also [Set](https://docs.python.org/3/library/ast.html#ast.Set)"
@@ -397,6 +411,8 @@ fields = [
   { name = "ops", type = "&CmpOp*" },
   { name = "comparators", type = "&Expr*" },
 ]
+# The fields must be visited simultaneously
+custom_source_order = true
 
 [Expr.nodes.ExprCall]
 doc = "See also [Call](https://docs.python.org/3/library/ast.html#ast.Call)"
@@ -415,16 +431,21 @@ it keeps them separate and provide various methods to access the parts.
 
 See also [JoinedStr](https://docs.python.org/3/library/ast.html#ast.JoinedStr)"""
 fields = [{ name = "value", type = "FStringValue" }]
+custom_source_order = true
 
 [Expr.nodes.ExprStringLiteral]
 doc = """An AST node that represents either a single-part string literal
 or an implicitly concatenated string literal."""
 fields = [{ name = "value", type = "StringLiteralValue" }]
+# Because StringLiteralValue type is an iterator and it's not clear from the type
+custom_source_order = true
 
 [Expr.nodes.ExprBytesLiteral]
 doc = """An AST node that represents either a single-part bytestring literal
 or an implicitly concatenated bytestring literal."""
 fields = [{ name = "value", type = "BytesLiteralValue" }]
+# Because BytesLiteralValue type is an iterator and it's not clear from the type
+custom_source_order = true
 
 [Expr.nodes.ExprNumberLiteral]
 fields = [{ name = "value", type = "Number" }]

diff --git a/crates/ruff_python_ast/generate.py b/crates/ruff_python_ast/generate.py
@@ -15,7 +15,7 @@
 import tomllib
 
 # Types that require `crate::`. We can slowly remove these types as we move them to generate scripts.
-types_requiring_create_prefix = [
+types_requiring_create_prefix = {
     "IpyEscapeKind",
     "ExprContext",
     "Identifier",
@@ -33,12 +33,11 @@
     "Decorator",
     "TypeParams",
     "Parameters",
-    "Arguments",
     "ElifElseClause",
     "WithItem",
     "MatchCase",
     "Alias",
-]
+}
 
 
 def rustfmt(code: str) -> str:
@@ -124,6 +123,8 @@ class Node:
     doc: str | None
     fields: list[Field] | None
     derives: list[str]
+    custom_source_order: bool
+    source_order: list[str] | None
 
     def __init__(self, group: Group, node_name: str, node: dict[str, Any]) -> None:
         self.name = node_name
@@ -133,33 +134,90 @@ def __init__(self, group: Group, node_name: str, node: dict[str, Any]) -> None:
         fields = node.get("fields")
         if fields is not None:
             self.fields = [Field(f) for f in fields]
+        self.custom_source_order = node.get("custom_source_order", False)
         self.derives = node.get("derives", [])
         self.doc = node.get("doc")
+        self.source_order = node.get("source_order")
+
+    def fields_in_source_order(self) -> list[Field]:
+        if self.fields is None:
+            return []
+        if self.source_order is None:
+            return list(filter(lambda x: not x.skip_source_order(), self.fields))
+
+        fields = []
+        for field_name in self.source_order:
+            field = None
+            for field in self.fields:
+                if field.skip_source_order():
+                    continue
+                if field.name == field_name:
+                    field = field
+                    break
+            fields.append(field)
+        return fields
 
 
 @dataclass
 class Field:
     name: str
     ty: str
+    _skip_visit: bool
+    is_annotation: bool
     parsed_ty: FieldType
 
     def __init__(self, field: dict[str, Any]) -> None:
         self.name = field["name"]
         self.ty = field["type"]
         self.parsed_ty = FieldType(self.ty)
+        self._skip_visit = field.get("skip_visit", False)
+        self.is_annotation = field.get("is_annotation", False)
+
+    def skip_source_order(self) -> bool:
+        return self._skip_visit or self.parsed_ty.inner in [
+            "str",
+            "ExprContext",
+            "Name",
+            "u32",
+            "bool",
+            "Number",
+            "IpyEscapeKind",
+        ]
+
+
+# Extracts the type argument from the given rust type with AST field type syntax.
+# Box<str> -> str
+# Box<Expr?> -> Expr
+# If the type does not have a type argument, it will return the string.
+# Does not support nested types
+def extract_type_argument(rust_type_str: str) -> str:
+    rust_type_str = rust_type_str.replace("*", "")
+    rust_type_str = rust_type_str.replace("?", "")
+    rust_type_str = rust_type_str.replace("&", "")
+
+    open_bracket_index = rust_type_str.find("<")
+    if open_bracket_index == -1:
+        return rust_type_str
+    close_bracket_index = rust_type_str.rfind(">")
+    if close_bracket_index == -1 or close_bracket_index <= open_bracket_index:
+        raise ValueError(f"Brackets are not balanced for type {rust_type_str}")
+    inner_type = rust_type_str[open_bracket_index + 1 : close_bracket_index].strip()
+    return inner_type
 
 
 @dataclass
 class FieldType:
     rule: str
     name: str
+    inner: str
     seq: bool = False
     optional: bool = False
     slice_: bool = False
 
     def __init__(self, rule: str) -> None:
         self.rule = rule
         self.name = ""
+        self.inner = extract_type_argument(rule)
 
         # The following cases are the limitations of this parser(and not used in the ast.toml):
         # * Rules that involve declaring a sequence with optional items e.g. Vec<Option<...>>
@@ -201,6 +259,7 @@ def write_preamble(out: list[str]) -> None:
     // Run `crates/ruff_python_ast/generate.py` to re-generate the file.
 
     use crate::name::Name;
+    use crate::visitor::source_order::SourceOrderVisitor;
     """)
 
 
@@ -703,6 +762,98 @@ def write_node(out: list[str], ast: Ast) -> None:
             out.append("")
 
 
+# ------------------------------------------------------------------------------
+# Source order visitor
+
+
+@dataclass
+class VisitorInfo:
+    name: str
+    accepts_sequence: bool = False
+
+
+# Map of AST node types to their corresponding visitor information
+type_to_visitor_function: dict[str, VisitorInfo] = {
+    "Decorator": VisitorInfo("visit_decorator"),
+    "Identifier": VisitorInfo("visit_identifier"),
+    "crate::TypeParams": VisitorInfo("visit_type_params", True),
+    "crate::Parameters": VisitorInfo("visit_parameters", True),
+    "Expr": VisitorInfo("visit_expr"),
+    "Stmt": VisitorInfo("visit_body", True),
+    "Arguments": VisitorInfo("visit_arguments", True),
+    "crate::Arguments": VisitorInfo("visit_arguments", True),
+    "Operator": VisitorInfo("visit_operator"),
+    "ElifElseClause": VisitorInfo("visit_elif_else_clause"),
+    "WithItem": VisitorInfo("visit_with_item"),
+    "MatchCase": VisitorInfo("visit_match_case"),
+    "ExceptHandler": VisitorInfo("visit_except_handler"),
+    "Alias": VisitorInfo("visit_alias"),
+    "UnaryOp": VisitorInfo("visit_unary_op"),
+    "DictItem": VisitorInfo("visit_dict_item"),
+    "Comprehension": VisitorInfo("visit_comprehension"),
+    "CmpOp": VisitorInfo("visit_cmp_op"),
+    "FStringValue": VisitorInfo("visit_f_string_value"),
+    "StringLiteralValue": VisitorInfo("visit_string_literal"),
+    "BytesLiteralValue": VisitorInfo("visit_bytes_literal"),
+}
+annotation_visitor_function = VisitorInfo("visit_annotation")
+
+
+def write_source_order(out: list[str], ast: Ast) -> None:
+    for group in ast.groups:
+        for node in group.nodes:
+            if node.fields is None or node.custom_source_order:
+                continue
+            name = node.name
+            fields_list = ""
+            body = ""
+
+            for field in node.fields:
+                if field.skip_source_order():
+                    fields_list += f"{field.name}: _,\n"
+                else:
+                    fields_list += f"{field.name},\n"
+            fields_list += "range: _,\n"
+
+            for field in node.fields_in_source_order():
+                visitor = type_to_visitor_function[field.parsed_ty.inner]
+                if field.is_annotation:
+                    visitor = annotation_visitor_function
+
+                if field.parsed_ty.optional:
+                    body += f"""
+                            if let Some({field.name}) = {field.name} {{
+                                visitor.{visitor.name}({field.name});
+                            }}\n
+                      """
+                elif not visitor.accepts_sequence and field.parsed_ty.seq:
+                    body += f"""
+                            for elm in {field.name} {{
+                                visitor.{visitor.name}(elm);
+                            }}
+                     """
+                else:
+                    body += f"visitor.{visitor.name}({field.name});\n"
-                visitor = type_to_visitor_function[field.parsed_ty.inner]
-                if field.is_annotation:
-                    visitor = annotation_visitor_function
-
-                if field.parsed_ty.optional:
-                    body += f"""
-                            if let Some({field.name}) = {field.name} {{
-                                visitor.{visitor.name}({field.name});
-                            }}\n
-                      """
-                elif not visitor.accepts_sequence and field.parsed_ty.seq:
-                    body += f"""
-                            for elm in {field.name} {{
-                                visitor.{visitor.name}(elm);
-                            }}
-                     """
-                else:
-                    body += f"visitor.{visitor.name}({field.name});\n"
+                if field.is_annotation:
+                    body += f"""
+                            visitor.visit_annotation({field.name});
+                      """
+                else:
+                    body += f"{field.name}.visit(visitor);\n"
-                visitor = type_to_visitor_function[field.parsed_ty.inner]
-                if field.is_annotation:
-                    visitor = annotation_visitor_function
-
-                if field.parsed_ty.optional:
-                    body += f"""
-                            if let Some({field.name}) = {field.name} {{
-                                visitor.{visitor.name}({field.name});
-                            }}\n
-                      """
-                elif not visitor.accepts_sequence and field.parsed_ty.seq:
-                    body += f"""
-                            for elm in {field.name} {{
-                                visitor.{visitor.name}(elm);
-                            }}
-                     """
-                else:
-                    body += f"visitor.{visitor.name}({field.name});\n"
+                if field.is_annotation:
+                    body += f"""
+                            visitor.visit_annotation({field.name});
+                      """
+                else:
+                    body += f"{field.name}.visit(visitor);\n"
+
+            visitor_arg_name = "visitor"
+            if len(node.fields_in_source_order()) == 0:
+                visitor_arg_name = "_"
+
+            out.append(f"""
+impl {name} {{
+    pub(crate) fn visit_source_order<'a, V>(&'a self, {visitor_arg_name}: &mut V)
+    where
+        V: SourceOrderVisitor<'a> + ?Sized,
+    {{
+        let {name} {{
+            {fields_list}
+        }} = self;
+        {body}
+    }}
+}}
+        """)
+
+
 # ------------------------------------------------------------------------------
 # Format and write output
 
@@ -715,6 +866,7 @@ def generate(ast: Ast) -> list[str]:
     write_anynoderef(out, ast)
     write_nodekind(out, ast)
     write_node(out, ast)
+    write_source_order(out, ast)
     return out