Skip to content

Commit

Permalink
Convert Location to a namedtuple, and associated cleanup (#205)
Browse files Browse the repository at this point in the history
This change makes the `Location` dataclass, which does not change
frequently, into a new `SourceLocation` namedtuple, and changes the
`SourceLocation` serialization.  As a result, with this change:

*   `embossc` runs about 25% faster on a large (7kLOC) input; `python3
    -OO emboss` runs about 19% faster on the same input.
*   Serialized IR is about 45% smaller.

Details:

*   Replace the `ir_data.Location` dataclass with a new
    `parser_types.SourceLocation` namedtuple.  The rename helps clarify
    the difference between a location within source code
    (`SourceLocation`) and a location within a structure
    (`FieldLocation`).
*   Similarly, replace `ir_data.Position` with
    `parser_types.SourcePosition`.
*   Update any place that edits a `SourceLocation` with an appropriate
    assignment; e.g., `x.source_location.end = y` becomes
    `x.source_location = x.source_location._replace(end=y)`.  In most
    cases, several fields were updated consecutively; those updates are
    been merged.
*   Update the JSON serialization to use the compact format.
*   Replace `format_location()` and `format_position()` with
    `__str__()` methods on `SourceLocation` and `SourcePosition`,
    respectively.
*   Replace `parse_location()` and `parse_position()` with `from_str()`
    class methods on `SourceLocation` and `SourcePosition`,
    respectively.
*   Move the `make_location()` functionality into
    `SourceLocation.__new__()`.
*   Update `_to_dict` and `_from_dict` in `IrDataSerializer` to
    stringify and destringify `SourceLocation`.  It is tempting to
    try to do this during the JSON serialization step (with a `default=`
    parameter to `json.dumps` and an `object_hook=` parameter to
    `json.loads`), but it is tricky to get the `object_hook` to know
    when to convert.
  • Loading branch information
reventlov authored Oct 17, 2024
1 parent 46423da commit 73cbd98
Show file tree
Hide file tree
Showing 29 changed files with 780 additions and 1,523 deletions.
11 changes: 8 additions & 3 deletions compiler/back_end/cpp/header_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -1747,9 +1747,14 @@ def _offset_source_location_column(source_location, offset):
original start column.
"""

new_location = ir_data_utils.copy(source_location)
new_location.start.column = source_location.start.column + offset[0]
new_location.end.column = source_location.start.column + offset[1]
new_location = source_location._replace(
start=source_location.start._replace(
column=source_location.start.column + offset[0]
),
end=source_location.start._replace(
column=source_location.start.column + offset[1]
),
)

return new_location

Expand Down
91 changes: 49 additions & 42 deletions compiler/back_end/cpp/header_generator_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,12 +126,11 @@ def test_rejects_bad_enum_case_at_start(self):
)
attr = ir.module[0].type[0].attribute[0]

bad_case_source_location = ir_data.Location()
bad_case_source_location = ir_data_utils.builder(bad_case_source_location)
bad_case_source_location.CopyFrom(attr.value.source_location)
# Location of SHORTY_CASE in the attribute line.
bad_case_source_location.start.column = 30
bad_case_source_location.end.column = 41
# SourceLocation of SHORTY_CASE in the attribute line.
bad_case_source_location = attr.value.source_location._replace(
start=attr.value.source_location.start._replace(column=30),
end=attr.value.source_location.end._replace(column=41),
)

self.assertEqual(
[
Expand All @@ -156,12 +155,11 @@ def test_rejects_bad_enum_case_in_middle(self):
)
attr = ir.module[0].type[0].attribute[0]

bad_case_source_location = ir_data.Location()
bad_case_source_location = ir_data_utils.builder(bad_case_source_location)
bad_case_source_location.CopyFrom(attr.value.source_location)
# Location of bad_CASE in the attribute line.
bad_case_source_location.start.column = 43
bad_case_source_location.end.column = 51
# SourceLocation of bad_CASE in the attribute line.
bad_case_source_location = attr.value.source_location._replace(
start=attr.value.source_location.start._replace(column=43),
end=attr.value.source_location.end._replace(column=51),
)

self.assertEqual(
[
Expand All @@ -186,12 +184,11 @@ def test_rejects_bad_enum_case_at_end(self):
)
attr = ir.module[0].type[0].attribute[0]

bad_case_source_location = ir_data.Location()
bad_case_source_location = ir_data_utils.builder(bad_case_source_location)
bad_case_source_location.CopyFrom(attr.value.source_location)
# Location of BAD_case in the attribute line.
bad_case_source_location.start.column = 55
bad_case_source_location.end.column = 63
# SourceLocation of BAD_case in the attribute line.
bad_case_source_location = attr.value.source_location._replace(
start=attr.value.source_location.start._replace(column=55),
end=attr.value.source_location.end._replace(column=63),
)

self.assertEqual(
[
Expand All @@ -216,12 +213,11 @@ def test_rejects_duplicate_enum_case(self):
)
attr = ir.module[0].type[0].attribute[0]

bad_case_source_location = ir_data.Location()
bad_case_source_location = ir_data_utils.builder(bad_case_source_location)
bad_case_source_location.CopyFrom(attr.value.source_location)
# Location of the second SHOUTY_CASE in the attribute line.
bad_case_source_location.start.column = 43
bad_case_source_location.end.column = 54
# SourceLocation of the second SHOUTY_CASE in the attribute line.
bad_case_source_location = attr.value.source_location._replace(
start=attr.value.source_location.start._replace(column=43),
end=attr.value.source_location.end._replace(column=54),
)

self.assertEqual(
[
Expand All @@ -246,12 +242,11 @@ def test_rejects_empty_enum_case(self):
)
attr = ir.module[0].type[0].attribute[0]

bad_case_source_location = ir_data.Location()
bad_case_source_location = ir_data_utils.builder(bad_case_source_location)
bad_case_source_location.CopyFrom(attr.value.source_location)
# Location of excess comma.
bad_case_source_location.start.column = 42
bad_case_source_location.end.column = 42
# SourceLocation of excess comma.
bad_case_source_location = attr.value.source_location._replace(
start=attr.value.source_location.start._replace(column=42),
end=attr.value.source_location.end._replace(column=42),
)

self.assertEqual(
[
Expand All @@ -274,8 +269,10 @@ def test_rejects_empty_enum_case(self):
" BAZ = 2\n"
)

bad_case_source_location.start.column = 30
bad_case_source_location.end.column = 30
bad_case_source_location = attr.value.source_location._replace(
start=attr.value.source_location.start._replace(column=30),
end=attr.value.source_location.end._replace(column=30),
)

self.assertEqual(
[
Expand All @@ -298,8 +295,10 @@ def test_rejects_empty_enum_case(self):
" BAZ = 2\n"
)

bad_case_source_location.start.column = 54
bad_case_source_location.end.column = 54
bad_case_source_location = attr.value.source_location._replace(
start=attr.value.source_location.start._replace(column=54),
end=attr.value.source_location.end._replace(column=54),
)

self.assertEqual(
[
Expand All @@ -322,8 +321,10 @@ def test_rejects_empty_enum_case(self):
" BAZ = 2\n"
)

bad_case_source_location.start.column = 45
bad_case_source_location.end.column = 45
bad_case_source_location = attr.value.source_location._replace(
start=attr.value.source_location.start._replace(column=45),
end=attr.value.source_location.end._replace(column=45),
)

self.assertEqual(
[
Expand All @@ -346,8 +347,10 @@ def test_rejects_empty_enum_case(self):
" BAZ = 2\n"
)

bad_case_source_location.start.column = 30
bad_case_source_location.end.column = 30
bad_case_source_location = attr.value.source_location._replace(
start=attr.value.source_location.start._replace(column=30),
end=attr.value.source_location.end._replace(column=30),
)

self.assertEqual(
[
Expand All @@ -370,8 +373,10 @@ def test_rejects_empty_enum_case(self):
" BAZ = 2\n"
)

bad_case_source_location.start.column = 35
bad_case_source_location.end.column = 35
bad_case_source_location = attr.value.source_location._replace(
start=attr.value.source_location.start._replace(column=35),
end=attr.value.source_location.end._replace(column=35),
)

self.assertEqual(
[
Expand All @@ -394,8 +399,10 @@ def test_rejects_empty_enum_case(self):
" BAZ = 2\n"
)

bad_case_source_location.start.column = 31
bad_case_source_location.end.column = 31
bad_case_source_location = attr.value.source_location._replace(
start=attr.value.source_location.start._replace(column=31),
end=attr.value.source_location.end._replace(column=31),
)

self.assertEqual(
[
Expand Down
2 changes: 1 addition & 1 deletion compiler/front_end/glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ def parse_module(file_name, file_reader):
"""
source_code, errors = file_reader(file_name)
if errors:
location = parser_types.make_location((1, 1), (1, 1))
location = parser_types.SourceLocation((1, 1), (1, 1))
return (
None,
None,
Expand Down
14 changes: 10 additions & 4 deletions compiler/front_end/glue_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from compiler.util import parser_types
from compiler.util import test_util

_location = parser_types.make_location
_location = parser_types.SourceLocation

_ROOT_PACKAGE = "testdata.golden"
_GOLDEN_PATH = ""
Expand Down Expand Up @@ -232,7 +232,9 @@ def test_synthetic_error(self):
self.assertFalse(errors)
# Artificially mark the first field as is_synthetic.
first_field = ir.module[0].type[0].structure.field[0]
first_field.source_location.is_synthetic = True
first_field.source_location = first_field.source_location._replace(
is_synthetic=True
)
ir, errors = glue.process_ir(ir, None)
self.assertTrue(errors)
self.assertEqual(
Expand All @@ -259,8 +261,12 @@ def test_suppressed_synthetic_error(self):
self.assertFalse(errors)
# Artificially mark the name of the second field as is_synthetic.
second_field = ir.module[0].type[0].structure.field[1]
second_field.name.source_location.is_synthetic = True
second_field.name.name.source_location.is_synthetic = True
second_field.name.source_location = second_field.name.source_location._replace(
is_synthetic=True
)
second_field.name.name.source_location = (
second_field.name.name.source_location._replace(is_synthetic=True)
)
ir, errors = glue.process_ir(ir, None)
self.assertEqual(1, len(errors))
self.assertEqual("Duplicate name 'field'", errors[0][0].message)
Expand Down
25 changes: 4 additions & 21 deletions compiler/front_end/lr1.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,9 @@ def __str__(self):

# ANY_TOKEN is used by mark_error as a "wildcard" token that should be replaced
# by every other token.
ANY_TOKEN = parser_types.Token(object(), "*", parser_types.parse_location("0:0-0:0"))
ANY_TOKEN = parser_types.Token(
object(), "*", parser_types.SourceLocation.from_str("0:0-0:0")
)


class Reduction(
Expand Down Expand Up @@ -690,26 +692,7 @@ def state():
# children, setting the source_location to None in that case.
start_position = None
end_position = None
for child in children:
if (
hasattr(child, "source_location")
and child.source_location is not None
):
start_position = child.source_location.start
break
for child in reversed(children):
if (
hasattr(child, "source_location")
and child.source_location is not None
):
end_position = child.source_location.end
break
if start_position is None:
source_location = None
else:
source_location = parser_types.make_location(
start_position, end_position
)
source_location = parser_types.merge_source_locations(*children)
reduction = Reduction(
next_action.rule.lhs, children, next_action.rule, source_location
)
Expand Down
4 changes: 2 additions & 2 deletions compiler/front_end/lr1_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def _tokenize(text):
result = []
for i in range(len(text)):
result.append(
Token(text[i], parser_types.make_location((1, i + 1), (1, i + 2)))
Token(text[i], parser_types.SourceLocation((1, i + 1), (1, i + 2)))
)
return result

Expand Down Expand Up @@ -209,7 +209,7 @@ def test_goto_table(self):

def test_successful_parse(self):
parser = _alsu_grammar.parser()
loc = parser_types.parse_location
loc = parser_types.SourceLocation.from_str
s_to_c_c = parser_types.Production.parse("S -> C C")
c_to_c_c = parser_types.Production.parse("C -> c C")
c_to_d = parser_types.Production.parse("C -> d")
Expand Down
Loading

0 comments on commit 73cbd98

Please sign in to comment.