Skip to content

Commit f5f5a75

Browse files
authored
[TVMScript] Text underlining in DocPrinter based on Doc's source_paths (#12344)
This adds the ability to print a "diagnostic marker" based on a given ObjectPath. For example, say we are printing a fragment of TIR like

```
for i in T.serial(10):
    a[i] = 5
```

and we would like to bring the user's attention to the bound of the loop:

```
for i in T.serial(10):
                  ^^
    a[i] = 5
```

In this case we would give the doc printer an object path that represents this loop bound, i.e. something like `path_to_underline=ObjectPath.root().attr("extent")`.

Tracking issue: #11912
1 parent 1a3d36a commit f5f5a75

File tree

6 files changed

+718
-17
lines changed

6 files changed

+718
-17
lines changed

include/tvm/script/printer/doc_printer.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,15 @@ namespace printer {
3131
* This function unpacks the DocPrinterOptions into function arguments
3232
* to be FFI friendly.
3333
*
34-
* \param doc the doc to be converted
35-
* \param indent_spaces the number of spaces used for indention
34+
* \param doc Doc to be converted
35+
* \param indent_spaces Number of spaces used for indentation
36+
* \param print_line_numbers Whether to print line numbers
37+
* \param num_context_lines Number of context lines to print around the underlined text
38+
* \param path_to_underline Object path to be underlined
3639
*/
37-
String DocToPythonScript(Doc doc, int indent_spaces = 4);
40+
String DocToPythonScript(Doc doc, int indent_spaces = 4, bool print_line_numbers = false,
41+
int num_context_lines = -1,
42+
Optional<ObjectPath> path_to_underline = NullOpt);
3843

3944
} // namespace printer
4045
} // namespace script

python/tvm/script/printer/doc_printer.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,19 @@
1616
# under the License.
1717
"""Functions to print doc into text format"""
1818

19+
from typing import Optional
20+
from tvm.runtime.object_path import ObjectPath
1921
from . import _ffi_api
2022
from .doc import Doc
2123

2224

23-
def to_python_script(doc: Doc, indent_spaces: int = 4) -> str:
25+
def to_python_script(
26+
doc: Doc,
27+
indent_spaces: int = 4,
28+
print_line_numbers: bool = False,
29+
num_context_lines: Optional[int] = None,
30+
path_to_underline: Optional[ObjectPath] = None,
31+
) -> str:
2432
"""Convert Doc into Python script.
2533
2634
Parameters
@@ -29,10 +37,20 @@ def to_python_script(doc: Doc, indent_spaces: int = 4) -> str:
2937
The doc to convert into Python script
3038
indent_spaces : int
3139
The number of indent spaces to use in the output
40+
print_line_numbers: bool
41+
Whether to print line numbers
42+
num_context_lines : Optional[int]
43+
Number of context lines to print around the underlined text
44+
path_to_underline : Optional[ObjectPath]
45+
Object path to be underlined
3246
3347
Returns
3448
-------
3549
script : str
3650
The text representation of Doc in Python syntax
3751
"""
38-
return _ffi_api.DocToPythonScript(doc, indent_spaces) # type: ignore # pylint: disable=no-member
52+
if num_context_lines is None:
53+
num_context_lines = -1
54+
return _ffi_api.DocToPythonScript( # type: ignore
55+
doc, indent_spaces, print_line_numbers, num_context_lines, path_to_underline
56+
)

src/script/printer/base_doc_printer.cc

Lines changed: 258 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,19 +23,256 @@ namespace tvm {
2323
namespace script {
2424
namespace printer {
2525

26-
DocPrinter::DocPrinter(int indent_spaces) : indent_spaces_(indent_spaces) {}
26+
namespace {
2727

28-
void DocPrinter::Append(const Doc& doc) { PrintDoc(doc); }
28+
void SortAndMergeSpans(std::vector<ByteSpan>* spans) {
29+
if (spans->empty()) {
30+
return;
31+
}
32+
std::sort(spans->begin(), spans->end());
33+
auto last = spans->begin();
34+
for (auto cur = spans->begin() + 1; cur != spans->end(); ++cur) {
35+
if (cur->first > last->second) {
36+
*++last = *cur;
37+
} else if (cur->second > last->second) {
38+
last->second = cur->second;
39+
}
40+
}
41+
spans->erase(++last, spans->end());
42+
}
43+
44+
size_t GetTextWidth(const std::string& text, const ByteSpan& span) {
45+
// FIXME: this only works for ASCII characters.
46+
// To do this "correctly", we need to parse UTF-8 into codepoints
47+
// and call wcwidth() or equivalent for every codepoint.
48+
size_t ret = 0;
49+
for (size_t i = span.first; i != span.second; ++i) {
50+
if (isprint(text[i])) {
51+
ret += 1;
52+
}
53+
}
54+
return ret;
55+
}
56+
57+
/*!
 * \brief Move a position backwards, stopping at zero.
 * \param pos Starting position.
 * \param distance How far to move back.
 * \return `pos - distance`, or 0 when `distance` is greater than `pos`.
 */
size_t MoveBack(size_t pos, size_t distance) {
  if (distance > pos) {
    return 0;
  }
  return pos - distance;
}
58+
59+
/*!
 * \brief Move a position forwards, stopping at `max`.
 * \param pos Starting position.
 * \param distance How far to move forward.
 * \param max The largest position that may be returned.
 * \return `pos + distance`, clamped so that the result never exceeds `max`.
 */
size_t MoveForward(size_t pos, size_t distance, size_t max) {
  // A start position at or beyond `max` is clamped right away. Without this guard
  // `max - pos` would wrap around (unsigned underflow) and the clamp below would be skipped.
  if (pos >= max) {
    return max;
  }
  // Compare against the remaining room instead of computing `pos + distance`,
  // which could overflow for very large distances.
  return distance > max - pos ? max : pos + distance;
}
62+
63+
/*!
 * \brief Find the index of the line that contains a given byte position.
 * \param byte_pos Byte offset to look up.
 * \param line_starts Ascending byte offsets at which each line starts.
 * \return Index of the last entry in `line_starts` that is not greater than `byte_pos`.
 */
size_t GetLineIndex(size_t byte_pos, const std::vector<size_t>& line_starts) {
  const auto first_start = line_starts.begin();
  // First line that starts strictly after the position; the line before it is the answer.
  const auto next_line = std::upper_bound(first_start, line_starts.end(), byte_pos);
  const auto lines_at_or_before = next_line - first_start;
  return lines_at_or_before - 1;
}
67+
68+
using UnderlineIter = typename std::vector<ByteSpan>::const_iterator;
69+
70+
ByteSpan PopNextUnderline(UnderlineIter* next_underline, UnderlineIter end_underline) {
71+
if (*next_underline == end_underline) {
72+
return {std::numeric_limits<size_t>::max(), std::numeric_limits<size_t>::max()};
73+
} else {
74+
return *(*next_underline)++;
75+
}
76+
}
77+
78+
void PrintChunk(const std::pair<size_t, size_t>& lines_range,
79+
const std::pair<UnderlineIter, UnderlineIter>& underlines, const std::string& text,
80+
const std::vector<size_t>& line_starts, const DocPrinterOptions& options,
81+
size_t line_number_width, std::string* out) {
82+
UnderlineIter next_underline = underlines.first;
83+
ByteSpan current_underline = PopNextUnderline(&next_underline, underlines.second);
84+
85+
for (size_t line_idx = lines_range.first; line_idx < lines_range.second; ++line_idx) {
86+
if (options.print_line_numbers) {
87+
std::string line_num_str = std::to_string(line_idx + 1);
88+
line_num_str.push_back(' ');
89+
for (size_t i = line_num_str.size(); i < line_number_width; ++i) {
90+
out->push_back(' ');
91+
}
92+
*out += line_num_str;
93+
}
94+
95+
size_t line_start = line_starts.at(line_idx);
96+
size_t line_end =
97+
line_idx + 1 == line_starts.size() ? text.size() : line_starts.at(line_idx + 1);
98+
out->append(text.begin() + line_start, text.begin() + line_end);
99+
100+
bool printed_underline = false;
101+
size_t line_pos = line_start;
102+
bool printed_extra_caret = 0;
103+
while (current_underline.first < line_end) {
104+
if (!printed_underline) {
105+
*out += std::string(line_number_width, ' ');
106+
printed_underline = true;
107+
}
108+
109+
size_t underline_end_for_line = std::min(line_end, current_underline.second);
110+
size_t num_spaces = GetTextWidth(text, {line_pos, current_underline.first});
111+
if (num_spaces > 0 && printed_extra_caret) {
112+
num_spaces -= 1;
113+
printed_extra_caret = false;
114+
}
115+
*out += std::string(num_spaces, ' ');
116+
117+
size_t num_carets = GetTextWidth(text, {current_underline.first, underline_end_for_line});
118+
if (num_carets == 0 && !printed_extra_caret) {
119+
// Special case: when underlineing an empty or unprintable string, make sure to print
120+
// at least one caret still.
121+
num_carets = 1;
122+
printed_extra_caret = true;
123+
} else if (num_carets > 0 && printed_extra_caret) {
124+
num_carets -= 1;
125+
printed_extra_caret = false;
126+
}
127+
*out += std::string(num_carets, '^');
128+
129+
line_pos = current_underline.first = underline_end_for_line;
130+
if (current_underline.first == current_underline.second) {
131+
current_underline = PopNextUnderline(&next_underline, underlines.second);
132+
}
133+
}
134+
135+
if (printed_underline) {
136+
out->push_back('\n');
137+
}
138+
}
139+
}
140+
141+
/*!
 * \brief Append a marker line telling how many lines were left out.
 * \param num_lines_skipped Number of omitted lines; nothing is appended when it is zero.
 * \param out String that the marker line is appended to.
 */
void PrintCut(size_t num_lines_skipped, std::string* out) {
  if (num_lines_skipped == 0) {
    return;
  }
  std::ostringstream marker;
  marker << "(... " << num_lines_skipped << " lines skipped ...)\n";
  out->append(marker.str());
}
148+
149+
std::pair<size_t, size_t> GetLinesForUnderline(const ByteSpan& underline,
150+
const std::vector<size_t>& line_starts,
151+
size_t num_lines, const DocPrinterOptions& options) {
152+
size_t first_line_of_underline = GetLineIndex(underline.first, line_starts);
153+
size_t first_line_of_chunk = MoveBack(first_line_of_underline, options.num_context_lines);
154+
size_t end_line_of_underline = GetLineIndex(underline.second - 1, line_starts) + 1;
155+
size_t end_line_of_chunk =
156+
MoveForward(end_line_of_underline, options.num_context_lines, num_lines);
157+
158+
return {first_line_of_chunk, end_line_of_chunk};
159+
}
160+
161+
// If there is only one line between the chunks, it is better to print it as is,
// rather than something like "(... 1 line skipped ...)".
constexpr const size_t kMinLinesToCutOut = 2;

/*!
 * \brief Extend the current chunk with a following chunk if they are close enough.
 *
 * The chunks are merged when fewer than kMinLinesToCutOut lines lie between the end
 * of the current chunk and the start of the new one.
 *
 * \param cur_chunk Current half-open line range; its end is extended on a merge.
 * \param new_chunk Half-open line range of the following chunk.
 * \return true if the chunks were merged, false if `cur_chunk` was left untouched.
 */
bool TryMergeChunks(std::pair<size_t, size_t>* cur_chunk,
                    const std::pair<size_t, size_t>& new_chunk) {
  if (new_chunk.first >= cur_chunk->second + kMinLinesToCutOut) {
    return false;
  }
  // A merge may only grow the chunk: keep the current end if the new chunk ends earlier.
  cur_chunk->second = std::max(cur_chunk->second, new_chunk.second);
  return true;
}
174+
175+
/*!
 * \brief Count the lines of the text, ignoring an empty line at the very end.
 * \param text The complete text.
 * \param line_starts Byte offsets at which each line of `text` starts; must not be empty.
 * \return Number of entries in `line_starts`, minus one when the last entry
 *         equals the size of the text.
 */
size_t GetNumLines(const std::string& text, const std::vector<size_t>& line_starts) {
  // Final empty line doesn't count as a line
  const bool ends_with_empty_line = (line_starts.back() == text.size());
  return ends_with_empty_line ? line_starts.size() - 1 : line_starts.size();
}
183+
184+
size_t GetLineNumberWidth(size_t num_lines, const DocPrinterOptions& options) {
185+
if (options.print_line_numbers) {
186+
return std::to_string(num_lines).size() + 1;
187+
} else {
188+
return 0;
189+
}
190+
}
191+
192+
std::string DecorateText(const std::string& text, const std::vector<size_t>& line_starts,
193+
const DocPrinterOptions& options,
194+
const std::vector<ByteSpan>& underlines) {
195+
size_t num_lines = GetNumLines(text, line_starts);
196+
size_t line_number_width = GetLineNumberWidth(num_lines, options);
197+
198+
std::string ret;
199+
if (underlines.empty()) {
200+
PrintChunk({0, num_lines}, {underlines.begin(), underlines.begin()}, text, line_starts, options,
201+
line_number_width, &ret);
202+
return ret;
203+
}
204+
205+
size_t last_end_line = 0;
206+
std::pair<size_t, size_t> cur_chunk =
207+
GetLinesForUnderline(underlines[0], line_starts, num_lines, options);
208+
if (cur_chunk.first < kMinLinesToCutOut) {
209+
cur_chunk.first = 0;
210+
}
211+
212+
auto first_underline_in_cur_chunk = underlines.begin();
213+
for (auto underline_it = underlines.begin() + 1; underline_it != underlines.end();
214+
++underline_it) {
215+
std::pair<size_t, size_t> new_chunk =
216+
GetLinesForUnderline(*underline_it, line_starts, num_lines, options);
217+
218+
if (!TryMergeChunks(&cur_chunk, new_chunk)) {
219+
PrintCut(cur_chunk.first - last_end_line, &ret);
220+
PrintChunk(cur_chunk, {first_underline_in_cur_chunk, underline_it}, text, line_starts,
221+
options, line_number_width, &ret);
222+
last_end_line = cur_chunk.second;
223+
cur_chunk = new_chunk;
224+
first_underline_in_cur_chunk = underline_it;
225+
}
226+
}
227+
228+
PrintCut(cur_chunk.first - last_end_line, &ret);
229+
if (num_lines - cur_chunk.second < kMinLinesToCutOut) {
230+
cur_chunk.second = num_lines;
231+
}
232+
PrintChunk(cur_chunk, {first_underline_in_cur_chunk, underlines.end()}, text, line_starts,
233+
options, line_number_width, &ret);
234+
PrintCut(num_lines - cur_chunk.second, &ret);
235+
return ret;
236+
}
237+
238+
} // anonymous namespace
239+
240+
/*!
 * \brief Construct a printer that uses the given options.
 * \param options Printer options; a copy is stored in the printer.
 */
DocPrinter::DocPrinter(const DocPrinterOptions& options)
    : options_(options) {
  // The first line of the output starts at byte offset 0.
  line_starts_.push_back(0);
}
243+
244+
/*!
 * \brief Print a doc without underlining anything.
 * \param doc The doc to print.
 */
void DocPrinter::Append(const Doc& doc) {
  // Delegate to the two-argument overload with no path to underline.
  Append(doc, NullOpt);
}
245+
246+
/*!
 * \brief Print a doc and record the spans to underline for the given object path.
 * \param doc The doc to print.
 * \param path_to_underline Object path whose printed text should be underlined, if any.
 */
void DocPrinter::Append(const Doc& doc, Optional<ObjectPath> path_to_underline) {
  // Reset the underline-tracking state that MarkSpan() updates during this call.
  current_underline_candidates_.clear();
  current_max_path_length_ = 0;
  path_to_underline_ = path_to_underline;

  PrintDoc(doc);

  // Keep the candidates collected while printing this doc.
  for (const auto& candidate : current_underline_candidates_) {
    underlines_.push_back(candidate);
  }
}
29255

30256
/*!
 * \brief Get the printed text, decorated according to the printer options.
 *
 * Trailing spaces are removed, a final newline is added if the text does not end with
 * one, and the result is passed through DecorateText() together with the recorded
 * underlines.
 *
 * \return The decorated text.
 */
String DocPrinter::GetString() const {
  std::string text = output_.str();

  // Remove any trailing indentation
  while (!text.empty() && text.back() == ' ') {
    text.pop_back();
  }

  if (!text.empty() && text.back() != '\n') {
    text.push_back('\n');
  }

  // The pre-change `return text;` is intentionally gone: returning here would skip
  // line numbers, underlines and cut markers altogether.
  // Work on a copy because this method is const and the spans need sorting and merging.
  std::vector<ByteSpan> underlines = underlines_;
  SortAndMergeSpans(&underlines);
  return DecorateText(text, line_starts_, options_, underlines);
}
37272

38273
void DocPrinter::PrintDoc(const Doc& doc) {
274+
size_t start_pos = output_.tellp();
275+
39276
if (const auto* doc_node = doc.as<LiteralDocNode>()) {
40277
PrintTypedDoc(GetRef<LiteralDoc>(doc_node));
41278
} else if (const auto* doc_node = doc.as<IdDocNode>()) {
@@ -84,6 +321,24 @@ void DocPrinter::PrintDoc(const Doc& doc) {
84321
LOG(FATAL) << "Do not know how to print " << doc->GetTypeKey();
85322
throw;
86323
}
324+
325+
size_t end_pos = output_.tellp();
326+
for (const ObjectPath& path : doc->source_paths) {
327+
MarkSpan({start_pos, end_pos}, path);
328+
}
329+
}
330+
331+
void DocPrinter::MarkSpan(const ByteSpan& span, const ObjectPath& path) {
332+
if (path_to_underline_.defined()) {
333+
if (path->Length() >= current_max_path_length_ &&
334+
path->IsPrefixOf(path_to_underline_.value())) {
335+
if (path->Length() > current_max_path_length_) {
336+
current_max_path_length_ = path->Length();
337+
current_underline_candidates_.clear();
338+
}
339+
current_underline_candidates_.push_back(span);
340+
}
341+
}
87342
}
88343

89344
} // namespace printer

0 commit comments

Comments
 (0)