-
Notifications
You must be signed in to change notification settings - Fork 3.7k
[TVMScript] Text underlining in DocPrinter based on Doc's source_paths #12344
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,19 +23,256 @@ namespace tvm { | |
| namespace script { | ||
| namespace printer { | ||
|
|
||
| DocPrinter::DocPrinter(int indent_spaces) : indent_spaces_(indent_spaces) {} | ||
| namespace { | ||
|
|
||
| void DocPrinter::Append(const Doc& doc) { PrintDoc(doc); } | ||
| void SortAndMergeSpans(std::vector<ByteSpan>* spans) { | ||
| if (spans->empty()) { | ||
| return; | ||
| } | ||
| std::sort(spans->begin(), spans->end()); | ||
| auto last = spans->begin(); | ||
| for (auto cur = spans->begin() + 1; cur != spans->end(); ++cur) { | ||
| if (cur->first > last->second) { | ||
| *++last = *cur; | ||
| } else if (cur->second > last->second) { | ||
| last->second = cur->second; | ||
| } | ||
| } | ||
| spans->erase(++last, spans->end()); | ||
| } | ||
|
|
||
| size_t GetTextWidth(const std::string& text, const ByteSpan& span) { | ||
| // FIXME: this only works for ASCII characters. | ||
| // To do this "correctly", we need to parse UTF-8 into codepoints | ||
| // and call wcwidth() or equivalent for every codepoint. | ||
| size_t ret = 0; | ||
| for (size_t i = span.first; i != span.second; ++i) { | ||
| if (isprint(text[i])) { | ||
| ret += 1; | ||
| } | ||
| } | ||
| return ret; | ||
| } | ||
|
|
||
// Returns `pos - distance`, saturating at 0 instead of wrapping around.
size_t MoveBack(size_t pos, size_t distance) {
  if (distance > pos) {
    return 0;
  }
  return pos - distance;
}
|
|
||
// Returns `pos + distance`, saturating at `max`.
// The result never exceeds `max`, even when `pos` itself is already past it.
size_t MoveForward(size_t pos, size_t distance, size_t max) {
  if (pos >= max) {
    // Already at or beyond the limit. This also keeps the unsigned subtraction
    // below from wrapping around when pos > max.
    return max;
  }
  return distance > max - pos ? max : pos + distance;
}
|
|
||
// Returns the zero-based index of the line containing `byte_pos`, i.e. the
// index of the last entry of the sorted `line_starts` that is <= byte_pos.
size_t GetLineIndex(size_t byte_pos, const std::vector<size_t>& line_starts) {
  // First line that begins strictly after byte_pos; the line we want is the
  // one right before it.
  const auto first_start_after =
      std::upper_bound(line_starts.begin(), line_starts.end(), byte_pos);
  const auto starts_at_or_before = first_start_after - line_starts.begin();
  return starts_at_or_before - 1;
}
|
|
||
| using UnderlineIter = typename std::vector<ByteSpan>::const_iterator; | ||
|
|
||
| ByteSpan PopNextUnderline(UnderlineIter* next_underline, UnderlineIter end_underline) { | ||
| if (*next_underline == end_underline) { | ||
| return {std::numeric_limits<size_t>::max(), std::numeric_limits<size_t>::max()}; | ||
| } else { | ||
| return *(*next_underline)++; | ||
| } | ||
| } | ||
|
|
||
| void PrintChunk(const std::pair<size_t, size_t>& lines, | ||
| const std::pair<UnderlineIter, UnderlineIter>& underlines, const std::string& text, | ||
| const std::vector<size_t>& line_starts, const DocPrinterOptions& options, | ||
| size_t line_number_width, std::string* out) { | ||
| UnderlineIter next_underline = underlines.first; | ||
| ByteSpan current_underline = PopNextUnderline(&next_underline, underlines.second); | ||
|
|
||
| for (size_t line_idx = lines.first; line_idx < lines.second; ++line_idx) { | ||
| if (options.print_line_numbers) { | ||
| std::string line_num_str = std::to_string(line_idx + 1); | ||
| line_num_str.push_back(' '); | ||
| for (size_t i = line_num_str.size(); i < line_number_width; ++i) { | ||
| out->push_back(' '); | ||
| } | ||
| *out += line_num_str; | ||
| } | ||
|
|
||
| size_t line_start = line_starts.at(line_idx); | ||
| size_t line_end = | ||
| line_idx + 1 == line_starts.size() ? text.size() : line_starts.at(line_idx + 1); | ||
| out->append(text.begin() + line_start, text.begin() + line_end); | ||
|
|
||
| bool printed_underline = false; | ||
| size_t line_pos = line_start; | ||
| bool printed_extra_caret = 0; | ||
| while (current_underline.first < line_end) { | ||
| if (!printed_underline) { | ||
| *out += std::string(line_number_width, ' '); | ||
| printed_underline = true; | ||
| } | ||
|
|
||
| size_t underline_end_for_line = std::min(line_end, current_underline.second); | ||
| size_t num_spaces = GetTextWidth(text, {line_pos, current_underline.first}); | ||
| if (num_spaces > 0 && printed_extra_caret) { | ||
| num_spaces -= 1; | ||
| printed_extra_caret = false; | ||
| } | ||
| *out += std::string(num_spaces, ' '); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should it skip printing carets under the indentation spaces for multi-line underline?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure, I could see arguments either way. I'd say that the first option can provide a better sense of "continuity". make it clear that this is one chunk of text being highlighted, as opposed to two different chunks. Also it seems to simplify the implementation :)
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This makes sense. What I was thinking about is the deeply nested code, like But it does add more complexity to the implementation. We can keep it as is for now and make improvement in the future if needed.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good point about the nested code. But let's give the simpler implementation a shot and then add more complexity if people hate this behavior. |
||
|
|
||
| size_t num_carets = GetTextWidth(text, {current_underline.first, underline_end_for_line}); | ||
| if (num_carets == 0 && !printed_extra_caret) { | ||
| // Special case: when underlineing an empty or unprintable string, make sure to print | ||
| // at least one caret still. | ||
| num_carets = 1; | ||
| printed_extra_caret = true; | ||
| } else if (num_carets > 0 && printed_extra_caret) { | ||
| num_carets -= 1; | ||
| printed_extra_caret = false; | ||
| } | ||
| *out += std::string(num_carets, '^'); | ||
|
|
||
| line_pos = current_underline.first = underline_end_for_line; | ||
| if (current_underline.first == current_underline.second) { | ||
| current_underline = PopNextUnderline(&next_underline, underlines.second); | ||
| } | ||
| } | ||
|
|
||
| if (printed_underline) { | ||
| out->push_back('\n'); | ||
| } | ||
| } | ||
| } | ||
|
|
||
// Appends a "(... N lines skipped ...)" marker line to `out`.
// Nothing is appended when `num_lines_skipped` is zero.
void PrintCut(size_t num_lines_skipped, std::string* out) {
  if (num_lines_skipped == 0) {
    return;
  }
  // std::to_string is locale-independent and matches how line numbers are
  // formatted elsewhere in this file; no stream object is needed.
  *out += "(... ";
  *out += std::to_string(num_lines_skipped);
  *out += " lines skipped ...)\n";
}
|
|
||
| std::pair<size_t, size_t> GetLinesForUnderline(const ByteSpan& underline, | ||
| const std::vector<size_t>& line_starts, | ||
| size_t num_lines, const DocPrinterOptions& options) { | ||
| size_t first_line_of_underline = GetLineIndex(underline.first, line_starts); | ||
| size_t first_line_of_chunk = MoveBack(first_line_of_underline, options.num_context_lines); | ||
| size_t end_line_of_underline = GetLineIndex(underline.second - 1, line_starts) + 1; | ||
| size_t end_line_of_chunk = | ||
| MoveForward(end_line_of_underline, options.num_context_lines, num_lines); | ||
|
|
||
| return {first_line_of_chunk, end_line_of_chunk}; | ||
| } | ||
|
|
||
// Smallest gap, in lines, that is worth replacing with a "lines skipped"
// marker. A single line between two chunks is printed as is, because that is
// better than something like "(... 1 line skipped ...)".
constexpr size_t kMinLinesToCutOut = 2;

// Extends `cur_chunk` to the end of `new_chunk` when fewer than
// kMinLinesToCutOut lines separate the two. Returns true if the chunks were
// merged; otherwise leaves `cur_chunk` untouched and returns false.
bool TryMergeChunks(std::pair<size_t, size_t>* cur_chunk,
                    const std::pair<size_t, size_t>& new_chunk) {
  const bool far_enough_to_cut = new_chunk.first >= cur_chunk->second + kMinLinesToCutOut;
  if (far_enough_to_cut) {
    return false;
  }
  cur_chunk->second = new_chunk.second;
  return true;
}
|
|
||
// Returns how many lines of `text` should be counted, given the byte offsets
// at which its lines begin. A last line that starts exactly at the end of the
// text is empty and is not counted.
size_t GetNumLines(const std::string& text, const std::vector<size_t>& line_starts) {
  const bool has_trailing_empty_line = line_starts.back() == text.size();
  return has_trailing_empty_line ? line_starts.size() - 1 : line_starts.size();
}
|
|
||
| size_t GetLineNumberWidth(size_t num_lines, const DocPrinterOptions& options) { | ||
| if (options.print_line_numbers) { | ||
| return std::to_string(num_lines).size() + 1; | ||
| } else { | ||
| return 0; | ||
| } | ||
| } | ||
|
|
||
| std::string DecorateText(const std::string& text, const std::vector<size_t>& line_starts, | ||
| const DocPrinterOptions& options, | ||
| const std::vector<ByteSpan>& underlines) { | ||
| size_t num_lines = GetNumLines(text, line_starts); | ||
| size_t line_number_width = GetLineNumberWidth(num_lines, options); | ||
|
|
||
| std::string ret; | ||
| if (underlines.empty()) { | ||
| PrintChunk({0, num_lines}, {underlines.begin(), underlines.begin()}, text, line_starts, options, | ||
| line_number_width, &ret); | ||
| return ret; | ||
| } | ||
|
|
||
| size_t last_end_line = 0; | ||
| std::pair<size_t, size_t> cur_chunk = | ||
| GetLinesForUnderline(underlines[0], line_starts, num_lines, options); | ||
| if (cur_chunk.first < kMinLinesToCutOut) { | ||
| cur_chunk.first = 0; | ||
| } | ||
|
|
||
| auto first_underline_in_cur_chunk = underlines.begin(); | ||
| for (auto underline_it = underlines.begin() + 1; underline_it != underlines.end(); | ||
| ++underline_it) { | ||
| std::pair<size_t, size_t> new_chunk = | ||
| GetLinesForUnderline(*underline_it, line_starts, num_lines, options); | ||
|
|
||
| if (!TryMergeChunks(&cur_chunk, new_chunk)) { | ||
| PrintCut(cur_chunk.first - last_end_line, &ret); | ||
| PrintChunk(cur_chunk, {first_underline_in_cur_chunk, underline_it}, text, line_starts, | ||
| options, line_number_width, &ret); | ||
| last_end_line = cur_chunk.second; | ||
| cur_chunk = new_chunk; | ||
| first_underline_in_cur_chunk = underline_it; | ||
| } | ||
| } | ||
|
|
||
| PrintCut(cur_chunk.first - last_end_line, &ret); | ||
| if (num_lines - cur_chunk.second < kMinLinesToCutOut) { | ||
| cur_chunk.second = num_lines; | ||
| } | ||
| PrintChunk(cur_chunk, {first_underline_in_cur_chunk, underlines.end()}, text, line_starts, | ||
| options, line_number_width, &ret); | ||
| PrintCut(num_lines - cur_chunk.second, &ret); | ||
| return ret; | ||
| } | ||
|
|
||
| } // anonymous namespace | ||
|
|
||
| DocPrinter::DocPrinter(const DocPrinterOptions& options) : options_(options) { | ||
| line_starts_.push_back(0); | ||
| } | ||
|
|
||
| void DocPrinter::Append(const Doc& doc) { Append(doc, NullOpt); } | ||
|
|
||
| void DocPrinter::Append(const Doc& doc, Optional<ObjectPath> path_to_underline) { | ||
| path_to_underline_ = path_to_underline; | ||
| current_max_path_length_ = 0; | ||
| current_underline_candidates_.clear(); | ||
| PrintDoc(doc); | ||
|
|
||
| underlines_.insert(underlines_.end(), current_underline_candidates_.begin(), | ||
| current_underline_candidates_.end()); | ||
| } | ||
|
|
||
| String DocPrinter::GetString() const { | ||
| std::string text = output_.str(); | ||
|
|
||
| // Remove any trailing indentation | ||
| while (!text.empty() && text.back() == ' ') { | ||
| text.pop_back(); | ||
| } | ||
|
|
||
| if (!text.empty() && text.back() != '\n') { | ||
| text.push_back('\n'); | ||
| } | ||
| return text; | ||
|
|
||
| std::vector<ByteSpan> underlines = underlines_; | ||
| SortAndMergeSpans(&underlines); | ||
| return DecorateText(text, line_starts_, options_, underlines); | ||
| } | ||
|
|
||
| void DocPrinter::PrintDoc(const Doc& doc) { | ||
| size_t start_pos = output_.tellp(); | ||
|
|
||
| if (const auto* doc_node = doc.as<LiteralDocNode>()) { | ||
| PrintTypedDoc(GetRef<LiteralDoc>(doc_node)); | ||
| } else if (const auto* doc_node = doc.as<IdDocNode>()) { | ||
|
|
@@ -84,6 +321,24 @@ void DocPrinter::PrintDoc(const Doc& doc) { | |
| LOG(FATAL) << "Do not know how to print " << doc->GetTypeKey(); | ||
| throw; | ||
| } | ||
|
|
||
| size_t end_pos = output_.tellp(); | ||
| for (const ObjectPath& path : doc->source_paths) { | ||
| MarkSpan({start_pos, end_pos}, path); | ||
| } | ||
| } | ||
|
|
||
| void DocPrinter::MarkSpan(const ByteSpan& span, const ObjectPath& path) { | ||
| if (path_to_underline_.defined()) { | ||
| if (path->Length() >= current_max_path_length_ && | ||
| path->IsPrefixOf(path_to_underline_.value())) { | ||
| if (path->Length() > current_max_path_length_) { | ||
| current_max_path_length_ = path->Length(); | ||
| current_underline_candidates_.clear(); | ||
| } | ||
| current_underline_candidates_.push_back(span); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| } // namespace printer | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.