Skip to content

Commit

Permalink
fix: sanitize references
Browse files Browse the repository at this point in the history
  • Loading branch information
alandefreitas committed Feb 29, 2024
1 parent 541cf5b commit 1de3dff
Show file tree
Hide file tree
Showing 3 changed files with 195 additions and 25 deletions.
158 changes: 148 additions & 10 deletions src/lib/AST/ParseJavadoc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
//
// Copyright (c) 2023 Vinnie Falco ([email protected])
// Copyright (c) 2023 Krystian Stasiowski ([email protected])
// Copyright (c) 2024 Alan de Freitas ([email protected])
//
// Official repository: https://github.com/cppalliance/mrdocs
//
Expand Down Expand Up @@ -497,7 +498,7 @@ parseHTMLTag(HTMLStartTagComment const* C)
}) : it_;
if (tagEndIt == end_)
{
return Unexpected(Error(fmt::format("warning: HTML <{}> tag not followed by end tag", res.tag)));
return Unexpected(formatError("warning: HTML <{}> tag not followed by end tag", res.tag));
}

// Check if end tag matches start tag
Expand Down Expand Up @@ -660,6 +661,94 @@ convertDirection(ParamCommandComment::PassDirection kind)
}
}

/** Parse the identifier at the start of a string

    Returns the longest prefix of `str` that forms a single
    unqualified identifier. When that identifier is exactly
    `operator`, the punctuation that immediately follows it
    is included so that names such as `operator==` or
    `operator()` are returned whole.

    Anything after the identifier (for instance the period
    that ends a sentence) is left out of the result.

    @param str The text to inspect.

    @return A view of the leading identifier in `str`, or an
    empty view when `str` is empty or does not begin with an
    identifier. A leading digit is not a valid start of an
    identifier, so such input also yields an empty view.
*/
std::string_view
parseIdentifier(std::string_view str)
{
    static constexpr std::string_view idChars =
        "abcdefghijklmnopqrstuvwxyz"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "0123456789"
        "_";
    // Only characters that occur in overloadable operator
    // tokens. Sentence punctuation such as `.`, `;`, `:` and
    // `?`, as well as braces, never appears in an operator
    // name and must not be absorbed into the reference.
    static constexpr std::string_view operatorChars =
        "~!%^&*()-+=|[],<>/";

    if (str.empty())
    {
        return {};
    }

    // An identifier cannot begin with a digit
    char const first = str.front();
    if (first >= '0' && first <= '9')
    {
        return {};
    }

    std::size_t const idEnd = str.find_first_not_of(idChars);
    if (idEnd == std::string_view::npos)
    {
        // The whole string is made of identifier characters
        return str;
    }

    std::string_view const id = str.substr(0, idEnd);
    if (id != "operator")
    {
        return id;
    }

    // Extend the `operator` keyword with the operator token.
    // `substr` clamps `npos`, so when the punctuation runs to
    // the end of the string the whole string is returned.
    std::size_t const opEnd =
        str.find_first_not_of(operatorChars, idEnd);
    return str.substr(0, opEnd);
}

/** Parse first chars of string that represent an identifier
*/
std::string_view
parseQualifiedIdentifier(std::string_view str)
{
auto str0 = str;
std::size_t off = 0;
if (str.starts_with("::"))
{
off += 2;
str.remove_prefix(2);
}

bool atIdentifier = true;
while (!str.empty())
{
if (atIdentifier)
{
auto idStr = parseIdentifier(str);
if (!idStr.empty())
{
off += idStr.size();
str = str.substr(idStr.size());
atIdentifier = false;
}
else
{
break;
}
}
else
{
// At delimiter
if (str.starts_with("::"))
{
off += 2;
str = str.substr(2);
atIdentifier = true;
}
else
{
break;
}
}
}
std::string_view result = str0.substr(0, off);
if (result.ends_with("::"))
{
result = result.substr(0, result.size() - 2);
}
return result;
}

void
JavadocVisitor::
visitInlineCommandComment(
Expand All @@ -672,9 +761,6 @@ visitInlineCommandComment(
// VFALCO I'd like to know when this happens
MRDOCS_ASSERT(cmd != nullptr);

// KRYSTIAN FIXME: the text for a copydoc/ref command
// should not include illegal characters
// (e.g. periods that occur after the symbol name)
switch(unsigned ID = cmd->getID())
{
// Emphasis
Expand All @@ -699,23 +785,75 @@ visitInlineCommandComment(
{
if(! goodArgCount(1, *C))
return;

// the referenced symbol will be resolved during
// the finalization step once all symbols are extracted
std::string const &s = C->getArgText(0).str();
bool const copyingFunctionDoc = s.find('(') != std::string::npos;
std::string ref = s;
if (copyingFunctionDoc)
{
// Clang parses the copydoc command breaking
// before the complete overload information. For instance,
// `@copydoc operator()(unsigned char) const` will create
// a node with the text `operator()(unsigned` and another
// with `char) const`. We need to merge these nodes.
std::size_t open = std::ranges::count(s, '(');
std::size_t close = std::ranges::count(s, ')');
while (open != close)
{
++it_;
if (it_ == end_)
{
break;
}
auto const* c = *it_;
if (c->getCommentKind() == Comment::TextCommentKind)
{
ref += static_cast<TextComment const*>(c)->getText();
}
else
{
break;
}
open = std::ranges::count(ref, '(');
close = std::ranges::count(ref, ')');
}
}
emplaceText<doc::Copied>(
C->hasTrailingNewline(),
C->getArgText(0).str(),
ref,
convertCopydoc(ID));
return;
}
case CommandTraits::KCI_ref:
{
if(! goodArgCount(1, *C))
return;
// the referenced symbol will be resolved during
// the finalization step once all symbols are extracted
emplaceText<doc::Reference>(
C->hasTrailingNewline(),
C->getArgText(0).str());
// The parsed reference often includes characters
// that are not valid in identifiers, so we need to
// clean it up.
// Find the first character that is not a valid C++
// identifier character, and truncate the string there.
// This potentially creates two text nodes.
auto const s = C->getArgText(0).str();
std::string_view ref = parseQualifiedIdentifier(s);
bool const hasExtraText = ref.size() != s.size();
if (!ref.empty())
{
// the referenced symbol will be resolved during
// the finalization step once all symbols are extracted
emplaceText<doc::Reference>(
C->hasTrailingNewline() && !hasExtraText,
std::string(ref));
}
// Emplace the rest of the string as doc::Text
if(hasExtraText)
{
emplaceText<doc::Text>(
C->hasTrailingNewline(),
s.substr(ref.size()));
}
return;
}

Expand Down
60 changes: 45 additions & 15 deletions src/lib/Gen/xml/XMLWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -622,49 +622,55 @@ writeNode(
switch(node.kind)
{
case doc::Kind::text:
writeText(static_cast<doc::Text const&>(node));
writeText(dynamic_cast<doc::Text const&>(node));
break;
case doc::Kind::styled:
writeStyledText(static_cast<doc::Styled const&>(node));
writeStyledText(dynamic_cast<doc::Styled const&>(node));
break;
case doc::Kind::heading:
writeHeading(static_cast<doc::Heading const&>(node));
writeHeading(dynamic_cast<doc::Heading const&>(node));
break;
case doc::Kind::paragraph:
writeParagraph(static_cast<doc::Paragraph const&>(node));
writeParagraph(dynamic_cast<doc::Paragraph const&>(node));
break;
case doc::Kind::link:
writeLink(static_cast<doc::Link const&>(node));
writeLink(dynamic_cast<doc::Link const&>(node));
break;
case doc::Kind::list_item:
writeListItem(static_cast<doc::ListItem const&>(node));
writeListItem(dynamic_cast<doc::ListItem const&>(node));
break;
case doc::Kind::brief:
writeBrief(static_cast<doc::Brief const&>(node));
writeBrief(dynamic_cast<doc::Brief const&>(node));
break;
case doc::Kind::admonition:
writeAdmonition(static_cast<doc::Admonition const&>(node));
writeAdmonition(dynamic_cast<doc::Admonition const&>(node));
break;
case doc::Kind::code:
writeCode(static_cast<doc::Code const&>(node));
writeCode(dynamic_cast<doc::Code const&>(node));
break;
case doc::Kind::param:
writeJParam(static_cast<doc::Param const&>(node));
writeJParam(dynamic_cast<doc::Param const&>(node));
break;
case doc::Kind::tparam:
writeTParam(static_cast<doc::TParam const&>(node));
writeTParam(dynamic_cast<doc::TParam const&>(node));
break;
case doc::Kind::returns:
writeReturns(static_cast<doc::Returns const&>(node));
writeReturns(dynamic_cast<doc::Returns const&>(node));
break;
case doc::Kind::reference:
writeReference(static_cast<doc::Reference const&>(node));
writeReference(dynamic_cast<doc::Reference const&>(node));
break;
case doc::Kind::copied:
writeCopied(static_cast<doc::Copied const&>(node));
writeCopied(dynamic_cast<doc::Copied const&>(node));
break;
case doc::Kind::throws:
writeThrows(static_cast<doc::Throws const&>(node));
writeThrows(dynamic_cast<doc::Throws const&>(node));
break;
case doc::Kind::details:
writeDetails(dynamic_cast<doc::Details const&>(node));
break;
case doc::Kind::see:
writeSee(dynamic_cast<doc::See const&>(node));
break;
default:
// unknown kind
Expand Down Expand Up @@ -776,6 +782,30 @@ writeParagraph(
tags_.close("para");
}

/** Write a details node as a `details` element

    Opens a `details` tag, writes the children of the
    node with `writeNodes`, and closes the tag.

    @param para The details node whose children are written.
    @param tag Value passed for the `class` attribute. The
    attribute entry carries `!tag.empty()` as its third
    field, presumably so that the attribute is omitted when
    `tag` is empty.
*/
void
XMLWriter::
writeDetails(
    doc::Details const& para,
    llvm::StringRef tag)
{
    bool const hasClass = ! tag.empty();
    tags_.open("details", {
        { "class", tag, hasClass }});
    writeNodes(para.children);
    tags_.close("details");
}

/** Write a see node as a `see` element

    Opens a `see` tag, writes the children of the node
    with `writeNodes`, and closes the tag.

    @param para The see node whose children are written.
    @param tag Value passed for the `class` attribute. The
    attribute entry carries `!tag.empty()` as its third
    field, presumably so that the attribute is omitted when
    `tag` is empty.
*/
void
XMLWriter::
writeSee(
    doc::See const& para,
    llvm::StringRef tag)
{
    bool const hasClass = ! tag.empty();
    tags_.open("see", {
        { "class", tag, hasClass }});
    writeNodes(para.children);
    tags_.close("see");
}

void
XMLWriter::
writeAdmonition(
Expand Down
2 changes: 2 additions & 0 deletions src/lib/Gen/xml/XMLWriter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ class XMLWriter
void writeReference(doc::Reference const& node);
void writeCopied(doc::Copied const& node);
void writeThrows(doc::Throws const& node);
void writeDetails(doc::Details const& node, llvm::StringRef tag = "");
void writeSee(doc::See const& node, llvm::StringRef tag = "");
};

} // xml
Expand Down

0 comments on commit 1de3dff

Please sign in to comment.