-
-
Notifications
You must be signed in to change notification settings - Fork 1.9k
PoC for RFC145: dynamic documentation for lambdas #8778
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,234 @@ | ||
| #include <iostream> | ||
| #include <fstream> | ||
| #include <sstream> | ||
| #include <regex> | ||
| #include <climits> | ||
| #include <algorithm> | ||
|
|
||
| #include "comment.hh" | ||
| #include "util.hh" | ||
|
|
||
| /* This module looks for documentation comments in the source code. | ||
|
|
||
| Documentation is not retained during parsing, and it should not be, | ||
| for performance reasons. Because of this the code has to jump | ||
| through some hoops, to perform its task. | ||
|
|
||
| Adapting the parser was not considered an option, so this code | ||
| parses the comments from scratch, using regular expressions. These | ||
| do not support all syntactic constructs, so in rare cases, they | ||
| will fail and the code will report no documentation. | ||
|
|
||
| One such situation is where documentation is requested for a | ||
| partially applied function, where the outer lambda pattern | ||
| matches an attribute set. This is not supported in the regexes | ||
| because it potentially requires (almost?) the entire grammar. | ||
|
|
||
| This module has been designed not to report the wrong | ||
| documentation; considering that the wrong documentation is worse | ||
| than no documentation. The regular expressions will only match | ||
| simple, well understood syntactic structures, or not match at all. | ||
|
|
||
| This approach to finding documentation does not cause extra runtime | ||
| overhead, until used. | ||
|
|
||
| This module does not support tab ('\t') characters. In some places | ||
| they are treated as single spaces. They should be avoided. | ||
| */ | ||
| namespace nix::Comment { | ||
|
|
||
| struct Doc emptyDoc("", "", "", 0); | ||
|
|
||
| /* parseDoc will try to recover a Doc by looking at the text that leads up to a term | ||
| definition.*/ | ||
| static struct Doc parseDoc(std::string sourcePrefix); | ||
|
|
||
| /* stripComment unpacks a comment, by unindenting and stripping " * " prefixes as | ||
| applicable. The argument should include any preceding whitespace. */ | ||
| static std::string stripComment(std::string rawComment); | ||
|
|
||
| /* Consistent unindenting. It will only remove entire columns. */ | ||
| static std::string unindent(std::string s); | ||
|
|
||
| static std::string trimUnindent(std::string s) { | ||
| return trim(unindent(s)); | ||
| } | ||
|
|
||
/* Returns `s` with `prefix` removed when `s` starts with it; otherwise
   returns `s` unchanged.

   The check is anchored at position 0, so a non-matching string is not
   scanned in full for later occurrences of the prefix. */
static std::string stripPrefix(const std::string & prefix, std::string s) {
    if (s.compare(0, prefix.length(), prefix) == 0)
        s.erase(0, prefix.length());
    return s;
}
|
|
||
| static std::string readFileUpToPos(const Pos & pos) { | ||
| if(auto path = std::get_if<SourcePath>(&pos.origin)) { | ||
| std::ifstream ifs(path->path.abs()); | ||
| std::stringstream ret; | ||
| size_t lineNum = 1; | ||
| std::string line; | ||
|
|
||
| while (getline(ifs, line) && lineNum <= pos.line) { | ||
| if (lineNum < pos.line) { | ||
| ret << line << "\n"; | ||
| } else if (lineNum == pos.line) { | ||
| ret << line.substr(0, pos.column-1); | ||
| } | ||
| lineNum++; | ||
| } | ||
|
|
||
| return ret.str(); | ||
| } else { | ||
| throw std::invalid_argument("pos.origin is not a path"); | ||
| } | ||
| } | ||
|
|
||
| struct Doc lookupDoc(const Pos & pos) { | ||
| try { | ||
| return parseDoc(readFileUpToPos(pos)); | ||
| } catch (std::exception & e) { | ||
| ignoreException(); | ||
| return emptyDoc; | ||
| } | ||
| } | ||
|
|
||
/* Counts the lambda heads in `piece`, the text matched by the `lambdas`
   sub-expression in parseDoc. Every lambda head contributes exactly one ':',
   so the number of ':' characters is the number of lambdas. */
static int countLambdas(const std::string & piece) {
    return static_cast<int>(std::count(piece.begin(), piece.end(), ':'));
}
|
|
||
| /* Try to recover a Doc by looking at the text that leads up to a term | ||
| definition */ | ||
| static struct Doc parseDoc(std::string sourcePrefix) { | ||
|
|
||
| std::string wss("[ \t\r\n]*"); | ||
| std::string spaces("[ \t]*"); | ||
|
|
||
| std::string singleLineComment(spaces + "#[^\r\n]*(?:\n|\r\n)"); | ||
| std::string multiSingleLineComment("(?:" + singleLineComment + ")*"); | ||
| std::string multiLineComment("\\/\\*(?:[^*]|\\*+[^*/])*\\*+\\/"); | ||
| std::string commentUnit("(" + multiSingleLineComment + "|" + spaces + multiLineComment + ")" + wss); | ||
|
|
||
| std::string ident("[a-zA-Z_][a-zA-Z0-9_'-]*" + wss); | ||
| std::string identKeep("([a-zA-Z_][a-zA-Z0-9_'-]*)" + wss); | ||
|
|
||
| /* lvalue for nested attrset construction, but not matching | ||
| quoted identifiers or ${...} or comments inbetween etc */ | ||
| std::string simplePath("(?:" + wss + ident + "\\.)*" + identKeep); | ||
|
|
||
| std::string lambda(ident + wss + ":" + wss); | ||
|
|
||
| /* see countLambdas */ | ||
| std::string lambdas("((:?" + lambda + ")*)"); | ||
|
|
||
| std::string assign("=" + wss); | ||
|
|
||
| std::string re(commentUnit + simplePath + assign + lambdas + "$"); | ||
| std::regex e(re); | ||
|
|
||
| #define REGEX_GROUP_COMMENT 1 | ||
| #define REGEX_GROUP_NAME 2 | ||
| #define REGEX_GROUP_LAMBDAS 3 | ||
| #define REGEX_GROUP_MAX 4 | ||
|
|
||
| std::smatch matches; | ||
| regex_search(sourcePrefix, matches, e); | ||
|
|
||
| std::stringstream buffer; | ||
| if (matches.length() < REGEX_GROUP_MAX) { | ||
| return emptyDoc; | ||
| } | ||
|
|
||
| std::string rawComment = matches[REGEX_GROUP_COMMENT]; | ||
| std::string name = matches[REGEX_GROUP_NAME]; | ||
| int timesApplied = countLambdas(matches[REGEX_GROUP_LAMBDAS]); | ||
| return Doc(rawComment, stripComment(rawComment), name, timesApplied); | ||
| } | ||
|
|
||
| static std::string stripComment(std::string rawComment) { | ||
| rawComment.erase(rawComment.find_last_not_of("\n")+1); | ||
|
|
||
| std::string s(trimUnindent(rawComment)); | ||
|
|
||
| if (s[0] == '/' && s[1] == '*') { | ||
| // Remove the "/*" | ||
| // Indentation will be removed consistently later on | ||
| s[0] = ' '; | ||
| s[1] = ' '; | ||
|
|
||
| // Remove the "*/" | ||
| if (!s.empty() && *(--s.end()) == '/') | ||
| s.pop_back(); | ||
| if (!s.empty() && *(--s.end()) == '*') | ||
| s.pop_back(); | ||
|
|
||
| s = trimUnindent(s); | ||
|
|
||
| std::istringstream inStream(s); | ||
| std::ostringstream stripped; | ||
|
|
||
| std::string line; | ||
|
|
||
| /* at first, we assume a comment | ||
| * that is formatted like this | ||
| * with '*' characters at the beginning | ||
| * of the line. | ||
| */ | ||
| bool hasStars = true; | ||
|
|
||
| while(std::getline(inStream,line,'\n')){ | ||
| if (hasStars && ( | ||
| (!line.empty() && line[0] == '*') | ||
| || (line.length() >= 2 && line[0] == ' ' && line[1] == '*') | ||
| )) { | ||
| if (line[0] == ' ') { | ||
| line = stripPrefix(" *", line); | ||
| } else { | ||
| line = stripPrefix("*", line); | ||
| } | ||
| } else { | ||
| hasStars = false; | ||
| } | ||
|
|
||
| stripped << line << std::endl; | ||
| } | ||
| return trimUnindent(stripped.str()); | ||
| } | ||
| else { | ||
| std::istringstream inStream(s); | ||
| std::ostringstream stripped; | ||
|
|
||
| std::string line; | ||
| while(std::getline(inStream, line, '\n')) { | ||
| line.erase(0, line.find("#") + 1); | ||
| stripped << line << std::endl; | ||
| } | ||
| return trimUnindent(stripped.str()); | ||
| } | ||
| } | ||
|
|
||
/* Removes the indentation shared by all lines of `s`.

   The amount removed is the smallest number of leading whitespace characters
   (' ', '\t', '\r', each counted as one column) found on any line that has
   non-whitespace content. Lines shorter than that amount become empty.
   Every output line is terminated by '\n', including the last one. When no
   line has content, every line becomes empty. */
static std::string unindent(std::string s) {
    // npos means "no line with content seen yet"; there is no upper bound on
    // the indentation that can be removed.
    size_t minIndent = std::string::npos;
    {
        std::istringstream inStream(s);
        for (std::string line; std::getline(inStream, line); ) {
            const size_t firstNonWS = line.find_first_not_of(" \t\r\n");
            if (firstNonWS != std::string::npos) {
                minIndent = std::min(firstNonWS, minIndent);
            }
        }
    }

    std::ostringstream unindentedStream;
    {
        std::istringstream inStream(s);
        for (std::string line; std::getline(inStream, line); ) {
            if (line.length() >= minIndent) {
                unindentedStream << line.substr(minIndent);
            }
            unindentedStream << '\n';
        }
    }
    return unindentedStream.str();
}
|
|
||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| #pragma once | ||
|
|
||
| #include "nixexpr.hh" | ||
|
|
||
| namespace nix::Comment { | ||
|
|
||
/* Documentation recovered for a term from the comment that precedes its
   definition in the source text. */
struct Doc {

    // Name that the term is assigned to
    std::string name;

    // The comment exactly as it was matched in the source text
    std::string rawComment;

    // The comment's content, with comment markers and indentation removed
    std::string comment;

    // Number of times the curried function must be applied to get the value
    // that this structure documents.
    //
    // This is useful when showing the documentation for a partially applied
    // curried function. The documentation is for the unapplied function, so
    // this is crucial information.
    int timesApplied;

    // Note the parameter order: rawComment and comment come before name.
    Doc(std::string rawComment, std::string comment, std::string name, int timesApplied)
        : name(name)
        , rawComment(rawComment)
        , comment(comment)
        , timesApplied(timesApplied)
    {
    }

};
|
|
||
| extern struct Doc emptyDoc; | ||
|
|
||
| // lookupDoc will try to recover a Doc. This will perform I/O, | ||
| // because documentation is not retained by the parser. | ||
| // | ||
| // Will return empty values if nothing can be found. | ||
| // For its limitations, see the docs of the implementation. | ||
| struct Doc lookupDoc(const Pos & pos); | ||
|
|
||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,5 @@ | ||
| #include "archive.hh" | ||
| #include "comment.hh" | ||
| #include "derivations.hh" | ||
| #include "downstream-placeholder.hh" | ||
| #include "eval-inline.hh" | ||
|
|
@@ -2439,6 +2440,27 @@ static RegisterPrimOp primop_unsafeGetAttrPos(PrimOp { | |
| .fun = prim_unsafeGetAttrPos, | ||
| }); | ||
|
|
||
| void prim_unsafeGetLambdaDoc(EvalState & state, const PosIdx pos, Value * * args, Value & v) | ||
| { | ||
| state.forceFunction(*args[0], pos, "while evaluating the first argument to builtins.unsafeGetLambdaDoc"); | ||
|
|
||
| Pos funPos = state.positions[args[0]->lambda.fun->pos]; | ||
|
|
||
| Comment::Doc doc = Comment::lookupDoc(funPos); | ||
|
|
||
| if(doc.comment.empty()) { | ||
| v.mkNull(); | ||
| } else { | ||
| v.mkString(doc.comment); | ||
| } | ||
| } | ||
|
|
||
| static RegisterPrimOp primop_unsafeGetLambdaDoc(PrimOp { | ||
| .name = "__unsafeGetLambdaDoc", | ||
| .arity = 1, | ||
| .fun = prim_unsafeGetLambdaDoc, | ||
| }); | ||
|
|
||
|
Comment on lines
+2443
to
+2463
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This needs to be behind an |
||
| /* Dynamic version of the `?' operator. */ | ||
| static void prim_hasAttr(EvalState & state, const PosIdx pos, Value * * args, Value & v) | ||
| { | ||
|
|
||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.