diff --git a/src/libcmd/repl.cc b/src/libcmd/repl.cc index d15162e761c..25962eef6b9 100644 --- a/src/libcmd/repl.cc +++ b/src/libcmd/repl.cc @@ -42,6 +42,7 @@ extern "C" { #include "local-fs-store.hh" #include "progress-bar.hh" #include "print.hh" +#include "comment.hh" #if HAVE_BOEHMGC #define GC_INCLUDE_NEW @@ -94,6 +95,10 @@ struct NixRepl typedef std::set ValuesSeen; std::ostream & printValue(std::ostream & str, Value & v, unsigned int maxDepth); std::ostream & printValue(std::ostream & str, Value & v, unsigned int maxDepth, ValuesSeen & seen); + std::ostream & printValueAndDoc(std::ostream & str, Value & v, unsigned int maxDepth); + + // Only prints if a comment is found preceding the position. + void tryPrintDoc(std::ostream & str, Pos & pos); }; std::string removeWhitespace(std::string s) @@ -764,7 +769,7 @@ bool NixRepl::processLine(std::string line) } else { Value v; evalString(line, v); - printValue(std::cout, v, 1) << std::endl; + printValueAndDoc(std::cout, v, 1) << std::endl; } } @@ -886,6 +891,57 @@ void NixRepl::evalString(std::string s, Value & v) } +std::ostream & NixRepl::printValueAndDoc(std::ostream & str, Value & v, unsigned int maxDepth) +{ + printValue(str, v, maxDepth); + + if(v.isLambda()) { + auto pos = state->positions[v.lambda.fun->pos]; + tryPrintDoc(str, pos); + } + return str; +} + + +void NixRepl::tryPrintDoc(std::ostream & str, Pos & pos) { + + Comment::Doc doc = Comment::lookupDoc(pos); + + if (!doc.name.empty()) { + str << std::endl; + str << std::endl << "| " << ANSI_BOLD << doc.name << ANSI_NORMAL << std::endl; + str << "| "; + for (size_t i = 0; i < doc.name.length(); i++) { + str << '-'; + } + str << std::endl; + str << "| " << std::endl; + } + + // If we're showing any form of doc at all, we need to inform + // user about partially applied functions.
+ if (!doc.name.empty() || !doc.comment.empty()) { + + if (doc.timesApplied > 0) { + str << "| " << ANSI_YELLOW << "NOTE: " << ANSI_BOLD << "This function has already been applied!" << ANSI_NORMAL << std::endl + << "| You should ignore the first " + << ANSI_BOLD << std::to_string(doc.timesApplied) << ANSI_NORMAL + << " parameter(s) in this documentation," << std::endl + << "| because they have already been applied." << std::endl + << "|" << std::endl; + } + } + + if (!doc.comment.empty()) { + std::stringstream commentStream(doc.comment); + std::string line; + while(std::getline(commentStream,line,'\n')){ + str << "| " << line << std::endl; + } + } +} + + std::ostream & NixRepl::printValue(std::ostream & str, Value & v, unsigned int maxDepth) { ValuesSeen seen; diff --git a/src/libexpr/comment.cc b/src/libexpr/comment.cc new file mode 100644 index 00000000000..959b8accdf3 --- /dev/null +++ b/src/libexpr/comment.cc @@ -0,0 +1,234 @@ +#include +#include +#include +#include +#include +#include + +#include "comment.hh" +#include "util.hh" + +/* This module looks for documentation comments in the source code. + + Documentation is not retained during parsing, and it should not be, + for performance reasons. Because of this the code has to jump + through some hoops, to perform its task. + + Adapting the parser was not considered an option, so this code + parses the comments from scratch, using regular expressions. These + do not support all syntactic constructs, so in rare cases, they + will fail and the code will report no documentation. + + One such situation is where documentation is requested for a + partially applied function, where the outer lambda pattern + matches an attribute set. This is not supported in the regexes + because it potentially requires (almost?) the entire grammar. + + This module has been designed not to report the wrong + documentation; considering that the wrong documentation is worse + than no documentation.
The regular expressions will only match + simple, well understood syntactic structures, or not match at all. + + This approach to finding documentation does not cause extra runtime + overhead, until used. + + This module does not support tab ('\t') characters. In some places + they are treated as single spaces. They should be avoided. +*/ +namespace nix::Comment { + +struct Doc emptyDoc("", "", "", 0); + +/* parseDoc will try to recover a Doc by looking at the text that leads up to a term + definition.*/ +static struct Doc parseDoc(std::string sourcePrefix); + +/* stripComment unpacks a comment, by unindenting and stripping " * " prefixes as + applicable. The argument should include any preceding whitespace. */ +static std::string stripComment(std::string rawComment); + +/* Consistent unindenting. It will only remove entire columns. */ +static std::string unindent(std::string s); + +static std::string trimUnindent(std::string s) { + return trim(unindent(s)); +} + +static std::string stripPrefix(std::string prefix, std::string s) { + std::string::size_type index = s.find(prefix); + return (index == 0) ?
s.erase(0, prefix.length()) : s; +} + +static std::string readFileUpToPos(const Pos & pos) { + if(auto path = std::get_if(&pos.origin)) { + std::ifstream ifs(path->path.abs()); + std::stringstream ret; + size_t lineNum = 1; + std::string line; + + while (getline(ifs, line) && lineNum <= pos.line) { + if (lineNum < pos.line) { + ret << line << "\n"; + } else if (lineNum == pos.line) { + ret << line.substr(0, pos.column-1); + } + lineNum++; + } + + return ret.str(); + } else { + throw std::invalid_argument("pos.origin is not a path"); + } +} + +struct Doc lookupDoc(const Pos & pos) { + try { + return parseDoc(readFileUpToPos(pos)); + } catch (std::exception & e) { + ignoreException(); + return emptyDoc; + } +} + +/* See lambdas in parseDoc */ +static int countLambdas(std::string piece) { + return std::count(piece.begin(), piece.end(), ':'); +} + +/* Try to recover a Doc by looking at the text that leads up to a term + definition */ +static struct Doc parseDoc(std::string sourcePrefix) { + + std::string wss("[ \t\r\n]*"); + std::string spaces("[ \t]*"); + + std::string singleLineComment(spaces + "#[^\r\n]*(?:\n|\r\n)"); + std::string multiSingleLineComment("(?:" + singleLineComment + ")*"); + std::string multiLineComment("\\/\\*(?:[^*]|\\*+[^*/])*\\*+\\/"); + std::string commentUnit("(" + multiSingleLineComment + "|" + spaces + multiLineComment + ")" + wss); + + std::string ident("[a-zA-Z_][a-zA-Z0-9_'-]*" + wss); + std::string identKeep("([a-zA-Z_][a-zA-Z0-9_'-]*)" + wss); + + /* lvalue for nested attrset construction, but not matching + quoted identifiers or ${...} or comments in between etc */ + std::string simplePath("(?:" + wss + ident + "\\.)*" + identKeep); + + std::string lambda(ident + wss + ":" + wss); + + /* see countLambdas; the inner group is non-capturing */ + std::string lambdas("((?:"
+ lambda + ")*)"); + + std::string assign("=" + wss); + + std::string re(commentUnit + simplePath + assign + lambdas + "$"); + std::regex e(re); + + #define REGEX_GROUP_COMMENT 1 + #define REGEX_GROUP_NAME 2 + #define REGEX_GROUP_LAMBDAS 3 + #define REGEX_GROUP_MAX 4 + + std::smatch matches; + regex_search(sourcePrefix, matches, e); + + std::stringstream buffer; + if (matches.size() < REGEX_GROUP_MAX) { // size() counts sub-matches and is 0 when nothing matched + return emptyDoc; + } + + std::string rawComment = matches[REGEX_GROUP_COMMENT]; + std::string name = matches[REGEX_GROUP_NAME]; + int timesApplied = countLambdas(matches[REGEX_GROUP_LAMBDAS]); + return Doc(rawComment, stripComment(rawComment), name, timesApplied); +} + +static std::string stripComment(std::string rawComment) { + rawComment.erase(rawComment.find_last_not_of("\n")+1); + + std::string s(trimUnindent(rawComment)); + + if (s[0] == '/' && s[1] == '*') { + // Remove the "/*" + // Indentation will be removed consistently later on + s[0] = ' '; + s[1] = ' '; + + // Remove the "*/" + if (!s.empty() && *(--s.end()) == '/') + s.pop_back(); + if (!s.empty() && *(--s.end()) == '*') + s.pop_back(); + + s = trimUnindent(s); + + std::istringstream inStream(s); + std::ostringstream stripped; + + std::string line; + + /* at first, we assume a comment + * that is formatted like this + * with '*' characters at the beginning + * of the line.
+ */ + bool hasStars = true; + + while(std::getline(inStream,line,'\n')){ + if (hasStars && ( + (!line.empty() && line[0] == '*') + || (line.length() >= 2 && line[0] == ' ' && line[1] == '*') + )) { + if (line[0] == ' ') { + line = stripPrefix(" *", line); + } else { + line = stripPrefix("*", line); + } + } else { + hasStars = false; + } + + stripped << line << std::endl; + } + return trimUnindent(stripped.str()); + } + else { + std::istringstream inStream(s); + std::ostringstream stripped; + + std::string line; + while(std::getline(inStream, line, '\n')) { + line.erase(0, line.find("#") + 1); + stripped << line << std::endl; + } + return trimUnindent(stripped.str()); + } +} + +static std::string unindent(std::string s) { + size_t maxIndent = 1000; + { + std::istringstream inStream(s); + for (std::string line; std::getline(inStream, line); ) { + size_t firstNonWS = line.find_first_not_of(" \t\r\n"); + if (firstNonWS != std::string::npos) { + maxIndent = std::min(firstNonWS, maxIndent); + } + } + } + + std::ostringstream unindentedStream; + { + std::istringstream inStream(s); + for (std::string line; std::getline(inStream, line); ) { + if (line.length() >= maxIndent) { + unindentedStream << line.substr(maxIndent) << std::endl; + } else { + unindentedStream << std::endl; + } + } + } + return unindentedStream.str(); +} + +} diff --git a/src/libexpr/comment.hh b/src/libexpr/comment.hh new file mode 100644 index 00000000000..ce173eeed20 --- /dev/null +++ b/src/libexpr/comment.hh @@ -0,0 +1,41 @@ +#pragma once + +#include "nixexpr.hh" + +namespace nix::Comment { + +struct Doc { + + // Name that the term is assigned to + std::string name; + + std::string rawComment; + std::string comment; + + // Number of times the curried function must be applied to get the value + // that this structure documents. + // + // This is useful when showing the documentation for a partially applied + // curried function.
The documentation is for the unapplied function, so + // this is crucial information. + int timesApplied; + + Doc(std::string rawComment, std::string comment, std::string name, int timesApplied) { + this->name = name; + this->rawComment = rawComment; + this->comment = comment; + this->timesApplied = timesApplied; + } + +}; + +extern struct Doc emptyDoc; + +// lookupDoc will try to recover a Doc. This will perform I/O, +// because documentation is not retained by the parser. +// +// Will return empty values if nothing can be found. +// For its limitations, see the docs of the implementation. +struct Doc lookupDoc(const Pos & pos); + +} diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index ddf529b9eaf..b5279f91fd0 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -1,4 +1,5 @@ #include "archive.hh" +#include "comment.hh" #include "derivations.hh" #include "downstream-placeholder.hh" #include "eval-inline.hh" @@ -2439,6 +2440,27 @@ static RegisterPrimOp primop_unsafeGetAttrPos(PrimOp { .fun = prim_unsafeGetAttrPos, }); +void prim_unsafeGetLambdaDoc(EvalState & state, const PosIdx pos, Value * * args, Value & v) +{ + state.forceFunction(*args[0], pos, "while evaluating the first argument to builtins.unsafeGetLambdaDoc"); + + // Guard as the REPL does: lambda.fun is only read when isLambda() holds; anything else yields null. + Comment::Doc doc = args[0]->isLambda() + ? Comment::lookupDoc(state.positions[args[0]->lambda.fun->pos]) : Comment::emptyDoc; + + if(doc.comment.empty()) { + v.mkNull(); + } else { + v.mkString(doc.comment); + } +} + +static RegisterPrimOp primop_unsafeGetLambdaDoc(PrimOp { + .name = "__unsafeGetLambdaDoc", + .arity = 1, + .fun = prim_unsafeGetLambdaDoc, +}); + /* Dynamic version of the `?' operator.
*/ static void prim_hasAttr(EvalState & state, const PosIdx pos, Value * * args, Value & v) { diff --git a/src/libutil/ansicolor.hh b/src/libutil/ansicolor.hh index 86becafa66e..f28785227c3 100644 --- a/src/libutil/ansicolor.hh +++ b/src/libutil/ansicolor.hh @@ -17,5 +17,6 @@ namespace nix { #define ANSI_BLUE "\e[34;1m" #define ANSI_MAGENTA "\e[35;1m" #define ANSI_CYAN "\e[36;1m" +#define ANSI_YELLOW "\e[33;1m" } diff --git a/tests/comments.nix b/tests/comments.nix new file mode 100644 index 00000000000..e338c969858 --- /dev/null +++ b/tests/comments.nix @@ -0,0 +1,97 @@ +{ + # Just a function. + f = x: x; + + # Just a function in an attrset. + nested.f = x: x; + + /* A nice function. + */ + g = x: x; + + # FOO + /* A somewhat nice function. + */ + h = x: x; + + /* + * A nice function. + */ + i = x: x; + + /* + * + * A nice function. + * + */ + j = x: x; + + /* + + A nice function. + + */ + k = x: x; + + /* + + A nice function. + + */ + l = x: x; + + /* + + A nice function. + + */ + m = x: x; + + /* One + Two + */ + n = x: x; + + /* + Bullets: + + * Are lethal. + * Are made of metal. + + */ + o = x: x; + + /* + * Bullets: + * + * * Are lethal. + * * Are made of metal. + * + */ + p = x: x; + + # Implementation detail + + # Useful stuff + q = x: x; + + # Useful + # stuff + r = x: x; + + /* ßuper toll. + */ + unicode1 = x: x; + + /* 🤢 + */ + unicode2 = x: x; + + /* Apply me twice. + */ + curried = x: y: x; + + /* You can give 3 arguments. + */ + curried2 = x: y: z: x; +} diff --git a/tests/repl.sh b/tests/repl.sh index 2b378952116..a7fbbfcb519 100644 --- a/tests/repl.sh +++ b/tests/repl.sh @@ -131,3 +131,165 @@ echo "changingThing" ) | nix repl ./flake --experimental-features 'flakes repl-flake') echo "$replResult" | grepQuiet -s beforeChange echo "$replResult" | grepQuiet -s afterChange + +# Comments +testReplResponse "(import $testDir/comments.nix).f" ' +| f +| - +| +| Just a function.
+' + +testReplResponse "(import $testDir/comments.nix).nested.f" ' +| f +| - +| +| Just a function in an attrset. +' + +testReplResponse "(import ./comments.nix).g" ' +| g +| - +| +| A nice function. +' + +testReplResponse "(import ./comments.nix).h" ' +| h +| - +| +| A somewhat nice function. +' + +testReplResponse "(import ./comments.nix).i" ' +| i +| - +| +| A nice function. +' + +testReplResponse "(import ./comments.nix).j" ' +| j +| - +| +| A nice function. +' + +testReplResponse "(import ./comments.nix).k" ' +| k +| - +| +| A nice function. +' + +testReplResponse "(import ./comments.nix).l" ' +| l +| - +| +| A nice function. +' + +testReplResponse "(import ./comments.nix).m" ' +| m +| - +| +| A nice function. +' + +testReplResponse "(import ./comments.nix).n" ' +| n +| - +| +| One +| Two +' + +testReplResponse "(import ./comments.nix).o" ' +| o +| - +| +| Bullets: +| +| * Are lethal. +| * Are made of metal. +' + +testReplResponse "(import ./comments.nix).p" ' +| p +| - +| +| Bullets: +| +| * Are lethal. +| * Are made of metal. +' + +testReplResponse "(import ./comments.nix).q" ' +| q +| - +| +| Useful stuff +' + + +testReplResponse "(import ./comments.nix).r" ' +| r +| - +| +| Useful +| stuff +' + +testReplResponse "(import ./comments.nix).unicode1" ' +| unicode1 +| -------- +| +| ßuper toll. +' + +testReplResponse "(import ./comments.nix).unicode2" ' +| unicode2 +| -------- +| +| 🤢 +' + +testReplResponse "(import ./comments.nix).curried" ' +| curried +| ------- +| +| Apply me twice. +' + +testReplResponse "(import ./comments.nix).curried true" ' +| curried +| ------- +| +| NOTE: This function has already been applied! +| You should ignore the first 1 parameter(s) in this documentation, +| because they have already been applied. +| +| Apply me twice. +' + +testReplResponse "(import ./comments.nix).curried2 true false" ' +| curried2 +| -------- +| +| NOTE: This function has already been applied!
+| You should ignore the first 2 parameter(s) in this documentation, +| because they have already been applied. +| +| You can give 3 arguments. +' + +testReplResponse "(import ./comments.nix).curried2 true" ' +| curried2 +| -------- +| +| NOTE: This function has already been applied! +| You should ignore the first 1 parameter(s) in this documentation, +| because they have already been applied. +| +| You can give 3 arguments. +'