NixOS · sternenseemann · Oct 31, 2017 · Aug 3, 2023 · ghost · Nov 30, 2023
@@ -42,6 +42,7 @@ extern "C" {
 #include "local-fs-store.hh"
 #include "progress-bar.hh"
 #include "print.hh"
+#include "comment.hh"
 
 #if HAVE_BOEHMGC
 #define GC_INCLUDE_NEW
@@ -94,6 +95,10 @@ struct NixRepl
     typedef std::set<Value *> ValuesSeen;
     std::ostream & printValue(std::ostream & str, Value & v, unsigned int maxDepth);
     std::ostream & printValue(std::ostream & str, Value & v, unsigned int maxDepth, ValuesSeen & seen);
+    std::ostream &  printValueAndDoc(std::ostream & str, Value & v, unsigned int maxDepth);
+
+    // Only prints if a comment is found preceding the position.
+    void tryPrintDoc(std::ostream & str, Pos & pos);
 };
 
 std::string removeWhitespace(std::string s)
@@ -764,7 +769,7 @@ bool NixRepl::processLine(std::string line)
         } else {
             Value v;
             evalString(line, v);
-            printValue(std::cout, v, 1) << std::endl;
+            printValueAndDoc(std::cout, v, 1) << std::endl;
         }
     }
 
@@ -886,6 +891,57 @@ void NixRepl::evalString(std::string s, Value & v)
 }
 
 
+std::ostream & NixRepl::printValueAndDoc(std::ostream & str, Value & v, unsigned int maxDepth)
+{
+    printValue(str, v, maxDepth);
+
+    if(v.isLambda()) {
+        auto pos = state->positions[v.lambda.fun->pos];
+        tryPrintDoc(str, pos);
+    }
+    return str;
+}
+
+
+void NixRepl::tryPrintDoc(std::ostream & str, Pos & pos) {
+
+    Comment::Doc doc = Comment::lookupDoc(pos);
+
+    if (!doc.name.empty()) {
+        str << std::endl;
+        str << std::endl << "| " << ANSI_BOLD << doc.name << ANSI_NORMAL << std::endl;
+        str << "| ";
+        for (size_t i = 0; i < doc.name.length(); i++) {
+            str << '-';
+        }
+        str << std::endl;
+        str << "| " << std::endl;
+    }
+
+    // If we're showing any form of doc at all, we need to inform
+    // user about partially applied functions.
+    if (!doc.name.empty() || !doc.comment.empty()) {
+
+        if (doc.timesApplied > 0) {
+            str << "| " << ANSI_YELLOW << "NOTE: " << ANSI_BOLD << "This function has already been applied!" << ANSI_NORMAL << std::endl
+                << "|       You should ignore the first "
+                               << ANSI_BOLD << std::to_string(doc.timesApplied) << ANSI_NORMAL
+                               << " parameter(s) in this documentation," << std::endl
+                << "|       because they have already been applied." << std::endl
+                << "|" << std::endl;
+        }
+    }
+
+    if (!doc.comment.empty()) {
+        std::stringstream commentStream(doc.comment);
+        std::string line;
+        while(std::getline(commentStream,line,'\n')){
+            str << "| " << line << std::endl;
+        }
+    }
+}
+
+
 std::ostream & NixRepl::printValue(std::ostream & str, Value & v, unsigned int maxDepth)
 {
     ValuesSeen seen;

@@ -0,0 +1,234 @@
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <regex>
+#include <climits>
+#include <algorithm>
+
+#include "comment.hh"
+#include "util.hh"
+
+/* This module looks for documentation comments in the source code.
+
+   Documentation is not retained during parsing, and it should not be,
+   for performance reasons. Because of this the code has to jump
+   through some hoops, to perform its task.
+
+   Adapting the parser was not considered an option, so this code
+   parses the comments from scratch, using regular expressions. These
+   do not support all syntactic constructs, so in rare cases, they
+   will fail and the code will report no documentation.
+
+   One such situation is where documentation is requested for a
+   partially applied function, where the outer lambda pattern
+   matches an attribute set. This is not supported in the regexes
+   because it potentially requires (almost?) the entire grammar.
+
+   This module has been designed not to report the wrong
+   documentation; considering that the wrong documentation is worse
+   than no documentation. The regular expressions will only match
+   simple, well understood syntactic structures, or not match at all.
+
+   This approach to finding documentation does not cause extra runtime
+   overhead, until used.
+
+   This module does not support tab ('\t') characters. In some places
+   they are treated as single spaces. They should be avoided.
+*/
+namespace nix::Comment {
+
+struct Doc emptyDoc("", "", "", 0);
+
+/* parseDoc will try to recover a Doc by looking at the text that leads up to a term
+   definition.*/
+static struct Doc parseDoc(std::string sourcePrefix);
+
+/* stripComment unpacks a comment, by unindenting and stripping " * " prefixes as
+   applicable. The argument should include any preceding whitespace. */
+static std::string stripComment(std::string rawComment);
+
+/* Consistent unindenting. It will only remove entire columns. */
+static std::string unindent(std::string s);
+
+static std::string trimUnindent(std::string s) {
+    return trim(unindent(s));
+}
+
+static std::string stripPrefix(std::string prefix, std::string s) {
+    std::string::size_type index = s.find(prefix);
+    return (index == 0) ? s.erase(0, prefix.length()) : s;
+}
+
+static std::string readFileUpToPos(const Pos & pos) {
+    if(auto path = std::get_if<SourcePath>(&pos.origin)) {
+        std::ifstream ifs(path->path.abs());
+        std::stringstream ret;
+        size_t lineNum = 1;
+        std::string line;
+
+        while (getline(ifs, line) && lineNum <= pos.line) {
+            if (lineNum < pos.line) {
+                ret << line << "\n";
+            } else if (lineNum == pos.line) {
+                ret << line.substr(0, pos.column-1);
+            }
+            lineNum++;
+        }
+
+        return ret.str();
+    } else {
+        throw std::invalid_argument("pos.origin is not a path");
+    }
+}
+
+struct Doc lookupDoc(const Pos & pos) {
+    try {
+        return parseDoc(readFileUpToPos(pos));
+    } catch (std::exception & e) {
+        ignoreException();
+        return emptyDoc;
+    }
+}
+
+/* See lambdas in parseDoc */
+static int countLambdas(std::string piece) {
+    return std::count(piece.begin(), piece.end(), ':');
+}
+
+/* Try to recover a Doc by looking at the text that leads up to a term
+   definition */
+static struct Doc parseDoc(std::string sourcePrefix) {
+
+    std::string wss("[ \t\r\n]*");
+    std::string spaces("[ \t]*");
+
+    std::string singleLineComment(spaces + "#[^\r\n]*(?:\n|\r\n)");
+    std::string multiSingleLineComment("(?:" + singleLineComment + ")*");
+    std::string multiLineComment("\\/\\*(?:[^*]|\\*+[^*/])*\\*+\\/");
+    std::string commentUnit("(" + multiSingleLineComment + "|" + spaces + multiLineComment + ")" + wss);
+
+    std::string ident("[a-zA-Z_][a-zA-Z0-9_'-]*" + wss);
+    std::string identKeep("([a-zA-Z_][a-zA-Z0-9_'-]*)" + wss);
+
+    /* lvalue for nested attrset construction, but not matching
+       quoted identifiers or ${...} or comments inbetween etc */
+    std::string simplePath("(?:" + wss + ident + "\\.)*" + identKeep);
+
+    std::string lambda(ident + wss + ":" + wss);
+
+    /* see countLambdas */
+    std::string lambdas("((:?" + lambda + ")*)");
+
+    std::string assign("=" + wss);
+
+    std::string re(commentUnit + simplePath + assign + lambdas + "$");
+    std::regex e(re);
+
+    #define REGEX_GROUP_COMMENT 1
+    #define REGEX_GROUP_NAME 2
+    #define REGEX_GROUP_LAMBDAS 3
+    #define REGEX_GROUP_MAX 4
+
+    std::smatch matches;
+    regex_search(sourcePrefix, matches, e);
+
+    std::stringstream buffer;
+    if (matches.length() < REGEX_GROUP_MAX) {
+        return emptyDoc;
+    }
+
+    std::string rawComment = matches[REGEX_GROUP_COMMENT];
+    std::string name = matches[REGEX_GROUP_NAME];
+    int timesApplied = countLambdas(matches[REGEX_GROUP_LAMBDAS]);
+    return Doc(rawComment, stripComment(rawComment), name, timesApplied);
+}
+
+static std::string stripComment(std::string rawComment) {
+    rawComment.erase(rawComment.find_last_not_of("\n")+1);
+
+    std::string s(trimUnindent(rawComment));
+
+    if (s[0] == '/' && s[1] == '*') {
+        // Remove the "/*"
+        // Indentation will be removed consistently later on
+        s[0] = ' ';
+        s[1] = ' ';
+
+        // Remove the "*/"
+        if (!s.empty() && *(--s.end()) == '/')
+            s.pop_back();
+        if (!s.empty() && *(--s.end()) == '*')
+            s.pop_back();
+
+        s = trimUnindent(s);
+
+        std::istringstream inStream(s);
+        std::ostringstream stripped;
+
+        std::string line;
+
+        /* at first, we assume a comment
+         * that is formatted like this
+         * with '*' characters at the beginning
+         * of the line.
+         */
+        bool hasStars = true;
+
+        while(std::getline(inStream,line,'\n')){
+            if (hasStars && (
+                    (!line.empty() && line[0] == '*')
+                 || (line.length() >= 2 && line[0] == ' ' && line[1] == '*')
+                    )) {
+                if (line[0] == ' ') {
+                    line = stripPrefix(" *", line);
+                } else {
+                    line = stripPrefix("*", line);
+                }
+            } else {
+                hasStars = false;
+            }
+
+            stripped << line << std::endl;
+        }
+        return trimUnindent(stripped.str());
+    }
+    else {
+        std::istringstream inStream(s);
+        std::ostringstream stripped;
+
+        std::string line;
+        while(std::getline(inStream, line, '\n')) {
+            line.erase(0, line.find("#") + 1);
+            stripped << line << std::endl;
+        }
+        return trimUnindent(stripped.str());
+    }
+}
+
+static std::string unindent(std::string s) {
+    size_t maxIndent = 1000;
+    {
+        std::istringstream inStream(s);
+        for (std::string line; std::getline(inStream, line); ) {
+            size_t firstNonWS = line.find_first_not_of(" \t\r\n");
+            if (firstNonWS != std::string::npos) {
+                maxIndent = std::min(firstNonWS, maxIndent);
+            }
+        }
+    }
+
+    std::ostringstream unindentedStream;
+    {
+        std::istringstream inStream(s);
+        for (std::string line; std::getline(inStream, line); ) {
+            if (line.length() >= maxIndent) {
+                unindentedStream << line.substr(maxIndent) << std::endl;
+            } else {
+                unindentedStream << std::endl;
+            }
+        }
+    }
+    return unindentedStream.str();
+}
+
+}
@@ -0,0 +1,41 @@
+#pragma once
+
+#include "nixexpr.hh"
+
+namespace nix::Comment {
+
+struct Doc {
+
+    // Name that the term is assigned to
+    std::string name;
+
+    std::string rawComment;
+    std::string comment;
+
+    // Number of times the curried function must be applied to get the value
+    // that this structure documents.
+    //
+    // This is useful when showing the documentation for a partially applied
+    // curried function. The documentation is for the unapplied function, so
+    // this is crucial information.
+    int timesApplied;
+
+    Doc(std::string rawComment, std::string comment, std::string name, int timesApplied) {
+        this->name = name;
+        this->rawComment = rawComment;
+        this->comment = comment;
+        this->timesApplied = timesApplied;
+    }
+
+};
+
+extern struct Doc emptyDoc;
+
+// lookupDoc will try to recover a Doc. This will perform perform I/O,
+// because documentation is not retained by the parser.
+//
+// Will return empty values if nothing can be found.
+// For its limitations, see the docs of the implementation.
+struct Doc lookupDoc(const Pos & pos);
+
+}
diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc
@@ -1,4 +1,5 @@
 #include "archive.hh"
+#include "comment.hh"
 #include "derivations.hh"
 #include "downstream-placeholder.hh"
 #include "eval-inline.hh"
@@ -2439,6 +2440,27 @@ static RegisterPrimOp primop_unsafeGetAttrPos(PrimOp {
     .fun = prim_unsafeGetAttrPos,
 });
 
+void prim_unsafeGetLambdaDoc(EvalState & state, const PosIdx pos, Value * * args, Value & v)
+{
+    state.forceFunction(*args[0], pos, "while evaluating the first argument to builtins.unsafeGetLambdaDoc");
-    state.forceFunction(*args[0], pos, "while evaluating the first argument to builtins.unsafeGetLambdaDoc");
+    if (!settings.readOnlyMode) {
+      throw Error("attempted to introspect on documentation comments in derivation-producing mode");
+    }
+
+    state.forceFunction(*args[0], pos, "while evaluating the first argument to builtins.unsafeGetLambdaDoc");
-    state.forceFunction(*args[0], pos, "while evaluating the first argument to builtins.unsafeGetLambdaDoc");
+    if (!settings.readOnlyMode) {
+      throw Error("attempted to introspect on documentation comments in derivation-producing mode");
+    }
+
+    state.forceFunction(*args[0], pos, "while evaluating the first argument to builtins.unsafeGetLambdaDoc");
+
+    Pos funPos = state.positions[args[0]->lambda.fun->pos];
+
+    Comment::Doc doc = Comment::lookupDoc(funPos);
+
+    if(doc.comment.empty()) {
+        v.mkNull();
+    } else {
+        v.mkString(doc.comment);
+    }
+}
+
+static RegisterPrimOp primop_unsafeGetLambdaDoc(PrimOp {
+    .name = "__unsafeGetLambdaDoc",
+    .arity = 1,
+    .fun = prim_unsafeGetLambdaDoc,
+});
+
 /* Dynamic version of the `?' operator. */
 static void prim_hasAttr(EvalState & state, const PosIdx pos, Value * * args, Value & v)
 {