elastic · brian-mckinney · Mar 6, 2025 · Mar 7, 2025 · Mar 10, 2025 · Mar 10, 2025
@@ -9,3 +9,4 @@ vendor/
 generated/
 .DS_Store
 *.swp
+*.pyc
@@ -0,0 +1,22 @@
+- name: Endpoint.policy.applied.artifacts.global.channel
+  overrides:
+    default:
+      description: The channel of the artifact.
+      example: default example
+      type: keyword
+    os:
+      linux:
+        description: The channel of the linux artifact.
+        example: stable
+      windows:
+        description: The channel of the windows artifact.
+      macos:
+        description: The channel of the macos artifact.
+    event:
+      linux_malicious_behavior_alert:
+        description: The channel of the artifact for linux malicious behavior alert.
+        example: stable
+- name: agent.type
+  overrides:
+    default:
+      example: endpoint
@@ -0,0 +1,54 @@
+# Custom Documentation Generator
+
+## Description
+
+This module generates documentation for the custom endpoint fields defined in [custom_documentation](../../../custom_documentation/).
+
+### Background
+
+The fields defined in [custom_documentation](../../../custom_documentation/) do not have descriptions.  They are simply the possible fields
+of an event, including all the custom fields that Endpoint uses but that are not mapped.
+
+The fields defined in [package](../../../package/) are the fields that are mapped into Kibana.  These fields have descriptions and documentation.
+
+
+### Implementation
+
+This Python module generates markdown for all of the fields in [custom_documentation](../../../custom_documentation/) by taking the following steps:
+
+1. Parses all of the mapped fields defined in [package](../../../package/), collecting descriptions, examples, and other metadata
+
+2. Parses any override fields defined in [documentation_overrides.yaml](../../../custom_documentation/src/documentation_overrides.yaml)
+   - Overrides can be set for any field.  They can be set at the event level, the os level, or as a default override that applies to all
+    instances of that field.
+   - See [documentation_overrides.yaml](../../../custom_documentation/src/documentation_overrides.yaml) for the format
+   - If overrides are updated, the documentation must be regenerated
+
+3. Puts all of that data into an sqlite database
+
+4. Parses all of the endpoint fields defined in [custom_documentation](../../../custom_documentation/)
+
+5. Iterates over the custom_documentation data, filling out descriptions and examples pulled from the database that was just created.
+
+### Example Usage
+`python -m pydocgen --output-dir /path/to/output`
+
+#### Help statement
+```
+usage: __main__.py [-h] [--database DATABASE] [--no-cache] [--output-dir OUTPUT_DIR] [-v] [-l {DEBUG,INFO,WARNING,ERROR,CRITICAL}] [--csv CSV]
+
+Create markdown documentation for the fields defined in custom_documentation
+
+options:
+  -h, --help            show this help message and exit
+  --database DATABASE   path to the database
+  --no-cache            do not use cached database if it exists, always regenerate the database
+  --output-dir OUTPUT_DIR
+                        output directory for markdown documentation
+  -v, --verbose         Force maximum verbosity (DEBUG level + detailed output)
+  -l {DEBUG,INFO,WARNING,ERROR,CRITICAL}, --log-level {DEBUG,INFO,WARNING,ERROR,CRITICAL}
+                        Set logging verbosity level
-  -l {DEBUG,INFO,WARNING,ERROR,CRITICAL}, --log-level {DEBUG,INFO,WARNING,ERROR,CRITICAL}
-                        Set logging verbosity level
+  -l, --log-level       [DEBUG,INFO,WARNING,ERROR,CRITICAL]
+                        Set logging verbosity level
-  -l {DEBUG,INFO,WARNING,ERROR,CRITICAL}, --log-level {DEBUG,INFO,WARNING,ERROR,CRITICAL}
-                        Set logging verbosity level
+  -l, --log-level       [DEBUG,INFO,WARNING,ERROR,CRITICAL]
+                        Set logging verbosity level
+  --csv CSV             Path to CSV file for missing documentation fields (optional)
+
+Example usage: python -m pydocgen --output-dir /path/to/output
+```
@@ -0,0 +1,104 @@
+import argparse
+import logging
+from logging import config
+import pathlib
+import traceback
+import sys
+import tempfile
+
+from .markdown import generate_custom_documentation_markdown
+
+from .models.custom_documentation import DocumentationOverrideMap
+
+from typing import Literal
+
+
+def configure_logging(
+    log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], 
+    verbose: bool
+) -> None:
+    """Configures the logging system with specified level and verbosity.
+
+    Args:
+        log_level: String representation of logging level (DEBUG, INFO, etc.)
+        verbose: Boolean flag to force maximum verbosity
+    """
+    level = getattr(logging, log_level)
+
+    # If verbose is specified, override to DEBUG level
+    if verbose:
+        level = logging.DEBUG
+
+    # Basic config with both handlers
+    logging.basicConfig(
+        level=level,
+        format="%(asctime)s - %(levelname)-8s %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S",
+    )
+
+
+def main() -> None:
+    """Command-line entry point: parse arguments and generate the markdown documentation.
+
+    Parses the command-line options, configures logging, removes the cached
+    database when --no-cache is given, and then calls
+    generate_custom_documentation_markdown with the database path and the
+    output directory.
+    """
+    parser = argparse.ArgumentParser(
+        description="Create markdown documentation for the fields defined in custom_documentation",
+        epilog="Example usage: python -m pydocgen --output-dir /path/to/output",
+    )
+
+    # The database defaults to a file in the system temp directory
+    parser.add_argument(
+        "--database",
+        default=pathlib.Path(tempfile.gettempdir()) / "generate-docs.sqlite",
+        type=pathlib.Path,
+        help="path to the database",
+    )
+
+    parser.add_argument(
+        "--no-cache",
+        action="store_true",
+        help="do not use cached database if it exists, always regenerate the database",
+    )
+
+    # Output defaults to an "output" directory under the current working directory
+    parser.add_argument(
+        "--output-dir",
+        default=pathlib.Path.cwd().resolve() / "output",
+        type=pathlib.Path,
+        help="output directory for markdown documentation",
+    )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        help="Force maximum verbosity (DEBUG level + detailed output)",
+    )
+
+    # type=str.upper normalizes the value before it is checked against choices,
+    # so lower-case level names are accepted as well
+    parser.add_argument(
+        "-l",
+        "--log-level",
+        type=str.upper,
+        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
+        default="INFO",
+        help="Set logging verbosity level",
+    )
+
+    # NOTE(review): args.csv is parsed here but is not referenced anywhere else
+    # in this function - confirm whether it should be forwarded to the generator
+    parser.add_argument(
+        "--csv",
+        type=pathlib.Path,
+        default=None,
+        help="Path to CSV file for missing documentation fields (optional)",
+    )
+
+    args = parser.parse_args()
+
+    configure_logging(args.log_level, args.verbose)
+
+    # Deleting the file here means no cached database is left for the generator to reuse
+    if args.no_cache and args.database.exists():
+        logging.info(f"Removing existing database {args.database} since --no-cache was specified")
+        args.database.unlink()
+
+    generate_custom_documentation_markdown(args.database, args.output_dir)
+    logging.info(f"Generated markdown documentation to {args.output_dir}")
+
+if __name__ == "__main__":
+    try:
+        main()
+    except Exception as e:
+        traceback.print_exc()
+        sys.exit(1)
@@ -0,0 +1,208 @@
+import pathlib
+import logging
+
+from sqlmodel import SQLModel, Field, create_engine, Session, select, Relationship
+from sqlalchemy import Engine, Column, JSON
+
+from .models.custom_documentation import DocumentationOverrideMap
+from .models.packages import Package, PackageList
+
+from typing import Optional
+
+
+#
+# These models represent the database tables for mapped fields
+#
+class PackageReference(SQLModel, table=True):
+    """
+    PackageReference is the database table holding one serialized package per row.
+
+    PackageField rows point back to a row of this table through package_reference_id.
+    """
+
+    __tablename__ = "package_references"
+    id: Optional[int] = Field(default=None, primary_key=True)
+    # JSON dump of a Package (written with model_dump_json, read back with model_validate_json)
+    package_data: Optional[str] = Field(default=None, sa_column=Column(JSON))
+
+
+class PackageField(SQLModel, table=True):
+    """
+    PackageField represents a specific field as defined in package/endpoint/datastream/{type}/fields/fields.yml
+    each field in fields.yml has a name and description, this class holds the name, description, optional example,
+    and a reference to the parent package.
+    These fields will be used to provide descriptions for the fields in the custom documentation.
+
+    Note: the parent package is stored in a PackageReference row as the JSON of the Package class
+    defined in models/packages.py; use the package property to get it back as a Package.
+    """
+
+    __tablename__ = "package_fields"
+    id: Optional[int] = Field(default=None, primary_key=True)
+    name: str
+    description: str
+    example: Optional[str] = None
+    # foreign key to the PackageReference row of the package this field came from
+    package_reference_id: Optional[int] = Field(foreign_key="package_references.id")
+    package_reference: Optional[PackageReference] = Relationship()
+
+    @property
+    def package(self) -> Package:
+        """
+        package returns the parent Package, rebuilt from the JSON stored in the package reference
+
+        Raises:
+            ValueError: no package reference is set on this field
+
+        Returns:
+            the Package this field belongs to
+        """
+        if not self.package_reference:
+            raise ValueError(f"PackageReference is not set for PackageField {self}")
+        return Package.model_validate_json(self.package_reference.package_data)
+
+
+#
+# These models represent the database tables for overrides
+#
+class OverrideField(SQLModel, table=True):
+    """
+    OverrideField holds the values of a single documentation override.
+
+    Every column is optional.  Which field, os or event the override applies to is
+    recorded separately in OverrideRelationship.
+    """
+
+    __tablename__ = "overrides"
+    id: Optional[int] = Field(default=None, primary_key=True)
+    description: Optional[str] = None
+    example: Optional[str] = None
+    type: Optional[str] = None
+
+
+class OverrideRelationship(SQLModel, table=True):
+    """
+    OverrideRelationship ties an OverrideField to a field name and to the scope it applies to.
+
+    populate_overrides writes each row with exactly one of event, os, or default=True set.
+    """
+
+    __tablename__ = "override_relationships"
+    id: Optional[int] = Field(default=None, primary_key=True)
+    # name of the field being overridden
+    name: str
+    event: Optional[str] = None
+    os: Optional[str] = None
+    default: bool = False
+    override_id: int = Field(foreign_key="overrides.id")
+    # lazy="joined": the override row is fetched in the same query as the relationship row
+    override: OverrideField = Relationship(sa_relationship_kwargs={"lazy": "joined"})
+
+
+def populate_overrides(session: Session):
+    dom = DocumentationOverrideMap.from_yaml()
+    for name, mapping in dom.items():
+        if mapping.os:
+            for os, override in mapping.os.items():
+                record = OverrideField(
+                    description=override.description,
+                    example=override.example,
+                    type=override.type,
+                )
+                session.add(record)
+                session.flush()
+
+                related_record = OverrideRelationship(
+                    name=name, os=os, override_id=record.id
+                )
+                session.add(related_record)
+
+        if mapping.event:
+            for event, override in mapping.event.items():
+
+                record = OverrideField(
+                    description=override.description,
+                    example=override.example,
+                    type=override.type,
+                )
+                session.add(record)
+                session.flush()
+
+                related_record = OverrideRelationship(
+                    name=name, event=event, override_id=record.id
+                )
+                session.add(related_record)
+
+        if mapping.default:
+            record = OverrideField(
+                description=mapping.default.description,
+                example=mapping.default.example,
+                type=mapping.default.type,
+            )
+            session.add(record)
+            session.flush()
+
+            related_record = OverrideRelationship(
+                name=name, default=True, override_id=record.id
+            )
+            session.add(related_record)
+
+    session.commit()
+
+
+def populate_packages_fields(session: Session):
+    """
+    populate_packages_fields populates the package fields in the database
+
+    Args:
+        session: database session
+    """
+
+    def add_to_db(field: PackageField, session: Session):
+        existing_field = session.exec(
+            select(PackageField).where(PackageField.name == field.name)
+        ).first()
+        if existing_field:
+            if existing_field.description != field.description:
+                raise ValueError(
+                    f"Field {field.name} already exists with different description"
+                )
+        else:
+            logging.debug(f"  Adding field {field.name}")
+            session.add(field)
+
+    package_list = PackageList.from_files()
+    for package in package_list:
+        logging.debug(f"Adding package fields for {package.filepath}")
+        package_ref = PackageReference(package_data=package.model_dump_json())
+        session.add(package_ref)
+        session.flush()
+        for field in package.fields:
+            if field.fields:
+                for sub_field in field.fields:
+                    name = f"{field.name}.{sub_field.name}"
+                    add_to_db(
+                        PackageField(
+                            name=name,
+                            description=sub_field.description,
+                            package_reference_id=package_ref.id,
+                            example=sub_field.example,
+                        ),
+                        session,
+                    )
+            else:
+                add_to_db(
+                    PackageField(
+                        name=field.name,
+                        description=field.description,
+                        package_reference_id=package_ref.id,
+                        example=field.example,
+                    ),
+                    session,
+                )
+    session.commit()
+
+
+def getDatabase(db_path: pathlib.Path) -> Engine:
+    """
+    getDatabase creates a database if it does not exist, otherwise it uses the existing database
+
+    This stores the documentation in package/endpoint/data_stream in a lightweight SQLite database.  We will
+    use this when generating markdown documentation for the fields defined in the custom_documentation.
+
+    overrides are also added to the database here.
+
+    Args:
+        db_path: path to the database
+
+    Returns:
+        database Engine
+    """
+    if db_path.exists():
+        logging.info(f"Using existing database at {db_path}")
+        return create_engine(f"sqlite:///{db_path}")
+
+    logging.info(f"Creating database at {db_path}")
+    engine = create_engine(f"sqlite:///{db_path}")
+    SQLModel.metadata.create_all(engine)
+    with Session(engine) as session:
+        populate_packages_fields(session)
+        populate_overrides(session)
+        session.commit()
+    return engine
-Original file line number
+Diff line change
@@ Expand Up / @@ -9,3 +9,4 @@ vendor/ @@
     generated/
     .DS_Store
     *.swp
+    *.pyc