diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 4efc31d..ca386d9 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -222,19 +222,6 @@ jobs: - name: Run all tests run: npm test - - name: Generate coverage report - if: matrix.node-version == '18' - run: npm test -- --coverage --coverageReporters=lcov - - - name: Upload coverage to Codecov - if: matrix.node-version == '18' - uses: codecov/codecov-action@v3 - with: - token: ${{ secrets.CODECOV_TOKEN }} - file: ./coverage/lcov.info - flags: unittests - name: codecov-umbrella - # Summary job that all other jobs depend on test-summary: if: always() diff --git a/.gitignore b/.gitignore index 5a4c5ce..2fd947e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ -/node_modules/* /wip/* /docs/* .vscode @@ -10,4 +9,5 @@ gen/ tree-sitter/ test/ modules/ -cloud-controlplane/ \ No newline at end of file +cloud-controlplane/ +node_modules diff --git a/__tests__/docs-data/examples/admin-example.adoc b/__tests__/docs-data/examples/admin-example.adoc new file mode 100644 index 0000000..d85e311 --- /dev/null +++ b/__tests__/docs-data/examples/admin-example.adoc @@ -0,0 +1,15 @@ +.Example Configuration for Admin API +This example shows how to configure the admin API endpoint with custom addressing. + +[,yaml] +---- +redpanda: + admin: + - address: "0.0.0.0" + port: 9644 + - address: "127.0.0.1" + port: 9645 +---- + +You can specify multiple admin endpoints to provide redundancy and load balancing. +The admin API is used for cluster management operations. diff --git a/__tests__/docs-data/property-overrides.json b/__tests__/docs-data/property-overrides.json new file mode 100644 index 0000000..2b7d88e --- /dev/null +++ b/__tests__/docs-data/property-overrides.json @@ -0,0 +1,81 @@ +{ + "properties": { + "admin": { + "description": "Network addresses for Admin API servers with version info.", + "version": "v23.1.0", + "example_file": "examples/admin-example.adoc" + }, + "admin_api_tls": { + "description": "TLS configuration for the Admin API endpoints.", + "version": "v23.2.0", + "example_yaml": { + "title": "Example TLS Configuration", + "description": "This example shows how to configure TLS for the Admin API with client authentication.", + "config": { + "redpanda": { + "admin_api_tls": [ + { + "name": "internal-admin", + "enabled": true, + "cert_file": "/etc/redpanda/certs/admin.crt", + "key_file": "/etc/redpanda/certs/admin.key", + "truststore_file": "/etc/redpanda/certs/ca.crt", + "require_client_auth": true + } + ] + } + } + } + }, + "abort_index_segment_size": { + "description": "Segment size for transaction abort index. Controls how large each abort index segment can grow.", + "example": [ + ".Example: Setting abort index segment size", + "[,yaml]", + "----", + "redpanda:", + " abort_index_segment_size: 134217728 # 128MB", + "----", + "", + "This setting controls the maximum size of abort index segments.", + "Smaller segments may improve memory usage but increase overhead." + ] + }, + "append_chunk_size": { + "description": "Size of data chunks for append operations.", + "version": "v24.1.0", + "example": "67108864" + }, + "cloud_storage_access_key": { + "description": "Access key for cloud storage authentication. Used to authenticate with S3-compatible object storage services." + }, + "cleanup.policy": { + "description": "Determines how log segments are cleaned. **delete** removes old segments based on time or size. **compact** retains only the latest value for each key. 
**compact,delete** enables both strategies.", + "version": "v23.1.0", + "example": [ + ".Example: Setting cleanup policy", + "[,bash]", + "----", + "rpk topic alter-config my-topic --set cleanup.policy=compact", + "----", + "", + "For topics that require both compaction and deletion:", + "", + "[,bash]", + "----", + "rpk topic alter-config my-topic --set cleanup.policy=compact,delete", + "----" + ] + }, + "compression.type": { + "description": "Compression algorithm used for compressing message batches. Options include **none** (no compression), **gzip**, **snappy**, **lz4**, and **zstd**.", + "example": [ + ".Example: Setting compression type", + "[,bash]", + "----", + "rpk topic alter-config my-topic --set compression.type=zstd", + "----" + ] + } + } +} diff --git a/__tests__/tools/property-docs-overrides.test.js b/__tests__/tools/property-docs-overrides.test.js new file mode 100644 index 0000000..1dea834 --- /dev/null +++ b/__tests__/tools/property-docs-overrides.test.js @@ -0,0 +1,87 @@ +const fs = require('fs'); +const path = require('path'); +const os = require('os'); + +const repoRoot = path.resolve(__dirname, '..', '..'); +const overridesFile = path.join(repoRoot, '__tests__', 'docs-data', 'property-overrides.json'); + +describe('property-docs description override', () => { + let tempDir; + let mockPropertiesFile; + + beforeAll(() => { + tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'property-docs-test-')); + + // Create mock property data that includes admin property + const mockProperties = { + properties: { + admin: { + config_scope: "broker", + default: [{ address: "127.0.0.1", port: 9644 }], + defined_in: "src/v/config/node_config.cc", + description: "Default description for admin", + name: "admin", + needs_restart: true, + nullable: false, + type: "array", + visibility: "user" + }, + kafka_api: { + config_scope: "broker", + default: [{ address: "127.0.0.1", port: 9092 }], + defined_in: "src/v/config/node_config.cc", + description: "IP address and port of the Kafka API endpoint that handles requests.", + name: "kafka_api", + needs_restart: true, + nullable: false, + type: "array", + visibility: "user" + } + } + }; + + // Write mock properties to a temp file + mockPropertiesFile = path.join(tempDir, 'mock-properties.json'); + fs.writeFileSync(mockPropertiesFile, JSON.stringify(mockProperties, null, 2)); + }); + + afterAll(() => { + fs.rmSync(tempDir, { recursive: true, force: true }); + }); + + it('applies the override description for admin property', () => { + const overrides = JSON.parse(fs.readFileSync(overridesFile, 'utf8')); + const mockProperties = JSON.parse(fs.readFileSync(mockPropertiesFile, 'utf8')); + + // Test 1: Verify the override file structure + const adminOverride = overrides.properties.admin; + expect(adminOverride).toBeTruthy(); + expect(adminOverride.description).toBeTruthy(); + expect(adminOverride.version).toBe('v23.1.0'); + expect(adminOverride.description).toBe('Network addresses for Admin API servers with version info.'); + + // Test 2: Verify our mock data has the correct structure (no artificial name field) + const adminProperty = mockProperties.properties.admin; + expect(adminProperty.default).toEqual([{ address: "127.0.0.1", port: 9644 }]); + + const adminDefault = adminProperty.default[0]; + expect(adminDefault).toHaveProperty('address', '127.0.0.1'); + expect(adminDefault).toHaveProperty('port', 9644); + + // Test 3: Simulate applying overrides (this is what the Python script would do) + const adminWithOverrides = { + ...adminProperty, + 
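+        // Shallow merge: the spread copies the extracted fields, then the
+        // override's description and version below replace them (last write wins).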
description: adminOverride.description, + version: adminOverride.version + }; + + expect(adminWithOverrides.description).toBe('Network addresses for Admin API servers with version info.'); + expect(adminWithOverrides.version).toBe('v23.1.0'); + expect(adminWithOverrides.default).toEqual([{ address: "127.0.0.1", port: 9644 }]); + + // Test 4: Verify that kafka_api (without overrides) keeps its original description + const kafkaProperty = mockProperties.properties.kafka_api; + expect(kafkaProperty.description).toBe('IP address and port of the Kafka API endpoint that handles requests.'); + expect(kafkaProperty.default).toEqual([{ address: "127.0.0.1", port: 9092 }]); + }); +}); diff --git a/bin/doc-tools.js b/bin/doc-tools.js index 19d17d6..38211ad 100755 --- a/bin/doc-tools.js +++ b/bin/doc-tools.js @@ -98,7 +98,7 @@ function requireCmd(cmd, help, versionFlag = '--version') { * @param {number} [minMinor=10] - Minimum required minor version of Python. */ function requirePython(minMajor = 3, minMinor = 10) { - const candidates = ['python3', 'python']; + const candidates = ['python3', 'python', 'python3.12', 'python3.11', 'python3.10']; for (const p of candidates) { try { const out = execSync(`${p} --version`, { encoding: 'utf8' }).trim(); @@ -234,12 +234,16 @@ For more details, visit: https://github.com/norwoodj/helm-docs /** * Ensures all dependencies required for generating property documentation are installed. * - * Checks for the presence of `make`, Python 3.10 or newer, C++ compiler, and C++ standard library headers. + * Checks for the presence of `make`, Python 3.10 or newer, Node.js, C++ compiler, and C++ standard library headers. * Exits the process with an error message if any dependency is missing. */ function verifyPropertyDependencies() { requireCmd('make', 'Your OS package manager'); requirePython(); + + // Check for Node.js (required for Handlebars templates) + requireCmd('node', 'https://nodejs.org/en/download/ or use your package manager (e.g., brew install node)'); + requireCmd('npm', 'Usually installed with Node.js'); // Check for C++ compiler let cppCompiler = null; @@ -463,25 +467,23 @@ function runClusterDocs(mode, tag, options) { if (r.status !== 0) process.exit(r.status); } -// helper to diff two autogenerated directories -function diffDirs(kind, oldTag, newTag) { - const oldDir = path.join('autogenerated', oldTag, kind); - const newDir = path.join('autogenerated', newTag, kind); - const diffDir = path.join('autogenerated', 'diffs', kind, `${oldTag}_to_${newTag}`); +// helper to diff two temporary directories +function diffDirs(kind, oldTag, newTag, oldTempDir, newTempDir) { + const diffDir = path.join('tmp', 'diffs', kind, `${oldTag}_to_${newTag}`); const patch = path.join(diffDir, 'changes.patch'); - if (!fs.existsSync(oldDir)) { - console.error(`❌ Cannot diff: missing ${oldDir}`); + if (!fs.existsSync(oldTempDir)) { + console.error(`❌ Cannot diff: missing ${oldTempDir}`); process.exit(1); } - if (!fs.existsSync(newDir)) { - console.error(`❌ Cannot diff: missing ${newDir}`); + if (!fs.existsSync(newTempDir)) { + console.error(`❌ Cannot diff: missing ${newTempDir}`); process.exit(1); } fs.mkdirSync(diffDir, { recursive: true }); - const cmd = `diff -ru "${oldDir}" "${newDir}" > "${patch}" || true`; + const cmd = `diff -ru "${oldTempDir}" "${newTempDir}" > "${patch}" || true`; const res = spawnSync(cmd, { stdio: 'inherit', shell: true }); if (res.error) { @@ -489,6 +491,10 @@ function diffDirs(kind, oldTag, newTag) { process.exit(1); } console.log(`✅ Wrote patch: 
${patch}`);
+
+  // Clean up temporary directories
+  fs.rmSync(oldTempDir, { recursive: true, force: true });
+  fs.rmSync(newTempDir, { recursive: true, force: true });
 }
 
 automation
@@ -747,15 +753,56 @@ automation
     .description('Generate JSON and AsciiDoc documentation for Redpanda configuration properties')
     .option('--tag <tag>', 'Git tag or branch to extract from', 'dev')
     .option('--diff <oldTag>', 'Also diff autogenerated properties from <oldTag>')
+    .option('--overrides <path>', 'Optional JSON file with property description overrides')
+    .option('--output-dir <dir>', 'Where to write all generated files', 'modules/reference')
+    .option('--template-property-page <path>', 'Custom Handlebars template for property page layout')
+    .option('--template-property <path>', 'Custom Handlebars template for individual property sections')
+    .option('--template-deprecated <path>', 'Custom Handlebars template for deprecated properties page')
+    .option('--template-deprecated-property <path>', 'Custom Handlebars template for individual deprecated property sections')
     .action((options) => {
       verifyPropertyDependencies();
 
       const newTag = options.tag;
       const oldTag = options.diff;
+      const overridesPath = options.overrides;
+      const outputDir = options.outputDir;
       const cwd = path.resolve(__dirname, '../tools/property-extractor');
-      const make = (tag) => {
-        console.log(`⏳ Building property docs for ${tag}…`);
-        const r = spawnSync('make', ['build', `TAG=${tag}`], { cwd, stdio: 'inherit' });
+
+      const make = (tag, overrides, templates = {}, outputDir = 'modules/reference/', tempDir = null) => {
+        console.log(`⏳ Building property docs for ${tag}${tempDir ? ' (for diff)' : ''}…`);
+        const args = ['build', `TAG=${tag}`];
+
+        // Pass all paths as environment variables for consistency
+        const env = { ...process.env };
+        if (overrides) {
+          env.OVERRIDES = path.resolve(overrides);
+        }
+        if (templates.propertyPage) {
+          env.TEMPLATE_PROPERTY_PAGE = path.resolve(templates.propertyPage);
+        }
+        if (templates.property) {
+          env.TEMPLATE_PROPERTY = path.resolve(templates.property);
+        }
+        if (templates.deprecated) {
+          env.TEMPLATE_DEPRECATED = path.resolve(templates.deprecated);
+        }
+        if (templates.deprecatedProperty) {
+          env.TEMPLATE_DEPRECATED_PROPERTY = path.resolve(templates.deprecatedProperty);
+        }
+
+        if (tempDir) {
+          // For diff purposes, generate to temporary directory
+          env.OUTPUT_ASCIIDOC_DIR = path.resolve(tempDir);
+          env.OUTPUT_JSON_DIR = path.resolve(tempDir, 'examples');
+          env.OUTPUT_AUTOGENERATED_DIR = path.resolve(tempDir);
+        } else {
+          // Normal generation - go directly to final destination
+          // Let Makefile calculate OUTPUT_ASCIIDOC_DIR as OUTPUT_AUTOGENERATED_DIR/pages
+          env.OUTPUT_JSON_DIR = path.resolve(outputDir, 'examples');
+          env.OUTPUT_AUTOGENERATED_DIR = path.resolve(outputDir);
+        }
+
+        const r = spawnSync('make', args, { cwd, stdio: 'inherit', env });
         if (r.error) {
           console.error(`❌ ${r.error.message}`);
           process.exit(1);
@@ -763,50 +810,38 @@ automation
         if (r.status !== 0) process.exit(r.status);
       };
 
-      if (oldTag) {
-        const oldDir = path.join('autogenerated', oldTag, 'properties');
-        if (!fs.existsSync(oldDir)) make(oldTag);
-      }
-
-      make(newTag);
-
-      if (oldTag) {
-        diffDirs('properties', oldTag, newTag);
-      }
-
-      process.exit(0);
-    });
-
-automation
-  .command('topic-property-docs')
-  .description('Generate JSON and AsciiDoc documentation for Redpanda topic configuration properties')
-  .option('--tag <tag>', 'Git tag or branch to extract from', 'dev')
-  .option('--diff <oldTag>', 'Also diff autogenerated topic properties from <oldTag>')
-  .action((options) => {
-    
verifyPropertyDependencies(); - - const newTag = options.tag; - const oldTag = options.diff; - const cwd = path.resolve(__dirname, '../tools/property-extractor'); - const make = (tag) => { - console.log(`⏳ Building topic property docs for ${tag}…`); - const r = spawnSync('make', ['topic-properties', `TAG=${tag}`], { cwd, stdio: 'inherit' }); - if (r.error) { - console.error(`❌ ${r.error.message}`); - process.exit(1); - } - if (r.status !== 0) process.exit(r.status); + // Collect template options + const templates = { + propertyPage: options.templatePropertyPage, + property: options.templateProperty, + deprecated: options.templateDeprecated, + deprecatedProperty: options.templateDeprecatedProperty }; + let oldTempDir = null; + let newTempDir = null; + if (oldTag) { - const oldDir = path.join('autogenerated', oldTag, 'properties'); - if (!fs.existsSync(oldDir)) make(oldTag); + // Generate old version to temporary directory for diff + oldTempDir = path.join('tmp', 'diff', `${oldTag}-properties`); + fs.mkdirSync(oldTempDir, { recursive: true }); + make(oldTag, overridesPath, templates, outputDir, oldTempDir); } - make(newTag); - if (oldTag) { - diffDirs('properties', oldTag, newTag); + // Generate new version to temporary directory for diff + newTempDir = path.join('tmp', 'diff', `${newTag}-properties`); + fs.mkdirSync(newTempDir, { recursive: true }); + make(newTag, overridesPath, templates, outputDir, newTempDir); + + // Then generate new version to final destination + make(newTag, overridesPath, templates, outputDir); + + // Compare the temporary directories + diffDirs('properties', oldTag, newTag, oldTempDir, newTempDir); + } else { + // No diff requested, just generate to final destination + make(newTag, overridesPath, templates, outputDir); } process.exit(0); diff --git a/package-lock.json b/package-lock.json index 069b550..78f264e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@redpanda-data/docs-extensions-and-macros", - "version": "4.8.0", + "version": "4.8.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@redpanda-data/docs-extensions-and-macros", - "version": "4.7.2", + "version": "4.8.1", "license": "ISC", "dependencies": { "@asciidoctor/tabs": "^1.0.0-beta.6", diff --git a/package.json b/package.json index ee575b4..df41027 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@redpanda-data/docs-extensions-and-macros", - "version": "4.8.0", + "version": "4.8.1", "description": "Antora extensions and macros developed for Redpanda documentation.", "keywords": [ "antora", diff --git a/tools/property-extractor/Makefile b/tools/property-extractor/Makefile index 6edc675..79b3c50 100644 --- a/tools/property-extractor/Makefile +++ b/tools/property-extractor/Makefile @@ -1,4 +1,4 @@ -.PHONY: build venv clean redpanda-git treesitter topic-properties generate-docs check +.PHONY: build venv clean redpanda-git treesitter generate-docs check # --- Main build: venv, fetch code, build parser, extract & docgen --- build: venv redpanda-git treesitter @@ -18,11 +18,11 @@ TAG ?= dev # Derive a “major.minor” or rc identifier from TAG for folder naming VERSION := $(shell \ if echo "$(TAG)" | grep -qE '^v?[0-9]+\.[0-9]+'; then \ - echo "$(TAG)" \ - | sed -E 's/^v?([0-9]+\.[0-9]+)(\.[0-9]+)?(-rc[0-9]+)?.*/\1\3/' \ - | sed 's/-rc/rc/'; \ + echo "$(TAG)" \ + | sed -E 's/^v?([0-9]+\.[0-9]+)(\.[0-9]+)?(-rc[0-9]+)?.*/\1\3/' \ + | sed 's/-rc/rc/'; \ else \ - echo "$(TAG)"; \ + echo "$(TAG)"; \ fi) # Paths @@ -34,9 +34,33 @@ 
REDPANDA_SRC := $(TMP_ROOT)/redpanda TREESITTER_DIR:= $(TOOL_ROOT)/tree-sitter/tree-sitter-cpp VENV := $(TOOL_ROOT)/tmp/redpanda-property-extractor-venv PYTHON := $(VENV)/bin/python -OUTPUT_DIR := $(REPO_ROOT)/autogenerated/$(TAG)/properties TREE_SITTER := npx tree-sitter +# Output directory configuration (can be overridden by environment variables) +OUTPUT_AUTOGENERATED_DIR ?= $(REPO_ROOT)/modules/reference +OUTPUT_ASCIIDOC_DIR ?= $(OUTPUT_AUTOGENERATED_DIR)/pages +OUTPUT_JSON_DIR ?= $(OUTPUT_AUTOGENERATED_DIR)/examples + +# --- Main build: venv, fetch code, build parser, extract & docgen --- +build: venv redpanda-git treesitter + @echo "🔧 Building with Redpanda tag: $(TAG)" + @mkdir -p $(TOOL_ROOT)/gen + @if [ -n "$(OVERRIDES)" ] && [ ! -f "$(OVERRIDES)" ]; then \ + echo "❌ Error: Overrides file '$(OVERRIDES)' does not exist."; \ + echo " Please check the path or omit the OVERRIDES variable to continue without overrides."; \ + exit 1; \ + fi + @cd $(TOOL_ROOT) && \ + $(PYTHON) -W ignore::FutureWarning property_extractor.py \ + --recursive \ + --path $(REDPANDA_SRC) \ + --output gen/properties-output.json \ + --enhanced-output gen/$(TAG)-properties.json \ + $(if $(OVERRIDES),$(if $(shell [ -f "$(OVERRIDES)" ] && echo exists),--overrides $(OVERRIDES),),) + @echo "✅ JSON generated at $(TOOL_ROOT)/gen/properties-output.json" + @echo "✅ Enhanced JSON (with overrides) generated at $(TOOL_ROOT)/gen/$(TAG)-properties.json" + @$(MAKE) generate-docs + # --- Ensure Python venv & dependencies --- venv: $(TOOL_ROOT)/requirements.txt @if [ ! -d "$(VENV)" ]; then \ @@ -84,34 +108,38 @@ treesitter: @echo "🔧 Generating parser in $(TREESITTER_DIR)…" @cd "$(TREESITTER_DIR)" && npm install --silent && $(TREE_SITTER) generate -# --- Turn the JSON into AsciiDoc pages under autogen//properties --- -generate-docs: - @echo "📝 Generating AsciiDoc pages in $(OUTPUT_DIR)…" - @mkdir -p "$(OUTPUT_DIR)" - @cd $(TOOL_ROOT) && \ - $(PYTHON) json-to-asciidoc/generate_docs.py --output-dir "$(OUTPUT_DIR)" - @echo "📄 Copying properties-output.json to $(OUTPUT_DIR)…" - @cp "$(TOOL_ROOT)/gen/properties-output.json" "$(OUTPUT_DIR)/" - @echo "✅ Docs generated at $(OUTPUT_DIR)" +# --- Install Node.js dependencies for Handlebars --- +node-deps: + @echo "📦 Installing Node.js dependencies…" + @cd $(TOOL_ROOT) && npm install --silent + +# --- Turn the JSON into AsciiDoc pages under the specified output directories --- +generate-docs: node-deps + @echo "📝 Generating AsciiDoc pages in $(OUTPUT_ASCIIDOC_DIR)…" + @mkdir -p "$(OUTPUT_ASCIIDOC_DIR)" + @mkdir -p "$(OUTPUT_JSON_DIR)" + @# Use the enhanced properties file (with overrides) for documentation generation if it exists + @if [ -f "$(TOOL_ROOT)/gen/$(TAG)-properties.json" ]; then \ + cd $(TOOL_ROOT) && \ + node generate-handlebars-docs.js "gen/$(TAG)-properties.json" "$(OUTPUT_AUTOGENERATED_DIR)"; \ + else \ + cd $(TOOL_ROOT) && \ + node generate-handlebars-docs.js "gen/properties-output.json" "$(OUTPUT_AUTOGENERATED_DIR)"; \ + fi + @echo "📄 Copying properties JSON files to $(OUTPUT_JSON_DIR)…" + @if [ -f "$(TOOL_ROOT)/gen/$(TAG)-properties.json" ]; then \ + cp "$(TOOL_ROOT)/gen/$(TAG)-properties.json" "$(OUTPUT_JSON_DIR)/"; \ + fi + @echo "✅ Docs generated at $(OUTPUT_AUTOGENERATED_DIR)" # --- Debug helper to print all the key paths/vars --- check: - @echo "MODULE_ROOT: $(MODULE_ROOT)" - @echo "TOOL_ROOT: $(TOOL_ROOT)" - @echo "REDPANDA_SRC: $(REDPANDA_SRC)" - @echo "TREESITTER: $(TREESITTER_DIR)" - @echo "VENV: $(VENV)" - @echo "PYTHON: $(PYTHON)" - @echo "OUTPUT_DIR: 
$(OUTPUT_DIR)" - -# --- Extract topic properties --- -topic-properties: venv redpanda-git treesitter - @echo "🔧 Extracting topic properties with Redpanda tag: $(TAG)" - @mkdir -p $(TOOL_ROOT)/gen - @mkdir -p "$(OUTPUT_DIR)" - @cd $(TOOL_ROOT) && \ - $(PYTHON) topic_property_extractor.py \ - --source-path $(REDPANDA_SRC) \ - --output-json "$(OUTPUT_DIR)/topic-properties-output.json" \ - --output-adoc "$(OUTPUT_DIR)/topic-properties.adoc" - @echo "✅ Topic properties extracted" \ No newline at end of file + @echo "MODULE_ROOT: $(MODULE_ROOT)" + @echo "TOOL_ROOT: $(TOOL_ROOT)" + @echo "REDPANDA_SRC: $(REDPANDA_SRC)" + @echo "TREESITTER: $(TREESITTER_DIR)" + @echo "VENV: $(VENV)" + @echo "PYTHON: $(PYTHON)" + @echo "OUTPUT_ASCIIDOC_DIR: $(OUTPUT_ASCIIDOC_DIR)" + @echo "OUTPUT_JSON_DIR: $(OUTPUT_JSON_DIR)" + @echo "OUTPUT_AUTOGENERATED_DIR: $(OUTPUT_AUTOGENERATED_DIR)" \ No newline at end of file diff --git a/tools/property-extractor/generate-handlebars-docs.js b/tools/property-extractor/generate-handlebars-docs.js new file mode 100644 index 0000000..eb348b4 --- /dev/null +++ b/tools/property-extractor/generate-handlebars-docs.js @@ -0,0 +1,344 @@ +'use strict'; + +const fs = require('fs'); +const path = require('path'); +const handlebars = require('handlebars'); +const helpers = require('./helpers'); + +// Register all helpers +Object.entries(helpers).forEach(([name, fn]) => { + if (typeof fn !== 'function') { + console.error(`❌ Helper "${name}" is not a function`); + process.exit(1); + } + handlebars.registerHelper(name, fn); +}); + +/** + * Configuration mapping for different property types + */ +const PROPERTY_CONFIG = { + broker: { + pageTitle: 'Broker Configuration Properties', + pageAliases: ['reference:node-properties.adoc', 'reference:node-configuration-sample.adoc'], + description: 'Reference of broker configuration properties.', + intro: `Broker configuration properties are applied individually to each broker in a cluster. You can find and modify these properties in the \`redpanda.yaml\` configuration file. + +For information on how to edit broker properties, see xref:manage:cluster-maintenance/node-property-configuration.adoc[]. + +NOTE: All broker properties require that you restart Redpanda for any update to take effect.`, + sectionTitle: 'Broker configuration', + groups: [ + { + filter: (prop) => prop.config_scope === 'broker' && !prop.is_deprecated + } + ], + filename: 'broker-properties.adoc' + }, + cluster: { + pageTitle: 'Cluster Configuration Properties', + pageAliases: ['reference:tunable-properties.adoc', 'reference:cluster-properties.adoc'], + description: 'Cluster configuration properties list.', + intro: `Cluster configuration properties are the same for all brokers in a cluster, and are set at the cluster level. + +For information on how to edit cluster properties, see xref:manage:cluster-maintenance/cluster-property-configuration.adoc[] or xref:manage:kubernetes/k-cluster-property-configuration.adoc[]. + +NOTE: Some cluster properties require that you restart the cluster for any updates to take effect. See the specific property details to identify whether or not a restart is required.`, + sectionTitle: 'Cluster configuration', + groups: [ + { + filter: (prop) => prop.config_scope === 'cluster' && !prop.is_deprecated + } + ], + filename: 'cluster-properties.adoc' + }, + 'object-storage': { + pageTitle: 'Object Storage Properties', + description: 'Reference of object storage properties.', + intro: `Object storage properties are a type of cluster property. 
For information on how to edit cluster properties, see xref:manage:cluster-maintenance/cluster-property-configuration.adoc[]. + +NOTE: Some object storage properties require that you restart the cluster for any updates to take effect. See the specific property details to identify whether or not a restart is required.`, + sectionTitle: 'Object storage configuration', + sectionIntro: 'Object storage properties should only be set if you enable xref:manage:tiered-storage.adoc[Tiered Storage].', + groups: [ + { + filter: (prop) => prop.name && ( + prop.name.includes('cloud_storage') || + prop.name.includes('s3_') || + prop.name.includes('azure_') || + prop.name.includes('gcs_') || + prop.name.includes('archival_') || + prop.name.includes('remote_') || + prop.name.includes('tiered_') + ) && !prop.is_deprecated + } + ], + filename: 'object-storage-properties.adoc' + }, + topic: { + pageTitle: 'Topic Configuration Properties', + pageAliases: ['reference:topic-properties.adoc'], + description: 'Reference of topic configuration properties.', + intro: `A topic-level property sets a Redpanda or Kafka configuration for a particular topic. + +Many topic-level properties have corresponding xref:manage:cluster-maintenance/cluster-property-configuration.adoc[cluster properties] that set a default value for all topics of a cluster. To customize the value for a topic, you can set a topic-level property that overrides the value of the corresponding cluster property. + +NOTE: All topic properties take effect immediately after being set.`, + sectionTitle: 'Topic configuration', + groups: [ + { + filter: (prop) => prop.config_scope === 'topic' && !prop.is_deprecated, + template: 'topic-property' + } + ], + filename: 'topic-properties.adoc' + } +}; + +// "src/v/kafka/server/handlers/topics/types.cc": "topic" + +/** + * Gets template path, checking environment variables for custom paths first + */ +function getTemplatePath(defaultPath, envVar) { + const customPath = process.env[envVar]; + if (customPath && fs.existsSync(customPath)) { + console.log(`📄 Using custom template: ${customPath}`); + return customPath; + } + return defaultPath; +} + +/** + * Registers Handlebars partials from template files + */ +function registerPartials() { + const templatesDir = path.join(__dirname, 'templates'); + + // Register property partial + const propertyTemplatePath = getTemplatePath( + path.join(templatesDir, 'property.hbs'), + 'TEMPLATE_PROPERTY' + ); + const propertyTemplate = fs.readFileSync(propertyTemplatePath, 'utf8'); + handlebars.registerPartial('property', propertyTemplate); + + // Register topic property partial + const topicPropertyTemplatePath = getTemplatePath( + path.join(templatesDir, 'topic-property.hbs'), + 'TEMPLATE_TOPIC_PROPERTY' + ); + const topicPropertyTemplate = fs.readFileSync(topicPropertyTemplatePath, 'utf8'); + handlebars.registerPartial('topic-property', topicPropertyTemplate); + + // Register deprecated property partial + const deprecatedPropertyTemplatePath = getTemplatePath( + path.join(templatesDir, 'deprecated-property.hbs'), + 'TEMPLATE_DEPRECATED_PROPERTY' + ); + const deprecatedPropertyTemplate = fs.readFileSync(deprecatedPropertyTemplatePath, 'utf8'); + handlebars.registerPartial('deprecated-property', deprecatedPropertyTemplate); +} + +/** + * Generates documentation for a specific property type + */ +function generatePropertyDocs(properties, config, outputDir) { + const templatePath = getTemplatePath( + path.join(__dirname, 'templates', 'property-page.hbs'), + 'TEMPLATE_PROPERTY_PAGE' + 
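+    // getTemplatePath falls back to the bundled templates/property-page.hbs when no TEMPLATE_PROPERTY_PAGE env var is set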
); + const template = handlebars.compile(fs.readFileSync(templatePath, 'utf8')); + + // Filter and group properties according to configuration + const groups = config.groups.map(group => { + const filteredProperties = Object.values(properties) + .filter(prop => group.filter(prop)) + .sort((a, b) => String(a.name || '').localeCompare(String(b.name || ''))); + + return { + title: group.title, + intro: group.intro, + properties: filteredProperties, + template: group.template || 'property' // Default to 'property' template + }; + }).filter(group => group.properties.length > 0); + + const data = { + ...config, + groups + }; + + const output = template(data); + const outputPath = path.join(outputDir, config.filename); + + fs.mkdirSync(path.dirname(outputPath), { recursive: true }); + fs.writeFileSync(outputPath, output, 'utf8'); + + console.log(`✅ Generated ${outputPath}`); + return groups.reduce((total, group) => total + group.properties.length, 0); +} + +/** + * Generates deprecated properties documentation + */ +function generateDeprecatedDocs(properties, outputDir) { + const templatePath = getTemplatePath( + path.join(__dirname, 'templates', 'deprecated-properties.hbs'), + 'TEMPLATE_DEPRECATED' + ); + const template = handlebars.compile(fs.readFileSync(templatePath, 'utf8')); + + const deprecatedProperties = Object.values(properties).filter(prop => prop.is_deprecated); + + const brokerProperties = deprecatedProperties + .filter(prop => prop.config_scope === 'broker') + .sort((a, b) => String(a.name || '').localeCompare(String(b.name || ''))); + + const clusterProperties = deprecatedProperties + .filter(prop => prop.config_scope === 'cluster') + .sort((a, b) => String(a.name || '').localeCompare(String(b.name || ''))); + + const data = { + deprecated: deprecatedProperties.length > 0, + brokerProperties: brokerProperties.length > 0 ? brokerProperties : null, + clusterProperties: clusterProperties.length > 0 ? 
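+      // use null rather than an empty array so the template can simply test for presence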
clusterProperties : null + }; + + const output = template(data); + const outputPath = path.join(outputDir, 'deprecated', 'partials', 'deprecated-properties.adoc'); + + fs.mkdirSync(path.dirname(outputPath), { recursive: true }); + fs.writeFileSync(outputPath, output, 'utf8'); + + console.log(`✅ Generated ${outputPath}`); + return deprecatedProperties.length; +} + +/** + * Main function to generate all property documentation + */ +function generateAllDocs(inputFile, outputDir) { + // Register partials + registerPartials(); + + // Read input JSON + const data = JSON.parse(fs.readFileSync(inputFile, 'utf8')); + const properties = data.properties || {}; + + let totalProperties = 0; + let totalBrokerProperties = 0; + let totalClusterProperties = 0; + let totalObjectStorageProperties = 0; + let totalTopicProperties = 0; + + // Generate each type of documentation + for (const [type, config] of Object.entries(PROPERTY_CONFIG)) { + const count = generatePropertyDocs(properties, config, path.join(outputDir, 'pages')); + totalProperties += count; + + if (type === 'broker') totalBrokerProperties = count; + else if (type === 'cluster') totalClusterProperties = count; + else if (type === 'object-storage') totalObjectStorageProperties = count; + else if (type === 'topic') totalTopicProperties = count; + } + + // Generate deprecated properties documentation + const deprecatedCount = generateDeprecatedDocs(properties, path.join(outputDir, 'pages')); + + // Generate summary file + const allPropertiesContent = Object.keys(properties).sort().join('\n'); + fs.writeFileSync(path.join(outputDir, 'all_properties.txt'), allPropertiesContent, 'utf8'); + + // Generate error reports + generateErrorReports(properties, outputDir); + + console.log(`📊 Generation Summary:`); + console.log(` Total properties read: ${Object.keys(properties).length}`); + console.log(` Total Broker properties: ${totalBrokerProperties}`); + console.log(` Total Cluster properties: ${totalClusterProperties}`); + console.log(` Total Object Storage properties: ${totalObjectStorageProperties}`); + console.log(` Total Topic properties: ${totalTopicProperties}`); + console.log(` Total Deprecated properties: ${deprecatedCount}`); + + return { + totalProperties: Object.keys(properties).length, + brokerProperties: totalBrokerProperties, + clusterProperties: totalClusterProperties, + objectStorageProperties: totalObjectStorageProperties, + topicProperties: totalTopicProperties, + deprecatedProperties: deprecatedCount + }; +} + +/** + * Generate error reports for properties with missing or invalid data + */ +function generateErrorReports(properties, outputDir) { + const errorDir = path.join(outputDir, 'error'); + fs.mkdirSync(errorDir, { recursive: true }); + + const emptyDescriptions = []; + const deprecatedProperties = []; + + Object.values(properties).forEach(prop => { + if (!prop.description || prop.description.trim() === '') { + emptyDescriptions.push(prop.name); + } + if (prop.is_deprecated) { + deprecatedProperties.push(prop.name); + } + }); + + // Write error reports + if (emptyDescriptions.length > 0) { + fs.writeFileSync( + path.join(errorDir, 'empty_description.txt'), + emptyDescriptions.join('\n'), + 'utf8' + ); + const percentage = ((emptyDescriptions.length / Object.keys(properties).length) * 100).toFixed(2); + console.log(`You have ${emptyDescriptions.length} properties with empty description. Percentage of errors: ${percentage}%. 
Data written in 'empty_description.txt'.`);
+  }
+
+  if (deprecatedProperties.length > 0) {
+    fs.writeFileSync(
+      path.join(errorDir, 'deprecated_properties.txt'),
+      deprecatedProperties.join('\n'),
+      'utf8'
+    );
+    const percentage = ((deprecatedProperties.length / Object.keys(properties).length) * 100).toFixed(2);
+    console.log(`You have ${deprecatedProperties.length} deprecated properties. Percentage of errors: ${percentage}%. Data written in 'deprecated_properties.txt'.`);
+  }
+}
+
+module.exports = {
+  generateAllDocs,
+  generatePropertyDocs,
+  generateDeprecatedDocs,
+  PROPERTY_CONFIG
+};
+
+// CLI interface
+if (require.main === module) {
+  const args = process.argv.slice(2);
+  if (args.length < 2) {
+    console.error('Usage: node generate-handlebars-docs.js <input-json> <output-dir>');
+    process.exit(1);
+  }
+
+  const [inputFile, outputDir] = args;
+
+  if (!fs.existsSync(inputFile)) {
+    console.error(`❌ Input file not found: ${inputFile}`);
+    process.exit(1);
+  }
+
+  try {
+    generateAllDocs(inputFile, outputDir);
+    console.log('✅ Documentation generation completed successfully');
+  } catch (error) {
+    console.error(`❌ Error generating documentation: ${error.message}`);
+    process.exit(1);
+  }
+}
diff --git a/tools/property-extractor/helpers/and.js b/tools/property-extractor/helpers/and.js
new file mode 100644
index 0000000..8ae2cbc
--- /dev/null
+++ b/tools/property-extractor/helpers/and.js
@@ -0,0 +1,10 @@
+/**
+ * Handlebars helper for logical AND
+ * @param {...*} args - Values to check
+ * @returns {boolean} True if all values are truthy per JavaScript semantics
+ */
+module.exports = function and(...args) {
+  // Remove the last argument which is the Handlebars options object
+  const values = args.slice(0, -1);
+  return values.every(val => Boolean(val));
+};
diff --git a/tools/property-extractor/helpers/eq.js b/tools/property-extractor/helpers/eq.js
new file mode 100644
index 0000000..37e8774
--- /dev/null
+++ b/tools/property-extractor/helpers/eq.js
@@ -0,0 +1,9 @@
+/**
+ * Handlebars helper for equality comparison
+ * @param {*} a - First value
+ * @param {*} b - Second value
+ * @returns {boolean} True if values are equal
+ */
+module.exports = function eq(a, b) {
+  return a === b;
+};
diff --git a/tools/property-extractor/helpers/formatPropertyValue.js b/tools/property-extractor/helpers/formatPropertyValue.js
new file mode 100644
index 0000000..9ad4502
--- /dev/null
+++ b/tools/property-extractor/helpers/formatPropertyValue.js
@@ -0,0 +1,128 @@
+const handlebars = require('handlebars');
+
+/**
+ * Helper function to format a value (used in object/array formatting)
+ */
+function formatValue(val) {
+  if (typeof val === 'string') {
+    return `"${val}"`;
+  } else if (typeof val === 'boolean') {
+    return val ? 
'true' : 'false'; + } else if (val === null || val === undefined) { + return 'null'; + } else if (Array.isArray(val)) { + return "[" + val.map(v => formatValue(v)).join(", ") + "]"; + } else if (typeof val === 'object' && val !== null) { + return JSON.stringify(val); + } else { + return String(val); + } +} + +/** + * Process C++ internal representations and convert them to user-friendly formats + * This matches the Python process_defaults function logic + */ +function processDefaults(inputString, suffix) { + if (typeof inputString !== 'string') { + return inputString; + } + + // Test for ip:port in vector: std::vector({{...}}) + const vectorMatch = inputString.match(/std::vector\(\{\{("([\d.]+)",\s*(\d+))\}\}\)/); + if (vectorMatch) { + const ip = vectorMatch[2]; + const port = vectorMatch[3]; + return [`${ip}:${port}`]; + } + + // Test for ip:port in single-string: net::unresolved_address("127.0.0.1", 9092) + const brokerMatch = inputString.match(/net::unresolved_address\("([\d.]+)",\s*(\d+)\)/); + if (brokerMatch) { + const ip = brokerMatch[1]; + const port = brokerMatch[2]; + return `${ip}:${port}`; + } + + // Handle std::nullopt + if (inputString.includes('std::nullopt')) { + return inputString.replace(/std::nullopt/g, 'null'); + } + + // Handle time units and other patterns would go here... + // For now, return the original string + return inputString; +} + +/** + * Formats a property value for display, matching Python legacy format exactly + * @param {*} value - The value to format + * @param {string} type - The property type + * @returns {handlebars.SafeString} Formatted value + */ +module.exports = function formatPropertyValue(value, type) { + if (value === null || value === undefined || value === '') { + return new handlebars.SafeString('null'); + } + + if (typeof value === 'boolean') { + return new handlebars.SafeString(value ? 
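+      // emit bare true/false literals to match the legacy Python generator's output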
'true' : 'false'); + } + + if (typeof value === 'object' && !Array.isArray(value)) { + // Format object defaults with Python-style syntax: {key: "value", key2: value2} + const pairs = []; + for (const [k, v] of Object.entries(value)) { + // Process each value for C++ representations + let processedValue = v; + if (typeof v === 'string') { + processedValue = processDefaults(v, null); + } + pairs.push(`${k}: ${formatValue(processedValue)}`); + } + return new handlebars.SafeString(`{${pairs.join(', ')}}`); + } + + if (Array.isArray(value)) { + // Handle array defaults to match Python format + if (value.length === 0) { + return new handlebars.SafeString('[]'); + } else { + // Format each array element + const formattedElements = []; + for (const item of value) { + if (typeof item === 'object' && item !== null) { + // Format object within array + const pairs = []; + for (const [k, v] of Object.entries(item)) { + // Process each value for C++ representations + let processedValue = v; + if (typeof v === 'string') { + processedValue = processDefaults(v, null); + } + pairs.push(`${k}: ${formatValue(processedValue)}`); + } + formattedElements.push(`{${pairs.join(', ')}}`); + } else { + formattedElements.push(String(item)); + } + } + return new handlebars.SafeString(`[${formattedElements.join(', ')}]`); + } + } + + // For other types, handle strings vs non-strings differently + let result; + if (typeof value === 'string') { + // Keep strings as-is (preserve original characters and casing) + result = value; + } else { + // Convert non-string values to String without altering case/quotes + result = String(value); + } + + // Apply C++ processing + result = processDefaults(result, null); + + return new handlebars.SafeString(result); +}; diff --git a/tools/property-extractor/helpers/formatUnits.js b/tools/property-extractor/helpers/formatUnits.js new file mode 100644 index 0000000..6e34ac9 --- /dev/null +++ b/tools/property-extractor/helpers/formatUnits.js @@ -0,0 +1,26 @@ +/** + * Formats units for display based on property name suffix + * @param {string} name - Property name that might contain unit suffixes + * @returns {string} Formatted unit description + */ +module.exports = function formatUnits(name) { + const suffixToUnit = { + 'ms': 'milliseconds', + 'sec': 'seconds', + 'seconds': 'seconds', + 'bytes': 'bytes', + 'buf': 'bytes', + 'partitions': 'number of partitions per topic', + 'percent': 'percent', + 'bps': 'bytes per second', + 'fraction': 'fraction' + }; + + if (!name) return ''; + + // Extract the last part after splitting on underscores (like Python implementation) + const parts = name.split('_'); + const suffix = parts[parts.length - 1]; + + return suffixToUnit[suffix] || ''; +}; diff --git a/tools/property-extractor/helpers/index.js b/tools/property-extractor/helpers/index.js new file mode 100644 index 0000000..4665020 --- /dev/null +++ b/tools/property-extractor/helpers/index.js @@ -0,0 +1,13 @@ +'use strict'; + +module.exports = { + join: require('./join.js'), + eq: require('./eq.js'), + ne: require('./ne.js'), + and: require('./and.js'), + or: require('./or.js'), + not: require('./not.js'), + formatPropertyValue: require('./formatPropertyValue.js'), + renderPropertyExample: require('./renderPropertyExample.js'), + formatUnits: require('./formatUnits.js'), +}; diff --git a/tools/property-extractor/helpers/join.js b/tools/property-extractor/helpers/join.js new file mode 100644 index 0000000..f4da251 --- /dev/null +++ b/tools/property-extractor/helpers/join.js @@ -0,0 +1,18 @@ +/** + * 
Handlebars helper to join an array with a separator + * @param {Array} array - The array to join + * @param {string} separator - The separator to use + * @returns {string} The joined string + */ +module.exports = function join(array, separator) { + if (!Array.isArray(array)) { + return ''; + } + + // Detect and ignore Handlebars options object + if (separator && typeof separator === 'object' && (separator.hash !== undefined || separator.data !== undefined)) { + separator = undefined; + } + + return array.join(separator || ', '); +}; diff --git a/tools/property-extractor/helpers/ne.js b/tools/property-extractor/helpers/ne.js new file mode 100644 index 0000000..a62854f --- /dev/null +++ b/tools/property-extractor/helpers/ne.js @@ -0,0 +1,9 @@ +/** + * Handlebars helper for inequality comparison + * @param {*} a - First value + * @param {*} b - Second value + * @returns {boolean} True if values are not equal + */ +module.exports = function ne(a, b) { + return a !== b; +}; diff --git a/tools/property-extractor/helpers/not.js b/tools/property-extractor/helpers/not.js new file mode 100644 index 0000000..a1e7fe1 --- /dev/null +++ b/tools/property-extractor/helpers/not.js @@ -0,0 +1,8 @@ +/** + * Handlebars helper for logical NOT + * @param {*} value - Value to negate + * @returns {boolean} True if value is falsy + */ +module.exports = function not(value) { + return !value; +}; diff --git a/tools/property-extractor/helpers/or.js b/tools/property-extractor/helpers/or.js new file mode 100644 index 0000000..38219bf --- /dev/null +++ b/tools/property-extractor/helpers/or.js @@ -0,0 +1,10 @@ +/** + * Handlebars helper for logical OR + * @param {...*} args - Values to check + * @returns {boolean} True if any value is truthy + */ +module.exports = function or(...args) { + // Remove the last argument which is the Handlebars options object + const values = args.slice(0, -1); + return values.some(val => !!val); +}; diff --git a/tools/property-extractor/helpers/renderPropertyExample.js b/tools/property-extractor/helpers/renderPropertyExample.js new file mode 100644 index 0000000..4e2062c --- /dev/null +++ b/tools/property-extractor/helpers/renderPropertyExample.js @@ -0,0 +1,42 @@ +const handlebars = require('handlebars'); + +/** + * Renders an example for a property based on its format + * @param {Object} property - The property object containing example data + * @returns {handlebars.SafeString} Formatted example block + */ +module.exports = function renderPropertyExample(property) { + if (!property.example) { + return new handlebars.SafeString(''); + } + + let exampleContent = ''; + + // Handle different example formats + if (typeof property.example === 'string') { + // Check if it's already a complete AsciiDoc example + if (property.example.includes('.Example') || property.example.includes('[,yaml]')) { + exampleContent = property.example; + } else { + // Simple string example - wrap it + exampleContent = `.Example\n[,yaml]\n----\n${property.name}: ${property.example}\n----`; + } + } else if (Array.isArray(property.example)) { + // Multiline array example + exampleContent = property.example.join('\n'); + } else if (typeof property.example === 'object' && property.example.title) { + // Structured example with title and content + exampleContent = `.${property.example.title}\n`; + if (property.example.description) { + exampleContent += `${property.example.description}\n\n`; + } + if (property.example.config) { + exampleContent += `[,yaml]\n----\n${JSON.stringify(property.example.config, null, 2)}\n----`; + } 
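+    // without a config key, the title and optional description still render, just without a YAML block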
+ } else { + // Fallback: JSON stringify the example + exampleContent = `.Example\n[,yaml]\n----\n${property.name}: ${JSON.stringify(property.example, null, 2)}\n----`; + } + + return new handlebars.SafeString('\n' + exampleContent + '\n'); +}; diff --git a/tools/property-extractor/json-to-asciidoc/generate_docs.py b/tools/property-extractor/json-to-asciidoc/generate_docs.py deleted file mode 100644 index 9db6d41..0000000 --- a/tools/property-extractor/json-to-asciidoc/generate_docs.py +++ /dev/null @@ -1,491 +0,0 @@ -import json -import os -import re -import argparse - -# --- Constants for Paths and Filenames --- -INPUT_JSON_PATH = "gen/" -INPUT_JSON_FILE = "properties-output.json" - -OUTPUT_DIR_DEFAULT = "output" -PAGE_FOLDER_NAME = "pages" -ERROR_FOLDER_NAME = "error" - -OUTPUT_FILE_BROKER = "broker-properties.adoc" -OUTPUT_FILE_CLUSTER = "cluster-properties.adoc" -OUTPUT_FILE_CLOUD = "object-storage-properties.adoc" -OUTPUT_FILE_TOPIC = "topic-properties.adoc" -OUTPUT_FILE_DEPRECATED = os.path.join("deprecated", "partials", "deprecated-properties.adoc") -ALL_PROPERTIES_FILE = "all_properties.txt" - -ERROR_FILE_DESCRIPTION = "empty_description.txt" -ERROR_FILE_TYPE = "empty_type.txt" -ERROR_FILE_MAX_WITHOUT_MIN = "max_without_min.txt" -ERROR_FILE_MIN_WITHOUT_MAX = "min_without_max.txt" - -# --- Static Documentation Strings --- -BROKER_PAGE_TITLE = ( - "= Broker Configuration Properties\n" - ":page-aliases: reference:node-properties.adoc, reference:node-configuration-sample.adoc\n" - ":description: Reference of broker configuration properties.\n\n" -) -BROKER_INTRO = ( - "Broker configuration properties are applied individually to each broker in a cluster. " - "You can find and modify these properties in the `redpanda.yaml` configuration file.\n\n" - "For information on how to edit broker properties, see xref:manage:cluster-maintenance/node-property-configuration.adoc[].\n\n" - "NOTE: All broker properties require that you restart Redpanda for any update to take effect.\n\n" -) -BROKER_TITLE = "== Broker configuration\n\n" - -SCHEMA_REGISTRY_TITLE = "== Schema Registry\n\n" -PANDAPROXY_TITLE = "== HTTP Proxy\n\n" -KAFKA_CLIENT_TITLE = "== HTTP Proxy Client\n\n" - -SCHEMA_REGISTRY_INTRO = ( - "The Schema Registry provides configuration properties to help you enable producers and consumers " - "to share information needed to serialize and deserialize producer and consumer messages.\n\n" - "For information on how to edit broker properties for the Schema Registry, see xref:manage:cluster-maintenance/node-property-configuration.adoc[].\n\n" -) -PANDAPROXY_INTRO = ( - "Redpanda HTTP Proxy allows access to your data through a REST API. 
For example, you can list topics or brokers, " - "get events, produce events, subscribe to events from topics using consumer groups, and commit offsets for a consumer.\n\n" - "See xref:develop:http-proxy.adoc[]\n\n" -) -KAFKA_CLIENT_INTRO = "Configuration options for HTTP Proxy Client.\n\n" - -CLUSTER_PAGE_TITLE = ( - "= Cluster Configuration Properties\n" - ":page-aliases: reference:tunable-properties.adoc, reference:cluster-properties.adoc\n" - ":description: Cluster configuration properties list.\n\n" -) -CLUSTER_CONFIG_INTRO = ( - "Cluster configuration properties are the same for all brokers in a cluster, and are set at the cluster level.\n\n" - "For information on how to edit cluster properties, see xref:manage:cluster-maintenance/cluster-property-configuration.adoc[] " - "or xref:manage:kubernetes/k-cluster-property-configuration.adoc[].\n\n" - "NOTE: Some cluster properties require that you restart the cluster for any updates to take effect. " - "See the specific property details to identify whether or not a restart is required.\n\n" -) -CLUSTER_CONFIG_TITLE = "== Cluster configuration\n\n" - -TOPIC_PAGE_TITLE = ( - "= Topic Configuration Properties\n" - ":page-aliases: reference:topic-properties.adoc\n" - ":description: Reference of topic configuration properties.\n\n" -) - -TOPIC_INTRO = ( - "A topic-level property sets a Redpanda or Kafka configuration for a particular topic.\n\n" - "Many topic-level properties have corresponding xref:manage:cluster-maintenance/cluster-property-configuration.adoc[cluster properties] that set a default value for all topics of a cluster. To customize the value for a topic, you can set a topic-level property that overrides the value of the corresponding cluster property.\n\n" - "NOTE: All topic properties take effect immediately after being set.\n\n" -) - -TOPIC_CONFIG_TITLE = "== Topic configuration\n\n" - -CLOUD_PAGE_TITLE = ( - "= Object Storage Properties\n" - ":description: Reference of object storage properties.\n\n" -) -CLOUD_CONFIG_INTRO = ( - "Object storage properties are a type of cluster property. For information on how to edit cluster properties, " - "see xref:manage:cluster-maintenance/cluster-property-configuration.adoc[].\n\n" - "NOTE: Some object storage properties require that you restart the cluster for any updates to take effect. " - "See the specific property details to identify whether or not a restart is required.\n\n" -) -CLOUD_CONFIG_TITLE = ( - "== Object storage configuration\n\n" - "Object storage properties should only be set if you enable xref:manage:tiered-storage.adoc[Tiered Storage].\n\n" -) - -DEPRECATED_PROPERTIES_TITLE = "\n== Configuration properties\n\n" -DEPRECATED_PROPERTIES_INTRO = "This is an exhaustive list of all the deprecated properties.\n\n" -DEPRECATED_BROKER_TITLE = "=== Broker properties\n\n" -DEPRECATED_CLUSTER_TITLE = "=== Cluster properties\n\n" - -# --- Mapping Constants --- -DEFINED_IN_MAPPING = { - "src/v/config/node_config.cc": "broker", - "src/v/pandaproxy/schema_registry/configuration.cc": "schema reg", - "src/v/pandaproxy/rest/configuration.cc": "http proxy", - "src/v/kafka/client/configuration.cc": "http client", - "src/v/config/configuration.cc": "cluster", - "src/v/kafka/server/handlers/topics/types.cc": "topic" -} - -SUFFIX_TO_UNIT = { - "ms": "milliseconds", - "sec": "seconds", # Code is not always consistent when using seconds. 
- "seconds": "seconds", - "bytes": "bytes", - "buf": "bytes", - "partitions": "number of partitions per topic", - "percent": "percent", - "bps": "bytes per second", - "fraction": "fraction" -} - -# --- Utility Functions --- -def parse_arguments(): - parser = argparse.ArgumentParser( - description="Generate documentation from properties JSON" - ) - parser.add_argument( - "--output-dir", - type=str, - required=True, - help="Directory to save the generated documentation", - ) - return parser.parse_args() - -def ensure_directory_exists(directory): - os.makedirs(directory, exist_ok=True) - -def load_json(input_path, input_file): - try: - with open(os.path.join(input_path, input_file), "r", encoding="utf-8") as json_file: - return json.load(json_file) - except FileNotFoundError: - print(f"Error: The file '{input_file}' does not exist.") - return {} - except json.JSONDecodeError as e: - print(f"Error: Failed to parse JSON in '{input_file}': {str(e)}") - return {} - -def process_defaults(input_string, suffix): - # Test for ip:port in vector - vector_match = re.search( - r'std::vector\(\{\{("([\d.]+)",\s*(\d+))\}\}\)', input_string - ) - if vector_match: - ip = vector_match.group(2) - port = vector_match.group(3) - return [f"{ip}:{port}"] - - # Test for ip:port in single-string - broker_match = re.search(r'net::unresolved_address\("([\d.]+)",\s*(\d+)\)', input_string) - if broker_match: - ip = broker_match.group(1) - port = broker_match.group(2) - return f"{ip}:{port}" - - # Handle single time units: seconds, milliseconds, hours, minutes - time_match = re.search(r"(\d+)(ms|s|min|h)", input_string) - # Handle complex time expressions like '24h*365' - complex_match = re.search(r"(\d+)(h|min|s|ms)\s*\*\s*(\d+)", input_string) - # Handle std::chrono::time expressions - chrono_match = re.search(r"std::chrono::(\w+)[\{\(](\d+)[\)\}]", input_string) - - if time_match: - value = int(time_match.group(1)) - unit = time_match.group(2) - if suffix == "ms": - if unit == "ms": - return value - elif unit == "s": - return value * 1000 - elif unit == "min": - return value * 60 * 1000 - elif unit == "h": - return value * 60 * 60 * 1000 - elif suffix == "sec": - if unit == "s": - return value - elif unit == "min": - return value * 60 - elif unit == "h": - return value * 60 * 60 - elif unit == "ms": - return value / 1000 - - if complex_match: - value = int(complex_match.group(1)) - unit = complex_match.group(2) - multiplier = int(complex_match.group(3)) - if suffix == "ms": - if unit == "h": - return value * 60 * 60 * 1000 * multiplier - elif unit == "min": - return value * 60 * 1000 * multiplier - elif unit == "s": - return value * 1000 * multiplier - elif unit == "ms": - return value * multiplier - elif suffix == "sec": - if unit == "h": - return value * 60 * 60 * multiplier - elif unit == "min": - return value * 60 * multiplier - elif unit == "s": - return value * multiplier - elif unit == "ms": - return (value * multiplier) / 1000 - - if chrono_match: - chrono_unit = chrono_match.group(1) - chrono_value = int(chrono_match.group(2)) - chrono_conversion = { - "milliseconds": 1, - "seconds": 1000, - "minutes": 60 * 1000, - "hours": 60 * 60 * 1000, - "days": 24 * 60 * 60 * 1000, - "weeks": 7 * 24 * 60 * 60 * 1000, - } - if suffix == "ms": - return chrono_value * chrono_conversion.get(chrono_unit, 1) - elif suffix == "sec": - if chrono_unit == "milliseconds": - return chrono_value / 1000 - else: - return (chrono_value * chrono_conversion.get(chrono_unit, 1)) / 1000 - - # Return the original string if no pattern matches 
- return input_string - -def generate_property_doc(key, value): - """ - Generate documentation string for a single property. - Returns None if required fields are missing. - """ - description = value.get("description", "").strip() - prop_type = value.get("type", "").strip() - if not description or not prop_type: - return None - - # Capitalize first letter and ensure a period at the end. - description = description[0].upper() + description[1:] - if not description.endswith('.'): - description += '.' - - lines = [f"=== {value.get('name')}\n\n", f"{description}\n\n"] - - property_suffix = value.get("name").split('_')[-1] - if property_suffix in SUFFIX_TO_UNIT: - lines.append(f"*Unit:* {SUFFIX_TO_UNIT[property_suffix]}\n\n") - - # For non-broker properties (node_config.cc indicates broker), add restart info. - if value.get("defined_in") != "src/v/config/node_config.cc": - restart = "Yes" if value.get("needs_restart", False) else "No" - lines.append(f"*Requires restart:* {restart}\n\n") - - if "gets_restored" in value: - restored = "Yes" if value.get("gets_restored", False) else "No" - lines.append(f"*Gets restored during cluster restore:* {restored}\n\n") - - visibility = value.get("visibility") or "user" - lines.append(f"*Visibility:* `{visibility}`\n\n") - - if prop_type in ["string", "array", "number", "boolean", "integer"]: - lines.append(f"*Type:* {prop_type}\n\n") - - # Add aliases if they exist - aliases = value.get("aliases") - if aliases and len(aliases) > 0: - aliases_str = ", ".join(f"`{alias}`" for alias in aliases) - lines.append(f"*Aliases:* {aliases_str}\n\n") - - if value.get("maximum") is not None and value.get("minimum") is not None: - lines.append( - f"*Accepted values:* [`{value.get('minimum')}`, `{value.get('maximum')}`]\n\n" - ) - - default = value.get("default") - if default is None or default == "": - default_str = "null" - elif isinstance(default, bool): - default_str = "true" if default else "false" - else: - default_str = str(default).replace("'", "").lower() - default_str = process_defaults(default_str, property_suffix) - lines.append(f"*Default:* `{default_str}`\n\n") - lines.append("---\n\n") - return "".join(lines) - -def write_data_to_file(output_dir, filename, data): - file_path = os.path.join(output_dir, filename) - ensure_directory_exists(os.path.dirname(file_path)) - try: - with open(file_path, "w+", encoding="utf-8") as output: - output.write(data) - print(f"Data written to {file_path} successfully.") - return True - except Exception as e: - print(f"Error writing data to {filename}: {str(e)}") - return False - -def write_error_file(output_dir, filename, error_content, total_properties): - file_path = os.path.join(output_dir, filename) - ensure_directory_exists(os.path.dirname(file_path)) - try: - if os.path.exists(file_path): - os.remove(file_path) - if error_content: - error_content = error_content.rstrip("\n") - with open(file_path, "w+", encoding="utf-8") as output: - output.write(error_content) - error_count = len(error_content.split("\n")) - if error_count > 0: - empty_name = filename.replace("empty_", "").replace(".txt", "") - error_type = ( - "deprecated properties" - if empty_name == "deprecated_properties" - else f"properties with empty {empty_name}" - ) - error_percentage = round((error_count / total_properties) * 100, 2) - print( - f"You have {error_count} {error_type}. Percentage of errors: {error_percentage}%. Data written in '{filename}'." 
- ) - except Exception as e: - print(f"Error writing error data to '{filename}': {str(e)}") - -# --- Main Processing --- -def main(): - args = parse_arguments() - output_dir = args.output_dir - page_folder = os.path.join(output_dir, PAGE_FOLDER_NAME) - error_folder = os.path.join(output_dir, ERROR_FOLDER_NAME) - - data = load_json(INPUT_JSON_PATH, INPUT_JSON_FILE) - properties = data.get("properties", {}) - total_properties = len(properties) - - # Accumulators for property documentation and error logs. - broker_config_content = [] - schema_registry_content = [] - pandaproxy_content = [] - kafka_client_content = [] - cluster_config_content = [] - cloud_config_content = [] - topic_config_content = [] - deprecated_broker_content = [] - deprecated_cluster_content = [] - all_properties = [] - empty_description_errors = [] - empty_type_errors = [] - max_without_min_errors = [] - min_without_max_errors = [] - deprecated_properties_errors = [] - - for key, value in properties.items(): - all_properties.append(key) - group = None - if key.startswith("cloud_"): - group = "cloud" - else: - group = DEFINED_IN_MAPPING.get(value.get("defined_in")) - - # Handle deprecated properties. - if value.get("is_deprecated") is True: - deprecated_properties_errors.append(key) - if group == "broker": - deprecated_broker_content.append(f"- {key}\n\n") - elif group in ["cluster", "cloud"]: - deprecated_cluster_content.append(f"- {key}\n\n") - continue - - # Log errors for missing description or type. - if not value.get("description", "").strip(): - empty_description_errors.append(key) - if not value.get("type", "").strip(): - empty_type_errors.append(key) - - # Check for max/min inconsistencies. - if value.get("maximum") is not None and value.get("minimum") is None: - max_without_min_errors.append(key) - if value.get("minimum") is not None and value.get("maximum") is None: - min_without_max_errors.append(key) - - property_doc = generate_property_doc(key, value) - if property_doc is None: - continue - - group_mapping = { - "broker": broker_config_content, - "schema reg": schema_registry_content, - "http proxy": pandaproxy_content, - "http client": kafka_client_content, - "cluster": cluster_config_content, - "cloud": cloud_config_content, - "topic": topic_config_content, - } - if group in group_mapping: - group_mapping[group].append(property_doc) - - # Construct final documentation pages. - broker_page = ( - BROKER_PAGE_TITLE - + BROKER_INTRO - + BROKER_TITLE - + "".join(broker_config_content) - + "\n\n" - + SCHEMA_REGISTRY_TITLE - + SCHEMA_REGISTRY_INTRO - + "".join(schema_registry_content) - + "\n\n" - + PANDAPROXY_TITLE - + PANDAPROXY_INTRO - + "".join(pandaproxy_content) - + "\n\n" - + KAFKA_CLIENT_TITLE - + KAFKA_CLIENT_INTRO - + "".join(kafka_client_content) - ) - cluster_page = ( - CLUSTER_PAGE_TITLE - + CLUSTER_CONFIG_INTRO - + CLUSTER_CONFIG_TITLE - + "".join(cluster_config_content) - ) - cloud_page = ( - CLOUD_PAGE_TITLE - + CLOUD_CONFIG_INTRO - + CLOUD_CONFIG_TITLE - + "".join(cloud_config_content) - ) - topic_page = ( - TOPIC_PAGE_TITLE - + TOPIC_INTRO - + TOPIC_CONFIG_TITLE - + "".join(topic_config_content) - ) - deprecated_page = ( - DEPRECATED_PROPERTIES_TITLE - + DEPRECATED_PROPERTIES_INTRO - + DEPRECATED_BROKER_TITLE - + "".join(deprecated_broker_content) - + DEPRECATED_CLUSTER_TITLE - + "".join(deprecated_cluster_content) - ) - - # Write output files. 
- write_data_to_file(page_folder, OUTPUT_FILE_BROKER, broker_page) - write_data_to_file(page_folder, OUTPUT_FILE_CLUSTER, cluster_page) - write_data_to_file(page_folder, OUTPUT_FILE_CLOUD, cloud_page) - write_data_to_file(page_folder, OUTPUT_FILE_TOPIC, topic_page) - write_data_to_file(page_folder, OUTPUT_FILE_DEPRECATED, deprecated_page) - write_data_to_file(output_dir, ALL_PROPERTIES_FILE, "\n".join(all_properties)) - - # Write error files. - write_error_file( - error_folder, ERROR_FILE_DESCRIPTION, "\n".join(empty_description_errors), total_properties - ) - write_error_file( - error_folder, ERROR_FILE_TYPE, "\n".join(empty_type_errors), total_properties - ) - write_error_file( - error_folder, ERROR_FILE_MAX_WITHOUT_MIN, "\n".join(max_without_min_errors), total_properties - ) - write_error_file( - error_folder, ERROR_FILE_MIN_WITHOUT_MAX, "\n".join(min_without_max_errors), total_properties - ) - write_error_file( - error_folder, "deprecated_properties.txt", "\n".join(deprecated_properties_errors), total_properties - ) - - # Print summary. - print(f"Total properties read: {total_properties}") - print(f"Total Broker properties: {len(broker_config_content)}") - print(f"Total Cluster properties: {len(cluster_config_content)}") - print(f"Total Cloud properties: {len(cloud_config_content)}") - -if __name__ == "__main__": - main() diff --git a/tools/property-extractor/package-lock.json b/tools/property-extractor/package-lock.json new file mode 100644 index 0000000..d6d57eb --- /dev/null +++ b/tools/property-extractor/package-lock.json @@ -0,0 +1,77 @@ +{ + "name": "property-extractor", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "dependencies": { + "handlebars": "^4.7.8" + } + }, + "node_modules/handlebars": { + "version": "4.7.8", + "resolved": "https://registry.npmjs.org/handlebars/-/handlebars-4.7.8.tgz", + "integrity": "sha512-vafaFqs8MZkRrSX7sFVUdo3ap/eNiLnb4IakshzvP56X5Nr1iGKAIqdX6tMlm6HcNRIkr6AxO5jFEoJzzpT8aQ==", + "license": "MIT", + "dependencies": { + "minimist": "^1.2.5", + "neo-async": "^2.6.2", + "source-map": "^0.6.1", + "wordwrap": "^1.0.0" + }, + "bin": { + "handlebars": "bin/handlebars" + }, + "engines": { + "node": ">=0.4.7" + }, + "optionalDependencies": { + "uglify-js": "^3.1.4" + } + }, + "node_modules/minimist": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", + "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/neo-async": { + "version": "2.6.2", + "resolved": "https://registry.npmjs.org/neo-async/-/neo-async-2.6.2.tgz", + "integrity": "sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==", + "license": "MIT" + }, + "node_modules/source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/uglify-js": { + "version": "3.19.3", + "resolved": "https://registry.npmjs.org/uglify-js/-/uglify-js-3.19.3.tgz", + "integrity": "sha512-v3Xu+yuwBXisp6QYTcH4UbH+xYJXqnq2m/LtQVWKWzYc1iehYnLixoQDN9FH6/j9/oybfd6W9Ghwkl8+UMKTKQ==", + "license": "BSD-2-Clause", + "optional": true, + "bin": { + "uglifyjs": "bin/uglifyjs" + }, + "engines": 
{
+        "node": ">=0.8.0"
+      }
+    },
+    "node_modules/wordwrap": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-1.0.0.tgz",
+      "integrity": "sha512-gvVzJFlPycKc5dZN4yPkP8w7Dc37BtP1yczEneOb4uq34pXZcvrtRTmWV8W+Ume+XCxKgbjM+nevkyFPMybd4Q==",
+      "license": "MIT"
+    }
+  }
+}
diff --git a/tools/property-extractor/package.json b/tools/property-extractor/package.json
new file mode 100644
index 0000000..26b75db
--- /dev/null
+++ b/tools/property-extractor/package.json
@@ -0,0 +1,6 @@
+{
+  "private": true,
+  "dependencies": {
+    "handlebars": "^4.7.8"
+  }
+}
diff --git a/tools/property-extractor/property_extractor.py b/tools/property-extractor/property_extractor.py
index c6870cb..5e95ca7 100755
--- a/tools/property-extractor/property_extractor.py
+++ b/tools/property-extractor/property_extractor.py
@@ -1,9 +1,65 @@
 #!/usr/bin/env python3
+"""
+Redpanda Configuration Property Extractor
+
+This script extracts configuration properties from Redpanda's C++ source code and generates
+JSON schema definitions with proper type resolution and default value expansion.
+
+SPECIAL HANDLING FOR one_or_many_property TYPES:
+
+Redpanda uses a custom C++ type called `one_or_many_property<T>` for configuration properties
+that can accept either a single value or an array of values. Examples include:
+
+- admin: one_or_many_property<model::broker_endpoint>
+- admin_api_tls: one_or_many_property<endpoint_tls_config>
+- kafka_api_tls: one_or_many_property<endpoint_tls_config>
+
+These properties allow flexible configuration syntax:
+    Single value: admin: {address: "127.0.0.1", port: 9644}
+    Array syntax: admin: [{address: "127.0.0.1", port: 9644}, {address: "0.0.0.0", port: 9645}]
+
+PROCESSING PIPELINE:
+
+1. **Property Detection & Transformation** (transformers.py):
+   - IsArrayTransformer detects one_or_many_property declarations
+   - Marks these properties as type="array" with items.type extracted from T
+   - TypeTransformer extracts inner types from template declarations
+
+2. **Type Resolution & Default Expansion** (property_extractor.py):
+   - resolve_type_and_default() converts C++ types to JSON schema types
+   - Expands C++ constructor defaults to structured JSON objects
+   - Ensures array-type properties have array defaults (wraps single objects in arrays)
+
+3. **Documentation Generation** (generate-handlebars-docs.js):
+   - Properly formats array defaults as [{ }] instead of { }
+   - Displays correct types in documentation (array vs object)
+
+EXAMPLE TRANSFORMATION:
+
+C++ Source:
+    one_or_many_property<model::broker_endpoint> admin(
+      *this, "admin", "Network address for Admin API",
+      {model::broker_endpoint(net::unresolved_address("127.0.0.1", 9644))}
+    );
+
+JSON Output:
+    "admin": {
+        "type": "array",
+        "items": {"type": "object"},
+        "default": [{"address": "127.0.0.1", "port": 9644}]
+    }
+
+Documentation Output:
+    Type: array
+    Default: [{address: "127.0.0.1", port: 9644}]
+"""
 import logging
 import sys
 import os
 import json
 import re
+import yaml
+from copy import deepcopy
 from pathlib import Path
 from file_pair import FilePair
@@ -13,9 +69,138 @@ from property_bag import PropertyBag
 from transformers import *
 
+# Import topic property extractor
+try:
+    from topic_property_extractor import TopicPropertyExtractor
+except ImportError:
+    # TopicPropertyExtractor not available, will skip topic property extraction
+    TopicPropertyExtractor = None
+
 logger = logging.getLogger("viewer")
 
+
+def resolve_cpp_function_call(function_name):
+    """
+    Dynamically resolve C++ function calls to their return values by searching the source code.
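+
+    Example (illustrative; the resolved value depends on the Redpanda source tree
+    that is checked out locally, so the topic name shown here is an assumption):
+
+        resolve_cpp_function_call("model::kafka_audit_logging_topic")
+        # -> "_redpanda.audit_log"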
+ + Args: + function_name: The C++ function name (e.g., "model::kafka_audit_logging_topic") + + Returns: + The resolved string value or None if not found + """ + # Map function names to likely search patterns and file locations + search_patterns = { + 'model::kafka_audit_logging_topic': { + 'patterns': [ + r'inline\s+const\s+model::topic\s+kafka_audit_logging_topic\s*\(\s*"([^"]+)"\s*\)', + r'const\s+model::topic\s+kafka_audit_logging_topic\s*\(\s*"([^"]+)"\s*\)', + r'model::topic\s+kafka_audit_logging_topic\s*\(\s*"([^"]+)"\s*\)', + r'std::string_view\s+kafka_audit_logging_topic\s*\(\s*\)\s*\{\s*return\s*"([^"]+)"', + r'inline\s+std::string_view\s+kafka_audit_logging_topic\s*\(\s*\)\s*\{\s*return\s*"([^"]+)"' + ], + 'files': ['src/v/model/namespace.h', 'src/v/model/namespace.cc', 'src/v/model/kafka_namespace.h'] + }, + 'model::kafka_consumer_offsets_topic': { + 'patterns': [ + r'inline\s+const\s+model::topic\s+kafka_consumer_offsets_topic\s*\(\s*"([^"]+)"\s*\)', + r'const\s+model::topic\s+kafka_consumer_offsets_topic\s*\(\s*"([^"]+)"\s*\)', + r'model::topic\s+kafka_consumer_offsets_topic\s*\(\s*"([^"]+)"\s*\)', + r'std::string_view\s+kafka_consumer_offsets_topic\s*\(\s*\)\s*\{\s*return\s*"([^"]+)"', + r'inline\s+std::string_view\s+kafka_consumer_offsets_topic\s*\(\s*\)\s*\{\s*return\s*"([^"]+)"' + ], + 'files': ['src/v/model/namespace.h', 'src/v/model/namespace.cc', 'src/v/model/kafka_namespace.h'] + }, + 'model::kafka_internal_namespace': { + 'patterns': [ + r'inline\s+const\s+model::ns\s+kafka_internal_namespace\s*\(\s*"([^"]+)"\s*\)', + r'const\s+model::ns\s+kafka_internal_namespace\s*\(\s*"([^"]+)"\s*\)', + r'model::ns\s+kafka_internal_namespace\s*\(\s*"([^"]+)"\s*\)', + r'std::string_view\s+kafka_internal_namespace\s*\(\s*\)\s*\{\s*return\s*"([^"]+)"', + r'inline\s+std::string_view\s+kafka_internal_namespace\s*\(\s*\)\s*\{\s*return\s*"([^"]+)"' + ], + 'files': ['src/v/model/namespace.h', 'src/v/model/namespace.cc', 'src/v/model/kafka_namespace.h'] + } + } + + # Check if we have search patterns for this function + if function_name not in search_patterns: + logger.debug(f"No search patterns defined for function: {function_name}") + return None + + config = search_patterns[function_name] + + # Try to find the Redpanda source directory + # Look for it in the standard locations used by the property extractor + redpanda_source_paths = [ + 'tmp/redpanda', # Current directory + '../tmp/redpanda', # Parent directory + 'tools/property-extractor/tmp/redpanda', # From project root + os.path.join(os.getcwd(), 'tools', 'property-extractor', 'tmp', 'redpanda') + ] + + redpanda_source = None + for path in redpanda_source_paths: + if os.path.exists(path): + redpanda_source = path + break + + if not redpanda_source: + logger.warning(f"Could not find Redpanda source directory to resolve function: {function_name}") + return None + + # Search in the specified files + for file_path in config['files']: + full_path = os.path.join(redpanda_source, file_path) + if not os.path.exists(full_path): + continue + + try: + with open(full_path, 'r', encoding='utf-8') as f: + content = f.read() + + # Try each pattern + for pattern in config['patterns']: + match = re.search(pattern, content, re.MULTILINE | re.DOTALL) + if match: + resolved_value = match.group(1) + logger.debug(f"Resolved {function_name}() -> '{resolved_value}' from {file_path}") + return resolved_value + + except Exception as e: + logger.debug(f"Error reading {full_path}: {e}") + continue + + # If not found in specific files, do a broader search + 
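+    # (This fallback recursively walks every .h/.cc file under src/v/model, so it
+    # is slower than the targeted file list above and only runs when that lookup fails.)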
logger.debug(f"Function {function_name} not found in expected files, doing broader search...") + + # Search more broadly in the model directory + model_dir = os.path.join(redpanda_source, 'src', 'v', 'model') + if os.path.exists(model_dir): + for root, dirs, files in os.walk(model_dir): + for file in files: + if file.endswith('.h') or file.endswith('.cc'): + file_path = os.path.join(root, file) + try: + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + + # Try patterns for this file + for pattern in config['patterns']: + match = re.search(pattern, content, re.MULTILINE | re.DOTALL) + if match: + resolved_value = match.group(1) + logger.debug(f"Resolved {function_name}() -> '{resolved_value}' from {file_path}") + return resolved_value + + except Exception as e: + logger.debug(f"Error reading {file_path}: {e}") + continue + + logger.warning(f"Could not resolve function call: {function_name}()") + return None + + def validate_paths(options): path = options.path @@ -128,23 +313,910 @@ def transform_files_with_properties(files_with_properties): # The definitions.json file contains type definitions that the extractor uses to standardize and centralize type information. After extracting and transforming the properties from the source code, the function merge_properties_and_definitions looks up each property's type in the definitions. If a property's type (or the type of its items, in the case of arrays) matches one of the definitions, the transformer replaces that type with a JSON pointer ( such as #/definitions/) to the corresponding entry in definitions.json. The final JSON output then includes both a properties section (with types now referencing the definitions) and a definitions section, so that consumers of the output can easily resolve the full type information. def merge_properties_and_definitions(properties, definitions): - for name in properties: - property = properties[name] - # guard against missing "type" - prop_type = property.get("type") - if prop_type and prop_type in definitions: - properties[name]["type"] = "#/definitions/{prop_type}" - elif ( - prop_type == "array" - and property.get("items", {}).get("type") in definitions - ): - properties[name]["items"]["type"] = ( - f"#/definitions/{property['items']['type']}" - ) - + # Do not overwrite the resolved type/default with a reference. Just return the resolved properties and definitions. return dict(properties=properties, definitions=definitions) +def apply_property_overrides(properties, overrides, overrides_file_path=None): + """ + Apply property overrides from the overrides JSON file to enhance property documentation. + + This function allows customizing property documentation by providing overrides for: + + 1. description: Override the auto-extracted property description with custom text + 2. version: Add version information showing when the property was introduced + 3. example: Add AsciiDoc example sections with flexible input formats (see below) + 4. default: Override the auto-extracted default value + + Multiple example input formats are supported for user convenience: + + 1. Direct AsciiDoc string: + "example": ".Example\n[,yaml]\n----\nredpanda:\n property_name: value\n----" + + 2. Multi-line array (each element becomes a line): + "example": [ + ".Example", + "[,yaml]", + "----", + "redpanda:", + " property_name: value", + "----" + ] + + 3. External file reference: + "example_file": "examples/property_name.adoc" + + 4. 
Auto-formatted YAML with title and description: + "example_yaml": { + "title": "Example Configuration", + "description": "This shows how to configure the property.", + "config": { + "redpanda": { + "property_name": "value" + } + } + } + + Args: + properties: Dictionary of extracted properties from C++ source + overrides: Dictionary loaded from overrides JSON file + overrides_file_path: Path to the overrides file (for resolving relative example_file paths) + + Returns: + Updated properties dictionary with overrides applied + """ + if overrides and "properties" in overrides: + for prop, override in overrides["properties"].items(): + if prop in properties: + # Apply description override + if "description" in override: + properties[prop]["description"] = override["description"] + + # Apply version override (introduced in version) + if "version" in override: + properties[prop]["version"] = override["version"] + + # Apply example override with multiple input format support + example_content = _process_example_override(override, overrides_file_path) + if example_content: + properties[prop]["example"] = example_content + + # Apply default override + if "default" in override: + properties[prop]["default"] = override["default"] + return properties + + +def _process_example_override(override, overrides_file_path=None): + """ + Process example overrides in various user-friendly formats. + + Supports multiple input formats for examples: + 1. Direct string: "example": "content" + 2. Multi-line array: "example": ["line1", "line2", ...] + 3. External file: "example_file": "path/to/file" + 4. Auto-formatted YAML: "example_yaml": {...} + + Args: + override: Dictionary containing override data for a property + overrides_file_path: Path to the overrides file (for resolving relative paths) + + Returns: + Processed AsciiDoc example content as string, or None if no example found + """ + # Format 1: Direct AsciiDoc string + if "example" in override: + example = override["example"] + if isinstance(example, str): + return example + elif isinstance(example, list): + # Format 2: Multi-line array - join with newlines + return "\n".join(example) + + # Format 3: External file reference + if "example_file" in override: + file_path = override["example_file"] + + # Support both absolute and relative paths + if not os.path.isabs(file_path): + # Build search paths starting with the overrides file directory + search_paths = [] + + # If we have the overrides file path, try relative to its directory first + if overrides_file_path: + overrides_dir = os.path.dirname(overrides_file_path) + search_paths.append(os.path.join(overrides_dir, file_path)) + + # Then try common locations relative to current working directory + search_paths.extend([ + file_path, + os.path.join("examples", file_path), + os.path.join("docs-data", file_path), + os.path.join("__tests__", "docs-data", file_path) + ]) + + found_path = None + for search_path in search_paths: + if os.path.exists(search_path): + found_path = search_path + break + + if found_path: + file_path = found_path + else: + print(f"Warning: Example file not found: {override['example_file']}") + print(f"Searched in: {', '.join(search_paths)}") + return None + + try: + with open(file_path, 'r', encoding='utf-8') as f: + return f.read().strip() + except Exception as e: + print(f"Error reading example file {file_path}: {e}") + return None + + # Format 4: Auto-formatted YAML configuration + if "example_yaml" in override: + yaml_data = override["example_yaml"] + title = yaml_data.get("title", 
"Example") + description = yaml_data.get("description", "") + config = yaml_data.get("config", {}) + + # Build AsciiDoc content + lines = [f".{title}"] + if description: + lines.append(f"{description}\n") + + lines.extend([ + "[,yaml]", + "----" + ]) + + # Convert config to YAML and add to lines + try: + yaml_content = yaml.dump(config, default_flow_style=False, indent=2) + lines.append(yaml_content.rstrip()) + except Exception as e: + import traceback + logger.error(f"Error formatting YAML config: {e}") + logger.debug(f"Full traceback:\n{traceback.format_exc()}") + return None + + lines.append("----") + + return "\n".join(lines) + + return None + + +def add_config_scope(properties): + """ + Add a config_scope field to each property based on its defined_in value or property type. + 'cluster' if defined_in == src/v/config/configuration.cc + 'broker' if defined_in == src/v/config/node_config.cc + 'topic' if is_topic_property == True + """ + for prop in properties.values(): + # Check if this is a topic property first + if prop.get("is_topic_property", False): + prop["config_scope"] = "topic" + else: + defined_in = prop.get("defined_in", "") + if defined_in == "src/v/config/configuration.cc": + prop["config_scope"] = "cluster" + elif defined_in == "src/v/config/node_config.cc": + prop["config_scope"] = "broker" + else: + prop["config_scope"] = None + return properties + + +def resolve_type_and_default(properties, definitions): + """ + Resolve type references and expand default values for all properties. + + This function performs several critical transformations: + + 1. **Type Resolution**: Converts C++ type names to JSON schema types + - model::broker_endpoint -> "object" + - std::string -> "string" + - Handles both direct type names and JSON pointer references (#/definitions/...) + + 2. **Default Value Expansion**: Transforms C++ constructor syntax to JSON objects + - model::broker_endpoint(net::unresolved_address("127.0.0.1", 9644)) + -> {address: "127.0.0.1", port: 9644} + + 3. 
**Array Default Handling**: Ensures one_or_many_property defaults are arrays + - For properties with type="array", wraps single object defaults in arrays + - Converts empty object strings "{}" to empty arrays [] + + This is essential for one_or_many_property types like 'admin' which should show: + - Type: array + - Default: [{address: "127.0.0.1", port: 9644}] (not just {address: ...}) + """ + import ast + import re + + def resolve_definition_type(defn): + """Recursively resolve $ref pointers to get the actual type definition.""" + # Recursively resolve $ref + while isinstance(defn, dict) and "$ref" in defn: + ref = defn["$ref"] + ref_name = ref.split("/")[-1] + defn = definitions.get(ref_name, defn) + return defn + + def parse_constructor(s): + """Parse C++ constructor syntax into type name and arguments.""" + s = s.strip() + if s.startswith("{") and s.endswith("}"): + s = s[1:-1].strip() + match = re.match(r'([a-zA-Z0-9_:]+)\((.*)\)', s) + if not match: + # Primitive or enum + if s.startswith('"') and s.endswith('"'): + return None, [ast.literal_eval(s)] + try: + return None, [int(s)] + except Exception: + return None, [s] + type_name, arg_str = match.groups() + args = [] + depth = 0 + current = '' + in_string = False + for c in arg_str: + if c == '"' and (not current or current[-1] != '\\'): + in_string = not in_string + if c == ',' and depth == 0 and not in_string: + if current.strip(): + args.append(current.strip()) + current = '' + else: + if c == '(' and not in_string: + depth += 1 + elif c == ')' and not in_string: + depth -= 1 + current += c + if current.strip(): + args.append(current.strip()) + return type_name, args + + def process_cpp_patterns(arg_str): + """ + Process specific C++ patterns to user-friendly values. + + Handles: + - net::unresolved_address("127.0.0.1", 9092) -> expands based on type definition + - std::nullopt -> null + - fips_mode_flag::disabled -> "disabled" + - model::kafka_audit_logging_topic() -> dynamically looked up from source + """ + arg_str = arg_str.strip() + + # Handle std::nullopt -> null + if arg_str == "std::nullopt": + return "null" + + # Handle C++ function calls that return constant values + # Dynamically look up function return values from the source code + function_call_match = re.match(r'([a-zA-Z0-9_:]+)\(\)', arg_str) + if function_call_match: + function_name = function_call_match.group(1) + resolved_value = resolve_cpp_function_call(function_name) + if resolved_value is not None: + return f'"{resolved_value}"' + + # Handle enum-like patterns (such as fips_mode_flag::disabled -> "disabled") + enum_match = re.match(r'[a-zA-Z0-9_:]+::([a-zA-Z0-9_]+)', arg_str) + if enum_match: + enum_value = enum_match.group(1) + return f'"{enum_value}"' + + # Handle default constructors and their default values + # This handles cases where C++ default constructors are used but should map to specific values + + # Pattern 1: Full constructor syntax like config::leaders_preference{} + constructor_patterns = { + r'config::leaders_preference\{\}': '"none"', # Based on C++ code analysis + r'std::chrono::seconds\{0\}': '0', + r'std::chrono::milliseconds\{0\}': '0', + r'model::timeout_clock::duration\{\}': '0', + r'config::data_directory_path\{\}': '""', + r'std::optional<[^>]+>\{\}': 'null', # Empty optional + } + + for pattern, replacement in constructor_patterns.items(): + if re.match(pattern, arg_str): + return replacement + + # Pattern 2: Truncated type names that likely came from default constructors + # These are cases where tree-sitter parsing truncated 
"config::type{}" to just "type" + truncated_patterns = { + 'leaders_preference': '"none"', # config::leaders_preference{} -> none + 'data_directory_path': '""', # config::data_directory_path{} -> empty string + 'timeout_clock_duration': '0', # model::timeout_clock::duration{} -> 0 + 'log_level': '"info"', # Default log level + 'compression_type': '"none"', # Default compression + } + + # Check if arg_str is exactly one of these truncated patterns + if arg_str in truncated_patterns: + return truncated_patterns[arg_str] + + # Pattern 3: Handle remaining default constructor syntax generically + generic_constructor_match = re.match(r'[a-zA-Z0-9_:]+\{\}', arg_str) + if generic_constructor_match: + # For unknown constructors, try to infer a reasonable default + type_name = arg_str[:-2] # Remove the {} + if 'duration' in type_name.lower() or 'time' in type_name.lower(): + return '0' + elif 'path' in type_name.lower() or 'directory' in type_name.lower(): + return '""' + elif 'optional' in type_name.lower(): + return 'null' + else: + return '""' # Conservative default to empty string + + # Handle string concatenation with + operator (such as "128_kib + 1") + if " + " in arg_str: + return f'"{arg_str}"' + + return arg_str + + def expand_default(type_name, default_str): + """ + Expand C++ default values into structured JSON objects. + + For array types with initializer list syntax like: + {model::broker_endpoint(net::unresolved_address("127.0.0.1", 9644))} + + This creates: [{address: "127.0.0.1", port: 9644}] + """ + # Handle non-string defaults + if not isinstance(default_str, str): + return default_str + + # Apply C++ pattern processing for simple cases (not complex constructor calls) + if not ("(" in default_str and "::" in default_str): + processed = process_cpp_patterns(default_str) + if processed != default_str: + # Pattern was processed, return the result + if processed == "null": + return None + elif processed.startswith('"') and processed.endswith('"'): + return ast.literal_eval(processed) + else: + return processed + + type_def = resolve_definition_type(definitions.get(type_name, {})) + if "enum" in type_def: + return default_str + # If it has properties but no explicit type, it's an object + if type_def.get("type") == "object" or (type_def.get("properties") and not type_def.get("type")): + tname, args = parse_constructor(default_str) + if tname is None: + return default_str + + props = list(type_def["properties"].keys()) + result = {} + + # For each constructor argument, try to expand it and map to the correct property + for i, prop in enumerate(props): + prop_def = type_def["properties"][prop] + if "$ref" in prop_def: + sub_type = prop_def["$ref"].split("/")[-1] + else: + sub_type = prop_def.get("type") + + if i < len(args): + arg = args[i] + # Check if this argument is a nested constructor call + if "(" in arg and "::" in arg: + # Parse the nested constructor + nested_tname, nested_args = parse_constructor(arg) + if nested_tname and nested_tname in definitions: + # Get the definition for the nested type + nested_type_def = resolve_definition_type(definitions.get(nested_tname, {})) + nested_props = list(nested_type_def.get("properties", {}).keys()) + + # Expand the nested constructor by mapping its arguments to its properties + nested_result = {} + for j, nested_prop in enumerate(nested_props): + nested_prop_def = nested_type_def["properties"][nested_prop] + if j < len(nested_args): + nested_arg = nested_args[j] + # Apply simple C++ pattern processing to the argument + 
processed_nested_arg = process_cpp_patterns(nested_arg) + + # Convert the processed argument based on the property type + if nested_prop_def.get("type") == "string": + if processed_nested_arg.startswith('"') and processed_nested_arg.endswith('"'): + nested_result[nested_prop] = ast.literal_eval(processed_nested_arg) + else: + nested_result[nested_prop] = processed_nested_arg + elif nested_prop_def.get("type") == "integer": + try: + nested_result[nested_prop] = int(processed_nested_arg) + except ValueError: + nested_result[nested_prop] = processed_nested_arg + elif nested_prop_def.get("type") == "boolean": + nested_result[nested_prop] = processed_nested_arg.lower() == "true" + else: + nested_result[nested_prop] = processed_nested_arg + else: + nested_result[nested_prop] = None + + # Now we have the expanded nested object, we need to map it to the parent object's properties + # This is where the type-aware mapping happens + + # Special case: if the nested type is net::unresolved_address and parent is broker_endpoint + if nested_tname == "net::unresolved_address" and type_name == "model::broker_endpoint": + # Map net::unresolved_address properties to broker_endpoint + # Only map the fields that actually exist in the net::unresolved_address + result["address"] = nested_result.get("address") + result["port"] = nested_result.get("port") + break + else: + # General case: if we have a single nested constructor argument, + # try to merge its properties into the parent + if i == 0 and len(args) == 1: + result.update(nested_result) + # Set remaining properties to None + for remaining_prop in props[i+1:]: + if remaining_prop not in result: + result[remaining_prop] = None + break + else: + # Map the nested object to the current property + result[prop] = nested_result + else: + # Fallback: recursively expand with the expected property type + expanded_arg = expand_default(sub_type, arg) + result[prop] = expanded_arg + else: + # Simple value, parse based on the property type + # First apply C++ pattern processing + processed_arg = process_cpp_patterns(arg) + + if sub_type == "string": + # If processed_arg is already quoted, use ast.literal_eval, otherwise keep as is + if processed_arg.startswith('"') and processed_arg.endswith('"'): + result[prop] = ast.literal_eval(processed_arg) + else: + result[prop] = processed_arg + elif sub_type == "integer": + try: + result[prop] = int(processed_arg) + except ValueError: + # If conversion fails, keep as string (might be processed C++ pattern) + result[prop] = processed_arg + elif sub_type == "boolean": + result[prop] = processed_arg.lower() == "true" + else: + result[prop] = processed_arg + else: + result[prop] = None + return result + elif type_def.get("type") == "array": + # Handle array defaults with C++ initializer list syntax like {model::broker_endpoint(...)} + # This is specifically important for one_or_many_property types that use initializer lists + # in their C++ defaults but should produce JSON arrays in the output. 
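+        # Note that a single-element initializer list still yields a one-element
+        # JSON array, which is what one_or_many_property consumers expect.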
+ # + # Example transformation: + # C++: {model::broker_endpoint(net::unresolved_address("127.0.0.1", 9644))} + # JSON: [{"address": "127.0.0.1", "port": 9644, "name": "127.0.0.1:9644"}] + if isinstance(default_str, str) and default_str.strip().startswith("{") and default_str.strip().endswith("}"): + # This is an initializer list, parse the elements + initializer_content = default_str.strip()[1:-1].strip() # Remove outer braces + if initializer_content: + # Parse multiple comma-separated elements + elements = [] + current_element = "" + paren_depth = 0 + in_quotes = False + + # Parse elements while respecting nested parentheses and quoted strings + for char in initializer_content: + if char == '"' and (not current_element or current_element[-1] != '\\'): + in_quotes = not in_quotes + + if not in_quotes: + if char == '(': + paren_depth += 1 + elif char == ')': + paren_depth -= 1 + elif char == ',' and paren_depth == 0: + # Found a top-level comma, this is a separator + if current_element.strip(): + elements.append(current_element.strip()) + current_element = "" + continue + + current_element += char + + # Add the last element + if current_element.strip(): + elements.append(current_element.strip()) + + # Try to determine the item type from the type_def + items_def = type_def.get("items", {}) + if "$ref" in items_def: + item_type_name = items_def["$ref"].split("/")[-1] + else: + item_type_name = items_def.get("type", "string") # Default to string for arrays + + # Process each element + result_array = [] + for element_str in elements: + # Check if this element is a function call that needs resolution + if "::" in element_str and element_str.endswith("()"): + # This is a function call, resolve it + resolved_value = process_cpp_patterns(element_str) + if resolved_value.startswith('"') and resolved_value.endswith('"'): + # Remove quotes from resolved string values + result_array.append(ast.literal_eval(resolved_value)) + else: + result_array.append(resolved_value) + elif element_str.startswith('"') and element_str.endswith('"'): + # This is a quoted string, parse it + result_array.append(ast.literal_eval(element_str)) + elif item_type_name == "string": + # For string items, expand using the item type (might be constructor) + expanded_element = expand_default(item_type_name, element_str) + result_array.append(expanded_element) + else: + # For other types, expand using the item type + expanded_element = expand_default(item_type_name, element_str) + result_array.append(expanded_element) + + return result_array + else: + return [] + else: + return default_str + else: + return default_str + + for prop in properties.values(): + t = prop.get("type") + ref_name = None + + # Handle both JSON pointer references and direct type names + if isinstance(t, str): + if t.startswith("#/definitions/"): + ref_name = t.split("/")[-1] + elif t in definitions: + ref_name = t + + if ref_name and ref_name in definitions: + defn = definitions.get(ref_name) + if defn: + resolved = resolve_definition_type(defn) + # Always set type to the resolved type string (object, string, etc.) 
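+                # e.g. "model::broker_endpoint" resolves to "object" and
+                # "std::string" resolves to "string" (see the docstring above).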
+ resolved_type = resolved.get("type") + if resolved_type in ("object", "string", "integer", "boolean", "array", "number"): + prop["type"] = resolved_type + else: + prop["type"] = "object" # fallback for complex types + # Expand default if possible + if "default" in prop and prop["default"] is not None: + expanded = expand_default(ref_name, prop["default"]) + prop["default"] = expanded + + # Handle case where default is already an object with nested constructors + elif prop.get("type") == "object" and isinstance(prop.get("default"), dict): + default_obj = prop["default"] + for field_name, field_value in default_obj.items(): + if isinstance(field_value, str) and "::" in field_value and "(" in field_value: + # This field contains a nested constructor, try to expand it + tname, args = parse_constructor(field_value) + if tname and tname in definitions: + expanded = expand_default(tname, field_value) + if isinstance(expanded, dict): + # Update the existing object fields with the expanded values + for exp_key, exp_value in expanded.items(): + if exp_key in default_obj: + default_obj[exp_key] = exp_value + # Remove the field that contained the constructor + # unless it's supposed to remain (like 'name' field) + # For now, let's replace entire default with expanded version + prop["default"] = expanded + break + + # Handle case where property type is array and default contains C++ constructor syntax + # This is a backup mechanism for cases where the expand_default function above + # didn't catch array initialization patterns. It specifically looks for properties + # that are already marked as array type but still have string defaults with + # C++ constructor syntax that need expansion. + elif prop.get("type") == "array" and isinstance(prop.get("default"), str): + default_str = prop["default"] + if default_str.strip().startswith("{") and default_str.strip().endswith("}"): + # This is an initializer list for an array, expand it using the same logic as expand_default + initializer_content = default_str.strip()[1:-1].strip() # Remove outer braces + if initializer_content: + # Parse multiple comma-separated elements + elements = [] + current_element = "" + paren_depth = 0 + in_quotes = False + + # Parse elements while respecting nested parentheses and quoted strings + for char in initializer_content: + if char == '"' and (not current_element or current_element[-1] != '\\'): + in_quotes = not in_quotes + + if not in_quotes: + if char == '(': + paren_depth += 1 + elif char == ')': + paren_depth -= 1 + elif char == ',' and paren_depth == 0: + # Found a top-level comma, this is a separator + if current_element.strip(): + elements.append(current_element.strip()) + current_element = "" + continue + + current_element += char + + # Add the last element + if current_element.strip(): + elements.append(current_element.strip()) + + # Get the item type from the property definition + items_type = prop.get("items", {}).get("type", "string") + + # Process each element + result_array = [] + for element_str in elements: + # Check if this element is a function call that needs resolution + if "::" in element_str and element_str.endswith("()"): + # This is a function call, resolve it + resolved_value = process_cpp_patterns(element_str) + if resolved_value.startswith('"') and resolved_value.endswith('"'): + # Remove quotes from resolved string values + result_array.append(ast.literal_eval(resolved_value)) + else: + result_array.append(resolved_value) + elif element_str.startswith('"') and element_str.endswith('"'): + # This 
is a quoted string, parse it + result_array.append(ast.literal_eval(element_str)) + elif items_type in definitions: + # For complex types, expand using the item type + expanded_element = expand_default(items_type, element_str) + result_array.append(expanded_element) + else: + # For simple types, just use the element as-is (likely a string) + result_array.append(element_str) + + prop["default"] = result_array + else: + prop["default"] = [] + + # Handle array properties where the default is a single object but should be an array + # This is crucial for one_or_many_property types that are detected as arrays + # but have defaults that were parsed as single objects by the transformers. + # + # Background: The transformer chain processes defaults before type resolution, + # so a property like admin with default {model::broker_endpoint(...)} gets + # expanded to {address: "127.0.0.1", port: 9644} (single object). + # But since admin is one_or_many_property, it should + # be an array: [{address: "127.0.0.1", port: 9644}] + if prop.get("type") == "array": + default = prop.get("default") + if isinstance(default, dict): + # If we have an array type but the default is a single object, wrap it in an array + # This handles cases like admin: {address: "127.0.0.1", port: 9644} -> [{address: ...}] + prop["default"] = [default] + elif isinstance(default, str) and default.strip() == "{}": + # Empty object string should become empty array for array types + # This handles cases like admin_api_tls: "{}" -> [] + prop["default"] = [] + + # Also handle array item types + if prop.get("type") == "array" and "items" in prop: + items_type = prop["items"].get("type") + if isinstance(items_type, str) and items_type in definitions: + item_defn = definitions.get(items_type) + if item_defn: + resolved_item = resolve_definition_type(item_defn) + resolved_item_type = resolved_item.get("type") + if resolved_item_type in ("object", "string", "integer", "boolean", "array", "number"): + prop["items"]["type"] = resolved_item_type + else: + prop["items"]["type"] = "object" # fallback for complex types + + # Final pass: apply C++ pattern processing to any remaining unprocessed defaults + for prop in properties.values(): + if "default" in prop: + default_value = prop["default"] + + if isinstance(default_value, str): + # Process string defaults + processed = process_cpp_patterns(default_value) + if processed != default_value: + if processed == "null": + prop["default"] = None + elif isinstance(processed, str) and processed.startswith('"') and processed.endswith('"'): + prop["default"] = ast.literal_eval(processed) + else: + prop["default"] = processed + + elif isinstance(default_value, list): + # Process array defaults - apply C++ pattern processing to each element + processed_array = [] + for item in default_value: + if isinstance(item, dict): + # Process each field in the object + processed_item = {} + for field_name, field_value in item.items(): + if isinstance(field_value, str) and "::" in field_value and "(" in field_value: + # This field contains a C++ constructor pattern - try to expand it using type definitions + tname, args = parse_constructor(field_value) + if tname and tname in definitions: + # Get the definition for the nested type and expand the constructor + nested_type_def = resolve_definition_type(definitions.get(tname, {})) + if nested_type_def.get("properties"): + nested_props = list(nested_type_def["properties"].keys()) + nested_result = {} + + # Map constructor arguments to type properties + for j, nested_prop in 
enumerate(nested_props): + nested_prop_def = nested_type_def["properties"][nested_prop] + if j < len(args): + nested_arg = args[j] + processed_nested_arg = process_cpp_patterns(nested_arg) + + # Convert based on property type + if nested_prop_def.get("type") == "string": + if processed_nested_arg.startswith('"') and processed_nested_arg.endswith('"'): + nested_result[nested_prop] = ast.literal_eval(processed_nested_arg) + else: + nested_result[nested_prop] = processed_nested_arg + elif nested_prop_def.get("type") == "integer": + try: + nested_result[nested_prop] = int(processed_nested_arg) + except ValueError: + nested_result[nested_prop] = processed_nested_arg + elif nested_prop_def.get("type") == "boolean": + nested_result[nested_prop] = processed_nested_arg.lower() == "true" + else: + nested_result[nested_prop] = processed_nested_arg + else: + nested_result[nested_prop] = None + + # For special case of net::unresolved_address inside broker_authn_endpoint + if tname == "net::unresolved_address": + # Replace the entire object with expanded net::unresolved_address values + # Only include the fields that are actually defined in the type + processed_item.update(nested_result) + break # Don't process other fields since we replaced the whole object + else: + processed_item[field_name] = nested_result + else: + # Fallback to simple pattern processing + processed_field = process_cpp_patterns(field_value) + if processed_field == "null": + processed_item[field_name] = None + elif isinstance(processed_field, str) and processed_field.startswith('"') and processed_field.endswith('"'): + processed_item[field_name] = ast.literal_eval(processed_field) + else: + processed_item[field_name] = processed_field + else: + # Fallback to simple pattern processing + processed_field = process_cpp_patterns(field_value) + if processed_field == "null": + processed_item[field_name] = None + elif isinstance(processed_field, str) and processed_field.startswith('"') and processed_field.endswith('"'): + processed_item[field_name] = ast.literal_eval(processed_field) + else: + processed_item[field_name] = processed_field + elif isinstance(field_value, str): + # Simple string field - apply C++ pattern processing + processed_field = process_cpp_patterns(field_value) + if processed_field == "null": + processed_item[field_name] = None + elif isinstance(processed_field, str) and processed_field.startswith('"') and processed_field.endswith('"'): + processed_item[field_name] = ast.literal_eval(processed_field) + else: + processed_item[field_name] = processed_field + else: + processed_item[field_name] = field_value + processed_array.append(processed_item) + else: + # Non-object array item + if isinstance(item, str): + processed_item = process_cpp_patterns(item) + if processed_item == "null": + processed_array.append(None) + elif isinstance(processed_item, str) and processed_item.startswith('"') and processed_item.endswith('"'): + processed_array.append(ast.literal_eval(processed_item)) + else: + processed_array.append(processed_item) + else: + processed_array.append(item) + prop["default"] = processed_array + + elif isinstance(default_value, dict): + # Process object defaults - apply C++ pattern processing to each field + processed_object = {} + for field_name, field_value in default_value.items(): + if isinstance(field_value, str): + processed_field = process_cpp_patterns(field_value) + if processed_field == "null": + processed_object[field_name] = None + elif isinstance(processed_field, str) and processed_field.startswith('"') 
and processed_field.endswith('"'): + processed_object[field_name] = ast.literal_eval(processed_field) + else: + processed_object[field_name] = processed_field + else: + processed_object[field_name] = field_value + prop["default"] = processed_object + + # Handle unresolved C++ types + prop_type = prop.get("type") + if isinstance(prop_type, str): + # Check if it's an unresolved C++ type (contains :: or ends with >) + if ("::" in prop_type or prop_type.endswith(">") or + prop_type.endswith("_t") or prop_type.startswith("std::")): + # Default unresolved C++ types to string, unless they look like numbers + if any(word in prop_type.lower() for word in ["int", "long", "short", "double", "float", "number"]): + prop["type"] = "integer" + elif any(word in prop_type.lower() for word in ["bool"]): + prop["type"] = "boolean" + else: + prop["type"] = "string" + + return properties + + +def extract_topic_properties(source_path): + """ + Extract topic properties and convert them to the standard properties format. + + Args: + source_path: Path to the Redpanda source code + + Returns: + Dictionary of topic properties in the standard format with config_scope: "topic" + """ + if TopicPropertyExtractor is None: + logging.warning("TopicPropertyExtractor not available, skipping topic property extraction") + return {} + + try: + extractor = TopicPropertyExtractor(source_path) + topic_data = extractor.extract_topic_properties() + topic_properties = topic_data.get("topic_properties", {}) + + # Convert topic properties to the standard properties format + converted_properties = {} + for prop_name, prop_data in topic_properties.items(): + # Skip no-op properties + if prop_data.get("is_noop", False): + continue + + converted_properties[prop_name] = { + "name": prop_name, + "description": prop_data.get("description", ""), + "type": prop_data.get("type", "string"), + "config_scope": "topic", + "source_file": prop_data.get("source_file", ""), + "corresponding_cluster_property": prop_data.get("corresponding_cluster_property", ""), + "acceptable_values": prop_data.get("acceptable_values", ""), + "is_deprecated": False, + "is_topic_property": True + } + + logging.info(f"Extracted {len(converted_properties)} topic properties (excluding {len([p for p in topic_properties.values() if p.get('is_noop', False)])} no-op properties)") + return converted_properties + + except Exception as e: + logging.error(f"Failed to extract topic properties: {e}") + return {} + + def main(): import argparse @@ -170,6 +1242,13 @@ def generate_options(): help="File to store the JSON output. If no file is provided, the JSON will be printed to the standard output", ) + arg_parser.add_argument( + "--enhanced-output", + type=str, + required=False, + help="File to store the enhanced JSON output with overrides applied (such as 'dev-properties.json')", + ) + arg_parser.add_argument( "--definitions", type=str, @@ -178,6 +1257,13 @@ def generate_options(): help='JSON file with the type definitions. 
This file will be merged in the output under the "definitions" field', ) + arg_parser.add_argument( + "--overrides", + type=str, + required=False, + help='Optional JSON file with property description overrides', + ) + arg_parser.add_argument("-v", "--verbose", action="store_true") return arg_parser @@ -208,6 +1294,16 @@ def generate_options(): logging.error(f"Failed to parse definitions file: {e}") sys.exit(1) + # Load property overrides if provided + overrides = None + if options.overrides: + try: + with open(options.overrides) as f: + overrides = json.load(f) + except Exception as e: + logging.error(f"Failed to load overrides file: {e}") + sys.exit(1) + treesitter_dir = os.path.join(os.getcwd(), "tree-sitter/tree-sitter-cpp") destination_path = os.path.join(treesitter_dir, "tree-sitter-cpp.so") @@ -219,25 +1315,72 @@ def generate_options(): treesitter_dir, destination_path ) + files_with_properties = get_files_with_properties( file_pairs, treesitter_parser, cpp_language ) properties = transform_files_with_properties(files_with_properties) - properties_and_definitions = merge_properties_and_definitions( - properties, definitions + + # Extract topic properties and add them to the main properties dictionary + topic_properties = extract_topic_properties(options.path) + if topic_properties: + properties.update(topic_properties) + logging.info(f"Added {len(topic_properties)} topic properties to the main properties collection") + + # First, create the original properties without overrides for the base JSON output + # 1. Add config_scope field based on which source file defines the property + original_properties = add_config_scope(deepcopy(properties)) + + # 2. Resolve type references and expand default values for original properties + original_properties = resolve_type_and_default(original_properties, definitions) + + # Generate original properties JSON (without overrides) + original_properties_and_definitions = merge_properties_and_definitions( + original_properties, definitions ) + original_json_output = json.dumps(original_properties_and_definitions, indent=4, sort_keys=True) + + # Now create enhanced properties with overrides applied + # 1. Apply any description overrides from external override files + enhanced_properties = apply_property_overrides(deepcopy(properties), overrides, options.overrides) + + # 2. Add config_scope field based on which source file defines the property + enhanced_properties = add_config_scope(enhanced_properties) + + # 3. 
Resolve type references and expand default values + # This step converts: + # - C++ type names (model::broker_endpoint) to JSON schema types (object) + # - C++ constructor defaults to structured JSON objects + # - Single object defaults to arrays for one_or_many_property types + enhanced_properties = resolve_type_and_default(enhanced_properties, definitions) - json_output = json.dumps(properties_and_definitions, indent=4, sort_keys=True) + # Generate enhanced properties JSON (with overrides) + enhanced_properties_and_definitions = merge_properties_and_definitions( + enhanced_properties, definitions + ) + enhanced_json_output = json.dumps(enhanced_properties_and_definitions, indent=4, sort_keys=True) + # Write original properties file (for backward compatibility) if options.output: try: with open(options.output, "w+") as json_file: - json_file.write(json_output) + json_file.write(original_json_output) + print(f"✅ Original properties JSON generated at {options.output}") except IOError as e: - logging.error(f"Failed to write output file: {e}") + logging.error(f"Failed to write original output file: {e}") sys.exit(1) else: - print(json_output) + print(original_json_output) + + # Write enhanced properties file (with overrides applied) + if options.enhanced_output: + try: + with open(options.enhanced_output, "w+") as json_file: + json_file.write(enhanced_json_output) + print(f"✅ Enhanced properties JSON (with overrides) generated at {options.enhanced_output}") + except IOError as e: + logging.error(f"Failed to write enhanced output file: {e}") + sys.exit(1) if __name__ == "__main__": main() diff --git a/tools/property-extractor/requirements.txt b/tools/property-extractor/requirements.txt index 030b250..9627bb1 100644 --- a/tools/property-extractor/requirements.txt +++ b/tools/property-extractor/requirements.txt @@ -1,2 +1,3 @@ tree_sitter==0.21.1 setuptools>=42.0.0 +pyyaml>=6.0 diff --git a/tools/property-extractor/templates/deprecated-properties.hbs b/tools/property-extractor/templates/deprecated-properties.hbs new file mode 100644 index 0000000..24122ab --- /dev/null +++ b/tools/property-extractor/templates/deprecated-properties.hbs @@ -0,0 +1,25 @@ +{{#if deprecated}} += Deprecated Configuration Properties +:description: This is an exhaustive list of all the deprecated properties. + +This is an exhaustive list of all the deprecated properties. + +{{#if brokerProperties}} +== Broker properties + +{{#each brokerProperties}} +{{> deprecated-property}} + +{{/each}} +{{/if}} + +{{#if clusterProperties}} +== Cluster properties + +{{#each clusterProperties}} +{{> deprecated-property}} + +{{/each}} +{{/if}} + +{{/if}} diff --git a/tools/property-extractor/templates/deprecated-property.hbs b/tools/property-extractor/templates/deprecated-property.hbs new file mode 100644 index 0000000..d8f5460 --- /dev/null +++ b/tools/property-extractor/templates/deprecated-property.hbs @@ -0,0 +1,7 @@ +=== `{{name}}` + +{{#if description}} +{{{description}}} +{{else}} +No description available. 
+{{/if}} diff --git a/tools/property-extractor/templates/property-page.hbs b/tools/property-extractor/templates/property-page.hbs new file mode 100644 index 0000000..8a44b8d --- /dev/null +++ b/tools/property-extractor/templates/property-page.hbs @@ -0,0 +1,22 @@ += {{pageTitle}} +{{#if pageAliases}} +:page-aliases: {{join pageAliases ", "}} +{{/if}} +:description: {{description}} + +{{{intro}}} + +{{#if sectionTitle}} +== {{sectionTitle}} +{{/if}} + +{{#if sectionIntro}} +{{{sectionIntro}}} +{{/if}} + +{{#each groups}} +{{#each this.properties}} +{{> (lookup ../this "template")}} + +{{/each}} +{{/each}} diff --git a/tools/property-extractor/templates/property.hbs b/tools/property-extractor/templates/property.hbs new file mode 100644 index 0000000..05f06f7 --- /dev/null +++ b/tools/property-extractor/templates/property.hbs @@ -0,0 +1,70 @@ +=== {{name}} + +{{#if version}} +*Introduced in {{version}}* + +{{/if}} +{{#if description}} +{{{description}}} +{{else}} +No description available. +{{/if}} + +{{#if units}} +*Unit:* {{units}} + +{{else}} +{{#if (formatUnits name)}} +*Unit:* {{formatUnits name}} + +{{/if}} +{{/if}} +{{#if (ne defined_in "src/v/config/node_config.cc")}} +{{#if (ne needs_restart undefined)}} +*Requires restart:* {{#if needs_restart}}Yes{{else}}No{{/if}} + +{{/if}} +{{/if}} +{{#if visibility}} +*Visibility:* `{{visibility}}` + +{{/if}} +{{#if type}} +*Type:* {{type}} + +{{/if}} +{{#if (and minimum maximum)}} +*Accepted values:* [`{{minimum}}`, `{{maximum}}`] + +{{else}} +{{#if minimum}} +*Minimum value:* `{{minimum}}` + +{{/if}} +{{#if maximum}} +*Maximum value:* `{{maximum}}` + +{{/if}} +{{/if}} +{{#if (ne default undefined)}} +*Default:* `{{formatPropertyValue default type}}` + +{{/if}} +*Nullable:* {{#if nullable}}Yes{{else}}No{{/if}} + +{{#if example}} +{{{renderPropertyExample this}}} +{{/if}} + +{{#if aliases}} +*Aliases:* {{join aliases ", "}} + +{{/if}} +{{#if is_deprecated}} +[WARNING] +==== +This property is deprecated. +==== + +{{/if}} +--- diff --git a/tools/property-extractor/templates/topic-property.hbs b/tools/property-extractor/templates/topic-property.hbs new file mode 100644 index 0000000..7e3eaf7 --- /dev/null +++ b/tools/property-extractor/templates/topic-property.hbs @@ -0,0 +1,59 @@ +=== {{name}} + +{{#if version}} +*Introduced in {{version}}* + +{{/if}} +{{#if description}} +{{{description}}} +{{else}} +No description available. +{{/if}} + +{{#if type}} +*Type:* {{type}} + +{{/if}} +{{#if acceptable_values}} +*Accepted values:* {{{acceptable_values}}} + +{{/if}} +{{#if corresponding_cluster_property}} +*Related cluster property:* xref:reference:cluster-properties.adoc#{{corresponding_cluster_property}}[{{corresponding_cluster_property}}] + +{{/if}} +{{#if (and minimum maximum)}} +*Accepted values:* [`{{minimum}}`, `{{maximum}}`] + +{{else}} +{{#if minimum}} +*Minimum value:* `{{minimum}}` + +{{/if}} +{{#if maximum}} +*Maximum value:* `{{maximum}}` + +{{/if}} +{{/if}} +{{#if (ne default undefined)}} +*Default:* `{{formatPropertyValue default type}}` + +{{/if}} +*Nullable:* {{#if nullable}}Yes{{else}}No{{/if}} + +{{#if example}} +{{{renderPropertyExample this}}} +{{/if}} + +{{#if aliases}} +*Aliases:* {{join aliases ", "}} + +{{/if}} +{{#if is_deprecated}} +[WARNING] +==== +This property is deprecated. 
+====
+
+{{/if}}
+---
diff --git a/tools/property-extractor/transformers.py b/tools/property-extractor/transformers.py
index e46953c..56c7ef4 100644
--- a/tools/property-extractor/transformers.py
+++ b/tools/property-extractor/transformers.py
@@ -36,13 +36,52 @@ def parse(self, property, info, file_pair):
 
 
 class IsArrayTransformer:
+    """
+    Detects properties that should be treated as arrays based on their C++ type declarations.
+
+    This transformer identifies two types of array properties:
+    1. std::vector - Standard C++ vectors
+    2. one_or_many_property - Redpanda's custom type that accepts either a single value or an array
+
+    The one_or_many_property type is used in Redpanda configuration for properties like 'admin'
+    and 'admin_api_tls' where users can specify either:
+    - A single object: admin: {address: "127.0.0.1", port: 9644}
+    - An array of objects: admin: [{address: "127.0.0.1", port: 9644}, {address: "0.0.0.0", port: 9645}]
+
+    When detected, these properties are marked with:
+    - type: "array"
+    - items: {type: <inner type>} where <inner type> is extracted from T
+    """
+
+    # Class-level constants for array type patterns
+    ARRAY_PATTERN_STD_VECTOR = "std::vector"
+    ARRAY_PATTERN_ONE_OR_MANY = "one_or_many_property"
+
     def __init__(self, type_transformer):
         self.type_transformer = type_transformer
 
     def accepts(self, info, file_pair):
-        return "std::vector" in info["declaration"]
+        """
+        Check if this property declaration represents an array type.
+
+        Returns True for:
+        - std::vector declarations (standard C++ vectors)
+        - one_or_many_property declarations (Redpanda's flexible array type)
+        """
+        return (self.ARRAY_PATTERN_STD_VECTOR in info["declaration"] or
+                self.ARRAY_PATTERN_ONE_OR_MANY in info["declaration"])
 
     def parse(self, property, info, file_pair):
+        """
+        Transform the property to indicate it's an array type.
+
+        Sets:
+        - property["type"] = "array"
+        - property["items"]["type"] = <inner type>
+
+        The inner type is extracted by the type_transformer, which handles
+        removing the wrapper (std::vector<> or one_or_many_property<>) to get T.
+        """
         property["type"] = "array"
         property["items"] = PropertyBag()
         property["items"]["type"] = self.type_transformer.get_type_from_declaration(
@@ -94,10 +133,35 @@ def parse(self, property, info, file_pair):
 
 
 class TypeTransformer:
+
+    # Class-level constants for type pattern matching
+    # Shared with IsArrayTransformer for consistency
+    ARRAY_PATTERN_STD_VECTOR = "std::vector"
+    ARRAY_PATTERN_ONE_OR_MANY = "one_or_many_property"
+    OPTIONAL_PATTERN = "std::optional"
+
     def accepts(self, info, file_pair):
         return True
 
     def get_cpp_type_from_declaration(self, declaration):
+        """
+        Extract the inner type from C++ property declarations.
+
+        This method handles various C++ template types and extracts the core type T from:
+        - property<T> -> T
+        - std::optional<T> -> T
+        - std::vector<T> -> T
+        - one_or_many_property<T> -> T (Redpanda's flexible array type)
+
+        For one_or_many_property, this is crucial because it allows the same property
+        to accept either a single value or an array of values in the configuration.
+        Examples:
+        - one_or_many_property<model::broker_endpoint> -> model::broker_endpoint
+        - one_or_many_property<endpoint_tls_config> -> endpoint_tls_config
+
+        The extracted type is then used to determine the JSON schema type and
+        for resolving default values from the definitions.
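+
+        A short worked illustration (the declaration string is invented for
+        this docstring; see the regex chain below):
+
+            "one_or_many_property<model::broker_endpoint> admin"
+            -> "model::broker_endpoint"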
+ """ one_line_declaration = declaration.replace("\n", "").strip() raw_type = ( re.sub(r"^.*property<(.+)>.*", "\\1", one_line_declaration) @@ -105,11 +169,19 @@ def get_cpp_type_from_declaration(self, declaration): .replace(",", "") ) - if "std::optional" in raw_type: + if self.OPTIONAL_PATTERN in raw_type: raw_type = re.sub(".*std::optional<(.+)>.*", "\\1", raw_type) - if "std::vector" in raw_type: + if self.ARRAY_PATTERN_STD_VECTOR in raw_type: raw_type = re.sub(".*std::vector<(.+)>.*", "\\1", raw_type) + + # Handle one_or_many_property - extract the inner type T + # This is essential for Redpanda's flexible configuration properties + # that can accept either single values or arrays + # Check and extract from raw_type for consistency with other type extractors + if self.ARRAY_PATTERN_ONE_OR_MANY in raw_type: + raw_type = re.sub(".*one_or_many_property<(.+)>.*", "\\1", raw_type) + raw_type = raw_type.split()[0].replace(",", "") return raw_type @@ -282,6 +354,10 @@ class FriendlyDefaultTransformer: - std::chrono::milliseconds(10) - std::nullopt """ + + # Class-level constants for pattern matching in default values + ARRAY_PATTERN_STD_VECTOR = "std::vector" + def accepts(self, info, file_pair): return info.get("params") and len(info["params"]) > 3 @@ -308,7 +384,7 @@ def parse(self, property, info, file_pair): return property # Transform std::vector defaults. - if "std::vector" in default: + if self.ARRAY_PATTERN_STD_VECTOR in default: m = re.search(r'\{([^}]+)\}', default) if m: contents = m.group(1).strip()