Skip to content
Closed
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
b6986a2
feat(backend): add DingTalk document to knowledge base MCP tool
parabala Apr 13, 2026
e567768
fix(backend): address review comments for dingtalk-docs feature
parabala Apr 14, 2026
e34e393
fix(backend): use dingtalk_docs MCP for real API calls
parabala Apr 14, 2026
93cb40a
fix(backend): make dingtalk-docs skill call real MCP APIs
parabala Apr 14, 2026
bcc2bc9
fix(backend): improve error handling for DingTalk authentication
parabala Apr 14, 2026
aa8d2a2
fix(dingtalk): correct MCP tool parameters and routing logic
parabala Apr 14, 2026
d6f25b4
refactor(dingtalk): improve docs_service code quality
parabala Apr 14, 2026
aeda766
fix(dingtalk): add Accept header and better 406 error handling
parabala Apr 14, 2026
d1774e7
fix(dingtalk): correct field name mapping for MCP response
parabala Apr 14, 2026
1c6627f
fix(dingtalk): correct field names for get_document_info response
parabala Apr 14, 2026
77c655a
fix(dingtalk): support millisecond timestamp in filename format
parabala Apr 14, 2026
615dafa
fix(dingtalk): correct field names in download_document_content
parabala Apr 14, 2026
bd58b05
fix(dingtalk): support Unix timestamp in modified_time validation
parabala Apr 14, 2026
1128176
refactor(dingtalk): rename skill from dingtalk-docs to dingtalk-conne…
parabala Apr 15, 2026
78fbb84
feat(dingtalk): add file_extension and source_config to knowledge doc…
parabala Apr 15, 2026
6ea239c
refactor(mcp): simplify filename format for DingTalk documents
parabala Apr 15, 2026
60ad6f3
fix(backend): restore dingtalk-docs mcp config and test cases
parabala Apr 15, 2026
5b25f86
refactor(dingtalk): remove unused download_document method and clean …
parabala Apr 15, 2026
c534204
fix(dingtalk): add OpenTelemetry tracing and fix URL extraction
parabala Apr 15, 2026
73dc2c9
fix(mcp): remove unsafe asyncio.to_thread usage with SQLAlchemy objects
parabala Apr 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions backend/app/mcp_server/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ def _register_knowledge_tools() -> None:
# The decorators will add tools to the global registry
from app.mcp_server.tool_registry import register_tools_to_server
from app.mcp_server.tools import ( # noqa: F401 side-effect: triggers @mcp_tool registration
dingtalk_docs,
knowledge,
)

Expand Down
321 changes: 321 additions & 0 deletions backend/app/mcp_server/tools/dingtalk_docs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,321 @@
# SPDX-FileCopyrightText: 2025 WeCode, Inc.
#
# SPDX-License-Identifier: Apache-2.0

"""
DingTalk Document MCP tools for knowledge base integration.

This module provides MCP tool implementations for adding DingTalk documents
to Wegent knowledge bases. The tools coordinate with sandbox execution to:
1. Download DingTalk document content
2. Save with name {title}.{file_extension}
3. Upload as attachment
4. Create knowledge base document

These tools are registered with the MCP server and exposed to AI agents.
"""

import logging
from typing import Any, Dict, Optional

from app.db.session import SessionLocal
from app.mcp_server.auth import TaskTokenInfo
from app.mcp_server.tools.decorator import build_mcp_tools_dict, mcp_tool
from app.mcp_server.tools.knowledge import _get_user_from_token
from app.services.dingtalk.docs_service import dingtalk_docs_service
from app.services.knowledge.orchestrator import knowledge_orchestrator

logger = logging.getLogger(__name__)


@mcp_tool(
    name="get_dingtalk_document_info",
    description="Get information about a DingTalk document from its URL.",
    server="knowledge",
    param_descriptions={
        "doc_url": "DingTalk document URL (e.g., https://alidocs.dingtalk.com/i/nodes/xxx)",
    },
)
async def get_dingtalk_document_info(
    token_info: TaskTokenInfo,
    doc_url: str,
) -> Dict[str, Any]:
    """
    Look up metadata for a DingTalk document identified by its URL.

    The caller's preferences are read from the database and forwarded to
    ``dingtalk_docs_service.get_document_info``. A missing user is tolerated:
    the service is then called with ``user_preferences=None``.

    Args:
        token_info: Task token information used to resolve the calling user
        doc_url: DingTalk document URL

    Returns:
        On success, a dict with ``success=True`` plus the ``doc_id``,
        ``title``, ``modified_time``, ``content_type`` and ``url`` values
        copied from the service response.
        On failure, ``{"success": False, "error": <message>}``; this tool
        never raises.
    """
    try:
        # The session is only needed long enough to read the preferences,
        # so it is closed before the DingTalk call is awaited.
        session = SessionLocal()
        try:
            owner = _get_user_from_token(session, token_info)
            preferences = None
            if owner:
                preferences = owner.preferences
        finally:
            session.close()

        info = await dingtalk_docs_service.get_document_info(
            doc_url, user_preferences=preferences
        )

        # Copy the expected fields in a fixed order; a missing key raises
        # KeyError, which the generic handler below reports to the caller.
        payload: Dict[str, Any] = {"success": True}
        for field in ("doc_id", "title", "modified_time", "content_type", "url"):
            payload[field] = info[field]
        return payload
    except ValueError as e:
        # Validation problems are reported with the bare message.
        logger.warning(f"[MCP] get_dingtalk_document_info validation error: {e}")
        return {"success": False, "error": str(e)}
    except Exception as e:
        logger.error(f"[MCP] get_dingtalk_document_info error: {e}", exc_info=True)
        return {"success": False, "error": f"Failed to get document info: {e}"}


@mcp_tool(
    name="add_dingtalk_doc_to_knowledge",
    description="Add a DingTalk document to Wegent knowledge base. Downloads the document, saves it with naming {title}.{file_extension}, and creates a knowledge base document.",
    server="knowledge",
    param_descriptions={
        "knowledge_base_id": "Target knowledge base ID",
        "doc_url": "DingTalk document URL",
        "doc_title": "Document title (optional, will be fetched from DingTalk if not provided)",
        "doc_content": "Document content (optional, will be downloaded from DingTalk if not provided)",
        "modified_time": "Document modification time in YYYYMMDDHHMMSS format (optional)",
        "trigger_indexing": "Whether to trigger RAG indexing (default: True)",
        "trigger_summary": "Whether to trigger summary generation (default: True)",
    },
)
async def add_dingtalk_doc_to_knowledge(
    token_info: TaskTokenInfo,
    knowledge_base_id: int,
    doc_url: str,
    doc_title: Optional[str] = None,
    doc_content: Optional[str] = None,
    modified_time: Optional[str] = None,
    trigger_indexing: bool = True,
    trigger_summary: bool = True,
) -> Dict[str, Any]:
    """
    Add a DingTalk document to a Wegent knowledge base as a text document.

    When ``doc_content`` is empty the document is downloaded through
    ``dingtalk_docs_service``; the download also supplies the title,
    modification time and file extension when the caller did not.

    File name: {title}.{file_extension}
    Example: 产品需求文档.md

    Args:
        token_info: Task token information used to resolve the calling user
        knowledge_base_id: Target knowledge base ID
        doc_url: DingTalk document URL (download source; stored in source_config)
        doc_title: Document title (optional; falls back to the downloaded
            title, then to "DingTalk Document")
        doc_content: Document content (optional, markdown format preferred)
        modified_time: Modification time in YYYYMMDDHHMMSS format (optional);
            only used as the ``updated_at`` fallback in source_config
        trigger_indexing: Whether to trigger RAG indexing
        trigger_summary: Whether to trigger summary generation

    Returns:
        On success, a dict with:
            - success: True
            - document_id: Created document ID
            - document_name: Document name
            - filename: Name built from title and file extension
            - message: Status message
        On failure, ``{"success": False, "error": <message>}``; this tool
        never raises.
    """
    db = SessionLocal()
    try:
        user = _get_user_from_token(db, token_info)
        if not user:
            return {"success": False, "error": "User not found"}

        # Defaults used when the caller supplies the content directly.
        file_extension = "md"
        update_time = None

        # If content not provided, fetch from DingTalk
        if not doc_content:
            logger.info(
                f"[MCP] Content not provided, fetching from DingTalk: {doc_url}"
            )
            try:
                # `user` is known to be set here, so its preferences can be
                # passed straight through as the MCP configuration source.
                doc_download = await dingtalk_docs_service.download_document_content(
                    doc_url, user_preferences=user.preferences
                )
                doc_content = doc_download.get("content", "")
                # Caller-supplied values win over the downloaded ones.
                if not doc_title:
                    doc_title = doc_download.get("title", "DingTalk Document")
                if not modified_time:
                    modified_time = doc_download.get("modified_time_formatted")
                file_extension = doc_download.get("file_extension", "md")
                # Raw modification time as returned by the service; preferred
                # over `modified_time` for source_config below.
                update_time = doc_download.get("modified_time")
            except Exception as e:
                logger.error(
                    f"[MCP] Failed to fetch document from DingTalk: {e}",
                    exc_info=True,
                )
                return {
                    "success": False,
                    "error": f"Failed to fetch document from DingTalk: {e}",
                }

        if not doc_content:
            return {
                "success": False,
                "error": "Failed to get document content from DingTalk",
            }

        # Use the provided/fetched title, or a generic placeholder.
        title = doc_title or "DingTalk Document"

        # Build filename: {title}.{file_extension}
        from app.services.knowledge.orchestrator import _build_filename

        filename = _build_filename(title, file_extension)

        logger.info(
            f"[MCP] Adding DingTalk doc to KB {knowledge_base_id}: "
            f"title='{title}', filename='{filename}'"
        )

        # Build source_config with DingTalk document metadata
        source_config = {
            "url": doc_url,
            "source": "dingtalk-connector",
            "updated_at": update_time or modified_time,
        }

        # Call the orchestrator on this thread. `db` and `user` are bound to
        # a SQLAlchemy session opened here; a Session is not safe to share
        # across threads, so it must not be handed to asyncio.to_thread.
        result = knowledge_orchestrator.create_document_with_content(
            db=db,
            user=user,
            knowledge_base_id=knowledge_base_id,
            name=title,
            source_type="text",
            content=doc_content,
            file_extension=file_extension,
            trigger_indexing=trigger_indexing,
            trigger_summary=trigger_summary,
            source_config=source_config,
        )

        return {
            "success": True,
            "document_id": result.id,
            "document_name": result.name,
            "filename": filename,
            "message": f"Document '{title}' added to knowledge base successfully",
        }

    except ValueError as e:
        # Validation problems are reported with the bare message.
        logger.warning(f"[MCP] add_dingtalk_doc_to_knowledge validation error: {e}")
        return {"success": False, "error": str(e)}
    except Exception as e:
        logger.error(f"[MCP] add_dingtalk_doc_to_knowledge error: {e}", exc_info=True)
        return {"success": False, "error": f"Failed to add document: {e}"}
    finally:
        db.close()


@mcp_tool(
    name="add_dingtalk_doc_with_attachment",
    description="Add a DingTalk document to knowledge base using an existing attachment. This is used after the skill uploads the document as an attachment.",
    server="knowledge",
    param_descriptions={
        "knowledge_base_id": "Target knowledge base ID",
        "doc_title": "Document title",
        "attachment_id": "Existing attachment ID from upload_attachment tool",
        "trigger_indexing": "Whether to trigger RAG indexing (default: True)",
        "trigger_summary": "Whether to trigger summary generation (default: True)",
    },
)
def add_dingtalk_doc_with_attachment(
    token_info: TaskTokenInfo,
    knowledge_base_id: int,
    doc_title: str,
    attachment_id: int,
    trigger_indexing: bool = True,
    trigger_summary: bool = True,
) -> Dict[str, Any]:
    """
    Add a DingTalk document to knowledge base using an existing attachment.

    This tool is designed to work with the dingtalk-connector skill which:
    1. Downloads the DingTalk document in sandbox
    2. Saves it as {title}.{file_extension}
    3. Uploads it as an attachment
    4. Calls this tool to create the knowledge base document

    Args:
        token_info: Task token information used to resolve the calling user
        knowledge_base_id: Target knowledge base ID
        doc_title: Document title
        attachment_id: Attachment ID from upload_attachment tool
        trigger_indexing: Whether to trigger RAG indexing
        trigger_summary: Whether to trigger summary generation

    Returns:
        On success, a dict with:
            - success: True
            - document_id: Created document ID
            - document_name: Document name
            - attachment_id: Attachment linked to the created document when
              the result exposes one, otherwise the input ``attachment_id``
            - message: Status message
        On failure, ``{"success": False, "error": <message>}``; this tool
        never raises.
    """
    db = SessionLocal()
    try:
        user = _get_user_from_token(db, token_info)
        if not user:
            return {"success": False, "error": "User not found"}

        logger.info(
            f"[MCP] Adding DingTalk doc with attachment to KB {knowledge_base_id}: "
            f"title='{doc_title}', attachment_id={attachment_id}"
        )

        # Create document with attachment reference
        result = knowledge_orchestrator.create_document_with_content(
            db=db,
            user=user,
            knowledge_base_id=knowledge_base_id,
            name=doc_title,
            source_type="attachment",
            attachment_id=attachment_id,
            trigger_indexing=trigger_indexing,
            trigger_summary=trigger_summary,
        )

        # NOTE(review): per PR review the orchestrator may link a *copy* of
        # the attachment, so the input id can be stale — confirm against
        # create_document_with_content. Prefer the id on the created
        # document and fall back to the input when it is absent or None.
        linked_attachment_id = getattr(result, "attachment_id", None)
        if linked_attachment_id is None:
            linked_attachment_id = attachment_id

        return {
            "success": True,
            "document_id": result.id,
            "document_name": result.name,
            "attachment_id": linked_attachment_id,
            "message": f"Document '{doc_title}' added to knowledge base successfully",
        }

    except ValueError as e:
        # Validation problems are reported with the bare message.
        logger.warning(f"[MCP] add_dingtalk_doc_with_attachment validation error: {e}")
        return {"success": False, "error": str(e)}
    except Exception as e:
        logger.error(
            f"[MCP] add_dingtalk_doc_with_attachment error: {e}", exc_info=True
        )
        return {"success": False, "error": f"Failed to add document: {e}"}
    finally:
        db.close()


# Build the tool registry from the @mcp_tool-decorated functions above.
# NOTE(review): the lookup is keyed only by server="knowledge", so this dict
# presumably also picks up "knowledge" tools registered by other modules that
# were imported earlier — confirm against build_mcp_tools_dict.
DINGTALK_DOCS_MCP_TOOLS = build_mcp_tools_dict(server="knowledge")
15 changes: 15 additions & 0 deletions backend/app/services/dingtalk/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# SPDX-FileCopyrightText: 2025 WeCode, Inc.
#
# SPDX-License-Identifier: Apache-2.0

"""DingTalk services package."""

from app.services.dingtalk.docs_service import (
DingTalkDocsService,
dingtalk_docs_service,
)

# Names re-exported from docs_service as this package's public API:
# the service class and the module-level `dingtalk_docs_service` object
# (presumably a shared instance of that class — confirm in docs_service).
__all__ = [
"DingTalkDocsService",
"dingtalk_docs_service",
]
Loading
Loading