Add metadata validation script and module documentation

- Introduced a new Python script `check_agents_metadata.py` for validating agent metadata, including YAML parsing, rating ranges, and cross-references.
- Added usage instructions and exit codes for the script.
- Created a new markdown file `模块目录和功能说明.md` to outline the directory structure and functionality of the modules.
- Added a text file `说明此文件不可AI修改.txt` to specify that certain files should not be modified by AI, including important information about the `dstalk` framework and its modules.
2026-05-31 00:00:58 +08:00
parent 3cc9ee95e4
commit f2da0f2ed4
43 changed files with 2467 additions and 800 deletions
--- a/plugins/deepseek/src/deepseek_plugin.cpp
+++ b/plugins/deepseek/src/deepseek_plugin.cpp
@@ -1,3 +1,10 @@
+/*
+ * @file deepseek_plugin.cpp
+ * @brief DeepSeek/OpenAI-compatible AI provider plugin with SSE streaming and tool calls.
+ * DeepSeek/OpenAI 兼容 AI 提供者插件，支持 SSE 流式输出和工具调用。
+ * Copyright (c) 2026 dstalk contributors. GPLv3.
+ */
+
 #include "dstalk/dstalk_host.h"
 #include "dstalk/dstalk_services.h"

@@ -11,14 +18,14 @@
 namespace json = boost::json;

 // ============================================================================
-// 全局指针：从 on_init 获取（W14.3: atomic acquire/release 保护读写竞态）
+// 全局指针：从 on_init 获取（W14.3: atomic acquire/release 保护读写竞态） / Global pointers: obtained from on_init (W14.3: atomic acquire/release protects read/write races)
 // ============================================================================
 static std::atomic<const dstalk_host_api_t*> g_host{nullptr};
 static std::atomic<dstalk_http_service_t*> g_http{nullptr};
 static std::atomic<dstalk_config_service_t*> g_config{nullptr};

 // ============================================================================
-// 配置数据（由 configure() 设置）
+// 配置数据（由 configure() 设置） / Config data (set by configure())
 // ============================================================================
 struct PluginConfig {
    std::string provider;
@@ -29,19 +36,21 @@ struct PluginConfig {
    double temperature = 0.7;
 };
 static PluginConfig g_cfg;
-static std::string g_tools_json;  // W20.2: cached by configure(), consumed by chat/chat_stream
+static std::string g_tools_json;  // W20.2: 由 configure() 缓存，供 chat/chat_stream 使用 / cached by configure(), consumed by chat/chat_stream

 // ============================================================================
-// 安全擦除：用 volatile 写零循环防止编译器优化
+// 安全擦除：用 volatile 写零循环防止编译器优化 / Secure erase: write zero loop through volatile to prevent compiler optimization
 // ============================================================================
+// 通过 volatile 写入零来安全擦除内存，防止编译器优化 / Securely zero out memory by writing through volatile to prevent compiler optimization.
 static void secure_zero(void* p, size_t n) {
    volatile char* vp = (volatile char*)p;
    while (n--) *vp++ = 0;
 }

 // ============================================================================
-// 辅助：从 base_url 提取 host 和 target
+// 辅助：从 base_url 提取 host 和 target / Helper: extract host and target from base_url
 // ============================================================================
+// 将 URL 解析为 scheme、host、port 和 target path 组件 / Parse a URL into scheme, host, port, and target path components.
 static bool extract_host_port(const std::string& url,
    std::string& scheme_out, std::string& host_out,
    std::string& port_out, std::string& target_out)
@@ -65,8 +74,9 @@ static bool extract_host_port(const std::string& url,
 }

 // ============================================================================
-// 辅助：构建 headers JSON 字符串
+// 辅助：构建 headers JSON 字符串 / Helper: build headers JSON string
 // ============================================================================
+// 构建包含 Bearer 授权令牌的 JSON headers 对象 / Build the JSON headers object containing the Bearer authorization token.
 static std::string build_headers_json(const std::string& auth_header_value)
 {
    json::object h;
@@ -75,8 +85,9 @@ static std::string build_headers_json(const std::string& auth_header_value)
 }

 // ============================================================================
-// 辅助：dstalk_message_t[] -> boost::json::array
+// 辅助：dstalk_message_t[] -> boost::json::array / Helper: dstalk_message_t[] -> boost::json::array
 // ============================================================================
+// 将 dstalk_message_t 数组转换为 Boost.JSON 数组，用于 API 请求体 / Convert dstalk_message_t array into a Boost.JSON array for the API request body.
 static void append_history(json::array& msgs,
                           const dstalk_message_t* history, int history_len)
 {
@@ -100,8 +111,9 @@ static void append_history(json::array& msgs,
 }

 // ============================================================================
-// 构建 DeepSeek JSON 请求体
+// 构建 DeepSeek JSON 请求体 / Build DeepSeek JSON request body
 // ============================================================================
+// 构建 DeepSeek/OpenAI chat completions API 的完整 JSON 请求体 / Build the full JSON request body for the DeepSeek/OpenAI chat completions API.
 static std::string build_request_json(
    const dstalk_message_t* history, int history_len,
    const std::string& user_input,
@@ -117,7 +129,7 @@ static std::string build_request_json(
    json::array msgs;
    append_history(msgs, history, history_len);

-    // 追加当前用户输入
+    // 追加当前用户输入 / Append current user input
    if (!user_input.empty()) {
        json::object obj;
        obj["role"]    = "user";
@@ -127,7 +139,7 @@ static std::string build_request_json(

    root["messages"] = msgs;

-    // tools 定义
+    // tools 定义 / tools definition
    if (!tools_json.empty()) {
        root["tools"] = json::parse(tools_json);
    }
@@ -136,8 +148,9 @@ static std::string build_request_json(
 }

 // ============================================================================
-// 解析非流式 JSON 响应
+// 解析非流式 JSON 响应 / Parse non-streaming JSON response
 // ============================================================================
+// 将非流式 JSON 响应体解析为 dstalk_chat_result_t / Parse a non-streaming JSON response body into a dstalk_chat_result_t.
 static void parse_response(const dstalk_host_api_t* host,
                           const char* body, int http_status,
                           dstalk_chat_result_t& r)
@@ -207,13 +220,13 @@ static void parse_response(const dstalk_host_api_t* host,
 }

 // ============================================================================
-// 流式上下文：在 SSE 回调间累积内容和 tool_calls
+// 流式上下文：在 SSE 回调间累积内容和 tool_calls / Stream context: accumulate content and tool_calls across SSE callbacks
 // ============================================================================
 struct ToolCallAccum {
    int index = -1;
    std::string id;
    std::string name;
-    std::string arguments;  // 增量拼接的 JSON arguments 字符串
+    std::string arguments;  // 增量拼接的 JSON arguments 字符串 / incrementally concatenated JSON arguments string
 };

 struct StreamContext {
@@ -222,12 +235,18 @@ struct StreamContext {
    void* userdata;
    std::string accumulated;
    bool streaming_ok = true;
-    std::vector<ToolCallAccum> tool_calls;  // W20.2: 按 index 累积 delta tool_calls
+    std::vector<ToolCallAccum> tool_calls;  // W20.2: 按 index 累积 delta tool_calls / accumulate delta tool_calls by index
 };

 // ============================================================================
-// SSE 行解析（OpenAI 兼容格式）
+// SSE 行解析（OpenAI 兼容格式） / SSE line parsing (OpenAI-compatible format)
 // ============================================================================
+// 解析单行 SSE "data:" 行。如果包含 content delta，将 token 写入 token_out。
+// 如果包含 tool_calls delta，累积到 ctx->tool_calls。
+// 产生 content token 或遇到 [DONE] 哨兵（此时 token_out 为空）时返回 true，否则返回 false（tool_calls 或未知）。
+// Parse a single SSE "data:" line. If it contains a content delta, writes the token
+// to token_out. If it contains tool_calls delta, accumulates into ctx->tool_calls.
+// Returns true for a content token or the [DONE] sentinel (token_out left empty); false otherwise (tool_calls or unknown).
 static bool parse_sse_line(const std::string& line, std::string& token_out,
                           StreamContext* ctx)
 {
@@ -235,7 +254,7 @@ static bool parse_sse_line(const std::string& line, std::string& token_out,

    std::string data = line.substr(6);

-    // F-13.2-3: Trim leading/trailing whitespace before comparing [DONE] sentinel.
+    // F-13.2-3: 比较 [DONE] 哨兵前去除首尾空白 / Trim leading/trailing whitespace before comparing [DONE] sentinel.
    const char* ws = " \t\r\n";
    size_t start = data.find_first_not_of(ws);
    if (start != std::string::npos) {
@@ -244,7 +263,7 @@ static bool parse_sse_line(const std::string& line, std::string& token_out,
    }
    if (data == "[DONE]") {
        token_out.clear();
-        return true;  // 流结束信号
+        return true;  // 流结束信号 / stream end signal
    }

    try {
@@ -254,12 +273,12 @@ static bool parse_sse_line(const std::string& line, std::string& token_out,
        if (!choices.empty()) {
            auto delta = choices[0].as_object()["delta"].as_object();

-            // W20.2: 处理 delta["tool_calls"] 增量 chunk
-            // DeepSeek/OpenAI 流式模式 tool_calls 跨多个 SSE 事件分片传输：
-            //   事件 1: {"index":0, "id":"call_xxx", "function":{"name":"foo"}}
-            //   事件 2: {"index":0, "function":{"arguments":"{\"bar\":"}}
-            //   事件 3: {"index":0, "function":{"arguments":"1}"}}
-            // 需要按 index 累积 id/name/arguments。
+            // W20.2: 处理 delta["tool_calls"] 增量 chunk / Handle delta["tool_calls"] incremental chunks
+            // DeepSeek/OpenAI 流式模式 tool_calls 跨多个 SSE 事件分片传输 / DeepSeek/OpenAI streaming mode: tool_calls transmitted across multiple SSE event chunks:
+            //   事件 1 / Event 1: {"index":0, "id":"call_xxx", "function":{"name":"foo"}}
+            //   事件 2 / Event 2: {"index":0, "function":{"arguments":"{\"bar\":"}}
+            //   事件 3 / Event 3: {"index":0, "function":{"arguments":"1}"}}
+            // 需要按 index 累积 id/name/arguments / Need to accumulate id/name/arguments by index.
            if (delta.contains("tool_calls") && ctx) {
                auto tc_array = delta["tool_calls"].as_array();
                for (auto& tc_val : tc_array) {
@@ -288,7 +307,7 @@ static bool parse_sse_line(const std::string& line, std::string& token_out,
                        }
                    }
                }
-                return false;  // tool_calls 已处理，无内容 token 给用户回调
+                return false;  // tool_calls 已处理，无内容 token 给用户回调 / tool_calls processed, no content token for user callback
            }

            if (delta.contains("content")) {
@@ -297,14 +316,15 @@ static bool parse_sse_line(const std::string& line, std::string& token_out,
            }
        }
    } catch (...) {
-        // 忽略解析失败
+        // 忽略解析失败 / Ignore parse failures
    }
    return false;
 }

 // ============================================================================
-// configure 实现
+// configure 实现 / configure implementation
 // ============================================================================
+// 配置插件：provider、endpoint、auth、model 和生成参数 / Configure the plugin with provider, endpoint, auth, model, and generation parameters.
 static int my_configure(const char* provider, const char* base_url,
                        const char* api_key, const char* model,
                        int max_tokens, double temperature)
@@ -319,7 +339,7 @@ static int my_configure(const char* provider, const char* base_url,

        const dstalk_host_api_t* host = g_host.load(std::memory_order_acquire);
        if (host) {
-            // W20.2: 从 tools service 缓存 tools_json，供 chat/chat_stream 复用
+            // W20.2: 从 tools service 缓存 tools_json，供 chat/chat_stream 复用 / Cache tools_json from tools service for reuse in chat/chat_stream
            auto* tools_svc = reinterpret_cast<const dstalk_tools_service_t*>(
                host->query_service("tools", 1));
            if (tools_svc && tools_svc->get_tools_json) {
@@ -348,8 +368,9 @@ static int my_configure(const char* provider, const char* base_url,
 }

 // ============================================================================
-// chat 实现
+// chat 实现 / chat implementation
 // ============================================================================
+// 非流式 chat completion：发送 history + user input，返回完整响应 / Non-streaming chat completion: send history + user input, return full response.
 static dstalk_chat_result_t my_chat(
    const dstalk_message_t* history, int history_len,
    const char* user_input,
@@ -412,29 +433,29 @@ static dstalk_chat_result_t my_chat(
 }

 // ============================================================================
-// chat_stream 实现
+// chat_stream 实现 / chat_stream implementation
 // ============================================================================

-// 行回调：解析 SSE line，将 token 传递给用户回调
+// 行回调：解析 SSE line，将 token 传递给用户回调 / SSE line callback: parses each line and forwards content tokens to the user callback.
 static int sse_line_callback(const char* line, void* userdata)
 {
    try {
        auto* ctx = static_cast<StreamContext*>(userdata);
-        if (!line || !line[0]) return 1; // 空行，继续
+        if (!line || !line[0]) return 1; // 空行，继续 / empty line, continue

        std::string line_str(line);
        std::string token;

-        if (!parse_sse_line(line_str, token, ctx)) return 1; // 非 data/tool_calls 行，继续
+        if (!parse_sse_line(line_str, token, ctx)) return 1; // 无 content token（如 tool_calls delta 或无法解析的行），继续 / no content token (e.g. tool_calls delta or unparseable line), continue

-        if (token.empty()) return 0; // [DONE]，停止
+        if (token.empty()) return 0; // [DONE]，停止 / [DONE], stop

        ctx->accumulated += token;

        if (ctx->user_cb) {
            return ctx->user_cb(token.c_str(), ctx->userdata);
        }
-        return 1; // 继续
+        return 1; // 继续 / continue
    } catch (const std::exception& e) {
        const dstalk_host_api_t* host = g_host.load(std::memory_order_acquire);
        if (host && host->log) host->log(DSTALK_LOG_ERROR, "[deepseek] sse_line_callback exception: %s", e.what());
@@ -446,6 +467,9 @@ static int sse_line_callback(const char* line, void* userdata)
    }
 }

+// 流式 chat completion：以 stream=true 发送 history + user input，通过回调传递 token，在 SSE 分片中累积 tool_calls 并在结束时序列化。
+// Streaming chat completion: send history + user input with stream=true, deliver tokens
+// via callback. Accumulates tool_calls across SSE chunks and serializes them at end.
 static dstalk_chat_result_t my_chat_stream(
    const dstalk_message_t* history, int history_len,
    const char* user_input,
@@ -488,10 +512,10 @@ static dstalk_chat_result_t my_chat_stream(

        r.http_status = status_code;

-        // 检查传输层错误或非 2xx 状态
+        // 检查传输层错误或非 2xx 状态 / Check transport errors or non-2xx status
        if (status_code < 200 || status_code >= 300) {
            r.ok = 0;
-            // 尝试从响应体提取错误信息
+            // 尝试从响应体提取错误信息 / Try to extract error info from response body
            if (response_body && response_body[0]) {
                try {
                    auto jv = json::parse(response_body);
@@ -518,7 +542,7 @@ static dstalk_chat_result_t my_chat_stream(

        if (response_body && host) host->free(response_body);

-        // W20.2: 成功条件 = 有内容 OR 有 tool_calls（tool-only 响应如 function calling）
+        // W20.2: 成功条件 = 有内容 OR 有 tool_calls（tool-only 响应如 function calling） / Success = has content OR has tool_calls (tool-only responses like function calling)
        bool has_content = !ctx.accumulated.empty();
        bool has_tool_calls = !ctx.tool_calls.empty();

@@ -533,7 +557,7 @@ static dstalk_chat_result_t my_chat_stream(
            r.content = has_content
                ? host->strdup(ctx.accumulated.c_str()) : nullptr;

-            // 序列化累积的 tool_calls 为 JSON（兼容 OpenAI tool_calls 格式）
+            // 序列化累积的 tool_calls 为 JSON（兼容 OpenAI tool_calls 格式） / Serialize accumulated tool_calls to JSON (OpenAI-compatible tool_calls format)
            if (has_tool_calls) {
                json::array tc_array;
                for (auto& tc : ctx.tool_calls) {
@@ -572,8 +596,9 @@ static dstalk_chat_result_t my_chat_stream(
 }

 // ============================================================================
-// free_result 实现
+// free_result 实现 / free_result implementation
 // ============================================================================
+// 释放 chat result 结构体中所有主机分配的字符串字段 / Free all host-allocated string fields in a chat result struct.
 static void my_free_result(dstalk_chat_result_t* result)
 {
    const dstalk_host_api_t* host = g_host.load(std::memory_order_acquire);
@@ -584,7 +609,7 @@ static void my_free_result(dstalk_chat_result_t* result)
 }

 // ============================================================================
-// 服务 vtable
+// 服务 vtable / Service vtable
 // ============================================================================
 static dstalk_ai_service_t g_service = {
    &my_configure,
@@ -594,8 +619,9 @@ static dstalk_ai_service_t g_service = {
 };

 // ============================================================================
-// 生命周期
+// 生命周期 / Lifecycle
 // ============================================================================
+// 插件初始化：查询 http 和 config 服务，注册 ai.deepseek 服务 / Plugin init: query http and config services, register ai.deepseek service.
 static int on_init(const dstalk_host_api_t* host)
 {
    try {
@@ -624,6 +650,7 @@ static int on_init(const dstalk_host_api_t* host)
    }
 }

+// 插件关闭：从内存安全擦除 API key，清空服务指针 / Plugin shutdown: securely erase API key from memory, null out service pointers.
 static void on_shutdown()
 {
    try {
@@ -644,12 +671,12 @@ static void on_shutdown()
 }

 // ============================================================================
-// 插件描述符
+// 插件描述符 / Plugin descriptor
 // ============================================================================
 static dstalk_plugin_info_t g_info = {
    /* .name         = */ "deepseek-ai",
    /* .version      = */ "1.0.0",
-    /* .description  = */ "DeepSeek AI provider (OpenAI-compatible API)",
+    /* .description  = */ "DeepSeek AI provider (OpenAI-compatible API)",  // DeepSeek AI 提供者 (OpenAI 兼容 API) / DeepSeek AI provider (OpenAI-compatible API)
    /* .api_version  = */ DSTALK_API_VERSION,
    /* .dependencies = */ { "http", "config", NULL },
    /* .on_init      = */ on_init,
@@ -657,6 +684,7 @@ static dstalk_plugin_info_t g_info = {
    /* .on_event     = */ nullptr,
 };

+// 必须入口点：返回插件描述符给主机 / Mandatory entry point: returns the plugin descriptor to the host.
 extern "C" DSTALK_PLUGIN_EXPORT dstalk_plugin_info_t* dstalk_plugin_init(void)
 {
    return &g_info;