Add metadata validation script and module documentation

- Introduced a new Python script `check_agents_metadata.py` for validating agent metadata, including YAML parsing, rating ranges, and cross-references.
- Added usage instructions and exit codes for the script.
- Created a new markdown file `模块目录和功能说明.md` to outline the directory structure and functionality of the modules.
- Added a text file `说明此文件不可AI修改.txt` to specify that certain files should not be modified by AI, including important information about the `dstalk` framework and its modules.
This commit is contained in:
2026-05-31 00:00:58 +08:00
parent 3cc9ee95e4
commit f2da0f2ed4
43 changed files with 2467 additions and 800 deletions

View File

@@ -1,3 +1,10 @@
/*
* @file deepseek_plugin.cpp
* @brief DeepSeek/OpenAI-compatible AI provider plugin with SSE streaming and tool calls.
* DeepSeek/OpenAI 兼容 AI 提供者插件,支持 SSE 流式输出和工具调用。
* Copyright (c) 2026 dstalk contributors. GPLv3.
*/
#include "dstalk/dstalk_host.h"
#include "dstalk/dstalk_services.h"
@@ -11,14 +18,14 @@
namespace json = boost::json;
// ============================================================================
// 全局指针:从 on_init 获取W14.3: atomic acquire/release 保护读写竞态)
// 全局指针:从 on_init 获取(W14.3: atomic acquire/release 保护读写竞态) / Global pointers: obtained from on_init (W14.3: atomic acquire/release protects read/write races)
// ============================================================================
// Host API table pointer; starts out null and is read with
// memory_order_acquire by the service functions in this file.
static std::atomic<const dstalk_host_api_t*> g_host{nullptr};
// HTTP service pointer; starts out null.
static std::atomic<dstalk_http_service_t*> g_http{nullptr};
// Config service pointer; starts out null.
static std::atomic<dstalk_config_service_t*> g_config{nullptr};
// ============================================================================
// 配置数据(由 configure() 设置)
// 配置数据(由 configure() 设置) / Config data (set by configure())
// ============================================================================
struct PluginConfig {
std::string provider;
@@ -29,19 +36,21 @@ struct PluginConfig {
double temperature = 0.7;
};
static PluginConfig g_cfg;
static std::string g_tools_json; // W20.2: cached by configure(), consumed by chat/chat_stream
static std::string g_tools_json; // W20.2: 由 configure() 缓存,供 chat/chat_stream 使用 / cached by configure(), consumed by chat/chat_stream
// ============================================================================
// 安全擦除:用 volatile 写零循环防止编译器优化
// 安全擦除:用 volatile 写零循环防止编译器优化 / Secure erase: write zero loop through volatile to prevent compiler optimization
// ============================================================================
// Securely zero out `n` bytes starting at `p`. Each store is performed
// through a volatile-qualified pointer so the compiler does not optimize
// the writes away. Nothing is written when `n` is 0.
static void secure_zero(void* p, size_t n) {
    volatile char* cursor = static_cast<volatile char*>(p);
    for (size_t remaining = n; remaining != 0; --remaining) {
        *cursor = 0;
        ++cursor;
    }
}
// ============================================================================
// 辅助:从 base_url 提取 host 和 target
// 辅助:从 base_url 提取 host 和 target / Helper: extract host and target from base_url
// ============================================================================
// 将 URL 解析为 scheme、host、port 和 target path 组件 / Parse a URL into scheme, host, port, and target path components.
static bool extract_host_port(const std::string& url,
std::string& scheme_out, std::string& host_out,
std::string& port_out, std::string& target_out)
@@ -65,8 +74,9 @@ static bool extract_host_port(const std::string& url,
}
// ============================================================================
// 辅助:构建 headers JSON 字符串
// 辅助:构建 headers JSON 字符串 / Helper: build headers JSON string
// ============================================================================
// 构建包含 Bearer 授权令牌的 JSON headers 对象 / Build the JSON headers object containing the Bearer authorization token.
static std::string build_headers_json(const std::string& auth_header_value)
{
json::object h;
@@ -75,8 +85,9 @@ static std::string build_headers_json(const std::string& auth_header_value)
}
// ============================================================================
// 辅助dstalk_message_t[] -> boost::json::array
// 辅助:dstalk_message_t[] -> boost::json::array / Helper: dstalk_message_t[] -> boost::json::array
// ============================================================================
// 将 dstalk_message_t 数组转换为 Boost.JSON 数组,用于 API 请求体 / Convert dstalk_message_t array into a Boost.JSON array for the API request body.
static void append_history(json::array& msgs,
const dstalk_message_t* history, int history_len)
{
@@ -100,8 +111,9 @@ static void append_history(json::array& msgs,
}
// ============================================================================
// 构建 DeepSeek JSON 请求体
// 构建 DeepSeek JSON 请求体 / Build DeepSeek JSON request body
// ============================================================================
// 构建 DeepSeek/OpenAI chat completions API 的完整 JSON 请求体 / Build the full JSON request body for the DeepSeek/OpenAI chat completions API.
static std::string build_request_json(
const dstalk_message_t* history, int history_len,
const std::string& user_input,
@@ -117,7 +129,7 @@ static std::string build_request_json(
json::array msgs;
append_history(msgs, history, history_len);
// 追加当前用户输入
// 追加当前用户输入 / Append current user input
if (!user_input.empty()) {
json::object obj;
obj["role"] = "user";
@@ -127,7 +139,7 @@ static std::string build_request_json(
root["messages"] = msgs;
// tools 定义
// tools 定义 / tools definition
if (!tools_json.empty()) {
root["tools"] = json::parse(tools_json);
}
@@ -136,8 +148,9 @@ static std::string build_request_json(
}
// ============================================================================
// 解析非流式 JSON 响应
// 解析非流式 JSON 响应 / Parse non-streaming JSON response
// ============================================================================
// 将非流式 JSON 响应体解析为 dstalk_chat_result_t / Parse a non-streaming JSON response body into a dstalk_chat_result_t.
static void parse_response(const dstalk_host_api_t* host,
const char* body, int http_status,
dstalk_chat_result_t& r)
@@ -207,13 +220,13 @@ static void parse_response(const dstalk_host_api_t* host,
}
// ============================================================================
// 流式上下文:在 SSE 回调间累积内容和 tool_calls
// 流式上下文:在 SSE 回调间累积内容和 tool_calls / Stream context: accumulate content and tool_calls across SSE callbacks
// ============================================================================
struct ToolCallAccum {
int index = -1;
std::string id;
std::string name;
std::string arguments; // 增量拼接的 JSON arguments 字符串
std::string arguments; // 增量拼接的 JSON arguments 字符串 / incrementally concatenated JSON arguments string
};
struct StreamContext {
@@ -222,12 +235,18 @@ struct StreamContext {
void* userdata;
std::string accumulated;
bool streaming_ok = true;
std::vector<ToolCallAccum> tool_calls; // W20.2: 按 index 累积 delta tool_calls
std::vector<ToolCallAccum> tool_calls; // W20.2: 按 index 累积 delta tool_calls / accumulate delta tool_calls by index
};
// ============================================================================
// SSE 行解析OpenAI 兼容格式)
// SSE 行解析(OpenAI 兼容格式) / SSE line parsing (OpenAI-compatible format)
// ============================================================================
// 解析单行 SSE "data:" 行。如果包含 content delta,将 token 写入 token_out。
// 如果包含 tool_calls delta,累积到 ctx->tool_calls。
// 如果产生了 content token 则返回 true;遇到 [DONE] 哨兵时也返回 true 且 token_out 为空;
// 否则返回 false(tool_calls 或未知)。
// Parse a single SSE "data:" line. If it contains a content delta, writes the token
// to token_out. If it contains a tool_calls delta, accumulates it into ctx->tool_calls.
// Returns true if a content token was produced, or if the [DONE] sentinel was seen
// (token_out is then empty); returns false otherwise (tool_calls or unknown).
static bool parse_sse_line(const std::string& line, std::string& token_out,
StreamContext* ctx)
{
@@ -235,7 +254,7 @@ static bool parse_sse_line(const std::string& line, std::string& token_out,
std::string data = line.substr(6);
// F-13.2-3: Trim leading/trailing whitespace before comparing [DONE] sentinel.
// F-13.2-3: 比较 [DONE] 哨兵前去除首尾空白 / Trim leading/trailing whitespace before comparing [DONE] sentinel.
const char* ws = " \t\r\n";
size_t start = data.find_first_not_of(ws);
if (start != std::string::npos) {
@@ -244,7 +263,7 @@ static bool parse_sse_line(const std::string& line, std::string& token_out,
}
if (data == "[DONE]") {
token_out.clear();
return true; // 流结束信号
return true; // 流结束信号 / stream end signal
}
try {
@@ -254,12 +273,12 @@ static bool parse_sse_line(const std::string& line, std::string& token_out,
if (!choices.empty()) {
auto delta = choices[0].as_object()["delta"].as_object();
// W20.2: 处理 delta["tool_calls"] 增量 chunk
// DeepSeek/OpenAI 流式模式 tool_calls 跨多个 SSE 事件分片传输
// 事件 1: {"index":0, "id":"call_xxx", "function":{"name":"foo"}}
// 事件 2: {"index":0, "function":{"arguments":"{\"bar\":"}}
// 事件 3: {"index":0, "function":{"arguments":"1}"}}
// 需要按 index 累积 id/name/arguments
// W20.2: 处理 delta["tool_calls"] 增量 chunk / Handle delta["tool_calls"] incremental chunks
// DeepSeek/OpenAI 流式模式 tool_calls 跨多个 SSE 事件分片传输 / DeepSeek/OpenAI streaming mode: tool_calls transmitted across multiple SSE event chunks:
// 事件 1 / Event 1: {"index":0, "id":"call_xxx", "function":{"name":"foo"}}
// 事件 2 / Event 2: {"index":0, "function":{"arguments":"{\"bar\":"}}
// 事件 3 / Event 3: {"index":0, "function":{"arguments":"1}"}}
// 需要按 index 累积 id/name/arguments / Need to accumulate id/name/arguments by index.
if (delta.contains("tool_calls") && ctx) {
auto tc_array = delta["tool_calls"].as_array();
for (auto& tc_val : tc_array) {
@@ -288,7 +307,7 @@ static bool parse_sse_line(const std::string& line, std::string& token_out,
}
}
}
return false; // tool_calls 已处理,无内容 token 给用户回调
return false; // tool_calls 已处理,无内容 token 给用户回调 / tool_calls processed, no content token for user callback
}
if (delta.contains("content")) {
@@ -297,14 +316,15 @@ static bool parse_sse_line(const std::string& line, std::string& token_out,
}
}
} catch (...) {
// 忽略解析失败
// 忽略解析失败 / Ignore parse failures
}
return false;
}
// ============================================================================
// configure 实现
// configure 实现 / configure implementation
// ============================================================================
// 配置插件:provider、endpoint、auth、model 和生成参数 / Configure the plugin with provider, endpoint, auth, model, and generation parameters.
static int my_configure(const char* provider, const char* base_url,
const char* api_key, const char* model,
int max_tokens, double temperature)
@@ -319,7 +339,7 @@ static int my_configure(const char* provider, const char* base_url,
const dstalk_host_api_t* host = g_host.load(std::memory_order_acquire);
if (host) {
// W20.2: 从 tools service 缓存 tools_json供 chat/chat_stream 复用
// W20.2: 从 tools service 缓存 tools_json供 chat/chat_stream 复用 / Cache tools_json from tools service for reuse in chat/chat_stream
auto* tools_svc = reinterpret_cast<const dstalk_tools_service_t*>(
host->query_service("tools", 1));
if (tools_svc && tools_svc->get_tools_json) {
@@ -348,8 +368,9 @@ static int my_configure(const char* provider, const char* base_url,
}
// ============================================================================
// chat 实现
// chat 实现 / chat implementation
// ============================================================================
// 非流式 chat completion:发送 history + user input,返回完整响应 / Non-streaming chat completion: send history + user input, return full response.
static dstalk_chat_result_t my_chat(
const dstalk_message_t* history, int history_len,
const char* user_input,
@@ -412,29 +433,29 @@ static dstalk_chat_result_t my_chat(
}
// ============================================================================
// chat_stream 实现
// chat_stream 实现 / chat_stream implementation
// ============================================================================
// 行回调:解析 SSE line将 token 传递给用户回调
// 行回调:解析 SSE line,将 token 传递给用户回调 / SSE line callback: parses each line and forwards content tokens to the user callback.
static int sse_line_callback(const char* line, void* userdata)
{
try {
auto* ctx = static_cast<StreamContext*>(userdata);
if (!line || !line[0]) return 1; // 空行,继续
if (!line || !line[0]) return 1; // 空行,继续 / empty line, continue
std::string line_str(line);
std::string token;
if (!parse_sse_line(line_str, token, ctx)) return 1; // 非 data/tool_calls 行,继续
if (!parse_sse_line(line_str, token, ctx)) return 1; // 非 data/tool_calls 行,继续 / not a data/tool_calls line, continue
if (token.empty()) return 0; // [DONE],停止
if (token.empty()) return 0; // [DONE],停止 / [DONE], stop
ctx->accumulated += token;
if (ctx->user_cb) {
return ctx->user_cb(token.c_str(), ctx->userdata);
}
return 1; // 继续
return 1; // 继续 / continue
} catch (const std::exception& e) {
const dstalk_host_api_t* host = g_host.load(std::memory_order_acquire);
if (host && host->log) host->log(DSTALK_LOG_ERROR, "[deepseek] sse_line_callback exception: %s", e.what());
@@ -446,6 +467,9 @@ static int sse_line_callback(const char* line, void* userdata)
}
}
// 流式 chat completion:以 stream=true 发送 history + user input,通过回调传递 token。
// 在 SSE 分片中累积 tool_calls 并在结束时序列化 / Streaming chat completion: send history + user input with stream=true, deliver tokens
// via callback. Accumulates tool_calls across SSE chunks and serializes them at the end.
static dstalk_chat_result_t my_chat_stream(
const dstalk_message_t* history, int history_len,
const char* user_input,
@@ -488,10 +512,10 @@ static dstalk_chat_result_t my_chat_stream(
r.http_status = status_code;
// 检查传输层错误或非 2xx 状态
// 检查传输层错误或非 2xx 状态 / Check transport errors or non-2xx status
if (status_code < 200 || status_code >= 300) {
r.ok = 0;
// 尝试从响应体提取错误信息
// 尝试从响应体提取错误信息 / Try to extract error info from response body
if (response_body && response_body[0]) {
try {
auto jv = json::parse(response_body);
@@ -518,7 +542,7 @@ static dstalk_chat_result_t my_chat_stream(
if (response_body && host) host->free(response_body);
// W20.2: 成功条件 = 有内容 OR 有 tool_callstool-only 响应如 function calling
// W20.2: 成功条件 = 有内容 OR 有 tool_callstool-only 响应如 function calling / Success = has content OR has tool_calls (tool-only responses like function calling)
bool has_content = !ctx.accumulated.empty();
bool has_tool_calls = !ctx.tool_calls.empty();
@@ -533,7 +557,7 @@ static dstalk_chat_result_t my_chat_stream(
r.content = has_content
? host->strdup(ctx.accumulated.c_str()) : nullptr;
// 序列化累积的 tool_calls 为 JSON兼容 OpenAI tool_calls 格式)
// 序列化累积的 tool_calls 为 JSON兼容 OpenAI tool_calls 格式) / Serialize accumulated tool_calls to JSON (OpenAI-compatible tool_calls format)
if (has_tool_calls) {
json::array tc_array;
for (auto& tc : ctx.tool_calls) {
@@ -572,8 +596,9 @@ static dstalk_chat_result_t my_chat_stream(
}
// ============================================================================
// free_result 实现
// free_result 实现 / free_result implementation
// ============================================================================
// 释放 chat result 结构体中所有主机分配的字符串字段 / Free all host-allocated string fields in a chat result struct.
static void my_free_result(dstalk_chat_result_t* result)
{
const dstalk_host_api_t* host = g_host.load(std::memory_order_acquire);
@@ -584,7 +609,7 @@ static void my_free_result(dstalk_chat_result_t* result)
}
// ============================================================================
// 服务 vtable
// 服务 vtable / Service vtable
// ============================================================================
static dstalk_ai_service_t g_service = {
&my_configure,
@@ -594,8 +619,9 @@ static dstalk_ai_service_t g_service = {
};
// ============================================================================
// 生命周期
// 生命周期 / Lifecycle
// ============================================================================
// 插件初始化:查询 http 和 config 服务,注册 ai.deepseek 服务 / Plugin init: query http and config services, register ai.deepseek service.
static int on_init(const dstalk_host_api_t* host)
{
try {
@@ -624,6 +650,7 @@ static int on_init(const dstalk_host_api_t* host)
}
}
// 插件关闭:从内存安全擦除 API key,清空服务指针 / Plugin shutdown: securely erase API key from memory, null out service pointers.
static void on_shutdown()
{
try {
@@ -644,12 +671,12 @@ static void on_shutdown()
}
// ============================================================================
// 插件描述符
// 插件描述符 / Plugin descriptor
// ============================================================================
static dstalk_plugin_info_t g_info = {
/* .name = */ "deepseek-ai",
/* .version = */ "1.0.0",
/* .description = */ "DeepSeek AI provider (OpenAI-compatible API)",
/* .description = */ "DeepSeek AI provider (OpenAI-compatible API) / DeepSeek AI 提供者 (OpenAI 兼容 API)",
/* .api_version = */ DSTALK_API_VERSION,
/* .dependencies = */ { "http", "config", NULL },
/* .on_init = */ on_init,
@@ -657,6 +684,7 @@ static dstalk_plugin_info_t g_info = {
/* .on_event = */ nullptr,
};
// 必须入口点:返回插件描述符给主机 / Mandatory entry point: returns the plugin descriptor to the host.
extern "C" DSTALK_PLUGIN_EXPORT dstalk_plugin_info_t* dstalk_plugin_init(void)
{
return &g_info;