Add metadata validation script and module documentation

- Introduced a new Python script `check_agents_metadata.py` for validating agent metadata, including YAML parsing, rating ranges, and cross-references.
- Added usage instructions and exit codes for the script.
- Created a new markdown file `模块目录和功能说明.md` to outline the directory structure and functionality of the modules.
- Added a text file `说明此文件不可AI修改.txt` to specify that certain files should not be modified by AI, including important information about the `dstalk` framework and its modules.
This commit is contained in:
2026-05-31 00:00:58 +08:00
parent 3cc9ee95e4
commit f2da0f2ed4
43 changed files with 2467 additions and 800 deletions

View File

@@ -1,8 +1,12 @@
// ============================================================================
// deepseek_plugin_test.cpp — DeepSeek AI 插件单元测试
// W21.6 (qa-wang): 覆盖 SSE 解析 / [DONE] 匹配 / JSON 请求构建 / tool_calls
// 通过 #include plugin source 访问 file-scope static 函数
// ============================================================================
/*
* @file deepseek_plugin_test.cpp
* @brief DeepSeek AI plugin unit tests: SSE parsing (parse_sse_line edge cases),
* [DONE] sentinel matching, tool_calls delta extraction, request building,
* append_history, extract_host_port, secure_zero, and null-safety.
* DeepSeek AI 插件单元测试：SSE 解析（parse_sse_line 边界情况）、[DONE] 标记匹配、
* tool_calls delta 提取、请求构建、append_history、extract_host_port、secure_zero、空指针安全。
* Copyright (c) 2026 dstalk contributors. GPLv3.
*/
#define BOOST_JSON_HEADER_ONLY
#define BOOST_ALL_NO_LIB
#include "../plugins/deepseek/src/deepseek_plugin.cpp"
@@ -12,6 +16,7 @@
#include <string>
static int g_failures = 0;
// Lightweight assertion macro: increments g_failures counter on failure
#define CHECK(cond, msg) do { \
if (cond) { \
std::cout << "[OK] " << (msg) << "\n"; \
@@ -22,6 +27,8 @@ static int g_failures = 0;
} while (0)
// Test helper: populate g_cfg for build functions
// Test helper: populate g_cfg with valid deepseek defaults before build_* tests
// 测试辅助函数:为 build_* 测试填充 g_cfg 的有效 deepseek 默认值
static void setup_config() {
g_cfg.provider = "deepseek";
g_cfg.base_url = "https://api.deepseek.com/v1";
@@ -31,10 +38,19 @@ static void setup_config() {
g_cfg.temperature = 0.7;
}
// DeepSeek 插件测试 (W21.6)：parse_sse_line 无效/畸形输入、[DONE] 标记及空白变体、
// content delta 提取、tool_calls delta 累积、build_request_json（基础、tools、边界）、
// build_headers_json、extract_host_port、secure_zero、append_history（所有消息类型）、
// my_free_result、my_configure。
// DeepSeek plugin tests (W21.6): parse_sse_line invalid/malformed inputs, [DONE] sentinel
// with whitespace variants, content delta extraction, tool_calls delta accumulation,
// build_request_json (basic, tools, edge cases), build_headers_json, extract_host_port,
// secure_zero, append_history (all message types), my_free_result, and my_configure.
int main()
{
// ================================================================
// Test Block 1: parse_sse_line — invalid/malformed inputs
// 测试块 1：parse_sse_line — 无效/畸形输入
// ================================================================
std::cout << "\n--- Block 1: parse_sse_line invalid/malformed ---\n";
@@ -58,27 +74,28 @@ int main()
{
// "data:" without space — rfind("data: ", 0) should fail
// "data:" 无空格 — rfind("data: ", 0) 应失败
std::string token;
bool ret = parse_sse_line("data:{\"x\":1}", token, nullptr);
CHECK(!ret, "T1.4: 'data:' without trailing space returns false (rfind mismatch)");
}
{
// "data: " followed by invalid JSON
// "data: " followed by invalid JSON / "data: " 后跟无效 JSON
std::string token;
bool ret = parse_sse_line("data: not valid json!!!", token, nullptr);
CHECK(!ret, "T1.5: 'data: ' + invalid JSON returns false (no crash)");
}
{
// "data: " followed by binary garbage
// "data: " followed by binary garbage / "data: " 后跟二进制垃圾
std::string token;
bool ret = parse_sse_line("data: \x00\x01\xFF\xFE", token, nullptr);
CHECK(!ret, "T1.6: 'data: ' + binary garbage returns false (no crash)");
}
{
// Empty data after "data: "
// Empty data after "data: " / "data: " 后数据为空
std::string token;
bool ret = parse_sse_line("data: ", token, nullptr);
CHECK(!ret, "T1.7: 'data: ' with empty payload returns false");
@@ -86,6 +103,7 @@ int main()
// ================================================================
// Test Block 2: parse_sse_line — [DONE] sentinel
// 测试块 2：parse_sse_line — [DONE] 标记
// ================================================================
std::cout << "\n--- Block 2: parse_sse_line [DONE] sentinel ---\n";
@@ -97,7 +115,7 @@ int main()
}
{
// [DONE] with leading whitespace
// [DONE] with leading whitespace / [DONE] 前导空白
std::string token;
bool ret = parse_sse_line("data: [DONE]", token, nullptr);
CHECK(ret, "T2.3: 'data: [DONE]' (leading spaces) returns true");
@@ -105,7 +123,7 @@ int main()
}
{
// [DONE] with trailing whitespace
// [DONE] with trailing whitespace / [DONE] 尾部空白
std::string token;
bool ret = parse_sse_line("data: [DONE] ", token, nullptr);
CHECK(ret, "T2.5: 'data: [DONE] ' (trailing spaces) returns true");
@@ -113,7 +131,7 @@ int main()
}
{
// [DONE] with tabs and newlines around it
// [DONE] with tabs and newlines around it / [DONE] 周围有制表符和换行符
std::string token;
bool ret = parse_sse_line("data: \t [DONE] \t\r\n", token, nullptr);
CHECK(ret, "T2.7: '[DONE]' with mixed whitespace returns true");
@@ -121,7 +139,7 @@ int main()
}
{
// [DONE] without spaces — exact match
// [DONE] without spaces — exact match / [DONE] 精确匹配(无空格)
std::string token;
bool ret = parse_sse_line("data: [DONE]", token, nullptr);
CHECK(ret, "T2.9: '[DONE]' exact match returns true");
@@ -129,13 +147,14 @@ int main()
{
// "[done]" lowercase — should NOT match (case-sensitive)
// "[done]" 小写 — 不应匹配(大小写敏感)
std::string token;
bool ret = parse_sse_line("data: [done]", token, nullptr);
CHECK(!ret, "T2.10: '[done]' lowercase NOT treated as DONE (case-sensitive)");
}
{
// "[DONE" without closing bracket
// "[DONE" without closing bracket / "[DONE" 缺少闭括号
std::string token;
bool ret = parse_sse_line("data: [DONE", token, nullptr);
CHECK(!ret, "T2.11: '[DONE' (no closing bracket) not treated as DONE");
@@ -143,6 +162,7 @@ int main()
// ================================================================
// Test Block 3: parse_sse_line — content delta
// 测试块 3：parse_sse_line — content delta
// ================================================================
std::cout << "\n--- Block 3: parse_sse_line content delta ---\n";
@@ -166,7 +186,7 @@ int main()
}
{
// Delta with no content field
// Delta with no content field / delta 不含 content 字段
std::string token;
const char* json =
"data: {\"choices\":[{\"delta\":{},\"index\":0}]}";
@@ -175,7 +195,7 @@ int main()
}
{
// Empty choices array
// Empty choices array / 空 choices 数组
std::string token;
const char* json =
"data: {\"choices\":[]}";
@@ -184,7 +204,7 @@ int main()
}
{
// Single character token (typical streaming)
// Single character token (typical streaming) / 单字符 token（典型流式）
std::string token;
const char* json =
"data: {\"choices\":[{\"delta\":{\"content\":\"H\"},\"index\":0}]}";
@@ -194,7 +214,7 @@ int main()
}
{
// Multi-byte UTF-8 content (emoji) in delta
// Multi-byte UTF-8 content (emoji) in delta / delta 中的多字节 UTF-8 内容（emoji）
std::string token;
const char* json =
"data: {\"choices\":[{\"delta\":{\"content\":\"\\uD83D\\uDE00\"},"
@@ -207,7 +227,7 @@ int main()
}
{
// Malformed JSON structure — no "delta" key
// Malformed JSON structure — no "delta" key / 畸形 JSON 结构 — 无 "delta" key
std::string token;
const char* json =
"data: {\"choices\":[{\"no_delta\":{},\"index\":0}]}";
@@ -217,6 +237,7 @@ int main()
{
// Realistic DeepSeek streaming chunk (with finish_reason)
// 真实的 DeepSeek 流式数据块（含 finish_reason）
std::string token;
const char* json =
"data: {\"id\":\"chatcmpl-xxx\","
@@ -233,11 +254,13 @@ int main()
// ================================================================
// Test Block 4: parse_sse_line — tool_calls delta
// 测试块 4：parse_sse_line — tool_calls delta
// ================================================================
std::cout << "\n--- Block 4: parse_sse_line tool_calls delta ---\n";
{
// tool_calls chunk with id + function name (first chunk)
// tool_calls 数据块含 id + function name（首个数据块）
StreamContext ctx = {};
std::string token;
const char* json =
@@ -258,8 +281,9 @@ int main()
{
// tool_calls arguments chunk (second chunk, same index)
// tool_calls arguments 数据块（第二个数据块，相同 index）
StreamContext ctx;
// First, set up the initial state
// First, set up the initial state / 先设置初始状态
ctx.tool_calls.push_back({0, "call_abc123", "get_weather", ""});
std::string token;
@@ -276,7 +300,7 @@ int main()
}
{
// tool_calls final arguments chunk
// tool_calls final arguments chunk / tool_calls 最终 arguments 数据块
StreamContext ctx;
ctx.tool_calls.push_back({0, "call_abc123", "get_weather", "{\"city\":\""});
@@ -295,6 +319,7 @@ int main()
{
// tool_calls with null ctx — should skip tool_calls processing
// tool_calls 配合 null ctx — 应跳过 tool_calls 处理
std::string token;
const char* json =
"data: {\"choices\":[{\"index\":0,"
@@ -306,6 +331,7 @@ int main()
{
// Multiple tool_calls in single chunk (unusual but valid)
// 单个数据块中有多个 tool_calls（不常见但合法）
StreamContext ctx;
std::string token;
const char* json =
@@ -325,6 +351,7 @@ int main()
// ================================================================
// Test Block 5: build_request_json — basic cases
// 测试块 5：build_request_json — 基础用例
// ================================================================
setup_config();
std::cout << "\n--- Block 5: build_request_json basic ---\n";
@@ -351,7 +378,7 @@ int main()
}
{
// With user+assistant history
// With user+assistant history / 包含 user+assistant 历史
dstalk_message_t msgs[2] = {
{"user", "What is 2+2?", nullptr, nullptr},
{"assistant", "It is 4.", nullptr, nullptr}
@@ -376,22 +403,26 @@ int main()
{
// Empty user input — no user message appended
// 空用户输入 — 不追加 user 消息
std::string json = build_request_json(
nullptr, 0, "", "", false);
CHECK(!json.empty(), "T5.13: empty user input produces valid JSON");
// DeepSeek's build_request_json checks `if (!user_input.empty())` before adding
// So there should be no user message for empty input
// DeepSeek 的 build_request_json 在添加前检查 `if (!user_input.empty())`
// 因此空输入时不应有 user 消息
CHECK(json.find("\"role\":\"user\"") == std::string::npos,
"T5.14: empty user input NOT added to messages (DeepSeek guard)");
}
// ================================================================
// Test Block 6: build_request_json — tools / edge cases
// 测试块 6：build_request_json — tools / 边界情况
// ================================================================
std::cout << "\n--- Block 6: build_request_json tools / edges ---\n";
{
// With tools_json
// With tools_json / 含 tools_json
std::string tools = "[{\"type\":\"function\","
"\"function\":{\"name\":\"get_weather\","
"\"description\":\"Get current weather\","
@@ -407,7 +438,7 @@ int main()
}
{
// Empty tools_json — no tools field
// Empty tools_json — no tools field / 空 tools_json — 无 tools 字段
std::string json = build_request_json(
nullptr, 0, "Hello", "", false);
CHECK(json.find("\"tools\"") == std::string::npos,
@@ -418,6 +449,8 @@ int main()
// Malformed tools_json — build_request_json calls json::parse()
// without try/catch, so it will throw std::exception.
// This test verifies that the exception is thrown (rather than crashing).
// 畸形 tools_json — build_request_json 调用 json::parse() 不含 try/catch，
// 因此会抛出 std::exception。本测试验证异常被抛出（而非崩溃）。
bool threw = false;
try {
build_request_json(nullptr, 0, "Hello", "NOT JSON", false);
@@ -430,7 +463,7 @@ int main()
}
{
// History with null role
// History with null role / null 角色的历史
dstalk_message_t msgs[1] = {
{nullptr, "some content", nullptr, nullptr}
};
@@ -439,7 +472,7 @@ int main()
}
{
// History with null content
// History with null content / null 内容的历史
dstalk_message_t msgs[1] = {
{"user", nullptr, nullptr, nullptr}
};
@@ -448,7 +481,7 @@ int main()
}
{
// Very long message
// Very long message / 超长消息
std::string long_input(5000, 'A');
std::string json = build_request_json(
nullptr, 0, long_input, "", false);
@@ -458,6 +491,7 @@ int main()
// ================================================================
// Test Block 7: build_headers_json
// 测试块 7：build_headers_json
// ================================================================
std::cout << "\n--- Block 7: build_headers_json ---\n";
@@ -470,7 +504,7 @@ int main()
}
{
// Empty API key
// Empty API key / 空 API key
std::string headers = build_headers_json("");
CHECK(headers.find("Authorization") != std::string::npos,
"T7.3: Authorization header present with empty key");
@@ -480,6 +514,7 @@ int main()
// ================================================================
// Test Block 8: extract_host_port (same logic as anthropic)
// 测试块 8：extract_host_port（逻辑同 anthropic）
// ================================================================
std::cout << "\n--- Block 8: extract_host_port ---\n";
@@ -525,6 +560,7 @@ int main()
// ================================================================
// Test Block 9: secure_zero
// 测试块 9：secure_zero
// ================================================================
std::cout << "\n--- Block 9: secure_zero ---\n";
@@ -546,6 +582,7 @@ int main()
// ================================================================
// Test Block 10: append_history
// 测试块 10：append_history
// ================================================================
std::cout << "\n--- Block 10: append_history ---\n";
@@ -561,7 +598,7 @@ int main()
}
{
// Tool message (should include tool_call_id)
// Tool message (should include tool_call_id) / Tool 消息（应包含 tool_call_id）
json::array msgs;
dstalk_message_t m = {"tool", "result data", "call_xyz", nullptr};
append_history(msgs, &m, 1);
@@ -575,7 +612,7 @@ int main()
}
{
// Assistant with tool_calls_json
// Assistant with tool_calls_json / Assistant 含 tool_calls_json
json::array msgs;
const char* tc_json = "[{\"id\":\"call_1\",\"type\":\"function\","
"\"function\":{\"name\":\"get_weather\",\"arguments\":\"{}\"}}]";
@@ -589,14 +626,14 @@ int main()
}
{
// Empty history (0 messages)
// Empty history (0 messages) / 空历史（0 条消息）
json::array msgs;
append_history(msgs, nullptr, 0);
CHECK(msgs.size() == 0, "T10.12: empty history produces empty array");
}
{
// Multiple messages
// Multiple messages / 多条消息
json::array msgs;
dstalk_message_t ms[2] = {
{"user", "Q1", nullptr, nullptr},
@@ -608,6 +645,7 @@ int main()
{
// Null role and null content — default to empty strings
// null 角色与 null 内容 — 默认为空字符串
json::array msgs;
dstalk_message_t m = {nullptr, nullptr, nullptr, nullptr};
append_history(msgs, &m, 1);
@@ -619,11 +657,13 @@ int main()
// ================================================================
// Test Block 11: my_free_result — null safety
// 测试块 11：my_free_result — 空指针安全
// ================================================================
std::cout << "\n--- Block 11: my_free_result null safety ---\n";
{
// g_host is nullptr, so free_result should early-return
// g_host 为 nullptr，free_result 应提前返回
my_free_result(nullptr);
CHECK(true, "T11.1: free_result(nullptr) does not crash (null host)");
}
@@ -637,6 +677,7 @@ int main()
// ================================================================
// Test Block 12: my_configure — null host safety
// 测试块 12：my_configure — null host 安全
// ================================================================
std::cout << "\n--- Block 12: my_configure null host safety ---\n";
@@ -656,7 +697,7 @@ int main()
}
// ================================================================
// Summary
// Summary / 总结
// ================================================================
std::cout << "\n";
if (g_failures == 0) {