Add metadata validation script and module documentation

- Introduced a new Python script `check_agents_metadata.py` for validating agent metadata, including YAML parsing, rating ranges, and cross-references.
- Added usage instructions and exit codes for the script.
- Created a new markdown file `模块目录和功能说明.md` to outline the directory structure and functionality of the modules.
- Added a text file `说明此文件不可AI修改.txt` to specify that certain files should not be modified by AI, including important information about the `dstalk` framework and its modules.
This commit is contained in:
2026-05-31 00:00:58 +08:00
parent 3cc9ee95e4
commit f2da0f2ed4
43 changed files with 2467 additions and 800 deletions

View File

@@ -1,9 +1,10 @@
// ============================================================================
// context_plugin_test.cpp — 上下文插件单元测试
// ============================================================================
// W18.1 (qa-wang + architect-lin): 覆盖 token 计数、trim、UTF-8 边界、
// 0xC0/0xC1 过短编码检测。修复 F-11.1-3/4/5/6 后补充测试
// ============================================================================
/*
* @file context_plugin_test.cpp
* @brief Context plugin unit tests: token counting (ASCII, CJK, mixed, emoji),
* UTF-8 truncation safety, trim edge cases, and system message preservation.
* Context 插件单元测试：token 计数 (ASCII、CJK、混合、emoji)、UTF-8 截断安全、trim 边界情况、系统消息保留
* Copyright (c) 2026 dstalk contributors. GPLv3.
*/
#include "dstalk/dstalk_host.h"
@@ -14,6 +15,7 @@
#include <string>
static int g_failures = 0;
// Lightweight assertion macro: increments g_failures counter on failure
#define CHECK(cond, msg) do { \
if (cond) { \
std::cout << "[OK] " << (msg) << "\n"; \
@@ -23,6 +25,12 @@ static int g_failures = 0;
} \
} while (0)
// Context 插件测试：token 计数边界 (null、空、ASCII、CJK、混合)、截断 UTF-8 边界保护 (F-11.1-4)、
// 0xC0/0xC1 超长编码 (F-11.1-6)、多消息 token、trim 的各种场景、系统消息保留、4 字节 emoji、孤立的续字节。
// Context plugin tests: token counting edge cases (null, empty, ASCII, CJK, mixed),
// truncated UTF-8 bounds protection (F-11.1-4), 0xC0/0xC1 overlong encoding (F-11.1-6),
// multiple-message tokens, trim null/edge/within-limit/exceeds-limit scenarios,
// system message preservation, 4-byte emoji, and lone continuation bytes.
int main()
{
const auto dir = std::filesystem::temp_directory_path() / "dstalk-ctx-test";
@@ -54,6 +62,7 @@ int main()
// ================================================================
// Test Block 1: count_tokens edge cases (null / empty)
// 测试块 1：count_tokens 边界情况 (null / 空)
// ================================================================
std::cout << "\n--- Block 1: count_tokens edge cases ---\n";
@@ -77,6 +86,7 @@ int main()
// ================================================================
// Test Block 2: count_tokens — ASCII
// 测试块 2：count_tokens — ASCII
// ================================================================
std::cout << "\n--- Block 2: count_tokens ASCII ---\n";
@@ -107,6 +117,7 @@ int main()
// ================================================================
// Test Block 3: count_tokens — Chinese (CJK U+4E00-U+9FFF)
// 测试块 3：count_tokens — 中文 (CJK U+4E00-U+9FFF)
// ================================================================
std::cout << "\n--- Block 3: count_tokens Chinese (CJK) ---\n";
@@ -132,6 +143,7 @@ int main()
// ================================================================
// Test Block 4: count_tokens — Mixed content
// 测试块 4：count_tokens — 混合内容
// ================================================================
std::cout << "\n--- Block 4: count_tokens mixed content ---\n";
@@ -146,6 +158,7 @@ int main()
// ================================================================
// Test Block 5: Truncated UTF-8 bounds protection (F-11.1-4)
// 测试块 5：截断 UTF-8 边界保护 (F-11.1-4)
// ================================================================
std::cout << "\n--- Block 5: Truncated UTF-8 (F-11.1-4 fix) ---\n";
@@ -197,6 +210,7 @@ int main()
// ================================================================
// Test Block 6: 0xC0/0xC1 overlong encoding (F-11.1-6)
// 测试块 6：0xC0/0xC1 超长编码 (F-11.1-6)
// ================================================================
std::cout << "\n--- Block 6: 0xC0/0xC1 overlong encoding (F-11.1-6 fix) ---\n";
@@ -230,6 +244,7 @@ int main()
{
// Verify 0xC0/0xC1 are NOT treated as valid 2-byte sequences
// They should each count as 1 other_char, not as 2-byte sequence
// 验证 0xC0/0xC1 不被视为合法的 2 字节序列；它们每个应计为 1 个 other_char，而非 2 字节序列
// 0xC0 + 0xC1 + 2 ASCII = 2 other + 2 ascii
// = (2/3) + (2/4) + 4 overhead = 0 + 0 + 4 = 4
// (integer division: 2/3 = 0 for other chars, 2/4 = 0 for ASCII chars)
@@ -244,6 +259,7 @@ int main()
// ================================================================
// Test Block 7: count_tokens — multiple messages
// 测试块 7：count_tokens — 多消息
// ================================================================
std::cout << "\n--- Block 7: multiple messages ---\n";
@@ -275,6 +291,7 @@ int main()
// ================================================================
// Test Block 8: trim — null and edge cases
// 测试块 8：trim — null 和边界情况
// ================================================================
std::cout << "\n--- Block 8: trim edge cases ---\n";
@@ -291,6 +308,7 @@ int main()
// ================================================================
// Test Block 9: trim — within limit (no trimming needed)
// 测试块 9：trim — 预算内(无需裁剪)
// ================================================================
std::cout << "\n--- Block 9: trim within limit ---\n";
@@ -320,6 +338,7 @@ int main()
// ================================================================
// Test Block 10: trim — exceeds limit (trimming required)
// 测试块 10：trim — 超预算(需要裁剪)
// ================================================================
std::cout << "\n--- Block 10: trim exceeds limit ---\n";
@@ -358,6 +377,7 @@ int main()
// ================================================================
// Test Block 11: trim — system message preservation
// 测试块 11：trim — 系统消息保留
// ================================================================
std::cout << "\n--- Block 11: trim preserves system messages ---\n";
@@ -387,11 +407,12 @@ int main()
// ================================================================
// Test Block 12: count_tokens — 4-byte UTF-8 (emoji / supplementary)
// 测试块 12：count_tokens — 4 字节 UTF-8 (emoji / 补充平面)
// ================================================================
std::cout << "\n--- Block 12: 4-byte UTF-8 ---\n";
{
// U+1F600 (😀) = F0 9F 98 80
// U+1F600 (😀) = F0 9F 98 80
char buf[6] = {static_cast<char>(0xF0), static_cast<char>(0x9F),
static_cast<char>(0x98), static_cast<char>(0x80), '\0'};
dstalk_message_t msg = {"user", buf, nullptr, nullptr};
@@ -403,6 +424,7 @@ int main()
// ================================================================
// Test Block 13: count_tokens — continuation bytes as lone chars
// 测试块 13：count_tokens — 孤立的续字节
// ================================================================
std::cout << "\n--- Block 13: lone continuation bytes ---\n";