W21: anthropic Stream+Tools + --prompt batch + sanitizer fix + plugin unit tests (W21.1-W21.6)
Some checks failed
CI / Determine matrix (push) Has been cancelled
CI / ${{ matrix.os }} / ${{ matrix.build_type }} (push) Has been cancelled
CI / Sanitizer (ASan+UBSan) / ubuntu-24.04 (push) Has been cancelled

- W21.1: ci-sanitize preset 独立 Linux-clang + ci-threadsan (TSan)
- W21.2: anthropic tool_use content_block 解析 + configure 缓存 tools_json
- W21.3: --prompt 非交互批处理模式
- W21.4: session auto-save 失败告警 + 当前目录 fallback
- W21.5: smoke 补 tool_calls 边界用例 4 块 12 断言
- W21.6: anthropic 11 块 78 CHECK + deepseek 12 块 78 CHECK

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-27 20:40:58 +08:00
parent 20ead86e88
commit b2b381b9b3
14 changed files with 1671 additions and 34 deletions

View File

@@ -6,6 +6,7 @@
#include <atomic>
#include <cstring>
#include <string>
#include <vector>
namespace json = boost::json;
@@ -28,6 +29,7 @@ struct PluginConfig {
double temperature = 0.7;
};
static PluginConfig g_cfg;
static std::string g_tools_json; // W21.2: cached by configure(), consumed by chat/chat_stream
// ============================================================================
// 安全擦除:用 volatile 写零循环防止编译器优化
@@ -79,6 +81,7 @@ static std::string build_headers_json()
static std::string build_request_json(
const dstalk_message_t* history, int history_len,
const std::string& user_input,
const std::string& tools_json,
bool stream)
{
json::object root;
@@ -121,6 +124,11 @@ static std::string build_request_json(
root["temperature"] = g_cfg.temperature;
}
// W21.2: tools 定义传递给 API
if (!tools_json.empty()) {
root["tools"] = json::parse(tools_json);
}
return json::serialize(root);
}
@@ -161,21 +169,51 @@ static void parse_response(const char* body, int http_status,
auto obj = jv.as_object();
auto content = obj["content"].as_array();
if (!content.empty()) {
// 取第一个 text block
// W21.2: 提取 text 和 tool_use content blocks
std::string text_content;
json::array tool_use_blocks;
for (const auto& block : content) {
auto bobj = block.as_object();
if (bobj.contains("type") &&
json::value_to<std::string>(bobj["type"]) == "text") {
std::string text = json::value_to<std::string>(bobj["text"]);
r.content = h->strdup(text.c_str());
r.ok = 1;
r.error = nullptr;
r.tool_calls_json = nullptr;
return;
if (!bobj.contains("type")) continue;
std::string btype = json::value_to<std::string>(bobj["type"]);
if (btype == "text") {
text_content = json::value_to<std::string>(bobj["text"]);
} else if (btype == "tool_use") {
// 转换为 OpenAI 兼容格式: {id, type:"function", function:{name, arguments}}
json::object tc;
tc["id"] = bobj["id"];
tc["type"] = "function";
json::object func;
func["name"] = bobj["name"];
func["arguments"] = json::serialize(bobj["input"]);
tc["function"] = func;
tool_use_blocks.push_back(std::move(tc));
}
}
if (!tool_use_blocks.empty()) {
r.tool_calls_json = h->strdup(
json::serialize(tool_use_blocks).c_str());
} else {
r.tool_calls_json = nullptr;
}
if (!text_content.empty()) {
r.content = h->strdup(text_content.c_str());
r.ok = 1;
r.error = nullptr;
return;
} else if (!tool_use_blocks.empty()) {
// tool-only 响应
r.content = nullptr;
r.ok = 1;
r.error = nullptr;
return;
}
r.ok = 0;
r.error = h->strdup("no text content block found");
r.error = h->strdup("no text or tool_use content block found");
} else {
r.ok = 0;
r.error = h->strdup("empty response");
@@ -200,9 +238,26 @@ static void parse_response(const char* body, int http_status,
// SSE 事件解析Anthropic 格式: event/content_block_delta)
// ============================================================================
// 状态机:记录当前正在处理的事件类型
// 简化版:直接从 data: 行解析,不依赖 event: 行
static bool parse_sse_data(const std::string& data, std::string& token_out)
// W21.2: 按 content_block index 累积 Anthropic tool_use 增量
struct ToolCallAccum {
int index = -1;
std::string id;
std::string name;
std::string arguments; // 从 input_json_delta.partial_json 累积
};
struct StreamContext {
const dstalk_host_api_t* host;
dstalk_stream_cb user_cb;
void* userdata;
std::string accumulated;
bool saw_data_line = false;
std::vector<ToolCallAccum> tool_calls; // W21.2: 按 index 累积 tool_use content blocks
};
// W21.2: 解析 Anthropic SSE 事件,含 tool_use content_block 增量解析
static bool parse_sse_data(const std::string& data, std::string& token_out,
StreamContext* ctx)
{
try {
auto jv = json::parse(data);
@@ -212,6 +267,34 @@ static bool parse_sse_data(const std::string& data, std::string& token_out)
if (!type_ptr || !type_ptr->is_string()) return false;
std::string type = json::value_to<std::string>(*type_ptr);
if (type == "content_block_start") {
// content_block_start 可能为 tool_use
auto* cb = obj.if_contains("content_block");
if (!cb || !cb->is_object()) return false;
auto& cb_obj = cb->as_object();
auto* cb_type = cb_obj.if_contains("type");
if (!cb_type || !cb_type->is_string()) return false;
std::string cb_type_str = json::value_to<std::string>(*cb_type);
if (cb_type_str == "tool_use" && ctx) {
auto* idx_ptr = obj.if_contains("index");
int idx = idx_ptr ? static_cast<int>(
json::value_to<int64_t>(*idx_ptr)) : -1;
if (idx < 0) return false;
while (static_cast<int>(ctx->tool_calls.size()) <= idx) {
ctx->tool_calls.push_back({});
}
auto& acc = ctx->tool_calls[idx];
acc.index = idx;
if (cb_obj.contains("id") && cb_obj["id"].is_string())
acc.id = json::value_to<std::string>(cb_obj["id"]);
if (cb_obj.contains("name") && cb_obj["name"].is_string())
acc.name = json::value_to<std::string>(cb_obj["name"]);
}
return false;
}
if (type == "content_block_delta") {
auto* delta = obj.if_contains("delta");
if (!delta || !delta->is_object()) return false;
@@ -227,12 +310,25 @@ static bool parse_sse_data(const std::string& data, std::string& token_out)
token_out = json::value_to<std::string>(*text);
return true;
}
} else if (delta_type == "input_json_delta" && ctx) {
// W21.2: 累积 tool_use arguments 分片
auto* pj = dobj.if_contains("partial_json");
if (pj && pj->is_string()) {
auto* idx_ptr = obj.if_contains("index");
int idx = idx_ptr ? static_cast<int>(
json::value_to<int64_t>(*idx_ptr)) : -1;
if (idx >= 0 && idx < static_cast<int>(ctx->tool_calls.size())) {
ctx->tool_calls[idx].arguments +=
json::value_to<std::string>(*pj);
}
}
return false;
}
} else if (type == "message_stop") {
token_out.clear();
return true; // 流结束
}
// 忽略: message_start, content_block_start, content_block_stop, ping, message_delta
// 忽略: message_start, content_block_stop, ping, message_delta
} catch (...) {
// 解析失败忽略
}
@@ -256,6 +352,17 @@ static int my_configure(const char* provider, const char* base_url,
const auto* h = g_host.load(std::memory_order_acquire);
if (h) {
// W21.2: 从 tools service 缓存 tools_json供 chat/chat_stream 复用
auto* tools_svc = reinterpret_cast<const dstalk_tools_service_t*>(
h->query_service("tools", 1));
if (tools_svc && tools_svc->get_tools_json) {
char* json = tools_svc->get_tools_json();
if (json) {
g_tools_json = json;
h->free(json);
}
}
h->log(DSTALK_LOG_INFO,
"[anthropic] configured: model=%s base_url=%s max_tokens=%d temperature=%.2f",
g_cfg.model.c_str(), g_cfg.base_url.c_str(),
@@ -279,7 +386,7 @@ static int my_configure(const char* provider, const char* base_url,
static dstalk_chat_result_t my_chat(
const dstalk_message_t* history, int history_len,
const char* user_input,
const char* /*tools_json*/)
const char* tools_json)
{
try {
dstalk_chat_result_t r = {};
@@ -298,7 +405,8 @@ static dstalk_chat_result_t my_chat(
std::string target_path = target + "/v1/messages";
std::string body = build_request_json(history, history_len,
user_input ? user_input : "", false);
user_input ? user_input : "",
tools_json ? tools_json : g_tools_json, false);
std::string headers_json = build_headers_json();
@@ -342,14 +450,6 @@ static dstalk_chat_result_t my_chat(
// chat_stream
// ============================================================================
struct StreamContext {
const dstalk_host_api_t* host;
dstalk_stream_cb user_cb;
void* userdata;
std::string accumulated;
bool saw_data_line = false;
};
// 行回调
static int sse_line_callback(const char* line, void* userdata)
{
@@ -363,7 +463,7 @@ static int sse_line_callback(const char* line, void* userdata)
if (line_str.rfind("data: ", 0) == 0) {
std::string data = line_str.substr(6);
std::string token;
if (parse_sse_data(data, token)) {
if (parse_sse_data(data, token, ctx)) {
ctx->saw_data_line = true;
if (token.empty()) {
// message_stop
@@ -410,7 +510,7 @@ static dstalk_chat_result_t my_chat_stream(
std::string target_path = target + "/v1/messages";
std::string body = build_request_json(history, history_len,
user_input ? user_input : "", true);
user_input ? user_input : "", g_tools_json, true);
std::string headers_json = build_headers_json();
@@ -460,7 +560,11 @@ static dstalk_chat_result_t my_chat_stream(
if (response_body) host->free(response_body);
if (ctx.accumulated.empty() && !ctx.saw_data_line) {
// W21.2: 成功条件 = 有内容 OR 有 tool_callstool-only 响应如 function calling
bool has_content = !ctx.accumulated.empty();
bool has_tool_calls = !ctx.tool_calls.empty();
if (!has_content && !has_tool_calls) {
r.ok = 0;
r.error = host->strdup("no content received");
r.content = nullptr;
@@ -468,8 +572,28 @@ static dstalk_chat_result_t my_chat_stream(
} else {
r.ok = 1;
r.error = nullptr;
r.content = host->strdup(ctx.accumulated.c_str());
r.tool_calls_json = nullptr;
r.content = has_content
? host->strdup(ctx.accumulated.c_str()) : nullptr;
// W21.2: 序列化累积的 tool_calls 为 JSON兼容 OpenAI tool_calls 格式)
if (has_tool_calls) {
json::array tc_array;
for (auto& tc : ctx.tool_calls) {
json::object tc_obj;
tc_obj["index"] = tc.index;
if (!tc.id.empty()) tc_obj["id"] = tc.id;
tc_obj["type"] = "function";
json::object func;
if (!tc.name.empty()) func["name"] = tc.name;
func["arguments"] = tc.arguments;
tc_obj["function"] = func;
tc_array.push_back(std::move(tc_obj));
}
std::string tc_json = json::serialize(tc_array);
r.tool_calls_json = host ? host->strdup(tc_json.c_str()) : nullptr;
} else {
r.tool_calls_json = nullptr;
}
}
return r;
} catch (const std::exception& e) {