Wave 9: fix audit findings, harden ABI, deduplicate config (W12.1-W12.6)
Some checks failed
CI / Determine matrix (push) Has been cancelled
CI / ${{ matrix.os }} / ${{ matrix.build_type }} (push) Has been cancelled

- W12.1 context_plugin (engineer-zhou): wrap C ABI surface in try/catch,
  add OOM-safe strdup_message_fields helper, make g_max_tokens drive
  message-count trim (option A).
- W12.2 config refactor (architect-lin): introduce
  plugins/config/include/toml_parse.h to eliminate 74-line parser
  duplication; config_plugin delegates to host->config_get/set,
  collapsing the dual-store data island; ConfigStore::get() now copies
  via thread_local std::string to remove c_str() dangling under
  concurrent set(). Zero ABI changes.
- W12.3 CLI command parsing (engineer-zhao): guard /clear and /context
  on missing session service; refactor /file dispatch so bare
  /file write hits usage instead of unknown-command.
- W12.4 build path unification (devops-hu): set per-target
  RUNTIME_OUTPUT_DIRECTORY on dstalk-cli; remove stale
  build/dstalk-cli/dstalk-cli.exe so build/bin/ is the sole binary.
- W12.5 STATUS.md auto-refresh (engineer-li): run W11.6 script to
  regenerate STATUS from live profile/group data.
- W12.6 plugin-abi.md (writer-deng): add §8 exception safety across
  ABI boundary and §9 string return lifetime; reference real
  audit-found violations as anti-examples.

Verified: cmake build 0 error 0 warning, ctest 4/4 pass.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2026-05-27 09:19:17 +08:00
parent bb2e8c0220
commit 58869abc15
15 changed files with 750 additions and 332 deletions

View File

@@ -0,0 +1,67 @@
#pragma once
// Shared TOML parser — used by both ConfigStore (core) and config plugin.
// W12.2: Extracted from config_store.cpp:23-61 and config_plugin.cpp:28-66
// to eliminate the 74-line code duplication (W11.2 audit Finding 1).
// Does NOT support: inline tables, arrays, multi-line strings, escape sequences.
#include <string>
namespace dstalk {
namespace toml {
/// Parse a minimal TOML subset, calling on_kv(full_key, value) once for every
/// key-value pair, in document order.
///
/// Supported syntax: `[section]` headers, `key = value` / `key = "value"`
/// pairs, whole-line `#` comments and blank lines. Keys that follow a section
/// header are reported as "section.key"; keys before any header are reported
/// bare. A value wrapped in double quotes has the quotes removed; no escape
/// sequences are interpreted. Lines without '=' or with an empty key are
/// skipped silently.
///
/// Spaces and tabs are treated alike everywhere whitespace is insignificant:
/// line indentation, end of line (together with '\r'), around the key, before
/// the value, and inside the brackets of a section header.
///
/// @param content  Full text of the TOML document.
/// @param on_kv    Callable invoked as on_kv(const std::string&, const std::string&).
template<typename F>
inline void parse(const std::string& content, F&& on_kv)
{
    const char* const blanks = " \t";
    const char* const line_tail = " \t\r";

    std::string current_section;
    std::size_t pos = 0;
    while (pos < content.size()) {
        // Skip indentation; stop when only blanks remain.
        pos = content.find_first_not_of(blanks, pos);
        if (pos == std::string::npos) break;

        // Cut out the next line and advance past its terminator.
        const std::size_t nl = content.find('\n', pos);
        std::string line = (nl != std::string::npos)
            ? content.substr(pos, nl - pos) : content.substr(pos);
        pos = (nl != std::string::npos) ? nl + 1 : content.size();

        // Drop trailing blanks and the '\r' of CRLF line endings.
        const std::size_t last = line.find_last_not_of(line_tail);
        if (last == std::string::npos) continue;  // blank line
        line.erase(last + 1);

        if (line.front() == '#') continue;  // whole-line comment

        // Section header: [section_name]
        if (line.front() == '[' && line.back() == ']') {
            std::string name = line.substr(1, line.size() - 2);
            name.erase(0, name.find_first_not_of(blanks));
            name.erase(name.find_last_not_of(blanks) + 1);
            current_section = name;
            continue;
        }

        // key = value
        const std::size_t eq = line.find('=');
        if (eq == std::string::npos) continue;

        std::string key = line.substr(0, eq);
        key.erase(key.find_last_not_of(blanks) + 1);  // npos + 1 == 0 clears an all-blank key
        if (key.empty()) continue;

        std::string val = line.substr(eq + 1);
        val.erase(0, val.find_first_not_of(blanks));  // npos erases an all-blank value
        if (val.size() >= 2 && val.front() == '"' && val.back() == '"')
            val = val.substr(1, val.size() - 2);

        const std::string full_key = current_section.empty()
            ? key : current_section + "." + key;
        on_kv(full_key, val);
    }
}
} // namespace toml
} // namespace dstalk

View File

@@ -1,115 +1,54 @@
#include "dstalk/dstalk_host.h"
#include "dstalk/dstalk_services.h"
#include "../include/toml_parse.h"
#include <string>
#include <unordered_map>
#include <mutex>
#include <fstream>
#include <sstream>
#include <cstdio>
// ============================================================
// ConfigStore - independent TOML key-value store
// ============================================================
namespace {
/// Mutex-protected in-memory key/value store that can be filled from a
/// minimal TOML file ([section] headers, key = "value" pairs, # comments).
class ConfigStore {
public:
    /// Reads `path` and merges every key/value pair into the store.
    /// Keys inside a [section] are stored as "section.key".
    /// @return 0 on success, -1 if `path` is null or the file cannot be opened.
    int load_file(const char* path) {
        if (!path) return -1;
        std::ifstream file(path);
        if (!file.is_open()) return -1;
        std::stringstream ss;
        ss << file.rdbuf();
        const std::string data = ss.str();

        // Parse into a private map first so the lock is taken once and readers
        // never observe a half-loaded file.
        // NOTE(review): this file includes toml_parse.h, whose parse() covers
        // the same grammar; consider delegating to it.
        std::unordered_map<std::string, std::string> parsed;
        std::string current_section;
        size_t pos = 0;
        while (pos < data.size()) {
            // Skip indentation.
            while (pos < data.size() && (data[pos] == ' ' || data[pos] == '\t'))
                pos++;
            if (pos >= data.size()) break;

            // Cut out the next line.
            const size_t nl = data.find('\n', pos);
            std::string line = (nl != std::string::npos)
                ? data.substr(pos, nl - pos) : data.substr(pos);
            pos = (nl != std::string::npos) ? nl + 1 : data.size();

            // Drop trailing spaces and the '\r' of CRLF endings.
            while (!line.empty() && (line.back() == '\r' || line.back() == ' '))
                line.pop_back();
            if (line.empty() || line[0] == '#') continue;

            if (line[0] == '[' && line.back() == ']') {
                current_section = line.substr(1, line.size() - 2);
                continue;
            }

            const size_t eq = line.find('=');
            if (eq == std::string::npos) continue;
            std::string key = line.substr(0, eq);
            while (!key.empty() && key.back() == ' ') key.pop_back();
            if (key.empty()) continue;

            std::string val = line.substr(eq + 1);
            while (!val.empty() && (val.front() == ' ' || val.front() == '\t'))
                val.erase(0, 1);
            if (val.size() >= 2 && val.front() == '"' && val.back() == '"')
                val = val.substr(1, val.size() - 2);

            const std::string full_key = current_section.empty()
                ? key : current_section + "." + key;
            parsed[full_key] = val;  // later duplicates win, as before
        }

        std::lock_guard<std::mutex> lock(mutex_);
        for (auto& entry : parsed) data_[entry.first].swap(entry.second);
        return 0;
    }

    /// Looks up `key`.
    /// @return nullptr if `key` is null or absent; otherwise a pointer to a
    ///         per-thread copy of the value. The pointer stays valid until the
    ///         next get() on the same thread, and is unaffected by set() or
    ///         load_file() from any thread.
    const char* get(const char* key) const {
        if (!key) return nullptr;
        // Returning it->second.c_str() directly would hand out a pointer into
        // the map that a later set() can reallocate once the lock is released.
        thread_local std::string snapshot;
        std::lock_guard<std::mutex> lock(mutex_);
        const auto it = data_.find(key);
        if (it == data_.end()) return nullptr;
        snapshot = it->second;
        return snapshot.c_str();
    }

    /// Stores `value` under `key`, replacing any previous value.
    /// @return 0 on success, -1 if either argument is null.
    int set(const char* key, const char* value) {
        if (!key || !value) return -1;
        std::lock_guard<std::mutex> lock(mutex_);
        data_[key] = value;
        return 0;
    }

private:
    mutable std::mutex mutex_;  // guards data_
    std::unordered_map<std::string, std::string> data_;
};
} // anonymous namespace
// ============================================================
// Global state
// ============================================================
// Host API table; assigned in on_init() and null until then.
static const dstalk_host_api_t* g_host = nullptr;
// Plugin-local ConfigStore instance.
// NOTE(review): the W12.2 note in this file says the private store was
// eliminated in favour of the host store — confirm whether this global is
// still needed.
static ConfigStore g_config;
// ============================================================
// Service implementations
//
// W12.2: Eliminated private ConfigStore (was 90 lines duplicating core).
// All get/set/load_file now delegate to the host store via g_host->config_get
// and g_host->config_set, making the host store the single source of truth.
// TOML parsing uses the shared dstalk::toml::parse() from toml_parse.h.
// ============================================================
/// Service entry point: looks up `key` in the host configuration store.
/// A stale early return through the plugin-local store used to make the host
/// delegation unreachable; the host store is the single source of truth here.
/// @return the host's value for `key`, or nullptr if the plugin has not been
///         initialised or `key` is null.
static const char* config_get(const char* key) {
    if (!g_host || !key) return nullptr;
    return g_host->config_get(key);
}
/// Service entry point: stores `value` under `key` in the host configuration
/// store. A stale early return through the plugin-local store used to make the
/// host delegation unreachable.
/// @return the host's config_set() result, or -1 if the plugin has not been
///         initialised.
static int config_set(const char* key, const char* value) {
    if (!g_host) return -1;
    return g_host->config_set(key, value);
}
/// Service entry point: reads the TOML file at `path` and writes every
/// key/value pair into the host configuration store via host->config_set.
/// A stale early return through the plugin-local store used to make this
/// path unreachable.
/// @return 0 once the file has been read and parsed; -1 if the plugin has not
///         been initialised, `path` is null, or the file cannot be opened.
static int config_load_file(const char* path) {
    if (!g_host || !path) return -1;

    std::ifstream file(path);
    if (!file.is_open()) return -1;

    // Slurp the whole file; the shared parser works on an in-memory string.
    std::stringstream ss;
    ss << file.rdbuf();
    const std::string data = ss.str();

    int count = 0;
    dstalk::toml::parse(data, [&count](const std::string& key, const std::string& value) {
        // NOTE(review): the result of config_set is not inspected, so `count`
        // is the number of parsed entries, not of confirmed writes.
        g_host->config_set(key.c_str(), value.c_str());
        ++count;
    });

    g_host->log(DSTALK_LOG_INFO,
                "config: loaded %d entries from %s into host store", count, path);
    return 0;
}
static dstalk_config_service_t g_service = {
@@ -123,17 +62,28 @@ static dstalk_config_service_t g_service = {
// ============================================================
/// Plugin entry point: remembers the host API table and registers the
/// (deprecated) "config" service, which is now a thin wrapper around
/// host->config_get / host->config_set.
/// A stale early `return host->register_service(...)` used to make the
/// deprecation notice and the result handling unreachable.
/// @return 0 when register_service reports a non-negative code, -1 otherwise
///         or when `host` is null.
static int on_init(const dstalk_host_api_t* host) {
    if (!host) return -1;
    g_host = host;

    // W12.2: direct host API calls are preferred over this service.
    host->log(DSTALK_LOG_INFO,
              "plugin config service is deprecated, prefer host->config_get/set");

    const int rc = host->register_service("config", 1, &g_service);
    if (rc != 0) {
        // NOTE(review): a positive rc is logged as a failure here but still
        // reported as success below — confirm what positive codes mean.
        host->log(DSTALK_LOG_WARN,
                  "config: register_service failed (rc=%d), service name may conflict", rc);
    }
    return (rc >= 0) ? 0 : -1;
}
static void on_shutdown() {
// W12.2: nothing to clean up — there is no local store; all data lives in the host store.
}
static dstalk_plugin_info_t g_info = {
"config", // name
"1.0.0", // version
"Configuration service with TOML file support", // description
"Configuration service with TOML file support (deprecated: use host->config_get/set)",
DSTALK_API_VERSION, // api_version
{nullptr}, // dependencies (none)
on_init, // on_init

View File

@@ -9,6 +9,7 @@
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <exception>
#include <string>
#include <vector>
@@ -111,133 +112,207 @@ static size_t count_tokens_trim_vec(const std::vector<TrimMessage>& msgs) {
return total;
}
// Releases every string field of a single message through g_host->free and
// resets each released field to nullptr, so the call is safe to repeat on the
// same message. Used to roll back partially built messages on OOM.
// A null `msg` is ignored.
static void free_msg_strs(dstalk_message_t* msg) {
    if (!msg) return;
    // const_cast: the fields are only ever read through the struct, but the
    // host deallocator takes a mutable pointer.
    if (msg->role) {
        g_host->free(const_cast<char*>(msg->role));
        msg->role = nullptr;
    }
    if (msg->content) {
        g_host->free(const_cast<char*>(msg->content));
        msg->content = nullptr;
    }
    if (msg->tool_call_id) {
        g_host->free(const_cast<char*>(msg->tool_call_id));
        msg->tool_call_id = nullptr;
    }
    if (msg->tool_calls_json) {
        g_host->free(const_cast<char*>(msg->tool_calls_json));
        msg->tool_calls_json = nullptr;
    }
}
// Copies the string fields of a TrimMessage into `dst` via g_host->strdup.
// `dst` is zero-initialised first; an empty source string leaves the matching
// field as nullptr.
// Returns 0 on success. If any strdup fails (OOM), every field already
// allocated for this message is released again and -1 is returned.
// A null `dst` also yields -1.
static int strdup_message_fields(dstalk_message_t* dst, const TrimMessage& src) {
    if (!dst) return -1;
    // Value-initialisation zeroes every member without depending on memset.
    *dst = dstalk_message_t{};

    bool ok = true;
    if (ok && !src.role.empty()) {
        dst->role = g_host->strdup(src.role.c_str());
        ok = (dst->role != nullptr);
    }
    if (ok && !src.content.empty()) {
        dst->content = g_host->strdup(src.content.c_str());
        ok = (dst->content != nullptr);
    }
    if (ok && !src.tool_call_id.empty()) {
        dst->tool_call_id = g_host->strdup(src.tool_call_id.c_str());
        ok = (dst->tool_call_id != nullptr);
    }
    if (ok && !src.tool_calls_json.empty()) {
        dst->tool_calls_json = g_host->strdup(src.tool_calls_json.c_str());
        ok = (dst->tool_calls_json != nullptr);
    }

    if (!ok) {
        free_msg_strs(dst);  // roll back whatever was duplicated so far
        return -1;
    }
    return 0;
}
// W12.1 修复trim_impl 包裹 try/catch 防止 C++ 异常穿越 ABI 边界 (§5.3)
static int trim_impl(const dstalk_message_t* in, int in_count,
dstalk_message_t** out, int* out_count,
size_t max_tokens) {
if (!in || in_count <= 0 || !out || !out_count) return -1;
try {
if (!in || in_count <= 0 || !out || !out_count) return -1;
// 将 C 数组转换为内部 vector
std::vector<TrimMessage> messages;
messages.reserve(in_count);
for (int i = 0; i < in_count; ++i) {
TrimMessage tm;
if (in[i].role) tm.role = in[i].role;
if (in[i].content) tm.content = in[i].content;
if (in[i].tool_call_id) tm.tool_call_id = in[i].tool_call_id;
if (in[i].tool_calls_json) tm.tool_calls_json = in[i].tool_calls_json;
messages.push_back(std::move(tm));
}
// W12.1: 调用方传 0 时使用 g_max_tokens 作为默认限制
if (max_tokens == 0) max_tokens = g_max_tokens;
// 如果已在限制内,直接返回完整副本
size_t current = count_tokens_trim_vec(messages);
if (current <= max_tokens) {
*out_count = in_count;
*out = static_cast<dstalk_message_t*>(g_host->alloc(sizeof(dstalk_message_t) * in_count));
if (!*out) return -1;
// 将 C 数组转换为内部 vector
std::vector<TrimMessage> messages;
messages.reserve(in_count);
for (int i = 0; i < in_count; ++i) {
(*out)[i].role = messages[i].role.empty() ? nullptr : g_host->strdup(messages[i].role.c_str());
(*out)[i].content = messages[i].content.empty() ? nullptr : g_host->strdup(messages[i].content.c_str());
(*out)[i].tool_call_id = messages[i].tool_call_id.empty() ? nullptr : g_host->strdup(messages[i].tool_call_id.c_str());
(*out)[i].tool_calls_json = messages[i].tool_calls_json.empty() ? nullptr : g_host->strdup(messages[i].tool_calls_json.c_str());
TrimMessage tm;
if (in[i].role) tm.role = in[i].role;
if (in[i].content) tm.content = in[i].content;
if (in[i].tool_call_id) tm.tool_call_id = in[i].tool_call_id;
if (in[i].tool_calls_json) tm.tool_calls_json = in[i].tool_calls_json;
messages.push_back(std::move(tm));
}
// 如果已在限制内,直接返回完整副本
size_t current = count_tokens_trim_vec(messages);
if (current <= max_tokens) {
*out_count = in_count;
*out = static_cast<dstalk_message_t*>(g_host->alloc(sizeof(dstalk_message_t) * in_count));
if (!*out) return -1;
// W12.1: strdup 返回值逐一检查OOM 时回滚已分配消息
for (int i = 0; i < in_count; ++i) {
if (strdup_message_fields(&(*out)[i], messages[i]) != 0) {
for (int j = 0; j < i; ++j) free_msg_strs(&(*out)[j]);
g_host->free(*out);
*out = nullptr;
return -1;
}
}
return 0;
}
// 分离 system 消息和非 system 消息
std::vector<TrimMessage> system_msgs;
std::vector<TrimMessage> non_system_msgs;
for (const auto& msg : messages) {
if (msg.role == "system") {
system_msgs.push_back(msg);
} else {
non_system_msgs.push_back(msg);
}
}
size_t system_tokens = count_tokens_trim_vec(system_msgs);
if (system_tokens > max_tokens) {
std::fprintf(stderr, "[context] WARNING: system messages alone "
"(%zu tokens) exceed max_context_tokens (%zu)\n",
system_tokens, max_tokens);
}
// 检查是否有单条消息超过限制
for (const auto& msg : non_system_msgs) {
size_t msg_tokens = count_tokens_trim(msg);
if (msg_tokens > max_tokens) {
std::fprintf(stderr, "[context] WARNING: single message "
"(%s, %zu tokens) exceeds max_context_tokens (%zu). "
"Returning empty list.\n",
msg.role.c_str(), msg_tokens, max_tokens);
*out = nullptr;
*out_count = 0;
return -1;
}
}
// 从最早的非 system 消息开始裁剪,确保 user/assistant 成对移除
while (!non_system_msgs.empty()) {
current = system_tokens + count_tokens_trim_vec(non_system_msgs);
if (current <= max_tokens) break;
// 找第一个 "user" 消息
auto user_it = non_system_msgs.begin();
while (user_it != non_system_msgs.end() && user_it->role != "user") {
++user_it;
}
if (user_it == non_system_msgs.end()) break;
// 找下一个 "assistant"
auto assistant_it = user_it + 1;
while (assistant_it != non_system_msgs.end() && assistant_it->role != "assistant") {
++assistant_it;
}
if (assistant_it == non_system_msgs.end()) {
non_system_msgs.erase(user_it);
} else {
// 先删 assistant 再删 user 避免迭代器失效
non_system_msgs.erase(assistant_it);
user_it = non_system_msgs.begin();
while (user_it != non_system_msgs.end() && user_it->role != "user") ++user_it;
if (user_it != non_system_msgs.end()) non_system_msgs.erase(user_it);
}
}
// W12.1: 消息数量上限粗略估算(每消息 ~100 token利用 g_max_tokens 防止消息泛滥
{
size_t max_msg_count = (g_max_tokens + 99) / 100; // ceil(g_max_tokens / 100)
if (max_msg_count < 1) max_msg_count = 1;
while (non_system_msgs.size() > max_msg_count) {
non_system_msgs.erase(non_system_msgs.begin());
}
}
// 组装结果
std::vector<TrimMessage> result;
result.reserve(system_msgs.size() + non_system_msgs.size());
result.insert(result.end(), system_msgs.begin(), system_msgs.end());
result.insert(result.end(), non_system_msgs.begin(), non_system_msgs.end());
int result_count = static_cast<int>(result.size());
*out_count = result_count;
*out = static_cast<dstalk_message_t*>(g_host->alloc(sizeof(dstalk_message_t) * result_count));
if (!*out) return -1;
// W12.1: strdup 返回值逐一检查OOM 时回滚已分配消息
for (int i = 0; i < result_count; ++i) {
if (strdup_message_fields(&(*out)[i], result[i]) != 0) {
for (int j = 0; j < i; ++j) free_msg_strs(&(*out)[j]);
g_host->free(*out);
*out = nullptr;
return -1;
}
}
return 0;
} catch (const std::exception& e) {
// W12.1: 防止 std::bad_alloc 等 C++ 异常穿越 C ABI 边界 → std::terminate()
if (g_host) g_host->log(DSTALK_LOG_ERROR, "[context] trim_impl exception: %s", e.what());
return -1;
} catch (...) {
if (g_host) g_host->log(DSTALK_LOG_ERROR, "[context] trim_impl unknown exception");
return -1;
}
// 分离 system 消息和非 system 消息
std::vector<TrimMessage> system_msgs;
std::vector<TrimMessage> non_system_msgs;
for (const auto& msg : messages) {
if (msg.role == "system") {
system_msgs.push_back(msg);
} else {
non_system_msgs.push_back(msg);
}
}
size_t system_tokens = count_tokens_trim_vec(system_msgs);
if (system_tokens > max_tokens) {
std::fprintf(stderr, "[context] WARNING: system messages alone "
"(%zu tokens) exceed max_context_tokens (%zu)\n",
system_tokens, max_tokens);
}
// 检查是否有单条消息超过限制
for (const auto& msg : non_system_msgs) {
size_t msg_tokens = count_tokens_trim(msg);
if (msg_tokens > max_tokens) {
std::fprintf(stderr, "[context] WARNING: single message "
"(%s, %zu tokens) exceeds max_context_tokens (%zu). "
"Returning empty list.\n",
msg.role.c_str(), msg_tokens, max_tokens);
*out = nullptr;
*out_count = 0;
return -1;
}
}
// 从最早的非 system 消息开始裁剪,确保 user/assistant 成对移除
while (!non_system_msgs.empty()) {
current = system_tokens + count_tokens_trim_vec(non_system_msgs);
if (current <= max_tokens) break;
// 找第一个 "user" 消息
auto user_it = non_system_msgs.begin();
while (user_it != non_system_msgs.end() && user_it->role != "user") {
++user_it;
}
if (user_it == non_system_msgs.end()) break;
// 找下一个 "assistant"
auto assistant_it = user_it + 1;
while (assistant_it != non_system_msgs.end() && assistant_it->role != "assistant") {
++assistant_it;
}
if (assistant_it == non_system_msgs.end()) {
non_system_msgs.erase(user_it);
} else {
// 先删 assistant 再删 user 避免迭代器失效
non_system_msgs.erase(assistant_it);
user_it = non_system_msgs.begin();
while (user_it != non_system_msgs.end() && user_it->role != "user") ++user_it;
if (user_it != non_system_msgs.end()) non_system_msgs.erase(user_it);
}
}
// 组装结果
std::vector<TrimMessage> result;
result.reserve(system_msgs.size() + non_system_msgs.size());
result.insert(result.end(), system_msgs.begin(), system_msgs.end());
result.insert(result.end(), non_system_msgs.begin(), non_system_msgs.end());
int result_count = static_cast<int>(result.size());
*out_count = result_count;
*out = static_cast<dstalk_message_t*>(g_host->alloc(sizeof(dstalk_message_t) * result_count));
if (!*out) return -1;
for (int i = 0; i < result_count; ++i) {
(*out)[i].role = result[i].role.empty() ? nullptr : g_host->strdup(result[i].role.c_str());
(*out)[i].content = result[i].content.empty() ? nullptr : g_host->strdup(result[i].content.c_str());
(*out)[i].tool_call_id = result[i].tool_call_id.empty() ? nullptr : g_host->strdup(result[i].tool_call_id.c_str());
(*out)[i].tool_calls_json = result[i].tool_calls_json.empty() ? nullptr : g_host->strdup(result[i].tool_calls_json.c_str());
}
return 0;
}
// ============================================================
// Context 服务 vtable 实现
// ============================================================
// W12.1: service entry point returning the token estimate for `count`
// messages. Wrapped in try/catch so no exception can cross the C ABI boundary
// (which would end in std::terminate()). A stale un-wrapped return used to
// precede the try block and bypass that protection.
// Returns 0 for a null/empty input or when an exception is caught.
static size_t context_count_tokens(const dstalk_message_t* msgs, int count) {
    try {
        if (!msgs || count <= 0) return 0;
        return count_tokens_all(msgs, count);
    } catch (...) {
        return 0;
    }
}
// W12.1: service entry point forwarding to trim_impl. Wrapped in try/catch so
// no exception can cross the C ABI boundary. A stale un-wrapped return used to
// precede the try block and bypass that protection.
// Returns trim_impl's result, or -1 when an exception is caught.
static int context_trim(const dstalk_message_t* in, int in_count,
                        dstalk_message_t** out, int* out_count,
                        size_t max_tokens) {
    try {
        return trim_impl(in, in_count, out, out_count, max_tokens);
    } catch (...) {
        return -1;
    }
}
static void context_set_max_tokens(size_t max) {
@@ -254,18 +329,27 @@ static dstalk_context_service_t g_context_service = {
// 插件生命周期
// ============================================================
// W12.1: plugin entry point. Stores the host API table, resolves the required
// "session" service and registers the "context" service. Wrapped in try/catch
// so no exception can cross the C ABI boundary. Stale copies of these
// statements used to sit outside and after the try block.
// Returns the result of register_service on success; -1 when `host` is null,
// the session service is missing, or an exception is caught.
static int on_init(const dstalk_host_api_t* host) {
    if (!host) return -1;
    try {
        g_host = host;

        // Look up the dependency service: session.
        void* raw = host->query_service("session", 1);
        if (!raw) {
            host->log(DSTALK_LOG_ERROR, "[plugin-context] required service 'session' not found");
            return -1;
        }
        g_session = static_cast<const dstalk_session_service_t*>(raw);

        return host->register_service("context", 1, &g_context_service);
    } catch (const std::exception& e) {
        if (g_host) g_host->log(DSTALK_LOG_ERROR, "[plugin-context] on_init exception: %s", e.what());
        return -1;
    } catch (...) {
        if (g_host) g_host->log(DSTALK_LOG_ERROR, "[plugin-context] on_init unknown exception");
        return -1;
    }
}
static void on_shutdown() {