feat: add AI endpoint manager plugin with configuration and routing capabilities

- Introduced `ai_endpoint_mgr` plugin to manage multiple AI provider endpoints.
- Added configuration reference documentation for `config.toml`.
- Implemented endpoint loading, active endpoint switching, and model mutation.
- Included error handling for missing endpoints and configuration failures.
- Developed unit tests covering various scenarios, including error paths and concurrency.
2026-06-03 21:07:25 +08:00
parent 28ae90a6cc
commit 4745ce1f1c
18 changed files with 1570 additions and 34 deletions
--- a/dstalk_cli/src/main.cpp
+++ b/dstalk_cli/src/main.cpp
@@ -56,6 +56,7 @@ static const dstalk_ai_service_t*      g_ai      = nullptr;
 static const dstalk_session_service_t* g_session = nullptr;
 static const dstalk_file_io_service_t* g_file_io = nullptr;
 static const dstalk_tools_service_t*   g_tools   = nullptr;
+static const dstalk_ai_endpoint_mgr_t* g_endpoint_mgr = nullptr;  // I08: AI endpoint manager（可选）/ optional

 // ---- 运行时状态 / Runtime state ----
 // g_current_model tracks the active model name for display in the prompt.
@@ -134,6 +135,61 @@ static void spinner_join()
    }
 }

+// ---- AI 调用路由（endpoint_mgr 优先，g_ai fallback）/ AI call routing (endpoint_mgr preferred, g_ai fallback) ----
+// 当 endpoint_mgr 可用且至少有一个已配置 endpoint 时，通过 endpoint_mgr 路由调用；
+// 否则回退到直接使用 g_ai 服务（保持旧配置兼容）。
+// When endpoint_mgr is available with >=1 configured endpoints, route through it;
+// otherwise fall back to direct g_ai service (keeping old config compatible).
+
+// Reports whether chat calls can be routed through the endpoint manager:
+// the service pointer must be set and count() must report more than zero.
+static inline bool has_endpoint_mgr()
+{
+    if (g_endpoint_mgr == nullptr)
+        return false;
+    return g_endpoint_mgr->count() > 0;
+}
+
+// Reports whether any AI backend can serve a request: either the endpoint
+// manager is usable (checked first) or the direct g_ai service is present.
+static inline bool has_ai_backend()
+{
+    if (has_endpoint_mgr())
+        return true;
+    return g_ai != nullptr;
+}
+
+// Backend that produced the most recent chat result. do_free_result() uses
+// this so a result is always released by the service that created it, even
+// if has_endpoint_mgr() would answer differently by the time it is freed
+// (count() is re-queried on every call and is not guaranteed to be stable).
+enum class chat_route_t { none, endpoint_mgr, direct_ai };
+static chat_route_t g_last_chat_route = chat_route_t::none;
+
+// Blocking chat routing.
+// Uses g_endpoint_mgr when has_endpoint_mgr() is true, otherwise g_ai.
+// Precondition: has_ai_backend() is true; g_ai is dereferenced unchecked on
+// the fallback path. Release the returned result with do_free_result().
+static dstalk_chat_result_t do_chat(
+    const dstalk_message_t* history, int history_len,
+    const char* user_input, const char* tools_json)
+{
+    if (has_endpoint_mgr()) {
+        g_last_chat_route = chat_route_t::endpoint_mgr;
+        // nullptr first argument: presumably "use the active endpoint" — TODO confirm
+        return g_endpoint_mgr->chat(nullptr, history, history_len, user_input, tools_json);
+    }
+    g_last_chat_route = chat_route_t::direct_ai;
+    return g_ai->chat(history, history_len, user_input, tools_json);
+}
+
+// Streaming chat routing.
+// Same selection rule and precondition as do_chat(); cb/userdata are passed
+// through unchanged to whichever backend handles the call.
+static dstalk_chat_result_t do_chat_stream(
+    const dstalk_message_t* history, int history_len,
+    const char* user_input, dstalk_stream_cb cb, void* userdata)
+{
+    if (has_endpoint_mgr()) {
+        g_last_chat_route = chat_route_t::endpoint_mgr;
+        return g_endpoint_mgr->chat_stream(nullptr, history, history_len, user_input, cb, userdata);
+    }
+    g_last_chat_route = chat_route_t::direct_ai;
+    return g_ai->chat_stream(history, history_len, user_input, cb, userdata);
+}
+
+// Free a chat result through the service that produced it.
+// The route recorded by do_chat()/do_chat_stream() decides which free_result
+// is called, instead of re-evaluating has_endpoint_mgr() at free time.
+static void do_free_result(dstalk_chat_result_t* result)
+{
+    switch (g_last_chat_route) {
+    case chat_route_t::endpoint_mgr:
+        // Recorded only after has_endpoint_mgr() returned true.
+        g_endpoint_mgr->free_result(result);
+        break;
+    case chat_route_t::direct_ai:
+        // Recorded only on the path that already called through g_ai.
+        g_ai->free_result(result);
+        break;
+    case chat_route_t::none:
+        // No routed chat call recorded yet: fall back to the current
+        // availability check, and do nothing when no backend exists.
+        if (has_endpoint_mgr())
+            g_endpoint_mgr->free_result(result);
+        else if (g_ai != nullptr)
+            g_ai->free_result(result);
+        break;
+    }
+}
+
+// Set the model name (endpoint manager preferred, g_ai as fallback).
+// Returns the status of the backend call; the /model handler treats
+// set_model() == 0 as success. Returns -1 when neither backend is
+// available, rather than dereferencing a null g_ai.
+static int do_set_model(const char* model)
+{
+    if (has_endpoint_mgr())
+        // nullptr first argument: presumably "the active endpoint" — TODO confirm
+        return g_endpoint_mgr->set_model(nullptr, model);
+    if (g_ai == nullptr)
+        return -1;  // no backend to configure
+    // Only the model is supplied; the other configure() arguments are passed as null/zero.
+    return g_ai->configure(nullptr, nullptr, nullptr, model, 0, 0.0);
+}
+
 // ---- 错误分类与友好提示 / Error classification and user-friendly messages ----
 // 根据 HTTP 状态码和错误消息字符串匹配，将常见错误归类为认证/频率限制/网络/配额问题，并给出中文建议。
 // Classifies common errors into auth/rate-limit/network/quota categories based on HTTP status and string matching, with Chinese suggestions.
@@ -382,6 +438,21 @@ static void handle_command(const char* line)
        const dstalk_tools_service_t* tools = static_cast<const dstalk_tools_service_t*>(
            dstalk_service_query("tools", 1));
        std::printf("  Tools 服务: %s\n", tools ? "就绪" : "不可用");
+
+        // I08/I09: endpoint manager 状态 / endpoint manager status
+        if (g_endpoint_mgr) {
+            std::printf("  --- Endpoint Manager ---\n");
+            std::printf("  状态: 就绪 (%d endpoint(s))\n", g_endpoint_mgr->count());
+            const char* active = g_endpoint_mgr->get_active();
+            std::printf("  Active Endpoint: %s\n", active ? active : "(无)");
+            char* list_json = g_endpoint_mgr->list_json();
+            if (list_json) {
+                std::printf("  Endpoints: %s\n", list_json);  // JSON 不含 api_key，已脱敏 / no api_key in JSON, already desensitized
+                dstalk_free(list_json);
+            }
+        } else {
+            std::printf("  Endpoint Manager: 不可用\n");
+        }
        return;
    }

@@ -393,7 +464,15 @@ static void handle_command(const char* line)
            std::printf(CLR_RED "[ERROR] /model 需要模型名\n" CLR_RESET);
            return;
        }
-        if (g_ai) {
+        // I08: 优先通过 endpoint_mgr 设置模型，fallback 到 g_ai->configure / prefer endpoint_mgr, fallback to g_ai
+        if (g_endpoint_mgr && g_endpoint_mgr->count() > 0) {
+            if (g_endpoint_mgr->set_model(nullptr, model) == 0) {
+                g_current_model = model;
+                std::printf(CLR_GREEN "[OK] 模型已切换: %s (via endpoint_mgr)\n" CLR_RESET, model);
+            } else {
+                std::printf(CLR_RED "[ERROR] 模型切换失败（endpoint 不存在或未配置）\n" CLR_RESET);
+            }
+        } else if (g_ai) {
            g_ai->configure(nullptr, nullptr, nullptr, model, 0, 0.0);
            g_current_model = model;
            std::printf(CLR_GREEN "[OK] 模型已切换: %s\n" CLR_RESET, model);
@@ -645,6 +724,9 @@ int main(int argc, char* argv[])
    g_session = static_cast<const dstalk_session_service_t*>(dstalk_service_query("session", 1));
    g_file_io = static_cast<const dstalk_file_io_service_t*>(dstalk_service_query("file_io", 1));
    g_tools   = static_cast<const dstalk_tools_service_t*>(dstalk_service_query("tools", 1));
+    // I08: 查询 AI endpoint manager（可选服务）/ query AI endpoint manager (optional service)
+    g_endpoint_mgr = static_cast<const dstalk_ai_endpoint_mgr_t*>(
+                         dstalk_service_query("ai_endpoint_mgr", 1));

    if (!g_ai) {
        std::fprintf(stderr, CLR_RED "[dstalk] AI 服务未找到（请检查插件目录）\n" CLR_RESET);
@@ -663,6 +745,12 @@ int main(int argc, char* argv[])
        g_ai->configure(ai_provider, base_url, api_key ? api_key : "", model, 4096, 0.7);
        g_current_model = model;   // A1: 记录当前模型名 / Record current model name
    }
+    // I08: 记录 endpoint_mgr 可用性 / log endpoint_mgr availability
+    if (g_endpoint_mgr && g_endpoint_mgr->count() > 0) {
+        const char* active = g_endpoint_mgr->get_active();
+        std::fprintf(stderr, "[dstalk] endpoint_mgr: %d endpoint(s), active=%s\n",
+                     g_endpoint_mgr->count(), active ? active : "(none)");
+    }

    if (!batch_mode) {
        std::printf("\n");
@@ -678,22 +766,23 @@ int main(int argc, char* argv[])
            dstalk_shutdown();
            return EXIT_FATAL;
        }
-        if (!g_ai || !g_session) {
+        if (!has_ai_backend() || !g_session) {
            std::fprintf(stderr, CLR_RED "[ERROR] AI or session service unavailable\n" CLR_RESET);
            dstalk_shutdown();
            return EXIT_CONFIG;
        }
        int history_count = 0;
        const dstalk_message_t* history = g_session->history(&history_count);
-        dstalk_chat_result_t result = g_ai->chat(history, history_count, input.c_str(), nullptr);
+        // I08: 通过 endpoint_mgr 路由（优先），或 fallback 到 g_ai / route via endpoint_mgr (preferred), or fallback to g_ai
+        dstalk_chat_result_t result = do_chat(history, history_count, input.c_str(), nullptr);
        if (result.ok) {
            std::printf("%s\n", result.content ? result.content : "");
-            g_ai->free_result(&result);
+            do_free_result(&result);
            dstalk_shutdown();
            return EXIT_OK;
        } else {
            print_error(result.error, result.http_status);
-            g_ai->free_result(&result);
+            do_free_result(&result);
            dstalk_shutdown();
            return EXIT_FATAL;
        }
@@ -718,22 +807,23 @@ int main(int argc, char* argv[])
            }
            prompt_text = prompt_arg;
        }
-        if (!g_ai || !g_session) {
+        if (!has_ai_backend() || !g_session) {
            std::fprintf(stderr, CLR_RED "[ERROR] AI or session service unavailable\n" CLR_RESET);
            dstalk_shutdown();
            return EXIT_CONFIG;
        }
        int history_count = 0;
        const dstalk_message_t* history = g_session->history(&history_count);
-        dstalk_chat_result_t result = g_ai->chat(history, history_count, prompt_text.c_str(), nullptr);
+        // I08: 通过 endpoint_mgr 路由（优先），或 fallback 到 g_ai / route via endpoint_mgr (preferred), or fallback to g_ai
+        dstalk_chat_result_t result = do_chat(history, history_count, prompt_text.c_str(), nullptr);
        if (result.ok) {
            std::printf("%s\n", result.content ? result.content : "");
-            g_ai->free_result(&result);
+            do_free_result(&result);
            dstalk_shutdown();
            return EXIT_OK;
        } else {
            print_error(result.error, result.http_status);
-            g_ai->free_result(&result);
+            do_free_result(&result);
            dstalk_shutdown();
            return EXIT_FATAL;
        }
@@ -770,7 +860,7 @@ int main(int argc, char* argv[])
        }

        // AI 对话（通过插件服务 vtable） / AI chat (via plugin service vtable)
-        if (!g_ai || !g_session) {
+        if (!has_ai_backend() || !g_session) {
            std::printf(CLR_RED "[ERROR] AI 或 Session 服务不可用\n" CLR_RESET);
            continue;
        }
@@ -782,7 +872,8 @@ int main(int argc, char* argv[])
        // 启动 spinner，等待 AI 响应 / Start spinner while waiting for AI response
        spinner_start();
        bool first = true;
-        dstalk_chat_result_t result = g_ai->chat_stream(
+        // I08: 通过 endpoint_mgr 路由（优先），或 fallback 到 g_ai / route via endpoint_mgr (preferred), or fallback to g_ai
+        dstalk_chat_result_t result = do_chat_stream(
            history, history_count, line.c_str(), on_stream_token, &first);

        // 确保 spinner 已停止（处理无流式输出的情况） / Ensure spinner is stopped (handles no-stream-output case)
@@ -866,10 +957,12 @@ int main(int argc, char* argv[])
                history_count = 0;
                history = g_session->history(&history_count);

-                g_ai->free_result(&result);
+                // I08: 通过 endpoint_mgr 路由 free_result / route free_result via endpoint_mgr
+                do_free_result(&result);
                spinner_start();
                bool tool_stream_first = true;
-                result = g_ai->chat_stream(history, history_count, nullptr, on_stream_token, &tool_stream_first);
+                // I08: 通过 endpoint_mgr 路由 chat_stream / route chat_stream via endpoint_mgr
+                result = do_chat_stream(history, history_count, nullptr, on_stream_token, &tool_stream_first);
                spinner_stop();

                if (result.ok) {
@@ -896,7 +989,7 @@ int main(int argc, char* argv[])
            std::printf(CLR_RESET "\n");
            print_error(result.error, result.http_status);
        }
-        g_ai->free_result(&result);
+        do_free_result(&result);
    }

    // B2: 单一退出点，dstalk_shutdown 只在此调用（交互模式下） / Single exit point, dstalk_shutdown only called here (in interactive mode)