🚨 行业痛点
现在市面上的机器人/AI 项目,99% 存在以下致命问题:
1. 必须联网 + 付费 API – 调用一次就计费,长期使用成本高到离谱
2. 隐私不安全 – 语音、图片、屏幕全上传到云端,毫无安全感
3. 延迟高、卡顿 – 网络稍微波动,机器人直接“变傻”
4. 没有真正的本地“眼睛” – 多数 AI 无法实时看到你电脑画面或摄像头
5. 部署复杂、环境难装 – 官方教程不是缺依赖就是跑不起来
🦞 龙虾本地化 AI – 彻底解决这些坑!
一套完全离线、零成本、零流量的机器人大脑系统。
•✅ 不依赖 OpenAI / Anthropic / 百度 / 阿里 等任何云服务
•✅ 纯本地推理:文本 + 视觉 + 语音,全部在你自己电脑上运行
•✅ 支持看电脑屏幕、看摄像头、连续多帧动作理解
•✅ 支持语音识别(ASR)+ 语音合成(TTS),真正的人机对话
•✅ Qt 可视化界面,按住说话、实时显示 AI 回复
🧠 核心技术栈(全部开源、全部本地)
| 能力 | 技术方案 | 特点 |
| --- | --- | --- |
| 文本推理 | llama.cpp + Gemma4 GGUF | 纯 CPU/GPU 离线,Metal 加速 |
| 多模态视觉 | llama.cpp + mmproj | 能看懂图片、分析屏幕、理解动作 |
| 语音识别 | Sherpa-ONNX (ASR) | 本地离线识别,支持中文 |
| 语音合成 | Sherpa-ONNX (TTS) | 本地文字转语音,自然流畅 |
| 界面 & 截图 | Qt + OpenCV | 跨平台,摄像头/桌面采集 |
💻 当前演示环境
•macOS (Apple Silicon) – 原生 Metal 加速,推理飞快
•Windows / Linux 理论上也可编译运行,但需先替换当前示例中的 macOS 专用部分(Metal 加速、afplay 播放、.dylib 库路径、mach 系统头文件)
📦 安装极简清单(10 分钟跑起来)
第一步:安装依赖
bash
# macOS (Homebrew): install the toolchain and the libraries the project's
# CMakeLists.txt looks up (Qt, OpenCV, OpenSSL, liboqs).
brew install qt opencv cmake git openssl liboqs

# Download and build llama.cpp.
git clone https://github.com/ggerganov/llama.cpp
cd llama.cpp
mkdir -p build && cd build
# GGML_METAL replaces the deprecated LLAMA_METAL switch (Metal acceleration on macOS).
cmake .. -DGGML_METAL=ON
cmake --build . --config Release -j 8
第二步:下载模型(我会提供网盘 / HF 链接)
•Gemma4 26B GGUF(文本模型)
•mmproj-F32.gguf(多模态投影)
•Sherpa-ONNX 中文 ASR + TTS 模型
第三步:克隆本项目
bash
git clone https://github.com/你的用户名/lobster-ai.git
cd lobster-ai
mkdir -p build && cd build
# Quote the substitution so a Homebrew prefix containing spaces stays one argument.
cmake .. -DCMAKE_PREFIX_PATH="$(brew --prefix qt)"
make -j 8
# The CMakeLists.txt of this project builds a macOS bundle whose OUTPUT_NAME is
# "陈恩华小脑", so launch the bundle rather than a bare ./lobster-ai binary.
open "./陈恩华小脑.app"
🚀 发展展望(未来超强)
1. 真正机器人大脑 – 接入 ROS、机械臂、小车、智能家居
2. 全离线自动化 – 语音控制开关灯、提醒日程、自动截图分析
3. 多摄像头监控 – 家庭安防、老人看护,异常自动提醒
4. 低配电脑优化 – 持续减少内存和 CPU 占用
5. 完全开源 + 社区共建 – 大家一起加功能、修 bug
👐 为什么开源?
我希望每个人都拥有一个属于自己的、免费的、本地的、安全的、能看能听能说的 AI 机器人。 不再为 API 付费,不再担心隐私泄露,不再受网络限制。
第四步:项目根目录的 CMakeLists.txt(llama.cpp 源码需放在项目目录下的 llama.cpp/ 子目录)
cmake
#[[
Build script for the CEH_Gemma4 Qt 6 desktop application.

The app embeds llama.cpp (added as a subdirectory), links OpenCV, OpenSSL,
liboqs and the prebuilt sherpa-onnx / onnxruntime dylibs, and is packaged as a
macOS .app bundle with the dylibs copied into Contents/Frameworks.

User-overridable cache entries:
  SHERPA_INCLUDE_ROOT  sherpa-onnx include directory
  SHERPA_LIB_DIR       directory holding the sherpa-onnx / onnxruntime dylibs
  GGML_METAL, BUILD_SHARED_LIBS, LLAMA_BUILD_TOOLS, LLAMA_BUILD_EXAMPLES
                       forwarded to llama.cpp (defaults only, never forced)
]]
cmake_minimum_required(VERSION 3.16...3.28)
project(CEH_Gemma4 LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Defaults for the llama.cpp subproject. They are plain cache defaults (no
# FORCE) so that -D<option>=... on the command line is respected, and Metal is
# only requested on Apple hosts where it exists.
if(APPLE)
  set(GGML_METAL ON CACHE BOOL "ggml: use Metal")
endif()
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build llama.cpp / ggml as static libraries")
set(LLAMA_BUILD_TOOLS ON CACHE BOOL "llama: build tools (provides llama-mtmd-cli)")
set(LLAMA_BUILD_EXAMPLES ON CACHE BOOL "llama: build examples")

set(CMAKE_AUTOMOC ON)
set(CMAKE_AUTOUIC ON)
set(CMAKE_AUTORCC ON)

# ---------------------------------------------------------------------------
# Application metadata
# ---------------------------------------------------------------------------
set(PROJECT_ROOT "${CMAKE_CURRENT_SOURCE_DIR}")
set(ASSETS_DIR "${PROJECT_ROOT}/assets")
set(APP_NAME "CEH_Gemma4")
set(APP_OUTPUT_NAME "陈恩华小脑")
set(APP_DISPLAY_NAME "陈恩华小脑")
set(APP_AUTHOR "陈恩华")
set(APP_VERSION "1.0.0")
set(APP_BUNDLE_ID "com.wood.ceh.Gemma4")
set(MACOS_APP_ICON "${ASSETS_DIR}/img/icon/favicon.icns")

set(APP_RESOURCE_FILE "${PROJECT_ROOT}/resources.qrc")
if(NOT EXISTS "${APP_RESOURCE_FILE}")
  message(FATAL_ERROR "[QRC] 未找到 resources.qrc: ${APP_RESOURCE_FILE}")
else()
  message(STATUS "[QRC] 使用资源文件: ${APP_RESOURCE_FILE}")
endif()

# Info.plist is generated from a template. EXECUTABLE_NAME is set here in
# addition to the APP_* values above, which configure_file(@ONLY) can also
# substitute into the template.
set(INFO_PLIST_IN "${PROJECT_ROOT}/Info.plist.in")
set(INFO_PLIST_OUT "${CMAKE_CURRENT_BINARY_DIR}/Info.plist")
set(EXECUTABLE_NAME "${APP_OUTPUT_NAME}")
configure_file("${INFO_PLIST_IN}" "${INFO_PLIST_OUT}" @ONLY)

# ---------------------------------------------------------------------------
# Dependencies
# ---------------------------------------------------------------------------
# Qt 6.5 is the floor: qt_generate_deploy_app_script(OUTPUT_SCRIPT ...) used
# below and the <QPermissions> header included by main.cpp both need it.
find_package(Qt6 6.5 REQUIRED COMPONENTS
  Core
  Gui
  Widgets
  Charts
  PrintSupport
  Multimedia
  OpenGL
  OpenGLWidgets
  Concurrent
  DataVisualization
)

add_subdirectory(llama.cpp)

find_package(OpenCV REQUIRED)
find_package(OpenSSL REQUIRED)

# Prebuilt sherpa-onnx. The defaults are the original developer's locations;
# override with -DSHERPA_INCLUDE_ROOT=... -DSHERPA_LIB_DIR=... on other machines.
set(SHERPA_INCLUDE_ROOT
  "/Users/陈恩华/Desktop/project/c/文字转语音/3rdparty/sherpa-onnx-1.12.29/include"
  CACHE PATH "sherpa-onnx include directory")
set(SHERPA_LIB_DIR
  "/Users/陈恩华/Desktop/project/c/文字转语音/3rdparty/sherpa-onnx/lib"
  CACHE PATH "Directory containing the sherpa-onnx / onnxruntime dylibs")

set(SHERPA_C_API_LIB "${SHERPA_LIB_DIR}/libsherpa-onnx-c-api.dylib")
set(ONNX_LIB "${SHERPA_LIB_DIR}/libonnxruntime.dylib")
set(ONNX_LIB_VER "${SHERPA_LIB_DIR}/libonnxruntime.1.23.2.dylib")
set(CARGS_LIB "${SHERPA_LIB_DIR}/libcargs.dylib")

foreach(required_lib IN ITEMS
    "${SHERPA_C_API_LIB}"
    "${ONNX_LIB}"
    "${ONNX_LIB_VER}"
    "${CARGS_LIB}")
  if(NOT EXISTS "${required_lib}")
    message(FATAL_ERROR
      "Cannot find ${required_lib} (set -DSHERPA_LIB_DIR=<dir with the sherpa-onnx dylibs>)")
  endif()
endforeach()

message(STATUS "Using sherpa c-api library: ${SHERPA_C_API_LIB}")
message(STATUS "Using onnxruntime library: ${ONNX_LIB}")
message(STATUS "Using onnxruntime versioned library: ${ONNX_LIB_VER}")
message(STATUS "Using cargs library: ${CARGS_LIB}")

find_path(OQS_INCLUDE_DIR NAMES oqs/oqs.h PATHS /opt/homebrew/include /usr/local/include)
find_library(OQS_LIBRARY NAMES oqs PATHS /opt/homebrew/lib /usr/local/lib)
# Both the library and its headers are needed; a missing header directory would
# otherwise surface later as an opaque "-NOTFOUND" include path error.
if(NOT OQS_LIBRARY OR NOT OQS_INCLUDE_DIR)
  message(FATAL_ERROR "未找到 liboqs 库!请确保执行:brew install liboqs")
else()
  message(STATUS "成功找到 liboqs 库路径: ${OQS_LIBRARY}")
endif()

qt_standard_project_setup()

# ---------------------------------------------------------------------------
# Application target
# ---------------------------------------------------------------------------
set(APP_MACOS_RESOURCES "")
set(app_has_macos_icon FALSE)
if(APPLE AND EXISTS "${MACOS_APP_ICON}")
  set(app_has_macos_icon TRUE)
  set(APP_MACOS_RESOURCES "${MACOS_APP_ICON}")
  set_source_files_properties("${MACOS_APP_ICON}" PROPERTIES
    MACOSX_PACKAGE_LOCATION "Resources"
  )
  message(STATUS "[Mac图标] 已找到:${MACOS_APP_ICON}")
elseif(APPLE)
  message(WARNING "[Mac图标] 未找到:${MACOS_APP_ICON}")
endif()

set(APP_SOURCES
  main.cpp
)

qt_add_executable(${APP_NAME}
  MACOSX_BUNDLE
  WIN32
  ${APP_SOURCES}
  ${APP_RESOURCE_FILE}
  ${APP_MACOS_RESOURCES}
)

# Locate the llama.cpp multimodal CLI target. Only EXECUTABLE targets qualify:
# "mtmd" can also name a library target, and copying a library next to the app
# would not give the runtime anything it can launch.
set(MTMD_TARGET "")
foreach(candidate IN ITEMS llama-mtmd-cli llama-mtmd mtmd-cli mtmd)
  if(TARGET ${candidate})
    get_target_property(candidate_type ${candidate} TYPE)
    if("${candidate_type}" STREQUAL "EXECUTABLE")
      set(MTMD_TARGET ${candidate})
      break()
    endif()
  endif()
endforeach()

if(MTMD_TARGET)
  message(STATUS "[MTMD] 找到 target:${MTMD_TARGET}")
  # Build-order only: the CLI is launched as a separate process, not linked.
  add_dependencies(${APP_NAME} ${MTMD_TARGET})
else()
  message(WARNING "[MTMD] 未找到 CMake target,运行时会从 build/bin 与 App 目录自动搜索 llama-mtmd-cli。")
endif()

target_include_directories(${APP_NAME} PRIVATE
  ${PROJECT_ROOT}
  ${OpenCV_INCLUDE_DIRS}
  ${OQS_INCLUDE_DIR}
  ${SHERPA_INCLUDE_ROOT}
  ${CMAKE_CURRENT_SOURCE_DIR}/llama.cpp/include
  ${CMAKE_CURRENT_SOURCE_DIR}/llama.cpp/src
  ${CMAKE_CURRENT_SOURCE_DIR}/llama.cpp/common
)

target_link_libraries(${APP_NAME} PRIVATE
  llama
  ggml
  Qt6::Core
  Qt6::Gui
  Qt6::Widgets
  Qt6::Charts
  Qt6::PrintSupport
  Qt6::Multimedia
  Qt6::OpenGL
  Qt6::OpenGLWidgets
  Qt6::Concurrent
  Qt6::DataVisualization
  ${OpenCV_LIBS}
  ${OQS_LIBRARY}
  OpenSSL::Crypto
  "${SHERPA_C_API_LIB}"
  "${ONNX_LIB}"
  "${CARGS_LIB}"
)

set_target_properties(${APP_NAME} PROPERTIES
  OUTPUT_NAME "${APP_OUTPUT_NAME}"
  MACOSX_BUNDLE TRUE
  MACOSX_BUNDLE_BUNDLE_NAME "${APP_DISPLAY_NAME}"
  MACOSX_BUNDLE_GUI_IDENTIFIER "${APP_BUNDLE_ID}"
  MACOSX_BUNDLE_BUNDLE_VERSION "${APP_VERSION}"
  MACOSX_BUNDLE_SHORT_VERSION_STRING "${APP_VERSION}"
  MACOSX_BUNDLE_INFO_STRING "${APP_DISPLAY_NAME}"
  MACOSX_BUNDLE_COPYRIGHT "© ${APP_AUTHOR}"
  MACOSX_BUNDLE_INFO_PLIST "${INFO_PLIST_OUT}"
)

# Bundle post-processing only makes sense for a macOS .app; on other platforms
# the relative "../Frameworks" path would land outside the output directory.
if(APPLE)
  set(app_contents_dir "$<TARGET_BUNDLE_CONTENT_DIR:${APP_NAME}>")
  add_custom_command(TARGET ${APP_NAME} POST_BUILD
    COMMAND ${CMAKE_COMMAND} -E copy
            "${INFO_PLIST_OUT}" "${app_contents_dir}/Info.plist"
    COMMAND ${CMAKE_COMMAND} -E make_directory "${app_contents_dir}/Frameworks"
    COMMAND ${CMAKE_COMMAND} -E copy_if_different
            "${SHERPA_C_API_LIB}" "${app_contents_dir}/Frameworks/"
    COMMAND ${CMAKE_COMMAND} -E copy_if_different
            "${CARGS_LIB}" "${app_contents_dir}/Frameworks/"
    COMMAND ${CMAKE_COMMAND} -E copy_if_different
            "${ONNX_LIB}" "${app_contents_dir}/Frameworks/"
    COMMAND ${CMAKE_COMMAND} -E copy_if_different
            "${ONNX_LIB_VER}" "${app_contents_dir}/Frameworks/"
    COMMENT "正在同步 Info.plist,并拷贝 sherpa-onnx / onnxruntime dylibs 到 App Bundle..."
    VERBATIM
  )
endif()

if(MTMD_TARGET)
  # Place the CLI beside the app executable, where the runtime search looks first.
  add_custom_command(TARGET ${APP_NAME} POST_BUILD
    COMMAND ${CMAKE_COMMAND} -E copy_if_different
            "$<TARGET_FILE:${MTMD_TARGET}>"
            "$<TARGET_FILE_DIR:${APP_NAME}>/$<TARGET_FILE_NAME:${MTMD_TARGET}>"
    COMMENT "正在拷贝 ${MTMD_TARGET} 到 App 可执行目录..."
    VERBATIM
  )
endif()

if(app_has_macos_icon)
  set_target_properties(${APP_NAME} PROPERTIES
    MACOSX_BUNDLE_ICON_FILE "favicon.icns"
  )
endif()

if(APPLE)
  # Resolve the copied dylibs from Contents/Frameworks, in the build tree too.
  set_target_properties(${APP_NAME} PROPERTIES
    BUILD_WITH_INSTALL_RPATH TRUE
    INSTALL_RPATH "@executable_path/../Frameworks"
  )
endif()

# ---------------------------------------------------------------------------
# Install / deploy
# ---------------------------------------------------------------------------
install(TARGETS ${APP_NAME}
  BUNDLE DESTINATION .
  RUNTIME DESTINATION bin
)

qt_generate_deploy_app_script(
  TARGET ${APP_NAME}
  OUTPUT_SCRIPT deploy_script
  NO_UNSUPPORTED_PLATFORM_ERROR
)
install(SCRIPT ${deploy_script})

if(MSVC)
  # Keep a console attached so the app's printf / qDebug output stays visible.
  target_link_options(${APP_NAME} PRIVATE "/SUBSYSTEM:CONSOLE")
endif()

# ---------------------------------------------------------------------------
# Configuration summary
# ---------------------------------------------------------------------------
message(STATUS "====================================================")
message(STATUS "[项目] 名称:${APP_NAME}")
message(STATUS "[项目] 显示名称:${APP_DISPLAY_NAME}")
message(STATUS "[项目] 版本:${APP_VERSION}")
message(STATUS "[QRC] 资源文件:${APP_RESOURCE_FILE}")
message(STATUS "[OpenCV] include:${OpenCV_INCLUDE_DIRS}")
message(STATUS "[OpenCV] libs:${OpenCV_LIBS}")
message(STATUS "[OQS] library: ${OQS_LIBRARY}")
message(STATUS "[Sherpa] include: ${SHERPA_INCLUDE_ROOT}")
message(STATUS "[Sherpa] c-api: ${SHERPA_C_API_LIB}")
message(STATUS "[ONNX] dylib: ${ONNX_LIB}")
message(STATUS "[MTMD] target:${MTMD_TARGET}")
if(APPLE)
  message(STATUS "[macOS] 将生成 .app bundle")
endif()
message(STATUS "====================================================")
第五步:主程序 main.cpp
cpp
#include <QApplication>
#include <QWidget>
#include <QVBoxLayout>
#include <QHBoxLayout>
#include <QTextEdit>
#include <QLineEdit>
#include <QPushButton>
#include <QLabel>
#include <QTimer>
#include <QProgressBar>
#include <QThread>
#include <QMetaType>
#include <QStringList>
#include <QStringDecoder>
#include <QTextCursor>
#include <QTextImageFormat>
#include <QDebug>
#include <QProcess>
#include <QDir>
#include <QDateTime>
#include <QScreen>
#include <QGuiApplication>
#include <QPixmap>
#include <QImage>
#include <QCoreApplication>
#include <QFileInfo>
#include <QStandardPaths>
#include <QProcessEnvironment>
#include <QRegularExpression>
#include <QFile>
#include <QDataStream>
#include <QUrl>
#include <QAudioSource>
#include <QAudioDevice>
#include <QMediaDevices>
#include <QAudioFormat>
#include <QIODevice>
#include <QPermissions>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <mach/mach.h>
#include <unistd.h>
#include "llama.h"
#include <opencv2/opencv.hpp>
#include <vector>
#include <string>
#include <algorithm>
#include <cstdio>
#include <random>
#include <cmath>
#include <cstdint>
#include <cstring>
#include "3rdparty/sherpa-onnx/c-api/c-api.h"

// Returns the value of environment variable `envName`, or `fallback` (UTF-8)
// when the variable is unset or empty. Lets the model/project locations below
// be overridden without recompiling; the fallbacks are the original paths.
static QString pathFromEnv(const char* envName,const char* fallback){
    const QString fromEnv=qEnvironmentVariable(envName);
    return fromEnv.isEmpty()?QString::fromUtf8(fallback):fromEnv;
}

// Text model (GGUF), multimodal projector, screenshot/capture root, project
// root, and the sherpa-onnx ASR / TTS model directories.
static const QString MODEL_PATH=pathFromEnv("CEH_MODEL_PATH","/Users/陈恩华/Desktop/project/c/gemma-4/gemma-4-26B-A4B-it-BF16-00001-of-00002.gguf");
static const QString MMPROJ_PATH=pathFromEnv("CEH_MMPROJ_PATH","/Users/陈恩华/Desktop/project/c/gemma-4/mmproj-F32.gguf");
static const QString SAVE_ROOT=pathFromEnv("CEH_SAVE_ROOT","/Users/陈恩华/Desktop/project/c/gemma-4/assets/uppic");
static const QString PROJECT_ROOT=pathFromEnv("CEH_PROJECT_ROOT","/Users/陈恩华/Desktop/project/c/gemma-4");
static const QString ASR_MODEL_DIR=pathFromEnv("CEH_ASR_MODEL_DIR","/Users/陈恩华/Desktop/project/c/录音转文字/3rdparty/ASR");
static const QString TTS_MODEL_DIR=pathFromEnv("CEH_TTS_MODEL_DIR","/Users/陈恩华/Desktop/project/c/文字转语音/3rdparty/vits-zh-aishell3");

// Kind of visual input handed to the vision worker.
enum class VisionTaskMode{
    CameraSingle,
    CameraVideo,
    DesktopScreen
};

// Returns true when `text` contains at least one entry of `needles`.
static bool containsAnyOf(const QString& text,const QStringList& needles){
    for(const QString& needle:needles){
        if(text.contains(needle))return true;
    }
    return false;
}

// Returns true when `text` starts with at least one entry of `prefixes`.
static bool startsWithAnyOf(const QString& text,const QStringList& prefixes){
    for(const QString& prefix:prefixes){
        if(text.startsWith(prefix))return true;
    }
    return false;
}

// Returns the smallest index at which any entry of `needles` occurs in `text`,
// or -1 when none occurs.
static int earliestIndexOfAny(const QString& text,const QStringList& needles){
    int earliest=-1;
    for(const QString& needle:needles){
        const int pos=text.indexOf(needle);
        if(pos>=0&&(earliest<0||pos<earliest))earliest=pos;
    }
    return earliest;
}

// Returns (and creates if needed) SAVE_ROOT/yyyy/MM/dd for the current date.
static QString todayDir(){
    QString dir=SAVE_ROOT+"/"+QDateTime::currentDateTime().toString("yyyy/MM/dd");
    QDir().mkpath(dir);
    return dir;
}

// Returns the absolute path of `p` when it is an existing executable file,
// otherwise an empty string.
static QString findExecutableFile(const QString& p){
    QFileInfo fi(p);
    if(fi.exists()&&fi.isFile()&&fi.isExecutable())return fi.absoluteFilePath();
    return QString();
}

// Searches for the llama.cpp multimodal CLI: first in directories relative to
// the application and the project root, then via QStandardPaths::findExecutable.
// Returns an empty string when nothing is found.
static QString findMtmdCli(){
    QString appDir=QCoreApplication::applicationDirPath();
    QStringList names={"llama-mtmd-cli","llama-mtmd","mtmd-cli","mtmd"};
    QStringList dirs={
        appDir,
        appDir+"/../Resources",
        appDir+"/../../../../bin",
        appDir+"/../../../../llama.cpp/build/bin",
        appDir+"/../../../../llama.cpp/build/bin/Release",
        PROJECT_ROOT+"/cmake-build-debug/bin",
        PROJECT_ROOT+"/cmake-build-debug/llama.cpp/bin",
        PROJECT_ROOT+"/llama.cpp/build/bin",
        PROJECT_ROOT+"/llama.cpp/build/bin/Release"
    };
    for(const QString& d:dirs){
        for(const QString& n:names){
            QString hit=findExecutableFile(QDir(d).absoluteFilePath(n));
            if(!hit.isEmpty())return hit;
        }
    }
    for(const QString& n:names){
        QString sys=QStandardPaths::findExecutable(n);
        if(!sys.isEmpty())return sys;
    }
    return QString();
}

// Returns true when `s` contains at least one CJK ideograph (U+4E00..U+9FFF)
// or any letter/digit, i.e. something worth showing to the user.
static bool containsUsefulVisibleText(const QString& s){
    for(QChar c:s){
        ushort u=c.unicode();
        if(u>=0x4e00&&u<=0x9fff)return true;
        if(c.isLetterOrNumber())return true;
    }
    return false;
}

// Strips chat-template markers, role labels and known garbage from the raw
// model output accumulated so far.
//   raw        - everything generated up to now
//   shouldStop - set to true when a stop marker was found after useful text,
//                meaning generation can end
// Returns the text that is safe to display. A trailing fragment that could be
// the beginning of a marker (e.g. "<|tu") is withheld until more text arrives.
static QString cleanVisibleText(const QString& raw,bool& shouldStop){
    QString text=raw;
    shouldStop=false;

    // Role headers on a new line mean the model started inventing the next turn.
    static const QStringList hardStopWords={
        "\nUser:","\n用户:","\nuser:","\nSYSTEM:","\nsystem:","\nAssistant:","\nassistant:"
    };
    const int hardCut=earliestIndexOfAny(text,hardStopWords);
    if(hardCut>=0){
        QString before=text.left(hardCut);
        if(containsUsefulVisibleText(before)){
            text=before;
            shouldStop=true;
        }
    }

    // Template / special tokens: cut there if something useful precedes them,
    // otherwise just delete them and keep going.
    static const QStringList softCutWords={
        "<turn|>","<|turn>","<|im_end|>","<|im_start|>","<|channel>","<channel|>",
        "<|tool>","<tool|>","<|tool_call>","<tool_call|>","<|tool_response>",
        "<tool_response|>","<bos>","<eos>","</s>","<pad>","<unk>"
    };
    const int softCut=earliestIndexOfAny(text,softCutWords);
    if(softCut>=0){
        QString before=text.left(softCut);
        if(containsUsefulVisibleText(before)){
            text=before;
            shouldStop=true;
        }else{
            for(const QString& s:softCutWords)text.replace(s,"");
        }
    }

    static const QStringList removeWords={
        "<|turn>model","<|turn>user","<|turn>system","<turn|>","<|turn>",
        "<|channel>thought","<|channel>","<channel|>","<|think|>",
        "<|im_start|>assistant","<|im_start|>","<|im_end|>",
        "Assistant:","assistant:","model:","Model:","木头智能科技人工智能 >","木头智能科技视觉 >"
    };
    for(const QString& s:removeWords)text.replace(s,"");

    // This function runs once per generated token, so compile the patterns once.
    static const QRegularExpression leadingThought("^\\s*thought\\s*");
    static const QRegularExpression lineThought("\\n\\s*thought\\s*");
    text.replace(leadingThought,"");
    text.replace(lineThought,"\n");

    // Known degenerate output: truncate at whichever marker appears first.
    static const QStringList garbageMarkers={"/dev/null","XNOR","NOR-NOR"};
    const int garbageCut=earliestIndexOfAny(text,garbageMarkers);
    if(garbageCut>=0){
        text=text.left(garbageCut);
        if(containsUsefulVisibleText(text))shouldStop=true;
    }

    // Hold back a tail that may be the start of a marker still being generated.
    static const QStringList brokenPrefixes={
        "<","<|","<|t","<|tu","<|tur","<|turn","<|turn>","<tu","<tur",
        "<turn","<turn|","<|c","<|ch","<|cha","<|chan","<|channel",
        "<|channel>","<cha","<chan","<channel","<channel|","<|im","<|im_",
        "<|im_s","<|im_st","<|im_start","<|im_start|","<|im_e","<|im_end",
        "<|im_end|","<bos","<eos","</s"
    };
    for(const QString& pre:brokenPrefixes){
        if(text.endsWith(pre)){
            text.chop(pre.size());
            break;
        }
    }
    return text;
}

// Heuristic on the last 160 characters of `s`: true when they contain known
// garbage markers or are dominated by punctuation (more than 80 punctuation
// characters and more than three times the letter/digit count).
static bool looksLikeGarbageTail(const QString& s){
    static const QString punctChars("(){}[]<>/\\|=*-_");
    QString t=s.right(160);
    if(t.contains("/dev/null"))return true;
    if(t.contains("XNOR")||t.contains("NOR-NOR"))return true;
    if(t.contains("0x00")&&t.contains("-1"))return true;
    int punct=0;
    int letters=0;
    for(QChar c:t){
        if(c.isLetterOrNumber())letters++;
        if(punctChars.contains(c))punct++;
    }
    return punct>80&&punct>letters*3;
}

// True when the user's text asks to look at the computer screen / desktop.
static bool isVisionDesktopAsk(const QString& text){
    static const QStringList phrases={
        "看下电脑画面","看一下电脑画面","看电脑画面","看下屏幕","看一下屏幕",
        "桌面截图","看桌面","电脑画面","屏幕"
    };
    return containsAnyOf(text,phrases);
}

// True when the user's text asks what a person is doing (multi-frame analysis).
static bool isVisionVideoAsk(const QString& text){
    static const QStringList phrases={
        "他在做什么","人在做什么","在干什么","做什么动作","正在做什么"
    };
    return containsAnyOf(text,phrases);
}

// True when the user's text asks the assistant to look at something.
static bool isVisionCameraAsk(const QString& text){
    static const QStringList phrases={
        "你看到什么","看到什么","看一下","看下"
    };
    return containsAnyOf(text,phrases);
}

// True when the (already trimmed) line `t` is empty or matches one of the
// known llama.cpp / ggml / mtmd log patterns and must not reach the user.
static bool isLogLine(const QString& t){
    if(t.isEmpty())return true;
    static const QStringList logPrefixes={
        "ggml_","llama_","load_","print_info","create_tensor","main:",
        "system_info","sampler","token","mtmd_","common_"
    };
    static const QStringList logFragments={
        "common_init_result","fitting params to device memory",
        "for bugs during this step","control-looking token","DEPRECATED: argument",
        "metadata","offloading","loading model","loaded meta data","Metal","MTL0",
        "tensor","backend","buffer size","KV cache","model params","model type",
        "file format","file type","file size","control token","EOG token",
        "BOS token","EOS token","UNK token","PAD token","MASK token",
        "llama_model_loader","llama_context","load_tensors","ggml_metal",
        "sched_reserve","graph_reserve","register_backend","register_device",
        "init_tokenizer","done_getting_tensors","allocated buffer","pipeline",
        "kernel_","loaded kernel","using device","creating","assigned to device",
        "n_ctx","n_layer","n_head","n_embd","n_vocab","n_merges","rope","freq_",
        "flash_attn","device memory"
    };
    return startsWithAnyOf(t,logPrefixes)||containsAnyOf(t,logFragments);
}

// Removes fenced (```...```) and inline (`...`) code spans and trims the result.
static QString removeCodeBlocks(QString s){
    static const QRegularExpression fenced("```[\\s\\S]*?```");
    static const QRegularExpression inlineCode("`[^`]*`");
    s.replace(fenced,"");
    s.replace(inlineCode,"");
    return s.trimmed();
}

// Case-insensitive check for fragments that indicate the model echoed source
// code instead of describing the picture.
static bool looksLikeCodeAnswer(const QString& s){
    static const QStringList codeHints={
        "import ","openai","chatcompletion","response =","def ","class ",
        "#include","```","cmake_minimum_required","qapplication","qwidget",
        "std::","int main"
    };
    return containsAnyOf(s.toLower(),codeHints);
}

// Reduces a vision-model reply to a short natural-language Chinese answer.
// Drops template echoes, analysis scaffolding, bullet lines and code-like
// lines; for desktop screenshots keeps the last sentence that talks about the
// screen; shortens long replies; and substitutes a mode-specific default
// sentence when nothing is left.
static QString keepChineseFinalAnswer(QString clean,VisionTaskMode mode){
    clean=removeCodeBlocks(clean);

    // Order matters: longer phrases must be removed before their substrings.
    static const QStringList boilerplate={
        "system\n\nYou are a helpful assistant",
        "system\nYou are a helpful assistant",
        "user\nHello\nmodel\nHi there",
        "user\nHow are you?\nmodel",
        "(Drafting final response)",
        "Drafting final response",
        "Final answer:",
        "final answer:",
        "Answer:",
        "answer:"
    };
    for(const QString& phrase:boilerplate)clean.replace(phrase,"");

    static const QStringList roleLines={"system","user","model","assistant"};
    static const QStringList scaffoldPrefixes={
        "*","-","Visual analysis module","A sequence of","The user is asking"
    };
    static const QStringList scaffoldFragments={
        "User role","Constraints","Task:","Input:","Subject:","Action:",
        "Background:","Lighting:","Composition:","Natural Chinese","No labels",
        "No logs","Description of single image"
    };

    const QStringList lines=clean.split('\n');
    QStringList useful;
    for(const QString& line:lines){
        const QString t=line.trimmed();
        if(t.isEmpty())continue;
        if(roleLines.contains(t))continue;
        if(startsWithAnyOf(t,scaffoldPrefixes))continue;
        if(containsAnyOf(t,scaffoldFragments))continue;
        if(looksLikeCodeAnswer(t))continue;
        useful<<line;
    }
    clean=useful.join("\n").trimmed();
    clean=removeCodeBlocks(clean);

    if(mode==VisionTaskMode::DesktopScreen&&looksLikeCodeAnswer(clean)){
        return "我看到你正在电脑前查看或编写代码,像是在调试一个程序。";
    }

    if(mode==VisionTaskMode::DesktopScreen){
        static const QStringList screenWords={
            "代码","程序","调试","电脑","屏幕","窗口","正在"
        };
        const QStringList parts=clean.split(QRegularExpression("[。!?!?\\n]"),Qt::SkipEmptyParts);
        QStringList good;
        for(const QString& p:parts){
            const QString t=p.trimmed();
            if(t.isEmpty())continue;
            if(looksLikeCodeAnswer(t))continue;
            if(containsAnyOf(t,screenWords))good<<t;
        }
        if(!good.isEmpty()){
            QString ans=good.last().trimmed();
            if(!ans.endsWith("。"))ans+="。";
            return ans;
        }
    }

    // Keep only the last "图中..." description when the model repeated itself.
    if(clean.contains("图中")){
        int lastTu=clean.lastIndexOf("图中");
        if(lastTu>=0)clean=clean.mid(lastTu).trimmed();
        int secondTu=clean.indexOf("图中",2);
        if(secondTu>0)clean=clean.left(secondTu).trimmed();
    }
    clean=clean.trimmed();

    // Long replies: keep whole sentences until roughly 160 characters.
    if(clean.size()>280){
        const QStringList parts=clean.split(QRegularExpression("[。!?!?]"),Qt::SkipEmptyParts);
        QString compact;
        for(const QString& p:parts){
            const QString t=p.trimmed();
            if(t.isEmpty())continue;
            if(looksLikeCodeAnswer(t))continue;
            compact+=t+"。";
            if(compact.size()>160)break;
        }
        if(!compact.isEmpty())clean=compact.trimmed();
    }

    if(mode==VisionTaskMode::CameraSingle&&clean.isEmpty()){
        clean="我看到画面中有人坐在室内,正面对着镜头。";
    }
    if(mode==VisionTaskMode::CameraVideo&&clean.isEmpty()){
        clean="从连续画面看,人物基本坐在镜头前,动作变化不大。";
    }
    if(mode==VisionTaskMode::DesktopScreen&&clean.isEmpty()){
        clean="我看到你正在电脑前操作屏幕内容,像是在查看或调试代码。";
    }
    return clean.trimmed();
}

// Extracts the user-facing answer from the multimodal CLI's raw output:
// drops everything up to the echoed prompt, removes log lines, cleans template
// markers, condenses to a Chinese answer and finally strips English runs.
static QString stripMtmdLogs(const QString& output,const QString& finalPrompt,VisionTaskMode mode){
    QString clean=output;
    int promptPos=clean.lastIndexOf(finalPrompt);
    if(promptPos>=0)clean=clean.mid(promptPos+finalPrompt.size());

    const QStringList lines=clean.split('\n');
    QStringList kept;
    for(const QString& line:lines){
        const QString t=line.trimmed();
        if(isLogLine(t))continue;
        // Timestamped log lines of the form "0.xx.xxx I|W|E ...".
        if(t.startsWith("0.")&&(t.contains(" I ")||t.contains(" W ")||t.contains(" E ")))continue;
        kept<<line;
    }
    clean=kept.join("\n").trimmed();

    bool stop=false;
    clean=cleanVisibleText(clean,stop).trimmed();
    clean.replace(QRegularExpression("^\\s*'\\s*$"),"");
    clean.replace(QRegularExpression("^\\s*\"\\s*$"),"");
    clean=keepChineseFinalAnswer(clean,mode);
    // Drop parenthesised English, unwrap quotes, drop remaining English runs.
    clean.replace(QRegularExpression("\\([^()]*[A-Za-z][^()]*\\)"),"");
    clean.replace(QRegularExpression("\"([^\"]*)\""),"\\1");
    clean.replace(QRegularExpression("[A-Za-z][A-Za-z0-9 ,.'\"\\-:;()/]+"),"");
    clean.replace(QRegularExpression("\\s+")," ");
    return clean.trimmed();
}

// Builds the instruction prompt sent to the multimodal CLI for `mode`.
//   userPrompt - the user's question, appended to the instructions
//   imageCount - number of frames; only used in CameraVideo mode
static QString buildVisionPrompt(const QString& userPrompt,VisionTaskMode mode,int imageCount){
    if(mode==VisionTaskMode::DesktopScreen){
        return QString(
            "你是木头智能科技人工智能的桌面观察模块。"
            "现在给你的是用户电脑屏幕截图,不是普通照片。"
            "你的任务不是复述屏幕上的代码,也不是输出代码。"
            "你的任务是根据屏幕内容判断用户正在做什么。"
            "严禁输出 Markdown 代码块。"
            "严禁输出 Python、C++、Qt、OpenAI API 示例代码。"
            "严禁复述 import、response、QApplication、main.cpp、CMake 等屏幕代码内容。"
            "严禁输出英文分析过程、system、user、model、Drafting final response。"
            "请只用一句自然中文回答,格式必须类似:我看到你正在……。"
            "如果屏幕主要是代码编辑器或终端,就回答:我看到你正在查看或调试程序代码。"
            "用户问题:%1"
        ).arg(userPrompt);
    }
    if(mode==VisionTaskMode::CameraVideo){
        return QString(
            "你是木头智能科技人工智能的连续画面分析模块。"
            "这些图片是按时间顺序排列的摄像头关键帧,共 %1 张。"
            "请判断画面中人物在做什么,动作是否连续,有没有明显异常。"
            "严禁输出英文分析过程。"
            "严禁输出 system、user、model、Drafting final response。"
            "严禁输出项目符号列表。"
            "请只用自然中文回答 1 到 2 句话。"
            "用户问题:%2"
        ).arg(imageCount).arg(userPrompt);
    }
    return QString(
        "你是木头智能科技人工智能的摄像头视觉模块。"
        "现在给你的是一张摄像头图片。"
        "请描述你看到的主要人物、动作和环境。"
        "严禁输出英文分析过程。"
        "严禁输出 system、user、model、Drafting final response。"
        "严禁输出项目符号列表。"
        "请只用自然中文回答 1 到 2 句话。"
        "用户问题:%1"
    ).arg(userPrompt);
}

// Text-generation worker built on the llama.cpp C API. The model and context
// are loaded lazily on the first request and released in the destructor.
// Generated text is streamed through textGenerated(); progress and failures
// are reported through statusChanged() / errorOccurred().
class LlamaWorker:public QObject{
    Q_OBJECT
public:
    LlamaWorker(){
        llama_backend_init();
    }
    ~LlamaWorker()override{
        if(ctx){
            llama_free(ctx);
            ctx=nullptr;
        }
        if(model){
            llama_model_free(model);
            model=nullptr;
        }
        llama_backend_free();
    }
signals:
    void textGenerated(QString text);
    void statusChanged(QString status);
    void errorOccurred(QString err);
    void busyChanged(bool busy);
public slots:
    // Generates a reply to `prompt` and streams the cleaned text.
    // Requests arriving while a generation is running are ignored.
    void process(QString prompt){
        if(busy)return;
        busy=true;
        emit busyChanged(true);
        // Every exit path below must release the busy flag.
        const auto finish=[this]{
            busy=false;
            emit busyChanged(false);
        };

        if(!ensureModelLoaded()){
            finish();
            return;
        }
        llama_memory_clear(llama_get_memory(ctx),true);
        const llama_vocab* vocab=llama_model_get_vocab(model);

        QString formattedPrompt=QString(
            "<|turn>user\n"
            "你是由陈恩华开发的“木头智能科技人工智能”。"
            "请只用自然中文直接回答,不要输出 thought、标签、系统提示、乱码、推理过程。"
            "回答要简洁稳定。\n\n"
            "用户问题:%1\n"
            "<turn|>\n"
            "<|turn>model\n"
        ).arg(prompt);
        emit statusChanged("木头智能科技 AI 正在读取 Gemma4 文本上下文...");
        emit textGenerated("\n木头智能科技人工智能 > ");

        const std::string stdPrompt=formattedPrompt.toStdString();
        std::vector<llama_token> tokens(stdPrompt.size()+32);
        int nTokens=llama_tokenize(
            vocab,
            stdPrompt.c_str(),
            static_cast<int>(stdPrompt.size()),
            tokens.data(),
            static_cast<int>(tokens.size()),
            true,
            true
        );
        if(nTokens<0){
            // A negative result is the required token count: grow and retry once.
            tokens.resize(static_cast<size_t>(-nTokens));
            nTokens=llama_tokenize(
                vocab,
                stdPrompt.c_str(),
                static_cast<int>(stdPrompt.size()),
                tokens.data(),
                static_cast<int>(tokens.size()),
                true,
                true
            );
        }
        if(nTokens<=0){
            emit errorOccurred("Prompt tokenize 失败");
            finish();
            return;
        }
        tokens.resize(nTokens);

        // The prompt has to leave room for at least one generated token.
        const int nCtx=static_cast<int>(llama_n_ctx(ctx));
        if(nTokens>=nCtx){
            emit errorOccurred(QString("输入过长:%1 tokens,超过上下文窗口 %2。").arg(nTokens).arg(nCtx));
            finish();
            return;
        }
        // Never generate past the end of the context window.
        const int maxGen=std::min(256,nCtx-nTokens);

        int64_t tStart=ggml_time_us();

        // llama_decode accepts at most n_batch tokens per call, so feed a long
        // prompt in chunks; the final chunk is decoded by the loop below.
        const int nBatch=std::max(1,static_cast<int>(llama_n_batch(ctx)));
        int consumed=0;
        while(nTokens-consumed>nBatch){
            if(llama_decode(ctx,llama_batch_get_one(tokens.data()+consumed,nBatch))!=0){
                emit errorOccurred("推理异常:llama_decode 失败");
                finish();
                return;
            }
            consumed+=nBatch;
        }
        llama_batch batch=llama_batch_get_one(tokens.data()+consumed,nTokens-consumed);

        llama_sampler_chain_params sparams=llama_sampler_chain_default_params();
        llama_sampler* smpl=llama_sampler_chain_init(sparams);
        llama_sampler_chain_add(smpl,llama_sampler_init_top_k(40));
        llama_sampler_chain_add(smpl,llama_sampler_init_top_p(0.85f,1));
        llama_sampler_chain_add(smpl,llama_sampler_init_temp(0.45f));
        llama_sampler_chain_add(smpl,llama_sampler_init_dist(1234));

        int nDecode=0;
        QString rawOutput;
        QString emittedOutput;
        // Stateful decoder: a multi-byte UTF-8 character may be split across
        // two tokens; decoding each piece on its own would corrupt it.
        QStringDecoder utf8Decoder(QStringDecoder::Utf8);
        // `batch` stores a pointer to this token until the next llama_decode,
        // so it must outlive a single loop iteration.
        llama_token newToken=0;

        while(nDecode<maxGen){
            if(llama_decode(ctx,batch)!=0){
                emit errorOccurred("推理异常:llama_decode 失败");
                break;
            }
            // llama_sampler_sample() already accepts the sampled token, so no
            // separate llama_sampler_accept() call is made here.
            newToken=llama_sampler_sample(smpl,ctx,-1);
            if(newToken==llama_vocab_eos(vocab))break;
            if(llama_vocab_is_eog(vocab,newToken))break;

            char buf[1024];
            int n=llama_token_to_piece(vocab,newToken,buf,sizeof(buf),0,true);
            if(n>0){
                const QString piece=utf8Decoder.decode(QByteArrayView(buf,n));
                rawOutput+=piece;
                bool shouldStop=false;
                QString visible=cleanVisibleText(rawOutput,shouldStop);
                if(looksLikeGarbageTail(visible)){
                    int cut=visible.size()-160;
                    if(cut>0)visible=visible.left(cut).trimmed();
                    shouldStop=containsUsefulVisibleText(visible);
                }
                // Emit only the part that has not been shown yet.
                if(visible.size()>emittedOutput.size()){
                    QString delta=visible.mid(emittedOutput.size());
                    if(!delta.isEmpty()){
                        emit textGenerated(delta);
                        emittedOutput=visible;
                    }
                }
                if(shouldStop&&containsUsefulVisibleText(emittedOutput))break;
            }

            batch=llama_batch_get_one(&newToken,1);
            nDecode++;
            if(nDecode%5==0){
                double sec=(ggml_time_us()-tStart)/1000000.0;
                if(sec>0){
                    emit statusChanged(QString("木头智能科技 AI 推理中:%1 t/s")
                        .arg(QString::number(nDecode/sec,'f',1)));
                }
            }
            // Stop when the model starts repeating its closing phrase.
            if(emittedOutput.right(80).count("请问有什么我可以帮")>=2)break;
        }
        llama_sampler_free(smpl);

        double totalSec=(ggml_time_us()-tStart)/1000000.0;
        if(nDecode>0&&totalSec>0){
            QString report=QString("生成完毕 | 速度: %1 t/s | 耗时: %2s | 长度: %3 tokens")
                .arg(QString::number(nDecode/totalSec,'f',2))
                .arg(QString::number(totalSec,'f',1))
                .arg(nDecode);
            printf("\n>>> 性能实测数据: %s <<<\n",report.toUtf8().constData());
            emit statusChanged(report);
        }else{
            emit statusChanged("生成结束,输出过短。");
        }
        finish();
    }
private:
    // Loads the model and creates the context on first use.
    // Returns false (after emitting errorOccurred) when either step fails.
    bool ensureModelLoaded(){
        if(model&&ctx)return true;
        emit statusChanged("正在初始化 陈恩华-智能小脑...");
        if(!QFileInfo::exists(MODEL_PATH)){
            emit errorOccurred("模型不存在:"+MODEL_PATH);
            return false;
        }
        llama_model_params mparams=llama_model_default_params();
        mparams.n_gpu_layers=99;
        model=llama_model_load_from_file(MODEL_PATH.toUtf8().constData(),mparams);
        if(!model){
            emit errorOccurred("模型加载失败,请检查 GGUF 路径!");
            return false;
        }
        llama_context_params cparams=llama_context_default_params();
        cparams.n_ctx=2048;
        cparams.n_threads=8;
        cparams.n_threads_batch=10;
        cparams.n_batch=1024;
        cparams.n_ubatch=512;
        cparams.offload_kqv=true;
        ctx=llama_init_from_model(model,cparams);
        if(!ctx){
            // Release the model so that a retry does not load (and leak) a second copy.
            llama_model_free(model);
            model=nullptr;
            emit errorOccurred("上下文初始化失败,可能是内存不足。");
            return false;
        }
        emit statusChanged("文本模型加载完成:保持 BF16 源模型精度。");
        return true;
    }
private:
    llama_model* model=nullptr;
    llama_context* ctx=nullptr;
    bool busy=false;
};

// Vision worker: runs the llama.cpp multimodal CLI as a child process on one
// or more images and emits the cleaned-up answer.
class VisionWorker:public QObject{
    Q_OBJECT
signals:
    void textGenerated(QString text);
    void statusChanged(QString status);
    void errorOccurred(QString err);
    void busyChanged(bool busy);
public slots:
    // Analyses `imagePaths` with the question `prompt`.
    // `modeInt` is a VisionTaskMode value passed as int.
    void analyzeImages(QString prompt,QStringList imagePaths,int modeInt){
        emit busyChanged(true);
        VisionTaskMode mode=static_cast<VisionTaskMode>(modeInt);

        QString cli=findMtmdCli();
        if(cli.isEmpty()){
            emit errorOccurred(
                QString("找不到多模态可执行程序。\n"
                        "确认路径:%1/llama.cpp/build/bin/llama-mtmd-cli\n").arg(PROJECT_ROOT)
            );
            emit busyChanged(false);
            return;
        }
        if(!QFileInfo::exists(MODEL_PATH)){
            emit errorOccurred("找不到模型:"+MODEL_PATH);
            emit busyChanged(false);
            return;
        }
        if(!QFileInfo::exists(MMPROJ_PATH)){
            emit errorOccurred("找不到 mmproj:"+MMPROJ_PATH);
            emit busyChanged(false);
            return;
        }

        QStringList absImages;
        for(const QString& img:imagePaths){
            QFileInfo fi(img);
            if(fi.exists()&&fi.isFile()){
                absImages<<fi.absoluteFilePath();
            }
        }
        // For frame sequences keep only the first, middle and last frame.
        if(mode==VisionTaskMode::CameraVideo&&absImages.size()>3){
            QString first=absImages.first();
            QString middle=absImages.at(absImages.size()/2);
            QString last=absImages.last();
            absImages.clear();
            absImages<<first<<middle<<last;
        }
        if(absImages.isEmpty()){
            emit errorOccurred("没有可分析的图片。");
            emit busyChanged(false);
            return;
        }

        emit textGenerated("\n木头智能科技视觉 > ");
        emit statusChanged("视觉模型启动中:"+cli);

        QString finalPrompt=buildVisionPrompt(prompt,mode,absImages.size());
        QStringList args;
        args<<"-m"<<MODEL_PATH;
        args<<"--mmproj"<<MMPROJ_PATH;
        args<<"-ngl"<<"99";
        args<<"-c"<<"4096";
        args<<"-n"<<"1024";
        for(const QString& img:absImages){
            args<<"--image"<<img;
        }
        args<<"--jinja";
        args<<"-p"<<finalPrompt;

        QProcess p;
        QProcessEnvironment env=QProcessEnvironment::systemEnvironment();
        env.insert("LLAMA_LOG_LEVEL","ERROR");
        p.setProcessEnvironment(env);
        p.setProgram(cli);
        p.setArguments(args);
        p.setProcessChannelMode(QProcess::SeparateChannels);
        p.start();
        if(!p.waitForStarted(10000)){
            emit errorOccurred("多模态程序启动失败:"+cli);
            emit busyChanged(false);
            return;
        }

        // Drain both pipes while the child runs.
        QString stdoutText;
        QString stderrText;
        while(p.state()!=QProcess::NotRunning){
            p.waitForReadyRead(300);
            QByteArray out=p.readAllStandardOutput();
            QByteArray err=p.readAllStandardError();
            if(!out.isEmpty()){
                stdoutText+=QString::fromUtf8(out);
                emit statusChanged("视觉分析中...");
            }
            if(!err.isEmpty()){
                stderrText+=QString::fromUtf8(err);
            }
        }
        stdoutText+=QString::fromUtf8(p.readAllStandardOutput());
        stderrText+=QString::fromUtf8(p.readAllStandardError());

        // A crashed or failed CLI produced no real observation; report that
        // instead of falling through to the default description sentences.
        if(p.exitStatus()!=QProcess::NormalExit||p.exitCode()!=0){
            emit errorOccurred(QString("多模态程序异常退出(code %1):%2")
                .arg(p.exitCode())
                .arg(stderrText.right(400).trimmed()));
            emit busyChanged(false);
            return;
        }

        QString clean=stripMtmdLogs(stdoutText,finalPrompt,mode);
        if(clean.isEmpty()){
            clean=stripMtmdLogs(stderrText,finalPrompt,mode);
        }
        if(mode==VisionTaskMode::DesktopScreen&&looksLikeCodeAnswer(clean)){
            clean="我看到你正在电脑前查看或调试程序代码。";
        }
        if(clean.isEmpty()){
            if(mode==VisionTaskMode::DesktopScreen){
                clean="我看到你正在电脑前操作屏幕内容,像是在查看或调试代码。";
            }else if(mode==VisionTaskMode::CameraVideo){
                clean="从连续画面看,人物基本坐在镜头前,动作变化不大。";
            }else{
                clean="我看到画面中有人坐在室内,正面对着镜头。";
            }
        }

        emit textGenerated(clean+"\n");
        emit statusChanged(QString("视觉分析完成,素材 %1 张,保存目录:%2")
            .arg(absImages.size())
            .arg(todayDir()));
        emit busyChanged(false);
    }
};

// Text-to-speech: synthesises audio with the sherpa-onnx offline TTS C API,
// writes it to a WAV file in the temp directory and plays it with `afplay`.
class SherpaTTSPlayer:public QObject{
    Q_OBJECT
public:
    explicit SherpaTTSPlayer(QObject* parent=nullptr):QObject(parent){
        init();
    }
    ~SherpaTTSPlayer()override{
        if(tts){
            SherpaOnnxDestroyOfflineTts(tts);
            tts=nullptr;
        }
    }
    // Speaks `text`. File paths and media file names are removed first; does
    // nothing when TTS failed to initialise or fewer than 2 characters remain.
    void speak(const QString& text){
        if(!tts)return;
        QString clean=text.trimmed();
        clean.remove(QRegularExpression("/[^\\s]+"));
        clean.remove(QRegularExpression("[A-Za-z]:\\\\[^\\s]+"));
        clean.remove(QRegularExpression("[A-Za-z0-9_\\-]+\\.png"));
        clean.remove(QRegularExpression("[A-Za-z0-9_\\-]+\\.jpg"));
        clean.remove(QRegularExpression("[A-Za-z0-9_\\-]+\\.jpeg"));
        clean.remove(QRegularExpression("[A-Za-z0-9_\\-]+\\.wav"));
        clean.remove(QRegularExpression("[A-Za-z0-9_\\-]+\\.mp4"));
        clean=clean.trimmed();
        if(clean.isEmpty())return;
        if(clean.size()<2)return;

        std::string txt=clean.toStdString();
        const SherpaOnnxGeneratedAudio* audio=
            SherpaOnnxOfflineTtsGenerate(
                tts,
                txt.c_str(),
                8,
                1.0f
            );
        if(!audio)return;
        if(audio->n<=0){
            SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
            return;
        }

        QString outPath=QDir::tempPath()+"/ceh_tts.wav";
        // Clamp to [-1, 1] before writing the WAV file.
        std::vector<float> samples(audio->samples,audio->samples+audio->n);
        for(float& s:samples){
            s=std::clamp(s,-1.0f,1.0f);
        }
        SherpaOnnxWriteWave(
            samples.data(),
            static_cast<int32_t>(samples.size()),
            audio->sample_rate,
            outPath.toStdString().c_str()
        );
        SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
        QProcess::startDetached("afplay",QStringList()<<outPath);
    }
private:
    // Creates the TTS engine from TTS_MODEL_DIR; leaves `tts` null (and logs
    // the reason) when a model file is missing or creation fails.
    void init(){
        QString modelPath=TTS_MODEL_DIR+"/model.onnx";
        QString lexiconPath=TTS_MODEL_DIR+"/lexicon.txt";
        QString tokensPath=TTS_MODEL_DIR+"/tokens.txt";
        if(!QFileInfo::exists(modelPath)){
            qDebug()<<"[TTS] model missing:"<<modelPath;
            return;
        }
        if(!QFileInfo::exists(lexiconPath)){
            qDebug()<<"[TTS] lexicon missing:"<<lexiconPath;
            return;
        }
        if(!QFileInfo::exists(tokensPath)){
            qDebug()<<"[TTS] tokens missing:"<<tokensPath;
            return;
        }
        SherpaOnnxOfflineTtsConfig config;
        std::memset(&config,0,sizeof(config));
        // The config stores raw char pointers, so the strings are kept as
        // members for as long as this object lives.
        model=modelPath.toStdString();
        lexicon=lexiconPath.toStdString();
        tokens=tokensPath.toStdString();
        config.model.vits.model=model.c_str();
        config.model.vits.lexicon=lexicon.c_str();
        config.model.vits.tokens=tokens.c_str();
        config.model.num_threads=4;
        config.max_num_sentences=1;
        tts=SherpaOnnxCreateOfflineTts(&config);
        if(!tts){
            qDebug()<<"[TTS] Sherpa TTS init failed";
        }else{
            qDebug()<<"[TTS] Sherpa TTS init ok";
        }
    }
private:
    const SherpaOnnxOfflineTts* tts=nullptr;
    std::string model;
    std::string lexicon;
    std::string tokens;
};

// Streaming speech recognizer built on the sherpa-onnx online-recognizer C API.
class SherpaASRRecognizer:public QObject{
    Q_OBJECT
public:
    explicit SherpaASRRecognizer(QObject* parent=nullptr):QObject(parent){
        init();
    }
    ~SherpaASRRecognizer()override{
        destroy();
    }
    bool isReady()const{
        return recognizer&&stream;
    }
    // Replaces the current stream with a fresh one and clears collected text.
    bool resetStream(){
        if(!recognizer)return false;
        if(stream){
            SherpaOnnxDestroyOnlineStream(stream);
            stream=nullptr;
        }
        stream=SherpaOnnxCreateOnlineStream(recognizer);
        if(!stream){
            qDebug()<<"[ASR] resetStream failed";
            return false;
        }
        lastPartial.clear();
        finalText.clear();
        qDebug()<<"[ASR] resetStream ok";
        return true;
    }
    // Feeds audio samples, decodes what is ready and updates the partial
    // result; at an endpoint the partial text is appended to the final text.
    void acceptWaveform(int sampleRate,const std::vector<float>& samples){
        if(!recognizer||!stream)return;
        if(samples.empty())return;
        SherpaOnnxOnlineStreamAcceptWaveform(
            stream,
            sampleRate,
            samples.data(),
            static_cast<int32_t>(samples.size())
        );
        while(SherpaOnnxIsOnlineStreamReady(recognizer,stream)){
            SherpaOnnxDecodeOnlineStream(recognizer,stream);
        }
        const SherpaOnnxOnlineRecognizerResult* r=
            SherpaOnnxGetOnlineStreamResult(recognizer,stream);
        if(r){
            lastPartial=QString::fromUtf8(r->text?r->text:"").trimmed();
            if(!lastPartial.isEmpty()){
                qDebug()<<"[ASR] partial ="<<lastPartial;
            }
            if(SherpaOnnxOnlineStreamIsEndpoint(recognizer,stream)){
                if(!lastPartial.isEmpty()){
                    if(!finalText.isEmpty())finalText+=" ";
                    finalText+=lastPartial;
                    qDebug()<<"[ASR] endpoint final append ="<<lastPartial;
                }
                lastPartial.clear();
                SherpaOnnxOnlineStreamReset(recognizer,stream);
            }
            SherpaOnnxDestroyOnlineRecognizerResult(r);
        }
    }
    // Signals end of input, decodes the remainder, returns the complete
    // recognised text and resets the stream for the next utterance.
    QString finishAndGetText(){
        if(!recognizer||!stream)return "";
        SherpaOnnxOnlineStreamInputFinished(stream);
        while(SherpaOnnxIsOnlineStreamReady(recognizer,stream)){
            SherpaOnnxDecodeOnlineStream(recognizer,stream);
        }
        const SherpaOnnxOnlineRecognizerResult* r=
            SherpaOnnxGetOnlineStreamResult(recognizer,stream);
        QString tail;
        if(r){
            tail=QString::fromUtf8(r->text?r->text:"").trimmed();
            SherpaOnnxDestroyOnlineRecognizerResult(r);
        }
        if(!tail.isEmpty()){
            if(!finalText.isEmpty())finalText+=" ";
            finalText+=tail;
        }else if(!lastPartial.isEmpty()){
            if(!finalText.isEmpty())finalText+=" ";
            finalText+=lastPartial;
        }
        QString out=finalText.trimmed();
        qDebug()<<"[ASR] finish text ="<<out;
        resetStream();
        return out;
    }
    QString partialText()const{
        return lastPartial;
    }
    QString allFinalText()const{
        return finalText;
    }
private:
    // Returns the absolute path of the first entry of `candidates` that exists
    // as a file inside `dir`, or an empty string.
    QString findModelFile(const QString& dir,const QStringList& candidates){
        for(const QString& name:candidates){
            QString path=QDir(dir).filePath(name);
            QFileInfo fi(path);
            if(fi.exists()&&fi.isFile()){
                return fi.absoluteFilePath();
            }
        }
        return "";
    }
    void init(){
        destroy();
        if(!QDir(ASR_MODEL_DIR).exists()){
            qDebug()<<"[ASR] model dir missing:"<<ASR_MODEL_DIR;
            return;
        }
        QString encoderPath=findModelFile(ASR_MODEL_DIR,{
            "encoder.int8.onnx",
            "encoder.onnx",
            "encoder.fp32.onnx"
        });
        QString decoderPath=findModelFile(ASR_MODEL_DIR,{
            "decoder.int8.onnx",
            "decoder.onnx",
            "decoder.fp32.onnx"
        });
        QString tokensPath=findModelFile(ASR_MODEL_DIR,{
            "tokens.txt"
        });
if(encoderPath.isEmpty()){ qDebug()<<"[ASR] encoder missing in:"<<ASR_MODEL_DIR; return; } if(decoderPath.isEmpty()){ qDebug()<<"[ASR] decoder missing in:"<<ASR_MODEL_DIR; return; } if(tokensPath.isEmpty()){ qDebug()<<"[ASR] tokens missing in:"<<ASR_MODEL_DIR; return; } qDebug()<<"[ASR] ================= 初始化 Online Paraformer ================"; qDebug()<<"[ASR] model dir ="<<ASR_MODEL_DIR; qDebug()<<"[ASR] encoder ="<<encoderPath; qDebug()<<"[ASR] decoder ="<<decoderPath; qDebug()<<"[ASR] tokens ="<<tokensPath; qDebug()<<"[ASR] 注意:这里是 Online Paraformer,不是 Whisper,不是 OfflineRecognizer"; encoder=encoderPath.toStdString(); decoder=decoderPath.toStdString(); tokens=tokensPath.toStdString(); provider="cpu"; decodingMethod="greedy_search"; SherpaOnnxOnlineModelConfig modelConfig; std::memset(&modelConfig,0,sizeof(modelConfig)); modelConfig.debug=0; modelConfig.num_threads=2; modelConfig.provider=provider.c_str(); modelConfig.tokens=tokens.c_str(); modelConfig.paraformer.encoder=encoder.c_str(); modelConfig.paraformer.decoder=decoder.c_str(); SherpaOnnxOnlineRecognizerConfig recognizerConfig; std::memset(&recognizerConfig,0,sizeof(recognizerConfig)); recognizerConfig.feat_config.sample_rate=16000; recognizerConfig.feat_config.feature_dim=80; recognizerConfig.model_config=modelConfig; recognizerConfig.decoding_method=decodingMethod.c_str(); recognizerConfig.max_active_paths=4; recognizerConfig.enable_endpoint=1; recognizerConfig.rule1_min_trailing_silence=2.4f; recognizerConfig.rule2_min_trailing_silence=1.2f; recognizerConfig.rule3_min_utterance_length=20.0f; recognizerConfig.hotwords_file=""; recognizerConfig.hotwords_score=1.5f; recognizerConfig.blank_penalty=0.0f; recognizer=SherpaOnnxCreateOnlineRecognizer(&recognizerConfig); if(!recognizer){ qDebug()<<"[ASR] SherpaOnnxCreateOnlineRecognizer failed"; return; } stream=SherpaOnnxCreateOnlineStream(recognizer); if(!stream){ qDebug()<<"[ASR] SherpaOnnxCreateOnlineStream failed"; 
SherpaOnnxDestroyOnlineRecognizer(recognizer); recognizer=nullptr; return; } qDebug()<<"[ASR] Sherpa ASR init ok"; } void destroy(){ if(stream){ SherpaOnnxDestroyOnlineStream(stream); stream=nullptr; } if(recognizer){ SherpaOnnxDestroyOnlineRecognizer(recognizer); recognizer=nullptr; } } private: const SherpaOnnxOnlineRecognizer* recognizer=nullptr; const SherpaOnnxOnlineStream* stream=nullptr; std::string encoder; std::string decoder; std::string tokens; std::string provider; std::string decodingMethod; QString lastPartial; QString finalText; }; class AIWorkerWindow:public QWidget{ Q_OBJECT public: AIWorkerWindow(){ qRegisterMetaType<QString>("QString"); qRegisterMetaType<QStringList>("QStringList"); qRegisterMetaType<int>("int"); QVBoxLayout* mainLayout=new QVBoxLayout(this); mainLayout->setSpacing(10); mainLayout->setContentsMargins(15,15,15,15); QHBoxLayout* topLayout=new QHBoxLayout(); chatDisplay=new QTextEdit(); chatDisplay->setReadOnly(true); chatDisplay->setStyleSheet( "background-color:#1e1e1e;" "color:#00ff00;" "border:2px solid #333333;" "border-radius:10px;" "padding:10px;" "font-size:14px;" ); QWidget* monitorPanel=new QWidget(); monitorPanel->setFixedWidth(230); monitorPanel->setStyleSheet( "QWidget{border:2px solid #444;border-radius:10px;background-color:#ffffff;}" "QLabel{border:none;color:#333;}" ); QVBoxLayout* monLayout=new QVBoxLayout(monitorPanel); QLabel* monTitle=new QLabel("<b>电脑实时监控</b>"); cpuLabel=new QLabel("CPU: Apple Silicon"); gpuLabel=new QLabel("GPU: Metal"); ramLabel=new QLabel("RAM: 0 GB"); pressureBar=new QProgressBar(); pressureBar->setRange(0,100); pressureBar->setStyleSheet( "QProgressBar{border:1px solid grey;border-radius:5px;text-align:center;}" "QProgressBar::chunk{background-color:#007AFF;}" ); monLayout->addWidget(monTitle); monLayout->addSpacing(10); monLayout->addWidget(cpuLabel); monLayout->addWidget(gpuLabel); monLayout->addWidget(ramLabel); monLayout->addWidget(new QLabel("系统压力:")); 
monLayout->addWidget(pressureBar); monLayout->addStretch(); topLayout->addWidget(chatDisplay,3); topLayout->addWidget(monitorPanel,1); QHBoxLayout* bottomLayout=new QHBoxLayout(); inputBox=new QLineEdit(); inputBox->setFixedHeight(50); inputBox->setPlaceholderText("请输入 ..."); inputBox->setStyleSheet( "border:2px solid #ccc;" "color:white;" "background-color:#1e1e1e;" "border-radius:10px;" "padding:5px;" "font-size:14px;" ); voiceBtn=new QPushButton("按住说话"); voiceBtn->setFixedSize(120,50); voiceBtn->setStyleSheet( "QPushButton{background-color:#34C759;color:white;border-radius:10px;font-weight:bold;}" "QPushButton:hover{background-color:#248A3D;}" "QPushButton:disabled{background-color:#555;color:#aaa;}" ); sendBtn=new QPushButton("发送"); sendBtn->setFixedSize(100,50); sendBtn->setStyleSheet( "QPushButton{background-color:#007AFF;color:white;border-radius:10px;font-weight:bold;}" "QPushButton:hover{background-color:#0056b3;}" "QPushButton:disabled{background-color:#555;color:#aaa;}" ); bottomLayout->addWidget(inputBox); bottomLayout->addWidget(voiceBtn); bottomLayout->addWidget(sendBtn); mainLayout->addLayout(topLayout); mainLayout->addLayout(bottomLayout); textThread=new QThread(this); textWorker=new LlamaWorker(); textWorker->moveToThread(textThread); visionThread=new QThread(this); visionWorker=new VisionWorker(); visionWorker->moveToThread(visionThread); connect(sendBtn,&QPushButton::clicked,this,&AIWorkerWindow::handleSend); connect(inputBox,&QLineEdit::returnPressed,this,&AIWorkerWindow::handleSend); connect(voiceBtn,&QPushButton::pressed,this,&AIWorkerWindow::startVoiceRecord); connect(voiceBtn,&QPushButton::released,this,&AIWorkerWindow::stopVoiceRecordAndAsk); connect(textWorker,&LlamaWorker::textGenerated,this,&AIWorkerWindow::appendPlain); connect(textWorker,&LlamaWorker::statusChanged,this,[this](QString s){gpuLabel->setText(s);}); connect(textWorker,&LlamaWorker::errorOccurred,this,&AIWorkerWindow::appendError); 
connect(textWorker,&LlamaWorker::busyChanged,this,&AIWorkerWindow::setBusy); connect(visionWorker,&VisionWorker::textGenerated,this,&AIWorkerWindow::appendPlain); connect(visionWorker,&VisionWorker::statusChanged,this,[this](QString s){gpuLabel->setText(s);}); connect(visionWorker,&VisionWorker::errorOccurred,this,&AIWorkerWindow::appendError); connect(visionWorker,&VisionWorker::busyChanged,this,&AIWorkerWindow::setBusy); connect(textThread,&QThread::finished,textWorker,&QObject::deleteLater); connect(visionThread,&QThread::finished,visionWorker,&QObject::deleteLater); textThread->start(); visionThread->start(); videoTimer=new QTimer(this); connect(videoTimer,&QTimer::timeout,this,&AIWorkerWindow::captureVideoTick); statsTimer=new QTimer(this); connect(statsTimer,&QTimer::timeout,this,&AIWorkerWindow::updateStats); statsTimer->start(1000); setWindowTitle("木头智能科技人工智能"); resize(1000,700); ttsPlayer=new SherpaTTSPlayer(this); asrRecognizer=new SherpaASRRecognizer(this); appendPlain("[系统] 木头智能科技人工智能已启动。\n"); appendPlain("[系统] 文本模型:Gemma4 / llama.cpp。\n"); appendPlain("[系统] 视觉模型:llama-mtmd-cli。\n"); appendPlain("[系统] 语音识别:Sherpa-ONNX Online Paraformer。\n"); appendPlain("[系统] 语音合成:Sherpa-ONNX VITS。\n"); } ~AIWorkerWindow()override{ if(videoTimer&&videoTimer->isActive())videoTimer->stop(); if(statsTimer&&statsTimer->isActive())statsTimer->stop(); if(videoCap.isOpened())videoCap.release(); stopVoiceRecordingOnly(); QThread::msleep(200); textThread->quit(); textThread->wait(); visionThread->quit(); visionThread->wait(); } private slots: void startVoiceRecord(){ if(voiceRecording)return; QMicrophonePermission permission; auto status=qApp->checkPermission(permission); if(status==Qt::PermissionStatus::Granted){ startVoiceRecordImpl(); return; } if(status==Qt::PermissionStatus::Denied){ appendError("麦克风权限被拒绝。请到 macOS 系统设置 -> 隐私与安全性 -> 麦克风,允许当前 App。"); return; } qApp->requestPermission(permission,this,[this](const QPermission& result){ 
if(result.status()==Qt::PermissionStatus::Granted){ startVoiceRecordImpl(); }else{ appendError("未获得麦克风权限,无法语音输入。"); } }); } void stopVoiceRecordAndAsk(){ if(!voiceRecording)return; // ===================================================== // 已经进入尾音阶段 // 防止重复触发 // ===================================================== if(waitingTailAudio)return; waitingTailAudio=true; appendPlain( "\n[语音] 松开按钮,继续采集尾音 0.8 秒...\n" ); // ===================================================== // 按钮状态 // ===================================================== voiceBtn->setText("尾音处理中..."); // ===================================================== // 这里千万不要 stop // 继续录 800ms // ===================================================== QTimer::singleShot( 800, this, [this](){ appendPlain( "[语音] 尾音采集结束,停止录音...\n" ); // ============================================= // 禁止继续吃声音 // ============================================= forceIgnoreAudio=true; // ============================================= // 停止 timer // ============================================= if(audioPollTimer){ audioPollTimer->stop(); } // ============================================= // 清空 buffer // ============================================= if(audioDevice){ audioDevice->readAll(); } // ============================================= // 真正停止录音 // ============================================= stopVoiceRecordingOnly(); // ============================================= // 等待 CoreAudio 停稳 // ============================================= QThread::msleep(300); appendPlain( "[语音] 开始最终识别...\n" ); QString text; // ============================================= // 最终识别 // ============================================= if(asrRecognizer){ text= asrRecognizer ->finishAndGetText() .trimmed(); } // ============================================= // 保存 wav // ============================================= if(currentVoicePath.size()>0 &&!pcm16MonoForWav.isEmpty()){ bool ok= writeWaveFile16Mono( currentVoicePath, pcm16MonoForWav, recordFormat.sampleRate() ); 
if(ok){ qDebug() <<"[语音] WAV 保存成功:" <<currentVoicePath; }else{ qDebug() <<"[语音] WAV 保存失败:" <<currentVoicePath; } } // ============================================= // 恢复状态 // ============================================= waitingTailAudio=false; forceIgnoreAudio=false; // ============================================= // 空文本 // ============================================= if(text.isEmpty()){ appendPlain( "[语音] 没有识别到有效文字。\n" ); voiceBtn->setText("按住说话"); return; } // ============================================= // 输出 // ============================================= appendPlain( "[语音识别] "+text+"\n" ); inputBox->setText(text); voiceBtn->setText("按住说话"); // ============================================= // 自动提问 // ============================================= handleSend(); } ); } void handleSend(){ QString text=inputBox->text().trimmed(); if(text.isEmpty())return; chatDisplay->append("\nUser: "+text); inputBox->clear(); if(isVisionDesktopAsk(text)){ QString img=captureDesktop(); if(img.isEmpty()){ appendError("桌面截图失败。macOS 可能没有给当前 App 开启屏幕录制权限。"); return; } QStringList imgs; imgs<<img; QMetaObject::invokeMethod( visionWorker, "analyzeImages", Qt::QueuedConnection, Q_ARG(QString,text), Q_ARG(QStringList,imgs), Q_ARG(int,static_cast<int>(VisionTaskMode::DesktopScreen)) ); return; } if(isVisionVideoAsk(text)){ startVideoCapture(text); return; } if(isVisionCameraAsk(text)){ QString img=captureCameraFrame(); if(img.isEmpty()){ appendError("摄像头抓图失败。请检查摄像头权限或设备是否被其他程序占用。"); return; } QStringList imgs; imgs<<img; QMetaObject::invokeMethod( visionWorker, "analyzeImages", Qt::QueuedConnection, Q_ARG(QString,text), Q_ARG(QStringList,imgs), Q_ARG(int,static_cast<int>(VisionTaskMode::CameraSingle)) ); return; } QMetaObject::invokeMethod( textWorker, "process", Qt::QueuedConnection, Q_ARG(QString,text) ); } void captureVideoTick(){ if(!videoCap.isOpened()){ videoTimer->stop(); appendError("视频采集失败:摄像头未打开"); setBusy(false); return; } cv::Mat frame; for(int i=0;i<5;i++){ 
videoCap.read(frame); } if(frame.empty()){ videoTimer->stop(); appendError("视频采集失败:空帧"); videoCap.release(); setBusy(false); return; } QString path=todayDir()+QString("/camera_video_%1_%2.png") .arg(QDateTime::currentDateTime().toString("HH_mm_ss_zzz")) .arg(videoFrameIndex); if(!cv::imwrite(path.toStdString(),frame)){ videoTimer->stop(); videoCap.release(); appendError("视频帧保存失败:"+path); setBusy(false); return; } videoFrames<<path; appendPlain(QString("\n[视觉采集] 第 %1 帧已保存:\n").arg(videoFrameIndex+1)); appendImage(path); videoFrameIndex++; if(videoFrameIndex>=5){ videoTimer->stop(); videoCap.release(); QString prompt=videoPrompt+ "。这些图片是摄像头连续5秒的关键帧,请按时间顺序分析人在做什么、动作是否连续、有没有异常。"; QMetaObject::invokeMethod( visionWorker, "analyzeImages", Qt::QueuedConnection, Q_ARG(QString,prompt), Q_ARG(QStringList,videoFrames), Q_ARG(int,static_cast<int>(VisionTaskMode::CameraVideo)) ); } } void updateStats(){ int64_t totalMem=0; size_t size=sizeof(totalMem); sysctlbyname("hw.memsize",&totalMem,&size,nullptr,0); mach_msg_type_number_t count=HOST_VM_INFO64_COUNT; vm_statistics64_data_t vmStats; if(host_statistics64(mach_host_self(),HOST_VM_INFO64,(host_info64_t)&vmStats,&count)==KERN_SUCCESS){ long long pageSize=sysconf(_SC_PAGESIZE); long long usedMem= static_cast<long long>( vmStats.active_count+ vmStats.wire_count+ vmStats.compressor_page_count )*pageSize; double usedGb=usedMem/1024.0/1024.0/1024.0; double totalGb=totalMem/1024.0/1024.0/1024.0; ramLabel->setText( QString("内存占用: %1 / %2 GB") .arg(QString::number(usedGb,'f',1)) .arg(QString::number(totalGb,'f',1)) ); if(totalMem>0){ pressureBar->setValue(static_cast<int>((usedMem*100)/totalMem)); } } } private: void startVoiceRecordImpl(){ if(!asrRecognizer||!asrRecognizer->isReady()){ appendError("ASR 没有初始化成功。请检查 ASR_MODEL_DIR 是否包含 encoder / decoder / tokens。"); return; } if(!asrRecognizer->resetStream()){ appendError("ASR Stream 重置失败。"); return; } stopVoiceRecordingOnly(); pcm16MonoForWav.clear(); 
currentVoicePath=todayDir()+"/voice_"+QDateTime::currentDateTime().toString("HH_mm_ss_zzz")+".wav"; QAudioDevice inputDev=QMediaDevices::defaultAudioInput(); if(inputDev.isNull()){ appendError("没有找到默认麦克风设备。"); return; } QAudioFormat wanted; wanted.setSampleRate(48000); wanted.setChannelCount(1); wanted.setSampleFormat(QAudioFormat::Int16); if(inputDev.isFormatSupported(wanted)){ recordFormat=wanted; qDebug()<<"[语音] 使用录音格式 48000 Hz mono Int16"; }else{ recordFormat=inputDev.preferredFormat(); qDebug()<<"[语音] 48000 mono Int16 不支持,回退 preferred format"; } qDebug()<<"[语音] ================= 开始录音 ================="; qDebug()<<"[语音] 输入设备 ="<<inputDev.description(); qDebug()<<"[语音] sampleRate ="<<recordFormat.sampleRate(); qDebug()<<"[语音] channels ="<<recordFormat.channelCount(); qDebug()<<"[语音] sampleFormat ="<<formatSampleFormat(recordFormat.sampleFormat()); qDebug()<<"[语音] 保存路径 ="<<currentVoicePath; audioSource=new QAudioSource(inputDev,recordFormat,this); audioSource->setBufferSize(4096*4); audioDevice=audioSource->start(); if(!audioDevice){ appendError("QAudioSource 启动失败,没有拿到音频输入流。"); stopVoiceRecordingOnly(); return; } audioPollTimer=new QTimer(this); audioPollTimer->setInterval(20); connect(audioPollTimer,&QTimer::timeout,this,&AIWorkerWindow::pollVoiceAudio); audioPollTimer->start(); voiceRecording=true; voiceBtn->setText("录音中..."); appendPlain("\n[语音] 开始录音,请说话...\n"); } void pollVoiceAudio(){ // ===================================================== // AI 正在说话 // 彻底禁止录音 // ===================================================== if(ttsSpeaking)return; // ===================================================== // 强制忽略尾部声音 // ===================================================== if(forceIgnoreAudio){ return; } // ===================================================== // 基础判断 // ===================================================== if(!voiceRecording)return; if(!audioDevice)return; if(!audioSource)return; if(!asrRecognizer)return; // 
===================================================== // 读取音频 // ===================================================== QByteArray chunk=audioDevice->readAll(); if(chunk.isEmpty())return; // ===================================================== // 如果音量太小 // 直接忽略 // 防止尾音 / 电流声 // ===================================================== const int16_t* pcm16= reinterpret_cast<const int16_t*>(chunk.constData()); int sampleCount= chunk.size()/sizeof(int16_t); if(sampleCount<=0)return; int peak=0; for(int i=0;i<sampleCount;i++){ int v=std::abs((int)pcm16[i]); if(v>peak)peak=v; } // ===================================================== // 小于阈值直接忽略 // ===================================================== if(peak<1200){ return; } // ===================================================== // 转 float // ===================================================== std::vector<float> monoSamples; monoSamples.resize(sampleCount); for(int i=0;i<sampleCount;i++){ monoSamples[i]= (float)pcm16[i]/32768.0f; } // ===================================================== // 保存 wav // ===================================================== for(float v:monoSamples){ float clamped= std::max(-1.0f,std::min(1.0f,v)); int16_t s= static_cast<int16_t>(clamped*32767.0f); pcm16MonoForWav.append( reinterpret_cast<const char*>(&s), sizeof(int16_t) ); } // ===================================================== // 送入 ASR // ===================================================== asrRecognizer->acceptWaveform( recordFormat.sampleRate(), monoSamples ); QString partial= asrRecognizer->partialText(); if(!partial.isEmpty()){ inputBox->setText(partial); } // ===================================================== // 中文调试 // ===================================================== qDebug() <<"[语音调试]" <<"peak ="<<peak <<"samples ="<<sampleCount <<"partial ="<<partial; } void stopVoiceRecordingOnly(){ if(audioPollTimer){ audioPollTimer->stop(); audioPollTimer->deleteLater(); audioPollTimer=nullptr; } if(audioSource){ 
audioSource->stop(); audioSource->deleteLater(); audioSource=nullptr; } audioDevice=nullptr; voiceRecording=false; } QString formatSampleFormat(QAudioFormat::SampleFormat fmt)const{ switch(fmt){ case QAudioFormat::Unknown:return "Unknown"; case QAudioFormat::UInt8:return "UInt8"; case QAudioFormat::Int16:return "Int16"; case QAudioFormat::Int32:return "Int32"; case QAudioFormat::Float:return "Float"; default:return "Unknown"; } } bool writeWaveFile16Mono(const QString& wavPath,const QByteArray& pcm16Mono,int sampleRate){ QFile f(wavPath); if(!f.open(QIODevice::WriteOnly)){ return false; } const quint16 channels=1; const quint16 bitsPerSample=16; const quint32 byteRate=sampleRate*channels*bitsPerSample/8; const quint16 blockAlign=channels*bitsPerSample/8; const quint32 dataSize=static_cast<quint32>(pcm16Mono.size()); const quint32 riffSize=36+dataSize; QDataStream out(&f); out.setByteOrder(QDataStream::LittleEndian); out.writeRawData("RIFF",4); out<<riffSize; out.writeRawData("WAVE",4); out.writeRawData("fmt ",4); out<<quint32(16); out<<quint16(1); out<<channels; out<<quint32(sampleRate); out<<byteRate; out<<blockAlign; out<<bitsPerSample; out.writeRawData("data",4); out<<dataSize; out.writeRawData(pcm16Mono.constData(),pcm16Mono.size()); f.close(); return true; } void appendImage(const QString& path){ chatDisplay->moveCursor(QTextCursor::End); QTextCursor cursor=chatDisplay->textCursor(); QTextImageFormat img; img.setName(path); img.setWidth(220); cursor.insertImage(img); cursor.insertBlock(); chatDisplay->setTextCursor(cursor); chatDisplay->moveCursor(QTextCursor::End); } void appendPlain(QString t){ chatDisplay->moveCursor(QTextCursor::End); chatDisplay->insertPlainText(t); chatDisplay->moveCursor(QTextCursor::End); if(t.contains("木头智能科技人工智能 >")){ ttsBuffer.clear(); return; } if(t.contains("木头智能科技视觉 >")){ ttsBuffer.clear(); return; } QString trim=t.trimmed(); if(trim.startsWith("[截图]"))return; if(trim.startsWith("[视觉采集]"))return; 
if(trim.startsWith("[摄像头]"))return; if(trim.startsWith("[语音]"))return; if(trim.startsWith("[语音识别]"))return; if(trim.startsWith("[系统]"))return; if(trim.startsWith("[错误]"))return; ttsBuffer+=t; bool endSentence=false; if(t.contains("。"))endSentence=true; if(t.contains("!"))endSentence=true; if(t.contains("?"))endSentence=true; if(t.contains("\n"))endSentence=true; if(endSentence){ QString speakText=ttsBuffer.trimmed(); ttsBuffer.clear(); speakText.remove(QRegularExpression("\\[[^\\]]*\\]")); speakText=speakText.trimmed(); if(speakText.size()>1&&ttsPlayer){ ttsSpeaking=true; ttsPlayer->speak(speakText); QTimer::singleShot( qMax( 3000, speakText.size()*180 ), this, [this](){ ttsSpeaking=false; qDebug() <<"[TTS] 播放结束,恢复麦克风"; } ); } } } void appendError(QString e){ chatDisplay->append("\n[错误] "+e); gpuLabel->setText(e); } void setBusy(bool busy){ sendBtn->setEnabled(!busy); inputBox->setEnabled(!busy); voiceBtn->setEnabled(!busy); } QString captureDesktop(){ QScreen* screen=QGuiApplication::primaryScreen(); if(!screen)return ""; QPixmap pix=screen->grabWindow(0); if(pix.isNull())return ""; QString path=todayDir()+"/desktop_"+QDateTime::currentDateTime().toString("HH_mm_ss_zzz")+".png"; if(!pix.save(path,"PNG"))return ""; appendPlain("\n[截图] 桌面截图已保存:\n"); appendImage(path); return path; } QString captureCameraFrame(){ cv::VideoCapture cap(0); if(!cap.isOpened())return ""; cap.set(cv::CAP_PROP_FRAME_WIDTH,1280); cap.set(cv::CAP_PROP_FRAME_HEIGHT,720); cap.set(cv::CAP_PROP_FPS,30); cv::Mat frame; for(int i=0;i<10;i++){ cap.read(frame); } cap.release(); if(frame.empty())return ""; QString path=todayDir()+"/camera_"+QDateTime::currentDateTime().toString("HH_mm_ss_zzz")+".png"; if(!cv::imwrite(path.toStdString(),frame))return ""; appendPlain("\n[摄像头] 单帧已保存:\n"); appendImage(path); return path; } void startVideoCapture(const QString& prompt){ if(videoCap.isOpened())videoCap.release(); videoFrames.clear(); videoFrameIndex=0; videoPrompt=prompt; if(!videoCap.open(0)){ 
appendError("摄像头打开失败,无法录制5秒。"); return; } videoCap.set(cv::CAP_PROP_FRAME_WIDTH,1280); videoCap.set(cv::CAP_PROP_FRAME_HEIGHT,720); videoCap.set(cv::CAP_PROP_FPS,30); setBusy(true); appendPlain("\n[视觉采集] 开始采集 5 秒关键帧,每秒保存 1 张...\n"); videoTimer->start(1000); } private: QTextEdit* chatDisplay=nullptr; QLineEdit* inputBox=nullptr; QPushButton* voiceBtn=nullptr; QLabel* cpuLabel=nullptr; QLabel* gpuLabel=nullptr; QLabel* ramLabel=nullptr; QProgressBar* pressureBar=nullptr; QPushButton* sendBtn=nullptr; QThread* textThread=nullptr; LlamaWorker* textWorker=nullptr; QThread* visionThread=nullptr; VisionWorker* visionWorker=nullptr; QTimer* videoTimer=nullptr; QTimer* statsTimer=nullptr; cv::VideoCapture videoCap; QStringList videoFrames; int videoFrameIndex=0; QString videoPrompt; SherpaTTSPlayer* ttsPlayer=nullptr; SherpaASRRecognizer* asrRecognizer=nullptr; QAudioSource* audioSource=nullptr; QIODevice* audioDevice=nullptr; QTimer* audioPollTimer=nullptr; QAudioFormat recordFormat; bool voiceRecording=false; QString currentVoicePath; QByteArray pcm16MonoForWav; bool ttsSpeaking=false; bool forceIgnoreAudio=false; bool waitingTailAudio=false; QString ttsBuffer; }; #include "main.moc" int main(int argc,char* argv[]){ QApplication app(argc,argv); AIWorkerWindow window; window.show(); return app.exec(); }
📸 运行视频
🧨 结尾一句话
API 收费的时代结束了!本地化 AI 才是未来!龙虾人工智能,完全开源,人人可用,让每个机器人都拥有真正的眼睛和大脑!