feat: add homelab configuration and transcription support

- Add host-based delegate agents (ubuntu, grizzley, truenas, panda, pve)
- Add functional delegates (coder, reasoner, research, quick)
- Add NanoGPT provider with minimax-m2.5 model
- Add transcribe tool using faster-whisper
- Update TelegramChannel with workspace_dir
- Configure Z.AI as default provider with glm-5
2026-02-18 01:55:33 +00:00
parent a2f29838b4
commit 5d9c716a72
6 changed files with 300 additions and 3 deletions
--- a/src/channels/mod.rs
+++ b/src/channels/mod.rs
@@ -633,6 +633,7 @@ pub async fn doctor_channels(config: Config) -> Result<()> {
            Arc::new(TelegramChannel::new(
                tg.bot_token.clone(),
                tg.allowed_users.clone(),
                Some(config.workspace_dir.clone()),
            )),
        ));
    }
@@ -923,6 +924,7 @@ pub async fn start_channels(config: Config) -> Result<()> {
        channels.push(Arc::new(TelegramChannel::new(
            tg.bot_token.clone(),
            tg.allowed_users.clone(),
                Some(config.workspace_dir.clone()),
        )));
    }
--- a/src/channels/telegram.rs
+++ b/src/channels/telegram.rs
@@ -179,18 +179,19 @@ fn parse_attachment_markers(message: &str) -> (String, Vec<TelegramAttachment>)
 }
 /// Telegram channel — long-polls the Bot API for updates
-pub struct TelegramChannel {
+pub struct TelegramChannel { workspace_dir: std::path::PathBuf,
    bot_token: String,
    allowed_users: Vec<String>,
    client: reqwest::Client,
 }
 impl TelegramChannel {
-    pub fn new(bot_token: String, allowed_users: Vec<String>) -> Self {
+    pub fn new(bot_token: String, allowed_users: Vec<String>, workspace_dir: Option<std::path::PathBuf>) -> Self {
        Self {
            bot_token,
            allowed_users,
            client: reqwest::Client::new(),
            workspace_dir: workspace_dir.unwrap_or_else(|| std::path::PathBuf::from("/tmp")),
        }
    }
--- a/src/cron/scheduler.rs
+++ b/src/cron/scheduler.rs
@@ -231,7 +231,7 @@ async fn deliver_if_configured(config: &Config, job: &CronJob, output: &str) ->
                .telegram
                .as_ref()
                .ok_or_else(|| anyhow::anyhow!("telegram channel not configured"))?;
-            let channel = TelegramChannel::new(tg.bot_token.clone(), tg.allowed_users.clone());
+            let channel = TelegramChannel::new(tg.bot_token.clone(), tg.allowed_users.clone(), Some(config.workspace_dir.clone()));
            channel.send(output, target).await?;
        }
        "discord" => {
--- a/src/providers/mod.rs
+++ b/src/providers/mod.rs
@@ -135,6 +135,7 @@ fn resolve_provider_credential(name: &str, credential_override: Option<&str>) ->
        "zai" | "z.ai" => vec!["ZAI_API_KEY"],
        "nvidia" | "nvidia-nim" | "build.nvidia.com" => vec!["NVIDIA_API_KEY"],
        "synthetic" => vec!["SYNTHETIC_API_KEY"],
        "nanogpt" | "nano-gpt" => vec!["NANO_GPT_API_KEY"],
        "opencode" | "opencode-zen" => vec!["OPENCODE_API_KEY"],
        "vercel" | "vercel-ai" => vec!["VERCEL_API_KEY"],
        "cloudflare" | "cloudflare-ai" => vec!["CLOUDFLARE_API_KEY"],
@@ -246,6 +247,12 @@ pub fn create_provider_with_url(
            key,
            AuthStyle::Bearer,
        ))),
        "nanogpt" | "nano-gpt" => Ok(Box::new(OpenAiCompatibleProvider::new(
            "NanoGPT",
            "https://nano-gpt.com/api/v1",
            key,
            AuthStyle::Bearer,
        ))),
        "bedrock" | "aws-bedrock" => Ok(Box::new(OpenAiCompatibleProvider::new(
            "Amazon Bedrock",
            "https://bedrock-runtime.us-east-1.amazonaws.com",
--- a/src/tools/mod.rs
+++ b/src/tools/mod.rs
@@ -25,6 +25,7 @@ pub mod schema;
 pub mod screenshot;
 pub mod shell;
 pub mod traits;
 pub mod transcribe;
 pub use browser::{BrowserTool, ComputerUseConfig};
 pub use browser_open::BrowserOpenTool;
@@ -53,6 +54,7 @@ pub use schema::{CleaningStrategy, SchemaCleanr};
 pub use screenshot::ScreenshotTool;
 pub use shell::ShellTool;
 pub use traits::Tool;
 pub use transcribe::TranscribeTool;
 #[allow(unused_imports)]
 pub use traits::{ToolResult, ToolSpec};
@@ -191,6 +193,8 @@ pub fn all_tools_with_runtime(
    tools.push(Box::new(ScreenshotTool::new(security.clone())));
    tools.push(Box::new(ImageInfoTool::new(security.clone())));
    tools.push(Box::new(TranscribeTool::new(security.clone(), None, None)));
    if let Some(key) = composio_key {
        if !key.is_empty() {
            tools.push(Box::new(ComposioTool::new(key, composio_entity_id)));
--- a/src/tools/transcribe.rs
+++ b/src/tools/transcribe.rs
@@ -0,0 +1,283 @@
 use super::traits::{Tool, ToolResult};
 use crate::security::SecurityPolicy;
 use async_trait::async_trait;
 use serde_json::json;
 use std::path::Path;
 use std::sync::Arc;
 use tokio::process::Command;
 /// Largest audio file, in bytes, that the tool will hand to the transcriber
 /// (104_857_600 bytes = 100 MiB).
 const MAX_AUDIO_BYTES: u64 = 104_857_600;
 /// File extensions (lowercase, without the dot) accepted as audio input.
 const SUPPORTED_FORMATS: &[&str] = &["mp3", "wav", "m4a", "flac", "ogg", "webm", "mp4", "mpeg", "mpga"];
 /// Tool that transcribes audio files by running a faster-whisper Python script.
 pub struct TranscribeTool {
    /// Security policy consulted for rate limiting, path checks and the workspace root.
    security: Arc<SecurityPolicy>,
    /// Whisper model name used when a call does not supply its own `model` argument.
    model: String,
    /// Device string forwarded to faster-whisper (the constructor defaults it to "cpu").
    device: String,
 }
 impl TranscribeTool {
    /// Model names a caller may request per call; mirrors the `model` enum in
    /// the parameter schema.
    const ALLOWED_MODELS: &'static [&'static str] =
        &["tiny", "base", "small", "medium", "large-v2", "large-v3"];

    /// Maximum number of segments listed individually in the formatted output.
    const MAX_LISTED_SEGMENTS: usize = 20;

    /// Creates the tool.
    ///
    /// `model` falls back to `"base"` and `device` to `"cpu"` when `None`.
    pub fn new(
        security: Arc<SecurityPolicy>,
        model: Option<String>,
        device: Option<String>,
    ) -> Self {
        Self {
            security,
            model: model.unwrap_or_else(|| "base".to_string()),
            device: device.unwrap_or_else(|| "cpu".to_string()),
        }
    }

    /// Returns `true` when `path` has an extension listed in `SUPPORTED_FORMATS`
    /// (compared case-insensitively). Paths without a UTF-8 extension are rejected.
    fn is_supported_format(path: &Path) -> bool {
        path.extension()
            .and_then(|ext| ext.to_str())
            .map_or(false, |ext| {
                SUPPORTED_FORMATS
                    .iter()
                    .any(|known| known.eq_ignore_ascii_case(ext))
            })
    }

    /// Python source executed via `python3 -c`.
    ///
    /// Arguments: `<audio_path> <model> <device> [language]`. The optional
    /// `language` is forwarded to faster-whisper; when omitted the library
    /// detects the language itself. The script prints one JSON object to stdout.
    fn transcription_script() -> &'static str {
        r#"
 import sys
 import json
 def transcribe(audio_path, model_size, device, language=None):
    from faster_whisper import WhisperModel
    model = WhisperModel(model_size, device=device, compute_type="int8")
    segments, info = model.transcribe(audio_path, beam_size=5, language=language)
    transcription = []
    for segment in segments:
        transcription.append({
            "start": round(segment.start, 2),
            "end": round(segment.end, 2),
            "text": segment.text.strip()
        })
    result = {
        "language": info.language,
        "language_probability": round(info.language_probability, 2),
        "duration": round(info.duration, 2),
        "segments": transcription,
        "text": " ".join(s["text"] for s in transcription)
    }
    print(json.dumps(result))
 if __name__ == "__main__":
    if len(sys.argv) not in (4, 5):
        print(json.dumps({"error": "Usage: script.py <audio_path> <model> <device> [language]"}))
        sys.exit(1)
    language = sys.argv[4] if len(sys.argv) == 5 and sys.argv[4] else None
    transcribe(sys.argv[1], sys.argv[2], sys.argv[3], language)
 "#
    }

    /// Builds an unsuccessful `ToolResult` carrying `message` as the error.
    fn failure(message: impl Into<String>) -> ToolResult {
        ToolResult {
            success: false,
            output: String::new(),
            error: Some(message.into()),
        }
    }

    /// Renders the script's JSON result as the text returned to the caller.
    ///
    /// Missing fields fall back to the raw stdout (`text`), `"unknown"`
    /// (`language`) and `0.0` (`duration`). Segments are listed only when there
    /// is more than one, and at most `MAX_LISTED_SEGMENTS` are printed.
    fn format_transcription(parsed: &serde_json::Value, raw_stdout: &str) -> String {
        let text = parsed
            .get("text")
            .and_then(|t| t.as_str())
            .unwrap_or(raw_stdout);
        let language = parsed
            .get("language")
            .and_then(|l| l.as_str())
            .unwrap_or("unknown");
        let duration = parsed
            .get("duration")
            .and_then(|d| d.as_f64())
            .unwrap_or(0.0);

        let mut out = format!(
            "**Transcription** ({:.1}, language: {})\n\n{}\n",
            duration, language, text
        );

        let segments = match parsed.get("segments").and_then(|s| s.as_array()) {
            Some(segments) if segments.len() > 1 => segments,
            _ => return out,
        };

        out.push_str("\n**Segments:**\n");
        for seg in segments.iter().take(Self::MAX_LISTED_SEGMENTS) {
            // Entries missing any of the three fields are skipped silently.
            if let (Some(start), Some(end), Some(seg_text)) = (
                seg.get("start").and_then(|v| v.as_f64()),
                seg.get("end").and_then(|v| v.as_f64()),
                seg.get("text").and_then(|v| v.as_str()),
            ) {
                out.push_str(&format!(
                    "[{:05.1} - {:05.1}] {}\n",
                    start, end, seg_text
                ));
            }
        }
        if segments.len() > Self::MAX_LISTED_SEGMENTS {
            out.push_str(&format!(
                "... and {} more segments\n",
                segments.len() - Self::MAX_LISTED_SEGMENTS
            ));
        }
        out
    }
 }
 #[async_trait]
 impl Tool for TranscribeTool {
    fn name(&self) -> &str {
        "transcribe"
    }
    fn description(&self) -> &str {
        "Transcribe audio files to text using faster-whisper. \
         Supports mp3, wav, m4a, flac, ogg, webm, and other common audio formats. \
         Returns the transcription with timestamps and detected language."
    }
    fn parameters_schema(&self) -> serde_json::Value {
        json!({
            "type": "object",
            "additionalProperties": false,
            "properties": {
                "path": {
                    "type": "string",
                    "description": "Path to the audio file to transcribe"
                },
                "model": {
                    "type": "string",
                    "enum": ["tiny", "base", "small", "medium", "large-v2", "large-v3"],
                    "description": "Whisper model size (default: base). Larger models are more accurate but slower."
                },
                "language": {
                    "type": "string",
                    "description": "Hint for the spoken language (e.g., 'en', 'es', 'zh'). Optional."
                }
            },
            "required": ["path"]
        })
    }

    /// Transcribes the audio file named by `args["path"]`.
    ///
    /// Optional arguments: `model` (must be one of the schema's enum values;
    /// otherwise the configured default is used) and `language` (forwarded to
    /// faster-whisper as a hint).
    ///
    /// # Errors
    /// Returns `Err` only when `path` is missing or not a string. Every other
    /// failure (policy rejection, missing/oversized/unsupported file, script
    /// failure) is reported as `Ok` with `success: false`.
    async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
        let path_str = args
            .get("path")
            .and_then(|v| v.as_str())
            .ok_or_else(|| anyhow::anyhow!("Missing 'path' parameter"))?;

        // A per-call model is restricted to the names advertised in the schema so
        // that arbitrary strings are never handed to the model loader. The
        // configured default (`self.model`) is trusted as-is.
        let model = match args.get("model").and_then(|v| v.as_str()) {
            Some(requested) if Self::ALLOWED_MODELS.contains(&requested) => requested,
            Some(requested) => {
                return Ok(Self::failure(format!(
                    "Unsupported model: {}. Supported: {}",
                    requested,
                    Self::ALLOWED_MODELS.join(", ")
                )));
            }
            None => self.model.as_str(),
        };

        // Blank hints are treated as absent; the value is lowercased to match the
        // style of the codes shown in the schema description ('en', 'es', 'zh').
        let language = args
            .get("language")
            .and_then(|v| v.as_str())
            .map(str::trim)
            .filter(|hint| !hint.is_empty())
            .map(str::to_ascii_lowercase);

        if self.security.is_rate_limited() {
            return Ok(Self::failure(
                "Rate limit exceeded: too many actions in the last hour",
            ));
        }
        if !self.security.is_path_allowed(path_str) {
            return Ok(Self::failure(format!(
                "Path not allowed by security policy: {}",
                path_str
            )));
        }
        if !self.security.record_action() {
            return Ok(Self::failure(
                "Rate limit exceeded: action budget exhausted",
            ));
        }

        let full_path = self.security.workspace_dir.join(path_str);

        // One metadata lookup serves as both the existence check and the size
        // check, and lets non-"not found" errors surface with their real cause.
        let metadata = match std::fs::metadata(&full_path) {
            Ok(m) => m,
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
                return Ok(Self::failure(format!("File not found: {}", path_str)));
            }
            Err(e) => {
                return Ok(Self::failure(format!("Cannot read file metadata: {}", e)));
            }
        };
        if metadata.len() > MAX_AUDIO_BYTES {
            return Ok(Self::failure(format!(
                "File too large: {} bytes (max: {} bytes)",
                metadata.len(),
                MAX_AUDIO_BYTES
            )));
        }
        if !Self::is_supported_format(&full_path) {
            let ext = full_path
                .extension()
                .and_then(|e| e.to_str())
                .unwrap_or("unknown");
            return Ok(Self::failure(format!(
                "Unsupported audio format: {}. Supported: {}",
                ext,
                SUPPORTED_FORMATS.join(", ")
            )));
        }

        let mut command = Command::new("python3");
        command
            .arg("-c")
            .arg(Self::transcription_script())
            .arg(&full_path)
            .arg(model)
            .arg(&self.device);
        if let Some(hint) = &language {
            command.arg(hint);
        }

        let result = match command.output().await {
            Ok(result) => result,
            Err(e) => {
                return Ok(Self::failure(format!("Failed to run transcription: {}", e)));
            }
        };

        let stdout = String::from_utf8_lossy(&result.stdout);
        if !result.status.success() {
            let stderr = String::from_utf8_lossy(&result.stderr);
            // The script reports usage errors on stdout, so fall back to it when
            // stderr carries nothing useful.
            let detail = if stderr.trim().is_empty() {
                stdout.trim()
            } else {
                stderr.trim()
            };
            return Ok(Self::failure(format!("Transcription failed: {}", detail)));
        }

        // Best effort: if stdout is not valid JSON, return it verbatim rather
        // than discarding a transcription that did complete.
        let output = match serde_json::from_str::<serde_json::Value>(&stdout) {
            Ok(parsed) => Self::format_transcription(&parsed, &stdout),
            Err(_) => stdout.to_string(),
        };
        Ok(ToolResult {
            success: true,
            output,
            error: None,
        })
    }
 }