Files
zeroclaw/src/tools/browser.rs
fettpl 6776373e8e fix: constant_time_eq no longer leaks secret length via early return
Remove the early return on length mismatch that leaked length
information via timing. Now iterates over max(a.len(), b.len()),
padding the shorter input with zeros, and checks both byte-level
differences and length equality at the end.

Closes #57

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 00:01:23 +01:00

841 lines
27 KiB
Rust

//! Browser automation tool using Vercel's agent-browser CLI
//!
//! This tool provides AI-optimized web browsing capabilities via the agent-browser CLI.
//! It supports semantic element selection, accessibility snapshots, and JSON output
//! for efficient LLM integration.
use super::traits::{Tool, ToolResult};
use crate::security::SecurityPolicy;
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use std::process::Stdio;
use std::sync::Arc;
use tokio::process::Command;
use tracing::debug;
/// Browser automation tool using agent-browser CLI
pub struct BrowserTool {
security: Arc<SecurityPolicy>,
allowed_domains: Vec<String>,
session_name: Option<String>,
}
/// Response from agent-browser --json commands
#[derive(Debug, Deserialize)]
struct AgentBrowserResponse {
success: bool,
data: Option<Value>,
error: Option<String>,
}
/// Supported browser actions
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum BrowserAction {
/// Navigate to a URL
Open { url: String },
/// Get accessibility snapshot with refs
Snapshot {
#[serde(default)]
interactive_only: bool,
#[serde(default)]
compact: bool,
#[serde(default)]
depth: Option<u32>,
},
/// Click an element by ref or selector
Click { selector: String },
/// Fill a form field
Fill { selector: String, value: String },
/// Type text into focused element
Type { selector: String, text: String },
/// Get text content of element
GetText { selector: String },
/// Get page title
GetTitle,
/// Get current URL
GetUrl,
/// Take screenshot
Screenshot {
#[serde(default)]
path: Option<String>,
#[serde(default)]
full_page: bool,
},
/// Wait for element or time
Wait {
#[serde(default)]
selector: Option<String>,
#[serde(default)]
ms: Option<u64>,
#[serde(default)]
text: Option<String>,
},
/// Press a key
Press { key: String },
/// Hover over element
Hover { selector: String },
/// Scroll page
Scroll {
direction: String,
#[serde(default)]
pixels: Option<u32>,
},
/// Check if element is visible
IsVisible { selector: String },
/// Close browser
Close,
/// Find element by semantic locator
Find {
by: String, // role, text, label, placeholder, testid
value: String,
action: String, // click, fill, text, hover
#[serde(default)]
fill_value: Option<String>,
},
}
impl BrowserTool {
pub fn new(
security: Arc<SecurityPolicy>,
allowed_domains: Vec<String>,
session_name: Option<String>,
) -> Self {
Self {
security,
allowed_domains: normalize_domains(allowed_domains),
session_name,
}
}
/// Check if agent-browser CLI is available
pub async fn is_available() -> bool {
Command::new("agent-browser")
.arg("--version")
.stdout(Stdio::null())
.stderr(Stdio::null())
.status()
.await
.map(|s| s.success())
.unwrap_or(false)
}
/// Validate URL against allowlist
fn validate_url(&self, url: &str) -> anyhow::Result<()> {
let url = url.trim();
if url.is_empty() {
anyhow::bail!("URL cannot be empty");
}
// Allow file:// URLs for local testing
if url.starts_with("file://") {
return Ok(());
}
if !url.starts_with("https://") && !url.starts_with("http://") {
anyhow::bail!("Only http:// and https:// URLs are allowed");
}
if self.allowed_domains.is_empty() {
anyhow::bail!(
"Browser tool enabled but no allowed_domains configured. \
Add [browser].allowed_domains in config.toml"
);
}
let host = extract_host(url)?;
if is_private_host(&host) {
anyhow::bail!("Blocked local/private host: {host}");
}
if !host_matches_allowlist(&host, &self.allowed_domains) {
anyhow::bail!("Host '{host}' not in browser.allowed_domains");
}
Ok(())
}
/// Execute an agent-browser command
async fn run_command(&self, args: &[&str]) -> anyhow::Result<AgentBrowserResponse> {
let mut cmd = Command::new("agent-browser");
// Add session if configured
if let Some(ref session) = self.session_name {
cmd.arg("--session").arg(session);
}
// Add --json for machine-readable output
cmd.args(args).arg("--json");
debug!("Running: agent-browser {} --json", args.join(" "));
let output = cmd
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.output()
.await?;
let stdout = String::from_utf8_lossy(&output.stdout);
let stderr = String::from_utf8_lossy(&output.stderr);
if !stderr.is_empty() {
debug!("agent-browser stderr: {}", stderr);
}
// Parse JSON response
if let Ok(resp) = serde_json::from_str::<AgentBrowserResponse>(&stdout) {
return Ok(resp);
}
// Fallback for non-JSON output
if output.status.success() {
Ok(AgentBrowserResponse {
success: true,
data: Some(json!({ "output": stdout.trim() })),
error: None,
})
} else {
Ok(AgentBrowserResponse {
success: false,
data: None,
error: Some(stderr.trim().to_string()),
})
}
}
/// Execute a browser action
#[allow(clippy::too_many_lines)]
async fn execute_action(&self, action: BrowserAction) -> anyhow::Result<ToolResult> {
match action {
BrowserAction::Open { url } => {
self.validate_url(&url)?;
let resp = self.run_command(&["open", &url]).await?;
self.to_result(resp)
}
BrowserAction::Snapshot {
interactive_only,
compact,
depth,
} => {
let mut args = vec!["snapshot"];
if interactive_only {
args.push("-i");
}
if compact {
args.push("-c");
}
let depth_str;
if let Some(d) = depth {
args.push("-d");
depth_str = d.to_string();
args.push(&depth_str);
}
let resp = self.run_command(&args).await?;
self.to_result(resp)
}
BrowserAction::Click { selector } => {
let resp = self.run_command(&["click", &selector]).await?;
self.to_result(resp)
}
BrowserAction::Fill { selector, value } => {
let resp = self.run_command(&["fill", &selector, &value]).await?;
self.to_result(resp)
}
BrowserAction::Type { selector, text } => {
let resp = self.run_command(&["type", &selector, &text]).await?;
self.to_result(resp)
}
BrowserAction::GetText { selector } => {
let resp = self.run_command(&["get", "text", &selector]).await?;
self.to_result(resp)
}
BrowserAction::GetTitle => {
let resp = self.run_command(&["get", "title"]).await?;
self.to_result(resp)
}
BrowserAction::GetUrl => {
let resp = self.run_command(&["get", "url"]).await?;
self.to_result(resp)
}
BrowserAction::Screenshot { path, full_page } => {
let mut args = vec!["screenshot"];
if let Some(ref p) = path {
args.push(p);
}
if full_page {
args.push("--full");
}
let resp = self.run_command(&args).await?;
self.to_result(resp)
}
BrowserAction::Wait { selector, ms, text } => {
let mut args = vec!["wait"];
let ms_str;
if let Some(sel) = selector.as_ref() {
args.push(sel);
} else if let Some(millis) = ms {
ms_str = millis.to_string();
args.push(&ms_str);
} else if let Some(ref t) = text {
args.push("--text");
args.push(t);
}
let resp = self.run_command(&args).await?;
self.to_result(resp)
}
BrowserAction::Press { key } => {
let resp = self.run_command(&["press", &key]).await?;
self.to_result(resp)
}
BrowserAction::Hover { selector } => {
let resp = self.run_command(&["hover", &selector]).await?;
self.to_result(resp)
}
BrowserAction::Scroll { direction, pixels } => {
let mut args = vec!["scroll", &direction];
let px_str;
if let Some(px) = pixels {
px_str = px.to_string();
args.push(&px_str);
}
let resp = self.run_command(&args).await?;
self.to_result(resp)
}
BrowserAction::IsVisible { selector } => {
let resp = self.run_command(&["is", "visible", &selector]).await?;
self.to_result(resp)
}
BrowserAction::Close => {
let resp = self.run_command(&["close"]).await?;
self.to_result(resp)
}
BrowserAction::Find {
by,
value,
action,
fill_value,
} => {
let mut args = vec!["find", &by, &value, &action];
if let Some(ref fv) = fill_value {
args.push(fv);
}
let resp = self.run_command(&args).await?;
self.to_result(resp)
}
}
}
#[allow(clippy::unnecessary_wraps, clippy::unused_self)]
fn to_result(&self, resp: AgentBrowserResponse) -> anyhow::Result<ToolResult> {
if resp.success {
let output = resp
.data
.map(|d| serde_json::to_string_pretty(&d).unwrap_or_default())
.unwrap_or_default();
Ok(ToolResult {
success: true,
output,
error: None,
})
} else {
Ok(ToolResult {
success: false,
output: String::new(),
error: resp.error,
})
}
}
}
#[async_trait]
#[allow(clippy::too_many_lines)]
impl Tool for BrowserTool {
fn name(&self) -> &str {
"browser"
}
fn description(&self) -> &str {
"Web browser automation using agent-browser. Supports navigation, clicking, \
filling forms, taking screenshots, and getting accessibility snapshots with refs. \
Use 'snapshot' to get interactive elements with refs (@e1, @e2), then use refs \
for precise element interaction. Allowed domains only."
}
fn parameters_schema(&self) -> Value {
json!({
"type": "object",
"properties": {
"action": {
"type": "string",
"enum": ["open", "snapshot", "click", "fill", "type", "get_text",
"get_title", "get_url", "screenshot", "wait", "press",
"hover", "scroll", "is_visible", "close", "find"],
"description": "Browser action to perform"
},
"url": {
"type": "string",
"description": "URL to navigate to (for 'open' action)"
},
"selector": {
"type": "string",
"description": "Element selector: @ref (e.g. @e1), CSS (#id, .class), or text=..."
},
"value": {
"type": "string",
"description": "Value to fill or type"
},
"text": {
"type": "string",
"description": "Text to type or wait for"
},
"key": {
"type": "string",
"description": "Key to press (Enter, Tab, Escape, etc.)"
},
"direction": {
"type": "string",
"enum": ["up", "down", "left", "right"],
"description": "Scroll direction"
},
"pixels": {
"type": "integer",
"description": "Pixels to scroll"
},
"interactive_only": {
"type": "boolean",
"description": "For snapshot: only show interactive elements"
},
"compact": {
"type": "boolean",
"description": "For snapshot: remove empty structural elements"
},
"depth": {
"type": "integer",
"description": "For snapshot: limit tree depth"
},
"full_page": {
"type": "boolean",
"description": "For screenshot: capture full page"
},
"path": {
"type": "string",
"description": "File path for screenshot"
},
"ms": {
"type": "integer",
"description": "Milliseconds to wait"
},
"by": {
"type": "string",
"enum": ["role", "text", "label", "placeholder", "testid"],
"description": "For find: semantic locator type"
},
"find_action": {
"type": "string",
"enum": ["click", "fill", "text", "hover", "check"],
"description": "For find: action to perform on found element"
},
"fill_value": {
"type": "string",
"description": "For find with fill action: value to fill"
}
},
"required": ["action"]
})
}
async fn execute(&self, args: Value) -> anyhow::Result<ToolResult> {
// Security checks
if !self.security.can_act() {
return Ok(ToolResult {
success: false,
output: String::new(),
error: Some("Action blocked: autonomy is read-only".into()),
});
}
if !self.security.record_action() {
return Ok(ToolResult {
success: false,
output: String::new(),
error: Some("Action blocked: rate limit exceeded".into()),
});
}
// Check if agent-browser is available
if !Self::is_available().await {
return Ok(ToolResult {
success: false,
output: String::new(),
error: Some(
"agent-browser CLI not found. Install with: npm install -g agent-browser"
.into(),
),
});
}
// Parse action from args
let action_str = args
.get("action")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'action' parameter"))?;
let action = match action_str {
"open" => {
let url = args
.get("url")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'url' for open action"))?;
BrowserAction::Open { url: url.into() }
}
"snapshot" => BrowserAction::Snapshot {
interactive_only: args
.get("interactive_only")
.and_then(serde_json::Value::as_bool)
.unwrap_or(true), // Default to interactive for AI
compact: args
.get("compact")
.and_then(serde_json::Value::as_bool)
.unwrap_or(true),
depth: args
.get("depth")
.and_then(serde_json::Value::as_u64)
.map(|d| u32::try_from(d).unwrap_or(u32::MAX)),
},
"click" => {
let selector = args
.get("selector")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'selector' for click"))?;
BrowserAction::Click {
selector: selector.into(),
}
}
"fill" => {
let selector = args
.get("selector")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'selector' for fill"))?;
let value = args
.get("value")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'value' for fill"))?;
BrowserAction::Fill {
selector: selector.into(),
value: value.into(),
}
}
"type" => {
let selector = args
.get("selector")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'selector' for type"))?;
let text = args
.get("text")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'text' for type"))?;
BrowserAction::Type {
selector: selector.into(),
text: text.into(),
}
}
"get_text" => {
let selector = args
.get("selector")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'selector' for get_text"))?;
BrowserAction::GetText {
selector: selector.into(),
}
}
"get_title" => BrowserAction::GetTitle,
"get_url" => BrowserAction::GetUrl,
"screenshot" => BrowserAction::Screenshot {
path: args.get("path").and_then(|v| v.as_str()).map(String::from),
full_page: args
.get("full_page")
.and_then(serde_json::Value::as_bool)
.unwrap_or(false),
},
"wait" => BrowserAction::Wait {
selector: args
.get("selector")
.and_then(|v| v.as_str())
.map(String::from),
ms: args.get("ms").and_then(serde_json::Value::as_u64),
text: args.get("text").and_then(|v| v.as_str()).map(String::from),
},
"press" => {
let key = args
.get("key")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'key' for press"))?;
BrowserAction::Press { key: key.into() }
}
"hover" => {
let selector = args
.get("selector")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'selector' for hover"))?;
BrowserAction::Hover {
selector: selector.into(),
}
}
"scroll" => {
let direction = args
.get("direction")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'direction' for scroll"))?;
BrowserAction::Scroll {
direction: direction.into(),
pixels: args
.get("pixels")
.and_then(serde_json::Value::as_u64)
.map(|p| u32::try_from(p).unwrap_or(u32::MAX)),
}
}
"is_visible" => {
let selector = args
.get("selector")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'selector' for is_visible"))?;
BrowserAction::IsVisible {
selector: selector.into(),
}
}
"close" => BrowserAction::Close,
"find" => {
let by = args
.get("by")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'by' for find"))?;
let value = args
.get("value")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'value' for find"))?;
let action = args
.get("find_action")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'find_action' for find"))?;
BrowserAction::Find {
by: by.into(),
value: value.into(),
action: action.into(),
fill_value: args
.get("fill_value")
.and_then(|v| v.as_str())
.map(String::from),
}
}
_ => {
return Ok(ToolResult {
success: false,
output: String::new(),
error: Some(format!("Unknown action: {action_str}")),
});
}
};
self.execute_action(action).await
}
}
// ── Helper functions ─────────────────────────────────────────────
fn normalize_domains(domains: Vec<String>) -> Vec<String> {
domains
.into_iter()
.map(|d| d.trim().to_lowercase())
.filter(|d| !d.is_empty())
.collect()
}
fn extract_host(url_str: &str) -> anyhow::Result<String> {
// Simple host extraction without url crate
let url = url_str.trim();
let without_scheme = url
.strip_prefix("https://")
.or_else(|| url.strip_prefix("http://"))
.or_else(|| url.strip_prefix("file://"))
.unwrap_or(url);
// Extract host (before first / or :)
let host = without_scheme
.split('/')
.next()
.unwrap_or(without_scheme)
.split(':')
.next()
.unwrap_or(without_scheme);
if host.is_empty() {
anyhow::bail!("Invalid URL: no host");
}
Ok(host.to_lowercase())
}
fn is_private_host(host: &str) -> bool {
let private_patterns = [
"localhost",
"127.",
"10.",
"192.168.",
"172.16.",
"172.17.",
"172.18.",
"172.19.",
"172.20.",
"172.21.",
"172.22.",
"172.23.",
"172.24.",
"172.25.",
"172.26.",
"172.27.",
"172.28.",
"172.29.",
"172.30.",
"172.31.",
"0.0.0.0",
"::1",
"[::1]",
];
private_patterns
.iter()
.any(|p| host.starts_with(p) || host == *p)
}
fn host_matches_allowlist(host: &str, allowed: &[String]) -> bool {
allowed.iter().any(|pattern| {
if pattern == "*" {
return true;
}
if pattern.starts_with("*.") {
// Wildcard subdomain match
let suffix = &pattern[1..]; // ".example.com"
host.ends_with(suffix) || host == &pattern[2..]
} else {
// Exact match or subdomain
host == pattern || host.ends_with(&format!(".{pattern}"))
}
})
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn normalize_domains_works() {
let domains = vec![
" Example.COM ".into(),
"docs.example.com".into(),
"".into(),
];
let normalized = normalize_domains(domains);
assert_eq!(normalized, vec!["example.com", "docs.example.com"]);
}
#[test]
fn extract_host_works() {
assert_eq!(
extract_host("https://example.com/path").unwrap(),
"example.com"
);
assert_eq!(
extract_host("https://Sub.Example.COM:8080/").unwrap(),
"sub.example.com"
);
}
#[test]
fn is_private_host_detects_local() {
assert!(is_private_host("localhost"));
assert!(is_private_host("127.0.0.1"));
assert!(is_private_host("192.168.1.1"));
assert!(is_private_host("10.0.0.1"));
assert!(!is_private_host("example.com"));
assert!(!is_private_host("google.com"));
}
#[test]
fn host_matches_allowlist_exact() {
let allowed = vec!["example.com".into()];
assert!(host_matches_allowlist("example.com", &allowed));
assert!(host_matches_allowlist("sub.example.com", &allowed));
assert!(!host_matches_allowlist("notexample.com", &allowed));
}
#[test]
fn host_matches_allowlist_wildcard() {
let allowed = vec!["*.example.com".into()];
assert!(host_matches_allowlist("sub.example.com", &allowed));
assert!(host_matches_allowlist("example.com", &allowed));
assert!(!host_matches_allowlist("other.com", &allowed));
}
#[test]
fn host_matches_allowlist_star() {
let allowed = vec!["*".into()];
assert!(host_matches_allowlist("anything.com", &allowed));
assert!(host_matches_allowlist("example.org", &allowed));
}
#[test]
fn browser_tool_name() {
let security = Arc::new(SecurityPolicy::default());
let tool = BrowserTool::new(security, vec!["example.com".into()], None);
assert_eq!(tool.name(), "browser");
}
#[test]
fn browser_tool_validates_url() {
let security = Arc::new(SecurityPolicy::default());
let tool = BrowserTool::new(security, vec!["example.com".into()], None);
// Valid
assert!(tool.validate_url("https://example.com").is_ok());
assert!(tool.validate_url("https://sub.example.com/path").is_ok());
// Invalid - not in allowlist
assert!(tool.validate_url("https://other.com").is_err());
// Invalid - private host
assert!(tool.validate_url("https://localhost").is_err());
assert!(tool.validate_url("https://127.0.0.1").is_err());
// Invalid - not https
assert!(tool.validate_url("ftp://example.com").is_err());
// File URLs allowed
assert!(tool.validate_url("file:///tmp/test.html").is_ok());
}
#[test]
fn browser_tool_empty_allowlist_blocks() {
let security = Arc::new(SecurityPolicy::default());
let tool = BrowserTool::new(security, vec![], None);
assert!(tool.validate_url("https://example.com").is_err());
}
}