"""KiloStar 工具沙箱:路径校验、命令过滤、代码静态检查。""" from __future__ import annotations import os import re from pathlib import Path from typing import List, Optional import yaml from pydantic import BaseModel, Field _CONFIG_DIR = Path(__file__).resolve().parent.parent.parent / "config" _SANDBOX_YAML = _CONFIG_DIR / "sandbox.yaml" class FilesystemPolicy(BaseModel): workspace_root: str = "/tmp/kilostar_workspace" allowed_read_paths: List[str] = Field(default_factory=lambda: ["/tmp"]) denied_paths: List[str] = Field(default_factory=list) class ShellPolicy(BaseModel): enabled: bool = True blocked_commands: List[str] = Field(default_factory=list) blocked_operators: List[str] = Field(default_factory=list) max_timeout: int = 60 class PythonExecutorPolicy(BaseModel): enabled: bool = True max_timeout: int = 30 blocked_imports: List[str] = Field(default_factory=list) blocked_builtins: List[str] = Field(default_factory=list) class SandboxConfig(BaseModel): enabled: bool = True filesystem: FilesystemPolicy = Field(default_factory=FilesystemPolicy) shell: ShellPolicy = Field(default_factory=ShellPolicy) python_executor: PythonExecutorPolicy = Field(default_factory=PythonExecutorPolicy) _current: Optional[SandboxConfig] = None def _load_sandbox_config() -> SandboxConfig: if not _SANDBOX_YAML.exists(): return SandboxConfig() with open(_SANDBOX_YAML, "r", encoding="utf-8") as f: data = yaml.safe_load(f) or {} raw = data.get("sandbox", data) return SandboxConfig.model_validate(raw) def get_sandbox_config() -> SandboxConfig: global _current if _current is None: _current = _load_sandbox_config() return _current def reload_sandbox_config() -> SandboxConfig: global _current _current = _load_sandbox_config() return _current # ─── Exceptions ─── class PathViolation(Exception): pass class CommandViolation(Exception): pass class CodeViolation(Exception): pass # ─── Path Validation ─── def validate_path(file_path: str, *, write: bool = False) -> str: cfg = get_sandbox_config() if not cfg.enabled: return os.path.abspath(file_path) fs = cfg.filesystem resolved = os.path.realpath(os.path.abspath(file_path)) for denied in fs.denied_paths: denied_resolved = os.path.realpath(denied) if resolved == denied_resolved or resolved.startswith(denied_resolved + os.sep): raise PathViolation(f"路径被禁止访问: {file_path}") if write: ws_root = os.path.realpath(fs.workspace_root) if not (resolved == ws_root or resolved.startswith(ws_root + os.sep)): raise PathViolation( f"写操作路径必须在工作目录内: {fs.workspace_root}," f"当前路径: {file_path}" ) return resolved allowed = [os.path.realpath(fs.workspace_root)] for p in fs.allowed_read_paths: allowed.append(os.path.realpath(p)) for allowed_dir in allowed: if resolved == allowed_dir or resolved.startswith(allowed_dir + os.sep): return resolved raise PathViolation( f"读操作路径不在允许范围内: {file_path}。" f"允许的目录: {[fs.workspace_root] + fs.allowed_read_paths}" ) # ─── Shell Command Validation ─── def validate_shell_command(command: str) -> str: cfg = get_sandbox_config() if not cfg.enabled: return command shell_cfg = cfg.shell if not shell_cfg.enabled: raise CommandViolation("shell_executor 已被沙箱策略禁用") cmd_lower = command.strip().lower() for blocked in shell_cfg.blocked_commands: if cmd_lower.startswith(blocked.lower()): raise CommandViolation(f"命令被禁止: {blocked}") for op in shell_cfg.blocked_operators: if op in command: raise CommandViolation(f"命令包含被禁止的操作符: '{op}'") return command def get_shell_timeout(requested: int) -> int: cfg = get_sandbox_config() return min(requested, cfg.shell.max_timeout) # ─── Python Code Validation ─── def validate_python_code(code: str) -> str: cfg = get_sandbox_config() if not cfg.enabled: return code py_cfg = cfg.python_executor if not py_cfg.enabled: raise CodeViolation("python_executor 已被沙箱策略禁用") for module in py_cfg.blocked_imports: pattern = rf"(?:^|\n)\s*(?:import\s+{re.escape(module)}|from\s+{re.escape(module)})\b" if re.search(pattern, code): raise CodeViolation(f"禁止导入模块: {module}") for builtin in py_cfg.blocked_builtins: pattern = rf"\b{re.escape(builtin)}\s*\(" if re.search(pattern, code): raise CodeViolation(f"禁止使用: {builtin}()") return code def get_python_timeout(requested: int) -> int: cfg = get_sandbox_config() return min(requested, cfg.python_executor.max_timeout)