"""ray_submit:把分析脚本提交到 Ray(distributed)或 subprocess(standalone)执行。 凭证以 ``AWS_*`` 环境变量注入子进程,让 boto3/pandas-s3 自然读到。 脚本走 ``kilostar.utils.sandbox.validate_python_code`` 的静态屏蔽兜底。 """ from __future__ import annotations import asyncio import os import sys import tempfile from kilostar.utils.ray_compat import _STANDALONE from kilostar.utils.sandbox import ( CodeViolation, get_python_timeout, validate_python_code, ) from ._s3_common import get_s3_creds_or_raise def _build_env(creds) -> dict: env = os.environ.copy() env["AWS_ACCESS_KEY_ID"] = creds["access_key"] env["AWS_SECRET_ACCESS_KEY"] = creds["secret_key"] env["AWS_DEFAULT_REGION"] = creds.get("region") or "us-east-1" if creds.get("endpoint_url"): env["AWS_ENDPOINT_URL_S3"] = creds["endpoint_url"] env["AWS_ENDPOINT_URL"] = creds["endpoint_url"] return env async def ray_submit(script: str, timeout: int = 300) -> str: """提交 Python 脚本到 Ray(分布式)或子进程(单机)执行。 脚本中可直接 ``import boto3`` 读 S3(凭证已通过环境变量注入);可用 pandas / polars / numpy 等已安装的依赖。**只读**——不要尝试 put/delete。 Args: script: Python 源码 timeout: 超时秒数(默认 300) Returns: stdout(必要时尾部追加 stderr 与 exit code) """ try: script = validate_python_code(script) except CodeViolation as e: return f"[Sandbox] {e}" creds = get_s3_creds_or_raise() env = _build_env(creds) timeout = get_python_timeout(timeout) # standalone 与 distributed 第一版都走 subprocess,保证环境变量传递可控 # (ray.remote 跑函数时 env vars 需另装 runtime_env,复杂度跟 subprocess 持平 # 但前者透明可控,先这样落地) tmp_file = None try: with tempfile.NamedTemporaryFile( mode="w", suffix=".py", delete=False, encoding="utf-8" ) as f: f.write(script) tmp_file = f.name proc = await asyncio.create_subprocess_exec( sys.executable, tmp_file, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, env=env, ) stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout) out = stdout.decode("utf-8", errors="replace") err = stderr.decode("utf-8", errors="replace") result = "" if out: result += out if err: result += f"\n[stderr]\n{err}" if proc.returncode != 0: result += f"\n[exit code: {proc.returncode}]" result = result.strip() or "(no output)" if not _STANDALONE: result = f"[mode: ray-cluster (subprocess)]\n{result}" return result except asyncio.TimeoutError: return f"[Error] ray_submit 执行超时({timeout}s)" except Exception as e: return f"[Error] ray_submit 失败:{e}" finally: if tmp_file and os.path.exists(tmp_file): os.unlink(tmp_file)