This commit is contained in:
2026-07-01 09:22:26 +00:00
parent 4aa1dab283
commit aa47a19e98
53 changed files with 4721 additions and 77 deletions
@@ -0,0 +1,46 @@
"""s3_get_object:下载到 artifact 目录(路径强校验防穿越)。"""
from __future__ import annotations
import os
from pathlib import Path
from kilostar.utils.settings import get_artifact_dir
from ._s3_common import get_s3_creds_or_raise, make_session_kwargs, parse_s3_uri
async def s3_get_object(uri: str, save_as: str) -> str:
    """Download an S3 object into this process's artifact workspace.

    The object is written below the ``data_analytics_downloads/`` folder of
    the directory returned by ``get_artifact_dir()``, at the relative location
    given by ``save_as``. The saved file is meant to be read afterwards (for
    example with pandas/polars) from python_executor / ray_submit.

    ``save_as`` is validated before credentials are looked up or any network
    request is made, and the final destination is re-checked after symlink
    resolution so that nothing can be written outside the download folder.

    Args:
        uri: Object location of the form ``s3://bucket/key``.
        save_as: Relative file path for the local copy. It must not be
            absolute (no root or drive), must not contain ``..`` and must
            name a file rather than the download folder itself.

    Returns:
        Absolute local path of the saved file.

    Raises:
        ValueError: If ``save_as`` is absolute, contains ``..``, is empty, or
            resolves to a location outside the download folder.
    """
    import asyncio

    # Lexical validation first: it is cheap and must reject a bad destination
    # before any credential lookup or download is attempted.
    rel = Path(save_as)
    # ``anchor`` is non-empty for a leading "/" as well as for Windows drive
    # forms such as "C:/x" or "C:x", which would make ``base / rel`` drop base.
    if rel.anchor or ".." in rel.parts:
        raise ValueError(f"save_as 必须是相对、不含 .. 的路径,收到 {save_as!r}")
    # "" and "." normalise to zero parts, i.e. the download folder itself.
    if not rel.parts:
        raise ValueError(f"save_as 不能为空或指向目录本身,收到 {save_as!r}")

    # Imported lazily so the module can be loaded without aiobotocore present.
    from aiobotocore.session import get_session

    creds = get_s3_creds_or_raise()
    bucket, key = parse_s3_uri(uri)

    base = get_artifact_dir() / "data_analytics_downloads"
    base.mkdir(parents=True, exist_ok=True)
    base = base.resolve()

    # Resolve before creating any sub-directory: a symlink planted inside the
    # download folder must not redirect the write (or the mkdir) elsewhere.
    target = (base / rel).resolve()
    if base not in target.parents:
        raise ValueError(f"save_as 解析后超出下载目录,收到 {save_as!r}")
    target.parent.mkdir(parents=True, exist_ok=True)

    session = get_session()
    async with session.create_client("s3", **make_session_kwargs(creds)) as client:
        resp = await client.get_object(Bucket=bucket, Key=key)
        # Using the body as a context manager releases the underlying
        # connection even when reading fails part-way through.
        async with resp["Body"] as stream:
            body = await stream.read()

    # File writes are blocking; run them in the default executor so a large
    # object does not stall the event loop.
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, target.write_bytes, body)
    return str(target)