47 lines
1.6 KiB
Python
47 lines
1.6 KiB
Python
"""s3_get_object:下载到 artifact 目录(路径强校验防穿越)。"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
from pathlib import Path
|
|
|
|
from kilostar.utils.settings import get_artifact_dir
|
|
|
|
from ._s3_common import get_s3_creds_or_raise, make_session_kwargs, parse_s3_uri
|
|
|
|
|
|
def _normalize_save_as(save_as: str) -> str:
    """Validate ``save_as`` lexically and return it as a POSIX-style relative path.

    Rejects every form that could, on its own, address a location outside the
    download directory: absolute or drive-anchored paths, paths containing a
    ``..`` component, and paths that name no file at all (``""`` or ``"."``).

    Args:
        save_as: Caller-supplied destination, expected to be a relative file path.

    Returns:
        ``save_as`` normalised with forward slashes.

    Raises:
        ValueError: If ``save_as`` is not a plain relative file path.
    """
    candidate = Path(save_as)
    # ``anchor`` is non-empty for absolute paths and, on Windows, for
    # drive-qualified ones such as ``C:x``; joining either onto the base
    # directory would silently discard the base. An empty ``parts`` tuple
    # means the path is "" or ".", i.e. the base directory itself.
    if candidate.anchor or ".." in candidate.parts or not candidate.parts:
        raise ValueError(f"save_as 必须是相对、不含 .. 的路径,收到 {save_as!r}")
    return candidate.as_posix()


async def s3_get_object(uri: str, save_as: str) -> str:
    """Download an S3 object into the artifact download area and return its path.

    The object is stored under the ``data_analytics_downloads`` directory inside
    the directory returned by ``get_artifact_dir()``. ``save_as`` must be a
    relative path so that callers cannot write to arbitrary locations; the
    downloaded file is intended to be read afterwards (e.g. with pandas/polars)
    by other tools such as python_executor / ray_submit.

    Args:
        uri: Object location in the form ``s3://bucket/key``.
        save_as: Relative file name to save as (no ``..`` component, not
            absolute, not empty).

    Returns:
        Absolute local path of the saved file.

    Raises:
        ValueError: If ``save_as`` is not a plain relative path, or if it
            resolves (through symlinks) to a location outside the download
            directory.
    """
    # Reject a bad destination before touching credentials, network or disk.
    relative_path = _normalize_save_as(save_as)

    # Imported lazily, as in the original, so importing this module does not
    # require aiobotocore.
    from aiobotocore.session import get_session

    creds = get_s3_creds_or_raise()
    bucket, key = parse_s3_uri(uri)

    base = get_artifact_dir() / "data_analytics_downloads"
    base.mkdir(parents=True, exist_ok=True)

    # The lexical check cannot see symlinks that already exist below ``base``.
    # Resolve both sides and require the destination to stay inside the base
    # before creating any directory or file. Joining onto the resolved
    # (absolute) base keeps ``resolve()`` reliable for not-yet-existing paths.
    base_resolved = base.resolve()
    target = (base_resolved / relative_path).resolve()
    try:
        target.relative_to(base_resolved)
    except ValueError:
        raise ValueError(f"save_as 解析后落在下载目录之外,收到 {save_as!r}") from None
    target.parent.mkdir(parents=True, exist_ok=True)

    session = get_session()
    async with session.create_client("s3", **make_session_kwargs(creds)) as client:
        resp = await client.get_object(Bucket=bucket, Key=key)
        # Use the body as a context manager so the underlying HTTP response is
        # released even if reading fails.
        async with resp["Body"] as stream:
            body = await stream.read()

    # NOTE(review): the whole object is buffered in memory and written with a
    # blocking call; fine for small files, consider streaming for large ones.
    target.write_bytes(body)
    return str(target)
|