存档
This commit is contained in:
@@ -0,0 +1,46 @@
|
||||
"""s3_get_object:下载到 artifact 目录(路径强校验防穿越)。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from kilostar.utils.settings import get_artifact_dir
|
||||
|
||||
from ._s3_common import get_s3_creds_or_raise, make_session_kwargs, parse_s3_uri
|
||||
|
||||
|
||||
def _resolve_download_target(base: Path, save_as: str) -> Path:
    """Return the absolute path for ``save_as`` strictly inside ``base``.

    Args:
        base: The download root; expected to exist already so that it can be
            resolved to a stable absolute path.
        save_as: Caller-supplied relative file name.

    Returns:
        The fully resolved target path, guaranteed to be a strict descendant
        of the resolved ``base``.

    Raises:
        ValueError: If ``save_as`` is absolute, contains a ``..`` component,
            or resolves to ``base`` itself or to a location outside it.
    """
    # Cheap lexical rejection first (same rule and message as before).
    save_path = Path(save_as).as_posix()
    if save_path.startswith("/") or ".." in save_path.split("/"):
        raise ValueError(f"save_as 必须是相对、不含 .. 的路径,收到 {save_as!r}")

    # The lexical test alone is not sufficient: a drive-qualified name such as
    # "C:/x" replaces the base when joined on Windows, a symlink inside the
    # download directory can point elsewhere, and "" / "." resolve to the
    # directory itself. Resolving both sides and checking containment covers
    # all of these. The root is resolved first so the join starts from an
    # absolute path.
    root = base.resolve()
    target = (root / save_path).resolve()
    if root not in target.parents:
        raise ValueError(f"save_as 必须指向下载目录内的文件,收到 {save_as!r}")
    return target


async def s3_get_object(uri: str, save_as: str) -> str:
    """Download an S3 object into the artifact workspace and return its path.

    The file is stored under the ``data_analytics_downloads`` sub-directory of
    the directory returned by ``get_artifact_dir()``. ``save_as`` must be a
    relative path that stays inside that directory, which prevents writes to
    arbitrary locations. The downloaded file is intended to be read afterwards
    (e.g. with pandas/polars) from python_executor / ray_submit.

    Args:
        uri: Object location in the form ``s3://bucket/key``.
        save_as: Relative file name to save as (no ``..`` components, no
            absolute path, and it must name a file rather than the download
            directory itself).

    Returns:
        The absolute local path of the saved file.

    Raises:
        ValueError: If ``save_as`` fails the path-safety validation. Errors
            from the credential lookup, URI parsing and the S3 request
            propagate unchanged.
    """
    # Imported lazily, as in the original, so importing this module does not
    # require aiobotocore.
    from aiobotocore.session import get_session

    creds = get_s3_creds_or_raise()
    bucket, key = parse_s3_uri(uri)

    base = get_artifact_dir() / "data_analytics_downloads"
    base.mkdir(parents=True, exist_ok=True)

    # Validate before any network traffic so an unsafe name never triggers a
    # download.
    target = _resolve_download_target(base, save_as)
    target.parent.mkdir(parents=True, exist_ok=True)

    session = get_session()
    async with session.create_client("s3", **make_session_kwargs(creds)) as client:
        resp = await client.get_object(Bucket=bucket, Key=key)
        # NOTE(review): read() is called without a size, so the whole object
        # is presumably held in memory before being written to disk.
        body = await resp["Body"].read()

    # Write to the already-resolved path so the location that was validated is
    # exactly the location that is written.
    target.write_bytes(body)
    return str(target)
|
||||
Reference in New Issue
Block a user