Files
2026-07-01 09:22:26 +00:00

36 lines
1.3 KiB
Python

"""s3_peek:读取对象的头若干字节并尝试 UTF-8 解码(看几行用)。"""
from __future__ import annotations
from ._s3_common import get_s3_creds_or_raise, make_session_kwargs, parse_s3_uri
async def s3_peek(uri: str, n_bytes: int = 4096) -> str:
    """Read the first ``n_bytes`` bytes of an S3 object and decode them as UTF-8.

    Intended for quickly previewing the first few lines of text-like objects
    (csv/json/log). Content that is not valid UTF-8 is reported with a
    ``[binary, ...]`` placeholder instead of the decoded text.

    Args:
        uri: Object path of the form ``s3://bucket/key``.
        n_bytes: Number of bytes to read. Defaults to 4096; the value is
            clamped to the range 1 .. 1 MiB.

    Returns:
        The decoded leading fragment of the object, or a placeholder string
        containing the byte count and the first 64 bytes in hex when the
        data is not valid UTF-8.
    """
    import codecs

    from aiobotocore.session import get_session

    # NOTE(review): judging by its name, this helper raises when credentials
    # are missing; its behaviour is not visible from this file.
    creds = get_s3_creds_or_raise()
    bucket, key = parse_s3_uri(uri)
    n = max(1, min(int(n_bytes), 1024 * 1024))

    session = get_session()
    async with session.create_client("s3", **make_session_kwargs(creds)) as client:
        # HTTP byte ranges are inclusive, hence ``n - 1`` as the last offset.
        resp = await client.get_object(Bucket=bucket, Key=key, Range=f"bytes=0-{n - 1}")
        # Read inside the body's own context so the underlying HTTP response
        # is released even if the read fails part-way.
        async with resp["Body"] as stream:
            body = await stream.read()

    # When the read filled the whole window, the cut may have landed in the
    # middle of a multi-byte UTF-8 character. Decoding with ``final=False``
    # drops such an incomplete trailing sequence instead of raising, so valid
    # text is not misreported as binary. Invalid bytes anywhere else still
    # raise UnicodeDecodeError and fall through to the placeholder.
    truncated = len(body) >= n
    decoder = codecs.getincrementaldecoder("utf-8")()
    try:
        return decoder.decode(body, final=not truncated)
    except UnicodeDecodeError:
        return f"[binary, {len(body)} bytes; first 64 hex] {body[:64].hex()}"