36 lines
1.3 KiB
Python
36 lines
1.3 KiB
Python
"""s3_peek:读取对象的头若干字节并尝试 UTF-8 解码(看几行用)。"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from ._s3_common import get_s3_creds_or_raise, make_session_kwargs, parse_s3_uri
|
|
|
|
|
|
async def s3_peek(uri: str, n_bytes: int = 4096) -> str:
    """Read the first ``n_bytes`` bytes of an S3 object and decode them as UTF-8.

    Handy for previewing the first few lines of text-like objects
    (csv/json/log). Content that is not valid UTF-8 is reported with a
    ``[binary, ...]`` placeholder instead of being returned raw.

    Because the read stops at an arbitrary byte offset, the fetched window
    may end in the middle of a multi-byte UTF-8 character. Such a partial
    trailing character is dropped rather than causing the whole preview to
    be classified as binary.

    Args:
        uri: Object path of the form ``s3://bucket/key``.
        n_bytes: Number of bytes to read. Defaults to 4096; clamped to the
            range 1 .. 1 MiB.

    Returns:
        The decoded text fragment, an empty string for a zero-byte object,
        or a placeholder describing the binary content (total bytes read
        plus the first 64 bytes as hex).

    Raises:
        Whatever ``get_s3_creds_or_raise`` / ``parse_s3_uri`` raise for bad
        credentials or a malformed URI, and any S3 client error other than
        ``InvalidRange`` (e.g. missing key, access denied).
    """
    import codecs

    from aiobotocore.session import get_session

    creds = get_s3_creds_or_raise()
    bucket, key = parse_s3_uri(uri)
    # Clamp the request size to [1, 1 MiB] so a bad argument cannot trigger
    # an empty or oversized download.
    n = max(1, min(int(n_bytes), 1024 * 1024))

    session = get_session()
    async with session.create_client("s3", **make_session_kwargs(creds)) as client:
        try:
            resp = await client.get_object(
                Bucket=bucket, Key=key, Range=f"bytes=0-{n-1}"
            )
        except client.exceptions.ClientError as err:
            # S3 rejects a ranged GET on a zero-byte object with
            # InvalidRange (HTTP 416); for a preview that simply means
            # "nothing to show". Every other error is propagated unchanged.
            if err.response.get("Error", {}).get("Code") != "InvalidRange":
                raise
            return ""
        body = await resp["Body"].read()

    # Fewer bytes than requested means the whole object was read, so any
    # incomplete sequence at the end is a real encoding error. Otherwise the
    # window was cut by the range limit and a partial trailing character is
    # expected: final=False makes the decoder hold it back instead of raising.
    whole_object = len(body) < n
    decoder = codecs.getincrementaldecoder("utf-8")()
    try:
        return decoder.decode(body, final=whole_object)
    except UnicodeDecodeError:
        return f"[binary, {len(body)} bytes; first 64 hex] {body[:64].hex()}"
|