存档
This commit is contained in:
@@ -0,0 +1,35 @@
|
||||
"""s3_peek:读取对象的头若干字节并尝试 UTF-8 解码(看几行用)。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from ._s3_common import get_s3_creds_or_raise, make_session_kwargs, parse_s3_uri
|
||||
|
||||
|
||||
async def s3_peek(uri: str, n_bytes: int = 4096) -> str:
    """Read the first ``n_bytes`` bytes of an S3 object and return them as text.

    Intended for a quick preview of the first few lines of text-like objects
    (csv / json / log ...). Content that is not valid UTF-8 is reported with a
    ``[binary, ...]`` placeholder instead of being returned raw.

    Because the object is cut at an arbitrary byte offset, the last character
    of the window may be an incomplete multi-byte UTF-8 sequence. Such a
    dangling tail is dropped rather than causing the whole preview to be
    classified as binary. As a consequence, a very small ``n_bytes`` that
    ends inside the first character yields an empty string.

    Args:
        uri: Object location of the form ``s3://bucket/key``.
        n_bytes: Number of bytes to read. Defaults to 4096; values are clamped
            to the range 1 .. 1 MiB.

    Returns:
        The decoded text fragment, or a placeholder string of the form
        ``[binary, <len> bytes; first 64 hex] <hex>`` when the bytes are not
        valid UTF-8.

    Raises:
        Whatever ``get_s3_creds_or_raise``, ``parse_s3_uri`` or the S3 client
        raise; no exception from those calls is caught here.
        NOTE(review): S3 normally rejects a ranged GET on a zero-length object
        with ``InvalidRange`` -- confirm whether callers need that mapped to
        an empty string.
    """
    import codecs

    from aiobotocore.session import get_session

    creds = get_s3_creds_or_raise()
    bucket, key = parse_s3_uri(uri)
    # Clamp the request so a caller can neither ask for zero/negative bytes
    # nor pull more than 1 MiB into memory for a "peek".
    n = max(1, min(int(n_bytes), 1024 * 1024))

    session = get_session()
    async with session.create_client("s3", **make_session_kwargs(creds)) as client:
        # HTTP byte ranges are inclusive on both ends, hence ``n - 1``.
        resp = await client.get_object(Bucket=bucket, Key=key, Range=f"bytes=0-{n - 1}")
        # Use the body as a context manager so the underlying connection is
        # released even if the read is interrupted.
        async with resp["Body"] as stream:
            body = await stream.read()

    # If fewer bytes than requested came back, the object ended inside the
    # window and the data is complete, so decode strictly. Otherwise the
    # window may have split a multi-byte character: with ``final=False`` the
    # incremental decoder buffers an incomplete trailing sequence instead of
    # raising, while still raising on genuinely invalid bytes.
    reached_end = len(body) < n
    decoder = codecs.getincrementaldecoder("utf-8")()
    try:
        return decoder.decode(body, final=reached_end)
    except UnicodeDecodeError:
        return f"[binary, {len(body)} bytes; first 64 hex] {body[:64].hex()}"
|
||||
Reference in New Issue
Block a user