Files
saladeaula.digital/batch-jobs/app/events/read_chunk.py
2025-05-19 09:04:19 -03:00

48 lines
1.1 KiB
Python

import csv
from io import StringIO
import boto3
from aws_lambda_powertools.utilities.data_classes import (
EventBridgeEvent,
event_source,
)
from aws_lambda_powertools.utilities.typing import LambdaContext
from csv_utils import byte_ranges
# Chunk size handed to ``byte_ranges`` by the handler below. The unit is
# defined by that helper (presumably rows per chunk -- TODO confirm against
# ``csv_utils.byte_ranges``).
CHUNK_SIZE = 50
# S3 client created once at import time; the handler passes it explicitly to
# ``get_object_range`` instead of building a new client on every call.
s3_client = boto3.client('s3')
@event_source(data_class=EventBridgeEvent)
def lambda_handler(event: EventBridgeEvent, context: LambdaContext) -> bool:
    """Print the CSV rows found in the last byte range of the referenced file.

    The S3 URI is taken from ``event.detail['new_image']['csv_s3uri']``.
    ``byte_ranges`` is asked for the ranges of that file using ``CHUNK_SIZE``,
    and only the final item it yields is fetched and parsed.

    NOTE(review): only the last range is read; the earlier ones are discarded.
    Confirm whether that is intentional.

    Args:
        event: EventBridge event whose detail carries ``new_image``.
        context: Lambda context (unused).

    Returns:
        Always ``True``.
    """
    s3_uri = event.detail['new_image']['csv_s3uri']

    # Star-unpacking keeps just the final item; each item is indexed as
    # [0] / [1] (presumably start and end byte offsets -- verify in csv_utils).
    *_, last_range = byte_ranges(s3_uri, CHUNK_SIZE)
    first_byte = last_range[0]
    last_byte = last_range[1]

    body = get_object_range(s3_uri, first_byte, last_byte, s3_client=s3_client)

    for row in csv.reader(body):
        print(row)

    return True
def get_object_range(
    s3_uri: str,
    start_byte: int,
    end_byte: int = -1,
    *,
    s3_client,
) -> StringIO:
    """Fetch a byte range of an S3 object and return it as decoded text.

    Args:
        s3_uri: Object location in the form ``s3://bucket/key``.
        start_byte: Offset of the first byte to fetch.
        end_byte: Offset of the last byte to fetch (inclusive, following the
            HTTP ``Range`` header convention). A negative value (the default)
            or ``None`` requests everything from ``start_byte`` to the end of
            the object.
        s3_client: Object exposing a boto3-style ``get_object`` method.

    Returns:
        A ``StringIO`` holding the fetched bytes decoded as UTF-8.

    Raises:
        ValueError: If ``s3_uri`` has no ``/`` separating bucket and key.
        UnicodeDecodeError: If the fetched bytes are not valid UTF-8 (for
            example when a range boundary splits a multi-byte character).
    """
    scheme = 's3://'
    # Strip only a leading scheme; a blanket replace() would also mangle a key
    # that happens to contain the substring 's3://'.
    path = s3_uri[len(scheme):] if s3_uri.startswith(scheme) else s3_uri
    bucket, key = path.split('/', 1)

    # A truthiness test is wrong here: the -1 default is truthy and produced
    # the malformed header 'bytes=N--1', while 0 (a valid last-byte offset)
    # was treated as "open-ended". Use an explicit sentinel check instead.
    if end_byte is None or end_byte < 0:
        range_ = f'bytes={start_byte}-'
    else:
        range_ = f'bytes={start_byte}-{end_byte}'

    response = s3_client.get_object(
        Bucket=bucket,
        Key=key,
        Range=range_,
    )
    return StringIO(response['Body'].read().decode('utf-8'))