wip
This commit is contained in:
0
batch-jobs/app/events/__init__.py
Normal file
0
batch-jobs/app/events/__init__.py
Normal file
20
batch-jobs/app/events/csv_chunks.py
Normal file
20
batch-jobs/app/events/csv_chunks.py
Normal file
@@ -0,0 +1,20 @@
|
||||
import boto3
|
||||
from aws_lambda_powertools.utilities.data_classes import (
|
||||
EventBridgeEvent,
|
||||
event_source,
|
||||
)
|
||||
from aws_lambda_powertools.utilities.typing import LambdaContext
|
||||
|
||||
from csv_utils import byte_ranges
|
||||
|
||||
# Chunk size passed as the second argument to byte_ranges().
# NOTE(review): the unit (rows vs. bytes) is not visible in this file — confirm against csv_utils.
CHUNK_SIZE = 50
|
||||
# S3 client created once, at module import time.
s3_client = boto3.client('s3')
|
||||
|
||||
|
||||
@event_source(data_class=EventBridgeEvent)
def lambda_handler(event: EventBridgeEvent, context: LambdaContext) -> bool:
    """Compute chunk ranges for the CSV referenced by an EventBridge event.

    Reads ``detail['new_image']['csv_s3uri']`` from the event and passes it,
    together with ``CHUNK_SIZE``, to ``byte_ranges``. The result is not
    consumed yet (work in progress).

    Args:
        event: EventBridge event whose detail carries a ``new_image`` mapping
            with a ``csv_s3uri`` entry.
        context: Lambda context object; unused.

    Returns:
        Always ``True``.

    Raises:
        KeyError: If ``new_image`` or ``csv_s3uri`` is missing from the event.
    """
    csv_s3_uri = event.detail['new_image']['csv_s3uri']

    # NOTE(review): the ranges are computed but not used yet; presumably
    # (start, end) byte offsets — confirm against csv_utils.byte_ranges.
    chunk_ranges = byte_ranges(csv_s3_uri, CHUNK_SIZE)

    return True
|
||||
47
batch-jobs/app/events/read_chunk.py
Normal file
47
batch-jobs/app/events/read_chunk.py
Normal file
@@ -0,0 +1,47 @@
|
||||
import csv
|
||||
from io import StringIO
|
||||
|
||||
import boto3
|
||||
from aws_lambda_powertools.utilities.data_classes import (
|
||||
EventBridgeEvent,
|
||||
event_source,
|
||||
)
|
||||
from aws_lambda_powertools.utilities.typing import LambdaContext
|
||||
|
||||
from csv_utils import byte_ranges
|
||||
|
||||
# Chunk size passed as the second argument to byte_ranges().
# NOTE(review): the unit (rows vs. bytes) is not visible in this file — confirm against csv_utils.
CHUNK_SIZE = 50
|
||||
# S3 client created once, at module import time; the handler passes it to get_object_range().
s3_client = boto3.client('s3')
|
||||
|
||||
|
||||
@event_source(data_class=EventBridgeEvent)
def lambda_handler(event: EventBridgeEvent, context: LambdaContext) -> bool:
    """Print the CSV rows found in the last chunk of the event's S3 object.

    Reads ``detail['new_image']['csv_s3uri']`` from the event, asks
    ``byte_ranges`` for the chunk ranges, keeps only the final one, downloads
    that range with ``get_object_range`` and prints every parsed CSV row.

    Args:
        event: EventBridge event whose detail carries a ``new_image`` mapping
            with a ``csv_s3uri`` entry.
        context: Lambda context object; unused.

    Returns:
        Always ``True``.

    Raises:
        KeyError: If ``new_image`` or ``csv_s3uri`` is missing from the event.
        ValueError: If ``byte_ranges`` yields no items (nothing to unpack).
    """
    csv_s3_uri = event.detail['new_image']['csv_s3uri']

    # The starred target swallows every earlier item, leaving only the last.
    *_, last_range = byte_ranges(csv_s3_uri, CHUNK_SIZE)
    first_byte = last_range[0]
    last_byte = last_range[1]

    chunk_text = get_object_range(
        csv_s3_uri,
        first_byte,
        last_byte,
        s3_client=s3_client,
    )

    for row in csv.reader(chunk_text):
        print(row)

    return True
|
||||
|
||||
|
||||
def get_object_range(
    s3_uri: str,
    start_byte: int,
    end_byte: int = -1,
    *,
    s3_client,
) -> StringIO:
    """Download a byte range of an S3 object and return it as text.

    Args:
        s3_uri: Object location in the form ``s3://bucket/key``. A leading
            ``s3://`` is stripped; the remainder is split on the first ``/``
            into bucket and key.
        start_byte: Offset of the first byte to fetch.
        end_byte: Offset of the last byte to fetch (inclusive, as in an HTTP
            ``Range`` header). A negative value (the default) or ``None``
            means "read to the end of the object".
        s3_client: Object exposing ``get_object(Bucket=..., Key=..., Range=...)``
            and returning a mapping whose ``'Body'`` has a ``read()`` method.

    Returns:
        A ``StringIO`` holding the fetched bytes decoded as UTF-8.

    Raises:
        ValueError: If ``s3_uri`` has no ``/`` separating bucket and key.
        UnicodeDecodeError: If the fetched bytes are not valid UTF-8, which
            can happen when a range boundary cuts a multi-byte character.
    """
    # Strip the scheme only when it is a real prefix, so a key that happens
    # to contain "s3://" is left intact.
    scheme = 's3://'
    location = s3_uri[len(scheme):] if s3_uri.startswith(scheme) else s3_uri
    bucket, key = location.split('/', 1)

    # A plain truthiness test is wrong here: the default -1 is truthy and
    # would yield the malformed header "bytes=N--1", while a legitimate end
    # offset of 0 would be treated as missing.
    open_ended = end_byte is None or end_byte < 0
    if open_ended:
        range_ = f'bytes={start_byte}-'
    else:
        range_ = f'bytes={start_byte}-{end_byte}'

    response = s3_client.get_object(
        Bucket=bucket,
        Key=key,
        Range=range_,
    )

    return StringIO(response['Body'].read().decode('utf-8'))
|
||||
Reference in New Issue
Block a user