update
This commit is contained in:
@@ -1,9 +1,9 @@
|
||||
from typing import Any, Generator
|
||||
from typing import Generator
|
||||
|
||||
import boto3
|
||||
import jsonlines
|
||||
from aws_lambda_powertools.shared.json_encoder import Encoder
|
||||
from layercake.dynamodb import deserialize
|
||||
from layercake.dynamodb import deserialize, serialize
|
||||
from meilisearch import Client as Meilisearch
|
||||
from tqdm import tqdm
|
||||
|
||||
@@ -11,6 +11,14 @@ dynamodb_client = boto3.client('dynamodb', endpoint_url='http://127.0.0.1:8000')
|
||||
meili_client = Meilisearch('http://127.0.0.1:7700')
|
||||
|
||||
|
||||
JSONL_FILES = (
|
||||
# 'test-orders.jsonl',
|
||||
'test-users.jsonl',
|
||||
# 'test-enrollments.jsonl',
|
||||
# 'test-courses.jsonl',
|
||||
)
|
||||
|
||||
|
||||
class JSONEncoder(Encoder):
|
||||
def default(self, obj):
|
||||
if isinstance(obj, set):
|
||||
@@ -18,19 +26,11 @@ class JSONEncoder(Encoder):
|
||||
return super(__class__, self).default(obj)
|
||||
|
||||
|
||||
jsonl_files = (
|
||||
'test-orders.jsonl',
|
||||
'test-users.jsonl',
|
||||
'test-enrollments.jsonl',
|
||||
'test-courses.jsonl',
|
||||
)
|
||||
|
||||
|
||||
def put_item(item: dict, table_name: str, /, dynamodb_client) -> bool:
|
||||
try:
|
||||
dynamodb_client.put_item(
|
||||
TableName=table_name,
|
||||
Item=item,
|
||||
Item=serialize(item),
|
||||
)
|
||||
except Exception:
|
||||
return False
|
||||
@@ -45,7 +45,7 @@ def scan_table(table_name: str, /, dynamodb_client, **kwargs) -> Generator:
|
||||
yield from ()
|
||||
else:
|
||||
for item in r['Items']:
|
||||
yield deserialize(item)
|
||||
yield item
|
||||
|
||||
if 'LastEvaluatedKey' in r:
|
||||
yield from scan_table(
|
||||
@@ -55,22 +55,9 @@ def scan_table(table_name: str, /, dynamodb_client, **kwargs) -> Generator:
|
||||
)
|
||||
|
||||
|
||||
def _serialize_to_basic_types(value: Any) -> Any:
|
||||
if isinstance(value, dict):
|
||||
return {k: _serialize_to_basic_types(v) for k, v in value.items()}
|
||||
|
||||
if isinstance(value, set):
|
||||
return list(value)
|
||||
|
||||
if isinstance(value, list):
|
||||
return [_serialize_to_basic_types(v) for v in value]
|
||||
|
||||
return value
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# Populate DynamoDB tables with data from JSONL files
|
||||
for file in tqdm(jsonl_files, desc='Processing files'):
|
||||
for file in tqdm(JSONL_FILES, desc='Processing files'):
|
||||
with open(f'seeds/{file}') as fp:
|
||||
table_name = file.removesuffix('.jsonl')
|
||||
reader = jsonlines.Reader(fp).iter(skip_invalid=True)
|
||||
@@ -79,7 +66,7 @@ if __name__ == '__main__':
|
||||
put_item(line, table_name, dynamodb_client) # type: ignore
|
||||
|
||||
# Scan DynamoDB tables and index the data into Meilisearch
|
||||
for file in tqdm(jsonl_files, desc='Scanning tables'):
|
||||
for file in tqdm(JSONL_FILES, desc='Scanning tables'):
|
||||
table_name = file.removesuffix('.jsonl')
|
||||
|
||||
for doc in tqdm(
|
||||
@@ -91,6 +78,7 @@ if __name__ == '__main__':
|
||||
),
|
||||
desc=f'Indexing {table_name}',
|
||||
):
|
||||
doc = deserialize(doc)
|
||||
meili_client.index(table_name).add_documents([doc], serializer=JSONEncoder)
|
||||
meili_client.index('pytest').add_documents([doc], serializer=JSONEncoder)
|
||||
|
||||
@@ -98,6 +86,6 @@ if __name__ == '__main__':
|
||||
index.update_settings(
|
||||
{
|
||||
'sortableAttributes': ['create_date', 'createDate', 'created_at'],
|
||||
'filterableAttributes': ['tenant_id', 'status'],
|
||||
'filterableAttributes': ['tenant_id', 'status', 'cnpj'],
|
||||
}
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user