This commit is contained in:
2025-04-12 21:04:02 -03:00
parent 1b2ebcfb99
commit 86bdb41216
19 changed files with 259 additions and 60 deletions

View File

@@ -57,10 +57,11 @@ class Elastic:
index: str,
doc: dict,
):
return self.client.index(
return self.client.update(
index=index,
id=id,
document=_serialize_to_basic_types(doc),
doc=_serialize_to_basic_types(doc),
doc_as_upsert=True,
)
def delete_index(self, index: str) -> bool:
@@ -90,12 +91,12 @@ if __name__ == '__main__':
# Populate DynamoDB tables with data from JSONL files
for file in tqdm(jsonl_files, desc='Processing files'):
with jsonlines.open(f'seeds/{file}') as lines:
with open(f'seeds/{file}') as fp:
table_name = file.removesuffix('.jsonl')
reader = jsonlines.Reader(fp)
reader = jsonlines.Reader(fp).iter(skip_invalid=True)
for line in tqdm(lines, desc=f'Processing lines in {file}'):
put_item(line, table_name, dynamodb_client)
for line in tqdm(reader, desc=f'Processing lines in {file}'):
put_item(line, table_name, dynamodb_client) # type: ignore
# Scan DynamoDB tables and index the data into Elasticsearch
for file in tqdm(jsonl_files, desc='Scanning tables'):