add apikey
This commit is contained in:
@@ -8,7 +8,7 @@ from tqdm import tqdm
|
||||
from boto3clients import dynamodb_client
|
||||
|
||||
elastic_client = Elasticsearch('http://127.0.0.1:9200')
|
||||
files = (
|
||||
jsonl_files = (
|
||||
'test-orders.jsonl',
|
||||
'test-users.jsonl',
|
||||
'test-enrollments.jsonl',
|
||||
@@ -16,7 +16,7 @@ files = (
|
||||
)
|
||||
|
||||
|
||||
def put_item(item: dict, table_name: str, *, dynamodb_client) -> bool:
|
||||
def put_item(item: dict, table_name: str, /, dynamodb_client) -> bool:
|
||||
try:
|
||||
dynamodb_client.put_item(
|
||||
TableName=table_name,
|
||||
@@ -28,7 +28,7 @@ def put_item(item: dict, table_name: str, *, dynamodb_client) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def scan_table(table_name: str, *, dynamodb_client, **kwargs) -> Generator:
|
||||
def scan_table(table_name: str, /, dynamodb_client, **kwargs) -> Generator:
|
||||
try:
|
||||
r = dynamodb_client.scan(TableName=table_name, **kwargs)
|
||||
except Exception:
|
||||
@@ -45,6 +45,31 @@ def scan_table(table_name: str, *, dynamodb_client, **kwargs) -> Generator:
|
||||
)
|
||||
|
||||
|
||||
class Elastic:
    """Small wrapper that binds an Elasticsearch client to index helpers."""

    def __init__(self, client: Elasticsearch) -> None:
        # Every method delegates to this caller-supplied client.
        self.client = client

    def index_item(
        self,
        id: str,
        index: str,
        doc: dict,
    ):
        """Index ``doc`` under ``id`` in ``index``.

        The document is passed through ``_serialize_python_type`` before
        it is handed to the client. Returns whatever the client's
        ``index`` call returns.
        """
        send = self.client.index
        return send(index=index, id=id, document=_serialize_python_type(doc))

    def delete_index(self, index: str) -> bool:
        """Delete ``index``; return True on success, False if the client raised."""
        try:
            self.client.indices.delete(index=index)
        except Exception:
            # Best effort: any failure is reported as False, not raised.
            return False
        return True
|
||||
|
||||
|
||||
def _serialize_python_type(value: Any) -> Any:
|
||||
if isinstance(value, dict):
|
||||
return {k: _serialize_python_type(v) for k, v in value.items()}
|
||||
@@ -58,53 +83,27 @@ def _serialize_python_type(value: Any) -> Any:
|
||||
return value
|
||||
|
||||
|
||||
def index_item(
    id: str,
    index: str,
    doc: dict,
    *,
    elastic_client: Elasticsearch,
):
    """Index ``doc`` under ``id`` in ``index`` using ``elastic_client``.

    The document is passed through ``_serialize_python_type`` before it is
    handed to the client. Returns whatever the client's ``index`` call
    returns.
    """
    send = elastic_client.index
    return send(index=index, id=id, document=_serialize_python_type(doc))
|
||||
|
||||
|
||||
def delete_index(index: str, *, elastic_client: Elasticsearch) -> bool:
    """Delete ``index`` via ``elastic_client``.

    Returns True when the delete call completes, False if it raised.
    """
    try:
        elastic_client.indices.delete(index=index)
    except Exception:
        # Best effort: any failure is reported as False, not raised.
        return False
    return True
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
for file in tqdm(files, desc='Processing files'):
|
||||
elastic = Elastic(elastic_client)
|
||||
|
||||
for file in tqdm(jsonl_files, desc='Processing files'):
|
||||
with jsonl.readlines(f'seeds/{file}') as lines:
|
||||
table_name = file.removesuffix('.jsonl')
|
||||
|
||||
for line in tqdm(lines, desc=f'Processing lines in {file}'):
|
||||
put_item(line, table_name, dynamodb_client=dynamodb_client)
|
||||
put_item(line, table_name, dynamodb_client)
|
||||
|
||||
for file in tqdm(files, desc='Scanning tables'):
|
||||
for file in tqdm(jsonl_files, desc='Scanning tables'):
|
||||
table_name = file.removesuffix('.jsonl')
|
||||
delete_index(table_name, elastic_client=elastic_client)
|
||||
elastic.delete_index(table_name)
|
||||
|
||||
for record in tqdm(
|
||||
scan_table(
|
||||
table_name,
|
||||
dynamodb_client=dynamodb_client,
|
||||
dynamodb_client,
|
||||
FilterExpression='sk = :sk',
|
||||
ExpressionAttributeValues={':sk': {'S': '0'}},
|
||||
),
|
||||
desc=f'Indexing {table_name}',
|
||||
):
|
||||
index_item(
|
||||
id=record['id'],
|
||||
index=table_name,
|
||||
doc=record,
|
||||
elastic_client=elastic_client,
|
||||
)
|
||||
elastic.index_item(id=record['id'], index=table_name, doc=record)
|
||||
|
||||
Reference in New Issue
Block a user