# File listing metadata: 63 lines, 1.9 KiB, Python
# /// script
# dependencies = [
#   "cloudflare"
# ]
# ///
"""Ask a Cloudflare Workers AI model which CSV columns hold name, CPF and email.

The script sends a fixed system prompt plus a user prompt embedding a small
CSV sample to the model and prints the raw response object.

Credentials are read from the environment:

* ``CLOUDFLARE_ACCOUNT_ID`` -- account the model is run under.
* ``CLOUDFLARE_API_TOKEN``  -- API token passed to the Cloudflare client.
"""

import os

from cloudflare import Cloudflare


def _require_env(name: str) -> str:
    """Return the value of environment variable *name* or exit with a message."""
    value = os.environ.get(name)
    if not value:
        raise SystemExit(f'Missing required environment variable: {name}')
    return value


# SECURITY: these values used to be hard-coded string literals in this file.
# Secrets must never live in source; any token that was committed here should
# be treated as compromised and rotated.
CLOUDFLARE_ACCOUNT_ID = _require_env('CLOUDFLARE_ACCOUNT_ID')
CLOUDFLARE_API_TOKEN = _require_env('CLOUDFLARE_API_TOKEN')

client = Cloudflare(api_token=CLOUDFLARE_API_TOKEN)

# System message: restricts the model to the three data types of interest.
assistant = """
You are a data analysis assistant specialized in identifying Brazilian
personal data from CSV files.

These CSV files may or may not include headers.

Your task is to analyze the content and identify only three possible
data types: 'name', 'cpf', and 'email'.

Ignore all other fields.
"""

# Header-less sample rows embedded verbatim into the user prompt below.
csv_content = """
,RICARDO GALLES BONET,ricardo.bonet@fanucamerica.com,424.430.528-93,NR-10 (RECICLAGEM)
,RULIO SIEFERT SERA,rulio.sera@fanucamerica.com,063.916.859-08,NR-10 (RECICLAGEM)
,MACIEL FERREIRA BOMFIM,maciel.bomfim@fanucamerica.com,334.547.088-85,NR-10 (RECICLAGEM)
,JAIME EDUARDO GALVEZ AVILES,jaime.galvez@fanucamerica.com,280.238.818-50,NR-12
,JAIME EDUARDO GALVEZ AVILES,jaime.galvez@fanucamerica.com,280.238.818-50,NR-35 (RECICLAGEM)
,HIGOR MACHADO SILVA,higor.silva@fanucamerica.com,419.879.878-88,NR-12
,LÁZARO SOUZA DIAS,lazaro.dias@fanucamerica.com,067.179.825-19,NR-12
,JOÃO PEDRO AGUIAR GALASSO,joao.pedro@fanucamerica.com,570.403.588-40,NR-12
"""

# User message: the sample plus the exact output format requested of the model.
prompt = f"""
Here is a CSV sample:

{csv_content}

Your task is to:
- Detect which columns most likely contain "name", "cpf", or "email".
- Skip any category that is not present in the data.
- Return ONLY a valid Python list of tuples, like:
[('name', index), ('cpf', index), ('email', index)]
- Use the column index that most likely matches each data type,
based on frequency and data format.
- Don't include explanations, code, or any additional text.
"""

# Run the chat-style request against the model under the configured account.
r = client.ai.run(
    model_name='@cf/meta/llama-3-8b-instruct',
    account_id=CLOUDFLARE_ACCOUNT_ID,
    messages=[
        {'role': 'system', 'content': assistant},
        {'role': 'user', 'content': prompt},
    ],
)

# The response object is printed as-is; no parsing of the model output is done.
print(r)