# /// script
# dependencies = [
#   "cloudflare"
# ]
# ///

import os

from cloudflare import Cloudflare

# Credentials are read from the environment so that no secret is committed
# together with the script. Export both variables before running, e.g.:
#
#   export CLOUDFLARE_ACCOUNT_ID=<your account id>
#   export CLOUDFLARE_API_TOKEN=<a token with Workers AI permissions>
#
# NOTE: any token that was ever hard-coded in this file must be treated as
# leaked and should be revoked/rotated in the Cloudflare dashboard.
CLOUDFLARE_ACCOUNT_ID = os.environ.get('CLOUDFLARE_ACCOUNT_ID')
CLOUDFLARE_API_TOKEN = os.environ.get('CLOUDFLARE_API_TOKEN')

# Fail early with a readable message instead of letting the SDK raise an
# authentication error in the middle of the request.
_missing_settings = [
    name
    for name, value in (
        ('CLOUDFLARE_ACCOUNT_ID', CLOUDFLARE_ACCOUNT_ID),
        ('CLOUDFLARE_API_TOKEN', CLOUDFLARE_API_TOKEN),
    )
    if not value
]
if _missing_settings:
    raise SystemExit(
        'Missing required environment variable(s): '
        + ', '.join(_missing_settings)
    )


# API client authenticated with the token taken from the environment.
client = Cloudflare(api_token=CLOUDFLARE_API_TOKEN)

# Sample CSV rows (no header) that the model is asked to classify.
csv_content = """
Sérgio Rafael de Siqueira,10,07879819908,osergiosiqueria@gmail.com,cipa
Tiago Maciel,12,086.790.049-01,tiago@somosbeta.com.br,nr 10
"""

# System message: fixes the model's role and restricts it to the three
# data types of interest.
assistant = """
You are a data analysis assistant specialized in identifying Brazilian
personal data from CSV files.

These CSV files may or may not include headers.

Your task is to analyze the content and identify only three possible
data types: 'name', 'cpf', and 'email'.

Ignore all other fields.
"""

# User message template; ``{csv_content}`` is the only placeholder and is
# filled with the sample rows below.
_PROMPT_TEMPLATE = """
Here is a CSV sample:

{csv_content}

Your task is to:
- Detect which columns most likely contain "name", "cpf", or "email".
- Skip any category that is not present in the data.
- Return ONLY a valid Python list of tuples, like:
[('name', index), ('cpf', index), ('email', index)]
- Use the column index that most likely matches each data type,
based on frequency and data format.
- Don't include explanations, code, or any additional text.
"""

# Final user message sent to the model.
prompt = _PROMPT_TEMPLATE.format(csv_content=csv_content)

# Chat transcript: the system message sets the role, the user message
# carries the CSV sample and the output-format instructions.
messages = [
    {'role': 'system', 'content': assistant},
    {'role': 'user', 'content': prompt},
]

# Run the Workers AI model for the configured account and keep the raw
# response in ``r``.
r = client.ai.run(
    model_name='@cf/meta/llama-3-8b-instruct',
    account_id=CLOUDFLARE_ACCOUNT_ID,
    messages=messages,
)

# Print the response exactly as returned by the SDK.
print(r)