# saladeaula.digital/user-management/cf.py

# /// script
# dependencies = [
#   "cloudflare"
# ]
# ///

import os

from cloudflare import Cloudflare

# Credentials come from the environment so that no secret lives in source
# control. A missing variable raises KeyError naming the variable, which
# fails the script before any request is made.
# NOTE: the token that used to be hard-coded here must be treated as
# compromised and rotated.
CLOUDFLARE_ACCOUNT_ID = os.environ['CLOUDFLARE_ACCOUNT_ID']
CLOUDFLARE_API_TOKEN = os.environ['CLOUDFLARE_API_TOKEN']


# API client shared by the rest of the script.
client = Cloudflare(api_token=CLOUDFLARE_API_TOKEN)

# System prompt: restricts the model to spotting 'name', 'cpf' and 'email'
# columns in CSV data that may or may not carry a header row.
# The leading and trailing newlines are part of the value.
assistant = (
    '\n'
    'You are a data analysis assistant specialized in identifying Brazilian\n'
    'personal data from CSV files.\n'
    '\n'
    'These CSV files may or may not include headers.\n'
    '\n'
    'Your task is to analyze the content and identify only three possible\n'
    "data types: 'name', 'cpf', and 'email'.\n"
    '\n'
    'Ignore all other fields.\n'
)

# Header-less CSV sample sent to the model. Column layout per row:
# empty first column, full name, email, CPF, course name.
#
# The rows are deliberately fictitious: names are invented, emails use the
# reserved example.com domain, and the CPFs are repeated-digit placeholders
# that only mimic the ###.###.###-## format. Real personal data must not be
# committed to source control or sent to a third-party service as a fixture.
# The sample keeps accented names and one person listed twice (two courses).
csv_content = """
,ANA BEATRIZ LIMA COSTA,ana.costa@example.com,111.111.111-11,NR-10 (RECICLAGEM)
,BRUNO HENRIQUE ALVES,bruno.alves@example.com,222.222.222-22,NR-10 (RECICLAGEM)
,CARLA MENDES ROCHA,carla.rocha@example.com,333.333.333-33,NR-10 (RECICLAGEM)
,DIEGO EDUARDO PRADO NUNES,diego.nunes@example.com,444.444.444-44,NR-12
,DIEGO EDUARDO PRADO NUNES,diego.nunes@example.com,444.444.444-44,NR-35 (RECICLAGEM)
,ELISA MACHADO PIRES,elisa.pires@example.com,555.555.555-55,NR-12
,FÁBIO SOUZA TELES,fabio.teles@example.com,666.666.666-66,NR-12
,JOÃO PEDRO VIANA BRITO,joao.brito@example.com,777.777.777-77,NR-12
"""

# User prompt: embeds the CSV sample and asks for a bare Python list of
# (data_type, column_index) tuples. The empty strings at both ends of the
# list produce the leading and trailing newlines of the final text.
prompt = '\n'.join(
    [
        '',
        'Here is a CSV sample:',
        '',
        csv_content,
        '',
        'Your task is to:',
        '- Detect which columns most likely contain "name", "cpf", or "email".',
        '- Skip any category that is not present in the data.',
        '- Return ONLY a valid Python list of tuples, like:',
        "[('name', index), ('cpf', index), ('email', index)]",
        '- Use the column index that most likely matches each data type,',
        'based on frequency and data format.',
        "- Don't include explanations, code, or any additional text.",
        '',
    ]
)

# Conversation sent to the model: the system message carries the assistant
# instructions, the user message carries the CSV sample and the task.
messages = [
    dict(role='system', content=assistant),
    dict(role='user', content=prompt),
]

r = client.ai.run(
    account_id=CLOUDFLARE_ACCOUNT_ID,
    model_name='@cf/meta/llama-3-8b-instruct',
    messages=messages,
)

# Print the value returned by the call as-is.
print(r)