-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapprentice.yaml
More file actions
119 lines (107 loc) · 3.37 KB
/
apprentice.yaml
File metadata and controls
119 lines (107 loc) · 3.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
---
# Example Apprentice configuration
#
# Copy this file and adjust for your use case.
# Environment variables are referenced as env:VAR_NAME and resolved at startup.
#
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped — confirm section boundaries (especially
# whether `thresholds` is global or per-task) against the consumer's schema.

# ── Cloud provider (teacher model) ──
provider:
  api_base_url: "https://api.anthropic.com"
  api_key: "env:ANTHROPIC_API_KEY"
  model: "claude-sonnet-4-5-20250929"
  timeout_seconds: 30
  max_retries: 3
  retry_base_delay_seconds: 1.0

# ── Local model (student) ──
local_model:
  endpoint: "http://localhost:11434"
  model_name: "llama3.1:8b"
  timeout_seconds: 60
  max_retries: 1
  health_check_path: /health

# ── Task definitions ──
tasks:
  - task_name: classify_ticket
    # {text} is substituted from input_schema at request time.
    prompt_template: |
      Classify the following support ticket.
      Return JSON with "category" (billing, technical, account, general)
      and "priority" (1=urgent, 2=high, 3=normal, 4=low).
      Ticket: {text}
    input_schema:
      - name: text
        type: string
        required: true
      - name: metadata
        type: string
        required: false
    output_schema:
      - name: category
        type: string
        required: true
      - name: priority
        type: string
        required: true
    evaluators:
      - type: exact_match
        match_fields:
          - name: category
            weight: 1.0
            case_sensitive: true
          - name: priority
            weight: 1.0
            case_sensitive: true

# ── Promotion thresholds ──
thresholds:
  local_ready: 0.7
  local_only: 0.85
  degraded_threshold: 0.3
  sampling_rate_initial: 1.0
  min_training_examples: 100

# ── Spend limits ──
budget:
  max_daily_cost_usd: 10.00
  max_monthly_cost_usd: 150.00
  rolling_window_hours: 24
  cost_per_input_token: 0.000003
  cost_per_output_token: 0.000015
  budget_state_path: .apprentice/budget_state.json

# ── Fine-tuning ──
finetuning:
  backend: local_lora
  model_base: "llama3.1:8b"
  batch_size: 100
  trigger_interval_hours: 24
  output_dir: .apprentice/models/
  max_concurrent_jobs: 1
  # ── Kubernetes LoRA backend (uncomment to enable; also comment out the
  # `backend: local_lora` line above to avoid a duplicate key) ──
  # backend: kubernetes_lora
  # gcs_bucket: "my-project-apprentice-training"
  # training_image: "gcr.io/my-project/apprentice-trainer:latest"
  # gpu_type: "nvidia-tesla-t4"  # or nvidia-l4
  # k8s_namespace: "default"
  # service_account: "apprentice-trainer"  # K8s SA with GCS access

# ── Audit logging ──
audit:
  log_path: .apprentice/audit.log
  log_level: INFO
  log_to_stdout: false
  max_file_size_mb: 100
  backup_count: 5

# ── Training-data capture ──
training_data:
  storage_dir: .apprentice/training_data/
  max_examples_per_task: 50000

# ── PII Protection ──
# Scrubs sensitive data before it reaches models, training stores, or audit logs.
# detection_mode: regex_only (default, no extra deps) | hybrid | ner_only
pii:
  enabled: true
  detection_mode: regex_only  # regex_only | hybrid | ner_only
  sensitive_fields:
    - email
    - phone
    - ssn
    - password
    - credit_card
    - api_key
  # ── NER model settings (uncomment for hybrid or ner_only mode) ──
  # Requires: pip install apprentice-ai[ml]
  # ner_model: "dslim/bert-base-NER"  # HuggingFace model ID
  # ner_device: cpu  # cpu | cuda
  # ner_confidence_threshold: 0.7  # discard NER detections below this
  # ner_max_text_length: 10000  # skip NER on very long strings

# ── Feedback ──
# Human and AI feedback collection for continuous improvement.
feedback:
  enabled: true
  storage_dir: .apprentice/feedback/