-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapprentice.yaml
More file actions
119 lines (107 loc) · 3.37 KB
/
apprentice.yaml
File metadata and controls
119 lines (107 loc) · 3.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
---
# Example Apprentice configuration
#
# Copy this file and adjust for your use case.
# Environment variables are referenced as env:VAR_NAME and resolved at startup.
#
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped — confirm section boundaries (especially
# whether `thresholds` is global or per-task) against the consumer's schema.

# ── Cloud provider (teacher model) ──
provider:
  api_base_url: "https://api.anthropic.com"
  api_key: "env:ANTHROPIC_API_KEY"
  model: "claude-sonnet-4-5-20250929"
  timeout_seconds: 30
  max_retries: 3
  retry_base_delay_seconds: 1.0

# ── Local model (student) ──
local_model:
  endpoint: "http://localhost:11434"
  model_name: "llama3.1:8b"
  timeout_seconds: 60
  max_retries: 1
  health_check_path: /health

# ── Task definitions ──
tasks:
  - task_name: classify_ticket
    # {text} is substituted from input_schema at request time.
    prompt_template: |
      Classify the following support ticket.
      Return JSON with "category" (billing, technical, account, general)
      and "priority" (1=urgent, 2=high, 3=normal, 4=low).
      Ticket: {text}
    input_schema:
      - name: text
        type: string
        required: true
      - name: metadata
        type: string
        required: false
    output_schema:
      - name: category
        type: string
        required: true
      - name: priority
        type: string
        required: true
    evaluators:
      - type: exact_match
        match_fields:
          - name: category
            weight: 1.0
            case_sensitive: true
          - name: priority
            weight: 1.0
            case_sensitive: true

# ── Promotion thresholds ──
thresholds:
  local_ready: 0.7
  local_only: 0.85
  degraded_threshold: 0.3
  sampling_rate_initial: 1.0
  min_training_examples: 100

# ── Spend limits ──
budget:
  max_daily_cost_usd: 10.00
  max_monthly_cost_usd: 150.00
  rolling_window_hours: 24
  cost_per_input_token: 0.000003
  cost_per_output_token: 0.000015
  budget_state_path: .apprentice/budget_state.json

# ── Fine-tuning ──
finetuning:
  backend: local_lora
  model_base: "llama3.1:8b"
  batch_size: 100
  trigger_interval_hours: 24
  output_dir: .apprentice/models/
  max_concurrent_jobs: 1
  # ── Kubernetes LoRA backend (uncomment to enable; also comment out the
  # `backend: local_lora` line above to avoid a duplicate key) ──
  # backend: kubernetes_lora
  # gcs_bucket: "my-project-apprentice-training"
  # training_image: "gcr.io/my-project/apprentice-trainer:latest"
  # gpu_type: "nvidia-tesla-t4"  # or nvidia-l4
  # k8s_namespace: "default"
  # service_account: "apprentice-trainer"  # K8s SA with GCS access

# ── Audit logging ──
audit:
  log_path: .apprentice/audit.log
  log_level: INFO
  log_to_stdout: false
  max_file_size_mb: 100
  backup_count: 5

# ── Training-data capture ──
training_data:
  storage_dir: .apprentice/training_data/
  max_examples_per_task: 50000

# ── PII Protection ──
# Scrubs sensitive data before it reaches models, training stores, or audit logs.
# detection_mode: regex_only (default, no extra deps) | hybrid | ner_only
pii:
  enabled: true
  detection_mode: regex_only  # regex_only | hybrid | ner_only
  sensitive_fields:
    - email
    - phone
    - ssn
    - password
    - credit_card
    - api_key
  # ── NER model settings (uncomment for hybrid or ner_only mode) ──
  # Requires: pip install apprentice-ai[ml]
  # ner_model: "dslim/bert-base-NER"  # HuggingFace model ID
  # ner_device: cpu  # cpu | cuda
  # ner_confidence_threshold: 0.7  # discard NER detections below this
  # ner_max_text_length: 10000  # skip NER on very long strings

# ── Feedback ──
# Human and AI feedback collection for continuous improvement.
feedback:
  enabled: true
  storage_dir: .apprentice/feedback/