Skip to content

Configuration Reference

This document covers all configuration options available in caro.

caro stores configuration in platform-specific locations:

| Platform | Config Path |
| --- | --- |
| macOS | `~/Library/Application Support/caro/config.toml` |
| Linux | `~/.config/caro/config.toml` |
| Windows | `%APPDATA%\caro\config.toml` |

Configuration uses TOML format:

# caro configuration file
[general]
# Default backend to use
backend = "mlx"
# Enable colored output
color = true
# Show safety warnings
safety_warnings = true
[model]
# Model to use for inference
name = "qwen2.5-coder-1.5b-instruct"
# Model format
format = "gguf"
# Quantization level
quantization = "q4_k_m"
[backends.mlx]
# MLX-specific settings
enabled = true
threads = 4
[backends.ollama]
# Ollama backend settings
enabled = false
host = "http://localhost:11434"
model = "qwen2.5-coder:latest"
[backends.vllm]
# vLLM backend settings
enabled = false
url = "http://localhost:8000"
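
Options in the `[general]` section:

| Option | Type | Default | Description |
| --- | --- | --- | --- |
| `backend` | string | `"mlx"` | Default inference backend |
| `color` | bool | `true` | Enable colored terminal output |
| `safety_warnings` | bool | `true` | Show safety level warnings |
| `confirm_execution` | bool | `true` | Require confirmation before execution |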
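
Options in the `[model]` section:

| Option | Type | Default | Description |
| --- | --- | --- | --- |
| `name` | string | `"qwen2.5-coder-1.5b-instruct"` | Model name |
| `format` | string | `"gguf"` | Model file format |
| `quantization` | string | `"q4_k_m"` | Quantization level |
| `cache_dir` | string | (auto) | Custom model cache directory |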
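
Options in the `[backends.mlx]` section:

| Option | Type | Default | Description |
| --- | --- | --- | --- |
| `enabled` | bool | `true` | Enable MLX backend |
| `threads` | int | `4` | Number of CPU threads |
| `gpu` | bool | `true` | Use GPU acceleration |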
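
Options in the `[backends.ollama]` section:

| Option | Type | Default | Description |
| --- | --- | --- | --- |
| `enabled` | bool | `false` | Enable Ollama backend |
| `host` | string | `"http://localhost:11434"` | Ollama server URL |
| `model` | string | `"qwen2.5-coder:latest"` | Ollama model name |
| `timeout` | int | `30` | Request timeout in seconds |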
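
Options in the `[backends.vllm]` section:

| Option | Type | Default | Description |
| --- | --- | --- | --- |
| `enabled` | bool | `false` | Enable vLLM backend |
| `url` | string | `"http://localhost:8000"` | vLLM server URL |
| `timeout` | int | `30` | Request timeout in seconds |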

Configuration can also be set via environment variables:

Terminal window
# Override default backend
export CARO_BACKEND=ollama
# Custom cache directory
export CARO_CACHE_DIR=~/custom/cache
# Enable debug logging
export RUST_LOG=debug
# Disable color output
export NO_COLOR=1
# Ollama host
export OLLAMA_HOST=http://localhost:11434

Command-line flags override configuration file settings:

Terminal window
# Use specific backend
caro --backend ollama "list files"
# Disable color output
caro --no-color "list files"
# Skip confirmation
caro --yes "list files"
# Verbose output
caro --verbose "list files"

Model cache location:

| Platform | Cache Path |
| --- | --- |
| macOS | `~/Library/Caches/caro/models/` |
| Linux | `~/.cache/caro/models/` |
| Windows | `%LOCALAPPDATA%\caro\cache\` |

Terminal window
# Show cache location and size
caro cache info
# Clear model cache
caro cache clear
# Download specific model
caro cache download qwen2.5-coder-1.5b-instruct
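
The following are three separate example configurations (each is a complete config.toml on its own; do not combine them into one file).

# Example 1: local MLX backend with colored output and safety warnings
[general]
backend = "mlx"
color = true
safety_warnings = true
[backends.mlx]
enabled = true

# Example 2: remote vLLM server, no color, no execution confirmation
[general]
backend = "vllm"
color = false
confirm_execution = false
[backends.vllm]
enabled = true
url = "http://inference-server:8000"
timeout = 60

# Example 3: multiple backends enabled, with Ollama as the primary
[general]
backend = "ollama" # Primary
[backends.mlx]
enabled = true
[backends.ollama]
enabled = true
host = "http://localhost:11434"
model = "qwen2.5-coder:latest"
[backends.vllm]
enabled = true
url = "http://backup-server:8000"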