1. Introduction
Ollama now offers full compatibility with the OpenAI API, which means you can:
Use the familiar OpenAI interface
Run large language models locally
Migrate existing OpenAI applications seamlessly
Enjoy lower latency and stronger privacy protection
1.1 Supported Features
Chat completions
Streaming
JSON mode
Reproducible outputs (see the sketch after this list)
Vision
Function calling
Logprobs
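Reproducible outputs, for example, work through the standard seed parameter. A minimal sketch, assuming the llama3 model pulled in section 1.2 below:

from openai import OpenAI

client = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')

# With a fixed seed and temperature 0, repeated calls should
# return the same completion.
response = client.chat.completions.create(
    model="llama3",
    messages=[{"role": "user", "content": "Say something random."}],
    seed=42,
    temperature=0
)
print(response.choices[0].message.content)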
1.2 Environment Setup
# 1. Install Ollama
curl -fsSL https://ollama.com/install.sh | sh
# 2. Pull a model
ollama pull llama3
# 3. Verify the installation
curl http://localhost:11434/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "llama3",
"messages": [
{
"role": "user",
"content": "Hello!"
}
]
}'
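If everything is working, the server returns a standard OpenAI-style chat completion object, roughly of this shape (values abridged):

{
    "id": "chatcmpl-...",
    "object": "chat.completion",
    "model": "llama3",
    "choices": [
        {
            "index": 0,
            "message": {
                "role": "assistant",
                "content": "Hello! How can I help you today?"
            },
            "finish_reason": "stop"
        }
    ]
}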
2. Compatibility Features
2.1 Request Parameter Reference
{
    "model": "string",            // model name
    "messages": [                 // array of messages
        {
            "role": "string",     // role: system/user/assistant
            "content": "string"   // message content
        }
    ],
    "temperature": float,         // sampling temperature
    "top_p": float,               // nucleus sampling parameter
    "stream": boolean,            // whether to stream the response
    "stop": [string],             // stop sequences
    "max_tokens": integer,        // maximum number of tokens to generate
    "presence_penalty": float,    // presence penalty
    "frequency_penalty": float    // frequency penalty
}
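A request exercising several of these parameters might look like the following; all values are illustrative:

from openai import OpenAI

client = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')

response = client.chat.completions.create(
    model="llama3",
    messages=[
        {"role": "system", "content": "You are a concise assistant."},
        {"role": "user", "content": "Summarize what Ollama does in one sentence."}
    ],
    temperature=0.7,   # higher values produce more varied output
    top_p=0.9,         # nucleus sampling cutoff
    max_tokens=100,    # cap on generated tokens
    stop=["\n\n"]      # stop at the first blank line
)
print(response.choices[0].message.content)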
2.2 Model Name Mapping
# For tools that depend on the default OpenAI model names
ollama cp llama3 gpt-3.5-turbo
ollama cp mistral gpt-4
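With an alias in place, clients can request the OpenAI-style name directly. For example, using the Python client configured later in section 3.1:

response = client.chat.completions.create(
    model="gpt-3.5-turbo",  # actually served by llama3 via the alias above
    messages=[{"role": "user", "content": "Hello!"}]
)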
3. Quick Start
3.1 Python Example
from openai import OpenAI

class OllamaClient:
    def __init__(self):
        self.client = OpenAI(
            base_url='http://localhost:11434/v1',
            api_key='ollama'  # required by the SDK but ignored by Ollama
        )

    def chat(self, messages):
        try:
            response = self.client.chat.completions.create(
                model="llama3",
                messages=messages
            )
            return response.choices[0].message.content
        except Exception as e:
            print(f"Error: {e}")
            return None

    def stream_chat(self, messages):
        response = self.client.chat.completions.create(
            model="llama3",
            messages=messages,
            stream=True
        )
        for chunk in response:
            if chunk.choices[0].delta.content:
                yield chunk.choices[0].delta.content
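Usage is straightforward; streaming yields tokens as they arrive:

client = OllamaClient()
print(client.chat([{"role": "user", "content": "Hello!"}]))

for token in client.stream_chat([{"role": "user", "content": "Tell me a joke."}]):
    print(token, end="", flush=True)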
3.2 JavaScript Example
import OpenAI from 'openai';
class OllamaService {
constructor() {
this.client = new OpenAI({
baseURL: 'http://localhost:11434/v1',
apiKey: 'ollama'
});
}
async chat(messages) {
try {
const completion = await this.client.chat.completions.create({
model: 'llama3',
messages: messages
});
return completion.choices[0].message.content;
} catch (error) {
console.error('Chat error:', error);
throw error;
}
}
}
4. Multi-language SDK Support
4.1 Using the Python SDK
# Full conversation example
client = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')
conversation = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "I need a Python function to compute the Fibonacci sequence"},
    {"role": "assistant", "content": "I'll write you an efficient implementation"},
    {"role": "user", "content": "Please use recursion with caching"}
]
response = client.chat.completions.create(
model="llama3",
messages=conversation
)
4.2 Using the Node.js SDK
// Express server integration example
import express from 'express';
import OpenAI from 'openai';
const app = express();
app.use(express.json()); // needed so req.body.messages is populated
const openai = new OpenAI({
baseURL: 'http://localhost:11434/v1',
apiKey: 'ollama'
});
app.post('/chat', async (req, res) => {
try {
const completion = await openai.chat.completions.create({
model: 'llama3',
messages: req.body.messages,
stream: true
});
res.setHeader('Content-Type', 'text/event-stream');
for await (const chunk of completion) {
const content = chunk.choices[0].delta.content;
if (content) {
res.write(`data: ${JSON.stringify({content})}\n\n`);
}
}
res.end();
} catch (error) {
res.status(500).json({error: error.message});
}
});
5. Advanced Usage
5.1 Function Calling
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the weather for a given city",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "type": "string",
                        "description": "Name of the city"
                    }
                },
                "required": ["city"]
            }
        }
    }
]
response = client.chat.completions.create(
    model="llama3",
    messages=[{"role": "user", "content": "What's the weather like in Beijing today?"}],
    tools=tools
)
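When the model decides to use the tool, the reply carries a tool_calls entry instead of plain text. A minimal handling sketch; the weather lookup result here is a placeholder you would replace with a real implementation:

import json

message = response.choices[0].message
if message.tool_calls:
    tool_call = message.tool_calls[0]
    args = json.loads(tool_call.function.arguments)
    # Placeholder result; call your real weather service here
    result = {"city": args.get("city"), "forecast": "sunny, 25°C"}
    followup = client.chat.completions.create(
        model="llama3",
        messages=[
            {"role": "user", "content": "What's the weather like in Beijing today?"},
            message,
            {"role": "tool", "tool_call_id": tool_call.id, "content": json.dumps(result)}
        ]
    )
    print(followup.choices[0].message.content)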
5.2 JSON Mode Output
response = client.chat.completions.create(
    model="llama3",
    messages=[{
        "role": "user",
        "content": "Describe a book's details in JSON format"
    }],
    response_format={"type": "json_object"}
)
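The content comes back as a JSON string, so parse it before use:

import json

book = json.loads(response.choices[0].message.content)
print(book)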
6. Best Practices
6.1 Error Handling
import time

def safe_chat_request(client, messages, retries=3):
    for i in range(retries):
        try:
            response = client.chat.completions.create(
                model="llama3",
                messages=messages
            )
            return response
        except Exception:
            if i == retries - 1:
                raise
            time.sleep(1 * (i + 1))  # linear backoff: wait longer after each failure
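It is called like any other request; transient failures are retried before the last exception propagates:

response = safe_chat_request(client, [{"role": "user", "content": "Hello!"}])
print(response.choices[0].message.content)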
6.2 Long Text Handling
def chunk_text(text, max_tokens=2000):
    """Split long text into smaller chunks.

    Word count is used as a rough proxy for token count.
    """
    sentences = text.split('. ')
    chunks = []
    current_chunk = []
    current_length = 0
    for sentence in sentences:
        sentence_length = len(sentence.split())
        if current_chunk and current_length + sentence_length > max_tokens:
            chunks.append('. '.join(current_chunk) + '.')
            current_chunk = [sentence]
            current_length = sentence_length
        else:
            current_chunk.append(sentence)
            current_length += sentence_length
    if current_chunk:
        chunks.append('. '.join(current_chunk) + '.')
    return chunks
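Each chunk can then be sent as its own request, for example to summarize a long document piece by piece (long_document is a placeholder):

summaries = []
for chunk in chunk_text(long_document):
    response = client.chat.completions.create(
        model="llama3",
        messages=[{"role": "user", "content": f"Summarize the following text:\n\n{chunk}"}]
    )
    summaries.append(response.choices[0].message.content)
print("\n".join(summaries))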
7. Framework Integrations
7.1 Vercel AI SDK Integration
// app/api/chat/route.ts
import { OpenAIStream, StreamingTextResponse } from 'ai'
import OpenAI from 'openai'
export const runtime = 'edge'
const openai = new OpenAI({
baseURL: 'http://localhost:11434/v1',
apiKey: 'ollama'
})
export async function POST(req: Request) {
const { messages } = await req.json()
const response = await openai.chat.completions.create({
model: 'llama3',
stream: true,
messages
})
const stream = OpenAIStream(response)
return new StreamingTextResponse(stream)
}
7.2 AutoGen Integration
from autogen import AssistantAgent, UserProxyAgent
config_list = [{
"model": "codellama",
"base_url": "http://localhost:11434/v1",
"api_key": "ollama",
}]
assistant = AssistantAgent(
name="coding_assistant",
llm_config={"config_list": config_list}
)
user_proxy = UserProxyAgent(
name="user_proxy",
code_execution_config={
"work_dir": "coding",
"use_docker": False
}
)
user_proxy.initiate_chat(
assistant,
message="创建一个简单的Flask API服务器"
)
8. Troubleshooting
8.1 Common Issues
Connection issues
import requests

def check_ollama_connection():
    try:
        # The Ollama server responds on its root endpoint when it is running
        response = requests.get("http://localhost:11434", timeout=5)
        return response.status_code == 200
    except requests.RequestException:
        return False
Model loading issues
import subprocess

def ensure_model_available(model_name):
    try:
        # Check whether the model has already been downloaded
        result = subprocess.run(
            ['ollama', 'list'],
            capture_output=True,
            text=True
        )
        if model_name not in result.stdout:
            subprocess.run(['ollama', 'pull', model_name], check=True)
        return True
    except Exception as e:
        print(f"Error ensuring model availability: {e}")
        return False
8.2 Performance Optimization
class OllamaOptimizer:
    def __init__(self):
        self.model_cache = set()

    def preload_models(self, models):
        """Preload frequently used models."""
        for model in models:
            if model not in self.model_cache:
                subprocess.run(['ollama', 'pull', model])
                self.model_cache.add(model)

    def cleanup(self):
        """Remove models that are no longer needed."""
        result = subprocess.run(
            ['ollama', 'list'],
            capture_output=True,
            text=True
        )
        # Cleanup logic goes here (see the sketch below)
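One way to fill in the cleanup logic is to parse the output of ollama list and remove anything outside a keep-list with ollama rm. A sketch, assuming the usual ollama list format (a header row, then one model per line with the name in the first column) and an illustrative keep-list:

import subprocess

def cleanup_models(keep=('llama3',)):
    """Remove downloaded models whose base name is not in `keep`."""
    result = subprocess.run(['ollama', 'list'], capture_output=True, text=True)
    for line in result.stdout.splitlines()[1:]:  # skip the header row
        if not line.strip():
            continue
        name = line.split()[0]  # e.g. "llama3:latest"
        if name.split(':')[0] not in keep:
            subprocess.run(['ollama', 'rm', name])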