Ollama and OpenAI API Compatibility




1. Introduction

Ollama now offers built-in compatibility with the OpenAI API, which means you can:

  • Use the familiar OpenAI interfaces

  • Run large language models locally

  • Migrate existing OpenAI applications seamlessly

  • Enjoy lower latency and better privacy


1.1 Supported Features

  • Chat completions

  • Streaming output

  • JSON mode

  • Reproducible outputs

  • Vision

  • Function calling

  • Logprobs


1.2 Environment Setup

# 1. Install Ollama
curl -fsSL https://ollama.com/install.sh | sh

# 2. Pull a model
ollama pull llama3

# 3. Verify the installation
curl http://localhost:11434/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "llama3",
    "messages": [
      {
        "role": "user",
        "content": "Hello!"
      }
    ]
  }'

2. Compatibility Features

2.1 Request Parameter Reference

{
  "model": "string",           // model name
  "messages": [                // message array
    {
      "role": "string",        // role: system/user/assistant
      "content": "string"      // message content
    }
  ],
  "temperature": float,        // sampling temperature
  "top_p": float,              // nucleus sampling parameter
  "stream": boolean,           // whether to stream the output
  "stop": [string],            // stop sequences
  "max_tokens": integer,       // maximum number of generated tokens
  "presence_penalty": float,   // presence penalty
  "frequency_penalty": float   // frequency penalty
}
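
In practice these parameters are passed straight through as keyword arguments to the OpenAI client. A minimal sketch (the values below are illustrative, not tuned recommendations):

from openai import OpenAI

client = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')

# Any of the parameters above can be supplied as keyword arguments
response = client.chat.completions.create(
    model="llama3",
    messages=[{"role": "user", "content": "Summarize what Ollama is in one sentence."}],
    temperature=0.7,        # sampling temperature
    top_p=0.9,              # nucleus sampling
    max_tokens=128,         # cap on generated tokens
    presence_penalty=0.0,
    frequency_penalty=0.0,
)
print(response.choices[0].message.content)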

2.2 Model Name Mapping

# For tools that depend on the default OpenAI model names
ollama cp llama3 gpt-3.5-turbo
ollama cp mistral gpt-4
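
After copying, tools that hard-code an OpenAI model name will transparently hit the local model. A quick sanity check with the Python client, assuming the llama3 copy above:

from openai import OpenAI

client = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')

# "gpt-3.5-turbo" now resolves to the local llama3 copy created above
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello!"}]
)
print(response.choices[0].message.content)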

3. Quick Start

3.1 Python Example

from openai import OpenAI

class OllamaClient:
    def __init__(self):
        self.client = OpenAI(
            base_url='http://localhost:11434/v1',
            api_key='ollama'  # required by the SDK but not used by Ollama
        )

    def chat(self, messages):
        """Send a non-streaming chat request and return the reply text."""
        try:
            response = self.client.chat.completions.create(
                model="llama3",
                messages=messages
            )
            return response.choices[0].message.content
        except Exception as e:
            print(f"Error: {e}")
            return None

    def stream_chat(self, messages):
        """Yield the reply incrementally as it is generated."""
        response = self.client.chat.completions.create(
            model="llama3",
            messages=messages,
            stream=True
        )
        for chunk in response:
            if chunk.choices[0].delta.content:
                yield chunk.choices[0].delta.content
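
A short usage sketch for the class above (the prompts are just examples):

client = OllamaClient()

# Single-shot request
print(client.chat([{"role": "user", "content": "Explain what a vector database is."}]))

# Streaming request: print tokens as they arrive
for token in client.stream_chat([{"role": "user", "content": "Write a haiku about the sea."}]):
    print(token, end="", flush=True)
print()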

3.2 JavaScript Example

import OpenAI from 'openai';

class OllamaService {
  constructor() {
    this.client = new OpenAI({
      baseURL: 'http://localhost:11434/v1',
      apiKey: 'ollama'
    });
  }

  async chat(messages) {
    try {
      const completion = await this.client.chat.completions.create({
        model: 'llama3',
        messages: messages
      });
      return completion.choices[0].message.content;
    } catch (error) {
      console.error('Chat error:', error);
      throw error;
    }
  }
}

4. Multi-language SDK Support

4.1 Using the Python SDK

# Full conversation example
from openai import OpenAI

client = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')

conversation = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "I need a Python function to compute the Fibonacci sequence"},
    {"role": "assistant", "content": "I'll help you write an efficient implementation"},
    {"role": "user", "content": "Please use recursion with caching"}
]

response = client.chat.completions.create(
    model="llama3",
    messages=conversation
)
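
The reply is read the same way as with the hosted OpenAI API:

print(response.choices[0].message.content)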

4.2 Using the Node.js SDK

// Express server integration example
import express from 'express';
import OpenAI from 'openai';

const app = express();
app.use(express.json());  // parse JSON request bodies so req.body.messages is available

const openai = new OpenAI({
  baseURL: 'http://localhost:11434/v1',
  apiKey: 'ollama'
});

app.post('/chat', async (req, res) => {
  try {
    const completion = await openai.chat.completions.create({
      model: 'llama3',
      messages: req.body.messages,
      stream: true
    });

    res.setHeader('Content-Type', 'text/event-stream');

    for await (const chunk of completion) {
      const content = chunk.choices[0].delta.content;
      if (content) {
        res.write(`data: ${JSON.stringify({content})}\n\n`);
      }
    }
    res.end();
  } catch (error) {
    res.status(500).json({error: error.message});
  }
});

app.listen(3000);  // example port

5. Advanced Usage

5.1 Function Calling

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the weather for a given city",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "type": "string",
                        "description": "City name"
                    }
                },
                "required": ["city"]
            }
        }
    }
]

response = client.chat.completions.create(
    model="llama3",
    messages=[{"role": "user", "content": "What's the weather like in Beijing today?"}],
    tools=tools
)
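
If the model decides to call the declared function, the call arrives on the response message rather than as plain text. A minimal sketch of the follow-up round trip, continuing from the request above and assuming get_weather() is a hypothetical local implementation of the declared tool:

import json

message = response.choices[0].message
if message.tool_calls:
    call = message.tool_calls[0]
    args = json.loads(call.function.arguments)   # e.g. {"city": "Beijing"}
    # get_weather() is a hypothetical local implementation of the declared tool
    result = get_weather(args["city"])
    # Feed the tool result back so the model can produce a final answer
    followup = client.chat.completions.create(
        model="llama3",
        messages=[
            {"role": "user", "content": "What's the weather like in Beijing today?"},
            message,
            {"role": "tool", "tool_call_id": call.id, "content": json.dumps(result)},
        ],
    )
    print(followup.choices[0].message.content)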

5.2 JSON Mode Output

response = client.chat.completions.create(
    model="llama3",
    messages=[{
        "role": "user",
        "content": "Describe a book in JSON format"
    }],
    response_format={"type": "json_object"}
)
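
Because JSON mode constrains the model to emit valid JSON, the reply can be parsed directly (it still helps to spell out the desired fields in the prompt):

import json

book = json.loads(response.choices[0].message.content)
print(book)  # a dict with whatever fields the model chose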

6. Best Practices

6.1 Error Handling

import time

def safe_chat_request(client, messages, retries=3):
    """Retry a chat request with a simple linear backoff."""
    for i in range(retries):
        try:
            response = client.chat.completions.create(
                model="llama3",
                messages=messages
            )
            return response
        except Exception as e:
            if i == retries - 1:
                raise e
            time.sleep(1 * (i + 1))  # wait a little longer before each retry

6.2 Long Text Handling

def chunk_text(text, max_tokens=2000):
    """Split long text into smaller chunks (word count is used as a rough token proxy)."""
    sentences = text.split('. ')
    chunks = []
    current_chunk = []
    current_length = 0

    for sentence in sentences:
        sentence_length = len(sentence.split())
        if current_length + sentence_length > max_tokens:
            chunks.append('. '.join(current_chunk) + '.')
            current_chunk = [sentence]
            current_length = sentence_length
        else:
            current_chunk.append(sentence)
            current_length += sentence_length

    if current_chunk:
        chunks.append('. '.join(current_chunk) + '.')
    return chunks
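
One way to use chunk_text is a simple map-reduce pass: summarize each chunk on its own, then summarize the partial summaries. A sketch, assuming a client configured as in section 4.1:

def summarize_long_text(client, text):
    """Summarize each chunk, then combine the partial summaries."""
    partial_summaries = []
    for chunk in chunk_text(text):
        response = client.chat.completions.create(
            model="llama3",
            messages=[{"role": "user", "content": f"Summarize the following text:\n\n{chunk}"}],
        )
        partial_summaries.append(response.choices[0].message.content)

    # Combine the partial summaries into one final summary
    combined = "\n".join(partial_summaries)
    response = client.chat.completions.create(
        model="llama3",
        messages=[{"role": "user", "content": f"Combine these summaries into one:\n\n{combined}"}],
    )
    return response.choices[0].message.content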

7. Common Framework Integrations

7.1 Vercel AI SDK Integration

// app/api/chat/route.ts
import { OpenAIStream, StreamingTextResponse } from 'ai'
import OpenAI from 'openai'

export const runtime = 'edge'

const openai = new OpenAI({
  baseURL: 'http://localhost:11434/v1',
  apiKey: 'ollama'
})

export async function POST(req: Request) {
  const { messages } = await req.json()
  const response = await openai.chat.completions.create({
    model: 'llama3',
    stream: true,
    messages
  })
  const stream = OpenAIStream(response)
  return new StreamingTextResponse(stream)
}

7.2 AutoGen Integration

from autogen import AssistantAgent, UserProxyAgent

config_list = [{
    "model": "codellama",
    "base_url": "http://localhost:11434/v1",
    "api_key": "ollama",
}]

assistant = AssistantAgent(
    name="coding_assistant",
    llm_config={"config_list": config_list}
)

user_proxy = UserProxyAgent(
    name="user_proxy",
    code_execution_config={
        "work_dir": "coding",
        "use_docker": False
    }
)

user_proxy.initiate_chat(
    assistant,
    message="Create a simple Flask API server"
)

8. Troubleshooting

8.1 Common Issues

  • Connection issues

import requests

def check_ollama_connection():
    """Return True if the local Ollama server is reachable."""
    try:
        # The server root responds with "Ollama is running" when the server is up
        response = requests.get("http://localhost:11434", timeout=5)
        return response.status_code == 200
    except requests.RequestException:
        return False

  • Model loading issues

import subprocess

def ensure_model_available(model_name):
    """Pull the model with `ollama pull` if it is not already listed locally."""
    try:
        # Check whether the model has already been downloaded
        result = subprocess.run(
            ['ollama', 'list'],
            capture_output=True,
            text=True
        )
        if model_name not in result.stdout:
            subprocess.run(['ollama', 'pull', model_name])
        return True
    except Exception as e:
        print(f"Error ensuring model availability: {e}")
        return False
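
The two checks above combine naturally into a single startup routine (a sketch; the model name is just an example):

def ready(model_name="llama3"):
    """Return True once the server is reachable and the model is present."""
    if not check_ollama_connection():
        print("Ollama server is not reachable on localhost:11434")
        return False
    return ensure_model_available(model_name)

if ready():
    print("Ollama is ready to serve requests")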

8.2 Performance Optimization

import subprocess

class OllamaOptimizer:
    def __init__(self):
        self.model_cache = set()

    def preload_models(self, models):
        """Pull commonly used models ahead of time."""
        for model in models:
            if model not in self.model_cache:
                subprocess.run(['ollama', 'pull', model])
                self.model_cache.add(model)

    def cleanup(self):
        """Remove locally installed models that are not in the preload set."""
        result = subprocess.run(
            ['ollama', 'list'],
            capture_output=True,
            text=True
        )
        # The first column of `ollama list` output is the model name; skip the header row
        for line in result.stdout.splitlines()[1:]:
            if not line.strip():
                continue
            name = line.split()[0]       # e.g. "llama3:latest"
            base = name.split(':')[0]    # e.g. "llama3"
            if name not in self.model_cache and base not in self.model_cache:
                subprocess.run(['ollama', 'rm', name])