Add LLM performance testing tool with visualization
This commit is contained in:
parent
b89df0c1cd
commit
ebbc56fc4a
116
README.md
116
README.md
|
|
@ -1,3 +1,115 @@
|
|||
# llm_perf_test
|
||||
# LLM 性能测试工具
|
||||
|
||||
LLM性能测试工具 - 支持本地和云端大模型性能测试
|
||||
一个用于测试本地和云端大模型性能的网页应用,兼容 OpenAI API 格式。
|
||||
|
||||
## 功能特性
|
||||
|
||||
- 🔧 **API 配置管理**:支持配置多个 LLM API 端点
|
||||
- 🤖 **模型配置**:管理不同模型的参数设置
|
||||
- 📝 **测试用例管理**:创建、编辑、导入/导出测试用例
|
||||
- ⚙️ **测试配置**:自定义并发数、请求次数等参数
|
||||
- 📊 **可视化图表**:实时显示 TTFT、TPS、延迟等指标
|
||||
- 📈 **历史记录**:保存和对比多次测试结果
|
||||
- 🌐 **OpenAI API 兼容**:支持任何兼容 OpenAI API 的模型服务
|
||||
|
||||
## 性能指标
|
||||
|
||||
- **TTFT (Time To First Token)**:首 token 响应时间
|
||||
- **TPS (Tokens Per Second)**:每秒生成 token 数
|
||||
- **总延迟**:完整响应时间
|
||||
- **吞吐量**:每分钟请求数
|
||||
|
||||
## 快速开始
|
||||
|
||||
### 1. 安装依赖
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### 2. 启动应用
|
||||
|
||||
```bash
|
||||
python app.py
|
||||
```
|
||||
|
||||
或部署到生产环境:
|
||||
|
||||
```bash
|
||||
./deploy.sh
|
||||
```
|
||||
|
||||
### 3. 访问应用
|
||||
|
||||
打开浏览器访问 http://localhost:8001
|
||||
|
||||
## 使用说明
|
||||
|
||||
### 配置 API
|
||||
|
||||
1. 点击"API 配置"标签
|
||||
2. 添加新的 API 配置:
|
||||
- 名称:自定义标识
|
||||
- Base URL:API 端点地址(如 http://localhost:11434/v1)
|
||||
- API Key:认证密钥
|
||||
|
||||
### 配置模型
|
||||
|
||||
1. 点击"模型配置"标签
|
||||
2. 添加模型配置:
|
||||
- 选择对应的 API 配置
|
||||
- 输入模型名称
|
||||
- 设置温度、最大 token 数等参数
|
||||
|
||||
### 管理测试用例
|
||||
|
||||
1. 点击"测试用例"标签
|
||||
2. 添加测试提示词
|
||||
3. 支持批量导入/导出 JSON 格式
|
||||
|
||||
### 运行测试
|
||||
|
||||
1. 点击"性能测试"标签
|
||||
2. 选择要测试的模型
|
||||
3. 选择测试用例
|
||||
4. 设置并发数和请求次数
|
||||
5. 点击"开始测试"
|
||||
|
||||
### 查看结果
|
||||
|
||||
- 实时查看 TTFT、TPS 等指标
|
||||
- 查看详细的响应数据
|
||||
- 导出结果为 JSON 格式
|
||||
|
||||
## API 兼容性
|
||||
|
||||
本工具兼容任何实现 OpenAI API 格式的服务:
|
||||
|
||||
- OpenAI GPT 系列
|
||||
- Ollama (本地模型)
|
||||
- vLLM
|
||||
- text-generation-inference
|
||||
- 其他兼容服务
|
||||
|
||||
## 项目结构
|
||||
|
||||
```
|
||||
llm_perf_test/
|
||||
├── app.py # Flask 主应用
|
||||
├── requirements.txt # Python 依赖
|
||||
├── deploy.sh # 部署脚本
|
||||
├── README.md # 使用说明
|
||||
└── templates/
|
||||
└── index.html # 前端页面
|
||||
```
|
||||
|
||||
## 技术栈
|
||||
|
||||
- **后端**: Python + Flask
|
||||
- **前端**: HTML + JavaScript + Chart.js
|
||||
- **UI**: 原生 CSS(内嵌于 templates/index.html)
|
||||
- **数据存储**: 本地 JSON 文件(保存在 data/ 目录)
|
||||
|
||||
## License
|
||||
|
||||
MIT License
|
||||
|
|
|
|||
|
|
@ -0,0 +1,404 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
LLM Performance Test Tool
|
||||
支持本地和云端大模型性能测试,兼容 OpenAI API
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
import uuid
|
||||
import statistics
|
||||
from datetime import datetime
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from threading import Lock
|
||||
|
||||
from flask import Flask, render_template, request, jsonify, send_from_directory
|
||||
import requests
|
||||
|
||||
app = Flask(__name__)
# Prefer a secret supplied through the environment. The literal fallback keeps
# existing local setups working but should not be relied on for real deployments.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY', 'llm-perf-test-secret-key')

# Data storage directory, created on import. The path is made absolute so it
# does not depend on the process's current working directory.
DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
os.makedirs(DATA_DIR, exist_ok=True)

# JSON files holding the saved configuration, the test cases and the run history.
CONFIG_FILE = os.path.join(DATA_DIR, 'config.json')
TEST_CASES_FILE = os.path.join(DATA_DIR, 'test_cases.json')
RESULTS_FILE = os.path.join(DATA_DIR, 'results.json')
|
||||
|
||||
# Default configuration; values saved in CONFIG_FILE override these key by key.
DEFAULT_CONFIG = dict(
    api_base="http://localhost:11434/v1",
    api_key="",
    model="qwen2.5:latest",
    timeout=60,
    max_tokens=512,
    temperature=0.7,
)

# Default test cases, used when TEST_CASES_FILE does not exist yet.
DEFAULT_TEST_CASES = [
    {
        "id": "tc_001",
        "name": "简单问答",
        "prompt": "你好,请介绍一下自己。",
        "expected_length": 100,
    },
    {
        "id": "tc_002",
        "name": "代码生成",
        "prompt": "写一个Python函数,计算斐波那契数列的前n项。",
        "expected_length": 200,
    },
    {
        "id": "tc_003",
        "name": "长文本理解",
        "prompt": "请总结以下段落的主要观点:\n\n人工智能(AI)是计算机科学的一个分支,致力于创造能够执行通常需要人类智能的任务的系统。这些任务包括视觉感知、语音识别、决策制定和语言翻译等。机器学习是AI的一个子集,它使计算机能够从数据中学习并改进,而无需明确编程。深度学习是机器学习的一种特定方法,使用人工神经网络来模拟人脑的工作方式。近年来,随着计算能力的提升和大数据的可用性,AI技术取得了显著进展,在医疗诊断、自动驾驶汽车、自然语言处理等领域展现出巨大潜力。然而,AI的发展也引发了关于隐私、就业和伦理等方面的担忧,需要社会各界共同探讨和制定相应的规范。",
        "expected_length": 150,
    },
    {
        "id": "tc_004",
        "name": "创意写作",
        "prompt": "写一个关于未来城市的短篇科幻故事,约300字。",
        "expected_length": 400,
    },
]

# Global lock serialising read-modify-write cycles on RESULTS_FILE.
results_lock = Lock()
|
||||
|
||||
|
||||
def load_config():
    """Return the effective configuration.

    When CONFIG_FILE exists, its JSON content is layered over DEFAULT_CONFIG
    (stored keys win). Otherwise a copy of DEFAULT_CONFIG is returned.
    """
    if not os.path.exists(CONFIG_FILE):
        return DEFAULT_CONFIG.copy()

    with open(CONFIG_FILE, 'r', encoding='utf-8') as config_fh:
        stored = json.load(config_fh)
    return {**DEFAULT_CONFIG, **stored}
|
||||
|
||||
|
||||
def save_config(config):
    """Persist *config* to CONFIG_FILE as pretty-printed UTF-8 JSON.

    The data is written to a sibling temporary file and then moved over
    CONFIG_FILE with os.replace, so a failed serialisation or a crash mid-write
    cannot leave a truncated configuration behind.
    """
    tmp_path = f"{CONFIG_FILE}.tmp"
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump(config, f, ensure_ascii=False, indent=2)
    os.replace(tmp_path, CONFIG_FILE)
|
||||
|
||||
|
||||
def load_test_cases():
    """Return the test cases stored in TEST_CASES_FILE.

    Falls back to a (shallow) copy of DEFAULT_TEST_CASES when the file does
    not exist.
    """
    if not os.path.exists(TEST_CASES_FILE):
        return DEFAULT_TEST_CASES.copy()

    with open(TEST_CASES_FILE, 'r', encoding='utf-8') as cases_fh:
        stored_cases = json.load(cases_fh)
    return stored_cases
|
||||
|
||||
|
||||
def save_test_cases(test_cases):
    """Persist *test_cases* to TEST_CASES_FILE as pretty-printed UTF-8 JSON.

    Written via a temporary file plus os.replace so that a failure while
    serialising never truncates the existing test-case file.
    """
    tmp_path = f"{TEST_CASES_FILE}.tmp"
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump(test_cases, f, ensure_ascii=False, indent=2)
    os.replace(tmp_path, TEST_CASES_FILE)
|
||||
|
||||
|
||||
def load_results():
    """Return the stored test-run history, or an empty list if none was saved yet."""
    if not os.path.exists(RESULTS_FILE):
        return []

    with open(RESULTS_FILE, 'r', encoding='utf-8') as results_fh:
        history = json.load(results_fh)
    return history
|
||||
|
||||
|
||||
def save_result(result):
    """Append *result* to the history stored in RESULTS_FILE.

    The read-modify-write cycle runs under results_lock. The new history is
    written to a temporary file and swapped in with os.replace, because
    load_results() is also called without the lock and must never observe a
    half-written file.
    """
    with results_lock:
        results = load_results()
        results.append(result)
        tmp_path = f"{RESULTS_FILE}.tmp"
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, RESULTS_FILE)
|
||||
|
||||
|
||||
def stream_chat_completion(api_base, api_key, model, messages, max_tokens, temperature, timeout):
    """Call a chat-completions endpoint in streaming mode and time the response.

    Args:
        api_base: Base URL of the API; "/chat/completions" is appended.
        api_key: Bearer token; no Authorization header is sent when it is empty.
        model: Model name placed in the request payload.
        messages: Chat messages placed in the request payload.
        max_tokens: Value of the "max_tokens" payload field.
        temperature: Value of the "temperature" payload field.
        timeout: Timeout passed to requests.post.

    Returns:
        On success a dict with "success" (True), "ttft_ms" (time to the first
        non-empty content chunk, 0 if none arrived), "total_time_ms", "tps",
        "total_chars" and "content". On any error a dict with "success"
        (False) and "error" (the exception text). This function never raises.

    Note:
        The token count is approximated by the number of characters received,
        so "tps" is really characters per second over the whole request.
    """
    headers = {"Content-Type": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    payload = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stream": True
    }

    # Tolerate a trailing slash on the configured base URL.
    url = f"{str(api_base).rstrip('/')}/chat/completions"

    first_token_time = None
    total_tokens = 0
    content_chunks = []
    # perf_counter is monotonic, so measurements are immune to wall-clock jumps.
    start_time = time.perf_counter()

    try:
        # The context manager releases the connection even when reading stops
        # early at "[DONE]" or an exception interrupts the stream.
        with requests.post(url, headers=headers, json=payload,
                           timeout=timeout, stream=True) as response:
            response.raise_for_status()

            for line in response.iter_lines():
                if not line:
                    continue

                line_str = line.decode('utf-8')
                # Server-sent events may be written as "data: x" or "data:x".
                if not line_str.startswith('data:'):
                    continue

                data_str = line_str[5:].strip()
                if data_str == '[DONE]':
                    break

                try:
                    data = json.loads(data_str)
                except json.JSONDecodeError:
                    continue

                # Some servers emit chunks with an empty "choices" list or a
                # null delta; skip them instead of failing the whole request.
                choices = data.get('choices') if isinstance(data, dict) else None
                if not choices or not isinstance(choices[0], dict):
                    continue
                delta = choices[0].get('delta') or {}
                content = delta.get('content') or ''

                if content:
                    if first_token_time is None:
                        first_token_time = time.perf_counter()
                    content_chunks.append(content)
                    total_tokens += len(content)  # approximate token count

        end_time = time.perf_counter()

        # Compute metrics (milliseconds).
        ttft = (first_token_time - start_time) * 1000 if first_token_time else 0
        total_time = (end_time - start_time) * 1000
        tps = total_tokens / (total_time / 1000) if total_time > 0 else 0

        return {
            "success": True,
            "ttft_ms": round(ttft, 2),
            "total_time_ms": round(total_time, 2),
            "tps": round(tps, 2),
            "total_chars": sum(len(c) for c in content_chunks),
            "content": ''.join(content_chunks)
        }

    except Exception as e:
        # Boundary of a worker task: report the failure as data so one bad
        # request does not abort the whole batch.
        return {
            "success": False,
            "error": str(e)
        }
|
||||
|
||||
|
||||
def run_single_test(api_config, test_case, run_index=0):
    """Run one test case once and return the annotated measurement.

    The test case's prompt is sent as a single user message through
    stream_chat_completion. The returned dict is that call's result extended
    with "test_case_id", "test_case_name", "run_index" and an ISO "timestamp".
    """
    request_kwargs = {
        "api_base": api_config["api_base"],
        "api_key": api_config["api_key"],
        "model": api_config["model"],
        "messages": [{"role": "user", "content": test_case["prompt"]}],
        "max_tokens": api_config.get("max_tokens", 512),
        "temperature": api_config.get("temperature", 0.7),
        "timeout": api_config.get("timeout", 60),
    }
    outcome = stream_chat_completion(**request_kwargs)

    outcome["test_case_id"] = test_case["id"]
    outcome["test_case_name"] = test_case["name"]
    outcome["run_index"] = run_index
    outcome["timestamp"] = datetime.now().isoformat()
    return outcome
|
||||
|
||||
|
||||
def run_batch_tests(api_config, test_cases, runs_per_case=1, concurrency=1):
    """Run every test case *runs_per_case* times on a thread pool.

    Args:
        api_config: Configuration dict handed to run_single_test.
        test_cases: Iterable of test-case dicts.
        runs_per_case: Number of repetitions per test case.
        concurrency: Requested number of worker threads; values below 1 are
            treated as 1 instead of making ThreadPoolExecutor raise.

    Returns:
        A list with one result dict per scheduled run, in completion order.
        A run whose worker raised is recorded as a failed result rather than
        dropped, so failure counts stay accurate.
    """
    all_tasks = [
        (api_config, test_case, run_index)
        for test_case in test_cases
        for run_index in range(runs_per_case)
    ]
    total = len(all_tasks)
    if not total:
        return []

    # ThreadPoolExecutor rejects max_workers <= 0; more workers than tasks
    # would only sit idle.
    max_workers = max(1, min(int(concurrency), total))

    results = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(run_single_test, *task): task for task in all_tasks}

        for completed, future in enumerate(as_completed(futures), start=1):
            try:
                result = future.result()
            except Exception as e:
                print(f"Test failed: {e}")
                _, test_case, run_index = futures[future]
                case_info = test_case if isinstance(test_case, dict) else {}
                result = {
                    "success": False,
                    "error": str(e),
                    "test_case_id": case_info.get("id"),
                    "test_case_name": case_info.get("name"),
                    "run_index": run_index,
                    "timestamp": datetime.now().isoformat(),
                }
            results.append(result)
            print(f"Progress: {completed}/{total}")

    return results
|
||||
|
||||
|
||||
def _summarize_metric(values):
    """Return avg/min/max/median of *values*, each rounded to two decimals."""
    return {
        "avg": round(statistics.mean(values), 2),
        "min": round(min(values), 2),
        "max": round(max(values), 2),
        "median": round(statistics.median(values), 2),
    }


def calculate_statistics(results):
    """Aggregate per-request results into summary statistics.

    Args:
        results: List of result dicts. A truthy "success" marks a successful
            run, which must carry "ttft_ms", "tps" and "total_time_ms".

    Returns:
        A dict with "total_tests", "successful", "failed", "success_rate"
        (percent) and avg/min/max/median summaries under "ttft", "tps" and
        "total_time". When no run succeeded, the dict carries "error" plus
        the counts, and no metric summaries.
    """
    successful = [r for r in results if r.get("success")]
    failed_count = len(results) - len(successful)

    if not successful:
        return {
            "error": "No successful tests",
            "total_tests": len(results),
            "successful": 0,
            "failed": failed_count,
            "success_rate": 0.0,
        }

    return {
        "total_tests": len(results),
        "successful": len(successful),
        "failed": failed_count,
        "success_rate": round(len(successful) / len(results) * 100, 2),
        "ttft": _summarize_metric([r["ttft_ms"] for r in successful]),
        "tps": _summarize_metric([r["tps"] for r in successful]),
        "total_time": _summarize_metric([r["total_time_ms"] for r in successful]),
    }
|
||||
|
||||
|
||||
# ==================== Flask Routes ====================
|
||||
|
||||
@app.route('/')
def index():
    """Serve the single-page front end."""
    page = render_template('index.html')
    return page
|
||||
|
||||
|
||||
@app.route('/api/config', methods=['GET', 'POST'])
def config_api():
    """Configuration endpoint.

    GET returns the effective configuration. POST replaces the stored
    configuration with the JSON object in the request body and answers
    {"status": "success"}; a missing or non-object body is rejected with
    HTTP 400 because storing it would make load_config() fail afterwards.
    """
    if request.method == 'GET':
        return jsonify(load_config())

    new_config = request.get_json(silent=True)
    if not isinstance(new_config, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    save_config(new_config)
    return jsonify({"status": "success"})
|
||||
|
||||
|
||||
@app.route('/api/test-cases', methods=['GET', 'POST', 'PUT', 'DELETE'])
def test_cases_api():
    """Test-case management endpoint.

    GET lists all test cases. POST stores the JSON object in the body under a
    newly generated id and returns that id. PUT replaces the test case whose
    "id" matches the body's "id" (404 when none matches). DELETE removes the
    test case named by the "id" query parameter. POST and PUT answer HTTP 400
    when the body is not a JSON object (PUT also when "id" is missing).
    """
    if request.method == 'GET':
        return jsonify(load_test_cases())

    if request.method == 'DELETE':
        case_id = request.args.get('id')
        remaining = [tc for tc in load_test_cases() if tc.get('id') != case_id]
        save_test_cases(remaining)
        return jsonify({"status": "success"})

    # POST and PUT both need a JSON object in the body.
    body = request.get_json(silent=True)
    if not isinstance(body, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    test_cases = load_test_cases()

    if request.method == 'POST':
        body['id'] = f"tc_{uuid.uuid4().hex[:6]}"
        test_cases.append(body)
        save_test_cases(test_cases)
        return jsonify({"status": "success", "id": body['id']})

    # PUT
    if 'id' not in body:
        return jsonify({"error": "Missing test case id"}), 400
    for i, tc in enumerate(test_cases):
        if tc.get('id') == body['id']:
            test_cases[i] = body
            break
    else:
        # Nothing matched: report it instead of claiming success.
        return jsonify({"error": "Test case not found"}), 404
    save_test_cases(test_cases)
    return jsonify({"status": "success"})
|
||||
|
||||
|
||||
@app.route('/api/run-test', methods=['POST'])
def run_test_api():
    """Run a batch of tests and return the stored test-run record.

    Optional JSON body fields:
        config: Object whose keys override the saved configuration.
        test_case_ids: Ids of the test cases to run; all cases when empty.
        runs_per_case: Repetitions per test case (integer >= 1, default 1).
        concurrency: Number of worker threads (integer >= 1, default 1).

    Returns the run record ("id", "timestamp", "config", "stats", "results")
    as JSON, or HTTP 400 with an "error" message for invalid input or when no
    test case is selected.
    """
    data = request.get_json(silent=True)
    if data is None:
        data = {}
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    overrides = data.get('config') or {}
    if not isinstance(overrides, dict):
        return jsonify({"error": "config must be a JSON object"}), 400
    # Layer the overrides on the saved configuration so that a partial
    # override still provides every key the test runner reads.
    api_config = {**load_config(), **overrides}

    try:
        runs_per_case = int(data.get('runs_per_case', 1))
        concurrency = int(data.get('concurrency', 1))
    except (TypeError, ValueError):
        return jsonify({"error": "runs_per_case and concurrency must be integers"}), 400
    if runs_per_case < 1 or concurrency < 1:
        return jsonify({"error": "runs_per_case and concurrency must be >= 1"}), 400

    # Select the test cases to run.
    test_case_ids = data.get('test_case_ids') or []
    all_test_cases = load_test_cases()
    if test_case_ids:
        test_cases = [tc for tc in all_test_cases if tc['id'] in test_case_ids]
    else:
        test_cases = all_test_cases

    if not test_cases:
        return jsonify({"error": "No test cases selected"}), 400

    # Run the tests and aggregate.
    results = run_batch_tests(api_config, test_cases, runs_per_case, concurrency)
    stats = calculate_statistics(results)

    # Persist the run.
    # NOTE(review): the stored config includes api_key, so the key ends up in
    # results.json and in /api/results responses - confirm this is intended.
    test_run = {
        "id": f"run_{uuid.uuid4().hex[:8]}",
        "timestamp": datetime.now().isoformat(),
        "config": api_config,
        "stats": stats,
        "results": results
    }
    save_result(test_run)

    return jsonify(test_run)
|
||||
|
||||
|
||||
@app.route('/api/results', methods=['GET'])
def get_results_api():
    """Return the complete stored test-run history as JSON."""
    history = load_results()
    return jsonify(history)
|
||||
|
||||
|
||||
@app.route('/api/results/<result_id>', methods=['GET'])
def get_result_detail_api(result_id):
    """Return the stored test run whose id equals *result_id*, or a 404 error."""
    match = next(
        (run for run in load_results() if run.get('id') == result_id),
        None,
    )
    if match is None:
        return jsonify({"error": "Result not found"}), 404
    return jsonify(match)
|
||||
|
||||
|
||||
@app.route('/api/results/<result_id>', methods=['DELETE'])
def delete_result_api(result_id):
    """Delete the stored test run with id *result_id*.

    Answers {"status": "success"} whether or not a matching run existed. The
    history is rewritten under results_lock through a temporary file and
    os.replace, so readers that do not hold the lock never see a partially
    written file.
    """
    with results_lock:
        results = [r for r in load_results() if r.get('id') != result_id]
        tmp_path = f"{RESULTS_FILE}.tmp"
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, RESULTS_FILE)
    return jsonify({"status": "success"})
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Seed the default configuration and test cases on first start.
    if not os.path.exists(CONFIG_FILE):
        save_config(DEFAULT_CONFIG)
    if not os.path.exists(TEST_CASES_FILE):
        save_test_cases(DEFAULT_TEST_CASES)

    # The server listens on all interfaces, so the interactive debugger (which
    # allows arbitrary code execution) must be opt-in: set FLASK_DEBUG=1 to
    # enable it for local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(host='0.0.0.0', port=8001, debug=debug_enabled)
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
#!/bin/bash
# Deployment script for the LLM performance test tool.
# Creates a virtualenv, installs the dependencies and starts the app on port 8001.

set -e

# Run from the script's own directory so the relative paths used below
# (venv, requirements.txt, app.py) resolve regardless of the caller's cwd.
cd "$(dirname "${BASH_SOURCE[0]}")"

echo "=== LLM性能测试工具部署脚本 ==="

# Check that Python 3 is available.
if ! command -v python3 &> /dev/null; then
    echo "错误: 未找到 Python3,请先安装 Python3.8+"
    exit 1
fi

PYTHON_VERSION=$(python3 --version | cut -d' ' -f2 | cut -d'.' -f1,2)
echo "Python版本: $PYTHON_VERSION"

# Enforce the advertised minimum version instead of only printing it.
if ! python3 -c 'import sys; sys.exit(0 if sys.version_info >= (3, 8) else 1)'; then
    echo "错误: 需要 Python3.8+,当前版本为 $PYTHON_VERSION"
    exit 1
fi

# Create the virtual environment on first run.
if [ ! -d "venv" ]; then
    echo "创建虚拟环境..."
    python3 -m venv venv
fi

# Activate the virtual environment.
echo "激活虚拟环境..."
source venv/bin/activate

# Upgrade pip.
echo "升级pip..."
pip install --upgrade pip

# Install dependencies.
echo "安装依赖..."
pip install -r requirements.txt

# Free the port if something is already listening on it. Only LISTEN sockets
# are matched so that client processes merely connected to port 8001
# (for example a browser) are not killed.
PORT_PIDS=$(lsof -t -i:8001 -sTCP:LISTEN 2>/dev/null || true)
if [ -n "$PORT_PIDS" ]; then
    echo "警告: 端口8001已被占用,尝试停止现有进程..."
    # Word splitting is intended here: lsof prints one PID per line.
    kill $PORT_PIDS 2>/dev/null || true
    sleep 2
fi

# Start the application.
echo "启动LLM性能测试工具 (端口: 8001)..."
echo "访问地址: http://localhost:8001"
echo ""
echo "按 Ctrl+C 停止服务"
echo "==================================="

# exec replaces the shell so Ctrl+C and other signals reach the app directly.
exec python app.py
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
flask==3.0.0
|
||||
requests==2.31.0
|
||||
openai==1.6.0
|
||||
plotly==5.18.0
|
||||
pandas==2.1.4
|
||||
numpy==1.26.2
|
||||
gunicorn==21.2.0
|
||||
python-dotenv==1.0.0
|
||||
|
|
@ -0,0 +1,514 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>LLM 性能测试工具</title>
|
||||
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
||||
<style>
|
||||
* { margin: 0; padding: 0; box-sizing: border-box; }
|
||||
body {
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
|
||||
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
||||
min-height: 100vh;
|
||||
padding: 20px;
|
||||
}
|
||||
.container {
|
||||
max-width: 1400px;
|
||||
margin: 0 auto;
|
||||
}
|
||||
h1 {
|
||||
text-align: center;
|
||||
color: white;
|
||||
margin-bottom: 30px;
|
||||
font-size: 2.5em;
|
||||
text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
|
||||
}
|
||||
.card {
|
||||
background: white;
|
||||
border-radius: 16px;
|
||||
padding: 25px;
|
||||
margin-bottom: 20px;
|
||||
box-shadow: 0 10px 40px rgba(0,0,0,0.2);
|
||||
}
|
||||
.card h2 {
|
||||
color: #333;
|
||||
margin-bottom: 20px;
|
||||
padding-bottom: 10px;
|
||||
border-bottom: 3px solid #667eea;
|
||||
}
|
||||
.form-group {
|
||||
margin-bottom: 15px;
|
||||
}
|
||||
label {
|
||||
display: block;
|
||||
margin-bottom: 5px;
|
||||
color: #555;
|
||||
font-weight: 600;
|
||||
}
|
||||
input, select, textarea {
|
||||
width: 100%;
|
||||
padding: 12px;
|
||||
border: 2px solid #e0e0e0;
|
||||
border-radius: 8px;
|
||||
font-size: 14px;
|
||||
transition: border-color 0.3s;
|
||||
}
|
||||
input:focus, select:focus, textarea:focus {
|
||||
outline: none;
|
||||
border-color: #667eea;
|
||||
}
|
||||
textarea {
|
||||
min-height: 100px;
|
||||
resize: vertical;
|
||||
}
|
||||
.btn {
|
||||
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
||||
color: white;
|
||||
border: none;
|
||||
padding: 15px 30px;
|
||||
border-radius: 8px;
|
||||
font-size: 16px;
|
||||
font-weight: 600;
|
||||
cursor: pointer;
|
||||
transition: transform 0.2s, box-shadow 0.2s;
|
||||
}
|
||||
.btn:hover {
|
||||
transform: translateY(-2px);
|
||||
box-shadow: 0 5px 20px rgba(102, 126, 234, 0.4);
|
||||
}
|
||||
.btn:disabled {
|
||||
opacity: 0.6;
|
||||
cursor: not-allowed;
|
||||
transform: none;
|
||||
}
|
||||
.grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
|
||||
gap: 20px;
|
||||
}
|
||||
.metric-card {
|
||||
background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
|
||||
padding: 20px;
|
||||
border-radius: 12px;
|
||||
text-align: center;
|
||||
}
|
||||
.metric-value {
|
||||
font-size: 2.5em;
|
||||
font-weight: bold;
|
||||
color: #667eea;
|
||||
}
|
||||
.metric-label {
|
||||
color: #666;
|
||||
margin-top: 5px;
|
||||
}
|
||||
.chart-container {
|
||||
position: relative;
|
||||
height: 400px;
|
||||
margin-top: 20px;
|
||||
}
|
||||
.progress-bar {
|
||||
width: 100%;
|
||||
height: 30px;
|
||||
background: #e0e0e0;
|
||||
border-radius: 15px;
|
||||
overflow: hidden;
|
||||
margin: 20px 0;
|
||||
}
|
||||
.progress-fill {
|
||||
height: 100%;
|
||||
background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
|
||||
transition: width 0.3s;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
color: white;
|
||||
font-weight: bold;
|
||||
}
|
||||
.log-output {
|
||||
background: #1e1e1e;
|
||||
color: #d4d4d4;
|
||||
padding: 15px;
|
||||
border-radius: 8px;
|
||||
font-family: 'Consolas', monospace;
|
||||
font-size: 13px;
|
||||
max-height: 300px;
|
||||
overflow-y: auto;
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
.status-running { color: #ffa500; }
|
||||
.status-success { color: #4caf50; }
|
||||
.status-error { color: #f44336; }
|
||||
table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
margin-top: 15px;
|
||||
}
|
||||
th, td {
|
||||
padding: 12px;
|
||||
text-align: left;
|
||||
border-bottom: 1px solid #e0e0e0;
|
||||
}
|
||||
th {
|
||||
background: #f5f5f5;
|
||||
font-weight: 600;
|
||||
}
|
||||
tr:hover {
|
||||
background: #f9f9f9;
|
||||
}
|
||||
.tabs {
|
||||
display: flex;
|
||||
gap: 10px;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
.tab {
|
||||
padding: 10px 20px;
|
||||
background: #e0e0e0;
|
||||
border: none;
|
||||
border-radius: 8px;
|
||||
cursor: pointer;
|
||||
font-weight: 600;
|
||||
transition: all 0.3s;
|
||||
}
|
||||
.tab.active {
|
||||
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
||||
color: white;
|
||||
}
|
||||
.tab-content {
|
||||
display: none;
|
||||
}
|
||||
.tab-content.active {
|
||||
display: block;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<h1>🚀 LLM 性能测试工具</h1>
|
||||
|
||||
<div class="tabs">
|
||||
<button class="tab active" onclick="showTab('config')">⚙️ 配置</button>
|
||||
<button class="tab" onclick="showTab('test')">🧪 测试</button>
|
||||
<button class="tab" onclick="showTab('results')">📊 结果</button>
|
||||
</div>
|
||||
|
||||
<!-- 配置页面 -->
|
||||
<div id="config" class="tab-content active">
|
||||
<div class="card">
|
||||
<h2>API 配置</h2>
|
||||
<div class="form-group">
|
||||
<label>API 类型</label>
|
||||
<select id="apiType">
|
||||
<option value="openai">OpenAI API</option>
|
||||
<option value="custom">自定义 API (兼容 OpenAI)</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label>API Base URL</label>
|
||||
<input type="text" id="apiBase" placeholder="https://api.openai.com/v1" value="https://api.openai.com/v1">
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label>API Key</label>
|
||||
<input type="password" id="apiKey" placeholder="sk-...">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>模型配置</h2>
|
||||
<div class="form-group">
|
||||
<label>模型名称</label>
|
||||
<input type="text" id="modelName" placeholder="gpt-3.5-turbo" value="gpt-3.5-turbo">
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label>Temperature</label>
|
||||
<input type="number" id="temperature" value="0.7" min="0" max="2" step="0.1">
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label>Max Tokens</label>
|
||||
<input type="number" id="maxTokens" value="1000" min="1" max="8192">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>测试用例</h2>
|
||||
<div class="form-group">
|
||||
<label>测试提示词</label>
|
||||
<textarea id="testPrompt" placeholder="输入测试用的提示词...">请详细解释量子计算的基本原理,包括叠加态、纠缠和量子门等概念。</textarea>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label>并发请求数</label>
|
||||
<input type="number" id="concurrency" value="5" min="1" max="50">
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label>每个并发请求次数</label>
|
||||
<input type="number" id="requestsPerConcurrency" value="3" min="1" max="100">
|
||||
</div>
|
||||
<button class="btn" onclick="saveConfig()">💾 保存配置</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 测试页面 -->
|
||||
<div id="test" class="tab-content">
|
||||
<div class="card">
|
||||
<h2>运行测试</h2>
|
||||
<div class="form-group">
|
||||
<label>测试名称</label>
|
||||
<input type="text" id="testName" placeholder="测试 #1" value="性能测试 {{ now }}">
|
||||
</div>
|
||||
<button class="btn" id="runBtn" onclick="runTest()">▶️ 开始测试</button>
|
||||
|
||||
<div id="progressSection" style="display:none; margin-top: 20px;">
|
||||
<div class="progress-bar">
|
||||
<div class="progress-fill" id="progressBar" style="width: 0%">0%</div>
|
||||
</div>
|
||||
<div class="log-output" id="logOutput"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 结果页面 -->
|
||||
<div id="results" class="tab-content">
|
||||
<div class="card">
|
||||
<h2>性能指标</h2>
|
||||
<div class="grid" id="metricsGrid">
|
||||
<div class="metric-card">
|
||||
<div class="metric-value" id="ttftValue">-</div>
|
||||
<div class="metric-label">TTFT (首Token时间)</div>
|
||||
</div>
|
||||
<div class="metric-card">
|
||||
<div class="metric-value" id="tpsValue">-</div>
|
||||
<div class="metric-label">TPS (每秒Token数)</div>
|
||||
</div>
|
||||
<div class="metric-card">
|
||||
<div class="metric-value" id="latencyValue">-</div>
|
||||
<div class="metric-label">平均延迟</div>
|
||||
</div>
|
||||
<div class="metric-card">
|
||||
<div class="metric-value" id="totalTimeValue">-</div>
|
||||
<div class="metric-label">总耗时</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>响应时间分布</h2>
|
||||
<div class="chart-container">
|
||||
<canvas id="latencyChart"></canvas>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>TPS 趋势</h2>
|
||||
<div class="chart-container">
|
||||
<canvas id="tpsChart"></canvas>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>详细结果</h2>
|
||||
<table id="resultsTable">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>请求ID</th>
|
||||
<th>状态</th>
|
||||
<th>TTFT (ms)</th>
|
||||
<th>TPS</th>
|
||||
<th>总Token数</th>
|
||||
<th>总耗时 (ms)</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody id="resultsBody"></tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
let currentTest = null;
|
||||
let latencyChart = null;
|
||||
let tpsChart = null;
|
||||
|
||||
// Activate the tab panel with id `tabId` and highlight its tab button.
// The button is located through its inline onclick="showTab('<id>')" attribute
// rather than the deprecated implicit global `event`, so the function also
// works when it is called from code instead of a click handler.
function showTab(tabId) {
    document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
    document.querySelectorAll('.tab-content').forEach(t => t.classList.remove('active'));

    const tabButton = document.querySelector(`.tab[onclick*="'${tabId}'"]`);
    if (tabButton) {
        tabButton.classList.add('active');
    }
    const panel = document.getElementById(tabId);
    if (panel) {
        panel.classList.add('active');
    }
}
|
||||
|
||||
// Collect the form values and POST them to /api/config.
// Success is reported only when the server answers with a 2xx status;
// network and HTTP errors are shown to the user instead of being ignored.
function saveConfig() {
    const valueOf = id => document.getElementById(id).value;
    const config = {
        api_type: valueOf('apiType'),
        api_base: valueOf('apiBase'),
        api_key: valueOf('apiKey'),
        model: valueOf('modelName'),
        temperature: parseFloat(valueOf('temperature')),
        max_tokens: parseInt(valueOf('maxTokens'), 10),
        prompt: valueOf('testPrompt'),
        concurrency: parseInt(valueOf('concurrency'), 10),
        requests_per_concurrency: parseInt(valueOf('requestsPerConcurrency'), 10)
    };

    fetch('/api/config', {
        method: 'POST',
        headers: {'Content-Type': 'application/json'},
        body: JSON.stringify(config)
    })
    .then(r => {
        if (!r.ok) {
            throw new Error('HTTP ' + r.status);
        }
        return r.json();
    })
    .then(() => {
        alert('配置已保存!');
    })
    .catch(err => {
        alert('配置保存失败: ' + err.message);
    });
}
|
||||
|
||||
// Start a test run through POST /api/run-test and show the outcome.
// The endpoint answers with a single JSON document of the form
// {id, timestamp, config, stats, results}; it does not stream progress events.
// The response is therefore read as JSON and mapped to the shape that
// displayResults() and drawCharts() consume.
async function runTest() {
    const btn = document.getElementById('runBtn');
    const progressSection = document.getElementById('progressSection');
    const logOutput = document.getElementById('logOutput');
    const progressBar = document.getElementById('progressBar');

    const setProgress = percent => {
        progressBar.style.width = percent + '%';
        progressBar.textContent = Math.round(percent) + '%';
    };
    const log = message => {
        logOutput.textContent += message + '\n';
        logOutput.scrollTop = logOutput.scrollHeight;
    };

    btn.disabled = true;
    progressSection.style.display = 'block';
    logOutput.textContent = '';
    setProgress(0);

    const payload = {name: document.getElementById('testName').value};
    // Forward the load settings from the form when they are valid integers.
    // NOTE(review): the form's "requests per concurrency" value is sent as
    // the backend's runs_per_case - confirm this mapping is the intended one.
    const concurrency = parseInt(document.getElementById('concurrency').value, 10);
    const runsPerCase = parseInt(document.getElementById('requestsPerConcurrency').value, 10);
    if (Number.isInteger(concurrency) && concurrency >= 1) {
        payload.concurrency = concurrency;
    }
    if (Number.isInteger(runsPerCase) && runsPerCase >= 1) {
        payload.runs_per_case = runsPerCase;
    }

    const startedAt = performance.now();
    log('开始测试...');

    try {
        const response = await fetch('/api/run-test', {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify(payload)
        });
        const data = await response.json();
        if (!response.ok || data.error) {
            throw new Error(data.error || ('HTTP ' + response.status));
        }

        const stats = data.stats || {};
        if (stats.error) {
            log(stats.error);
        }

        const results = {
            avg_ttft: stats.ttft?.avg,
            avg_tps: stats.tps?.avg,
            avg_latency: stats.total_time?.avg,
            // Wall-clock duration of the whole run as seen by the browser, in seconds.
            total_time: (performance.now() - startedAt) / 1000,
            requests: (data.results || []).map(r => ({
                success: r.success,
                ttft: r.ttft_ms,
                tps: r.tps,
                // The backend approximates the token count by the character count.
                total_tokens: r.total_chars,
                total_time: r.total_time_ms
            }))
        };

        setProgress(100);
        log('测试完成: ' + results.requests.length + ' 个请求');
        currentTest = results;
        displayResults(results);
    } catch (err) {
        log('测试失败: ' + err.message);
    } finally {
        btn.disabled = false;
    }
}
|
||||
|
||||
// Render a finished test run: summary cards, per-request table and charts,
// then switch to the results tab.
function displayResults(results) {
    // Summary cards; a missing (or zero) value is displayed as '-'.
    const summaryCards = [
        ['ttftValue', results.avg_ttft, 'ms'],
        ['tpsValue', results.avg_tps, ''],
        ['latencyValue', results.avg_latency, 'ms'],
        ['totalTimeValue', results.total_time, 's']
    ];
    for (const [elementId, value, unit] of summaryCards) {
        document.getElementById(elementId).textContent =
            value ? value.toFixed(2) + unit : '-';
    }

    // Per-request table.
    const twoDecimals = value => value?.toFixed(2) || '-';
    const tableBody = document.getElementById('resultsBody');
    tableBody.innerHTML = '';
    results.requests.forEach((req, index) => {
        const statusClass = req.success ? 'status-success' : 'status-error';
        const statusMark = req.success ? '✓' : '✗';
        const row = tableBody.insertRow();
        row.innerHTML = `
            <td>${index + 1}</td>
            <td class="${statusClass}">${statusMark}</td>
            <td>${twoDecimals(req.ttft)}</td>
            <td>${twoDecimals(req.tps)}</td>
            <td>${req.total_tokens || '-'}</td>
            <td>${twoDecimals(req.total_time)}</td>
        `;
    });

    // Charts.
    drawCharts(results);

    // Switch to the results tab (third tab button).
    document.querySelectorAll('.tab')[2].click();
}
|
||||
|
||||
// Redraw both charts from the successful requests of a test run:
// a bar chart of TTFT and total time per request, and a line chart of TPS.
function drawCharts(results) {
    const okRequests = results.requests.filter(r => r.success);
    const requestLabels = () => okRequests.map((_, i) => `请求 ${i + 1}`);
    // A fresh options object per chart, so the two charts share no state.
    const baseOptions = () => ({
        responsive: true,
        maintainAspectRatio: false,
        scales: {
            y: { beginAtZero: true }
        }
    });

    // Latency distribution chart.
    const latencyCtx = document.getElementById('latencyChart').getContext('2d');
    if (latencyChart) latencyChart.destroy();

    const ttftSeries = {
        label: 'TTFT (ms)',
        data: okRequests.map(r => r.ttft),
        backgroundColor: 'rgba(102, 126, 234, 0.6)',
        borderColor: 'rgba(102, 126, 234, 1)',
        borderWidth: 1
    };
    const totalTimeSeries = {
        label: '总耗时 (ms)',
        data: okRequests.map(r => r.total_time),
        backgroundColor: 'rgba(118, 75, 162, 0.6)',
        borderColor: 'rgba(118, 75, 162, 1)',
        borderWidth: 1
    };
    latencyChart = new Chart(latencyCtx, {
        type: 'bar',
        data: {
            labels: requestLabels(),
            datasets: [ttftSeries, totalTimeSeries]
        },
        options: baseOptions()
    });

    // TPS trend chart.
    const tpsCtx = document.getElementById('tpsChart').getContext('2d');
    if (tpsChart) tpsChart.destroy();

    const tpsSeries = {
        label: 'TPS',
        data: okRequests.map(r => r.tps),
        borderColor: 'rgba(102, 126, 234, 1)',
        backgroundColor: 'rgba(102, 126, 234, 0.1)',
        fill: true,
        tension: 0.4
    };
    tpsChart = new Chart(tpsCtx, {
        type: 'line',
        data: {
            labels: requestLabels(),
            datasets: [tpsSeries]
        },
        options: baseOptions()
    });
}
|
||||
|
||||
// 加载当前日期到测试名称
|
||||
document.getElementById('testName').value = '性能测试 ' + new Date().toLocaleString('zh-CN');
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
Loading…
Reference in New Issue