diff --git a/README.md b/README.md index fc3b183..186a91f 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,115 @@ -# llm_perf_test +# LLM 性能测试工具 -LLM性能测试工具 - 支持本地和云端大模型性能测试 \ No newline at end of file +一个用于测试本地和云端大模型性能的网页应用,兼容 OpenAI API 格式。 + +## 功能特性 + +- 🔧 **API 配置管理**:支持配置多个 LLM API 端点 +- 🤖 **模型配置**:管理不同模型的参数设置 +- 📝 **测试用例管理**:创建、编辑、导入/导出测试用例 +- ⚙️ **测试配置**:自定义并发数、请求次数等参数 +- 📊 **可视化图表**:实时显示 TTFT、TPS、延迟等指标 +- 📈 **历史记录**:保存和对比多次测试结果 +- 🌐 **OpenAI API 兼容**:支持任何兼容 OpenAI API 的模型服务 + +## 性能指标 + +- **TTFT (Time To First Token)**:首 token 响应时间 +- **TPS (Tokens Per Second)**:每秒生成 token 数 +- **总延迟**:完整响应时间 +- **吞吐量**:每分钟请求数 + +## 快速开始 + +### 1. 安装依赖 + +```bash +pip install -r requirements.txt +``` + +### 2. 启动应用 + +```bash +python app.py +``` + +或部署到生产环境: + +```bash +./deploy.sh +``` + +### 3. 访问应用 + +打开浏览器访问 http://localhost:8001 + +## 使用说明 + +### 配置 API + +1. 点击"API 配置"标签 +2. 添加新的 API 配置: + - 名称:自定义标识 + - Base URL:API 端点地址(如 http://localhost:11434/v1) + - API Key:认证密钥 + +### 配置模型 + +1. 点击"模型配置"标签 +2. 添加模型配置: + - 选择对应的 API 配置 + - 输入模型名称 + - 设置温度、最大 token 数等参数 + +### 管理测试用例 + +1. 点击"测试用例"标签 +2. 添加测试提示词 +3. 支持批量导入/导出 JSON 格式 + +### 运行测试 + +1. 点击"性能测试"标签 +2. 选择要测试的模型 +3. 选择测试用例 +4. 设置并发数和请求次数 +5. 
点击"开始测试" + +### 查看结果 + +- 实时查看 TTFT、TPS 等指标 +- 查看详细的响应数据 +- 导出结果为 JSON 格式 + +## API 兼容性 + +本工具兼容任何实现 OpenAI API 格式的服务: + +- OpenAI GPT 系列 +- Ollama (本地模型) +- vLLM +- text-generation-inference +- 其他兼容服务 + +## 项目结构 + +``` +llm_perf_test/ +├── app.py # Flask 主应用 +├── requirements.txt # Python 依赖 +├── deploy.sh # 部署脚本 +├── README.md # 使用说明 +└── templates/ + └── index.html # 前端页面 +``` + +## 技术栈 + +- **后端**: Python + Flask +- **前端**: HTML + JavaScript + Chart.js +- **UI**: Tailwind CSS +- **数据存储**: SQLite (JSON 文件) + +## License + +MIT License diff --git a/app.py b/app.py new file mode 100644 index 0000000..2f5f5d7 --- /dev/null +++ b/app.py @@ -0,0 +1,404 @@ +#!/usr/bin/env python3 +""" +LLM Performance Test Tool +支持本地和云端大模型性能测试,兼容 OpenAI API +""" + +import os +import json +import time +import uuid +import statistics +from datetime import datetime +from concurrent.futures import ThreadPoolExecutor, as_completed +from threading import Lock + +from flask import Flask, render_template, request, jsonify, send_from_directory +import requests + +app = Flask(__name__) +app.config['SECRET_KEY'] = 'llm-perf-test-secret-key' + +# 数据存储目录 +DATA_DIR = os.path.join(os.path.dirname(__file__), 'data') +os.makedirs(DATA_DIR, exist_ok=True) + +# 配置文件路径 +CONFIG_FILE = os.path.join(DATA_DIR, 'config.json') +TEST_CASES_FILE = os.path.join(DATA_DIR, 'test_cases.json') +RESULTS_FILE = os.path.join(DATA_DIR, 'results.json') + +# 默认配置 +DEFAULT_CONFIG = { + "api_base": "http://localhost:11434/v1", + "api_key": "", + "model": "qwen2.5:latest", + "timeout": 60, + "max_tokens": 512, + "temperature": 0.7 +} + +# 默认测试用例 +DEFAULT_TEST_CASES = [ + { + "id": "tc_001", + "name": "简单问答", + "prompt": "你好,请介绍一下自己。", + "expected_length": 100 + }, + { + "id": "tc_002", + "name": "代码生成", + "prompt": "写一个Python函数,计算斐波那契数列的前n项。", + "expected_length": 200 + }, + { + "id": "tc_003", + "name": "长文本理解", + "prompt": 
"""请总结以下段落的主要观点:\n\n人工智能(AI)是计算机科学的一个分支,致力于创造能够执行通常需要人类智能的任务的系统。这些任务包括视觉感知、语音识别、决策制定和语言翻译等。机器学习是AI的一个子集,它使计算机能够从数据中学习并改进,而无需明确编程。深度学习是机器学习的一种特定方法,使用人工神经网络来模拟人脑的工作方式。近年来,随着计算能力的提升和大数据的可用性,AI技术取得了显著进展,在医疗诊断、自动驾驶汽车、自然语言处理等领域展现出巨大潜力。然而,AI的发展也引发了关于隐私、就业和伦理等方面的担忧,需要社会各界共同探讨和制定相应的规范。""", + "expected_length": 150 + }, + { + "id": "tc_004", + "name": "创意写作", + "prompt": "写一个关于未来城市的短篇科幻故事,约300字。", + "expected_length": 400 + } +] + +# 全局锁 +results_lock = Lock() + + +def load_config(): + """加载配置""" + if os.path.exists(CONFIG_FILE): + with open(CONFIG_FILE, 'r', encoding='utf-8') as f: + return {**DEFAULT_CONFIG, **json.load(f)} + return DEFAULT_CONFIG.copy() + + +def save_config(config): + """保存配置""" + with open(CONFIG_FILE, 'w', encoding='utf-8') as f: + json.dump(config, f, ensure_ascii=False, indent=2) + + +def load_test_cases(): + """加载测试用例""" + if os.path.exists(TEST_CASES_FILE): + with open(TEST_CASES_FILE, 'r', encoding='utf-8') as f: + return json.load(f) + return DEFAULT_TEST_CASES.copy() + + +def save_test_cases(test_cases): + """保存测试用例""" + with open(TEST_CASES_FILE, 'w', encoding='utf-8') as f: + json.dump(test_cases, f, ensure_ascii=False, indent=2) + + +def load_results(): + """加载历史测试结果""" + if os.path.exists(RESULTS_FILE): + with open(RESULTS_FILE, 'r', encoding='utf-8') as f: + return json.load(f) + return [] + + +def save_result(result): + """保存测试结果""" + with results_lock: + results = load_results() + results.append(result) + with open(RESULTS_FILE, 'w', encoding='utf-8') as f: + json.dump(results, f, ensure_ascii=False, indent=2) + + +def stream_chat_completion(api_base, api_key, model, messages, max_tokens, temperature, timeout): + """ + 流式调用 LLM API,实时计算 TTFT 和 TPS + """ + headers = { + "Content-Type": "application/json" + } + if api_key: + headers["Authorization"] = f"Bearer {api_key}" + + payload = { + "model": model, + "messages": messages, + "max_tokens": max_tokens, + "temperature": temperature, + "stream": True + } + + url = 
f"{api_base}/chat/completions" + + first_token_time = None + start_time = time.time() + total_tokens = 0 + content_chunks = [] + + try: + response = requests.post(url, headers=headers, json=payload, + timeout=timeout, stream=True) + response.raise_for_status() + + for line in response.iter_lines(): + if not line: + continue + + line_str = line.decode('utf-8') + if line_str.startswith('data: '): + data_str = line_str[6:] + if data_str == '[DONE]': + break + + try: + data = json.loads(data_str) + delta = data.get('choices', [{}])[0].get('delta', {}) + content = delta.get('content', '') + + if content: + if first_token_time is None: + first_token_time = time.time() + content_chunks.append(content) + total_tokens += len(content) # 近似token数 + except json.JSONDecodeError: + continue + + end_time = time.time() + + # 计算指标 + ttft = (first_token_time - start_time) * 1000 if first_token_time else 0 # ms + total_time = (end_time - start_time) * 1000 # ms + tps = total_tokens / (total_time / 1000) if total_time > 0 else 0 + + return { + "success": True, + "ttft_ms": round(ttft, 2), + "total_time_ms": round(total_time, 2), + "tps": round(tps, 2), + "total_chars": sum(len(c) for c in content_chunks), + "content": ''.join(content_chunks) + } + + except Exception as e: + return { + "success": False, + "error": str(e) + } + + +def run_single_test(api_config, test_case, run_index=0): + """运行单个测试""" + messages = [{"role": "user", "content": test_case["prompt"]}] + + result = stream_chat_completion( + api_base=api_config["api_base"], + api_key=api_config["api_key"], + model=api_config["model"], + messages=messages, + max_tokens=api_config.get("max_tokens", 512), + temperature=api_config.get("temperature", 0.7), + timeout=api_config.get("timeout", 60) + ) + + result["test_case_id"] = test_case["id"] + result["test_case_name"] = test_case["name"] + result["run_index"] = run_index + result["timestamp"] = datetime.now().isoformat() + + return result + + +def run_batch_tests(api_config, 
test_cases, runs_per_case=1, concurrency=1): + """批量运行测试""" + all_tasks = [] + for test_case in test_cases: + for i in range(runs_per_case): + all_tasks.append((api_config, test_case, i)) + + results = [] + completed = 0 + total = len(all_tasks) + + with ThreadPoolExecutor(max_workers=concurrency) as executor: + futures = {executor.submit(run_single_test, *task): task for task in all_tasks} + + for future in as_completed(futures): + try: + result = future.result() + results.append(result) + completed += 1 + print(f"Progress: {completed}/{total}") + except Exception as e: + print(f"Test failed: {e}") + + return results + + +def calculate_statistics(results): + """计算统计数据""" + successful = [r for r in results if r.get("success")] + failed = [r for r in results if not r.get("success")] + + if not successful: + return {"error": "No successful tests"} + + ttfts = [r["ttft_ms"] for r in successful] + tpss = [r["tps"] for r in successful] + times = [r["total_time_ms"] for r in successful] + + stats = { + "total_tests": len(results), + "successful": len(successful), + "failed": len(failed), + "success_rate": round(len(successful) / len(results) * 100, 2), + "ttft": { + "avg": round(statistics.mean(ttfts), 2), + "min": round(min(ttfts), 2), + "max": round(max(ttfts), 2), + "median": round(statistics.median(ttfts), 2) + }, + "tps": { + "avg": round(statistics.mean(tpss), 2), + "min": round(min(tpss), 2), + "max": round(max(tpss), 2), + "median": round(statistics.median(tpss), 2) + }, + "total_time": { + "avg": round(statistics.mean(times), 2), + "min": round(min(times), 2), + "max": round(max(times), 2) + } + } + + return stats + + +# ==================== Flask Routes ==================== + +@app.route('/') +def index(): + """主页""" + return render_template('index.html') + + +@app.route('/api/config', methods=['GET', 'POST']) +def config_api(): + """配置管理 API""" + if request.method == 'GET': + return jsonify(load_config()) + else: + new_config = request.json + 
save_config(new_config) + return jsonify({"status": "success"}) + + +@app.route('/api/test-cases', methods=['GET', 'POST', 'PUT', 'DELETE']) +def test_cases_api(): + """测试用例管理 API""" + if request.method == 'GET': + return jsonify(load_test_cases()) + + elif request.method == 'POST': + test_cases = load_test_cases() + new_case = request.json + new_case['id'] = f"tc_{uuid.uuid4().hex[:6]}" + test_cases.append(new_case) + save_test_cases(test_cases) + return jsonify({"status": "success", "id": new_case['id']}) + + elif request.method == 'PUT': + updated_case = request.json + test_cases = load_test_cases() + for i, tc in enumerate(test_cases): + if tc['id'] == updated_case['id']: + test_cases[i] = updated_case + break + save_test_cases(test_cases) + return jsonify({"status": "success"}) + + elif request.method == 'DELETE': + case_id = request.args.get('id') + test_cases = load_test_cases() + test_cases = [tc for tc in test_cases if tc['id'] != case_id] + save_test_cases(test_cases) + return jsonify({"status": "success"}) + + +@app.route('/api/run-test', methods=['POST']) +def run_test_api(): + """运行测试 API""" + data = request.json + api_config = data.get('config', load_config()) + test_case_ids = data.get('test_case_ids', []) + runs_per_case = data.get('runs_per_case', 1) + concurrency = data.get('concurrency', 1) + + # 获取要运行的测试用例 + all_test_cases = load_test_cases() + if test_case_ids: + test_cases = [tc for tc in all_test_cases if tc['id'] in test_case_ids] + else: + test_cases = all_test_cases + + if not test_cases: + return jsonify({"error": "No test cases selected"}), 400 + + # 运行测试 + results = run_batch_tests(api_config, test_cases, runs_per_case, concurrency) + + # 计算统计 + stats = calculate_statistics(results) + + # 保存结果 + test_run = { + "id": f"run_{uuid.uuid4().hex[:8]}", + "timestamp": datetime.now().isoformat(), + "config": api_config, + "stats": stats, + "results": results + } + save_result(test_run) + + return jsonify(test_run) + + 
+@app.route('/api/results', methods=['GET'])
+def get_results_api():
+    """Return the full list of saved test-run results."""
+    return jsonify(load_results())
+
+
+@app.route('/api/results/<result_id>', methods=['GET'])
+def get_result_detail_api(result_id):
+    """Return a single test-run result by id, or 404 if the id is unknown."""
+    results = load_results()
+    for result in results:
+        if result.get('id') == result_id:
+            return jsonify(result)
+    return jsonify({"error": "Result not found"}), 404
+
+
+@app.route('/api/results/<result_id>', methods=['DELETE'])
+def delete_result_api(result_id):
+    """Delete a test-run result by id (idempotent: unknown ids also succeed)."""
+    with results_lock:
+        results = load_results()
+        results = [r for r in results if r.get('id') != result_id]
+        with open(RESULTS_FILE, 'w', encoding='utf-8') as f:
+            json.dump(results, f, ensure_ascii=False, indent=2)
+    return jsonify({"status": "success"})
+
+
+if __name__ == '__main__':
+    # Seed default config and test cases on first run
+    if not os.path.exists(CONFIG_FILE):
+        save_config(DEFAULT_CONFIG)
+    if not os.path.exists(TEST_CASES_FILE):
+        save_test_cases(DEFAULT_TEST_CASES)
+
+    app.run(host='0.0.0.0', port=8001, debug=True)
diff --git a/deploy.sh b/deploy.sh
new file mode 100644
index 0000000..a3de384
--- /dev/null
+++ b/deploy.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+# LLM performance test tool deployment script
+# Deploys on port 8001
+
+set -e
+
+echo "=== LLM性能测试工具部署脚本 ==="
+
+# Require Python 3
+if ! command -v python3 &> /dev/null; then
+    echo "错误: 未找到 Python3,请先安装 Python3.8+"
+    exit 1
+fi
+
+PYTHON_VERSION=$(python3 --version | cut -d' ' -f2 | cut -d'.' -f1,2)
+echo "Python版本: $PYTHON_VERSION"
+
+# Create the virtualenv on first run
+if [ ! -d "venv" ]; then
+    echo "创建虚拟环境..."
+    python3 -m venv venv
+fi
+
+# Activate the virtualenv
+echo "激活虚拟环境..."
+source venv/bin/activate
+
+# Upgrade pip
+echo "升级pip..."
+pip install --upgrade pip
+
+# Install dependencies
+echo "安装依赖..."
+pip install -r requirements.txt
+
+# Free port 8001 if an earlier instance is still listening
+if lsof -Pi :8001 -sTCP:LISTEN -t >/dev/null 2>&1; then
+    echo "警告: 端口8001已被占用,尝试停止现有进程..."
+    kill $(lsof -t -i:8001) 2>/dev/null || true
+    sleep 2
+fi
+
+# Start the app
+echo "启动LLM性能测试工具 (端口: 8001)..."
+echo "访问地址: http://localhost:8001" +echo "" +echo "按 Ctrl+C 停止服务" +echo "===================================" + +python app.py diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..64fae30 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +flask==3.0.0 +requests==2.31.0 +openai==1.6.0 +plotly==5.18.0 +pandas==2.1.4 +numpy==1.26.2 +gunicorn==21.2.0 +python-dotenv==1.0.0 diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..6cef85e --- /dev/null +++ b/templates/index.html @@ -0,0 +1,514 @@ + + + + + + LLM 性能测试工具 + + + + +
+

🚀 LLM 性能测试工具

+ +
+ + + +
+ + +
+
+

API 配置

+
+ + +
+
+ + +
+
+ + +
+
+ +
+

模型配置

+
+ + +
+
+ + +
+
+ + +
+
+ +
+

测试用例

+
+ + +
+
+ + +
+
+ + +
+ +
+
+ + +
+
+

运行测试

+
+ + +
+ + + +
+
+ + +
+
+

性能指标

+
+
+
-
+
TTFT (首Token时间)
+
+
+
-
+
TPS (每秒Token数)
+
+
+
-
+
平均延迟
+
+
+
-
+
总耗时
+
+
+
+ +
+

响应时间分布

+
+ +
+
+ +
+

TPS 趋势

+
+ +
+
+ +
+

详细结果

+ + + + + + + + + + + + +
请求ID状态TTFT (ms)TPS总Token数总耗时 (ms)
+
+
+
+ + + +