405 lines
12 KiB
Python
405 lines
12 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
"""
|
|||
|
|
LLM Performance Test Tool
|
|||
|
|
支持本地和云端大模型性能测试,兼容 OpenAI API
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import os
|
|||
|
|
import json
|
|||
|
|
import time
|
|||
|
|
import uuid
|
|||
|
|
import statistics
|
|||
|
|
from datetime import datetime
|
|||
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|||
|
|
from threading import Lock
|
|||
|
|
|
|||
|
|
from flask import Flask, render_template, request, jsonify, send_from_directory
|
|||
|
|
import requests
|
|||
|
|
|
|||
|
|
app = Flask(__name__)
# Session-signing key. Prefer the SECRET_KEY environment variable so a real
# deployment does not run with the key that is committed to source control;
# the historical literal is kept only as a fallback for local use.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY') or 'llm-perf-test-secret-key'
|
|||
|
|
|
|||
|
|
# Data storage directory: a "data" folder next to this file, created on import.
DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')
os.makedirs(DATA_DIR, exist_ok=True)

# Paths of the JSON files kept inside DATA_DIR.
CONFIG_FILE, TEST_CASES_FILE, RESULTS_FILE = (
    os.path.join(DATA_DIR, filename)
    for filename in ('config.json', 'test_cases.json', 'results.json')
)
|
|||
|
|
|
|||
|
|
# Default configuration, used for any key that the saved config does not set.
DEFAULT_CONFIG = dict(
    api_base="http://localhost:11434/v1",
    api_key="",
    model="qwen2.5:latest",
    timeout=60,
    max_tokens=512,
    temperature=0.7,
)
|
|||
|
|
|
|||
|
|
# Default test cases, used when no test-case file has been saved yet.
# Each row below is (id, name, prompt, expected_length).
_CASE_FIELDS = ("id", "name", "prompt", "expected_length")
DEFAULT_TEST_CASES = [
    dict(zip(_CASE_FIELDS, row))
    for row in (
        ("tc_001", "简单问答", "你好,请介绍一下自己。", 100),
        ("tc_002", "代码生成", "写一个Python函数,计算斐波那契数列的前n项。", 200),
        (
            "tc_003",
            "长文本理解",
            """请总结以下段落的主要观点:\n\n人工智能(AI)是计算机科学的一个分支,致力于创造能够执行通常需要人类智能的任务的系统。这些任务包括视觉感知、语音识别、决策制定和语言翻译等。机器学习是AI的一个子集,它使计算机能够从数据中学习并改进,而无需明确编程。深度学习是机器学习的一种特定方法,使用人工神经网络来模拟人脑的工作方式。近年来,随着计算能力的提升和大数据的可用性,AI技术取得了显著进展,在医疗诊断、自动驾驶汽车、自然语言处理等领域展现出巨大潜力。然而,AI的发展也引发了关于隐私、就业和伦理等方面的担忧,需要社会各界共同探讨和制定相应的规范。""",
            150,
        ),
        ("tc_004", "创意写作", "写一个关于未来城市的短篇科幻故事,约300字。", 400),
    )
]

# Global lock guarding read-modify-write cycles on the results file.
results_lock = Lock()
|
|||
|
|
|
|||
|
|
|
|||
|
|
def load_config():
    """Load the effective configuration.

    Reads CONFIG_FILE and overlays its keys on DEFAULT_CONFIG, so any key the
    file does not define keeps its default value.

    Returns:
        dict: A new dict; changing it does not modify DEFAULT_CONFIG.

    Raises:
        json.JSONDecodeError: If CONFIG_FILE exists but is not valid JSON.
    """
    # Open first and handle the miss (EAFP): with a separate exists() check the
    # file can disappear between the check and the open() call.
    try:
        with open(CONFIG_FILE, 'r', encoding='utf-8') as f:
            stored = json.load(f)
    except FileNotFoundError:
        return DEFAULT_CONFIG.copy()

    # Only a JSON object can be merged. Anything else (for example a literal
    # "null") is treated as "nothing stored" instead of raising TypeError.
    if not isinstance(stored, dict):
        return DEFAULT_CONFIG.copy()
    return {**DEFAULT_CONFIG, **stored}
|
|||
|
|
|
|||
|
|
|
|||
|
|
def save_config(config):
    """Write *config* to CONFIG_FILE as indented UTF-8 JSON, replacing its previous content."""
    with open(CONFIG_FILE, 'w', encoding='utf-8') as out:
        out.write(json.dumps(config, ensure_ascii=False, indent=2))
|
|||
|
|
|
|||
|
|
|
|||
|
|
def load_test_cases():
    """Load the saved test cases, falling back to the built-in defaults.

    Returns:
        The parsed content of TEST_CASES_FILE when that file exists; otherwise
        a fresh list holding a copy of every entry in DEFAULT_TEST_CASES.

    Raises:
        json.JSONDecodeError: If TEST_CASES_FILE exists but is not valid JSON.
    """
    # Open first and handle the miss (EAFP) rather than checking exists() and
    # then opening, which leaves a window in which the file can vanish.
    try:
        with open(TEST_CASES_FILE, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        # Copy each case as well as the list, so a caller that edits a returned
        # case cannot alter DEFAULT_TEST_CASES itself.
        return [dict(case) for case in DEFAULT_TEST_CASES]
|
|||
|
|
|
|||
|
|
|
|||
|
|
def save_test_cases(test_cases):
    """Write *test_cases* to TEST_CASES_FILE as indented UTF-8 JSON, replacing its previous content."""
    with open(TEST_CASES_FILE, 'w', encoding='utf-8') as out:
        out.write(json.dumps(test_cases, ensure_ascii=False, indent=2))
|
|||
|
|
|
|||
|
|
|
|||
|
|
def load_results():
    """Load the stored history of test runs.

    Returns:
        The parsed content of RESULTS_FILE, or an empty list when the file
        does not exist.

    Raises:
        json.JSONDecodeError: If RESULTS_FILE exists but is not valid JSON.
    """
    # Open first and handle the miss (EAFP): an exists() check followed by
    # open() can fail if the file is removed in between.
    try:
        with open(RESULTS_FILE, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        return []
|
|||
|
|
|
|||
|
|
|
|||
|
|
def save_result(result):
    """Append *result* to the history stored in RESULTS_FILE.

    The load/append/write cycle runs under results_lock so concurrent writers
    cannot lose each other's entries.

    Args:
        result: JSON-serializable record of one test run.
    """
    with results_lock:
        results = load_results()
        results.append(result)
        # Write to a sibling temp file and swap it in. Readers call
        # load_results() without taking the lock, so rewriting RESULTS_FILE in
        # place could let them see a truncated, unparsable file.
        tmp_path = RESULTS_FILE + '.tmp'
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, RESULTS_FILE)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def stream_chat_completion(api_base, api_key, model, messages, max_tokens, temperature, timeout):
    """Call a chat-completions endpoint in streaming mode and time the response.

    Posts to ``{api_base}/chat/completions`` with ``stream=True`` and reads the
    server-sent ``data:`` lines until ``[DONE]`` or the end of the stream.

    Args:
        api_base: Base URL of the API; a trailing slash is tolerated.
        api_key: Sent as a Bearer token when non-empty.
        model: Value of the ``model`` field in the request payload.
        messages: Value of the ``messages`` field in the request payload.
        max_tokens: Value of the ``max_tokens`` field in the request payload.
        temperature: Value of the ``temperature`` field in the request payload.
        timeout: Timeout handed to ``requests.post``.

    Returns:
        dict: On success, ``success`` (True), ``ttft_ms`` (time to the first
        non-empty content chunk, 0 if none arrived), ``total_time_ms``,
        ``tps``, ``total_chars`` and ``content``. On any exception,
        ``{"success": False, "error": <message>}``; this function does not
        raise.
    """
    headers = {"Content-Type": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    payload = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stream": True
    }

    first_token_time = None
    content_chunks = []
    # perf_counter is monotonic, so measured durations cannot be skewed by
    # wall-clock adjustments during the request.
    start_time = time.perf_counter()

    try:
        # Avoid a double slash when api_base was configured with a trailing "/".
        url = f"{api_base.rstrip('/')}/chat/completions"

        # The context manager releases the connection even when the loop exits
        # early on [DONE] or an exception interrupts the stream.
        with requests.post(url, headers=headers, json=payload,
                           timeout=timeout, stream=True) as response:
            response.raise_for_status()

            for line in response.iter_lines():
                if not line:
                    continue

                line_str = line.decode('utf-8')
                # The space after "data:" is optional in server-sent events.
                if not line_str.startswith('data:'):
                    continue
                data_str = line_str[5:].strip()
                if data_str == '[DONE]':
                    break

                try:
                    data = json.loads(data_str)
                except json.JSONDecodeError:
                    continue
                if not isinstance(data, dict):
                    continue

                # Some servers emit chunks whose "choices" is an empty list or
                # whose "delta" is null; skip those instead of failing the run.
                choices = data.get('choices') or [{}]
                delta = (choices[0] or {}).get('delta') or {}
                content = delta.get('content') or ''

                if content:
                    if first_token_time is None:
                        first_token_time = time.perf_counter()
                    content_chunks.append(content)

            end_time = time.perf_counter()

        # Metrics. Token count is approximated by the number of characters.
        total_chars = sum(len(c) for c in content_chunks)
        ttft = (first_token_time - start_time) * 1000 if first_token_time else 0  # ms
        total_time = (end_time - start_time) * 1000  # ms
        tps = total_chars / (total_time / 1000) if total_time > 0 else 0

        return {
            "success": True,
            "ttft_ms": round(ttft, 2),
            "total_time_ms": round(total_time, 2),
            "tps": round(tps, 2),
            "total_chars": total_chars,
            "content": ''.join(content_chunks)
        }

    except Exception as e:
        # Top-level boundary for one measurement: report the failure as data so
        # a batch run can carry on with the remaining requests.
        return {
            "success": False,
            "error": str(e)
        }
|
|||
|
|
|
|||
|
|
|
|||
|
|
def run_single_test(api_config, test_case, run_index=0):
    """Run one test case once and tag the measurement with its origin.

    Sends the case's prompt as a single user message through
    stream_chat_completion, then adds ``test_case_id``, ``test_case_name``,
    ``run_index`` and an ISO ``timestamp`` to the dict it returns.
    """
    conversation = [{"role": "user", "content": test_case["prompt"]}]

    outcome = stream_chat_completion(
        api_config["api_base"],
        api_config["api_key"],
        api_config["model"],
        conversation,
        api_config.get("max_tokens", 512),
        api_config.get("temperature", 0.7),
        api_config.get("timeout", 60),
    )

    outcome.update(
        test_case_id=test_case["id"],
        test_case_name=test_case["name"],
        run_index=run_index,
        timestamp=datetime.now().isoformat(),
    )
    return outcome
|
|||
|
|
|
|||
|
|
|
|||
|
|
def run_batch_tests(api_config, test_cases, runs_per_case=1, concurrency=1):
    """Run every test case ``runs_per_case`` times on a thread pool.

    Args:
        api_config: Configuration passed unchanged to run_single_test.
        test_cases: Iterable of test-case dicts.
        runs_per_case: Number of runs scheduled for each test case.
        concurrency: Worker-thread count. Values below 1 are raised to 1;
            None keeps ThreadPoolExecutor's own default.

    Returns:
        list: One result dict per scheduled run, in completion order. A run
        whose task raised is recorded as ``success: False`` with the error
        text, so it still counts in later statistics.
    """
    all_tasks = [
        (api_config, test_case, i)
        for test_case in test_cases
        for i in range(runs_per_case)
    ]
    total = len(all_tasks)
    results = []

    # ThreadPoolExecutor raises ValueError for max_workers <= 0.
    max_workers = None if concurrency is None else max(1, concurrency)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(run_single_test, *task): task for task in all_tasks}

        for completed, future in enumerate(as_completed(futures), start=1):
            _, test_case, run_index = futures[future]
            try:
                result = future.result()
            except Exception as e:
                print(f"Test failed: {e}")
                # Keep the failure in the result set instead of dropping it.
                case_info = test_case if isinstance(test_case, dict) else {}
                result = {
                    "success": False,
                    "error": str(e),
                    "test_case_id": case_info.get("id"),
                    "test_case_name": case_info.get("name"),
                    "run_index": run_index,
                }
            results.append(result)
            print(f"Progress: {completed}/{total}")

    return results
|
|||
|
|
|
|||
|
|
|
|||
|
|
def calculate_statistics(results):
    """Aggregate per-run measurements into summary statistics.

    Args:
        results: List of result dicts. Entries with a truthy ``success`` must
            carry ``ttft_ms``, ``tps`` and ``total_time_ms``.

    Returns:
        dict: ``total_tests``, ``successful``, ``failed`` and ``success_rate``
        (percent), plus ``ttft``, ``tps`` and ``total_time`` summaries, each
        with ``avg``/``min``/``max``/``median`` rounded to two decimals. When
        no run succeeded, the dict holds ``error`` and the counts only.
    """
    successful = [r for r in results if r.get("success")]
    failed_count = len(results) - len(successful)

    if not successful:
        # Keep the counts next to the error so callers can still report how
        # many runs were attempted and failed.
        return {
            "error": "No successful tests",
            "total_tests": len(results),
            "successful": 0,
            "failed": failed_count,
            "success_rate": 0.0,
        }

    def summarize(key):
        """Return avg/min/max/median of one metric over the successful runs."""
        values = [r[key] for r in successful]
        return {
            "avg": round(statistics.mean(values), 2),
            "min": round(min(values), 2),
            "max": round(max(values), 2),
            "median": round(statistics.median(values), 2),
        }

    return {
        "total_tests": len(results),
        "successful": len(successful),
        "failed": failed_count,
        "success_rate": round(len(successful) / len(results) * 100, 2),
        "ttft": summarize("ttft_ms"),
        "tps": summarize("tps"),
        "total_time": summarize("total_time_ms"),
    }
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ==================== Flask Routes ====================
|
|||
|
|
|
|||
|
|
@app.route('/')
def index():
    """Home page: render the ``index.html`` template."""
    page = render_template('index.html')
    return page
|
|||
|
|
|
|||
|
|
|
|||
|
|
@app.route('/api/config', methods=['GET', 'POST'])
def config_api():
    """Configuration management API.

    GET returns the effective configuration from load_config().
    POST replaces the stored configuration with the JSON object in the request
    body and answers ``{"status": "success"}``; a body that is missing or not
    a JSON object is rejected with HTTP 400.
    """
    if request.method == 'GET':
        return jsonify(load_config())

    # silent=True yields None for a missing or unparsable body, so every bad
    # payload is reported through the same 400 response below.
    new_config = request.get_json(silent=True)
    if not isinstance(new_config, dict):
        # Persisting null or a list would leave a config file that cannot be
        # merged with the defaults the next time it is loaded.
        return jsonify({"error": "Config must be a JSON object"}), 400

    save_config(new_config)
    return jsonify({"status": "success"})
|
|||
|
|
|
|||
|
|
|
|||
|
|
@app.route('/api/test-cases', methods=['GET', 'POST', 'PUT', 'DELETE'])
def test_cases_api():
    """Test-case management API.

    GET returns all test cases.
    POST stores the JSON object in the body as a new case under a generated
    ``tc_xxxxxx`` id and returns that id.
    PUT replaces the stored case whose ``id`` equals the body's ``id``;
    answers 404 when no stored case has that id.
    DELETE removes the case named by the ``id`` query parameter.
    POST and PUT answer 400 when the body is not a JSON object (PUT also when
    it has no ``id``).
    """
    if request.method == 'GET':
        return jsonify(load_test_cases())

    if request.method == 'DELETE':
        case_id = request.args.get('id')
        remaining = [tc for tc in load_test_cases() if tc.get('id') != case_id]
        save_test_cases(remaining)
        return jsonify({"status": "success"})

    # POST and PUT both need a JSON object in the body; silent=True turns a
    # missing or unparsable body into None instead of an unhandled error.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"error": "Test case must be a JSON object"}), 400

    test_cases = load_test_cases()

    if request.method == 'POST':
        # The server always assigns the id; one supplied by the client is overwritten.
        payload['id'] = f"tc_{uuid.uuid4().hex[:6]}"
        test_cases.append(payload)
        save_test_cases(test_cases)
        return jsonify({"status": "success", "id": payload['id']})

    # PUT
    if 'id' not in payload:
        return jsonify({"error": "Test case id is required"}), 400
    for i, tc in enumerate(test_cases):
        if tc.get('id') == payload['id']:
            test_cases[i] = payload
            save_test_cases(test_cases)
            return jsonify({"status": "success"})
    # Nothing was updated, so do not report success.
    return jsonify({"error": "Test case not found"}), 404
|
|||
|
|
|
|||
|
|
|
|||
|
|
@app.route('/api/run-test', methods=['POST'])
def run_test_api():
    """Run-test API: execute the selected test cases and store the run.

    Request JSON fields (all optional):
        config: Object whose keys override the saved configuration for this run.
        test_case_ids: Ids of the cases to run; empty or absent runs all cases.
        runs_per_case: Runs per case, integer >= 1 (default 1).
        concurrency: Number of parallel requests, integer >= 1 (default 1).

    Returns:
        The stored run record (id, timestamp, config, stats, results) as JSON,
        or a 400 response with an ``error`` message for invalid input or an
        empty selection.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    supplied_config = data.get('config')
    if supplied_config is not None and not isinstance(supplied_config, dict):
        return jsonify({"error": "config must be a JSON object"}), 400
    # Overlay the supplied keys on the saved configuration so a partial
    # override still has api_base / api_key / model available for each run.
    api_config = {**load_config(), **(supplied_config or {})}

    test_case_ids = data.get('test_case_ids', [])

    try:
        runs_per_case = int(data.get('runs_per_case', 1))
        concurrency = int(data.get('concurrency', 1))
    except (TypeError, ValueError):
        return jsonify({"error": "runs_per_case and concurrency must be integers"}), 400
    if runs_per_case < 1 or concurrency < 1:
        return jsonify({"error": "runs_per_case and concurrency must be >= 1"}), 400

    # Select the test cases to run.
    all_test_cases = load_test_cases()
    if test_case_ids:
        test_cases = [tc for tc in all_test_cases if tc.get('id') in test_case_ids]
    else:
        test_cases = all_test_cases

    if not test_cases:
        return jsonify({"error": "No test cases selected"}), 400

    # Run the tests.
    results = run_batch_tests(api_config, test_cases, runs_per_case, concurrency)

    # Compute the statistics.
    stats = calculate_statistics(results)

    # Store the run.
    # NOTE(review): "config" includes api_key, so the key is written to the
    # results file and returned by the results endpoints. Consider masking it.
    test_run = {
        "id": f"run_{uuid.uuid4().hex[:8]}",
        "timestamp": datetime.now().isoformat(),
        "config": api_config,
        "stats": stats,
        "results": results
    }
    save_result(test_run)

    return jsonify(test_run)
|
|||
|
|
|
|||
|
|
|
|||
|
|
@app.route('/api/results', methods=['GET'])
def get_results_api():
    """Return the stored test-run history, as produced by load_results(), as JSON."""
    history = load_results()
    return jsonify(history)
|
|||
|
|
|
|||
|
|
|
|||
|
|
@app.route('/api/results/<result_id>', methods=['GET'])
def get_result_detail_api(result_id):
    """Return the first stored run whose ``id`` equals *result_id*, or a 404 error."""
    match = next(
        (run for run in load_results() if run.get('id') == result_id),
        None,
    )
    if match is None:
        return jsonify({"error": "Result not found"}), 404
    return jsonify(match)
|
|||
|
|
|
|||
|
|
|
|||
|
|
@app.route('/api/results/<result_id>', methods=['DELETE'])
def delete_result_api(result_id):
    """Delete every stored run whose ``id`` equals *result_id*.

    Answers ``{"status": "success"}`` whether or not a matching run existed.
    """
    with results_lock:
        remaining = [r for r in load_results() if r.get('id') != result_id]
        # Write to a sibling temp file and swap it in. The GET endpoints read
        # the results file without taking the lock, so rewriting it in place
        # could let them see a truncated, unparsable file.
        tmp_path = RESULTS_FILE + '.tmp'
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(remaining, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, RESULTS_FILE)
    return jsonify({"status": "success"})
|
|||
|
|
|
|||
|
|
|
|||
|
|
if __name__ == '__main__':
    # Seed the default configuration and test cases on first start.
    if not os.path.exists(CONFIG_FILE):
        save_config(DEFAULT_CONFIG)
    if not os.path.exists(TEST_CASES_FILE):
        save_test_cases(DEFAULT_TEST_CASES)

    # The server listens on every interface. Flask's debug mode enables the
    # interactive debugger, which lets whoever can reach the port execute
    # arbitrary code, so it is opt-in: set FLASK_DEBUG=1 for local development.
    debug = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(host='0.0.0.0', port=8001, debug=debug)
|