Files
ChouJuGEO/modules/roi_analyzer.py
T
刘国栋 8f7f082c3d feat: 重构项目结构并添加平台同步基础架构
- 重构项目目录结构,将功能模块移至 modules/ 目录
- 创建平台同步基础架构,包括发布器基类和 GitHub 发布器
- 新增 UI 状态管理模块 (modules/ui/state.py) 统一管理会话状态
- 更新依赖配置,添加平台同步所需依赖 (httpx, pyperclip)
- 整理文档结构,将所有文档分类移至 docs/ 目录
- 添加 .cursorrules 文件定义项目开发规范
- 清理根目录重复文件,保持项目结构整洁
2026-01-30 10:21:29 +08:00

423 lines
16 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
ROI 分析与成本优化模块
用于计算 API 调用成本、分析 ROI、提供成本优化建议
"""
from typing import Dict, List, Optional, Tuple
from datetime import datetime, timedelta
import pandas as pd
from collections import defaultdict
class ROIAnalyzer:
"""ROI 分析器"""
def __init__(self, usd_to_cny_rate: float = 7.2):
"""
Args:
usd_to_cny_rate: USD 到 CNY 的汇率(默认 7.2
"""
self.usd_to_cny_rate = usd_to_cny_rate
# 各平台 API 定价(每 1K tokensUSD
# 注意:这些是示例价格,实际价格可能不同,需要根据实际情况更新
self.pricing_config = {
"DeepSeek": {
"input": 0.00014, # $0.14 per 1M tokens
"output": 0.00028, # $0.28 per 1M tokens
},
"OpenAI (GPT)": {
"gpt-4": {"input": 0.03, "output": 0.06},
"gpt-4-turbo": {"input": 0.01, "output": 0.03},
"gpt-3.5-turbo": {"input": 0.0005, "output": 0.0015},
},
"Tongyi (通义千问)": {
"qwen-plus": {"input": 0.002, "output": 0.008},
"qwen-turbo": {"input": 0.0008, "output": 0.002},
},
"Groq": {
"input": 0.0, # 免费
"output": 0.0,
},
"Moonshot (Kimi)": {
"moonshot-v1-8k": {"input": 0.012, "output": 0.012},
"moonshot-v1-32k": {"input": 0.024, "output": 0.024},
},
"豆包(字节跳动)": {
"doubao-pro-4k": {"input": 0.0008, "output": 0.002},
"doubao-lite-4k": {"input": 0.0004, "output": 0.001},
},
"文心一言(百度)": {
"ernie-4.0": {"input": 0.012, "output": 0.012},
"ernie-3.5": {"input": 0.002, "output": 0.002},
},
}
def calculate_cost(
self,
provider: str,
model: str,
input_tokens: int,
output_tokens: int
) -> Tuple[float, float]:
"""
计算 API 调用成本
Args:
provider: 提供商名称
model: 模型名称
input_tokens: 输入 token 数量
output_tokens: 输出 token 数量
Returns:
(cost_usd, cost_cny) 元组
"""
cost_usd = 0.0
# 获取定价配置
pricing = self.pricing_config.get(provider, {})
if not pricing:
# 如果没有配置,返回 0
return 0.0, 0.0
# 处理不同的定价结构
if "input" in pricing and "output" in pricing:
# 统一定价(如 DeepSeek、Groq
input_price = pricing["input"]
output_price = pricing["output"]
elif model in pricing:
# 按模型定价(如 OpenAI
model_pricing = pricing[model]
input_price = model_pricing.get("input", 0.0)
output_price = model_pricing.get("output", 0.0)
else:
# 默认使用第一个模型的定价
if pricing:
first_model = list(pricing.keys())[0]
if isinstance(pricing[first_model], dict):
input_price = pricing[first_model].get("input", 0.0)
output_price = pricing[first_model].get("output", 0.0)
else:
input_price = pricing.get("input", 0.0)
output_price = pricing.get("output", 0.0)
else:
input_price = 0.0
output_price = 0.0
# 计算成本(价格是每 1K tokens)
cost_usd = (input_tokens / 1000.0 * input_price) + (output_tokens / 1000.0 * output_price)
cost_cny = cost_usd * self.usd_to_cny_rate
return cost_usd, cost_cny
def analyze_costs(
self,
api_calls_df: pd.DataFrame,
verify_results_df: Optional[pd.DataFrame] = None
) -> Dict:
"""
分析成本数据
Args:
api_calls_df: API 调用记录 DataFrame
verify_results_df: 验证结果 DataFrame(可选,用于 ROI 分析)
Returns:
成本分析结果字典
"""
if api_calls_df.empty:
return {
"total_cost_usd": 0.0,
"total_cost_cny": 0.0,
"total_tokens": 0,
"total_calls": 0,
"cost_by_operation": {},
"cost_by_provider": {},
"cost_by_keyword": {},
"cost_by_platform": {},
"daily_costs": [],
"roi_analysis": {}
}
# 总成本
total_cost_usd = api_calls_df["成本(USD)"].sum()
total_cost_cny = api_calls_df["成本(CNY)"].sum()
total_tokens = api_calls_df["总Token"].sum()
total_calls = len(api_calls_df)
# 按操作类型统计
cost_by_operation = {}
if "操作类型" in api_calls_df.columns:
operation_groups = api_calls_df.groupby("操作类型")
for op_type, group in operation_groups:
cost_by_operation[op_type] = {
"cost_usd": group["成本(USD)"].sum(),
"cost_cny": group["成本(CNY)"].sum(),
"calls": len(group),
"tokens": group["总Token"].sum()
}
# 按提供商统计
cost_by_provider = {}
if "提供商" in api_calls_df.columns:
provider_groups = api_calls_df.groupby("提供商")
for provider, group in provider_groups:
cost_by_provider[provider] = {
"cost_usd": group["成本(USD)"].sum(),
"cost_cny": group["成本(CNY)"].sum(),
"calls": len(group),
"tokens": group["总Token"].sum()
}
# 按关键词统计
cost_by_keyword = {}
if "关键词" in api_calls_df.columns:
keyword_groups = api_calls_df.groupby("关键词")
for keyword, group in keyword_groups:
if pd.notna(keyword) and keyword:
cost_by_keyword[keyword] = {
"cost_usd": group["成本(USD)"].sum(),
"cost_cny": group["成本(CNY)"].sum(),
"calls": len(group),
"tokens": group["总Token"].sum()
}
# 按平台统计
cost_by_platform = {}
if "平台" in api_calls_df.columns:
platform_groups = api_calls_df.groupby("平台")
for platform, group in platform_groups:
if pd.notna(platform) and platform:
cost_by_platform[platform] = {
"cost_usd": group["成本(USD)"].sum(),
"cost_cny": group["成本(CNY)"].sum(),
"calls": len(group),
"tokens": group["总Token"].sum()
}
# 每日成本趋势
daily_costs = []
if "调用时间" in api_calls_df.columns:
api_calls_df["日期"] = pd.to_datetime(api_calls_df["调用时间"]).dt.date
daily_groups = api_calls_df.groupby("日期")
for date, group in daily_groups:
daily_costs.append({
"date": date.isoformat() if isinstance(date, datetime.date) else str(date),
"cost_usd": group["成本(USD)"].sum(),
"cost_cny": group["成本(CNY)"].sum(),
"calls": len(group),
"tokens": group["总Token"].sum()
})
daily_costs.sort(key=lambda x: x["date"])
# ROI 分析(如果有验证结果)
roi_analysis = {}
if verify_results_df is not None and not verify_results_df.empty:
roi_analysis = self._calculate_roi(api_calls_df, verify_results_df)
return {
"total_cost_usd": total_cost_usd,
"total_cost_cny": total_cost_cny,
"total_tokens": int(total_tokens),
"total_calls": total_calls,
"cost_by_operation": cost_by_operation,
"cost_by_provider": cost_by_provider,
"cost_by_keyword": cost_by_keyword,
"cost_by_platform": cost_by_platform,
"daily_costs": daily_costs,
"roi_analysis": roi_analysis
}
def _calculate_roi(
self,
api_calls_df: pd.DataFrame,
verify_results_df: pd.DataFrame
) -> Dict:
"""
计算 ROI(基于验证结果)
Args:
api_calls_df: API 调用记录
verify_results_df: 验证结果
Returns:
ROI 分析结果
"""
# 计算总成本
total_cost = api_calls_df["成本(CNY)"].sum()
# 计算提及率提升(简化估算)
# 这里假设每次提及的价值为某个固定值(需要根据实际情况调整)
mention_value_per_mention = 10.0 # 每次提及的价值(CNY),可配置
# 统计品牌提及次数
brand_mentions = verify_results_df[verify_results_df["品牌"] == verify_results_df["品牌"].iloc[0] if len(verify_results_df) > 0 else ""]
total_mentions = brand_mentions["提及次数"].sum() if "提及次数" in brand_mentions.columns else 0
# 估算价值
estimated_value = total_mentions * mention_value_per_mention
# 计算 ROI
roi_ratio = (estimated_value - total_cost) / total_cost * 100 if total_cost > 0 else 0
roi_value = estimated_value - total_cost
# 按关键词分析 ROI
keyword_roi = {}
if "关键词" in api_calls_df.columns and "问题" in verify_results_df.columns:
# 合并数据
keyword_costs = api_calls_df.groupby("关键词")["成本(CNY)"].sum()
keyword_mentions = verify_results_df.groupby("问题")["提及次数"].sum()
for keyword in keyword_costs.index:
if pd.notna(keyword) and keyword:
cost = keyword_costs[keyword]
mentions = keyword_mentions.get(keyword, 0)
value = mentions * mention_value_per_mention
roi = (value - cost) / cost * 100 if cost > 0 else 0
keyword_roi[keyword] = {
"cost": cost,
"mentions": int(mentions),
"value": value,
"roi": roi
}
return {
"total_cost": total_cost,
"total_mentions": int(total_mentions),
"estimated_value": estimated_value,
"roi_ratio": roi_ratio,
"roi_value": roi_value,
"mention_value_per_mention": mention_value_per_mention,
"keyword_roi": keyword_roi
}
def get_optimization_suggestions(self, cost_analysis: Dict) -> List[Dict]:
"""
获取成本优化建议
Args:
cost_analysis: 成本分析结果
Returns:
优化建议列表
"""
suggestions = []
total_cost = cost_analysis.get("total_cost_cny", 0.0)
cost_by_provider = cost_analysis.get("cost_by_provider", {})
cost_by_keyword = cost_analysis.get("cost_by_keyword", {})
cost_by_operation = cost_analysis.get("cost_by_operation", {})
# 检查是否有高成本提供商
if cost_by_provider:
sorted_providers = sorted(
cost_by_provider.items(),
key=lambda x: x[1]["cost_cny"],
reverse=True
)
top_provider = sorted_providers[0]
if top_provider[1]["cost_cny"] > total_cost * 0.5:
suggestions.append({
"type": "provider",
"priority": "",
"title": f"考虑使用更便宜的提供商替代 {top_provider[0]}",
"description": f"{top_provider[0]} 占总成本的 {top_provider[1]['cost_cny']/total_cost*100:.1f}%,考虑使用更经济的替代方案",
"savings_estimate": top_provider[1]["cost_cny"] * 0.3 # 估算可节省30%
})
# 检查是否有低 ROI 关键词
roi_analysis = cost_analysis.get("roi_analysis", {})
keyword_roi = roi_analysis.get("keyword_roi", {})
if keyword_roi:
low_roi_keywords = [
(kw, data) for kw, data in keyword_roi.items()
if data.get("roi", 0) < 0
]
if low_roi_keywords:
suggestions.append({
"type": "keyword",
"priority": "",
"title": f"发现 {len(low_roi_keywords)} 个负 ROI 关键词",
"description": "这些关键词的成本高于产生的价值,建议暂停或优化",
"keywords": [kw for kw, _ in low_roi_keywords[:5]]
})
# 检查操作类型分布
if cost_by_operation:
verify_cost = cost_by_operation.get("验证", {}).get("cost_cny", 0.0)
generate_cost = cost_by_operation.get("生成", {}).get("cost_cny", 0.0)
if verify_cost > total_cost * 0.7:
suggestions.append({
"type": "operation",
"priority": "",
"title": "验证成本占比过高",
"description": f"验证操作占总成本的 {verify_cost/total_cost*100:.1f}%,建议减少验证频率或使用更便宜的验证模型",
"savings_estimate": verify_cost * 0.2
})
# 如果没有建议,添加通用建议
if not suggestions:
suggestions.append({
"type": "general",
"priority": "",
"title": "成本控制良好",
"description": "当前成本结构合理,继续保持"
})
return suggestions
def estimate_future_cost(
self,
api_calls_df: pd.DataFrame,
days: int = 30
) -> Dict:
"""
估算未来成本
Args:
api_calls_df: 历史 API 调用记录
days: 预测天数
Returns:
未来成本估算
"""
if api_calls_df.empty:
return {
"estimated_daily_cost_cny": 0.0,
"estimated_total_cost_cny": 0.0,
"confidence": ""
}
# 计算日均成本
if "调用时间" in api_calls_df.columns:
api_calls_df["日期"] = pd.to_datetime(api_calls_df["调用时间"]).dt.date
daily_costs = api_calls_df.groupby("日期")["成本(CNY)"].sum()
if len(daily_costs) > 0:
avg_daily_cost = daily_costs.mean()
estimated_total = avg_daily_cost * days
# 计算置信度(基于数据点数量)
confidence = "" if len(daily_costs) >= 7 else ("" if len(daily_costs) >= 3 else "")
return {
"estimated_daily_cost_cny": float(avg_daily_cost),
"estimated_total_cost_cny": float(estimated_total),
"confidence": confidence,
"data_points": len(daily_costs)
}
# 如果没有日期数据,使用总成本估算
total_cost = api_calls_df["成本(CNY)"].sum()
# 假设数据覆盖最近7天
avg_daily = total_cost / 7.0 if total_cost > 0 else 0.0
return {
"estimated_daily_cost_cny": avg_daily,
"estimated_total_cost_cny": avg_daily * days,
"confidence": "",
"data_points": 0
}