feat: 重构项目结构并添加平台同步基础架构

- 重构项目目录结构,将功能模块移至 modules/ 目录
- 创建平台同步基础架构,包括发布器基类和 GitHub 发布器
- 新增 UI 状态管理模块 (modules/ui/state.py) 统一管理会话状态
- 更新依赖配置,添加平台同步所需依赖 (httpx, pyperclip)
- 整理文档结构,将所有文档分类移至 docs/ 目录
- 添加 .cursorrules 文件定义项目开发规范
- 清理根目录重复文件,保持项目结构整洁
This commit is contained in:
刘国栋
2026-01-30 10:21:29 +08:00
parent 77d5ec70f8
commit 8f7f082c3d
102 changed files with 33742 additions and 1526 deletions
+422
View File
@@ -0,0 +1,422 @@
"""
ROI 分析与成本优化模块
用于计算 API 调用成本、分析 ROI、提供成本优化建议
"""
from typing import Dict, List, Optional, Tuple
from datetime import datetime, timedelta
import pandas as pd
from collections import defaultdict
class ROIAnalyzer:
    """Cost and ROI analysis for recorded LLM API calls.

    The analyzer prices individual calls from a static per-provider price
    table, aggregates recorded call costs from a pandas DataFrame, derives a
    simple mention-based ROI figure and produces cost-saving suggestions.

    The call-record DataFrame is expected to carry the columns "成本(USD)",
    "成本(CNY)" and "总Token"; the columns "操作类型", "提供商", "关键词",
    "平台" and "调用时间" are optional and only used when present.
    """

    # Default value (CNY) credited to a single brand mention when estimating
    # the return side of the ROI figure.
    DEFAULT_MENTION_VALUE_CNY = 10.0

    def __init__(
        self,
        usd_to_cny_rate: float = 7.2,
        mention_value_per_mention: float = DEFAULT_MENTION_VALUE_CNY,
    ):
        """Create an analyzer.

        Args:
            usd_to_cny_rate: USD -> CNY exchange rate (default 7.2).
            mention_value_per_mention: Value in CNY attributed to one brand
                mention when computing ROI (default 10.0).
        """
        self.usd_to_cny_rate = usd_to_cny_rate
        self.mention_value_per_mention = mention_value_per_mention
        # API prices in USD per 1K tokens.
        # NOTE: these are sample prices; real prices may differ and must be
        # kept up to date by the maintainer.
        # A provider entry is either "flat" ({"input": ..., "output": ...})
        # or a mapping of model name -> {"input": ..., "output": ...}.
        self.pricing_config = {
            "DeepSeek": {
                "input": 0.00014,  # $0.14 per 1M tokens
                "output": 0.00028,  # $0.28 per 1M tokens
            },
            "OpenAI (GPT)": {
                "gpt-4": {"input": 0.03, "output": 0.06},
                "gpt-4-turbo": {"input": 0.01, "output": 0.03},
                "gpt-3.5-turbo": {"input": 0.0005, "output": 0.0015},
            },
            "Tongyi (通义千问)": {
                "qwen-plus": {"input": 0.002, "output": 0.008},
                "qwen-turbo": {"input": 0.0008, "output": 0.002},
            },
            "Groq": {
                "input": 0.0,  # free
                "output": 0.0,
            },
            "Moonshot (Kimi)": {
                "moonshot-v1-8k": {"input": 0.012, "output": 0.012},
                "moonshot-v1-32k": {"input": 0.024, "output": 0.024},
            },
            "豆包(字节跳动)": {
                "doubao-pro-4k": {"input": 0.0008, "output": 0.002},
                "doubao-lite-4k": {"input": 0.0004, "output": 0.001},
            },
            "文心一言(百度)": {
                "ernie-4.0": {"input": 0.012, "output": 0.012},
                "ernie-3.5": {"input": 0.002, "output": 0.002},
            },
        }

    def calculate_cost(
        self,
        provider: str,
        model: str,
        input_tokens: int,
        output_tokens: int
    ) -> Tuple[float, float]:
        """Price a single API call.

        Args:
            provider: Provider name (a key of ``pricing_config``).
            model: Model name; only consulted for providers priced per model.
            input_tokens: Number of input tokens.
            output_tokens: Number of output tokens.

        Returns:
            ``(cost_usd, cost_cny)``. An unknown provider yields ``(0.0, 0.0)``.
            An unknown model of a per-model provider is priced like the first
            model listed for that provider.
        """
        pricing = self.pricing_config.get(provider, {})
        if not pricing:
            # No price information for this provider.
            return 0.0, 0.0

        if "input" in pricing and "output" in pricing:
            # Flat pricing shared by all models (e.g. DeepSeek, Groq).
            rates = pricing
        elif isinstance(pricing.get(model), dict):
            # Per-model pricing (e.g. OpenAI).
            rates = pricing[model]
        else:
            # Unknown model: fall back to the first configured entry; if that
            # entry is not a price mapping, read the rates from the provider
            # entry itself.
            first_entry = next(iter(pricing.values()))
            rates = first_entry if isinstance(first_entry, dict) else pricing

        input_price = rates.get("input", 0.0)
        output_price = rates.get("output", 0.0)

        # Prices are quoted per 1K tokens.
        cost_usd = (input_tokens / 1000.0 * input_price) + (output_tokens / 1000.0 * output_price)
        cost_cny = cost_usd * self.usd_to_cny_rate
        return cost_usd, cost_cny

    @staticmethod
    def _summarize(frame: pd.DataFrame) -> Dict:
        """Return cost / call / token totals of *frame* as plain Python numbers."""
        return {
            "cost_usd": float(frame["成本(USD)"].sum()),
            "cost_cny": float(frame["成本(CNY)"].sum()),
            "calls": len(frame),
            "tokens": int(frame["总Token"].sum()),
        }

    def _summarize_by(self, frame: pd.DataFrame, column: str, skip_blank: bool) -> Dict:
        """Return ``{group key: totals}`` for *column*, or ``{}`` if it is absent.

        With ``skip_blank`` set, groups whose key is missing or falsy (e.g. an
        empty string) are left out.
        """
        if column not in frame.columns:
            return {}
        summary = {}
        for key, group in frame.groupby(column):
            if skip_blank and not (pd.notna(key) and key):
                continue
            summary[key] = self._summarize(group)
        return summary

    @staticmethod
    def _call_days(api_calls_df: pd.DataFrame):
        """Return one ``YYYY-MM-DD`` string per row, parsed from "调用时间".

        The result is a standalone array so that callers can group by day
        without adding a helper column to the caller-owned DataFrame.
        """
        timestamps = pd.to_datetime(api_calls_df["调用时间"])
        return timestamps.dt.strftime("%Y-%m-%d").to_numpy()

    def analyze_costs(
        self,
        api_calls_df: pd.DataFrame,
        verify_results_df: Optional[pd.DataFrame] = None
    ) -> Dict:
        """Aggregate recorded API-call costs.

        The input DataFrame is not modified.

        Args:
            api_calls_df: API call records.
            verify_results_df: Optional verification results; when given and
                non-empty, an ROI analysis is included.

        Returns:
            Dict with the keys ``total_cost_usd``, ``total_cost_cny``,
            ``total_tokens``, ``total_calls``, ``cost_by_operation``,
            ``cost_by_provider``, ``cost_by_keyword``, ``cost_by_platform``,
            ``daily_costs`` (list sorted by date) and ``roi_analysis``.
        """
        if api_calls_df.empty:
            return {
                "total_cost_usd": 0.0,
                "total_cost_cny": 0.0,
                "total_tokens": 0,
                "total_calls": 0,
                "cost_by_operation": {},
                "cost_by_provider": {},
                "cost_by_keyword": {},
                "cost_by_platform": {},
                "daily_costs": [],
                "roi_analysis": {}
            }

        totals = self._summarize(api_calls_df)

        # Operation and provider breakdowns keep every group; keyword and
        # platform breakdowns drop blank keys.
        cost_by_operation = self._summarize_by(api_calls_df, "操作类型", skip_blank=False)
        cost_by_provider = self._summarize_by(api_calls_df, "提供商", skip_blank=False)
        cost_by_keyword = self._summarize_by(api_calls_df, "关键词", skip_blank=True)
        cost_by_platform = self._summarize_by(api_calls_df, "平台", skip_blank=True)

        # Daily cost trend.
        daily_costs = []
        if "调用时间" in api_calls_df.columns:
            for day, group in api_calls_df.groupby(self._call_days(api_calls_df)):
                entry = {"date": str(day)}
                entry.update(self._summarize(group))
                daily_costs.append(entry)
            # ISO dates sort chronologically as plain strings.
            daily_costs.sort(key=lambda item: item["date"])

        # ROI analysis (only when verification results are available).
        roi_analysis = {}
        if verify_results_df is not None and not verify_results_df.empty:
            roi_analysis = self._calculate_roi(api_calls_df, verify_results_df)

        return {
            "total_cost_usd": totals["cost_usd"],
            "total_cost_cny": totals["cost_cny"],
            "total_tokens": totals["tokens"],
            "total_calls": totals["calls"],
            "cost_by_operation": cost_by_operation,
            "cost_by_provider": cost_by_provider,
            "cost_by_keyword": cost_by_keyword,
            "cost_by_platform": cost_by_platform,
            "daily_costs": daily_costs,
            "roi_analysis": roi_analysis
        }

    def _calculate_roi(
        self,
        api_calls_df: pd.DataFrame,
        verify_results_df: pd.DataFrame
    ) -> Dict:
        """Compute a simplified, mention-based ROI.

        Every mention is valued at ``self.mention_value_per_mention`` CNY.
        The overall mention count only includes rows whose "品牌" equals the
        brand of the first row (all rows when that column is absent). The
        per-keyword figures match "关键词" of the call records against "问题"
        of the verification results and sum mentions over all rows.
        Missing "提及次数" data counts as zero mentions.

        Args:
            api_calls_df: API call records.
            verify_results_df: Verification results.

        Returns:
            Dict with ``total_cost``, ``total_mentions``, ``estimated_value``,
            ``roi_ratio`` (percent), ``roi_value``,
            ``mention_value_per_mention`` and ``keyword_roi``.
        """
        mention_value = self.mention_value_per_mention
        total_cost = float(api_calls_df["成本(CNY)"].sum())
        has_mentions = "提及次数" in verify_results_df.columns

        # Count mentions of the (first) brand.
        total_mentions = 0
        if has_mentions and not verify_results_df.empty:
            brand_rows = verify_results_df
            if "品牌" in brand_rows.columns:
                first_brand = brand_rows["品牌"].iloc[0]
                brand_rows = brand_rows[brand_rows["品牌"] == first_brand]
            total_mentions = int(brand_rows["提及次数"].sum())

        estimated_value = total_mentions * mention_value
        roi_value = estimated_value - total_cost
        roi_ratio = roi_value / total_cost * 100 if total_cost > 0 else 0

        # ROI per keyword.
        keyword_roi = {}
        if "关键词" in api_calls_df.columns and "问题" in verify_results_df.columns:
            keyword_costs = api_calls_df.groupby("关键词")["成本(CNY)"].sum()
            if has_mentions:
                keyword_mentions = verify_results_df.groupby("问题")["提及次数"].sum()
            else:
                keyword_mentions = {}
            for keyword, cost in keyword_costs.items():
                if not (pd.notna(keyword) and keyword):
                    continue
                cost = float(cost)
                mentions = int(keyword_mentions.get(keyword, 0))
                value = mentions * mention_value
                keyword_roi[keyword] = {
                    "cost": cost,
                    "mentions": mentions,
                    "value": value,
                    "roi": (value - cost) / cost * 100 if cost > 0 else 0
                }

        return {
            "total_cost": total_cost,
            "total_mentions": total_mentions,
            "estimated_value": estimated_value,
            "roi_ratio": roi_ratio,
            "roi_value": roi_value,
            "mention_value_per_mention": mention_value,
            "keyword_roi": keyword_roi
        }

    def get_optimization_suggestions(self, cost_analysis: Dict) -> List[Dict]:
        """Derive cost-saving suggestions from an ``analyze_costs`` result.

        Args:
            cost_analysis: Result dict of ``analyze_costs`` (missing keys are
                treated as empty / zero).

        Returns:
            List of suggestion dicts (``type``, ``priority``, ``title``,
            ``description`` and, depending on the type, ``savings_estimate``
            or ``keywords``). When nothing stands out, a single "general"
            entry is returned.
        """
        # NOTE(review): every "priority" value below is an empty string in the
        # source this was written from; confirm whether labels are intended.
        suggestions = []
        total_cost = cost_analysis.get("total_cost_cny", 0.0)
        cost_by_provider = cost_analysis.get("cost_by_provider", {})
        cost_by_operation = cost_analysis.get("cost_by_operation", {})

        # A single provider responsible for more than half of the spend.
        # The total_cost > 0 guard keeps the share computation from dividing
        # by zero when the totals and the breakdown disagree.
        if cost_by_provider and total_cost > 0:
            top_name, top_stats = max(
                cost_by_provider.items(),
                key=lambda item: item[1]["cost_cny"]
            )
            top_cost = top_stats["cost_cny"]
            if top_cost > total_cost * 0.5:
                suggestions.append({
                    "type": "provider",
                    "priority": "",
                    "title": f"考虑使用更便宜的提供商替代 {top_name}",
                    "description": f"{top_name} 占总成本的 {top_cost/total_cost*100:.1f}%,考虑使用更经济的替代方案",
                    "savings_estimate": top_cost * 0.3  # assume 30% can be saved
                })

        # Keywords that cost more than the value they generate.
        roi_analysis = cost_analysis.get("roi_analysis", {})
        keyword_roi = roi_analysis.get("keyword_roi", {})
        low_roi_keywords = [
            keyword for keyword, data in keyword_roi.items()
            if data.get("roi", 0) < 0
        ]
        if low_roi_keywords:
            suggestions.append({
                "type": "keyword",
                "priority": "",
                "title": f"发现 {len(low_roi_keywords)} 个负 ROI 关键词",
                "description": "这些关键词的成本高于产生的价值,建议暂停或优化",
                "keywords": low_roi_keywords[:5]
            })

        # Verification dominating the spend (more than 70%).
        if cost_by_operation and total_cost > 0:
            verify_cost = cost_by_operation.get("验证", {}).get("cost_cny", 0.0)
            if verify_cost > total_cost * 0.7:
                suggestions.append({
                    "type": "operation",
                    "priority": "",
                    "title": "验证成本占比过高",
                    "description": f"验证操作占总成本的 {verify_cost/total_cost*100:.1f}%,建议减少验证频率或使用更便宜的验证模型",
                    "savings_estimate": verify_cost * 0.2
                })

        # Nothing to complain about: return a generic entry.
        if not suggestions:
            suggestions.append({
                "type": "general",
                "priority": "",
                "title": "成本控制良好",
                "description": "当前成本结构合理,继续保持"
            })
        return suggestions

    def estimate_future_cost(
        self,
        api_calls_df: pd.DataFrame,
        days: int = 30
    ) -> Dict:
        """Extrapolate future spend from historical call records.

        The daily average is taken over the days that actually have records.
        Without a "调用时间" column the records are assumed to span the last
        7 days. The input DataFrame is not modified.

        Args:
            api_calls_df: Historical API call records.
            days: Number of days to project.

        Returns:
            Dict with ``estimated_daily_cost_cny``,
            ``estimated_total_cost_cny``, ``confidence`` and ``data_points``
            (number of distinct days the average is based on).
        """
        if api_calls_df.empty:
            return {
                "estimated_daily_cost_cny": 0.0,
                "estimated_total_cost_cny": 0.0,
                "confidence": "",
                "data_points": 0
            }

        # Average cost per recorded day.
        if "调用时间" in api_calls_df.columns:
            daily_costs = api_calls_df["成本(CNY)"].groupby(self._call_days(api_calls_df)).sum()
            data_points = len(daily_costs)
            if data_points > 0:
                avg_daily_cost = float(daily_costs.mean())
                # Confidence tier based on the number of data points
                # (>= 7 days, >= 3 days, fewer).
                # NOTE(review): all three tier labels are empty strings in the
                # source this was written from; confirm the intended labels.
                confidence = "" if data_points >= 7 else ("" if data_points >= 3 else "")
                return {
                    "estimated_daily_cost_cny": avg_daily_cost,
                    "estimated_total_cost_cny": avg_daily_cost * days,
                    "confidence": confidence,
                    "data_points": data_points
                }

        # No usable date information: spread the total over an assumed
        # 7-day window.
        total_cost = float(api_calls_df["成本(CNY)"].sum())
        avg_daily = total_cost / 7.0 if total_cost > 0 else 0.0
        return {
            "estimated_daily_cost_cny": avg_daily,
            "estimated_total_cost_cny": avg_daily * days,
            "confidence": "",
            "data_points": 0
        }