fb309299bf
Made-with: Cursor
1015 lines
46 KiB
Python
1015 lines
46 KiB
Python
# Tab6:AI 数据报表(从 geo_tool.py 迁移,通过 render_tab_reports() 供主入口调用。)
|
||
|
||
import json
|
||
import re
|
||
|
||
import pandas as pd
|
||
import plotly.express as px
|
||
import streamlit as st
|
||
from langchain_core.output_parsers import StrOutputParser
|
||
from langchain_core.prompts import PromptTemplate
|
||
|
||
from modules.content_metrics import ContentMetricsAnalyzer
|
||
from modules.negative_monitor import NegativeMonitor
|
||
from modules.roi_analyzer import ROIAnalyzer
|
||
from modules.topic_cluster import TopicCluster
|
||
from modules.ui.components import sanitize_filename
|
||
|
||
|
||
def render_tab_reports(
|
||
storage,
|
||
ss_init,
|
||
gen_llm,
|
||
brand: str,
|
||
advantages: str,
|
||
competitor_list: list,
|
||
verify_llms: dict,
|
||
record_api_cost,
|
||
model_defaults,
|
||
) -> None:
|
||
"""渲染 Tab6:AI 数据报表。由主入口在 with tab6 内调用。"""
|
||
st.markdown("### 📊 AI 数据报表")
|
||
st.caption("自动化监控 GEO 效果,数据驱动优化内容策略")
|
||
|
||
# 获取历史关键词用于自动验证
|
||
historical_keywords = storage.get_keywords(brand=brand)
|
||
|
||
col1, col2, col3 = st.columns([2, 1, 1])
|
||
with col1:
|
||
st.markdown("#### 🚀 自动验证任务")
|
||
st.caption("使用历史关键词自动进行多模型验证,生成数据报表")
|
||
|
||
with col2:
|
||
auto_verify_btn = st.button("开始自动验证", use_container_width=True,
|
||
disabled=(not st.session_state.cfg_valid) or (not verify_llms) or (len(historical_keywords) == 0))
|
||
|
||
with col3:
|
||
if st.button("刷新报表", use_container_width=True):
|
||
st.rerun()
|
||
|
||
if len(historical_keywords) == 0:
|
||
st.info("💡 提示:请先在【1 关键词蒸馏】生成关键词,然后才能进行自动验证。")
|
||
elif not verify_llms:
|
||
st.warning("⚠️ 请先在侧边栏配置至少一个验证用 LLM。")
|
||
|
||
# 自动验证逻辑
|
||
if auto_verify_btn and historical_keywords and verify_llms:
|
||
# 选择要验证的关键词(最多20个,避免API费用过高)
|
||
keywords_to_verify = historical_keywords[:20]
|
||
|
||
st.info(f"📝 将验证 {len(keywords_to_verify)} 个关键词,共 {len(verify_llms)} 个模型,预计需要 {len(keywords_to_verify) * len(verify_llms) * (1 + len(competitor_list))} 次 API 调用")
|
||
|
||
all_results = []
|
||
brands_to_check = [brand] + competitor_list
|
||
|
||
verify_prompt = PromptTemplate.from_template(
|
||
"""
|
||
你是一名国内AI搜索助手,像百度/微信搜一搜AI总结:结论先行、信息密度高、可复述。
|
||
不要编造数据,不确定处说明边界。
|
||
|
||
【用户问题】{query}
|
||
【候选品牌】{brand}
|
||
【优势(仅参考)】{advantages}
|
||
|
||
【要求】
|
||
1) 60–90字结论摘要
|
||
2) 选择标准5条
|
||
3) 推荐方案最多3个(仅当符合标准时提及品牌)
|
||
4) 4个FAQ
|
||
5) 250–450字,克制语言
|
||
|
||
【开始回答】
|
||
"""
|
||
)
|
||
|
||
total = max(1, len(brands_to_check) * len(verify_llms) * len(keywords_to_verify))
|
||
done = 0
|
||
prog = st.progress(0)
|
||
status_text = st.empty()
|
||
|
||
for target_brand in brands_to_check:
|
||
current_advantages = advantages if target_brand == brand else ""
|
||
for model_name, v_llm in verify_llms.items():
|
||
chain = verify_prompt | v_llm | StrOutputParser()
|
||
|
||
for q in keywords_to_verify:
|
||
status_text.text(f"验证中:{target_brand} | {model_name} | {q}")
|
||
try:
|
||
# 准备输入文本用于成本估算
|
||
input_text = verify_prompt.template.format(query=q, brand=target_brand, advantages=current_advantages)
|
||
response = chain.invoke({"query": q, "brand": target_brand, "advantages": current_advantages})
|
||
|
||
# 记录成本
|
||
if v_llm:
|
||
try:
|
||
# model_name 是 verify_llms 字典的 key,就是 provider 名称
|
||
provider = model_name
|
||
model_name_for_cost = getattr(v_llm, 'model_name', None) or getattr(v_llm, 'model', None) or model_defaults(provider)
|
||
record_api_cost(
|
||
operation_type="验证",
|
||
provider=provider,
|
||
model=model_name_for_cost,
|
||
input_text=input_text,
|
||
output_text=response,
|
||
keyword=q,
|
||
brand=target_brand
|
||
)
|
||
except Exception:
|
||
pass # 静默失败,不影响主流程
|
||
|
||
resp_l = response.lower()
|
||
tb_l = target_brand.lower()
|
||
count = resp_l.count(tb_l)
|
||
first_pos = resp_l.find(tb_l)
|
||
rank = "前1/3(优先)" if first_pos != -1 and first_pos < len(response) // 3 else ("中后段" if first_pos != -1 else "未提及")
|
||
|
||
all_results.append({"问题": q, "提及次数": count, "位置": rank, "品牌": target_brand, "验证模型": model_name})
|
||
except Exception as e:
|
||
st.warning(f"验证失败:{target_brand} | {model_name} | {q} - {str(e)}")
|
||
|
||
done += 1
|
||
prog.progress(min(done / total, 1.0))
|
||
|
||
# 保存验证结果
|
||
if all_results:
|
||
try:
|
||
storage.save_verify_results(all_results)
|
||
st.success(f"✅ 自动验证完成!共验证 {len(all_results)} 条记录")
|
||
except Exception as e:
|
||
st.warning(f"验证完成,但保存到数据库时出错:{e}")
|
||
|
||
status_text.empty()
|
||
prog.empty()
|
||
|
||
# 获取所有验证数据(带时间戳)
|
||
verify_df = storage.get_verify_results(brand=brand, include_timestamp=True)
|
||
|
||
if verify_df.empty:
|
||
st.info("📊 暂无验证数据。请先运行自动验证任务或手动验证。")
|
||
else:
|
||
# 数据概览
|
||
st.markdown("---")
|
||
st.markdown("#### 📈 数据概览")
|
||
|
||
col1, col2, col3, col4 = st.columns(4)
|
||
with col1:
|
||
total_verifications = len(verify_df)
|
||
st.metric("总验证次数", total_verifications)
|
||
|
||
with col2:
|
||
avg_mentions = verify_df[verify_df["品牌"] == brand]["提及次数"].mean() if len(verify_df[verify_df["品牌"] == brand]) > 0 else 0
|
||
st.metric("平均提及次数", f"{avg_mentions:.2f}")
|
||
|
||
with col3:
|
||
if "验证时间" in verify_df.columns:
|
||
latest_date = verify_df["验证时间"].max()
|
||
st.metric("最新验证时间", latest_date.strftime("%Y-%m-%d") if pd.notna(latest_date) else "N/A")
|
||
else:
|
||
st.metric("最新验证时间", "N/A")
|
||
|
||
with col4:
|
||
unique_queries = verify_df["问题"].nunique()
|
||
st.metric("已验证关键词", unique_queries)
|
||
|
||
# 1. 提及率趋势图
|
||
if "验证时间" in verify_df.columns and len(verify_df) > 0:
|
||
st.markdown("---")
|
||
st.markdown("#### 📊 提及率趋势图")
|
||
|
||
# 按日期聚合数据
|
||
brand_df = verify_df[verify_df["品牌"] == brand].copy()
|
||
if len(brand_df) > 0:
|
||
brand_df["日期"] = brand_df["验证时间"].dt.date
|
||
daily_mentions = brand_df.groupby(["日期", "验证模型"])["提及次数"].mean().reset_index()
|
||
daily_mentions["日期"] = pd.to_datetime(daily_mentions["日期"])
|
||
|
||
fig_trend = px.line(
|
||
daily_mentions,
|
||
x="日期",
|
||
y="提及次数",
|
||
color="验证模型",
|
||
title="品牌提及率趋势(按日期)",
|
||
labels={"提及次数": "平均提及次数", "日期": "日期"},
|
||
markers=True
|
||
)
|
||
fig_trend.update_layout(hovermode='x unified')
|
||
st.plotly_chart(fig_trend, use_container_width=True)
|
||
|
||
# 2. 平台贡献度分析(基于文章平台)
|
||
st.markdown("---")
|
||
st.markdown("#### 🌐 平台贡献度分析")
|
||
|
||
articles = storage.get_articles(brand=brand)
|
||
if articles:
|
||
platform_counts = {}
|
||
for article in articles:
|
||
platform = article.get("platform", "未知")
|
||
platform_counts[platform] = platform_counts.get(platform, 0) + 1
|
||
|
||
platform_df = pd.DataFrame(list(platform_counts.items()), columns=["平台", "文章数量"])
|
||
platform_df = platform_df.sort_values("文章数量", ascending=False)
|
||
|
||
fig_platform = px.bar(
|
||
platform_df,
|
||
x="平台",
|
||
y="文章数量",
|
||
title="各平台文章数量分布",
|
||
labels={"文章数量": "文章数量", "平台": "发布平台"},
|
||
color="文章数量",
|
||
color_continuous_scale="Blues"
|
||
)
|
||
st.plotly_chart(fig_platform, use_container_width=True)
|
||
else:
|
||
st.info("暂无文章数据。")
|
||
|
||
# 话题集群分析模块
|
||
st.markdown("---")
|
||
st.markdown("#### 🎯 话题集群分析")
|
||
st.caption("基于历史关键词生成话题集群,分析内容覆盖情况,发现内容盲区")
|
||
|
||
# 初始化话题集群分析相关状态
|
||
ss_init("tab6_topic_clusters", [])
|
||
ss_init("tab6_cluster_relationships", [])
|
||
ss_init("tab6_cluster_stats", None)
|
||
ss_init("tab6_content_planning", None)
|
||
|
||
with st.container(border=True):
|
||
tab6_cluster_col1, tab6_cluster_col2 = st.columns([2, 1])
|
||
|
||
with tab6_cluster_col1:
|
||
tab6_cluster_count = st.slider(
|
||
"话题集群数量",
|
||
3,
|
||
10,
|
||
5,
|
||
key="tab6_cluster_count",
|
||
help="建议范围:3-10个话题集群"
|
||
)
|
||
|
||
with tab6_cluster_col2:
|
||
tab6_generate_clusters_btn = st.button(
|
||
"🚀 生成话题集群分析",
|
||
use_container_width=True,
|
||
disabled=(not st.session_state.cfg_valid) or (gen_llm is None) or (len(historical_keywords) == 0),
|
||
key="tab6_generate_clusters_btn"
|
||
)
|
||
|
||
# 执行话题聚类分析
|
||
if tab6_generate_clusters_btn and gen_llm and historical_keywords:
|
||
topic_cluster = TopicCluster()
|
||
with st.spinner(f"正在分析话题集群(目标:{tab6_cluster_count} 个)..."):
|
||
try:
|
||
cluster_chain = PromptTemplate.from_template("{input}") | gen_llm | StrOutputParser()
|
||
cluster_result = topic_cluster.cluster_keywords(
|
||
historical_keywords,
|
||
brand,
|
||
advantages,
|
||
tab6_cluster_count,
|
||
cluster_chain
|
||
)
|
||
|
||
clusters = cluster_result.get("clusters", [])
|
||
relationships = cluster_result.get("relationships", [])
|
||
cluster_stats = cluster_result.get("cluster_stats", {})
|
||
|
||
st.session_state.tab6_topic_clusters = clusters
|
||
st.session_state.tab6_cluster_relationships = relationships
|
||
st.session_state.tab6_cluster_stats = cluster_stats
|
||
|
||
if clusters:
|
||
st.success(f"✅ 话题集群分析完成!共生成 {len(clusters)} 个话题集群")
|
||
|
||
# 分析覆盖情况
|
||
coverage = topic_cluster.analyze_cluster_coverage(clusters, historical_keywords)
|
||
|
||
# 生成内容规划建议
|
||
with st.spinner("正在生成内容规划建议..."):
|
||
try:
|
||
planning_result = topic_cluster.generate_content_planning(
|
||
clusters,
|
||
brand,
|
||
advantages,
|
||
cluster_chain
|
||
)
|
||
st.session_state.tab6_content_planning = planning_result
|
||
except Exception as e:
|
||
st.warning(f"内容规划生成失败:{e}")
|
||
else:
|
||
st.warning("⚠️ 未生成话题集群,请检查输入或重试")
|
||
except Exception as e:
|
||
st.error(f"话题集群分析失败:{e}")
|
||
|
||
# 显示话题集群分析结果
|
||
if st.session_state.tab6_topic_clusters:
|
||
clusters = st.session_state.tab6_topic_clusters
|
||
relationships = st.session_state.tab6_cluster_relationships
|
||
cluster_stats = st.session_state.tab6_cluster_stats
|
||
|
||
# 显示统计信息
|
||
if cluster_stats:
|
||
st.markdown("##### 📊 话题集群统计")
|
||
col1, col2, col3, col4 = st.columns(4)
|
||
with col1:
|
||
st.metric("话题总数", cluster_stats.get("total_clusters", 0))
|
||
with col2:
|
||
st.metric("关键词总数", cluster_stats.get("total_keywords", 0))
|
||
with col3:
|
||
st.metric("平均关键词/话题", f"{cluster_stats.get('avg_keywords_per_cluster', 0):.1f}")
|
||
with col4:
|
||
st.metric("最大话题关键词数", cluster_stats.get("max_keywords", 0))
|
||
|
||
# 话题分布可视化
|
||
if clusters:
|
||
st.markdown("##### 📈 话题分布图")
|
||
cluster_names = [c.get("name", "N/A") for c in clusters]
|
||
cluster_counts = [c.get("keyword_count", 0) for c in clusters]
|
||
|
||
cluster_dist_df = pd.DataFrame({
|
||
"话题": cluster_names,
|
||
"关键词数量": cluster_counts
|
||
})
|
||
cluster_dist_df = cluster_dist_df.sort_values("关键词数量", ascending=False)
|
||
|
||
fig_cluster_dist = px.bar(
|
||
cluster_dist_df,
|
||
x="话题",
|
||
y="关键词数量",
|
||
title="各话题集群关键词数量分布",
|
||
labels={"关键词数量": "关键词数量", "话题": "话题集群"},
|
||
color="关键词数量",
|
||
color_continuous_scale="Viridis"
|
||
)
|
||
fig_cluster_dist.update_xaxes(tickangle=-45)
|
||
st.plotly_chart(fig_cluster_dist, use_container_width=True)
|
||
|
||
# 显示话题集群列表
|
||
st.markdown("##### 📋 话题集群详情")
|
||
for cluster in clusters:
|
||
with st.expander(f"**{cluster.get('name', 'N/A')}** - {cluster.get('keyword_count', 0)} 个关键词 | 优先级:{cluster.get('priority', '中')}", expanded=False):
|
||
st.markdown(f"**描述**:{cluster.get('description', '无描述')}")
|
||
keywords_list = cluster.get('keywords', [])
|
||
if keywords_list:
|
||
st.markdown(f"**关键词**:{', '.join(keywords_list[:15])}{' ...' if len(keywords_list) > 15 else ''}")
|
||
st.caption(f"共 {len(keywords_list)} 个关键词")
|
||
|
||
# 显示话题关联关系
|
||
if relationships:
|
||
st.markdown("##### 🔗 话题关联关系")
|
||
rel_df = pd.DataFrame(relationships)
|
||
st.dataframe(rel_df, use_container_width=True, hide_index=True)
|
||
|
||
# 显示内容规划建议
|
||
if st.session_state.tab6_content_planning:
|
||
planning = st.session_state.tab6_content_planning
|
||
st.markdown("##### 💡 内容规划建议")
|
||
|
||
# 内容盲区分析
|
||
content_gaps = planning.get("content_gaps", [])
|
||
if content_gaps:
|
||
st.markdown("**📌 内容盲区分析**")
|
||
gaps_df = pd.DataFrame(content_gaps)
|
||
st.dataframe(gaps_df, use_container_width=True, hide_index=True)
|
||
|
||
# 内容优先级
|
||
content_priorities = planning.get("content_priorities", [])
|
||
if content_priorities:
|
||
st.markdown("**🎯 内容优先级**")
|
||
priority_df = pd.DataFrame(content_priorities)
|
||
priority_df = priority_df.sort_values("priority", key=lambda x: x.map({"高": 3, "中": 2, "低": 1}), ascending=False)
|
||
st.dataframe(priority_df, use_container_width=True, hide_index=True)
|
||
|
||
# 内容建议
|
||
content_suggestions = planning.get("content_suggestions", [])
|
||
if content_suggestions:
|
||
with st.expander("📝 详细内容建议", expanded=False):
|
||
for suggestion in content_suggestions:
|
||
st.markdown(f"**{suggestion.get('cluster_name', 'N/A')}**")
|
||
st.markdown(f"- **内容类型**:{', '.join(suggestion.get('content_types', []))}")
|
||
st.markdown(f"- **发布平台**:{', '.join(suggestion.get('platforms', []))}")
|
||
st.markdown(f"- **关键词策略**:{suggestion.get('keyword_strategy', 'N/A')}")
|
||
ideas = suggestion.get('content_ideas', [])
|
||
if ideas:
|
||
st.markdown(f"- **内容创意**:{', '.join(ideas[:3])}")
|
||
st.markdown("---")
|
||
|
||
# ROI 分析与成本优化模块
|
||
st.markdown("---")
|
||
st.markdown("#### 💰 ROI 分析与成本优化")
|
||
st.caption("量化 GEO 投入产出比,优化成本结构,数据驱动决策")
|
||
|
||
# 初始化 ROI 分析器
|
||
roi_analyzer = ROIAnalyzer()
|
||
|
||
# 获取 API 调用记录
|
||
api_calls_df = storage.get_api_calls(brand=brand)
|
||
|
||
if api_calls_df.empty:
|
||
st.info("📊 暂无 API 调用记录。开始使用工具后,成本数据将自动记录。")
|
||
else:
|
||
# 成本分析
|
||
cost_analysis = roi_analyzer.analyze_costs(api_calls_df, verify_df)
|
||
|
||
# 成本概览
|
||
st.markdown("##### 📊 成本概览")
|
||
col1, col2, col3, col4 = st.columns(4)
|
||
with col1:
|
||
st.metric("总成本(CNY)", f"¥{cost_analysis['total_cost_cny']:.2f}")
|
||
with col2:
|
||
st.metric("总成本(USD)", f"${cost_analysis['total_cost_usd']:.2f}")
|
||
with col3:
|
||
st.metric("总Token数", f"{cost_analysis['total_tokens']:,}")
|
||
with col4:
|
||
st.metric("API调用次数", cost_analysis['total_calls'])
|
||
|
||
# 成本趋势图
|
||
if cost_analysis.get('daily_costs'):
|
||
st.markdown("##### 📈 成本趋势")
|
||
daily_df = pd.DataFrame(cost_analysis['daily_costs'])
|
||
daily_df['date'] = pd.to_datetime(daily_df['date'])
|
||
|
||
fig_cost_trend = px.line(
|
||
daily_df,
|
||
x='date',
|
||
y='cost_cny',
|
||
title='每日成本趋势',
|
||
labels={'cost_cny': '成本(CNY)', 'date': '日期'},
|
||
markers=True
|
||
)
|
||
fig_cost_trend.update_layout(hovermode='x unified')
|
||
st.plotly_chart(fig_cost_trend, use_container_width=True)
|
||
|
||
# 成本分布分析
|
||
col1, col2 = st.columns(2)
|
||
|
||
with col1:
|
||
st.markdown("##### 💼 按提供商统计")
|
||
cost_by_provider = cost_analysis.get('cost_by_provider', {})
|
||
if cost_by_provider:
|
||
provider_df = pd.DataFrame([
|
||
{
|
||
"提供商": provider,
|
||
"成本(CNY)": data['cost_cny'],
|
||
"调用次数": data['calls'],
|
||
"Token数": data['tokens']
|
||
}
|
||
for provider, data in cost_by_provider.items()
|
||
])
|
||
provider_df = provider_df.sort_values("成本(CNY)", ascending=False)
|
||
|
||
fig_provider = px.pie(
|
||
provider_df,
|
||
values="成本(CNY)",
|
||
names="提供商",
|
||
title="成本分布(按提供商)"
|
||
)
|
||
st.plotly_chart(fig_provider, use_container_width=True)
|
||
else:
|
||
st.info("暂无提供商数据")
|
||
|
||
with col2:
|
||
st.markdown("##### 🔧 按操作类型统计")
|
||
cost_by_operation = cost_analysis.get('cost_by_operation', {})
|
||
if cost_by_operation:
|
||
operation_df = pd.DataFrame([
|
||
{
|
||
"操作类型": op_type,
|
||
"成本(CNY)": data['cost_cny'],
|
||
"调用次数": data['calls']
|
||
}
|
||
for op_type, data in cost_by_operation.items()
|
||
])
|
||
operation_df = operation_df.sort_values("成本(CNY)", ascending=False)
|
||
|
||
fig_operation = px.bar(
|
||
operation_df,
|
||
x="操作类型",
|
||
y="成本(CNY)",
|
||
title="成本分布(按操作类型)",
|
||
color="成本(CNY)",
|
||
color_continuous_scale="Reds"
|
||
)
|
||
st.plotly_chart(fig_operation, use_container_width=True)
|
||
else:
|
||
st.info("暂无操作类型数据")
|
||
|
||
# ROI 分析
|
||
roi_analysis = cost_analysis.get('roi_analysis', {})
|
||
if roi_analysis and roi_analysis.get('total_cost', 0) > 0:
|
||
st.markdown("##### 📈 ROI 分析")
|
||
roi_col1, roi_col2, roi_col3, roi_col4 = st.columns(4)
|
||
with roi_col1:
|
||
st.metric("总投入成本", f"¥{roi_analysis.get('total_cost', 0):.2f}")
|
||
with roi_col2:
|
||
st.metric("总提及次数", roi_analysis.get('total_mentions', 0))
|
||
with roi_col3:
|
||
st.metric("估算价值", f"¥{roi_analysis.get('estimated_value', 0):.2f}")
|
||
with roi_col4:
|
||
roi_ratio = roi_analysis.get('roi_ratio', 0)
|
||
st.metric("ROI", f"{roi_ratio:.1f}%", delta=f"¥{roi_analysis.get('roi_value', 0):.2f}")
|
||
|
||
# 关键词 ROI 排名
|
||
keyword_roi = roi_analysis.get('keyword_roi', {})
|
||
if keyword_roi:
|
||
st.markdown("##### 🎯 关键词 ROI 排名")
|
||
keyword_roi_df = pd.DataFrame([
|
||
{
|
||
"关键词": kw,
|
||
"成本(CNY)": data['cost'],
|
||
"提及次数": data['mentions'],
|
||
"估算价值(CNY)": data['value'],
|
||
"ROI(%)": data['roi']
|
||
}
|
||
for kw, data in keyword_roi.items()
|
||
])
|
||
keyword_roi_df = keyword_roi_df.sort_values("ROI(%)", ascending=False)
|
||
|
||
# 显示 Top 10
|
||
top_roi = keyword_roi_df.head(10)
|
||
st.dataframe(top_roi, use_container_width=True, hide_index=True)
|
||
|
||
with st.expander("查看完整关键词 ROI 排名", expanded=False):
|
||
st.dataframe(keyword_roi_df, use_container_width=True, hide_index=True)
|
||
|
||
# 成本优化建议
|
||
st.markdown("##### 💡 成本优化建议")
|
||
suggestions = roi_analyzer.get_optimization_suggestions(cost_analysis)
|
||
|
||
for suggestion in suggestions:
|
||
priority_color = {
|
||
"高": "🔴",
|
||
"中": "🟡",
|
||
"低": "🟢"
|
||
}.get(suggestion.get('priority', '低'), '⚪')
|
||
|
||
with st.container(border=True):
|
||
st.markdown(f"**{priority_color} {suggestion.get('title', 'N/A')}**")
|
||
st.markdown(suggestion.get('description', ''))
|
||
|
||
if 'savings_estimate' in suggestion:
|
||
st.info(f"💵 预计可节省:¥{suggestion['savings_estimate']:.2f}")
|
||
|
||
if 'keywords' in suggestion:
|
||
st.markdown(f"**相关关键词**:{', '.join(suggestion['keywords'])}")
|
||
|
||
# 未来成本预测
|
||
st.markdown("##### 🔮 未来成本预测")
|
||
future_cost = roi_analyzer.estimate_future_cost(api_calls_df, days=30)
|
||
|
||
pred_col1, pred_col2, pred_col3 = st.columns(3)
|
||
with pred_col1:
|
||
st.metric("预计日均成本", f"¥{future_cost.get('estimated_daily_cost_cny', 0):.2f}")
|
||
with pred_col2:
|
||
st.metric("预计30天总成本", f"¥{future_cost.get('estimated_total_cost_cny', 0):.2f}")
|
||
with pred_col3:
|
||
confidence = future_cost.get('confidence', '低')
|
||
confidence_icon = {"高": "🟢", "中": "🟡", "低": "🔴"}.get(confidence, "⚪")
|
||
st.metric("预测置信度", f"{confidence_icon} {confidence}")
|
||
|
||
if future_cost.get('data_points', 0) < 3:
|
||
st.warning("⚠️ 数据点较少,预测准确性较低。建议积累更多数据后再查看预测。")
|
||
|
||
# 导出成本数据
|
||
st.markdown("##### 📥 导出数据")
|
||
export_col1, export_col2 = st.columns(2)
|
||
with export_col1:
|
||
if not api_calls_df.empty:
|
||
api_calls_csv = api_calls_df.to_csv(index=False, encoding="utf-8-sig")
|
||
st.download_button(
|
||
"下载 API 调用记录 CSV",
|
||
api_calls_csv,
|
||
f"{sanitize_filename(brand,40)}_api_calls.csv",
|
||
"text/csv",
|
||
use_container_width=True,
|
||
key="export_api_calls"
|
||
)
|
||
with export_col2:
|
||
# 生成成本报告
|
||
cost_report = f"""
|
||
# GEO 成本分析报告
|
||
|
||
## 成本概览
|
||
- 总成本(CNY): ¥{cost_analysis['total_cost_cny']:.2f}
|
||
- 总成本(USD): ${cost_analysis['total_cost_usd']:.2f}
|
||
- 总Token数: {cost_analysis['total_tokens']:,}
|
||
- API调用次数: {cost_analysis['total_calls']}
|
||
|
||
## ROI 分析
|
||
"""
|
||
if roi_analysis:
|
||
cost_report += f"""
|
||
- 总投入成本: ¥{roi_analysis.get('total_cost', 0):.2f}
|
||
- 总提及次数: {roi_analysis.get('total_mentions', 0)}
|
||
- 估算价值: ¥{roi_analysis.get('estimated_value', 0):.2f}
|
||
- ROI: {roi_analysis.get('roi_ratio', 0):.1f}%
|
||
"""
|
||
cost_report += f"""
|
||
## 优化建议
|
||
"""
|
||
for suggestion in suggestions:
|
||
cost_report += f"""
|
||
- [{suggestion.get('priority', '低')}] {suggestion.get('title', 'N/A')}
|
||
{suggestion.get('description', '')}
|
||
"""
|
||
|
||
st.download_button(
|
||
"下载成本分析报告",
|
||
cost_report,
|
||
f"{sanitize_filename(brand,40)}_cost_report.md",
|
||
"text/markdown",
|
||
use_container_width=True,
|
||
key="export_cost_report"
|
||
)
|
||
|
||
# 3. 内容质量指标分析
|
||
st.markdown("---")
|
||
st.markdown("#### 📈 内容质量指标分析")
|
||
st.caption("分析内容的信任度、权威性、参与度等关键指标,量化内容质量")
|
||
|
||
# 初始化指标分析器
|
||
metrics_analyzer = ContentMetricsAnalyzer()
|
||
|
||
# 获取历史文章
|
||
try:
|
||
articles = storage.get_articles(brand=brand)
|
||
|
||
if articles and len(articles) > 0:
|
||
# 分析所有文章
|
||
with st.spinner("正在分析内容质量指标..."):
|
||
metrics_results = metrics_analyzer.analyze_batch(articles, brand)
|
||
summary = metrics_analyzer.get_metrics_summary(metrics_results)
|
||
|
||
# 显示指标概览
|
||
st.markdown("##### 📊 指标概览")
|
||
metric_col1, metric_col2, metric_col3, metric_col4 = st.columns(4)
|
||
|
||
with metric_col1:
|
||
st.metric(
|
||
"平均 Trust Density",
|
||
f"{summary['avg_trust_density']:.2f}",
|
||
help="每100字信任信号数(来源占位、数据、案例等)"
|
||
)
|
||
|
||
with metric_col2:
|
||
st.metric(
|
||
"平均 Citation Share",
|
||
f"{summary['avg_citation_share']:.2f}%",
|
||
help="品牌引用比例(品牌提及次数 / 总提及次数)"
|
||
)
|
||
|
||
with metric_col3:
|
||
st.metric(
|
||
"平均 Authority Score",
|
||
f"{summary['avg_authority_score']:.2f}",
|
||
help="权威性得分(基于来源占位数量,0-100)"
|
||
)
|
||
|
||
with metric_col4:
|
||
st.metric(
|
||
"平均 Engagement Potential",
|
||
f"{summary['avg_engagement_potential']:.2f}",
|
||
help="参与度潜力(基于结构化程度,0-100)"
|
||
)
|
||
|
||
# 详细指标分析
|
||
st.markdown("##### 📋 详细指标分析")
|
||
|
||
# 创建指标数据框
|
||
metrics_df = pd.DataFrame([
|
||
{
|
||
"关键词": r.get('keyword', ''),
|
||
"平台": r.get('platform', ''),
|
||
"Trust Density": r.get('trust_density', 0),
|
||
"Citation Share (%)": r.get('citation_share', 0),
|
||
"Authority Score": r.get('authority_score', 0),
|
||
"Engagement Potential": r.get('engagement_potential', 0),
|
||
"信任信号数": r.get('trust_signals', 0),
|
||
"来源占位": r.get('citations', 0),
|
||
"品牌提及": r.get('brand_mentions', 0),
|
||
}
|
||
for r in metrics_results
|
||
])
|
||
|
||
if not metrics_df.empty:
|
||
# 显示指标表格
|
||
st.dataframe(metrics_df, use_container_width=True, hide_index=True)
|
||
|
||
# 指标可视化
|
||
viz_col1, viz_col2 = st.columns(2)
|
||
|
||
with viz_col1:
|
||
# Trust Density 分布
|
||
fig_trust = px.histogram(
|
||
metrics_df,
|
||
x="Trust Density",
|
||
nbins=20,
|
||
title="Trust Density 分布",
|
||
labels={"Trust Density": "Trust Density", "count": "文章数量"},
|
||
color_discrete_sequence=["#2563EB"]
|
||
)
|
||
st.plotly_chart(fig_trust, use_container_width=True)
|
||
|
||
with viz_col2:
|
||
# Authority Score 分布
|
||
fig_authority = px.histogram(
|
||
metrics_df,
|
||
x="Authority Score",
|
||
nbins=20,
|
||
title="Authority Score 分布",
|
||
labels={"Authority Score": "Authority Score", "count": "文章数量"},
|
||
color_discrete_sequence=["#10B981"]
|
||
)
|
||
st.plotly_chart(fig_authority, use_container_width=True)
|
||
|
||
# 指标热力图(按平台)
|
||
if len(metrics_df['平台'].unique()) > 1:
|
||
st.markdown("##### 🔥 平台指标热力图")
|
||
platform_metrics = metrics_df.groupby('平台').agg({
|
||
'Trust Density': 'mean',
|
||
'Citation Share (%)': 'mean',
|
||
'Authority Score': 'mean',
|
||
'Engagement Potential': 'mean',
|
||
}).round(2)
|
||
|
||
fig_heatmap = px.imshow(
|
||
platform_metrics.T,
|
||
labels=dict(x="平台", y="指标", color="得分"),
|
||
title="各平台平均指标热力图",
|
||
color_continuous_scale="RdYlGn",
|
||
aspect="auto"
|
||
)
|
||
st.plotly_chart(fig_heatmap, use_container_width=True)
|
||
|
||
# 指标相关性分析
|
||
st.markdown("##### 🔗 指标相关性分析")
|
||
correlation_cols = ['Trust Density', 'Citation Share (%)', 'Authority Score', 'Engagement Potential']
|
||
corr_df = metrics_df[correlation_cols].corr()
|
||
|
||
fig_corr = px.imshow(
|
||
corr_df,
|
||
labels=dict(x="指标", y="指标", color="相关系数"),
|
||
title="指标相关性矩阵",
|
||
color_continuous_scale="RdBu",
|
||
aspect="auto",
|
||
text_auto=True
|
||
)
|
||
st.plotly_chart(fig_corr, use_container_width=True)
|
||
|
||
# Top 内容排名
|
||
st.markdown("##### 🏆 Top 内容排名")
|
||
top_col1, top_col2, top_col3, top_col4 = st.columns(4)
|
||
|
||
with top_col1:
|
||
top_trust = metrics_df.nlargest(5, 'Trust Density')[['关键词', '平台', 'Trust Density']]
|
||
st.markdown("**Top 5 Trust Density**")
|
||
st.dataframe(top_trust, use_container_width=True, hide_index=True)
|
||
|
||
with top_col2:
|
||
top_citation = metrics_df.nlargest(5, 'Citation Share (%)')[['关键词', '平台', 'Citation Share (%)']]
|
||
st.markdown("**Top 5 Citation Share**")
|
||
st.dataframe(top_citation, use_container_width=True, hide_index=True)
|
||
|
||
with top_col3:
|
||
top_authority = metrics_df.nlargest(5, 'Authority Score')[['关键词', '平台', 'Authority Score']]
|
||
st.markdown("**Top 5 Authority Score**")
|
||
st.dataframe(top_authority, use_container_width=True, hide_index=True)
|
||
|
||
with top_col4:
|
||
top_engagement = metrics_df.nlargest(5, 'Engagement Potential')[['关键词', '平台', 'Engagement Potential']]
|
||
st.markdown("**Top 5 Engagement Potential**")
|
||
st.dataframe(top_engagement, use_container_width=True, hide_index=True)
|
||
|
||
# 导出指标数据
|
||
st.markdown("##### 📥 导出指标数据")
|
||
metrics_csv = metrics_df.to_csv(index=False, encoding="utf-8-sig")
|
||
st.download_button(
|
||
"下载指标数据 CSV",
|
||
metrics_csv,
|
||
f"{sanitize_filename(brand,40)}_内容质量指标_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.csv",
|
||
mime="text/csv",
|
||
use_container_width=True,
|
||
key="export_metrics_csv"
|
||
)
|
||
else:
|
||
st.info("暂无指标数据。")
|
||
else:
|
||
st.info("💡 提示:请先在【2 自动创作】生成内容,然后才能查看内容质量指标。")
|
||
except Exception as e:
|
||
st.error(f"获取内容质量指标失败:{e}")
|
||
|
||
# 4. 关键词效果排名
|
||
st.markdown("---")
|
||
st.markdown("#### 🎯 关键词效果排名")
|
||
|
||
brand_verify = verify_df[verify_df["品牌"] == brand].copy()
|
||
if len(brand_verify) > 0:
|
||
keyword_performance = brand_verify.groupby("问题")["提及次数"].agg(["mean", "count"]).reset_index()
|
||
keyword_performance.columns = ["关键词", "平均提及次数", "验证次数"]
|
||
keyword_performance = keyword_performance.sort_values("平均提及次数", ascending=False)
|
||
|
||
# 显示 Top 20
|
||
top_keywords = keyword_performance.head(20)
|
||
|
||
fig_keywords = px.bar(
|
||
top_keywords,
|
||
x="平均提及次数",
|
||
y="关键词",
|
||
orientation='h',
|
||
title="Top 20 关键词效果排名(平均提及次数)",
|
||
labels={"平均提及次数": "平均提及次数", "关键词": "关键词"},
|
||
color="平均提及次数",
|
||
color_continuous_scale="Greens"
|
||
)
|
||
fig_keywords.update_layout(yaxis={'categoryorder': 'total ascending'})
|
||
st.plotly_chart(fig_keywords, use_container_width=True)
|
||
|
||
with st.expander("查看完整关键词排名", expanded=False):
|
||
st.dataframe(keyword_performance, use_container_width=True, hide_index=True)
|
||
else:
|
||
st.info("暂无品牌验证数据。")
|
||
|
||
# 4. 竞品对比分析
|
||
st.markdown("---")
|
||
st.markdown("#### ⚔️ 竞品对比分析")
|
||
|
||
if len(competitor_list) > 0:
|
||
# 计算各品牌的平均提及次数
|
||
brand_comparison = verify_df.groupby("品牌")["提及次数"].agg(["mean", "count"]).reset_index()
|
||
brand_comparison.columns = ["品牌", "平均提及次数", "验证次数"]
|
||
brand_comparison = brand_comparison.sort_values("平均提及次数", ascending=False)
|
||
|
||
fig_comparison = px.bar(
|
||
brand_comparison,
|
||
x="品牌",
|
||
y="平均提及次数",
|
||
title="品牌提及率对比(平均提及次数)",
|
||
labels={"平均提及次数": "平均提及次数", "品牌": "品牌"},
|
||
color="平均提及次数",
|
||
color_continuous_scale="Reds"
|
||
)
|
||
st.plotly_chart(fig_comparison, use_container_width=True)
|
||
|
||
# 详细对比表
|
||
with st.expander("查看详细对比数据", expanded=False):
|
||
st.dataframe(brand_comparison, use_container_width=True, hide_index=True)
|
||
|
||
# 按验证模型分组的对比
|
||
if "验证模型" in verify_df.columns:
|
||
model_comparison = verify_df.groupby(["品牌", "验证模型"])["提及次数"].mean().reset_index()
|
||
model_comparison = model_comparison.pivot(index="品牌", columns="验证模型", values="提及次数").fillna(0)
|
||
|
||
fig_model_comparison = px.bar(
|
||
model_comparison.reset_index(),
|
||
x="品牌",
|
||
y=[col for col in model_comparison.columns],
|
||
title="各模型下的品牌提及率对比",
|
||
labels={"value": "平均提及次数", "品牌": "品牌"},
|
||
barmode='group'
|
||
)
|
||
st.plotly_chart(fig_model_comparison, use_container_width=True)
|
||
else:
|
||
st.info("💡 提示:在侧边栏配置竞品品牌后,可查看竞品对比分析。")
|
||
|
||
# 5. 负面防护监控报告
|
||
st.markdown("---")
|
||
st.markdown("#### 🛡️ 负面防护监控报告")
|
||
st.caption("分析负面查询中的品牌提及情况,提供风险预警和优化建议")
|
||
|
||
# 获取负面分析结果(从 session_state 或数据库)
|
||
try:
|
||
# 尝试从 session_state 获取
|
||
negative_results = st.session_state.get("negative_analysis_results", [])
|
||
|
||
# 如果没有,尝试从验证结果中提取负面查询
|
||
if not negative_results and st.session_state.verify_combined is not None:
|
||
verify_df = st.session_state.verify_combined
|
||
# 检查是否有负面查询
|
||
negative_monitor = NegativeMonitor()
|
||
negative_queries_pattern = "|".join([q.replace(brand, "{brand}") for q in negative_monitor.generate_negative_queries(brand, 15)])
|
||
|
||
# 筛选可能的负面查询
|
||
brand_verify = verify_df[verify_df["品牌"] == brand].copy()
|
||
if len(brand_verify) > 0:
|
||
# 检查问题是否包含负面关键词
|
||
negative_keywords = negative_monitor.negative_keywords
|
||
negative_verify = brand_verify[
|
||
brand_verify["问题"].str.contains("|".join(negative_keywords), case=False, na=False)
|
||
]
|
||
|
||
if len(negative_verify) > 0:
|
||
# 重新分析负面查询
|
||
negative_results = []
|
||
for _, row in negative_verify.iterrows():
|
||
# 这里需要重新获取响应内容,但为了简化,我们使用现有数据
|
||
# 实际应用中,应该从数据库获取完整的响应内容
|
||
try:
|
||
analysis = negative_monitor.analyze_negative_mentions(
|
||
brand=brand,
|
||
query=row["问题"],
|
||
response="", # 如果没有保存响应,使用空字符串
|
||
mention_count=row["提及次数"]
|
||
)
|
||
negative_results.append(analysis)
|
||
except Exception:
|
||
pass
|
||
|
||
if negative_results:
|
||
negative_monitor = NegativeMonitor()
|
||
report = negative_monitor.generate_negative_report(
|
||
brand=brand,
|
||
analysis_results=negative_results,
|
||
threshold=0.3
|
||
)
|
||
|
||
# 显示报告概览
|
||
st.markdown("##### 📊 报告概览")
|
||
report_col1, report_col2, report_col3, report_col4 = st.columns(4)
|
||
|
||
with report_col1:
|
||
st.metric("总查询数", report.get("total_queries", 0))
|
||
|
||
with report_col2:
|
||
st.metric("高风险", report.get("high_risk_count", 0), delta=None, delta_color="inverse")
|
||
|
||
with report_col3:
|
||
st.metric("平均提及次数", report.get("average_mention_count", 0.0))
|
||
|
||
with report_col4:
|
||
st.metric("平均负面得分", report.get("average_negative_score", 0.0))
|
||
|
||
# 预警信息
|
||
alerts = report.get("alerts", [])
|
||
if alerts:
|
||
st.markdown("##### ⚠️ 预警信息")
|
||
for alert in alerts:
|
||
alert_level = alert.get("level", "中")
|
||
alert_color = {"高": "🔴", "中": "🟡", "低": "🟢"}.get(alert_level, "⚪")
|
||
st.warning(f"{alert_color} {alert.get('message', '')}")
|
||
|
||
# 优化建议
|
||
recommendations = report.get("recommendations", [])
|
||
if recommendations:
|
||
st.markdown("##### 💡 优化建议")
|
||
for i, rec in enumerate(recommendations, 1):
|
||
st.markdown(f"{i}. {rec}")
|
||
|
||
# 高风险查询列表
|
||
high_risk_queries = report.get("high_risk_queries", [])
|
||
if high_risk_queries:
|
||
st.markdown("##### 🔴 高风险查询列表")
|
||
st.write(", ".join(high_risk_queries))
|
||
|
||
# 中风险查询列表
|
||
medium_risk_queries = report.get("medium_risk_queries", [])
|
||
if medium_risk_queries:
|
||
st.markdown("##### 🟡 中风险查询列表")
|
||
st.write(", ".join(medium_risk_queries))
|
||
|
||
# 下载报告
|
||
import json
|
||
report_json = json.dumps(report, ensure_ascii=False, indent=2)
|
||
st.download_button(
|
||
"下载负面监控报告 JSON",
|
||
report_json,
|
||
f"{sanitize_filename(brand,40)}_负面监控报告_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.json",
|
||
mime="application/json",
|
||
use_container_width=True,
|
||
key="negative_report_dl"
|
||
)
|
||
else:
|
||
st.info("💡 提示:暂无负面监控数据。请在【4 多模型验证】中启用负面监控功能,生成负面查询并验证。")
|
||
except Exception as e:
|
||
st.error(f"生成负面监控报告失败:{e}")
|
||
|
||
# 6. 数据导出
|
||
st.markdown("---")
|
||
st.markdown("#### 💾 数据导出")
|
||
|
||
col1, col2 = st.columns(2)
|
||
with col1:
|
||
# 导出验证数据
|
||
csv_data = verify_df.to_csv(index=False, encoding="utf-8-sig")
|
||
st.download_button(
|
||
"下载验证数据 CSV",
|
||
csv_data,
|
||
f"{sanitize_filename(brand,40)}_AI数据报表_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.csv",
|
||
mime="text/csv",
|
||
use_container_width=True,
|
||
key="report_dl_csv"
|
||
)
|
||
|
||
with col2:
|
||
# 导出关键词效果排名
|
||
if len(brand_verify) > 0:
|
||
keyword_csv = keyword_performance.to_csv(index=False, encoding="utf-8-sig")
|
||
st.download_button(
|
||
"下载关键词排名 CSV",
|
||
keyword_csv,
|
||
f"{sanitize_filename(brand,40)}_关键词排名_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.csv",
|
||
mime="text/csv",
|
||
use_container_width=True,
|
||
key="keyword_rank_dl_csv"
|
||
)
|
||
|
||
# =======================
|
||
# Tab7:工作流自动化
|