# Source file: ChouJuGEO/modules/ui/tab_optimize.py

# Tab3:文章优化(从 geo_tool.py 迁移,通过 render_tab_optimize() 供主入口调用。)
import logging
import re

import streamlit as st
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate

from modules.eeat_enhancer import EEATEnhancer
from modules.fact_density_enhancer import FactDensityEnhancer
from modules.optimization_techniques import OptimizationTechniqueManager
from modules.schema_generator import SchemaGenerator
from modules.technical_config_generator import TechnicalConfigGenerator
from modules.ui.components import sanitize_filename, safe_decode_uploaded, render_tab_top_with_clear
_logger = logging.getLogger(__name__)

# Target-platform choices for the optimization form; the first entry is the fallback.
_PLATFORM_OPTIONS = [
    "通用优化",
    "知乎(专业问答)",
    "CSDN(技术博客)",
    "GitHub(README/文档)",
    "B站(视频脚本)",
    "头条号(资讯软文)",
    "微信公众号(长文)",
    "抖音图文(短内容)",
    "百家号(资讯)",
    "网易号(资讯)",
    "企鹅号(资讯)",
    "简书(文艺)",
]

# Platform-name fragments whose result is shown as raw Markdown and downloaded as .md.
_MARKDOWN_PLATFORMS = ("GitHub", "微信公众号", "百家号", "网易号", "企鹅号", "简书")

# Section markers the prompt asks the model to emit around the two parts of its answer.
_ARTICLE_MARKER = "【优化后文章】"
_CHANGES_MARKER = "【变更说明】"

# Accepted URL prefixes for the robots.txt / sitemap.xml base URL check.
_URL_SCHEMES = ("http://", "https://")

_OPTIMIZE_PROMPT_TEMPLATE = """
你是GEO优化专家,目标是提升文章在大模型中的引用率和品牌自然提及。
【原文章】
{original_article}
【品牌】{brand}
【优势】{advantages}
【目标平台】{platform}
【优化要求(严格GEO原则)】
1) 保留原意和核心信息,不改变事实
2) 增强结构化:标题、清单、FAQ、代码块(适用时)
3) 自然植入品牌2-4次(先通用标准,再品牌适用)
4) 提升权威感:评估维度、匿名案例、来源占位建议(不得编造)
5) 结论先行、信息密度高
6) 长度控制在原长度的1.0-1.3倍
7) 输出两部分:【优化后文章】 + 【变更说明】(列出主要改动点)
【输出格式要求】
请严格按照以下结构输出一次,不要在前后添加其他说明或重复输出:
【优化后文章】
(在此输出完整优化后的文章)
【变更说明】
(在此列出主要变更点,使用条目形式)
【E-E-A-T 强化要求】
- 专业性:增强专业术语使用,展示专业知识深度
- 经验性:添加实际使用经验表述(如"实际应用中"、"使用中发现"),至少1处经验性表述
- 权威性:添加来源占位(数据来源、案例来源、标准来源),至少2处来源占位
- 可信度:明确标注不确定信息,避免编造数据,使用占位建议
【开始优化】
"""


def _strip_icon_prefix(label: str) -> str:
    """Return the technique name from an "<icon> <name>" option label."""
    _icon, sep, name = label.partition(" ")
    return name if sep else label


def _llm_unavailable(gen_llm) -> bool:
    """Return True when the config is invalid or no generation model was supplied."""
    return (not st.session_state.cfg_valid) or (gen_llm is None)


def _passthrough_chain(gen_llm):
    """Build a chain that forwards an ``{input}`` string to the model and parses text."""
    return PromptTemplate.from_template("{input}") | gen_llm | StrOutputParser()


def _parse_path_lines(raw: str):
    """Split a multi-line field into stripped, non-empty lines; None if the field is empty."""
    if not raw:
        return None
    return [line.strip() for line in raw.split("\n") if line.strip()]


def _split_optimization_result(result: str) -> tuple:
    """Split the model answer into (optimized article, change notes).

    Falls back to the whole answer plus a fixed notice when either marker is missing.
    """
    if _ARTICLE_MARKER in result and _CHANGES_MARKER in result:
        after_article_marker = result.split(_ARTICLE_MARKER, 1)[1]
        article = after_article_marker.split(_CHANGES_MARKER, 1)[0].strip()
        changes = result.split(_CHANGES_MARKER, 1)[1].strip()
        return article, changes
    return result.strip(), "无详细变更说明(模型未按模板输出)。"


def _render_article_input() -> str:
    """Render the paste/upload input widgets and return the article text ("" if none)."""
    # Kept outside the form so that pasting/uploading triggers a rerun and the
    # submit button's disabled state is refreshed.
    input_mode = st.radio(
        "输入方式",
        ["粘贴文本", "上传文件(TXT/MD)"],
        horizontal=True,
        key="opt_input_mode",
    )
    if input_mode == "粘贴文本":
        return st.text_area("粘贴文章内容", height=360, key="opt_text") or ""

    uploaded = st.file_uploader(
        "上传 TXT 或 MD 文件",
        type=["txt", "md"],
        key="opt_uploader",
    )
    if not uploaded:
        return ""
    try:
        article = safe_decode_uploaded(uploaded) or ""
    except Exception as e:
        st.error(f"上传文件解析失败:{e}")
        return ""
    if article:
        st.text_area(
            "上传内容预览",
            article,
            height=200,
            disabled=True,
            key="opt_upload_preview",
        )
    return article


def _render_optimize_form(gen_llm, original_article: str) -> tuple:
    """Render the platform/technique form; return (submitted, selected platform)."""
    with st.form("opt_form", clear_on_submit=False):
        saved_platform = st.session_state.get("opt_platform")
        # Fall back to the first option when the stored value is unknown.
        # (The previous code passed 0 to list.index() in that case, which raised.)
        default_index = (
            _PLATFORM_OPTIONS.index(saved_platform)
            if saved_platform in _PLATFORM_OPTIONS
            else 0
        )
        target_platform = st.selectbox(
            "目标平台(影响文风,可选)",
            _PLATFORM_OPTIONS,
            index=default_index,
            key="opt_platform_sel",
        )

        # Optional advanced-technique picker.
        with st.expander("🎨 高级优化技巧(可选)", expanded=False):
            technique_manager = OptimizationTechniqueManager()
            all_techniques = technique_manager.list_techniques()
            selected_labels = st.multiselect(
                "选择要应用的优化技巧(可多选)",
                options=[f"{tech['icon']} {tech['name']}" for tech in all_techniques],
                default=[],
                key="opt_techniques",
                help="可选,提高 GEO 效果。技巧会动态调整文章优化策略。",
            )

        # NOTE(review): rendered after the expander above rather than inside it,
        # since Streamlit has (at least historically) rejected nested expanders.
        if selected_labels:
            st.caption("已选择:" + "、".join(selected_labels))
            with st.expander("查看技巧说明", expanded=False):
                for label in selected_labels:
                    name = _strip_icon_prefix(label)
                    match = next(
                        (tech for tech in all_techniques if tech["name"] == name),
                        None,
                    )
                    if match is not None:
                        st.markdown(f"**{match['icon']} {match['name']}**")
                        st.caption(match["description"])

        run_opt_disabled = _llm_unavailable(gen_llm) or (not original_article.strip())
        run_opt = st.form_submit_button(
            "开始优化", use_container_width=True, disabled=run_opt_disabled
        )

    # Tell the user why the submit button is disabled.
    if run_opt_disabled:
        if not original_article.strip():
            st.caption("请先在上方粘贴文章内容,或上传 TXT/MD 文件。")
        else:
            st.caption("当前未检测到可用的生成模型,请先在【全局设置】中完成模型/API 配置。")
    return run_opt, target_platform


def _record_optimize_cost(
    gen_llm,
    cfg,
    record_api_cost,
    model_defaults,
    original_article,
    result,
    target_platform,
    brand,
) -> None:
    """Report the optimization call to ``record_api_cost``; failures are logged, not raised."""
    try:
        provider = cfg["gen_provider"]
        model_name = (
            getattr(gen_llm, "model_name", None)
            or getattr(gen_llm, "model", None)
            or model_defaults(provider)
        )
        record_api_cost(
            operation_type="优化",
            provider=provider,
            model=model_name,
            input_text=original_article[:1000],  # only the first 1000 chars are reported
            output_text=result,
            platform=target_platform,
            brand=brand,
        )
    except Exception:
        # Cost bookkeeping is best-effort and must not break the main flow.
        _logger.warning("Failed to record API cost for article optimization", exc_info=True)


def _run_optimization(
    storage,
    gen_llm,
    brand,
    advantages,
    cfg,
    record_api_cost,
    model_defaults,
    original_article,
    target_platform,
) -> None:
    """Call the model on the article, store the result in session state and in ``storage``."""
    st.session_state.opt_platform = target_platform
    selected_labels = st.session_state.get("opt_techniques", [])

    # Warn about very long articles that may overflow the model context.
    if len(original_article) > 8000:
        st.warning(
            "当前文章长度较长(超过 8000 字符),可能导致大模型上下文溢出或响应失败。"
            " 建议适当拆分文章后分别优化。"
        )

    try:
        prompt_template = _OPTIMIZE_PROMPT_TEMPLATE
        # Let the selected techniques extend the base prompt.
        if selected_labels:
            technique_manager = OptimizationTechniqueManager()
            technique_ids = technique_manager.get_technique_ids_by_names(
                [_strip_icon_prefix(label) for label in selected_labels]
            )
            prompt_template = technique_manager.enhance_prompt(
                prompt_template, technique_ids
            )

        with st.spinner("优化中..."):
            chain = (
                PromptTemplate.from_template(prompt_template)
                | gen_llm
                | StrOutputParser()
            )
            result = chain.invoke(
                {
                    "original_article": original_article,
                    "brand": brand,
                    "advantages": advantages,
                    "platform": target_platform,
                }
            )
            if gen_llm:
                _record_optimize_cost(
                    gen_llm,
                    cfg,
                    record_api_cost,
                    model_defaults,
                    original_article,
                    result,
                    target_platform,
                    brand,
                )

            optimized_article, changes = _split_optimization_result(result)
            st.session_state.optimized_article = optimized_article
            st.session_state.opt_changes = changes

            # Persist the run; a storage failure only produces a warning.
            try:
                storage.save_optimization(
                    original_article,
                    optimized_article,
                    changes,
                    target_platform,
                    brand,
                )
            except Exception as e:
                st.warning(f"优化完成,但保存到数据库时出错:{e}")
    except Exception as e:
        st.error(f"文章优化失败:{e}")


def _render_optimized_result(brand: str) -> None:
    """Show the optimized article, the change notes, and the restore-backup button."""
    st.markdown("---")
    st.markdown("#### 📝 优化结果")
    article_tab, changes_tab = st.tabs(["📝 优化后文章", "🧾 变更说明"])

    platform = st.session_state.get("opt_platform") or ""
    as_markdown = any(fragment in platform for fragment in _MARKDOWN_PLATFORMS)
    with article_tab:
        if as_markdown:
            st.code(st.session_state.optimized_article, language="markdown")
        else:
            st.markdown(st.session_state.optimized_article)
        ext = "md" if as_markdown else "txt"
        st.download_button(
            "下载优化版",
            st.session_state.optimized_article,
            f"{sanitize_filename(brand,40)}_优化文章.{ext}",
            use_container_width=True,
            key="opt_dl",
        )
    with changes_tab:
        st.markdown("#### 变更说明")
        st.markdown(st.session_state.opt_changes)

    # Simple one-step rollback to the version saved before the last enhancement.
    backup = st.session_state.get("optimized_article_backup")
    if backup and backup != st.session_state.optimized_article:
        if st.button("恢复至强化前版本", key="opt_restore_backup"):
            st.session_state.optimized_article = backup
            st.toast("已恢复至强化前版本。")
    st.markdown(
        "可选步骤:对优化后的文章进行质量评估与再强化(会调用额外模型)。"
    )


def _render_eeat_assessment(assessment) -> None:
    """Display a stored E-E-A-T assessment: scores, details, suggestions, placeholders."""
    scores = assessment.get("eeat_scores", {})
    total_score = scores.get("total", 0)
    level, _color = EEATEnhancer().get_eeat_level(total_score)
    dimensions = (
        ("专业性", "expertise"),
        ("经验性", "experience"),
        ("权威性", "authoritativeness"),
        ("可信度", "trustworthiness"),
    )

    st.markdown("##### 📊 E-E-A-T 评估结果")
    columns = st.columns(5)
    with columns[0]:
        st.metric("总分", f"{total_score}/100", delta=level, delta_color="off")
    for column, (label, key) in zip(columns[1:], dimensions):
        with column:
            st.metric(label, f"{scores.get(key, 0)}/25")

    with st.container(border=True):
        st.markdown("##### 📝 详细评估与改进建议")
        details = assessment.get("details", {})
        improvements = assessment.get("improvements", [])
        source_suggestions = assessment.get("source_suggestions", [])
        st.markdown("**详细评估:**")
        for label, key in dimensions:
            st.markdown(f"- **{label}**:{details.get(key, '')}")
        if improvements:
            st.markdown("**💡 改进建议:**")
            for improvement in improvements:
                st.markdown(f"- {improvement}")
        if source_suggestions:
            st.markdown("**📚 来源占位建议:**")
            for suggestion in source_suggestions:
                st.markdown(f"- {suggestion}")

        # Counts of source placeholders already present in the article.
        placeholders = assessment.get("source_placeholders", {})
        if placeholders:
            st.markdown("**✅ 已检测到的来源占位:**")
            for label, key in (
                ("数据来源", "data_sources"),
                ("案例来源", "case_sources"),
                ("标准来源", "standard_sources"),
                ("专家观点", "expert_opinions"),
            ):
                if placeholders.get(key):
                    st.markdown(f"- {label}:{len(placeholders[key])}")


def _render_eeat_section(ss_init, gen_llm, brand, advantages) -> None:
    """Render the E-E-A-T assess/enhance buttons, run the clicked action, show results."""
    st.markdown("#### 🎯 E-E-A-T 强化 + 来源占位")
    st.caption("评估和强化内容的专业性、经验性、权威性、可信度")
    buttons_disabled = _llm_unavailable(gen_llm)
    assess_col, enhance_col = st.columns(2)
    with assess_col:
        assess_clicked = st.button(
            "📊 评估 E-E-A-T",
            use_container_width=True,
            disabled=buttons_disabled,
        )
    with enhance_col:
        enhance_clicked = st.button(
            "✨ 强化 E-E-A-T",
            use_container_width=True,
            disabled=buttons_disabled,
        )
        st.caption("强化会覆盖当前优化结果,建议先下载备份。")

    # Initialise the session keys used by this section.
    ss_init("eeat_assessment", None)
    ss_init("eeat_enhanced_content", "")
    ss_init("eeat_source_placeholders", [])
    ss_init("optimized_article_backup", "")

    if assess_clicked and gen_llm:
        with st.spinner("正在评估 E-E-A-T..."):
            try:
                st.session_state.eeat_assessment = EEATEnhancer().assess_eeat(
                    st.session_state.optimized_article,
                    brand,
                    advantages,
                    st.session_state.opt_platform,
                    _passthrough_chain(gen_llm),
                )
            except Exception as e:
                st.error(f"E-E-A-T 评估失败:{e}")

    if enhance_clicked and gen_llm:
        # Keep the current text so the user can roll back afterwards.
        st.session_state.optimized_article_backup = st.session_state.optimized_article
        with st.spinner("正在强化 E-E-A-T..."):
            try:
                enhanced = EEATEnhancer().enhance_eeat(
                    st.session_state.optimized_article,
                    brand,
                    advantages,
                    st.session_state.opt_platform,
                    _passthrough_chain(gen_llm),
                )
                new_content = enhanced.get("enhanced_content", "") or ""
                # Reject empty or suspiciously short answers instead of overwriting.
                if len(new_content.strip()) < 100:
                    st.error(
                        "E-E-A-T 强化失败:模型返回内容异常,已保留强化前版本。"
                    )
                else:
                    st.session_state.eeat_enhanced_content = new_content
                    st.session_state.eeat_source_placeholders = enhanced.get(
                        "source_placeholders", []
                    )
                    st.session_state.optimized_article = new_content
                    st.success(
                        f"✅ E-E-A-T 强化完成!已添加 {len(st.session_state.eeat_source_placeholders)} 个来源占位"
                    )
            except Exception as e:
                st.error(f"E-E-A-T 强化失败:{e}")

    if st.session_state.eeat_assessment:
        _render_eeat_assessment(st.session_state.eeat_assessment)

    # Placeholders added by the last enhancement run.
    if st.session_state.eeat_source_placeholders:
        with st.container(border=True):
            st.markdown("##### 📚 来源占位清单")
            for placeholder in st.session_state.eeat_source_placeholders:
                st.markdown(f"- {placeholder}")


def _render_fact_counts(fact_analysis) -> None:
    """Show the per-type fact counters and the list of missing fact types."""
    counters = (
        ("数据", "data_count"),
        ("案例", "case_count"),
        ("标准", "standard_count"),
        ("对比", "comparison_count"),
        ("时间", "time_count"),
        ("来源", "source_count"),
    )
    for column, (label, key) in zip(st.columns(6), counters):
        with column:
            st.metric(label, fact_analysis.get(key, 0))
    missing_facts = fact_analysis.get("missing_facts", [])
    if missing_facts:
        st.markdown("**缺失的事实类型:**")
        for fact in missing_facts:
            st.markdown(f"- {fact}")


def _render_structure_flags(structure_analysis) -> None:
    """Show which structural blocks were detected and which are missing."""
    flag_columns = (
        (("标题层级", "has_title"), ("结论摘要", "has_summary")),
        (("清单列表", "has_list"), ("FAQ部分", "has_faq")),
        (("代码块", "has_code"), ("对比表格", "has_table")),
        (("步骤说明", "has_steps"), ("总结部分", "has_conclusion")),
    )
    for column, flags in zip(st.columns(4), flag_columns):
        with column:
            for label, key in flags:
                mark = "✅" if structure_analysis.get(key) else "❌"
                st.markdown(f"**{label}**:{mark}")
    missing_blocks = structure_analysis.get("missing_blocks", [])
    if missing_blocks:
        st.markdown("**缺失的结构化块:**")
        for block in missing_blocks:
            st.markdown(f"- {block}")


def _render_fact_details() -> None:
    """List the enhancement details stored by the last fact-density enhancement."""
    for detail in st.session_state.opt_fact_details:
        st.markdown(f"- {detail}")


def _render_fact_assessment(assessment) -> None:
    """Display a stored fact-density assessment: scores plus one tab per available analysis."""
    scores = assessment.get("scores", {})
    total_score = scores.get("total", 0)
    level, _color = FactDensityEnhancer().get_score_level(total_score)

    st.markdown("##### 📊 事实密度 + 结构化评估结果")
    total_col, density_col, structure_col = st.columns(3)
    with total_col:
        st.metric("总分", f"{total_score}/100", delta=level, delta_color="off")
    with density_col:
        st.metric("事实密度", f"{scores.get('fact_density', 0)}/50")
    with structure_col:
        st.metric("结构化", f"{scores.get('structure', 0)}/50")

    fact_analysis = assessment.get("fact_analysis", {})
    structure_analysis = assessment.get("structure_analysis", {})

    # Only create tabs for the parts that actually have content.
    sections = []
    if fact_analysis:
        sections.append(("📈 事实密度", lambda: _render_fact_counts(fact_analysis)))
    if structure_analysis:
        sections.append(
            ("🏗️ 结构化块", lambda: _render_structure_flags(structure_analysis))
        )
    if st.session_state.get("opt_fact_details"):
        sections.append(("📝 强化详情", _render_fact_details))

    if sections:
        tabs = st.tabs([label for label, _render in sections])
        for tab, (_label, render) in zip(tabs, sections):
            with tab:
                with st.container(border=True):
                    render()


def _render_fact_density_section(ss_init, gen_llm, brand, advantages) -> None:
    """Render the fact-density assess/enhance buttons, run the clicked action, show results."""
    st.markdown("---")
    st.markdown("#### 📊 事实密度 + 结构化块")
    st.caption("评估和强化内容的事实信息密度和结构化程度")
    buttons_disabled = _llm_unavailable(gen_llm)
    assess_col, enhance_col = st.columns(2)
    with assess_col:
        assess_clicked = st.button(
            "📊 评估事实密度",
            use_container_width=True,
            disabled=buttons_disabled,
        )
    with enhance_col:
        enhance_clicked = st.button(
            "✨ 强化事实密度",
            use_container_width=True,
            disabled=buttons_disabled,
        )
        st.caption("强化会覆盖当前优化结果,建议先下载备份。")

    # Initialise the session keys used by this section.
    ss_init("opt_fact_assessment", None)
    ss_init("opt_fact_enhanced", "")
    ss_init("opt_fact_details", [])

    if assess_clicked and gen_llm:
        with st.spinner("正在评估事实密度和结构化块..."):
            try:
                st.session_state.opt_fact_assessment = (
                    FactDensityEnhancer().assess_fact_density(
                        st.session_state.optimized_article,
                        brand,
                        advantages,
                        st.session_state.opt_platform,
                        _passthrough_chain(gen_llm),
                    )
                )
            except Exception as e:
                st.error(f"事实密度评估失败:{e}")

    if enhance_clicked and gen_llm:
        # Keep the current text so the user can roll back afterwards.
        st.session_state.optimized_article_backup = st.session_state.optimized_article
        with st.spinner("正在强化事实密度和结构化块..."):
            try:
                enhanced = FactDensityEnhancer().enhance_fact_density(
                    st.session_state.optimized_article,
                    brand,
                    advantages,
                    st.session_state.opt_platform,
                    _passthrough_chain(gen_llm),
                )
                new_content = enhanced.get("enhanced_content", "") or ""
                # Reject empty or suspiciously short answers instead of overwriting.
                if len(new_content.strip()) < 100:
                    st.error(
                        "事实密度强化失败:模型返回内容异常,已保留强化前版本。"
                    )
                else:
                    st.session_state.opt_fact_enhanced = new_content
                    st.session_state.opt_fact_details = enhanced.get(
                        "enhancement_details", []
                    )
                    st.session_state.optimized_article = new_content
                    st.success(
                        f"✅ 事实密度强化完成!已添加 {len(st.session_state.opt_fact_details)} 处事实信息和结构化块"
                    )
            except Exception as e:
                st.error(f"事实密度强化失败:{e}")

    if st.session_state.opt_fact_assessment:
        _render_fact_assessment(st.session_state.opt_fact_assessment)


def _build_schema(schema_gen, schema_type: str, brand: str, advantages: str):
    """Call the ``SchemaGenerator`` method matching the selected schema type."""
    if schema_type == "Organization(组织/公司)":
        return schema_gen.generate_organization_schema(
            brand_name=brand,
            description=advantages,
            url="",  # left blank; the user can fill it in after generation
            logo="",
            founding_date="",
        )
    if schema_type == "SoftwareApplication(软件应用)":
        return schema_gen.generate_software_application_schema(
            brand_name=brand,
            application_name=brand,
            description=advantages,
            url="",
            application_category="BusinessApplication",
            operating_system="Web",
        )
    if schema_type == "Product(产品)":
        return schema_gen.generate_product_schema(
            brand_name=brand,
            product_name=brand,
            description=advantages,
            url="",
        )
    if schema_type == "Service(服务)":
        return schema_gen.generate_service_schema(
            brand_name=brand,
            service_name=brand,
            description=advantages,
            url="",
        )
    # Remaining option: the combined Organization + SoftwareApplication schema.
    return schema_gen.generate_combined_schema(
        brand_name=brand,
        advantages=advantages,
        schema_types=[
            "Organization",
            "SoftwareApplication",
        ],
    )


def _render_schema_section(ss_init, brand: str, advantages: str) -> None:
    """Render the JSON-LD generator: type picker, generate button, output and downloads."""
    st.markdown("**📋 结构化数据生成**")
    st.caption(
        "生成符合 Schema.org 规范的 JSON-LD 代码,提升品牌在 AI 模型中的实体识别和权威性"
    )
    with st.container(border=True):
        type_col, button_col = st.columns([2, 1])
        with type_col:
            schema_type = st.selectbox(
                "Schema 类型",
                [
                    "Organization(组织/公司)",
                    "SoftwareApplication(软件应用)",
                    "Product(产品)",
                    "Service(服务)",
                    "组合(Organization + SoftwareApplication)",
                ],
                index=1,
                key="schema_type_sel",
                help="选择适合您品牌的 Schema 类型",
            )
        with button_col:
            generate_clicked = st.button(
                "🚀 生成 JSON-LD",
                use_container_width=True,
                key="generate_schema_btn",
            )

        ss_init("generated_json_ld", None)
        ss_init("generated_html_script", None)

        if generate_clicked:
            # Brand and advantages feed every schema, so require them first.
            if not brand or not advantages or len(brand.strip()) < 2:
                st.warning(
                    "请先在基础信息中填写品牌名称和优势,再生成 Schema。"
                )
            else:
                try:
                    schema_gen = SchemaGenerator()
                    schema_dict = _build_schema(
                        schema_gen, schema_type, brand, advantages
                    )
                    st.session_state.generated_json_ld = schema_gen.format_json_ld(
                        schema_dict
                    )
                    st.session_state.generated_html_script = (
                        schema_gen.generate_html_script_tag(schema_dict)
                    )
                    st.success("✅ JSON-LD Schema 生成成功!")
                except Exception as e:
                    st.error(f"JSON-LD 生成失败:{e}")

        if st.session_state.generated_json_ld:
            st.markdown("##### 📄 JSON-LD 代码")
            st.code(st.session_state.generated_json_ld, language="json")
            st.markdown("##### 📄 HTML Script 标签(可直接嵌入网页)")
            st.code(st.session_state.generated_html_script, language="html")
            json_col, html_col = st.columns(2)
            with json_col:
                st.download_button(
                    "下载 JSON-LD",
                    st.session_state.generated_json_ld,
                    f"{sanitize_filename(brand,40)}_schema.json",
                    mime="application/json",
                    use_container_width=True,
                    key="schema_dl_json",
                )
            with html_col:
                st.download_button(
                    "下载 HTML Script",
                    st.session_state.generated_html_script,
                    f"{sanitize_filename(brand,40)}_schema.html",
                    mime="text/html",
                    use_container_width=True,
                    key="schema_dl_html",
                )
            st.info(
                "💡 **使用说明**:将 HTML Script 标签复制到您的官网 `<head>` 部分,或将 JSON-LD 代码添加到 GitHub README 中。"
            )


def _render_robots_tab(ss_init) -> None:
    """Render the robots.txt generator: URL and path inputs, generate button, output."""
    st.markdown("##### 🤖 robots.txt 生成")
    st.caption("控制搜索引擎爬虫的访问权限,提升内容收录效果")
    url_col, button_col = st.columns([2, 1])
    with url_col:
        robots_base_url = st.text_input(
            "网站基础 URL",
            value="",
            key="robots_base_url",
            placeholder="https://example.com",
            help="您的网站基础 URL(如 https://example.com)",
        )
    with button_col:
        generate_clicked = st.button(
            "🚀 生成 robots.txt",
            use_container_width=True,
            key="generate_robots_btn",
        )

    allow_col, disallow_col = st.columns(2)
    with allow_col:
        allow_paths_input = st.text_area(
            "允许爬取的路径(每行一个)",
            value="/\n/blog\n/docs",
            key="robots_allow_paths",
            help="每行一个路径,如 /、/blog、/docs",
            height=100,
        )
    with disallow_col:
        disallow_paths_input = st.text_area(
            "禁止爬取的路径(每行一个)",
            value="/admin\n/private\n/api",
            key="robots_disallow_paths",
            help="每行一个路径,如 /admin、/private、/api",
            height=100,
        )

    ss_init("generated_robots_txt", None)

    if generate_clicked:
        # Strip once so the check and the generated file see the same URL.
        base_url = (robots_base_url or "").strip()
        if not base_url:
            st.error("请填写网站基础 URL(如 https://example.com)。")
        else:
            if not base_url.startswith(_URL_SCHEMES):
                st.warning(
                    "建议使用完整 URL(含 http/https),避免 robots.txt 中出现无效链接。"
                )
            try:
                st.session_state.generated_robots_txt = (
                    TechnicalConfigGenerator().generate_robots_txt(
                        base_url=base_url,
                        allow_paths=_parse_path_lines(allow_paths_input),
                        disallow_paths=_parse_path_lines(disallow_paths_input),
                        sitemap_url="",  # left blank so the generator fills it in
                        user_agent="*",
                        crawl_delay=None,
                    )
                )
                st.success("✅ robots.txt 生成成功!")
            except Exception as e:
                st.error(f"robots.txt 生成失败:{e}")

    if st.session_state.generated_robots_txt:
        st.markdown("##### 📄 robots.txt 内容")
        st.code(st.session_state.generated_robots_txt, language="text")
        st.download_button(
            "下载 robots.txt",
            st.session_state.generated_robots_txt,
            "robots.txt",
            mime="text/plain",
            use_container_width=True,
            key="robots_dl",
        )
        st.info(
            "💡 **使用说明**:将 robots.txt 文件上传到您网站的根目录(如 https://example.com/robots.txt)"
        )


def _render_sitemap_tab(ss_init, storage, brand: str) -> None:
    """Render the sitemap.xml generator, fed by session keywords or stored articles."""
    st.markdown("##### 🗺️ sitemap.xml 生成")
    st.caption("帮助搜索引擎发现和索引您的所有页面,提升内容收录效果")
    url_col, button_col = st.columns([2, 1])
    with url_col:
        sitemap_base_url = st.text_input(
            "网站基础 URL",
            value="",
            key="sitemap_base_url",
            placeholder="https://example.com",
            help="您的网站基础 URL(如 https://example.com)",
        )
    with button_col:
        generate_clicked = st.button(
            "🚀 生成 sitemap.xml",
            use_container_width=True,
            key="generate_sitemap_btn",
        )
    sitemap_source = st.radio(
        "数据源",
        ["基于关键词生成", "基于历史文章生成"],
        key="sitemap_source",
        horizontal=True,
    )

    ss_init("generated_sitemap_xml", None)

    if generate_clicked:
        # Strip once so the check and the generated file see the same URL.
        base_url = (sitemap_base_url or "").strip()
        if not base_url:
            st.error("请填写网站基础 URL(如 https://example.com)。")
        else:
            if not base_url.startswith(_URL_SCHEMES):
                st.warning(
                    "建议使用完整 URL(含 http/https),避免 sitemap.xml 中出现无效链接。"
                )
            try:
                config_gen = TechnicalConfigGenerator()
                if sitemap_source == "基于关键词生成":
                    keywords_for_sitemap = st.session_state.get("keywords") or []
                    if not keywords_for_sitemap:
                        st.warning(
                            "⚠️ 请先在【1 关键词蒸馏】生成关键词,或选择【基于历史文章生成】"
                        )
                    else:
                        st.session_state.generated_sitemap_xml = (
                            config_gen.generate_sitemap_xml(
                                base_url=base_url,
                                keywords=keywords_for_sitemap,
                                lastmod=None,  # None: presumably "today" in the generator — TODO confirm
                                changefreq="weekly",
                                priority=0.8,
                            )
                        )
                        st.success(
                            f"✅ sitemap.xml 生成成功!包含 {len(keywords_for_sitemap)} 个 URL"
                        )
                else:
                    try:
                        articles = storage.get_articles(brand=brand)
                        if not articles:
                            st.warning(
                                "⚠️ 暂无历史文章,请先生成内容,或选择【基于关键词生成】"
                            )
                        else:
                            st.session_state.generated_sitemap_xml = (
                                config_gen.generate_sitemap_from_articles(
                                    base_url=base_url,
                                    articles=articles,
                                    lastmod=None,
                                    changefreq="weekly",
                                    priority=0.8,
                                )
                            )
                            st.success(
                                f"✅ sitemap.xml 生成成功!包含 {len(articles)} 个 URL"
                            )
                    except Exception as e:
                        st.error(f"获取历史文章失败:{e}")
            except Exception as e:
                st.error(f"sitemap.xml 生成失败:{e}")

    if st.session_state.generated_sitemap_xml:
        st.markdown("##### 📄 sitemap.xml 内容")
        st.code(st.session_state.generated_sitemap_xml, language="xml")
        st.download_button(
            "下载 sitemap.xml",
            st.session_state.generated_sitemap_xml,
            "sitemap.xml",
            mime="application/xml",
            use_container_width=True,
            key="sitemap_dl",
        )
        st.info(
            "💡 **使用说明**:将 sitemap.xml 文件上传到您网站的根目录(如 https://example.com/sitemap.xml),并在 Google Search Console 中提交"
        )


def _render_technical_config_section(ss_init, storage, brand: str) -> None:
    """Render the robots.txt / sitemap.xml generators in two tabs."""
    st.markdown("---")
    st.markdown("**⚙️ 技术配置生成**")
    st.caption("生成 robots.txt、sitemap.xml 等技术配置文件,提升内容收录效果(提升 20-30%)")
    with st.container(border=True):
        robots_tab, sitemap_tab = st.tabs(["🤖 robots.txt", "🗺️ sitemap.xml"])
        with robots_tab:
            _render_robots_tab(ss_init)
        with sitemap_tab:
            _render_sitemap_tab(ss_init, storage, brand)


def render_tab_optimize(
    storage,
    ss_init,
    gen_llm,
    brand: str,
    advantages: str,
    cfg: dict,
    record_api_cost,
    model_defaults,
) -> None:
    """Render Tab 3 (article optimization); called by the main entry inside ``with tab3``.

    Args:
        storage: Object providing ``save_optimization(...)`` and
            ``get_articles(brand=...)``.
        ss_init: Callable invoked as ``ss_init(key, default)`` for each session
            key this tab uses (presumably sets it only when missing — verify
            against the caller).
        gen_llm: Generation model piped into LangChain chains, or ``None`` when
            no model is configured (model-dependent buttons are then disabled).
        brand: Brand name used in prompts, schemas and download file names.
        advantages: Brand advantages text used in prompts and schema descriptions.
        cfg: Settings dict; ``cfg["gen_provider"]`` is read for cost recording.
        record_api_cost: Callable that receives the cost record as keyword arguments.
        model_defaults: Callable mapping a provider to a model name, used when
            ``gen_llm`` exposes neither ``model_name`` nor ``model``.
    """

    def _clear_optimize_state():
        st.session_state.optimized_article = ""
        st.session_state.opt_changes = ""

    # Title row with the "clear" button.
    render_tab_top_with_clear(
        title="🔧 文章优化",
        caption="优化已有文章,生成结构化数据和技术配置,提升 GEO 效果",
        clear_key="opt_clear",
        on_clear=_clear_optimize_state,
    )

    # === Main flow: article optimization ===
    st.markdown("**✏️ 文章内容优化**")
    with st.container(border=True):
        st.markdown("粘贴或上传已写文章,一键提升 GEO 效果(结构化、可引用、自然植入品牌)")
        original_article = _render_article_input()
        run_opt, target_platform = _render_optimize_form(gen_llm, original_article)
        if run_opt:
            _run_optimization(
                storage,
                gen_llm,
                brand,
                advantages,
                cfg,
                record_api_cost,
                model_defaults,
                original_article,
                target_platform,
            )

    # === Result and quality assessment (only once an optimized article exists) ===
    if st.session_state.get("optimized_article"):
        _render_optimized_result(brand)
        _render_eeat_section(ss_init, gen_llm, brand, advantages)
        _render_fact_density_section(ss_init, gen_llm, brand, advantages)

    # === Advanced: structured schema and technical SEO config (collapsed) ===
    with st.expander(
        "高级:结构化 Schema & 技术 SEO 配置(可选)", expanded=False
    ):
        _render_schema_section(ss_init, brand, advantages)
        _render_technical_config_section(ss_init, storage, brand)