# ChouJuGEO/modules/ui/tab_optimize.py

# Tab3：文章优化（从 geo_tool.py 迁移，通过 render_tab_optimize() 供主入口调用。）

import re

import streamlit as st
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate

from modules.eeat_enhancer import EEATEnhancer
from modules.fact_density_enhancer import FactDensityEnhancer
from modules.optimization_techniques import OptimizationTechniqueManager
from modules.schema_generator import SchemaGenerator
from modules.technical_config_generator import TechnicalConfigGenerator
from modules.ui.components import sanitize_filename, safe_decode_uploaded, render_tab_top_with_clear


def render_tab_optimize(
    storage,
    ss_init,
    gen_llm,
    brand: str,
    advantages: str,
    cfg: dict,
    record_api_cost,
    model_defaults,
) -> None:
    """渲染 Tab3：文章优化。由主入口在 with tab3 内调用。"""
    # 标题和清空按钮
    def _clear_optimize_state():
        """Reset all session state produced by an optimization run in this tab.

        Passed as the ``on_clear`` callback of the tab header. Besides the
        optimized article and its change notes, this also resets the state
        derived from them (pre-enhancement backup, E-E-A-T results and
        fact-density results). Otherwise those values survive the clear and
        reappear next to the *next* optimized article: the "restore backup"
        button would bring back the previous article's text, and the
        assessment panels would show scores computed for the old article.
        """
        # Reset values mirror the defaults this tab hands to ss_init for the
        # same keys; a fresh dict (and fresh lists) is built on every call so
        # no mutable object is shared between resets.
        reset_values = {
            # Primary results rendered in the result tabs.
            "optimized_article": "",
            "opt_changes": "",
            # Backup taken before an enhancement overwrites the article.
            "optimized_article_backup": "",
            # E-E-A-T assessment / enhancement results.
            "eeat_assessment": None,
            "eeat_enhanced_content": "",
            "eeat_source_placeholders": [],
            # Fact-density assessment / enhancement results.
            "opt_fact_assessment": None,
            "opt_fact_enhanced": "",
            "opt_fact_details": [],
        }
        # Key-based assignment also works for keys that have not been
        # initialised yet (the result section may never have been rendered).
        for state_key, empty_value in reset_values.items():
            st.session_state[state_key] = empty_value

    render_tab_top_with_clear(
        title="🔧 文章优化",
        caption="优化已有文章，生成结构化数据和技术配置，提升 GEO 效果",
        clear_key="opt_clear",
        on_clear=_clear_optimize_state,
    )

    # === 文章优化功能（主流程） ===
    st.markdown("**✏️ 文章内容优化**")

    with st.container(border=True):
        st.markdown("粘贴或上传已写文章，一键提升 GEO 效果（结构化、可引用、自然植入品牌）")

        # 输入方式与文章内容放在表单外，以便粘贴/上传后能触发重跑，从而正确更新「开始优化」按钮的可用状态
        input_mode = st.radio(
            "输入方式",
            ["粘贴文本", "上传文件（TXT/MD）"],
            horizontal=True,
            key="opt_input_mode",
        )
        if input_mode == "粘贴文本":
            original_article = st.text_area(
                "粘贴文章内容", height=360, key="opt_text"
            )
        else:
            uploaded = st.file_uploader(
                "上传 TXT 或 MD 文件",
                type=["txt", "md"],
                key="opt_uploader",
            )
            original_article = ""
            if uploaded:
                try:
                    original_article = safe_decode_uploaded(uploaded) or ""
                except Exception as e:
                    st.error(f"上传文件解析失败：{e}")
                    original_article = ""
                if original_article:
                    st.text_area(
                        "上传内容预览",
                        original_article,
                        height=200,
                        disabled=True,
                        key="opt_upload_preview",
                    )

        with st.form("opt_form", clear_on_submit=False):
            target_platform = st.selectbox(
                "目标平台（影响文风，可选）",
                [
                    "通用优化",
                    "知乎（专业问答）",
                    "CSDN（技术博客）",
                    "GitHub（README/文档）",
                    "B站（视频脚本）",
                    "头条号（资讯软文）",
                    "微信公众号（长文）",
                    "抖音图文（短内容）",
                    "百家号（资讯）",
                    "网易号（资讯）",
                    "企鹅号（资讯）",
                    "简书（文艺）",
                ],
                index=[
                    "通用优化",
                    "知乎（专业问答）",
                    "CSDN（技术博客）",
                    "GitHub（README/文档）",
                    "B站（视频脚本）",
                    "头条号（资讯软文）",
                    "微信公众号（长文）",
                    "抖音图文（短内容）",
                    "百家号（资讯）",
                    "网易号（资讯）",
                    "企鹅号（资讯）",
                    "简书（文艺）",
                ].index(
                    st.session_state.opt_platform
                    if st.session_state.opt_platform
                    in [
                        "通用优化",
                        "知乎（专业问答）",
                        "CSDN（技术博客）",
                        "GitHub（README/文档）",
                        "B站（视频脚本）",
                        "头条号（资讯软文）",
                        "微信公众号（长文）",
                        "抖音图文（短内容）",
                        "百家号（资讯）",
                        "网易号（资讯）",
                        "企鹅号（资讯）",
                        "简书（文艺）",
                    ]
                    else 0
                ),
                key="opt_platform_sel",
            )

            # 高级优化技巧选择器（可选）
            with st.expander("🎨 高级优化技巧（可选）", expanded=False):
                opt_technique_manager = OptimizationTechniqueManager()
                opt_all_techniques = opt_technique_manager.list_techniques()
                opt_technique_options = [
                    f"{tech['icon']} {tech['name']}" for tech in opt_all_techniques
                ]

                opt_selected_technique_names = st.multiselect(
                    "选择要应用的优化技巧（可多选）",
                    options=opt_technique_options,
                    default=[],
                    key="opt_techniques",
                    help="可选，提高 GEO 效果。技巧会动态调整文章优化策略。",
                )

                # 显示选择的技巧描述
                if opt_selected_technique_names:
                    st.caption("已选择：" + "、".join(opt_selected_technique_names))
                    with st.expander("查看技巧说明", expanded=False):
                        for tech_name in opt_selected_technique_names:
                            tech_icon_name = (
                                tech_name.split(" ", 1)[1]
                                if " " in tech_name
                                else tech_name
                            )
                            for tech in opt_all_techniques:
                                if tech["name"] == tech_icon_name:
                                    st.markdown(f"**{tech['icon']} {tech['name']}**")
                                    st.caption(tech["description"])
                                    break

            run_opt_disabled = (
                (not st.session_state.cfg_valid)
                or (gen_llm is None)
                or (not original_article.strip())
            )
            run_opt = st.form_submit_button(
                "开始优化", use_container_width=True, disabled=run_opt_disabled
            )

            if run_opt_disabled:
                if not original_article.strip():
                    st.caption("请先在上方粘贴文章内容，或上传 TXT/MD 文件。")
                elif not st.session_state.cfg_valid or gen_llm is None:
                    st.caption("当前未检测到可用的生成模型，请先在【全局设置】中完成模型/API 配置。")

        if run_opt:
            st.session_state.opt_platform = target_platform
            opt_selected_technique_names = st.session_state.get("opt_techniques", [])

            optimize_prompt_template = """
你是GEO优化专家，目标是提升文章在大模型中的引用率和品牌自然提及。

【原文章】
{original_article}

【品牌】{brand}
【优势】{advantages}
【目标平台】{platform}

【优化要求（严格GEO原则）】
1) 保留原意和核心信息，不改变事实
2) 增强结构化：标题、清单、FAQ、代码块（适用时）
3) 自然植入品牌2-4次（先通用标准，再品牌适用）
4) 提升权威感：评估维度、匿名案例、来源占位建议（不得编造）
5) 结论先行、信息密度高
6) 长度控制在原长度的1.0-1.3倍
7) 输出两部分：【优化后文章】 + 【变更说明】（列出主要改动点）

【输出格式要求】
请严格按照以下结构输出一次，不要在前后添加其他说明或重复输出：
【优化后文章】
（在此输出完整优化后的文章）
【变更说明】
（在此列出主要变更点，使用条目形式）

【E-E-A-T 强化要求】
- 专业性：增强专业术语使用，展示专业知识深度
- 经验性：添加实际使用经验表述（如"实际应用中"、"使用中发现"），至少1处经验性表述
- 权威性：添加来源占位（数据来源、案例来源、标准来源），至少2处来源占位
- 可信度：明确标注不确定信息，避免编造数据，使用占位建议

【开始优化】
"""

            # 根据选择的优化技巧增强 Prompt
            if opt_selected_technique_names:
                opt_technique_manager = OptimizationTechniqueManager()
                opt_technique_ids = opt_technique_manager.get_technique_ids_by_names(
                    [
                        name.split(" ", 1)[1] if " " in name else name
                        for name in opt_selected_technique_names
                    ]
                )
                optimize_prompt_template = opt_technique_manager.enhance_prompt(
                    optimize_prompt_template, opt_technique_ids
                )

            # 对超长文章给出提醒，避免模型上下文溢出
            if len(original_article) > 8000:
                st.warning(
                    "当前文章长度较长（超过 8000 字符），可能导致大模型上下文溢出或响应失败。"
                    " 建议适当拆分文章后分别优化。"
                )

            optimize_prompt = PromptTemplate.from_template(optimize_prompt_template)

            try:
                with st.spinner("优化中..."):
                    chain = optimize_prompt | gen_llm | StrOutputParser()

                    # 准备输入文本用于成本估算
                    input_text = optimize_prompt.template.format(
                        original_article=original_article[
                            :500
                        ],  # 只取前500字符用于估算
                        brand=brand,
                        advantages=advantages,
                        platform=target_platform,
                    )
                    result = chain.invoke(
                        {
                            "original_article": original_article,
                            "brand": brand,
                            "advantages": advantages,
                            "platform": target_platform,
                        }
                    )

                    # 记录成本
                    if gen_llm:
                        try:
                            model_name = (
                                getattr(gen_llm, "model_name", None)
                                or getattr(gen_llm, "model", None)
                                or model_defaults(cfg["gen_provider"])
                            )
                            provider = cfg["gen_provider"]
                            record_api_cost(
                                operation_type="优化",
                                provider=provider,
                                model=model_name,
                                input_text=original_article[
                                    :1000
                                ],  # 使用实际输入文本的前1000字符
                                output_text=result,
                                platform=target_platform,
                                brand=brand,
                            )
                        except Exception:
                            # 记录成本失败不影响主流程
                            pass

                if "【优化后文章】" in result and "【变更说明】" in result:
                    optimized_article = (
                        result.split("【优化后文章】", 1)[1]
                        .split("【变更说明】", 1)[0]
                        .strip()
                    )
                    changes = result.split("【变更说明】", 1)[1].strip()
                else:
                    optimized_article = result.strip()
                    changes = "无详细变更说明（模型未按模板输出）。"

                st.session_state.optimized_article = optimized_article
                st.session_state.opt_changes = changes
                # 保存到数据库
                try:
                    storage.save_optimization(
                        original_article,
                        optimized_article,
                        changes,
                        target_platform,
                        brand,
                    )
                except Exception as e:
                    st.warning(f"优化完成，但保存到数据库时出错：{e}")
            except Exception as e:
                st.error(f"文章优化失败：{e}")

    # === 优化结果 & 质量评估 ===
    if st.session_state.optimized_article:
        st.markdown("---")
        st.markdown("#### 📝 优化结果")

        # 结果 Tabs：优化后文章 / 变更说明
        result_tab1, result_tab2 = st.tabs(["📝 优化后文章", "🧾 变更说明"])
        with result_tab1:
            markdown_platforms = ["GitHub", "微信公众号", "百家号", "网易号", "企鹅号", "简书"]
            if any(p in st.session_state.opt_platform for p in markdown_platforms):
                st.code(st.session_state.optimized_article, language="markdown")
            else:
                st.markdown(st.session_state.optimized_article)

            # 确定文件扩展名
            ext = (
                "md"
                if any(p in st.session_state.opt_platform for p in markdown_platforms)
                else "txt"
            )
            st.download_button(
                "下载优化版",
                st.session_state.optimized_article,
                f"{sanitize_filename(brand,40)}_优化文章.{ext}",
                use_container_width=True,
                key="opt_dl",
            )

        with result_tab2:
            st.markdown("#### 变更说明")
            st.markdown(st.session_state.opt_changes)

        # 提供简单的版本回退能力
        if (
            st.session_state.get("optimized_article_backup")
            and st.session_state.optimized_article_backup
            != st.session_state.optimized_article
        ):
            if st.button("恢复至强化前版本", key="opt_restore_backup"):
                st.session_state.optimized_article = (
                    st.session_state.optimized_article_backup
                )
                st.toast("已恢复至强化前版本。")

        st.markdown(
            "可选步骤：对优化后的文章进行质量评估与再强化（会调用额外模型）。"
        )

        # E-E-A-T 评估和强化区域
        st.markdown("#### 🎯 E-E-A-T 强化 + 来源占位")
        st.caption("评估和强化内容的专业性、经验性、权威性、可信度")

        eeat_col1, eeat_col2 = st.columns(2)

        with eeat_col1:
            assess_eeat_btn = st.button(
                "📊 评估 E-E-A-T",
                use_container_width=True,
                disabled=(not st.session_state.cfg_valid) or (gen_llm is None),
            )

        with eeat_col2:
            enhance_eeat_btn = st.button(
                "✨ 强化 E-E-A-T",
                use_container_width=True,
                disabled=(not st.session_state.cfg_valid) or (gen_llm is None),
            )
            st.caption("强化会覆盖当前优化结果，建议先下载备份。")

        # 初始化 E-E-A-T 相关状态
        ss_init("eeat_assessment", None)
        ss_init("eeat_enhanced_content", "")
        ss_init("eeat_source_placeholders", [])
        ss_init("optimized_article_backup", "")

        # E-E-A-T 评估
        if assess_eeat_btn and gen_llm:
            eeat_enhancer = EEATEnhancer()
            with st.spinner("正在评估 E-E-A-T..."):
                try:
                    score_chain = (
                        PromptTemplate.from_template("{input}")
                        | gen_llm
                        | StrOutputParser()
                    )
                    assessment = eeat_enhancer.assess_eeat(
                        st.session_state.optimized_article,
                        brand,
                        advantages,
                        st.session_state.opt_platform,
                        score_chain,
                    )
                    st.session_state.eeat_assessment = assessment
                except Exception as e:
                    st.error(f"E-E-A-T 评估失败：{e}")

        # E-E-A-T 强化（带备份与安全校验）
        if enhance_eeat_btn and gen_llm:
            eeat_enhancer = EEATEnhancer()
            st.session_state.optimized_article_backup = (
                st.session_state.optimized_article
            )
            with st.spinner("正在强化 E-E-A-T..."):
                try:
                    enhance_chain = (
                        PromptTemplate.from_template("{input}")
                        | gen_llm
                        | StrOutputParser()
                    )
                    enhanced = eeat_enhancer.enhance_eeat(
                        st.session_state.optimized_article,
                        brand,
                        advantages,
                        st.session_state.opt_platform,
                        enhance_chain,
                    )
                    new_content = enhanced.get("enhanced_content", "") or ""
                    if not new_content.strip() or len(new_content.strip()) < 100:
                        st.error(
                            "E-E-A-T 强化失败：模型返回内容异常，已保留强化前版本。"
                        )
                    else:
                        st.session_state.eeat_enhanced_content = new_content
                        st.session_state.eeat_source_placeholders = enhanced.get(
                            "source_placeholders", []
                        )
                        st.session_state.optimized_article = new_content
                        st.success(
                            f"✅ E-E-A-T 强化完成！已添加 {len(st.session_state.eeat_source_placeholders)} 个来源占位"
                        )
                except Exception as e:
                    st.error(f"E-E-A-T 强化失败：{e}")

        # 显示 E-E-A-T 评估结果
        if st.session_state.eeat_assessment:
            assessment = st.session_state.eeat_assessment
            scores = assessment.get("eeat_scores", {})
            total_score = scores.get("total", 0)
            eeat_enhancer = EEATEnhancer()
            level, color = eeat_enhancer.get_eeat_level(total_score)

            st.markdown("##### 📊 E-E-A-T 评估结果")
            col1, col2, col3, col4, col5 = st.columns(5)
            with col1:
                st.metric("总分", f"{total_score}/100", delta=level, delta_color="off")
            with col2:
                st.metric("专业性", f"{scores.get('expertise', 0)}/25")
            with col3:
                st.metric("经验性", f"{scores.get('experience', 0)}/25")
            with col4:
                st.metric("权威性", f"{scores.get('authoritativeness', 0)}/25")
            with col5:
                st.metric("可信度", f"{scores.get('trustworthiness', 0)}/25")

            # 详细评估和改进建议
            with st.container(border=True):
                st.markdown("##### 📝 详细评估与改进建议")
                details = assessment.get("details", {})
                improvements = assessment.get("improvements", [])
                source_suggestions = assessment.get("source_suggestions", [])

                st.markdown("**详细评估：**")
                st.markdown(f"- **专业性**：{details.get('expertise', '无')}")
                st.markdown(f"- **经验性**：{details.get('experience', '无')}")
                st.markdown(f"- **权威性**：{details.get('authoritativeness', '无')}")
                st.markdown(f"- **可信度**：{details.get('trustworthiness', '无')}")

                if improvements:
                    st.markdown("**💡 改进建议：**")
                    for improvement in improvements:
                        st.markdown(f"- {improvement}")

                if source_suggestions:
                    st.markdown("**📚 来源占位建议：**")
                    for suggestion in source_suggestions:
                        st.markdown(f"- {suggestion}")

                # 来源占位检查
                placeholders = assessment.get("source_placeholders", {})
                if placeholders:
                    st.markdown("**✅ 已检测到的来源占位：**")
                    if placeholders.get("data_sources"):
                        st.markdown(
                            f"- 数据来源：{len(placeholders['data_sources'])} 处"
                        )
                    if placeholders.get("case_sources"):
                        st.markdown(
                            f"- 案例来源：{len(placeholders['case_sources'])} 处"
                        )
                    if placeholders.get("standard_sources"):
                        st.markdown(
                            f"- 标准来源：{len(placeholders['standard_sources'])} 处"
                        )
                    if placeholders.get("expert_opinions"):
                        st.markdown(
                            f"- 专家观点：{len(placeholders['expert_opinions'])} 处"
                        )

        # 显示 E-E-A-T 强化后的来源占位清单
        if st.session_state.eeat_source_placeholders:
            with st.container(border=True):
                st.markdown("##### 📚 来源占位清单")
                for placeholder in st.session_state.eeat_source_placeholders:
                    st.markdown(f"- {placeholder}")

        # 事实密度 + 结构化块评估和强化
        st.markdown("---")
        st.markdown("#### 📊 事实密度 + 结构化块")
        st.caption("评估和强化内容的事实信息密度和结构化程度")

        fact_col1, fact_col2 = st.columns(2)

        with fact_col1:
            assess_opt_fact = st.button(
                "📊 评估事实密度",
                use_container_width=True,
                disabled=(not st.session_state.cfg_valid) or (gen_llm is None),
            )

        with fact_col2:
            enhance_opt_fact = st.button(
                "✨ 强化事实密度",
                use_container_width=True,
                disabled=(not st.session_state.cfg_valid) or (gen_llm is None),
            )
            st.caption("强化会覆盖当前优化结果，建议先下载备份。")

        # 初始化事实密度状态
        ss_init("opt_fact_assessment", None)
        ss_init("opt_fact_enhanced", "")
        ss_init("opt_fact_details", [])

        # 事实密度评估
        if assess_opt_fact and gen_llm:
            fact_enhancer = FactDensityEnhancer()
            with st.spinner("正在评估事实密度和结构化块..."):
                try:
                    score_chain = (
                        PromptTemplate.from_template("{input}")
                        | gen_llm
                        | StrOutputParser()
                    )
                    assessment = fact_enhancer.assess_fact_density(
                        st.session_state.optimized_article,
                        brand,
                        advantages,
                        st.session_state.opt_platform,
                        score_chain,
                    )
                    st.session_state.opt_fact_assessment = assessment
                except Exception as e:
                    st.error(f"事实密度评估失败：{e}")

        # 事实密度强化（带备份与安全校验）
        if enhance_opt_fact and gen_llm:
            fact_enhancer = FactDensityEnhancer()
            st.session_state.optimized_article_backup = (
                st.session_state.optimized_article
            )
            with st.spinner("正在强化事实密度和结构化块..."):
                try:
                    enhance_chain = (
                        PromptTemplate.from_template("{input}")
                        | gen_llm
                        | StrOutputParser()
                    )
                    enhanced = fact_enhancer.enhance_fact_density(
                        st.session_state.optimized_article,
                        brand,
                        advantages,
                        st.session_state.opt_platform,
                        enhance_chain,
                    )
                    new_content = enhanced.get("enhanced_content", "") or ""
                    if not new_content.strip() or len(new_content.strip()) < 100:
                        st.error(
                            "事实密度强化失败：模型返回内容异常，已保留强化前版本。"
                        )
                    else:
                        st.session_state.opt_fact_enhanced = new_content
                        st.session_state.opt_fact_details = enhanced.get(
                            "enhancement_details", []
                        )
                        st.session_state.optimized_article = new_content
                        st.success(
                            f"✅ 事实密度强化完成！已添加 {len(st.session_state.opt_fact_details)} 处事实信息和结构化块"
                        )
                except Exception as e:
                    st.error(f"事实密度强化失败：{e}")

        # 显示事实密度评估结果
        if st.session_state.opt_fact_assessment:
            assessment = st.session_state.opt_fact_assessment
            scores = assessment.get("scores", {})
            total_score = scores.get("total", 0)
            fact_enhancer = FactDensityEnhancer()
            level, color = fact_enhancer.get_score_level(total_score)

            st.markdown("##### 📊 事实密度 + 结构化评估结果")
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("总分", f"{total_score}/100", delta=level, delta_color="off")
            with col2:
                st.metric("事实密度", f"{scores.get('fact_density', 0)}/50")
            with col3:
                st.metric("结构化", f"{scores.get('structure', 0)}/50")

            # 使用 tabs 组织分析结果
            fact_analysis = assessment.get("fact_analysis", {})
            structure_analysis = assessment.get("structure_analysis", {})
            has_details = bool(st.session_state.get("opt_fact_details"))

            # 构建可用的 tabs
            tab_labels = []
            if fact_analysis:
                tab_labels.append("📈 事实密度")
            if structure_analysis:
                tab_labels.append("🏗️ 结构化块")
            if has_details:
                tab_labels.append("📝 强化详情")

            if tab_labels:
                analysis_tabs = st.tabs(tab_labels)
                tab_idx = 0

                # 事实密度分析
                if fact_analysis:
                    with analysis_tabs[tab_idx]:
                        with st.container(border=True):
                            col1, col2, col3, col4, col5, col6 = st.columns(6)
                            with col1:
                                st.metric("数据", fact_analysis.get("data_count", 0))
                            with col2:
                                st.metric("案例", fact_analysis.get("case_count", 0))
                            with col3:
                                st.metric("标准", fact_analysis.get("standard_count", 0))
                            with col4:
                                st.metric(
                                    "对比", fact_analysis.get("comparison_count", 0)
                                )
                            with col5:
                                st.metric("时间", fact_analysis.get("time_count", 0))
                            with col6:
                                st.metric("来源", fact_analysis.get("source_count", 0))

                            missing_facts = fact_analysis.get("missing_facts", [])
                            if missing_facts:
                                st.markdown("**缺失的事实类型：**")
                                for fact in missing_facts:
                                    st.markdown(f"- {fact}")
                    tab_idx += 1

                # 结构化分析
                if structure_analysis:
                    with analysis_tabs[tab_idx]:
                        with st.container(border=True):
                            col1, col2, col3, col4 = st.columns(4)
                            with col1:
                                st.markdown(
                                    f"**标题层级**：{'✅' if structure_analysis.get('has_title') else '❌'}"
                                )
                                st.markdown(
                                    f"**结论摘要**：{'✅' if structure_analysis.get('has_summary') else '❌'}"
                                )
                            with col2:
                                st.markdown(
                                    f"**清单列表**：{'✅' if structure_analysis.get('has_list') else '❌'}"
                                )
                                st.markdown(
                                    f"**FAQ部分**：{'✅' if structure_analysis.get('has_faq') else '❌'}"
                                )
                            with col3:
                                st.markdown(
                                    f"**代码块**：{'✅' if structure_analysis.get('has_code') else '❌'}"
                                )
                                st.markdown(
                                    f"**对比表格**：{'✅' if structure_analysis.get('has_table') else '❌'}"
                                )
                            with col4:
                                st.markdown(
                                    f"**步骤说明**：{'✅' if structure_analysis.get('has_steps') else '❌'}"
                                )
                                st.markdown(
                                    f"**总结部分**：{'✅' if structure_analysis.get('has_conclusion') else '❌'}"
                                )

                            missing_blocks = structure_analysis.get("missing_blocks", [])
                            if missing_blocks:
                                st.markdown("**缺失的结构化块：**")
                                for block in missing_blocks:
                                    st.markdown(f"- {block}")
                    tab_idx += 1

                # 强化详情
                if has_details:
                    with analysis_tabs[tab_idx]:
                        with st.container(border=True):
                            for detail in st.session_state.opt_fact_details:
                                st.markdown(f"- {detail}")

    # === 高级：结构化 Schema & 技术配置（折叠区） ===
    with st.expander(
        "高级：结构化 Schema & 技术 SEO 配置（可选）", expanded=False
    ):
        # 结构化数据生成
        st.markdown("**📋 结构化数据生成**")
        st.caption(
            "生成符合 Schema.org 规范的 JSON-LD 代码，提升品牌在 AI 模型中的实体识别和权威性"
        )

        with st.container(border=True):
            schema_col1, schema_col2 = st.columns([2, 1])

            with schema_col1:
                schema_type = st.selectbox(
                    "Schema 类型",
                    [
                        "Organization（组织/公司）",
                        "SoftwareApplication（软件应用）",
                        "Product（产品）",
                        "Service（服务）",
                        "组合（Organization + SoftwareApplication）",
                    ],
                    index=1,
                    key="schema_type_sel",
                    help="选择适合您品牌的 Schema 类型",
                )

            with schema_col2:
                generate_schema_btn = st.button(
                    "🚀 生成 JSON-LD",
                    use_container_width=True,
                    key="generate_schema_btn",
                )

            # 初始化 JSON-LD 相关状态
            ss_init("generated_json_ld", None)
            ss_init("generated_html_script", None)

            # 生成 JSON-LD（带基础信息校验）
            if generate_schema_btn:
                if not brand or not advantages or len(brand.strip()) < 2:
                    st.warning(
                        "请先在基础信息中填写品牌名称和优势，再生成 Schema。"
                    )
                else:
                    try:
                        schema_gen = SchemaGenerator()

                        if schema_type == "Organization（组织/公司）":
                            schema_dict = schema_gen.generate_organization_schema(
                                brand_name=brand,
                                description=advantages,
                                url="",  # 用户可以在生成后手动添加
                                logo="",
                                founding_date="",
                            )
                        elif schema_type == "SoftwareApplication（软件应用）":
                            schema_dict = schema_gen.generate_software_application_schema(
                                brand_name=brand,
                                application_name=brand,
                                description=advantages,
                                url="",
                                application_category="BusinessApplication",
                                operating_system="Web",
                            )
                        elif schema_type == "Product（产品）":
                            schema_dict = schema_gen.generate_product_schema(
                                brand_name=brand,
                                product_name=brand,
                                description=advantages,
                                url="",
                            )
                        elif schema_type == "Service（服务）":
                            schema_dict = schema_gen.generate_service_schema(
                                brand_name=brand,
                                service_name=brand,
                                description=advantages,
                                url="",
                            )
                        else:  # 组合
                            schema_dict = schema_gen.generate_combined_schema(
                                brand_name=brand,
                                advantages=advantages,
                                schema_types=[
                                    "Organization",
                                    "SoftwareApplication",
                                ],
                            )

                        # 格式化输出
                        json_ld_code = schema_gen.format_json_ld(schema_dict)
                        html_script = schema_gen.generate_html_script_tag(
                            schema_dict
                        )

                        st.session_state.generated_json_ld = json_ld_code
                        st.session_state.generated_html_script = html_script

                        st.success("✅ JSON-LD Schema 生成成功！")
                    except Exception as e:
                        st.error(f"JSON-LD 生成失败：{e}")

            # 显示生成的 JSON-LD
            if st.session_state.generated_json_ld:
                st.markdown("##### 📄 JSON-LD 代码")
                st.code(st.session_state.generated_json_ld, language="json")

                st.markdown("##### 📄 HTML Script 标签（可直接嵌入网页）")
                st.code(st.session_state.generated_html_script, language="html")

                # 下载按钮
                col1, col2 = st.columns(2)
                with col1:
                    st.download_button(
                        "下载 JSON-LD",
                        st.session_state.generated_json_ld,
                        f"{sanitize_filename(brand,40)}_schema.json",
                        mime="application/json",
                        use_container_width=True,
                        key="schema_dl_json",
                    )
                with col2:
                    st.download_button(
                        "下载 HTML Script",
                        st.session_state.generated_html_script,
                        f"{sanitize_filename(brand,40)}_schema.html",
                        mime="text/html",
                        use_container_width=True,
                        key="schema_dl_html",
                    )

                st.info(
                    "💡 **使用说明**：将 HTML Script 标签复制到您的官网 `<head>` 部分，或将 JSON-LD 代码添加到 GitHub README 中。"
                )

        # 技术配置生成
        st.markdown("---")
        st.markdown("**⚙️ 技术配置生成**")
        st.caption("生成 robots.txt、sitemap.xml 等技术配置文件，提升内容收录效果（提升 20-30%）")

        with st.container(border=True):
            config_tab1, config_tab2 = st.tabs(["🤖 robots.txt", "🗺️ sitemap.xml"])

            # robots.txt 生成
            with config_tab1:
                st.markdown("##### 🤖 robots.txt 生成")
                st.caption("控制搜索引擎爬虫的访问权限，提升内容收录效果")

                robots_col1, robots_col2 = st.columns([2, 1])

                with robots_col1:
                    robots_base_url = st.text_input(
                        "网站基础 URL",
                        value="",
                        key="robots_base_url",
                        placeholder="https://example.com",
                        help="您的网站基础 URL（如 https://example.com）",
                    )

                with robots_col2:
                    generate_robots_btn = st.button(
                        "🚀 生成 robots.txt",
                        use_container_width=True,
                        key="generate_robots_btn",
                    )

                # 允许/禁止路径配置
                robots_config_col1, robots_config_col2 = st.columns(2)

                with robots_config_col1:
                    allow_paths_input = st.text_area(
                        "允许爬取的路径（每行一个）",
                        value="/\n/blog\n/docs",
                        key="robots_allow_paths",
                        help="每行一个路径，如 /、/blog、/docs",
                        height=100,
                    )

                with robots_config_col2:
                    disallow_paths_input = st.text_area(
                        "禁止爬取的路径（每行一个）",
                        value="/admin\n/private\n/api",
                        key="robots_disallow_paths",
                        help="每行一个路径，如 /admin、/private、/api",
                        height=100,
                    )

                # 初始化状态
                ss_init("generated_robots_txt", None)

                # 生成 robots.txt（带 URL 校验）
                if generate_robots_btn:
                    if not robots_base_url.strip():
                        st.error("请填写网站基础 URL（如 https://example.com）。")
                    else:
                        if not robots_base_url.startswith("http"):
                            st.warning(
                                "建议使用完整 URL（含 http/https），避免 robots.txt 中出现无效链接。"
                            )
                        try:
                            config_gen = TechnicalConfigGenerator()

                            # 解析允许路径
                            allow_paths = (
                                [
                                    p.strip()
                                    for p in allow_paths_input.split("\n")
                                    if p.strip()
                                ]
                                if allow_paths_input
                                else None
                            )

                            # 解析禁止路径
                            disallow_paths = (
                                [
                                    p.strip()
                                    for p in disallow_paths_input.split("\n")
                                    if p.strip()
                                ]
                                if disallow_paths_input
                                else None
                            )

                            robots_txt = config_gen.generate_robots_txt(
                                base_url=robots_base_url,
                                allow_paths=allow_paths,
                                disallow_paths=disallow_paths,
                                sitemap_url="",  # 自动生成
                                user_agent="*",
                                crawl_delay=None,
                            )

                            st.session_state.generated_robots_txt = robots_txt
                            st.success("✅ robots.txt 生成成功！")
                        except Exception as e:
                            st.error(f"robots.txt 生成失败：{e}")

                # 显示生成的 robots.txt
                if st.session_state.generated_robots_txt:
                    st.markdown("##### 📄 robots.txt 内容")
                    st.code(st.session_state.generated_robots_txt, language="text")

                    st.download_button(
                        "下载 robots.txt",
                        st.session_state.generated_robots_txt,
                        "robots.txt",
                        mime="text/plain",
                        use_container_width=True,
                        key="robots_dl",
                    )

                    st.info(
                        "💡 **使用说明**：将 robots.txt 文件上传到您网站的根目录（如 https://example.com/robots.txt）"
                    )

            # sitemap.xml 生成
            with config_tab2:
                st.markdown("##### 🗺️ sitemap.xml 生成")
                st.caption("帮助搜索引擎发现和索引您的所有页面，提升内容收录效果")

                sitemap_col1, sitemap_col2 = st.columns([2, 1])

                with sitemap_col1:
                    sitemap_base_url = st.text_input(
                        "网站基础 URL",
                        value="",
                        key="sitemap_base_url",
                        placeholder="https://example.com",
                        help="您的网站基础 URL（如 https://example.com）",
                    )

                with sitemap_col2:
                    generate_sitemap_btn = st.button(
                        "🚀 生成 sitemap.xml",
                        use_container_width=True,
                        key="generate_sitemap_btn",
                    )

                # 选择数据源
                sitemap_source = st.radio(
                    "数据源",
                    ["基于关键词生成", "基于历史文章生成"],
                    key="sitemap_source",
                    horizontal=True,
                )

                # 初始化状态
                ss_init("generated_sitemap_xml", None)

                # 生成 sitemap.xml（带 URL 校验）
                if generate_sitemap_btn:
                    if not sitemap_base_url.strip():
                        st.error("请填写网站基础 URL（如 https://example.com）。")
                    else:
                        if not sitemap_base_url.startswith("http"):
                            st.warning(
                                "建议使用完整 URL（含 http/https），避免 sitemap.xml 中出现无效链接。"
                            )
                        try:
                            config_gen = TechnicalConfigGenerator()

                            if sitemap_source == "基于关键词生成":
                                # 基于关键词生成
                                keywords_for_sitemap = (
                                    st.session_state.keywords
                                    if st.session_state.keywords
                                    else []
                                )

                                if not keywords_for_sitemap:
                                    st.warning(
                                        "⚠️ 请先在【1 关键词蒸馏】生成关键词，或选择【基于历史文章生成】"
                                    )
                                else:
                                    sitemap_xml = (
                                        config_gen.generate_sitemap_xml(
                                            base_url=sitemap_base_url,
                                            keywords=keywords_for_sitemap,
                                            lastmod=None,  # 使用当前日期
                                            changefreq="weekly",
                                            priority=0.8,
                                        )
                                    )
                                    st.session_state.generated_sitemap_xml = (
                                        sitemap_xml
                                    )
                                    st.success(
                                        f"✅ sitemap.xml 生成成功！包含 {len(keywords_for_sitemap)} 个 URL"
                                    )
                            else:
                                # 基于历史文章生成
                                try:
                                    articles = storage.get_articles(brand=brand)

                                    if not articles:
                                        st.warning(
                                            "⚠️ 暂无历史文章，请先生成内容，或选择【基于关键词生成】"
                                        )
                                    else:
                                        sitemap_xml = (
                                            config_gen.generate_sitemap_from_articles(
                                                base_url=sitemap_base_url,
                                                articles=articles,
                                                lastmod=None,
                                                changefreq="weekly",
                                                priority=0.8,
                                            )
                                        )
                                        st.session_state.generated_sitemap_xml = (
                                            sitemap_xml
                                        )
                                        st.success(
                                            f"✅ sitemap.xml 生成成功！包含 {len(articles)} 个 URL"
                                        )
                                except Exception as e:
                                    st.error(f"获取历史文章失败：{e}")

                        except Exception as e:
                            st.error(f"sitemap.xml 生成失败：{e}")

                # 显示生成的 sitemap.xml
                if st.session_state.generated_sitemap_xml:
                    st.markdown("##### 📄 sitemap.xml 内容")
                    st.code(st.session_state.generated_sitemap_xml, language="xml")

                    st.download_button(
                        "下载 sitemap.xml",
                        st.session_state.generated_sitemap_xml,
                        "sitemap.xml",
                        mime="application/xml",
                        use_container_width=True,
                        key="sitemap_dl",
                    )

                    st.info(
                        "💡 **使用说明**：将 sitemap.xml 文件上传到您网站的根目录（如 https://example.com/sitemap.xml），并在 Google Search Console 中提交"
                    )