添加产品规格文档并优化项目结构
Made-with: Cursor
This commit is contained in:
@@ -0,0 +1,354 @@
|
||||
"""
|
||||
关键词数据增强模块
|
||||
从历史验证数据中提取高价值关键词,反哺关键词生成
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import Dict, List, Optional, Any
|
||||
from collections import Counter
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class KeywordDataEnhancer:
    """Keyword data enhancer.

    Extracts high-value keywords from historical verification results so they
    can be fed back into keyword generation.

    The records returned by ``storage.get_verify_results`` are loaded into a
    pandas DataFrame. The columns read here are "问题" (the keyword/question),
    "提及次数" (mention count), "位置" (mention position, optional) and
    "验证时间" (verification timestamp, optional).
    """

    # Search-intent buckets and the substrings that place a keyword in them.
    # Buckets are tried in this order and the first match wins.
    _INTENT_PATTERNS = {
        "对比": ("对比", "比较", "vs", "versus", "哪个好"),
        "评测": ("评测", "评价", "测评", "怎么样", "好不好"),
        "使用": ("怎么用", "如何使用", "教程", "入门", "指南"),
        "购买": ("价格", "多少钱", "购买", "付费", "免费"),
        "问题": ("问题", "错误", "失败", "怎么办", "解决"),
        "推荐": ("推荐", "最好", "排行", "排名", "前十"),
    }
    # Bucket used for keywords that match none of the patterns above.
    _OTHER_INTENT = "其他"

    def __init__(self, storage):
        """
        Args:
            storage: DataStorage instance. Only
                ``get_verify_results(brand=...)`` is called on it here.
        """
        self.storage = storage

    @staticmethod
    def _filter_recent(df, days):
        """Keep only the rows verified within the last ``days`` days.

        The frame is returned unchanged when ``days`` is None or not positive,
        or when there is no "验证时间" column to filter on. Rows whose
        timestamp cannot be parsed are kept rather than silently discarded.

        Args:
            df: DataFrame of verification records.
            days: Size of the look-back window in days.

        Returns:
            The filtered DataFrame.
        """
        import pandas as pd

        if days is None or days <= 0 or "验证时间" not in df.columns:
            return df

        # utc=True puts naive and tz-aware values on one comparable timeline;
        # naive values are read as UTC, which is precise enough for a window
        # measured in whole days.
        stamps = pd.to_datetime(df["验证时间"], errors="coerce", utc=True)
        cutoff = pd.Timestamp.now(tz="UTC") - pd.Timedelta(days=days)
        return df[stamps.isna() | (stamps >= cutoff)]

    def analyze_historical_performance(self, brand: str,
                                       days: int = 30) -> Dict:
        """Analyze historical verification data and extract high-value keywords.

        Args:
            brand: Brand name.
            days: Analyze only the data from the last N days. The window is
                applied when the records carry a "验证时间" column.

        Returns:
            ``{"has_data": False, "message": ...}`` when there is nothing to
            analyze; otherwise a dict with ``has_data``, ``total_keywords``,
            ``high_value_keywords``, ``intent_distribution``, ``suggestions``
            and ``keyword_details``.
        """
        verify_results = self.storage.get_verify_results(brand=brand)
        if not verify_results:
            return {"has_data": False, "message": "暂无历史验证数据"}

        # pandas is imported lazily so the module can be imported without it.
        import pandas as pd

        df = self._filter_recent(pd.DataFrame(verify_results), days)
        if df.empty:
            return {"has_data": False, "message": "暂无历史验证数据"}

        keyword_performance = self._analyze_keyword_performance(df)

        return {
            "has_data": True,
            "total_keywords": len(keyword_performance),
            "high_value_keywords": self._extract_high_value_keywords(keyword_performance),
            "intent_distribution": self._analyze_intent_distribution(keyword_performance),
            "suggestions": self._generate_keyword_suggestions(keyword_performance),
            "keyword_details": keyword_performance,
        }

    def _analyze_keyword_performance(self, df) -> List[Dict]:
        """Compute per-keyword statistics, sorted by value score (best first).

        Args:
            df: DataFrame of verification records with at least the "问题"
                and "提及次数" columns; "位置" is used when present.

        Returns:
            One dict per keyword with ``keyword``, ``total_verifications``,
            ``mention_rate``, ``avg_mentions``, ``position_distribution``,
            ``value_score`` and ``suggested_action``.
        """
        import pandas as pd

        has_position = "位置" in df.columns
        keyword_stats = []

        # groupby skips rows whose keyword is missing, and sort=False keeps
        # keywords in order of first appearance so ties stay stable below.
        for keyword, group in df.groupby("问题", sort=False):
            mentions = group["提及次数"]

            # Share of verifications in which the brand was mentioned at all.
            mention_rate = float((mentions > 0).mean())

            avg_mentions = mentions.mean()
            # An all-missing column averages to NaN, which would poison the
            # score and the sort; treat it as "never mentioned".
            avg_mentions = 0.0 if pd.isna(avg_mentions) else float(avg_mentions)

            # Tally mention positions, ignoring missing values and the
            # "not mentioned" marker.
            position_counts = Counter()
            if has_position:
                position_counts.update(
                    pos for pos in group["位置"].dropna()
                    if pos and pos != "未提及"
                )

            value_score = self._calculate_value_score(
                mention_rate, avg_mentions, position_counts
            )

            keyword_stats.append({
                "keyword": keyword,
                "total_verifications": len(group),
                "mention_rate": mention_rate,
                "avg_mentions": avg_mentions,
                "position_distribution": dict(position_counts),
                "value_score": value_score,
                "suggested_action": self._suggest_action(mention_rate, value_score),
            })

        keyword_stats.sort(key=lambda item: item["value_score"], reverse=True)
        return keyword_stats

    def _calculate_value_score(self, mention_rate: float,
                               avg_mentions: float,
                               position_counts: Counter) -> float:
        """Compute the value score of a keyword.

        Args:
            mention_rate: Fraction of verifications with at least one mention.
            avg_mentions: Average number of mentions per verification.
            position_counts: Count of mentions per position label.

        Returns:
            Value score (0-100): 40 points for the mention rate, 30 for the
            average mention count (saturating at 3 mentions) and 30 for the
            share of mentions positioned in "前1/3".
        """
        rate_part = mention_rate * 40
        depth_part = min(avg_mentions / 3, 1) * 30

        position_part = 0
        total_mentions = sum(position_counts.values())
        if total_mentions > 0:
            position_part = position_counts.get("前1/3", 0) / total_mentions * 30

        return rate_part + depth_part + position_part

    def _suggest_action(self, mention_rate: float, value_score: float) -> str:
        """Suggest a follow-up action based on how the keyword performed.

        Args:
            mention_rate: Fraction of verifications with at least one mention.
            value_score: Score produced by ``_calculate_value_score``.

        Returns:
            A short user-facing recommendation string.
        """
        if value_score >= 70:
            return "✅ 高价值关键词,继续保持"
        if value_score >= 40:
            if mention_rate < 0.5:
                return "⚡ 提及率较低,建议优化内容"
            return "📈 有提升空间,建议增加深度"
        if mention_rate < 0.3:
            return "🔄 效果不佳,考虑替换关键词"
        return "🔍 价值较低,可减少投入"

    def _extract_high_value_keywords(self, keyword_performance: List[Dict],
                                     top_n: int = 10) -> List[Dict]:
        """Return the ``top_n`` keywords with the highest value score.

        The input is re-sorted here (stable, best first) so the result does
        not depend on the caller having sorted it already.
        """
        ranked = sorted(
            keyword_performance,
            key=lambda item: item["value_score"],
            reverse=True,
        )
        return ranked[:top_n]

    def _analyze_intent_distribution(self, keyword_performance: List[Dict]) -> Dict:
        """Classify keywords by search intent.

        Matching is a case-insensitive substring test against
        ``_INTENT_PATTERNS``; a keyword lands in the first bucket that
        matches. Keywords matching nothing are collected under "其他", which
        appears in the result only when at least one such keyword exists.

        Returns:
            ``{"counts": {intent: n}, "keywords": {intent: [keyword, ...]}}``.
        """
        intent_counts = {intent: 0 for intent in self._INTENT_PATTERNS}
        intent_keywords_map = {intent: [] for intent in self._INTENT_PATTERNS}

        for kw_data in keyword_performance:
            keyword = kw_data["keyword"]
            # casefold so "VS" / "Versus" match the lowercase patterns;
            # str() guards against non-string keywords coming from the data.
            haystack = str(keyword).casefold()

            intent = next(
                (
                    name
                    for name, patterns in self._INTENT_PATTERNS.items()
                    if any(pattern in haystack for pattern in patterns)
                ),
                self._OTHER_INTENT,
            )
            intent_counts[intent] = intent_counts.get(intent, 0) + 1
            intent_keywords_map.setdefault(intent, []).append(keyword)

        return {
            "counts": intent_counts,
            "keywords": intent_keywords_map,
        }

    def _generate_keyword_suggestions(self, keyword_performance: List[Dict]) -> List[Dict]:
        """Generate keyword optimization suggestions.

        Up to three suggestions are produced, each listing at most three
        example keywords: "replace" for scores below 30, "expand" for scores
        of 70 and above, and "optimize" for keywords with a mention rate in
        [0.3, 0.5) that have been verified at least three times.
        """

        def build(kind, priority, message, matches):
            return {
                "type": kind,
                "priority": priority,
                "message": message,
                "keywords": [kw["keyword"] for kw in matches[:3]],
            }

        suggestions = []

        # Poor performers.
        low_performers = [kw for kw in keyword_performance if kw["value_score"] < 30]
        if low_performers:
            suggestions.append(build(
                "replace", "high",
                f"有 {len(low_performers)} 个关键词效果不佳,建议替换",
                low_performers,
            ))

        # Proven high performers.
        high_performers = [kw for kw in keyword_performance if kw["value_score"] >= 70]
        if high_performers:
            suggestions.append(build(
                "expand", "medium",
                f"有 {len(high_performers)} 个高价值关键词,建议扩展相关内容",
                high_performers,
            ))

        # Low mention rate, but enough data to suggest there is potential.
        potential_keywords = [
            kw for kw in keyword_performance
            if 0.3 <= kw["mention_rate"] < 0.5 and kw["total_verifications"] >= 3
        ]
        if potential_keywords:
            suggestions.append(build(
                "optimize", "medium",
                f"有 {len(potential_keywords)} 个关键词有提升空间,建议优化内容",
                potential_keywords,
            ))

        return suggestions

    def generate_enhanced_keyword_prompt(self, brand: str, advantages: str,
                                         existing_keywords: Optional[List[str]] = None) -> str:
        """Build the keyword-generation prompt, enriched with historical data.

        Args:
            brand: Brand name.
            advantages: Brand advantages.
            existing_keywords: Keywords already in use; the first ten are
                listed in the prompt so the model can avoid duplicates.

        Returns:
            The enhanced prompt text.
        """
        analysis = self.analyze_historical_performance(brand)

        # Collect fragments and join once instead of growing a string.
        parts = [
            "你是一个 GEO(生成式引擎优化)关键词策略专家。\n"
            "\n"
            "品牌信息:\n"
            f"- 品牌名:{brand}\n"
            f"- 品牌优势:{advantages}\n"
            "\n"
        ]

        if analysis.get("has_data"):
            parts.append("历史验证数据分析:\n")

            # Keywords already verified as effective.
            high_value = analysis.get("high_value_keywords", [])
            if high_value:
                parts.append("\n高价值关键词(已验证有效):\n")
                parts.extend(
                    f"- {kw['keyword']} (提及率: {kw['mention_rate']:.0%}, 价值分: {kw['value_score']:.0f})\n"
                    for kw in high_value[:5]
                )

            # Optimization suggestions.
            suggestions = analysis.get("suggestions", [])
            if suggestions:
                parts.append("\n优化建议:\n")
                parts.extend(f"- {suggestion['message']}\n" for suggestion in suggestions)

            # Intent distribution, most common first; empty buckets omitted.
            intent_dist = analysis.get("intent_distribution", {}).get("counts", {})
            if intent_dist:
                parts.append("\n搜索意图分布:\n")
                ranked = sorted(intent_dist.items(), key=lambda pair: pair[1], reverse=True)
                parts.extend(
                    f"- {intent}: {count} 个关键词\n"
                    for intent, count in ranked
                    if count > 0
                )

        if existing_keywords:
            parts.append("\n已有关键词(避免重复):\n")
            parts.extend(f"- {kw}\n" for kw in existing_keywords[:10])

        parts.append(
            "\n"
            "请生成 20 个新的 GEO 优化关键词,要求:\n"
            "1. 70% 泛词(行业相关)+ 30% 品牌词\n"
            "2. 覆盖多种搜索意图:对比、评测、使用、购买、问题、推荐\n"
            "3. 关键词长度 12-28 字,口语化,符合用户真实搜索习惯\n"
            "4. 每个关键词附带:category(类别)、intent(意图)、estimated_value(预估价值 1-5)\n"
            "\n"
            "输出 JSON 数组格式:\n"
            "[\n"
            "  {\n"
            "    \"keyword\": \"关键词内容\",\n"
            "    \"category\": \"类别\",\n"
            "    \"intent\": \"意图\",\n"
            "    \"estimated_value\": 4\n"
            "  }\n"
            "]\n"
        )

        return "".join(parts)

    def get_keyword_trends(self, brand: str, keyword: str,
                           days: int = 30) -> Dict:
        """Get day-by-day trend data for one keyword.

        Args:
            brand: Brand name.
            keyword: Keyword to look up (exact match on "问题").
            days: Number of most recent days to include.

        Returns:
            ``{"has_data": False, ...}`` when there are no records, no match
            for the keyword, or no "验证时间" column. Otherwise a dict with
            ``has_data``, ``keyword``, ``overall_mention_rate`` and
            ``daily_stats``: one record per day in date order, holding "日期"
            (ISO date string), "提及次数" (mean mentions) and "验证次数"
            (number of verifications). Rows with an unparseable timestamp
            count towards the overall rate but not the daily breakdown.
        """
        verify_results = self.storage.get_verify_results(brand=brand)
        if not verify_results:
            return {"has_data": False}

        import pandas as pd

        df = pd.DataFrame(verify_results)
        not_found = {"has_data": False, "message": f"未找到关键词 '{keyword}' 的验证数据"}

        rows = df[df["问题"] == keyword]
        if rows.empty:
            return not_found

        if "验证时间" not in rows.columns:
            return {"has_data": False, "message": "缺少时间戳数据"}

        rows = self._filter_recent(rows, days)
        if rows.empty:
            return not_found

        # assign() returns a new frame, so the filtered slice of ``df`` is
        # never written to (no SettingWithCopy hazard).
        rows = rows.assign(
            **{"日期": pd.to_datetime(rows["验证时间"], errors="coerce").dt.date}
        )
        daily = (
            rows.dropna(subset=["日期"])
            .groupby("日期")
            .agg({"提及次数": "mean", "问题": "count"})
            .rename(columns={"问题": "验证次数"})
        )

        # Emit the date with every record; dropping the index would leave the
        # trend without its time axis.
        daily_stats = [
            {
                "日期": day.isoformat(),
                "提及次数": float(stats["提及次数"]),
                "验证次数": int(stats["验证次数"]),
            }
            for day, stats in daily.iterrows()
        ]

        return {
            "has_data": True,
            "keyword": keyword,
            "daily_stats": daily_stats,
            "overall_mention_rate": float((rows["提及次数"] > 0).mean()),
        }
|
||||
Reference in New Issue
Block a user