feat: 重构项目结构并添加平台同步基础架构
- 重构项目目录结构,将功能模块移至 modules/ 目录 - 创建平台同步基础架构,包括发布器基类和 GitHub 发布器 - 新增 UI 状态管理模块 (modules/ui/state.py) 统一管理会话状态 - 更新依赖配置,添加平台同步所需依赖 (httpx, pyperclip) - 整理文档结构,将所有文档分类移至 docs/ 目录 - 添加 .cursorrules 文件定义项目开发规范 - 清理根目录重复文件,保持项目结构整洁
This commit is contained in:
@@ -0,0 +1,394 @@
|
||||
"""
|
||||
内容质量指标分析模块
|
||||
计算 Trust Density、Citation Share、Authority Score、Engagement Potential 等指标
|
||||
"""
|
||||
import re
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from collections import Counter
|
||||
|
||||
|
||||
class ContentMetricsAnalyzer:
|
||||
"""内容质量指标分析器"""
|
||||
|
||||
def __init__(self):
|
||||
# 信任信号模式(来源占位、数据、案例等)
|
||||
self.trust_signal_patterns = [
|
||||
# 来源占位模式
|
||||
r'根据[^,。;:\n]{2,30}(?:报告|研究|数据|统计|调查|分析|标准|规范|文档|指南)',
|
||||
r'参考[^,。;:\n]{2,30}(?:报告|研究|数据|统计|调查|分析|标准|规范|文档|指南)',
|
||||
r'来自[^,。;:\n]{2,30}(?:报告|研究|数据|统计|调查|分析)',
|
||||
r'据[^,。;:\n]{2,30}(?:显示|表明|统计|调查|分析)',
|
||||
r'[^,。;:\n]{2,20}(?:报告|研究|数据|统计|调查)显示',
|
||||
r'[^,。;:\n]{2,20}(?:报告|研究|数据|统计|调查)表明',
|
||||
# 数据点模式
|
||||
r'\d+%', # 百分比
|
||||
r'\d+\.\d+%', # 小数百分比
|
||||
r'约\d+%', # 约XX%
|
||||
r'超过\d+%', # 超过XX%
|
||||
r'达到\d+%', # 达到XX%
|
||||
r'\d+倍', # XX倍
|
||||
r'\d+个', # XX个
|
||||
r'\d+项', # XX项
|
||||
r'\d+次', # XX次
|
||||
r'\d+年', # XX年(时间数据)
|
||||
r'\d+月', # XX月
|
||||
# 案例模式
|
||||
r'案例[::][^,。;\n]{5,100}',
|
||||
r'例如[^,。;\n]{5,100}',
|
||||
r'以[^,。;\n]{2,30}为例',
|
||||
r'某[^,。;\n]{2,20}(?:企业|公司|用户|项目|团队)',
|
||||
r'实际[^,。;\n]{2,30}(?:测试|应用|使用|经验)',
|
||||
r'使用[^,。;\n]{2,30}(?:发现|表明|显示)',
|
||||
]
|
||||
|
||||
# 结构化元素模式
|
||||
self.structure_patterns = [
|
||||
r'^#{1,6}\s+.+', # Markdown 标题
|
||||
r'^\d+[\.、]\s+.+', # 编号列表
|
||||
r'^[-*+]\s+.+', # 无序列表
|
||||
r'^\s*[-*+]\s+.+', # 缩进列表
|
||||
r'```[\s\S]*?```', # 代码块
|
||||
r'`[^`]+`', # 行内代码
|
||||
r'^\s*[Qq][::].*', # FAQ 问题
|
||||
r'^\s*[Aa][::].*', # FAQ 答案
|
||||
r'\|.*\|', # 表格
|
||||
r'^>.*', # 引用块
|
||||
]
|
||||
|
||||
def count_trust_signals(self, content: str) -> int:
|
||||
"""
|
||||
统计信任信号数量
|
||||
|
||||
Args:
|
||||
content: 内容文本
|
||||
|
||||
Returns:
|
||||
信任信号数量
|
||||
"""
|
||||
# 去重:如果同一个位置匹配多个模式,只算一次
|
||||
# 简化处理:使用集合去重匹配位置附近的一小段文本
|
||||
unique_matches = set()
|
||||
for pattern in self.trust_signal_patterns:
|
||||
for match in re.finditer(pattern, content, re.MULTILINE | re.IGNORECASE):
|
||||
pos_key = content[max(0, match.start() - 10): match.end() + 10]
|
||||
unique_matches.add(pos_key)
|
||||
|
||||
return len(unique_matches)
|
||||
|
||||
def count_citations(self, content: str) -> int:
|
||||
"""
|
||||
统计来源占位数量(Citation)
|
||||
|
||||
Args:
|
||||
content: 内容文本
|
||||
|
||||
Returns:
|
||||
来源占位数量
|
||||
"""
|
||||
citation_patterns = [
|
||||
r'根据[^,。;:\n]{2,30}(?:报告|研究|数据|统计|调查|分析|标准|规范|文档|指南)',
|
||||
r'参考[^,。;:\n]{2,30}(?:报告|研究|数据|统计|调查|分析|标准|规范|文档|指南)',
|
||||
r'来自[^,。;:\n]{2,30}(?:报告|研究|数据|统计|调查|分析)',
|
||||
r'据[^,。;:\n]{2,30}(?:显示|表明|统计|调查|分析)',
|
||||
r'[^,。;:\n]{2,20}(?:报告|研究|数据|统计|调查)(?:显示|表明)',
|
||||
]
|
||||
|
||||
citations = set()
|
||||
for pattern in citation_patterns:
|
||||
for match in re.finditer(pattern, content, re.MULTILINE | re.IGNORECASE):
|
||||
# 使用匹配位置作为唯一标识
|
||||
pos_key = (match.start(), match.end())
|
||||
citations.add(pos_key)
|
||||
|
||||
return len(citations)
|
||||
|
||||
def count_brand_mentions(self, content: str, brand: str) -> int:
|
||||
"""
|
||||
统计品牌提及次数
|
||||
|
||||
Args:
|
||||
content: 内容文本
|
||||
brand: 品牌名称
|
||||
|
||||
Returns:
|
||||
品牌提及次数
|
||||
"""
|
||||
if not brand:
|
||||
return 0
|
||||
|
||||
# 使用单词边界匹配,避免部分匹配
|
||||
pattern = r'\b' + re.escape(brand) + r'\b'
|
||||
matches = re.findall(pattern, content, re.IGNORECASE)
|
||||
return len(matches)
|
||||
|
||||
def count_structure_elements(self, content: str) -> Dict[str, int]:
|
||||
"""
|
||||
统计结构化元素数量
|
||||
|
||||
Args:
|
||||
content: 内容文本
|
||||
|
||||
Returns:
|
||||
结构化元素统计字典
|
||||
"""
|
||||
lines = content.split('\n')
|
||||
structure_count = {
|
||||
'headings': 0, # 标题
|
||||
'lists': 0, # 列表
|
||||
'code_blocks': 0, # 代码块
|
||||
'faq_pairs': 0, # FAQ 对
|
||||
'tables': 0, # 表格
|
||||
'quotes': 0, # 引用
|
||||
}
|
||||
|
||||
# 统计标题
|
||||
for line in lines:
|
||||
if re.match(r'^#{1,6}\s+.+', line):
|
||||
structure_count['headings'] += 1
|
||||
elif re.match(r'^\d+[\.、]\s+.+', line) or re.match(r'^[-*+]\s+.+', line):
|
||||
structure_count['lists'] += 1
|
||||
elif re.match(r'^\s*[Qq][::].*', line):
|
||||
structure_count['faq_pairs'] += 1
|
||||
elif re.match(r'^\s*\|.*\|', line):
|
||||
structure_count['tables'] += 1
|
||||
elif re.match(r'^>.*', line):
|
||||
structure_count['quotes'] += 1
|
||||
|
||||
# 统计代码块
|
||||
code_blocks = re.findall(r'```[\s\S]*?```', content)
|
||||
structure_count['code_blocks'] = len(code_blocks)
|
||||
|
||||
return structure_count
|
||||
|
||||
def calculate_trust_density(self, content: str) -> float:
|
||||
"""
|
||||
计算 Trust Density(每100字信任信号数)
|
||||
|
||||
Args:
|
||||
content: 内容文本
|
||||
|
||||
Returns:
|
||||
Trust Density 值
|
||||
"""
|
||||
if not content:
|
||||
return 0.0
|
||||
|
||||
# 计算实际文本长度(去除空白字符)
|
||||
text_length = len(re.sub(r'\s+', '', content))
|
||||
if text_length == 0:
|
||||
return 0.0
|
||||
|
||||
trust_signals = self.count_trust_signals(content)
|
||||
# 每100字信任信号数
|
||||
trust_density = (trust_signals / text_length) * 100
|
||||
|
||||
return round(trust_density, 2)
|
||||
|
||||
def calculate_citation_share(self, content: str, brand: str) -> float:
|
||||
"""
|
||||
计算 Citation Share(品牌引用比例)
|
||||
|
||||
Args:
|
||||
content: 内容文本
|
||||
brand: 品牌名称
|
||||
|
||||
Returns:
|
||||
Citation Share 值(0-100)
|
||||
"""
|
||||
if not content or not brand:
|
||||
return 0.0
|
||||
|
||||
brand_mentions = self.count_brand_mentions(content, brand)
|
||||
|
||||
# 统计所有可能的提及(品牌、竞品、通用术语等)
|
||||
# 简化处理:统计所有可能的品牌/产品提及
|
||||
# 使用常见品牌提及模式
|
||||
all_mentions_pattern = r'\b[A-Z][a-zA-Z0-9]{2,20}\b' # 大写开头的单词(可能是品牌)
|
||||
all_mentions = len(re.findall(all_mentions_pattern, content))
|
||||
|
||||
# 如果总提及数太少,使用品牌提及次数作为分母
|
||||
if all_mentions < brand_mentions * 2:
|
||||
all_mentions = brand_mentions * 2
|
||||
|
||||
if all_mentions == 0:
|
||||
return 0.0
|
||||
|
||||
citation_share = (brand_mentions / all_mentions) * 100
|
||||
return round(min(citation_share, 100.0), 2)
|
||||
|
||||
def calculate_authority_score(self, content: str) -> float:
|
||||
"""
|
||||
计算 Authority Score(权威性得分,0-100)
|
||||
|
||||
基于来源占位数量、数据密度等
|
||||
|
||||
Args:
|
||||
content: 内容文本
|
||||
|
||||
Returns:
|
||||
Authority Score 值(0-100)
|
||||
"""
|
||||
if not content:
|
||||
return 0.0
|
||||
|
||||
citations = self.count_citations(content)
|
||||
trust_signals = self.count_trust_signals(content)
|
||||
text_length = len(re.sub(r'\s+', '', content))
|
||||
|
||||
if text_length == 0:
|
||||
return 0.0
|
||||
|
||||
# 计算各项得分
|
||||
# 来源占位得分(最多30分)
|
||||
citation_score = min(citations * 5, 30)
|
||||
|
||||
# 信任信号密度得分(最多40分)
|
||||
trust_density = (trust_signals / text_length) * 1000 # 每1000字信任信号数
|
||||
trust_score = min(trust_density * 4, 40)
|
||||
|
||||
# 数据点得分(最多30分)
|
||||
data_points = len(re.findall(r'\d+%', content)) + len(re.findall(r'\d+\.\d+%', content))
|
||||
data_score = min(data_points * 2, 30)
|
||||
|
||||
authority_score = citation_score + trust_score + data_score
|
||||
return round(min(authority_score, 100.0), 2)
|
||||
|
||||
def calculate_engagement_potential(self, content: str) -> float:
|
||||
"""
|
||||
计算 Engagement Potential(参与度潜力,0-100)
|
||||
|
||||
基于结构化程度、互动元素等
|
||||
|
||||
Args:
|
||||
content: 内容文本
|
||||
|
||||
Returns:
|
||||
Engagement Potential 值(0-100)
|
||||
"""
|
||||
if not content:
|
||||
return 0.0
|
||||
|
||||
structure = self.count_structure_elements(content)
|
||||
text_length = len(re.sub(r'\s+', '', content))
|
||||
|
||||
if text_length == 0:
|
||||
return 0.0
|
||||
|
||||
# 计算各项得分
|
||||
# 标题得分(最多20分)
|
||||
heading_score = min(structure['headings'] * 2, 20)
|
||||
|
||||
# 列表得分(最多25分)
|
||||
list_score = min(structure['lists'] * 1.5, 25)
|
||||
|
||||
# FAQ 得分(最多25分)
|
||||
faq_score = min(structure['faq_pairs'] * 3, 25)
|
||||
|
||||
# 代码块得分(最多15分)
|
||||
code_score = min(structure['code_blocks'] * 5, 15)
|
||||
|
||||
# 表格得分(最多10分)
|
||||
table_score = min(structure['tables'] * 2, 10)
|
||||
|
||||
# 引用得分(最多5分)
|
||||
quote_score = min(structure['quotes'] * 1, 5)
|
||||
|
||||
engagement_score = heading_score + list_score + faq_score + code_score + table_score + quote_score
|
||||
return round(min(engagement_score, 100.0), 2)
|
||||
|
||||
def analyze_content(self, content: str, brand: str) -> Dict[str, any]:
|
||||
"""
|
||||
综合分析内容,返回所有指标
|
||||
|
||||
Args:
|
||||
content: 内容文本
|
||||
brand: 品牌名称
|
||||
|
||||
Returns:
|
||||
包含所有指标的字典
|
||||
"""
|
||||
if not content:
|
||||
return {
|
||||
'trust_density': 0.0,
|
||||
'citation_share': 0.0,
|
||||
'authority_score': 0.0,
|
||||
'engagement_potential': 0.0,
|
||||
'trust_signals': 0,
|
||||
'citations': 0,
|
||||
'brand_mentions': 0,
|
||||
'structure_elements': {},
|
||||
'text_length': 0,
|
||||
}
|
||||
|
||||
text_length = len(re.sub(r'\s+', '', content))
|
||||
trust_signals = self.count_trust_signals(content)
|
||||
citations = self.count_citations(content)
|
||||
brand_mentions = self.count_brand_mentions(content, brand)
|
||||
structure = self.count_structure_elements(content)
|
||||
|
||||
return {
|
||||
'trust_density': self.calculate_trust_density(content),
|
||||
'citation_share': self.calculate_citation_share(content, brand),
|
||||
'authority_score': self.calculate_authority_score(content),
|
||||
'engagement_potential': self.calculate_engagement_potential(content),
|
||||
'trust_signals': trust_signals,
|
||||
'citations': citations,
|
||||
'brand_mentions': brand_mentions,
|
||||
'structure_elements': structure,
|
||||
'text_length': text_length,
|
||||
}
|
||||
|
||||
def analyze_batch(self, contents: List[Dict[str, str]], brand: str) -> List[Dict[str, any]]:
|
||||
"""
|
||||
批量分析内容
|
||||
|
||||
Args:
|
||||
contents: 内容列表,每个元素包含 'content' 字段
|
||||
brand: 品牌名称
|
||||
|
||||
Returns:
|
||||
分析结果列表
|
||||
"""
|
||||
results = []
|
||||
for item in contents:
|
||||
content = item.get('content', '')
|
||||
metrics = self.analyze_content(content, brand)
|
||||
# 保留原始数据
|
||||
metrics['keyword'] = item.get('keyword', '')
|
||||
metrics['platform'] = item.get('platform', '')
|
||||
results.append(metrics)
|
||||
|
||||
return results
|
||||
|
||||
def get_metrics_summary(self, results: List[Dict[str, any]]) -> Dict[str, any]:
|
||||
"""
|
||||
获取指标汇总统计
|
||||
|
||||
Args:
|
||||
results: 分析结果列表
|
||||
|
||||
Returns:
|
||||
汇总统计字典
|
||||
"""
|
||||
if not results:
|
||||
return {
|
||||
'avg_trust_density': 0.0,
|
||||
'avg_citation_share': 0.0,
|
||||
'avg_authority_score': 0.0,
|
||||
'avg_engagement_potential': 0.0,
|
||||
'total_trust_signals': 0,
|
||||
'total_citations': 0,
|
||||
'total_brand_mentions': 0,
|
||||
'count': 0,
|
||||
}
|
||||
|
||||
return {
|
||||
'avg_trust_density': round(sum(r.get('trust_density', 0) for r in results) / len(results), 2),
|
||||
'avg_citation_share': round(sum(r.get('citation_share', 0) for r in results) / len(results), 2),
|
||||
'avg_authority_score': round(sum(r.get('authority_score', 0) for r in results) / len(results), 2),
|
||||
'avg_engagement_potential': round(sum(r.get('engagement_potential', 0) for r in results) / len(results), 2),
|
||||
'total_trust_signals': sum(r.get('trust_signals', 0) for r in results),
|
||||
'total_citations': sum(r.get('citations', 0) for r in results),
|
||||
'total_brand_mentions': sum(r.get('brand_mentions', 0) for r in results),
|
||||
'count': len(results),
|
||||
}
|
||||
Reference in New Issue
Block a user