2026-01-30 10:21:29 +08:00
|
|
|
|
"""
|
|
|
|
|
|
JSON-LD Schema.org 结构化数据生成模块
|
|
|
|
|
|
生成符合 Schema.org 规范的 JSON-LD 代码,提升品牌在 AI 模型中的实体识别和权威性
|
|
|
|
|
|
"""
|
|
|
|
|
|
from typing import Dict, List, Optional
|
|
|
|
|
|
import json
|
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class SchemaGenerator:
|
|
|
|
|
|
"""Schema.org JSON-LD 生成器"""
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self):
|
|
|
|
|
|
# Schema.org 上下文
|
|
|
|
|
|
self.context = "https://schema.org"
|
|
|
|
|
|
|
|
|
|
|
|
def generate_organization_schema(
|
|
|
|
|
|
self,
|
|
|
|
|
|
brand_name: str,
|
|
|
|
|
|
description: str = "",
|
|
|
|
|
|
url: str = "",
|
|
|
|
|
|
logo: str = "",
|
|
|
|
|
|
founding_date: str = "",
|
|
|
|
|
|
contact_point: Dict = None
|
|
|
|
|
|
) -> Dict:
|
|
|
|
|
|
"""
|
|
|
|
|
|
生成 Organization(组织)类型的 Schema
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
brand_name: 品牌/组织名称
|
|
|
|
|
|
description: 组织描述
|
|
|
|
|
|
url: 官网 URL
|
|
|
|
|
|
logo: Logo URL
|
|
|
|
|
|
founding_date: 成立日期(YYYY-MM-DD)
|
|
|
|
|
|
contact_point: 联系方式(可选)
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
JSON-LD Schema 字典
|
|
|
|
|
|
"""
|
|
|
|
|
|
schema = {
|
|
|
|
|
|
"@context": self.context,
|
|
|
|
|
|
"@type": "Organization",
|
|
|
|
|
|
"name": brand_name
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if description:
|
|
|
|
|
|
schema["description"] = description
|
|
|
|
|
|
|
|
|
|
|
|
if url:
|
|
|
|
|
|
schema["url"] = url
|
|
|
|
|
|
|
|
|
|
|
|
if logo:
|
|
|
|
|
|
schema["logo"] = logo
|
|
|
|
|
|
|
|
|
|
|
|
if founding_date:
|
|
|
|
|
|
schema["foundingDate"] = founding_date
|
|
|
|
|
|
|
|
|
|
|
|
if contact_point:
|
|
|
|
|
|
schema["contactPoint"] = {
|
|
|
|
|
|
"@type": "ContactPoint",
|
|
|
|
|
|
**contact_point
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return schema
|
|
|
|
|
|
|
|
|
|
|
|
def generate_software_application_schema(
|
|
|
|
|
|
self,
|
|
|
|
|
|
brand_name: str,
|
|
|
|
|
|
application_name: str = "",
|
|
|
|
|
|
description: str = "",
|
|
|
|
|
|
url: str = "",
|
|
|
|
|
|
application_category: str = "BusinessApplication",
|
|
|
|
|
|
operating_system: str = "",
|
|
|
|
|
|
offers: Dict = None,
|
|
|
|
|
|
aggregate_rating: Dict = None,
|
|
|
|
|
|
feature_list: List[str] = None
|
|
|
|
|
|
) -> Dict:
|
|
|
|
|
|
"""
|
|
|
|
|
|
生成 SoftwareApplication(软件应用)类型的 Schema
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
brand_name: 品牌名称
|
|
|
|
|
|
application_name: 应用名称(默认使用品牌名称)
|
|
|
|
|
|
description: 应用描述
|
|
|
|
|
|
url: 应用 URL
|
|
|
|
|
|
application_category: 应用类别(如 BusinessApplication, WebApplication)
|
|
|
|
|
|
operating_system: 操作系统(如 Windows, macOS, Linux, Web)
|
|
|
|
|
|
offers: 价格信息(可选)
|
|
|
|
|
|
aggregate_rating: 评分信息(可选)
|
|
|
|
|
|
feature_list: 功能列表(可选)
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
JSON-LD Schema 字典
|
|
|
|
|
|
"""
|
|
|
|
|
|
schema = {
|
|
|
|
|
|
"@context": self.context,
|
|
|
|
|
|
"@type": "SoftwareApplication",
|
|
|
|
|
|
"name": application_name or brand_name,
|
|
|
|
|
|
"applicationCategory": application_category
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if description:
|
|
|
|
|
|
schema["description"] = description
|
|
|
|
|
|
|
|
|
|
|
|
if url:
|
|
|
|
|
|
schema["url"] = url
|
|
|
|
|
|
|
|
|
|
|
|
if operating_system:
|
|
|
|
|
|
schema["operatingSystem"] = operating_system
|
|
|
|
|
|
|
|
|
|
|
|
# 添加发布者(组织)
|
|
|
|
|
|
schema["publisher"] = {
|
|
|
|
|
|
"@type": "Organization",
|
|
|
|
|
|
"name": brand_name
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if offers:
|
|
|
|
|
|
schema["offers"] = {
|
|
|
|
|
|
"@type": "Offer",
|
|
|
|
|
|
**offers
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if aggregate_rating:
|
|
|
|
|
|
schema["aggregateRating"] = {
|
|
|
|
|
|
"@type": "AggregateRating",
|
|
|
|
|
|
**aggregate_rating
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if feature_list:
|
|
|
|
|
|
schema["featureList"] = feature_list
|
|
|
|
|
|
|
|
|
|
|
|
return schema
|
|
|
|
|
|
|
|
|
|
|
|
def generate_product_schema(
|
|
|
|
|
|
self,
|
|
|
|
|
|
brand_name: str,
|
|
|
|
|
|
product_name: str = "",
|
|
|
|
|
|
description: str = "",
|
|
|
|
|
|
url: str = "",
|
|
|
|
|
|
product_category: str = "",
|
|
|
|
|
|
brand: Dict = None,
|
|
|
|
|
|
offers: Dict = None,
|
|
|
|
|
|
aggregate_rating: Dict = None
|
|
|
|
|
|
) -> Dict:
|
|
|
|
|
|
"""
|
|
|
|
|
|
生成 Product(产品)类型的 Schema
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
brand_name: 品牌名称
|
|
|
|
|
|
product_name: 产品名称(默认使用品牌名称)
|
|
|
|
|
|
description: 产品描述
|
|
|
|
|
|
url: 产品 URL
|
|
|
|
|
|
product_category: 产品类别
|
|
|
|
|
|
brand: 品牌信息(可选)
|
|
|
|
|
|
offers: 价格信息(可选)
|
|
|
|
|
|
aggregate_rating: 评分信息(可选)
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
JSON-LD Schema 字典
|
|
|
|
|
|
"""
|
|
|
|
|
|
schema = {
|
|
|
|
|
|
"@context": self.context,
|
|
|
|
|
|
"@type": "Product",
|
|
|
|
|
|
"name": product_name or brand_name
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if description:
|
|
|
|
|
|
schema["description"] = description
|
|
|
|
|
|
|
|
|
|
|
|
if url:
|
|
|
|
|
|
schema["url"] = url
|
|
|
|
|
|
|
|
|
|
|
|
if product_category:
|
|
|
|
|
|
schema["category"] = product_category
|
|
|
|
|
|
|
|
|
|
|
|
if brand:
|
|
|
|
|
|
schema["brand"] = {
|
|
|
|
|
|
"@type": "Brand",
|
|
|
|
|
|
**brand
|
|
|
|
|
|
}
|
|
|
|
|
|
else:
|
|
|
|
|
|
schema["brand"] = {
|
|
|
|
|
|
"@type": "Brand",
|
|
|
|
|
|
"name": brand_name
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if offers:
|
|
|
|
|
|
schema["offers"] = {
|
|
|
|
|
|
"@type": "Offer",
|
|
|
|
|
|
**offers
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if aggregate_rating:
|
|
|
|
|
|
schema["aggregateRating"] = {
|
|
|
|
|
|
"@type": "AggregateRating",
|
|
|
|
|
|
**aggregate_rating
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return schema
|
|
|
|
|
|
|
|
|
|
|
|
def generate_service_schema(
|
|
|
|
|
|
self,
|
|
|
|
|
|
brand_name: str,
|
|
|
|
|
|
service_name: str = "",
|
|
|
|
|
|
description: str = "",
|
|
|
|
|
|
url: str = "",
|
|
|
|
|
|
service_type: str = "",
|
|
|
|
|
|
provider: Dict = None,
|
|
|
|
|
|
area_served: str = "",
|
|
|
|
|
|
offers: Dict = None
|
|
|
|
|
|
) -> Dict:
|
|
|
|
|
|
"""
|
|
|
|
|
|
生成 Service(服务)类型的 Schema
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
brand_name: 品牌名称
|
|
|
|
|
|
service_name: 服务名称(默认使用品牌名称)
|
|
|
|
|
|
description: 服务描述
|
|
|
|
|
|
url: 服务 URL
|
|
|
|
|
|
service_type: 服务类型
|
|
|
|
|
|
provider: 服务提供者信息(可选)
|
|
|
|
|
|
area_served: 服务区域
|
|
|
|
|
|
offers: 价格信息(可选)
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
JSON-LD Schema 字典
|
|
|
|
|
|
"""
|
|
|
|
|
|
schema = {
|
|
|
|
|
|
"@context": self.context,
|
|
|
|
|
|
"@type": "Service",
|
|
|
|
|
|
"name": service_name or brand_name
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if description:
|
|
|
|
|
|
schema["description"] = description
|
|
|
|
|
|
|
|
|
|
|
|
if url:
|
|
|
|
|
|
schema["url"] = url
|
|
|
|
|
|
|
|
|
|
|
|
if service_type:
|
|
|
|
|
|
schema["serviceType"] = service_type
|
|
|
|
|
|
|
|
|
|
|
|
if provider:
|
|
|
|
|
|
schema["provider"] = {
|
|
|
|
|
|
"@type": "Organization",
|
|
|
|
|
|
**provider
|
|
|
|
|
|
}
|
|
|
|
|
|
else:
|
|
|
|
|
|
schema["provider"] = {
|
|
|
|
|
|
"@type": "Organization",
|
|
|
|
|
|
"name": brand_name
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if area_served:
|
|
|
|
|
|
schema["areaServed"] = {
|
|
|
|
|
|
"@type": "Country",
|
|
|
|
|
|
"name": area_served
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if offers:
|
|
|
|
|
|
schema["offers"] = {
|
|
|
|
|
|
"@type": "Offer",
|
|
|
|
|
|
**offers
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return schema
|
|
|
|
|
|
|
|
|
|
|
|
def generate_combined_schema(
|
|
|
|
|
|
self,
|
|
|
|
|
|
brand_name: str,
|
|
|
|
|
|
advantages: str = "",
|
|
|
|
|
|
schema_types: List[str] = None,
|
|
|
|
|
|
**kwargs
|
|
|
|
|
|
) -> Dict:
|
|
|
|
|
|
"""
|
|
|
|
|
|
生成组合 Schema(包含多个类型)
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
brand_name: 品牌名称
|
|
|
|
|
|
advantages: 品牌优势(用于描述)
|
|
|
|
|
|
schema_types: Schema 类型列表(如 ["Organization", "SoftwareApplication"])
|
|
|
|
|
|
**kwargs: 其他参数
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
组合的 JSON-LD Schema 字典
|
|
|
|
|
|
"""
|
|
|
|
|
|
if schema_types is None:
|
|
|
|
|
|
schema_types = ["Organization", "SoftwareApplication"]
|
|
|
|
|
|
|
|
|
|
|
|
schemas = []
|
|
|
|
|
|
|
|
|
|
|
|
# 生成 Organization
|
|
|
|
|
|
if "Organization" in schema_types:
|
|
|
|
|
|
org_schema = self.generate_organization_schema(
|
|
|
|
|
|
brand_name=brand_name,
|
|
|
|
|
|
description=advantages or kwargs.get("description", ""),
|
|
|
|
|
|
url=kwargs.get("url", ""),
|
|
|
|
|
|
logo=kwargs.get("logo", ""),
|
|
|
|
|
|
founding_date=kwargs.get("founding_date", ""),
|
|
|
|
|
|
contact_point=kwargs.get("contact_point")
|
|
|
|
|
|
)
|
|
|
|
|
|
schemas.append(org_schema)
|
|
|
|
|
|
|
|
|
|
|
|
# 生成 SoftwareApplication
|
|
|
|
|
|
if "SoftwareApplication" in schema_types:
|
|
|
|
|
|
app_schema = self.generate_software_application_schema(
|
|
|
|
|
|
brand_name=brand_name,
|
|
|
|
|
|
application_name=kwargs.get("application_name", brand_name),
|
|
|
|
|
|
description=advantages or kwargs.get("description", ""),
|
|
|
|
|
|
url=kwargs.get("url", ""),
|
|
|
|
|
|
application_category=kwargs.get("application_category", "BusinessApplication"),
|
|
|
|
|
|
operating_system=kwargs.get("operating_system", ""),
|
|
|
|
|
|
offers=kwargs.get("offers"),
|
|
|
|
|
|
aggregate_rating=kwargs.get("aggregate_rating"),
|
|
|
|
|
|
feature_list=kwargs.get("feature_list")
|
|
|
|
|
|
)
|
|
|
|
|
|
schemas.append(app_schema)
|
|
|
|
|
|
|
|
|
|
|
|
# 生成 Product
|
|
|
|
|
|
if "Product" in schema_types:
|
|
|
|
|
|
product_schema = self.generate_product_schema(
|
|
|
|
|
|
brand_name=brand_name,
|
|
|
|
|
|
product_name=kwargs.get("product_name", brand_name),
|
|
|
|
|
|
description=advantages or kwargs.get("description", ""),
|
|
|
|
|
|
url=kwargs.get("url", ""),
|
|
|
|
|
|
product_category=kwargs.get("product_category", ""),
|
|
|
|
|
|
brand=kwargs.get("brand"),
|
|
|
|
|
|
offers=kwargs.get("offers"),
|
|
|
|
|
|
aggregate_rating=kwargs.get("aggregate_rating")
|
|
|
|
|
|
)
|
|
|
|
|
|
schemas.append(product_schema)
|
|
|
|
|
|
|
|
|
|
|
|
# 生成 Service
|
|
|
|
|
|
if "Service" in schema_types:
|
|
|
|
|
|
service_schema = self.generate_service_schema(
|
|
|
|
|
|
brand_name=brand_name,
|
|
|
|
|
|
service_name=kwargs.get("service_name", brand_name),
|
|
|
|
|
|
description=advantages or kwargs.get("description", ""),
|
|
|
|
|
|
url=kwargs.get("url", ""),
|
|
|
|
|
|
service_type=kwargs.get("service_type", ""),
|
|
|
|
|
|
provider=kwargs.get("provider"),
|
|
|
|
|
|
area_served=kwargs.get("area_served", ""),
|
|
|
|
|
|
offers=kwargs.get("offers")
|
|
|
|
|
|
)
|
|
|
|
|
|
schemas.append(service_schema)
|
|
|
|
|
|
|
|
|
|
|
|
# 如果只有一个 Schema,直接返回
|
|
|
|
|
|
if len(schemas) == 1:
|
|
|
|
|
|
return schemas[0]
|
|
|
|
|
|
|
|
|
|
|
|
# 多个 Schema 时,返回数组格式
|
|
|
|
|
|
return schemas
|
|
|
|
|
|
|
|
|
|
|
|
def format_json_ld(self, schema: Dict, indent: int = 2) -> str:
|
|
|
|
|
|
"""
|
|
|
|
|
|
格式化 JSON-LD 为字符串(用于嵌入 HTML)
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
schema: Schema 字典
|
|
|
|
|
|
indent: 缩进空格数
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
格式化的 JSON 字符串
|
|
|
|
|
|
"""
|
|
|
|
|
|
return json.dumps(schema, ensure_ascii=False, indent=indent)
|
|
|
|
|
|
|
|
|
|
|
|
def generate_html_script_tag(self, schema: Dict) -> str:
|
|
|
|
|
|
"""
|
|
|
|
|
|
生成 HTML script 标签(可直接嵌入网页)
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
schema: Schema 字典
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
HTML script 标签字符串
|
|
|
|
|
|
"""
|
|
|
|
|
|
json_str = self.format_json_ld(schema)
|
|
|
|
|
|
return f'<script type="application/ld+json">\n{json_str}\n</script>'
|
|
|
|
|
|
|
|
|
|
|
|
def validate_schema(self, schema: Dict) -> tuple[bool, List[str]]:
|
|
|
|
|
|
"""
|
|
|
|
|
|
验证 Schema 的基本有效性
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
schema: Schema 字典
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
(是否有效, 错误列表)
|
|
|
|
|
|
"""
|
|
|
|
|
|
errors = []
|
|
|
|
|
|
|
|
|
|
|
|
# 检查必需字段
|
|
|
|
|
|
if "@context" not in schema:
|
|
|
|
|
|
errors.append("缺少 @context 字段")
|
|
|
|
|
|
|
|
|
|
|
|
if "@type" not in schema:
|
|
|
|
|
|
errors.append("缺少 @type 字段")
|
|
|
|
|
|
|
|
|
|
|
|
if "name" not in schema:
|
|
|
|
|
|
errors.append("缺少 name 字段")
|
|
|
|
|
|
|
|
|
|
|
|
# 检查 @context 值
|
|
|
|
|
|
if schema.get("@context") != self.context:
|
|
|
|
|
|
errors.append(f"@context 应为 {self.context}")
|
|
|
|
|
|
|
|
|
|
|
|
return len(errors) == 0, errors
|
|
|
|
|
|
|
|
|
|
|
|
def get_schema_types_info(self) -> Dict[str, str]:
|
|
|
|
|
|
"""
|
|
|
|
|
|
获取支持的 Schema 类型信息
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
Schema 类型字典(类型名 -> 描述)
|
|
|
|
|
|
"""
|
|
|
|
|
|
return {
|
|
|
|
|
|
"Organization": "组织/公司(适合企业品牌)",
|
|
|
|
|
|
"SoftwareApplication": "软件应用(适合 SaaS 产品、软件工具)",
|
|
|
|
|
|
"Product": "产品(适合实体产品或数字产品)",
|
|
|
|
|
|
"Service": "服务(适合服务类业务)"
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
def generate_for_github(self, brand_name: str, advantages: str = "", **kwargs) -> str:
|
|
|
|
|
|
"""
|
|
|
|
|
|
为 GitHub 项目生成 JSON-LD Schema
|
|
|
|
|
|
通常使用 SoftwareApplication 类型
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
brand_name: 品牌/项目名称
|
|
|
|
|
|
advantages: 项目优势/描述
|
|
|
|
|
|
**kwargs: 其他参数
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
格式化的 JSON-LD 字符串
|
|
|
|
|
|
"""
|
|
|
|
|
|
schema = self.generate_software_application_schema(
|
|
|
|
|
|
brand_name=brand_name,
|
|
|
|
|
|
application_name=kwargs.get("application_name", brand_name),
|
|
|
|
|
|
description=advantages or kwargs.get("description", ""),
|
|
|
|
|
|
url=kwargs.get("url", ""),
|
|
|
|
|
|
application_category=kwargs.get("application_category", "WebApplication"),
|
|
|
|
|
|
operating_system=kwargs.get("operating_system", "Web"),
|
|
|
|
|
|
feature_list=kwargs.get("feature_list")
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
return self.format_json_ld(schema)
|
|
|
|
|
|
|
|
|
|
|
|
def generate_for_website(self, brand_name: str, advantages: str = "", **kwargs) -> str:
|
|
|
|
|
|
"""
|
|
|
|
|
|
为官网生成 JSON-LD Schema
|
|
|
|
|
|
通常使用 Organization + SoftwareApplication/Product/Service 组合
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
brand_name: 品牌名称
|
|
|
|
|
|
advantages: 品牌优势/描述
|
|
|
|
|
|
**kwargs: 其他参数
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
HTML script 标签字符串(可直接嵌入网页)
|
|
|
|
|
|
"""
|
|
|
|
|
|
schema_types = kwargs.get("schema_types", ["Organization", "SoftwareApplication"])
|
|
|
|
|
|
schema = self.generate_combined_schema(
|
|
|
|
|
|
brand_name=brand_name,
|
|
|
|
|
|
advantages=advantages,
|
|
|
|
|
|
schema_types=schema_types,
|
|
|
|
|
|
**kwargs
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
return self.generate_html_script_tag(schema)
|
2026-04-30 18:37:46 +08:00
|
|
|
|
|
|
|
|
|
|
def generate_faq_schema(self, faq_items: List[Dict[str, str]]) -> Dict:
|
|
|
|
|
|
"""
|
|
|
|
|
|
生成 FAQPage 类型的 Schema
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
faq_items: FAQ 列表,每个元素包含 {"question": "...", "answer": "..."}
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
JSON-LD Schema 字典
|
|
|
|
|
|
"""
|
|
|
|
|
|
main_entity = []
|
|
|
|
|
|
for item in faq_items:
|
|
|
|
|
|
main_entity.append({
|
|
|
|
|
|
"@type": "Question",
|
|
|
|
|
|
"name": item["question"],
|
|
|
|
|
|
"acceptedAnswer": {
|
|
|
|
|
|
"@type": "Answer",
|
|
|
|
|
|
"text": item["answer"]
|
|
|
|
|
|
}
|
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
|
"@context": self.context,
|
|
|
|
|
|
"@type": "FAQPage",
|
|
|
|
|
|
"mainEntity": main_entity
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
def generate_howto_schema(self, title: str, steps: List[Dict[str, str]],
|
|
|
|
|
|
description: str = "") -> Dict:
|
|
|
|
|
|
"""
|
|
|
|
|
|
生成 HowTo 类型的 Schema
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
title: 操作标题
|
|
|
|
|
|
steps: 步骤列表,每个元素包含 {"name": "...", "text": "..."}
|
|
|
|
|
|
description: 操作描述
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
JSON-LD Schema 字典
|
|
|
|
|
|
"""
|
|
|
|
|
|
howto_steps = []
|
|
|
|
|
|
for i, step in enumerate(steps, 1):
|
|
|
|
|
|
howto_steps.append({
|
|
|
|
|
|
"@type": "HowToStep",
|
|
|
|
|
|
"position": i,
|
|
|
|
|
|
"name": step.get("name", f"步骤 {i}"),
|
|
|
|
|
|
"text": step.get("text", "")
|
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
schema = {
|
|
|
|
|
|
"@context": self.context,
|
|
|
|
|
|
"@type": "HowTo",
|
|
|
|
|
|
"name": title,
|
|
|
|
|
|
"step": howto_steps
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if description:
|
|
|
|
|
|
schema["description"] = description
|
|
|
|
|
|
|
|
|
|
|
|
return schema
|
|
|
|
|
|
|
|
|
|
|
|
def generate_article_schema(self, title: str, author: str,
|
|
|
|
|
|
date_published: str, description: str = "",
|
|
|
|
|
|
image: str = "", url: str = "") -> Dict:
|
|
|
|
|
|
"""
|
|
|
|
|
|
生成 Article 类型的 Schema
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
title: 文章标题
|
|
|
|
|
|
author: 作者名称
|
|
|
|
|
|
date_published: 发布日期 (YYYY-MM-DD)
|
|
|
|
|
|
description: 文章描述
|
|
|
|
|
|
image: 文章图片 URL
|
|
|
|
|
|
url: 文章 URL
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
JSON-LD Schema 字典
|
|
|
|
|
|
"""
|
|
|
|
|
|
schema = {
|
|
|
|
|
|
"@context": self.context,
|
|
|
|
|
|
"@type": "Article",
|
|
|
|
|
|
"headline": title,
|
|
|
|
|
|
"author": {
|
|
|
|
|
|
"@type": "Person",
|
|
|
|
|
|
"name": author
|
|
|
|
|
|
},
|
|
|
|
|
|
"datePublished": date_published
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if description:
|
|
|
|
|
|
schema["description"] = description
|
|
|
|
|
|
|
|
|
|
|
|
if image:
|
|
|
|
|
|
schema["image"] = image
|
|
|
|
|
|
|
|
|
|
|
|
if url:
|
|
|
|
|
|
schema["url"] = url
|
|
|
|
|
|
|
|
|
|
|
|
return schema
|
|
|
|
|
|
|
|
|
|
|
|
def generate_review_schema(self, item_name: str, review_body: str,
|
|
|
|
|
|
rating_value: float, reviewer: str,
|
|
|
|
|
|
item_type: str = "Product") -> Dict:
|
|
|
|
|
|
"""
|
|
|
|
|
|
生成 Review 类型的 Schema
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
item_name: 被评价项目名称
|
|
|
|
|
|
review_body: 评价内容
|
|
|
|
|
|
rating_value: 评分 (1-5)
|
|
|
|
|
|
reviewer: 评价者
|
|
|
|
|
|
item_type: 被评价项目类型 (Product, Service, etc.)
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
JSON-LD Schema 字典
|
|
|
|
|
|
"""
|
|
|
|
|
|
return {
|
|
|
|
|
|
"@context": self.context,
|
|
|
|
|
|
"@type": "Review",
|
|
|
|
|
|
"itemReviewed": {
|
|
|
|
|
|
"@type": item_type,
|
|
|
|
|
|
"name": item_name
|
|
|
|
|
|
},
|
|
|
|
|
|
"reviewBody": review_body,
|
|
|
|
|
|
"reviewRating": {
|
|
|
|
|
|
"@type": "Rating",
|
|
|
|
|
|
"ratingValue": rating_value,
|
|
|
|
|
|
"bestRating": 5
|
|
|
|
|
|
},
|
|
|
|
|
|
"author": {
|
|
|
|
|
|
"@type": "Person",
|
|
|
|
|
|
"name": reviewer
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
def extract_qa_from_content(self, content: str) -> List[Dict[str, str]]:
|
|
|
|
|
|
"""
|
|
|
|
|
|
从内容中自动提取 Q&A 对
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
content: 文本内容
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
Q&A 对列表
|
|
|
|
|
|
"""
|
|
|
|
|
|
import re
|
|
|
|
|
|
qa_pairs = []
|
|
|
|
|
|
|
|
|
|
|
|
# 模式1: Q: ... A: ...
|
|
|
|
|
|
pattern1 = r'[Qq][::]\s*(.+?)[\n\r]+[Aa][::]\s*(.+?)(?=[Qq][::]|\n\n|$)'
|
|
|
|
|
|
matches1 = re.findall(pattern1, content, re.DOTALL)
|
|
|
|
|
|
for q, a in matches1:
|
|
|
|
|
|
qa_pairs.append({"question": q.strip(), "answer": a.strip()})
|
|
|
|
|
|
|
|
|
|
|
|
# 模式2: 问题:... 回答:...
|
|
|
|
|
|
pattern2 = r'问题[::]\s*(.+?)[\n\r]+回答[::]\s*(.+?)(?=问题[::]|\n\n|$)'
|
|
|
|
|
|
matches2 = re.findall(pattern2, content, re.DOTALL)
|
|
|
|
|
|
for q, a in matches2:
|
|
|
|
|
|
qa_pairs.append({"question": q.strip(), "answer": a.strip()})
|
|
|
|
|
|
|
|
|
|
|
|
# 模式3: ## 问题标题 (以问号结尾) + 后续段落作为回答
|
|
|
|
|
|
lines = content.split('\n')
|
|
|
|
|
|
current_question = None
|
|
|
|
|
|
current_answer = []
|
|
|
|
|
|
|
|
|
|
|
|
for line in lines:
|
|
|
|
|
|
line = line.strip()
|
|
|
|
|
|
if not line:
|
|
|
|
|
|
if current_question and current_answer:
|
|
|
|
|
|
qa_pairs.append({
|
|
|
|
|
|
"question": current_question,
|
|
|
|
|
|
"answer": '\n'.join(current_answer)
|
|
|
|
|
|
})
|
|
|
|
|
|
current_question = None
|
|
|
|
|
|
current_answer = []
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
# 检测问题行(以?或?结尾的标题)
|
|
|
|
|
|
if (line.startswith('#') or line.startswith('##')) and \
|
|
|
|
|
|
(line.endswith('?') or line.endswith('?')):
|
|
|
|
|
|
if current_question and current_answer:
|
|
|
|
|
|
qa_pairs.append({
|
|
|
|
|
|
"question": current_question,
|
|
|
|
|
|
"answer": '\n'.join(current_answer)
|
|
|
|
|
|
})
|
|
|
|
|
|
current_question = line.lstrip('#').strip()
|
|
|
|
|
|
current_answer = []
|
|
|
|
|
|
elif current_question:
|
|
|
|
|
|
current_answer.append(line)
|
|
|
|
|
|
|
|
|
|
|
|
# 保存最后一个 Q&A 对
|
|
|
|
|
|
if current_question and current_answer:
|
|
|
|
|
|
qa_pairs.append({
|
|
|
|
|
|
"question": current_question,
|
|
|
|
|
|
"answer": '\n'.join(current_answer)
|
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
return qa_pairs
|
|
|
|
|
|
|
|
|
|
|
|
def auto_generate_faq_schema(self, content: str) -> Optional[Dict]:
|
|
|
|
|
|
"""
|
|
|
|
|
|
从内容中自动提取 Q&A 并生成 FAQ Schema
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
content: 文本内容
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
FAQPage Schema 或 None(如果没有找到 Q&A)
|
|
|
|
|
|
"""
|
|
|
|
|
|
qa_pairs = self.extract_qa_from_content(content)
|
|
|
|
|
|
|
|
|
|
|
|
if not qa_pairs:
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
return self.generate_faq_schema(qa_pairs)
|