Files
ChouJuGEO/modules/multimodal_prompt.py
T
刘国栋 8f7f082c3d feat: 重构项目结构并添加平台同步基础架构
- 重构项目目录结构,将功能模块移至 modules/ 目录
- 创建平台同步基础架构,包括发布器基类和 GitHub 发布器
- 新增 UI 状态管理模块 (modules/ui/state.py) 统一管理会话状态
- 更新依赖配置,添加平台同步所需依赖 (httpx, pyperclip)
- 整理文档结构,将所有文档分类移至 docs/ 目录
- 添加 .cursorrules 文件定义项目开发规范
- 清理根目录重复文件,保持项目结构整洁
2026-01-30 10:21:29 +08:00

1051 lines
38 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
多模态提示生成模块
用于生成配图描述、视频脚本描述,并可选择性地生成图片
"""
from typing import List, Dict, Optional, Tuple
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
import json
import re
import base64
import io
from pathlib import Path
import time
class MultimodalPromptGenerator:
"""多模态提示生成器"""
def __init__(self):
# 配图描述生成 Prompt
self.image_prompt_template = """
你是专业的配图描述生成专家,专门为内容创作生成详细的配图描述。
【内容片段】
{content_segment}
【上下文】
- 品牌:{brand}
- 优势:{advantages}
- 平台:{platform}
- 关键词:{keyword}
【配图描述要求】
1. **详细描述**
- 描述图片应该包含的主要元素(人物、物品、场景等)
- 描述图片的风格(写实、插画、图表、截图等)
- 描述图片的色调和氛围(明亮、专业、温馨等)
- 描述图片的构图(居中、左右布局、上下布局等)
2. **平台适配**
- 小红书:生活化、美观、有吸引力
- 抖音:视觉冲击力强、简洁明了
- 微信公众号:专业、清晰、符合文章风格
- B站:适合视频封面、有动感
3. **品牌融入**
- 如果内容涉及品牌,配图应自然融入品牌元素
- 但不要过于商业化,保持自然
4. **实用性**
- 描述要具体,便于设计师或AI生图工具理解
- 长度控制在50-150字
- 使用中文描述
【输出格式】
请严格按照以下 JSON 格式输出,不要添加任何其他内容:
{{
"image_description": "<详细的配图描述>",
"style": "<风格:写实/插画/图表/截图/其他>",
"tone": "<色调:明亮/专业/温馨/商务/其他>",
"composition": "<构图:居中/左右/上下/其他>",
"key_elements": ["<元素1>", "<元素2>", ...],
"platform_specific": "<平台特定要求>"
}}
【开始生成】
"""
# 视频脚本描述生成 Prompt
self.video_script_template = """
你是专业的视频脚本描述生成专家,专门为B站等视频平台生成详细的画面描述。
【内容片段】
{content_segment}
【上下文】
- 品牌:{brand}
- 优势:{advantages}
- 关键词:{keyword}
- 时间戳:{timestamp}
【视频画面描述要求】
1. **画面描述**
- 描述画面应该展示的内容(场景、人物、物品、动作等)
- 描述画面类型(实拍、动画、截图、演示等)
- 描述画面节奏(快切、慢镜头、定格等)
2. **镜头语言**
- 镜头类型(特写、中景、全景等)
- 镜头运动(推拉、摇移、跟随等)
- 画面转场(切换、淡入淡出、划入等)
3. **音效和字幕**
- 建议的音效(背景音乐、音效等)
- 字幕要点(关键信息、强调内容)
4. **时长建议**
- 该片段的建议时长(秒)
【输出格式】
请严格按照以下 JSON 格式输出,不要添加任何其他内容:
{{
"scene_description": "<画面描述>",
"shot_type": "<镜头类型:特写/中景/全景/其他>",
"camera_movement": "<镜头运动:推拉/摇移/跟随/固定/其他>",
"transition": "<转场:切换/淡入淡出/划入/其他>",
"audio_suggestion": "<音效建议>",
"subtitle_key_points": ["<字幕要点1>", "<字幕要点2>", ...],
"duration_seconds": <建议时长(秒)>
}}
【开始生成】
"""
# 批量配图描述生成 Prompt
self.batch_image_prompt_template = """
你是专业的配图描述生成专家,为内容生成多个配图描述。
【完整内容】
{full_content}
【品牌】{brand}
【优势】{advantages}
【平台】{platform}
【关键词】{keyword}
【要求】
1. 识别内容中所有需要配图的位置(已标注【配图:xxx】)
2. 为每个配图位置生成详细的配图描述
3. 确保配图描述与内容上下文相关
4. 保持配图风格的统一性
【输出格式】
请严格按照以下 JSON 格式输出,不要添加任何其他内容:
{{
"image_descriptions": [
{{
"position": "<在内容中的位置描述>",
"original_hint": "<原始配图提示>",
"detailed_description": "<详细配图描述>",
"style": "<风格>",
"tone": "<色调>",
"key_elements": ["<元素1>", "<元素2>", ...]
}},
...
],
"total_images": <配图总数>,
"style_consistency": "<整体风格一致性说明>"
}}
【开始生成】
"""
# 通义万相文生图 Prompt 生成模板(核心)
self.tongyi_prompt_template = """
你是专业的通义万相文生图 Prompt 工程师,目标是为文章生成最匹配、高质量的配图。
文章内容:
{content}
要求:
- 输出纯中文 Prompt,长度 60–120 字,越详细越好。
- 画面必须紧扣文章核心观点、关键场景或品牌 {brand}(可自然融入产品形态、科技元素、logo 氛围)。
- 风格建议:高清、科技感/写实/插画/未来主义,根据文章调性自动判断。
- 构图:主体突出、背景简洁、视觉冲击力强、色彩和谐。
- 避免任何敏感词,确保合规。
- 只输出纯 Prompt 文本,不要加任何解释、标题或多余内容。
最终输出示例:
"一张未来科技感极强的插画,中央是品牌 {brand} 的 AI 模型界面,周围环绕多模态数据流和实时知识图标,背景是深蓝星空,画面干净高清,2048分辨率"
"""
# 图片插入位置推荐 Prompt
self.image_position_template = """
阅读以下文章内容,判断最适合插入配图的位置,并给出理由。
文章内容:
{content}
要求:
- 推荐 12 个最佳插入点(例如"第2段结尾""总结部分前")。
- 每处插入点说明:为什么这里适合配图(增强理解、吸引眼球、突出品牌等)。
- 输出格式:
插入位置1{具体位置}
理由:{简短说明}
插入位置2{具体位置}
理由:{简短说明}
只输出插入建议,不要输出其他内容。
"""
def extract_image_placeholders(self, content: str) -> List[Dict]:
    """Locate every image placeholder written as 【配图:hint】 in ``content``.

    The colon after 配图 may be the full-width ":" or the ASCII ":";
    both spellings are recognised.

    Args:
        content: Text to scan. ``None`` or an empty string yields ``[]``.

    Returns:
        One dict per placeholder, in order of appearance, with keys:
        ``position`` (start index of the marker), ``hint`` (stripped text
        inside the marker), ``context`` (up to 100 characters on each side
        of the marker, marker included), ``paragraph`` (the line that
        contains the marker) and ``full_match`` (the marker itself).
    """
    if not content:
        return []

    # Accept both colon variants so markers typed with either are found.
    pattern = r'【配图[::]([^】]+)】'
    placeholders = []
    for match in re.finditer(pattern, content):
        start_pos = match.start()
        end_pos = match.end()

        # Surrounding window, clamped to the text boundaries.
        context_start = max(0, start_pos - 100)
        context_end = min(len(content), end_pos + 100)

        # The enclosing line: from the previous newline (rfind gives -1 when
        # there is none, so +1 lands on index 0) to the next newline or EOF.
        paragraph_start = content.rfind('\n', 0, start_pos) + 1
        paragraph_end = content.find('\n', end_pos)
        if paragraph_end == -1:
            paragraph_end = len(content)

        placeholders.append({
            "position": start_pos,
            "hint": match.group(1).strip(),
            "context": content[context_start:context_end],
            "paragraph": content[paragraph_start:paragraph_end],
            "full_match": match.group(0),
        })
    return placeholders
def generate_image_description(
self,
content_segment: str,
brand: str,
advantages: str,
platform: str,
keyword: str,
llm_chain
) -> Dict:
"""
生成单个配图的详细描述
Args:
content_segment: 内容片段
brand: 品牌名称
advantages: 品牌优势
platform: 平台名称
keyword: 关键词
llm_chain: LangChain 链对象
Returns:
配图描述字典
"""
try:
prompt = PromptTemplate.from_template(self.image_prompt_template)
chain = prompt | llm_chain | StrOutputParser()
result = chain.invoke({
"content_segment": content_segment,
"brand": brand,
"advantages": advantages,
"platform": platform,
"keyword": keyword
})
# 解析结果
description_data = self._parse_image_description(result)
return description_data
except Exception as e:
# 如果生成失败,返回基于规则的简单描述
return self._rule_based_image_description(content_segment, platform)
def generate_batch_image_descriptions(
self,
content: str,
brand: str,
advantages: str,
platform: str,
keyword: str,
llm_chain
) -> Dict:
"""
批量生成所有配图的详细描述
Args:
content: 完整内容
brand: 品牌名称
advantages: 品牌优势
platform: 平台名称
keyword: 关键词
llm_chain: LangChain 链对象
Returns:
包含所有配图描述的字典
"""
# 先提取所有占位符
placeholders = self.extract_image_placeholders(content)
if not placeholders:
return {
"image_descriptions": [],
"total_images": 0,
"style_consistency": "无配图需求"
}
try:
prompt = PromptTemplate.from_template(self.batch_image_prompt_template)
chain = prompt | llm_chain | StrOutputParser()
result = chain.invoke({
"full_content": content,
"brand": brand,
"advantages": advantages,
"platform": platform,
"keyword": keyword
})
# 解析结果
batch_data = self._parse_batch_image_descriptions(result, placeholders)
return batch_data
except Exception as e:
# 如果批量生成失败,逐个生成
descriptions = []
for placeholder in placeholders:
desc = self.generate_image_description(
placeholder["paragraph"],
brand,
advantages,
platform,
keyword,
llm_chain
)
desc["position"] = placeholder["hint"]
desc["original_hint"] = placeholder["hint"]
descriptions.append(desc)
return {
"image_descriptions": descriptions,
"total_images": len(descriptions),
"style_consistency": "逐个生成,风格可能不完全统一"
}
def generate_video_script_description(
self,
content_segment: str,
brand: str,
advantages: str,
keyword: str,
timestamp: str,
llm_chain
) -> Dict:
"""
生成视频脚本的画面描述
Args:
content_segment: 内容片段
brand: 品牌名称
advantages: 品牌优势
keyword: 关键词
timestamp: 时间戳(如"00:30-01:00"
llm_chain: LangChain 链对象
Returns:
视频画面描述字典
"""
try:
prompt = PromptTemplate.from_template(self.video_script_template)
chain = prompt | llm_chain | StrOutputParser()
result = chain.invoke({
"content_segment": content_segment,
"brand": brand,
"advantages": advantages,
"keyword": keyword,
"timestamp": timestamp
})
# 解析结果
script_data = self._parse_video_script(result)
return script_data
except Exception as e:
# 如果生成失败,返回基于规则的简单描述
return self._rule_based_video_script(content_segment, timestamp)
def _parse_image_description(self, result: str) -> Dict:
"""解析配图描述结果"""
json_match = re.search(r'\{.*\}', result, re.DOTALL)
if json_match:
try:
data = json.loads(json_match.group())
if "image_description" in data:
return data
except json.JSONDecodeError:
pass
# 如果无法解析,返回简单描述
return {
"image_description": result[:200] if result else "配图描述生成失败",
"style": "写实",
"tone": "专业",
"composition": "居中",
"key_elements": [],
"platform_specific": ""
}
def _parse_batch_image_descriptions(self, result: str, placeholders: List[Dict]) -> Dict:
"""解析批量配图描述结果"""
json_match = re.search(r'\{.*\}', result, re.DOTALL)
if json_match:
try:
data = json.loads(json_match.group())
if "image_descriptions" in data:
# 确保每个描述都有位置信息
for i, desc in enumerate(data["image_descriptions"]):
if i < len(placeholders):
if "position" not in desc:
desc["position"] = placeholders[i]["hint"]
if "original_hint" not in desc:
desc["original_hint"] = placeholders[i]["hint"]
return data
except json.JSONDecodeError:
pass
# 如果无法解析,返回空结果
return {
"image_descriptions": [],
"total_images": 0,
"style_consistency": "解析失败"
}
def _parse_video_script(self, result: str) -> Dict:
"""解析视频脚本描述结果"""
json_match = re.search(r'\{.*\}', result, re.DOTALL)
if json_match:
try:
data = json.loads(json_match.group())
if "scene_description" in data:
return data
except json.JSONDecodeError:
pass
# 如果无法解析,返回简单描述
return {
"scene_description": result[:200] if result else "画面描述生成失败",
"shot_type": "中景",
"camera_movement": "固定",
"transition": "切换",
"audio_suggestion": "背景音乐",
"subtitle_key_points": [],
"duration_seconds": 5
}
def _rule_based_image_description(self, content_segment: str, platform: str) -> Dict:
"""基于规则的简单配图描述(备用方案)"""
# 简单的关键词提取
keywords = []
if "对比" in content_segment or "比较" in content_segment:
keywords.append("对比图表")
if "步骤" in content_segment or "流程" in content_segment:
keywords.append("流程图")
if "数据" in content_segment or "统计" in content_segment:
keywords.append("数据图表")
if "产品" in content_segment or "功能" in content_segment:
keywords.append("产品展示")
if not keywords:
keywords = ["相关配图"]
style_map = {
"小红书": "生活化、美观",
"抖音": "视觉冲击力强",
"微信公众号": "专业、清晰",
"B站": "适合视频封面"
}
return {
"image_description": f"展示{keywords[0]}的配图,风格:{style_map.get(platform, '专业')}",
"style": "写实",
"tone": "专业",
"composition": "居中",
"key_elements": keywords,
"platform_specific": style_map.get(platform, "")
}
def _rule_based_video_script(self, content_segment: str, timestamp: str) -> Dict:
"""基于规则的简单视频脚本描述(备用方案)"""
return {
"scene_description": f"展示相关内容:{content_segment[:50]}...",
"shot_type": "中景",
"camera_movement": "固定",
"transition": "切换",
"audio_suggestion": "背景音乐",
"subtitle_key_points": [content_segment[:30] + "..."],
"duration_seconds": 5
}
def format_image_descriptions_for_display(self, descriptions: List[Dict]) -> str:
    """Render image descriptions as Markdown text for display.

    Args:
        descriptions: Image description dicts. Missing fields are shown
            as ``N/A``; ``detailed_description`` is preferred over
            ``image_description``.

    Returns:
        One numbered section per description, each followed by a blank
        line, or a fixed notice when ``descriptions`` is empty.
    """
    if not descriptions:
        return "无配图需求"

    rendered = []
    for number, desc in enumerate(descriptions, 1):
        detail = desc.get('detailed_description', desc.get('image_description', 'N/A'))
        elements = ', '.join(desc.get('key_elements', []))
        rendered.extend([
            f"### 配图 {number}",
            f"**位置**{desc.get('position', 'N/A')}",
            f"**原始提示**{desc.get('original_hint', 'N/A')}",
            f"**详细描述**{detail}",
            f"**风格**{desc.get('style', 'N/A')}",
            f"**色调**{desc.get('tone', 'N/A')}",
            f"**关键元素**{elements}",
            "",  # blank separator line after each section
        ])
    return "\n".join(rendered)
def format_video_script_for_display(self, script: Dict) -> str:
    """Render a video shot description as Markdown text for display.

    Args:
        script: Shot description dict. Missing scalar fields are shown as
            ``N/A``; missing subtitle points render as an empty list.

    Returns:
        Seven labelled lines joined by newlines.
    """
    subtitle_points = ', '.join(script.get('subtitle_key_points', []))
    rendered = [
        f"**画面描述**{script.get('scene_description', 'N/A')}",
        f"**镜头类型**{script.get('shot_type', 'N/A')}",
        f"**镜头运动**{script.get('camera_movement', 'N/A')}",
        f"**转场**{script.get('transition', 'N/A')}",
        f"**音效建议**{script.get('audio_suggestion', 'N/A')}",
        f"**字幕要点**{subtitle_points}",
        f"**建议时长**{script.get('duration_seconds', 'N/A')}秒",
    ]
    return "\n".join(rendered)
def generate_tongyi_image_prompt(
    self,
    content: str,
    brand: str,
    llm_chain
) -> str:
    """Generate a Chinese text-to-image prompt for Tongyi Wanxiang.

    The Tongyi prompt template is piped through ``llm_chain`` and a string
    output parser. The reply is stripped of surrounding whitespace and of
    one pair of enclosing double quotes and then one pair of enclosing
    single quotes. Any exception on that path is swallowed and a simple
    prompt built from the first 50 characters of ``content`` is returned.

    Args:
        content: Article content.
        brand: Brand name.
        llm_chain: LangChain runnable placed between prompt and parser.

    Returns:
        The prompt text.
    """
    try:
        pipeline = (
            PromptTemplate.from_template(self.tongyi_prompt_template)
            | llm_chain
            | StrOutputParser()
        )
        text = pipeline.invoke({"content": content, "brand": brand}).strip()
        # Peel one layer of matching quotes: double first, then single.
        for quote in ('"', "'"):
            if text.startswith(quote) and text.endswith(quote):
                text = text[1:-1]
        return text
    except Exception:
        # Best effort: fall back to a prompt derived directly from the content.
        return f"一张关于{content[:50]}的专业配图,风格:高清、现代、科技感,品牌:{brand}"
@staticmethod
def get_image_size_for_platform(platform: str) -> str:
"""
根据平台返回合适的图片尺寸
Args:
platform: 平台名称(如"知乎(专业问答)""小红书(生活种草)"等)
Returns:
图片尺寸字符串,格式为 "宽*高"
"""
# 通义万相(wanx-v1)允许的尺寸(来自接口报错提示)
# ['1024*1024', '720*1280', '1280*720', '768*1152']
#
# 说明:
# - 文章/资讯配图:优先 16:91280*720
# - 社交图文(小红书等):优先竖图(768*1152,更接近 2:3/3:4 的观感)
# - 短视频封面/竖图:9:16720*1280
# - 方图:1:11024*1024
#
# 平台名称到图片尺寸的映射(仅使用允许尺寸)
platform_size_map = {
# 文章类平台 - 使用16:9横图(适合文章配图)
"知乎(专业问答)": "1280*720", # 16:9
"微信公众号(长文)": "1280*720", # 16:9
"CSDN(技术博客)": "1280*720", # 16:9
"头条号(资讯软文)": "1280*720", # 16:9
"百家号(资讯)": "1280*720", # 16:9
"网易号(资讯)": "1280*720", # 16:9
"企鹅号(资讯)": "1280*720", # 16:9
"新浪新闻(资讯)": "1280*720", # 16:9
"搜狐号(资讯)": "1280*720", # 16:9
"一点号(资讯)": "1280*720", # 16:9
"东方财富(财经)": "1280*720", # 16:9
"原创力文档(文档)": "1280*720", # 16:9
"邦阅网(外贸)": "1280*720", # 16:9
"新浪博客(博客)": "1280*720", # 16:9
"简书(文艺)": "1280*720", # 16:9
# 视频类平台 - 使用16:9横图(适合视频封面)
"B站(视频脚本)": "1280*720", # 16:9
# 社交类平台 - 使用1:1方图
"小红书(生活种草)": "768*1152", # 2:3(更接近小红书常见版式)
"QQ空间(社交)": "1024*1024", # 1:1
# 短视频平台 - 使用9:16竖图
"抖音图文(短内容)": "720*1280", # 9:16
# 技术平台 - 使用16:9横图
"GitHubREADME/文档)": "1280*720", # 16:9
}
# 精确匹配
if platform in platform_size_map:
return platform_size_map[platform]
# 模糊匹配(包含关键词)
if "知乎" in platform or "问答" in platform:
return "1280*720" # 16:9
elif "小红书" in platform or "种草" in platform:
return "768*1152" # 2:3
elif "抖音" in platform or "短视频" in platform:
return "720*1280" # 9:16
elif "公众号" in platform or "微信" in platform:
return "1280*720" # 16:9
elif "csdn" in platform or "技术" in platform or "博客" in platform:
return "1280*720" # 16:9
elif "b站" in platform or "视频" in platform or "bilibili" in platform:
return "1280*720" # 16:9
elif "资讯" in platform or "新闻" in platform or "文章" in platform:
return "1280*720" # 16:9
elif "社交" in platform or "空间" in platform:
return "1024*1024" # 1:1
else:
# 默认使用16:9(适合大多数文章类平台)
return "1280*720" # 16:9
@staticmethod
def normalize_tongyi_image_size(size: str) -> str:
"""
将任意 size 规范化为通义万相允许的尺寸。
允许尺寸:1024*1024, 720*1280, 1280*720, 768*1152
"""
allowed = ("1024*1024", "720*1280", "1280*720", "768*1152")
if size in allowed:
return size
import re
m = re.match(r"^\s*(\d+)\s*\*\s*(\d+)\s*$", str(size))
if not m:
return "1024*1024"
w = int(m.group(1))
h = int(m.group(2))
if w <= 0 or h <= 0:
return "1024*1024"
target_ratio = w / h
candidates = []
for s in allowed:
aw, ah = map(int, s.split("*"))
candidates.append((s, abs((aw / ah) - target_ratio), abs((aw * ah) - (w * h))))
# 先按比例最接近,其次按面积接近
candidates.sort(key=lambda x: (x[1], x[2]))
return candidates[0][0]
def generate_image_with_tongyi(
    self,
    prompt: str,
    api_key: str,
    model: str = "wanx-v1",
    size: str = "1024*1024",
    n: int = 1
) -> Dict:
    """Generate an image with Tongyi Wanxiang via DashScope ``ImageSynthesis``.

    ``dashscope`` is imported lazily, ``dashscope.api_key`` is set to
    ``api_key`` at module level, and ``size`` is normalised to an allowed
    value before the call. This method never raises: every failure is
    reported through the returned dict.

    Args:
        prompt: Image generation prompt (Chinese).
        api_key: Alibaba Cloud DashScope API key.
        model: Model name, ``wanx-v1`` by default.
        size: Requested size, ``1024*1024`` by default.
        n: Number of images requested; only the first result's URL is
            returned.

    Returns:
        On success: ``{"success": True, "image_url", "task_id", "prompt"}``.
        On failure: ``{"success": False, "error", "prompt"}`` plus either
        ``response`` (stringified output when the HTTP status was 200) or
        ``status_code`` (when it was not).
    """
    def _safe_get(obj, key: str, default=None):
        """Read ``key`` from a dict or an object; return ``default`` on any failure."""
        if obj is None:
            return default
        if isinstance(obj, dict):
            return obj.get(key, default)
        try:
            # Broad catch on purpose: the response object's __getattr__
            # may raise KeyError rather than AttributeError.
            return getattr(obj, key)
        except Exception:
            return default

    def _failure(error: str, **extra) -> Dict:
        """Build a failure result carrying the prompt and any extra fields."""
        return {"success": False, "error": error, "prompt": prompt, **extra}

    def _dump(output) -> str:
        """Stringify the API output for diagnostics."""
        return str(output) if output is not None else "无输出"

    try:
        import dashscope
        from dashscope import ImageSynthesis

        dashscope.api_key = api_key
        # Safety net: make sure the size is one the API accepts.
        size = self.normalize_tongyi_image_size(size)
        size_note = f"(size={size})"

        response = ImageSynthesis.call(
            model=model,
            prompt=prompt,
            n=n,
            size=size
        )

        status_code = _safe_get(response, "status_code", None)
        if status_code != 200:
            error_msg = f"API调用失败,状态码:{status_code}"
            resp_message = _safe_get(response, "message", None)
            resp_code = _safe_get(response, "code", None)
            resp_request_id = (
                _safe_get(response, "request_id", None)
                or _safe_get(response, "requestId", None)
            )
            if resp_message:
                error_msg += f",消息:{resp_message}"
            if resp_code:
                error_msg += f",错误码:{resp_code}"
            if resp_request_id:
                error_msg += f",请求ID:{resp_request_id}"
            return _failure(error_msg + size_note, status_code=status_code)

        output = _safe_get(response, "output", None)
        code = _safe_get(output, "code", None)
        message = _safe_get(output, "message", None)

        # A 200 response can still carry a failed task with no results,
        # so the task status is checked under both spellings.
        raw_status = _safe_get(output, "task_status", None)
        if raw_status is None:
            raw_status = _safe_get(output, "taskStatus", None)
        task_status = str(raw_status or "")
        if task_status and task_status.upper() not in ("SUCCEEDED", "SUCCESS"):
            error_detail = f"任务状态:{task_status}"
            if code:
                error_detail += f",错误码:{code}"
            if message:
                error_detail += f",消息:{message}"
            return _failure(error_detail + size_note, response=_dump(output))

        results = _safe_get(output, "results", None)
        if not results:
            error_detail = f"生成成功但未返回图片URL{size_note}"
            if code:
                error_detail += f",错误码:{code}"
            if message:
                error_detail += f",消息:{message}"
            return _failure(error_detail, response=_dump(output))

        image_url = _safe_get(results[0], "url", None)
        if not image_url:
            return _failure(
                f"生成成功但图片URL为空{size_note}",
                response=_dump(output),
            )

        task_id = _safe_get(output, "task_id", "") or _safe_get(output, "taskId", "") or ""
        return {
            "success": True,
            "image_url": image_url,
            "task_id": task_id,
            "prompt": prompt
        }
    except ImportError:
        return _failure("未安装 dashscope 库,请运行:pip install dashscope")
    except Exception as e:
        return _failure(f"生成图片时出错:{str(e)}")
def suggest_image_positions(
self,
content: str,
llm_chain
) -> List[Dict]:
"""
推荐图片插入位置
Args:
content: 文章内容
llm_chain: LangChain 链对象
Returns:
插入位置推荐列表,每个包含位置和理由
"""
try:
prompt = PromptTemplate.from_template(self.image_position_template)
chain = prompt | llm_chain | StrOutputParser()
result = chain.invoke({
"content": content
})
# 解析结果
positions = []
lines = result.strip().split('\n')
current_position = None
current_reason = None
for line in lines:
line = line.strip()
if line.startswith('插入位置') or '位置' in line:
if current_position:
positions.append({
"position": current_position,
"reason": current_reason or "增强内容理解"
})
# 提取位置信息
if '' in line:
current_position = line.split('', 1)[1].strip()
elif ':' in line:
current_position = line.split(':', 1)[1].strip()
elif line.startswith('理由') or '理由' in line:
if '' in line:
current_reason = line.split('', 1)[1].strip()
elif ':' in line:
current_reason = line.split(':', 1)[1].strip()
# 添加最后一个位置
if current_position:
positions.append({
"position": current_position,
"reason": current_reason or "增强内容理解"
})
# 如果没有解析到位置,使用基于规则的方法
if not positions:
positions = self._rule_based_positions(content)
return positions
except Exception as e:
# 如果生成失败,使用基于规则的方法
return self._rule_based_positions(content)
def _rule_based_positions(self, content: str) -> List[Dict]:
"""基于规则的简单位置推荐(备用方案)"""
positions = []
# 按段落分割
paragraphs = content.split('\n\n')
# 推荐位置1:标题后(如果有标题)
if paragraphs and len(paragraphs[0]) < 100:
positions.append({
"position": "标题后,第一段前",
"reason": "吸引读者注意力,增强视觉冲击力"
})
# 推荐位置2:中间关键段落
if len(paragraphs) > 3:
mid_index = len(paragraphs) // 2
positions.append({
"position": f"{mid_index + 1}段后",
"reason": "在关键内容处插入配图,增强理解"
})
# 如果没有找到合适位置,至少推荐一个
if not positions:
positions.append({
"position": "文章开头",
"reason": "增强视觉吸引力"
})
return positions[:2] # 最多返回2个位置
def embed_images_in_markdown(
self,
content: str,
image_data: List[Dict]
) -> str:
"""
将图片嵌入到 Markdown 文章中
Args:
content: 原始文章内容(Markdown格式)
image_data: 图片数据列表,每个包含:
{
"image_url": str, # 图片URL
"prompt": str, # 生成时的Prompt
"position": str, # 插入位置描述(可选)
"alt_text": str # 图片alt文本(可选)
}
Returns:
嵌入图片后的 Markdown 内容
"""
if not image_data:
return content
# 如果内容中有配图占位符,替换它们
placeholders = self.extract_image_placeholders(content)
result_content = content
# 方法1:如果有占位符,按顺序替换
if placeholders and len(placeholders) <= len(image_data):
for i, placeholder in enumerate(placeholders):
if i < len(image_data):
img = image_data[i]
alt_text = img.get("alt_text", img.get("prompt", "配图")[:50])
markdown_image = f"\n\n![{alt_text}]({img['image_url']})\n\n"
result_content = result_content.replace(placeholder["full_match"], markdown_image, 1)
# 方法2:如果没有占位符或图片数量多于占位符,在推荐位置插入
elif image_data:
# 按段落分割
paragraphs = result_content.split('\n\n')
# 在合适位置插入图片
insert_positions = []
if len(paragraphs) > 1:
# 第一张图:标题后
insert_positions.append(1)
# 后续图片:均匀分布
if len(image_data) > 1:
step = max(1, len(paragraphs) // len(image_data))
for i in range(1, min(len(image_data), len(paragraphs) // step)):
insert_positions.append(min((i + 1) * step, len(paragraphs) - 1))
# 插入图片
offset = 0
for idx, img in enumerate(image_data):
if idx < len(insert_positions):
pos = insert_positions[idx] + offset
if pos < len(paragraphs):
alt_text = img.get("alt_text", img.get("prompt", "配图")[:50])
markdown_image = f"\n\n![{alt_text}]({img['image_url']})\n\n"
paragraphs.insert(pos, markdown_image)
offset += 1
result_content = '\n\n'.join(paragraphs)
return result_content
def generate_tongyi_prompt_from_content(
self,
content: str,
brand: str,
advantages: str,
platform: str,
keyword: str,
llm_chain
) -> str:
"""
从文章内容生成通义万相 Prompt(完整流程的第一步)
Args:
content: 文章内容
brand: 品牌名称
advantages: 品牌优势
platform: 平台名称
keyword: 关键词
llm_chain: LangChain 链对象
Returns:
生成的 Prompt 文本
"""
# 提取文章核心内容(前500字 + 后200字,确保覆盖主要观点)
content_summary = content[:500] + "..." + content[-200:] if len(content) > 700 else content
return self.generate_tongyi_image_prompt(content_summary, brand, llm_chain)