1019 lines
40 KiB
Python
1019 lines
40 KiB
Python
"""
|
||
标书内容生成模块
|
||
流程:生成大纲 → 解析章节树 → 并发生成内容
|
||
"""
|
||
import re
|
||
import sqlite3
|
||
import logging
|
||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||
from datetime import datetime
|
||
|
||
import config
|
||
from utils import ai_client, prompts as P
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
# System prompt shared by every section-writing call.  It is assembled from a
# role preamble followed by ten numbered writing rules; the pieces are joined
# without any separator, so the resulting text is one continuous paragraph.
_BID_WRITING_PREAMBLE = (
    '你是一位资深的工程投标文件撰写专家,擅长以执行方视角撰写技术方案正文。'
    '撰写时必须遵守以下铁律:'
)

_BID_WRITING_RULES = (
    # Rule 1: minimum word count, backed by real content.
    '①【字数】用户规定的最低字数必须满足,但字数须由实质内容支撑,'
    '不得用重复背景、堆砌承诺或复述要求来凑字数;',
    # Rule 2: how the bidder refers to itself.
    '②【自称】投标方自称统一用"我方",禁用"我们""我公司";',
    # Rule 3: banned stock phrases.
    '③【禁止套话】禁用:综上所述、首先其次再次、我们深信、高度重视、全力以赴、'
    '竭诚服务、不断优化、稳步推进、通过以上措施、我方将严格按照、我方承诺、'
    '确保圆满完成、切实保障;',
    # Rule 4: no lead-in sentences.
    '④【禁止前导句】严禁:本章节对应……、本小节主要说明……、'
    '以下将从……方面说明、针对招标方要求……、根据招标文件……我方将……——'
    '开头直接写实质内容;',
    # Rule 5: do not restate the tender requirements.
    '⑤【禁止复述要求】招标文件给出的技术参数、工程量、服务数量、规范标准等均视为'
    '已知条件,直接体现在方案中,禁止先复读要求再作答;'
    '不用"满足招标方提出的XXX要求""针对招标文件第X条"等句式;',
    # Rule 6: do not repeat project background in every section.
    '⑥【禁止重申背景——最常见的废稿场景】'
    '禁止在章节正文中出现项目名称、建设单位、建设地点、工程规模、合同工期等基本信息;'
    '尤其严禁将招标文件中的具体工程量数字(如"X条渠道""X公里""X座建筑物""X台设备"等)'
    '反复引入到各个章节开头作为背景铺垫——'
    '这类数字只能在专门的"项目概况/项目背景"章节出现一次,'
    '质量、安全、进度、技术方案、人员配置等专业章节一律直接展开专业内容;',
    # Rule 7: never invent parameters that exceed the tender.
    '⑦【禁止虚构优越参数】严禁为了显示"超越"招标要求而捏造参数或数量:'
    '招标文件要求多少就按多少写,不得无依据地写成"优于要求""高于标准";'
    '如需体现竞争力,只能在工艺方法、管理措施、响应速度等可具体描述的维度展开,'
    '不得在规格数量上自行拔高;',
    # Rule 8: every measure must be concrete and verifiable.
    '⑧【实质可检验】每项措施须给出具体做法、操作步骤、管理节点或时间节点;'
    '凡写数量、型号、吨位、强度、时限等量化内容,须能在招标文件或工程量清单摘要中找到依据,'
    '无依据处不写具体数字与型号,改用"按设计要求""与工况及进度相匹配""符合相应规范等级"等完整中文概括表述,'
    '不做空洞承诺;',
    # Rule 9: plain-text formatting conventions.
    '⑨【行文格式】纯文本,段落间空行分隔,列举用(1)(2)(3)编号,'
    '不用markdown符号,不用连接词串联,不用"等"作结尾。',
    # Rule 10: no unfinished placeholders.
    '⑩【禁止占位符】方案叙述中严禁半角或全角方括号形式的未完稿待填(如[型号][数量][数值][X][Y]等),'
    '亦不得用「待填」「TBD」留白;语义须用通顺的陈述句一次写清。'
    '若另有图示/表格专用输出规范要求使用约定标记,仅在该规范限定的标记内可使用方括号。',
)

BID_WRITING_SYSTEM = _BID_WRITING_PREAMBLE + ''.join(_BID_WRITING_RULES)
|
||
|
||
# Volume presets: key -> (chars for an ordinary sub-section,
#                         chars for a core chapter, display label,
#                         desired max_tokens for the AI call)
VOLUME_PRESETS = dict(
    concise=(1200, 2500, '精简版', 5000),
    standard=(2000, 4000, '标准版', 8000),
    detailed=(3000, 5500, '详细版', 12000),
    full=(4000, 7000, '充实版', 16000),
)

# Hard max_tokens ceiling per model provider.
_PROVIDER_TOKEN_LIMITS = dict(
    deepseek=8192,
    qwen=8192,
    openai=16384,
)
|
||
|
||
|
||
def _get_word_count_spec(volume: str) -> str:
    """Build the word-count requirement paragraph embedded in the prompt.

    Args:
        volume: A key of ``VOLUME_PRESETS``; unknown keys fall back to the
            ``'standard'`` preset.

    Returns:
        A five-line bullet list (no trailing newline) stating the minimum
        character counts for ordinary and core sections.
    """
    preset = VOLUME_PRESETS.get(volume, VOLUME_PRESETS['standard'])
    ordinary_chars, core_chars = preset[0], preset[1]
    bullet_lines = [
        '- 字数硬性要求(必须达到,不达标将被退回重写):',
        f' · 一般小节:不少于 {ordinary_chars} 字',
        f' · 核心技术/重点评分章节:不少于 {core_chars} 字',
        '- 内容必须充分展开,每个要点均需具体阐述,不得一笔带过',
        '- 宁多勿少,写满写透,篇幅不足是最严重的质量问题',
    ]
    return '\n'.join(bullet_lines)
|
||
|
||
|
||
def _get_max_tokens(volume: str) -> int:
    """Return the ``max_tokens`` value to use for an AI call.

    The preset's desired token budget is capped by the provider ceiling from
    ``_PROVIDER_TOKEN_LIMITS``.  The provider is read from
    ``config.MODEL_PROVIDER`` (``'openai'`` when the attribute is absent);
    providers not listed in the table are capped at 8192.
    """
    preset = VOLUME_PRESETS.get(volume, VOLUME_PRESETS['standard'])
    desired_tokens = preset[3]
    provider_name = getattr(config, 'MODEL_PROVIDER', 'openai')
    provider_cap = _PROVIDER_TOKEN_LIMITS.get(provider_name, 8192)
    if desired_tokens <= provider_cap:
        return desired_tokens
    return provider_cap
|
||
|
||
|
||
def _get_min_chars(volume: str) -> int:
    """Return the character threshold below which continuation is triggered.

    The threshold is 65% of the preset's ordinary-section character count
    (truncated to an int); unknown presets fall back to ``'standard'``.
    """
    preset = VOLUME_PRESETS.get(volume, VOLUME_PRESETS['standard'])
    ordinary_chars = preset[0]
    return int(ordinary_chars * 0.65)
|
||
|
||
|
||
# Chinese numeral -> integer lookup (covers 一 through 十五).
CN_NUM_MAP = {
    numeral: value
    for value, numeral in enumerate(
        (
            '一', '二', '三', '四', '五',
            '六', '七', '八', '九', '十',
            '十一', '十二', '十三', '十四', '十五',
        ),
        start=1,
    )
}
|
||
|
||
|
||
# ─── 大纲生成 ─────────────────────────────────────────────────────────────
|
||
|
||
def generate_outline(db_path: str, project_id: int) -> None:
    """Background job: generate the bid outline and store it in ``bid_sections``.

    Steps: mark the project as ``outline_generating``, build the outline
    prompt from the parsed tender data (using the rating-aware prompt when
    rating requirements exist), call the AI, parse and renumber the result,
    then persist both the normalized outline text and the section rows.

    Any failure is logged and recorded on the project as ``outline_error``
    together with the error text; nothing is raised to the caller.

    Args:
        db_path: Path of the SQLite database file.
        project_id: Id of the project whose outline is generated.
    """
    conn = sqlite3.connect(db_path)
    try:
        _set_project_status(conn, project_id, 'outline_generating')

        td = _get_tender_data(conn, project_id)
        if not td:
            raise ValueError('尚未解析招标文件,请先解析')

        summary = td['summary'] or ''
        rating = td['rating_requirements'] or ''

        if rating:
            prompt = P.get_outlines_with_rating_prompt(summary, rating)
        else:
            prompt = P.get_outlines_prompt(summary or td['raw_text'] or '')

        outline_text = ai_client.chat(prompt, temperature=0.5, max_tokens=4096)
        if not outline_text or not outline_text.strip():
            # Saving an empty outline would wipe the existing sections (the
            # save step deletes old rows first) and still report success.
            raise ValueError('AI 未返回有效大纲内容,请重试')

        # Parse the chapters, renumber them and keep the normalized text.
        _, sections, normalized_text = _parse_outline(outline_text)
        if not sections:
            raise ValueError('AI 返回的大纲中未解析出任何章节,请重试')

        _save_outline_text(conn, project_id, normalized_text)
        _save_sections(conn, project_id, sections)

        _set_project_status(conn, project_id, 'outline_done')
        logger.info(f'项目 {project_id} 大纲生成完成,共 {len(sections)} 节')

    except Exception as e:
        logger.exception(f'大纲生成失败 project_id={project_id}')
        _set_project_status(conn, project_id, 'outline_error', str(e))
    finally:
        conn.close()
|
||
|
||
|
||
# ─── 章节内容生成 ──────────────────────────────────────────────────────────
|
||
|
||
def generate_section(db_path: str, project_id: int, section_id: int,
                     anon_requirements: str = '',
                     enable_figure: bool = False,
                     enable_table: bool = False) -> None:
    """Background job: generate the body text of one section.

    This is the single-section entry point.  It loads the section row, the
    tender data and the saved outline itself, closes the read connection,
    and then delegates to ``_generate_one``.

    Failures (missing section, missing outline, errors while reading) are
    logged and stored on the section as status ``error``; nothing is raised
    for them.

    Args:
        db_path: Path of the SQLite database file.
        project_id: Project the section belongs to.
        section_id: Id of the ``bid_sections`` row to generate.
        anon_requirements: Optional anonymous-bid compliance text appended
            to the system prompt.
        enable_figure: Whether the figure prompt add-on is appended.
        enable_table: Whether the table prompt add-on is appended.
    """
    conn = sqlite3.connect(db_path)
    try:
        # Read phase: the connection is released before the slow AI call.
        try:
            section = _get_section(conn, section_id)
            if not section:
                raise ValueError(f'Section {section_id} 不存在')

            td = _get_tender_data(conn, project_id)
            outline_text = _get_outline_text(conn, project_id)
            if not outline_text.strip():
                raise ValueError('当前项目尚无可用大纲,请先保存或生成大纲')
        finally:
            conn.close()

        tender = td or {}
        # The summary column may be NULL; normalise it to '' so the prompt
        # builders never receive None.
        summary = tender.get('summary') or ''
        boq_summary = tender.get('boq_summary') or ''
        tender_kind = tender.get('tender_kind') or 'engineering'

        # The outline is known to be non-blank here, so a first line exists.
        outline_head = outline_text.strip().splitlines()[0][:50]
        logger.info(
            f'章节生成读取大纲 project_id={project_id}, section_id={section_id}, '
            f'outline_len={len(outline_text)}, outline_head="{outline_head}"'
        )
        _generate_one(db_path, section, summary, outline_text,
                      anon_requirements, enable_figure, enable_table,
                      boq_summary, tender_kind)

    except Exception as e:
        logger.exception(f'章节生成失败 section_id={section_id}')
        _update_section_status_safe(db_path, section_id, 'error', str(e))
|
||
|
||
|
||
# Maximum number of continuation calls issued for one section.
MAX_CONTINUE_ROUNDS = 5

# Upper bound on the character target requested in a single continuation
# round.  Per the original note it is matched to what DeepSeek/Qwen actually
# produce in Chinese under an 8192 max_tokens limit, kept slightly
# conservative so the target is easier to reach.
_CONTINUE_CHUNK_CAP = 2800

# Number of trailing characters of the existing text passed as context.
_CONTINUE_TAIL_CHARS = 2200
|
||
|
||
|
||
def _auto_continue(content: str, min_chars: int, max_tok: int, title: str,
                   system: str = BID_WRITING_SYSTEM) -> str:
    """Extend ``content`` with follow-up AI calls until it is long enough.

    Each round sends only the tail of the existing text (not the original
    prompt, which would crowd out the output budget) and asks the model to
    continue from there.  The loop stops when the text is within 200
    characters of ``min_chars``, after ``MAX_CONTINUE_ROUNDS`` rounds, when a
    call fails, or when a round returns fewer than 80 characters.

    Args:
        content: Text produced by the first generation call.
        min_chars: Minimum total length to aim for.
        max_tok: ``max_tokens`` passed to every continuation call.
        title: Section title, used in the prompt and in log messages.
        system: System prompt for the continuation calls.

    Returns:
        The original text followed by all accepted continuations, separated
        by blank lines.
    """
    for round_no in range(1, MAX_CONTINUE_ROUNDS + 1):
        remaining = min_chars - len(content)
        # Covers both "already long enough" and "close enough to stop".
        if remaining <= 200:
            break

        # Ask for only part of the gap per round; rounds add up.
        chunk_goal = min(remaining, _CONTINUE_CHUNK_CAP)
        # A negative slice returns the whole text when it is shorter.
        tail = content[-_CONTINUE_TAIL_CHARS:]

        cont_prompt = ''.join([
            f'以下是投标文件「{title}」小节已撰写的部分内容(末尾段落):\n\n',
            f'{tail}\n\n',
            '━━━━━━━━━━━━━━━━━━━━━━━━━\n',
            f'当前累计 {len(content)} 字,本节最低要求 {min_chars} 字,',
            f'全文总差额约 {remaining} 字。\n',
            '请紧接上文末尾继续撰写,要求:\n',
            '(1) 不重复、不复述上文已有段落,自然衔接续写\n',
            '(2) 深入展开实施细节、技术参数、岗位、设备、流程与验收要点\n',
            '(3) 保持"我方"口吻,禁止AI套话与前导说明句\n',
            '(4) 直接输出续写正文,不写"续写如下"等引导语\n',
            f'(5) 本轮续写不少于 {chunk_goal} 字,尽量写满\n',
        ])

        logger.info(
            f'[续写] "{title}" 第{round_no}轮 '
            f'({len(content)}/{min_chars}字, 差{remaining}字, 本轮目标≥{chunk_goal}字)'
        )

        try:
            extra = ai_client.chat(
                cont_prompt,
                system=system,
                temperature=0.7,
                max_tokens=max_tok,
            )
        except Exception as exc:
            logger.warning(f'[续写] "{title}" 第{round_no}轮失败: {exc}')
            break

        addition = extra.strip() if extra else ''
        if len(addition) < 80:
            logger.info(f'[续写] "{title}" 第{round_no}轮返回内容过短,终止')
            break

        content = content.rstrip() + '\n\n' + addition
        logger.info(
            f'[续写] "{title}" 第{round_no}轮完成,'
            f'+{len(addition)}字,累计{len(content)}字'
        )

    logger.info(f'"{title}" 最终字数:{len(content)}')
    return content
|
||
|
||
|
||
def _build_writing_system(anon_requirements: str = '') -> str:
    """Return the system prompt, extended with anonymous-bid rules if given.

    When ``anon_requirements`` is blank the shared ``BID_WRITING_SYSTEM`` is
    returned unchanged; otherwise the stripped requirements are appended
    under a dedicated "highest priority" heading.
    """
    extra_rules = anon_requirements.strip()
    if extra_rules:
        heading = '\n\n【暗标合规要求(最高优先级,每个章节均须严格遵守)】\n'
        return f'{BID_WRITING_SYSTEM}{heading}{extra_rules}'
    return BID_WRITING_SYSTEM
|
||
|
||
|
||
def _get_knowledge_context(title: str) -> str:
    """Fetch reference snippets for ``title`` from the company knowledge base.

    The lookup is best-effort: it never raises.  If the knowledge module
    cannot be imported, or the search returns nothing, an empty string is
    returned silently.  Any other failure (for example a missing
    ``config.TOP_K_KNOWLEDGE`` or an error inside the search) also yields an
    empty string but is logged, so a broken knowledge base does not go
    unnoticed.

    Args:
        title: Section title used as the search query.

    Returns:
        A prompt fragment listing up to ``config.TOP_K_KNOWLEDGE`` snippets
        (each truncated to 600 characters), or ``''``.
    """
    try:
        # Imported lazily: the knowledge base is an optional component.
        from modules.knowledge import search
    except ImportError:
        return ''

    try:
        chunks = search(title, top_k=config.TOP_K_KNOWLEDGE)
        if not chunks:
            return ''
        parts = [
            f'[参考片段{i}]\n{chunk[:600]}'
            for i, chunk in enumerate(chunks, 1)
        ]
    except Exception:
        # Keep generation going, but leave a trace of what went wrong.
        logger.warning('知识库检索失败,已跳过参考内容 title=%s', title, exc_info=True)
        return ''

    return (
        '\n\n【企业知识库参考内容(以下摘自历史投标文件,仅供参考,'
        '须结合本项目实际情况重新撰写,禁止直接照抄)】\n'
        + '\n\n'.join(parts)
    )
|
||
|
||
|
||
def _build_diagram_addon(enable_figure: bool, enable_table: bool) -> str:
|
||
"""构建图/表模式的提示词附加段"""
|
||
addon = ''
|
||
if enable_figure:
|
||
addon += P.get_figure_addon()
|
||
if enable_table:
|
||
addon += P.get_table_addon()
|
||
return addon
|
||
|
||
|
||
def _strip_line_serial_numbers(text: str) -> str:
|
||
"""
|
||
去除正文行首的纯序号(如 1. / 2、 / 370) / 12 ),保留正文语义。
|
||
"""
|
||
if not text:
|
||
return text
|
||
cleaned_lines = []
|
||
for line in text.splitlines():
|
||
cleaned = re.sub(r'^\s*\d{1,4}(?:[\..、)\s]+)\s*', '', line)
|
||
cleaned_lines.append(cleaned)
|
||
return '\n'.join(cleaned_lines)
|
||
|
||
|
||
def _generate_one(db_path: str, section: dict, summary: str, outline_text: str,
                  anon_requirements: str = '',
                  enable_figure: bool = False,
                  enable_table: bool = False,
                  boq_summary: str = '',
                  tender_kind: str = 'engineering') -> None:
    """Generate one section with the AI and write the result to the database.

    The function opens a short-lived connection for every status or content
    write instead of holding one, so it can be run from a thread pool.

    Leaf sections get a full body (first call, automatic continuation, then
    serial-number clean-up) stored in ``content``; non-leaf sections get a
    short introduction stored in ``intro_content``.  The section status goes
    ``generating`` -> ``done``, or ``error`` with the exception text when
    generation fails.

    Args:
        db_path: Path of the SQLite database file.
        section: Section row as a dict (``id``, ``is_leaf``, ``section_title``).
        summary: Tender summary text.
        outline_text: Full outline text of the project.
        anon_requirements: Optional anonymous-bid rules for the system prompt.
        enable_figure: Append the figure prompt add-on.
        enable_table: Append the table prompt add-on.
        boq_summary: Bill-of-quantities summary passed to the prompt builder.
        tender_kind: Tender category passed to the prompt builder.
    """
    section_id = section['id']
    leaf_section = bool(section['is_leaf'])
    title = section['section_title']

    writing_system = _build_writing_system(anon_requirements)
    diagram_addon = _build_diagram_addon(enable_figure, enable_table)

    _update_section_status_safe(db_path, section_id, 'generating')

    try:
        if not leaf_section:
            # Chapter heading with children: only a short introduction.
            intro_prompt = P.get_section_intro_prompt(summary, outline_text, title)
            if intro_prompt:
                intro = ai_client.chat(
                    intro_prompt,
                    system=writing_system,
                    temperature=0.4,
                    max_tokens=1024,
                )
            else:
                intro = ''
            _update_section_content_safe(
                db_path, section_id, '', _strip_line_serial_numbers(intro))
        else:
            volume = getattr(config, 'CONTENT_VOLUME', 'standard')
            max_tok = _get_max_tokens(volume)
            min_chars = _get_min_chars(volume)

            prompt = P.get_section_detail_prompt(
                summary, outline_text, title,
                word_count_spec=_get_word_count_spec(volume),
                boq_summary=boq_summary,
                tender_kind=tender_kind or 'engineering',
            )
            # Inject matching snippets from earlier bids as writing reference,
            # then the figure/table add-on; both may be empty strings.
            prompt = prompt + _get_knowledge_context(title) + diagram_addon

            draft = ai_client.chat(
                prompt,
                system=writing_system,
                temperature=0.7,
                max_tokens=max_tok,
            )
            draft = _auto_continue(draft, min_chars, max_tok, title,
                                   system=writing_system)
            _update_section_content_safe(
                db_path, section_id, _strip_line_serial_numbers(draft), '')

        _update_section_status_safe(db_path, section_id, 'done')
        logger.info(f'Section {section_id} "{title}" 生成完成')

    except Exception as exc:
        logger.exception(f'章节生成失败 section_id={section_id}')
        _update_section_status_safe(db_path, section_id, 'error', str(exc))
|
||
|
||
|
||
def generate_all_sections(db_path: str, project_id: int,
                          anon_requirements: str = '',
                          enable_figure: bool = False,
                          enable_table: bool = False) -> None:
    """Background job: generate every unfinished section of a project.

    Strategy: non-leaf sections (chapter introductions) are generated first,
    then all leaf sections (body text); each phase runs in a thread pool
    whose size is ``config.MAX_CONCURRENT_SECTIONS`` (at least 1).  Sections
    whose status is already ``done`` are skipped, so the job can be re-run to
    retry pending or failed sections.

    Any exception is logged; nothing is raised to the caller.

    Args:
        db_path: Path of the SQLite database file.
        project_id: Project whose sections are generated.
        anon_requirements: Optional anonymous-bid rules for the system prompt.
        enable_figure: Append the figure prompt add-on.
        enable_table: Append the table prompt add-on.
    """
    column_names = ('id', 'section_number', 'section_title', 'level',
                    'is_leaf', 'content', 'intro_content', 'status')
    try:
        # Read phase.  try/finally guarantees the connection is closed even
        # when one of the queries raises.
        conn = sqlite3.connect(db_path)
        try:
            rows = conn.execute(
                'SELECT id, section_number, section_title, level, is_leaf, '
                'content, intro_content, status '
                'FROM bid_sections WHERE project_id=? ORDER BY order_index',
                (project_id,),
            ).fetchall()
            td = _get_tender_data(conn, project_id)
            outline_text = _get_outline_text(conn, project_id)
        finally:
            conn.close()

        if not outline_text.strip():
            raise ValueError('当前项目尚无可用大纲,请先保存或生成大纲')

        tender = td or {}
        # The summary column may be NULL; never hand None to the prompts.
        summary = tender.get('summary') or ''
        boq_summary = tender.get('boq_summary') or ''
        tender_kind = tender.get('tender_kind') or 'engineering'

        outline_head = outline_text.strip().splitlines()[0][:50]
        logger.info(
            f'全量生成读取大纲 project_id={project_id}, outline_len={len(outline_text)}, outline_head="{outline_head}"'
        )

        all_sections = [dict(zip(column_names, row)) for row in rows]

        # Only unfinished sections are processed (pending / error are retried).
        sections = [s for s in all_sections if s.get('status') != 'done']
        if not sections:
            logger.info(f'项目 {project_id} 所有章节已生成完成,无需重新生成')
            return

        # Introductions are usually short; leaf bodies take most of the time.
        non_leaf = [s for s in sections if not s['is_leaf']]
        leaf = [s for s in sections if s['is_leaf']]

        workers = max(1, config.MAX_CONCURRENT_SECTIONS)
        logger.info(
            f'项目 {project_id} 开始并发生成: '
            f'{len(non_leaf)} 个章节引言 + {len(leaf)} 个叶节点, '
            f'并发数={workers}'
        )

        # Phase 1 (introductions), then phase 2 (leaf bodies).
        for batch in (non_leaf, leaf):
            if batch:
                _concurrent_generate(db_path, batch, summary, outline_text, workers,
                                     anon_requirements, enable_figure, enable_table,
                                     boq_summary, tender_kind)

        # Summarise the outcome for the log.
        conn = sqlite3.connect(db_path)
        try:
            total, done, errors = conn.execute(
                "SELECT COUNT(*) AS total, "
                "SUM(CASE WHEN status='done' THEN 1 ELSE 0 END) AS done, "
                "SUM(CASE WHEN status='error' THEN 1 ELSE 0 END) AS errors "
                "FROM bid_sections WHERE project_id=?",
                (project_id,),
            ).fetchone()
        finally:
            conn.close()
        logger.info(f'项目 {project_id} 全量生成完成: {done}/{total} 成功, {errors} 失败')

    except Exception:
        logger.exception(f'全量生成失败 project_id={project_id}')
|
||
|
||
|
||
def _concurrent_generate(db_path: str, sections: list, summary: str,
|
||
outline_text: str, workers: int,
|
||
anon_requirements: str = '',
|
||
enable_figure: bool = False,
|
||
enable_table: bool = False,
|
||
boq_summary: str = '',
|
||
tender_kind: str = 'engineering') -> None:
|
||
"""用线程池并发生成一批章节"""
|
||
with ThreadPoolExecutor(max_workers=workers, thread_name_prefix='gen') as pool:
|
||
futures = {}
|
||
for s in sections:
|
||
f = pool.submit(_generate_one, db_path, s, summary, outline_text,
|
||
anon_requirements, enable_figure, enable_table,
|
||
boq_summary, tender_kind)
|
||
futures[f] = s
|
||
|
||
for f in as_completed(futures):
|
||
s = futures[f]
|
||
try:
|
||
f.result()
|
||
except Exception as e:
|
||
logger.error(f'章节 {s["id"]} "{s["section_title"]}" 异常: {e}')
|
||
|
||
|
||
# ─── 大纲解析 ─────────────────────────────────────────────────────────────
|
||
|
||
_CN_NUMS_LIST = [
|
||
'', '一', '二', '三', '四', '五', '六', '七', '八', '九', '十',
|
||
'十一', '十二', '十三', '十四', '十五', '十六', '十七', '十八', '十九', '二十',
|
||
]
|
||
|
||
|
||
def _renumber_sections(sections: list) -> list:
|
||
"""
|
||
对章节列表按层级顺序重新编号,确保删除/增减章节后序号连续。
|
||
level 1 → 整数字符串 "1","2",...
|
||
level 2 → "1.1","1.2",...
|
||
level 3 → "1.1.1","1.1.2",...
|
||
level 4 → "1.1.1.1",...
|
||
直接修改传入列表中各节点的 number 字段,并返回该列表。
|
||
"""
|
||
counters = [0] * 5 # 索引 0-3 对应 level 1-4
|
||
for s in sections:
|
||
level = s['level']
|
||
idx = level - 1
|
||
counters[idx] += 1
|
||
for j in range(idx + 1, len(counters)):
|
||
counters[j] = 0
|
||
if level == 1:
|
||
s['number'] = str(counters[0])
|
||
else:
|
||
s['number'] = '.'.join(str(counters[i]) for i in range(level))
|
||
return sections
|
||
|
||
|
||
def _sections_to_outline_text(bid_title: str, sections: list) -> str:
|
||
"""将章节列表还原为大纲文本(不输出前置序号)。"""
|
||
lines = []
|
||
if bid_title:
|
||
lines.append(bid_title)
|
||
for s in sections:
|
||
level = s['level']
|
||
title = s['title']
|
||
indent = '\u3000' * (level - 1) # 全角空格缩进,保持可读性
|
||
lines.append(f'{indent}{title}')
|
||
return '\n'.join(lines)
|
||
|
||
|
||
def _outline_indent_level(raw_line: str) -> int:
    """Infer the level (1-4) of an unnumbered outline line from its indent.

    One full-width space or two half-width spaces count as one level.
    """
    leading = re.match(r'^[\u3000 ]*', raw_line).group(0)
    # A full-width space is worth two half-width ones; counting characters
    # alone would map a single full-width indent back to level 1.
    width = sum(2 if ch == '\u3000' else 1 for ch in leading)
    return min(width // 2 + 1, 4)


def _parse_outline(text: str):
    """Parse outline text into a section list and renumber it.

    The first non-empty line is taken as the bid title unless it looks like
    a chapter line (starts with a Chinese numeral / ``第``, or with digits).
    Every following non-empty line becomes a section:

    * Chinese numeral + ``、``/``。``/``.``           -> level 1
    * dotted number such as ``1.2`` or ``1.2.3``    -> level 2-4
      (checked before the single-number form, which would otherwise swallow
      ``1.1 标题`` as a level-1 section titled ``1 标题``)
    * single Arabic number with optional separator  -> level 1
    * anything else                                 -> level inferred from
      indentation (see ``_outline_indent_level``)

    After parsing, numbers are reassigned by ``_renumber_sections`` (so gaps
    left by deleted chapters disappear) and sections that have children are
    marked as non-leaf.

    Args:
        text: Raw outline text.

    Returns:
        ``(bid_title, sections, normalized_text)`` where each section is a
        dict with ``number``, ``title``, ``level``, ``is_leaf`` and
        ``order_index``, and ``normalized_text`` is the outline rebuilt by
        ``_sections_to_outline_text``.
    """
    lines = text.strip().split('\n')
    bid_title = ''
    sections = []

    def add_section(title: str, level: int) -> None:
        # 'number' is a placeholder; _renumber_sections assigns the real one.
        sections.append({
            'number': '',
            'title': title,
            'level': level,
            'is_leaf': True,
            'order_index': len(sections),
        })

    # Only the first non-empty line can be the title.
    for i, line in enumerate(lines):
        stripped = line.strip()
        if not stripped:
            continue
        is_chapter_line = (
            bool(re.match(r'^[一二三四五六七八九十百第]', stripped))
            or bool(re.match(r'^\d+(?:[..、]\s*|\s+)?\S+', stripped))
        )
        if not is_chapter_line:
            bid_title = stripped
            lines = lines[i + 1:]
        break

    for line in lines:
        raw_line = line.rstrip('\n')
        stripped = raw_line.strip()
        if not stripped:
            continue

        # Level 1: Chinese numeral followed by a separator.
        m_cn = re.match(r'^([一二三四五六七八九十百]+)[、。.]\s*(.*)', stripped)
        if m_cn:
            add_section(m_cn.group(2).strip(), 1)
            continue

        # Levels 2-4: X.X[.X[.X]] followed by a non-empty title.  The
        # lookahead stops the number from being split by backtracking.
        m_multi = re.match(r'^(\d+(?:\.\d+)+)(?!\d)[、。.]?\s*(\S.*)', stripped)
        if m_multi:
            depth = m_multi.group(1).count('.') + 1
            add_section(m_multi.group(2).strip(), min(depth, 4))
            continue

        # Level 1: Arabic number with an optional separator
        # ("1 标题", "1.标题", "1标题").
        m_single = re.match(r'^(\d+)(?:[、。..]\s*|\s+)?(.*)', stripped)
        if m_single:
            title = re.sub(r'^[、。..\s]+', '', (m_single.group(2) or '').strip())
            if title:
                add_section(title, 1)
                continue

        # Fallback: unnumbered line, level taken from its indentation
        # (supports outlines that consist of titles only).
        add_section(stripped, _outline_indent_level(raw_line))

    # Renumber so that the sequence is continuous.
    _renumber_sections(sections)

    # Mark parents; done after renumbering so that prefixes are reliable.
    numbers = [s['number'] for s in sections]
    for s in sections:
        prefix = s['number'] + '.'
        if any(n.startswith(prefix) for n in numbers):
            s['is_leaf'] = False

    # Rebuild the normalized outline text (written back to the database).
    normalized_text = _sections_to_outline_text(bid_title, sections)

    return bid_title, sections, normalized_text
|
||
|
||
|
||
# ─── 数据库工具 ───────────────────────────────────────────────────────────
|
||
|
||
def _get_tender_data(conn, project_id):
|
||
cur = conn.cursor()
|
||
cur.execute(
|
||
"SELECT summary, rating_requirements, rating_json, raw_text, boq_summary, tender_kind "
|
||
"FROM tender_data WHERE project_id=?",
|
||
(project_id,)
|
||
)
|
||
row = cur.fetchone()
|
||
if row:
|
||
return {
|
||
'summary': row[0],
|
||
'rating_requirements': row[1],
|
||
'rating_json': row[2],
|
||
'raw_text': row[3],
|
||
'boq_summary': row[4] or '',
|
||
'tender_kind': row[5] or 'engineering',
|
||
}
|
||
return None
|
||
|
||
|
||
def _get_outline_text(conn, project_id):
|
||
cur = conn.cursor()
|
||
cur.execute("SELECT outline FROM tender_data WHERE project_id=?", (project_id,))
|
||
row = cur.fetchone()
|
||
return row[0] if row and row[0] else ''
|
||
|
||
|
||
def _save_outline_text(conn, project_id, outline_text):
|
||
cur = conn.cursor()
|
||
# 兜底:若 tender_data 尚未初始化,先补齐空记录,避免 UPDATE 0 行导致“假保存成功”
|
||
cur.execute(
|
||
"INSERT OR IGNORE INTO tender_data (project_id, status) VALUES (?, 'pending')",
|
||
(project_id,),
|
||
)
|
||
cur.execute(
|
||
"UPDATE tender_data SET outline=?, updated_at=? WHERE project_id=?",
|
||
(outline_text, datetime.now(), project_id),
|
||
)
|
||
conn.commit()
|
||
|
||
|
||
def _save_sections(conn, project_id, sections):
|
||
cur = conn.cursor()
|
||
# 清除旧章节
|
||
cur.execute("DELETE FROM bid_sections WHERE project_id=?", (project_id,))
|
||
for s in sections:
|
||
cur.execute('''
|
||
INSERT INTO bid_sections
|
||
(project_id, section_number, section_title, level, is_leaf, order_index, status)
|
||
VALUES (?, ?, ?, ?, ?, ?, 'pending')
|
||
''', (project_id, s['number'], s['title'], s['level'], 1 if s['is_leaf'] else 0, s['order_index']))
|
||
conn.commit()
|
||
|
||
|
||
def _get_section(conn, section_id):
|
||
cur = conn.cursor()
|
||
cur.execute(
|
||
"SELECT id, section_number, section_title, level, is_leaf, content, intro_content FROM bid_sections WHERE id=?",
|
||
(section_id,)
|
||
)
|
||
row = cur.fetchone()
|
||
if row:
|
||
return {
|
||
'id': row[0], 'section_number': row[1], 'section_title': row[2],
|
||
'level': row[3], 'is_leaf': row[4], 'content': row[5], 'intro_content': row[6]
|
||
}
|
||
return None
|
||
|
||
|
||
def _update_section_status(conn, section_id, status, error=''):
|
||
cur = conn.cursor()
|
||
cur.execute(
|
||
"UPDATE bid_sections SET status=?, error_message=?, updated_at=? WHERE id=?",
|
||
(status, error, datetime.now(), section_id)
|
||
)
|
||
conn.commit()
|
||
|
||
|
||
def _update_section_content(conn, section_id, content, intro_content):
|
||
cur = conn.cursor()
|
||
cur.execute(
|
||
"UPDATE bid_sections SET content=?, intro_content=?, updated_at=? WHERE id=?",
|
||
(content, intro_content, datetime.now(), section_id)
|
||
)
|
||
conn.commit()
|
||
|
||
|
||
# ─── 线程安全的数据库操作(每次独立开关连接,启用 WAL)──────────────────
|
||
|
||
def _db_connect(db_path: str) -> sqlite3.Connection:
|
||
"""创建启用 WAL 模式的连接,适合多线程并发写入"""
|
||
conn = sqlite3.connect(db_path, timeout=30, check_same_thread=False)
|
||
conn.execute('PRAGMA journal_mode=WAL')
|
||
return conn
|
||
|
||
|
||
def _update_section_status_safe(db_path, section_id, status, error=''):
    """Update a section's status through a short-lived private connection.

    A connection is opened with ``_db_connect`` for this single write and is
    always closed afterwards, including when the update raises.
    """
    connection = _db_connect(db_path)
    try:
        _update_section_status(connection, section_id, status, error)
    finally:
        connection.close()
|
||
|
||
|
||
def _update_section_content_safe(db_path, section_id, content, intro_content):
    """Store a section's text through a short-lived private connection.

    A connection is opened with ``_db_connect`` for this single write and is
    always closed afterwards, including when the update raises.
    """
    connection = _db_connect(db_path)
    try:
        _update_section_content(connection, section_id, content, intro_content)
    finally:
        connection.close()
|
||
|
||
|
||
def _set_project_status(conn, project_id, status, error=''):
|
||
cur = conn.cursor()
|
||
cur.execute(
|
||
"UPDATE projects SET outline_status=?, outline_error=?, updated_at=? WHERE id=?",
|
||
(status, error, datetime.now(), project_id)
|
||
)
|
||
conn.commit()
|
||
|
||
|
||
# ─── AI自动填充小章节 ───────────────────────────────────────────────────────
|
||
|
||
def expand_outline(outline_text: str, summary: str = '', rating_requirements: str = '',
                   project_id: int = 0) -> str:
    """Fill in sub-chapters for the main chapter titles supplied by the user.

    The first non-empty line is kept as the bid title when it is not in
    chapter format and is longer than 50 characters.  From the remaining
    lines, main chapter titles are collected: dotted numbers (``1.1 ...``)
    are ignored, numbered headings have their numbering removed, and
    unnumbered lines shorter than 50 characters are accepted as titles.

    Sub-chapters are generated concurrently (at most 10 workers) with
    ``_generate_sub_chapters``.  The result lists every main chapter as
    ``<Chinese numeral>、<title>`` followed by its generated sub-chapter lines.

    Returns:
        The expanded outline text, or ``outline_text`` unchanged when no
        main chapter could be found.
    """
    cn_heading_patterns = (
        r'^([一二三四五六七八九十百]+)[、。..\s]+\s*(.*)',
        r'^第([一二三四五六七八九十百]+)[章节]\s*(.*)',
        r'^([一二三四五六七八九十百]+)(?![一二三四五六七八九十百])\s+(.*)',
    )
    arabic_heading_patterns = (
        r'^(\d+)[、。..\s]+\s*(.*)',
        r'^第(\d+)[章节]\s*(.*)',
        r'^(\d+)(?!\d)\s+(.*)',
        r'^(\d+)([^\d].*)',
    )

    def first_match(patterns, candidate):
        # Return the match of the first pattern that applies, else None.
        for pattern in patterns:
            found = re.match(pattern, candidate)
            if found:
                return found
        return None

    remaining_lines = outline_text.strip().split('\n')
    bid_title = ''

    # Bid title: only the first non-empty line is examined.
    for pos, raw in enumerate(remaining_lines):
        candidate = raw.strip()
        if not candidate:
            continue
        chapter_like = (
            re.match(r'^[一二三四五六七八九十百第]', candidate)
            or re.match(r'^\d+[..、\s]', candidate)
        )
        if not chapter_like and len(candidate) > 50:
            bid_title = candidate
            remaining_lines = remaining_lines[pos + 1:]
        break

    # Collect the level-1 chapter titles.
    chapter_titles = []
    for raw in remaining_lines:
        candidate = raw.strip()
        if not candidate:
            continue

        # Second-level and deeper entries are not main chapters.
        if re.match(r'^\d+(?:\.\d+)+', candidate):
            continue

        numbered = (first_match(cn_heading_patterns, candidate)
                    or first_match(arabic_heading_patterns, candidate))
        if numbered is None:
            # Short unnumbered lines are also accepted as main chapters.
            if 0 < len(candidate) < 50:
                chapter_titles.append(candidate)
            continue

        heading = re.sub(r'^[、。..\s]+', '', numbered.group(2).strip())
        if heading:
            chapter_titles.append(heading)

    if not chapter_titles:
        logger.warning(f'expand_outline未找到主章节,输入大纲:{outline_text[:200]}')
        return outline_text

    # Generate the sub-chapters of all main chapters concurrently.
    sub_outlines = [None] * len(chapter_titles)
    with ThreadPoolExecutor(max_workers=min(len(chapter_titles), 10)) as executor:
        jobs = {}
        for pos, heading in enumerate(chapter_titles):
            job = executor.submit(
                _generate_sub_chapters, heading, summary, rating_requirements, pos + 1, project_id
            )
            jobs[job] = (pos, heading)

        for job in as_completed(jobs):
            pos, heading = jobs[job]
            try:
                sub_outlines[pos] = job.result()
                logger.info(f'主章节扩展成功: {heading}')
            except Exception as exc:
                logger.error(f'主章节扩展失败: {heading}, 错误: {exc}')
                sub_outlines[pos] = ''

    # Assemble the final text.
    output_lines = [bid_title] if bid_title else []
    for chapter_no, heading in enumerate(chapter_titles, start=1):
        if chapter_no < len(_CN_NUMS_LIST):
            label = _CN_NUMS_LIST[chapter_no]
        else:
            label = str(chapter_no)
        output_lines.append(f'{label}、{heading}')
        generated = sub_outlines[chapter_no - 1]
        if generated:
            output_lines.append(generated)

    return '\n'.join(output_lines)
|
||
|
||
|
||
def _extract_title_text(title: str) -> str:
|
||
"""从标题中提取纯文本内容,去除序号和标点符号。"""
|
||
text = re.sub(r'^[一二三四五六七八九十百]+[、。.]\s*', '', title.strip())
|
||
text = re.sub(r'^\d+(?:\.\d+)*[、。.]?\s*', '', text)
|
||
text = re.sub(r'^\s*[、。,,;;::]+\s*', '', text)
|
||
text = re.sub(r'\s*[、。,,;;::]+\s*$', '', text)
|
||
return text.strip()
|
||
|
||
|
||
def _generate_sub_chapters(chapter_title: str, summary: str, rating_requirements: str, chapter_num: int,
                           project_id: int = 0) -> str:
    """Generate the sub-chapter outline of one main chapter.

    The AI response is parsed line by line and renumbered under
    ``chapter_num``: the level of a line comes from its dotted number when it
    has one, otherwise from its indentation (0 -> level 1, 1-2 characters ->
    level 2, more -> level 3).  Lines whose number contains a component
    starting with ``0``, titles shorter than two characters and lines that
    repeat the main chapter title are dropped.

    When a deeper level appears before its parent, the missing parent
    counters start at 1, so the output never contains numbers such as
    ``3.0.1``.

    Args:
        chapter_title: Title of the main chapter.
        summary: Tender summary; also scanned for bill-of-quantities lines.
        rating_requirements: Rating requirements passed to the prompt.
        chapter_num: 1-based number of the main chapter.
        project_id: Accepted for interface compatibility; not used here.

    Returns:
        The sub-chapter lines joined with newlines (deeper levels indented
        with full-width spaces), or ``''`` if generation fails.
    """
    boq_summary = _get_boq_summary_for_chapter(chapter_title, summary)
    prompt = P.get_chapter_outline_prompt(summary, chapter_title, rating_requirements)
    if boq_summary:
        prompt += (
            '\n\n【工程量清单关键信息】\n'
            f'{boq_summary}\n\n请严格根据工程量清单中的工程项目生成子章节,确保每个子章节都与具体工程内容对应。'
        )

    def level_from_indent(indent_count: int) -> int:
        # Fallback for lines without a dotted number.
        if indent_count == 0:
            return 1
        return 2 if indent_count <= 2 else 3

    try:
        response = ai_client.chat(
            prompt,
            system='你是一位专业的标书大纲生成专家。请根据主章节标题和工程量清单内容生成合适的子章节列表,严格遵守编号规则:'
                   '绝对禁止出现1.0、2.0、1.0.1等0开头编号;'
                   '二级从X.1开始,三级从X.1.1开始,四级从X.1.1.1开始;'
                   '只输出子章节,不重复主章节标题。',
            temperature=0.5,
            max_tokens=2048,
        )
        logger.info(f'_generate_sub_chapters AI响应章节={chapter_title},长度={len(response)}')

        main_title_text = _extract_title_text(chapter_title)
        counts = [0, 0, 0]  # running index for sub-levels 1-3
        result_lines = []

        for line in response.strip().split('\n'):
            if not line or not line.strip():
                continue

            # Leading half-/full-width spaces measure the indentation.
            body = line.lstrip('\u3000 ')
            indent_count = len(line) - len(body)

            # Drop markdown decoration (#, *, >, -) in front of the entry.
            body = re.sub(r'^[\s#*>\-]+', '', body).strip()
            if not body:
                continue

            m = re.match(r'^(\d+(?:\.\d+)*)[、。..]?\s*(.*)', body)
            if m:
                parts = m.group(1).split('.')
                # Reject numbers such as 1.0 or 1.0.1.
                if any(i > 0 and part.startswith('0') for i, part in enumerate(parts)):
                    continue
                if len(parts) > 1:
                    level = len(parts) - 1
                else:
                    level = level_from_indent(indent_count)
                title = m.group(2).strip()
            else:
                m_cn = re.match(r'^([一二三四五六七八九十百]+)[、。..]\s*(.*)', body)
                if m_cn:
                    title = m_cn.group(2).strip()
                    level = 1
                else:
                    title = body
                    level = level_from_indent(indent_count)

            title = _extract_title_text(title)
            if len(title) < 2:
                continue

            # Do not repeat the main chapter itself as a sub-chapter.
            if main_title_text and title == main_title_text:
                continue

            level = min(max(level, 1), 3)
            counts[level - 1] += 1
            for deeper in range(level, len(counts)):
                counts[deeper] = 0
            # Start skipped parent levels at 1 so that no component is 0.
            for parent in range(level - 1):
                if counts[parent] == 0:
                    counts[parent] = 1

            num = '.'.join([str(chapter_num)] + [str(c) for c in counts[:level]])
            indent = '\u3000' * (level - 1)
            result_lines.append(f'{indent}{num} {title}')

        return '\n'.join(result_lines)
    except Exception:
        logger.exception(f'生成子章节失败 chapter={chapter_title}')
        return ''
|
||
|
||
|
||
def _get_boq_summary_for_chapter(chapter_title: str, summary: str) -> str:
|
||
"""
|
||
从摘要中提取与施工方案相关的工程量清单信息。
|
||
"""
|
||
if not summary:
|
||
return ''
|
||
|
||
boq_keywords = [
|
||
'项目编码', '清单编码', '编码', '编号', '序号', '项目编号', '清单编号',
|
||
'项目名称', '清单名称', '名称', '工程名称', '清单项目名称', '分项名称',
|
||
'计量单位', '单位', '计量', '工程量', '数量', '清单数量', '清单工程量',
|
||
'综合单价', '单价', '投标单价', '综合价', '合价', '金额', '合计金额', '综合合价', '合计', '总价', '小计',
|
||
'项目特征', '项目特征描述', '特征描述', '做法说明', '工程内容', '工作内容', '详述', '说明', '特征', '项目特征及内容',
|
||
'施工内容', '工艺要求', '技术措施', '施工要求', '施工方法'
|
||
]
|
||
|
||
lines = summary.strip().split('\n')
|
||
boq_lines = []
|
||
for line in lines:
|
||
if any(keyword in line for keyword in boq_keywords):
|
||
boq_lines.append(line.strip())
|
||
|
||
if boq_lines:
|
||
return '\n'.join(boq_lines[:20])
|
||
return ''
|