# tech-bid-manage20260423/modules/exporter.py

"""
Word 文档导出模块
"""
import os
import re
import sqlite3
import logging
from datetime import datetime
from docx import Document
from docx.shared import Pt, Cm, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml import OxmlElement
from docx.oxml.ns import qn

import config
from utils.outline_numbering import format_heading_display

logger = logging.getLogger(__name__)

# Per-level heading formatting: level -> (Word style name, font size in pt, bold).
# _build_document unpacks only the font size and the bold flag; the style name
# is not read by any code visible in this module.
LEVEL_STYLES = {
    1: ('Heading 1', 16, True),
    2: ('Heading 2', 14, True),
    3: ('Heading 3', 13, False),
    4: ('Heading 4', 12, False),
}


def export_to_word(db_path: str, project_id: int) -> str:
    """
    Generate the Word document for a project and save it under
    ``config.EXPORT_DIR``.

    Args:
        db_path: Path of the SQLite database file.
        project_id: Value of ``projects.id`` for the project to export.

    Returns:
        The name of the written ``.docx`` file (file name only, not the path).

    Raises:
        ValueError: If no row in ``projects`` has the given ``project_id``.
    """
    # Read everything from the database first so the connection is released
    # before the (comparatively slow) document build and file write.
    conn = sqlite3.connect(db_path)
    try:
        cur = conn.cursor()

        # Project row
        cur.execute("SELECT name FROM projects WHERE id=?", (project_id,))
        project = cur.fetchone()
        if not project:
            raise ValueError(f'项目 {project_id} 不存在')
        # A NULL name would otherwise break the string concatenation below.
        project_name = str(project[0] or '')

        # The first non-empty line of the stored outline, when present,
        # replaces the default title on the title page.
        cur.execute("SELECT outline FROM tender_data WHERE project_id=?", (project_id,))
        td = cur.fetchone()
        bid_title = project_name + '技术标书'
        if td and td[0]:
            first_line = td[0].strip().split('\n')[0].strip()
            if first_line:
                bid_title = first_line

        # All sections of the project, in display order
        cur.execute('''
            SELECT section_number, section_title, level, is_leaf, content, intro_content
            FROM bid_sections
            WHERE project_id=?
            ORDER BY order_index
        ''', (project_id,))
        sections = cur.fetchall()
    finally:
        conn.close()

    doc = _build_document(bid_title, sections)

    os.makedirs(config.EXPORT_DIR, exist_ok=True)
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

    # `in` on a string tests single characters, so a regex-style range such as
    # '\u4e00-\u9fff' has no effect here. CJK ideographs are kept anyway
    # because str.isalnum() is true for them.
    allowed_punctuation = '._- '
    safe_name = ''.join(
        c for c in project_name if c.isalnum() or c in allowed_punctuation
    ).strip(' .')  # no leading dot (hidden file) or stray edge spaces
    if not safe_name:
        # Nothing usable survived sanitizing; fall back to a stable identifier.
        safe_name = f'project_{project_id}'

    filename = f'{safe_name}_{timestamp}.docx'
    filepath = os.path.join(config.EXPORT_DIR, filename)
    doc.save(filepath)
    logger.info('导出完成: %s', filepath)
    return filename


# Disclaimer text rendered by _add_disclaimer_page. The first line is the page
# title and is followed by one blank line; _add_disclaimer_page skips both and
# renders the title separately. The backslashes right after the opening quotes
# and right before the closing quotes are line continuations, so the string has
# no leading or trailing newline.
DISCLAIMER_TEXT = """\
免责声明

本工具仅供学习交流免费使用，所生成的技术方案不可直接用于投标，请务必人工核对。本工具不会通过任何平台进行销售，请用户注意辨别真伪。在您开始使用本AI标书制作服务之前，请认真阅读并同意以下关键条款。一旦您继续使用，即表示您已充分理解并认可本提示的全部内容。

服务定位
本工具为单机使用的AI标书辅助工具，旨在帮助您生成标书的参考素材。您需对最终自己编写的标书文件承担全部责任，包括审核、修改内容，确保其符合相关法律法规及项目要求。

准确性免责
本人不对AI生成内容的完全准确性与完整性作任何保证。您有义务自行核实所有关键信息，并自行承担因使用本工具所引发的一切后果。

标书风险
本工具所生成的素材文件仅作参考。若您使用（包括引用、修改或二次创作），需自行承担由此可能导致的废标、侵权等全部风险与责任，本人不承担任何相关责任。

责任限制
任何情形下，本人均不对因使用本服务而造成的任何直接、间接或衍生损失（例如利润损失、业务中断、数据丢失等）承担法律责任。

其他事项
本人保留随时修改或终止本服务的权利。本提示的解释及争议解决，均适用中华人民共和国法律。\
"""


def _add_disclaimer_page(doc: Document) -> None:
    """Append the disclaimer page to ``doc``, ending with a page break.

    The page consists of a centered title, one empty paragraph, and one
    paragraph per line of ``DISCLAIMER_TEXT`` after its title line (blank
    lines become empty paragraphs). Section titles are rendered bold at 11pt,
    all other lines at 10.5pt.

    Args:
        doc: The document being built; paragraphs are appended to it.
    """
    # Page title
    title_para = doc.add_paragraph()
    title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    title_run = title_para.add_run('免责声明')
    title_run.font.size = Pt(16)
    title_run.font.bold = True
    title_run.font.name = '黑体'
    title_run._element.rPr.rFonts.set(qn('w:eastAsia'), '黑体')

    doc.add_paragraph()

    # Skip the title line and the blank line after it; the title is rendered above.
    body_lines = [line.strip() for line in DISCLAIMER_TEXT.split('\n')[2:]]
    last_index = len(body_lines) - 1
    for index, stripped in enumerate(body_lines):
        # No line of the text is indented, so indentation cannot tell titles
        # from body text (it would mark every line as a title). A section
        # title is instead a line that opens a block: blank line (or start)
        # before it and body text directly after it.
        opens_block = index == 0 or not body_lines[index - 1]
        has_body_below = index < last_index and bool(body_lines[index + 1])
        is_section_title = bool(stripped) and opens_block and has_body_below

        p = doc.add_paragraph()
        run = p.add_run(stripped)
        if is_section_title:
            run.font.bold = True
            run.font.size = Pt(11)
        else:
            run.font.size = Pt(10.5)
        run.font.name = 'Times New Roman'
        run._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')
        p.paragraph_format.space_after = Pt(4)
        _set_line_spacing_15(p)

    doc.add_page_break()

def _add_toc_tree_page(doc: Document, sections: list) -> None:
    """Append a static, indented table-of-contents page followed by a page break.

    Each section row becomes one plain-text paragraph indented by its level;
    no Word TOC field is inserted.

    Args:
        doc: The document being built.
        sections: Six-item rows whose first three items are the section
            number, the title and the level.
    """
    # Centered "目录" heading
    heading_para = doc.add_paragraph()
    heading_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    heading_run = heading_para.add_run('目录')
    heading_font = heading_run.font
    heading_font.size = Pt(16)
    heading_font.bold = True
    heading_font.name = '黑体'
    heading_run._element.rPr.rFonts.set(qn('w:eastAsia'), '黑体')
    doc.add_paragraph()

    for section_number, title, raw_level, _is_leaf, _content, _intro in sections:
        level = min(int(raw_level), 4)  # levels deeper than 4 are shown as level 4
        depth = max(0, level - 1)
        entry_text = format_heading_display(
            level, str(section_number or ''), str(title or '')
        )

        entry = doc.add_paragraph()
        entry_format = entry.paragraph_format
        entry_format.left_indent = Cm(0.75 * depth)
        entry_format.space_after = Pt(3)

        entry_run = entry.add_run(entry_text)
        entry_run.font.size = Pt(12)
        entry_run.font.name = '宋体'
        entry_run._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')

    doc.add_page_break()


def _build_document(bid_title: str, sections) -> Document:
    """Assemble the complete export document and return it.

    Page order: disclaimer page, title page, static table of contents, then
    one heading per section followed by its intro text and its content.

    Args:
        bid_title: Text shown on the title page.
        sections: Six-item rows of (section number, title, level, is_leaf,
            content, intro).
    """
    document = Document()

    # Page setup: 21 x 29.7 cm page with a 3 cm left margin and 2.5 cm elsewhere
    page = document.sections[0]
    page.page_width, page.page_height = Cm(21), Cm(29.7)
    page.left_margin, page.right_margin = Cm(3), Cm(2.5)
    page.top_margin, page.bottom_margin = Cm(2.5), Cm(2.5)

    # Disclaimer page comes first
    _add_disclaimer_page(document)

    # Title page: title, one empty paragraph, then year and month
    cover_title = document.add_paragraph()
    cover_title.alignment = WD_ALIGN_PARAGRAPH.CENTER
    cover_run = cover_title.add_run(bid_title)
    cover_font = cover_run.font
    cover_font.size = Pt(22)
    cover_font.bold = True
    cover_font.color.rgb = RGBColor(0x1a, 0x56, 0xdb)
    cover_font.name = '黑体'
    cover_run._element.rPr.rFonts.set(qn('w:eastAsia'), '黑体')

    document.add_paragraph()

    cover_date = document.add_paragraph()
    cover_date.alignment = WD_ALIGN_PARAGRAPH.CENTER
    stamp_run = cover_date.add_run(datetime.now().strftime('%Y年%m月'))
    stamp_run.font.size = Pt(14)
    stamp_run.font.name = '宋体'
    stamp_run._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')

    document.add_page_break()

    # Static table of contents between the title page and the body
    _add_toc_tree_page(document, sections)

    # Body: one heading per section, then its texts
    for section_number, title, raw_level, _is_leaf, content, intro in sections:
        level = min(int(raw_level), 4)

        heading_text = format_heading_display(
            level, str(section_number or ''), str(title or '')
        )
        heading = document.add_heading(level=level)
        heading.clear()
        heading_run = heading.add_run(heading_text)

        _style_name, font_size, bold = LEVEL_STYLES.get(level, ('Heading 4', 12, False))
        typeface = '黑体' if level <= 2 else '宋体'
        heading_run.font.size = Pt(font_size)
        heading_run.font.bold = bold
        heading_run.font.name = typeface
        heading_run._element.rPr.rFonts.set(qn('w:eastAsia'), typeface)

        # Intro text first, then the content; blank or missing texts are skipped
        for body_text in (intro, content):
            if body_text and body_text.strip():
                _add_body_paragraphs(document, body_text)

    return document


def _set_line_spacing_15(paragraph):
    """Set ``paragraph`` to 1.5x (multiple) line spacing.

    Assigning a float to python-docx's ``ParagraphFormat.line_spacing`` writes
    ``w:line="360"`` with ``w:lineRule="auto"`` on the paragraph's
    ``w:spacing`` element. With the ``auto`` rule the value counts 240ths of
    a line, so 360 means 1.5 lines. Going through the public property also
    lets python-docx create ``w:spacing`` at its proper position inside
    ``w:pPr`` instead of appending it after whatever is already there.

    Args:
        paragraph: The python-docx paragraph to modify in place.
    """
    paragraph.paragraph_format.line_spacing = 1.5


# ── 图/表标记解析 ─────────────────────────────────────────────────────────

# Matches one [FIGURE:title]...[/FIGURE] or [TABLE:title]...[/TABLE] block.
# Groups 1/2 hold a figure's title and body, groups 3/4 a table's.
_BLOCK_PATTERN = re.compile(
    r'\[FIGURE:([^\]]+)\](.*?)\[/FIGURE\]'
    r'|\[TABLE:([^\]]+)\](.*?)\[/TABLE\]',
    re.DOTALL
)


def _split_content_blocks(text: str) -> list:
    """
    Split section text into an ordered list of block dicts:
      {'type': 'text',   'content': '...'}
      {'type': 'figure', 'title': '...', 'content': '...'}
      {'type': 'table',  'title': '...', 'content': '...'}

    Text between markers is kept verbatim; figure/table titles and bodies are
    stripped of surrounding whitespace.
    """
    pieces = []
    cursor = 0
    for match in _BLOCK_PATTERN.finditer(text):
        begin, end = match.span()
        if begin > cursor:
            pieces.append({'type': 'text', 'content': text[cursor:begin]})

        figure_title, figure_body, table_title, table_body = match.groups()
        if figure_title is None:
            kind, title, body = 'table', table_title, table_body
        else:
            kind, title, body = 'figure', figure_title, figure_body
        pieces.append({'type': kind,
                       'title': title.strip(),
                       'content': body.strip()})
        cursor = end

    remainder = text[cursor:]
    if remainder:
        pieces.append({'type': 'text', 'content': remainder})
    return pieces


def _set_para_shading(para, hex_fill: str):
    """Append a ``w:shd`` element with fill color ``hex_fill`` to the paragraph's properties."""
    shading = OxmlElement('w:shd')
    for attr_name, attr_value in (
        ('w:val', 'clear'),
        ('w:color', 'auto'),
        ('w:fill', hex_fill),
    ):
        shading.set(qn(attr_name), attr_value)
    para._element.get_or_add_pPr().append(shading)


def _set_cell_bg(cell, hex_fill: str):
    """Append a ``w:shd`` element with fill color ``hex_fill`` to the table cell's properties."""
    cell_props = cell._tc.get_or_add_tcPr()
    shading = OxmlElement('w:shd')
    attributes = {'w:val': 'clear', 'w:color': 'auto', 'w:fill': hex_fill}
    for attr_name, attr_value in attributes.items():
        shading.set(qn(attr_name), attr_value)
    cell_props.append(shading)


def _set_cell_padding(cell, pt_value: float):
    """Append a ``w:tcMar`` element giving the cell the same margin on all four sides.

    Args:
        cell: The python-docx table cell to modify.
        pt_value: Margin in points; written in twips (1 pt = 20 twips),
            truncated to an integer.
    """
    cell_props = cell._tc.get_or_add_tcPr()
    twips = str(int(pt_value * 20))

    margins = OxmlElement('w:tcMar')
    for edge in ('top', 'left', 'bottom', 'right'):
        edge_node = OxmlElement(f'w:{edge}')
        edge_node.set(qn('w:w'), twips)
        edge_node.set(qn('w:type'), 'dxa')
        margins.append(edge_node)

    cell_props.append(margins)


def _safe_set_eastasia(run, font_name: str):
    """Set the East Asian font of ``run``, creating ``w:rPr``/``w:rFonts`` if missing.

    Reading ``run.font.size`` does not create the run properties, so the
    elements are obtained with get-or-add calls; the font is then applied even
    when no other font attribute has been set on the run yet. The call stays
    best-effort: if ``run`` does not offer the expected element API, the
    problem is logged and the run is left unchanged.

    Args:
        run: The python-docx run to modify.
        font_name: Font name written to the ``w:eastAsia`` attribute.
    """
    try:
        r_fonts = run._element.get_or_add_rPr().get_or_add_rFonts()
    except AttributeError:
        logger.warning('设置东亚字体失败: %s', font_name, exc_info=True)
        return
    r_fonts.set(qn('w:eastAsia'), font_name)


def _add_block_caption(doc: Document, prefix: str, title: str):
    """Append a centered, bold 11pt caption paragraph reading ``<prefix>：<title>``."""
    caption = doc.add_paragraph()
    caption.alignment = WD_ALIGN_PARAGRAPH.CENTER

    caption_format = caption.paragraph_format
    caption_format.space_before = Pt(8)
    caption_format.space_after = Pt(3)

    label = caption.add_run(f'{prefix}：{title}')
    label_font = label.font
    label_font.bold = True
    label_font.size = Pt(11)
    label_font.name = 'Times New Roman'
    _safe_set_eastasia(label, '黑体')


def _add_figure_block(doc: Document, title: str, content: str):
    """
    Render a text figure as a captioned, bordered box with a tinted background.

    The box is a one-cell table using the 'Table Grid' style; every line of
    ``content`` becomes its own paragraph inside the cell. An empty paragraph
    is added after the box.
    """
    _add_block_caption(doc, '图', title)

    figure_lines = content.split('\n')

    # One-cell table: border on all sides plus a pale blue-grey fill
    frame = doc.add_table(rows=1, cols=1)
    frame.style = 'Table Grid'
    box = frame.cell(0, 0)
    _set_cell_bg(box, 'EFF3FB')
    _set_cell_padding(box, 5)  # 5pt on every side

    for index, figure_line in enumerate(figure_lines):
        if index:
            para = box.add_paragraph()
        else:
            # Reuse the paragraph the new cell already contains
            para = box.paragraphs[0]
            para.clear()

        line_format = para.paragraph_format
        line_format.space_before = Pt(0)
        line_format.space_after = Pt(1)

        # An empty line is written as a single space
        run = para.add_run(figure_line or ' ')
        run.font.size = Pt(9.5)
        run.font.name = 'Courier New'
        _safe_set_eastasia(run, '宋体')

    # Empty paragraph after the figure
    spacer = doc.add_paragraph()
    spacer.paragraph_format.space_after = Pt(8)


# A line made only of pipes, hyphens, colons and whitespace. Whether it is a
# Markdown delimiter row is decided together with a hyphen check, see below.
_TABLE_RULE_LINE = re.compile(r'^\|[\s\-:| ]+\|$')


def _parse_markdown_table_rows(content: str) -> list:
    """Return the cell texts of every ``|...|`` row in ``content``, without delimiter rows."""
    parsed = []
    for line in content.strip().split('\n'):
        line = line.strip()
        if not line:
            continue
        # A delimiter row (|---|:--:|) always contains a hyphen. Requiring one
        # keeps rows whose cells are all empty (| | |) from being mistaken
        # for a delimiter and dropped.
        if '-' in line and _TABLE_RULE_LINE.match(line):
            continue
        if line.startswith('|') and line.endswith('|'):
            parsed.append([cell.strip() for cell in line[1:-1].split('|')])
    return parsed


def _add_word_table(doc: Document, title: str, content: str):
    """Render a Markdown table as a captioned Word table.

    The first parsed row is formatted as the header (bold, centered, light
    blue fill); shorter rows are padded with empty cells. When no table row
    can be parsed, the caption is followed by ``content`` as plain text. An
    empty paragraph is added after the table.

    Args:
        doc: The document being built.
        title: Caption text, shown after the "表：" prefix.
        content: Markdown table source.
    """
    raw_rows = _parse_markdown_table_rows(content)

    if not raw_rows:
        # Nothing parsable: fall back to plain text under the caption
        _add_block_caption(doc, '表', title)
        _add_plain_text(doc, content)
        return

    # Pad every row to the widest row so the grid is rectangular
    col_count = max(len(r) for r in raw_rows)
    rows = [r + [''] * (col_count - len(r)) for r in raw_rows]

    _add_block_caption(doc, '表', title)

    table = doc.add_table(rows=len(rows), cols=col_count)
    table.style = 'Table Grid'

    for i, row_data in enumerate(rows):
        is_header = i == 0
        for j, cell_text in enumerate(row_data):
            cell = table.cell(i, j)
            para = cell.paragraphs[0]
            para.clear()
            para.alignment = WD_ALIGN_PARAGRAPH.CENTER if is_header else WD_ALIGN_PARAGRAPH.LEFT
            run = para.add_run(cell_text)
            run.font.size = Pt(10)
            run.font.bold = is_header
            run.font.name = 'Times New Roman'
            _safe_set_eastasia(run, '宋体')
            if is_header:
                _set_cell_bg(cell, 'D6E4F7')  # light blue header fill

    # Empty paragraph after the table
    sp = doc.add_paragraph()
    sp.paragraph_format.space_after = Pt(6)


def _add_plain_text(doc: Document, text: str):
    """Append one body paragraph per non-blank line of ``text`` (internal helper).

    Each paragraph gets a 24pt first-line indent, 6pt space after, 1.5x line
    spacing and 12pt text.
    """
    trimmed_lines = (raw.strip() for raw in text.split('\n'))
    for sentence in filter(None, trimmed_lines):
        para = doc.add_paragraph()
        layout = para.paragraph_format
        layout.first_line_indent = Pt(24)
        layout.space_after = Pt(6)
        _set_line_spacing_15(para)

        body_run = para.add_run(sentence)
        body_run.font.size = Pt(12)
        body_run.font.name = 'Times New Roman'
        _safe_set_eastasia(body_run, '宋体')


def _add_body_paragraphs(doc: Document, text: str):
    """
    Render section text block by block: [FIGURE:...] blocks as figure boxes,
    [TABLE:...] blocks as Word tables, everything else as plain paragraphs.
    """
    titled_renderers = {
        'figure': _add_figure_block,
        'table': _add_word_table,
    }
    for piece in _split_content_blocks(text):
        render = titled_renderers.get(piece['type'])
        if render is None:
            _add_plain_text(doc, piece['content'])
        else:
            render(doc, piece['title'], piece['content'])