commit a39a9c58005cdb84013cdbffda87213769c1f254 Author: wanghui Date: Fri Apr 24 14:44:38 2026 +0800 完善版本V1.1 diff --git a/.deps_installed b/.deps_installed new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/.deps_installed @@ -0,0 +1 @@ + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1a7e8c6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +.env +*.log +__pycache__/ +*.pyc +.venv/ +venv/ diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..f6906f2 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,10 @@ +# 默认忽略的文件 +/shelf/ +/workspace.xml +# 基于编辑器的 HTTP 客户端请求 +/httpRequests/ +# 已忽略包含查询文件的默认文件夹 +/queries/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..fe5af5a --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/tech-bid-manageV1.120260424.iml b/.idea/tech-bid-manageV1.120260424.iml new file mode 100644 index 0000000..6c86ab2 --- /dev/null +++ b/.idea/tech-bid-manageV1.120260424.iml @@ -0,0 +1,16 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..d9945ab --- /dev/null +++ b/README.md @@ -0,0 +1,113 @@ +# 标伙伴 · AI 标书助手 + +基于大模型的智能标书生成工具（单机版），支持解析招标文件、自动生成技术标书、导出 Word 文档。 + +## 快速开始 + +### 方式一：双击启动（Windows） + +直接双击 `start.bat`，首次运行会自动安装依赖。 + +### 方式二：命令行启动 + +```bash +# 1. 
安装依赖 +pip install -r requirements.txt + +# 2. 启动应用 +python app.py +``` + +浏览器访问 **http://localhost:5000** + +--- + +## 配置 API Key + +首次使用前，点击右上角 ⚙️ 设置图标，选择模型提供商并填入 API Key： + +| 提供商 | 推荐模型 | 申请地址 | +|--------|---------|---------| +| 通义千问 | qwen-max | https://dashscope.aliyun.com/ | +| DeepSeek | deepseek-chat (V3) | https://platform.deepseek.com/ | +| OpenAI | gpt-4o | https://platform.openai.com/ | + +> **DeepSeek 说明**：deepseek-chat (V3) 性价比极高，推荐用于生产环境。 +> 由于 DeepSeek 暂不提供 Embedding API，使用知识库功能时会自动回退到本地 sentence-transformers 模型（首次使用需下载约 90MB）。 + +也可通过环境变量配置： + +```bash +# 通义千问 +set QWEN_API_KEY=sk-xxxxxxxx +set MODEL_PROVIDER=qwen + +# DeepSeek +set DEEPSEEK_API_KEY=sk-xxxxxxxx +set MODEL_PROVIDER=deepseek + +python app.py +``` + +--- + +## 使用流程 + +1. **新建项目** → 输入项目名称 +2. **上传招标文件** → 支持 PDF / DOC / DOCX +3. **AI 解析** → 自动提取评分要求、资质条件、商务条款 +4. **生成大纲** → 按评分权重生成四级章节目录 +5. **生成内容** → 逐章节或一键全部生成 +6. **合规检查** → 对照招标要求检验覆盖情况 +7. **导出 Word** → 专业排版，直接使用 + +--- + +## 目录结构 + +``` +autorfp/ +├── app.py # Flask 主程序 +├── config.py # 配置文件 +├── requirements.txt # Python 依赖 +├── start.bat # Windows 一键启动 +├── prompts/ # AI 提示词模板 +├── modules/ # 功能模块 +│ ├── parser.py # 招标文件解析 +│ ├── generator.py # 标书内容生成 +│ ├── checker.py # 合规检查 +│ ├── exporter.py # Word 导出 +│ └── knowledge.py # 企业知识库 +├── utils/ # 工具函数 +│ ├── ai_client.py # AI API 封装 +│ ├── file_utils.py # 文件处理 +│ └── prompts.py # 提示词加载 +├── templates/ # HTML 模板 +├── static/ # 静态资源 +└── data/ # 数据目录（自动创建） + ├── projects.db # SQLite 数据库 + ├── uploads/ # 上传的招标文件 + ├── exports/ # 导出的标书 + ├── knowledge/ # 知识库文件 + └── chroma/ # 向量数据库 +``` + +--- + +## 企业知识库 + +在项目页面切换到「知识库」标签，上传历史标书文件。 +系统会自动将文件分块存入向量数据库，生成内容时自动检索相关片段，让 AI 更好地体现企业优势。 + +--- + +## 常见问题 + +**Q: 解析速度很慢？** +A: 招标文件越长耗时越长，通常 30-120 秒。建议使用 qwen-max 或 gpt-4o。 + +**Q: 内容生成失败？** +A: 检查 API Key 是否正确，以及账户余额是否充足。 + +**Q: 导出的 Word 文件乱码？** +A: 请使用 Microsoft Word 2016 及以上版本打开。 diff --git a/app.py b/app.py new file mode 100644 index 0000000..a7158d1 --- /dev/null +++ b/app.py @@ 
# -0,0 +1,1201 @@   (tail of the patch hunk header that precedes app.py in this dump)
"""
Biao Huoban (标伙伴) - AI bid-document assistant (stand-alone edition).

Start with:  python app.py
Then open:   http://localhost:5000
"""
import os
import sys


def _bootstrap_env_file():
    """Load ``.env`` from the project root before ``config`` is imported.

    Lets API keys be injected through a file.  Variables that already exist in
    the process environment are never overwritten.  A missing or unreadable
    file is ignored.
    """
    # When frozen (e.g. a bundled executable) the .env sits next to the binary.
    if getattr(sys, 'frozen', False):
        base = os.path.dirname(sys.executable)
    else:
        base = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(base, '.env')
    if not os.path.isfile(path):
        return
    try:
        with open(path, encoding='utf-8') as f:
            for raw in f:
                line = raw.strip()
                # Skip blanks, comments and lines that are not KEY=VALUE.
                if not line or line.startswith('#') or '=' not in line:
                    continue
                key, _, val = line.partition('=')
                key, val = key.strip(), val.strip().strip('"').strip("'")
                if key and key not in os.environ:
                    os.environ[key] = val
    except OSError:
        pass


# Must run before `import config` so that config sees the injected variables.
_bootstrap_env_file()
import json
import sqlite3
import threading
import logging
from contextlib import closing
from datetime import datetime
from flask import Flask, request, jsonify, render_template, send_from_directory, abort

import config
from utils import settings as _settings

# ── Logging ─────────────────────────────────────────────────────────────────
_log_handlers = [logging.StreamHandler()]
if getattr(sys, 'frozen', False):
    # Frozen builds additionally log to a file next to the executable.
    _log_file = os.path.join(os.path.dirname(sys.executable), 'bid_partner.log')
    try:
        _log_handlers.append(logging.FileHandler(_log_file, encoding='utf-8'))
    except Exception:
        pass

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(name)s: %(message)s',
    datefmt='%H:%M:%S',
    handlers=_log_handlers,
)
logger = logging.getLogger(__name__)


def _safe_json_load(raw):
    """Return ``json.loads(raw)``, or ``None`` if *raw* is empty, not a str, or invalid JSON."""
    if not raw or not isinstance(raw, str):
        return None
    try:
        return json.loads(raw)
    except Exception:
        return None


# ── Flask application ───────────────────────────────────────────────────────
# sys._MEIPASS is used as the resource root when present, else this file's directory.
_bundle = getattr(sys, '_MEIPASS', os.path.dirname(os.path.abspath(__file__)))
app = Flask(__name__,
            template_folder=os.path.join(_bundle, 'templates'),
            static_folder=os.path.join(_bundle, 'static'))
app.secret_key = config.SECRET_KEY
app.config['MAX_CONTENT_LENGTH'] = config.MAX_FILE_SIZE_MB * 1024 * 1024


# ═══════════════════════════════════════════════════════════════════════════
# Database initialisation
# ═══════════════════════════════════════════════════════════════════════════

def init_db():
    """Create the data directories, load persisted settings and create/migrate the tables."""
    for d in [config.DATA_DIR, config.UPLOAD_DIR, config.EXPORT_DIR,
              config.KNOWLEDGE_DIR, config.CHROMA_DIR]:
        os.makedirs(d, exist_ok=True)

    # Restore the settings (API key etc.) saved by a previous run.
    settings_path = os.path.join(config.DATA_DIR, 'settings.json')
    _settings.init(settings_path)
    _settings.load(config)
    logger.info(f'当前模型: {config.MODEL_PROVIDER}')

    # closing() guarantees the connection is released even if a statement fails.
    with closing(sqlite3.connect(config.DB_PATH)) as conn:
        cur = conn.cursor()
        # WAL mode: readers and the writer do not block each other.
        cur.execute('PRAGMA journal_mode=WAL')
        cur.execute('PRAGMA synchronous=NORMAL')  # lower sync level is acceptable under WAL

        cur.executescript('''
            CREATE TABLE IF NOT EXISTS projects (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                outline_status TEXT DEFAULT 'none',
                outline_error TEXT DEFAULT '',
                anon_requirements TEXT DEFAULT '',
                enable_figure INTEGER DEFAULT 0,
                enable_table INTEGER DEFAULT 0,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            );

            CREATE TABLE IF NOT EXISTS tender_data (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                project_id INTEGER NOT NULL UNIQUE,
                file_name TEXT,
                raw_text TEXT,
                summary TEXT,
                rating_requirements TEXT,
                rating_json TEXT,
                outline TEXT,
                boq_file_name TEXT DEFAULT '',
                boq_text TEXT DEFAULT '',
                boq_summary TEXT DEFAULT '',
                boq_analysis_json TEXT DEFAULT '',
                boq_status TEXT DEFAULT 'none',
                boq_error TEXT DEFAULT '',
                tender_kind TEXT DEFAULT 'engineering',
                status TEXT DEFAULT 'pending',
                error_message TEXT DEFAULT '',
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE
            );

            CREATE TABLE IF NOT EXISTS bid_sections (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                project_id INTEGER NOT NULL,
                section_number TEXT,
                section_title TEXT NOT NULL,
                level INTEGER DEFAULT 1,
                is_leaf INTEGER DEFAULT 1,
                content TEXT DEFAULT '',
                intro_content TEXT DEFAULT '',
                order_index INTEGER DEFAULT 0,
                status TEXT DEFAULT 'pending',
                error_message TEXT DEFAULT '',
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE
            );

            CREATE TABLE IF NOT EXISTS knowledge_files (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                file_name TEXT NOT NULL UNIQUE,
                file_path TEXT,
                chunk_count INTEGER DEFAULT 0,
                added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            );
        ''')
        conn.commit()

        # Backward compatibility with older databases: add columns that may be missing.
        migrations = [
            ("ALTER TABLE projects ADD COLUMN anon_requirements TEXT DEFAULT ''",
             'projects.anon_requirements'),
            ("ALTER TABLE projects ADD COLUMN enable_figure INTEGER DEFAULT 0",
             'projects.enable_figure'),
            ("ALTER TABLE projects ADD COLUMN enable_table INTEGER DEFAULT 0",
             'projects.enable_table'),
            ("ALTER TABLE tender_data ADD COLUMN boq_file_name TEXT DEFAULT ''",
             'tender_data.boq_file_name'),
            ("ALTER TABLE tender_data ADD COLUMN boq_text TEXT DEFAULT ''",
             'tender_data.boq_text'),
            ("ALTER TABLE tender_data ADD COLUMN boq_summary TEXT DEFAULT ''",
             'tender_data.boq_summary'),
            ("ALTER TABLE tender_data ADD COLUMN boq_status TEXT DEFAULT 'none'",
             'tender_data.boq_status'),
            ("ALTER TABLE tender_data ADD COLUMN boq_error TEXT DEFAULT ''",
             'tender_data.boq_error'),
            ("ALTER TABLE tender_data ADD COLUMN boq_analysis_json TEXT DEFAULT ''",
             'tender_data.boq_analysis_json'),
            ("ALTER TABLE tender_data ADD COLUMN tender_kind TEXT DEFAULT 'engineering'",
             'tender_data.tender_kind'),
            ("ALTER TABLE tender_data ADD COLUMN target_pages INTEGER DEFAULT 0",
             'tender_data.target_pages'),
        ]
        for sql, col in migrations:
            try:
                conn.execute(sql)
                conn.commit()
                logger.info(f'数据库迁移：新增 {col} 列')
            except sqlite3.Error as exc:
                # "duplicate column name" is the expected outcome on an up-to-date
                # database; anything else is a real problem and must be visible.
                if 'duplicate column name' not in str(exc).lower():
                    logger.warning(f'数据库迁移失败 {col}: {exc}')
    logger.info('数据库初始化完成')


def get_db():
    """Open a new SQLite connection to the project database; the caller closes it."""
    return sqlite3.connect(config.DB_PATH)


# ═══════════════════════════════════════════════════════════════════════════
# Page routes
# ═══════════════════════════════════════════════════════════════════════════
# NOTE(review): the URL variables of every route rule were stripped from this
# dump (e.g. '/project/'); they are restored here as <int:project_id>,
# <int:section_id> and <path:filename> so the views receive the arguments
# their signatures require.  Confirm the converter types against the repository.

@app.route('/')
def index():
    """Render the project list page."""
    return render_template('index.html')


@app.route('/project/<int:project_id>')
def project_page(project_id):
    """Render the page of one project; 404 if the project does not exist."""
    conn = get_db()
    cur = conn.cursor()
    cur.execute("SELECT id, name, created_at FROM projects WHERE id=?", (project_id,))
    row = cur.fetchone()
    conn.close()
    if not row:
        abort(404)
    return render_template('project.html', project={'id': row[0], 'name': row[1], 'created_at': row[2]})


# ═══════════════════════════════════════════════════════════════════════════
# API: project management
# ═══════════════════════════════════════════════════════════════════════════

@app.route('/api/projects', methods=['GET'])
def api_list_projects():
    """List all projects, newest first, with parse status and section counters."""
    conn = get_db()
    cur = conn.cursor()
    cur.execute('''
        SELECT p.id, p.name, p.created_at, p.outline_status,
               td.status as parse_status, td.file_name,
               (SELECT COUNT(*) FROM bid_sections WHERE project_id=p.id) as section_count,
               (SELECT COUNT(*) FROM bid_sections WHERE project_id=p.id AND status='done') as done_count
        FROM projects p
        LEFT JOIN tender_data td ON td.project_id = p.id
        ORDER BY p.created_at DESC
    ''')
    rows = cur.fetchall()
    conn.close()
    projects = []
    for r in rows:
        projects.append({
            'id': r[0], 'name': r[1], 'created_at': r[2],
            'outline_status': r[3], 'parse_status': r[4] or 'none',
            'file_name': r[5], 'section_count': r[6], 'done_count': r[7],
        })
    return jsonify({'projects': projects})


@app.route('/api/projects', methods=['POST'])
def api_create_project():
    """Create a project from JSON ``{"name": ...}``; 400 if the name is missing or blank."""
    data = request.get_json()
    # A non-object body or a non-string name is treated as "no name" (400)
    # instead of raising AttributeError and surfacing as a 500.
    name = data.get('name', '') if isinstance(data, dict) else ''
    name = name.strip() if isinstance(name, str) else ''
    if not name:
        return jsonify({'error': '项目名称不能为空'}), 400
    conn = get_db()
    cur = conn.cursor()
    cur.execute("INSERT INTO projects (name) VALUES (?)", (name,))
    project_id = cur.lastrowid
    conn.commit()
    conn.close()
    return jsonify({'id': project_id, 'name': name}), 201


@app.route('/api/projects/<int:project_id>', methods=['DELETE'])
def api_delete_project(project_id):
    """Delete a project together with its tender_data and bid_sections rows."""
    conn = get_db()
    # SQLite leaves foreign-key enforcement off per connection by default, so the
    # schema's ON DELETE CASCADE would not fire and child rows would be orphaned.
    conn.execute('PRAGMA foreign_keys=ON')
    cur = conn.cursor()
    cur.execute("DELETE FROM projects WHERE id=?", (project_id,))
    conn.commit()
    conn.close()
    return jsonify({'success': True})


@app.route('/api/projects/<int:project_id>', methods=['GET'])
def api_get_project(project_id):
    """Return the full project record joined with its tender data; 404 if absent."""
    conn = get_db()
    cur = conn.cursor()
    cur.execute('''
        SELECT p.id, p.name, p.created_at, p.outline_status, p.outline_error,
               td.file_name, td.status as parse_status, td.error_message,
               td.summary, td.rating_requirements, td.rating_json, td.outline,
               p.anon_requirements, p.enable_figure, p.enable_table,
               td.boq_file_name, td.boq_summary, td.boq_status, td.boq_error,
               td.boq_analysis_json, td.tender_kind, COALESCE(td.target_pages, 0)
        FROM projects p
        LEFT JOIN tender_data td ON td.project_id = p.id
        WHERE p.id=?
    ''', (project_id,))
    row = cur.fetchone()
    conn.close()
    if not row:
        return jsonify({'error': '项目不存在'}), 404

    return jsonify({
        'id': row[0], 'name': row[1], 'created_at': row[2],
        'outline_status': row[3], 'outline_error': row[4],
        'file_name': row[5], 'parse_status': row[6] or 'none',
        'parse_error': row[7], 'summary': row[8],
        'rating_requirements': row[9], 'rating_json': row[10],
        'outline': row[11], 'anon_requirements': row[12] or '',
        'enable_figure': bool(row[13]), 'enable_table': bool(row[14]),
        'boq_file_name': row[15] or '', 'boq_summary': row[16] or '',
        'boq_status': row[17] or 'none', 'boq_error': row[18] or '',
        'boq_analysis': _safe_json_load(row[19]),
        'tender_kind': row[20] or 'engineering',
        'target_pages': int(row[21] or 0),
    })


# ═══════════════════════════════════════════════════════════════════════════
# API: file upload and parsing
# ═══════════════════════════════════════════════════════════════════════════

@app.route('/api/projects/<int:project_id>/upload', methods=['POST'])
def api_upload(project_id):
    """Store the uploaded tender file and mark the project's tender_data as 'uploaded'."""
    from utils.file_utils import allowed_file, safe_filename

    if 'file' not in request.files:
        return jsonify({'error': '未选择文件'}), 400
    f = request.files['file']
    if not f.filename:
        return jsonify({'error': '文件名为空'}), 400
    if not allowed_file(f.filename):
        return jsonify({'error': '仅支持 PDF / DOC / DOCX 格式'}), 400

    filename = safe_filename(f.filename)
    save_path = os.path.join(config.UPLOAD_DIR, f'{project_id}_{filename}')
    f.save(save_path)

    # Create or reset the tender_data record for this project.
    conn = get_db()
    cur = conn.cursor()
    cur.execute('''
        INSERT INTO tender_data (project_id, file_name, status)
        VALUES (?, ?, 'uploaded')
        ON CONFLICT(project_id) DO UPDATE SET file_name=?, status='uploaded', error_message='', updated_at=?
    ''', (project_id, filename, filename, datetime.now()))
    conn.commit()
    conn.close()

    return jsonify({'success': True, 'file_name': filename, 'path': save_path})


@app.route('/api/projects/<int:project_id>/tender-data', methods=['PUT'])
def api_update_tender_data(project_id):
    """Save user edits to the parse result (summary, rating requirements, tender kind, target pages)."""
    data = request.get_json() or {}
    fields = {}
    if 'summary' in data:
        fields['summary'] = data['summary']
    if 'rating_requirements' in data:
        fields['rating_requirements'] = data['rating_requirements']
    if 'tender_kind' in data:
        tk = (data.get('tender_kind') or 'engineering').strip().lower()
        if tk not in ('engineering', 'service', 'goods'):
            return jsonify({'error': 'tender_kind 须为 engineering / service / goods'}), 400
        fields['tender_kind'] = tk
    if 'target_pages' in data:
        try:
            fields['target_pages'] = max(0, int(data['target_pages']))
        except (TypeError, ValueError):
            return jsonify({'error': 'target_pages 须为非负整数'}), 400
    if not fields:
        return jsonify({'error': '无可更新字段'}), 400

    conn = get_db()
    cur = conn.cursor()
    # Column names come only from the fixed keys assigned above, never from the
    # request, so interpolating them into the statement is safe.
    set_clause = ', '.join(f'{k}=?' for k in fields)
    values = list(fields.values()) + [datetime.now(), project_id]
    cur.execute(
        f'UPDATE tender_data SET {set_clause}, updated_at=? WHERE project_id=?',
        values
    )
    conn.commit()
    conn.close()
    return jsonify({'success': True})


@app.route('/api/projects/<int:project_id>/upload-boq', methods=['POST'])
def api_upload_boq(project_id):
    """Upload a bill-of-quantities file (independent of the tender file)."""
    from utils.file_utils import safe_filename

    if 'file' not in request.files:
        return jsonify({'error': '未选择文件'}), 400
    f = request.files['file']
    if not f.filename:
        return jsonify({'error': '文件名为空'}), 400

    ext = os.path.splitext(f.filename)[1].lower()
    allowed_exts = {'.xlsx', '.xls', '.csv', '.pdf', '.docx', '.doc'}
    if ext not in allowed_exts:
        return jsonify({'error': f'不支持的格式 {ext}，请使用 xlsx/xls/csv/pdf/docx/doc'}), 400

    filename = safe_filename(f.filename)
    save_path = os.path.join(config.UPLOAD_DIR, f'{project_id}_boq_{filename}')
    f.save(save_path)

    # Make sure a tender_data record exists for this project.
    conn = get_db()
    cur = conn.cursor()
    cur.execute('''
        INSERT INTO tender_data (project_id, boq_file_name, boq_status)
        VALUES (?, ?, 'uploaded')
        ON CONFLICT(project_id) DO UPDATE
        SET boq_file_name=?, boq_status='uploaded', boq_error='', updated_at=?
    ''', (project_id, filename, filename, datetime.now()))
    conn.commit()
    conn.close()

    return jsonify({'success': True, 'file_name': filename, 'path': save_path})


@app.route('/api/projects/<int:project_id>/parse-boq', methods=['POST'])
def api_parse_boq(project_id):
    """Start a background thread that parses the bill of quantities."""
    from modules.parser import parse_boq_file

    conn = get_db()
    cur = conn.cursor()
    cur.execute("SELECT boq_file_name FROM tender_data WHERE project_id=?", (project_id,))
    row = cur.fetchone()
    conn.close()

    if not row or not row[0]:
        return jsonify({'error': '请先上传工程量清单文件'}), 400

    file_name = row[0]
    # Try the path with the boq_ prefix first, then the one without it.
    path1 = os.path.join(config.UPLOAD_DIR, f'{project_id}_boq_{file_name}')
    path2 = os.path.join(config.UPLOAD_DIR, f'{project_id}_{file_name}')
    file_path = path1 if os.path.exists(path1) else path2
    if not os.path.exists(file_path):
        return jsonify({'error': '清单文件不存在，请重新上传'}), 404

    t = threading.Thread(
        target=parse_boq_file,
        args=(config.DB_PATH, project_id, file_path, file_name),
        daemon=True,
    )
    t.start()
    return jsonify({'success': True})


@app.route('/api/projects/<int:project_id>/boq', methods=['PUT'])
def api_update_boq(project_id):
    """Save the user-edited bill-of-quantities summary."""
    data = request.get_json() or {}
    boq_summary = data.get('boq_summary', '')
    conn = get_db()
    cur = conn.cursor()
    cur.execute(
        "UPDATE tender_data SET boq_summary=?, updated_at=? WHERE project_id=?",
        (boq_summary, datetime.now(), project_id)
    )
    conn.commit()
    conn.close()
    return jsonify({'success': True})


@app.route('/api/projects/<int:project_id>/parse', methods=['POST'])
def api_parse(project_id):
    """Start a background thread that parses the uploaded tender file."""
    from modules.parser import parse_tender_file

    conn = get_db()
    cur = conn.cursor()
    cur.execute("SELECT file_name FROM tender_data WHERE project_id=?", (project_id,))
    row = cur.fetchone()
    conn.close()

    if not row or not row[0]:
        return jsonify({'error': '请先上传招标文件'}), 400

    file_name = row[0]
    file_path = os.path.join(config.UPLOAD_DIR, f'{project_id}_{file_name}')
    if not os.path.exists(file_path):
        return jsonify({'error': f'文件不存在: {file_name}'}), 404

    t = threading.Thread(
        target=parse_tender_file,
        args=(config.DB_PATH, project_id, file_path, file_name),
        daemon=True,
    )
    t.start()
    return jsonify({'success': True, 'message': '解析任务已启动'})


@app.route('/api/projects/<int:project_id>/parse-status', methods=['GET'])
def api_parse_status(project_id):
    """Return the parse status plus the project settings shown on the parse tab."""
    conn = get_db()
    cur = conn.cursor()
    # Extended for deep integration: include settings from projects for parse tab UI
    cur.execute('''
        SELECT td.status, td.error_message, td.summary, td.rating_requirements, td.rating_json, td.tender_kind,
               p.anon_requirements, p.enable_figure, p.enable_table
        FROM tender_data td
        LEFT JOIN projects p ON p.id = td.project_id
        WHERE td.project_id=?
    ''', (project_id,))
    row = cur.fetchone()
    conn.close()
    if not row:
        return jsonify({'status': 'none'})
    return jsonify({
        'status': row[0] or 'none',
        'message': row[1] or '',
        'has_summary': bool(row[2]),
        'has_rating': bool(row[3]),
        'has_rating_json': bool(row[4]),
        'tender_kind': row[5] or 'engineering',
        'anon_requirements': row[6] or '',
        'enable_figure': bool(row[7]),
        'enable_table': bool(row[8]),
    })


# ═══════════════════════════════════════════════════════════════════════════
# API: outline generation
# ═══════════════════════════════════════════════════════════════════════════

@app.route('/api/projects/<int:project_id>/outline', methods=['PUT'])
def api_update_outline(project_id):
    """Save a manually edited outline: re-parse the text and rebuild bid_sections.

    Note (from the original author): section content that was already
    generated is cleared and has to be generated again.
    """
    from modules.generator import _parse_outline, _save_sections, _save_outline_text

    data = request.get_json() or {}
    outline_text = (data.get('outline') or '').strip()
    if not outline_text:
        return jsonify({'error': '大纲内容不能为空'}), 400

    try:
        # Parse and renumber; the normalised text is what gets stored.
        _, sections, normalized_text = _parse_outline(outline_text)
        if not sections:
            return jsonify({'error': '大纲解析失败，未识别到任何章节，请检查格式'}), 400

        # closing() releases the connection (and any uncommitted write
        # transaction) even when one of the save helpers raises.
        with closing(get_db()) as conn:
            cur = conn.cursor()
            _save_outline_text(conn, project_id, normalized_text)
            _save_sections(conn, project_id, sections)
            cur.execute(
                "UPDATE projects SET outline_status='outline_done', outline_error='', updated_at=? WHERE id=?",
                (datetime.now(), project_id)
            )
            cur.execute("SELECT length(outline) FROM tender_data WHERE project_id=?", (project_id,))
            persisted_len = (cur.fetchone() or [0])[0] or 0
            conn.commit()
        # Return the normalised text so the front end can refresh its editor.
        return jsonify({
            'success': True,
            'section_count': len(sections),
            'normalized_outline': normalized_text,
            'persisted_outline_len': persisted_len,
        })
    except Exception as e:
        logger.exception('手动保存大纲失败')
        return jsonify({'error': str(e)}), 500


@app.route('/api/projects/<int:project_id>/generate-outline', methods=['POST'])
def api_generate_outline(project_id):
    """Start background outline generation; 409 if an outline exists and ``force`` is not set."""
    from modules.generator import generate_outline
    data = request.get_json(silent=True) or {}
    force = bool(data.get('force', False))

    conn = get_db()
    cur = conn.cursor()
    cur.execute(
        '''
        SELECT p.outline_status, COALESCE(td.outline, '')
        FROM projects p
        LEFT JOIN tender_data td ON td.project_id = p.id
        WHERE p.id = ?
        ''',
        (project_id,),
    )
    row = cur.fetchone()
    if not row:
        conn.close()
        return jsonify({'success': False, 'error': '项目不存在'}), 404

    ostatus, outline_text = (row[0] or 'none', row[1] or '')
    if ostatus == 'outline_generating':
        conn.close()
        return jsonify({'success': True, 'message': '大纲正在生成中，请稍候'}), 200

    if not force and outline_text.strip():
        conn.close()
        return jsonify({
            'success': False,
            'error': '当前项目已有大纲，重新生成会覆盖现有大纲。请确认后以 force=true 再次请求。'
        }), 409

    # Persist the 'generating' status right away instead of relying on the
    # worker's first statement; avoids inconsistent state on repeated clicks,
    # polling or page refresh.
    cur.execute(
        "UPDATE projects SET outline_status=?, outline_error=?, updated_at=? WHERE id=?",
        ('outline_generating', '', datetime.now(), project_id),
    )
    conn.commit()
    conn.close()

    t = threading.Thread(
        target=generate_outline,
        args=(config.DB_PATH, project_id),
        daemon=True,
    )
    t.start()
    return jsonify({'success': True, 'message': '大纲生成任务已启动'})


@app.route('/api/projects/<int:project_id>/expand-outline', methods=['POST'])
def api_expand_outline(project_id):
    """Expand the edited outline with sub-sections, then persist it and rebuild the section tree."""
    from modules.generator import (
        expand_outline,
        _parse_outline,
        _save_outline_text,
        _save_sections,
    )

    from utils import volume_chapters as vol_ch

    data = request.get_json() or {}
    outline = data.get('outline', '')
    # A non-string outline is rejected like an empty one instead of raising.
    if not isinstance(outline, str) or not outline.strip():
        return jsonify({'success': False, 'error': '大纲内容不能为空'}), 400

    no_subchapter_limit = data.get('no_subchapter_limit') in (True, 1, '1', 'true', 'True')
    if 'target_pages' in data:
        try:
            request_pages = max(0, int(data['target_pages']))
        except (TypeError, ValueError):
            request_pages = None
    else:
        request_pages = None

    conn = get_db()
    cur = conn.cursor()
    cur.execute(
        "SELECT summary, rating_requirements, COALESCE(target_pages, 0) FROM tender_data "
        "WHERE project_id=?",
        (project_id,),
    )
    row = cur.fetchone()
    conn.close()

    summary = row[0] if row else ''
    rating_requirements = row[1] if row else ''
    db_tp = int(row[2] or 0) if row else 0
    cfg_tp = int(getattr(config, 'TARGET_PAGES', 0) or 0)
    effective_target_pages = vol_ch.resolve_expand_target_pages(
        request_pages, no_subchapter_limit, db_tp, cfg_tp
    )

    try:
        expanded_outline = expand_outline(
            outline, summary, rating_requirements, project_id, effective_target_pages
        )
        _, sections, normalized_text = _parse_outline(expanded_outline)
        if not sections:
            return jsonify({'success': False, 'error': '扩充后大纲解析失败，请检查章节格式'}), 400

        # closing() releases the connection even when a save helper raises.
        with closing(get_db()) as conn:
            cur = conn.cursor()
            _save_outline_text(conn, project_id, normalized_text)
            _save_sections(conn, project_id, sections)
            cur.execute(
                "UPDATE projects SET outline_status='outline_done', outline_error='', updated_at=? WHERE id=?",
                (datetime.now(), project_id),
            )
            conn.commit()
            cur.execute("SELECT length(outline) FROM tender_data WHERE project_id=?", (project_id,))
            persisted_len = (cur.fetchone() or [0])[0] or 0

        return jsonify({
            'success': True,
            'expanded_outline': expanded_outline,
            'normalized_outline': normalized_text,
            'section_count': len(sections),
            'persisted_outline_len': persisted_len,
            'used_target_pages': effective_target_pages,
            'no_subchapter_limit': no_subchapter_limit,
        })
    except Exception as e:
        logger.exception(f'expand_outline failed for project {project_id}')
        return jsonify({'success': False, 'error': str(e)}), 500


@app.route('/api/projects/<int:project_id>/outline-status', methods=['GET'])
def api_outline_status(project_id):
    """Return the outline generation status and error text of a project."""
    conn = get_db()
    cur = conn.cursor()
    cur.execute("SELECT outline_status, outline_error FROM projects WHERE id=?", (project_id,))
    row = cur.fetchone()
    conn.close()
    if not row:
        return jsonify({'status': 'none'})
    return jsonify({'status': row[0], 'error': row[1]})


# ═══════════════════════════════════════════════════════════════════════════
# API: section management and content generation
# ═══════════════════════════════════════════════════════════════════════════

@app.route('/api/projects/<int:project_id>/sections', methods=['GET'])
def api_list_sections(project_id):
    """List the sections of a project in outline order (without their content)."""
    conn = get_db()
    cur = conn.cursor()
    cur.execute('''
        SELECT id, section_number, section_title, level, is_leaf,
               status, error_message, length(content) as content_len
        FROM bid_sections
        WHERE project_id=?
        ORDER BY order_index
    ''', (project_id,))
    rows = cur.fetchall()
    conn.close()
    sections = []
    for r in rows:
        sections.append({
            'id': r[0], 'number': r[1], 'title': r[2], 'level': r[3],
            'is_leaf': bool(r[4]), 'status': r[5], 'error': r[6],
            'has_content': (r[7] or 0) > 0,
        })
    return jsonify({'sections': sections})


@app.route('/api/projects/<int:project_id>/sections/<int:section_id>', methods=['GET'])
def api_get_section(project_id, section_id):
    """Return one section including its content; 404 if it is not part of the project."""
    conn = get_db()
    cur = conn.cursor()
    cur.execute(
        "SELECT id, section_number, section_title, level, is_leaf, content, intro_content, status FROM bid_sections WHERE id=? AND project_id=?",
        (section_id, project_id)
    )
    row = cur.fetchone()
    conn.close()
    if not row:
        return jsonify({'error': '章节不存在'}), 404
    return jsonify({
        'id': row[0], 'number': row[1], 'title': row[2], 'level': row[3],
        'is_leaf': bool(row[4]), 'content': row[5], 'intro_content': row[6], 'status': row[7],
    })


@app.route('/api/projects/<int:project_id>/sections/<int:section_id>', methods=['PUT'])
def api_update_section(project_id, section_id):
    """Overwrite a section's content with the submitted text and mark it 'done'."""
    data = request.get_json() or {}
    content = data.get('content', '')
    conn = get_db()
    cur = conn.cursor()
    cur.execute(
        "UPDATE bid_sections SET content=?, status='done', updated_at=? WHERE id=? AND project_id=?",
        (content, datetime.now(), section_id, project_id)
    )
    conn.commit()
    conn.close()
    return jsonify({'success': True})


@app.route('/api/projects/<int:project_id>/sections/<int:section_id>/chat', methods=['POST'])
def api_section_chat(project_id, section_id):
    """Conversational section writing.

    Takes the multi-turn history, adds the section context as a system prompt,
    calls the AI client and returns the next reply.
    Request body: ``{"messages": [{"role": "user"|"assistant", "content": "..."}]}``
    """
    from utils import ai_client

    data = request.get_json() or {}
    messages = data.get('messages', [])
    if not messages:
        return jsonify({'error': '消息列表不能为空'}), 400

    # Keep only well-formed user/assistant turns.  A non-list payload or
    # non-dict items used to raise AttributeError (HTTP 500); they are now
    # dropped, and a history with nothing usable is rejected up front.
    if isinstance(messages, list):
        valid_messages = [
            m for m in messages
            if isinstance(m, dict) and m.get('role') in ('user', 'assistant')
        ]
    else:
        valid_messages = []
    if not valid_messages:
        return jsonify({'error': '消息列表不能为空'}), 400

    conn = get_db()
    cur = conn.cursor()
    cur.execute(
        "SELECT section_title FROM bid_sections WHERE id=? AND project_id=?",
        (section_id, project_id)
    )
    row = cur.fetchone()
    if not row:
        conn.close()
        return jsonify({'error': '章节不存在'}), 404
    section_title = row[0]

    cur.execute(
        "SELECT summary, outline, tender_kind FROM tender_data WHERE project_id=?",
        (project_id,),
    )
    td = cur.fetchone()
    conn.close()

    # Truncate the context to keep the prompt bounded.
    summary = ((td[0] or '')[:3000]) if td else ''
    outline = ((td[1] or '')[:2000]) if td else ''
    tk = (td[2] or 'engineering').strip().lower() if td else 'engineering'
    if tk not in ('engineering', 'service', 'goods'):
        tk = 'engineering'

    from utils.tender_kind_sections import CHAT_KIND_INSTRUCTION

    kind_hint = CHAT_KIND_INSTRUCTION.get(tk, CHAT_KIND_INSTRUCTION['engineering'])

    # NOTE(review): whitespace inside this prompt was collapsed in the dump this
    # was reviewed from; the indent of the continuation line is reconstructed —
    # confirm against the repository copy.
    system = f"""你是一位资深的投标文件撰写专家，正在协助用户以对话方式撰写技术标书中「{section_title}」章节的正文内容。

【项目背景摘要】
{summary or '（未提供）'}

【标书目录结构】
{outline or '（未提供）'}
{kind_hint}

【撰写规范（必须遵守）】
- 投标方自称统一用"我方"，禁用"我们""我公司"
- 禁止套话：综上所述、高度重视、全力以赴、不断优化、稳步推进等
- 每项措施须有可检验的实质内容（做法、节点、标准编号，或招标文件/清单已给出的量化依据）；
  未载明的型号、数量、吨位、时限等不得编造，用概括性定性表述写清含义；禁止使用方括号待填项（如[型号][数量]）
- 列举用(1)(2)(3)编号，禁止"首先其次最后"连接
- 纯文本输出，段落间用空行分隔，不使用 Markdown 符号
- 直接输出正文，不含章节标题、解释说明或"以下是..."引导语"""

    try:
        content = ai_client.chat_with_history(system, valid_messages,
                                              temperature=0.7, max_tokens=4096)
        return jsonify({'success': True, 'content': content})
    except Exception as e:
        logger.exception(f'对话式章节生成失败 section_id={section_id}')
        return jsonify({'error': str(e)}), 500


@app.route('/api/projects/<int:project_id>/generate-section', methods=['POST'])
def api_generate_section(project_id):
    """Start background generation of one section (``section_id`` in the JSON body)."""
    from modules.generator import generate_section

    data = request.get_json() or {}
    section_id = data.get('section_id')
    if not section_id:
        return jsonify({'error': '缺少 section_id'}), 400

    conn = get_db()
    cur = conn.cursor()
    cur.execute(
        "SELECT anon_requirements, enable_figure, enable_table FROM projects WHERE id=?",
        (project_id,)
    )
    row = cur.fetchone()
    conn.close()
    anon_req = (row[0] or '') if row else ''
    enable_fig = bool(row[1]) if row else False
    enable_tbl = bool(row[2]) if row else False

    t = threading.Thread(
        target=generate_section,
        args=(config.DB_PATH, project_id, section_id, anon_req, enable_fig, enable_tbl),
        daemon=True,
    )
    t.start()
    return jsonify({'success': True})


@app.route('/api/projects/<int:project_id>/diagram', methods=['PUT'])
def api_update_diagram(project_id):
    """Save the figure/table mode switches of a project."""
    data = request.get_json() or {}
    enable_figure = 1 if data.get('enable_figure') else 0
    enable_table = 1 if data.get('enable_table') else 0
    conn = get_db()
    cur = conn.cursor()
    cur.execute(
        "UPDATE projects SET enable_figure=?, enable_table=?, updated_at=? WHERE id=?",
        (enable_figure, enable_table, datetime.now(), project_id)
    )
    conn.commit()
    conn.close()
    return jsonify({'success': True})


@app.route('/api/projects/<int:project_id>/anon', methods=['PUT'])
def api_update_anon(project_id):
    """Save the anonymous-bid requirements text of a project."""
    data = request.get_json() or {}
    anon_requirements = data.get('anon_requirements', '')
    conn = get_db()
    cur = conn.cursor()
    cur.execute(
        "UPDATE projects SET anon_requirements=?, updated_at=? WHERE id=?",
        (anon_requirements, datetime.now(), project_id)
    )
    conn.commit()
    conn.close()
    return jsonify({'success': True})


@app.route('/api/projects/<int:project_id>/generate-all-sections', methods=['POST'])
def api_generate_all_sections(project_id):
    """Start background generation of all sections of a project."""
    from modules.generator import generate_all_sections

    conn = get_db()
    cur = conn.cursor()
    cur.execute(
        "SELECT anon_requirements, enable_figure, enable_table FROM projects WHERE id=?",
        (project_id,)
    )
    row = cur.fetchone()
    conn.close()
    anon_req = (row[0] or '') if row else ''
    enable_fig = bool(row[1]) if row else False
    enable_tbl = bool(row[2]) if row else False

    t = threading.Thread(
        target=generate_all_sections,
        args=(config.DB_PATH, project_id, anon_req, enable_fig, enable_tbl),
        daemon=True,
    )
    t.start()
    return jsonify({'success': True, 'message': '全量生成任务已启动'})


@app.route('/api/projects/<int:project_id>/section-progress', methods=['GET'])
def api_section_progress(project_id):
    """Return total/done/running/error section counts and the completion percentage."""
    conn = get_db()
    cur = conn.cursor()
    cur.execute('''
        SELECT
            COUNT(*) as total,
            SUM(CASE WHEN status='done' THEN 1 ELSE 0 END) as done,
            SUM(CASE WHEN status='generating' THEN 1 ELSE 0 END) as running,
            SUM(CASE WHEN status='error' THEN 1 ELSE 0 END) as error_count
        FROM bid_sections WHERE project_id=?
    ''', (project_id,))
    r = cur.fetchone()
    conn.close()
    total, done, running, errors = r
    # SUM() yields NULL when there are no rows, hence the `or 0` fallbacks.
    return jsonify({
        'total': total or 0, 'done': done or 0,
        'running': running or 0, 'errors': errors or 0,
        'percent': round((done or 0) / max(total or 1, 1) * 100),
    })


# ═══════════════════════════════════════════════════════════════════════════
# API: compliance check
# ═══════════════════════════════════════════════════════════════════════════

@app.route('/api/projects/<int:project_id>/check', methods=['POST'])
def api_check(project_id):
    """Run the compliance check synchronously and return its result."""
    from modules.checker import check_compliance
    result = check_compliance(config.DB_PATH, project_id)
    return jsonify(result)


@app.route('/api/projects/<int:project_id>/check-dark-bid-format', methods=['POST'])
def api_check_dark_bid_format(project_id):
    """Format check of an anonymous technical bid in HTML (no AI call). Body: ``{"html": "..."}``."""
    conn = get_db()
    cur = conn.cursor()
    cur.execute("SELECT id FROM projects WHERE id=?", (project_id,))
    if not cur.fetchone():
        conn.close()
        return jsonify({'error': '项目不存在'}), 404
    conn.close()

    data = request.get_json(silent=True) or {}
    html = (data.get('html') or '').strip()
    if not html:
        return jsonify({'error': '请提供 html 字段（技术暗标完整 HTML 内容）'}), 400

    from modules.dark_bid_format_check import check_technical_bid
    try:
        result = check_technical_bid(html)
        return jsonify(result)
    except Exception as e:
        logger.exception('技术暗标格式清标失败')
        return jsonify({'error': str(e)}), 500


# ═══════════════════════════════════════════════════════════════════════════
# API: export
# ═══════════════════════════════════════════════════════════════════════════

@app.route('/api/projects/<int:project_id>/export', methods=['POST'])
def api_export(project_id):
    """Export the project to a Word file and return its download URL."""
    try:
        from modules.exporter import export_to_word
        data = request.get_json(silent=True) or {}
        style_preset = data.get('style_preset', 'standard')
        filename = export_to_word(config.DB_PATH, project_id, style_preset)
        return jsonify({'success': True, 'filename': filename, 'url': f'/api/download/{filename}'})
    except Exception as e:
        logger.exception('导出失败')
        return jsonify({'error': str(e)}), 500


@app.route('/api/download/<path:filename>')
def api_download(filename):
    """Send an exported file from the export directory as an attachment."""
    return send_from_directory(config.EXPORT_DIR, filename, as_attachment=True)


# ═══════════════════════════════════════════════════════════════════════════
# API: knowledge base management
# ═══════════════════════════════════════════════════════════════════════════

@app.route('/api/knowledge/status', methods=['GET'])
def api_knowledge_status():
    """Return the knowledge-base availability info plus the number of stored files."""
    from modules.knowledge import is_available, list_files
    status = is_available()
    status['file_count'] = len(list_files(config.DB_PATH))
    return jsonify(status)


@app.route('/api/knowledge/files', methods=['GET'])
def api_knowledge_list():
    """List the files stored in the knowledge base."""
    from modules.knowledge import list_files
    files = list_files(config.DB_PATH)
    return jsonify({'files': files})


@app.route('/api/knowledge/upload', methods=['POST'])
def api_knowledge_upload():
    """Save an uploaded knowledge file and ingest it in a background thread."""
    from modules.knowledge import add_file
    from utils.file_utils import allowed_file, safe_filename

    if 'file' not in request.files:
        return jsonify({'error': '未选择文件'}), 400
    f = request.files['file']
    if not f.filename or not allowed_file(f.filename):
        return jsonify({'error': '仅支持 PDF / DOC / DOCX'}), 400

    filename = safe_filename(f.filename)
    save_path = os.path.join(config.KNOWLEDGE_DIR, filename)
    f.save(save_path)

    # Ingest in the background: text extraction and embedding can be slow and
    # would otherwise risk a request timeout.
    def _ingest():
        result = add_file(save_path, config.DB_PATH)
        if not result.get('success'):
            logger.error(f'知识库入库失败 {filename}: {result.get("error")}')

    threading.Thread(target=_ingest, daemon=True).start()

    return jsonify({'success': True, 'queued': True, 'filename': filename})


# NOTE(review): the rest of this handler lies beyond the reviewed chunk; the
# visible part is reproduced unchanged.
@app.route('/api/knowledge/delete', methods=['POST'])
def api_knowledge_delete():
    from modules.knowledge import delete_file
    data = request.get_json() or {}
    file_name = data.get('file_name', '')
    if not file_name:
        return
jsonify({'error': '缺少 file_name'}), 400 + result = delete_file(file_name, config.DB_PATH) + return jsonify(result) + + +# ═══════════════════════════════════════════════════════════════════════════ +# API：AI 配置 +# ═══════════════════════════════════════════════════════════════════════════ + +@app.route('/api/config', methods=['GET']) +def api_get_config(): + def _has_key(k): return bool(k and not k.startswith('sk-your')) + return jsonify({ + 'model_provider': config.MODEL_PROVIDER, + 'qwen_model': config.QWEN_MODEL, + 'qwen_base_url': config.QWEN_BASE_URL, + 'openai_model': config.OPENAI_MODEL, + 'openai_base_url': config.OPENAI_BASE_URL, + 'deepseek_model': config.DEEPSEEK_MODEL, + 'deepseek_base_url': config.DEEPSEEK_BASE_URL, + 'ollama_base_url': config.OLLAMA_BASE_URL, + 'ollama_model': config.OLLAMA_MODEL, + 'doubao_model': config.DOUBAO_MODEL, + 'doubao_base_url': config.DOUBAO_BASE_URL, + 'kimi_model': config.KIMI_MODEL, + 'kimi_base_url': config.KIMI_BASE_URL, + 'has_qwen_key': _has_key(config.QWEN_API_KEY), + 'has_openai_key': _has_key(config.OPENAI_API_KEY), + 'has_deepseek_key': _has_key(config.DEEPSEEK_API_KEY), + 'has_doubao_key': _has_key(config.DOUBAO_API_KEY), + 'has_kimi_key': _has_key(config.KIMI_API_KEY), + 'max_concurrent': config.MAX_CONCURRENT_SECTIONS, + 'llm_concurrency_limit': getattr(config, 'LLM_CONCURRENCY_LIMIT', 20), + 'content_volume': config.CONTENT_VOLUME, + 'target_pages': getattr(config, 'TARGET_PAGES', 0), + 'page_char_estimate': getattr(config, 'PAGE_CHAR_ESTIMATE', 700), + # 监控指标：并发状态已集成到 progress.running 和日志；UI显示 "X路并发" + }) + + +@app.route('/api/config', methods=['POST']) +def api_save_config(): + data = request.get_json() or {} + if 'model_provider' in data: + config.MODEL_PROVIDER = data['model_provider'] + if 'qwen_api_key' in data and data['qwen_api_key']: + config.QWEN_API_KEY = data['qwen_api_key'] + if 'qwen_model' in data and data['qwen_model']: + config.QWEN_MODEL = data['qwen_model'] + if 'qwen_base_url' in data and 
data['qwen_base_url']: + config.QWEN_BASE_URL = data['qwen_base_url'] + if 'openai_api_key' in data and data['openai_api_key']: + config.OPENAI_API_KEY = data['openai_api_key'] + if 'openai_model' in data and data['openai_model']: + config.OPENAI_MODEL = data['openai_model'] + if 'openai_base_url' in data and data['openai_base_url']: + config.OPENAI_BASE_URL = data['openai_base_url'] + if 'deepseek_api_key' in data and data['deepseek_api_key']: + config.DEEPSEEK_API_KEY = data['deepseek_api_key'] + if 'deepseek_model' in data and data['deepseek_model']: + config.DEEPSEEK_MODEL = data['deepseek_model'] + if 'deepseek_base_url' in data and data['deepseek_base_url']: + config.DEEPSEEK_BASE_URL = data['deepseek_base_url'] + if 'ollama_base_url' in data and data['ollama_base_url']: + config.OLLAMA_BASE_URL = data['ollama_base_url'] + if 'ollama_model' in data and data['ollama_model']: + config.OLLAMA_MODEL = data['ollama_model'] + if 'doubao_api_key' in data and data['doubao_api_key']: + config.DOUBAO_API_KEY = data['doubao_api_key'] + if 'doubao_model' in data and data['doubao_model']: + config.DOUBAO_MODEL = data['doubao_model'] + if 'doubao_base_url' in data and data['doubao_base_url']: + config.DOUBAO_BASE_URL = data['doubao_base_url'] + if 'kimi_api_key' in data and data['kimi_api_key']: + config.KIMI_API_KEY = data['kimi_api_key'] + if 'kimi_model' in data and data['kimi_model']: + config.KIMI_MODEL = data['kimi_model'] + if 'kimi_base_url' in data and data['kimi_base_url']: + config.KIMI_BASE_URL = data['kimi_base_url'] + if 'max_concurrent' in data: + v = int(data['max_concurrent']) + config.MAX_CONCURRENT_SECTIONS = max(1, min(v, 20)) + if 'llm_concurrency_limit' in data: + try: + v = int(data['llm_concurrency_limit']) + config.LLM_CONCURRENCY_LIMIT = max(1, min(v, 30)) + # Note: semaphore recreated on next import/restart for simplicity + except (ValueError, TypeError): + pass + if 'content_volume' in data and data['content_volume'] in ('concise', 'standard', 
'detailed', 'full'): + config.CONTENT_VOLUME = data['content_volume'] + if 'target_pages' in data: + try: + config.TARGET_PAGES = max(0, int(data['target_pages'])) + except (ValueError, TypeError): + pass + if 'page_char_estimate' in data: + try: + config.PAGE_CHAR_ESTIMATE = max(300, min(3000, int(data['page_char_estimate']))) + except (ValueError, TypeError): + pass + if 'style_preset' in data: + # Save selected style preset for the project (future extension) + pass + + _settings.save(config) + return jsonify({'success': True}) + + +# 新增 /api/styles endpoints for homepage style settings +@app.route('/api/styles', methods=['GET']) +def api_get_styles(): + from utils.style_manager import DEFAULT_PRESETS + return jsonify({'presets': DEFAULT_PRESETS, 'success': True}) + + +@app.route('/api/styles', methods=['POST']) +def api_save_style_preset(): + data = request.get_json() or {} + name = data.get('name', 'custom') + preset_config = data.get('config', {}) + from utils.style_manager import save_preset + if save_preset(name, preset_config): + return jsonify({'success': True, 'name': name}) + return jsonify({'error': '保存失败'}), 500 + + +# ═══════════════════════════════════════════════════════════════════════════ +# 启动 +# ═══════════════════════════════════════════════════════════════════════════ + +if __name__ == '__main__': + init_db() + print('\n' + '=' * 60) + print(' BidPartner - AI Bid Writing Assistant') + print('=' * 60) + print(' URL: http://localhost:5000') + print(' Press Ctrl+C to quit\n') + app.run(host='0.0.0.0', port=5000, debug=False, threaded=True) diff --git a/bid_partner.spec b/bid_partner.spec new file mode 100644 index 0000000..6f4517e --- /dev/null +++ b/bid_partner.spec @@ -0,0 +1,118 @@ +# -*- mode: python ; coding: utf-8 -*- +""" +PyInstaller spec for 标伙伴 · AI标书助手 +Build: pyinstaller bid_partner.spec + +知识库改用 SQLite + 纯 Python 向量存储，已不依赖 ChromaDB，打包更小。 +""" +import os +from PyInstaller.utils.hooks import collect_all, collect_data_files + 
+block_cipher = None + +# ── Collect complex packages ───────────────────────────────────────────────── +openai_datas, openai_bins, openai_hidden = collect_all('openai') +pydantic_datas, pydantic_bins, pydantic_hidden = collect_all('pydantic') + +# tiktoken data (BPE vocab files) +tiktoken_datas = collect_data_files('tiktoken') + +a = Analysis( + ['launcher.py'], + pathex=['.'], + binaries=openai_bins + pydantic_bins, + datas=[ + # ── App assets (read-only, go into _MEIPASS) ── + ('templates', 'templates'), + ('static', 'static'), + # ── Package data ── + *openai_datas, + *pydantic_datas, + *tiktoken_datas, + ], + hiddenimports=[ + # Flask / Werkzeug + 'flask', 'flask_cors', 'werkzeug', 'werkzeug.serving', + 'werkzeug.routing', 'werkzeug.middleware.proxy_fix', + 'jinja2', 'jinja2.ext', + # SQLite (stdlib, always present) + 'sqlite3', + # OpenAI + *openai_hidden, + # Pydantic + *pydantic_hidden, + # Document processing + 'PyPDF2', 'pypdf', 'pypdf.errors', + 'pdfminer', 'pdfminer.high_level', 'pdfminer.layout', + 'pdfminer.pdfpage', 'pdfminer.pdfinterp', 'pdfminer.converter', + 'docx', 'docx.oxml', 'docx.oxml.ns', 'docx.shared', + 'docx.enum', 'docx.enum.text', 'docx.enum.style', + 'python_docx', + # tiktoken + 'tiktoken', 'tiktoken.core', 'tiktoken.model', + 'tiktoken_ext', 'tiktoken_ext.openai_public', + # Network / encoding + 'requests', 'chardet', 'httpx', 'httpcore', + 'anyio', 'anyio.streams', 'anyio.streams.memory', + 'sniffio', 'certifi', + # Stdlib extras + 'importlib.metadata', 'importlib.resources', + 'pkg_resources', 'json', 'math', 'threading', + # Local project modules (explicitly include all) + 'config', 'app', + 'utils', 'utils.ai_client', 'utils.file_utils', + 'utils.prompts', 'utils.settings', 'utils.boq_parser', 'utils.bill_analysis', + 'modules', 'modules.parser', 'modules.generator', + 'modules.checker', 'modules.exporter', 'modules.knowledge', + ], + hookspath=[], + hooksconfig={}, + runtime_hooks=[], + excludes=[ + # Heavy packages not used in 
this app + 'matplotlib', 'pandas', 'scipy', 'numpy', + 'IPython', 'jupyter', 'notebook', + 'PIL', 'Pillow', + 'cv2', 'torch', 'tensorflow', + 'pytest', 'unittest', + # ChromaDB 及其依赖（已移除，改用 SQLite 内置存储） + 'chromadb', 'hnswlib', 'posthog', 'pypika', + 'mmh3', 'overrides', 'monotonic', + 'sentence_transformers', 'onnxruntime', + ], + win_no_prefer_redirects=False, + win_private_assemblies=False, + cipher=block_cipher, + noarchive=False, +) + +pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher) + +exe = EXE( + pyz, + a.scripts, + [], + exclude_binaries=True, + name='bid_partner', + debug=False, + bootloader_ignore_signals=False, + strip=False, + upx=False, + console=False, # no black console window — GUI launcher takes over + disable_windowed_traceback=False, + argv_emulation=False, + target_arch=None, + codesign_identity=None, + entitlements_file=None, +) + +coll = COLLECT( + exe, + a.binaries, + a.zipfiles, + a.datas, + strip=False, + upx=False, + upx_exclude=[], + name='BidPartner', +) diff --git a/bill-worker.js b/bill-worker.js new file mode 100644 index 0000000..ee84bb9 --- /dev/null +++ b/bill-worker.js @@ -0,0 +1,672 @@ +/** + * bill-worker.js — PDF 清单解析调度器（Worker Thread） + * + * 架构（v3 — SharedArrayBuffer 零拷贝）： + * Phase 1 — 并行文本提取 + * 将 PDF 数据写入 SharedArrayBuffer（一次分配，所有子线程共享读） + * 启动 N 个 page-worker，每个负责固定 20 页 + * + * Phase 2 — 清单页筛选 + 文本解析（纯正则，毫秒级） + * 汇总全部页面文本 → 关键字筛选清单页 → 多行合并 → 逐行解析 + */ +'use strict'; +const { parentPort } = require('worker_threads'); +const { Worker } = require('worker_threads'); +const path = require('path'); + +const PAGES_PER_CHUNK = 20; + +parentPort.on('message', async (msg) => { + if (msg.type !== 'parse') return; + const t0 = Date.now(); + try { + // 立即做一次干净的拷贝，确保拥有独立的 ArrayBuffer + const raw = msg.buffer; + const buf = Buffer.alloc(raw.byteLength); + Buffer.from(raw).copy(buf); + + if (buf.length === 0) { + parentPort.postMessage({ type: 'done', ok: false, error: '收到空 PDF 数据' }); + return; + } + + // ── 获取总页数 ── + const 
pdfjsModule = await import('pdfjs-dist/build/pdf.mjs'); + const pdfjsLib = pdfjsModule.default || pdfjsModule; + // 给 pdfjs 一份独立拷贝（pdfjs 内部可能 detach buffer） + const pdfData = new Uint8Array(buf.length); + buf.copy(Buffer.from(pdfData.buffer)); + const pdf = await pdfjsLib.getDocument({ data: pdfData, isEvalSupported: false }).promise; + const totalPages = pdf.numPages; + + // ── 将 PDF 数据写入 SharedArrayBuffer（一次分配，所有子线程共享读）── + const sab = new SharedArrayBuffer(buf.length); + const sabView = new Uint8Array(sab); + buf.copy(Buffer.from(sabView.buffer)); // 从独立 buf 拷贝到共享内存 + + const workerCount = Math.ceil(totalPages / PAGES_PER_CHUNK); + console.log(`[BillWorker] PDF ${totalPages} 页, ${workerCount} 路并行 (SharedArrayBuffer ${(buf.length/1024/1024).toFixed(1)}MB)`); + + // Phase 1: 并行文本提取 + const pageTexts = await parallelExtract(sab, buf.length, totalPages, workerCount); + const t1 = Date.now(); + + const extractedCount = pageTexts.filter(t => t.length > 0).length; + console.log(`[BillWorker] Phase1 完成: ${t1 - t0}ms, ${extractedCount}/${totalPages} 页有文本`); + + // 扫描件判断 + const totalChars = pageTexts.reduce((s, t) => s + t.length, 0); + if (totalChars < 50) { + parentPort.postMessage({ type: 'done', ok: true, data: { scanned: true, reason: 'noText', totalPages } }); + return; + } + + // Phase 2: 筛选清单页（宽松策略 + 连续页补全） + const BILL_KW = ['项目编码', '项目名称', '工程量', '计量单位', '综合单价', '清单编码']; + const SEC_KW = ['分部分项', '分类分项', '措施项目', '其他项目', '工程量清单计价']; + // 第一轮：标记确定的清单页 + const billFlags = new Array(pageTexts.length).fill(false); + for (let i = 0; i < pageTexts.length; i++) { + const t = pageTexts[i]; + if (!t.trim()) continue; + const hHits = BILL_KW.filter(k => t.includes(k)).length; + const sHit = SEC_KW.some(k => t.includes(k)); + const hasCode = /\d{9}/.test(t); + // 放宽：有9位编码即可（不再要求同时命中表头关键字） + if (hHits >= 2 || sHit || hasCode) { + billFlags[i] = true; + } + } + // 第二轮：连续页补全 — 两个清单页之间的非空页也视为清单页（续页无表头） + // 但排除纯费用/税金页面（它们不含施工清单项） + const FEE_PAGE_KW = ['规费', '税金', '社会保险费', 
'住房公积金', '养老保险', + '工伤保险', '失业保险', '医疗保险', '教育费附加', '城市维护建设税']; + const firstBill = billFlags.indexOf(true); + const lastBill = billFlags.lastIndexOf(true); + if (firstBill >= 0 && lastBill > firstBill) { + for (let i = firstBill; i <= lastBill; i++) { + if (!billFlags[i] && pageTexts[i] && pageTexts[i].trim().length > 30) { + const t = pageTexts[i]; + const feeHits = FEE_PAGE_KW.filter(kw => t.includes(kw)).length; + // 命中 2+ 个费用关键字且没有9位工程编码 → 纯费用页，排除 + if (feeHits >= 2 && !/\d{9}/.test(t)) continue; + billFlags[i] = true; + } + } + } + const billTexts = []; + for (let i = 0; i < pageTexts.length; i++) { + if (billFlags[i]) billTexts.push(pageTexts[i]); + } + + if (!billTexts.length) { + parentPort.postMessage({ type: 'done', ok: true, data: { scanned: false, noBillPages: true, totalPages } }); + return; + } + + console.log(`[BillWorker] ${totalPages} 页 → ${billTexts.length} 页清单 (原始识别 ${billFlags.filter(f=>f).length - (lastBill - firstBill >= 0 ? 0 : 0)} / 补全后 ${billTexts.length})`); + + // Phase 3: 文本解析 + const merged = billTexts.join('\n'); + const parsed = parseBillText(merged); + const t2 = Date.now(); + console.log(`[BillWorker] Phase2+3: ${t2 - t1}ms, 总耗时: ${t2 - t0}ms`); + + parentPort.postMessage({ + type: 'done', ok: true, + data: { + scanned: false, + ...parsed, + _meta: { + method: 'local-parallel', + workers: workerCount, + billPages: billTexts.length, + totalPages, + extractMs: t1 - t0, + parseMs: t2 - t1, + totalMs: t2 - t0, + } + } + }); + } catch (err) { + console.error('[BillWorker] 错误:', err.message); + parentPort.postMessage({ type: 'done', ok: false, error: err.message }); + } +}); + +// ================================================================ +// Phase 1: 多 Worker 并行提取（SharedArrayBuffer 零拷贝） +// ================================================================ + +function parallelExtract(sab, dataLength, totalPages, workerCount) { + return new Promise((resolve) => { + const workerPath = path.join(__dirname, 'page-worker.js'); + const 
allPageTexts = new Array(totalPages).fill(''); + const workerStatus = new Array(workerCount).fill('pending'); // pending, done, failed + let resolved = false; + + const checkComplete = () => { + if (resolved) return; + const doneCount = workerStatus.filter(s => s === 'done' || s === 'failed').length; + if (doneCount >= workerCount) { + resolved = true; + // 检查是否有失败的worker，打印警告 + const failedCount = workerStatus.filter(s => s === 'failed').length; + if (failedCount > 0) { + console.warn(`[BillWorker] ${failedCount}/${workerCount} 个worker失败，可能导致部分页面无内容`); + } + resolve(allPageTexts); + } + }; + + for (let i = 0; i < workerCount; i++) { + const startPage = i * PAGES_PER_CHUNK + 1; + const endPage = Math.min((i + 1) * PAGES_PER_CHUNK, totalPages); + + // workerData 传 SharedArrayBuffer（跨线程共享，不会被清空） + const w = new Worker(workerPath, { + workerData: { sab, dataLength, startPage, endPage } + }); + + let workerDone = false; + + const markDone = (status) => { + if (workerDone) return; + workerDone = true; + workerStatus[i] = status; + checkComplete(); + }; + + w.on('message', (msg) => { + if (msg.ok && msg.results) { + for (const r of msg.results) { + allPageTexts[r.page - 1] = r.text; + } + markDone('done'); + } else if (!msg.ok) { + console.warn(`[BillWorker] page-worker[${startPage}-${endPage}] 失败: ${msg.error}`); + markDone('failed'); + } + }); + + w.on('error', (err) => { + console.warn(`[BillWorker] page-worker[${startPage}-${endPage}] 异常: ${err.message}`); + markDone('failed'); + }); + + w.on('exit', (code) => { + // exit 在 message 之后触发，但如果 worker 崩溃没发 message 则在这里兜底 + if (code !== 0 && !workerDone) { + console.warn(`[BillWorker] page-worker[${startPage}-${endPage}] 意外退出(code=${code})`); + markDone('failed'); + } else if (!workerDone) { + markDone('done'); + } + }); + } + + if (workerCount <= 0) { + resolved = true; + resolve(allPageTexts); + } + }); +} + +// ================================================================ +// Phase 3: 清单文本解析（纯正则 + 字符串处理，毫秒级） +// 
================================================================ + +function parseBillText(text) { + const rawLines = text.split(/\n/).map(l => { + let line = l.replace(/\t/g, ' ').trim(); + // 规范化带横杠的编码：如 "010-101-001-001" → "010101001001" + line = line.replace(/(\d{2,4})[-‐–](\d{2,4})[-‐–](\d{2,4})(?:[-‐–](\d{2,4}))?/g, + (m, a, b, c, d) => { + const combined = a + b + c + (d || ''); + return (combined.length >= 9 && combined.length <= 12) ? combined : m; + }); + return line; + }); + + // ── Step 1: 多行合并成逻辑行 ── + // pdfjs 按 Y 坐标分行，表格一行通常 = 一条文本行 + // 但有时项目特征/名称会折行，需要合并 + // + // 新逻辑行的起始标志（任一命中即切断）： + // a) 序号模式：1.1.1.1.5 开头 + // b) 清单编码：9-12位数字或 B+5-6位数字开头 + // c) 中文大标题：一二三 ... 或（一）（二）... + // d) 表头行内容（跳过） + // e) 纯数字序号 + 空格 + 编码（如 "5 500101004001"） + + const ITEM_START = /^\d+(\.\d+)+\s/; // 1.1 或 1.1.1 等序号 + const CODE_INLINE = /(?:^|\s)(\d{9,12}|(? raw.startsWith(m + ' ') || raw.startsWith(m + '\u3000'))) return true; + return false; + } + + for (const raw of rawLines) { + if (!raw || PAGE_MARK.test(raw)) continue; + if (HEADER_RE.test(raw) || HEADER_KW.test(raw)) continue; + if (/^（元）|^款章节号|^备注$|^第\d+页/.test(raw)) continue; + + if (isNewLineTrigger(raw)) { + if (currentLine) logicLines.push(currentLine); + currentLine = raw; + } else if (CODE_INLINE.test(raw) && raw.length > 15) { + // 行内包含编码且够长（像是完整的表格行）→ 也开新行 + if (currentLine) logicLines.push(currentLine); + currentLine = raw; + } else { + // 续行（项目特征折行等短文本） + // 安全阀：已合并行过长时强制切断，防止整页吞并 + if (currentLine && currentLine.length > 300) { + logicLines.push(currentLine); + currentLine = raw; + } else { + currentLine = currentLine ? 
currentLine + ' ' + raw : raw; + } + } + } + if (currentLine) logicLines.push(currentLine); + + console.log(`[BillWorker] 合并后 ${logicLines.length} 条逻辑行（原始 ${rawLines.length} 行）`); + // 打印前5条逻辑行供调试 + for (let i = 0; i < Math.min(5, logicLines.length); i++) { + console.log(`[BillWorker] L${i}: ${logicLines[i].substring(0, 120)}`); + } + + const categories = []; + let curCat = null, curItem = null; + + // 编码匹配：支持行内任意位置的9-12位数字或B编码（排除 GB/DB 等标准号前缀） + const CODE_RE = /(? u.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')); + const UNIT_RE = new RegExp(`(?:^|\\s)(${unitEscaped.join('|')})(?=\\s|\\d|$)`); + const SKIP_RE = /合\s*计|小\s*计|本页小计|总\s*计|价税合计/; + + for (const line of logicLines) { + if (SKIP_RE.test(line)) continue; + + // 去掉行首的序号部分（"1.1.1.1.5 " 或 "5 " 等纯序号前缀） + let stripped = line.replace(/^\d+(\.\d+)*\s+/, '').trim(); + if (!stripped) stripped = line.trim(); + if (!stripped) continue; + + const cm = stripped.match(CODE_RE); + if (cm) { + if (curItem && curCat) curCat.items.push(curItem); + if (!curCat) { curCat = { name: '未分类', items: [] }; categories.push(curCat); } + + const code = cm[1]; + let rest = stripped.substring(cm.index + cm[0].length).trim(); + let name = '', unit = '', quantity = '', spec = ''; + + const unitMatch = rest.match(UNIT_RE); + if (unitMatch) { + const ui = rest.indexOf(unitMatch[0]); + let rawName = rest.substring(0, ui).trim(); + unit = unitMatch[1]; + const afterUnit = rest.substring(ui + unitMatch[0].length).trim(); + const qm = afterUnit.match(/^([\d,.]+)/); + if (qm) { + quantity = qm[1]; + // 提取 quantity 之后的尾部文本，跳过纯数字字段（综合单价、合价等） + let tail = afterUnit.substring(qm.index + qm[0].length).trim(); + if (tail) { + const tailTokens = tail.split(/\s+/); + let si = 0; + while (si < tailTokens.length && /^[\d,.%\-]+$/.test(tailTokens[si])) si++; + const specTail = tailTokens.slice(si).join(' ').trim(); + if (specTail) spec = specTail; + } + } + // 分离 rawName 中的"项目名称"和内联"项目特征" + const ns = splitNameAndSpec(rawName); + name = ns.name; + if (ns.spec) 
spec = ns.spec + (spec ? ';' + spec : ''); + } else { + const tokens = rest.split(/\s+/).filter(t => t); + let foundUnitIdx = -1; + for (let ti = tokens.length - 1; ti >= 1; ti--) { + if (UNIT_SET.has(tokens[ti])) { foundUnitIdx = ti; break; } + } + if (foundUnitIdx >= 1) { + const rawNameStr = tokens.slice(0, foundUnitIdx).join(' '); + const ns = splitNameAndSpec(rawNameStr); + name = ns.name; + if (ns.spec) spec = ns.spec; + unit = tokens[foundUnitIdx]; + const afterTokens = tokens.slice(foundUnitIdx + 1); + if (afterTokens.length && /^[\d,.]+$/.test(afterTokens[0])) { + quantity = afterTokens[0]; + let si = 1; + while (si < afterTokens.length && /^[\d,.%\-]+$/.test(afterTokens[si])) si++; + const specTail = afterTokens.slice(si).join(' ').trim(); + if (specTail) spec = spec ? spec + ';' + specTail : specTail; + } + } else { + name = rest; + } + } + + name = name.replace(/\s+/g, '').trim(); + for (const u of UNIT_TOKENS) { + if (name.endsWith(u) && name.length > u.length) { + unit = unit || u; + name = name.substring(0, name.length - u.length); + break; + } + } + + curItem = { code, name, unit, quantity, spec }; + continue; + } + + // ── 回退：无标准编码但有 "名称单位数量" 结构 → 也视为清单项 ── + // 常见于措施项目、未编码的补充清单项 + if (!cm && stripped.length > 4) { + const uniMatch = stripped.match(UNIT_RE); + if (uniMatch) { + const ui = stripped.indexOf(uniMatch[0]); + const beforeUnit = stripped.substring(0, ui).trim(); + const afterUnit = stripped.substring(ui + uniMatch[0].length).trim(); + const hasQty = /^[\d,.]+/.test(afterUnit); + // 名称 2-50 字、含中文、有数量、不是分部标题 + if (beforeUnit.length >= 2 && beforeUnit.length <= 50 && hasQty + && /[\u4e00-\u9fff]/.test(beforeUnit)) { + if (curItem && curCat) curCat.items.push(curItem); + if (!curCat) { curCat = { name: '未分类', items: [] }; categories.push(curCat); } + const unit = uniMatch[1]; + const qm = afterUnit.match(/^([\d,.]+)/); + const quantity = qm ? 
qm[1] : ''; + const ns = splitNameAndSpec(beforeUnit); + const name = ns.name.replace(/\s+/g, '').trim(); + const spec = ns.spec || ''; + curItem = { code: '', name, unit, quantity, spec }; + continue; + } + } + } + + // 分部标题判断：不含编码、较短的文本、含工程关键字 + // 关键守卫：如果行里有计量单位，说明是清单项，不是标题 + if (stripped.length > 2 && stripped.length < 60 && !CODE_RE.test(stripped)) { + if (UNIT_RE.test(stripped) && /\d+\.?\d*\s*$/.test(stripped)) { + if (curItem) curItem.spec = curItem.spec ? curItem.spec + ';' + stripped : stripped; + continue; + } + if (isCatTitle(stripped) && !UNIT_RE.test(stripped) && !isFeeCatTitle(stripped)) { + if (curItem && curCat) { curCat.items.push(curItem); curItem = null; } + const cleanTitle = stripped.replace(/\s+(座|个|项|处|m|km|段|条)\s+\d+[\d.]*\s*$/, '').trim(); + curCat = { name: cleanTitle, items: [] }; + categories.push(curCat); + continue; + } + } + + if (/^[一二三四五六七八九十]+\s/.test(stripped) || /^（[一二三四五六七八九十\d]+）/.test(stripped)) { + // 中文序号标题也需要排除费用类 + const cleanTitle = stripped.replace(/\s+(座|个|项|处)\s+\d+[\d.]*\s*$/, '').trim(); + if (isFeeCatTitle(cleanTitle)) { + // 费用类标题：跳过，不建分部（其下的行会作为续行处理） + continue; + } + if (curItem && curCat) { curCat.items.push(curItem); curItem = null; } + curCat = { name: cleanTitle, items: [] }; + categories.push(curCat); + continue; + } + + if (curItem && stripped.length > 1) { + curItem.spec = curItem.spec ? 
curItem.spec + ';' + stripped : stripped; + } + } + + if (curItem && curCat) curCat.items.push(curItem); + + // 过滤费用项：只保留需要写入技术标的施工清单项 + let feeFiltered = 0; + for (const cat of categories) { + if (cat.items) { + const before = cat.items.length; + cat.items = cat.items.filter(it => !isFeeItem(it.name)); + feeFiltered += before - cat.items.length; + } + } + if (feeFiltered > 0) console.log(`[BillWorker] 费用项过滤: 移除 ${feeFiltered} 项`); + + // ========== 按项目名称合并（核心去重，大幅减少清单项数量）========== + // 规则：同一分部内，name 相同的清单项合并为一条 + // - code: 保留第一个非空编码 + // - unit: 保留第一个非空单位 + // - quantity: 尝试数值求和，否则用分号拼接 + // - spec: 去重后用分号拼接（截断过长的） + let totalBeforeMerge = 0, totalAfterMerge = 0; + for (const cat of categories) { + if (!cat.items || !cat.items.length) continue; + totalBeforeMerge += cat.items.length; + + const nameMap = new Map(); // name → merged item + for (const item of cat.items) { + const key = (item.name || '').replace(/\s+/g, '').trim(); + if (!key) continue; + + if (!nameMap.has(key)) { + nameMap.set(key, { + code: item.code || '', + name: item.name, + unit: item.unit || '', + quantity: item.quantity || '', + spec: item.spec || '', + _count: 1, + _quantities: item.quantity ? [item.quantity] : [], + _specs: item.spec ? [item.spec] : [], + }); + } else { + const m = nameMap.get(key); + m._count++; + // code: 取第一个非空的 + if (!m.code && item.code) m.code = item.code; + // unit: 取第一个非空的 + if (!m.unit && item.unit) m.unit = item.unit; + // quantity: 收集所有 + if (item.quantity) m._quantities.push(item.quantity); + // spec: 收集不重复的 + if (item.spec && !m._specs.includes(item.spec)) { + m._specs.push(item.spec); + } + } + } + + // 后处理：合成最终字段 + const merged = []; + for (const [, m] of nameMap) { + // quantity: 尝试数值求和 + if (m._quantities.length > 1) { + const nums = m._quantities.map(q => parseFloat(q.replace(/,/g, ''))); + if (nums.every(n => !isNaN(n))) { + const sum = nums.reduce((a, b) => a + b, 0); + m.quantity = sum % 1 === 0 ? 
/**
 * Decide whether a bill item is a "fee item" (not construction work, so it is
 * not written into the technical bid), e.g. safety/civilised-construction
 * fees, statutory fees, taxes, provisional sums.
 *
 * @param {string} name Raw item name; all whitespace is ignored when matching.
 * @returns {boolean} true when the name is a known fee item.
 */
function isFeeItem(name) {
  if (!name) return false;
  const n = name.replace(/\s+/g, '');

  // 1. Exact match against the whole (whitespace-stripped) name.
  const EXACT = [
    '规费', '税金', '利润', '增值税', '暂列金额', '暂估价', '计日工',
    '总承包服务费', '企业管理费', '甲供材料保管费', '价税合计',
  ];
  if (EXACT.includes(n)) return true;

  // 2. Substring match: measure fees / statutory fees / insurance / admin items.
  const FEE_KW = [
    '安全文明', '文明施工费', '环境保护费', '临时设施费',
    '夜间施工增加费', '夜间施工费',
    '冬雨季施工增加费', '冬雨季施工费',
    '二次搬运费', '大型机械设备进出场', '大型机械进出场',
    '施工排水降水', '排水降水费',
    '已完工程及设备保护', '已完工程保护费',
    '工程排污费', '社会保障费', '住房公积金',
    '工伤保险', '劳动保险', '意外伤害保险', '建筑工程保险',
    '城市维护建设税', '城市建设维护税',
    '教育费附加', '地方教育附加',
    '材料暂估', '专业工程暂估',
    '超高施工增加费', '安全防护费',
    '措施项目费', '其他项目费', '不可竞争费',
  ];
  return FEE_KW.some(kw => n.includes(kw));
}

/**
 * Separate the "item name" from an inline "feature description" in rawName.
 * Example: "土方开挖 1.土壤类别：普通土" -> { name: "土方开挖", spec: "1.土壤类别：普通土" }
 *
 * The patterns are tried in priority order (not by earliest position) and a
 * match at index 0 is ignored, so the name part is never empty after a split.
 *
 * @param {string} rawName Combined name/spec text from the bill.
 * @returns {{name: string, spec: string}} Split result; spec is '' when no
 *   pattern matches.
 */
function splitNameAndSpec(rawName) {
  if (!rawName) return { name: '', spec: '' };

  const SPLITTERS = [
    // Pattern 1: number + separator + CJK character (e.g. "1.土壤类别", "2、强度等级").
    /\d+[.、．)\uFF09]\s*[\u4e00-\u9fff]/,
    // Pattern 2: feature keyword followed by a colon (e.g. "材质：", "规格：").
    /(材质|规格|型号|品牌|颜色|尺寸|厚度|直径|管径|强度|等级|类别|类型|做法|要求|标准|内容|工作内容|土壤|含量|配合比|工艺|方式|形式|范围|部位|位置|高度|宽度|长度|深度|坡度|截面|跨度|运距|开挖|回填|混凝土|钢筋|压实)[：:]/,
    // Pattern 3: parenthesised numbering that starts a feature list, "（1）" or "(1)".
    /[（(]\d+[）)]/,
  ];

  for (const re of SPLITTERS) {
    const hit = rawName.match(re);
    if (hit && hit.index > 0) {
      return {
        name: rawName.substring(0, hit.index).trim(),
        spec: rawName.substring(hit.index).trim(),
      };
    }
  }
  return { name: rawName, spec: '' };
}

/**
 * Decide whether a piece of text looks like a section (category) title by
 * checking it for any known construction-discipline keyword.
 *
 * @param {string} text Candidate title.
 * @returns {boolean} true when any keyword occurs in the text; false for
 *   empty, null or undefined input (previously this threw a TypeError).
 */
function isCatTitle(text) {
  // Guard like isFeeCatTitle does, so a missing cell cannot crash the worker.
  if (!text) return false;
  const KW = [
    '土建','建筑','结构','装饰','装修','安装','给排水','暖通','空调','通风',
    '电气','强电','弱电','消防','智能化','幕墙','门窗','园林','绿化','景观',
    '市政','道路','桥梁','管网','基础','地基','桩基','主体','屋面','防水',
    '保温','钢结构','排水','给水','照明','动力','防雷','电梯','人防','室外',
    '附属','分部','工程','措施','清单','土石方','混凝土','砌筑','模板','脚手架',
    '水利','河道','管道','阀门','设备','仪表','自动化','通信','网络',
    '拆除','外墙','内墙','楼地面','天棚','吊顶','栏杆','屋顶','涂料','抹灰',
    '廊道','阀门井','蓄水池','泵站','供水','引水','水源','渠道','闸门',
    '围栏','警示','检修','管线','配电','水池','水塔','取水','净水',
  ];
  return KW.some(k => text.includes(k));
}

/**
 * Decide whether a section title is a "fee" section (statutory fees, taxes,
 * measure-item fees, other-item fees ...) for which no construction category
 * should be created.
 *
 * @param {string} text Section title; all whitespace is ignored when matching.
 * @returns {boolean} true when the title denotes a fee section.
 */
function isFeeCatTitle(text) {
  if (!text) return false;
  const t = text.replace(/\s+/g, '');

  // Exact match against the whole title.
  const EXACT = [
    '规费', '税金', '利润', '增值税', '暂列金额', '暂估价', '计日工',
    '总承包服务费', '企业管理费', '价税合计',
    '措施项目费', '其他项目费', '不可竞争费',
  ];
  if (EXACT.includes(t)) return true;

  // Substring match.
  const FEE_CAT_KW = [
    '措施项目费', '其他项目费', '不可竞争费',
    '规费汇总', '税金汇总', '费率', '费用汇总', '费用合计',
    '暂列金额', '暂估价', '计日工', '总承包服务费',
    '安全文明施工费', '社会保障费', '住房公积金',
    '工伤保险', '教育费附加', '城市维护建设税',
  ];
  return FEE_CAT_KW.some(kw => t.includes(kw));
}
pip install --quiet -r requirements.txt
if errorlevel 1 (
    echo [ERROR] Failed to install dependencies.
    pause & exit /b 1
)

:: ── 4. Sanitize settings.json - REMOVE API KEYS before build ───────────────
echo [Step 3/4] Sanitizing settings (removing API keys from build)...
if exist "data\settings.json" (
    rem Back up real settings
    copy /y "data\settings.json" "data\settings.json.bak" >nul
)
:: Write a clean settings file with no real keys
(
    echo {
    echo   "model_provider": "deepseek",
    echo   "qwen_api_key": "sk-your-qwen-key",
    echo   "qwen_model": "qwen3.6-plus",
    echo   "openai_api_key": "sk-your-openai-key",
    echo   "openai_model": "gpt-4o",
    echo   "deepseek_api_key": "sk-your-deepseek-key",
    echo   "deepseek_model": "deepseek-chat",
    echo   "max_concurrent": 5,
    echo   "content_volume": "standard"
    echo }
) > "data\settings_clean.tmp"

:: Put the sanitized file in place. Without this step the real settings.json
:: (with live API keys) is still what sits in data\ while PyInstaller runs,
:: and the backup/restore around the build would have no effect.
copy /y "data\settings_clean.tmp" "data\settings.json" >nul
if errorlevel 1 (
    echo [ERROR] Could not install sanitized settings. Aborting so real API keys are not bundled.
    if exist "data\settings.json.bak" copy /y "data\settings.json.bak" "data\settings.json" >nul
    del /f /q "data\settings.json.bak" >nul 2>&1
    del /f /q "data\settings_clean.tmp" >nul 2>&1
    pause & exit /b 1
)

:: ── 5. Build ────────────────────────────────────────────────────────────────
echo [Step 4/4] Building EXE with PyInstaller...
echo    (This may take 3-10 minutes on first run)
echo.

:: Clean previous build artifacts
if exist "build" rd /s /q "build" >nul 2>&1
if exist "dist\BidPartner" rd /s /q "dist\BidPartner" >nul 2>&1

pyinstaller bid_partner.spec --noconfirm
:: Capture the result now: the restore commands below overwrite errorlevel.
set BUILD_RESULT=%errorlevel%

:: ── Restore real settings ───────────────────────────────────────────────────
if exist "data\settings.json.bak" (
    copy /y "data\settings.json.bak" "data\settings.json" >nul
    del /f /q "data\settings.json.bak" >nul 2>&1
) else (
    rem No settings existed before the build: remove the placeholder created above
    del /f /q "data\settings.json" >nul 2>&1
)
del /f /q "data\settings_clean.tmp" >nul 2>&1

if %BUILD_RESULT% neq 0 (
    echo.
    echo [ERROR] PyInstaller build failed. See output above for details.
    pause & exit /b 1
)

:: ── 6. Result ───────────────────────────────────────────────────────────────
echo.
echo ============================================================
echo   Build SUCCESSFUL!
echo   Output: dist\BidPartner\bid_partner.exe
echo ============================================================
echo.
"""Application configuration: paths, model provider settings and LLM throttling."""
import os
import sys
import threading
import random
import time
from contextlib import contextmanager


def _env_int(name, default):
    """Return the integer value of environment variable *name*.

    An unset or blank variable yields *default*, so ``set NAME=`` cannot crash
    the import. A non-numeric value still raises ``ValueError`` so that a
    misconfiguration is not silently ignored.
    """
    raw = os.environ.get(name, '').strip()
    return int(raw) if raw else default


# When running as a PyInstaller bundle:
#   sys._MEIPASS       -> read-only bundle dir (templates, static, prompts)
#   sys.executable dir -> writable dir next to the .exe (data, settings, db)
if getattr(sys, 'frozen', False):
    _BUNDLE_DIR = sys._MEIPASS                    # bundled app files
    BASE_DIR = os.path.dirname(sys.executable)    # writable runtime dir
else:
    _BUNDLE_DIR = os.path.dirname(os.path.abspath(__file__))
    BASE_DIR = _BUNDLE_DIR

DATA_DIR = os.path.join(BASE_DIR, 'data')
UPLOAD_DIR = os.path.join(DATA_DIR, 'uploads')
EXPORT_DIR = os.path.join(DATA_DIR, 'exports')
KNOWLEDGE_DIR = os.path.join(DATA_DIR, 'knowledge')
DB_PATH = os.path.join(DATA_DIR, 'projects.db')
CHROMA_DIR = os.path.join(DATA_DIR, 'chroma')
PROMPTS_DIR = os.path.join(_BUNDLE_DIR, 'prompts')

# ==================== AI model configuration ====================
# Provider selection: 'openai' | 'qwen' | 'deepseek' | 'ollama'
MODEL_PROVIDER = os.environ.get('MODEL_PROVIDER', 'qwen')

# OpenAI
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', 'sk-your-openai-key')
OPENAI_MODEL = os.environ.get('OPENAI_MODEL', 'gpt-4.1')
OPENAI_BASE_URL = os.environ.get('OPENAI_BASE_URL', 'https://api.openai.com/v1')

# Alibaba Cloud Qwen (Tongyi Qianwen)
QWEN_API_KEY = os.environ.get('QWEN_API_KEY', 'sk-your-qwen-key')
QWEN_MODEL = os.environ.get('QWEN_MODEL', 'qwen3.6-plus')
QWEN_BASE_URL = os.environ.get('QWEN_BASE_URL', 'https://dashscope.aliyuncs.com/compatible-mode/v1')
# Text-to-image (multimodal generation) uses a different endpoint from chat;
# for the Beijing region see the Model Studio documentation.
QWEN_MULTIMODAL_BASE = os.environ.get(
    'QWEN_MULTIMODAL_BASE',
    'https://dashscope.aliyuncs.com/api/v1',
)
# Default model for bid-attachment illustrations (synchronous text-to-image)
QWEN_IMAGE_MODEL = os.environ.get('QWEN_IMAGE_MODEL', 'qwen-image-2.0-pro')
QWEN_IMAGE_SIZE = os.environ.get('QWEN_IMAGE_SIZE', '1536*1024')
QWEN_IMAGE_PROMPT_EXTEND = os.environ.get('QWEN_IMAGE_PROMPT_EXTEND', 'true').lower() in (
    '1', 'true', 'yes',
)
QWEN_IMAGE_WATERMARK = os.environ.get('QWEN_IMAGE_WATERMARK', 'false').lower() in (
    '1', 'true', 'yes',
)
QWEN_IMAGE_NEGATIVE_PROMPT = os.environ.get(
    'QWEN_IMAGE_NEGATIVE_PROMPT',
    '低分辨率, 模糊, 畸形肢体, 过度饱和, 蜡像感, 杂乱构图, 扭曲文字, 公司商标, LOGO, 投标人名称',
)

# DeepSeek
DEEPSEEK_API_KEY = os.environ.get('DEEPSEEK_API_KEY', 'sk-your-deepseek-key')
DEEPSEEK_MODEL = os.environ.get('DEEPSEEK_MODEL', 'deepseek-chat')
DEEPSEEK_BASE_URL = os.environ.get('DEEPSEEK_BASE_URL', 'https://api.deepseek.com/v1')

# Local Ollama (OpenAI-compatible API)
OLLAMA_BASE_URL = os.environ.get('OLLAMA_BASE_URL', 'http://localhost:11434/v1')
OLLAMA_MODEL = os.environ.get('OLLAMA_MODEL', 'qwen3:8b')

# Doubao / Volcano Engine (ByteDance, OpenAI-compatible API)
DOUBAO_API_KEY = os.environ.get('DOUBAO_API_KEY', 'sk-your-doubao-key')
DOUBAO_MODEL = os.environ.get('DOUBAO_MODEL', 'doubao-1-5-pro-32k')
DOUBAO_BASE_URL = os.environ.get('DOUBAO_BASE_URL', 'https://ark.cn-beijing.volces.com/api/v3')

# Kimi / Moonshot AI (OpenAI-compatible API, supports embeddings)
KIMI_API_KEY = os.environ.get('KIMI_API_KEY', 'sk-your-kimi-key')
KIMI_MODEL = os.environ.get('KIMI_MODEL', 'moonshot-v1-32k')
KIMI_BASE_URL = os.environ.get('KIMI_BASE_URL', 'https://api.moonshot.cn/v1')

# Embedding models
OPENAI_EMBEDDING_MODEL = 'text-embedding-3-small'
QWEN_EMBEDDING_MODEL = 'text-embedding-v3'
KIMI_EMBEDDING_MODEL = 'moonshot-v1-embedding'

# ==================== Application configuration ====================
MAX_FILE_SIZE_MB = 50
ALLOWED_EXTENSIONS = {'pdf', 'doc', 'docx'}
# NOTE(review): the fallback is a fixed, publicly committed value; set the
# SECRET_KEY environment variable for any deployment that is not single-user.
SECRET_KEY = os.environ.get('SECRET_KEY', 'bidhuo-partner-secret-2024')

# ==================== Generation configuration ====================
MAX_RETRIES = 3
REQUEST_TIMEOUT = _env_int('REQUEST_TIMEOUT', 180)
# Outline generation sends a long prompt and gets a large reply, so its read
# timeout (seconds) is longer to avoid the client disconnecting early.
OUTLINE_REQUEST_TIMEOUT = _env_int('OUTLINE_REQUEST_TIMEOUT', 300)
CHUNK_SIZE = 2000       # knowledge-base chunk size (characters)
CHUNK_OVERLAP = 200     # overlap between consecutive chunks
TOP_K_KNOWLEDGE = 3     # number of knowledge-base hits to retrieve
CONTENT_VOLUME = os.environ.get('CONTENT_VOLUME', 'standard')  # concise / standard / detailed / full
TARGET_PAGES = _env_int('TARGET_PAGES', 0)                     # target page count (0 = disabled)
PAGE_CHAR_ESTIMATE = _env_int('PAGE_CHAR_ESTIMATE', 700)       # rough characters-per-page estimate

# ==================== Concurrency control ====================
# Global cap on simultaneous LLM calls, to avoid rate limiting by cloud APIs.
# Clamped to at least 1: a limit of 0 would make every llm_call() time out and
# a negative value would make threading.Semaphore raise at import time.
LLM_CONCURRENCY_LIMIT = max(1, _env_int('LLM_CONCURRENCY_LIMIT', 20))
_llm_semaphore = threading.Semaphore(LLM_CONCURRENCY_LIMIT)


@contextmanager
def llm_call():
    """Context manager that holds one slot of the global LLM semaphore.

    At most ``LLM_CONCURRENCY_LIMIT`` callers can be inside the ``with`` body
    at once. Waiting for a slot is limited to 60 seconds so that a caller can
    never block forever.

    Raises:
        TimeoutError: no slot became free within 60 seconds.
    """
    acquired = _llm_semaphore.acquire(blocking=True, timeout=60.0)
    if not acquired:
        raise TimeoutError(f"LLM并发已达上限({LLM_CONCURRENCY_LIMIT})，请稍后重试")
    try:
        # Qwen is sensitive to requests-per-minute bursts: add a tiny jitter
        # (0-0.08 s) so concurrent callers do not hit the API simultaneously.
        if MODEL_PROVIDER == 'qwen':
            time.sleep(random.uniform(0, 0.08))
        yield
    finally:
        _llm_semaphore.release()


# Default number of sections generated concurrently.
MAX_CONCURRENT_SECTIONS = _env_int('MAX_CONCURRENT_SECTIONS', 12)
"stack_charts_only：意图栈只生成图/表块；full：与常规章节相同长文；single_chart_only：仅栈顶一块图或表" + }, + "schema_version": 1, + "attachment_leaf_body_mode": "stack_charts_only", + "title_regex": [ + "附件\\s*[一二三四五六七八九十0-9A-Za-z、：:.]", + "附\\s*图", + "附\\s*表", + "附\\s*件\\s*\\(", + "^\\s*[\\d一二三四五六七八九十\\.．、]+\\s*附件" + ], + "table_hint_keywords": ["附表", "一览表", "清单表", "表", "统计表", "明细表"], + "figure_hint_keywords": ["附图", "示意图", "平面图", "流程图", "布置图", "组织图", "横道"], + "default_kind_when_ambiguous": "table" +} diff --git a/data/diagram_intent_rules.json b/data/diagram_intent_rules.json new file mode 100644 index 0000000..f8d3119 --- /dev/null +++ b/data/diagram_intent_rules.json @@ -0,0 +1,51 @@ +{ + "_meta": "章节级图/表意图：标题与大纲窗口关键词计分，阈值入栈，按栈序拼接图示/表格生成规范。修改后重启服务生效。", + "_field_docs": { + "threshold_figure": "图示倾向分达到此值才入栈", + "threshold_table": "表格倾向分达到此值才入栈", + "title_weight": "标题命中的权重乘子", + "context_weight": "大纲上下文窗口命中的权重乘子", + "outline_context_lines": "before/after 为相对匹配行上下扩展行数", + "stack_order_when_both": "figure_first | table_first | score_desc（两者同时入栈时的顺序，栈顶为 index 0）", + "figure_keywords": "字符串或 {text,weight} 对象列表", + "table_keywords": "同上" + }, + "schema_version": 1, + "threshold_figure": 1.0, + "threshold_table": 1.0, + "title_weight": 1.0, + "context_weight": 0.6, + "outline_context_lines": {"before": 4, "after": 6}, + "stack_order_when_both": "score_desc", + "figure_keywords": [ + {"text": "组织", "weight": 1.0}, + {"text": "架构", "weight": 1.0}, + {"text": "流程", "weight": 1.2}, + {"text": "工序", "weight": 1.0}, + {"text": "进度", "weight": 1.2}, + {"text": "横道", "weight": 1.5}, + {"text": "网络图", "weight": 1.5}, + {"text": "平面", "weight": 1.0}, + {"text": "布置", "weight": 0.8}, + {"text": "监测", "weight": 0.8}, + {"text": "示意", "weight": 0.8}, + {"text": "应急", "weight": 0.8} + ], + "table_keywords": [ + {"text": "一览表", "weight": 1.5}, + {"text": "人员", "weight": 1.0}, + {"text": "配置", "weight": 0.8}, + {"text": "设备", "weight": 1.0}, + {"text": "机械", "weight": 0.9}, + {"text": "劳动力", "weight": 1.2}, + 
{"text": "工种", "weight": 1.0}, + {"text": "检验", "weight": 1.0}, + {"text": "验收", "weight": 0.9}, + {"text": "材料", "weight": 1.0}, + {"text": "供应", "weight": 0.9}, + {"text": "风险", "weight": 1.0}, + {"text": "措施", "weight": 0.6}, + {"text": "清单", "weight": 0.8}, + {"text": "计划", "weight": 0.7} + ] +} diff --git a/data/exports/20260420测试海东技术标_20260420_180450.docx b/data/exports/20260420测试海东技术标_20260420_180450.docx new file mode 100644 index 0000000..99fc638 Binary files /dev/null and b/data/exports/20260420测试海东技术标_20260420_180450.docx differ diff --git a/data/exports/30260420投标技术文档_20260420_170252.docx b/data/exports/30260420投标技术文档_20260420_170252.docx new file mode 100644 index 0000000..d134a23 Binary files /dev/null and b/data/exports/30260420投标技术文档_20260420_170252.docx differ diff --git a/data/exports/A11111_20260423_160556.docx b/data/exports/A11111_20260423_160556.docx new file mode 100644 index 0000000..2468ebb Binary files /dev/null and b/data/exports/A11111_20260423_160556.docx differ diff --git a/data/exports/A1210201_20260423_151502.docx b/data/exports/A1210201_20260423_151502.docx new file mode 100644 index 0000000..870de79 Binary files /dev/null and b/data/exports/A1210201_20260423_151502.docx differ diff --git a/data/exports/A1210201_20260423_151638.docx b/data/exports/A1210201_20260423_151638.docx new file mode 100644 index 0000000..715cc1a Binary files /dev/null and b/data/exports/A1210201_20260423_151638.docx differ diff --git a/data/exports/A1210201_20260423_152123.docx b/data/exports/A1210201_20260423_152123.docx new file mode 100644 index 0000000..c2a21e0 Binary files /dev/null and b/data/exports/A1210201_20260423_152123.docx differ diff --git a/data/exports/A12_20260423_165937.docx b/data/exports/A12_20260423_165937.docx new file mode 100644 index 0000000..e1c01ea Binary files /dev/null and b/data/exports/A12_20260423_165937.docx differ diff --git a/data/exports/A2121212_20260421_103738.docx b/data/exports/A2121212_20260421_103738.docx new file 
mode 100644 index 0000000..c659ef4 Binary files /dev/null and b/data/exports/A2121212_20260421_103738.docx differ diff --git a/data/exports/A666_20260422_143004.docx b/data/exports/A666_20260422_143004.docx new file mode 100644 index 0000000..dc6cfb7 Binary files /dev/null and b/data/exports/A666_20260422_143004.docx differ diff --git a/data/exports/A666_20260422_153137.docx b/data/exports/A666_20260422_153137.docx new file mode 100644 index 0000000..e2c309d Binary files /dev/null and b/data/exports/A666_20260422_153137.docx differ diff --git a/data/exports/A666_20260422_160459.docx b/data/exports/A666_20260422_160459.docx new file mode 100644 index 0000000..0f83a32 Binary files /dev/null and b/data/exports/A666_20260422_160459.docx differ diff --git a/data/exports/A6756757_20260423_161844.docx b/data/exports/A6756757_20260423_161844.docx new file mode 100644 index 0000000..4af22cf Binary files /dev/null and b/data/exports/A6756757_20260423_161844.docx differ diff --git a/data/exports/HTH_20260424_104051.docx b/data/exports/HTH_20260424_104051.docx new file mode 100644 index 0000000..7c4c453 Binary files /dev/null and b/data/exports/HTH_20260424_104051.docx differ diff --git a/data/exports/RETRE_20260424_094735.docx b/data/exports/RETRE_20260424_094735.docx new file mode 100644 index 0000000..7c621a3 Binary files /dev/null and b/data/exports/RETRE_20260424_094735.docx differ diff --git a/data/exports/SDG_20260424_100236.docx b/data/exports/SDG_20260424_100236.docx new file mode 100644 index 0000000..89142b9 Binary files /dev/null and b/data/exports/SDG_20260424_100236.docx differ diff --git a/data/exports/SDG_20260424_100328.docx b/data/exports/SDG_20260424_100328.docx new file mode 100644 index 0000000..201ef9c Binary files /dev/null and b/data/exports/SDG_20260424_100328.docx differ diff --git a/data/exports/SDG_20260424_100627.docx b/data/exports/SDG_20260424_100627.docx new file mode 100644 index 0000000..b10629d Binary files /dev/null and 
b/data/exports/SDG_20260424_100627.docx differ diff --git a/data/exports/frasfio_20260424_140217.docx b/data/exports/frasfio_20260424_140217.docx new file mode 100644 index 0000000..cf944fc Binary files /dev/null and b/data/exports/frasfio_20260424_140217.docx differ diff --git a/data/exports/frasfio_20260424_142454.docx b/data/exports/frasfio_20260424_142454.docx new file mode 100644 index 0000000..5b26e8c Binary files /dev/null and b/data/exports/frasfio_20260424_142454.docx differ diff --git a/data/exports/kuuy_20260424_104908.docx b/data/exports/kuuy_20260424_104908.docx new file mode 100644 index 0000000..db8ca07 Binary files /dev/null and b/data/exports/kuuy_20260424_104908.docx differ diff --git a/data/exports/kuuy_20260424_110646.docx b/data/exports/kuuy_20260424_110646.docx new file mode 100644 index 0000000..3236e09 Binary files /dev/null and b/data/exports/kuuy_20260424_110646.docx differ diff --git a/data/exports/kuuy_20260424_110915.docx b/data/exports/kuuy_20260424_110915.docx new file mode 100644 index 0000000..7c7a01e Binary files /dev/null and b/data/exports/kuuy_20260424_110915.docx differ diff --git a/data/exports/kuuy_20260424_111840.docx b/data/exports/kuuy_20260424_111840.docx new file mode 100644 index 0000000..fcfa9e5 Binary files /dev/null and b/data/exports/kuuy_20260424_111840.docx differ diff --git a/data/exports/kuuy_20260424_112552.docx b/data/exports/kuuy_20260424_112552.docx new file mode 100644 index 0000000..c971d94 Binary files /dev/null and b/data/exports/kuuy_20260424_112552.docx differ diff --git a/data/exports/kuuy_20260424_113616.docx b/data/exports/kuuy_20260424_113616.docx new file mode 100644 index 0000000..05be323 Binary files /dev/null and b/data/exports/kuuy_20260424_113616.docx differ diff --git a/data/exports/kuuy_20260424_114854.docx b/data/exports/kuuy_20260424_114854.docx new file mode 100644 index 0000000..6a88b6d Binary files /dev/null and b/data/exports/kuuy_20260424_114854.docx differ diff --git 
a/data/exports/rdgsg_20260423_155122.docx b/data/exports/rdgsg_20260423_155122.docx new file mode 100644 index 0000000..1f1af10 Binary files /dev/null and b/data/exports/rdgsg_20260423_155122.docx differ diff --git a/data/exports/uyfiu_20260423_163707.docx b/data/exports/uyfiu_20260423_163707.docx new file mode 100644 index 0000000..f10b34c Binary files /dev/null and b/data/exports/uyfiu_20260423_163707.docx differ diff --git a/data/exports/uyfiu_20260423_164832.docx b/data/exports/uyfiu_20260423_164832.docx new file mode 100644 index 0000000..ea84a1c Binary files /dev/null and b/data/exports/uyfiu_20260423_164832.docx differ diff --git a/data/projects.db b/data/projects.db new file mode 100644 index 0000000..eb1b6f2 Binary files /dev/null and b/data/projects.db differ diff --git a/data/settings.json b/data/settings.json new file mode 100644 index 0000000..b7527c0 --- /dev/null +++ b/data/settings.json @@ -0,0 +1,25 @@ +{ + "model_provider": "qwen", + "qwen_api_key": "sk-999173b3ca7f425a97cc4b12a2d3575f", + "qwen_model": "qwen-turbo-latest", + "qwen_base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1", + "openai_api_key": "sk-your-openai-key", + "openai_model": "gpt-4.1", + "openai_base_url": "https://api.openai.com/v1", + "deepseek_api_key": "sk-your-deepseek-key", + "deepseek_model": "deepseek-chat", + "deepseek_base_url": "https://api.deepseek.com/v1", + "ollama_base_url": "http://localhost:11434/v1", + "ollama_model": "qwen3:8b", + "doubao_api_key": "sk-your-doubao-key", + "doubao_model": "doubao-1-5-pro-32k", + "doubao_base_url": "https://ark.cn-beijing.volces.com/api/v3", + "kimi_api_key": "sk-your-kimi-key", + "kimi_model": "moonshot-v1-32k", + "kimi_base_url": "https://api.moonshot.cn/v1", + "max_concurrent": 20, + "llm_concurrency_limit": 20, + "content_volume": "concise", + "target_pages": 120, + "page_char_estimate": 700 +} \ No newline at end of file diff --git a/data/style_presets.json b/data/style_presets.json new file mode 100644 index 
0000000..2a7e1aa --- /dev/null +++ b/data/style_presets.json @@ -0,0 +1,35 @@ +{ + "standard": { + "marginTop": 2.54, + "marginBottom": 2.54, + "marginLeft": 3.18, + "marginRight": 3.18, + "paperOrientation": "纵", + "heading1Font": "黑体", + "heading1Size": "三号", + "heading1Bold": true, + "heading2Font": "宋体", + "heading2Size": "小四", + "tableHeaderFont": "宋体", + "tableHeaderSize": "小四", + "tableHeaderLineSpacing": "24", + "tableHeaderAlign": "center", + "tableHeaderBold": false, + "tableBodyFont": "宋体", + "tableBodySize": "小四", + "tableBodyLineSpacing": "24", + "tableBodyAlign": "center", + "tableBodyBold": false, + "tocEnabled": true, + "tocTitleFont": "黑体", + "tocTitleSize": "三号", + "tocTitleLineSpacing": "24", + "tocTitleAlign": "center", + "tocTitleBold": true, + "tocBodyFont": "宋体", + "tocBodySize": "四号", + "tocBodyLineSpacing": "24", + "tocBodyAlign": "left", + "tocBodyBold": false + } +} \ No newline at end of file diff --git a/data/uploads/10_boq_工程量清单.pdf b/data/uploads/10_boq_工程量清单.pdf new file mode 100644 index 0000000..24c7709 Binary files /dev/null and b/data/uploads/10_boq_工程量清单.pdf differ diff --git a/data/uploads/10_招标文件正文.pdf b/data/uploads/10_招标文件正文.pdf new file mode 100644 index 0000000..30df632 Binary files /dev/null and b/data/uploads/10_招标文件正文.pdf differ diff --git a/data/uploads/11_boq_工程量清单.pdf b/data/uploads/11_boq_工程量清单.pdf new file mode 100644 index 0000000..24c7709 Binary files /dev/null and b/data/uploads/11_boq_工程量清单.pdf differ diff --git a/data/uploads/11_招标文件正文.pdf b/data/uploads/11_招标文件正文.pdf new file mode 100644 index 0000000..30df632 Binary files /dev/null and b/data/uploads/11_招标文件正文.pdf differ diff --git a/data/uploads/12_boq_工程量清单.pdf b/data/uploads/12_boq_工程量清单.pdf new file mode 100644 index 0000000..24c7709 Binary files /dev/null and b/data/uploads/12_boq_工程量清单.pdf differ diff --git a/data/uploads/12_招标文件正文.pdf b/data/uploads/12_招标文件正文.pdf new file mode 100644 index 0000000..30df632 Binary files /dev/null and 
b/data/uploads/12_招标文件正文.pdf differ diff --git a/data/uploads/15_boq_工程量清单.pdf b/data/uploads/15_boq_工程量清单.pdf new file mode 100644 index 0000000..24c7709 Binary files /dev/null and b/data/uploads/15_boq_工程量清单.pdf differ diff --git a/data/uploads/15_招标文件正文.pdf b/data/uploads/15_招标文件正文.pdf new file mode 100644 index 0000000..30df632 Binary files /dev/null and b/data/uploads/15_招标文件正文.pdf differ diff --git a/data/uploads/16_boq_工程量清单.pdf b/data/uploads/16_boq_工程量清单.pdf new file mode 100644 index 0000000..24c7709 Binary files /dev/null and b/data/uploads/16_boq_工程量清单.pdf differ diff --git a/data/uploads/16_招标文件正文.pdf b/data/uploads/16_招标文件正文.pdf new file mode 100644 index 0000000..30df632 Binary files /dev/null and b/data/uploads/16_招标文件正文.pdf differ diff --git a/data/uploads/17_boq_工程量清单.pdf b/data/uploads/17_boq_工程量清单.pdf new file mode 100644 index 0000000..24c7709 Binary files /dev/null and b/data/uploads/17_boq_工程量清单.pdf differ diff --git a/data/uploads/17_招标文件正文.pdf b/data/uploads/17_招标文件正文.pdf new file mode 100644 index 0000000..30df632 Binary files /dev/null and b/data/uploads/17_招标文件正文.pdf differ diff --git a/data/uploads/19_boq_工程量清单.pdf b/data/uploads/19_boq_工程量清单.pdf new file mode 100644 index 0000000..24c7709 Binary files /dev/null and b/data/uploads/19_boq_工程量清单.pdf differ diff --git a/data/uploads/19_招标文件正文.pdf b/data/uploads/19_招标文件正文.pdf new file mode 100644 index 0000000..30df632 Binary files /dev/null and b/data/uploads/19_招标文件正文.pdf differ diff --git a/data/uploads/1_boq_工程量清单1.pdf b/data/uploads/1_boq_工程量清单1.pdf new file mode 100644 index 0000000..e129e5a Binary files /dev/null and b/data/uploads/1_boq_工程量清单1.pdf differ diff --git a/data/uploads/1_招标文件正文1.pdf b/data/uploads/1_招标文件正文1.pdf new file mode 100644 index 0000000..136700e Binary files /dev/null and b/data/uploads/1_招标文件正文1.pdf differ diff --git a/data/uploads/20_boq_工程量清单.pdf b/data/uploads/20_boq_工程量清单.pdf new file mode 100644 index 0000000..24c7709 Binary files /dev/null and 
b/data/uploads/20_boq_工程量清单.pdf differ diff --git a/data/uploads/20_招标文件正文.pdf b/data/uploads/20_招标文件正文.pdf new file mode 100644 index 0000000..30df632 Binary files /dev/null and b/data/uploads/20_招标文件正文.pdf differ diff --git a/data/uploads/21_boq_工程量清单.pdf b/data/uploads/21_boq_工程量清单.pdf new file mode 100644 index 0000000..24c7709 Binary files /dev/null and b/data/uploads/21_boq_工程量清单.pdf differ diff --git a/data/uploads/21_招标文件正文.pdf b/data/uploads/21_招标文件正文.pdf new file mode 100644 index 0000000..30df632 Binary files /dev/null and b/data/uploads/21_招标文件正文.pdf differ diff --git a/data/uploads/22_boq_工程量清单.pdf b/data/uploads/22_boq_工程量清单.pdf new file mode 100644 index 0000000..24c7709 Binary files /dev/null and b/data/uploads/22_boq_工程量清单.pdf differ diff --git a/data/uploads/22_招标文件正文.pdf b/data/uploads/22_招标文件正文.pdf new file mode 100644 index 0000000..30df632 Binary files /dev/null and b/data/uploads/22_招标文件正文.pdf differ diff --git a/data/uploads/23_boq_工程量清单.pdf b/data/uploads/23_boq_工程量清单.pdf new file mode 100644 index 0000000..24c7709 Binary files /dev/null and b/data/uploads/23_boq_工程量清单.pdf differ diff --git a/data/uploads/23_招标文件正文.pdf b/data/uploads/23_招标文件正文.pdf new file mode 100644 index 0000000..30df632 Binary files /dev/null and b/data/uploads/23_招标文件正文.pdf differ diff --git a/data/uploads/24_boq_工程量清单.pdf b/data/uploads/24_boq_工程量清单.pdf new file mode 100644 index 0000000..24c7709 Binary files /dev/null and b/data/uploads/24_boq_工程量清单.pdf differ diff --git a/data/uploads/24_招标文件正文.pdf b/data/uploads/24_招标文件正文.pdf new file mode 100644 index 0000000..30df632 Binary files /dev/null and b/data/uploads/24_招标文件正文.pdf differ diff --git a/data/uploads/25_boq_工程量清单.pdf b/data/uploads/25_boq_工程量清单.pdf new file mode 100644 index 0000000..24c7709 Binary files /dev/null and b/data/uploads/25_boq_工程量清单.pdf differ diff --git a/data/uploads/25_招标文件正文.pdf b/data/uploads/25_招标文件正文.pdf new file mode 100644 index 0000000..30df632 Binary files /dev/null and 
b/data/uploads/25_招标文件正文.pdf differ diff --git a/data/uploads/26_boq_工程量清单.pdf b/data/uploads/26_boq_工程量清单.pdf new file mode 100644 index 0000000..24c7709 Binary files /dev/null and b/data/uploads/26_boq_工程量清单.pdf differ diff --git a/data/uploads/26_招标文件正文.pdf b/data/uploads/26_招标文件正文.pdf new file mode 100644 index 0000000..30df632 Binary files /dev/null and b/data/uploads/26_招标文件正文.pdf differ diff --git a/data/uploads/27_boq_工程量清单.pdf b/data/uploads/27_boq_工程量清单.pdf new file mode 100644 index 0000000..24c7709 Binary files /dev/null and b/data/uploads/27_boq_工程量清单.pdf differ diff --git a/data/uploads/27_招标文件正文.pdf b/data/uploads/27_招标文件正文.pdf new file mode 100644 index 0000000..30df632 Binary files /dev/null and b/data/uploads/27_招标文件正文.pdf differ diff --git a/data/uploads/28_boq_工程量清单.pdf b/data/uploads/28_boq_工程量清单.pdf new file mode 100644 index 0000000..24c7709 Binary files /dev/null and b/data/uploads/28_boq_工程量清单.pdf differ diff --git a/data/uploads/28_招标文件正文.pdf b/data/uploads/28_招标文件正文.pdf new file mode 100644 index 0000000..30df632 Binary files /dev/null and b/data/uploads/28_招标文件正文.pdf differ diff --git a/data/uploads/29_boq_工程量清单.pdf b/data/uploads/29_boq_工程量清单.pdf new file mode 100644 index 0000000..24c7709 Binary files /dev/null and b/data/uploads/29_boq_工程量清单.pdf differ diff --git a/data/uploads/29_招标文件正文.pdf b/data/uploads/29_招标文件正文.pdf new file mode 100644 index 0000000..30df632 Binary files /dev/null and b/data/uploads/29_招标文件正文.pdf differ diff --git a/data/uploads/2_boq_工程量清单1.pdf b/data/uploads/2_boq_工程量清单1.pdf new file mode 100644 index 0000000..e129e5a Binary files /dev/null and b/data/uploads/2_boq_工程量清单1.pdf differ diff --git a/data/uploads/2_招标文件正文1.pdf b/data/uploads/2_招标文件正文1.pdf new file mode 100644 index 0000000..136700e Binary files /dev/null and b/data/uploads/2_招标文件正文1.pdf differ diff --git a/data/uploads/30_boq_工程量清单.pdf b/data/uploads/30_boq_工程量清单.pdf new file mode 100644 index 0000000..24c7709 Binary files /dev/null and 
b/data/uploads/30_boq_工程量清单.pdf differ diff --git a/data/uploads/30_招标文件正文.pdf b/data/uploads/30_招标文件正文.pdf new file mode 100644 index 0000000..30df632 Binary files /dev/null and b/data/uploads/30_招标文件正文.pdf differ diff --git a/data/uploads/31_boq_工程量清单.pdf b/data/uploads/31_boq_工程量清单.pdf new file mode 100644 index 0000000..24c7709 Binary files /dev/null and b/data/uploads/31_boq_工程量清单.pdf differ diff --git a/data/uploads/31_招标文件正文.pdf b/data/uploads/31_招标文件正文.pdf new file mode 100644 index 0000000..30df632 Binary files /dev/null and b/data/uploads/31_招标文件正文.pdf differ diff --git a/data/uploads/32_boq_工程量清单.pdf b/data/uploads/32_boq_工程量清单.pdf new file mode 100644 index 0000000..24c7709 Binary files /dev/null and b/data/uploads/32_boq_工程量清单.pdf differ diff --git a/data/uploads/32_招标文件正文.pdf b/data/uploads/32_招标文件正文.pdf new file mode 100644 index 0000000..30df632 Binary files /dev/null and b/data/uploads/32_招标文件正文.pdf differ diff --git a/data/uploads/33_boq_工程量清单.pdf b/data/uploads/33_boq_工程量清单.pdf new file mode 100644 index 0000000..24c7709 Binary files /dev/null and b/data/uploads/33_boq_工程量清单.pdf differ diff --git a/data/uploads/33_招标文件正文.pdf b/data/uploads/33_招标文件正文.pdf new file mode 100644 index 0000000..30df632 Binary files /dev/null and b/data/uploads/33_招标文件正文.pdf differ diff --git a/data/uploads/34_boq_工程量清单.pdf b/data/uploads/34_boq_工程量清单.pdf new file mode 100644 index 0000000..24c7709 Binary files /dev/null and b/data/uploads/34_boq_工程量清单.pdf differ diff --git a/data/uploads/34_招标文件正文.pdf b/data/uploads/34_招标文件正文.pdf new file mode 100644 index 0000000..30df632 Binary files /dev/null and b/data/uploads/34_招标文件正文.pdf differ diff --git a/data/uploads/3_boq_工程量清单1.pdf b/data/uploads/3_boq_工程量清单1.pdf new file mode 100644 index 0000000..e129e5a Binary files /dev/null and b/data/uploads/3_boq_工程量清单1.pdf differ diff --git a/data/uploads/3_招标文件正文1.pdf b/data/uploads/3_招标文件正文1.pdf new file mode 100644 index 0000000..136700e Binary files /dev/null and 
b/data/uploads/3_招标文件正文1.pdf differ diff --git a/data/uploads/4_boq_工程量清单1.pdf b/data/uploads/4_boq_工程量清单1.pdf new file mode 100644 index 0000000..e129e5a Binary files /dev/null and b/data/uploads/4_boq_工程量清单1.pdf differ diff --git a/data/uploads/4_招标文件正文1.pdf b/data/uploads/4_招标文件正文1.pdf new file mode 100644 index 0000000..136700e Binary files /dev/null and b/data/uploads/4_招标文件正文1.pdf differ diff --git a/data/uploads/5_boq_工程量清单1.pdf b/data/uploads/5_boq_工程量清单1.pdf new file mode 100644 index 0000000..e129e5a Binary files /dev/null and b/data/uploads/5_boq_工程量清单1.pdf differ diff --git a/data/uploads/5_招标文件正文1.pdf b/data/uploads/5_招标文件正文1.pdf new file mode 100644 index 0000000..136700e Binary files /dev/null and b/data/uploads/5_招标文件正文1.pdf differ diff --git a/data/uploads/6_boq_工程量清单.pdf b/data/uploads/6_boq_工程量清单.pdf new file mode 100644 index 0000000..24c7709 Binary files /dev/null and b/data/uploads/6_boq_工程量清单.pdf differ diff --git a/data/uploads/6_招标文件正文.pdf b/data/uploads/6_招标文件正文.pdf new file mode 100644 index 0000000..30df632 Binary files /dev/null and b/data/uploads/6_招标文件正文.pdf differ diff --git a/data/uploads/7_boq_工程量清单1.pdf b/data/uploads/7_boq_工程量清单1.pdf new file mode 100644 index 0000000..e129e5a Binary files /dev/null and b/data/uploads/7_boq_工程量清单1.pdf differ diff --git a/data/uploads/7_招标文件正文1.pdf b/data/uploads/7_招标文件正文1.pdf new file mode 100644 index 0000000..136700e Binary files /dev/null and b/data/uploads/7_招标文件正文1.pdf differ diff --git a/data/uploads/8_boq_工程量清单.pdf b/data/uploads/8_boq_工程量清单.pdf new file mode 100644 index 0000000..24c7709 Binary files /dev/null and b/data/uploads/8_boq_工程量清单.pdf differ diff --git a/data/uploads/8_招标文件正文.pdf b/data/uploads/8_招标文件正文.pdf new file mode 100644 index 0000000..30df632 Binary files /dev/null and b/data/uploads/8_招标文件正文.pdf differ diff --git a/data/uploads/9_boq_工程量清单.pdf b/data/uploads/9_boq_工程量清单.pdf new file mode 100644 index 0000000..24c7709 Binary files /dev/null and 
b/data/uploads/9_boq_工程量清单.pdf differ diff --git a/data/uploads/9_招标文件正文.pdf b/data/uploads/9_招标文件正文.pdf new file mode 100644 index 0000000..30df632 Binary files /dev/null and b/data/uploads/9_招标文件正文.pdf differ diff --git a/data/word_allocation_rules.json b/data/word_allocation_rules.json new file mode 100644 index 0000000..307ef8b --- /dev/null +++ b/data/word_allocation_rules.json @@ -0,0 +1,30 @@ +{ + "_meta": "字数分配约束规则：与「标书篇幅预期」四档的 base/core 配合，按技术评分项权重与章节标题相关性分配各叶节点最低字数与提示词中的评分要点提示。修改后重启服务生效；字段说明见同文件 _field_docs。", + "_field_docs": { + "schema_version": "规则文件版本号，解析时可做迁移", + "alpha": "0~1，评分驱动强度；越大则高分相关章节越接近 core、低相关越接近 base", + "budget_mode": "target_pages：启用目标页数且 TARGET_PAGES>0 时，全书叶节点目标总字数为 TARGET_PAGES*PAGE_CHAR_ESTIMATE；无技术评分时叶节均分该总预算。未启用页数时无评分则返回 None。anchor_mean：N*(base+core)/2；anchor_base：N*base", + "per_section_floor": "单节 min_chars 下限（不低于此整数）", + "per_section_cap": "单节 min_chars 上限（不超过 core 时可设为 core 或略高）", + "relevance.method": "keyword_overlap：标题与评分项名称/关键词的字面重叠度", + "relevance.min_rating_weight": "忽略权重低于此值的评分项（减少噪声）", + "rating_parse": "预留；解析器内置多形态 rating_json，无需在此配置", + "prompt.top_k_rating_items": "写入本节字数说明中的相关评分项名称条数上限", + "max_tokens_scale": "若为 true，按 min_chars/base 比例缩放本段 max_tokens（仍受模型上限约束）" + }, + "schema_version": 1, + "alpha": 0.85, + "budget_mode": "target_pages", + "per_section_floor": null, + "per_section_cap": null, + "relevance": { + "method": "keyword_overlap", + "min_rating_weight": 0.01 + }, + "rating_parse": {}, + "prompt": { + "top_k_rating_items": 4, + "intro_line": "本节须对下列技术评分要点作实质展开（结合工艺、流程、标准与可验证措施，禁止空泛承诺与复述招标文件）：" + }, + "max_tokens_scale": false +} diff --git a/launcher.py b/launcher.py new file mode 100644 index 0000000..ded7331 --- /dev/null +++ b/launcher.py @@ -0,0 +1,172 @@ +""" +标伙伴 · AI标书助手 — 桌面启动器 +运行此文件 (或打包后的 bid_partner.exe) 即可自动启动本地服务并打开浏览器。 +""" +import os +import sys +import socket +import threading +import time +import webbrowser +import urllib.request +import logging + + +# ── 找可用端口 
def _find_free_port(start: int = 5000, attempts: int = 20) -> int:
    """Return a TCP port on 127.0.0.1 that could be bound at probe time.

    Ports ``start`` .. ``start + attempts - 1`` are probed in order and the
    first one that binds is returned.  When every candidate is unavailable the
    operating system is asked for an ephemeral port (bind to port 0) instead
    of handing back ``start``, which is already known to be busy.  ``start``
    is returned only if that last probe fails as well, so the server reports
    the bind error itself.

    The probe socket is closed before returning, so another process may still
    take the port before the server binds it.
    """
    for port in range(start, start + attempts):
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            try:
                probe.bind(('127.0.0.1', port))
            except (OSError, OverflowError):
                # OSError: port in use / not permitted.
                # OverflowError: candidate is outside 0-65535.
                continue
            return port

    # Every preferred port is taken: let the OS pick any free one.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        try:
            probe.bind(('127.0.0.1', 0))
        except OSError:
            return start
        return probe.getsockname()[1]
weight='bold') + tk.Label(root, text='标伙伴 · AI 标书助手', font=title_font, + bg='#f5f5f5', fg='#1a1a2e').pack(pady=(22, 4)) + + # ── 状态行 ── + status_var = tk.StringVar(value='正在启动服务，请稍候…') + status_lbl = tk.Label(root, textvariable=status_var, + font=('微软雅黑', 10), bg='#f5f5f5', fg='#555') + status_lbl.pack(pady=4) + + # ── URL 链接 ── + url_lbl = tk.Label(root, text='', font=('Consolas', 10), + bg='#f5f5f5', fg='#1a73e8', cursor='hand2') + url_lbl.pack(pady=2) + url_lbl.bind('', lambda _: webbrowser.open(URL)) + + # ── 按钮区 ── + btn_frame = tk.Frame(root, bg='#f5f5f5') + btn_frame.pack(pady=18) + + open_btn = ttk.Button(btn_frame, text='打开浏览器', + command=lambda: webbrowser.open(URL), + state='disabled', width=14) + open_btn.pack(side='left', padx=8) + + quit_btn = ttk.Button(btn_frame, text='退出程序', + command=root.destroy, width=10) + quit_btn.pack(side='left', padx=8) + + # ── 版本信息 ── + tk.Label(root, text='单机版 · 本地运行 · 数据不上传', + font=('微软雅黑', 8), bg='#f5f5f5', fg='#aaa').pack(pady=(0, 10)) + + # ── 后台轮询，服务就绪后更新 UI ── + def _on_ready(): + status_var.set('服务已就绪 ✓') + status_lbl.config(fg='#2e7d32') + url_lbl.config(text=URL) + open_btn.config(state='normal') + webbrowser.open(URL) + + def _on_timeout(): + status_var.set('启动超时，请查看 bid_partner.log') + status_lbl.config(fg='#c62828') + + def _check(): + if _wait_for_server(): + root.after(0, _on_ready) + else: + root.after(0, _on_timeout) + + threading.Thread(target=_check, daemon=True).start() + root.mainloop() + + +# ── 无图形模式（仅控制台） ──────────────────────────────────────────────────── +def _run_headless(): + print(f'[标伙伴] Starting server on port {PORT} ...') + if _wait_for_server(): + print(f'[标伙伴] Ready → http://127.0.0.1:{PORT}') + webbrowser.open(f'http://127.0.0.1:{PORT}') + # 阻塞，直到用户 Ctrl+C + try: + while True: + time.sleep(1) + except KeyboardInterrupt: + print('[标伙伴] Shutting down.') + else: + print('[标伙伴] Server did not start within 60 s. 
def main():
    """Launcher entry point: start the server thread, then show a front end.

    Logging is configured first, the Flask server is started in a daemon
    thread, and the tkinter window is opened.  If the window cannot be
    created or fails while running, the reason is written to the log and the
    console-only mode is used instead.
    """
    _setup_logging()

    server_thread = threading.Thread(target=_start_server, daemon=True)
    server_thread.start()

    try:
        _run_gui()
    except Exception:
        # Best-effort fallback, but keep the traceback: previously the cause
        # of a GUI failure was discarded without a trace.
        logging.getLogger('launcher').warning(
            '图形界面启动失败，改用控制台模式', exc_info=True
        )
        _run_headless()
AND status='done' ORDER BY order_index", + (project_id,) + ) + rows = cur.fetchall() + if not rows: + return {'error': '尚未生成标书内容，请先生成'} + + content_parts = [] + for title, content in rows: + snippet = (content or '')[:500].replace('\n', ' ') + content_parts.append(f"【{title}】{snippet}") + content_str = '\n'.join(content_parts) + + # 调用 AI 检查 + prompt = CHECK_PROMPT.format(requirements=requirements[:3000], content=content_str[:6000]) + raw = ai_client.chat(prompt, temperature=0.2, max_tokens=2048) + + # 解析 JSON + raw = re.sub(r'```(?:json)?\s*', '', raw).replace('```', '').strip() + m = re.search(r'\{[\s\S]*\}', raw) + if m: + raw = m.group(0) + result = json.loads(raw) + return result + + except json.JSONDecodeError as e: + logger.error(f'合规检查结果解析失败: {e}') + return {'error': f'AI 返回格式异常: {e}', 'raw': raw} + except Exception as e: + logger.exception('合规检查失败') + return {'error': str(e)} + finally: + conn.close() diff --git a/modules/dark_bid_format_check.py b/modules/dark_bid_format_check.py new file mode 100644 index 0000000..4564643 --- /dev/null +++ b/modules/dark_bid_format_check.py @@ -0,0 +1,635 @@ +""" +技术暗标 HTML 格式检查（由清标工具.js 迁移，不依赖浏览器/jsdom）。 +仅解析内联 style 与文档内 ' + f'

{sample_title}

' + f'{para_html}' + f'' + f'

表头
表格内容

def _strip_empty_lines_keep_indent(text: str) -> str:
    """Drop leading and trailing blank lines while keeping every other line intact.

    Line endings are normalised to ``\\n``.  A line counts as blank when it is
    empty or whitespace-only.  Indentation and interior blank lines are left
    untouched so that text-drawn figures keep their alignment.  ``None``
    yields an empty string.
    """
    if text is None:
        return ''
    lines = text.replace('\r\n', '\n').replace('\r', '\n').split('\n')
    # Move two indices inward and slice once; the earlier pop(0) loop shifted
    # the whole list for every leading blank line.
    first = 0
    last = len(lines)
    while first < last and lines[first].strip() == '':
        first += 1
    while last > first and lines[last - 1].strip() == '':
        last -= 1
    return '\n'.join(lines[first:last])
def _set_cell_padding(cell, pt_value: float):
    """Set the same inner margin, given in points, on all four sides of a table cell.

    The margin is written as a ``w:tcMar`` element under the cell's
    ``w:tcPr``, with each side expressed in twips (1 pt = 20 twips).  Any
    ``w:tcMar`` already present is removed first, so calling this more than
    once on a cell replaces the margin instead of stacking duplicate
    elements.
    """
    tcPr = cell._tc.get_or_add_tcPr()
    # _normalize_table_layout and _add_figure_block both set padding on the
    # same cell; without this cleanup the second call appended a second
    # w:tcMar element.
    for stale in tcPr.findall(qn('w:tcMar')):
        tcPr.remove(stale)

    tcMar = OxmlElement('w:tcMar')
    val = str(int(pt_value * 20))  # pt -> twips
    for side in ('top', 'left', 'bottom', 'right'):
        node = OxmlElement(f'w:{side}')
        node.set(qn('w:w'), val)
        node.set(qn('w:type'), 'dxa')
        tcMar.append(node)
    tcPr.append(tcMar)
cap = doc.add_paragraph() + cap.alignment = WD_ALIGN_PARAGRAPH.CENTER + cap.paragraph_format.space_before = Pt(8) + cap.paragraph_format.space_after = Pt(3) + run = cap.add_run(f'{prefix}：{title}') + run.font.bold = True + run.font.size = Pt(11) + run.font.name = 'Times New Roman' + _safe_set_eastasia(run, '黑体') + + +def _add_figure_block( + doc: Document, + title: str, + content: str, + preset=None, + show_caption: bool = True, + attachment_only: bool = False, +): + """ + 附件类图示：默认调用通义 qwen-image-2.0-pro 文生图并插入 Word；失败则回退为文字示意框。 + 正文图示：保持文字框（不调用文生图）。 + """ + if preset is None: + preset = get_preset('standard') + + use_qwen = bool( + attachment_only and preset.get('attachment_figure_use_qwen', True) + ) + if use_qwen: + try: + from utils.qwen_image_client import generate_attachment_figure_png + + with config.llm_call(): + png, err = generate_attachment_figure_png(title, content) + if png: + if show_caption: + _add_block_caption(doc, '图', title) + pic_para = doc.add_paragraph() + pic_para.alignment = WD_ALIGN_PARAGRAPH.CENTER + pic_para.paragraph_format.space_before = Pt(4) + pic_para.paragraph_format.space_after = Pt(6) + pr = pic_para.add_run() + w_cm = float(preset.get('attachment_figure_width_cm', 15) or 15) + pr.add_picture(io.BytesIO(png), width=Cm(min(max(w_cm, 8), 18))) + sp = doc.add_paragraph() + sp.paragraph_format.space_after = Pt(8) + return + if err: + logger.warning('附件文生图未返回图片: %s', err) + except Exception as e: + logger.warning('附件文生图异常，回退文字框: %s', e) + + if show_caption: + _add_block_caption(doc, '图', title) + + render_method = str(preset.get('figure_render_method', 'fixed_box')).strip().lower() + if render_method not in ('fixed_box',): + render_method = 'fixed_box' + + lines = content.replace('\r\n', '\n').replace('\r', '\n').split('\n') + + # 单格表格：四周边框 + 淡蓝灰背景 + tbl = doc.add_table(rows=1, cols=1) + tbl.style = 'Table Grid' + _normalize_table_layout(tbl, align_center=True) + cell = tbl.cell(0, 0) + _set_cell_bg(cell, 'EFF3FB') # 淡蓝灰背景 + 
def _normalize_labor_plan_header_cell(s: str) -> str:
    """Normalise one header cell of a labour-plan table for comparison.

    Surrounding whitespace and Markdown bold markers (``**``) are removed,
    then a unit suffix of the form "（人）" / "(人)" is dropped.  The result is
    stripped again so whitespace that stood before the suffix, or inside the
    bold markers, cannot make an otherwise matching header compare unequal.
    ``None`` is treated as an empty string.
    """
    bare = (s or '').strip().replace('**', '')
    return re.sub(r'[（(]\s*人\s*[）)]', '', bare).strip()
def _add_word_table(doc: Document, title: str, content: str, show_caption: bool = True, allow_text_fallback: bool = True):
    """Render a Markdown pipe table as a Word table appended to ``doc``.

    Only lines that start and end with ``|`` are treated as table rows;
    delimiter rows such as ``|---|:--:|`` are skipped.  Short rows are padded
    with empty cells up to the widest row.  The first row is rendered as a
    bold, centred, shaded header.

    If no row can be parsed, the caption is still written when
    ``show_caption`` is true, and the raw text is emitted as ordinary
    paragraphs when ``allow_text_fallback`` is true.  A six-column table whose
    header matches the labour-plan layout is delegated to
    ``_add_labor_plan_word_table``.
    """
    content = content or ''  # tolerate a missing body instead of raising

    raw_rows = []
    for line in content.strip().split('\n'):
        line = line.strip()
        if not line:
            continue
        # A delimiter row must contain at least one dash.  Without that
        # requirement an all-blank data row such as "| | |" matched too and
        # was silently dropped from the table.
        if re.fullmatch(r'\|[\s:|-]*-[\s:|-]*\|', line):
            continue
        if line.startswith('|') and line.endswith('|'):
            raw_rows.append([c.strip() for c in line[1:-1].split('|')])

    if not raw_rows:
        # Nothing parseable: degrade to plain text.
        if show_caption:
            _add_block_caption(doc, '表', title)
        if allow_text_fallback:
            _add_plain_text(doc, content)
        return

    col_count = max(len(r) for r in raw_rows)
    rows = [r + [''] * (col_count - len(r)) for r in raw_rows]

    if show_caption:
        _add_block_caption(doc, '表', title)

    if col_count == 6 and _is_labor_plan_double_header_table(rows):
        # Caption already written above, so the delegate must not repeat it.
        _add_labor_plan_word_table(doc, title, rows, show_caption=False, preset=None)
        return

    table = doc.add_table(rows=len(rows), cols=col_count)
    table.style = 'Table Grid'
    _normalize_table_layout(table, align_center=True)

    for i, row_data in enumerate(rows):
        is_header = (i == 0)
        for j, cell_text in enumerate(row_data):
            cell = table.cell(i, j)
            para = cell.paragraphs[0]
            para.clear()
            para.alignment = WD_ALIGN_PARAGRAPH.CENTER if is_header else WD_ALIGN_PARAGRAPH.LEFT
            run = para.add_run(cell_text)
            run.font.size = Pt(10)
            run.font.bold = is_header
            run.font.name = 'Times New Roman'
            _safe_set_eastasia(run, '宋体')
            if is_header:
                _set_cell_bg(cell, 'D6E4F7')  # light blue header fill

    # Spacer paragraph after the table.
    sp = doc.add_paragraph()
    sp.paragraph_format.space_after = Pt(6)
run.font.name = preset.get('body_font', 'Times New Roman') + _safe_set_eastasia(run, preset.get('body_font', '宋体')) + + +def _add_body_paragraphs(doc: Document, text: str, preset=None, attachment_only: bool = False, section_title: str = ''): + """ + 将正文文本分段渲染，自动识别并处理图示 [FIGURE:...] 和表格 [TABLE:...] 标记。 + 支持从preset读取figure/table开关。 + 暗标模式下正文/引言不输出图、表块（附件章节仍导出图/表）。 + """ + if preset is None: + preset = get_preset('standard') + dark_strip = bool(preset.get('dark_bid_body_strip_charts')) + blocks = _split_content_blocks(text) + if attachment_only: + # 附件章节：优先导出图/表块；无块时回退为正文说明，避免 Word 仅见标题 + figure_table_blocks = [b for b in blocks if b['type'] in ('figure', 'table')] + if not figure_table_blocks: + text_merge = '\n'.join( + (b.get('content') or '').strip() + for b in blocks + if b['type'] == 'text' and (b.get('content') or '').strip() + ) + blocks = [{'type': 'text', 'content': text_merge}] if text_merge else [] + else: + rules = att_sec.get_attachment_rules_cached() + preferred_kind = att_sec.pick_single_figure_or_table( + section_title or '', + bool(preset.get('figure_enabled', True)), + bool(preset.get('table_enabled', True)), + rules, + ) + if preferred_kind in ('figure', 'table'): + preferred_blocks = [b for b in figure_table_blocks if b['type'] == preferred_kind] + blocks = preferred_blocks[:1] if preferred_blocks else figure_table_blocks[:1] + else: + blocks = figure_table_blocks[:1] + + for block in blocks: + if dark_strip and not attachment_only and block['type'] in ('figure', 'table'): + continue + # 附件章节：图/表为投标要件，须导出；不因项目「正文图表」开关而隐藏 + show_fig = block['type'] == 'figure' and ( + attachment_only or preset.get('figure_enabled', True) + ) + show_tbl = block['type'] == 'table' and ( + attachment_only or preset.get('table_enabled', True) + ) + if show_fig: + _add_figure_block( + doc, + block['title'], + block['content'], + preset, + show_caption=not attachment_only, + attachment_only=attachment_only, + ) + elif show_tbl: + _add_word_table( + doc, + block['title'], 
+ block['content'], + show_caption=not attachment_only, + allow_text_fallback=not attachment_only, + ) + elif not attachment_only: + _add_plain_text(doc, block['content'], preset) + elif attachment_only and block['type'] == 'text' and (block.get('content') or '').strip(): + # 无图块时的说明性文字（如开关提示）仍输出，避免仅见标题 + _add_plain_text(doc, block['content'], preset) + + diff --git a/modules/generator.py b/modules/generator.py new file mode 100644 index 0000000..44b29b6 --- /dev/null +++ b/modules/generator.py @@ -0,0 +1,1256 @@ +""" +标书内容生成模块 - 极速并发优化版 +全局LLM_SEMAPHORE(上限20) + 并行解析 + 更高池上限。 +流程：生成大纲 → 解析章节树 → 并发生成内容 +""" +import re +import random +import sqlite3 +import logging +from concurrent.futures import ThreadPoolExecutor, as_completed +from datetime import datetime +from typing import Any, Dict, List, Optional, Tuple + +import config +from utils import ai_client, prompts as P +from utils import word_allocation as word_alloc +from utils import volume_chapters as vol_ch +from utils import diagram_intent as diagram_int +from utils import attachment_section as att_sec +from utils import bid_appendix_content as bid_app +from utils.outline_numbering import format_heading_display + +logger = logging.getLogger(__name__) + +BID_WRITING_SYSTEM = ( + '你是一位资深的工程投标文件撰写专家，擅长以执行方视角撰写技术方案正文。' + '撰写时必须遵守以下铁律：' + + '①【字数】用户规定的最低字数必须满足，但字数须由实质内容支撑，' + '不得用重复背景、堆砌承诺或复述要求来凑字数；' + + '②【自称】投标方自称统一用"我方"，禁用"我们""我公司"；' + + '③【禁止套话】禁用：综上所述、首先其次再次、我们深信、高度重视、全力以赴、' + '竭诚服务、不断优化、稳步推进、通过以上措施、我方将严格按照、我方承诺、' + '确保圆满完成、切实保障；' + + '④【禁止前导句】严禁：本章节对应……、本小节主要说明……、' + '以下将从……方面说明、针对招标方要求……、根据招标文件……我方将……——' + '开头直接写实质内容；' + + '⑤【禁止复述要求】招标文件给出的技术参数、工程量、服务数量、规范标准等均视为' + '已知条件，直接体现在方案中，禁止先复读要求再作答；' + '不用"满足招标方提出的XXX要求""针对招标文件第X条"等句式；' + + '⑥【禁止重申背景——最常见的废稿场景】' + '禁止在章节正文中出现项目名称、建设单位、建设地点、工程规模、合同工期等基本信息；' + '尤其严禁将招标文件中的具体工程量数字（如"X条渠道""X公里""X座建筑物""X台设备"等）' + '反复引入到各个章节开头作为背景铺垫——' + '这类数字只能在专门的"项目概况/项目背景"章节出现一次，' + '质量、安全、进度、技术方案、人员配置等专业章节一律直接展开专业内容；' + + '⑦【禁止虚构优越参数】严禁为了显示"超越"招标要求而捏造参数或数量：' + 
def _get_word_count_spec(volume: str) -> str:
    """Build the word-count requirement paragraph embedded in the prompt.

    ``volume`` selects an entry of ``VOLUME_PRESETS``; unknown keys fall back
    to ``'standard'``.  When ``config.TARGET_PAGES`` is a positive number an
    extra bullet stating the target page count is included.
    """
    base, core, _, _ = VOLUME_PRESETS.get(volume, VOLUME_PRESETS['standard'])
    pages = int(getattr(config, 'TARGET_PAGES', 0) or 0)
    # The page bullet must end with its own newline.  It used to start with
    # one and end without, which produced a blank line before it and glued
    # the following bullet onto the same line.
    page_note = f'- 目标页数：约 {pages} 页（按粗略换算生效）\n' if pages > 0 else ''
    return (
        f'- 字数硬性要求（必须达到，不达标将被退回重写）：\n'
        f' · 一般小节：不少于 {base} 字\n'
        f' · 核心技术/重点评分章节：不少于 {core} 字\n'
        f'{page_note}'
        f'- 内容必须充分展开，每个要点均需具体阐述，不得一笔带过\n'
        f'- 宁多勿少，写满写透，篇幅不足是最严重的质量问题'
    )
VOLUME_PRESETS.get(volume, VOLUME_PRESETS['standard']) + return int(base * 0.65) + + +# 中文数字映射 +CN_NUM_MAP = { + '一': 1, '二': 2, '三': 3, '四': 4, '五': 5, + '六': 6, '七': 7, '八': 8, '九': 9, '十': 10, + '十一': 11, '十二': 12, '十三': 13, '十四': 14, '十五': 15, +} + + +# ─── 大纲生成 ───────────────────────────────────────────────────────────── + +def generate_outline(db_path: str, project_id: int) -> None: + """后台：生成标书大纲并存入 bid_sections""" + conn = sqlite3.connect(db_path) + try: + _set_project_status(conn, project_id, 'outline_generating') + + td = _get_tender_data(conn, project_id) + if not td: + raise ValueError('尚未解析招标文件，请先解析') + + summary = td['summary'] or '' + rating = td['rating_requirements'] or '' + + if rating: + prompt = P.get_outlines_with_rating_prompt(summary, rating) + else: + prompt = P.get_outlines_prompt(summary or td['raw_text'] or '') + + outline_text = ai_client.chat( + prompt, + temperature=0.5, + max_tokens=4096, + request_timeout=getattr(config, 'OUTLINE_REQUEST_TIMEOUT', 300), + ) + + # 解析章节并自动重排序号，保存规范化后的大纲文本 + bid_title, sections, normalized_text = _parse_outline(outline_text) + _save_outline_text(conn, project_id, normalized_text) + _save_sections(conn, project_id, sections) + + _set_project_status(conn, project_id, 'outline_done') + logger.info(f'项目 {project_id} 大纲生成完成，共 {len(sections)} 节') + + except Exception as e: + logger.exception(f'大纲生成失败 project_id={project_id}') + _set_project_status(conn, project_id, 'outline_error', str(e)) + finally: + conn.close() + + +# ─── 章节内容生成 ────────────────────────────────────────────────────────── + +def generate_section(db_path: str, project_id: int, section_id: int, + anon_requirements: str = '', + enable_figure: bool = False, + enable_table: bool = False) -> None: + """后台：为指定 section 生成正文内容（单个章节入口，自行读取上下文）""" + conn = sqlite3.connect(db_path) + try: + section = _get_section(conn, section_id) + if not section: + raise ValueError(f'Section {section_id} 不存在') + + td = _get_tender_data(conn, project_id) + 
outline_text = _get_outline_text(conn, project_id) + if not outline_text.strip(): + raise ValueError('当前项目尚无可用大纲，请先保存或生成大纲') + summary = (td or {}).get('summary', '') + boq_summary = (td or {}).get('boq_summary', '') + conn.close() + conn = None + + tender_kind = (td or {}).get('tender_kind', 'engineering') or 'engineering' + outline_head = outline_text.strip().splitlines()[0][:50] if outline_text.strip() else '' + logger.info( + f'章节生成读取大纲 project_id={project_id}, section_id={section_id}, ' + f'outline_len={len(outline_text)}, outline_head="{outline_head}"' + ) + alloc_map = _project_allocation_map(db_path, project_id) + override = alloc_map.get(section_id) if alloc_map else None + _generate_one(db_path, section, summary, outline_text, + anon_requirements, enable_figure, enable_table, + boq_summary, tender_kind, override) + + except Exception as e: + logger.exception(f'章节生成失败 section_id={section_id}') + _update_section_status_safe(db_path, section_id, 'error', str(e)) + finally: + if conn: + conn.close() + + +MAX_CONTINUE_ROUNDS = 3 # 极速优化：从5减少到3，减少额外AI调用次数（结合改进prompt更易一次写满） +# 单次续写目标字数上限：与 DeepSeek/Qwen 8192 max_tokens 下的实际中文产出量匹配，略保守更易写满 +_CONTINUE_CHUNK_CAP = 2800 +_CONTINUE_TAIL_CHARS = 2200 + + +def _auto_continue(content: str, min_chars: int, max_tok: int, title: str, + system: str = BID_WRITING_SYSTEM) -> str: + """ + 自动续写：当首次生成的内容字数不足时，发起独立的续写调用。 + 不传入完整的原始 prompt（太长会挤占输出空间），而是只提供 + 已有内容的末尾部分作为上下文，让 AI 集中精力续写。 + """ + for round_i in range(MAX_CONTINUE_ROUNDS): + if len(content) >= min_chars: + break + + remaining = min_chars - len(content) + if remaining <= 200: + break + + # 本轮只要求「差额」的一部分，多轮叠加更易达到总目标 + chunk_goal = min(remaining, _CONTINUE_CHUNK_CAP) + + tail = ( + content[-_CONTINUE_TAIL_CHARS:] + if len(content) > _CONTINUE_TAIL_CHARS + else content + ) + + cont_prompt = ( + f'以下是投标文件「{title}」小节已撰写的部分内容（末尾段落）：\n\n' + f'{tail}\n\n' + f'━━━━━━━━━━━━━━━━━━━━━━━━━\n' + f'当前累计 {len(content)} 字，本节最低要求 {min_chars} 字，' + f'全文总差额约 {remaining} 字。\n' + 
def _build_writing_system(anon_requirements: str = '') -> str:
    """Return the system prompt, extended with anonymous-bid rules when given.

    With empty, whitespace-only or ``None`` requirements the base
    ``BID_WRITING_SYSTEM`` prompt is returned unchanged.  Otherwise a fixed
    notice forbidding figure/table block markers in body text is appended,
    followed by the caller's anonymous-bid requirements.
    """
    # _generate_one guards this same value with `or ''`; do likewise so a
    # None passed by a caller does not raise AttributeError here.
    anon = (anon_requirements or '').strip()
    if not anon:
        return BID_WRITING_SYSTEM
    auto_dark = (
        '\n\n【系统自动追加·暗标正文图表禁令】\n'
        '已启用暗标模式：除「附件」类章节中由系统生成的图/表条目外，'
        '所有章节「正文」与「章节引言」中严禁出现 [FIGURE:...]、[/FIGURE]、[TABLE:...]、[/TABLE] 等块标记，'
        '不得以 ASCII 图、示意框代替；仅输出纯文字技术表述。\n'
    )
    return (
        BID_WRITING_SYSTEM
        + auto_dark
        + '\n\n【暗标合规要求（最高优先级，每个章节均须严格遵守）】\n'
        + anon
    )
def _strip_line_serial_numbers(text: str) -> str:
    """Remove a bare serial number from the start of every line.

    Recognised forms are one to four digits followed by a separator, e.g.
    ``1.``, ``2、``, ``370)`` or ``12`` plus whitespace.  Leading whitespace
    before the number and any run of separators after it are removed too.
    Lines without such a prefix are returned unchanged.

    A dot that is directly followed by another digit is not treated as a
    separator, so decimals and multi-level numbers at the start of a line
    ("3.5米", "1.2.3 ...") stay intact instead of losing their first part.

    Empty or ``None`` input is returned as is.  Lines are re-joined with
    ``\\n``, so a trailing newline is not preserved.
    """
    if not text:
        return text
    serial_prefix = re.compile(r'^\s*\d{1,4}(?:[.．](?!\d)|[、)\s])[.．、)\s]*')
    return '\n'.join(serial_prefix.sub('', line) for line in text.splitlines())
boq_summary: str = '', + tender_kind: str = 'engineering', + allocation_override: Optional[Dict[str, Any]] = None) -> None: + """ + 核心生成函数：纯 AI 调用 + 结果写库。 + 不长期持有 DB 连接，适合在线程池中并发调用。 + """ + section_id = section['id'] + is_leaf = bool(section['is_leaf']) + title = section['section_title'] + + writing_system = _build_writing_system(anon_requirements) + _att_rules = att_sec.get_attachment_rules_cached() + _att_mode = att_sec.attachment_leaf_body_mode(_att_rules) if ( + is_leaf and att_sec.is_attachment_only_section(title, _att_rules) + ) else '' + _is_attachment_title = bool(is_leaf and bool(_att_mode)) + # 用户需求：附件类仅输出图/表描述与图标，不写长文，故强制走图表块路径 + _use_stack_charts = bool(_is_attachment_title) + # 暗标：正文/引言禁止图表块，意图栈按「双关」处理以注入禁止指令 + _anon_dark = bool((anon_requirements or '').strip()) + _body_enable_figure = bool(enable_figure and not _anon_dark) + _body_enable_table = bool(enable_table and not _anon_dark) + diagram_addon = '' + if is_leaf and not _use_stack_charts: + # Always provide diagram control (addon or forbid prompt) to enforce user setting + diagram_addon = _build_diagram_addon_for_leaf( + section, outline_text, _body_enable_figure, _body_enable_table, + ) + + _update_section_status_safe(db_path, section_id, 'generating') + + try: + if is_leaf: + if _use_stack_charts: + # 附件章节直接生成“图/表 + 图标描述”模板，不输出文章（现已尊重禁用开关并更贴合项目标题） + content = _build_attachment_icon_content( + title, _att_rules, enable_figure, enable_table, + summary=summary, outline_text=outline_text, + boq_summary=boq_summary or '', + ) + _update_section_content_safe(db_path, section_id, content, '') + else: + volume = _effective_volume() + if allocation_override: + wc_spec = allocation_override['word_count_spec'] + max_tok = int(allocation_override.get('max_tokens') or _get_max_tokens(volume)) + tgt = int(allocation_override.get('target_chars') or 0) + min_chars = word_alloc.continuation_threshold(tgt) if tgt > 0 else _get_min_chars(volume) + else: + wc_spec = _get_word_count_spec(volume) + max_tok = 
_get_max_tokens(volume) + min_chars = _get_min_chars(volume) + + prompt = P.get_section_detail_prompt( + summary, outline_text, title, + word_count_spec=wc_spec, + boq_summary=boq_summary, + tender_kind=tender_kind or 'engineering', + ) + if _is_attachment_title: + prompt = prompt + P.get_attachment_chapter_emphasis_hint() + # 知识库检索：将历史标书相关片段作为写作参考注入提示词 + knowledge_ctx = _get_knowledge_context(title) + if knowledge_ctx: + prompt = prompt + knowledge_ctx + + if diagram_addon: + prompt = prompt + diagram_addon + + content = ai_client.chat( + prompt, + system=writing_system, + temperature=0.7, + max_tokens=max_tok, + ) + + content = _auto_continue(content, min_chars, max_tok, title, + system=writing_system) + content = _strip_line_serial_numbers(content) + _update_section_content_safe(db_path, section_id, content, '') + else: + # 非叶节点但属于标准附件标题时：直接写入规范图/表块（避免仅引言、正文为空导致导出无表） + std_kind_nl = bid_app.classify_standard_appendix(title) + if att_sec.is_attachment_only_section(title, _att_rules) and std_kind_nl: + body = bid_app.build_standard_appendix_markdown( + std_kind_nl, title, summary, boq_summary or '' + ) + _update_section_content_safe(db_path, section_id, body, '') + else: + prompt = P.get_section_intro_prompt(summary, outline_text, title) + if prompt: + intro = ai_client.chat( + prompt, + system=writing_system, + temperature=0.4, + max_tokens=1024, + ) + else: + intro = '' + intro = _strip_line_serial_numbers(intro) + _update_section_content_safe(db_path, section_id, '', intro) + + _update_section_status_safe(db_path, section_id, 'done') + logger.info(f'Section {section_id} "{title}" 生成完成') + + except Exception as e: + logger.exception(f'章节生成失败 section_id={section_id}') + _update_section_status_safe(db_path, section_id, 'error', str(e)) + + +def generate_all_sections(db_path: str, project_id: int, + anon_requirements: str = '', + enable_figure: bool = False, + enable_table: bool = False) -> None: + """ + 后台：并发生成所有章节（全局LLM_SEMAPHORE保护，总并发≤20）。 + 
策略：先生成非叶节点(章节引言)，再并发生成所有叶节点(正文)。 + 并发数由 config.MAX_CONCURRENT_SECTIONS (默认12，可达20) 控制。 + """ + try: + conn = sqlite3.connect(db_path) + cur = conn.cursor() + + # 读取尚未生成的章节（跳过已完成的） + cur.execute(''' + SELECT id, section_number, section_title, level, is_leaf, content, intro_content, status + FROM bid_sections WHERE project_id=? ORDER BY order_index + ''', (project_id,)) + rows = cur.fetchall() + + td = _get_tender_data(conn, project_id) + outline_text = _get_outline_text(conn, project_id) + if not outline_text.strip(): + conn.close() + raise ValueError('当前项目尚无可用大纲，请先保存或生成大纲') + summary = (td or {}).get('summary', '') + boq_summary = (td or {}).get('boq_summary', '') + tender_kind = (td or {}).get('tender_kind', 'engineering') or 'engineering' + outline_head = outline_text.strip().splitlines()[0][:50] if outline_text.strip() else '' + logger.info( + f'全量生成读取大纲 project_id={project_id}, outline_len={len(outline_text)}, outline_head="{outline_head}"' + ) + alloc_map = _project_allocation_map(db_path, project_id) + conn.close() + + all_sections = [ + {'id': r[0], 'section_number': r[1], 'section_title': r[2], + 'level': r[3], 'is_leaf': r[4], 'content': r[5], 'intro_content': r[6], 'status': r[7]} + for r in rows + ] + + # 只处理未完成的章节（pending / error 的重新生成） + sections = [s for s in all_sections if s.get('status') != 'done'] + + if not sections: + logger.info(f'项目 {project_id} 所有章节已生成完成，无需重新生成') + return + + # 分组：非叶节点（章节引言，通常较短）+ 叶节点（正文内容，耗时较长） + non_leaf = [s for s in sections if not s['is_leaf']] + leaf = [s for s in sections if s['is_leaf']] + + workers = max(1, config.MAX_CONCURRENT_SECTIONS) + # 极速优化：尊重全局LLM上限，避免continuation rounds导致超限 + llm_limit = getattr(config, 'LLM_CONCURRENCY_LIMIT', 20) + workers = min(workers, max(1, llm_limit // 2)) # 保守分配，留空间给续写/知识检索 + logger.info( + f'项目 {project_id} 开始并发生成: ' + f'{len(non_leaf)} 个章节引言 + {len(leaf)} 个叶节点, ' + f'并发数={workers} (LLM上限={llm_limit})' + ) + + # 第一阶段：并发生成非叶节点引言（通常很快） + if non_leaf: + _concurrent_generate(db_path, 
non_leaf, summary, outline_text, workers, + anon_requirements, enable_figure, enable_table, + boq_summary, tender_kind, None) + + # 第二阶段：并发生成叶节点正文（主要耗时部分） + if leaf: + _concurrent_generate(db_path, leaf, summary, outline_text, workers, + anon_requirements, enable_figure, enable_table, + boq_summary, tender_kind, alloc_map) + + # 统计结果 + conn = sqlite3.connect(db_path) + cur = conn.cursor() + cur.execute(''' + SELECT + COUNT(*) as total, + SUM(CASE WHEN status='done' THEN 1 ELSE 0 END) as done, + SUM(CASE WHEN status='error' THEN 1 ELSE 0 END) as errors + FROM bid_sections WHERE project_id=? + ''', (project_id,)) + total, done, errors = cur.fetchone() + conn.close() + logger.info(f'项目 {project_id} 全量生成完成: {done}/{total} 成功, {errors} 失败') + + except Exception as e: + logger.exception(f'全量生成失败 project_id={project_id}') + + +def _concurrent_generate(db_path: str, sections: list, summary: str, + outline_text: str, workers: int, + anon_requirements: str = '', + enable_figure: bool = False, + enable_table: bool = False, + boq_summary: str = '', + tender_kind: str = 'engineering', + alloc_map: Optional[Dict[int, Dict[str, Any]]] = None) -> None: + """用线程池并发生成一批章节""" + with ThreadPoolExecutor(max_workers=workers, thread_name_prefix='gen') as pool: + futures = {} + for s in sections: + override = alloc_map.get(s['id']) if alloc_map else None + f = pool.submit(_generate_one, db_path, s, summary, outline_text, + anon_requirements, enable_figure, enable_table, + boq_summary, tender_kind, override) + futures[f] = s + + for f in as_completed(futures): + s = futures[f] + try: + f.result() + except Exception as e: + logger.error(f'章节 {s["id"]} "{s["section_title"]}" 异常: {e}') + + +# ─── 大纲解析 ───────────────────────────────────────────────────────────── + +_CN_NUMS_LIST = [ + '', '一', '二', '三', '四', '五', '六', '七', '八', '九', '十', + '十一', '十二', '十三', '十四', '十五', '十六', '十七', '十八', '十九', '二十', +] + + +def _renumber_sections(sections: list) -> list: + """ + 对章节列表按层级顺序重新编号，确保删除/增减章节后序号连续。 
+ level 1 → 整数字符串 "1","2",... + level 2 → "1.1","1.2",... + level 3 → "1.1.1","1.1.2",... + level 4 → "1.1.1.1",... + 直接修改传入列表中各节点的 number 字段，并返回该列表。 + """ + counters = [0] * 5 # 索引 0-3 对应 level 1-4 + for s in sections: + level = s['level'] + idx = level - 1 + counters[idx] += 1 + for j in range(idx + 1, len(counters)): + counters[j] = 0 + if level == 1: + s['number'] = str(counters[0]) + else: + s['number'] = '.'.join(str(counters[i]) for i in range(level)) + return sections + + +def _sections_to_outline_text(bid_title: str, sections: list) -> str: + """将章节列表还原为大纲文本：一级「一、」子级「1.1 」，与 AI 目录示例一致。""" + lines = [] + if bid_title: + lines.append(bid_title) + for s in sections: + level = int(s.get('level', 1)) + title = s.get('title', '') + number = s.get('number', '') + line = format_heading_display(level, number, title) + indent = '\u3000' * (level - 1) + lines.append(f'{indent}{line}') + return '\n'.join(lines) + + +def _parse_outline(text: str): + """ + 将大纲文本解析为章节列表，并自动重排序号（修复删除章节后序号不连续的问题）。 + 返回 (bid_title, sections_list, normalized_text) + 每个 section: {number, title, level, is_leaf, order_index} + """ + lines = text.strip().split('\n') + bid_title = '' + sections = [] + order = 0 + + # 第一行非章节行作为标题 + for i, line in enumerate(lines): + stripped = line.strip() + if not stripped: + continue + is_chapter_line = ( + bool(re.match(r'^[一二三四五六七八九十百第]', stripped)) + or bool(re.match(r'^\d+(?:[.．、]\s*|\s+)?\S+', stripped)) + ) + if not is_chapter_line: + bid_title = stripped + lines = lines[i + 1:] + break + break + + chapter_counter = 0 + + for line in lines: + raw_line = line.rstrip('\n') + stripped = raw_line.strip() + if not stripped: + continue + # 去掉行首全角/半角缩进后再匹配编号，避免「　　1.1 标题」无法识别 + parse_line = stripped.lstrip('\u3000\u2003\u2002\u00a0 \t') + + # 一级：中文数字 + 顿号/句号 + m1 = re.match(r'^([一二三四五六七八九十百]+)[、。.]\s*(.*)', parse_line) + if m1: + cn = m1.group(1) + title = m1.group(2).strip() + chapter_counter = CN_NUM_MAP.get(cn, chapter_counter + 1) + sections.append({ + 
'number': str(chapter_counter), + 'title': title, + 'level': 1, + 'is_leaf': True, + 'order_index': order, + }) + order += 1 + continue + + # 二/三/四级须先于「一级纯数字+顿号/空格」匹配，避免 "1.1 标题" 被误成一级 1 且 title=".1 标题" 导致节数虚增 + m_num = re.match(r'^(\d+(?:\.\d+)+)\s+(.*)', parse_line) + if m_num: + num_str = m_num.group(1) + title = m_num.group(2).strip() + level = num_str.count('.') + 1 + sections.append({ + 'number': num_str, + 'title': title, + 'level': min(level, 4), + 'is_leaf': True, + 'order_index': order, + }) + order += 1 + continue + + # 一级：阿拉伯数字 + 可选分隔（支持 "1 标题"、"1.标题"、"1标题"）—— 不含 1.1 形式（已上处理） + m1_en = re.match(r'^(\d+)(?:[、。．.]\s*|\s+)?(.*)', parse_line) + if m1_en: + chapter_no = int(m1_en.group(1)) + title = (m1_en.group(2) or '').strip() + title = re.sub(r'^[、。．.\s]+', '', title) + if title: + chapter_counter = chapter_no + sections.append({ + 'number': str(chapter_counter), + 'title': title, + 'level': 1, + 'is_leaf': True, + 'order_index': order, + }) + order += 1 + continue + + # 兜底：无编号行按缩进推断层级（支持“纯标题大纲”） + indent_full = len(re.match(r'^[\u3000 ]*', raw_line).group(0)) + # 约定：每 1 个全角空格/2 个半角空格视作 1 级缩进 + level = min(max(1, (indent_full // 2) + 1), 4) + if level == 1: + chapter_counter += 1 + number = str(chapter_counter) + else: + number = '1.' * (level - 1) + '1' + sections.append({ + 'number': number.strip('.'), + 'title': parse_line, + 'level': level, + 'is_leaf': True, + 'order_index': order, + }) + order += 1 + + # 重排序号（核心修复：删除章节后确保编号连续） + _renumber_sections(sections) + + # 标记非叶节点（在重排后执行，确保前缀匹配正确） + nums = [s['number'] for s in sections] + for s in sections: + prefix = s['number'] + '.' 
+ if any(n.startswith(prefix) for n in nums): + s['is_leaf'] = False + + # 重建规范大纲文本（供回写数据库） + normalized_text = _sections_to_outline_text(bid_title, sections) + + return bid_title, sections, normalized_text + + +# ─── 数据库工具 ─────────────────────────────────────────────────────────── + +def _get_tender_data(conn, project_id): + cur = conn.cursor() + cur.execute( + "SELECT summary, rating_requirements, rating_json, raw_text, boq_summary, tender_kind " + "FROM tender_data WHERE project_id=?", + (project_id,) + ) + row = cur.fetchone() + if row: + return { + 'summary': row[0], + 'rating_requirements': row[1], + 'rating_json': row[2], + 'raw_text': row[3], + 'boq_summary': row[4] or '', + 'tender_kind': row[5] or 'engineering', + } + return None + + +def _get_outline_text(conn, project_id): + cur = conn.cursor() + cur.execute("SELECT outline FROM tender_data WHERE project_id=?", (project_id,)) + row = cur.fetchone() + return row[0] if row and row[0] else '' + + +def _save_outline_text(conn, project_id, outline_text): + cur = conn.cursor() + # 兜底：若 tender_data 尚未初始化，先补齐空记录，避免 UPDATE 0 行导致“假保存成功” + cur.execute( + "INSERT OR IGNORE INTO tender_data (project_id, status) VALUES (?, 'pending')", + (project_id,), + ) + cur.execute( + "UPDATE tender_data SET outline=?, updated_at=? 
WHERE project_id=?", + (outline_text, datetime.now(), project_id), + ) + conn.commit() + + +def _save_sections(conn, project_id, sections): + cur = conn.cursor() + # 清除旧章节 + cur.execute("DELETE FROM bid_sections WHERE project_id=?", (project_id,)) + for s in sections: + cur.execute(''' + INSERT INTO bid_sections + (project_id, section_number, section_title, level, is_leaf, order_index, status) + VALUES (?, ?, ?, ?, ?, ?, 'pending') + ''', (project_id, s['number'], s['title'], s['level'], 1 if s['is_leaf'] else 0, s['order_index'])) + conn.commit() + + +def _get_section(conn, section_id): + cur = conn.cursor() + cur.execute( + "SELECT id, section_number, section_title, level, is_leaf, content, intro_content FROM bid_sections WHERE id=?", + (section_id,) + ) + row = cur.fetchone() + if row: + return { + 'id': row[0], 'section_number': row[1], 'section_title': row[2], + 'level': row[3], 'is_leaf': row[4], 'content': row[5], 'intro_content': row[6] + } + return None + + +def _update_section_status(conn, section_id, status, error=''): + cur = conn.cursor() + cur.execute( + "UPDATE bid_sections SET status=?, error_message=?, updated_at=? WHERE id=?", + (status, error, datetime.now(), section_id) + ) + conn.commit() + + +def _update_section_content(conn, section_id, content, intro_content): + cur = conn.cursor() + cur.execute( + "UPDATE bid_sections SET content=?, intro_content=?, updated_at=? 
WHERE id=?", + (content, intro_content, datetime.now(), section_id) + ) + conn.commit() + + +# ─── 线程安全的数据库操作（每次独立开关连接，启用 WAL）────────────────── + +def _db_connect(db_path: str) -> sqlite3.Connection: + """创建启用 WAL 模式的连接，适合多线程并发写入""" + conn = sqlite3.connect(db_path, timeout=30, check_same_thread=False) + conn.execute('PRAGMA journal_mode=WAL') + return conn + + +def _update_section_status_safe(db_path, section_id, status, error=''): + conn = _db_connect(db_path) + try: + _update_section_status(conn, section_id, status, error) + finally: + conn.close() + + +def _update_section_content_safe(db_path, section_id, content, intro_content): + conn = _db_connect(db_path) + try: + _update_section_content(conn, section_id, content, intro_content) + finally: + conn.close() + + +def _set_project_status(conn, project_id, status, error=''): + cur = conn.cursor() + cur.execute( + "UPDATE projects SET outline_status=?, outline_error=?, updated_at=? WHERE id=?", + (status, error, datetime.now(), project_id) + ) + conn.commit() + + +# ─── AI自动填充小章节 ─────────────────────────────────────────────────────── + +def expand_outline( + outline_text: str, + summary: str = '', + rating_requirements: str = '', + project_id: int = 0, + target_pages: int = 0, +) -> str: + """ + 根据用户输入的主章节标题，自动填充子章节。 + + target_pages: 小章节行总数上界用（与 volume_chapters 线性映射 + ±10%）；须由调用方传入本次请求 + 的页数（如前端目标页数），避免仅依赖 process 内全局 config 在多进程或与配置不同步时未生效（出现数百节）。 + 为 0 时不做条数限制（与未启用目标页数一致）。 + """ + lines = outline_text.strip().split('\n') + bid_title = '' + main_chapters = [] + + # 提取标书标题（第一行非章节行且较长时视为标题） + for i, line in enumerate(lines): + stripped = line.strip() + if not stripped: + continue + is_chapter_format = re.match(r'^[一二三四五六七八九十百第]', stripped) or re.match(r'^\d+[.．、\s]', stripped) + if not is_chapter_format and len(stripped) > 50: + bid_title = stripped + lines = lines[i + 1:] + break + break + + # 提取一级章节 + for line in lines: + stripped = line.strip() + if not stripped: + continue + + # 先排除二级及以上章节 + if 
re.match(r'^\d+(?:\.\d+)+', stripped): + continue + + m1_cn = re.match(r'^([一二三四五六七八九十百]+)[、。．.\s]+\s*(.*)', stripped) + if not m1_cn: + m1_cn = re.match(r'^第([一二三四五六七八九十百]+)[章节]\s*(.*)', stripped) + if not m1_cn: + m1_cn = re.match(r'^([一二三四五六七八九十百]+)(?![一二三四五六七八九十百])\s+(.*)', stripped) + + m1_en = re.match(r'^(\d+)[、。．.\s]+\s*(.*)', stripped) + if not m1_en: + m1_en = re.match(r'^第(\d+)[章节]\s*(.*)', stripped) + if not m1_en: + m1_en = re.match(r'^(\d+)(?!\d)\s+(.*)', stripped) + if not m1_en: + m1_en = re.match(r'^(\d+)([^\d].*)', stripped) + + if m1_cn or m1_en: + title = (m1_cn.group(2) if m1_cn else m1_en.group(2)).strip() + title = re.sub(r'^[、。．.\s]+', '', title) + if title: + main_chapters.append({'title': title}) + else: + # 没有编号的短文本行，也允许作为主章节 + if 0 < len(stripped) < 50: + main_chapters.append({'title': stripped}) + + if not main_chapters: + logger.warning(f'expand_outline未找到主章节，输入大纲：{outline_text[:200]}') + return outline_text + + expanded_lines = [] + if bid_title: + expanded_lines.append(bid_title) + + # 并发生成主章节的小章节（附件类主章不扩展子目录，保持“仅图/表块”） + results: List[Optional[str]] = [None] * len(main_chapters) + chapters_to_expand: List[Tuple[int, Dict[str, Any]]] = [] + for idx, chapter in enumerate(main_chapters): + ct = chapter['title'] + if att_sec.should_skip_expand_subchapters(ct): + # 附件章节不再自动补“依据/说明/参数”等文字子项 + results[idx] = '' + label = att_sec.parse_attachment_label(ct) + if label: + logger.info( + 'expand_outline 附件主章节跳过子目录扩展: title=%r attachment_label=%r', + ct, label, + ) + else: + logger.info( + 'expand_outline 附件主章节跳过子目录扩展: title=%r', + ct, + ) + else: + chapters_to_expand.append((idx, chapter)) + + if chapters_to_expand: + tp = max(0, int(target_pages or 0)) + per_main: Optional[List[int]] = None + if tp > 0: + k_exp = len(chapters_to_expand) + n_total = vol_ch.subchapter_total_effective(tp, k_exp, random.Random()) + per_main = vol_ch.allocate_subchapters_to_mains(n_total, k_exp) + + # 全局信号量已保护AI调用，此处可提高到接近LLM上限（默认12-20） + max_workers = 
min(len(chapters_to_expand), getattr(config, 'MAX_CONCURRENT_SECTIONS', 15)) + with ThreadPoolExecutor(max_workers=max_workers) as executor: + future_to_chapter = { + executor.submit( + _generate_sub_chapters, + ch['title'], + summary, + rating_requirements, + idx + 1, + project_id, + per_main[i] if per_main else None, + ): (idx, ch['title']) + for i, (idx, ch) in enumerate(chapters_to_expand) + } + for future in as_completed(future_to_chapter): + idx, title = future_to_chapter[future] + try: + results[idx] = future.result() + logger.info(f'主章节扩展成功: {title}') + except Exception as e: + logger.error(f'主章节扩展失败: {title}, 错误: {e}') + results[idx] = '' + + # 组装结果 + for idx, chapter in enumerate(main_chapters): + chapter_num = idx + 1 + cn_num = _CN_NUMS_LIST[chapter_num] if chapter_num < len(_CN_NUMS_LIST) else str(chapter_num) + expanded_lines.append(f'{cn_num}、{chapter["title"]}') + if results[idx]: + expanded_lines.append(results[idx]) + + return '\n'.join(expanded_lines) + + +def _extract_title_text(title: str) -> str: + """从标题中提取纯文本内容，去除序号和标点符号。""" + text = re.sub(r'^[一二三四五六七八九十百]+[、。.]\s*', '', title.strip()) + text = re.sub(r'^\d+(?:\.\d+)*[、。.]?\s*', '', text) + text = re.sub(r'^\s*[、。，,；;：:]+\s*', '', text) + text = re.sub(r'\s*[、。，,；;：:]+\s*$', '', text) + return text.strip() + + +def _generate_sub_chapters( + chapter_title: str, + summary: str, + rating_requirements: str, + chapter_num: int, + project_id: int = 0, + max_subchapters: Optional[int] = None, +) -> str: + """为单个主章节生成子章节大纲。""" + if max_subchapters is not None and max_subchapters <= 0: + return '' + + boq_summary = _get_boq_summary_for_chapter(chapter_title, summary) + prompt = P.get_chapter_outline_prompt( + summary, chapter_title, rating_requirements, max_subchapters=max_subchapters + ) + if boq_summary: + prompt += ( + '\n\n【工程量清单关键信息】\n' + f'{boq_summary}\n\n请严格根据工程量清单中的工程项目生成子章节，确保每个子章节都与具体工程内容对应。' + ) + + try: + response = ai_client.chat( + prompt, + 
system='你是一位专业的标书大纲生成专家。请根据主章节标题和工程量清单内容生成合适的子章节列表，严格遵守编号规则：' + '绝对禁止出现1.0、2.0、1.0.1等0开头编号；' + '二级从X.1开始，三级从X.1.1开始，四级从X.1.1.1开始；' + '只输出子章节，不重复主章节标题。', + temperature=0.5, + max_tokens=2048, + request_timeout=getattr(config, 'OUTLINE_REQUEST_TIMEOUT', 300), + ) + logger.info(f'_generate_sub_chapters AI响应章节={chapter_title}，长度={len(response)}') + + main_title_text = _extract_title_text(chapter_title) + lines = response.strip().split('\n') + level_counts = {1: 0, 2: 0, 3: 0, 4: 0} + result_lines = [] + + for line in lines: + if not line or not line.strip(): + continue + + indent_count = 0 + remaining = line + while remaining and (remaining[0] == '\u3000' or remaining[0] == ' '): + indent_count += 1 + remaining = remaining[1:] + + remaining = re.sub(r'^[\s#*>\-]+', '', remaining).strip() + if not remaining: + continue + + m = re.match(r'^(\d+(?:\.\d+)*)[、。．.]?\s*(.*)', remaining) + if m: + original_num = m.group(1) + parts = original_num.split('.') + has_invalid_zero = any(i > 0 and part and part[0] == '0' for i, part in enumerate(parts)) + if has_invalid_zero: + continue + if len(parts) > 1: + level = len(parts) - 1 + else: + if indent_count == 0: + level = 1 + elif indent_count <= 2: + level = 2 + else: + level = 3 + title = m.group(2).strip() + else: + m_cn = re.match(r'^([一二三四五六七八九十百]+)[、。．.]\s*(.*)', remaining) + if m_cn: + title = m_cn.group(2).strip() + level = 1 + else: + title = remaining + if indent_count == 0: + level = 1 + elif indent_count <= 2: + level = 2 + else: + level = 3 + + title = _extract_title_text(title) + if not title or len(title) < 2: + continue + + if main_title_text and _extract_title_text(title) == main_title_text: + continue + + level = min(max(level, 1), 3) + level_counts[level] += 1 + for l in range(level + 1, 5): + level_counts[l] = 0 + + if level == 1: + num = f'{chapter_num}.{level_counts[1]}' + indent = '' + elif level == 2: + num = f'{chapter_num}.{level_counts[1]}.{level_counts[2]}' + indent = '\u3000' + else: + num = 
f'{chapter_num}.{level_counts[1]}.{level_counts[2]}.{level_counts[3]}' + indent = '\u3000\u3000' + + result_lines.append(f'{indent}{num} {title}') + + if max_subchapters is not None and max_subchapters > 0 and len(result_lines) > max_subchapters: + result_lines = result_lines[:max_subchapters] + + return '\n'.join(result_lines) + except Exception: + logger.exception(f'生成子章节失败 chapter={chapter_title}') + return '' + + +def _get_boq_summary_for_chapter(chapter_title: str, summary: str) -> str: + """ + 从摘要中提取与施工方案相关的工程量清单信息。 + """ + if not summary: + return '' + + boq_keywords = [ + '项目编码', '清单编码', '编码', '编号', '序号', '项目编号', '清单编号', + '项目名称', '清单名称', '名称', '工程名称', '清单项目名称', '分项名称', + '计量单位', '单位', '计量', '工程量', '数量', '清单数量', '清单工程量', + '综合单价', '单价', '投标单价', '综合价', '合价', '金额', '合计金额', '综合合价', '合计', '总价', '小计', + '项目特征', '项目特征描述', '特征描述', '做法说明', '工程内容', '工作内容', '详述', '说明', '特征', '项目特征及内容', + '施工内容', '工艺要求', '技术措施', '施工要求', '施工方法' + ] + + lines = summary.strip().split('\n') + boq_lines = [] + for line in lines: + if any(keyword in line for keyword in boq_keywords): + boq_lines.append(line.strip()) + + if boq_lines: + return '\n'.join(boq_lines[:20]) + return '' diff --git a/modules/knowledge.py b/modules/knowledge.py new file mode 100644 index 0000000..6d50e2e --- /dev/null +++ b/modules/knowledge.py @@ -0,0 +1,288 @@ +""" +企业知识库模块（无外部向量库依赖） + +存储后端：SQLite（与主数据库共用同一文件） + - knowledge_vectors 表：文本块 + JSON 向量 + - knowledge_files 表：文件元数据（已在 app.py init_db 中建立） + +检索策略： + Qwen / OpenAI provider → Embedding API + 余弦相似度（语义检索） + DeepSeek / Ollama → SQL LIKE 关键词检索（降级） +""" +import json +import math +import logging +import os +import sqlite3 +import threading +from datetime import datetime + +import config +from utils.file_utils import extract_text, split_text_chunks + +logger = logging.getLogger(__name__) + +# 正在后台入库的文件名集合（供前端轮询感知"处理中"状态） +_processing_files: set = set() +_processing_lock = threading.Lock() + +# 每次 Embedding API 批量请求的块数（避免单次请求过大） +_EMBED_BATCH = 16 + + +# ─── 数据库 
────────────────────────────────────────────────────────────────── + +def _conn() -> sqlite3.Connection: + return sqlite3.connect(config.DB_PATH) + + +def _init_tables(cur: sqlite3.Cursor) -> None: + """确保向量块表存在并创建优化索引（极速检索）。knowledge_files 已由 app.py init_db 创建""" + cur.execute(''' + CREATE TABLE IF NOT EXISTS knowledge_vectors ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + file_name TEXT NOT NULL, + chunk_idx INTEGER NOT NULL, + text TEXT NOT NULL, + embedding TEXT, + UNIQUE(file_name, chunk_idx) + ) + ''') + # 优化索引：加速LIKE查询和向量检索时的文本过滤 + cur.execute('CREATE INDEX IF NOT EXISTS idx_kv_file ON knowledge_vectors(file_name)') + cur.execute('CREATE INDEX IF NOT EXISTS idx_kv_text ON knowledge_vectors(text)') # helps FTS/LIKE + cur.execute('PRAGMA optimize') # SQLite auto-optimization + + +# ─── Embedding API ──────────────────────────────────────────────────────────── + +def _get_embeddings_batch(texts: list[str]) -> list[list[float] | None]: + """ + 调用 ai_client.get_embeddings (复用全局 semaphore 和客户端逻辑)。 + 不支持 Embedding 的 provider 返回全 None 列表。优化了并发控制。 + """ + if not texts: + return [] + + try: + # 使用统一 ai_client 接口，确保全局LLM semaphore生效，避免重复客户端创建 + from utils import ai_client + embeddings = ai_client.get_embeddings(texts) + return embeddings + except Exception as e: + if "NotImplementedError" in str(type(e).__name__) or "不支持" in str(e): + logger.info('Embedding provider不支持，降级到关键词检索') + return [None] * len(texts) + logger.warning(f'Embedding API 调用失败，将使用关键词检索降级: {e}') + return [None] * len(texts) + + +def _cosine(a: list[float], b: list[float]) -> float: + """纯 Python 余弦相似度，无需 numpy""" + dot = sum(x * y for x, y in zip(a, b)) + na = math.sqrt(sum(x * x for x in a)) + nb = math.sqrt(sum(x * x for x in b)) + return dot / (na * nb) if na and nb else 0.0 + + +# ─── 公开接口 ───────────────────────────────────────────────────────────────── + +def is_available() -> dict: + """ + 知识库始终可用（无外部依赖），返回当前状态。 + search_mode: 'vector'（语义检索）或 'keyword'（关键词降级） + """ + with _processing_lock: + 
processing = list(_processing_files) + + try: + db = _conn() + cur = db.cursor() + _init_tables(cur) + db.commit() + + cur.execute('SELECT COUNT(*) FROM knowledge_vectors') + doc_count = cur.fetchone()[0] + + # 判断是否已有向量（即 Embedding API 是否可用过） + cur.execute('SELECT 1 FROM knowledge_vectors WHERE embedding IS NOT NULL LIMIT 1') + has_embedding = cur.fetchone() is not None + + db.close() + + provider = getattr(config, 'MODEL_PROVIDER', '') + can_embed = provider in ('qwen', 'openai', 'kimi') + mode = 'vector' if (has_embedding or can_embed) else 'keyword' + + return { + 'available': True, + 'doc_count': doc_count, + 'processing': processing, + 'search_mode': mode, + } + except Exception as e: + return { + 'available': True, + 'doc_count': 0, + 'processing': processing, + 'search_mode': 'keyword', + 'error': str(e), + } + + +def add_file(file_path: str, db_path: str) -> dict: + """ + 将文件切块 → 批量 Embedding → 写入 SQLite。 + 此函数在后台线程中调用，_processing_files 用于前端感知进度。 + """ + file_name = os.path.basename(file_path) + with _processing_lock: + _processing_files.add(file_name) + + try: + text = extract_text(file_path) + chunks = split_text_chunks(text, config.CHUNK_SIZE, config.CHUNK_OVERLAP) + if not chunks: + return {'success': False, 'error': '文件内容为空，无法入库'} + + # 批量获取 Embedding（Qwen/OpenAI provider 有效；否则全 None） + embeddings: list[list[float] | None] = [] + for i in range(0, len(chunks), _EMBED_BATCH): + batch = chunks[i:i + _EMBED_BATCH] + embeddings.extend(_get_embeddings_batch(batch)) + + db = _conn() + try: + cur = db.cursor() + _init_tables(cur) + + # 先删除同名文件的旧数据 + cur.execute('DELETE FROM knowledge_vectors WHERE file_name=?', (file_name,)) + + for idx, (chunk, emb) in enumerate(zip(chunks, embeddings)): + emb_json = json.dumps(emb) if emb is not None else None + cur.execute( + 'INSERT INTO knowledge_vectors (file_name, chunk_idx, text, embedding) VALUES (?,?,?,?)', + (file_name, idx, chunk, emb_json), + ) + + cur.execute(''' + INSERT OR REPLACE INTO knowledge_files 
(file_name, file_path, chunk_count, added_at) + VALUES (?, ?, ?, ?) + ''', (file_name, file_path, len(chunks), datetime.now())) + + db.commit() + finally: + db.close() + + logger.info(f'知识库入库完成: {file_name}，{len(chunks)} 块' + f'{"（含向量）" if any(e is not None for e in embeddings) else "（关键词模式）"}') + return {'success': True, 'chunks': len(chunks)} + + except Exception as e: + logger.exception('知识库添加文件失败') + return {'success': False, 'error': str(e)} + finally: + with _processing_lock: + _processing_files.discard(file_name) + + +def search(query: str, top_k: int = None) -> list[str]: + """ + 从知识库检索与 query 最相关的文本块。 + - 向量模式：获取 query 的 Embedding → 余弦相似度排序 + - 关键词模式（降级）：SQL LIKE 多词匹配 + """ + if top_k is None: + top_k = config.TOP_K_KNOWLEDGE + + try: + db = _conn() + try: + cur = db.cursor() + _init_tables(cur) + db.commit() + + cur.execute('SELECT COUNT(*) FROM knowledge_vectors') + if cur.fetchone()[0] == 0: + return [] + + # ── 向量语义检索 ────────────────────────────────────────────────── + q_embs = _get_embeddings_batch([query]) + q_emb = q_embs[0] if q_embs else None + + if q_emb is not None: + # 极速优化：限制扫描行数（避免知识库大时全表扫描），优先最近添加的内容 + cur.execute( + '''SELECT text, embedding FROM knowledge_vectors + WHERE embedding IS NOT NULL + ORDER BY id DESC LIMIT 500''' + ) + rows = cur.fetchall() + if rows: + scored: list[tuple[float, str]] = [] + for text, emb_json in rows: + try: + emb = json.loads(emb_json) + scored.append((_cosine(q_emb, emb), text)) + except Exception: + continue + scored.sort(reverse=True) + return [t for _, t in scored[:top_k]] + + # ── 关键词降级检索（DeepSeek / Ollama 无 Embedding API）───────── + # 过滤纯数字/编号词（如 "1.2" "一、"），避免误匹配无关段落 + import re as _re + _num_pat = _re.compile(r'^[\d\.\-、一二三四五六七八九十]+$') + words = [ + w.strip() for w in query.split() + if len(w.strip()) > 1 and not _num_pat.match(w.strip()) + ][:6] + if not words: + cur.execute('SELECT text FROM knowledge_vectors LIMIT ?', (top_k,)) + return [r[0] for r in cur.fetchall()] + + conditions = ' OR 
'.join(['text LIKE ?' for _ in words]) + params = [f'%{w}%' for w in words] + [top_k] + cur.execute( + f'SELECT text FROM knowledge_vectors WHERE {conditions} LIMIT ?', params + ) + return [r[0] for r in cur.fetchall()] + + finally: + db.close() + + except Exception as e: + logger.error(f'知识库检索失败: {e}') + return [] + + +def list_files(db_path: str) -> list[dict]: + """列出知识库已入库的文件""" + try: + db = sqlite3.connect(db_path) + cur = db.cursor() + cur.execute( + 'SELECT file_name, chunk_count, added_at FROM knowledge_files ORDER BY added_at DESC' + ) + rows = cur.fetchall() + db.close() + return [{'name': r[0], 'chunks': r[1], 'added_at': r[2]} for r in rows] + except Exception: + return [] + + +def delete_file(file_name: str, db_path: str) -> dict: + """从知识库删除指定文件的所有数据""" + try: + db = _conn() + cur = db.cursor() + _init_tables(cur) + cur.execute('DELETE FROM knowledge_vectors WHERE file_name=?', (file_name,)) + cur.execute('DELETE FROM knowledge_files WHERE file_name=?', (file_name,)) + db.commit() + db.close() + return {'success': True} + except Exception as e: + logger.exception('知识库删除文件失败') + return {'success': False, 'error': str(e)} diff --git a/modules/parser.py b/modules/parser.py new file mode 100644 index 0000000..5e904bb --- /dev/null +++ b/modules/parser.py @@ -0,0 +1,206 @@ +""" +招标文件解析模块 +流程：提取文本 → 生成摘要 → 提取评分要求 → 结构化JSON +""" +import json +import logging +import re +import sqlite3 +from concurrent.futures import ThreadPoolExecutor, as_completed +from datetime import datetime +from functools import partial + +from utils import ai_client, prompts as P +from utils.file_utils import extract_text, truncate_text +from utils.tender_kind_sections import ( + get_tender_kind_classify_prompt, + parse_tender_kind_response, +) + +logger = logging.getLogger(__name__) + + +def parse_boq_file(db_path: str, project_id: int, file_path: str, file_name: str) -> None: + """ + 后台线程：解析工程量清单文件 → 本地结构化分析 → AI 摘要 → 写库。 + boq_status: none → parsing → done / error + """ + from 
utils.bill_analysis import analyze_boq_pages, categories_to_prompt_appendix + from utils.boq_parser import extract_boq_pages + + conn = sqlite3.connect(db_path) + try: + _set_boq_status(conn, project_id, 'parsing', '正在提取工程量清单文本...') + + page_texts = extract_boq_pages(file_path) + boq_text = '\n'.join(page_texts).strip() + if not boq_text: + raise ValueError('未能从文件中提取到有效内容，请检查文件格式') + + _set_boq_status(conn, project_id, 'parsing', '正在本地解析清单结构...') + analysis = analyze_boq_pages(page_texts) + boq_analysis_json = json.dumps(analysis, ensure_ascii=False) + + structured = '' + if not analysis.get('scanned') and not analysis.get('no_bill_pages'): + structured = categories_to_prompt_appendix(analysis) + + _set_boq_status(conn, project_id, 'parsing', '正在生成工程量清单摘要...') + + summary_prompt = P.get_boq_summary_prompt(boq_text[:10000], structured) + boq_summary = ai_client.chat(summary_prompt, temperature=0.2, max_tokens=2048) + + cur = conn.cursor() + cur.execute(''' + UPDATE tender_data + SET boq_file_name=?, boq_text=?, boq_summary=?, boq_analysis_json=?, + boq_status='done', boq_error='', updated_at=? + WHERE project_id=? + ''', (file_name, boq_text[:12000], boq_summary, boq_analysis_json, datetime.now(), project_id)) + conn.commit() + logger.info(f'项目 {project_id} 工程量清单解析完成') + + except Exception as e: + logger.exception(f'工程量清单解析失败 project_id={project_id}') + _set_boq_status(conn, project_id, 'error', str(e)) + finally: + conn.close() + + +def _set_boq_status(conn, project_id, status, message=''): + cur = conn.cursor() + cur.execute(''' + UPDATE tender_data SET boq_status=?, boq_error=?, updated_at=? + WHERE project_id=? 
+ ''', (status, message, datetime.now(), project_id)) + conn.commit() + + +def parse_tender_file(db_path: str, project_id: int, file_path: str, file_name: str) -> None: + """ + 后台线程中运行：解析招标文件并将结果写入数据库。 + 极速优化：提取文本后并行执行3个独立AI任务（摘要、评分要求、类型识别）， + 然后顺序执行依赖的JSON结构化。全局信号量限制总并发≤20。 + status 字段：pending → parsing → done / error + """ + conn = sqlite3.connect(db_path) + try: + _set_status(conn, project_id, 'parsing', '正在提取文件文本...') + + # 1. 提取原始文本（I/O/CPU，可进一步并行但当前足够快） + raw_text = extract_text(file_path) + raw_text = truncate_text(raw_text, 60000) + excerpt = (raw_text or '')[:15000] + + _set_status(conn, project_id, 'parsing', '并行生成摘要、评分要求和类型识别...') + + # 并行准备3个独立AI任务（大幅加速解析环节） + def _run_summary(): + prompt = P.get_project_summary_prompt(raw_text) + return ai_client.chat(prompt, temperature=0.3, max_tokens=4096) + + def _run_rating(): + prompt = P.get_rating_requirements_prompt(raw_text) + return ai_client.chat(prompt, temperature=0.2, max_tokens=4096) + + def _run_kind(): + prompt = get_tender_kind_classify_prompt(excerpt) + raw = ai_client.chat(prompt, temperature=0.1, max_tokens=32) + return parse_tender_kind_response(raw) + + # 使用有限线程池 + 全局semaphore保护执行（并发上限20） + with ThreadPoolExecutor(max_workers=3, thread_name_prefix='parse') as executor: + future_summary = executor.submit(_run_summary) + future_rating = executor.submit(_run_rating) + future_kind = executor.submit(_run_kind) + + summary = future_summary.result() + rating_md = future_rating.result() + tender_kind = future_kind.result() + + logger.info(f'项目 {project_id} 招标文件类型识别为: {tender_kind}') + + _set_status(conn, project_id, 'parsing', '正在结构化评分数据...') + + # 4. 
依赖rating_md的JSON结构化（顺序执行） + rating_json_prompt = P.get_rating_json_prompt(rating_md) + rating_json_raw = ai_client.chat(rating_json_prompt, temperature=0.1, max_tokens=2048) + rating_json_str = _clean_json(rating_json_raw) + + # 写入数据库 + _upsert_tender_data(conn, project_id, file_name, raw_text, + summary, rating_md, rating_json_str, tender_kind) + + # Deep integration: persist diagram/anon settings to projects table (auto-defaults) + # Future: add AI extraction prompt for diagram intent and anon rules from raw_text + cur = conn.cursor() + cur.execute(''' + UPDATE projects SET + enable_figure = COALESCE(enable_figure, 1), + enable_table = COALESCE(enable_table, 1), + anon_requirements = COALESCE(anon_requirements, '不得出现投标人身份信息') + WHERE id = ? + ''', (project_id,)) + conn.commit() + + _set_status(conn, project_id, 'done', '解析完成（已同步生成设置）') + logger.info(f'项目 {project_id} 招标文件解析完成（并行加速完成，生成设置已打通）') + + except Exception as e: + logger.exception(f'解析失败 project_id={project_id}') + _set_status(conn, project_id, 'error', str(e)) + finally: + conn.close() + + +# ─── 内部工具 ────────────────────────────────────────────────────────────── + +def _set_status(conn, project_id, status, message=''): + cur = conn.cursor() + cur.execute(''' + INSERT INTO tender_data (project_id, status, error_message) + VALUES (?, ?, ?) + ON CONFLICT(project_id) DO UPDATE SET status=?, error_message=?, updated_at=? 
+ ''', (project_id, status, message, status, message, datetime.now())) + conn.commit() + + +def _upsert_tender_data(conn, project_id, file_name, raw_text, + summary, rating_md, rating_json_str, + tender_kind: str = 'engineering'): + cur = conn.cursor() + cur.execute(''' + INSERT INTO tender_data + (project_id, file_name, raw_text, summary, rating_requirements, rating_json, + tender_kind, status, error_message) + VALUES (?, ?, ?, ?, ?, ?, ?, 'done', '') + ON CONFLICT(project_id) DO UPDATE SET + file_name=?, raw_text=?, summary=?, rating_requirements=?, + rating_json=?, tender_kind=?, status='done', error_message='', updated_at=? + ''', ( + project_id, file_name, raw_text, summary, rating_md, rating_json_str, tender_kind, + file_name, raw_text, summary, rating_md, rating_json_str, tender_kind, datetime.now() + )) + conn.commit() + + +def _clean_json(raw: str) -> str: + """尝试从 AI 返回中提取 JSON 字符串""" + # 去除 markdown 代码块 + raw = re.sub(r'```(?:json)?\s*', '', raw) + raw = raw.replace('```', '').strip() + # 验证是否是有效 JSON + try: + json.loads(raw) + return raw + except json.JSONDecodeError: + # 尝试提取 { ... 
} 部分 + m = re.search(r'\{[\s\S]*\}', raw) + if m: + candidate = m.group(0) + try: + json.loads(candidate) + return candidate + except Exception: + pass + return raw diff --git a/prompts/chapter_outline.txt b/prompts/chapter_outline.txt new file mode 100644 index 0000000..75453ef --- /dev/null +++ b/prompts/chapter_outline.txt @@ -0,0 +1,36 @@ +- 角色：技术标书架构师 + +- 能力： + - 单章节深度解构能力 + - 跨章节协同规划视野 + - 评分权重动态分配策略 + +- 任务：根据招标文件概要、章节主题、评分要求，生成结构化的技术标书该章节的目录 + +- 输出要求： + - 采用四级嵌套编码体系（X.X.X.X）确保章节颗粒度可控 + - 直接给出生成的章节大纲，禁止解释和引导词 + - markdown格式输出 + + +- 示例输出，以"服务进度保障措施"为例： + 二、智慧物流系统全生命周期进度保障 + 　2.1 基于BIM的进度协同管理平台 + 　　2.1.1 多级进度计划耦合模型 + 　　　2.1.1.1 WBS-Milestone映射矩阵 + 　　　2.1.1.2 Primavera P6进度基线 + 　　2.1.2 资源约束进度优化算法 + 　　　2.1.2.1 基于CPM的缓冲区间动态分配 + 　　　2.1.2.2 资源平滑度R=0.92 + +- 招标文件概要： + {summary} + +- 章节主题： + {chapter} + +- 评分要求： + {score} + + + \ No newline at end of file diff --git a/prompts/outlines.txt b/prompts/outlines.txt new file mode 100644 index 0000000..5b65d9e --- /dev/null +++ b/prompts/outlines.txt @@ -0,0 +1,158 @@ +- 角色：技术标书架构师 +- 任务：生成适配技术评分标准的技术标书目录 +- 输出要求： + 采用四级嵌套编码体系（X.X.X.X）下实现按需分层 + 直接给出生成的目录，禁止解释和引导词 + +- 约束控制： + 根据项目生成标书的名称，如“XXXX项目技术标书” + 总的章节数应该控制在8-10个 + 章节颗粒度与评分指标权重正相关 + 技术实施类章节必须达到四级深度，管理保障类章节允许三级结构 + 同级节点数量必须有波动区间：技术方案类(4-7)、实施保障类(2-4)、创新应用类(1-3) + 目录的章节不能缺少包含以下关键词的内容： + - 对本项目的了解和分析 + - 项目工作重难点分析 + - 项目实施方案 + - 服务进度保障措施 + - 服务质量保障方案 + - 合理化建议 + - 服务承诺及处罚措施 + 目录不包含成本和预算内容，但要平衡项目预算、技术可行性以及技术的专业度 + +- 示例输出： + + 花岭新城BIM项目技术标书 + 一、总体实施方案 + 　1.1 项目理解与需求分析 + 　　1.1.1 项目概述 + 　　　　1.1.1.1 建设地点及规模 + 　　　　1.1.1.2 工程地质勘察报告 + 　　　　1.1.1.3 抗震设防烈度与防火等级 + 　　　　1.1.1.4 建筑结构形式与建筑面积分布 + 　　1.1.2 项目背景 + 　　　　1.1.2.1 核心宗旨与目标 + 　　　　1.1.2.2 地理位置与项目规模 + 　　1.1.3 项目目标 + 　　　　1.1.3.1 就业机会与基础设施提升 + 　　　　1.1.3.2 乡村振兴与经济增长 + 　　1.1.4 项目特点 + 　　　　1.1.4.1 框筒结构抗震性能 + 　　　　1.1.4.2 分阶段工程地质勘察 + 　　　　1.1.4.3 功能区域多样化 + + 二、建筑设计 + 　2.1 主要设计依据 + 　　　　2.1.1 国家标准与规范 + 　　　　2.1.2 行业标准与图集 + 　2.2 建筑结构设计 + 　　　　2.2.1 结构形式 + 　　　　2.2.2 结构材料 + 　　　　2.2.3 结构布局 + 　　　　2.2.4 结构经济指标 + 　　　　2.2.5 结构细节设计 + 　2.3 建筑功能布局 + 　　
　　2.3.1 C1#楼（厂房） + 　　　　　　2.3.1.1 功能分区明确 + 　　　　　　2.3.1.2 流线优化与安全性 + 　　　　2.3.2 配电房 + 　　　　　　2.3.2.1 设计目标与设备布置 + 　　　　　　2.3.2.2 空间规划与电气主接线方案 + 　　　　2.3.3 外廊及架空建筑 + 　　　　　　2.3.3.1 功能区域与景观设计 + 　　　　　　2.3.3.2 光照与通风优化 + 　2.4 建筑材料选用 + 　2.5 建筑外观设计 + 　2.6 建筑室内布局 + 　　　　2.6.1 功能分区与设计要点 + 　2.7 建筑安全和消防设计 + 　　　　2.7.1 建筑安全体系 + 　　　　2.7.2 消防系统设计 + 　2.8 建筑节能设计 + 　　　　2.8.1 节能措施与绿色建材 + 　　　　2.8.2 雨水收集系统 + + 三、结构设计 + 　3.1 结构形式 + 　3.2 结构材料 + 　　　　3.2.1 混凝土与钢材选用 + 　3.3 结构布局 + 　　　　3.3.1 结构柱网与通风疏散通道 + 　3.4 结构经济指标 + 　　　　3.4.1 抗震设计要求与用材控制 + 　3.5 结构细节设计 + 　　　　3.5.1 基础设计与钢结构细节 + 　　　　3.5.2 混凝土结构与抗震设计 + 　3.6 结构分析与计算 + + 四、给排水设计 + 　4.1 引言 + 　4.2 供水系统设计 + 　　　　4.2.1 供水管道与消防水源 + 　　　　4.2.2 节水设计与雨水收集 + 　4.3 排水系统设计 + 　　　　4.3.1 排水管道与雨水管理 + 　　　　4.3.2 污水处理与分流制度 + 　4.4 给排水设备选择 + 　4.5 细节设计 + 　4.6 监测与维护 + + 五、暖通设计 + 　5.1 引言 + 　5.2 供暖系统设计 + 　　　　5.2.1 供暖方式与设备选择 + 　　　　5.2.2 温度控制系统 + 　5.3 通风系统设计 + 　　　　5.3.1 通风方式与设备选择 + 　　　　5.3.2 空气质量控制 + 　5.4 空调系统设计 + 　　　　5.4.1 空调方式与设备选择 + 　　　　5.4.2 温湿度控制系统 + 　5.5 热水系统设计 + 　5.6 细节设计与监测维护 + + + 六、BIM设计 + 　6.1 项目总图与单体建筑设计 + 　6.2 道路与排水设计 + 　6.3 电气系统设计 + 　6.4 绿化设计 + 　6.5 BIM协同设计与施工管理 + 　6.6 数据管理与培训支持 + + 七、设计说明 + 　7.1 项目设计依据 + 　7.2 设计原则 + 　7.3 结构经济合理化 + 　7.4 建筑功能分区 + 　7.5 设计细节要求 + + 八、合理化建议 + 　8.1 建筑专业合理化建议 + 　8.2 结构专业合理化建议 + 　8.3 给排水专业合理化建议 + 　8.4 暖通专业合理化建议 + 　8.5 BIM专业合理化建议 + 8.6 技术和工艺方面的建议 + 8.7 成本和预算方面的建议 + 8.8 时间和进度方面的建议 + 8.9 施工质量管理方面的建议 + 8.10 质量和安全方面的建议 + 8.11 环境和可持续性方面的建议 + + 九、施工进度安排 + 　9.1 施工进度安排 + 　9.2 施工进度跟踪与管理 + 　9.3 施工质量管理 + 　9.4 施工现场管理 + 　9.5 施工结项与验收 + + 十、本项目工作重点难点分析 + 　10.1 工程特点与设计工作难点 + 　10.2 重点与难点分析 + 　10.3 综合解决措施 + + +- 招标文件内容： +{document_text} +""" \ No newline at end of file diff --git a/prompts/outlines_with_rating.txt b/prompts/outlines_with_rating.txt new file mode 100644 index 0000000..c635ab0 --- /dev/null +++ b/prompts/outlines_with_rating.txt @@ -0,0 +1,155 @@ +- 角色：技术标书架构师 +- 任务：生成适配技术评分标准的技术标书目录 +- 输出要求： + 采用四级嵌套编码体系（X.X.X.X）下实现按需分层 + 直接给出生成的目录，禁止解释和引导词 + +- 约束控制： + 根据项目生成标书的名称，如“XXXX项目技术标书” + 总的章节数应该控制在8-10个,不超过10个 + 目录的章节必须按照技术评分标准的项目生成，题目应包括技术评分项目中的关键词： + 
章节颗粒度与评分指标权重正相关 + 技术方案类章节必须达到四级深度，管理保障类章节允许三级结构 + 同级节点数量必须有波动区间：技术方案类(4-7)、实施保障类(2-4)、创新应用类(1-3) + 目录禁止包含报价、团队、资质、文件等商务性质的章节 + +- 示例输出： + + 花岭新城BIM项目技术标书 + 一、总体实施方案 + 　1.1 项目理解与需求分析 + 　　1.1.1 项目概述 + 　　　　1.1.1.1 建设地点及规模 + 　　　　1.1.1.2 工程地质勘察报告 + 　　　　1.1.1.3 抗震设防烈度与防火等级 + 　　　　1.1.1.4 建筑结构形式与建筑面积分布 + 　　1.1.2 项目背景 + 　　　　1.1.2.1 核心宗旨与目标 + 　　　　1.1.2.2 地理位置与项目规模 + 　　1.1.3 项目目标 + 　　　　1.1.3.1 就业机会与基础设施提升 + 　　　　1.1.3.2 乡村振兴与经济增长 + 　　1.1.4 项目特点 + 　　　　1.1.4.1 框筒结构抗震性能 + 　　　　1.1.4.2 分阶段工程地质勘察 + 　　　　1.1.4.3 功能区域多样化 + + 二、建筑设计 + 　2.1 主要设计依据 + 　　　　2.1.1 国家标准与规范 + 　　　　2.1.2 行业标准与图集 + 　2.2 建筑结构设计 + 　　　　2.2.1 结构形式 + 　　　　2.2.2 结构材料 + 　　　　2.2.3 结构布局 + 　　　　2.2.4 结构经济指标 + 　　　　2.2.5 结构细节设计 + 　2.3 建筑功能布局 + 　　　　2.3.1 C1#楼（厂房） + 　　　　　　2.3.1.1 功能分区明确 + 　　　　　　2.3.1.2 流线优化与安全性 + 　　　　2.3.2 配电房 + 　　　　　　2.3.2.1 设计目标与设备布置 + 　　　　　　2.3.2.2 空间规划与电气主接线方案 + 　　　　2.3.3 外廊及架空建筑 + 　　　　　　2.3.3.1 功能区域与景观设计 + 　　　　　　2.3.3.2 光照与通风优化 + 　2.4 建筑材料选用 + 　2.5 建筑外观设计 + 　2.6 建筑室内布局 + 　　　　2.6.1 功能分区与设计要点 + 　2.7 建筑安全和消防设计 + 　　　　2.7.1 建筑安全体系 + 　　　　2.7.2 消防系统设计 + 　2.8 建筑节能设计 + 　　　　2.8.1 节能措施与绿色建材 + 　　　　2.8.2 雨水收集系统 + + 三、结构设计 + 　3.1 结构形式 + 　3.2 结构材料 + 　　　　3.2.1 混凝土与钢材选用 + 　3.3 结构布局 + 　　　　3.3.1 结构柱网与通风疏散通道 + 　3.4 结构经济指标 + 　　　　3.4.1 抗震设计要求与用材控制 + 　3.5 结构细节设计 + 　　　　3.5.1 基础设计与钢结构细节 + 　　　　3.5.2 混凝土结构与抗震设计 + 　3.6 结构分析与计算 + + 四、给排水设计 + 　4.1 引言 + 　4.2 供水系统设计 + 　　　　4.2.1 供水管道与消防水源 + 　　　　4.2.2 节水设计与雨水收集 + 　4.3 排水系统设计 + 　　　　4.3.1 排水管道与雨水管理 + 　　　　4.3.2 污水处理与分流制度 + 　4.4 给排水设备选择 + 　4.5 细节设计 + 　4.6 监测与维护 + + 五、暖通设计 + 　5.1 引言 + 　5.2 供暖系统设计 + 　　　　5.2.1 供暖方式与设备选择 + 　　　　5.2.2 温度控制系统 + 　5.3 通风系统设计 + 　　　　5.3.1 通风方式与设备选择 + 　　　　5.3.2 空气质量控制 + 　5.4 空调系统设计 + 　　　　5.4.1 空调方式与设备选择 + 　　　　5.4.2 温湿度控制系统 + 　5.5 热水系统设计 + 　5.6 细节设计与监测维护 + + + 六、BIM设计 + 　6.1 项目总图与单体建筑设计 + 　6.2 道路与排水设计 + 　6.3 电气系统设计 + 　6.4 绿化设计 + 　6.5 BIM协同设计与施工管理 + 　6.6 数据管理与培训支持 + + 七、设计说明 + 　7.1 项目设计依据 + 　7.2 设计原则 + 　7.3 结构经济合理化 + 　7.4 建筑功能分区 + 　7.5 设计细节要求 + + 八、合理化建议 + 　8.1 建筑专业合理化建议 + 　8.2 结构专业合理化建议 + 　8.3 给排水专业合理化建议 + 　8.4 暖通专业合理化建议 + 　8.5 BIM专业合理化建议 + 8.6 技术和工艺方面的建议 + 8.7 成本和预算方面的建议 + 
8.8 时间和进度方面的建议 + 8.9 施工质量管理方面的建议 + 8.10 质量和安全方面的建议 + 8.11 环境和可持续性方面的建议 + + 九、施工进度安排 + 　9.1 施工进度安排 + 　9.2 施工进度跟踪与管理 + 　9.3 施工质量管理 + 　9.4 施工现场管理 + 　9.5 施工结项与验收 + + 十、本项目工作重点难点分析 + 　10.1 工程特点与设计工作难点 + 　10.2 重点与难点分析 + 　10.3 综合解决措施 + + +- 招标文件摘要： +{summary} + +- 技术评分标准： +{rating} + +""" \ No newline at end of file diff --git a/prompts/project_summary.txt b/prompts/project_summary.txt new file mode 100644 index 0000000..7c9c715 --- /dev/null +++ b/prompts/project_summary.txt @@ -0,0 +1,92 @@ +- 角色：招标文件编写专家，精通招标文件结构化、摘要编写 + +- 任务：根据用户提供的项目招标文件内容，生成一份专业、清晰的结构化摘要 + +- 要求： + + 一、摘要框架 + 1. 项目概况 + - 项目名称 + - 建设地点 + - 工程性质（新建/改建/扩建） + - 核心建设内容 + - 关键工程量指标 + - 特殊施工工艺（如顶管/盾构等） + - 项目概况 + + 2. 技术要求体系 + - 专业监测要求（分项列出核心监测指标） + - 技术标准规范 + - 质量管控要点 + - 特殊工艺标准 + + 3. 交付物矩阵 + - 阶段性成果清单（含时间节点） + - 最终交付文件要求 + - 成果验收标准 + - 备案审批流程 + + 4. 商务条款摘要 + - 合同期限 + - 支付结构 + - 报价约束条件 + - 违约条款要点 + - 知识产权约定 + + 5. 资质要求矩阵 + - 企业资质门槛 + - 人员资格要求 + - 设备配置标准 + - 同类项目经验 + + 6. 评标要素体系 + - 技术评分维度 + - 商务评分权重 + - 否决性条款 + - 实质性条款 + - 围标识别机制 + + + 二、处理规范 + 1. 信息抽取规则： + - 采用三级信息提炼法（关键数据→技术参数→约束条件） + - 识别并标注法定强制性条款（★号条款） + - 提取特殊工艺参数（例如顶管直径、沉井尺寸等） + + 2. 结构化呈现要求： + - 使用Markdown分级标题系统 + - 技术参数格式化处理 + - 流程节点采用时间轴呈现 + - 关键数据突出显示（例如预算金额、最高限价） + + 3. 专业术语处理： + - 保持行业术语准确性 + - 工程计量单位标准化转换 + - 法律条款原文引述 + + 三、输出示例 + 1.确保包含但不仅限于： + - 项目背景的技术参数分解 + - 监测要求的分类归纳 + - 成果交付的阶段性要求 + - 商务条款的要点提炼 + + 四、质量保障 + 1. 完整性核查清单： + - 验证五证要求（资质/业绩/人员/设备/资金） + - 检查三大核心条款（技术/商务/法律） + - 确认关键日期节点（工期/交付期/质保期） + + 2. 
风险提示机制： + - 标注异常约束条款 + - 识别排他性要求 + - 提示潜在履约风险点 + +请严格按照上述结构化框架处理输入的招标文件，生成专业、准确、易读的项目摘要报告。 +输出内容需符合工程领域专业规范，重点数据需二次核验确保准确性。 +严格按照招标文件的内容，确保输出内容的完整性。 +直接给出摘要，禁止说明和引导词。 + +- 用户提供的招标文件内容如下： + {bid_document} + diff --git a/prompts/rating_json.txt b/prompts/rating_json.txt new file mode 100644 index 0000000..c5afff2 --- /dev/null +++ b/prompts/rating_json.txt @@ -0,0 +1,23 @@ +- 任务：从工程项目招标文件中提取技术评分要求，并以严格的JSON格式输出。 + +- 要求： + 必须生成完整有效的JSON对象，不使用JSON之外的文本说明 + 数值类型字段不添加单位符号 + 包含所有的评分项及其权重分配 + 特殊说明字段仅在存在否决条款（强制性条款）时出现 + +- 输出结构（必须严格遵守根字段名与数组名，便于后续章节字数与要点映射）： + { + "items": [ + { + "id": "唯一短标识，如 T01", + "name": "评分项名称（与招标文件表述一致或精简概括）", + "weight": 数值型权重或分值（如 10 表示 10 分或 10%）, + "keywords": ["与本项相关的可选关键词1", "关键词2"] + } + ], + "notes": "可选：否决条款、阶梯得分等特殊说明；无则写空字符串" + } + +- 技术评分要求内容如下： + {tech_rating} diff --git a/prompts/rating_requirements copy.txt b/prompts/rating_requirements copy.txt new file mode 100644 index 0000000..17abcb3 --- /dev/null +++ b/prompts/rating_requirements copy.txt @@ -0,0 +1,46 @@ +- 角色：招标文件信息提取专家，精通技术评分/技术评审要求的提取 + +- 任务：请严格按照以下步骤分析提供的招标文件内容，并完整提取所有技术评分标准： + +- 步骤与要求： + + 1. **结构解析** + - 首先识别文件整体结构，仅提取“技术评分”/“技术评审”部分 + - 标注评分大类的权重占比（如出现） + + 2. **要素提取** + 对“技术评分”板块进行深度解析，要求： + - 提取评分的全部细节，不能省略 + - 明确列出技术评分的标准，如有（如"ISO认证+3分"、"项目经验每年加1分"） + + 3. 
**结果呈现样例** + 参考以下示例输出markdown结构化格式： + + # 招标技术评分细则 + + ## 技术评分(80分) + - 对本项目的了解和分析(12分) + → 对本项目的理解与项目背景把握准确，对本项目特点、实施目标和定位内容详尽，完全满足项目需要，科学、合理、针对性强、合理可行的，得 12 分；对本项目的理解与项目背景有一定把握，对本项目特点、实施目标和定位有阐述说明，基本可行的，得 8 分；对本项目的理解与项目背景把握片面，对本项目特点、实施目标和定位理解有较大偏差，可行性较差的，得 4 分；未提供不得分。 + → 合理可行指：（ 1）完全响应采购需求；（ 2）相关内容的表述具有针对性，全面、具体。 + → 基本可行指：（ 1）响应采购需求有微小偏差；（ 2）相关内容的表述有一定的层次性、针对性，但全面性不够。 + → 可行性较差指：（ 1）响应采购需求有较大偏差；（ 2）相关内容的表述针对性弱、全面性方面欠缺较大。 + - 项目工作重难点分析(12分) + → 根据供应商针对本项目工作重难点分析与解决方案的科学性、合理性且满足项目实际情况进行评分，项目工作重难点分析到位、有针对性、完全符合项目实际情况，对应的解决方案合理可行的，得 12 分； + 项目工作重难点内容基本准确、针对性一般、基本符合项目实际，对应的解决方案基本可行的，得 8 分； + 项目工作重难点分析一般，对应的解决方案一般、可行性较差的，得 4 分；未提供不得分。 + → 合理可行指：（ 1）完全响应采购需求；（ 2）相关内容的表述具有针对性，全面、具体。 + → 基本可行指：（ 1）响应采购需求有微小偏差；（ 2）相关内容的表述有一定的层次性、针对性，但全面性不够。 + → 可行性较差指：（ 1）响应采购需求有较大偏差；（ 2）相关内容的表述针对性弱、全面性方面欠缺较大。 + - 项目实施方案(12分) + （继续展开...） + + + +请严格按照上述结构化框架处理输入的招标文件，生成专业、准确的项目技术评分/评审要求。 +严格按照招标文件的内容，确保输出内容的完整性。 +直接输出评分/评审要求，禁止说明和引导词。 + +- 招标文件内容如下： + {bid_document} + + diff --git a/prompts/rating_requirements.txt b/prompts/rating_requirements.txt new file mode 100644 index 0000000..e9f563d --- /dev/null +++ b/prompts/rating_requirements.txt @@ -0,0 +1,43 @@ +- 角色：招标文件信息提取专家，精通技术评分/技术评审要求的提取 + +- 任务：请严格按照以下步骤分析提供的招标文件内容，并完整提取所有技术评分标准： + +- 步骤与要求： + + 1. **结构解析** + - 首先识别文件整体结构，仅提取“技术评分”/“技术评审要求”部分 + - 标注评分大类的权重占比（如出现） + + 2. **要素提取** + 对“技术评分”板块进行深度解析，要求： + - 提取评分的全部细节，不能省略 + - 明确列出量化指标，如有（如"ISO认证+3分"、"项目经验每年加1分"） + - 区分强制性条款（必须满足项）与竞争性条款（择优评分项），如有 + - 标注特殊要求（本地化服务、专利数量、团队资质等），如有 + + 3. **异常识别** + - 标出表述模糊的评分项（如"酌情加分""优/良/差等级"） + - 识别可能存在的矛盾条款 + - 提示需要注意的隐藏评分点（如投标格式错误扣分项） + + 4. 
**结果呈现样例** + 参考以下示例输出markdown结构化格式： + + # 招标技术评分细则 + + ## 技术评分(50%) + - 系统架构设计(20%) + → 要求：支持分布式部署（未满足直接废标） + → 加分项：采用微服务架构+3分 + （继续展开...） + + + +请严格按照上述结构化框架处理输入的招标文件，生成专业、准确的项目技术评分要求。 +严格按照招标文件的内容，确保输出内容的完整性。 +直接输出评分要求，禁止说明和引导词。 + +- 招标文件内容如下： + {bid_document} + + diff --git a/prompts/scoring_rules.txt b/prompts/scoring_rules.txt new file mode 100644 index 0000000..ce290b2 --- /dev/null +++ b/prompts/scoring_rules.txt @@ -0,0 +1,45 @@ +"你是一名专业的招标文件分析师，请按照以下步骤处理用户提供的项目招标文件内容： + +1. **结构识别** +- 仔细解析文件结构，定位'评分标准'、'评审办法'、'投标人须知'等关键章节 +- 特别注意包含'分值'、'评分项'、'权重'等关键词的段落 + +2. **核心要素提取** +- 系统提取以下要素形成结构化表格： + │ 类别 │ 评分项名称 │ 分值权重 │ 具体要求 │ 否决条款 │ +- 分类标准： + ● 技术部分（方案设计、实施能力、技术创新等） + ● 商务部分（资质证明、业绩案例、团队经验等） + ● 价格部分（报价合理性、计价方式等） + ● 其他专项（售后服务、本地化服务等） + +3. **深度分析** +- 计算权重配比（示例：技术60% = 方案设计30% + 实施能力20% + 创新10%） +- 识别否决性条款（如"▲"标记项或特定强制要求） +- 标注特殊评分规则：阶梯得分、区间赋分、横向比较等机制 + +4. **风险提示** +- 标出易被忽视的得分点（如ISO认证、专利数量等） +- 识别矛盾条款（如总分值≠100%的情况） +- 提示资质门槛要求（注册资金、特定资质证书等） + +5. **输出格式** +采用Markdown输出以下结构： +```markdown +# 招标评分要点汇总 + +## 核心指标配比 +- 总评分构成：技术分（__%）+ 商务分（__%）+ 价格分（__%） + +## 详细评分矩阵 +| 类别 | 评分项 | 分值 | 具体要求 | 关键指标 | +|------|-------|-----|---------|---------| +| ... | ... | ... | ... | ... 
| + +## 重点提示 +⚠️ 否决条款：列出所有一票否决项 +💡 得分要点：突出3-5个高权重核心指标 +⏱️ 时间节点：标注与评分相关的时限要求 +``` +请先确认理解任务要求，待用户提供招标文件内容后执行分析。" + diff --git a/prompts/section_detail.py b/prompts/section_detail.py new file mode 100644 index 0000000..d54056f --- /dev/null +++ b/prompts/section_detail.py @@ -0,0 +1,47 @@ +GEN_LEAF_DETAIL_PROMT = """ +【最重要的要求——字数】 +{word_count_spec} + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +- 角色：资深投标文件撰写专家 +- 任务：根据招标文件概要、标书目录、子小节标题，撰写该子小节的正文 + +【行文规范】 +- 投标方自称统一用"我方"，禁用"我们""本公司" +- 招标人统一称"招标方"或"建设单位" +- 禁止前导句："本章节对应……""本小节主要说明……""以下将从……方面说明"等——开头直接写实质内容 +- 禁止AI套话：综上所述、首先其次再次、我们深信、高度重视、全力以赴、不断优化、稳步推进、通过以上措施 +- 用具体数据/标准编号/人员配置替代空洞承诺 +- 列举用(1)(2)(3)编号，禁止"首先其次"连接；禁止"等"作结尾 +- 纯文本输出，禁用markdown符号，段落间空行分隔 +- 直接输出正文，不含标题和解释 + +【输入信息】 +- 招标文件概要： +{summary} + +- 技术标书目录： +{outline} + +- 待撰写的子小节标题： +{title} + +再次强调：篇幅是最核心的质量指标。内容必须充分展开，每个技术要点都要详细阐述实施方法、技术参数、人员安排或设备配置。绝不可以概括性一笔带过。 +""" + + +GEN_SECTION_INTRODUCTION_PROMT = """ +- 角色：资深投标文件撰写专家 +- 任务：为章节撰写简短开篇引言（100～200字），点明核心主题与招标要求的对应关系 +- 使用"我方"自称，禁止套话和前导解释句，纯文本输出 +- 若无需过渡可输出空白 + +- 招标文件概要： +{summary} + +- 技术标书目录： +{outline} + +- 章节标题： +{title} +""" diff --git a/prompts/section_details.txt b/prompts/section_details.txt new file mode 100644 index 0000000..396c723 --- /dev/null +++ b/prompts/section_details.txt @@ -0,0 +1,28 @@ +【最重要的要求——字数】 +{word_count_spec} + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +- 角色：资深投标文件撰写专家 +- 任务：根据招标文件概要、标书目录、子小节标题，撰写该子小节的正文 + +【行文规范】 +- 投标方自称用"我方"，"我们"，"本公司"随机使用 +- 招标人统一称"招标方"或"建设单位" +- 禁止前导句："本章节对应……""本小节主要说明……""以下将从……方面说明"等——开头直接写实质内容 +- 禁止AI套话：综上所述、首先其次再次、我们深信、高度重视、全力以赴、不断优化、稳步推进、通过以上措施 +- 用具体数据/标准编号/人员配置替代空洞承诺 +- 列举用(1)(2)(3)编号，禁止"首先其次"连接；禁止"等"作结尾 +- 纯文本输出，禁用markdown符号，段落间空行分隔 +- 直接输出正文，不含标题和解释 + +【输入信息】 +- 招标文件概要： +{summary} + +- 技术标书目录： +{outline} + +- 待撰写的子小节标题： +{subsection_title} + +再次强调：篇幅是最核心的质量指标。内容必须充分展开，每个技术要点都要详细阐述实施方法、技术参数、人员安排或设备配置。绝不可以概括性一笔带过。 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..0157ba7 --- /dev/null +++ 
b/requirements.txt @@ -0,0 +1,12 @@ +Flask==3.0.3 +flask-cors==4.0.1 +PyPDF2==3.0.1 +python-docx==1.1.2 +openai==1.52.0 +Werkzeug==3.0.4 +requests==2.32.3 +chardet==5.2.0 +pypdf==4.3.1 +pdfminer.six==20231228 +beautifulsoup4==4.12.3 +lxml==5.3.0 diff --git a/start.bat b/start.bat new file mode 100644 index 0000000..b359d6f --- /dev/null +++ b/start.bat @@ -0,0 +1,39 @@ +@echo off +title BidPartner - AI Bid Assistant + +echo. +echo ============================================ +echo BidPartner - AI Bid Writing Tool +echo ============================================ +echo. + +cd /d "%~dp0" + +python --version >nul 2>&1 +if %errorlevel% neq 0 ( + echo [ERROR] Python not found. Please install Python 3.9+ + pause + exit /b 1 +) + +if not exist "%~dp0.deps_installed" ( + echo Installing dependencies... + pip install -r requirements.txt + if %errorlevel% neq 0 ( + echo [ERROR] Failed to install dependencies. + pause + exit /b 1 + ) + echo.> "%~dp0.deps_installed" + echo Dependencies installed successfully. +) + +echo Starting server... +echo Open browser: http://localhost:5000 +echo Press Ctrl+C to stop +echo. 
+ +start "" "http://localhost:5000" +python app.py + +pause diff --git a/static/style.css b/static/style.css new file mode 100644 index 0000000..caf2881 --- /dev/null +++ b/static/style.css @@ -0,0 +1,89 @@ +/* 标伙伴 · 自定义样式 */ + +/* 滚动条美化 */ +::-webkit-scrollbar { + width: 6px; + height: 6px; +} +::-webkit-scrollbar-track { + background: #f1f5f9; + border-radius: 3px; +} +::-webkit-scrollbar-thumb { + background: #cbd5e1; + border-radius: 3px; +} +::-webkit-scrollbar-thumb:hover { + background: #94a3b8; +} + +/* 章节树左侧栏 */ +.sidebar-fixed::-webkit-scrollbar { + width: 4px; +} + +/* 正文内容排版 */ +.prose-content { + font-family: 'SimSun', '宋体', 'Times New Roman', serif; + line-height: 1.9; + color: #374151; +} + +/* 动画 */ +@keyframes fadeIn { + from { opacity: 0; transform: translateY(8px); } + to { opacity: 1; transform: translateY(0); } +} +.fade-in { + animation: fadeIn 0.25s ease-out; +} + +/* 表格样式（评分要求展示） */ +.markdown-table table { + width: 100%; + border-collapse: collapse; + font-size: 13px; +} +.markdown-table th { + background: #f8fafc; + font-weight: 600; + color: #475569; + padding: 8px 12px; + border: 1px solid #e2e8f0; + text-align: left; +} +.markdown-table td { + padding: 7px 12px; + border: 1px solid #e2e8f0; + color: #334155; +} +.markdown-table tr:nth-child(even) td { + background: #f8fafc; +} + +/* 步骤指示器 */ +.step-active { + background: #2563eb; + color: #fff; + box-shadow: 0 2px 8px rgba(37,99,235,.35); +} + +/* 文件上传拖拽高亮 */ +.drop-active { + border-color: #3b82f6 !important; + background: #eff6ff !important; +} + +/* 章节缩进指示线 */ +.section-indent-line { + border-left: 2px solid #e2e8f0; + margin-left: 8px; + padding-left: 8px; +} + +/* 打印样式 */ +@media print { + header, nav, aside, button { display: none !important; } + main { padding: 0 !important; } + .bg-white { box-shadow: none !important; border: none !important; } +} diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..a38d13a --- /dev/null +++ 
b/templates/index.html @@ -0,0 +1,1378 @@ + + + + + +标伙伴 · AI 标书助手 + + + + + + + + +

+ +

+ 标伙伴 + AI 标书助手 +

+ + + + +

+ + +

篇幅目标（按页数粗略换算）请进入某标书项目，在 步骤1「解析」 中设置：100/150/200/250/300 页、自定义、保存页数设置、使用原档位、当前页等，保存后用于后续章节生成。

+ + + + + + + + + +

+ + +

新建标书项目

+ 项目名称 + +

+ + +

+ + + + + + + + + + + + + diff --git a/templates/project.html b/templates/project.html new file mode 100644 index 0000000..dc7bc33 --- /dev/null +++ b/templates/project.html @@ -0,0 +1,2512 @@ + + + + + +{{ project.name }} · 标伙伴 + + + + + + + + +

+ + + +

+ +

+ + +

+ + + + + +

+ + +

+ 1 + 上传招标文件 +

+ + +

拖拽文件到此处，或

+ +

支持 PDF、DOC、DOCX，最大 50MB

+ + +

+ +

已上传

+ +

+ + +

+ 上传中... +

+ + +

篇幅目标（按页数粗略换算）

100/150/200/250/300 页或自定义，保存后用于后续章节生成。在此「保存页数设置」会同步全局配置并写入本标书项目。

+ +

+ + +

+ 自定义页数 + + + 当前： +

+ + + 不限制小章节条数（填小目时慎用，可产生极多行）。不勾选时未设页数则按约 100 页限幅。 + +

+ + +

+ 设置 + 生成偏好（图表 & 暗标） +

+ 解析后自动应用至大纲和生成 +

+ + + + + + + + +

+ + 暗标模式 + +

+ +

+ + +

+ +

暗标要求将附加到每个章节的 AI 生成规范中。解析阶段可预设，生成时自动注入。

+ 常用预设： + + + +

+ +

+ + +

+ + 图表生成模式 +

+ +

启用"图"生成

解析后AI生成章节时自动插入图示，导出Word时渲染。

+ + +

+ +

启用"表"生成

解析后AI生成章节时自动插入表格，导出Word时渲染。

+ +

设置后保存，解析结果和大纲生成将参考这些偏好。

+ + +

+ +

工程量清单导入（可选）解析后可联动招标内容，让生成内容包含准确工程量

+ + +

拖拽清单文件到此处，或点击选择

支持 Excel（xlsx/xls）、CSV、PDF、Word，最大 50MB

+ + +

+ +

+ + + + +

+ + +

+ 上传中... +

+ + +

+ + 工程量清单摘要 +

+ + +

+ +

修改后点击"保存"，将在生成章节内容时作为工程量参考

+ + +

+ +

+ + +

标书类型

解析完成后自动识别为工程类 / 服务类 / 货物类；步骤 3 生成章节将套用对应写作模板（施工组织 / 服务方案 / 供货方案）。识别有误可在此修正。

+ + +

+ + 招标文件摘要 +

+ + +

+ +

修改后点击"保存"，将作为生成大纲的依据

+ + +

+ + 技术评分要求 +

+ + +

+ +

修改后点击"保存"，将作为生成大纲的依据（只保留技术评分，删除商务/价格评分内容）

+ +

+ + +

+ 2 + 生成标书大纲 +

+ +

请先完成招标文件解析

+ +

AI 将根据招标文件摘要和技术评分标准，生成结构化的四级标书目录。

+ +

控长与章节数（建议按顺序执行）

定总节数 N：在大纲中合并/删除不需要的编号行，点「保存并更新章节」；步骤 3「生成」中「共 N 个章节」、左侧章节目录与这里的 N 一致，由本步大纲内容决定。
用页数约束小目 AI 树：在「步骤1 解析」中「上传」与「清单」之间的「篇幅目标」设页；使用「AI 自动填充小章节」时一般勿勾选「不限制小章节」；系统将按目标页对小章节条数作映射与限幅。
全稿再回调：到「生成」用「一键并发生成」后看总字数/估算；若仍过长，回本页大纲继续并节/删行、保存，再回「生成」重跑。

+ +

+ 目标页数、不限制小章节在 + + （上传与清单之间）的「篇幅目标」中设置。 +

+ +

+ + +

+ 当前用于生成的大纲：约字，个章节 +

+ + +

+ AI 正在生成标书大纲，通常需要 30-60 秒... +

+ + +

+ 大纲 + 预览 + 编辑 + （个章节） +

+ + + + +

+ + +

「AI 自动填充小章节」与目标页

+ +

+ + +

+ +

+ + +

+ + 保存后将按新大纲重新划分章节，已生成的章节正文内容将被清除，需重新生成。编辑时请保持层级编号格式（一、1.1、1.1.1…）不变。 +

+ +

格式示例：第一行为标书名称，章节用"一、""1.1""1.1.1""1.1.1.1"等格式

+ + +

+ 3 + 章节内容生成 +

+ +

篇幅与章节数

+ 目标页数只在 + + 「上传」与「清单」之间的「篇幅目标」中配置。本页仅并发生成与查看进度。列表「共 N 节」与大纲行数相关，需并节/删行时回步骤2 大纲。 +

+ + +

+ 生成进度 + + + + + + 路并发 + +

+ +

+ + + 已完成 + + + + 生成中 + + + + 失败 + +

+ 共个章节 +

+ +

请先生成标书大纲

+ +

+ + +

+ +

+ + +

从左侧章节列表或内容列表中选择一个章节

+ +

+ + +

+ +

+ + + +

+ +

+ + +

+ AI 正在生成内容，请稍候... +

+ + +

+ +

章节引言

+ +

暂无内容，点击"AI 生成"或切换"对话生成"模式

+ + +

+ +

+ + +

+ AI +

+ + + +

+ + +

点击 AI 回复下方的「采用此内容 → 填入编辑框」将内容写入编辑器，再点「保存」完成。

+ + +

+ 4 + 合规性检查 +

AI 将对照招标要求检查标书内容的覆盖情况，给出改进建议。

+ + + + +

+ + +

+ 清 + 技术暗标格式清标 +

对导出的技术暗标 HTML 做版式规则检查（身份信息、标题/正文、目录、图表位置、颜色、页边距等），不调用大模型。请粘贴完整 HTML 或选择本地 .html 文件。

说明：检查依赖内联 style；从 Word 另存为「网页」或含完整样式的 HTML 效果最佳；纯标签无样式时部分项易判为不通过。

+ + + 启用清暗标格式检查 + +

+ +

+ + + 选择 .html 文件 + + +

+ +

+ + +

+ +

企业知识库

上传历史标书，AI 生成时自动检索企业优势内容

+ +

+ + + + 语义向量检索 · 个文本块 + + + + + 关键词检索模式 · 个文本块 + +

+ + +

💡 当前使用关键词检索

DeepSeek / Ollama 暂不提供 Embedding API，知识库将以关键词匹配方式检索相关内容。 + 切换为 Qwen 或 OpenAI 模型（在首页 AI 配置中设置）可启用更精准的语义向量检索。

+ + +

+ + 添加知识文档 +

+ + + + +

+ +

拖拽文件到此处，或点击选择

支持 PDF、DOC、DOCX 格式

正在上传文件...

AI 正在向量化文档，请稍候...

+ + +

+ 推荐上传：历史技术方案、同类项目标书、企业资质简介、施工工法说明等。
+ 上传后 AI 在生成章节内容时将自动检索相关片段作为写作参考。 +

+ + +

+ + 已上传文件 + +

+ + + + + + + + +

+ +

知识库暂无文件

上传历史标书后，AI 生成内容时将自动引用

+ + +

+ + 使用说明 +

+ 1 + 上传企业历史技术标书、施工方案、资质简介等文档（支持 PDF/DOC/DOCX） +
+ 2 + 系统自动将文档切分并向量化入库（首次入库需等待 AI 处理完成） +
+ 3 + 生成章节内容（步骤 3）时，系统将自动检索知识库中最相关的段落供 AI 参考写作 +
+ 4 + 知识库为全局共享，对所有项目均有效；可随时添加或删除文档 +

+ +

+ + +

粘贴或编辑一级大纲

+ +

+ 请输入一级大纲（每行一个一级章节，支持“附件四、xxx / 附图、xxx / 附表、xxx”）。 + 系统将自动标准化编号并执行 AI 自动细化小章节（包含附件部分）。 +

+ +

+ + +

+ + + + + + + + + + diff --git a/tests/fixtures/dark_bid_report_sample.json b/tests/fixtures/dark_bid_report_sample.json new file mode 100644 index 0000000..11aecd2 --- /dev/null +++ b/tests/fixtures/dark_bid_report_sample.json @@ -0,0 +1,13 @@ +{ + "overall": false, + "details": [ + {"rule": "身份信息隐藏", "passed": true, "message": "未发现投标人身份信息"}, + {"rule": "标题格式", "passed": false, "message": "部分标题字号/字体/颜色/下划线不符合要求"}, + {"rule": "正文格式", "passed": false, "message": "部分正文段落格式不符合要求"}, + {"rule": "目录要求", "passed": true, "message": "目录符合无页码、无页眉页脚要求"}, + {"rule": "图表规范", "passed": false, "message": "正文中发现2个图表或附件内图表文字格式错误"}, + {"rule": "颜色与装饰", "passed": true, "message": "无彩色文字、无下划线、无着重号"}, + {"rule": "页面设置", "passed": false, "message": "页面边距或纸张方向不符合要求"} + ], + "violations": [] +} diff --git a/tests/test_attachment_section.py b/tests/test_attachment_section.py new file mode 100644 index 0000000..13f438b --- /dev/null +++ b/tests/test_attachment_section.py @@ -0,0 +1,95 @@ +"""附件类章节识别与单图/单表类型选择。""" +import unittest + +from utils import attachment_section as att + + +class TestIsAttachment(unittest.TestCase): + def test_positive(self): + r = att.load_attachment_rules() + self.assertTrue(att.is_attachment_only_section('附件一：施工平面布置', r)) + self.assertTrue(att.is_attachment_only_section('附图组织机构', r)) + self.assertTrue(att.is_attachment_only_section('附表人员一览', r)) + + def test_negative(self): + r = att.load_attachment_rules() + self.assertFalse(att.is_attachment_only_section('施工组织设计', r)) + self.assertFalse(att.is_attachment_only_section('', r)) + + +class TestPickKind(unittest.TestCase): + def test_only_figure_switch(self): + r = att.DEFAULT_ATTACHMENT_RULES + self.assertEqual( + att.pick_single_figure_or_table('附件一：xxx', True, False, r), + 'figure', + ) + + def test_only_table_switch(self): + r = att.DEFAULT_ATTACHMENT_RULES + self.assertEqual( + att.pick_single_figure_or_table('附件一：xxx', False, True, r), + 'table', + ) + + def test_both_off(self): + self.assertIsNone( + 
att.pick_single_figure_or_table('附件一', False, False, None), + ) + + def test_table_hint(self): + r = att.DEFAULT_ATTACHMENT_RULES + k = att.pick_single_figure_or_table('附件三工程量一览表', True, True, r) + self.assertEqual(k, 'table') + + def test_figure_hint(self): + r = att.DEFAULT_ATTACHMENT_RULES + k = att.pick_single_figure_or_table('附图施工平面示意图', True, True, r) + self.assertEqual(k, 'figure') + + def test_default_ambiguous(self): + r = dict(att.DEFAULT_ATTACHMENT_RULES) + r['default_kind_when_ambiguous'] = 'table' + k = att.pick_single_figure_or_table('附件五其他资料', True, True, r) + self.assertEqual(k, 'table') + + +class TestAttachmentBodyMode(unittest.TestCase): + def test_default_stack_charts_only(self): + r = att.DEFAULT_ATTACHMENT_RULES + self.assertEqual(att.attachment_leaf_body_mode(r), 'stack_charts_only') + self.assertTrue(att.use_attachment_stack_charts_body(r)) + self.assertFalse(att.use_attachment_single_chart_only_body(r)) + self.assertFalse(att.use_attachment_full_body(r)) + + def test_full_mode(self): + r = dict(att.DEFAULT_ATTACHMENT_RULES) + r['attachment_leaf_body_mode'] = 'full' + self.assertEqual(att.attachment_leaf_body_mode(r), 'full') + self.assertTrue(att.use_attachment_full_body(r)) + self.assertFalse(att.use_attachment_stack_charts_body(r)) + + def test_single_chart_only(self): + r = dict(att.DEFAULT_ATTACHMENT_RULES) + r['attachment_leaf_body_mode'] = 'single_chart_only' + self.assertTrue(att.use_attachment_single_chart_only_body(r)) + self.assertTrue(att.use_attachment_stack_charts_body(r)) + + +class TestExpandOutlineSkip(unittest.TestCase): + def test_should_skip_attachment(self): + self.assertTrue(att.should_skip_expand_subchapters('附件一：平面图')) + self.assertTrue(att.should_skip_expand_subchapters('附图示意')) + + def test_should_skip_normal_chapter(self): + self.assertFalse(att.should_skip_expand_subchapters('施工组织设计')) + self.assertFalse(att.should_skip_expand_subchapters('质量管理体系与措施')) + + def test_parse_attachment_label(self): + 
self.assertEqual(att.parse_attachment_label('附件一：平面图'), '一') + self.assertEqual(att.parse_attachment_label('附件2 承诺书'), '2') + self.assertEqual(att.parse_attachment_label('附图总平面'), '附图') + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_bid_appendix_content.py b/tests/test_bid_appendix_content.py new file mode 100644 index 0000000..9d2cd6d --- /dev/null +++ b/tests/test_bid_appendix_content.py @@ -0,0 +1,42 @@ +"""标准附件：工期解析、清单节点、总平面与临时用地勾连。""" +import unittest + +from utils import bid_appendix_content as bac + + +class TestBidAppendixSchedule(unittest.TestCase): + def test_parse_duration(self): + t = '本工程工期为 180 日历天，自合同签订起算。' + self.assertEqual(bac._parse_duration_calendar_days(t), 180) + + def test_parse_schedule_dates(self): + s = '计划开工日期：2026年5月1日。计划完工日期：2026年10月28日。' + sch = bac._parse_schedule_facts(s, '') + self.assertEqual(sch['start'], '2026年5月1日') + self.assertEqual(sch['end'], '2026年10月28日') + + def test_boq_nodes_from_table(self): + boq = """ +| 序号 | 项目名称 | 单位 | +|------|----------|------| +| 1 | 挖一般土方 | m3 | +| 2 | 现浇混凝土柱 | m3 | +""" + nodes = bac._extract_boq_work_items('', boq) + self.assertIn('挖一般土方', nodes) + self.assertIn('现浇混凝土柱', nodes) + + def test_site_layout_links_temp_land(self): + md = bac._build_site_layout_figure_md('施工总平面图', '含材料堆场', '') + self.assertIn('临时用地表', md) + self.assertIn('材料堆场', md) + self.assertIn('㎡', md) + + def test_schedule_figure_no_fantasy_when_empty(self): + md = bac._build_schedule_figure_md('进度网络图', '', '') + self.assertIn('不填写具体开工日', md) + self.assertIn('不得臆造', md) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_bill_analysis.py b/tests/test_bill_analysis.py new file mode 100644 index 0000000..b2a2167 --- /dev/null +++ b/tests/test_bill_analysis.py @@ -0,0 +1,52 @@ +"""工程量清单本地分析单元测试。""" +import unittest + +from utils.bill_analysis import ( + analyze_boq_pages, + filter_bill_pages, + parse_bill_text, +) + + +class TestParseBillText(unittest.TestCase): + def 
test_code_name_unit_qty(self): + text = '010101001001 挖土方 m3 100.5 土壤类别:三类土' + r = parse_bill_text(text) + self.assertIn('categories', r) + self.assertTrue(r['categories']) + cat = r['categories'][0] + self.assertEqual(cat['name'], '未分类') + self.assertEqual(len(cat['items']), 1) + it = cat['items'][0] + self.assertEqual(it['code'], '010101001001') + self.assertIn('挖土', it['name']) + self.assertEqual(it['unit'], 'm3') + self.assertEqual(it['quantity'], '100.5') + + def test_hierarchical_line_prefix(self): + text = '1.1 010101001001 基础开挖 m3 50' + r = parse_bill_text(text) + it = r['categories'][0]['items'][0] + self.assertEqual(it['code'], '010101001001') + + +class TestFilterBillPages(unittest.TestCase): + def test_two_pages_gap_fill(self): + p0 = '目录前言' + p1 = '分部分项工程量清单\n项目编码项目名称工程量\n010101001001 项 m3 1' + p2 = '续表无表头\n010101002001 土 m2 2' + p3 = '规费税金社会保险费住房公积金其他说明' + pages, meta = filter_bill_pages([p0, p1, p2, p3]) + self.assertEqual(meta['total_pages'], 4) + self.assertGreaterEqual(len(pages), 2) + merged = '\n'.join(pages) + self.assertIn('010101001001', merged) + self.assertIn('010101002001', merged) + + def test_analyze_scanned_empty(self): + r = analyze_boq_pages(['', ' ', '']) + self.assertTrue(r.get('scanned')) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_dark_bid_format_check.py b/tests/test_dark_bid_format_check.py new file mode 100644 index 0000000..28ac5a3 --- /dev/null +++ b/tests/test_dark_bid_format_check.py @@ -0,0 +1,63 @@ +"""技术暗标 HTML 格式检查：结构校验与极简用例（标准库 unittest）。""" +import json +import os +import sys +import unittest + +# 保证可 `python tests/test_*.py` 从项目根导入 `modules` +_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if _ROOT not in sys.path: + sys.path.insert(0, _ROOT) + +from modules.dark_bid_format_check import check_technical_bid # noqa: E402 + + +def _sample_schema_path(): + return os.path.join(os.path.dirname(__file__), "fixtures", "dark_bid_report_sample.json") + + +class 
TestDarkBidFormatCheck(unittest.TestCase): + def test_sample_fixture_keys(self): + with open(_sample_schema_path(), encoding="utf-8") as f: + sample = json.load(f) + self.assertIn("overall", sample) + self.assertIn("details", sample) + self.assertIn("violations", sample) + for d in sample["details"]: + self.assertTrue({"rule", "passed", "message"}.issubset(d.keys())) + + def test_check_returns_structure(self): + html = """ +

第一章概述

标题

+ 正文内容示例。

+ """ + r = check_technical_bid(html) + self.assertIsInstance(r["overall"], bool) + self.assertEqual(len(r["details"]), 7) + rules = [x["rule"] for x in r["details"]] + self.assertIn("身份信息隐藏", rules) + self.assertIn("标题格式", rules) + + def test_empty_html(self): + r = check_technical_bid("") + self.assertFalse(r["overall"]) + + def test_identity_fail_on_company(self): + html = ( + "

我公司参与投标

" + "

" + "" + "" + ) + r = check_technical_bid(html) + id_rule = next(x for x in r["details"] if x["rule"] == "身份信息隐藏") + self.assertFalse(id_rule["passed"]) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_diagram_intent.py b/tests/test_diagram_intent.py new file mode 100644 index 0000000..47a4779 --- /dev/null +++ b/tests/test_diagram_intent.py @@ -0,0 +1,121 @@ +"""图表意图栈与特征计分。""" +import unittest + +from utils import diagram_intent as di + + +def _base_rules() -> dict: + return { + 'schema_version': 1, + 'threshold_figure': 1.0, + 'threshold_table': 1.0, + 'title_weight': 1.0, + 'context_weight': 0.6, + 'outline_context_lines': {'before': 2, 'after': 2}, + 'stack_order_when_both': 'score_desc', + 'figure_keywords': [ + {'text': '进度', 'weight': 1.2}, + {'text': '横道', 'weight': 1.5}, + ], + 'table_keywords': [ + {'text': '一览表', 'weight': 1.5}, + {'text': '人员', 'weight': 1.0}, + ], + } + + +class TestScoreFigureTable(unittest.TestCase): + def test_figure_higher_on_progress(self): + r = _base_rules() + f, t = di.score_figure_table('施工进度与横道计划', '', r) + self.assertGreater(f, t) + + def test_table_higher_on_roster(self): + r = _base_rules() + f, t = di.score_figure_table('主要管理人员配置一览表', '', r) + self.assertGreater(t, f) + + +class TestBuildStack(unittest.TestCase): + def test_gate_figure_off(self): + r = _base_rules() + st = di.build_stack(5.0, 5.0, r, enable_figure=False, enable_table=True) + self.assertEqual(len(st), 1) + self.assertEqual(st[0].kind, 'table') + + def test_score_desc_order(self): + r = dict(_base_rules()) + r['stack_order_when_both'] = 'score_desc' + st = di.build_stack(3.0, 1.0, r, True, True) + self.assertEqual(len(st), 2) + self.assertEqual(st[0].kind, 'figure') + self.assertGreater(st[0].score, st[1].score) + + def test_figure_first(self): + r = dict(_base_rules()) + r['stack_order_when_both'] = 'figure_first' + st = di.build_stack(2.0, 5.0, r, True, True) + self.assertEqual(st[0].kind, 'figure') + self.assertEqual(st[1].kind, 
'table') + + def test_below_threshold_empty(self): + r = dict(_base_rules()) + r['threshold_figure'] = 10.0 + r['threshold_table'] = 10.0 + st = di.build_stack(1.0, 1.0, r, True, True) + self.assertEqual(st, []) + + +class TestOutlineWindow(unittest.TestCase): + def test_finds_title_line(self): + outline = '一、总则\n二、进度\n 2.1 横道计划\n三、尾' + w = di.extract_outline_window(outline, '2.1 横道计划', 1, 1) + self.assertIn('横道', w) + + def test_fallback_prefix(self): + w = di.extract_outline_window('abc' * 400, '不存在的标题', 2, 2) + self.assertTrue(len(w) > 0) + + +class TestAgentRender(unittest.TestCase): + def test_render_non_empty_when_match(self): + r = dict(_base_rules()) + r['threshold_figure'] = 0.5 + r['threshold_table'] = 0.5 + agent = di.DiagramIntentAgent(r) + s = agent.render_for_section( + '施工进度横道图编制说明', + '大纲\n进度\n横道', + True, + True, + ) + self.assertIn('图示生成规范', s) + self.assertIn('本节图表生成优先级', s) + + def test_render_empty_when_scores_low(self): + r = dict(_base_rules()) + r['threshold_figure'] = 100.0 + r['threshold_table'] = 100.0 + agent = di.DiagramIntentAgent(r) + s = agent.render_for_section('无关标题', '无关', True, True) + self.assertEqual(s, '') + + +class TestStackHelpers(unittest.TestCase): + def test_stack_compact_labels(self): + st = [ + di.DiagramIntent('figure', 1.0, 't'), + di.DiagramIntent('table', 1.0, 't'), + ] + lab = di.stack_compact_labels(st) + self.assertEqual(len(lab), 2) + self.assertIn('[FIGURE]', lab[0]) + + def test_make_fallback_stack(self): + st = di.make_fallback_stack('figure') + self.assertEqual(len(st), 1) + self.assertEqual(st[0].kind, 'figure') + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_outline_numbering.py b/tests/test_outline_numbering.py new file mode 100644 index 0000000..c0f462a --- /dev/null +++ b/tests/test_outline_numbering.py @@ -0,0 +1,44 @@ +"""目录号格式化与大纲带号写回。""" +import os +import sys +import unittest + +_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if _ROOT not in sys.path: 
+ sys.path.insert(0, _ROOT) + +from modules.generator import _parse_outline, _sections_to_outline_text # noqa: E402 +from utils.outline_numbering import format_heading_display, int_to_chinese_numeral # noqa: E402 + + +class TestOutlineNumbering(unittest.TestCase): + def test_int_to_chinese(self): + self.assertEqual(int_to_chinese_numeral(1), "一") + self.assertEqual(int_to_chinese_numeral(10), "十") + self.assertEqual(int_to_chinese_numeral(11), "十一") + self.assertEqual(int_to_chinese_numeral(23), "二十三") + + def test_format_heading(self): + self.assertEqual(format_heading_display(1, "3", "总体"), "三、总体") + self.assertEqual(format_heading_display(2, "1.2", "子节"), "1.2 子节") + + def test_sections_to_outline_text_has_numbers(self): + sections = [ + {"level": 1, "title": "第一章", "number": "1"}, + {"level": 2, "title": "小节", "number": "1.1"}, + ] + text = _sections_to_outline_text("某项目技术标书", sections) + self.assertIn("某项目技术标书", text) + self.assertIn("一、第一章", text) + self.assertIn("1.1 小节", text) + + def test_parse_roundtrip_numbered_outline(self): + raw = "标书标题\n一、第一章\n1.1 节A\n" + _, sections, normalized = _parse_outline(raw) + self.assertGreaterEqual(len(sections), 2) + self.assertIn("一、第一章", normalized) + self.assertIn("1.1 节A", normalized) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_parse_outline.py b/tests/test_parse_outline.py new file mode 100644 index 0000000..de4d4fb --- /dev/null +++ b/tests/test_parse_outline.py @@ -0,0 +1,24 @@ +"""大纲解析：1.1 类编号不得被误拆成一级 1 与 title '.1 标题'。""" +import unittest + +from modules.generator import _parse_outline + + +class TestParseOutline(unittest.TestCase): + def test_11_stays_single_section(self): + text = "某某项目标书标题\n1.1 沟槽开挖与支护\n1.2 排降水\n" + _, sections, _ = _parse_outline(text) + self.assertEqual(len(sections), 2, [s.get('number') for s in sections]) + for s in sections: + if s.get('level') == 1: + self.assertFalse( + (s.get('title') or '').lstrip().startswith('.'), + '不得出现一级章节 title 以 .1 开头（误将 1.1 拆成 1 与 
.1 标题）', + ) + titles = ' '.join(s['title'] for s in sections) + self.assertIn('沟槽', titles) + self.assertIn('排降', titles) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_qwen_image_client.py b/tests/test_qwen_image_client.py new file mode 100644 index 0000000..dd2769b --- /dev/null +++ b/tests/test_qwen_image_client.py @@ -0,0 +1,29 @@ +"""qwen-image 客户端：响应解析与提示词拼装（无网络）。""" +import unittest + +from utils import qwen_image_client as qic + + +class TestQwenImageClient(unittest.TestCase): + def test_extract_image_url(self): + payload = { + 'output': { + 'choices': [ + {'message': {'content': [{'image': 'https://example.com/a.png'}]}} + ] + } + } + self.assertEqual(qic._extract_image_url(payload), 'https://example.com/a.png') + + def test_extract_image_url_empty(self): + self.assertIsNone(qic._extract_image_url({})) + + def test_build_attachment_prompt_truncates(self): + long_body = 'x' * 2000 + p = qic.build_attachment_figure_prompt('标题', long_body) + self.assertLessEqual(len(p), 900) + self.assertIn('标题', p) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_volume_chapters.py b/tests/test_volume_chapters.py new file mode 100644 index 0000000..f359a5f --- /dev/null +++ b/tests/test_volume_chapters.py @@ -0,0 +1,86 @@ +"""目标页数与一级篇章区间。""" +import random +import unittest + +from utils import volume_chapters as vc + + +class TestVolumeChapters(unittest.TestCase): + def test_top_level_default_pages_zero(self): + lo, hi = vc.top_level_chapter_range_from_pages(0) + self.assertEqual((lo, hi), (8, 10)) + + def test_ranges_match_effective_volume_bands(self): + self.assertEqual(vc.top_level_chapter_range_from_pages(100), (6, 8)) + self.assertEqual(vc.top_level_chapter_range_from_pages(125), (6, 8)) + self.assertEqual(vc.top_level_chapter_range_from_pages(150), (8, 10)) + self.assertEqual(vc.top_level_chapter_range_from_pages(200), (10, 12)) + self.assertEqual(vc.top_level_chapter_range_from_pages(300), (12, 16)) + + def 
test_hint_default_no_pages(self): + h = vc.outline_chapter_count_hint(0, 'standard') + self.assertIn('8-10', h) + self.assertIn('不超过10', h) + + def test_hint_with_pages(self): + h = vc.outline_chapter_count_hint(150, 'standard', 700) + self.assertIn('约 8–10', h) + self.assertIn('150', h) + self.assertIn('105000', h) # 150×700 总字目标 + self.assertIn('过细', h) + + def test_subchapter_base_anchor_points(self): + self.assertAlmostEqual(vc.subchapter_total_base_from_pages(100), 78.0, places=5) + self.assertAlmostEqual(vc.subchapter_total_base_from_pages(300), 212.0, places=5) + self.assertEqual(vc.SUBCHAPTER_PAGES_SLOPE, 0.67) + self.assertEqual(vc.SUBCHAPTER_PAGES_INTERCEPT, 11.0) + + def test_subchapter_jitter_bounds_78_anchor(self): + """100 页基线 78 章，±10% 严格为 [70, 86]。""" + self.assertEqual(vc.subchapter_jitter_bounds(78.0), (70, 86)) + + def test_subchapter_jitter_bounds_300_pages(self): + self.assertEqual(vc.subchapter_jitter_bounds(212.0), (191, 233)) + + def test_allocate_subchapters_to_mains(self): + self.assertEqual(vc.allocate_subchapters_to_mains(10, 3), [4, 3, 3]) + self.assertEqual(vc.allocate_subchapters_to_mains(0, 3), [0, 0, 0]) + self.assertEqual(vc.allocate_subchapters_to_mains(5, 2), [3, 2]) + self.assertEqual(vc.allocate_subchapters_to_mains(7, 0), []) + # n < k 时多出的主章 quota 为 0 + a = vc.allocate_subchapters_to_mains(70, 100) + self.assertEqual(len(a), 100) + self.assertEqual(sum(a), 70) + self.assertEqual(a.count(1), 70) + self.assertEqual(a.count(0), 30) + + def test_subchapter_effective_respects_k_floor_and_jitter(self): + # round(78 * u) for u in [0.9, 1.1] stays in [70, 86] for 78.0 base + for seed in range(800): + n = vc.subchapter_total_effective(100, 1, random.Random(seed)) + self.assertGreaterEqual(n, 70) + self.assertLessEqual(n, 86) + # 主章数很大时，总条数仍须在 [70, 86]（不得被 max(n,k) 抬到数百） + for seed in range(20): + nk = vc.subchapter_total_effective(100, 500, random.Random(seed)) + self.assertGreaterEqual(nk, 70, msg=f'seed={seed}') + 
self.assertLessEqual(nk, 86, msg=f'seed={seed}') + + def test_subchapter_effective_zero_pages(self): + self.assertEqual(vc.subchapter_total_effective(0, 5), 0) + self.assertEqual(vc.subchapter_total_effective(100, 0), 0) + + def test_resolve_expand_target_pages(self): + self.assertEqual(vc.resolve_expand_target_pages(None, True, 100, 200), 0) + self.assertEqual(vc.resolve_expand_target_pages(200, False, 100, 50), 200) + self.assertEqual(vc.resolve_expand_target_pages(0, False, 80, 0), 80) + self.assertEqual(vc.resolve_expand_target_pages(0, False, 0, 50), 50) + self.assertEqual( + vc.resolve_expand_target_pages(0, False, 0, 0), + vc.EXPAND_OUTLINE_DEFAULT_TARGET_PAGES, + ) + self.assertEqual(vc.EXPAND_OUTLINE_DEFAULT_TARGET_PAGES, 100) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_word_allocation.py b/tests/test_word_allocation.py new file mode 100644 index 0000000..f144964 --- /dev/null +++ b/tests/test_word_allocation.py @@ -0,0 +1,170 @@ +"""字数分配与 rating_json 解析单元测试。""" +import json +import unittest + +import config as cfg + +from utils import word_allocation as wa + + +class TestParseRatingJson(unittest.TestCase): + def test_canonical_items(self): + raw = json.dumps( + { + 'items': [ + {'id': 'T1', 'name': '施工方案', 'weight': 30, 'keywords': ['工艺']}, + {'id': 'T2', 'name': '质量保证', 'weight': 10, 'keywords': []}, + ], + 'notes': '', + }, + ensure_ascii=False, + ) + items = wa.parse_rating_json(raw) + self.assertEqual(len(items), 2) + names = {x['name'] for x in items} + self.assertIn('施工方案', names) + self.assertIn('质量保证', names) + wmap = {x['name']: x['weight'] for x in items} + self.assertEqual(wmap['施工方案'], 30.0) + + def test_malformed_returns_empty(self): + self.assertEqual(wa.parse_rating_json('not json'), []) + self.assertEqual(wa.parse_rating_json(''), []) + + +class TestComputeLeafAllocations(unittest.TestCase): + def test_none_when_no_rating_and_not_target_pages_budget(self): + leaves = [{'id': 1, 'section_title': '一、总体方案'}] + 
rules = dict(wa.DEFAULT_RULES) + rules['budget_mode'] = 'anchor_mean' + self.assertIsNone( + wa.compute_leaf_allocations('standard', leaves, '', rules) + ) + + def test_uniform_when_no_rating_but_target_pages(self): + """无技术评分时仍按目标页均分 B=页×每页字，全稿不随节数 N 线性爆量。""" + leaves = [ + {'id': 1, 'section_title': 'A'}, + {'id': 2, 'section_title': 'B'}, + ] + rules = dict(wa.DEFAULT_RULES) + rules['budget_mode'] = 'target_pages' + old_tp = cfg.TARGET_PAGES + old_pce = cfg.PAGE_CHAR_ESTIMATE + try: + cfg.TARGET_PAGES = 100 + cfg.PAGE_CHAR_ESTIMATE = 700 + out = wa.compute_leaf_allocations('standard', leaves, '', rules) + finally: + cfg.TARGET_PAGES = old_tp + cfg.PAGE_CHAR_ESTIMATE = old_pce + self.assertIsNotNone(out) + s = out[1]['target_chars'] + out[2]['target_chars'] + self.assertEqual(s, 100 * 700) + self.assertEqual(out[1]['target_chars'], out[2]['target_chars']) + + def test_monotonicity_high_weight_match(self): + rating = json.dumps( + { + 'items': [ + {'name': '施工组织设计', 'weight': 50, 'keywords': ['进度']}, + {'name': '页眉页脚规范', 'weight': 2, 'keywords': []}, + ] + }, + ensure_ascii=False, + ) + leaves = [ + {'id': 10, 'section_title': '3.1 施工组织设计与进度计划'}, + {'id': 11, 'section_title': '9.9 页眉格式说明'}, + ] + rules = dict(wa.DEFAULT_RULES) + rules['alpha'] = 0.95 + out = wa.compute_leaf_allocations('standard', leaves, rating, rules) + self.assertIsNotNone(out) + t_high = out[10]['target_chars'] + t_low = out[11]['target_chars'] + self.assertGreaterEqual(t_high, t_low, '强匹配高分项的章节应不低于弱匹配章节') + self.assertIn('施工组织设计', out[10]['word_count_spec']) + + def test_budget_anchor_mean(self): + rating = json.dumps( + {'items': [{'name': '技术部分', 'weight': 100}]}, + ensure_ascii=False, + ) + leaves = [ + {'id': 1, 'section_title': 'A'}, + {'id': 2, 'section_title': 'B'}, + {'id': 3, 'section_title': 'C'}, + ] + rules = dict(wa.DEFAULT_RULES) + rules['budget_mode'] = 'anchor_mean' + rules['alpha'] = 0.0 + old_tp = getattr(cfg, 'TARGET_PAGES', 0) + setattr(cfg, 'TARGET_PAGES', 0) + try: + out 
= wa.compute_leaf_allocations('standard', leaves, rating, rules) + finally: + setattr(cfg, 'TARGET_PAGES', old_tp) + self.assertIsNotNone(out) + base, core, _, _ = wa.VOLUME_PRESETS['standard'] + expect = int(round(len(leaves) * (base + core) / 2.0)) + s = sum(out[i]['target_chars'] for i in (1, 2, 3)) + self.assertEqual(s, expect) + + def test_budget_target_pages(self): + rating = json.dumps( + {'items': [{'name': '技术部分', 'weight': 100}]}, + ensure_ascii=False, + ) + leaves = [ + {'id': 1, 'section_title': 'A'}, + {'id': 2, 'section_title': 'B'}, + ] + rules = dict(wa.DEFAULT_RULES) + rules['budget_mode'] = 'target_pages' + rules['alpha'] = 0.0 + old_tp = cfg.TARGET_PAGES + old_pce = cfg.PAGE_CHAR_ESTIMATE + try: + cfg.TARGET_PAGES = 100 + cfg.PAGE_CHAR_ESTIMATE = 700 + out = wa.compute_leaf_allocations('standard', leaves, rating, rules) + finally: + cfg.TARGET_PAGES = old_tp + cfg.PAGE_CHAR_ESTIMATE = old_pce + self.assertIsNotNone(out) + expect = 100 * 700 + s = sum(out[i]['target_chars'] for i in (1, 2)) + self.assertEqual(s, expect) + + def test_budget_target_pages_falls_back_when_pages_zero(self): + rating = json.dumps( + {'items': [{'name': '技术部分', 'weight': 100}]}, + ensure_ascii=False, + ) + leaves = [ + {'id': 1, 'section_title': 'A'}, + {'id': 2, 'section_title': 'B'}, + ] + rules = dict(wa.DEFAULT_RULES) + rules['budget_mode'] = 'target_pages' + rules['alpha'] = 0.0 + old_tp = cfg.TARGET_PAGES + try: + cfg.TARGET_PAGES = 0 + out = wa.compute_leaf_allocations('standard', leaves, rating, rules) + finally: + cfg.TARGET_PAGES = old_tp + self.assertIsNotNone(out) + base, core, _, _ = wa.VOLUME_PRESETS['standard'] + expect = int(round(len(leaves) * (base + core) / 2.0)) + s = sum(out[i]['target_chars'] for i in (1, 2)) + self.assertEqual(s, expect) + + def test_continuation_threshold(self): + self.assertEqual(wa.continuation_threshold(2000), 1300) + self.assertEqual(wa.continuation_threshold(100), 200) + + +if __name__ == '__main__': + unittest.main() diff 
--git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/utils/__init__.py @@ -0,0 +1 @@ + diff --git a/utils/ai_client.py b/utils/ai_client.py new file mode 100644 index 0000000..2e43604 --- /dev/null +++ b/utils/ai_client.py @@ -0,0 +1,259 @@ +""" +AI API 调用封装，支持 OpenAI、阿里云通义千问、DeepSeek、Ollama（均兼容 OpenAI SDK） +""" +import re +import time +import logging +from openai import OpenAI +import config +from contextlib import contextmanager # for type hints if needed + +logger = logging.getLogger(__name__) + +PROVIDER_NAMES = { + 'qwen': '通义千问 (Qwen)', + 'deepseek': 'DeepSeek', + 'openai': 'OpenAI', + 'ollama': 'Ollama 本地', + 'doubao': '豆包 (Doubao)', + 'kimi': 'Kimi (Moonshot)', +} + +PROVIDER_LINKS = { + 'qwen': 'https://dashscope.aliyun.com/', + 'deepseek': 'https://platform.deepseek.com/', + 'openai': 'https://platform.openai.com/', + 'ollama': 'https://ollama.com/', + 'doubao': 'https://console.volcengine.com/ark/', + 'kimi': 'https://platform.moonshot.cn/', +} + + +def _check_api_key(): + """调用前预检 API Key，无效时直接抛出友好提示，不做无意义的重试""" + provider = config.MODEL_PROVIDER + + # Ollama 本地无需 API Key，跳过检查 + if provider == 'ollama': + return + + name = PROVIDER_NAMES.get(provider, provider) + link = PROVIDER_LINKS.get(provider, '') + + if provider == 'qwen': + key = config.QWEN_API_KEY + elif provider == 'deepseek': + key = config.DEEPSEEK_API_KEY + elif provider == 'doubao': + key = config.DOUBAO_API_KEY + elif provider == 'kimi': + key = config.KIMI_API_KEY + else: + key = config.OPENAI_API_KEY + + if not key or key.startswith('sk-your'): + raise RuntimeError( + f'尚未配置 {name} 的 API Key。' + f'请点击右上角设置按钮，选择"{name}"并填入有效的 API Key。' + f'申请地址：{link}' + ) + + +def _get_client() -> OpenAI: + """根据 MODEL_PROVIDER 返回对应的 OpenAI 兼容客户端""" + if config.MODEL_PROVIDER == 'qwen': + return OpenAI(api_key=config.QWEN_API_KEY, base_url=config.QWEN_BASE_URL) + if config.MODEL_PROVIDER == 'deepseek': + return 
OpenAI(api_key=config.DEEPSEEK_API_KEY, base_url=config.DEEPSEEK_BASE_URL) + if config.MODEL_PROVIDER == 'ollama': + return OpenAI(api_key='ollama', base_url=config.OLLAMA_BASE_URL) + if config.MODEL_PROVIDER == 'doubao': + return OpenAI(api_key=config.DOUBAO_API_KEY, base_url=config.DOUBAO_BASE_URL) + if config.MODEL_PROVIDER == 'kimi': + return OpenAI(api_key=config.KIMI_API_KEY, base_url=config.KIMI_BASE_URL) + return OpenAI(api_key=config.OPENAI_API_KEY, base_url=config.OPENAI_BASE_URL) + + +def _get_model() -> str: + if config.MODEL_PROVIDER == 'qwen': + return config.QWEN_MODEL + if config.MODEL_PROVIDER == 'deepseek': + return config.DEEPSEEK_MODEL + if config.MODEL_PROVIDER == 'ollama': + return config.OLLAMA_MODEL + if config.MODEL_PROVIDER == 'doubao': + return config.DOUBAO_MODEL + if config.MODEL_PROVIDER == 'kimi': + return config.KIMI_MODEL + return config.OPENAI_MODEL + + +def _clean_response(text: str) -> str: + """ + 过滤推理模型（DeepSeek R1 / QwQ 等）输出的 ... 思考过程标签， + 只保留最终正文内容，避免思考链污染标书正文。 + """ + # 去除 ... 
块（含跨行内容） + text = re.sub(r'[\s\S]*?', '', text, flags=re.IGNORECASE) + return text.strip() + + +def _is_auth_error(e: Exception) -> bool: + """判断是否为认证错误（401 / invalid_api_key），无需重试""" + # 优先用 openai 原生异常类型判断 + try: + from openai import AuthenticationError, PermissionDeniedError + if isinstance(e, (AuthenticationError, PermissionDeniedError)): + return True + except ImportError: + pass + # 兜底：字符串匹配 + err_str = str(e).lower() + return ('401' in err_str or 'invalid_api_key' in err_str + or 'incorrect api key' in err_str or 'authentication' in err_str) + + +# OpenAI o 系列推理模型：不支持 temperature，max_tokens 需用 max_completion_tokens +_OPENAI_REASONING_MODELS = {'o1', 'o1-mini', 'o1-pro', 'o3', 'o3-mini', 'o3-pro', 'o4-mini'} + + +def _build_chat_kwargs( + model: str, + messages: list, + temperature: float, + max_tokens: int, + request_timeout: float | None = None, +) -> dict: + """ + 根据模型类型构建 chat.completions.create 的参数字典。 + OpenAI o 系列推理模型不接受 temperature，且使用 max_completion_tokens 替代 max_tokens。 + """ + base_model = model.split(':')[0] # 去掉 ollama tag 后缀 + is_reasoning = base_model in _OPENAI_REASONING_MODELS + + to = request_timeout if request_timeout is not None else config.REQUEST_TIMEOUT + kwargs = { + 'model': model, + 'messages': messages, + 'timeout': to, + } + if is_reasoning: + kwargs['max_completion_tokens'] = max_tokens + else: + kwargs['temperature'] = temperature + kwargs['max_tokens'] = max_tokens + return kwargs + + +def chat( + prompt: str, + system: str = '你是一位专业的投标文件撰写专家。', + temperature: float = 0.7, + max_tokens: int = 8192, + retries: int = None, + request_timeout: float | None = None, +) -> str: + """ + 调用 AI 接口，返回文本响应。 + 认证错误立即终止；其他错误指数退避重试。 + 自动兼容 OpenAI o 系列推理模型的参数差异。 + 所有调用受全局LLM_SEMAPHORE(上限20)保护，实现极速并发优化。 + """ + _check_api_key() + + max_retries = retries if retries is not None else config.MAX_RETRIES + client = _get_client() + model = _get_model() + provider = config.MODEL_PROVIDER + name = PROVIDER_NAMES.get(provider, provider) + + messages = [ + 
{'role': 'system', 'content': system}, + {'role': 'user', 'content': prompt}, + ] + + for attempt in range(max_retries): + try: + with config.llm_call(): # 全局并发控制，上限20 + kwargs = _build_chat_kwargs( + model, messages, temperature, max_tokens, request_timeout=request_timeout + ) + resp = client.chat.completions.create(**kwargs) + return _clean_response(resp.choices[0].message.content.strip()) + except Exception as e: + if _is_auth_error(e): + raise RuntimeError( + f'{name} API Key 无效或已过期，请在设置中重新配置。' + f'申请地址：{PROVIDER_LINKS.get(provider, "")}' + ) from e + + wait = 2 ** attempt + logger.warning(f'AI 请求失败 (第{attempt+1}次)，{wait}s 后重试: {e}') + if attempt < max_retries - 1: + time.sleep(wait) + else: + raise RuntimeError(f'AI 接口调用失败（已重试 {max_retries} 次）: {e}') from e + + return '' + + +def chat_with_history(system: str, messages: list, + temperature: float = 0.7, max_tokens: int = 4096) -> str: + """ + 多轮对话接口，支持完整历史上下文，用于对话式章节生成。 + messages 格式：[{'role': 'user'|'assistant', 'content': str}, ...] 
+ 受全局LLM_SEMAPHORE保护。 + """ + _check_api_key() + + client = _get_client() + model = _get_model() + provider = config.MODEL_PROVIDER + name = PROVIDER_NAMES.get(provider, provider) + + full_messages = [{'role': 'system', 'content': system}] + messages + + for attempt in range(config.MAX_RETRIES): + try: + with config.llm_call(): # 全局并发控制 + kwargs = _build_chat_kwargs(model, full_messages, temperature, max_tokens) + resp = client.chat.completions.create(**kwargs) + return _clean_response(resp.choices[0].message.content.strip()) + except Exception as e: + if _is_auth_error(e): + raise RuntimeError( + f'{name} API Key 无效或已过期，请在设置中重新配置。' + f'申请地址：{PROVIDER_LINKS.get(provider, "")}' + ) from e + wait = 2 ** attempt + logger.warning(f'对话 AI 请求失败 (第{attempt+1}次)，{wait}s 后重试: {e}') + if attempt < config.MAX_RETRIES - 1: + time.sleep(wait) + else: + raise RuntimeError(f'AI 接口调用失败（已重试 {config.MAX_RETRIES} 次）: {e}') from e + + return '' + + +def get_embeddings(texts: list[str]) -> list[list[float]]: + """获取文本嵌入向量。 + 支持 Qwen、OpenAI、Kimi；DeepSeek / Ollama / 豆包暂不提供 Embedding API。 + 受全局LLM_SEMAPHORE保护（嵌入调用计入并发上限）。 + """ + provider = config.MODEL_PROVIDER + if provider in ('deepseek', 'ollama', 'doubao'): + raise NotImplementedError( + f'{PROVIDER_NAMES.get(provider)} 暂不支持 Embedding API，知识库将使用关键词检索降级' + ) + + client = _get_client() + if provider == 'qwen': + model = config.QWEN_EMBEDDING_MODEL + elif provider == 'kimi': + model = config.KIMI_EMBEDDING_MODEL + else: + model = config.OPENAI_EMBEDDING_MODEL + + with config.llm_call(): # 嵌入也受并发限制 + resp = client.embeddings.create(model=model, input=texts) + return [item.embedding for item in resp.data] diff --git a/utils/attachment_section.py b/utils/attachment_section.py new file mode 100644 index 0000000..6df16f4 --- /dev/null +++ b/utils/attachment_section.py @@ -0,0 +1,186 @@ +""" +附件类章节识别：标题匹配、expand_outline 跳过、以及正文模式（完整正文 vs 仅单图单表）。 +""" +from __future__ import annotations + +import json +import logging +import os +import re 
+from typing import Any, Dict, List, Optional + +import config + +logger = logging.getLogger(__name__) + +DEFAULT_ATTACHMENT_RULES: Dict[str, Any] = { + 'schema_version': 1, + 'title_regex': [ + r'附件\s*[一二三四五六七八九十0-9A-Za-z、：:.]', + r'附\s*图', + r'附\s*表', + r'附\s*件\s*\(', + r'^\s*[\d一二三四五六七八九十\.．、]+\s*附件', + ], + 'table_hint_keywords': [ + '附表', '一览表', '清单表', '统计表', '明细表', + ], + 'figure_hint_keywords': [ + '附图', '示意图', '平面图', '流程图', '布置图', '组织图', '横道', + ], + 'default_kind_when_ambiguous': 'table', + # stack_charts_only：默认，意图栈只输出 [FIGURE]/[TABLE] 无正文；full：长文；single_chart_only：栈顶仅一块 + 'attachment_leaf_body_mode': 'stack_charts_only', +} + + +def attachment_rules_path() -> str: + return os.path.join(config.DATA_DIR, 'attachment_section_rules.json') + + +def load_attachment_rules(path: Optional[str] = None) -> Dict[str, Any]: + p = path or attachment_rules_path() + data = dict(DEFAULT_ATTACHMENT_RULES) + if not os.path.isfile(p): + return data + try: + with open(p, encoding='utf-8') as f: + raw = json.load(f) + if isinstance(raw, dict): + for k, v in raw.items(): + if k.startswith('_'): + continue + data[k] = v + except Exception as e: + logger.warning('加载 attachment_section_rules.json 失败，使用内置默认: %s', e) + return data + + +def attachment_leaf_body_mode(rules: Optional[Dict[str, Any]] = None) -> str: + """ + 附件叶节点正文策略： + stack_charts_only（默认）、full（完整技术正文）、single_chart_only（栈顶仅一块图或表）。 + """ + r = rules or get_attachment_rules_cached() + mode = (r.get('attachment_leaf_body_mode') or 'stack_charts_only').strip().lower() + if mode in ('single_chart_only', 'stack_charts_only', 'full'): + return mode + return 'stack_charts_only' + + +def use_attachment_stack_charts_body(rules: Optional[Dict[str, Any]] = None) -> bool: + """附件走「意图栈仅图/表、无长文」路径（含 single_chart_only 的单栈顶版本）。""" + m = attachment_leaf_body_mode(rules) + return m in ('stack_charts_only', 'single_chart_only') + + +def use_attachment_single_chart_only_body(rules: Optional[Dict[str, Any]] = None) -> bool: + return 
attachment_leaf_body_mode(rules) == 'single_chart_only' + + +def use_attachment_full_body(rules: Optional[Dict[str, Any]] = None) -> bool: + return attachment_leaf_body_mode(rules) == 'full' + + +def is_attachment_only_section(section_title: str, rules: Optional[Dict[str, Any]] = None) -> bool: + """标题是否属于附件类（附图/附表/附件N 等），用于 expand_outline 跳过与正文分支。""" + t = (section_title or '').strip() + if not t: + return False + r = rules or load_attachment_rules() + patterns: List[str] = list(r.get('title_regex') or []) + for pat in patterns: + try: + if re.search(pat, t): + return True + except re.error: + logger.warning('无效 attachment title_regex，已跳过: %s', pat[:80]) + return False + + +def pick_single_figure_or_table( + section_title: str, + enable_figure: bool, + enable_table: bool, + rules: Optional[Dict[str, Any]] = None, +) -> Optional[str]: + """ + 返回 'figure' | 'table' | None。 + 两开关均关返回 None；仅开一个则取对应类型。 + """ + if not enable_figure and not enable_table: + return None + if enable_figure and not enable_table: + return 'figure' + if enable_table and not enable_figure: + return 'table' + + r = rules or load_attachment_rules() + t = (section_title or '') + tbl_kw = list(r.get('table_hint_keywords') or []) + fig_kw = list(r.get('figure_hint_keywords') or []) + # 单独「表」字易误判，仅当同时存在附表类或与其它词组合时再偏表 + for kw in tbl_kw: + if kw and kw in t: + return 'table' + for kw in fig_kw: + if kw and kw in t: + return 'figure' + # 泛「表」在附件语境下常见 + if '表' in t and '图' not in t: + return 'table' + if '图' in t and '表' not in t: + return 'figure' + + default = (r.get('default_kind_when_ambiguous') or 'table').strip().lower() + if default == 'figure': + return 'figure' + return 'table' + + +_cached_rules: Optional[Dict[str, Any]] = None + + +def get_attachment_rules_cached() -> Dict[str, Any]: + global _cached_rules + if _cached_rules is None: + _cached_rules = load_attachment_rules() + return _cached_rules + + +def should_skip_expand_subchapters(title: str) -> bool: + """ + AI 
# Extract labels such as the number in "附件N" from a title (used for logging).
_ATTACHMENT_LABEL_RE = re.compile(
    r'附件\s*[:：]?\s*([一二三四五六七八九十百0-9A-Za-z]+)',
)
_ATTACHMENT_FIG_TBL_RE = re.compile(r'附\s*[图表]\s*([一二三四五六七八九十百0-9]*)')


def parse_attachment_label(title: str) -> Optional[str]:
    """Extract a short attachment label from a section title.

    Args:
        title: Section title; ``None`` or blank yields ``None``.

    Returns:
        * the number/letter token following ``附件`` when present;
        * otherwise the number following ``附图`` / ``附表`` when present;
        * otherwise the literal ``'附图'`` or ``'附表'`` when the title
          contains one of them without a number;
        * ``None`` when nothing matches.
    """
    t = (title or '').strip()
    if not t:
        return None

    m = _ATTACHMENT_LABEL_RE.search(t)
    if m:
        # The group is `+`-quantified over a class without whitespace, so it
        # is always a non-empty, already-trimmed token.
        return m.group(1)

    m2 = _ATTACHMENT_FIG_TBL_RE.search(t)
    if m2:
        number = m2.group(1)
        if number:
            return number
        return '附图' if '图' in m2.group(0) else '附表'

    # No further fallback is needed: _ATTACHMENT_FIG_TBL_RE already matches
    # every title that contains 附图 / 附表 (with optional inner whitespace).
    return None
'test_instruments' + # 劳动力计划表 + if '劳动力' in t and ('计划' in t or '表' in t): + return 'labor_plan' + # 临时用地表 + if '临时用地' in t: + return 'temp_land' + # 进度网络图 / 横道图 / 开完工日期 + if ('进度' in t or '开工' in t or '完工' in t) and ( + '网络' in t or '横道' in t or '计划' in t or '日期' in t or '图' in t + ): + return 'schedule_chart' + # 施工总平面图 + if '施工总平面' in t or ('总平面' in t and '图' in t): + return 'site_layout' + return None + + +def _ctx_hint(summary: str, boq: str, max_len: int = 400) -> str: + parts = [] + if (summary or '').strip(): + parts.append((summary or '').strip()[:max_len]) + if (boq or '').strip(): + parts.append(('清单摘要：' + (boq or '').strip())[:max_len]) + return '；'.join(parts) if parts else '按招标文件及施工组织要点配置' + + +def _project_text(summary: str, boq: str) -> str: + return f'{summary or ""}\n{boq or ""}' + + +def _text_deterministic_seed(text: str) -> int: + """短文本时仍能得到稳定偏移，便于同项目多次生成一致。""" + raw = (text or '').strip()[:8000] + if not raw: + return 7 + return sum(ord(c) * (i + 1) for i, c in enumerate(raw[:600])) % 997 + + +def _equipment_scale_from_text(summary: str, boq: str) -> float: + """设备台数规模系数，随项目信息量与关键词调整。""" + t = _project_text(summary, boq) + f = 1.0 + if any(k in t for k in ('大规模', '重点', '总长', '线路', '隧道', '桥梁', '水利枢纽')): + f += 0.1 + if any(k in t for k in ('小型', '维修', '改造', '零星', '装修')): + f -= 0.12 + if len(t) > 3000: + f += 0.06 + elif len(t) < 350: + f -= 0.06 + return max(0.78, min(f, 1.22)) + + +def _parse_area_sqm_near_keywords(text: str, keywords: tuple[str, ...]) -> Optional[int]: + """在含关键词的行中抓取面积数字（㎡、平方米、m² 等）。""" + if not text or not keywords: + return None + for line in text.splitlines(): + if not any(k in line for k in keywords): + continue + m = re.search(r'(\d+(?:\.\d+)?)\s*(?:㎡|m2|m²|平方米)', line, re.I) + if m: + return max(1, int(round(float(m.group(1))))) + m2 = re.search(r'(\d+(?:\.\d+)?)\s*平方', line) + if m2: + return max(1, int(round(float(m2.group(1))))) + return None + + +def _area_m2_with_context( + text: str, + keywords: tuple[str, 
...], + base_m2: int, + scale: float, + seed: int, + row_index: int, + lo: int, + hi: int, +) -> int: + hinted = _parse_area_sqm_near_keywords(text, keywords) + if hinted is not None: + return max(lo, min(hinted, hi)) + v = int(round(base_m2 * scale)) + v += ((seed >> (row_index * 4)) & 31) - 15 + return max(lo, min(v, hi)) + + +# 临时用地行定义（与临时用地表、施工总平面图共用） +TEMP_LAND_SPECS: list[dict] = [ + { + 'use': '项目部办公', + 'kws': ('项目部办公', '办公区', '项目部'), + 'base': 150, + 'lo': 80, + 'hi': 420, + 'loc': '施工用地内近大门侧、与作业区相对独立', + 'time': '全施工期', + }, + { + 'use': '门卫及保卫', + 'kws': ('门卫', '保卫室', '门岗'), + 'base': 18, + 'lo': 10, + 'hi': 55, + 'loc': '施工现场主出入口内侧', + 'time': '全施工期', + }, + { + 'use': '工人生活区', + 'kws': ('生活区', '宿舍区', '民工宿舍'), + 'base': 220, + 'lo': 120, + 'hi': 650, + 'loc': '场区一侧，与加工堆场保持安全距离', + 'time': '全施工期', + }, + { + 'use': '食堂与卫浴', + 'kws': ('食堂', '伙房', '卫浴', '卫生间'), + 'base': 42, + 'lo': 25, + 'hi': 120, + 'loc': '生活区内上风向、排水接入沉淀设施', + 'time': '全施工期', + }, + { + 'use': '材料堆场', + 'kws': ('堆场', '材料堆场', '仓储'), + 'base': 320, + 'lo': 150, + 'hi': 900, + 'loc': '塔吊覆盖范围内、靠近施工道路', + 'time': '分阶段布设，与进度同步', + }, + { + 'use': '钢木加工车间', + 'kws': ('加工区', '加工车间', '钢筋加工', '木工棚'), + 'base': 300, + 'lo': 160, + 'hi': 800, + 'loc': '靠近堆场与环场道路，设防噪声围挡', + 'time': '主体施工高峰期为主', + }, + { + 'use': '机械停放区', + 'kws': ('机械停放', '设备停放', '停车场'), + 'base': 200, + 'lo': 100, + 'hi': 550, + 'loc': '硬化地坪、靠近出入口便于进退场', + 'time': '全施工期', + }, + { + 'use': '临时道路及硬化场地', + 'kws': ('临时道路', '施工道路', '硬化'), + 'base': 980, + 'lo': 400, + 'hi': 2800, + 'loc': '环场主干道及作业面连通支线', + 'time': '随施工阶段动态调整', + }, +] + + +def compute_temp_land_rows(summary: str, boq: str) -> list[dict[str, Any]]: + """与临时用地表一致的用途、面积(㎡)、位置、时间（供总平面图文字勾连）。""" + ctx = _project_text(summary, boq) + seed = _text_deterministic_seed(ctx) + scale = _equipment_scale_from_text(summary, boq) + out: list[dict[str, Any]] = [] + for i, spec in enumerate(TEMP_LAND_SPECS): + m2 = _area_m2_with_context( + ctx, + spec['kws'], + spec['base'], + scale, + 
seed, + i, + spec['lo'], + spec['hi'], + ) + out.append( + { + 'use': spec['use'], + 'area_m2': m2, + 'loc': spec['loc'], + 'time': spec['time'], + } + ) + return out + + +def _build_temp_land_table_md(title: str, summary: str, boq: str) -> str: + """ + 临时用地表：面积(㎡) 给出具体整数；优先从摘要/清单中解析与用途相关的面积表述。 + """ + clean = (title or '临时用地表').strip() + hint = _ctx_hint(summary, boq) + rows = compute_temp_land_rows(summary, boq) + header = '| 用途 | 面积(㎡) | 位置 | 需用时间 |' + sep = '|------|----------|------|----------|' + lines = [header, sep] + for r in rows: + lines.append( + '| {use} | {m2} | {loc} | {time} |'.format( + use=r['use'], + m2=r['area_m2'], + loc=r['loc'], + time=r['time'], + ) + ) + body = '\n'.join(lines) + return ( + f'[TABLE:{clean}]\n' + f'{body}\n' + f'[/TABLE]\n' + f'（各项面积已按项目规模具体填写；若总平面布置或招标文件另有用地指标，以{hint}及审批总平面为准复核。）' + ) + + +def _parse_qty_near_keywords(text: str, keywords: tuple[str, ...]) -> Optional[int]: + """在含关键词的行中抓取「数字+台/套/辆」。""" + if not text or not keywords: + return None + for line in text.splitlines(): + if not any(k in line for k in keywords): + continue + for m in re.finditer(r'(\d+)\s*(?:台|套|辆|组|把)', line): + n = int(m.group(1)) + if 1 <= n <= 999: + return n + return None + + +def _parse_year_from_text(text: str) -> Optional[int]: + """摘要/清单中出现的合理制造年份（近年出厂设备）。""" + years = [int(y) for y in re.findall(r'(20[1-2]\d)', text or '')] + if not years: + return None + y = max(years) + if 2015 <= y <= 2026: + return y + return None + + +def _pick_year(base_year: Optional[int], row_index: int, seed: int) -> int: + """制造年份：优先项目文本；否则在近年区间内按行略作错落。""" + if base_year is not None: + return max(2018, min(base_year, 2025)) + pool = [2019, 2020, 2021, 2022, 2023, 2024] + return pool[(seed + row_index * 17) % len(pool)] + + +def _qty_with_context( + text: str, + keywords: tuple[str, ...], + base_qty: int, + scale: float, + seed: int, + row_index: int, +) -> int: + hinted = _parse_qty_near_keywords(text, keywords) + if hinted is not None: + return max(1, 
min(hinted, 99)) + q = int(round(base_qty * scale)) + jitter = ((seed >> (row_index * 3)) & 3) - 1 + q = max(1, q + jitter) + return min(q, 99) + + +# 主要施工设备行定义（设备表与总平面图「允许出现的机械」共用关键词） +MAIN_EQUIPMENT_SPECS: list[dict] = [ + { + 'name': '液压挖掘机', + 'kws': ('挖掘机', '液压挖', '反铲', '土方机械'), + 'q0': 4, + 'models': ('SY235C-10', 'PC220-8MO', 'ZX240LC-5A'), + 'powers': (125, 132, 128), + 'cap': ('斗容约1.1m³', '斗容约1.0m³', '斗容约1.2m³'), + 'part': '土方开挖、基坑与回填作业面', + 'rmk': '自有或租赁按进度调配', + }, + { + 'name': '履带式推土机', + 'kws': ('推土机', '平整'), + 'q0': 2, + 'models': ('SD16标准型', 'D6K LGP', 'TY160'), + 'powers': (120, 104, 121), + 'cap': ('额定功率匹配铲刀作业', '铲刀容量与土质匹配', '平整效率满足工序'), + 'part': '场地平整、路基粗平', + 'rmk': '与挖装设备流水衔接', + }, + { + 'name': '自卸汽车', + 'kws': ('自卸', '渣土', '运输', '汽车'), + 'q0': 10, + 'models': ('东风天锦 15t', '重汽豪沃 18t', '解放J6P 20t'), + 'powers': (180, 228, 265), + 'cap': ('额定载重15t', '额定载重18t', '额定载重20t'), + 'part': '土方、砂石料及混凝土运输', + 'rmk': '道路与地磅条件满足通行', + }, + { + 'name': '轮式装载机', + 'kws': ('装载机', '铲车'), + 'q0': 3, + 'models': ('ZL50GN', 'L956F', 'WA380-6'), + 'powers': (162, 175, 142), + 'cap': ('斗容约3.0m³', '斗容约2.8m³', '斗容约2.5m³'), + 'part': '堆场装车、短驳与备料', + 'rmk': '与运输车辆匹配', + }, + { + 'name': '振动压路机', + 'kws': ('压路机', '碾压', '压实'), + 'q0': 3, + 'models': ('XS223J', 'CA30D', 'SR26M-C5'), + 'powers': (118, 132, 129), + 'cap': ('工作质量约22t', '激振力与遍数受控', '适用于基层与面层'), + 'part': '路基、基层及面层压实', + 'rmk': '试验段确定压实参数', + }, + { + 'name': '混凝土搅拌运输车', + 'kws': ('搅拌车', '罐车', '商砼'), + 'q0': 6, + 'models': ('12m³ 陕汽底盘', '10m³ 三一重工', '12m³ 中联重科'), + 'powers': (276, 257, 288), + 'cap': ('几何容积12m³', '搅动与卸料满足泵送', 'GPS调度'), + 'part': '商品混凝土水平运输', + 'rmk': '与泵车浇筑节拍匹配', + }, + { + 'name': '汽车式起重机', + 'kws': ('汽车吊', '起重机', '吊装'), + 'q0': 2, + 'models': ('QY25K5C 25t', 'STC250T4 25t', 'XCT25L5'), + 'powers': (213, 206, 210), + 'cap': ('最大起重量25t', '主臂长度满足构件', '支腿全伸作业'), + 'part': '钢筋、模板及小型构件吊装', + 'rmk': '专项方案与地基承载复核', + }, + { + 'name': '柴油发电机组', + 'kws': ('发电机', '发电', '备用电源'), + 'q0': 2, + 'models': 
def _allowed_equipment_names(summary: str, boq: str, limit: int = 12) -> list[str]:
    """List the machinery the site layout figure is allowed to show.

    A machine is included only when the combined summary/bill text mentions
    it: either one of the keywords of a ``MAIN_EQUIPMENT_SPECS`` entry, or
    one of the extra aliases below. Equipment categories absent from the
    text are never added.

    Args:
        summary: Project summary text.
        boq: Bill-of-quantities summary text.
        limit: Maximum number of names returned (a non-empty result always
            keeps at least its first name).

    Returns:
        De-duplicated names in discovery order.
    """
    project_text = _project_text(summary, boq)
    matched: list[str] = [
        spec['name']
        for spec in MAIN_EQUIPMENT_SPECS
        if any(kw in project_text for kw in spec['kws'])
    ]

    # Machinery wording that may appear directly in the bill (supplementary);
    # several aliases can map onto the same display name.
    alias_to_label = (
        ('塔吊', '塔式起重机'),
        ('塔机', '塔式起重机'),
        ('施工电梯', '施工升降机'),
        ('泵车', '混凝土泵车'),
        ('车载泵', '混凝土泵车'),
        ('静压桩机', '静力压桩机'),
        ('旋挖钻', '旋挖钻机'),
        ('摊铺机', '沥青摊铺机'),
        ('铣刨机', '路面铣刨机'),
    )
    for alias, label in alias_to_label:
        if alias in project_text and label not in matched:
            matched.append(label)

    # dict.fromkeys keeps first-seen order while dropping duplicates.
    unique = list(dict.fromkeys(matched))
    return unique[:max(limit, 1)]
+ qty_str = f'{qty}辆' + elif spec['name'] in ('柴油发电机组',): + qty_str = f'{qty}套' + + year = _pick_year(year_hint, i, seed) + pwr_str = str(power) + + lines.append( + '| {idx} | {name} | {model} | {qtys} | 中国 | {year} | {pwr} | {cap} | {part} | {rmk} |'.format( + idx=i + 1, + name=spec['name'], + model=model, + qtys=qty_str, + year=year, + pwr=pwr_str, + cap=cap, + part=spec['part'], + rmk=spec['rmk'], + ) + ) + + body = '\n'.join(lines) + return ( + f'[TABLE:{clean}]\n' + f'{body}\n' + f'[/TABLE]\n' + f'（型号、台数、功率等已按项目资料做具体化填写；若清单或技术条款另有约定，以{hint}为准最终核定。）' + ) + + +def _instrument_used_hours( + year: int, + seed: int, + row_index: int, + powered: bool, +) -> str: + """ + 已使用台时数：机动/电子类给具体整数（台时）；非动力器具为 0。 + 随出厂年份与稳定种子变化，避免全表雷同。 + """ + if not powered: + return '0' + ynow = date.today().year + age = max(0, min(ynow - year, 8)) + base = 380 + (seed % 220) + row_index * 88 + annual = 260 + ((seed >> (row_index + 2)) & 0xFF) % 240 + h = int(base + age * annual) + h = max(160, min(h, 5600)) + return str(h) + + +def _instrument_qty( + text: str, + keywords: tuple[str, ...], + base_qty: int, + scale: float, + seed: int, + row_index: int, + max_qty: int = 12, +) -> int: + hinted = _parse_qty_near_keywords(text, keywords) + if hinted is not None: + return max(1, min(hinted, max_qty)) + q = int(round(base_qty * scale)) + jitter = ((seed >> (row_index * 4)) & 3) - 1 + q = max(1, q + jitter) + return min(q, max_qty) + + +def _build_test_instruments_table_md(title: str, summary: str, boq: str) -> str: + """ + 试验和检测仪器设备表：型号规格、数量、制造年份、已使用台时数为具体数值； + 台数优先从摘要/清单中含关键词的行解析。 + """ + clean = (title or '试验和检测仪器设备表').strip() + hint = _ctx_hint(summary, boq) + ctx = _project_text(summary, boq) + seed = _text_deterministic_seed(ctx) + scale = _equipment_scale_from_text(summary, boq) + year_hint = _parse_year_from_text(ctx) + + rows_spec: list[dict] = [ + { + 'name': '全站仪', + 'kws': ('全站仪', '全站', '测量仪'), + 'q0': 2, + 'models': ('徕卡 TS09plus 1″', '南方 NTS-362R10', '苏一光 RTS112R10'), + 'part': 
'施工控制测量、轴线与坐标放样', + 'rmk': '检定合格且在有效期内', + 'powered': True, + 'unit': '台', + }, + { + 'name': '水准仪', + 'kws': ('水准仪', '水准'), + 'q0': 3, + 'models': ('苏一光 DSZ2', '徕卡 NA730', '天津赛博 DS32'), + 'part': '高程传递、沉降与水准路线观测', + 'rmk': 'i角定期校验', + 'powered': True, + 'unit': '台', + }, + { + 'name': '钢卷尺', + 'kws': ('钢卷尺', '卷尺'), + 'q0': 8, + 'models': ('5m Ⅰ级', '50m 标准钢卷尺', '30m Ⅰ级'), + 'part': '距离量测、模板与预埋位置复核', + 'rmk': '周期检定', + 'powered': False, + 'unit': '把', + }, + { + 'name': '游标卡尺', + 'kws': ('游标卡尺', '卡尺'), + 'q0': 6, + 'models': ('0–150mm 0.02mm', '0–200mm 0.02mm', '数显卡尺 0–150mm'), + 'part': '钢筋直径、螺栓与加工件尺寸抽检', + 'rmk': '计量台账管理', + 'powered': False, + 'unit': '把', + }, + { + 'name': '混凝土回弹仪', + 'kws': ('回弹仪', '回弹'), + 'q0': 2, + 'models': ('ZC3-A 中回', 'HT-225A', '瑞士 Proceq Original Schmidt'), + 'part': '结构混凝土强度现场抽检', + 'rmk': '率定试验按期进行', + 'powered': True, + 'unit': '台', + }, + { + 'name': '数字万用表', + 'kws': ('万用表',), + 'q0': 4, + 'models': ('Fluke 117', '优利德 UT39C+', '胜利 VC890C+'), + 'part': '临电线路、设备绝缘与电压电流测试', + 'rmk': 'CAT III 安全等级满足现场', + 'powered': True, + 'unit': '台', + }, + { + 'name': '接地电阻测试仪', + 'kws': ('接地电阻', '接地测试'), + 'q0': 2, + 'models': ('ETCR2000A', 'Fluke 1623-2', '胜利 VC4105A'), + 'part': '接地网、防雷及设备接地电阻测试', + 'rmk': '三极法/钳形法按规范选用', + 'powered': True, + 'unit': '台', + }, + { + 'name': '绝缘电阻测试仪', + 'kws': ('绝缘电阻', '兆欧表'), + 'q0': 2, + 'models': ('Fluke 1507 1000V', '优利德 UT501A', '胜利 VC60B+'), + 'part': '电缆、电机及配电回路绝缘测试', + 'rmk': '送电前必测项目', + 'powered': True, + 'unit': '台', + }, + { + 'name': '电液伺服压力试验机', + 'kws': ('压力试验机', '试验机', '万能试验机'), + 'q0': 1, + 'models': ('YAW-3000 微机控制', 'WAW-1000 微机控制', 'YES-2000 数显'), + 'part': '混凝土、砂浆试块抗压强度试验', + 'rmk': '与标养室及见证取样制度配套', + 'powered': True, + 'unit': '台', + }, + { + 'name': '超声波测厚仪', + 'kws': ('测厚仪', '超声波测厚'), + 'q0': 2, + 'models': ('TT130 0.1mm', 'Olympus 38DL PLUS', '时代 TIME2130'), + 'part': '钢管、钢板及防腐层厚度抽检', + 'rmk': '耦合剂与探头匹配管材材质', + 'powered': True, + 'unit': '台', + }, + ] + + header = ( + '| 序号 | 仪器设备名称 | 
型号规格 | 数量 | 国别产地 | 制造年份 | ' + '已使用台时数 | 用于施工部位 | 备注 |' + ) + sep = ( + '|------|--------------|----------|------|----------|----------|' + '--------------|--------------|------|' + ) + lines = [header, sep] + + for i, spec in enumerate(rows_spec): + mi = (seed + i * 29) % len(spec['models']) + model = spec['models'][mi] + qty_n = _instrument_qty(ctx, spec['kws'], spec['q0'], scale, seed, i, max_qty=15) + qty_str = f'{qty_n}{spec["unit"]}' + year = _pick_year(year_hint, i, seed) + hours = _instrument_used_hours(year, seed, i, spec['powered']) + rmk = spec['rmk'] + lines.append( + '| {idx} | {name} | {model} | {qty} | 中国 | {year} | {hours} | {part} | {rmk} |'.format( + idx=i + 1, + name=spec['name'], + model=model, + qty=qty_str, + year=year, + hours=hours, + part=spec['part'], + rmk=rmk, + ) + ) + + body = '\n'.join(lines) + return ( + f'[TABLE:{clean}]\n' + f'{body}\n' + f'[/TABLE]\n' + f'（型号、数量、出厂年份、已使用台时数已按项目资料具体填写；机动器具台时为累计估值，非机动为0；最终以{hint}及设备台账、检定证书为准。）' + ) + + +def _labor_peak_factor(summary: str, boq: str) -> float: + """根据摘要/清单篇幅与关键词略调人数规模，避免所有项目同一套数字。""" + text = (summary or '') + (boq or '') + f = 1.0 + if any(k in text for k in ('大规模', '重点工程', '总建筑面积', '线路长度', '合同额')): + f += 0.12 + if any(k in text for k in ('小型', '维修', '改造', '零星')): + f -= 0.08 + n = len(text) + if n > 2500: + f += 0.08 + elif n < 400: + f -= 0.05 + return max(0.82, min(f, 1.28)) + + +# 各阶段人数基准（人）：准备 / 建筑主体 / 临时工程 / 附属 / 收尾；峰值落在主体阶段 +_LABOR_BASE_ROWS: list[tuple[str, tuple[int, int, int, int, int]]] = [ + ('测量工', (4, 8, 4, 6, 3)), + ('挖掘机司机', (2, 8, 6, 2, 1)), + ('装载机司机', (1, 5, 4, 2, 1)), + ('自卸车司机', (2, 10, 6, 4, 2)), + ('木工', (0, 8, 2, 4, 2)), + ('砼工', (0, 20, 4, 8, 6)), + ('钢筋工', (0, 18, 4, 8, 4)), + ('电工', (3, 4, 3, 4, 3)), + ('普工', (10, 30, 20, 20, 10)), + ('试验工', (2, 4, 2, 3, 2)), + ('仓管', (2, 3, 2, 3, 2)), +] + + +def _build_labor_plan_table_md(title: str, summary: str, boq: str) -> str: + clean = (title or '劳动力计划表').strip() + hint = _ctx_hint(summary, boq) + factor = 
_labor_peak_factor(summary, boq) + # 表头五列名称须与「按工程施工阶段投入劳动力情况」下子列一致（导出 Word 时做双层合并表头） + header = ( + '| 工种 | 施工准备阶段 | 建筑工程施工阶段 | 临时工程施工阶段 | ' + '其他附属相关工程 | 收尾阶段 |' + ) + sep = '|------|--------------|------------------|------------------|------------------|----------|' + lines = [header, sep] + scaled_rows: list[tuple[int, int, int, int, int]] = [] + for trade, nums in _LABOR_BASE_ROWS: + row = tuple(max(0, int(round(n * factor))) for n in nums) + scaled_rows.append(row) + a, b, c, d, e = row + lines.append(f'| {trade} | {a} | {b} | {c} | {d} | {e} |') + if scaled_rows: + tot = tuple(sum(r[j] for r in scaled_rows) for j in range(5)) + lines.append( + f'| 合计 | {tot[0]} | {tot[1]} | {tot[2]} | {tot[3]} | {tot[4]} |' + ) + body = '\n'.join(lines) + return ( + f'[TABLE:{clean}]\n' + f'{body}\n' + f'[/TABLE]\n' + f'（上表为按施工阶段配置的**人数**估算，单位：人；高峰一般集中在建筑工程施工阶段，须结合工期与作业面以{hint}为准复核调整。）' + ) + + +def _find_date_after_keyword(text: str, keywords: tuple[str, ...], window: int = 56) -> Optional[str]: + """在关键词后若干字符内抓取 yyyy年mm月dd日或 yyyy-mm-dd。""" + if not text: + return None + for kw in keywords: + start = 0 + while True: + idx = text.find(kw, start) + if idx < 0: + break + frag = text[idx : idx + len(kw) + window] + dm = re.search(r'(\d{4})\s*年\s*(\d{1,2})\s*月\s*(\d{1,2})\s*日', frag) + if dm: + return f'{dm.group(1)}年{dm.group(2)}月{dm.group(3)}日' + dm2 = re.search(r'(\d{4})[-/.](\d{1,2})[-/.](\d{1,2})', frag) + if dm2: + return f'{dm2.group(1)}年{dm2.group(2)}月{dm2.group(3)}日' + start = idx + len(kw) + return None + + +def _parse_duration_calendar_days(text: str) -> Optional[int]: + if not text: + return None + for pat in ( + r'(\d+)\s*日历天', + r'总工期[：:为]?\s*(\d+)\s*天', + r'工期[（(]?\s*含?[^）)]*?[）)]?\s*[：:为]?\s*(\d+)\s*天', + r'计划工期[：:为]?\s*(\d+)\s*天', + ): + m = re.search(pat, text) + if m: + n = int(m.group(1)) + if 1 <= n <= 3650: + return n + return None + + +def _parse_schedule_facts(summary: str, boq: str) -> dict[str, Any]: + """从摘要+清单摘取开工、完工、日历天及里程碑行（摘不到则为空，禁止下游杜撰）。""" + ctx = 
_project_text(summary, boq) + start = _find_date_after_keyword( + ctx, + ('计划开工', '开工日期', '拟定开工', '合同开工', '开工时间', '开工日'), + ) + end = _find_date_after_keyword( + ctx, + ('计划完工', '完工日期', '竣工日期', '交工日期', '合同完工', '竣工时间', '完工时间'), + ) + duration = _parse_duration_calendar_days(ctx) + milestones: list[str] = [] + for line in ctx.splitlines(): + s = line.strip() + if any( + k in s + for k in ( + '里程碑', + '节点工期', + '控制性节点', + '关键节点', + '主体封顶', + '竣工验收', + '中间验收', + ) + ): + if 8 < len(s) < 160: + milestones.append(s) + if len(milestones) >= 8: + break + return { + 'start': start, + 'end': end, + 'duration_days': duration, + 'milestones': milestones, + } + + +def _boq_line_cells(line: str) -> list[str]: + line = line.strip() + if line.startswith('|') and line.endswith('|'): + return [c.strip() for c in line[1:-1].split('|')] + return [] + + +def _extract_boq_work_items(summary: str, boq: str, max_items: int = 14) -> list[str]: + """ + 从工程量清单摘要/表格行中抽取可作为进度网络节点的分项名称。 + 仅使用文本中已出现的短语，不编造清单外工程。 + """ + primary = (boq or '').strip() or (summary or '').strip() + secondary = (summary or '').strip() if primary != (summary or '').strip() else '' + candidates: list[str] = [] + + def _consume(text: str) -> None: + for raw in text.splitlines(): + line = raw.strip() + if not line or len(line) < 4: + continue + if re.match(r'^\|[\s\-:|]+\|$', line): + continue + if line.startswith('|'): + cells = _boq_line_cells(line) + for c in cells: + c = c.replace('**', '').strip() + if 4 <= len(c) <= 80 and re.search(r'[\u4e00-\u9fff]', c): + if re.match(r'^(序号|编码|项目编码|名称|单位|工程量|单价|合价|分部|分项)$', c): + continue + if re.match(r'^[\d\.％%]+$|^第?[一二三四五六七八九十]+[章节部分项、]', c): + continue + candidates.append(c) + continue + m = re.match(r'^[\d\.]+\s+(.+)', line) + if m: + frag = m.group(1).strip()[:80] + if re.search(r'[\u4e00-\u9fff]', frag): + candidates.append(frag) + continue + if re.match(r'^[（(]?[一二三四五六七八九十百零〇\d]+[）)]\s*[、．.]?\s*.+', line) and len(line) < 100: + candidates.append(line[:80]) + + 
_consume(primary) + if secondary: + _consume(secondary) + + seen: set[str] = set() + out: list[str] = [] + for c in candidates: + key = c.strip() + if key and key not in seen: + seen.add(key) + out.append(key) + if len(out) >= max_items: + break + return out + + +def _schedule_figure_body( + clean: str, + hint: str, + sch: dict[str, Any], + nodes: list[str], +) -> str: + start_s = sch.get('start') + end_s = sch.get('end') + nd = sch.get('duration_days') + milestones = sch.get('milestones') or [] + + lines: list[str] = [ + f'[FIGURE:{clean}]', + '【编制原则】进度网络/横道须与招标文件计划工期及工程量清单工作范围一致；下列日期、日历天与节点均**仅摘自**已解析的项目摘要与清单摘要，摘要未出现的**不得臆造**。', + '【计划工期（摘自资料）】', + ] + if start_s: + lines.append(f'· 计划开工日期（摘录）：{start_s}') + else: + lines.append('· 计划开工日期：摘要/清单未摘录到明确日期的，以招标文件、补遗书及合同协议书为准，**本图不填写具体开工日**。') + if end_s: + lines.append(f'· 计划完工/竣工日期（摘录）：{end_s}') + else: + lines.append('· 计划完工/竣工日期：同上，以招标文件明示为准，**本图不填写具体完工日**。') + if nd: + lines.append(f'· 总工期（摘录）：{nd} 日历天') + else: + lines.append('· 总工期：摘要未摘录日历天数的，**不绘制具体日历刻度**，仅表示工序逻辑先后。') + + if milestones: + lines.append('【控制性节点/里程碑（摘自资料原文）】') + for m in milestones[:6]: + lines.append(f'· {m}') + else: + lines.append('【控制性节点】资料未摘录专项里程碑条目的，不强行列举；实施阶段按监理与招标人批复计划执行。') + + lines.append('【工程量清单工作项与网络节点（图中工序名称仅限下列，禁止增加清单外内容）】') + if nodes: + for i, n in enumerate(nodes, 1): + lines.append(f'{i}. 
def _build_schedule_figure_md(title: str, summary: str, boq: str) -> str:
    """Build the schedule figure block for a section title.

    Uses '施工进度计划网络图' when *title* is empty, then hands the context
    hint, the parsed schedule facts and the work items extracted from the
    bill to ``_schedule_figure_body``.
    """
    figure_title = (title or '施工进度计划网络图').strip()
    return _schedule_figure_body(
        figure_title,
        _ctx_hint(summary, boq),
        _parse_schedule_facts(summary, boq),
        _extract_boq_work_items(summary, boq),
    )
def build_standard_appendix_markdown(
    kind: str,
    title: str,
    summary: str = '',
    boq_summary: str = '',
) -> str:
    """Generate the complete markdown block for one standard appendix.

    Args:
        kind: Appendix type: ``main_equipment``, ``test_instruments``,
            ``labor_plan``, ``temp_land``, ``schedule_chart`` or
            ``site_layout``.
        title: Section title; ``'附件'`` is used when empty.
        summary: Project summary text.
        boq_summary: Bill-of-quantities summary text.

    Returns:
        The block produced by the matching builder, or ``''`` for an
        unknown *kind*.
    """
    clean = (title or '附件').strip()
    # No context hint is computed here: each builder derives its own hint
    # from the summary and bill text.

    if kind == 'main_equipment':
        return _build_main_equipment_table_md(clean, summary, boq_summary)

    if kind == 'test_instruments':
        return _build_test_instruments_table_md(clean, summary, boq_summary)

    if kind == 'labor_plan':
        return _build_labor_plan_table_md(clean, summary, boq_summary)

    if kind == 'temp_land':
        return _build_temp_land_table_md(clean, summary, boq_summary)

    if kind == 'schedule_chart':
        return _build_schedule_figure_md(clean, summary, boq_summary)

    if kind == 'site_layout':
        return _build_site_layout_figure_md(clean, summary, boq_summary)

    return ''
+""" +from __future__ import annotations + +import logging +import re +from typing import Any + +logger = logging.getLogger(__name__) + +BILL_KW = ['项目编码', '项目名称', '工程量', '计量单位', '综合单价', '清单编码'] +SEC_KW = ['分部分项', '分类分项', '措施项目', '其他项目', '工程量清单计价'] +FEE_PAGE_KW = [ + '规费', '税金', '社会保险费', '住房公积金', '养老保险', + '工伤保险', '失业保险', '医疗保险', '教育费附加', '城市维护建设税', +] + +ITEM_START = re.compile(r'^\d+(\.\d+)+\s') +CODE_INLINE = re.compile(r'(?:^|\s)(\d{9,12}|(? str: + def repl(m: re.Match) -> str: + a, b, c, d = m.group(1), m.group(2), m.group(3), m.group(4) or '' + combined = a + b + c + d + if 9 <= len(combined) <= 12: + return combined + return m.group(0) + + return _DASH_CODE.sub(repl, line) + + +def is_fee_item(name: str) -> bool: + if not name: + return False + n = re.sub(r'\s+', '', name) + if n in _EXACT_FEE_ITEM: + return True + for kw in _FEE_KW: + if kw in n: + return True + return False + + +def split_name_and_spec(raw_name: str) -> tuple[str, str]: + if not raw_name: + return '', '' + m = re.search(r'\d+[.、．)\uFF09]\s*[\u4e00-\u9fff]', raw_name) + if m and m.start() > 0: + return raw_name[:m.start()].strip(), raw_name[m.start():].strip() + kw = _SPEC_KW_RE.search(raw_name) + if kw and kw.start() > 0: + return raw_name[:kw.start()].strip(), raw_name[kw.start():].strip() + paren = re.search(r'[（(]\d+[）)]', raw_name) + if paren and paren.start() > 0: + return raw_name[:paren.start()].strip(), raw_name[paren.start():].strip() + return raw_name, '' + + +def is_cat_title(text: str) -> bool: + return any(k in text for k in _CAT_KW) + + +def is_fee_cat_title(text: str) -> bool: + if not text: + return False + t = re.sub(r'\s+', '', text) + if t in _EXACT_FEE_CAT: + return True + for kw in _FEE_CAT_KW: + if kw in t: + return True + return False + + +def _is_new_line_trigger(raw: str) -> bool: + if ITEM_START.match(raw): + return True + if CODE_START_RE.match(raw): + return True + if SEQ_CODE_RE.match(raw): + return True + for m in CATEGORY_MARKERS: + if raw.startswith(m + ' ') 
or raw.startswith(m + '\u3000'): + return True + return False + + +def parse_bill_text(text: str) -> dict[str, Any]: + raw_lines = [] + for l in text.split('\n'): + line = l.replace('\t', ' ').strip() + line = _fold_dash_codes(line) + raw_lines.append(line) + + logic_lines: list[str] = [] + current_line = '' + + for raw in raw_lines: + if not raw or PAGE_MARK.match(raw): + continue + if HEADER_RE.match(raw) or HEADER_KW.match(raw): + continue + if re.match(r'^（元）|^款章节号|^备注$|^第\d+页', raw): + continue + + if _is_new_line_trigger(raw): + if current_line: + logic_lines.append(current_line) + current_line = raw + elif CODE_INLINE.search(raw) and len(raw) > 15: + if current_line: + logic_lines.append(current_line) + current_line = raw + else: + if current_line and len(current_line) > 300: + logic_lines.append(current_line) + current_line = raw + else: + current_line = current_line + ' ' + raw if current_line else raw + if current_line: + logic_lines.append(current_line) + + logger.debug('合并后 %s 条逻辑行（原始 %s 行）', len(logic_lines), len(raw_lines)) + + categories: list[dict[str, Any]] = [] + cur_cat: dict[str, Any] | None = None + cur_item: dict[str, Any] | None = None + + for line in logic_lines: + if SKIP_RE.search(line): + continue + + # 行首序号：多级如「1.1.1.1 」；或「1–4 位序号 + 空格 + 9 位以上编码」。 + # 避免误删「行首即 9–12 位清单编码 + 空格」整段（JS 原 \d+(\.\d+)* 会吞掉编码）。 + stripped = line.strip() + m_hier = re.match(r'^\d+(?:\.\d+)+\s+', stripped) + if m_hier: + stripped = stripped[m_hier.end():].strip() + elif re.match(r'^\d{1,4}\s+\d{9}', stripped): + stripped = re.sub(r'^\d{1,4}\s+', '', stripped, count=1).strip() + if not stripped: + stripped = line.strip() + if not stripped: + continue + + cm = CODE_RE.search(stripped) + if cm: + if cur_item and cur_cat: + cur_cat['items'].append(cur_item) + if not cur_cat: + cur_cat = {'name': '未分类', 'items': []} + categories.append(cur_cat) + + code = cm.group(1) + rest = stripped[cm.end():].strip() + name, unit, quantity, spec = '', '', '', '' + + unit_match = 
UNIT_RE.search(rest) + if unit_match: + ui = rest.find(unit_match.group(0)) + raw_name = rest[:ui].strip() + unit = unit_match.group(1) + after_unit = rest[ui + len(unit_match.group(0)):].strip() + qm = re.match(r'^([\d,.]+)', after_unit) + if qm: + quantity = qm.group(1) + tail = after_unit[qm.end():].strip() + if tail: + tail_tokens = tail.split() + si = 0 + while si < len(tail_tokens) and re.match(r'^[\d,.%\-]+$', tail_tokens[si]): + si += 1 + spec_tail = ' '.join(tail_tokens[si:]).strip() + if spec_tail: + spec = spec_tail + ns_name, ns_spec = split_name_and_spec(raw_name) + name = ns_name + if ns_spec: + spec = ns_spec + (';' + spec if spec else '') + else: + tokens = [t for t in rest.split() if t] + found_unit_idx = -1 + for ti in range(len(tokens) - 1, 0, -1): + if tokens[ti] in UNIT_SET: + found_unit_idx = ti + break + if found_unit_idx >= 1: + raw_name_str = ' '.join(tokens[:found_unit_idx]) + ns_name, ns_spec = split_name_and_spec(raw_name_str) + name = ns_name + if ns_spec: + spec = ns_spec + unit = tokens[found_unit_idx] + after_tokens = tokens[found_unit_idx + 1:] + if after_tokens and re.match(r'^[\d,.]+$', after_tokens[0]): + quantity = after_tokens[0] + si = 1 + while si < len(after_tokens) and re.match(r'^[\d,.%\-]+$', after_tokens[si]): + si += 1 + spec_tail = ' '.join(after_tokens[si:]).strip() + if spec_tail: + spec = spec + ';' + spec_tail if spec else spec_tail + else: + name = rest + + name = re.sub(r'\s+', '', name).strip() + for u in UNIT_TOKENS: + if name.endswith(u) and len(name) > len(u): + unit = unit or u + name = name[: len(name) - len(u)] + break + + cur_item = {'code': code, 'name': name, 'unit': unit, 'quantity': quantity, 'spec': spec} + continue + + if len(stripped) > 4: + uni_match = UNIT_RE.search(stripped) + if uni_match: + ui = stripped.find(uni_match.group(0)) + before_unit = stripped[:ui].strip() + after_unit = stripped[ui + len(uni_match.group(0)):].strip() + has_qty = bool(re.match(r'^[\d,.]+', after_unit)) + if ( + 2 <= 
len(before_unit) <= 50 + and has_qty + and re.search(r'[\u4e00-\u9fff]', before_unit) + ): + if cur_item and cur_cat: + cur_cat['items'].append(cur_item) + if not cur_cat: + cur_cat = {'name': '未分类', 'items': []} + categories.append(cur_cat) + unit_fb = uni_match.group(1) + qm = re.match(r'^([\d,.]+)', after_unit) + quantity_fb = qm.group(1) if qm else '' + ns_name, ns_spec = split_name_and_spec(before_unit) + name_fb = re.sub(r'\s+', '', ns_name).strip() + spec_fb = ns_spec or '' + cur_item = {'code': '', 'name': name_fb, 'unit': unit_fb, 'quantity': quantity_fb, 'spec': spec_fb} + continue + + if 2 < len(stripped) < 60 and not CODE_RE.search(stripped): + if UNIT_RE.search(stripped) and re.search(r'\d+\.?\d*\s*$', stripped): + if cur_item: + cur_item['spec'] = (cur_item.get('spec') or '') + ( + ';' + stripped if cur_item.get('spec') else stripped + ) + continue + if is_cat_title(stripped) and not UNIT_RE.search(stripped) and not is_fee_cat_title(stripped): + if cur_item and cur_cat: + cur_cat['items'].append(cur_item) + cur_item = None + clean_title = re.sub( + r'\s+(座|个|项|处|m|km|段|条)\s+\d+[\d.]*\s*$', '', stripped + ).strip() + cur_cat = {'name': clean_title, 'items': []} + categories.append(cur_cat) + continue + + if re.match(r'^[一二三四五六七八九十]+\s', stripped) or re.match( + r'^（[一二三四五六七八九十\d]+）', stripped + ): + clean_title = re.sub(r'\s+(座|个|项|处)\s+\d+[\d.]*\s*$', '', stripped).strip() + if is_fee_cat_title(clean_title): + continue + if cur_item and cur_cat: + cur_cat['items'].append(cur_item) + cur_item = None + cur_cat = {'name': clean_title, 'items': []} + categories.append(cur_cat) + continue + + if cur_item and len(stripped) > 1: + cur_item['spec'] = (cur_item.get('spec') or '') + ( + ';' + stripped if cur_item.get('spec') else stripped + ) + + if cur_item and cur_cat: + cur_cat['items'].append(cur_item) + + fee_filtered = 0 + for cat in categories: + if cat.get('items'): + before = len(cat['items']) + cat['items'] = [it for it in cat['items'] if not 
is_fee_item(it.get('name', ''))] + fee_filtered += before - len(cat['items']) + if fee_filtered: + logger.debug('费用项过滤: 移除 %s 项', fee_filtered) + + total_before_merge = 0 + total_after_merge = 0 + for cat in categories: + items = cat.get('items') or [] + if not items: + continue + total_before_merge += len(items) + name_map: dict[str, dict[str, Any]] = {} + for item in items: + key = re.sub(r'\s+', '', (item.get('name') or '')).strip() + if not key: + continue + if key not in name_map: + name_map[key] = { + 'code': item.get('code') or '', + 'name': item['name'], + 'unit': item.get('unit') or '', + 'quantity': item.get('quantity') or '', + 'spec': item.get('spec') or '', + '_quantities': [item['quantity']] if item.get('quantity') else [], + '_specs': [item['spec']] if item.get('spec') else [], + } + else: + m = name_map[key] + if not m['code'] and item.get('code'): + m['code'] = item['code'] + if not m['unit'] and item.get('unit'): + m['unit'] = item['unit'] + if item.get('quantity'): + m['_quantities'].append(item['quantity']) + if item.get('spec') and item['spec'] not in m['_specs']: + m['_specs'].append(item['spec']) + + merged_items: list[dict[str, str]] = [] + for m in name_map.values(): + qlist = m['_quantities'] + if len(qlist) > 1: + nums = [] + ok = True + for q in qlist: + try: + nums.append(float(q.replace(',', ''))) + except ValueError: + ok = False + break + if ok: + s = sum(nums) + m['quantity'] = str(int(s)) if s % 1 == 0 else f'{s:.2f}' + else: + m['quantity'] = '; '.join(qlist) + elif len(qlist) == 1: + m['quantity'] = qlist[0] + + if m['_specs']: + trimmed = [s[:120] + '...' if len(s) > 120 else s for s in m['_specs']] + m['spec'] = '; '.join(trimmed) + if len(m['spec']) > 300: + m['spec'] = m['spec'][:300] + '...' 
def filter_bill_pages(page_texts: list[str]) -> tuple[list[str], dict[str, Any]]:
    """
    Pick the pages that look like bill-of-quantities pages.

    Args:
        page_texts: Extracted text of each page, in document order. Entries
            may be empty or ``None``.

    Returns:
        ``(bill_page_texts, meta)``. ``meta`` always carries ``total_pages``,
        ``scanned`` and ``no_bill_pages``; the early "no text" exit adds
        ``reason``, the normal exit adds ``bill_page_indices`` and
        ``bill_pages``.
    """
    page_count = len(page_texts)
    meta: dict[str, Any] = {'total_pages': page_count, 'scanned': False, 'no_bill_pages': False}

    # Fewer than 50 characters in the whole document: report it as scanned
    # and stop before any keyword matching.
    if sum(len(text or '') for text in page_texts) < 50:
        meta['scanned'] = True
        meta['reason'] = 'noText'
        return [], meta

    def _looks_like_bill(text: str) -> bool:
        # Two or more bill keywords, any section keyword, or a 9-digit run.
        if sum(1 for kw in BILL_KW if kw in text) >= 2:
            return True
        if any(kw in text for kw in SEC_KW):
            return True
        return re.search(r'\d{9}', text) is not None

    # First pass: flag every non-blank page that matches on its own.
    flagged = [
        bool((text or '').strip()) and _looks_like_bill(text)
        for text in page_texts
    ]

    # Second pass: fill the gaps between the first and last flagged page,
    # except for near-empty pages and pages dominated by fee keywords.
    hits = [idx for idx, hit in enumerate(flagged) if hit]
    if hits and hits[-1] > hits[0]:
        for idx in range(hits[0], hits[-1] + 1):
            if flagged[idx]:
                continue
            raw = page_texts[idx] or ''
            if len(raw.strip()) <= 30:
                continue
            fee_hits = sum(1 for kw in FEE_PAGE_KW if kw in raw)
            if fee_hits >= 2 and not re.search(r'\d{9}', raw):
                continue
            flagged[idx] = True

    selected = [idx for idx, hit in enumerate(flagged) if hit]
    bill_texts = [page_texts[idx] for idx in selected]
    if not bill_texts:
        meta['no_bill_pages'] = True

    meta['bill_page_indices'] = selected
    meta['bill_pages'] = len(bill_texts)
    return bill_texts, meta
def extract_boq_text(file_path: str) -> str:
    """
    Extract raw structured text from a bill-of-quantities file.

    Supported extensions: .xlsx / .xls / .csv / .pdf / .docx / .doc.

    Args:
        file_path: Path of the file; the reader is chosen from its extension
            (case-insensitive).

    Returns:
        The extracted text, cut to ``MAX_BOQ_CHARS`` characters.

    Raises:
        ValueError: The extension is not one of the supported ones.
        RuntimeError: Propagated from the format-specific reader.
    """
    ext = Path(file_path).suffix.lower()
    if ext == '.xlsx':
        text = _extract_excel(file_path)
    elif ext == '.xls':
        # Legacy binary workbooks cannot be opened by openpyxl; sending them
        # through _extract_excel always failed with a generic error and the
        # xlrd reader was never reached. Go to the xlrd reader directly.
        text = _extract_xls_fallback(file_path)
    elif ext == '.csv':
        text = _extract_csv(file_path)
    elif ext == '.pdf':
        from utils.file_utils import _extract_pdf
        text = _extract_pdf(file_path)
    elif ext == '.docx':
        from utils.file_utils import _extract_docx
        text = _extract_docx(file_path)
    elif ext == '.doc':
        from utils.file_utils import _extract_doc
        text = _extract_doc(file_path)
    else:
        raise ValueError(f'不支持的文件格式 {ext}，请使用 xlsx/xls/csv/pdf/docx/doc')

    return text[:MAX_BOQ_CHARS]
解析失败：{e}') from e + + +def _sheet_to_text(ws, sheet_name: str) -> str: + """将一个 Sheet 转为管道分隔文本，自动过滤全空行和全空列。""" + raw_rows = [] + for row in ws.iter_rows(values_only=True): + cells = ['' if v is None else str(v).strip() for v in row] + if any(cells): + raw_rows.append(cells) + + if not raw_rows: + return '' + + # 对齐列数 + max_cols = max(len(r) for r in raw_rows) + raw_rows = [r + [''] * (max_cols - len(r)) for r in raw_rows] + + # 找出有内容的列索引 + active_cols = [j for j in range(max_cols) + if any(raw_rows[i][j] for i in range(len(raw_rows)))] + if not active_cols: + return '' + + lines = [f'【{sheet_name}】'] + for row in raw_rows: + line = ' | '.join(row[j] for j in active_cols) + if line.replace('|', '').strip(): + lines.append(line) + return '\n'.join(lines) + + +def _extract_xls_fallback(file_path: str) -> str: + """旧版 .xls 使用 xlrd 兜底（需安装 xlrd<2）""" + try: + import xlrd # type: ignore + wb = xlrd.open_workbook(file_path) + parts = [] + for sheet in wb.sheets(): + lines = [f'【{sheet.name}】'] + for rx in range(sheet.nrows): + cells = [str(sheet.cell_value(rx, cx)).strip() + for cx in range(sheet.ncols)] + line = ' | '.join(c for c in cells if c) + if line: + lines.append(line) + parts.append('\n'.join(lines)) + return '\n\n'.join(parts) + except Exception as e: + raise RuntimeError(f'.xls 解析失败，请另存为 .xlsx 后重试：{e}') from e + + +# ─── CSV ───────────────────────────────────────────────────────────────── + +def _extract_csv(file_path: str) -> str: + encodings = ['utf-8-sig', 'gbk', 'utf-8', 'gb18030', 'latin-1'] + for enc in encodings: + try: + lines = [] + with open(file_path, 'r', encoding=enc, newline='') as f: + for row in csv.reader(f): + line = ' | '.join(c.strip() for c in row if c.strip()) + if line: + lines.append(line) + return '\n'.join(lines) + except (UnicodeDecodeError, UnicodeError): + continue + except Exception as e: + raise RuntimeError(f'CSV 解析失败：{e}') from e + raise RuntimeError('CSV 文件编码不支持，请另存为 UTF-8 格式后重试') diff --git a/utils/diagram_intent.py 
b/utils/diagram_intent.py new file mode 100644 index 0000000..16760c4 --- /dev/null +++ b/utils/diagram_intent.py @@ -0,0 +1,284 @@ +""" +章节级图/表意图：字符特征 + 大纲上下文窗口计分，栈式优先级，驱动提示词附加段。 +""" +from __future__ import annotations + +import json +import logging +import os +import re +from dataclasses import dataclass +from typing import Any, Dict, List, Optional, Sequence, Tuple + +import config +from utils import prompts as P + +logger = logging.getLogger(__name__) + +DEFAULT_DIAGRAM_RULES: Dict[str, Any] = { + 'schema_version': 1, + 'threshold_figure': 1.0, + 'threshold_table': 1.0, + 'title_weight': 1.0, + 'context_weight': 0.6, + 'outline_context_lines': {'before': 4, 'after': 6}, + 'stack_order_when_both': 'score_desc', + 'figure_keywords': [], + 'table_keywords': [], +} + + +def diagram_rules_path() -> str: + return os.path.join(config.DATA_DIR, 'diagram_intent_rules.json') + + +def load_diagram_rules(path: Optional[str] = None) -> Dict[str, Any]: + """加载规则 JSON；文件缺失或解析失败时返回内置默认。""" + p = path or diagram_rules_path() + data = dict(DEFAULT_DIAGRAM_RULES) + if not os.path.isfile(p): + return data + try: + with open(p, encoding='utf-8') as f: + raw = json.load(f) + if isinstance(raw, dict): + for k, v in raw.items(): + if k.startswith('_'): + continue + if k == 'outline_context_lines' and isinstance(v, dict): + data['outline_context_lines'] = { + **data.get('outline_context_lines', {}), + **v, + } + else: + data[k] = v + except Exception as e: + logger.warning('加载 diagram_intent_rules.json 失败，使用内置默认: %s', e) + return data + + +def _normalize_keyword_entries(raw: Any) -> List[Tuple[str, float]]: + out: List[Tuple[str, float]] = [] + if not isinstance(raw, list): + return out + for item in raw: + if isinstance(item, str) and item.strip(): + out.append((item.strip(), 1.0)) + elif isinstance(item, dict): + t = (item.get('text') or item.get('pattern') or '').strip() + if not t: + continue + w = float(item.get('weight', 1.0)) + out.append((t, w)) + return out + + +def 
def extract_outline_window(
    outline_text: str,
    section_title: str,
    before: int,
    after: int,
    fallback_chars: int = 1200,
) -> str:
    """
    Return the outline lines around the line that holds ``section_title``.

    The search runs in two passes so an exact hit always wins:
      1. the first line that contains the title as given (stripped);
      2. if there is none, the first line that contains the title with its
         leading serial number removed.
    A single pass would let an earlier fuzzy hit shadow a later exact one;
    for example "5.1 引言" would resolve to the line "4.1 引言".

    Args:
        outline_text: Full outline, one heading per line.
        section_title: Title of the section being generated.
        before: Number of lines to include above the hit (negative counts as 0).
        after: Number of lines to include below the hit (negative counts as 0).
        fallback_chars: Length of the outline prefix returned when nothing matches.

    Returns:
        The window joined with newlines, or the first ``fallback_chars``
        characters of the outline when the title is empty or not found.
    """
    if not outline_text or not section_title:
        return (outline_text or '')[:fallback_chars]
    title_stripped = section_title.strip()
    if not title_stripped:
        return outline_text[:fallback_chars]

    lines = outline_text.splitlines()
    stripped_lines = [line.strip() for line in lines]

    # Title without one leading serial token ("一、", "3.", "2 ", ...).
    core = re.sub(r'^\s*[\d一二三四五六七八九十]+[、.．\s]+', '', title_stripped).strip()

    idx = next((i for i, s in enumerate(stripped_lines) if title_stripped in s), -1)
    if idx < 0 and core:
        # Containment in the raw line also covers containment in the line
        # with its own serial stripped, so one check is enough.
        idx = next((i for i, s in enumerate(stripped_lines) if core in s), -1)
    if idx < 0:
        return outline_text[:fallback_chars]

    lo = max(0, idx - max(0, before))
    hi = min(len(lines), idx + max(0, after) + 1)
    return '\n'.join(lines[lo:hi])
def stack_to_addon(stack: DiagramStack) -> str:
    """Build the prompt add-on for a diagram stack.

    The result is the priority preamble followed by the figure or table
    add-on text for each stack entry, in stack order. An empty stack gives
    ``''``.

    The labels passed to the preamble come from ``stack_compact_labels`` so
    that both functions describe a stack in the same words.
    """
    if not stack:
        return ''
    parts: List[str] = [P.diagram_priority_preamble(stack_compact_labels(stack))]
    parts.extend(
        P.get_figure_addon() if intent.kind == 'figure' else P.get_table_addon()
        for intent in stack
    )
    return ''.join(parts)
def get_diagram_agent() -> DiagramIntentAgent:
    """Return the module-level agent, building it on the first call."""
    global _default_agent
    agent = _default_agent
    if agent is None:
        # Nothing cached yet: load the default rules and keep the instance.
        agent = _default_agent = DiagramIntentAgent.load_default()
    return agent
def extract_pdf_pages(file_path: str) -> list[str]:
    """
    Extract the text of a PDF page by page (used to pick bill-of-quantities pages).

    pypdf is tried first and gives one stripped string per page. If it fails,
    or every page comes back empty, pdfminer is used instead; that path gives
    the whole document as a single-element list.

    Args:
        file_path: Path of the PDF file.

    Returns:
        One string per page from pypdf, or ``[whole_text]`` (``['']`` when
        empty) from pdfminer.

    Raises:
        RuntimeError: pdfminer also failed; the original error is chained.
    """
    try:
        from pypdf import PdfReader
        reader = PdfReader(file_path)
        # Pages are read one after another on purpose. All pages of a reader
        # are resolved lazily through the same underlying stream, so
        # extracting them from several threads can interleave reads; and the
        # extraction is pure Python, so a thread pool brings no speed-up.
        # NOTE(review): pypdf does not document PdfReader as thread-safe.
        pages = [(page.extract_text() or '').strip() for page in reader.pages]
        if any(pages):
            return pages
    except Exception as e:
        logger.warning(f'pypdf 按页提取失败: {e}，尝试 pdfminer')

    try:
        from pdfminer.high_level import extract_text as pm_extract
        blob = (pm_extract(file_path) or '').strip()
        return [blob] if blob else ['']
    except Exception as e:
        logger.error(f'pdfminer 提取失败: {e}')
        raise RuntimeError(f'PDF 文本提取失败: {e}') from e
[cell.text.strip() for cell in row.cells if cell.text.strip()] + if row_texts: + parts.append(' '.join(row_texts)) + return '\n'.join(parts) + except Exception as e: + logger.error(f'.docx 提取失败: {e}') + raise RuntimeError(f'Word 文本提取失败: {e}') + + +def _extract_doc(file_path: str) -> str: + """ + 提取旧版 .doc 文件文本，按优先级依次尝试： + 1. win32com（Windows + Microsoft Word 已安装，最准确） + 2. LibreOffice 命令行转换（需安装 LibreOffice） + 3. python-docx 兼容尝试（部分以 XML 保存的伪 .doc 可读） + 全部失败时提示用户手动另存为 .docx + """ + abs_path = str(Path(file_path).resolve()) + + # ── 方案1：win32com（Windows + Word）────────────────────────────────── + try: + import win32com.client + import pythoncom + pythoncom.CoInitialize() + word = None + try: + word = win32com.client.Dispatch('Word.Application') + word.Visible = False + doc = word.Documents.Open(abs_path, ReadOnly=True) + text = doc.Range().Text + doc.Close(False) + logger.info(f'.doc 通过 win32com 提取成功: {file_path}') + return text or '' + finally: + if word: + try: + word.Quit() + except Exception: + pass + pythoncom.CoUninitialize() + except ImportError: + logger.info('pywin32 未安装，跳过 win32com 方案') + except Exception as e: + logger.warning(f'win32com 提取 .doc 失败: {e}') + + # ── 方案2：LibreOffice 命令行 ───────────────────────────────────────── + try: + import subprocess + import tempfile + tmp_dir = tempfile.mkdtemp() + for soffice_cmd in ('soffice', 'libreoffice'): + try: + result = subprocess.run( + [soffice_cmd, '--headless', '--convert-to', 'txt:Text', + '--outdir', tmp_dir, abs_path], + capture_output=True, text=True, timeout=60, + ) + if result.returncode == 0: + txt_file = os.path.join(tmp_dir, Path(file_path).stem + '.txt') + if os.path.exists(txt_file): + with open(txt_file, 'r', encoding='utf-8', errors='ignore') as f: + content = f.read() + logger.info(f'.doc 通过 LibreOffice 提取成功: {file_path}') + return content + except FileNotFoundError: + continue + except subprocess.TimeoutExpired: + logger.warning('LibreOffice 转换超时') + break + except Exception as e: + 
def split_text_chunks(text: str, chunk_size: int = 2000, overlap: int = 200) -> list[str]:
    """Split text into fixed-size, overlapping chunks (used by the knowledge base).

    Consecutive chunks share ``overlap`` characters. Splitting stops as soon
    as a chunk reaches the end of the text, so the last chunk is never a
    fragment already fully contained in the one before it.

    Args:
        text: Text to split; an empty string gives an empty list.
        chunk_size: Maximum characters per chunk; must be positive.
        overlap: Characters shared by neighbouring chunks; must satisfy
            ``0 <= overlap < chunk_size``.

    Returns:
        The chunks in document order.

    Raises:
        ValueError: ``chunk_size`` or ``overlap`` is out of range. Without
            this check ``overlap >= chunk_size`` would never advance and the
            loop would not terminate.
    """
    if chunk_size <= 0:
        raise ValueError('chunk_size must be a positive integer')
    if overlap < 0 or overlap >= chunk_size:
        raise ValueError('overlap must satisfy 0 <= overlap < chunk_size')

    step = chunk_size - overlap
    total = len(text)
    chunks = []
    start = 0
    while start < total:
        end = min(start + chunk_size, total)
        chunks.append(text[start:end])
        if end == total:
            # The tail is covered; another round would only repeat overlap.
            break
        start += step
    return chunks
def int_to_chinese_numeral(n: int) -> str:
    """Convert a positive integer below 1000 to a Chinese numeral.

    Examples: 1 -> 一, 10 -> 十, 11 -> 十一, 99 -> 九十九, 100 -> 一百,
    101 -> 一百零一, 110 -> 一百一十, 115 -> 一百一十五.

    Values that are zero, negative, or 1000 and above are returned as their
    decimal string.
    """
    if n <= 0:
        return str(n)
    digits = "零一二三四五六七八九"
    if n < 10:
        return digits[n]
    if n < 20:
        # A standalone 10-19 drops the leading 一: 十, 十一, ...
        return "十" + (digits[n % 10] if n % 10 else "")
    if n < 100:
        tens, ones = divmod(n, 10)
        return digits[tens] + "十" + (digits[ones] if ones else "")
    if n < 1000:
        hundreds, rest = divmod(n, 100)
        head = digits[hundreds] + "百"
        if rest == 0:
            return head
        if rest < 10:
            return head + "零" + digits[rest]
        if rest < 20:
            # After 百 the tens digit is written out: 一百一十, not 一百十.
            return head + "一" + int_to_chinese_numeral(rest)
        return head + int_to_chinese_numeral(rest)
    # Chapters beyond the hundreds are not expected; keep the plain number.
    return str(n)
项目概况 + - 项目名称 + - 建设地点 + - 工程性质（新建/改建/扩建） + - 核心建设内容 + - 关键工程量指标 + - 特殊施工工艺（如顶管/盾构等） + - 项目概况 + + 2. 技术要求体系 + - 专业监测要求（分项列出核心监测指标） + - 技术标准规范 + - 质量管控要点 + - 特殊工艺标准 + + 3. 交付物矩阵 + - 阶段性成果清单（含时间节点） + - 最终交付文件要求 + - 成果验收标准 + - 备案审批流程 + + 4. 商务条款摘要 + - 合同期限 + - 支付结构 + - 报价约束条件 + - 违约条款要点 + - 知识产权约定 + + 5. 资质要求矩阵 + - 企业资质门槛 + - 人员资格要求 + - 设备配置标准 + - 同类项目经验 + + 6. 评标要素体系 + - 技术评分维度 + - 商务评分权重 + - 否决性条款 + - 实质性条款 + - 围标识别机制 + + + 二、处理规范 + 1. 信息抽取规则： + - 采用三级信息提炼法（关键数据→技术参数→约束条件） + - 识别并标注法定强制性条款（★号条款） + - 提取特殊工艺参数（例如顶管直径、沉井尺寸等） + + 2. 结构化呈现要求： + - 使用Markdown分级标题系统 + - 技术参数格式化处理 + - 流程节点采用时间轴呈现 + - 关键数据突出显示（例如预算金额、最高限价） + + 3. 专业术语处理： + - 保持行业术语准确性 + - 工程计量单位标准化转换 + - 法律条款原文引述 + + 三、输出示例 + 1.确保包含但不仅限于： + - 项目背景的技术参数分解 + - 监测要求的分类归纳 + - 成果交付的阶段性要求 + - 商务条款的要点提炼 + + 四、质量保障 + 1. 完整性核查清单： + - 验证五证要求（资质/业绩/人员/设备/资金） + - 检查三大核心条款（技术/商务/法律） + - 确认关键日期节点（工期/交付期/质保期） + + 2. 风险提示机制： + - 标注异常约束条款 + - 识别排他性要求 + - 提示潜在履约风险点 + +请严格按照上述结构化框架处理输入的招标文件，生成专业、准确、易读的项目摘要报告。 +输出内容需符合工程领域专业规范，重点数据需二次核验确保准确性。 +严格按照招标文件的内容，确保输出内容的完整性。 +直接给出摘要，禁止说明和引导词。 + +- 用户提供的招标文件内容如下： + {bid_document} +""" + +RATING_REQUIREMENTS = """\ +- 角色：招标文件信息提取专家，精通技术评分/技术评审要求的提取 + +- 任务：请严格按照以下步骤分析提供的招标文件内容，**仅提取技术评分标准**，完整输出所有技术评分细则： + +- 重要限制（必须遵守）： + ★ 只提取"技术评分"/"技术评审"部分，禁止提取商务评分、价格评分、资质评分、报价等非技术内容 + ★ 若招标文件包含商务/价格评分，直接忽略，不得出现在输出中 + +- 步骤与要求： + + 1. **结构解析** + - 识别文件整体结构，定位"技术评分"/"技术评审要求"章节 + - 标注技术评分的总权重占比（如出现，如"技术分占60%"） + - 跳过并忽略商务评分、价格评分、资质评审等非技术评分章节 + + 2. **技术评分要素提取** + 对"技术评分"板块进行完整深度解析： + - 提取全部技术评分细项，不能省略任何子项 + - 明确列出量化指标（如"ISO认证+3分"、"项目经验每年加1分"） + - 区分强制性条款（必须满足项/否决项）与竞争性条款（择优评分项） + - 标注特殊技术要求（技术方案、实施能力、技术创新、服务响应等） + - 标注每个评分项的分值/权重 + + 3. **异常识别** + - 标出技术评分中表述模糊的评分项（如"酌情加分""优/良/差等级"） + - 识别可能存在的矛盾条款 + - 提示隐藏的技术得分点 + + 4. 
**结果呈现** + 参考以下示例输出markdown结构化格式： + + # 技术评分细则（技术分共XX分） + + ## 一、技术方案（XX分） + ### 1.1 方案设计（XX分） + → 要求：…… + → 评分标准：…… + + ## 二、实施能力（XX分） + （继续展开...） + +请严格按照上述结构化框架处理输入的招标文件，生成专业、准确的项目技术评分要求。 +严格按照招标文件的内容，确保输出内容的完整性，禁止虚构或补充文件未提及的内容。 +直接输出技术评分要求，禁止说明和引导词。 + +- 招标文件内容如下： + {bid_document} +""" + +RATING_JSON = """\ +- 任务：从工程项目招标文件中提取技术评分要求，并以严格的JSON格式输出。 + +- 要求： + 必须生成完整有效的JSON对象，不使用JSON之外的文本说明 + 数值类型字段不添加单位符号 + 包含所有的评分项及其权重分配 + 特殊说明字段仅在存在否决条款（强制性条款）时出现 + +- 输出结构（必须严格遵守根字段名与数组名，便于后续章节字数与要点映射）： + { + "items": [ + { + "id": "唯一短标识，如 T01", + "name": "评分项名称（与招标文件表述一致或精简概括）", + "weight": 数值型权重或分值（如 10 表示 10 分或 10%）, + "keywords": ["与本项相关的可选关键词1", "关键词2"] + } + ], + "notes": "可选：否决条款、阶梯得分等特殊说明；无则写空字符串" + } + +- 技术评分要求内容如下： + {tech_rating}\ +""" + +OUTLINES = """\ +- 角色：技术标书架构师 +- 任务：生成适配技术评分标准的技术标书目录 +- 输出要求： + 采用四级嵌套编码体系（X.X.X.X）下实现按需分层 + 直接给出生成的目录，禁止解释和引导词 + 编号硬性规则：一级标题必须为「中文数字+顿号+标题」（例：一、总体方案），二级及以下必须为「阿拉伯数字多级编号+半角空格+标题」（例：1.1 子节标题）；每个目录行必须带完整目录号，禁止输出无编号的纯标题行。 + +- 约束控制： + 根据项目生成标书的名称，如"XXXX项目技术标书" + {chapter_count_hint} + 章节颗粒度与评分指标权重正相关 + 技术实施类章节必须达到四级深度，管理保障类章节允许三级结构 + 同级节点数量必须有波动区间：技术方案类(4-7)、实施保障类(2-4)、创新应用类(1-3) + 目录的章节不能缺少包含以下关键词的内容： + - 对本项目的了解和分析 + - 项目工作重难点分析 + - 项目实施方案 + - 服务进度保障措施 + - 服务质量保障方案 + - 合理化建议 + - 服务承诺及处罚措施 + 目录不包含成本和预算内容，但要平衡项目预算、技术可行性以及技术的专业度 + +- 示例输出： + + 花岭新城BIM项目技术标书 + 一、总体实施方案 + 　1.1 项目理解与需求分析 + 　　1.1.1 项目概述 + 　　　　1.1.1.1 建设地点及规模 + 　　　　1.1.1.2 工程地质勘察报告 + 　　　　1.1.1.3 抗震设防烈度与防火等级 + 　　　　1.1.1.4 建筑结构形式与建筑面积分布 + 　　1.1.2 项目背景 + 　　　　1.1.2.1 核心宗旨与目标 + 　　　　1.1.2.2 地理位置与项目规模 + 　　1.1.3 项目目标 + 　　　　1.1.3.1 就业机会与基础设施提升 + 　　　　1.1.3.2 乡村振兴与经济增长 + 　　1.1.4 项目特点 + 　　　　1.1.4.1 框筒结构抗震性能 + 　　　　1.1.4.2 分阶段工程地质勘察 + 　　　　1.1.4.3 功能区域多样化 + + 二、建筑设计 + 　2.1 主要设计依据 + 　　　　2.1.1 国家标准与规范 + 　　　　2.1.2 行业标准与图集 + 　2.2 建筑结构设计 + 　　　　2.2.1 结构形式 + 　　　　2.2.2 结构材料 + 　　　　2.2.3 结构布局 + 　　　　2.2.4 结构经济指标 + 　　　　2.2.5 结构细节设计 + 　2.3 建筑功能布局 + 　　　　2.3.1 C1#楼（厂房） + 　　　　　　2.3.1.1 功能分区明确 + 　　　　　　2.3.1.2 流线优化与安全性 + 　　　　2.3.2 配电房 + 　　　　　　2.3.2.1 设计目标与设备布置 + 　　　　　　2.3.2.2 空间规划与电气主接线方案 + 　　　　2.3.3 
外廊及架空建筑 + 　　　　　　2.3.3.1 功能区域与景观设计 + 　　　　　　2.3.3.2 光照与通风优化 + 　2.4 建筑材料选用 + 　2.5 建筑外观设计 + 　2.6 建筑室内布局 + 　　　　2.6.1 功能分区与设计要点 + 　2.7 建筑安全和消防设计 + 　　　　2.7.1 建筑安全体系 + 　　　　2.7.2 消防系统设计 + 　2.8 建筑节能设计 + 　　　　2.8.1 节能措施与绿色建材 + 　　　　2.8.2 雨水收集系统 + + 三、结构设计 + 　3.1 结构形式 + 　3.2 结构材料 + 　　　　3.2.1 混凝土与钢材选用 + 　3.3 结构布局 + 　　　　3.3.1 结构柱网与通风疏散通道 + 　3.4 结构经济指标 + 　　　　3.4.1 抗震设计要求与用材控制 + 　3.5 结构细节设计 + 　　　　3.5.1 基础设计与钢结构细节 + 　　　　3.5.2 混凝土结构与抗震设计 + 　3.6 结构分析与计算 + + 四、给排水设计 + 　4.1 引言 + 　4.2 供水系统设计 + 　　　　4.2.1 供水管道与消防水源 + 　　　　4.2.2 节水设计与雨水收集 + 　4.3 排水系统设计 + 　　　　4.3.1 排水管道与雨水管理 + 　　　　4.3.2 污水处理与分流制度 + 　4.4 给排水设备选择 + 　4.5 细节设计 + 　4.6 监测与维护 + + 五、暖通设计 + 　5.1 引言 + 　5.2 供暖系统设计 + 　　　　5.2.1 供暖方式与设备选择 + 　　　　5.2.2 温度控制系统 + 　5.3 通风系统设计 + 　　　　5.3.1 通风方式与设备选择 + 　　　　5.3.2 空气质量控制 + 　5.4 空调系统设计 + 　　　　5.4.1 空调方式与设备选择 + 　　　　5.4.2 温湿度控制系统 + 　5.5 热水系统设计 + 　5.6 细节设计与监测维护 + + + 六、BIM设计 + 　6.1 项目总图与单体建筑设计 + 　6.2 道路与排水设计 + 　6.3 电气系统设计 + 　6.4 绿化设计 + 　6.5 BIM协同设计与施工管理 + 　6.6 数据管理与培训支持 + + 七、设计说明 + 　7.1 项目设计依据 + 　7.2 设计原则 + 　7.3 结构经济合理化 + 　7.4 建筑功能分区 + 　7.5 设计细节要求 + + 八、合理化建议 + 　8.1 建筑专业合理化建议 + 　8.2 结构专业合理化建议 + 　8.3 给排水专业合理化建议 + 　8.4 暖通专业合理化建议 + 　8.5 BIM专业合理化建议 + 8.6 技术和工艺方面的建议 + 8.7 成本和预算方面的建议 + 8.8 时间和进度方面的建议 + 8.9 施工质量管理方面的建议 + 8.10 质量和安全方面的建议 + 8.11 环境和可持续性方面的建议 + + 九、施工进度安排 + 　9.1 施工进度安排 + 　9.2 施工进度跟踪与管理 + 　9.3 施工质量管理 + 　9.4 施工现场管理 + 　9.5 施工结项与验收 + + 十、本项目工作重点难点分析 + 　10.1 工程特点与设计工作难点 + 　10.2 重点与难点分析 + 　10.3 综合解决措施 + + +- 招标文件内容： +{document_text}\ +""" + +OUTLINES_WITH_RATING = """\ +- 角色：技术标书架构师 +- 任务：生成适配技术评分标准的技术标书目录 +- 输出要求： + 采用四级嵌套编码体系（X.X.X.X）下实现按需分层 + 直接给出生成的目录，禁止解释和引导词 + 编号硬性规则：一级标题必须为「中文数字+顿号+标题」，二级及以下必须为「阿拉伯数字多级编号+半角空格+标题」；每个目录行必须带完整目录号，禁止无编号的纯标题行。 + +- 约束控制： + 根据项目生成标书的名称，如"XXXX项目技术标书" + {chapter_count_hint} + 目录的章节必须按照技术评分标准的项目生成，题目应包括技术评分项目中的关键词： + 章节颗粒度与评分指标权重正相关 + 技术方案类章节必须达到四级深度，管理保障类章节允许三级结构 + 同级节点数量必须有波动区间：技术方案类(4-7)、实施保障类(2-4)、创新应用类(1-3) + 目录禁止包含报价、团队、资质、文件等商务性质的章节 + +- 示例输出： + + 花岭新城BIM项目技术标书 + 一、总体实施方案 + 　1.1 项目理解与需求分析 + 　　1.1.1 项目概述 + 　　　　1.1.1.1 建设地点及规模 + 　　　　1.1.1.2 工程地质勘察报告 + 　　　　1.1.1.3 
抗震设防烈度与防火等级 + 　　　　1.1.1.4 建筑结构形式与建筑面积分布 + 　　1.1.2 项目背景 + 　　　　1.1.2.1 核心宗旨与目标 + 　　　　1.1.2.2 地理位置与项目规模 + 　　1.1.3 项目目标 + 　　　　1.1.3.1 就业机会与基础设施提升 + 　　　　1.1.3.2 乡村振兴与经济增长 + 　　1.1.4 项目特点 + 　　　　1.1.4.1 框筒结构抗震性能 + 　　　　1.1.4.2 分阶段工程地质勘察 + 　　　　1.1.4.3 功能区域多样化 + + 二、建筑设计 + 　2.1 主要设计依据 + 　　　　2.1.1 国家标准与规范 + 　　　　2.1.2 行业标准与图集 + 　2.2 建筑结构设计 + 　　　　2.2.1 结构形式 + 　　　　2.2.2 结构材料 + 　　　　2.2.3 结构布局 + 　　　　2.2.4 结构经济指标 + 　　　　2.2.5 结构细节设计 + 　2.3 建筑功能布局 + 　　　　2.3.1 C1#楼（厂房） + 　　　　　　2.3.1.1 功能分区明确 + 　　　　　　2.3.1.2 流线优化与安全性 + 　　　　2.3.2 配电房 + 　　　　　　2.3.2.1 设计目标与设备布置 + 　　　　　　2.3.2.2 空间规划与电气主接线方案 + 　　　　2.3.3 外廊及架空建筑 + 　　　　　　2.3.3.1 功能区域与景观设计 + 　　　　　　2.3.3.2 光照与通风优化 + 　2.4 建筑材料选用 + 　2.5 建筑外观设计 + 　2.6 建筑室内布局 + 　　　　2.6.1 功能分区与设计要点 + 　2.7 建筑安全和消防设计 + 　　　　2.7.1 建筑安全体系 + 　　　　2.7.2 消防系统设计 + 　2.8 建筑节能设计 + 　　　　2.8.1 节能措施与绿色建材 + 　　　　2.8.2 雨水收集系统 + + 三、结构设计 + 　3.1 结构形式 + 　3.2 结构材料 + 　　　　3.2.1 混凝土与钢材选用 + 　3.3 结构布局 + 　　　　3.3.1 结构柱网与通风疏散通道 + 　3.4 结构经济指标 + 　　　　3.4.1 抗震设计要求与用材控制 + 　3.5 结构细节设计 + 　　　　3.5.1 基础设计与钢结构细节 + 　　　　3.5.2 混凝土结构与抗震设计 + 　3.6 结构分析与计算 + + 四、给排水设计 + 　4.1 引言 + 　4.2 供水系统设计 + 　　　　4.2.1 供水管道与消防水源 + 　　　　4.2.2 节水设计与雨水收集 + 　4.3 排水系统设计 + 　　　　4.3.1 排水管道与雨水管理 + 　　　　4.3.2 污水处理与分流制度 + 　4.4 给排水设备选择 + 　4.5 细节设计 + 　4.6 监测与维护 + + 五、暖通设计 + 　5.1 引言 + 　5.2 供暖系统设计 + 　　　　5.2.1 供暖方式与设备选择 + 　　　　5.2.2 温度控制系统 + 　5.3 通风系统设计 + 　　　　5.3.1 通风方式与设备选择 + 　　　　5.3.2 空气质量控制 + 　5.4 空调系统设计 + 　　　　5.4.1 空调方式与设备选择 + 　　　　5.4.2 温湿度控制系统 + 　5.5 热水系统设计 + 　5.6 细节设计与监测维护 + + + 六、BIM设计 + 　6.1 项目总图与单体建筑设计 + 　6.2 道路与排水设计 + 　6.3 电气系统设计 + 　6.4 绿化设计 + 　6.5 BIM协同设计与施工管理 + 　6.6 数据管理与培训支持 + + 七、设计说明 + 　7.1 项目设计依据 + 　7.2 设计原则 + 　7.3 结构经济合理化 + 　7.4 建筑功能分区 + 　7.5 设计细节要求 + + 八、合理化建议 + 　8.1 建筑专业合理化建议 + 　8.2 结构专业合理化建议 + 　8.3 给排水专业合理化建议 + 　8.4 暖通专业合理化建议 + 　8.5 BIM专业合理化建议 + 8.6 技术和工艺方面的建议 + 8.7 成本和预算方面的建议 + 8.8 时间和进度方面的建议 + 8.9 施工质量管理方面的建议 + 8.10 质量和安全方面的建议 + 8.11 环境和可持续性方面的建议 + + 九、施工进度安排 + 　9.1 施工进度安排 + 　9.2 施工进度跟踪与管理 + 　9.3 施工质量管理 + 　9.4 施工现场管理 + 　9.5 施工结项与验收 + + 十、本项目工作重点难点分析 + 　10.1 工程特点与设计工作难点 + 　10.2 重点与难点分析 + 　10.3 综合解决措施 + + +- 招标文件摘要： +{summary} + 
# Prompt that asks the model to extract and analyse the scoring rules of a
# tender document. The text is wrapped in literal double quotes on purpose
# (kept from the original). The markdown fence is written with plain
# backticks: a backtick needs no escaping inside a Python string, and the
# former "\`" sequences were invalid escapes that also leaked backslashes
# into the prompt.
SCORING_RULES = """\
"你是一名专业的招标文件分析师，请按照以下步骤处理用户提供的项目招标文件内容：

1. **结构识别**
- 仔细解析文件结构，定位'评分标准'、'评审办法'、'投标人须知'等关键章节
- 特别注意包含'分值'、'评分项'、'权重'等关键词的段落

2. **核心要素提取**
- 系统提取以下要素形成结构化表格：
  │ 类别 │ 评分项名称 │ 分值权重 │ 具体要求 │ 否决条款 │
- 分类标准：
  ● 技术部分（方案设计、实施能力、技术创新等）
  ● 商务部分（资质证明、业绩案例、团队经验等）
  ● 价格部分（报价合理性、计价方式等）
  ● 其他专项（售后服务、本地化服务等）

3. **深度分析**
- 计算权重配比（示例：技术60% = 方案设计30% + 实施能力20% + 创新10%）
- 识别否决性条款（如"▲"标记项或特定强制要求）
- 标注特殊评分规则：阶梯得分、区间赋分、横向比较等机制

4. **风险提示**
- 标出易被忽视的得分点（如ISO认证、专利数量等）
- 识别矛盾条款（如总分值≠100%的情况）
- 提示资质门槛要求（注册资金、特定资质证书等）

5. **输出格式**
采用Markdown输出以下结构：
```markdown
# 招标评分要点汇总

## 核心指标配比
- 总评分构成：技术分（__%）+ 商务分（__%）+ 价格分（__%）

## 详细评分矩阵
| 类别 | 评分项 | 分值 | 具体要求 | 关键指标 |
|------|-------|-----|---------|---------|
| ... | ... | ... | ... | ... |

## 重点提示
⚠️ 否决条款：列出所有一票否决项
💡 得分要点：突出3-5个高权重核心指标
⏱️ 时间节点：标注与评分相关的时限要求
```
请先确认理解任务要求，待用户提供招标文件内容后执行分析。"\
"""
def get_outlines_with_rating_prompt(summary: str, rating: str) -> str:
    """Build the outline prompt that is driven by the technical scoring criteria.

    Reads ``TARGET_PAGES``, ``CONTENT_VOLUME`` and ``PAGE_CHAR_ESTIMATE`` from
    ``config`` (defaulting to 0 / ``'standard'`` / 700), obtains the
    chapter-count hint from ``_vc`` and fills the ``{chapter_count_hint}``,
    ``{summary}`` and ``{rating}`` placeholders of ``OUTLINES_WITH_RATING``.

    Args:
        summary: Summary of the tender document.
        rating: Technical scoring criteria text.

    Returns:
        The fully substituted prompt string.
    """
    import re  # local import so the block does not depend on file-level imports

    pages = int(getattr(config, 'TARGET_PAGES', 0) or 0)
    volume = getattr(config, 'CONTENT_VOLUME', 'standard')
    chars_per_page = int(getattr(config, 'PAGE_CHAR_ESTIMATE', 700) or 700)
    hint = _vc.outline_chapter_count_hint_with_rating_variant(
        pages, volume, chars_per_page
    )

    values = {'chapter_count_hint': hint, 'summary': summary, 'rating': rating}
    # Substitute every placeholder in ONE pass over the template. Chained
    # str.replace() calls re-scan text inserted by an earlier call, so a
    # literal "{rating}" inside `summary` used to be replaced as well.
    return re.sub(
        r'\{(chapter_count_hint|summary|rating)\}',
        lambda match: values[match.group(1)],
        OUTLINES_WITH_RATING,
    )
def get_section_intro_prompt(summary: str, outline: str, title: str) -> str:
    """Build the prompt for a chapter's short opening paragraph.

    Fills the ``{summary}``, ``{outline}`` and ``{title}`` placeholders of
    ``GEN_SECTION_INTRODUCTION_PROMT``.

    Args:
        summary: Project summary text.
        outline: Outline text of the bid document.
        title: Title of the chapter being introduced.

    Returns:
        The substituted prompt, or ``''`` when the template is empty.
    """
    import re  # local import so the block does not depend on file-level imports

    if not GEN_SECTION_INTRODUCTION_PROMT:
        return ''

    values = {'summary': summary, 'outline': outline, 'title': title}
    # Single-pass substitution: text inserted for one placeholder is never
    # re-scanned, so a literal "{title}" or "{outline}" occurring inside
    # `summary`/`outline` is left untouched (chained replace() rewrote it).
    return re.sub(
        r'\{(summary|outline|title)\}',
        lambda match: values[match.group(1)],
        GEN_SECTION_INTRODUCTION_PROMT,
    )
def get_attachment_single_chart_prompt(
    summary: str,
    outline_text: str,
    title: str,
    kind: str,
) -> str:
    """Build the prompt for an attachment section that holds one figure or table.

    The prompt instructs the model to emit exactly one ``[FIGURE]`` or
    ``[TABLE]`` block instead of a long technical narrative.

    Args:
        summary: Project summary; only the first 2000 characters are used.
        outline_text: Outline context; only the first 1500 characters are used.
        title: Section title.
        kind: ``'figure'`` or ``'table'``. Matching ignores case and
            surrounding whitespace; any other value selects the table
            template, as before.

    Returns:
        The assembled prompt string.
    """
    summ = (summary or '')[:2000]
    outl = (outline_text or '')[:1500]
    # Normalise before comparing so that 'Figure' / ' figure ' no longer fall
    # through silently to the table template.
    wants_figure = str(kind or '').strip().lower() == 'figure'
    if wants_figure:
        spec = """▌输出格式（必须遵守，不得改括号与标签名）：
[FIGURE:具体图示标题]
（用简洁文字或 ASCII 符号绘制，与标题一致）
[/FIGURE]
除上述块外不要输出技术方案段落；可在此之前写 1～2 句过渡语（可选）。"""
    else:
        spec = """▌输出格式（必须遵守，不得改括号与标签名）：
[TABLE:具体表格标题]
| 列名1 | 列名2 | 列名3 |
|-------|-------|-------|
| 数据行 |
[/TABLE]
除上述块外不要输出技术方案段落；可在此之前写 1～2 句过渡语（可选）。"""
    return f"""【附件项】本节仅生成一个图示或表格，禁止撰写长技术标正文。

【章节标题】
{title}

【项目摘要（可摘录入图/表，勿展开论述）】
{summ}

【大纲上下文（节选）】
{outl}

【任务】
{spec}
图示/表格须紧扣章节标题与摘要，不得使用 XXX、待定等占位；列数 4～6 为宜（表格）。"""
+【项目摘要（仅供块内摘录，勿写摘要式段落）】 +{summ} + +【大纲上下文（节选）】 +{outl} +{priority_block} +▌按上列优先级顺序，各输出对应类型的一块（先完成①再②）： +图示块： +[FIGURE:须具体反映本节标题的图示名] +（正文仅用 ASCII/简洁中文示意） +[/FIGURE] + +表格块： +[TABLE:须具体反映本节标题的表名] +| 列1 | 列2 | 列3 | +|-----|-----|-----| +| 数据 | +[/TABLE] + +▌禁止：编号小节、过渡段、复述招标要求、与块无关的任何正文。块内数据须与标题及摘要一致，禁用 XXX、待定。""" + + +def get_figure_addon() -> str: + """启用"图"模式时，追加到章节生成提示词末尾的图示生成规范（已更新强调项目特定内容）""" + return """ +【图示生成规范（必须遵守）】 +在正文适当位置根据本章节**具体内容和项目摘要**自动插入图示，图示内容**必须与所写章节标题、项目技术参数、工程量清单摘要紧密对应**，严禁套用与本章无关的通用模板或与项目情况不符的内容。 + +▌标记格式（不得修改括号和斜杠，标题须具体反映图示内容并包含章节关键词）： +[FIGURE:具体图示标题 - 结合本节标题] +图示内容（用文字、ASCII 符号绘制，必须反映项目实际情况） +[/FIGURE] + +▌四类触发场景及示例（仅为格式参考，实际内容必须基于本项目摘要和章节标题定制）： + +① 组织机构类 → 树形图 +[FIGURE:本项目质量管理组织架构图 - 结合招标质量控制要求] +项目经理 +├── 技术负责人 ──→ 专职质检员（按招标要求配置2人）、测量员（按清单配置） +├── 施工队长 ──→ 土建/安装班组（满足峰值作业强度） +└── 安全负责人 ──→ 专职安全员（持证上岗，按规范配备） +[/FIGURE] + +② 流程类 → 流程图 +[FIGURE:项目关键工序质量控制与验收流程图] +招标参数采集 ──→ 现场自检复核 ──→ 数据分析与偏差预警 + ↓ 超招标阈值 + 分级响应（黄色/红色） ──→ 暂停整改 + 记录上报 +[/FIGURE] + +▌执行要求（重点）： +- 图示标题和内容**必须结合本章节标题、项目摘要、BOQ摘要中的具体工艺、参数、工程量定制**，不要使用与项目无关的通用数据 +- 每个图示前后各须有至少一段正文说明，不得孤立出现 +- 图示中的岗位、人数、节点、参数须结合本章节正文内容和项目摘要填写，不得留有"XXX"等占位符 +- 每章节最多插入 1～2 个图示，按需插入，勿为凑数而强行添加 +- 优先使用项目特有数据（如特定设备名称、工艺步骤、监测参数、招标中提到的关键控制点等）""" + + +def get_table_addon() -> str: + """启用"表"模式时，追加到章节生成提示词末尾的表格生成规范""" + return """ + +【表格生成规范（必须遵守）】 +在正文适当位置根据本章节具体内容自动插入表格，表格数据须结合本章节实际内容填写，严禁套用与本章无关的通用模板。 + +▌标记格式（不得修改括号和斜杠，标题须具体反映表格内容）： +[TABLE:具体表格标题] +| 列名1 | 列名2 | 列名3 | +|-------|-------|-------| +| 数据1 | 数据2 | 数据3 | +[/TABLE] + +▌六类触发场景及示例： + +① 人员配置类（涉及项目管理团队、专业人员配置等） +[TABLE:本项目主要管理人员配置一览表] +| 序号 | 岗位 | 拟派人数 | 资质要求 | 主要职责 | +|------|------|---------|---------|---------| +| 1 | 项目经理 | 1 | 一级建造师，从业 10 年以上 | 全面统筹项目实施 | +| 2 | 技术负责人 | 1 | 高级工程师，从业 8 年以上 | 技术方案与质量管控 | +| 3 | 安全负责人 | 1 | 注册安全工程师，具备安全 C 证 | 安全生产管理 | +| 4 | 专职质检员 | 2 | 质检员证，从业 5 年以上 | 过程质量检验与记录 | +[/TABLE] + +② 设备投入类（涉及施工机械、监测仪器、工具设备等） +[TABLE:主要施工设备及仪器投入一览表] +| 序号 | 设备名称 | 规格型号 | 数量 | 状态 | 主要用途 | 
+|------|---------|---------|------|------|---------| +| 1 | 全站仪 | 徕卡 TS16 | 2 台 | 自有 | 平面及高程测量 | +| 2 | 静力水准仪 | BGK-4700 | 8 套 | 自有 | 沉降自动化监测 | +| 3 | 挖掘机 | 卡特 320D | 2 台 | 租赁 | 基坑开挖 | +[/TABLE] + +③ 劳动力计划类（涉及各工种、各阶段人数安排等） +[TABLE:劳动力配置计划表] +| 工种 | 准备阶段（人） | 施工高峰期（人） | 收尾阶段（人） | 备注 | +|------|-------------|---------------|-------------|------| +| 测量工 | 4 | 6 | 2 | 含 1 名高级测量技师 | +| 土建工 | 8 | 20 | 6 | 持证特殊工种优先 | +| 安装工 | 0 | 10 | 4 | 含持证电工、焊工 | +[/TABLE] + +④ 质量/安全检查类（涉及关键工序验收、安全巡检等） +[TABLE:关键工序质量检验项目一览表] +| 序号 | 检验项目 | 检验方法 | 检验频率 | 合格标准 | 责任人 | +|------|---------|---------|---------|---------|------| +| 1 | 基础轴线偏差 | 全站仪复测 | 每道工序 | ≤±5mm | 测量员 | +| 2 | 混凝土强度 | 试块取样 | 每 50m³ | ≥C30 | 质检员 | +[/TABLE] + +⑤ 材料供应类（涉及主要材料规格、用量计划等） +[TABLE:主要材料供应计划表] +| 序号 | 材料名称 | 规格 | 计划用量 | 供应商 | 进场时间 | +|------|---------|------|---------|------|---------| +| 1 | 商品混凝土 | C30 | 约 800m³ | 本地搅拌站 | 第 5 周 | +| 2 | 钢筋 | HRB400Φ16-25 | 约 60t | 资质合规厂商 | 第 4 周 | +[/TABLE] + +⑥ 风险/应急类（涉及风险识别、应急预案等） +[TABLE:主要施工风险及应对措施一览表] +| 风险类型 | 诱因 | 等级 | 预防措施 | 应急响应 | +|---------|------|------|---------|---------| +| 基坑坍塌 | 降雨渗水 | 高 | 坡面防护+排水沟 | 立即撤场+加固 | +| 管线破坏 | 机械误挖 | 中 | 人工开挖保护区 | 停工+抢修 | +[/TABLE] + +▌执行要求： +- 表格数据须根据本章节正文内容填写，不得使用"XXX""待定"等占位符 +- 表格列数控制在 4～6 列，行数视内容而定，不强求凑满 +- 每张表格前后各须有至少一段正文说明 +- 每章节最多插入 2～3 张表格，按需插入 +- **必须结合本章节标题、项目摘要和工程量清单摘要中的具体信息定制表格数据和标题，不要使用与项目无关的通用示例**""" + + +def get_chart_forbid_addon() -> str: + """当项目参数中关闭图表生成时，追加到提示词的严格禁止指令。""" + return """ + +【图表生成控制（重要，必须严格遵守）】 +本项目在第一页「生成偏好」参数设置中已关闭图示和表格生成开关。 +**正文中严禁出现任何 [FIGURE:...]、[/FIGURE]、[TABLE:...] 
或 [/TABLE] 标记块**。 +不要生成任何图示、表格、ASCII 艺术、流程示意或视觉化内容。 +只输出纯文本技术方案正文段落。如果需要可视化内容，请提示用户在项目设置中开启图表开关。 +所有内容必须纯文本描述，不得使用任何结构化块。 +""" diff --git a/utils/qwen_image_client.py b/utils/qwen_image_client.py new file mode 100644 index 0000000..b48a393 --- /dev/null +++ b/utils/qwen_image_client.py @@ -0,0 +1,193 @@ +""" +通义千问文生图（DashScope 多模态生成 API）。 +默认模型：qwen-image-2.0-pro（与阿里云 Model Studio 文档一致）。 +文档：https://www.alibabacloud.com/help/en/model-studio/qwen-image-api +""" +from __future__ import annotations + +import json +import logging +import re +from typing import Any, Optional + +import requests + +import config + +logger = logging.getLogger(__name__) + +# 北京地域同步接口（与 QWEN_BASE_URL 常用配置同账号） +DEFAULT_MULTIMODAL_BASE = 'https://dashscope.aliyuncs.com/api/v1' +MULTIMODAL_GENERATION_PATH = '/services/aigc/multimodal-generation/generation' + +_NEG_DEFAULT = ( + '低分辨率, 模糊, 畸形手指, 过度饱和, 蜡像感, 杂乱构图, 扭曲文字, ' + '公司商标, 企业LOGO, 投标人名称, 水印, 证件照' +) + +_PLACEHOLDER_KEY_RE = re.compile(r'^sk-your|^sk-xxxx', re.I) + + +def _effective_multimodal_base() -> str: + raw = (getattr(config, 'QWEN_MULTIMODAL_BASE', '') or '').strip().rstrip('/') + if raw: + return raw + return DEFAULT_MULTIMODAL_BASE + + +def _generation_url() -> str: + return f'{_effective_multimodal_base()}{MULTIMODAL_GENERATION_PATH}' + + +def _qwen_api_key() -> str: + key = (getattr(config, 'QWEN_API_KEY', '') or '').strip() + if not key or _PLACEHOLDER_KEY_RE.match(key): + return '' + return key + + +def _extract_image_url(payload: dict[str, Any]) -> Optional[str]: + try: + out = payload.get('output') or {} + choices = out.get('choices') or [] + if not choices: + return None + msg = (choices[0] or {}).get('message') or {} + content = msg.get('content') + if isinstance(content, list): + for part in content: + if isinstance(part, dict): + url = part.get('image') + if isinstance(url, str) and url.startswith('http'): + return url + if isinstance(content, dict): + url = content.get('image') + if isinstance(url, str) and 
def _truncate_prompt(text: str, max_chars: int = 780) -> str:
    """Normalise *text* and cap it at *max_chars* characters.

    The text is stripped and ``\\r\\n`` / ``\\r`` line endings become ``\\n``.
    When it is longer than *max_chars* it is cut and ``'...'`` is appended so
    that the result is exactly *max_chars* long.

    Args:
        text: Input text; ``None`` or empty yields ``''``.
        max_chars: Maximum length of the result.

    Returns:
        The normalised, possibly truncated text; never longer than
        ``max(max_chars, 0)``.
    """
    t = (text or '').strip().replace('\r\n', '\n').replace('\r', '\n')
    if max_chars <= 0:
        return ''
    if len(t) <= max_chars:
        return t
    if max_chars < 3:
        # No room for the ellipsis. The former ``t[:max_chars - 3]`` used a
        # negative bound here and returned MORE than max_chars characters.
        return t[:max_chars]
    return t[: max_chars - 3] + '...'
'QWEN_IMAGE_PROMPT_EXTEND', True)), + 'watermark': bool(getattr(config, 'QWEN_IMAGE_WATERMARK', False)), + 'size': sz, + }, + } + + to = timeout if timeout is not None else max(120, int(getattr(config, 'REQUEST_TIMEOUT', 180) or 180)) + try: + resp = requests.post( + url, + headers={ + 'Authorization': f'Bearer {api_key}', + 'Content-Type': 'application/json', + }, + data=json.dumps(body, ensure_ascii=False).encode('utf-8'), + timeout=to, + ) + except requests.RequestException as e: + logger.warning('qwen-image 请求异常: %s', e) + return None, str(e) + + try: + data = resp.json() + except Exception: + return None, f'文生图接口返回非 JSON，HTTP {resp.status_code}' + + if resp.status_code != 200: + msg = data.get('message') or data.get('code') or resp.text[:500] + logger.warning('qwen-image HTTP %s: %s', resp.status_code, msg) + return None, f'文生图失败 HTTP {resp.status_code}: {msg}' + + code = data.get('code') + if code and str(code).upper() not in ('OK', 'SUCCESS', '200', ''): + msg = data.get('message') or str(code) + logger.warning('qwen-image 业务错误: %s', msg) + return None, f'文生图接口错误: {msg}' + + img_url = _extract_image_url(data) + if not img_url: + logger.warning('qwen-image 响应无图片 URL: %s', str(data)[:800]) + return None, '文生图响应中未找到图片 URL' + + try: + ir = requests.get(img_url, timeout=min(to, 120)) + ir.raise_for_status() + return ir.content, None + except requests.RequestException as e: + logger.warning('下载生成图片失败: %s', e) + return None, f'下载图片失败: {e}' + + +def generate_attachment_figure_png(title: str, content: str) -> tuple[Optional[bytes], Optional[str]]: + """附件 [FIGURE] 专用：拼装提示词并生成 PNG。""" + prompt = build_attachment_figure_prompt(title or '附图', content or '') + return generate_qwen_image_png(prompt) diff --git a/utils/settings.py b/utils/settings.py new file mode 100644 index 0000000..b46d4f0 --- /dev/null +++ b/utils/settings.py @@ -0,0 +1,150 @@ +""" +配置持久化：将用户在界面中设置的 API Key 等配置保存到 data/settings.json， +服务重启后自动恢复，不再每次重启都丢失 Key。 +""" +import json +import os +import 
# (environment variable name, config attribute name) pairs for the API keys
# that may be injected through the environment.
_ENV_API_KEYS = (
    ('QWEN_API_KEY', 'QWEN_API_KEY'),
    ('OPENAI_API_KEY', 'OPENAI_API_KEY'),
    ('DEEPSEEK_API_KEY', 'DEEPSEEK_API_KEY'),
    ('DOUBAO_API_KEY', 'DOUBAO_API_KEY'),
    ('KIMI_API_KEY', 'KIMI_API_KEY'),
)


def _apply_env_overrides(cfg) -> None:
    """Let environment variables override values already present on *cfg*.

    ``MODEL_PROVIDER`` and every key listed in ``_ENV_API_KEYS`` are read
    from ``os.environ``. A value is applied only when it is non-blank after
    stripping; API keys starting with the ``sk-your`` placeholder prefix are
    ignored.

    Args:
        cfg: Object (the config module) whose attributes are overwritten.
    """
    provider = (os.environ.get('MODEL_PROVIDER') or '').strip()
    if provider:
        cfg.MODEL_PROVIDER = provider

    for env_name, attr in _ENV_API_KEYS:
        # Strip BEFORE validating: a whitespace-only variable used to wipe a
        # saved key with '', and a leading space bypassed the placeholder
        # check.
        key = (os.environ.get(env_name) or '').strip()
        if key and not key.startswith('sk-your'):
            setattr(cfg, attr, key)
def _apply(cfg, data: dict) -> None:
    """Copy validated values from the settings dict *data* onto *cfg*.

    * String settings are stored under the lower-cased attribute name and are
      applied only when the stored value is a non-empty ``str``.
    * Integer settings are converted with ``int()`` and clamped to their
      range; values that cannot be converted are ignored.
    * ``content_volume`` is applied only when it is one of the known levels.
    """
    text_attrs = (
        'MODEL_PROVIDER',
        'QWEN_API_KEY', 'QWEN_MODEL', 'QWEN_BASE_URL',
        'OPENAI_API_KEY', 'OPENAI_MODEL', 'OPENAI_BASE_URL',
        'DEEPSEEK_API_KEY', 'DEEPSEEK_MODEL', 'DEEPSEEK_BASE_URL',
        'OLLAMA_BASE_URL', 'OLLAMA_MODEL',
        'DOUBAO_API_KEY', 'DOUBAO_MODEL', 'DOUBAO_BASE_URL',
        'KIMI_API_KEY', 'KIMI_MODEL', 'KIMI_BASE_URL',
    )
    for attr in text_attrs:
        candidate = data.get(attr.lower())
        if candidate and isinstance(candidate, str):
            setattr(cfg, attr, candidate)

    # (settings key, config attribute, lower bound, upper bound or None)
    bounded_ints = (
        ('max_concurrent', 'MAX_CONCURRENT_SECTIONS', 1, 20),
        # The semaphore built from this limit is created when config is
        # initialised, so a changed value takes effect after a restart.
        ('llm_concurrency_limit', 'LLM_CONCURRENCY_LIMIT', 1, 30),
    )
    for key, attr, low, high in bounded_ints:
        if key not in data:
            continue
        try:
            number = int(data[key])
            setattr(cfg, attr, max(low, min(number, high)))
        except (ValueError, TypeError):
            continue

    volume = data.get('content_volume')
    if volume and volume in ('concise', 'standard', 'detailed', 'full'):
        cfg.CONTENT_VOLUME = volume

    if 'target_pages' in data:
        try:
            cfg.TARGET_PAGES = max(0, int(data['target_pages']))
        except (ValueError, TypeError):
            pass

    if 'page_char_estimate' in data:
        try:
            estimate = int(data['page_char_estimate'])
            cfg.PAGE_CHAR_ESTIMATE = max(300, min(3000, estimate))
        except (ValueError, TypeError):
            pass
def _to_bool(v, default=False):
    """Coerce *v* to ``bool``.

    ``bool`` values are returned unchanged; the strings true/1/yes/y/on and
    false/0/no/n/off (case-insensitive, stripped) map to ``True`` / ``False``;
    anything else yields *default*.
    """
    if isinstance(v, bool):
        return v
    if isinstance(v, str):
        s = v.strip().lower()
        if s in ('true', '1', 'yes', 'y', 'on'):
            return True
        if s in ('false', '0', 'no', 'n', 'off'):
            return False
    return default


def _normalize_preset_keys(raw: Optional[dict], base: Optional[dict] = None) -> dict:
    """Map front-end (camelCase) style settings onto the exporter's keys.

    Each output key is resolved in this order: camelCase key in *raw*,
    snake_case key in *raw*, value already present in *base*, built-in
    default.

    Args:
        raw: Style settings as sent by the front end or stored in a preset;
            ``None`` is treated as empty.
        base: Preset used for values missing from *raw*; not mutated.

    Returns:
        A new dict with the snake_case keys assigned below.
    """
    import re  # local import so the block does not depend on file-level imports

    src = raw or {}
    out = dict(base or {})

    # Chinese named font sizes -> points.
    size_map = {'初号': 42, '一号': 26, '二号': 22, '三号': 16, '小三': 15,
                '四号': 14, '小四': 12, '五号': 10.5, '小五': 9}

    def _size_pt(v, default):
        """Resolve a font size given as a number, 'NN pt' text or a size name."""
        if isinstance(v, (int, float)):
            return v
        if not isinstance(v, str):
            return default
        s = v.strip()
        # Accept "小四 (12pt)", "12pt", "10.5 pt", ...; previously only the
        # parenthesised form parsed and everything else fell to the default.
        explicit = re.search(r'(\d+(?:\.\d+)?)\s*pt', s, re.IGNORECASE)
        if explicit:
            return float(explicit.group(1))
        if s in size_map:
            return size_map[s]
        # Bare numeric strings such as "11" used to be ignored.
        if re.fullmatch(r'\d+(?:\.\d+)?', s):
            return float(s)
        return default

    def _f(v, d):
        """Return ``float(v)``, or ``float(d)`` when *v* is not convertible."""
        try:
            return float(v)
        except (TypeError, ValueError, OverflowError):
            return float(d)

    # Fonts and font sizes
    out['body_font'] = src.get('bodyFont', src.get('body_font', out.get('body_font', '宋体')))
    out['heading_font'] = src.get('heading1Font', src.get('heading_font', out.get('heading_font', '黑体')))
    out['heading1_size_pt'] = _size_pt(
        src.get('heading1Size', src.get('heading1_size_pt', out.get('heading1_size_pt', 16))), 16)
    out['heading2_size_pt'] = _size_pt(
        src.get('heading2Size', src.get('heading2_size_pt', out.get('heading2_size_pt', 14))), 14)
    out['body_size_pt'] = _size_pt(
        src.get('bodySize', src.get('body_size_pt', out.get('body_size_pt', 12))), 12)
    out['body_line_spacing'] = src.get(
        'bodyLineSpacing', src.get('body_line_spacing', out.get('body_line_spacing', 1.5)))

    # First-line indent
    body_indent = src.get('bodyIndent', 2)
    indent_unit = src.get('bodyIndentUnit', 'char')
    try:
        body_indent = float(body_indent)
    except (TypeError, ValueError, OverflowError):
        body_indent = 2
    # NOTE(review): one "char" is taken as a fixed 14 pt regardless of the
    # body font size - confirm this is intended.
    out['body_indent_pt'] = body_indent * 14 if indent_unit == 'char' else body_indent

    # Page margins: per-side front-end fields win over a margins_cm dict.
    # Both dicts are type-checked: a None / non-dict margins_cm in `base`
    # used to raise AttributeError.
    src_margins = src.get('margins_cm')
    if not isinstance(src_margins, dict):
        src_margins = {}
    base_margins = out.get('margins_cm')
    if not isinstance(base_margins, dict):
        base_margins = {}
    margin_fields = (
        ('top', 'marginTop', 2.54),
        ('bottom', 'marginBottom', 2.54),
        ('left', 'marginLeft', 3.18),
        ('right', 'marginRight', 3.18),
    )
    out['margins_cm'] = {
        side: _f(
            src.get(field, src_margins.get(side, base_margins.get(side, fallback))),
            fallback,
        )
        for side, field, fallback in margin_fields
    }

    # Header / footer
    out['header_text'] = src.get('headerText', src.get('header_text', out.get('header_text', '')))
    out['footer_text'] = src.get('footerText', src.get('footer_text', out.get('footer_text', '')))

    # Figure / table switches
    out['figure_enabled'] = _to_bool(
        src.get('figureEnabled', src.get('figure_enabled', out.get('figure_enabled', True))), True)
    out['table_enabled'] = _to_bool(
        src.get('tableEnabled', src.get('table_enabled', out.get('table_enabled', True))), True)
    out['attachment_figure_use_qwen'] = _to_bool(
        src.get('attachmentFigureUseQwen',
                src.get('attachment_figure_use_qwen', out.get('attachment_figure_use_qwen', True))),
        True,
    )
    try:
        out['attachment_figure_width_cm'] = float(
            src.get('attachmentFigureWidthCm',
                    src.get('attachment_figure_width_cm', out.get('attachment_figure_width_cm', 15)))
        )
    except (TypeError, ValueError):
        out['attachment_figure_width_cm'] = 15.0

    return out
def apply_preset_to_document(doc: Document, preset: dict):
    """Apply page-level preset values to the first section of *doc*.

    Sets the four page margins and, when the preset provides non-empty
    ``header_text`` / ``footer_text``, writes them centred into the first
    paragraph of the section's header / footer. Heading and body styles are
    not handled here.

    Args:
        doc: Document whose first section is modified in place.
        preset: Preset dict; ``margins_cm`` may be missing, ``None`` or
            contain only some sides - absent or non-numeric sides fall back
            to 2.5 / 2.5 / 3.0 / 2.5 cm (top / bottom / left / right).

    Returns:
        The same *doc*, to allow chaining.
    """
    section = doc.sections[0]

    # Page setup. Defaults are merged per side: indexing m['top'] directly
    # raised KeyError for a partial dict and TypeError for margins_cm=None.
    default_margins = {'top': 2.5, 'bottom': 2.5, 'left': 3.0, 'right': 2.5}
    margins = preset.get('margins_cm')
    if not isinstance(margins, dict):
        margins = {}
    for side, fallback in default_margins.items():
        try:
            value_cm = float(margins.get(side, fallback))
        except (TypeError, ValueError):
            value_cm = fallback
        setattr(section, f'{side}_margin', Cm(value_cm))

    # Header / footer (basic).
    # NOTE(review): the text is written verbatim, so tokens such as "{page}"
    # or "{total}" in footer_text are not expanded here - confirm another
    # step turns them into page-number fields.
    if preset.get('header_text'):
        header = section.header
        if header.paragraphs:
            paragraph = header.paragraphs[0]
            paragraph.text = preset['header_text']
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
    if preset.get('footer_text'):
        footer = section.footer
        if footer.paragraphs:
            paragraph = footer.paragraphs[0]
            paragraph.text = preset['footer_text']
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    return doc
页面设置 + 'margins': { + 'top': preset.get('marginTop', 2.54), + 'bottom': preset.get('marginBottom', 2.54), + 'left': preset.get('marginLeft', 3.18), + 'right': preset.get('marginRight', 3.18) + }, + 'paperOrientation': preset.get('paperOrientation', '纵'), + + # 标题文字 + 'heading1': f'font-family: {preset.get("heading1Font", "黑体")}; ' + f'font-size: {get_pt(preset.get("heading1Size", "三号"))}pt; ' + f'font-weight: {"bold" if preset.get("heading1Bold", True) else "normal"};', + 'heading2': f'font-family: {preset.get("heading2Font", "宋体")}; ' + f'font-size: {get_pt(preset.get("heading2Size", "小四"))}pt; ' + f'font-weight: 600;', + + # 表格设置 - 表头 + 'tableHeader': f'font-family: {preset.get("tableHeaderFont", "宋体")}; ' + f'font-size: {get_pt(preset.get("tableHeaderSize", "小四"))}pt; ' + f'font-weight: {"bold" if preset.get("tableHeaderBold") else "normal"}; ' + f'line-height: {get_line_height(preset.get("tableHeaderLineSpacing", "24"))}; ' + f'text-align: {get_align(preset.get("tableHeaderAlign", "center"))};', + + # 表格设置 - 内容 + 'tableBody': f'font-family: {preset.get("tableBodyFont", "宋体")}; ' + f'font-size: {get_pt(preset.get("tableBodySize", "小四"))}pt; ' + f'font-weight: {"bold" if preset.get("tableBodyBold") else "normal"}; ' + f'line-height: {get_line_height(preset.get("tableBodyLineSpacing", "24"))}; ' + f'text-align: {get_align(preset.get("tableBodyAlign", "center"))};', + + # 目录设置 + 'tocEnabled': preset.get('tocEnabled', True), + 'tocTitle': f'font-family: {preset.get("tocTitleFont", "黑体")}; ' + f'font-size: {get_pt(preset.get("tocTitleSize", "三号"))}pt; ' + f'font-weight: {"bold" if preset.get("tocTitleBold") else "normal"}; ' + f'line-height: {get_line_height(preset.get("tocTitleLineSpacing", "24"))}; ' + f'text-align: {get_align(preset.get("tocTitleAlign", "center"))};', + 'tocBody': f'font-family: {preset.get("tocBodyFont", "宋体")}; ' + f'font-size: {get_pt(preset.get("tocBodySize", "四号"))}pt; ' + f'font-weight: {"bold" if preset.get("tocBodyBold") else "normal"}; ' 
+ f'line-height: {get_line_height(preset.get("tocBodyLineSpacing", "24"))}; ' + f'text-align: {get_align(preset.get("tocBodyAlign", "left"))};' + } + +# For future AI extraction in parser +def extract_style_hints_from_text(text: str): + """Placeholder for AI to extract style requirements from tender text""" + # Can be expanded with ai_client.chat using a prompt for "提取字体、页边距、图表要求" + return get_preset('standard') diff --git a/utils/tender_kind_sections.py b/utils/tender_kind_sections.py new file mode 100644 index 0000000..12875f3 --- /dev/null +++ b/utils/tender_kind_sections.py @@ -0,0 +1,278 @@ +""" +按招标文件类型（工程 / 服务 / 货物）区分的章节正文生成提示词模板。 +与 modules.generator.BID_WRITING_SYSTEM 配合使用；自称以系统铁律为准，统一用「我方」。 +""" +import re +from typing import Optional + +VALID_TENDER_KINDS = frozenset({'engineering', 'service', 'goods'}) + +DEFAULT_WORD_COUNT_SPEC = ( + '- 一般小节：不少于 2000 字；核心技术/重点评分章节：不少于 4000 字\n' + '- 字数须由实质方案内容支撑，禁止用重复项目背景或复述招标要求凑字数\n' + '- 有实质细节的展开写，原则性描述可简洁处理；通过流程、节点、比选、管控展开满足篇幅' +) + +TENDER_KIND_CLASSIFY = """\ +你是一名招标文件分类专家。根据以下招标文件摘录，判断本项目技术标书应采用的「写作模板类型」。 + +只输出以下三个英文单词之一，不要输出任何其他文字、标点、换行或解释： +engineering +service +goods + +含义： +- engineering：工程施工类（建筑、市政、公路、水利、装修、园林、拆除等，以现场施工组织、工艺、机械、进度网络为主） +- service：服务类（咨询、设计、监理、运维、物业、保洁、餐饮配送、培训、安保、技术服务等，以人力/智力交付、流程、SLA 为主） +- goods：货物类（设备、材料、车辆、家具、软硬件供货等，以产品规格、供货、质保、验收为主；含附带安装指导仍以供货为主可归此类） + +判定规则： +若主要为施工安装且涉及土建/结构/施工机械与工期，归为 engineering。 +若主要为服务过程、人员驻场、响应时效与服务质量体系，归为 service。 +若主要为产品技术规格、供货批次、出厂检验与到货验收，归为 goods。 +若施工与供货并重，以现场施工量与工期为主则 engineering，以设备物资交付为主则 goods。 + +【招标文件摘录】 +{excerpt} +""" + + +def get_tender_kind_classify_prompt(excerpt: str) -> str: + return TENDER_KIND_CLASSIFY.replace('{excerpt}', excerpt or '') + + +def parse_tender_kind_response(response: str) -> str: + """从模型返回中解析出 engineering / service / goods，失败则 engineering。""" + if not response: + return 'engineering' + tokens = re.sub(r'[^a-zA-Z]+', ' ', response).lower().split() + for w in tokens: + if w in VALID_TENDER_KINDS: + return w + low = 
def normalize_tender_kind(kind: Optional[str]) -> str:
    """Return *kind* trimmed and lower-cased if it is a valid tender kind.

    Any empty, ``None`` or unrecognized value maps to ``'engineering'``.
    Validity is membership in ``VALID_TENDER_KINDS``.
    """
    if not kind:
        return 'engineering'
    cleaned = kind.strip().lower()
    if cleaned in VALID_TENDER_KINDS:
        return cleaned
    return 'engineering'
+【内容特征】 +- 服务流程：按"接收需求→分析评估→方案制定→实施执行→验收交付→持续改进"框架展开 +- 人员配置：强调专业资质与岗位角色齐全，用"配备满足本项目服务范围与关键岗位持证要求的人员力量""项目经理及骨干具备相应执业或认证资格"等完整句子，禁止"[资质][岗位][数量]名"式占位 +- 质量保障：使用服务体系标准（如ISO 9001、ITIL、ITSS）而非工程规范 +- 响应机制：写清"受理—分派—处理—回访/关闭"闭环；时限已见于招标文件的写具体值，未见者写"按招标文件及行业通行服务等级划分响应与处理时限，并设置升级与应急通道" +- 服务标准：可引用SLA框架，用自然语言描述指标层级与考核方式，禁止用方括号代替指标 + +【未定参数的写法】 +- 服务范围与对象：用"采购文件约定的服务内容与交付边界""服务对象规模与业务场景按项目实际确定"等概括 +- 人员与资源：用"与峰值并发与服务等级相匹配的人力与工具配置" +- 场地与备件：用"按需设置服务场所与备件储备，保障连续性与可用性目标" + +【行文规范】 +- 自称统一用「我方」，禁用「我们」「本公司」 +- 招标人称「招标方」「采购人」或「甲方」 +- 禁止前导句和AI套话 +- 列举用(1)(2)(3)，禁用"首先其次" +- 纯文本输出，段落间空行分隔 +- 强调"服务承诺"与"保障措施"的可执行性，避免空泛 + +【防过拟合约束】 +- 不预设具体行业细节（如不说"针对医院HIS系统"，改为"针对采购人业务系统与数据环境"） +- 服务方案提供"标准模块+可选配置"结构（"基础服务包包含...，增值服务可选..."） +- 使用"结合采购人行业特点与监管要求""参照同类项目成熟实践"等弹性表述 + +【内容禁区】 +- 禁止出现：施工工艺、材料设备技术参数、工程量计算、施工机械配置 +- 禁止出现：建筑结构、土木工程技术措施 + +【字数要求】 +{word_count_spec} +- 通过详细描述服务流程节点、人员职责分工、质量检查点、应急预案来满足篇幅 + +【输入】 +- 招标文件概要：{summary} +- 标书目录：{outline} +- 子小节标题：{subsection_title} + +直接输出正文，不含标题和解释。""" + + +# ── 货物类 ─────────────────────────────────────────────────────────────── + +SECTION_DETAILS_GOODS = """\ +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +- 角色：资深供货方案技术专家 +- 任务：撰写通用型货物采购项目技术响应方案 + +【核心定位】 +- 通用供货模板，适用于设备采购、材料供应、系统集成、软件采购等 +- 聚焦：产品技术规格、供货方案、质量保证、安装调试（如有）、售后服务 +- 正文为成稿：采购文件、技术规范书或清单中已列明的型号、数量、指标、交货期、质保期等可如实响应；未列明的不得臆造优于招标的数字，亦不得用方括号待填；用"不低于采购文件对应条款""满足招标文件列明的性能与符合性要求""供货批次与到货节奏与现场安装计划相衔接"等概括语言写全句 + +【内容特征】 +- 技术规格：按"指标项—符合性说明"展开；已给出阈值的照写；未给出的写"满足招标文件技术指标与检测方法要求""与同类应用场景主流水平相当且不降低实质性响应" +- 产品描述：强调功能特性、可靠性与标准符合性，避免绑定特定品牌（除非招标文件指定） +- 供货方案：分阶段描述（签约后组织生产或备货、出厂检验、运输与到货验收）；具体天数仅在有依据时写出，否则用"按合同与采购文件约定的供货周期执行" +- 质量保障：强调"出厂检验+第三方检测（如要求）+质保期服务"分层体系 +- 售后服务：写清质保责任边界、备件与技术支持渠道；时长以招标为准，无则写"按采购文件及国家相关规定执行" + +【未定参数的写法】 +- 性能与容量：用"满足采购文件规定的处理能力/精度/兼容性等关键指标" +- 数量与批次：用"与合同清单及现场需求匹配的供货批次与配套件配置" +- 服务时效：用"建立可追踪的报修、响应与闭环机制，时限不低于采购文件要求" + +【行文规范】 +- 自称统一用「我方」，禁用「我们」「本公司」 +- 招标人称「招标方」「采购人」或「甲方」 +- 禁止前导句和AI套话 +- 列举用(1)(2)(3)，禁用"首先其次" +- 纯文本输出，段落间空行分隔 +- 
def build_section_detail_prompt(
    kind: str,
    summary: str,
    outline: str,
    title: str,
    word_count_spec: str = '',
    boq_summary: str = '',
) -> str:
    """Assemble the section-writing prompt for one subsection.

    The template is chosen by tender kind (service / goods, anything else
    uses the engineering template), filled with the supplied context, and
    then extended with the global writing prohibitions. When a bill of
    quantities summary is given it is appended as a final bullet.

    Args:
        kind: Tender kind; normalized with ``normalize_tender_kind``.
        summary: Tender document summary; a "not provided" marker is used
            when empty.
        outline: Bid outline; a "not provided" marker is used when empty.
        title: Subsection title; empty string when missing.
        word_count_spec: Word count requirement text. Empty, blank or
            ``None`` selects ``DEFAULT_WORD_COUNT_SPEC``.
        boq_summary: Bill of quantities summary. Empty, blank or ``None``
            means no extra bullet is appended.

    Returns:
        The complete prompt text.
    """
    k = normalize_tender_kind(kind)
    templates = {
        'service': SECTION_DETAILS_SERVICE,
        'goods': SECTION_DETAILS_GOODS,
    }
    base = templates.get(k, SECTION_DETAILS_ENGINEERING)

    # Optional values may arrive as None (e.g. from an unset field), which
    # previously raised AttributeError on .strip().
    wc = (word_count_spec or '').strip() or DEFAULT_WORD_COUNT_SPEC
    boq = (boq_summary or '').strip()

    text = base.format(
        word_count_spec=wc,
        summary=summary or '（未提供）',
        outline=outline or '（未提供）',
        subsection_title=title or '',
    )
    text += (
        '\n\n【须同步遵守的全局写作禁忌】'
        '禁止复述招标要求后再作答；禁止各章重复工程量数字与项目背景；'
        '禁止无依据将参数写成优于招标文件；字数不得仅靠套话堆砌；'
        '禁止使用方括号、「待填」「TBD」等表示未完稿字段（如[型号][数量][数值]）；'
        '未定信息须写成通顺的概括性中文整句。'
        '若本任务提示词末尾另有「图示/表格」专用输出规范，其中的结构化标记按该规范执行，'
        '不视为待填占位。'
    )

    if boq:
        text += (
            '\n\n- 工程量清单关键信息（写作时按需引用清单中已有数量与单位，勿无故复读；'
            '清单未列明的分项用概括性施工组织语言描述，禁止使用方括号待填项）：\n'
            + boq
        )
    return text
def subchapter_jitter_bounds(n_base: float) -> Tuple[int, int]:
    """Return the closed integer interval ``(lo, hi)`` around *n_base*.

    ``lo`` is ``n_base * SUBCHAPTER_JITTER_LOW`` rounded to an int and never
    below 1; ``hi`` is ``n_base * SUBCHAPTER_JITTER_HIGH`` rounded to an int
    and never below ``lo``.
    """
    lower = int(round(n_base * SUBCHAPTER_JITTER_LOW))
    if lower < 1:
        lower = 1
    upper = int(round(n_base * SUBCHAPTER_JITTER_HIGH))
    if upper < lower:
        upper = lower
    return lower, upper
def allocate_subchapters_to_mains(n: int, k: int) -> List[int]:
    """Split *n* sub-chapter slots as evenly as possible over *k* main chapters.

    The first ``n % k`` chapters receive one slot more than the rest, so the
    quotas always sum to ``n``. A negative *n* is treated as 0; a
    non-positive *k* yields an empty list.
    """
    if k <= 0:
        return []
    total = max(0, n)
    share, extra = divmod(total, k)
    bigger = [share + 1] * extra
    smaller = [share] * (k - extra)
    return bigger + smaller
def outline_chapter_count_hint_with_rating_variant(
    pages: int,
    content_volume_default: str = 'standard',
    page_char_estimate: int = 700,
) -> str:
    """Return the chapter-count constraint sentence for the rating-based outline template.

    With no target page count (``pages <= 0``) the fixed "8-10 chapters"
    sentence is returned. Otherwise the sentence names the chapter range from
    ``top_level_chapter_range_from_pages`` and the rough total character
    target ``pages * page_char_estimate``.
    """
    chars_per_page = max(1, int(page_char_estimate or 700))
    if pages <= 0:
        return '总的章节数应该控制在8-10个,不超过10个'

    lower, upper = top_level_chapter_range_from_pages(pages, content_volume_default)
    total_chars = int(round(pages * chars_per_page))
    pieces = [
        f'总的章节数应该控制在约 {lower}–{upper} 个,不超过{upper} 个',
        f'（目标约 {pages} 页，按目标页数映射的篇幅档位估算）',
        f'全稿正文字量约与总目标 {total_chars} 字同量级,末级子目不宜过细',
    ]
    return ''.join(pieces)
'keyword_overlap', 'min_rating_weight': 0.01}, + 'rating_parse': {}, + 'prompt': {'top_k_rating_items': 4, 'intro_line': ''}, + 'max_tokens_scale': False, +} + + +def rules_path() -> str: + return os.path.join(config.DATA_DIR, 'word_allocation_rules.json') + + +def load_rules(path: Optional[str] = None) -> Dict[str, Any]: + """加载规则 JSON；文件缺失或解析失败时返回内置 DEFAULT_RULES。""" + p = path or rules_path() + data = dict(DEFAULT_RULES) + if not os.path.isfile(p): + return data + try: + with open(p, encoding='utf-8') as f: + raw = json.load(f) + if isinstance(raw, dict): + for k, v in raw.items(): + if k.startswith('_'): + continue + if k == 'relevance' and isinstance(v, dict): + data['relevance'] = {**data.get('relevance', {}), **v} + elif k == 'prompt' and isinstance(v, dict): + data['prompt'] = {**data.get('prompt', {}), **v} + else: + data[k] = v + except Exception as e: + logger.warning('加载 word_allocation_rules.json 失败，使用内置默认: %s', e) + return data + + +def _as_float(x: Any, default: float = 0.0) -> float: + if x is None: + return default + if isinstance(x, (int, float)): + return float(x) + if isinstance(x, str): + s = re.sub(r'[^\d.\-]', '', x) + if not s: + return default + try: + return float(s) + except ValueError: + return default + return default + + +def _item_name(d: Dict[str, Any]) -> str: + for k in ('name', 'title', 'item_name', '评分项', '评分项名称', 'indicator'): + v = d.get(k) + if isinstance(v, str) and v.strip(): + return v.strip() + return '' + + +def _item_weight(d: Dict[str, Any]) -> float: + for k in ('weight', 'score', '分值', 'max_score', '满分', 'points'): + if k in d: + w = _as_float(d.get(k), 0.0) + if w > 0: + return w + return 1.0 + + +def _collect_rating_dicts(obj: Any, acc: List[Dict[str, Any]]) -> None: + if isinstance(obj, dict): + acc.append(obj) + for v in obj.values(): + _collect_rating_dicts(v, acc) + elif isinstance(obj, list): + for v in obj: + _collect_rating_dicts(v, acc) + + +def parse_rating_json(raw: Optional[str]) -> List[Dict[str, Any]]: 
+ """ + 从 rating_json 字符串解析评分项列表。 + 每项: { 'name': str, 'weight': float, 'keywords': List[str] } + """ + if not raw or not isinstance(raw, str) or not raw.strip(): + return [] + try: + root = json.loads(raw.strip()) + except json.JSONDecodeError: + return [] + + dicts: List[Dict[str, Any]] = [] + _collect_rating_dicts(root, dicts) + + items: List[Dict[str, Any]] = [] + seen: set = set() + for d in dicts: + name = _item_name(d) + if not name or len(name) < 2: + continue + key = name.lower() + if key in seen: + continue + w = _item_weight(d) + kws: List[str] = [] + kw = d.get('keywords') or d.get('keyword') or d.get('要点') + if isinstance(kw, list): + kws = [str(x).strip() for x in kw if isinstance(x, (str, int, float)) and str(x).strip()] + elif isinstance(kw, str) and kw.strip(): + kws = [kw.strip()] + seen.add(key) + items.append({'name': name, 'weight': w, 'keywords': kws}) + + return items + + +def _title_tokens(title: str) -> List[str]: + if not title: + return [] + s = re.sub(r'[\s\d.．、，,;；:：/\\（）()【】\[\]「」]+', ' ', title) + parts = [p for p in s.split() if len(p) >= 2] + toks = list(parts) + for m in re.findall(r'[\u4e00-\u9fff]{2,}', title): + if m not in toks: + toks.append(m) + return toks + + +def _overlap_score(title: str, item: Dict[str, Any]) -> float: + tokens = _title_tokens(title) + if not tokens: + return 0.0 + blob = item['name'] + ''.join(item.get('keywords') or []) + hit = sum(1 for t in tokens if t and t in blob) + score = hit / max(len(tokens), 1) + if item['name'] in title or title in item['name']: + score = max(score, 0.85) + for kw in item.get('keywords') or []: + if isinstance(kw, str) and len(kw) >= 2 and kw in title: + score = max(score, 0.7) + return min(1.0, score) + + +def _raw_utilities( + leaves: List[Dict[str, Any]], + items: List[Dict[str, Any]], + min_w: float, +) -> Tuple[List[float], List[List[Tuple[str, float]]]]: + """每节 u_i = sum_j w_j * c_ij；返回 u 与每节 top 相关项 (name, contrib)。""" + filtered = [it for it in items if it['weight'] 
>= min_w] + if not filtered: + filtered = items + n = len(leaves) + u = [0.0] * n + top_lists: List[List[Tuple[str, float]]] = [[] for _ in range(n)] + + for i, leaf in enumerate(leaves): + title = leaf.get('section_title') or '' + contribs: List[Tuple[str, float]] = [] + for it in filtered: + c = _overlap_score(title, it) + contrib = it['weight'] * c + if contrib > 0: + contribs.append((it['name'], contrib)) + u[i] += contrib + contribs.sort(key=lambda x: -x[1]) + top_lists[i] = contribs[:12] + + max_u = max(u) if u else 0.0 + if max_u <= 0: + u = [1.0] * n + else: + u = [x / max_u for x in u] + return u, top_lists + + +def _clamp_int(x: int, lo: int, hi: int) -> int: + return max(lo, min(hi, x)) + + +def _water_adjust( + targets: List[int], + budget: int, + floor_v: int, + cap_v: int, + priority: List[float], +) -> List[int]: + """在 [floor_v, cap_v] 内将 targets 整数化并尽量使 sum 接近 budget。""" + n = len(targets) + if n == 0: + return [] + if floor_v > cap_v: + floor_v, cap_v = cap_v, floor_v + if n * floor_v > budget: + floor_v = max(1, budget // n) + if n * cap_v < budget: + cap_v = max(floor_v, (budget + n - 1) // n) + cur = [_clamp_int(t, floor_v, cap_v) for t in targets] + s = sum(cur) + delta = budget - s + order = sorted(range(n), key=lambda i: -priority[i]) + inv_order = sorted(range(n), key=lambda i: priority[i]) + step = 0 + max_steps = max(n * 2000, abs(delta) + n) + while delta != 0 and step < max_steps: + step += 1 + if delta > 0: + moved = False + for i in order: + if cur[i] < cap_v: + cur[i] += 1 + delta -= 1 + moved = True + break + if not moved: + break + else: + moved = False + for i in inv_order: + if cur[i] > floor_v: + cur[i] -= 1 + delta += 1 + moved = True + break + if not moved: + break + return cur + + +def compute_leaf_allocations( + volume_key: str, + leaves: List[Dict[str, Any]], + rating_raw: Optional[str], + rules: Optional[Dict[str, Any]] = None, +) -> Optional[Dict[int, Dict[str, Any]]]: + """ + 为每个叶节点计算 
target_chars、word_count_spec、max_tokens。 + + 有技术评分项时按标题相关性分配；无评分项时，若规则为按目标页控总篇且已设页数， + 则均分全稿总预算 B=目标页数×每页字数（否则返回 None，调用方沿用旧逻辑）。 + leaves: [{'id': int, 'section_title': str}, ...] + """ + rules = rules or load_rules() + if not leaves: + return {} + + base, core, _, preset_tokens = VOLUME_PRESETS.get( + volume_key, VOLUME_PRESETS['standard'] + ) + floor_default = int(base * 0.5) + cap_default = core + floor_v = int(rules['per_section_floor']) if rules.get('per_section_floor') is not None else floor_default + cap_v = int(rules['per_section_cap']) if rules.get('per_section_cap') is not None else cap_default + floor_v = min(floor_v, cap_v) + alpha = float(rules.get('alpha', 0.85)) + alpha = max(0.0, min(1.0, alpha)) + min_w = float(rules.get('relevance', {}).get('min_rating_weight', 0.01)) + + n = len(leaves) + mode = (rules.get('budget_mode') or 'anchor_mean').strip() + pages_cfg = int(getattr(config, 'TARGET_PAGES', 0) or 0) + pce = max(1, int(getattr(config, 'PAGE_CHAR_ESTIMATE', 700) or 700)) + if mode == 'target_pages' and pages_cfg > 0: + budget = int(round(pages_cfg * pce)) + elif mode == 'anchor_base': + budget = int(round(n * base)) + else: + budget = int(round(n * (base + core) / 2.0)) + + items = parse_rating_json(rating_raw) + if not items: + if not (mode == 'target_pages' and pages_cfg > 0): + return None + u = [1.0] * n + top_lists = [[] for _ in range(n)] + mid = 0.5 * (base + core) + raw_float = [float(mid)] * n + else: + u, top_lists = _raw_utilities(leaves, items, min_w) + band = core - base + raw_float = [ + base + band * (alpha * u[i] + (1.0 - alpha) * 0.5) for i in range(n) + ] + + targets = [int(round(x)) for x in raw_float] + adjusted = _water_adjust(targets, budget, floor_v, cap_v, u) + + provider = getattr(config, 'MODEL_PROVIDER', 'openai') + tok_limit = _PROVIDER_TOKEN_LIMITS.get(provider, 8192) + base_max_tok = min(preset_tokens, tok_limit) + scale_tokens = bool(rules.get('max_tokens_scale', False)) + + prompt_cfg = rules.get('prompt') or {} 
def continuation_threshold(target_chars: int) -> int:
    """Return the length at which continuation may stop for a section.

    This is 65% of *target_chars*, truncated to an int and never below 200.
    The original note says this mirrors ``generator._get_min_chars``; that
    function is not visible here.
    """
    scaled = target_chars * 0.65
    if scaled > 200:
        return int(scaled)
    return 200
dom.window.getComputedStyle(element).getPropertyValue(property); + } + + // 结果收集器 + const results = { + overall: true, + details: [], + violations: [] + }; + + // 通用添加结果方法 + function addResult(ruleName, passed, message, elements = []) { + results.details.push({ rule: ruleName, passed, message }); + if (!passed) { + results.overall = false; + results.violations.push({ rule: ruleName, message, elements: elements.map(el => el.outerHTML.slice(0, 200)) }); + } + } + + // ========== 1. 检查是否存在投标人身份信息 ========== + function checkIdentityInfo() { + const bodyText = document.body.innerText; + // 公司名称模式（可扩展） + const companyPattern = /(?:我公司|本公司|[（(]?[A-Za-z\u4e00-\u9fa5]+(?:集团|股份|有限|责任|公司)[）)]?)/g; + // 地址模式（省市区路号等） + const addrPattern = /(?:省|市|区|县|镇|路|街|大道|号|大厦|楼|层)[\u4e00-\u9fa50-9]+/g; + // 总监/专监真实姓名模式（除甲、乙等代称外） + const namePattern = /(?:总监理工程师|专业监理工程师|技术负责人|项目经理)[：:]\s*[^甲乙丙丁戊己庚辛壬癸\s]{2,4}(?=[，。；\s]|$)/g; + + let foundCompany = false; + let foundAddr = false; + let foundRealName = false; + + if (companyPattern.test(bodyText)) foundCompany = true; + if (addrPattern.test(bodyText)) foundAddr = true; + if (namePattern.test(bodyText)) foundRealName = true; + + // 检查图片alt或title是否包含公司标识 + const images = document.querySelectorAll('img'); + let hasLogo = false; + images.forEach(img => { + const alt = img.alt || ''; + const src = img.src || ''; + if (/logo|商标|微标|公司|品牌/i.test(alt) || /logo/i.test(src)) hasLogo = true; + }); + + const passed = !(foundCompany || foundAddr || foundRealName || hasLogo); + addResult('身份信息隐藏', passed, + passed ? '未发现投标人身份信息' : '发现投标人身份信息（公司名/地址/真实姓名/商标）'); + } + + // ========== 2. 
标题格式检查（三号黑体，非斜体，无下划线等） ========== + function checkHeadings() { + // 标题选择器：h1-h6 或任何 role="heading" 或具有大纲级别样式的元素 + const headings = document.querySelectorAll('h1, h2, h3, h4, h5, h6, [role="heading"], .heading, .title'); + let allValid = true; + const invalidHeadings = []; + + headings.forEach(heading => { + const fontSize = getStyle(heading, 'font-size'); + const fontFamily = getStyle(heading, 'font-family').toLowerCase(); + const fontStyle = getStyle(heading, 'font-style'); + const textDecoration = getStyle(heading, 'text-decoration'); + const color = getStyle(heading, 'color'); + const fontWeight = getStyle(heading, 'font-weight'); + + // 三号 ≈ 16pt (21.33px) 允许误差 ±2px + const sizeOk = Math.abs(parseFloat(fontSize) - 21.33) <= 3; + const fontOk = fontFamily.includes('黑体') || fontFamily.includes('simhei') || fontFamily.includes('microsoft yahei'); + const styleOk = fontStyle !== 'italic'; + const decorOk = !textDecoration.includes('underline'); + const colorOk = color === 'rgb(0, 0, 0)' || color === '#000000'; + const weightOk = fontWeight !== '400' && fontWeight !== 'normal'; // 黑体通常加粗 + + if (!(sizeOk && fontOk && styleOk && decorOk && colorOk && weightOk)) { + allValid = false; + invalidHeadings.push(heading); + } + }); + + addResult('标题格式', allValid, + allValid ? '所有标题符合三号黑体要求' : '部分标题字号/字体/颜色/下划线不符合要求', invalidHeadings); + } + + // ========== 3. 
正文格式检查（四号宋体，首行缩进2字符，行距固定值26磅，无着色等） ========== + function checkBodyText() { + const bodyElements = document.querySelectorAll('p, div, span, li, td, th'); + let allValid = true; + const invalidElements = []; + // 排除标题、页眉页脚、目录等 + const excludeSelectors = 'h1, h2, h3, h4, h5, h6, .header, .footer, .toc, .目录'; + + bodyElements.forEach(el => { + if (el.matches(excludeSelectors)) return; + const text = el.innerText.trim(); + if (text.length === 0) return; + + const fontSize = getStyle(el, 'font-size'); + const fontFamily = getStyle(el, 'font-family').toLowerCase(); + const color = getStyle(el, 'color'); + const textIndent = getStyle(el, 'text-indent'); + const lineHeight = getStyle(el, 'line-height'); + const textDecoration = getStyle(el, 'text-decoration'); + const fontWeight = getStyle(el, 'font-weight'); + const fontStyle = getStyle(el, 'font-style'); + + // 四号 ≈ 14pt (18.67px) + const sizeOk = Math.abs(parseFloat(fontSize) - 18.67) <= 2; + const fontOk = fontFamily.includes('宋体') || fontFamily.includes('simsun') || fontFamily.includes('serif'); + const colorOk = color === 'rgb(0, 0, 0)' || color === '#000000'; + // 首行缩进2字符（以em为单位，2em代表2个汉字） + const indentOk = parseFloat(textIndent) >= 1.8 && parseFloat(textIndent) <= 2.2; + // 行距固定值26磅 (34.67px) + const lineHeightOk = Math.abs(parseFloat(lineHeight) - 34.67) <= 2; + const decorOk = !textDecoration.includes('underline'); + const weightOk = fontWeight === '400' || fontWeight === 'normal'; + const styleOk = fontStyle !== 'italic'; + + if (!(sizeOk && fontOk && colorOk && indentOk && lineHeightOk && decorOk && weightOk && styleOk)) { + allValid = false; + invalidElements.push(el); + } + }); + + addResult('正文格式', allValid, + allValid ? '所有正文符合四号宋体/缩进/行距/颜色要求' : '部分正文段落格式不符合要求', invalidElements); + } + + // ========== 4. 
// TOC check (no page numbers, no headers/footers) ==========
    /**
     * Check that a table of contents exists and carries neither page numbers
     * nor header/footer elements; reports via addResult('目录要求', ...).
     */
    function checkTOC() {
        const tocElements = document.querySelectorAll('.toc, .table-of-contents, .目录, [role="directory"]');

        // A TOC is assumed to be mandatory (tender documents normally require one).
        if (tocElements.length === 0) {
            addResult('目录要求', false, '未检测到目录，请确保包含目录且目录无页码无页眉页脚');
            return;
        }

        // A line that is only a number, or ends with a whitespace-separated number, looks like a page number.
        const endsWithSeparatedNumber = (line) => /(?:^|\s)\d+\s*$/.test(line.trim());

        let noPageNumbers = true;
        let noHeaderFooter = true;

        tocElements.forEach(toc => {
            // innerText is not implemented by every DOM; fall back to textContent.
            const text = toc.innerText || toc.textContent || '';
            // Keep the original whole-text tests and additionally inspect every line,
            // so page numbers on entries other than the last one are detected too.
            const hasPageNumber = /\d+$/.test(text.trim()) ||
                /\.{2,}\s*\d+/.test(text) ||
                text.split(/\r?\n/).some(endsWithSeparatedNumber);
            if (hasPageNumber) {
                noPageNumbers = false;
            }
            if (toc.querySelector('.header, .footer, .page-header, .page-footer')) {
                noHeaderFooter = false;
            }
        });

        const passed = noPageNumbers && noHeaderFooter;
        addResult('目录要求', passed,
            passed ? '目录符合无页码、无页眉页脚要求' : '目录中存在页码或页眉页脚');
    }

    // ========== 5. Chart/table placement and text format inside charts ==========
    /**
     * Check that tables, images and figures only occur inside an appendix
     * section and that table/figure text inside appendices is size-5
     * (10.5pt = 14px) black SimSun; reports via addResult('图表规范', ...).
     */
    function checkChartsAndTables() {
        // Class selectors for the Chinese names; a bare 附件 would be a type selector matching <附件> tags.
        const APPENDIX_SELECTOR = '#appendix, .appendix, .attachment, .附件, .附表';
        const PX_PER_PT = 96 / 72;
        const CHART_FONT_PX = 10.5 * PX_PER_PT; // size 5 = 10.5pt = 14px

        // Convert an absolute CSS length to px; a unitless value is taken as pt,
        // because the original check compared the bare number with 10.5pt.
        const toPx = (value) => {
            const m = /^\s*(-?\d*\.?\d+)\s*([a-z%]*)/i.exec(String(value == null ? '' : value));
            if (!m) return NaN;
            const amount = parseFloat(m[1]);
            switch (m[2].toLowerCase()) {
                case 'px': return amount;
                case '':
                case 'pt': return amount * PX_PER_PT;
                case 'cm': return amount * 96 / 2.54;
                case 'mm': return amount * 96 / 25.4;
                case 'in': return amount * 96;
                default: return NaN;
            }
        };

        const isBlack = (value) => {
            const c = String(value == null ? '' : value).replace(/\s+/g, '').toLowerCase();
            return c === 'rgb(0,0,0)' || c === 'rgba(0,0,0,1)' || c === '#000000' || c === '#000' || c === 'black';
        };

        // closest() honours every appendix section, not only the first one in the document.
        const isInAppendix = (el) => el.closest(APPENDIX_SELECTOR) !== null;

        // Charts are forbidden in the body; only appendix sections may hold them.
        const illegalCharts = Array.from(document.querySelectorAll('table, img, figure, .chart'))
            .filter(chart => !isInAppendix(chart));

        let chartTextValid = true;
        document.querySelectorAll('table, td, th, figcaption, .chart-text').forEach(el => {
            if (!isInAppendix(el)) return;
            const fontFamily = String(getStyle(el, 'font-family') || '').toLowerCase();
            const sizeOk = Math.abs(toPx(getStyle(el, 'font-size')) - CHART_FONT_PX) <= 1.5;
            const fontOk = fontFamily.includes('宋体') || fontFamily.includes('simsun');
            const colorOk = isBlack(getStyle(el, 'color'));
            if (!(sizeOk && fontOk && colorOk)) chartTextValid = false;
        });

        const chartsPassed = illegalCharts.length === 0 && chartTextValid;
        addResult('图表规范', chartsPassed,
            chartsPassed ? '图表仅出现在附件/附表内，且图表文字符合五号宋体' :
                `正文中发现${illegalCharts.length}个图表或附件内图表文字格式错误`, illegalCharts);
    }

    // ========== 6. Whole-document color check (no colored text, no emphasis marks, no underline) ==========
    /**
     * Scan every rendered element for non-black text, underlines and emphasis
     * marks; reports via addResult('颜色与装饰', ...).
     */
    function checkColorsAndDecorations() {
        const NON_RENDERED = 'head, script, style, noscript, template';
        // Ordinary table/rule borders are not emphasis marks.
        const BORDER_EXEMPT = 'table, thead, tbody, tfoot, tr, td, th, hr';

        const isBlack = (value) => {
            const c = String(value == null ? '' : value).replace(/\s+/g, '').toLowerCase();
            return c === 'rgb(0,0,0)' || c === 'rgba(0,0,0,1)' || c === '#000000' || c === '#000' || c === 'black';
        };

        const colorViolations = [];
        const decorationViolations = [];

        document.querySelectorAll('*').forEach(el => {
            if (el.closest(NON_RENDERED)) return;

            const text = (el.innerText || el.textContent || '').trim();
            if (text.length > 0 && !isBlack(getStyle(el, 'color'))) {
                colorViolations.push(el);
            }

            const underlined = String(getStyle(el, 'text-decoration') || '').includes('underline');

            // CSS emphasis marks proper; an empty value means the property is unsupported or unset.
            const emphasis = String(getStyle(el, 'text-emphasis-style') || '').trim();
            const emphasized = emphasis !== '' && emphasis !== 'none';

            // Emphasis dots are often faked with a bottom border; keep that heuristic for text elements.
            const borderBottom = getStyle(el, 'border-bottom-style');
            const borderMarked = !el.matches(BORDER_EXEMPT) &&
                (borderBottom === 'solid' || borderBottom === 'dotted');

            // Record each element once, even when several decorations apply.
            if (underlined || emphasized || borderMarked) {
                decorationViolations.push(el);
            }
        });

        const passed = colorViolations.length === 0 && decorationViolations.length === 0;
        addResult('颜色与装饰', passed,
            passed ? '无彩色文字、无下划线、无着重号' :
                `发现${colorViolations.length}处彩色文字，${decorationViolations.length}处下划线/着重号`,
            [...colorViolations, ...decorationViolations]);
    }

    // ========== 7.
页面设置检查（A4纵向，页边距） ========== + function checkPageSetup() { + // 检查@page规则或根元素margin + let pageValid = true; + let marginTop, marginBottom, marginLeft, marginRight; + // 尝试从styleSheets中获取@page + let pageRule = null; + for (let sheet of styleSheets) { + try { + const rules = sheet.cssRules || sheet.rules; + for (let rule of rules) { + if (rule.type === CSSRule.PAGE_RULE) { + pageRule = rule.style; + break; + } + } + } catch(e) { /* 跨域限制忽略 */ } + } + + if (pageRule) { + marginTop = pageRule.marginTop; + marginBottom = pageRule.marginBottom; + marginLeft = pageRule.marginLeft; + marginRight = pageRule.marginRight; + const size = pageRule.size; + if (size && size.toLowerCase() !== 'a4') pageValid = false; + } else { + // 检查body或根容器的margin + const bodyStyle = getStyle(document.body, 'margin'); + if (bodyStyle) { + const margins = bodyStyle.split(' '); + // 简单近似 + marginTop = margins[0]; + marginBottom = margins[2] || margins[0]; + marginLeft = margins[3] || margins[1]; + marginRight = margins[1]; + } else { + pageValid = false; + } + } + + const topOk = marginTop === '2.54cm' || parseFloat(marginTop) === 2.54; + const bottomOk = marginBottom === '2.54cm' || parseFloat(marginBottom) === 2.54; + const leftOk = marginLeft === '3.18cm' || parseFloat(marginLeft) === 3.18; + const rightOk = marginRight === '3.18cm' || parseFloat(marginRight) === 3.18; + const pageOrientation = document.documentElement.style.width === 'auto' ? '纵向' : '横向'; // 近似 + + const passed = topOk && bottomOk && leftOk && rightOk && pageOrientation !== '横向'; + addResult('页面设置', passed, + passed ? 
'页面设置符合A4纵向/边距要求' : '页面边距或纸张方向不符合要求'); + } + + // 执行所有检查 + checkIdentityInfo(); + checkHeadings(); + checkBodyText(); + checkTOC(); + checkChartsAndTables(); + checkColorsAndDecorations(); + checkPageSetup(); + + return results; +} + +// 导出函数供外部使用 +module.exports = { checkTechnicalBid }; + +// ==================== 使用示例 ==================== +// 假设已有htmlContent（技术暗标HTML字符串） +// const report = checkTechnicalBid(htmlContent); +// console.log(JSON.stringify(report, null, 2)); \ No newline at end of file diff --git a/清标数据.json b/清标数据.json new file mode 100644 index 0000000..8827a11 --- /dev/null +++ b/清标数据.json @@ -0,0 +1,63 @@ + +{ + "overall": false, + "details": [ + { + "rule": "身份信息隐藏", + "passed": true, + "message": "未发现投标人身份信息" + }, + { + "rule": "标题格式", + "passed": false, + "message": "部分标题字号/字体/颜色/下划线不符合要求" + }, + { + "rule": "正文格式", + "passed": false, + "message": "部分正文段落格式不符合要求" + }, + { + "rule": "目录要求", + "passed": true, + "message": "目录符合无页码、无页眉页脚要求" + }, + { + "rule": "图表规范", + "passed": false, + "message": "正文中发现2个图表或附件内图表文字格式错误" + }, + { + "rule": "颜色与装饰", + "passed": true, + "message": "无彩色文字、无下划线、无着重号" + }, + { + "rule": "页面设置", + "passed": false, + "message": "页面边距或纸张方向不符合要求" + } + ], + "violations": [ + { + "rule": "标题格式", + "message": "部分标题字号/字体/颜色/下划线不符合要求", + "elements": ["

第一章项目概况

"] + }, + { + "rule": "正文格式", + "message": "部分正文段落格式不符合要求", + "elements": ["

投标人应..."] + }, + { + "rule": "图表规范", + "message": "正文中发现2个图表或附件内图表文字格式错误", + "elements": ["", "...
"] + }, + { + "rule": "页面设置", + "message": "页面边距或纸张方向不符合要求", + "elements": [] + } + ] + } \ No newline at end of file

{sample_title}

欢迎使用标伙伴

智能解析招标文件

自动生成标书大纲

一键导出 Word 文档

我的项目 + 共个 +

新建标书项目

AI 模型配置

文件样式设置

+ 📄 页面设置 +

+ 🏷️ 标题文字 +

📊 表格设置

+ 📑 目录设置 + + + 生成文档目录 + +

{{ project.name }}

+ 1 + 上传招标文件 +

+ 设置 + 生成偏好（图表 & 暗标） +

工程量清单导入（可选）解析后可联动招标内容，让生成内容包含准确工程量

标书类型

+ + 招标文件摘要 +

+ + 技术评分要求 +

+ 2 + 生成标书大纲 +

+ 大纲 + 预览 + 编辑 + （个章节） +

+ 3 + 章节内容生成 +

+ 4 + 合规性检查 +

检查项目

⚠️ 未覆盖要点

💡 改进建议

+ 清 + 技术暗标格式清标 +

企业知识库

+ + 添加知识文档 +

+ + 已上传文件 + +

+ + 使用说明 +

粘贴或编辑一级大纲

标题

第一章项目概况

{sample_title}

欢迎使用标伙伴

智能解析招标文件

自动生成标书大纲

一键导出 Word 文档

我的项目 + 共 个 +

新建标书项目

AI 模型配置

文件样式设置

+ 📄 页面设置 +

+ 🏷️ 标题文字 +

📊 表格设置

+ 📑 目录设置 + + + 生成文档目录 + +

+ 1 + 上传招标文件 +

+ 设置 + 生成偏好（图表 & 暗标） +

工程量清单导入 （可选）解析后可联动招标内容，让生成内容包含准确工程量

标书类型

+ + 招标文件摘要 +

+ + 技术评分要求 +

+ 2 + 生成标书大纲 +

+ 大纲 + 预览 + 编辑 + （ 个章节） +

+ 3 + 章节内容生成 +

+ 4 + 合规性检查 +

检查项目

⚠️ 未覆盖要点

💡 改进建议

+ 清 + 技术暗标格式清标 +

企业知识库

+ + + + 添加知识文档 +

+ + + + 已上传文件 + +

+ + + + 使用说明 +

粘贴或编辑一级大纲

标题

第一章 项目概况

我的项目 + 共个 +

工程量清单导入（可选）解析后可联动招标内容，让生成内容包含准确工程量

+ 大纲 + 预览 + 编辑 + （个章节） +

+ + 添加知识文档 +

+ + 已上传文件 + +

+ + 使用说明 +

第一章项目概况