提交
This commit is contained in:
commit
909e228a9b
1
.deps_installed
Normal file
1
.deps_installed
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
||||||
6
.gitignore
vendored
Normal file
6
.gitignore
vendored
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
.env
|
||||||
|
*.log
|
||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
.venv/
|
||||||
|
venv/
|
||||||
10
.idea/.gitignore
generated
vendored
Normal file
10
.idea/.gitignore
generated
vendored
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
# 默认忽略的文件
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
||||||
|
# 基于编辑器的 HTTP 客户端请求
|
||||||
|
/httpRequests/
|
||||||
|
# 已忽略包含查询文件的默认文件夹
|
||||||
|
/queries/
|
||||||
|
# Datasource local storage ignored files
|
||||||
|
/dataSources/
|
||||||
|
/dataSources.local.xml
|
||||||
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<settings>
|
||||||
|
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||||
|
<version value="1.0" />
|
||||||
|
</settings>
|
||||||
|
</component>
|
||||||
8
.idea/modules.xml
generated
Normal file
8
.idea/modules.xml
generated
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectModuleManager">
|
||||||
|
<modules>
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/.idea/tech-bid-manage20260423.iml" filepath="$PROJECT_DIR$/.idea/tech-bid-manage20260423.iml" />
|
||||||
|
</modules>
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
16
.idea/tech-bid-manage20260423.iml
generated
Normal file
16
.idea/tech-bid-manage20260423.iml
generated
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="PYTHON_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager">
|
||||||
|
<content url="file://$MODULE_DIR$" />
|
||||||
|
<orderEntry type="inheritedJdk" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
<component name="TemplatesService">
|
||||||
|
<option name="TEMPLATE_CONFIGURATION" value="Jinja2" />
|
||||||
|
<option name="TEMPLATE_FOLDERS">
|
||||||
|
<list>
|
||||||
|
<option value="$MODULE_DIR$/templates" />
|
||||||
|
</list>
|
||||||
|
</option>
|
||||||
|
</component>
|
||||||
|
</module>
|
||||||
6
.idea/vcs.xml
generated
Normal file
6
.idea/vcs.xml
generated
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="VcsDirectoryMappings">
|
||||||
|
<mapping directory="" vcs="Git" />
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
113
README.md
Normal file
113
README.md
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
# 标伙伴 · AI 标书助手
|
||||||
|
|
||||||
|
基于大模型的智能标书生成工具(单机版),支持解析招标文件、自动生成技术标书、导出 Word 文档。
|
||||||
|
|
||||||
|
## 快速开始
|
||||||
|
|
||||||
|
### 方式一:双击启动(Windows)
|
||||||
|
|
||||||
|
直接双击 `start.bat`,首次运行会自动安装依赖。
|
||||||
|
|
||||||
|
### 方式二:命令行启动
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. 安装依赖
|
||||||
|
pip install -r requirements.txt
|
||||||
|
|
||||||
|
# 2. 启动应用
|
||||||
|
python app.py
|
||||||
|
```
|
||||||
|
|
||||||
|
浏览器访问 **http://localhost:5000**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 配置 API Key
|
||||||
|
|
||||||
|
首次使用前,点击右上角 ⚙️ 设置图标,选择模型提供商并填入 API Key:
|
||||||
|
|
||||||
|
| 提供商 | 推荐模型 | 申请地址 |
|
||||||
|
|--------|---------|---------|
|
||||||
|
| 通义千问 | qwen-max | https://dashscope.aliyun.com/ |
|
||||||
|
| DeepSeek | deepseek-chat (V3) | https://platform.deepseek.com/ |
|
||||||
|
| OpenAI | gpt-4o | https://platform.openai.com/ |
|
||||||
|
|
||||||
|
> **DeepSeek 说明**:deepseek-chat (V3) 性价比极高,推荐用于生产环境。
|
||||||
|
> 由于 DeepSeek 暂不提供 Embedding API,使用知识库功能时会自动回退到本地 sentence-transformers 模型(首次使用需下载约 90MB)。
|
||||||
|
|
||||||
|
也可通过环境变量配置:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 通义千问
|
||||||
|
set QWEN_API_KEY=sk-xxxxxxxx
|
||||||
|
set MODEL_PROVIDER=qwen
|
||||||
|
|
||||||
|
# DeepSeek
|
||||||
|
set DEEPSEEK_API_KEY=sk-xxxxxxxx
|
||||||
|
set MODEL_PROVIDER=deepseek
|
||||||
|
|
||||||
|
python app.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 使用流程
|
||||||
|
|
||||||
|
1. **新建项目** → 输入项目名称
|
||||||
|
2. **上传招标文件** → 支持 PDF / DOC / DOCX
|
||||||
|
3. **AI 解析** → 自动提取评分要求、资质条件、商务条款
|
||||||
|
4. **生成大纲** → 按评分权重生成四级章节目录
|
||||||
|
5. **生成内容** → 逐章节或一键全部生成
|
||||||
|
6. **合规检查** → 对照招标要求检验覆盖情况
|
||||||
|
7. **导出 Word** → 专业排版,直接使用
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 目录结构
|
||||||
|
|
||||||
|
```
|
||||||
|
autorfp/
|
||||||
|
├── app.py # Flask 主程序
|
||||||
|
├── config.py # 配置文件
|
||||||
|
├── requirements.txt # Python 依赖
|
||||||
|
├── start.bat # Windows 一键启动
|
||||||
|
├── prompts/ # AI 提示词模板
|
||||||
|
├── modules/ # 功能模块
|
||||||
|
│ ├── parser.py # 招标文件解析
|
||||||
|
│ ├── generator.py # 标书内容生成
|
||||||
|
│ ├── checker.py # 合规检查
|
||||||
|
│ ├── exporter.py # Word 导出
|
||||||
|
│ └── knowledge.py # 企业知识库
|
||||||
|
├── utils/ # 工具函数
|
||||||
|
│ ├── ai_client.py # AI API 封装
|
||||||
|
│ ├── file_utils.py # 文件处理
|
||||||
|
│ └── prompts.py # 提示词加载
|
||||||
|
├── templates/ # HTML 模板
|
||||||
|
├── static/ # 静态资源
|
||||||
|
└── data/ # 数据目录(自动创建)
|
||||||
|
├── projects.db # SQLite 数据库
|
||||||
|
├── uploads/ # 上传的招标文件
|
||||||
|
├── exports/ # 导出的标书
|
||||||
|
├── knowledge/ # 知识库文件
|
||||||
|
└── chroma/ # 向量数据库
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 企业知识库
|
||||||
|
|
||||||
|
在项目页面切换到「知识库」标签,上传历史标书文件。
|
||||||
|
系统会自动将文件分块存入向量数据库,生成内容时自动检索相关片段,让 AI 更好地体现企业优势。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 常见问题
|
||||||
|
|
||||||
|
**Q: 解析速度很慢?**
|
||||||
|
A: 招标文件越长耗时越长,通常 30-120 秒。建议使用 qwen-max 或 gpt-4o。
|
||||||
|
|
||||||
|
**Q: 内容生成失败?**
|
||||||
|
A: 检查 API Key 是否正确,以及账户余额是否充足。
|
||||||
|
|
||||||
|
**Q: 导出的 Word 文件乱码?**
|
||||||
|
A: 请使用 Microsoft Word 2016 及以上版本打开。
|
||||||
118
bid_partner.spec
Normal file
118
bid_partner.spec
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
# -*- mode: python ; coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
PyInstaller spec for 标伙伴 · AI标书助手
|
||||||
|
Build: pyinstaller bid_partner.spec
|
||||||
|
|
||||||
|
知识库改用 SQLite + 纯 Python 向量存储,已不依赖 ChromaDB,打包更小。
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
from PyInstaller.utils.hooks import collect_all, collect_data_files
|
||||||
|
|
||||||
|
block_cipher = None
|
||||||
|
|
||||||
|
# ── Collect complex packages ─────────────────────────────────────────────────
|
||||||
|
openai_datas, openai_bins, openai_hidden = collect_all('openai')
|
||||||
|
pydantic_datas, pydantic_bins, pydantic_hidden = collect_all('pydantic')
|
||||||
|
|
||||||
|
# tiktoken data (BPE vocab files)
|
||||||
|
tiktoken_datas = collect_data_files('tiktoken')
|
||||||
|
|
||||||
|
a = Analysis(
|
||||||
|
['launcher.py'],
|
||||||
|
pathex=['.'],
|
||||||
|
binaries=openai_bins + pydantic_bins,
|
||||||
|
datas=[
|
||||||
|
# ── App assets (read-only, go into _MEIPASS) ──
|
||||||
|
('templates', 'templates'),
|
||||||
|
('static', 'static'),
|
||||||
|
# ── Package data ──
|
||||||
|
*openai_datas,
|
||||||
|
*pydantic_datas,
|
||||||
|
*tiktoken_datas,
|
||||||
|
],
|
||||||
|
hiddenimports=[
|
||||||
|
# Flask / Werkzeug
|
||||||
|
'flask', 'flask_cors', 'werkzeug', 'werkzeug.serving',
|
||||||
|
'werkzeug.routing', 'werkzeug.middleware.proxy_fix',
|
||||||
|
'jinja2', 'jinja2.ext',
|
||||||
|
# SQLite (stdlib, always present)
|
||||||
|
'sqlite3',
|
||||||
|
# OpenAI
|
||||||
|
*openai_hidden,
|
||||||
|
# Pydantic
|
||||||
|
*pydantic_hidden,
|
||||||
|
# Document processing
|
||||||
|
'PyPDF2', 'pypdf', 'pypdf.errors',
|
||||||
|
'pdfminer', 'pdfminer.high_level', 'pdfminer.layout',
|
||||||
|
'pdfminer.pdfpage', 'pdfminer.pdfinterp', 'pdfminer.converter',
|
||||||
|
'docx', 'docx.oxml', 'docx.oxml.ns', 'docx.shared',
|
||||||
|
'docx.enum', 'docx.enum.text', 'docx.enum.style',
|
||||||
|
'python_docx',
|
||||||
|
# tiktoken
|
||||||
|
'tiktoken', 'tiktoken.core', 'tiktoken.model',
|
||||||
|
'tiktoken_ext', 'tiktoken_ext.openai_public',
|
||||||
|
# Network / encoding
|
||||||
|
'requests', 'chardet', 'httpx', 'httpcore',
|
||||||
|
'anyio', 'anyio.streams', 'anyio.streams.memory',
|
||||||
|
'sniffio', 'certifi',
|
||||||
|
# Stdlib extras
|
||||||
|
'importlib.metadata', 'importlib.resources',
|
||||||
|
'pkg_resources', 'json', 'math', 'threading',
|
||||||
|
# Local project modules (explicitly include all)
|
||||||
|
'config', 'app',
|
||||||
|
'utils', 'utils.ai_client', 'utils.file_utils',
|
||||||
|
'utils.prompts', 'utils.settings', 'utils.boq_parser', 'utils.bill_analysis',
|
||||||
|
'modules', 'modules.parser', 'modules.generator',
|
||||||
|
'modules.checker', 'modules.exporter', 'modules.knowledge',
|
||||||
|
],
|
||||||
|
hookspath=[],
|
||||||
|
hooksconfig={},
|
||||||
|
runtime_hooks=[],
|
||||||
|
excludes=[
|
||||||
|
# Heavy packages not used in this app
|
||||||
|
'matplotlib', 'pandas', 'scipy', 'numpy',
|
||||||
|
'IPython', 'jupyter', 'notebook',
|
||||||
|
'PIL', 'Pillow',
|
||||||
|
'cv2', 'torch', 'tensorflow',
|
||||||
|
'pytest', 'unittest',
|
||||||
|
# ChromaDB 及其依赖(已移除,改用 SQLite 内置存储)
|
||||||
|
'chromadb', 'hnswlib', 'posthog', 'pypika',
|
||||||
|
'mmh3', 'overrides', 'monotonic',
|
||||||
|
'sentence_transformers', 'onnxruntime',
|
||||||
|
],
|
||||||
|
win_no_prefer_redirects=False,
|
||||||
|
win_private_assemblies=False,
|
||||||
|
cipher=block_cipher,
|
||||||
|
noarchive=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
|
||||||
|
|
||||||
|
exe = EXE(
|
||||||
|
pyz,
|
||||||
|
a.scripts,
|
||||||
|
[],
|
||||||
|
exclude_binaries=True,
|
||||||
|
name='bid_partner',
|
||||||
|
debug=False,
|
||||||
|
bootloader_ignore_signals=False,
|
||||||
|
strip=False,
|
||||||
|
upx=False,
|
||||||
|
console=False, # no black console window — GUI launcher takes over
|
||||||
|
disable_windowed_traceback=False,
|
||||||
|
argv_emulation=False,
|
||||||
|
target_arch=None,
|
||||||
|
codesign_identity=None,
|
||||||
|
entitlements_file=None,
|
||||||
|
)
|
||||||
|
|
||||||
|
coll = COLLECT(
|
||||||
|
exe,
|
||||||
|
a.binaries,
|
||||||
|
a.zipfiles,
|
||||||
|
a.datas,
|
||||||
|
strip=False,
|
||||||
|
upx=False,
|
||||||
|
upx_exclude=[],
|
||||||
|
name='BidPartner',
|
||||||
|
)
|
||||||
672
bill-worker.js
Normal file
672
bill-worker.js
Normal file
@ -0,0 +1,672 @@
|
|||||||
|
/**
|
||||||
|
* bill-worker.js — PDF 清单解析调度器(Worker Thread)
|
||||||
|
*
|
||||||
|
* 架构(v3 — SharedArrayBuffer 零拷贝):
|
||||||
|
* Phase 1 — 并行文本提取
|
||||||
|
* 将 PDF 数据写入 SharedArrayBuffer(一次分配,所有子线程共享读)
|
||||||
|
* 启动 N 个 page-worker,每个负责固定 20 页
|
||||||
|
*
|
||||||
|
* Phase 2 — 清单页筛选 + 文本解析(纯正则,毫秒级)
|
||||||
|
* 汇总全部页面文本 → 关键字筛选清单页 → 多行合并 → 逐行解析
|
||||||
|
*/
|
||||||
|
'use strict';
|
||||||
|
const { parentPort } = require('worker_threads');
|
||||||
|
const { Worker } = require('worker_threads');
|
||||||
|
const path = require('path');
|
||||||
|
|
||||||
|
const PAGES_PER_CHUNK = 20;
|
||||||
|
|
||||||
|
parentPort.on('message', async (msg) => {
|
||||||
|
if (msg.type !== 'parse') return;
|
||||||
|
const t0 = Date.now();
|
||||||
|
try {
|
||||||
|
// 立即做一次干净的拷贝,确保拥有独立的 ArrayBuffer
|
||||||
|
const raw = msg.buffer;
|
||||||
|
const buf = Buffer.alloc(raw.byteLength);
|
||||||
|
Buffer.from(raw).copy(buf);
|
||||||
|
|
||||||
|
if (buf.length === 0) {
|
||||||
|
parentPort.postMessage({ type: 'done', ok: false, error: '收到空 PDF 数据' });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── 获取总页数 ──
|
||||||
|
const pdfjsModule = await import('pdfjs-dist/build/pdf.mjs');
|
||||||
|
const pdfjsLib = pdfjsModule.default || pdfjsModule;
|
||||||
|
// 给 pdfjs 一份独立拷贝(pdfjs 内部可能 detach buffer)
|
||||||
|
const pdfData = new Uint8Array(buf.length);
|
||||||
|
buf.copy(Buffer.from(pdfData.buffer));
|
||||||
|
const pdf = await pdfjsLib.getDocument({ data: pdfData, isEvalSupported: false }).promise;
|
||||||
|
const totalPages = pdf.numPages;
|
||||||
|
|
||||||
|
// ── 将 PDF 数据写入 SharedArrayBuffer(一次分配,所有子线程共享读)──
|
||||||
|
const sab = new SharedArrayBuffer(buf.length);
|
||||||
|
const sabView = new Uint8Array(sab);
|
||||||
|
buf.copy(Buffer.from(sabView.buffer)); // 从独立 buf 拷贝到共享内存
|
||||||
|
|
||||||
|
const workerCount = Math.ceil(totalPages / PAGES_PER_CHUNK);
|
||||||
|
console.log(`[BillWorker] PDF ${totalPages} 页, ${workerCount} 路并行 (SharedArrayBuffer ${(buf.length/1024/1024).toFixed(1)}MB)`);
|
||||||
|
|
||||||
|
// Phase 1: 并行文本提取
|
||||||
|
const pageTexts = await parallelExtract(sab, buf.length, totalPages, workerCount);
|
||||||
|
const t1 = Date.now();
|
||||||
|
|
||||||
|
const extractedCount = pageTexts.filter(t => t.length > 0).length;
|
||||||
|
console.log(`[BillWorker] Phase1 完成: ${t1 - t0}ms, ${extractedCount}/${totalPages} 页有文本`);
|
||||||
|
|
||||||
|
// 扫描件判断
|
||||||
|
const totalChars = pageTexts.reduce((s, t) => s + t.length, 0);
|
||||||
|
if (totalChars < 50) {
|
||||||
|
parentPort.postMessage({ type: 'done', ok: true, data: { scanned: true, reason: 'noText', totalPages } });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Phase 2: 筛选清单页(宽松策略 + 连续页补全)
|
||||||
|
const BILL_KW = ['项目编码', '项目名称', '工程量', '计量单位', '综合单价', '清单编码'];
|
||||||
|
const SEC_KW = ['分部分项', '分类分项', '措施项目', '其他项目', '工程量清单计价'];
|
||||||
|
// 第一轮:标记确定的清单页
|
||||||
|
const billFlags = new Array(pageTexts.length).fill(false);
|
||||||
|
for (let i = 0; i < pageTexts.length; i++) {
|
||||||
|
const t = pageTexts[i];
|
||||||
|
if (!t.trim()) continue;
|
||||||
|
const hHits = BILL_KW.filter(k => t.includes(k)).length;
|
||||||
|
const sHit = SEC_KW.some(k => t.includes(k));
|
||||||
|
const hasCode = /\d{9}/.test(t);
|
||||||
|
// 放宽:有9位编码即可(不再要求同时命中表头关键字)
|
||||||
|
if (hHits >= 2 || sHit || hasCode) {
|
||||||
|
billFlags[i] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// 第二轮:连续页补全 — 两个清单页之间的非空页也视为清单页(续页无表头)
|
||||||
|
// 但排除纯费用/税金页面(它们不含施工清单项)
|
||||||
|
const FEE_PAGE_KW = ['规费', '税金', '社会保险费', '住房公积金', '养老保险',
|
||||||
|
'工伤保险', '失业保险', '医疗保险', '教育费附加', '城市维护建设税'];
|
||||||
|
const firstBill = billFlags.indexOf(true);
|
||||||
|
const lastBill = billFlags.lastIndexOf(true);
|
||||||
|
if (firstBill >= 0 && lastBill > firstBill) {
|
||||||
|
for (let i = firstBill; i <= lastBill; i++) {
|
||||||
|
if (!billFlags[i] && pageTexts[i] && pageTexts[i].trim().length > 30) {
|
||||||
|
const t = pageTexts[i];
|
||||||
|
const feeHits = FEE_PAGE_KW.filter(kw => t.includes(kw)).length;
|
||||||
|
// 命中 2+ 个费用关键字且没有9位工程编码 → 纯费用页,排除
|
||||||
|
if (feeHits >= 2 && !/\d{9}/.test(t)) continue;
|
||||||
|
billFlags[i] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const billTexts = [];
|
||||||
|
for (let i = 0; i < pageTexts.length; i++) {
|
||||||
|
if (billFlags[i]) billTexts.push(pageTexts[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!billTexts.length) {
|
||||||
|
parentPort.postMessage({ type: 'done', ok: true, data: { scanned: false, noBillPages: true, totalPages } });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`[BillWorker] ${totalPages} 页 → ${billTexts.length} 页清单 (原始识别 ${billFlags.filter(f=>f).length - (lastBill - firstBill >= 0 ? 0 : 0)} / 补全后 ${billTexts.length})`);
|
||||||
|
|
||||||
|
// Phase 3: 文本解析
|
||||||
|
const merged = billTexts.join('\n');
|
||||||
|
const parsed = parseBillText(merged);
|
||||||
|
const t2 = Date.now();
|
||||||
|
console.log(`[BillWorker] Phase2+3: ${t2 - t1}ms, 总耗时: ${t2 - t0}ms`);
|
||||||
|
|
||||||
|
parentPort.postMessage({
|
||||||
|
type: 'done', ok: true,
|
||||||
|
data: {
|
||||||
|
scanned: false,
|
||||||
|
...parsed,
|
||||||
|
_meta: {
|
||||||
|
method: 'local-parallel',
|
||||||
|
workers: workerCount,
|
||||||
|
billPages: billTexts.length,
|
||||||
|
totalPages,
|
||||||
|
extractMs: t1 - t0,
|
||||||
|
parseMs: t2 - t1,
|
||||||
|
totalMs: t2 - t0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
} catch (err) {
|
||||||
|
console.error('[BillWorker] 错误:', err.message);
|
||||||
|
parentPort.postMessage({ type: 'done', ok: false, error: err.message });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// ================================================================
|
||||||
|
// Phase 1: 多 Worker 并行提取(SharedArrayBuffer 零拷贝)
|
||||||
|
// ================================================================
|
||||||
|
|
||||||
|
function parallelExtract(sab, dataLength, totalPages, workerCount) {
|
||||||
|
return new Promise((resolve) => {
|
||||||
|
const workerPath = path.join(__dirname, 'page-worker.js');
|
||||||
|
const allPageTexts = new Array(totalPages).fill('');
|
||||||
|
const workerStatus = new Array(workerCount).fill('pending'); // pending, done, failed
|
||||||
|
let resolved = false;
|
||||||
|
|
||||||
|
const checkComplete = () => {
|
||||||
|
if (resolved) return;
|
||||||
|
const doneCount = workerStatus.filter(s => s === 'done' || s === 'failed').length;
|
||||||
|
if (doneCount >= workerCount) {
|
||||||
|
resolved = true;
|
||||||
|
// 检查是否有失败的worker,打印警告
|
||||||
|
const failedCount = workerStatus.filter(s => s === 'failed').length;
|
||||||
|
if (failedCount > 0) {
|
||||||
|
console.warn(`[BillWorker] ${failedCount}/${workerCount} 个worker失败,可能导致部分页面无内容`);
|
||||||
|
}
|
||||||
|
resolve(allPageTexts);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
for (let i = 0; i < workerCount; i++) {
|
||||||
|
const startPage = i * PAGES_PER_CHUNK + 1;
|
||||||
|
const endPage = Math.min((i + 1) * PAGES_PER_CHUNK, totalPages);
|
||||||
|
|
||||||
|
// workerData 传 SharedArrayBuffer(跨线程共享,不会被清空)
|
||||||
|
const w = new Worker(workerPath, {
|
||||||
|
workerData: { sab, dataLength, startPage, endPage }
|
||||||
|
});
|
||||||
|
|
||||||
|
let workerDone = false;
|
||||||
|
|
||||||
|
const markDone = (status) => {
|
||||||
|
if (workerDone) return;
|
||||||
|
workerDone = true;
|
||||||
|
workerStatus[i] = status;
|
||||||
|
checkComplete();
|
||||||
|
};
|
||||||
|
|
||||||
|
w.on('message', (msg) => {
|
||||||
|
if (msg.ok && msg.results) {
|
||||||
|
for (const r of msg.results) {
|
||||||
|
allPageTexts[r.page - 1] = r.text;
|
||||||
|
}
|
||||||
|
markDone('done');
|
||||||
|
} else if (!msg.ok) {
|
||||||
|
console.warn(`[BillWorker] page-worker[${startPage}-${endPage}] 失败: ${msg.error}`);
|
||||||
|
markDone('failed');
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
w.on('error', (err) => {
|
||||||
|
console.warn(`[BillWorker] page-worker[${startPage}-${endPage}] 异常: ${err.message}`);
|
||||||
|
markDone('failed');
|
||||||
|
});
|
||||||
|
|
||||||
|
w.on('exit', (code) => {
|
||||||
|
// exit 在 message 之后触发,但如果 worker 崩溃没发 message 则在这里兜底
|
||||||
|
if (code !== 0 && !workerDone) {
|
||||||
|
console.warn(`[BillWorker] page-worker[${startPage}-${endPage}] 意外退出(code=${code})`);
|
||||||
|
markDone('failed');
|
||||||
|
} else if (!workerDone) {
|
||||||
|
markDone('done');
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (workerCount <= 0) {
|
||||||
|
resolved = true;
|
||||||
|
resolve(allPageTexts);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// ================================================================
|
||||||
|
// Phase 3: 清单文本解析(纯正则 + 字符串处理,毫秒级)
|
||||||
|
// ================================================================
|
||||||
|
|
||||||
|
function parseBillText(text) {
|
||||||
|
const rawLines = text.split(/\n/).map(l => {
|
||||||
|
let line = l.replace(/\t/g, ' ').trim();
|
||||||
|
// 规范化带横杠的编码:如 "010-101-001-001" → "010101001001"
|
||||||
|
line = line.replace(/(\d{2,4})[-‐–](\d{2,4})[-‐–](\d{2,4})(?:[-‐–](\d{2,4}))?/g,
|
||||||
|
(m, a, b, c, d) => {
|
||||||
|
const combined = a + b + c + (d || '');
|
||||||
|
return (combined.length >= 9 && combined.length <= 12) ? combined : m;
|
||||||
|
});
|
||||||
|
return line;
|
||||||
|
});
|
||||||
|
|
||||||
|
// ── Step 1: 多行合并成逻辑行 ──
|
||||||
|
// pdfjs 按 Y 坐标分行,表格一行通常 = 一条文本行
|
||||||
|
// 但有时 项目特征/名称 会折行,需要合并
|
||||||
|
//
|
||||||
|
// 新逻辑行的起始标志(任一命中即切断):
|
||||||
|
// a) 序号模式:1.1.1.1.5 开头
|
||||||
|
// b) 清单编码:9-12位数字 或 B+5-6位数字 开头
|
||||||
|
// c) 中文大标题:一 二 三 ... 或 (一)(二)...
|
||||||
|
// d) 表头行内容(跳过)
|
||||||
|
// e) 纯数字序号 + 空格 + 编码(如 "5 500101004001")
|
||||||
|
|
||||||
|
const ITEM_START = /^\d+(\.\d+)+\s/; // 1.1 或 1.1.1 等序号
|
||||||
|
const CODE_INLINE = /(?:^|\s)(\d{9,12}|(?<![A-Za-z])B\d{5,6})\s/; // 行内含清单编码(排除 GB/DB 等标准号)
|
||||||
|
const CODE_START_RE = /^(\d{9,12}|B\d{5,6})\s/; // 行首就是清单编码(行首 B 不会有前缀字母)
|
||||||
|
const SEQ_CODE_RE = /^\d{1,4}\s+(\d{9,12}|(?<![A-Za-z])B\d{5,6})\s/; // "序号 编码"格式
|
||||||
|
const PAGE_MARK = /^--\s*\d+\s+of\s+\d+\s*--/;
|
||||||
|
const HEADER_RE = /^序号\s+(项目编码|项目名称)/;
|
||||||
|
const HEADER_KW = /^(项目编码|项目名称|清单编码|计量单位|综合单价|工程量|合\s*价|金额|序号)\s/;
|
||||||
|
const CATEGORY_MARKERS = ['一', '二', '三', '四', '五', '六', '七', '八', '九', '十',
|
||||||
|
'(一)', '(二)', '(三)', '(四)', '(五)'];
|
||||||
|
|
||||||
|
const logicLines = [];
|
||||||
|
let currentLine = '';
|
||||||
|
|
||||||
|
function isNewLineTrigger(raw) {
|
||||||
|
if (ITEM_START.test(raw)) return true;
|
||||||
|
if (CODE_START_RE.test(raw)) return true;
|
||||||
|
if (SEQ_CODE_RE.test(raw)) return true;
|
||||||
|
if (CATEGORY_MARKERS.some(m => raw.startsWith(m + ' ') || raw.startsWith(m + '\u3000'))) return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const raw of rawLines) {
|
||||||
|
if (!raw || PAGE_MARK.test(raw)) continue;
|
||||||
|
if (HEADER_RE.test(raw) || HEADER_KW.test(raw)) continue;
|
||||||
|
if (/^(元)|^款章节号|^备注$|^第\d+页/.test(raw)) continue;
|
||||||
|
|
||||||
|
if (isNewLineTrigger(raw)) {
|
||||||
|
if (currentLine) logicLines.push(currentLine);
|
||||||
|
currentLine = raw;
|
||||||
|
} else if (CODE_INLINE.test(raw) && raw.length > 15) {
|
||||||
|
// 行内包含编码且够长(像是完整的表格行)→ 也开新行
|
||||||
|
if (currentLine) logicLines.push(currentLine);
|
||||||
|
currentLine = raw;
|
||||||
|
} else {
|
||||||
|
// 续行(项目特征折行等短文本)
|
||||||
|
// 安全阀:已合并行过长时强制切断,防止整页吞并
|
||||||
|
if (currentLine && currentLine.length > 300) {
|
||||||
|
logicLines.push(currentLine);
|
||||||
|
currentLine = raw;
|
||||||
|
} else {
|
||||||
|
currentLine = currentLine ? currentLine + ' ' + raw : raw;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (currentLine) logicLines.push(currentLine);
|
||||||
|
|
||||||
|
console.log(`[BillWorker] 合并后 ${logicLines.length} 条逻辑行(原始 ${rawLines.length} 行)`);
|
||||||
|
// 打印前5条逻辑行供调试
|
||||||
|
for (let i = 0; i < Math.min(5, logicLines.length); i++) {
|
||||||
|
console.log(`[BillWorker] L${i}: ${logicLines[i].substring(0, 120)}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const categories = [];
|
||||||
|
let curCat = null, curItem = null;
|
||||||
|
|
||||||
|
// 编码匹配:支持行内任意位置的9-12位数字或B编码(排除 GB/DB 等标准号前缀)
|
||||||
|
const CODE_RE = /(?<![A-Za-z])(\d{9,12}|(?<![A-Za-z])B\d{5,6})/;
|
||||||
|
const UNIT_TOKENS = ['m³','m²','m3','m2','km','hm2','㎡','㎥','t','kg',
|
||||||
|
'个','台','套','组','根','块','片','张','只','吨','项',
|
||||||
|
'处','座','件','段','条','把','扇','口','圈','道','孔',
|
||||||
|
'对','副','樘','方','延m','株','棵','m'];
|
||||||
|
const UNIT_SET = new Set(UNIT_TOKENS);
|
||||||
|
const unitEscaped = UNIT_TOKENS.map(u => u.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
|
||||||
|
const UNIT_RE = new RegExp(`(?:^|\\s)(${unitEscaped.join('|')})(?=\\s|\\d|$)`);
|
||||||
|
const SKIP_RE = /合\s*计|小\s*计|本页小计|总\s*计|价税合计/;
|
||||||
|
|
||||||
|
for (const line of logicLines) {
|
||||||
|
if (SKIP_RE.test(line)) continue;
|
||||||
|
|
||||||
|
// 去掉行首的序号部分("1.1.1.1.5 " 或 "5 " 等纯序号前缀)
|
||||||
|
let stripped = line.replace(/^\d+(\.\d+)*\s+/, '').trim();
|
||||||
|
if (!stripped) stripped = line.trim();
|
||||||
|
if (!stripped) continue;
|
||||||
|
|
||||||
|
const cm = stripped.match(CODE_RE);
|
||||||
|
if (cm) {
|
||||||
|
if (curItem && curCat) curCat.items.push(curItem);
|
||||||
|
if (!curCat) { curCat = { name: '未分类', items: [] }; categories.push(curCat); }
|
||||||
|
|
||||||
|
const code = cm[1];
|
||||||
|
let rest = stripped.substring(cm.index + cm[0].length).trim();
|
||||||
|
let name = '', unit = '', quantity = '', spec = '';
|
||||||
|
|
||||||
|
const unitMatch = rest.match(UNIT_RE);
|
||||||
|
if (unitMatch) {
|
||||||
|
const ui = rest.indexOf(unitMatch[0]);
|
||||||
|
let rawName = rest.substring(0, ui).trim();
|
||||||
|
unit = unitMatch[1];
|
||||||
|
const afterUnit = rest.substring(ui + unitMatch[0].length).trim();
|
||||||
|
const qm = afterUnit.match(/^([\d,.]+)/);
|
||||||
|
if (qm) {
|
||||||
|
quantity = qm[1];
|
||||||
|
// 提取 quantity 之后的尾部文本,跳过纯数字字段(综合单价、合价等)
|
||||||
|
let tail = afterUnit.substring(qm.index + qm[0].length).trim();
|
||||||
|
if (tail) {
|
||||||
|
const tailTokens = tail.split(/\s+/);
|
||||||
|
let si = 0;
|
||||||
|
while (si < tailTokens.length && /^[\d,.%\-]+$/.test(tailTokens[si])) si++;
|
||||||
|
const specTail = tailTokens.slice(si).join(' ').trim();
|
||||||
|
if (specTail) spec = specTail;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// 分离 rawName 中的"项目名称"和内联"项目特征"
|
||||||
|
const ns = splitNameAndSpec(rawName);
|
||||||
|
name = ns.name;
|
||||||
|
if (ns.spec) spec = ns.spec + (spec ? ';' + spec : '');
|
||||||
|
} else {
|
||||||
|
const tokens = rest.split(/\s+/).filter(t => t);
|
||||||
|
let foundUnitIdx = -1;
|
||||||
|
for (let ti = tokens.length - 1; ti >= 1; ti--) {
|
||||||
|
if (UNIT_SET.has(tokens[ti])) { foundUnitIdx = ti; break; }
|
||||||
|
}
|
||||||
|
if (foundUnitIdx >= 1) {
|
||||||
|
const rawNameStr = tokens.slice(0, foundUnitIdx).join(' ');
|
||||||
|
const ns = splitNameAndSpec(rawNameStr);
|
||||||
|
name = ns.name;
|
||||||
|
if (ns.spec) spec = ns.spec;
|
||||||
|
unit = tokens[foundUnitIdx];
|
||||||
|
const afterTokens = tokens.slice(foundUnitIdx + 1);
|
||||||
|
if (afterTokens.length && /^[\d,.]+$/.test(afterTokens[0])) {
|
||||||
|
quantity = afterTokens[0];
|
||||||
|
let si = 1;
|
||||||
|
while (si < afterTokens.length && /^[\d,.%\-]+$/.test(afterTokens[si])) si++;
|
||||||
|
const specTail = afterTokens.slice(si).join(' ').trim();
|
||||||
|
if (specTail) spec = spec ? spec + ';' + specTail : specTail;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
name = rest;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
name = name.replace(/\s+/g, '').trim();
|
||||||
|
for (const u of UNIT_TOKENS) {
|
||||||
|
if (name.endsWith(u) && name.length > u.length) {
|
||||||
|
unit = unit || u;
|
||||||
|
name = name.substring(0, name.length - u.length);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
curItem = { code, name, unit, quantity, spec };
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── 回退:无标准编码但有 "名称 单位 数量" 结构 → 也视为清单项 ──
|
||||||
|
// 常见于措施项目、未编码的补充清单项
|
||||||
|
if (!cm && stripped.length > 4) {
|
||||||
|
const uniMatch = stripped.match(UNIT_RE);
|
||||||
|
if (uniMatch) {
|
||||||
|
const ui = stripped.indexOf(uniMatch[0]);
|
||||||
|
const beforeUnit = stripped.substring(0, ui).trim();
|
||||||
|
const afterUnit = stripped.substring(ui + uniMatch[0].length).trim();
|
||||||
|
const hasQty = /^[\d,.]+/.test(afterUnit);
|
||||||
|
// 名称 2-50 字、含中文、有数量、不是分部标题
|
||||||
|
if (beforeUnit.length >= 2 && beforeUnit.length <= 50 && hasQty
|
||||||
|
&& /[\u4e00-\u9fff]/.test(beforeUnit)) {
|
||||||
|
if (curItem && curCat) curCat.items.push(curItem);
|
||||||
|
if (!curCat) { curCat = { name: '未分类', items: [] }; categories.push(curCat); }
|
||||||
|
const unit = uniMatch[1];
|
||||||
|
const qm = afterUnit.match(/^([\d,.]+)/);
|
||||||
|
const quantity = qm ? qm[1] : '';
|
||||||
|
const ns = splitNameAndSpec(beforeUnit);
|
||||||
|
const name = ns.name.replace(/\s+/g, '').trim();
|
||||||
|
const spec = ns.spec || '';
|
||||||
|
curItem = { code: '', name, unit, quantity, spec };
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 分部标题判断:不含编码、较短的文本、含工程关键字
|
||||||
|
// 关键守卫:如果行里有计量单位,说明是清单项,不是标题
|
||||||
|
if (stripped.length > 2 && stripped.length < 60 && !CODE_RE.test(stripped)) {
|
||||||
|
if (UNIT_RE.test(stripped) && /\d+\.?\d*\s*$/.test(stripped)) {
|
||||||
|
if (curItem) curItem.spec = curItem.spec ? curItem.spec + ';' + stripped : stripped;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (isCatTitle(stripped) && !UNIT_RE.test(stripped) && !isFeeCatTitle(stripped)) {
|
||||||
|
if (curItem && curCat) { curCat.items.push(curItem); curItem = null; }
|
||||||
|
const cleanTitle = stripped.replace(/\s+(座|个|项|处|m|km|段|条)\s+\d+[\d.]*\s*$/, '').trim();
|
||||||
|
curCat = { name: cleanTitle, items: [] };
|
||||||
|
categories.push(curCat);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (/^[一二三四五六七八九十]+\s/.test(stripped) || /^([一二三四五六七八九十\d]+)/.test(stripped)) {
|
||||||
|
// 中文序号标题也需要排除费用类
|
||||||
|
const cleanTitle = stripped.replace(/\s+(座|个|项|处)\s+\d+[\d.]*\s*$/, '').trim();
|
||||||
|
if (isFeeCatTitle(cleanTitle)) {
|
||||||
|
// 费用类标题:跳过,不建分部(其下的行会作为续行处理)
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (curItem && curCat) { curCat.items.push(curItem); curItem = null; }
|
||||||
|
curCat = { name: cleanTitle, items: [] };
|
||||||
|
categories.push(curCat);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (curItem && stripped.length > 1) {
|
||||||
|
curItem.spec = curItem.spec ? curItem.spec + ';' + stripped : stripped;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (curItem && curCat) curCat.items.push(curItem);
|
||||||
|
|
||||||
|
// 过滤费用项:只保留需要写入技术标的施工清单项
|
||||||
|
let feeFiltered = 0;
|
||||||
|
for (const cat of categories) {
|
||||||
|
if (cat.items) {
|
||||||
|
const before = cat.items.length;
|
||||||
|
cat.items = cat.items.filter(it => !isFeeItem(it.name));
|
||||||
|
feeFiltered += before - cat.items.length;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (feeFiltered > 0) console.log(`[BillWorker] 费用项过滤: 移除 ${feeFiltered} 项`);
|
||||||
|
|
||||||
|
// ========== 按项目名称合并(核心去重,大幅减少清单项数量)==========
|
||||||
|
// 规则:同一分部内,name 相同的清单项合并为一条
|
||||||
|
// - code: 保留第一个非空编码
|
||||||
|
// - unit: 保留第一个非空单位
|
||||||
|
// - quantity: 尝试数值求和,否则用分号拼接
|
||||||
|
// - spec: 去重后用分号拼接(截断过长的)
|
||||||
|
let totalBeforeMerge = 0, totalAfterMerge = 0;
|
||||||
|
for (const cat of categories) {
|
||||||
|
if (!cat.items || !cat.items.length) continue;
|
||||||
|
totalBeforeMerge += cat.items.length;
|
||||||
|
|
||||||
|
const nameMap = new Map(); // name → merged item
|
||||||
|
for (const item of cat.items) {
|
||||||
|
const key = (item.name || '').replace(/\s+/g, '').trim();
|
||||||
|
if (!key) continue;
|
||||||
|
|
||||||
|
if (!nameMap.has(key)) {
|
||||||
|
nameMap.set(key, {
|
||||||
|
code: item.code || '',
|
||||||
|
name: item.name,
|
||||||
|
unit: item.unit || '',
|
||||||
|
quantity: item.quantity || '',
|
||||||
|
spec: item.spec || '',
|
||||||
|
_count: 1,
|
||||||
|
_quantities: item.quantity ? [item.quantity] : [],
|
||||||
|
_specs: item.spec ? [item.spec] : [],
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
const m = nameMap.get(key);
|
||||||
|
m._count++;
|
||||||
|
// code: 取第一个非空的
|
||||||
|
if (!m.code && item.code) m.code = item.code;
|
||||||
|
// unit: 取第一个非空的
|
||||||
|
if (!m.unit && item.unit) m.unit = item.unit;
|
||||||
|
// quantity: 收集所有
|
||||||
|
if (item.quantity) m._quantities.push(item.quantity);
|
||||||
|
// spec: 收集不重复的
|
||||||
|
if (item.spec && !m._specs.includes(item.spec)) {
|
||||||
|
m._specs.push(item.spec);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 后处理:合成最终字段
|
||||||
|
const merged = [];
|
||||||
|
for (const [, m] of nameMap) {
|
||||||
|
// quantity: 尝试数值求和
|
||||||
|
if (m._quantities.length > 1) {
|
||||||
|
const nums = m._quantities.map(q => parseFloat(q.replace(/,/g, '')));
|
||||||
|
if (nums.every(n => !isNaN(n))) {
|
||||||
|
const sum = nums.reduce((a, b) => a + b, 0);
|
||||||
|
m.quantity = sum % 1 === 0 ? String(sum) : sum.toFixed(2);
|
||||||
|
} else {
|
||||||
|
m.quantity = m._quantities.join('; ');
|
||||||
|
}
|
||||||
|
} else if (m._quantities.length === 1) {
|
||||||
|
m.quantity = m._quantities[0];
|
||||||
|
}
|
||||||
|
// spec: 拼接去重后的 spec,每条最多120字
|
||||||
|
if (m._specs.length > 0) {
|
||||||
|
const trimmed = m._specs.map(s => s.length > 120 ? s.substring(0, 120) + '...' : s);
|
||||||
|
m.spec = trimmed.join('; ');
|
||||||
|
// 总 spec 上限 300 字
|
||||||
|
if (m.spec.length > 300) m.spec = m.spec.substring(0, 300) + '...';
|
||||||
|
}
|
||||||
|
// 清理临时字段
|
||||||
|
delete m._count; delete m._quantities; delete m._specs;
|
||||||
|
merged.push(m);
|
||||||
|
}
|
||||||
|
cat.items = merged;
|
||||||
|
totalAfterMerge += merged.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
const mergedCount = totalBeforeMerge - totalAfterMerge;
|
||||||
|
if (mergedCount > 0) {
|
||||||
|
console.log(`[BillWorker] 按名称合并: ${totalBeforeMerge} → ${totalAfterMerge} 项(合并 ${mergedCount} 个重复项)`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const valid = categories.filter(c => c.items && c.items.length > 0);
|
||||||
|
const totalItems = valid.reduce((s, c) => s + c.items.length, 0);
|
||||||
|
const withSpec = valid.reduce((s, c) => s + c.items.filter(it => it.spec).length, 0);
|
||||||
|
const withCode = valid.reduce((s, c) => s + c.items.filter(it => it.code).length, 0);
|
||||||
|
console.log(`[BillWorker] 最终结果: ${valid.length} 分部, ${totalItems} 清单项 (${withCode} 有编码, ${withSpec} 有spec)`);
|
||||||
|
// 打印前 3 个 item 供调试
|
||||||
|
let debugCount = 0;
|
||||||
|
for (const cat of valid) {
|
||||||
|
for (const it of cat.items) {
|
||||||
|
if (debugCount < 3) {
|
||||||
|
console.log(`[BillWorker] 样例: [${it.code}] ${it.name} | ${it.unit} | qty=${it.quantity} | spec=${(it.spec||'').substring(0, 80)}`);
|
||||||
|
debugCount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
project_summary: { remark: `本地解析:${valid.length} 个分部,${totalItems} 个清单项(合并前 ${totalBeforeMerge} 项)` },
|
||||||
|
categories: valid,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 判断清单项是否为"费用项"(非施工内容,不写入技术标)
|
||||||
|
* 如:安全文明措施费、规费、税金、暂列金额等
|
||||||
|
*/
|
||||||
|
/**
 * Decide whether a BOQ line item is a "fee" entry (overheads, statutory
 * charges, taxes, provisional sums, etc.) rather than actual construction
 * work. Fee entries are excluded from the technical bid document.
 *
 * @param {string} name - raw item name (whitespace is ignored)
 * @returns {boolean} true when the item is a fee entry
 */
function isFeeItem(name) {
    if (!name) return false;
    const normalized = name.replace(/\s+/g, '');

    // Titles that are fee entries only when they match exactly.
    const exactTitles = new Set([
        '规费', '税金', '利润', '增值税', '暂列金额', '暂估价', '计日工',
        '总承包服务费', '企业管理费', '甲供材料保管费', '价税合计',
    ]);
    if (exactTitles.has(normalized)) return true;

    // Substring match: measure fees / statutory charges / insurance / admin.
    const feeKeywords = [
        '安全文明', '文明施工费', '环境保护费', '临时设施费',
        '夜间施工增加费', '夜间施工费',
        '冬雨季施工增加费', '冬雨季施工费',
        '二次搬运费', '大型机械设备进出场', '大型机械进出场',
        '施工排水降水', '排水降水费',
        '已完工程及设备保护', '已完工程保护费',
        '工程排污费', '社会保障费', '住房公积金',
        '工伤保险', '劳动保险', '意外伤害保险', '建筑工程保险',
        '城市维护建设税', '城市建设维护税',
        '教育费附加', '地方教育附加',
        '材料暂估', '专业工程暂估',
        '超高施工增加费', '安全防护费',
        '措施项目费', '其他项目费', '不可竞争费',
    ];
    return feeKeywords.some(kw => normalized.includes(kw));
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 将 rawName 中的"项目名称"与内联"项目特征描述"分离
|
||||||
|
* 例: "土方开挖 1.土壤类别:普通土" → { name: "土方开挖", spec: "1.土壤类别:普通土" }
|
||||||
|
*/
|
||||||
|
/**
 * Split a raw BOQ item name into the "item name" part and an inline
 * "feature/spec description" part.
 *
 * Example: "土方开挖 1.土壤类别:普通土" → { name: "土方开挖", spec: "1.土壤类别:普通土" }
 *
 * @param {string} rawName - combined name + optional inline spec text
 * @returns {{name: string, spec: string}}
 */
function splitNameAndSpec(rawName) {
    if (!rawName) return { name: '', spec: '' };

    // Ordered detectors for where the inline spec text begins:
    //   1) numbered clause: digit + separator + CJK char ("1.土壤类别", "2、强度等级")
    //   2) spec keyword followed by a colon ("材质:", "规格:")
    //   3) parenthesised numbering ("(1)", "(1)")
    const detectors = [
        /\d+[.、.)\uFF09]\s*[\u4e00-\u9fff]/,
        /(材质|规格|型号|品牌|颜色|尺寸|厚度|直径|管径|强度|等级|类别|类型|做法|要求|标准|内容|工作内容|土壤|含量|配合比|工艺|方式|形式|范围|部位|位置|高度|宽度|长度|深度|坡度|截面|跨度|运距|开挖|回填|混凝土|钢筋|压实)[::]/,
        /[((]\d+[))]/,
    ];

    for (const re of detectors) {
        const hit = rawName.match(re);
        // index > 0 keeps a leading match inside the name (nothing before it
        // to treat as the item name).
        if (hit && hit.index > 0) {
            return {
                name: rawName.substring(0, hit.index).trim(),
                spec: rawName.substring(hit.index).trim(),
            };
        }
    }
    return { name: rawName, spec: '' };
}
|
||||||
|
|
||||||
|
/**
 * Heuristic: does `text` look like a construction-discipline category title
 * (e.g. "土建工程", "给排水安装")? Used to decide whether a heading row in
 * the BOQ starts a new category.
 *
 * Fix: guard against null/undefined/empty input — `text.includes` would
 * throw on null, and the sibling helpers (isFeeItem, isFeeCatTitle) already
 * guard the same way.
 *
 * @param {string} text - candidate heading text
 * @returns {boolean} true when the text contains any discipline keyword
 */
function isCatTitle(text) {
    if (!text) return false;
    const KW = [
        '土建','建筑','结构','装饰','装修','安装','给排水','暖通','空调','通风',
        '电气','强电','弱电','消防','智能化','幕墙','门窗','园林','绿化','景观',
        '市政','道路','桥梁','管网','基础','地基','桩基','主体','屋面','防水',
        '保温','钢结构','排水','给水','照明','动力','防雷','电梯','人防','室外',
        '附属','分部','工程','措施','清单','土石方','混凝土','砌筑','模板','脚手架',
        '水利','河道','管道','阀门','设备','仪表','自动化','通信','网络',
        '拆除','外墙','内墙','楼地面','天棚','吊顶','栏杆','屋顶','涂料','抹灰',
        '廊道','阀门井','蓄水池','泵站','供水','引水','水源','渠道','闸门',
        '围栏','警示','检修','管线','配电','水池','水塔','取水','净水',
    ];
    return KW.some(k => text.includes(k));
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 判断分部标题是否为"费用类"(不应创建分部分类)
|
||||||
|
* 如:规费、税金、措施项目费、其他项目费 等非施工类分部
|
||||||
|
*/
|
||||||
|
/**
 * Decide whether a category (分部) heading is a "fee" category — statutory
 * fees, taxes, measure-item fees, etc. — which should NOT become a category
 * in the technical bid.
 *
 * @param {string} text - raw category heading (whitespace is ignored)
 * @returns {boolean} true when the heading is a fee category
 */
function isFeeCatTitle(text) {
    if (!text) return false;
    const compact = text.replace(/\s+/g, '');

    // Whole-title exact matches.
    const exactTitles = new Set([
        '规费', '税金', '利润', '增值税', '暂列金额', '暂估价', '计日工',
        '总承包服务费', '企业管理费', '价税合计',
        '措施项目费', '其他项目费', '不可竞争费',
    ]);
    if (exactTitles.has(compact)) return true;

    // Substring matches.
    const feeCatKeywords = [
        '措施项目费', '其他项目费', '不可竞争费',
        '规费汇总', '税金汇总', '费率', '费用汇总', '费用合计',
        '暂列金额', '暂估价', '计日工', '总承包服务费',
        '安全文明施工费', '社会保障费', '住房公积金',
        '工伤保险', '教育费附加', '城市维护建设税',
    ];
    return feeCatKeywords.some(kw => compact.includes(kw));
}
|
||||||
95
build.bat
Normal file
95
build.bat
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
@echo off
chcp 65001 >nul 2>&1
setlocal

echo ============================================================
echo BidPartner - Build Desktop EXE
echo ============================================================
echo.

:: ── 1. Check Python ────────────────────────────────────────────────────────
python --version >nul 2>&1
if errorlevel 1 (
    echo [ERROR] Python not found. Please install Python 3.9+.
    pause & exit /b 1
)

:: ── 2. Install / upgrade PyInstaller ───────────────────────────────────────
echo [Step 1/4] Installing PyInstaller...
pip install --quiet --upgrade pyinstaller
if errorlevel 1 (
    echo [ERROR] Failed to install PyInstaller.
    pause & exit /b 1
)

:: ── 3. Install project dependencies (if not already installed) ─────────────
echo [Step 2/4] Checking dependencies...
pip install --quiet -r requirements.txt
if errorlevel 1 (
    echo [ERROR] Failed to install dependencies.
    pause & exit /b 1
)

:: ── 4. Sanitize settings.json - REMOVE API KEYS before build ───────────────
echo [Step 3/4] Sanitizing settings (removing API keys from build)...
if exist "data\settings.json" (
    rem Back up real settings. (Use rem, not ::, inside a parenthesized
    rem block - :: is a broken label there and can abort the block.)
    copy /y "data\settings.json" "data\settings.json.bak" >nul
)
:: Write a clean settings file with no real keys
(
echo {
echo "model_provider": "deepseek",
echo "qwen_api_key": "sk-your-qwen-key",
echo "qwen_model": "qwen3.6-plus",
echo "openai_api_key": "sk-your-openai-key",
echo "openai_model": "gpt-4o",
echo "deepseek_api_key": "sk-your-deepseek-key",
echo "deepseek_model": "deepseek-chat",
echo "max_concurrent": 5,
echo "content_volume": "standard"
echo }
) > "data\settings_clean.tmp"
:: BUGFIX: actually install the sanitized settings before the build.
:: Previously settings_clean.tmp was written but never used, so the REAL
:: API keys from data\settings.json were bundled into the distributed EXE.
copy /y "data\settings_clean.tmp" "data\settings.json" >nul

:: ── 5. Build ────────────────────────────────────────────────────────────────
echo [Step 4/4] Building EXE with PyInstaller...
echo (This may take 3-10 minutes on first run)
echo.

:: Clean previous build artifacts
if exist "build" rd /s /q "build" >nul 2>&1
if exist "dist\BidPartner" rd /s /q "dist\BidPartner" >nul 2>&1

pyinstaller bid_partner.spec --noconfirm
set BUILD_RESULT=%errorlevel%

:: ── Restore real settings ───────────────────────────────────────────────────
if exist "data\settings.json.bak" (
    copy /y "data\settings.json.bak" "data\settings.json" >nul
    del /f /q "data\settings.json.bak" >nul 2>&1
) else (
    rem No original settings existed - remove the temporary sanitized copy
    del /f /q "data\settings.json" >nul 2>&1
)
del /f /q "data\settings_clean.tmp" >nul 2>&1

if %BUILD_RESULT% neq 0 (
    echo.
    echo [ERROR] PyInstaller build failed. See output above for details.
    pause & exit /b 1
)

:: ── 6. Result ───────────────────────────────────────────────────────────────
echo.
echo ============================================================
echo Build SUCCESSFUL!
echo Output: dist\BidPartner\bid_partner.exe
echo ============================================================
echo.
echo The 'dist\BidPartner' folder is your distributable package.
echo Users only need this folder - no Python installation required.
echo Each user must set their own API key in the app settings.
echo.

:: Open the output folder
explorer "dist\BidPartner" >nul 2>&1

endlocal
pause
|
||||||
76
config.py
Normal file
76
config.py
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
import os
import sys

# When running as a PyInstaller bundle:
#   sys._MEIPASS       → read-only bundle dir (templates, static, prompts)
#   sys.executable dir → writable dir next to the .exe (data, settings, db)
if getattr(sys, 'frozen', False):
    _BUNDLE_DIR = sys._MEIPASS  # bundled app files
    BASE_DIR = os.path.dirname(sys.executable)  # writable runtime dir
else:
    _BUNDLE_DIR = os.path.dirname(os.path.abspath(__file__))
    BASE_DIR = _BUNDLE_DIR

# Writable data locations — all under BASE_DIR so a frozen app keeps its
# state next to the executable rather than inside the read-only bundle.
DATA_DIR = os.path.join(BASE_DIR, 'data')
UPLOAD_DIR = os.path.join(DATA_DIR, 'uploads')
EXPORT_DIR = os.path.join(DATA_DIR, 'exports')
KNOWLEDGE_DIR= os.path.join(DATA_DIR, 'knowledge')
DB_PATH = os.path.join(DATA_DIR, 'projects.db')
CHROMA_DIR = os.path.join(DATA_DIR, 'chroma')
# Prompt templates ship inside the (read-only) bundle.
PROMPTS_DIR = os.path.join(_BUNDLE_DIR, 'prompts')

# ==================== AI model configuration ====================
# Provider selection: 'openai' | 'qwen' | 'deepseek' | 'ollama'
MODEL_PROVIDER = os.environ.get('MODEL_PROVIDER', 'qwen')

# OpenAI
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', 'sk-your-openai-key')
OPENAI_MODEL = os.environ.get('OPENAI_MODEL', 'gpt-4.1')
OPENAI_BASE_URL = os.environ.get('OPENAI_BASE_URL', 'https://api.openai.com/v1')

# Alibaba Cloud Tongyi Qianwen (Qwen)
QWEN_API_KEY = os.environ.get('QWEN_API_KEY', 'sk-your-qwen-key')
QWEN_MODEL = os.environ.get('QWEN_MODEL', 'qwen3.6-plus')
QWEN_BASE_URL = os.environ.get('QWEN_BASE_URL', 'https://dashscope.aliyuncs.com/compatible-mode/v1')

# DeepSeek
DEEPSEEK_API_KEY = os.environ.get('DEEPSEEK_API_KEY', 'sk-your-deepseek-key')
DEEPSEEK_MODEL = os.environ.get('DEEPSEEK_MODEL', 'deepseek-chat')
DEEPSEEK_BASE_URL = os.environ.get('DEEPSEEK_BASE_URL', 'https://api.deepseek.com/v1')

# Local Ollama (OpenAI-compatible endpoint)
OLLAMA_BASE_URL = os.environ.get('OLLAMA_BASE_URL', 'http://localhost:11434/v1')
OLLAMA_MODEL = os.environ.get('OLLAMA_MODEL', 'qwen3:8b')

# Doubao / Volcengine (ByteDance, OpenAI-compatible endpoint)
DOUBAO_API_KEY = os.environ.get('DOUBAO_API_KEY', 'sk-your-doubao-key')
DOUBAO_MODEL = os.environ.get('DOUBAO_MODEL', 'doubao-1-5-pro-32k')
DOUBAO_BASE_URL = os.environ.get('DOUBAO_BASE_URL', 'https://ark.cn-beijing.volces.com/api/v3')

# Kimi / Moonshot AI (OpenAI-compatible endpoint, supports embeddings)
KIMI_API_KEY = os.environ.get('KIMI_API_KEY', 'sk-your-kimi-key')
KIMI_MODEL = os.environ.get('KIMI_MODEL', 'moonshot-v1-32k')
KIMI_BASE_URL = os.environ.get('KIMI_BASE_URL', 'https://api.moonshot.cn/v1')

# Embedding models (one per provider that supports embeddings)
OPENAI_EMBEDDING_MODEL = 'text-embedding-3-small'
QWEN_EMBEDDING_MODEL = 'text-embedding-v3'
KIMI_EMBEDDING_MODEL = 'moonshot-v1-embedding'

# ==================== Application settings ====================
MAX_FILE_SIZE_MB = 50
ALLOWED_EXTENSIONS = {'pdf', 'doc', 'docx'}
# NOTE(review): hard-coded Flask secret — acceptable for a single-user local
# app, but must be rotated/externalized if this is ever hosted.
SECRET_KEY = 'bidhuo-partner-secret-2024'

# ==================== Generation settings ====================
MAX_RETRIES = 3
REQUEST_TIMEOUT = int(os.environ.get('REQUEST_TIMEOUT', '180'))
# Outline generation uses a long prompt and produces a large output; give it
# a longer read timeout (seconds) so the client is not disconnected before
# the API responds.
OUTLINE_REQUEST_TIMEOUT = int(os.environ.get('OUTLINE_REQUEST_TIMEOUT', '300'))
CHUNK_SIZE = 2000  # knowledge-base text chunk size (characters)
CHUNK_OVERLAP = 200  # overlap between adjacent chunks
TOP_K_KNOWLEDGE = 3  # number of knowledge-base chunks retrieved per query
MAX_CONCURRENT_SECTIONS = int(os.environ.get('MAX_CONCURRENT_SECTIONS', '5'))  # concurrently generated sections
CONTENT_VOLUME = os.environ.get('CONTENT_VOLUME', 'standard')  # volume tier: concise / standard / detailed / full
TARGET_PAGES = int(os.environ.get('TARGET_PAGES', '0') or '0')  # target page count (0 = disabled)
PAGE_CHAR_ESTIMATE = int(os.environ.get('PAGE_CHAR_ESTIMATE', '700') or '700')  # rough characters per page
||||||
22
data/attachment_section_rules.json
Normal file
22
data/attachment_section_rules.json
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
{
|
||||||
|
"_meta": "附件类章节:stack_charts_only 为默认,叶节点按 diagram 意图栈只输出 [FIGURE]/[TABLE] 块、无叙述正文;full 为长文;single_chart_only 为栈顶单块。修改后重启生效。",
|
||||||
|
"_field_docs": {
|
||||||
|
"title_regex": "标题任一则正则匹配即视为附件节(Python re 语法)",
|
||||||
|
"table_hint_keywords": "标题含此类子串且双开关均开时倾向表格",
|
||||||
|
"figure_hint_keywords": "标题含此类子串且双开关均开时倾向图示",
|
||||||
|
"default_kind_when_ambiguous": "双开且标题无倾向词时的默认:figure 或 table",
|
||||||
|
"attachment_leaf_body_mode": "stack_charts_only:意图栈只生成图/表块;full:与常规章节相同长文;single_chart_only:仅栈顶一块图或表"
|
||||||
|
},
|
||||||
|
"schema_version": 1,
|
||||||
|
"attachment_leaf_body_mode": "stack_charts_only",
|
||||||
|
"title_regex": [
|
||||||
|
"附件\\s*[一二三四五六七八九十0-9A-Za-z、::.]",
|
||||||
|
"附\\s*图",
|
||||||
|
"附\\s*表",
|
||||||
|
"附\\s*件\\s*\\(",
|
||||||
|
"^\\s*[\\d一二三四五六七八九十\\..、]+\\s*附件"
|
||||||
|
],
|
||||||
|
"table_hint_keywords": ["附表", "一览表", "清单表", "表", "统计表", "明细表"],
|
||||||
|
"figure_hint_keywords": ["附图", "示意图", "平面图", "流程图", "布置图", "组织图", "横道"],
|
||||||
|
"default_kind_when_ambiguous": "table"
|
||||||
|
}
|
||||||
51
data/diagram_intent_rules.json
Normal file
51
data/diagram_intent_rules.json
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
{
|
||||||
|
"_meta": "章节级图/表意图:标题与大纲窗口关键词计分,阈值入栈,按栈序拼接图示/表格生成规范。修改后重启服务生效。",
|
||||||
|
"_field_docs": {
|
||||||
|
"threshold_figure": "图示倾向分达到此值才入栈",
|
||||||
|
"threshold_table": "表格倾向分达到此值才入栈",
|
||||||
|
"title_weight": "标题命中的权重乘子",
|
||||||
|
"context_weight": "大纲上下文窗口命中的权重乘子",
|
||||||
|
"outline_context_lines": "before/after 为相对匹配行上下扩展行数",
|
||||||
|
"stack_order_when_both": "figure_first | table_first | score_desc(两者同时入栈时的顺序,栈顶为 index 0)",
|
||||||
|
"figure_keywords": "字符串或 {text,weight} 对象列表",
|
||||||
|
"table_keywords": "同上"
|
||||||
|
},
|
||||||
|
"schema_version": 1,
|
||||||
|
"threshold_figure": 1.0,
|
||||||
|
"threshold_table": 1.0,
|
||||||
|
"title_weight": 1.0,
|
||||||
|
"context_weight": 0.6,
|
||||||
|
"outline_context_lines": {"before": 4, "after": 6},
|
||||||
|
"stack_order_when_both": "score_desc",
|
||||||
|
"figure_keywords": [
|
||||||
|
{"text": "组织", "weight": 1.0},
|
||||||
|
{"text": "架构", "weight": 1.0},
|
||||||
|
{"text": "流程", "weight": 1.2},
|
||||||
|
{"text": "工序", "weight": 1.0},
|
||||||
|
{"text": "进度", "weight": 1.2},
|
||||||
|
{"text": "横道", "weight": 1.5},
|
||||||
|
{"text": "网络图", "weight": 1.5},
|
||||||
|
{"text": "平面", "weight": 1.0},
|
||||||
|
{"text": "布置", "weight": 0.8},
|
||||||
|
{"text": "监测", "weight": 0.8},
|
||||||
|
{"text": "示意", "weight": 0.8},
|
||||||
|
{"text": "应急", "weight": 0.8}
|
||||||
|
],
|
||||||
|
"table_keywords": [
|
||||||
|
{"text": "一览表", "weight": 1.5},
|
||||||
|
{"text": "人员", "weight": 1.0},
|
||||||
|
{"text": "配置", "weight": 0.8},
|
||||||
|
{"text": "设备", "weight": 1.0},
|
||||||
|
{"text": "机械", "weight": 0.9},
|
||||||
|
{"text": "劳动力", "weight": 1.2},
|
||||||
|
{"text": "工种", "weight": 1.0},
|
||||||
|
{"text": "检验", "weight": 1.0},
|
||||||
|
{"text": "验收", "weight": 0.9},
|
||||||
|
{"text": "材料", "weight": 1.0},
|
||||||
|
{"text": "供应", "weight": 0.9},
|
||||||
|
{"text": "风险", "weight": 1.0},
|
||||||
|
{"text": "措施", "weight": 0.6},
|
||||||
|
{"text": "清单", "weight": 0.8},
|
||||||
|
{"text": "计划", "weight": 0.7}
|
||||||
|
]
|
||||||
|
}
|
||||||
BIN
data/exports/20260420测试海东技术标_20260420_180450.docx
Normal file
BIN
data/exports/20260420测试海东技术标_20260420_180450.docx
Normal file
Binary file not shown.
BIN
data/exports/30260420投标技术文档_20260420_170252.docx
Normal file
BIN
data/exports/30260420投标技术文档_20260420_170252.docx
Normal file
Binary file not shown.
BIN
data/exports/A2121212_20260421_103738.docx
Normal file
BIN
data/exports/A2121212_20260421_103738.docx
Normal file
Binary file not shown.
BIN
data/exports/A666_20260422_143004.docx
Normal file
BIN
data/exports/A666_20260422_143004.docx
Normal file
Binary file not shown.
BIN
data/exports/A666_20260422_153137.docx
Normal file
BIN
data/exports/A666_20260422_153137.docx
Normal file
Binary file not shown.
BIN
data/exports/A666_20260422_160459.docx
Normal file
BIN
data/exports/A666_20260422_160459.docx
Normal file
Binary file not shown.
BIN
data/projects.db
Normal file
BIN
data/projects.db
Normal file
Binary file not shown.
24
data/settings.json
Normal file
24
data/settings.json
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
{
|
||||||
|
"model_provider": "qwen",
|
||||||
|
"qwen_api_key": "sk-999173b3ca7f425a97cc4b12a2d3575f",
|
||||||
|
"qwen_model": "qwen3.6-plus",
|
||||||
|
"qwen_base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
|
||||||
|
"openai_api_key": "sk-your-openai-key",
|
||||||
|
"openai_model": "gpt-4.1",
|
||||||
|
"openai_base_url": "https://api.openai.com/v1",
|
||||||
|
"deepseek_api_key": "sk-your-deepseek-key",
|
||||||
|
"deepseek_model": "deepseek-chat",
|
||||||
|
"deepseek_base_url": "https://api.deepseek.com/v1",
|
||||||
|
"ollama_base_url": "http://localhost:11434/v1",
|
||||||
|
"ollama_model": "qwen3:8b",
|
||||||
|
"doubao_api_key": "sk-your-doubao-key",
|
||||||
|
"doubao_model": "doubao-1-5-pro-32k",
|
||||||
|
"doubao_base_url": "https://ark.cn-beijing.volces.com/api/v3",
|
||||||
|
"kimi_api_key": "sk-your-kimi-key",
|
||||||
|
"kimi_model": "moonshot-v1-32k",
|
||||||
|
"kimi_base_url": "https://api.moonshot.cn/v1",
|
||||||
|
"max_concurrent": 10,
|
||||||
|
"content_volume": "full",
|
||||||
|
"target_pages": 120,
|
||||||
|
"page_char_estimate": 700
|
||||||
|
}
|
||||||
BIN
data/uploads/10_boq_工程量清单.pdf
Normal file
BIN
data/uploads/10_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/10_招标文件正文.pdf
Normal file
BIN
data/uploads/10_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/11_boq_工程量清单.pdf
Normal file
BIN
data/uploads/11_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/11_招标文件正文.pdf
Normal file
BIN
data/uploads/11_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/12_boq_工程量清单.pdf
Normal file
BIN
data/uploads/12_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/12_招标文件正文.pdf
Normal file
BIN
data/uploads/12_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/15_boq_工程量清单.pdf
Normal file
BIN
data/uploads/15_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/15_招标文件正文.pdf
Normal file
BIN
data/uploads/15_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/16_boq_工程量清单.pdf
Normal file
BIN
data/uploads/16_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/16_招标文件正文.pdf
Normal file
BIN
data/uploads/16_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/17_boq_工程量清单.pdf
Normal file
BIN
data/uploads/17_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/17_招标文件正文.pdf
Normal file
BIN
data/uploads/17_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/1_boq_工程量清单1.pdf
Normal file
BIN
data/uploads/1_boq_工程量清单1.pdf
Normal file
Binary file not shown.
BIN
data/uploads/1_招标文件正文1.pdf
Normal file
BIN
data/uploads/1_招标文件正文1.pdf
Normal file
Binary file not shown.
BIN
data/uploads/2_boq_工程量清单1.pdf
Normal file
BIN
data/uploads/2_boq_工程量清单1.pdf
Normal file
Binary file not shown.
BIN
data/uploads/2_招标文件正文1.pdf
Normal file
BIN
data/uploads/2_招标文件正文1.pdf
Normal file
Binary file not shown.
BIN
data/uploads/3_boq_工程量清单1.pdf
Normal file
BIN
data/uploads/3_boq_工程量清单1.pdf
Normal file
Binary file not shown.
BIN
data/uploads/3_招标文件正文1.pdf
Normal file
BIN
data/uploads/3_招标文件正文1.pdf
Normal file
Binary file not shown.
BIN
data/uploads/4_boq_工程量清单1.pdf
Normal file
BIN
data/uploads/4_boq_工程量清单1.pdf
Normal file
Binary file not shown.
BIN
data/uploads/4_招标文件正文1.pdf
Normal file
BIN
data/uploads/4_招标文件正文1.pdf
Normal file
Binary file not shown.
BIN
data/uploads/5_boq_工程量清单1.pdf
Normal file
BIN
data/uploads/5_boq_工程量清单1.pdf
Normal file
Binary file not shown.
BIN
data/uploads/5_招标文件正文1.pdf
Normal file
BIN
data/uploads/5_招标文件正文1.pdf
Normal file
Binary file not shown.
BIN
data/uploads/6_boq_工程量清单.pdf
Normal file
BIN
data/uploads/6_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/6_招标文件正文.pdf
Normal file
BIN
data/uploads/6_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/7_boq_工程量清单1.pdf
Normal file
BIN
data/uploads/7_boq_工程量清单1.pdf
Normal file
Binary file not shown.
BIN
data/uploads/7_招标文件正文1.pdf
Normal file
BIN
data/uploads/7_招标文件正文1.pdf
Normal file
Binary file not shown.
BIN
data/uploads/8_boq_工程量清单.pdf
Normal file
BIN
data/uploads/8_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/8_招标文件正文.pdf
Normal file
BIN
data/uploads/8_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/9_boq_工程量清单.pdf
Normal file
BIN
data/uploads/9_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/9_招标文件正文.pdf
Normal file
BIN
data/uploads/9_招标文件正文.pdf
Normal file
Binary file not shown.
30
data/word_allocation_rules.json
Normal file
30
data/word_allocation_rules.json
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
{
|
||||||
|
"_meta": "字数分配约束规则:与「标书篇幅预期」四档的 base/core 配合,按技术评分项权重与章节标题相关性分配各叶节点最低字数与提示词中的评分要点提示。修改后重启服务生效;字段说明见同文件 _field_docs。",
|
||||||
|
"_field_docs": {
|
||||||
|
"schema_version": "规则文件版本号,解析时可做迁移",
|
||||||
|
"alpha": "0~1,评分驱动强度;越大则高分相关章节越接近 core、低相关越接近 base",
|
||||||
|
"budget_mode": "target_pages:启用目标页数且 TARGET_PAGES>0 时,全书叶节点目标总字数为 TARGET_PAGES*PAGE_CHAR_ESTIMATE;无技术评分时叶节均分该总预算。未启用页数时无评分则返回 None。anchor_mean:N*(base+core)/2;anchor_base:N*base",
|
||||||
|
"per_section_floor": "单节 min_chars 下限(不低于此整数)",
|
||||||
|
"per_section_cap": "单节 min_chars 上限(不超过 core 时可设为 core 或略高)",
|
||||||
|
"relevance.method": "keyword_overlap:标题与评分项名称/关键词的字面重叠度",
|
||||||
|
"relevance.min_rating_weight": "忽略权重低于此值的评分项(减少噪声)",
|
||||||
|
"rating_parse": "预留;解析器内置多形态 rating_json,无需在此配置",
|
||||||
|
"prompt.top_k_rating_items": "写入本节字数说明中的相关评分项名称条数上限",
|
||||||
|
"max_tokens_scale": "若为 true,按 min_chars/base 比例缩放本段 max_tokens(仍受模型上限约束)"
|
||||||
|
},
|
||||||
|
"schema_version": 1,
|
||||||
|
"alpha": 0.85,
|
||||||
|
"budget_mode": "target_pages",
|
||||||
|
"per_section_floor": null,
|
||||||
|
"per_section_cap": null,
|
||||||
|
"relevance": {
|
||||||
|
"method": "keyword_overlap",
|
||||||
|
"min_rating_weight": 0.01
|
||||||
|
},
|
||||||
|
"rating_parse": {},
|
||||||
|
"prompt": {
|
||||||
|
"top_k_rating_items": 4,
|
||||||
|
"intro_line": "本节须对下列技术评分要点作实质展开(结合工艺、流程、标准与可验证措施,禁止空泛承诺与复述招标文件):"
|
||||||
|
},
|
||||||
|
"max_tokens_scale": false
|
||||||
|
}
|
||||||
172
launcher.py
Normal file
172
launcher.py
Normal file
@ -0,0 +1,172 @@
|
|||||||
|
"""
|
||||||
|
标伙伴 · AI标书助手 — 桌面启动器
|
||||||
|
运行此文件 (或打包后的 bid_partner.exe) 即可自动启动本地服务并打开浏览器。
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import socket
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
import webbrowser
|
||||||
|
import urllib.request
|
||||||
|
import logging
|
||||||
|
|
||||||
|
|
||||||
|
# ── 找可用端口 ──────────────────────────────────────────────────────────────
|
||||||
|
def _find_free_port(start: int = 5000, attempts: int = 20) -> int:
|
||||||
|
for port in range(start, start + attempts):
|
||||||
|
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
||||||
|
try:
|
||||||
|
s.bind(('127.0.0.1', port))
|
||||||
|
return port
|
||||||
|
except OSError:
|
||||||
|
continue
|
||||||
|
return start # 最坏情况:直接用 5000,让 Flask 报错
|
||||||
|
|
||||||
|
|
||||||
|
PORT = _find_free_port()
|
||||||
|
|
||||||
|
|
||||||
|
# ── 日志 ────────────────────────────────────────────────────────────────────
|
||||||
|
def _setup_logging():
    """Send all log output to ``bid_partner.log`` next to the executable
    (frozen PyInstaller build) or next to this source file (dev run)."""
    frozen = getattr(sys, 'frozen', False)
    anchor = sys.executable if frozen else os.path.abspath(__file__)
    log_path = os.path.join(os.path.dirname(anchor), 'bid_partner.log')
    file_handler = logging.FileHandler(log_path, encoding='utf-8', mode='a')
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s [%(levelname)s] %(name)s: %(message)s',
        handlers=[file_handler],
    )
|
||||||
|
|
||||||
|
|
||||||
|
# ── 启动 Flask 服务 ─────────────────────────────────────────────────────────
|
||||||
|
def _start_server():
    """Run the Flask app (blocking); intended to run on a daemon thread.

    Imports the project's ``app`` module lazily so import errors are caught
    here; any startup failure is written to bid_partner.log instead of
    crashing the GUI process.
    """
    try:
        import app as flask_app
        flask_app.init_db()
        flask_app.app.run(
            host='127.0.0.1',  # local-only: never exposed on the network
            port=PORT,
            debug=False,
            threaded=True,
            use_reloader=False,  # the reloader would re-exec the frozen binary
        )
    except Exception as e:
        logging.getLogger('launcher').error(f'服务启动失败: {e}', exc_info=True)
|
||||||
|
|
||||||
|
|
||||||
|
# ── 等待服务就绪 ─────────────────────────────────────────────────────────────
|
||||||
|
def _wait_for_server(timeout: int = 60) -> bool:
    """Poll the local HTTP endpoint until it answers or ``timeout`` seconds
    elapse.

    Returns True as soon as any HTTP response arrives, False on timeout.
    """
    endpoint = f'http://127.0.0.1:{PORT}'
    give_up_at = time.time() + timeout
    while time.time() < give_up_at:
        try:
            urllib.request.urlopen(endpoint, timeout=1)
        except Exception:
            time.sleep(0.4)  # not up yet — back off briefly and retry
        else:
            return True
    return False
|
||||||
|
|
||||||
|
|
||||||
|
# ── 主界面 (tkinter) ─────────────────────────────────────────────────────────
|
||||||
|
def _run_gui():
    """Tkinter control window: shows startup status, the service URL, and
    open-browser / quit buttons; opens the browser automatically once the
    Flask server starts answering."""
    import tkinter as tk
    from tkinter import ttk, font as tkfont

    URL = f'http://127.0.0.1:{PORT}'

    root = tk.Tk()
    root.title('标伙伴 · AI标书助手')
    root.geometry('400x220')
    root.resizable(False, False)
    root.configure(bg='#f5f5f5')

    # ── Title ──
    title_font = tkfont.Font(family='微软雅黑', size=14, weight='bold')
    tk.Label(root, text='标伙伴 · AI 标书助手', font=title_font,
             bg='#f5f5f5', fg='#1a1a2e').pack(pady=(22, 4))

    # ── Status line (updated when the server is ready / times out) ──
    status_var = tk.StringVar(value='正在启动服务,请稍候…')
    status_lbl = tk.Label(root, textvariable=status_var,
                          font=('微软雅黑', 10), bg='#f5f5f5', fg='#555')
    status_lbl.pack(pady=4)

    # ── Clickable URL link (text filled in once ready) ──
    url_lbl = tk.Label(root, text='', font=('Consolas', 10),
                       bg='#f5f5f5', fg='#1a73e8', cursor='hand2')
    url_lbl.pack(pady=2)
    url_lbl.bind('<Button-1>', lambda _: webbrowser.open(URL))

    # ── Buttons ──
    btn_frame = tk.Frame(root, bg='#f5f5f5')
    btn_frame.pack(pady=18)

    # Disabled until the server answers (see _on_ready below).
    open_btn = ttk.Button(btn_frame, text='打开浏览器',
                          command=lambda: webbrowser.open(URL),
                          state='disabled', width=14)
    open_btn.pack(side='left', padx=8)

    quit_btn = ttk.Button(btn_frame, text='退出程序',
                          command=root.destroy, width=10)
    quit_btn.pack(side='left', padx=8)

    # ── Footer ──
    tk.Label(root, text='单机版 · 本地运行 · 数据不上传',
             font=('微软雅黑', 8), bg='#f5f5f5', fg='#aaa').pack(pady=(0, 10))

    # ── Background poll: update the UI once the server is reachable ──
    def _on_ready():
        status_var.set('服务已就绪 ✓')
        status_lbl.config(fg='#2e7d32')
        url_lbl.config(text=URL)
        open_btn.config(state='normal')
        webbrowser.open(URL)

    def _on_timeout():
        status_var.set('启动超时,请查看 bid_partner.log')
        status_lbl.config(fg='#c62828')

    def _check():
        # Runs on a worker thread; UI updates are marshalled back onto the
        # Tk main loop via root.after(0, ...).
        if _wait_for_server():
            root.after(0, _on_ready)
        else:
            root.after(0, _on_timeout)

    threading.Thread(target=_check, daemon=True).start()
    root.mainloop()
|
||||||
|
|
||||||
|
|
||||||
|
# ── 无图形模式(仅控制台) ────────────────────────────────────────────────────
|
||||||
|
def _run_headless():
    """Console fallback when tkinter is unavailable: print status, open the
    browser once the server answers, then block until Ctrl+C."""
    print(f'[标伙伴] Starting server on port {PORT} ...')
    if not _wait_for_server():
        print('[标伙伴] Server did not start within 60 s. Check bid_partner.log.')
        return
    print(f'[标伙伴] Ready → http://127.0.0.1:{PORT}')
    webbrowser.open(f'http://127.0.0.1:{PORT}')
    # Keep the process alive until the user interrupts it.
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        print('[标伙伴] Shutting down.')
|
||||||
|
|
||||||
|
|
||||||
|
# ── 入口 ─────────────────────────────────────────────────────────────────────
|
||||||
|
def main():
    """Entry point: start the Flask server on a daemon thread, then show the
    GUI; fall back to headless console mode if the GUI cannot start."""
    _setup_logging()

    # Daemon thread: the server dies with the process when the GUI exits.
    server_thread = threading.Thread(target=_start_server, daemon=True)
    server_thread.start()

    try:
        _run_gui()
    except Exception:
        # Presumably tkinter is missing/broken (e.g. headless build) —
        # fall back to the console loop.
        _run_headless()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
||||||
1
modules/__init__.py
Normal file
1
modules/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
||||||
98
modules/checker.py
Normal file
98
modules/checker.py
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
"""
|
||||||
|
合规检查模块:检查生成的标书是否响应了招标关键要求
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import sqlite3
|
||||||
|
|
||||||
|
from utils import ai_client
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
CHECK_PROMPT = """你是一位专业的投标文件技术审核专家。请对照以下【技术评分要求】,检查【标书技术内容】的覆盖情况,输出技术合规检查报告。
|
||||||
|
|
||||||
|
重要限制(必须遵守):
|
||||||
|
★ 本次检查范围仅限技术内容,包括:技术方案、实施能力、技术指标、质量保障、人员配置、技术创新等
|
||||||
|
★ 严禁将商务评分、价格评分、资质评分、报价、合同条款、付款方式等商务内容纳入检查项
|
||||||
|
★ 若技术评分要求中混有商务条款,直接忽略,不得作为检查项输出
|
||||||
|
|
||||||
|
【技术评分要求】
|
||||||
|
{requirements}
|
||||||
|
|
||||||
|
【标书技术内容(各章节摘要)】
|
||||||
|
{content}
|
||||||
|
|
||||||
|
请输出以下格式的 JSON,每个 item 均为技术评分项,不含任何商务内容:
|
||||||
|
{{
|
||||||
|
"overall_score": 85,
|
||||||
|
"status": "良好",
|
||||||
|
"items": [
|
||||||
|
{{
|
||||||
|
"requirement": "技术评分要求描述",
|
||||||
|
"covered": true,
|
||||||
|
"note": "说明"
|
||||||
|
}}
|
||||||
|
],
|
||||||
|
"missing_points": ["未覆盖的技术要点1", "未覆盖的技术要点2"],
|
||||||
|
"suggestions": ["技术内容改进建议1", "技术内容改进建议2"]
|
||||||
|
}}
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def check_compliance(db_path: str, project_id: int) -> dict:
    """Run the AI compliance check for one project.

    Reads the extracted technical rating requirements and the generated
    bid sections from SQLite, asks the AI to grade coverage, and returns
    the parsed report dict.  On any failure a dict with an ``'error'``
    key is returned instead of raising, so callers can surface the
    message as-is.
    """
    conn = sqlite3.connect(db_path)
    raw = ''  # pre-bound so the JSONDecodeError handler below can never hit an unbound name
    try:
        # Tender requirements extracted in step 1.
        cur = conn.cursor()
        cur.execute(
            "SELECT summary, rating_requirements FROM tender_data WHERE project_id=?",
            (project_id,),
        )
        td = cur.fetchone()
        if not td:
            return {'error': '尚未解析招标文件'}

        # Use only the technical rating requirements as the check baseline;
        # the summary may contain commercial content that must be excluded.
        requirements = (td[1] or '').strip()
        if not requirements:
            return {'error': '尚未提取技术评分要求,请先完成步骤一的招标文件解析'}

        # Collect generated section contents (first 500 chars per section).
        cur.execute(
            "SELECT section_title, content FROM bid_sections WHERE project_id=? AND status='done' ORDER BY order_index",
            (project_id,),
        )
        rows = cur.fetchall()
        if not rows:
            return {'error': '尚未生成标书内容,请先生成'}

        content_parts = []
        for title, content in rows:
            snippet = (content or '')[:500].replace('\n', ' ')
            content_parts.append(f"【{title}】{snippet}")
        content_str = '\n'.join(content_parts)

        # Ask the AI; clamp both sides to keep the prompt within limits.
        prompt = CHECK_PROMPT.format(requirements=requirements[:3000], content=content_str[:6000])
        raw = ai_client.chat(prompt, temperature=0.2, max_tokens=2048)

        # Strip Markdown code fences, then take the outermost {...} span.
        raw = re.sub(r'```(?:json)?\s*', '', raw).replace('```', '').strip()
        m = re.search(r'\{[\s\S]*\}', raw)
        if m:
            raw = m.group(0)
        return json.loads(raw)

    except json.JSONDecodeError as e:
        # Lazy %-style args: only formatted when the record is emitted.
        logger.error('合规检查结果解析失败: %s', e)
        return {'error': f'AI 返回格式异常: {e}', 'raw': raw}
    except Exception as e:
        # Boundary handler: translate any failure into an error dict for the UI.
        logger.exception('合规检查失败')
        return {'error': str(e)}
    finally:
        conn.close()
|
||||||
635
modules/dark_bid_format_check.py
Normal file
635
modules/dark_bid_format_check.py
Normal file
@ -0,0 +1,635 @@
|
|||||||
|
"""
|
||||||
|
技术暗标 HTML 格式检查(由 清标工具.js 迁移,不依赖浏览器/jsdom)。
|
||||||
|
仅解析内联 style 与文档内 <style> 中的 @page 简单规则;无内联样式时部分项可能判为不符合。
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup, Tag
|
||||||
|
|
||||||
|
# 1 pt ≈ 96/72 px (CSS reference pixel definition).
_PT_PX = 96.0 / 72.0

# Target metrics, expressed in px:
# 三号 (size "3") = 16pt headings / 四号 = 14pt body /
# 五号 = 10.5pt figure text / 26pt line height.
_TARGET_H = 16 * _PT_PX  # 21.333...
_TARGET_BODY = 14 * _PT_PX
_TARGET_LH = 26 * _PT_PX
_TARGET_FIG = 10.5 * _PT_PX
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_style_attr(style: str | None) -> dict[str, str]:
|
||||||
|
if not style or not style.strip():
|
||||||
|
return {}
|
||||||
|
out: dict[str, str] = {}
|
||||||
|
for part in style.split(";"):
|
||||||
|
part = part.strip()
|
||||||
|
if ":" not in part:
|
||||||
|
continue
|
||||||
|
k, v = part.split(":", 1)
|
||||||
|
k, v = k.strip().lower(), v.strip()
|
||||||
|
if k:
|
||||||
|
out[k] = v
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def _num(s: str) -> float:
|
||||||
|
try:
|
||||||
|
return float(re.sub(r"[^\d.\-]", "", s) or "nan")
|
||||||
|
except ValueError:
|
||||||
|
return float("nan")
|
||||||
|
|
||||||
|
|
||||||
|
def _length_to_px(val: str, font_size_px: float | None = None) -> float:
    """Convert a CSS length (font-size / line-height, ...) to approximate px.

    Mirrors the JS original, which read getComputedStyle() px values.
    Keyword or empty values yield NaN; bare integers count as px; em/rem
    are resolved against *font_size_px* when a usable base is given,
    otherwise the bare multiplier is returned (used for text-indent).
    """
    normalized = (val or "").strip().lower()
    if not normalized or normalized in ("normal", "inherit", "initial"):
        return float("nan")
    if normalized.isdigit():
        return float(normalized)
    match = re.match(r"^([\d.]+)\s*(pt|px|em|rem)?\s*$", normalized)
    if match is None:
        # Unknown unit: fall back to the leading number, NaN if none.
        leading = re.match(r"^([\d.]+)", normalized)
        if leading is None:
            return float("nan")
        return float(leading.group(1))
    number = float(match.group(1))
    unit = match.group(2) or "px"
    if unit == "pt":
        return number * _PT_PX
    if unit in ("em", "rem"):
        # font_size_px == font_size_px filters out NaN (NaN != NaN).
        if font_size_px and font_size_px == font_size_px:
            return number * font_size_px
        return number  # no base font size: return the bare em multiplier
    return number  # px
|
||||||
|
|
||||||
|
|
||||||
|
def _indent_value(style: dict[str, str], font_size_px: float) -> float:
    """Return the text-indent expressed in em, matching JS parseFloat(textIndent).

    '2em' -> 2.0; px/pt values are divided by the element font size;
    a missing text-indent yields NaN.
    """
    raw = (style.get("text-indent") or "").strip()
    if not raw:
        return float("nan")
    lowered = raw.lower()
    if "em" in lowered:
        em_match = re.search(r"([\d.]+)\s*em", raw, re.I)
        if em_match:
            return float(em_match.group(1))
        return _num(raw)
    # Approximate px -> em using the element's font size.
    px = _length_to_px(raw, font_size_px)
    if px == px and font_size_px > 0:  # px == px rules out NaN
        return px / font_size_px
    return _num(raw)
|
||||||
|
|
||||||
|
|
||||||
|
def _color_normalized(style: dict[str, str]) -> str:
|
||||||
|
c = (style.get("color") or "").strip().lower()
|
||||||
|
if not c:
|
||||||
|
return ""
|
||||||
|
c = c.replace(" ", "")
|
||||||
|
if c in ("#000", "#000000", "black", "rgb(0,0,0)"):
|
||||||
|
return "rgb(0, 0, 0)"
|
||||||
|
m = re.match(r"rgb\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)", c)
|
||||||
|
if m:
|
||||||
|
r, g, b = int(m.group(1)), int(m.group(2)), int(m.group(3))
|
||||||
|
if r == 0 and g == 0 and b == 0:
|
||||||
|
return "rgb(0, 0, 0)"
|
||||||
|
return c
|
||||||
|
return c
|
||||||
|
|
||||||
|
|
||||||
|
def _el_style_dict(tag: Tag) -> dict[str, str]:
    """Return the parsed inline style of *tag*, or {} when absent.

    bs4 may expose the ``style`` attribute as a string or as a list of
    strings; both are handled (list parts are re-joined with ';').
    """
    style_attr = tag.get("style")
    if isinstance(style_attr, list):
        style_attr = ";".join(style_attr)
    if isinstance(style_attr, str):
        return _parse_style_attr(style_attr)
    return {}
|
||||||
|
|
||||||
|
|
||||||
|
def _get_inline_property(tag: Tag, prop: str) -> str:
    """Read one inline-style property of *tag*; "" when not declared."""
    return _el_style_dict(tag).get(prop.lower(), "")
|
||||||
|
|
||||||
|
|
||||||
|
def _outer_html_sample(tag: Tag, limit: int = 200) -> str:
|
||||||
|
s = str(tag)
|
||||||
|
return s[:limit] if len(s) > limit else s
|
||||||
|
|
||||||
|
|
||||||
|
def _is_under(node: Tag | None, ancestor: Tag | None) -> bool:
|
||||||
|
if node is None or ancestor is None:
|
||||||
|
return False
|
||||||
|
p: Tag | None = node
|
||||||
|
while p is not None:
|
||||||
|
if p is ancestor:
|
||||||
|
return True
|
||||||
|
p = p.parent
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _body_text(soup: BeautifulSoup) -> str:
    """All visible text, newline-separated and stripped; falls back to the
    whole document when there is no <body>."""
    body = soup.body
    target = body if body else soup
    return target.get_text("\n", strip=True)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_page_margins_from_html(raw_html: str) -> dict[str, str] | None:
|
||||||
|
"""从 <style> 中粗提取 @page 块内 margin 与 size。"""
|
||||||
|
for m in re.finditer(
|
||||||
|
r"@page\s*\{([^}]+)\}",
|
||||||
|
raw_html,
|
||||||
|
re.I | re.DOTALL,
|
||||||
|
):
|
||||||
|
block = m.group(1)
|
||||||
|
msh = re.search(r"margin\s*:\s*([^;]+);", block, re.I)
|
||||||
|
if msh:
|
||||||
|
return {"shorthand": msh.group(1).strip()}
|
||||||
|
margins: dict[str, str] = {}
|
||||||
|
for name, key in (
|
||||||
|
(r"margin-top\s*:\s*([^;]+)", "top"),
|
||||||
|
(r"margin-bottom\s*:\s*([^;]+)", "bottom"),
|
||||||
|
(r"margin-left\s*:\s*([^;]+)", "left"),
|
||||||
|
(r"margin-right\s*:\s*([^;]+)", "right"),
|
||||||
|
(r"size\s*:\s*([^;]+)", "size"),
|
||||||
|
):
|
||||||
|
mm = re.search(name, block, re.I)
|
||||||
|
if mm:
|
||||||
|
margins[key] = mm.group(1).strip()
|
||||||
|
if margins:
|
||||||
|
return margins
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def check_technical_bid(html_content: str) -> dict[str, Any]:
    """
    Run the format-compliance check on anonymous ("dark") technical-bid HTML.

    Only inline ``style`` attributes and simple ``@page`` rules inside
    <style> are consulted; elements with no inline style may fail some
    rules by design.  Returned structure matches 清标数据.json:
    ``overall`` (bool), ``details`` (one entry per rule) and
    ``violations`` (failed rules plus up-to-200-char element samples).
    """
    results: dict[str, Any] = {
        "overall": True,
        "details": [],
        "violations": [],
    }

    def add_result(
        rule_name: str,
        passed: bool,
        message: str,
        elements: list[Tag] | None = None,
    ) -> None:
        # Every rule lands in `details`; a failure additionally flips
        # `overall` and appends an entry (with HTML snippets) to `violations`.
        results["details"].append(
            {"rule": rule_name, "passed": passed, "message": message}
        )
        if not passed:
            results["overall"] = False
            el_snips: list[str] = []
            for el in elements or []:
                if isinstance(el, Tag):
                    el_snips.append(_outer_html_sample(el))
            results["violations"].append(
                {"rule": rule_name, "message": message, "elements": el_snips}
            )

    if not (html_content or "").strip():
        add_result("身份信息隐藏", False, "HTML 内容为空", [])
        return results

    raw_html = html_content  # keep the raw text for @page parsing below
    soup = BeautifulSoup(html_content, "lxml")
    if not soup.body:
        # Fragment without <body>: wrap it so body-relative checks work.
        soup = BeautifulSoup(f"<html><body>{html_content}</body></html>", "lxml")

    body = soup.body
    if not body:
        add_result("身份信息隐藏", False, "无法解析 body", [])
        return results

    # ---- 1. Identity hiding ----
    body_text = _body_text(soup)
    company_pattern = re.compile(
        r"(?:我公司|本公司|[((]?[A-Za-z\u4e00-\u9fa5]+(?:集团|股份|有限|责任|公司)[))]?)"
    )
    addr_pattern = re.compile(
        r"(?:省|市|区|县|镇|路|街|大道|号|大厦|楼|层)[\u4e00-\u9fa50-9]+"
    )
    name_pattern = re.compile(
        r"(?:总监理工程师|专业监理工程师|技术负责人|项目经理)[::]\s*"
        r"[^甲乙丙丁戊己庚辛壬癸\s]{2,4}(?=[,。;\s]|$)"
    )
    found_company = bool(company_pattern.search(body_text))
    found_addr = bool(addr_pattern.search(body_text))
    found_name = bool(name_pattern.search(body_text))
    has_logo = False
    for img in soup.find_all("img"):
        if not isinstance(img, Tag):
            continue
        alt = (img.get("alt") or "") + ""
        src = (img.get("src") or "") + ""
        # A logo is flagged from either the alt text or the src path.
        if re.search(r"logo|商标|微标|公司|品牌", alt, re.I) or re.search(
            r"logo", src, re.I
        ):
            has_logo = True
            break
    passed_id = not (
        found_company or found_addr or found_name or has_logo
    )
    add_result(
        "身份信息隐藏",
        passed_id,
        "未发现投标人身份信息"
        if passed_id
        else "发现投标人身份信息(公司名/地址/真实姓名/商标)",
    )

    def heading_style_ok(tag: Tag) -> bool:
        # Heading rule: ≈16pt (三号, ±3px tolerance), a hei-ti family,
        # not italic, no underline, black (or inherited) color, bold weight.
        st = _el_style_dict(tag)
        fs_raw = st.get("font-size", "")
        fs_px = _length_to_px(fs_raw)
        if "em" in (fs_raw or "").lower() and "rem" not in (fs_raw or "").lower():
            fs_px = _num(fs_raw) * 16.0  # em resolved against the 16px browser default
        size_ok = abs(fs_px - _TARGET_H) <= 3
        fam = (st.get("font-family") or "").lower()
        font_ok = "黑体" in fam or "simhei" in fam or "microsoft yahei" in fam
        font_style = (st.get("font-style") or "").lower()
        style_ok = font_style != "italic"
        text_dec = (st.get("text-decoration") or "").lower()
        decor_ok = "underline" not in text_dec
        cr = (st.get("color") or "").strip().lower()
        if not cr or cr in ("inherit", "initial"):
            color_ok = True  # unset / inherited color counts as black
        else:
            cn = _color_normalized(st)
            color_ok = cn == "rgb(0, 0, 0)" or cr in (
                "#000",
                "#000000",
                "black",
                "rgb(0,0,0)",
            )
        fw = (st.get("font-weight") or "400").lower()
        weight_ok = fw not in ("400", "normal")  # anything heavier than normal passes
        if not st.get("font-size"):
            size_ok = False  # no inline size at all -> cannot verify, fail
        return (
            size_ok
            and font_ok
            and style_ok
            and decor_ok
            and color_ok
            and weight_ok
        )

    # ---- 2. Headings ----
    heading_tags: list[Tag] = []
    for sel in ("h1", "h2", "h3", "h4", "h5", "h6"):
        heading_tags.extend(soup.find_all(sel))
    for t in soup.find_all(attrs={"role": "heading"}):
        if isinstance(t, Tag):
            heading_tags.append(t)
    for t in soup.select(".heading, .title"):
        if isinstance(t, Tag) and t not in heading_tags:
            heading_tags.append(t)

    invalid_h: list[Tag] = []
    for h in heading_tags:
        if not isinstance(h, Tag):
            continue
        if not heading_style_ok(h):
            invalid_h.append(h)
    h_ok = len(invalid_h) == 0
    add_result(
        "标题格式",
        h_ok,
        "所有标题符合三号黑体要求"
        if h_ok
        else "部分标题字号/字体/颜色/下划线不符合要求",
        invalid_h,
    )

    def body_el_ok(el: Tag) -> bool:
        # Body-paragraph rule: 14pt (四号) song-ti, black, ~2em indent,
        # ≈26pt line height, no underline, regular weight, not italic.
        st = _el_style_dict(el)
        if el.name in ("h1", "h2", "h3", "h4", "h5", "h6"):
            return True  # headings are covered by rule 2
        cls = " ".join(el.get("class", [])) if el.get("class") else ""
        if any(
            x in cls
            for x in ("header", "footer", "toc", "目录", "table-of-contents")
        ):
            return True  # page furniture and the TOC are exempt
        text = el.get_text(strip=True)
        if not text:
            return True  # nothing visible to check
        fs_raw = st.get("font-size", "")
        font_px = _length_to_px(fs_raw)
        if not fs_raw:
            return False  # body text must declare its size inline
        size_ok = abs(font_px - _TARGET_BODY) <= 2
        fam = (st.get("font-family") or "").lower()
        font_ok = "宋体" in fam or "simsun" in fam or "serif" in fam
        col = st.get("color", "")
        color_ok = (not col) or _color_normalized(st) == "rgb(0, 0, 0)" or col.lower() in (
            "#000",
            "#000000",
            "black",
            "rgb(0,0,0)",
        )
        ind = _indent_value(st, font_px)
        indent_ok = ind == ind and 1.8 <= ind <= 2.2  # ind == ind filters NaN
        lh_raw = (st.get("line-height") or "").strip()
        if not lh_raw:
            line_ok = False
        else:
            if "pt" in lh_raw or "px" in lh_raw:
                lh_px = _length_to_px(lh_raw, font_px)
            elif re.match(r"^[\d.]+$", lh_raw):
                lh_px = float(lh_raw) * font_px  # unitless line-height multiplier
            else:
                lh_px = _length_to_px(lh_raw, font_px)
            line_ok = abs(lh_px - _TARGET_LH) <= 2
        tdec = (st.get("text-decoration") or "").lower()
        decor_ok = "underline" not in tdec
        fw = (st.get("font-weight") or "400").lower()
        weight_ok = fw in ("400", "normal", "")
        fst = (st.get("font-style") or "").lower()
        style_ok = fst != "italic"
        return (
            size_ok
            and font_ok
            and color_ok
            and indent_ok
            and line_ok
            and decor_ok
            and weight_ok
            and style_ok
        )

    # ---- 3. Body paragraphs ----
    exclude_set = {
        "h1",
        "h2",
        "h3",
        "h4",
        "h5",
        "h6",
    }
    invalid_body: list[Tag] = []
    for el in soup.find_all(["p", "div", "span", "li", "td", "th"]):
        if not isinstance(el, Tag):
            continue
        if el.name in exclude_set:
            continue
        if "header" in " ".join(el.get("class", [])):
            continue
        if "footer" in " ".join(el.get("class", [])):
            continue
        if "toc" in " ".join(el.get("class", [])) or "目录" in " ".join(
            el.get("class", [])
        ):
            continue
        if not el.get_text(strip=True):
            continue
        if not body_el_ok(el):
            invalid_body.append(el)

    b_ok = len(invalid_body) == 0
    add_result(
        "正文格式",
        b_ok,
        "所有正文符合四号宋体/缩进/行距/颜色要求"
        if b_ok
        else "部分正文段落格式不符合要求",
        invalid_body,
    )

    # ---- 4. Table of contents ----
    toc_els: list[Tag] = []
    for cls in ("toc", "table-of-contents", "目录"):
        for t in soup.find_all(class_=cls):
            if isinstance(t, Tag) and t not in toc_els:
                toc_els.append(t)
    for t in soup.find_all(attrs={"role": "directory"}):
        if isinstance(t, Tag) and t not in toc_els:
            toc_els.append(t)

    if not toc_els:
        add_result("目录要求", False, "未检测到目录,请确保包含目录且目录无页码无页眉页脚")
    else:
        no_pn = True  # no page numbers in the TOC
        no_hf = True  # no header/footer elements inside the TOC
        for toc in toc_els:
            text = toc.get_text("\n", strip=True)
            lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
            for line in lines:
                if re.search(r"\d+\s*$", line) and re.search(r"\d$", line):
                    # NOTE(review): the dotted-leader test `\.{2,}\s*\d+`
                    # is repeated three times; only lines shaped like
                    # "章节....12" ever flip no_pn.  Kept verbatim.
                    if re.search(r"\.{2,}\s*\d+", line) or re.match(
                        r"^.*\d$", line
                    ):
                        if re.search(r"\.{2,}\s*\d+", line):
                            no_pn = False
                    if re.search(r"\.{2,}\s*\d+", line):
                        no_pn = False
            if toc.find(class_=re.compile("header|page-header", re.I)):
                no_hf = False
            if toc.find(class_=re.compile("footer|page-footer", re.I)):
                no_hf = False
        t_ok = no_pn and no_hf
        add_result(
            "目录要求",
            t_ok,
            "目录符合无页码、无页眉页脚要求"
            if t_ok
            else "目录中存在页码或页眉页脚",
        )

    # ---- 5. Figures / tables only inside the appendix (valid selectors) ----
    appendix: Tag | None = None
    for sel in (
        "#appendix",
        ".appendix",
        ".attachment",
        '[id*="附件"]',
        '[class*="附件"]',
        '[class*="附表"]',
    ):
        hit = soup.select_one(sel)
        if hit and isinstance(hit, Tag):
            appendix = hit
            break

    # Any table / img / figure / .chart outside the appendix is illegal.
    illegal: list[Tag] = []
    for tbl in soup.find_all("table"):
        if isinstance(tbl, Tag) and not _is_under(tbl, appendix):
            illegal.append(tbl)
    for im in soup.find_all("img"):
        if isinstance(im, Tag) and not _is_under(im, appendix):
            illegal.append(im)
    for el in soup.find_all("figure"):
        if isinstance(el, Tag) and not _is_under(el, appendix):
            illegal.append(el)
    for el in soup.find_all(class_="chart"):
        if isinstance(el, Tag) and not _is_under(el, appendix) and el not in illegal:
            illegal.append(el)

    # Text inside appendix tables/figures must be 10.5pt (五号) song-ti, black.
    chart_text_valid = True
    if appendix:
        for el in appendix.select("table, td, th, figcaption, .chart-text"):
            if not isinstance(el, Tag):
                continue
            st = _el_style_dict(el)
            if not st.get("font-size"):
                continue  # no inline size: skip rather than fail
            fs = _length_to_px(st.get("font-size", ""))
            size_ok = abs(fs - _TARGET_FIG) <= 1.5
            fam = (st.get("font-family") or "").lower()
            font_ok = "宋体" in fam or "simsun" in fam
            c_raw = (st.get("color") or "").strip()
            if c_raw and c_raw.lower() not in ("inherit", "initial"):
                c_ok = _color_normalized(st) == "rgb(0, 0, 0)" or c_raw.lower() in (
                    "#000",
                    "#000000",
                    "black",
                    "rgb(0,0,0)",
                )
            else:
                c_ok = True
            if not (size_ok and font_ok and c_ok):
                chart_text_valid = False

    c_ok2 = len(illegal) == 0 and chart_text_valid
    add_result(
        "图表规范",
        c_ok2,
        "图表仅出现在附件/附表内,且图表文字符合五号宋体"
        if c_ok2
        else f"正文中发现{len(illegal)}个图表或附件内图表文字格式错误",
        illegal,
    )

    # ---- 6. Color & decoration ----
    color_v: list[Tag] = []
    decor_v: list[Tag] = []
    for el in soup.find_all(True):
        if not isinstance(el, Tag):
            continue
        st = _el_style_dict(el)
        # Fast skip: nothing relevant declared inline.
        if not st.get("color") and not st.get("text-decoration") and not st.get(
            "border-bottom-style"
        ):
            continue
        col = (st.get("color") or "").strip().lower()
        if col and col not in (
            "inherit",
            "initial",
            "",
            "#000",
            "#000000",
            "black",
            "rgb(0,0,0)",
            "rgb(0, 0, 0)",
        ):
            if _color_normalized(st) and _color_normalized(st) != "rgb(0, 0, 0)":
                if el.get_text(strip=True):
                    color_v.append(el)
        tdec = (st.get("text-decoration") or "").lower()
        if "underline" in tdec and el.get_text(strip=True):
            decor_v.append(el)
        # A solid/dotted bottom border reads as an underline / emphasis mark.
        bbs = (st.get("border-bottom-style") or "").lower()
        if bbs in ("solid", "dotted") and el.get_text(strip=True):
            decor_v.append(el)
    col_ok = len(color_v) == 0 and len(decor_v) == 0
    add_result(
        "颜色与装饰",
        col_ok,
        "无彩色文字、无下划线、无着重号"
        if col_ok
        else f"发现{len(color_v)}处彩色文字,{len(decor_v)}处下划线/着重号",
        (color_v + decor_v)[:20],  # cap the report at 20 samples
    )

    # ---- 7. Page setup ----
    # NOTE(review): page_valid is assigned here and possibly below but is
    # never read afterwards — the final verdict uses page_ok only.
    page_valid = True
    margin_top = margin_bottom = margin_left = margin_right = None
    page_info = _parse_page_margins_from_html(raw_html)
    # Word usually emits @page inside <style>; parsed from raw_html above.
    if page_info and "shorthand" in page_info:
        # e.g. margin: 2.54cm 3.18cm
        parts = page_info["shorthand"].split()
        if len(parts) >= 4:
            # CSS order: top, right, bottom, left
            margin_top, margin_right, margin_bottom, margin_left = (
                parts[0],
                parts[1],
                parts[2],
                parts[3],
            )
        elif len(parts) == 2:
            margin_top = margin_bottom = parts[0]
            margin_left = margin_right = parts[1]
    elif page_info:
        margin_top = page_info.get("top")
        margin_bottom = page_info.get("bottom")
        margin_left = page_info.get("left")
        margin_right = page_info.get("right")

    # Fallback 1: a margin shorthand set inline on <body>.
    bst = _el_style_dict(body) if body else {}
    mraw = bst.get("margin", "")
    if mraw and not margin_top:
        margins = mraw.split()
        if len(margins) >= 1:
            margin_top = margins[0]
        if len(margins) >= 2:
            margin_right = margins[1]
        if len(margins) >= 3:
            margin_bottom = margins[2]
        if len(margins) >= 4:
            margin_left = margins[3]
        else:
            margin_left = margin_right

    # Fallback 2: individual margin-* properties inline on <body>.
    if not margin_top and body:
        margin_top = _get_inline_property(body, "margin-top")
        margin_bottom = _get_inline_property(body, "margin-bottom")
        margin_left = _get_inline_property(body, "margin-left")
        margin_right = _get_inline_property(body, "margin-right")

    if not any([margin_top, margin_bottom, margin_left, margin_right]) and not page_info:
        page_valid = False

    def m_ok(
        m: str | None,
        target: float,
    ) -> bool:
        # Compare the numeric part of a margin value against *target*.
        # NOTE(review): both branches are identical, so the "cm" test is
        # currently a no-op; non-cm units are compared by raw number.
        if not m:
            return False
        s = m.strip()
        if "cm" in s:
            return abs(_num(s) - target) < 0.01
        return abs(_num(s) - target) < 0.01

    # Required margins: 2.54cm (1in) top/bottom, 3.18cm left/right.
    top_ok = m_ok(margin_top, 2.54) or (
        (margin_top or "") in ("2.54cm", "1in")
    )
    bottom_ok = m_ok(margin_bottom, 2.54) or (
        (margin_bottom or "") in ("2.54cm", "1in")
    )
    left_ok = m_ok(margin_left, 3.18) or (margin_left or "").startswith("3.18")
    right_ok = m_ok(margin_right, 3.18) or (margin_right or "").startswith("3.18")

    # Orientation heuristic: a fixed (non-%, non-auto) inline width on <html>
    # is treated as landscape.
    html_tag = soup.find("html")
    w = _get_inline_property(html_tag, "width") if isinstance(html_tag, Tag) else ""  # type: ignore[arg-type]
    page_orientation = "横向" if w and w != "auto" and "%" not in w else "纵向"
    page_ok = bool(
        top_ok
        and bottom_ok
        and left_ok
        and right_ok
        and (page_orientation != "横向" or w in ("", "auto"))
    )
    if not margin_top:
        page_ok = False  # no top margin found anywhere -> cannot verify

    add_result(
        "页面设置",
        page_ok,
        "页面设置符合A4纵向/边距要求"
        if page_ok
        else "页面边距或纸张方向不符合要求",
    )

    return results
|
||||||
437
modules/exporter.py
Normal file
437
modules/exporter.py
Normal file
@ -0,0 +1,437 @@
|
|||||||
|
"""
|
||||||
|
Word 文档导出模块
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sqlite3
|
||||||
|
import logging
|
||||||
|
from datetime import datetime
|
||||||
|
from docx import Document
|
||||||
|
from docx.shared import Pt, Cm, RGBColor
|
||||||
|
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
||||||
|
from docx.oxml import OxmlElement
|
||||||
|
from docx.oxml.ns import qn
|
||||||
|
|
||||||
|
import config
|
||||||
|
from utils.outline_numbering import format_heading_display
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Heading style per outline level: (Word style name, font size in pt, bold).
# Levels deeper than 4 are clamped to 4 by the callers; the style-name slot
# is currently unpacked as `_` in _build_document (unused).
LEVEL_STYLES = {
    1: ('Heading 1', 16, True),
    2: ('Heading 2', 14, True),
    3: ('Heading 3', 13, False),
    4: ('Heading 4', 12, False),
}
|
||||||
|
|
||||||
|
|
||||||
|
def export_to_word(db_path: str, project_id: int) -> str:
    """
    Build the Word document for *project_id*, save it under
    config.EXPORT_DIR (data/exports/) and return the generated file name.

    Raises ValueError when the project does not exist.
    """
    conn = sqlite3.connect(db_path)
    try:
        # Project metadata
        cur = conn.cursor()
        cur.execute("SELECT name FROM projects WHERE id=?", (project_id,))
        project = cur.fetchone()
        if not project:
            raise ValueError(f'项目 {project_id} 不存在')
        project_name = project[0]

        # Bid outline text: its first non-empty line becomes the title-page
        # heading; otherwise fall back to "<project>技术标书".
        cur.execute("SELECT outline FROM tender_data WHERE project_id=?", (project_id,))
        td = cur.fetchone()
        bid_title = project_name + '技术标书'
        if td and td[0]:
            first_line = td[0].strip().split('\n')[0].strip()
            if first_line:
                bid_title = first_line

        # All sections in document order
        cur.execute('''
            SELECT section_number, section_title, level, is_leaf, content, intro_content
            FROM bid_sections
            WHERE project_id=?
            ORDER BY order_index
        ''', (project_id,))
        sections = cur.fetchall()

        doc = _build_document(bid_title, sections)

        # Save under a sanitized, timestamped file name.
        os.makedirs(config.EXPORT_DIR, exist_ok=True)
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        # NOTE(review): '\u4e00-\u9fff' in the membership string is a literal
        # 3-character sequence, NOT a range; harmless here because
        # str.isalnum() already accepts CJK characters.
        safe_name = ''.join(c for c in project_name if c.isalnum() or c in '._- \u4e00-\u9fff')
        filename = f'{safe_name}_{timestamp}.docx'
        filepath = os.path.join(config.EXPORT_DIR, filename)
        doc.save(filepath)
        logger.info(f'导出完成: {filepath}')
        return filename

    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
DISCLAIMER_TEXT = """\
|
||||||
|
免责声明
|
||||||
|
|
||||||
|
本工具仅供学习交流免费使用,所生成的技术方案不可直接用于投标,请务必人工核对。本工具不会通过任何平台进行销售,请用户注意辨别真伪。在您开始使用本AI标书制作服务之前,请认真阅读并同意以下关键条款。一旦您继续使用,即表示您已充分理解并认可本提示的全部内容。
|
||||||
|
|
||||||
|
服务定位
|
||||||
|
本工具为单机使用的AI标书辅助工具,旨在帮助您生成标书的参考素材。您需对最终自己编写的标书文件承担全部责任,包括审核、修改内容,确保其符合相关法律法规及项目要求。
|
||||||
|
|
||||||
|
准确性免责
|
||||||
|
本人不对AI生成内容的完全准确性与完整性作任何保证。您有义务自行核实所有关键信息,并自行承担因使用本工具所引发的一切后果。
|
||||||
|
|
||||||
|
标书风险
|
||||||
|
本工具所生成的素材文件仅作参考。若您使用(包括引用、修改或二次创作),需自行承担由此可能导致的废标、侵权等全部风险与责任,本人不承担任何相关责任。
|
||||||
|
|
||||||
|
责任限制
|
||||||
|
任何情形下,本人均不对因使用本服务而造成的任何直接、间接或衍生损失(例如利润损失、业务中断、数据丢失等)承担法律责任。
|
||||||
|
|
||||||
|
其他事项
|
||||||
|
本人保留随时修改或终止本服务的权利。本提示的解释及争议解决,均适用中华人民共和国法律。\
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def _add_disclaimer_page(doc: Document) -> None:
    """Append the disclaimer page (title + body paragraphs) to *doc*,
    followed by a page break.  Called first, so it becomes page 1."""
    # Page title: 16pt bold SimHei, centred.
    title_para = doc.add_paragraph()
    title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    title_run = title_para.add_run('免责声明')
    title_run.font.size = Pt(16)
    title_run.font.bold = True
    title_run.font.name = '黑体'
    # The East-Asian font must be set on the w:rFonts element directly.
    title_run._element.rPr.rFonts.set(qn('w:eastAsia'), '黑体')

    doc.add_paragraph()

    # Body paragraphs — skip the "免责声明" line and the following blank,
    # which were rendered above.
    body_lines = DISCLAIMER_TEXT.split('\n')[2:]
    for line in body_lines:
        p = doc.add_paragraph()
        stripped = line.strip()
        # Section-title heuristic: a non-empty line with no leading
        # ASCII-space or full-width-space indentation.
        is_section_title = bool(stripped) and not line.startswith(' ') and not line.startswith('\u3000')
        run = p.add_run(stripped if stripped else '')
        if is_section_title and stripped:
            run.font.bold = True
            run.font.size = Pt(11)
        else:
            run.font.size = Pt(10.5)
        run.font.name = 'Times New Roman'
        run._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')
        p.paragraph_format.space_after = Pt(4)
        _set_line_spacing_15(p)

    doc.add_page_break()
|
||||||
|
|
||||||
|
|
||||||
|
def _add_toc_tree_page(doc: Document, sections: list) -> None:
    """Insert a static tree-style table of contents after the title page.

    Entries are indented by heading level; this is plain text, not a
    Word TOC field, so it carries no page numbers.
    """
    toc_heading = doc.add_paragraph()
    toc_heading.alignment = WD_ALIGN_PARAGRAPH.CENTER
    tr = toc_heading.add_run('目录')
    tr.font.size = Pt(16)
    tr.font.bold = True
    tr.font.name = '黑体'
    tr._element.rPr.rFonts.set(qn('w:eastAsia'), '黑体')
    doc.add_paragraph()

    for row in sections:
        # row: (section_number, title, level, is_leaf, content, intro_content)
        section_number, title, level, _, _, _ = row
        level = min(int(level), 4)  # clamp to the 4 styled heading levels
        text = format_heading_display(level, str(section_number or ''), str(title or ''))
        p = doc.add_paragraph()
        # 0.75 cm extra indent per level below level 1.
        p.paragraph_format.left_indent = Cm(0.75 * max(0, level - 1))
        p.paragraph_format.space_after = Pt(3)
        run = p.add_run(text)
        run.font.size = Pt(12)
        run.font.name = '宋体'
        run._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')

    doc.add_page_break()
|
||||||
|
|
||||||
|
|
||||||
|
def _build_document(bid_title: str, sections) -> Document:
    """Assemble the complete Document: page setup, disclaimer page, title
    page, static TOC page, then every section heading and its content in
    order.  *sections* rows are
    (section_number, title, level, is_leaf, content, intro_content)."""
    doc = Document()

    # ── Page setup: A4 (21 × 29.7 cm) ────────────────────────────────────
    section_obj = doc.sections[0]
    section_obj.page_width = Cm(21)
    section_obj.page_height = Cm(29.7)
    section_obj.left_margin = Cm(3)
    section_obj.right_margin = Cm(2.5)
    section_obj.top_margin = Cm(2.5)
    section_obj.bottom_margin = Cm(2.5)

    # ── Disclaimer page (page 1) ─────────────────────────────────────────
    _add_disclaimer_page(doc)

    # ── Title page ───────────────────────────────────────────────────────
    title_para = doc.add_paragraph()
    title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    title_run = title_para.add_run(bid_title)
    title_run.font.size = Pt(22)
    title_run.font.bold = True
    title_run.font.color.rgb = RGBColor(0x1a, 0x56, 0xdb)  # accent blue
    title_run.font.name = '黑体'
    title_run._element.rPr.rFonts.set(qn('w:eastAsia'), '黑体')

    doc.add_paragraph()

    date_para = doc.add_paragraph()
    date_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    date_run = date_para.add_run(datetime.now().strftime('%Y年%m月'))
    date_run.font.size = Pt(14)
    date_run.font.name = '宋体'
    date_run._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')

    doc.add_page_break()

    # ── Static tree TOC (after title page, before the body) ─────────────
    _add_toc_tree_page(doc, sections)

    # ── Section contents ─────────────────────────────────────────────────
    for row in sections:
        section_number, title, level, is_leaf, content, intro = row
        level = min(int(level), 4)  # clamp to the 4 styled heading levels

        # Heading carrying the full outline number.
        heading_text = format_heading_display(level, str(section_number or ''), str(title or ''))
        heading = doc.add_heading(level=level)
        heading.clear()  # drop the default runs, keep the heading style
        run = heading.add_run(heading_text)
        _, font_size, bold = LEVEL_STYLES.get(level, ('Heading 4', 12, False))
        run.font.size = Pt(font_size)
        run.font.bold = bold
        # Levels 1–2 use SimHei, deeper levels SimSun.
        run.font.name = '黑体' if level <= 2 else '宋体'
        run._element.rPr.rFonts.set(qn('w:eastAsia'), '黑体' if level <= 2 else '宋体')

        # Section introduction (non-leaf nodes).
        if intro and intro.strip():
            _add_body_paragraphs(doc, intro)

        # Body content (leaf nodes).
        if content and content.strip():
            _add_body_paragraphs(doc, content)

    return doc
|
||||||
|
|
||||||
|
|
||||||
|
def _set_line_spacing_15(paragraph):
    """Set a paragraph to 1.5x line spacing (WD_LINE_SPACING.MULTIPLE x 1.5)."""
    from docx.oxml.ns import qn as _qn
    props = paragraph._element.get_or_add_pPr()
    node = props.find(_qn('w:spacing'))
    if node is None:
        node = OxmlElement('w:spacing')
        props.append(node)
    node.set(_qn('w:line'), '360')  # 240 twips per single line x 1.5 = 360
    node.set(_qn('w:lineRule'), 'auto')
|
||||||
|
|
||||||
|
|
||||||
|
# ── 图/表标记解析 ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
_BLOCK_PATTERN = re.compile(
|
||||||
|
r'\[FIGURE:([^\]]+)\](.*?)\[/FIGURE\]'
|
||||||
|
r'|\[TABLE:([^\]]+)\](.*?)\[/TABLE\]',
|
||||||
|
re.DOTALL
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _split_content_blocks(text: str) -> list:
|
||||||
|
"""
|
||||||
|
将章节正文拆分为有序内容块列表:
|
||||||
|
{'type': 'text', 'content': '...'}
|
||||||
|
{'type': 'figure', 'title': '...', 'content': '...'}
|
||||||
|
{'type': 'table', 'title': '...', 'content': '...'}
|
||||||
|
"""
|
||||||
|
blocks = []
|
||||||
|
last = 0
|
||||||
|
for m in _BLOCK_PATTERN.finditer(text):
|
||||||
|
if m.start() > last:
|
||||||
|
blocks.append({'type': 'text', 'content': text[last:m.start()]})
|
||||||
|
if m.group(1) is not None:
|
||||||
|
blocks.append({'type': 'figure',
|
||||||
|
'title': m.group(1).strip(),
|
||||||
|
'content': m.group(2).strip()})
|
||||||
|
else:
|
||||||
|
blocks.append({'type': 'table',
|
||||||
|
'title': m.group(3).strip(),
|
||||||
|
'content': m.group(4).strip()})
|
||||||
|
last = m.end()
|
||||||
|
if last < len(text):
|
||||||
|
blocks.append({'type': 'text', 'content': text[last:]})
|
||||||
|
return blocks
|
||||||
|
|
||||||
|
|
||||||
|
def _set_para_shading(para, hex_fill: str):
    """Apply a background fill colour to a paragraph via a w:shd element."""
    shd = OxmlElement('w:shd')
    for attr, value in (('w:val', 'clear'), ('w:color', 'auto'), ('w:fill', hex_fill)):
        shd.set(qn(attr), value)
    para._element.get_or_add_pPr().append(shd)
|
||||||
|
|
||||||
|
|
||||||
|
def _set_cell_bg(cell, hex_fill: str):
    """Apply a background fill colour to a table cell via a w:shd element."""
    shd = OxmlElement('w:shd')
    for attr, value in (('w:val', 'clear'), ('w:color', 'auto'), ('w:fill', hex_fill)):
        shd.set(qn(attr), value)
    cell._tc.get_or_add_tcPr().append(shd)
|
||||||
|
|
||||||
|
|
||||||
|
def _set_cell_padding(cell, pt_value: float):
    """Set equal inner padding on all four sides of a table cell (in points)."""
    twips = str(int(pt_value * 20))  # 1 pt = 20 twips
    tcMar = OxmlElement('w:tcMar')
    for side in ('top', 'left', 'bottom', 'right'):
        margin = OxmlElement(f'w:{side}')
        margin.set(qn('w:w'), twips)
        margin.set(qn('w:type'), 'dxa')
        tcMar.append(margin)
    cell._tc.get_or_add_tcPr().append(tcMar)
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_set_eastasia(run, font_name: str):
    """Set the East-Asian font of a run, making sure rPr exists first."""
    _ = run.font.size  # reading any font attribute forces python-docx to create rPr
    try:
        run._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)
    except Exception:
        pass  # best effort: keep the default font if the XML shape differs
|
||||||
|
|
||||||
|
|
||||||
|
def _add_block_caption(doc: Document, prefix: str, title: str):
    """Add a centered, bold caption line for a figure/table block ('图:'/'表:' prefix)."""
    cap = doc.add_paragraph()
    cap.alignment = WD_ALIGN_PARAGRAPH.CENTER
    cap.paragraph_format.space_before = Pt(8)
    cap.paragraph_format.space_after = Pt(3)
    run = cap.add_run(f'{prefix}:{title}')
    run.font.bold = True
    run.font.size = Pt(11)
    run.font.name = 'Times New Roman'
    _safe_set_eastasia(run, '黑体')  # SimHei for the CJK caption glyphs
|
||||||
|
|
||||||
|
|
||||||
|
def _add_figure_block(doc: Document, title: str, content: str):
    """
    Render figure content as a bordered, shaded text-diagram box.

    A single-cell table with the 'Table Grid' style provides the surrounding
    border, which looks more polished than a shaded paragraph alone.
    """
    _add_block_caption(doc, '图', title)

    lines = content.split('\n')

    # Single-cell table: full border + pale blue-grey background.
    tbl = doc.add_table(rows=1, cols=1)
    tbl.style = 'Table Grid'
    cell = tbl.cell(0, 0)
    _set_cell_bg(cell, 'EFF3FB')  # pale blue-grey fill
    _set_cell_padding(cell, 5)  # 5pt inner padding

    for i, line in enumerate(lines):
        if i == 0:
            # Reuse the cell's implicit first paragraph instead of adding one.
            para = cell.paragraphs[0]
            para.clear()
        else:
            para = cell.add_paragraph()
        para.paragraph_format.space_before = Pt(0)
        para.paragraph_format.space_after = Pt(1)
        run = para.add_run(line if line else ' ')
        run.font.size = Pt(9.5)
        run.font.name = 'Courier New'  # monospace keeps ASCII diagrams aligned
        _safe_set_eastasia(run, '宋体')

    # Blank spacer paragraph after the diagram.
    sp = doc.add_paragraph()
    sp.paragraph_format.space_after = Pt(8)
|
||||||
|
|
||||||
|
|
||||||
|
def _add_word_table(doc: Document, title: str, content: str):
    """Parse a Markdown table and render it as a styled Word table."""
    # Parse markdown rows, filtering out separator rows (|---|).
    raw_rows = []
    for line in content.strip().split('\n'):
        line = line.strip()
        if not line:
            continue
        if re.match(r'^\|[\s\-:| ]+\|$', line):
            continue  # separator row
        if line.startswith('|') and line.endswith('|'):
            cells = [c.strip() for c in line[1:-1].split('|')]
            raw_rows.append(cells)

    if not raw_rows:
        # No parsable rows: degrade to plain text under the caption.
        _add_block_caption(doc, '表', title)
        _add_plain_text(doc, content)
        return

    # Pad ragged rows so every row has the widest column count.
    col_count = max(len(r) for r in raw_rows)
    rows = [r + [''] * (col_count - len(r)) for r in raw_rows]

    _add_block_caption(doc, '表', title)

    table = doc.add_table(rows=len(rows), cols=col_count)
    table.style = 'Table Grid'

    for i, row_data in enumerate(rows):
        for j, cell_text in enumerate(row_data):
            cell = table.cell(i, j)
            para = cell.paragraphs[0]
            para.clear()
            # First row is treated as the header: centered and bold.
            para.alignment = WD_ALIGN_PARAGRAPH.CENTER if i == 0 else WD_ALIGN_PARAGRAPH.LEFT
            run = para.add_run(cell_text)
            run.font.size = Pt(10)
            run.font.bold = (i == 0)
            run.font.name = 'Times New Roman'
            _safe_set_eastasia(run, '宋体')
            if i == 0:
                _set_cell_bg(cell, 'D6E4F7')  # light blue header fill

    # Blank spacer paragraph after the table.
    sp = doc.add_paragraph()
    sp.paragraph_format.space_after = Pt(6)
|
||||||
|
|
||||||
|
|
||||||
|
def _add_plain_text(doc: Document, text: str):
    """Render plain text as indented body paragraphs (internal helper)."""
    for raw_line in text.split('\n'):
        stripped = raw_line.strip()
        if not stripped:
            continue
        para = doc.add_paragraph()
        fmt = para.paragraph_format
        fmt.first_line_indent = Pt(24)  # roughly a two-character CJK indent at 12pt
        fmt.space_after = Pt(6)
        _set_line_spacing_15(para)
        run = para.add_run(stripped)
        run.font.size = Pt(12)
        run.font.name = 'Times New Roman'
        _safe_set_eastasia(run, '宋体')
|
||||||
|
|
||||||
|
|
||||||
|
def _add_body_paragraphs(doc: Document, text: str):
    """
    Render body text block by block, dispatching [FIGURE:...] and
    [TABLE:...] markers to their dedicated renderers.
    """
    renderers = {
        'figure': lambda b: _add_figure_block(doc, b['title'], b['content']),
        'table': lambda b: _add_word_table(doc, b['title'], b['content']),
    }
    for block in _split_content_blocks(text):
        handler = renderers.get(block['type'])
        if handler is not None:
            handler(block)
        else:
            _add_plain_text(doc, block['content'])
|
||||||
|
|
||||||
|
|
||||||
1205
modules/generator.py
Normal file
1205
modules/generator.py
Normal file
File diff suppressed because it is too large
Load Diff
292
modules/knowledge.py
Normal file
292
modules/knowledge.py
Normal file
@ -0,0 +1,292 @@
|
|||||||
|
"""
|
||||||
|
企业知识库模块(无外部向量库依赖)
|
||||||
|
|
||||||
|
存储后端:SQLite(与主数据库共用同一文件)
|
||||||
|
- knowledge_vectors 表:文本块 + JSON 向量
|
||||||
|
- knowledge_files 表:文件元数据(已在 app.py init_db 中建立)
|
||||||
|
|
||||||
|
检索策略:
|
||||||
|
Qwen / OpenAI provider → Embedding API + 余弦相似度(语义检索)
|
||||||
|
DeepSeek / Ollama → SQL LIKE 关键词检索(降级)
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
import math
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import threading
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
import config
|
||||||
|
from utils.file_utils import extract_text, split_text_chunks
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# 正在后台入库的文件名集合(供前端轮询感知"处理中"状态)
|
||||||
|
_processing_files: set = set()
|
||||||
|
_processing_lock = threading.Lock()
|
||||||
|
|
||||||
|
# 每次 Embedding API 批量请求的块数(避免单次请求过大)
|
||||||
|
_EMBED_BATCH = 16
|
||||||
|
|
||||||
|
|
||||||
|
# ─── 数据库 ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _conn() -> sqlite3.Connection:
    """Open a new connection to the shared application SQLite database."""
    return sqlite3.connect(config.DB_PATH)
|
||||||
|
|
||||||
|
|
||||||
|
def _init_tables(cur: sqlite3.Cursor) -> None:
    """Ensure the vector-chunk table exists (knowledge_files is created by app.py init_db)."""
    # One row per text chunk; `embedding` holds a JSON-encoded float list,
    # or NULL when the provider has no Embedding API (keyword fallback mode).
    cur.execute('''
        CREATE TABLE IF NOT EXISTS knowledge_vectors (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            file_name TEXT NOT NULL,
            chunk_idx INTEGER NOT NULL,
            text TEXT NOT NULL,
            embedding TEXT,
            UNIQUE(file_name, chunk_idx)
        )
    ''')
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Embedding API ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _get_embeddings_batch(texts: list[str]) -> list[list[float] | None]:
    """
    Call the current provider's Embedding API and return one vector per input text.

    Providers without an Embedding API (DeepSeek / Ollama / Doubao) — and any
    API failure — yield a same-length list of None, which callers treat as
    "fall back to keyword search".
    """
    if not texts:
        return []

    provider = getattr(config, 'MODEL_PROVIDER', '')
    try:
        from openai import OpenAI
        if provider == 'qwen':
            client = OpenAI(api_key=config.QWEN_API_KEY, base_url=config.QWEN_BASE_URL)
            model = config.QWEN_EMBEDDING_MODEL
        elif provider == 'openai':
            client = OpenAI(api_key=config.OPENAI_API_KEY, base_url=config.OPENAI_BASE_URL)
            model = config.OPENAI_EMBEDDING_MODEL
        elif provider == 'kimi':
            client = OpenAI(api_key=config.KIMI_API_KEY, base_url=config.KIMI_BASE_URL)
            model = config.KIMI_EMBEDDING_MODEL
        else:
            # DeepSeek / Ollama / Doubao expose no public Embedding API:
            # degrade to keyword search.
            return [None] * len(texts)

        resp = client.embeddings.create(input=texts, model=model)
        return [item.embedding for item in resp.data]

    except Exception as e:
        logger.warning(f'Embedding API 调用失败,将使用关键词检索降级: {e}')
        return [None] * len(texts)
|
||||||
|
|
||||||
|
|
||||||
|
def _cosine(a: list[float], b: list[float]) -> float:
|
||||||
|
"""纯 Python 余弦相似度,无需 numpy"""
|
||||||
|
dot = sum(x * y for x, y in zip(a, b))
|
||||||
|
na = math.sqrt(sum(x * x for x in a))
|
||||||
|
nb = math.sqrt(sum(x * x for x in b))
|
||||||
|
return dot / (na * nb) if na and nb else 0.0
|
||||||
|
|
||||||
|
|
||||||
|
# ─── 公开接口 ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def is_available() -> dict:
    """
    Report knowledge-base status; the store itself is always available
    (pure SQLite, no external dependency).

    Returns a dict with:
        available   -- always True
        doc_count   -- number of stored text chunks
        processing  -- file names currently being ingested in the background
        search_mode -- 'vector' (semantic) or 'keyword' (fallback)
        error       -- present only when the status query itself failed
    """
    with _processing_lock:
        processing = list(_processing_files)

    try:
        db = _conn()
        try:
            cur = db.cursor()
            _init_tables(cur)
            db.commit()

            cur.execute('SELECT COUNT(*) FROM knowledge_vectors')
            doc_count = cur.fetchone()[0]

            # If any stored chunk has a vector, the Embedding API has worked before.
            cur.execute('SELECT 1 FROM knowledge_vectors WHERE embedding IS NOT NULL LIMIT 1')
            has_embedding = cur.fetchone() is not None
        finally:
            # BUGFIX: the connection used to leak when any query above raised;
            # close it on every path.
            db.close()

        provider = getattr(config, 'MODEL_PROVIDER', '')
        can_embed = provider in ('qwen', 'openai', 'kimi')
        mode = 'vector' if (has_embedding or can_embed) else 'keyword'

        return {
            'available': True,
            'doc_count': doc_count,
            'processing': processing,
            'search_mode': mode,
        }
    except Exception as e:
        return {
            'available': True,
            'doc_count': 0,
            'processing': processing,
            'search_mode': 'keyword',
            'error': str(e),
        }
|
||||||
|
|
||||||
|
|
||||||
|
def add_file(file_path: str, db_path: str) -> dict:
    """
    Ingest a file into the knowledge base: split into chunks -> batch
    Embedding -> write to SQLite.

    Runs on a background thread; _processing_files lets the frontend poll
    for the "in progress" state.

    Returns {'success': True, 'chunks': N} or {'success': False, 'error': msg}.
    """
    file_name = os.path.basename(file_path)
    with _processing_lock:
        _processing_files.add(file_name)

    try:
        text = extract_text(file_path)
        chunks = split_text_chunks(text, config.CHUNK_SIZE, config.CHUNK_OVERLAP)
        if not chunks:
            return {'success': False, 'error': '文件内容为空,无法入库'}

        # Batch the Embedding requests (effective for Qwen/OpenAI/Kimi
        # providers; otherwise every entry comes back None -> keyword mode).
        embeddings: list[list[float] | None] = []
        for i in range(0, len(chunks), _EMBED_BATCH):
            batch = chunks[i:i + _EMBED_BATCH]
            embeddings.extend(_get_embeddings_batch(batch))

        db = _conn()
        try:
            cur = db.cursor()
            _init_tables(cur)

            # Drop any previous data for a file of the same name first.
            cur.execute('DELETE FROM knowledge_vectors WHERE file_name=?', (file_name,))

            for idx, (chunk, emb) in enumerate(zip(chunks, embeddings)):
                emb_json = json.dumps(emb) if emb is not None else None
                cur.execute(
                    'INSERT INTO knowledge_vectors (file_name, chunk_idx, text, embedding) VALUES (?,?,?,?)',
                    (file_name, idx, chunk, emb_json),
                )

            cur.execute('''
                INSERT OR REPLACE INTO knowledge_files (file_name, file_path, chunk_count, added_at)
                VALUES (?, ?, ?, ?)
            ''', (file_name, file_path, len(chunks), datetime.now()))

            db.commit()
        finally:
            db.close()

        logger.info(f'知识库入库完成: {file_name},{len(chunks)} 块'
                    f'{"(含向量)" if any(e is not None for e in embeddings) else "(关键词模式)"}')
        return {'success': True, 'chunks': len(chunks)}

    except Exception as e:
        logger.exception('知识库添加文件失败')
        return {'success': False, 'error': str(e)}
    finally:
        # Always clear the "processing" marker, success or failure.
        with _processing_lock:
            _processing_files.discard(file_name)
|
||||||
|
|
||||||
|
|
||||||
|
def search(query: str, top_k: int | None = None) -> list[str]:
    """
    Retrieve the text chunks most relevant to `query`.

    - Vector mode: embed the query, rank stored chunks by cosine similarity.
    - Keyword mode (fallback for providers without an Embedding API):
      multi-word SQL LIKE matching.

    Returns at most `top_k` chunk texts (config.TOP_K_KNOWLEDGE by default);
    an empty list when the store is empty or any step fails.
    """
    if top_k is None:
        top_k = config.TOP_K_KNOWLEDGE

    try:
        db = _conn()
        try:
            cur = db.cursor()
            _init_tables(cur)
            db.commit()

            cur.execute('SELECT COUNT(*) FROM knowledge_vectors')
            if cur.fetchone()[0] == 0:
                return []

            # ── semantic (vector) retrieval ──────────────────────────────────
            q_embs = _get_embeddings_batch([query])
            q_emb = q_embs[0] if q_embs else None

            if q_emb is not None:
                cur.execute(
                    'SELECT text, embedding FROM knowledge_vectors WHERE embedding IS NOT NULL'
                )
                rows = cur.fetchall()
                if rows:
                    scored: list[tuple[float, str]] = []
                    for text, emb_json in rows:
                        try:
                            emb = json.loads(emb_json)
                            scored.append((_cosine(q_emb, emb), text))
                        except Exception:
                            continue  # skip rows with corrupt embedding JSON
                    scored.sort(reverse=True)
                    return [t for _, t in scored[:top_k]]

            # ── keyword fallback (DeepSeek / Ollama: no Embedding API) ──────
            # Filter out purely numeric/ordinal tokens ("1.2", "一、") to
            # avoid matching unrelated passages.
            import re as _re
            _num_pat = _re.compile(r'^[\d\.\-、一二三四五六七八九十]+$')
            words = [
                w.strip() for w in query.split()
                if len(w.strip()) > 1 and not _num_pat.match(w.strip())
            ][:6]
            if not words:
                # No usable keywords: return an arbitrary sample of chunks.
                cur.execute('SELECT text FROM knowledge_vectors LIMIT ?', (top_k,))
                return [r[0] for r in cur.fetchall()]

            conditions = ' OR '.join(['text LIKE ?' for _ in words])
            params = [f'%{w}%' for w in words] + [top_k]
            cur.execute(
                f'SELECT text FROM knowledge_vectors WHERE {conditions} LIMIT ?', params
            )
            return [r[0] for r in cur.fetchall()]

        finally:
            db.close()

    except Exception as e:
        logger.error(f'知识库检索失败: {e}')
        return []
|
||||||
|
|
||||||
|
|
||||||
|
def list_files(db_path: str) -> list[dict]:
    """
    List the files already ingested into the knowledge base.

    Args:
        db_path: path to the SQLite database file holding knowledge_files.

    Returns:
        [{'name': ..., 'chunks': ..., 'added_at': ...}, ...] newest first;
        an empty list when the table is missing or any query fails.
    """
    db = None
    try:
        db = sqlite3.connect(db_path)
        cur = db.cursor()
        cur.execute(
            'SELECT file_name, chunk_count, added_at FROM knowledge_files ORDER BY added_at DESC'
        )
        rows = cur.fetchall()
        return [{'name': r[0], 'chunks': r[1], 'added_at': r[2]} for r in rows]
    except Exception:
        return []
    finally:
        # BUGFIX: the connection used to leak when the query raised
        # (the old code only closed it on the success path).
        if db is not None:
            db.close()
|
||||||
|
|
||||||
|
|
||||||
|
def delete_file(file_name: str, db_path: str) -> dict:
    """
    Delete all knowledge-base data (chunks + file record) for the given file.

    NOTE(review): `db_path` is accepted but never used — the connection comes
    from _conn() (config.DB_PATH). Confirm this is intentional; list_files
    does honour its db_path argument.
    """
    try:
        db = _conn()
        cur = db.cursor()
        _init_tables(cur)
        cur.execute('DELETE FROM knowledge_vectors WHERE file_name=?', (file_name,))
        cur.execute('DELETE FROM knowledge_files WHERE file_name=?', (file_name,))
        db.commit()
        db.close()
        return {'success': True}
    except Exception as e:
        logger.exception('知识库删除文件失败')
        return {'success': False, 'error': str(e)}
|
||||||
179
modules/parser.py
Normal file
179
modules/parser.py
Normal file
@ -0,0 +1,179 @@
|
|||||||
|
"""
|
||||||
|
招标文件解析模块
|
||||||
|
流程:提取文本 → 生成摘要 → 提取评分要求 → 结构化JSON
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import sqlite3
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from utils import ai_client, prompts as P
|
||||||
|
from utils.file_utils import extract_text, truncate_text
|
||||||
|
from utils.tender_kind_sections import (
|
||||||
|
get_tender_kind_classify_prompt,
|
||||||
|
parse_tender_kind_response,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_boq_file(db_path: str, project_id: int, file_path: str, file_name: str) -> None:
    """
    Background thread: parse a bill-of-quantities (BOQ) file — local
    structural analysis -> AI summary -> persist to the database.

    boq_status lifecycle: none -> parsing -> done / error
    """
    # Imported lazily so the module loads even when these helpers are heavy.
    from utils.bill_analysis import analyze_boq_pages, categories_to_prompt_appendix
    from utils.boq_parser import extract_boq_pages

    conn = sqlite3.connect(db_path)
    try:
        _set_boq_status(conn, project_id, 'parsing', '正在提取工程量清单文本...')

        page_texts = extract_boq_pages(file_path)
        boq_text = '\n'.join(page_texts).strip()
        if not boq_text:
            raise ValueError('未能从文件中提取到有效内容,请检查文件格式')

        _set_boq_status(conn, project_id, 'parsing', '正在本地解析清单结构...')
        analysis = analyze_boq_pages(page_texts)
        boq_analysis_json = json.dumps(analysis, ensure_ascii=False)

        # Only build the structured appendix when the file is neither scanned
        # nor free of bill pages; otherwise the summary runs on raw text only.
        structured = ''
        if not analysis.get('scanned') and not analysis.get('no_bill_pages'):
            structured = categories_to_prompt_appendix(analysis)

        _set_boq_status(conn, project_id, 'parsing', '正在生成工程量清单摘要...')

        # Cap prompt input at 10k chars; stored text is capped at 12k below.
        summary_prompt = P.get_boq_summary_prompt(boq_text[:10000], structured)
        boq_summary = ai_client.chat(summary_prompt, temperature=0.2, max_tokens=2048)

        cur = conn.cursor()
        cur.execute('''
            UPDATE tender_data
            SET boq_file_name=?, boq_text=?, boq_summary=?, boq_analysis_json=?,
                boq_status='done', boq_error='', updated_at=?
            WHERE project_id=?
        ''', (file_name, boq_text[:12000], boq_summary, boq_analysis_json, datetime.now(), project_id))
        conn.commit()
        logger.info(f'项目 {project_id} 工程量清单解析完成')

    except Exception as e:
        logger.exception(f'工程量清单解析失败 project_id={project_id}')
        _set_boq_status(conn, project_id, 'error', str(e))
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _set_boq_status(conn, project_id, status, message=''):
|
||||||
|
cur = conn.cursor()
|
||||||
|
cur.execute('''
|
||||||
|
UPDATE tender_data SET boq_status=?, boq_error=?, updated_at=?
|
||||||
|
WHERE project_id=?
|
||||||
|
''', (status, message, datetime.now(), project_id))
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def parse_tender_file(db_path: str, project_id: int, file_path: str, file_name: str) -> None:
    """
    Runs in a background thread: parse the tender document and write the
    results to the database.

    status lifecycle: pending -> parsing -> done / error
    """
    conn = sqlite3.connect(db_path)
    try:
        _set_status(conn, project_id, 'parsing', '正在提取文件文本...')

        # 1. Extract the raw text (capped to keep prompts within model limits).
        raw_text = extract_text(file_path)
        raw_text = truncate_text(raw_text, 60000)

        _set_status(conn, project_id, 'parsing', '正在生成招标摘要...')

        # 2. Generate the structured summary.
        summary_prompt = P.get_project_summary_prompt(raw_text)
        summary = ai_client.chat(summary_prompt, temperature=0.3, max_tokens=4096)

        _set_status(conn, project_id, 'parsing', '正在提取技术评分要求...')

        # 3. Extract the technical scoring requirements (Markdown format).
        rating_prompt = P.get_rating_requirements_prompt(raw_text)
        rating_md = ai_client.chat(rating_prompt, temperature=0.2, max_tokens=4096)

        _set_status(conn, project_id, 'parsing', '正在结构化评分数据...')

        # 4. Convert the scoring requirements to JSON (low temperature for
        #    determinism; _clean_json strips markdown fences from the reply).
        rating_json_prompt = P.get_rating_json_prompt(rating_md)
        rating_json_raw = ai_client.chat(rating_json_prompt, temperature=0.1, max_tokens=2048)
        rating_json_str = _clean_json(rating_json_raw)

        _set_status(conn, project_id, 'parsing', '正在识别招标文件类型(工程/服务/货物)...')
        # 5. Classify the tender kind from a leading excerpt of the document.
        excerpt = (raw_text or '')[:15000]
        kind_prompt = get_tender_kind_classify_prompt(excerpt)
        kind_raw = ai_client.chat(kind_prompt, temperature=0.1, max_tokens=32)
        tender_kind = parse_tender_kind_response(kind_raw)
        logger.info(f'项目 {project_id} 招标文件类型识别为: {tender_kind}')

        # Persist everything in one upsert, then mark the project done.
        _upsert_tender_data(conn, project_id, file_name, raw_text,
                            summary, rating_md, rating_json_str, tender_kind)
        _set_status(conn, project_id, 'done', '解析完成')
        logger.info(f'项目 {project_id} 招标文件解析完成')

    except Exception as e:
        logger.exception(f'解析失败 project_id={project_id}')
        _set_status(conn, project_id, 'error', str(e))
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
# ─── 内部工具 ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _set_status(conn, project_id, status, message=''):
|
||||||
|
cur = conn.cursor()
|
||||||
|
cur.execute('''
|
||||||
|
INSERT INTO tender_data (project_id, status, error_message)
|
||||||
|
VALUES (?, ?, ?)
|
||||||
|
ON CONFLICT(project_id) DO UPDATE SET status=?, error_message=?, updated_at=?
|
||||||
|
''', (project_id, status, message, status, message, datetime.now()))
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def _upsert_tender_data(conn, project_id, file_name, raw_text,
                        summary, rating_md, rating_json_str,
                        tender_kind: str = 'engineering'):
    """Insert or update the parsed tender payload for a project, marking it 'done'."""
    cur = conn.cursor()
    # The parameter tuple is doubled: the first seven values feed the INSERT,
    # the remaining seven feed the ON CONFLICT UPDATE branch — keep the two
    # halves in the same column order when editing.
    cur.execute('''
        INSERT INTO tender_data
            (project_id, file_name, raw_text, summary, rating_requirements, rating_json,
             tender_kind, status, error_message)
        VALUES (?, ?, ?, ?, ?, ?, ?, 'done', '')
        ON CONFLICT(project_id) DO UPDATE SET
            file_name=?, raw_text=?, summary=?, rating_requirements=?,
            rating_json=?, tender_kind=?, status='done', error_message='', updated_at=?
    ''', (
        project_id, file_name, raw_text, summary, rating_md, rating_json_str, tender_kind,
        file_name, raw_text, summary, rating_md, rating_json_str, tender_kind, datetime.now()
    ))
    conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def _clean_json(raw: str) -> str:
|
||||||
|
"""尝试从 AI 返回中提取 JSON 字符串"""
|
||||||
|
# 去除 markdown 代码块
|
||||||
|
raw = re.sub(r'```(?:json)?\s*', '', raw)
|
||||||
|
raw = raw.replace('```', '').strip()
|
||||||
|
# 验证是否是有效 JSON
|
||||||
|
try:
|
||||||
|
json.loads(raw)
|
||||||
|
return raw
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
# 尝试提取 { ... } 部分
|
||||||
|
m = re.search(r'\{[\s\S]*\}', raw)
|
||||||
|
if m:
|
||||||
|
candidate = m.group(0)
|
||||||
|
try:
|
||||||
|
json.loads(candidate)
|
||||||
|
return candidate
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return raw
|
||||||
36
prompts/chapter_outline.txt
Normal file
36
prompts/chapter_outline.txt
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
- 角色:技术标书架构师
|
||||||
|
|
||||||
|
- 能力:
|
||||||
|
- 单章节深度解构能力
|
||||||
|
- 跨章节协同规划视野
|
||||||
|
- 评分权重动态分配策略
|
||||||
|
|
||||||
|
- 任务:根据招标文件概要、章节主题、评分要求,生成技术标书中该章节的结构化目录
|
||||||
|
|
||||||
|
- 输出要求:
|
||||||
|
- 采用四级嵌套编码体系(X.X.X.X)确保章节颗粒度可控
|
||||||
|
- 直接给出生成的章节大纲,禁止解释和引导词
|
||||||
|
- markdown格式输出
|
||||||
|
|
||||||
|
|
||||||
|
- 示例输出,以"服务进度保障措施"为例:
|
||||||
|
二、智慧物流系统全生命周期进度保障
|
||||||
|
2.1 基于BIM的进度协同管理平台
|
||||||
|
2.1.1 多级进度计划耦合模型
|
||||||
|
2.1.1.1 WBS-Milestone映射矩阵
|
||||||
|
2.1.1.2 Primavera P6进度基线
|
||||||
|
2.1.2 资源约束进度优化算法
|
||||||
|
2.1.2.1 基于CPM的缓冲区间动态分配
|
||||||
|
2.1.2.2 资源平滑度R=0.92
|
||||||
|
|
||||||
|
- 招标文件概要:
|
||||||
|
{summary}
|
||||||
|
|
||||||
|
- 章节主题:
|
||||||
|
{chapter}
|
||||||
|
|
||||||
|
- 评分要求:
|
||||||
|
{score}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
158
prompts/outlines.txt
Normal file
158
prompts/outlines.txt
Normal file
@ -0,0 +1,158 @@
|
|||||||
|
- 角色:技术标书架构师
|
||||||
|
- 任务:生成适配技术评分标准的技术标书目录
|
||||||
|
- 输出要求:
|
||||||
|
采用四级嵌套编码体系(X.X.X.X)下实现按需分层
|
||||||
|
直接给出生成的目录,禁止解释和引导词
|
||||||
|
|
||||||
|
- 约束控制:
|
||||||
|
根据项目生成标书的名称,如“XXXX项目技术标书”
|
||||||
|
总的章节数应该控制在8-10个
|
||||||
|
章节颗粒度与评分指标权重正相关
|
||||||
|
技术实施类章节必须达到四级深度,管理保障类章节允许三级结构
|
||||||
|
同级节点数量必须有波动区间:技术方案类(4-7)、实施保障类(2-4)、创新应用类(1-3)
|
||||||
|
目录的章节不能缺少包含以下关键词的内容:
|
||||||
|
- 对本项目的了解和分析
|
||||||
|
- 项目工作重难点分析
|
||||||
|
- 项目实施方案
|
||||||
|
- 服务进度保障措施
|
||||||
|
- 服务质量保障方案
|
||||||
|
- 合理化建议
|
||||||
|
- 服务承诺及处罚措施
|
||||||
|
目录不包含成本和预算内容,但要平衡项目预算、技术可行性以及技术的专业度
|
||||||
|
|
||||||
|
- 示例输出:
|
||||||
|
<example>
|
||||||
|
花岭新城BIM项目技术标书
|
||||||
|
一、总体实施方案
|
||||||
|
1.1 项目理解与需求分析
|
||||||
|
1.1.1 项目概述
|
||||||
|
1.1.1.1 建设地点及规模
|
||||||
|
1.1.1.2 工程地质勘察报告
|
||||||
|
1.1.1.3 抗震设防烈度与防火等级
|
||||||
|
1.1.1.4 建筑结构形式与建筑面积分布
|
||||||
|
1.1.2 项目背景
|
||||||
|
1.1.2.1 核心宗旨与目标
|
||||||
|
1.1.2.2 地理位置与项目规模
|
||||||
|
1.1.3 项目目标
|
||||||
|
1.1.3.1 就业机会与基础设施提升
|
||||||
|
1.1.3.2 乡村振兴与经济增长
|
||||||
|
1.1.4 项目特点
|
||||||
|
1.1.4.1 框筒结构抗震性能
|
||||||
|
1.1.4.2 分阶段工程地质勘察
|
||||||
|
1.1.4.3 功能区域多样化
|
||||||
|
|
||||||
|
二、建筑设计
|
||||||
|
2.1 主要设计依据
|
||||||
|
2.1.1 国家标准与规范
|
||||||
|
2.1.2 行业标准与图集
|
||||||
|
2.2 建筑结构设计
|
||||||
|
2.2.1 结构形式
|
||||||
|
2.2.2 结构材料
|
||||||
|
2.2.3 结构布局
|
||||||
|
2.2.4 结构经济指标
|
||||||
|
2.2.5 结构细节设计
|
||||||
|
2.3 建筑功能布局
|
||||||
|
2.3.1 C1#楼(厂房)
|
||||||
|
2.3.1.1 功能分区明确
|
||||||
|
2.3.1.2 流线优化与安全性
|
||||||
|
2.3.2 配电房
|
||||||
|
2.3.2.1 设计目标与设备布置
|
||||||
|
2.3.2.2 空间规划与电气主接线方案
|
||||||
|
2.3.3 外廊及架空建筑
|
||||||
|
2.3.3.1 功能区域与景观设计
|
||||||
|
2.3.3.2 光照与通风优化
|
||||||
|
2.4 建筑材料选用
|
||||||
|
2.5 建筑外观设计
|
||||||
|
2.6 建筑室内布局
|
||||||
|
2.6.1 功能分区与设计要点
|
||||||
|
2.7 建筑安全和消防设计
|
||||||
|
2.7.1 建筑安全体系
|
||||||
|
2.7.2 消防系统设计
|
||||||
|
2.8 建筑节能设计
|
||||||
|
2.8.1 节能措施与绿色建材
|
||||||
|
2.8.2 雨水收集系统
|
||||||
|
|
||||||
|
三、结构设计
|
||||||
|
3.1 结构形式
|
||||||
|
3.2 结构材料
|
||||||
|
3.2.1 混凝土与钢材选用
|
||||||
|
3.3 结构布局
|
||||||
|
3.3.1 结构柱网与通风疏散通道
|
||||||
|
3.4 结构经济指标
|
||||||
|
3.4.1 抗震设计要求与用材控制
|
||||||
|
3.5 结构细节设计
|
||||||
|
3.5.1 基础设计与钢结构细节
|
||||||
|
3.5.2 混凝土结构与抗震设计
|
||||||
|
3.6 结构分析与计算
|
||||||
|
|
||||||
|
四、给排水设计
|
||||||
|
4.1 引言
|
||||||
|
4.2 供水系统设计
|
||||||
|
4.2.1 供水管道与消防水源
|
||||||
|
4.2.2 节水设计与雨水收集
|
||||||
|
4.3 排水系统设计
|
||||||
|
4.3.1 排水管道与雨水管理
|
||||||
|
4.3.2 污水处理与分流制度
|
||||||
|
4.4 给排水设备选择
|
||||||
|
4.5 细节设计
|
||||||
|
4.6 监测与维护
|
||||||
|
|
||||||
|
五、暖通设计
|
||||||
|
5.1 引言
|
||||||
|
5.2 供暖系统设计
|
||||||
|
5.2.1 供暖方式与设备选择
|
||||||
|
5.2.2 温度控制系统
|
||||||
|
5.3 通风系统设计
|
||||||
|
5.3.1 通风方式与设备选择
|
||||||
|
5.3.2 空气质量控制
|
||||||
|
5.4 空调系统设计
|
||||||
|
5.4.1 空调方式与设备选择
|
||||||
|
5.4.2 温湿度控制系统
|
||||||
|
5.5 热水系统设计
|
||||||
|
5.6 细节设计与监测维护
|
||||||
|
|
||||||
|
|
||||||
|
六、BIM设计
|
||||||
|
6.1 项目总图与单体建筑设计
|
||||||
|
6.2 道路与排水设计
|
||||||
|
6.3 电气系统设计
|
||||||
|
6.4 绿化设计
|
||||||
|
6.5 BIM协同设计与施工管理
|
||||||
|
6.6 数据管理与培训支持
|
||||||
|
|
||||||
|
七、设计说明
|
||||||
|
7.1 项目设计依据
|
||||||
|
7.2 设计原则
|
||||||
|
7.3 结构经济合理化
|
||||||
|
7.4 建筑功能分区
|
||||||
|
7.5 设计细节要求
|
||||||
|
|
||||||
|
八、合理化建议
|
||||||
|
8.1 建筑专业合理化建议
|
||||||
|
8.2 结构专业合理化建议
|
||||||
|
8.3 给排水专业合理化建议
|
||||||
|
8.4 暖通专业合理化建议
|
||||||
|
8.5 BIM专业合理化建议
|
||||||
|
8.6 技术和工艺方面的建议
|
||||||
|
8.7 成本和预算方面的建议
|
||||||
|
8.8 时间和进度方面的建议
|
||||||
|
8.9 施工质量管理方面的建议
|
||||||
|
8.10 质量和安全方面的建议
|
||||||
|
8.11 环境和可持续性方面的建议
|
||||||
|
|
||||||
|
九、施工进度安排
|
||||||
|
9.1 施工进度安排
|
||||||
|
9.2 施工进度跟踪与管理
|
||||||
|
9.3 施工质量管理
|
||||||
|
9.4 施工现场管理
|
||||||
|
9.5 施工结项与验收
|
||||||
|
|
||||||
|
十、本项目工作重点难点分析
|
||||||
|
10.1 工程特点与设计工作难点
|
||||||
|
10.2 重点与难点分析
|
||||||
|
10.3 综合解决措施
|
||||||
|
</example>
|
||||||
|
|
||||||
|
- 招标文件内容:
|
||||||
|
{document_text}
|
||||||
|
"""
|
||||||
155
prompts/outlines_with_rating.txt
Normal file
155
prompts/outlines_with_rating.txt
Normal file
@ -0,0 +1,155 @@
|
|||||||
|
- 角色:技术标书架构师
|
||||||
|
- 任务:生成适配技术评分标准的技术标书目录
|
||||||
|
- 输出要求:
|
||||||
|
采用四级嵌套编码体系(X.X.X.X)下实现按需分层
|
||||||
|
直接给出生成的目录,禁止解释和引导词
|
||||||
|
|
||||||
|
- 约束控制:
|
||||||
|
根据项目生成标书的名称,如“XXXX项目技术标书”
|
||||||
|
总的章节数应该控制在8-10个,不超过10个
|
||||||
|
目录的章节必须按照技术评分标准的项目生成,题目应包括技术评分项目中的关键词:
|
||||||
|
章节颗粒度与评分指标权重正相关
|
||||||
|
技术方案类章节必须达到四级深度,管理保障类章节允许三级结构
|
||||||
|
同级节点数量必须有波动区间:技术方案类(4-7)、实施保障类(2-4)、创新应用类(1-3)
|
||||||
|
目录禁止包含报价、团队、资质、文件等商务性质的章节
|
||||||
|
|
||||||
|
- 示例输出:
|
||||||
|
<example>
|
||||||
|
花岭新城BIM项目技术标书
|
||||||
|
一、总体实施方案
|
||||||
|
1.1 项目理解与需求分析
|
||||||
|
1.1.1 项目概述
|
||||||
|
1.1.1.1 建设地点及规模
|
||||||
|
1.1.1.2 工程地质勘察报告
|
||||||
|
1.1.1.3 抗震设防烈度与防火等级
|
||||||
|
1.1.1.4 建筑结构形式与建筑面积分布
|
||||||
|
1.1.2 项目背景
|
||||||
|
1.1.2.1 核心宗旨与目标
|
||||||
|
1.1.2.2 地理位置与项目规模
|
||||||
|
1.1.3 项目目标
|
||||||
|
1.1.3.1 就业机会与基础设施提升
|
||||||
|
1.1.3.2 乡村振兴与经济增长
|
||||||
|
1.1.4 项目特点
|
||||||
|
1.1.4.1 框筒结构抗震性能
|
||||||
|
1.1.4.2 分阶段工程地质勘察
|
||||||
|
1.1.4.3 功能区域多样化
|
||||||
|
|
||||||
|
二、建筑设计
|
||||||
|
2.1 主要设计依据
|
||||||
|
2.1.1 国家标准与规范
|
||||||
|
2.1.2 行业标准与图集
|
||||||
|
2.2 建筑结构设计
|
||||||
|
2.2.1 结构形式
|
||||||
|
2.2.2 结构材料
|
||||||
|
2.2.3 结构布局
|
||||||
|
2.2.4 结构经济指标
|
||||||
|
2.2.5 结构细节设计
|
||||||
|
2.3 建筑功能布局
|
||||||
|
2.3.1 C1#楼(厂房)
|
||||||
|
2.3.1.1 功能分区明确
|
||||||
|
2.3.1.2 流线优化与安全性
|
||||||
|
2.3.2 配电房
|
||||||
|
2.3.2.1 设计目标与设备布置
|
||||||
|
2.3.2.2 空间规划与电气主接线方案
|
||||||
|
2.3.3 外廊及架空建筑
|
||||||
|
2.3.3.1 功能区域与景观设计
|
||||||
|
2.3.3.2 光照与通风优化
|
||||||
|
2.4 建筑材料选用
|
||||||
|
2.5 建筑外观设计
|
||||||
|
2.6 建筑室内布局
|
||||||
|
2.6.1 功能分区与设计要点
|
||||||
|
2.7 建筑安全和消防设计
|
||||||
|
2.7.1 建筑安全体系
|
||||||
|
2.7.2 消防系统设计
|
||||||
|
2.8 建筑节能设计
|
||||||
|
2.8.1 节能措施与绿色建材
|
||||||
|
2.8.2 雨水收集系统
|
||||||
|
|
||||||
|
三、结构设计
|
||||||
|
3.1 结构形式
|
||||||
|
3.2 结构材料
|
||||||
|
3.2.1 混凝土与钢材选用
|
||||||
|
3.3 结构布局
|
||||||
|
3.3.1 结构柱网与通风疏散通道
|
||||||
|
3.4 结构经济指标
|
||||||
|
3.4.1 抗震设计要求与用材控制
|
||||||
|
3.5 结构细节设计
|
||||||
|
3.5.1 基础设计与钢结构细节
|
||||||
|
3.5.2 混凝土结构与抗震设计
|
||||||
|
3.6 结构分析与计算
|
||||||
|
|
||||||
|
四、给排水设计
|
||||||
|
4.1 引言
|
||||||
|
4.2 供水系统设计
|
||||||
|
4.2.1 供水管道与消防水源
|
||||||
|
4.2.2 节水设计与雨水收集
|
||||||
|
4.3 排水系统设计
|
||||||
|
4.3.1 排水管道与雨水管理
|
||||||
|
4.3.2 污水处理与分流制度
|
||||||
|
4.4 给排水设备选择
|
||||||
|
4.5 细节设计
|
||||||
|
4.6 监测与维护
|
||||||
|
|
||||||
|
五、暖通设计
|
||||||
|
5.1 引言
|
||||||
|
5.2 供暖系统设计
|
||||||
|
5.2.1 供暖方式与设备选择
|
||||||
|
5.2.2 温度控制系统
|
||||||
|
5.3 通风系统设计
|
||||||
|
5.3.1 通风方式与设备选择
|
||||||
|
5.3.2 空气质量控制
|
||||||
|
5.4 空调系统设计
|
||||||
|
5.4.1 空调方式与设备选择
|
||||||
|
5.4.2 温湿度控制系统
|
||||||
|
5.5 热水系统设计
|
||||||
|
5.6 细节设计与监测维护
|
||||||
|
|
||||||
|
|
||||||
|
六、BIM设计
|
||||||
|
6.1 项目总图与单体建筑设计
|
||||||
|
6.2 道路与排水设计
|
||||||
|
6.3 电气系统设计
|
||||||
|
6.4 绿化设计
|
||||||
|
6.5 BIM协同设计与施工管理
|
||||||
|
6.6 数据管理与培训支持
|
||||||
|
|
||||||
|
七、设计说明
|
||||||
|
7.1 项目设计依据
|
||||||
|
7.2 设计原则
|
||||||
|
7.3 结构经济合理化
|
||||||
|
7.4 建筑功能分区
|
||||||
|
7.5 设计细节要求
|
||||||
|
|
||||||
|
八、合理化建议
|
||||||
|
8.1 建筑专业合理化建议
|
||||||
|
8.2 结构专业合理化建议
|
||||||
|
8.3 给排水专业合理化建议
|
||||||
|
8.4 暖通专业合理化建议
|
||||||
|
8.5 BIM专业合理化建议
|
||||||
|
8.6 技术和工艺方面的建议
|
||||||
|
8.7 成本和预算方面的建议
|
||||||
|
8.8 时间和进度方面的建议
|
||||||
|
8.9 施工质量管理方面的建议
|
||||||
|
8.10 质量和安全方面的建议
|
||||||
|
8.11 环境和可持续性方面的建议
|
||||||
|
|
||||||
|
九、施工进度安排
|
||||||
|
9.1 施工进度安排
|
||||||
|
9.2 施工进度跟踪与管理
|
||||||
|
9.3 施工质量管理
|
||||||
|
9.4 施工现场管理
|
||||||
|
9.5 施工结项与验收
|
||||||
|
|
||||||
|
十、本项目工作重点难点分析
|
||||||
|
10.1 工程特点与设计工作难点
|
||||||
|
10.2 重点与难点分析
|
||||||
|
10.3 综合解决措施
|
||||||
|
</example>
|
||||||
|
|
||||||
|
- 招标文件摘要:
|
||||||
|
{summary}
|
||||||
|
|
||||||
|
- 技术评分标准:
|
||||||
|
{rating}
|
||||||
|
|
||||||
|
"""
|
||||||
92
prompts/project_summary.txt
Normal file
92
prompts/project_summary.txt
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
- 角色:招标文件编写专家,精通招标文件结构化、摘要编写
|
||||||
|
|
||||||
|
- 任务:根据用户提供的项目招标文件内容,生成一份专业、清晰的结构化摘要
|
||||||
|
|
||||||
|
- 要求:
|
||||||
|
|
||||||
|
一、摘要框架
|
||||||
|
1. 项目概况
|
||||||
|
- 项目名称
|
||||||
|
- 建设地点
|
||||||
|
- 工程性质(新建/改建/扩建)
|
||||||
|
- 核心建设内容
|
||||||
|
- 关键工程量指标
|
||||||
|
- 特殊施工工艺(如顶管/盾构等)
|
||||||
|
- 项目概况
|
||||||
|
|
||||||
|
2. 技术要求体系
|
||||||
|
- 专业监测要求(分项列出核心监测指标)
|
||||||
|
- 技术标准规范
|
||||||
|
- 质量管控要点
|
||||||
|
- 特殊工艺标准
|
||||||
|
|
||||||
|
3. 交付物矩阵
|
||||||
|
- 阶段性成果清单(含时间节点)
|
||||||
|
- 最终交付文件要求
|
||||||
|
- 成果验收标准
|
||||||
|
- 备案审批流程
|
||||||
|
|
||||||
|
4. 商务条款摘要
|
||||||
|
- 合同期限
|
||||||
|
- 支付结构
|
||||||
|
- 报价约束条件
|
||||||
|
- 违约条款要点
|
||||||
|
- 知识产权约定
|
||||||
|
|
||||||
|
5. 资质要求矩阵
|
||||||
|
- 企业资质门槛
|
||||||
|
- 人员资格要求
|
||||||
|
- 设备配置标准
|
||||||
|
- 同类项目经验
|
||||||
|
|
||||||
|
6. 评标要素体系
|
||||||
|
- 技术评分维度
|
||||||
|
- 商务评分权重
|
||||||
|
- 否决性条款
|
||||||
|
- 实质性条款
|
||||||
|
- 围标识别机制
|
||||||
|
|
||||||
|
|
||||||
|
二、处理规范
|
||||||
|
1. 信息抽取规则:
|
||||||
|
- 采用三级信息提炼法(关键数据→技术参数→约束条件)
|
||||||
|
- 识别并标注法定强制性条款(★号条款)
|
||||||
|
- 提取特殊工艺参数(例如顶管直径、沉井尺寸等)
|
||||||
|
|
||||||
|
2. 结构化呈现要求:
|
||||||
|
- 使用Markdown分级标题系统
|
||||||
|
- 技术参数格式化处理
|
||||||
|
- 流程节点采用时间轴呈现
|
||||||
|
- 关键数据突出显示(例如预算金额、最高限价)
|
||||||
|
|
||||||
|
3. 专业术语处理:
|
||||||
|
- 保持行业术语准确性
|
||||||
|
- 工程计量单位标准化转换
|
||||||
|
- 法律条款原文引述
|
||||||
|
|
||||||
|
三、输出示例
|
||||||
|
1.确保包含但不仅限于:
|
||||||
|
- 项目背景的技术参数分解
|
||||||
|
- 监测要求的分类归纳
|
||||||
|
- 成果交付的阶段性要求
|
||||||
|
- 商务条款的要点提炼
|
||||||
|
|
||||||
|
四、质量保障
|
||||||
|
1. 完整性核查清单:
|
||||||
|
- 验证五证要求(资质/业绩/人员/设备/资金)
|
||||||
|
- 检查三大核心条款(技术/商务/法律)
|
||||||
|
- 确认关键日期节点(工期/交付期/质保期)
|
||||||
|
|
||||||
|
2. 风险提示机制:
|
||||||
|
- 标注异常约束条款
|
||||||
|
- 识别排他性要求
|
||||||
|
- 提示潜在履约风险点
|
||||||
|
|
||||||
|
请严格按照上述结构化框架处理输入的招标文件,生成专业、准确、易读的项目摘要报告。
|
||||||
|
输出内容需符合工程领域专业规范,重点数据需二次核验确保准确性。
|
||||||
|
严格按照招标文件的内容,确保输出内容的完整性。
|
||||||
|
直接给出摘要,禁止说明和引导词。
|
||||||
|
|
||||||
|
- 用户提供的招标文件内容如下:
|
||||||
|
{bid_document}
|
||||||
|
|
||||||
23
prompts/rating_json.txt
Normal file
23
prompts/rating_json.txt
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
- 任务:从工程项目招标文件中提取技术评分要求,并以严格的JSON格式输出。
|
||||||
|
|
||||||
|
- 要求:
|
||||||
|
必须生成完整有效的JSON对象,不使用JSON之外的文本说明
|
||||||
|
数值类型字段不添加单位符号
|
||||||
|
包含所有的评分项及其权重分配
|
||||||
|
特殊说明字段仅在存在否决条款(强制性条款)时出现
|
||||||
|
|
||||||
|
- 输出结构(必须严格遵守根字段名与数组名,便于后续章节字数与要点映射):
|
||||||
|
{
|
||||||
|
"items": [
|
||||||
|
{
|
||||||
|
"id": "唯一短标识,如 T01",
|
||||||
|
"name": "评分项名称(与招标文件表述一致或精简概括)",
|
||||||
|
"weight": 数值型权重或分值(如 10 表示 10 分或 10%),
|
||||||
|
"keywords": ["与本项相关的可选关键词1", "关键词2"]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"notes": "可选:否决条款、阶梯得分等特殊说明;无则写空字符串"
|
||||||
|
}
|
||||||
|
|
||||||
|
- 技术评分要求内容如下:
|
||||||
|
{tech_rating}
|
||||||
46
prompts/rating_requirements copy.txt
Normal file
46
prompts/rating_requirements copy.txt
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
- 角色:招标文件信息提取专家,精通技术评分/技术评审要求的提取
|
||||||
|
|
||||||
|
- 任务:请严格按照以下步骤分析提供的招标文件内容,并完整提取所有技术评分标准:
|
||||||
|
|
||||||
|
- 步骤与要求:
|
||||||
|
|
||||||
|
1. **结构解析**
|
||||||
|
- 首先识别文件整体结构,仅提取“技术评分”/“技术评审”部分
|
||||||
|
- 标注评分大类的权重占比(如出现)
|
||||||
|
|
||||||
|
2. **要素提取**
|
||||||
|
对“技术评分”板块进行深度解析,要求:
|
||||||
|
- 提取评分的全部细节,不能省略
|
||||||
|
- 明确列出技术评分的标准,如有(如"ISO认证+3分"、"项目经验每年加1分")
|
||||||
|
|
||||||
|
3. **结果呈现样例**
|
||||||
|
参考以下示例输出markdown结构化格式:
|
||||||
|
|
||||||
|
# 招标技术评分细则
|
||||||
|
|
||||||
|
## 技术评分(80分)
|
||||||
|
- 对本项目的了解和分析(12分)
|
||||||
|
→ 对本项目的理解与项目背景把握准确,对本项目特点、实 施目标和定位内容详尽,完全满足项目需要,科学、合理、 针对性强、合理可行的,得 12 分; 对本项目的理解与项 目背景有一定把握,对本项目特点、实施目标和定位有阐 述说明,基本可行的,得 8 分;对本项目的理解与项目 背景把握片面,对本项目特点、实施目标和定位理解有较 大偏差,可行性较差的,得 4 分;未提供不得分。
|
||||||
|
→ 合理可行指:( 1)完全响应采购需求;( 2)相关内容的表述具有针对性,全面、具体。
|
||||||
|
→ 基本可行指:( 1)响应采购需求有微小偏差;( 2)相关 内容的表述有一定的层次性、针对性,但全面性不够。
|
||||||
|
→ 可行性较差指:( 1)响应采购需求有较大偏差;( 2)相 关内容的表述针对性弱、全面性方面欠缺较大。
|
||||||
|
- 项目工作重难点分析(12分)
|
||||||
|
→ 根据供应商针对本项目工作重难点分析与解决方案的科学性、合理性且满足项目实际情况进行评分,项目工作重 难点分析到位、有针对性、完全符合项目实际情况,对应 的解决方案合理可行的,得 12 分;
|
||||||
|
项目工作重难点内容 基本准确、针对性一般、基本符合项目实际,对应的解决 方案基本可行的,得 8 分;
|
||||||
|
项目工作重难点分析一般,对应的解决方案一般、可行性较差的,得 4 分;未提供 不得分。
|
||||||
|
→ 合理可行指:( 1)完全响应采购需求;( 2)相关内容的表述具有针对性,全面、具体。
|
||||||
|
→ 基本可行指:( 1)响应采购需求有微小偏差;( 2)相关 内容的表述有一定的层次性、针对性,但全面性不够。
|
||||||
|
→ 可行性较差指:( 1)响应采购需求有较大偏差;( 2)相 关内容的表述针对性弱、全面性方面欠缺较大。
|
||||||
|
- 项目实施方案(12分)
|
||||||
|
(继续展开...)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
请严格按照上述结构化框架处理输入的招标文件,生成专业、准确的项目技术评分/评审要求。
|
||||||
|
严格按照招标文件的内容,确保输出内容的完整性。
|
||||||
|
直接输出评分/评审要求,禁止说明和引导词。
|
||||||
|
|
||||||
|
- 招标文件内容如下:
|
||||||
|
{bid_document}
|
||||||
|
|
||||||
|
|
||||||
43
prompts/rating_requirements.txt
Normal file
43
prompts/rating_requirements.txt
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
- 角色:招标文件信息提取专家,精通技术评分/技术评审要求的提取
|
||||||
|
|
||||||
|
- 任务:请严格按照以下步骤分析提供的招标文件内容,并完整提取所有技术评分标准:
|
||||||
|
|
||||||
|
- 步骤与要求:
|
||||||
|
|
||||||
|
1. **结构解析**
|
||||||
|
- 首先识别文件整体结构,仅提取“技术评分”/“技术评审要求”部分
|
||||||
|
- 标注评分大类的权重占比(如出现)
|
||||||
|
|
||||||
|
2. **要素提取**
|
||||||
|
对“技术评分”板块进行深度解析,要求:
|
||||||
|
- 提取评分的全部细节,不能省略
|
||||||
|
- 明确列出量化指标,如有(如"ISO认证+3分"、"项目经验每年加1分")
|
||||||
|
- 区分强制性条款(必须满足项)与竞争性条款(择优评分项),如有
|
||||||
|
- 标注特殊要求(本地化服务、专利数量、团队资质等),如有
|
||||||
|
|
||||||
|
3. **异常识别**
|
||||||
|
- 标出表述模糊的评分项(如"酌情加分""优/良/差等级")
|
||||||
|
- 识别可能存在的矛盾条款
|
||||||
|
- 提示需要注意的隐藏评分点(如投标格式错误扣分项)
|
||||||
|
|
||||||
|
4. **结果呈现样例**
|
||||||
|
参考以下示例输出markdown结构化格式:
|
||||||
|
|
||||||
|
# 招标技术评分细则
|
||||||
|
|
||||||
|
## 技术评分(50%)
|
||||||
|
- 系统架构设计(20%)
|
||||||
|
→ 要求:支持分布式部署(未满足直接废标)
|
||||||
|
→ 加分项:采用微服务架构+3分
|
||||||
|
(继续展开...)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
请严格按照上述结构化框架处理输入的招标文件,生成专业、准确的项目技术评分要求。
|
||||||
|
严格按照招标文件的内容,确保输出内容的完整性。
|
||||||
|
直接输出评分要求,禁止说明和引导词。
|
||||||
|
|
||||||
|
- 招标文件内容如下:
|
||||||
|
{bid_document}
|
||||||
|
|
||||||
|
|
||||||
45
prompts/scoring_rules.txt
Normal file
45
prompts/scoring_rules.txt
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
"你是一名专业的招标文件分析师,请按照以下步骤处理用户提供的项目招标文件内容:
|
||||||
|
|
||||||
|
1. **结构识别**
|
||||||
|
- 仔细解析文件结构,定位'评分标准'、'评审办法'、'投标人须知'等关键章节
|
||||||
|
- 特别注意包含'分值'、'评分项'、'权重'等关键词的段落
|
||||||
|
|
||||||
|
2. **核心要素提取**
|
||||||
|
- 系统提取以下要素形成结构化表格:
|
||||||
|
│ 类别 │ 评分项名称 │ 分值权重 │ 具体要求 │ 否决条款 │
|
||||||
|
- 分类标准:
|
||||||
|
● 技术部分(方案设计、实施能力、技术创新等)
|
||||||
|
● 商务部分(资质证明、业绩案例、团队经验等)
|
||||||
|
● 价格部分(报价合理性、计价方式等)
|
||||||
|
● 其他专项(售后服务、本地化服务等)
|
||||||
|
|
||||||
|
3. **深度分析**
|
||||||
|
- 计算权重配比(示例:技术60% = 方案设计30% + 实施能力20% + 创新10%)
|
||||||
|
- 识别否决性条款(如"▲"标记项或特定强制要求)
|
||||||
|
- 标注特殊评分规则:阶梯得分、区间赋分、横向比较等机制
|
||||||
|
|
||||||
|
4. **风险提示**
|
||||||
|
- 标出易被忽视的得分点(如ISO认证、专利数量等)
|
||||||
|
- 识别矛盾条款(如总分值≠100%的情况)
|
||||||
|
- 提示资质门槛要求(注册资金、特定资质证书等)
|
||||||
|
|
||||||
|
5. **输出格式**
|
||||||
|
采用Markdown输出以下结构:
|
||||||
|
```markdown
|
||||||
|
# 招标评分要点汇总
|
||||||
|
|
||||||
|
## 核心指标配比
|
||||||
|
- 总评分构成:技术分(__%)+ 商务分(__%)+ 价格分(__%)
|
||||||
|
|
||||||
|
## 详细评分矩阵
|
||||||
|
| 类别 | 评分项 | 分值 | 具体要求 | 关键指标 |
|
||||||
|
|------|-------|-----|---------|---------|
|
||||||
|
| ... | ... | ... | ... | ... |
|
||||||
|
|
||||||
|
## 重点提示
|
||||||
|
⚠️ 否决条款:列出所有一票否决项
|
||||||
|
💡 得分要点:突出3-5个高权重核心指标
|
||||||
|
⏱️ 时间节点:标注与评分相关的时限要求
|
||||||
|
```
|
||||||
|
请先确认理解任务要求,待用户提供招标文件内容后执行分析。"
|
||||||
|
|
||||||
47
prompts/section_detail.py
Normal file
47
prompts/section_detail.py
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
GEN_LEAF_DETAIL_PROMT = """
|
||||||
|
【最重要的要求——字数】
|
||||||
|
{word_count_spec}
|
||||||
|
|
||||||
|
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||||||
|
- 角色:资深投标文件撰写专家
|
||||||
|
- 任务:根据招标文件概要、标书目录、子小节标题,撰写该子小节的正文
|
||||||
|
|
||||||
|
【行文规范】
|
||||||
|
- 投标方自称统一用"我方",禁用"我们""本公司"
|
||||||
|
- 招标人统一称"招标方"或"建设单位"
|
||||||
|
- 禁止前导句:"本章节对应……""本小节主要说明……""以下将从……方面说明"等——开头直接写实质内容
|
||||||
|
- 禁止AI套话:综上所述、首先其次再次、我们深信、高度重视、全力以赴、不断优化、稳步推进、通过以上措施
|
||||||
|
- 用具体数据/标准编号/人员配置替代空洞承诺
|
||||||
|
- 列举用(1)(2)(3)编号,禁止"首先其次"连接;禁止"等"作结尾
|
||||||
|
- 纯文本输出,禁用markdown符号,段落间空行分隔
|
||||||
|
- 直接输出正文,不含标题和解释
|
||||||
|
|
||||||
|
【输入信息】
|
||||||
|
- 招标文件概要:
|
||||||
|
{summary}
|
||||||
|
|
||||||
|
- 技术标书目录:
|
||||||
|
{outline}
|
||||||
|
|
||||||
|
- 待撰写的子小节标题:
|
||||||
|
{title}
|
||||||
|
|
||||||
|
再次强调:篇幅是最核心的质量指标。内容必须充分展开,每个技术要点都要详细阐述实施方法、技术参数、人员安排或设备配置。绝不可以概括性一笔带过。
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
GEN_SECTION_INTRODUCTION_PROMT = """
|
||||||
|
- 角色:资深投标文件撰写专家
|
||||||
|
- 任务:为章节撰写简短开篇引言(100~200字),点明核心主题与招标要求的对应关系
|
||||||
|
- 使用"我方"自称,禁止套话和前导解释句,纯文本输出
|
||||||
|
- 若无需过渡可输出空白
|
||||||
|
|
||||||
|
- 招标文件概要:
|
||||||
|
{summary}
|
||||||
|
|
||||||
|
- 技术标书目录:
|
||||||
|
{outline}
|
||||||
|
|
||||||
|
- 章节标题:
|
||||||
|
{title}
|
||||||
|
"""
|
||||||
28
prompts/section_details.txt
Normal file
28
prompts/section_details.txt
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
【最重要的要求——字数】
|
||||||
|
{word_count_spec}
|
||||||
|
|
||||||
|
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||||||
|
- 角色:资深投标文件撰写专家
|
||||||
|
- 任务:根据招标文件概要、标书目录、子小节标题,撰写该子小节的正文
|
||||||
|
|
||||||
|
【行文规范】
|
||||||
|
- 投标方自称用"我方","我们","本公司"随机使用
|
||||||
|
- 招标人统一称"招标方"或"建设单位"
|
||||||
|
- 禁止前导句:"本章节对应……""本小节主要说明……""以下将从……方面说明"等——开头直接写实质内容
|
||||||
|
- 禁止AI套话:综上所述、首先其次再次、我们深信、高度重视、全力以赴、不断优化、稳步推进、通过以上措施
|
||||||
|
- 用具体数据/标准编号/人员配置替代空洞承诺
|
||||||
|
- 列举用(1)(2)(3)编号,禁止"首先其次"连接;禁止"等"作结尾
|
||||||
|
- 纯文本输出,禁用markdown符号,段落间空行分隔
|
||||||
|
- 直接输出正文,不含标题和解释
|
||||||
|
|
||||||
|
【输入信息】
|
||||||
|
- 招标文件概要:
|
||||||
|
{summary}
|
||||||
|
|
||||||
|
- 技术标书目录:
|
||||||
|
{outline}
|
||||||
|
|
||||||
|
- 待撰写的子小节标题:
|
||||||
|
{subsection_title}
|
||||||
|
|
||||||
|
再次强调:篇幅是最核心的质量指标。内容必须充分展开,每个技术要点都要详细阐述实施方法、技术参数、人员安排或设备配置。绝不可以概括性一笔带过。
|
||||||
12
requirements.txt
Normal file
12
requirements.txt
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
Flask==3.0.3
|
||||||
|
flask-cors==4.0.1
|
||||||
|
PyPDF2==3.0.1
|
||||||
|
python-docx==1.1.2
|
||||||
|
openai==1.52.0
|
||||||
|
Werkzeug==3.0.4
|
||||||
|
requests==2.32.3
|
||||||
|
chardet==5.2.0
|
||||||
|
pypdf==4.3.1
|
||||||
|
pdfminer.six==20231228
|
||||||
|
beautifulsoup4==4.12.3
|
||||||
|
lxml==5.3.0
|
||||||
39
start.bat
Normal file
39
start.bat
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
@echo off
|
||||||
|
title BidPartner - AI Bid Assistant
|
||||||
|
|
||||||
|
echo.
|
||||||
|
echo ============================================
|
||||||
|
echo BidPartner - AI Bid Writing Tool
|
||||||
|
echo ============================================
|
||||||
|
echo.
|
||||||
|
|
||||||
|
cd /d "%~dp0"
|
||||||
|
|
||||||
|
python --version >nul 2>&1
|
||||||
|
if %errorlevel% neq 0 (
|
||||||
|
echo [ERROR] Python not found. Please install Python 3.9+
|
||||||
|
pause
|
||||||
|
exit /b 1
|
||||||
|
)
|
||||||
|
|
||||||
|
if not exist "%~dp0.deps_installed" (
|
||||||
|
echo Installing dependencies...
|
||||||
|
pip install -r requirements.txt
|
||||||
|
if %errorlevel% neq 0 (
|
||||||
|
echo [ERROR] Failed to install dependencies.
|
||||||
|
pause
|
||||||
|
exit /b 1
|
||||||
|
)
|
||||||
|
echo.> "%~dp0.deps_installed"
|
||||||
|
echo Dependencies installed successfully.
|
||||||
|
)
|
||||||
|
|
||||||
|
echo Starting server...
|
||||||
|
echo Open browser: http://localhost:5000
|
||||||
|
echo Press Ctrl+C to stop
|
||||||
|
echo.
|
||||||
|
|
||||||
|
start "" "http://localhost:5000"
|
||||||
|
python app.py
|
||||||
|
|
||||||
|
pause
|
||||||
89
static/style.css
Normal file
89
static/style.css
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
/* 标伙伴 · 自定义样式 */
|
||||||
|
|
||||||
|
/* 滚动条美化 */
|
||||||
|
::-webkit-scrollbar {
|
||||||
|
width: 6px;
|
||||||
|
height: 6px;
|
||||||
|
}
|
||||||
|
::-webkit-scrollbar-track {
|
||||||
|
background: #f1f5f9;
|
||||||
|
border-radius: 3px;
|
||||||
|
}
|
||||||
|
::-webkit-scrollbar-thumb {
|
||||||
|
background: #cbd5e1;
|
||||||
|
border-radius: 3px;
|
||||||
|
}
|
||||||
|
::-webkit-scrollbar-thumb:hover {
|
||||||
|
background: #94a3b8;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 章节树左侧栏 */
|
||||||
|
.sidebar-fixed::-webkit-scrollbar {
|
||||||
|
width: 4px;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 正文内容排版 */
|
||||||
|
.prose-content {
|
||||||
|
font-family: 'SimSun', '宋体', 'Times New Roman', serif;
|
||||||
|
line-height: 1.9;
|
||||||
|
color: #374151;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 动画 */
|
||||||
|
@keyframes fadeIn {
|
||||||
|
from { opacity: 0; transform: translateY(8px); }
|
||||||
|
to { opacity: 1; transform: translateY(0); }
|
||||||
|
}
|
||||||
|
.fade-in {
|
||||||
|
animation: fadeIn 0.25s ease-out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 表格样式(评分要求展示) */
|
||||||
|
.markdown-table table {
|
||||||
|
width: 100%;
|
||||||
|
border-collapse: collapse;
|
||||||
|
font-size: 13px;
|
||||||
|
}
|
||||||
|
.markdown-table th {
|
||||||
|
background: #f8fafc;
|
||||||
|
font-weight: 600;
|
||||||
|
color: #475569;
|
||||||
|
padding: 8px 12px;
|
||||||
|
border: 1px solid #e2e8f0;
|
||||||
|
text-align: left;
|
||||||
|
}
|
||||||
|
.markdown-table td {
|
||||||
|
padding: 7px 12px;
|
||||||
|
border: 1px solid #e2e8f0;
|
||||||
|
color: #334155;
|
||||||
|
}
|
||||||
|
.markdown-table tr:nth-child(even) td {
|
||||||
|
background: #f8fafc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 步骤指示器 */
|
||||||
|
.step-active {
|
||||||
|
background: #2563eb;
|
||||||
|
color: #fff;
|
||||||
|
box-shadow: 0 2px 8px rgba(37,99,235,.35);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 文件上传拖拽高亮 */
|
||||||
|
.drop-active {
|
||||||
|
border-color: #3b82f6 !important;
|
||||||
|
background: #eff6ff !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 章节缩进指示线 */
|
||||||
|
.section-indent-line {
|
||||||
|
border-left: 2px solid #e2e8f0;
|
||||||
|
margin-left: 8px;
|
||||||
|
padding-left: 8px;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 打印样式 */
|
||||||
|
@media print {
|
||||||
|
header, nav, aside, button { display: none !important; }
|
||||||
|
main { padding: 0 !important; }
|
||||||
|
.bg-white { box-shadow: none !important; border: none !important; }
|
||||||
|
}
|
||||||
881
templates/index.html
Normal file
881
templates/index.html
Normal file
@ -0,0 +1,881 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="zh-CN">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>标伙伴 · AI 标书助手</title>
|
||||||
|
<script src="https://cdn.tailwindcss.com"></script>
|
||||||
|
<script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script>
|
||||||
|
<link rel="stylesheet" href="/static/style.css">
|
||||||
|
<style>
|
||||||
|
[x-cloak]{display:none!important}
|
||||||
|
body{font-family:'PingFang SC','Microsoft YaHei',sans-serif;background:#f0f4f8}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body class="min-h-screen" x-data="app()" x-init="init()">
|
||||||
|
|
||||||
|
<!-- ── 顶栏 ── -->
|
||||||
|
<header class="bg-white border-b border-gray-200 sticky top-0 z-50 shadow-sm">
|
||||||
|
<div class="max-w-7xl mx-auto px-6 h-16 flex items-center justify-between">
|
||||||
|
<div class="flex items-center gap-3">
|
||||||
|
<div class="w-9 h-9 rounded-xl bg-gradient-to-br from-blue-600 to-indigo-600 flex items-center justify-center shadow">
|
||||||
|
<svg class="w-5 h-5 text-white" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
|
||||||
|
d="M9 12h6m-6 4h6m2 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z"/>
|
||||||
|
</svg>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<span class="text-lg font-bold text-gray-900">标伙伴</span>
|
||||||
|
<span class="ml-2 text-xs text-gray-400 font-medium">AI 标书助手</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="flex items-center gap-3">
|
||||||
|
<button @click="showConfig=true"
|
||||||
|
class="p-2 text-gray-500 hover:text-blue-600 hover:bg-blue-50 rounded-lg transition">
|
||||||
|
<svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
|
||||||
|
d="M10.325 4.317c.426-1.756 2.924-1.756 3.35 0a1.724 1.724 0 002.573 1.066c1.543-.94 3.31.826 2.37 2.37a1.724 1.724 0 001.065 2.572c1.756.426 1.756 2.924 0 3.35a1.724 1.724 0 00-1.066 2.573c.94 1.543-.826 3.31-2.37 2.37a1.724 1.724 0 00-2.572 1.065c-.426 1.756-2.924 1.756-3.35 0a1.724 1.724 0 00-2.573-1.066c-1.543.94-3.31-.826-2.37-2.37a1.724 1.724 0 00-1.065-2.572c-1.756-.426-1.756-2.924 0-3.35a1.724 1.724 0 001.066-2.573c-.94-1.543.826-3.31 2.37-2.37.996.608 2.296.07 2.572-1.065z"/>
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M15 12a3 3 0 11-6 0 3 3 0 016 0z"/>
|
||||||
|
</svg>
|
||||||
|
</button>
|
||||||
|
<button @click="showCreate=true"
|
||||||
|
class="flex items-center gap-2 px-4 py-2 bg-blue-600 hover:bg-blue-700 text-white text-sm font-medium rounded-lg shadow-sm transition">
|
||||||
|
<svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 4v16m8-8H4"/>
|
||||||
|
</svg>
|
||||||
|
新建项目
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</header>
|
||||||
|
|
||||||
|
<!-- ── 主内容 ── -->
|
||||||
|
<main class="max-w-7xl mx-auto px-6 py-8">
|
||||||
|
|
||||||
|
<!-- 篇幅目标仅存在于「标书项目 → 步骤1 解析」;本页不重复控件 -->
|
||||||
|
<div x-show="!loading" x-cloak class="mb-6 p-3.5 bg-slate-50 border border-slate-200 rounded-xl text-sm text-slate-600 leading-relaxed">
|
||||||
|
<p><strong>篇幅目标(按页数粗略换算)</strong>请进入某标书项目,在 <strong>步骤1「解析」</strong> 中设置:100/150/200/250/300 页、自定义、保存页数设置、使用原档位、当前页等,保存后用于后续章节生成。</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- 欢迎横幅(无项目时显示) -->
|
||||||
|
<template x-if="projects.length === 0 && !loading">
|
||||||
|
<div class="text-center py-20">
|
||||||
|
<div class="w-24 h-24 mx-auto mb-6 rounded-3xl bg-gradient-to-br from-blue-100 to-indigo-100 flex items-center justify-center">
|
||||||
|
<svg class="w-12 h-12 text-blue-600" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5"
|
||||||
|
d="M9 12h6m-6 4h6m2 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z"/>
|
||||||
|
</svg>
|
||||||
|
</div>
|
||||||
|
<h2 class="text-2xl font-bold text-gray-800 mb-2">欢迎使用标伙伴</h2>
|
||||||
|
<p class="text-gray-500 mb-8 max-w-md mx-auto">AI 驱动的标书写作助手,上传招标文件,一键生成专业技术标书</p>
|
||||||
|
<button @click="showCreate=true"
|
||||||
|
class="px-6 py-3 bg-blue-600 hover:bg-blue-700 text-white font-medium rounded-xl shadow-md transition">
|
||||||
|
创建第一个项目
|
||||||
|
</button>
|
||||||
|
|
||||||
|
<!-- 功能介绍 -->
|
||||||
|
<div class="grid grid-cols-3 gap-6 mt-16 max-w-3xl mx-auto text-left">
|
||||||
|
<div class="bg-white rounded-2xl p-6 shadow-sm border border-gray-100">
|
||||||
|
<div class="w-10 h-10 bg-blue-100 rounded-xl flex items-center justify-center mb-4">
|
||||||
|
<svg class="w-5 h-5 text-blue-600" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M4 16v1a3 3 0 003 3h10a3 3 0 003-3v-1m-4-8l-4-4m0 0L8 8m4-4v12"/>
|
||||||
|
</svg>
|
||||||
|
</div>
|
||||||
|
<h3 class="font-semibold text-gray-800 mb-1">智能解析招标文件</h3>
|
||||||
|
<p class="text-sm text-gray-500">自动提取评分要求、资质条件、技术参数</p>
|
||||||
|
</div>
|
||||||
|
<div class="bg-white rounded-2xl p-6 shadow-sm border border-gray-100">
|
||||||
|
<div class="w-10 h-10 bg-green-100 rounded-xl flex items-center justify-center mb-4">
|
||||||
|
<svg class="w-5 h-5 text-green-600" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2"/>
|
||||||
|
</svg>
|
||||||
|
</div>
|
||||||
|
<h3 class="font-semibold text-gray-800 mb-1">自动生成标书大纲</h3>
|
||||||
|
<p class="text-sm text-gray-500">按评分权重生成四级章节结构,精准对标要求</p>
|
||||||
|
</div>
|
||||||
|
<div class="bg-white rounded-2xl p-6 shadow-sm border border-gray-100">
|
||||||
|
<div class="w-10 h-10 bg-purple-100 rounded-xl flex items-center justify-center mb-4">
|
||||||
|
<svg class="w-5 h-5 text-purple-600" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 10v6m0 0l-3-3m3 3l3-3m2 8H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z"/>
|
||||||
|
</svg>
|
||||||
|
</div>
|
||||||
|
<h3 class="font-semibold text-gray-800 mb-1">一键导出 Word 文档</h3>
|
||||||
|
<p class="text-sm text-gray-500">专业排版,直接交付使用</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
|
||||||
|
<!-- 加载中 -->
|
||||||
|
<template x-if="loading">
|
||||||
|
<div class="flex justify-center py-20">
|
||||||
|
<div class="w-8 h-8 border-4 border-blue-200 border-t-blue-600 rounded-full animate-spin"></div>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
|
||||||
|
<!-- 项目列表 -->
|
||||||
|
<template x-if="projects.length > 0">
|
||||||
|
<div>
|
||||||
|
<div class="flex items-center justify-between mb-6">
|
||||||
|
<h2 class="text-xl font-bold text-gray-800">我的项目
|
||||||
|
<span class="ml-2 text-sm font-normal text-gray-400">共 <span x-text="projects.length"></span> 个</span>
|
||||||
|
</h2>
|
||||||
|
</div>
|
||||||
|
<div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-5">
|
||||||
|
<template x-for="p in projects" :key="p.id">
|
||||||
|
<div class="bg-white rounded-2xl border border-gray-100 shadow-sm hover:shadow-md transition-shadow cursor-pointer group"
|
||||||
|
@click="window.location='/project/'+p.id">
|
||||||
|
<div class="p-5">
|
||||||
|
<!-- 状态徽标 -->
|
||||||
|
<div class="flex items-start justify-between mb-3">
|
||||||
|
<div class="flex-1 min-w-0">
|
||||||
|
<h3 class="font-semibold text-gray-900 group-hover:text-blue-600 transition truncate" x-text="p.name"></h3>
|
||||||
|
<p class="text-xs text-gray-400 mt-1" x-text="formatDate(p.created_at)"></p>
|
||||||
|
</div>
|
||||||
|
<span class="ml-2 flex-shrink-0 px-2 py-0.5 rounded-full text-xs font-medium"
|
||||||
|
:class="statusBadge(p.parse_status).cls" x-text="statusBadge(p.parse_status).text"></span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- 文件名 -->
|
||||||
|
<div x-show="p.file_name" class="flex items-center gap-1.5 text-xs text-gray-500 mb-3">
|
||||||
|
<svg class="w-3.5 h-3.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 12h6m-6 4h6m2 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z"/>
|
||||||
|
</svg>
|
||||||
|
<span class="truncate" x-text="p.file_name"></span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- 进度条 -->
|
||||||
|
<div x-show="p.section_count > 0" class="mb-3">
|
||||||
|
<div class="flex justify-between text-xs text-gray-500 mb-1">
|
||||||
|
<span>章节生成进度</span>
|
||||||
|
<span x-text="p.done_count + '/' + p.section_count"></span>
|
||||||
|
</div>
|
||||||
|
<div class="h-1.5 bg-gray-100 rounded-full overflow-hidden">
|
||||||
|
<div class="h-full bg-blue-500 rounded-full transition-all"
|
||||||
|
:style="'width:' + (p.section_count ? p.done_count/p.section_count*100 : 0) + '%'"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- 操作按钮 -->
|
||||||
|
<div class="flex gap-2 pt-3 border-t border-gray-50">
|
||||||
|
<button class="flex-1 text-xs text-blue-600 hover:text-blue-700 font-medium py-1 hover:bg-blue-50 rounded-lg transition"
|
||||||
|
@click.stop="window.location='/project/'+p.id">
|
||||||
|
进入项目
|
||||||
|
</button>
|
||||||
|
<button class="text-xs text-red-400 hover:text-red-600 font-medium px-3 py-1 hover:bg-red-50 rounded-lg transition"
|
||||||
|
@click.stop="deleteProject(p.id, p.name)">
|
||||||
|
删除
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
</main>
|
||||||
|
|
||||||
|
<!-- ══ 新建项目弹窗 ══ -->
|
||||||
|
<div x-show="showCreate" x-cloak class="fixed inset-0 z-50 flex items-center justify-center p-4 bg-black/50 backdrop-blur-sm">
|
||||||
|
<div class="bg-white rounded-2xl shadow-2xl w-full max-w-md p-6" @click.stop>
|
||||||
|
<h2 class="text-lg font-bold text-gray-900 mb-4">新建标书项目</h2>
|
||||||
|
<div class="mb-4">
|
||||||
|
<label class="block text-sm font-medium text-gray-700 mb-1">项目名称</label>
|
||||||
|
<input type="text" x-model="newProjectName" @keydown.enter="createProject()"
|
||||||
|
placeholder="例如:XX智慧城市信息化建设项目"
|
||||||
|
class="w-full px-4 py-2.5 border border-gray-300 rounded-xl focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent text-sm">
|
||||||
|
</div>
|
||||||
|
<div class="flex gap-3">
|
||||||
|
<button @click="showCreate=false" class="flex-1 px-4 py-2 border border-gray-200 text-gray-600 rounded-xl text-sm hover:bg-gray-50 transition">取消</button>
|
||||||
|
<button @click="createProject()" :disabled="creating"
|
||||||
|
class="flex-1 px-4 py-2 bg-blue-600 hover:bg-blue-700 text-white rounded-xl text-sm font-medium transition disabled:opacity-60">
|
||||||
|
<span x-show="!creating">创建项目</span>
|
||||||
|
<span x-show="creating">创建中...</span>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- ══ AI 配置弹窗 ══ -->
|
||||||
|
<div x-show="showConfig" x-cloak class="fixed inset-0 z-50 flex items-center justify-center p-4 bg-black/50 backdrop-blur-sm">
|
||||||
|
<div class="bg-white rounded-2xl shadow-2xl w-full max-w-lg p-6" @click.stop>
|
||||||
|
<h2 class="text-lg font-bold text-gray-900 mb-4">AI 模型配置</h2>
|
||||||
|
|
||||||
|
<div class="mb-4">
|
||||||
|
<label class="block text-sm font-medium text-gray-700 mb-2">选择模型提供商</label>
|
||||||
|
<div class="grid grid-cols-3 gap-2">
|
||||||
|
<label class="flex items-center gap-2 p-3 border-2 rounded-xl cursor-pointer transition"
|
||||||
|
:class="cfg.model_provider==='qwen' ? 'border-blue-500 bg-blue-50' : 'border-gray-200 hover:border-gray-300'">
|
||||||
|
<input type="radio" name="provider" value="qwen" x-model="cfg.model_provider" class="accent-blue-600">
|
||||||
|
<div>
|
||||||
|
<p class="font-medium text-sm leading-tight">通义千问</p>
|
||||||
|
<p class="text-xs text-gray-400">Qwen · 阿里云</p>
|
||||||
|
</div>
|
||||||
|
</label>
|
||||||
|
<label class="flex items-center gap-2 p-3 border-2 rounded-xl cursor-pointer transition"
|
||||||
|
:class="cfg.model_provider==='deepseek' ? 'border-blue-500 bg-blue-50' : 'border-gray-200 hover:border-gray-300'">
|
||||||
|
<input type="radio" name="provider" value="deepseek" x-model="cfg.model_provider" class="accent-blue-600">
|
||||||
|
<div>
|
||||||
|
<p class="font-medium text-sm leading-tight">DeepSeek</p>
|
||||||
|
<p class="text-xs text-gray-400">高性价比 · 云端</p>
|
||||||
|
</div>
|
||||||
|
</label>
|
||||||
|
<label class="flex items-center gap-2 p-3 border-2 rounded-xl cursor-pointer transition"
|
||||||
|
:class="cfg.model_provider==='openai' ? 'border-blue-500 bg-blue-50' : 'border-gray-200 hover:border-gray-300'">
|
||||||
|
<input type="radio" name="provider" value="openai" x-model="cfg.model_provider" class="accent-blue-600">
|
||||||
|
<div>
|
||||||
|
<p class="font-medium text-sm leading-tight">OpenAI</p>
|
||||||
|
<p class="text-xs text-gray-400">GPT-4.1 · 云端</p>
|
||||||
|
</div>
|
||||||
|
</label>
|
||||||
|
<label class="flex items-center gap-2 p-3 border-2 rounded-xl cursor-pointer transition"
|
||||||
|
:class="cfg.model_provider==='doubao' ? 'border-blue-500 bg-blue-50' : 'border-gray-200 hover:border-gray-300'">
|
||||||
|
<input type="radio" name="provider" value="doubao" x-model="cfg.model_provider" class="accent-blue-600">
|
||||||
|
<div>
|
||||||
|
<p class="font-medium text-sm leading-tight">豆包</p>
|
||||||
|
<p class="text-xs text-gray-400">字节跳动 · 云端</p>
|
||||||
|
</div>
|
||||||
|
</label>
|
||||||
|
<label class="flex items-center gap-2 p-3 border-2 rounded-xl cursor-pointer transition"
|
||||||
|
:class="cfg.model_provider==='kimi' ? 'border-blue-500 bg-blue-50' : 'border-gray-200 hover:border-gray-300'">
|
||||||
|
<input type="radio" name="provider" value="kimi" x-model="cfg.model_provider" class="accent-blue-600">
|
||||||
|
<div>
|
||||||
|
<p class="font-medium text-sm leading-tight">Kimi</p>
|
||||||
|
<p class="text-xs text-gray-400">Moonshot · 长文本</p>
|
||||||
|
</div>
|
||||||
|
</label>
|
||||||
|
<label class="flex items-center gap-2 p-3 border-2 rounded-xl cursor-pointer transition"
|
||||||
|
:class="cfg.model_provider==='ollama' ? 'border-green-500 bg-green-50' : 'border-gray-200 hover:border-gray-300'">
|
||||||
|
<input type="radio" name="provider" value="ollama" x-model="cfg.model_provider" class="accent-green-600">
|
||||||
|
<div>
|
||||||
|
<p class="font-medium text-sm leading-tight">Ollama 本地</p>
|
||||||
|
<p class="text-xs text-gray-400">免费 · 离线</p>
|
||||||
|
</div>
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<template x-if="cfg.model_provider==='qwen'">
|
||||||
|
<div class="space-y-3 mb-4">
|
||||||
|
<div>
|
||||||
|
<label class="block text-sm font-medium text-gray-700 mb-1">Qwen API Key
|
||||||
|
<a href="https://dashscope.aliyun.com/" target="_blank" class="ml-1 text-blue-500 text-xs hover:underline font-normal">申请地址 ↗</a>
|
||||||
|
</label>
|
||||||
|
<input type="password" x-model="cfg.qwen_api_key" placeholder="sk-..."
|
||||||
|
class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500">
|
||||||
|
<p x-show="cfg.has_qwen_key" class="text-xs text-green-600 mt-1">✓ 已配置</p>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label class="block text-sm font-medium text-gray-700 mb-1">模型</label>
|
||||||
|
<select x-model="cfg.qwen_model" class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500">
|
||||||
|
<optgroup label="─── Qwen3.6(本项目默认:生成+解析)───">
|
||||||
|
<option value="qwen3.6-plus">qwen3.6-plus ★ 默认</option>
|
||||||
|
</optgroup>
|
||||||
|
<optgroup label="─── 旗舰版 ───">
|
||||||
|
<option value="qwen-max">qwen-max ★ 推荐</option>
|
||||||
|
<option value="qwen-max-latest">qwen-max-latest(自动追踪最新)</option>
|
||||||
|
</optgroup>
|
||||||
|
<optgroup label="─── 均衡版 ───">
|
||||||
|
<option value="qwen-plus">qwen-plus</option>
|
||||||
|
<option value="qwen-plus-latest">qwen-plus-latest(自动追踪最新)</option>
|
||||||
|
</optgroup>
|
||||||
|
<optgroup label="─── 快速版 ───">
|
||||||
|
<option value="qwen-turbo">qwen-turbo</option>
|
||||||
|
<option value="qwen-turbo-latest">qwen-turbo-latest(自动追踪最新)</option>
|
||||||
|
</optgroup>
|
||||||
|
<optgroup label="─── 超长上下文 ───">
|
||||||
|
<option value="qwen-long">qwen-long(1M tokens)</option>
|
||||||
|
</optgroup>
|
||||||
|
<optgroup label="─── Qwen3 系列 API ───">
|
||||||
|
<option value="qwen3-235b-a22b">qwen3-235b-a22b(MoE 旗舰)</option>
|
||||||
|
<option value="qwen3-32b">qwen3-32b</option>
|
||||||
|
<option value="qwen3-30b-a3b">qwen3-30b-a3b(MoE 高效)</option>
|
||||||
|
<option value="qwen3-14b">qwen3-14b</option>
|
||||||
|
<option value="qwen3-8b">qwen3-8b</option>
|
||||||
|
</optgroup>
|
||||||
|
<optgroup label="─── 自定义 ───">
|
||||||
|
<option value="">手动输入模型名</option>
|
||||||
|
</optgroup>
|
||||||
|
</select>
|
||||||
|
<div x-show="!qwenPresets.includes(cfg.qwen_model)" class="mt-2">
|
||||||
|
<input type="text" x-model="cfg.qwen_model" placeholder="输入模型名,如 qwen-max-2025-01-25"
|
||||||
|
class="w-full px-3 py-2 border border-blue-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 font-mono">
|
||||||
|
<p class="text-xs text-gray-400 mt-1">输入任意 DashScope 兼容的模型名</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<!-- 自定义 API 地址(可选,供代理/中转使用) -->
|
||||||
|
<div>
|
||||||
|
<button type="button" @click="cfg._qwen_adv = !cfg._qwen_adv"
|
||||||
|
class="text-xs text-gray-400 hover:text-gray-600 flex items-center gap-1">
|
||||||
|
<span x-text="cfg._qwen_adv ? '▾' : '▸'"></span>
|
||||||
|
高级:自定义 API 地址(代理/中转)
|
||||||
|
</button>
|
||||||
|
<div x-show="cfg._qwen_adv" class="mt-2">
|
||||||
|
<input type="text" x-model="cfg.qwen_base_url"
|
||||||
|
placeholder="https://dashscope.aliyuncs.com/compatible-mode/v1"
|
||||||
|
class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 font-mono">
|
||||||
|
<p class="text-xs text-gray-400 mt-1">默认:https://dashscope.aliyuncs.com/compatible-mode/v1</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
|
||||||
|
<template x-if="cfg.model_provider==='deepseek'">
|
||||||
|
<div class="space-y-3 mb-4">
|
||||||
|
<div>
|
||||||
|
<label class="block text-sm font-medium text-gray-700 mb-1">DeepSeek API Key
|
||||||
|
<a href="https://platform.deepseek.com/" target="_blank" class="ml-1 text-blue-500 text-xs hover:underline font-normal">申请地址 ↗</a>
|
||||||
|
</label>
|
||||||
|
<input type="password" x-model="cfg.deepseek_api_key" placeholder="sk-..."
|
||||||
|
class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500">
|
||||||
|
<p x-show="cfg.has_deepseek_key" class="text-xs text-green-600 mt-1">✓ 已配置</p>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label class="block text-sm font-medium text-gray-700 mb-1">模型</label>
|
||||||
|
<select x-model="cfg.deepseek_model" class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500">
|
||||||
|
<optgroup label="─── 对话模型 ───">
|
||||||
|
<option value="deepseek-chat">deepseek-chat ★ 推荐(V3 最新)</option>
|
||||||
|
</optgroup>
|
||||||
|
<optgroup label="─── 推理模型 ───">
|
||||||
|
<option value="deepseek-reasoner">deepseek-reasoner(R1)</option>
|
||||||
|
</optgroup>
|
||||||
|
<optgroup label="─── 自定义 ───">
|
||||||
|
<option value="">手动输入模型名</option>
|
||||||
|
</optgroup>
|
||||||
|
</select>
|
||||||
|
<div x-show="!deepseekPresets.includes(cfg.deepseek_model)" class="mt-2">
|
||||||
|
<input type="text" x-model="cfg.deepseek_model" placeholder="输入模型名,如 deepseek-chat-v3-0324"
|
||||||
|
class="w-full px-3 py-2 border border-blue-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 font-mono">
|
||||||
|
<p class="text-xs text-gray-400 mt-1">输入任意 DeepSeek 兼容的模型名</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<!-- 自定义 API 地址 -->
|
||||||
|
<div>
|
||||||
|
<button type="button" @click="cfg._ds_adv = !cfg._ds_adv"
|
||||||
|
class="text-xs text-gray-400 hover:text-gray-600 flex items-center gap-1">
|
||||||
|
<span x-text="cfg._ds_adv ? '▾' : '▸'"></span>
|
||||||
|
高级:自定义 API 地址(代理/中转)
|
||||||
|
</button>
|
||||||
|
<div x-show="cfg._ds_adv" class="mt-2">
|
||||||
|
<input type="text" x-model="cfg.deepseek_base_url"
|
||||||
|
placeholder="https://api.deepseek.com/v1"
|
||||||
|
class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 font-mono">
|
||||||
|
<p class="text-xs text-gray-400 mt-1">默认:https://api.deepseek.com/v1</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="flex items-start gap-2 p-3 bg-amber-50 rounded-lg border border-amber-200 text-xs text-amber-700">
|
||||||
|
<svg class="w-4 h-4 flex-shrink-0 mt-0.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"/>
|
||||||
|
</svg>
|
||||||
|
<span>DeepSeek 暂不提供 Embedding API,知识库功能将自动使用本地默认模型(需下载约 90MB 模型)。其他功能不受影响。</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
|
||||||
|
<!-- ── 豆包配置面板 ── -->
|
||||||
|
<template x-if="cfg.model_provider==='doubao'">
|
||||||
|
<div class="space-y-3 mb-4">
|
||||||
|
<div>
|
||||||
|
<label class="block text-sm font-medium text-gray-700 mb-1">豆包 API Key
|
||||||
|
<a href="https://console.volcengine.com/ark/" target="_blank" class="ml-1 text-blue-500 text-xs hover:underline font-normal">申请地址 ↗</a>
|
||||||
|
</label>
|
||||||
|
<input type="password" x-model="cfg.doubao_api_key" placeholder="sk-..."
|
||||||
|
class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500">
|
||||||
|
<p x-show="cfg.has_doubao_key" class="text-xs text-green-600 mt-1">✓ 已配置</p>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label class="block text-sm font-medium text-gray-700 mb-1">模型</label>
|
||||||
|
<select x-model="cfg.doubao_model" class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500">
|
||||||
|
<optgroup label="─── 豆包 1.5 系列(2025 最新)───">
|
||||||
|
<option value="doubao-1-5-pro-32k">doubao-1-5-pro-32k ★ 推荐</option>
|
||||||
|
<option value="doubao-1-5-pro-128k">doubao-1-5-pro-128k(超长上下文)</option>
|
||||||
|
<option value="doubao-1-5-lite-32k">doubao-1-5-lite-32k(快速低价)</option>
|
||||||
|
</optgroup>
|
||||||
|
<optgroup label="─── 豆包 Pro 系列 ───">
|
||||||
|
<option value="doubao-pro-32k">doubao-pro-32k</option>
|
||||||
|
<option value="doubao-pro-128k">doubao-pro-128k</option>
|
||||||
|
<option value="doubao-pro-256k">doubao-pro-256k(超长)</option>
|
||||||
|
</optgroup>
|
||||||
|
<optgroup label="─── 豆包 Lite 系列 ───">
|
||||||
|
<option value="doubao-lite-32k">doubao-lite-32k</option>
|
||||||
|
<option value="doubao-lite-128k">doubao-lite-128k</option>
|
||||||
|
</optgroup>
|
||||||
|
<optgroup label="─── 自定义 ───">
|
||||||
|
<option value="">手动输入模型名</option>
|
||||||
|
</optgroup>
|
||||||
|
</select>
|
||||||
|
<div x-show="!doubaoPresets.includes(cfg.doubao_model)" class="mt-2">
|
||||||
|
<input type="text" x-model="cfg.doubao_model" placeholder="输入模型名,如 doubao-1-5-pro-32k"
|
||||||
|
class="w-full px-3 py-2 border border-blue-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 font-mono">
|
||||||
|
<p class="text-xs text-gray-400 mt-1">输入火山引擎方舟平台支持的任意模型名</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<!-- 自定义 API 地址 -->
|
||||||
|
<div>
|
||||||
|
<button type="button" @click="cfg._doubao_adv = !cfg._doubao_adv"
|
||||||
|
class="text-xs text-gray-400 hover:text-gray-600 flex items-center gap-1">
|
||||||
|
<span x-text="cfg._doubao_adv ? '▾' : '▸'"></span>
|
||||||
|
高级:自定义 API 地址(代理/中转)
|
||||||
|
</button>
|
||||||
|
<div x-show="cfg._doubao_adv" class="mt-2">
|
||||||
|
<input type="text" x-model="cfg.doubao_base_url"
|
||||||
|
placeholder="https://ark.cn-beijing.volces.com/api/v3"
|
||||||
|
class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 font-mono">
|
||||||
|
<p class="text-xs text-gray-400 mt-1">默认:https://ark.cn-beijing.volces.com/api/v3</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="flex items-start gap-2 p-3 bg-amber-50 rounded-lg border border-amber-200 text-xs text-amber-700">
|
||||||
|
<svg class="w-4 h-4 flex-shrink-0 mt-0.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"/>
|
||||||
|
</svg>
|
||||||
|
<span>豆包暂不提供通用 Embedding API,知识库将自动使用关键词检索模式。其他功能完全正常。</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
|
||||||
|
<!-- ── Kimi 配置面板 ── -->
|
||||||
|
<template x-if="cfg.model_provider==='kimi'">
|
||||||
|
<div class="space-y-3 mb-4">
|
||||||
|
<div>
|
||||||
|
<label class="block text-sm font-medium text-gray-700 mb-1">Kimi API Key
|
||||||
|
<a href="https://platform.moonshot.cn/" target="_blank" class="ml-1 text-blue-500 text-xs hover:underline font-normal">申请地址 ↗</a>
|
||||||
|
</label>
|
||||||
|
<input type="password" x-model="cfg.kimi_api_key" placeholder="sk-..."
|
||||||
|
class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500">
|
||||||
|
<p x-show="cfg.has_kimi_key" class="text-xs text-green-600 mt-1">✓ 已配置</p>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label class="block text-sm font-medium text-gray-700 mb-1">模型</label>
|
||||||
|
<select x-model="cfg.kimi_model" class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500">
|
||||||
|
<optgroup label="─── Moonshot 系列 ───">
|
||||||
|
<option value="moonshot-v1-32k">moonshot-v1-32k ★ 推荐(均衡)</option>
|
||||||
|
<option value="moonshot-v1-128k">moonshot-v1-128k(超长上下文)</option>
|
||||||
|
<option value="moonshot-v1-8k">moonshot-v1-8k(快速低价)</option>
|
||||||
|
<option value="moonshot-v1-auto">moonshot-v1-auto(自动选择)</option>
|
||||||
|
</optgroup>
|
||||||
|
<optgroup label="─── 自定义 ───">
|
||||||
|
<option value="">手动输入模型名</option>
|
||||||
|
</optgroup>
|
||||||
|
</select>
|
||||||
|
<div x-show="!kimiPresets.includes(cfg.kimi_model)" class="mt-2">
|
||||||
|
<input type="text" x-model="cfg.kimi_model" placeholder="输入模型名,如 moonshot-v1-128k"
|
||||||
|
class="w-full px-3 py-2 border border-blue-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 font-mono">
|
||||||
|
<p class="text-xs text-gray-400 mt-1">输入 Moonshot 平台支持的任意模型名</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<!-- 自定义 API 地址 -->
|
||||||
|
<div>
|
||||||
|
<button type="button" @click="cfg._kimi_adv = !cfg._kimi_adv"
|
||||||
|
class="text-xs text-gray-400 hover:text-gray-600 flex items-center gap-1">
|
||||||
|
<span x-text="cfg._kimi_adv ? '▾' : '▸'"></span>
|
||||||
|
高级:自定义 API 地址(代理/中转)
|
||||||
|
</button>
|
||||||
|
<div x-show="cfg._kimi_adv" class="mt-2">
|
||||||
|
<input type="text" x-model="cfg.kimi_base_url"
|
||||||
|
placeholder="https://api.moonshot.cn/v1"
|
||||||
|
class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 font-mono">
|
||||||
|
<p class="text-xs text-gray-400 mt-1">默认:https://api.moonshot.cn/v1</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="flex items-start gap-2 p-3 bg-teal-50 rounded-lg border border-teal-200 text-xs text-teal-700">
|
||||||
|
<svg class="w-4 h-4 flex-shrink-0 mt-0.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z"/>
|
||||||
|
</svg>
|
||||||
|
<span>Kimi 支持 Embedding API(moonshot-v1-embedding),知识库将使用语义向量检索,效果更佳。</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
|
||||||
|
<template x-if="cfg.model_provider==='openai'">
|
||||||
|
<div class="space-y-3 mb-4">
|
||||||
|
<div>
|
||||||
|
<label class="block text-sm font-medium text-gray-700 mb-1">OpenAI API Key
|
||||||
|
<a href="https://platform.openai.com/" target="_blank" class="ml-1 text-blue-500 text-xs hover:underline font-normal">申请地址 ↗</a>
|
||||||
|
</label>
|
||||||
|
<input type="password" x-model="cfg.openai_api_key" placeholder="sk-..."
|
||||||
|
class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500">
|
||||||
|
<p x-show="cfg.has_openai_key" class="text-xs text-green-600 mt-1">✓ 已配置</p>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label class="block text-sm font-medium text-gray-700 mb-1">模型</label>
|
||||||
|
<select x-model="cfg.openai_model" class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500">
|
||||||
|
<optgroup label="─── GPT-4.1 系列(2025)───">
|
||||||
|
<option value="gpt-4.1">gpt-4.1 ★ 推荐(旗舰,1M 上下文)</option>
|
||||||
|
<option value="gpt-4.1-mini">gpt-4.1-mini(快速均衡)</option>
|
||||||
|
<option value="gpt-4.1-nano">gpt-4.1-nano(最轻量低价)</option>
|
||||||
|
</optgroup>
|
||||||
|
<optgroup label="─── o 推理系列(深度推理,适合复杂标书)───">
|
||||||
|
<option value="o4-mini">o4-mini(推理,高性价比)</option>
|
||||||
|
<option value="o3">o3(最强推理,较慢)</option>
|
||||||
|
<option value="o3-mini">o3-mini(快速推理)</option>
|
||||||
|
<option value="o1">o1(深度推理)</option>
|
||||||
|
<option value="o1-mini">o1-mini(推理入门)</option>
|
||||||
|
<option value="o1-pro">o1-pro(最高质量推理)</option>
|
||||||
|
</optgroup>
|
||||||
|
<optgroup label="─── GPT-4o 系列 ───">
|
||||||
|
<option value="gpt-4o">gpt-4o</option>
|
||||||
|
<option value="gpt-4o-mini">gpt-4o-mini</option>
|
||||||
|
</optgroup>
|
||||||
|
<optgroup label="─── 旧版 ───">
|
||||||
|
<option value="gpt-4-turbo">gpt-4-turbo</option>
|
||||||
|
</optgroup>
|
||||||
|
<optgroup label="─── 自定义 ───">
|
||||||
|
<option value="">手动输入模型名</option>
|
||||||
|
</optgroup>
|
||||||
|
</select>
|
||||||
|
<div x-show="!openaiPresets.includes(cfg.openai_model)" class="mt-2">
|
||||||
|
<input type="text" x-model="cfg.openai_model" placeholder="输入模型名,如 gpt-5、gpt-4.1-2025-04-14"
|
||||||
|
class="w-full px-3 py-2 border border-blue-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 font-mono">
|
||||||
|
<p class="text-xs text-gray-400 mt-1">输入 OpenAI 平台支持的任意模型名</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<!-- 自定义 API 地址 -->
|
||||||
|
<div>
|
||||||
|
<button type="button" @click="cfg._oai_adv = !cfg._oai_adv"
|
||||||
|
class="text-xs text-gray-400 hover:text-gray-600 flex items-center gap-1">
|
||||||
|
<span x-text="cfg._oai_adv ? '▾' : '▸'"></span>
|
||||||
|
高级:自定义 API 地址(Azure / 代理 / 中转)
|
||||||
|
</button>
|
||||||
|
<div x-show="cfg._oai_adv" class="mt-2">
|
||||||
|
<input type="text" x-model="cfg.openai_base_url"
|
||||||
|
placeholder="https://api.openai.com/v1"
|
||||||
|
class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 font-mono">
|
||||||
|
<p class="text-xs text-gray-400 mt-1">默认:https://api.openai.com/v1 | Azure 示例:https://YOUR.openai.azure.com/openai/deployments/MODEL</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
|
||||||
|
<template x-if="cfg.model_provider==='ollama'">
|
||||||
|
<div class="space-y-3 mb-4">
|
||||||
|
<!-- 状态检测 -->
|
||||||
|
<div class="flex items-center justify-between p-3 bg-green-50 rounded-lg border border-green-200">
|
||||||
|
<div class="flex items-center gap-2 text-xs text-green-700">
|
||||||
|
<svg class="w-4 h-4 flex-shrink-0" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z"/>
|
||||||
|
</svg>
|
||||||
|
<span>本地运行,数据不上传云端,完全免费</span>
|
||||||
|
</div>
|
||||||
|
<button type="button" @click="testOllama()"
|
||||||
|
class="text-xs px-2 py-1 bg-green-600 hover:bg-green-700 text-white rounded-lg transition flex-shrink-0">
|
||||||
|
检测连接
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Ollama 服务地址 -->
|
||||||
|
<div>
|
||||||
|
<label class="block text-sm font-medium text-gray-700 mb-1">
|
||||||
|
Ollama 服务地址
|
||||||
|
<span class="ml-1 text-xs text-gray-400 font-normal">(默认本机)</span>
|
||||||
|
</label>
|
||||||
|
<input type="text" x-model="cfg.ollama_base_url" placeholder="http://localhost:11434/v1"
|
||||||
|
class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-green-500 font-mono">
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- 模型选择 -->
|
||||||
|
<div>
|
||||||
|
<label class="block text-sm font-medium text-gray-700 mb-1">选择模型</label>
|
||||||
|
<select x-model="cfg.ollama_model"
|
||||||
|
class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-green-500">
|
||||||
|
<optgroup label="★ 推荐:标书写作首选">
|
||||||
|
<option value="qwen3:8b">qwen3:8b ★ 推荐入门(约 5 GB)</option>
|
||||||
|
<option value="qwen3:14b">qwen3:14b ★ 推荐均衡(约 9 GB)</option>
|
||||||
|
<option value="qwen3:32b">qwen3:32b ★ 推荐高质量(约 20 GB)</option>
|
||||||
|
<option value="deepseek-r1:14b">deepseek-r1:14b ★ 推荐推理(约 9 GB)</option>
|
||||||
|
<option value="deepseek-r1:32b">deepseek-r1:32b ★ 推荐高质量推理(约 20 GB)</option>
|
||||||
|
</optgroup>
|
||||||
|
<optgroup label="─── Qwen3 系列(阿里,2025最新)───">
|
||||||
|
<option value="qwen3:0.6b">qwen3:0.6b(最轻量,约 0.5 GB)</option>
|
||||||
|
<option value="qwen3:1.7b">qwen3:1.7b(约 1 GB)</option>
|
||||||
|
<option value="qwen3:4b">qwen3:4b(约 2.5 GB)</option>
|
||||||
|
<option value="qwen3:8b">qwen3:8b(约 5 GB)</option>
|
||||||
|
<option value="qwen3:14b">qwen3:14b(约 9 GB)</option>
|
||||||
|
<option value="qwen3:32b">qwen3:32b(约 20 GB)</option>
|
||||||
|
<option value="qwen3:30b-a3b">qwen3:30b-a3b(MoE 高效,约 19 GB)</option>
|
||||||
|
<option value="qwen3:235b-a22b">qwen3:235b-a22b(MoE 旗舰,约 142 GB)</option>
|
||||||
|
</optgroup>
|
||||||
|
<optgroup label="─── Qwen2.5 系列(阿里)───">
|
||||||
|
<option value="qwen2.5:0.5b">qwen2.5:0.5b(约 0.4 GB)</option>
|
||||||
|
<option value="qwen2.5:1.5b">qwen2.5:1.5b(约 1 GB)</option>
|
||||||
|
<option value="qwen2.5:3b">qwen2.5:3b(约 2 GB)</option>
|
||||||
|
<option value="qwen2.5:7b">qwen2.5:7b(约 4.7 GB)</option>
|
||||||
|
<option value="qwen2.5:14b">qwen2.5:14b(约 9 GB)</option>
|
||||||
|
<option value="qwen2.5:32b">qwen2.5:32b(约 20 GB)</option>
|
||||||
|
<option value="qwen2.5:72b">qwen2.5:72b(约 47 GB)</option>
|
||||||
|
</optgroup>
|
||||||
|
<optgroup label="─── Qwen2.5-Coder 系列(代码增强)───">
|
||||||
|
<option value="qwen2.5-coder:1.5b">qwen2.5-coder:1.5b(约 1 GB)</option>
|
||||||
|
<option value="qwen2.5-coder:3b">qwen2.5-coder:3b(约 2 GB)</option>
|
||||||
|
<option value="qwen2.5-coder:7b">qwen2.5-coder:7b(约 4.7 GB)</option>
|
||||||
|
<option value="qwen2.5-coder:14b">qwen2.5-coder:14b(约 9 GB)</option>
|
||||||
|
<option value="qwen2.5-coder:32b">qwen2.5-coder:32b(约 20 GB)</option>
|
||||||
|
</optgroup>
|
||||||
|
<optgroup label="─── QwQ 系列(阿里深度推理)───">
|
||||||
|
<option value="qwq:32b">qwq:32b(深度推理,约 20 GB)</option>
|
||||||
|
</optgroup>
|
||||||
|
<optgroup label="─── DeepSeek R1 系列(推理增强)───">
|
||||||
|
<option value="deepseek-r1:1.5b">deepseek-r1:1.5b(约 1 GB)</option>
|
||||||
|
<option value="deepseek-r1:7b">deepseek-r1:7b(约 4.7 GB)</option>
|
||||||
|
<option value="deepseek-r1:8b">deepseek-r1:8b(约 5 GB)</option>
|
||||||
|
<option value="deepseek-r1:14b">deepseek-r1:14b(约 9 GB)</option>
|
||||||
|
<option value="deepseek-r1:32b">deepseek-r1:32b(约 20 GB)</option>
|
||||||
|
<option value="deepseek-r1:70b">deepseek-r1:70b(约 43 GB)</option>
|
||||||
|
<option value="deepseek-r1:671b">deepseek-r1:671b(原版,需超大显存)</option>
|
||||||
|
</optgroup>
|
||||||
|
<optgroup label="─── DeepSeek V2 系列 ───">
|
||||||
|
<option value="deepseek-v2:16b">deepseek-v2:16b(Lite,约 10 GB)</option>
|
||||||
|
<option value="deepseek-v2:236b">deepseek-v2:236b(全量,约 150 GB)</option>
|
||||||
|
</optgroup>
|
||||||
|
<optgroup label="─── DeepSeek V3 系列 ───">
|
||||||
|
<option value="deepseek-v3:7b">deepseek-v3:7b(约 4.7 GB)</option>
|
||||||
|
<option value="deepseek-v3:671b">deepseek-v3:671b(完整版,需超大显存)</option>
|
||||||
|
</optgroup>
|
||||||
|
<optgroup label="─── 自定义 ───">
|
||||||
|
<option value="">手动输入模型名</option>
|
||||||
|
</optgroup>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- 自定义模型名(选中"手动输入"或填入了预设外的值时显示) -->
|
||||||
|
<div x-show="!ollamaPresets.includes(cfg.ollama_model)">
|
||||||
|
<input type="text" x-model="cfg.ollama_model" placeholder="例如:qwen3:latest 或 deepseek-r1:latest"
|
||||||
|
class="w-full px-3 py-2 border border-green-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-green-500 font-mono">
|
||||||
|
<p class="text-xs text-gray-400 mt-1">请输入已通过 <code class="bg-gray-100 px-1 rounded">ollama pull <模型名></code> 下载的模型名</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="flex items-start gap-2 p-3 bg-amber-50 rounded-lg border border-amber-200 text-xs text-amber-700">
|
||||||
|
<svg class="w-4 h-4 flex-shrink-0 mt-0.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"/>
|
||||||
|
</svg>
|
||||||
|
<span>使用前请先安装 <a href="https://ollama.com/" target="_blank" class="underline">Ollama</a> 并下载模型,例如:<br>
|
||||||
|
<code class="bg-amber-100 px-1 rounded">ollama pull qwen3:14b</code>(Qwen3 推荐)
|
||||||
|
<code class="bg-amber-100 px-1 rounded">ollama pull deepseek-r1:14b</code>(DeepSeek R1 推荐)<br>
|
||||||
|
Ollama 本地不支持知识库 Embedding,该功能将自动回退到本地模型。推理模型(R1/QwQ)可能输出 <code class="bg-amber-100 px-1 rounded"><think></code> 标签,不影响正文使用。</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
|
||||||
|
<!-- 标书篇幅设置 -->
|
||||||
|
<div class="mb-4 pt-3 border-t border-gray-100">
|
||||||
|
<p class="text-xs text-gray-500 mb-3 p-2 bg-slate-50 rounded-lg">
|
||||||
|
全稿「目标总页数」在<strong>已打开的标书项目</strong>中,到 <strong>步骤1「解析」</strong> 里设置,与下方「每节字数档」是两项不同设置。
|
||||||
|
</p>
|
||||||
|
<label class="block text-sm font-medium text-gray-700 mb-2">
|
||||||
|
标书篇幅预期
|
||||||
|
<span class="ml-1 text-xs text-gray-400 font-normal">(控制每个章节生成内容的字数)</span>
|
||||||
|
</label>
|
||||||
|
<div class="grid grid-cols-2 gap-2">
|
||||||
|
<label class="flex items-center gap-2.5 p-3 border-2 rounded-xl cursor-pointer transition"
|
||||||
|
:class="cfg.content_volume==='concise' ? 'border-blue-500 bg-blue-50' : 'border-gray-200 hover:border-gray-300'">
|
||||||
|
<input type="radio" name="volume" value="concise" x-model="cfg.content_volume" class="accent-blue-600">
|
||||||
|
<div>
|
||||||
|
<p class="font-medium text-sm leading-tight">精简版</p>
|
||||||
|
<p class="text-xs text-gray-400">每节约 1200 字</p>
|
||||||
|
</div>
|
||||||
|
</label>
|
||||||
|
<label class="flex items-center gap-2.5 p-3 border-2 rounded-xl cursor-pointer transition"
|
||||||
|
:class="cfg.content_volume==='standard' ? 'border-blue-500 bg-blue-50' : 'border-gray-200 hover:border-gray-300'">
|
||||||
|
<input type="radio" name="volume" value="standard" x-model="cfg.content_volume" class="accent-blue-600">
|
||||||
|
<div>
|
||||||
|
<p class="font-medium text-sm leading-tight">标准版(推荐)</p>
|
||||||
|
<p class="text-xs text-gray-400">每节约 2000 字</p>
|
||||||
|
</div>
|
||||||
|
</label>
|
||||||
|
<label class="flex items-center gap-2.5 p-3 border-2 rounded-xl cursor-pointer transition"
|
||||||
|
:class="cfg.content_volume==='detailed' ? 'border-blue-500 bg-blue-50' : 'border-gray-200 hover:border-gray-300'">
|
||||||
|
<input type="radio" name="volume" value="detailed" x-model="cfg.content_volume" class="accent-blue-600">
|
||||||
|
<div>
|
||||||
|
<p class="font-medium text-sm leading-tight">详细版</p>
|
||||||
|
<p class="text-xs text-gray-400">每节约 3000 字</p>
|
||||||
|
</div>
|
||||||
|
</label>
|
||||||
|
<label class="flex items-center gap-2.5 p-3 border-2 rounded-xl cursor-pointer transition"
|
||||||
|
:class="cfg.content_volume==='full' ? 'border-blue-500 bg-blue-50' : 'border-gray-200 hover:border-gray-300'">
|
||||||
|
<input type="radio" name="volume" value="full" x-model="cfg.content_volume" class="accent-blue-600">
|
||||||
|
<div>
|
||||||
|
<p class="font-medium text-sm leading-tight">充实版</p>
|
||||||
|
<p class="text-xs text-gray-400">每节约 4000 字</p>
|
||||||
|
</div>
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- 并发生成设置 -->
|
||||||
|
<div class="mb-4 pt-3 border-t border-gray-100">
|
||||||
|
<label class="block text-sm font-medium text-gray-700 mb-2">
|
||||||
|
并发生成章节数
|
||||||
|
<span class="ml-1 text-xs text-gray-400 font-normal">(同时调用 AI 的线程数,越大越快但需注意 API 限流)</span>
|
||||||
|
</label>
|
||||||
|
<div class="flex items-center gap-3">
|
||||||
|
<input type="range" min="1" max="10" step="1" x-model.number="cfg.max_concurrent"
|
||||||
|
class="flex-1 accent-blue-600">
|
||||||
|
<span class="w-12 text-center text-sm font-bold text-blue-600 bg-blue-50 rounded-lg py-1"
|
||||||
|
x-text="cfg.max_concurrent + ' 路'"></span>
|
||||||
|
</div>
|
||||||
|
<div class="flex justify-between text-xs text-gray-400 mt-1 px-0.5">
|
||||||
|
<span>保守(1路)</span>
|
||||||
|
<span>推荐(3-5路)</span>
|
||||||
|
<span>激进(10路)</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="flex gap-3 mt-2">
|
||||||
|
<button @click="showConfig=false" class="flex-1 px-4 py-2 border border-gray-200 text-gray-600 rounded-xl text-sm hover:bg-gray-50 transition">取消</button>
|
||||||
|
<button @click="saveConfig()" class="flex-1 px-4 py-2 bg-blue-600 hover:bg-blue-700 text-white rounded-xl text-sm font-medium transition">保存配置</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
function app() {
|
||||||
|
return {
|
||||||
|
projects: [],
|
||||||
|
loading: true,
|
||||||
|
showCreate: false,
|
||||||
|
showConfig: false,
|
||||||
|
newProjectName: '',
|
||||||
|
creating: false,
|
||||||
|
cfg: {
|
||||||
|
model_provider: 'qwen',
|
||||||
|
target_pages: 0,
|
||||||
|
qwen_api_key: '', qwen_model: 'qwen3.6-plus', qwen_base_url: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
|
||||||
|
openai_api_key: '', openai_model: 'gpt-4.1', openai_base_url: 'https://api.openai.com/v1',
|
||||||
|
deepseek_api_key: '', deepseek_model: 'deepseek-chat', deepseek_base_url: 'https://api.deepseek.com/v1',
|
||||||
|
ollama_base_url: 'http://localhost:11434/v1', ollama_model: 'qwen3:8b',
|
||||||
|
doubao_api_key: '', doubao_model: 'doubao-1-5-pro-32k', doubao_base_url: 'https://ark.cn-beijing.volces.com/api/v3',
|
||||||
|
kimi_api_key: '', kimi_model: 'moonshot-v1-32k', kimi_base_url: 'https://api.moonshot.cn/v1',
|
||||||
|
max_concurrent: 5, content_volume: 'standard',
|
||||||
|
_qwen_adv: false, _ds_adv: false, _oai_adv: false, _doubao_adv: false, _kimi_adv: false,
|
||||||
|
},
|
||||||
|
qwenPresets: [
|
||||||
|
'qwen3.6-plus',
|
||||||
|
'qwen-max','qwen-max-latest','qwen-plus','qwen-plus-latest',
|
||||||
|
'qwen-turbo','qwen-turbo-latest','qwen-long',
|
||||||
|
'qwen3-235b-a22b','qwen3-32b','qwen3-30b-a3b','qwen3-14b','qwen3-8b',
|
||||||
|
],
|
||||||
|
deepseekPresets: ['deepseek-chat','deepseek-reasoner'],
|
||||||
|
openaiPresets: [
|
||||||
|
'gpt-4.1','gpt-4.1-mini','gpt-4.1-nano',
|
||||||
|
'o4-mini','o3','o3-mini','o1','o1-mini','o1-pro',
|
||||||
|
'gpt-4o','gpt-4o-mini','gpt-4-turbo',
|
||||||
|
],
|
||||||
|
doubaoPresets: [
|
||||||
|
'doubao-1-5-pro-32k', 'doubao-1-5-pro-128k', 'doubao-1-5-lite-32k',
|
||||||
|
'doubao-pro-32k', 'doubao-pro-128k', 'doubao-pro-256k',
|
||||||
|
'doubao-lite-32k', 'doubao-lite-128k',
|
||||||
|
],
|
||||||
|
kimiPresets: [
|
||||||
|
'moonshot-v1-8k', 'moonshot-v1-32k', 'moonshot-v1-128k', 'moonshot-v1-auto',
|
||||||
|
],
|
||||||
|
ollamaPresets: [
|
||||||
|
// 推荐
|
||||||
|
'qwen3:8b','qwen3:14b','qwen3:32b','deepseek-r1:14b','deepseek-r1:32b',
|
||||||
|
// Qwen3
|
||||||
|
'qwen3:0.6b','qwen3:1.7b','qwen3:4b','qwen3:30b-a3b','qwen3:235b-a22b',
|
||||||
|
// Qwen2.5
|
||||||
|
'qwen2.5:0.5b','qwen2.5:1.5b','qwen2.5:3b','qwen2.5:7b','qwen2.5:14b','qwen2.5:32b','qwen2.5:72b',
|
||||||
|
// Qwen2.5-Coder
|
||||||
|
'qwen2.5-coder:1.5b','qwen2.5-coder:3b','qwen2.5-coder:7b','qwen2.5-coder:14b','qwen2.5-coder:32b',
|
||||||
|
// QwQ
|
||||||
|
'qwq:32b',
|
||||||
|
// DeepSeek R1
|
||||||
|
'deepseek-r1:1.5b','deepseek-r1:7b','deepseek-r1:8b','deepseek-r1:70b','deepseek-r1:671b',
|
||||||
|
// DeepSeek V2
|
||||||
|
'deepseek-v2:16b','deepseek-v2:236b',
|
||||||
|
// DeepSeek V3
|
||||||
|
'deepseek-v3:7b','deepseek-v3:671b',
|
||||||
|
],
|
||||||
|
|
||||||
|
async init() {
|
||||||
|
await Promise.all([this.loadProjects(), this.loadConfig()])
|
||||||
|
this.loading = false
|
||||||
|
},
|
||||||
|
|
||||||
|
async loadProjects() {
|
||||||
|
const res = await fetch('/api/projects')
|
||||||
|
const data = await res.json()
|
||||||
|
this.projects = data.projects || []
|
||||||
|
},
|
||||||
|
|
||||||
|
async loadConfig() {
|
||||||
|
const res = await fetch('/api/config')
|
||||||
|
const data = await res.json()
|
||||||
|
this.cfg = { ...this.cfg, ...data }
|
||||||
|
},
|
||||||
|
|
||||||
|
async createProject() {
|
||||||
|
if (!this.newProjectName.trim()) return
|
||||||
|
this.creating = true
|
||||||
|
const res = await fetch('/api/projects', {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify({ name: this.newProjectName.trim() })
|
||||||
|
})
|
||||||
|
const data = await res.json()
|
||||||
|
this.creating = false
|
||||||
|
this.showCreate = false
|
||||||
|
this.newProjectName = ''
|
||||||
|
if (data.id) {
|
||||||
|
window.location = '/project/' + data.id
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
async deleteProject(id, name) {
|
||||||
|
if (!confirm(`确定要删除项目"${name}"吗?此操作不可恢复。`)) return
|
||||||
|
await fetch('/api/projects/' + id, { method: 'DELETE' })
|
||||||
|
this.projects = this.projects.filter(p => p.id !== id)
|
||||||
|
},
|
||||||
|
|
||||||
|
async saveConfig() {
|
||||||
|
await fetch('/api/config', {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify(this.cfg)
|
||||||
|
})
|
||||||
|
this.showConfig = false
|
||||||
|
alert('配置已保存')
|
||||||
|
},
|
||||||
|
|
||||||
|
async testOllama() {
|
||||||
|
const baseUrl = (this.cfg.ollama_base_url || 'http://localhost:11434/v1').replace(/\/v1\/?$/, '')
|
||||||
|
try {
|
||||||
|
const res = await fetch(baseUrl + '/api/tags', { signal: AbortSignal.timeout(3000) })
|
||||||
|
if (res.ok) {
|
||||||
|
const data = await res.json()
|
||||||
|
const models = (data.models || []).map(m => m.name).join('、') || '(暂无已下载模型)'
|
||||||
|
alert('✅ Ollama 连接成功!\n已下载模型:' + models)
|
||||||
|
} else {
|
||||||
|
alert('⚠️ Ollama 已启动,但返回状态异常:' + res.status)
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
alert('❌ 无法连接到 Ollama(' + (this.cfg.ollama_base_url || 'http://localhost:11434/v1') + ')\n\n请确认:\n1. 已安装 Ollama(https://ollama.com)\n2. Ollama 服务正在运行\n3. 服务地址填写正确')
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
formatDate(dt) {
|
||||||
|
if (!dt) return ''
|
||||||
|
const d = new Date(dt)
|
||||||
|
return `${d.getFullYear()}-${String(d.getMonth()+1).padStart(2,'0')}-${String(d.getDate()).padStart(2,'0')}`
|
||||||
|
},
|
||||||
|
|
||||||
|
statusBadge(status) {
|
||||||
|
const map = {
|
||||||
|
'none': { text: '未上传', cls: 'bg-gray-100 text-gray-500' },
|
||||||
|
'uploaded': { text: '待解析', cls: 'bg-yellow-100 text-yellow-700' },
|
||||||
|
'parsing': { text: '解析中', cls: 'bg-blue-100 text-blue-700' },
|
||||||
|
'done': { text: '已解析', cls: 'bg-green-100 text-green-700' },
|
||||||
|
'error': { text: '解析失败', cls: 'bg-red-100 text-red-600' },
|
||||||
|
}
|
||||||
|
return map[status] || map['none']
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<!-- 页脚版权声明 -->
|
||||||
|
<footer class="mt-auto py-4 px-6 border-t border-gray-200 bg-white text-center text-xs text-gray-500 space-y-1">
|
||||||
|
<p class="font-medium text-gray-600">© 标书老崔</p>
|
||||||
|
<p>本工具仅限学习交流免费使用,生成的技术方案请人工核对。本工具不会在任何平台售卖,请注意甄别。</p>
|
||||||
|
</footer>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
2356
templates/project.html
Normal file
2356
templates/project.html
Normal file
File diff suppressed because it is too large
Load Diff
13
tests/fixtures/dark_bid_report_sample.json
vendored
Normal file
13
tests/fixtures/dark_bid_report_sample.json
vendored
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
{
|
||||||
|
"overall": false,
|
||||||
|
"details": [
|
||||||
|
{"rule": "身份信息隐藏", "passed": true, "message": "未发现投标人身份信息"},
|
||||||
|
{"rule": "标题格式", "passed": false, "message": "部分标题字号/字体/颜色/下划线不符合要求"},
|
||||||
|
{"rule": "正文格式", "passed": false, "message": "部分正文段落格式不符合要求"},
|
||||||
|
{"rule": "目录要求", "passed": true, "message": "目录符合无页码、无页眉页脚要求"},
|
||||||
|
{"rule": "图表规范", "passed": false, "message": "正文中发现2个图表或附件内图表文字格式错误"},
|
||||||
|
{"rule": "颜色与装饰", "passed": true, "message": "无彩色文字、无下划线、无着重号"},
|
||||||
|
{"rule": "页面设置", "passed": false, "message": "页面边距或纸张方向不符合要求"}
|
||||||
|
],
|
||||||
|
"violations": []
|
||||||
|
}
|
||||||
95
tests/test_attachment_section.py
Normal file
95
tests/test_attachment_section.py
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
"""附件类章节识别与单图/单表类型选择。"""
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from utils import attachment_section as att
|
||||||
|
|
||||||
|
|
||||||
|
class TestIsAttachment(unittest.TestCase):
|
||||||
|
def test_positive(self):
|
||||||
|
r = att.load_attachment_rules()
|
||||||
|
self.assertTrue(att.is_attachment_only_section('附件一:施工平面布置', r))
|
||||||
|
self.assertTrue(att.is_attachment_only_section('附图 组织机构', r))
|
||||||
|
self.assertTrue(att.is_attachment_only_section('附表 人员一览', r))
|
||||||
|
|
||||||
|
def test_negative(self):
|
||||||
|
r = att.load_attachment_rules()
|
||||||
|
self.assertFalse(att.is_attachment_only_section('施工组织设计', r))
|
||||||
|
self.assertFalse(att.is_attachment_only_section('', r))
|
||||||
|
|
||||||
|
|
||||||
|
class TestPickKind(unittest.TestCase):
|
||||||
|
def test_only_figure_switch(self):
|
||||||
|
r = att.DEFAULT_ATTACHMENT_RULES
|
||||||
|
self.assertEqual(
|
||||||
|
att.pick_single_figure_or_table('附件一:xxx', True, False, r),
|
||||||
|
'figure',
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_only_table_switch(self):
|
||||||
|
r = att.DEFAULT_ATTACHMENT_RULES
|
||||||
|
self.assertEqual(
|
||||||
|
att.pick_single_figure_or_table('附件一:xxx', False, True, r),
|
||||||
|
'table',
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_both_off(self):
|
||||||
|
self.assertIsNone(
|
||||||
|
att.pick_single_figure_or_table('附件一', False, False, None),
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_table_hint(self):
|
||||||
|
r = att.DEFAULT_ATTACHMENT_RULES
|
||||||
|
k = att.pick_single_figure_or_table('附件三 工程量一览表', True, True, r)
|
||||||
|
self.assertEqual(k, 'table')
|
||||||
|
|
||||||
|
def test_figure_hint(self):
|
||||||
|
r = att.DEFAULT_ATTACHMENT_RULES
|
||||||
|
k = att.pick_single_figure_or_table('附图 施工平面示意图', True, True, r)
|
||||||
|
self.assertEqual(k, 'figure')
|
||||||
|
|
||||||
|
def test_default_ambiguous(self):
|
||||||
|
r = dict(att.DEFAULT_ATTACHMENT_RULES)
|
||||||
|
r['default_kind_when_ambiguous'] = 'table'
|
||||||
|
k = att.pick_single_figure_or_table('附件五 其他资料', True, True, r)
|
||||||
|
self.assertEqual(k, 'table')
|
||||||
|
|
||||||
|
|
||||||
|
class TestAttachmentBodyMode(unittest.TestCase):
|
||||||
|
def test_default_stack_charts_only(self):
|
||||||
|
r = att.DEFAULT_ATTACHMENT_RULES
|
||||||
|
self.assertEqual(att.attachment_leaf_body_mode(r), 'stack_charts_only')
|
||||||
|
self.assertTrue(att.use_attachment_stack_charts_body(r))
|
||||||
|
self.assertFalse(att.use_attachment_single_chart_only_body(r))
|
||||||
|
self.assertFalse(att.use_attachment_full_body(r))
|
||||||
|
|
||||||
|
def test_full_mode(self):
|
||||||
|
r = dict(att.DEFAULT_ATTACHMENT_RULES)
|
||||||
|
r['attachment_leaf_body_mode'] = 'full'
|
||||||
|
self.assertEqual(att.attachment_leaf_body_mode(r), 'full')
|
||||||
|
self.assertTrue(att.use_attachment_full_body(r))
|
||||||
|
self.assertFalse(att.use_attachment_stack_charts_body(r))
|
||||||
|
|
||||||
|
def test_single_chart_only(self):
|
||||||
|
r = dict(att.DEFAULT_ATTACHMENT_RULES)
|
||||||
|
r['attachment_leaf_body_mode'] = 'single_chart_only'
|
||||||
|
self.assertTrue(att.use_attachment_single_chart_only_body(r))
|
||||||
|
self.assertTrue(att.use_attachment_stack_charts_body(r))
|
||||||
|
|
||||||
|
|
||||||
|
class TestExpandOutlineSkip(unittest.TestCase):
|
||||||
|
def test_should_skip_attachment(self):
|
||||||
|
self.assertTrue(att.should_skip_expand_subchapters('附件一:平面图'))
|
||||||
|
self.assertTrue(att.should_skip_expand_subchapters('附图 示意'))
|
||||||
|
|
||||||
|
def test_should_skip_normal_chapter(self):
|
||||||
|
self.assertFalse(att.should_skip_expand_subchapters('施工组织设计'))
|
||||||
|
self.assertFalse(att.should_skip_expand_subchapters('质量管理体系与措施'))
|
||||||
|
|
||||||
|
def test_parse_attachment_label(self):
|
||||||
|
self.assertEqual(att.parse_attachment_label('附件一:平面图'), '一')
|
||||||
|
self.assertEqual(att.parse_attachment_label('附件2 承诺书'), '2')
|
||||||
|
self.assertEqual(att.parse_attachment_label('附图 总平面'), '附图')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
||||||
52
tests/test_bill_analysis.py
Normal file
52
tests/test_bill_analysis.py
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
"""工程量清单本地分析单元测试。"""
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from utils.bill_analysis import (
|
||||||
|
analyze_boq_pages,
|
||||||
|
filter_bill_pages,
|
||||||
|
parse_bill_text,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestParseBillText(unittest.TestCase):
|
||||||
|
def test_code_name_unit_qty(self):
|
||||||
|
text = '010101001001 挖土方 m3 100.5 土壤类别:三类土'
|
||||||
|
r = parse_bill_text(text)
|
||||||
|
self.assertIn('categories', r)
|
||||||
|
self.assertTrue(r['categories'])
|
||||||
|
cat = r['categories'][0]
|
||||||
|
self.assertEqual(cat['name'], '未分类')
|
||||||
|
self.assertEqual(len(cat['items']), 1)
|
||||||
|
it = cat['items'][0]
|
||||||
|
self.assertEqual(it['code'], '010101001001')
|
||||||
|
self.assertIn('挖土', it['name'])
|
||||||
|
self.assertEqual(it['unit'], 'm3')
|
||||||
|
self.assertEqual(it['quantity'], '100.5')
|
||||||
|
|
||||||
|
def test_hierarchical_line_prefix(self):
|
||||||
|
text = '1.1 010101001001 基础开挖 m3 50'
|
||||||
|
r = parse_bill_text(text)
|
||||||
|
it = r['categories'][0]['items'][0]
|
||||||
|
self.assertEqual(it['code'], '010101001001')
|
||||||
|
|
||||||
|
|
||||||
|
class TestFilterBillPages(unittest.TestCase):
|
||||||
|
def test_two_pages_gap_fill(self):
|
||||||
|
p0 = '目录 前言'
|
||||||
|
p1 = '分部分项工程量清单\n项目编码 项目名称 工程量\n010101001001 项 m3 1'
|
||||||
|
p2 = '续表无表头\n010101002001 土 m2 2'
|
||||||
|
p3 = '规费 税金 社会保险费 住房公积金 其他说明'
|
||||||
|
pages, meta = filter_bill_pages([p0, p1, p2, p3])
|
||||||
|
self.assertEqual(meta['total_pages'], 4)
|
||||||
|
self.assertGreaterEqual(len(pages), 2)
|
||||||
|
merged = '\n'.join(pages)
|
||||||
|
self.assertIn('010101001001', merged)
|
||||||
|
self.assertIn('010101002001', merged)
|
||||||
|
|
||||||
|
def test_analyze_scanned_empty(self):
|
||||||
|
r = analyze_boq_pages(['', ' ', ''])
|
||||||
|
self.assertTrue(r.get('scanned'))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
||||||
63
tests/test_dark_bid_format_check.py
Normal file
63
tests/test_dark_bid_format_check.py
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
"""技术暗标 HTML 格式检查:结构校验与极简用例(标准库 unittest)。"""
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
# 保证可 `python tests/test_*.py` 从项目根导入 `modules`
|
||||||
|
_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
if _ROOT not in sys.path:
|
||||||
|
sys.path.insert(0, _ROOT)
|
||||||
|
|
||||||
|
from modules.dark_bid_format_check import check_technical_bid # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
|
def _sample_schema_path():
|
||||||
|
return os.path.join(os.path.dirname(__file__), "fixtures", "dark_bid_report_sample.json")
|
||||||
|
|
||||||
|
|
||||||
|
class TestDarkBidFormatCheck(unittest.TestCase):
|
||||||
|
def test_sample_fixture_keys(self):
|
||||||
|
with open(_sample_schema_path(), encoding="utf-8") as f:
|
||||||
|
sample = json.load(f)
|
||||||
|
self.assertIn("overall", sample)
|
||||||
|
self.assertIn("details", sample)
|
||||||
|
self.assertIn("violations", sample)
|
||||||
|
for d in sample["details"]:
|
||||||
|
self.assertTrue({"rule", "passed", "message"}.issubset(d.keys()))
|
||||||
|
|
||||||
|
def test_check_returns_structure(self):
|
||||||
|
html = """<!DOCTYPE html><html><head><style>
|
||||||
|
@page { margin: 2.54cm 3.18cm 2.54cm 3.18cm; size: A4; }
|
||||||
|
</style></head><body style="margin:2.54cm 3.18cm">
|
||||||
|
<div class="toc">第一章 概述</div>
|
||||||
|
<h2 style="font-size:16pt;font-family:SimHei;font-weight:bold;color:#000">标题</h2>
|
||||||
|
<p style="font-size:14pt;font-family:SimSun;line-height:26pt;text-indent:2em;color:#000">
|
||||||
|
正文内容示例。</p>
|
||||||
|
</body></html>"""
|
||||||
|
r = check_technical_bid(html)
|
||||||
|
self.assertIsInstance(r["overall"], bool)
|
||||||
|
self.assertEqual(len(r["details"]), 7)
|
||||||
|
rules = [x["rule"] for x in r["details"]]
|
||||||
|
self.assertIn("身份信息隐藏", rules)
|
||||||
|
self.assertIn("标题格式", rules)
|
||||||
|
|
||||||
|
def test_empty_html(self):
|
||||||
|
r = check_technical_bid("")
|
||||||
|
self.assertFalse(r["overall"])
|
||||||
|
|
||||||
|
def test_identity_fail_on_company(self):
|
||||||
|
html = (
|
||||||
|
"<html><body><p style='font-size:14pt;font-family:SimSun;"
|
||||||
|
"line-height:26pt;text-indent:2em;color:#000'>我公司参与投标</p>"
|
||||||
|
"<div class='toc'>x</div>"
|
||||||
|
"<style>@page{margin:2.54cm 3.18cm 2.54cm 3.18cm}</style>"
|
||||||
|
"</body></html>"
|
||||||
|
)
|
||||||
|
r = check_technical_bid(html)
|
||||||
|
id_rule = next(x for x in r["details"] if x["rule"] == "身份信息隐藏")
|
||||||
|
self.assertFalse(id_rule["passed"])
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
121
tests/test_diagram_intent.py
Normal file
121
tests/test_diagram_intent.py
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
"""图表意图栈与特征计分。"""
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from utils import diagram_intent as di
|
||||||
|
|
||||||
|
|
||||||
|
def _base_rules() -> dict:
|
||||||
|
return {
|
||||||
|
'schema_version': 1,
|
||||||
|
'threshold_figure': 1.0,
|
||||||
|
'threshold_table': 1.0,
|
||||||
|
'title_weight': 1.0,
|
||||||
|
'context_weight': 0.6,
|
||||||
|
'outline_context_lines': {'before': 2, 'after': 2},
|
||||||
|
'stack_order_when_both': 'score_desc',
|
||||||
|
'figure_keywords': [
|
||||||
|
{'text': '进度', 'weight': 1.2},
|
||||||
|
{'text': '横道', 'weight': 1.5},
|
||||||
|
],
|
||||||
|
'table_keywords': [
|
||||||
|
{'text': '一览表', 'weight': 1.5},
|
||||||
|
{'text': '人员', 'weight': 1.0},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class TestScoreFigureTable(unittest.TestCase):
|
||||||
|
def test_figure_higher_on_progress(self):
|
||||||
|
r = _base_rules()
|
||||||
|
f, t = di.score_figure_table('施工进度与横道计划', '', r)
|
||||||
|
self.assertGreater(f, t)
|
||||||
|
|
||||||
|
def test_table_higher_on_roster(self):
|
||||||
|
r = _base_rules()
|
||||||
|
f, t = di.score_figure_table('主要管理人员配置一览表', '', r)
|
||||||
|
self.assertGreater(t, f)
|
||||||
|
|
||||||
|
|
||||||
|
class TestBuildStack(unittest.TestCase):
|
||||||
|
def test_gate_figure_off(self):
|
||||||
|
r = _base_rules()
|
||||||
|
st = di.build_stack(5.0, 5.0, r, enable_figure=False, enable_table=True)
|
||||||
|
self.assertEqual(len(st), 1)
|
||||||
|
self.assertEqual(st[0].kind, 'table')
|
||||||
|
|
||||||
|
def test_score_desc_order(self):
|
||||||
|
r = dict(_base_rules())
|
||||||
|
r['stack_order_when_both'] = 'score_desc'
|
||||||
|
st = di.build_stack(3.0, 1.0, r, True, True)
|
||||||
|
self.assertEqual(len(st), 2)
|
||||||
|
self.assertEqual(st[0].kind, 'figure')
|
||||||
|
self.assertGreater(st[0].score, st[1].score)
|
||||||
|
|
||||||
|
def test_figure_first(self):
|
||||||
|
r = dict(_base_rules())
|
||||||
|
r['stack_order_when_both'] = 'figure_first'
|
||||||
|
st = di.build_stack(2.0, 5.0, r, True, True)
|
||||||
|
self.assertEqual(st[0].kind, 'figure')
|
||||||
|
self.assertEqual(st[1].kind, 'table')
|
||||||
|
|
||||||
|
def test_below_threshold_empty(self):
|
||||||
|
r = dict(_base_rules())
|
||||||
|
r['threshold_figure'] = 10.0
|
||||||
|
r['threshold_table'] = 10.0
|
||||||
|
st = di.build_stack(1.0, 1.0, r, True, True)
|
||||||
|
self.assertEqual(st, [])
|
||||||
|
|
||||||
|
|
||||||
|
class TestOutlineWindow(unittest.TestCase):
|
||||||
|
def test_finds_title_line(self):
|
||||||
|
outline = '一、总则\n二、进度\n 2.1 横道计划\n三、尾'
|
||||||
|
w = di.extract_outline_window(outline, '2.1 横道计划', 1, 1)
|
||||||
|
self.assertIn('横道', w)
|
||||||
|
|
||||||
|
def test_fallback_prefix(self):
|
||||||
|
w = di.extract_outline_window('abc' * 400, '不存在的标题', 2, 2)
|
||||||
|
self.assertTrue(len(w) > 0)
|
||||||
|
|
||||||
|
|
||||||
|
class TestAgentRender(unittest.TestCase):
|
||||||
|
def test_render_non_empty_when_match(self):
|
||||||
|
r = dict(_base_rules())
|
||||||
|
r['threshold_figure'] = 0.5
|
||||||
|
r['threshold_table'] = 0.5
|
||||||
|
agent = di.DiagramIntentAgent(r)
|
||||||
|
s = agent.render_for_section(
|
||||||
|
'施工进度横道图编制说明',
|
||||||
|
'大纲\n进度\n横道',
|
||||||
|
True,
|
||||||
|
True,
|
||||||
|
)
|
||||||
|
self.assertIn('图示生成规范', s)
|
||||||
|
self.assertIn('本节图表生成优先级', s)
|
||||||
|
|
||||||
|
def test_render_empty_when_scores_low(self):
|
||||||
|
r = dict(_base_rules())
|
||||||
|
r['threshold_figure'] = 100.0
|
||||||
|
r['threshold_table'] = 100.0
|
||||||
|
agent = di.DiagramIntentAgent(r)
|
||||||
|
s = agent.render_for_section('无关标题', '无关', True, True)
|
||||||
|
self.assertEqual(s, '')
|
||||||
|
|
||||||
|
|
||||||
|
class TestStackHelpers(unittest.TestCase):
|
||||||
|
def test_stack_compact_labels(self):
|
||||||
|
st = [
|
||||||
|
di.DiagramIntent('figure', 1.0, 't'),
|
||||||
|
di.DiagramIntent('table', 1.0, 't'),
|
||||||
|
]
|
||||||
|
lab = di.stack_compact_labels(st)
|
||||||
|
self.assertEqual(len(lab), 2)
|
||||||
|
self.assertIn('[FIGURE]', lab[0])
|
||||||
|
|
||||||
|
def test_make_fallback_stack(self):
|
||||||
|
st = di.make_fallback_stack('figure')
|
||||||
|
self.assertEqual(len(st), 1)
|
||||||
|
self.assertEqual(st[0].kind, 'figure')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
||||||
44
tests/test_outline_numbering.py
Normal file
44
tests/test_outline_numbering.py
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
"""目录号格式化与大纲带号写回。"""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
if _ROOT not in sys.path:
|
||||||
|
sys.path.insert(0, _ROOT)
|
||||||
|
|
||||||
|
from modules.generator import _parse_outline, _sections_to_outline_text # noqa: E402
|
||||||
|
from utils.outline_numbering import format_heading_display, int_to_chinese_numeral # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
|
class TestOutlineNumbering(unittest.TestCase):
|
||||||
|
def test_int_to_chinese(self):
|
||||||
|
self.assertEqual(int_to_chinese_numeral(1), "一")
|
||||||
|
self.assertEqual(int_to_chinese_numeral(10), "十")
|
||||||
|
self.assertEqual(int_to_chinese_numeral(11), "十一")
|
||||||
|
self.assertEqual(int_to_chinese_numeral(23), "二十三")
|
||||||
|
|
||||||
|
def test_format_heading(self):
|
||||||
|
self.assertEqual(format_heading_display(1, "3", "总体"), "三、总体")
|
||||||
|
self.assertEqual(format_heading_display(2, "1.2", "子节"), "1.2 子节")
|
||||||
|
|
||||||
|
def test_sections_to_outline_text_has_numbers(self):
|
||||||
|
sections = [
|
||||||
|
{"level": 1, "title": "第一章", "number": "1"},
|
||||||
|
{"level": 2, "title": "小节", "number": "1.1"},
|
||||||
|
]
|
||||||
|
text = _sections_to_outline_text("某项目技术标书", sections)
|
||||||
|
self.assertIn("某项目技术标书", text)
|
||||||
|
self.assertIn("一、第一章", text)
|
||||||
|
self.assertIn("1.1 小节", text)
|
||||||
|
|
||||||
|
def test_parse_roundtrip_numbered_outline(self):
|
||||||
|
raw = "标书标题\n一、第一章\n1.1 节A\n"
|
||||||
|
_, sections, normalized = _parse_outline(raw)
|
||||||
|
self.assertGreaterEqual(len(sections), 2)
|
||||||
|
self.assertIn("一、第一章", normalized)
|
||||||
|
self.assertIn("1.1 节A", normalized)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
24
tests/test_parse_outline.py
Normal file
24
tests/test_parse_outline.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
"""大纲解析:1.1 类编号不得被误拆成一级 1 与 title '.1 标题'。"""
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from modules.generator import _parse_outline
|
||||||
|
|
||||||
|
|
||||||
|
class TestParseOutline(unittest.TestCase):
|
||||||
|
def test_11_stays_single_section(self):
|
||||||
|
text = "某某项目标书标题\n1.1 沟槽开挖与支护\n1.2 排降水\n"
|
||||||
|
_, sections, _ = _parse_outline(text)
|
||||||
|
self.assertEqual(len(sections), 2, [s.get('number') for s in sections])
|
||||||
|
for s in sections:
|
||||||
|
if s.get('level') == 1:
|
||||||
|
self.assertFalse(
|
||||||
|
(s.get('title') or '').lstrip().startswith('.'),
|
||||||
|
'不得出现一级章节 title 以 .1 开头(误将 1.1 拆成 1 与 .1 标题)',
|
||||||
|
)
|
||||||
|
titles = ' '.join(s['title'] for s in sections)
|
||||||
|
self.assertIn('沟槽', titles)
|
||||||
|
self.assertIn('排降', titles)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
||||||
86
tests/test_volume_chapters.py
Normal file
86
tests/test_volume_chapters.py
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
"""目标页数与一级篇章区间。"""
|
||||||
|
import random
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from utils import volume_chapters as vc
|
||||||
|
|
||||||
|
|
||||||
|
class TestVolumeChapters(unittest.TestCase):
|
||||||
|
def test_top_level_default_pages_zero(self):
|
||||||
|
lo, hi = vc.top_level_chapter_range_from_pages(0)
|
||||||
|
self.assertEqual((lo, hi), (8, 10))
|
||||||
|
|
||||||
|
def test_ranges_match_effective_volume_bands(self):
|
||||||
|
self.assertEqual(vc.top_level_chapter_range_from_pages(100), (6, 8))
|
||||||
|
self.assertEqual(vc.top_level_chapter_range_from_pages(125), (6, 8))
|
||||||
|
self.assertEqual(vc.top_level_chapter_range_from_pages(150), (8, 10))
|
||||||
|
self.assertEqual(vc.top_level_chapter_range_from_pages(200), (10, 12))
|
||||||
|
self.assertEqual(vc.top_level_chapter_range_from_pages(300), (12, 16))
|
||||||
|
|
||||||
|
def test_hint_default_no_pages(self):
|
||||||
|
h = vc.outline_chapter_count_hint(0, 'standard')
|
||||||
|
self.assertIn('8-10', h)
|
||||||
|
self.assertIn('不超过10', h)
|
||||||
|
|
||||||
|
def test_hint_with_pages(self):
|
||||||
|
h = vc.outline_chapter_count_hint(150, 'standard', 700)
|
||||||
|
self.assertIn('约 8–10', h)
|
||||||
|
self.assertIn('150', h)
|
||||||
|
self.assertIn('105000', h) # 150×700 总字目标
|
||||||
|
self.assertIn('过细', h)
|
||||||
|
|
||||||
|
def test_subchapter_base_anchor_points(self):
|
||||||
|
self.assertAlmostEqual(vc.subchapter_total_base_from_pages(100), 78.0, places=5)
|
||||||
|
self.assertAlmostEqual(vc.subchapter_total_base_from_pages(300), 212.0, places=5)
|
||||||
|
self.assertEqual(vc.SUBCHAPTER_PAGES_SLOPE, 0.67)
|
||||||
|
self.assertEqual(vc.SUBCHAPTER_PAGES_INTERCEPT, 11.0)
|
||||||
|
|
||||||
|
def test_subchapter_jitter_bounds_78_anchor(self):
|
||||||
|
"""100 页基线 78 章,±10% 严格为 [70, 86]。"""
|
||||||
|
self.assertEqual(vc.subchapter_jitter_bounds(78.0), (70, 86))
|
||||||
|
|
||||||
|
def test_subchapter_jitter_bounds_300_pages(self):
|
||||||
|
self.assertEqual(vc.subchapter_jitter_bounds(212.0), (191, 233))
|
||||||
|
|
||||||
|
def test_allocate_subchapters_to_mains(self):
|
||||||
|
self.assertEqual(vc.allocate_subchapters_to_mains(10, 3), [4, 3, 3])
|
||||||
|
self.assertEqual(vc.allocate_subchapters_to_mains(0, 3), [0, 0, 0])
|
||||||
|
self.assertEqual(vc.allocate_subchapters_to_mains(5, 2), [3, 2])
|
||||||
|
self.assertEqual(vc.allocate_subchapters_to_mains(7, 0), [])
|
||||||
|
# n < k 时多出的主章 quota 为 0
|
||||||
|
a = vc.allocate_subchapters_to_mains(70, 100)
|
||||||
|
self.assertEqual(len(a), 100)
|
||||||
|
self.assertEqual(sum(a), 70)
|
||||||
|
self.assertEqual(a.count(1), 70)
|
||||||
|
self.assertEqual(a.count(0), 30)
|
||||||
|
|
||||||
|
def test_subchapter_effective_respects_k_floor_and_jitter(self):
|
||||||
|
# round(78 * u) for u in [0.9, 1.1] stays in [70, 86] for 78.0 base
|
||||||
|
for seed in range(800):
|
||||||
|
n = vc.subchapter_total_effective(100, 1, random.Random(seed))
|
||||||
|
self.assertGreaterEqual(n, 70)
|
||||||
|
self.assertLessEqual(n, 86)
|
||||||
|
# 主章数很大时,总条数仍须在 [70, 86](不得被 max(n,k) 抬到数百)
|
||||||
|
for seed in range(20):
|
||||||
|
nk = vc.subchapter_total_effective(100, 500, random.Random(seed))
|
||||||
|
self.assertGreaterEqual(nk, 70, msg=f'seed={seed}')
|
||||||
|
self.assertLessEqual(nk, 86, msg=f'seed={seed}')
|
||||||
|
|
||||||
|
def test_subchapter_effective_zero_pages(self):
|
||||||
|
self.assertEqual(vc.subchapter_total_effective(0, 5), 0)
|
||||||
|
self.assertEqual(vc.subchapter_total_effective(100, 0), 0)
|
||||||
|
|
||||||
|
def test_resolve_expand_target_pages(self):
|
||||||
|
self.assertEqual(vc.resolve_expand_target_pages(None, True, 100, 200), 0)
|
||||||
|
self.assertEqual(vc.resolve_expand_target_pages(200, False, 100, 50), 200)
|
||||||
|
self.assertEqual(vc.resolve_expand_target_pages(0, False, 80, 0), 80)
|
||||||
|
self.assertEqual(vc.resolve_expand_target_pages(0, False, 0, 50), 50)
|
||||||
|
self.assertEqual(
|
||||||
|
vc.resolve_expand_target_pages(0, False, 0, 0),
|
||||||
|
vc.EXPAND_OUTLINE_DEFAULT_TARGET_PAGES,
|
||||||
|
)
|
||||||
|
self.assertEqual(vc.EXPAND_OUTLINE_DEFAULT_TARGET_PAGES, 100)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
||||||
170
tests/test_word_allocation.py
Normal file
170
tests/test_word_allocation.py
Normal file
@ -0,0 +1,170 @@
|
|||||||
|
"""字数分配与 rating_json 解析单元测试。"""
|
||||||
|
import json
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
import config as cfg
|
||||||
|
|
||||||
|
from utils import word_allocation as wa
|
||||||
|
|
||||||
|
|
||||||
|
class TestParseRatingJson(unittest.TestCase):
|
||||||
|
def test_canonical_items(self):
|
||||||
|
raw = json.dumps(
|
||||||
|
{
|
||||||
|
'items': [
|
||||||
|
{'id': 'T1', 'name': '施工方案', 'weight': 30, 'keywords': ['工艺']},
|
||||||
|
{'id': 'T2', 'name': '质量保证', 'weight': 10, 'keywords': []},
|
||||||
|
],
|
||||||
|
'notes': '',
|
||||||
|
},
|
||||||
|
ensure_ascii=False,
|
||||||
|
)
|
||||||
|
items = wa.parse_rating_json(raw)
|
||||||
|
self.assertEqual(len(items), 2)
|
||||||
|
names = {x['name'] for x in items}
|
||||||
|
self.assertIn('施工方案', names)
|
||||||
|
self.assertIn('质量保证', names)
|
||||||
|
wmap = {x['name']: x['weight'] for x in items}
|
||||||
|
self.assertEqual(wmap['施工方案'], 30.0)
|
||||||
|
|
||||||
|
def test_malformed_returns_empty(self):
|
||||||
|
self.assertEqual(wa.parse_rating_json('not json'), [])
|
||||||
|
self.assertEqual(wa.parse_rating_json(''), [])
|
||||||
|
|
||||||
|
|
||||||
|
class TestComputeLeafAllocations(unittest.TestCase):
|
||||||
|
def test_none_when_no_rating_and_not_target_pages_budget(self):
|
||||||
|
leaves = [{'id': 1, 'section_title': '一、总体方案'}]
|
||||||
|
rules = dict(wa.DEFAULT_RULES)
|
||||||
|
rules['budget_mode'] = 'anchor_mean'
|
||||||
|
self.assertIsNone(
|
||||||
|
wa.compute_leaf_allocations('standard', leaves, '', rules)
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_uniform_when_no_rating_but_target_pages(self):
|
||||||
|
"""无技术评分时仍按目标页均分 B=页×每页字,全稿不随节数 N 线性爆量。"""
|
||||||
|
leaves = [
|
||||||
|
{'id': 1, 'section_title': 'A'},
|
||||||
|
{'id': 2, 'section_title': 'B'},
|
||||||
|
]
|
||||||
|
rules = dict(wa.DEFAULT_RULES)
|
||||||
|
rules['budget_mode'] = 'target_pages'
|
||||||
|
old_tp = cfg.TARGET_PAGES
|
||||||
|
old_pce = cfg.PAGE_CHAR_ESTIMATE
|
||||||
|
try:
|
||||||
|
cfg.TARGET_PAGES = 100
|
||||||
|
cfg.PAGE_CHAR_ESTIMATE = 700
|
||||||
|
out = wa.compute_leaf_allocations('standard', leaves, '', rules)
|
||||||
|
finally:
|
||||||
|
cfg.TARGET_PAGES = old_tp
|
||||||
|
cfg.PAGE_CHAR_ESTIMATE = old_pce
|
||||||
|
self.assertIsNotNone(out)
|
||||||
|
s = out[1]['target_chars'] + out[2]['target_chars']
|
||||||
|
self.assertEqual(s, 100 * 700)
|
||||||
|
self.assertEqual(out[1]['target_chars'], out[2]['target_chars'])
|
||||||
|
|
||||||
|
def test_monotonicity_high_weight_match(self):
|
||||||
|
rating = json.dumps(
|
||||||
|
{
|
||||||
|
'items': [
|
||||||
|
{'name': '施工组织设计', 'weight': 50, 'keywords': ['进度']},
|
||||||
|
{'name': '页眉页脚规范', 'weight': 2, 'keywords': []},
|
||||||
|
]
|
||||||
|
},
|
||||||
|
ensure_ascii=False,
|
||||||
|
)
|
||||||
|
leaves = [
|
||||||
|
{'id': 10, 'section_title': '3.1 施工组织设计与进度计划'},
|
||||||
|
{'id': 11, 'section_title': '9.9 页眉格式说明'},
|
||||||
|
]
|
||||||
|
rules = dict(wa.DEFAULT_RULES)
|
||||||
|
rules['alpha'] = 0.95
|
||||||
|
out = wa.compute_leaf_allocations('standard', leaves, rating, rules)
|
||||||
|
self.assertIsNotNone(out)
|
||||||
|
t_high = out[10]['target_chars']
|
||||||
|
t_low = out[11]['target_chars']
|
||||||
|
self.assertGreaterEqual(t_high, t_low, '强匹配高分项的章节应不低于弱匹配章节')
|
||||||
|
self.assertIn('施工组织设计', out[10]['word_count_spec'])
|
||||||
|
|
||||||
|
def test_budget_anchor_mean(self):
|
||||||
|
rating = json.dumps(
|
||||||
|
{'items': [{'name': '技术部分', 'weight': 100}]},
|
||||||
|
ensure_ascii=False,
|
||||||
|
)
|
||||||
|
leaves = [
|
||||||
|
{'id': 1, 'section_title': 'A'},
|
||||||
|
{'id': 2, 'section_title': 'B'},
|
||||||
|
{'id': 3, 'section_title': 'C'},
|
||||||
|
]
|
||||||
|
rules = dict(wa.DEFAULT_RULES)
|
||||||
|
rules['budget_mode'] = 'anchor_mean'
|
||||||
|
rules['alpha'] = 0.0
|
||||||
|
old_tp = getattr(cfg, 'TARGET_PAGES', 0)
|
||||||
|
setattr(cfg, 'TARGET_PAGES', 0)
|
||||||
|
try:
|
||||||
|
out = wa.compute_leaf_allocations('standard', leaves, rating, rules)
|
||||||
|
finally:
|
||||||
|
setattr(cfg, 'TARGET_PAGES', old_tp)
|
||||||
|
self.assertIsNotNone(out)
|
||||||
|
base, core, _, _ = wa.VOLUME_PRESETS['standard']
|
||||||
|
expect = int(round(len(leaves) * (base + core) / 2.0))
|
||||||
|
s = sum(out[i]['target_chars'] for i in (1, 2, 3))
|
||||||
|
self.assertEqual(s, expect)
|
||||||
|
|
||||||
|
def test_budget_target_pages(self):
|
||||||
|
rating = json.dumps(
|
||||||
|
{'items': [{'name': '技术部分', 'weight': 100}]},
|
||||||
|
ensure_ascii=False,
|
||||||
|
)
|
||||||
|
leaves = [
|
||||||
|
{'id': 1, 'section_title': 'A'},
|
||||||
|
{'id': 2, 'section_title': 'B'},
|
||||||
|
]
|
||||||
|
rules = dict(wa.DEFAULT_RULES)
|
||||||
|
rules['budget_mode'] = 'target_pages'
|
||||||
|
rules['alpha'] = 0.0
|
||||||
|
old_tp = cfg.TARGET_PAGES
|
||||||
|
old_pce = cfg.PAGE_CHAR_ESTIMATE
|
||||||
|
try:
|
||||||
|
cfg.TARGET_PAGES = 100
|
||||||
|
cfg.PAGE_CHAR_ESTIMATE = 700
|
||||||
|
out = wa.compute_leaf_allocations('standard', leaves, rating, rules)
|
||||||
|
finally:
|
||||||
|
cfg.TARGET_PAGES = old_tp
|
||||||
|
cfg.PAGE_CHAR_ESTIMATE = old_pce
|
||||||
|
self.assertIsNotNone(out)
|
||||||
|
expect = 100 * 700
|
||||||
|
s = sum(out[i]['target_chars'] for i in (1, 2))
|
||||||
|
self.assertEqual(s, expect)
|
||||||
|
|
||||||
|
def test_budget_target_pages_falls_back_when_pages_zero(self):
|
||||||
|
rating = json.dumps(
|
||||||
|
{'items': [{'name': '技术部分', 'weight': 100}]},
|
||||||
|
ensure_ascii=False,
|
||||||
|
)
|
||||||
|
leaves = [
|
||||||
|
{'id': 1, 'section_title': 'A'},
|
||||||
|
{'id': 2, 'section_title': 'B'},
|
||||||
|
]
|
||||||
|
rules = dict(wa.DEFAULT_RULES)
|
||||||
|
rules['budget_mode'] = 'target_pages'
|
||||||
|
rules['alpha'] = 0.0
|
||||||
|
old_tp = cfg.TARGET_PAGES
|
||||||
|
try:
|
||||||
|
cfg.TARGET_PAGES = 0
|
||||||
|
out = wa.compute_leaf_allocations('standard', leaves, rating, rules)
|
||||||
|
finally:
|
||||||
|
cfg.TARGET_PAGES = old_tp
|
||||||
|
self.assertIsNotNone(out)
|
||||||
|
base, core, _, _ = wa.VOLUME_PRESETS['standard']
|
||||||
|
expect = int(round(len(leaves) * (base + core) / 2.0))
|
||||||
|
s = sum(out[i]['target_chars'] for i in (1, 2))
|
||||||
|
self.assertEqual(s, expect)
|
||||||
|
|
||||||
|
def test_continuation_threshold(self):
|
||||||
|
self.assertEqual(wa.continuation_threshold(2000), 1300)
|
||||||
|
self.assertEqual(wa.continuation_threshold(100), 200)
|
||||||
|
|
||||||
|
|
||||||
|
# Allow running this test module directly (python <file>) as well as via pytest.
if __name__ == '__main__':
    unittest.main()
|
||||||
1
utils/__init__.py
Normal file
1
utils/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
||||||
252
utils/ai_client.py
Normal file
252
utils/ai_client.py
Normal file
@ -0,0 +1,252 @@
|
|||||||
|
"""
|
||||||
|
AI API 调用封装,支持 OpenAI、阿里云通义千问、DeepSeek、Ollama(均兼容 OpenAI SDK)
|
||||||
|
"""
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
import logging
|
||||||
|
from openai import OpenAI
|
||||||
|
import config
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Human-readable provider labels, surfaced in error messages and the settings UI.
PROVIDER_NAMES = {
    'qwen': '通义千问 (Qwen)',
    'deepseek': 'DeepSeek',
    'openai': 'OpenAI',
    'ollama': 'Ollama 本地',
    'doubao': '豆包 (Doubao)',
    'kimi': 'Kimi (Moonshot)',
}

# Sign-up / console URL per provider, shown in API-key configuration hints.
PROVIDER_LINKS = {
    'qwen': 'https://dashscope.aliyun.com/',
    'deepseek': 'https://platform.deepseek.com/',
    'openai': 'https://platform.openai.com/',
    'ollama': 'https://ollama.com/',
    'doubao': 'https://console.volcengine.com/ark/',
    'kimi': 'https://platform.moonshot.cn/',
}
|
||||||
|
|
||||||
|
|
||||||
|
def _check_api_key():
    """Pre-flight API-key validation.

    Raises a friendly RuntimeError immediately when the key for the active
    provider is missing or still the placeholder, instead of burning retries.
    """
    provider = config.MODEL_PROVIDER

    # Local Ollama requires no API key at all.
    if provider == 'ollama':
        return

    name = PROVIDER_NAMES.get(provider, provider)
    link = PROVIDER_LINKS.get(provider, '')

    # Map provider -> config attribute holding its key; OpenAI is the fallback.
    key_attr = {
        'qwen': 'QWEN_API_KEY',
        'deepseek': 'DEEPSEEK_API_KEY',
        'doubao': 'DOUBAO_API_KEY',
        'kimi': 'KIMI_API_KEY',
    }.get(provider, 'OPENAI_API_KEY')
    key = getattr(config, key_attr)

    # Empty keys and 'sk-your...' placeholders both count as unconfigured.
    if not key or key.startswith('sk-your'):
        raise RuntimeError(
            f'尚未配置 {name} 的 API Key。'
            f'请点击右上角设置按钮,选择"{name}"并填入有效的 API Key。'
            f'申请地址:{link}'
        )
|
||||||
|
|
||||||
|
|
||||||
|
def _get_client() -> OpenAI:
    """Build an OpenAI-compatible client for the provider in config.MODEL_PROVIDER."""
    provider = config.MODEL_PROVIDER
    if provider == 'ollama':
        # Ollama ignores the key, but the SDK insists on a non-empty value.
        return OpenAI(api_key='ollama', base_url=config.OLLAMA_BASE_URL)

    # provider -> (key attribute, base-url attribute); OpenAI is the fallback.
    cred_attrs = {
        'qwen': ('QWEN_API_KEY', 'QWEN_BASE_URL'),
        'deepseek': ('DEEPSEEK_API_KEY', 'DEEPSEEK_BASE_URL'),
        'doubao': ('DOUBAO_API_KEY', 'DOUBAO_BASE_URL'),
        'kimi': ('KIMI_API_KEY', 'KIMI_BASE_URL'),
    }
    key_attr, url_attr = cred_attrs.get(provider, ('OPENAI_API_KEY', 'OPENAI_BASE_URL'))
    return OpenAI(api_key=getattr(config, key_attr), base_url=getattr(config, url_attr))
|
||||||
|
|
||||||
|
|
||||||
|
def _get_model() -> str:
    """Return the chat model name configured for the active provider."""
    # provider -> config attribute holding the model name; OpenAI is the fallback.
    model_attr = {
        'qwen': 'QWEN_MODEL',
        'deepseek': 'DEEPSEEK_MODEL',
        'ollama': 'OLLAMA_MODEL',
        'doubao': 'DOUBAO_MODEL',
        'kimi': 'KIMI_MODEL',
    }.get(config.MODEL_PROVIDER, 'OPENAI_MODEL')
    return getattr(config, model_attr)
|
||||||
|
|
||||||
|
|
||||||
|
def _clean_response(text: str) -> str:
|
||||||
|
"""
|
||||||
|
过滤推理模型(DeepSeek R1 / QwQ 等)输出的 <think>...</think> 思考过程标签,
|
||||||
|
只保留最终正文内容,避免思考链污染标书正文。
|
||||||
|
"""
|
||||||
|
# 去除 <think>...</think> 块(含跨行内容)
|
||||||
|
text = re.sub(r'<think>[\s\S]*?</think>', '', text, flags=re.IGNORECASE)
|
||||||
|
return text.strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _is_auth_error(e: Exception) -> bool:
|
||||||
|
"""判断是否为认证错误(401 / invalid_api_key),无需重试"""
|
||||||
|
# 优先用 openai 原生异常类型判断
|
||||||
|
try:
|
||||||
|
from openai import AuthenticationError, PermissionDeniedError
|
||||||
|
if isinstance(e, (AuthenticationError, PermissionDeniedError)):
|
||||||
|
return True
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
# 兜底:字符串匹配
|
||||||
|
err_str = str(e).lower()
|
||||||
|
return ('401' in err_str or 'invalid_api_key' in err_str
|
||||||
|
or 'incorrect api key' in err_str or 'authentication' in err_str)
|
||||||
|
|
||||||
|
|
||||||
|
# OpenAI o 系列推理模型:不支持 temperature,max_tokens 需用 max_completion_tokens
|
||||||
|
_OPENAI_REASONING_MODELS = {'o1', 'o1-mini', 'o1-pro', 'o3', 'o3-mini', 'o3-pro', 'o4-mini'}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_chat_kwargs(
|
||||||
|
model: str,
|
||||||
|
messages: list,
|
||||||
|
temperature: float,
|
||||||
|
max_tokens: int,
|
||||||
|
request_timeout: float | None = None,
|
||||||
|
) -> dict:
|
||||||
|
"""
|
||||||
|
根据模型类型构建 chat.completions.create 的参数字典。
|
||||||
|
OpenAI o 系列推理模型不接受 temperature,且使用 max_completion_tokens 替代 max_tokens。
|
||||||
|
"""
|
||||||
|
base_model = model.split(':')[0] # 去掉 ollama tag 后缀
|
||||||
|
is_reasoning = base_model in _OPENAI_REASONING_MODELS
|
||||||
|
|
||||||
|
to = request_timeout if request_timeout is not None else config.REQUEST_TIMEOUT
|
||||||
|
kwargs = {
|
||||||
|
'model': model,
|
||||||
|
'messages': messages,
|
||||||
|
'timeout': to,
|
||||||
|
}
|
||||||
|
if is_reasoning:
|
||||||
|
kwargs['max_completion_tokens'] = max_tokens
|
||||||
|
else:
|
||||||
|
kwargs['temperature'] = temperature
|
||||||
|
kwargs['max_tokens'] = max_tokens
|
||||||
|
return kwargs
|
||||||
|
|
||||||
|
|
||||||
|
def chat(
    prompt: str,
    system: str = '你是一位专业的投标文件撰写专家。',
    temperature: float = 0.7,
    max_tokens: int = 8192,
    retries: int | None = None,  # was annotated `int` with a None default — fixed
    request_timeout: float | None = None,
) -> str:
    """Send a single-turn prompt to the configured AI provider and return text.

    Authentication errors abort immediately with a configuration hint; any
    other error is retried with exponential backoff (1s, 2s, 4s, ...). The
    parameter quirks of OpenAI o-series reasoning models are handled by
    _build_chat_kwargs, and <think> blocks are stripped from the reply.

    Args:
        prompt: User-role message content.
        system: System-role instruction.
        temperature: Sampling temperature (ignored for o-series models).
        max_tokens: Completion token cap.
        retries: Max attempts; defaults to config.MAX_RETRIES when None.
        request_timeout: Per-request timeout; None uses config.REQUEST_TIMEOUT.

    Returns:
        The cleaned model reply text.

    Raises:
        RuntimeError: On an invalid/expired API key, or when all retries fail.
    """
    _check_api_key()

    max_retries = retries if retries is not None else config.MAX_RETRIES
    client = _get_client()
    model = _get_model()
    provider = config.MODEL_PROVIDER
    name = PROVIDER_NAMES.get(provider, provider)

    messages = [
        {'role': 'system', 'content': system},
        {'role': 'user', 'content': prompt},
    ]

    for attempt in range(max_retries):
        try:
            kwargs = _build_chat_kwargs(
                model, messages, temperature, max_tokens, request_timeout=request_timeout
            )
            resp = client.chat.completions.create(**kwargs)
            return _clean_response(resp.choices[0].message.content.strip())
        except Exception as e:
            # Bad credentials never recover — fail fast with guidance.
            if _is_auth_error(e):
                raise RuntimeError(
                    f'{name} API Key 无效或已过期,请在设置中重新配置。'
                    f'申请地址:{PROVIDER_LINKS.get(provider, "")}'
                ) from e

            wait = 2 ** attempt  # exponential backoff
            logger.warning(f'AI 请求失败 (第{attempt+1}次),{wait}s 后重试: {e}')
            if attempt < max_retries - 1:
                time.sleep(wait)
            else:
                raise RuntimeError(f'AI 接口调用失败(已重试 {max_retries} 次): {e}') from e

    return ''  # unreachable when max_retries > 0; defensive default
|
||||||
|
|
||||||
|
|
||||||
|
def chat_with_history(system: str, messages: list,
                      temperature: float = 0.7, max_tokens: int = 4096) -> str:
    """
    Multi-turn chat call carrying the full conversation history; used by the
    conversational section generator.

    messages format: [{'role': 'user'|'assistant', 'content': str}, ...]

    Auth errors raise immediately with a configuration hint; other errors are
    retried up to config.MAX_RETRIES times with exponential backoff.
    """
    _check_api_key()

    client = _get_client()
    model = _get_model()
    provider = config.MODEL_PROVIDER
    name = PROVIDER_NAMES.get(provider, provider)

    # Prepend the system instruction to the caller-supplied history.
    full_messages = [{'role': 'system', 'content': system}] + messages

    for attempt in range(config.MAX_RETRIES):
        try:
            kwargs = _build_chat_kwargs(model, full_messages, temperature, max_tokens)
            resp = client.chat.completions.create(**kwargs)
            return _clean_response(resp.choices[0].message.content.strip())
        except Exception as e:
            # Invalid credentials cannot be fixed by retrying — abort at once.
            if _is_auth_error(e):
                raise RuntimeError(
                    f'{name} API Key 无效或已过期,请在设置中重新配置。'
                    f'申请地址:{PROVIDER_LINKS.get(provider, "")}'
                ) from e
            wait = 2 ** attempt  # exponential backoff: 1s, 2s, 4s, ...
            logger.warning(f'对话 AI 请求失败 (第{attempt+1}次),{wait}s 后重试: {e}')
            if attempt < config.MAX_RETRIES - 1:
                time.sleep(wait)
            else:
                raise RuntimeError(f'AI 接口调用失败(已重试 {config.MAX_RETRIES} 次): {e}') from e

    return ''
|
||||||
|
|
||||||
|
|
||||||
|
def get_embeddings(texts: list[str]) -> list[list[float]]:
    """Return embedding vectors for *texts*, one vector per input string.

    Supported providers: Qwen, OpenAI, Kimi. DeepSeek / Ollama / Doubao expose
    no Embedding API, so NotImplementedError is raised and callers fall back to
    keyword retrieval for the knowledge base.
    """
    provider = config.MODEL_PROVIDER
    if provider in ('deepseek', 'ollama', 'doubao'):
        raise NotImplementedError(
            f'{PROVIDER_NAMES.get(provider)} 暂不支持 Embedding API,知识库将使用关键词检索降级'
        )

    client = _get_client()
    # Each supported provider configures its own embedding model name.
    if provider == 'qwen':
        model = config.QWEN_EMBEDDING_MODEL
    elif provider == 'kimi':
        model = config.KIMI_EMBEDDING_MODEL
    else:
        model = config.OPENAI_EMBEDDING_MODEL

    resp = client.embeddings.create(model=model, input=texts)
    return [item.embedding for item in resp.data]
|
||||||
186
utils/attachment_section.py
Normal file
186
utils/attachment_section.py
Normal file
@ -0,0 +1,186 @@
|
|||||||
|
"""
|
||||||
|
附件类章节识别:标题匹配、expand_outline 跳过、以及正文模式(完整正文 vs 仅单图单表)。
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
import config
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Built-in attachment-section rules; overridable via attachment_section_rules.json.
DEFAULT_ATTACHMENT_RULES: Dict[str, Any] = {
    'schema_version': 1,
    # Title patterns that mark a section as attachment-only (附件N / 附图 / 附表 ...).
    'title_regex': [
        r'附件\s*[一二三四五六七八九十0-9A-Za-z、::.]',
        r'附\s*图',
        r'附\s*表',
        r'附\s*件\s*\(',
        r'^\s*[\d一二三四五六七八九十\..、]+\s*附件',
    ],
    # Keywords suggesting the attachment is a table.
    'table_hint_keywords': [
        '附表', '一览表', '清单表', '统计表', '明细表',
    ],
    # Keywords suggesting the attachment is a figure.
    'figure_hint_keywords': [
        '附图', '示意图', '平面图', '流程图', '布置图', '组织图', '横道',
    ],
    'default_kind_when_ambiguous': 'table',
    # stack_charts_only: default, the intent stack emits only [FIGURE]/[TABLE]
    # with no prose; full: long-form body; single_chart_only: one chart at the
    # top of the stack.
    'attachment_leaf_body_mode': 'stack_charts_only',
}
}
|
||||||
|
|
||||||
|
|
||||||
|
def attachment_rules_path() -> str:
    """Return the path of the user-editable rules JSON under the data dir."""
    return os.path.join(config.DATA_DIR, 'attachment_section_rules.json')
|
||||||
|
|
||||||
|
|
||||||
|
def load_attachment_rules(path: Optional[str] = None) -> Dict[str, Any]:
    """Load attachment rules, overlaying the JSON file onto the defaults.

    Args:
        path: Rules file to read; defaults to attachment_rules_path().

    Returns:
        A fresh dict: DEFAULT_ATTACHMENT_RULES plus any non-underscore keys
        from the file. On a missing or unreadable file, the defaults alone.
    """
    p = path or attachment_rules_path()
    data = dict(DEFAULT_ATTACHMENT_RULES)
    if not os.path.isfile(p):
        return data
    try:
        with open(p, encoding='utf-8') as f:
            raw = json.load(f)
        if isinstance(raw, dict):
            for k, v in raw.items():
                # Underscore-prefixed keys are treated as comments/metadata.
                if k.startswith('_'):
                    continue
                data[k] = v
    except Exception as e:
        # Best-effort: a corrupt rules file must not break the pipeline.
        logger.warning('加载 attachment_section_rules.json 失败,使用内置默认: %s', e)
    return data
|
||||||
|
|
||||||
|
|
||||||
|
def attachment_leaf_body_mode(rules: Optional[Dict[str, Any]] = None) -> str:
    """Body policy for attachment leaf nodes.

    One of: stack_charts_only (default), full (complete technical prose), or
    single_chart_only (a single figure/table at the top of the intent stack).
    Unknown values fall back to stack_charts_only.
    """
    effective = rules or get_attachment_rules_cached()
    raw_mode = effective.get('attachment_leaf_body_mode') or 'stack_charts_only'
    mode = raw_mode.strip().lower()
    if mode not in ('single_chart_only', 'stack_charts_only', 'full'):
        mode = 'stack_charts_only'
    return mode
|
||||||
|
|
||||||
|
|
||||||
|
def use_attachment_stack_charts_body(rules: Optional[Dict[str, Any]] = None) -> bool:
    """True when attachments emit only [FIGURE]/[TABLE] blocks without long
    prose — covers the single-chart variant as well."""
    mode = attachment_leaf_body_mode(rules)
    return mode in ('stack_charts_only', 'single_chart_only')


def use_attachment_single_chart_only_body(rules: Optional[Dict[str, Any]] = None) -> bool:
    """True when an attachment renders exactly one chart at the stack top."""
    return attachment_leaf_body_mode(rules) == 'single_chart_only'


def use_attachment_full_body(rules: Optional[Dict[str, Any]] = None) -> bool:
    """True when attachment sections receive a complete technical prose body."""
    return attachment_leaf_body_mode(rules) == 'full'
|
||||||
|
|
||||||
|
|
||||||
|
def is_attachment_only_section(section_title: str, rules: Optional[Dict[str, Any]] = None) -> bool:
    """Whether a title denotes an attachment-style section (附图/附表/附件N ...).

    Used both to skip expand_outline and to pick the body-generation branch.
    Invalid regex patterns from the rules file are logged and skipped.
    """
    title = (section_title or '').strip()
    if not title:
        return False
    conf = rules or load_attachment_rules()
    for pattern in list(conf.get('title_regex') or []):
        try:
            matched = re.search(pattern, title)
        except re.error:
            logger.warning('无效 attachment title_regex,已跳过: %s', pattern[:80])
            continue
        if matched:
            return True
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def pick_single_figure_or_table(
    section_title: str,
    enable_figure: bool,
    enable_table: bool,
    rules: Optional[Dict[str, Any]] = None,
) -> Optional[str]:
    """Decide the single chart kind for an attachment section.

    Returns 'figure', 'table', or None when both switches are off. With only
    one switch enabled, that kind wins outright; otherwise title keywords
    decide, then bare 图/表 characters, then the configured default.
    """
    if not (enable_figure or enable_table):
        return None
    if enable_figure != enable_table:
        return 'figure' if enable_figure else 'table'

    conf = rules or load_attachment_rules()
    title = section_title or ''
    # Keyword hints — tables checked first; a lone 「表」 alone is too ambiguous,
    # so the bare-character check below only fires when 「图」 is absent.
    if any(kw and kw in title for kw in (conf.get('table_hint_keywords') or [])):
        return 'table'
    if any(kw and kw in title for kw in (conf.get('figure_hint_keywords') or [])):
        return 'figure'
    # Generic 表/图 characters are common in attachment contexts.
    if '表' in title and '图' not in title:
        return 'table'
    if '图' in title and '表' not in title:
        return 'figure'

    fallback = (conf.get('default_kind_when_ambiguous') or 'table').strip().lower()
    return 'figure' if fallback == 'figure' else 'table'
|
||||||
|
|
||||||
|
|
||||||
|
# Process-wide cache of the parsed attachment rules (lazily loaded once).
_cached_rules: Optional[Dict[str, Any]] = None


def get_attachment_rules_cached() -> Dict[str, Any]:
    """Return the attachment rules, loading them from disk on first use."""
    global _cached_rules
    if _cached_rules is None:
        _cached_rules = load_attachment_rules()
    return _cached_rules
|
||||||
|
|
||||||
|
|
||||||
|
def should_skip_expand_subchapters(title: str) -> bool:
    """
    During AI auto-fill of sub-chapters (expand_outline): attachment-style
    top-level chapters skip sub-chapter generation entirely.
    Uses the same criterion as is_attachment_only_section.
    """
    return is_attachment_only_section(title, get_attachment_rules_cached())
|
||||||
|
|
||||||
|
|
||||||
|
# Extract the attachment label (the 「几」 in 「附件几」 etc.) from a title — for logging.
_ATTACHMENT_LABEL_RE = re.compile(
    r'附件\s*[::]?\s*([一二三四五六七八九十百0-9A-Za-z]+)',
)
_ATTACHMENT_FIG_TBL_RE = re.compile(r'附\s*[图表]\s*([一二三四五六七八九十百0-9]*)')


def parse_attachment_label(title: str) -> Optional[str]:
    """Pull an attachment label ('一', '3', '附图', '附表', ...) out of *title*.

    Returns None when the title carries no attachment marker at all.
    """
    text = (title or '').strip()
    if not text:
        return None

    numbered = _ATTACHMENT_LABEL_RE.search(text)
    if numbered:
        return numbered.group(1).strip() or None

    fig_tbl = _ATTACHMENT_FIG_TBL_RE.search(text)
    if fig_tbl:
        number = (fig_tbl.group(1) or '').strip()
        if number:
            return number
        # No trailing number — fall back to the generic 附图/附表 label.
        return '附图' if '图' in fig_tbl.group(0) else '附表'

    if re.search(r'附\s*图', text):
        return '附图'
    if re.search(r'附\s*表', text):
        return '附表'
    return None
|
||||||
577
utils/bill_analysis.py
Normal file
577
utils/bill_analysis.py
Normal file
@ -0,0 +1,577 @@
|
|||||||
|
"""
|
||||||
|
工程量清单本地分析(从 bill-worker.js Phase 2/3 移植)。
|
||||||
|
Phase 2:按页关键字筛选清单页;Phase 3:正则解析分部与清单项。
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
BILL_KW = ['项目编码', '项目名称', '工程量', '计量单位', '综合单价', '清单编码']
|
||||||
|
SEC_KW = ['分部分项', '分类分项', '措施项目', '其他项目', '工程量清单计价']
|
||||||
|
FEE_PAGE_KW = [
|
||||||
|
'规费', '税金', '社会保险费', '住房公积金', '养老保险',
|
||||||
|
'工伤保险', '失业保险', '医疗保险', '教育费附加', '城市维护建设税',
|
||||||
|
]
|
||||||
|
|
||||||
|
ITEM_START = re.compile(r'^\d+(\.\d+)+\s')
|
||||||
|
CODE_INLINE = re.compile(r'(?:^|\s)(\d{9,12}|(?<![A-Za-z])B\d{5,6})\s')
|
||||||
|
CODE_START_RE = re.compile(r'^(\d{9,12}|B\d{5,6})\s')
|
||||||
|
SEQ_CODE_RE = re.compile(r'^\d{1,4}\s+(\d{9,12}|(?<![A-Za-z])B\d{5,6})\s')
|
||||||
|
PAGE_MARK = re.compile(r'^--\s*\d+\s+of\s+\d+\s*--')
|
||||||
|
HEADER_RE = re.compile(r'^序号\s+(项目编码|项目名称)')
|
||||||
|
HEADER_KW = re.compile(
|
||||||
|
r'^(项目编码|项目名称|清单编码|计量单位|综合单价|工程量|合\s*价|金额|序号)\s'
|
||||||
|
)
|
||||||
|
CATEGORY_MARKERS = [
|
||||||
|
'一', '二', '三', '四', '五', '六', '七', '八', '九', '十',
|
||||||
|
'(一)', '(二)', '(三)', '(四)', '(五)',
|
||||||
|
]
|
||||||
|
|
||||||
|
# Item code: an inline 9–12 digit number or a 'B' code; letter prefixes like 'GB' excluded.
CODE_RE = re.compile(r'(?<![A-Za-z])(\d{9,12}|(?<![A-Za-z])B\d{5,6})')
|
||||||
|
|
||||||
|
UNIT_TOKENS = [
|
||||||
|
'm³', 'm²', 'm3', 'm2', 'km', 'hm2', '㎡', '㎥', 't', 'kg',
|
||||||
|
'个', '台', '套', '组', '根', '块', '片', '张', '只', '吨', '项',
|
||||||
|
'处', '座', '件', '段', '条', '把', '扇', '口', '圈', '道', '孔',
|
||||||
|
'对', '副', '樘', '方', '延m', '株', '棵', 'm',
|
||||||
|
]
|
||||||
|
UNIT_SET = frozenset(UNIT_TOKENS)
|
||||||
|
_unit_escaped = [re.escape(u) for u in UNIT_TOKENS]
|
||||||
|
UNIT_RE = re.compile(r'(?:^|\s)(' + '|'.join(_unit_escaped) + r')(?=\s|\d|$)')
|
||||||
|
|
||||||
|
SKIP_RE = re.compile(r'合\s*计|小\s*计|本页小计|总\s*计|价税合计')
|
||||||
|
|
||||||
|
_DASH_CODE = re.compile(
|
||||||
|
r'(\d{2,4})[-‐–](\d{2,4})[-‐–](\d{2,4})(?:[-‐–](\d{2,4}))?'
|
||||||
|
)
|
||||||
|
|
||||||
|
_EXACT_FEE_ITEM = frozenset([
|
||||||
|
'规费', '税金', '利润', '增值税', '暂列金额', '暂估价', '计日工',
|
||||||
|
'总承包服务费', '企业管理费', '甲供材料保管费', '价税合计',
|
||||||
|
])
|
||||||
|
_FEE_KW = [
|
||||||
|
'安全文明', '文明施工费', '环境保护费', '临时设施费',
|
||||||
|
'夜间施工增加费', '夜间施工费',
|
||||||
|
'冬雨季施工增加费', '冬雨季施工费',
|
||||||
|
'二次搬运费', '大型机械设备进出场', '大型机械进出场',
|
||||||
|
'施工排水降水', '排水降水费',
|
||||||
|
'已完工程及设备保护', '已完工程保护费',
|
||||||
|
'工程排污费', '社会保障费', '住房公积金',
|
||||||
|
'工伤保险', '劳动保险', '意外伤害保险', '建筑工程保险',
|
||||||
|
'城市维护建设税', '城市建设维护税',
|
||||||
|
'教育费附加', '地方教育附加',
|
||||||
|
'材料暂估', '专业工程暂估',
|
||||||
|
'超高施工增加费', '安全防护费',
|
||||||
|
'措施项目费', '其他项目费', '不可竞争费',
|
||||||
|
]
|
||||||
|
|
||||||
|
_CAT_KW = [
|
||||||
|
'土建', '建筑', '结构', '装饰', '装修', '安装', '给排水', '暖通', '空调', '通风',
|
||||||
|
'电气', '强电', '弱电', '消防', '智能化', '幕墙', '门窗', '园林', '绿化', '景观',
|
||||||
|
'市政', '道路', '桥梁', '管网', '基础', '地基', '桩基', '主体', '屋面', '防水',
|
||||||
|
'保温', '钢结构', '排水', '给水', '照明', '动力', '防雷', '电梯', '人防', '室外',
|
||||||
|
'附属', '分部', '工程', '措施', '清单', '土石方', '混凝土', '砌筑', '模板', '脚手架',
|
||||||
|
'水利', '河道', '管道', '阀门', '设备', '仪表', '自动化', '通信', '网络',
|
||||||
|
'拆除', '外墙', '内墙', '楼地面', '天棚', '吊顶', '栏杆', '屋顶', '涂料', '抹灰',
|
||||||
|
'廊道', '阀门井', '蓄水池', '泵站', '供水', '引水', '水源', '渠道', '闸门',
|
||||||
|
'围栏', '警示', '检修', '管线', '配电', '水池', '水塔', '取水', '净水',
|
||||||
|
]
|
||||||
|
|
||||||
|
_EXACT_FEE_CAT = frozenset([
|
||||||
|
'规费', '税金', '利润', '增值税', '暂列金额', '暂估价', '计日工',
|
||||||
|
'总承包服务费', '企业管理费', '价税合计',
|
||||||
|
'措施项目费', '其他项目费', '不可竞争费',
|
||||||
|
])
|
||||||
|
_FEE_CAT_KW = [
|
||||||
|
'措施项目费', '其他项目费', '不可竞争费',
|
||||||
|
'规费汇总', '税金汇总', '费率', '费用汇总', '费用合计',
|
||||||
|
'暂列金额', '暂估价', '计日工', '总承包服务费',
|
||||||
|
'安全文明施工费', '社会保障费', '住房公积金',
|
||||||
|
'工伤保险', '教育费附加', '城市维护建设税',
|
||||||
|
]
|
||||||
|
|
||||||
|
_SPEC_KW_RE = re.compile(
|
||||||
|
r'(材质|规格|型号|品牌|颜色|尺寸|厚度|直径|管径|强度|等级|类别|类型|做法|要求|标准|内容|工作内容|土壤|含量|配合比|工艺|方式|形式|范围|部位|位置|高度|宽度|长度|深度|坡度|截面|跨度|运距|开挖|回填|混凝土|钢筋|压实)[::]'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _fold_dash_codes(line: str) -> str:
    """Collapse dash-separated code fragments (e.g. '0101-01-001') into a
    single item code whenever the concatenation is 9–12 characters long;
    anything else is left untouched."""
    def _join(match: re.Match) -> str:
        fragments = match.group(1) + match.group(2) + match.group(3) + (match.group(4) or '')
        return fragments if 9 <= len(fragments) <= 12 else match.group(0)

    return _DASH_CODE.sub(_join, line)
|
||||||
|
|
||||||
|
|
||||||
|
def is_fee_item(name: str) -> bool:
    """True when a bill line item names a fee/levy (规费, 税金, ...) rather than
    a physical work item; such rows are filtered out of the parsed bill."""
    if not name:
        return False
    # Compare whitespace-free so '规 费' still matches.
    compact = re.sub(r'\s+', '', name)
    if compact in _EXACT_FEE_ITEM:
        return True
    return any(keyword in compact for keyword in _FEE_KW)
|
||||||
|
|
||||||
|
|
||||||
|
def split_name_and_spec(raw_name: str) -> tuple[str, str]:
    """Split a combined item cell into (name, spec).

    Split-point heuristics, in priority order: a numbered sub-clause such as
    '1、...', a spec keyword followed by a colon, then a parenthesised number.
    Everything from the split point onward becomes the spec; when no split
    point is found, the whole string is the name and the spec is empty.
    """
    if not raw_name:
        return '', ''

    def _cut_at(pos: int) -> tuple[str, str]:
        return raw_name[:pos].strip(), raw_name[pos:].strip()

    numbered = re.search(r'\d+[.、.)\uFF09]\s*[\u4e00-\u9fff]', raw_name)
    if numbered and numbered.start() > 0:
        return _cut_at(numbered.start())
    keyword = _SPEC_KW_RE.search(raw_name)
    if keyword and keyword.start() > 0:
        return _cut_at(keyword.start())
    parenthesised = re.search(r'[((]\d+[))]', raw_name)
    if parenthesised and parenthesised.start() > 0:
        return _cut_at(parenthesised.start())
    return raw_name, ''
|
||||||
|
|
||||||
|
|
||||||
|
def is_cat_title(text: str) -> bool:
    """Heuristic: the line looks like a work-category heading (土建/安装/...)."""
    return any(k in text for k in _CAT_KW)
|
||||||
|
|
||||||
|
|
||||||
|
def is_fee_cat_title(text: str) -> bool:
    """True when a heading denotes fees/levies (规费, 税金, ...) rather than a
    real work category; such headings are skipped during parsing."""
    if not text:
        return False
    # Normalize away whitespace before matching.
    compact = re.sub(r'\s+', '', text)
    if compact in _EXACT_FEE_CAT:
        return True
    return any(keyword in compact for keyword in _FEE_CAT_KW)
|
||||||
|
|
||||||
|
|
||||||
|
def _is_new_line_trigger(raw: str) -> bool:
    """Does this physical line begin a new logical row?

    Triggers: a hierarchical item number ('1.1.1 '), a bare list code, a
    sequence-number-plus-code prefix, or a category marker ('一 ', '(一)' ...)
    followed by an ASCII or full-width space.
    """
    if ITEM_START.match(raw) or CODE_START_RE.match(raw) or SEQ_CODE_RE.match(raw):
        return True
    return any(
        raw.startswith(marker + ' ') or raw.startswith(marker + '\u3000')
        for marker in CATEGORY_MARKERS
    )
|
||||||
|
|
||||||
|
|
||||||
|
def parse_bill_text(text: str) -> dict[str, Any]:
|
||||||
|
raw_lines = []
|
||||||
|
for l in text.split('\n'):
|
||||||
|
line = l.replace('\t', ' ').strip()
|
||||||
|
line = _fold_dash_codes(line)
|
||||||
|
raw_lines.append(line)
|
||||||
|
|
||||||
|
logic_lines: list[str] = []
|
||||||
|
current_line = ''
|
||||||
|
|
||||||
|
for raw in raw_lines:
|
||||||
|
if not raw or PAGE_MARK.match(raw):
|
||||||
|
continue
|
||||||
|
if HEADER_RE.match(raw) or HEADER_KW.match(raw):
|
||||||
|
continue
|
||||||
|
if re.match(r'^(元)|^款章节号|^备注$|^第\d+页', raw):
|
||||||
|
continue
|
||||||
|
|
||||||
|
if _is_new_line_trigger(raw):
|
||||||
|
if current_line:
|
||||||
|
logic_lines.append(current_line)
|
||||||
|
current_line = raw
|
||||||
|
elif CODE_INLINE.search(raw) and len(raw) > 15:
|
||||||
|
if current_line:
|
||||||
|
logic_lines.append(current_line)
|
||||||
|
current_line = raw
|
||||||
|
else:
|
||||||
|
if current_line and len(current_line) > 300:
|
||||||
|
logic_lines.append(current_line)
|
||||||
|
current_line = raw
|
||||||
|
else:
|
||||||
|
current_line = current_line + ' ' + raw if current_line else raw
|
||||||
|
if current_line:
|
||||||
|
logic_lines.append(current_line)
|
||||||
|
|
||||||
|
logger.debug('合并后 %s 条逻辑行(原始 %s 行)', len(logic_lines), len(raw_lines))
|
||||||
|
|
||||||
|
categories: list[dict[str, Any]] = []
|
||||||
|
cur_cat: dict[str, Any] | None = None
|
||||||
|
cur_item: dict[str, Any] | None = None
|
||||||
|
|
||||||
|
for line in logic_lines:
|
||||||
|
if SKIP_RE.search(line):
|
||||||
|
continue
|
||||||
|
|
||||||
|
# 行首序号:多级如「1.1.1.1 」;或「1–4 位序号 + 空格 + 9 位以上编码」。
|
||||||
|
# 避免误删「行首即 9–12 位清单编码 + 空格」整段(JS 原 \d+(\.\d+)* 会吞掉编码)。
|
||||||
|
stripped = line.strip()
|
||||||
|
m_hier = re.match(r'^\d+(?:\.\d+)+\s+', stripped)
|
||||||
|
if m_hier:
|
||||||
|
stripped = stripped[m_hier.end():].strip()
|
||||||
|
elif re.match(r'^\d{1,4}\s+\d{9}', stripped):
|
||||||
|
stripped = re.sub(r'^\d{1,4}\s+', '', stripped, count=1).strip()
|
||||||
|
if not stripped:
|
||||||
|
stripped = line.strip()
|
||||||
|
if not stripped:
|
||||||
|
continue
|
||||||
|
|
||||||
|
cm = CODE_RE.search(stripped)
|
||||||
|
if cm:
|
||||||
|
if cur_item and cur_cat:
|
||||||
|
cur_cat['items'].append(cur_item)
|
||||||
|
if not cur_cat:
|
||||||
|
cur_cat = {'name': '未分类', 'items': []}
|
||||||
|
categories.append(cur_cat)
|
||||||
|
|
||||||
|
code = cm.group(1)
|
||||||
|
rest = stripped[cm.end():].strip()
|
||||||
|
name, unit, quantity, spec = '', '', '', ''
|
||||||
|
|
||||||
|
unit_match = UNIT_RE.search(rest)
|
||||||
|
if unit_match:
|
||||||
|
ui = rest.find(unit_match.group(0))
|
||||||
|
raw_name = rest[:ui].strip()
|
||||||
|
unit = unit_match.group(1)
|
||||||
|
after_unit = rest[ui + len(unit_match.group(0)):].strip()
|
||||||
|
qm = re.match(r'^([\d,.]+)', after_unit)
|
||||||
|
if qm:
|
||||||
|
quantity = qm.group(1)
|
||||||
|
tail = after_unit[qm.end():].strip()
|
||||||
|
if tail:
|
||||||
|
tail_tokens = tail.split()
|
||||||
|
si = 0
|
||||||
|
while si < len(tail_tokens) and re.match(r'^[\d,.%\-]+$', tail_tokens[si]):
|
||||||
|
si += 1
|
||||||
|
spec_tail = ' '.join(tail_tokens[si:]).strip()
|
||||||
|
if spec_tail:
|
||||||
|
spec = spec_tail
|
||||||
|
ns_name, ns_spec = split_name_and_spec(raw_name)
|
||||||
|
name = ns_name
|
||||||
|
if ns_spec:
|
||||||
|
spec = ns_spec + (';' + spec if spec else '')
|
||||||
|
else:
|
||||||
|
tokens = [t for t in rest.split() if t]
|
||||||
|
found_unit_idx = -1
|
||||||
|
for ti in range(len(tokens) - 1, 0, -1):
|
||||||
|
if tokens[ti] in UNIT_SET:
|
||||||
|
found_unit_idx = ti
|
||||||
|
break
|
||||||
|
if found_unit_idx >= 1:
|
||||||
|
raw_name_str = ' '.join(tokens[:found_unit_idx])
|
||||||
|
ns_name, ns_spec = split_name_and_spec(raw_name_str)
|
||||||
|
name = ns_name
|
||||||
|
if ns_spec:
|
||||||
|
spec = ns_spec
|
||||||
|
unit = tokens[found_unit_idx]
|
||||||
|
after_tokens = tokens[found_unit_idx + 1:]
|
||||||
|
if after_tokens and re.match(r'^[\d,.]+$', after_tokens[0]):
|
||||||
|
quantity = after_tokens[0]
|
||||||
|
si = 1
|
||||||
|
while si < len(after_tokens) and re.match(r'^[\d,.%\-]+$', after_tokens[si]):
|
||||||
|
si += 1
|
||||||
|
spec_tail = ' '.join(after_tokens[si:]).strip()
|
||||||
|
if spec_tail:
|
||||||
|
spec = spec + ';' + spec_tail if spec else spec_tail
|
||||||
|
else:
|
||||||
|
name = rest
|
||||||
|
|
||||||
|
name = re.sub(r'\s+', '', name).strip()
|
||||||
|
for u in UNIT_TOKENS:
|
||||||
|
if name.endswith(u) and len(name) > len(u):
|
||||||
|
unit = unit or u
|
||||||
|
name = name[: len(name) - len(u)]
|
||||||
|
break
|
||||||
|
|
||||||
|
cur_item = {'code': code, 'name': name, 'unit': unit, 'quantity': quantity, 'spec': spec}
|
||||||
|
continue
|
||||||
|
|
||||||
|
if len(stripped) > 4:
|
||||||
|
uni_match = UNIT_RE.search(stripped)
|
||||||
|
if uni_match:
|
||||||
|
ui = stripped.find(uni_match.group(0))
|
||||||
|
before_unit = stripped[:ui].strip()
|
||||||
|
after_unit = stripped[ui + len(uni_match.group(0)):].strip()
|
||||||
|
has_qty = bool(re.match(r'^[\d,.]+', after_unit))
|
||||||
|
if (
|
||||||
|
2 <= len(before_unit) <= 50
|
||||||
|
and has_qty
|
||||||
|
and re.search(r'[\u4e00-\u9fff]', before_unit)
|
||||||
|
):
|
||||||
|
if cur_item and cur_cat:
|
||||||
|
cur_cat['items'].append(cur_item)
|
||||||
|
if not cur_cat:
|
||||||
|
cur_cat = {'name': '未分类', 'items': []}
|
||||||
|
categories.append(cur_cat)
|
||||||
|
unit_fb = uni_match.group(1)
|
||||||
|
qm = re.match(r'^([\d,.]+)', after_unit)
|
||||||
|
quantity_fb = qm.group(1) if qm else ''
|
||||||
|
ns_name, ns_spec = split_name_and_spec(before_unit)
|
||||||
|
name_fb = re.sub(r'\s+', '', ns_name).strip()
|
||||||
|
spec_fb = ns_spec or ''
|
||||||
|
cur_item = {'code': '', 'name': name_fb, 'unit': unit_fb, 'quantity': quantity_fb, 'spec': spec_fb}
|
||||||
|
continue
|
||||||
|
|
||||||
|
if 2 < len(stripped) < 60 and not CODE_RE.search(stripped):
|
||||||
|
if UNIT_RE.search(stripped) and re.search(r'\d+\.?\d*\s*$', stripped):
|
||||||
|
if cur_item:
|
||||||
|
cur_item['spec'] = (cur_item.get('spec') or '') + (
|
||||||
|
';' + stripped if cur_item.get('spec') else stripped
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
if is_cat_title(stripped) and not UNIT_RE.search(stripped) and not is_fee_cat_title(stripped):
|
||||||
|
if cur_item and cur_cat:
|
||||||
|
cur_cat['items'].append(cur_item)
|
||||||
|
cur_item = None
|
||||||
|
clean_title = re.sub(
|
||||||
|
r'\s+(座|个|项|处|m|km|段|条)\s+\d+[\d.]*\s*$', '', stripped
|
||||||
|
).strip()
|
||||||
|
cur_cat = {'name': clean_title, 'items': []}
|
||||||
|
categories.append(cur_cat)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if re.match(r'^[一二三四五六七八九十]+\s', stripped) or re.match(
|
||||||
|
r'^([一二三四五六七八九十\d]+)', stripped
|
||||||
|
):
|
||||||
|
clean_title = re.sub(r'\s+(座|个|项|处)\s+\d+[\d.]*\s*$', '', stripped).strip()
|
||||||
|
if is_fee_cat_title(clean_title):
|
||||||
|
continue
|
||||||
|
if cur_item and cur_cat:
|
||||||
|
cur_cat['items'].append(cur_item)
|
||||||
|
cur_item = None
|
||||||
|
cur_cat = {'name': clean_title, 'items': []}
|
||||||
|
categories.append(cur_cat)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if cur_item and len(stripped) > 1:
|
||||||
|
cur_item['spec'] = (cur_item.get('spec') or '') + (
|
||||||
|
';' + stripped if cur_item.get('spec') else stripped
|
||||||
|
)
|
||||||
|
|
||||||
|
if cur_item and cur_cat:
|
||||||
|
cur_cat['items'].append(cur_item)
|
||||||
|
|
||||||
|
fee_filtered = 0
|
||||||
|
for cat in categories:
|
||||||
|
if cat.get('items'):
|
||||||
|
before = len(cat['items'])
|
||||||
|
cat['items'] = [it for it in cat['items'] if not is_fee_item(it.get('name', ''))]
|
||||||
|
fee_filtered += before - len(cat['items'])
|
||||||
|
if fee_filtered:
|
||||||
|
logger.debug('费用项过滤: 移除 %s 项', fee_filtered)
|
||||||
|
|
||||||
|
total_before_merge = 0
|
||||||
|
total_after_merge = 0
|
||||||
|
for cat in categories:
|
||||||
|
items = cat.get('items') or []
|
||||||
|
if not items:
|
||||||
|
continue
|
||||||
|
total_before_merge += len(items)
|
||||||
|
name_map: dict[str, dict[str, Any]] = {}
|
||||||
|
for item in items:
|
||||||
|
key = re.sub(r'\s+', '', (item.get('name') or '')).strip()
|
||||||
|
if not key:
|
||||||
|
continue
|
||||||
|
if key not in name_map:
|
||||||
|
name_map[key] = {
|
||||||
|
'code': item.get('code') or '',
|
||||||
|
'name': item['name'],
|
||||||
|
'unit': item.get('unit') or '',
|
||||||
|
'quantity': item.get('quantity') or '',
|
||||||
|
'spec': item.get('spec') or '',
|
||||||
|
'_quantities': [item['quantity']] if item.get('quantity') else [],
|
||||||
|
'_specs': [item['spec']] if item.get('spec') else [],
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
m = name_map[key]
|
||||||
|
if not m['code'] and item.get('code'):
|
||||||
|
m['code'] = item['code']
|
||||||
|
if not m['unit'] and item.get('unit'):
|
||||||
|
m['unit'] = item['unit']
|
||||||
|
if item.get('quantity'):
|
||||||
|
m['_quantities'].append(item['quantity'])
|
||||||
|
if item.get('spec') and item['spec'] not in m['_specs']:
|
||||||
|
m['_specs'].append(item['spec'])
|
||||||
|
|
||||||
|
merged_items: list[dict[str, str]] = []
|
||||||
|
for m in name_map.values():
|
||||||
|
qlist = m['_quantities']
|
||||||
|
if len(qlist) > 1:
|
||||||
|
nums = []
|
||||||
|
ok = True
|
||||||
|
for q in qlist:
|
||||||
|
try:
|
||||||
|
nums.append(float(q.replace(',', '')))
|
||||||
|
except ValueError:
|
||||||
|
ok = False
|
||||||
|
break
|
||||||
|
if ok:
|
||||||
|
s = sum(nums)
|
||||||
|
m['quantity'] = str(int(s)) if s % 1 == 0 else f'{s:.2f}'
|
||||||
|
else:
|
||||||
|
m['quantity'] = '; '.join(qlist)
|
||||||
|
elif len(qlist) == 1:
|
||||||
|
m['quantity'] = qlist[0]
|
||||||
|
|
||||||
|
if m['_specs']:
|
||||||
|
trimmed = [s[:120] + '...' if len(s) > 120 else s for s in m['_specs']]
|
||||||
|
m['spec'] = '; '.join(trimmed)
|
||||||
|
if len(m['spec']) > 300:
|
||||||
|
m['spec'] = m['spec'][:300] + '...'
|
||||||
|
for k in ('_quantities', '_specs'):
|
||||||
|
m.pop(k, None)
|
||||||
|
merged_items.append(
|
||||||
|
{k: m[k] for k in ('code', 'name', 'unit', 'quantity', 'spec')}
|
||||||
|
)
|
||||||
|
cat['items'] = merged_items
|
||||||
|
total_after_merge += len(merged_items)
|
||||||
|
|
||||||
|
merged_count = total_before_merge - total_after_merge
|
||||||
|
if merged_count > 0:
|
||||||
|
logger.debug('按名称合并: %s → %s 项', total_before_merge, total_after_merge)
|
||||||
|
|
||||||
|
valid = [c for c in categories if c.get('items')]
|
||||||
|
total_items = sum(len(c['items']) for c in valid)
|
||||||
|
logger.debug(
|
||||||
|
'最终结果: %s 分部, %s 清单项', len(valid), total_items
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'project_summary': {
|
||||||
|
'remark': f'本地解析:{len(valid)} 个分部,{total_items} 个清单项(合并前 {total_before_merge} 项)',
|
||||||
|
},
|
||||||
|
'categories': valid,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def filter_bill_pages(page_texts: list[str]) -> tuple[list[str], dict[str, Any]]:
    """
    Select bill-of-quantities (BOQ) related pages from per-page text.

    Returns (bill_page_texts, meta) where meta records total pages,
    whether the document looks scanned, and the selected page indices.
    """
    n = len(page_texts)
    meta: dict[str, Any] = {'total_pages': n, 'scanned': False, 'no_bill_pages': False}

    total_chars = sum(len(t or '') for t in page_texts)
    if total_chars < 50:
        # Almost no extractable text: likely a scanned document.
        meta['scanned'] = True
        meta['reason'] = 'noText'
        return [], meta

    bill_flags = [False] * n
    for i, raw in enumerate(page_texts):
        # Fix: the original re-assigned `t = t or ''` after already proving
        # the page non-empty; normalize once up front instead.
        t = raw or ''
        if not t.strip():
            continue
        h_hits = sum(1 for k in BILL_KW if k in t)
        s_hit = any(k in t for k in SEC_KW)
        # A 9-digit run is treated as a bill item code.
        has_code = bool(re.search(r'\d{9}', t))
        if h_hits >= 2 or s_hit or has_code:
            bill_flags[i] = True

    first_bill = next((i for i, f in enumerate(bill_flags) if f), -1)
    last_bill = max((i for i, f in enumerate(bill_flags) if f), default=-1)
    if first_bill >= 0 and last_bill > first_bill:
        # Fill interior gaps so the bill section stays contiguous, but skip
        # near-empty pages and fee-summary pages that carry no item codes.
        for i in range(first_bill, last_bill + 1):
            if bill_flags[i]:
                continue
            t = page_texts[i] or ''
            if not t.strip() or len(t.strip()) <= 30:
                continue
            fee_hits = sum(1 for kw in FEE_PAGE_KW if kw in t)
            if fee_hits >= 2 and not re.search(r'\d{9}', t):
                continue
            bill_flags[i] = True

    bill_texts = [page_texts[i] for i in range(n) if bill_flags[i]]
    if not bill_texts:
        meta['no_bill_pages'] = True

    meta['bill_page_indices'] = [i for i in range(n) if bill_flags[i]]
    meta['bill_pages'] = len(bill_texts)
    return bill_texts, meta
|
||||||
|
|
||||||
|
|
||||||
|
def analyze_boq_pages(page_texts: list[str]) -> dict[str, Any]:
    """
    Chain bill-page filtering with parse_bill_text; the returned dict
    always carries a ``_meta`` section for persistence and the frontend.
    """
    total_pages = len(page_texts)
    total_chars = sum(len(t or '') for t in page_texts)

    def _meta(bill_pages: int, **extra: Any) -> dict[str, Any]:
        # Shared shape of the _meta section across all return paths.
        return {
            'method': 'python-local',
            'total_pages': total_pages,
            'bill_pages': bill_pages,
            **extra,
        }

    if total_chars < 50:
        # Too little text: most likely a scanned PDF without a text layer.
        return {
            'scanned': True,
            'reason': 'noText',
            'totalPages': total_pages,
            'project_summary': {'remark': '文本过少,疑似扫描件或未提取到文字'},
            'categories': [],
            '_meta': _meta(0),
        }

    bill_texts, fmeta = filter_bill_pages(page_texts)
    if not bill_texts:
        carried = {k: fmeta[k] for k in ('no_bill_pages',) if k in fmeta}
        return {
            'scanned': False,
            'no_bill_pages': True,
            'totalPages': total_pages,
            'project_summary': {'remark': '未识别到清单相关页面'},
            'categories': [],
            '_meta': _meta(0, **carried),
        }

    parsed = parse_bill_text('\n'.join(bill_texts))
    return {
        'scanned': False,
        **parsed,
        '_meta': _meta(
            len(bill_texts),
            bill_page_indices=fmeta.get('bill_page_indices', []),
        ),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def categories_to_prompt_appendix(
    analysis: dict[str, Any],
    max_chars: int = 3000,
    max_per_cat: int = 40,
) -> str:
    """Compress locally parsed BOQ results into a short text appendix
    injected into the AI summarisation prompt."""
    out: list[str] = []
    for cat in analysis.get('categories') or []:
        items = cat.get('items') or []
        out.append(f'【{cat.get("name", "")}】')
        for it in items[:max_per_cat]:
            code = it.get('code') or '-'
            n = it.get('name') or ''
            u = it.get('unit') or ''
            q = it.get('quantity') or ''
            # strip() also trims the trailing blanks left by empty fields
            out.append(f'  {code} {n} {u} {q}'.strip())
        if len(items) > max_per_cat:
            out.append(f'  …共 {len(items)} 条,此处省略其余')
    text = '\n'.join(out).strip()
    if len(text) > max_chars:
        text = text[:max_chars] + '\n…(附录已截断)'
    return text
|
||||||
138
utils/boq_parser.py
Normal file
138
utils/boq_parser.py
Normal file
@ -0,0 +1,138 @@
|
|||||||
|
"""
|
||||||
|
工程量清单解析模块:从 Excel / CSV / PDF / Word 文件中提取结构化文本。
|
||||||
|
"""
|
||||||
|
import csv
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# 最大返回字符数(送给 AI 做摘要时截断)
|
||||||
|
MAX_BOQ_CHARS = 12000
|
||||||
|
|
||||||
|
|
||||||
|
def extract_boq_text(file_path: str) -> str:
    """
    Extract raw structured text from a bill-of-quantities file.

    Supported: .xlsx / .xls / .csv / .pdf / .docx / .doc.
    The result is truncated to MAX_BOQ_CHARS before being sent to the AI.
    """
    ext = Path(file_path).suffix.lower()
    if ext in ('.xlsx', '.xls'):
        text = _extract_excel(file_path)
    elif ext == '.csv':
        text = _extract_csv(file_path)
    elif ext in ('.pdf', '.docx', '.doc'):
        # Imported lazily to avoid a hard dependency at module import time.
        from utils import file_utils
        handler = {
            '.pdf': file_utils._extract_pdf,
            '.docx': file_utils._extract_docx,
            '.doc': file_utils._extract_doc,
        }[ext]
        text = handler(file_path)
    else:
        raise ValueError(f'不支持的文件格式 {ext},请使用 xlsx/xls/csv/pdf/docx/doc')

    return text[:MAX_BOQ_CHARS]
|
||||||
|
|
||||||
|
|
||||||
|
def extract_boq_pages(file_path: str) -> list[str]:
    """
    Return the bill text split by "page": one entry per PDF page;
    Excel/CSV/Word yield a single-element list with the whole text.
    """
    if Path(file_path).suffix.lower() == '.pdf':
        from utils.file_utils import extract_pdf_pages
        return extract_pdf_pages(file_path)
    text = extract_boq_text(file_path)
    return [text or '']
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Excel ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _extract_excel(file_path: str) -> str:
    """Extract every non-empty sheet of a workbook as pipe-delimited text."""
    try:
        import openpyxl
    except ImportError:
        # openpyxl unavailable: fall back to the legacy xlrd reader.
        return _extract_xls_fallback(file_path)
    try:
        wb = openpyxl.load_workbook(file_path, data_only=True, read_only=True)
        blocks = [_sheet_to_text(wb[name], name) for name in wb.sheetnames]
        wb.close()
        return '\n\n'.join(b for b in blocks if b.strip())
    except Exception as e:
        raise RuntimeError(f'Excel 解析失败:{e}') from e
|
||||||
|
|
||||||
|
|
||||||
|
def _sheet_to_text(ws, sheet_name: str) -> str:
|
||||||
|
"""将一个 Sheet 转为管道分隔文本,自动过滤全空行和全空列。"""
|
||||||
|
raw_rows = []
|
||||||
|
for row in ws.iter_rows(values_only=True):
|
||||||
|
cells = ['' if v is None else str(v).strip() for v in row]
|
||||||
|
if any(cells):
|
||||||
|
raw_rows.append(cells)
|
||||||
|
|
||||||
|
if not raw_rows:
|
||||||
|
return ''
|
||||||
|
|
||||||
|
# 对齐列数
|
||||||
|
max_cols = max(len(r) for r in raw_rows)
|
||||||
|
raw_rows = [r + [''] * (max_cols - len(r)) for r in raw_rows]
|
||||||
|
|
||||||
|
# 找出有内容的列索引
|
||||||
|
active_cols = [j for j in range(max_cols)
|
||||||
|
if any(raw_rows[i][j] for i in range(len(raw_rows)))]
|
||||||
|
if not active_cols:
|
||||||
|
return ''
|
||||||
|
|
||||||
|
lines = [f'【{sheet_name}】']
|
||||||
|
for row in raw_rows:
|
||||||
|
line = ' | '.join(row[j] for j in active_cols)
|
||||||
|
if line.replace('|', '').strip():
|
||||||
|
lines.append(line)
|
||||||
|
return '\n'.join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_xls_fallback(file_path: str) -> str:
    """Legacy .xls reader via xlrd (requires xlrd<2)."""
    try:
        import xlrd  # type: ignore
        wb = xlrd.open_workbook(file_path)
        parts = []
        for sheet in wb.sheets():
            lines = [f'【{sheet.name}】']
            for rx in range(sheet.nrows):
                row = [str(sheet.cell_value(rx, cx)).strip()
                       for cx in range(sheet.ncols)]
                joined = ' | '.join(c for c in row if c)
                if joined:
                    lines.append(joined)
            parts.append('\n'.join(lines))
        return '\n\n'.join(parts)
    except Exception as e:
        raise RuntimeError(f'.xls 解析失败,请另存为 .xlsx 后重试:{e}') from e
|
||||||
|
|
||||||
|
|
||||||
|
# ─── CSV ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _extract_csv(file_path: str) -> str:
|
||||||
|
encodings = ['utf-8-sig', 'gbk', 'utf-8', 'gb18030', 'latin-1']
|
||||||
|
for enc in encodings:
|
||||||
|
try:
|
||||||
|
lines = []
|
||||||
|
with open(file_path, 'r', encoding=enc, newline='') as f:
|
||||||
|
for row in csv.reader(f):
|
||||||
|
line = ' | '.join(c.strip() for c in row if c.strip())
|
||||||
|
if line:
|
||||||
|
lines.append(line)
|
||||||
|
return '\n'.join(lines)
|
||||||
|
except (UnicodeDecodeError, UnicodeError):
|
||||||
|
continue
|
||||||
|
except Exception as e:
|
||||||
|
raise RuntimeError(f'CSV 解析失败:{e}') from e
|
||||||
|
raise RuntimeError('CSV 文件编码不支持,请另存为 UTF-8 格式后重试')
|
||||||
283
utils/diagram_intent.py
Normal file
283
utils/diagram_intent.py
Normal file
@ -0,0 +1,283 @@
|
|||||||
|
"""
|
||||||
|
章节级图/表意图:字符特征 + 大纲上下文窗口计分,栈式优先级,驱动提示词附加段。
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Any, Dict, List, Optional, Sequence, Tuple
|
||||||
|
|
||||||
|
import config
|
||||||
|
from utils import prompts as P
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Built-in defaults for the diagram-intent rules; values are overridden by
# data/diagram_intent_rules.json when present (see load_diagram_rules).
DEFAULT_DIAGRAM_RULES: Dict[str, Any] = {
    'schema_version': 1,
    # Minimum combined score before a figure / table intent is emitted (build_stack).
    'threshold_figure': 1.0,
    'threshold_table': 1.0,
    # Relative weight of keyword hits in the section title vs. its outline context.
    'title_weight': 1.0,
    'context_weight': 0.6,
    # Outline lines taken around the matched section title as scoring context.
    'outline_context_lines': {'before': 4, 'after': 6},
    # Stack order when both kinds pass: 'score_desc' | 'figure_first' | 'table_first'.
    'stack_order_when_both': 'score_desc',
    # Keyword lists: plain strings or {'text'/'pattern': ..., 'weight': ...} dicts.
    'figure_keywords': [],
    'table_keywords': [],
}
|
||||||
|
|
||||||
|
|
||||||
|
def diagram_rules_path() -> str:
    """Return the on-disk location of the user-editable diagram intent rules."""
    return os.path.join(config.DATA_DIR, 'diagram_intent_rules.json')
|
||||||
|
|
||||||
|
|
||||||
|
def load_diagram_rules(path: Optional[str] = None) -> Dict[str, Any]:
    """Load the rules JSON, falling back to built-in defaults when the
    file is missing or unreadable."""
    rules_file = path or diagram_rules_path()
    merged = dict(DEFAULT_DIAGRAM_RULES)
    if not os.path.isfile(rules_file):
        return merged
    try:
        with open(rules_file, encoding='utf-8') as fh:
            loaded = json.load(fh)
        if isinstance(loaded, dict):
            for key, value in loaded.items():
                if key.startswith('_'):
                    continue  # underscore keys act as comments and are ignored
                if key == 'outline_context_lines' and isinstance(value, dict):
                    # Merge partial window overrides onto the defaults.
                    merged['outline_context_lines'] = {
                        **merged.get('outline_context_lines', {}),
                        **value,
                    }
                else:
                    merged[key] = value
    except Exception as e:
        logger.warning('加载 diagram_intent_rules.json 失败,使用内置默认: %s', e)
    return merged
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_keyword_entries(raw: Any) -> List[Tuple[str, float]]:
|
||||||
|
out: List[Tuple[str, float]] = []
|
||||||
|
if not isinstance(raw, list):
|
||||||
|
return out
|
||||||
|
for item in raw:
|
||||||
|
if isinstance(item, str) and item.strip():
|
||||||
|
out.append((item.strip(), 1.0))
|
||||||
|
elif isinstance(item, dict):
|
||||||
|
t = (item.get('text') or item.get('pattern') or '').strip()
|
||||||
|
if not t:
|
||||||
|
continue
|
||||||
|
w = float(item.get('weight', 1.0))
|
||||||
|
out.append((t, w))
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def _score_text(text: str, entries: Sequence[Tuple[str, float]]) -> float:
|
||||||
|
if not text or not entries:
|
||||||
|
return 0.0
|
||||||
|
s = 0.0
|
||||||
|
for kw, w in entries:
|
||||||
|
if kw in text:
|
||||||
|
s += w
|
||||||
|
return s
|
||||||
|
|
||||||
|
|
||||||
|
# Kept as a plain str alias (not an Enum) so JSON round-trips stay trivial.
DiagramKind = str  # 'figure' | 'table'


@dataclass(frozen=True)
class DiagramIntent:
    """One planned diagram for a section: what kind, how strong, and why."""

    # 'figure' or 'table'.
    kind: str
    # Combined keyword score (title + context) that produced this intent.
    score: float
    # Human-readable origin, e.g. 'title+context' or 'fallback'.
    sources: str


# Stack top = index 0; earlier entries take priority.
DiagramStack = List[DiagramIntent]
|
||||||
|
|
||||||
|
|
||||||
|
def score_figure_table(
    title: str,
    context_snippet: str,
    rules: Dict[str, Any],
) -> Tuple[float, float]:
    """Score figure and table intents separately; title hits and context
    hits are weighted individually, then summed."""
    figure_entries = _normalize_keyword_entries(rules.get('figure_keywords'))
    table_entries = _normalize_keyword_entries(rules.get('table_keywords'))
    title_w = float(rules.get('title_weight', 1.0))
    ctx_w = float(rules.get('context_weight', 0.6))
    title_text = title or ''
    ctx_text = context_snippet or ''

    def combined(entries: Sequence[Tuple[str, float]]) -> float:
        # Weighted blend of title-level and context-level matches.
        return title_w * _score_text(title_text, entries) + ctx_w * _score_text(ctx_text, entries)

    return combined(figure_entries), combined(table_entries)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_outline_window(
    outline_text: str,
    section_title: str,
    before: int,
    after: int,
    fallback_chars: int = 1200,
) -> str:
    """
    Locate *section_title* inside the outline and return the surrounding
    line window (*before* lines above through *after* lines below).

    Matching tries, in order: the full title as a substring of a line, the
    de-numbered title core against the de-numbered line, and the core
    against the raw line. When nothing matches (or inputs are empty), a
    prefix of the outline up to *fallback_chars* characters is returned.
    """
    if not outline_text or not section_title:
        return (outline_text or '')[:fallback_chars]
    title_stripped = section_title.strip()
    if not title_stripped:
        return outline_text[:fallback_chars]

    def _strip_serial(s: str) -> str:
        # Drop a leading "1、" / "三." style numbering prefix.
        return re.sub(r'^\s*[\d一二三四五六七八九十]+[、..\s]+', '', s).strip()

    core = _strip_serial(title_stripped)
    lines = outline_text.splitlines()
    idx = -1
    for i, line in enumerate(lines):
        line_s = line.strip()
        # Fix: the original had a second, mostly-unreachable `if core in
        # line_s` branch that duplicated the same index/break; folded into
        # one combined condition with identical match order.
        if title_stripped in line_s or (
            core and (core in _strip_serial(line_s) or core in line_s)
        ):
            idx = i
            break
    if idx < 0:
        return outline_text[:fallback_chars]
    lo = max(0, idx - max(0, before))
    hi = min(len(lines), idx + max(0, after) + 1)
    return '\n'.join(lines[lo:hi])
|
||||||
|
|
||||||
|
|
||||||
|
def build_stack(
    fig_score: float,
    tbl_score: float,
    rules: Dict[str, Any],
    enable_figure: bool,
    enable_table: bool,
) -> DiagramStack:
    """Turn raw scores into an ordered intent stack (top = index 0)."""
    stack: List[DiagramIntent] = []
    if enable_figure and fig_score >= float(rules.get('threshold_figure', 1.0)):
        stack.append(DiagramIntent('figure', fig_score, 'title+context'))
    if enable_table and tbl_score >= float(rules.get('threshold_table', 1.0)):
        stack.append(DiagramIntent('table', tbl_score, 'title+context'))
    if len(stack) <= 1:
        return stack

    # Both kinds passed: order according to the configured policy.
    # Stable sorts keep the original figure-before-table order on ties.
    mode = (rules.get('stack_order_when_both') or 'score_desc').strip()
    if mode == 'figure_first':
        stack.sort(key=lambda it: it.kind != 'figure')
    elif mode == 'table_first':
        stack.sort(key=lambda it: it.kind != 'table')
    else:  # 'score_desc' — highest score on top
        stack.sort(key=lambda it: -it.score)
    return stack
|
||||||
|
|
||||||
|
|
||||||
|
def stack_compact_labels(stack: DiagramStack) -> List[str]:
    """Short per-intent labels; must stay in sync with stack_to_addon."""
    return [
        '图示([FIGURE] 块)' if intent.kind == 'figure' else '表格([TABLE] 块)'
        for intent in stack
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def make_fallback_stack(kind: str) -> DiagramStack:
    """When the stack is empty but generation is required, emit a single
    placeholder intent; unknown kinds default to 'table'."""
    normalized = (kind or '').strip().lower()
    if normalized not in ('figure', 'table'):
        normalized = 'table'
    return [DiagramIntent(normalized, 1.0, 'fallback')]
|
||||||
|
|
||||||
|
|
||||||
|
def stack_to_addon(stack: DiagramStack) -> str:
    """Concatenate the priority preamble with the figure/table spec texts,
    following stack order (top first)."""
    if not stack:
        return ''
    labels: List[str] = [
        '图示([FIGURE] 块)' if it.kind == 'figure' else '表格([TABLE] 块)'
        for it in stack
    ]
    pieces: List[str] = [P.diagram_priority_preamble(labels)]
    pieces.extend(
        P.get_figure_addon() if it.kind == 'figure' else P.get_table_addon()
        for it in stack
    )
    return ''.join(pieces)
|
||||||
|
|
||||||
|
|
||||||
|
class DiagramIntentAgent:
    """Rule-driven planner: computes the intent stack for one section and
    renders the corresponding prompt add-on."""

    def __init__(self, rules: Optional[Dict[str, Any]] = None) -> None:
        self.rules = rules or load_diagram_rules()

    @classmethod
    def load_default(cls) -> 'DiagramIntentAgent':
        """Construct an agent backed by the rules file on disk."""
        return cls(load_diagram_rules())

    def plan(
        self,
        section_title: str,
        outline_text: str,
        enable_figure: bool,
        enable_table: bool,
    ) -> DiagramStack:
        """Score the section against the rules and build its intent stack."""
        rules = self.rules
        window = rules.get('outline_context_lines') or {}
        context = extract_outline_window(
            outline_text,
            section_title,
            int(window.get('before', 4)),
            int(window.get('after', 6)),
        )
        fig_score, tbl_score = score_figure_table(section_title, context, rules)
        return build_stack(fig_score, tbl_score, rules, enable_figure, enable_table)

    def render_for_section(
        self,
        section_title: str,
        outline_text: str,
        enable_figure: bool,
        enable_table: bool,
    ) -> str:
        """Render the prompt add-on text; empty when both kinds are disabled."""
        if not (enable_figure or enable_table):
            return ''
        return stack_to_addon(
            self.plan(section_title, outline_text, enable_figure, enable_table)
        )
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level default instance, shared by single-shot generator calls.
_default_agent: Optional[DiagramIntentAgent] = None


def get_diagram_agent() -> DiagramIntentAgent:
    """Return the shared agent, creating it lazily on first use."""
    global _default_agent
    agent = _default_agent
    if agent is None:
        agent = DiagramIntentAgent.load_default()
        _default_agent = agent
    return agent
|
||||||
|
|
||||||
|
|
||||||
|
def invalidate_diagram_agent_cache() -> None:
    """Drop the cached agent so updated rules are reloaded on next access."""
    global _default_agent
    _default_agent = None
|
||||||
205
utils/file_utils.py
Normal file
205
utils/file_utils.py
Normal file
@ -0,0 +1,205 @@
|
|||||||
|
"""
|
||||||
|
文件处理工具:从 PDF / Word 文件中提取纯文本
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_text(file_path: str) -> str:
    """
    Dispatch plain-text extraction by file extension.

    Supports .pdf / .docx / .doc; raises ValueError otherwise.
    """
    ext = Path(file_path).suffix.lower()
    handlers = {
        '.pdf': _extract_pdf,
        '.docx': _extract_docx,
        '.doc': _extract_doc,
    }
    handler = handlers.get(ext)
    if handler is None:
        raise ValueError(f'不支持的文件类型: {ext}')
    return handler(file_path)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_pdf(file_path: str) -> str:
    """
    Extract PDF text: try pypdf first, fall back to pdfminer.

    Raises RuntimeError (chained to the original error) when both fail.
    """
    try:
        from pypdf import PdfReader
        reader = PdfReader(file_path)
        # Pages without a text layer yield None/'' and are skipped.
        parts = [t for page in reader.pages if (t := page.extract_text())]
        result = '\n'.join(parts)
        if result.strip():
            return result
    except Exception as e:
        # Lazy %-style logging: the message is only formatted if emitted.
        logger.warning('pypdf 提取失败: %s,尝试 pdfminer', e)

    try:
        from pdfminer.high_level import extract_text as pm_extract
        return pm_extract(file_path) or ''
    except Exception as e:
        logger.error('pdfminer 提取失败: %s', e)
        # Fix: chain the cause so the original traceback is preserved.
        raise RuntimeError(f'PDF 文本提取失败: {e}') from e
|
||||||
|
|
||||||
|
|
||||||
|
def extract_pdf_pages(file_path: str) -> list[str]:
    """
    Per-page PDF text extraction (used for BOQ page filtering).

    Prefers pypdf page-by-page; when no page yields text, falls back to a
    single-element list containing the whole pdfminer extraction.
    """
    try:
        from pypdf import PdfReader
        page_texts = [
            (page.extract_text() or '').strip()
            for page in PdfReader(file_path).pages
        ]
        if any(page_texts):
            return page_texts
    except Exception as e:
        logger.warning(f'pypdf 按页提取失败: {e},尝试 pdfminer')

    try:
        from pdfminer.high_level import extract_text as pm_extract
        blob = (pm_extract(file_path) or '').strip()
        return [blob] if blob else ['']
    except Exception as e:
        logger.error(f'pdfminer 提取失败: {e}')
        raise RuntimeError(f'PDF 文本提取失败: {e}')
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_docx(file_path: str) -> str:
    """Extract .docx text (paragraphs plus table cells) via python-docx."""
    try:
        from docx import Document
        doc = Document(file_path)
        chunks = [p.text for p in doc.paragraphs if p.text.strip()]
        for table in doc.tables:
            for row in table.rows:
                cells = [c.text.strip() for c in row.cells if c.text.strip()]
                if cells:
                    chunks.append(' '.join(cells))
        return '\n'.join(chunks)
    except Exception as e:
        logger.error(f'.docx 提取失败: {e}')
        raise RuntimeError(f'Word 文本提取失败: {e}')
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_doc(file_path: str) -> str:
    """
    Extract text from a legacy .doc file, trying in priority order:
    1. win32com (Windows with Microsoft Word installed — most accurate)
    2. LibreOffice command-line conversion (requires LibreOffice)
    3. python-docx compatibility read (some pseudo-.doc files saved as XML work)
    If all three fail, asks the user to re-save the file as .docx.
    """
    abs_path = str(Path(file_path).resolve())

    # ── Option 1: win32com (Windows + Word) ──────────────────────────────
    try:
        import win32com.client
        import pythoncom
        # COM must be initialised per-thread before Dispatch.
        pythoncom.CoInitialize()
        word = None
        try:
            word = win32com.client.Dispatch('Word.Application')
            word.Visible = False
            doc = word.Documents.Open(abs_path, ReadOnly=True)
            text = doc.Range().Text
            doc.Close(False)
            logger.info(f'.doc 通过 win32com 提取成功: {file_path}')
            return text or ''
        finally:
            # Always shut Word down and release COM, even on failure.
            if word:
                try:
                    word.Quit()
                except Exception:
                    pass
            pythoncom.CoUninitialize()
    except ImportError:
        logger.info('pywin32 未安装,跳过 win32com 方案')
    except Exception as e:
        logger.warning(f'win32com 提取 .doc 失败: {e}')

    # ── Option 2: LibreOffice command line ───────────────────────────────
    try:
        import subprocess
        import tempfile
        tmp_dir = tempfile.mkdtemp()
        # Binary name differs across installs; try both spellings.
        for soffice_cmd in ('soffice', 'libreoffice'):
            try:
                result = subprocess.run(
                    [soffice_cmd, '--headless', '--convert-to', 'txt:Text',
                     '--outdir', tmp_dir, abs_path],
                    capture_output=True, text=True, timeout=60,
                )
                if result.returncode == 0:
                    # LibreOffice writes <stem>.txt into the output directory.
                    txt_file = os.path.join(tmp_dir, Path(file_path).stem + '.txt')
                    if os.path.exists(txt_file):
                        with open(txt_file, 'r', encoding='utf-8', errors='ignore') as f:
                            content = f.read()
                        logger.info(f'.doc 通过 LibreOffice 提取成功: {file_path}')
                        return content
            except FileNotFoundError:
                # This binary name is absent; try the next one.
                continue
            except subprocess.TimeoutExpired:
                logger.warning('LibreOffice 转换超时')
                break
    except Exception as e:
        logger.warning(f'LibreOffice 提取 .doc 失败: {e}')

    # ── Option 3: python-docx compatibility (some re-saved .doc are XML) ─
    try:
        result = _extract_docx(file_path)
        if result.strip():
            logger.info(f'.doc 通过 python-docx 兼容读取成功: {file_path}')
            return result
    except Exception as e:
        logger.warning(f'python-docx 兼容读取 .doc 失败: {e}')

    raise RuntimeError(
        '无法读取 .doc 格式文件。请在 Word 中打开该文件,'
        '选择「另存为」→「Word 文档 (.docx)」后重新上传。'
    )
|
||||||
|
|
||||||
|
|
||||||
|
def truncate_text(text: str, max_chars: int = 60000) -> str:
    """Cap overly long text so it stays within the AI token budget."""
    if len(text) > max_chars:
        return text[:max_chars] + '\n\n...[文档内容已截断,仅展示前段]'
    return text
|
||||||
|
|
||||||
|
|
||||||
|
def split_text_chunks(text: str, chunk_size: int = 2000, overlap: int = 200) -> list[str]:
    """
    Split *text* into fixed-size chunks sharing *overlap* characters
    between consecutive chunks (used by the knowledge base).

    Raises ValueError for parameter combinations that would loop forever:
    a non-positive chunk size, or overlap >= chunk_size (the step would
    never advance — the original implementation hung in that case).
    """
    if chunk_size <= 0:
        raise ValueError('chunk_size must be positive')
    if overlap < 0 or overlap >= chunk_size:
        raise ValueError('overlap must satisfy 0 <= overlap < chunk_size')
    step = chunk_size - overlap
    return [text[start:start + chunk_size] for start in range(0, len(text), step)]
|
||||||
|
|
||||||
|
|
||||||
|
def allowed_file(filename: str) -> bool:
    """True when the filename carries a pdf/doc/docx extension."""
    if '.' not in filename:
        return False
    return filename.rsplit('.', 1)[1].lower() in {'pdf', 'doc', 'docx'}
|
||||||
|
|
||||||
|
|
||||||
|
def safe_filename(filename: str) -> str:
    """Replace characters outside word chars / CJK / '.' / '-' with '_'."""
    import re
    return re.sub(r'[^\w\u4e00-\u9fff.\-]', '_', filename)
|
||||||
52
utils/outline_numbering.py
Normal file
52
utils/outline_numbering.py
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
"""
|
||||||
|
标书目录号展示:一级为汉字+顿号,子级为数字多级编号(与 AI 大纲示例一致)。
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
||||||
|
def int_to_chinese_numeral(n: int) -> str:
    """Convert a positive integer to Chinese numerals (一、二、…、十、十一、…、一百).

    Non-positive values fall back to ``str(n)``; so do values >= 1000,
    which are never needed for chapter numbering.
    """
    if n <= 0:
        return str(n)
    digits = "零一二三四五六七八九"
    if n < 10:
        return digits[n]
    if n == 10:
        return "十"
    if n < 20:
        # 11–19 read as 十一 … 十九 when they stand alone.
        return "十" + (digits[n % 10] if n % 10 else "")
    if n < 100:
        tens, ones = divmod(n, 10)
        s = digits[tens] + "十"
        if ones:
            s += digits[ones]
        return s
    if n < 1000:
        hundreds, rest = divmod(n, 100)
        s = digits[hundreds] + "百"
        if rest == 0:
            return s
        if rest < 10:
            # e.g. 105 → 一百零五
            return s + "零" + digits[rest]
        if rest < 20:
            # Bug fix: 110 must read 一百一十 (the original emitted 一百十,
            # because the standalone teens form 十X lacks the leading 一).
            return s + "一" + int_to_chinese_numeral(rest)
        return s + int_to_chinese_numeral(rest)
    # Chapters beyond the hundreds are not expected; degrade gracefully.
    return str(n)


def format_heading_display(level: int, section_number: str, title: str) -> str:
    """Format a numbered heading line (outline text, Word headings, TOC page).

    - Level 1: Chinese numeral + 顿号 + title, e.g. 「一、总体方案」.
    - Level 2 and deeper: dotted number + title, e.g. 「1.1 子标题」.
    A falsy or unparsable section number at level 1 defaults to chapter 1.
    """
    title = (title or "").strip()
    sn = (section_number or "").strip()
    lv = int(level) if level else 1
    if lv <= 1:
        main = sn.split(".")[0]
        try:
            idx = int(main)
        except ValueError:
            idx = 1
        return f"{int_to_chinese_numeral(idx)}、{title}"
    return f"{sn} {title}".strip()
|
||||||
1069
utils/prompts.py
Normal file
1069
utils/prompts.py
Normal file
File diff suppressed because it is too large
Load Diff
141
utils/settings.py
Normal file
141
utils/settings.py
Normal file
@ -0,0 +1,141 @@
|
|||||||
|
"""
|
||||||
|
配置持久化:将用户在界面中设置的 API Key 等配置保存到 data/settings.json,
|
||||||
|
服务重启后自动恢复,不再每次重启都丢失 Key。
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_SETTINGS_PATH: str = '' # 由 app.py 初始化时注入
|
||||||
|
|
||||||
|
|
||||||
|
def init(settings_path: str):
    """Inject the settings.json path; called once by app.py at startup."""
    global _SETTINGS_PATH
    _SETTINGS_PATH = settings_path
|
||||||
|
|
||||||
|
|
||||||
|
def load(cfg) -> None:
    """Restore persisted settings from settings.json onto the config module.

    Environment-variable overrides are applied afterwards in every branch,
    so env-injected API keys always win over the persisted file.
    """
    if _SETTINGS_PATH and os.path.exists(_SETTINGS_PATH):
        try:
            with open(_SETTINGS_PATH, 'r', encoding='utf-8') as f:
                data = json.load(f)
            _apply(cfg, data)
            _apply_env_overrides(cfg)
            logger.info(f'已从 {_SETTINGS_PATH} 恢复配置,当前 provider={cfg.MODEL_PROVIDER}')
            return
        except Exception as e:
            logger.warning(f'加载配置文件失败: {e}')
    _apply_env_overrides(cfg)
|
||||||
|
|
||||||
|
|
||||||
|
# (environment variable name, config attribute name) pairs — they coincide.
_ENV_API_KEYS = (
    ('QWEN_API_KEY', 'QWEN_API_KEY'),
    ('OPENAI_API_KEY', 'OPENAI_API_KEY'),
    ('DEEPSEEK_API_KEY', 'DEEPSEEK_API_KEY'),
    ('DOUBAO_API_KEY', 'DOUBAO_API_KEY'),
    ('KIMI_API_KEY', 'KIMI_API_KEY'),
)


def _apply_env_overrides(cfg) -> None:
    """Let environment variables take precedence over settings.json.

    Supports Docker / local .env injection of the provider and API keys.
    """
    provider = os.environ.get('MODEL_PROVIDER')
    if isinstance(provider, str) and provider.strip():
        cfg.MODEL_PROVIDER = provider.strip()
    for env_name, attr in _ENV_API_KEYS:
        value = os.environ.get(env_name)
        # Skip unset values and the 'sk-your…' placeholders from .env.example.
        if isinstance(value, str) and value and not value.startswith('sk-your'):
            setattr(cfg, attr, value.strip())
|
||||||
|
|
||||||
|
|
||||||
|
def save(cfg) -> None:
    """Write the key values of the current config module to settings.json."""
    # Path is injected via init(); without it persistence is disabled.
    if not _SETTINGS_PATH:
        return
    data = {
        'model_provider': cfg.MODEL_PROVIDER,
        'qwen_api_key': cfg.QWEN_API_KEY,
        'qwen_model': cfg.QWEN_MODEL,
        'qwen_base_url': cfg.QWEN_BASE_URL,
        'openai_api_key': cfg.OPENAI_API_KEY,
        'openai_model': cfg.OPENAI_MODEL,
        'openai_base_url': cfg.OPENAI_BASE_URL,
        'deepseek_api_key': cfg.DEEPSEEK_API_KEY,
        'deepseek_model': cfg.DEEPSEEK_MODEL,
        'deepseek_base_url': cfg.DEEPSEEK_BASE_URL,
        'ollama_base_url': cfg.OLLAMA_BASE_URL,
        'ollama_model': cfg.OLLAMA_MODEL,
        'doubao_api_key': cfg.DOUBAO_API_KEY,
        'doubao_model': cfg.DOUBAO_MODEL,
        'doubao_base_url': cfg.DOUBAO_BASE_URL,
        'kimi_api_key': cfg.KIMI_API_KEY,
        'kimi_model': cfg.KIMI_MODEL,
        'kimi_base_url': cfg.KIMI_BASE_URL,
        'max_concurrent': cfg.MAX_CONCURRENT_SECTIONS,
        'content_volume': cfg.CONTENT_VOLUME,
        # getattr with defaults: these two fields may be absent on older configs.
        'target_pages': getattr(cfg, 'TARGET_PAGES', 0),
        'page_char_estimate': getattr(cfg, 'PAGE_CHAR_ESTIMATE', 700),
    }
    try:
        os.makedirs(os.path.dirname(_SETTINGS_PATH), exist_ok=True)
        with open(_SETTINGS_PATH, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
    except Exception as e:
        # Best-effort persistence: failure to save must not crash the request.
        logger.warning(f'保存配置文件失败: {e}')
|
||||||
|
|
||||||
|
|
||||||
|
def _apply(cfg, data: dict) -> None:
    """Copy values from the parsed settings dict back onto the config module.

    Only non-empty strings / valid numbers are applied; malformed entries are
    silently ignored so a hand-edited settings.json cannot break startup.
    """
    # settings.json key -> config attribute name.
    str_fields = {
        'model_provider': 'MODEL_PROVIDER',
        'qwen_api_key': 'QWEN_API_KEY',
        'qwen_model': 'QWEN_MODEL',
        'qwen_base_url': 'QWEN_BASE_URL',
        'openai_api_key': 'OPENAI_API_KEY',
        'openai_model': 'OPENAI_MODEL',
        'openai_base_url': 'OPENAI_BASE_URL',
        'deepseek_api_key': 'DEEPSEEK_API_KEY',
        'deepseek_model': 'DEEPSEEK_MODEL',
        'deepseek_base_url': 'DEEPSEEK_BASE_URL',
        'ollama_base_url': 'OLLAMA_BASE_URL',
        'ollama_model': 'OLLAMA_MODEL',
        'doubao_api_key': 'DOUBAO_API_KEY',
        'doubao_model': 'DOUBAO_MODEL',
        'doubao_base_url': 'DOUBAO_BASE_URL',
        'kimi_api_key': 'KIMI_API_KEY',
        'kimi_model': 'KIMI_MODEL',
        'kimi_base_url': 'KIMI_BASE_URL',
    }
    for key, attr in str_fields.items():
        val = data.get(key)
        if val and isinstance(val, str):
            setattr(cfg, attr, val)

    if 'max_concurrent' in data:
        try:
            v = int(data['max_concurrent'])
            # Clamp to [1, 20] to protect the generation worker pool.
            cfg.MAX_CONCURRENT_SECTIONS = max(1, min(v, 20))
        except (ValueError, TypeError):
            pass

    valid_volumes = ('concise', 'standard', 'detailed', 'full')
    vol = data.get('content_volume')
    if vol and vol in valid_volumes:
        cfg.CONTENT_VOLUME = vol

    if 'target_pages' in data:
        try:
            # 0 means "no page target"; negatives are coerced to 0.
            cfg.TARGET_PAGES = max(0, int(data['target_pages']))
        except (ValueError, TypeError):
            pass

    if 'page_char_estimate' in data:
        try:
            # Keep the per-page character estimate within a plausible range.
            cfg.PAGE_CHAR_ESTIMATE = max(300, min(3000, int(data['page_char_estimate'])))
        except (ValueError, TypeError):
            pass
|
||||||
278
utils/tender_kind_sections.py
Normal file
278
utils/tender_kind_sections.py
Normal file
@ -0,0 +1,278 @@
|
|||||||
|
"""
|
||||||
|
按招标文件类型(工程 / 服务 / 货物)区分的章节正文生成提示词模板。
|
||||||
|
与 modules.generator.BID_WRITING_SYSTEM 配合使用;自称以系统铁律为准,统一用「我方」。
|
||||||
|
"""
|
||||||
|
import re
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
VALID_TENDER_KINDS = frozenset({'engineering', 'service', 'goods'})
|
||||||
|
|
||||||
|
DEFAULT_WORD_COUNT_SPEC = (
|
||||||
|
'- 一般小节:不少于 2000 字;核心技术/重点评分章节:不少于 4000 字\n'
|
||||||
|
'- 字数须由实质方案内容支撑,禁止用重复项目背景或复述招标要求凑字数\n'
|
||||||
|
'- 有实质细节的展开写,原则性描述可简洁处理;通过流程、节点、比选、管控展开满足篇幅'
|
||||||
|
)
|
||||||
|
|
||||||
|
TENDER_KIND_CLASSIFY = """\
|
||||||
|
你是一名招标文件分类专家。根据以下招标文件摘录,判断本项目技术标书应采用的「写作模板类型」。
|
||||||
|
|
||||||
|
只输出以下三个英文单词之一,不要输出任何其他文字、标点、换行或解释:
|
||||||
|
engineering
|
||||||
|
service
|
||||||
|
goods
|
||||||
|
|
||||||
|
含义:
|
||||||
|
- engineering:工程施工类(建筑、市政、公路、水利、装修、园林、拆除等,以现场施工组织、工艺、机械、进度网络为主)
|
||||||
|
- service:服务类(咨询、设计、监理、运维、物业、保洁、餐饮配送、培训、安保、技术服务等,以人力/智力交付、流程、SLA 为主)
|
||||||
|
- goods:货物类(设备、材料、车辆、家具、软硬件供货等,以产品规格、供货、质保、验收为主;含附带安装指导仍以供货为主可归此类)
|
||||||
|
|
||||||
|
判定规则:
|
||||||
|
若主要为施工安装且涉及土建/结构/施工机械与工期,归为 engineering。
|
||||||
|
若主要为服务过程、人员驻场、响应时效与服务质量体系,归为 service。
|
||||||
|
若主要为产品技术规格、供货批次、出厂检验与到货验收,归为 goods。
|
||||||
|
若施工与供货并重,以现场施工量与工期为主则 engineering,以设备物资交付为主则 goods。
|
||||||
|
|
||||||
|
【招标文件摘录】
|
||||||
|
{excerpt}
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def get_tender_kind_classify_prompt(excerpt: str) -> str:
    """Fill the classification prompt template with the tender-document excerpt."""
    # NOTE(review): str.replace instead of str.format — presumably so braces
    # inside the excerpt cannot break formatting; confirm before changing.
    return TENDER_KIND_CLASSIFY.replace('{excerpt}', excerpt or '')
|
||||||
|
|
||||||
|
|
||||||
|
def parse_tender_kind_response(response: str) -> str:
    """Extract engineering / service / goods from a model reply.

    Falls back to 'engineering' on empty input or when no kind is found.
    """
    if not response:
        return 'engineering'
    # Pass 1: whole words after stripping every non-letter character.
    for token in re.sub(r'[^a-zA-Z]+', ' ', response).lower().split():
        if token in VALID_TENDER_KINDS:
            return token
    # Pass 2: substring match anywhere in the lowercased raw reply.
    lowered = response.lower()
    for candidate in ('engineering', 'service', 'goods'):
        if candidate in lowered:
            return candidate
    return 'engineering'
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_tender_kind(kind: Optional[str]) -> str:
    """Coerce arbitrary input to a valid tender kind, defaulting to engineering."""
    normalized = (kind or '').strip().lower()
    if normalized in VALID_TENDER_KINDS:
        return normalized
    return 'engineering'
|
||||||
|
|
||||||
|
|
||||||
|
# ── 工程类 ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
SECTION_DETAILS_ENGINEERING = """\
|
||||||
|
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||||||
|
- 角色:资深工程施工组织设计专家
|
||||||
|
- 任务:撰写通用型工程施工组织设计技术章节
|
||||||
|
|
||||||
|
【核心定位】
|
||||||
|
- 通用施工模板,适用于建筑、市政、公路、水利等工程施工类项目
|
||||||
|
- 聚焦:施工方案、工艺方法、机械设备、进度计划、质量安全控制
|
||||||
|
- 正文为可直接提交的成稿语句:凡招标文件概要或工程量清单摘要已给出的工程量、地质、工期、指标等,可如实融入叙述;未给出的具体数值、型号、台数、吨位等,一律用通顺的中文概括表达(如"相应规格""与进度及作业面相匹配的台套""符合设计及规范要求的能级"),不得使用方括号或待填项留白
|
||||||
|
|
||||||
|
【内容特征】
|
||||||
|
- 施工工艺描述到"方法层面";可引用规范条文名称或编号(如"应符合JTG/T 3610要求");无依据处不写臆造数字
|
||||||
|
- 设备与资源配置:写清设备类别与用途,用"按工况与设计要求选配相应规格与数量""满足流水作业与峰值强度需要"等概括句式,禁止出现"[型号][数量]台"类占位
|
||||||
|
- 进度计划使用相对阶段("施工准备期"、"主体施工期")而非具体日期
|
||||||
|
- 技术措施可提供多方案比选,用"视地质与水文条件选用适宜工艺"等自然语言衔接现场条件,禁止方括号待填
|
||||||
|
|
||||||
|
【未定参数的写法(替代一切占位符)】
|
||||||
|
- 工程规模与结构:用"本工程相应单体与线路区段""按设计结构形式与跨度条件"等概括,不罗列未提供的具体数字
|
||||||
|
- 技术参数:已见于招标/清单的写具体值;未见者写"按设计强度等级与验收标准执行""压实度与分层厚度满足规范及设计要求"
|
||||||
|
- 机械与劳动力:写"配置满足峰值强度与关键线路需要的机械组合""劳动力按施工阶段动态投入并保持关键岗位持证齐备"
|
||||||
|
- 时间节点:写"在招标工期内划分准备、主体、收尾阶段并设置可控里程碑",无具体日历则不用臆造周数
|
||||||
|
|
||||||
|
【行文规范】
|
||||||
|
- 自称统一用「我方」,禁用「我们」「本公司」
|
||||||
|
- 招标人称「招标方」或「建设单位」
|
||||||
|
- 禁止前导句和AI套话(综上所述、高度重视等)
|
||||||
|
- 列举用(1)(2)(3),禁用"首先其次"
|
||||||
|
- 纯文本输出,段落间空行分隔
|
||||||
|
|
||||||
|
【防过拟合约束】
|
||||||
|
- 不绑定具体地名与局地气候细节,改为"结合项目环境与季节特点采取针对性措施"
|
||||||
|
- 不绑定特定施工方法(如不说"必须用旋挖钻",改为"根据地质选用适宜桩基工艺")
|
||||||
|
- 使用弹性表述:"按设计要求"、"视现场情况"、"符合规范规定"
|
||||||
|
|
||||||
|
【字数要求】
|
||||||
|
{word_count_spec}
|
||||||
|
- 通过展开多方案比选、详细工艺流程、管控节点来满足篇幅
|
||||||
|
|
||||||
|
【输入】
|
||||||
|
- 招标文件概要:{summary}
|
||||||
|
- 标书目录:{outline}
|
||||||
|
- 子小节标题:{subsection_title}
|
||||||
|
|
||||||
|
直接输出正文,不含标题和解释。"""
|
||||||
|
|
||||||
|
|
||||||
|
# ── 服务类 ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
SECTION_DETAILS_SERVICE = """\
|
||||||
|
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||||||
|
- 角色:资深服务方案架构师
|
||||||
|
- 任务:撰写通用型服务项目实施方案
|
||||||
|
|
||||||
|
【核心定位】
|
||||||
|
- 通用服务模板,适用于咨询服务、运维服务、技术服务、物业管理、培训服务等
|
||||||
|
- 聚焦:服务方案、实施流程、人员配置、质量保障、响应机制、服务标准
|
||||||
|
- 严禁出现工程施工技术参数(如混凝土标号、压实度等)
|
||||||
|
- 正文为成稿:招标/采购文件已载明的服务范围、人数、响应时限、到场要求等可如实写入;未载明的不得用方括号待填,改用"按采购文件与服务等级要求配置""满足驻场与高峰时段人力需要""建立分级响应与升级机制"等概括表述写清含义
|
||||||
|
|
||||||
|
【内容特征】
|
||||||
|
- 服务流程:按"接收需求→分析评估→方案制定→实施执行→验收交付→持续改进"框架展开
|
||||||
|
- 人员配置:强调专业资质与岗位角色齐全,用"配备满足本项目服务范围与关键岗位持证要求的人员力量""项目经理及骨干具备相应执业或认证资格"等完整句子,禁止"[资质][岗位][数量]名"式占位
|
||||||
|
- 质量保障:使用服务体系标准(如ISO 9001、ITIL、ITSS)而非工程规范
|
||||||
|
- 响应机制:写清"受理—分派—处理—回访/关闭"闭环;时限已见于招标文件的写具体值,未见者写"按招标文件及行业通行服务等级划分响应与处理时限,并设置升级与应急通道"
|
||||||
|
- 服务标准:可引用SLA框架,用自然语言描述指标层级与考核方式,禁止用方括号代替指标
|
||||||
|
|
||||||
|
【未定参数的写法】
|
||||||
|
- 服务范围与对象:用"采购文件约定的服务内容与交付边界""服务对象规模与业务场景按项目实际确定"等概括
|
||||||
|
- 人员与资源:用"与峰值并发与服务等级相匹配的人力与工具配置"
|
||||||
|
- 场地与备件:用"按需设置服务场所与备件储备,保障连续性与可用性目标"
|
||||||
|
|
||||||
|
【行文规范】
|
||||||
|
- 自称统一用「我方」,禁用「我们」「本公司」
|
||||||
|
- 招标人称「招标方」「采购人」或「甲方」
|
||||||
|
- 禁止前导句和AI套话
|
||||||
|
- 列举用(1)(2)(3),禁用"首先其次"
|
||||||
|
- 纯文本输出,段落间空行分隔
|
||||||
|
- 强调"服务承诺"与"保障措施"的可执行性,避免空泛
|
||||||
|
|
||||||
|
【防过拟合约束】
|
||||||
|
- 不预设具体行业细节(如不说"针对医院HIS系统",改为"针对采购人业务系统与数据环境")
|
||||||
|
- 服务方案提供"标准模块+可选配置"结构("基础服务包包含...,增值服务可选...")
|
||||||
|
- 使用"结合采购人行业特点与监管要求""参照同类项目成熟实践"等弹性表述
|
||||||
|
|
||||||
|
【内容禁区】
|
||||||
|
- 禁止出现:施工工艺、材料设备技术参数、工程量计算、施工机械配置
|
||||||
|
- 禁止出现:建筑结构、土木工程技术措施
|
||||||
|
|
||||||
|
【字数要求】
|
||||||
|
{word_count_spec}
|
||||||
|
- 通过详细描述服务流程节点、人员职责分工、质量检查点、应急预案来满足篇幅
|
||||||
|
|
||||||
|
【输入】
|
||||||
|
- 招标文件概要:{summary}
|
||||||
|
- 标书目录:{outline}
|
||||||
|
- 子小节标题:{subsection_title}
|
||||||
|
|
||||||
|
直接输出正文,不含标题和解释。"""
|
||||||
|
|
||||||
|
|
||||||
|
# ── 货物类 ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
SECTION_DETAILS_GOODS = """\
|
||||||
|
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||||||
|
- 角色:资深供货方案技术专家
|
||||||
|
- 任务:撰写通用型货物采购项目技术响应方案
|
||||||
|
|
||||||
|
【核心定位】
|
||||||
|
- 通用供货模板,适用于设备采购、材料供应、系统集成、软件采购等
|
||||||
|
- 聚焦:产品技术规格、供货方案、质量保证、安装调试(如有)、售后服务
|
||||||
|
- 正文为成稿:采购文件、技术规范书或清单中已列明的型号、数量、指标、交货期、质保期等可如实响应;未列明的不得臆造优于招标的数字,亦不得用方括号待填;用"不低于采购文件对应条款""满足招标文件列明的性能与符合性要求""供货批次与到货节奏与现场安装计划相衔接"等概括语言写全句
|
||||||
|
|
||||||
|
【内容特征】
|
||||||
|
- 技术规格:按"指标项—符合性说明"展开;已给出阈值的照写;未给出的写"满足招标文件技术指标与检测方法要求""与同类应用场景主流水平相当且不降低实质性响应"
|
||||||
|
- 产品描述:强调功能特性、可靠性与标准符合性,避免绑定特定品牌(除非招标文件指定)
|
||||||
|
- 供货方案:分阶段描述(签约后组织生产或备货、出厂检验、运输与到货验收);具体天数仅在有依据时写出,否则用"按合同与采购文件约定的供货周期执行"
|
||||||
|
- 质量保障:强调"出厂检验+第三方检测(如要求)+质保期服务"分层体系
|
||||||
|
- 售后服务:写清质保责任边界、备件与技术支持渠道;时长以招标为准,无则写"按采购文件及国家相关规定执行"
|
||||||
|
|
||||||
|
【未定参数的写法】
|
||||||
|
- 性能与容量:用"满足采购文件规定的处理能力/精度/兼容性等关键指标"
|
||||||
|
- 数量与批次:用"与合同清单及现场需求匹配的供货批次与配套件配置"
|
||||||
|
- 服务时效:用"建立可追踪的报修、响应与闭环机制,时限不低于采购文件要求"
|
||||||
|
|
||||||
|
【行文规范】
|
||||||
|
- 自称统一用「我方」,禁用「我们」「本公司」
|
||||||
|
- 招标人称「招标方」「采购人」或「甲方」
|
||||||
|
- 禁止前导句和AI套话
|
||||||
|
- 列举用(1)(2)(3),禁用"首先其次"
|
||||||
|
- 纯文本输出,段落间空行分隔
|
||||||
|
- 技术描述客观准确,避免夸大(不用"最先进"、"行业第一",改用"符合国家标准或采购文件引用标准的要求""满足招标文件实质性条款")
|
||||||
|
|
||||||
|
【防过拟合约束】
|
||||||
|
- 不绑定特定品牌(如不说"采用华为服务器",改为"提供满足采购文件性能与安全要求的服务器设备")
|
||||||
|
- 无具体数值依据时,不写虚构的"≥某数值",改为对符合性与可检测性的承诺
|
||||||
|
- 供货方案考虑多种交付场景(国内供货、进口设备、定制生产等)时,用自然语言比较路径优劣与适用条件
|
||||||
|
|
||||||
|
【内容禁区】
|
||||||
|
- 禁止出现:施工组织、安装工艺(除非含安装服务)、土建工程、人员现场施工配置
|
||||||
|
- 禁止出现:工程管理流程(如施工进度网络图)
|
||||||
|
|
||||||
|
【字数要求】
|
||||||
|
{word_count_spec}
|
||||||
|
- 通过详细展开技术参数说明、供货流程节点、质量检验程序、售后服务细则来满足篇幅
|
||||||
|
|
||||||
|
【输入】
|
||||||
|
- 招标文件概要:{summary}
|
||||||
|
- 标书目录:{outline}
|
||||||
|
- 子小节标题:{subsection_title}
|
||||||
|
|
||||||
|
直接输出正文,不含标题和解释。"""
|
||||||
|
|
||||||
|
|
||||||
|
def build_section_detail_prompt(
|
||||||
|
kind: str,
|
||||||
|
summary: str,
|
||||||
|
outline: str,
|
||||||
|
title: str,
|
||||||
|
word_count_spec: str = '',
|
||||||
|
boq_summary: str = '',
|
||||||
|
) -> str:
|
||||||
|
k = normalize_tender_kind(kind)
|
||||||
|
if k == 'service':
|
||||||
|
base = SECTION_DETAILS_SERVICE
|
||||||
|
elif k == 'goods':
|
||||||
|
base = SECTION_DETAILS_GOODS
|
||||||
|
else:
|
||||||
|
base = SECTION_DETAILS_ENGINEERING
|
||||||
|
|
||||||
|
wc = word_count_spec.strip() or DEFAULT_WORD_COUNT_SPEC
|
||||||
|
text = base.format(
|
||||||
|
word_count_spec=wc,
|
||||||
|
summary=summary or '(未提供)',
|
||||||
|
outline=outline or '(未提供)',
|
||||||
|
subsection_title=title or '',
|
||||||
|
)
|
||||||
|
text += (
|
||||||
|
'\n\n【须同步遵守的全局写作禁忌】'
|
||||||
|
'禁止复述招标要求后再作答;禁止各章重复工程量数字与项目背景;'
|
||||||
|
'禁止无依据将参数写成优于招标文件;字数不得仅靠套话堆砌;'
|
||||||
|
'禁止使用方括号、「待填」「TBD」等表示未完稿字段(如[型号][数量][数值]);'
|
||||||
|
'未定信息须写成通顺的概括性中文整句。'
|
||||||
|
'若本任务提示词末尾另有「图示/表格」专用输出规范,其中的结构化标记按该规范执行,'
|
||||||
|
'不视为待填占位。'
|
||||||
|
)
|
||||||
|
|
||||||
|
if boq_summary.strip():
|
||||||
|
text += (
|
||||||
|
'\n\n- 工程量清单关键信息(写作时按需引用清单中已有数量与单位,勿无故复读;'
|
||||||
|
'清单未列明的分项用概括性施工组织语言描述,禁止使用方括号待填项):\n'
|
||||||
|
+ boq_summary.strip()
|
||||||
|
)
|
||||||
|
return text
|
||||||
|
|
||||||
|
|
||||||
|
# 对话模式:按类型追加的系统说明片段(与 app.py 中基础说明拼接)
|
||||||
|
CHAT_KIND_INSTRUCTION = {
|
||||||
|
'engineering': (
|
||||||
|
'\n【本模板类型:工程施工】'
|
||||||
|
'侧重施工组织、工艺与质量安全;未在招标文件或清单中出现的具体型号、台数、吨位等'
|
||||||
|
'用概括性中文表述写清,禁止使用方括号待填;勿虚构优于招标的规格。'
|
||||||
|
),
|
||||||
|
'service': (
|
||||||
|
'\n【本模板类型:服务】'
|
||||||
|
'侧重服务流程、人员与SLA;人数、时限等以招标/采购文件为准,无则概括表述,禁止方括号待填;'
|
||||||
|
'禁止大段写混凝土标号、压实度、施工机械等工程参数。'
|
||||||
|
),
|
||||||
|
'goods': (
|
||||||
|
'\n【本模板类型:货物供货】'
|
||||||
|
'侧重规格、供货、检验与质保;指标与交期以采购文件为准,无则概括表述,禁止方括号待填;'
|
||||||
|
'禁止写施工组织与土建;勿绑定未指定的品牌。'
|
||||||
|
),
|
||||||
|
}
|
||||||
173
utils/volume_chapters.py
Normal file
173
utils/volume_chapters.py
Normal file
@ -0,0 +1,173 @@
|
|||||||
|
"""
|
||||||
|
目标页数与一级篇章数量区间:阈值与 generator._effective_volume 一致。
|
||||||
|
|
||||||
|
小章节(自动填充子目录行)总条数:与「目标页数」线性映射,见 subchapter_total_* 与
|
||||||
|
allocate_subchapters_to_main *。
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import random
|
||||||
|
from typing import List, Optional, Tuple
|
||||||
|
|
||||||
|
# 与 modules.generator._effective_volume 页数分界一致
|
||||||
|
PAGE_VOLUME_THRESHOLDS = (125, 175, 225)
|
||||||
|
|
||||||
|
# 各篇幅档位对应的一级篇章数量 [min, max](与页数映射表一致)
|
||||||
|
TOP_LEVEL_CHAPTER_RANGES = {
|
||||||
|
'concise': (6, 8),
|
||||||
|
'standard': (8, 10),
|
||||||
|
'detailed': (10, 12),
|
||||||
|
'full': (12, 16),
|
||||||
|
}
|
||||||
|
|
||||||
|
# 小章节总条数 = slope * pages + intercept(过点 100->78, 300->212)
|
||||||
|
SUBCHAPTER_PAGES_SLOPE = 0.67
|
||||||
|
SUBCHAPTER_PAGES_INTERCEPT = 11.0
|
||||||
|
SUBCHAPTER_JITTER_LOW = 0.9
|
||||||
|
SUBCHAPTER_JITTER_HIGH = 1.1
|
||||||
|
# expand 在请求/库/配置均未给出页数时,按 100 页 ≈ 基线 78 章 ±10%,避免小章节失控到数百
|
||||||
|
EXPAND_OUTLINE_DEFAULT_TARGET_PAGES = 100
|
||||||
|
|
||||||
|
|
||||||
|
def subchapter_total_base_from_pages(pages: int) -> float:
    """Linear baseline of total sub-chapter rows: 0.67 * pages + 11 (100p -> 78)."""
    return SUBCHAPTER_PAGES_SLOPE * float(pages) + SUBCHAPTER_PAGES_INTERCEPT
|
||||||
|
|
||||||
|
|
||||||
|
def subchapter_jitter_bounds(n_base: float) -> Tuple[int, int]:
    """Strict ±10% integer closed interval [lo, hi] around baseline N_base.

    Used to clamp the sampled total of sub-chapter rows for the whole bid.
    Example: N_base=78 (≈100 pages) -> lo=70, hi=86.
    """
    lo = max(1, int(round(n_base * SUBCHAPTER_JITTER_LOW)))
    # hi can never drop below lo even for tiny baselines.
    hi = max(lo, int(round(n_base * SUBCHAPTER_JITTER_HIGH)))
    return lo, hi
|
||||||
|
|
||||||
|
|
||||||
|
def subchapter_total_effective(
    pages: int,
    k: int,
    rng: Optional[random.Random] = None,
) -> int:
    """Sample the total sub-chapter row count for one auto-fill pass.

    Draws round(N_base(P) * U) with U ~ Uniform(0.9, 1.1), then clamps
    strictly to [round(N_base*0.9), round(N_base*1.1)] — e.g. always
    70–86 rows at 100 pages (when only randomness is at play).

    Deliberately does NOT raise the total to max(n, k): with many main
    chapters a one-row-per-chapter floor would inflate N far beyond the
    ±10% target; instead allocate_subchapters_to_mains may give some main
    chapters a quota of 0 for this pass.

    Returns 0 when pages <= 0 or k <= 0.
    """
    if pages <= 0 or k <= 0:
        return 0
    sampler = rng if rng is not None else random.Random()
    baseline = subchapter_total_base_from_pages(pages)
    lo, hi = subchapter_jitter_bounds(baseline)
    drawn = int(round(baseline * sampler.uniform(SUBCHAPTER_JITTER_LOW, SUBCHAPTER_JITTER_HIGH)))
    return min(max(drawn, lo), hi)
|
||||||
|
|
||||||
|
|
||||||
|
def allocate_subchapters_to_mains(n: int, k: int) -> List[int]:
    """Distribute n items evenly over k main chapters.

    The first n % k chapters receive floor(n/k) + 1 items, the remainder
    floor(n/k). Negative n counts as 0; k <= 0 yields an empty list.
    """
    if k <= 0:
        return []
    total = max(0, n)
    base, extra = divmod(total, k)
    return [base + 1] * extra + [base] * (k - extra)
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_expand_target_pages(
    request_pages: Optional[int],
    no_subchapter_limit: bool,
    db_pages: int,
    config_pages: int,
) -> int:
    """Resolve the target page count P for auto-filling sub-chapters.

    Returns 0 (no row-count cap) when explicitly unlimited; otherwise the
    first positive value among: request -> persisted value -> global
    config -> the 100-page default.
    """
    if no_subchapter_limit:
        return 0
    if request_pages is not None and int(request_pages) > 0:
        return int(request_pages)
    for fallback in (int(db_pages or 0), int(config_pages or 0)):
        if fallback > 0:
            return fallback
    return EXPAND_OUTLINE_DEFAULT_TARGET_PAGES
|
||||||
|
|
||||||
|
|
||||||
|
def volume_key_from_target_pages(pages: int, content_volume_default: str = 'standard') -> str:
    """Volume key for a target page count (same logic as _effective_volume).

    Does not read config, which keeps it unit-testable.
    """
    if pages <= 0:
        return content_volume_default
    for threshold, key in zip(PAGE_VOLUME_THRESHOLDS, ('concise', 'standard', 'detailed')):
        if pages <= threshold:
            return key
    return 'full'
|
||||||
|
|
||||||
|
|
||||||
|
def top_level_chapter_range_from_pages(pages: int, content_volume_default: str = 'standard') -> Tuple[int, int]:
    """Return the (lo, hi) bounds for the number of top-level chapters.

    Falls back to the default standard range (8–10) when no target page
    count has been configured.
    """
    if pages <= 0:
        return TOP_LEVEL_CHAPTER_RANGES['standard']
    volume = volume_key_from_target_pages(pages, content_volume_default)
    return TOP_LEVEL_CHAPTER_RANGES[volume]
|
||||||
|
|
||||||
|
|
||||||
|
def outline_chapter_count_hint(
    pages: int,
    content_volume_default: str = 'standard',
    page_char_estimate: int = 700,
) -> str:
    """Chapter-count constraint sentence embedded in the outline prompt.

    Replaces the old fixed "8–10 chapters" wording. When pages > 0 the hint
    also warns that the total character budget (pages × chars/page) must be
    matched, so outline depth should not be so fine that leaf sections end
    up too thin to write.
    """
    pce = max(1, int(page_char_estimate or 700))
    if pages <= 0:
        return (
            '总的章节数应该控制在8-10个,一级篇章总数不超过10个'
        )
    lo, hi = top_level_chapter_range_from_pages(pages, content_volume_default)
    total_g = int(round(pages * pce))
    return (
        f'总的章节数应该控制在约 {lo}–{hi} 个,一级篇章总数不超过 {hi} 个'
        f'(目标约 {pages} 页,按目标页数映射的篇幅档位估算)。'
        f'全稿正文字量规模需与总目标约 {total_g} 字'
        f'({pages} 页×约每页 {pce} 字的粗略换算计)同量级,目录层次与末级小节目不宜过细,'
        f'避免叶节数过多时单节篇幅过薄、难以成文。'
    )
|
||||||
|
|
||||||
|
|
||||||
|
def outline_chapter_count_hint_with_rating_variant(
    pages: int,
    content_volume_default: str = 'standard',
    page_char_estimate: int = 700,
) -> str:
    """Same constraint for the rating-driven outline template (tighter wording)."""
    pce = max(1, int(page_char_estimate or 700))
    if pages <= 0:
        return (
            '总的章节数应该控制在8-10个,不超过10个'
        )
    lo, hi = top_level_chapter_range_from_pages(pages, content_volume_default)
    total_g = int(round(pages * pce))
    return (
        f'总的章节数应该控制在约 {lo}–{hi} 个,不超过{hi} 个'
        f'(目标约 {pages} 页,按目标页数映射的篇幅档位估算)'
        f'全稿正文字量约与总目标 {total_g} 字同量级,末级子目不宜过细'
    )
|
||||||
371
utils/word_allocation.py
Normal file
371
utils/word_allocation.py
Normal file
@ -0,0 +1,371 @@
|
|||||||
|
"""
|
||||||
|
技术评分驱动的章节字数分配:读取 data/word_allocation_rules.json,
|
||||||
|
结合 VOLUME_PRESETS 的 base/core 与项目 rating_json,为每个叶节点生成
|
||||||
|
min_chars、word_count_spec(及可选 max_tokens)。
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
import config
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# 与 modules/generator.VOLUME_PRESETS 保持一致
|
||||||
|
VOLUME_PRESETS: Dict[str, Tuple[int, int, str, int]] = {
|
||||||
|
'concise': (1200, 2500, '精简版', 5000),
|
||||||
|
'standard': (2000, 4000, '标准版', 8000),
|
||||||
|
'detailed': (3000, 5500, '详细版', 12000),
|
||||||
|
'full': (4000, 7000, '充实版', 16000),
|
||||||
|
}
|
||||||
|
|
||||||
|
_PROVIDER_TOKEN_LIMITS = {
|
||||||
|
'deepseek': 8192,
|
||||||
|
'qwen': 8192,
|
||||||
|
'openai': 16384,
|
||||||
|
'ollama': 8192,
|
||||||
|
'doubao': 8192,
|
||||||
|
'kimi': 8192,
|
||||||
|
}
|
||||||
|
|
||||||
|
DEFAULT_RULES: Dict[str, Any] = {
|
||||||
|
'schema_version': 1,
|
||||||
|
'alpha': 0.85,
|
||||||
|
'budget_mode': 'target_pages',
|
||||||
|
'per_section_floor': None,
|
||||||
|
'per_section_cap': None,
|
||||||
|
'relevance': {'method': 'keyword_overlap', 'min_rating_weight': 0.01},
|
||||||
|
'rating_parse': {},
|
||||||
|
'prompt': {'top_k_rating_items': 4, 'intro_line': ''},
|
||||||
|
'max_tokens_scale': False,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def rules_path() -> str:
    """Absolute path of the word-allocation rules file under config.DATA_DIR."""
    return os.path.join(config.DATA_DIR, 'word_allocation_rules.json')
|
||||||
|
|
||||||
|
|
||||||
|
def load_rules(path: Optional[str] = None) -> Dict[str, Any]:
    """Load the rules JSON; fall back to DEFAULT_RULES on any failure.

    Keys starting with '_' are treated as comments and skipped. The nested
    'relevance' and 'prompt' dicts are merged over the defaults rather than
    replaced wholesale; every other key overwrites the default outright.
    """
    target = path or rules_path()
    merged = dict(DEFAULT_RULES)
    if not os.path.isfile(target):
        return merged
    try:
        with open(target, encoding='utf-8') as fh:
            raw = json.load(fh)
        if isinstance(raw, dict):
            for key, value in raw.items():
                if key.startswith('_'):
                    continue
                if key in ('relevance', 'prompt') and isinstance(value, dict):
                    merged[key] = {**merged.get(key, {}), **value}
                else:
                    merged[key] = value
    except Exception as e:
        logger.warning('加载 word_allocation_rules.json 失败,使用内置默认: %s', e)
    return merged
|
||||||
|
|
||||||
|
|
||||||
|
def _as_float(x: Any, default: float = 0.0) -> float:
    """Best-effort float coercion.

    Numbers are converted directly; strings are stripped of every character
    except digits, '.' and '-' before parsing. Anything else (including
    None, or a string with no numeric residue) yields ``default``.
    """
    if isinstance(x, (int, float)):
        return float(x)
    if isinstance(x, str):
        cleaned = re.sub(r'[^\d.\-]', '', x)
        if cleaned:
            try:
                return float(cleaned)
            except ValueError:
                return default
    return default
|
||||||
|
|
||||||
|
|
||||||
|
def _item_name(d: Dict[str, Any]) -> str:
    """Pull the rating-item display name from any of the known key aliases."""
    for key in ('name', 'title', 'item_name', '评分项', '评分项名称', 'indicator'):
        value = d.get(key)
        if isinstance(value, str) and value.strip():
            return value.strip()
    return ''
|
||||||
|
|
||||||
|
|
||||||
|
def _item_weight(d: Dict[str, Any]) -> float:
    """Extract a positive weight/score from the known key aliases; default 1.0."""
    for key in ('weight', 'score', '分值', 'max_score', '满分', 'points'):
        if key not in d:
            continue
        weight = _as_float(d.get(key), 0.0)
        if weight > 0:
            return weight
    return 1.0
|
||||||
|
|
||||||
|
|
||||||
|
def _collect_rating_dicts(obj: Any, acc: List[Dict[str, Any]]) -> None:
    """Depth-first collect every dict nested anywhere inside obj into acc.

    Dicts are appended before their values are visited; lists are only
    traversed, never collected.
    """
    if isinstance(obj, dict):
        acc.append(obj)
        children = obj.values()
    elif isinstance(obj, list):
        children = obj
    else:
        return
    for child in children:
        _collect_rating_dicts(child, acc)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_rating_json(raw: Optional[str]) -> List[Dict[str, Any]]:
    """Parse the rating_json string into a list of scoring items.

    Each item is {'name': str, 'weight': float, 'keywords': List[str]}.
    Returns [] on empty input or malformed JSON. Names shorter than two
    characters are skipped; duplicate names (case-insensitive) keep only
    the first occurrence.
    """
    if not raw or not isinstance(raw, str) or not raw.strip():
        return []
    try:
        root = json.loads(raw.strip())
    except json.JSONDecodeError:
        return []

    candidates: List[Dict[str, Any]] = []
    _collect_rating_dicts(root, candidates)

    items: List[Dict[str, Any]] = []
    seen: set = set()
    for cand in candidates:
        name = _item_name(cand)
        if not name or len(name) < 2:
            continue
        dedup_key = name.lower()
        if dedup_key in seen:
            continue
        weight = _item_weight(cand)
        keywords: List[str] = []
        kw = cand.get('keywords') or cand.get('keyword') or cand.get('要点')
        if isinstance(kw, list):
            keywords = [str(x).strip() for x in kw if isinstance(x, (str, int, float)) and str(x).strip()]
        elif isinstance(kw, str) and kw.strip():
            keywords = [kw.strip()]
        seen.add(dedup_key)
        items.append({'name': name, 'weight': weight, 'keywords': keywords})

    return items
|
||||||
|
|
||||||
|
|
||||||
|
def _title_tokens(title: str) -> List[str]:
    """Tokenize a section title for relevance matching.

    Yields whitespace-split words of length >= 2 (after stripping digits
    and punctuation), followed by every run of 2+ CJK characters from the
    raw title that is not already present.
    """
    if not title:
        return []
    normalized = re.sub(r'[\s\d..、,,;;::/\\()()【】\[\]「」]+', ' ', title)
    tokens = [part for part in normalized.split() if len(part) >= 2]
    for run in re.findall(r'[\u4e00-\u9fff]{2,}', title):
        if run not in tokens:
            tokens.append(run)
    return tokens
|
||||||
|
|
||||||
|
|
||||||
|
def _overlap_score(title: str, item: Dict[str, Any]) -> float:
    """Relevance in [0, 1] between a section title and one rating item.

    Base score is the fraction of title tokens found in the item's
    name + keywords. Floors: 0.85 when name and title contain each other,
    0.7 when a keyword of length >= 2 appears directly in the title.
    """
    tokens = _title_tokens(title)
    if not tokens:
        return 0.0
    haystack = item['name'] + ''.join(item.get('keywords') or [])
    hits = sum(1 for tok in tokens if tok and tok in haystack)
    score = hits / max(len(tokens), 1)
    if item['name'] in title or title in item['name']:
        score = max(score, 0.85)
    for keyword in item.get('keywords') or []:
        if isinstance(keyword, str) and len(keyword) >= 2 and keyword in title:
            score = max(score, 0.7)
    return min(1.0, score)
|
||||||
|
|
||||||
|
|
||||||
|
def _raw_utilities(
    leaves: List[Dict[str, Any]],
    items: List[Dict[str, Any]],
    min_w: float,
) -> Tuple[List[float], List[List[Tuple[str, float]]]]:
    """Per-leaf utility u_i = sum_j w_j * c_ij, plus each leaf's top contributors.

    Utilities are normalized by their maximum; when nothing matched at all,
    a flat vector of 1.0s is returned so every leaf weighs equally.
    top_lists[i] holds up to 12 (item name, contribution) pairs sorted by
    contribution, descending.
    """
    # Drop negligible rating items; keep everything if the filter empties the list.
    filtered = [it for it in items if it['weight'] >= min_w]
    if not filtered:
        filtered = items
    n = len(leaves)
    u = [0.0] * n
    top_lists: List[List[Tuple[str, float]]] = [[] for _ in range(n)]

    for i, leaf in enumerate(leaves):
        title = leaf.get('section_title') or ''
        contribs: List[Tuple[str, float]] = []
        for it in filtered:
            c = _overlap_score(title, it)
            contrib = it['weight'] * c
            if contrib > 0:
                contribs.append((it['name'], contrib))
            u[i] += contrib
        contribs.sort(key=lambda x: -x[1])
        top_lists[i] = contribs[:12]

    # Normalize to [0, 1]; flat 1.0 vector when no leaf matched any item.
    max_u = max(u) if u else 0.0
    if max_u <= 0:
        u = [1.0] * n
    else:
        u = [x / max_u for x in u]
    return u, top_lists
|
||||||
|
|
||||||
|
|
||||||
|
def _clamp_int(x: int, lo: int, hi: int) -> int:
    """Clamp x into the closed interval [lo, hi]."""
    if x < lo:
        return lo
    if x > hi:
        return hi
    return x


def _water_adjust(
    targets: List[int],
    budget: int,
    floor_v: int,
    cap_v: int,
    priority: List[float],
) -> List[int]:
    """Integerize targets within [floor_v, cap_v] so their sum approaches budget.

    Surplus budget is poured one unit at a time into the highest-priority
    entries first; deficits are drained from the lowest-priority entries
    first. Infeasible bounds are loosened before the adjustment starts, and
    a step cap guards against non-terminating loops.
    """
    n = len(targets)
    if n == 0:
        return []
    if floor_v > cap_v:
        floor_v, cap_v = cap_v, floor_v
    # Loosen bounds that make the budget unreachable in either direction.
    if n * floor_v > budget:
        floor_v = max(1, budget // n)
    if n * cap_v < budget:
        cap_v = max(floor_v, (budget + n - 1) // n)
    current = [_clamp_int(t, floor_v, cap_v) for t in targets]
    delta = budget - sum(current)
    by_priority_desc = sorted(range(n), key=lambda i: -priority[i])
    by_priority_asc = sorted(range(n), key=lambda i: priority[i])
    steps = 0
    max_steps = max(n * 2000, abs(delta) + n)
    while delta != 0 and steps < max_steps:
        steps += 1
        moved = False
        if delta > 0:
            for i in by_priority_desc:
                if current[i] < cap_v:
                    current[i] += 1
                    delta -= 1
                    moved = True
                    break
        else:
            for i in by_priority_asc:
                if current[i] > floor_v:
                    current[i] -= 1
                    delta += 1
                    moved = True
                    break
        if not moved:
            break
    return current
|
||||||
|
|
||||||
|
|
||||||
|
def compute_leaf_allocations(
    volume_key: str,
    leaves: List[Dict[str, Any]],
    rating_raw: Optional[str],
    rules: Optional[Dict[str, Any]] = None,
) -> Optional[Dict[int, Dict[str, Any]]]:
    """
    Compute target_chars, word_count_spec and max_tokens for every leaf section.

    When technical rating items exist, the per-section character budget is
    allocated by title relevance. Without rating items, if the budget mode is
    'target_pages' and a page count is configured, the document-wide budget
    B = target pages x chars-per-page is split evenly; otherwise None is
    returned so the caller falls back to its old logic.

    leaves: [{'id': int, 'section_title': str}, ...]
    """
    rules = rules or load_rules()
    if not leaves:
        return {}

    # Volume preset: (base chars, core chars, <unused>, preset token limit).
    # NOTE(review): tuple layout inferred from unpacking names — confirm
    # against the VOLUME_PRESETS definition.
    base, core, _, preset_tokens = VOLUME_PRESETS.get(
        volume_key, VOLUME_PRESETS['standard']
    )
    floor_default = int(base * 0.5)
    cap_default = core
    # Rule overrides take precedence over preset-derived floor/cap.
    floor_v = int(rules['per_section_floor']) if rules.get('per_section_floor') is not None else floor_default
    cap_v = int(rules['per_section_cap']) if rules.get('per_section_cap') is not None else cap_default
    floor_v = min(floor_v, cap_v)
    # alpha blends relevance-driven allocation against a uniform midpoint.
    alpha = float(rules.get('alpha', 0.85))
    alpha = max(0.0, min(1.0, alpha))
    min_w = float(rules.get('relevance', {}).get('min_rating_weight', 0.01))

    n = len(leaves)
    mode = (rules.get('budget_mode') or 'anchor_mean').strip()
    pages_cfg = int(getattr(config, 'TARGET_PAGES', 0) or 0)
    pce = max(1, int(getattr(config, 'PAGE_CHAR_ESTIMATE', 700) or 700))
    # Total character budget for the whole document, by budget mode.
    if mode == 'target_pages' and pages_cfg > 0:
        budget = int(round(pages_cfg * pce))
    elif mode == 'anchor_base':
        budget = int(round(n * base))
    else:
        # Default 'anchor_mean': midpoint of base and core per section.
        budget = int(round(n * (base + core) / 2.0))

    items = parse_rating_json(rating_raw)
    if not items:
        # No rating items: only proceed in target_pages mode with pages set;
        # otherwise signal the caller to use its legacy allocation.
        if not (mode == 'target_pages' and pages_cfg > 0):
            return None
        u = [1.0] * n
        top_lists = [[] for _ in range(n)]
        mid = 0.5 * (base + core)
        raw_float = [float(mid)] * n
    else:
        # Relevance-driven utilities in [0, 1] plus top contributors per leaf.
        u, top_lists = _raw_utilities(leaves, items, min_w)
        band = core - base
        # Blend: alpha toward relevance u[i], (1 - alpha) toward the midpoint.
        raw_float = [
            base + band * (alpha * u[i] + (1.0 - alpha) * 0.5) for i in range(n)
        ]

    targets = [int(round(x)) for x in raw_float]
    # Integerise within [floor_v, cap_v] while steering the sum toward budget.
    adjusted = _water_adjust(targets, budget, floor_v, cap_v, u)

    provider = getattr(config, 'MODEL_PROVIDER', 'openai')
    tok_limit = _PROVIDER_TOKEN_LIMITS.get(provider, 8192)
    base_max_tok = min(preset_tokens, tok_limit)
    scale_tokens = bool(rules.get('max_tokens_scale', False))

    prompt_cfg = rules.get('prompt') or {}
    top_k = int(prompt_cfg.get('top_k_rating_items', 4))
    # Runtime prompt strings below are user-facing Chinese text; kept verbatim.
    intro = (prompt_cfg.get('intro_line') or '').strip() or (
        '本节须对下列技术评分要点作实质展开(结合工艺、流程、标准与可验证措施,禁止空泛承诺与复述招标文件):'
    )

    out: Dict[int, Dict[str, Any]] = {}
    for i, leaf in enumerate(leaves):
        sid = int(leaf['id'])
        min_chars = max(1, adjusted[i])
        contribs = top_lists[i][:top_k]
        if contribs:
            lines = '\n'.join(f' · {name}' for name, _ in contribs[:top_k])
            spec = (
                f'- 字数硬性要求(必须达到,不达标将续写补足):本节正文不少于 {min_chars} 字\n'
                f'- {intro}\n{lines}\n'
                f'- 内容须由可检验的技术与管理措施支撑,禁止堆砌套话与重复背景'
            )
        else:
            spec = (
                f'- 字数硬性要求(必须达到,不达标将续写补足):本节正文不少于 {min_chars} 字\n'
                f'- 须紧扣章节标题与标书目录定位,充分展开可执行方案细节\n'
                f'- 内容须由可检验的技术与管理措施支撑,禁止堆砌套话与重复背景'
            )

        max_tok = base_max_tok
        if scale_tokens and base > 0:
            # Scale token budget with the character target, clamped to
            # [1024, provider limit].
            max_tok = int(min(tok_limit, max(1024, base_max_tok * min_chars / base)))

        out[sid] = {
            'target_chars': min_chars,
            'word_count_spec': spec,
            'max_tokens': max_tok,
        }
    return out
||||||
|
|
||||||
|
|
||||||
|
def continuation_threshold(target_chars: int) -> int:
    """Mirror generator._get_min_chars: a continuation round stops at roughly
    65% of the target character count (repeated rounds approach the full
    target), never below 200 characters."""
    partial = target_chars * 0.65
    return int(partial) if partial > 200 else 200
|
||||||
0
暗标标书格式清除与设定模板.py
Normal file
0
暗标标书格式清除与设定模板.py
Normal file
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user