完善版本V1.1
This commit is contained in:
commit
a39a9c5800
1
.deps_installed
Normal file
1
.deps_installed
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
||||||
6
.gitignore
vendored
Normal file
6
.gitignore
vendored
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
.env
|
||||||
|
*.log
|
||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
.venv/
|
||||||
|
venv/
|
||||||
10
.idea/.gitignore
generated
vendored
Normal file
10
.idea/.gitignore
generated
vendored
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
# 默认忽略的文件
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
||||||
|
# 基于编辑器的 HTTP 客户端请求
|
||||||
|
/httpRequests/
|
||||||
|
# 已忽略包含查询文件的默认文件夹
|
||||||
|
/queries/
|
||||||
|
# Datasource local storage ignored files
|
||||||
|
/dataSources/
|
||||||
|
/dataSources.local.xml
|
||||||
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<settings>
|
||||||
|
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||||
|
<version value="1.0" />
|
||||||
|
</settings>
|
||||||
|
</component>
|
||||||
8
.idea/modules.xml
generated
Normal file
8
.idea/modules.xml
generated
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectModuleManager">
|
||||||
|
<modules>
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/.idea/tech-bid-manageV1.120260424.iml" filepath="$PROJECT_DIR$/.idea/tech-bid-manageV1.120260424.iml" />
|
||||||
|
</modules>
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
16
.idea/tech-bid-manageV1.120260424.iml
generated
Normal file
16
.idea/tech-bid-manageV1.120260424.iml
generated
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="PYTHON_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager">
|
||||||
|
<content url="file://$MODULE_DIR$" />
|
||||||
|
<orderEntry type="inheritedJdk" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
<component name="TemplatesService">
|
||||||
|
<option name="TEMPLATE_CONFIGURATION" value="Jinja2" />
|
||||||
|
<option name="TEMPLATE_FOLDERS">
|
||||||
|
<list>
|
||||||
|
<option value="$MODULE_DIR$/templates" />
|
||||||
|
</list>
|
||||||
|
</option>
|
||||||
|
</component>
|
||||||
|
</module>
|
||||||
6
.idea/vcs.xml
generated
Normal file
6
.idea/vcs.xml
generated
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="VcsDirectoryMappings">
|
||||||
|
<mapping directory="" vcs="Git" />
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
113
README.md
Normal file
113
README.md
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
# 标伙伴 · AI 标书助手
|
||||||
|
|
||||||
|
基于大模型的智能标书生成工具(单机版),支持解析招标文件、自动生成技术标书、导出 Word 文档。
|
||||||
|
|
||||||
|
## 快速开始
|
||||||
|
|
||||||
|
### 方式一:双击启动(Windows)
|
||||||
|
|
||||||
|
直接双击 `start.bat`,首次运行会自动安装依赖。
|
||||||
|
|
||||||
|
### 方式二:命令行启动
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. 安装依赖
|
||||||
|
pip install -r requirements.txt
|
||||||
|
|
||||||
|
# 2. 启动应用
|
||||||
|
python app.py
|
||||||
|
```
|
||||||
|
|
||||||
|
浏览器访问 **http://localhost:5000**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 配置 API Key
|
||||||
|
|
||||||
|
首次使用前,点击右上角 ⚙️ 设置图标,选择模型提供商并填入 API Key:
|
||||||
|
|
||||||
|
| 提供商 | 推荐模型 | 申请地址 |
|
||||||
|
|--------|---------|---------|
|
||||||
|
| 通义千问 | qwen-max | https://dashscope.aliyun.com/ |
|
||||||
|
| DeepSeek | deepseek-chat (V3) | https://platform.deepseek.com/ |
|
||||||
|
| OpenAI | gpt-4o | https://platform.openai.com/ |
|
||||||
|
|
||||||
|
> **DeepSeek 说明**:deepseek-chat (V3) 性价比极高,推荐用于生产环境。
|
||||||
|
> 由于 DeepSeek 暂不提供 Embedding API,使用知识库功能时会自动回退到本地 sentence-transformers 模型(首次使用需下载约 90MB)。
|
||||||
|
|
||||||
|
也可通过环境变量配置:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 通义千问
|
||||||
|
set QWEN_API_KEY=sk-xxxxxxxx
|
||||||
|
set MODEL_PROVIDER=qwen
|
||||||
|
|
||||||
|
# DeepSeek
|
||||||
|
set DEEPSEEK_API_KEY=sk-xxxxxxxx
|
||||||
|
set MODEL_PROVIDER=deepseek
|
||||||
|
|
||||||
|
python app.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 使用流程
|
||||||
|
|
||||||
|
1. **新建项目** → 输入项目名称
|
||||||
|
2. **上传招标文件** → 支持 PDF / DOC / DOCX
|
||||||
|
3. **AI 解析** → 自动提取评分要求、资质条件、商务条款
|
||||||
|
4. **生成大纲** → 按评分权重生成四级章节目录
|
||||||
|
5. **生成内容** → 逐章节或一键全部生成
|
||||||
|
6. **合规检查** → 对照招标要求检验覆盖情况
|
||||||
|
7. **导出 Word** → 专业排版,直接使用
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 目录结构
|
||||||
|
|
||||||
|
```
|
||||||
|
autorfp/
|
||||||
|
├── app.py # Flask 主程序
|
||||||
|
├── config.py # 配置文件
|
||||||
|
├── requirements.txt # Python 依赖
|
||||||
|
├── start.bat # Windows 一键启动
|
||||||
|
├── prompts/ # AI 提示词模板
|
||||||
|
├── modules/ # 功能模块
|
||||||
|
│ ├── parser.py # 招标文件解析
|
||||||
|
│ ├── generator.py # 标书内容生成
|
||||||
|
│ ├── checker.py # 合规检查
|
||||||
|
│ ├── exporter.py # Word 导出
|
||||||
|
│ └── knowledge.py # 企业知识库
|
||||||
|
├── utils/ # 工具函数
|
||||||
|
│ ├── ai_client.py # AI API 封装
|
||||||
|
│ ├── file_utils.py # 文件处理
|
||||||
|
│ └── prompts.py # 提示词加载
|
||||||
|
├── templates/ # HTML 模板
|
||||||
|
├── static/ # 静态资源
|
||||||
|
└── data/ # 数据目录(自动创建)
|
||||||
|
├── projects.db # SQLite 数据库
|
||||||
|
├── uploads/ # 上传的招标文件
|
||||||
|
├── exports/ # 导出的标书
|
||||||
|
├── knowledge/ # 知识库文件
|
||||||
|
└── chroma/ # 向量数据库
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 企业知识库
|
||||||
|
|
||||||
|
在项目页面切换到「知识库」标签,上传历史标书文件。
|
||||||
|
系统会自动将文件分块存入向量数据库,生成内容时自动检索相关片段,让 AI 更好地体现企业优势。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 常见问题
|
||||||
|
|
||||||
|
**Q: 解析速度很慢?**
|
||||||
|
A: 招标文件越长耗时越长,通常 30-120 秒。建议使用 qwen-max 或 gpt-4o。
|
||||||
|
|
||||||
|
**Q: 内容生成失败?**
|
||||||
|
A: 检查 API Key 是否正确,以及账户余额是否充足。
|
||||||
|
|
||||||
|
**Q: 导出的 Word 文件乱码?**
|
||||||
|
A: 请使用 Microsoft Word 2016 及以上版本打开。
|
||||||
118
bid_partner.spec
Normal file
118
bid_partner.spec
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
# -*- mode: python ; coding: utf-8 -*-
"""
PyInstaller spec for 标伙伴 · AI标书助手
Build: pyinstaller bid_partner.spec

The knowledge base now uses SQLite plus a pure-Python vector store and no
longer depends on ChromaDB, which keeps the bundle smaller.
"""
import os
from PyInstaller.utils.hooks import collect_all, collect_data_files

block_cipher = None

# ── Collect complex packages ─────────────────────────────────────────────────
# collect_all() returns (datas, binaries, hiddenimports) for a package.
openai_datas, openai_bins, openai_hidden = collect_all('openai')
pydantic_datas, pydantic_bins, pydantic_hidden = collect_all('pydantic')

# tiktoken data (BPE vocab files)
tiktoken_datas = collect_data_files('tiktoken')

# Data files bundled with the application.
bundled_datas = [
    # ── App assets (read-only, go into _MEIPASS) ──
    ('templates', 'templates'),
    ('static', 'static'),
    # ── Package data ──
    *openai_datas,
    *pydantic_datas,
    *tiktoken_datas,
]

# Modules PyInstaller's static analysis may miss.
hidden_imports = [
    # Flask / Werkzeug
    'flask', 'flask_cors', 'werkzeug', 'werkzeug.serving',
    'werkzeug.routing', 'werkzeug.middleware.proxy_fix',
    'jinja2', 'jinja2.ext',
    # SQLite (stdlib, always present)
    'sqlite3',
    # OpenAI
    *openai_hidden,
    # Pydantic
    *pydantic_hidden,
    # Document processing
    'PyPDF2', 'pypdf', 'pypdf.errors',
    'pdfminer', 'pdfminer.high_level', 'pdfminer.layout',
    'pdfminer.pdfpage', 'pdfminer.pdfinterp', 'pdfminer.converter',
    # python-docx is imported as `docx`; there is no `python_docx` module,
    # so listing it only produced a "hidden import not found" message.
    'docx', 'docx.oxml', 'docx.oxml.ns', 'docx.shared',
    'docx.enum', 'docx.enum.text', 'docx.enum.style',
    # tiktoken
    'tiktoken', 'tiktoken.core', 'tiktoken.model',
    'tiktoken_ext', 'tiktoken_ext.openai_public',
    # Network / encoding
    'requests', 'chardet', 'httpx', 'httpcore',
    'anyio', 'anyio.streams', 'anyio.streams.memory',
    'sniffio', 'certifi',
    # Stdlib extras
    'importlib.metadata', 'importlib.resources',
    'pkg_resources', 'json', 'math', 'threading',
    # Local project modules (explicitly include all)
    'config', 'app',
    'utils', 'utils.ai_client', 'utils.file_utils',
    'utils.prompts', 'utils.settings', 'utils.boq_parser', 'utils.bill_analysis',
    'modules', 'modules.parser', 'modules.generator',
    'modules.checker', 'modules.exporter', 'modules.knowledge',
]

# Modules deliberately left out of the bundle.
excluded_modules = [
    # Heavy packages not used in this app
    'matplotlib', 'pandas', 'scipy', 'numpy',
    'IPython', 'jupyter', 'notebook',
    'PIL', 'Pillow',
    'cv2', 'torch', 'tensorflow',
    'pytest', 'unittest',
    # ChromaDB and its dependencies (removed; replaced by built-in SQLite storage)
    'chromadb', 'hnswlib', 'posthog', 'pypika',
    'mmh3', 'overrides', 'monotonic',
    'sentence_transformers', 'onnxruntime',
]

a = Analysis(
    ['launcher.py'],
    pathex=['.'],
    binaries=openai_bins + pydantic_bins,
    datas=bundled_datas,
    hiddenimports=hidden_imports,
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    excludes=excluded_modules,
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False,
)

pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)

# One-folder build: the executable holds only the scripts; binaries and data
# are gathered next to it by COLLECT below.
exe = EXE(
    pyz,
    a.scripts,
    [],
    exclude_binaries=True,
    name='bid_partner',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=False,
    console=False,   # no black console window — GUI launcher takes over
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
)

coll = COLLECT(
    exe,
    a.binaries,
    a.zipfiles,
    a.datas,
    strip=False,
    upx=False,
    upx_exclude=[],
    name='BidPartner',
)
|
||||||
672
bill-worker.js
Normal file
672
bill-worker.js
Normal file
@ -0,0 +1,672 @@
|
|||||||
|
/**
|
||||||
|
* bill-worker.js — PDF 清单解析调度器(Worker Thread)
|
||||||
|
*
|
||||||
|
* 架构(v3 — SharedArrayBuffer 零拷贝):
|
||||||
|
* Phase 1 — 并行文本提取
|
||||||
|
* 将 PDF 数据写入 SharedArrayBuffer(一次分配,所有子线程共享读)
|
||||||
|
* 启动 N 个 page-worker,每个负责固定 20 页
|
||||||
|
*
|
||||||
|
* Phase 2 — 清单页筛选 + 文本解析(纯正则,毫秒级)
|
||||||
|
* 汇总全部页面文本 → 关键字筛选清单页 → 多行合并 → 逐行解析
|
||||||
|
*/
|
||||||
|
'use strict';
|
||||||
|
const { parentPort } = require('worker_threads');
|
||||||
|
const { Worker } = require('worker_threads');
|
||||||
|
const path = require('path');
|
||||||
|
|
||||||
|
const PAGES_PER_CHUNK = 20;
|
||||||
|
|
||||||
|
/**
 * Handle a `parse` request from the parent thread.
 *
 * Reads the raw PDF bytes from `msg.buffer`, extracts the text of every page in
 * parallel, keeps the pages that look like bill-of-quantities pages, parses them,
 * and replies with a single `{ type: 'done', ok, ... }` message:
 *   - ok: false, error                      → empty input or any thrown error
 *   - ok: true, data.scanned === true       → fewer than 50 characters of text in total
 *   - ok: true, data.noBillPages === true   → text found but no bill pages detected
 *   - ok: true, data = parsed result + _meta timings
 * Messages whose `type` is not 'parse' are ignored.
 */
parentPort.on('message', async (msg) => {
  if (!msg || msg.type !== 'parse') return;
  const t0 = Date.now();
  try {
    const raw = msg.buffer;
    // A missing or zero-length payload is reported as empty input instead of
    // surfacing as an opaque TypeError further down.
    if (!raw || !raw.byteLength) {
      parentPort.postMessage({ type: 'done', ok: false, error: '收到空 PDF 数据' });
      return;
    }

    // Take a clean private copy right away so we own an independent buffer.
    const buf = Buffer.alloc(raw.byteLength);
    Buffer.from(raw).copy(buf);

    if (buf.length === 0) {
      parentPort.postMessage({ type: 'done', ok: false, error: '收到空 PDF 数据' });
      return;
    }

    // ── Read the total page count ──
    const pdfjsModule = await import('pdfjs-dist/build/pdf.mjs');
    const pdfjsLib = pdfjsModule.default || pdfjsModule;
    // Hand pdfjs its own copy (pdfjs may detach the buffer it is given).
    const pdfData = new Uint8Array(buf);
    const pdf = await pdfjsLib.getDocument({ data: pdfData, isEvalSupported: false }).promise;
    const totalPages = pdf.numPages;
    // Only the page count is needed here; release the document so it is not
    // kept alive for the rest of this thread's lifetime. Cleanup is best-effort.
    try {
      await pdf.destroy();
    } catch (cleanupErr) {
      console.warn('[BillWorker] pdf.destroy() failed:', cleanupErr.message);
    }

    // ── Copy the PDF into a SharedArrayBuffer (allocated once, read by every page-worker) ──
    const sab = new SharedArrayBuffer(buf.length);
    new Uint8Array(sab).set(buf);

    const workerCount = Math.ceil(totalPages / PAGES_PER_CHUNK);
    console.log(`[BillWorker] PDF ${totalPages} 页, ${workerCount} 路并行 (SharedArrayBuffer ${(buf.length/1024/1024).toFixed(1)}MB)`);

    // Phase 1: parallel text extraction
    const pageTexts = await parallelExtract(sab, buf.length, totalPages, workerCount);
    const t1 = Date.now();

    const extractedCount = pageTexts.filter(t => t.length > 0).length;
    console.log(`[BillWorker] Phase1 完成: ${t1 - t0}ms, ${extractedCount}/${totalPages} 页有文本`);

    // Scanned-document check: almost no extractable text at all.
    const totalChars = pageTexts.reduce((s, t) => s + t.length, 0);
    if (totalChars < 50) {
      parentPort.postMessage({ type: 'done', ok: true, data: { scanned: true, reason: 'noText', totalPages } });
      return;
    }

    // Phase 2: select bill pages (lenient matching + filling gaps between bill pages)
    const BILL_KW = ['项目编码', '项目名称', '工程量', '计量单位', '综合单价', '清单编码'];
    const SEC_KW = ['分部分项', '分类分项', '措施项目', '其他项目', '工程量清单计价'];

    // Round 1: flag pages that are bill pages on their own evidence.
    const billFlags = new Array(pageTexts.length).fill(false);
    for (let i = 0; i < pageTexts.length; i++) {
      const t = pageTexts[i];
      if (!t.trim()) continue;
      const hHits = BILL_KW.filter(k => t.includes(k)).length;
      const sHit = SEC_KW.some(k => t.includes(k));
      const hasCode = /\d{9}/.test(t);
      // Lenient: a 9-digit code alone is enough (header keywords are not also required).
      if (hHits >= 2 || sHit || hasCode) {
        billFlags[i] = true;
      }
    }
    // Remember the round-1 count now; round 2 mutates billFlags, so counting
    // later would report the post-fill number as the "originally detected" one.
    const detectedCount = billFlags.filter(Boolean).length;

    // Round 2: non-empty pages between the first and last bill page are treated
    // as continuation pages (they carry no header), except pure fee/tax pages,
    // which contain no construction items.
    const FEE_PAGE_KW = ['规费', '税金', '社会保险费', '住房公积金', '养老保险',
      '工伤保险', '失业保险', '医疗保险', '教育费附加', '城市维护建设税'];
    const firstBill = billFlags.indexOf(true);
    const lastBill = billFlags.lastIndexOf(true);
    if (firstBill >= 0 && lastBill > firstBill) {
      for (let i = firstBill; i <= lastBill; i++) {
        if (!billFlags[i] && pageTexts[i] && pageTexts[i].trim().length > 30) {
          const t = pageTexts[i];
          const feeHits = FEE_PAGE_KW.filter(kw => t.includes(kw)).length;
          // Two or more fee keywords and no 9-digit code → pure fee page, skip it.
          if (feeHits >= 2 && !/\d{9}/.test(t)) continue;
          billFlags[i] = true;
        }
      }
    }
    const billTexts = [];
    for (let i = 0; i < pageTexts.length; i++) {
      if (billFlags[i]) billTexts.push(pageTexts[i]);
    }

    if (!billTexts.length) {
      parentPort.postMessage({ type: 'done', ok: true, data: { scanned: false, noBillPages: true, totalPages } });
      return;
    }

    console.log(`[BillWorker] ${totalPages} 页 → ${billTexts.length} 页清单 (原始识别 ${detectedCount} / 补全后 ${billTexts.length})`);

    // Phase 3: parse the bill text
    const merged = billTexts.join('\n');
    const parsed = parseBillText(merged);
    const t2 = Date.now();
    console.log(`[BillWorker] Phase2+3: ${t2 - t1}ms, 总耗时: ${t2 - t0}ms`);

    parentPort.postMessage({
      type: 'done', ok: true,
      data: {
        scanned: false,
        ...parsed,
        _meta: {
          method: 'local-parallel',
          workers: workerCount,
          billPages: billTexts.length,
          totalPages,
          extractMs: t1 - t0,
          parseMs: t2 - t1,
          totalMs: t2 - t0,
        }
      }
    });
  } catch (err) {
    console.error('[BillWorker] 错误:', err.message);
    parentPort.postMessage({ type: 'done', ok: false, error: err.message });
  }
});
|
||||||
|
|
||||||
|
// ================================================================
|
||||||
|
// Phase 1: 多 Worker 并行提取(SharedArrayBuffer 零拷贝)
|
||||||
|
// ================================================================
|
||||||
|
|
||||||
|
/**
 * Phase 1: extract page text with one page-worker per chunk of pages.
 *
 * Each worker receives the shared PDF bytes (`sab`, `dataLength`) plus the
 * 1-based inclusive page range it is responsible for, and is expected to reply
 * with `{ ok, results: [{ page, text }], error }`.
 *
 * @param {SharedArrayBuffer} sab   PDF bytes shared with every worker
 * @param {number} dataLength       number of valid bytes in `sab`
 * @param {number} totalPages       page count of the PDF
 * @param {number} workerCount      number of page-workers to start
 * @returns {Promise<string[]>} resolves (never rejects through worker failure)
 *   with one string per page; pages from failed workers stay ''.
 */
function parallelExtract(sab, dataLength, totalPages, workerCount) {
  return new Promise((resolve) => {
    const workerPath = path.join(__dirname, 'page-worker.js');
    const allPageTexts = new Array(totalPages).fill('');
    const workerStatus = new Array(workerCount).fill('pending'); // pending | done | failed
    let resolved = false;

    // Nothing to start: hand back the all-empty page list immediately.
    if (workerCount <= 0) {
      resolved = true;
      resolve(allPageTexts);
      return;
    }

    // Resolve once every worker has reached a final state.
    const settleIfFinished = () => {
      if (resolved) return;
      let finished = 0;
      let failed = 0;
      for (const state of workerStatus) {
        if (state === 'done') {
          finished++;
        } else if (state === 'failed') {
          finished++;
          failed++;
        }
      }
      if (finished < workerCount) return;

      resolved = true;
      // Surface partial failures: the affected pages will simply have no text.
      if (failed > 0) {
        console.warn(`[BillWorker] ${failed}/${workerCount} 个worker失败,可能导致部分页面无内容`);
      }
      resolve(allPageTexts);
    };

    // Start the worker for chunk `index` and wire up its lifecycle events.
    const launch = (index) => {
      const startPage = index * PAGES_PER_CHUNK + 1;
      const endPage = Math.min((index + 1) * PAGES_PER_CHUNK, totalPages);
      const tag = `page-worker[${startPage}-${endPage}]`;

      // The SharedArrayBuffer is passed through workerData so every thread
      // reads the same memory.
      const worker = new Worker(workerPath, {
        workerData: { sab, dataLength, startPage, endPage }
      });

      let workerDone = false;

      // Record the first final state only; later events for this worker are ignored.
      const finish = (state) => {
        if (workerDone) return;
        workerDone = true;
        workerStatus[index] = state;
        settleIfFinished();
      };

      worker.on('message', (reply) => {
        if (!reply.ok) {
          console.warn(`[BillWorker] ${tag} 失败: ${reply.error}`);
          finish('failed');
          return;
        }
        // ok without results: leave the state open; the exit handler settles it.
        if (!reply.results) return;
        for (const entry of reply.results) {
          allPageTexts[entry.page - 1] = entry.text;
        }
        finish('done');
      });

      worker.on('error', (err) => {
        console.warn(`[BillWorker] ${tag} 异常: ${err.message}`);
        finish('failed');
      });

      worker.on('exit', (code) => {
        // Safety net for a worker that ended without sending a final message.
        if (workerDone) return;
        if (code !== 0) {
          console.warn(`[BillWorker] ${tag} 意外退出(code=${code})`);
          finish('failed');
        } else {
          finish('done');
        }
      });
    };

    for (let index = 0; index < workerCount; index++) {
      launch(index);
    }
  });
}
|
||||||
|
|
||||||
|
// ================================================================
|
||||||
|
// Phase 3: 清单文本解析(纯正则 + 字符串处理,毫秒级)
|
||||||
|
// ================================================================
|
||||||
|
|
||||||
|
/**
 * Phase 3: parse the merged text of all bill pages into categories and items
 * using regular expressions and string handling only.
 *
 * Steps:
 *   1. Normalise raw lines and merge wrapped lines into logical lines.
 *   2. Walk the logical lines, recognising bill items (with or without a code),
 *      category titles, and continuation text that is appended to an item's spec.
 *   3. Drop fee items (`isFeeItem`), then merge items with the same name inside
 *      each category.
 *
 * @param {string} text  text of all bill pages joined with '\n'
 * @returns {{project_summary: {remark: string},
 *            categories: Array<{name: string,
 *              items: Array<{code: string, name: string, unit: string,
 *                            quantity: string, spec: string}>}>}}
 *   Only categories that still contain at least one item are returned.
 */
function parseBillText(text) {
  const rawLines = text.split(/\n/).map(l => {
    let line = l.replace(/\t/g, ' ').trim();
    // Normalise hyphenated codes, e.g. "010-101-001-001" → "010101001001".
    // Only rewritten when the joined digits form a plausible 9-12 digit code.
    line = line.replace(/(\d{2,4})[-‐–](\d{2,4})[-‐–](\d{2,4})(?:[-‐–](\d{2,4}))?/g,
      (m, a, b, c, d) => {
        const combined = a + b + c + (d || '');
        return (combined.length >= 9 && combined.length <= 12) ? combined : m;
      });
    return line;
  });

  // ── Step 1: merge physical lines into logical lines ──
  // A table row is usually one text line, but item names / features sometimes
  // wrap and have to be merged back.
  //
  // A new logical line starts when a line:
  //   a) begins with a dotted sequence number such as 1.1.1.1.5
  //   b) begins with a bill code (9-12 digits, or B + 5-6 digits)
  //   c) begins with a category marker followed by a space
  //   d) is a header line (skipped entirely)
  //   e) begins with "sequence number + space + code" (e.g. "5 500101004001")

  const ITEM_START = /^\d+(\.\d+)+\s/;                 // 1.1, 1.1.1, ...
  const CODE_INLINE = /(?:^|\s)(\d{9,12}|(?<![A-Za-z])B\d{5,6})\s/;  // code anywhere in the line (not GB/DB standard numbers)
  const CODE_START_RE = /^(\d{9,12}|B\d{5,6})\s/;       // code at the very start of the line
  const SEQ_CODE_RE = /^\d{1,4}\s+(\d{9,12}|(?<![A-Za-z])B\d{5,6})\s/;  // "sequence code" form
  const PAGE_MARK = /^--\s*\d+\s+of\s+\d+\s*--/;
  const HEADER_RE = /^序号\s+(项目编码|项目名称)/;
  const HEADER_KW = /^(项目编码|项目名称|清单编码|计量单位|综合单价|工程量|合\s*价|金额|序号)\s/;
  const CATEGORY_MARKERS = ['一', '二', '三', '四', '五', '六', '七', '八', '九', '十',
    '(一)', '(二)', '(三)', '(四)', '(五)'];
  // NOTE(review): "(元)" below is a capture group, so this skips every line that
  // starts with "元", not only a literal "(元)". Confirm the intent before changing.
  const NOISE_RE = /^(元)|^款章节号|^备注$|^第\d+页/;

  const logicLines = [];
  let currentLine = '';

  function isNewLineTrigger(raw) {
    if (ITEM_START.test(raw)) return true;
    if (CODE_START_RE.test(raw)) return true;
    if (SEQ_CODE_RE.test(raw)) return true;
    if (CATEGORY_MARKERS.some(m => raw.startsWith(m + ' ') || raw.startsWith(m + '\u3000'))) return true;
    return false;
  }

  for (const raw of rawLines) {
    if (!raw || PAGE_MARK.test(raw)) continue;
    if (HEADER_RE.test(raw) || HEADER_KW.test(raw)) continue;
    if (NOISE_RE.test(raw)) continue;

    if (isNewLineTrigger(raw)) {
      if (currentLine) logicLines.push(currentLine);
      currentLine = raw;
    } else if (CODE_INLINE.test(raw) && raw.length > 15) {
      // Contains a code and is long enough to look like a full table row → new line too.
      if (currentLine) logicLines.push(currentLine);
      currentLine = raw;
    } else {
      // Continuation (wrapped feature text and similar short fragments).
      // Safety valve: cut an over-long merged line so a whole page is never swallowed.
      if (currentLine && currentLine.length > 300) {
        logicLines.push(currentLine);
        currentLine = raw;
      } else {
        currentLine = currentLine ? currentLine + ' ' + raw : raw;
      }
    }
  }
  if (currentLine) logicLines.push(currentLine);

  console.log(`[BillWorker] 合并后 ${logicLines.length} 条逻辑行(原始 ${rawLines.length} 行)`);
  // Print the first 5 logical lines for debugging.
  for (let i = 0; i < Math.min(5, logicLines.length); i++) {
    console.log(`[BillWorker]   L${i}: ${logicLines[i].substring(0, 120)}`);
  }

  // ── Step 2: classify logical lines ──
  const categories = [];
  let curCat = null, curItem = null;

  // Code anywhere in the line: 9-12 digits or a B-code, not preceded by a letter
  // (which excludes standard numbers such as GB/DB).
  const CODE_RE = /(?<![A-Za-z])(\d{9,12}|(?<![A-Za-z])B\d{5,6})/;
  const UNIT_TOKENS = ['m³','m²','m3','m2','km','hm2','㎡','㎥','t','kg',
    '个','台','套','组','根','块','片','张','只','吨','项',
    '处','座','件','段','条','把','扇','口','圈','道','孔',
    '对','副','樘','方','延m','株','棵','m'];
  const UNIT_SET = new Set(UNIT_TOKENS);
  // Longest tokens first so a glued suffix such as "hm2" is not mistaken for "m2".
  const UNIT_TOKENS_BY_LENGTH = [...UNIT_TOKENS].sort((a, b) => b.length - a.length);
  const unitEscaped = UNIT_TOKENS.map(u => u.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
  // A unit must be preceded by start/whitespace and followed by whitespace, a digit, or end.
  const UNIT_RE = new RegExp(`(?:^|\\s)(${unitEscaped.join('|')})(?=\\s|\\d|$)`);
  const SKIP_RE = /合\s*计|小\s*计|本页小计|总\s*计|价税合计/;

  for (const line of logicLines) {
    if (SKIP_RE.test(line)) continue;

    // Drop a leading sequence prefix ("1.1.1.1.5 " or "5 ").
    let stripped = line.replace(/^\d+(\.\d+)*\s+/, '').trim();
    if (!stripped) stripped = line.trim();
    if (!stripped) continue;

    const cm = stripped.match(CODE_RE);
    if (cm) {
      // A coded line starts a new item; flush the previous one first.
      if (curItem && curCat) curCat.items.push(curItem);
      if (!curCat) { curCat = { name: '未分类', items: [] }; categories.push(curCat); }

      const code = cm[1];
      let rest = stripped.substring(cm.index + cm[0].length).trim();
      let name = '', unit = '', quantity = '', spec = '';

      const unitMatch = rest.match(UNIT_RE);
      if (unitMatch) {
        // Use the match position itself. indexOf(unitMatch[0]) could land on an
        // earlier look-alike (e.g. " m" inside " mm...") that the regex rejected.
        const ui = unitMatch.index;
        let rawName = rest.substring(0, ui).trim();
        unit = unitMatch[1];
        const afterUnit = rest.substring(ui + unitMatch[0].length).trim();
        const qm = afterUnit.match(/^([\d,.]+)/);
        if (qm) {
          quantity = qm[1];
          // Text after the quantity: skip purely numeric fields (unit price, amount, ...)
          // and keep whatever follows as spec text.
          let tail = afterUnit.substring(qm.index + qm[0].length).trim();
          if (tail) {
            const tailTokens = tail.split(/\s+/);
            let si = 0;
            while (si < tailTokens.length && /^[\d,.%\-]+$/.test(tailTokens[si])) si++;
            const specTail = tailTokens.slice(si).join(' ').trim();
            if (specTail) spec = specTail;
          }
        }
        // Separate the item name from inline feature text.
        const ns = splitNameAndSpec(rawName);
        name = ns.name;
        if (ns.spec) spec = ns.spec + (spec ? ';' + spec : '');
      } else {
        // No regex hit: look for a standalone unit token, scanning from the right.
        const tokens = rest.split(/\s+/).filter(t => t);
        let foundUnitIdx = -1;
        for (let ti = tokens.length - 1; ti >= 1; ti--) {
          if (UNIT_SET.has(tokens[ti])) { foundUnitIdx = ti; break; }
        }
        if (foundUnitIdx >= 1) {
          const rawNameStr = tokens.slice(0, foundUnitIdx).join(' ');
          const ns = splitNameAndSpec(rawNameStr);
          name = ns.name;
          if (ns.spec) spec = ns.spec;
          unit = tokens[foundUnitIdx];
          const afterTokens = tokens.slice(foundUnitIdx + 1);
          if (afterTokens.length && /^[\d,.]+$/.test(afterTokens[0])) {
            quantity = afterTokens[0];
            let si = 1;
            while (si < afterTokens.length && /^[\d,.%\-]+$/.test(afterTokens[si])) si++;
            const specTail = afterTokens.slice(si).join(' ').trim();
            if (specTail) spec = spec ? spec + ';' + specTail : specTail;
          }
        } else {
          name = rest;
        }
      }

      name = name.replace(/\s+/g, '').trim();
      // Recover a unit glued to the end of the name (e.g. "...m³"), but only when
      // no unit was extracted above. Otherwise names that merely end in a unit
      // character (e.g. "...土方" with unit m3) would lose their last character.
      if (!unit) {
        for (const u of UNIT_TOKENS_BY_LENGTH) {
          if (name.endsWith(u) && name.length > u.length) {
            unit = u;
            name = name.substring(0, name.length - u.length);
            break;
          }
        }
      }

      curItem = { code, name, unit, quantity, spec };
      continue;
    }

    // ── Fallback: no code, but a "name unit quantity" shape → still a bill item ──
    // Common for measure items and supplementary items without a code.
    if (!cm && stripped.length > 4) {
      const uniMatch = stripped.match(UNIT_RE);
      if (uniMatch) {
        // Same reasoning as above: use the regex match position, not indexOf.
        const ui = uniMatch.index;
        const beforeUnit = stripped.substring(0, ui).trim();
        const afterUnit = stripped.substring(ui + uniMatch[0].length).trim();
        const hasQty = /^[\d,.]+/.test(afterUnit);
        // Name of 2-50 characters, containing Chinese, followed by a quantity.
        if (beforeUnit.length >= 2 && beforeUnit.length <= 50 && hasQty
            && /[\u4e00-\u9fff]/.test(beforeUnit)) {
          if (curItem && curCat) curCat.items.push(curItem);
          if (!curCat) { curCat = { name: '未分类', items: [] }; categories.push(curCat); }
          const unit = uniMatch[1];
          const qm = afterUnit.match(/^([\d,.]+)/);
          const quantity = qm ? qm[1] : '';
          const ns = splitNameAndSpec(beforeUnit);
          const name = ns.name.replace(/\s+/g, '').trim();
          const spec = ns.spec || '';
          curItem = { code: '', name, unit, quantity, spec };
          continue;
        }
      }
    }

    // Category title check: short text without a code.
    // Guard: a line with a unit and a trailing number is item data, not a title.
    if (stripped.length > 2 && stripped.length < 60 && !CODE_RE.test(stripped)) {
      if (UNIT_RE.test(stripped) && /\d+\.?\d*\s*$/.test(stripped)) {
        if (curItem) curItem.spec = curItem.spec ? curItem.spec + ';' + stripped : stripped;
        continue;
      }
      if (isCatTitle(stripped) && !UNIT_RE.test(stripped) && !isFeeCatTitle(stripped)) {
        if (curItem && curCat) { curCat.items.push(curItem); curItem = null; }
        const cleanTitle = stripped.replace(/\s+(座|个|项|处|m|km|段|条)\s+\d+[\d.]*\s*$/, '').trim();
        curCat = { name: cleanTitle, items: [] };
        categories.push(curCat);
        continue;
      }
    }

    // NOTE(review): the second pattern's parentheses form a capture group, so it
    // matches any line that starts with a Chinese numeral or a digit (not only
    // "(一)"-style markers). Left unchanged because category detection may rely on it.
    if (/^[一二三四五六七八九十]+\s/.test(stripped) || /^([一二三四五六七八九十\d]+)/.test(stripped)) {
      // Numbered titles must also exclude fee categories.
      const cleanTitle = stripped.replace(/\s+(座|个|项|处)\s+\d+[\d.]*\s*$/, '').trim();
      if (isFeeCatTitle(cleanTitle)) {
        // Fee title: skip it without opening a category.
        continue;
      }
      if (curItem && curCat) { curCat.items.push(curItem); curItem = null; }
      curCat = { name: cleanTitle, items: [] };
      categories.push(curCat);
      continue;
    }

    // Anything else is continuation text for the current item.
    if (curItem && stripped.length > 1) {
      curItem.spec = curItem.spec ? curItem.spec + ';' + stripped : stripped;
    }
  }

  if (curItem && curCat) curCat.items.push(curItem);

  // ── Step 3a: drop fee items; keep only construction items ──
  let feeFiltered = 0;
  for (const cat of categories) {
    if (cat.items) {
      const before = cat.items.length;
      cat.items = cat.items.filter(it => !isFeeItem(it.name));
      feeFiltered += before - cat.items.length;
    }
  }
  if (feeFiltered > 0) console.log(`[BillWorker] 费用项过滤: 移除 ${feeFiltered} 项`);

  // ── Step 3b: merge items by name within each category ──
  //   - code:     first non-empty code
  //   - unit:     first non-empty unit
  //   - quantity: numeric sum when every value parses, otherwise joined with '; '
  //   - spec:     distinct specs joined with '; ' (each and the total are truncated)
  // Items whose name is empty are dropped here.
  let totalBeforeMerge = 0, totalAfterMerge = 0;
  for (const cat of categories) {
    if (!cat.items || !cat.items.length) continue;
    totalBeforeMerge += cat.items.length;

    const nameMap = new Map();  // name → merged item
    for (const item of cat.items) {
      const key = (item.name || '').replace(/\s+/g, '').trim();
      if (!key) continue;

      if (!nameMap.has(key)) {
        nameMap.set(key, {
          code: item.code || '',
          name: item.name,
          unit: item.unit || '',
          quantity: item.quantity || '',
          spec: item.spec || '',
          _count: 1,
          _quantities: item.quantity ? [item.quantity] : [],
          _specs: item.spec ? [item.spec] : [],
        });
      } else {
        const m = nameMap.get(key);
        m._count++;
        if (!m.code && item.code) m.code = item.code;
        if (!m.unit && item.unit) m.unit = item.unit;
        if (item.quantity) m._quantities.push(item.quantity);
        if (item.spec && !m._specs.includes(item.spec)) {
          m._specs.push(item.spec);
        }
      }
    }

    // Build the final fields from the collected values.
    const merged = [];
    for (const [, m] of nameMap) {
      if (m._quantities.length > 1) {
        const nums = m._quantities.map(q => parseFloat(q.replace(/,/g, '')));
        if (nums.every(n => !isNaN(n))) {
          const sum = nums.reduce((a, b) => a + b, 0);
          m.quantity = sum % 1 === 0 ? String(sum) : sum.toFixed(2);
        } else {
          m.quantity = m._quantities.join('; ');
        }
      } else if (m._quantities.length === 1) {
        m.quantity = m._quantities[0];
      }
      // Each spec is capped at 120 characters, the joined spec at 300.
      if (m._specs.length > 0) {
        const trimmed = m._specs.map(s => s.length > 120 ? s.substring(0, 120) + '...' : s);
        m.spec = trimmed.join('; ');
        if (m.spec.length > 300) m.spec = m.spec.substring(0, 300) + '...';
      }
      // Remove the bookkeeping fields before returning the item.
      delete m._count; delete m._quantities; delete m._specs;
      merged.push(m);
    }
    cat.items = merged;
    totalAfterMerge += merged.length;
  }

  const mergedCount = totalBeforeMerge - totalAfterMerge;
  if (mergedCount > 0) {
    console.log(`[BillWorker] 按名称合并: ${totalBeforeMerge} → ${totalAfterMerge} 项(合并 ${mergedCount} 个重复项)`);
  }

  const valid = categories.filter(c => c.items && c.items.length > 0);
  const totalItems = valid.reduce((s, c) => s + c.items.length, 0);
  const withSpec = valid.reduce((s, c) => s + c.items.filter(it => it.spec).length, 0);
  const withCode = valid.reduce((s, c) => s + c.items.filter(it => it.code).length, 0);
  console.log(`[BillWorker] 最终结果: ${valid.length} 分部, ${totalItems} 清单项 (${withCode} 有编码, ${withSpec} 有spec)`);
  // Print the first 3 items for debugging.
  let debugCount = 0;
  for (const cat of valid) {
    for (const it of cat.items) {
      if (debugCount < 3) {
        console.log(`[BillWorker]   样例: [${it.code}] ${it.name} | ${it.unit} | qty=${it.quantity} | spec=${(it.spec||'').substring(0, 80)}`);
        debugCount++;
      }
    }
  }

  return {
    project_summary: { remark: `本地解析:${valid.length} 个分部,${totalItems} 个清单项(合并前 ${totalBeforeMerge} 项)` },
    categories: valid,
  };
}
|
||||||
|
|
||||||
|
/**
 * Tell whether a bill item is a "fee" line rather than construction work,
 * so that it can be kept out of the technical bid.
 * Typical examples: safety/civilised-construction fees, statutory fees,
 * taxes, provisional sums.
 *
 * Whitespace is removed from the name before matching. A name counts as a
 * fee item when it equals one of the whole-name entries or contains one of
 * the fee keywords.
 *
 * @param {string} name  item name as extracted from the bill of quantities
 * @returns {boolean} true when the item is a fee line
 */
function isFeeItem(name) {
  if (!name) return false;

  const compact = name.replace(/\s+/g, '');

  // 1. Whole-name matches.
  const wholeNames = new Set([
    '规费', '税金', '利润', '增值税', '暂列金额', '暂估价', '计日工',
    '总承包服务费', '企业管理费', '甲供材料保管费', '价税合计',
  ]);
  if (wholeNames.has(compact)) return true;

  // 2. Substring matches: measure fees, statutory fees, insurance, levies.
  const feeKeywords = [
    '安全文明', '文明施工费', '环境保护费', '临时设施费',
    '夜间施工增加费', '夜间施工费',
    '冬雨季施工增加费', '冬雨季施工费',
    '二次搬运费', '大型机械设备进出场', '大型机械进出场',
    '施工排水降水', '排水降水费',
    '已完工程及设备保护', '已完工程保护费',
    '工程排污费', '社会保障费', '住房公积金',
    '工伤保险', '劳动保险', '意外伤害保险', '建筑工程保险',
    '城市维护建设税', '城市建设维护税',
    '教育费附加', '地方教育附加',
    '材料暂估', '专业工程暂估',
    '超高施工增加费', '安全防护费',
    '措施项目费', '其他项目费', '不可竞争费',
  ];
  return feeKeywords.some((keyword) => compact.includes(keyword));
}
|
||||||
|
|
||||||
|
/**
 * Separate the item name from an inline feature description ("项目特征描述")
 * that was extracted into the same cell.
 *
 * Example: "土方开挖 1.土壤类别:普通土"
 *       -> { name: "土方开挖", spec: "1.土壤类别:普通土" }
 *
 * Three heuristics are tried in order. A heuristic only applies when its
 * first match starts after position 0 (a match at 0 would leave the name
 * empty), otherwise the next one is tried:
 *   1. numbered list marker followed by a CJK character:
 *      "1.土壤类别", "2、强度等级", "3)做法", "(1)普通土"
 *   2. feature keyword followed by a colon: "材质:", "规格:"
 *   3. parenthesised number: "(1)", "(1)"
 *
 * Both half-width and full-width punctuation are accepted. The full-width
 * code points are written as \uXXXX escapes so that a tool normalising the
 * source cannot silently collapse them into their ASCII twins.
 *
 * @param {string} rawName  raw name cell, possibly with a trailing spec
 * @returns {{name: string, spec: string}} trimmed name and spec; spec is ''
 *          when no heuristic applies
 */
function splitNameAndSpec(rawName) {
  if (!rawName) return { name: '', spec: '' };

  const SPLITTERS = [
    // Pattern 1: digits + list punctuation + CJK. The optional opening
    // bracket keeps "(1)" together in the spec instead of leaving a
    // dangling "(" at the end of the name.
    /[(\uFF08]?\d+[.\uFF0E\u3001)\uFF09]\s*[\u4e00-\u9fff]/,
    // Pattern 2: feature keyword + ASCII or full-width colon.
    /(材质|规格|型号|品牌|颜色|尺寸|厚度|直径|管径|强度|等级|类别|类型|做法|要求|标准|内容|工作内容|土壤|含量|配合比|工艺|方式|形式|范围|部位|位置|高度|宽度|长度|深度|坡度|截面|跨度|运距|开挖|回填|混凝土|钢筋|压实)[:\uFF1A]/,
    // Pattern 3: number in ASCII or full-width parentheses.
    /[(\uFF08]\d+[)\uFF09]/,
  ];

  for (const pattern of SPLITTERS) {
    const hit = rawName.match(pattern);
    if (hit && hit.index > 0) {
      return {
        name: rawName.substring(0, hit.index).trim(),
        spec: rawName.substring(hit.index).trim(),
      };
    }
  }

  return { name: rawName, spec: '' };
}
|
||||||
|
|
||||||
|
/**
 * Heuristically decide whether a piece of text looks like a section
 * (work-breakdown category) title, by checking whether it contains any of
 * a fixed list of construction-discipline keywords.
 *
 * Empty, null or undefined input yields false instead of throwing, and
 * whitespace is stripped before matching so that titles extracted with
 * spaces between characters still match; both mirror the handling in
 * isFeeItem / isFeeCatTitle.
 *
 * @param {string} text  candidate title
 * @returns {boolean} true when at least one keyword occurs in the text
 */
function isCatTitle(text) {
  if (!text) return false;

  const compact = String(text).replace(/\s+/g, '');

  const KW = [
    '土建','建筑','结构','装饰','装修','安装','给排水','暖通','空调','通风',
    '电气','强电','弱电','消防','智能化','幕墙','门窗','园林','绿化','景观',
    '市政','道路','桥梁','管网','基础','地基','桩基','主体','屋面','防水',
    '保温','钢结构','排水','给水','照明','动力','防雷','电梯','人防','室外',
    '附属','分部','工程','措施','清单','土石方','混凝土','砌筑','模板','脚手架',
    '水利','河道','管道','阀门','设备','仪表','自动化','通信','网络',
    '拆除','外墙','内墙','楼地面','天棚','吊顶','栏杆','屋顶','涂料','抹灰',
    '廊道','阀门井','蓄水池','泵站','供水','引水','水源','渠道','闸门',
    '围栏','警示','检修','管线','配电','水池','水塔','取水','净水',
  ];

  return KW.some(k => compact.includes(k));
}
|
||||||
|
|
||||||
|
/**
 * Tell whether a section title denotes a fee category, for which no
 * construction section should be created.
 * Typical examples: statutory fees, taxes, measure-item fees, other-item
 * fees and similar non-construction sections.
 *
 * Whitespace is removed from the title before matching. A title counts as a
 * fee category when it equals one of the whole-title entries or contains
 * one of the fee-category keywords.
 *
 * @param {string} text  section title
 * @returns {boolean} true when the title is a fee category
 */
function isFeeCatTitle(text) {
  if (!text) return false;

  const title = text.replace(/\s+/g, '');

  // Whole-title matches.
  const wholeTitles = new Set([
    '规费', '税金', '利润', '增值税', '暂列金额', '暂估价', '计日工',
    '总承包服务费', '企业管理费', '价税合计',
    '措施项目费', '其他项目费', '不可竞争费',
  ]);
  if (wholeTitles.has(title)) return true;

  // Substring matches.
  const feeCategoryKeywords = [
    '措施项目费', '其他项目费', '不可竞争费',
    '规费汇总', '税金汇总', '费率', '费用汇总', '费用合计',
    '暂列金额', '暂估价', '计日工', '总承包服务费',
    '安全文明施工费', '社会保障费', '住房公积金',
    '工伤保险', '教育费附加', '城市维护建设税',
  ];
  return feeCategoryKeywords.some((keyword) => title.includes(keyword));
}
|
||||||
95
build.bat
Normal file
95
build.bat
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
@echo off
chcp 65001 >nul 2>&1
setlocal

rem ============================================================
rem  Builds the BidPartner desktop EXE with PyInstaller.
rem
rem  Steps: check Python, install PyInstaller, install project
rem  dependencies, swap data\settings.json for a placeholder copy
rem  with no real API keys, run PyInstaller, then put the real
rem  settings back whether or not the build succeeded.
rem
rem  NOTE(review): this assumes bid_partner.spec bundles
rem  data\settings.json; the spec is not visible here. If it does
rem  not, the swap is harmless.
rem ============================================================

echo ============================================================
echo BidPartner - Build Desktop EXE
echo ============================================================
echo.

:: -- 1. Check Python -------------------------------------------
python --version >nul 2>&1
if errorlevel 1 (
    echo [ERROR] Python not found. Please install Python 3.9+.
    pause & exit /b 1
)

:: -- 2. Install / upgrade PyInstaller --------------------------
:: "python -m pip" guarantees the packages land in the interpreter
:: that was just checked, not in whatever pip is first on PATH.
echo [Step 1/4] Installing PyInstaller...
python -m pip install --quiet --upgrade pyinstaller
if errorlevel 1 (
    echo [ERROR] Failed to install PyInstaller.
    pause & exit /b 1
)

:: -- 3. Install project dependencies ---------------------------
echo [Step 2/4] Checking dependencies...
python -m pip install --quiet -r requirements.txt
if errorlevel 1 (
    echo [ERROR] Failed to install dependencies.
    pause & exit /b 1
)

:: -- 4. Sanitize settings.json - REMOVE API KEYS before build --
echo [Step 3/4] Sanitizing settings (removing API keys from build)...
if not exist "data" mkdir "data"

:: A leftover backup means an earlier build was interrupted after the
:: swap. Restore it first, otherwise the backup below would overwrite
:: the real settings with the placeholder file.
if exist "data\settings.json.bak" (
    copy /y "data\settings.json.bak" "data\settings.json" >nul
    del /f /q "data\settings.json.bak" >nul 2>&1
)

if exist "data\settings.json" (
    rem Back up real settings
    copy /y "data\settings.json" "data\settings.json.bak" >nul
    if errorlevel 1 (
        echo [ERROR] Could not back up data\settings.json.
        pause & exit /b 1
    )
)

:: Write a clean settings file with no real keys
(
echo {
echo "model_provider": "deepseek",
echo "qwen_api_key": "sk-your-qwen-key",
echo "qwen_model": "qwen3.6-plus",
echo "openai_api_key": "sk-your-openai-key",
echo "openai_model": "gpt-4o",
echo "deepseek_api_key": "sk-your-deepseek-key",
echo "deepseek_model": "deepseek-chat",
echo "max_concurrent": 5,
echo "content_volume": "standard"
echo }
) > "data\settings_clean.tmp"

:: Put the clean file in place. Without this step the real settings
:: file is still the one sitting in data\ while PyInstaller runs.
copy /y "data\settings_clean.tmp" "data\settings.json" >nul
if errorlevel 1 (
    echo [ERROR] Could not write sanitized data\settings.json.
    call :restore_settings
    pause & exit /b 1
)

:: -- 5. Build --------------------------------------------------
echo [Step 4/4] Building EXE with PyInstaller...
echo (This may take 3-10 minutes on first run)
echo.

:: Clean previous build artifacts
if exist "build" rd /s /q "build" >nul 2>&1
if exist "dist\BidPartner" rd /s /q "dist\BidPartner" >nul 2>&1

python -m PyInstaller bid_partner.spec --noconfirm
set BUILD_RESULT=%errorlevel%

:: -- Restore real settings (always, even when the build failed) --
call :restore_settings

if %BUILD_RESULT% neq 0 (
    echo.
    echo [ERROR] PyInstaller build failed. See output above for details.
    pause & exit /b 1
)

:: -- 6. Result -------------------------------------------------
echo.
echo ============================================================
echo Build SUCCESSFUL!
echo Output: dist\BidPartner\bid_partner.exe
echo ============================================================
echo.
echo The 'dist\BidPartner' folder is your distributable package.
echo Users only need this folder - no Python installation required.
echo Each user must set their own API key in the app settings.
echo.

:: Open the output folder
explorer "dist\BidPartner" >nul 2>&1

endlocal
pause
exit /b 0

:: -- Subroutine: undo the settings swap --------------------------
:: Restores the developer's real settings from the backup. When there
:: was no settings file before the build, removes the placeholder so
:: the working tree ends up as it started.
:restore_settings
if exist "data\settings.json.bak" (
    copy /y "data\settings.json.bak" "data\settings.json" >nul
    del /f /q "data\settings.json.bak" >nul 2>&1
) else (
    del /f /q "data\settings.json" >nul 2>&1
)
del /f /q "data\settings_clean.tmp" >nul 2>&1
exit /b 0
|
||||||
120
config.py
Normal file
120
config.py
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import threading
|
||||||
|
import random
|
||||||
|
import time
|
||||||
|
from contextlib import contextmanager
|
||||||
|
|
||||||
|
# When running as a PyInstaller bundle:
#   sys._MEIPASS        -> read-only bundle dir (templates, static, prompts)
#   sys.executable dir  -> writable dir next to the .exe (data, settings, db)
if getattr(sys, 'frozen', False):
    _BUNDLE_DIR = sys._MEIPASS                   # bundled app files
    BASE_DIR = os.path.dirname(sys.executable)   # writable runtime dir
else:
    _BUNDLE_DIR = os.path.dirname(os.path.abspath(__file__))
    BASE_DIR = _BUNDLE_DIR

DATA_DIR = os.path.join(BASE_DIR, 'data')
UPLOAD_DIR = os.path.join(DATA_DIR, 'uploads')
EXPORT_DIR = os.path.join(DATA_DIR, 'exports')
KNOWLEDGE_DIR = os.path.join(DATA_DIR, 'knowledge')
DB_PATH = os.path.join(DATA_DIR, 'projects.db')
CHROMA_DIR = os.path.join(DATA_DIR, 'chroma')
PROMPTS_DIR = os.path.join(_BUNDLE_DIR, 'prompts')


def _env_int(name, default, minimum=None):
    """Read an integer setting from the environment.

    An unset, empty or whitespace-only variable yields ``default``. A
    non-numeric value still raises ``ValueError`` so that a typo is noticed
    at start-up rather than silently ignored. When ``minimum`` is given the
    result is raised to at least that value.
    """
    raw = os.environ.get(name) or ''
    value = int(raw) if raw.strip() else default
    if minimum is not None and value < minimum:
        value = minimum
    return value


def _env_flag(name, default):
    """Read a boolean setting; '1', 'true' and 'yes' (any case) mean True."""
    return os.environ.get(name, default).lower() in ('1', 'true', 'yes')


# ==================== AI model configuration ====================
# Provider selection: 'openai' | 'qwen' | 'deepseek' | 'ollama'
# NOTE(review): Doubao and Kimi settings are defined below as well, so they
# are presumably valid provider values too - confirm against the AI client.
MODEL_PROVIDER = os.environ.get('MODEL_PROVIDER', 'qwen')

# OpenAI
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', 'sk-your-openai-key')
OPENAI_MODEL = os.environ.get('OPENAI_MODEL', 'gpt-4.1')
OPENAI_BASE_URL = os.environ.get('OPENAI_BASE_URL', 'https://api.openai.com/v1')

# Alibaba Cloud Tongyi Qianwen (Qwen)
QWEN_API_KEY = os.environ.get('QWEN_API_KEY', 'sk-your-qwen-key')
QWEN_MODEL = os.environ.get('QWEN_MODEL', 'qwen3.6-plus')
QWEN_BASE_URL = os.environ.get('QWEN_BASE_URL', 'https://dashscope.aliyuncs.com/compatible-mode/v1')
# Text-to-image (multimodal generation) uses a different endpoint from chat;
# for the Beijing region see the Model Studio documentation.
QWEN_MULTIMODAL_BASE = os.environ.get(
    'QWEN_MULTIMODAL_BASE',
    'https://dashscope.aliyuncs.com/api/v1',
)
# Default model for bid-attachment illustrations (synchronous text-to-image)
QWEN_IMAGE_MODEL = os.environ.get('QWEN_IMAGE_MODEL', 'qwen-image-2.0-pro')
QWEN_IMAGE_SIZE = os.environ.get('QWEN_IMAGE_SIZE', '1536*1024')
QWEN_IMAGE_PROMPT_EXTEND = _env_flag('QWEN_IMAGE_PROMPT_EXTEND', 'true')
QWEN_IMAGE_WATERMARK = _env_flag('QWEN_IMAGE_WATERMARK', 'false')
QWEN_IMAGE_NEGATIVE_PROMPT = os.environ.get(
    'QWEN_IMAGE_NEGATIVE_PROMPT',
    '低分辨率, 模糊, 畸形肢体, 过度饱和, 蜡像感, 杂乱构图, 扭曲文字, 公司商标, LOGO, 投标人名称',
)

# DeepSeek
DEEPSEEK_API_KEY = os.environ.get('DEEPSEEK_API_KEY', 'sk-your-deepseek-key')
DEEPSEEK_MODEL = os.environ.get('DEEPSEEK_MODEL', 'deepseek-chat')
DEEPSEEK_BASE_URL = os.environ.get('DEEPSEEK_BASE_URL', 'https://api.deepseek.com/v1')

# Local Ollama (OpenAI-compatible API)
OLLAMA_BASE_URL = os.environ.get('OLLAMA_BASE_URL', 'http://localhost:11434/v1')
OLLAMA_MODEL = os.environ.get('OLLAMA_MODEL', 'qwen3:8b')

# Doubao / Volcano Engine (ByteDance, OpenAI-compatible API)
DOUBAO_API_KEY = os.environ.get('DOUBAO_API_KEY', 'sk-your-doubao-key')
DOUBAO_MODEL = os.environ.get('DOUBAO_MODEL', 'doubao-1-5-pro-32k')
DOUBAO_BASE_URL = os.environ.get('DOUBAO_BASE_URL', 'https://ark.cn-beijing.volces.com/api/v3')

# Kimi / Moonshot AI (OpenAI-compatible API, supports embeddings)
KIMI_API_KEY = os.environ.get('KIMI_API_KEY', 'sk-your-kimi-key')
KIMI_MODEL = os.environ.get('KIMI_MODEL', 'moonshot-v1-32k')
KIMI_BASE_URL = os.environ.get('KIMI_BASE_URL', 'https://api.moonshot.cn/v1')

# Embedding models
OPENAI_EMBEDDING_MODEL = 'text-embedding-3-small'
QWEN_EMBEDDING_MODEL = 'text-embedding-v3'
KIMI_EMBEDDING_MODEL = 'moonshot-v1-embedding'

# ==================== Application configuration ====================
MAX_FILE_SIZE_MB = 50
ALLOWED_EXTENSIONS = {'pdf', 'doc', 'docx'}
# The literal default is committed to the repository and therefore public;
# set SECRET_KEY in the environment for any deployment that relies on it.
SECRET_KEY = os.environ.get('SECRET_KEY', 'bidhuo-partner-secret-2024')

# ==================== Generation configuration ====================
MAX_RETRIES = 3
REQUEST_TIMEOUT = _env_int('REQUEST_TIMEOUT', 180)
# Outline generation sends a long prompt and gets a large reply, so it uses a
# longer read timeout (seconds) to keep the client from disconnecting before
# the API has answered.
OUTLINE_REQUEST_TIMEOUT = _env_int('OUTLINE_REQUEST_TIMEOUT', 300)
CHUNK_SIZE = 2000       # knowledge-base text chunk size (characters)
CHUNK_OVERLAP = 200     # overlap between consecutive chunks
TOP_K_KNOWLEDGE = 3     # number of knowledge-base hits to retrieve
# Content volume tier: concise / standard / detailed / full
CONTENT_VOLUME = os.environ.get('CONTENT_VOLUME', 'standard')
TARGET_PAGES = _env_int('TARGET_PAGES', 0)                 # target page count (0 = disabled)
PAGE_CHAR_ESTIMATE = _env_int('PAGE_CHAR_ESTIMATE', 700)   # rough characters per page

# ==================== Concurrency control ====================
# Global cap on simultaneous LLM calls, to avoid being rate-limited by cloud
# APIs such as Qwen. Default 20. Clamped to at least 1: Semaphore(0) would
# make every call wait for the full timeout and a negative value raises.
LLM_CONCURRENCY_LIMIT = _env_int('LLM_CONCURRENCY_LIMIT', 20, minimum=1)
_llm_semaphore = threading.Semaphore(LLM_CONCURRENCY_LIMIT)


@contextmanager
def llm_call():
    """Context manager that holds one slot of the global LLM semaphore.

    Intended to wrap every chat / embedding request so that no more than
    ``LLM_CONCURRENCY_LIMIT`` run at once. Waits up to 60 seconds for a free
    slot; the slot is always released on exit, including when the wrapped
    code raises.

    Raises:
        TimeoutError: no slot became free within 60 seconds.
    """
    acquired = _llm_semaphore.acquire(blocking=True, timeout=60.0)
    if not acquired:
        raise TimeoutError(f"LLM并发已达上限({LLM_CONCURRENCY_LIMIT}),请稍后重试")
    try:
        # Qwen is sensitive to requests-per-minute; a tiny random delay
        # (0-0.08 s) spreads out bursts to reduce 429 responses.
        if MODEL_PROVIDER == 'qwen':
            time.sleep(random.uniform(0, 0.08))
        yield
    finally:
        _llm_semaphore.release()


# Default number of sections generated concurrently.
MAX_CONCURRENT_SECTIONS = _env_int('MAX_CONCURRENT_SECTIONS', 12)
|
||||||
22
data/attachment_section_rules.json
Normal file
22
data/attachment_section_rules.json
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
{
|
||||||
|
"_meta": "附件类章节:stack_charts_only 为默认,叶节点按 diagram 意图栈只输出 [FIGURE]/[TABLE] 块、无叙述正文;full 为长文;single_chart_only 为栈顶单块。修改后重启生效。",
|
||||||
|
"_field_docs": {
|
||||||
|
"title_regex": "标题任一则正则匹配即视为附件节(Python re 语法)",
|
||||||
|
"table_hint_keywords": "标题含此类子串且双开关均开时倾向表格",
|
||||||
|
"figure_hint_keywords": "标题含此类子串且双开关均开时倾向图示",
|
||||||
|
"default_kind_when_ambiguous": "双开且标题无倾向词时的默认:figure 或 table",
|
||||||
|
"attachment_leaf_body_mode": "stack_charts_only:意图栈只生成图/表块;full:与常规章节相同长文;single_chart_only:仅栈顶一块图或表"
|
||||||
|
},
|
||||||
|
"schema_version": 1,
|
||||||
|
"attachment_leaf_body_mode": "stack_charts_only",
|
||||||
|
"title_regex": [
|
||||||
|
"附件\\s*[一二三四五六七八九十0-9A-Za-z、::.]",
|
||||||
|
"附\\s*图",
|
||||||
|
"附\\s*表",
|
||||||
|
"附\\s*件\\s*\\(",
|
||||||
|
"^\\s*[\\d一二三四五六七八九十\\..、]+\\s*附件"
|
||||||
|
],
|
||||||
|
"table_hint_keywords": ["附表", "一览表", "清单表", "表", "统计表", "明细表"],
|
||||||
|
"figure_hint_keywords": ["附图", "示意图", "平面图", "流程图", "布置图", "组织图", "横道"],
|
||||||
|
"default_kind_when_ambiguous": "table"
|
||||||
|
}
|
||||||
51
data/diagram_intent_rules.json
Normal file
51
data/diagram_intent_rules.json
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
{
|
||||||
|
"_meta": "章节级图/表意图:标题与大纲窗口关键词计分,阈值入栈,按栈序拼接图示/表格生成规范。修改后重启服务生效。",
|
||||||
|
"_field_docs": {
|
||||||
|
"threshold_figure": "图示倾向分达到此值才入栈",
|
||||||
|
"threshold_table": "表格倾向分达到此值才入栈",
|
||||||
|
"title_weight": "标题命中的权重乘子",
|
||||||
|
"context_weight": "大纲上下文窗口命中的权重乘子",
|
||||||
|
"outline_context_lines": "before/after 为相对匹配行上下扩展行数",
|
||||||
|
"stack_order_when_both": "figure_first | table_first | score_desc(两者同时入栈时的顺序,栈顶为 index 0)",
|
||||||
|
"figure_keywords": "字符串或 {text,weight} 对象列表",
|
||||||
|
"table_keywords": "同上"
|
||||||
|
},
|
||||||
|
"schema_version": 1,
|
||||||
|
"threshold_figure": 1.0,
|
||||||
|
"threshold_table": 1.0,
|
||||||
|
"title_weight": 1.0,
|
||||||
|
"context_weight": 0.6,
|
||||||
|
"outline_context_lines": {"before": 4, "after": 6},
|
||||||
|
"stack_order_when_both": "score_desc",
|
||||||
|
"figure_keywords": [
|
||||||
|
{"text": "组织", "weight": 1.0},
|
||||||
|
{"text": "架构", "weight": 1.0},
|
||||||
|
{"text": "流程", "weight": 1.2},
|
||||||
|
{"text": "工序", "weight": 1.0},
|
||||||
|
{"text": "进度", "weight": 1.2},
|
||||||
|
{"text": "横道", "weight": 1.5},
|
||||||
|
{"text": "网络图", "weight": 1.5},
|
||||||
|
{"text": "平面", "weight": 1.0},
|
||||||
|
{"text": "布置", "weight": 0.8},
|
||||||
|
{"text": "监测", "weight": 0.8},
|
||||||
|
{"text": "示意", "weight": 0.8},
|
||||||
|
{"text": "应急", "weight": 0.8}
|
||||||
|
],
|
||||||
|
"table_keywords": [
|
||||||
|
{"text": "一览表", "weight": 1.5},
|
||||||
|
{"text": "人员", "weight": 1.0},
|
||||||
|
{"text": "配置", "weight": 0.8},
|
||||||
|
{"text": "设备", "weight": 1.0},
|
||||||
|
{"text": "机械", "weight": 0.9},
|
||||||
|
{"text": "劳动力", "weight": 1.2},
|
||||||
|
{"text": "工种", "weight": 1.0},
|
||||||
|
{"text": "检验", "weight": 1.0},
|
||||||
|
{"text": "验收", "weight": 0.9},
|
||||||
|
{"text": "材料", "weight": 1.0},
|
||||||
|
{"text": "供应", "weight": 0.9},
|
||||||
|
{"text": "风险", "weight": 1.0},
|
||||||
|
{"text": "措施", "weight": 0.6},
|
||||||
|
{"text": "清单", "weight": 0.8},
|
||||||
|
{"text": "计划", "weight": 0.7}
|
||||||
|
]
|
||||||
|
}
|
||||||
BIN
data/exports/20260420测试海东技术标_20260420_180450.docx
Normal file
BIN
data/exports/20260420测试海东技术标_20260420_180450.docx
Normal file
Binary file not shown.
BIN
data/exports/30260420投标技术文档_20260420_170252.docx
Normal file
BIN
data/exports/30260420投标技术文档_20260420_170252.docx
Normal file
Binary file not shown.
BIN
data/exports/A11111_20260423_160556.docx
Normal file
BIN
data/exports/A11111_20260423_160556.docx
Normal file
Binary file not shown.
BIN
data/exports/A1210201_20260423_151502.docx
Normal file
BIN
data/exports/A1210201_20260423_151502.docx
Normal file
Binary file not shown.
BIN
data/exports/A1210201_20260423_151638.docx
Normal file
BIN
data/exports/A1210201_20260423_151638.docx
Normal file
Binary file not shown.
BIN
data/exports/A1210201_20260423_152123.docx
Normal file
BIN
data/exports/A1210201_20260423_152123.docx
Normal file
Binary file not shown.
BIN
data/exports/A12_20260423_165937.docx
Normal file
BIN
data/exports/A12_20260423_165937.docx
Normal file
Binary file not shown.
BIN
data/exports/A2121212_20260421_103738.docx
Normal file
BIN
data/exports/A2121212_20260421_103738.docx
Normal file
Binary file not shown.
BIN
data/exports/A666_20260422_143004.docx
Normal file
BIN
data/exports/A666_20260422_143004.docx
Normal file
Binary file not shown.
BIN
data/exports/A666_20260422_153137.docx
Normal file
BIN
data/exports/A666_20260422_153137.docx
Normal file
Binary file not shown.
BIN
data/exports/A666_20260422_160459.docx
Normal file
BIN
data/exports/A666_20260422_160459.docx
Normal file
Binary file not shown.
BIN
data/exports/A6756757_20260423_161844.docx
Normal file
BIN
data/exports/A6756757_20260423_161844.docx
Normal file
Binary file not shown.
BIN
data/exports/HTH_20260424_104051.docx
Normal file
BIN
data/exports/HTH_20260424_104051.docx
Normal file
Binary file not shown.
BIN
data/exports/RETRE_20260424_094735.docx
Normal file
BIN
data/exports/RETRE_20260424_094735.docx
Normal file
Binary file not shown.
BIN
data/exports/SDG_20260424_100236.docx
Normal file
BIN
data/exports/SDG_20260424_100236.docx
Normal file
Binary file not shown.
BIN
data/exports/SDG_20260424_100328.docx
Normal file
BIN
data/exports/SDG_20260424_100328.docx
Normal file
Binary file not shown.
BIN
data/exports/SDG_20260424_100627.docx
Normal file
BIN
data/exports/SDG_20260424_100627.docx
Normal file
Binary file not shown.
BIN
data/exports/frasfio_20260424_140217.docx
Normal file
BIN
data/exports/frasfio_20260424_140217.docx
Normal file
Binary file not shown.
BIN
data/exports/frasfio_20260424_142454.docx
Normal file
BIN
data/exports/frasfio_20260424_142454.docx
Normal file
Binary file not shown.
BIN
data/exports/kuuy_20260424_104908.docx
Normal file
BIN
data/exports/kuuy_20260424_104908.docx
Normal file
Binary file not shown.
BIN
data/exports/kuuy_20260424_110646.docx
Normal file
BIN
data/exports/kuuy_20260424_110646.docx
Normal file
Binary file not shown.
BIN
data/exports/kuuy_20260424_110915.docx
Normal file
BIN
data/exports/kuuy_20260424_110915.docx
Normal file
Binary file not shown.
BIN
data/exports/kuuy_20260424_111840.docx
Normal file
BIN
data/exports/kuuy_20260424_111840.docx
Normal file
Binary file not shown.
BIN
data/exports/kuuy_20260424_112552.docx
Normal file
BIN
data/exports/kuuy_20260424_112552.docx
Normal file
Binary file not shown.
BIN
data/exports/kuuy_20260424_113616.docx
Normal file
BIN
data/exports/kuuy_20260424_113616.docx
Normal file
Binary file not shown.
BIN
data/exports/kuuy_20260424_114854.docx
Normal file
BIN
data/exports/kuuy_20260424_114854.docx
Normal file
Binary file not shown.
BIN
data/exports/rdgsg_20260423_155122.docx
Normal file
BIN
data/exports/rdgsg_20260423_155122.docx
Normal file
Binary file not shown.
BIN
data/exports/uyfiu_20260423_163707.docx
Normal file
BIN
data/exports/uyfiu_20260423_163707.docx
Normal file
Binary file not shown.
BIN
data/exports/uyfiu_20260423_164832.docx
Normal file
BIN
data/exports/uyfiu_20260423_164832.docx
Normal file
Binary file not shown.
BIN
data/projects.db
Normal file
BIN
data/projects.db
Normal file
Binary file not shown.
25
data/settings.json
Normal file
25
data/settings.json
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
{
|
||||||
|
"model_provider": "qwen",
|
||||||
|
"qwen_api_key": "sk-999173b3ca7f425a97cc4b12a2d3575f",
|
||||||
|
"qwen_model": "qwen-turbo-latest",
|
||||||
|
"qwen_base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
|
||||||
|
"openai_api_key": "sk-your-openai-key",
|
||||||
|
"openai_model": "gpt-4.1",
|
||||||
|
"openai_base_url": "https://api.openai.com/v1",
|
||||||
|
"deepseek_api_key": "sk-your-deepseek-key",
|
||||||
|
"deepseek_model": "deepseek-chat",
|
||||||
|
"deepseek_base_url": "https://api.deepseek.com/v1",
|
||||||
|
"ollama_base_url": "http://localhost:11434/v1",
|
||||||
|
"ollama_model": "qwen3:8b",
|
||||||
|
"doubao_api_key": "sk-your-doubao-key",
|
||||||
|
"doubao_model": "doubao-1-5-pro-32k",
|
||||||
|
"doubao_base_url": "https://ark.cn-beijing.volces.com/api/v3",
|
||||||
|
"kimi_api_key": "sk-your-kimi-key",
|
||||||
|
"kimi_model": "moonshot-v1-32k",
|
||||||
|
"kimi_base_url": "https://api.moonshot.cn/v1",
|
||||||
|
"max_concurrent": 20,
|
||||||
|
"llm_concurrency_limit": 20,
|
||||||
|
"content_volume": "concise",
|
||||||
|
"target_pages": 120,
|
||||||
|
"page_char_estimate": 700
|
||||||
|
}
|
||||||
35
data/style_presets.json
Normal file
35
data/style_presets.json
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
{
|
||||||
|
"standard": {
|
||||||
|
"marginTop": 2.54,
|
||||||
|
"marginBottom": 2.54,
|
||||||
|
"marginLeft": 3.18,
|
||||||
|
"marginRight": 3.18,
|
||||||
|
"paperOrientation": "纵",
|
||||||
|
"heading1Font": "黑体",
|
||||||
|
"heading1Size": "三号",
|
||||||
|
"heading1Bold": true,
|
||||||
|
"heading2Font": "宋体",
|
||||||
|
"heading2Size": "小四",
|
||||||
|
"tableHeaderFont": "宋体",
|
||||||
|
"tableHeaderSize": "小四",
|
||||||
|
"tableHeaderLineSpacing": "24",
|
||||||
|
"tableHeaderAlign": "center",
|
||||||
|
"tableHeaderBold": false,
|
||||||
|
"tableBodyFont": "宋体",
|
||||||
|
"tableBodySize": "小四",
|
||||||
|
"tableBodyLineSpacing": "24",
|
||||||
|
"tableBodyAlign": "center",
|
||||||
|
"tableBodyBold": false,
|
||||||
|
"tocEnabled": true,
|
||||||
|
"tocTitleFont": "黑体",
|
||||||
|
"tocTitleSize": "三号",
|
||||||
|
"tocTitleLineSpacing": "24",
|
||||||
|
"tocTitleAlign": "center",
|
||||||
|
"tocTitleBold": true,
|
||||||
|
"tocBodyFont": "宋体",
|
||||||
|
"tocBodySize": "四号",
|
||||||
|
"tocBodyLineSpacing": "24",
|
||||||
|
"tocBodyAlign": "left",
|
||||||
|
"tocBodyBold": false
|
||||||
|
}
|
||||||
|
}
|
||||||
BIN
data/uploads/10_boq_工程量清单.pdf
Normal file
BIN
data/uploads/10_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/10_招标文件正文.pdf
Normal file
BIN
data/uploads/10_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/11_boq_工程量清单.pdf
Normal file
BIN
data/uploads/11_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/11_招标文件正文.pdf
Normal file
BIN
data/uploads/11_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/12_boq_工程量清单.pdf
Normal file
BIN
data/uploads/12_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/12_招标文件正文.pdf
Normal file
BIN
data/uploads/12_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/15_boq_工程量清单.pdf
Normal file
BIN
data/uploads/15_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/15_招标文件正文.pdf
Normal file
BIN
data/uploads/15_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/16_boq_工程量清单.pdf
Normal file
BIN
data/uploads/16_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/16_招标文件正文.pdf
Normal file
BIN
data/uploads/16_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/17_boq_工程量清单.pdf
Normal file
BIN
data/uploads/17_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/17_招标文件正文.pdf
Normal file
BIN
data/uploads/17_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/19_boq_工程量清单.pdf
Normal file
BIN
data/uploads/19_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/19_招标文件正文.pdf
Normal file
BIN
data/uploads/19_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/1_boq_工程量清单1.pdf
Normal file
BIN
data/uploads/1_boq_工程量清单1.pdf
Normal file
Binary file not shown.
BIN
data/uploads/1_招标文件正文1.pdf
Normal file
BIN
data/uploads/1_招标文件正文1.pdf
Normal file
Binary file not shown.
BIN
data/uploads/20_boq_工程量清单.pdf
Normal file
BIN
data/uploads/20_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/20_招标文件正文.pdf
Normal file
BIN
data/uploads/20_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/21_boq_工程量清单.pdf
Normal file
BIN
data/uploads/21_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/21_招标文件正文.pdf
Normal file
BIN
data/uploads/21_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/22_boq_工程量清单.pdf
Normal file
BIN
data/uploads/22_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/22_招标文件正文.pdf
Normal file
BIN
data/uploads/22_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/23_boq_工程量清单.pdf
Normal file
BIN
data/uploads/23_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/23_招标文件正文.pdf
Normal file
BIN
data/uploads/23_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/24_boq_工程量清单.pdf
Normal file
BIN
data/uploads/24_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/24_招标文件正文.pdf
Normal file
BIN
data/uploads/24_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/25_boq_工程量清单.pdf
Normal file
BIN
data/uploads/25_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/25_招标文件正文.pdf
Normal file
BIN
data/uploads/25_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/26_boq_工程量清单.pdf
Normal file
BIN
data/uploads/26_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/26_招标文件正文.pdf
Normal file
BIN
data/uploads/26_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/27_boq_工程量清单.pdf
Normal file
BIN
data/uploads/27_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/27_招标文件正文.pdf
Normal file
BIN
data/uploads/27_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/28_boq_工程量清单.pdf
Normal file
BIN
data/uploads/28_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/28_招标文件正文.pdf
Normal file
BIN
data/uploads/28_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/29_boq_工程量清单.pdf
Normal file
BIN
data/uploads/29_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/29_招标文件正文.pdf
Normal file
BIN
data/uploads/29_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/2_boq_工程量清单1.pdf
Normal file
BIN
data/uploads/2_boq_工程量清单1.pdf
Normal file
Binary file not shown.
BIN
data/uploads/2_招标文件正文1.pdf
Normal file
BIN
data/uploads/2_招标文件正文1.pdf
Normal file
Binary file not shown.
BIN
data/uploads/30_boq_工程量清单.pdf
Normal file
BIN
data/uploads/30_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/30_招标文件正文.pdf
Normal file
BIN
data/uploads/30_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/31_boq_工程量清单.pdf
Normal file
BIN
data/uploads/31_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/31_招标文件正文.pdf
Normal file
BIN
data/uploads/31_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/32_boq_工程量清单.pdf
Normal file
BIN
data/uploads/32_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/32_招标文件正文.pdf
Normal file
BIN
data/uploads/32_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/33_boq_工程量清单.pdf
Normal file
BIN
data/uploads/33_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/33_招标文件正文.pdf
Normal file
BIN
data/uploads/33_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/34_boq_工程量清单.pdf
Normal file
BIN
data/uploads/34_boq_工程量清单.pdf
Normal file
Binary file not shown.
BIN
data/uploads/34_招标文件正文.pdf
Normal file
BIN
data/uploads/34_招标文件正文.pdf
Normal file
Binary file not shown.
BIN
data/uploads/3_boq_工程量清单1.pdf
Normal file
BIN
data/uploads/3_boq_工程量清单1.pdf
Normal file
Binary file not shown.
BIN
data/uploads/3_招标文件正文1.pdf
Normal file
BIN
data/uploads/3_招标文件正文1.pdf
Normal file
Binary file not shown.
BIN
data/uploads/4_boq_工程量清单1.pdf
Normal file
BIN
data/uploads/4_boq_工程量清单1.pdf
Normal file
Binary file not shown.
BIN
data/uploads/4_招标文件正文1.pdf
Normal file
BIN
data/uploads/4_招标文件正文1.pdf
Normal file
Binary file not shown.
BIN
data/uploads/5_boq_工程量清单1.pdf
Normal file
BIN
data/uploads/5_boq_工程量清单1.pdf
Normal file
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user