This commit is contained in:
wanghui 2026-04-23 14:37:19 +08:00
commit 909e228a9b
102 changed files with 14254 additions and 0 deletions

1
.deps_installed Normal file
View File

@ -0,0 +1 @@

6
.gitignore vendored Normal file
View File

@ -0,0 +1,6 @@
.env
*.log
__pycache__/
*.pyc
.venv/
venv/

10
.idea/.gitignore generated vendored Normal file
View File

@ -0,0 +1,10 @@
# 默认忽略的文件
/shelf/
/workspace.xml
# 基于编辑器的 HTTP 客户端请求
/httpRequests/
# 已忽略包含查询文件的默认文件夹
/queries/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

View File

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

8
.idea/modules.xml generated Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/tech-bid-manage20260423.iml" filepath="$PROJECT_DIR$/.idea/tech-bid-manage20260423.iml" />
</modules>
</component>
</project>

16
.idea/tech-bid-manage20260423.iml generated Normal file
View File

@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TemplatesService">
<option name="TEMPLATE_CONFIGURATION" value="Jinja2" />
<option name="TEMPLATE_FOLDERS">
<list>
<option value="$MODULE_DIR$/templates" />
</list>
</option>
</component>
</module>

6
.idea/vcs.xml generated Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>

113
README.md Normal file
View File

@ -0,0 +1,113 @@
# 标伙伴 · AI 标书助手
基于大模型的智能标书生成工具(单机版),支持解析招标文件、自动生成技术标书、导出 Word 文档。
## 快速开始
### 方式一:双击启动(Windows)
直接双击 `start.bat`,首次运行会自动安装依赖。
### 方式二:命令行启动
```bash
# 1. 安装依赖
pip install -r requirements.txt
# 2. 启动应用
python app.py
```
浏览器访问 **http://localhost:5000**
---
## 配置 API Key
首次使用前,点击右上角 ⚙️ 设置图标,选择模型提供商并填入 API Key
| 提供商 | 推荐模型 | 申请地址 |
|--------|---------|---------|
| 通义千问 | qwen-max | https://dashscope.aliyun.com/ |
| DeepSeek | deepseek-chat (V3) | https://platform.deepseek.com/ |
| OpenAI | gpt-4o | https://platform.openai.com/ |
> **DeepSeek 说明**:deepseek-chat (V3) 性价比极高,推荐用于生产环境。
> 由于 DeepSeek 暂不提供 Embedding API,使用知识库功能时会自动回退到本地 sentence-transformers 模型(首次使用需下载约 90MB)。
也可通过环境变量配置:
```bash
# 通义千问
set QWEN_API_KEY=sk-xxxxxxxx
set MODEL_PROVIDER=qwen
# DeepSeek
set DEEPSEEK_API_KEY=sk-xxxxxxxx
set MODEL_PROVIDER=deepseek
python app.py
```
---
## 使用流程
1. **新建项目** → 输入项目名称
2. **上传招标文件** → 支持 PDF / DOC / DOCX
3. **AI 解析** → 自动提取评分要求、资质条件、商务条款
4. **生成大纲** → 按评分权重生成四级章节目录
5. **生成内容** → 逐章节或一键全部生成
6. **合规检查** → 对照招标要求检验覆盖情况
7. **导出 Word** → 专业排版,直接使用
---
## 目录结构
```
autorfp/
├── app.py # Flask 主程序
├── config.py # 配置文件
├── requirements.txt # Python 依赖
├── start.bat # Windows 一键启动
├── prompts/ # AI 提示词模板
├── modules/ # 功能模块
│ ├── parser.py # 招标文件解析
│ ├── generator.py # 标书内容生成
│ ├── checker.py # 合规检查
│ ├── exporter.py # Word 导出
│ └── knowledge.py # 企业知识库
├── utils/ # 工具函数
│ ├── ai_client.py # AI API 封装
│ ├── file_utils.py # 文件处理
│ └── prompts.py # 提示词加载
├── templates/ # HTML 模板
├── static/ # 静态资源
└── data/ # 数据目录(自动创建)
├── projects.db # SQLite 数据库
├── uploads/ # 上传的招标文件
├── exports/ # 导出的标书
├── knowledge/ # 知识库文件
└── chroma/ # 向量数据库
```
---
## 企业知识库
在项目页面切换到「知识库」标签,上传历史标书文件。
系统会自动将文件分块存入向量数据库,生成内容时自动检索相关片段,让 AI 更好地体现企业优势。
---
## 常见问题
**Q: 解析速度很慢?**
A: 招标文件越长耗时越长,通常 30-120 秒。建议使用 qwen-max 或 gpt-4o。
**Q: 内容生成失败?**
A: 检查 API Key 是否正确,以及账户余额是否充足。
**Q: 导出的 Word 文件乱码?**
A: 请使用 Microsoft Word 2016 及以上版本打开。

1163
app.py Normal file

File diff suppressed because it is too large Load Diff

118
bid_partner.spec Normal file
View File

@ -0,0 +1,118 @@
# -*- mode: python ; coding: utf-8 -*-
"""
PyInstaller spec for BidPartner (AI bid-document assistant).

Build with: pyinstaller bid_partner.spec

NOTE: the knowledge base was migrated to SQLite + pure-Python vector
storage, so ChromaDB is no longer a dependency and the bundle is smaller.
"""
import os
from PyInstaller.utils.hooks import collect_all, collect_data_files

block_cipher = None

# ── Collect complex packages ─────────────────────────────────────────────────
# collect_all returns (datas, binaries, hiddenimports) for packages whose
# submodules/data PyInstaller cannot discover statically.
openai_datas, openai_bins, openai_hidden = collect_all('openai')
pydantic_datas, pydantic_bins, pydantic_hidden = collect_all('pydantic')
# tiktoken data (BPE vocab files)
tiktoken_datas = collect_data_files('tiktoken')

a = Analysis(
    ['launcher.py'],
    pathex=['.'],
    binaries=openai_bins + pydantic_bins,
    datas=[
        # ── App assets (read-only, go into _MEIPASS) ──
        ('templates', 'templates'),
        ('static', 'static'),
        # ── Package data ──
        *openai_datas,
        *pydantic_datas,
        *tiktoken_datas,
    ],
    hiddenimports=[
        # Flask / Werkzeug
        'flask', 'flask_cors', 'werkzeug', 'werkzeug.serving',
        'werkzeug.routing', 'werkzeug.middleware.proxy_fix',
        'jinja2', 'jinja2.ext',
        # SQLite (stdlib, always present)
        'sqlite3',
        # OpenAI
        *openai_hidden,
        # Pydantic
        *pydantic_hidden,
        # Document processing
        'PyPDF2', 'pypdf', 'pypdf.errors',
        'pdfminer', 'pdfminer.high_level', 'pdfminer.layout',
        'pdfminer.pdfpage', 'pdfminer.pdfinterp', 'pdfminer.converter',
        'docx', 'docx.oxml', 'docx.oxml.ns', 'docx.shared',
        'docx.enum', 'docx.enum.text', 'docx.enum.style',
        'python_docx',
        # tiktoken
        'tiktoken', 'tiktoken.core', 'tiktoken.model',
        'tiktoken_ext', 'tiktoken_ext.openai_public',
        # Network / encoding
        'requests', 'chardet', 'httpx', 'httpcore',
        'anyio', 'anyio.streams', 'anyio.streams.memory',
        'sniffio', 'certifi',
        # Stdlib extras
        'importlib.metadata', 'importlib.resources',
        'pkg_resources', 'json', 'math', 'threading',
        # Local project modules (explicitly include all)
        'config', 'app',
        'utils', 'utils.ai_client', 'utils.file_utils',
        'utils.prompts', 'utils.settings', 'utils.boq_parser', 'utils.bill_analysis',
        'modules', 'modules.parser', 'modules.generator',
        'modules.checker', 'modules.exporter', 'modules.knowledge',
    ],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    excludes=[
        # Heavy packages not used in this app
        'matplotlib', 'pandas', 'scipy', 'numpy',
        'IPython', 'jupyter', 'notebook',
        'PIL', 'Pillow',
        'cv2', 'torch', 'tensorflow',
        'pytest', 'unittest',
        # ChromaDB and its dependency chain (removed — replaced by the
        # built-in SQLite-backed storage)
        'chromadb', 'hnswlib', 'posthog', 'pypika',
        'mmh3', 'overrides', 'monotonic',
        'sentence_transformers', 'onnxruntime',
    ],
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False,
)
pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
exe = EXE(
    pyz,
    a.scripts,
    [],
    exclude_binaries=True,
    name='bid_partner',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=False,
    console=False,  # no black console window — GUI launcher takes over
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
)
# One-folder distribution: dist/BidPartner/ is the shippable package.
coll = COLLECT(
    exe,
    a.binaries,
    a.zipfiles,
    a.datas,
    strip=False,
    upx=False,
    upx_exclude=[],
    name='BidPartner',
)

672
bill-worker.js Normal file
View File

@ -0,0 +1,672 @@
/**
 * bill-worker.js — PDF 清单解析调度器(Worker Thread)
 *
 * 架构(v3:SharedArrayBuffer 零拷贝):
 *   Phase 1 并行文本提取:
 *     PDF 数据写入 SharedArrayBuffer(一次分配,所有子线程共享读),
 *     启动 N 个 page-worker,每个负责固定 20 页。
 *
 *   Phase 2 清单页筛选 + 文本解析(纯正则,毫秒级):
 *     汇总全部页面文本 → 关键字筛选清单页 → 多行合并 → 逐行解析。
 */
'use strict';
const { parentPort } = require('worker_threads');
const { Worker } = require('worker_threads');
const path = require('path');
const PAGES_PER_CHUNK = 20;
// Entry point: the parent thread posts {type: 'parse', buffer} and receives a
// single {type: 'done', ok, data|error} reply when parsing finishes or fails.
parentPort.on('message', async (msg) => {
    if (msg.type !== 'parse') return;
    const t0 = Date.now();
    try {
        // Make a clean copy right away so we own an independent ArrayBuffer
        // (the incoming buffer may be a view into transferred memory).
        const raw = msg.buffer;
        const buf = Buffer.alloc(raw.byteLength);
        Buffer.from(raw).copy(buf);
        if (buf.length === 0) {
            parentPort.postMessage({ type: 'done', ok: false, error: '收到空 PDF 数据' });
            return;
        }
        // ── Get the total page count ──
        const pdfjsModule = await import('pdfjs-dist/build/pdf.mjs');
        const pdfjsLib = pdfjsModule.default || pdfjsModule;
        // Give pdfjs its own copy — pdfjs may detach the buffer internally.
        const pdfData = new Uint8Array(buf.length);
        buf.copy(Buffer.from(pdfData.buffer));
        const pdf = await pdfjsLib.getDocument({ data: pdfData, isEvalSupported: false }).promise;
        const totalPages = pdf.numPages;
        // ── Write the PDF bytes into a SharedArrayBuffer (allocated once,
        //    shared read-only by every child worker — no per-worker copy) ──
        const sab = new SharedArrayBuffer(buf.length);
        const sabView = new Uint8Array(sab);
        buf.copy(Buffer.from(sabView.buffer)); // copy from the private buf into shared memory
        const workerCount = Math.ceil(totalPages / PAGES_PER_CHUNK);
        console.log(`[BillWorker] PDF ${totalPages} 页, ${workerCount} 路并行 (SharedArrayBuffer ${(buf.length/1024/1024).toFixed(1)}MB)`);
        // Phase 1: parallel text extraction
        const pageTexts = await parallelExtract(sab, buf.length, totalPages, workerCount);
        const t1 = Date.now();
        const extractedCount = pageTexts.filter(t => t.length > 0).length;
        console.log(`[BillWorker] Phase1 完成: ${t1 - t0}ms, ${extractedCount}/${totalPages} 页有文本`);
        // Scanned-document detection: almost no extractable text at all.
        const totalChars = pageTexts.reduce((s, t) => s + t.length, 0);
        if (totalChars < 50) {
            parentPort.postMessage({ type: 'done', ok: true, data: { scanned: true, reason: 'noText', totalPages } });
            return;
        }
        // Phase 2: select bill pages (lenient matching + contiguous-range fill)
        const BILL_KW = ['项目编码', '项目名称', '工程量', '计量单位', '综合单价', '清单编码'];
        const SEC_KW = ['分部分项', '分类分项', '措施项目', '其他项目', '工程量清单计价'];
        // Round 1: flag pages that are definitely bill pages.
        const billFlags = new Array(pageTexts.length).fill(false);
        for (let i = 0; i < pageTexts.length; i++) {
            const t = pageTexts[i];
            if (!t.trim()) continue;
            const hHits = BILL_KW.filter(k => t.includes(k)).length;
            const sHit = SEC_KW.some(k => t.includes(k));
            const hasCode = /\d{9}/.test(t);
            // Lenient: a 9-digit item code alone qualifies — header keywords
            // are no longer required on the same page.
            if (hHits >= 2 || sHit || hasCode) {
                billFlags[i] = true;
            }
        }
        // Round 2: contiguous fill — non-empty pages lying between two bill
        // pages are treated as continuation pages (they have no header row),
        // except pure fee/tax pages, which carry no construction items.
        const FEE_PAGE_KW = ['规费', '税金', '社会保险费', '住房公积金', '养老保险',
            '工伤保险', '失业保险', '医疗保险', '教育费附加', '城市维护建设税'];
        const firstBill = billFlags.indexOf(true);
        const lastBill = billFlags.lastIndexOf(true);
        if (firstBill >= 0 && lastBill > firstBill) {
            for (let i = firstBill; i <= lastBill; i++) {
                if (!billFlags[i] && pageTexts[i] && pageTexts[i].trim().length > 30) {
                    const t = pageTexts[i];
                    const feeHits = FEE_PAGE_KW.filter(kw => t.includes(kw)).length;
                    // 2+ fee keywords and no 9-digit code → pure fee page, skip.
                    if (feeHits >= 2 && !/\d{9}/.test(t)) continue;
                    billFlags[i] = true;
                }
            }
        }
        const billTexts = [];
        for (let i = 0; i < pageTexts.length; i++) {
            if (billFlags[i]) billTexts.push(pageTexts[i]);
        }
        if (!billTexts.length) {
            parentPort.postMessage({ type: 'done', ok: true, data: { scanned: false, noBillPages: true, totalPages } });
            return;
        }
        console.log(`[BillWorker] ${totalPages} 页 → ${billTexts.length} 页清单 (原始识别 ${billFlags.filter(f=>f).length - (lastBill - firstBill >= 0 ? 0 : 0)} / 补全后 ${billTexts.length})`);
        // Phase 3: parse the merged bill text into categories/items.
        const merged = billTexts.join('\n');
        const parsed = parseBillText(merged);
        const t2 = Date.now();
        console.log(`[BillWorker] Phase2+3: ${t2 - t1}ms, 总耗时: ${t2 - t0}ms`);
        parentPort.postMessage({
            type: 'done', ok: true,
            data: {
                scanned: false,
                ...parsed,
                // Timing/diagnostic metadata for the caller's logs.
                _meta: {
                    method: 'local-parallel',
                    workers: workerCount,
                    billPages: billTexts.length,
                    totalPages,
                    extractMs: t1 - t0,
                    parseMs: t2 - t1,
                    totalMs: t2 - t0,
                }
            }
        });
    } catch (err) {
        console.error('[BillWorker] 错误:', err.message);
        parentPort.postMessage({ type: 'done', ok: false, error: err.message });
    }
});
// ================================================================
// Phase 1: 多 Worker 并行提取SharedArrayBuffer 零拷贝)
// ================================================================
/**
 * Phase 1: extract per-page text with N page-workers, all reading the same
 * SharedArrayBuffer (zero-copy fan-out).
 *
 * Resolves with an array of `totalPages` strings (empty string for pages a
 * failed worker never delivered). Never rejects — worker failures are logged
 * and the remaining pages are returned as-is.
 *
 * @param {SharedArrayBuffer} sab        shared PDF bytes
 * @param {number} dataLength            byte length of the PDF data
 * @param {number} totalPages            page count of the document
 * @param {number} workerCount           number of page-workers to spawn
 * @returns {Promise<string[]>}          text per page, index = page - 1
 */
function parallelExtract(sab, dataLength, totalPages, workerCount) {
    return new Promise((resolve) => {
        const workerPath = path.join(__dirname, 'page-worker.js');
        const allPageTexts = new Array(totalPages).fill('');
        const workerStatus = new Array(workerCount).fill('pending'); // pending, done, failed
        let resolved = false;
        // Resolve once every worker has reported either 'done' or 'failed'.
        const checkComplete = () => {
            if (resolved) return;
            const doneCount = workerStatus.filter(s => s === 'done' || s === 'failed').length;
            if (doneCount >= workerCount) {
                resolved = true;
                // Warn when some workers failed — their pages stay empty.
                const failedCount = workerStatus.filter(s => s === 'failed').length;
                if (failedCount > 0) {
                    console.warn(`[BillWorker] ${failedCount}/${workerCount} 个worker失败可能导致部分页面无内容`);
                }
                resolve(allPageTexts);
            }
        };
        for (let i = 0; i < workerCount; i++) {
            const startPage = i * PAGES_PER_CHUNK + 1;
            const endPage = Math.min((i + 1) * PAGES_PER_CHUNK, totalPages);
            // workerData carries the SharedArrayBuffer: it is shared across
            // threads rather than copied, so the data is never cleared.
            const w = new Worker(workerPath, {
                workerData: { sab, dataLength, startPage, endPage }
            });
            let workerDone = false;
            // Mark this worker finished exactly once, whichever event fires first.
            const markDone = (status) => {
                if (workerDone) return;
                workerDone = true;
                workerStatus[i] = status;
                checkComplete();
            };
            w.on('message', (msg) => {
                if (msg.ok && msg.results) {
                    for (const r of msg.results) {
                        allPageTexts[r.page - 1] = r.text;
                    }
                    markDone('done');
                } else if (!msg.ok) {
                    console.warn(`[BillWorker] page-worker[${startPage}-${endPage}] 失败: ${msg.error}`);
                    markDone('failed');
                }
            });
            w.on('error', (err) => {
                console.warn(`[BillWorker] page-worker[${startPage}-${endPage}] 异常: ${err.message}`);
                markDone('failed');
            });
            w.on('exit', (code) => {
                // 'exit' fires after 'message'; this is the fallback for a
                // worker that crashed without ever posting a message.
                if (code !== 0 && !workerDone) {
                    console.warn(`[BillWorker] page-worker[${startPage}-${endPage}] 意外退出(code=${code})`);
                    markDone('failed');
                } else if (!workerDone) {
                    markDone('done');
                }
            });
        }
        // Zero workers (empty document) → resolve immediately.
        if (workerCount <= 0) {
            resolved = true;
            resolve(allPageTexts);
        }
    });
}
// ================================================================
// Phase 3: 清单文本解析(纯正则 + 字符串处理,毫秒级)
// ================================================================
/**
 * Phase 3: parse merged bill-page text into {categories, project_summary}
 * using pure regex + string handling (millisecond-level, no AI call).
 *
 * Pipeline: normalize lines → merge wrapped physical lines into logical rows
 * → classify each row (item / section title / continuation) → drop fee items
 * → merge duplicate items by name.
 *
 * @param {string} text  concatenated text of all detected bill pages
 * @returns {{project_summary: {remark: string}, categories: Array<{name: string, items: Array}>}}
 */
function parseBillText(text) {
    const rawLines = text.split(/\n/).map(l => {
        let line = l.replace(/\t/g, ' ').trim();
        // Normalize dashed codes: "010-101-001-001" → "010101001001".
        line = line.replace(/(\d{2,4})[-](\d{2,4})[-](\d{2,4})(?:[-](\d{2,4}))?/g,
            (m, a, b, c, d) => {
                const combined = a + b + c + (d || '');
                return (combined.length >= 9 && combined.length <= 12) ? combined : m;
            });
        return line;
    });
    // ── Step 1: merge physical lines into logical rows ──
    // pdfjs splits text by Y coordinate, so one table row is usually one text
    // line — but the item-feature / item-name cells sometimes wrap and the
    // fragments must be re-joined.
    //
    // A new logical row starts when any of these hit:
    //   a) ordinal pattern: leading "1.1" / "1.1.1.1.5"
    //   b) bill item code: 9-12 digits, or B + 5-6 digits, at line start
    //   c) Chinese headline: 一 二 三 ... or (一)(二)...
    //   d) table-header content (skipped entirely)
    //   e) bare ordinal + space + code (e.g. "5 500101004001")
    const ITEM_START = /^\d+(\.\d+)+\s/; // ordinal such as 1.1 or 1.1.1
    const CODE_INLINE = /(?:^|\s)(\d{9,12}|(?<![A-Za-z])B\d{5,6})\s/; // item code anywhere in the line (lookbehind rejects GB/DB standard numbers)
    const CODE_START_RE = /^(\d{9,12}|B\d{5,6})\s/; // code at line start (a leading B can have no letter prefix)
    const SEQ_CODE_RE = /^\d{1,4}\s+(\d{9,12}|(?<![A-Za-z])B\d{5,6})\s/; // "ordinal code" layout
    const PAGE_MARK = /^--\s*\d+\s+of\s+\d+\s*--/;
    const HEADER_RE = /^序号\s+(项目编码|项目名称)/;
    const HEADER_KW = /^(项目编码|项目名称|清单编码|计量单位|综合单价|工程量|合\s*价|金额|序号)\s/;
    const CATEGORY_MARKERS = ['一', '二', '三', '四', '五', '六', '七', '八', '九', '十',
        '(一)', '(二)', '(三)', '(四)', '(五)'];
    const logicLines = [];
    let currentLine = '';
    // True when `raw` begins a new logical row (ordinal, code, or headline).
    function isNewLineTrigger(raw) {
        if (ITEM_START.test(raw)) return true;
        if (CODE_START_RE.test(raw)) return true;
        if (SEQ_CODE_RE.test(raw)) return true;
        if (CATEGORY_MARKERS.some(m => raw.startsWith(m + ' ') || raw.startsWith(m + '\u3000'))) return true;
        return false;
    }
    for (const raw of rawLines) {
        if (!raw || PAGE_MARK.test(raw)) continue;
        if (HEADER_RE.test(raw) || HEADER_KW.test(raw)) continue;
        if (/^(元)|^款章节号|^备注$|^第\d+页/.test(raw)) continue;
        if (isNewLineTrigger(raw)) {
            if (currentLine) logicLines.push(currentLine);
            currentLine = raw;
        } else if (CODE_INLINE.test(raw) && raw.length > 15) {
            // Contains a code and is long enough to look like a full table row
            // → also start a new logical row.
            if (currentLine) logicLines.push(currentLine);
            currentLine = raw;
        } else {
            // Continuation fragment (wrapped item-feature text, etc.).
            // Safety valve: force a break once the merged row grows too long,
            // so a whole page can never be swallowed into one row.
            if (currentLine && currentLine.length > 300) {
                logicLines.push(currentLine);
                currentLine = raw;
            } else {
                currentLine = currentLine ? currentLine + ' ' + raw : raw;
            }
        }
    }
    if (currentLine) logicLines.push(currentLine);
    console.log(`[BillWorker] 合并后 ${logicLines.length} 条逻辑行(原始 ${rawLines.length} 行)`);
    // Print the first 5 logical rows for debugging.
    for (let i = 0; i < Math.min(5, logicLines.length); i++) {
        console.log(`[BillWorker] L${i}: ${logicLines[i].substring(0, 120)}`);
    }
    const categories = [];
    let curCat = null, curItem = null;
    // Item code at any position: 9-12 digits or a B-code; the lookbehind
    // rejects GB/DB standard-number prefixes.
    const CODE_RE = /(?<![A-Za-z])(\d{9,12}|(?<![A-Za-z])B\d{5,6})/;
    const UNIT_TOKENS = ['m³','m²','m3','m2','km','hm2','㎡','㎥','t','kg',
        '个','台','套','组','根','块','片','张','只','吨','项',
        '处','座','件','段','条','把','扇','口','圈','道','孔',
        '对','副','樘','方','延m','株','棵','m'];
    const UNIT_SET = new Set(UNIT_TOKENS);
    const unitEscaped = UNIT_TOKENS.map(u => u.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
    const UNIT_RE = new RegExp(`(?:^|\\s)(${unitEscaped.join('|')})(?=\\s|\\d|$)`);
    const SKIP_RE = /合\s*计|小\s*计|本页小计|总\s*计|价税合计/;
    for (const line of logicLines) {
        if (SKIP_RE.test(line)) continue;
        // Strip a leading ordinal prefix ("1.1.1.1.5 " or a bare "5 ").
        let stripped = line.replace(/^\d+(\.\d+)*\s+/, '').trim();
        if (!stripped) stripped = line.trim();
        if (!stripped) continue;
        const cm = stripped.match(CODE_RE);
        if (cm) {
            // Row carries an item code → it is a new bill item.
            if (curItem && curCat) curCat.items.push(curItem);
            if (!curCat) { curCat = { name: '未分类', items: [] }; categories.push(curCat); }
            const code = cm[1];
            let rest = stripped.substring(cm.index + cm[0].length).trim();
            let name = '', unit = '', quantity = '', spec = '';
            const unitMatch = rest.match(UNIT_RE);
            if (unitMatch) {
                // Layout: name [features] UNIT quantity [price columns] [tail].
                const ui = rest.indexOf(unitMatch[0]);
                let rawName = rest.substring(0, ui).trim();
                unit = unitMatch[1];
                const afterUnit = rest.substring(ui + unitMatch[0].length).trim();
                const qm = afterUnit.match(/^([\d,.]+)/);
                if (qm) {
                    quantity = qm[1];
                    // Collect the tail after the quantity, skipping purely
                    // numeric columns (unit price, amount, ...).
                    let tail = afterUnit.substring(qm.index + qm[0].length).trim();
                    if (tail) {
                        const tailTokens = tail.split(/\s+/);
                        let si = 0;
                        while (si < tailTokens.length && /^[\d,.%\-]+$/.test(tailTokens[si])) si++;
                        const specTail = tailTokens.slice(si).join(' ').trim();
                        if (specTail) spec = specTail;
                    }
                }
                // Split rawName into the item name and any inline feature text.
                const ns = splitNameAndSpec(rawName);
                name = ns.name;
                if (ns.spec) spec = ns.spec + (spec ? ';' + spec : '');
            } else {
                // No regex unit hit: fall back to token scanning from the right.
                const tokens = rest.split(/\s+/).filter(t => t);
                let foundUnitIdx = -1;
                for (let ti = tokens.length - 1; ti >= 1; ti--) {
                    if (UNIT_SET.has(tokens[ti])) { foundUnitIdx = ti; break; }
                }
                if (foundUnitIdx >= 1) {
                    const rawNameStr = tokens.slice(0, foundUnitIdx).join(' ');
                    const ns = splitNameAndSpec(rawNameStr);
                    name = ns.name;
                    if (ns.spec) spec = ns.spec;
                    unit = tokens[foundUnitIdx];
                    const afterTokens = tokens.slice(foundUnitIdx + 1);
                    if (afterTokens.length && /^[\d,.]+$/.test(afterTokens[0])) {
                        quantity = afterTokens[0];
                        let si = 1;
                        while (si < afterTokens.length && /^[\d,.%\-]+$/.test(afterTokens[si])) si++;
                        const specTail = afterTokens.slice(si).join(' ').trim();
                        if (specTail) spec = spec ? spec + ';' + specTail : specTail;
                    }
                } else {
                    name = rest;
                }
            }
            name = name.replace(/\s+/g, '').trim();
            // If the name still ends with a unit token, peel it off.
            for (const u of UNIT_TOKENS) {
                if (name.endsWith(u) && name.length > u.length) {
                    unit = unit || u;
                    name = name.substring(0, name.length - u.length);
                    break;
                }
            }
            curItem = { code, name, unit, quantity, spec };
            continue;
        }
        // ── Fallback: no standard code but a "name unit quantity" structure →
        // still treat as a bill item. Common for measure items and
        // supplementary items that were never assigned a code. ──
        if (!cm && stripped.length > 4) {
            const uniMatch = stripped.match(UNIT_RE);
            if (uniMatch) {
                const ui = stripped.indexOf(uniMatch[0]);
                const beforeUnit = stripped.substring(0, ui).trim();
                const afterUnit = stripped.substring(ui + uniMatch[0].length).trim();
                const hasQty = /^[\d,.]+/.test(afterUnit);
                // Name is 2-50 chars, contains CJK, has a quantity, and is not
                // a section headline.
                if (beforeUnit.length >= 2 && beforeUnit.length <= 50 && hasQty
                    && /[\u4e00-\u9fff]/.test(beforeUnit)) {
                    if (curItem && curCat) curCat.items.push(curItem);
                    if (!curCat) { curCat = { name: '未分类', items: [] }; categories.push(curCat); }
                    const unit = uniMatch[1];
                    const qm = afterUnit.match(/^([\d,.]+)/);
                    const quantity = qm ? qm[1] : '';
                    const ns = splitNameAndSpec(beforeUnit);
                    const name = ns.name.replace(/\s+/g, '').trim();
                    const spec = ns.spec || '';
                    curItem = { code: '', name, unit, quantity, spec };
                    continue;
                }
            }
        }
        // Section-title detection: short text, no code, construction keywords.
        // Key guard: a line containing a measurement unit is an item row,
        // not a title.
        if (stripped.length > 2 && stripped.length < 60 && !CODE_RE.test(stripped)) {
            if (UNIT_RE.test(stripped) && /\d+\.?\d*\s*$/.test(stripped)) {
                if (curItem) curItem.spec = curItem.spec ? curItem.spec + ';' + stripped : stripped;
                continue;
            }
            if (isCatTitle(stripped) && !UNIT_RE.test(stripped) && !isFeeCatTitle(stripped)) {
                if (curItem && curCat) { curCat.items.push(curItem); curItem = null; }
                const cleanTitle = stripped.replace(/\s+(座|个|项|处|m|km|段|条)\s+\d+[\d.]*\s*$/, '').trim();
                curCat = { name: cleanTitle, items: [] };
                categories.push(curCat);
                continue;
            }
        }
        if (/^[一二三四五六七八九十]+\s/.test(stripped) || /^[一二三四五六七八九十\d]+/.test(stripped)) {
            // Chinese-numeral headlines also need the fee-title exclusion.
            const cleanTitle = stripped.replace(/\s+(座|个|项|处)\s+\d+[\d.]*\s*$/, '').trim();
            if (isFeeCatTitle(cleanTitle)) {
                // Fee-type title: skip without creating a section (its rows
                // fall through as continuation text).
                continue;
            }
            if (curItem && curCat) { curCat.items.push(curItem); curItem = null; }
            curCat = { name: cleanTitle, items: [] };
            categories.push(curCat);
            continue;
        }
        // Anything else is appended to the current item's feature text.
        if (curItem && stripped.length > 1) {
            curItem.spec = curItem.spec ? curItem.spec + ';' + stripped : stripped;
        }
    }
    if (curItem && curCat) curCat.items.push(curItem);
    // Drop fee items: keep only construction items that belong in the
    // technical bid document.
    let feeFiltered = 0;
    for (const cat of categories) {
        if (cat.items) {
            const before = cat.items.length;
            cat.items = cat.items.filter(it => !isFeeItem(it.name));
            feeFiltered += before - cat.items.length;
        }
    }
    if (feeFiltered > 0) console.log(`[BillWorker] 费用项过滤: 移除 ${feeFiltered}`);
    // ========== Merge by item name (core dedup — large item-count drop) ==========
    // Rule: within one section, items sharing a name collapse into one:
    //   - code: first non-empty code wins
    //   - unit: first non-empty unit wins
    //   - quantity: numeric sum when possible, else semicolon-joined
    //   - spec: deduplicated, semicolon-joined (long ones truncated)
    let totalBeforeMerge = 0, totalAfterMerge = 0;
    for (const cat of categories) {
        if (!cat.items || !cat.items.length) continue;
        totalBeforeMerge += cat.items.length;
        const nameMap = new Map(); // name → merged item
        for (const item of cat.items) {
            const key = (item.name || '').replace(/\s+/g, '').trim();
            if (!key) continue;
            if (!nameMap.has(key)) {
                nameMap.set(key, {
                    code: item.code || '',
                    name: item.name,
                    unit: item.unit || '',
                    quantity: item.quantity || '',
                    spec: item.spec || '',
                    _count: 1,
                    _quantities: item.quantity ? [item.quantity] : [],
                    _specs: item.spec ? [item.spec] : [],
                });
            } else {
                const m = nameMap.get(key);
                m._count++;
                // code: keep the first non-empty one
                if (!m.code && item.code) m.code = item.code;
                // unit: keep the first non-empty one
                if (!m.unit && item.unit) m.unit = item.unit;
                // quantity: collect all of them
                if (item.quantity) m._quantities.push(item.quantity);
                // spec: collect distinct values
                if (item.spec && !m._specs.includes(item.spec)) {
                    m._specs.push(item.spec);
                }
            }
        }
        // Post-pass: synthesize the final fields.
        const merged = [];
        for (const [, m] of nameMap) {
            // quantity: attempt a numeric sum
            if (m._quantities.length > 1) {
                const nums = m._quantities.map(q => parseFloat(q.replace(/,/g, '')));
                if (nums.every(n => !isNaN(n))) {
                    const sum = nums.reduce((a, b) => a + b, 0);
                    m.quantity = sum % 1 === 0 ? String(sum) : sum.toFixed(2);
                } else {
                    m.quantity = m._quantities.join('; ');
                }
            } else if (m._quantities.length === 1) {
                m.quantity = m._quantities[0];
            }
            // spec: join deduplicated specs, each capped at 120 chars
            if (m._specs.length > 0) {
                const trimmed = m._specs.map(s => s.length > 120 ? s.substring(0, 120) + '...' : s);
                m.spec = trimmed.join('; ');
                // overall spec cap: 300 chars
                if (m.spec.length > 300) m.spec = m.spec.substring(0, 300) + '...';
            }
            // drop the temporary accumulator fields
            delete m._count; delete m._quantities; delete m._specs;
            merged.push(m);
        }
        cat.items = merged;
        totalAfterMerge += merged.length;
    }
    const mergedCount = totalBeforeMerge - totalAfterMerge;
    if (mergedCount > 0) {
        console.log(`[BillWorker] 按名称合并: ${totalBeforeMerge}${totalAfterMerge} 项(合并 ${mergedCount} 个重复项)`);
    }
    const valid = categories.filter(c => c.items && c.items.length > 0);
    const totalItems = valid.reduce((s, c) => s + c.items.length, 0);
    const withSpec = valid.reduce((s, c) => s + c.items.filter(it => it.spec).length, 0);
    const withCode = valid.reduce((s, c) => s + c.items.filter(it => it.code).length, 0);
    console.log(`[BillWorker] 最终结果: ${valid.length} 分部, ${totalItems} 清单项 (${withCode} 有编码, ${withSpec} 有spec)`);
    // Print the first 3 items for debugging.
    let debugCount = 0;
    for (const cat of valid) {
        for (const it of cat.items) {
            if (debugCount < 3) {
                console.log(`[BillWorker] 样例: [${it.code}] ${it.name} | ${it.unit} | qty=${it.quantity} | spec=${(it.spec||'').substring(0, 80)}`);
                debugCount++;
            }
        }
    }
    return {
        project_summary: { remark: `本地解析:${valid.length} 个分部,${totalItems} 个清单项(合并前 ${totalBeforeMerge} 项)` },
        categories: valid,
    };
}
/**
* 判断清单项是否为"费用项"非施工内容不写入技术标
* 安全文明措施费规费税金暂列金额等
*/
/**
 * Decide whether a bill item name denotes a "fee" entry (measure fees,
 * statutory charges, taxes, provisional sums, ...) that carries no
 * construction content and must not be written into the technical bid.
 *
 * @param {string} name  item name, possibly containing whitespace
 * @returns {boolean}    true when the item is a fee entry
 */
function isFeeItem(name) {
    if (!name) return false;
    const compact = name.replace(/\s+/g, '');
    // ── 1. whole-string matches ──
    const EXACT_NAMES = new Set([
        '规费', '税金', '利润', '增值税', '暂列金额', '暂估价', '计日工',
        '总承包服务费', '企业管理费', '甲供材料保管费', '价税合计',
    ]);
    if (EXACT_NAMES.has(compact)) return true;
    // ── 2. substring matches: measure fees / statutory charges / insurance /
    //       administrative levies ──
    const FEE_SUBSTRINGS = [
        '安全文明', '文明施工费', '环境保护费', '临时设施费',
        '夜间施工增加费', '夜间施工费',
        '冬雨季施工增加费', '冬雨季施工费',
        '二次搬运费', '大型机械设备进出场', '大型机械进出场',
        '施工排水降水', '排水降水费',
        '已完工程及设备保护', '已完工程保护费',
        '工程排污费', '社会保障费', '住房公积金',
        '工伤保险', '劳动保险', '意外伤害保险', '建筑工程保险',
        '城市维护建设税', '城市建设维护税',
        '教育费附加', '地方教育附加',
        '材料暂估', '专业工程暂估',
        '超高施工增加费', '安全防护费',
        '措施项目费', '其他项目费', '不可竞争费',
    ];
    return FEE_SUBSTRINGS.some(kw => compact.includes(kw));
}
/**
* rawName 中的"项目名称"与内联"项目特征描述"分离
* : "土方开挖 1.土壤类别:普通土" { name: "土方开挖", spec: "1.土壤类别:普通土" }
*/
/**
 * Split an inline "item name + feature description" string.
 * Example: "土方开挖 1.土壤类别:普通土"
 *       →  { name: "土方开挖", spec: "1.土壤类别:普通土" }
 *
 * @param {string} rawName  raw name cell text
 * @returns {{name: string, spec: string}}
 */
function splitNameAndSpec(rawName) {
    if (!rawName) return { name: '', spec: '' };
    // Split helper: everything before idx is the name, the rest is the spec.
    const splitAt = (idx) => ({
        name: rawName.substring(0, idx).trim(),
        spec: rawName.substring(idx).trim()
    });
    // Pattern 1: numbered feature entry — digit + separator + CJK
    // (e.g. "1.土壤类别", "2、强度等级").
    const numbered = rawName.match(/\d+[.、.)\uFF09]\s*[\u4e00-\u9fff]/);
    if (numbered && numbered.index > 0) return splitAt(numbered.index);
    // Pattern 2: attribute keyword followed by a colon (e.g. "材质:", "规格:").
    const SPEC_KW_RE = /(材质|规格|型号|品牌|颜色|尺寸|厚度|直径|管径|强度|等级|类别|类型|做法|要求|标准|内容|工作内容|土壤|含量|配合比|工艺|方式|形式|范围|部位|位置|高度|宽度|长度|深度|坡度|截面|跨度|运距|开挖|回填|混凝土|钢筋|压实)[:]/;
    const attr = rawName.match(SPEC_KW_RE);
    if (attr && attr.index > 0) return splitAt(attr.index);
    // Pattern 3: parenthesised index — "(1)" or "(1)".
    const paren = rawName.match(/[(]\d+[)]/);
    if (paren && paren.index > 0) return splitAt(paren.index);
    // Nothing matched: the whole string is the name.
    return { name: rawName, spec: '' };
}
/**
 * Recognise a section (分部) title: the text mentions at least one
 * construction-discipline term from the vocabulary below.
 *
 * @param {string} text  candidate title text
 * @returns {boolean}
 */
function isCatTitle(text) {
    const TITLE_TERMS = [
        '土建','建筑','结构','装饰','装修','安装','给排水','暖通','空调','通风',
        '电气','强电','弱电','消防','智能化','幕墙','门窗','园林','绿化','景观',
        '市政','道路','桥梁','管网','基础','地基','桩基','主体','屋面','防水',
        '保温','钢结构','排水','给水','照明','动力','防雷','电梯','人防','室外',
        '附属','分部','工程','措施','清单','土石方','混凝土','砌筑','模板','脚手架',
        '水利','河道','管道','阀门','设备','仪表','自动化','通信','网络',
        '拆除','外墙','内墙','楼地面','天棚','吊顶','栏杆','屋顶','涂料','抹灰',
        '廊道','阀门井','蓄水池','泵站','供水','引水','水源','渠道','闸门',
        '围栏','警示','检修','管线','配电','水池','水塔','取水','净水',
    ];
    for (const term of TITLE_TERMS) {
        if (text.includes(term)) return true;
    }
    return false;
}
/**
* 判断分部标题是否为"费用类"不应创建分部分类
* 规费税金措施项目费其他项目费 等非施工类分部
*/
/**
 * Decide whether a section title is a "fee" heading (规费 / 税金 / 措施项目费
 * / 其他项目费, ...) — non-construction sections that must not become a
 * category in the parsed bill.
 *
 * @param {string} text  candidate section title
 * @returns {boolean}    true when the title is fee-related
 */
function isFeeCatTitle(text) {
    if (!text) return false;
    const squeezed = text.replace(/\s+/g, '');
    // Whole-title matches first.
    const EXACT_TITLES = new Set([
        '规费', '税金', '利润', '增值税', '暂列金额', '暂估价', '计日工',
        '总承包服务费', '企业管理费', '价税合计',
        '措施项目费', '其他项目费', '不可竞争费',
    ]);
    if (EXACT_TITLES.has(squeezed)) return true;
    // Then substring matches.
    const PARTIAL_TERMS = [
        '措施项目费', '其他项目费', '不可竞争费',
        '规费汇总', '税金汇总', '费率', '费用汇总', '费用合计',
        '暂列金额', '暂估价', '计日工', '总承包服务费',
        '安全文明施工费', '社会保障费', '住房公积金',
        '工伤保险', '教育费附加', '城市维护建设税',
    ];
    return PARTIAL_TERMS.some(kw => squeezed.includes(kw));
}

95
build.bat Normal file
View File

@ -0,0 +1,95 @@
@echo off
chcp 65001 >nul 2>&1
setlocal
echo ============================================================
echo BidPartner - Build Desktop EXE
echo ============================================================
echo.
:: ── 1. Check Python ────────────────────────────────────────────────────────
python --version >nul 2>&1
if errorlevel 1 (
    echo [ERROR] Python not found. Please install Python 3.9+.
    pause & exit /b 1
)
:: ── 2. Install / upgrade PyInstaller ───────────────────────────────────────
echo [Step 1/4] Installing PyInstaller...
pip install --quiet --upgrade pyinstaller
if errorlevel 1 (
    echo [ERROR] Failed to install PyInstaller.
    pause & exit /b 1
)
:: ── 3. Install project dependencies (if not already installed) ─────────────
echo [Step 2/4] Checking dependencies...
pip install --quiet -r requirements.txt
if errorlevel 1 (
    echo [ERROR] Failed to install dependencies.
    pause & exit /b 1
)
:: ── 4. Sanitize settings.json - REMOVE API KEYS before build ───────────────
echo [Step 3/4] Sanitizing settings (removing API keys from build)...
if exist "data\settings.json" (
    rem Back up real settings.  ("rem" not "::" — label-style comments inside
    rem a parenthesized block can abort cmd with a syntax error.)
    copy /y "data\settings.json" "data\settings.json.bak" >nul
)
:: Write a clean settings file with no real keys
(
echo {
echo   "model_provider": "deepseek",
echo   "qwen_api_key": "sk-your-qwen-key",
echo   "qwen_model": "qwen3.6-plus",
echo   "openai_api_key": "sk-your-openai-key",
echo   "openai_model": "gpt-4o",
echo   "deepseek_api_key": "sk-your-deepseek-key",
echo   "deepseek_model": "deepseek-chat",
echo   "max_concurrent": 5,
echo   "content_volume": "standard"
echo }
) > "data\settings_clean.tmp"
:: FIX: actually apply the sanitized file. Previously the clean file was
:: written but never used, so data\settings.json kept the real API keys for
:: the whole build. Only overwrite when a backup exists (i.e. the original
:: file was present), so a missing settings.json stays missing.
if exist "data\settings.json.bak" (
    copy /y "data\settings_clean.tmp" "data\settings.json" >nul
)
:: ── 5. Build ────────────────────────────────────────────────────────────────
echo [Step 4/4] Building EXE with PyInstaller...
echo (This may take 3-10 minutes on first run)
echo.
:: Clean previous build artifacts
if exist "build" rd /s /q "build" >nul 2>&1
if exist "dist\BidPartner" rd /s /q "dist\BidPartner" >nul 2>&1
pyinstaller bid_partner.spec --noconfirm
set BUILD_RESULT=%errorlevel%
:: ── Restore real settings ───────────────────────────────────────────────────
if exist "data\settings.json.bak" (
    copy /y "data\settings.json.bak" "data\settings.json" >nul
    del /f /q "data\settings.json.bak" >nul 2>&1
)
del /f /q "data\settings_clean.tmp" >nul 2>&1
if %BUILD_RESULT% neq 0 (
    echo.
    echo [ERROR] PyInstaller build failed. See output above for details.
    pause & exit /b 1
)
:: ── 6. Result ───────────────────────────────────────────────────────────────
echo.
echo ============================================================
echo Build SUCCESSFUL!
echo Output: dist\BidPartner\bid_partner.exe
echo ============================================================
echo.
echo The 'dist\BidPartner' folder is your distributable package.
echo Users only need this folder - no Python installation required.
echo Each user must set their own API key in the app settings.
echo.
:: Open the output folder
explorer "dist\BidPartner" >nul 2>&1
endlocal
pause

76
config.py Normal file
View File

@ -0,0 +1,76 @@
"""Central application configuration: paths, AI provider settings, and
generation knobs.

Every value can be overridden via environment variables; the literal
defaults below are placeholders, not real credentials.
"""
import os
import sys

# When running as a PyInstaller bundle:
#   sys._MEIPASS       → read-only bundle dir (templates, static, prompts)
#   sys.executable dir → writable dir next to the .exe (data, settings, db)
if getattr(sys, 'frozen', False):
    _BUNDLE_DIR = sys._MEIPASS  # bundled app files
    BASE_DIR = os.path.dirname(sys.executable)  # writable runtime dir
else:
    _BUNDLE_DIR = os.path.dirname(os.path.abspath(__file__))
    BASE_DIR = _BUNDLE_DIR

# Writable data locations (created at runtime next to the executable).
DATA_DIR = os.path.join(BASE_DIR, 'data')
UPLOAD_DIR = os.path.join(DATA_DIR, 'uploads')
EXPORT_DIR = os.path.join(DATA_DIR, 'exports')
KNOWLEDGE_DIR= os.path.join(DATA_DIR, 'knowledge')
DB_PATH = os.path.join(DATA_DIR, 'projects.db')
CHROMA_DIR = os.path.join(DATA_DIR, 'chroma')
PROMPTS_DIR = os.path.join(_BUNDLE_DIR, 'prompts')

# ==================== AI model configuration ====================
# Provider selection: 'openai' | 'qwen' | 'deepseek' | 'ollama'
# (doubao/kimi settings also exist below — presumably selectable too; confirm
# against the consumer of MODEL_PROVIDER.)
MODEL_PROVIDER = os.environ.get('MODEL_PROVIDER', 'qwen')
# OpenAI
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', 'sk-your-openai-key')
OPENAI_MODEL = os.environ.get('OPENAI_MODEL', 'gpt-4.1')
OPENAI_BASE_URL = os.environ.get('OPENAI_BASE_URL', 'https://api.openai.com/v1')
# Alibaba Cloud Tongyi Qianwen (Qwen)
QWEN_API_KEY = os.environ.get('QWEN_API_KEY', 'sk-your-qwen-key')
QWEN_MODEL = os.environ.get('QWEN_MODEL', 'qwen3.6-plus')
QWEN_BASE_URL = os.environ.get('QWEN_BASE_URL', 'https://dashscope.aliyuncs.com/compatible-mode/v1')
# DeepSeek
DEEPSEEK_API_KEY = os.environ.get('DEEPSEEK_API_KEY', 'sk-your-deepseek-key')
DEEPSEEK_MODEL = os.environ.get('DEEPSEEK_MODEL', 'deepseek-chat')
DEEPSEEK_BASE_URL = os.environ.get('DEEPSEEK_BASE_URL', 'https://api.deepseek.com/v1')
# Local Ollama (OpenAI-compatible API)
OLLAMA_BASE_URL = os.environ.get('OLLAMA_BASE_URL', 'http://localhost:11434/v1')
OLLAMA_MODEL = os.environ.get('OLLAMA_MODEL', 'qwen3:8b')
# Doubao / Volcano Engine (ByteDance, OpenAI-compatible API)
DOUBAO_API_KEY = os.environ.get('DOUBAO_API_KEY', 'sk-your-doubao-key')
DOUBAO_MODEL = os.environ.get('DOUBAO_MODEL', 'doubao-1-5-pro-32k')
DOUBAO_BASE_URL = os.environ.get('DOUBAO_BASE_URL', 'https://ark.cn-beijing.volces.com/api/v3')
# Kimi / Moonshot AI (OpenAI-compatible API, supports embeddings)
KIMI_API_KEY = os.environ.get('KIMI_API_KEY', 'sk-your-kimi-key')
KIMI_MODEL = os.environ.get('KIMI_MODEL', 'moonshot-v1-32k')
KIMI_BASE_URL = os.environ.get('KIMI_BASE_URL', 'https://api.moonshot.cn/v1')

# Embedding models (per provider)
OPENAI_EMBEDDING_MODEL = 'text-embedding-3-small'
QWEN_EMBEDDING_MODEL = 'text-embedding-v3'
KIMI_EMBEDDING_MODEL = 'moonshot-v1-embedding'

# ==================== Application configuration ====================
MAX_FILE_SIZE_MB = 50
ALLOWED_EXTENSIONS = {'pdf', 'doc', 'docx'}
SECRET_KEY = 'bidhuo-partner-secret-2024'

# ==================== Generation configuration ====================
MAX_RETRIES = 3
REQUEST_TIMEOUT = int(os.environ.get('REQUEST_TIMEOUT', '180'))
# Outline generation uses a long prompt and a large response: extend the read
# timeout (seconds) so the client does not disconnect before the API replies.
OUTLINE_REQUEST_TIMEOUT = int(os.environ.get('OUTLINE_REQUEST_TIMEOUT', '300'))
CHUNK_SIZE = 2000  # knowledge-base text chunk size (characters)
CHUNK_OVERLAP = 200  # overlap between adjacent chunks
TOP_K_KNOWLEDGE = 3  # number of knowledge-base chunks retrieved
MAX_CONCURRENT_SECTIONS = int(os.environ.get('MAX_CONCURRENT_SECTIONS', '5'))  # sections generated concurrently
CONTENT_VOLUME = os.environ.get('CONTENT_VOLUME', 'standard')  # length tier: concise / standard / detailed / full
TARGET_PAGES = int(os.environ.get('TARGET_PAGES', '0') or '0')  # target page count (0 = disabled)
PAGE_CHAR_ESTIMATE = int(os.environ.get('PAGE_CHAR_ESTIMATE', '700') or '700')  # rough characters-per-page estimate

View File

@ -0,0 +1,22 @@
{
"_meta": "附件类章节stack_charts_only 为默认,叶节点按 diagram 意图栈只输出 [FIGURE]/[TABLE] 块、无叙述正文full 为长文single_chart_only 为栈顶单块。修改后重启生效。",
"_field_docs": {
"title_regex": "标题任一则正则匹配即视为附件节Python re 语法)",
"table_hint_keywords": "标题含此类子串且双开关均开时倾向表格",
"figure_hint_keywords": "标题含此类子串且双开关均开时倾向图示",
"default_kind_when_ambiguous": "双开且标题无倾向词时的默认figure 或 table",
"attachment_leaf_body_mode": "stack_charts_only意图栈只生成图/表块full与常规章节相同长文single_chart_only仅栈顶一块图或表"
},
"schema_version": 1,
"attachment_leaf_body_mode": "stack_charts_only",
"title_regex": [
"附件\\s*[一二三四五六七八九十0-9A-Za-z、:.]",
"附\\s*图",
"附\\s*表",
"附\\s*件\\s*\\(",
"^\\s*[\\d一二三四五六七八九十\\..、]+\\s*附件"
],
"table_hint_keywords": ["附表", "一览表", "清单表", "表", "统计表", "明细表"],
"figure_hint_keywords": ["附图", "示意图", "平面图", "流程图", "布置图", "组织图", "横道"],
"default_kind_when_ambiguous": "table"
}

View File

@ -0,0 +1,51 @@
{
"_meta": "章节级图/表意图:标题与大纲窗口关键词计分,阈值入栈,按栈序拼接图示/表格生成规范。修改后重启服务生效。",
"_field_docs": {
"threshold_figure": "图示倾向分达到此值才入栈",
"threshold_table": "表格倾向分达到此值才入栈",
"title_weight": "标题命中的权重乘子",
"context_weight": "大纲上下文窗口命中的权重乘子",
"outline_context_lines": "before/after 为相对匹配行上下扩展行数",
"stack_order_when_both": "figure_first | table_first | score_desc两者同时入栈时的顺序栈顶为 index 0",
"figure_keywords": "字符串或 {text,weight} 对象列表",
"table_keywords": "同上"
},
"schema_version": 1,
"threshold_figure": 1.0,
"threshold_table": 1.0,
"title_weight": 1.0,
"context_weight": 0.6,
"outline_context_lines": {"before": 4, "after": 6},
"stack_order_when_both": "score_desc",
"figure_keywords": [
{"text": "组织", "weight": 1.0},
{"text": "架构", "weight": 1.0},
{"text": "流程", "weight": 1.2},
{"text": "工序", "weight": 1.0},
{"text": "进度", "weight": 1.2},
{"text": "横道", "weight": 1.5},
{"text": "网络图", "weight": 1.5},
{"text": "平面", "weight": 1.0},
{"text": "布置", "weight": 0.8},
{"text": "监测", "weight": 0.8},
{"text": "示意", "weight": 0.8},
{"text": "应急", "weight": 0.8}
],
"table_keywords": [
{"text": "一览表", "weight": 1.5},
{"text": "人员", "weight": 1.0},
{"text": "配置", "weight": 0.8},
{"text": "设备", "weight": 1.0},
{"text": "机械", "weight": 0.9},
{"text": "劳动力", "weight": 1.2},
{"text": "工种", "weight": 1.0},
{"text": "检验", "weight": 1.0},
{"text": "验收", "weight": 0.9},
{"text": "材料", "weight": 1.0},
{"text": "供应", "weight": 0.9},
{"text": "风险", "weight": 1.0},
{"text": "措施", "weight": 0.6},
{"text": "清单", "weight": 0.8},
{"text": "计划", "weight": 0.7}
]
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
data/projects.db Normal file

Binary file not shown.

24
data/settings.json Normal file
View File

@ -0,0 +1,24 @@
{
"model_provider": "qwen",
  "qwen_api_key": "sk-your-qwen-key",
"qwen_model": "qwen3.6-plus",
"qwen_base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
"openai_api_key": "sk-your-openai-key",
"openai_model": "gpt-4.1",
"openai_base_url": "https://api.openai.com/v1",
"deepseek_api_key": "sk-your-deepseek-key",
"deepseek_model": "deepseek-chat",
"deepseek_base_url": "https://api.deepseek.com/v1",
"ollama_base_url": "http://localhost:11434/v1",
"ollama_model": "qwen3:8b",
"doubao_api_key": "sk-your-doubao-key",
"doubao_model": "doubao-1-5-pro-32k",
"doubao_base_url": "https://ark.cn-beijing.volces.com/api/v3",
"kimi_api_key": "sk-your-kimi-key",
"kimi_model": "moonshot-v1-32k",
"kimi_base_url": "https://api.moonshot.cn/v1",
"max_concurrent": 10,
"content_volume": "full",
"target_pages": 120,
"page_char_estimate": 700
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,30 @@
{
"_meta": "字数分配约束规则:与「标书篇幅预期」四档的 base/core 配合,按技术评分项权重与章节标题相关性分配各叶节点最低字数与提示词中的评分要点提示。修改后重启服务生效;字段说明见同文件 _field_docs。",
"_field_docs": {
"schema_version": "规则文件版本号,解析时可做迁移",
"alpha": "0~1评分驱动强度越大则高分相关章节越接近 core、低相关越接近 base",
"budget_mode": "target_pages启用目标页数且 TARGET_PAGES>0 时,全书叶节点目标总字数为 TARGET_PAGES*PAGE_CHAR_ESTIMATE无技术评分时叶节均分该总预算。未启用页数时无评分则返回 None。anchor_meanN*(base+core)/2anchor_baseN*base",
"per_section_floor": "单节 min_chars 下限(不低于此整数)",
"per_section_cap": "单节 min_chars 上限(不超过 core 时可设为 core 或略高)",
"relevance.method": "keyword_overlap标题与评分项名称/关键词的字面重叠度",
"relevance.min_rating_weight": "忽略权重低于此值的评分项(减少噪声)",
"rating_parse": "预留;解析器内置多形态 rating_json无需在此配置",
"prompt.top_k_rating_items": "写入本节字数说明中的相关评分项名称条数上限",
"max_tokens_scale": "若为 true按 min_chars/base 比例缩放本段 max_tokens仍受模型上限约束"
},
"schema_version": 1,
"alpha": 0.85,
"budget_mode": "target_pages",
"per_section_floor": null,
"per_section_cap": null,
"relevance": {
"method": "keyword_overlap",
"min_rating_weight": 0.01
},
"rating_parse": {},
"prompt": {
"top_k_rating_items": 4,
"intro_line": "本节须对下列技术评分要点作实质展开(结合工艺、流程、标准与可验证措施,禁止空泛承诺与复述招标文件):"
},
"max_tokens_scale": false
}

172
launcher.py Normal file
View File

@ -0,0 +1,172 @@
"""
标伙伴 · AI标书助手 桌面启动器
运行此文件 (或打包后的 bid_partner.exe) 即可自动启动本地服务并打开浏览器
"""
import os
import sys
import socket
import threading
import time
import webbrowser
import urllib.request
import logging
# ── 找可用端口 ──────────────────────────────────────────────────────────────
def _find_free_port(start: int = 5000, attempts: int = 20) -> int:
for port in range(start, start + attempts):
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
try:
s.bind(('127.0.0.1', port))
return port
except OSError:
continue
return start # 最坏情况:直接用 5000让 Flask 报错
PORT = _find_free_port()
# ── Logging ─────────────────────────────────────────────────────────────────
def _setup_logging():
    """Configure root logging to append to bid_partner.log.

    The log file lives next to the executable when frozen (PyInstaller sets
    sys.frozen), otherwise next to this source file.
    """
    if getattr(sys, 'frozen', False):
        log_dir = os.path.dirname(sys.executable)
    else:
        log_dir = os.path.dirname(os.path.abspath(__file__))
    log_path = os.path.join(log_dir, 'bid_partner.log')
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s [%(levelname)s] %(name)s: %(message)s',
        handlers=[logging.FileHandler(log_path, encoding='utf-8', mode='a')],
    )
# ── Start the Flask server ──────────────────────────────────────────────────
def _start_server():
    """Run the Flask app (blocking); intended to run on a daemon thread.

    The app module is imported lazily so the launcher window can appear
    before the heavy import completes.  Failures are logged to
    bid_partner.log instead of raised, because this runs off the main thread.
    """
    try:
        import app as flask_app
        flask_app.init_db()
        flask_app.app.run(
            host='127.0.0.1',
            port=PORT,
            debug=False,
            threaded=True,
            use_reloader=False,
        )
    except Exception as e:
        logging.getLogger('launcher').error(f'服务启动失败: {e}', exc_info=True)
# ── Wait until the server answers ───────────────────────────────────────────
def _wait_for_server(timeout: int = 60) -> bool:
    """Poll the local server until it responds, or *timeout* seconds elapse."""
    probe_url = f'http://127.0.0.1:{PORT}'
    give_up_at = time.time() + timeout
    while time.time() < give_up_at:
        try:
            urllib.request.urlopen(probe_url, timeout=1)
        except Exception:
            time.sleep(0.4)
        else:
            return True
    return False
# ── Main window (tkinter) ───────────────────────────────────────────────────
def _run_gui():
    """Show a small tkinter status window for the local server.

    Polls the server on a worker thread; once it answers, enables the
    open-browser button and opens the default browser automatically.
    """
    import tkinter as tk
    from tkinter import ttk, font as tkfont
    URL = f'http://127.0.0.1:{PORT}'
    root = tk.Tk()
    root.title('标伙伴 · AI标书助手')
    root.geometry('400x220')
    root.resizable(False, False)
    root.configure(bg='#f5f5f5')
    # ── Title ──
    title_font = tkfont.Font(family='微软雅黑', size=14, weight='bold')
    tk.Label(root, text='标伙伴 · AI 标书助手', font=title_font,
             bg='#f5f5f5', fg='#1a1a2e').pack(pady=(22, 4))
    # ── Status line ──
    status_var = tk.StringVar(value='正在启动服务,请稍候…')
    status_lbl = tk.Label(root, textvariable=status_var,
                          font=('微软雅黑', 10), bg='#f5f5f5', fg='#555')
    status_lbl.pack(pady=4)
    # ── Clickable URL ──
    url_lbl = tk.Label(root, text='', font=('Consolas', 10),
                       bg='#f5f5f5', fg='#1a73e8', cursor='hand2')
    url_lbl.pack(pady=2)
    url_lbl.bind('<Button-1>', lambda _: webbrowser.open(URL))
    # ── Buttons ──
    btn_frame = tk.Frame(root, bg='#f5f5f5')
    btn_frame.pack(pady=18)
    open_btn = ttk.Button(btn_frame, text='打开浏览器',
                          command=lambda: webbrowser.open(URL),
                          state='disabled', width=14)
    open_btn.pack(side='left', padx=8)
    quit_btn = ttk.Button(btn_frame, text='退出程序',
                          command=root.destroy, width=10)
    quit_btn.pack(side='left', padx=8)
    # ── Footer ──
    tk.Label(root, text='单机版 · 本地运行 · 数据不上传',
             font=('微软雅黑', 8), bg='#f5f5f5', fg='#aaa').pack(pady=(0, 10))
    # ── Background poll; update the UI once the server is ready ──
    def _on_ready():
        status_var.set('服务已就绪 ✓')
        status_lbl.config(fg='#2e7d32')
        url_lbl.config(text=URL)
        open_btn.config(state='normal')
        webbrowser.open(URL)
    def _on_timeout():
        status_var.set('启动超时,请查看 bid_partner.log')
        status_lbl.config(fg='#c62828')
    def _check():
        # NOTE(review): root.after is called from a worker thread; tkinter's
        # cross-thread safety is implementation-dependent — confirm on target
        # platforms or marshal through an event/queue instead.
        if _wait_for_server():
            root.after(0, _on_ready)
        else:
            root.after(0, _on_timeout)
    threading.Thread(target=_check, daemon=True).start()
    root.mainloop()
# ── Headless mode (console only) ────────────────────────────────────────────
def _run_headless():
    """Console fallback: wait for the server, open the browser, then idle."""
    print(f'[标伙伴] Starting server on port {PORT} ...')
    if _wait_for_server():
        print(f'[标伙伴] Ready → http://127.0.0.1:{PORT}')
        webbrowser.open(f'http://127.0.0.1:{PORT}')
        # Block until the user presses Ctrl+C.
        try:
            while True:
                time.sleep(1)
        except KeyboardInterrupt:
            print('[标伙伴] Shutting down.')
    else:
        print('[标伙伴] Server did not start within 60 s. Check bid_partner.log.')
# ── Entry point ─────────────────────────────────────────────────────────────
def main():
    """Start the Flask server on a daemon thread, then show the GUI.

    Any GUI failure (no display, missing tkinter) degrades to console mode.
    """
    _setup_logging()
    server_thread = threading.Thread(target=_start_server, daemon=True)
    server_thread.start()
    try:
        _run_gui()
    except Exception:
        _run_headless()
if __name__ == '__main__':
    main()

1
modules/__init__.py Normal file
View File

@ -0,0 +1 @@

98
modules/checker.py Normal file
View File

@ -0,0 +1,98 @@
"""
合规检查模块检查生成的标书是否响应了招标关键要求
"""
import json
import logging
import re
import sqlite3
from utils import ai_client
logger = logging.getLogger(__name__)
# Prompt template for the compliance check.  Placeholders: {requirements}
# (technical scoring requirements) and {content} (per-section summaries).
# The doubled braces in the JSON example survive str.format() as literals.
CHECK_PROMPT = """你是一位专业的投标文件技术审核专家。请对照以下【技术评分要求】,检查【标书技术内容】的覆盖情况,输出技术合规检查报告。
重要限制必须遵守
本次检查范围仅限技术内容包括技术方案实施能力技术指标质量保障人员配置技术创新等
严禁将商务评分价格评分资质评分报价合同条款付款方式等商务内容纳入检查项
若技术评分要求中混有商务条款直接忽略不得作为检查项输出
技术评分要求
{requirements}
标书技术内容各章节摘要
{content}
请输出以下格式的 JSON每个 item 均为技术评分项不含任何商务内容
{{
"overall_score": 85,
"status": "良好",
"items": [
{{
"requirement": "技术评分要求描述",
"covered": true,
"note": "说明"
}}
],
"missing_points": ["未覆盖的技术要点1", "未覆盖的技术要点2"],
"suggestions": ["技术内容改进建议1", "技术内容改进建议2"]
}}
"""
def check_compliance(db_path: str, project_id: int) -> dict:
    """Run the compliance check for one project and return a result dict.

    Loads the extracted technical scoring requirements and the generated
    section contents from SQLite, asks the LLM to grade coverage against
    CHECK_PROMPT, and parses the JSON it returns.  Every failure path
    returns a dict containing an ``error`` key instead of raising.
    """
    conn = sqlite3.connect(db_path)
    try:
        # Load the tender requirements.
        cur = conn.cursor()
        cur.execute(
            "SELECT summary, rating_requirements FROM tender_data WHERE project_id=?",
            (project_id,)
        )
        td = cur.fetchone()
        if not td:
            return {'error': '尚未解析招标文件'}
        # Use only the technical scoring requirements as the baseline,
        # excluding commercial content the summary may contain.
        requirements = (td[1] or '').strip()
        if not requirements:
            return {'error': '尚未提取技术评分要求,请先完成步骤一的招标文件解析'}
        # Collect generated sections (first 500 characters of each).
        cur.execute(
            "SELECT section_title, content FROM bid_sections WHERE project_id=? AND status='done' ORDER BY order_index",
            (project_id,)
        )
        rows = cur.fetchall()
        if not rows:
            return {'error': '尚未生成标书内容,请先生成'}
        content_parts = []
        for title, content in rows:
            snippet = (content or '')[:500].replace('\n', ' ')
            content_parts.append(f"{title}{snippet}")
        content_str = '\n'.join(content_parts)
        # Ask the model; both sides are truncated to bound the prompt size.
        prompt = CHECK_PROMPT.format(requirements=requirements[:3000], content=content_str[:6000])
        raw = ai_client.chat(prompt, temperature=0.2, max_tokens=2048)
        # Strip Markdown code fences, then grab the outermost JSON object.
        raw = re.sub(r'```(?:json)?\s*', '', raw).replace('```', '').strip()
        m = re.search(r'\{[\s\S]*\}', raw)
        if m:
            raw = m.group(0)
        result = json.loads(raw)
        return result
    except json.JSONDecodeError as e:
        logger.error(f'合规检查结果解析失败: {e}')
        # ``raw`` is always bound here: json.loads above is the only statement
        # that can raise JSONDecodeError, and it runs after raw is assigned.
        return {'error': f'AI 返回格式异常: {e}', 'raw': raw}
    except Exception as e:
        logger.exception('合规检查失败')
        return {'error': str(e)}
    finally:
        conn.close()

View File

@ -0,0 +1,635 @@
"""
技术暗标 HTML 格式检查 清标工具.js 迁移不依赖浏览器/jsdom
仅解析内联 style 与文档内 <style> 中的 @page 简单规则无内联样式时部分项可能判为不符合
"""
from __future__ import annotations
import re
from typing import Any
from bs4 import BeautifulSoup, Tag
# 1pt ≈ 96/72 px (CSS 标准)
_PT_PX = 96.0 / 72.0
# 三号 16pt / 四号 14pt / 五号 10.5pt / 行距 26pt
_TARGET_H = 16 * _PT_PX # 21.333...
_TARGET_BODY = 14 * _PT_PX
_TARGET_LH = 26 * _PT_PX
_TARGET_FIG = 10.5 * _PT_PX
def _parse_style_attr(style: str | None) -> dict[str, str]:
if not style or not style.strip():
return {}
out: dict[str, str] = {}
for part in style.split(";"):
part = part.strip()
if ":" not in part:
continue
k, v = part.split(":", 1)
k, v = k.strip().lower(), v.strip()
if k:
out[k] = v
return out
def _num(s: str) -> float:
try:
return float(re.sub(r"[^\d.\-]", "", s) or "nan")
except ValueError:
return float("nan")
def _length_to_px(val: str, font_size_px: float | None = None) -> float:
"""将 font-size / line-height 等长度转为近似 px 浮点,用于与 JS 中 getComputedStyle(px) 对齐。"""
val = (val or "").strip().lower()
if not val or val in ("normal", "inherit", "initial"):
return float("nan")
if val.isdigit():
return float(val)
m = re.match(r"^([\d.]+)\s*(pt|px|em|rem)?\s*$", val)
if not m:
m2 = re.match(r"^([\d.]+)", val)
return float(m2.group(1)) if m2 else float("nan")
n, unit = float(m.group(1)), (m.group(2) or "px")
if unit == "pt":
return n * _PT_PX
if unit == "px":
return n
if unit in ("em", "rem") and font_size_px and font_size_px == font_size_px:
return n * font_size_px
if unit in ("em", "rem"):
return n # 无字号时仅返回 em 数,供 text-indent 等判断
return n
def _indent_value(style: dict[str, str], font_size_px: float) -> float:
    """Text-indent expressed in em, mirroring JS parseFloat ('2em' -> 2.0)."""
    raw_indent = (style.get("text-indent") or "").strip()
    if not raw_indent:
        return float("nan")
    if "em" in raw_indent.lower():
        # '2em' -> 2.0; otherwise fall back to the bare numeric value.
        em_match = re.search(r"([\d.]+)\s*em", raw_indent, re.I)
        if em_match:
            return float(em_match.group(1))
        return _num(raw_indent)
    # Approximate px -> em via the element font size (px == px is NaN-safe).
    px_value = _length_to_px(raw_indent, font_size_px)
    if px_value == px_value and font_size_px > 0:
        return px_value / font_size_px
    return _num(raw_indent)
def _color_normalized(style: dict[str, str]) -> str:
c = (style.get("color") or "").strip().lower()
if not c:
return ""
c = c.replace(" ", "")
if c in ("#000", "#000000", "black", "rgb(0,0,0)"):
return "rgb(0, 0, 0)"
m = re.match(r"rgb\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)", c)
if m:
r, g, b = int(m.group(1)), int(m.group(2)), int(m.group(3))
if r == 0 and g == 0 and b == 0:
return "rgb(0, 0, 0)"
return c
return c
def _el_style_dict(tag: Tag) -> dict[str, str]:
    """Inline style of *tag* as a dict; BeautifulSoup may hand the attribute
    back as either a string or a list of tokens."""
    style_attr = tag.get("style")
    if isinstance(style_attr, list):
        style_attr = ";".join(style_attr)
    return _parse_style_attr(style_attr) if isinstance(style_attr, str) else {}
def _get_inline_property(tag: Tag, prop: str) -> str:
    """Value of one inline-style property on *tag*, or '' when not declared."""
    return _el_style_dict(tag).get(prop.lower(), "")
def _outer_html_sample(tag: Tag, limit: int = 200) -> str:
s = str(tag)
return s[:limit] if len(s) > limit else s
def _is_under(node: Tag | None, ancestor: Tag | None) -> bool:
if node is None or ancestor is None:
return False
p: Tag | None = node
while p is not None:
if p is ancestor:
return True
p = p.parent
return False
def _body_text(soup: BeautifulSoup) -> str:
    """Visible text of <body>, newline-joined; whole document when body is
    missing or empty (an empty Tag is falsy in BeautifulSoup)."""
    root = soup.body if soup.body else soup
    return root.get_text("\n", strip=True)
def _parse_page_margins_from_html(raw_html: str) -> dict[str, str] | None:
"""从 <style> 中粗提取 @page 块内 margin 与 size。"""
for m in re.finditer(
r"@page\s*\{([^}]+)\}",
raw_html,
re.I | re.DOTALL,
):
block = m.group(1)
msh = re.search(r"margin\s*:\s*([^;]+);", block, re.I)
if msh:
return {"shorthand": msh.group(1).strip()}
margins: dict[str, str] = {}
for name, key in (
(r"margin-top\s*:\s*([^;]+)", "top"),
(r"margin-bottom\s*:\s*([^;]+)", "bottom"),
(r"margin-left\s*:\s*([^;]+)", "left"),
(r"margin-right\s*:\s*([^;]+)", "right"),
(r"size\s*:\s*([^;]+)", "size"),
):
mm = re.search(name, block, re.I)
if mm:
margins[key] = mm.group(1).strip()
if margins:
return margins
return None
def check_technical_bid(html_content: str) -> dict[str, Any]:
    """Run the dark-bid (anonymous technical bid) HTML format checks.

    Only inline ``style`` attributes and simple ``@page`` rules found in the
    raw HTML are consulted, so elements styled solely via stylesheets may be
    reported as non-compliant.  The result mirrors 清标数据.json:
    ``overall`` (bool), ``details`` (one record per rule) and ``violations``
    (failed rules plus truncated samples of the offending elements).
    """
    results: dict[str, Any] = {
        "overall": True,
        "details": [],
        "violations": [],
    }
    # Record one rule outcome; a failure flips ``overall`` and captures
    # truncated outerHTML samples of the offending elements.
    def add_result(
        rule_name: str,
        passed: bool,
        message: str,
        elements: list[Tag] | None = None,
    ) -> None:
        results["details"].append(
            {"rule": rule_name, "passed": passed, "message": message}
        )
        if not passed:
            results["overall"] = False
            el_snips: list[str] = []
            for el in elements or []:
                if isinstance(el, Tag):
                    el_snips.append(_outer_html_sample(el))
            results["violations"].append(
                {"rule": rule_name, "message": message, "elements": el_snips}
            )
    if not (html_content or "").strip():
        add_result("身份信息隐藏", False, "HTML 内容为空", [])
        return results
    raw_html = html_content
    soup = BeautifulSoup(html_content, "lxml")
    # Fragments without a <body> are re-wrapped so body-scoped checks work.
    if not soup.body:
        soup = BeautifulSoup(f"<html><body>{html_content}</body></html>", "lxml")
    body = soup.body
    if not body:
        add_result("身份信息隐藏", False, "无法解析 body", [])
        return results
    # ---- 1. Identity hiding ----
    body_text = _body_text(soup)
    company_pattern = re.compile(
        r"(?:我公司|本公司|[(]?[A-Za-z\u4e00-\u9fa5]+(?:集团|股份|有限|责任|公司)[)]?)"
    )
    addr_pattern = re.compile(
        r"(?:省|市|区|县|镇|路|街|大道|号|大厦|楼|层)[\u4e00-\u9fa50-9]+"
    )
    name_pattern = re.compile(
        r"(?:总监理工程师|专业监理工程师|技术负责人|项目经理)[:]\s*"
        r"[^甲乙丙丁戊己庚辛壬癸\s]{2,4}(?=[,。;\s]|$)"
    )
    found_company = bool(company_pattern.search(body_text))
    found_addr = bool(addr_pattern.search(body_text))
    found_name = bool(name_pattern.search(body_text))
    has_logo = False
    for img in soup.find_all("img"):
        if not isinstance(img, Tag):
            continue
        alt = (img.get("alt") or "") + ""
        src = (img.get("src") or "") + ""
        if re.search(r"logo|商标|微标|公司|品牌", alt, re.I) or re.search(
            r"logo", src, re.I
        ):
            has_logo = True
            break
    passed_id = not (
        found_company or found_addr or found_name or has_logo
    )
    add_result(
        "身份信息隐藏",
        passed_id,
        "未发现投标人身份信息"
        if passed_id
        else "发现投标人身份信息(公司名/地址/真实姓名/商标)",
    )
    # Heading rule: 三号 (16pt) SimHei-family, bold-ish, black, no
    # italics/underline; an absent font-size fails outright.
    def heading_style_ok(tag: Tag) -> bool:
        st = _el_style_dict(tag)
        fs_raw = st.get("font-size", "")
        fs_px = _length_to_px(fs_raw)
        # Bare em values are resolved against the 16px browser default.
        if "em" in (fs_raw or "").lower() and "rem" not in (fs_raw or "").lower():
            fs_px = _num(fs_raw) * 16.0
        size_ok = abs(fs_px - _TARGET_H) <= 3
        fam = (st.get("font-family") or "").lower()
        font_ok = "黑体" in fam or "simhei" in fam or "microsoft yahei" in fam
        font_style = (st.get("font-style") or "").lower()
        style_ok = font_style != "italic"
        text_dec = (st.get("text-decoration") or "").lower()
        decor_ok = "underline" not in text_dec
        cr = (st.get("color") or "").strip().lower()
        if not cr or cr in ("inherit", "initial"):
            color_ok = True
        else:
            cn = _color_normalized(st)
            color_ok = cn == "rgb(0, 0, 0)" or cr in (
                "#000",
                "#000000",
                "black",
                "rgb(0,0,0)",
            )
        fw = (st.get("font-weight") or "400").lower()
        weight_ok = fw not in ("400", "normal")
        if not st.get("font-size"):
            size_ok = False
        return (
            size_ok
            and font_ok
            and style_ok
            and decor_ok
            and color_ok
            and weight_ok
        )
    # ---- 2. Headings ----
    heading_tags: list[Tag] = []
    for sel in ("h1", "h2", "h3", "h4", "h5", "h6"):
        heading_tags.extend(soup.find_all(sel))
    for t in soup.find_all(attrs={"role": "heading"}):
        if isinstance(t, Tag):
            heading_tags.append(t)
    for t in soup.select(".heading, .title"):
        if isinstance(t, Tag) and t not in heading_tags:
            heading_tags.append(t)
    invalid_h: list[Tag] = []
    for h in heading_tags:
        if not isinstance(h, Tag):
            continue
        if not heading_style_ok(h):
            invalid_h.append(h)
    h_ok = len(invalid_h) == 0
    add_result(
        "标题格式",
        h_ok,
        "所有标题符合三号黑体要求"
        if h_ok
        else "部分标题字号/字体/颜色/下划线不符合要求",
        invalid_h,
    )
    # ---- 3. Body paragraphs ----
    # Body rule: 四号 (14pt) SimSun-family, black, ~2em indent, 26pt line
    # height, regular weight, no underline/italics.
    def body_el_ok(el: Tag) -> bool:
        st = _el_style_dict(el)
        if el.name in ("h1", "h2", "h3", "h4", "h5", "h6"):
            return True
        cls = " ".join(el.get("class", [])) if el.get("class") else ""
        if any(
            x in cls
            for x in ("header", "footer", "toc", "目录", "table-of-contents")
        ):
            return True
        text = el.get_text(strip=True)
        if not text:
            return True
        fs_raw = st.get("font-size", "")
        font_px = _length_to_px(fs_raw)
        if not fs_raw:
            return False
        size_ok = abs(font_px - _TARGET_BODY) <= 2
        fam = (st.get("font-family") or "").lower()
        font_ok = "宋体" in fam or "simsun" in fam or "serif" in fam
        col = st.get("color", "")
        color_ok = (not col) or _color_normalized(st) == "rgb(0, 0, 0)" or col.lower() in (
            "#000",
            "#000000",
            "black",
            "rgb(0,0,0)",
        )
        ind = _indent_value(st, font_px)
        # ind == ind filters out NaN (unparsable indents).
        indent_ok = ind == ind and 1.8 <= ind <= 2.2
        lh_raw = (st.get("line-height") or "").strip()
        if not lh_raw:
            line_ok = False
        else:
            if "pt" in lh_raw or "px" in lh_raw:
                lh_px = _length_to_px(lh_raw, font_px)
            elif re.match(r"^[\d.]+$", lh_raw):
                # A unitless line-height multiplies the element font size.
                lh_px = float(lh_raw) * font_px
            else:
                lh_px = _length_to_px(lh_raw, font_px)
            line_ok = abs(lh_px - _TARGET_LH) <= 2
        tdec = (st.get("text-decoration") or "").lower()
        decor_ok = "underline" not in tdec
        fw = (st.get("font-weight") or "400").lower()
        weight_ok = fw in ("400", "normal", "")
        fst = (st.get("font-style") or "").lower()
        style_ok = fst != "italic"
        return (
            size_ok
            and font_ok
            and color_ok
            and indent_ok
            and line_ok
            and decor_ok
            and weight_ok
            and style_ok
        )
    # Defensive: headings are filtered again even though the find_all below
    # only selects p/div/span/li/td/th.
    exclude_set = {
        "h1",
        "h2",
        "h3",
        "h4",
        "h5",
        "h6",
    }
    invalid_body: list[Tag] = []
    for el in soup.find_all(["p", "div", "span", "li", "td", "th"]):
        if not isinstance(el, Tag):
            continue
        if el.name in exclude_set:
            continue
        if "header" in " ".join(el.get("class", [])):
            continue
        if "footer" in " ".join(el.get("class", [])):
            continue
        if "toc" in " ".join(el.get("class", [])) or "目录" in " ".join(
            el.get("class", [])
        ):
            continue
        if not el.get_text(strip=True):
            continue
        if not body_el_ok(el):
            invalid_body.append(el)
    b_ok = len(invalid_body) == 0
    add_result(
        "正文格式",
        b_ok,
        "所有正文符合四号宋体/缩进/行距/颜色要求"
        if b_ok
        else "部分正文段落格式不符合要求",
        invalid_body,
    )
    # ---- 4. Table of contents ----
    toc_els: list[Tag] = []
    for cls in ("toc", "table-of-contents", "目录"):
        for t in soup.find_all(class_=cls):
            if isinstance(t, Tag) and t not in toc_els:
                toc_els.append(t)
    for t in soup.find_all(attrs={"role": "directory"}):
        if isinstance(t, Tag) and t not in toc_els:
            toc_els.append(t)
    if not toc_els:
        add_result("目录要求", False, "未检测到目录,请确保包含目录且目录无页码无页眉页脚")
    else:
        no_pn = True
        no_hf = True
        for toc in toc_els:
            text = toc.get_text("\n", strip=True)
            lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
            for line in lines:
                # NOTE(review): the nested conditions below all collapse to a
                # single re.search(r"\.{2,}\s*\d+", line) test — simplification
                # candidate; kept verbatim to preserve behaviour.
                if re.search(r"\d+\s*$", line) and re.search(r"\d$", line):
                    if re.search(r"\.{2,}\s*\d+", line) or re.match(
                        r"^.*\d$", line
                    ):
                        if re.search(r"\.{2,}\s*\d+", line):
                            no_pn = False
                    if re.search(r"\.{2,}\s*\d+", line):
                        no_pn = False
            if toc.find(class_=re.compile("header|page-header", re.I)):
                no_hf = False
            if toc.find(class_=re.compile("footer|page-footer", re.I)):
                no_hf = False
        t_ok = no_pn and no_hf
        add_result(
            "目录要求",
            t_ok,
            "目录符合无页码、无页眉页脚要求"
            if t_ok
            else "目录中存在页码或页眉页脚",
        )
    # ---- 5. Figures / appendix (valid CSS selectors only) ----
    appendix: Tag | None = None
    for sel in (
        "#appendix",
        ".appendix",
        ".attachment",
        '[id*="附件"]',
        '[class*="附件"]',
        '[class*="附表"]',
    ):
        hit = soup.select_one(sel)
        if hit and isinstance(hit, Tag):
            appendix = hit
            break
    # Tables / images / figures / charts are only allowed inside the appendix.
    illegal: list[Tag] = []
    for tbl in soup.find_all("table"):
        if isinstance(tbl, Tag) and not _is_under(tbl, appendix):
            illegal.append(tbl)
    for im in soup.find_all("img"):
        if isinstance(im, Tag) and not _is_under(im, appendix):
            illegal.append(im)
    for el in soup.find_all("figure"):
        if isinstance(el, Tag) and not _is_under(el, appendix):
            illegal.append(el)
    for el in soup.find_all(class_="chart"):
        if isinstance(el, Tag) and not _is_under(el, appendix) and el not in illegal:
            illegal.append(el)
    # Inside the appendix, chart text must be 五号 (10.5pt) SimSun, black.
    chart_text_valid = True
    if appendix:
        for el in appendix.select("table, td, th, figcaption, .chart-text"):
            if not isinstance(el, Tag):
                continue
            st = _el_style_dict(el)
            if not st.get("font-size"):
                continue
            fs = _length_to_px(st.get("font-size", ""))
            size_ok = abs(fs - _TARGET_FIG) <= 1.5
            fam = (st.get("font-family") or "").lower()
            font_ok = "宋体" in fam or "simsun" in fam
            c_raw = (st.get("color") or "").strip()
            if c_raw and c_raw.lower() not in ("inherit", "initial"):
                c_ok = _color_normalized(st) == "rgb(0, 0, 0)" or c_raw.lower() in (
                    "#000",
                    "#000000",
                    "black",
                    "rgb(0,0,0)",
                )
            else:
                c_ok = True
            if not (size_ok and font_ok and c_ok):
                chart_text_valid = False
    c_ok2 = len(illegal) == 0 and chart_text_valid
    add_result(
        "图表规范",
        c_ok2,
        "图表仅出现在附件/附表内,且图表文字符合五号宋体"
        if c_ok2
        else f"正文中发现{len(illegal)}个图表或附件内图表文字格式错误",
        illegal,
    )
    # ---- 6. Color & decoration ----
    color_v: list[Tag] = []
    decor_v: list[Tag] = []
    for el in soup.find_all(True):
        if not isinstance(el, Tag):
            continue
        st = _el_style_dict(el)
        if not st.get("color") and not st.get("text-decoration") and not st.get(
            "border-bottom-style"
        ):
            continue
        col = (st.get("color") or "").strip().lower()
        if col and col not in (
            "inherit",
            "initial",
            "",
            "#000",
            "#000000",
            "black",
            "rgb(0,0,0)",
            "rgb(0, 0, 0)",
        ):
            if _color_normalized(st) and _color_normalized(st) != "rgb(0, 0, 0)":
                if el.get_text(strip=True):
                    color_v.append(el)
        tdec = (st.get("text-decoration") or "").lower()
        if "underline" in tdec and el.get_text(strip=True):
            decor_v.append(el)
        bbs = (st.get("border-bottom-style") or "").lower()
        # A solid/dotted bottom border on text counts as an underline /
        # emphasis-mark substitute.
        if bbs in ("solid", "dotted") and el.get_text(strip=True):
            decor_v.append(el)
    col_ok = len(color_v) == 0 and len(decor_v) == 0
    add_result(
        "颜色与装饰",
        col_ok,
        "无彩色文字、无下划线、无着重号"
        if col_ok
        else f"发现{len(color_v)}处彩色文字,{len(decor_v)}处下划线/着重号",
        (color_v + decor_v)[:20],
    )
    # ---- 7. Page setup ----
    page_valid = True
    margin_top = margin_bottom = margin_left = margin_right = None
    page_info = _parse_page_margins_from_html(raw_html)
    # Word usually emits @page inside <style>; parsed from raw_html above.
    if page_info and "shorthand" in page_info:
        # e.g. "margin: 2.54cm 3.18cm ..."
        parts = page_info["shorthand"].split()
        if len(parts) >= 4:
            margin_top, margin_right, margin_bottom, margin_left = (
                parts[0],
                parts[1],
                parts[2],
                parts[3],
            )
        elif len(parts) == 2:
            margin_top = margin_bottom = parts[0]
            margin_left = margin_right = parts[1]
    elif page_info:
        margin_top = page_info.get("top")
        margin_bottom = page_info.get("bottom")
        margin_left = page_info.get("left")
        margin_right = page_info.get("right")
    # Fall back to inline margins on <body>.
    bst = _el_style_dict(body) if body else {}
    mraw = bst.get("margin", "")
    if mraw and not margin_top:
        margins = mraw.split()
        if len(margins) >= 1:
            margin_top = margins[0]
        if len(margins) >= 2:
            margin_right = margins[1]
        if len(margins) >= 3:
            margin_bottom = margins[2]
        if len(margins) >= 4:
            margin_left = margins[3]
        else:
            margin_left = margin_right
    if not margin_top and body:
        margin_top = _get_inline_property(body, "margin-top")
        margin_bottom = _get_inline_property(body, "margin-bottom")
        margin_left = _get_inline_property(body, "margin-left")
        margin_right = _get_inline_property(body, "margin-right")
    # NOTE(review): page_valid is written here but never read afterwards —
    # page_ok below is derived independently; dead store kept as-is.
    if not any([margin_top, margin_bottom, margin_left, margin_right]) and not page_info:
        page_valid = False
    # NOTE(review): both branches below are identical — inch values such as
    # "1in" are never converted to cm here; the explicit "1in" allowances in
    # top_ok/bottom_ok compensate for that.
    def m_ok(
        m: str | None,
        target: float,
    ) -> bool:
        if not m:
            return False
        s = m.strip()
        if "cm" in s:
            return abs(_num(s) - target) < 0.01
        return abs(_num(s) - target) < 0.01
    top_ok = m_ok(margin_top, 2.54) or (
        (margin_top or "") in ("2.54cm", "1in")
    )
    bottom_ok = m_ok(margin_bottom, 2.54) or (
        (margin_bottom or "") in ("2.54cm", "1in")
    )
    left_ok = m_ok(margin_left, 3.18) or (margin_left or "").startswith("3.18")
    right_ok = m_ok(margin_right, 3.18) or (margin_right or "").startswith("3.18")
    html_tag = soup.find("html")
    w = _get_inline_property(html_tag, "width") if isinstance(html_tag, Tag) else ""  # type: ignore[arg-type]
    # An explicit non-auto, non-percentage width is treated as landscape.
    page_orientation = "横向" if w and w != "auto" and "%" not in w else "纵向"
    page_ok = bool(
        top_ok
        and bottom_ok
        and left_ok
        and right_ok
        and (page_orientation != "横向" or w in ("", "auto"))
    )
    if not margin_top:
        page_ok = False
    add_result(
        "页面设置",
        page_ok,
        "页面设置符合A4纵向/边距要求"
        if page_ok
        else "页面边距或纸张方向不符合要求",
    )
    return results

437
modules/exporter.py Normal file
View File

@ -0,0 +1,437 @@
"""
Word 文档导出模块
"""
import os
import re
import sqlite3
import logging
from datetime import datetime
from docx import Document
from docx.shared import Pt, Cm, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
import config
from utils.outline_numbering import format_heading_display
logger = logging.getLogger(__name__)
# Per-heading-level Word styling: level -> (style name, font size in pt, bold).
LEVEL_STYLES = {
    1: ('Heading 1', 16, True),
    2: ('Heading 2', 14, True),
    3: ('Heading 3', 13, False),
    4: ('Heading 4', 12, False),
}
def export_to_word(db_path: str, project_id: int) -> str:
    """Build the Word document for *project_id*, save it under
    config.EXPORT_DIR and return the generated file name.

    Raises ValueError when the project does not exist.
    """
    conn = sqlite3.connect(db_path)
    try:
        # Load the project row.
        cur = conn.cursor()
        cur.execute("SELECT name FROM projects WHERE id=?", (project_id,))
        project = cur.fetchone()
        if not project:
            raise ValueError(f'项目 {project_id} 不存在')
        project_name = project[0]
        # Outline text: its first line becomes the title-page heading.
        cur.execute("SELECT outline FROM tender_data WHERE project_id=?", (project_id,))
        td = cur.fetchone()
        bid_title = project_name + '技术标书'
        if td and td[0]:
            first_line = td[0].strip().split('\n')[0].strip()
            if first_line:
                bid_title = first_line
        # All sections, in document order.
        cur.execute('''
            SELECT section_number, section_title, level, is_leaf, content, intro_content
            FROM bid_sections
            WHERE project_id=?
            ORDER BY order_index
        ''', (project_id,))
        sections = cur.fetchall()
        doc = _build_document(bid_title, sections)
        # Save under EXPORT_DIR with a timestamped, sanitised file name.
        os.makedirs(config.EXPORT_DIR, exist_ok=True)
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        # NOTE(review): '\u4e00-\u9fff' inside this literal is NOT a character
        # range — it is three literal characters (一, -, 鿿). CJK characters
        # already pass isalnum(), so behaviour is unaffected, but the literal
        # is misleading.
        safe_name = ''.join(c for c in project_name if c.isalnum() or c in '._- \u4e00-\u9fff')
        filename = f'{safe_name}_{timestamp}.docx'
        filepath = os.path.join(config.EXPORT_DIR, filename)
        doc.save(filepath)
        logger.info(f'导出完成: {filepath}')
        return filename
    finally:
        conn.close()
# Disclaimer rendered on the first page of every exported document.  This is
# user-facing runtime text (Chinese) and must not be edited here; the first
# line is the heading, rendered separately by _add_disclaimer_page.
DISCLAIMER_TEXT = """\
免责声明
本工具仅供学习交流免费使用所生成的技术方案不可直接用于投标请务必人工核对本工具不会通过任何平台进行销售请用户注意辨别真伪在您开始使用本AI标书制作服务之前请认真阅读并同意以下关键条款一旦您继续使用即表示您已充分理解并认可本提示的全部内容
服务定位
本工具为单机使用的AI标书辅助工具旨在帮助您生成标书的参考素材您需对最终自己编写的标书文件承担全部责任包括审核修改内容确保其符合相关法律法规及项目要求
准确性免责
本人不对AI生成内容的完全准确性与完整性作任何保证您有义务自行核实所有关键信息并自行承担因使用本工具所引发的一切后果
标书风险
本工具所生成的素材文件仅作参考若您使用包括引用修改或二次创作需自行承担由此可能导致的废标侵权等全部风险与责任本人不承担任何相关责任
责任限制
任何情形下本人均不对因使用本服务而造成的任何直接间接或衍生损失例如利润损失业务中断数据丢失等承担法律责任
其他事项
本人保留随时修改或终止本服务的权利本提示的解释及争议解决均适用中华人民共和国法律\
"""
def _add_disclaimer_page(doc: Document) -> None:
    """Insert the disclaimer page at the very start of the document."""
    # Heading: centered, 16pt bold SimHei.
    title_para = doc.add_paragraph()
    title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    title_run = title_para.add_run('免责声明')
    title_run.font.size = Pt(16)
    title_run.font.bold = True
    title_run.font.name = '黑体'
    title_run._element.rPr.rFonts.set(qn('w:eastAsia'), '黑体')
    doc.add_paragraph()
    # Body paragraphs (the first heading line was rendered above).
    body_lines = DISCLAIMER_TEXT.split('\n')[2:]  # skip "免责声明" and the line after it
    for line in body_lines:
        p = doc.add_paragraph()
        stripped = line.strip()
        # Sub-heading line: non-empty and not indented (neither an ASCII
        # space nor an ideographic space U+3000).
        is_section_title = bool(stripped) and not line.startswith(' ') and not line.startswith('\u3000')
        run = p.add_run(stripped if stripped else '')
        if is_section_title and stripped:
            run.font.bold = True
            run.font.size = Pt(11)
        else:
            run.font.size = Pt(10.5)
        run.font.name = 'Times New Roman'
        run._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')
        p.paragraph_format.space_after = Pt(4)
        _set_line_spacing_15(p)
    doc.add_page_break()
def _add_toc_tree_page(doc: Document, sections: list) -> None:
    """Insert a static tree-style table of contents after the title page.

    Entries are indented by outline level.  This is plain text, not a Word
    TOC field, so it carries no page numbers and never needs a field refresh.
    """
    toc_heading = doc.add_paragraph()
    toc_heading.alignment = WD_ALIGN_PARAGRAPH.CENTER
    tr = toc_heading.add_run('目录')
    tr.font.size = Pt(16)
    tr.font.bold = True
    tr.font.name = '黑体'
    tr._element.rPr.rFonts.set(qn('w:eastAsia'), '黑体')
    doc.add_paragraph()
    for row in sections:
        section_number, title, level, _, _, _ = row
        level = min(int(level), 4)
        text = format_heading_display(level, str(section_number or ''), str(title or ''))
        p = doc.add_paragraph()
        # 0.75cm of indent per level below the top level.
        p.paragraph_format.left_indent = Cm(0.75 * max(0, level - 1))
        p.paragraph_format.space_after = Pt(3)
        run = p.add_run(text)
        run.font.size = Pt(12)
        run.font.name = '宋体'
        run._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')
    doc.add_page_break()
def _build_document(bid_title: str, sections) -> Document:
    """Assemble the full Word document: disclaimer, title page, TOC, body.

    Args:
        bid_title: Title shown centered on the title page.
        sections: Iterable of 6-tuples
            (section_number, title, level, is_leaf, content, intro).

    Returns:
        A python-docx Document ready to be saved.
    """
    doc = Document()
    # ── Page setup (21 x 29.7 cm, i.e. A4 portrait) ──────────────────────
    section_obj = doc.sections[0]
    section_obj.page_width = Cm(21)
    section_obj.page_height = Cm(29.7)
    section_obj.left_margin = Cm(3)
    section_obj.right_margin = Cm(2.5)
    section_obj.top_margin = Cm(2.5)
    section_obj.bottom_margin = Cm(2.5)
    # ── Disclaimer page (first page) ─────────────────────────────────────
    _add_disclaimer_page(doc)
    # ── Title page ───────────────────────────────────────────────────────
    title_para = doc.add_paragraph()
    title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    title_run = title_para.add_run(bid_title)
    title_run.font.size = Pt(22)
    title_run.font.bold = True
    title_run.font.color.rgb = RGBColor(0x1a, 0x56, 0xdb)
    title_run.font.name = '黑体'
    title_run._element.rPr.rFonts.set(qn('w:eastAsia'), '黑体')
    doc.add_paragraph()
    date_para = doc.add_paragraph()
    date_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    date_run = date_para.add_run(datetime.now().strftime('%Y年%m月'))
    date_run.font.size = Pt(14)
    date_run.font.name = '宋体'
    date_run._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')
    doc.add_page_break()
    # ── Tree-style TOC page (after the title page, before the body) ──────
    _add_toc_tree_page(doc, sections)
    # ── Section content ──────────────────────────────────────────────────
    for row in sections:
        section_number, title, level, is_leaf, content, intro = row
        level = min(int(level), 4)  # headings are capped at 4 outline levels
        # Heading with its full outline number
        heading_text = format_heading_display(level, str(section_number or ''), str(title or ''))
        heading = doc.add_heading(level=level)
        heading.clear()
        run = heading.add_run(heading_text)
        _, font_size, bold = LEVEL_STYLES.get(level, ('Heading 4', 12, False))
        run.font.size = Pt(font_size)
        run.font.bold = bold
        run.font.name = '黑体' if level <= 2 else '宋体'
        run._element.rPr.rFonts.set(qn('w:eastAsia'), '黑体' if level <= 2 else '宋体')
        # Chapter introduction (non-leaf nodes)
        if intro and intro.strip():
            _add_body_paragraphs(doc, intro)
        # Body content (leaf nodes)
        if content and content.strip():
            _add_body_paragraphs(doc, content)
    return doc
def _set_line_spacing_15(paragraph):
    """Set a paragraph to 1.5x line spacing (WD_LINE_SPACING.MULTIPLE x 1.5).

    Works at the OXML level: ensures a <w:spacing> element exists in the
    paragraph properties and sets w:line=360 (240 twips per single line
    x 1.5) with w:lineRule="auto".
    """
    # Fix: use the module-level `qn` helper (already imported for the rest
    # of this file) instead of re-importing it locally on every call.
    pPr = paragraph._element.get_or_add_pPr()
    spacing = pPr.find(qn('w:spacing'))
    if spacing is None:
        spacing = OxmlElement('w:spacing')
        pPr.append(spacing)
    spacing.set(qn('w:line'), '360')  # 240 twips x 1.5 = 360
    spacing.set(qn('w:lineRule'), 'auto')
# ── 图/表标记解析 ─────────────────────────────────────────────────────────
_BLOCK_PATTERN = re.compile(
r'\[FIGURE:([^\]]+)\](.*?)\[/FIGURE\]'
r'|\[TABLE:([^\]]+)\](.*?)\[/TABLE\]',
re.DOTALL
)
def _split_content_blocks(text: str) -> list:
"""
将章节正文拆分为有序内容块列表
{'type': 'text', 'content': '...'}
{'type': 'figure', 'title': '...', 'content': '...'}
{'type': 'table', 'title': '...', 'content': '...'}
"""
blocks = []
last = 0
for m in _BLOCK_PATTERN.finditer(text):
if m.start() > last:
blocks.append({'type': 'text', 'content': text[last:m.start()]})
if m.group(1) is not None:
blocks.append({'type': 'figure',
'title': m.group(1).strip(),
'content': m.group(2).strip()})
else:
blocks.append({'type': 'table',
'title': m.group(3).strip(),
'content': m.group(4).strip()})
last = m.end()
if last < len(text):
blocks.append({'type': 'text', 'content': text[last:]})
return blocks
def _set_para_shading(para, hex_fill: str):
    """Apply a solid background fill (hex RGB string, no '#') to a paragraph."""
    shading = OxmlElement('w:shd')
    shading.set(qn('w:val'), 'clear')
    shading.set(qn('w:color'), 'auto')
    shading.set(qn('w:fill'), hex_fill)
    para._element.get_or_add_pPr().append(shading)
def _set_cell_bg(cell, hex_fill: str):
    """Apply a solid background fill (hex RGB string, no '#') to a table cell."""
    shading = OxmlElement('w:shd')
    shading.set(qn('w:val'), 'clear')
    shading.set(qn('w:color'), 'auto')
    shading.set(qn('w:fill'), hex_fill)
    cell._tc.get_or_add_tcPr().append(shading)
def _set_cell_padding(cell, pt_value: float):
    """Set identical inner padding on all four sides of a table cell.

    Args:
        cell: python-docx table cell.
        pt_value: Padding in points (converted to twips: 1 pt == 20 twips).
    """
    twips = str(int(pt_value * 20))
    margins = OxmlElement('w:tcMar')
    for side in ('top', 'left', 'bottom', 'right'):
        edge = OxmlElement(f'w:{side}')
        edge.set(qn('w:w'), twips)
        edge.set(qn('w:type'), 'dxa')
        margins.append(edge)
    cell._tc.get_or_add_tcPr().append(margins)
def _safe_set_eastasia(run, font_name: str):
    """Set a run's East-Asian font, tolerating a missing rPr element.

    Args:
        run: python-docx Run whose CJK font should be changed.
        font_name: East-Asian font family name, e.g. '宋体' or '黑体'.
    """
    _ = run.font.size  # reading any font property forces python-docx to create rPr
    try:
        run._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)
    except Exception:
        # Best-effort: keep the default font if the OXML tree is unexpected.
        pass
def _add_block_caption(doc: Document, prefix: str, title: str):
    """Add a centered, bold caption line for a figure or table block."""
    caption = doc.add_paragraph()
    caption.alignment = WD_ALIGN_PARAGRAPH.CENTER
    caption.paragraph_format.space_before = Pt(8)
    caption.paragraph_format.space_after = Pt(3)
    caption_run = caption.add_run(f'{prefix}{title}')
    caption_run.font.bold = True
    caption_run.font.size = Pt(11)
    caption_run.font.name = 'Times New Roman'
    _safe_set_eastasia(caption_run, '黑体')
def _add_figure_block(doc: Document, title: str, content: str):
    """Render a text diagram as a bordered, shaded box.

    Implemented as a one-cell 'Table Grid' table so the box gets a border
    on all four sides — cleaner than shading a bare paragraph.
    """
    _add_block_caption(doc, '', title)
    box = doc.add_table(rows=1, cols=1)
    box.style = 'Table Grid'
    cell = box.cell(0, 0)
    _set_cell_bg(cell, 'EFF3FB')  # pale blue-grey fill
    _set_cell_padding(cell, 5)    # 5 pt inner padding
    for idx, diagram_line in enumerate(content.split('\n')):
        if idx:
            para = cell.add_paragraph()
        else:
            # Reuse the cell's implicit first paragraph for the first line.
            para = cell.paragraphs[0]
            para.clear()
        para.paragraph_format.space_before = Pt(0)
        para.paragraph_format.space_after = Pt(1)
        line_run = para.add_run(diagram_line if diagram_line else ' ')
        line_run.font.size = Pt(9.5)
        line_run.font.name = 'Courier New'  # monospace keeps diagrams aligned
        _safe_set_eastasia(line_run, '宋体')
    # Breathing room after the box.
    trailer = doc.add_paragraph()
    trailer.paragraph_format.space_after = Pt(8)
def _add_word_table(doc: Document, title: str, content: str):
    """Parse a Markdown pipe table and render it as a bordered Word table.

    The first parsed row becomes the header (bold, shaded, centered).
    Falls back to plain paragraphs when no valid table rows are found.
    """
    # Parse markdown lines, dropping separator rows like |---|---|
    raw_rows = []
    for line in content.strip().split('\n'):
        line = line.strip()
        if not line:
            continue
        if re.match(r'^\|[\s\-:| ]+\|$', line):
            continue  # alignment/separator row carries no data
        if line.startswith('|') and line.endswith('|'):
            cells = [c.strip() for c in line[1:-1].split('|')]
            raw_rows.append(cells)
    if not raw_rows:
        # Degrade gracefully to plain text when nothing parsed as a table
        _add_block_caption(doc, '', title)
        _add_plain_text(doc, content)
        return
    # Pad ragged rows so every row has the same column count
    col_count = max(len(r) for r in raw_rows)
    rows = [r + [''] * (col_count - len(r)) for r in raw_rows]
    _add_block_caption(doc, '', title)
    table = doc.add_table(rows=len(rows), cols=col_count)
    table.style = 'Table Grid'
    for i, row_data in enumerate(rows):
        for j, cell_text in enumerate(row_data):
            cell = table.cell(i, j)
            para = cell.paragraphs[0]
            para.clear()
            para.alignment = WD_ALIGN_PARAGRAPH.CENTER if i == 0 else WD_ALIGN_PARAGRAPH.LEFT
            run = para.add_run(cell_text)
            run.font.size = Pt(10)
            run.font.bold = (i == 0)
            run.font.name = 'Times New Roman'
            _safe_set_eastasia(run, '宋体')
            if i == 0:
                _set_cell_bg(cell, 'D6E4F7')  # light blue header fill
    # Spacer paragraph after the table
    sp = doc.add_paragraph()
    sp.paragraph_format.space_after = Pt(6)
def _add_plain_text(doc: Document, text: str):
    """Render plain body text: one indented paragraph per non-empty line."""
    for raw_line in text.split('\n'):
        stripped = raw_line.strip()
        if not stripped:
            continue
        para = doc.add_paragraph()
        para.paragraph_format.first_line_indent = Pt(24)  # two CJK chars at 12 pt
        para.paragraph_format.space_after = Pt(6)
        _set_line_spacing_15(para)
        body_run = para.add_run(stripped)
        body_run.font.size = Pt(12)
        body_run.font.name = 'Times New Roman'
        _safe_set_eastasia(body_run, '宋体')
def _add_body_paragraphs(doc: Document, text: str):
    """Render body text, dispatching on [FIGURE:...] / [TABLE:...] markers.

    Text between markers becomes ordinary paragraphs; figure markers become
    bordered diagram boxes and table markers become Word tables.
    """
    for block in _split_content_blocks(text):
        kind = block['type']
        if kind == 'figure':
            _add_figure_block(doc, block['title'], block['content'])
        elif kind == 'table':
            _add_word_table(doc, block['title'], block['content'])
        else:
            _add_plain_text(doc, block['content'])

1205
modules/generator.py Normal file

File diff suppressed because it is too large Load Diff

292
modules/knowledge.py Normal file
View File

@ -0,0 +1,292 @@
"""
企业知识库模块无外部向量库依赖
存储后端SQLite与主数据库共用同一文件
- knowledge_vectors 文本块 + JSON 向量
- knowledge_files 文件元数据已在 app.py init_db 中建立
检索策略
Qwen / OpenAI provider Embedding API + 余弦相似度语义检索
DeepSeek / Ollama SQL LIKE 关键词检索降级
"""
import json
import math
import logging
import os
import sqlite3
import threading
from datetime import datetime
import config
from utils.file_utils import extract_text, split_text_chunks
logger = logging.getLogger(__name__)
# 正在后台入库的文件名集合(供前端轮询感知"处理中"状态)
_processing_files: set = set()
_processing_lock = threading.Lock()
# 每次 Embedding API 批量请求的块数(避免单次请求过大)
_EMBED_BATCH = 16
# ─── 数据库 ──────────────────────────────────────────────────────────────────
def _conn() -> sqlite3.Connection:
    """Open a connection to the shared application SQLite database."""
    return sqlite3.connect(config.DB_PATH)
def _init_tables(cur: sqlite3.Cursor) -> None:
    """Ensure the vector-chunk table exists (knowledge_files is created by app.py init_db).

    `embedding` holds a JSON-encoded float list, or NULL when the provider
    has no embedding API (keyword-search fallback).
    """
    cur.execute('''
        CREATE TABLE IF NOT EXISTS knowledge_vectors (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            file_name TEXT NOT NULL,
            chunk_idx INTEGER NOT NULL,
            text TEXT NOT NULL,
            embedding TEXT,
            UNIQUE(file_name, chunk_idx)
        )
    ''')
# ─── Embedding API ────────────────────────────────────────────────────────────
def _get_embeddings_batch(texts: list[str]) -> list[list[float] | None]:
    """Fetch embeddings for a batch of texts via the active provider's API.

    Args:
        texts: Chunk texts to embed (one API request for the whole batch).

    Returns:
        One embedding vector per input text, in order. For providers with no
        embedding endpoint — or on any API failure — a list of None of the
        same length, which tells the caller to fall back to keyword search.
    """
    if not texts:
        return []
    provider = getattr(config, 'MODEL_PROVIDER', '')
    try:
        from openai import OpenAI
        if provider == 'qwen':
            client = OpenAI(api_key=config.QWEN_API_KEY, base_url=config.QWEN_BASE_URL)
            model = config.QWEN_EMBEDDING_MODEL
        elif provider == 'openai':
            client = OpenAI(api_key=config.OPENAI_API_KEY, base_url=config.OPENAI_BASE_URL)
            model = config.OPENAI_EMBEDDING_MODEL
        elif provider == 'kimi':
            client = OpenAI(api_key=config.KIMI_API_KEY, base_url=config.KIMI_BASE_URL)
            model = config.KIMI_EMBEDDING_MODEL
        else:
            # DeepSeek / Ollama / Doubao expose no public embedding API;
            # degrade to keyword retrieval.
            return [None] * len(texts)
        resp = client.embeddings.create(input=texts, model=model)
        return [item.embedding for item in resp.data]
    except Exception as e:
        logger.warning(f'Embedding API 调用失败,将使用关键词检索降级: {e}')
        return [None] * len(texts)
def _cosine(a: list[float], b: list[float]) -> float:
"""纯 Python 余弦相似度,无需 numpy"""
dot = sum(x * y for x, y in zip(a, b))
na = math.sqrt(sum(x * x for x in a))
nb = math.sqrt(sum(x * x for x in b))
return dot / (na * nb) if na and nb else 0.0
# ─── 公开接口 ─────────────────────────────────────────────────────────────────
def is_available() -> dict:
    """Report knowledge-base status (always usable — no external dependencies).

    Returns a dict with:
        available: Always True.
        doc_count: Number of stored text chunks.
        processing: File names currently being ingested in the background.
        search_mode: 'vector' (semantic) or 'keyword' (fallback).
        error: Present only when the status query itself failed.
    """
    with _processing_lock:
        processing = list(_processing_files)
    try:
        db = _conn()
        cur = db.cursor()
        _init_tables(cur)
        db.commit()
        cur.execute('SELECT COUNT(*) FROM knowledge_vectors')
        doc_count = cur.fetchone()[0]
        # Vector mode is reported when any stored chunk already has an
        # embedding, i.e. the Embedding API has worked at least once.
        cur.execute('SELECT 1 FROM knowledge_vectors WHERE embedding IS NOT NULL LIMIT 1')
        has_embedding = cur.fetchone() is not None
        db.close()
        provider = getattr(config, 'MODEL_PROVIDER', '')
        can_embed = provider in ('qwen', 'openai', 'kimi')
        mode = 'vector' if (has_embedding or can_embed) else 'keyword'
        return {
            'available': True,
            'doc_count': doc_count,
            'processing': processing,
            'search_mode': mode,
        }
    except Exception as e:
        return {
            'available': True,
            'doc_count': 0,
            'processing': processing,
            'search_mode': 'keyword',
            'error': str(e),
        }
def add_file(file_path: str, db_path: str) -> dict:
    """Ingest a file into the knowledge base: split → batch-embed → SQLite.

    Runs on a background thread; _processing_files lets the frontend poll
    which files are still being ingested.

    Args:
        file_path: Path of the document to ingest.
        db_path: Unused here — storage goes through _conn()/config.DB_PATH;
            NOTE(review): confirm callers always pass the same database.

    Returns:
        {'success': True, 'chunks': N} on success,
        {'success': False, 'error': msg} otherwise.
    """
    file_name = os.path.basename(file_path)
    with _processing_lock:
        _processing_files.add(file_name)
    try:
        text = extract_text(file_path)
        chunks = split_text_chunks(text, config.CHUNK_SIZE, config.CHUNK_OVERLAP)
        if not chunks:
            return {'success': False, 'error': '文件内容为空,无法入库'}
        # Batch embeddings: real vectors for qwen/openai/kimi; all None otherwise
        embeddings: list[list[float] | None] = []
        for i in range(0, len(chunks), _EMBED_BATCH):
            batch = chunks[i:i + _EMBED_BATCH]
            embeddings.extend(_get_embeddings_batch(batch))
        db = _conn()
        try:
            cur = db.cursor()
            _init_tables(cur)
            # Drop any stale rows for a previously ingested file of the same name
            cur.execute('DELETE FROM knowledge_vectors WHERE file_name=?', (file_name,))
            for idx, (chunk, emb) in enumerate(zip(chunks, embeddings)):
                emb_json = json.dumps(emb) if emb is not None else None
                cur.execute(
                    'INSERT INTO knowledge_vectors (file_name, chunk_idx, text, embedding) VALUES (?,?,?,?)',
                    (file_name, idx, chunk, emb_json),
                )
            cur.execute('''
                INSERT OR REPLACE INTO knowledge_files (file_name, file_path, chunk_count, added_at)
                VALUES (?, ?, ?, ?)
            ''', (file_name, file_path, len(chunks), datetime.now()))
            db.commit()
        finally:
            db.close()
        logger.info(f'知识库入库完成: {file_name}{len(chunks)}块'
                    f'{"(含向量)" if any(e is not None for e in embeddings) else "(关键词模式)"}')
        return {'success': True, 'chunks': len(chunks)}
    except Exception as e:
        logger.exception('知识库添加文件失败')
        return {'success': False, 'error': str(e)}
    finally:
        # Always clear the in-progress marker, even on failure
        with _processing_lock:
            _processing_files.discard(file_name)
def search(query: str, top_k: int = None) -> list[str]:
    """Retrieve the stored text chunks most relevant to *query*.

    Vector mode: embed the query and rank chunks by cosine similarity.
    Keyword fallback (no embedding API): multi-word SQL LIKE matching.

    Args:
        query: Free-text query.
        top_k: Maximum results; defaults to config.TOP_K_KNOWLEDGE.

    Returns:
        Up to top_k chunk texts, best first; [] when empty or on error.
    """
    if top_k is None:
        top_k = config.TOP_K_KNOWLEDGE
    try:
        db = _conn()
        try:
            cur = db.cursor()
            _init_tables(cur)
            db.commit()
            cur.execute('SELECT COUNT(*) FROM knowledge_vectors')
            if cur.fetchone()[0] == 0:
                return []
            # ── Vector (semantic) retrieval ──────────────────────────────
            q_embs = _get_embeddings_batch([query])
            q_emb = q_embs[0] if q_embs else None
            if q_emb is not None:
                cur.execute(
                    'SELECT text, embedding FROM knowledge_vectors WHERE embedding IS NOT NULL'
                )
                rows = cur.fetchall()
                if rows:
                    scored: list[tuple[float, str]] = []
                    for text, emb_json in rows:
                        try:
                            emb = json.loads(emb_json)
                            scored.append((_cosine(q_emb, emb), text))
                        except Exception:
                            continue  # skip rows with corrupt embedding JSON
                    scored.sort(reverse=True)
                    return [t for _, t in scored[:top_k]]
            # ── Keyword fallback (providers without an embedding API) ────
            # Filter out pure number/ordinal tokens (e.g. "1.2", "一、")
            # to avoid matching unrelated passages.
            import re as _re
            _num_pat = _re.compile(r'^[\d\.\-、一二三四五六七八九十]+$')
            words = [
                w.strip() for w in query.split()
                if len(w.strip()) > 1 and not _num_pat.match(w.strip())
            ][:6]
            if not words:
                # No usable keywords: return an arbitrary sample of chunks
                cur.execute('SELECT text FROM knowledge_vectors LIMIT ?', (top_k,))
                return [r[0] for r in cur.fetchall()]
            conditions = ' OR '.join(['text LIKE ?' for _ in words])
            params = [f'%{w}%' for w in words] + [top_k]
            cur.execute(
                f'SELECT text FROM knowledge_vectors WHERE {conditions} LIMIT ?', params
            )
            return [r[0] for r in cur.fetchall()]
        finally:
            db.close()
    except Exception as e:
        logger.error(f'知识库检索失败: {e}')
        return []
def list_files(db_path: str) -> list[dict]:
    """List files that have been ingested into the knowledge base.

    Args:
        db_path: Path to the SQLite database holding `knowledge_files`.

    Returns:
        Newest-first list of {'name', 'chunks', 'added_at'} dicts; an empty
        list when the table is missing or any database error occurs.
    """
    try:
        db = sqlite3.connect(db_path)
        try:
            cur = db.cursor()
            cur.execute(
                'SELECT file_name, chunk_count, added_at FROM knowledge_files ORDER BY added_at DESC'
            )
            rows = cur.fetchall()
        finally:
            # Fix: close the connection even when the query fails (e.g. the
            # table does not exist yet) — the original leaked it on that path.
            db.close()
        return [{'name': r[0], 'chunks': r[1], 'added_at': r[2]} for r in rows]
    except Exception:
        return []
def delete_file(file_name: str, db_path: str) -> dict:
    """Delete all knowledge-base data for one file (chunks + metadata row).

    Args:
        file_name: Basename of the ingested file to remove.
        db_path: Kept for signature compatibility with list_files(); the
            connection actually goes through _conn()/config.DB_PATH —
            NOTE(review): confirm both always point at the same database.

    Returns:
        {'success': True} or {'success': False, 'error': msg}.
    """
    try:
        db = _conn()
        try:
            cur = db.cursor()
            _init_tables(cur)
            cur.execute('DELETE FROM knowledge_vectors WHERE file_name=?', (file_name,))
            cur.execute('DELETE FROM knowledge_files WHERE file_name=?', (file_name,))
            db.commit()
        finally:
            # Fix: close on every path — the original leaked the connection
            # when a statement raised.
            db.close()
        return {'success': True}
    except Exception as e:
        logger.exception('知识库删除文件失败')
        return {'success': False, 'error': str(e)}

179
modules/parser.py Normal file
View File

@ -0,0 +1,179 @@
"""
招标文件解析模块
流程提取文本 生成摘要 提取评分要求 结构化JSON
"""
import json
import logging
import re
import sqlite3
from datetime import datetime
from utils import ai_client, prompts as P
from utils.file_utils import extract_text, truncate_text
from utils.tender_kind_sections import (
get_tender_kind_classify_prompt,
parse_tender_kind_response,
)
logger = logging.getLogger(__name__)
def parse_boq_file(db_path: str, project_id: int, file_path: str, file_name: str) -> None:
    """Background-thread parse of a bill-of-quantities (BOQ) file.

    Pipeline: extract page texts → local structural analysis → AI summary →
    persist to tender_data. boq_status transitions: none → parsing → done/error.

    Args:
        db_path: SQLite database file to write results into.
        project_id: Project whose tender_data row is updated.
        file_path: Path of the uploaded BOQ file.
        file_name: Original file name stored alongside the results.
    """
    from utils.bill_analysis import analyze_boq_pages, categories_to_prompt_appendix
    from utils.boq_parser import extract_boq_pages
    conn = sqlite3.connect(db_path)
    try:
        _set_boq_status(conn, project_id, 'parsing', '正在提取工程量清单文本...')
        page_texts = extract_boq_pages(file_path)
        boq_text = '\n'.join(page_texts).strip()
        if not boq_text:
            raise ValueError('未能从文件中提取到有效内容,请检查文件格式')
        _set_boq_status(conn, project_id, 'parsing', '正在本地解析清单结构...')
        analysis = analyze_boq_pages(page_texts)
        boq_analysis_json = json.dumps(analysis, ensure_ascii=False)
        structured = ''
        # Only build the structured appendix when real bill pages were found
        # (scanned PDFs / files without bill pages cannot be structured).
        if not analysis.get('scanned') and not analysis.get('no_bill_pages'):
            structured = categories_to_prompt_appendix(analysis)
        _set_boq_status(conn, project_id, 'parsing', '正在生成工程量清单摘要...')
        summary_prompt = P.get_boq_summary_prompt(boq_text[:10000], structured)
        boq_summary = ai_client.chat(summary_prompt, temperature=0.2, max_tokens=2048)
        cur = conn.cursor()
        cur.execute('''
            UPDATE tender_data
            SET boq_file_name=?, boq_text=?, boq_summary=?, boq_analysis_json=?,
                boq_status='done', boq_error='', updated_at=?
            WHERE project_id=?
        ''', (file_name, boq_text[:12000], boq_summary, boq_analysis_json, datetime.now(), project_id))
        conn.commit()
        logger.info(f'项目 {project_id} 工程量清单解析完成')
    except Exception as e:
        logger.exception(f'工程量清单解析失败 project_id={project_id}')
        _set_boq_status(conn, project_id, 'error', str(e))
    finally:
        conn.close()
def _set_boq_status(conn, project_id, status, message=''):
cur = conn.cursor()
cur.execute('''
UPDATE tender_data SET boq_status=?, boq_error=?, updated_at=?
WHERE project_id=?
''', (status, message, datetime.now(), project_id))
conn.commit()
def parse_tender_file(db_path: str, project_id: int, file_path: str, file_name: str) -> None:
    """Background-thread parse of an uploaded tender document.

    Pipeline: extract text → AI summary → rating requirements (Markdown) →
    rating JSON → tender-kind classification → persist.
    Status field transitions: pending → parsing → done/error.

    Args:
        db_path: SQLite database file to write results into.
        project_id: Project whose tender_data row is updated.
        file_path: Path of the uploaded tender file.
        file_name: Original file name stored alongside the results.
    """
    conn = sqlite3.connect(db_path)
    try:
        _set_status(conn, project_id, 'parsing', '正在提取文件文本...')
        # 1. Extract raw text (capped to keep prompts within context limits)
        raw_text = extract_text(file_path)
        raw_text = truncate_text(raw_text, 60000)
        _set_status(conn, project_id, 'parsing', '正在生成招标摘要...')
        # 2. Generate a structured summary
        summary_prompt = P.get_project_summary_prompt(raw_text)
        summary = ai_client.chat(summary_prompt, temperature=0.3, max_tokens=4096)
        _set_status(conn, project_id, 'parsing', '正在提取技术评分要求...')
        # 3. Extract the technical rating requirements (Markdown format)
        rating_prompt = P.get_rating_requirements_prompt(raw_text)
        rating_md = ai_client.chat(rating_prompt, temperature=0.2, max_tokens=4096)
        _set_status(conn, project_id, 'parsing', '正在结构化评分数据...')
        # 4. Convert the rating requirements to JSON
        rating_json_prompt = P.get_rating_json_prompt(rating_md)
        rating_json_raw = ai_client.chat(rating_json_prompt, temperature=0.1, max_tokens=2048)
        rating_json_str = _clean_json(rating_json_raw)
        _set_status(conn, project_id, 'parsing', '正在识别招标文件类型(工程/服务/货物)...')
        # 5. Classify the tender kind from a leading excerpt of the document
        excerpt = (raw_text or '')[:15000]
        kind_prompt = get_tender_kind_classify_prompt(excerpt)
        kind_raw = ai_client.chat(kind_prompt, temperature=0.1, max_tokens=32)
        tender_kind = parse_tender_kind_response(kind_raw)
        logger.info(f'项目 {project_id} 招标文件类型识别为: {tender_kind}')
        # Persist everything in one upsert
        _upsert_tender_data(conn, project_id, file_name, raw_text,
                            summary, rating_md, rating_json_str, tender_kind)
        _set_status(conn, project_id, 'done', '解析完成')
        logger.info(f'项目 {project_id} 招标文件解析完成')
    except Exception as e:
        logger.exception(f'解析失败 project_id={project_id}')
        _set_status(conn, project_id, 'error', str(e))
    finally:
        conn.close()
# ─── 内部工具 ──────────────────────────────────────────────────────────────
def _set_status(conn, project_id, status, message=''):
    """Upsert the parse status/error message for a project's tender_data row.

    Parameters repeat in the tuple because SQLite UPSERT needs the values
    once for INSERT and once for the ON CONFLICT UPDATE clause.
    """
    cur = conn.cursor()
    cur.execute('''
        INSERT INTO tender_data (project_id, status, error_message)
        VALUES (?, ?, ?)
        ON CONFLICT(project_id) DO UPDATE SET status=?, error_message=?, updated_at=?
    ''', (project_id, status, message, status, message, datetime.now()))
    conn.commit()
def _upsert_tender_data(conn, project_id, file_name, raw_text,
                        summary, rating_md, rating_json_str,
                        tender_kind: str = 'engineering'):
    """Insert or update the fully parsed tender record for a project.

    Uses SQLite UPSERT keyed on project_id; the parameter tuple therefore
    repeats the values once for INSERT and once for the UPDATE clause.
    """
    cur = conn.cursor()
    cur.execute('''
        INSERT INTO tender_data
            (project_id, file_name, raw_text, summary, rating_requirements, rating_json,
             tender_kind, status, error_message)
        VALUES (?, ?, ?, ?, ?, ?, ?, 'done', '')
        ON CONFLICT(project_id) DO UPDATE SET
            file_name=?, raw_text=?, summary=?, rating_requirements=?,
            rating_json=?, tender_kind=?, status='done', error_message='', updated_at=?
    ''', (
        project_id, file_name, raw_text, summary, rating_md, rating_json_str, tender_kind,
        file_name, raw_text, summary, rating_md, rating_json_str, tender_kind, datetime.now()
    ))
    conn.commit()
def _clean_json(raw: str) -> str:
"""尝试从 AI 返回中提取 JSON 字符串"""
# 去除 markdown 代码块
raw = re.sub(r'```(?:json)?\s*', '', raw)
raw = raw.replace('```', '').strip()
# 验证是否是有效 JSON
try:
json.loads(raw)
return raw
except json.JSONDecodeError:
# 尝试提取 { ... } 部分
m = re.search(r'\{[\s\S]*\}', raw)
if m:
candidate = m.group(0)
try:
json.loads(candidate)
return candidate
except Exception:
pass
return raw

View File

@ -0,0 +1,36 @@
- 角色:技术标书架构师
- 能力:
- 单章节深度解构能力
- 跨章节协同规划视野
- 评分权重动态分配策略
- 任务:根据招标文件概要、章节主题、评分要求,生成结构化的技术标书该章节的目录
- 输出要求:
- 采用四级嵌套编码体系X.X.X.X确保章节颗粒度可控
- 直接给出生成的章节大纲,禁止解释和引导词
- markdown格式输出
- 示例输出,以"服务进度保障措施"为例:
二、智慧物流系统全生命周期进度保障
 2.1 基于BIM的进度协同管理平台
  2.1.1 多级进度计划耦合模型
   2.1.1.1 WBS-Milestone映射矩阵
   2.1.1.2 Primavera P6进度基线
  2.1.2 资源约束进度优化算法
   2.1.2.1 基于CPM的缓冲区间动态分配
   2.1.2.2 资源平滑度R=0.92
- 招标文件概要:
{summary}
- 章节主题:
{chapter}
- 评分要求:
{score}

158
prompts/outlines.txt Normal file
View File

@ -0,0 +1,158 @@
- 角色:技术标书架构师
- 任务:生成适配技术评分标准的技术标书目录
- 输出要求:
采用四级嵌套编码体系X.X.X.X下实现按需分层
直接给出生成的目录,禁止解释和引导词
- 约束控制:
根据项目生成标书的名称如“XXXX项目技术标书”
总的章节数应该控制在8-10个
章节颗粒度与评分指标权重正相关
技术实施类章节必须达到四级深度,管理保障类章节允许三级结构
同级节点数量必须有波动区间:技术方案类(4-7)、实施保障类(2-4)、创新应用类(1-3)
目录的章节不能缺少包含以下关键词的内容:
- 对本项目的了解和分析
- 项目工作重难点分析
- 项目实施方案
- 服务进度保障措施
- 服务质量保障方案
- 合理化建议
- 服务承诺及处罚措施
目录不包含成本和预算内容,但要平衡项目预算、技术可行性以及技术的专业度
- 示例输出:
<example>
花岭新城BIM项目技术标书
一、总体实施方案
 1.1 项目理解与需求分析
  1.1.1 项目概述
    1.1.1.1 建设地点及规模
    1.1.1.2 工程地质勘察报告
    1.1.1.3 抗震设防烈度与防火等级
    1.1.1.4 建筑结构形式与建筑面积分布
  1.1.2 项目背景
    1.1.2.1 核心宗旨与目标
    1.1.2.2 地理位置与项目规模
  1.1.3 项目目标
    1.1.3.1 就业机会与基础设施提升
    1.1.3.2 乡村振兴与经济增长
  1.1.4 项目特点
    1.1.4.1 框筒结构抗震性能
    1.1.4.2 分阶段工程地质勘察
    1.1.4.3 功能区域多样化
二、建筑设计
 2.1 主要设计依据
    2.1.1 国家标准与规范
    2.1.2 行业标准与图集
 2.2 建筑结构设计
    2.2.1 结构形式
    2.2.2 结构材料
    2.2.3 结构布局
    2.2.4 结构经济指标
    2.2.5 结构细节设计
 2.3 建筑功能布局
    2.3.1 C1#楼(厂房)
      2.3.1.1 功能分区明确
      2.3.1.2 流线优化与安全性
    2.3.2 配电房
      2.3.2.1 设计目标与设备布置
      2.3.2.2 空间规划与电气主接线方案
    2.3.3 外廊及架空建筑
      2.3.3.1 功能区域与景观设计
      2.3.3.2 光照与通风优化
 2.4 建筑材料选用
 2.5 建筑外观设计
 2.6 建筑室内布局
    2.6.1 功能分区与设计要点
 2.7 建筑安全和消防设计
    2.7.1 建筑安全体系
    2.7.2 消防系统设计
 2.8 建筑节能设计
    2.8.1 节能措施与绿色建材
    2.8.2 雨水收集系统
三、结构设计
 3.1 结构形式
 3.2 结构材料
    3.2.1 混凝土与钢材选用
 3.3 结构布局
    3.3.1 结构柱网与通风疏散通道
 3.4 结构经济指标
    3.4.1 抗震设计要求与用材控制
 3.5 结构细节设计
    3.5.1 基础设计与钢结构细节
    3.5.2 混凝土结构与抗震设计
 3.6 结构分析与计算
四、给排水设计
 4.1 引言
 4.2 供水系统设计
    4.2.1 供水管道与消防水源
    4.2.2 节水设计与雨水收集
 4.3 排水系统设计
    4.3.1 排水管道与雨水管理
    4.3.2 污水处理与分流制度
 4.4 给排水设备选择
 4.5 细节设计
 4.6 监测与维护
五、暖通设计
 5.1 引言
 5.2 供暖系统设计
    5.2.1 供暖方式与设备选择
    5.2.2 温度控制系统
 5.3 通风系统设计
    5.3.1 通风方式与设备选择
    5.3.2 空气质量控制
 5.4 空调系统设计
    5.4.1 空调方式与设备选择
    5.4.2 温湿度控制系统
 5.5 热水系统设计
 5.6 细节设计与监测维护
六、BIM设计
 6.1 项目总图与单体建筑设计
 6.2 道路与排水设计
 6.3 电气系统设计
 6.4 绿化设计
 6.5 BIM协同设计与施工管理
 6.6 数据管理与培训支持
七、设计说明
 7.1 项目设计依据
 7.2 设计原则
 7.3 结构经济合理化
 7.4 建筑功能分区
 7.5 设计细节要求
八、合理化建议
 8.1 建筑专业合理化建议
 8.2 结构专业合理化建议
 8.3 给排水专业合理化建议
 8.4 暖通专业合理化建议
 8.5 BIM专业合理化建议
8.6 技术和工艺方面的建议
8.7 成本和预算方面的建议
8.8 时间和进度方面的建议
8.9 施工质量管理方面的建议
8.10 质量和安全方面的建议
8.11 环境和可持续性方面的建议
九、施工进度安排
 9.1 施工进度安排
 9.2 施工进度跟踪与管理
 9.3 施工质量管理
 9.4 施工现场管理
 9.5 施工结项与验收
十、本项目工作重点难点分析
 10.1 工程特点与设计工作难点
 10.2 重点与难点分析
 10.3 综合解决措施
</example>
- 招标文件内容:
{document_text}
"""

View File

@ -0,0 +1,155 @@
- 角色:技术标书架构师
- 任务:生成适配技术评分标准的技术标书目录
- 输出要求:
采用四级嵌套编码体系X.X.X.X下实现按需分层
直接给出生成的目录,禁止解释和引导词
- 约束控制:
根据项目生成标书的名称如“XXXX项目技术标书”
总的章节数应该控制在8-10个,不超过10个
目录的章节必须按照技术评分标准的项目生成,题目应包括技术评分项目中的关键词:
章节颗粒度与评分指标权重正相关
技术方案类章节必须达到四级深度,管理保障类章节允许三级结构
同级节点数量必须有波动区间:技术方案类(4-7)、实施保障类(2-4)、创新应用类(1-3)
目录禁止包含报价、团队、资质、文件等商务性质的章节
- 示例输出:
<example>
花岭新城BIM项目技术标书
一、总体实施方案
 1.1 项目理解与需求分析
  1.1.1 项目概述
    1.1.1.1 建设地点及规模
    1.1.1.2 工程地质勘察报告
    1.1.1.3 抗震设防烈度与防火等级
    1.1.1.4 建筑结构形式与建筑面积分布
  1.1.2 项目背景
    1.1.2.1 核心宗旨与目标
    1.1.2.2 地理位置与项目规模
  1.1.3 项目目标
    1.1.3.1 就业机会与基础设施提升
    1.1.3.2 乡村振兴与经济增长
  1.1.4 项目特点
    1.1.4.1 框筒结构抗震性能
    1.1.4.2 分阶段工程地质勘察
    1.1.4.3 功能区域多样化
二、建筑设计
 2.1 主要设计依据
    2.1.1 国家标准与规范
    2.1.2 行业标准与图集
 2.2 建筑结构设计
    2.2.1 结构形式
    2.2.2 结构材料
    2.2.3 结构布局
    2.2.4 结构经济指标
    2.2.5 结构细节设计
 2.3 建筑功能布局
    2.3.1 C1#楼(厂房)
      2.3.1.1 功能分区明确
      2.3.1.2 流线优化与安全性
    2.3.2 配电房
      2.3.2.1 设计目标与设备布置
      2.3.2.2 空间规划与电气主接线方案
    2.3.3 外廊及架空建筑
      2.3.3.1 功能区域与景观设计
      2.3.3.2 光照与通风优化
 2.4 建筑材料选用
 2.5 建筑外观设计
 2.6 建筑室内布局
    2.6.1 功能分区与设计要点
 2.7 建筑安全和消防设计
    2.7.1 建筑安全体系
    2.7.2 消防系统设计
 2.8 建筑节能设计
    2.8.1 节能措施与绿色建材
    2.8.2 雨水收集系统
三、结构设计
 3.1 结构形式
 3.2 结构材料
    3.2.1 混凝土与钢材选用
 3.3 结构布局
    3.3.1 结构柱网与通风疏散通道
 3.4 结构经济指标
    3.4.1 抗震设计要求与用材控制
 3.5 结构细节设计
    3.5.1 基础设计与钢结构细节
    3.5.2 混凝土结构与抗震设计
 3.6 结构分析与计算
四、给排水设计
 4.1 引言
 4.2 供水系统设计
    4.2.1 供水管道与消防水源
    4.2.2 节水设计与雨水收集
 4.3 排水系统设计
    4.3.1 排水管道与雨水管理
    4.3.2 污水处理与分流制度
 4.4 给排水设备选择
 4.5 细节设计
 4.6 监测与维护
五、暖通设计
 5.1 引言
 5.2 供暖系统设计
    5.2.1 供暖方式与设备选择
    5.2.2 温度控制系统
 5.3 通风系统设计
    5.3.1 通风方式与设备选择
    5.3.2 空气质量控制
 5.4 空调系统设计
    5.4.1 空调方式与设备选择
    5.4.2 温湿度控制系统
 5.5 热水系统设计
 5.6 细节设计与监测维护
六、BIM设计
 6.1 项目总图与单体建筑设计
 6.2 道路与排水设计
 6.3 电气系统设计
 6.4 绿化设计
 6.5 BIM协同设计与施工管理
 6.6 数据管理与培训支持
七、设计说明
 7.1 项目设计依据
 7.2 设计原则
 7.3 结构经济合理化
 7.4 建筑功能分区
 7.5 设计细节要求
八、合理化建议
 8.1 建筑专业合理化建议
 8.2 结构专业合理化建议
 8.3 给排水专业合理化建议
 8.4 暖通专业合理化建议
 8.5 BIM专业合理化建议
8.6 技术和工艺方面的建议
8.7 成本和预算方面的建议
8.8 时间和进度方面的建议
8.9 施工质量管理方面的建议
8.10 质量和安全方面的建议
8.11 环境和可持续性方面的建议
九、施工进度安排
 9.1 施工进度安排
 9.2 施工进度跟踪与管理
 9.3 施工质量管理
 9.4 施工现场管理
 9.5 施工结项与验收
十、本项目工作重点难点分析
 10.1 工程特点与设计工作难点
 10.2 重点与难点分析
 10.3 综合解决措施
</example>
- 招标文件摘要:
{summary}
- 技术评分标准:
{rating}
"""

View File

@ -0,0 +1,92 @@
- 角色:招标文件编写专家,精通招标文件结构化、摘要编写
- 任务:根据用户提供的项目招标文件内容,生成一份专业、清晰的结构化摘要
- 要求:
一、摘要框架
1. 项目概况
- 项目名称
- 建设地点
- 工程性质(新建/改建/扩建)
- 核心建设内容
- 关键工程量指标
- 特殊施工工艺(如顶管/盾构等)
- 项目概况
2. 技术要求体系
- 专业监测要求(分项列出核心监测指标)
- 技术标准规范
- 质量管控要点
- 特殊工艺标准
3. 交付物矩阵
- 阶段性成果清单(含时间节点)
- 最终交付文件要求
- 成果验收标准
- 备案审批流程
4. 商务条款摘要
- 合同期限
- 支付结构
- 报价约束条件
- 违约条款要点
- 知识产权约定
5. 资质要求矩阵
- 企业资质门槛
- 人员资格要求
- 设备配置标准
- 同类项目经验
6. 评标要素体系
- 技术评分维度
- 商务评分权重
- 否决性条款
- 实质性条款
- 围标识别机制
二、处理规范
1. 信息抽取规则:
- 采用三级信息提炼法(关键数据→技术参数→约束条件)
- 识别并标注法定强制性条款(★号条款)
- 提取特殊工艺参数(例如顶管直径、沉井尺寸等)
2. 结构化呈现要求:
- 使用Markdown分级标题系统
- 技术参数格式化处理
- 流程节点采用时间轴呈现
- 关键数据突出显示(例如预算金额、最高限价)
3. 专业术语处理:
- 保持行业术语准确性
- 工程计量单位标准化转换
- 法律条款原文引述
三、输出示例
1.确保包含但不仅限于:
- 项目背景的技术参数分解
- 监测要求的分类归纳
- 成果交付的阶段性要求
- 商务条款的要点提炼
四、质量保障
1. 完整性核查清单:
- 验证五证要求(资质/业绩/人员/设备/资金)
- 检查三大核心条款(技术/商务/法律)
- 确认关键日期节点(工期/交付期/质保期)
2. 风险提示机制:
- 标注异常约束条款
- 识别排他性要求
- 提示潜在履约风险点
请严格按照上述结构化框架处理输入的招标文件,生成专业、准确、易读的项目摘要报告。
输出内容需符合工程领域专业规范,重点数据需二次核验确保准确性。
严格按照招标文件的内容,确保输出内容的完整性。
直接给出摘要,禁止说明和引导词。
- 用户提供的招标文件内容如下:
{bid_document}

23
prompts/rating_json.txt Normal file
View File

@ -0,0 +1,23 @@
- 任务从工程项目招标文件中提取技术评分要求并以严格的JSON格式输出。
- 要求:
必须生成完整有效的JSON对象不使用JSON之外的文本说明
数值类型字段不添加单位符号
包含所有的评分项及其权重分配
特殊说明字段仅在存在否决条款(强制性条款)时出现
- 输出结构(必须严格遵守根字段名与数组名,便于后续章节字数与要点映射):
{
"items": [
{
"id": "唯一短标识,如 T01",
"name": "评分项名称(与招标文件表述一致或精简概括)",
"weight": 数值型权重或分值(如 10 表示 10 分或 10%),
"keywords": ["与本项相关的可选关键词1", "关键词2"]
}
],
"notes": "可选:否决条款、阶梯得分等特殊说明;无则写空字符串"
}
- 技术评分要求内容如下:
{tech_rating}

View File

@ -0,0 +1,46 @@
- 角色:招标文件信息提取专家,精通技术评分/技术评审要求的提取
- 任务:请严格按照以下步骤分析提供的招标文件内容,并完整提取所有技术评分标准:
- 步骤与要求:
1. **结构解析**
- 首先识别文件整体结构,仅提取“技术评分”/“技术评审”部分
- 标注评分大类的权重占比(如出现)
2. **要素提取**
对“技术评分”板块进行深度解析,要求:
- 提取评分的全部细节,不能省略
- 明确列出技术评分的标准,如有(如"ISO认证+3分"、"项目经验每年加1分"
3. **结果呈现样例**
参考以下示例输出markdown结构化格式
# 招标技术评分细则
## 技术评分(80分)
- 对本项目的了解和分析(12分)
→ 对本项目的理解与项目背景把握准确,对本项目特点、实施目标和定位内容详尽,完全满足项目需要,科学、合理、针对性强、合理可行的,得 12 分;对本项目的理解与项目背景有一定把握,对本项目特点、实施目标和定位有阐述说明,基本可行的,得 8 分;对本项目的理解与项目背景把握片面,对本项目特点、实施目标和定位理解有较大偏差,可行性较差的,得 4 分;未提供不得分。
→ 合理可行指:( 1完全响应采购需求 2相关内容的表述具有针对性全面、具体。
→ 基本可行指:( 1响应采购需求有微小偏差 2相关 内容的表述有一定的层次性、针对性,但全面性不够。
→ 可行性较差指:( 1响应采购需求有较大偏差 2相 关内容的表述针对性弱、全面性方面欠缺较大。
- 项目工作重难点分析(12分)
→ 根据供应商针对本项目工作重难点分析与解决方案的科学性、合理性且满足项目实际情况进行评分,项目工作重 难点分析到位、有针对性、完全符合项目实际情况,对应 的解决方案合理可行的,得 12 分;
项目工作重难点内容 基本准确、针对性一般、基本符合项目实际,对应的解决 方案基本可行的,得 8 分;
项目工作重难点分析一般,对应的解决方案一般、可行性较差的,得 4 分;未提供 不得分。
→ 合理可行指:( 1完全响应采购需求 2相关内容的表述具有针对性全面、具体。
→ 基本可行指:( 1响应采购需求有微小偏差 2相关 内容的表述有一定的层次性、针对性,但全面性不够。
→ 可行性较差指:( 1响应采购需求有较大偏差 2相 关内容的表述针对性弱、全面性方面欠缺较大。
- 项目实施方案(12分)
(继续展开...
请严格按照上述结构化框架处理输入的招标文件,生成专业、准确的项目技术评分/评审要求。
严格按照招标文件的内容,确保输出内容的完整性。
直接输出评分/评审要求,禁止说明和引导词。
- 招标文件内容如下:
{bid_document}

View File

@ -0,0 +1,43 @@
- 角色:招标文件信息提取专家,精通技术评分/技术评审要求的提取
- 任务:请严格按照以下步骤分析提供的招标文件内容,并完整提取所有技术评分标准:
- 步骤与要求:
1. **结构解析**
- 首先识别文件整体结构,仅提取“技术评分”/“技术评审要求”部分
- 标注评分大类的权重占比(如出现)
2. **要素提取**
对“技术评分”板块进行深度解析,要求:
- 提取评分的全部细节,不能省略
- 明确列出量化指标,如有(如"ISO认证+3分"、"项目经验每年加1分"
- 区分强制性条款(必须满足项)与竞争性条款(择优评分项),如有
- 标注特殊要求(本地化服务、专利数量、团队资质等),如有
3. **异常识别**
- 标出表述模糊的评分项(如"酌情加分""优/良/差等级"
- 识别可能存在的矛盾条款
- 提示需要注意的隐藏评分点(如投标格式错误扣分项)
4. **结果呈现样例**
参考以下示例输出markdown结构化格式
# 招标技术评分细则
## 技术评分(50%)
- 系统架构设计(20%)
→ 要求:支持分布式部署(未满足直接废标)
→ 加分项:采用微服务架构+3分
(继续展开...
请严格按照上述结构化框架处理输入的招标文件,生成专业、准确的项目技术评分要求。
严格按照招标文件的内容,确保输出内容的完整性。
直接输出评分要求,禁止说明和引导词。
- 招标文件内容如下:
{bid_document}

45
prompts/scoring_rules.txt Normal file
View File

@ -0,0 +1,45 @@
"你是一名专业的招标文件分析师,请按照以下步骤处理用户提供的项目招标文件内容:
1. **结构识别**
- 仔细解析文件结构,定位'评分标准'、'评审办法'、'投标人须知'等关键章节
- 特别注意包含'分值'、'评分项'、'权重'等关键词的段落
2. **核心要素提取**
- 系统提取以下要素形成结构化表格:
│ 类别 │ 评分项名称 │ 分值权重 │ 具体要求 │ 否决条款 │
- 分类标准:
● 技术部分(方案设计、实施能力、技术创新等)
● 商务部分(资质证明、业绩案例、团队经验等)
● 价格部分(报价合理性、计价方式等)
● 其他专项(售后服务、本地化服务等)
3. **深度分析**
- 计算权重配比示例技术60% = 方案设计30% + 实施能力20% + 创新10%
- 识别否决性条款(如"▲"标记项或特定强制要求)
- 标注特殊评分规则:阶梯得分、区间赋分、横向比较等机制
4. **风险提示**
- 标出易被忽视的得分点如ISO认证、专利数量等
- 识别矛盾条款如总分值≠100%的情况)
- 提示资质门槛要求(注册资金、特定资质证书等)
5. **输出格式**
采用Markdown输出以下结构
```markdown
# 招标评分要点汇总
## 核心指标配比
- 总评分构成技术分__%+ 商务分__%+ 价格分__%
## 详细评分矩阵
| 类别 | 评分项 | 分值 | 具体要求 | 关键指标 |
|------|-------|-----|---------|---------|
| ... | ... | ... | ... | ... |
## 重点提示
⚠️ 否决条款:列出所有一票否决项
💡 得分要点突出3-5个高权重核心指标
⏱️ 时间节点:标注与评分相关的时限要求
```
请先确认理解任务要求,待用户提供招标文件内容后执行分析。"

47
prompts/section_detail.py Normal file
View File

@ -0,0 +1,47 @@
# Prompt template for writing the body text of a leaf (bottom-level) outline
# section. Placeholders: {word_count_spec}, {summary}, {outline}, {title}.
# NOTE(review): "PROMT" is a misspelling of "PROMPT"; the name is kept as-is
# because external callers reference it.
GEN_LEAF_DETAIL_PROMT = """
最重要的要求字数
{word_count_spec}
- 角色资深投标文件撰写专家
- 任务根据招标文件概要标书目录子小节标题撰写该子小节的正文
行文规范
- 投标方自称统一用"我方"禁用"我们""本公司"
- 招标人统一称"招标方""建设单位"
- 禁止前导句"本章节对应……""本小节主要说明……""以下将从……方面说明"开头直接写实质内容
- 禁止AI套话综上所述首先其次再次我们深信高度重视全力以赴不断优化稳步推进通过以上措施
- 用具体数据/标准编号/人员配置替代空洞承诺
- 列举用(1)(2)(3)编号禁止"首先其次"连接禁止""作结尾
- 纯文本输出禁用markdown符号段落间空行分隔
- 直接输出正文不含标题和解释
输入信息
- 招标文件概要
{summary}
- 技术标书目录
{outline}
- 待撰写的子小节标题
{title}
再次强调篇幅是最核心的质量指标内容必须充分展开每个技术要点都要详细阐述实施方法技术参数人员安排或设备配置绝不可以概括性一笔带过
"""
# Prompt template for a short chapter-opening introduction (100-200 chars).
# Placeholders: {summary}, {outline}, {title}. Same "PROMT" spelling caveat.
GEN_SECTION_INTRODUCTION_PROMT = """
- 角色资深投标文件撰写专家
- 任务为章节撰写简短开篇引言100200点明核心主题与招标要求的对应关系
- 使用"我方"自称禁止套话和前导解释句纯文本输出
- 若无需过渡可输出空白
- 招标文件概要
{summary}
- 技术标书目录
{outline}
- 章节标题
{title}
"""

View File

@ -0,0 +1,28 @@
【最重要的要求——字数】
{word_count_spec}
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
- 角色:资深投标文件撰写专家
- 任务:根据招标文件概要、标书目录、子小节标题,撰写该子小节的正文
【行文规范】
- 投标方自称在"我方""我们""本公司"中随机选用
- 招标人统一称"招标方"或"建设单位"
- 禁止前导句:"本章节对应……""本小节主要说明……""以下将从……方面说明"等——开头直接写实质内容
- 禁止AI套话综上所述、首先其次再次、我们深信、高度重视、全力以赴、不断优化、稳步推进、通过以上措施
- 用具体数据/标准编号/人员配置替代空洞承诺
- 列举用(1)(2)(3)编号,禁止"首先其次"连接;禁止"等"作结尾
- 纯文本输出禁用markdown符号段落间空行分隔
- 直接输出正文,不含标题和解释
【输入信息】
- 招标文件概要:
{summary}
- 技术标书目录:
{outline}
- 待撰写的子小节标题:
{subsection_title}
再次强调:篇幅是最核心的质量指标。内容必须充分展开,每个技术要点都要详细阐述实施方法、技术参数、人员安排或设备配置。绝不可以概括性一笔带过。

12
requirements.txt Normal file
View File

@ -0,0 +1,12 @@
Flask==3.0.3
flask-cors==4.0.1
PyPDF2==3.0.1
python-docx==1.1.2
openai==1.52.0
Werkzeug==3.0.4
requests==2.32.3
chardet==5.2.0
pypdf==4.3.1
pdfminer.six==20231228
beautifulsoup4==4.12.3
lxml==5.3.0

39
start.bat Normal file
View File

@ -0,0 +1,39 @@
@echo off
title BidPartner - AI Bid Assistant
echo.
echo ============================================
echo BidPartner - AI Bid Writing Tool
echo ============================================
echo.
cd /d "%~dp0"
python --version >nul 2>&1
if %errorlevel% neq 0 (
echo [ERROR] Python not found. Please install Python 3.9+
pause
exit /b 1
)
if not exist "%~dp0.deps_installed" (
echo Installing dependencies...
pip install -r requirements.txt
if %errorlevel% neq 0 (
echo [ERROR] Failed to install dependencies.
pause
exit /b 1
)
echo.> "%~dp0.deps_installed"
echo Dependencies installed successfully.
)
echo Starting server...
echo Open browser: http://localhost:5000
echo Press Ctrl+C to stop
echo.
start "" "http://localhost:5000"
python app.py
pause

89
static/style.css Normal file
View File

@ -0,0 +1,89 @@
/* BidPartner custom styles */
/* Scrollbar styling (WebKit browsers only) */
::-webkit-scrollbar {
width: 6px;
height: 6px;
}
::-webkit-scrollbar-track {
background: #f1f5f9;
border-radius: 3px;
}
::-webkit-scrollbar-thumb {
background: #cbd5e1;
border-radius: 3px;
}
::-webkit-scrollbar-thumb:hover {
background: #94a3b8;
}
/* Section-tree left sidebar: narrower scrollbar */
.sidebar-fixed::-webkit-scrollbar {
width: 4px;
}
/* Body-content typography (serif stack for document-like rendering) */
.prose-content {
font-family: 'SimSun', '宋体', 'Times New Roman', serif;
line-height: 1.9;
color: #374151;
}
/* Animations: fade-in with a slight upward slide */
@keyframes fadeIn {
from { opacity: 0; transform: translateY(8px); }
to { opacity: 1; transform: translateY(0); }
}
.fade-in {
animation: fadeIn 0.25s ease-out;
}
/* Table styles (scoring-requirements display) */
.markdown-table table {
width: 100%;
border-collapse: collapse;
font-size: 13px;
}
.markdown-table th {
background: #f8fafc;
font-weight: 600;
color: #475569;
padding: 8px 12px;
border: 1px solid #e2e8f0;
text-align: left;
}
.markdown-table td {
padding: 7px 12px;
border: 1px solid #e2e8f0;
color: #334155;
}
/* Zebra striping for even rows */
.markdown-table tr:nth-child(even) td {
background: #f8fafc;
}
/* Step indicator: highlight for the active step */
.step-active {
background: #2563eb;
color: #fff;
box-shadow: 0 2px 8px rgba(37,99,235,.35);
}
/* File-upload drag-over highlight */
.drop-active {
border-color: #3b82f6 !important;
background: #eff6ff !important;
}
/* Section indent guide line */
.section-indent-line {
border-left: 2px solid #e2e8f0;
margin-left: 8px;
padding-left: 8px;
}
/* Print styles: hide chrome, flatten cards */
@media print {
header, nav, aside, button { display: none !important; }
main { padding: 0 !important; }
.bg-white { box-shadow: none !important; border: none !important; }
}

881
templates/index.html Normal file
View File

@ -0,0 +1,881 @@
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>标伙伴 · AI 标书助手</title>
<script src="https://cdn.tailwindcss.com"></script>
<script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script>
<link rel="stylesheet" href="/static/style.css">
<style>
[x-cloak]{display:none!important}
body{font-family:'PingFang SC','Microsoft YaHei',sans-serif;background:#f0f4f8}
</style>
</head>
<body class="min-h-screen" x-data="app()" x-init="init()">
<!-- ── 顶栏 ── -->
<header class="bg-white border-b border-gray-200 sticky top-0 z-50 shadow-sm">
<div class="max-w-7xl mx-auto px-6 h-16 flex items-center justify-between">
<div class="flex items-center gap-3">
<div class="w-9 h-9 rounded-xl bg-gradient-to-br from-blue-600 to-indigo-600 flex items-center justify-center shadow">
<svg class="w-5 h-5 text-white" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
d="M9 12h6m-6 4h6m2 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z"/>
</svg>
</div>
<div>
<span class="text-lg font-bold text-gray-900">标伙伴</span>
<span class="ml-2 text-xs text-gray-400 font-medium">AI 标书助手</span>
</div>
</div>
<div class="flex items-center gap-3">
<button @click="showConfig=true"
class="p-2 text-gray-500 hover:text-blue-600 hover:bg-blue-50 rounded-lg transition">
<svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
d="M10.325 4.317c.426-1.756 2.924-1.756 3.35 0a1.724 1.724 0 002.573 1.066c1.543-.94 3.31.826 2.37 2.37a1.724 1.724 0 001.065 2.572c1.756.426 1.756 2.924 0 3.35a1.724 1.724 0 00-1.066 2.573c.94 1.543-.826 3.31-2.37 2.37a1.724 1.724 0 00-2.572 1.065c-.426 1.756-2.924 1.756-3.35 0a1.724 1.724 0 00-2.573-1.066c-1.543.94-3.31-.826-2.37-2.37a1.724 1.724 0 00-1.065-2.572c-1.756-.426-1.756-2.924 0-3.35a1.724 1.724 0 001.066-2.573c-.94-1.543.826-3.31 2.37-2.37.996.608 2.296.07 2.572-1.065z"/>
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M15 12a3 3 0 11-6 0 3 3 0 016 0z"/>
</svg>
</button>
<button @click="showCreate=true"
class="flex items-center gap-2 px-4 py-2 bg-blue-600 hover:bg-blue-700 text-white text-sm font-medium rounded-lg shadow-sm transition">
<svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 4v16m8-8H4"/>
</svg>
新建项目
</button>
</div>
</div>
</header>
<!-- ── 主内容 ── -->
<main class="max-w-7xl mx-auto px-6 py-8">
<!-- 篇幅目标仅存在于「标书项目 → 步骤1 解析」;本页不重复控件 -->
<div x-show="!loading" x-cloak class="mb-6 p-3.5 bg-slate-50 border border-slate-200 rounded-xl text-sm text-slate-600 leading-relaxed">
<p><strong>篇幅目标(按页数粗略换算)</strong>请进入某标书项目,在 <strong>步骤1「解析」</strong> 中设置100/150/200/250/300 页、自定义、保存页数设置、使用原档位、当前页等,保存后用于后续章节生成。</p>
</div>
<!-- 欢迎横幅(无项目时显示) -->
<template x-if="projects.length === 0 && !loading">
<div class="text-center py-20">
<div class="w-24 h-24 mx-auto mb-6 rounded-3xl bg-gradient-to-br from-blue-100 to-indigo-100 flex items-center justify-center">
<svg class="w-12 h-12 text-blue-600" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5"
d="M9 12h6m-6 4h6m2 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z"/>
</svg>
</div>
<h2 class="text-2xl font-bold text-gray-800 mb-2">欢迎使用标伙伴</h2>
<p class="text-gray-500 mb-8 max-w-md mx-auto">AI 驱动的标书写作助手,上传招标文件,一键生成专业技术标书</p>
<button @click="showCreate=true"
class="px-6 py-3 bg-blue-600 hover:bg-blue-700 text-white font-medium rounded-xl shadow-md transition">
创建第一个项目
</button>
<!-- 功能介绍 -->
<div class="grid grid-cols-3 gap-6 mt-16 max-w-3xl mx-auto text-left">
<div class="bg-white rounded-2xl p-6 shadow-sm border border-gray-100">
<div class="w-10 h-10 bg-blue-100 rounded-xl flex items-center justify-center mb-4">
<svg class="w-5 h-5 text-blue-600" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M4 16v1a3 3 0 003 3h10a3 3 0 003-3v-1m-4-8l-4-4m0 0L8 8m4-4v12"/>
</svg>
</div>
<h3 class="font-semibold text-gray-800 mb-1">智能解析招标文件</h3>
<p class="text-sm text-gray-500">自动提取评分要求、资质条件、技术参数</p>
</div>
<div class="bg-white rounded-2xl p-6 shadow-sm border border-gray-100">
<div class="w-10 h-10 bg-green-100 rounded-xl flex items-center justify-center mb-4">
<svg class="w-5 h-5 text-green-600" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2"/>
</svg>
</div>
<h3 class="font-semibold text-gray-800 mb-1">自动生成标书大纲</h3>
<p class="text-sm text-gray-500">按评分权重生成四级章节结构,精准对标要求</p>
</div>
<div class="bg-white rounded-2xl p-6 shadow-sm border border-gray-100">
<div class="w-10 h-10 bg-purple-100 rounded-xl flex items-center justify-center mb-4">
<svg class="w-5 h-5 text-purple-600" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 10v6m0 0l-3-3m3 3l3-3m2 8H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z"/>
</svg>
</div>
<h3 class="font-semibold text-gray-800 mb-1">一键导出 Word 文档</h3>
<p class="text-sm text-gray-500">专业排版,直接交付使用</p>
</div>
</div>
</div>
</template>
<!-- 加载中 -->
<template x-if="loading">
<div class="flex justify-center py-20">
<div class="w-8 h-8 border-4 border-blue-200 border-t-blue-600 rounded-full animate-spin"></div>
</div>
</template>
<!-- 项目列表 -->
<template x-if="projects.length > 0">
<div>
<div class="flex items-center justify-between mb-6">
<h2 class="text-xl font-bold text-gray-800">我的项目
<span class="ml-2 text-sm font-normal text-gray-400"><span x-text="projects.length"></span></span>
</h2>
</div>
<div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-5">
<template x-for="p in projects" :key="p.id">
<div class="bg-white rounded-2xl border border-gray-100 shadow-sm hover:shadow-md transition-shadow cursor-pointer group"
@click="window.location='/project/'+p.id">
<div class="p-5">
<!-- 状态徽标 -->
<div class="flex items-start justify-between mb-3">
<div class="flex-1 min-w-0">
<h3 class="font-semibold text-gray-900 group-hover:text-blue-600 transition truncate" x-text="p.name"></h3>
<p class="text-xs text-gray-400 mt-1" x-text="formatDate(p.created_at)"></p>
</div>
<span class="ml-2 flex-shrink-0 px-2 py-0.5 rounded-full text-xs font-medium"
:class="statusBadge(p.parse_status).cls" x-text="statusBadge(p.parse_status).text"></span>
</div>
<!-- 文件名 -->
<div x-show="p.file_name" class="flex items-center gap-1.5 text-xs text-gray-500 mb-3">
<svg class="w-3.5 h-3.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 12h6m-6 4h6m2 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z"/>
</svg>
<span class="truncate" x-text="p.file_name"></span>
</div>
<!-- 进度条 -->
<div x-show="p.section_count > 0" class="mb-3">
<div class="flex justify-between text-xs text-gray-500 mb-1">
<span>章节生成进度</span>
<span x-text="p.done_count + '/' + p.section_count"></span>
</div>
<div class="h-1.5 bg-gray-100 rounded-full overflow-hidden">
<div class="h-full bg-blue-500 rounded-full transition-all"
:style="'width:' + (p.section_count ? p.done_count/p.section_count*100 : 0) + '%'"></div>
</div>
</div>
<!-- 操作按钮 -->
<div class="flex gap-2 pt-3 border-t border-gray-50">
<button class="flex-1 text-xs text-blue-600 hover:text-blue-700 font-medium py-1 hover:bg-blue-50 rounded-lg transition"
@click.stop="window.location='/project/'+p.id">
进入项目
</button>
<button class="text-xs text-red-400 hover:text-red-600 font-medium px-3 py-1 hover:bg-red-50 rounded-lg transition"
@click.stop="deleteProject(p.id, p.name)">
删除
</button>
</div>
</div>
</div>
</template>
</div>
</div>
</template>
</main>
<!-- ══ 新建项目弹窗 ══ -->
<div x-show="showCreate" x-cloak class="fixed inset-0 z-50 flex items-center justify-center p-4 bg-black/50 backdrop-blur-sm">
<div class="bg-white rounded-2xl shadow-2xl w-full max-w-md p-6" @click.stop>
<h2 class="text-lg font-bold text-gray-900 mb-4">新建标书项目</h2>
<div class="mb-4">
<label class="block text-sm font-medium text-gray-700 mb-1">项目名称</label>
<input type="text" x-model="newProjectName" @keydown.enter="createProject()"
placeholder="例如XX智慧城市信息化建设项目"
class="w-full px-4 py-2.5 border border-gray-300 rounded-xl focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent text-sm">
</div>
<div class="flex gap-3">
<button @click="showCreate=false" class="flex-1 px-4 py-2 border border-gray-200 text-gray-600 rounded-xl text-sm hover:bg-gray-50 transition">取消</button>
<button @click="createProject()" :disabled="creating"
class="flex-1 px-4 py-2 bg-blue-600 hover:bg-blue-700 text-white rounded-xl text-sm font-medium transition disabled:opacity-60">
<span x-show="!creating">创建项目</span>
<span x-show="creating">创建中...</span>
</button>
</div>
</div>
</div>
<!-- ══ AI 配置弹窗 ══ -->
<div x-show="showConfig" x-cloak class="fixed inset-0 z-50 flex items-center justify-center p-4 bg-black/50 backdrop-blur-sm">
<div class="bg-white rounded-2xl shadow-2xl w-full max-w-lg p-6" @click.stop>
<h2 class="text-lg font-bold text-gray-900 mb-4">AI 模型配置</h2>
<div class="mb-4">
<label class="block text-sm font-medium text-gray-700 mb-2">选择模型提供商</label>
<div class="grid grid-cols-3 gap-2">
<label class="flex items-center gap-2 p-3 border-2 rounded-xl cursor-pointer transition"
:class="cfg.model_provider==='qwen' ? 'border-blue-500 bg-blue-50' : 'border-gray-200 hover:border-gray-300'">
<input type="radio" name="provider" value="qwen" x-model="cfg.model_provider" class="accent-blue-600">
<div>
<p class="font-medium text-sm leading-tight">通义千问</p>
<p class="text-xs text-gray-400">Qwen · 阿里云</p>
</div>
</label>
<label class="flex items-center gap-2 p-3 border-2 rounded-xl cursor-pointer transition"
:class="cfg.model_provider==='deepseek' ? 'border-blue-500 bg-blue-50' : 'border-gray-200 hover:border-gray-300'">
<input type="radio" name="provider" value="deepseek" x-model="cfg.model_provider" class="accent-blue-600">
<div>
<p class="font-medium text-sm leading-tight">DeepSeek</p>
<p class="text-xs text-gray-400">高性价比 · 云端</p>
</div>
</label>
<label class="flex items-center gap-2 p-3 border-2 rounded-xl cursor-pointer transition"
:class="cfg.model_provider==='openai' ? 'border-blue-500 bg-blue-50' : 'border-gray-200 hover:border-gray-300'">
<input type="radio" name="provider" value="openai" x-model="cfg.model_provider" class="accent-blue-600">
<div>
<p class="font-medium text-sm leading-tight">OpenAI</p>
<p class="text-xs text-gray-400">GPT-4.1 · 云端</p>
</div>
</label>
<label class="flex items-center gap-2 p-3 border-2 rounded-xl cursor-pointer transition"
:class="cfg.model_provider==='doubao' ? 'border-blue-500 bg-blue-50' : 'border-gray-200 hover:border-gray-300'">
<input type="radio" name="provider" value="doubao" x-model="cfg.model_provider" class="accent-blue-600">
<div>
<p class="font-medium text-sm leading-tight">豆包</p>
<p class="text-xs text-gray-400">字节跳动 · 云端</p>
</div>
</label>
<label class="flex items-center gap-2 p-3 border-2 rounded-xl cursor-pointer transition"
:class="cfg.model_provider==='kimi' ? 'border-blue-500 bg-blue-50' : 'border-gray-200 hover:border-gray-300'">
<input type="radio" name="provider" value="kimi" x-model="cfg.model_provider" class="accent-blue-600">
<div>
<p class="font-medium text-sm leading-tight">Kimi</p>
<p class="text-xs text-gray-400">Moonshot · 长文本</p>
</div>
</label>
<label class="flex items-center gap-2 p-3 border-2 rounded-xl cursor-pointer transition"
:class="cfg.model_provider==='ollama' ? 'border-green-500 bg-green-50' : 'border-gray-200 hover:border-gray-300'">
<input type="radio" name="provider" value="ollama" x-model="cfg.model_provider" class="accent-green-600">
<div>
<p class="font-medium text-sm leading-tight">Ollama 本地</p>
<p class="text-xs text-gray-400">免费 · 离线</p>
</div>
</label>
</div>
</div>
<template x-if="cfg.model_provider==='qwen'">
<div class="space-y-3 mb-4">
<div>
<label class="block text-sm font-medium text-gray-700 mb-1">Qwen API Key
<a href="https://dashscope.aliyun.com/" target="_blank" class="ml-1 text-blue-500 text-xs hover:underline font-normal">申请地址 ↗</a>
</label>
<input type="password" x-model="cfg.qwen_api_key" placeholder="sk-..."
class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500">
<p x-show="cfg.has_qwen_key" class="text-xs text-green-600 mt-1">✓ 已配置</p>
</div>
<div>
<label class="block text-sm font-medium text-gray-700 mb-1">模型</label>
<select x-model="cfg.qwen_model" class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500">
<optgroup label="─── Qwen3.6(本项默认:生成+解析)───">
<option value="qwen3.6-plus">qwen3.6-plus ★ 默认</option>
</optgroup>
<optgroup label="─── 旗舰版 ───">
<option value="qwen-max">qwen-max ★ 推荐</option>
<option value="qwen-max-latest">qwen-max-latest自动追踪最新</option>
</optgroup>
<optgroup label="─── 均衡版 ───">
<option value="qwen-plus">qwen-plus</option>
<option value="qwen-plus-latest">qwen-plus-latest自动追踪最新</option>
</optgroup>
<optgroup label="─── 快速版 ───">
<option value="qwen-turbo">qwen-turbo</option>
<option value="qwen-turbo-latest">qwen-turbo-latest自动追踪最新</option>
</optgroup>
<optgroup label="─── 超长上下文 ───">
<option value="qwen-long">qwen-long1M tokens</option>
</optgroup>
<optgroup label="─── Qwen3 系列 API ───">
<option value="qwen3-235b-a22b">qwen3-235b-a22bMoE 旗舰)</option>
<option value="qwen3-32b">qwen3-32b</option>
<option value="qwen3-30b-a3b">qwen3-30b-a3bMoE 高效)</option>
<option value="qwen3-14b">qwen3-14b</option>
<option value="qwen3-8b">qwen3-8b</option>
</optgroup>
<optgroup label="─── 自定义 ───">
<option value="">手动输入模型名</option>
</optgroup>
</select>
<div x-show="!qwenPresets.includes(cfg.qwen_model)" class="mt-2">
<input type="text" x-model="cfg.qwen_model" placeholder="输入模型名,如 qwen-max-2025-01-25"
class="w-full px-3 py-2 border border-blue-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 font-mono">
<p class="text-xs text-gray-400 mt-1">输入任意 DashScope 兼容的模型名</p>
</div>
</div>
<!-- 自定义 API 地址(可选,供代理/中转使用) -->
<div>
<button type="button" @click="cfg._qwen_adv = !cfg._qwen_adv"
class="text-xs text-gray-400 hover:text-gray-600 flex items-center gap-1">
<span x-text="cfg._qwen_adv ? '▾' : '▸'"></span>
高级:自定义 API 地址(代理/中转)
</button>
<div x-show="cfg._qwen_adv" class="mt-2">
<input type="text" x-model="cfg.qwen_base_url"
placeholder="https://dashscope.aliyuncs.com/compatible-mode/v1"
class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 font-mono">
<p class="text-xs text-gray-400 mt-1">默认https://dashscope.aliyuncs.com/compatible-mode/v1</p>
</div>
</div>
</div>
</template>
<template x-if="cfg.model_provider==='deepseek'">
<div class="space-y-3 mb-4">
<div>
<label class="block text-sm font-medium text-gray-700 mb-1">DeepSeek API Key
<a href="https://platform.deepseek.com/" target="_blank" class="ml-1 text-blue-500 text-xs hover:underline font-normal">申请地址 ↗</a>
</label>
<input type="password" x-model="cfg.deepseek_api_key" placeholder="sk-..."
class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500">
<p x-show="cfg.has_deepseek_key" class="text-xs text-green-600 mt-1">✓ 已配置</p>
</div>
<div>
<label class="block text-sm font-medium text-gray-700 mb-1">模型</label>
<select x-model="cfg.deepseek_model" class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500">
<optgroup label="─── 对话模型 ───">
<option value="deepseek-chat">deepseek-chat ★ 推荐V3 最新)</option>
</optgroup>
<optgroup label="─── 推理模型 ───">
<option value="deepseek-reasoner">deepseek-reasonerR1</option>
</optgroup>
<optgroup label="─── 自定义 ───">
<option value="">手动输入模型名</option>
</optgroup>
</select>
<div x-show="!deepseekPresets.includes(cfg.deepseek_model)" class="mt-2">
<input type="text" x-model="cfg.deepseek_model" placeholder="输入模型名,如 deepseek-chat-v3-0324"
class="w-full px-3 py-2 border border-blue-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 font-mono">
<p class="text-xs text-gray-400 mt-1">输入任意 DeepSeek 兼容的模型名</p>
</div>
</div>
<!-- 自定义 API 地址 -->
<div>
<button type="button" @click="cfg._ds_adv = !cfg._ds_adv"
class="text-xs text-gray-400 hover:text-gray-600 flex items-center gap-1">
<span x-text="cfg._ds_adv ? '▾' : '▸'"></span>
高级:自定义 API 地址(代理/中转)
</button>
<div x-show="cfg._ds_adv" class="mt-2">
<input type="text" x-model="cfg.deepseek_base_url"
placeholder="https://api.deepseek.com/v1"
class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 font-mono">
<p class="text-xs text-gray-400 mt-1">默认https://api.deepseek.com/v1</p>
</div>
</div>
<div class="flex items-start gap-2 p-3 bg-amber-50 rounded-lg border border-amber-200 text-xs text-amber-700">
<svg class="w-4 h-4 flex-shrink-0 mt-0.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"/>
</svg>
<span>DeepSeek 暂不提供 Embedding API知识库功能将自动使用本地默认模型需下载约 90MB 模型)。其他功能不受影响。</span>
</div>
</div>
</template>
<!-- ── 豆包配置面板 ── -->
<template x-if="cfg.model_provider==='doubao'">
<div class="space-y-3 mb-4">
<div>
<label class="block text-sm font-medium text-gray-700 mb-1">豆包 API Key
<a href="https://console.volcengine.com/ark/" target="_blank" class="ml-1 text-blue-500 text-xs hover:underline font-normal">申请地址 ↗</a>
</label>
<input type="password" x-model="cfg.doubao_api_key" placeholder="sk-..."
class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500">
<p x-show="cfg.has_doubao_key" class="text-xs text-green-600 mt-1">✓ 已配置</p>
</div>
<div>
<label class="block text-sm font-medium text-gray-700 mb-1">模型</label>
<select x-model="cfg.doubao_model" class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500">
<optgroup label="─── 豆包 1.5 系列2025 最新)───">
<option value="doubao-1-5-pro-32k">doubao-1-5-pro-32k ★ 推荐</option>
<option value="doubao-1-5-pro-128k">doubao-1-5-pro-128k超长上下文</option>
<option value="doubao-1-5-lite-32k">doubao-1-5-lite-32k快速低价</option>
</optgroup>
<optgroup label="─── 豆包 Pro 系列 ───">
<option value="doubao-pro-32k">doubao-pro-32k</option>
<option value="doubao-pro-128k">doubao-pro-128k</option>
<option value="doubao-pro-256k">doubao-pro-256k超长</option>
</optgroup>
<optgroup label="─── 豆包 Lite 系列 ───">
<option value="doubao-lite-32k">doubao-lite-32k</option>
<option value="doubao-lite-128k">doubao-lite-128k</option>
</optgroup>
<optgroup label="─── 自定义 ───">
<option value="">手动输入模型名</option>
</optgroup>
</select>
<div x-show="!doubaoPresets.includes(cfg.doubao_model)" class="mt-2">
<input type="text" x-model="cfg.doubao_model" placeholder="输入模型名,如 doubao-1-5-pro-32k"
class="w-full px-3 py-2 border border-blue-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 font-mono">
<p class="text-xs text-gray-400 mt-1">输入火山引擎方舟平台支持的任意模型名</p>
</div>
</div>
<!-- 自定义 API 地址 -->
<div>
<button type="button" @click="cfg._doubao_adv = !cfg._doubao_adv"
class="text-xs text-gray-400 hover:text-gray-600 flex items-center gap-1">
<span x-text="cfg._doubao_adv ? '▾' : '▸'"></span>
高级:自定义 API 地址(代理/中转)
</button>
<div x-show="cfg._doubao_adv" class="mt-2">
<input type="text" x-model="cfg.doubao_base_url"
placeholder="https://ark.cn-beijing.volces.com/api/v3"
class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 font-mono">
<p class="text-xs text-gray-400 mt-1">默认https://ark.cn-beijing.volces.com/api/v3</p>
</div>
</div>
<div class="flex items-start gap-2 p-3 bg-amber-50 rounded-lg border border-amber-200 text-xs text-amber-700">
<svg class="w-4 h-4 flex-shrink-0 mt-0.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"/>
</svg>
<span>豆包暂不提供通用 Embedding API知识库将自动使用关键词检索模式。其他功能完全正常。</span>
</div>
</div>
</template>
<!-- ── Kimi 配置面板 ── -->
<template x-if="cfg.model_provider==='kimi'">
<div class="space-y-3 mb-4">
<div>
<label class="block text-sm font-medium text-gray-700 mb-1">Kimi API Key
<a href="https://platform.moonshot.cn/" target="_blank" class="ml-1 text-blue-500 text-xs hover:underline font-normal">申请地址 ↗</a>
</label>
<input type="password" x-model="cfg.kimi_api_key" placeholder="sk-..."
class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500">
<p x-show="cfg.has_kimi_key" class="text-xs text-green-600 mt-1">✓ 已配置</p>
</div>
<div>
<label class="block text-sm font-medium text-gray-700 mb-1">模型</label>
<select x-model="cfg.kimi_model" class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500">
<optgroup label="─── Moonshot 系列 ───">
<option value="moonshot-v1-32k">moonshot-v1-32k ★ 推荐(均衡)</option>
<option value="moonshot-v1-128k">moonshot-v1-128k超长上下文</option>
<option value="moonshot-v1-8k">moonshot-v1-8k快速低价</option>
<option value="moonshot-v1-auto">moonshot-v1-auto自动选择</option>
</optgroup>
<optgroup label="─── 自定义 ───">
<option value="">手动输入模型名</option>
</optgroup>
</select>
<div x-show="!kimiPresets.includes(cfg.kimi_model)" class="mt-2">
<input type="text" x-model="cfg.kimi_model" placeholder="输入模型名,如 moonshot-v1-128k"
class="w-full px-3 py-2 border border-blue-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 font-mono">
<p class="text-xs text-gray-400 mt-1">输入 Moonshot 平台支持的任意模型名</p>
</div>
</div>
<!-- 自定义 API 地址 -->
<div>
<button type="button" @click="cfg._kimi_adv = !cfg._kimi_adv"
class="text-xs text-gray-400 hover:text-gray-600 flex items-center gap-1">
<span x-text="cfg._kimi_adv ? '▾' : '▸'"></span>
高级:自定义 API 地址(代理/中转)
</button>
<div x-show="cfg._kimi_adv" class="mt-2">
<input type="text" x-model="cfg.kimi_base_url"
placeholder="https://api.moonshot.cn/v1"
class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 font-mono">
<p class="text-xs text-gray-400 mt-1">默认https://api.moonshot.cn/v1</p>
</div>
</div>
<div class="flex items-start gap-2 p-3 bg-teal-50 rounded-lg border border-teal-200 text-xs text-teal-700">
<svg class="w-4 h-4 flex-shrink-0 mt-0.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z"/>
</svg>
<span>Kimi 支持 Embedding APImoonshot-v1-embedding知识库将使用语义向量检索效果更佳。</span>
</div>
</div>
</template>
<template x-if="cfg.model_provider==='openai'">
<div class="space-y-3 mb-4">
<div>
<label class="block text-sm font-medium text-gray-700 mb-1">OpenAI API Key
<a href="https://platform.openai.com/" target="_blank" class="ml-1 text-blue-500 text-xs hover:underline font-normal">申请地址 ↗</a>
</label>
<input type="password" x-model="cfg.openai_api_key" placeholder="sk-..."
class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500">
<p x-show="cfg.has_openai_key" class="text-xs text-green-600 mt-1">✓ 已配置</p>
</div>
<div>
<label class="block text-sm font-medium text-gray-700 mb-1">模型</label>
<select x-model="cfg.openai_model" class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500">
<optgroup label="─── GPT-4.1 系列2025───">
<option value="gpt-4.1">gpt-4.1 ★ 推荐旗舰1M 上下文)</option>
<option value="gpt-4.1-mini">gpt-4.1-mini快速均衡</option>
<option value="gpt-4.1-nano">gpt-4.1-nano最轻量低价</option>
</optgroup>
<optgroup label="─── o 推理系列(深度推理,适合复杂标书)───">
<option value="o4-mini">o4-mini推理高性价比</option>
<option value="o3">o3最强推理较慢</option>
<option value="o3-mini">o3-mini快速推理</option>
<option value="o1">o1深度推理</option>
<option value="o1-mini">o1-mini推理入门</option>
<option value="o1-pro">o1-pro最高质量推理</option>
</optgroup>
<optgroup label="─── GPT-4o 系列 ───">
<option value="gpt-4o">gpt-4o</option>
<option value="gpt-4o-mini">gpt-4o-mini</option>
</optgroup>
<optgroup label="─── 旧版 ───">
<option value="gpt-4-turbo">gpt-4-turbo</option>
</optgroup>
<optgroup label="─── 自定义 ───">
<option value="">手动输入模型名</option>
</optgroup>
</select>
<div x-show="!openaiPresets.includes(cfg.openai_model)" class="mt-2">
<input type="text" x-model="cfg.openai_model" placeholder="输入模型名,如 gpt-5、gpt-4.1-2025-04-14"
class="w-full px-3 py-2 border border-blue-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 font-mono">
<p class="text-xs text-gray-400 mt-1">输入 OpenAI 平台支持的任意模型名</p>
</div>
</div>
<!-- 自定义 API 地址 -->
<div>
<button type="button" @click="cfg._oai_adv = !cfg._oai_adv"
class="text-xs text-gray-400 hover:text-gray-600 flex items-center gap-1">
<span x-text="cfg._oai_adv ? '▾' : '▸'"></span>
高级:自定义 API 地址Azure / 代理 / 中转)
</button>
<div x-show="cfg._oai_adv" class="mt-2">
<input type="text" x-model="cfg.openai_base_url"
placeholder="https://api.openai.com/v1"
class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 font-mono">
<p class="text-xs text-gray-400 mt-1">默认https://api.openai.com/v1 &nbsp;|&nbsp; Azure 示例https://YOUR.openai.azure.com/openai/deployments/MODEL</p>
</div>
</div>
</div>
</template>
<template x-if="cfg.model_provider==='ollama'">
<div class="space-y-3 mb-4">
<!-- 状态检测 -->
<div class="flex items-center justify-between p-3 bg-green-50 rounded-lg border border-green-200">
<div class="flex items-center gap-2 text-xs text-green-700">
<svg class="w-4 h-4 flex-shrink-0" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z"/>
</svg>
<span>本地运行,数据不上传云端,完全免费</span>
</div>
<button type="button" @click="testOllama()"
class="text-xs px-2 py-1 bg-green-600 hover:bg-green-700 text-white rounded-lg transition flex-shrink-0">
检测连接
</button>
</div>
<!-- Ollama 服务地址 -->
<div>
<label class="block text-sm font-medium text-gray-700 mb-1">
Ollama 服务地址
<span class="ml-1 text-xs text-gray-400 font-normal">(默认本机)</span>
</label>
<input type="text" x-model="cfg.ollama_base_url" placeholder="http://localhost:11434/v1"
class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-green-500 font-mono">
</div>
<!-- 模型选择 -->
<div>
<label class="block text-sm font-medium text-gray-700 mb-1">选择模型</label>
<select x-model="cfg.ollama_model"
class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-green-500">
<optgroup label="★ 推荐:标书写作首选">
<option value="qwen3:8b">qwen3:8b ★ 推荐入门(约 5 GB</option>
<option value="qwen3:14b">qwen3:14b ★ 推荐均衡(约 9 GB</option>
<option value="qwen3:32b">qwen3:32b ★ 推荐高质量(约 20 GB</option>
<option value="deepseek-r1:14b">deepseek-r1:14b ★ 推荐推理(约 9 GB</option>
<option value="deepseek-r1:32b">deepseek-r1:32b ★ 推荐高质量推理(约 20 GB</option>
</optgroup>
<optgroup label="─── Qwen3 系列阿里2025最新───">
<option value="qwen3:0.6b">qwen3:0.6b(最轻量,约 0.5 GB</option>
<option value="qwen3:1.7b">qwen3:1.7b(约 1 GB</option>
<option value="qwen3:4b">qwen3:4b约 2.5 GB</option>
<option value="qwen3:8b">qwen3:8b约 5 GB</option>
<option value="qwen3:14b">qwen3:14b约 9 GB</option>
<option value="qwen3:32b">qwen3:32b约 20 GB</option>
<option value="qwen3:30b-a3b">qwen3:30b-a3bMoE 高效,约 19 GB</option>
<option value="qwen3:235b-a22b">qwen3:235b-a22bMoE 旗舰,约 142 GB</option>
</optgroup>
<optgroup label="─── Qwen2.5 系列(阿里)───">
<option value="qwen2.5:0.5b">qwen2.5:0.5b(约 0.4 GB</option>
<option value="qwen2.5:1.5b">qwen2.5:1.5b(约 1 GB</option>
<option value="qwen2.5:3b">qwen2.5:3b约 2 GB</option>
<option value="qwen2.5:7b">qwen2.5:7b约 4.7 GB</option>
<option value="qwen2.5:14b">qwen2.5:14b约 9 GB</option>
<option value="qwen2.5:32b">qwen2.5:32b约 20 GB</option>
<option value="qwen2.5:72b">qwen2.5:72b约 47 GB</option>
</optgroup>
<optgroup label="─── Qwen2.5-Coder 系列(代码增强)───">
<option value="qwen2.5-coder:1.5b">qwen2.5-coder:1.5b(约 1 GB</option>
<option value="qwen2.5-coder:3b">qwen2.5-coder:3b约 2 GB</option>
<option value="qwen2.5-coder:7b">qwen2.5-coder:7b约 4.7 GB</option>
<option value="qwen2.5-coder:14b">qwen2.5-coder:14b约 9 GB</option>
<option value="qwen2.5-coder:32b">qwen2.5-coder:32b约 20 GB</option>
</optgroup>
<optgroup label="─── QwQ 系列(阿里深度推理)───">
<option value="qwq:32b">qwq:32b深度推理约 20 GB</option>
</optgroup>
<optgroup label="─── DeepSeek R1 系列(推理增强)───">
<option value="deepseek-r1:1.5b">deepseek-r1:1.5b(约 1 GB</option>
<option value="deepseek-r1:7b">deepseek-r1:7b约 4.7 GB</option>
<option value="deepseek-r1:8b">deepseek-r1:8b约 5 GB</option>
<option value="deepseek-r1:14b">deepseek-r1:14b约 9 GB</option>
<option value="deepseek-r1:32b">deepseek-r1:32b约 20 GB</option>
<option value="deepseek-r1:70b">deepseek-r1:70b约 43 GB</option>
<option value="deepseek-r1:671b">deepseek-r1:671b原版需超大显存</option>
</optgroup>
<optgroup label="─── DeepSeek V2 系列 ───">
<option value="deepseek-v2:16b">deepseek-v2:16bLite约 10 GB</option>
<option value="deepseek-v2:236b">deepseek-v2:236b全量约 150 GB</option>
</optgroup>
<optgroup label="─── DeepSeek V3 系列 ───">
<option value="deepseek-v3:7b">deepseek-v3:7b约 4.7 GB</option>
<option value="deepseek-v3:671b">deepseek-v3:671b完整版需超大显存</option>
</optgroup>
<optgroup label="─── 自定义 ───">
<option value="">手动输入模型名</option>
</optgroup>
</select>
</div>
<!-- 自定义模型名(选中"手动输入"或填入了预设外的值时显示) -->
<div x-show="!ollamaPresets.includes(cfg.ollama_model)">
<input type="text" x-model="cfg.ollama_model" placeholder="例如qwen3:latest 或 deepseek-r1:latest"
class="w-full px-3 py-2 border border-green-300 rounded-lg text-sm focus:outline-none focus:ring-2 focus:ring-green-500 font-mono">
<p class="text-xs text-gray-400 mt-1">请输入已通过 <code class="bg-gray-100 px-1 rounded">ollama pull &lt;模型名&gt;</code> 下载的模型名</p>
</div>
<div class="flex items-start gap-2 p-3 bg-amber-50 rounded-lg border border-amber-200 text-xs text-amber-700">
<svg class="w-4 h-4 flex-shrink-0 mt-0.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"/>
</svg>
<span>使用前请先安装 <a href="https://ollama.com/" target="_blank" class="underline">Ollama</a> 并下载模型,例如:<br>
<code class="bg-amber-100 px-1 rounded">ollama pull qwen3:14b</code>Qwen3 推荐)&nbsp;
<code class="bg-amber-100 px-1 rounded">ollama pull deepseek-r1:14b</code>DeepSeek R1 推荐)<br>
Ollama 本地不支持知识库 Embedding该功能将自动回退到本地模型。推理模型R1/QwQ可能输出 <code class="bg-amber-100 px-1 rounded">&lt;think&gt;</code> 标签,不影响正文使用。</span>
</div>
</div>
</template>
<!-- 标书篇幅设置 -->
<div class="mb-4 pt-3 border-t border-gray-100">
<p class="text-xs text-gray-500 mb-3 p-2 bg-slate-50 rounded-lg">
全稿「目标总页数」在<strong>已打开的标书项目</strong>中,到 <strong>步骤1「解析」</strong> 里设置,与下方「每节字数档」是两项不同设置。
</p>
<label class="block text-sm font-medium text-gray-700 mb-2">
标书篇幅预期
<span class="ml-1 text-xs text-gray-400 font-normal">(控制每个章节生成内容的字数)</span>
</label>
<div class="grid grid-cols-2 gap-2">
<label class="flex items-center gap-2.5 p-3 border-2 rounded-xl cursor-pointer transition"
:class="cfg.content_volume==='concise' ? 'border-blue-500 bg-blue-50' : 'border-gray-200 hover:border-gray-300'">
<input type="radio" name="volume" value="concise" x-model="cfg.content_volume" class="accent-blue-600">
<div>
<p class="font-medium text-sm leading-tight">精简版</p>
<p class="text-xs text-gray-400">每节约 1200 字</p>
</div>
</label>
<label class="flex items-center gap-2.5 p-3 border-2 rounded-xl cursor-pointer transition"
:class="cfg.content_volume==='standard' ? 'border-blue-500 bg-blue-50' : 'border-gray-200 hover:border-gray-300'">
<input type="radio" name="volume" value="standard" x-model="cfg.content_volume" class="accent-blue-600">
<div>
<p class="font-medium text-sm leading-tight">标准版(推荐)</p>
<p class="text-xs text-gray-400">每节约 2000 字</p>
</div>
</label>
<label class="flex items-center gap-2.5 p-3 border-2 rounded-xl cursor-pointer transition"
:class="cfg.content_volume==='detailed' ? 'border-blue-500 bg-blue-50' : 'border-gray-200 hover:border-gray-300'">
<input type="radio" name="volume" value="detailed" x-model="cfg.content_volume" class="accent-blue-600">
<div>
<p class="font-medium text-sm leading-tight">详细版</p>
<p class="text-xs text-gray-400">每节约 3000 字</p>
</div>
</label>
<label class="flex items-center gap-2.5 p-3 border-2 rounded-xl cursor-pointer transition"
:class="cfg.content_volume==='full' ? 'border-blue-500 bg-blue-50' : 'border-gray-200 hover:border-gray-300'">
<input type="radio" name="volume" value="full" x-model="cfg.content_volume" class="accent-blue-600">
<div>
<p class="font-medium text-sm leading-tight">充实版</p>
<p class="text-xs text-gray-400">每节约 4000 字</p>
</div>
</label>
</div>
</div>
<!-- 并发生成设置 -->
<div class="mb-4 pt-3 border-t border-gray-100">
<label class="block text-sm font-medium text-gray-700 mb-2">
并发生成章节数
<span class="ml-1 text-xs text-gray-400 font-normal">(同时调用 AI 的线程数,越大越快但需注意 API 限流)</span>
</label>
<div class="flex items-center gap-3">
<input type="range" min="1" max="10" step="1" x-model.number="cfg.max_concurrent"
class="flex-1 accent-blue-600">
<span class="w-12 text-center text-sm font-bold text-blue-600 bg-blue-50 rounded-lg py-1"
x-text="cfg.max_concurrent + ' 路'"></span>
</div>
<div class="flex justify-between text-xs text-gray-400 mt-1 px-0.5">
<span>保守1路</span>
<span>推荐3-5路</span>
<span>激进10路</span>
</div>
</div>
<div class="flex gap-3 mt-2">
<button @click="showConfig=false" class="flex-1 px-4 py-2 border border-gray-200 text-gray-600 rounded-xl text-sm hover:bg-gray-50 transition">取消</button>
<button @click="saveConfig()" class="flex-1 px-4 py-2 bg-blue-600 hover:bg-blue-700 text-white rounded-xl text-sm font-medium transition">保存配置</button>
</div>
</div>
</div>
<script>
/**
 * Alpine.js root component for the home page: project-list CRUD plus the
 * global AI-provider configuration dialog (persisted via /api/config).
 */
function app() {
    return {
        projects: [],          // project cards; filled by loadProjects()
        loading: true,         // page-level spinner until both initial loads finish
        showCreate: false,     // "new project" modal visibility
        showConfig: false,     // settings modal visibility
        newProjectName: '',
        creating: false,       // disables the create button while the POST is in flight
        // Provider/config form state, mirrored to GET/POST /api/config.
        cfg: {
            model_provider: 'qwen',
            target_pages: 0,
            qwen_api_key: '', qwen_model: 'qwen3.6-plus', qwen_base_url: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
            openai_api_key: '', openai_model: 'gpt-4.1', openai_base_url: 'https://api.openai.com/v1',
            deepseek_api_key: '', deepseek_model: 'deepseek-chat', deepseek_base_url: 'https://api.deepseek.com/v1',
            ollama_base_url: 'http://localhost:11434/v1', ollama_model: 'qwen3:8b',
            doubao_api_key: '', doubao_model: 'doubao-1-5-pro-32k', doubao_base_url: 'https://ark.cn-beijing.volces.com/api/v3',
            kimi_api_key: '', kimi_model: 'moonshot-v1-32k', kimi_base_url: 'https://api.moonshot.cn/v1',
            max_concurrent: 5, content_volume: 'standard',
            // UI-only "advanced settings" accordion flags for each provider.
            _qwen_adv: false, _ds_adv: false, _oai_adv: false, _doubao_adv: false, _kimi_adv: false,
        },
        // Preset model lists per provider; when cfg's model name is not in the
        // matching list, the template shows the free-text "custom model" input.
        qwenPresets: [
            'qwen3.6-plus',
            'qwen-max','qwen-max-latest','qwen-plus','qwen-plus-latest',
            'qwen-turbo','qwen-turbo-latest','qwen-long',
            'qwen3-235b-a22b','qwen3-32b','qwen3-30b-a3b','qwen3-14b','qwen3-8b',
        ],
        deepseekPresets: ['deepseek-chat','deepseek-reasoner'],
        openaiPresets: [
            'gpt-4.1','gpt-4.1-mini','gpt-4.1-nano',
            'o4-mini','o3','o3-mini','o1','o1-mini','o1-pro',
            'gpt-4o','gpt-4o-mini','gpt-4-turbo',
        ],
        doubaoPresets: [
            'doubao-1-5-pro-32k', 'doubao-1-5-pro-128k', 'doubao-1-5-lite-32k',
            'doubao-pro-32k', 'doubao-pro-128k', 'doubao-pro-256k',
            'doubao-lite-32k', 'doubao-lite-128k',
        ],
        kimiPresets: [
            'moonshot-v1-8k', 'moonshot-v1-32k', 'moonshot-v1-128k', 'moonshot-v1-auto',
        ],
        ollamaPresets: [
            // Recommended
            'qwen3:8b','qwen3:14b','qwen3:32b','deepseek-r1:14b','deepseek-r1:32b',
            // Qwen3
            'qwen3:0.6b','qwen3:1.7b','qwen3:4b','qwen3:30b-a3b','qwen3:235b-a22b',
            // Qwen2.5
            'qwen2.5:0.5b','qwen2.5:1.5b','qwen2.5:3b','qwen2.5:7b','qwen2.5:14b','qwen2.5:32b','qwen2.5:72b',
            // Qwen2.5-Coder
            'qwen2.5-coder:1.5b','qwen2.5-coder:3b','qwen2.5-coder:7b','qwen2.5-coder:14b','qwen2.5-coder:32b',
            // QwQ
            'qwq:32b',
            // DeepSeek R1
            'deepseek-r1:1.5b','deepseek-r1:7b','deepseek-r1:8b','deepseek-r1:70b','deepseek-r1:671b',
            // DeepSeek V2
            'deepseek-v2:16b','deepseek-v2:236b',
            // DeepSeek V3
            'deepseek-v3:7b','deepseek-v3:671b',
        ],
        // Fetch the project list and the saved config in parallel, then reveal the page.
        async init() {
            await Promise.all([this.loadProjects(), this.loadConfig()])
            this.loading = false
        },
        async loadProjects() {
            const res = await fetch('/api/projects')
            const data = await res.json()
            this.projects = data.projects || []
        },
        // Merge server values over local defaults so newly added fields keep their defaults.
        async loadConfig() {
            const res = await fetch('/api/config')
            const data = await res.json()
            this.cfg = { ...this.cfg, ...data }
        },
        // POST the new project, then navigate straight into it on success.
        async createProject() {
            if (!this.newProjectName.trim()) return
            this.creating = true
            const res = await fetch('/api/projects', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({ name: this.newProjectName.trim() })
            })
            const data = await res.json()
            this.creating = false
            this.showCreate = false
            this.newProjectName = ''
            if (data.id) {
                window.location = '/project/' + data.id
            }
        },
        // Confirm, delete server-side, then drop the card from the local list.
        async deleteProject(id, name) {
            if (!confirm(`确定要删除项目"${name}"吗?此操作不可恢复。`)) return
            await fetch('/api/projects/' + id, { method: 'DELETE' })
            this.projects = this.projects.filter(p => p.id !== id)
        },
        async saveConfig() {
            await fetch('/api/config', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify(this.cfg)
            })
            this.showConfig = false
            alert('配置已保存')
        },
        // Ping Ollama's native /api/tags endpoint (base URL with the OpenAI-compat
        // "/v1" suffix stripped) and report the locally downloaded models.
        async testOllama() {
            const baseUrl = (this.cfg.ollama_base_url || 'http://localhost:11434/v1').replace(/\/v1\/?$/, '')
            try {
                const res = await fetch(baseUrl + '/api/tags', { signal: AbortSignal.timeout(3000) })
                if (res.ok) {
                    const data = await res.json()
                    const models = (data.models || []).map(m => m.name).join('、') || '(暂无已下载模型)'
                    alert('✅ Ollama 连接成功!\n已下载模型' + models)
                } else {
                    alert('⚠️ Ollama 已启动,但返回状态异常:' + res.status)
                }
            } catch (e) {
                alert('❌ 无法连接到 Ollama' + (this.cfg.ollama_base_url || 'http://localhost:11434/v1') + '\n\n请确认\n1. 已安装 Ollamahttps://ollama.com\n2. Ollama 服务正在运行\n3. 服务地址填写正确')
            }
        },
        // Format a timestamp as YYYY-MM-DD for the project card footer.
        formatDate(dt) {
            if (!dt) return ''
            const d = new Date(dt)
            return `${d.getFullYear()}-${String(d.getMonth()+1).padStart(2,'0')}-${String(d.getDate()).padStart(2,'0')}`
        },
        // Map a tender-parse status to its badge label + Tailwind classes.
        statusBadge(status) {
            const map = {
                'none': { text: '未上传', cls: 'bg-gray-100 text-gray-500' },
                'uploaded': { text: '待解析', cls: 'bg-yellow-100 text-yellow-700' },
                'parsing': { text: '解析中', cls: 'bg-blue-100 text-blue-700' },
                'done': { text: '已解析', cls: 'bg-green-100 text-green-700' },
                'error': { text: '解析失败', cls: 'bg-red-100 text-red-600' },
            }
            return map[status] || map['none']
        }
    }
}
</script>
<!-- 页脚版权声明 -->
<footer class="mt-auto py-4 px-6 border-t border-gray-200 bg-white text-center text-xs text-gray-500 space-y-1">
<p class="font-medium text-gray-600">© 标书老崔</p>
<p>本工具仅限学习交流免费使用,生成的技术方案请人工核对。本工具不会在任何平台售卖,请注意甄别。</p>
</footer>
</body>
</html>

2356
templates/project.html Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,13 @@
{
"overall": false,
"details": [
{"rule": "身份信息隐藏", "passed": true, "message": "未发现投标人身份信息"},
{"rule": "标题格式", "passed": false, "message": "部分标题字号/字体/颜色/下划线不符合要求"},
{"rule": "正文格式", "passed": false, "message": "部分正文段落格式不符合要求"},
{"rule": "目录要求", "passed": true, "message": "目录符合无页码、无页眉页脚要求"},
{"rule": "图表规范", "passed": false, "message": "正文中发现2个图表或附件内图表文字格式错误"},
{"rule": "颜色与装饰", "passed": true, "message": "无彩色文字、无下划线、无着重号"},
{"rule": "页面设置", "passed": false, "message": "页面边距或纸张方向不符合要求"}
],
"violations": []
}

View File

@ -0,0 +1,95 @@
"""附件类章节识别与单图/单表类型选择。"""
import unittest
from utils import attachment_section as att
class TestIsAttachment(unittest.TestCase):
    """is_attachment_only_section: attachment-style titles match, normal ones do not."""

    def test_positive(self):
        rules = att.load_attachment_rules()
        for title in ('附件一:施工平面布置', '附图 组织机构', '附表 人员一览'):
            self.assertTrue(att.is_attachment_only_section(title, rules))

    def test_negative(self):
        rules = att.load_attachment_rules()
        for title in ('施工组织设计', ''):
            self.assertFalse(att.is_attachment_only_section(title, rules))
class TestPickKind(unittest.TestCase):
    """pick_single_figure_or_table: enable switches, keyword hints, ambiguous default."""

    def test_only_figure_switch(self):
        kind = att.pick_single_figure_or_table(
            '附件一xxx', True, False, att.DEFAULT_ATTACHMENT_RULES
        )
        self.assertEqual('figure', kind)

    def test_only_table_switch(self):
        kind = att.pick_single_figure_or_table(
            '附件一xxx', False, True, att.DEFAULT_ATTACHMENT_RULES
        )
        self.assertEqual('table', kind)

    def test_both_off(self):
        # Both switches off: no chart of any kind.
        result = att.pick_single_figure_or_table('附件一', False, False, None)
        self.assertIsNone(result)

    def test_table_hint(self):
        kind = att.pick_single_figure_or_table(
            '附件三 工程量一览表', True, True, att.DEFAULT_ATTACHMENT_RULES
        )
        self.assertEqual('table', kind)

    def test_figure_hint(self):
        kind = att.pick_single_figure_or_table(
            '附图 施工平面示意图', True, True, att.DEFAULT_ATTACHMENT_RULES
        )
        self.assertEqual('figure', kind)

    def test_default_ambiguous(self):
        rules = dict(att.DEFAULT_ATTACHMENT_RULES)
        rules['default_kind_when_ambiguous'] = 'table'
        kind = att.pick_single_figure_or_table('附件五 其他资料', True, True, rules)
        self.assertEqual('table', kind)
class TestAttachmentBodyMode(unittest.TestCase):
    """attachment_leaf_body_mode and its three mode predicates."""

    def test_default_stack_charts_only(self):
        rules = att.DEFAULT_ATTACHMENT_RULES
        self.assertEqual('stack_charts_only', att.attachment_leaf_body_mode(rules))
        self.assertTrue(att.use_attachment_stack_charts_body(rules))
        self.assertFalse(att.use_attachment_single_chart_only_body(rules))
        self.assertFalse(att.use_attachment_full_body(rules))

    def test_full_mode(self):
        rules = {**att.DEFAULT_ATTACHMENT_RULES, 'attachment_leaf_body_mode': 'full'}
        self.assertEqual('full', att.attachment_leaf_body_mode(rules))
        self.assertTrue(att.use_attachment_full_body(rules))
        self.assertFalse(att.use_attachment_stack_charts_body(rules))

    def test_single_chart_only(self):
        rules = {**att.DEFAULT_ATTACHMENT_RULES,
                 'attachment_leaf_body_mode': 'single_chart_only'}
        self.assertTrue(att.use_attachment_single_chart_only_body(rules))
        self.assertTrue(att.use_attachment_stack_charts_body(rules))
class TestExpandOutlineSkip(unittest.TestCase):
    """should_skip_expand_subchapters and parse_attachment_label."""

    def test_should_skip_attachment(self):
        for title in ('附件一:平面图', '附图 示意'):
            self.assertTrue(att.should_skip_expand_subchapters(title))

    def test_should_skip_normal_chapter(self):
        for title in ('施工组织设计', '质量管理体系与措施'):
            self.assertFalse(att.should_skip_expand_subchapters(title))

    def test_parse_attachment_label(self):
        # NOTE(review): the expected '' below looks like extraction damage —
        # presumably '一'; confirm against utils.attachment_section.
        self.assertEqual('', att.parse_attachment_label('附件一:平面图'))
        self.assertEqual('2', att.parse_attachment_label('附件2 承诺书'))
        self.assertEqual('附图', att.parse_attachment_label('附图 总平面'))


if __name__ == '__main__':
    unittest.main()

View File

@ -0,0 +1,52 @@
"""工程量清单本地分析单元测试。"""
import unittest
from utils.bill_analysis import (
analyze_boq_pages,
filter_bill_pages,
parse_bill_text,
)
class TestParseBillText(unittest.TestCase):
    """parse_bill_text extracts code/name/unit/quantity into a '未分类' category."""

    def test_code_name_unit_qty(self):
        result = parse_bill_text('010101001001 挖土方 m3 100.5 土壤类别:三类土')
        self.assertIn('categories', result)
        self.assertTrue(result['categories'])
        category = result['categories'][0]
        self.assertEqual('未分类', category['name'])
        self.assertEqual(1, len(category['items']))
        item = category['items'][0]
        self.assertEqual('010101001001', item['code'])
        self.assertIn('挖土', item['name'])
        self.assertEqual('m3', item['unit'])
        self.assertEqual('100.5', item['quantity'])

    def test_hierarchical_line_prefix(self):
        # A leading "1.1" outline number must not swallow the 12-digit code.
        result = parse_bill_text('1.1 010101001001 基础开挖 m3 50')
        item = result['categories'][0]['items'][0]
        self.assertEqual('010101001001', item['code'])
class TestFilterBillPages(unittest.TestCase):
    """filter_bill_pages keeps BOQ pages (incl. gap fill); analyze handles scans."""

    def test_two_pages_gap_fill(self):
        raw_pages = [
            '目录 前言',
            '分部分项工程量清单\n项目编码 项目名称 工程量\n010101001001 项 m3 1',
            '续表无表头\n010101002001 土 m2 2',
            '规费 税金 社会保险费 住房公积金 其他说明',
        ]
        pages, meta = filter_bill_pages(raw_pages)
        self.assertEqual(4, meta['total_pages'])
        self.assertGreaterEqual(len(pages), 2)
        joined = '\n'.join(pages)
        # The headerless continuation page must survive the filter too.
        self.assertIn('010101001001', joined)
        self.assertIn('010101002001', joined)

    def test_analyze_scanned_empty(self):
        # Pages with no extractable text are flagged as a scanned document.
        result = analyze_boq_pages(['', ' ', ''])
        self.assertTrue(result.get('scanned'))


if __name__ == '__main__':
    unittest.main()

View File

@ -0,0 +1,63 @@
"""技术暗标 HTML 格式检查:结构校验与极简用例(标准库 unittest"""
import json
import os
import sys
import unittest
# 保证可 `python tests/test_*.py` 从项目根导入 `modules`
_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if _ROOT not in sys.path:
sys.path.insert(0, _ROOT)
from modules.dark_bid_format_check import check_technical_bid # noqa: E402
def _sample_schema_path():
return os.path.join(os.path.dirname(__file__), "fixtures", "dark_bid_report_sample.json")
class TestDarkBidFormatCheck(unittest.TestCase):
    """check_technical_bid: report schema, rule coverage, and identity leaks."""
    def test_sample_fixture_keys(self):
        # The JSON fixture must carry the same top-level keys the checker emits.
        with open(_sample_schema_path(), encoding="utf-8") as f:
            sample = json.load(f)
        self.assertIn("overall", sample)
        self.assertIn("details", sample)
        self.assertIn("violations", sample)
        for d in sample["details"]:
            self.assertTrue({"rule", "passed", "message"}.issubset(d.keys()))
    def test_check_returns_structure(self):
        # Minimal compliant HTML: A4 @page margins, toc div, SimHei heading,
        # SimSun body paragraph — enough to exercise all seven rules.
        html = """<!DOCTYPE html><html><head><style>
@page { margin: 2.54cm 3.18cm 2.54cm 3.18cm; size: A4; }
</style></head><body style="margin:2.54cm 3.18cm">
<div class="toc">第一章 概述</div>
<h2 style="font-size:16pt;font-family:SimHei;font-weight:bold;color:#000">标题</h2>
<p style="font-size:14pt;font-family:SimSun;line-height:26pt;text-indent:2em;color:#000">
正文内容示例</p>
</body></html>"""
        r = check_technical_bid(html)
        self.assertIsInstance(r["overall"], bool)
        # Exactly seven rules are reported, identity/heading rules among them.
        self.assertEqual(len(r["details"]), 7)
        rules = [x["rule"] for x in r["details"]]
        self.assertIn("身份信息隐藏", rules)
        self.assertIn("标题格式", rules)
    def test_empty_html(self):
        # An empty document can never pass overall.
        r = check_technical_bid("")
        self.assertFalse(r["overall"])
    def test_identity_fail_on_company(self):
        # "我公司" in body text must trip the identity-hiding rule.
        html = (
            "<html><body><p style='font-size:14pt;font-family:SimSun;"
            "line-height:26pt;text-indent:2em;color:#000'>我公司参与投标</p>"
            "<div class='toc'>x</div>"
            "<style>@page{margin:2.54cm 3.18cm 2.54cm 3.18cm}</style>"
            "</body></html>"
        )
        r = check_technical_bid(html)
        id_rule = next(x for x in r["details"] if x["rule"] == "身份信息隐藏")
        self.assertFalse(id_rule["passed"])
if __name__ == "__main__":
    unittest.main()

View File

@ -0,0 +1,121 @@
"""图表意图栈与特征计分。"""
import unittest
from utils import diagram_intent as di
def _base_rules() -> dict:
return {
'schema_version': 1,
'threshold_figure': 1.0,
'threshold_table': 1.0,
'title_weight': 1.0,
'context_weight': 0.6,
'outline_context_lines': {'before': 2, 'after': 2},
'stack_order_when_both': 'score_desc',
'figure_keywords': [
{'text': '进度', 'weight': 1.2},
{'text': '横道', 'weight': 1.5},
],
'table_keywords': [
{'text': '一览表', 'weight': 1.5},
{'text': '人员', 'weight': 1.0},
],
}
class TestScoreFigureTable(unittest.TestCase):
    """Keyword scoring: title hits push the matching side's score higher."""

    def test_figure_higher_on_progress(self):
        fig, tab = di.score_figure_table('施工进度与横道计划', '', _base_rules())
        self.assertGreater(fig, tab)

    def test_table_higher_on_roster(self):
        fig, tab = di.score_figure_table('主要管理人员配置一览表', '', _base_rules())
        self.assertGreater(tab, fig)
class TestBuildStack(unittest.TestCase):
    """build_stack: gating switches, ordering policy, and thresholds."""

    def test_gate_figure_off(self):
        stack = di.build_stack(5.0, 5.0, _base_rules(),
                               enable_figure=False, enable_table=True)
        self.assertEqual(1, len(stack))
        self.assertEqual('table', stack[0].kind)

    def test_score_desc_order(self):
        rules = {**_base_rules(), 'stack_order_when_both': 'score_desc'}
        stack = di.build_stack(3.0, 1.0, rules, True, True)
        self.assertEqual(2, len(stack))
        self.assertEqual('figure', stack[0].kind)
        self.assertGreater(stack[0].score, stack[1].score)

    def test_figure_first(self):
        # 'figure_first' wins even when the table score is higher.
        rules = {**_base_rules(), 'stack_order_when_both': 'figure_first'}
        stack = di.build_stack(2.0, 5.0, rules, True, True)
        self.assertEqual('figure', stack[0].kind)
        self.assertEqual('table', stack[1].kind)

    def test_below_threshold_empty(self):
        rules = {**_base_rules(), 'threshold_figure': 10.0, 'threshold_table': 10.0}
        self.assertEqual([], di.build_stack(1.0, 1.0, rules, True, True))
class TestOutlineWindow(unittest.TestCase):
    """extract_outline_window: locates the title line; falls back to a prefix."""

    def test_finds_title_line(self):
        outline = '一、总则\n二、进度\n 2.1 横道计划\n三、尾'
        window = di.extract_outline_window(outline, '2.1 横道计划', 1, 1)
        self.assertIn('横道', window)

    def test_fallback_prefix(self):
        # Title absent from the outline: a non-empty prefix is returned instead.
        window = di.extract_outline_window('abc' * 400, '不存在的标题', 2, 2)
        self.assertTrue(len(window) > 0)
class TestAgentRender(unittest.TestCase):
    """DiagramIntentAgent.render_for_section: prompt text on match, '' otherwise."""

    def test_render_non_empty_when_match(self):
        rules = {**_base_rules(), 'threshold_figure': 0.5, 'threshold_table': 0.5}
        agent = di.DiagramIntentAgent(rules)
        rendered = agent.render_for_section(
            '施工进度横道图编制说明',
            '大纲\n进度\n横道',
            True,
            True,
        )
        self.assertIn('图示生成规范', rendered)
        self.assertIn('本节图表生成优先级', rendered)

    def test_render_empty_when_scores_low(self):
        rules = {**_base_rules(), 'threshold_figure': 100.0, 'threshold_table': 100.0}
        agent = di.DiagramIntentAgent(rules)
        rendered = agent.render_for_section('无关标题', '无关', True, True)
        self.assertEqual('', rendered)
class TestStackHelpers(unittest.TestCase):
    """stack_compact_labels and make_fallback_stack helpers."""

    def test_stack_compact_labels(self):
        stack = [
            di.DiagramIntent('figure', 1.0, 't'),
            di.DiagramIntent('table', 1.0, 't'),
        ]
        labels = di.stack_compact_labels(stack)
        self.assertEqual(2, len(labels))
        self.assertIn('[FIGURE]', labels[0])

    def test_make_fallback_stack(self):
        stack = di.make_fallback_stack('figure')
        self.assertEqual(1, len(stack))
        self.assertEqual('figure', stack[0].kind)


if __name__ == '__main__':
    unittest.main()

View File

@ -0,0 +1,44 @@
"""目录号格式化与大纲带号写回。"""
import os
import sys
import unittest
_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if _ROOT not in sys.path:
sys.path.insert(0, _ROOT)
from modules.generator import _parse_outline, _sections_to_outline_text # noqa: E402
from utils.outline_numbering import format_heading_display, int_to_chinese_numeral # noqa: E402
class TestOutlineNumbering(unittest.TestCase):
    """Heading-number formatting and outline round-trips keep their numbers."""
    def test_int_to_chinese(self):
        # NOTE(review): the expected "" literals below look like extraction
        # damage — presumably "一" and "十"; confirm against utils.outline_numbering.
        self.assertEqual(int_to_chinese_numeral(1), "")
        self.assertEqual(int_to_chinese_numeral(10), "")
        self.assertEqual(int_to_chinese_numeral(11), "十一")
        self.assertEqual(int_to_chinese_numeral(23), "二十三")
    def test_format_heading(self):
        # Level 1 renders Chinese numerals + "、"; deeper levels keep "1.2"-style numbers.
        self.assertEqual(format_heading_display(1, "3", "总体"), "三、总体")
        self.assertEqual(format_heading_display(2, "1.2", "子节"), "1.2 子节")
    def test_sections_to_outline_text_has_numbers(self):
        sections = [
            {"level": 1, "title": "第一章", "number": "1"},
            {"level": 2, "title": "小节", "number": "1.1"},
        ]
        text = _sections_to_outline_text("某项目技术标书", sections)
        self.assertIn("某项目技术标书", text)
        self.assertIn("一、第一章", text)
        self.assertIn("1.1 小节", text)
    def test_parse_roundtrip_numbered_outline(self):
        # Parsing an already-numbered outline must preserve the numbering.
        raw = "标书标题\n一、第一章\n1.1 节A\n"
        _, sections, normalized = _parse_outline(raw)
        self.assertGreaterEqual(len(sections), 2)
        self.assertIn("一、第一章", normalized)
        self.assertIn("1.1 节A", normalized)
if __name__ == "__main__":
    unittest.main()

View File

@ -0,0 +1,24 @@
"""大纲解析1.1 类编号不得被误拆成一级 1 与 title '.1 标题'"""
import unittest
from modules.generator import _parse_outline
class TestParseOutline(unittest.TestCase):
    """_parse_outline keeps '1.1'-style numbers intact (no '1' + '.1 title' split)."""

    def test_11_stays_single_section(self):
        raw = "某某项目标书标题\n1.1 沟槽开挖与支护\n1.2 排降水\n"
        _, sections, _ = _parse_outline(raw)
        self.assertEqual(2, len(sections), [s.get('number') for s in sections])
        for section in sections:
            if section.get('level') != 1:
                continue
            title = (section.get('title') or '').lstrip()
            self.assertFalse(
                title.startswith('.'),
                '不得出现一级章节 title 以 .1 开头(误将 1.1 拆成 1 与 .1 标题)',
            )
        all_titles = ' '.join(s['title'] for s in sections)
        self.assertIn('沟槽', all_titles)
        self.assertIn('排降', all_titles)


if __name__ == '__main__':
    unittest.main()

View File

@ -0,0 +1,86 @@
"""目标页数与一级篇章区间。"""
import random
import unittest
from utils import volume_chapters as vc
class TestVolumeChapters(unittest.TestCase):
    """Target-pages → chapter-count bands, sub-chapter anchors and jitter."""
    def test_top_level_default_pages_zero(self):
        # target_pages == 0 falls back to the default 8-10 band.
        lo, hi = vc.top_level_chapter_range_from_pages(0)
        self.assertEqual((lo, hi), (8, 10))
    def test_ranges_match_effective_volume_bands(self):
        self.assertEqual(vc.top_level_chapter_range_from_pages(100), (6, 8))
        self.assertEqual(vc.top_level_chapter_range_from_pages(125), (6, 8))
        self.assertEqual(vc.top_level_chapter_range_from_pages(150), (8, 10))
        self.assertEqual(vc.top_level_chapter_range_from_pages(200), (10, 12))
        self.assertEqual(vc.top_level_chapter_range_from_pages(300), (12, 16))
    def test_hint_default_no_pages(self):
        h = vc.outline_chapter_count_hint(0, 'standard')
        self.assertIn('8-10', h)
        self.assertIn('不超过10', h)
    def test_hint_with_pages(self):
        h = vc.outline_chapter_count_hint(150, 'standard', 700)
        # NOTE(review): '约 810' looks like a garbled '约 8-10' — confirm.
        self.assertIn('约 810', h)
        self.assertIn('150', h)
        self.assertIn('105000', h)  # 150 × 700 total-character target
        self.assertIn('过细', h)
    def test_subchapter_base_anchor_points(self):
        # Linear model: base = slope * pages + intercept, anchored at 100 and 300 pages.
        self.assertAlmostEqual(vc.subchapter_total_base_from_pages(100), 78.0, places=5)
        self.assertAlmostEqual(vc.subchapter_total_base_from_pages(300), 212.0, places=5)
        self.assertEqual(vc.SUBCHAPTER_PAGES_SLOPE, 0.67)
        self.assertEqual(vc.SUBCHAPTER_PAGES_INTERCEPT, 11.0)
    def test_subchapter_jitter_bounds_78_anchor(self):
        """A 100-page baseline of 78 chapters gives strict ±10% bounds [70, 86]."""
        self.assertEqual(vc.subchapter_jitter_bounds(78.0), (70, 86))
    def test_subchapter_jitter_bounds_300_pages(self):
        self.assertEqual(vc.subchapter_jitter_bounds(212.0), (191, 233))
    def test_allocate_subchapters_to_mains(self):
        self.assertEqual(vc.allocate_subchapters_to_mains(10, 3), [4, 3, 3])
        self.assertEqual(vc.allocate_subchapters_to_mains(0, 3), [0, 0, 0])
        self.assertEqual(vc.allocate_subchapters_to_mains(5, 2), [3, 2])
        self.assertEqual(vc.allocate_subchapters_to_mains(7, 0), [])
        # When n < k, the surplus main chapters get a quota of 0.
        a = vc.allocate_subchapters_to_mains(70, 100)
        self.assertEqual(len(a), 100)
        self.assertEqual(sum(a), 70)
        self.assertEqual(a.count(1), 70)
        self.assertEqual(a.count(0), 30)
    def test_subchapter_effective_respects_k_floor_and_jitter(self):
        # round(78 * u) for u in [0.9, 1.1] stays in [70, 86] for 78.0 base
        for seed in range(800):
            n = vc.subchapter_total_effective(100, 1, random.Random(seed))
            self.assertGreaterEqual(n, 70)
            self.assertLessEqual(n, 86)
        # Even with a huge main-chapter count the total must stay inside
        # [70, 86] (must not be inflated to hundreds by max(n, k)).
        for seed in range(20):
            nk = vc.subchapter_total_effective(100, 500, random.Random(seed))
            self.assertGreaterEqual(nk, 70, msg=f'seed={seed}')
            self.assertLessEqual(nk, 86, msg=f'seed={seed}')
    def test_subchapter_effective_zero_pages(self):
        self.assertEqual(vc.subchapter_total_effective(0, 5), 0)
        self.assertEqual(vc.subchapter_total_effective(100, 0), 0)
    def test_resolve_expand_target_pages(self):
        # Precedence: disabled → 0; explicit arg → arg; else project pages,
        # then fallback pages, finally the module default (100).
        self.assertEqual(vc.resolve_expand_target_pages(None, True, 100, 200), 0)
        self.assertEqual(vc.resolve_expand_target_pages(200, False, 100, 50), 200)
        self.assertEqual(vc.resolve_expand_target_pages(0, False, 80, 0), 80)
        self.assertEqual(vc.resolve_expand_target_pages(0, False, 0, 50), 50)
        self.assertEqual(
            vc.resolve_expand_target_pages(0, False, 0, 0),
            vc.EXPAND_OUTLINE_DEFAULT_TARGET_PAGES,
        )
        self.assertEqual(vc.EXPAND_OUTLINE_DEFAULT_TARGET_PAGES, 100)
if __name__ == '__main__':
    unittest.main()

View File

@ -0,0 +1,170 @@
"""字数分配与 rating_json 解析单元测试。"""
import json
import unittest
import config as cfg
from utils import word_allocation as wa
class TestParseRatingJson(unittest.TestCase):
    """parse_rating_json: canonical payloads parse; malformed input yields []."""

    def test_canonical_items(self):
        payload = {
            'items': [
                {'id': 'T1', 'name': '施工方案', 'weight': 30, 'keywords': ['工艺']},
                {'id': 'T2', 'name': '质量保证', 'weight': 10, 'keywords': []},
            ],
            'notes': '',
        }
        items = wa.parse_rating_json(json.dumps(payload, ensure_ascii=False))
        self.assertEqual(2, len(items))
        names = {entry['name'] for entry in items}
        self.assertIn('施工方案', names)
        self.assertIn('质量保证', names)
        weights = {entry['name']: entry['weight'] for entry in items}
        self.assertEqual(30.0, weights['施工方案'])

    def test_malformed_returns_empty(self):
        for bad in ('not json', ''):
            self.assertEqual([], wa.parse_rating_json(bad))
class TestComputeLeafAllocations(unittest.TestCase):
    """compute_leaf_allocations: budget modes, rating weighting, and fallbacks."""
    def test_none_when_no_rating_and_not_target_pages_budget(self):
        # anchor_mean mode with no rating data yields no allocation at all.
        leaves = [{'id': 1, 'section_title': '一、总体方案'}]
        rules = dict(wa.DEFAULT_RULES)
        rules['budget_mode'] = 'anchor_mean'
        self.assertIsNone(
            wa.compute_leaf_allocations('standard', leaves, '', rules)
        )
    def test_uniform_when_no_rating_but_target_pages(self):
        """Without rating items the target-pages budget B = pages × chars/page is still split evenly; the total must not grow linearly with leaf count N."""
        leaves = [
            {'id': 1, 'section_title': 'A'},
            {'id': 2, 'section_title': 'B'},
        ]
        rules = dict(wa.DEFAULT_RULES)
        rules['budget_mode'] = 'target_pages'
        # Patch module-level config; restore in finally to keep tests isolated.
        old_tp = cfg.TARGET_PAGES
        old_pce = cfg.PAGE_CHAR_ESTIMATE
        try:
            cfg.TARGET_PAGES = 100
            cfg.PAGE_CHAR_ESTIMATE = 700
            out = wa.compute_leaf_allocations('standard', leaves, '', rules)
        finally:
            cfg.TARGET_PAGES = old_tp
            cfg.PAGE_CHAR_ESTIMATE = old_pce
        self.assertIsNotNone(out)
        s = out[1]['target_chars'] + out[2]['target_chars']
        self.assertEqual(s, 100 * 700)
        self.assertEqual(out[1]['target_chars'], out[2]['target_chars'])
    def test_monotonicity_high_weight_match(self):
        # A section matching a heavily weighted rating item must get at least
        # as many characters as one matching a near-zero item.
        rating = json.dumps(
            {
                'items': [
                    {'name': '施工组织设计', 'weight': 50, 'keywords': ['进度']},
                    {'name': '页眉页脚规范', 'weight': 2, 'keywords': []},
                ]
            },
            ensure_ascii=False,
        )
        leaves = [
            {'id': 10, 'section_title': '3.1 施工组织设计与进度计划'},
            {'id': 11, 'section_title': '9.9 页眉格式说明'},
        ]
        rules = dict(wa.DEFAULT_RULES)
        rules['alpha'] = 0.95
        out = wa.compute_leaf_allocations('standard', leaves, rating, rules)
        self.assertIsNotNone(out)
        t_high = out[10]['target_chars']
        t_low = out[11]['target_chars']
        self.assertGreaterEqual(t_high, t_low, '强匹配高分项的章节应不低于弱匹配章节')
        self.assertIn('施工组织设计', out[10]['word_count_spec'])
    def test_budget_anchor_mean(self):
        # anchor_mean: total budget = N leaves × mean(base, core) of the volume preset.
        rating = json.dumps(
            {'items': [{'name': '技术部分', 'weight': 100}]},
            ensure_ascii=False,
        )
        leaves = [
            {'id': 1, 'section_title': 'A'},
            {'id': 2, 'section_title': 'B'},
            {'id': 3, 'section_title': 'C'},
        ]
        rules = dict(wa.DEFAULT_RULES)
        rules['budget_mode'] = 'anchor_mean'
        rules['alpha'] = 0.0
        old_tp = getattr(cfg, 'TARGET_PAGES', 0)
        setattr(cfg, 'TARGET_PAGES', 0)
        try:
            out = wa.compute_leaf_allocations('standard', leaves, rating, rules)
        finally:
            setattr(cfg, 'TARGET_PAGES', old_tp)
        self.assertIsNotNone(out)
        base, core, _, _ = wa.VOLUME_PRESETS['standard']
        expect = int(round(len(leaves) * (base + core) / 2.0))
        s = sum(out[i]['target_chars'] for i in (1, 2, 3))
        self.assertEqual(s, expect)
    def test_budget_target_pages(self):
        # target_pages: total budget = TARGET_PAGES × PAGE_CHAR_ESTIMATE.
        rating = json.dumps(
            {'items': [{'name': '技术部分', 'weight': 100}]},
            ensure_ascii=False,
        )
        leaves = [
            {'id': 1, 'section_title': 'A'},
            {'id': 2, 'section_title': 'B'},
        ]
        rules = dict(wa.DEFAULT_RULES)
        rules['budget_mode'] = 'target_pages'
        rules['alpha'] = 0.0
        old_tp = cfg.TARGET_PAGES
        old_pce = cfg.PAGE_CHAR_ESTIMATE
        try:
            cfg.TARGET_PAGES = 100
            cfg.PAGE_CHAR_ESTIMATE = 700
            out = wa.compute_leaf_allocations('standard', leaves, rating, rules)
        finally:
            cfg.TARGET_PAGES = old_tp
            cfg.PAGE_CHAR_ESTIMATE = old_pce
        self.assertIsNotNone(out)
        expect = 100 * 700
        s = sum(out[i]['target_chars'] for i in (1, 2))
        self.assertEqual(s, expect)
    def test_budget_target_pages_falls_back_when_pages_zero(self):
        # TARGET_PAGES == 0 degrades target_pages mode to the anchor_mean budget.
        rating = json.dumps(
            {'items': [{'name': '技术部分', 'weight': 100}]},
            ensure_ascii=False,
        )
        leaves = [
            {'id': 1, 'section_title': 'A'},
            {'id': 2, 'section_title': 'B'},
        ]
        rules = dict(wa.DEFAULT_RULES)
        rules['budget_mode'] = 'target_pages'
        rules['alpha'] = 0.0
        old_tp = cfg.TARGET_PAGES
        try:
            cfg.TARGET_PAGES = 0
            out = wa.compute_leaf_allocations('standard', leaves, rating, rules)
        finally:
            cfg.TARGET_PAGES = old_tp
        self.assertIsNotNone(out)
        base, core, _, _ = wa.VOLUME_PRESETS['standard']
        expect = int(round(len(leaves) * (base + core) / 2.0))
        s = sum(out[i]['target_chars'] for i in (1, 2))
        self.assertEqual(s, expect)
    def test_continuation_threshold(self):
        self.assertEqual(wa.continuation_threshold(2000), 1300)
        self.assertEqual(wa.continuation_threshold(100), 200)
if __name__ == '__main__':
    unittest.main()

1
utils/__init__.py Normal file
View File

@ -0,0 +1 @@

252
utils/ai_client.py Normal file
View File

@ -0,0 +1,252 @@
"""
AI API 调用封装支持 OpenAI阿里云通义千问DeepSeekOllama均兼容 OpenAI SDK
"""
import re
import time
import logging
from openai import OpenAI
import config
logger = logging.getLogger(__name__)
# Human-readable provider labels, used in user-facing error messages.
PROVIDER_NAMES = {
    'qwen': '通义千问 (Qwen)',
    'deepseek': 'DeepSeek',
    'openai': 'OpenAI',
    'ollama': 'Ollama 本地',
    'doubao': '豆包 (Doubao)',
    'kimi': 'Kimi (Moonshot)',
}
# Sign-up / console URLs surfaced when an API key is missing.
PROVIDER_LINKS = {
    'qwen': 'https://dashscope.aliyun.com/',
    'deepseek': 'https://platform.deepseek.com/',
    'openai': 'https://platform.openai.com/',
    'ollama': 'https://ollama.com/',
    'doubao': 'https://console.volcengine.com/ark/',
    'kimi': 'https://platform.moonshot.cn/',
}
def _check_api_key():
    """Pre-flight key check for the active provider.

    Raises RuntimeError with a user-facing hint when the key is missing or
    still the placeholder, so callers fail fast instead of retrying uselessly.
    """
    provider = config.MODEL_PROVIDER
    # Local Ollama needs no API key at all.
    if provider == 'ollama':
        return
    attr = {
        'qwen': 'QWEN_API_KEY',
        'deepseek': 'DEEPSEEK_API_KEY',
        'doubao': 'DOUBAO_API_KEY',
        'kimi': 'KIMI_API_KEY',
    }.get(provider, 'OPENAI_API_KEY')
    key = getattr(config, attr)
    if key and not key.startswith('sk-your'):
        return
    name = PROVIDER_NAMES.get(provider, provider)
    link = PROVIDER_LINKS.get(provider, '')
    raise RuntimeError(
        f'尚未配置 {name} 的 API Key。'
        f'请点击右上角设置按钮,选择"{name}"并填入有效的 API Key。'
        f'申请地址:{link}'
    )
def _get_client() -> OpenAI:
    """Build an OpenAI-compatible SDK client for the configured provider."""
    provider = config.MODEL_PROVIDER
    if provider == 'ollama':
        # Ollama ignores the key, but the SDK insists on a non-empty string.
        return OpenAI(api_key='ollama', base_url=config.OLLAMA_BASE_URL)
    prefix = {
        'qwen': 'QWEN',
        'deepseek': 'DEEPSEEK',
        'doubao': 'DOUBAO',
        'kimi': 'KIMI',
    }.get(provider, 'OPENAI')
    return OpenAI(
        api_key=getattr(config, prefix + '_API_KEY'),
        base_url=getattr(config, prefix + '_BASE_URL'),
    )
def _get_model() -> str:
    """Return the model name configured for the active provider."""
    prefix = {
        'qwen': 'QWEN',
        'deepseek': 'DEEPSEEK',
        'ollama': 'OLLAMA',
        'doubao': 'DOUBAO',
        'kimi': 'KIMI',
    }.get(config.MODEL_PROVIDER, 'OPENAI')
    return getattr(config, prefix + '_MODEL')
def _clean_response(text: str) -> str:
"""
过滤推理模型DeepSeek R1 / QwQ 输出的 <think>...</think> 思考过程标签
只保留最终正文内容避免思考链污染标书正文
"""
# 去除 <think>...</think> 块(含跨行内容)
text = re.sub(r'<think>[\s\S]*?</think>', '', text, flags=re.IGNORECASE)
return text.strip()
def _is_auth_error(e: Exception) -> bool:
"""判断是否为认证错误401 / invalid_api_key无需重试"""
# 优先用 openai 原生异常类型判断
try:
from openai import AuthenticationError, PermissionDeniedError
if isinstance(e, (AuthenticationError, PermissionDeniedError)):
return True
except ImportError:
pass
# 兜底:字符串匹配
err_str = str(e).lower()
return ('401' in err_str or 'invalid_api_key' in err_str
or 'incorrect api key' in err_str or 'authentication' in err_str)
# OpenAI o 系列推理模型:不支持 temperaturemax_tokens 需用 max_completion_tokens
_OPENAI_REASONING_MODELS = {'o1', 'o1-mini', 'o1-pro', 'o3', 'o3-mini', 'o3-pro', 'o4-mini'}
def _build_chat_kwargs(
model: str,
messages: list,
temperature: float,
max_tokens: int,
request_timeout: float | None = None,
) -> dict:
"""
根据模型类型构建 chat.completions.create 的参数字典
OpenAI o 系列推理模型不接受 temperature且使用 max_completion_tokens 替代 max_tokens
"""
base_model = model.split(':')[0] # 去掉 ollama tag 后缀
is_reasoning = base_model in _OPENAI_REASONING_MODELS
to = request_timeout if request_timeout is not None else config.REQUEST_TIMEOUT
kwargs = {
'model': model,
'messages': messages,
'timeout': to,
}
if is_reasoning:
kwargs['max_completion_tokens'] = max_tokens
else:
kwargs['temperature'] = temperature
kwargs['max_tokens'] = max_tokens
return kwargs
def chat(
    prompt: str,
    system: str = '你是一位专业的投标文件撰写专家。',
    temperature: float = 0.7,
    max_tokens: int = 8192,
    retries: int | None = None,
    request_timeout: float | None = None,
) -> str:
    """Call the AI chat endpoint and return the text response.

    Authentication errors abort immediately; any other error is retried
    with exponential backoff (2**attempt seconds). Parameter differences of
    OpenAI o-series reasoning models are handled by _build_chat_kwargs.

    Fixes vs. the previous version: the "retrying in Ns" warning was logged
    even on the final attempt (right before raising), and `retries` was
    annotated `int` despite defaulting to None.

    Args:
        prompt: user message content.
        system: system prompt prepended to the conversation.
        temperature: sampling temperature (ignored for o-series models).
        max_tokens: response token cap.
        retries: max attempts; defaults to config.MAX_RETRIES.
        request_timeout: per-request timeout; defaults to config.REQUEST_TIMEOUT.

    Raises:
        RuntimeError: invalid/expired API key, or all retries exhausted.
    """
    _check_api_key()
    max_retries = retries if retries is not None else config.MAX_RETRIES
    client = _get_client()
    model = _get_model()
    provider = config.MODEL_PROVIDER
    name = PROVIDER_NAMES.get(provider, provider)
    messages = [
        {'role': 'system', 'content': system},
        {'role': 'user', 'content': prompt},
    ]
    for attempt in range(max_retries):
        try:
            kwargs = _build_chat_kwargs(
                model, messages, temperature, max_tokens, request_timeout=request_timeout
            )
            resp = client.chat.completions.create(**kwargs)
            return _clean_response(resp.choices[0].message.content.strip())
        except Exception as e:
            if _is_auth_error(e):
                # Auth failures can never succeed on retry: fail fast.
                raise RuntimeError(
                    f'{name} API Key 无效或已过期,请在设置中重新配置。'
                    f'申请地址:{PROVIDER_LINKS.get(provider, "")}'
                ) from e
            if attempt < max_retries - 1:
                wait = 2 ** attempt
                logger.warning('AI 请求失败 (第%s次),%ss 后重试: %s', attempt + 1, wait, e)
                time.sleep(wait)
            else:
                raise RuntimeError(f'AI 接口调用失败(已重试 {max_retries} 次): {e}') from e
    return ''
def chat_with_history(system: str, messages: list,
                      temperature: float = 0.7, max_tokens: int = 4096) -> str:
    """Multi-turn chat endpoint with full history context (conversational section generation).

    messages format: [{'role': 'user'|'assistant', 'content': str}, ...]

    Same retry policy as chat(): auth errors abort immediately, other
    errors back off exponentially. Fix vs. the previous version: the
    "retrying" warning is no longer logged on the final attempt, which
    raises instead of retrying.

    Raises:
        RuntimeError: invalid/expired API key, or all retries exhausted.
    """
    _check_api_key()
    client = _get_client()
    model = _get_model()
    provider = config.MODEL_PROVIDER
    name = PROVIDER_NAMES.get(provider, provider)
    full_messages = [{'role': 'system', 'content': system}] + messages
    for attempt in range(config.MAX_RETRIES):
        try:
            kwargs = _build_chat_kwargs(model, full_messages, temperature, max_tokens)
            resp = client.chat.completions.create(**kwargs)
            return _clean_response(resp.choices[0].message.content.strip())
        except Exception as e:
            if _is_auth_error(e):
                raise RuntimeError(
                    f'{name} API Key 无效或已过期,请在设置中重新配置。'
                    f'申请地址:{PROVIDER_LINKS.get(provider, "")}'
                ) from e
            if attempt < config.MAX_RETRIES - 1:
                wait = 2 ** attempt
                logger.warning('对话 AI 请求失败 (第%s次),%ss 后重试: %s', attempt + 1, wait, e)
                time.sleep(wait)
            else:
                raise RuntimeError(f'AI 接口调用失败(已重试 {config.MAX_RETRIES} 次): {e}') from e
    return ''
def get_embeddings(texts: list[str]) -> list[list[float]]:
    """Return embedding vectors for the given texts.

    Supported providers: Qwen, OpenAI, Kimi. DeepSeek / Ollama / Doubao do
    not expose an embedding API, so callers must fall back to keyword
    retrieval.

    Raises:
        NotImplementedError: provider has no embedding API.
    """
    provider = config.MODEL_PROVIDER
    if provider in ('deepseek', 'ollama', 'doubao'):
        raise NotImplementedError(
            f'{PROVIDER_NAMES.get(provider)} 暂不支持 Embedding API知识库将使用关键词检索降级'
        )
    attr = {
        'qwen': 'QWEN_EMBEDDING_MODEL',
        'kimi': 'KIMI_EMBEDDING_MODEL',
    }.get(provider, 'OPENAI_EMBEDDING_MODEL')
    resp = _get_client().embeddings.create(model=getattr(config, attr), input=texts)
    return [row.embedding for row in resp.data]

186
utils/attachment_section.py Normal file
View File

@ -0,0 +1,186 @@
"""
附件类章节识别标题匹配expand_outline 跳过以及正文模式完整正文 vs 仅单图单表
"""
from __future__ import annotations
import json
import logging
import os
import re
from typing import Any, Dict, List, Optional
import config
logger = logging.getLogger(__name__)
# Built-in defaults for attachment-section detection; entries can be
# overridden by data/attachment_section_rules.json (see load_attachment_rules).
DEFAULT_ATTACHMENT_RULES: Dict[str, Any] = {
    'schema_version': 1,
    # Regexes matched against section titles to detect attachment-style
    # chapters.
    # NOTE(review): several patterns below look like they lost a leading CJK
    # character during extraction (e.g. r'\s*图' was presumably r'附\s*图') —
    # verify against the repository file.
    'title_regex': [
        r'附件\s*[一二三四五六七八九十0-9A-Za-z、:.]',
        r'\s*图',
        r'\s*表',
        r'\s*件\s*\(',
        r'^\s*[\d一二三四五六七八九十\..、]+\s*附件',
    ],
    # Keywords hinting that the attachment is a table.
    'table_hint_keywords': [
        '附表', '一览表', '清单表', '统计表', '明细表',
    ],
    # Keywords hinting that the attachment is a figure.
    'figure_hint_keywords': [
        '附图', '示意图', '平面图', '流程图', '布置图', '组织图', '横道',
    ],
    # Kind chosen when neither keyword set matches: 'table' | 'figure'.
    'default_kind_when_ambiguous': 'table',
    # stack_charts_only (default): emit only [FIGURE]/[TABLE] blocks, no body
    # text; full: long-form body; single_chart_only: only the top-of-stack block.
    'attachment_leaf_body_mode': 'stack_charts_only',
}
def attachment_rules_path() -> str:
    """Return the on-disk path of the attachment-section rules JSON (under config.DATA_DIR)."""
    return os.path.join(config.DATA_DIR, 'attachment_section_rules.json')
def load_attachment_rules(path: Optional[str] = None) -> Dict[str, Any]:
    """Load the attachment rules JSON, overlaying its entries onto the built-in defaults.

    Keys starting with '_' are treated as comments and ignored. A missing or
    unparsable file yields a copy of DEFAULT_ATTACHMENT_RULES.
    """
    rules_file = path or attachment_rules_path()
    merged = dict(DEFAULT_ATTACHMENT_RULES)
    if not os.path.isfile(rules_file):
        return merged
    try:
        with open(rules_file, encoding='utf-8') as fh:
            loaded = json.load(fh)
        if isinstance(loaded, dict):
            merged.update(
                (key, value)
                for key, value in loaded.items()
                if not key.startswith('_')
            )
    except Exception as e:
        logger.warning('加载 attachment_section_rules.json 失败,使用内置默认: %s', e)
    return merged
def attachment_leaf_body_mode(rules: Optional[Dict[str, Any]] = None) -> str:
    """Body strategy for attachment leaf sections.

    Returns one of: 'stack_charts_only' (default), 'full' (long-form body),
    'single_chart_only' (only the top-of-stack figure or table).
    """
    effective = rules or get_attachment_rules_cached()
    mode = (effective.get('attachment_leaf_body_mode') or 'stack_charts_only')
    mode = mode.strip().lower()
    if mode not in ('single_chart_only', 'stack_charts_only', 'full'):
        return 'stack_charts_only'
    return mode
def use_attachment_stack_charts_body(rules: Optional[Dict[str, Any]] = None) -> bool:
    """Attachment takes the intent-stack path (figures/tables only, no long body); includes the single_chart_only variant."""
    return attachment_leaf_body_mode(rules) in ('stack_charts_only', 'single_chart_only')
def use_attachment_single_chart_only_body(rules: Optional[Dict[str, Any]] = None) -> bool:
    """Only a single top-of-stack chart block is emitted."""
    return attachment_leaf_body_mode(rules) == 'single_chart_only'
def use_attachment_full_body(rules: Optional[Dict[str, Any]] = None) -> bool:
    """Attachment gets a full technical body."""
    return attachment_leaf_body_mode(rules) == 'full'
def is_attachment_only_section(section_title: str, rules: Optional[Dict[str, Any]] = None) -> bool:
    """Whether the title denotes an attachment-only chapter (附图/附表/附件N ...).

    Used both to skip such chapters in expand_outline and to branch body
    generation. Invalid configured regexes are logged and skipped.
    """
    title = (section_title or '').strip()
    if not title:
        return False
    effective = rules or load_attachment_rules()
    for pattern in (effective.get('title_regex') or []):
        try:
            matched = re.search(pattern, title)
        except re.error:
            logger.warning('无效 attachment title_regex已跳过: %s', pattern[:80])
            continue
        if matched:
            return True
    return False
def pick_single_figure_or_table(
    section_title: str,
    enable_figure: bool,
    enable_table: bool,
    rules: Optional[Dict[str, Any]] = None,
) -> Optional[str]:
    """Decide the single chart kind for an attachment section.

    Returns 'figure' | 'table' | None:
    both toggles off -> None; exactly one on -> that kind; both on -> pick
    by keyword hints in the title, then by default_kind_when_ambiguous.
    """
    if not enable_figure and not enable_table:
        return None
    if enable_figure and not enable_table:
        return 'figure'
    if enable_table and not enable_figure:
        return 'table'
    r = rules or load_attachment_rules()
    t = (section_title or '')
    tbl_kw = list(r.get('table_hint_keywords') or [])
    fig_kw = list(r.get('figure_hint_keywords') or [])
    # A lone "table" character is easy to misjudge; prefer table only on
    # explicit attachment-table keywords or keyword combinations.
    for kw in tbl_kw:
        if kw and kw in t:
            return 'table'
    for kw in fig_kw:
        if kw and kw in t:
            return 'figure'
    # A generic "table" is common in attachment context.
    # NOTE(review): the '' literals below appear to have lost CJK characters
    # (likely 表/图) in extraction — as written the first branch always
    # matches; verify against the repository file.
    if '' in t and '' not in t:
        return 'table'
    if '' in t and '' not in t:
        return 'figure'
    default = (r.get('default_kind_when_ambiguous') or 'table').strip().lower()
    if default == 'figure':
        return 'figure'
    return 'table'
# Process-wide cache of the loaded attachment rules (populated lazily).
_cached_rules: Optional[Dict[str, Any]] = None
def get_attachment_rules_cached() -> Dict[str, Any]:
    """Return the attachment rules, loading them from disk once and caching the result."""
    global _cached_rules
    if _cached_rules is None:
        _cached_rules = load_attachment_rules()
    return _cached_rules
def should_skip_expand_subchapters(title: str) -> bool:
    """AI auto-fill of sub-chapters (expand_outline): attachment-style top-level
    chapters get no generated children.

    Criterion is identical to is_attachment_only_section, using the cached rules.
    """
    return is_attachment_only_section(title, get_attachment_rules_cached())
# Extract an "attachment N"-style label from a title, for logging.
_ATTACHMENT_LABEL_RE = re.compile(
    r'附件\s*[:]?\s*([一二三四五六七八九十百0-9A-Za-z]+)',
)
# NOTE(review): this pattern appears to have lost a leading CJK character
# (likely 附) during extraction — verify against the repository file.
_ATTACHMENT_FIG_TBL_RE = re.compile(r'\s*[图表]\s*([一二三四五六七八九十百0-9]*)')
def parse_attachment_label(title: str) -> Optional[str]:
    """Best-effort label parsed from an attachment title.

    Returns the attachment number (e.g. '一', '3'), the generic labels
    '附图' / '附表', or None when no attachment marker is found.
    """
    t = (title or '').strip()
    if not t:
        return None
    m = _ATTACHMENT_LABEL_RE.search(t)
    if m:
        return m.group(1).strip() or None
    m2 = _ATTACHMENT_FIG_TBL_RE.search(t)
    if m2:
        rest = (m2.group(1) or '').strip()
        if rest:
            return rest
        matched = m2.group(0)
        # NOTE(review): the '' literal below appears to have lost a CJK
        # character (likely 图) in extraction — verify against the repo file.
        if '' in matched:
            return '附图'
        return '附表'
    if re.search(r'\s*图', t):
        return '附图'
    if re.search(r'\s*表', t):
        return '附表'
    return None

577
utils/bill_analysis.py Normal file
View File

@ -0,0 +1,577 @@
"""
工程量清单本地分析 bill-worker.js Phase 2/3 移植
Phase 2按页关键字筛选清单页Phase 3正则解析分部与清单项
"""
from __future__ import annotations
import logging
import re
from typing import Any
logger = logging.getLogger(__name__)
# Keywords whose presence marks a page as BOQ (bill of quantities) related.
BILL_KW = ['项目编码', '项目名称', '工程量', '计量单位', '综合单价', '清单编码']
# Section-heading keywords for BOQ chapters.
SEC_KW = ['分部分项', '分类分项', '措施项目', '其他项目', '工程量清单计价']
# Keywords typical of fee/tax summary pages (used to skip them when bridging gaps).
FEE_PAGE_KW = [
    '规费', '税金', '社会保险费', '住房公积金', '养老保险',
    '工伤保险', '失业保险', '医疗保险', '教育费附加', '城市维护建设税',
]
# Hierarchical numbering at line start, e.g. "1.2.3 ".
ITEM_START = re.compile(r'^\d+(\.\d+)+\s')
# Item code appearing mid-line: 9-12 digits, or a B-prefixed 5-6 digit code.
CODE_INLINE = re.compile(r'(?:^|\s)(\d{9,12}|(?<![A-Za-z])B\d{5,6})\s')
# Item code at line start.
CODE_START_RE = re.compile(r'^(\d{9,12}|B\d{5,6})\s')
# Short sequence number followed by an item code.
SEQ_CODE_RE = re.compile(r'^\d{1,4}\s+(\d{9,12}|(?<![A-Za-z])B\d{5,6})\s')
# Injected page markers like "-- 3 of 12 --".
PAGE_MARK = re.compile(r'^--\s*\d+\s+of\s+\d+\s*--')
# Table-header rows to drop before parsing.
HEADER_RE = re.compile(r'^序号\s+(项目编码|项目名称)')
HEADER_KW = re.compile(
    r'^(项目编码|项目名称|清单编码|计量单位|综合单价|工程量|合\s*价|金额|序号)\s'
)
# Category heading markers.
# NOTE(review): the '' entries below appear to have lost CJK characters
# (likely 一/二/... chapter numerals) during extraction — verify against the
# repository file.
CATEGORY_MARKERS = [
    '', '', '', '', '', '', '', '', '', '',
    '(一)', '(二)', '(三)', '(四)', '(五)',
]
# Item code: 9-12 digits or a B code inline (excluding letter prefixes like GB).
CODE_RE = re.compile(r'(?<![A-Za-z])(\d{9,12}|(?<![A-Za-z])B\d{5,6})')
# Measurement-unit tokens.
# NOTE(review): many '' entries again look extraction-stripped (CJK unit
# characters such as 个/套/项) — verify against the repository file.
UNIT_TOKENS = [
    '', '', 'm3', 'm2', 'km', 'hm2', '', '', 't', 'kg',
    '', '', '', '', '', '', '', '', '', '', '',
    '', '', '', '', '', '', '', '', '', '', '',
    '', '', '', '', '延m', '', '', 'm',
]
UNIT_SET = frozenset(UNIT_TOKENS)
_unit_escaped = [re.escape(u) for u in UNIT_TOKENS]
# A unit token bounded by start/whitespace, followed by whitespace, a digit or EOL.
UNIT_RE = re.compile(r'(?:^|\s)(' + '|'.join(_unit_escaped) + r')(?=\s|\d|$)')
# Subtotal/total rows that are skipped entirely.
SKIP_RE = re.compile(r'\s*计|小\s*计|本页小计|总\s*计|价税合计')
# Dash-separated code fragments like "123-456-789(-012)", to be folded into one
# contiguous item code by _fold_dash_codes.
_DASH_CODE = re.compile(
    r'(\d{2,4})[-](\d{2,4})[-](\d{2,4})(?:[-](\d{2,4}))?'
)
# Item names that are exactly a fee/tax entry (removed from the parsed bill).
_EXACT_FEE_ITEM = frozenset([
    '规费', '税金', '利润', '增值税', '暂列金额', '暂估价', '计日工',
    '总承包服务费', '企业管理费', '甲供材料保管费', '价税合计',
])
# Substring keywords identifying fee/surcharge items.
_FEE_KW = [
    '安全文明', '文明施工费', '环境保护费', '临时设施费',
    '夜间施工增加费', '夜间施工费',
    '冬雨季施工增加费', '冬雨季施工费',
    '二次搬运费', '大型机械设备进出场', '大型机械进出场',
    '施工排水降水', '排水降水费',
    '已完工程及设备保护', '已完工程保护费',
    '工程排污费', '社会保障费', '住房公积金',
    '工伤保险', '劳动保险', '意外伤害保险', '建筑工程保险',
    '城市维护建设税', '城市建设维护税',
    '教育费附加', '地方教育附加',
    '材料暂估', '专业工程暂估',
    '超高施工增加费', '安全防护费',
    '措施项目费', '其他项目费', '不可竞争费',
]
# Keywords marking a line as a work-division/category heading.
_CAT_KW = [
    '土建', '建筑', '结构', '装饰', '装修', '安装', '给排水', '暖通', '空调', '通风',
    '电气', '强电', '弱电', '消防', '智能化', '幕墙', '门窗', '园林', '绿化', '景观',
    '市政', '道路', '桥梁', '管网', '基础', '地基', '桩基', '主体', '屋面', '防水',
    '保温', '钢结构', '排水', '给水', '照明', '动力', '防雷', '电梯', '人防', '室外',
    '附属', '分部', '工程', '措施', '清单', '土石方', '混凝土', '砌筑', '模板', '脚手架',
    '水利', '河道', '管道', '阀门', '设备', '仪表', '自动化', '通信', '网络',
    '拆除', '外墙', '内墙', '楼地面', '天棚', '吊顶', '栏杆', '屋顶', '涂料', '抹灰',
    '廊道', '阀门井', '蓄水池', '泵站', '供水', '引水', '水源', '渠道', '闸门',
    '围栏', '警示', '检修', '管线', '配电', '水池', '水塔', '取水', '净水',
]
# Category titles that are exactly fee/summary headings (not work divisions).
_EXACT_FEE_CAT = frozenset([
    '规费', '税金', '利润', '增值税', '暂列金额', '暂估价', '计日工',
    '总承包服务费', '企业管理费', '价税合计',
    '措施项目费', '其他项目费', '不可竞争费',
])
# Substring keywords identifying fee/summary category headings.
_FEE_CAT_KW = [
    '措施项目费', '其他项目费', '不可竞争费',
    '规费汇总', '税金汇总', '费率', '费用汇总', '费用合计',
    '暂列金额', '暂估价', '计日工', '总承包服务费',
    '安全文明施工费', '社会保障费', '住房公积金',
    '工伤保险', '教育费附加', '城市维护建设税',
]
# Keywords that mark the start of a spec/description clause inside a name cell.
_SPEC_KW_RE = re.compile(
    r'(材质|规格|型号|品牌|颜色|尺寸|厚度|直径|管径|强度|等级|类别|类型|做法|要求|标准|内容|工作内容|土壤|含量|配合比|工艺|方式|形式|范围|部位|位置|高度|宽度|长度|深度|坡度|截面|跨度|运距|开挖|回填|混凝土|钢筋|压实)[:]'
)
def _fold_dash_codes(line: str) -> str:
    """Collapse dash-separated code fragments into one contiguous code.

    '123-456-789' becomes '123456789' only when the joined digits form a
    plausible 9-12 digit item code; otherwise the match is left untouched.
    """
    def _join(match: re.Match) -> str:
        merged = ''.join(group or '' for group in match.groups())
        return merged if 9 <= len(merged) <= 12 else match.group(0)
    return _DASH_CODE.sub(_join, line)
def is_fee_item(name: str) -> bool:
    """True when the item name is a fee/tax entry (exact match or keyword hit) rather than a work item."""
    if not name:
        return False
    squeezed = re.sub(r'\s+', '', name)
    if squeezed in _EXACT_FEE_ITEM:
        return True
    return any(keyword in squeezed for keyword in _FEE_KW)
def split_name_and_spec(raw_name: str) -> tuple[str, str]:
    """Split an item cell into (name, spec).

    The spec part starts at the first of: a numbered clause followed by a CJK
    character, a spec keyword (规格/型号/... plus colon), or a parenthesised
    number. When none is found the whole cell is the name.
    """
    if not raw_name:
        return '', ''
    for marker in (
        re.search(r'\d+[.、.)\uFF09]\s*[\u4e00-\u9fff]', raw_name),
        _SPEC_KW_RE.search(raw_name),
        re.search(r'[(]\d+[)]', raw_name),
    ):
        if marker and marker.start() > 0:
            cut = marker.start()
            return raw_name[:cut].strip(), raw_name[cut:].strip()
    return raw_name, ''
def is_cat_title(text: str) -> bool:
    """True when the text contains any work-division/category keyword."""
    return any(keyword in text for keyword in _CAT_KW)
def is_fee_cat_title(text: str) -> bool:
    """True when the (whitespace-squeezed) text is a fee/summary category heading."""
    if not text:
        return False
    squeezed = re.sub(r'\s+', '', text)
    if squeezed in _EXACT_FEE_CAT:
        return True
    return any(keyword in squeezed for keyword in _FEE_CAT_KW)
def _is_new_line_trigger(raw: str) -> bool:
    """True when this physical line starts a new logical row.

    Triggers: hierarchical numbering, an item code at line start (bare or
    sequence-prefixed), or a category marker followed by a space.
    """
    if ITEM_START.match(raw) or CODE_START_RE.match(raw) or SEQ_CODE_RE.match(raw):
        return True
    return any(
        raw.startswith(marker + ' ') or raw.startswith(marker + '\u3000')
        for marker in CATEGORY_MARKERS
    )
def parse_bill_text(text: str) -> dict[str, Any]:
    """Parse merged BOQ page text into {'project_summary': ..., 'categories': [...]}.

    Pipeline (ported from bill-worker.js Phase 3):
      1. normalize physical lines and merge wrapped lines into logical rows;
      2. walk the rows, building categories and items via code/unit regexes;
      3. drop fee/tax items;
      4. merge duplicate item names per category (summing quantities,
         concatenating specs).
    """
    # Phase 1: normalize physical lines (tabs -> spaces, fold dash codes).
    raw_lines = []
    for l in text.split('\n'):
        line = l.replace('\t', ' ').strip()
        line = _fold_dash_codes(line)
        raw_lines.append(line)
    # Merge wrapped physical lines into logical rows.
    logic_lines: list[str] = []
    current_line = ''
    for raw in raw_lines:
        # Drop empties, page markers, table headers and boilerplate rows.
        if not raw or PAGE_MARK.match(raw):
            continue
        if HEADER_RE.match(raw) or HEADER_KW.match(raw):
            continue
        if re.match(r'^(元)|^款章节号|^备注$|^第\d+页', raw):
            continue
        if _is_new_line_trigger(raw):
            if current_line:
                logic_lines.append(current_line)
            current_line = raw
        elif CODE_INLINE.search(raw) and len(raw) > 15:
            # A long line carrying an inline code also starts a new row.
            if current_line:
                logic_lines.append(current_line)
            current_line = raw
        else:
            # Continuation line: append, but cap runaway rows at ~300 chars.
            if current_line and len(current_line) > 300:
                logic_lines.append(current_line)
                current_line = raw
            else:
                current_line = current_line + ' ' + raw if current_line else raw
    if current_line:
        logic_lines.append(current_line)
    logger.debug('合并后 %s 条逻辑行(原始 %s 行)', len(logic_lines), len(raw_lines))
    # Phase 2: build categories and items from the logical rows.
    categories: list[dict[str, Any]] = []
    cur_cat: dict[str, Any] | None = None
    cur_item: dict[str, Any] | None = None
    for line in logic_lines:
        if SKIP_RE.search(line):
            continue
        # Leading sequence numbers: either hierarchical like "1.1.1.1 ", or a
        # short sequence + space + 9+ digit code. Avoid stripping a row whose
        # line starts directly with a 9-12 digit item code + space (the
        # original JS \d+(\.\d+)* pattern would swallow the code itself).
        stripped = line.strip()
        m_hier = re.match(r'^\d+(?:\.\d+)+\s+', stripped)
        if m_hier:
            stripped = stripped[m_hier.end():].strip()
        elif re.match(r'^\d{1,4}\s+\d{9}', stripped):
            stripped = re.sub(r'^\d{1,4}\s+', '', stripped, count=1).strip()
        if not stripped:
            stripped = line.strip()
        if not stripped:
            continue
        cm = CODE_RE.search(stripped)
        if cm:
            # Coded item row: flush the current item, ensure a category exists.
            if cur_item and cur_cat:
                cur_cat['items'].append(cur_item)
            if not cur_cat:
                cur_cat = {'name': '未分类', 'items': []}
                categories.append(cur_cat)
            code = cm.group(1)
            rest = stripped[cm.end():].strip()
            name, unit, quantity, spec = '', '', '', ''
            unit_match = UNIT_RE.search(rest)
            if unit_match:
                # Split name | unit | quantity | trailing spec around the unit.
                ui = rest.find(unit_match.group(0))
                raw_name = rest[:ui].strip()
                unit = unit_match.group(1)
                after_unit = rest[ui + len(unit_match.group(0)):].strip()
                qm = re.match(r'^([\d,.]+)', after_unit)
                if qm:
                    quantity = qm.group(1)
                    tail = after_unit[qm.end():].strip()
                    if tail:
                        # Skip purely numeric tail tokens (prices); the
                        # remainder is treated as spec text.
                        tail_tokens = tail.split()
                        si = 0
                        while si < len(tail_tokens) and re.match(r'^[\d,.%\-]+$', tail_tokens[si]):
                            si += 1
                        spec_tail = ' '.join(tail_tokens[si:]).strip()
                        if spec_tail:
                            spec = spec_tail
                ns_name, ns_spec = split_name_and_spec(raw_name)
                name = ns_name
                if ns_spec:
                    spec = ns_spec + (';' + spec if spec else '')
            else:
                # No regex unit hit: scan tokens from the right for a unit.
                tokens = [t for t in rest.split() if t]
                found_unit_idx = -1
                for ti in range(len(tokens) - 1, 0, -1):
                    if tokens[ti] in UNIT_SET:
                        found_unit_idx = ti
                        break
                if found_unit_idx >= 1:
                    raw_name_str = ' '.join(tokens[:found_unit_idx])
                    ns_name, ns_spec = split_name_and_spec(raw_name_str)
                    name = ns_name
                    if ns_spec:
                        spec = ns_spec
                    unit = tokens[found_unit_idx]
                    after_tokens = tokens[found_unit_idx + 1:]
                    if after_tokens and re.match(r'^[\d,.]+$', after_tokens[0]):
                        quantity = after_tokens[0]
                        si = 1
                        while si < len(after_tokens) and re.match(r'^[\d,.%\-]+$', after_tokens[si]):
                            si += 1
                        spec_tail = ' '.join(after_tokens[si:]).strip()
                        if spec_tail:
                            spec = spec + ';' + spec_tail if spec else spec_tail
                else:
                    # No unit at all: whole remainder is the name; peel a
                    # trailing unit token if the squeezed name ends with one.
                    name = rest
                    name = re.sub(r'\s+', '', name).strip()
                    for u in UNIT_TOKENS:
                        if name.endswith(u) and len(name) > len(u):
                            unit = unit or u
                            name = name[: len(name) - len(u)]
                            break
            cur_item = {'code': code, 'name': name, 'unit': unit, 'quantity': quantity, 'spec': spec}
            continue
        if len(stripped) > 4:
            # Un-coded row that still looks like "name unit qty": fallback item.
            uni_match = UNIT_RE.search(stripped)
            if uni_match:
                ui = stripped.find(uni_match.group(0))
                before_unit = stripped[:ui].strip()
                after_unit = stripped[ui + len(uni_match.group(0)):].strip()
                has_qty = bool(re.match(r'^[\d,.]+', after_unit))
                if (
                    2 <= len(before_unit) <= 50
                    and has_qty
                    and re.search(r'[\u4e00-\u9fff]', before_unit)
                ):
                    if cur_item and cur_cat:
                        cur_cat['items'].append(cur_item)
                    if not cur_cat:
                        cur_cat = {'name': '未分类', 'items': []}
                        categories.append(cur_cat)
                    unit_fb = uni_match.group(1)
                    qm = re.match(r'^([\d,.]+)', after_unit)
                    quantity_fb = qm.group(1) if qm else ''
                    ns_name, ns_spec = split_name_and_spec(before_unit)
                    name_fb = re.sub(r'\s+', '', ns_name).strip()
                    spec_fb = ns_spec or ''
                    cur_item = {'code': '', 'name': name_fb, 'unit': unit_fb, 'quantity': quantity_fb, 'spec': spec_fb}
                    continue
        if 2 < len(stripped) < 60 and not CODE_RE.search(stripped):
            # Short, code-free line: spec continuation or category heading.
            if UNIT_RE.search(stripped) and re.search(r'\d+\.?\d*\s*$', stripped):
                # Unit + trailing number: attach to the current item's spec.
                if cur_item:
                    cur_item['spec'] = (cur_item.get('spec') or '') + (
                        ';' + stripped if cur_item.get('spec') else stripped
                    )
                continue
            if is_cat_title(stripped) and not UNIT_RE.search(stripped) and not is_fee_cat_title(stripped):
                # Category heading recognized by keyword.
                if cur_item and cur_cat:
                    cur_cat['items'].append(cur_item)
                cur_item = None
                clean_title = re.sub(
                    r'\s+(座|个|项|处|m|km|段|条)\s+\d+[\d.]*\s*$', '', stripped
                ).strip()
                cur_cat = {'name': clean_title, 'items': []}
                categories.append(cur_cat)
                continue
            if re.match(r'^[一二三四五六七八九十]+\s', stripped) or re.match(
                r'^[一二三四五六七八九十\d]+', stripped
            ):
                # Category heading recognized by numbering; fee headings skipped.
                clean_title = re.sub(r'\s+(座|个|项|处)\s+\d+[\d.]*\s*$', '', stripped).strip()
                if is_fee_cat_title(clean_title):
                    continue
                if cur_item and cur_cat:
                    cur_cat['items'].append(cur_item)
                cur_item = None
                cur_cat = {'name': clean_title, 'items': []}
                categories.append(cur_cat)
                continue
        if cur_item and len(stripped) > 1:
            # Anything else: treat as spec continuation of the current item.
            cur_item['spec'] = (cur_item.get('spec') or '') + (
                ';' + stripped if cur_item.get('spec') else stripped
            )
    if cur_item and cur_cat:
        cur_cat['items'].append(cur_item)
    # Phase 3: drop fee/tax rows that slipped through.
    fee_filtered = 0
    for cat in categories:
        if cat.get('items'):
            before = len(cat['items'])
            cat['items'] = [it for it in cat['items'] if not is_fee_item(it.get('name', ''))]
            fee_filtered += before - len(cat['items'])
    if fee_filtered:
        logger.debug('费用项过滤: 移除 %s 项', fee_filtered)
    # Phase 4: merge items sharing the same (whitespace-squeezed) name.
    total_before_merge = 0
    total_after_merge = 0
    for cat in categories:
        items = cat.get('items') or []
        if not items:
            continue
        total_before_merge += len(items)
        name_map: dict[str, dict[str, Any]] = {}
        for item in items:
            key = re.sub(r'\s+', '', (item.get('name') or '')).strip()
            if not key:
                continue
            if key not in name_map:
                name_map[key] = {
                    'code': item.get('code') or '',
                    'name': item['name'],
                    'unit': item.get('unit') or '',
                    'quantity': item.get('quantity') or '',
                    'spec': item.get('spec') or '',
                    '_quantities': [item['quantity']] if item.get('quantity') else [],
                    '_specs': [item['spec']] if item.get('spec') else [],
                }
            else:
                # Duplicate name: keep first non-empty code/unit, collect
                # quantities and distinct specs for later aggregation.
                m = name_map[key]
                if not m['code'] and item.get('code'):
                    m['code'] = item['code']
                if not m['unit'] and item.get('unit'):
                    m['unit'] = item['unit']
                if item.get('quantity'):
                    m['_quantities'].append(item['quantity'])
                if item.get('spec') and item['spec'] not in m['_specs']:
                    m['_specs'].append(item['spec'])
        merged_items: list[dict[str, str]] = []
        for m in name_map.values():
            qlist = m['_quantities']
            if len(qlist) > 1:
                # Sum numeric quantities; otherwise join them verbatim.
                nums = []
                ok = True
                for q in qlist:
                    try:
                        nums.append(float(q.replace(',', '')))
                    except ValueError:
                        ok = False
                        break
                if ok:
                    s = sum(nums)
                    m['quantity'] = str(int(s)) if s % 1 == 0 else f'{s:.2f}'
                else:
                    m['quantity'] = '; '.join(qlist)
            elif len(qlist) == 1:
                m['quantity'] = qlist[0]
            if m['_specs']:
                # Trim each spec to 120 chars and cap the joined spec at 300.
                trimmed = [s[:120] + '...' if len(s) > 120 else s for s in m['_specs']]
                m['spec'] = '; '.join(trimmed)
                if len(m['spec']) > 300:
                    m['spec'] = m['spec'][:300] + '...'
            for k in ('_quantities', '_specs'):
                m.pop(k, None)
            merged_items.append(
                {k: m[k] for k in ('code', 'name', 'unit', 'quantity', 'spec')}
            )
        cat['items'] = merged_items
        total_after_merge += len(merged_items)
    merged_count = total_before_merge - total_after_merge
    if merged_count > 0:
        logger.debug('按名称合并: %s%s 项', total_before_merge, total_after_merge)
    valid = [c for c in categories if c.get('items')]
    total_items = sum(len(c['items']) for c in valid)
    logger.debug(
        '最终结果: %s 分部, %s 清单项', len(valid), total_items
    )
    return {
        'project_summary': {
            'remark': f'本地解析:{len(valid)} 个分部,{total_items} 个清单项(合并前 {total_before_merge} 项)',
        },
        'categories': valid,
    }
def filter_bill_pages(page_texts: list[str]) -> tuple[list[str], dict[str, Any]]:
    """Select BOQ-related pages from per-page texts.

    Returns (bill_page_texts, meta); meta records page counts, scan
    detection and selected page indices.
    """
    n = len(page_texts)
    meta: dict[str, Any] = {'total_pages': n, 'scanned': False, 'no_bill_pages': False}
    # Almost no extractable text at all: likely a scanned document.
    if sum(len(t or '') for t in page_texts) < 50:
        meta['scanned'] = True
        meta['reason'] = 'noText'
        return [], meta
    flags = [False] * n
    for i, raw in enumerate(page_texts):
        page = raw or ''
        if not page.strip():
            continue
        header_hits = sum(1 for kw in BILL_KW if kw in page)
        section_hit = any(kw in page for kw in SEC_KW)
        if header_hits >= 2 or section_hit or re.search(r'\d{9}', page):
            flags[i] = True
    hit_indices = [i for i, f in enumerate(flags) if f]
    if hit_indices and hit_indices[-1] > hit_indices[0]:
        # Bridge the span between first and last hit, skipping near-empty
        # pages and pure fee/tax pages that carry no item codes.
        for i in range(hit_indices[0], hit_indices[-1] + 1):
            if flags[i]:
                continue
            page = page_texts[i] or ''
            if len(page.strip()) <= 30:
                continue
            fee_hits = sum(1 for kw in FEE_PAGE_KW if kw in page)
            if fee_hits >= 2 and not re.search(r'\d{9}', page):
                continue
            flags[i] = True
    selected = [page_texts[i] for i in range(n) if flags[i]]
    if not selected:
        meta['no_bill_pages'] = True
    meta['bill_page_indices'] = [i for i in range(n) if flags[i]]
    meta['bill_pages'] = len(selected)
    return selected, meta
def analyze_boq_pages(page_texts: list[str]) -> dict[str, Any]:
    """Run page filtering then parse_bill_text.

    The result includes a '_meta' section for persistence and the frontend.
    """
    total_pages = len(page_texts)
    # Scanned/empty document guard, mirrored from filter_bill_pages.
    if sum(len(t or '') for t in page_texts) < 50:
        return {
            'scanned': True,
            'reason': 'noText',
            'totalPages': total_pages,
            'project_summary': {'remark': '文本过少,疑似扫描件或未提取到文字'},
            'categories': [],
            '_meta': {
                'method': 'python-local',
                'total_pages': total_pages,
                'bill_pages': 0,
            },
        }
    bill_texts, fmeta = filter_bill_pages(page_texts)
    if not bill_texts:
        meta = {'method': 'python-local', 'total_pages': total_pages, 'bill_pages': 0}
        if 'no_bill_pages' in fmeta:
            meta['no_bill_pages'] = fmeta['no_bill_pages']
        return {
            'scanned': False,
            'no_bill_pages': True,
            'totalPages': total_pages,
            'project_summary': {'remark': '未识别到清单相关页面'},
            'categories': [],
            '_meta': meta,
        }
    parsed = parse_bill_text('\n'.join(bill_texts))
    return {
        'scanned': False,
        **parsed,
        '_meta': {
            'method': 'python-local',
            'total_pages': total_pages,
            'bill_pages': len(bill_texts),
            'bill_page_indices': fmeta.get('bill_page_indices', []),
        },
    }
def categories_to_prompt_appendix(
    analysis: dict[str, Any],
    max_chars: int = 3000,
    max_per_cat: int = 40,
) -> str:
    """Compress the locally parsed BOQ result into a short text appendix
    injected into the AI summary prompt.

    At most max_per_cat items are listed per category, and the whole text is
    capped at max_chars (with a truncation marker).
    """
    rendered: list[str] = []
    for cat in (analysis.get('categories') or []):
        items = cat.get('items') or []
        rendered.append(f"{cat.get('name', '')}")
        for it in items[:max_per_cat]:
            row = (
                f" {it.get('code') or '-'} {it.get('name') or ''}"
                f" {it.get('unit') or ''} {it.get('quantity') or ''}"
            )
            rendered.append(row.strip())
        if len(items) > max_per_cat:
            rendered.append(f' …共 {len(items)} 条,此处省略其余')
    text = '\n'.join(rendered).strip()
    if len(text) > max_chars:
        return text[:max_chars] + '\n…(附录已截断)'
    return text

138
utils/boq_parser.py Normal file
View File

@ -0,0 +1,138 @@
"""
工程量清单解析模块 Excel / CSV / PDF / Word 文件中提取结构化文本
"""
import csv
import logging
import re
from pathlib import Path
logger = logging.getLogger(__name__)
# Maximum number of characters returned (truncated before AI summarization).
MAX_BOQ_CHARS = 12000
def extract_boq_text(file_path: str) -> str:
    """Extract raw structured text from a bill-of-quantities file.

    Supported formats: .xlsx / .xls / .csv / .pdf / .docx / .doc

    Raises:
        ValueError: unsupported file extension.
    """
    suffix = Path(file_path).suffix.lower()
    if suffix in ('.xlsx', '.xls'):
        raw = _extract_excel(file_path)
    elif suffix == '.csv':
        raw = _extract_csv(file_path)
    elif suffix == '.pdf':
        from utils.file_utils import _extract_pdf
        raw = _extract_pdf(file_path)
    elif suffix == '.docx':
        from utils.file_utils import _extract_docx
        raw = _extract_docx(file_path)
    elif suffix == '.doc':
        from utils.file_utils import _extract_doc
        raw = _extract_doc(file_path)
    else:
        raise ValueError(f'不支持的文件格式 {suffix},请使用 xlsx/xls/csv/pdf/docx/doc')
    return raw[:MAX_BOQ_CHARS]
def extract_boq_pages(file_path: str) -> list[str]:
    """Return BOQ text split per page.

    PDF: one element per page. Excel/CSV/Word: a single-element list holding
    the full text (or [''] when extraction yields nothing).
    """
    if Path(file_path).suffix.lower() == '.pdf':
        from utils.file_utils import extract_pdf_pages
        return extract_pdf_pages(file_path)
    full_text = extract_boq_text(file_path)
    return [full_text] if full_text else ['']
# ─── Excel ────────────────────────────────────────────────────────────────
def _extract_excel(file_path: str) -> str:
    """Extract every sheet via openpyxl; fall back to xlrd when openpyxl is unavailable.

    Raises:
        RuntimeError: parsing failed (message includes the cause).
    """
    try:
        import openpyxl
        workbook = openpyxl.load_workbook(file_path, data_only=True, read_only=True)
        blocks = [_sheet_to_text(workbook[name], name) for name in workbook.sheetnames]
        workbook.close()
        return '\n\n'.join(block for block in blocks if block.strip())
    except ImportError:
        # openpyxl missing: try the legacy xlrd path.
        return _extract_xls_fallback(file_path)
    except Exception as e:
        raise RuntimeError(f'Excel 解析失败:{e}') from e
def _sheet_to_text(ws, sheet_name: str) -> str:
"""将一个 Sheet 转为管道分隔文本,自动过滤全空行和全空列。"""
raw_rows = []
for row in ws.iter_rows(values_only=True):
cells = ['' if v is None else str(v).strip() for v in row]
if any(cells):
raw_rows.append(cells)
if not raw_rows:
return ''
# 对齐列数
max_cols = max(len(r) for r in raw_rows)
raw_rows = [r + [''] * (max_cols - len(r)) for r in raw_rows]
# 找出有内容的列索引
active_cols = [j for j in range(max_cols)
if any(raw_rows[i][j] for i in range(len(raw_rows)))]
if not active_cols:
return ''
lines = [f'{sheet_name}']
for row in raw_rows:
line = ' | '.join(row[j] for j in active_cols)
if line.replace('|', '').strip():
lines.append(line)
return '\n'.join(lines)
def _extract_xls_fallback(file_path: str) -> str:
"""旧版 .xls 使用 xlrd 兜底(需安装 xlrd<2"""
try:
import xlrd # type: ignore
wb = xlrd.open_workbook(file_path)
parts = []
for sheet in wb.sheets():
lines = [f'{sheet.name}']
for rx in range(sheet.nrows):
cells = [str(sheet.cell_value(rx, cx)).strip()
for cx in range(sheet.ncols)]
line = ' | '.join(c for c in cells if c)
if line:
lines.append(line)
parts.append('\n'.join(lines))
return '\n\n'.join(parts)
except Exception as e:
raise RuntimeError(f'.xls 解析失败,请另存为 .xlsx 后重试:{e}') from e
# ─── CSV ─────────────────────────────────────────────────────────────────
def _extract_csv(file_path: str) -> str:
encodings = ['utf-8-sig', 'gbk', 'utf-8', 'gb18030', 'latin-1']
for enc in encodings:
try:
lines = []
with open(file_path, 'r', encoding=enc, newline='') as f:
for row in csv.reader(f):
line = ' | '.join(c.strip() for c in row if c.strip())
if line:
lines.append(line)
return '\n'.join(lines)
except (UnicodeDecodeError, UnicodeError):
continue
except Exception as e:
raise RuntimeError(f'CSV 解析失败:{e}') from e
raise RuntimeError('CSV 文件编码不支持,请另存为 UTF-8 格式后重试')

283
utils/diagram_intent.py Normal file
View File

@ -0,0 +1,283 @@
"""
章节级图/表意图字符特征 + 大纲上下文窗口计分栈式优先级驱动提示词附加段
"""
from __future__ import annotations
import json
import logging
import os
import re
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Sequence, Tuple
import config
from utils import prompts as P
logger = logging.getLogger(__name__)
# Built-in defaults; overridable via data/diagram_intent_rules.json
# (see load_diagram_rules).
DEFAULT_DIAGRAM_RULES: Dict[str, Any] = {
    'schema_version': 1,
    # Minimum combined score required to emit a figure / table intent.
    'threshold_figure': 1.0,
    'threshold_table': 1.0,
    # Weights applied to title matches vs. outline-context matches.
    'title_weight': 1.0,
    'context_weight': 0.6,
    # Lines of outline context taken above/below the section title.
    'outline_context_lines': {'before': 4, 'after': 6},
    # Ordering when both intents pass: 'score_desc' | 'figure_first' | 'table_first'.
    'stack_order_when_both': 'score_desc',
    # Keyword lists: plain strings or {'text'/'pattern', 'weight'} dicts.
    'figure_keywords': [],
    'table_keywords': [],
}
def diagram_rules_path() -> str:
    """Return the on-disk path of the diagram-intent rules JSON (under config.DATA_DIR)."""
    return os.path.join(config.DATA_DIR, 'diagram_intent_rules.json')
def load_diagram_rules(path: Optional[str] = None) -> Dict[str, Any]:
    """Load the rules JSON; on a missing file or parse failure return the built-in defaults.

    Keys starting with '_' are ignored as comments; 'outline_context_lines'
    is shallow-merged into the default window instead of replacing it.
    """
    rules_file = path or diagram_rules_path()
    merged = dict(DEFAULT_DIAGRAM_RULES)
    if not os.path.isfile(rules_file):
        return merged
    try:
        with open(rules_file, encoding='utf-8') as fh:
            loaded = json.load(fh)
    except Exception as e:
        logger.warning('加载 diagram_intent_rules.json 失败,使用内置默认: %s', e)
        return merged
    if not isinstance(loaded, dict):
        return merged
    for key, value in loaded.items():
        if key.startswith('_'):
            continue
        if key == 'outline_context_lines' and isinstance(value, dict):
            merged['outline_context_lines'] = {
                **merged.get('outline_context_lines', {}),
                **value,
            }
        else:
            merged[key] = value
    return merged
def _normalize_keyword_entries(raw: Any) -> List[Tuple[str, float]]:
out: List[Tuple[str, float]] = []
if not isinstance(raw, list):
return out
for item in raw:
if isinstance(item, str) and item.strip():
out.append((item.strip(), 1.0))
elif isinstance(item, dict):
t = (item.get('text') or item.get('pattern') or '').strip()
if not t:
continue
w = float(item.get('weight', 1.0))
out.append((t, w))
return out
def _score_text(text: str, entries: Sequence[Tuple[str, float]]) -> float:
if not text or not entries:
return 0.0
s = 0.0
for kw, w in entries:
if kw in text:
s += w
return s
# Readability alias for the intent kind.
DiagramKind = str  # 'figure' | 'table'
@dataclass(frozen=True)
class DiagramIntent:
    """One chart intent for a section."""
    # 'figure' or 'table'
    kind: str
    # Combined keyword score that produced this intent
    score: float
    # Where the score came from, e.g. 'title+context' or 'fallback'
    sources: str
# Top of stack = index 0 (takes precedence)
DiagramStack = List[DiagramIntent]
def score_figure_table(
    title: str,
    context_snippet: str,
    rules: Dict[str, Any],
) -> Tuple[float, float]:
    """Score title and context separately, then combine with the configured weights.

    Returns (figure_score, table_score).
    """
    figure_entries = _normalize_keyword_entries(rules.get('figure_keywords'))
    table_entries = _normalize_keyword_entries(rules.get('table_keywords'))
    title_weight = float(rules.get('title_weight', 1.0))
    context_weight = float(rules.get('context_weight', 0.6))
    title_text = title or ''
    context_text = context_snippet or ''
    def _combined(entries):
        return (title_weight * _score_text(title_text, entries)
                + context_weight * _score_text(context_text, entries))
    return _combined(figure_entries), _combined(table_entries)
def extract_outline_window(
    outline_text: str,
    section_title: str,
    before: int,
    after: int,
    fallback_chars: int = 1200,
) -> str:
    """Locate the section title's line in the outline and return a window.

    The window spans `before` lines above through `after` lines below the
    matched line (inclusive). When either input is empty or the title cannot
    be located, the first `fallback_chars` characters of the outline are
    returned instead.

    Fix vs. the previous version: the loop carried a second, duplicated
    `core in line_s` branch after a broader first condition; it is folded
    into a single condition with identical match semantics.

    Args:
        outline_text: full outline, one entry per line.
        section_title: title to look for (matched with and without a leading
            numbering prefix).
        before: lines of context above the match (negative treated as 0).
        after: lines of context below the match (negative treated as 0).
        fallback_chars: prefix length used when no match is found.
    """
    if not outline_text or not section_title:
        return (outline_text or '')[:fallback_chars]
    title_stripped = section_title.strip()
    if not title_stripped:
        return outline_text[:fallback_chars]

    def _strip_serial(s: str) -> str:
        # Remove a leading numbering prefix like "3、" / "三. " before matching.
        return re.sub(r'^\s*[\d一二三四五六七八九十]+[、.\s]+', '', s).strip()

    core = _strip_serial(title_stripped)
    lines = outline_text.splitlines()
    idx = -1
    for i, line in enumerate(lines):
        line_s = line.strip()
        # Match the full title, or the de-numbered core against the line as-is
        # or with the line's own numbering prefix stripped.
        if title_stripped in line_s or (
            core and (core in line_s or core in _strip_serial(line_s))
        ):
            idx = i
            break
    if idx < 0:
        return outline_text[:fallback_chars]
    lo = max(0, idx - max(0, before))
    hi = min(len(lines), idx + max(0, after) + 1)
    return '\n'.join(lines[lo:hi])
def build_stack(
    fig_score: float,
    tbl_score: float,
    rules: Dict[str, Any],
    enable_figure: bool,
    enable_table: bool,
) -> DiagramStack:
    """Turn the two scores into an intent stack, honoring thresholds, toggles and ordering.

    With both intents passing, 'stack_order_when_both' decides the order:
    'figure_first' | 'table_first' | 'score_desc' (default, higher score on top).
    """
    intents: List[DiagramIntent] = []
    if enable_figure and fig_score >= float(rules.get('threshold_figure', 1.0)):
        intents.append(DiagramIntent('figure', fig_score, 'title+context'))
    if enable_table and tbl_score >= float(rules.get('threshold_table', 1.0)):
        intents.append(DiagramIntent('table', tbl_score, 'title+context'))
    if len(intents) < 2:
        return intents
    mode = (rules.get('stack_order_when_both') or 'score_desc').strip()
    if mode == 'figure_first':
        return sorted(intents, key=lambda it: it.kind != 'figure')
    if mode == 'table_first':
        return sorted(intents, key=lambda it: it.kind != 'table')
    # score_desc: higher score goes to the top of the stack (stable on ties).
    return sorted(intents, key=lambda it: -it.score)
def stack_compact_labels(stack: DiagramStack) -> List[str]:
    """Compact labels, identical to the ones built in stack_to_addon, used by
    the attachment-only ("blocks only") prompt output."""
    return [
        '图示([FIGURE] 块)' if intent.kind == 'figure' else '表格([TABLE] 块)'
        for intent in stack
    ]
def make_fallback_stack(kind: str) -> DiagramStack:
    """Single-intent placeholder for when the stack is empty but output is
    still required; unknown kinds default to 'table'."""
    normalized = (kind or '').strip().lower()
    if normalized not in ('figure', 'table'):
        normalized = 'table'
    return [DiagramIntent(normalized, 1.0, 'fallback')]
def stack_to_addon(stack: DiagramStack) -> str:
    """Concatenate the priority preamble with the full figure/table output
    specs, in stack order; empty stack renders to ''."""
    if not stack:
        return ''
    labels = [
        '图示([FIGURE] 块)' if intent.kind == 'figure' else '表格([TABLE] 块)'
        for intent in stack
    ]
    pieces: List[str] = [P.diagram_priority_preamble(labels)]
    for intent in stack:
        if intent.kind == 'figure':
            pieces.append(P.get_figure_addon())
        else:
            pieces.append(P.get_table_addon())
    return ''.join(pieces)
class DiagramIntentAgent:
    """Rule-configurable agent: computes the figure/table intent stack for a
    single section and renders the matching prompt add-on."""

    def __init__(self, rules: Optional[Dict[str, Any]] = None) -> None:
        # Fall back to the on-disk rule file when no rules are injected.
        self.rules = rules or load_diagram_rules()

    @classmethod
    def load_default(cls) -> 'DiagramIntentAgent':
        """Construct an agent from the rule file on disk."""
        return cls(load_diagram_rules())

    def plan(
        self,
        section_title: str,
        outline_text: str,
        enable_figure: bool,
        enable_table: bool,
    ) -> DiagramStack:
        """Score the section against its outline context window and build its
        ordered diagram-intent stack."""
        rules = self.rules
        window_cfg = rules.get('outline_context_lines') or {}
        context = extract_outline_window(
            outline_text,
            section_title,
            int(window_cfg.get('before', 4)),
            int(window_cfg.get('after', 6)),
        )
        fig_score, tbl_score = score_figure_table(section_title, context, rules)
        return build_stack(fig_score, tbl_score, rules, enable_figure, enable_table)

    def render_for_section(
        self,
        section_title: str,
        outline_text: str,
        enable_figure: bool,
        enable_table: bool,
    ) -> str:
        """Return the add-on prompt text for the section ('' when both diagram
        kinds are disabled)."""
        if not (enable_figure or enable_table):
            return ''
        stack = self.plan(section_title, outline_text, enable_figure, enable_table)
        return stack_to_addon(stack)
# Module-level default instance so generator does a single call without
# re-reading the rule file each time.
_default_agent: Optional[DiagramIntentAgent] = None
def get_diagram_agent() -> DiagramIntentAgent:
    """Lazily build and cache the default agent (rules loaded once)."""
    global _default_agent
    if _default_agent is None:
        _default_agent = DiagramIntentAgent.load_default()
    return _default_agent
def invalidate_diagram_agent_cache() -> None:
    """Drop the cached agent so the next get_diagram_agent() reloads rules."""
    global _default_agent
    _default_agent = None

205
utils/file_utils.py Normal file
View File

@ -0,0 +1,205 @@
"""
文件处理工具 PDF / Word 文件中提取纯文本
"""
import os
import logging
from pathlib import Path
logger = logging.getLogger(__name__)
def extract_text(file_path: str) -> str:
    """
    Dispatch plain-text extraction by file extension.
    Supports .pdf / .docx / .doc; raises ValueError otherwise.
    """
    suffix = Path(file_path).suffix.lower()
    if suffix == '.pdf':
        return _extract_pdf(file_path)
    if suffix == '.docx':
        return _extract_docx(file_path)
    if suffix == '.doc':
        return _extract_doc(file_path)
    raise ValueError(f'不支持的文件类型: {suffix}')
def _extract_pdf(file_path: str) -> str:
    """Extract PDF text: try pypdf first, fall back to pdfminer.

    Raises RuntimeError when both extractors fail (or are not installed).
    """
    try:
        from pypdf import PdfReader
        reader = PdfReader(file_path)
        parts = []
        for page in reader.pages:
            text = page.extract_text()
            if text:
                parts.append(text)
        result = '\n'.join(parts)
        # Accept the pypdf result only when it yielded visible text;
        # otherwise fall through to pdfminer (scanned/unusual PDFs).
        if result.strip():
            return result
    except Exception as e:
        logger.warning(f'pypdf 提取失败: {e},尝试 pdfminer')
    try:
        from pdfminer.high_level import extract_text as pm_extract
        result = pm_extract(file_path)
        return result or ''
    except Exception as e:
        logger.error(f'pdfminer 提取失败: {e}')
        raise RuntimeError(f'PDF 文本提取失败: {e}')
def extract_pdf_pages(file_path: str) -> list[str]:
    """
    Extract PDF text page by page (used to filter bill-of-quantities pages).

    Prefers pypdf per-page extraction; when no page yields any text, falls
    back to one whole-document pdfminer pass returned as a single-element
    list. Raises RuntimeError when both extractors fail.
    """
    pages: list[str] = []
    try:
        from pypdf import PdfReader
        reader = PdfReader(file_path)
        for page in reader.pages:
            text = page.extract_text()
            pages.append((text or '').strip())
        # At least one page produced visible text -> per-page result is usable.
        if any(pages):
            return pages
    except Exception as e:
        logger.warning(f'pypdf 按页提取失败: {e},尝试 pdfminer')
    try:
        from pdfminer.high_level import extract_text as pm_extract
        blob = (pm_extract(file_path) or '').strip()
        return [blob] if blob else ['']
    except Exception as e:
        logger.error(f'pdfminer 提取失败: {e}')
        raise RuntimeError(f'PDF 文本提取失败: {e}')
def _extract_docx(file_path: str) -> str:
    """Extract .docx text via python-docx: non-blank paragraphs plus table
    rows joined cell-by-cell; raises RuntimeError on any failure."""
    try:
        from docx import Document
        document = Document(file_path)
        pieces = [p.text for p in document.paragraphs if p.text.strip()]
        for table in document.tables:
            for row in table.rows:
                cells = [cell.text.strip() for cell in row.cells if cell.text.strip()]
                if cells:
                    pieces.append(' '.join(cells))
        return '\n'.join(pieces)
    except Exception as e:
        logger.error(f'.docx 提取失败: {e}')
        raise RuntimeError(f'Word 文本提取失败: {e}')
def _extract_doc(file_path: str) -> str:
    """
    Extract text from a legacy .doc file, trying in priority order:
      1. win32com (Windows with Microsoft Word installed; most accurate)
      2. LibreOffice headless CLI conversion (requires LibreOffice)
      3. python-docx compatibility read (some pseudo-.doc files are XML)
    Raises RuntimeError asking the user to re-save as .docx when all fail.
    """
    abs_path = str(Path(file_path).resolve())
    # ── Strategy 1: win32com (Windows + Word) ────────────────────────────────
    try:
        import win32com.client
        import pythoncom
        pythoncom.CoInitialize()
        word = None
        try:
            word = win32com.client.Dispatch('Word.Application')
            word.Visible = False
            doc = word.Documents.Open(abs_path, ReadOnly=True)
            text = doc.Range().Text
            doc.Close(False)
            logger.info(f'.doc 通过 win32com 提取成功: {file_path}')
            return text or ''
        finally:
            if word:
                try:
                    word.Quit()
                except Exception:
                    pass  # best effort — never mask the original error
            pythoncom.CoUninitialize()
    except ImportError:
        logger.info('pywin32 未安装,跳过 win32com 方案')
    except Exception as e:
        logger.warning(f'win32com 提取 .doc 失败: {e}')
    # ── Strategy 2: LibreOffice headless conversion ──────────────────────────
    try:
        import shutil
        import subprocess
        import tempfile
        tmp_dir = tempfile.mkdtemp()
        try:
            for soffice_cmd in ('soffice', 'libreoffice'):
                try:
                    result = subprocess.run(
                        [soffice_cmd, '--headless', '--convert-to', 'txt:Text',
                         '--outdir', tmp_dir, abs_path],
                        capture_output=True, text=True, timeout=60,
                    )
                    if result.returncode == 0:
                        txt_file = os.path.join(tmp_dir, Path(file_path).stem + '.txt')
                        if os.path.exists(txt_file):
                            with open(txt_file, 'r', encoding='utf-8', errors='ignore') as f:
                                content = f.read()
                            logger.info(f'.doc 通过 LibreOffice 提取成功: {file_path}')
                            return content
                except FileNotFoundError:
                    continue  # this binary name is not on PATH — try the next
                except subprocess.TimeoutExpired:
                    logger.warning('LibreOffice 转换超时')
                    break
        finally:
            # Fix: the temp directory used to leak on every call.
            shutil.rmtree(tmp_dir, ignore_errors=True)
    except Exception as e:
        logger.warning(f'LibreOffice 提取 .doc 失败: {e}')
    # ── Strategy 3: python-docx compatibility read (XML saved as .doc) ───────
    try:
        result = _extract_docx(file_path)
        if result.strip():
            logger.info(f'.doc 通过 python-docx 兼容读取成功: {file_path}')
            return result
    except Exception as e:
        logger.warning(f'python-docx 兼容读取 .doc 失败: {e}')
    raise RuntimeError(
        '无法读取 .doc 格式文件。请在 Word 中打开该文件,'
        '选择「另存为」→「Word 文档 (.docx)」后重新上传。'
    )
def truncate_text(text: str, max_chars: int = 60000) -> str:
    """Cap overly long text to stay within AI token limits; a truncation
    notice is appended when cutting occurs."""
    if len(text) > max_chars:
        return text[:max_chars] + '\n\n...[文档内容已截断,仅展示前段]'
    return text
def split_text_chunks(text: str, chunk_size: int = 2000, overlap: int = 200) -> list[str]:
    """Split *text* into fixed-size chunks with *overlap* characters of
    shared context between consecutive chunks (knowledge-base ingestion).

    Fix: the stride is clamped to at least 1 — with overlap >= chunk_size the
    previous version advanced by <= 0 characters and looped forever.
    """
    step = max(1, chunk_size - overlap)  # guarantee forward progress
    chunks: list[str] = []
    start = 0
    while start < len(text):
        # Slicing clamps at the end of the string, so no explicit min() needed.
        chunks.append(text[start:start + chunk_size])
        start += step
    return chunks
def allowed_file(filename: str) -> bool:
    """Return True when *filename* carries a supported extension
    (.pdf / .doc / .docx, case-insensitive)."""
    if '.' not in filename:
        return False
    suffix = filename.rsplit('.', 1)[1].lower()
    return suffix in {'pdf', 'doc', 'docx'}
def safe_filename(filename: str) -> str:
    """Replace characters unsafe in filenames with underscores, keeping word
    characters, CJK, dots and dashes."""
    import re
    return re.sub(r'[^\w\u4e00-\u9fff.\-]', '_', filename)

View File

@ -0,0 +1,52 @@
"""
标书目录号展示一级为汉字+顿号子级为数字多级编号 AI 大纲示例一致
"""
from __future__ import annotations
def int_to_chinese_numeral(n: int) -> str:
    """Convert a positive integer to Chinese numerals (一、二、…、十、十一、…、一百).

    Fix: restores the 十/百/零 literals which had degraded to empty strings,
    making e.g. int_to_chinese_numeral(10) return '' instead of '十' —
    contradicting this docstring's own contract.
    Non-positive and >= 1000 values fall back to the decimal string.
    """
    if n <= 0:
        return str(n)
    digits = "零一二三四五六七八九"
    if n < 10:
        return digits[n]
    if n == 10:
        return "十"
    if n < 20:
        return "十" + (digits[n % 10] if n % 10 else "")
    if n < 100:
        t, o = divmod(n, 10)
        s = digits[t] + "十"
        if o:
            s += digits[o]
        return s
    if n < 1000:
        h, r = divmod(n, 100)
        s = digits[h] + "百"
        if r == 0:
            return s
        if r < 10:
            # A zero tens digit requires an explicit 零 (e.g. 105 -> 一百零五).
            return s + "零" + digits[r]
        return s + int_to_chinese_numeral(r)
    # Chapters beyond the hundreds are unrealistic; handle conservatively.
    return str(n)
def format_heading_display(level: int, section_number: str, title: str) -> str:
    """
    Render a numbered heading line for outline text, Word headings and the TOC.

    - Level 1: Chinese numeral + enumeration comma + title, e.g. "一、总体方案".
      Fix: restores the "、" separator — the previous f-string fused the
      numeral directly onto the title.
    - Level 2+: dotted number + space + title, e.g. "1.1 子标题".
    """
    title = (title or "").strip()
    sn = (section_number or "").strip()
    lv = int(level) if level else 1
    if lv <= 1:
        main = sn.split(".")[0]
        try:
            idx = int(main)
        except ValueError:
            idx = 1  # missing/unparsable number: default to chapter one
        return f"{int_to_chinese_numeral(idx)}、{title}"
    return f"{sn} {title}".strip()

1069
utils/prompts.py Normal file

File diff suppressed because it is too large Load Diff

141
utils/settings.py Normal file
View File

@ -0,0 +1,141 @@
"""
配置持久化将用户在界面中设置的 API Key 等配置保存到 data/settings.json
服务重启后自动恢复不再每次重启都丢失 Key
"""
import json
import os
import logging
logger = logging.getLogger(__name__)
_SETTINGS_PATH: str = ''  # injected by app.py at startup via init()
def init(settings_path: str):
    """Record where settings.json lives; must be called before load()/save()."""
    global _SETTINGS_PATH
    _SETTINGS_PATH = settings_path
def load(cfg) -> None:
    """Restore persisted settings from settings.json onto the config module,
    then apply environment-variable overrides (env always wins)."""
    if not _SETTINGS_PATH or not os.path.exists(_SETTINGS_PATH):
        _apply_env_overrides(cfg)
        return
    try:
        with open(_SETTINGS_PATH, 'r', encoding='utf-8') as f:
            stored = json.load(f)
        _apply(cfg, stored)
        _apply_env_overrides(cfg)
        logger.info(f'已从 {_SETTINGS_PATH} 恢复配置,当前 provider={cfg.MODEL_PROVIDER}')
    except Exception as e:
        logger.warning(f'加载配置文件失败: {e}')
        _apply_env_overrides(cfg)
# (environment variable name, config attribute name) pairs. The two coincide
# today but are kept as explicit pairs so they can diverge later.
_ENV_API_KEYS = (
    ('QWEN_API_KEY', 'QWEN_API_KEY'),
    ('OPENAI_API_KEY', 'OPENAI_API_KEY'),
    ('DEEPSEEK_API_KEY', 'DEEPSEEK_API_KEY'),
    ('DOUBAO_API_KEY', 'DOUBAO_API_KEY'),
    ('KIMI_API_KEY', 'KIMI_API_KEY'),
)
def _apply_env_overrides(cfg) -> None:
    """Let environment variables take precedence over settings.json values
    (convenient for Docker / local .env injection)."""
    provider = os.environ.get('MODEL_PROVIDER')
    if provider and isinstance(provider, str) and provider.strip():
        cfg.MODEL_PROVIDER = provider.strip()
    for env_name, attr in _ENV_API_KEYS:
        value = os.environ.get(env_name)
        # Skip unset values and the "sk-your…" placeholders from .env examples.
        if value and isinstance(value, str) and not value.startswith('sk-your'):
            setattr(cfg, attr, value.strip())
def save(cfg) -> None:
    """Persist the key fields of the config module to settings.json."""
    if not _SETTINGS_PATH:
        return  # init() has not been called — nowhere to write
    # Snapshot of every user-tunable config attribute, keyed by the JSON
    # field names that _apply() understands on the way back in.
    data = {
        'model_provider': cfg.MODEL_PROVIDER,
        'qwen_api_key': cfg.QWEN_API_KEY,
        'qwen_model': cfg.QWEN_MODEL,
        'qwen_base_url': cfg.QWEN_BASE_URL,
        'openai_api_key': cfg.OPENAI_API_KEY,
        'openai_model': cfg.OPENAI_MODEL,
        'openai_base_url': cfg.OPENAI_BASE_URL,
        'deepseek_api_key': cfg.DEEPSEEK_API_KEY,
        'deepseek_model': cfg.DEEPSEEK_MODEL,
        'deepseek_base_url': cfg.DEEPSEEK_BASE_URL,
        'ollama_base_url': cfg.OLLAMA_BASE_URL,
        'ollama_model': cfg.OLLAMA_MODEL,
        'doubao_api_key': cfg.DOUBAO_API_KEY,
        'doubao_model': cfg.DOUBAO_MODEL,
        'doubao_base_url': cfg.DOUBAO_BASE_URL,
        'kimi_api_key': cfg.KIMI_API_KEY,
        'kimi_model': cfg.KIMI_MODEL,
        'kimi_base_url': cfg.KIMI_BASE_URL,
        'max_concurrent': cfg.MAX_CONCURRENT_SECTIONS,
        'content_volume': cfg.CONTENT_VOLUME,
        # Newer attributes may be absent on older config modules.
        'target_pages': getattr(cfg, 'TARGET_PAGES', 0),
        'page_char_estimate': getattr(cfg, 'PAGE_CHAR_ESTIMATE', 700),
    }
    try:
        os.makedirs(os.path.dirname(_SETTINGS_PATH), exist_ok=True)
        with open(_SETTINGS_PATH, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
    except Exception as e:
        # Persisting settings is best-effort; a failure must not crash the app.
        logger.warning(f'保存配置文件失败: {e}')
def _apply(cfg, data: dict) -> None:
"""将 dict 中的值安全地写回 config 模块"""
str_fields = {
'model_provider': 'MODEL_PROVIDER',
'qwen_api_key': 'QWEN_API_KEY',
'qwen_model': 'QWEN_MODEL',
'qwen_base_url': 'QWEN_BASE_URL',
'openai_api_key': 'OPENAI_API_KEY',
'openai_model': 'OPENAI_MODEL',
'openai_base_url': 'OPENAI_BASE_URL',
'deepseek_api_key': 'DEEPSEEK_API_KEY',
'deepseek_model': 'DEEPSEEK_MODEL',
'deepseek_base_url': 'DEEPSEEK_BASE_URL',
'ollama_base_url': 'OLLAMA_BASE_URL',
'ollama_model': 'OLLAMA_MODEL',
'doubao_api_key': 'DOUBAO_API_KEY',
'doubao_model': 'DOUBAO_MODEL',
'doubao_base_url': 'DOUBAO_BASE_URL',
'kimi_api_key': 'KIMI_API_KEY',
'kimi_model': 'KIMI_MODEL',
'kimi_base_url': 'KIMI_BASE_URL',
}
for key, attr in str_fields.items():
val = data.get(key)
if val and isinstance(val, str):
setattr(cfg, attr, val)
if 'max_concurrent' in data:
try:
v = int(data['max_concurrent'])
cfg.MAX_CONCURRENT_SECTIONS = max(1, min(v, 20))
except (ValueError, TypeError):
pass
valid_volumes = ('concise', 'standard', 'detailed', 'full')
vol = data.get('content_volume')
if vol and vol in valid_volumes:
cfg.CONTENT_VOLUME = vol
if 'target_pages' in data:
try:
cfg.TARGET_PAGES = max(0, int(data['target_pages']))
except (ValueError, TypeError):
pass
if 'page_char_estimate' in data:
try:
cfg.PAGE_CHAR_ESTIMATE = max(300, min(3000, int(data['page_char_estimate'])))
except (ValueError, TypeError):
pass

View File

@ -0,0 +1,278 @@
"""
按招标文件类型工程 / 服务 / 货物区分的章节正文生成提示词模板
modules.generator.BID_WRITING_SYSTEM 配合使用自称以系统铁律为准统一用我方
"""
import re
from typing import Optional
VALID_TENDER_KINDS = frozenset({'engineering', 'service', 'goods'})
DEFAULT_WORD_COUNT_SPEC = (
'- 一般小节:不少于 2000 字;核心技术/重点评分章节:不少于 4000 字\n'
'- 字数须由实质方案内容支撑,禁止用重复项目背景或复述招标要求凑字数\n'
'- 有实质细节的展开写,原则性描述可简洁处理;通过流程、节点、比选、管控展开满足篇幅'
)
TENDER_KIND_CLASSIFY = """\
你是一名招标文件分类专家根据以下招标文件摘录判断本项目技术标书应采用的写作模板类型
只输出以下三个英文单词之一不要输出任何其他文字标点换行或解释
engineering
service
goods
含义
- engineering工程施工类建筑市政公路水利装修园林拆除等以现场施工组织工艺机械进度网络为主
- service服务类咨询设计监理运维物业保洁餐饮配送培训安保技术服务等以人力/智力交付流程SLA 为主
- goods货物类设备材料车辆家具软硬件供货等以产品规格供货质保验收为主含附带安装指导仍以供货为主可归此类
判定规则
若主要为施工安装且涉及土建/结构/施工机械与工期归为 engineering
若主要为服务过程人员驻场响应时效与服务质量体系归为 service
若主要为产品技术规格供货批次出厂检验与到货验收归为 goods
若施工与供货并重以现场施工量与工期为主则 engineering以设备物资交付为主则 goods
招标文件摘录
{excerpt}
"""
def get_tender_kind_classify_prompt(excerpt: str) -> str:
    """Fill the classification prompt template with the tender-document
    excerpt (str.replace avoids interpreting braces in the excerpt)."""
    return TENDER_KIND_CLASSIFY.replace('{excerpt}', excerpt or '')
def parse_tender_kind_response(response: str) -> str:
    """Parse engineering / service / goods out of a model reply; any failure
    falls back to 'engineering'."""
    if not response:
        return 'engineering'
    # First pass: split on non-letter runs and look for an exact token.
    for token in re.sub(r'[^a-zA-Z]+', ' ', response).lower().split():
        if token in VALID_TENDER_KINDS:
            return token
    # Second pass: plain substring search, in fixed priority order.
    lowered = response.lower()
    for kind in ('engineering', 'service', 'goods'):
        if kind in lowered:
            return kind
    return 'engineering'
def normalize_tender_kind(kind: Optional[str]) -> str:
    """Coerce an arbitrary kind string to a valid tender kind, defaulting to
    'engineering'."""
    candidate = (kind or '').strip().lower()
    if candidate in VALID_TENDER_KINDS:
        return candidate
    return 'engineering'
# ── 工程类 ───────────────────────────────────────────────────────────────
SECTION_DETAILS_ENGINEERING = """\
- 角色资深工程施工组织设计专家
- 任务撰写通用型工程施工组织设计技术章节
核心定位
- 通用施工模板适用于建筑市政公路水利等工程施工类项目
- 聚焦施工方案工艺方法机械设备进度计划质量安全控制
- 正文为可直接提交的成稿语句凡招标文件概要或工程量清单摘要已给出的工程量地质工期指标等可如实融入叙述未给出的具体数值型号台数吨位等一律用通顺的中文概括表达"相应规格""与进度及作业面相匹配的台套""符合设计及规范要求的能级"不得使用方括号或待填项留白
内容特征
- 施工工艺描述到"方法层面"可引用规范条文名称或编号"应符合JTG/T 3610要求"无依据处不写臆造数字
- 设备与资源配置写清设备类别与用途"按工况与设计要求选配相应规格与数量""满足流水作业与峰值强度需要"等概括句式禁止出现"[型号][数量]台"类占位
- 进度计划使用相对阶段"施工准备期""主体施工期"而非具体日期
- 技术措施可提供多方案比选"视地质与水文条件选用适宜工艺"等自然语言衔接现场条件禁止方括号待填
未定参数的写法替代一切占位符
- 工程规模与结构"本工程相应单体与线路区段""按设计结构形式与跨度条件"等概括不罗列未提供的具体数字
- 技术参数已见于招标/清单的写具体值未见者写"按设计强度等级与验收标准执行""压实度与分层厚度满足规范及设计要求"
- 机械与劳动力"配置满足峰值强度与关键线路需要的机械组合""劳动力按施工阶段动态投入并保持关键岗位持证齐备"
- 时间节点"在招标工期内划分准备、主体、收尾阶段并设置可控里程碑"无具体日历则不用臆造周数
行文规范
- 自称统一用我方禁用我们本公司
- 招标人称招标方建设单位
- 禁止前导句和AI套话综上所述高度重视等
- 列举用(1)(2)(3)禁用"首先其次"
- 纯文本输出段落间空行分隔
防过拟合约束
- 不绑定具体地名与局地气候细节改为"结合项目环境与季节特点采取针对性措施"
- 不绑定特定施工方法如不说"必须用旋挖钻"改为"根据地质选用适宜桩基工艺"
- 使用弹性表述"按设计要求""视现场情况""符合规范规定"
字数要求
{word_count_spec}
- 通过展开多方案比选详细工艺流程管控节点来满足篇幅
输入
- 招标文件概要{summary}
- 标书目录{outline}
- 子小节标题{subsection_title}
直接输出正文不含标题和解释"""
# ── 服务类 ───────────────────────────────────────────────────────────────
SECTION_DETAILS_SERVICE = """\
- 角色资深服务方案架构师
- 任务撰写通用型服务项目实施方案
核心定位
- 通用服务模板适用于咨询服务运维服务技术服务物业管理培训服务等
- 聚焦服务方案实施流程人员配置质量保障响应机制服务标准
- 严禁出现工程施工技术参数如混凝土标号压实度等
- 正文为成稿招标/采购文件已载明的服务范围人数响应时限到场要求等可如实写入未载明的不得用方括号待填改用"按采购文件与服务等级要求配置""满足驻场与高峰时段人力需要""建立分级响应与升级机制"等概括表述写清含义
内容特征
- 服务流程"接收需求→分析评估→方案制定→实施执行→验收交付→持续改进"框架展开
- 人员配置强调专业资质与岗位角色齐全"配备满足本项目服务范围与关键岗位持证要求的人员力量""项目经理及骨干具备相应执业或认证资格"等完整句子禁止"[资质][岗位][数量]名"式占位
- 质量保障使用服务体系标准如ISO 9001ITILITSS而非工程规范
- 响应机制写清"受理—分派—处理—回访/关闭"闭环时限已见于招标文件的写具体值未见者写"按招标文件及行业通行服务等级划分响应与处理时限,并设置升级与应急通道"
- 服务标准可引用SLA框架用自然语言描述指标层级与考核方式禁止用方括号代替指标
未定参数的写法
- 服务范围与对象"采购文件约定的服务内容与交付边界""服务对象规模与业务场景按项目实际确定"等概括
- 人员与资源"与峰值并发与服务等级相匹配的人力与工具配置"
- 场地与备件"按需设置服务场所与备件储备,保障连续性与可用性目标"
行文规范
- 自称统一用我方禁用我们本公司
- 招标人称招标方采购人甲方
- 禁止前导句和AI套话
- 列举用(1)(2)(3)禁用"首先其次"
- 纯文本输出段落间空行分隔
- 强调"服务承诺""保障措施"的可执行性避免空泛
防过拟合约束
- 不预设具体行业细节如不说"针对医院HIS系统"改为"针对采购人业务系统与数据环境"
- 服务方案提供"标准模块+可选配置"结构"基础服务包包含...,增值服务可选..."
- 使用"结合采购人行业特点与监管要求""参照同类项目成熟实践"等弹性表述
内容禁区
- 禁止出现施工工艺材料设备技术参数工程量计算施工机械配置
- 禁止出现建筑结构土木工程技术措施
字数要求
{word_count_spec}
- 通过详细描述服务流程节点人员职责分工质量检查点应急预案来满足篇幅
输入
- 招标文件概要{summary}
- 标书目录{outline}
- 子小节标题{subsection_title}
直接输出正文不含标题和解释"""
# ── 货物类 ───────────────────────────────────────────────────────────────
SECTION_DETAILS_GOODS = """\
- 角色资深供货方案技术专家
- 任务撰写通用型货物采购项目技术响应方案
核心定位
- 通用供货模板适用于设备采购材料供应系统集成软件采购等
- 聚焦产品技术规格供货方案质量保证安装调试如有售后服务
- 正文为成稿采购文件技术规范书或清单中已列明的型号数量指标交货期质保期等可如实响应未列明的不得臆造优于招标的数字亦不得用方括号待填"不低于采购文件对应条款""满足招标文件列明的性能与符合性要求""供货批次与到货节奏与现场安装计划相衔接"等概括语言写全句
内容特征
- 技术规格"指标项—符合性说明"展开已给出阈值的照写未给出的写"满足招标文件技术指标与检测方法要求""与同类应用场景主流水平相当且不降低实质性响应"
- 产品描述强调功能特性可靠性与标准符合性避免绑定特定品牌除非招标文件指定
- 供货方案分阶段描述签约后组织生产或备货出厂检验运输与到货验收具体天数仅在有依据时写出否则用"按合同与采购文件约定的供货周期执行"
- 质量保障强调"出厂检验+第三方检测(如要求)+质保期服务"分层体系
- 售后服务写清质保责任边界备件与技术支持渠道时长以招标为准无则写"按采购文件及国家相关规定执行"
未定参数的写法
- 性能与容量"满足采购文件规定的处理能力/精度/兼容性等关键指标"
- 数量与批次"与合同清单及现场需求匹配的供货批次与配套件配置"
- 服务时效"建立可追踪的报修、响应与闭环机制,时限不低于采购文件要求"
行文规范
- 自称统一用我方禁用我们本公司
- 招标人称招标方采购人甲方
- 禁止前导句和AI套话
- 列举用(1)(2)(3)禁用"首先其次"
- 纯文本输出段落间空行分隔
- 技术描述客观准确避免夸大不用"最先进""行业第一"改用"符合国家标准或采购文件引用标准的要求""满足招标文件实质性条款")
防过拟合约束
- 不绑定特定品牌如不说"采用华为服务器"改为"提供满足采购文件性能与安全要求的服务器设备"
- 无具体数值依据时不写虚构的"≥某数值"改为对符合性与可检测性的承诺
- 供货方案考虑多种交付场景国内供货进口设备定制生产等用自然语言比较路径优劣与适用条件
内容禁区
- 禁止出现施工组织安装工艺除非含安装服务土建工程人员现场施工配置
- 禁止出现工程管理流程如施工进度网络图
字数要求
{word_count_spec}
- 通过详细展开技术参数说明供货流程节点质量检验程序售后服务细则来满足篇幅
输入
- 招标文件概要{summary}
- 标书目录{outline}
- 子小节标题{subsection_title}
直接输出正文不含标题和解释"""
def build_section_detail_prompt(
    kind: str,
    summary: str,
    outline: str,
    title: str,
    word_count_spec: str = '',
    boq_summary: str = '',
) -> str:
    """Assemble the section-body prompt for one leaf section.

    Picks the engineering/service/goods template by tender kind, fills in the
    word-count spec (falling back to DEFAULT_WORD_COUNT_SPEC), the tender
    summary, outline and subsection title, then appends the global writing
    taboos and — when provided — the bill-of-quantities key facts.
    """
    k = normalize_tender_kind(kind)
    if k == 'service':
        base = SECTION_DETAILS_SERVICE
    elif k == 'goods':
        base = SECTION_DETAILS_GOODS
    else:
        # Unknown kinds normalize to engineering, the most general template.
        base = SECTION_DETAILS_ENGINEERING
    wc = word_count_spec.strip() or DEFAULT_WORD_COUNT_SPEC
    text = base.format(
        word_count_spec=wc,
        summary=summary or '(未提供)',
        outline=outline or '(未提供)',
        subsection_title=title or '',
    )
    # Global taboos appended verbatim after every kind-specific template.
    text += (
        '\n\n【须同步遵守的全局写作禁忌】'
        '禁止复述招标要求后再作答;禁止各章重复工程量数字与项目背景;'
        '禁止无依据将参数写成优于招标文件;字数不得仅靠套话堆砌;'
        '禁止使用方括号、「待填」「TBD」等表示未完稿字段如[型号][数量][数值]'
        '未定信息须写成通顺的概括性中文整句。'
        '若本任务提示词末尾另有「图示/表格」专用输出规范,其中的结构化标记按该规范执行,'
        '不视为待填占位。'
    )
    if boq_summary.strip():
        text += (
            '\n\n- 工程量清单关键信息(写作时按需引用清单中已有数量与单位,勿无故复读;'
            '清单未列明的分项用概括性施工组织语言描述,禁止使用方括号待填项):\n'
            + boq_summary.strip()
        )
    return text
# 对话模式:按类型追加的系统说明片段(与 app.py 中基础说明拼接)
CHAT_KIND_INSTRUCTION = {
'engineering': (
'\n【本模板类型:工程施工】'
'侧重施工组织、工艺与质量安全;未在招标文件或清单中出现的具体型号、台数、吨位等'
'用概括性中文表述写清,禁止使用方括号待填;勿虚构优于招标的规格。'
),
'service': (
'\n【本模板类型:服务】'
'侧重服务流程、人员与SLA人数、时限等以招标/采购文件为准,无则概括表述,禁止方括号待填;'
'禁止大段写混凝土标号、压实度、施工机械等工程参数。'
),
'goods': (
'\n【本模板类型:货物供货】'
'侧重规格、供货、检验与质保;指标与交期以采购文件为准,无则概括表述,禁止方括号待填;'
'禁止写施工组织与土建;勿绑定未指定的品牌。'
),
}

173
utils/volume_chapters.py Normal file
View File

@ -0,0 +1,173 @@
"""
目标页数与一级篇章数量区间阈值与 generator._effective_volume 一致
小章节自动填充子目录行总条数目标页数线性映射 subchapter_total_*
allocate_subchapters_to_main *
"""
from __future__ import annotations
import random
from typing import List, Optional, Tuple
# Page-count breakpoints, kept in sync with modules.generator._effective_volume.
PAGE_VOLUME_THRESHOLDS = (125, 175, 225)
# [min, max] number of top-level chapters per volume preset (mirrors the
# page-count mapping table above).
TOP_LEVEL_CHAPTER_RANGES = {
    'concise': (6, 8),
    'standard': (8, 10),
    'detailed': (10, 12),
    'full': (12, 16),
}
# Total sub-chapter line count = slope * pages + intercept
# (the line through the anchor points 100 -> 78 and 300 -> 212).
SUBCHAPTER_PAGES_SLOPE = 0.67
SUBCHAPTER_PAGES_INTERCEPT = 11.0
SUBCHAPTER_JITTER_LOW = 0.9
SUBCHAPTER_JITTER_HIGH = 1.1
# When neither the request, the DB, nor config supplies a page target, expand
# assumes 100 pages (baseline ~78 chapters ±10%) so auto-filled sub-chapters
# cannot balloon into the hundreds.
EXPAND_OUTLINE_DEFAULT_TARGET_PAGES = 100
def subchapter_total_base_from_pages(pages: int) -> float:
    """Linear baseline for the whole-bid sub-chapter line count: 0.67*pages + 11."""
    return SUBCHAPTER_PAGES_SLOPE * float(pages) + SUBCHAPTER_PAGES_INTERCEPT
def subchapter_jitter_bounds(n_base: float) -> Tuple[int, int]:
    """
    Integer closed interval [lo, hi] forming a strict ±10% band around the
    linear baseline *n_base*; used to clamp the sampled total of auto-filled
    sub-chapter lines. Example: n_base=78 (100 pages) -> (70, 86).
    """
    lower = max(1, int(round(n_base * SUBCHAPTER_JITTER_LOW)))
    upper = max(lower, int(round(n_base * SUBCHAPTER_JITTER_HIGH)))
    return lower, upper
def subchapter_total_effective(
    pages: int,
    k: int,
    rng: Optional[random.Random] = None,
) -> int:
    """
    Sample the whole-bid sub-chapter line budget for one auto-fill pass at
    target page count *pages* with *k* expandable main chapters.

    Draws N_base(P) = 0.67*P + 11 scaled by U ~ Uniform(0.9, 1.1), rounds,
    then strictly clamps into [round(N_base*0.9), round(N_base*1.1)] — e.g.
    at 100 pages the result always lands in 70..86 (78 ±10%).

    Deliberately does NOT raise the total to max(n, k): with a large k,
    forcing at least one line per main chapter would inflate N far past the
    ±10% band. When n < k, allocate_subchapters_to_mains gives some main
    chapters a quota of 0 for this pass.

    Returns 0 when pages <= 0 or k <= 0; callers should only use this with
    TARGET_PAGES > 0 and at least one expandable main chapter.
    """
    if pages <= 0 or k <= 0:
        return 0
    r = rng if rng is not None else random.Random()
    n_base = subchapter_total_base_from_pages(pages)
    lo, hi = subchapter_jitter_bounds(n_base)
    n = int(round(n_base * r.uniform(SUBCHAPTER_JITTER_LOW, SUBCHAPTER_JITTER_HIGH)))
    n = min(max(n, lo), hi)
    return n
def allocate_subchapters_to_mains(n: int, k: int) -> List[int]:
    """
    Split integer *n* as evenly as possible across *k* main chapters: the
    first n % k chapters get floor(n/k)+1, the rest floor(n/k). Returns []
    when k <= 0; negative n is treated as 0.
    """
    if k <= 0:
        return []
    total = max(0, n)
    quotient, remainder = divmod(total, k)
    return [quotient + 1] * remainder + [quotient] * (k - remainder)
def resolve_expand_target_pages(
    request_pages: Optional[int],
    no_subchapter_limit: bool,
    db_pages: int,
    config_pages: int,
) -> int:
    """
    Resolve the target page count used to cap this auto-fill of sub-chapters
    (> 0 enables the line-count cap; 0 means unlimited).

    Explicit "no limit" returns 0; otherwise the first positive value wins,
    in priority order: request > stored project value > global config >
    default (100 pages).
    """
    if no_subchapter_limit:
        return 0
    if request_pages is not None and int(request_pages) > 0:
        return int(request_pages)
    for candidate in (int(db_pages or 0), int(config_pages or 0)):
        if candidate > 0:
            return candidate
    return EXPAND_OUTLINE_DEFAULT_TARGET_PAGES
def volume_key_from_target_pages(pages: int, content_volume_default: str = 'standard') -> str:
    """Map a target page count to a volume preset key; mirrors
    generator._effective_volume without reading config (test-friendly)."""
    if pages <= 0:
        return content_volume_default
    for threshold, key in zip(PAGE_VOLUME_THRESHOLDS, ('concise', 'standard', 'detailed')):
        if pages <= threshold:
            return key
    return 'full'
def top_level_chapter_range_from_pages(pages: int, content_volume_default: str = 'standard') -> Tuple[int, int]:
    """
    (lo, hi) bounds on the number of top-level chapters.
    With no page target (pages <= 0) the default 8-10 band is kept.
    """
    if pages <= 0:
        return TOP_LEVEL_CHAPTER_RANGES['standard']
    volume_key = volume_key_from_target_pages(pages, content_volume_default)
    return TOP_LEVEL_CHAPTER_RANGES[volume_key]
def outline_chapter_count_hint(
    pages: int,
    content_volume_default: str = 'standard',
    page_char_estimate: int = 700,
) -> str:
    """
    Chapter-count constraint sentence embedded in the outline prompt
    (replaces the previous fixed "8-10 chapters" wording).

    With pages > 0 it also reminds the model that the total body text should
    be on the same order as pages x chars-per-page, and warns against outline
    levels so deep that each leaf section ends up too thin to write well.
    """
    pce = max(1, int(page_char_estimate or 700))
    if pages <= 0:
        # No page target: keep the legacy fixed constraint.
        return (
            '总的章节数应该控制在8-10个一级篇章总数不超过10个'
        )
    lo, hi = top_level_chapter_range_from_pages(pages, content_volume_default)
    total_g = int(round(pages * pce))
    return (
        f'总的章节数应该控制在约 {lo}{hi} 个,一级篇章总数不超过 {hi}'
        f'(目标约 {pages} 页,按目标页数映射的篇幅档位估算)。'
        f'全稿正文字量规模需与总目标约 {total_g}'
        f'{pages} 页×约每页 {pce} 字的粗略换算计)同量级,目录层次与末级小节目不宜过细,'
        f'避免叶节数过多时单节篇幅过薄、难以成文。'
    )
def outline_chapter_count_hint_with_rating_variant(
    pages: int,
    content_volume_default: str = 'standard',
    page_char_estimate: int = 700,
) -> str:
    """Same constraint for the with-rating outline template (the tighter
    variant of the wording that used to read "no more than 10")."""
    pce = max(1, int(page_char_estimate or 700))
    if pages <= 0:
        # No page target: keep the legacy fixed constraint.
        return (
            '总的章节数应该控制在8-10个,不超过10个'
        )
    lo, hi = top_level_chapter_range_from_pages(pages, content_volume_default)
    total_g = int(round(pages * pce))
    return (
        f'总的章节数应该控制在约 {lo}{hi} 个,不超过{hi}'
        f'(目标约 {pages} 页,按目标页数映射的篇幅档位估算)'
        f'全稿正文字量约与总目标 {total_g} 字同量级,末级子目不宜过细'
    )

371
utils/word_allocation.py Normal file
View File

@ -0,0 +1,371 @@
"""
技术评分驱动的章节字数分配读取 data/word_allocation_rules.json
结合 VOLUME_PRESETS base/core 与项目 rating_json为每个叶节点生成
min_charsword_count_spec及可选 max_tokens
"""
from __future__ import annotations
import json
import logging
import os
import re
from typing import Any, Dict, List, Optional, Tuple
import config
logger = logging.getLogger(__name__)
# Kept in sync with modules/generator.VOLUME_PRESETS:
# (base_chars, core_chars, display_label, max_tokens) per volume key.
VOLUME_PRESETS: Dict[str, Tuple[int, int, str, int]] = {
    'concise': (1200, 2500, '精简版', 5000),
    'standard': (2000, 4000, '标准版', 8000),
    'detailed': (3000, 5500, '详细版', 12000),
    'full': (4000, 7000, '充实版', 16000),
}
# Hard per-provider completion-token ceilings used to cap max_tokens.
_PROVIDER_TOKEN_LIMITS = {
    'deepseek': 8192,
    'qwen': 8192,
    'openai': 16384,
    'ollama': 8192,
    'doubao': 8192,
    'kimi': 8192,
}
# Built-in fallback when data/word_allocation_rules.json is missing/invalid.
DEFAULT_RULES: Dict[str, Any] = {
    'schema_version': 1,
    'alpha': 0.85,
    'budget_mode': 'target_pages',
    'per_section_floor': None,
    'per_section_cap': None,
    'relevance': {'method': 'keyword_overlap', 'min_rating_weight': 0.01},
    'rating_parse': {},
    'prompt': {'top_k_rating_items': 4, 'intro_line': ''},
    'max_tokens_scale': False,
}
def rules_path() -> str:
    """Absolute path of the word-allocation rules JSON under the data dir."""
    return os.path.join(config.DATA_DIR, 'word_allocation_rules.json')
def load_rules(path: Optional[str] = None) -> Dict[str, Any]:
    """Load the rules JSON, merged over DEFAULT_RULES; a missing file or any
    parse failure falls back to the built-in defaults."""
    target = path or rules_path()
    merged = dict(DEFAULT_RULES)
    if not os.path.isfile(target):
        return merged
    try:
        with open(target, encoding='utf-8') as fh:
            raw = json.load(fh)
        if isinstance(raw, dict):
            for key, value in raw.items():
                if key.startswith('_'):
                    continue  # underscore-prefixed keys serve as JSON comments
                if key in ('relevance', 'prompt') and isinstance(value, dict):
                    # Shallow-merge nested sections over their defaults.
                    merged[key] = {**merged.get(key, {}), **value}
                else:
                    merged[key] = value
    except Exception as e:
        logger.warning('加载 word_allocation_rules.json 失败,使用内置默认: %s', e)
    return merged
def _as_float(x: Any, default: float = 0.0) -> float:
if x is None:
return default
if isinstance(x, (int, float)):
return float(x)
if isinstance(x, str):
s = re.sub(r'[^\d.\-]', '', x)
if not s:
return default
try:
return float(s)
except ValueError:
return default
return default
def _item_name(d: Dict[str, Any]) -> str:
for k in ('name', 'title', 'item_name', '评分项', '评分项名称', 'indicator'):
v = d.get(k)
if isinstance(v, str) and v.strip():
return v.strip()
return ''
def _item_weight(d: Dict[str, Any]) -> float:
    """Pull a positive weight/score from the known key spellings; a key with a
    non-positive value is skipped, and 1.0 is the fallback."""
    for key in ('weight', 'score', '分值', 'max_score', '满分', 'points'):
        if key not in d:
            continue
        weight = _as_float(d.get(key), 0.0)
        if weight > 0:
            return weight
    return 1.0
def _collect_rating_dicts(obj: Any, acc: List[Dict[str, Any]]) -> None:
if isinstance(obj, dict):
acc.append(obj)
for v in obj.values():
_collect_rating_dicts(v, acc)
elif isinstance(obj, list):
for v in obj:
_collect_rating_dicts(v, acc)
def parse_rating_json(raw: Optional[str]) -> List[Dict[str, Any]]:
    """
    Parse the rating-item list out of a rating_json string.
    Each returned item: {'name': str, 'weight': float, 'keywords': List[str]}.
    Returns [] for empty input or invalid JSON.
    """
    if not raw or not isinstance(raw, str) or not raw.strip():
        return []
    try:
        root = json.loads(raw.strip())
    except json.JSONDecodeError:
        return []
    # Flatten every dict in the (arbitrarily nested) JSON, then keep those
    # that look like rating items (have a usable name).
    dicts: List[Dict[str, Any]] = []
    _collect_rating_dicts(root, dicts)
    items: List[Dict[str, Any]] = []
    seen: set = set()  # case-insensitive de-duplication by name
    for d in dicts:
        name = _item_name(d)
        if not name or len(name) < 2:
            continue
        key = name.lower()
        if key in seen:
            continue
        w = _item_weight(d)
        kws: List[str] = []
        kw = d.get('keywords') or d.get('keyword') or d.get('要点')
        if isinstance(kw, list):
            kws = [str(x).strip() for x in kw if isinstance(x, (str, int, float)) and str(x).strip()]
        elif isinstance(kw, str) and kw.strip():
            kws = [kw.strip()]
        seen.add(key)
        items.append({'name': name, 'weight': w, 'keywords': kws})
    return items
def _title_tokens(title: str) -> List[str]:
    """Tokenize a section title for relevance matching: whitespace/punctuation
    runs become separators, then contiguous CJK runs (>= 2 chars) are added as
    extra tokens so Chinese titles still match without word segmentation."""
    if not title:
        return []
    s = re.sub(r'[\s\d..、,,;:/\\()【】\[\]「」]+', ' ', title)
    parts = [p for p in s.split() if len(p) >= 2]
    toks = list(parts)
    for m in re.findall(r'[\u4e00-\u9fff]{2,}', title):
        if m not in toks:
            toks.append(m)
    return toks
def _overlap_score(title: str, item: Dict[str, Any]) -> float:
    """Relevance in [0, 1] between a section title and one rating item:
    token-overlap ratio, boosted to >= 0.85 on mutual name containment and
    to >= 0.7 when any keyword (>= 2 chars) appears in the title."""
    tokens = _title_tokens(title)
    if not tokens:
        return 0.0
    blob = item['name'] + ''.join(item.get('keywords') or [])
    hit = sum(1 for t in tokens if t and t in blob)
    score = hit / max(len(tokens), 1)
    if item['name'] in title or title in item['name']:
        score = max(score, 0.85)
    for kw in item.get('keywords') or []:
        if isinstance(kw, str) and len(kw) >= 2 and kw in title:
            score = max(score, 0.7)
    return min(1.0, score)
def _raw_utilities(
    leaves: List[Dict[str, Any]],
    items: List[Dict[str, Any]],
    min_w: float,
) -> Tuple[List[float], List[List[Tuple[str, float]]]]:
    """Per-leaf raw utility u_i = sum_j w_j * c_ij over rating items.

    Returns u normalized so max(u) == 1 (all-ones when nothing matched), and
    for each leaf its top contributing rating items as (name, contribution).
    Items below *min_w* weight are ignored unless that filters everything out.
    """
    filtered = [it for it in items if it['weight'] >= min_w]
    if not filtered:
        filtered = items  # never drop all items — fall back to the full list
    n = len(leaves)
    u = [0.0] * n
    top_lists: List[List[Tuple[str, float]]] = [[] for _ in range(n)]
    for i, leaf in enumerate(leaves):
        title = leaf.get('section_title') or ''
        contribs: List[Tuple[str, float]] = []
        for it in filtered:
            c = _overlap_score(title, it)
            contrib = it['weight'] * c
            if contrib > 0:
                contribs.append((it['name'], contrib))
            u[i] += contrib
        contribs.sort(key=lambda x: -x[1])
        top_lists[i] = contribs[:12]  # keep only the strongest dozen per leaf
    max_u = max(u) if u else 0.0
    if max_u <= 0:
        # No leaf matched any item: treat all sections as equally relevant.
        u = [1.0] * n
    else:
        u = [x / max_u for x in u]
    return u, top_lists
def _clamp_int(x: int, lo: int, hi: int) -> int:
    """Clamp *x* into [lo, hi] (lo wins if the bounds are inverted)."""
    return max(lo, min(hi, x))
def _water_adjust(
    targets: List[int],
    budget: int,
    floor_v: int,
    cap_v: int,
    priority: List[float],
) -> List[int]:
    """Integerize *targets* within [floor_v, cap_v] and nudge the sum toward
    *budget*: surplus is removed from low-priority sections first, deficit is
    added to high-priority sections first, one unit at a time."""
    n = len(targets)
    if n == 0:
        return []
    if floor_v > cap_v:
        floor_v, cap_v = cap_v, floor_v
    # Relax the bounds when the budget is infeasible within them.
    if n * floor_v > budget:
        floor_v = max(1, budget // n)
    if n * cap_v < budget:
        cap_v = max(floor_v, (budget + n - 1) // n)
    cur = [_clamp_int(t, floor_v, cap_v) for t in targets]
    s = sum(cur)
    delta = budget - s
    order = sorted(range(n), key=lambda i: -priority[i])     # high priority first
    inv_order = sorted(range(n), key=lambda i: priority[i])  # low priority first
    step = 0
    # Safety valve: each iteration moves at most one unit, so bound the loop.
    max_steps = max(n * 2000, abs(delta) + n)
    while delta != 0 and step < max_steps:
        step += 1
        if delta > 0:
            moved = False
            for i in order:
                if cur[i] < cap_v:
                    cur[i] += 1
                    delta -= 1
                    moved = True
                    break
            if not moved:
                break  # everyone is at cap — cannot add more
        else:
            moved = False
            for i in inv_order:
                if cur[i] > floor_v:
                    cur[i] -= 1
                    delta += 1
                    moved = True
                    break
            if not moved:
                break  # everyone is at floor — cannot remove more
    return cur
def compute_leaf_allocations(
    volume_key: str,
    leaves: List[Dict[str, Any]],
    rating_raw: Optional[str],
    rules: Optional[Dict[str, Any]] = None,
) -> Optional[Dict[int, Dict[str, Any]]]:
    """
    Compute target_chars, word_count_spec and max_tokens for every leaf node.

    With rating items present, character budgets are distributed by the
    title-relevance utilities; without them, and only when the rules say to
    budget by target pages (and pages are configured), the whole-document
    budget B = pages * chars-per-page is split evenly. Otherwise returns
    None and the caller keeps the legacy behavior.

    leaves: [{'id': int, 'section_title': str}, ...]
    """
    rules = rules or load_rules()
    if not leaves:
        return {}
    base, core, _, preset_tokens = VOLUME_PRESETS.get(
        volume_key, VOLUME_PRESETS['standard']
    )
    # Per-section bounds default to [base/2, core] unless overridden by rules.
    floor_default = int(base * 0.5)
    cap_default = core
    floor_v = int(rules['per_section_floor']) if rules.get('per_section_floor') is not None else floor_default
    cap_v = int(rules['per_section_cap']) if rules.get('per_section_cap') is not None else cap_default
    floor_v = min(floor_v, cap_v)
    # alpha blends relevance-driven vs. uniform allocation; clamped to [0, 1].
    alpha = float(rules.get('alpha', 0.85))
    alpha = max(0.0, min(1.0, alpha))
    min_w = float(rules.get('relevance', {}).get('min_rating_weight', 0.01))
    n = len(leaves)
    mode = (rules.get('budget_mode') or 'anchor_mean').strip()
    pages_cfg = int(getattr(config, 'TARGET_PAGES', 0) or 0)
    pce = max(1, int(getattr(config, 'PAGE_CHAR_ESTIMATE', 700) or 700))
    # Total character budget by mode: pages*chars, n*base, or n*mean(base, core).
    if mode == 'target_pages' and pages_cfg > 0:
        budget = int(round(pages_cfg * pce))
    elif mode == 'anchor_base':
        budget = int(round(n * base))
    else:
        budget = int(round(n * (base + core) / 2.0))
    items = parse_rating_json(rating_raw)
    if not items:
        if not (mode == 'target_pages' and pages_cfg > 0):
            return None  # no ratings and no page budget — caller keeps old logic
        # Even split: every leaf aims at the mid-point of [base, core].
        u = [1.0] * n
        top_lists = [[] for _ in range(n)]
        mid = 0.5 * (base + core)
        raw_float = [float(mid)] * n
    else:
        u, top_lists = _raw_utilities(leaves, items, min_w)
        # Blend: base + band * (alpha * relevance + (1-alpha) * 0.5).
        band = core - base
        raw_float = [
            base + band * (alpha * u[i] + (1.0 - alpha) * 0.5) for i in range(n)
        ]
    targets = [int(round(x)) for x in raw_float]
    adjusted = _water_adjust(targets, budget, floor_v, cap_v, u)
    provider = getattr(config, 'MODEL_PROVIDER', 'openai')
    tok_limit = _PROVIDER_TOKEN_LIMITS.get(provider, 8192)
    base_max_tok = min(preset_tokens, tok_limit)
    scale_tokens = bool(rules.get('max_tokens_scale', False))
    prompt_cfg = rules.get('prompt') or {}
    top_k = int(prompt_cfg.get('top_k_rating_items', 4))
    intro = (prompt_cfg.get('intro_line') or '').strip() or (
        '本节须对下列技术评分要点作实质展开(结合工艺、流程、标准与可验证措施,禁止空泛承诺与复述招标文件):'
    )
    out: Dict[int, Dict[str, Any]] = {}
    for i, leaf in enumerate(leaves):
        sid = int(leaf['id'])
        min_chars = max(1, adjusted[i])
        contribs = top_lists[i][:top_k]
        if contribs:
            # Spec variant listing the top related rating items by name.
            lines = '\n'.join(f'  · {name}' for name, _ in contribs[:top_k])
            spec = (
                f'- 字数硬性要求(必须达到,不达标将续写补足):本节正文不少于 {min_chars}\n'
                f'- {intro}\n{lines}\n'
                f'- 内容须由可检验的技术与管理措施支撑,禁止堆砌套话与重复背景'
            )
        else:
            spec = (
                f'- 字数硬性要求(必须达到,不达标将续写补足):本节正文不少于 {min_chars}\n'
                f'- 须紧扣章节标题与标书目录定位,充分展开可执行方案细节\n'
                f'- 内容须由可检验的技术与管理措施支撑,禁止堆砌套话与重复背景'
            )
        max_tok = base_max_tok
        if scale_tokens and base > 0:
            # Optional: scale max_tokens with the per-section char target.
            max_tok = int(min(tok_limit, max(1024, base_max_tok * min_chars / base)))
        out[sid] = {
            'target_chars': min_chars,
            'word_count_spec': spec,
            'max_tokens': max_tok,
        }
    return out
def continuation_threshold(target_chars: int) -> int:
    """Continuation stop point, matching generator._get_min_chars: continue
    writing until ~65% of the section's character target (later rounds stack
    up to close the gap); floors at 200 characters."""
    threshold = target_chars * 0.65
    if threshold < 200:
        threshold = 200
    return int(threshold)

Some files were not shown because too many files have changed in this diff Show More