/**
 * bill-worker.js — PDF bill-of-quantities parsing dispatcher (Worker Thread)
 *
 * Architecture (v3 — SharedArrayBuffer, zero-copy):
 *   Phase 1 — parallel text extraction
 *     The PDF bytes are written into one SharedArrayBuffer (allocated once,
 *     read by every child thread) and N page-workers are started, each one
 *     responsible for a fixed chunk of 20 pages.
 *
 *   Phase 2 — bill-page selection + text parsing (pure regex, milliseconds)
 *     All page texts are collected → bill pages are selected by keyword →
 *     wrapped lines are merged → every logical line is parsed.
 */
'use strict';

const { parentPort } = require('worker_threads');
const { Worker } = require('worker_threads');
const path = require('path');

const PAGES_PER_CHUNK = 20;

/**
 * Open the PDF with pdf.js only long enough to read its page count.
 *
 * The loading task is always destroyed afterwards so the parsed document does
 * not stay alive while the page-workers run.
 *
 * @param {object} pdfjsLib - the pdf.js module namespace (must expose `getDocument`).
 * @param {Uint8Array} data - private copy of the PDF bytes; pdf.js may detach it.
 * @returns {Promise<number>} number of pages in the document.
 */
async function countPdfPages(pdfjsLib, data) {
  const loadingTask = pdfjsLib.getDocument({ data, isEvalSupported: false });
  try {
    const pdf = await loadingTask.promise;
    return pdf.numPages;
  } finally {
    // Best-effort cleanup: a failing destroy must not mask the real outcome.
    try {
      await loadingTask.destroy();
    } catch (destroyErr) {
      console.warn('[BillWorker] pdf destroy failed:', destroyErr && destroyErr.message);
    }
  }
}

/**
 * Decide which pages belong to the bill of quantities.
 *
 * Pass 1 flags a page when it hits at least two table-header keywords, any
 * section keyword, or contains a 9-digit run (an item code).
 * Pass 2 fills the gaps: every non-trivial page between the first and the last
 * flagged page is treated as a continuation page (continuation pages carry no
 * header), unless it looks like a pure fee/tax page.
 *
 * @param {string[]} pageTexts - extracted text per page, index = page - 1.
 * @returns {{ billTexts: string[], detectedCount: number }} texts of the
 *   selected pages in page order, and how many pages pass 1 flagged on its own.
 */
function selectBillPages(pageTexts) {
  const BILL_KW = ['项目编码', '项目名称', '工程量', '计量单位', '综合单价', '清单编码'];
  const SEC_KW = ['分部分项', '分类分项', '措施项目', '其他项目', '工程量清单计价'];
  const FEE_PAGE_KW = ['规费', '税金', '社会保险费', '住房公积金', '养老保险', '工伤保险',
    '失业保险', '医疗保险', '教育费附加', '城市维护建设税'];
  const NINE_DIGITS = /\d{9}/;

  // Pass 1: pages that are bill pages on their own evidence.
  const billFlags = pageTexts.map((t) => {
    if (!t.trim()) return false;
    const headerHits = BILL_KW.filter(k => t.includes(k)).length;
    // Relaxed rule: a 9-digit code alone is enough (headers are not required).
    return headerHits >= 2 || SEC_KW.some(k => t.includes(k)) || NINE_DIGITS.test(t);
  });
  // Must be counted BEFORE pass 2, otherwise it just repeats the final count.
  const detectedCount = billFlags.filter(Boolean).length;

  // Pass 2: fill the gaps between the first and the last flagged page.
  const firstBill = billFlags.indexOf(true);
  const lastBill = billFlags.lastIndexOf(true);
  if (firstBill >= 0 && lastBill > firstBill) {
    for (let i = firstBill; i <= lastBill; i++) {
      const t = pageTexts[i];
      if (billFlags[i] || !t || t.trim().length <= 30) continue;
      const feeHits = FEE_PAGE_KW.filter(kw => t.includes(kw)).length;
      // Two or more fee keywords and no 9-digit code → pure fee page, leave it out.
      if (feeHits >= 2 && !NINE_DIGITS.test(t)) continue;
      billFlags[i] = true;
    }
  }

  const billTexts = pageTexts.filter((_, i) => billFlags[i]);
  return { billTexts, detectedCount };
}

/**
 * Entry point: handles `{ type: 'parse', buffer }` messages from the parent.
 *
 * Always answers with exactly one `{ type: 'done', ... }` message:
 *   - `ok: false, error`                          on empty input or any exception;
 *   - `ok: true, data: { scanned: true, ... }`    when almost no text was extracted;
 *   - `ok: true, data: { noBillPages: true, .. }` when no bill page was found;
 *   - `ok: true, data: { ...parsed, _meta }`      otherwise.
 * Messages of any other type are ignored.
 */
parentPort.on('message', async (msg) => {
  if (!msg || msg.type !== 'parse') return;

  const t0 = Date.now();
  try {
    const raw = msg.buffer;
    if (!raw || !raw.byteLength) {
      parentPort.postMessage({ type: 'done', ok: false, error: '收到空 PDF 数据' });
      return;
    }

    // Take a clean copy right away so this thread owns an independent buffer.
    const buf = Buffer.alloc(raw.byteLength);
    Buffer.from(raw).copy(buf);

    // ── Page count ──
    const pdfjsModule = await import('pdfjs-dist/build/pdf.mjs');
    const pdfjsLib = pdfjsModule.default || pdfjsModule;
    // pdf.js gets its own copy because it may detach the buffer it is given.
    const totalPages = await countPdfPages(pdfjsLib, new Uint8Array(buf));

    // ── Publish the PDF bytes in a SharedArrayBuffer (one allocation, shared reads) ──
    const sab = new SharedArrayBuffer(buf.length);
    new Uint8Array(sab).set(buf);

    const workerCount = Math.ceil(totalPages / PAGES_PER_CHUNK);
    console.log(`[BillWorker] PDF ${totalPages} 页, ${workerCount} 路并行 (SharedArrayBuffer ${(buf.length/1024/1024).toFixed(1)}MB)`);

    // Phase 1: parallel text extraction.
    const pageTexts = await parallelExtract(sab, buf.length, totalPages, workerCount);
    const t1 = Date.now();
    const extractedCount = pageTexts.filter(t => t.length > 0).length;
    console.log(`[BillWorker] Phase1 完成: ${t1 - t0}ms, ${extractedCount}/${totalPages} 页有文本`);

    // Scanned-document check: practically no text layer at all.
    const totalChars = pageTexts.reduce((s, t) => s + t.length, 0);
    if (totalChars < 50) {
      parentPort.postMessage({ type: 'done', ok: true, data: { scanned: true, reason: 'noText', totalPages } });
      return;
    }

    // Phase 2: bill-page selection (lenient detection + gap filling).
    const { billTexts, detectedCount } = selectBillPages(pageTexts);
    if (!billTexts.length) {
      parentPort.postMessage({ type: 'done', ok: true, data: { scanned: false, noBillPages: true, totalPages } });
      return;
    }
    console.log(`[BillWorker] ${totalPages} 页 → ${billTexts.length} 页清单 (原始识别 ${detectedCount} / 补全后 ${billTexts.length})`);

    // Phase 3: text parsing.
    const merged = billTexts.join('\n');
    const parsed = parseBillText(merged);
    const t2 = Date.now();
    console.log(`[BillWorker] Phase2+3: ${t2 - t1}ms, 总耗时: ${t2 - t0}ms`);

    parentPort.postMessage({
      type: 'done',
      ok: true,
      data: {
        scanned: false,
        ...parsed,
        _meta: {
          method: 'local-parallel',
          workers: workerCount,
          billPages: billTexts.length,
          totalPages,
          extractMs: t1 - t0,
          parseMs: t2 - t1,
          totalMs: t2 - t0,
        }
      }
    });
  } catch (err) {
    // Non-Error throwables have no `.message`; fall back to their string form.
    const message = err && err.message ? err.message : String(err);
    console.error('[BillWorker] 错误:', message);
    parentPort.postMessage({ type: 'done', ok: false, error: message });
  }
});
// ================================================================
// Phase 1: parallel extraction with several Workers (SharedArrayBuffer, zero-copy)
// ================================================================
/**
 * Extract the text of every page by fanning out over `workerCount` page-workers.
 *
 * Worker `i` receives pages `i * PAGES_PER_CHUNK + 1 .. (i + 1) * PAGES_PER_CHUNK`
 * (clamped to `totalPages`) through `workerData`, together with the shared
 * buffer. A worker is expected to post `{ ok: true, results: [{ page, text }] }`
 * or `{ ok: false, error }`.
 *
 * The returned promise never rejects because of a worker: a worker that fails,
 * crashes, or cannot be started simply leaves its pages as empty strings.
 *
 * @param {SharedArrayBuffer} sab - PDF bytes shared with every worker.
 * @param {number} dataLength - number of valid bytes in `sab`.
 * @param {number} totalPages - page count of the document.
 * @param {number} workerCount - number of workers to start.
 * @returns {Promise<string[]>} text per page, index = page number - 1.
 */
function parallelExtract(sab, dataLength, totalPages, workerCount) {
  return new Promise((resolve) => {
    const allPageTexts = new Array(totalPages).fill('');

    // Nothing to start: answer immediately, before touching any worker setup.
    if (!(workerCount > 0)) {
      resolve(allPageTexts);
      return;
    }

    const workerPath = path.join(__dirname, 'page-worker.js');
    let settledCount = 0;
    let failedCount = 0;

    // Called exactly once per worker; resolves when the last one has settled.
    const settle = (failed) => {
      settledCount++;
      if (failed) failedCount++;
      if (settledCount < workerCount) return;
      if (failedCount > 0) {
        console.warn(`[BillWorker] ${failedCount}/${workerCount} 个worker失败,可能导致部分页面无内容`);
      }
      resolve(allPageTexts);
    };

    for (let i = 0; i < workerCount; i++) {
      const startPage = i * PAGES_PER_CHUNK + 1;
      const endPage = Math.min((i + 1) * PAGES_PER_CHUNK, totalPages);

      // message / error / exit can all fire for one worker; only the first counts.
      let workerDone = false;
      const markDone = (status) => {
        if (workerDone) return;
        workerDone = true;
        settle(status === 'failed');
      };

      let w;
      try {
        // workerData carries the SharedArrayBuffer (shared across threads, never detached).
        w = new Worker(workerPath, { workerData: { sab, dataLength, startPage, endPage } });
      } catch (err) {
        // A synchronous constructor failure must not abandon the workers already running.
        console.warn(`[BillWorker] page-worker[${startPage}-${endPage}] 异常: ${err.message}`);
        markDone('failed');
        continue;
      }

      w.on('message', (msg) => {
        if (!msg || !msg.ok) {
          console.warn(`[BillWorker] page-worker[${startPage}-${endPage}] 失败: ${msg && msg.error}`);
          markDone('failed');
          return;
        }
        // ok without a result list: not a completion message, let 'exit' decide.
        if (!Array.isArray(msg.results)) return;

        for (const r of msg.results) {
          const idx = r ? r.page - 1 : NaN;
          // Ignore entries that would write outside the page table.
          if (!Number.isInteger(idx) || idx < 0 || idx >= totalPages) continue;
          allPageTexts[idx] = typeof r.text === 'string' ? r.text : '';
        }
        markDone('done');
      });

      w.on('error', (err) => {
        console.warn(`[BillWorker] page-worker[${startPage}-${endPage}] 异常: ${err.message}`);
        markDone('failed');
      });

      w.on('exit', (code) => {
        // 'exit' fires after 'message'; it is the safety net for a worker that
        // died without ever posting a result.
        if (workerDone) return;
        if (code !== 0) {
          console.warn(`[BillWorker] page-worker[${startPage}-${endPage}] 意外退出(code=${code})`);
          markDone('failed');
        } else {
          markDone('done');
        }
      });
    }
  });
}
raw.startsWith(m + ' ') || raw.startsWith(m + '\u3000'))) return true; return false; } for (const raw of rawLines) { if (!raw || PAGE_MARK.test(raw)) continue; if (HEADER_RE.test(raw) || HEADER_KW.test(raw)) continue; if (/^(元)|^款章节号|^备注$|^第\d+页/.test(raw)) continue; if (isNewLineTrigger(raw)) { if (currentLine) logicLines.push(currentLine); currentLine = raw; } else if (CODE_INLINE.test(raw) && raw.length > 15) { // 行内包含编码且够长(像是完整的表格行)→ 也开新行 if (currentLine) logicLines.push(currentLine); currentLine = raw; } else { // 续行(项目特征折行等短文本) // 安全阀:已合并行过长时强制切断,防止整页吞并 if (currentLine && currentLine.length > 300) { logicLines.push(currentLine); currentLine = raw; } else { currentLine = currentLine ? currentLine + ' ' + raw : raw; } } } if (currentLine) logicLines.push(currentLine); console.log(`[BillWorker] 合并后 ${logicLines.length} 条逻辑行(原始 ${rawLines.length} 行)`); // 打印前5条逻辑行供调试 for (let i = 0; i < Math.min(5, logicLines.length); i++) { console.log(`[BillWorker] L${i}: ${logicLines[i].substring(0, 120)}`); } const categories = []; let curCat = null, curItem = null; // 编码匹配:支持行内任意位置的9-12位数字或B编码(排除 GB/DB 等标准号前缀) const CODE_RE = /(? 
u.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')); const UNIT_RE = new RegExp(`(?:^|\\s)(${unitEscaped.join('|')})(?=\\s|\\d|$)`); const SKIP_RE = /合\s*计|小\s*计|本页小计|总\s*计|价税合计/; for (const line of logicLines) { if (SKIP_RE.test(line)) continue; // 去掉行首的序号部分("1.1.1.1.5 " 或 "5 " 等纯序号前缀) let stripped = line.replace(/^\d+(\.\d+)*\s+/, '').trim(); if (!stripped) stripped = line.trim(); if (!stripped) continue; const cm = stripped.match(CODE_RE); if (cm) { if (curItem && curCat) curCat.items.push(curItem); if (!curCat) { curCat = { name: '未分类', items: [] }; categories.push(curCat); } const code = cm[1]; let rest = stripped.substring(cm.index + cm[0].length).trim(); let name = '', unit = '', quantity = '', spec = ''; const unitMatch = rest.match(UNIT_RE); if (unitMatch) { const ui = rest.indexOf(unitMatch[0]); let rawName = rest.substring(0, ui).trim(); unit = unitMatch[1]; const afterUnit = rest.substring(ui + unitMatch[0].length).trim(); const qm = afterUnit.match(/^([\d,.]+)/); if (qm) { quantity = qm[1]; // 提取 quantity 之后的尾部文本,跳过纯数字字段(综合单价、合价等) let tail = afterUnit.substring(qm.index + qm[0].length).trim(); if (tail) { const tailTokens = tail.split(/\s+/); let si = 0; while (si < tailTokens.length && /^[\d,.%\-]+$/.test(tailTokens[si])) si++; const specTail = tailTokens.slice(si).join(' ').trim(); if (specTail) spec = specTail; } } // 分离 rawName 中的"项目名称"和内联"项目特征" const ns = splitNameAndSpec(rawName); name = ns.name; if (ns.spec) spec = ns.spec + (spec ? 
';' + spec : ''); } else { const tokens = rest.split(/\s+/).filter(t => t); let foundUnitIdx = -1; for (let ti = tokens.length - 1; ti >= 1; ti--) { if (UNIT_SET.has(tokens[ti])) { foundUnitIdx = ti; break; } } if (foundUnitIdx >= 1) { const rawNameStr = tokens.slice(0, foundUnitIdx).join(' '); const ns = splitNameAndSpec(rawNameStr); name = ns.name; if (ns.spec) spec = ns.spec; unit = tokens[foundUnitIdx]; const afterTokens = tokens.slice(foundUnitIdx + 1); if (afterTokens.length && /^[\d,.]+$/.test(afterTokens[0])) { quantity = afterTokens[0]; let si = 1; while (si < afterTokens.length && /^[\d,.%\-]+$/.test(afterTokens[si])) si++; const specTail = afterTokens.slice(si).join(' ').trim(); if (specTail) spec = spec ? spec + ';' + specTail : specTail; } } else { name = rest; } } name = name.replace(/\s+/g, '').trim(); for (const u of UNIT_TOKENS) { if (name.endsWith(u) && name.length > u.length) { unit = unit || u; name = name.substring(0, name.length - u.length); break; } } curItem = { code, name, unit, quantity, spec }; continue; } // ── 回退:无标准编码但有 "名称 单位 数量" 结构 → 也视为清单项 ── // 常见于措施项目、未编码的补充清单项 if (!cm && stripped.length > 4) { const uniMatch = stripped.match(UNIT_RE); if (uniMatch) { const ui = stripped.indexOf(uniMatch[0]); const beforeUnit = stripped.substring(0, ui).trim(); const afterUnit = stripped.substring(ui + uniMatch[0].length).trim(); const hasQty = /^[\d,.]+/.test(afterUnit); // 名称 2-50 字、含中文、有数量、不是分部标题 if (beforeUnit.length >= 2 && beforeUnit.length <= 50 && hasQty && /[\u4e00-\u9fff]/.test(beforeUnit)) { if (curItem && curCat) curCat.items.push(curItem); if (!curCat) { curCat = { name: '未分类', items: [] }; categories.push(curCat); } const unit = uniMatch[1]; const qm = afterUnit.match(/^([\d,.]+)/); const quantity = qm ? 
qm[1] : ''; const ns = splitNameAndSpec(beforeUnit); const name = ns.name.replace(/\s+/g, '').trim(); const spec = ns.spec || ''; curItem = { code: '', name, unit, quantity, spec }; continue; } } } // 分部标题判断:不含编码、较短的文本、含工程关键字 // 关键守卫:如果行里有计量单位,说明是清单项,不是标题 if (stripped.length > 2 && stripped.length < 60 && !CODE_RE.test(stripped)) { if (UNIT_RE.test(stripped) && /\d+\.?\d*\s*$/.test(stripped)) { if (curItem) curItem.spec = curItem.spec ? curItem.spec + ';' + stripped : stripped; continue; } if (isCatTitle(stripped) && !UNIT_RE.test(stripped) && !isFeeCatTitle(stripped)) { if (curItem && curCat) { curCat.items.push(curItem); curItem = null; } const cleanTitle = stripped.replace(/\s+(座|个|项|处|m|km|段|条)\s+\d+[\d.]*\s*$/, '').trim(); curCat = { name: cleanTitle, items: [] }; categories.push(curCat); continue; } } if (/^[一二三四五六七八九十]+\s/.test(stripped) || /^([一二三四五六七八九十\d]+)/.test(stripped)) { // 中文序号标题也需要排除费用类 const cleanTitle = stripped.replace(/\s+(座|个|项|处)\s+\d+[\d.]*\s*$/, '').trim(); if (isFeeCatTitle(cleanTitle)) { // 费用类标题:跳过,不建分部(其下的行会作为续行处理) continue; } if (curItem && curCat) { curCat.items.push(curItem); curItem = null; } curCat = { name: cleanTitle, items: [] }; categories.push(curCat); continue; } if (curItem && stripped.length > 1) { curItem.spec = curItem.spec ? 
curItem.spec + ';' + stripped : stripped; } } if (curItem && curCat) curCat.items.push(curItem); // 过滤费用项:只保留需要写入技术标的施工清单项 let feeFiltered = 0; for (const cat of categories) { if (cat.items) { const before = cat.items.length; cat.items = cat.items.filter(it => !isFeeItem(it.name)); feeFiltered += before - cat.items.length; } } if (feeFiltered > 0) console.log(`[BillWorker] 费用项过滤: 移除 ${feeFiltered} 项`); // ========== 按项目名称合并(核心去重,大幅减少清单项数量)========== // 规则:同一分部内,name 相同的清单项合并为一条 // - code: 保留第一个非空编码 // - unit: 保留第一个非空单位 // - quantity: 尝试数值求和,否则用分号拼接 // - spec: 去重后用分号拼接(截断过长的) let totalBeforeMerge = 0, totalAfterMerge = 0; for (const cat of categories) { if (!cat.items || !cat.items.length) continue; totalBeforeMerge += cat.items.length; const nameMap = new Map(); // name → merged item for (const item of cat.items) { const key = (item.name || '').replace(/\s+/g, '').trim(); if (!key) continue; if (!nameMap.has(key)) { nameMap.set(key, { code: item.code || '', name: item.name, unit: item.unit || '', quantity: item.quantity || '', spec: item.spec || '', _count: 1, _quantities: item.quantity ? [item.quantity] : [], _specs: item.spec ? [item.spec] : [], }); } else { const m = nameMap.get(key); m._count++; // code: 取第一个非空的 if (!m.code && item.code) m.code = item.code; // unit: 取第一个非空的 if (!m.unit && item.unit) m.unit = item.unit; // quantity: 收集所有 if (item.quantity) m._quantities.push(item.quantity); // spec: 收集不重复的 if (item.spec && !m._specs.includes(item.spec)) { m._specs.push(item.spec); } } } // 后处理:合成最终字段 const merged = []; for (const [, m] of nameMap) { // quantity: 尝试数值求和 if (m._quantities.length > 1) { const nums = m._quantities.map(q => parseFloat(q.replace(/,/g, ''))); if (nums.every(n => !isNaN(n))) { const sum = nums.reduce((a, b) => a + b, 0); m.quantity = sum % 1 === 0 ? 
// Names that are fee items only when they make up the WHOLE item name.
const FEE_ITEM_EXACT = new Set([
  '规费', '税金', '利润', '增值税', '暂列金额', '暂估价',
  '计日工', '总承包服务费', '企业管理费', '甲供材料保管费', '价税合计',
]);

// Fragments that mark a fee item wherever they occur in the name:
// measure fees, statutory fees, insurance and administrative charges.
const FEE_ITEM_KEYWORDS = [
  '安全文明', '文明施工费', '环境保护费', '临时设施费',
  '夜间施工增加费', '夜间施工费',
  '冬雨季施工增加费', '冬雨季施工费',
  '二次搬运费', '大型机械设备进出场', '大型机械进出场',
  '施工排水降水', '排水降水费',
  '已完工程及设备保护', '已完工程保护费',
  '工程排污费', '社会保障费', '住房公积金',
  '工伤保险', '劳动保险', '意外伤害保险', '建筑工程保险',
  // Kept in step with the fee-page keywords used during bill-page selection,
  // so an insurance row is filtered here as well.
  '社会保险费', '养老保险', '失业保险', '医疗保险',
  '城市维护建设税', '城市建设维护税',
  '教育费附加', '地方教育附加',
  '材料暂估', '专业工程暂估',
  '超高施工增加费', '安全防护费',
  '措施项目费', '其他项目费', '不可竞争费',
];

/**
 * Tell whether a bill item is a "fee item" — non-construction content that
 * must not be written into the technical proposal (safety/civilised-construction
 * fees, statutory fees, taxes, provisional sums, ...).
 *
 * Whitespace inside the name is ignored before matching.
 *
 * @param {string} name - item name as parsed from the bill.
 * @returns {boolean} true for fee items; false for anything else, including
 *   empty or non-string input.
 */
function isFeeItem(name) {
  if (typeof name !== 'string' || !name) return false;
  const compact = name.replace(/\s+/g, '');
  if (FEE_ITEM_EXACT.has(compact)) return true;
  return FEE_ITEM_KEYWORDS.some(kw => compact.includes(kw));
}
/**
 * Separate the "item name" from an inline "item feature description".
 *
 * Example: "土方开挖 1.土壤类别:普通土"
 *       →  { name: "土方开挖", spec: "1.土壤类别:普通土" }
 *
 * The first pattern that matches AFTER position 0 decides the split point;
 * patterns are tried in this order:
 *   1. a numbered feature — digits, then a period / 、 / closing parenthesis,
 *      then a CJK character ("1.土壤类别", "2、强度等级");
 *   2. a feature keyword followed by a colon ("材质:", "规格:");
 *   3. a parenthesised number ("(1)").
 * Half-width and full-width punctuation are both accepted; the full-width
 * forms are written as \uFFxx escapes so they cannot be lost to text
 * normalisation of this file.
 *
 * @param {string} rawName - text between the item code and the unit.
 * @returns {{ name: string, spec: string }} both parts trimmed; `spec` is ''
 *   when no feature description is found, and both are '' for empty input.
 */
function splitNameAndSpec(rawName) {
  if (!rawName) return { name: '', spec: '' };

  const SPLIT_PATTERNS = [
    // Pattern 1: number + ". / 、 / ． / ) / ）" + CJK character.
    /\d+[.、\uFF0E)\uFF09]\s*[\u4e00-\u9fff]/,
    // Pattern 2: feature keyword + ":" or "：".
    /(材质|规格|型号|品牌|颜色|尺寸|厚度|直径|管径|强度|等级|类别|类型|做法|要求|标准|内容|工作内容|土壤|含量|配合比|工艺|方式|形式|范围|部位|位置|高度|宽度|长度|深度|坡度|截面|跨度|运距|开挖|回填|混凝土|钢筋|压实)[:\uFF1A]/,
    // Pattern 3: "(1)" or "（1）".
    /[(\uFF08]\d+[)\uFF09]/,
  ];

  for (const pattern of SPLIT_PATTERNS) {
    const hit = rawName.match(pattern);
    // A hit at index 0 would leave an empty name, so it does not count.
    if (hit && hit.index > 0) {
      return {
        name: rawName.substring(0, hit.index).trim(),
        spec: rawName.substring(hit.index).trim(),
      };
    }
  }
  return { name: rawName, spec: '' };
}
// Engineering vocabulary: a short line containing any of these words may be a
// section (category) title.
const CAT_TITLE_KEYWORDS = [
  '土建','建筑','结构','装饰','装修','安装','给排水','暖通','空调','通风',
  '电气','强电','弱电','消防','智能化','幕墙','门窗','园林','绿化','景观',
  '市政','道路','桥梁','管网','基础','地基','桩基','主体','屋面','防水',
  '保温','钢结构','排水','给水','照明','动力','防雷','电梯','人防','室外',
  '附属','分部','工程','措施','清单','土石方','混凝土','砌筑','模板','脚手架',
  '水利','河道','管道','阀门','设备','仪表','自动化','通信','网络',
  '拆除','外墙','内墙','楼地面','天棚','吊顶','栏杆','屋顶','涂料','抹灰',
  '廊道','阀门井','蓄水池','泵站','供水','引水','水源','渠道','闸门',
  '围栏','警示','检修','管线','配电','水池','水塔','取水','净水',
];

// Fee titles recognised only when they are the WHOLE title. Entries that also
// appear in FEE_TITLE_KEYWORDS are omitted here: the substring test covers them.
const FEE_TITLE_EXACT = new Set([
  '规费', '税金', '利润', '增值税', '企业管理费', '价税合计',
]);

// Fragments that mark a fee title wherever they occur.
const FEE_TITLE_KEYWORDS = [
  '措施项目费', '其他项目费', '不可竞争费',
  '规费汇总', '税金汇总', '费率',
  '费用汇总', '费用合计',
  '暂列金额', '暂估价', '计日工', '总承包服务费',
  '安全文明施工费', '社会保障费', '住房公积金',
  '工伤保险', '教育费附加', '城市维护建设税',
  // Kept in step with the fee-page keywords used during bill-page selection.
  '社会保险费', '养老保险', '失业保险', '医疗保险',
];

/**
 * Tell whether a line contains engineering vocabulary that makes it a
 * candidate section (category) title.
 *
 * @param {string} text - candidate title.
 * @returns {boolean} true when any category keyword occurs in `text`; false
 *   for empty or non-string input.
 */
function isCatTitle(text) {
  if (typeof text !== 'string' || !text) return false;
  return CAT_TITLE_KEYWORDS.some(k => text.includes(k));
}

/**
 * Tell whether a section title is a "fee" title (statutory fees, taxes,
 * measure fees, other-items fees, ...) for which no category must be created.
 *
 * Whitespace inside the title is ignored before matching.
 *
 * @param {string} text - candidate title.
 * @returns {boolean} true for fee titles; false otherwise, including empty or
 *   non-string input.
 */
function isFeeCatTitle(text) {
  if (typeof text !== 'string' || !text) return false;
  const compact = text.replace(/\s+/g, '');
  if (FEE_TITLE_EXACT.has(compact)) return true;
  return FEE_TITLE_KEYWORDS.some(kw => compact.includes(kw));
}