# tech-bid-manage20260422/tests/test_word_allocation.py

"""字数分配与 rating_json 解析单元测试。"""
import json
import unittest

import config as cfg

from utils import word_allocation as wa


class TestParseRatingJson(unittest.TestCase):
    """Tests for ``wa.parse_rating_json``."""

    def test_canonical_items(self):
        """A payload with an ``items`` list yields one parsed entry per item."""
        payload = {
            'items': [
                {'id': 'T1', 'name': '施工方案', 'weight': 30, 'keywords': ['工艺']},
                {'id': 'T2', 'name': '质量保证', 'weight': 10, 'keywords': []},
            ],
            'notes': '',
        }
        parsed = wa.parse_rating_json(json.dumps(payload, ensure_ascii=False))

        self.assertEqual(len(parsed), 2)

        # Both item names must survive parsing.
        seen_names = set()
        for entry in parsed:
            seen_names.add(entry['name'])
        self.assertIn('施工方案', seen_names)
        self.assertIn('质量保证', seen_names)

        # The weight of the first item must compare equal to 30.0.
        weight_by_name = {}
        for entry in parsed:
            weight_by_name[entry['name']] = entry['weight']
        self.assertEqual(weight_by_name['施工方案'], 30.0)

    def test_malformed_returns_empty(self):
        """Non-JSON text and the empty string both parse to an empty list."""
        for bad_input in ('not json', ''):
            self.assertEqual(wa.parse_rating_json(bad_input), [])


class TestComputeLeafAllocations(unittest.TestCase):
    """Tests for ``wa.compute_leaf_allocations`` and ``wa.continuation_threshold``."""

    def _override_cfg(self, **overrides):
        """Set attributes on ``cfg`` for the duration of the current test.

        Each override is undone through ``addCleanup``, so the shared config
        module is restored even when the test fails or raises. An attribute
        that did not exist beforehand is deleted again instead of being left
        behind with a placeholder value.
        """
        for name, value in overrides.items():
            if hasattr(cfg, name):
                self.addCleanup(setattr, cfg, name, getattr(cfg, name))
            else:
                self.addCleanup(delattr, cfg, name)
            setattr(cfg, name, value)

    @staticmethod
    def _rules(**overrides):
        """Return a copy of ``wa.DEFAULT_RULES`` with ``overrides`` applied."""
        return dict(wa.DEFAULT_RULES, **overrides)

    def test_none_when_no_rating_and_not_target_pages_budget(self):
        """Empty rating with ``budget_mode='anchor_mean'`` returns ``None``."""
        leaves = [{'id': 1, 'section_title': '一、总体方案'}]
        rules = self._rules(budget_mode='anchor_mean')
        self.assertIsNone(
            wa.compute_leaf_allocations('standard', leaves, '', rules)
        )

    def test_uniform_when_no_rating_but_target_pages(self):
        """Empty rating in ``target_pages`` mode splits the page budget evenly.

        The budget is pages x chars-per-page, so the total does not grow
        linearly with the number of sections.
        """
        leaves = [
            {'id': 1, 'section_title': 'A'},
            {'id': 2, 'section_title': 'B'},
        ]
        rules = self._rules(budget_mode='target_pages')
        self._override_cfg(TARGET_PAGES=100, PAGE_CHAR_ESTIMATE=700)

        out = wa.compute_leaf_allocations('standard', leaves, '', rules)

        self.assertIsNotNone(out)
        total = out[1]['target_chars'] + out[2]['target_chars']
        self.assertEqual(total, 100 * 700)
        self.assertEqual(out[1]['target_chars'], out[2]['target_chars'])

    def test_monotonicity_high_weight_match(self):
        """A leaf matching the heavy item gets at least as many chars as the other.

        Also checks that the heavy item's name appears in the matching leaf's
        ``word_count_spec``.
        """
        rating = json.dumps(
            {
                'items': [
                    {'name': '施工组织设计', 'weight': 50, 'keywords': ['进度']},
                    {'name': '页眉页脚规范', 'weight': 2, 'keywords': []},
                ]
            },
            ensure_ascii=False,
        )
        leaves = [
            {'id': 10, 'section_title': '3.1 施工组织设计与进度计划'},
            {'id': 11, 'section_title': '9.9 页眉格式说明'},
        ]
        rules = self._rules(alpha=0.95)

        out = wa.compute_leaf_allocations('standard', leaves, rating, rules)

        self.assertIsNotNone(out)
        t_high = out[10]['target_chars']
        t_low = out[11]['target_chars']
        self.assertGreaterEqual(t_high, t_low, '强匹配高分项的章节应不低于弱匹配章节')
        self.assertIn('施工组织设计', out[10]['word_count_spec'])

    def test_budget_anchor_mean(self):
        """``anchor_mean`` mode: targets sum to ``N * (base + core) / 2``, rounded.

        ``base`` and ``core`` are the first two entries of the ``'standard'``
        preset; ``alpha`` is 0 and ``cfg.TARGET_PAGES`` is forced to 0.
        """
        rating = json.dumps(
            {'items': [{'name': '技术部分', 'weight': 100}]},
            ensure_ascii=False,
        )
        leaves = [
            {'id': 1, 'section_title': 'A'},
            {'id': 2, 'section_title': 'B'},
            {'id': 3, 'section_title': 'C'},
        ]
        rules = self._rules(budget_mode='anchor_mean', alpha=0.0)
        self._override_cfg(TARGET_PAGES=0)

        out = wa.compute_leaf_allocations('standard', leaves, rating, rules)

        self.assertIsNotNone(out)
        base, core, _, _ = wa.VOLUME_PRESETS['standard']
        expect = int(round(len(leaves) * (base + core) / 2.0))
        total = sum(out[i]['target_chars'] for i in (1, 2, 3))
        self.assertEqual(total, expect)

    def test_budget_target_pages(self):
        """``target_pages`` mode: targets sum to pages x chars-per-page."""
        rating = json.dumps(
            {'items': [{'name': '技术部分', 'weight': 100}]},
            ensure_ascii=False,
        )
        leaves = [
            {'id': 1, 'section_title': 'A'},
            {'id': 2, 'section_title': 'B'},
        ]
        rules = self._rules(budget_mode='target_pages', alpha=0.0)
        self._override_cfg(TARGET_PAGES=100, PAGE_CHAR_ESTIMATE=700)

        out = wa.compute_leaf_allocations('standard', leaves, rating, rules)

        self.assertIsNotNone(out)
        expect = 100 * 700
        total = sum(out[i]['target_chars'] for i in (1, 2))
        self.assertEqual(total, expect)

    def test_budget_target_pages_falls_back_when_pages_zero(self):
        """``target_pages`` mode with zero pages uses the anchor-mean total.

        The expected sum is ``N * (base + core) / 2`` rounded, taken from the
        ``'standard'`` preset.
        """
        rating = json.dumps(
            {'items': [{'name': '技术部分', 'weight': 100}]},
            ensure_ascii=False,
        )
        leaves = [
            {'id': 1, 'section_title': 'A'},
            {'id': 2, 'section_title': 'B'},
        ]
        rules = self._rules(budget_mode='target_pages', alpha=0.0)
        self._override_cfg(TARGET_PAGES=0)

        out = wa.compute_leaf_allocations('standard', leaves, rating, rules)

        self.assertIsNotNone(out)
        base, core, _, _ = wa.VOLUME_PRESETS['standard']
        expect = int(round(len(leaves) * (base + core) / 2.0))
        total = sum(out[i]['target_chars'] for i in (1, 2))
        self.assertEqual(total, expect)

    def test_continuation_threshold(self):
        """Pins two sample values: 2000 -> 1300 and 100 -> 200."""
        self.assertEqual(wa.continuation_threshold(2000), 1300)
        self.assertEqual(wa.continuation_threshold(100), 200)


# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()