# Listing metadata: 171 lines, 6.2 KiB, Python
"""字数分配与 rating_json 解析单元测试。"""
|
|
import json
|
|
import unittest
|
|
|
|
import config as cfg
|
|
|
|
from utils import word_allocation as wa
|
|
|
|
|
|
class TestParseRatingJson(unittest.TestCase):
    """Checks for ``wa.parse_rating_json``."""

    def test_canonical_items(self):
        """A payload with an ``items`` list yields one parsed entry per item."""
        payload = {
            'items': [
                {'id': 'T1', 'name': '施工方案', 'weight': 30, 'keywords': ['工艺']},
                {'id': 'T2', 'name': '质量保证', 'weight': 10, 'keywords': []},
            ],
            'notes': '',
        }
        parsed = wa.parse_rating_json(json.dumps(payload, ensure_ascii=False))

        self.assertEqual(len(parsed), 2)

        # Both item names must survive parsing.
        parsed_names = set()
        for entry in parsed:
            parsed_names.add(entry['name'])
        for expected_name in ('施工方案', '质量保证'):
            self.assertIn(expected_name, parsed_names)

        # The weight of the first item must compare equal to 30.0.
        weight_by_name = {}
        for entry in parsed:
            weight_by_name[entry['name']] = entry['weight']
        self.assertEqual(weight_by_name['施工方案'], 30.0)

    def test_malformed_returns_empty(self):
        """Non-JSON text and the empty string both parse to an empty list."""
        for bad_input in ('not json', ''):
            self.assertEqual(wa.parse_rating_json(bad_input), [])
|
|
|
|
|
|
class TestComputeLeafAllocations(unittest.TestCase):
    """Tests for ``wa.compute_leaf_allocations`` and ``wa.continuation_threshold``.

    Several tests temporarily override attributes on the ``cfg`` module; they
    go through ``_allocate_with_cfg`` so the previous values are always
    restored, even when the call under test raises.
    """

    # Volume preset name passed to every compute_leaf_allocations call here.
    VOLUME = 'standard'

    # Sentinel marking a cfg attribute that did not exist before an override.
    _MISSING = object()

    @staticmethod
    def _rules(**overrides):
        """Return a copy of ``wa.DEFAULT_RULES`` with ``overrides`` applied."""
        rules = dict(wa.DEFAULT_RULES)
        rules.update(overrides)
        return rules

    @staticmethod
    def _single_item_rating():
        """Return rating JSON holding one item with weight 100."""
        return json.dumps(
            {'items': [{'name': '技术部分', 'weight': 100}]},
            ensure_ascii=False,
        )

    def _anchor_mean_budget(self, leaf_count):
        """Return the total the tests expect from the anchor-mean budget.

        Computed as ``leaf_count`` times the mean of the first two values of
        the volume preset, rounded to an int.
        """
        base, core, _, _ = wa.VOLUME_PRESETS[self.VOLUME]
        return int(round(leaf_count * (base + core) / 2.0))

    def _allocate_with_cfg(self, leaves, rating, rules, **cfg_overrides):
        """Call ``wa.compute_leaf_allocations`` with ``cfg`` attributes overridden.

        Args:
            leaves: Leaf section dicts passed through to the function under test.
            rating: Rating JSON string (may be empty).
            rules: Rules dict passed through to the function under test.
            **cfg_overrides: ``cfg`` attribute names mapped to temporary values.

        Returns:
            Whatever ``wa.compute_leaf_allocations`` returns.
        """
        saved = {
            name: getattr(cfg, name, self._MISSING) for name in cfg_overrides
        }
        try:
            for name, value in cfg_overrides.items():
                setattr(cfg, name, value)
            return wa.compute_leaf_allocations(self.VOLUME, leaves, rating, rules)
        finally:
            for name, previous in saved.items():
                if previous is not self._MISSING:
                    setattr(cfg, name, previous)
                elif hasattr(cfg, name):
                    # The attribute did not exist before; do not leave it behind.
                    delattr(cfg, name)

    def test_none_when_no_rating_and_not_target_pages_budget(self):
        """An empty rating with the 'anchor_mean' budget mode yields None."""
        leaves = [{'id': 1, 'section_title': '一、总体方案'}]
        rules = self._rules(budget_mode='anchor_mean')
        self.assertIsNone(
            wa.compute_leaf_allocations(self.VOLUME, leaves, '', rules)
        )

    def test_uniform_when_no_rating_but_target_pages(self):
        """Without a technical rating the page budget is still split evenly.

        The budget B = pages x chars-per-page is divided equally among the
        leaves, so the whole draft does not grow linearly with the section
        count N.
        """
        leaves = [
            {'id': 1, 'section_title': 'A'},
            {'id': 2, 'section_title': 'B'},
        ]
        rules = self._rules(budget_mode='target_pages')
        out = self._allocate_with_cfg(
            leaves, '', rules, TARGET_PAGES=100, PAGE_CHAR_ESTIMATE=700
        )
        self.assertIsNotNone(out)
        total = out[1]['target_chars'] + out[2]['target_chars']
        self.assertEqual(total, 100 * 700)
        self.assertEqual(out[1]['target_chars'], out[2]['target_chars'])

    def test_monotonicity_high_weight_match(self):
        """A leaf matching the high-weight item gets at least as many chars."""
        rating = json.dumps(
            {
                'items': [
                    {'name': '施工组织设计', 'weight': 50, 'keywords': ['进度']},
                    {'name': '页眉页脚规范', 'weight': 2, 'keywords': []},
                ]
            },
            ensure_ascii=False,
        )
        leaves = [
            {'id': 10, 'section_title': '3.1 施工组织设计与进度计划'},
            {'id': 11, 'section_title': '9.9 页眉格式说明'},
        ]
        rules = self._rules(alpha=0.95)
        out = wa.compute_leaf_allocations(self.VOLUME, leaves, rating, rules)
        self.assertIsNotNone(out)
        t_high = out[10]['target_chars']
        t_low = out[11]['target_chars']
        self.assertGreaterEqual(t_high, t_low, '强匹配高分项的章节应不低于弱匹配章节')
        self.assertIn('施工组织设计', out[10]['word_count_spec'])

    def test_budget_anchor_mean(self):
        """In 'anchor_mean' mode the total equals the anchor-mean budget."""
        leaves = [
            {'id': 1, 'section_title': 'A'},
            {'id': 2, 'section_title': 'B'},
            {'id': 3, 'section_title': 'C'},
        ]
        rules = self._rules(budget_mode='anchor_mean', alpha=0.0)
        out = self._allocate_with_cfg(
            leaves, self._single_item_rating(), rules, TARGET_PAGES=0
        )
        self.assertIsNotNone(out)
        total = sum(out[i]['target_chars'] for i in (1, 2, 3))
        self.assertEqual(total, self._anchor_mean_budget(len(leaves)))

    def test_budget_target_pages(self):
        """In 'target_pages' mode the total equals pages x chars-per-page."""
        leaves = [
            {'id': 1, 'section_title': 'A'},
            {'id': 2, 'section_title': 'B'},
        ]
        rules = self._rules(budget_mode='target_pages', alpha=0.0)
        out = self._allocate_with_cfg(
            leaves,
            self._single_item_rating(),
            rules,
            TARGET_PAGES=100,
            PAGE_CHAR_ESTIMATE=700,
        )
        self.assertIsNotNone(out)
        total = sum(out[i]['target_chars'] for i in (1, 2))
        self.assertEqual(total, 100 * 700)

    def test_budget_target_pages_falls_back_when_pages_zero(self):
        """With TARGET_PAGES == 0, 'target_pages' mode uses the anchor-mean total."""
        leaves = [
            {'id': 1, 'section_title': 'A'},
            {'id': 2, 'section_title': 'B'},
        ]
        rules = self._rules(budget_mode='target_pages', alpha=0.0)
        out = self._allocate_with_cfg(
            leaves, self._single_item_rating(), rules, TARGET_PAGES=0
        )
        self.assertIsNotNone(out)
        total = sum(out[i]['target_chars'] for i in (1, 2))
        self.assertEqual(total, self._anchor_mean_budget(len(leaves)))

    def test_continuation_threshold(self):
        """Pin ``continuation_threshold`` for inputs 2000 and 100."""
        self.assertEqual(wa.continuation_threshold(2000), 1300)
        self.assertEqual(wa.continuation_threshold(100), 200)
|
|
|
|
|
|
# Run the test suite when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
|