# config.py — application configuration: paths, AI provider settings, and concurrency control.
# Standard-library imports (PEP 8: one per line, alphabetical).
import os
import random
import sys
import threading
import time
from contextlib import contextmanager

# When running as a PyInstaller bundle:
|
||
# sys._MEIPASS → read-only bundle dir (templates, static, prompts)
|
||
# sys.executable dir → writable dir next to the .exe (data, settings, db)
|
||
if getattr(sys, 'frozen', False):
|
||
_BUNDLE_DIR = sys._MEIPASS # bundled app files
|
||
BASE_DIR = os.path.dirname(sys.executable) # writable runtime dir
|
||
else:
|
||
_BUNDLE_DIR = os.path.dirname(os.path.abspath(__file__))
|
||
BASE_DIR = _BUNDLE_DIR
|
||
|
||
# Writable data locations (under BASE_DIR) and read-only prompt assets (bundle dir).
DATA_DIR = os.path.join(BASE_DIR, 'data')
UPLOAD_DIR = os.path.join(DATA_DIR, 'uploads')
EXPORT_DIR = os.path.join(DATA_DIR, 'exports')
KNOWLEDGE_DIR = os.path.join(DATA_DIR, 'knowledge')
DB_PATH = os.path.join(DATA_DIR, 'projects.db')  # SQLite project database
CHROMA_DIR = os.path.join(DATA_DIR, 'chroma')  # vector-store persistence dir
PROMPTS_DIR = os.path.join(_BUNDLE_DIR, 'prompts')  # read-only prompt templates

# ==================== AI model configuration ====================
# Provider selection: 'openai' | 'qwen' | 'deepseek' | 'ollama'
# NOTE(review): doubao and kimi are also configured below — confirm whether
# ai_client accepts them as MODEL_PROVIDER values.
MODEL_PROVIDER = os.environ.get('MODEL_PROVIDER', 'qwen')

# OpenAI
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', 'sk-your-openai-key')
OPENAI_MODEL = os.environ.get('OPENAI_MODEL', 'gpt-4.1')
OPENAI_BASE_URL = os.environ.get('OPENAI_BASE_URL', 'https://api.openai.com/v1')

# Alibaba Cloud Qwen (Tongyi Qianwen)
QWEN_API_KEY = os.environ.get('QWEN_API_KEY', 'sk-your-qwen-key')
QWEN_MODEL = os.environ.get('QWEN_MODEL', 'qwen3.6-plus')
QWEN_BASE_URL = os.environ.get('QWEN_BASE_URL', 'https://dashscope.aliyuncs.com/compatible-mode/v1')
# Text-to-image (multimodal generation) uses a different endpoint than chat;
# see the Model Studio docs for the Beijing region.
QWEN_MULTIMODAL_BASE = os.environ.get(
    'QWEN_MULTIMODAL_BASE',
    'https://dashscope.aliyuncs.com/api/v1',
)
# Default model for bid-attachment illustrations (synchronous text-to-image).
QWEN_IMAGE_MODEL = os.environ.get('QWEN_IMAGE_MODEL', 'qwen-image-2.0-pro')
QWEN_IMAGE_SIZE = os.environ.get('QWEN_IMAGE_SIZE', '1536*1024')
# Env-driven booleans: any of '1' / 'true' / 'yes' (case-insensitive) enables.
QWEN_IMAGE_PROMPT_EXTEND = os.environ.get('QWEN_IMAGE_PROMPT_EXTEND', 'true').lower() in (
    '1', 'true', 'yes',
)
QWEN_IMAGE_WATERMARK = os.environ.get('QWEN_IMAGE_WATERMARK', 'false').lower() in (
    '1', 'true', 'yes',
)
# Default negative prompt suppressing artifacts, branding, and construction
# machinery in generated illustrations (value intentionally kept verbatim).
QWEN_IMAGE_NEGATIVE_PROMPT = os.environ.get(
    'QWEN_IMAGE_NEGATIVE_PROMPT',
    '低分辨率, 模糊, 畸形肢体, 过度饱和, 蜡像感, 杂乱构图, 扭曲文字, 公司商标, LOGO, 投标人名称, '
    '塔吊, 塔式起重机, 起重机, 吊车, 挖掘机, 装载机, 压路机, 泵车, 搅拌车, 推土机, 施工机械, 工程车辆, '
    'crane, excavator, tower crane, bulldozer',
)

# DeepSeek
DEEPSEEK_API_KEY = os.environ.get('DEEPSEEK_API_KEY', 'sk-your-deepseek-key')
DEEPSEEK_MODEL = os.environ.get('DEEPSEEK_MODEL', 'deepseek-chat')
DEEPSEEK_BASE_URL = os.environ.get('DEEPSEEK_BASE_URL', 'https://api.deepseek.com/v1')

# Ollama local (OpenAI-compatible endpoint)
OLLAMA_BASE_URL = os.environ.get('OLLAMA_BASE_URL', 'http://localhost:11434/v1')
OLLAMA_MODEL = os.environ.get('OLLAMA_MODEL', 'qwen3:8b')

# Doubao / Volcano Engine (ByteDance, OpenAI-compatible endpoint)
DOUBAO_API_KEY = os.environ.get('DOUBAO_API_KEY', 'sk-your-doubao-key')
DOUBAO_MODEL = os.environ.get('DOUBAO_MODEL', 'doubao-1-5-pro-32k')
DOUBAO_BASE_URL = os.environ.get('DOUBAO_BASE_URL', 'https://ark.cn-beijing.volces.com/api/v3')

# Kimi / Moonshot AI (OpenAI-compatible endpoint, supports embeddings)
KIMI_API_KEY = os.environ.get('KIMI_API_KEY', 'sk-your-kimi-key')
KIMI_MODEL = os.environ.get('KIMI_MODEL', 'moonshot-v1-32k')
KIMI_BASE_URL = os.environ.get('KIMI_BASE_URL', 'https://api.moonshot.cn/v1')

# Embedding models (fixed per provider, not env-configurable)
OPENAI_EMBEDDING_MODEL = 'text-embedding-3-small'
QWEN_EMBEDDING_MODEL = 'text-embedding-v3'
KIMI_EMBEDDING_MODEL = 'moonshot-v1-embedding'

# ==================== Application settings ====================
MAX_FILE_SIZE_MB = 50  # per-upload size limit (MiB)
ALLOWED_EXTENSIONS = {'pdf', 'doc', 'docx'}  # accepted upload file extensions
# NOTE(review): secret key is hard-coded in source; prefer loading it from an
# environment variable for production deployments.
SECRET_KEY = 'bidhuo-partner-secret-2024'

# ==================== Generation settings ====================
MAX_RETRIES = 3  # retry attempts per failed LLM request
REQUEST_TIMEOUT = int(os.environ.get('REQUEST_TIMEOUT', '180'))  # seconds
# Outline generation sends a long prompt and expects a large response; use a
# longer read timeout (seconds) so the client is not disconnected before the
# API returns.
OUTLINE_REQUEST_TIMEOUT = int(os.environ.get('OUTLINE_REQUEST_TIMEOUT', '300'))
CHUNK_SIZE = 2000  # knowledge-base text chunk size (characters)
CHUNK_OVERLAP = 200  # overlap between adjacent chunks (characters)
TOP_K_KNOWLEDGE = 3  # number of knowledge-base chunks retrieved per query
# Length tier: concise / standard / detailed / full
CONTENT_VOLUME = os.environ.get('CONTENT_VOLUME', 'standard')
# 'or' guards turn an empty env value ('') into the default before int().
TARGET_PAGES = int(os.environ.get('TARGET_PAGES', '0') or '0')  # target page count (0 = disabled)
PAGE_CHAR_ESTIMATE = int(os.environ.get('PAGE_CHAR_ESTIMATE', '700') or '700')  # rough characters per page

# ==================== 并发控制 (极速优化核心) ====================
|
||
# 全局 LLM 调用信号量与章节生成线程池共用同一上限,默认 40 路真实并发。
|
||
# 可通过环境变量 LLM_CONCURRENCY_CAP 提高硬上限(界面保存仍不可超过该值)。
|
||
LLM_CONCURRENCY_CAP = int(os.environ.get('LLM_CONCURRENCY_CAP', '40'))
|
||
_env_def = str(LLM_CONCURRENCY_CAP)
|
||
_raw_llm = int(os.environ.get('LLM_CONCURRENCY_LIMIT', _env_def))
|
||
_raw_sec = int(os.environ.get('MAX_CONCURRENT_SECTIONS', _env_def))
|
||
_cv = max(1, min(_raw_llm, _raw_sec, LLM_CONCURRENCY_CAP))
|
||
LLM_CONCURRENCY_LIMIT = _cv
|
||
MAX_CONCURRENT_SECTIONS = _cv
|
||
_llm_semaphore = threading.Semaphore(_cv)
|
||
|
||
|
||
def refresh_llm_semaphore() -> None:
    """Rebuild the LLM semaphore from the current concurrency settings.

    Must be called after settings.json is loaded so the semaphore reflects
    any updated LLM/section concurrency values. Clamps both knobs to
    [1, LLM_CONCURRENCY_CAP], keeps them in lockstep, and replaces the
    module-level semaphore with a fresh one of that size.
    """
    global _llm_semaphore, LLM_CONCURRENCY_LIMIT, MAX_CONCURRENT_SECTIONS
    # Effective value: smaller of the two knobs, never below 1 or above the cap.
    v = max(1, min(int(LLM_CONCURRENCY_LIMIT), int(MAX_CONCURRENT_SECTIONS), int(LLM_CONCURRENCY_CAP)))
    LLM_CONCURRENCY_LIMIT = v
    MAX_CONCURRENT_SECTIONS = v
    _llm_semaphore = threading.Semaphore(v)

def set_concurrency(n: int) -> None:
    """Set the section-pool and LLM-semaphore concurrency and reset the semaphore.

    Args:
        n: Requested concurrency; clamped to [1, LLM_CONCURRENCY_CAP].
    """
    global MAX_CONCURRENT_SECTIONS, LLM_CONCURRENCY_LIMIT, _llm_semaphore
    v = max(1, min(int(n), int(LLM_CONCURRENCY_CAP)))
    MAX_CONCURRENT_SECTIONS = v
    LLM_CONCURRENCY_LIMIT = v
    _llm_semaphore = threading.Semaphore(v)

@contextmanager
def llm_call():
    """Global LLM call gate — all ai_client.chat / embedding calls must use it.

    Acquires the shared semaphore (60 s timeout to avoid deadlock) and, for
    the Qwen cloud API, sleeps a tiny random jitter (0–0.08 s) to avoid 429
    rate-limit bursts.

    Raises:
        TimeoutError: if the semaphore cannot be acquired within 60 seconds.
    """
    # Bind the semaphore locally: refresh_llm_semaphore()/set_concurrency()
    # may swap the module-level object mid-call, and we must release the SAME
    # semaphore we acquired (releasing the new one would inflate its count
    # and leak a permit from the old one).
    sem = _llm_semaphore
    acquired = sem.acquire(blocking=True, timeout=60.0)
    if not acquired:
        raise TimeoutError(f"LLM并发已达上限({LLM_CONCURRENCY_LIMIT}),请稍后重试")
    try:
        # Qwen is RPM-sensitive; a tiny jitter spreads simultaneous requests.
        if MODEL_PROVIDER == 'qwen':
            time.sleep(random.uniform(0, 0.08))
        yield
    finally:
        sem.release()