import os
import sys
import threading
import random
import time
from contextlib import contextmanager

# When running as a PyInstaller bundle:
#   sys._MEIPASS        -> read-only bundle dir (templates, static, prompts)
#   sys.executable dir  -> writable dir next to the .exe (data, settings, db)
if getattr(sys, 'frozen', False):
    _BUNDLE_DIR = sys._MEIPASS                   # bundled app files
    BASE_DIR = os.path.dirname(sys.executable)   # writable runtime dir
else:
    _BUNDLE_DIR = os.path.dirname(os.path.abspath(__file__))
    BASE_DIR = _BUNDLE_DIR

DATA_DIR = os.path.join(BASE_DIR, 'data')
UPLOAD_DIR = os.path.join(DATA_DIR, 'uploads')
EXPORT_DIR = os.path.join(DATA_DIR, 'exports')
KNOWLEDGE_DIR = os.path.join(DATA_DIR, 'knowledge')
DB_PATH = os.path.join(DATA_DIR, 'projects.db')
CHROMA_DIR = os.path.join(DATA_DIR, 'chroma')
PROMPTS_DIR = os.path.join(_BUNDLE_DIR, 'prompts')


def _env_bool(name: str, default: str) -> bool:
    """Parse a boolean-ish environment variable: '1'/'true'/'yes' (any case) => True."""
    return os.environ.get(name, default).lower() in ('1', 'true', 'yes')


# ==================== AI model configuration ====================
# Provider selection: 'openai' | 'qwen' | 'deepseek' | 'ollama'
MODEL_PROVIDER = os.environ.get('MODEL_PROVIDER', 'qwen')

# OpenAI
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', 'sk-your-openai-key')
OPENAI_MODEL = os.environ.get('OPENAI_MODEL', 'gpt-4.1')
OPENAI_BASE_URL = os.environ.get('OPENAI_BASE_URL', 'https://api.openai.com/v1')

# Alibaba Cloud Qwen (Tongyi Qianwen)
QWEN_API_KEY = os.environ.get('QWEN_API_KEY', 'sk-your-qwen-key')
QWEN_MODEL = os.environ.get('QWEN_MODEL', 'qwen3.6-plus')
QWEN_BASE_URL = os.environ.get('QWEN_BASE_URL', 'https://dashscope.aliyuncs.com/compatible-mode/v1')
# Text-to-image (multimodal generation) uses a different endpoint than Chat;
# see the Model Studio docs for the Beijing region.
QWEN_MULTIMODAL_BASE = os.environ.get(
    'QWEN_MULTIMODAL_BASE',
    'https://dashscope.aliyuncs.com/api/v1',
)
# Default model for bid-attachment illustrations (synchronous text-to-image)
QWEN_IMAGE_MODEL = os.environ.get('QWEN_IMAGE_MODEL', 'qwen-image-2.0-pro')
QWEN_IMAGE_SIZE = os.environ.get('QWEN_IMAGE_SIZE', '1536*1024')
QWEN_IMAGE_PROMPT_EXTEND = _env_bool('QWEN_IMAGE_PROMPT_EXTEND', 'true')
QWEN_IMAGE_WATERMARK = _env_bool('QWEN_IMAGE_WATERMARK', 'false')
QWEN_IMAGE_NEGATIVE_PROMPT = os.environ.get(
    'QWEN_IMAGE_NEGATIVE_PROMPT',
    '低分辨率, 模糊, 畸形肢体, 过度饱和, 蜡像感, 杂乱构图, 扭曲文字, 公司商标, LOGO, 投标人名称, '
    '塔吊, 塔式起重机, 起重机, 吊车, 挖掘机, 装载机, 压路机, 泵车, 搅拌车, 推土机, 施工机械, 工程车辆, '
    'crane, excavator, tower crane, bulldozer',
)

# DeepSeek
DEEPSEEK_API_KEY = os.environ.get('DEEPSEEK_API_KEY', 'sk-your-deepseek-key')
DEEPSEEK_MODEL = os.environ.get('DEEPSEEK_MODEL', 'deepseek-chat')
DEEPSEEK_BASE_URL = os.environ.get('DEEPSEEK_BASE_URL', 'https://api.deepseek.com/v1')

# Ollama, local (OpenAI-compatible API)
OLLAMA_BASE_URL = os.environ.get('OLLAMA_BASE_URL', 'http://localhost:11434/v1')
OLLAMA_MODEL = os.environ.get('OLLAMA_MODEL', 'qwen3:8b')

# Doubao / Volcano Engine (ByteDance, OpenAI-compatible API)
DOUBAO_API_KEY = os.environ.get('DOUBAO_API_KEY', 'sk-your-doubao-key')
DOUBAO_MODEL = os.environ.get('DOUBAO_MODEL', 'doubao-1-5-pro-32k')
DOUBAO_BASE_URL = os.environ.get('DOUBAO_BASE_URL', 'https://ark.cn-beijing.volces.com/api/v3')

# Kimi / Moonshot AI (OpenAI-compatible API, supports embeddings)
KIMI_API_KEY = os.environ.get('KIMI_API_KEY', 'sk-your-kimi-key')
KIMI_MODEL = os.environ.get('KIMI_MODEL', 'moonshot-v1-32k')
KIMI_BASE_URL = os.environ.get('KIMI_BASE_URL', 'https://api.moonshot.cn/v1')

# Embedding models (per provider)
OPENAI_EMBEDDING_MODEL = 'text-embedding-3-small'
QWEN_EMBEDDING_MODEL = 'text-embedding-v3'
KIMI_EMBEDDING_MODEL = 'moonshot-v1-embedding'

# ==================== Application configuration ====================
MAX_FILE_SIZE_MB = 50
ALLOWED_EXTENSIONS = {'pdf', 'doc', 'docx'}
# Allow overriding the Flask secret via the environment; the hardcoded value
# is only a development fallback (every other credential here is already
# env-configurable — do not ship the default to production).
SECRET_KEY = os.environ.get('SECRET_KEY', 'bidhuo-partner-secret-2024')

# ==================== Generation configuration ====================
MAX_RETRIES = 3
REQUEST_TIMEOUT = int(os.environ.get('REQUEST_TIMEOUT', '180'))
# Outline generation uses long prompts and produces large outputs; extend the
# read timeout (seconds) so the client is not disconnected before the API replies.
OUTLINE_REQUEST_TIMEOUT = int(os.environ.get('OUTLINE_REQUEST_TIMEOUT', '300'))
CHUNK_SIZE = 2000        # knowledge-base text chunk size (characters)
CHUNK_OVERLAP = 200      # overlap between adjacent chunks
TOP_K_KNOWLEDGE = 3      # number of knowledge-base chunks retrieved
# Content volume tier: concise / standard / detailed / full
CONTENT_VOLUME = os.environ.get('CONTENT_VOLUME', 'standard')
TARGET_PAGES = int(os.environ.get('TARGET_PAGES', '0') or '0')  # target page count (0 = disabled)
# (TARGET_PAGES above: 0 disables page targeting)
PAGE_CHAR_ESTIMATE = int(os.environ.get('PAGE_CHAR_ESTIMATE', '700') or '700')  # rough chars-per-page estimate

# ==================== Concurrency control (speed-optimization core) ====================
# The global LLM-call semaphore and the section-generation thread pool share
# one upper bound, 40 real concurrent calls by default. The hard cap can be
# raised via LLM_CONCURRENCY_CAP (values saved from the UI still may not
# exceed it).
LLM_CONCURRENCY_CAP = int(os.environ.get('LLM_CONCURRENCY_CAP', '40'))
_env_def = str(LLM_CONCURRENCY_CAP)
_raw_llm = int(os.environ.get('LLM_CONCURRENCY_LIMIT', _env_def))
_raw_sec = int(os.environ.get('MAX_CONCURRENT_SECTIONS', _env_def))
_cv = max(1, min(_raw_llm, _raw_sec, LLM_CONCURRENCY_CAP))
LLM_CONCURRENCY_LIMIT = _cv
MAX_CONCURRENT_SECTIONS = _cv
_llm_semaphore = threading.Semaphore(_cv)


def refresh_llm_semaphore() -> None:
    """Rebuild the semaphore from the current LLM/section concurrency settings.

    Must be called after loading settings.json so the semaphore reflects the
    (possibly changed) limits. Clamps both limits to [1, LLM_CONCURRENCY_CAP]
    and keeps them equal.
    """
    global _llm_semaphore, LLM_CONCURRENCY_LIMIT, MAX_CONCURRENT_SECTIONS
    v = max(1, min(int(LLM_CONCURRENCY_LIMIT), int(MAX_CONCURRENT_SECTIONS), int(LLM_CONCURRENCY_CAP)))
    LLM_CONCURRENCY_LIMIT = v
    MAX_CONCURRENT_SECTIONS = v
    _llm_semaphore = threading.Semaphore(v)


def set_concurrency(n: int) -> None:
    """Set the section-pool concurrency and LLM semaphore limit together.

    The value is clamped to [1, LLM_CONCURRENCY_CAP] and the semaphore is
    reset to the new limit.
    """
    global MAX_CONCURRENT_SECTIONS, LLM_CONCURRENCY_LIMIT, _llm_semaphore
    v = max(1, min(int(n), int(LLM_CONCURRENCY_CAP)))
    MAX_CONCURRENT_SECTIONS = v
    LLM_CONCURRENCY_LIMIT = v
    _llm_semaphore = threading.Semaphore(v)


@contextmanager
def llm_call():
    """Global LLM-call gate; all ai_client.chat / embedding calls must use it.

    Adds a tiny jitter for the Qwen cloud API to dodge 429 rate limits; the
    60 s acquire timeout prevents deadlock.

    Raises:
        TimeoutError: if no permit becomes available within 60 seconds.
    """
    # BUGFIX: capture the semaphore object before acquiring. The previous code
    # re-read the global in `finally`; if refresh_llm_semaphore() or
    # set_concurrency() rebound _llm_semaphore mid-call, the permit was
    # released into the NEW semaphore (over-releasing it) while the old one
    # leaked its permit. Acquire and release must target the same object.
    sem = _llm_semaphore
    acquired = sem.acquire(blocking=True, timeout=60.0)
    if not acquired:
        raise TimeoutError(f"LLM并发已达上限({LLM_CONCURRENCY_LIMIT}),请稍后重试")
    try:
        # Qwen is RPM-sensitive: a tiny jitter (0-0.08 s) spreads out bursts.
        if MODEL_PROVIDER == 'qwen':
            time.sleep(random.uniform(0, 0.08))
        yield
    finally:
        sem.release()