import os
import sys
import threading
import random
import time
from contextlib import contextmanager

# When running as a PyInstaller bundle:
#   sys._MEIPASS          -> read-only bundle dir (templates, static, prompts)
#   dir of sys.executable -> writable dir next to the .exe (data, settings, db)
if getattr(sys, 'frozen', False):
    _BUNDLE_DIR = sys._MEIPASS                  # bundled app files
    BASE_DIR = os.path.dirname(sys.executable)  # writable runtime dir
else:
    _BUNDLE_DIR = os.path.dirname(os.path.abspath(__file__))
    BASE_DIR = _BUNDLE_DIR

DATA_DIR = os.path.join(BASE_DIR, 'data')
UPLOAD_DIR = os.path.join(DATA_DIR, 'uploads')
EXPORT_DIR = os.path.join(DATA_DIR, 'exports')
KNOWLEDGE_DIR = os.path.join(DATA_DIR, 'knowledge')
DB_PATH = os.path.join(DATA_DIR, 'projects.db')
CHROMA_DIR = os.path.join(DATA_DIR, 'chroma')
PROMPTS_DIR = os.path.join(_BUNDLE_DIR, 'prompts')

# ==================== AI model configuration ====================
# Provider selection: 'openai' | 'qwen' | 'deepseek' | 'ollama'
MODEL_PROVIDER = os.environ.get('MODEL_PROVIDER', 'qwen')

# OpenAI
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', 'sk-your-openai-key')
OPENAI_MODEL = os.environ.get('OPENAI_MODEL', 'gpt-4.1')
OPENAI_BASE_URL = os.environ.get('OPENAI_BASE_URL', 'https://api.openai.com/v1')

# Alibaba Cloud Qwen (Tongyi Qianwen)
QWEN_API_KEY = os.environ.get('QWEN_API_KEY', 'sk-your-qwen-key')
QWEN_MODEL = os.environ.get('QWEN_MODEL', 'qwen3.6-plus')
QWEN_BASE_URL = os.environ.get('QWEN_BASE_URL', 'https://dashscope.aliyuncs.com/compatible-mode/v1')
# Text-to-image (multimodal generation) uses a different endpoint than Chat;
# see the Model Studio docs for the Beijing region.
QWEN_MULTIMODAL_BASE = os.environ.get(
    'QWEN_MULTIMODAL_BASE',
    'https://dashscope.aliyuncs.com/api/v1',
)

# Default model for bid-attachment illustrations (synchronous text-to-image).
QWEN_IMAGE_MODEL = os.environ.get('QWEN_IMAGE_MODEL', 'qwen-image-2.0-pro')
QWEN_IMAGE_SIZE = os.environ.get('QWEN_IMAGE_SIZE', '1536*1024')

# Values recognized as "true" when parsing boolean flags from the environment.
_TRUTHY = ('1', 'true', 'yes')
QWEN_IMAGE_PROMPT_EXTEND = os.environ.get('QWEN_IMAGE_PROMPT_EXTEND', 'true').lower() in _TRUTHY
QWEN_IMAGE_WATERMARK = os.environ.get('QWEN_IMAGE_WATERMARK', 'false').lower() in _TRUTHY
# Default negative prompt for generated illustrations (keeps out artifacts,
# logos and bidder names).
QWEN_IMAGE_NEGATIVE_PROMPT = os.environ.get(
    'QWEN_IMAGE_NEGATIVE_PROMPT',
    '低分辨率, 模糊, 畸形肢体, 过度饱和, 蜡像感, 杂乱构图, 扭曲文字, 公司商标, LOGO, 投标人名称',
)

# DeepSeek
DEEPSEEK_API_KEY = os.environ.get('DEEPSEEK_API_KEY', 'sk-your-deepseek-key')
DEEPSEEK_MODEL = os.environ.get('DEEPSEEK_MODEL', 'deepseek-chat')
DEEPSEEK_BASE_URL = os.environ.get('DEEPSEEK_BASE_URL', 'https://api.deepseek.com/v1')

# Ollama, local (OpenAI-compatible API)
OLLAMA_BASE_URL = os.environ.get('OLLAMA_BASE_URL', 'http://localhost:11434/v1')
OLLAMA_MODEL = os.environ.get('OLLAMA_MODEL', 'qwen3:8b')

# Doubao / Volcengine (ByteDance, OpenAI-compatible API)
DOUBAO_API_KEY = os.environ.get('DOUBAO_API_KEY', 'sk-your-doubao-key')
DOUBAO_MODEL = os.environ.get('DOUBAO_MODEL', 'doubao-1-5-pro-32k')
DOUBAO_BASE_URL = os.environ.get('DOUBAO_BASE_URL', 'https://ark.cn-beijing.volces.com/api/v3')

# Kimi / Moonshot AI (OpenAI-compatible API, supports embeddings)
KIMI_API_KEY = os.environ.get('KIMI_API_KEY', 'sk-your-kimi-key')
KIMI_MODEL = os.environ.get('KIMI_MODEL', 'moonshot-v1-32k')
KIMI_BASE_URL = os.environ.get('KIMI_BASE_URL', 'https://api.moonshot.cn/v1')

# Embedding models — now env-overridable for consistency with every other
# model setting in this file (same defaults, backward compatible).
OPENAI_EMBEDDING_MODEL = os.environ.get('OPENAI_EMBEDDING_MODEL', 'text-embedding-3-small')
QWEN_EMBEDDING_MODEL = os.environ.get('QWEN_EMBEDDING_MODEL', 'text-embedding-v3')
KIMI_EMBEDDING_MODEL = os.environ.get('KIMI_EMBEDDING_MODEL', 'moonshot-v1-embedding')

# ==================== Application configuration ====================
MAX_FILE_SIZE_MB = 50
ALLOWED_EXTENSIONS = {'pdf', 'doc', 'docx'}
# SECURITY: previously hard-coded only. The literal default is kept for
# backward compatibility, but deployments should set SECRET_KEY in the
# environment instead of shipping a shared secret in source.
SECRET_KEY = os.environ.get('SECRET_KEY', 'bidhuo-partner-secret-2024')

# ==================== Generation configuration ====================
MAX_RETRIES = 3
REQUEST_TIMEOUT = int(os.environ.get('REQUEST_TIMEOUT', '180'))
# Outline generation uses a long prompt and produces a large output; extend
# the read timeout (seconds) so the client is not disconnected before the
# API has replied.
OUTLINE_REQUEST_TIMEOUT = int(os.environ.get('OUTLINE_REQUEST_TIMEOUT', '300'))
CHUNK_SIZE = 2000        # knowledge-base text chunk size (characters)
CHUNK_OVERLAP = 200      # chunk overlap (characters)
TOP_K_KNOWLEDGE = 3      # number of knowledge-base chunks retrieved
# Content volume tier: concise / standard / detailed / full
CONTENT_VOLUME = os.environ.get('CONTENT_VOLUME', 'standard')
# Target page count (0 = disabled); `or '0'` guards against an empty env var.
TARGET_PAGES = int(os.environ.get('TARGET_PAGES', '0') or '0')
# Rough characters-per-page estimate used for volume planning.
PAGE_CHAR_ESTIMATE = int(os.environ.get('PAGE_CHAR_ESTIMATE', '700') or '700')
# ==================== Concurrency control (fast-path optimization core) ====================
# Global cap on concurrent LLM calls, protecting cloud APIs such as Qwen from
# rate limiting. Defaults to 20, matching the agreed requirement.
LLM_CONCURRENCY_LIMIT = int(os.environ.get('LLM_CONCURRENCY_LIMIT', '20'))
_llm_semaphore = threading.Semaphore(LLM_CONCURRENCY_LIMIT)


@contextmanager
def llm_call():
    """Context manager gating every LLM call behind the global semaphore.

    All ai_client.chat / embedding calls must go through this. For the Qwen
    provider a tiny random jitter is added to avoid HTTP 429 rate limits;
    the 60 s acquire timeout prevents deadlock when the pool is saturated.
    """
    if not _llm_semaphore.acquire(blocking=True, timeout=60.0):
        raise TimeoutError(f"LLM并发已达上限({LLM_CONCURRENCY_LIMIT}),请稍后重试")
    try:
        # Qwen is RPM-sensitive: spread out bursts with a 0-0.08 s jitter.
        if MODEL_PROVIDER == 'qwen':
            time.sleep(random.uniform(0, 0.08))
        yield
    finally:
        _llm_semaphore.release()


# Default number of chapters generated concurrently; supports a higher cap
# (UI to be synced later).
MAX_CONCURRENT_SECTIONS = int(os.environ.get('MAX_CONCURRENT_SECTIONS', '12'))