2026-04-24 18:53:49 +08:00

146 lines
6.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import sys
import threading
import random
import time
from contextlib import contextmanager
# PyInstaller-aware path resolution:
#   frozen:  sys._MEIPASS is the read-only bundle dir (templates, static, prompts)
#            and the directory next to the .exe is the writable runtime dir
#            (data, settings, db).
#   source:  both resolve to this file's directory.
if getattr(sys, 'frozen', False):
    _BUNDLE_DIR = sys._MEIPASS                   # read-only bundled app files
    BASE_DIR = os.path.dirname(sys.executable)   # writable dir beside the .exe
else:
    _BUNDLE_DIR = os.path.dirname(os.path.abspath(__file__))
    BASE_DIR = _BUNDLE_DIR

# Writable data locations — everything lives under BASE_DIR/data.
DATA_DIR = os.path.join(BASE_DIR, 'data')
UPLOAD_DIR = os.path.join(DATA_DIR, 'uploads')
EXPORT_DIR = os.path.join(DATA_DIR, 'exports')
KNOWLEDGE_DIR = os.path.join(DATA_DIR, 'knowledge')
DB_PATH = os.path.join(DATA_DIR, 'projects.db')
CHROMA_DIR = os.path.join(DATA_DIR, 'chroma')

# Prompt templates ship inside the (possibly read-only) bundle.
PROMPTS_DIR = os.path.join(_BUNDLE_DIR, 'prompts')
# ==================== AI model configuration ====================
# Provider switch: 'openai' | 'qwen' | 'deepseek' | 'ollama'
MODEL_PROVIDER = os.environ.get('MODEL_PROVIDER', 'qwen')

# OpenAI
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', 'sk-your-openai-key')
OPENAI_MODEL = os.environ.get('OPENAI_MODEL', 'gpt-4.1')
OPENAI_BASE_URL = os.environ.get('OPENAI_BASE_URL', 'https://api.openai.com/v1')

# Alibaba Cloud Qwen (Tongyi Qianwen)
QWEN_API_KEY = os.environ.get('QWEN_API_KEY', 'sk-your-qwen-key')
QWEN_MODEL = os.environ.get('QWEN_MODEL', 'qwen3.6-plus')
QWEN_BASE_URL = os.environ.get('QWEN_BASE_URL', 'https://dashscope.aliyuncs.com/compatible-mode/v1')

# Text-to-image (multimodal generation) uses a different endpoint than chat;
# see the Model Studio docs for the Beijing region.
QWEN_MULTIMODAL_BASE = os.environ.get(
    'QWEN_MULTIMODAL_BASE',
    'https://dashscope.aliyuncs.com/api/v1',
)

# Default model for bid-attachment illustrations (synchronous text-to-image).
QWEN_IMAGE_MODEL = os.environ.get('QWEN_IMAGE_MODEL', 'qwen-image-2.0-pro')
QWEN_IMAGE_SIZE = os.environ.get('QWEN_IMAGE_SIZE', '1536*1024')
# Boolean flags parsed from env strings; any of '1'/'true'/'yes' means True.
QWEN_IMAGE_PROMPT_EXTEND = os.environ.get('QWEN_IMAGE_PROMPT_EXTEND', 'true').lower() in (
    '1', 'true', 'yes',
)
QWEN_IMAGE_WATERMARK = os.environ.get('QWEN_IMAGE_WATERMARK', 'false').lower() in (
    '1', 'true', 'yes',
)
QWEN_IMAGE_NEGATIVE_PROMPT = os.environ.get(
    'QWEN_IMAGE_NEGATIVE_PROMPT',
    '低分辨率, 模糊, 畸形肢体, 过度饱和, 蜡像感, 杂乱构图, 扭曲文字, 公司商标, LOGO, 投标人名称, '
    '塔吊, 塔式起重机, 起重机, 吊车, 挖掘机, 装载机, 压路机, 泵车, 搅拌车, 推土机, 施工机械, 工程车辆, '
    'crane, excavator, tower crane, bulldozer',
)
# DeepSeek
DEEPSEEK_API_KEY = os.environ.get('DEEPSEEK_API_KEY', 'sk-your-deepseek-key')
DEEPSEEK_MODEL = os.environ.get('DEEPSEEK_MODEL', 'deepseek-chat')
DEEPSEEK_BASE_URL = os.environ.get('DEEPSEEK_BASE_URL', 'https://api.deepseek.com/v1')

# Ollama (local, OpenAI-compatible API)
OLLAMA_BASE_URL = os.environ.get('OLLAMA_BASE_URL', 'http://localhost:11434/v1')
OLLAMA_MODEL = os.environ.get('OLLAMA_MODEL', 'qwen3:8b')

# Doubao / Volcano Engine (ByteDance, OpenAI-compatible API)
DOUBAO_API_KEY = os.environ.get('DOUBAO_API_KEY', 'sk-your-doubao-key')
DOUBAO_MODEL = os.environ.get('DOUBAO_MODEL', 'doubao-1-5-pro-32k')
DOUBAO_BASE_URL = os.environ.get('DOUBAO_BASE_URL', 'https://ark.cn-beijing.volces.com/api/v3')

# Kimi / Moonshot AI (OpenAI-compatible API, also provides embeddings)
KIMI_API_KEY = os.environ.get('KIMI_API_KEY', 'sk-your-kimi-key')
KIMI_MODEL = os.environ.get('KIMI_MODEL', 'moonshot-v1-32k')
KIMI_BASE_URL = os.environ.get('KIMI_BASE_URL', 'https://api.moonshot.cn/v1')

# Embedding model names per provider (not env-configurable).
OPENAI_EMBEDDING_MODEL = 'text-embedding-3-small'
QWEN_EMBEDDING_MODEL = 'text-embedding-v3'
KIMI_EMBEDDING_MODEL = 'moonshot-v1-embedding'
# ==================== Application configuration ====================
MAX_FILE_SIZE_MB = 50                         # upload size cap (MB)
ALLOWED_EXTENSIONS = {'pdf', 'doc', 'docx'}   # accepted upload extensions
SECRET_KEY = 'bidhuo-partner-secret-2024'     # session key — override in production

# ==================== Generation configuration ====================
MAX_RETRIES = 3
# Read timeouts (seconds). The `or 'default'` guard mirrors the
# TARGET_PAGES/PAGE_CHAR_ESTIMATE pattern below: an env var that is set but
# *empty* would otherwise crash int('') at import time.
REQUEST_TIMEOUT = int(os.environ.get('REQUEST_TIMEOUT', '180') or '180')
# Outline generation sends a long prompt and produces a large response —
# use a longer read timeout so the client is not disconnected before the
# API returns.
OUTLINE_REQUEST_TIMEOUT = int(os.environ.get('OUTLINE_REQUEST_TIMEOUT', '300') or '300')
CHUNK_SIZE = 2000       # knowledge-base text chunk size (characters)
CHUNK_OVERLAP = 200     # overlap between adjacent chunks
TOP_K_KNOWLEDGE = 3     # number of knowledge-base hits to retrieve
# Output volume preset: concise / standard / detailed / full
CONTENT_VOLUME = os.environ.get('CONTENT_VOLUME', 'standard')
TARGET_PAGES = int(os.environ.get('TARGET_PAGES', '0') or '0')  # target page count (0 = disabled)
PAGE_CHAR_ESTIMATE = int(os.environ.get('PAGE_CHAR_ESTIMATE', '700') or '700')  # rough chars per page
# ==================== 并发控制 (极速优化核心) ====================
# 全局 LLM 调用信号量与章节生成线程池共用同一上限,默认 40 路真实并发。
# 可通过环境变量 LLM_CONCURRENCY_CAP 提高硬上限(界面保存仍不可超过该值)。
LLM_CONCURRENCY_CAP = int(os.environ.get('LLM_CONCURRENCY_CAP', '40'))
_env_def = str(LLM_CONCURRENCY_CAP)
_raw_llm = int(os.environ.get('LLM_CONCURRENCY_LIMIT', _env_def))
_raw_sec = int(os.environ.get('MAX_CONCURRENT_SECTIONS', _env_def))
_cv = max(1, min(_raw_llm, _raw_sec, LLM_CONCURRENCY_CAP))
LLM_CONCURRENCY_LIMIT = _cv
MAX_CONCURRENT_SECTIONS = _cv
_llm_semaphore = threading.Semaphore(_cv)
def refresh_llm_semaphore() -> None:
    """Rebuild the LLM semaphore from the current concurrency settings.

    Must be called after settings.json has been loaded: it clamps
    ``LLM_CONCURRENCY_LIMIT`` and ``MAX_CONCURRENT_SECTIONS`` to each other
    and to ``LLM_CONCURRENCY_CAP`` (minimum 1), then recreates the global
    semaphore at that size.
    """
    global _llm_semaphore, LLM_CONCURRENCY_LIMIT, MAX_CONCURRENT_SECTIONS
    limit = min(int(LLM_CONCURRENCY_LIMIT), int(MAX_CONCURRENT_SECTIONS), int(LLM_CONCURRENCY_CAP))
    limit = max(1, limit)
    LLM_CONCURRENCY_LIMIT = limit
    MAX_CONCURRENT_SECTIONS = limit
    _llm_semaphore = threading.Semaphore(limit)
def set_concurrency(n: int) -> None:
    """Set the section-pool and LLM-semaphore concurrency to *n* at once.

    The value is clamped into ``[1, LLM_CONCURRENCY_CAP]`` and the global
    semaphore is recreated at the new size.
    """
    global MAX_CONCURRENT_SECTIONS, LLM_CONCURRENCY_LIMIT, _llm_semaphore
    clamped = max(1, min(int(n), int(LLM_CONCURRENCY_CAP)))
    MAX_CONCURRENT_SECTIONS = clamped
    LLM_CONCURRENCY_LIMIT = clamped
    _llm_semaphore = threading.Semaphore(clamped)
@contextmanager
def llm_call():
    """Global LLM-call gate; every ai_client.chat / embedding call must use it.

    Acquires the shared semaphore with a 60 s ceiling so a stuck pool cannot
    deadlock callers, and adds a tiny random jitter for the Qwen cloud API
    to avoid tripping 429 rate limits.
    """
    if not _llm_semaphore.acquire(blocking=True, timeout=60.0):
        raise TimeoutError(f"LLM并发已达上限({LLM_CONCURRENCY_LIMIT}),请稍后重试")
    try:
        if MODEL_PROVIDER == 'qwen':
            # Qwen is RPM-sensitive — a 0–0.08 s jitter spreads out bursts.
            time.sleep(random.uniform(0, 0.08))
        yield
    finally:
        _llm_semaphore.release()