"""TLS-fingerprint-impersonating session management.

Core features:
- Uses curl_cffi to issue requests with a Chrome-like TLS fingerprint.
- Seeds and exposes the session cookies (``oai-did`` is set here; other
  cookies such as the ``__Secure-next-auth`` family are whatever the server
  sets on the underlying cookie jar).
- Unified error handling (403 Cloudflare block, 409 session conflict,
  429 rate limit).
- Proxy support.
"""

import re
from typing import Any, Dict, Optional

from curl_cffi import requests

from utils.crypto import generate_oai_did
from utils.logger import logger


# Matches the userinfo part of a proxy URL: everything between the scheme
# separator (or the start of the string when no scheme is given) and the LAST
# "@" before the first "/". Greedy on purpose so that passwords containing "@"
# are masked in full.
_PROXY_USERINFO_RE = re.compile(r"(://|^)[^/]*@")

# Lower-case substrings whose presence in a 403 body marks it as a Cloudflare
# challenge page.
_CLOUDFLARE_MARKERS = (
    "just a moment",
    "checking your browser",
    "cloudflare",
    "cf-challenge",
    "ray id",
)


class CloudflareBlockError(Exception):
    """Raised on a Cloudflare block (403 plus challenge-page markers)."""


class SessionInvalidError(Exception):
    """Raised when the session is no longer valid (409 Conflict)."""


class RateLimitError(Exception):
    """Raised on rate limiting (429 Too Many Requests)."""


# Errors raised (and already logged) by OAISession._handle_response.
_HANDLED_ERRORS = (CloudflareBlockError, SessionInvalidError, RateLimitError)


class OAISession:
    """Session manager built on a curl_cffi ``requests.Session``.

    Features:
    - TLS fingerprint impersonation (``impersonate="chrome124"`` by default).
    - ``oai-did`` cookie management.
    - Automatic error detection with dedicated exceptions.
    - Proxy support (HTTP / HTTPS / SOCKS5 URLs).
    - Login helper that stores the resulting access token.
    """

    # Related domain names.
    CHATGPT_DOMAIN = "chatgpt.com"
    AUTH_DOMAIN = "auth.openai.com"
    API_DOMAIN = "api.openai.com"

    def __init__(self, proxy: Optional[str] = None, impersonate: str = "chrome124"):
        """Create the underlying client, headers and ``oai-did`` cookie.

        Args:
            proxy: Proxy URL applied to both http and https traffic, e.g.
                ``"http://user:pass@ip:port"``, ``"https://user:pass@ip:port"``
                or ``"socks5://user:pass@ip:port"``. ``None`` or an empty
                string means no proxy.
            impersonate: Browser profile name handed to curl_cffi, e.g.
                ``"chrome110"``, ``"chrome120"``, ``"chrome124"``.
        """
        self.client = requests.Session(impersonate=impersonate, timeout=30)

        if proxy:
            self.client.proxies = {"http": proxy, "https": proxy}
            # Never log credentials: mask the userinfo part first.
            logger.info(f"Session using proxy: {self._mask_proxy(proxy)}")
        else:
            logger.info("Session initialized without proxy")

        self._setup_headers()

        # Generate the oai-did value and store it as a cookie scoped to the
        # chatgpt.com domain (leading dot => also sent to subdomains).
        self.oai_did = generate_oai_did()
        self.client.cookies.set(
            "oai-did",
            self.oai_did,
            domain=f".{self.CHATGPT_DOMAIN}",
        )
        logger.info(f"Session initialized with oai-did: {self.oai_did}")

        # Login state; populated by login() on success.
        self.access_token: Optional[str] = None
        self.session_token: Optional[str] = None
        self.logged_in_email: Optional[str] = None

    def _setup_headers(self) -> None:
        """Install the default request headers on the client.

        The values are static; per the original author's note they were
        taken from a captured browser session.
        """
        self.client.headers.update({
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36",
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
            "Accept-Encoding": "gzip, deflate, br",
            "Sec-Ch-Ua": '"Chromium";v="143", "Not.A/Brand";v="24"',
            "Sec-Ch-Ua-Mobile": "?0",
            "Sec-Ch-Ua-Platform": '"Linux"',
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-origin",
            "DNT": "1",
            "Connection": "keep-alive",
        })

    def _dispatch(self, verb: str, send, url: str, **kwargs) -> requests.Response:
        """Send one request through ``send`` and run unified response handling.

        Args:
            verb: HTTP verb, used only in the failure log message.
            send: Bound client method (``self.client.get`` / ``.post``).
            url: Target URL.
            **kwargs: Forwarded unchanged to ``send``.

        Returns:
            The response, once ``_handle_response`` has accepted it.

        Raises:
            CloudflareBlockError, SessionInvalidError, RateLimitError:
                Propagated from ``_handle_response``.
            Exception: Any other failure is logged and re-raised unchanged.
        """
        try:
            return self._handle_response(send(url, **kwargs), url)
        except _HANDLED_ERRORS:
            # Already logged by _handle_response; reporting it again as a
            # "request failed" would be misleading.
            raise
        except Exception as e:
            logger.error(f"{verb} request failed: {url} - {e}")
            raise

    def get(self, url: str, **kwargs) -> requests.Response:
        """Send a GET request.

        Args:
            url: Target URL.
            **kwargs: Extra arguments forwarded to the client's ``get``.

        Returns:
            The response object.

        Raises:
            CloudflareBlockError: A Cloudflare challenge was detected.
            SessionInvalidError: The server answered 409.
            RateLimitError: The server answered 429.
        """
        return self._dispatch("GET", self.client.get, url, **kwargs)

    def post(self, url: str, params=None, **kwargs) -> requests.Response:
        """Send a POST request.

        Args:
            url: Target URL.
            params: Optional URL query parameters.
            **kwargs: Extra arguments forwarded to the client's ``post``.

        Returns:
            The response object.

        Raises:
            CloudflareBlockError: A Cloudflare challenge was detected.
            SessionInvalidError: The server answered 409.
            RateLimitError: The server answered 429.
        """
        return self._dispatch("POST", self.client.post, url, params=params, **kwargs)

    def _handle_response(self, response: requests.Response, url: str) -> requests.Response:
        """Inspect a response and raise on the error conditions we recognise.

        Args:
            response: The curl_cffi response object.
            url: The requested URL (used for logging and error messages).

        Returns:
            The same response object when no recognised error was found.
            Other 4xx/5xx responses are logged as warnings but still returned.

        Raises:
            CloudflareBlockError: 403 whose body looks like a challenge page.
            SessionInvalidError: 409 Conflict.
            RateLimitError: 429 Too Many Requests.
        """
        status_code = response.status_code

        # A 403 counts as a Cloudflare block only if the body carries
        # challenge markers; any other 403 falls through to the generic
        # warning below.
        if status_code == 403 and self._is_cloudflare_challenge(response):
            logger.error(f"Cloudflare challenge detected: {url}")
            raise CloudflareBlockError(
                f"Cloudflare Turnstile challenge triggered at {url}. "
                "Possible solutions: use residential proxy, solve captcha, or retry later."
            )

        if status_code == 409:
            logger.error(f"Session conflict (409): {url} - {response.text[:200]}")
            raise SessionInvalidError(
                f"Session invalid (409 Conflict): {response.text[:200]}. "
                "This usually means CSRF token expired or cookie chain broken. "
                "Need to restart registration flow."
            )

        if status_code == 429:
            logger.error(f"Rate limit exceeded (429): {url}")
            raise RateLimitError(
                f"Rate limit exceeded at {url}. "
                "Recommendation: slow down requests or change IP/proxy."
            )

        if status_code >= 400:
            # Remaining 4xx/5xx: log a body preview but let the caller decide.
            logger.warning(
                f"HTTP {status_code} error: {url}\n"
                f"Response preview: {response.text[:300]}"
            )

        if status_code < 300:
            logger.debug(f"HTTP {status_code} OK: {url}")

        return response

    @staticmethod
    def _is_cloudflare_challenge(response: requests.Response) -> bool:
        """Return True if the response body contains a Cloudflare marker.

        The check is a case-insensitive substring search of the body for any
        entry in ``_CLOUDFLARE_MARKERS``. The status code is NOT checked here;
        the caller does that.
        """
        body = response.text.lower()
        return any(marker in body for marker in _CLOUDFLARE_MARKERS)

    @staticmethod
    def _mask_proxy(proxy: str) -> str:
        """Return ``proxy`` with its credentials replaced by ``***:***``.

        Everything between the scheme separator (or the start of the string)
        and the last ``@`` before the first ``/`` is treated as credentials,
        so passwords containing ``@`` and username-only credentials are
        masked too. URLs without credentials are returned unchanged.

        Example:
            http://user:pass@1.2.3.4:8080 -> http://***:***@1.2.3.4:8080
        """
        return _PROXY_USERINFO_RE.sub(r"\1***:***@", proxy)

    def get_cookies(self) -> Dict[str, str]:
        """Return all current cookies as a ``{name: value}`` dict.

        Cookies with the same name on different domains collapse to a single
        entry; the one iterated last wins. This is best effort: if the cookie
        store cannot be read, the failure is logged and whatever was
        collected so far (possibly nothing) is returned.
        """
        result: Dict[str, str] = {}
        store = self.client.cookies

        try:
            # Walk the underlying jar so same-name cookies on different
            # domains do not trigger a lookup conflict.
            for cookie in store.jar:
                result[cookie.name] = cookie.value
            return result
        except Exception as e:
            logger.debug(f"Cookie jar iteration failed, falling back: {e}")

        try:
            # Compatibility path: iterate the store itself. Depending on the
            # store type this yields cookie objects or plain cookie names.
            for item in store:
                if isinstance(item, str):
                    result[item] = store.get(item)
                else:
                    result[item.name] = item.value
        except Exception as e:
            logger.warning(f"Unable to read cookies from session: {e}")

        return result

    def get_cookie(self, name: str, domain: Optional[str] = None) -> Optional[str]:
        """Return the value of the cookie called ``name``.

        Args:
            name: Cookie name.
            domain: Optional cookie domain used to pick one cookie when the
                same name exists on several domains. When omitted, the lookup
                is by name only, and the underlying cookie store may raise if
                it finds conflicting values.

        Returns:
            The cookie value, or ``None`` if no such cookie exists.
        """
        if domain is None:
            return self.client.cookies.get(name)
        return self.client.cookies.get(name, domain=domain)

    def set_cookie(self, name: str, value: str, domain: Optional[str] = None):
        """Set a cookie on the session.

        Args:
            name: Cookie name.
            value: Cookie value.
            domain: Cookie domain; defaults to ``.chatgpt.com``.
        """
        if domain is None:
            domain = f".{self.CHATGPT_DOMAIN}"
        self.client.cookies.set(name, value, domain=domain)
        logger.debug(f"Cookie set: {name}={value[:10]}... (domain={domain})")

    def close(self):
        """Close the underlying client; failures are logged, not raised."""
        try:
            self.client.close()
            logger.debug("Session closed")
        except Exception as e:
            logger.warning(f"Error closing session: {e}")

    async def login(self, email: str, password: str) -> Dict[str, Any]:
        """Log in with email and password via ``LoginFlow``.

        Args:
            email: Login email.
            password: Login password.

        Returns:
            The dict produced by ``LoginFlow.run()``. This method reads the
            keys ``status``, ``access_token`` and ``session_token`` from it;
            when ``status == "success"`` the tokens and the email are stored
            on the session.

        Example:
            session = OAISession()
            result = await session.login("user@example.com", "password123")
            if result["status"] == "success":
                print(f"Access Token: {result['access_token']}")
        """
        # Imported lazily - presumably to avoid a circular import with
        # core.login_flow; confirm before moving to module level.
        from core.login_flow import LoginFlow

        flow = LoginFlow(self, email, password)
        result = await flow.run()

        if result.get("status") == "success":
            self.access_token = result.get("access_token")
            self.session_token = result.get("session_token")
            self.logged_in_email = email
            logger.info(f"Session logged in as: {email}")

        return result

    def is_logged_in(self) -> bool:
        """Return True once an access token has been stored."""
        return self.access_token is not None

    def get_access_token(self) -> Optional[str]:
        """Return the stored access token, or ``None`` if not logged in."""
        return self.access_token

    def __enter__(self):
        """Support use as a ``with`` context manager."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the session on context exit; exceptions are not suppressed."""
        self.close()


# Public interface.
__all__ = [
    "OAISession",
    "CloudflareBlockError",
    "SessionInvalidError",
    "RateLimitError",
]