"""Cloudflare Turnstile challenge helpers.

Cloudflare Turnstile is a newer captcha system used in place of the
traditional reCAPTCHA. When it triggers, the server answers with HTTP 403
and shows a "Just a moment" page.

NOTE: this module only provides placeholder interfaces; users plug in an
actual solving strategy as needed.
"""

import re
from typing import Optional, Dict, Any

from utils.logger import logger

# Matches the "Ray ID" label followed by its value, e.g.
# "Ray ID: 84a1b2c3d4e5f678-LAX". Inline HTML tags between the label and the
# value (such as "<code>" or "<strong>") are skipped. The captured group is
# the hexadecimal ID plus any hyphen-separated alphanumeric suffix, so the
# trailing location code is kept and a dangling "-" is never captured.
_RAY_ID_PATTERN = re.compile(
    r"ray id[:\s]+(?:<[^>]*>\s*)*([a-f0-9]+(?:-[a-z0-9]+)*)",
    re.IGNORECASE,
)


class CloudflareSolver:
    """Detection helpers and a placeholder solver for Cloudflare Turnstile.

    NOTE: placeholder interface - implement only if your situation needs it.

    Possible strategies:
        1. Use high-quality residential proxies (recommended, lower cost).
        2. Integrate a captcha-solving service (e.g. 2captcha, capsolver).
        3. Use browser automation with undetected-chromedriver.
        4. Wait for a while and retry (works in some cases).
    """

    # OpenAI's Turnstile site key (should be extracted from the actual page).
    TURNSTILE_SITE_KEY = "0x4AAAAAAADnPIDROrmt1Wwj"

    @staticmethod
    def detect_challenge(response) -> bool:
        """Return whether *response* looks like a Cloudflare challenge page.

        A response is treated as a challenge when its status code is 403 and
        its lower-cased body contains at least one known Cloudflare marker
        ("just a moment", "checking your browser", "cloudflare",
        "cf-challenge", "turnstile" or "ray id").

        Args:
            response: HTTP response object exposing ``status_code`` and
                ``text`` (e.g. from requests or curl_cffi).

        Returns:
            True if a Cloudflare challenge was detected, otherwise False.
        """
        if response.status_code != 403:
            return False

        # Tolerate a missing body instead of failing on ``None.lower()``.
        raw_body = response.text or ""
        body = raw_body.lower()

        cloudflare_keywords = [
            "just a moment",
            "checking your browser",
            "cloudflare",
            "cf-challenge",
            "turnstile",
            "ray id",
        ]
        detected = any(keyword in body for keyword in cloudflare_keywords)

        if detected:
            logger.warning("Cloudflare Turnstile challenge detected")

            # Try to extract the Ray ID; it is only logged, for debugging.
            ray_id = CloudflareSolver._extract_ray_id(raw_body)
            if ray_id:
                logger.info(f"Cloudflare Ray ID: {ray_id}")

        return detected

    @staticmethod
    async def solve(session, target_url: str, **kwargs) -> Optional[str]:
        """Solve a Cloudflare Turnstile challenge (placeholder).

        NOTE: placeholder interface - users implement this according to
        their own needs. The current body logs a warning and always raises.

        Args:
            session: OAISession instance (unused by the placeholder).
            target_url: URL that triggered the challenge.
            **kwargs: Other parameters an implementation may need (such as
                ``site_key`` or ``action``). If ``site_key`` is supplied it
                is the key reported in the error message; otherwise
                ``TURNSTILE_SITE_KEY`` is reported.

        Returns:
            An implementation is expected to return the ``cf_clearance``
            cookie value or a Turnstile response token.

        Raises:
            NotImplementedError: Always, until the user implements this
                method.

        Integration examples:
            ```python
            # Option 1: use the 2captcha solving service
            from twocaptcha import TwoCaptcha
            solver = TwoCaptcha('YOUR_API_KEY')
            result = solver.turnstile(
                sitekey=CloudflareSolver.TURNSTILE_SITE_KEY,
                url=target_url
            )
            return result['code']

            # Option 2: use capsolver
            import capsolver
            capsolver.api_key = "YOUR_API_KEY"
            solution = capsolver.solve({
                "type": "AntiTurnstileTaskProxyLess",
                "websiteURL": target_url,
                "websiteKey": CloudflareSolver.TURNSTILE_SITE_KEY,
            })
            return solution['token']

            # Option 3: use browser automation
            from selenium import webdriver
            from undetected_chromedriver import Chrome
            driver = Chrome()
            driver.get(target_url)
            # Wait for Cloudflare to pass automatically...
            cf_clearance = driver.get_cookie('cf_clearance')['value']
            return cf_clearance
            ```
        """
        # Report the key the caller actually asked for, when one was given.
        site_key = kwargs.get("site_key") or CloudflareSolver.TURNSTILE_SITE_KEY

        logger.warning(
            f"Cloudflare challenge detected at {target_url}, but solver not configured"
        )

        raise NotImplementedError(
            "❌ Cloudflare solver not implemented.\n\n"
            "This is OPTIONAL. Only implement if you encounter frequent 403 errors.\n\n"
            "Recommended solutions:\n"
            "1. Use high-quality residential proxies (easiest)\n"
            "2. Integrate captcha solving service (2captcha, capsolver)\n"
            "3. Use browser automation (undetected-chromedriver)\n"
            "4. Retry with different proxy/IP\n\n"
            f"Target URL: {target_url}\n"
            f"Site Key: {site_key}\n\n"
            "Example implementation location: core/challenge.py -> solve()"
        )

    @staticmethod
    def _extract_ray_id(html: str) -> Optional[str]:
        """Extract the Ray ID from a Cloudflare error page (for debugging).

        Example Ray ID: ``84a1b2c3d4e5f678-LAX``. The label is matched
        case-insensitively, and the value may be wrapped in inline HTML
        tags (e.g. ``Ray ID: <code>84a1b2c3d4e5f678</code>``).

        Args:
            html: HTML content of the Cloudflare error page.

        Returns:
            The Ray ID string, or None if *html* is empty or no Ray ID was
            found.
        """
        if not html:
            return None

        match = _RAY_ID_PATTERN.search(html)
        return match.group(1) if match else None

    @staticmethod
    def should_retry(response) -> bool:
        """Decide whether the request is worth retrying after a challenge.

        In some cases simply waiting a few seconds and retrying is enough
        to get through.

        Args:
            response: HTTP response object exposing ``status_code`` and
                ``text``.

        Returns:
            True only when the response is a detected Cloudflare challenge
            whose body carries JavaScript-challenge markers and no Turnstile
            markers; otherwise False.
        """
        if not CloudflareSolver.detect_challenge(response):
            return False

        # A lightweight JavaScript challenge may pass on retry; a Turnstile
        # captcha will not, it has to be solved.
        body = (response.text or "").lower()

        # JavaScript challenge markers (retry may help).
        js_challenge_keywords = ["checking your browser", "please wait"]
        has_js_challenge = any(kw in body for kw in js_challenge_keywords)

        # Turnstile captcha markers (must be solved, retry does not help).
        turnstile_keywords = ["turnstile", "cf-turnstile"]
        has_turnstile = any(kw in body for kw in turnstile_keywords)

        if has_turnstile:
            logger.warning("Detected Turnstile captcha, retry unlikely to work")
            return False

        if has_js_challenge:
            logger.info("Detected JavaScript challenge, retry may work")
            return True

        # Neither marker set matched: do not claim a Turnstile captcha was
        # seen, but stay conservative and do not recommend a retry.
        logger.warning("Cloudflare challenge type not recognized, retry not recommended")
        return False

    @staticmethod
    def get_bypass_headers() -> Dict[str, str]:
        """Return extra HTTP headers that may help get past Cloudflare.

        These headers can improve the pass rate but are not guaranteed to
        work.

        Returns:
            A new dictionary of additional HTTP headers.
        """
        return {
            "Cache-Control": "max-age=0",
            "Upgrade-Insecure-Requests": "1",
            "Sec-Fetch-User": "?1",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "none",
            "Priority": "u=0, i",
        }


class CaptchaSolver:
    """Generic captcha solver (placeholder interface).

    Intended to cover several captcha types:
        - Cloudflare Turnstile
        - reCAPTCHA v2/v3
        - hCaptcha
        - image captchas

    Only the Turnstile entry point exists so far, and it is not implemented.
    """

    def __init__(self, api_key: Optional[str] = None, provider: str = "2captcha"):
        """Store the solving-service credentials.

        Args:
            api_key: API key for the captcha-solving service. A warning is
                logged when it is missing or empty.
            provider: Name of the solving service ("2captcha", "capsolver",
                "anticaptcha").
        """
        self.api_key = api_key
        self.provider = provider

        if not api_key:
            logger.warning("CaptchaSolver initialized without API key")

    async def solve_turnstile(
        self,
        site_key: str,
        page_url: str,
        **kwargs
    ) -> Optional[str]:
        """Solve a Turnstile captcha (placeholder).

        Args:
            site_key: Turnstile site key.
            page_url: URL of the page showing the captcha.
            **kwargs: Other parameters.

        Returns:
            An implementation is expected to return the Turnstile response
            token.

        Raises:
            ValueError: If no API key is configured.
            NotImplementedError: Always, once an API key is present, until
                a provider integration is added.
        """
        if not self.api_key:
            raise ValueError("API key not configured")

        logger.info(f"Solving Turnstile captcha with {self.provider}...")

        # TODO: integrate the actual captcha-solving service API here.
        raise NotImplementedError(
            f"Turnstile solver not implemented for provider: {self.provider}"
        )


# Public interface of this module.
__all__ = [
    "CloudflareSolver",
    "CaptchaSolver",
]