This commit is contained in:
dela
2026-01-26 15:04:02 +08:00
commit 4813449f9c
31 changed files with 8439 additions and 0 deletions

274
core/challenge.py Normal file
View File

@@ -0,0 +1,274 @@
"""
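Cloudflare Turnstile challenge solver.
Cloudflare Turnstile is a newer captcha system intended to replace traditional reCAPTCHA.
When it is triggered, the server answers with HTTP 403 and shows a "Just a moment" page.
⚠️ This module only provides placeholder interfaces; configure a solving strategy as needed.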
"""
from typing import Optional, Dict, Any
from utils.logger import logger
class CloudflareSolver:
    """Detection helpers and a placeholder solver for Cloudflare challenges.

    ⚠️ Placeholder interface - ``solve`` is intentionally not implemented;
    users decide whether they need it.

    Possible strategies:
    1. Use high-quality residential proxies (recommended, lower cost)
    2. Integrate a captcha-solving service (e.g. 2captcha, capsolver)
    3. Use browser automation + undetected-chromedriver
    4. Wait a while and retry (works in some cases)
    """

    # OpenAI's Turnstile site key.
    # NOTE(review): the original note says this must be extracted from the
    # live page - confirm the value before relying on it.
    TURNSTILE_SITE_KEY = "0x4AAAAAAADnPIDROrmt1Wwj"

    # Lower-case substrings that mark a 403 body as a Cloudflare challenge.
    _CHALLENGE_KEYWORDS = (
        "just a moment",
        "checking your browser",
        "cloudflare",
        "cf-challenge",
        "turnstile",
        "ray id",
    )

    # Markers of a JavaScript challenge (a plain retry may get through).
    _JS_CHALLENGE_KEYWORDS = ("checking your browser", "please wait")

    # Markers of a Turnstile captcha (a plain retry will not help).
    _TURNSTILE_KEYWORDS = ("turnstile", "cf-turnstile")

    # "Ray ID" label, optional HTML tags between label and value, then the
    # hexadecimal ID with an optional three-letter data-centre suffix
    # (e.g. "84a1b2c3d4e5f678-LAX"). Used case-insensitively.
    _RAY_ID_PATTERN = (
        r"ray id[:\s]+(?:<[^>]*>\s*)*([0-9a-f]+(?:-[a-z]{3}\b)?)"
    )

    @staticmethod
    def detect_challenge(response) -> bool:
        """Return True when *response* looks like a Cloudflare challenge.

        Args:
            response: HTTP response object exposing ``status_code`` and
                ``text`` (per the original notes: from requests or
                curl_cffi).

        Returns:
            True if the status code is 403 and the body contains one of the
            known Cloudflare markers, otherwise False.
        """
        if response.status_code != 403:
            return False

        # Guard against an empty/None body so ``.lower()`` cannot fail.
        raw_body = response.text or ""
        body = raw_body.lower()

        detected = any(
            keyword in body for keyword in CloudflareSolver._CHALLENGE_KEYWORDS
        )
        if detected:
            logger.warning("Cloudflare Turnstile challenge detected")
            # Try to extract the Ray ID (useful for debugging).
            ray_id = CloudflareSolver._extract_ray_id(raw_body)
            if ray_id:
                logger.info(f"Cloudflare Ray ID: {ray_id}")
        return detected

    @staticmethod
    async def solve(session, target_url: str, **kwargs) -> Optional[str]:
        """Solve a Cloudflare Turnstile challenge (placeholder).

        ⚠️ Placeholder interface - must be implemented by the user.

        Args:
            session: OAISession instance (unused by this placeholder).
            target_url: URL that triggered the challenge.
            **kwargs: Any other parameters an implementation may need
                (e.g. site_key, action).

        Returns:
            Intended to return the ``cf_clearance`` cookie value or a
            Turnstile response token; this placeholder never returns.

        Raises:
            NotImplementedError: Always, until an implementation is supplied.

        Integration examples:
            ```python
            # Option 1: use the 2captcha solving service
            from twocaptcha import TwoCaptcha
            solver = TwoCaptcha('YOUR_API_KEY')
            result = solver.turnstile(
                sitekey=CloudflareSolver.TURNSTILE_SITE_KEY,
                url=target_url
            )
            return result['code']

            # Option 2: use capsolver
            import capsolver
            capsolver.api_key = "YOUR_API_KEY"
            solution = capsolver.solve({
                "type": "AntiTurnstileTaskProxyLess",
                "websiteURL": target_url,
                "websiteKey": CloudflareSolver.TURNSTILE_SITE_KEY,
            })
            return solution['token']

            # Option 3: use browser automation
            from selenium import webdriver
            from undetected_chromedriver import Chrome
            driver = Chrome()
            driver.get(target_url)
            # wait for Cloudflare to pass automatically...
            cf_clearance = driver.get_cookie('cf_clearance')['value']
            return cf_clearance
            ```
        """
        logger.warning(
            f"Cloudflare challenge detected at {target_url}, but solver not configured"
        )
        raise NotImplementedError(
            "❌ Cloudflare solver not implemented.\n\n"
            "This is OPTIONAL. Only implement if you encounter frequent 403 errors.\n\n"
            "Recommended solutions:\n"
            "1. Use high-quality residential proxies (easiest)\n"
            "2. Integrate captcha solving service (2captcha, capsolver)\n"
            "3. Use browser automation (undetected-chromedriver)\n"
            "4. Retry with different proxy/IP\n\n"
            f"Target URL: {target_url}\n"
            f"Site Key: {CloudflareSolver.TURNSTILE_SITE_KEY}\n\n"
            "Example implementation location: core/challenge.py -> solve()"
        )

    @staticmethod
    def _extract_ray_id(html: str) -> Optional[str]:
        """Extract the Ray ID from a Cloudflare error page (for debugging).

        Example Ray ID format: ``84a1b2c3d4e5f678-LAX``. The label match is
        case-insensitive, HTML tags between the label and the value are
        skipped, and the optional three-letter suffix is kept.

        Args:
            html: HTML content of the Cloudflare error page.

        Returns:
            The Ray ID string, or None when none is found.
        """
        import re

        if not html:
            return None
        match = re.search(CloudflareSolver._RAY_ID_PATTERN, html, re.IGNORECASE)
        return match.group(1) if match else None

    @staticmethod
    def should_retry(response) -> bool:
        """Decide whether simply retrying the request is worthwhile.

        In some cases waiting a few seconds and retrying is enough to pass.

        Args:
            response: HTTP response object exposing ``status_code`` and
                ``text``.

        Returns:
            True only when the response is a detected challenge whose body
            carries JavaScript-challenge markers and no Turnstile markers;
            False otherwise.
        """
        if not CloudflareSolver.detect_challenge(response):
            return False

        body = (response.text or "").lower()
        has_js_challenge = any(
            kw in body for kw in CloudflareSolver._JS_CHALLENGE_KEYWORDS
        )
        has_turnstile = any(
            kw in body for kw in CloudflareSolver._TURNSTILE_KEYWORDS
        )

        # A Turnstile captcha has to be solved; retrying does not help.
        if has_turnstile:
            logger.warning("Detected Turnstile captcha, retry unlikely to work")
            return False
        # A lightweight JavaScript challenge may pass on a later attempt.
        if has_js_challenge:
            logger.info("Detected JavaScript challenge, retry may work")
            return True
        # Neither marker present: do not mislabel the challenge as Turnstile.
        logger.warning(
            "Detected Cloudflare challenge of unknown type, retry unlikely to work"
        )
        return False

    @staticmethod
    def get_bypass_headers() -> Dict[str, str]:
        """Return extra HTTP headers that may help get past Cloudflare.

        These headers may improve the pass rate but are not guaranteed to
        work.

        Returns:
            A new dictionary of additional HTTP headers on every call.
        """
        return {
            "Cache-Control": "max-age=0",
            "Upgrade-Insecure-Requests": "1",
            "Sec-Fetch-User": "?1",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "none",
            "Priority": "u=0, i",
        }
class CaptchaSolver:
    """Generic captcha solver (placeholder interface).

    Captcha families listed in the original design notes:
    - Cloudflare Turnstile
    - reCAPTCHA v2/v3
    - hCaptcha
    - image captchas

    Only a Turnstile stub exists in this class, and it is not implemented.
    """

    def __init__(self, api_key: Optional[str] = None, provider: str = "2captcha"):
        """Store the solving-service credentials.

        Args:
            api_key: API key of the captcha-solving service.
            provider: Name of the solving service
                ("2captcha", "capsolver", "anticaptcha").
        """
        self.api_key, self.provider = api_key, provider
        if api_key:
            return
        # A missing key is only reported here; solve_turnstile rejects it.
        logger.warning("CaptchaSolver initialized without API key")

    async def solve_turnstile(
        self, site_key: str, page_url: str, **kwargs
    ) -> Optional[str]:
        """Solve a Turnstile captcha (placeholder interface).

        Args:
            site_key: Turnstile site key.
            page_url: URL of the page showing the captcha.
            **kwargs: Additional parameters (currently unused).

        Returns:
            Intended to return the Turnstile response token; this
            placeholder never returns.

        Raises:
            ValueError: If no API key is configured.
            NotImplementedError: Always, once the API-key check has passed.
        """
        if self.api_key:
            logger.info(f"Solving Turnstile captcha with {self.provider}...")
            # TODO: integrate the actual captcha-solving service API here.
            raise NotImplementedError(
                f"Turnstile solver not implemented for provider: {self.provider}"
            )
        raise ValueError("API key not configured")
# Public interface exported by this module.
__all__ = ["CloudflareSolver", "CaptchaSolver"]