"""
Cloudflare Turnstile challenge solver.

Cloudflare Turnstile is a newer captcha system that replaces the traditional reCAPTCHA.
When it is triggered, the server responds with HTTP 403 and a "Just a moment" page.

⚠️ This module only provides placeholder interfaces; plug in a solution as needed.
"""

import re
from typing import Optional, Dict, Any

from utils.logger import logger


class CloudflareSolver:
    """
    Detection helpers and a placeholder solver for Cloudflare Turnstile challenges.

    ⚠️ Placeholder interface - implement ``solve`` only if you actually need it.

    Possible approaches:
        1. Use high-quality residential proxies (recommended, lower cost).
        2. Integrate a captcha-solving service (e.g. 2captcha, capsolver).
        3. Use browser automation with undetected-chromedriver.
        4. Wait a while and retry (works in some cases).
    """

    # Turnstile-related constants.
    # Per the original author's note this is OpenAI's Turnstile site key; it has to be
    # extracted from the real page, so treat the value as potentially stale.
    TURNSTILE_SITE_KEY = "0x4AAAAAAADnPIDROrmt1Wwj"

    # "Ray ID" label, optional HTML tags, then the hex identifier with an optional
    # three-letter data-center suffix (e.g. "84a1b2c3d4e5f678-LAX"). The trailing \b
    # keeps ordinary words that merely start with hex letters from matching.
    _RAY_ID_RE = re.compile(
        r"ray\s+id[:\s]+(?:<[^>]*>\s*)*([0-9a-f]+(?:-[a-z]{3})?)\b",
        re.IGNORECASE,
    )

    @staticmethod
    def detect_challenge(response) -> bool:
        """
        Report whether a response looks like a Cloudflare challenge page.

        Args:
            response: HTTP response object exposing ``status_code`` and ``text``
                (the original notes mention requests or curl_cffi responses).

        Returns:
            True when the status code is 403 and the lower-cased body contains at
            least one of the Cloudflare marker strings listed below; otherwise False.

        Side effects:
            Logs a warning on detection and, when a Ray ID can be extracted from the
            body, logs it at info level for debugging.
        """
        if response.status_code != 403:
            return False

        # Guard against clients that expose ``text`` as None for empty bodies.
        text = response.text or ""
        body = text.lower()
        cloudflare_keywords = (
            "just a moment",
            "checking your browser",
            "cloudflare",
            "cf-challenge",
            "turnstile",
            "ray id",
        )

        detected = any(keyword in body for keyword in cloudflare_keywords)

        if detected:
            logger.warning("Cloudflare Turnstile challenge detected")
            # Try to extract the Ray ID (debugging aid only).
            ray_id = CloudflareSolver._extract_ray_id(text)
            if ray_id:
                logger.info(f"Cloudflare Ray ID: {ray_id}")

        return detected

    @staticmethod
    async def solve(session, target_url: str, **kwargs) -> Optional[str]:
        """
        Solve a Cloudflare Turnstile challenge (placeholder, not implemented).

        ⚠️ Placeholder interface - users must supply their own implementation.

        Args:
            session: The session instance (an ``OAISession`` per the original notes).
            target_url: URL that triggered the challenge.
            **kwargs: Any extra parameters an implementation may need
                (e.g. site_key, action).

        Returns:
            An implementation is expected to return the ``cf_clearance`` cookie value
            or a Turnstile response token. The current body never returns.

        Raises:
            NotImplementedError: Always, until the user implements this method.

        Integration examples:
            ```python
            # Option 1: 2captcha solving service
            from twocaptcha import TwoCaptcha
            solver = TwoCaptcha('YOUR_API_KEY')
            result = solver.turnstile(
                sitekey=CloudflareSolver.TURNSTILE_SITE_KEY,
                url=target_url
            )
            return result['code']

            # Option 2: capsolver
            import capsolver
            capsolver.api_key = "YOUR_API_KEY"
            solution = capsolver.solve({
                "type": "AntiTurnstileTaskProxyLess",
                "websiteURL": target_url,
                "websiteKey": CloudflareSolver.TURNSTILE_SITE_KEY,
            })
            return solution['token']

            # Option 3: browser automation
            from selenium import webdriver
            from undetected_chromedriver import Chrome
            driver = Chrome()
            driver.get(target_url)
            # wait for Cloudflare to pass automatically...
            cf_clearance = driver.get_cookie('cf_clearance')['value']
            return cf_clearance
            ```
        """
        logger.warning(
            f"Cloudflare challenge detected at {target_url}, but solver not configured"
        )

        raise NotImplementedError(
            "❌ Cloudflare solver not implemented.\n\n"
            "This is OPTIONAL. Only implement if you encounter frequent 403 errors.\n\n"
            "Recommended solutions:\n"
            "1. Use high-quality residential proxies (easiest)\n"
            "2. Integrate captcha solving service (2captcha, capsolver)\n"
            "3. Use browser automation (undetected-chromedriver)\n"
            "4. Retry with different proxy/IP\n\n"
            f"Target URL: {target_url}\n"
            f"Site Key: {CloudflareSolver.TURNSTILE_SITE_KEY}\n\n"
            "Example implementation location: core/challenge.py -> solve()"
        )

    @staticmethod
    def _extract_ray_id(html: str) -> Optional[str]:
        """
        Extract the Ray ID from a Cloudflare error page (debugging aid).

        Example Ray ID format: 84a1b2c3d4e5f678-LAX

        The label is matched case-insensitively and may be followed by a colon,
        whitespace and HTML tags before the identifier. The three-letter suffix is
        kept when present, and a dangling trailing hyphen is never returned.

        Args:
            html: HTML content of the Cloudflare error page.

        Returns:
            The Ray ID string, or None when ``html`` is empty or no Ray ID is found.
        """
        if not html:
            return None

        match = CloudflareSolver._RAY_ID_RE.search(html)
        return match.group(1) if match else None

    @staticmethod
    def should_retry(response) -> bool:
        """
        Decide whether simply retrying the request is worthwhile.

        In some cases waiting a few seconds and retrying is enough to get through.

        Args:
            response: HTTP response object exposing ``status_code`` and ``text``.

        Returns:
            True only when ``detect_challenge`` reports a challenge, the body contains
            a JavaScript-challenge marker, and it contains no Turnstile marker.
            False in every other case.
        """
        if not CloudflareSolver.detect_challenge(response):
            return False

        # A lightweight JavaScript challenge may pass on retry; a Turnstile captcha
        # will not - it has to be solved.
        body = response.text.lower()

        # JavaScript challenge markers (retry may help).
        js_challenge_keywords = ["checking your browser", "please wait"]
        has_js_challenge = any(kw in body for kw in js_challenge_keywords)

        # Turnstile captcha markers (must be solved, retry is pointless).
        turnstile_keywords = ["turnstile", "cf-turnstile"]
        has_turnstile = any(kw in body for kw in turnstile_keywords)

        if has_js_challenge and not has_turnstile:
            logger.info("Detected JavaScript challenge, retry may work")
            return True

        logger.warning("Detected Turnstile captcha, retry unlikely to work")
        return False

    @staticmethod
    def get_bypass_headers() -> Dict[str, str]:
        """
        Return extra HTTP headers that may help requests pass Cloudflare.

        These headers can improve the pass rate but are not guaranteed to work.

        Returns:
            A freshly built dict of header names to values; callers may mutate it.
        """
        return {
            "Cache-Control": "max-age=0",
            "Upgrade-Insecure-Requests": "1",
            "Sec-Fetch-User": "?1",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "none",
            "Priority": "u=0, i",
        }


class CaptchaSolver:
    """
    Generic captcha solver (placeholder interface).

    Intended to cover several captcha types:
        - Cloudflare Turnstile
        - reCAPTCHA v2/v3
        - hCaptcha
        - image captchas

    Only the Turnstile entry point is defined here, and it is not implemented yet.
    """

    def __init__(self, api_key: Optional[str] = None, provider: str = "2captcha"):
        """
        Store the solving-service credentials.

        Args:
            api_key: API key for the captcha-solving service. A missing or empty
                key is accepted but triggers a warning log.
            provider: Name of the solving service
                ("2captcha", "capsolver", "anticaptcha"). The value is stored
                as-is and not validated.
        """
        self.api_key = api_key
        self.provider = provider

        if not api_key:
            logger.warning("CaptchaSolver initialized without API key")

    async def solve_turnstile(
        self,
        site_key: str,
        page_url: str,
        **kwargs
    ) -> Optional[str]:
        """
        Solve a Turnstile captcha (placeholder, not implemented).

        Args:
            site_key: Turnstile site key.
            page_url: URL of the page hosting the captcha.
            **kwargs: Additional provider-specific parameters.

        Returns:
            An implementation is expected to return the Turnstile response token.
            The current body never returns.

        Raises:
            ValueError: If no API key is configured on this instance.
            NotImplementedError: Always, once the API key check has passed.
        """
        if not self.api_key:
            raise ValueError("API key not configured")

        logger.info(f"Solving Turnstile captcha with {self.provider}...")

        # TODO: integrate the actual captcha-solving service API here.
        raise NotImplementedError(
            f"Turnstile solver not implemented for provider: {self.provider}"
        )


# Public API exported by this module.
__all__ = [
    "CloudflareSolver",
    "CaptchaSolver",
]