first
This commit is contained in:
274
core/challenge.py
Normal file
274
core/challenge.py
Normal file
@@ -0,0 +1,274 @@
|
||||
"""
|
||||
Cloudflare Turnstile 验证码解决器
|
||||
|
||||
Cloudflare Turnstile 是一种新型验证码系统,用于替代传统的 reCAPTCHA
|
||||
当触发时会返回 403 状态码并显示 "Just a moment" 页面
|
||||
|
||||
⚠️ 本模块提供预留接口,用户根据需要配置解决方案
|
||||
"""
|
||||
|
||||
from typing import Optional, Dict, Any
|
||||
from utils.logger import logger
|
||||
|
||||
|
||||
class CloudflareSolver:
    """
    Cloudflare Turnstile challenge solver.

    ⚠️ Placeholder interface - implement it only if your situation requires it.

    Possible approaches:
    1. Use high-quality residential proxies (recommended, lower cost)
    2. Integrate a captcha-solving service (e.g. 2captcha, capsolver)
    3. Use browser automation with undetected-chromedriver
    4. Wait for a while and retry (works in some cases)
    """

    # Turnstile-related constants.
    # OpenAI's Turnstile site key (should be extracted from the actual page).
    TURNSTILE_SITE_KEY = "0x4AAAAAAADnPIDROrmt1Wwj"

    @staticmethod
    def detect_challenge(response) -> bool:
        """
        Detect whether a response is a Cloudflare Turnstile challenge.

        Args:
            response: HTTP response object (from requests or curl_cffi).
                Only ``status_code`` and ``text`` are read.

        Returns:
            True if a Cloudflare challenge is detected, otherwise False.

        Detection criteria:
        - Status code is 403
        - The body contains at least one Cloudflare marker such as
          "Just a moment" or "Checking your browser"
        """
        if response.status_code != 403:
            return False

        # Tolerate a missing body so a bodiless 403 does not raise here.
        raw_body = response.text or ""
        body = raw_body.lower()
        cloudflare_keywords = [
            "just a moment",
            "checking your browser",
            "cloudflare",
            "cf-challenge",
            "turnstile",
            "ray id",
        ]

        detected = any(keyword in body for keyword in cloudflare_keywords)

        if detected:
            logger.warning("Cloudflare Turnstile challenge detected")
            # Try to extract the Ray ID (debugging aid only).
            ray_id = CloudflareSolver._extract_ray_id(raw_body)
            if ray_id:
                logger.info(f"Cloudflare Ray ID: {ray_id}")

        return detected

    @staticmethod
    async def solve(session, target_url: str, **kwargs) -> Optional[str]:
        """
        Solve a Cloudflare Turnstile challenge.

        ⚠️ Placeholder interface - users must implement it for their own needs.
        As written, this method logs a warning and always raises.

        Args:
            session: OAISession instance (unused by the placeholder).
            target_url: The URL that triggered the challenge.
            **kwargs: Other parameters that may be needed (e.g. site_key, action).

        Returns:
            The cf_clearance cookie value or a Turnstile response token
            (once implemented).

        Raises:
            NotImplementedError: Always, until the user implements this method.

        Integration examples:
        ```python
        # Option 1: use the 2captcha solving service
        from twocaptcha import TwoCaptcha
        solver = TwoCaptcha('YOUR_API_KEY')
        result = solver.turnstile(
            sitekey=CloudflareSolver.TURNSTILE_SITE_KEY,
            url=target_url
        )
        return result['code']

        # Option 2: use capsolver
        import capsolver
        capsolver.api_key = "YOUR_API_KEY"
        solution = capsolver.solve({
            "type": "AntiTurnstileTaskProxyLess",
            "websiteURL": target_url,
            "websiteKey": CloudflareSolver.TURNSTILE_SITE_KEY,
        })
        return solution['token']

        # Option 3: use browser automation
        from selenium import webdriver
        from undetected_chromedriver import Chrome
        driver = Chrome()
        driver.get(target_url)
        # Wait for Cloudflare to pass automatically...
        cf_clearance = driver.get_cookie('cf_clearance')['value']
        return cf_clearance
        ```
        """
        logger.warning(
            f"Cloudflare challenge detected at {target_url}, but solver not configured"
        )

        raise NotImplementedError(
            "❌ Cloudflare solver not implemented.\n\n"
            "This is OPTIONAL. Only implement if you encounter frequent 403 errors.\n\n"
            "Recommended solutions:\n"
            "1. Use high-quality residential proxies (easiest)\n"
            "2. Integrate captcha solving service (2captcha, capsolver)\n"
            "3. Use browser automation (undetected-chromedriver)\n"
            "4. Retry with different proxy/IP\n\n"
            f"Target URL: {target_url}\n"
            f"Site Key: {CloudflareSolver.TURNSTILE_SITE_KEY}\n\n"
            "Example implementation location: core/challenge.py -> solve()"
        )

    @staticmethod
    def _extract_ray_id(html: str) -> Optional[str]:
        """
        Extract the Ray ID from a Cloudflare error page (debugging aid).

        Ray ID format example: 84a1b2c3d4e5f678-LAX

        The label "Ray ID" is matched case-insensitively and must be followed
        by at least one colon, whitespace character or HTML tag before the
        value. The value is a run of hex digits, optionally followed by a
        hyphen and a three-letter suffix (as in the example above).

        Args:
            html: HTML content of the Cloudflare error page.

        Returns:
            The Ray ID string, or None if none is found (including when
            ``html`` is empty or None).
        """
        import re

        if not html:
            return None

        # The hyphen is only accepted as part of a "-XXX" suffix, so the
        # result can never be a bare "-" or end with a dangling hyphen.
        match = re.search(
            r'ray id(?:[:\s]|<[^>]*>)+([a-f0-9]+(?:-[a-z]{3})?)',
            html,
            re.IGNORECASE,
        )
        return match.group(1) if match else None

    @staticmethod
    def should_retry(response) -> bool:
        """
        Decide whether the request should be retried (for Cloudflare challenges).

        In some cases simply waiting a few seconds and retrying is enough.

        Args:
            response: HTTP response object.

        Returns:
            True if a retry is suggested, otherwise False.
        """
        if not CloudflareSolver.detect_challenge(response):
            return False

        # A lightweight JavaScript challenge may pass on retry.
        # A Turnstile captcha will not: it has to be solved.
        body = (response.text or "").lower()

        # JavaScript challenge markers (retry may work).
        js_challenge_keywords = ["checking your browser", "please wait"]
        has_js_challenge = any(kw in body for kw in js_challenge_keywords)

        # Turnstile captcha markers (must be solved, retry is useless).
        turnstile_keywords = ["turnstile", "cf-turnstile"]
        has_turnstile = any(kw in body for kw in turnstile_keywords)

        if has_js_challenge and not has_turnstile:
            logger.info("Detected JavaScript challenge, retry may work")
            return True

        logger.warning("Detected Turnstile captcha, retry unlikely to work")
        return False

    @staticmethod
    def get_bypass_headers() -> Dict[str, str]:
        """
        Return extra HTTP headers that may help get past Cloudflare.

        These headers can improve the pass rate but are not guaranteed to work.

        Returns:
            A new dictionary of extra HTTP headers on every call.
        """
        return {
            "Cache-Control": "max-age=0",
            "Upgrade-Insecure-Requests": "1",
            "Sec-Fetch-User": "?1",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "none",
            "Priority": "u=0, i",
        }
|
||||
|
||||
|
||||
class CaptchaSolver:
    """
    Generic captcha solver (placeholder interface).

    Intended to cover several captcha types:
    - Cloudflare Turnstile
    - reCAPTCHA v2/v3
    - hCaptcha
    - Image captchas
    """

    def __init__(self, api_key: Optional[str] = None, provider: str = "2captcha"):
        """
        Set up the captcha solver.

        Args:
            api_key: API key for the captcha-solving service.
            provider: Service name ("2captcha", "capsolver", "anticaptcha").
        """
        self.provider = provider
        self.api_key = api_key

        # A missing key is only warned about here; it becomes an error
        # when a solve is actually attempted.
        if not self.api_key:
            logger.warning("CaptchaSolver initialized without API key")

    async def solve_turnstile(
        self,
        site_key: str,
        page_url: str,
        **kwargs
    ) -> Optional[str]:
        """
        Solve a Turnstile captcha (placeholder interface).

        Args:
            site_key: Turnstile site key.
            page_url: URL of the page.
            **kwargs: Additional parameters.

        Returns:
            Turnstile response token (once implemented).

        Raises:
            ValueError: If no API key has been configured.
            NotImplementedError: Always, when an API key is present, until
                a real provider integration is added.
        """
        key_configured = bool(self.api_key)
        if not key_configured:
            raise ValueError("API key not configured")

        progress_message = f"Solving Turnstile captcha with {self.provider}..."
        logger.info(progress_message)

        # TODO: integrate the real captcha-solving service API here.
        failure_message = f"Turnstile solver not implemented for provider: {self.provider}"
        raise NotImplementedError(failure_message)
|
||||
|
||||
|
||||
# Public API exported by this module.
__all__ = ["CloudflareSolver", "CaptchaSolver"]
|
||||
Reference in New Issue
Block a user