Files
autoPlus/core/challenge.py
2026-01-26 15:04:02 +08:00

275 lines
8.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Cloudflare Turnstile 验证码解决器
Cloudflare Turnstile 是一种新型验证码系统,用于替代传统的 reCAPTCHA
当触发时会返回 403 状态码并显示 "Just a moment" 页面
⚠️ 本模块提供预留接口,用户根据需要配置解决方案
"""
from typing import Optional, Dict, Any
from utils.logger import logger
class CloudflareSolver:
    """Detect Cloudflare Turnstile challenges and expose a solver hook.

    This is a placeholder interface: the detection helpers are implemented,
    while ``solve`` always raises ``NotImplementedError`` until the user
    supplies an implementation.

    Possible approaches:
        1. Use high-quality residential proxies (recommended, lower cost).
        2. Integrate a captcha-solving service (e.g. 2captcha, capsolver).
        3. Use browser automation with undetected-chromedriver.
        4. Wait for a while and retry (works in some cases).
    """

    # Turnstile site key attributed to OpenAI by the original author; it
    # should be re-extracted from the actual page.
    TURNSTILE_SITE_KEY = "0x4AAAAAAADnPIDROrmt1Wwj"

    @staticmethod
    def detect_challenge(response) -> bool:
        """Return whether ``response`` looks like a Cloudflare challenge page.

        Args:
            response: HTTP response object exposing ``status_code`` and
                ``text`` (e.g. from requests or curl_cffi).

        Returns:
            True when the status code is 403 and the lower-cased body contains
            at least one of the Cloudflare marker strings; otherwise False.

        Side effects:
            Logs a warning on detection and, when a Ray ID can be extracted
            from the body, logs it at info level for debugging.
        """
        # Only 403 responses are ever treated as challenges.
        if response.status_code != 403:
            return False

        body = response.text.lower()
        cloudflare_keywords = [
            "just a moment",
            "checking your browser",
            "cloudflare",
            "cf-challenge",
            "turnstile",
            "ray id",
        ]
        detected = any(keyword in body for keyword in cloudflare_keywords)
        if detected:
            logger.warning("Cloudflare Turnstile challenge detected")
            # Try to extract the Ray ID (useful for debugging).
            ray_id = CloudflareSolver._extract_ray_id(response.text)
            if ray_id:
                logger.info(f"Cloudflare Ray ID: {ray_id}")
        return detected

    @staticmethod
    async def solve(session, target_url: str, **kwargs) -> Optional[str]:
        """Solve a Cloudflare Turnstile challenge (placeholder).

        This hook is not implemented; it logs a warning and raises.

        Args:
            session: OAISession instance.
            target_url: URL that triggered the challenge.
            **kwargs: Other parameters an implementation might need
                (e.g. site_key, action). Currently unused.

        Returns:
            An implementation is expected to return the ``cf_clearance``
            cookie value or a Turnstile response token.

        Raises:
            NotImplementedError: Always, until the user implements this method.

        Integration examples:
            ```python
            # Option 1: use the 2captcha solving service
            from twocaptcha import TwoCaptcha
            solver = TwoCaptcha('YOUR_API_KEY')
            result = solver.turnstile(
                sitekey=CloudflareSolver.TURNSTILE_SITE_KEY,
                url=target_url
            )
            return result['code']

            # Option 2: use capsolver
            import capsolver
            capsolver.api_key = "YOUR_API_KEY"
            solution = capsolver.solve({
                "type": "AntiTurnstileTaskProxyLess",
                "websiteURL": target_url,
                "websiteKey": CloudflareSolver.TURNSTILE_SITE_KEY,
            })
            return solution['token']

            # Option 3: use browser automation
            from selenium import webdriver
            from undetected_chromedriver import Chrome
            driver = Chrome()
            driver.get(target_url)
            # wait for Cloudflare to pass automatically...
            cf_clearance = driver.get_cookie('cf_clearance')['value']
            return cf_clearance
            ```
        """
        logger.warning(
            f"Cloudflare challenge detected at {target_url}, but solver not configured"
        )
        raise NotImplementedError(
            "❌ Cloudflare solver not implemented.\n\n"
            "This is OPTIONAL. Only implement if you encounter frequent 403 errors.\n\n"
            "Recommended solutions:\n"
            "1. Use high-quality residential proxies (easiest)\n"
            "2. Integrate captcha solving service (2captcha, capsolver)\n"
            "3. Use browser automation (undetected-chromedriver)\n"
            "4. Retry with different proxy/IP\n\n"
            f"Target URL: {target_url}\n"
            f"Site Key: {CloudflareSolver.TURNSTILE_SITE_KEY}\n\n"
            "Example implementation location: core/challenge.py -> solve()"
        )

    @staticmethod
    def _extract_ray_id(html: str) -> Optional[str]:
        """Extract the Ray ID from a Cloudflare error page (for debugging).

        Example Ray ID format: ``84a1b2c3d4e5f678-LAX`` (hex digits optionally
        followed by a dash and a three-letter location suffix).

        Args:
            html: HTML content of the Cloudflare error page.

        Returns:
            The Ray ID string, or None when no Ray ID is found or ``html`` is
            empty.
        """
        import re

        if not html:
            return None

        # A single case-insensitive pattern covers "Ray ID: <id>" as well as
        # looser "ray id <id>" spellings. Any HTML tags between the label and
        # the value (e.g. "<strong ...>") are skipped. The optional location
        # suffix is captured as a whole, so a dangling "-" is never returned.
        match = re.search(
            r'ray id[:\s]+(?:<[^>]+>\s*)*([a-f0-9]+(?:-[a-z]{3}\b)?)',
            html,
            re.IGNORECASE,
        )
        return match.group(1) if match else None

    @staticmethod
    def should_retry(response) -> bool:
        """Decide whether retrying the request is worthwhile.

        In some cases simply waiting a few seconds and retrying is enough.

        Args:
            response: HTTP response object.

        Returns:
            True when the response is a detected challenge whose body contains
            a JavaScript-challenge marker and no Turnstile marker; otherwise
            False.
        """
        if not CloudflareSolver.detect_challenge(response):
            return False

        # A lightweight JavaScript challenge may pass on retry; a Turnstile
        # captcha must be solved, so retrying is pointless.
        body = response.text.lower()

        # JavaScript challenge markers (retry may work).
        js_challenge_keywords = ["checking your browser", "please wait"]
        has_js_challenge = any(kw in body for kw in js_challenge_keywords)

        # Turnstile captcha markers (must be solved; retry does not help).
        turnstile_keywords = ["turnstile", "cf-turnstile"]
        has_turnstile = any(kw in body for kw in turnstile_keywords)

        if has_js_challenge and not has_turnstile:
            logger.info("Detected JavaScript challenge, retry may work")
            return True

        # This branch also covers challenges with neither kind of marker.
        logger.warning("Detected Turnstile captcha, retry unlikely to work")
        return False

    @staticmethod
    def get_bypass_headers() -> Dict[str, str]:
        """Return extra HTTP headers that may help pass Cloudflare checks.

        Per the original author, these headers can raise the pass rate but
        are not guaranteed to work.

        Returns:
            A new dictionary of additional HTTP headers.
        """
        return {
            "Cache-Control": "max-age=0",
            "Upgrade-Insecure-Requests": "1",
            "Sec-Fetch-User": "?1",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "none",
            "Priority": "u=0, i",
        }
class CaptchaSolver:
    """Generic captcha solver (placeholder interface).

    Captcha types the original author intends this class to cover:
        - Cloudflare Turnstile
        - reCAPTCHA v2/v3
        - hCaptcha
        - image captchas

    Only a Turnstile entry point exists here, and it is a stub.
    """

    def __init__(self, api_key: Optional[str] = None, provider: str = "2captcha"):
        """Store the solving-service credentials.

        Args:
            api_key: API key of the captcha-solving service.
            provider: Name of the solving service
                ("2captcha", "capsolver", "anticaptcha").
        """
        self.api_key = api_key
        self.provider = provider
        if api_key:
            return
        # A missing key is only warned about here; solve_turnstile rejects it.
        logger.warning("CaptchaSolver initialized without API key")

    async def solve_turnstile(
        self,
        site_key: str,
        page_url: str,
        **kwargs
    ) -> Optional[str]:
        """Solve a Turnstile captcha (placeholder).

        Args:
            site_key: Turnstile site key.
            page_url: URL of the page showing the captcha.
            **kwargs: Additional parameters (currently unused).

        Returns:
            An implementation is expected to return the Turnstile response
            token; this stub never returns.

        Raises:
            ValueError: If no API key is configured.
            NotImplementedError: Always, once an API key is present.
        """
        key_missing = not self.api_key
        if key_missing:
            raise ValueError("API key not configured")

        logger.info(f"Solving Turnstile captcha with {self.provider}...")

        # TODO: integrate the actual captcha-solving service API here.
        not_implemented_message = (
            f"Turnstile solver not implemented for provider: {self.provider}"
        )
        raise NotImplementedError(not_implemented_message)
# Public names exported by this module.
__all__ = [
"CloudflareSolver",
"CaptchaSolver",
]