first commit
This commit is contained in:
151
checker/legacy/recha/reca.py
Normal file
151
checker/legacy/recha/reca.py
Normal file
@@ -0,0 +1,151 @@
|
||||
import logging
|
||||
import re
|
||||
from time import sleep
|
||||
|
||||
import random
|
||||
import requests
|
||||
# ProxyRotator is assumed to be defined elsewhere (imported here from the sibling proxy module).
|
||||
from .proxy import ProxyRotator
|
||||
# OLD_ANDROID_USER_AGENTS is assumed to be a module-level list of old-Android User-Agent strings.
|
||||
from .constants import OLD_ANDROID_USER_AGENTS
|
||||
|
||||
# Module-level logger, named after this module; used by both classes below.
logger = logging.getLogger(__name__)
|
||||
|
||||
class RecaptchaRequestor:
    """Thin HTTP helper for the ``/anchor`` and ``/reload`` endpoints.

    Each call goes through :meth:`_request`, which picks a random
    User-Agent and asks the proxy rotator for the proxies to use.
    """

    def __init__(self, timeout, proxies):
        """Remember the default timeout and wrap *proxies* in a rotator.

        Args:
            timeout: Default ``timeout`` forwarded to ``requests.request``.
            proxies: Passed unchanged to ``ProxyRotator``.
        """
        self.timeout = timeout
        # A fresh proxy is requested from this rotator on every call.
        self.proxy_rotator = ProxyRotator(proxies)
        self.base_url = 'https://www.google.com/recaptcha'

    def _random_headers(self):
        """Return form-encoded headers with a randomly chosen User-Agent."""
        headers = {'Content-Type': 'application/x-www-form-urlencoded'}
        headers['User-Agent'] = random.choice(OLD_ANDROID_USER_AGENTS)
        return headers

    def _request(self, method, endpoint, **kwargs):
        """Send one request and return the body text, or ``None`` on failure.

        Args:
            method: HTTP verb handed to ``requests.request``.
            endpoint: Path appended verbatim to ``self.base_url``.
            **kwargs: Extra ``requests.request`` arguments. ``timeout`` is
                filled in only when absent; ``headers`` and ``proxies`` are
                always overwritten.

        Returns:
            ``response.text`` on success; ``None`` when any
            ``requests.RequestException`` is raised (including the one from
            ``raise_for_status`` on an error status).
        """
        target = f"{self.base_url}{endpoint}"

        kwargs.setdefault('timeout', self.timeout)
        # Headers first, then proxy: same call order as before.
        kwargs.update(
            headers=self._random_headers(),
            proxies=self.proxy_rotator.get(),
        )

        logger.debug('Recaptcha request %s %s params=%s', method, target, kwargs.get('params'))
        try:
            response = requests.request(method, target, **kwargs)
            response.raise_for_status()
            logger.debug('Recaptcha response %s %s status=%s', method, target, response.status_code)
            # Read the body inside the try so read errors are handled too.
            body = response.text
        except requests.RequestException as exc:
            logger.warning('Recaptcha request failed %s %s: %s', method, target, exc)
            body = None
        return body

    def fetch_anchor_token(self, api_type, params):
        """GET ``/<api_type>/anchor`` with *params*; return the body or ``None``."""
        return self._request('GET', f'/{api_type}/anchor', params=params)

    def fetch_recaptcha_token(self, api_type, s_params, payload):
        """POST *payload* to ``/<api_type>/reload`` and extract the token.

        Args:
            api_type: Path segment placed before ``/reload``.
            s_params: Mapping; only its ``'k'`` entry is sent as a query param.
            payload: Request body passed as ``data``.

        Returns:
            The text captured after ``"rresp","`` in the response, or ``None``
            when the request failed, the body was empty, or nothing matched.
        """
        body = self._request(
            'POST',
            f'/{api_type}/reload',
            params={'k': s_params.get('k')},
            data=payload,
        )
        if not body:
            return None

        found = re.search(r'"rresp","(.*?)"', body)
        return found.group(1) if found else None
|
||||
|
||||
|
||||
class RecaptchaSolverSync:
    """Blocking driver for the anchor -> reload exchange.

    ``solve`` parses an anchor URL, then repeatedly fetches the anchor page,
    pulls the first ``value="..."`` attribute out of it, posts a payload to
    the reload endpoint and returns the extracted token.
    """

    # Maximum number of attempts made by ``solve`` before giving up.
    MAX_RETRIES = 20
    # Seconds slept between two consecutive attempts.
    RETRY_DELAY = 1

    def __init__(self, timeout, proxies):
        """Create the HTTP client used for every request.

        Args:
            timeout: Forwarded to ``RecaptchaRequestor``.
            proxies: Forwarded to ``RecaptchaRequestor``.
        """
        self.client = RecaptchaRequestor(timeout=timeout, proxies=proxies)

    @staticmethod
    def _parse_api_type(anchor_url):
        """Split an anchor URL into ``(api_type, query_string)``.

        Args:
            anchor_url: URL containing ``api2/anchor?`` or
                ``enterprise/anchor?``.

        Returns:
            ``('api2' | 'enterprise', query_string)`` or ``(None, None)`` when
            the URL does not match. A trailing ``#fragment`` is not part of
            the query string and is excluded from it.
        """
        # Stop at '#' so a fragment cannot leak into the last parameter
        # value; '\n' is excluded to keep the single-line behavior of '.'.
        match = re.search(r'(api2|enterprise)/anchor\?([^#\n]*)', anchor_url)
        if match:
            return match.group(1), match.group(2)
        return None, None

    @staticmethod
    def _extract_c_value(html):
        """Return the first ``value="..."`` attribute found in *html*.

        Returns ``None`` when there is no such attribute or it is empty-free
        of matches. NOTE(review): the pattern is not tied to a specific
        element, so it relies on the wanted field being the first one.
        """
        match = re.search(r'value="(.*?)"', html)
        if match:
            return match.group(1)
        return None

    @staticmethod
    def _parse_params(param_str):
        """Turn ``"k=xxx&co=yyy"`` into ``{'k': 'xxx', 'co': 'yyy'}``.

        Pairs without ``=`` are skipped, only the first ``=`` splits, later
        duplicates win, and values are kept exactly as written (no
        percent-decoding). A falsy *param_str* yields an empty dict.
        """
        if not param_str:
            return {}
        return dict(
            pair.split('=', 1)
            for pair in param_str.split('&')
            if '=' in pair
        )

    def _build_payload(self, s_params, c_value):
        """Build the form body sent to the reload endpoint.

        Args:
            s_params: Mapping providing ``'v'``, ``'k'`` and ``'co'``.
            c_value: Value placed in the ``c`` field.

        Returns:
            ``v=...&reason=q&c=...&k=...&co=...``. Values are interpolated
            as-is (no URL-encoding); a missing key renders as ``None``.
        """
        return f"v={s_params.get('v')}&reason=q&c={c_value}&k={s_params.get('k')}&co={s_params.get('co')}"

    def _attempt_once(self, api_type, s_params):
        """Run one anchor -> reload round trip; return the token or ``None``."""
        # 1. Fetch the anchor page.
        anchor_token_html = self.client.fetch_anchor_token(api_type, s_params)
        if not anchor_token_html:
            logger.debug('Anchor response empty, retrying...')
            return None

        # 2. Extract the "c" value from the anchor page.
        c_value = self._extract_c_value(anchor_token_html)
        if not c_value:
            logger.debug('Failed to extract c value from anchor response.')
            return None

        # 3. Build the reload payload.
        payload = self._build_payload(s_params, c_value)
        logger.debug('Payload prepared with keys: %s', list(s_params.keys()))

        # 4. Post it and extract the final token.
        return self.client.fetch_recaptcha_token(api_type, s_params, payload)

    def solve(self, anchor_url):
        """Return a token for *anchor_url*, retrying up to ``MAX_RETRIES`` times.

        Args:
            anchor_url: Anchor URL accepted by ``_parse_api_type``.

        Returns:
            The first truthy token returned by an attempt.

        Raises:
            ValueError: The URL does not match or has an empty query string.
            RuntimeError: Every attempt failed.
        """
        api_type, param_str = self._parse_api_type(anchor_url)
        if not param_str:
            logger.error('Invalid anchor URL provided: %s', anchor_url)
            raise ValueError('Invalid anchor URL format.')

        s_params = self._parse_params(param_str)
        logger.debug('Recaptcha solve start api_type=%s params=%s', api_type, s_params)

        for attempt in range(1, self.MAX_RETRIES + 1):
            logger.debug('Recaptcha attempt %d/%d', attempt, self.MAX_RETRIES)

            token = self._attempt_once(api_type, s_params)
            if token:
                logger.info('Recaptcha solved in %d attempt(s).', attempt)
                return token

            # Wait only between attempts; a delay after the last failure
            # would just postpone the error below.
            if attempt < self.MAX_RETRIES:
                sleep(self.RETRY_DELAY)

        logger.error('Failed to solve reCAPTCHA after %d attempts.', self.MAX_RETRIES)
        raise RuntimeError('Failed to solve reCAPTCHA after maximum retries.')
|
||||
Reference in New Issue
Block a user