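#!/usr/bin/env python3
"""
Dedicated parser for log files named chatgpt.com-*.log.

Each relevant line has the form:  hsw.js:2 {"tag":"索引点","tH":N,"Ig":"..."}
("索引点" is the literal tag value that marks an index-point record.)

The Ig value is the name of the browser API constructor or property being
probed, or the value that the probe returned.
"""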
|
||
|
||
import re
|
||
import json
|
||
import sys
|
||
import glob
|
||
from collections import defaultdict, OrderedDict
|
||
|
||
# ── 自动找日志文件 ──────────────────────────────────────────
|
||
# Default location searched when no explicit log path is supplied.
DEFAULT_LOG_GLOB = "/home/carry/myprj/hcaptcha/asset/chatgpt.com-*.log"


def find_log(path_arg=None, pattern=DEFAULT_LOG_GLOB):
    """Return the path of the log file to analyse.

    Args:
        path_arg: Explicit path chosen by the caller. Any truthy value is
            returned unchanged, without checking that the file exists.
        pattern: Glob pattern searched when ``path_arg`` is empty. Defaults
            to the original hard-coded location.

    Returns:
        ``path_arg`` when given; otherwise the lexicographically last path
        matching ``pattern``. That is the newest file only if the file names
        sort chronologically.

    Raises:
        SystemExit: With exit code 1 when nothing matches ``pattern``.
    """
    if path_arg:
        return path_arg

    candidates = sorted(glob.glob(pattern))
    if not candidates:
        print("❌ 未找到 chatgpt.com-*.log,请手动传入路径")
        sys.exit(1)

    # Sorted by name, so the last element is the highest-sorting file name.
    return candidates[-1]
|
||
|
||
|
||
# ── 解析 ────────────────────────────────────────────────────
|
||
def parse(path):
    """Extract index-point records from an hsw.js console log.

    A line is considered only if, after stripping, it starts with
    ``hsw.js:<digits>`` followed by whitespace and a payload beginning with
    ``{``. The payload must be valid JSON whose ``"tag"`` equals ``"索引点"``
    and which carries a ``"tH"`` key. Every other line is skipped silently,
    including malformed JSON and records without ``"tH"``.

    Args:
        path: Path of the UTF-8 encoded log file.

    Returns:
        A list of dicts in file order, each with the keys ``lineno``
        (1-based line number), ``tH``, ``has_ig`` (whether an ``"Ig"`` key was
        present) and ``ig`` (the ``"Ig"`` value, ``None`` when absent).
    """
    # Compiled once; the pattern is applied to every line of the file.
    line_re = re.compile(r'hsw\.js:\d+\s+(.*)')

    entries = []
    with open(path, encoding="utf-8") as f:
        for lineno, line in enumerate(f, 1):
            m = line_re.match(line.strip())
            if not m:
                continue

            body = m.group(1).strip()
            if not body.startswith('{'):
                continue

            try:
                obj = json.loads(body)
            except json.JSONDecodeError:
                # Best effort: truncated or garbled payloads are ignored.
                continue

            # A record without "tH" cannot be grouped; skip it rather than
            # abort the whole parse with a KeyError.
            if obj.get("tag") != "索引点" or "tH" not in obj:
                continue

            entries.append({
                "lineno": lineno,
                "tH": obj["tH"],
                "has_ig": "Ig" in obj,
                "ig": obj.get("Ig"),  # may be str / int / bool / None
            })
    return entries
|
||
|
||
|
||
# ── 汇总 ────────────────────────────────────────────────────
|
||
def summarize(entries):
    """Group parsed records by their tH value.

    Args:
        entries: Iterable of dicts with the keys ``tH``, ``lineno``,
            ``has_ig`` and ``ig``, as produced by ``parse``.

    Returns:
        A tuple ``(tH_igs, tH_no_ig, tH_lines)`` of ``defaultdict`` objects:

        - ``tH_igs``: tH -> distinct Ig values in first-seen order.
        - ``tH_no_ig``: tH -> number of records that had no Ig field.
        - ``tH_lines``: tH -> line numbers of every record for that tH.
    """
    tH_igs = defaultdict(list)
    tH_no_ig = defaultdict(int)
    tH_lines = defaultdict(list)

    # Values are de-duplicated by repr(): this works for unhashable values
    # and keeps True and 1 apart, which a plain set would merge.
    seen = defaultdict(set)

    for e in entries:
        tH = e["tH"]
        tH_lines[tH].append(e["lineno"])

        if not e["has_ig"]:
            tH_no_ig[tH] += 1
            continue

        ig = e["ig"]
        key = repr(ig)
        if key not in seen[tH]:
            seen[tH].add(key)
            tH_igs[tH].append(ig)

    return tH_igs, tH_no_ig, tH_lines
|
||
|
||
|
||
# ── 打印报告 ─────────────────────────────────────────────────
|
||
# Known meanings of Ig values (based on common hCaptcha fingerprinting logic).
_KNOWN_IG_MEANINGS = {
    "Window": "全局 window 对象",
    "Promise": "Promise 构造函数检测",
    "Object": "Object 原型检测",
    "Performance": "performance API",
    "performance": "window.performance 属性",
    "Crypto": "window.crypto API",
    "Uint8Array": "TypedArray (crypto.getRandomValues)",
    "OfflineAudioContext": "AudioContext 指纹",
    "RTCPeerConnection": "WebRTC 检测",
    "fetch": "fetch API 检测",
    "Request": "fetch Request 构造函数",
    "Screen": "screen 对象",
    "Storage": "localStorage / sessionStorage",
    "IDBFactory": "indexedDB",
    "HTMLDocument": "document 类型",
    "HTMLCanvasElement": "Canvas 元素检测",
    "CanvasRenderingContext2D": "2D Canvas 渲染上下文",
    "Navigator": "navigator 对象",
    "webdriver": "navigator.webdriver 检测(bot检测关键)",
    "languages": "navigator.languages",
    "Array": "Array 类型检测",
    "getEntriesByType": "performance.getEntriesByType 方法",
    "prototype": "原型链检测",
    "constructor": "constructor 属性验证",
    "__wdata": "window 属性枚举(环境指纹)",
    "#000000": "Canvas fillStyle 默认值",
}


def _format_ig(value):
    """Render one Ig value for a table row; strings over 60 chars are cut."""
    if isinstance(value, str):
        return value[:57] + "..." if len(value) > 60 else value
    return repr(value)


def _describe_ig(value, known):
    """Return the one-line explanation printed for an Ig value."""
    if isinstance(value, str):
        desc = known.get(value)
        if desc:
            return f"{value} → {desc}"
        if value.startswith("0,1,2,3"):
            return "window keys 枚举列表 → 全局属性指纹"
        if re.match(r'\d+:\d+:\d{4}', value):
            return f"{value} → HSW token 格式"
        if value in ("f", "t", "c", "d"):
            return f'"{value}" → 分支标记字符'
        return value
    # bool must be tested before int: bool is a subclass of int.
    if isinstance(value, bool):
        return f"{value} → 布尔检测结果"
    if isinstance(value, (int, float)):
        return f"{value} → 数值"
    # None, lists, dicts: show them verbatim instead of dropping them.
    return repr(value)


def report(tH_igs, tH_no_ig, tH_lines):
    """Print the analysis report to standard output.

    The report has three parts: a table of tH values that have at least one
    Ig value, a table of tH values seen only without an Ig field, and a
    per-tH explanation of each Ig value.

    Args:
        tH_igs: Mapping tH -> list of distinct Ig values.
        tH_no_ig: Mapping tH -> count of records without an Ig field.
        tH_lines: Mapping tH -> line numbers. Accepted for interface
            compatibility; this function does not read it.
    """
    all_tH = sorted(set(tH_igs) | set(tH_no_ig))

    print("=" * 68)
    print(" HSW 新日志分析 — 每个索引点(tH)访问的浏览器 API")
    print("=" * 68)

    # Split the tH values into "has values" and "never had an Ig field".
    has_value = []
    only_no_ig = []
    for tH in all_tH:
        igs = tH_igs.get(tH, [])
        no = tH_no_ig.get(tH, 0)
        if igs:
            has_value.append((tH, igs, no))
        else:
            only_no_ig.append((tH, no))

    # -- tH values with Ig values --
    print(f"\n✅ 有 Ig 值的索引点 ({len(has_value)} 个)\n")
    print(f" {'tH':<6} {'Ig 值(去重、按出现顺序)'}")
    print(f" {'─'*6} {'─'*56}")
    for tH, igs, no_cnt in has_value:
        ig_str = " | ".join(_format_ig(v) for v in igs)
        suffix = f" (另有 {no_cnt} 次无Ig)" if no_cnt else ""
        # "!s:<4" pads like "<4d" for ints but does not raise on other types.
        print(f" tH={tH!s:<4} {ig_str}{suffix}")

    # -- tH values that only appeared without Ig --
    print(f"\n🟠 仅无 Ig 字段的索引点 ({len(only_no_ig)} 个) ← void 路径或未命中\n")
    print(f" {'tH':<6} {'出现次数'}")
    print(f" {'─'*6} {'─'*10}")
    for tH, cnt in only_no_ig:
        print(f" tH={tH!s:<4} {cnt} 次")

    # -- per-tH explanation of what is being probed --
    print(f"\n{'─'*68}")
    print(" 📋 API 检测归纳(每个 tH 在检测什么)")
    print(f"{'─'*68}\n")

    for tH, igs, _ in has_value:
        print(f" tH={tH!s:<4}:")
        for v in igs:
            print(f" {_describe_ig(v, _KNOWN_IG_MEANINGS)}")
        print()
|
||
|
||
|
||
# ── 导出 JSON ────────────────────────────────────────────────
|
||
def export_json(tH_igs, tH_no_ig, out_path):
    """Write the per-tH summary to ``out_path`` as UTF-8 JSON.

    The output object is keyed by ``str(tH)`` in sorted tH order. Each value
    holds ``ig_values`` (string values longer than 200 characters are cut to
    200 and suffixed with ``...``), ``no_ig_count`` and ``status``
    (``"has_value"`` when any Ig value exists, otherwise ``"no_ig"``).

    Args:
        tH_igs: Mapping tH -> list of distinct Ig values.
        tH_no_ig: Mapping tH -> count of records without an Ig field.
        out_path: Destination file; overwritten if it exists.
    """
    max_len = 200  # longest string value stored verbatim

    result = OrderedDict()
    for tH in sorted(set(tH_igs) | set(tH_no_ig)):
        igs = tH_igs.get(tH, [])
        result[str(tH)] = {
            "ig_values": [
                v[:max_len] + "..." if isinstance(v, str) and len(v) > max_len else v
                for v in igs
            ],
            "no_ig_count": tH_no_ig.get(tH, 0),
            "status": "has_value" if igs else "no_ig",
        }

    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)
    print(f"📄 JSON 已写入: {out_path}")
|
||
|
||
|
||
# ── 入口 ─────────────────────────────────────────────────────
|
||
if __name__ == "__main__":
|
||
log_path = find_log(sys.argv[1] if len(sys.argv) > 1 else None)
|
||
print(f"📂 日志文件: {log_path}\n")
|
||
|
||
entries = parse(log_path)
|
||
print(f"共解析 {len(entries)} 条索引点记录\n")
|
||
|
||
tH_igs, tH_no_ig, tH_lines = summarize(entries)
|
||
report(tH_igs, tH_no_ig, tH_lines)
|
||
|
||
out = log_path.replace(".log", "_analysis.json")
|
||
export_json(tH_igs, tH_no_ig, out)
|