415gotit
This commit is contained in:
213
analyze_new.py
Normal file
213
analyze_new.py
Normal file
@@ -0,0 +1,213 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
Dedicated parsing script for the chatgpt.com-*.log format.

Structure of each line:  hsw.js:2 {"tag":"索引点","tH":N,"Ig":"..."}

Meaning of the Ig value: the constructor name / property name / return value
of the browser API being probed.
"""
|
||||
|
||||
import re
|
||||
import json
|
||||
import sys
|
||||
import glob
|
||||
from collections import defaultdict, OrderedDict
|
||||
|
||||
# ── 自动找日志文件 ──────────────────────────────────────────
|
||||
def find_log(path_arg=None,
             pattern="/home/carry/myprj/hcaptcha/asset/chatgpt.com-*.log"):
    """Return the path of the log file to analyse.

    Args:
        path_arg: Explicit path chosen by the caller. Any truthy value is
            returned unchanged; it is not checked for existence.
        pattern: Glob pattern searched when ``path_arg`` is not given. The
            default is the location this script was originally written for;
            pass another pattern to run it elsewhere.

    Returns:
        ``path_arg`` when given, otherwise the match of ``pattern`` that
        sorts last by name.

    Raises:
        SystemExit: With status 1 when ``path_arg`` is not given and nothing
            matches ``pattern``.
    """
    if path_arg:
        return path_arg

    candidates = sorted(glob.glob(pattern))
    if not candidates:
        # Diagnostics belong on stderr so they never mix with the report.
        print("❌ 未找到 chatgpt.com-*.log,请手动传入路径", file=sys.stderr)
        sys.exit(1)

    # Name order, not modification time: this is the newest file only when
    # the file names embed a sortable timestamp -- TODO confirm.
    return candidates[-1]
|
||||
|
||||
|
||||
# ── 解析 ────────────────────────────────────────────────────
|
||||
def parse(path):
    """Extract the tagged index-point records from a log file.

    Only lines of the form ``hsw.js:<digits> <body>`` are considered (after
    stripping surrounding whitespace), and only when ``<body>`` starts with
    ``{``, parses as JSON, has ``"tag" == "索引点"`` and carries a ``"tH"``
    key. Everything else is skipped silently.

    Args:
        path: Path of the log file; it is read as UTF-8.

    Returns:
        A list of dicts in file order, each with the keys:
        ``lineno`` (1-based line number), ``tH`` (value of the ``tH`` field),
        ``has_ig`` (whether the record has an ``Ig`` key at all) and ``ig``
        (the ``Ig`` value, ``None`` when absent or JSON ``null``).
    """
    # Compiled once here instead of being looked up for every line.
    line_re = re.compile(r'hsw\.js:\d+\s+(.*)')

    entries = []
    with open(path, encoding="utf-8") as f:
        for lineno, line in enumerate(f, 1):
            m = line_re.match(line.strip())
            if not m:
                continue

            body = m.group(1).strip()
            if not body.startswith('{'):
                continue

            # Keep the try body to the one call that can raise the error.
            try:
                obj = json.loads(body)
            except json.JSONDecodeError:
                # Best effort: bodies that are not valid JSON are ignored.
                continue

            if obj.get("tag") != "索引点":
                continue
            if "tH" not in obj:
                # A tagged record without tH used to abort the whole parse
                # with KeyError; it cannot be grouped, so skip it.
                continue

            entries.append({
                "lineno": lineno,
                "tH": obj["tH"],
                "has_ig": "Ig" in obj,
                "ig": obj.get("Ig"),  # may be str / int / bool / None
            })
    return entries
|
||||
|
||||
|
||||
# ── 汇总 ────────────────────────────────────────────────────
|
||||
def summarize(entries):
    """Group parsed entries by their ``tH`` index.

    Args:
        entries: Iterable of dicts with the keys ``tH``, ``lineno``,
            ``has_ig`` and ``ig``.

    Returns:
        A 3-tuple of ``defaultdict`` objects keyed by ``tH``:

        - distinct ``Ig`` values in first-seen order (only for entries that
          have an ``Ig`` field),
        - number of entries that have no ``Ig`` field,
        - line numbers of every entry seen for that ``tH``.
    """
    line_numbers = defaultdict(list)
    missing_counts = defaultdict(int)
    distinct_igs = defaultdict(list)
    # repr() is the de-duplication key, so unhashable values work and
    # True / 1 are kept apart.
    fingerprints = defaultdict(set)

    for entry in entries:
        idx = entry["tH"]
        line_numbers[idx].append(entry["lineno"])

        if not entry["has_ig"]:
            missing_counts[idx] += 1
            continue

        value = entry["ig"]
        marker = repr(value)
        if marker in fingerprints[idx]:
            continue
        fingerprints[idx].add(marker)
        distinct_igs[idx].append(value)

    return distinct_igs, missing_counts, line_numbers
|
||||
|
||||
|
||||
# ── 打印报告 ─────────────────────────────────────────────────
|
||||
# Meanings attached to well-known Ig values. Built once at import time
# instead of on every report() call.
_KNOWN_IG_MEANINGS = {
    "Window": "全局 window 对象",
    "Promise": "Promise 构造函数检测",
    "Object": "Object 原型检测",
    "Performance": "performance API",
    "performance": "window.performance 属性",
    "Crypto": "window.crypto API",
    "Uint8Array": "TypedArray (crypto.getRandomValues)",
    "OfflineAudioContext": "AudioContext 指纹",
    "RTCPeerConnection": "WebRTC 检测",
    "fetch": "fetch API 检测",
    "Request": "fetch Request 构造函数",
    "Screen": "screen 对象",
    "Storage": "localStorage / sessionStorage",
    "IDBFactory": "indexedDB",
    "HTMLDocument": "document 类型",
    "HTMLCanvasElement": "Canvas 元素检测",
    "CanvasRenderingContext2D": "2D Canvas 渲染上下文",
    "Navigator": "navigator 对象",
    "webdriver": "navigator.webdriver 检测(bot检测关键)",
    "languages": "navigator.languages",
    "Array": "Array 类型检测",
    "getEntriesByType": "performance.getEntriesByType 方法",
    "prototype": "原型链检测",
    "constructor": "constructor 属性验证",
    "__wdata": "window 属性枚举(环境指纹)",
    "#000000": "Canvas fillStyle 默认值",
}

# Strings that start with <digits>:<digits>:<four digits>.
_TOKEN_RE = re.compile(r'\d+:\d+:\d{4}')


def _format_ig(value):
    """Render one Ig value for the table; strings over 60 chars are cut."""
    if not isinstance(value, str):
        return repr(value)
    if len(value) > 60:
        return value[:57] + "..."
    return value


def _describe_ig(value):
    """Return the one-line description of an Ig value for the summary."""
    if isinstance(value, str):
        meaning = _KNOWN_IG_MEANINGS.get(value)
        if meaning:
            return f"{value} → {meaning}"
        if value.startswith("0,1,2,3"):
            return "window keys 枚举列表 → 全局属性指纹"
        if _TOKEN_RE.match(value):
            return f"{value} → HSW token 格式"
        if value in ("f", "t", "c", "d"):
            return f'"{value}" → 分支标记字符'
        return value
    # bool is tested before int because bool is a subclass of int.
    if isinstance(value, bool):
        return f"{value} → 布尔检测结果"
    if isinstance(value, int):
        return f"{value} → 数值"
    # Floats, null, arrays and objects used to be dropped from the summary
    # without a trace; show them verbatim instead.
    return repr(value)


def report(tH_igs, tH_no_ig, tH_lines):
    """Print the analysis report to stdout.

    The report has three sections: a table of every tH that has at least one
    Ig value, a table of every tH that was only seen without an Ig field, and
    a per-tH description of each Ig value.

    Args:
        tH_igs: Mapping tH -> list of distinct Ig values.
        tH_no_ig: Mapping tH -> number of records without an Ig field.
        tH_lines: Mapping tH -> line numbers. Accepted for interface
            compatibility; it is not used by the report.

    Returns:
        None.
    """
    all_tH = sorted(set(tH_igs) | set(tH_no_ig))

    print("=" * 68)
    print(" HSW 新日志分析 — 每个索引点(tH)访问的浏览器 API")
    print("=" * 68)

    # Split the indices into "has values" and "only seen without Ig".
    has_value = []
    only_no_ig = []
    for tH in all_tH:
        igs = tH_igs.get(tH, [])
        no = tH_no_ig.get(tH, 0)
        if igs:
            has_value.append((tH, igs, no))
        else:
            only_no_ig.append((tH, no))

    # -- indices with Ig values --
    print(f"\n✅ 有 Ig 值的索引点 ({len(has_value)} 个)\n")
    print(f" {'tH':<6} {'Ig 值(去重、按出现顺序)'}")
    print(f" {'─'*6} {'─'*56}")
    for tH, igs, no_cnt in has_value:
        ig_str = " | ".join(_format_ig(v) for v in igs)
        suffix = f" (另有 {no_cnt} 次无Ig)" if no_cnt else ""
        # !s keeps the int layout and also accepts non-int tH values, which
        # the former ":<4d" format rejected with an exception.
        print(f" tH={tH!s:<4} {ig_str}{suffix}")

    # -- indices only seen without an Ig field --
    print(f"\n🟠 仅无 Ig 字段的索引点 ({len(only_no_ig)} 个) ← void 路径或未命中\n")
    print(f" {'tH':<6} {'出现次数'}")
    print(f" {'─'*6} {'─'*10}")
    for tH, cnt in only_no_ig:
        print(f" tH={tH!s:<4} {cnt} 次")

    # -- per-index description of what is being probed --
    print(f"\n{'─'*68}")
    print(" 📋 API 检测归纳(每个 tH 在检测什么)")
    print(f"{'─'*68}\n")

    for tH, igs, _ in has_value:
        print(f" tH={tH!s:<4}:")
        for v in igs:
            print(f" {_describe_ig(v)}")
        print()
|
||||
|
||||
|
||||
# ── 导出 JSON ────────────────────────────────────────────────
|
||||
def export_json(tH_igs, tH_no_ig, out_path):
    """Write the per-tH summary to ``out_path`` as indented UTF-8 JSON.

    Each tH (stringified, in sorted order) maps to an object with
    ``ig_values`` (strings longer than 200 characters are cut to 200 plus
    ``"..."``), ``no_ig_count`` and ``status`` (``"has_value"`` when the tH
    has any Ig value, otherwise ``"no_ig"``). A confirmation line is printed
    once the file is written.

    Args:
        tH_igs: Mapping tH -> list of Ig values.
        tH_no_ig: Mapping tH -> count of records without an Ig field.
        out_path: Destination file path; it is overwritten.
    """
    def _clip(value):
        # Only over-long strings are shortened; everything else passes through.
        if isinstance(value, str) and len(value) > 200:
            return value[:200] + "..."
        return value

    payload = OrderedDict()
    for idx in sorted({*tH_igs.keys(), *tH_no_ig.keys()}):
        values = tH_igs.get(idx, [])
        missing = tH_no_ig.get(idx, 0)
        payload[str(idx)] = {
            "ig_values": list(map(_clip, values)),
            "no_ig_count": missing,
            "status": "has_value" if values else "no_ig",
        }

    with open(out_path, "w", encoding="utf-8") as handle:
        json.dump(payload, handle, ensure_ascii=False, indent=2)

    print(f"📄 JSON 已写入: {out_path}")
|
||||
|
||||
|
||||
# ── 入口 ─────────────────────────────────────────────────────
|
||||
# Script entry point: analyse one log, print the report, export the JSON.
if __name__ == "__main__":
    # The first command-line argument, when present, is the log path;
    # otherwise find_log() picks one.
    log_path = find_log(sys.argv[1] if len(sys.argv) > 1 else None)
    print(f"📂 日志文件: {log_path}\n")

    entries = parse(log_path)
    print(f"共解析 {len(entries)} 条索引点记录\n")

    tH_igs, tH_no_ig, tH_lines = summarize(entries)
    report(tH_igs, tH_no_ig, tH_lines)

    # Strip only a trailing ".log". str.replace() rewrote every ".log" in the
    # path, and for a path without ".log" it produced the input path itself,
    # so the export overwrote the log being analysed.
    log_suffix = ".log"
    if log_path.endswith(log_suffix):
        base = log_path[:-len(log_suffix)]
    else:
        base = log_path
    out = base + "_analysis.json"
    export_json(tH_igs, tH_no_ig, out)
|
||||
Reference in New Issue
Block a user