415gotit
This commit is contained in:
175
analyze_priority.py
Normal file
175
analyze_priority.py
Normal file
@@ -0,0 +1,175 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Score and rank every fingerprint field found in chatgpt.com-*_analysis.json by priority.

Scoring rules:
  +10  field dedicated to bot/automation detection (webdriver, $cdc_*, callPhantom, ...)
  + 5  appears in the core detection loop tH=154 or tH=155
  + 2  for each additional distinct tH the field appears in (cross-tH frequency)
  + 3  belongs to a known high-risk API (Crypto, RTCPeerConnection, OfflineAudioContext, ...)
  + 1  belongs to the navigator / screen / canvas family
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
import glob
|
||||
from collections import defaultdict
|
||||
|
||||
# ── Configuration ────────────────────────────────────────────
# Pattern of the analysis files this script reads by default.
_ANALYSIS_GLOB = "/home/carry/myprj/hcaptcha/asset/chatgpt.com-*_analysis.json"

# Matching paths in sorted order; the default input is the last one.
_analysis_candidates = sorted(glob.glob(_ANALYSIS_GLOB))

# Default analysis file, or None when nothing matches the pattern.
# Indexing an empty match list with [-1] would raise IndexError at import
# time, which made the script unusable even when a path was given explicitly.
ANALYSIS_JSON = _analysis_candidates[-1] if _analysis_candidates else None
|
||||
|
||||
# Fields that exist only for bot/automation detection (their mere presence
# gives the client away).
# NOTE(review): "$wdc_" and "$cdc_asdjflasutopfhvcZLmcfl_" look like name
# prefixes; if this set is consulted by exact string equality they only match
# identical strings — confirm that is intended.
BOT_SIGNALS = {
    # plain marker names
    "webdriver",
    "spawn",
    "awesomium",
    "domAutomation",
    "domAutomationController",
    "callPhantom",
    "callSelenium",
    "hcaptchaCallbackZenno",
    "CDCJStestRunStatus",
    # underscore-prefixed markers
    "_selenium",
    "_Selenium_IDE_Recorder",
    "_WEBDRIVER_ELEM_CACHE",
    "__phantomas",
    "__nightmare",
    "__webdriver_script_fn",
    "__webdriver_script_func",
    # *_evaluate names
    "__driver_evaluate",
    "__webdriver_evaluate",
    "__selenium_evaluate",
    "__fxdriver_evaluate",
    # *_unwrapped names
    "__driver_unwrapped",
    "__webdriver_unwrapped",
    "__selenium_unwrapped",
    "__fxdriver_unwrapped",
    # cdc_ / $-prefixed names
    "cdc_adoQpoasnfa76pfcZLmcfl_Array",
    "cdc_adoQpoasnfa76pfcZLmcfl_Promise",
    "cdc_adoQpoasnfa76pfcZLmcfl_Symbol",
    "$cdc_asdjflasutopfhvcZLmcfl_",
    "$chrome_asyncScriptInfo",
    "$wdc_",
}
|
||||
|
||||
# High-risk APIs (strong fingerprinting signal).
HIGH_RISK_APIS = {
    "CanvasRenderingContext2D",
    "Crypto",
    "HTMLCanvasElement",
    "IDBFactory",
    "NavigatorUAData",
    "OfflineAudioContext",
    "PerformanceNavigationTiming",
    "PerformanceResourceTiming",
    "PluginArray",
    "RTCPeerConnection",
    "WebGL2RenderingContext",
    "WebGLRenderingContext",
}
|
||||
|
||||
# navigator / screen / canvas family.
MEDIUM_APIS = {
    # interface names
    "HTMLDocument",
    "Navigator",
    "NetworkInformation",
    "Performance",
    "Screen",
    "ScreenOrientation",
    "Storage",
    # property names
    "languages",
    "maxTouchPoints",
    "platform",
    "userAgent",
    "webdriver",
}
|
||||
|
||||
# tH ids that make up the core detection loop.
CORE_TH = {
    154,
    155,
}
|
||||
|
||||
|
||||
# ── Loading ──────────────────────────────────────────────────
def load(path):
    """Read the UTF-8 encoded JSON file at *path* and return the parsed object."""
    with open(path, encoding="utf-8") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
|
||||
|
||||
|
||||
# ── Scoring ──────────────────────────────────────────────────
# Prefixes that mark an ig_values string as a captured value, not an API name.
_VALUE_PREFIXES = ("0,1,2", "1:", "#", "return ", "https://")
# Strings longer than this are treated as values as well.
_MAX_NAME_LEN = 80


def _looks_like_value(text):
    """Return True when *text* is evidently a value rather than an API/field name."""
    return len(text) > _MAX_NAME_LEN or text.startswith(_VALUE_PREFIXES)


def _rate(api, tH_set):
    """Return ``(score, reasons)`` for field *api* seen in the tH ids *tH_set*."""
    total = 0
    reasons = []

    # Bot / automation detection field.
    if api in BOT_SIGNALS:
        total += 10
        reasons.append("🚨 bot检测字段 +10")

    # Seen in the core detection loop (flat bonus, however many core tH match).
    core_hit = tH_set & CORE_TH
    if core_hit:
        total += 5
        reasons.append(f"🎯 核心循环 tH={sorted(core_hit)} +5")

    # High-risk API.
    if api in HIGH_RISK_APIS:
        total += 3
        reasons.append("⚡ 高风险API +3")

    # navigator / screen family.
    if api in MEDIUM_APIS:
        total += 1
        reasons.append("📡 navigator/screen类 +1")

    # Cross-tH frequency: +2 for every tH beyond the first.
    freq = len(tH_set)
    if freq > 1:
        bonus = (freq - 1) * 2
        total += bonus
        reasons.append(f"🔁 跨{freq}个tH +{bonus}")

    return total, reasons


def score(data):
    """Score every API/field name found in the analysis data.

    Args:
        data: mapping whose keys are tH ids as int-convertible strings and
            whose values are dicts with an optional "ig_values" list. Only
            string items of that list are considered; strings that look like
            values (see ``_looks_like_value``) are skipped. A missing or
            null "ig_values" is treated as empty.

    Returns:
        A ``defaultdict`` mapping each field name to a dict with the keys
        "tH_set" (set of int tH ids the field appears in), "score" (int) and
        "reasons" (list of human-readable strings explaining the score).

    Raises:
        ValueError: if a key of *data* cannot be converted with ``int()``.
    """
    api_info = defaultdict(lambda: {"tH_set": set(), "score": 0, "reasons": []})

    # Pass 1: record in which tH ids each field name occurs.
    for tH_str, entry in data.items():
        tH = int(tH_str)
        # `or ()` also covers an explicit JSON null for "ig_values".
        for ig in entry.get("ig_values") or ():
            if not isinstance(ig, str) or _looks_like_value(ig):
                continue
            api_info[ig]["tH_set"].add(tH)

    # Pass 2: turn the collected occurrences into a score plus explanations.
    for api, info in api_info.items():
        info["score"], info["reasons"] = _rate(api, info["tH_set"])

    return api_info
|
||||
|
||||
|
||||
# ── Output ───────────────────────────────────────────────────
def _priority_path(analysis_path):
    """Derive the priority-JSON output path from an analysis-JSON path.

    "_analysis.json" is replaced by "_priority.json". When the input name
    does not contain that marker the replacement is a no-op and the result
    would be the input file itself, so the suffix is appended instead to
    avoid overwriting the input.
    """
    candidate = analysis_path.replace("_analysis.json", "_priority.json")
    if candidate == analysis_path:
        candidate = analysis_path + "_priority.json"
    return candidate


def report(api_info, out_path=None):
    """Print the ranked, tiered field list and export it as JSON.

    Args:
        api_info: mapping of field name -> dict with the keys "score" (int),
            "tH_set" (set of int) and "reasons" (list of str).
        out_path: file the JSON export is written to. When omitted, the path
            is derived from the module-level ANALYSIS_JSON, as before.

    Returns:
        None. The ranking is printed to stdout and written to *out_path*
        (UTF-8, non-ASCII characters kept as-is, indent of 2).
    """
    # Highest score first; sorted() is stable, so ties keep insertion order.
    ranked = sorted(api_info.items(), key=lambda item: item[1]["score"], reverse=True)

    print("=" * 70)
    print(" HSW 指纹字段 优先级排名")
    print("=" * 70)

    # (label, predicate) pairs, from most to least important tier.
    tiers = [
        ("🔴 P0 必须正确(≥10分)", lambda s: s >= 10),
        ("🟠 P1 高优先级(5~9分)", lambda s: 5 <= s < 10),
        ("🟡 P2 中优先级(3~4分)", lambda s: 3 <= s < 5),
        ("🟢 P3 低优先级(1~2分)", lambda s: 1 <= s < 3),
        ("⚪ P4 可忽略(0分)", lambda s: s == 0),
    ]

    for tier_label, condition in tiers:
        tier_items = [(api, info) for api, info in ranked if condition(info["score"])]
        if not tier_items:
            # Empty tiers are not printed at all.
            continue
        print(f"\n{tier_label} [{len(tier_items)} 个]")
        print(f" {'分数':<5} {'字段名':<45} 出现tH")
        print(f" {'─'*5} {'─'*45} {'─'*20}")
        for api, info in tier_items:
            tH_list = ",".join(str(t) for t in sorted(info["tH_set"]))
            print(f" {info['score']:<5} {api:<45} tH={tH_list}")
            for r in info["reasons"]:
                print(f" {r}")

    # JSON export; sets are not JSON-serialisable, so tH ids become a sorted list.
    out = {
        api: {
            "score": info["score"],
            "tH_list": sorted(info["tH_set"]),
            "reasons": info["reasons"],
        }
        for api, info in ranked
    }
    if out_path is None:
        out_path = _priority_path(ANALYSIS_JSON)
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(out, f, ensure_ascii=False, indent=2)
    print(f"\n📄 优先级结果已写入: {out_path}")


# ── Entry point ──────────────────────────────────────────────
def main(argv=None):
    """Analyse the file named by the first argument, or ANALYSIS_JSON if absent.

    The result is written next to the file that was actually analysed rather
    than next to the default ANALYSIS_JSON.
    """
    args = sys.argv if argv is None else argv
    path = args[1] if len(args) > 1 else ANALYSIS_JSON
    if not path:
        # No explicit path and no usable (falsy) module-level default.
        sys.exit("no analysis file: pass the path to an *_analysis.json file as the first argument")
    print(f"📂 读取: {path}\n")
    data = load(path)
    api_info = score(data)
    report(api_info, out_path=_priority_path(path))


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user