crawl, fixes
This commit is contained in:
parent
68435037e7
commit
f5145f80ea
7 changed files with 326 additions and 58 deletions
94
crawler.py
Normal file
94
crawler.py
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
import json,re, subprocess, sys, threading, time
|
||||
|
||||
# Number of probes that are started before the crawler stops and waits for
# the whole batch to finish.
BATCH = 50

# Seconds a probe may run, counted from the moment it was queued, before its
# openssl process is killed.
TIMEOUT = 4

# Regular expressions applied to the text printed by ``openssl s_client``.
# The first capture group of each pattern is the value that gets extracted.
CAPTURES = {
    "read_bytes": "SSL handshake has read (\\d+) bytes and written \\d+ bytes\n",
    "written_bytes": "SSL handshake has read \\d+ bytes and written (\\d+) bytes\n",
    "cert_sig": "Peer signature type: ([a-zA-Z0-9_.-]+)\n",
    "cert_pk_size": "Server public key is (\\d+) bit\n",
    "kx": "(?:Negotiated TLS1\\.3 group|Peer Temp Key): ([a-zA-Z0-9_.-]+)(?:\n|,)",
    "cipher": "Cipher is ([a-zA-Z0-9_.-]+)\n",
    "protocol": "Protocol: ([a-zA-Z0-9_.-]+)\n",
}

# Shared lock; not referenced by the code visible in this file section.
lock = threading.Lock()

# Maps a probed domain to the decoded stdout of its openssl session.
results = {}

# Not referenced by the code visible in this file section.
subs = []
|
||||
|
||||
def probe(domain, ossl):
    """Collect the stdout of one ``openssl s_client`` session.

    Closes the child's stdin so the session ends, reads everything the child
    prints on stdout and stores the decoded text in the module-level
    ``results`` dict under ``domain``. The child is then killed and reaped.

    Args:
        domain: Key under which the captured output is stored in ``results``.
        ossl: The ``subprocess.Popen`` object of the running openssl process;
            it must have been created with stdin and stdout pipes.
    """
    # End of input data, cleanly stops the session and the process.
    ossl.stdin.close()

    # Blocks until the child closes its stdout (normal exit or being killed).
    raw = ossl.stdout.read()
    ossl.stdout.close()

    # The remote side controls part of this text (certificate fields), so it
    # is not guaranteed to be valid UTF-8; never let one bad byte drop a result.
    results[domain] = raw.decode(errors="replace")

    ossl.kill()
    # Reap the child so finished probes do not pile up as zombie processes.
    ossl.wait()
|
||||
|
||||
def _reap(pending):
    """Wait for every queued probe until its deadline, then stop it.

    Args:
        pending: List of ``(thread, process, start_time)`` tuples. Each thread
            gets whatever is left of ``TIMEOUT`` seconds since ``start_time``;
            afterwards its process is killed. The list is emptied on return.
    """
    for worker, proc, started in pending:
        worker.join(max(0, TIMEOUT - (time.time() - started)))
        try:
            proc.kill()
            proc.wait()
        except OSError:
            # The process is already gone; nothing left to stop.
            pass
        # Killing the process closes its stdout, which unblocks the read in
        # the probe thread. Wait for the thread so nothing writes into
        # ``results`` while it is being serialised later on.
        worker.join()
    pending.clear()


def _crawl(list_path, out_path="/dev/shm/crawl.json"):
    """Probe every domain listed in ``list_path`` and dump the raw outputs.

    The first line of the list is skipped. One openssl process and one
    ``probe`` thread are started per remaining line, in batches of ``BATCH``.
    The collected ``results`` dict is written to ``out_path`` as JSON.
    """
    pending = []
    with open(list_path, "r") as listing:
        next(listing, None)  # skip the first line of the list
        for line in listing:
            # NOTE(review): this keeps the text before the first comma minus
            # one more character. Confirm the input really has an extra
            # character (e.g. a trailing dot or quote) in front of the comma;
            # otherwise the last character of the domain is being dropped.
            line = line[:line.find(",") - 1]
            if not line:
                # Blank or too-short line: there is no host to connect to.
                continue
            print("start", line)
            ossl = subprocess.Popen(
                ["openssl", "s_client", "-no-interactive", f"{line}:443"],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                # stderr is never read; an unread pipe can fill up and block
                # the child, so discard it instead of piping it.
                stderr=subprocess.DEVNULL,
            )
            worker = threading.Thread(target=probe, args=(line, ossl))
            pending.append((worker, ossl, time.time()))
            time.sleep(0.05)
            worker.start()
            if len(pending) >= BATCH:
                print("wait")
                _reap(pending)
                print("waited")

    # Finish the last, possibly incomplete, batch.
    _reap(pending)
    print("finished")

    with open(out_path, "w") as out:
        json.dump(results, out)
    print("written")


def _stat(crawl_path):
    """Print how often each captured value occurs in a crawl dump.

    ``crawl_path`` is a JSON file mapping domain -> openssl output. For every
    pattern in ``CAPTURES`` except the byte counters, the first match in each
    output is counted. The result, ``{capture_name: {value: count}}``, is
    printed.
    """
    skipped = ("domain", "read_bytes", "written_bytes")
    patterns = {
        name: re.compile(expr)
        for name, expr in CAPTURES.items()
        if name not in skipped
    }

    with open(crawl_path, "r") as dump:
        crawl = json.load(dump)

    stats = {}
    for output in crawl.values():
        for name, pattern in patterns.items():
            found = pattern.search(output)
            if found is None:
                continue
            counts = stats.setdefault(name, {})
            value = found.group(1)
            counts[value] = counts.get(value, 0) + 1
    print(stats)


if __name__ == "__main__":
    if len(sys.argv) < 3 or sys.argv[1] not in ("crawl", "stat"):
        sys.exit(f"usage: {sys.argv[0]} crawl <domain-list> | stat <crawl.json>")

    if sys.argv[1] == "crawl":
        _crawl(sys.argv[2])
        sys.exit(0)
    else:
        _stat(sys.argv[2])
|
||||
Loading…
Add table
Add a link
Reference in a new issue