forked from sent/waves
23 lines
813 B
Python
23 lines
813 B
Python
# script used for crawling pages
|
|
# use the network tab in chrome devtools, clear it before loading the game, and when all assets have finished being requested, press the Export HAR button (looks like download)
|
|
# then, use this script like so (this is an example): python3 dehar.py example.com.har outputdirectory/
|
|
|
|
import os, sys, subprocess
|
|
|
|
banned = [] #[".php", ".asp"]
|
|
|
|
infile = sys.argv[1]
|
|
outdir = sys.argv[2]
|
|
|
|
input = open(infile, "r", encoding="utf-8").read().splitlines()
|
|
|
|
lis2 = []
|
|
for x in input:
|
|
if ('"url"' in x) and not(any(i in x for i in banned)):
|
|
lis2.append(x.split('"')[3].split("?")[0])
|
|
|
|
lis2 = list(dict.fromkeys(lis2))
|
|
|
|
open("./wgettemp.txt", "w", encoding="utf-8").write("\n".join(lis2))
|
|
subprocess.call("wget -x -i ./wgettemp.txt -P %s" % outdir, shell=True)
|
|
os.remove("./wgettemp.txt") |