import requests, schedule, time
from bs4 import BeautifulSoup
from whatDomain import ermWhatAAAATheIpFromDomainYaCrazy, ermWhatATheIpFromDomainYaCrazy

OPNSNSMIRRORURL = "https://opnsense.org/download/#full-mirror-listing"
IPv4FILE = "./OPNS_MirrorListV4"
IPv6FILE = "./OPNS_MirrorListV6"

# Countries whose mirrors we want. The original script used this name without
# ever defining it, so the entries below are placeholders; adjust to taste.
target_countries = ["Germany", "Netherlands", "France"]

# Leftovers from the Debian-mirror variant of this script (also never defined
# originally). Kept as empty lists so the dict shape stays consistent; fill in
# real URLs if you actually want these entries.
DEBSECURITYURL = []
EXTRASURL = []


def sanitizeURL(inpurl: str):
    """Strip the scheme and any path from a URL, leaving just the hostname."""
    if inpurl.startswith("https://"):
        outurl = inpurl[len("https://"):]
    elif inpurl.startswith("http://"):
        outurl = inpurl[len("http://"):]
    else:
        # No scheme at all; assume it's already a bare host (maybe with a path).
        outurl = inpurl
    # Keep only what comes before the first "/", i.e. the domain itself.
    # (The old char-by-char loop did this the hard way; split is equivalent.)
    outurl = outurl.split("/", 1)[0]
    return outurl


def getFreshData():
    payload = requests.get(OPNSNSMIRRORURL)
    soup = BeautifulSoup(payload.content, "html.parser")
    return soup


def sanitizeUrlsGodWhatTheFuckIsThis(SoupInput: BeautifulSoup):
    outMirrorDict = {}
    current_country = None

    # Iterate through all table rows
    for table in SoupInput.find_all("table"):
        for row in table.find_all("tr"):
            # Check for a country name in a full-row header (<strong> tag)
            strong = row.find("strong")
            if strong:
                country_name = strong.get_text(strip=True)
                if country_name in target_countries:
                    current_country = country_name
                else:
                    current_country = None
                continue  # move to next row

            # Check for an inline country name in the first column
            cols = row.find_all("td")
            if len(cols) >= 2:
                possible_country = cols[0].get_text(strip=True)
                link_tag = cols[1].find("a", href=True)

                if possible_country in target_countries:
                    current_country = possible_country

                if current_country and link_tag:
                    url = link_tag["href"]
                    if current_country not in outMirrorDict:
                        outMirrorDict[current_country] = []
                    outMirrorDict[current_country].append(url)

    outMirrorDict.update({"Security": DEBSECURITYURL})
    outMirrorDict.update({"Extras": EXTRASURL})
    return outMirrorDict


def LeJob():
    print("Starting lookup")
    LeSoup = getFreshData()
    LeMirrorDict = sanitizeUrlsGodWhatTheFuckIsThis(LeSoup)

    # Opening a file for "r" and "w" at the same time just truncates it before
    # anything can be read, so deduplication is done with an in-memory set
    # instead and the file is rewritten from scratch on every run.
    with open(IPv4FILE, "w") as fW:
        seen = set()  # mirrors that appear more than once only get resolved once
        for country, urls in LeMirrorDict.items():
            if country not in target_countries:
                continue
            for url in urls:
                goodurl = sanitizeURL(url)
                if goodurl in seen:
                    continue
                seen.add(goodurl)
                ip4Dict = ermWhatATheIpFromDomainYaCrazy(goodurl)
                if isinstance(ip4Dict, int):  # negative int = DNS error code
                    continue
                for _, ip in ip4Dict.items():
                    print(ip)
                    fW.write(ip + "/32" + "\n")

    with open(IPv6FILE, "w") as fW:
        seen = set()
        for country, urls in LeMirrorDict.items():
            if country not in target_countries:
                continue
            for url in urls:
                goodurl = sanitizeURL(url)
                if goodurl in seen:
                    continue
                seen.add(goodurl)
                ip6Dict = ermWhatAAAATheIpFromDomainYaCrazy(goodurl)
                if isinstance(ip6Dict, int):
                    continue
                for _, ip in ip6Dict.items():
                    fW.write(ip + "/128" + "\n")


# schedule.every().day.at("12:45").do(LeJob)
# schedule.every().day.at("17:44").do(LeJob)
# while True:
#     schedule.run_pending()
#     print("Waiting...")
#     time.sleep(30)  # wait 30 seconds between checks
#
# LeJob()


def DNSerrHandling(inpErrorCode: int):
    match inpErrorCode:
        case -1:
            return "No answer from DNS server."
        case -2:
            # return "All nameservers failed to answer the query."
            print("All nameservers failed to answer the query.\nFix your DNS servers.")
            exit(-1)
        case -3:
            return "The DNS query name does not exist."
        case -4:
            return "The DNS query timed out."
        case _:
            return "Not implemented"
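# whatDomain isn't shown in this repo snippet. Judging by how its return values
# are used (a dict whose values are IP strings on success, a negative int that
# DNSerrHandling above translates), a minimal sketch of the A-record resolver
# could look like this with dnspython -- an assumption, not the real module:
#
#   import dns.resolver, dns.exception
#
#   def ermWhatATheIpFromDomainYaCrazy(domain: str):
#       try:
#           answers = dns.resolver.resolve(domain, "A")
#       except dns.resolver.NoAnswer:
#           return -1
#       except dns.resolver.NoNameservers:
#           return -2
#       except dns.resolver.NXDOMAIN:
#           return -3
#       except dns.exception.Timeout:
#           return -4
#       # Keys are arbitrary; the callers only ever use the values (the IPs).
#       return {f"{domain}-{i}": r.to_text() for i, r in enumerate(answers)}
#
# The AAAA variant would be the same with resolve(domain, "AAAA").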
gigalist = []

payload = requests.get(OPNSNSMIRRORURL)
soup = BeautifulSoup(payload.content, "html.parser")
# print(soup)

# divs = soup.find_all("div", {"class": "download_section"})
for data in soup.find_all("div", class_="download_section"):
    for a in data.find_all("a", href=True):
        url = a["href"]
        saniturl = sanitizeURL(url)
        print(saniturl)
        IPv4Dict = ermWhatATheIpFromDomainYaCrazy(saniturl)
        IPv6Dict = ermWhatAAAATheIpFromDomainYaCrazy(saniturl)
        if isinstance(IPv4Dict, int):
            print(f"{DNSerrHandling(IPv4Dict)}")
            continue
        elif isinstance(IPv6Dict, int):
            print(f"ERROR RESOLVING {saniturl} IPv6 address with error code {IPv6Dict}")
            continue
        for key, ip in IPv4Dict.items():
            print(f"Found the ipv4: {ip}")
        for key, ip in IPv6Dict.items():
            print(f"Found the ipv6: {ip}")
        # print(a.get('href'))  # for getting the link
        # print(a.text)  # for getting the text between the link tags

# for a in soup.find_all("div", {"class": "download_section"}, "a", href=True):
#     h2zz = divs = soup.find_all("h3")
#     print(f" one link is: {h2zz}")
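# For reference, the list files this produces end up as one CIDR per line,
# e.g. (addresses illustrative, from the documentation ranges):
#
#   OPNS_MirrorListV4       OPNS_MirrorListV6
#   203.0.113.7/32          2001:db8::7/128
#   198.51.100.23/32        2001:db8:1::23/128
#
# which is the shape a firewall alias / URL-table import typically expects --
# presumably the point of the /32 and /128 suffixes.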