diff --git a/config.py b/config.py
index 94616e8..0158402 100644
--- a/config.py
+++ b/config.py
@@ -1,5 +1,6 @@
 #!/usr/bin/python3
 
 monitoringPeriod = 30
-urls = ["https://bootlesshacker.com"]
+urls = ["https://www.bootlesshacker.com"]
 urlTimeout = 10
+maxWorkers = 4
diff --git a/main.py b/main.py
index ab7bde5..5f45980 100755
--- a/main.py
+++ b/main.py
@@ -28,8 +28,21 @@ import os
 import time
 import log
 import requests
+from functools import partial
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from bs4 import BeautifulSoup
 
+def loadUrl(url):
+
+    response = requests.get(url, timeout=config.urlTimeout, headers=headers)
+    return response
+
+def prepareUrl(src, baseUrl):
+
+    if not src.startswith("http://") and not src.startswith("https://"):
+        return baseUrl.rstrip("/") + "/" + src.lstrip("/")
+    return src
+
 while True:
 
     load1, load5, load15 = psutil.getloadavg() # this takes time to warm up if not running script on *nix
@@ -49,28 +62,33 @@
     }
 
     for url in config.urls:
-        
+
+        baseUrl = url
+        urlFail = False
+
         startTime = time.time()
-        request = requests.get(url, timeout=config.urlTimeout, headers=headers)
+        request = loadUrl(url)
 
         if request.status_code == 200:
 
             html = BeautifulSoup(request.content, 'html.parser')
             imageUrls = [img['src'] for img in html.find_all('img')]
 
-            for url in imageUrls:
+            with ThreadPoolExecutor(max_workers=config.maxWorkers) as executor:
+                responses = [executor.submit(loadUrl, prepareUrl(url, baseUrl)) for url in imageUrls]
+
+            responses = [future.result() for future in as_completed(responses)]
 
-                request = requests.get(url)
-
-                if not request.status_code == 200:
-                    # failure scenario
-                    print("test")
+            for response in responses:
+                if not response.status_code == 200:
+                    urlFail = True
 
             endTime = time.time()
             timeDiff = endTime - startTime
             print(timeDiff)
+
         else:
-            print(failure)
+
+            urlFail = True
 
     time.sleep(config.monitoringPeriod)
-