Fixes issues #1 and #3

This commit is contained in:
Thomas Williams 2024-06-26 19:40:26 +01:00
parent f76a916c83
commit c4edd9badd
Signed by: thomas
GPG key ID: EB8F975CF60BCBFF
2 changed files with 30 additions and 11 deletions

View file

@@ -1,5 +1,6 @@
#!/usr/bin/python3 #!/usr/bin/python3
monitoringPeriod = 30 monitoringPeriod = 30
urls = ["https://bootlesshacker.com"] urls = ["https://www.bootlesshacker.com"]
urlTimeout = 10 urlTimeout = 10
maxWorkers = 4

38
main.py
View file

@@ -28,8 +28,21 @@ import os
import time import time
import log import log
import requests import requests
from functools import partial
from concurrent.futures import ThreadPoolExecutor, as_completed
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
def loadUrl(url):
response = requests.get(url, timeout=config.urlTimeout, headers=headers)
return response
def prepareUrl(src, baseUrl):
if not src.startswith("http://") and not src.startswith("https://"):
return baseUrl.rstrip("/") + "/" + src.lstrip("/")
return src
while True: while True:
load1, load5, load15 = psutil.getloadavg() # this takes time to warm up if not running script on *nix load1, load5, load15 = psutil.getloadavg() # this takes time to warm up if not running script on *nix
@@ -49,28 +62,33 @@ while True:
} }
for url in config.urls: for url in config.urls:
baseUrl = url
urlFail = False
startTime = time.time() startTime = time.time()
request = requests.get(url, timeout=config.urlTimeout, headers=headers) request = loadUrl(url)
if request.status_code == 200: if request.status_code == 200:
html = BeautifulSoup(request.content, 'html.parser') html = BeautifulSoup(request.content, 'html.parser')
imageUrls = [img['src'] for img in html.find_all('img')] imageUrls = [img['src'] for img in html.find_all('img')]
for url in imageUrls: with ThreadPoolExecutor(max_workers=config.maxWorkers) as executor:
responses = [executor.submit(loadUrl, prepareUrl(url, baseUrl)) for url in imageUrls]
responses = [future.result() for future in as_completed(responses)]
request = requests.get(url) for response in responses:
if not response.status_code == 200:
if not request.status_code == 200: urlFail = True
# failure scenario
print("test")
endTime = time.time() endTime = time.time()
timeDiff = endTime - startTime timeDiff = endTime - startTime
print(timeDiff) print(timeDiff)
else: else:
print(failure)
urlFail = True
time.sleep(config.monitoringPeriod) time.sleep(config.monitoringPeriod)