Source code for SurVigilance.ui.scrapers.check_internet_connectivity

# Modified from https://www.geeksforgeeks.org/python/how-to-check-whether-users-internet-is-on-or-off-using-python/
import http.client as httplib
from urllib.parse import urlparse

# Default set of external websites whose reachability is probed.
# check_site_connectivity() falls back to this list when it is called
# without a `url` argument.
SITES_TO_CHECK = [
    "https://daen.tga.gov.au/medicines-search/",
    "https://laegemiddelstyrelsen.dk/en/sideeffects/side-effects-of-medicines/interactive-adverse-drug-reaction-overviews/",
    "https://fis.fda.gov/extensions/FPD-QDE-FAERS/FPD-QDE-FAERS.html",
    "https://www.lareb.nl/en",
    "https://www.medsafe.govt.nz/Projects/B1/ADRSearch.asp",
    "https://vaers.hhs.gov/data/datasets.html",
    "https://www.vigiaccess.org/",
]


def _check_single_site(url, timeout):
    """Probe one URL with a HEAD request and return ``(reachable, message)``."""
    # Bound before the try so the finally clause never sees a connection
    # left over from a previous probe (or an unbound name).
    connection = None
    try:
        parsed_url = urlparse(url)
        if not parsed_url.netloc:
            # e.g. "example.com" without a scheme parses to an empty netloc;
            # report it instead of trying to connect to an empty host.
            raise ValueError("URL has no host component")

        if parsed_url.scheme == "https":
            connection_class = httplib.HTTPSConnection
        else:
            connection_class = httplib.HTTPConnection
        connection = connection_class(parsed_url.netloc, timeout=timeout)

        # Keep the query string so the request targets the URL that was given.
        target = parsed_url.path or "/"
        if parsed_url.query:
            target = f"{target}?{parsed_url.query}"

        connection.request("HEAD", target)
        # Any HTTP response counts as reachable; the status code is not
        # inspected.
        connection.getresponse()
    except Exception as e:  # pragma: no cover
        # Deliberately broad: every failure (DNS, TLS, timeout, bad URL) is
        # reported to the caller as "not reachable" rather than raised.
        return False, f"{url} is not reachable. Error: {e}."
    finally:
        if connection is not None:
            connection.close()
    return True, f"{url} is reachable."


def check_site_connectivity(
    url=None,
    timeout=5,
):
    """
    Checks if a given URL or list of URLs is reachable by making a HEAD request.

    A site is considered reachable as soon as the server returns any HTTP
    response; the response status code is not examined. Failures are never
    raised, they are reported through the returned message.

    Parameters
    -----------
    url: str or list, optional
        The URL or list of URLs to check for connectivity.
        If None, the module-level SITES_TO_CHECK list is used.
        Each URL must include a scheme and host (e.g. "https://example.com/");
        URLs whose scheme is not "https" are requested over plain HTTP.

    timeout: int, optional
        The maximum time (in seconds) to wait for a connection.
        Defaults to 5 seconds.

    Returns
    --------
    tuple or list:
        If `url` is a string, returns a tuple containing:
        - bool: True if the site is reachable, False otherwise.
        - str: A message indicating the connectivity status.
        If `url` is a list (or None), returns a list of these tuples, one per
        URL and in the same order.

    Example
    --------
    >>> reachable, message = check_site_connectivity(url="https://www.vigiaccess.org/")
    >>> print(reachable)

    >>> results = check_site_connectivity(url=SITES_TO_CHECK)
    >>> for reachable, message in results:
    ...     print(reachable, message)
    """
    if url is None:
        url = SITES_TO_CHECK

    if isinstance(url, str):
        return _check_single_site(url, timeout)

    return [_check_single_site(site, timeout) for site in url]
def check_all_scraper_sites(st_object=None):
    """
    Probe every default scraper website and report the outcome.

    Runs check_site_connectivity() with its default arguments and echoes a
    header line plus one status line per site, either through the given
    Streamlit object or, when none is supplied, to standard output.

    Parameters
    -----------
    st_object: Streamlit Object, optional
        Object used for display; its ``write`` method receives the header
        and its ``markdown`` method receives each site's status message.
        When falsy (the default, None), ``print`` is used instead.

    Returns
    ---------
    tuple:
        A tuple containing:
        - bool: True if all sites are reachable, False otherwise.
        - list: The status message of each site, in the order checked.
    """
    banner = "Checking connectivity to required websites..."
    if not st_object:
        print(banner)
    else:
        st_object.write(banner)

    site_messages = []
    every_site_up = True

    for is_up, status_text in check_site_connectivity():
        site_messages.append(status_text)

        if not st_object:
            print(status_text)
        else:
            st_object.markdown(status_text)

        # A single unreachable site makes the overall result False.
        if not is_up:
            every_site_up = False

    return every_site_up, site_messages