Programmatic Scraping (without using the Graphical User Interface)

Programmatic Scraping (without using the Graphical User Interface)#

Although SurVigilance is a GUI application for downloading data from various safety databases, users can also download the data programmatically. This vignette demonstrates how to access the different databases programmatically and download the required data.

WHO VigiAccess#

In this example, we would like to download data from VigiAccess for the drug “paracetamol”.

import os

from SurVigilance.ui.scrapers import scrape_vigiaccess_sb


def main():
    """Fetch VigiAccess data for paracetamol and print the first rows."""
    drug_name = "paracetamol"

    # Make sure the folder passed as output_dir exists before scraping.
    target_dir = "vigi_out"
    os.makedirs(target_dir, exist_ok=True)

    results = scrape_vigiaccess_sb(
        medicine=drug_name,
        output_dir=target_dir,
        headless=True,
    )
    print(results.head())


if __name__ == "__main__":
    main()
                 PT  Count
0  Thrombocytopenia   1073
1      Coagulopathy    774
2           Anaemia    557
3   Agranulocytosis    513
4       Neutropenia    450

NL Lareb#

In this example, we would like to download data from Lareb for the drug “atorvastatin”.

import os

from SurVigilance.ui.scrapers import scrape_lareb_sb


def main():
    """Fetch Lareb data for atorvastatin and print the first rows."""
    drug_name = "atorvastatin"

    # Make sure the folder passed as output_dir exists before scraping.
    target_dir = "lareb_out"
    os.makedirs(target_dir, exist_ok=True)

    results = scrape_lareb_sb(
        medicine=drug_name,
        output_dir=target_dir,
        headless=True,
    )
    print(results.head())


if __name__ == "__main__":
    main()
                    PT  Count
0               Asthma      2
1     Throat tightness      3
2       Sinus disorder      1
3            Epistaxis     20
4  Pharyngeal swelling      2

DK DMA#

In this example, we would like to download data from the Danish Medicines Agency for the drug “paracetamol”.

import os

from SurVigilance.ui.scrapers import scrape_dma_sb


def main():
    """Fetch Danish Medicines Agency data for paracetamol and print the first rows."""
    drug_name = "paracetamol"

    # Make sure the folder passed as output_dir exists before scraping.
    target_dir = "dma_out"
    os.makedirs(target_dir, exist_ok=True)

    results = scrape_dma_sb(
        medicine=drug_name,
        output_dir=target_dir,
        headless=True,
    )
    print(results.head())


if __name__ == "__main__":
    main()
                                PT  Count
0                          Anaemia      2
1  Normochromic normocytic anaemia      1
2              Nephrogenic anaemia      1
3                 Aplastic anaemia      2
4                     Pancytopenia      1

NZ MEDSAFE#

In this example, we would like to download data from NZ Medsafe for the medicine “atorvastatin”.

import os

from SurVigilance.ui.scrapers import scrape_medsafe_sb


def main():
    """Fetch NZ Medsafe data for atorvastatin and print the first rows."""
    # What to look up, and whether it is a medicine or a vaccine.
    query_kind = "medicine"  # or "vaccine"
    query_term = "atorvastatin"

    # Make sure the folder passed as output_dir exists before scraping.
    target_dir = "nzsmars_out"
    os.makedirs(target_dir, exist_ok=True)

    scraper_args = {
        "searching_for": query_kind,
        "drug_vaccine": query_term,
        "output_dir": target_dir,
        "headless": True,
    }
    results = scrape_medsafe_sb(**scraper_args)
    print(results.head())


if __name__ == "__main__":
    main()
                                    SOC                PT  Count
0  Blood and lymphatic system disorders           Anaemia      5
1  Blood and lymphatic system disorders     Lymphocytosis      1
2  Blood and lymphatic system disorders       Neutropenia      1
3  Blood and lymphatic system disorders      Neutrophilia      1
4  Blood and lymphatic system disorders  Thrombocytopenia      2

AU DAEN#

In this example, we would like to fetch data from the TGA DAEN for the medicine “aspirin”.

import os

from SurVigilance.ui.scrapers import scrape_daen_sb


def main():
    """Fetch TGA DAEN data for aspirin and report the size of the result."""
    drug_name = "aspirin"

    # Make sure the folder passed as output_dir exists before scraping.
    target_dir = "daen_out"
    os.makedirs(target_dir, exist_ok=True)

    df = scrape_daen_sb(
        medicine=drug_name,
        output_dir=target_dir,
        headless=True,
    )

    # Only the dimensions are printed here, not the rows themselves.
    print(f"Data collected: {len(df)} rows, {len(df.columns)} columns")


if __name__ == "__main__":
    main()
Data collected: 1084 rows, 5 columns

USA FAERS#

For the FAERS databases, we can download the ZIP files. To see which data files are available for different quarters, we should first review the list of available files.

import os

from SurVigilance.ui.scrapers import scrape_faers_sb


def main():
    """List the available FAERS quarterly files and print the first rows."""
    # Make sure the folder passed as output_dir exists before scraping.
    target_dir = "faers_out"
    os.makedirs(target_dir, exist_ok=True)

    available = scrape_faers_sb(output_dir=target_dir, headless=True)
    print(available.head())


if __name__ == "__main__":
    main()
   Year                  Quarter
0  2025    July - September 2025
1  2025        April - June 2025
2  2025     January - March 2025
3  2024  October - December 2024
4  2024    July - September 2024

From the list of available files, let’s try to download the data for Q1 (Jan - Mar), 2025 using the code below.

import os

from SurVigilance.ui.scrapers import download_file


def main():
    """Download one FAERS quarterly ZIP file and print its size."""
    # Make sure the download folder exists before fetching the file.
    target_dir = "faers_out"
    os.makedirs(target_dir, exist_ok=True)

    # Adjust the year and quarter in this URL to match the data to be downloaded.
    # For data prior to Q4 2012, use this URL pattern instead:
    # https://fis.fda.gov/content/Exports/aers_ascii_YYYYQQ.zip
    source_url = "https://fis.fda.gov/content/Exports/faers_ascii_2025q1.zip"

    zip_path = download_file(url=source_url, download_dir=target_dir)

    # Report the size of the downloaded file in bytes and in MB.
    bytes_per_mb = 1024 * 1024
    size_bytes = os.path.getsize(zip_path)
    size_mb = size_bytes / bytes_per_mb
    print(f"Downloaded file size: {size_bytes} bytes ({size_mb:.2f} MB)")


if __name__ == "__main__":
    main()
Downloaded file size: 67465250 bytes (64.34 MB)

Please note that downloading the VAERS data requires the user to solve a CAPTCHA, so it is not possible to download the data without opening a GUI. For this reason, we have not included an example for VAERS in the programmatic access section.