lidl-price-scraper/scrape.py

import requests
from bs4 import BeautifulSoup
from tabulate import tabulate

def fetch_product_info(urls):
    """Scrape name, current price, discount and original price from each product URL."""
    product_info = []
    for url in urls:
        response = requests.get(url)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')
            # Name and current price are expected on every product page.
            product_name = soup.find('h1', class_='keyfacts__title').text.strip()
            current_price = soup.find('div', class_='m-price__price').text.strip()
            # The crossed-out recommended price and the discount label only appear on discounted items.
            original_price_element = soup.find('span', class_='m-price__rrp')
            original_price = original_price_element.text.strip() if original_price_element else "-"
            discount_element = soup.find('div', class_='m-price__label')
            discount = discount_element.text.strip() if discount_element else "-"
            product_info.append([product_name, current_price, discount, original_price, url])
        else:
            print(f"Failed to fetch URL: {url}")
    return product_info

def print_product_table(product_info):
    """Print the collected rows as a grid-style table."""
    headers = ["Product Name", "Price", "Discount", "Original Price", "URL"]
    print(tabulate(product_info, headers=headers, tablefmt="grid"))

if __name__ == "__main__":
    # Lidl CZ product pages to check; replace or extend this list as needed.
    urls = [
        "https://www.lidl.cz/p/p100370600",
        "https://www.lidl.cz/p/p100358513",
        "https://www.lidl.cz/p/p100336045",
    ]
    product_info = fetch_product_info(urls)
    print_product_table(product_info)
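
If the requests start failing or getting blocked, a common tweak is to reuse a single requests.Session with a browser-like User-Agent header and an explicit timeout. The sketch below is not part of the original script; the helper name, the header value, and the 10-second timeout are illustrative assumptions that may need adjusting for lidl.cz.

import requests

def fetch_page(session, url):
    # Hypothetical helper: the timeout value and the User-Agent set on the
    # session below are assumptions, not values from the original script.
    response = session.get(url, timeout=10)
    response.raise_for_status()  # raise on 4xx/5xx instead of silently continuing
    return response.text

if __name__ == "__main__":
    session = requests.Session()
    session.headers.update({"User-Agent": "Mozilla/5.0 (compatible; lidl-price-scraper)"})
    html = fetch_page(session, "https://www.lidl.cz/p/p100370600")
    print(len(html))

The returned HTML can then be fed to BeautifulSoup exactly as in fetch_product_info above.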