lidl-price-scraper/scrape.py

import json
import requests
from bs4 import BeautifulSoup
from tabulate import tabulate
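
# Scrapes product name, price, and discount information from Lidl product
# pages. Reads product URLs from urls.txt (one per line), prints the results
# as a table, and writes them to product_info.json.
#
# Note: the CSS class names used below (keyfacts__title, m-price__price,
# m-price__rrp, m-price__label) are assumed to match the Lidl product page
# markup at the time of writing and may need updating if the site changes.
#
# Third-party dependencies: requests, beautifulsoup4, tabulate.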


def fetch_product_info(urls):
    """Fetch name, current price, original price, and discount for each product URL."""
    product_info = []
    for url in urls:
        response = requests.get(url)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')
            # Core fields; these selectors target the Lidl product page markup.
            product_name = soup.find('h1', class_='keyfacts__title').text.strip()
            current_price = soup.find('div', class_='m-price__price').text.strip()
            # Original price and discount label only appear on reduced items.
            original_price_element = soup.find('span', class_='m-price__rrp')
            original_price = original_price_element.text.strip() if original_price_element else "-"
            discount_element = soup.find('div', class_='m-price__label')
            discount = discount_element.text.strip() if discount_element else "-"
            product_info.append({
                "name": product_name,
                "price": current_price,
                "discount": discount,
                "originalPrice": original_price,
                "url": url
            })
        else:
            print(f"Failed to fetch URL: {url}")
    return product_info


def print_product_table(product_info):
    """Print the scraped product info as a grid-formatted table."""
    headers = ["Product Name", "Price", "Discount", "Original Price", "URL"]
    table_data = [[info["name"], info["price"], info["discount"], info["originalPrice"], info["url"]] for info in product_info]
    print(tabulate(table_data, headers=headers, tablefmt="grid"))


def save_to_json(product_info, output_file):
    with open(output_file, 'w') as f:
        json.dump(product_info, f, indent=4)


if __name__ == "__main__":
    input_file = "urls.txt"
    output_file = "product_info.json"
    # One product URL per line; skip blank lines.
    with open(input_file, 'r') as f:
        urls = [line.strip() for line in f if line.strip()]
    product_info = fetch_product_info(urls)
    print_product_table(product_info)
    save_to_json(product_info, output_file)
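
# Example of a single record written to product_info.json (values below are
# purely illustrative; actual values depend on the scraped pages):
# {
#     "name": "Example product",
#     "price": "9.99",
#     "discount": "-20%",
#     "originalPrice": "12.49",
#     "url": "<product URL from urls.txt>"
# }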