lidl-price-scraper/scrape.py

import json
import requests
from bs4 import BeautifulSoup
from tabulate import tabulate
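
# Scrapes product name, price, and discount information from Lidl product
# pages. Reads product URLs from urls.txt (one per line), prints the results
# as a table, and writes them to product_info.json.
#
# Note: the CSS class names used below (keyfacts__title, m-price__price,
# m-price__rrp, m-price__label) are assumed to match the Lidl product page
# markup at the time of writing and may need updating if the site changes.
#
# Third-party dependencies: requests, beautifulsoup4, tabulate.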


def fetch_product_info(urls):
    """Fetch name, current price, original price, and discount for each product URL."""
    product_info = []
    for url in urls:
        response = requests.get(url)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')
            # Core fields; these selectors target the Lidl product page markup.
            product_name = soup.find('h1', class_='keyfacts__title').text.strip()
            current_price = soup.find('div', class_='m-price__price').text.strip()
            # Original price and discount label only appear on reduced items.
            original_price_element = soup.find('span', class_='m-price__rrp')
            original_price = original_price_element.text.strip() if original_price_element else "-"
            discount_element = soup.find('div', class_='m-price__label')
            discount = discount_element.text.strip() if discount_element else "-"
            product_info.append({
                "name": product_name,
                "price": current_price,
                "discount": discount,
                "originalPrice": original_price,
                "url": url
            })
        else:
            print(f"Failed to fetch URL: {url}")
    return product_info


def print_product_table(product_info):
    """Print the scraped product info as a grid-formatted table."""
    headers = ["Product Name", "Price", "Discount", "Original Price", "URL"]
    table_data = [[info["name"], info["price"], info["discount"], info["originalPrice"], info["url"]] for info in product_info]
    print(tabulate(table_data, headers=headers, tablefmt="grid"))


def save_to_json(product_info, output_file):
    with open(output_file, 'w') as f:
        json.dump(product_info, f, indent=4)


if __name__ == "__main__":
    input_file = "urls.txt"
    output_file = "product_info.json"
    # One product URL per line; skip blank lines.
    with open(input_file, 'r') as f:
        urls = [line.strip() for line in f if line.strip()]
    product_info = fetch_product_info(urls)
    print_product_table(product_info)
    save_to_json(product_info, output_file)
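
# Example of a single record written to product_info.json (values below are
# purely illustrative; actual values depend on the scraped pages):
# {
#     "name": "Example product",
#     "price": "9.99",
#     "discount": "-20%",
#     "originalPrice": "12.49",
#     "url": "<product URL from urls.txt>"
# }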