Files
lidl-price-scraper/main.py
2024-02-08 11:21:57 +01:00

68 lines
2.4 KiB
Python

import datetime
import os
import re
import json
import requests
from bs4 import BeautifulSoup
from mail_sender import EmailSender
from utils import format_product_table
from lxml import etree
def fetch_product_info(urls):
    """Scrape name, current price, original price and discount for each
    Lidl product page in *urls*.

    Args:
        urls: iterable of product-page URLs.

    Returns:
        A list of dicts with keys "name", "price", "discount" (int percent,
        0 when no discount label is present), "originalPrice" ("-" when the
        page shows no RRP) and "url". Pages that cannot be fetched or
        parsed are skipped with a diagnostic message instead of crashing
        the whole run.
    """
    product_info = []
    for url in urls:
        # Explicit timeout so one dead server cannot hang the scraper forever.
        response = requests.get(url, timeout=30)
        if response.status_code != 200:
            print(f"Failed to fetch URL: {url}")
            continue

        soup = BeautifulSoup(response.text, 'html.parser')

        # Guard against layout changes / removed products: soup.find()
        # returns None, and None.text would raise AttributeError.
        name_element = soup.find('h1', class_='keyfacts__title')
        price_element = soup.find('div', class_='m-price__price')
        if name_element is None or price_element is None:
            print(f"Could not parse product page: {url}")
            continue

        original_price_element = soup.find('span', class_='m-price__rrp')
        original_price = original_price_element.text.strip() if original_price_element else "-"

        # Discount label looks like e.g. "-20%"; take the first number in it.
        # Default to 0 when the label is absent or contains no digits.
        discount = 0
        discount_element = soup.find('div', class_='m-price__label')
        if discount_element:
            digits = re.findall(r'\d+', discount_element.text.strip())
            if digits:
                discount = int(digits[0])

        product_info.append({
            "name": name_element.text.strip(),
            "price": price_element.text.strip(),
            "discount": discount,
            "originalPrice": original_price,
            "url": url
        })
    return product_info
def save_to_json(product_info, output_file):
    """Write the scraped product records to *output_file* as pretty-printed JSON.

    Args:
        product_info: list of product dicts as produced by fetch_product_info().
        output_file: path of the JSON file to create or overwrite.
    """
    # utf-8 + ensure_ascii=False keep currency symbols and umlauts in product
    # names readable instead of \uXXXX escapes, and avoid locale-encoding
    # write errors on platforms whose default encoding is not UTF-8.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(product_info, f, indent=4, ensure_ascii=False)
if __name__ == "__main__":
    input_file = "urls.txt"
    output_file = "product_info.json"

    # Ignore blank lines so a trailing newline in urls.txt does not
    # produce a bogus request to an empty URL.
    with open(input_file, 'r') as f:
        urls = [line.strip() for line in f if line.strip()]

    print(f'Fetching prices at {datetime.datetime.now()}')
    product_info = fetch_product_info(urls)
    print(format_product_table(product_info))
    save_to_json(product_info, output_file)

    # Send a notification mail only when at least one product is discounted.
    products_on_sale = [product for product in product_info if product["discount"] > 0]
    if products_on_sale:
        sender = EmailSender(os.environ["SCRAPER_SMTP_USER"], os.environ["SCRAPER_SMTP_PASSWORD"])
        sender.send_email(os.environ["SCRAPER_TO_MAIL"], products_on_sale)