import datetime import os import re import json import requests from bs4 import BeautifulSoup from mail_sender import EmailSender from utils import format_product_table from urllib.parse import quote LIDL_PRODUCT_URL_PREFIX = "https://www.lidl.cz/p/" def fetch_product_info(urls): product_info = [] for url in urls: response = requests.get(url) if response.status_code == 200: soup = BeautifulSoup(response.text, 'html.parser') product_name = soup.find('h1', class_='keyfacts__title').text.strip() current_price = soup.find('div', class_='m-price__price').text.strip() original_price_element = soup.find('span', class_='m-price__rrp') original_price = original_price_element.text.strip() if original_price_element else "-" discount_element = soup.find('div', class_='m-price__label') discount = int(re.findall(r'\d+', discount_element.text.strip())[0]) if discount_element else 0 product_info.append({ "name": product_name, "price": current_price, "discount": discount, "originalPrice": original_price, "url": url }) else: print(f"Failed to fetch URL: {url}") return product_info def find_urls(products): all_links = set() for product in products: url = f'https://www.lidl.cz/q/search?q={quote(product)}' response = requests.get(url) if response.status_code == 200: # this might potentially get invalid products (relies on the fact the product number is 9 digits surrounded by parentheses # and there are no other 9 digits strings in the whole returned HTML (but it could be improved to target only the