"""Scrape product names and prices from a list of product URLs and print a table.

Third-party dependencies (listed in requirements.txt): requests,
beautifulsoup4, tabulate.
"""

import requests
from bs4 import BeautifulSoup
from tabulate import tabulate

# Default value handed to ``requests.get(..., timeout=...)`` so that a server
# which never answers cannot block the script forever.
DEFAULT_TIMEOUT = 10


def _element_text(soup, tag, css_class):
    """Return the stripped text of the first ``tag`` with ``css_class``.

    Returns ``None`` when the page contains no such element, instead of
    letting the ``None`` returned by ``soup.find`` blow up on ``.text``.
    """
    element = soup.find(tag, class_=css_class)
    if element is None:
        return None
    return element.text.strip()


def fetch_product_info(urls, timeout=DEFAULT_TIMEOUT):
    """Download each URL and extract the product name and price from it.

    Args:
        urls: Iterable of product page URLs.
        timeout: Value passed to ``requests.get`` as ``timeout``.

    Returns:
        A list of ``[product_name, product_price, url]`` rows, one per URL
        that was fetched and parsed successfully. URLs that fail (network
        error, non-200 status, or missing name/price element) are reported
        on stdout and skipped, so one bad URL does not abort the others.
    """
    product_info = []

    for url in urls:
        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as exc:
            # Connection errors and timeouts are reported like any other
            # failed fetch rather than crashing the whole run.
            print(f"Failed to fetch URL: {url} ({exc})")
            continue

        if response.status_code != 200:
            print(f"Failed to fetch URL: {url}")
            continue

        soup = BeautifulSoup(response.text, 'html.parser')
        product_name = _element_text(soup, 'h1', 'keyfacts__title')
        product_price = _element_text(soup, 'div', 'm-price__price')
        if product_name is None or product_price is None:
            # The page loaded but does not have the expected markup.
            print(f"Failed to parse product page: {url}")
            continue

        product_info.append([product_name, product_price, url])

    return product_info


def print_product_table(product_info):
    """Print ``product_info`` rows as a grid table with name, price and URL columns."""
    headers = ["Product Name", "Price", "URL"]
    print(tabulate(product_info, headers=headers, tablefmt="grid"))


def main():
    """Fetch the hard-coded product URLs and print the resulting table."""
    urls = [
        "https://www.lidl.cz/p/p100370600",
        "https://www.lidl.cz/p/p100358513",
        "https://www.lidl.cz/p/p100336045",
    ]

    print_product_table(fetch_product_info(urls))


if __name__ == "__main__":
    main()