Compare commits
4 Commits
75aaf80d94
...
fix-multip
| Author | SHA1 | Date | |
|---|---|---|---|
| 681dde9b46 | |||
| c7fcec7c56 | |||
| 3505a86416 | |||
| d4302240e7 |
@@ -2,7 +2,10 @@
|
|||||||
|
|
||||||
Simple app which checks a price of selected items and prints their current price and discount.
|
Simple app which checks a price of selected items and prints their current price and discount.
|
||||||
|
|
||||||
It also sends an email notification when any item is on sale, provided the following env variables are defined:
|
## How to build and run
|
||||||
|
|
||||||
|
1. Create a file with the URLs to be watched: `cp urls.txt.example urls.txt`
|
||||||
|
2. Configure SMTP and mail destination (for mail notification)
|
||||||
|
|
||||||
```
|
```
|
||||||
SCRAPER_SMTP_USER
|
SCRAPER_SMTP_USER
|
||||||
@@ -24,5 +27,5 @@ or build docker image and run it using the docker:
|
|||||||
|
|
||||||
```
|
```
|
||||||
docker build -t lidl-price-scraper .
|
docker build -t lidl-price-scraper .
|
||||||
docker run -it --rm --env-file ./.env lidl-price-scraper
|
docker run -it --rm -v $(pwd)/urls.txt:/app/urls.txt --env-file ./.env lidl-price-scraper
|
||||||
```
|
```
|
||||||
12
main.py
12
main.py
@@ -1,3 +1,4 @@
|
|||||||
|
import datetime
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
@@ -5,6 +6,7 @@ import requests
|
|||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from mail_sender import EmailSender
|
from mail_sender import EmailSender
|
||||||
from utils import format_product_table
|
from utils import format_product_table
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
def fetch_product_info(urls):
|
def fetch_product_info(urls):
|
||||||
product_info = []
|
product_info = []
|
||||||
@@ -13,10 +15,17 @@ def fetch_product_info(urls):
|
|||||||
response = requests.get(url)
|
response = requests.get(url)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
soup = BeautifulSoup(response.text, 'html.parser')
|
soup = BeautifulSoup(response.text, 'html.parser')
|
||||||
|
html_str = str(soup)
|
||||||
|
root_element = etree.fromstring(html_str, parser=etree.HTMLParser())
|
||||||
product_name = soup.find('h1', class_='keyfacts__title').text.strip()
|
product_name = soup.find('h1', class_='keyfacts__title').text.strip()
|
||||||
current_price = soup.find('div', class_='m-price__price').text.strip()
|
current_price = soup.find('div', class_='m-price__price').text.strip()
|
||||||
original_price_element = soup.find('span', class_='m-price__rrp')
|
original_price_element = soup.find('span', class_='m-price__rrp')
|
||||||
original_price = original_price_element.text.strip() if original_price_element else "-"
|
original_price = original_price_element.text.strip() if original_price_element else "-"
|
||||||
|
|
||||||
|
|
||||||
|
discount_xpath = '//div[@class="m-price__label" and not(ancestor::*[@style[contains(., "display: none")]])]'
|
||||||
|
discount_xpath_results = root_element.xpath(discount_xpath)
|
||||||
|
discount_elements = [BeautifulSoup(etree.tostring(elem), 'html.parser') for elem in discount_xpath_results]
|
||||||
discount_element = soup.find('div', class_='m-price__label')
|
discount_element = soup.find('div', class_='m-price__label')
|
||||||
discount = int(re.findall(r'\d+', discount_element.text.strip())[0]) if discount_element else 0
|
discount = int(re.findall(r'\d+', discount_element.text.strip())[0]) if discount_element else 0
|
||||||
|
|
||||||
@@ -44,6 +53,9 @@ if __name__ == "__main__":
|
|||||||
with open(input_file, 'r') as f:
|
with open(input_file, 'r') as f:
|
||||||
urls = [line.strip() for line in f.readlines()]
|
urls = [line.strip() for line in f.readlines()]
|
||||||
|
|
||||||
|
|
||||||
|
print(f'Fetching prices at {datetime.datetime.now()}')
|
||||||
|
|
||||||
product_info = fetch_product_info(urls)
|
product_info = fetch_product_info(urls)
|
||||||
print(format_product_table(product_info))
|
print(format_product_table(product_info))
|
||||||
save_to_json(product_info, output_file)
|
save_to_json(product_info, output_file)
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
requests
|
requests
|
||||||
beautifulsoup4
|
beautifulsoup4
|
||||||
tabulate
|
tabulate
|
||||||
|
lxml
|
||||||
3
urls.txt.example
Normal file
3
urls.txt.example
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
https://www.lidl.cz/p/p100370600
|
||||||
|
https://www.lidl.cz/p/p100358513
|
||||||
|
https://www.lidl.cz/p/p100336045
|
||||||
Reference in New Issue
Block a user