WIP: parse by XPath
This commit is contained in:
8
main.py
8
main.py
@@ -6,6 +6,7 @@ import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from mail_sender import EmailSender
|
||||
from utils import format_product_table
|
||||
from lxml import etree
|
||||
|
||||
def fetch_product_info(urls):
|
||||
product_info = []
|
||||
@@ -14,10 +15,17 @@ def fetch_product_info(urls):
|
||||
response = requests.get(url)
|
||||
if response.status_code == 200:
|
||||
soup = BeautifulSoup(response.text, 'html.parser')
|
||||
html_str = str(soup)
|
||||
root_element = etree.fromstring(html_str, parser=etree.HTMLParser())
|
||||
product_name = soup.find('h1', class_='keyfacts__title').text.strip()
|
||||
current_price = soup.find('div', class_='m-price__price').text.strip()
|
||||
original_price_element = soup.find('span', class_='m-price__rrp')
|
||||
original_price = original_price_element.text.strip() if original_price_element else "-"
|
||||
|
||||
|
||||
discount_xpath = '//div[@class="m-price__label" and not(ancestor::*[@style[contains(., "display: none")]])]'
|
||||
discount_xpath_results = root_element.xpath(discount_xpath)
|
||||
discount_elements = [BeautifulSoup(etree.tostring(elem), 'html.parser') for elem in discount_xpath_results]
|
||||
discount_element = soup.find('div', class_='m-price__label')
|
||||
discount = int(re.findall(r'\d+', discount_element.text.strip())[0]) if discount_element else 0
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
requests
|
||||
beautifulsoup4
|
||||
tabulate
|
||||
tabulate
|
||||
lxml
|
||||
Reference in New Issue
Block a user