Read input URLs from a file and write the output to a file as JSON
This commit is contained in:
27
scrape.py
27
scrape.py
@@ -1,3 +1,4 @@
|
|||||||
|
import json
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from tabulate import tabulate
|
from tabulate import tabulate
|
||||||
@@ -16,7 +17,13 @@ def fetch_product_info(urls):
|
|||||||
discount_element = soup.find('div', class_='m-price__label')
|
discount_element = soup.find('div', class_='m-price__label')
|
||||||
discount = discount_element.text.strip() if discount_element else "-"
|
discount = discount_element.text.strip() if discount_element else "-"
|
||||||
|
|
||||||
product_info.append([product_name, current_price, discount, original_price, url])
|
product_info.append({
|
||||||
|
"name": product_name,
|
||||||
|
"price": current_price,
|
||||||
|
"discount": discount,
|
||||||
|
"originalPrice": original_price,
|
||||||
|
"url": url
|
||||||
|
})
|
||||||
else:
|
else:
|
||||||
print(f"Failed to fetch URL: {url}")
|
print(f"Failed to fetch URL: {url}")
|
||||||
|
|
||||||
@@ -24,14 +31,20 @@ def fetch_product_info(urls):
|
|||||||
|
|
||||||
def print_product_table(product_info):
    """Print the product records as a grid-formatted table.

    Each item of ``product_info`` is indexed by the keys ``name``,
    ``price``, ``discount``, ``originalPrice`` and ``url``; the values
    become one table row, in that column order.
    """
    headers = ["Product Name", "Price", "Discount", "original price", "URL"]
    # Record keys, listed in the same order as the headers above.
    columns = ("name", "price", "discount", "originalPrice", "url")

    rows = []
    for entry in product_info:
        rows.append([entry[key] for key in columns])

    print(tabulate(rows, headers=headers, tablefmt="grid"))
|
||||||
|
|
||||||
|
def save_to_json(product_info, output_file):
    """Write the product records to a JSON file.

    Args:
        product_info: JSON-serializable data; the script passes the list
            of product dicts returned by ``fetch_product_info``.
        output_file: Path of the file to create or overwrite.

    Raises:
        OSError: If the file cannot be opened for writing.
        TypeError: If ``product_info`` holds a value ``json`` cannot
            serialize.
    """
    # Explicit UTF-8 plus ensure_ascii=False keeps non-ASCII text (e.g.
    # accented product names) readable in the file instead of \uXXXX
    # escapes, and makes the output independent of the platform's
    # default encoding. The parsed JSON content is unchanged.
    with open(output_file, 'w', encoding="utf-8") as f:
        json.dump(product_info, f, indent=4, ensure_ascii=False)
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Input: a text file with one product URL per line.
    # Output: the scraped records, written as JSON.
    input_file = "urls.txt"
    output_file = "product_info.json"

    # Iterate the file lazily and drop blank lines, so that empty strings
    # are never handed to fetch_product_info as URLs.
    with open(input_file, 'r', encoding="utf-8") as f:
        stripped_lines = (line.strip() for line in f)
        urls = [url for url in stripped_lines if url]

    product_info = fetch_product_info(urls)
    print_product_table(product_info)
    save_to_json(product_info, output_file)
|
||||||
Reference in New Issue
Block a user