From d1818e6dd32b632cfb269de4033883f06ead863b Mon Sep 17 00:00:00 2001 From: h7x4 Date: Tue, 18 Mar 2025 00:33:07 +0100 Subject: [PATCH] v3 --- scrape.py | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/scrape.py b/scrape.py index 6cc2c3a..1f4696f 100644 --- a/scrape.py +++ b/scrape.py @@ -1,5 +1,3 @@ -from tkinter import filedialog - import csv from datetime import datetime @@ -12,6 +10,12 @@ import requests BASE_URL = "https://auctions.yahoo.co.jp/" +KEYWORDS = [ + "keyword1", + "keyword2", + "keyword3", +] + @dataclass class Advertisement: @@ -163,24 +167,14 @@ def save_advertisements_to_csv(advertisements: list[Advertisement], filename: st def main(): - file_path = filedialog.askopenfilename( - title='Looking for keywords.txt', - filetypes = [ ('Text', '*.txt'), ('Any', '*') ], - ) - keywords = [ ] - with open(file_path, 'r') as file: - for line in file.read().splitlines(): - if line.strip() != "": - keywords.append(line.strip()) - - progress_bar = tqdm(keywords) + progress_bar = tqdm(KEYWORDS) progress_bar.set_description("Scraping websites...") for keyword in progress_bar: timestamp = datetime.now() advertisements = crawl_search_index_for_keyword(keyword) - formatted_timestamp = timestamp.astimezone().replace(microsecond=0).isoformat() + formatted_timestamp = timestamp.astimezone().replace(microsecond=0).isoformat().replace(':', '-') save_advertisements_to_csv(advertisements, f'{formatted_timestamp}-{keyword}.csv')