diff --git a/web-scrapper/README.md b/web-scrapper/README.md
new file mode 100644
index 0000000..866ad24
--- /dev/null
+++ b/web-scrapper/README.md
@@ -0,0 +1,43 @@
+# Web Scraper
+
+## Introduction
+This GUI application simplifies web scraping by letting users enter a website URL, scrape the page's text content, review the extracted data, and save it to a CSV file.
+
+## Usage
+1. Install dependencies:
+   ```bash
+   pip install beautifulsoup4 requests
+   ```
+
+2. Run the scraper:
+   ```bash
+   python web_scraper.py
+   ```
+
+3. Click "Download Data" to save the scraped data as `scraped_data.csv` in the current directory.
+
+## Example
+![image](gg.png)
+
+Enter a URL, scrape the page, and download the result; the file `scraped_data.csv` is generated in the working directory.
+
+## Dependencies
+Make sure you have the necessary libraries installed. You can install BeautifulSoup and Requests using pip:
+```bash
+pip install beautifulsoup4 requests
+```
+
+## How it Works
+Running the script launches the web scraper GUI. Enter a URL, click the "Scrape Data" button to fetch the page and extract its text, and then click the "Download Data" button to save the scraped data to a CSV file.
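+
+For example, to scrape only specific page elements instead of the full text, the extraction step in `scrape_data` could be adapted along these lines (a sketch; the choice of headings and links is illustrative, not part of the shipped script):
+```python
+# Illustrative sketch: inside WebScraperApp.scrape_data, replace the generic
+# data = soup.get_text() with a targeted extraction, e.g. headings and links:
+headings = [h.get_text(strip=True) for h in soup.find_all(["h1", "h2", "h3"])]
+links = [a["href"] for a in soup.find_all("a", href=True)]
+data = "\n".join(headings + links)
+```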
+
+## Contribution
+Contributions are welcome! Feel free to submit issues or pull requests.
diff --git a/web-scrapper/gg.png b/web-scrapper/gg.png
new file mode 100644
index 0000000..71f1a9d
Binary files /dev/null and b/web-scrapper/gg.png differ
diff --git a/web-scrapper/web_scraper.py b/web-scrapper/web_scraper.py
new file mode 100644
index 0000000..833c92b
--- /dev/null
+++ b/web-scrapper/web_scraper.py
@@ -0,0 +1,59 @@
+import tkinter as tk
+from tkinter import ttk, messagebox
+from bs4 import BeautifulSoup
+import requests
+
+
+class WebScraperApp:
+    def __init__(self, root):
+        self.root = root
+        self.root.title("Web Scraper")
+
+        # URL input row: label, entry field, and scrape button
+        self.url_label = ttk.Label(root, text="Enter URL:")
+        self.url_label.grid(row=0, column=0, padx=10, pady=10)
+        self.url_entry = ttk.Entry(root, width=50)
+        self.url_entry.grid(row=0, column=1, padx=10, pady=10)
+
+        self.scrape_button = ttk.Button(root, text="Scrape Data", command=self.scrape_data)
+        self.scrape_button.grid(row=0, column=2, padx=10, pady=10)
+
+        # Text box that displays the scraped data
+        self.data_text = tk.Text(root, height=20, width=80)
+        self.data_text.grid(row=1, column=0, columnspan=3, padx=10, pady=10)
+
+        self.download_button = ttk.Button(root, text="Download Data", command=self.download_data)
+        self.download_button.grid(row=2, column=0, columnspan=3, padx=10, pady=10)
+
+    def scrape_data(self):
+        url = self.url_entry.get()
+        try:
+            response = requests.get(url)
+            response.raise_for_status()
+            soup = BeautifulSoup(response.content, "html.parser")
+
+            # The extraction can be customized to the target site's structure;
+            # for demonstration purposes, all text on the page is extracted.
+            data = soup.get_text()
+
+            # Display the scraped data in the text box
+            self.data_text.delete(1.0, tk.END)
+            self.data_text.insert(tk.END, data)
+        except Exception as e:
+            messagebox.showerror("Error", f"An error occurred: {str(e)}")
+
+    def download_data(self):
+        data = self.data_text.get(1.0, tk.END)
+        if data.strip():
+            filename = "scraped_data.csv"
+            with open(filename, "w", newline="", encoding="utf-8") as file:
+                file.write(data)
+            messagebox.showinfo("Success", f"Data has been saved to {filename}")
+        else:
+            messagebox.showwarning("No Data", "There is no data to download.")
+
+
+if __name__ == "__main__":
+    root = tk.Tk()
+    app = WebScraperApp(root)
+    root.mainloop()