- Python
- requests for sending HTTP requests
- BeautifulSoup for parsing HTML
- pandas for organizing and exporting data
- Flask (optional) to turn it into a basic API
import requests
from bs4 import BeautifulSoup
import pandas as pd
def scrape_website(url, timeout=10):
    """Download a page, extract text from matching elements, and save it as CSV.

    Sends a GET request to *url*. On an HTTP 200 response, the page is parsed
    with BeautifulSoup's ``html.parser``, the stripped text of every element
    matching the CSS selector ``.some-class`` is collected, and the result is
    written to ``output.csv`` (a relative path) under a single
    ``Extracted Data`` column. On any other status code a failure message is
    printed and no file is written.

    Args:
        url: Address of the page to scrape.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled server.

    Returns:
        None. Results are reported via ``print`` and the CSV file.

    Raises:
        requests.RequestException: Propagated unchanged if the request itself
            fails (connection error, timeout, invalid URL, ...).
    """
    response = requests.get(url, timeout=timeout)

    # Guard clause: anything other than 200 is treated as a failure.
    if response.status_code != 200:
        print("Failed to retrieve data")
        return

    soup = BeautifulSoup(response.text, 'html.parser')
    data = [item.text.strip() for item in soup.select('.some-class')]

    df = pd.DataFrame(data, columns=['Extracted Data'])
    # index=False keeps the DataFrame's row index out of the CSV.
    df.to_csv('output.csv', index=False)
    print("Data saved to output.csv")
# Run the scraper once against the example page.
scrape_website(url='https://example.com')