# Scrape package name / last-modified / size entries from an Apache-style
# directory-listing page and print the result as JSON.
# Standard library.
import argparse
import json

# Third-party.
import requests
from bs4 import BeautifulSoup
# --- Command-line interface ---
# Exactly one required positional argument: the URL to scrape.
parser = argparse.ArgumentParser(
    description='Scrape Debian package information from a URL.')
parser.add_argument(
    'url',
    type=str,
    help='The URL to scrape Debian package information from.')
args = parser.parse_args()

# URL supplied by the caller on the command line.
url = args.url
# --- Fetch and parse the page ---
# A timeout keeps the script from hanging forever on an unresponsive host,
# and raise_for_status() turns HTTP error responses (404, 500, ...) into an
# exception instead of silently parsing the server's error page as if it
# were the directory listing.
response = requests.get(url, timeout=30)
response.raise_for_status()
html_content = response.text

soup = BeautifulSoup(html_content, 'html.parser')
# --- Extract the package table ---
# Assumes an Apache-style fancy-index table where each data row's <td>
# cells are: [0] icon, [1] name, [2] last modified, [3] size
# (TODO: confirm against the actual listing being scraped).
data = []

# Every table row on the page; header rows use <th> and so yield no <td>s.
rows = soup.find_all('tr')

for row in rows:
    cells = row.find_all('td')
    # Guard on the cell count, not mere non-emptiness: separator and
    # colspan rows can carry fewer than four <td> elements and would
    # otherwise raise IndexError on cells[3].
    if len(cells) >= 4:
        name = cells[1].text.strip()
        last_modified = cells[2].text.strip()
        size = cells[3].text.strip()

        data.append({
            "Name": name,
            "Last modified": last_modified,
            "Size": size
        })

# Emit the collected rows as pretty-printed JSON on stdout.
json_data = json.dumps(data, indent=2)
print(json_data)