rebuild-debian-csv: Switch from FTP to HTTP
parent 14091f59d6
commit 559316ff5d
@@ -1,11 +1,13 @@
 #!/usr/bin/env python3

 from datetime import timedelta, date, datetime
-from io import BytesIO
 from csv import DictWriter

-import ftplib
+import gzip
 import logging
+import re
+import urllib.request
+import urllib.error


 logging.basicConfig()
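The import changes swap the FTP client for plain HTTP with on-the-fly gzip decoding. A minimal sketch of that fetch pattern, using the Debian mirror URL that appears later in this commit (peeking at only a few lines is purely for illustration):

import gzip
import urllib.request

# Stream ls-lR.gz over HTTP and decompress it on the fly; "rt" mode
# wraps the stream so iteration yields decoded text lines.
url = "http://ftp.debian.org/debian/ls-lR.gz"
with gzip.open(urllib.request.urlopen(url), "rt") as f:
    for i, line in enumerate(f):
        print(line.rstrip())
        if i >= 5:  # stop after a handful of lines; demo only
            break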
@@ -102,28 +104,66 @@ class Release:
         return False


-def get_releases():
-    ftp = ftplib.FTP("mirror.nl.leaseweb.net")
-    ftp.login()
-    logger.debug("Connected to FTP")
-
-    for distdir in ("/debian/dists", "/ubuntu/dists", ):
-        ftp.cwd(distdir)
-        distsubdirs = ftp.nlst()
-        assert len(distsubdirs) > 0
-        logger.debug("Found %d items in %s", len(distsubdirs), distdir)
-
-        for x in distsubdirs:
-            data = BytesIO()
-
-            try:
-                ftp.retrbinary(f"RETR {x}/Release", data.write)
-                assert data.tell() > 0
-                data.seek(0)
-                logger.debug("Downloaded %s/%s/Release", distdir, x)
-
-                yield Release(data)
-            except ftplib.error_perm:
-                pass
+def get_releases(url):
+    dirlinepattern = re.compile(
+        r"\.(/dists/[\w\-]+):"
+    )
+
+    filelinepattern = re.compile(
+        r"([\w\-]+)" # permissions (1)
+        r"\s+"
+        r"(\d+)" # inodes (2)
+        r"\s+"
+        r"(\w+)" # user (3)
+        r"\s+"
+        r"(\w+)" # group (4)
+        r"\s+"
+        r"(\d+)" # size (5)
+        r"\s+"
+        r"(\w+\s+\d+\s+\d+:\d+|\w+\s+\d+\s+\d+)" # datetime (6)
+        r"\s+"
+        r"(.*)" # filename (7)
+    )
+
+    listurl = url + "/ls-lR.gz"
+    with gzip.open(urllib.request.urlopen(listurl), "rt") as f:
+        logger.debug("Downloaded %s", listurl)
+
+        while f:
+            try:
+                dirnameline = next(f).strip()
+                assert dirnameline.startswith(".")
+                totalline = next(f).strip()
+                assert totalline.startswith("total ")
+            except StopIteration:
+                break
+
+            skipdir = True
+            dirlinematch = dirlinepattern.fullmatch(dirnameline)
+            if dirlinematch:
+                debiandir = dirlinematch.group(1)
+                skipdir = False
+
+            for a in f:
+                fileline = a.strip()
+                if fileline == "":
+                    break
+                if skipdir:
+                    continue
+
+                filelinematch = filelinepattern.fullmatch(fileline)
+                if not filelinematch:
+                    continue
+                filename = filelinematch.group(7)
+                if filename == "Release" or filename.startswith("Release ->"):
+                    relurl = url + debiandir + "/Release"
+                    try:
+                        with urllib.request.urlopen(relurl) as u:
+                            logger.debug("Downloaded %s", relurl)
+
+                            yield Release(u)
+                    except urllib.error.URLError as e:
+                        logger.warning("Failed to download %s: %s", relurl, e)


 def write_csv(filename, releases, archs):
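For context, the two patterns added above are meant to parse the blocks of an ls-lR listing: a directory header line, a "total" line, then one ls -l style entry per file, with blank lines between blocks. A small self-check against a made-up excerpt (the user, group, sizes and dates are invented for illustration; the patterns are the ones from this hunk):

import re

dirlinepattern = re.compile(r"\.(/dists/[\w\-]+):")
filelinepattern = re.compile(
    r"([\w\-]+)" r"\s+" r"(\d+)" r"\s+" r"(\w+)" r"\s+" r"(\w+)" r"\s+"
    r"(\d+)" r"\s+" r"(\w+\s+\d+\s+\d+:\d+|\w+\s+\d+\s+\d+)" r"\s+" r"(.*)"
)

# Invented listing block in the shape the parser expects.
sample = [
    "./dists/bookworm:",
    "total 1234",
    "drwxr-xr-x  5 archvsync debian     4096 Jun 10  2023 main",
    "-rw-r--r--  1 archvsync debian   193887 Jun 10  2023 Release",
]

assert dirlinepattern.fullmatch(sample[0]).group(1) == "/dists/bookworm"
m = filelinepattern.fullmatch(sample[3])
assert m.group(5) == "193887"  # size is capture group 5
assert m.group(7) == "Release"  # filename is capture group 7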
@@ -155,7 +195,11 @@ def write_csv(filename, releases, archs):

 if __name__ == "__main__":
     logger.info("Downloading releases...")
-    releases = list(sorted(set(get_releases())))
+    debianreleases = set(get_releases("http://ftp.debian.org/debian"))
+    assert len(debianreleases) > 0
+    ubuntureleases = set(get_releases("http://ftp.ubuntu.com/ubuntu"))
+    assert len(ubuntureleases) > 0
+    releases = list(sorted(debianreleases | ubuntureleases))
     assert len(releases) > 0
     logger.info("Found %d releases", len(releases))

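The merge in __main__ relies on Release objects working with set() and sorted(), which the existing Release class already supports (the old code also used set() and sorted(); those methods are outside this diff). A toy illustration of the same merge pattern with a hypothetical stand-in type:

from dataclasses import dataclass

# Stand-in for Release, only to illustrate the set-union merge in __main__.
@dataclass(frozen=True, order=True)
class FakeRelease:
    codename: str

debianreleases = {FakeRelease("bookworm"), FakeRelease("bullseye")}
ubuntureleases = {FakeRelease("jammy"), FakeRelease("focal")}

# The union deduplicates; sorted() orders by the dataclass fields.
releases = list(sorted(debianreleases | ubuntureleases))
print([r.codename for r in releases])  # ['bookworm', 'bullseye', 'focal', 'jammy']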