Refined the Release class and get_dist_releases
get_dist_releases now reads the dist names from the repository's /dists/ web page rather than from the large ls-lR.gz file, which needed unpacking.
This commit is contained in:
parent
31b31c2bd7
commit
d62ab37036
|
|
@ -19,14 +19,22 @@ class Release:
|
|||
def __init__(self, fileobj):
|
||||
params = {}
|
||||
for line in fileobj:
|
||||
line = line.decode("utf-8")
|
||||
if line.startswith(" ") or ": " not in line:
|
||||
continue
|
||||
line = line.decode('utf-8').strip()
|
||||
if line == "MD5Sum:" or line == "SHA256:":
|
||||
break
|
||||
|
||||
k, v = line.strip().split(": ", 1)
|
||||
k, v = line.split(": ", 1)
|
||||
params[k] = v
|
||||
|
||||
self.label = params.get("Label")
|
||||
# "Origin" is Debian/Ubuntu/Devuan as expected.
|
||||
# "Origin" = "Label" for Debian and Ubuntu, not always for Devuan.
|
||||
# "Label" is "Debian"/"Ubuntu" for Debian/Ubuntu.
|
||||
# "Label" is "Devuan" or "Master" for Devuan.
|
||||
# "Label" of "Master" has no equivalent in Debian/Ubuntu.
|
||||
# Where this program uses "label" it really wants "origin".
|
||||
self.origin = params.get("Origin")
|
||||
self.label = self.origin
|
||||
|
||||
self.suite = params.get("Suite")
|
||||
self.version = params.get("Version")
|
||||
self.codename = params.get("Codename")
|
||||
|
|
@ -84,16 +92,29 @@ class Release:
|
|||
return date.today() - release_date
|
||||
|
||||
def is_relevant(self):
|
||||
if self.label not in ("Debian", "Ubuntu", ):
|
||||
if self.label not in ("Debian", "Ubuntu", "Devuan", ):
|
||||
return False
|
||||
|
||||
bl1 = ("oldoldstable", "devel", )
|
||||
if self.suite in bl1:
|
||||
return False
|
||||
if self.label == "Debian" or self.label == "Ubuntu":
|
||||
bl1 = ("oldoldstable", "devel", )
|
||||
if self.suite in bl1:
|
||||
return False
|
||||
|
||||
bl2 = ("-updates", "-backports", "-security", "-proposed", "-sloppy", )
|
||||
if any(self.suite.endswith(suffix) for suffix in bl2):
|
||||
return False
|
||||
bl2 = ("-updates", "-backports", "-security", "-proposed", "-sloppy", )
|
||||
if any(self.suite.endswith(suffix) for suffix in bl2):
|
||||
return False
|
||||
|
||||
if self.label == "Devuan":
|
||||
# "oldoldstable" is maintained in Devuan.
|
||||
# These are no longer maintained.
|
||||
bl_ = ("jessie", "ascii", )
|
||||
if self.suite in bl_:
|
||||
return False
|
||||
|
||||
# For fine-grained control:
|
||||
bl_ = ("-backports", "-security", "-proposed-updates", )
|
||||
if any(self.suite.endswith(suffix) for suffix in bl_):
|
||||
return False
|
||||
|
||||
if self.label == "Ubuntu":
|
||||
if self.is_lts():
|
||||
|
|
@ -108,10 +129,19 @@ class Release:
|
|||
return True
|
||||
if self.label == "Ubuntu" and self.age() < timedelta(days=0):
|
||||
return True
|
||||
if self.label == "Devuan" and self.suite == "experimental":
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
"""
|
||||
# Note: get_releases(url) is deprecated because it can not work with Devuan.
|
||||
# Instead, use get_dist_releases(url).
|
||||
#
|
||||
# get_releases(url) requires the file ls-lR.gz to be present.
|
||||
# This is not available in Devuan.
|
||||
#
|
||||
def get_releases(url):
|
||||
dirlinepattern = re.compile(
|
||||
r"\.(/dists/[\w\-]+):"
|
||||
|
|
@ -172,6 +202,49 @@ def get_releases(url):
|
|||
yield Release(u)
|
||||
except urllib.error.URLError as e:
|
||||
logger.warning("Failed to download %s: %s", relurl, e)
|
||||
"""
|
||||
|
||||
|
||||
def get_dist_releases(url):
    """Yield a Release for each dist linked from a repository's /dists/ page.

    The HTML page at ``url + "/dists/"`` is downloaded and its links are
    filtered with an XPath expression to obtain the dist directory names.
    For each name, ``<listurl><name>Release`` is opened and handed to
    ``Release`` for parsing.

    Args:
        url: Base URL of the repository; "/dists/" is appended to it.

    Yields:
        Release: one object per dist whose Release file could be opened.

    A urllib.error.URLError while fetching the listing is logged as a
    warning and ends the generator without yielding anything. The same
    error for an individual Release file is logged and that dist is skipped.
    """
    from lxml import html

    # Open the web page listurl and use an xpath to extract the dist names.
    listurl = url + "/dists/"

    try:
        # The context manager closes the HTTP response once the page has
        # been read and parsed, instead of leaving the connection open.
        with urllib.request.urlopen(listurl) as response:
            tree = html.fromstring(response.read())
        logger.debug("Downloaded %s", listurl)
    except urllib.error.URLError as e:
        logger.warning("Failed to download %s: %s", listurl, e)
        return

    # Extract dist names from the web links.
    #
    # Finds <a href="{debiandir}"> in the web page, using XPath 1.0:
    #   matches: buster/, daedalus/, noble/, oldstable/, stable/, unstable/
    #   does not match: ../, /debian/, daedalus-updates/, 6.0/
    #
    # i.e. the href must contain a "/", must not start with "/", and must
    # contain neither "-" nor ".".
    dist_path = "//a[contains(@href,'/') \
        and not(starts-with(@href,'/')) \
        and not(contains(@href,'-')) \
        and not(contains(@href,'.')) \
        ]/@href"

    dist_names = tree.xpath(dist_path)

    for debiandir in dist_names:
        # debiandir carries its own trailing "/", so this yields
        # <url>/dists/<dist>/Release.
        relurl = listurl + debiandir + "Release"

        try:
            with urllib.request.urlopen(relurl) as u:
                logger.debug("Downloaded %s", relurl)

                yield Release(u)
        except urllib.error.URLError as e:
            logger.warning("Failed to download %s: %s", relurl, e)
|
||||
|
||||
|
||||
def write_csv(filename, releases, archs):
|
||||
|
|
@ -181,6 +254,7 @@ def write_csv(filename, releases, archs):
|
|||
|
||||
for r in releases:
|
||||
if not r.is_relevant():
|
||||
logger.debug("Discarding as not relevant: %s ", repr(r))
|
||||
continue
|
||||
|
||||
for arch in archs:
|
||||
|
|
@ -203,11 +277,13 @@ def write_csv(filename, releases, archs):
|
|||
|
||||
if __name__ == "__main__":
|
||||
logger.info("Downloading releases...")
|
||||
debianreleases = set(get_releases("http://ftp.debian.org/debian"))
|
||||
debianreleases = set(get_dist_releases("http://ftp.debian.org/debian"))
|
||||
assert len(debianreleases) > 0
|
||||
ubuntureleases = set(get_releases("http://ftp.ubuntu.com/ubuntu"))
|
||||
ubuntureleases = set(get_dist_releases("http://ftp.ubuntu.com/ubuntu"))
|
||||
assert len(ubuntureleases) > 0
|
||||
releases = list(sorted(debianreleases | ubuntureleases))
|
||||
devuanreleases = set(get_dist_releases("http://deb.devuan.org/merged"))
|
||||
assert len(devuanreleases) > 0
|
||||
releases = list(sorted(debianreleases | ubuntureleases | devuanreleases))
|
||||
assert len(releases) > 0
|
||||
logger.info("Found %d releases", len(releases))
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue