317 lines
10 KiB
Python
Executable File
317 lines
10 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
from datetime import timedelta, date, datetime
|
|
from csv import DictWriter
|
|
|
|
import gzip
|
|
import logging
|
|
import re
|
|
import urllib.request
|
|
import urllib.error
|
|
|
|
|
|
# Attach the default handler to the root logger before any message is logged.
logging.basicConfig()

# Script-wide logger. DEBUG is enabled so that every download and every
# discarded release is reported.
logger = logging.getLogger("rebuild-debian-csv")
logger.setLevel(logging.DEBUG)
|
|
|
|
|
|
class Release:
    """Header fields of one APT ``Release`` file (Debian, Ubuntu or Devuan).

    Instances are hashable and comparable: equality and hashing are based on
    ``repr()``, ordering on ``sortkey``.

    Attributes:
        origin: value of the "Origin" field, or None when absent.
        label: same as ``origin`` (see the note in ``__init__``).
        suite: value of the "Suite" field, or None.
        version: value of the "Version" field, or None.
        codename: value of the "Codename" field, or None.
        architectures: list of names from the "Architectures" field
            (empty when the field is absent).
        sortkey: string used by ``__lt__`` to order releases.
    """

    # Pseudo version numbers used in the sort key for suites that carry no
    # "Version" field; any other unversioned suite sorts as "99".
    SUITE_TO_VERSION = {
        "testing": "96",
        "unstable": "97",
        "experimental": "98",
    }

    def __init__(self, fileobj):
        """Parse the header of a Release file.

        Args:
            fileobj: iterable of UTF-8 encoded ``bytes`` lines, e.g. the
                response object returned by ``urllib.request.urlopen``.
        """
        params = {}
        for raw_line in fileobj:
            line = raw_line.decode('utf-8').strip()
            # Header of "Release" finishes at:
            #   "MD5Sum:" in Debian/Ubuntu
            #   "SHA256:" in Devuan
            if line in ("MD5Sum:", "SHA256:"):
                break

            # Skip anything that is not a "Key: value" pair (blank lines,
            # keys with an empty value) instead of failing on the unpack.
            key, separator, value = line.partition(": ")
            if not separator:
                continue
            params[key] = value

        # In Release files,
        # e.g. https://ftp.debian.org/debian/dists/stable/Release
        # "Origin" is Debian/Ubuntu/Devuan as expected.
        # "Origin" = "Label" for Debian and Ubuntu, not always for Devuan.
        # "Label" is "Debian"/"Ubuntu" for Debian/Ubuntu.
        # "Label" is "Devuan" or "Master" for Devuan.
        # "Label" of "Master" has no equivalent in Debian/Ubuntu.
        #
        # Where this program uses "label" it really wants "origin".
        self.origin = params.get("Origin")
        self.label = self.origin

        self.suite = params.get("Suite")
        self.version = params.get("Version")
        self.codename = params.get("Codename")
        # split() without an argument yields [] (not [""]) for a missing field.
        self.architectures = params.get("Architectures", "").split()

        # A missing "Origin" must not break sorting, hence the `or ""`.
        self.sortkey = (self.label or "") + \
            (self.version or self.SUITE_TO_VERSION.get(self.suite, "99"))

    def __repr__(self):
        """Return the human-readable name, e.g. ``Debian stable/12.5 ("bookworm")``."""
        name = self.label or "Unknown"
        if self.version and self.suite and self.suite != self.codename:
            name += f" {self.suite}/{self.version}"
        elif self.version:
            name += f" {self.version}"
        elif self.suite:
            name += f" {self.suite}"
        if self.is_lts():
            name += " LTS"
        if self.codename:
            name += f" (\"{self.codename}\")"

        return name

    def __eq__(self, other):
        """Two releases are equal when their display names are equal."""
        if not isinstance(other, Release):
            return NotImplemented
        return repr(self) == repr(other)

    def __lt__(self, other):
        """Order releases by ``sortkey`` (plain string comparison)."""
        if not isinstance(other, Release):
            return NotImplemented
        return self.sortkey < other.sortkey

    def __hash__(self):
        """Hash consistently with ``__eq__``."""
        return hash(repr(self))

    def release_date(self):
        """Return the release date derived from an Ubuntu "YY.MM" version.

        Returns:
            A ``date`` (first day of the release month) for Ubuntu releases
            with a parseable version; None otherwise.
        """
        if self.label == "Ubuntu" and self.version:
            try:
                return datetime.strptime(self.version, "%y.%m").date()
            except ValueError as e:
                logger.warning("Can't parse calver %s: %s", self.version, e)
        return None

    def is_lts(self):
        """Return True for releases dated April of an even year."""
        release_date = self.release_date()
        if release_date is None:
            return False
        return release_date.year % 2 == 0 and release_date.month == 4

    def age(self):
        """Return ``date.today() - release_date()``, or None if the date is unknown."""
        release_date = self.release_date()
        if release_date is None:
            return None
        return date.today() - release_date

    def is_relevant(self):
        """Decide whether this release should be listed in the CSV output.

        Returns:
            False for unknown origins, blacklisted suites, pocket suites
            ("-updates", "-security", ...) and Ubuntu releases that are too
            old (5 x 365 days for LTS, 365 days otherwise) or whose age
            cannot be determined; True otherwise.
        """
        if self.label not in ("Debian", "Ubuntu", "Devuan", ):
            return False

        # A Release file without "Suite" must not crash the suffix checks.
        suite = self.suite or ""

        if self.label in ("Debian", "Ubuntu"):
            if suite in ("oldoldstable", "devel", ):
                return False

            pockets = ("-updates", "-backports", "-security", "-proposed",
                       "-sloppy", )
            if suite.endswith(pockets):
                return False

        if self.label == "Devuan":
            # "oldoldstable" is maintained in Devuan.
            # These are no longer maintained.
            if suite in ("jessie", "ascii", ):
                return False

            # For fine-grained control:
            pockets = ("-backports", "-security", "-proposed-updates", )
            if suite.endswith(pockets):
                return False

        if self.label == "Ubuntu":
            age = self.age()
            if age is None:
                # No parseable version, so the support window is unknown.
                return False
            years_supported = 5 if self.is_lts() else 1
            return age < years_supported * timedelta(days=365)

        return True

    def is_experimental(self):
        """Return True for experimental suites and not-yet-released Ubuntu.

        Debian and Devuan are experimental when the suite is "experimental";
        Ubuntu is experimental when its release date lies in the future.
        """
        if self.label in ("Debian", "Devuan"):
            return self.suite == "experimental"

        if self.label == "Ubuntu":
            age = self.age()
            return age is not None and age < timedelta(days=0)

        return False
|
|
|
|
|
|
"""
|
|
# Note: get_releases(url) is deprecated because it cannot work with Devuan.
|
|
# Instead, use get_dist_releases(url).
|
|
#
|
|
# get_releases(url) requires the file ls-lR.gz to be present.
|
|
# This is not available in Devuan.
|
|
#
|
|
def get_releases(url):
|
|
dirlinepattern = re.compile(
|
|
r"\.(/dists/[\w\-]+):"
|
|
)
|
|
|
|
filelinepattern = re.compile(
|
|
r"([\w\-]+)" # permissions (1)
|
|
r"\s+"
|
|
r"(\d+)" # inodes (2)
|
|
r"\s+"
|
|
r"(\w+)" # user (3)
|
|
r"\s+"
|
|
r"(\w+)" # group (4)
|
|
r"\s+"
|
|
r"(\d+)" # size (5)
|
|
r"\s+"
|
|
r"(\w+\s+\d+\s+\d+:\d+|\w+\s+\d+\s+\d+)" # datetime (6)
|
|
r"\s+"
|
|
r"(.*)" # filename (7)
|
|
)
|
|
|
|
listurl = url + "/ls-lR.gz"
|
|
with gzip.open(urllib.request.urlopen(listurl), "rt") as f:
|
|
logger.debug("Downloaded %s", listurl)
|
|
|
|
while f:
|
|
try:
|
|
dirnameline = next(f).strip()
|
|
assert dirnameline.startswith(".")
|
|
totalline = next(f).strip()
|
|
assert totalline.startswith("total ")
|
|
except StopIteration:
|
|
break
|
|
|
|
skipdir = True
|
|
dirlinematch = dirlinepattern.fullmatch(dirnameline)
|
|
if dirlinematch:
|
|
debiandir = dirlinematch.group(1)
|
|
skipdir = False
|
|
|
|
for a in f:
|
|
fileline = a.strip()
|
|
if fileline == "":
|
|
break
|
|
if skipdir:
|
|
continue
|
|
|
|
filelinematch = filelinepattern.fullmatch(fileline)
|
|
if not filelinematch:
|
|
continue
|
|
filename = filelinematch.group(7)
|
|
if filename == "Release" or filename.startswith("Release ->"):
|
|
relurl = url + debiandir + "/Release"
|
|
try:
|
|
with urllib.request.urlopen(relurl) as u:
|
|
logger.debug("Downloaded %s", relurl)
|
|
|
|
yield Release(u)
|
|
except urllib.error.URLError as e:
|
|
logger.warning("Failed to download %s: %s", relurl, e)
|
|
"""
|
|
|
|
|
|
def get_dist_releases(url):
    """Yield a Release for every plain dist directory below ``url``/dists/.

    The HTML directory index at ``url + "/dists/"`` is downloaded and its
    links are filtered with an XPath 1.0 expression. For each remaining
    directory the file ``Release`` is downloaded and parsed.

    Args:
        url: base URL of the archive, without trailing slash,
            e.g. "http://ftp.debian.org/debian".

    Yields:
        Release: one object per Release file that could be downloaded.
        Download failures are logged as warnings and skipped; if the index
        page itself cannot be downloaded nothing is yielded.
    """
    from lxml import html

    listurl = url + "/dists/"

    try:
        # Close the HTTP response as soon as the page has been read.
        with urllib.request.urlopen(listurl) as response:
            page = response.read()
    except urllib.error.URLError as e:
        logger.warning("Failed to download %s: %s", listurl, e)
        return
    logger.debug("Downloaded %s", listurl)

    tree = html.fromstring(page)

    # Select the href of every <a> element whose href is:
    #   a directory             contains(@href,'/')
    #   not, e.g., /debian/     not(starts-with(@href,'/'))
    #   a plain codename        not(contains(@href,'-'))
    #   not a number or ../     not(contains(@href,'.'))
    #
    # matches:        buster/, daedalus/, noble/, oldstable/, stable/, unstable/
    # does not match: ../, /debian/, daedalus-updates/, 6.0/
    #
    # This excludes all "-updates", "-backports", "-security", "-proposed", etc.
    dist_path = (
        "//a[contains(@href,'/')"
        " and not(starts-with(@href,'/'))"
        " and not(contains(@href,'-'))"
        " and not(contains(@href,'.'))"
        "]/@href"
    )

    for debiandir in tree.xpath(dist_path):
        # debiandir already ends with "/" (guaranteed by the XPath filter).
        relurl = listurl + debiandir + "Release"

        try:
            with urllib.request.urlopen(relurl) as u:
                logger.debug("Downloaded %s", relurl)
                release = Release(u)
        except urllib.error.URLError as e:
            logger.warning("Failed to download %s: %s", relurl, e)
        else:
            # Yield outside the "with" so the connection is not held open
            # while the consumer processes the release.
            yield release
|
|
|
|
|
|
def write_csv(filename, releases, archs):
    """Write the relevant releases to a CSV file, one row per architecture.

    The file has the columns "OS", "Dist", "Arch", "Name" and "Exp".
    Releases for which ``is_relevant()`` is false, and releases without a
    codename, are skipped. The Debian codename "rc-buggy" is written as
    "experimental".

    Args:
        filename: path of the CSV file to create or overwrite.
        releases: iterable of Release objects, written in iteration order.
        archs: architecture names to emit; a row is written only for those
            also listed in the release's ``architectures``.
    """
    with open(filename, "w", newline="", encoding="utf-8") as f:
        w = DictWriter(f, fieldnames=("OS", "Dist", "Arch", "Name", "Exp", ))
        w.writeheader()

        for r in releases:
            if not r.is_relevant():
                logger.debug("Discarding as not relevant: %s ", repr(r))
                continue

            if not r.codename:
                # Without a codename there is no value for the "Dist" column.
                logger.warning("Discarding as it has no codename: %s", repr(r))
                continue

            # The same for every architecture, so computed once per release.
            dist = r.codename.lower()
            if dist == "rc-buggy":
                dist = "experimental"

            for arch in archs:
                if arch not in r.architectures:
                    continue

                w.writerow({
                    "OS": r.label.lower(),
                    "Dist": dist,
                    "Arch": arch,
                    "Name": repr(r),
                    "Exp": r.is_experimental(),
                })
                logger.debug("Wrote %s to file %s", r, filename)
|
|
|
|
|
|
if __name__ == "__main__":
    # Archives to scan; each is passed to get_dist_releases().
    MIRRORS = (
        "http://ftp.debian.org/debian",
        "http://ftp.ubuntu.com/ubuntu",
        "http://deb.devuan.org/merged",
    )

    logger.info("Downloading releases...")
    found = set()
    for mirror in MIRRORS:
        # A set removes duplicates: alias directories (e.g. "stable/" and
        # the codename directory) give Release objects that compare equal.
        mirror_releases = set(get_dist_releases(mirror))
        if not mirror_releases:
            # Explicit check instead of "assert", which is removed by -O.
            raise SystemExit(f"No releases found at {mirror}")
        found |= mirror_releases

    releases = sorted(found)
    logger.info("Found %d releases", len(releases))

    write_csv("debians-arm.csv", releases, ("armhf", "arm64"))
    logger.info("Wrote debians-arm.csv")

    write_csv("debians-x86.csv", releases, ("i386", "amd64"))
    logger.info("Wrote debians-x86.csv")
|