sd-card-images/metascripts/rebuild-debian-csv

317 lines
10 KiB
Python
Executable File

#!/usr/bin/env python3
from datetime import timedelta, date, datetime
from csv import DictWriter
import gzip
import logging
import re
import urllib.request
import urllib.error
# Set up logging with the library defaults, then create this script's named
# logger and lower its threshold so that DEBUG messages are emitted too.
logging.basicConfig()
logger = logging.getLogger("rebuild-debian-csv")
logger.setLevel(logging.DEBUG)
class Release:
    """Metadata parsed from the header of an APT archive "Release" file.

    Only the "Key: value" header is read; parsing stops at the first
    checksum section.  Instances compare equal (and hash) by their
    human-readable name and sort by ``sortkey``.

    Attributes:
        origin: Value of the "Origin" field, or None when absent.
        label: Same as ``origin`` ("" when absent); see the note in __init__.
        suite: Value of the "Suite" field, or None.
        version: Value of the "Version" field, or None.
        codename: Value of the "Codename" field, or None.
        architectures: List of architecture names (empty when absent).
        sortkey: String used for ordering releases.
    """

    # Pseudo-versions used for sorting suites that carry no "Version" field.
    # Suites not listed here sort last, as "99".
    _SUITE_TO_VERSION = {
        "testing": "96",
        "unstable": "97",
        "experimental": "98",
    }

    def __init__(self, fileobj):
        """Parse the header of a Release file.

        Args:
            fileobj: Iterable of byte lines (e.g. an open HTTP response).
        """
        params = {}
        for raw in fileobj:
            line = raw.decode("utf-8", errors="replace").strip()

            # Header of "Release" finishes at:
            #   "MD5Sum:" in Debian/Ubuntu
            #   "SHA256:" in Devuan
            if line in ("MD5Sum:", "SHA256:"):
                break

            key, sep, value = line.partition(": ")
            if not sep:
                # Blank lines and empty-valued fields carry no "Key: value"
                # pair; skip them instead of failing to unpack.
                continue
            params[key] = value

        # In Release files,
        # e.g. https://ftp.debian.org/debian/dists/stable/Release
        # "Origin" is Debian/Ubuntu/Devuan as expected.
        # "Origin" = "Label" for Debian and Ubuntu, not always for Devuan.
        # "Label" is "Debian"/"Ubuntu" for Debian/Ubuntu.
        # "Label" is "Devuan" or "Master" for Devuan.
        # "Label" of "Master" has no equivalent in Debian/Ubuntu.
        #
        # Where this program uses "label" it really wants "origin".
        self.origin = params.get("Origin")
        # Fall back to "" so string operations on label never see None.
        self.label = self.origin or ""
        self.suite = params.get("Suite")
        self.version = params.get("Version")
        self.codename = params.get("Codename")
        # split() without an argument yields [] for a missing/empty field.
        self.architectures = params.get("Architectures", "").split()
        self.sortkey = self.label + (
            self.version or self._SUITE_TO_VERSION.get(self.suite, "99")
        )

    def __repr__(self):
        """Return the human-readable release name written to the CSV."""
        name = self.label
        if self.version and self.suite and self.suite != self.codename:
            name += f" {self.suite}/{self.version}"
        elif self.version:
            name += f" {self.version}"
        elif self.suite:
            name += f" {self.suite}"
        if self.is_lts():
            name += " LTS"
        if self.codename:
            name += f" (\"{self.codename}\")"
        return name

    def __eq__(self, other):
        return repr(self) == repr(other)

    def __lt__(self, other):
        return self.sortkey < other.sortkey

    def __hash__(self):
        return hash(repr(self))

    def release_date(self):
        """Return the date encoded in an Ubuntu "YY.MM" version, else None.

        None is returned for non-Ubuntu releases, for releases without a
        version, and (after logging a warning) for unparsable versions.
        """
        if self.label != "Ubuntu" or not self.version:
            return None
        try:
            return datetime.strptime(self.version, "%y.%m").date()
        except ValueError as e:
            logger.warning("Can't parse calver %s: %s", self.version, e)
            return None

    def is_lts(self):
        """Return True for releases dated April of an even year."""
        released = self.release_date()
        if released is None:
            return False
        return released.year % 2 == 0 and released.month == 4

    def age(self):
        """Return today's date minus the release date, or None if unknown."""
        released = self.release_date()
        if released is None:
            return None
        return date.today() - released

    def is_relevant(self):
        """Return True if this release should be listed in the CSV output."""
        if self.label not in ("Debian", "Ubuntu", "Devuan"):
            return False

        # A missing "Suite" field cannot match any entry below.
        suite = self.suite or ""

        if self.label in ("Debian", "Ubuntu"):
            if suite in ("oldoldstable", "devel"):
                return False
            pockets = (
                "-updates", "-backports", "-security", "-proposed", "-sloppy",
            )
            if suite.endswith(pockets):
                return False

        if self.label == "Devuan":
            # "oldoldstable" is maintained in Devuan.
            # These are no longer maintained.
            if suite in ("jessie", "ascii"):
                return False
            # For fine-grained control:
            if suite.endswith(("-backports", "-security", "-proposed-updates")):
                return False

        if self.label == "Ubuntu":
            age = self.age()
            if age is None:
                # Without a parsable version the age cannot be checked, so
                # the release is conservatively treated as not relevant.
                return False
            # LTS releases are kept for five years, others for one.
            years_kept = 5 if self.is_lts() else 1
            return age < years_kept * timedelta(days=365)

        return True

    def is_experimental(self):
        """Return True for experimental suites and not-yet-released Ubuntu."""
        if self.label in ("Debian", "Devuan"):
            return self.suite == "experimental"
        if self.label == "Ubuntu":
            age = self.age()
            # A negative age means the release date is still in the future.
            return age is not None and age < timedelta(days=0)
        return False
"""
# Note: get_releases(url) is deprecated because it cannot work with Devuan.
# Use get_dist_releases(url) instead.
#
# get_releases(url) requires the file ls-lR.gz to be present in the archive,
# which Devuan does not provide.
#
def get_releases(url):
dirlinepattern = re.compile(
r"\.(/dists/[\w\-]+):"
)
filelinepattern = re.compile(
r"([\w\-]+)" # permissions (1)
r"\s+"
r"(\d+)" # inodes (2)
r"\s+"
r"(\w+)" # user (3)
r"\s+"
r"(\w+)" # group (4)
r"\s+"
r"(\d+)" # size (5)
r"\s+"
r"(\w+\s+\d+\s+\d+:\d+|\w+\s+\d+\s+\d+)" # datetime (6)
r"\s+"
r"(.*)" # filename (7)
)
listurl = url + "/ls-lR.gz"
with gzip.open(urllib.request.urlopen(listurl), "rt") as f:
logger.debug("Downloaded %s", listurl)
while f:
try:
dirnameline = next(f).strip()
assert dirnameline.startswith(".")
totalline = next(f).strip()
assert totalline.startswith("total ")
except StopIteration:
break
skipdir = True
dirlinematch = dirlinepattern.fullmatch(dirnameline)
if dirlinematch:
debiandir = dirlinematch.group(1)
skipdir = False
for a in f:
fileline = a.strip()
if fileline == "":
break
if skipdir:
continue
filelinematch = filelinepattern.fullmatch(fileline)
if not filelinematch:
continue
filename = filelinematch.group(7)
if filename == "Release" or filename.startswith("Release ->"):
relurl = url + debiandir + "/Release"
try:
with urllib.request.urlopen(relurl) as u:
logger.debug("Downloaded %s", relurl)
yield Release(u)
except urllib.error.URLError as e:
logger.warning("Failed to download %s: %s", relurl, e)
"""
def get_dist_releases(url):
    """Yield a Release for each plain dist directory below ``url``/dists/.

    The HTML index at ``url + "/dists/"`` is downloaded and its links are
    filtered down to bare dist names; for each one the "Release" file is
    fetched and parsed.  Download failures are logged as warnings and
    skipped, so the generator may yield nothing at all.

    Args:
        url: Archive root without a trailing slash,
            e.g. "http://ftp.debian.org/debian".

    Yields:
        Release: one per Release file that could be downloaded.
    """
    from lxml import html

    listurl = url + "/dists/"
    try:
        # The context manager closes the HTTP response once the page is read.
        with urllib.request.urlopen(listurl) as response:
            tree = html.fromstring(response.read())
    except urllib.error.URLError as e:
        logger.warning("Failed to download %s: %s", listurl, e)
        return
    logger.debug("Downloaded %s", listurl)

    # Extract dist names from the web links, i.e. find <a href="{debiandir}">
    # in the page.  Using XPath 1.0:
    #   matches:        buster/, daedalus/, noble/, oldstable/, stable/
    #   does not match: ../, /debian/, daedalus-updates/, 6.0/
    # The predicate selects only hrefs which are:
    #   directories             contains(@href,'/')
    #   not, e.g., /debian/     not(starts-with(@href,'/'))
    #   codenames               not(contains(@href,'-'))
    #   not numbers or ../      not(contains(@href,'.'))
    # This excludes all "-updates", "-backports", "-security", "-proposed",
    # etc.
    dist_path = (
        "//a[contains(@href,'/')"
        " and not(starts-with(@href,'/'))"
        " and not(contains(@href,'-'))"
        " and not(contains(@href,'.'))"
        "]/@href"
    )

    for debiandir in tree.xpath(dist_path):
        relurl = listurl + debiandir + "Release"
        try:
            with urllib.request.urlopen(relurl) as u:
                logger.debug("Downloaded %s", relurl)
                release = Release(u)
        except urllib.error.URLError as e:
            logger.warning("Failed to download %s: %s", relurl, e)
            continue
        # Yield only after the connection is closed, so a slow consumer does
        # not keep the HTTP response open.
        yield release
def write_csv(filename, releases, archs):
    """Write one CSV row per relevant release and supported architecture.

    Columns are OS, Dist, Arch, Name and Exp.  Releases for which
    ``is_relevant()`` is false are skipped, as are architectures that the
    release does not list.  The Debian codename "rc-buggy" is written as
    "experimental".

    Args:
        filename: Path of the CSV file to create or overwrite.
        releases: Iterable of Release objects, in output order.
        archs: Architecture names to emit, in output order.
    """
    fieldnames = ("OS", "Dist", "Arch", "Name", "Exp", )
    # newline="" is what the csv module expects; the encoding is pinned so
    # the output does not depend on the locale.
    with open(filename, "w", newline="", encoding="utf-8") as f:
        w = DictWriter(f, fieldnames=fieldnames)
        w.writeheader()

        for r in releases:
            if not r.is_relevant():
                logger.debug("Discarding as not relevant: %s ", repr(r))
                continue

            wanted = [arch for arch in archs if arch in r.architectures]
            if not wanted:
                continue

            if not r.codename:
                # Without a codename there is no value for the "Dist" column.
                logger.warning("Discarding as it has no codename: %s", repr(r))
                continue

            dist = r.codename.lower()
            if dist == "rc-buggy":
                dist = "experimental"

            # These values are identical for every architecture of a release.
            os_name = r.label.lower()
            name = repr(r)
            experimental = r.is_experimental()

            for arch in wanted:
                w.writerow({
                    "OS": os_name,
                    "Dist": dist,
                    "Arch": arch,
                    "Name": name,
                    "Exp": experimental,
                })
                logger.debug("Wrote %s to file %s", r, filename)
if __name__ == "__main__":
    # Archive roots that are scanned for releases.  Every one of them must
    # yield at least one release, otherwise the run is aborted before any
    # CSV file is written.
    archives = (
        "http://ftp.debian.org/debian",
        "http://ftp.ubuntu.com/ubuntu",
        "http://deb.devuan.org/merged",
    )

    logger.info("Downloading releases...")
    found = set()
    for archive in archives:
        from_archive = set(get_dist_releases(archive))
        # An explicit check rather than assert, which is stripped under -O.
        if not from_archive:
            raise SystemExit(f"No releases found at {archive}")
        found |= from_archive

    releases = sorted(found)
    logger.info("Found %d releases", len(releases))

    write_csv("debians-arm.csv", releases, ("armhf", "arm64"))
    logger.info("Wrote debians-arm.csv")
    write_csv("debians-x86.csv", releases, ("i386", "amd64"))
    logger.info("Wrote debians-x86.csv")