rebuild-debian-csv: Use dist names not ls-lR.gz.
sd-card-images CI / test ${{ matrix.os }} ${{ matrix.arch }} ${{ matrix.suite }} (i386, debian, bullseye) (push) Waiting to run Details
sd-card-images CI / test ${{ matrix.os }} ${{ matrix.arch }} ${{ matrix.suite }} (i386, debian, sid) (push) Waiting to run Details
sd-card-images CI / test ${{ matrix.os }} ${{ matrix.arch }} ${{ matrix.suite }} (i386, debian, trixie) (push) Waiting to run Details
sd-card-images CI / build docker images (push) Blocked by required conditions Details
sd-card-images CI / test ${{ matrix.os }} ${{ matrix.arch }} ${{ matrix.suite }} (i386, debian, bookworm) (push) Has been cancelled Details

Get codenames from the package repository's web page, because
Devuan does not have an ls-lR.gz file.

Uses python3-lxml.
This commit is contained in:
david 2024-05-12 17:21:07 +01:00
parent 6ee61b294c
commit 4d56c211b7
1 changed files with 74 additions and 6 deletions

View File

@ -19,11 +19,13 @@ class Release:
def __init__(self, fileobj):
params = {}
for line in fileobj:
line = line.decode("utf-8")
if line.startswith(" ") or ": " not in line:
continue
line = line.decode('utf-8').strip()
# Header of "Release" finishes at:
# "MD5Sum:" Debian/Ubuntu
if line == "MD5Sum:":
break
k, v = line.strip().split(": ", 1)
k, v = line.split(": ", 1)
params[k] = v
self.label = params.get("Label")
@ -112,6 +114,13 @@ class Release:
return False
"""
# Note: get_releases(url) is deprecated because it cannot work with Devuan.
# Instead, use get_dist_releases(url).
#
# get_releases(url) requires the file ls-lR.gz to be present.
# This is not available in Devuan.
#
def get_releases(url):
dirlinepattern = re.compile(
r"\.(/dists/[\w\-]+):"
@ -172,6 +181,65 @@ def get_releases(url):
yield Release(u)
except urllib.error.URLError as e:
logger.warning("Failed to download %s: %s", relurl, e)
"""
def get_dist_releases(url):
    """Yield a Release for each codename directory linked from url/dists/.

    The HTML index page at ``url + "/dists/"`` is downloaded and its links
    are filtered with an XPath 1.0 expression to find the dist directories.
    For every directory found, its ``Release`` file is downloaded and a
    ``Release`` object is built from the open response.

    Unlike get_releases(url), this does not need an ls-lR.gz file on the
    mirror, so it also works for repositories that do not publish one.

    Download failures (urllib.error.URLError) are logged as warnings and
    skipped; if the index page itself cannot be downloaded, nothing is
    yielded.

    Requires the third-party package lxml (python3-lxml).

    :param url: base URL of the package repository, e.g.
        "http://ftp.debian.org/debian"; a trailing "/" is tolerated.
    """
    # Imported here so lxml is only needed when this function is used.
    from lxml import html

    # Strip a trailing slash so we never request ".../debian//dists/".
    listurl = url.rstrip("/") + "/dists/"
    try:
        # Use a context manager so the HTTP response is always closed,
        # matching how the per-release downloads below are handled.
        with urllib.request.urlopen(listurl) as response:
            tree = html.fromstring(response.read())
        logger.debug("Downloaded %s", listurl)
    except urllib.error.URLError as e:
        logger.warning("Failed to download %s: %s", listurl, e)
        return

    # Select <a href="..."> links that look like dist codename directories.
    #
    #   matches:        buster/, daedalus/, noble/, oldstable/, stable/,
    #                   unstable/
    #   does not match: ../, /debian/, daedalus-updates/, 6.0/
    #
    # Each XPath condition excludes one class of unwanted link:
    #   contains(@href,'/')            keep directories only
    #   not(starts-with(@href,'/'))    drop absolute paths, e.g. /debian/
    #   not(contains(@href,'-'))       drop "-updates", "-backports",
    #                                  "-security", "-proposed", etc.
    #   not(contains(@href,'.'))       drop version numbers and ../
    dist_path = (
        "//a[contains(@href,'/')"
        " and not(starts-with(@href,'/'))"
        " and not(contains(@href,'-'))"
        " and not(contains(@href,'.'))"
        "]/@href"
    )

    for debiandir in tree.xpath(dist_path):
        # debiandir is the href as it appears in the index page.
        relurl = listurl + debiandir + "Release"
        try:
            with urllib.request.urlopen(relurl) as u:
                logger.debug("Downloaded %s", relurl)
                yield Release(u)
        except urllib.error.URLError as e:
            logger.warning("Failed to download %s: %s", relurl, e)
def write_csv(filename, releases, archs):
@ -203,9 +271,9 @@ def write_csv(filename, releases, archs):
if __name__ == "__main__":
logger.info("Downloading releases...")
debianreleases = set(get_releases("http://ftp.debian.org/debian"))
debianreleases = set(get_dist_releases("http://ftp.debian.org/debian"))
assert len(debianreleases) > 0
ubuntureleases = set(get_releases("http://ftp.ubuntu.com/ubuntu"))
ubuntureleases = set(get_dist_releases("http://ftp.ubuntu.com/ubuntu"))
assert len(ubuntureleases) > 0
releases = list(sorted(debianreleases | ubuntureleases))
assert len(releases) > 0