KiBot/kibot/out_download_datasheets.py

# -*- coding: utf-8 -*-
# Copyright (c) 2021 Salvador E. Tropea
# Copyright (c) 2021 Instituto Nacional de Tecnología Industrial
# License: GPL-3.0
# Project: KiBot (formerly KiPlot)
import os
import re
import requests
from .out_base import VariantOptions
from .fil_base import DummyFilter
from .error import KiPlotConfigurationError
from .misc import W_UNKFLD, W_ALRDOWN, W_FAILDL
from .gs import GS
from .macros import macros, document, output_class # noqa: F401
from . import log
logger = log.get_logger()

USER_AGENT = 'Mozilla/5.0 (Windows NT 5.2; rv:2.0.1) Gecko/20100101 Firefox/4.0.1'


def is_url(ds):
    return ds.startswith('http://') or ds.startswith('https://')


class Download_Datasheets_Options(VariantOptions):
    _vars_regex = re.compile(r'\$\{([^\}]+)\}')

    def __init__(self):
        super().__init__()
        with document:
            self.field = 'Datasheet'
            """ *Name of the field containing the URL """
            self.output = '${VALUE}.pdf'
            """ Name used for the downloaded datasheet.
                ${FIELD} will be replaced by the FIELD content """
            self.dnf = False
            """ Include the DNF components """
            self.repeated = False
            """ Download URLs that we already downloaded.
                Only makes sense if the `output` pattern generates a different name for them """
            self.link_repeated = True
            """ Instead of downloading things we already downloaded, use symlinks """
        # Used to collect the targets
        self._dry = False

    def config(self, parent):
        super().config(parent)
        if not self.field:
            raise KiPlotConfigurationError("Empty `field` ({})".format(str(self._tree)))
        if not self.output:
            raise KiPlotConfigurationError("Empty `output` ({})".format(str(self._tree)))
        self.field = self.field.lower()

    def download(self, c, ds, dir, name, known):
        # Download `ds` to `dir/name` (in a dry run we only record the target).
        # Returns the final name, or None when the name was already downloaded or the download failed.
        dest = os.path.join(dir, name)
        logger.debug('To download: {} -> {}'.format(ds, dest))
        if name in self._downloaded:
            logger.warning(W_ALRDOWN+'Datasheet `{}` already downloaded'.format(name))
            return None
        elif known is not None and self.link_repeated:
            # We already downloaded this URL, but stored it with a different name
            if not self._dry:
                os.symlink(known, dest)
            self._created.append(os.path.relpath(dest))
        elif not os.path.isfile(dest):
            # Download
            if not self._dry:
                try:
                    r = requests.get(ds, allow_redirects=True, headers={'User-Agent': USER_AGENT}, timeout=20)
                except requests.exceptions.ReadTimeout:
                    logger.warning(W_FAILDL+'Timeout during download `{}`'.format(ds))
                    return None
                except requests.exceptions.ConnectionError:
                    logger.warning(W_FAILDL+'Connection error during download `{}`'.format(ds))
                    return None
                if r.status_code != 200:
                    logger.warning(W_FAILDL+'Failed to download `{}`'.format(ds))
                    return None
                with open(dest, 'wb') as f:
                    f.write(r.content)
            self._downloaded.add(name)
            self._created.append(os.path.relpath(dest))
        elif self._dry:
            self._created.append(os.path.relpath(dest))
        return name

    def out_name(self, c):
        """ Compute the name of the output file.
            Replaces ${FIELD} and %X. """
        out = ''
        last = 0
        pattern = self.output
        pattern_l = len(pattern)
        for match in Download_Datasheets_Options._vars_regex.finditer(pattern):
            fname = match.group(1).lower()
            value = c.get_field_value(fname)
            if value is None:
                value = 'Unknown'
                logger.warning(W_UNKFLD+"In datasheets download output file name:"
                               " Field `{}` not defined for {}, using `Unknown`".format(fname, c.ref))
            if match.start():
                out += pattern[last:match.start()]
            out += value
            last = match.end()
        if last < pattern_l:
            out += pattern[last:pattern_l]
        out = self.expand_filename_sch(out)
        return out.replace('/', '_')
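
    # Illustrative expansion of the pattern above (the field names below are just
    # assumptions about a typical schematic, not something this code requires):
    #   output = '${VALUE}.pdf', component Value is 'LM358'   -> 'LM358.pdf'
    #   output = '${MPN}.pdf', component has no MPN field     -> 'Unknown.pdf' (plus a warning)
    # Any '/' in the result is replaced by '_' so the expansion stays a single file name.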

    def run(self, output_dir):
        if not self.dnf_filter and not self.variant:
            # Add a dummy filter to force the creation of a components list
            self.dnf_filter = DummyFilter()
        super().run(output_dir)
        self._urls = {}
        self._downloaded = set()
        self._created = []
        field_used = False
        for c in self._comps:
            ds = c.get_field_value(self.field)
            if ds is not None:
                field_used = True
            if not c.included or (not c.fitted and not self.dnf):
                continue
            if ds and is_url(ds):
                known = self._urls.get(ds, None)
                if known is None or self.repeated:
                    name = self.out_name(c)
                    name = self.download(c, ds, output_dir, name, known)
                    if known is None:
                        self._urls[ds] = name
                else:
                    logger.debug('Already downloaded: '+ds)
        if not field_used:
            known_fields = GS.sch.get_field_names({})
            if self.field not in known_fields:
                logger.warning(W_UNKFLD+"The field used for datasheets ({}) doesn't seem to be used".format(self.field))
        else:
            logger.debug('Unique URLs: '+str(len(self._urls)))
            logger.debug('Downloaded: '+str(len(self._downloaded)))
            logger.debug('Created: '+str(len(self._created)))

    def get_targets(self, out_dir):
        # Do a dry run to collect the output names
        self._dry = True
        self.run(out_dir)
        self._dry = False
        return self._created


@output_class
class Download_Datasheets(BaseOutput):  # noqa: F821
    """ Datasheets downloader
        Downloads the datasheets for the project """
    def __init__(self):
        super().__init__()
        with document:
            self.options = Download_Datasheets_Options
            """ *[dict] Options for the `download_datasheets` output """
        self._sch_related = True
        self._category = 'Schematic/docs'

    def run(self, output_dir):
        # No output member, just a dir
        self.options.run(output_dir)

    @staticmethod
    def get_conf_examples(name, layers, templates):
        has_urls = False
        for c in GS.sch.get_components():
            if c.datasheet and is_url(c.datasheet):
                has_urls = True
                break
        if not has_urls:
            return None
        return BaseOutput.simple_conf_examples(name, 'Download the datasheets', 'Datasheets')  # noqa: F821
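

# A minimal `kibot.yaml` entry using this output, shown only as a sketch: the `name` and
# `dir` values are assumptions, while the type and option names come from the classes above.
#
#   outputs:
#     - name: 'datasheets'
#       comment: 'Download the datasheets'
#       type: download_datasheets
#       dir: 'Datasheets'
#       options:
#         field: 'Datasheet'
#         output: '${VALUE}.pdf'
#         link_repeated: true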