# Source: KiBot/kibot/out_pdfunite.py (152 lines, 6.2 KiB, Python)

# -*- coding: utf-8 -*-
# Copyright (c) 2022 Salvador E. Tropea
# Copyright (c) 2022 Instituto Nacional de Tecnología Industrial
# License: GPL-3.0
# Project: KiBot (formerly KiPlot)
import re
import os
import glob
from subprocess import check_output, STDOUT, CalledProcessError
from .gs import GS
from .error import KiPlotConfigurationError
from .kiplot import config_output, get_output_dir, run_output
from .misc import MISSING_TOOL, WRONG_INSTALL, WRONG_ARGUMENTS, INTERNAL_ERROR, W_NOTPDF, MISSING_FILES, W_NOMATCH
from .optionable import Optionable, BaseOptions
from .registrable import RegOutput
from .create_pdf import create_pdf_from_pages
from .macros import macros, document, output_class # noqa: F401
from . import log
logger = log.get_logger()
class FilesList(Optionable):
    # One entry of the `outputs` list of PDFUniteOptions: it describes one group
    # of files to join. PDFUniteOptions.get_files() reads `source`, `from_cwd`,
    # `from_output` and `filter` from each entry.
    def __init__(self):
        super().__init__()
        # Each assignment is followed by a bare string describing the option.
        # Presumably the `document` macro pairs the string with the attribute
        # that precedes it -- TODO confirm in the macros module.
        # Continuation lines of the multi-line strings are kept flush-left so
        # the content of the strings isn't altered.
        with document:
            # Glob pattern, joined to the selected base dir and expanded with
            # glob.iglob(..., recursive=True) by get_files()
            self.source = '*.pdf'
            """ *File names to add, wildcards allowed. Use ** for recursive match.
By default this pattern is applied to the output dir specified with `-d` command line option.
See the `from_cwd` option """
            # Selects os.getcwd() instead of the expanded GS.out_dir as base dir
            self.from_cwd = False
            """ Use the current working directory instead of the dir specified by `-d` """
            # Name of another output; when not empty its targets are used and
            # the glob pattern isn't evaluated
            self.from_output = ''
            """ *Collect files from the selected output.
When used the `source` option is ignored """
            # Applied using re.match, so it is anchored at the start of the
            # name, but not at its end
            self.filter = r'.*\.pdf'
            """ A regular expression that source files must match """
class PDFUniteOptions(BaseOptions):
    # Options for the PDF joiner: which files are collected, the name of the
    # generated file and which backend performs the join.
    def __init__(self):
        # The bare strings that follow each assignment are kept untouched and in
        # the same order. Presumably the `document` macro pairs each one with
        # the attribute that precedes it -- TODO confirm in the macros module.
        with document:
            self.output = GS.def_global_output
            """ *Name for the generated PDF (%i=name of the output %x=pdf) """
            self.outputs = FilesList
            """ *[list(dict)] Which files will be included """
            self.use_external_command = False
            """ Use the `pdfunite` tool instead of PyPDF2 Python module """
        super().__init__()
        self._expand_ext = 'pdf'

    def config(self, parent):
        """ Validate the options after the base class processed them.

            parent: The object owning these options, its name is used as `_expand_id`.
            Raises KiPlotConfigurationError when no `outputs` were specified. """
        super().config(parent)
        if isinstance(self.outputs, type):
            # `outputs` still holds the FilesList class, so the user didn't
            # provide any entry. The exception must be raised, otherwise
            # get_files() fails later trying to iterate a class.
            raise KiPlotConfigurationError('Nothing to join')
        self._expand_id = parent.name

    def get_files(self, output, no_out_run=False):
        """ Collect the files to join.

            output: Name of the file to generate, it is excluded from the result.
            no_out_run: When True the targets of a referenced output are only
                        listed, the output isn't run to create the missing ones.
            Returns a list of real paths, following the order of the `outputs`
            entries. A warning is logged for each entry that adds nothing. """
        output_real = os.path.realpath(output)
        files = []
        out_dir_cwd = os.getcwd()
        out_dir_default = self.expand_filename_pcb(GS.out_dir)
        for f in self.outputs:
            # Get the list of candidates
            files_list = None
            if f.from_output:
                out = RegOutput.get_output(f.from_output)
                if out is not None:
                    config_output(out)
                    files_list = out.get_targets(get_output_dir(out.dir, out, dry=True))
                else:
                    GS.exit_with_error(f'Unknown output `{f.from_output}` selected in {self._parent}', WRONG_ARGUMENTS)
                if not no_out_run:
                    for file in files_list:
                        if not os.path.isfile(file):
                            # The target doesn't exist
                            if not out._done:
                                # The output wasn't created in this run, try running it
                                run_output(out)
                            if not os.path.isfile(file):
                                # Still missing, something is wrong
                                # Note: this must be an f-string, otherwise the
                                # placeholders are printed literally
                                GS.exit_with_error(f'Unable to generate `{file}` from {out}', INTERNAL_ERROR)
            else:
                out_dir = out_dir_cwd if f.from_cwd else out_dir_default
                source = f.expand_filename_both(f.source, make_safe=False)
                files_list = glob.iglob(os.path.join(out_dir, source), recursive=True)
            # Filter and adapt them
            old_len = len(files)
            for fname in filter(re.compile(f.filter).match, files_list):
                fname_real = os.path.realpath(fname)
                # Avoid including the output
                if fname_real == output_real:
                    continue
                files.append(fname_real)
            if len(files) == old_len:
                # This entry didn't contribute any file
                logger.warning(W_NOMATCH+'No match found for `{}`'.format(f.from_output if f.from_output else f.source))
        return files

    def get_targets(self, out_dir):
        """ Return a list containing the name of the generated PDF inside `out_dir` """
        return [self._parent.expand_filename(out_dir, self.output)]

    def get_dependencies(self):
        """ Return the files that will be joined, without running other outputs """
        output = self.get_targets(self.expand_filename_pcb(GS.out_dir))[0]
        files = self.get_files(output, no_out_run=True)
        return files

    def run_external(self, files, output):
        """ Join `files` into `output` using the `pdfunite` command.

            Reports MISSING_TOOL when the command isn't available and
            WRONG_INSTALL when it returns an error. """
        cmd = ['pdfunite']+files+[output]
        logger.debug('Running: {}'.format(cmd))
        try:
            check_output(cmd, stderr=STDOUT)
        except FileNotFoundError:
            GS.exit_with_error('Missing `pdfunite` command, install it (poppler-utils)', MISSING_TOOL)
        except CalledProcessError as e:
            GS.exit_with_error(f'Failed to invoke pdfunite command, error {e.returncode}', WRONG_INSTALL, e)

    def run(self, output):
        """ Generate `output` joining all the collected files.

            Files lacking the `%PDF` signature are joined anyway, but a warning
            is logged. Reports MISSING_FILES when less than two files are found. """
        # Output file name
        logger.debug('Collecting files')
        # Collect the files
        files = self.get_files(output)
        for fn in files:
            with open(fn, 'rb') as f:
                sig = f.read(4)
                if sig != b'%PDF':
                    logger.warning(W_NOTPDF+'Joining a non PDF file `{}`, will most probably fail'.format(fn))
        if len(files) < 2:
            GS.exit_with_error(f'At least two files must be joined ({files})', MISSING_FILES)
        logger.debug('Generating `{}` PDF'.format(output))
        # Remove any previous result before creating the new one
        if os.path.isfile(output):
            os.remove(output)
        if self.use_external_command:
            self.run_external(files, output)
        else:
            create_pdf_from_pages(files, output)
# `BaseOutput` isn't imported by this file (hence the F821 suppression).
# Presumably the `output_class` decorator macro brings it into scope -- TODO
# confirm in the macros module.
@output_class
class PDFUnite(BaseOutput):  # noqa: F821
    """ PDF joiner
Generates a new PDF from other outputs.
This is just a PDF joiner, using `pdfunite` from Poppler Utils. """
    # NOTE(review): the docstring above mentions only `pdfunite`, but
    # PDFUniteOptions.run() uses create_pdf_from_pages() unless
    # `use_external_command` is enabled. The text is left untouched because it
    # is a runtime string, presumably used as the help for this output.
    # Its continuation lines are kept flush-left so the content isn't altered.
    def __init__(self):
        super().__init__()
        with document:
            # Holds the class here, like the other documented options
            self.options = PDFUniteOptions
            """ *[dict] Options for the `pdfunite` output """
        # NOTE(review): the meaning of this flag isn't visible in this file,
        # check how the base class uses `_none_related`
        self._none_related = True

    def get_dependencies(self):
        # Delegates to the options, which list the files to join without
        # running other outputs (get_files is called with no_out_run=True)
        return self.options.get_dependencies()