From 956af56b29c276d85aa8f6ff6eed1e858af2aa66 Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Thu, 14 Jun 2018 10:27:31 +0200 Subject: [PATCH 01/13] rM2svg: minor code reorganisation Signed-off-by: Marcello Seri --- tools/rM2svg | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/tools/rM2svg b/tools/rM2svg index ed1c268..0828064 100755 --- a/tools/rM2svg +++ b/tools/rM2svg @@ -14,11 +14,19 @@ x_width = 1404 y_width = 1872 # Mappings -stroke_colour = { +STROKE_BW = { 0: "black", 1: "grey", 2: "white", - } +} + + +STROKE_C = { + 0: "blue", + 1: "red", + 2: "white", + 3: "yellow" +} '''stroke_width={ 0x3ff00000: 2, @@ -61,15 +69,6 @@ def main(): if not os.path.exists(args.input): parser.error('The file "{}" does not exist!'.format(args.input)) - if args.coloured_annotations: - global stroke_colour - stroke_colour = { - 0: "blue", - 1: "red", - 2: "white", - 3: "yellow" - } - lines2svg(args.input, args.output, args.singlefile, args.coloured_annotations) @@ -79,6 +78,9 @@ def abort(msg): def lines2svg(input_file, output_name, singlefile, coloured_annotations=False): + # set the correct color map + stroke_colour = STROKE_C if coloured_annotations else STROKE_BW + # Read the file in memory. Consider optimising by reading chunks. 
with open(input_file, 'rb') as f: data = f.read() @@ -125,14 +127,14 @@ def lines2svg(input_file, output_name, singlefile, coloured_annotations=False): print('Unexpected value on page {} after nlayers'.format(page + 1)) # Iterate through layers on the page (There is at least one) - for layer in range(nlayers): + for _layer in range(nlayers): fmt = ' Date: Thu, 14 Jun 2018 10:28:10 +0200 Subject: [PATCH 02/13] rM2svg: make it usable as a python module Signed-off-by: Marcello Seri --- tools/{rM2svg => rM2svg.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tools/{rM2svg => rM2svg.py} (100%) diff --git a/tools/rM2svg b/tools/rM2svg.py similarity index 100% rename from tools/rM2svg rename to tools/rM2svg.py From 4a91b4210367f7aad39cd430e94e36c817d0254e Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Wed, 13 Jun 2018 18:40:02 +0200 Subject: [PATCH 03/13] exportNotebook.py: preliminary implementation of python based module It implements a first version of core functions and drops the pdfinfo dependency but introduces the python3 PyPDF2 and paramiko dependencies. Pdftk is now optional but the merge function using PyPDF2 is not yet implemented. 
Signed-off-by: Marcello Seri --- tools/exportNotebook.py | 380 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 380 insertions(+) create mode 100755 tools/exportNotebook.py diff --git a/tools/exportNotebook.py b/tools/exportNotebook.py new file mode 100755 index 0000000..b6f6786 --- /dev/null +++ b/tools/exportNotebook.py @@ -0,0 +1,380 @@ +#!/usr/bin/env python3 +import argparse +import getpass +import json +import os +import shutil +import subprocess +import sys + +from contextlib import contextmanager +from pathlib import Path +from tempfile import mkdtemp + +import PyPDF2 +import paramiko + +from rM2svg import lines2svg + +__prog_name__ = "exportNotebook" +__version__ = "0.0.1beta" + +REMARKABLE_IP = "10.11.99.1" +REMARKABLE_H, REMARKABLE_W = 1872, 1404 +TPL_PATH = "/usr/share/remarkable/templates/" +XOCHITL_PATH = ".local/share/remarkable/xochitl/" + +# Requires: +# - PyPDF2, paramiko python libraries +# - rM2svg python module +# - convert (imagemagick) +# - rsvg-convert (optional, to avoid rasterizing of lines) + +# NOTE: they will be None if the command is not present! 
+CONVERT = shutil.which("convert") +RSVG_CONVERT = shutil.which("rsvg-convert") or CONVERT + + +def parse_args_or_exit(argv=None): + """ + Parse command line options + """ + parser = argparse.ArgumentParser(prog=__prog_name__) + parser.add_argument("prefix", + help="partial notebook name", + metavar="FILETITLE") + parser.add_argument("-p", + "--password", + help="remarkable ssh password", + default=None) + parser.add_argument("-c", + "--coloured", + help="Colour annotations for document markup.", + action='store_true') + parser.add_argument("--pdftk", + help="Assemble the pdfs with pdftk instead of PyPDF2.", + action='store_true') + parser.add_argument("-1", + "--singlefile", + help="Enable multipage svg file when calling rM2svg", + action='store_true', + ) + parser.add_argument("--keeptmp", + help="Do not delete temporary log files.", + action='store_true') + parser.add_argument('--version', + action='version', + version='%(prog)s {version}'.format(version=__version__)) + return parser.parse_args(argv) + + +def get_ssh_ip(): + """ + Check if ssh configuration for "remarkable" exists. + Return the appropriate host string for ssh. + """ + ssh_config = Path.home().joinpath('.ssh/config') + # pylint: disable=no-member + if not ssh_config.is_file(): + return REMARKABLE_IP + + with open(ssh_config) as ssh: + lines = (l.strip() for l in ssh.readlines()) + + if "host remarkable" in lines: + return "remarkable" + else: + return REMARKABLE_IP + + +@contextmanager +def get_ssh_client(password=None, keeptmp=False): + """ + Context manager to deal with the ssh connection to the remarkable. 
+ """ + hostname = get_ssh_ip() + username = "root" + if password is None: + password = getpass.getpass('%s password: ' % hostname) + + client = paramiko.client.SSHClient() + client.set_missing_host_key_policy(paramiko.client.AutoAddPolicy()) + client.connect(hostname, username=username, password=password) + yield client + client.close() + + +def get_client_output(client, cmd): + """ + Executes [cmd] with the [client] and returns a tuple with the contents + of the standard output and standard error. + It raises OSError if the command fails. + """ + stdin, stdout, stderr = client.exec_command(cmd) + stdout.channel.recv_exit_status() + out, err = stdout.readlines(), stderr.readlines() + stdin.close() + stdout.close() + stderr.close() + return out, err + + +def get_notebook_id(client, prefix): + """ + Return the notebook prefix (Newest notebook matching the name) + """ + out, err = get_client_output( + client, + " | ".join([ + "ls -rt {}*.metadata".format(XOCHITL_PATH), + "xargs fgrep -l {}".format(prefix), + "tail -n1", + "cut -d. -f1,2" + ])) + + if err: + print("error: {}".format(err), file=sys.stderr) + + if err and not out: + return None + + notebook_id = out.pop().strip() + return notebook_id + + +def copy_notebook_data(client, tmp, notebook_id): + """ + Copies the notebook data and the underlying notebook pdf in tmp (if it exists).
+ Returns a tuple with: + - the list of copied (non-template) files + - the list of used templates in order and with repetition + (empty if a background pdf is present) + """ + list_files = "ls -1 {}.{{lines,pagedata,metadata,pdf}} 2>/dev/null".format( + notebook_id) + out, err = get_client_output(client, list_files) + if err: + print("error: {}".format(err)) + if err and not out: + raise EnvironmentError(err) + + filenames = [os.path.basename(f.strip()) for f in out if f.strip()] + templates = [] + + sftp = client.open_sftp() + try: + for filename in filenames: + remotepath = os.path.join(os.path.dirname(notebook_id), filename) + localpath = os.path.join(tmp, filename) + print("Copying {} into {}".format(remotepath, localpath)) + sftp.get(remotepath, localpath) + + if filename.endswith(".pagedata") and \ + not any(fname.endswith(".pdf") for fname in filenames): + + def get_tpl_fname(line): + "Return template png file name from the name string" + line = line.strip() # do we risk to strip important whitespace? + return "{}.png".format(line) if line else "Blank" + + with open(localpath) as pdata_f: + templates = [ + get_tpl_fname(line) + for line in pdata_f.readlines() + ] + for tpl_fname in set(templates): + remotepath = os.path.join(TPL_PATH, tpl_fname) + localpath = os.path.join(tmp, tpl_fname) + print("Copying {} into {}".format(remotepath, localpath)) + sftp.get(remotepath, localpath) + + finally: + sftp.close() + + filenames + return filenames, templates + + +def get_extended_metadata(tmp, notebook_id, templates): + """ + Get notebook metadata. 
+ Returns a dictionary with the following keys: + [ "deleted", "lastModified", "metadatamodified", "modified", "parent" + , "pinned", "synced", "type", "version", "visibleName" + , "pages", "templates" + ] + """ + metadata_path = os.path.join( + tmp, + "{}.metadata".format(os.path.basename(notebook_id)) + ) + with open(metadata_path) as meta_f: + metadata = json.load(meta_f) + metadata["pages"] = len(templates) + metadata["templates"] = templates + return metadata + + +def get_background_original_geometry(pdfname): + """ + Read PDF dimensions of background_original for scale correction. + Returns the pair width, height in points (1 pt = 1/72 in) + """ + pdf_path = os.path.join(pdfname) + with open(pdf_path, 'rb') as pdf: + reader = PyPDF2.PdfFileReader(pdf) + _, _, width, height = reader.getPage(0).mediaBox + return width, height + + +def prepare_background(tmp, metadata, filenames, notebook_id): + """ + Does the magic to prepare background pdfs with the right + templates, sizes and offsets. It requires 'convert' to be + present in the path. Return the background pdf file path. + """ + background = os.path.join(tmp, "background.pdf") + # If we have copied the templates it means that we don't have a + # base pdf. This is currently guaranteed by the implementation of + # copy_notebook_data + if metadata["templates"]: + templates_list = [ + os.path.join(tmp, template) + for template in metadata["templates"] + ] + # NOTE: we are assuming here that convert exists. + # There is a check in main's body + cmd = sum([ + [CONVERT], + templates_list, + ["-transparent", "white", background] + ], []) + subprocess.call(cmd) + return background + + # If we are here we need to use the pdf to prepare the background.
+ # This is currently guaranteed by the implementation of copy_notebook_data + + # If we don't have a pdf file here we need to fail + pdf = next(fname for fname in filenames if fname.endswith(".pdf")) + print("Found underlying document PDF, using as background.") + bg_original = os.path.join(tmp, "background_original.pdf") + os.symlink( + os.path.join(tmp, pdf), + bg_original + ) + + # use gs for now but will move to PyPDF2 + if shutil.which("gs"): + width, height = get_background_original_geometry(bg_original) + new_width = height / REMARKABLE_H * REMARKABLE_W + offset = new_width - width + print( + "Original PDF dimensions are ({}x{}), correcting by offset of {} to fit rM foreground.".format( + width, height, offset) + ) + bg_offset = os.path.join(tmp, "background_with_offset.pdf") + cmd = [ + "gs", "-q", "-sDEVICE=pdfwrite", "-dBATCH", "-dNOPAUSE", + "-sOutputFile={}".format(bg_offset), + "-dDEVICEWIDTHPOINTS={}".format(new_width), + "-dDEVICEHEIGHTPOINTS={}".format(height), + "-dFIXEDMEDIA", + "-c", "{{{} 0 translate}}".format(offset), + "-f", bg_original + ] + subprocess.call(cmd) + os.symlink(bg_offset, background) + else: + print("Unable to find 'gs', skipping offset and resize of the background PDF") + os.symlink(bg_original, background) + + return background + + +def prepare_foreground(tmp, filenames, singlefile, coloured): + """ + Extract annotations and create a PDF. Returns the foreground pdf path. + """ + output_prefix = os.path.join(tmp, "foreground") + # If we cannot find a lines file we need to fail here + lines_path = os.path.join( + tmp, + next(fname for fname in filenames if fname.endswith(".lines")) + ) + # TODO: make the --singlefile option of rM2SVG configurable + lines2svg(lines_path, output_prefix, + singlefile=singlefile, coloured_annotations=coloured) + + foreground = os.path.join(tmp, "foreground.pdf") + foreground_svgs = [str(svg) for svg in Path(tmp).glob("foreground*.svg")] + # NOTE: here we assume that at least 'convert' is present. 
+ # There is a check in main's body + if RSVG_CONVERT is not None: + cmd = sum([ + [RSVG_CONVERT, "-a", "-f", "pdf"], + foreground_svgs, + ["-o", foreground] + ], []) + else: + cmd = sum([ + [CONVERT, "-density", "100"], + foreground_svgs, + ["-transparent", "white", foreground] + ], []) + subprocess.call(cmd) + + return foreground + + +def make_annotated_pdf(name, background, foreground, pdftk=False): + """ + Uses the [foreground] and [background] pdfs to assemble the final + annotated pdf called [name].pdf. It uses PyPDF2 when pdftk is False. + """ + if not name.endswith(".pdf"): + name = "{}.pdf".format(name) + + # NOTE: Here we assume that pdftk is present. + # There is a check in main's body + if pdftk: + # Use multistamp instead of multibackground to preserve transparency + cmd = ["pdftk", background, "multistamp", foreground, "output", name] + subprocess.call(cmd) + print("Written {} to {}".format(os.stat(name).st_size, name)) + else: + raise NotImplementedError + + +if __name__ == "__main__": + args = parse_args_or_exit(sys.argv[1:]) + if CONVERT is None: + sys.exit( + "Unable to detect the required 'convert' executable from ImageMagick") + + tmp = mkdtemp() + with get_ssh_client(args.password) as client: + notebook_id = get_notebook_id(client, args.prefix) + if not notebook_id: + sys.exit( + "Unable to find notebook with name containing '{}'".format(args.prefix)) + + filenames, templates = copy_notebook_data(client, tmp, notebook_id) + if not filenames: + sys.exit("Unable to copy any file from the device") + + metadata = get_extended_metadata(tmp, notebook_id, templates) + background = prepare_background(tmp, metadata, filenames, notebook_id) + foreground = prepare_foreground( + tmp, filenames, args.singlefile, args.coloured) + + if shutil.which("pdftk") is None and args.pdftk: + sys.exit("Used --pdftk flag but the pdftk executable was not found") + make_annotated_pdf(metadata["visibleName"], + background, foreground, pdftk=args.pdftk) + + if not 
args.keeptmp: + print("Cleaning up temporary folder {}".format(tmp)) + shutil.rmtree(tmp) + else: + print("The intermediate files are available in {}".format(tmp)) From 4c3d5881a9fdfdbc071c06349610e503edba2d6c Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Thu, 14 Jun 2018 11:58:34 +0200 Subject: [PATCH 04/13] exportNotebook.py: implement PyPDF2 based pdf merge When used, this removes the dependency of pdftk. Signed-off-by: Marcello Seri --- tools/exportNotebook.py | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/tools/exportNotebook.py b/tools/exportNotebook.py index b6f6786..15cd65c 100755 --- a/tools/exportNotebook.py +++ b/tools/exportNotebook.py @@ -135,7 +135,7 @@ def get_notebook_id(client, prefix): if err: print("error: {}".format(err), file=sys.stderr) - if err and not out: + if err or not out: return None notebook_id = out.pop().strip() @@ -327,6 +327,35 @@ def prepare_foreground(tmp, filenames, singlefile, coloured): return foreground +# From https://github.com/tesseract-ocr/tesseract/issues/660#issuecomment-273629726 +# TODO: with the rescaling performed here we probably don't need +# to rescale the background +def merge_pdfs(background, foreground, destination): + """ + Use PyPDF2 to merge the [background] and [foreground] pdfs, + saving the result into [destination]. 
+ """ + with open(background, 'rb') as bg_pdfh, open(foreground, 'rb') as fg_pdfh: + bg_pdf = PyPDF2.PdfFileReader(bg_pdfh) + fg_pdf = PyPDF2.PdfFileReader(fg_pdfh) + destination_pdf = PyPDF2.PdfFileWriter() + for bg_page, fg_page in zip(bg_pdf.pages, fg_pdf.pages): + _, _, width, height = bg_page.mediaBox + fg_page.scaleTo(float(width), float(height)) + bg_page.mergePage(fg_page) + destination_pdf.addPage(bg_page) + + bg_len, fg_len = len(bg_pdf.pages), len(fg_pdf.pages) + if bg_len != fg_len: + remaining_pages = fg_pdf.pages[bg_len:] if bg_len < fg_len \ + else bg_pdf.pages[fg_len:] + for page in remaining_pages: + destination_pdf.addPage(page) + + with open(destination, 'wb') as out: + destination_pdf.write(out) + + def make_annotated_pdf(name, background, foreground, pdftk=False): """ Uses the [foreground] and [background] pdfs to assemble the final @@ -335,15 +364,16 @@ def make_annotated_pdf(name, background, foreground, pdftk=False): if not name.endswith(".pdf"): name = "{}.pdf".format(name) - # NOTE: Here we assume that pdftk is present. - # There is a check in main's body + # NOTE: Here we assume that pdftk is present when somebody calls + # it with pdftk=True.
There is a check in main's body if pdftk: # Use multistamp instead of multibackground to preserve transparency cmd = ["pdftk", background, "multistamp", foreground, "output", name] subprocess.call(cmd) - print("Written {} to {}".format(os.stat(name).st_size, name)) else: - raise NotImplementedError + merge_pdfs(background, foreground, name) + + print("Written {} to {}".format(os.stat(name).st_size, name)) if __name__ == "__main__": From c9ef601ef4b42eb144060f3034734fac8fee5198 Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Thu, 14 Jun 2018 14:05:37 +0200 Subject: [PATCH 05/13] exportNotebook.py: implement rescaling at pdf merge stage Signed-off-by: Marcello Seri --- tools/exportNotebook.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/tools/exportNotebook.py b/tools/exportNotebook.py index 15cd65c..64a3891 100755 --- a/tools/exportNotebook.py +++ b/tools/exportNotebook.py @@ -29,6 +29,7 @@ # - rM2svg python module # - convert (imagemagick) # - rsvg-convert (optional, to avoid rasterizing of lines) +# - ghostscript, pdftk (optional) # NOTE: they will be None if the command is not present! CONVERT = shutil.which("convert") @@ -227,7 +228,7 @@ def get_background_original_geometry(pdfname): return width, height -def prepare_background(tmp, metadata, filenames, notebook_id): +def prepare_background(tmp, metadata, filenames, notebook_id, resizebg=False): """ Does the magic to prepare background pdfs with the right templates, sizes and offsets. 
It requires 'convert' to be @@ -264,8 +265,8 @@ def prepare_background(tmp, metadata, filenames, notebook_id): bg_original ) - # use gs for now but will move to PyPDF2 - if shutil.which("gs"): + # NOTE: this is needed only when pdftk is used + if resizebg and shutil.which("gs"): width, height = get_background_original_geometry(bg_original) new_width = height / REMARKABLE_H * REMARKABLE_W offset = new_width - width @@ -286,7 +287,8 @@ def prepare_background(tmp, metadata, filenames, notebook_id): subprocess.call(cmd) os.symlink(bg_offset, background) else: - print("Unable to find 'gs', skipping offset and resize of the background PDF") + if resizebg: + print("Unable to find 'gs', skipping offset and resize of the background PDF") os.symlink(bg_original, background) return background @@ -328,8 +330,7 @@ def prepare_foreground(tmp, filenames, singlefile, coloured): # From https://github.com/tesseract-ocr/tesseract/issues/660#issuecomment-273629726 -# TODO: with the rescaling performed here we probably don't need -# to rescale the background +# Note: with the rescaling performed here we don't need to rescale the background def merge_pdfs(background, foreground, destination): """ Use PyPDF2 to merge the [background] and [foreground] pdfs, @@ -339,11 +340,15 @@ def merge_pdfs(background, foreground, destination): bg_pdf = PyPDF2.PdfFileReader(bg_pdfh) fg_pdf = PyPDF2.PdfFileReader(fg_pdfh) destination_pdf = PyPDF2.PdfFileWriter() + for bg_page, fg_page in zip(bg_pdf.pages, fg_pdf.pages): - _, _, width, height = bg_page.mediaBox - fg_page.scaleTo(float(width), float(height)) - bg_page.mergePage(fg_page) - destination_pdf.addPage(bg_page) + _, _, _, height = bg_page.mediaBox + new_width = height / REMARKABLE_H * REMARKABLE_W + base_page = PyPDF2.pdf.PageObject.createBlankPage(width=new_width, height=height) + base_page.mergePage(bg_page) + fg_page.scaleTo(new_width, height) + base_page.mergePage(fg_page) + destination_pdf.addPage(base_page) bg_len, fg_len = 
len(bg_pdf.pages), len(fg_pdf.pages) if bg_len != fg_len: @@ -381,6 +386,8 @@ def make_annotated_pdf(name, background, foreground, pdftk=False): if CONVERT is None: sys.exit( "Unable to detect the required 'convert' executable from ImageMagick") + if shutil.which("pdftk") is None and args.pdftk: + sys.exit("Used --pdftk flag but the pdftk executable was not found") tmp = mkdtemp() with get_ssh_client(args.password) as client: @@ -394,12 +401,10 @@ def make_annotated_pdf(name, background, foreground, pdftk=False): sys.exit("Unable to copy any file from the device") metadata = get_extended_metadata(tmp, notebook_id, templates) - background = prepare_background(tmp, metadata, filenames, notebook_id) + background = prepare_background( + tmp, metadata, filenames, notebook_id, resizebg=args.pdftk) foreground = prepare_foreground( tmp, filenames, args.singlefile, args.coloured) - - if shutil.which("pdftk") is None and args.pdftk: - sys.exit("Used --pdftk flag but the pdftk executable was not found") make_annotated_pdf(metadata["visibleName"], background, foreground, pdftk=args.pdftk) From 4355e3787d5a6a7676450f64c9a9530fd3cfee06 Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Thu, 14 Jun 2018 14:08:16 +0200 Subject: [PATCH 06/13] exportNotebook: reduce verbosity Signed-off-by: Marcello Seri --- tools/exportNotebook.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/exportNotebook.py b/tools/exportNotebook.py index 64a3891..d043c05 100755 --- a/tools/exportNotebook.py +++ b/tools/exportNotebook.py @@ -167,7 +167,7 @@ def copy_notebook_data(client, tmp, notebook_id): for filename in filenames: remotepath = os.path.join(os.path.dirname(notebook_id), filename) localpath = os.path.join(tmp, filename) - print("Copying {} into {}".format(remotepath, localpath)) + # print("Copying {} into {}".format(remotepath, localpath)) sftp.get(remotepath, localpath) if filename.endswith(".pagedata") and \ @@ -186,7 +186,7 @@ def get_tpl_fname(line): for 
tpl_fname in set(templates): remotepath = os.path.join(TPL_PATH, tpl_fname) localpath = os.path.join(tmp, tpl_fname) - print("Copying {} into {}".format(remotepath, localpath)) + # print("Copying {} into {}".format(remotepath, localpath)) sftp.get(remotepath, localpath) finally: From d8d3045462c9136b6fa8148adae80348783512b9 Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Thu, 14 Jun 2018 14:15:30 +0200 Subject: [PATCH 07/13] exportNotebook: workaround PyPDF2 FloatObject inconsistency Signed-off-by: Marcello Seri --- tools/exportNotebook.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/exportNotebook.py b/tools/exportNotebook.py index d043c05..b56cc16 100755 --- a/tools/exportNotebook.py +++ b/tools/exportNotebook.py @@ -344,7 +344,10 @@ def merge_pdfs(background, foreground, destination): for bg_page, fg_page in zip(bg_pdf.pages, fg_pdf.pages): _, _, _, height = bg_page.mediaBox new_width = height / REMARKABLE_H * REMARKABLE_W - base_page = PyPDF2.pdf.PageObject.createBlankPage(width=new_width, height=height) + # workaround PyPDF2 FloatObject inconsistency + height, new_width = float(height), float(new_width) + base_page = PyPDF2.pdf.PageObject.createBlankPage( + width=new_width, height=height) base_page.mergePage(bg_page) fg_page.scaleTo(new_width, height) base_page.mergePage(fg_page) From ae3de3b353d71b4db7bc306c0c4f0a983c48716a Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Thu, 14 Jun 2018 14:15:57 +0200 Subject: [PATCH 08/13] exportNotebook: make more modular and add waiting output Signed-off-by: Marcello Seri --- tools/exportNotebook.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/tools/exportNotebook.py b/tools/exportNotebook.py index b56cc16..f7f7b59 100755 --- a/tools/exportNotebook.py +++ b/tools/exportNotebook.py @@ -384,16 +384,18 @@ def make_annotated_pdf(name, background, foreground, pdftk=False): print("Written {} to {}".format(os.stat(name).st_size, name)) -if 
__name__ == "__main__": - args = parse_args_or_exit(sys.argv[1:]) +def main(args, tmp): + """ + Main entry point for the script + """ if CONVERT is None: sys.exit( "Unable to detect the required 'convert' executable from ImageMagick") if shutil.which("pdftk") is None and args.pdftk: sys.exit("Used --pdftk flag but the pdftk executable was not found") - tmp = mkdtemp() with get_ssh_client(args.password) as client: + print("Gathering fata from the rM device") notebook_id = get_notebook_id(client, args.prefix) if not notebook_id: sys.exit( @@ -404,15 +406,25 @@ def make_annotated_pdf(name, background, foreground, pdftk=False): sys.exit("Unable to copy any file from the device") metadata = get_extended_metadata(tmp, notebook_id, templates) + print("Preparing background document") background = prepare_background( tmp, metadata, filenames, notebook_id, resizebg=args.pdftk) + print("Preparing annotations") foreground = prepare_foreground( tmp, filenames, args.singlefile, args.coloured) + print("Preparing final PDF") make_annotated_pdf(metadata["visibleName"], background, foreground, pdftk=args.pdftk) - if not args.keeptmp: - print("Cleaning up temporary folder {}".format(tmp)) - shutil.rmtree(tmp) - else: - print("The intermediate files are available in {}".format(tmp)) + +if __name__ == "__main__": + args = parse_args_or_exit(sys.argv[1:]) + tmp = mkdtemp() + try: + main(args, tmp) + finally: + if not args.keeptmp: + print("Cleaning up temporary folder {}".format(tmp)) + shutil.rmtree(tmp) + else: + print("The intermediate files are available in {}".format(tmp)) From e0c85f73a4a3f4efaa8adf8fe7ec2b7face854c0 Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Thu, 14 Jun 2018 14:19:54 +0200 Subject: [PATCH 09/13] exportNotebook: add more information on pdftk flag Signed-off-by: Marcello Seri --- tools/exportNotebook.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/exportNotebook.py b/tools/exportNotebook.py index f7f7b59..a20d9da 100755 --- 
a/tools/exportNotebook.py +++ b/tools/exportNotebook.py @@ -53,7 +53,9 @@ def parse_args_or_exit(argv=None): help="Colour annotations for document markup.", action='store_true') parser.add_argument("--pdftk", - help="Assemble the pdfs with pdftk instead of PyPDF2.", + help="Assemble the pdfs with pdftk instead of PyPDF2. " + "This is faster and prodces smaller output when " + "exporting template-based notebooks.", action='store_true') parser.add_argument("-1", "--singlefile", From 18bafa15bdfcd2e2dd5807bdca7446fbf32e82d3 Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Thu, 14 Jun 2018 14:20:23 +0200 Subject: [PATCH 10/13] exportNotebook: remove bash version of exportNotebook Signed-off-by: Marcello Seri --- tools/exportNotebook | 125 ------------------------------------------- 1 file changed, 125 deletions(-) delete mode 100755 tools/exportNotebook diff --git a/tools/exportNotebook b/tools/exportNotebook deleted file mode 100755 index d6ac523..0000000 --- a/tools/exportNotebook +++ /dev/null @@ -1,125 +0,0 @@ -#!/usr/bin/env bash - -# Needs: -# - ssh and scp (openssh) -# - convert (imagemagick) -# - pdftk (pdftk) -# - rsvg-convert (optional, to avoid rasterizing of lines) -# - gs & pdfinfo (optional, to account for original pdf size) - -if [[ $# -eq 0 ]] ; then - echo "Usage: ./exportNotebook (Partial)NotebookName AdditionalrM2svgArguments" - echo "You can additionally append the -c argument for coloured annotations." - exit 0 -fi - -# Make sure we have rM2svg -command -v rM2svg >/dev/null 2>&1 -if [[ $? -ne 0 ]]; then - if [[ -x rM2svg ]]; then - rM2svg_cmd="$(dirname `readlink -f $0`)/rM2svg" - else - print "Cannot find rM2svg" - exit 1 - fi -else - rM2svg_cmd="rM2svg" -fi - -# Check if ssh configuration for "remarkable" exists -grep -Fxq "host remarkable" ~/.ssh/config -if [ $? 
-eq 0 ]; then - SSH_IP="root@remarkable" -else - SSH_IP="root@10.11.99.1" -fi - -# Start SSH in ControlMaster mode -control_path_dir=$(mktemp -d --suffix=ssh_remark) -control_options="-o ControlPath=$control_path_dir/%h_%p_%r -o ControlPersist=10s" -ssh -M ${control_options} ${SSH_IP} exit - -# Getting the notebook prefix (Newest notebook matching the name) -id=$(ssh ${control_options} ${SSH_IP} "ls -rt .local/share/remarkable/xochitl/*.metadata | xargs fgrep -l $1" | tail -n1 | cut -d. -f1,2) - -test -z "$id" && exit 1 - -tmpfolder=$(mktemp -d) - -# Getting notebook data -scp $control_options -q ${SSH_IP}:"${id}".{lines,pagedata,metadata} "${tmpfolder}"/ - -# Copy underyling document pdf if it exists -ssh $control_options -q ${SSH_IP} "[[ -f "${id}.pdf" ]]" && scp $control_options -q ${SSH_IP}:"${id}.pdf" "${tmpfolder}" - -# Fix for single page notebooks with no template (empty pagedata file by default) -if [ ! -s "${tmpfolder}"/*.pagedata ] -then - echo "Blank" > "${tmpfolder}"/*.pagedata -fi - -# Fix empty lines in pagedata files -sed -i -e "s/^[[:blank:]]*$/Blank/" "${tmpfolder}"/*.pagedata - -filename=$(grep -F '"visibleName"' "${tmpfolder}"/*.metadata | cut -d: -f2- | grep -o '"[^"]*"') -echo "Exporting notebook ${filename} ($(wc -l "${tmpfolder}"/*.pagedata | cut -d\ -f1) pages)" - -if [ -f "${tmpfolder}"/*.pdf ] -then - ln -s "${tmpfolder}/"*.pdf "${tmpfolder}/background_original.pdf" - echo "Found underlying document PDF, using as background." 
- - if command -v "gs" > /dev/null && command -v "pdfinfo" > /dev/null - then - # Read PDF dimensions for scale correction - size=$(pdfinfo ${tmpfolder}/background_original.pdf | grep "Page size" | awk '{print $3,$5}') - width=$(echo ${size} | cut -f1 -d " ") - height=$(echo ${size} | cut -f2 -d " ") - - # Calculate new width and necessary offset (rM dimensions: 1404x1872) - new_width=$(echo "scale=5; ${height} / 1872 * 1404" | bc) - offset=$(echo "scale=5; ${new_width} - ${width}" | bc) - - echo "Original PDF dimensions are (${width}x${height}), correcting by offset of ${offset} to fit rM foreground." - - # Add offset to background.pdf to match foreground dimensions - gs -q -sDEVICE=pdfwrite -dBATCH -dNOPAUSE -sOutputFile=${tmpfolder}/background_with_offset.pdf \ - -dDEVICEWIDTHPOINTS=${new_width} -dDEVICEHEIGHTPOINTS=${height} -dFIXEDMEDIA \ - -c "{${offset} 0 translate}" \ - -f "${tmpfolder}/background_original.pdf" - - ln -s ${tmpfolder}/background_with_offset.pdf ${tmpfolder}/background.pdf - else - ln -s ${tmpfolder}/background_original.pdf ${tmpfolder}/background.pdf - fi -else - # Getting template files - sort -u "${tmpfolder}"/*.pagedata | while read -r tpl; do - scp $control_options -q ${SSH_IP}:"'/usr/share/remarkable/templates/${tpl}.png'" "${tmpfolder}"/ - done - - # Generate a PDF file out of the templates - sed -e "s|^|\"${tmpfolder}\"/\"|" -e 's|$|.png"|' "${tmpfolder}"/*.pagedata | tr '\n' ' ' | sed -e "s|$|-transparent white \"${tmpfolder}\"/background.pdf|" | xargs convert -fi - -# Extract annotations and create a PDF -$rM2svg_cmd --input "${tmpfolder}"/*.lines --output "${tmpfolder}/foreground" $2 - -if command -v "rsvg-convert" > /dev/null -then - rsvg-convert -a -f pdf "${tmpfolder}"/foreground*.svg -o "${tmpfolder}"/foreground.pdf -else - convert -density 100 "${tmpfolder}"/foreground*.svg -transparent white "${tmpfolder}"/foreground.pdf -fi - -# Strip .pdf suffix if it already exists (document vs. 
notebook) -filename=$(basename -s .pdf ${filename//\"/}) - -# Use multistamp instead of multibackground to preserve transparency -pdftk "${tmpfolder}"/background.pdf multistamp "${tmpfolder}"/foreground.pdf output "${filename}.pdf" - -filesize=$(ls -la "${filename}.pdf" | awk '{print $5}' | numfmt --to=iec-i --suffix=B --format="%.3f") -echo "Written ${filesize} to ${filename}.pdf" - -ssh $control_options -O exit ${SSH_IP} -rm -Rf "${tmpfolder}" "${control_path_dir}" From 5c84f8743fed140d21195caa19a4285889ef4bc3 Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Thu, 14 Jun 2018 14:36:05 +0200 Subject: [PATCH 11/13] exportNotebook: remove TODO leftover Signed-off-by: Marcello Seri --- tools/exportNotebook.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/exportNotebook.py b/tools/exportNotebook.py index a20d9da..f4d1cc9 100755 --- a/tools/exportNotebook.py +++ b/tools/exportNotebook.py @@ -306,7 +306,6 @@ def prepare_foreground(tmp, filenames, singlefile, coloured): tmp, next(fname for fname in filenames if fname.endswith(".lines")) ) - # TODO: make the --singlefile option of rM2SVG configurable lines2svg(lines_path, output_prefix, singlefile=singlefile, coloured_annotations=coloured) From 7febfd54b5add0f72871308df64de77b8c2796d3 Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Mon, 22 Oct 2018 19:26:08 +0200 Subject: [PATCH 12/13] rM2svg: update code as suggested by @niklasb and @ericsfraga Signed-off-by: Marcello Seri --- tools/rM2svg.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/tools/rM2svg.py b/tools/rM2svg.py index 0828064..6476288 100755 --- a/tools/rM2svg.py +++ b/tools/rM2svg.py @@ -10,8 +10,8 @@ # Size -x_width = 1404 -y_width = 1872 +DEFAULT_WIDTH = 1404 +DEFAULT_HEIGHT = 1872 # Mappings STROKE_BW = { @@ -61,6 +61,8 @@ def main(): help="Colour annotations for document markup.", action='store_true', ) + parser.add_argument('-x', '--width', default=DEFAULT_WIDTH, type=int) + 
parser.add_argument('-y', '--height', default=DEFAULT_HEIGHT, type=int) parser.add_argument('--version', action='version', version='%(prog)s {version}'.format(version=__version__)) @@ -69,7 +71,7 @@ def main(): if not os.path.exists(args.input): parser.error('The file "{}" does not exist!'.format(args.input)) - lines2svg(args.input, args.output, args.singlefile, args.coloured_annotations) + lines2svg(args.input, args.output, args.singlefile, args.coloured_annotations, width=args.width, height=args.height) def abort(msg): @@ -77,7 +79,7 @@ def abort(msg): sys.exit(1) -def lines2svg(input_file, output_name, singlefile, coloured_annotations=False): +def lines2svg(input_file, output_name, singlefile, coloured_annotations, width, height): # set the correct color map stroke_colour = STROKE_C if coloured_annotations else STROKE_BW @@ -101,7 +103,7 @@ def lines2svg(input_file, output_name, singlefile, coloured_annotations=False): if singlefile: output = open(output_name, 'w') - output.write(''.format(y_width, x_width)) # BEGIN Notebook + output.write(''.format(height, width)) # BEGIN Notebook output.write(''' ''') + def start_scale(): + zoom = width / DEFAULT_WIDTH + output.write('' % (zoom, zoom)) + + def end_scale(): + output.write('') + # Iterate through pages (There is at least one) for page in range(npages): if singlefile: output.write(''.format(page, 'none' if page != 0 else 'inline')) # Opening page group, visible only for the first page. 
+ start_scale() else: output = open("{}_{:02}.svg".format(output_name, page+1), 'w') - output.write('\n'.format(y_width, x_width)) # BEGIN page + output.write('\n'.format(height, width)) # BEGIN page + start_scale() fmt = ''.format(x_width, y_width, (page + 1) % npages)) + end_scale() output.write('') # Closing page group else: + end_scale() output.write('') # END page output.close() From 3ced3fcd15bb72500decfc15eb6fc9bd8eed9d2f Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Mon, 22 Oct 2018 19:48:35 +0200 Subject: [PATCH 13/13] exportNotebook: update code to use new rM2svg This should resolve the scaling issues for the PDFs that have all the pages of the same size Signed-off-by: Marcello Seri --- tools/exportNotebook.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/tools/exportNotebook.py b/tools/exportNotebook.py index f4d1cc9..230f4d3 100755 --- a/tools/exportNotebook.py +++ b/tools/exportNotebook.py @@ -14,13 +14,13 @@ import PyPDF2 import paramiko -from rM2svg import lines2svg +from rM2svg import lines2svg, DEFAULT_WIDTH, DEFAULT_HEIGHT __prog_name__ = "exportNotebook" __version__ = "0.0.1beta" REMARKABLE_IP = "10.11.99.1" -REMARKABLE_H, REMARKABLE_W = 1872, 1404 +REMARKABLE_H, REMARKABLE_W = DEFAULT_HEIGHT, DEFAULT_WIDTH TPL_PATH = "/usr/share/remarkable/templates/" XOCHITL_PATH = ".local/share/remarkable/xochitl/" @@ -296,7 +296,7 @@ def prepare_background(tmp, metadata, filenames, notebook_id, resizebg=False): return background -def prepare_foreground(tmp, filenames, singlefile, coloured): +def prepare_foreground(tmp, filenames, singlefile, coloured, width, height): """ Extract annotations and create a PDF. Returns the foreground pdf path. 
""" @@ -307,7 +307,7 @@ def prepare_foreground(tmp, filenames, singlefile, coloured): next(fname for fname in filenames if fname.endswith(".lines")) ) lines2svg(lines_path, output_prefix, - singlefile=singlefile, coloured_annotations=coloured) + singlefile=singlefile, coloured_annotations=coloured, width=width, height=height) foreground = os.path.join(tmp, "foreground.pdf") foreground_svgs = [str(svg) for svg in Path(tmp).glob("foreground*.svg")] @@ -343,14 +343,12 @@ def merge_pdfs(background, foreground, destination): destination_pdf = PyPDF2.PdfFileWriter() for bg_page, fg_page in zip(bg_pdf.pages, fg_pdf.pages): - _, _, _, height = bg_page.mediaBox - new_width = height / REMARKABLE_H * REMARKABLE_W - # workaround PyPDF2 FloatObject inconsistency - height, new_width = float(height), float(new_width) + ll, lr, ul, ur = bg_page.mediaBox + width, height = ul - ll, ur - lr base_page = PyPDF2.pdf.PageObject.createBlankPage( - width=new_width, height=height) + width=width, height=height) base_page.mergePage(bg_page) - fg_page.scaleTo(new_width, height) + fg_page.scaleTo(width, height) base_page.mergePage(fg_page) destination_pdf.addPage(base_page) @@ -411,8 +409,14 @@ def main(args, tmp): background = prepare_background( tmp, metadata, filenames, notebook_id, resizebg=args.pdftk) print("Preparing annotations") + # TODO: move prepare foreground in the merge, preparing each svg with the + # correct height and width + bg_pdf = PyPDF2.PdfFileReader(background) + bg_page = next(bg_pdf.pages) + ll, lr, ul, ur = bg_page.mediaBox + width, height = ul - ll, ur - lr foreground = prepare_foreground( - tmp, filenames, args.singlefile, args.coloured) + tmp, filenames, args.singlefile, args.coloured, width, height) print("Preparing final PDF") make_annotated_pdf(metadata["visibleName"], background, foreground, pdftk=args.pdftk)