Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 59 additions & 11 deletions src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
import zipfile

from . import extra

import importlib.util

# Set up g_out_log and g_out_message from environment variables.
#
Expand Down Expand Up @@ -333,6 +333,37 @@ def __init__(self):

_globals = _Globals()

_get_layout: typing.Optional[typing.Callable] = None

# global switch ensuring that the recommendation message is shown at most once
_recommend_layout = True # must be referred to as "global" everywhere


def no_recommend_layout():
"""For users who never want to see the layout recommendation."""
global _recommend_layout
_recommend_layout = False


def _warn_layout_once():
"""Check if we should recommend installing the layout package."""
msg="""Consider using the pymupdf_layout package for a greatly improved page layout analysis."""

global _recommend_layout
if (
1
and _recommend_layout # still True?
and _get_layout is None # no layout function stored here

# client did not globally disable the recommendation
and os.getenv("PYMUPDF_SUGGEST_LAYOUT_ANALYZER") != "0"

# layout is not available in this Python
and not importlib.util.find_spec("pymupdf.layout")
):
print(msg)
_recommend_layout = False # never show the message again


# Optionally use MuPDF via cppyy bindings; experimental and not tested recently
# as of 2023-01-20 11:51:40
Expand Down Expand Up @@ -9952,7 +9983,7 @@ def _get_resource_properties(self):
return rc

def _get_textpage(self, clip=None, flags=0, matrix=None):
if g_use_extra:
if 1 or g_use_extra:
ll_tpage = extra.page_get_textpage(self.this, clip, flags, matrix)
tpage = mupdf.FzStextPage(ll_tpage)
return tpage
Expand Down Expand Up @@ -10781,6 +10812,20 @@ def clip_to_rect(self, rect):
pclip = JM_rect_from_py(clip)
mupdf.pdf_clip_page(pdfpage, pclip)

def get_layout(self):
"""Try to access layout information."""

if self.layout_information is not None:
# layout information already present
return

if not _get_layout:
# no layout information available
return

layout_info = _get_layout(self)
self.layout_information = layout_info

@property
def artbox(self):
"""The ArtBox"""
Expand Down Expand Up @@ -11432,7 +11477,7 @@ def get_cdrawings(self, extended=None, callback=None, method=None):
assert isinstance(page, mupdf.FzPage), f'{self.this=}'
clips = True if extended else False
prect = mupdf.fz_bound_page(page)
if g_use_extra:
if 1 or g_use_extra:
rc = extra.get_cdrawings(page, extended, callback, method)
else:
rc = list()
Expand Down Expand Up @@ -12157,7 +12202,7 @@ def get_texttrace(self):
self.set_rotation(0)
page = self.this
rc = []
if g_use_extra:
if 1 or g_use_extra:
dev = extra.JM_new_texttrace_device(rc)
else:
dev = JM_new_texttrace_device(rc)
Expand Down Expand Up @@ -13206,6 +13251,9 @@ def xref(self):

rect = property(bound, doc="page rectangle")

# any result of layout analysis is stored here
layout_information = None


class Pixmap:

Expand Down Expand Up @@ -16391,7 +16439,7 @@ def _textpage_dict(self, raw=False):

def extractBLOCKS(self):
"""Return a list with text block information."""
if g_use_extra:
if 1 or g_use_extra:
return extra.extractBLOCKS(self.this)
block_n = -1
this_tpage = self.this
Expand Down Expand Up @@ -16587,7 +16635,7 @@ def extractTextbox(self, rect):

def extractWORDS(self, delimiters=None):
"""Return a list with text word information."""
if g_use_extra:
if 1 or g_use_extra:
return extra.extractWORDS(self.this, delimiters)
buflen = 0
last_char_rtl = 0
Expand Down Expand Up @@ -18969,7 +19017,7 @@ def JM_color_FromSequence(color):


def JM_color_count( pm, clip):
if g_use_extra:
if 1 or g_use_extra:
return extra.ll_JM_color_count(pm.m_internal, clip)

rc = dict()
Expand Down Expand Up @@ -20469,7 +20517,7 @@ def JM_make_annot_DA(annot, ncol, col, fontname, fontsize):


def JM_make_spanlist(line_dict, line, raw, buff, tp_rect):
if g_use_extra:
if 1 or g_use_extra:
return extra.JM_make_spanlist(line_dict, line, raw, buff, tp_rect)
char_list = None
span_list = []
Expand Down Expand Up @@ -20682,7 +20730,7 @@ def JM_make_image_block(block, block_dict):


def JM_make_text_block(block, block_dict, raw, buff, tp_rect):
if g_use_extra:
if 1 or g_use_extra:
return extra.JM_make_text_block(block.m_internal, block_dict, raw, buff.m_internal, tp_rect.m_internal)
line_list = []
block_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
Expand All @@ -20705,7 +20753,7 @@ def JM_make_text_block(block, block_dict, raw, buff, tp_rect):


def JM_make_textpage_dict(tp, page_dict, raw):
if g_use_extra:
if 1 or g_use_extra:
return extra.JM_make_textpage_dict(tp.m_internal, page_dict, raw)
text_buffer = mupdf.fz_new_buffer(128)
block_list = []
Expand Down Expand Up @@ -21356,7 +21404,7 @@ def JM_rotate_page_matrix(page):


def JM_search_stext_page(page, needle):
if g_use_extra:
if 1 or g_use_extra:
return extra.JM_search_stext_page(page.m_internal, needle)

rect = mupdf.FzRect(page.m_internal.mediabox)
Expand Down
Loading