@@ -51,6 +51,7 @@ def from_file(
5151 filename : str ,
5252 fixed_layouts : Optional [List [Optional [List [TextRegion ]]]] = None ,
5353 pdf_image_dpi : int = 200 ,
54+ password : Optional [str ] = None ,
5455 ** kwargs ,
5556 ) -> DocumentLayout :
5657 """Creates a DocumentLayout from a pdf file."""
@@ -62,6 +63,7 @@ def from_file(
6263 pdf_image_dpi ,
6364 output_folder = temp_dir ,
6465 path_only = True ,
66+ password = password ,
6567 )
6668 image_paths = cast (List [str ], _image_paths )
6769 number_of_pages = len (image_paths )
@@ -133,6 +135,7 @@ def __init__(
133135 document_filename : Optional [Union [str , PurePath ]] = None ,
134136 detection_model : Optional [UnstructuredObjectDetectionModel ] = None ,
135137 element_extraction_model : Optional [UnstructuredElementExtractionModel ] = None ,
138+ password : Optional [str ] = None ,
136139 ):
137140 if detection_model is not None and element_extraction_model is not None :
138141 raise ValueError ("Only one of detection_model and extraction_model should be passed." )
@@ -148,6 +151,7 @@ def __init__(
148151 self .element_extraction_model = element_extraction_model
149152 self .elements : Collection [LayoutElement ] = []
150153 self .elements_array : LayoutElements | None = None
154+ self .password = password
151155 # NOTE(alan): Dropped LocationlessLayoutElement that was created for chipper - chipper has
152156 # locations now and if we need to support LayoutElements without bounding boxes we can make
153157 # the bbox property optional
@@ -325,6 +329,7 @@ def from_image(
325329def process_data_with_model (
326330 data : BinaryIO ,
327331 model_name : Optional [str ],
332+ password : Optional [str ] = None ,
328333 ** kwargs : Any ,
329334) -> DocumentLayout :
330335 """Process PDF as file-like object `data` into a `DocumentLayout`.
@@ -339,6 +344,7 @@ def process_data_with_model(
339344 layout = process_file_with_model (
340345 file_path ,
341346 model_name ,
347+ password = password ,
342348 ** kwargs ,
343349 )
344350
@@ -351,6 +357,7 @@ def process_file_with_model(
351357 is_image : bool = False ,
352358 fixed_layouts : Optional [List [Optional [List [TextRegion ]]]] = None ,
353359 pdf_image_dpi : int = 200 ,
360+ password : Optional [str ] = None ,
354361 ** kwargs : Any ,
355362) -> DocumentLayout :
356363 """Processes pdf file with name filename into a DocumentLayout by using a model identified by
@@ -379,6 +386,7 @@ def process_file_with_model(
379386 element_extraction_model = element_extraction_model ,
380387 fixed_layouts = fixed_layouts ,
381388 pdf_image_dpi = pdf_image_dpi ,
389+ password = password ,
382390 ** kwargs ,
383391 )
384392 )
@@ -390,6 +398,7 @@ def convert_pdf_to_image(
390398 dpi : int = 200 ,
391399 output_folder : Optional [Union [str , PurePath ]] = None ,
392400 path_only : bool = False ,
401+ password : Optional [str ] = None ,
393402) -> Union [List [Image .Image ], List [str ]]:
394403 """Get the image renderings of the pdf pages using pdf2image"""
395404
@@ -402,12 +411,14 @@ def convert_pdf_to_image(
402411 dpi = dpi ,
403412 output_folder = output_folder ,
404413 paths_only = path_only ,
414+ userpw = password or "" ,
405415 )
406416 else :
407417 images = pdf2image .convert_from_path (
408418 filename ,
409419 dpi = dpi ,
410420 paths_only = path_only ,
421+ userpw = password or "" ,
411422 )
412423
413424 return images
0 commit comments