@@ -109,7 +109,7 @@ def get_pdf_splits(pdf_pages: List[PageObject], split_size: int = 1):
109109 for page in pdf_pages [offset :end ]:
110110 new_pdf .add_page (page )
111111
112- new_pdf .write (pdf_buffer ) # type: ignore
112+ new_pdf .write (pdf_buffer ) # type: ignore
113113 pdf_buffer .seek (0 )
114114
115115 yield (pdf_buffer .read (), offset )
@@ -196,7 +196,7 @@ def partition_pdf_splits(
196196 request : Request ,
197197 pdf_pages : List [PageObject ],
198198 file : IO [bytes ],
199- metadata_filename : str ,
199+ metadata_filename : Optional [ str ] ,
200200 content_type : str ,
201201 coordinates : bool ,
202202 ** partition_kwargs : Dict [str , Any ],
@@ -265,7 +265,7 @@ def __enter__(self):
265265
266266 Is_Chipper_Processing = True
267267
268- def __exit__ (self , exc_type , exc_value , exc_tb ):
268+ def __exit__ (self , exc_type , exc_value , exc_tb ): # type: ignore
269269 global Is_Chipper_Processing
270270 Is_Chipper_Processing = False
271271
@@ -500,7 +500,7 @@ def pipeline_api(
500500 # Clean up returned elements
501501 # Note(austin): pydantic should control this sort of thing for us
502502 for i , element in enumerate (elements ):
503- elements [i ].metadata .filename = os .path .basename (filename )
503+ elements [i ].metadata .filename = os .path .basename (filename ) # type: ignore
504504
505505 if not show_coordinates and element .metadata .coordinates :
506506 elements [i ].metadata .coordinates = None
@@ -591,15 +591,14 @@ def _validate_chunking_strategy(m_chunking_strategy: List[str]) -> Union[str, No
591591 return chunking_strategy
592592
593593
594- def _set_pdf_infer_table_structure (m_pdf_infer_table_structure : List [str ], strategy : str ):
594+ def _set_pdf_infer_table_structure (m_pdf_infer_table_structure : List [str ], strategy : str ) -> bool :
595595 pdf_infer_table_structure = (
596596 m_pdf_infer_table_structure [0 ] if len (m_pdf_infer_table_structure ) else "false"
597597 ).lower ()
598598 if strategy == "hi_res" and pdf_infer_table_structure == "true" :
599- pdf_infer_table_structure = True
599+ return True
600600 else :
601- pdf_infer_table_structure = False
602- return pdf_infer_table_structure
601+ return False
603602
604603
605604def get_validated_mimetype (file : UploadFile ):
@@ -635,7 +634,12 @@ def get_validated_mimetype(file: UploadFile):
635634class MultipartMixedResponse (StreamingResponse ):
636635 CRLF = b"\r \n "
637636
def __init__(
    self,
    *args: Any,
    content_type: Union[str, None] = None,
    **kwargs: Any,
) -> None:
    """Initialize the parent response and store ``content_type`` on the instance.

    Args:
        *args: Positional arguments forwarded unchanged to the parent initializer.
        content_type: Value saved as ``self.content_type``; defaults to None.
        **kwargs: Keyword arguments forwarded unchanged to the parent initializer.
    """
    # A ``**kwargs`` annotation describes each keyword *value*, so ``Any`` is the
    # correct spelling here and no ``# type: ignore`` is required.
    super().__init__(*args, **kwargs)
    self.content_type = content_type
641645
@@ -821,11 +825,11 @@ def response_generator(is_multipart: bool):
def join_responses(responses: List[Any]):
    """Combine several responses into one when the output is CSV.

    ``media_type`` is not a parameter; it is read from outside this function.
    When it is anything other than "text/csv" the input list is returned as is.
    Otherwise each response body is parsed as CSV, the resulting frames are
    outer-merged in order, and the merged table is returned as a
    ``PlainTextResponse`` containing its CSV text.
    """
    if media_type != "text/csv":
        return responses

    # Parse every body up front; the first frame seeds the merge.
    frames = [pd.read_csv(io.BytesIO(resp.body)) for resp in responses]  # type: ignore
    data = frames[0]
    for frame in frames[1:]:
        data = data.merge(frame, how="outer")  # type: ignore
    return PlainTextResponse(data.to_csv())
830834
831835 if content_type == "multipart/mixed" :
0 commit comments