@@ -1856,6 +1856,51 @@ def print_unique_links_with_status_codes(self):
18561856 soup = self .get_beautiful_soup (self .get_page_source ())
18571857 page_utils ._print_unique_links_with_status_codes (page_url , soup )
18581858
def assert_pdf_text(self, pdf, text, page=None):
    """ Asserts text in a PDF file.
        PDF can be either a URL or a file path on the local file system.
        @Params
        pdf - The URL or file path of the PDF file.
              A URL is downloaded with self.download_file() and then read
              from the folder returned by self.get_downloads_folder().
        text - The expected text to verify in the PDF.
        page - The page number of the PDF to use (optional).
               If a page number is provided, looks only at that page.
                   (1 is the first page, 2 is the second page, etc.)
               If no page number is provided, looks at all the pages.
        @Raises
        Exception - if "pdf" does not end with ".pdf", if it is neither
                    a valid URL nor an existing file path, if the page
                    number is less than 1 or greater than the number of
                    pages, or if the expected text is not found. """
    # Validate the arguments first, so that bad input is reported before
    # anything is imported, opened in the browser, or downloaded.
    if not pdf.lower().endswith('.pdf'):
        raise Exception("%s is not a PDF file! (Expecting a .pdf)" % pdf)
    # Only a real int selects a single page (bool is excluded on purpose);
    # any other value means "search all the pages".
    single_page = isinstance(page, int) and not isinstance(page, bool)
    if single_page and page < 1:
        # Page numbers are 1-based. Without this check, 0 would turn into
        # index -1 and silently look at the wrong page.
        raise Exception("Invalid page number for the PDF!")

    import PyPDF2  # Imported lazily: only needed by this method.

    if page_utils.is_valid_url(pdf):
        if self.get_current_url() != pdf:
            self.open(pdf)
        self.download_file(pdf)
        file_name = pdf.split('/')[-1]
        file_path = os.path.join(self.get_downloads_folder(), file_name)
    else:
        if not os.path.exists(pdf):
            raise Exception("%s is not a valid URL or file path!" % pdf)
        file_path = os.path.abspath(pdf)

    # The "with" block guarantees the file handle is closed, including
    # when one of the assertions below raises.
    with open(file_path, "rb") as pdf_file_object:
        pdf_reader = PyPDF2.PdfFileReader(pdf_file_object, strict=False)
        num_pages = pdf_reader.numPages
        if single_page:
            if page > num_pages:
                raise Exception("Invalid page number for the PDF!")
            # getPage() is zero-based; "page" stays 1-based so that the
            # error message shows the number the caller passed in.
            pdf_page_text = pdf_reader.getPage(page - 1).extractText()
            if text not in pdf_page_text:
                raise Exception("PDF [%s] is missing expected text [%s] on "
                                "page [%s]!" % (file_path, text, page))
            return
        for page_num in range(num_pages):
            if text in pdf_reader.getPage(page_num).extractText():
                return
    raise Exception("PDF [%s] is missing expected text [%s]!"
                    "" % (file_path, text))
1903+
18591904 def create_folder (self , folder ):
18601905 """ Creates a folder of the given name if it doesn't already exist. """
18611906 if folder .endswith ("/" ):
0 commit comments