In this video I’ll show you how to extract text from a PDF file with Tkinter.
We’ll build an app that opens a PDF file, grabs a specific page, and converts that page to text.
Then we’ll output that text to a Tkinter Text Widget.
Python Code: pdf_extract.py
(Github Code)
from tkinter import *
import PyPDF2
from tkinter import filedialog, messagebox
# Main application window: title, optional icon, and initial size.
root = Tk()
root.title('Tkinter.com - PDF Text Extractor')
# The icon is cosmetic and is loaded from a hard-coded absolute path.
# Skip it rather than abort start-up when Tk cannot load that file.
try:
    root.iconbitmap('c:/tkinter.com/images/codemy.ico')
except TclError:
    pass
root.geometry("550x650")
# Open PDF File Function
def open_pdf():
    """Ask for a PDF file and show the text of the requested page.

    The page number is read from ``my_entry`` and is zero-based (it is
    passed straight to ``getPage``). On success the extracted text replaces
    the contents of ``my_text`` and ``pages_label`` is updated. Any failure
    (bad page number, unreadable file, parser error) is reported in an error
    dialog and leaves both widgets untouched. Cancelling the file dialog
    does nothing.
    """
    # get file name
    my_file = filedialog.askopenfilename(
        title="Open File",
        filetypes=(("PDF Files", ".pdf"), ("All Files", "*.*")),
    )
    # An empty result means the dialog was cancelled; there is nothing to open.
    if not my_file:
        return

    try:
        # Parse the page number first so bad input fails before any file work
        page_index = int(my_entry.get())
        # Open the file ourselves so the handle is closed once the text is read
        with open(my_file, "rb") as pdf_stream:
            # NOTE(review): PdfFileReader/getPage/extractText are the legacy
            # PyPDF2 names; newer releases are believed to use
            # PdfReader/pages[i]/extract_text -- confirm against the
            # installed PyPDF2 version.
            pdf_file = PyPDF2.PdfFileReader(pdf_stream)
            # Get the number of pages
            number_of_pages = len(pdf_file.pages)
            # Reject out-of-range values explicitly; a negative index would
            # otherwise silently select a page counted from the end.
            if not 0 <= page_index < number_of_pages:
                raise IndexError(
                    f'page {page_index} is out of range '
                    f'(valid pages: 0 to {number_of_pages - 1})'
                )
            # Select page to read
            page = pdf_file.getPage(page_index)
            # Get the content from the page (must happen while the file is open)
            content = page.extractText()
    except Exception as e:
        # UI boundary: surface every failure to the user instead of crashing
        messagebox.showerror("Whoah!", f'There was a problem! {e}')
    else:
        # Only touch the widgets once the page text was extracted successfully
        # Update Our Pages Label
        pages_label.config(text=f'Showing {page_index} of {number_of_pages-1} pages...')
        # Clear the text box
        my_text.delete(1.0, END)
        # Output pdf to text
        my_text.insert(1.0, content)
# Text area that displays the extracted page text
my_text = Text(master=root, height=25, width=60)
my_text.pack(pady=20)

# Framed page selector: a caption and an entry box on one grid row
my_label_frame = LabelFrame(master=root, text="Select Page To Open")
my_label_frame.pack(pady=10)

my_label = Label(master=my_label_frame, text="Page Number: ")
my_entry = Entry(master=my_label_frame)
my_label.grid(row=0, column=0, padx=10, pady=10)
my_entry.grid(row=0, column=1, padx=10, pady=10)

# Button that triggers open_pdf
my_button = Button(master=root, command=open_pdf, text="Open PDF")
my_button.pack(pady=20)

# Status label; starts empty and is filled in by open_pdf
pages_label = Label(master=root, text="")
pages_label.pack(pady=20)

# Hand control to the Tk event loop
root.mainloop()

Add comment