ArabicOCR — amazing OCR library for Arabic PDF documents

import sys

import fitz

# Render every page of the PDF to its own PNG so the OCR step can read it.
pdf = "arabic_image.pdf"
doc = fitz.open(pdf)  # open document
for page in doc:  # iterate through the pages
    pix = page.get_pixmap()  # render page to an image
    # One file per page, named by page number: page-0.png, page-1.png, ...
    pix.save(f"page-{page.number}.png")  # store image as a PNG
# Prerequisite: install the OCR package before running this script
# (shell: `pip install ArabicOcr`; notebook cell: `!pip install ArabicOcr`).
# The `!` form is IPython-only syntax and is a SyntaxError in a plain .py file.
from ArabicOcr import arabicocr

# Input: the first page image rendered from the PDF.
image_path = 'page-0.png'
# Output path handed to arabic_ocr; the display step later reads 'out.jpg'
# (presumably the annotated copy of the page -- confirm against ArabicOcr docs).
out_image = 'out.jpg'

# Each entry of `results` is indexed as [0] (annotation) and [1] (word)
# by the code that consumes it below.
results = arabicocr.arabic_ocr(image_path, out_image)
print(results)
# Collect the recognised word (field 1) of every OCR result.
words = [result[1] for result in results]

# Persist the list's Python repr; utf-8 so the Arabic text is written intact.
with open('file.txt', 'w', encoding='utf-8') as myfile:
    myfile.write(str(words))
# Collect the annotation (field 0) of every OCR result
# (presumably the region coordinates of the detected text -- TODO confirm).
annotations = [result[0] for result in results]

# Persist the list's Python repr alongside the words file.
with open('annotations.txt', 'w', encoding='utf-8') as myfile:
    myfile.write(str(annotations))
import cv2
import matplotlib.pyplot as plt

# Load the image produced by the OCR step.
img = cv2.imread('out.jpg', cv2.IMREAD_UNCHANGED)
if img is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising; fail here with a clear message.
    raise FileNotFoundError("could not read 'out.jpg'")

# OpenCV decodes colour images as BGR while matplotlib expects RGB;
# without this conversion the red and blue channels are swapped on screen.
if img.ndim == 3 and img.shape[2] == 3:
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

plt.figure(figsize=(10, 10))
plt.imshow(img)
# Needed when run as a script; harmless in a notebook.
plt.show()

--

--

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store