Building a Python Flask Web Document Scanner Using Dynamsoft Document SDK
The document-scanner-sdk package provides Python bindings for the Dynamsoft C/C++ Document Scanner SDK v1.x, enabling developers to quickly create document scanner applications for Windows and Linux desktop environments. This article demonstrates how to build a web-based document scanner using Python Flask and the Python Document Scanner SDK. The application allows you to capture documents using a connected camera, process them on the server, and display the scanned results directly in a web browser.
Prerequisites
- Obtain a 30-day free trial license for the Dynamsoft Document Normalizer SDK.
-
Install the required dependencies:
pip install flask document-scanner-sdk opencv-python
Creating a Scanner Class for Document Processing
Start by creating a document.py file and defining a Scanner class to handle document image processing:
import cv2
import numpy as np
import docscanner
docscanner.initLicense("LICENSE-KEY")
class Scanner(object):
    """Detect a document in an image and optionally rectify it.

    Wraps a ``docscanner`` instance configured with the color template.
    """

    def __init__(self):
        self.scanner = docscanner.createInstance()
        self.scanner.setParameters(docscanner.Templates.color)

    def __del__(self):
        # Intentionally a no-op; kept so the class interface is unchanged.
        pass

    def detect_edge(self, image, enabled_transform=False):
        """Outline the first detected document on ``image``.

        Args:
            image: Frame handed to ``detectMat``. The outline is drawn on
                this object in place.
            enabled_transform: When true, also build the rectified document
                from the detected quadrilateral.

        Returns:
            A ``(image, normalized_image)`` tuple. ``normalized_image`` is
            ``None`` when nothing was detected or when
            ``enabled_transform`` is false.
        """
        results = self.scanner.detectMat(image)

        # Only the first detection is used; later ones are ignored.
        first = next(iter(results), None)
        if first is None:
            return image, None

        x1, y1 = first.x1, first.y1
        x2, y2 = first.x2, first.y2
        x3, y3 = first.x3, first.y3
        x4, y4 = first.x4, first.y4

        normalized_image = None
        if enabled_transform:
            # Rectify BEFORE drawing: drawContours modifies `image` in place,
            # so drawing first would bake the green outline into the
            # rectified document.
            normalized_buffer = self.scanner.normalizeBuffer(
                image, x1, y1, x2, y2, x3, y3, x4, y4)
            normalized_image = docscanner.convertNormalizedImage2Mat(
                normalized_buffer)

        cv2.drawContours(
            image, [np.intp([(x1, y1), (x2, y2), (x3, y3), (x4, y4)])],
            0, (0, 255, 0), 2)

        return image, normalized_image
Explanation
- initLicense: Initializes the SDK with the provided license key.
- createInstance: Creates an instance of the document scanner.
- setParameters: Configures the scanner parameters using the color template.
- detectMat: Detects the document edges in the input image.
- normalizeBuffer: Normalizes the document image based on detected edges.
- convertNormalizedImage2Mat: Converts the normalized image to a cv2 matrix.
Implementing a Simple Desktop Document Scanner
-
Create a
desktop.py
# file and add the following code:
import cv2
from document import Scanner


def main():
    """Show live edge detection from camera 0.

    Press 'p' to also display the rectified document for the current frame,
    and 'q' to quit.
    """
    cap = cv2.VideoCapture(0)
    scanner = Scanner()
    try:
        while cap.isOpened():
            ret, frame = cap.read()

            # Poll the keyboard exactly once per iteration. Polling twice
            # (once for 'q', once for 'p') lets the first call swallow a key
            # press that the second call was meant to see.
            key = cv2.waitKey(10) & 0xFF
            if key == ord('q'):
                break
            if not ret:
                continue

            video_frame, image_frame = scanner.detect_edge(
                frame, key == ord('p'))

            if video_frame is not None:
                cv2.imshow("Edge Detection", video_frame)
            if image_frame is not None:
                cv2.imshow("Rectified Document", image_frame)
    finally:
        # Release the camera and close the windows even if a frame fails.
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
This code captures frames from the camera using OpenCV, continuously detects document edges, and displays the rectified document when the p key is pressed. Press q to exit the application.
-
Run the desktop document scanner:
python desktop.py
Building a Flask Web Document Scanner
-
Create a
camera.py
# file to manage the camera, detect document edges, and rectify the document:
import cv2
from document import Scanner


class VideoCamera(object):
    """Camera wrapper that produces JPEG-encoded preview and document frames."""

    def __init__(self):
        self.cap = cv2.VideoCapture(0)
        self.is_record = False
        self.out = None
        # JPEG bytes of the most recent rectified document, or None.
        self.transformed_frame = None
        self.scanner = Scanner()
        # JPEG bytes of the most recent preview frame, or None. Stored as
        # bytes so it can be concatenated straight into a multipart response.
        self.cached_frame = None

    def __del__(self):
        # __init__ may have raised before self.cap was assigned.
        cap = getattr(self, "cap", None)
        if cap is not None:
            cap.release()

    @staticmethod
    def _to_jpeg(image):
        """Return ``image`` as JPEG bytes, or None if it is None or encoding fails."""
        if image is None:
            return None
        ok, jpeg = cv2.imencode('.jpg', image)
        return jpeg.tobytes() if ok else None

    def get_video_frame(self):
        """Read a frame, outline the document, and return it as JPEG bytes.

        Returns None when the camera read or the JPEG encoding fails; the
        previously cached frame is left untouched in that case.
        """
        ret, frame = self.cap.read()
        if not ret:
            return None
        frame, _ = self.scanner.detect_edge(frame)
        data = self._to_jpeg(frame)
        if data is not None:
            self.cached_frame = data
        return data

    def capture_frame(self):
        """Read a frame and store its rectified document as JPEG bytes.

        When the read fails or no document is detected, the previously
        stored document is kept. Always returns None.
        """
        ret, frame = self.cap.read()
        if not ret:
            return None
        # detect_edge returns None as the second item when nothing is
        # detected; _to_jpeg guards against passing that to cv2.imencode.
        _, document = self.scanner.detect_edge(frame, True)
        data = self._to_jpeg(document)
        if data is not None:
            self.transformed_frame = data
        return None

    def get_cached_frame(self):
        """Return the last successfully encoded preview frame, or None."""
        return self.cached_frame

    def get_image_frame(self):
        """Return the last captured rectified document, or None."""
        return self.transformed_frame
-
Create an HTML template with an image element to display the camera frames served by Flask:
<!DOCTYPE html>
<html>

<head>
    <title>Document Scanner</title>
</head>

<body>
    <h1>Document Edge Detection and Perspective Transformation</h1>
    <div id="controller">
        <button id="capture">Capture</button>
        <!-- Assumes controller.js is placed in Flask's static folder; adjust the path if it lives elsewhere. -->
        <script type="text/javascript" src="{{ url_for('static', filename='controller.js') }}"></script>
    </div>
    <!-- Live preview: the MJPEG stream served by the video_viewer route. -->
    <img id="video" src="{{ url_for('video_viewer') }}" width="640" height="480">
    <!-- Rectified document: its src is set by controller.js after a capture. -->
    <img id="image" style="max-width:640px; max-height:480px">
</body>

</html>
-
Encode each frame as a JPEG image and stream it to the client:
def video_frame():
    """Yield an endless sequence of multipart JPEG parts for the live preview.

    Creates the shared ``video_camera`` on first use. When the camera yields
    no frame, the cached frame is sent instead if it is usable.
    """
    import time

    global video_camera
    if video_camera is None:
        video_camera = VideoCamera()

    while True:
        frame = video_camera.get_video_frame()
        if frame is None:
            frame = video_camera.get_cached_frame()

        # The fallback can be None (nothing cached yet) or something other
        # than encoded bytes; concatenating that onto the part header would
        # raise, so skip this part and retry shortly instead.
        if not isinstance(frame, (bytes, bytearray)):
            time.sleep(0.01)
            continue

        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n\r\n')


@app.route('/video_viewer')
def video_viewer():
    """Stream the live preview as a multipart/x-mixed-replace response."""
    return Response(video_frame(),
                    mimetype='multipart/x-mixed-replace; boundary=frame')
-
Create a
controller.js
// file to handle the capture event:
var buttonCapture = document.getElementById("capture");

buttonCapture.onclick = function () {
    var request = new XMLHttpRequest();

    // Point the result <img> at /image_viewer. The timestamp makes every URL
    // unique (presumably to bypass the browser cache).
    function showCapturedDocument() {
        var target = document.getElementById("image");
        target.src = "/image_viewer?" + new Date().getTime();
    }

    request.onreadystatechange = function () {
        // Ignore intermediate states and non-200 responses.
        if (request.readyState != 4 || request.status != 200) {
            return;
        }
        showCapturedDocument();
    };

    // Ask the server to capture a document from the current camera frame.
    var payload = JSON.stringify({ status: "true" });
    request.open("POST", "/capture_status");
    request.setRequestHeader("Content-Type", "application/json;charset=UTF-8");
    request.send(payload);
};
-
Stream the transformed document image to the client:
def image_frame():
    """Yield the last captured document as a single multipart JPEG part.

    Creates the shared ``video_camera`` on first use. Yields nothing when no
    document has been captured yet.
    """
    global video_camera
    if video_camera is None:
        video_camera = VideoCamera()

    frame = video_camera.get_image_frame()
    if frame is not None:
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n\r\n')


@app.route('/image_viewer')
def image_viewer():
    """Serve the rectified document image for the page's result <img>."""
    return Response(image_frame(),
                    mimetype='multipart/x-mixed-replace; boundary=frame')
-
Run the Flask application.
python server.py
-
Visit
http://127.0.0.1:5000
in your web browser to use the document scanner.
Source Code
https://github.com/yushulx/python-document-scanner-sdk/tree/main/examples/web