Build a Document Normalization React Native Vision Camera Plugin

Dynamsoft Document Normalizer is an SDK that detects the boundaries of documents and runs perspective transformation to get a normalized document image. A normalized image can be used for further processing such as OCR.

In this article, we are going to create a React Native Vision Camera frame processor plugin for Dynamsoft Document Normalizer so that it is convenient to build a React Native document scanner.

Build the Document Normalization Frame Processor Plugin for React Native Vision Camera

Let’s do this in steps.

New Project

First, create a native module project.

npx create-react-native-library vision-camera-dynamsoft-document-normalizer

You can test the project using the following command:

cd example
npx react-native run-android
# or for iOS: 
npx react-native run-ios

Add Dependencies

For Android, add the following to android/build.gradle:

rootProject.allprojects {
  repositories {
    maven {
      url "https://download2.dynamsoft.com/maven/aar"
    }
  }
}

dependencies {
  // From node_modules
  implementation project(path: ':react-native-vision-camera')
  // DDN
  implementation 'com.dynamsoft:dynamsoftdocumentnormalizer:1.0.10'
  // Camera
  implementation 'androidx.camera:camera-core:1.0.2'
}

For iOS, add the following to vision-camera-dynamsoft-document-normalizer.podspec:

s.dependency "DynamsoftDocumentNormalizer", "= 1.0.10"

Define the Functions in JavaScript

  1. Define a detect function which can be used in a frame processor. The function detects the document boundary and returns a list of DetectedQuadResults with the location info (a usage sketch follows this list).

    /**
     * Detect documents from the camera preview
     */
    export function detect(frame: Frame): DetectedQuadResult[] {
      'worklet'
      // @ts-ignore
      // eslint-disable-next-line no-undef
      return __detect(frame, {})
    }
    

    The DetectedQuadResult and related interfaces:

    export interface DetectedQuadResult {
      location: Quadrilateral;
      confidenceAsDocumentBoundary: number;
    }
    
    export interface Point {
      x:number;
      y:number;
    }
    
    export interface Quadrilateral {
      points: [Point, Point, Point, Point];
    }
    
  2. Define a normalizeFile native module function. We can pass the location of a detection result we get using the detect function to normalize a local image, and get back the path or base64 of the normalized image.

    /**
     * Normalize an image file
     */
    export function normalizeFile(url:string, quad:Quadrilateral, config: NormalizationConfig): Promise<NormalizedImageResult> {
      return VisionCameraDynamsoftDocumentNormalizer.normalizeFile(url, quad, config);
    }
    

    Related interfaces:

    /**
     * Config of whether to save the normalized image as a file and whether to include it as base64.
     */
    export interface NormalizationConfig{
      saveNormalizationResultAsFile?: boolean;
      includeNormalizationResultAsBase64?: boolean;
    }
    
    /**
     * Normalization result containing the image path or base64
     */
    export interface NormalizedImageResult {
      imageURL?: string;
      imageBase64?: string;
    }
    
  3. Define an initLicense function. We can use it to activate Dynamsoft Document Normalizer with a license (apply for a trial license).

    /**
     * Init the license of Dynamsoft Document Normalizer
     */
    export function initLicense(license:string): Promise<boolean> {
      return VisionCameraDynamsoftDocumentNormalizer.initLicense(license);
    }
    
  4. Define an initRuntimeSettingsFromString function. We can pass a JSON template to update the runtime settings of Dynamsoft Document Normalizer. Check out the docs to learn more about the parameters (an example template follows this list).

    /**
     * Init the runtime settings from a JSON template
     */
    export function initRuntimeSettingsFromString(template:string): Promise<boolean> {
      return VisionCameraDynamsoftDocumentNormalizer.initRuntimeSettingsFromString(template);
    }
    

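To illustrate how a template could be applied from JavaScript, here is a sketch based on the DDN 1.x JSON template format. The parameter names (GP, IP-1, NP-1) and the ICM_BINARY colour mode are illustrative; check the docs for the values that fit your use case.

import { initRuntimeSettingsFromString } from 'vision-camera-dynamsoft-document-normalizer';

// Sketch of a DDN 1.x JSON template that outputs a binarized document image.
const template = JSON.stringify({
  GlobalParameter: { Name: 'GP' },
  ImageParameterArray: [
    {
      Name: 'IP-1',
      NormalizerParameterName: 'NP-1',
      BaseImageParameterName: '',
    },
  ],
  NormalizerParameterArray: [
    {
      Name: 'NP-1',
      ContentType: 'CT_DOCUMENT',
      ColourMode: 'ICM_BINARY',
    },
  ],
});

async function applyTemplate() {
  const success = await initRuntimeSettingsFromString(template);
  console.log('template applied:', success);
}
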
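Note that Vision Camera v2 exposes the native frame processor plugin to worklets as a global __detect function, so the app consuming this library needs to whitelist it in the Reanimated Babel plugin. A typical babel.config.js looks like this (a sketch; adjust it to your Reanimated setup):

module.exports = {
  presets: ['module:metro-react-native-babel-preset'],
  plugins: [
    [
      'react-native-reanimated/plugin',
      {
        // Expose the native frame processor plugin to worklets
        globals: ['__detect'],
      },
    ],
  ],
};
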
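With the functions defined, here is a minimal usage sketch of a scanning component. It assumes the Vision Camera v2 hooks API with Reanimated 2; the component name and the license key are placeholders.

import * as React from 'react';
import { StyleSheet } from 'react-native';
import { Camera, useCameraDevices, useFrameProcessor } from 'react-native-vision-camera';
import { runOnJS } from 'react-native-reanimated';
import {
  detect,
  initLicense,
  normalizeFile,
  DetectedQuadResult,
} from 'vision-camera-dynamsoft-document-normalizer';

export default function DocumentScanner() {
  // quads can be used to draw an overlay of the detected document border
  const [quads, setQuads] = React.useState<DetectedQuadResult[]>([]);
  const devices = useCameraDevices();
  const device = devices.back;

  React.useEffect(() => {
    // Activate Dynamsoft Document Normalizer once at startup.
    initLicense('your-license-key');
  }, []);

  const frameProcessor = useFrameProcessor((frame) => {
    'worklet';
    // Detect document boundaries in the camera preview.
    const results = detect(frame);
    runOnJS(setQuads)(results);
  }, []);

  if (device == null) {
    return null;
  }
  return (
    <Camera
      style={StyleSheet.absoluteFill}
      device={device}
      isActive={true}
      frameProcessor={frameProcessor}
      frameProcessorFps={5}
    />
  );
}

// After taking a photo, run perspective correction with a detected quad:
async function normalizeCapturedPhoto(photoPath: string, quad: DetectedQuadResult) {
  const result = await normalizeFile(photoPath, quad.location, {
    saveNormalizationResultAsFile: true,
  });
  console.log(result.imageURL);
}
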
Implement the Functions for Android

  1. In VisionCameraDynamsoftDocumentNormalizerModule.java, create an instance of Document Normalizer and add related methods.

    The constructor:

    @ReactModule(name = VisionCameraDynamsoftDocumentNormalizerModule.NAME)
    public class VisionCameraDynamsoftDocumentNormalizerModule extends ReactContextBaseJavaModule {
        public static final String NAME = "VisionCameraDynamsoftDocumentNormalizer";
        private Context mContext;
        private DocumentNormalizer ddn;
        public VisionCameraDynamsoftDocumentNormalizerModule(ReactApplicationContext reactContext) {
            super(reactContext);
            mContext = reactContext;
            initDDN();
        }
    }
    

    Related methods:

    private void initDDN(){
        try {
            ddn = new DocumentNormalizer();
        } catch (DocumentNormalizerException e) {
            e.printStackTrace();
        }
    }
    
    public Context getContext(){
        return mContext;
    }
    public DocumentNormalizer getDDN(){
        return ddn;
    }
    
  2. Add methods with the @ReactMethod annotation which can be called from JavaScript.

    @ReactMethod
    public void initLicense(String license, Promise promise) {
        LicenseManager.initLicense(license, mContext, new LicenseVerificationListener() {
            @Override
            public void licenseVerificationCallback(boolean isSuccess, CoreException error) {
                if(!isSuccess){
                    error.printStackTrace();
                    promise.resolve(false);
                }else{
                    Log.d("DDN","license valid");
                    promise.resolve(true);
                }
            }
        });
    }
    
    @ReactMethod
    public void initRuntimeSettingsFromString(String template, Promise promise) {
        try {
            ddn.initRuntimeSettingsFromString(template);
            promise.resolve(true);
        } catch (DocumentNormalizerException e) {
            e.printStackTrace();
            promise.reject("DDN",e.getMessage());
        }
    }
    
    @ReactMethod
    public void normalizeFile(String filePath, ReadableMap quad, ReadableMap config, Promise promise) {
        WritableNativeMap returnResult = new WritableNativeMap();
        Log.d("DDN",quad.toString());
        ReadableArray points = quad.getArray("points");
        Quadrilateral quadrilateral = new Quadrilateral();
        quadrilateral.points = convertPoints(points);
        try {
            NormalizedImageResult result = ddn.normalize(filePath,quadrilateral);
            if (config.hasKey("saveNormalizationResultAsFile")) {
                if (config.getBoolean("saveNormalizationResultAsFile")) {
                    File cacheDir = mContext.getCacheDir();
                    String fileName = System.currentTimeMillis() + ".jpg";
                    String path = BitmapUtils.saveImage(result.image.toBitmap(), cacheDir, fileName);
                    returnResult.putString("imageURL",path);
                }
            }
            if (config.hasKey("includeNormalizationResultAsBase64")) {
                if (config.getBoolean("includeNormalizationResultAsBase64")) {
                    String base64 = BitmapUtils.bitmap2Base64(result.image.toBitmap());
                    returnResult.putString("imageBase64",base64);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
            if (e instanceof DocumentNormalizerException) {
                Log.d("DDN","Error code: "+((DocumentNormalizerException) e).getErrorCode());
            }
            promise.reject("DDN",e.getMessage());
            return;
        }
        promise.resolve(returnResult);
    }
    
    private Point[] convertPoints(ReadableArray quadPoints){
        Point[] points = new Point[4];
        for (int i = 0; i < quadPoints.size(); i++) {
            Point p = new Point();
            p.x = quadPoints.getMap(i).getInt("x");
            p.y = quadPoints.getMap(i).getInt("y");
            points[i] = p;
        }
        return points;
    }
    
  3. Create a new file named VisionCameraDetectionPlugin.java, where we define the frame processor to detect the document location. A BitmapUtils class (not listed here) is used to convert the ImageProxy to a Bitmap and rotate it if needed.

    public class VisionCameraDetectionPlugin extends FrameProcessorPlugin {
        private VisionCameraDynamsoftDocumentNormalizerModule mModule;
        @Override
        public Object callback(ImageProxy image, Object[] params) {
            WritableNativeArray quadResultsWrapped = new WritableNativeArray();
            try {
                @SuppressLint("UnsafeOptInUsageError")
                Bitmap bitmap = BitmapUtils.getBitmap(image);
                DetectedQuadResult[] quadResults = mModule.getDDN().detectQuad(bitmap);
                for (DetectedQuadResult quad:quadResults) {
                    quadResultsWrapped.pushMap(Utils.getMapFromDetectedQuadResult(quad));
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
            return quadResultsWrapped;
        }
    
        VisionCameraDetectionPlugin(VisionCameraDynamsoftDocumentNormalizerModule module) {
            super("detect");
            mModule = module;
        }
    }
    
  4. Create a Utils class to wrap the results.

    public class Utils {
        public static Point[] convertPoints(ReadableArray pointsArray){
            Point[] points = new Point[4];
            for (int i = 0; i < pointsArray.size(); i++) {
                ReadableMap pointMap = pointsArray.getMap(i);
                Point point = new Point();
                point.x = pointMap.getInt("x");
                point.y = pointMap.getInt("y");
                points[i] = point;
            }
            return points;
        }
    
        public static WritableNativeMap getMapFromDetectedQuadResult(DetectedQuadResult result){
            WritableNativeMap map = new WritableNativeMap();
            map.putInt("confidenceAsDocumentBoundary",result.confidenceAsDocumentBoundary);
            map.putMap("location",getMapFromLocation(result.location));
            return map;
        }
    
        private static WritableNativeMap getMapFromLocation(Quadrilateral location){
            WritableNativeMap map = new WritableNativeMap();
            WritableNativeArray points = new WritableNativeArray();
            for (Point point: location.points) {
                WritableNativeMap pointAsMap = new WritableNativeMap();
                pointAsMap.putInt("x",point.x);
                pointAsMap.putInt("y",point.y);
                points.pushMap(pointAsMap);
            }
            map.putArray("points",points);
            return map;
        }
    }
    
  5. In VisionCameraDynamsoftDocumentNormalizerPackage.java, register the frame processor plugin.

    @Override
    public List<NativeModule> createNativeModules(@NonNull ReactApplicationContext reactContext) {
        List<NativeModule> modules = new ArrayList<>();
        VisionCameraDynamsoftDocumentNormalizerModule module = new VisionCameraDynamsoftDocumentNormalizerModule(reactContext);
        modules.add(module);
        VisionCameraDetectionPlugin detectionPlugin = new VisionCameraDetectionPlugin(module);
        FrameProcessorPlugin.register(detectionPlugin);
        return modules;
    }
    

Implement the Functions for iOS

  1. Define the following native module methods in VisionCameraDynamsoftDocumentNormalizer.m.

    RCT_EXTERN_METHOD(initLicense:(NSString *)license
                     withResolver:(RCTPromiseResolveBlock)resolve
                     withRejecter:(RCTPromiseRejectBlock)reject)
    
    RCT_EXTERN_METHOD(initRuntimeSettingsFromString:(NSString *)template
                     withResolver:(RCTPromiseResolveBlock)resolve
                     withRejecter:(RCTPromiseRejectBlock)reject)
    
    RCT_EXTERN_METHOD(normalizeFile:(NSString *)path
                      quad:(NSDictionary *)quad
                      config:(NSDictionary *)config
                     withResolver:(RCTPromiseResolveBlock)resolve
                     withRejecter:(RCTPromiseRejectBlock)reject)
    
  2. In VisionCameraDynamsoftDocumentNormalizer.swift, add a static instance of Document Normalizer.

    import DynamsoftDocumentNormalizer
    @objc(VisionCameraDynamsoftDocumentNormalizer)
    class VisionCameraDynamsoftDocumentNormalizer: NSObject,LicenseVerificationListener  {
        static var ddn:DynamsoftDocumentNormalizer = DynamsoftDocumentNormalizer()
    }
    
  3. In VisionCameraDynamsoftDocumentNormalizer.swift, add related functions with the @objc annotation which can be called from JavaScript.

    @objc(initRuntimeSettingsFromString:withResolver:withRejecter:)
    func initRuntimeSettingsFromString(template:String, resolve:RCTPromiseResolveBlock,reject:RCTPromiseRejectBlock) -> Void {
        do {
            try VisionCameraDynamsoftDocumentNormalizer.ddn.initRuntimeSettingsFromString(template)
            resolve(true)
        }catch {
            print("Unexpected error: \(error).")
            resolve(false)
        }
    }
    
    @objc(initLicense:withResolver:withRejecter:)
    func initLicense(license:String, resolve:RCTPromiseResolveBlock,reject:RCTPromiseRejectBlock) -> Void {
        DynamsoftLicenseManager.initLicense(license, verificationDelegate: self)
        resolve(true)
    }
    
    func licenseVerificationCallback(_ isSuccess: Bool, error: Error?) {
        print(isSuccess)
    }
    
    @objc(normalizeFile:quad:config:withResolver:withRejecter:)
    func normalizeFile(path:String,quad:[String:Any], config:[String:Any],resolve:RCTPromiseResolveBlock,reject:RCTPromiseRejectBlock) -> Void {
        do {
            var returned_result:[String:String] = [:]
               
            let imageURL = URL(fileURLWithPath: path)
            var image = UIImage(contentsOfFile: imageURL.path)!
            image = BitmapUtils.normalizedImage(image)
            let points = quad["points"] as! [[String:NSNumber]]
            let quadrilateral = iQuadrilateral.init()
            quadrilateral.points = convertPoints(points)
               
            let bpp = image.cgImage?.bitsPerPixel
            var pixelFormat:EnumImagePixelFormat
            switch (bpp) {
               case 1:
                pixelFormat = EnumImagePixelFormat.binary
                break;
               case 8:
                pixelFormat = EnumImagePixelFormat.grayScaled
                break;
               case 32:
                pixelFormat = EnumImagePixelFormat.ARGB_8888
                print("ARGB888")
                break;
               case 48:
                pixelFormat = EnumImagePixelFormat.RGB_161616;
                break;
               case 64:
                pixelFormat = EnumImagePixelFormat.ARGB_16161616;
                break;
               default:
                pixelFormat = EnumImagePixelFormat.RGB_888;
                print("RGB888")
                break;
            }
               
            let data = iImageData.init()
            data.bytes = image.cgImage?.dataProvider?.data as! Data
            data.orientation = 0
            data.stride = image.cgImage!.bytesPerRow
            data.width = image.cgImage!.width
            data.height = image.cgImage!.height
            data.format = pixelFormat
               
            let normalizedImageResult = try VisionCameraDynamsoftDocumentNormalizer.ddn.normalizeBuffer(data, quad: quadrilateral)
    
            if config["saveNormalizationResultAsFile"] != nil {
                if config["saveNormalizationResultAsFile"] as! Bool == true {
                    let tmpDir = NSTemporaryDirectory()
                    let timestamp = String(format: "%f", Date().timeIntervalSince1970*1000)
                    let filePath = tmpDir + "/" + timestamp + ".png"
                    do{
                        try normalizedImageResult.saveToFile(filePath)
                        returned_result["imageURL"] = filePath
                    }catch {
                        print(error)
                    }
                }
            }
            if config["includeNormalizationResultAsBase64"] != nil {
                if config["includeNormalizationResultAsBase64"] as! Bool == true {
                    do{
                        let normalizedUIImage = try normalizedImageResult.image.toUIImage()
                        let base64 = Utils.getBase64FromImage(normalizedUIImage)
                        returned_result["imageBase64"] = base64
                    }catch{
                        print(error)
                    }
                }
            }
    
            resolve(returned_result)
        }catch {
            print("Unexpected error: \(error).")
            resolve(false)
        }
    }
    
    func convertPoints(_ points:[[String:NSNumber]]) -> [CGPoint] {
        var CGPoints:[CGPoint] = [];
        for point in points {
            let x = point["x"]!
            let y = point["y"]!
            let intX = x.intValue
            let intY = y.intValue
            let cgPoint = CGPoint(x: intX, y: intY)
            CGPoints.append(cgPoint)
        }
        return CGPoints
    }
    
  4. Create a BitmapUtils.swift file with the following content. The normalizedImage function can be used to fix the orientation of a UIImage.

    class BitmapUtils {
        //https://stackoverflow.com/questions/8915630/ios-uiimageview-how-to-handle-uiimage-image-orientation
        static public func normalizedImage(_ image:UIImage) -> UIImage {
            if image.imageOrientation == UIImage.Orientation.up {
                return image
            }
            UIGraphicsBeginImageContextWithOptions(image.size, false, image.scale)
            image.draw(in: CGRect(x:0,y:0,width:image.size.width,height:image.size.height))
            let normalized = UIGraphicsGetImageFromCurrentImageContext()!
            UIGraphicsEndImageContext();
            return normalized
        }
    }
    
  5. Create a Utils.swift to do UIImage/Base64 conversion and wrap the results.

    class Utils {
           
        static public func convertBase64ToImage(_ imageStr:String) ->UIImage?{
            if let data: NSData = NSData(base64Encoded: imageStr, options:NSData.Base64DecodingOptions.ignoreUnknownCharacters)
            {
                if let image: UIImage = UIImage(data: data as Data)
                {
                    return image
                }
            }
            return nil
        }
           
        static func getBase64FromImage(_ image:UIImage) -> String{
            let dataTmp = image.jpegData(compressionQuality: 1.0) // compressionQuality ranges from 0.0 (most compressed) to 1.0 (best quality)
            if let data = dataTmp {
                return data.base64EncodedString()
            }
            return ""
        }
           
           
        static func wrapDetectionResult (result:iDetectedQuadResult) -> [String: Any] {
            var dict: [String: Any] = [:]
            dict["confidenceAsDocumentBoundary"] = result.confidenceAsDocumentBoundary
            dict["location"] = wrapLocation(location:result.location)
            return dict
        }
           
        static private func wrapLocation (location:iQuadrilateral?) -> [String: Any] {
            var dict: [String: Any] = [:]
            var points: [[String:CGFloat]] = []
            let CGPoints = location!.points as! [CGPoint]
            for point in CGPoints {
                var pointDict: [String:CGFloat] = [:]
                pointDict["x"] = point.x
                pointDict["y"] = point.y
                points.append(pointDict)
            }
            dict["points"] = points
            return dict
        }
           
    }
    
  6. Create a DetectionFrameProcessorPlugin.swift to define the frame processor:

    @objc(DetectionFrameProcessorPlugin)
    public class DetectionFrameProcessorPlugin: NSObject, FrameProcessorPluginBase {
        private static let context = CIContext(options: nil)
        @objc
        public static func callback(_ frame: Frame!, withArgs _: [Any]!) -> Any! {
            guard let imageBuffer = CMSampleBufferGetImageBuffer(frame.buffer) else {
                print("Failed to get CVPixelBuffer!")
                return nil
              }
            let ciImage = CIImage(cvPixelBuffer: imageBuffer)
    
            guard let cgImage = context.createCGImage(ciImage, from: ciImage.extent) else {
                print("Failed to create CGImage!")
                return nil
            }
               
            var returned_results: [Any] = []
            let image = UIImage(cgImage: cgImage)
               
            let results = try? VisionCameraDynamsoftDocumentNormalizer.ddn.detectQuadFromImage(image)
            if results != nil {
                for result in results! {
                    returned_results.append(Utils.wrapDetectionResult(result:result))
                }
            }
            return returned_results
        }
    }
    

    An Objective-C file named DetectionFrameProcessorPlugin.m is needed as well.

    #import <Foundation/Foundation.h>
    #import <VisionCamera/FrameProcessorPlugin.h>
    
    @interface VISION_EXPORT_SWIFT_FRAME_PROCESSOR(detect, DetectionFrameProcessorPlugin)
    @end
    

React Native Document Scanner Demo

In the next article, we are going to use the plugin to build a React Native document scanner.

Source Code

Get the source code and have a try: https://github.com/tony-xlh/vision-camera-dynamsoft-document-normalizer