Build a Document Normalization React Native Vision Camera Plugin

Dynamsoft Document Normalizer is an SDK that detects the boundaries of documents and runs perspective transformation to get a normalized document image. A normalized image can be used for further processing such as OCR.

In this article, we are going to create a React Native Vision Camera frame processor plugin for Dynamsoft Document Normalizer so that it is convenient to build a React Native document scanner.

Build the Document Normalization Frame Processor Plugin for React Native Vision Camera

Let’s do this in steps.

New Project

First, create a native module project.

npx create-react-native-library vision-camera-dynamsoft-document-normalizer

You can test the project using the following command:

cd example
npx react-native run-android
# or for iOS: 
npx react-native run-ios

Add Dependencies

For Android, add the following to android/build.gradle:

rootProject.allprojects {
  repositories {
    maven {
      url "https://download2.dynamsoft.com/maven/aar"
    }
  }
}

dependencies {
  // From node_modules
  implementation project(path: ':react-native-vision-camera')
  // DDN
  implementation 'com.dynamsoft:dynamsoftdocumentnormalizer:1.0.10'
  // Camera
  implementation 'androidx.camera:camera-core:1.0.2'
}

For iOS, add the following to vision-camera-dynamsoft-document-normalizer.podspec:

s.dependency "DynamsoftDocumentNormalizer", "= 1.0.10"

Define the Functions in JavaScript

  1. Define a detect function which can be used in a frame processor. The function detects the document boundary and returns a list of DetectedQuadResults with the location info (a usage sketch follows this list).

    /**
     * Detect documents from the camera preview
     */
    export function detect(frame: Frame): DetectedQuadResult[] {
      'worklet'
      // @ts-ignore
      // eslint-disable-next-line no-undef
      return __detect(frame, {})
    }
    

    The DetectedQuadResult and related interfaces:

    export interface DetectedQuadResult {
      location: Quadrilateral;
      confidenceAsDocumentBoundary: number;
    }
    
    export interface Point {
      x:number;
      y:number;
    }
    
    export interface Quadrilateral {
      points: [Point, Point, Point, Point];
    }
    
  2. Define a normalizeFile native module function. We can pass the location of a detection result we get using the detect function to normalize a local image, and get back the path or base64 of the normalized image.

    /**
     * Normalize an image file
     */
    export function normalizeFile(url:string, quad:Quadrilateral, config: NormalizationConfig): Promise<NormalizedImageResult> {
      return VisionCameraDynamsoftDocumentNormalizer.normalizeFile(url, quad, config);
    }
    

    Related interfaces:

    /**
     * Config of whether to save the normalized image as a file and whether to include it as base64.
     */
    export interface NormalizationConfig{
      saveNormalizationResultAsFile?: boolean;
      includeNormalizationResultAsBase64?: boolean;
    }
    
    /**
     * Normalization result containing the image path or base64
     */
    export interface NormalizedImageResult {
      imageURL?: string;
      imageBase64?: string;
    }
    
  3. Define an initLicense function. We can use it to activate Dynamsoft Document Normalizer with a license (apply for a trial license).

    /**
     * Init the license of Dynamsoft Document Normalizer
     */
    export function initLicense(license:string): Promise<boolean> {
      return VisionCameraDynamsoftDocumentNormalizer.initLicense(license);
    }
    
  4. Define an initRuntimeSettingsFromString function. We can pass a JSON template to update the runtime settings of Dynamsoft Document Normalizer. Check out the docs to learn more about the parameters (an example template follows this list).

    /**
     * Init the runtime settings from a JSON template
     */
    export function initRuntimeSettingsFromString(template:string): Promise<boolean> {
      return VisionCameraDynamsoftDocumentNormalizer.initRuntimeSettingsFromString(template);
    }
    

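To illustrate how a template could be applied from JavaScript, here is a sketch based on the DDN 1.x JSON template format. The parameter names (GP, IP-1, NP-1) and the ICM_BINARY colour mode are illustrative; check the docs for the values that fit your use case.

import { initRuntimeSettingsFromString } from 'vision-camera-dynamsoft-document-normalizer';

// Sketch of a DDN 1.x JSON template that outputs a binarized document image.
const template = JSON.stringify({
  GlobalParameter: { Name: 'GP' },
  ImageParameterArray: [
    {
      Name: 'IP-1',
      NormalizerParameterName: 'NP-1',
      BaseImageParameterName: '',
    },
  ],
  NormalizerParameterArray: [
    {
      Name: 'NP-1',
      ContentType: 'CT_DOCUMENT',
      ColourMode: 'ICM_BINARY',
    },
  ],
});

async function applyTemplate() {
  const success = await initRuntimeSettingsFromString(template);
  console.log('template applied:', success);
}
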
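Note that Vision Camera v2 exposes the native frame processor plugin to worklets as a global __detect function, so the app consuming this library needs to whitelist it in the Reanimated Babel plugin. A typical babel.config.js looks like this (a sketch; adjust it to your Reanimated setup):

module.exports = {
  presets: ['module:metro-react-native-babel-preset'],
  plugins: [
    [
      'react-native-reanimated/plugin',
      {
        // Expose the native frame processor plugin to worklets
        globals: ['__detect'],
      },
    ],
  ],
};
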
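With the functions defined, here is a minimal usage sketch of a scanning component. It assumes the Vision Camera v2 hooks API with Reanimated 2; the component name and the license key are placeholders.

import * as React from 'react';
import { StyleSheet } from 'react-native';
import { Camera, useCameraDevices, useFrameProcessor } from 'react-native-vision-camera';
import { runOnJS } from 'react-native-reanimated';
import {
  detect,
  initLicense,
  normalizeFile,
  DetectedQuadResult,
} from 'vision-camera-dynamsoft-document-normalizer';

export default function DocumentScanner() {
  // quads can be used to draw an overlay of the detected document border
  const [quads, setQuads] = React.useState<DetectedQuadResult[]>([]);
  const devices = useCameraDevices();
  const device = devices.back;

  React.useEffect(() => {
    // Activate Dynamsoft Document Normalizer once at startup.
    initLicense('your-license-key');
  }, []);

  const frameProcessor = useFrameProcessor((frame) => {
    'worklet';
    // Detect document boundaries in the camera preview.
    const results = detect(frame);
    runOnJS(setQuads)(results);
  }, []);

  if (device == null) {
    return null;
  }
  return (
    <Camera
      style={StyleSheet.absoluteFill}
      device={device}
      isActive={true}
      frameProcessor={frameProcessor}
      frameProcessorFps={5}
    />
  );
}

// After taking a photo, run perspective correction with a detected quad:
async function normalizeCapturedPhoto(photoPath: string, quad: DetectedQuadResult) {
  const result = await normalizeFile(photoPath, quad.location, {
    saveNormalizationResultAsFile: true,
  });
  console.log(result.imageURL);
}
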
Implement the Functions for Android

  1. In VisionCameraDynamsoftDocumentNormalizerModule.java, create an instance of Document Normalizer and add related methods.

    The constructor:

    @ReactModule(name = VisionCameraDynamsoftDocumentNormalizerModule.NAME)
    public class VisionCameraDynamsoftDocumentNormalizerModule extends ReactContextBaseJavaModule {
        public static final String NAME = "VisionCameraDynamsoftDocumentNormalizer";
        private Context mContext;
        private DocumentNormalizer ddn;
        public VisionCameraDynamsoftDocumentNormalizerModule(ReactApplicationContext reactContext) {
            super(reactContext);
            mContext = reactContext;
            initDDN();
        }
    }
    

    Related methods:

    private void initDDN(){
        try {
            ddn = new DocumentNormalizer();
        } catch (DocumentNormalizerException e) {
            e.printStackTrace();
        }
    }
    
    public Context getContext(){
        return mContext;
    }
    public DocumentNormalizer getDDN(){
        return ddn;
    }
    
  2. Add methods with the @ReactMethod annotation which can be called from JavaScript.

    @ReactMethod
    public void initLicense(String license, Promise promise) {
        LicenseManager.initLicense(license, mContext, new LicenseVerificationListener() {
            @Override
            public void licenseVerificationCallback(boolean isSuccess, CoreException error) {
                if(!isSuccess){
                    error.printStackTrace();
                    promise.resolve(false);
                }else{
                    Log.d("DDN","license valid");
                    promise.resolve(true);
                }
            }
        });
    }
    
    @ReactMethod
    public void initRuntimeSettingsFromString(String template, Promise promise) {
        try {
            ddn.initRuntimeSettingsFromString(template);
            promise.resolve(true);
        } catch (DocumentNormalizerException e) {
            e.printStackTrace();
            promise.reject("DDN",e.getMessage());
        }
    }
    
    @ReactMethod
    public void normalizeFile(String filePath, ReadableMap quad, ReadableMap config, Promise promise) {
        WritableNativeMap returnResult = new WritableNativeMap();
        Log.d("DDN",quad.toString());
        ReadableArray points = quad.getArray("points");
        Quadrilateral quadrilateral = new Quadrilateral();
        quadrilateral.points = convertPoints(points);
        try {
            NormalizedImageResult result = ddn.normalize(filePath,quadrilateral);
            if (config.hasKey("saveNormalizationResultAsFile")) {
                if (config.getBoolean("saveNormalizationResultAsFile")) {
                    File cacheDir = mContext.getCacheDir();
                    String fileName = System.currentTimeMillis() + ".jpg";
                    String path = BitmapUtils.saveImage(result.image.toBitmap(), cacheDir, fileName);
                    returnResult.putString("imageURL",path);
                }
            }
            if (config.hasKey("includeNormalizationResultAsBase64")) {
                if (config.getBoolean("includeNormalizationResultAsBase64")) {
                    String base64 = BitmapUtils.bitmap2Base64(result.image.toBitmap());
                    returnResult.putString("imageBase64",base64);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
            if (e instanceof DocumentNormalizerException) {
                Log.d("DDN","Error code: "+((DocumentNormalizerException) e).getErrorCode());
            }
            promise.reject("DDN",e.getMessage());
            return;
        }
        promise.resolve(returnResult);
    }
    
    private Point[] convertPoints(ReadableArray quadPoints){
        Point[] points = new Point[4];
        for (int i = 0; i < quadPoints.size(); i++) {
            Point p = new Point();
            p.x = quadPoints.getMap(i).getInt("x");
            p.y = quadPoints.getMap(i).getInt("y");
            points[i] = p;
        }
        return points;
    }
    
  3. Create a new file named VisionCameraDetectionPlugin.java, where we define the frame processor to detect the document location. A BitmapUtils class (not listed here) is used to convert the ImageProxy to a Bitmap and rotate it if needed.

    public class VisionCameraDetectionPlugin extends FrameProcessorPlugin {
        private VisionCameraDynamsoftDocumentNormalizerModule mModule;
        @Override
        public Object callback(ImageProxy image, Object[] params) {
            WritableNativeArray quadResultsWrapped = new WritableNativeArray();
            try {
                @SuppressLint("UnsafeOptInUsageError")
                Bitmap bitmap = BitmapUtils.getBitmap(image);
                DetectedQuadResult[] quadResults = mModule.getDDN().detectQuad(bitmap);
                for (DetectedQuadResult quad:quadResults) {
                    quadResultsWrapped.pushMap(Utils.getMapFromDetectedQuadResult(quad));
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
            return quadResultsWrapped;
        }
    
        VisionCameraDetectionPlugin(VisionCameraDynamsoftDocumentNormalizerModule module) {
            super("detect");
            mModule = module;
        }
    }
    
  4. Create a Utils class to wrap the results.

    public class Utils {
        public static Point[] convertPoints(ReadableArray pointsArray){
            Point[] points = new Point[4];
            for (int i = 0; i < pointsArray.size(); i++) {
                ReadableMap pointMap = pointsArray.getMap(i);
                Point point = new Point();
                point.x = pointMap.getInt("x");
                point.y = pointMap.getInt("y");
                points[i] = point;
            }
            return points;
        }
    
        public static WritableNativeMap getMapFromDetectedQuadResult(DetectedQuadResult result){
            WritableNativeMap map = new WritableNativeMap();
            map.putInt("confidenceAsDocumentBoundary",result.confidenceAsDocumentBoundary);
            map.putMap("location",getMapFromLocation(result.location));
            return map;
        }
    
        private static WritableNativeMap getMapFromLocation(Quadrilateral location){
            WritableNativeMap map = new WritableNativeMap();
            WritableNativeArray points = new WritableNativeArray();
            for (Point point: location.points) {
                WritableNativeMap pointAsMap = new WritableNativeMap();
                pointAsMap.putInt("x",point.x);
                pointAsMap.putInt("y",point.y);
                points.pushMap(pointAsMap);
            }
            map.putArray("points",points);
            return map;
        }
    }
    
  5. In VisionCameraDynamsoftDocumentNormalizerPackage.java, register the frame processor plugin.

    @Override
    public List<NativeModule> createNativeModules(@NonNull ReactApplicationContext reactContext) {
        List<NativeModule> modules = new ArrayList<>();
        VisionCameraDynamsoftDocumentNormalizerModule module = new VisionCameraDynamsoftDocumentNormalizerModule(reactContext);
        modules.add(module);
        VisionCameraDetectionPlugin detectionPlugin = new VisionCameraDetectionPlugin(module);
        FrameProcessorPlugin.register(detectionPlugin);
        return modules;
    }
    

Implement the Functions for iOS

  1. Define the following native module methods in VisionCameraDynamsoftDocumentNormalizer.m.

    RCT_EXTERN_METHOD(initLicense:(NSString *)license
                     withResolver:(RCTPromiseResolveBlock)resolve
                     withRejecter:(RCTPromiseRejectBlock)reject)
    
    RCT_EXTERN_METHOD(initRuntimeSettingsFromString:(NSString *)template
                     withResolver:(RCTPromiseResolveBlock)resolve
                     withRejecter:(RCTPromiseRejectBlock)reject)
    
    RCT_EXTERN_METHOD(normalizeFile:(NSString *)path
                      quad:(NSDictionary *)quad
                      config:(NSDictionary *)config
                     withResolver:(RCTPromiseResolveBlock)resolve
                     withRejecter:(RCTPromiseRejectBlock)reject)
    
  2. In VisionCameraDynamsoftDocumentNormalizer.swift, add a static instance of Document Normalizer.

    import DynamsoftDocumentNormalizer
    @objc(VisionCameraDynamsoftDocumentNormalizer)
    class VisionCameraDynamsoftDocumentNormalizer: NSObject,LicenseVerificationListener  {
        static var ddn:DynamsoftDocumentNormalizer = DynamsoftDocumentNormalizer()
    }
    
  3. In VisionCameraDynamsoftDocumentNormalizer.swift, add related functions with the @objc annotation which can be called from JavaScript.

    @objc(initRuntimeSettingsFromString:withResolver:withRejecter:)
    func initRuntimeSettingsFromString(template:String, resolve:RCTPromiseResolveBlock,reject:RCTPromiseRejectBlock) -> Void {
        do {
            try VisionCameraDynamsoftDocumentNormalizer.ddn.initRuntimeSettingsFromString(template)
            resolve(true)
        }catch {
            print("Unexpected error: \(error).")
            resolve(false)
        }
    }
    
    @objc(initLicense:withResolver:withRejecter:)
    func initLicense(license:String, resolve:RCTPromiseResolveBlock,reject:RCTPromiseRejectBlock) -> Void {
        DynamsoftLicenseManager.initLicense(license, verificationDelegate: self)
        resolve(true)
    }
    
    func licenseVerificationCallback(_ isSuccess: Bool, error: Error?) {
        print(isSuccess)
    }
    
    @objc(normalizeFile:quad:config:withResolver:withRejecter:)
    func normalizeFile(path:String,quad:[String:Any], config:[String:Any],resolve:RCTPromiseResolveBlock,reject:RCTPromiseRejectBlock) -> Void {
        do {
            var returned_result:[String:String] = [:]
               
            let imageURL = URL(fileURLWithPath: path)
            var image = UIImage(contentsOfFile: imageURL.path)!
            image = BitmapUtils.normalizedImage(image)
            let points = quad["points"] as! [[String:NSNumber]]
            let quadrilateral = iQuadrilateral.init()
            quadrilateral.points = convertPoints(points)
               
            let bpp = image.cgImage?.bitsPerPixel
            var pixelFormat:EnumImagePixelFormat
            switch (bpp) {
               case 1:
                pixelFormat = EnumImagePixelFormat.binary
                break;
               case 8:
                pixelFormat = EnumImagePixelFormat.grayScaled
                break;
               case 32:
                pixelFormat = EnumImagePixelFormat.ARGB_8888
                print("ARGB888")
                break;
               case 48:
                pixelFormat = EnumImagePixelFormat.RGB_161616;
                break;
               case 64:
                pixelFormat = EnumImagePixelFormat.ARGB_16161616;
                break;
               default:
                pixelFormat = EnumImagePixelFormat.RGB_888;
                print("RGB888")
                break;
            }
               
            let data = iImageData.init()
            data.bytes = image.cgImage?.dataProvider?.data as! Data
            data.orientation = 0
            data.stride = image.cgImage!.bytesPerRow
            data.width = image.cgImage!.width
            data.height = image.cgImage!.height
            data.format = pixelFormat
               
            let normalizedImageResult = try VisionCameraDynamsoftDocumentNormalizer.ddn.normalizeBuffer(data, quad: quadrilateral)
    
            if config["saveNormalizationResultAsFile"] != nil {
                if config["saveNormalizationResultAsFile"] as! Bool == true {
                    let tmpDir = NSTemporaryDirectory()
                    let timestamp = String(format: "%f", Date().timeIntervalSince1970*1000)
                    let filePath = tmpDir + "/" + timestamp + ".png"
                    do{
                        try normalizedImageResult.saveToFile(filePath)
                        returned_result["imageURL"] = filePath
                    }catch {
                        print(error)
                    }
                }
            }
            if config["includeNormalizationResultAsBase64"] != nil {
                if config["includeNormalizationResultAsBase64"] as! Bool == true {
                    do{
                        let normalizedUIImage = try normalizedImageResult.image.toUIImage()
                        let base64 = Utils.getBase64FromImage(normalizedUIImage)
                        returned_result["imageBase64"] = base64
                    }catch{
                        print(error)
                    }
                }
            }
    
            resolve(returned_result)
        }catch {
            print("Unexpected error: \(error).")
            resolve(false)
        }
    }
    
    func convertPoints(_ points:[[String:NSNumber]]) -> [CGPoint] {
        var CGPoints:[CGPoint] = [];
        for point in points {
            let x = point["x"]!
            let y = point["y"]!
            let intX = x.intValue
            let intY = y.intValue
            let cgPoint = CGPoint(x: intX, y: intY)
            CGPoints.append(cgPoint)
        }
        return CGPoints
    }
    
  4. Create a BitmapUtils.swift file with the following content. The normalizedImage function can be used to fix the orientation of a UIImage.

    class BitmapUtils {
        //https://stackoverflow.com/questions/8915630/ios-uiimageview-how-to-handle-uiimage-image-orientation
        static public func normalizedImage(_ image:UIImage) -> UIImage {
            if image.imageOrientation == UIImage.Orientation.up {
                return image
            }
            UIGraphicsBeginImageContextWithOptions(image.size, false, image.scale)
            image.draw(in: CGRect(x:0,y:0,width:image.size.width,height:image.size.height))
            let normalized = UIGraphicsGetImageFromCurrentImageContext()!
            UIGraphicsEndImageContext();
            return normalized
        }
    }
    
  5. Create a Utils.swift to do UIImage/Base64 conversion and wrap the results.

    class Utils {
           
        static public func convertBase64ToImage(_ imageStr:String) ->UIImage?{
            if let data: NSData = NSData(base64Encoded: imageStr, options:NSData.Base64DecodingOptions.ignoreUnknownCharacters)
            {
                if let image: UIImage = UIImage(data: data as Data)
                {
                    return image
                }
            }
            return nil
        }
           
        static func getBase64FromImage(_ image:UIImage) -> String{
            let dataTmp = image.jpegData(compressionQuality: 1.0) // compressionQuality ranges from 0.0 (most compressed) to 1.0 (best quality)
            if let data = dataTmp {
                return data.base64EncodedString()
            }
            return ""
        }
           
           
        static func wrapDetectionResult (result:iDetectedQuadResult) -> [String: Any] {
            var dict: [String: Any] = [:]
            dict["confidenceAsDocumentBoundary"] = result.confidenceAsDocumentBoundary
            dict["location"] = wrapLocation(location:result.location)
            return dict
        }
           
        static private func wrapLocation (location:iQuadrilateral?) -> [String: Any] {
            var dict: [String: Any] = [:]
            var points: [[String:CGFloat]] = []
            let CGPoints = location!.points as! [CGPoint]
            for point in CGPoints {
                var pointDict: [String:CGFloat] = [:]
                pointDict["x"] = point.x
                pointDict["y"] = point.y
                points.append(pointDict)
            }
            dict["points"] = points
            return dict
        }
           
    }
    
  6. Create a DetectionFrameProcessorPlugin.swift to define the frame processor:

    @objc(DetectionFrameProcessorPlugin)
    public class DetectionFrameProcessorPlugin: NSObject, FrameProcessorPluginBase {
        private static let context = CIContext(options: nil)
        @objc
        public static func callback(_ frame: Frame!, withArgs _: [Any]!) -> Any! {
            guard let imageBuffer = CMSampleBufferGetImageBuffer(frame.buffer) else {
                print("Failed to get CVPixelBuffer!")
                return nil
              }
            let ciImage = CIImage(cvPixelBuffer: imageBuffer)
    
            guard let cgImage = context.createCGImage(ciImage, from: ciImage.extent) else {
                print("Failed to create CGImage!")
                return nil
            }
               
            var returned_results: [Any] = []
            let image = UIImage(cgImage: cgImage)
               
            let results = try? VisionCameraDynamsoftDocumentNormalizer.ddn.detectQuadFromImage(image)
            if results != nil {
                for result in results! {
                    returned_results.append(Utils.wrapDetectionResult(result:result))
                }
            }
            return returned_results
        }
    }
    

    An Objective-C file named DetectionFrameProcessorPlugin.m is needed as well.

    #import <Foundation/Foundation.h>
    #import <VisionCamera/FrameProcessorPlugin.h>
    
    @interface VISION_EXPORT_SWIFT_FRAME_PROCESSOR(detect, DetectionFrameProcessorPlugin)
    @end
    

React Native Document Scanner Demo

In the next article, we are going to use the plugin to build a React Native document scanner.

Source Code

Get the source code and have a try: https://github.com/tony-xlh/vision-camera-dynamsoft-document-normalizer