Build a Document Normalization React Native Vision Camera Plugin

Dynamsoft Document Normalizer is an SDK that detects the boundaries of documents and runs perspective transformation to get a normalized document image. A normalized image can be used for further processing such as OCR.

In this article, we are going to create a React Native Vision Camera frame processor plugin of Dynamsoft Document Normalizer so that it is convenient to build a React Native document scanner.

Other React Native Vision Camera Frame Processor Plugins

Build the Document Normalization Frame Processor Plugin for React Native Vision Camera

Let’s do this in steps.

New Project

First, create a native module project.

npx create-react-native-library vision-camera-dynamsoft-document-normalizer

You can test the project using the following command:

cd example
npx react-native run-android
# or for iOS: 
npx react-native run-ios

Add Dependencies

For Android, add the following to android/build.gradle:

// android/build.gradle — declare where to fetch the Dynamsoft AAR plus the native dependencies.
rootProject.allprojects {
  repositories {
    maven {
      // Dynamsoft's Maven repository hosting the Document Normalizer AAR
      url "https://download2.dynamsoft.com/maven/aar"
    }
  }
}

dependencies {
  // From node_modules
  implementation project(path: ':react-native-vision-camera')
  // DDN
  implementation 'com.dynamsoft:dynamsoftdocumentnormalizer:1.0.10'
  // Camera
  implementation 'androidx.camera:camera-core:1.0.2'
}

For iOS, add the following to vision-camera-dynamsoft-document-normalizer.podspec:

# Pull in the Dynamsoft Document Normalizer CocoaPod, pinned to the exact version this article targets
s.dependency "DynamsoftDocumentNormalizer", "= 1.0.10"

Define the Functions in JavaScript

  1. Define a detect function which can be used in a frame processor. The function will detect the document borders and return an array of DetectedQuadResult objects with the location info.

    /**
     * Detect documents from the camera preview.
     *
     * Runs as a worklet inside a Vision Camera frame processor and delegates
     * to the native frame processor plugin registered as __detect.
     *
     * @param frame the camera frame to analyze
     * @returns the detected document quadrilaterals with confidence scores
     */
    export function detect(frame: Frame): DetectedQuadResult[] {
      'worklet'
      // @ts-ignore
      // eslint-disable-next-line no-undef
      return __detect(frame, {})
    }
    

    The DetectedQuadResult and related interfaces:

    /**
     * Detection result for one document candidate found in a frame or image.
     */
    export interface DetectedQuadResult {
      // Quadrilateral outlining the detected document boundary
      location: Quadrilateral;
      // Confidence reported by Dynamsoft Document Normalizer that the
      // quadrilateral is really a document boundary
      confidenceAsDocumentBoundary: number;
    }
    
    /**
     * A 2D point in image pixel coordinates.
     */
    export interface Point {
      x:number;
      y:number;
    }
    
    /**
     * A four-sided polygon described by its four corner points.
     */
    export interface Quadrilateral {
      points: [Point, Point, Point, Point];
    }
    
  2. Define a normalizeFile native module function. We can pass the detection result we get using the detect function to normalize a local image. We can get the path or base64 of the normalized image.

    /**
     * Normalize an image file using a detected (or manually specified) quadrilateral.
     *
     * @param url path of the local image file to normalize
     * @param quad quadrilateral (e.g. from detect) used for perspective transformation
     * @param config controls whether the result is saved as a file and/or returned as base64
     * @returns a promise resolving to the normalized image result
     */
    export function normalizeFile(url:string, quad:Quadrilateral, config: NormalizationConfig): Promise<NormalizedImageResult> {
      return VisionCameraDynamsoftDocumentNormalizer.normalizeFile(url, quad, config);
    }
    

    Related interfaces:

    /**
     * Config controlling whether to save the normalized image as a file
     * and/or include it as a base64 string in the result.
     */
    export interface NormalizationConfig{
      saveNormalizationResultAsFile?: boolean;
      includeNormalizationResultAsBase64?: boolean;
    }
    
    /**
     * Normalization result containing the image path and/or base64 string,
     * depending on the NormalizationConfig that was used.
     */
    export interface NormalizedImageResult {
      imageURL?: string;
      imageBase64?: string;
    }
    
  3. Define an initLicense function. We can use it to activate Dynamsoft Document Normalizer with a license (apply for a trial license).

    /**
     * Initialize the license of Dynamsoft Document Normalizer.
     *
     * @param license the license key
     * @returns a promise resolving to whether the license is valid
     */
    export function initLicense(license:string): Promise<boolean> {
      return VisionCameraDynamsoftDocumentNormalizer.initLicense(license);
    }
    
  4. Define an initRuntimeSettingsFromString function. We can pass a JSON template to update the runtime settings of Dynamsoft Document Normalizer. Check out the docs to learn more about the parameters.

    /**
     * Update the runtime settings of Dynamsoft Document Normalizer from a JSON template.
     *
     * @param template the JSON template string
     * @returns a promise resolving to true on success
     */
    export function initRuntimeSettingsFromString(template:string): Promise<boolean> {
      return VisionCameraDynamsoftDocumentNormalizer.initRuntimeSettingsFromString(template);
    }
    

Implement the Functions for Android

  1. In VisionCameraDynamsoftDocumentNormalizerModule.java, create an instance of Document Normalizer and add related methods.

    The constructor:

    // Native module exposing Dynamsoft Document Normalizer to JavaScript.
    @ReactModule(name = VisionCameraDynamsoftDocumentNormalizerModule.NAME)
    public class VisionCameraDynamsoftDocumentNormalizerModule extends ReactContextBaseJavaModule {
        public static final String NAME = "VisionCameraDynamsoftDocumentNormalizer";
        // Application context, used for license initialization and cache-dir file output
        private Context mContext;
        // Shared Document Normalizer instance, created once in the constructor via initDDN()
        private DocumentNormalizer ddn;
        public VisionCameraDynamsoftDocumentNormalizerModule(ReactApplicationContext reactContext) {
            super(reactContext);
            mContext = reactContext;
            initDDN();
        }
    }
    

    Related methods:

    // Create the DocumentNormalizer instance. On failure the exception is only
    // logged and ddn stays null — callers should be prepared for a null instance.
    private void initDDN(){
        try {
            ddn = new DocumentNormalizer();
        } catch (DocumentNormalizerException e) {
            e.printStackTrace();
        }
    }
    
    // Expose the context so other classes (e.g. the frame processor plugin) can use it.
    public Context getContext(){
        return mContext;
    }
    // Expose the shared DocumentNormalizer instance.
    public DocumentNormalizer getDDN(){
        return ddn;
    }
    
  2. Add methods with the @ReactMethod annotation which can be called from JavaScript.

    // Initialize the Dynamsoft license. The promise is resolved with true/false
    // once the asynchronous license verification callback fires.
    @ReactMethod
    public void initLicense(String license, Promise promise) {
        LicenseManager.initLicense(license, mContext, new LicenseVerificationListener() {
            @Override
            public void licenseVerificationCallback(boolean isSuccess, CoreException error) {
                if(!isSuccess){
                    // NOTE(review): resolves false rather than rejecting; also assumes
                    // error is non-null whenever isSuccess is false — confirm with the SDK docs.
                    error.printStackTrace();
                    promise.resolve(false);
                }else{
                    Log.d("DDN","license valid");
                    promise.resolve(true);
                }
            }
        });
    }
    
    // Update the Document Normalizer runtime settings from a JSON template.
    // Resolves true on success; rejects with the SDK error message on failure.
    @ReactMethod
    public void initRuntimeSettingsFromString(String template, Promise promise) {
        try {
            ddn.initRuntimeSettingsFromString(template);
            promise.resolve(true);
        } catch (DocumentNormalizerException e) {
            e.printStackTrace();
            promise.reject("DDN",e.getMessage());
        }
    }
    
    // Normalize a local image file with the supplied quadrilateral and resolve
    // with a map that may contain "imageURL" (JPEG saved to the app cache dir)
    // and/or "imageBase64", depending on the config flags.
    @ReactMethod
    public void normalizeFile(String filePath, ReadableMap quad, ReadableMap config, Promise promise) {
        WritableNativeMap returnResult = new WritableNativeMap();
        Log.d("DDN",quad.toString());
        // quad is expected to have the {points: [{x, y} x4]} shape produced by detect
        ReadableArray points = quad.getArray("points");
        Quadrilateral quadrilateral = new Quadrilateral();
        quadrilateral.points = convertPoints(points);
        try {
            NormalizedImageResult result = ddn.normalize(filePath,quadrilateral);
            if (config.hasKey("saveNormalizationResultAsFile")) {
                if (config.getBoolean("saveNormalizationResultAsFile")) {
                    // Save the normalized image as a timestamp-named JPEG in the cache directory
                    File cacheDir = mContext.getCacheDir();
                    String fileName = System.currentTimeMillis() + ".jpg";
                    String path = BitmapUtils.saveImage(result.image.toBitmap(), cacheDir, fileName);
                    returnResult.putString("imageURL",path);
                }
            }
            if (config.hasKey("includeNormalizationResultAsBase64")) {
                if (config.getBoolean("includeNormalizationResultAsBase64")) {
                    String base64 = BitmapUtils.bitmap2Base64(result.image.toBitmap());
                    returnResult.putString("imageBase64",base64);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
            if (e instanceof DocumentNormalizerException) {
                Log.d("DDN","Error code: "+((DocumentNormalizerException) e).getErrorCode());
            }
            promise.reject("DDN",e.getMessage());
            return;
        }
        promise.resolve(returnResult);
    }
    
    /**
     * Convert a ReadableArray of {x, y} maps into a fixed-size array of four Points.
     * Only the first four entries are read so an oversized array coming from
     * JavaScript cannot trigger an ArrayIndexOutOfBoundsException; if fewer than
     * four points are supplied, the remaining slots stay null.
     */
    private Point[] convertPoints(ReadableArray quadPoints){
        Point[] points = new Point[4];
        int count = Math.min(quadPoints.size(), points.length);
        for (int i = 0; i < count; i++) {
            Point p = new Point();
            p.x = quadPoints.getMap(i).getInt("x");
            p.y = quadPoints.getMap(i).getInt("y");
            points[i] = p;
        }
        return points;
    }
    
  3. Create a new file named VisionCameraDetectionPlugin.java, where we define the frame processor to detect the document location. A BitmapUtils class is used to convert the image proxy to bitmap and rotate it if needed.

    // Frame processor plugin exposed to JS worklets as __detect: converts the
    // camera frame to a bitmap, runs quad detection and returns the wrapped results.
    public class VisionCameraDetectionPlugin extends FrameProcessorPlugin {
        // Module providing the shared DocumentNormalizer instance
        private VisionCameraDynamsoftDocumentNormalizerModule mModule;
        @Override
        public Object callback(ImageProxy image, Object[] params) {
            WritableNativeArray quadResultsWrapped = new WritableNativeArray();
            try {
                @SuppressLint("UnsafeOptInUsageError")
                Bitmap bitmap = BitmapUtils.getBitmap(image);
                // Use the module's getter: the ddn field is declared private in
                // VisionCameraDynamsoftDocumentNormalizerModule, so accessing
                // mModule.ddn directly would not compile.
                DetectedQuadResult[] quadResults = mModule.getDDN().detectQuad(bitmap);
                for (DetectedQuadResult quad:quadResults) {
                    quadResultsWrapped.pushMap(Utils.getMapFromDetectedQuadResult(quad));
                }
            } catch (Exception e) {
                // Detection failures are swallowed; an empty array is returned to JS
                e.printStackTrace();
            }
            return quadResultsWrapped;
        }
    
        VisionCameraDetectionPlugin(VisionCameraDynamsoftDocumentNormalizerModule module) {
            super("detect");
            mModule = module;
        }
    }
    
  4. Create a Utils class to wrap the results.

    // Helpers for converting between Dynamsoft result types and React Native
    // writable maps/arrays.
    public class Utils {
        // Build a fixed-size array of four Points from a ReadableArray of {x, y} maps.
        public static Point[] convertPoints(ReadableArray pointsArray){
            Point[] converted = new Point[4];
            for (int idx = 0; idx < pointsArray.size(); idx++) {
                ReadableMap entry = pointsArray.getMap(idx);
                Point corner = new Point();
                corner.x = entry.getInt("x");
                corner.y = entry.getInt("y");
                converted[idx] = corner;
            }
            return converted;
        }
    
        // Wrap one DetectedQuadResult into a map for JS consumption.
        public static WritableNativeMap getMapFromDetectedQuadResult(DetectedQuadResult result){
            WritableNativeMap wrapped = new WritableNativeMap();
            wrapped.putInt("confidenceAsDocumentBoundary",result.confidenceAsDocumentBoundary);
            wrapped.putMap("location",getMapFromLocation(result.location));
            return wrapped;
        }
    
        // Wrap a Quadrilateral's corner points into a map with a "points" array.
        private static WritableNativeMap getMapFromLocation(Quadrilateral location){
            WritableNativeMap locationMap = new WritableNativeMap();
            WritableNativeArray cornerArray = new WritableNativeArray();
            for (Point corner: location.points) {
                WritableNativeMap cornerMap = new WritableNativeMap();
                cornerMap.putInt("x",corner.x);
                cornerMap.putInt("y",corner.y);
                cornerArray.pushMap(cornerMap);
            }
            locationMap.putArray("points",cornerArray);
            return locationMap;
        }
    }
    
  5. In VisionCameraDynamsoftDocumentNormalizerPackage.java, register the frame processor plugin.

    // Create the native module and register the frame processor plugin so that
    // __detect becomes callable from JS frame processors.
    @Override
    public List<NativeModule> createNativeModules(@NonNull ReactApplicationContext reactContext) {
        List<NativeModule> modules = new ArrayList<>();
        VisionCameraDynamsoftDocumentNormalizerModule module = new VisionCameraDynamsoftDocumentNormalizerModule(reactContext);
        modules.add(module);
        // The plugin shares the module's DocumentNormalizer instance
        VisionCameraDetectionPlugin detectionPlugin = new VisionCameraDetectionPlugin(module);
        FrameProcessorPlugin.register(detectionPlugin);
        return modules;
    }
    

Implement the Functions for iOS

  1. Define the following native module methods in VisionCameraDynamsoftDocumentNormalizer.m.

    // Bridge the Swift native module methods to React Native.
    // Note: object parameters must be declared as pointer types — the original
    // initLicense declaration used (NSString) without the asterisk.
    RCT_EXTERN_METHOD(initLicense:(NSString *)license
                     withResolver:(RCTPromiseResolveBlock)resolve
                     withRejecter:(RCTPromiseRejectBlock)reject)
    
    RCT_EXTERN_METHOD(initRuntimeSettingsFromString:(NSString *)template
                     withResolver:(RCTPromiseResolveBlock)resolve
                     withRejecter:(RCTPromiseRejectBlock)reject)
    
    RCT_EXTERN_METHOD(normalizeFile:(NSString *)path
                      quad:(NSDictionary *)quad
                      config:(NSDictionary *)config
                     withResolver:(RCTPromiseResolveBlock)resolve
                     withRejecter:(RCTPromiseRejectBlock)reject)
    
  2. In VisionCameraDynamsoftDocumentNormalizer.swift, add a static instance of Document Normalizer.

    import DynamsoftDocumentNormalizer
    // iOS native module. Holds a single shared DocumentNormalizer instance so the
    // module methods and the frame processor plugin operate on the same settings.
    @objc(VisionCameraDynamsoftDocumentNormalizer)
    class VisionCameraDynamsoftDocumentNormalizer: NSObject,LicenseVerificationListener  {
        static var ddn:DynamsoftDocumentNormalizer = DynamsoftDocumentNormalizer()
    }
    
  3. In VisionCameraDynamsoftDocumentNormalizer.swift, add related functions with the @objc annotation which can be called from JavaScript.

    /// Update the Document Normalizer runtime settings from a JSON template.
    /// Rejects the promise on failure so the behavior matches the Android
    /// implementation (which calls promise.reject) instead of resolving false.
    @objc(initRuntimeSettingsFromString:withResolver:withRejecter:)
    func initRuntimeSettingsFromString(template:String, resolve:RCTPromiseResolveBlock,reject:RCTPromiseRejectBlock) -> Void {
        do {
            try VisionCameraDynamsoftDocumentNormalizer.ddn.initRuntimeSettingsFromString(template)
            resolve(true)
        }catch {
            print("Unexpected error: \(error).")
            reject("DDN", error.localizedDescription, error)
        }
    }
    
    /// Start Dynamsoft license verification.
    /// NOTE(review): the promise resolves true immediately, before the
    /// asynchronous verification finishes; the actual result only reaches
    /// licenseVerificationCallback below. Confirm whether the JS side should
    /// wait for the callback instead (the Android implementation does).
    @objc(initLicense:withResolver:withRejecter:)
    func initLicense(license:String, resolve:RCTPromiseResolveBlock,reject:RCTPromiseRejectBlock) -> Void {
        DynamsoftLicenseManager.initLicense(license, verificationDelegate: self)
        resolve(true)
    }
    
    /// Delegate callback with the license verification result; currently only logged.
    func licenseVerificationCallback(_ isSuccess: Bool, error: Error?) {
        print(isSuccess)
    }
    
    /// Normalize a local image file with the given quadrilateral.
    /// Resolves with a map that may contain "imageURL" and/or "imageBase64",
    /// depending on the config flags; rejects on failure.
    @objc(normalizeFile:quad:config:withResolver:withRejecter:)
    func normalizeFile(path:String,quad:[String:Any], config:[String:Any],resolve:RCTPromiseResolveBlock,reject:RCTPromiseRejectBlock) -> Void {
        do {
            var returned_result:[String:String] = [:]
               
            let imageURL = URL(fileURLWithPath: path)
            // Guard the image load instead of force-unwrapping so a bad path
            // rejects the promise rather than crashing the app.
            guard let loadedImage = UIImage(contentsOfFile: imageURL.path) else {
                reject("DDN", "Failed to load image from path: \(path)", nil)
                return
            }
            // Fix the orientation so the pixel buffer matches what is displayed
            let image = BitmapUtils.normalizedImage(loadedImage)
            // assumes quad has the {points: [{x, y} x4]} shape produced by detect
            let points = quad["points"] as! [[String:NSNumber]]
            let quadrilateral = iQuadrilateral.init()
            quadrilateral.points = convertPoints(points)
               
            // Derive the Dynamsoft pixel format from the decoded image's bits per pixel
            let bpp = image.cgImage?.bitsPerPixel
            var pixelFormat:EnumImagePixelFormat
            switch (bpp) {
               case 1:
                pixelFormat = EnumImagePixelFormat.binary
                break;
               case 8:
                pixelFormat = EnumImagePixelFormat.grayScaled
                break;
               case 32:
                pixelFormat = EnumImagePixelFormat.ARGB_8888
                break;
               case 48:
                pixelFormat = EnumImagePixelFormat.RGB_161616;
                break;
               case 64:
                pixelFormat = EnumImagePixelFormat.ARGB_16161616;
                break;
               default:
                pixelFormat = EnumImagePixelFormat.RGB_888;
                break;
            }
               
            // Hand the raw pixel buffer to the SDK
            let data = iImageData.init()
            data.bytes = image.cgImage?.dataProvider?.data as! Data
            data.orientation = 0
            data.stride = image.cgImage!.bytesPerRow
            data.width = image.cgImage!.width
            data.height = image.cgImage!.height
            data.format = pixelFormat
               
            let normalizedImageResult = try VisionCameraDynamsoftDocumentNormalizer.ddn.normalizeBuffer(data, quad: quadrilateral)
    
            if config["saveNormalizationResultAsFile"] != nil {
                if config["saveNormalizationResultAsFile"] as! Bool == true {
                    // NSTemporaryDirectory() already ends with a slash; use
                    // appendingPathComponent to avoid a double slash in the path.
                    let tmpDir = NSTemporaryDirectory()
                    let timestamp = String(format: "%f", Date().timeIntervalSince1970*1000)
                    let filePath = (tmpDir as NSString).appendingPathComponent(timestamp + ".png")
                    do{
                        try normalizedImageResult.saveToFile(filePath)
                        returned_result["imageURL"] = filePath
                    }catch {
                        print(error)
                    }
                }
            }
            if config["includeNormalizationResultAsBase64"] != nil {
                if config["includeNormalizationResultAsBase64"] as! Bool == true {
                    do{
                        let normalizedUIImage = try normalizedImageResult.image.toUIImage()
                        let base64 = Utils.getBase64FromImage(normalizedUIImage)
                        returned_result["imageBase64"] = base64
                    }catch{
                        print(error)
                    }
                }
            }
    
            resolve(returned_result)
        }catch {
            print("Unexpected error: \(error).")
            // Reject instead of resolving false so the JS Promise<NormalizedImageResult>
            // contract is honored (matches the Android implementation).
            reject("DDN", error.localizedDescription, error)
        }
    }
    
    /// Map each {x, y} dictionary to a CGPoint, truncating the NSNumber
    /// values to integer coordinates (matching intValue semantics).
    func convertPoints(_ points:[[String:NSNumber]]) -> [CGPoint] {
        return points.map { corner in
            CGPoint(x: corner["x"]!.intValue, y: corner["y"]!.intValue)
        }
    }
    
  4. Create a BitmapUtils.swift file with the following content. The normalizedImage function can be used to fix the orientation of a UIImage.

    // Utility for working around UIImage orientation metadata.
    class BitmapUtils {
        //https://stackoverflow.com/questions/8915630/ios-uiimageview-how-to-handle-uiimage-image-orientation
        // Re-draw the image into a fresh context so its pixel data physically
        // matches the .up orientation; returns the input unchanged if it is
        // already upright.
        static public func normalizedImage(_ image:UIImage) -> UIImage {
            guard image.imageOrientation != UIImage.Orientation.up else {
                return image
            }
            UIGraphicsBeginImageContextWithOptions(image.size, false, image.scale)
            defer { UIGraphicsEndImageContext() }
            image.draw(in: CGRect(origin: .zero, size: image.size))
            return UIGraphicsGetImageFromCurrentImageContext()!
        }
    }
    
  5. Create a Utils.swift to do UIImage/Base64 conversion and wrap the results.

    // UIImage/Base64 conversion helpers and result wrappers for the iOS side.
    class Utils {
           
        /// Decode a base64 string into a UIImage, or nil if decoding fails.
        static public func convertBase64ToImage(_ imageStr:String) ->UIImage?{
            if let data: NSData = NSData(base64Encoded: imageStr, options:NSData.Base64DecodingOptions.ignoreUnknownCharacters)
            {
                if let image: UIImage = UIImage(data: data as Data)
                {
                    return image
                }
            }
            return nil
        }
           
        /// Encode a UIImage as a base64 JPEG string; returns "" on failure.
        static func getBase64FromImage(_ image:UIImage) -> String{
            // compressionQuality is documented as 0.0...1.0; the original passed
            // 100, which is out of range — 1.0 is the correct "best quality" value.
            let dataTmp = image.jpegData(compressionQuality: 1.0)
            if let data = dataTmp {
                return data.base64EncodedString()
            }
            return ""
        }
           
           
        /// Wrap one detection result into a dictionary for the JS side.
        static func wrapDetectionResult (result:iDetectedQuadResult) -> [String: Any] {
            var dict: [String: Any] = [:]
            dict["confidenceAsDocumentBoundary"] = result.confidenceAsDocumentBoundary
            dict["location"] = wrapLocation(location:result.location)
            return dict
        }
           
        /// Wrap a quadrilateral's corners into a {points: [{x, y}]} dictionary.
        /// NOTE(review): force-unwraps location and force-casts its points to
        /// [CGPoint] — assumed to hold for SDK-produced results; confirm.
        static private func wrapLocation (location:iQuadrilateral?) -> [String: Any] {
            var dict: [String: Any] = [:]
            var points: [[String:CGFloat]] = []
            let CGPoints = location!.points as! [CGPoint]
            for point in CGPoints {
                var pointDict: [String:CGFloat] = [:]
                pointDict["x"] = point.x
                pointDict["y"] = point.y
                points.append(pointDict)
            }
            dict["points"] = points
            return dict
        }
           
    }
    
  6. Create a DetectionFrameProcessorPlugin.swift to define the frame processor:

    // iOS frame processor plugin exposed to JS worklets as __detect: converts
    // the camera frame's pixel buffer into a UIImage and runs quad detection.
    @objc(DetectionFrameProcessorPlugin)
    public class DetectionFrameProcessorPlugin: NSObject, FrameProcessorPluginBase {
        // Shared CIContext — creating one per frame would be expensive
        private static let context = CIContext(options: nil)
        @objc
        public static func callback(_ frame: Frame!, withArgs _: [Any]!) -> Any! {
            guard let imageBuffer = CMSampleBufferGetImageBuffer(frame.buffer) else {
                print("Failed to get CVPixelBuffer!")
                return nil
              }
            let ciImage = CIImage(cvPixelBuffer: imageBuffer)
    
            guard let cgImage = context.createCGImage(ciImage, from: ciImage.extent) else {
                print("Failed to create CGImage!")
                return nil
            }
               
            var returned_results: [Any] = []
            let image = UIImage(cgImage: cgImage)
               
            // Detection errors are swallowed by try?; an empty array is returned to JS
            let results = try? VisionCameraDynamsoftDocumentNormalizer.ddn.detectQuadFromImage(image)
            if results != nil {
                for result in results! {
                    returned_results.append(Utils.wrapDetectionResult(result:result))
                }
            }
            return returned_results
        }
    }
    

    An Objective-C file named DetectionFrameProcessorPlugin.m is needed as well.

    #import <Foundation/Foundation.h>
    #import <VisionCamera/FrameProcessorPlugin.h>
    
    // Register the Swift frame processor plugin under the name "detect";
    // Vision Camera exposes it to JS worklets with a "__" prefix, i.e. __detect.
    @interface VISION_EXPORT_SWIFT_FRAME_PROCESSOR(detect, DetectionFrameProcessorPlugin)
    @end
    

React Native Document Scanner Demo

In the next article, we are going to use the plugin to build a React Native document scanner.

Source Code

Get the source code and have a try: https://github.com/tony-xlh/vision-camera-dynamsoft-document-normalizer