Build a Label Recognition Frame Processor Plugin for React Native Vision Camera (iOS)

In the previous article, we talked about how to build a React Native Vision Camera frame processor plugin for Android that recognizes text using Dynamsoft Label Recognizer. In this article, we will finish the iOS part of the plugin.

Build a Label Recognition Frame Processor Plugin for React Native Vision Camera (iOS)

Add Dependencies

First, we need to add Dynamsoft Label Recognizer as a dependency.

Open vision-camera-dynamsoft-label-recognizer.podspec and add the following lines:

s.libraries = 'c++'
s.dependency "DynamsoftLabelRecognizer", '= 2.0.0.1'

Then, we can run pod install for the example project:

cd example/ios
pod install

Add Camera Permission to the Example Project

We also need to add the following to Info.plist for camera permission:

<key>NSCameraUsageDescription</key>
<string>For barcode scanning</string>

Then, we can run the app to test it.

npx react-native run-ios

Write the iOS Native Code for the Plugin

  1. Create a new file named VisionCameraDLRPlugin.swift with the following content:

    import Foundation
    
    /// Frame processor plugin class, exposed to Objective-C as `VisionCameraDLRPlugin`.
    @objc(VisionCameraDLRPlugin)
    public class VisionCameraDLRPlugin: NSObject, FrameProcessorPluginBase {
        /// Placeholder entry point: ignores `frame` and `args` and returns an empty array.
        @objc public static func callback(_ frame: Frame!, withArgs args: [Any]!) -> Any! {
            // The recognition logic is added in the following steps.
            let placeholder: [Any] = []
            return placeholder
        }
    }
    
  2. Add the following content to VisionCameraDynamsoftLabelRecognizer-Bridging-Header.h:

    #import <VisionCamera/FrameProcessorPlugin.h>
    #import <VisionCamera/Frame.h>
    
  3. Create a new Objective-C file with the same name as the Swift file with the following content:

    #import <Foundation/Foundation.h>
    #import <VisionCamera/FrameProcessorPlugin.h>
    
    // Exports the Swift class `VisionCameraDLRPlugin` as a frame processor plugin named `recognize`.
    // The macro is expected to come from <VisionCamera/FrameProcessorPlugin.h>, imported above.
    @interface VISION_EXPORT_SWIFT_FRAME_PROCESSOR(recognize, VisionCameraDLRPlugin)
    
    @end
    
  4. Create a new LabelRecognizerManager.swift to initialize Dynamsoft Label Recognizer and manage its runtime settings:

    import Foundation
    import DynamsoftLabelRecognizer
    
    /// Owns the `DynamsoftLabelRecognizer` instance and remembers which template and custom
    /// model folder were applied last, so that they are only re-applied when they change.
    class LabelRecognizerManager: NSObject, DLRLicenseVerificationDelegate {
        private var recognizer: DynamsoftLabelRecognizer!
        private var currentModelFolder = ""
        private var currentTemplate = ""
        private var mLicense = ""

        /// Creates the manager and immediately initializes the recognizer with `license`.
        /// - Parameter license: License key passed to `DynamsoftLabelRecognizer.initLicense`.
        init(license: String) {
            super.init()
            mLicense = license
            initDLR(license: license)
        }

        /// Returns the recognizer, re-creating it with the stored license if it was destroyed.
        public func getRecognizer() -> DynamsoftLabelRecognizer {
            if recognizer == nil {
                initDLR(license: mLicense)
            }
            return recognizer
        }

        /// Disposes the recognizer. Calling this when it is already destroyed is a no-op.
        public func destroy() {
            recognizer?.dispose()
            recognizer = nil
            // The template is appended to the instance, so a re-created recognizer
            // needs it applied again on the next `updateTemplate(template:)` call.
            currentTemplate = ""
        }

        /// Initializes the license (with `self` as verification delegate) and creates the recognizer.
        private func initDLR(license: String) {
            DynamsoftLabelRecognizer.initLicense(license, verificationDelegate: self)
            recognizer = DynamsoftLabelRecognizer.init()
        }

        /// License verification callback; prints a message only when an error is reported.
        func dlrLicenseVerificationCallback(_ isSuccess: Bool, error: Error?) {
            guard let err = error as NSError? else { return }
            let msg: String
            // -1009 is the "not connected to the Internet" URL error code.
            if err.code == -1009 {
                msg = "Dynamsoft Label Recognizer is unable to connect to the public Internet to acquire a license. Please connect your device to the Internet or contact support@dynamsoft.com to acquire an offline license."
            } else {
                msg = LabelRecognizerManager.describe(err)
            }
            print(msg)
        }

        /// Replaces the appended runtime settings with `template` when it differs from the
        /// template applied last.
        /// - Parameter template: Template content handed to `appendSettingsFromString`.
        public func updateTemplate(template: String) {
            guard currentTemplate != template else { return }
            // Go through the accessor so a destroyed recognizer is re-created instead of crashing.
            let activeRecognizer = getRecognizer()

            // Error holders start as nil; `NSError()` would create an invalid error object.
            var clearErr: NSError?
            activeRecognizer.clearAppendedSettings(error: &clearErr)
            if let clearErr = clearErr, clearErr.code != 0 {
                print("error")
                print(LabelRecognizerManager.describe(clearErr))
            }

            var err: NSError?
            activeRecognizer.appendSettingsFromString(content: template, error: &err)
            print("template added")
            print(template)
            if let err = err, err.code != 0 {
                print("error")
                print(LabelRecognizerManager.describe(err))
            }
            // Recorded even on failure so that an invalid template is not re-applied on every call.
            currentTemplate = template
        }

        /// Loads the character models found in `modelFolder` of the main bundle when the folder
        /// differs from the one used last. Each model needs a `.prototxt`, `.txt` and
        /// `.caffemodel` file; loading stops at the first model with a missing or unreadable file.
        /// - Parameters:
        ///   - modelFolder: Bundle subdirectory that contains the model files.
        ///   - modelFileNames: Model names without file extension.
        public func useCustomModel(modelFolder: String, modelFileNames: [String]) {
            guard modelFolder != currentModelFolder else { return }
            // Recorded up front so that a folder with missing files is not retried on every call.
            currentModelFolder = modelFolder
            for model in modelFileNames {
                guard
                    let datapro = LabelRecognizerManager.modelData(named: model, withExtension: "prototxt", in: modelFolder),
                    let datatxt = LabelRecognizerManager.modelData(named: model, withExtension: "txt", in: modelFolder),
                    let datacaf = LabelRecognizerManager.modelData(named: model, withExtension: "caffemodel", in: modelFolder)
                else {
                    print("model not exist")
                    return
                }
                DynamsoftLabelRecognizer.appendCharacterModel(name: model, prototxtBuffer: datapro, txtBuffer: datatxt, characterModelBuffer: datacaf)
                print("load model \(model)")
            }
        }

        /// Reads one model file from the main bundle; returns nil when it is missing or unreadable.
        private static func modelData(named name: String, withExtension ext: String, in folder: String) -> Data? {
            guard let url = Bundle.main.url(forResource: name, withExtension: ext, subdirectory: folder) else {
                return nil
            }
            return try? Data(contentsOf: url)
        }

        /// Prefers the string stored under `NSUnderlyingErrorKey`, falling back to the localized description.
        private static func describe(_ error: NSError) -> String {
            return error.userInfo[NSUnderlyingErrorKey] as? String ?? error.localizedDescription
        }
    }
    
  5. Use the manager to create an instance of Dynamsoft Label Recognizer the first time the plugin is called:

    /// Frame processor plugin that creates the label recognizer on the first call.
    @objc(VisionCameraDLRPlugin)
    public class VisionCameraDLRPlugin: NSObject, FrameProcessorPluginBase {
        private static var recognizer: DynamsoftLabelRecognizer!
        private static var manager: LabelRecognizerManager!
        private static let context = CIContext(options: nil)

        /// Plugin entry point. Creates the manager and recognizer once, using the `license`
        /// entry of the config (first element of `args`) or the built-in trial license.
        /// - Returns: Currently always nil; the result is produced by the following steps.
        @objc
        public static func callback(_ frame: Frame!, withArgs args: [Any]!) -> Any! {
            let config = getConfig(withArgs: args)
            if manager == nil {
                let license: String = config?["license"] as? String ?? "DLS2eyJoYW5kc2hha2VDb2RlIjoiMjAwMDAxLTE2NDk4Mjk3OTI2MzUiLCJvcmdhbml6YXRpb25JRCI6IjIwMDAwMSIsInNlc3Npb25QYXNzd29yZCI6IndTcGR6Vm05WDJrcEQ5YUoifQ=="  //default 1-day public trial. Apply for a trial license here: https://www.dynamsoft.com/customer/license/trialLicense/?product=dlr
                manager = LabelRecognizerManager(license: license)
                recognizer = manager.getRecognizer()
            }
            return nil
        }

        /// Returns the first argument as a config dictionary.
        /// - Returns: nil when `args` is nil or empty, or when its first element is not a `[String: Any]`.
        static func getConfig(withArgs args: [Any]!) -> [String:Any]! {
            // Optional chaining avoids a crash when the plugin is called without arguments.
            return args?.first as? [String: Any]
        }
    }
    
  6. Update the template and load models if they are in the config.

    // Name of the template to recognize with; empty when the config does not provide one.
    let templateName = config?["templateName"] as? String ?? ""

    // Load custom models only when the config carries a well-formed `customModelConfig`.
    // A missing config or wrongly typed entries are skipped instead of crashing.
    if let customModelConfig = config?["customModelConfig"] as? [String: Any],
       let modelFolder = customModelConfig["customModelFolder"] as? String,
       let modelFileNames = customModelConfig["customModelFileNames"] as? [String] {
        manager.useCustomModel(modelFolder: modelFolder, modelFileNames: modelFileNames)
    }

    // Apply the template only when a non-empty one is supplied.
    let template = config?["template"] as? String ?? ""
    if !template.isEmpty {
        manager.updateTemplate(template: template)
    }
    
  7. Convert the frame to CGImage and crop it if a scan region is set.

    Convert:

    // Pull the pixel buffer out of the frame's sample buffer; give up when there is none.
    guard let pixelBuffer = CMSampleBufferGetImageBuffer(frame.buffer) else {
      print("Failed to get CVPixelBuffer!")
      return nil
    }

    // Render the full extent of the buffer into a CGImage through `context`.
    let sourceImage = CIImage(cvPixelBuffer: pixelBuffer)
    guard let cgImage = context.createCGImage(sourceImage, from: sourceImage.extent) else {
      print("Failed to create CGImage!")
      return nil
    }
    

    Crop:

    // `image` is either the frame cropped to the scan region or the full frame.
    let image: UIImage
    if let scanRegion = config?["scanRegion"] as? [String: Int] {
        let imgWidth = Double(cgImage.width)
        let imgHeight = Double(cgImage.height)
        // The region values are percentages of the image size; missing keys
        // default to the full image (left/top 0, width/height 100).
        let left = Double(scanRegion["left"] ?? 0) / 100.0 * imgWidth
        let top = Double(scanRegion["top"] ?? 0) / 100.0 * imgHeight
        let width = Double(scanRegion["width"] ?? 100) / 100.0 * imgWidth
        let height = Double(scanRegion["height"] ?? 100) / 100.0 * imgHeight

        // The cropRect is the part of the image to keep, positioned at
        // (left, top) and rounded outwards to whole pixels.
        let cropRect = CGRect(
            x: left,
            y: top,
            width: width,
            height: height
        ).integral

        // `cropping(to:)` returns nil when the rect does not intersect the image,
        // so fall back to the full image instead of force-unwrapping.
        if let cropped = cgImage.cropping(to: cropRect) {
            image = UIImage(cgImage: cropped)
            print("use cropped image")
        } else {
            print("scan region is outside of the image, use the full image")
            image = UIImage(cgImage: cgImage)
        }
    } else {
        image = UIImage(cgImage: cgImage)
    }
    
  8. Recognize text from the image and wrap the result. Return `scanResult` from `callback` in place of the placeholder `return nil`.

    var scanResult: [String:Any] = [:]
    var returned_results: [Any] = []
    // Start from nil; `NSError()` would create an invalid error object.
    var error: NSError?
    let results = recognizer.recognizeByImage(image: image, templateName: templateName, error: &error)
    // Report a recognition failure instead of silently dropping it.
    if let error = error, error.code != 0 {
        print("recognition failed: \(error.localizedDescription)")
    }

    // Convert every SDK result into a plain dictionary.
    for result in results {
        returned_results.append(Utils.wrapDLRResult(result:result))
    }

    scanResult["results"] = returned_results
    // Optional chaining keeps this safe when the plugin is called without a config.
    let includeImageBase64 = config?["includeImageBase64"] as? Bool ?? false
    if includeImageBase64 {
        scanResult["imageBase64"] = Utils.getBase64FromImage(image)
    }
    

    A Utils class is used to store helper methods.

       
    /// Stateless helpers for image/Base64 conversion and for turning recognition
    /// results into plain dictionaries.
    class Utils {
        /// Decodes a Base64 string into an image.
        /// - Returns: nil when the string is not valid Base64 or the data is not an image.
        static public func convertBase64ToImage(_ imageStr: String) -> UIImage? {
            guard let data = Data(base64Encoded: imageStr, options: .ignoreUnknownCharacters) else {
                return nil
            }
            return UIImage(data: data)
        }

        /// Encodes `image` as JPEG and returns it as a Base64 string.
        /// - Returns: An empty string when the image cannot be encoded as JPEG.
        static func getBase64FromImage(_ image: UIImage) -> String {
            // compressionQuality ranges from 0.0 to 1.0; 1.0 is the best quality.
            return image.jpegData(compressionQuality: 1.0)?.base64EncodedString() ?? ""
        }

        /// Wraps a recognition result, including its line results, into a dictionary.
        static func wrapDLRResult(result: iDLRResult) -> [String: Any] {
            var dict: [String: Any] = [:]
            dict["confidence"] = result.confidence
            dict["pageNumber"] = result.pageNumber
            // `refereneceRegionName` is spelled this way on the result type.
            dict["referenceRegionName"] = result.refereneceRegionName
            dict["textAreaName"] = result.textAreaName
            dict["location"] = wrapLocation(location: result.location)

            // A missing list becomes an empty array instead of crashing.
            let lineResults: [[String: Any]] = (result.lineResults ?? []).map { wrapDLRLineResult(result: $0) }
            dict["lineResults"] = lineResults

            return dict
        }

        /// Wraps a line result, including its character results, into a dictionary.
        static private func wrapDLRLineResult(result: iDLRLineResult) -> [String: Any] {
            var dict: [String: Any] = [:]
            dict["confidence"] = result.confidence
            dict["text"] = result.text
            dict["characterModelName"] = result.characterModelName
            dict["lineSpecificationName"] = result.lineSpecificationName
            dict["location"] = wrapLocation(location: result.location)

            let characterResults: [[String: Any]] = (result.characterResults ?? []).map { wrapDLRCharacterResult(result: $0) }
            dict["characterResults"] = characterResults
            return dict
        }

        /// Wraps a character result into a dictionary.
        static private func wrapDLRCharacterResult(result: iDLRCharacterResult) -> [String: Any] {
            var dict: [String: Any] = [:]
            dict["characterH"] = result.characterH
            dict["characterHConfidence"] = result.characterHConfidence
            dict["characterM"] = result.characterM
            dict["characterMConfidence"] = result.characterMConfidence
            dict["characterL"] = result.characterL
            dict["characterLConfidence"] = result.characterLConfidence
            dict["location"] = wrapLocation(location: result.location)
            return dict
        }

        /// Wraps the points of a quadrilateral as `["points": [["x": …, "y": …], …]]`.
        /// A nil location, or points that are not `CGPoint`s, yield an empty points array.
        static private func wrapLocation(location: iQuadrilateral?) -> [String: Any] {
            let cgPoints = location?.points as? [CGPoint] ?? []
            let points: [[String: CGFloat]] = cgPoints.map { ["x": $0.x, "y": $0.y] }
            return ["points": points]
        }
    }
    

All right, we’ve now finished writing the iOS part of the plugin.

Here is a video of the final result:

Source Code

https://github.com/xulihang/vision-camera-dynamsoft-label-recognizer