Build a Label Recognition Frame Processor Plugin for React Native Vision Camera (iOS)

In the previous article, we talked about how to build a React Native Vision Camera frame processor plugin for Android to recognize text using Dynamsoft Label Recognizer. In this article, we will finish the iOS part of the plugin.

Other React Native Vision Camera Frame Processor Plugins

Build a Label Recognition Frame Processor Plugin for React Native Vision Camera (iOS)

Add Dependencies

We need to add dependencies of Dynamsoft Label Recognizer first.

Open vision-camera-dynamsoft-label-recognizer.podspec and add the following lines:

s.libraries = 'c++'
s.dependency "DynamsoftLabelRecognizer", '= 2.2.20'

Then, we can run pod install for the example project:

cd example/ios
pod install

Add Camera Permission to the Example Project

We also need to add the following to Info.plist for camera permission:

<key>NSCameraUsageDescription</key>
<string>For barcode scanning</string>

Then, we can run the app to test it.

npx react-native run-ios

Write the iOS Native Code for the Plugin

  1. Create a new file named VisionCameraDLRPlugin.swift with the following content:

    import Foundation
    
    /// Frame processor plugin skeleton, exposed to Objective-C under the name
    /// `VisionCameraDLRPlugin`.
    @objc(VisionCameraDLRPlugin)
    public class VisionCameraDLRPlugin: NSObject, FrameProcessorPluginBase {
        /// Plugin entry point that receives a frame and the caller's arguments.
        ///
        /// This is only a placeholder: it ignores both parameters and hands back
        /// an empty array.
        @objc
        public static func callback(_ frame: Frame!, withArgs args: [Any]!) -> Any! {
            // code goes here
            let emptyResults: [Any] = []
            return emptyResults
        }
    }
    
  2. Add the following content to VisionCameraDynamsoftLabelRecognizer-Bridging-Header.h:

    #import <VisionCamera/FrameProcessorPlugin.h>
    #import <VisionCamera/Frame.h>
    
  3. Create a new Objective-C file with the same name as the Swift file with the following content:

    #import <Foundation/Foundation.h>
    #import <VisionCamera/FrameProcessorPlugin.h>
    
    // Passes the name `recognize` and the Swift class VisionCameraDLRPlugin to
    // VisionCamera's export macro; the interface body is intentionally empty.
    // NOTE(review): presumably this is what registers the Swift plugin so it can
    // be called as a frame processor - confirm against the VisionCamera docs.
    @interface VISION_EXPORT_SWIFT_FRAME_PROCESSOR(recognize, VisionCameraDLRPlugin)
    
    @end
    
  4. Create a new LabelRecognizerManager.swift to initialize Dynamsoft Label Recognizer and manage its runtime settings:

    import Foundation
    import DynamsoftLabelRecognizer
    
    /// Owns the Dynamsoft Label Recognizer instance and remembers the template and
    /// custom-model folder that were last applied, so that repeated calls with the
    /// same values do no extra work.
    class LabelRecognizerManager: NSObject, LicenseVerificationListener {
        private var recognizer: DynamsoftLabelRecognizer?
        private var currentModelFolder = ""
        private var currentTemplate = ""
        private let mLicense: String

        /// Stores the license and immediately creates a recognizer with it.
        /// - Parameter license: License string passed to `DynamsoftLicenseManager`.
        init(license: String) {
            mLicense = license
            super.init()
            initDLR(license: license)
        }

        /// Returns the current recognizer, creating a new one with the stored
        /// license if it was released by `destroy()`.
        public func getRecognizer() -> DynamsoftLabelRecognizer {
            if let existing = recognizer {
                return existing
            }
            return initDLR(license: mLicense)
        }

        /// Releases the recognizer. The cached template is cleared as well so that
        /// it is applied again to the recognizer created by the next
        /// `getRecognizer()` call.
        public func destroy() {
            recognizer = nil
            currentTemplate = ""
        }

        /// Initializes the license with `self` as verification delegate and
        /// creates a fresh recognizer.
        @discardableResult
        private func initDLR(license: String) -> DynamsoftLabelRecognizer {
            DynamsoftLicenseManager.initLicense(license, verificationDelegate: self)
            let created = DynamsoftLabelRecognizer()
            recognizer = created
            return created
        }

        /// License verification delegate callback; prints a message when an error
        /// is reported and does nothing otherwise.
        func licenseVerificationCallback(_ isSuccess: Bool, error: Error?) {
            guard let err = error as NSError? else {
                return
            }
            let msg: String
            if err.code == NSURLErrorNotConnectedToInternet {
                msg = "Dynamsoft Label Recognizer is unable to connect to the public Internet to acquire a license. Please connect your device to the Internet or contact support@dynamsoft.com to acquire an offline license."
            } else {
                msg = (err.userInfo[NSUnderlyingErrorKey] as? String) ?? err.localizedDescription
            }
            print(msg)
        }

        /// Applies `template` as the recognizer's runtime settings unless it is the
        /// template that was applied last.
        /// - Parameter template: Runtime settings content handed to `initRuntimeSettings`.
        public func updateTemplate(template: String) {
            guard currentTemplate != template else {
                return
            }
            // Remember the template even if it is rejected, so that an invalid
            // template is reported once instead of on every call.
            currentTemplate = template
            do {
                try getRecognizer().initRuntimeSettings(template)
            } catch {
                print("Failed to apply template: \(error.localizedDescription)")
            }
        }

        /// Loads the character models stored in `modelFolder` of the main bundle,
        /// unless that folder was already processed by the previous call.
        ///
        /// Each model needs a `.prototxt`, a `.txt` and a `.caffemodel` file; a
        /// model with a missing or unreadable file is skipped.
        /// - Parameters:
        ///   - modelFolder: Bundle subdirectory that contains the model files.
        ///   - modelFileNames: Model names without file extension.
        public func useCustomModel(modelFolder: String, modelFileNames: [String]) {
            guard modelFolder != currentModelFolder else {
                return
            }
            currentModelFolder = modelFolder
            for model in modelFileNames {
                guard
                    let prototxt = modelData(named: model, withExtension: "prototxt", in: modelFolder),
                    let txt = modelData(named: model, withExtension: "txt", in: modelFolder),
                    let caffemodel = modelData(named: model, withExtension: "caffemodel", in: modelFolder)
                else {
                    print("model not exist: \(model)")
                    continue
                }
                DynamsoftLabelRecognizer.appendCharacterModel(
                    model,
                    prototxtBuffer: prototxt,
                    txtBuffer: txt,
                    characterModelBuffer: caffemodel
                )
                print("load model \(model)")
            }
        }

        /// Reads one model file from the main bundle; returns `nil` when the file
        /// is missing or cannot be read.
        private func modelData(named name: String, withExtension ext: String, in folder: String) -> Data? {
            guard let url = Bundle.main.url(forResource: name, withExtension: ext, subdirectory: folder) else {
                return nil
            }
            return try? Data(contentsOf: url)
        }
    }
    
  5. Use the manager to create an instance of Dynamsoft Label Recognizer the first time the plugin is called:

    /// Frame processor plugin that lazily creates a shared Dynamsoft Label
    /// Recognizer the first time it is called.
    @objc(VisionCameraDLRPlugin)
    public class VisionCameraDLRPlugin: NSObject, FrameProcessorPluginBase {
        private static var recognizer: DynamsoftLabelRecognizer!
        private static var manager: LabelRecognizerManager!
        private static let context = CIContext(options: nil)

        /// Plugin entry point.
        ///
        /// On the first call it creates the manager with the license taken from
        /// the config (or the built-in trial license) and stores its recognizer.
        /// At this stage it always returns `nil`.
        @objc
        public static func callback(_ frame: Frame!, withArgs args: [Any]!) -> Any! {
            let config = getConfig(withArgs: args)
            if manager == nil {
                let license: String = config?["license"] as? String ?? "DLS2eyJoYW5kc2hha2VDb2RlIjoiMjAwMDAxLTE2NDk4Mjk3OTI2MzUiLCJvcmdhbml6YXRpb25JRCI6IjIwMDAwMSIsInNlc3Npb25QYXNzd29yZCI6IndTcGR6Vm05WDJrcEQ5YUoifQ=="  //default 1-day public trial. Apply for a trial license here: https://www.dynamsoft.com/customer/license/trialLicense/?product=dlr
                manager = LabelRecognizerManager(license: license)
                recognizer = manager.getRecognizer()
            }
            return nil
        }

        /// Returns the first argument as a config dictionary.
        ///
        /// - Returns: `nil` when `args` is `nil` or empty, or when its first
        ///   element is not a `[String: Any]` dictionary.
        static func getConfig(withArgs args: [Any]!) -> [String: Any]! {
            // Optional chaining keeps a nil `args` from crashing on `count`.
            return args?.first as? [String: Any]
        }
    }
    
  6. Update the template and load models if they are in the config.

    // Load custom models only when the config carries a well-formed
    // "customModelConfig" entry; a missing config or a value of the wrong type
    // is skipped instead of crashing on a forced unwrap or cast.
    if let customModelConfig = config?["customModelConfig"] as? [String: Any],
       let modelFolder = customModelConfig["customModelFolder"] as? String,
       let modelFileNames = customModelConfig["customModelFileNames"] as? [String] {
        manager.useCustomModel(modelFolder: modelFolder, modelFileNames: modelFileNames)
    }

    // Apply the template only when a non-empty one is provided.
    if let template = config?["template"] as? String, !template.isEmpty {
        manager.updateTemplate(template: template)
    }
    
  7. Convert the frame to CGImage and crop it if a scan region is set.

    Convert:

    // Get the image buffer of the frame's sample buffer; without it there is
    // nothing to recognize, so the plugin returns nil.
    guard let imageBuffer = CMSampleBufferGetImageBuffer(frame.buffer) else {
      print("Failed to get CVPixelBuffer!")
      return nil
    }
    let ciImage = CIImage(cvPixelBuffer: imageBuffer)
    
    // Render the full extent of the CIImage into a CGImage using `context`.
    guard let cgImage = context.createCGImage(ciImage, from: ciImage.extent) else {
      print("Failed to create CGImage!")
      return nil
    }
    

    Crop:

    // Image handed to the recognizer: the scan region when one is configured
    // and can be cropped, the full frame otherwise.
    let image: UIImage
    // NSNumber accepts fractional percentages as well as integral ones.
    if let scanRegion = config?["scanRegion"] as? [String: NSNumber] {
        let imgWidth = Double(cgImage.width)
        let imgHeight = Double(cgImage.height)
        // The region values are percentages of the image size.
        let left = (scanRegion["left"]?.doubleValue ?? 0) / 100.0 * imgWidth
        let top = (scanRegion["top"]?.doubleValue ?? 0) / 100.0 * imgHeight
        let width = (scanRegion["width"]?.doubleValue ?? 100) / 100.0 * imgWidth
        let height = (scanRegion["height"]?.doubleValue ?? 100) / 100.0 * imgHeight

        // The cropRect is the rect of the image to keep, in pixels,
        // positioned by the region's left/top offsets.
        let cropRect = CGRect(
            x: left,
            y: top,
            width: width,
            height: height
        ).integral

        // cropping(to:) returns nil for a rect it cannot crop to, so fall
        // back to the full frame instead of force-unwrapping.
        if let cropped = cgImage.cropping(to: cropRect) {
            image = UIImage(cgImage: cropped)
            print("use cropped image")
        } else {
            print("Failed to crop image, use the full image instead")
            image = UIImage(cgImage: cgImage)
        }
    } else {
        image = UIImage(cgImage: cgImage)
    }
    
  8. Recognize text from the image and wrap the result.

    // Collect the wrapped recognition results (and optionally the image) into
    // the dictionary that is handed back to the caller.
    var scanResult: [String: Any] = [:]
    var returned_results: [Any] = []
    do {
        let results = try recognizer.recognizeImage(image)
        for result in results {
            returned_results.append(Utils.wrapDLRResult(result: result))
        }
    } catch {
        // A failed recognition yields an empty result list instead of a crash.
        print("Failed to recognize text: \(error.localizedDescription)")
    }

    scanResult["results"] = returned_results
    // `config` may be nil when no config object was passed to the plugin.
    let includeImageBase64 = config?["includeImageBase64"] as? Bool ?? false
    if includeImageBase64 {
        scanResult["imageBase64"] = Utils.getBase64FromImage(image)
    }
    

    A Utils class is used to store helper methods.

       
    /// Helper methods for Base64 image conversion and for turning recognition
    /// results into plain dictionaries.
    class Utils {
        /// Decodes a Base64 string into an image.
        /// - Parameter imageStr: Base64 text; unknown characters are ignored.
        /// - Returns: The decoded image, or `nil` when the text is not valid
        ///   Base64 or the decoded data is not an image.
        static public func convertBase64ToImage(_ imageStr: String) -> UIImage? {
            guard let data = Data(base64Encoded: imageStr, options: .ignoreUnknownCharacters) else {
                return nil
            }
            return UIImage(data: data)
        }

        /// Encodes an image as a Base64 string of its JPEG data.
        /// - Returns: The Base64 string, or an empty string when no JPEG data
        ///   can be produced.
        static func getBase64FromImage(_ image: UIImage) -> String {
            // compressionQuality ranges from 0.0 to 1.0; 1.0 is the best quality.
            guard let data = image.jpegData(compressionQuality: 1.0) else {
                return ""
            }
            return data.base64EncodedString()
        }

        /// Converts a recognition result, including its line results, into a
        /// dictionary.
        static func wrapDLRResult(result: iDLRResult) -> [String: Any] {
            var dict: [String: Any] = [:]
            dict["confidence"] = result.confidence
            dict["pageNumber"] = result.pageNumber
            dict["referenceRegionName"] = result.referenceRegionName
            dict["textAreaName"] = result.textAreaName
            dict["location"] = wrapLocation(location: result.location)
            // Missing line results become an empty list instead of a crash.
            dict["lineResults"] = (result.lineResults ?? []).map { wrapDLRLineResult(result: $0) }
            return dict
        }

        /// Converts a line result, including its character results, into a
        /// dictionary.
        static private func wrapDLRLineResult(result: iDLRLineResult) -> [String: Any] {
            var dict: [String: Any] = [:]
            dict["confidence"] = result.confidence
            dict["text"] = result.text
            dict["characterModelName"] = result.characterModelName
            dict["lineSpecificationName"] = result.lineSpecificationName
            dict["location"] = wrapLocation(location: result.location)
            // Missing character results become an empty list instead of a crash.
            dict["characterResults"] = (result.characterResults ?? []).map { wrapDLRCharacterResult(result: $0) }
            return dict
        }

        /// Converts a character result into a dictionary.
        static private func wrapDLRCharacterResult(result: iDLRCharacterResult) -> [String: Any] {
            var dict: [String: Any] = [:]
            dict["characterH"] = result.characterH
            dict["characterHConfidence"] = result.characterHConfidence
            dict["characterM"] = result.characterM
            dict["characterMConfidence"] = result.characterMConfidence
            dict["characterL"] = result.characterL
            dict["characterLConfidence"] = result.characterLConfidence
            dict["location"] = wrapLocation(location: result.location)
            return dict
        }

        /// Converts a quadrilateral into `["points": [["x": ..., "y": ...], ...]]`.
        ///
        /// A `nil` location, or points that cannot be read as `CGPoint` values,
        /// produce an empty "points" list.
        static private func wrapLocation(location: iQuadrilateral?) -> [String: Any] {
            let cgPoints = location?.points as? [CGPoint] ?? []
            let points: [[String: CGFloat]] = cgPoints.map { ["x": $0.x, "y": $0.y] }
            return ["points": points]
        }
    }
    

All right, we’ve now finished writing the iOS part of the plugin.

Source Code

https://github.com/tony-xlh/vision-camera-dynamsoft-label-recognizer