How to Recognize MRZ from Passport and ID Card with Node.js

This article shows how to build a Node.js module that recognizes the MRZ (machine-readable zone) on passports, ID cards, and other travel documents on Windows and Linux. The module is built on top of the Dynamsoft C++ OCR SDK.

Prerequisites

  • Node.js
  • Platform-specific C/C++ compiler
  • TypeScript

      npm install -g typescript
      npm install --save @types/node
    
  • node-gyp

      npm i node-gyp -g
    

License Key

Get a 30-day FREE trial license of Dynamsoft OCR SDK.

Building Node.js MRZ Scanner Module

MRZ Recognition Model

Before implementing the Node.js API, let’s take a quick look at the MRZ model. The MRZ recognition model consists of four files:

  • MRZ.caffemodel
  • MRZ.prototxt
  • MRZ.txt
  • MRZ.json

To use the model, put MRZ.caffemodel, MRZ.prototxt and MRZ.txt in the same folder, named model, and set the DirectoryPath parameter in the MRZ.json file to that folder. In practice, DirectoryPath should be an absolute path, so it has to be updated dynamically according to where the module is located. You will see the code snippet that does this in a later section.

Project Structure of Node.js C++ Addon

A basic Node.js C++ extension project consists of three files:

  • binding.gyp: the configuration file for building the project
  • *.cc: the C/C++ source code file
  • index.js: the JavaScript glue code for calling the C++ API

The binding.gyp file

Check the binding.gyp file to see how to build the Node.js addon respectively on Windows and Linux:

# node-gyp build description for the "mrzscanner" addon.
# One target is built from src/mrzscanner.cc; the per-OS sections below select
# the Dynamsoft libraries to link against and the runtime files to copy next to
# the built addon.
{
    "targets": [
        {
            'target_name': "mrzscanner",
            'sources': ["src/mrzscanner.cc"],
            'include_dirs': [
                        "include"
            ],
            'conditions': [
                # Linux: link against the shared objects shipped in lib/linux.
                ['OS=="linux"', {
                    'defines': [
                        'LINUX_MRZ',
                    ],
                    "cflags" : [
                        "-std=c++11"
                    ],
                    # rpath of $ORIGIN: the dynamic loader searches the addon's own
                    # directory, which is where the 'copies' step below places the
                    # .so files.
                    'ldflags': [
                                "-Wl,-rpath,'$$ORIGIN'"
                    ],
                    
                    'libraries': [
                        "-lDynamsoftLabelRecognizer", "-L../lib/linux"
                    ],
                    # Runtime dependencies copied into the build output directory.
                    'copies': [
                        {
                            'destination': 'build/Release/',
                            'files': [
                                './lib/linux/libDynamicPdf.so',
                                './lib/linux/libDynamsoftLabelRecognizer.so',
                                './lib/linux/libDynamsoftLicenseClient.so',
                            ]
                        }
                    ]
                }],
                # Windows: link against the x64 import library shipped in lib/win.
                ['OS=="win"', {
                    'defines': [
                        'WINDOWS_MRZ',
                    ],
                    'libraries': [
                        "-l../lib/win/DynamsoftLabelRecognizerx64.lib"
                    ],
                    # Runtime DLLs copied into the build output directory.
                    'copies': [
                        {
                            'destination': 'build/Release/',
                            'files': [
                                './lib/win/DynamsoftLabelRecognizerx64.dll',
                                './lib/win/DynamsoftLicenseClientx64.dll',
                                './lib/win/vcomp140.dll',
                                './lib/win/DynamicPdfx64.dll',
                            ]
                        }
                    ]
                }]
            ]
        }
    ]
}

C/C++ API for MRZ recognition

Following the Node.js addon tutorial, we create an mrzscanner.h file that declares the native class and its functions.

#ifndef MRZSCANNER_H
#define MRZSCANNER_H

#include <node.h>
#include <node_object_wrap.h>

class MrzScanner : public node::ObjectWrap {
    public:
    static void Init(v8::Local<v8::Object> exports);
    void *handler;

    private:
    
    explicit MrzScanner();
    ~MrzScanner();

    static void New(const v8::FunctionCallbackInfo<v8::Value>& args);
    static void LoadModel(const v8::FunctionCallbackInfo<v8::Value>& args);
    static void DecodeFileAsync(const v8::FunctionCallbackInfo<v8::Value> &args);
    static void DecodeBufferAsync(const v8::FunctionCallbackInfo<v8::Value> &args);
};

#endif

The void *handler is the pointer to the C++ MRZ SDK object.

In mrzscanner.cc, we need to export the class and two global functions.

// Module entry point: attaches the two free functions to `exports`, then lets
// MrzScanner register its own constructor.
void Init(Local<Object> exports)
{
    struct GlobalBinding
    {
        const char *jsName;
        v8::FunctionCallback native;
    };

    // Registration order matches the order in which the properties appear on `exports`.
    const GlobalBinding globals[] = {
        {"initLicense", InitLicense},
        {"getVersionNumber", GetVersionNumber},
    };

    for (const GlobalBinding &binding : globals)
    {
        NODE_SET_METHOD(exports, binding.jsName, binding.native);
    }

    MrzScanner::Init(exports);
}

NODE_MODULE(MRZ, Init)

The initLicense function is used to initialize the license. The getVersionNumber function is used to get the version number of the SDK.

void InitLicense(const FunctionCallbackInfo<Value> &args)
{
	Isolate *isolate = args.GetIsolate();
	Local<Context> context = isolate->GetCurrentContext();

	String::Utf8Value license(isolate, args[0]);
	char *pszLicense = *license;
	char errorMsgBuffer[512];
	int ret = DLR_InitLicense(pszLicense, errorMsgBuffer, 512);
	args.GetReturnValue().Set(Number::New(isolate, ret));
}

void GetVersionNumber(const FunctionCallbackInfo<Value> &args)
{
	Isolate *isolate = Isolate::GetCurrent();
	args.GetReturnValue().Set(String::NewFromUtf8(
								  isolate, DLR_GetVersion())
								  .ToLocalChecked());
}

To make the C++ methods loadModel, decodeFileAsync and decodeBufferAsync accessible from JavaScript, we export them in the MrzScanner::Init() method.

// Builds the JavaScript constructor for MrzScanner, attaches its prototype
// methods and publishes it on `exports` under the name "MrzScanner".
void MrzScanner::Init(Local<Object> exports)
{
    Isolate *iso = exports->GetIsolate();
    Local<Context> ctx = iso->GetCurrentContext();
    Local<String> className = String::NewFromUtf8(iso, "MrzScanner").ToLocalChecked();

    // Holder object with one internal slot; it is handed to New() as callback
    // data and later stores the constructor itself.
    Local<ObjectTemplate> holderTemplate = ObjectTemplate::New(iso);
    holderTemplate->SetInternalFieldCount(1);
    Local<Object> holder = holderTemplate->NewInstance(ctx).ToLocalChecked();

    Local<FunctionTemplate> classTemplate = FunctionTemplate::New(iso, New, holder);
    classTemplate->SetClassName(className);
    // One internal field per instance, used by ObjectWrap to store the native object.
    classTemplate->InstanceTemplate()->SetInternalFieldCount(1);

    NODE_SET_PROTOTYPE_METHOD(classTemplate, "loadModel", LoadModel);
    NODE_SET_PROTOTYPE_METHOD(classTemplate, "decodeFileAsync", DecodeFileAsync);
    NODE_SET_PROTOTYPE_METHOD(classTemplate, "decodeBufferAsync", DecodeBufferAsync);

    Local<Function> ctor = classTemplate->GetFunction(ctx).ToLocalChecked();
    holder->SetInternalField(0, ctor);
    exports->Set(ctx, className, ctor).FromJust();
}

void MrzScanner::New(const FunctionCallbackInfo<Value>& args) {
  Isolate* isolate = args.GetIsolate();
  Local<Context> context = isolate->GetCurrentContext();

  if (args.IsConstructCall()) {
    MrzScanner* obj = new MrzScanner();
    obj->Wrap(args.This());
    args.GetReturnValue().Set(args.This());
  } else {
    const int argc = 1;
    Local<Value> argv[argc] = { args[0] };
    Local<Function> cons =
        args.Data().As<Object>()->GetInternalField(0).As<Function>();
    Local<Object> result =
        cons->NewInstance(context, argc, argv).ToLocalChecked();
    args.GetReturnValue().Set(result);
  }
}

Before calling the MRZ recognition methods, we must load the MRZ model. The API for loading the MRZ model is DLR_AppendSettingsFromFile():

void MrzScanner::LoadModel(const FunctionCallbackInfo<Value> &args)
{
	Isolate *isolate = args.GetIsolate();

	MrzScanner *obj = ObjectWrap::Unwrap<MrzScanner>(args.Holder());

	String::Utf8Value fileName(isolate, args[0]); 
	char *pFileName = *fileName;

	char errorMsgBuffer[512];
	int ret = DLR_AppendSettingsFromFile(obj->handler, pFileName, errorMsgBuffer, 512);

	args.GetReturnValue().Set(Number::New(isolate, ret));
}

Finally, let’s use libuv’s thread pool to implement the asynchronous functions DecodeFileAsync() and DecodeBufferAsync(). The uv_queue_work() function queues our MRZ recognition tasks and executes them on a native thread.

// Kind of image input carried by a worker: none (decode from a file name) or
// a raw pixel buffer.
enum BufferType
{
	NO_BUFFER = 0,
	RGB_BUFFER = 1,
};

// Per-request state shared between the JavaScript-facing method that queues the
// work and the libuv work callbacks.
// Every scalar and pointer member has a default so that fields a particular
// entry point does not set (e.g. width/height/stride for file decoding) are
// never read as indeterminate values.
struct MRZWorker
{
	void *handler = nullptr;			  // SDK handle copied from the MrzScanner instance
	uv_work_t request;					  // libuv work request; request.data points back at this worker
	Persistent<Function> callback;		  // JavaScript callback
	char filename[128] = {0};			  // file name (NUL-terminated), used for file decoding
	DLR_ResultArray *pResults = nullptr;  // result pointer
	unsigned char *buffer = nullptr;	  // image bytes, used for buffer decoding
	int size = 0;						  // file size
	int errorCode = 0;					  // detection error code
	int width = 0;						  // image width
	int height = 0;						  // image height
	BufferType bufferType = NO_BUFFER;	  // buffer type
	int stride = 0;						  // image stride
};

void MrzScanner::DecodeFileAsync(const FunctionCallbackInfo<Value> &args)
{
	Isolate *isolate = args.GetIsolate();
	MrzScanner *obj = ObjectWrap::Unwrap<MrzScanner>(args.Holder());
	Local<Context> context = isolate->GetCurrentContext();

	String::Utf8Value fileName(isolate, args[0]);
	char *pFileName = *fileName;
	Local<Function> cb = Local<Function>::Cast(args[1]); 

	MRZWorker *worker = new MRZWorker;
	worker->handler = obj->handler;
	worker->request.data = worker;
	strcpy(worker->filename, pFileName);
	worker->callback.Reset(isolate, cb);
	worker->pResults = NULL;
	worker->buffer = NULL;
	worker->bufferType = NO_BUFFER;

	uv_queue_work(uv_default_loop(), &worker->request, (uv_work_cb)DetectionWorking, (uv_after_work_cb)DetectionDone);
}

void MrzScanner::DecodeBufferAsync(const FunctionCallbackInfo<Value> &args)
{
	Isolate *isolate = Isolate::GetCurrent();
	MrzScanner *obj = ObjectWrap::Unwrap<MrzScanner>(args.Holder());
	Local<Context> context = isolate->GetCurrentContext();

	unsigned char *buffer = (unsigned char *)node::Buffer::Data(args[0]); 
	int width = args[1]->Int32Value(context).ToChecked();				  
	int height = args[2]->Int32Value(context).ToChecked();				  
	int stride = args[3]->Int32Value(context).ToChecked();				  

	MRZWorker *worker = new MRZWorker;
	worker->handler = obj->handler;
	worker->request.data = worker;
	worker->callback.Reset(isolate, cb);
	worker->pResults = NULL;
	worker->buffer = buffer;
	worker->width = width;
	worker->height = height;
	worker->bufferType = RGB_BUFFER;
	worker->stride = stride;

	uv_queue_work(uv_default_loop(), &worker->request, (uv_work_cb)DetectionWorking, (uv_after_work_cb)DetectionDone);
}

Now, we can build the project to generate a shared library for Node.js.

node-gyp configure
node-gyp build

Create JavaScript Methods based on C/C++ API

As the native library is ready, it’s time to write the JavaScript code.

  1. Create an index.ts file, which is written in TypeScript.
  2. Import the C/C++ library:

     const mrzscanner = require('./build/Release/mrzscanner');
    
  3. Define an MrzScanner class:

     /** TypeScript facade over the native scanner object exported by the addon. */
     class MrzScanner {
         /** Native scanner instance, obtained by calling the addon's MrzScanner factory. */
         obj: any = mrzscanner.MrzScanner();
     }
    
  4. Add static methods to set license key and get the version number of Dynamsoft C++ OCR SDK.

     /**
      * Passes the license key to the native addon.
      * @param license license key string
      * @returns the integer code returned by the native initLicense call
      */
     static initLicense(license: string): number {
         return mrzscanner.initLicense(license);
     }
    
     /** Returns the version string reported by the native addon. */
     static getVersionNumber(): string {
         const version: string = mrzscanner.getVersionNumber();
         return version;
     }
    
  5. Add instance methods: loadModel(), decodeFileAsync(), decodeBufferAsync(), parseTwoLines(), and parseThreeLines().

    • loadModel(): load the MRZ recognition model
        /**
         * Loads the MRZ recognition model described by `<modelDir>/MRZ.json`.
         *
         * If the settings file still contains the placeholder directory 'model',
         * it is rewritten in place to point at the absolute path of
         * `<modelDir>/model` before being handed to the native loader.
         *
         * @param modelDir directory containing MRZ.json and the model folder
         * @returns the integer code returned by the native loadModel call
         */
        loadModel(modelDir: string): number {
            const modelPath = path.join(modelDir, 'MRZ.json');
            // Read as text: JSON.parse expects a string, not a Buffer.
            const config = JSON.parse(fs.readFileSync(modelPath, 'utf8'));
            const characterModel = config['CharacterModelArray'][0];
            if (characterModel['DirectoryPath'] === 'model') {
                // path.resolve yields an absolute path even when modelDir is relative.
                characterModel['DirectoryPath'] = path.resolve(modelDir, 'model');
                fs.writeFileSync(modelPath, JSON.stringify(config));
            }
            return this.obj.loadModel(modelPath);
        }
      
    • decodeFileAsync(): asynchronous method to decode MRZ from image file
        /**
         * Promise wrapper around the native decodeFileAsync call.
         * Resolves with the native result, or rejects with the error passed to
         * the callback.
         */
        decodeFileAsync(filePath: string): Promise<any> {
            return new Promise((resolve, reject) => {
                const onDone = (err, result) => {
                    if (!err) {
                        resolve(result);
                        return;
                    }
                    reject(err);
                };
                this.obj.decodeFileAsync(filePath, onDone);
            });
        }
      
    • decodeBufferAsync(): asynchronous method to decode MRZ from image buffer
        /**
         * Promise wrapper around the native decodeBufferAsync call.
         * Resolves with the native result, or rejects with the error passed to
         * the callback.
         */
        decodeBufferAsync(buffer: Buffer, width: number, height: number, stride: number): Promise<any> {
            return new Promise((resolve, reject) => {
                const onDone = (err, result) => {
                    if (!err) {
                        resolve(result);
                        return;
                    }
                    reject(err);
                };
                this.obj.decodeBufferAsync(buffer, width, height, stride, onDone);
            });
        }
      
    • parseTwoLines(): parse the MRZ information from two lines.
        /**
         * Parses MRZ information from a two-line MRZ.
         * Returns false when the first character of line1 is not I, P or V;
         * otherwise the document type is derived from that character (and, for
         * visas, from the length of line1).
         */
        parseTwoLines(line1: string, line2: string): any {
            let mrzInfo: any = {};
            let type = line1.substring(0, 1);
            // Inside a character class '|' is a literal, so the class lists only the letters.
            if (!(/[IPV]/.test(type))) return false;
            if (type === 'P') {
                mrzInfo.type = 'PASSPORT (TD-3)';
            } else if (type === 'V') {
                if (line1.length === 44) {
                    mrzInfo.type = 'VISA (MRV-A)';
                } else if (line1.length === 36) {
                    mrzInfo.type = 'VISA (MRV-B)';
                }
            } else if (type === 'I') {
                mrzInfo.type = 'ID CARD (TD-2)';
            }
            ...
        }
      
    • parseThreeLines(): parse the MRZ information from three lines.
        /**
         * Parses MRZ information from a three-line MRZ.
         * Returns false when the first character of line1 is not I, P or V;
         * otherwise the document type is set to 'ID CARD (TD-1)'.
         */
        parseThreeLines(line1: string, line2: string, line3: string): any {
            let mrzInfo: any = {};
            let type = line1.substring(0, 1);
            // Inside a character class '|' is a literal, so the class lists only the letters.
            if (!(/[IPV]/.test(type))) return false;
            mrzInfo.type = 'ID CARD (TD-1)';
            ...
        }
      
  6. Generate the index.js file:

     tsc index.ts
    

Example of Using Node.js MRZ Scanner Module

  1. Install OpenCV and follow the tutorial of OpenCV4Nodejs to install the module.
     npm install opencv4nodejs
    
  2. Install mrz4nodejs.

     npm install mrz4nodejs
    
  3. Copy a passport image to your project folder and create an app.js file to recognize MRZ from the image:

     // Example: recognize the MRZ in test.png, print the parsed fields and show
     // the image with each recognized line outlined.
     const cv = require('opencv4nodejs');
     const path = require('path');
     const MrzScanner = require('mrz4nodejs');

     // Outline style for the recognized MRZ lines.
     const drawParams = { color: new cv.Vec(0, 255, 0), thickness: 2 };

     console.log(MrzScanner.getVersionNumber());
     MrzScanner.initLicense('DLS2eyJoYW5kc2hha2VDb2RlIjoiMjAwMDAxLTE2NDk4Mjk3OTI2MzUiLCJvcmdhbml6YXRpb25JRCI6IjIwMDAwMSIsInNlc3Npb25QYXNzd29yZCI6IndTcGR6Vm05WDJrcEQ5YUoifQ==');

     const obj = new MrzScanner();
     // The model files are looked up in the directory of the installed mrz4nodejs package.
     obj.loadModel(path.dirname(require.resolve('mrz4nodejs')));

     (async function () {
         try {
             const img = cv.imread('test.png');

             const results = await obj.decodeBufferAsync(img.getData(), img.cols, img.rows, img.step);

             console.log(results);

             if (results.length == 2) {
                 console.log(obj.parseTwoLines(results[0].text, results[1].text));
             }
             else if (results.length == 3) {
                 console.log(obj.parseThreeLines(results[0].text, results[1].text, results[2].text));
             }

             // for...of with a block-scoped variable: no implicit global, and only
             // array elements are visited.
             for (const result of results) {
                 // Corners in the order reported by the scanner: (x1,y1) .. (x4,y4).
                 const corners = [
                     new cv.Point(result.x1, result.y1),
                     new cv.Point(result.x2, result.y2),
                     new cv.Point(result.x3, result.y3),
                     new cv.Point(result.x4, result.y4),
                 ];

                 // Connect each corner to the next one, closing the outline back to the first.
                 corners.forEach((corner, i) => {
                     img.drawLine(corner, corners[(i + 1) % corners.length], drawParams);
                 });
             }

             cv.imshow('MRZ Scanner', img);
             cv.waitKey(0);
         } catch (error) {
             console.log(error);
         }
     })();
    
    
  4. Run the app.js file:

     node app.js
    

    Node.js MRZ recognition

Source Code

https://github.com/yushulx/mrz4nodejs