Batch Document Scanning with the Aid of Patch Code

Mar 31, 2023

Patch Code is a barcode developed by Kodak for use in automated scanning. It is mainly used to classify or organize documents. Several documents may need to be scanned and we have to save them into different PDF files. Instead of running several separate jobs (loading the scanner with each document separately), the documents are loaded into the scanner with a Patch Code page separating each document.¹

Dynamic Web TWAIN is an SDK which enables document scanning from browsers and it also comes with Dynamsoft Barcode Reader to read barcodes including Patch Code. In this article, we are going to create a web app to scan documents with Patch Code pages in between and recognize Patch Code to separate them into different PDF files.

Online demo

Build a Document Scanning App

Create a new HTML file with the following content.

<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Patch Code Batch Scanning Sample</title>
    <style>
      /* Flex container for the viewer and anything placed next to it. */
      .app {
        display: flex;
      }
    </style>
  </head>
  <body>
    <div>
      <h2 class="title">Batch Document Scanning with Patch Code</h2>
      <div class="app">
        <!-- Empty host element; its id is the ContainerId passed to Dynamic Web TWAIN in init(). -->
        <div id="dwtcontrolContainer"></div>
      </div>
    </div>
    <!-- Placeholder for the page's JavaScript. -->
    <script>
    </script>
  </body>
</html>

Include the library of Dynamic Web TWAIN in the head.

<script src="https://unpkg.com/dwt@18.0.0/dist/dynamsoft.webtwain.min.js"></script>

Initialize Dynamic Web TWAIN when the page loads. You can apply for a trial license here.

// Handle to the Dynamic Web TWAIN instance; stays undefined until the
// OnWebTwainReady callback below has run.
let DWObject;

/**
 * Binds the viewer to #dwtcontrolContainer, stores the resulting instance in
 * DWObject once the SDK reports it is ready, and starts loading the SDK.
 */
function init() {
  const viewerContainer = { ContainerId: 'dwtcontrolContainer', Width: 360, Height: 480 };
  Dynamsoft.DWT.Containers = [viewerContainer];
  Dynamsoft.DWT.RegisterEvent('OnWebTwainReady', () => {
    DWObject = Dynamsoft.DWT.GetWebTwain('dwtcontrolContainer');
    // presumably a 2x2 thumbnail grid — confirm against the SetViewMode docs
    DWObject.SetViewMode(2, 2);
  });
  Dynamsoft.DWT.Load();
}

// Configure the SDK on page load; AutoLoad is turned off and init() calls
// Dynamsoft.DWT.Load() itself.
window.onload = () => {
  Dynamsoft.DWT.AutoLoad = false;
  //Dynamsoft.DWT.ProductKey = "<your license key>";
  Dynamsoft.DWT.ResourcesPath = "https://unpkg.com/dwt@18.0.0/dist";
  init();
};

Add a button to scan documents. It will display a list to select which scanner to use and the scanner’s UI for the user to scan documents. We can set whether to enable duplex scan and auto document feeder.

HTML:

<div class="app">
  <div id="dwtcontrolContainer"></div>
  <div class="controls">
    <!-- Clicking this button calls the page's scan() function. -->
    <button onclick="scan();">Scan</button>
  </div>
</div>

JavaScript:

/**
 * Asks the SDK to select a scanner source; on success opens that source and
 * starts acquiring images. Logs a message when source selection fails.
 * Does nothing while DWObject is not set.
 */
function scan() {
  if (!DWObject) {
    return;
  }

  const onSourceSelected = () => {
    DWObject.OpenSource();
    DWObject.AcquireImage();
  };

  const onSelectionFailed = () => {
    console.log("SelectSource failed!");
  };

  DWObject.SelectSource(onSourceSelected, onSelectionFailed);
}

Screenshot using Kodak i2600:

Kodak scanner UI

Use Patch Code to Separate the Documents

Read Patch Code

There are two ways to read Patch Code. One is using the scanner’s built-in capability and the other is using a barcode reading library like Dynamsoft Barcode Reader.

Use Scanner to Read Patch Code

Pass configuration for the AcquireImage function to enable getting extended image info.

// Acquire with an options object so that image info and extended image info
// are requested along with each transferred image.
DWObject.AcquireImage({
  IfCloseSourceAfterAcquire: true, // presumably closes the source once acquisition ends — confirm in the SDK docs
  IfGetImageInfo: true,
  IfGetExtImageInfo:true,
  // NOTE(review): "supported" presumably limits the query to items the scanner supports — confirm
  extendedImageInfoQueryLevel:Dynamsoft.DWT.EnumDWT_ExtImageInfo.supported
});

In the scanner’s UI, enable Patch Code detection. (The screenshot shows Fujitsu fi-7300NX’s configuration page.)

Register the OnPostTransferAsync event where we can get the extended image info including the Patch Code reading result.

// Log the info object passed to each OnPostTransferAsync callback; the
// sample outputs below show its imageInfo and extendedImageInfo fields.
DWObject.RegisterEvent("OnPostTransferAsync", function (outputInfo) {
  console.log(outputInfo);
});

The output with Patch Code detected is like the following:

{
    "imageId": 239514301,
    "imageInfo": {
        "XResolution": 300,
        "YResolution": 300,
        "ImageWidth": 2480,
        "ImageLength": 3301,
        "BitsPerPixel": 1,
        "SamplesPerPixel": 1,
        "BitsPerSample": [
            1,
            0,
            0,
            0,
            0,
            0,
            0,
            0
        ],
        "Planar": false,
        "PixelType": 0,
        "Compression": 0,
        "ImageLayout": {
            "Frame": {
                "Left": 0,
                "Top": 0,
                "Right": 8.5,
                "Bottom": 11
            },
            "DocumentNumber": 1,
            "PageNumber": 1,
            "FrameNumber": 1
        }
    },
    "extendedImageInfo": {
        "barcode": [],
        "others": {
            "TWEI_DESKEWSTATUS": {
                "BINARY": false,
                "Error": true,
                "VALUE": "Data Not Available"
            },
            "TWEI_SKEWORIGINALANGLE": {
                "BINARY": false,
                "Error": true,
                "VALUE": "Data Not Available"
            },
            "TWEI_DOCUMENTNUMBER": 1,
            "TWEI_PAGENUMBER": 1,
            "TWEI_CAMERA": {
                "BINARY": false,
                "Error": false,
                "VALUE": "/Camera_Bitonal_Front"
            },
            "TWEI_FRAMENUMBER": 1,
            "TWEI_FRAME": [
                0,
                0,
                8.5,
                11
            ],
            "TWEI_PIXELFLAVOR": 0,
            "TWEI_PAGESIDE": 1,
            "TWEI_PATCHCODE": 5,
            "ExtImageInfo ID:0x8801": 0,
            "TWEI_PAPERCOUNT": 1
        }
    }
}

The output without Patch Code detected is like the following:

{
    "imageId": 239514300,
    "imageInfo": {
        "XResolution": 300,
        "YResolution": 300,
        "ImageWidth": 2477,
        "ImageLength": 3301,
        "BitsPerPixel": 1,
        "SamplesPerPixel": 1,
        "BitsPerSample": [
            1,
            0,
            0,
            0,
            0,
            0,
            0,
            0
        ],
        "Planar": false,
        "PixelType": 0,
        "Compression": 0,
        "ImageLayout": {
            "Frame": {
                "Left": 0,
                "Top": 0,
                "Right": 8.5,
                "Bottom": 11
            },
            "DocumentNumber": 1,
            "PageNumber": 1,
            "FrameNumber": 1
        }
    },
    "extendedImageInfo": {
        "barcode": [
            {
                "TWEI_BARCODETYPE": 10,
                "TWEI_BARCODETEXTLENGTH": 13,
                "TWEI_BARCODEX": 399,
                "TWEI_BARCODEY": 453,
                "TWEI_BARCODEROTATION": 0,
                "TWEI_BARCODECONFIDENCE": 0,
                "BINARY": false,
                "TWEI_BARCODETEXT": "9781118097540"
            }
        ],
        "others": {
            "TWEI_DESKEWSTATUS": {
                "BINARY": false,
                "Error": true,
                "VALUE": "Data Not Available"
            },
            "TWEI_SKEWORIGINALANGLE": {
                "BINARY": false,
                "Error": true,
                "VALUE": "Data Not Available"
            },
            "TWEI_DOCUMENTNUMBER": 1,
            "TWEI_PAGENUMBER": 1,
            "TWEI_CAMERA": {
                "BINARY": false,
                "Error": false,
                "VALUE": "/Camera_Bitonal_Front"
            },
            "TWEI_FRAMENUMBER": 1,
            "TWEI_FRAME": [
                0,
                0,
                8.5,
                11
            ],
            "TWEI_PIXELFLAVOR": 0,
            "TWEI_PAGESIDE": 1,
            "TWEI_PATCHCODE": {
                "BINARY": false,
                "Error": true,
                "VALUE": "Data Not Available"
            },
            "ExtImageInfo ID:0x8801": 0,
            "TWEI_PAPERCOUNT": 1,
            "TWEI_BARCODECOUNT": 1
        }
    }
}

If the image scanned has Patch Code, then set a tag for it:

// Tag the newest image in the buffer when the scanner reported a Patch Code.
// In the sample outputs TWEI_PATCHCODE is a plain number when a Patch Code was
// detected and an object ({ Error: true, VALUE: "Data Not Available" })
// otherwise, so the type is tested explicitly. A truthiness test would drop a
// reported value of 0, which is the TWAIN constant for Patch 1.
const extendedImageInfo = outputInfo["extendedImageInfo"];
const others = extendedImageInfo ? extendedImageInfo["others"] : undefined;
const patchCodeInfo = others ? others["TWEI_PATCHCODE"] : undefined;
if (typeof patchCodeInfo === "number" && !Number.isNaN(patchCodeInfo)) {
  // Last index in the buffer — assumed to be the image this event refers to.
  DWObject.TagImages([DWObject.HowManyImagesInBuffer - 1], "HasPatchCode");
}

However, not all scanners have the capability to read Patch Code. We can use Dynamsoft Barcode Reader to do this instead.

Use Dynamsoft Barcode Reader to Read Patch Code

Update the runtime settings of Dynamsoft Barcode Reader to read Patch Code only.

/**
 * Switches the Barcode Reader add-on to the Patch Code format.
 *
 * Reads the current runtime settings, overwrites `barcodeFormatIds` with
 * BF_PATCHCODE on that same settings object, and writes it back.
 *
 * @returns {Promise<void>} settles once the updated settings were submitted
 */
async function usePatchCodeRuntimeSettings() {
  const reader = DWObject.Addon.BarcodeReader;
  const runtimeSettings = await reader.getRuntimeSettings();
  runtimeSettings.barcodeFormatIds = Dynamsoft.DBR.EnumBarcodeFormat.BF_PATCHCODE;
  await reader.updateRuntimeSettings(runtimeSettings);
}

Read barcodes from all the images and tag each image to record whether it contains a Patch Code.

/**
 * Runs the Barcode Reader add-on on every buffered image that does not yet
 * carry a "HasPatchCode" or "NoPatchCode" tag, then tags it according to
 * whether the decode call returned any result.
 *
 * @returns {Promise<void>} settles after all untagged images were processed
 */
async function tagImagesWithBarcodeReader() {
  const isPatchCodeTag = (tag) => tag === "NoPatchCode" || tag === "HasPatchCode";

  for (let imageIdx = 0; imageIdx < DWObject.HowManyImagesInBuffer; imageIdx++) {
    const existingTags = DWObject.GetTagListByIndex(imageIdx);
    if (Array.from(existingTags).some(isPatchCodeTag)) {
      continue; // already classified on an earlier pass or by the scanner
    }

    const decoded = await DWObject.Addon.BarcodeReader.decode(imageIdx);
    const verdict = decoded.length > 0 ? "HasPatchCode" : "NoPatchCode";
    DWObject.TagImages([imageIdx], verdict);
  }
}

We can add a checkbox to enable Dynamsoft Barcode Reader.

<div id="options">
  <div>Options:</div>
  <div>
    <!-- save() reads this checkbox to decide whether to run tagImagesWithBarcodeReader(). -->
    <input type="checkbox" id="enableBarcodeReader" checked/>
    <label for="enableBarcodeReader">
      Use Dynamsoft Barcode Reader to read Patch Codes
    </label>
  </div>
</div>

Save the Document Images into Different PDF Files Based on Patch Code

Next, we are going to separate the documents based on Patch Code and save them into different PDF files.

There are some options we need to define:

Whether to separate documents with Patch Code.
Whether to remove the page with Patch Code.
Whether to remove the page after the page with Patch Code. This is for two-sided scanning.
An output path like F:\out.pdf. If separation is enabled, then save them as out-001.pdf, out-002.pdf, etc.

HTML:

<!-- Scan/save options; save() and selectOutputPath() look these elements up by id. -->
<div id="options">
  <div>Options:</div>
  <div>
    <input type="checkbox" id="enableBarcodeReader" checked/>
    <label for="enableBarcodeReader">
      Use Dynamsoft Barcode Reader to read Patch Codes
    </label>
    <br/>
    <input type="checkbox" id="enableSeparation" checked/>
    <label for="enableSeparation">
      Enable document separation with Patch Code
    </label>
    <br/>
    <input type="checkbox" id="removePatchCodePage" checked/>
    <label for="removePatchCodePage">
      Remove the Patch Code page
    </label>
    <br/>
    <input type="checkbox" id="removePageAfterPatchCodePage" checked/>
    <label for="removePageAfterPatchCodePage">
      Remove the next page of the Patch Code page
    </label>
    <br/>
    <label for="outputPath">
      Output path:
      <input type="text" id="outputPath"/>
    </label><button onclick="selectOutputPath();">Select</button>
    <div>
      PS: You need to input a base filename like scanned.pdf and the documents will be saved as scanned-001.pdf, scanned-002.pdf...
    </div>
  </div>
</div>

The function to select the absolute output path:

// DWObject instances that already have the OnGetFilePath handler attached.
// Registering on every click would stack up handlers that all fire on each
// dialog result.
const outputPathHandlerOwners = new WeakSet();

/**
 * OnGetFilePath handler: writes the chosen directory + file name into the
 * #outputPath text input. Only `directory` and `fileName` are used.
 */
function handleOutputPathChosen(isSave, filesCount, index, directory, fileName) {
  console.log(directory);
  console.log(fileName);
  document.getElementById("outputPath").value = getFullPath(directory, fileName);
}

/**
 * Opens the SDK's file dialog so the user can pick the output path.
 * The OnGetFilePath handler is attached once per DWObject instance.
 * Does nothing while DWObject is not set.
 */
function selectOutputPath() {
  if (!DWObject) {
    return;
  }
  if (!outputPathHandlerOwners.has(DWObject)) {
    DWObject.RegisterEvent("OnGetFilePath", handleOutputPathChosen);
    outputPathHandlerOwners.add(DWObject);
  }
  // NOTE(review): positional arguments follow the SDK's ShowFileDialog
  // signature; the leading `true` presumably requests a save dialog — confirm.
  DWObject.ShowFileDialog(true, "", 0, "", "", false, false, 0);
}

/**
 * Joins a directory and a file name with the platform's path separator:
 * a backslash when Dynamsoft.Lib.env.bWin is truthy, a forward slash otherwise.
 *
 * @param {string} directory - folder part of the path
 * @param {string} fileName - file name to append
 * @returns {string} directory + separator + fileName
 */
function getFullPath(directory, fileName) {
  const separator = Dynamsoft.Lib.env.bWin ? "\\" : "/";
  return directory + separator + fileName;
}

The function to save the documents into separate PDF files:

/**
 * Splits page indices 0..pageCount-1 into documents at separator pages.
 *
 * A separator page closes the current document. It is kept as that
 * document's last page unless `removePatchCodePage` is set, and the page
 * right after it is skipped when `removePageAfterPatchCodePage` is set.
 * Groups that end up with no pages are dropped, so a removed separator at
 * the start of the batch or directly after another separator does not
 * produce an empty document.
 *
 * @param {number} pageCount - number of images in the buffer
 * @param {(index: number) => boolean} isSeparatorPage - separator predicate
 * @param {{removePatchCodePage: boolean, removePageAfterPatchCodePage: boolean}} options
 * @returns {number[][]} one array of image indices per document
 */
function groupPagesIntoDocuments(pageCount, isSeparatorPage, options) {
  const groups = [];
  let currentPages = [];
  const flush = () => {
    if (currentPages.length > 0) {
      groups.push(currentPages);
    }
    currentPages = [];
  };

  for (let i = 0; i < pageCount; i++) {
    if (!isSeparatorPage(i)) {
      currentPages.push(i);
      continue;
    }
    if (!options.removePatchCodePage) {
      currentPages.push(i);
    }
    if (options.removePageAfterPatchCodePage) {
      i++; // skip the page following the separator (its back side in duplex scans)
    }
    flush();
  }
  flush(); // pages after the last separator form the final document
  return groups;
}

/**
 * Saves the buffered images as one or more multi-page PDFs.
 *
 * Optionally tags images via the Barcode Reader first, groups the pages into
 * documents using the "HasPatchCode" tag (when separation is enabled), and
 * saves each group as <base>-001.pdf, <base>-002.pdf, ... in the folder of
 * the path entered in #outputPath. Shows an alert when every document was
 * saved; a failed save rejects the returned promise.
 *
 * @returns {Promise<void>}
 */
async function save() {
  if (!DWObject) {
    return;
  }
  const isChecked = (id) => document.getElementById(id).checked;

  if (isChecked("enableBarcodeReader")) {
    await tagImagesWithBarcodeReader();
  }

  const separationEnabled = isChecked("enableSeparation");
  const hasPatchCode = (i) =>
    separationEnabled && Array.from(DWObject.GetTagListByIndex(i)).includes("HasPatchCode");
  const pageGroups = groupPagesIntoDocuments(DWObject.HowManyImagesInBuffer, hasPatchCode, {
    removePatchCodePage: isChecked("removePatchCodePage"),
    removePageAfterPatchCodePage: isChecked("removePageAfterPatchCodePage"),
  });

  const path = document.getElementById("outputPath").value;
  const outputFolder = getFolder(path);
  const baseName = getBaseName(path);
  if (path) {
    // A path was supplied, so the SDK's own file dialog is turned off.
    DWObject.IfShowFileDialog = false;
  }

  for (let index = 0; index < pageGroups.length; index++) {
    DWObject.SelectImages(pageGroups[index]);
    const sequence = String(index + 1).padStart(3, "0");
    const fileName = `${baseName}-${sequence}.pdf`;
    await saveAsMultiPagePDF(getFullPath(outputFolder, fileName));
  }
  alert("Saved");
}

/**
 * Promise wrapper around DWObject.SaveSelectedImagesAsMultiPagePDF.
 *
 * @param {string} outputPath - path handed to the SDK's save call
 * @returns {Promise<boolean>} resolves with `true` when the SDK reports
 *   success; rejects with an Error whose message is the SDK's error string
 *   and whose `code` property is the SDK's error code
 */
function saveAsMultiPagePDF(outputPath) {
  return new Promise((resolve, reject) => {
    const onSuccess = () => {
      resolve(true);
    };
    const onFailure = (errorCode, errorString) => {
      // Reject with a real Error so callers get a stack trace and keep the
      // numeric code instead of receiving a bare string.
      const error = new Error(errorString);
      error.code = errorCode;
      reject(error);
    };
    DWObject.SaveSelectedImagesAsMultiPagePDF(outputPath, onSuccess, onFailure);
  });
}

The save function is triggered with a button.

<button onclick="save();">Save</button>

All right, we’ve now finished the web app to run batch document scanning and save them into separate PDF files based on Patch Code.

Source Code

Get the source code of the demo to have a try:

https://github.com/tony-xlh/Batch-Document-Scanning-with-Patch-Code

References

https://en.wikipedia.org/wiki/Patch_Code ↩

LANGUAGES

PLATFORMS

FEATURED