How to Split Documents via Barcodes

Aug 06, 2024

Barcodes are commonly used to identify retail products. We can also use them to mark documents. For example, we may need to scan a batch of forms. Each form contains several pages and there is a barcode on the first page denoting a unique form number. We can use the barcodes to split the documents and attach form numbers to those documents.

In this article, we are going to write a demo web app to scan documents using Dynamic Web TWAIN and split them via barcodes read by Dynamsoft Barcode Reader.

Demo video of the final result:

Scan Documents in a Web Page

Let’s first write a web page to scan documents.

Create a new HTML file with the following template:

<!DOCTYPE html>
<html>
<head>
  <title>Document Scanning via TWAIN</title>
  <meta name="viewport" content="width=device-width,initial-scale=1.0,maximum-scale=1.0,user-scalable=0" />
</head>
<body>
  <h2>Document Scanning via TWAIN</h2>
  <script type="text/javascript">
  </script>
</body>
</html>

Include the library of Dynamic Web TWAIN in the head.

<script src="https://unpkg.com/dwt@18.5.0/dist/dynamsoft.webtwain.min.js"></script>

Initialize an instance of Dynamic Web TWAIN and bind it to a viewer. You can apply for its license here.

HTML:

<div id="dwtcontrolContainer"></div>

JavaScript:

// Shared state: the Web TWAIN instance (assigned once creation succeeds)
// and the list of scanners.
let DWObject;
let scanners;

/**
 * Configures Dynamic Web TWAIN and creates an instance whose viewer is
 * bound to the element with the ID "dwtcontrolContainer".
 */
function initDWT(){
  const licenseKey = "LICENSE-KEY";
  Dynamsoft.DWT.AutoLoad = false;
  Dynamsoft.DWT.Containers = [];
  Dynamsoft.DWT.ResourcesPath = "https://unpkg.com/dwt@18.5.0/dist";
  Dynamsoft.DWT.ProductKey = licenseKey;

  // Called with the created instance: keep it and set up its viewer.
  const onCreated = (obj) => {
    DWObject = obj;
    const viewer = DWObject.Viewer;
    viewer.bind(document.getElementById('dwtcontrolContainer'));
    viewer.height = "480px";
    viewer.width = "360px";
    viewer.show();
    viewer.setViewMode(2,2);
  };

  // Called when the instance cannot be created: the error is only logged.
  const onFailed = (err) => {
    console.log(err);
  };

  Dynamsoft.DWT.CreateDWTObjectEx({ WebTwainId: 'dwtcontrol' }, onCreated, onFailed);
}

initDWT();

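List connected scanners in a select element whose ID is select-scanner (add <select id="select-scanner"></select> to the page). Call loadScanners() once DWObject has been created, for example at the end of the success callback passed to CreateDWTObjectEx.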

// `scanners` is the shared variable declared together with `DWObject`;
// declaring it again with `let` here would be a SyntaxError in the same script.

/**
 * Fetches the list of devices from Dynamic Web TWAIN, stores it in the shared
 * `scanners` variable and fills the "select-scanner" drop-down with one option
 * per device. Each option's value is the device's index in `scanners`.
 *
 * @returns {Promise<void>} settles once the drop-down has been rebuilt.
 */
async function loadScanners(){
  scanners = await DWObject.GetDevicesAsync();
  const selScanners = document.getElementById("select-scanner");
  selScanners.innerHTML = ""; // drop options left over from a previous call
  scanners.forEach((scanner, index) => {
    selScanners.appendChild(new Option(scanner.displayName, index));
  });
}

Scan documents using the selected scanner. This brings up the scanner’s configuration UI, where you can adjust the settings and start the scan.

HTML:

<input type="button" value="Scan" onclick="AcquireImage();" />

JavaScript:

/**
 * Scans with the scanner currently selected in the "select-scanner" drop-down,
 * showing the scanner's own UI. Does nothing if `DWObject` has not been created yet.
 *
 * @returns {Promise<void>} settles when scanning has finished and the source is closed;
 *   rejects with the library's error if any step fails.
 */
async function AcquireImage() {
  if (DWObject) {
    const selectedIndex = document.getElementById("select-scanner").selectedIndex;
    const options = {
      IfShowUI:true,
    };
    await DWObject.SelectDeviceAsync(scanners[selectedIndex]);
    await DWObject.OpenSourceAsync();
    try {
      await DWObject.AcquireImageAsync(options);
    } finally {
      // Close the source even when the acquisition fails, so an opened
      // source is not left open.
      await DWObject.CloseSourceAsync();
    }
  }
}

Apart from using the scanner’s UI for configuration, we can also configure the scanning behavior directly via code, such as whether to use the auto document feeder, the resolution, the pixel type and duplex scanning:

HTML:

<!-- Scan settings. The scanning code looks these controls up by their IDs. -->
<label>
  Show UI:
  <!-- Read as IfShowUI; checked by default so the scanner's own UI is shown, as in the basic example. -->
  <input type="checkbox" id="showUI" checked/>
</label>
<br/>
<label>
  Auto Document Feeder:
  <input type="checkbox" id="ADF"/>
</label>
<br/>
<label>
  Duplex:
  <input type="checkbox" id="duplex"/>
</label>
<br/>
<label>
  Resolution:
  <select id="select-resolution">
    <option value="100">100</option>
    <option value="200">200</option>
    <option value="300" selected>300</option>
  </select>
</label>
<br/>
<label>
  Pixel Type:
  <!-- The scanning code passes this select's selectedIndex as the pixel type, so the option order matters. -->
  <select id="select-pixeltype">
    <option>Black & White</option>
    <option>Gray</option>
    <option selected>Color</option>
  </select>
</label>

JavaScript:

// Build the scan configuration from the form controls and scan with the selected device.
const selectedIndex = document.getElementById("select-scanner").selectedIndex;
const options = {
  IfShowUI:document.getElementById("showUI").checked,
  // The position of the chosen option in the select is passed as the pixel type.
  PixelType:document.getElementById("select-pixeltype").selectedIndex,
  // An <option> value is a string; convert it so the resolution is passed as a number.
  Resolution:Number.parseInt(document.getElementById("select-resolution").selectedOptions[0].value, 10),
  IfFeederEnabled:document.getElementById("ADF").checked,
  IfDuplexEnabled:document.getElementById("duplex").checked
};
await DWObject.SelectDeviceAsync(scanners[selectedIndex]);
await DWObject.OpenSourceAsync();
try {
  await DWObject.AcquireImageAsync(options);
} finally {
  // Close the source even when the acquisition fails.
  await DWObject.CloseSourceAsync();
}

Load existing files. This makes it easy to test document splitting without a scanner.

HTML:

<input type="button" value="Load Files" onclick="LoadFiles();" />

JavaScript:

/**
 * Loads image files into the Web TWAIN buffer and logs the outcome
 * to the console.
 */
function LoadFiles(){
  const onLoaded = () => {
    console.log("success");
  };
  const onFailed = (errorCode, errorString) => {
    console.log(errorString);
  };
  // The file name can be left empty when the "Open File" dialog is used.
  const fileName = "";
  DWObject.LoadImageEx(fileName, Dynamsoft.DWT.EnumDWT_ImageType.IT_ALL, onLoaded, onFailed);
}

Read Barcodes on a Page

Next, let’s use Dynamsoft Barcode Reader to read barcodes on pages.

Include the libraries used by Dynamsoft Barcode Reader.

<script src="https://cdn.jsdelivr.net/npm/dynamsoft-core@3.2.30/dist/core.js"></script>
<script src="https://cdn.jsdelivr.net/npm/dynamsoft-barcode-reader@10.2.10/dist/dbr.js"></script>
<script src="https://cdn.jsdelivr.net/npm/dynamsoft-capture-vision-router@2.2.30/dist/cvr.js"></script>
<script src="https://cdn.jsdelivr.net/npm/dynamsoft-license@3.2.21/dist/license.js"></script>

Initialize its license. You can apply for a license here.

// Initialize the Dynamsoft Barcode Reader license; put your own key in place of "LICENSE-KEY".
Dynamsoft.License.LicenseManager.initLicense("LICENSE-KEY"); 

Create an instance of capture vision router to call Dynamsoft Barcode Reader.

// NOTE(review): the result of loadWasm is not awaited here — presumably
// createInstance waits for the wasm files itself; confirm against the library docs.
Dynamsoft.Core.CoreModule.loadWasm(["dbr"]); //load the wasm files of barcode reader
// The capture vision router is the object used below to run barcode reading (router.capture).
let router = await Dynamsoft.CVR.CaptureVisionRouter.createInstance();

Write a function to convert a page into blob.

/**
 * Converts one page of the Web TWAIN buffer into a PNG blob.
 *
 * @param {number} index - index of the page in the buffer.
 * @returns {Promise<*>} resolves with the result handed to the library's success
 *   callback; rejects with the error string handed to its failure callback.
 */
async function convertToBlob(index){
  // Adapt the callback-based ConvertToBlob API to a promise.
  return new Promise((resolve, reject) => {
    const pages = [index];
    const onConverted = (result, indices, type) => resolve(result);
    const onFailed = (errorCode, errorString) => reject(errorString);
    DWObject.ConvertToBlob(
      pages,
      Dynamsoft.DWT.EnumDWT_ImageType.IT_PNG,
      onConverted,
      onFailed
    );
  });
}

Read barcodes from the blob.

// Convert the page to a blob, then run barcode reading on it with the
// "ReadBarcodes_Balance" template passed to router.capture.
const i = 0; //the first scanned page
const blob = await convertToBlob(i);
let barcodeReadingResult = await router.capture(blob, "ReadBarcodes_Balance");

Split Documents by Barcodes

Different documents have their own ways of using barcodes. Normally, there are two ways: putting the barcode on the first page of a document or putting it on the last page.

Create a select element for selecting which way to use.

<label>
  Barcode Position:
  <select class="barcode-position">
    <option>First Page</option>
    <option>Last Page</option>
  </select>
</label>

Read barcodes on every page and split them based on whether the page contains barcodes.

// Split the pages in the buffer into documents, using pages that carry a
// barcode as separators. Each document is {barcodes, imageIndex}: the barcode
// results of its marker page and the buffer indices of its pages.
const FIRST = 0;
const LAST = 1;
// Index of the chosen option in the "Barcode Position" select:
// 0 = First Page, 1 = Last Page, matching FIRST and LAST above.
const barcodePosition = document.getElementsByClassName("barcode-position")[0].selectedIndex;
const total = DWObject.HowManyImagesInBuffer;
let documents = [];
let doc; // the document currently being assembled; undefined while none is open
for (let i = 0; i < total; i++) {
  DWObject.SelectImages([i]);
  const blob = await convertToBlob(i);
  const barcodeReadingResult = await router.capture(blob, "ReadBarcodes_Balance");
  const hasBarcode = barcodeReadingResult.items.length > 0;

  if (barcodePosition === FIRST) {
    if (hasBarcode) {
      // A barcode page closes the previous document and opens a new one.
      if (doc) {
        documents.push(doc);
      }
      doc = {barcodes:barcodeReadingResult.items,imageIndex:[i]};
    } else if (doc) {
      // Pages before the first barcode page belong to no document and are skipped.
      doc.imageIndex.push(i);
    }
  } else {
    // Barcode on the last page: every page joins the open document,
    // and a barcode page completes it.
    if (!doc) {
      doc = {barcodes:[],imageIndex:[]};
    }
    doc.imageIndex.push(i);
    if (hasBarcode) {
      doc.barcodes = barcodeReadingResult.items;
      documents.push(doc);
      // Open the next document lazily, so that no empty document is left
      // behind after the final barcode page.
      doc = undefined;
    }
  }
}
// Keep a document that is still open at the end. With the barcode on the last
// page, this is a run of trailing pages that has no barcode.
if (doc) {
  documents.push(doc);
}

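List the documents classified by barcodes in a container element that has the class name documents, for example <div class="documents"></div>.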

/**
 * Renders the shared `documents` array into the first element with the class
 * "documents". Each document gets a title (the text of its first barcode),
 * thumbnails linking to the page images, and a "Save as PDF" button that saves
 * its pages as "form-<n>.pdf", where n is the document's 1-based position.
 */
function ListClassifiedDocuments(){
  const documentsContainer = document.getElementsByClassName("documents")[0];
  documentsContainer.innerHTML = ""; // remove the previous listing
  documents.forEach((doc, i) => {
    const documentContainer = document.createElement("div");

    const title = document.createElement("div");
    // Barcode text is read from scanned pages, so it is inserted as plain
    // text rather than HTML. A document may have no barcode at all (e.g.
    // trailing pages without one), hence the fallback title.
    const firstBarcode = doc.barcodes[0];
    title.textContent = firstBarcode ? firstBarcode.text : "(no barcode)";
    documentContainer.appendChild(title);

    const thumbnailsContainer = document.createElement("div");
    thumbnailsContainer.className = "thumbnails";
    for (const imageIndex of doc.imageIndex) {
      const thumbnailImage = document.createElement("img");
      thumbnailImage.className = "thumbnail";
      thumbnailImage.src = DWObject.GetImageURL(imageIndex);
      // Clicking a thumbnail opens the page image in a new tab.
      const a = document.createElement("a");
      a.appendChild(thumbnailImage);
      a.href = thumbnailImage.src;
      a.target = "_blank";
      thumbnailsContainer.appendChild(a);
    }
    documentContainer.appendChild(thumbnailsContainer);

    const saveButton = document.createElement("button");
    saveButton.innerText = "Save as PDF";
    saveButton.className = "save-button";
    saveButton.type = "button";
    documentContainer.appendChild(saveButton);
    saveButton.addEventListener("click", function(){
      // Select this document's pages, then save the selection as one PDF.
      DWObject.SelectImages(doc.imageIndex);
      DWObject.SaveSelectedImagesAsMultiPagePDF(`form-${i + 1}.pdf`);
    });

    documentsContainer.appendChild(documentContainer);
  });
}

Classified

Source Code

Get the source code of the demo to have a try:

https://github.com/tony-xlh/Dynamic-Web-TWAIN-samples/tree/main/Classification-with-Barcode