How to Split Documents via Barcodes

Barcodes are commonly used to identify retail products. We can also use them to mark documents. For example, we may need to scan a batch of forms. Each form contains several pages and there is a barcode on the first page denoting a unique form number. We can use the barcodes to split the documents and attach form numbers to those documents.

In this article, we are going to write a demo web app to scan documents using Dynamic Web TWAIN and split them via barcodes read by Dynamsoft Barcode Reader.

Demo video of the final result:

Scan Documents in a Web Page

Let’s first write a web page to scan documents.

  1. Create a new HTML file with the following template:

    <!DOCTYPE html>
    <html lang="en">
    <head>
      <!-- Declare the encoding explicitly so the browser does not have to guess it. -->
      <meta charset="utf-8" />
      <title>Document Scanning via TWAIN</title>
      <meta name="viewport" content="width=device-width,initial-scale=1.0,maximum-scale=1.0,user-scalable=0" />
    </head>
    <body>
      <h2>Document Scanning via TWAIN</h2>
      <!-- The page's JavaScript from the following steps goes inside this script element. -->
      <script type="text/javascript">
      </script>
    </body>
    </html>
    
  2. Include the library of Dynamic Web TWAIN in the head.

    <!-- Dynamic Web TWAIN 18.5.0 served from unpkg; initDWT points ResourcesPath at the same dwt@18.5.0/dist folder. -->
    <script src="https://unpkg.com/dwt@18.5.0/dist/dynamsoft.webtwain.min.js"></script>
    
  3. Initialize an instance of Dynamic Web TWAIN and bind it to a viewer. You can apply for its license here.

    HTML:

    <!-- Host element for the Dynamic Web TWAIN viewer; initDWT looks it up by this id when binding the viewer. -->
    <div id="dwtcontrolContainer"></div>
    

    JavaScript:

    // Shared Dynamic Web TWAIN instance; assigned once object creation succeeds.
    let DWObject;
    // Scanner list placeholder; nothing in this snippet assigns it.
    let scanners;
    initDWT();

    /**
     * Configures the Dynamsoft.DWT globals (no auto-loading, no predefined
     * containers, resources path, product key) and then creates a Web TWAIN
     * object explicitly. On success the object is stored in `DWObject` and its
     * viewer is bound to the "dwtcontrolContainer" element; on failure the
     * error is logged to the console.
     */
    function initDWT(){
      const trialKey = "DLS2eyJoYW5kc2hha2VDb2RlIjoiMjAwMDAxLTE2NDk4Mjk3OTI2MzUiLCJvcmdhbml6YXRpb25JRCI6IjIwMDAwMSIsInNlc3Npb25QYXNzd29yZCI6IndTcGR6Vm05WDJrcEQ5YUoifQ==";

      Dynamsoft.DWT.AutoLoad = false;
      Dynamsoft.DWT.Containers = [];
      Dynamsoft.DWT.ResourcesPath = "https://unpkg.com/dwt@18.5.0/dist";
      Dynamsoft.DWT.ProductKey = trialKey;

      const onCreated = (createdObject) => {
        DWObject = createdObject;
        // The container is looked up only now, at the moment the object is ready.
        const host = document.getElementById('dwtcontrolContainer');
        DWObject.Viewer.bind(host);
        DWObject.Viewer.height = "480px";
        DWObject.Viewer.width = "360px";
        DWObject.Viewer.show();
        // NOTE(review): presumably a 2 x 2 page grid — confirm against the Viewer docs.
        DWObject.Viewer.setViewMode(2,2);
      };

      const onFailed = (error) => {
        console.log(error);
      };

      Dynamsoft.DWT.CreateDWTObjectEx({ WebTwainId: 'dwtcontrol' }, onCreated, onFailed);
    }
    
  4. List connected scanners.

    /**
     * Queries the connected scanners and lists them in the "select-scanner"
     * drop-down, one option per device. The option label is the device's
     * displayName and the option value is its position in `scanners`.
     *
     * Stores the device list in the shared `scanners` variable that is declared
     * next to `DWObject`. It is deliberately NOT re-declared here: a second
     * `let scanners;` in the same script is a duplicate-declaration SyntaxError.
     *
     * Requires an element with id "select-scanner" to exist on the page.
     *
     * @returns {Promise<void>} settles once the list has been rebuilt
     */
    async function loadScanners(){
      scanners = await DWObject.GetDevicesAsync();
      const selScanners = document.getElementById("select-scanner");
      // Drop options left over from a previous call.
      selScanners.innerHTML = "";
      scanners.forEach((scanner, index) => {
        selScanners.appendChild(new Option(scanner.displayName, index));
      });
    }
    
  5. Scan documents using the selected scanner. It will bring up the scanner’s configuration UI to perform a scan.

    HTML:

    <!-- Scanner list: filled by loadScanners() and read by AcquireImage() through its id. -->
    <select id="select-scanner"></select>
    <!-- Starts a scan with the scanner selected above. -->
    <input type="button" value="Scan" onclick="AcquireImage();" />
    

    JavaScript:

    /**
     * Scans with the device chosen in the "select-scanner" drop-down, showing
     * the scanner's own configuration UI. Does nothing if `DWObject` has not
     * been created yet.
     *
     * Once the source has been opened it is always closed again, even when the
     * acquisition fails; the failure still propagates to the caller.
     *
     * @returns {Promise<void>} settles after the source has been closed
     */
    async function AcquireImage() {
      if (!DWObject) {
        return;
      }
      const selectedIndex = document.getElementById("select-scanner").selectedIndex;
      const options = {
        IfShowUI: true,
      };
      await DWObject.SelectDeviceAsync(scanners[selectedIndex]);
      await DWObject.OpenSourceAsync();
      try {
        await DWObject.AcquireImageAsync(options);
      } finally {
        // Release the source even when scanning throws (jam, cancel, ...),
        // otherwise it stays open for the next attempt.
        await DWObject.CloseSourceAsync();
      }
    }
    

    Apart from using the scanner’s UI for configuration, we can also configure the scanning behavior directly in code, such as the auto document feeder, resolution, pixel type and duplex scanning:

    HTML:

    <!-- The scanning code reads this checkbox by id to decide whether to show the scanner's own UI. -->
    <label>
      Show UI:
      <input type="checkbox" id="showUI" checked/>
    </label>
    <br/>
    <label>
      Auto Document Feeder:
      <input type="checkbox" id="ADF"/>
    </label>
    <br/>
    <label>
      Duplex:
      <input type="checkbox" id="duplex"/>
    </label>
    <br/>
    <label>
      Resolution:
      <select id="select-resolution">
        <option value="100">100</option>
        <option value="200">200</option>
        <option value="300" selected>300</option>
      </select>
    </label>
    <br/>
    <!-- The scanning code passes this list's selectedIndex as the pixel type, so the option order matters. -->
    <label>
      Pixel Type:
      <select id="select-pixeltype">
        <option>Black &amp; White</option>
        <option>Gray</option>
        <option selected>Color</option>
      </select>
    </label>
    

    JavaScript:

    // Scan with settings taken from the form controls instead of hard-coded values.
    // Reads the elements with ids "select-scanner", "showUI", "select-pixeltype",
    // "select-resolution", "ADF" and "duplex"; all of them must exist on the page.
    const selectedIndex = document.getElementById("select-scanner").selectedIndex;
    const options = {
      IfShowUI:document.getElementById("showUI").checked,
      // The option position (Black & White, Gray, Color) is passed as the pixel type.
      PixelType:document.getElementById("select-pixeltype").selectedIndex,
      // A <select> value is always a string; convert it so the resolution is passed as a number.
      Resolution:Number.parseInt(document.getElementById("select-resolution").selectedOptions[0].value, 10),
      IfFeederEnabled:document.getElementById("ADF").checked,
      IfDuplexEnabled:document.getElementById("duplex").checked
    };
    await DWObject.SelectDeviceAsync(scanners[selectedIndex]);
    await DWObject.OpenSourceAsync();
    try {
      await DWObject.AcquireImageAsync(options);
    } finally {
      // Close the source even if the acquisition fails, so it is not left open.
      await DWObject.CloseSourceAsync();
    }
    
  6. Load existing files. This makes it easy to test document splitting.

    HTML:

    <!-- Calls LoadFiles() when clicked. -->
    <input type="button" value="Load Files" onclick="LoadFiles();" />
    

    JavaScript:

    /**
     * Asks Dynamic Web TWAIN to load images of any supported type (IT_ALL).
     * The file name is left empty, which is allowed when the "Open File"
     * dialog is used. The outcome is only logged: "success" when loading
     * works, the error string when it does not.
     */
    function LoadFiles(){
      const emptyFileName = "";

      const reportSuccess = () => {
        console.log("success");
      };

      const reportFailure = (errorCode, errorString) => {
        console.log(errorString);
      };

      DWObject.LoadImageEx(
        emptyFileName,
        Dynamsoft.DWT.EnumDWT_ImageType.IT_ALL,
        reportSuccess,
        reportFailure
      );
    }
    

Read Barcodes on a Page

Next, let’s use Dynamsoft Barcode Reader to read barcodes on pages.

  1. Include the libraries used by Dynamsoft Barcode Reader.

    <!-- Scripts needed for barcode reading, loaded from jsDelivr at pinned versions:
         core, barcode reader, capture vision router and license. -->
    <script src="https://cdn.jsdelivr.net/npm/dynamsoft-core@3.2.30/dist/core.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/dynamsoft-barcode-reader@10.2.10/dist/dbr.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/dynamsoft-capture-vision-router@2.2.30/dist/cvr.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/dynamsoft-license@3.2.21/dist/license.js"></script>
    
  2. Initialize its license. You can apply for a license here.

    // Hand the license key to the license manager. The key below is the public
    // one-day trial key; replace it with your own key.
    Dynamsoft.License.LicenseManager.initLicense("DLS2eyJoYW5kc2hha2VDb2RlIjoiMjAwMDAxLTE2NDk4Mjk3OTI2MzUiLCJvcmdhbml6YXRpb25JRCI6IjIwMDAwMSIsInNlc3Npb25QYXNzd29yZCI6IndTcGR6Vm05WDJrcEQ5YUoifQ=="); //one-day trial
    
  3. Create an instance of capture vision router to call Dynamsoft Barcode Reader.

    // Ask the core module to load the barcode reader ("dbr") wasm files.
    // NOTE(review): the call is not awaited here — presumably loading continues in the background; confirm.
    Dynamsoft.Core.CoreModule.loadWasm(["dbr"]); //load the wasm files of barcode reader
    // Router instance that the later steps use to read barcodes through capture().
    let router = await Dynamsoft.CVR.CaptureVisionRouter.createInstance();
    
  4. Write a function to convert a page into blob.

    /**
     * Promise wrapper around the callback-based DWObject.ConvertToBlob for a
     * single page, exported as PNG.
     *
     * @param {number} index - position of the page in the image buffer
     * @returns {Promise<*>} resolves with the first value ConvertToBlob passes
     *   to its success callback; rejects with the error string passed to its
     *   failure callback
     */
    async function convertToBlob(index){
      const conversion = new Promise((fulfil, fail) => {
        const onConverted = (blob) => {
          fulfil(blob);
        };
        const onError = (errorCode, errorString) => {
          fail(errorString);
        };
        DWObject.ConvertToBlob(
          [index],
          Dynamsoft.DWT.EnumDWT_ImageType.IT_PNG,
          onConverted,
          onError
        );
      });
      return conversion;
    }
    
  5. Read barcodes from the blob.

    const i = 0; //the first scanned page
    // Export that page as a blob via convertToBlob.
    const blob = await convertToBlob(i);
    // Run the router on the blob with the "ReadBarcodes_Balance" template name.
    // The result's `items` array is what the splitting step checks for barcodes.
    let barcodeReadingResult = await router.capture(blob, "ReadBarcodes_Balance");
    

Split Documents by Barcodes

Different documents use barcodes in different ways. Normally, there are two options: put the barcode on the first page of each document, or put it on the last page.

  1. Create a select element for selecting which way to use.

    <!-- Chooses where each document's barcode sits.
         NOTE(review): the option order (First Page, Last Page) presumably has to line up with the
         FIRST = 0 / LAST = 1 constants in the splitting code — confirm how the selection is read. -->
    <label>
      Barcode Position:
      <select class="barcode-position">
        <option>First Page</option>
        <option>Last Page</option>
      </select>
    </label>
    
  2. Read barcodes on every page and split them based on whether the page contains barcodes.

    // Split the scanned pages into documents, using pages that carry a barcode
    // as separators. Each document is {barcodes, imageIndex}: the barcode items
    // read from its separator page and the buffer indices of all its pages.
    const FIRST = 0;
    const LAST = 1;
    // Option order in the ".barcode-position" select is First Page, Last Page,
    // so its selectedIndex lines up with FIRST / LAST.
    const barcodePosition = document.getElementsByClassName("barcode-position")[0].selectedIndex;
    const total = DWObject.HowManyImagesInBuffer;
    let documents = [];
    let doc; // the document currently being collected, if any
    for (let i = 0; i < total; i++) {
      DWObject.SelectImages([i]);
      const blob = await convertToBlob(i);
      const barcodeReadingResult = await router.capture(blob, "ReadBarcodes_Balance");
      const hasBarcode = barcodeReadingResult.items.length > 0;

      if (barcodePosition === FIRST) {
        if (hasBarcode) {
          // A barcode page closes the previous document and opens a new one.
          if (doc) {
            documents.push(doc);
          }
          doc = {barcodes:barcodeReadingResult.items,imageIndex:[i]};
        } else if (doc) {
          doc.imageIndex.push(i);
        }
        // Pages scanned before the first barcode belong to no document and are skipped.
      } else {
        // Last-page mode: every page joins the open document, and a barcode
        // page completes it. Opening the document lazily keeps a barcode on
        // the very first page from being dropped and avoids pushing an empty
        // document after the final barcode page.
        if (!doc) {
          doc = {barcodes:[],imageIndex:[]};
        }
        doc.imageIndex.push(i);
        if (hasBarcode) {
          doc.barcodes = barcodeReadingResult.items;
          documents.push(doc);
          doc = undefined;
        }
      }
    }
    // Keep whatever is still open; in last-page mode these are trailing pages
    // without a closing barcode, so their `barcodes` list is empty.
    if (doc) {
      documents.push(doc);
    }
    
  3. List documents classified by barcodes.

    /**
     * Rebuilds the ".documents" container from the `documents` list. Each
     * document gets a title (text of its first barcode), thumbnails that open
     * the page image in a new tab, and a "Save as PDF" button that saves the
     * document's pages as "form-<n>.pdf", where n is its 1-based position.
     */
    function ListClassifiedDocuments(){
      const documentsContainer = document.getElementsByClassName("documents")[0];
      documentsContainer.innerHTML = "";
      documents.forEach((doc, i) => {
        const documentContainer = document.createElement("div");

        const title = document.createElement("div");
        // Barcode text comes from the scanned pages, so insert it as plain
        // text rather than HTML. A document can also have no barcode at all
        // (pages without a closing barcode), which must not throw here.
        title.textContent = doc.barcodes.length > 0 ? doc.barcodes[0].text : "(no barcode)";
        documentContainer.appendChild(title);

        const thumbnailsContainer = document.createElement("div");
        thumbnailsContainer.className = "thumbnails";
        for (const imageIndex of doc.imageIndex) {
          const a = document.createElement("a");
          const thumbnailImage = document.createElement("img");
          thumbnailImage.className = "thumbnail";
          thumbnailImage.src = DWObject.GetImageURL(imageIndex);
          a.appendChild(thumbnailImage);
          a.href = thumbnailImage.src;
          a.target = "_blank";
          thumbnailsContainer.appendChild(a);
        }
        documentContainer.appendChild(thumbnailsContainer);

        const saveButton = document.createElement("button");
        saveButton.innerText = "Save as PDF";
        saveButton.className = "save-button";
        saveButton.type = "button";
        documentContainer.appendChild(saveButton);
        saveButton.addEventListener("click", () => {
          // Saving works on the current selection, so select this document's pages first.
          DWObject.SelectImages(doc.imageIndex);
          DWObject.SaveSelectedImagesAsMultiPagePDF(`form-${i + 1}.pdf`);
        });

        documentsContainer.appendChild(documentContainer);
      });
    }
    

    Classified

Source Code

Get the source code of the demo to have a try:

https://github.com/tony-xlh/Dynamic-Web-TWAIN-samples/tree/main/Classification-with-Barcode