A Simple Web Document Management App with OpenCV.js

Document scanning is nothing new for mobile apps. I remember that the first time I installed a document scanning app was in 2010. However, the tech giants Apple, Microsoft, and Google have only recently begun to recognize the importance of document scanning technology. It is good to see the feature appear in iOS Notes, Office Lens, and Google Drive. Why is document scanning so popular now? I think the reason is that the quality of images captured by mobile cameras keeps getting better, so more and more users tend to scan documents with cameras instead of traditional scanners. If you are not familiar with Android and iOS development, you can use HTML5. In this post, I will share how to create a simple web document management app using OpenCV.js.

How to Use FileSystem APIs to Write, Read and Remove Images

While creating a client-side web document management app, if you keep all image data in memory, you will soon see an “out of memory” warning. To avoid the issue, we can write the in-memory data to a cache. There are several storage options available in HTML5, such as Local Storage, Session Storage, IndexedDB, Web SQL, and Cookies. Due to their size limitations, they are not suitable for my scenario. My use case is to load and edit images, which are large binary blobs, in the web browser. That is why I need to use the FileSystem APIs, which aim to deal with data outside of the context of the browser.

Load an image to memory

Create an input element and set “file” as the type. The following code can change the button text:

<input type="button" id="load" value="Load image" onclick="document.getElementById('upload').click();" />
<input type="file" id="upload" onchange="loadImage();" style="display:none;" />

Compared to desktop platforms, a “file” input running on mobile platforms triggers not only a file selection window but also the default camera app.

Use FileReader to load an image into an array buffer and wrap it as a blob.

// Runs once the selected file has been read into an ArrayBuffer: checks the
// cache quota, logs the size in kilobytes, and renders the image on the canvas.
fileReader.onload = function (event) {
  const byteCount = event.target.result.byteLength;

  // Bail out before doing any work when the cache reports it cannot hold the image.
  const hasRoom = cacheManager.hasStorage(byteCount);
  if (!hasRoom) {
    alert('Image storage is full. Please clear some images to get storage quota.');
    return;
  }

  const sizeInKilobytes = byteCount / 1024;
  log('size = ' + sizeInKilobytes + 'K');

  // Wrap the raw bytes in a blob; the blob is always labelled as PNG here.
  const bytes = new Uint8Array(this.result);
  const imageBlob = new Blob([bytes], {
    type: "image/png"
  });

  renderBlobImage(imageBlob, 'canvas');
};
fileReader.readAsArrayBuffer(file);

Draw the image on a canvas:

/**
 * Decodes an image blob and draws it, scaled to fit a 600x400 box while
 * keeping its aspect ratio, onto the canvas with the given element id.
 *
 * The temporary object URL created for the blob is revoked as soon as the
 * image has loaded (or failed to load) so the blob is not kept alive by a
 * URL nobody references any more.
 *
 * @param {Blob} blob - Image data to render.
 * @param {string} canvasID - id of the target canvas element.
 */
function renderBlobImage(blob, canvasID) {
  const urlCreator = window.URL || window.webkitURL;

  const canvas = document.getElementById(canvasID);
  const ctx = canvas.getContext('2d');
  const image = new Image();
  const objectUrl = urlCreator.createObjectURL(blob);

  image.onload = function () {
    // The decoded pixels now live in `image`; the URL is no longer needed.
    urlCreator.revokeObjectURL(objectUrl);

    ctx.clearRect(0, 0, canvas.width, canvas.height);
    const canvasWidth = 600;
    const canvasHeight = 400;

    // Use the smaller ratio so both dimensions fit inside the box.
    const scaleFactor = Math.min(canvasWidth / image.width, canvasHeight / image.height);
    const scaledWidth = image.width * scaleFactor;
    const scaledHeight = image.height * scaleFactor;

    canvas.width = scaledWidth;
    canvas.height = scaledHeight;
    ctx.drawImage(image, 0, 0, scaledWidth, scaledHeight);
  };

  image.onerror = function () {
    // Release the URL on a failed decode as well; nothing is drawn.
    urlCreator.revokeObjectURL(objectUrl);
  };

  image.src = objectUrl;
}

Write a file

Here is the code for writing a file:

/**
 * Writes `blob` to `fileName` in the temporary file system, replacing any
 * data the file already holds.
 *
 * Two writers are used: the first truncates the file to zero length, and the
 * second writes the blob. The second writer is only created after the
 * truncate has finished, so the truncate can never run after (and wipe out)
 * the freshly written data.
 *
 * @param {string} fileName - Name of the file under the file system root.
 * @param {Blob} blob - Data to store.
 * @param {Function} [successCallback] - Called with no arguments after the
 *   blob has been written.
 * @param {Function} [errorCallback] - Called with no arguments when the
 *   truncate or the write fails.
 */
CacheManager.prototype.writeFile = function (
  fileName, blob, successCallback, errorCallback) {

  function reportFailure(e) {
    console.log('Write failed: ' + e.toString());
    if (typeof errorCallback === 'function') {
      errorCallback();
    }
  }

  function writeBlob(fileEntry) {
    fileEntry.createWriter(function (fileWriter) {
      let failed = false;

      fileWriter.onwriteend = function (e) {
        // writeend is also dispatched after an error; only report success
        // when no error was seen for this writer.
        if (failed) {
          return;
        }
        console.log('Write completed.');
        if (typeof successCallback === 'function') {
          successCallback();
        }
      };

      fileWriter.onerror = function (e) {
        failed = true;
        reportFailure(e);
      };

      fileWriter.write(blob);

    }, errorHandler);
  }

  function onInitFs(fs) {
    fs.root.getFile(fileName, {
      create: true
    }, function (fileEntry) {
      fileEntry.createWriter(function (truncateWriter) {
        let failed = false;

        truncateWriter.onwriteend = function () {
          // Start the real write only once the old content is gone.
          if (!failed) {
            writeBlob(fileEntry);
          }
        };

        truncateWriter.onerror = function (e) {
          failed = true;
          reportFailure(e);
        };

        truncateWriter.truncate(0);

      }, errorHandler);

    }, errorHandler);
  }

  window.requestFileSystem(
    window.TEMPORARY, this.storageSize, onInitFs, errorHandler);
};

Why do we need to use createWriter() twice? The first createWriter() is used to clear the existing data.

We can compare the storage usage before and after:

Before

FileSystem before

After

FileSystem after

Read a file

// Looks up `fileName` in the temporary file system and hands the resulting
// File object to `callback`; every failure along the way goes to errorHandler.
CacheManager.prototype.readFile = function (fileName, callback) {
  // Resolve the entry to a File object; the caller can read it with FileReader.
  const deliverFile = function (entry) {
    entry.file(callback, errorHandler);
  };

  const lookUpEntry = function (fileSystem) {
    fileSystem.root.getFile(fileName, {}, deliverFile, errorHandler);
  };

  window.requestFileSystem(
    window.TEMPORARY, this.storageSize, lookUpEntry, errorHandler);
};

Remove a file

// Deletes `fileName` from the temporary file system. The file is looked up
// without creating it, and failures are routed to errorHandler.
CacheManager.prototype.removeFile = function (fileName) {
  const reportRemoved = function () {
    console.log('File removed.');
  };

  const deleteEntry = function (entry) {
    entry.remove(reportRemoved, errorHandler);
  };

  const locateFile = function (fileSystem) {
    const lookupOptions = { create: false };
    fileSystem.root.getFile(fileName, lookupOptions, deleteEntry, errorHandler);
  };

  window.requestFileSystem(
    window.TEMPORARY, this.storageSize, locateFile, errorHandler);
};

How to Use OpenCV.js

OpenCV.js is a JavaScript binding that exposes OpenCV library to the web.

To use OpenCV.js, you just need to include cv.js:

<script async src="cv.js"></script>

When opening your web page, OpenCV.js will load the cv.data file via XMLHttpRequest. Here is the code snippet that I extracted from the complex sample code for monitoring the download status:

// Status hooks for the cv.js loader. Declared with `var` so the object is
// visible as a global named `Module`.
var Module = {
  /**
   * Receives loader status text and flags OpenCV as ready when the status
   * becomes the empty string.
   *
   * Repeated identical messages are ignored, and progress messages of the
   * form "label (done/total)" are throttled to at most one every 30 ms.
   * The last accepted message and its timestamp are kept on
   * `Module.setStatus.last`.
   *
   * @param {string} text - Status message from the loader.
   */
  setStatus: function (text) {
    if (!Module.setStatus.last) Module.setStatus.last = {
      time: Date.now(),
      text: ''
    };
    var last = Module.setStatus.last;
    if (text === last.text) return; // nothing new to report
    var m = text.match(/([^(]+)\((\d+(\.\d+)?)\/(\d+)\)/);
    var now = Date.now();
    if (m && now - last.time < 30) return; // if this is a progress update, skip it if too soon
    // Remember what was accepted so the two checks above have something to compare against.
    last.time = now;
    last.text = text;
    if (m) {
      // Keep only the label part of a progress message.
      text = m[1];
    }
    if (text === '') {
      isOpenCVReady = true;
      console.log('OpenCV is ready');
      // onPreprocess();
    }

  },
  totalDependencies: 0,
  /**
   * Tracks the largest number of outstanding dependencies seen so far and
   * reports progress through setStatus.
   *
   * @param {number} left - Number of dependencies still outstanding.
   */
  monitorRunDependencies: function (left) {
    this.totalDependencies = Math.max(this.totalDependencies, left);
    Module.setStatus(left ? 'Preparing... (' + (this.totalDependencies - left) + '/' + this.totalDependencies + ')' : 'All downloads complete.');
  }
};
Module.setStatus('Downloading...');

Once OpenCV context is ready, we can call any OpenCV methods in JavaScript. However, there seems to be a bug:

OpenCV.js bug

I debugged line by line and finally focused on the following code:

// Workaround: running this convexHull call once before calling other OpenCV.js APIs gives the expected result.
cv.convexHull(contours.get(0), item, false, true);

As long as I run the snippet before calling other APIs, I can get the expected result.

OpenCV.js: edge detection

The full code for skipping the bug:

/**
 * Warm-up routine that works around the convexHull problem: it runs blur,
 * Canny, findContours and convexHull once on the pixels of a newly created
 * canvas that nothing has been drawn on, then throws the results away.
 *
 * Every OpenCV object allocated here (including the contour handle returned
 * by `contours.get(0)`) is released in a `finally` block, so nothing is
 * leaked even when one of the OpenCV calls throws. Exceptions still
 * propagate to the caller.
 */
function onPreprocess() {

  const canvas = document.createElement('canvas');
  const ctx = canvas.getContext('2d');
  const imgData = ctx.getImageData(0, 0, canvas.width, canvas.height);

  // Everything pushed here is deleted on the way out, success or failure.
  const allocated = [];
  const track = function (obj) {
    allocated.push(obj);
    return obj;
  };

  try {
    const src = track(cv.matFromArray(imgData, cv.CV_8UC4));
    const canny_output = track(new cv.Mat());
    const blurred = track(new cv.Mat());
    const cthresh = 75;
    cv.blur(src, blurred, [5, 5], [-1, -1], cv.BORDER_DEFAULT);
    cv.Canny(blurred, canny_output, cthresh, cthresh * 2, 3, 0);

    const contours = track(new cv.MatVector());
    const hierarchy = track(new cv.Mat());
    // NOTE(review): 3 and 2 look like OpenCV's RETR_TREE and
    // CHAIN_APPROX_SIMPLE enum values — confirm against this cv.js build.
    cv.findContours(canny_output, contours, hierarchy, 3, 2, [0, 0]);

    const item = track(new cv.Mat());
    const firstContour = contours.get(0);
    if (firstContour && typeof firstContour.delete === 'function') {
      // get() hands back its own handle, which needs its own delete().
      track(firstContour);
    }
    // For preprocessing. Bug?
    cv.convexHull(firstContour, item, false, true);
  } finally {
    // Release in reverse allocation order.
    for (let i = allocated.length - 1; i >= 0; i -= 1) {
      allocated[i].delete();
    }
  }

}

Now, let’s play the web document management app:

web document management

Source Code

https://github.com/yushulx/javascript-image-storage