How to Convert PDF to Images in HTML5
Converting PDFs to images is a common task in our daily work. We can use the images for archiving and further editing like OCR. With Dynamic Web TWAIN, a JavaScript document scanning library which has built-in support for various file formats, we can convert PDF to images in HTML5.
In this article, we are going to write a web demo to do the conversion.
New HTML File
Create a new HTML file with the following template:
<!DOCTYPE html>
<html lang="en">
<head>
  <!-- Single encoding declaration, placed first so it is seen before any other head content. -->
  <meta charset="utf-8">
  <title>Convert PDF to Images</title>
  <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=no">
  <style>
  </style>
</head>
<body>
  <!-- Root element for the demo's markup. -->
  <div class="app">
  </div>
  <!-- Inline script for the demo's JavaScript. -->
  <script>
  </script>
</body>
</html>
Include Dynamic Web TWAIN
In the head of the HTML, include the library of Dynamic Web TWAIN via CDN.
<!-- Dynamic Web TWAIN 18.4.2, loaded from the unpkg CDN. -->
<script src="https://unpkg.com/dwt@18.4.2/dist/dynamsoft.webtwain.min.js"></script>
Initialize Dynamic Web TWAIN
-
Add a container for holding the controls of Web TWAIN in HTML.
<!-- Wrapper around the element that Web TWAIN binds its controls to. -->
<div class="viewer">
  <div id="dwtcontrolContainer"></div>
</div>
-
Load Dynamic Web TWAIN and bind its controls to the container. A product key is needed. You can apply for one here.
let DWObject; // an instance of Web TWAIN, assigned in the OnWebTwainReady handler below

// Loading is triggered explicitly through Dynamsoft.DWT.Load() inside init().
Dynamsoft.DWT.AutoLoad = false;
Dynamsoft.DWT.ProductKey = "DLS2eyJoYW5kc2hha2VDb2RlIjoiMjAwMDAxLTE2NDk4Mjk3OTI2MzUiLCJvcmdhbml6YXRpb25JRCI6IjIwMDAwMSIsInNlc3Npb25QYXNzd29yZCI6IndTcGR6Vm05WDJrcEQ5YUoifQ=="; // one-day trial
Dynamsoft.DWT.ResourcesPath = "https://unpkg.com/dwt@18.4.2/dist";
init();

/**
 * Registers the `dwtcontrolContainer` element as the Web TWAIN container,
 * hooks the `OnWebTwainReady` event and starts loading the library.
 *
 * When the event fires, the Web TWAIN instance bound to the container is
 * stored in the outer `DWObject` variable and its viewer is sized to fill
 * the container.
 */
function init() {
  Dynamsoft.DWT.Containers = [{ ContainerId: 'dwtcontrolContainer', Width: 270, Height: 350 }];
  Dynamsoft.DWT.RegisterEvent('OnWebTwainReady', function () {
    DWObject = Dynamsoft.DWT.GetWebTwain('dwtcontrolContainer');
    DWObject.Viewer.width = "100%";
    DWObject.Viewer.height = "100%";
    // NOTE(review): presumably a 1x1 grid, i.e. one image shown at a time - confirm against the API reference.
    DWObject.SetViewMode(1, 1);
  });
  Dynamsoft.DWT.Load();
}
Load PDF Files
Next, we can load PDF files into Web TWAIN’s buffer using its LoadImageEx method.
// Called when the chosen PDF has been loaded into the buffer.
const onPdfLoaded = () => {
  console.log("success");
};

// Called when loading fails; only the error message is logged.
const onPdfLoadFailed = (errorCode, errorString) => {
  console.log(errorString);
};

// With the dialog enabled, the "Open File" dialog will be opened,
// so the file name argument can be left empty.
DWObject.IfShowFileDialog = true;
DWObject.LoadImageEx("", Dynamsoft.DWT.EnumDWT_ImageType.IT_PDF, onPdfLoaded, onPdfLoadFailed);
There are several options we can configure for reading PDF files.
- Convert Mode:
- Image only. Directly extract the images in PDF files.
- Render all. Render pages into images. The output image’s resolution and bit-depth might be different from its original values.
- Auto. Automatically detect which mode to use based on whether the PDF page contains only one image.
- Resolution: specify the DPI for the rendering of pages. Only valid using the “Render all” mode.
- Render with annotations: render PDF annotations.
- Password: for encrypted PDFs.
We can add controls for configuring the options before calling the LoadImageEx
method.
HTML:
<!-- Controls whose values are read into the PDF reader options. -->
<div class="options">
  PDF Rasterizer Options:
  <br>
  <!-- Convert mode; the script maps the selected position to a convert mode. -->
  <label>
    Convert mode:
    <select id="modeSelect">
      <option>Auto</option>
      <option>Image only</option>
      <option>Render all</option>
    </select>
  </label>
  <br>
  <label>
    Render annotations:
    <input type="checkbox" id="renderAnnotationCheckbox">
  </label>
  <br>
  <!-- Password for encrypted PDFs. -->
  <label>
    Password:
    <input type="password" id="password">
  </label>
  <br>
  <!-- Rendering resolution; the option text is parsed as an integer. -->
  <label>
    Resolution:
    <select id="resolutionSelect">
      <option>200</option>
      <option>300</option>
      <option>600</option>
    </select>
  </label>
</div>
JavaScript:
// Map the position of the selected <option> in #modeSelect to a convert mode.
// Positions 0 and 1 are Auto and Image only; anything else falls back to Render all.
const convertModeSelect = document.getElementById("modeSelect");
let convertMode;
switch (convertModeSelect.selectedIndex) {
  case 0:
    convertMode = Dynamsoft.DWT.EnumDWT_ConvertMode.CM_AUTO;
    break;
  case 1:
    convertMode = Dynamsoft.DWT.EnumDWT_ConvertMode.CM_IMAGEONLY;
    break;
  default:
    convertMode = Dynamsoft.DWT.EnumDWT_ConvertMode.CM_RENDERALL;
}

const password = document.getElementById("password").value;
const renderAnnotations = document.getElementById("renderAnnotationCheckbox").checked;

// The <option> elements carry no value attribute, so the select's value is the
// option text. Reading `value` avoids the layout-dependent `innerText`, and the
// explicit radix keeps the parse decimal.
const resolutionSelect = document.getElementById("resolutionSelect");
const resolution = Number.parseInt(resolutionSelect.value, 10);

// Options handed to the PDF add-on.
const readerOptions = {
  convertMode,
  password,
  renderOptions: {
    resolution,
    renderAnnotations,
  },
};
DWObject.Addon.PDF.SetReaderOptions(readerOptions);
Export as JPG Files
Next, we can export the images in the buffer as JPG files. We have to convert each image into a blob and then download it.
/**
 * Exports every image in the Web TWAIN buffer as a JPG download.
 *
 * Images are handled one at a time: each is selected, converted with
 * `getBlob()` and passed to `downloadBlob()` under the name "<n>.jpg",
 * where n is the 1-based position in the buffer. Does nothing when
 * `DWObject` has not been set.
 *
 * @returns {Promise<void>} Settles once every image has been handed to `downloadBlob`.
 */
async function Download() {
  if (!DWObject) {
    return;
  }
  DWObject.IfShowFileDialog = false;
  let position = 0;
  // The buffer size is re-read on every pass, as in a plain index loop.
  while (position < DWObject.HowManyImagesInBuffer) {
    DWObject.SelectImages([position]);
    const jpgBlob = await getBlob();
    position += 1;
    downloadBlob(`${position}.jpg`, jpgBlob);
  }
}
/**
 * Starts a browser download of `blob` by clicking a temporary `<a download>` link.
 *
 * @param {string} filename - Name suggested to the browser for the saved file.
 * @param {Blob} blob - Content to download.
 */
function downloadBlob(filename, blob) {
  const link = document.createElement('a');
  const objectUrl = URL.createObjectURL(blob);
  link.href = objectUrl;
  link.download = filename;
  document.body.appendChild(link);
  try {
    link.click();
  } finally {
    // Always detach the temporary link, even if click() throws.
    document.body.removeChild(link);
    // Release the object URL so the blob can be garbage-collected. The revoke
    // is deferred rather than done inline so the browser has had a chance to
    // start reading the URL first.
    setTimeout(() => URL.revokeObjectURL(objectUrl), 1000);
  }
}
/**
 * Converts the image at `DWObject.CurrentImageIndexInBuffer` to a JPG blob.
 *
 * An image whose bit depth is 1 is first converted to grayscale in the buffer.
 *
 * @returns {Promise<Blob>} Resolves with the blob passed to the success callback;
 *   rejects with the error string passed to the failure callback.
 */
function getBlob() {
  const exportCurrentImage = (onDone, onFail) => {
    const isBlackAndWhite = DWObject.GetImageBitDepth(DWObject.CurrentImageIndexInBuffer) == 1;
    if (isBlackAndWhite) {
      // Convert black & white images to gray
      DWObject.ConvertToGrayScale(DWObject.CurrentImageIndexInBuffer);
    }
    DWObject.ConvertToBlob(
      [DWObject.CurrentImageIndexInBuffer],
      Dynamsoft.DWT.EnumDWT_ImageType.IT_JPG,
      (jpgBlob) => onDone(jpgBlob),
      (_code, message) => onFail(message)
    );
  };
  return new Promise(exportCurrentImage);
}
Source Code
Get the source code of the demo to have a try: