2
votes

I need to convert HTML to PDF. I have tried jsPDF and read a lot of questions about this here on Stack Overflow. I have tried all the methods that exist: html(), fromHTML(), html2pdf and html2canvas. But all of them have various problems: either content is missing, the content is fuzzy, or the margins are completely off.

So I am trying a different route. I found the following code snippet, which converts the HTML to a Word document. And this works.

/**
 * Wraps the markup of the #source-html element in a Word-compatible HTML
 * shell and triggers a browser download of it as "document.doc".
 *
 * The document is delivered through a percent-encoded data: URI that is
 * assigned to a temporary anchor element, which is clicked programmatically
 * and then removed from the page again.
 */
function exportHTML() {
  const wordHeader = [
    "<html xmlns:o='urn:schemas-microsoft-com:office:office' ",
    "xmlns:w='urn:schemas-microsoft-com:office:word' ",
    "xmlns='http://www.w3.org/TR/REC-html40'>",
    "<head><meta charset='utf-8'><title>Export HTML to Word Document with JavaScript</title></head><body>",
  ].join("");
  const wordFooter = "</body></html>";
  const contentMarkup = document.getElementById("source-html").innerHTML;
  const wordDocument = wordHeader + contentMarkup + wordFooter;

  const dataUri = 'data:application/vnd.ms-word;charset=utf-8,' + encodeURIComponent(wordDocument);

  // The anchor only exists long enough to start the download.
  const downloadLink = document.createElement("a");
  document.body.appendChild(downloadLink);
  downloadLink.href = dataUri;
  downloadLink.download = 'document.doc';
  downloadLink.click();
  document.body.removeChild(downloadLink);
}

However, I do not want the Word file to be downloaded. I need to capture it and convert it to a base64 string, because then I can send it to a REST API that can convert the Word document to PDF. That REST API does not support HTML directly; otherwise I would just send the HTML. Hence the workaround: HTML to Word, then Word to PDF. P.S. I cannot use an online PDF solution because the content contains sensitive information; the REST API is an internal service.

2
Seems like a convoluted way to reach your end goal. Why not just post a question about the jsPDF code you were having trouble with? – APAD1
I did not post a specific question about jsPDF because that question has been asked and answered many times, and none of the responses I found gave me the result I needed. – Tommy

2 Answers

0
votes

However I do not want the word file to be downloaded. I need to capture it and convert it to a base64 string because then I can send it to a rest api that can convert the word document to pdf.

Then no need to insert it into a download link. Just base64 encode the string with btoa:

/**
 * Builds a Word-compatible HTML document from the markup of the #source-html
 * element and returns it base64-encoded, e.g. for posting to a conversion
 * service.
 *
 * The base64 payload is the UTF-8 bytes of the document itself. It is NOT a
 * percent-encoded `data:` URI: decoding the result yields the document text
 * directly, which is what a consumer expecting a file body needs.
 *
 * @returns {string} Base64 encoding of the UTF-8 bytes of the generated document.
 */
function exportHTML() {
  const header = "<html xmlns:o='urn:schemas-microsoft-com:office:office' " +
    "xmlns:w='urn:schemas-microsoft-com:office:word' " +
    "xmlns='http://www.w3.org/TR/REC-html40'>" +
    "<head><meta charset='utf-8'><title>Export HTML to Word Document with JavaScript</title></head><body>";
  const footer = "</body></html>";
  const sourceHTML = header + document.getElementById("source-html").innerHTML + footer;

  // btoa() rejects code points above 0xFF, so convert the text to UTF-8 bytes
  // first (matching the charset declared in the document's <meta> tag).
  const bytes = new TextEncoder().encode(sourceHTML);

  // Build the binary string in chunks so that large documents do not exceed
  // the engine's limit on the number of arguments to a single call.
  const CHUNK_SIZE = 0x8000;
  let binary = "";
  for (let offset = 0; offset < bytes.length; offset += CHUNK_SIZE) {
    binary += String.fromCharCode(...bytes.subarray(offset, offset + CHUNK_SIZE));
  }

  return btoa(binary);
}

Then you'll be free to use XMLHttpRequest to send the encoded string to your API endpoint. E.g.:

// Export the document as a base64 string and POST it to the conversion
// endpoint as a form-urlencoded field named "encodedString".
var encodedString = exportHTML();

var xhr = new XMLHttpRequest();
xhr.open('POST', '/my-conversion-endpoint', true);

xhr.setRequestHeader('Content-type', 'application/x-www-form-urlencoded');

xhr.onreadystatechange = function() {
  // Ignore intermediate state changes; act only once the request has completed.
  if (xhr.readyState !== XMLHttpRequest.DONE) {
    return;
  }
  if (xhr.status === 200) {
    // request finished
    alert(xhr.responseText);
  } else {
    // Surface failures instead of dropping them silently.
    console.error('Conversion request failed with status ' + xhr.status);
  }
};

// Base64 output may contain '+', '/' and '='. In a form-urlencoded body a raw
// '+' is decoded as a space, which would corrupt the payload, so the value
// must be percent-encoded before it is sent.
xhr.send('encodedString=' + encodeURIComponent(encodedString));
0
votes

Construct the file with "new Blob" and read it back as base64:

/**
 * Builds a Word-compatible HTML document from the markup of the #source-html
 * element, wraps it in a Blob and passes that Blob to getBase64().
 *
 * The Blob holds the document text itself (not a percent-encoded `data:` URI),
 * so whatever reads the Blob receives the actual file contents.
 */
function exportHTML() {
  const header = "<html xmlns:o='urn:schemas-microsoft-com:office:office' " +
    "xmlns:w='urn:schemas-microsoft-com:office:word' " +
    "xmlns='http://www.w3.org/TR/REC-html40'>" +
    "<head><meta charset='utf-8'><title>Export HTML to Word Document with JavaScript</title></head><body>";
  const footer = "</body></html>";
  const sourceHTML = header + document.getElementById("source-html").innerHTML + footer;

  // Strings handed to the Blob constructor are stored as UTF-8, matching the
  // charset declared in the document's <meta> tag.
  const wordFile = new Blob([sourceHTML], { type: 'application/msword' });
  getBase64(wordFile);
}

/**
 * Starts reading the given file as a data URL and logs the outcome to the
 * console: the reader's result once loading succeeds, or the error event
 * prefixed with 'Error: ' if reading fails.
 *
 * @param {Blob} file - The Blob or File handed to FileReader.readAsDataURL().
 */
function getBase64(file) {
  const fileReader = new FileReader();

  const logResult = () => {
    console.log(fileReader.result);
  };
  const logFailure = (failure) => {
    console.log('Error: ', failure);
  };

  fileReader.readAsDataURL(file);
  // The read completes asynchronously, so handlers attached right after
  // starting it are still in place when the events fire.
  fileReader.onload = logResult;
  fileReader.onerror = logFailure;
}
    
// Run the export as soon as this script executes.
exportHTML();
<div id="source-html">Hi <b>World</b>!</div>