0
votes

I need to download files (PDFs) from URLs, save them locally, and then store them in an alternate file system that takes a buffer. The GET request for the URL works and the file is saved locally, but when I then try to read the file into a buffer, the buffer is empty.

When I run it a second time, after the file has already been saved, it does work. I think this is somehow related to closing the file. Does anyone have any ideas? My code is as follows -

/**
 * Starts an HTTPS download of `url` and pipes the response into `filename`.
 *
 * NOTE: this function returns (with `undefined`) as soon as the request has
 * been started. The `return file` below belongs to the response callback, not
 * to `savefile`, and nothing tells the caller when the data has been written.
 *
 * @param {string} filename - Path of the file to write.
 * @param {string} url - HTTPS URL to download.
 */
function savefile(filename, url) {

    const file = fs.createWriteStream(filename);
    const request = https.get(url, function(response) {
      // pipe() only starts the transfer; the data is written asynchronously.
      response.pipe(file);
            // Logged right after the pipe is set up, not when the write has finished.
            console.log("file saved");
            // Return value of the callback; it never reaches savefile's caller.
            return file;
    });;
}

// POST /addfile: downloads req.body.url to req.body.filename, then reads the
// file back into a buffer. (The snippet is cut off before the handler closes.)
app.post('/addfile', function(req, res) {

    var filename = req.body.filename;
    var url = req.body.url;
    // savefile() has no return value, so `file` is undefined here, and the
    // download has only been started at this point.
    var file = savefile(filename, url);
    // Runs immediately after the download was started, so on the first run
    // the file can still be empty.
    let testFile = fs.readFileSync(filename);
    // readFileSync (without an encoding) already returns a Buffer.
    // NOTE(review): `new Buffer(...)` is deprecated in Node.js in favour of Buffer.from().
    let testBuffer = new Buffer(testFile);
    // process testBuffer
1

1 Answer

1
votes

I think the problem here is that when you call savefile, it returns before the data has been read and saved to disk.

This means when you call fs.readFileSync, the file data is not yet present. It may take another few hundred milliseconds before the file is present. Remember the https.get function is not blocking (like most I/O functions in Node.js).

So the best way to proceed is to either use a callback function to indicate when we're done, or to use a Promise. I generally prefer the latter, since the code syntax is cleaner.

For example (with a Promise):

/**
 * Downloads `url` over HTTPS and streams the response body into `filename`.
 *
 * On success the promise resolves only after the write stream has emitted
 * 'close', i.e. after everything piped into it has been written, so the
 * caller can safely read the file back.
 *
 * The HTTP status code is not inspected: whatever body the server sends is
 * written to the file. On failure a partially written file may be left behind.
 *
 * @param {string} filename - Path of the file to create (or overwrite).
 * @param {string} url - HTTPS URL to download.
 * @returns {Promise<string>} Resolves with "File end" once the file has been
 *     closed; rejects with the underlying error if the request, the response
 *     or the file stream fails.
 */
function savefileWithPromise(filename, url) {
    return new Promise((resolve, reject) => {
        const file = fs.createWriteStream(filename);

        // Reject first: a promise settles only once, so the 'close' event that
        // destroy() triggers afterwards cannot turn the failure into a success.
        const fail = (err) => {
            reject(err);
            file.destroy();
        };

        file.on('error', fail);
        // The close handler fires when the response has been fully piped to the file stream.
        file.on('close', () => resolve("File end"));

        // Read data from url. pipe() does not forward errors, so the request
        // and the response each need their own 'error' listener.
        https.get(url, (response) => {
            response.on('error', fail);
            response.pipe(file);
        }).on('error', fail);
    });
}

// POST /addfile: downloads req.body.url to req.body.filename, then reads the
// saved file back into a buffer.
// NOTE(review): `filename` and `url` come straight from the request body, so
// the client controls both the path written on disk and the host contacted.
// Validate them before exposing this route.
app.post('/addfile', async function(req, res) {
    try {
        const { filename, url } = req.body;
        console.log(`/addfile: Reading from url: ${url}, writing to file ${filename}...`);
        // Wait until the download has been written and the file closed.
        await savefileWithPromise(filename, url);
        // readFileSync returns a buffer.
        const testFile = fs.readFileSync(filename);
        console.log(`File length: ${testFile.length} byte(s).`);
        res.status(200).send("ok");
    } catch (error) {
        // Without this, a failed download or read would become an unhandled
        // rejection and the client would never receive a response.
        console.error('/addfile: failed to save or read the file:', error);
        res.status(500).send('An error occurred');
    }
});

We can also do the same thing with callbacks:

/**
 * Downloads `url` over HTTPS and streams the response body into `filename`,
 * reporting completion through `callback`.
 *
 * The HTTP status code is not inspected: whatever body the server sends is
 * written to the file. On failure a partially written file may be left behind.
 *
 * @param {string} filename - Path of the file to create (or overwrite).
 * @param {string} url - HTTPS URL to download.
 * @param {function(?string, Error=): void} callback - Called exactly once.
 *     On success it is called as `callback("File end")` after the file has
 *     been closed. On failure it is called as `callback(null, err)`.
 */
function savefileWithCallback(filename, url, callback) {
    // Guards against invoking the callback twice (e.g. 'error' followed by 'close').
    let finished = false;

    const file = fs.createWriteStream(filename);

    const fail = (err) => {
        if (finished) {
            return;
        }
        finished = true;
        file.destroy();
        callback(null, err);
    };

    file.on('error', fail);
    // Setup close handler: fires once everything piped in has been written.
    file.on('close', () => {
        if (finished) {
            return;
        }
        finished = true;
        callback("File end");
    });

    // Read data from url. pipe() does not forward errors, so the request and
    // the response each need their own 'error' listener.
    https.get(url, (response) => {
        response.on('error', fail);
        response.pipe(file);
    }).on('error', fail);
}

// POST /addfile: downloads req.body.url to req.body.filename, then reads the
// saved file back into a buffer once the download callback fires.
// NOTE(review): `filename` and `url` come straight from the request body, so
// the client controls both the path written on disk and the host contacted.
// Validate them before exposing this route.
app.post('/addfile', function(req, res) {
    const { filename, url } = req.body;
    console.log(`/addfile: Reading from url: ${url}, writing to file ${filename}...`);
    savefileWithCallback(filename, url, function(status, err) {
        // Defensive: treat a second argument, when one is provided, as a download failure.
        if (err) {
            console.error('/addfile: download failed:', err);
            res.status(500).send('An error occurred');
            return;
        }
        try {
            // readFileSync returns a buffer.
            const testFile = fs.readFileSync(filename);
            console.log(`File length: ${testFile.length} byte(s).`);
            res.status(200).send("ok");
        } catch (readError) {
            // This callback runs outside Express' own error handling, so a
            // throw here would otherwise be an uncaught exception.
            console.error('/addfile: could not read the saved file:', readError);
            res.status(500).send('An error occurred');
        }
    });
});

And if you only need the data in a buffer, you can skip the file entirely and read the URL data straight into a buffer:

/**
 * Downloads `url` over HTTPS and collects the whole response body in memory.
 *
 * The HTTP status code is not inspected: the body is returned as-is.
 *
 * @param {string} url - HTTPS URL to download.
 * @returns {Promise<Buffer>} Resolves with the concatenated body once the
 *     response has ended; rejects if the request or the response fails, or if
 *     the connection closes before the body is complete.
 */
function readUrlDataToBuffer(url) {
    return new Promise((resolve, reject) => {
        https.get(url, (response) => {
            const chunks = [];
            response
                .on('data', (chunk) => chunks.push(chunk))
                .on('end', () => resolve(Buffer.concat(chunks)))
                .on('error', reject)
                .on('close', () => {
                    // 'close' without a complete message means the connection
                    // dropped mid-body; without this the promise would never
                    // settle. After a normal 'end' this reject is a no-op.
                    if (!response.complete) {
                        reject(new Error('Connection closed before the response was complete'));
                    }
                });
        }).on('error', reject);
    });
}

// POST /addfile: reads the data at req.body.url straight into a buffer,
// without writing a file.
// NOTE(review): `url` comes straight from the request body, so the client
// controls which host this server contacts. Validate it before exposing
// this route.
app.post('/addfile', async function(req, res) {
    try {
        const { url } = req.body;
        console.log(`/addfile: Reading from url: ${url}..`);
        const buffer = await readUrlDataToBuffer(url);
        console.log(`Buffer length: ${buffer.length} byte(s).`);
        res.send('ok');
    } catch (error) {
        // Log the cause so the failure is diagnosable; the client only gets a generic message.
        console.error('/addfile: failed to read url data:', error);
        res.status(500).send('An error occurred');
    }
});