I have this function running in a Google Sheets script that pulls HTML from subreddits and returns them to a spreadsheet. It works for me some/most of the time, but other times I get an error "Could not parse text. (line 13)" which is the line with var doc = Xml.parse(page, true);
. Any idea why this is happening or is this just a bug with Google Scripts? Here's the code that works...sometimes.
function getRedditHTML() {
var entries_array = [];
var subreddit_array = ['https://www.reddit.com/r/news/','https://www.reddit.com/r/funny/','https://www.reddit.com/r/science/'];
for (var s = 0; s < subreddit_array.length; s++) {
var page = UrlFetchApp.fetch(subreddit_array[s]);
//this is Line 13 that is breaking
var doc = Xml.parse(page, true);
var bodyHtml = doc.html.body.toXmlString();
doc = XmlService.parse(bodyHtml);
var root = doc.getRootElement();
var entries = getElementsByClassName(root,'thing');
for (var i = 0; i < entries.length; i++) {
var title = getElementsByClassName(entries[i],'title');
title = XmlService.getRawFormat().format(title[1]).replace(/<[^>]*>/g, "");
var link = getElementsByClassName(entries[i],'comments');
link = link[0].getAttribute('href').getValue();
var rank = getElementsByClassName(entries[i],'rank');
rank = rank[0].getValue();
var likes = getElementsByClassName(entries[i],'likes');
likes = likes[0].getValue();
entries_array.push([rank, likes, title, link]);
}
}
return entries_array.sort(function (a, b) {
return b[1] - a[1];
});
}
thing
by itself - there are tags such asthing_id
- Is it mandatory to do it via apps script for you? Because I have a couple other really easy ways to import it directly into the sheet – Aurielle Perlmannthing
is on each div that contains a post in the HTML. Each div also has a unique id but I want to pull all posts, which is why I pulled by class name. I tried other built-in functions in Google Sheets (e.g.IMPORTFEED
) but it couldn't give me specific values I wanted like "rank" or "likes" so I decided to make this script. If you have other methods let me know. – Keith