I downloaded pjscrape (running PhantomJS under the hood), and in fact, page queries returned fully populated content, including dynamic content. Unfortunately, pjscrape only emits JSON or CSV. I need HTML.
Using PhantomJS alone, I have this script (call is my-query.js):
var page = require('webpage').create();
page.open('http://www.sonoma.edu/calendar/groups/clubs.html', function (status) {
console.log("status: " + status);
if (status !== "success") {
console.log("Unable to access network");
} else {
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.11.0/jquery.min.js", function() {
console.log("Got jQuery...");
var fullyPopulatedContent = null;
page.evaluate(function() {
$(document).ready(function() {
fullyPopulatedContent = $("html").html();
});
});
window.setTimeout(function() {
console.log(fullyPopulatedContent);
}, 10000);
});
}
});
But this logic never sets fullyPopulatedContent after the page.evaluate is done. IE, fullyPopulatedContent is just always null.
This seems like such a trivial application that you would think PhantomJS would do it out of the box for free.
Any clues how to make such queries work, when the target URL comprises content dynamically populated via Ajax/javascript or frames? And if frames are involved, can you also please explain how PhantomJS navigates through frame content, as the online documentation and examples are not clear on that topic.