Scrape the Web as REST
With FaasPlus, web scraping is simple and efficient, allowing you to retrieve and
parse data from websites quickly. By leveraging the jsdom library, you can easily
manipulate and extract information from HTML content, making it ideal for
collecting data from external sources.
Example: Scraping Wikipedia Biography
This function accesses the Wikipedia page for the person specified by the
firstName and lastName parameters, parses the content using jsdom, and returns
key details found in the biography infobox.
/**
 * Scrapes the biography infobox of a person's English Wikipedia page.
 *
 * @param {Function} fetch - Fetch-compatible function; called with the page URL
 *   and expected to resolve to a response exposing `ok` and `text()`.
 * @param {Function} jsdom - Constructor invoked as `new jsdom(html)`; the
 *   resulting instance must expose `window.document`.
 * @param {object} req - Request whose `params.firstName` and `params.lastName`
 *   identify the person to look up.
 * @returns {Promise<object>} Infobox label text mapped to its data text. Empty
 *   when the request is unsuccessful or the page has no biography infobox.
 */
const handleRequest = async (fetch, jsdom, req) => {
  const { firstName, lastName } = req.params;

  // Wikipedia page titles use underscores in place of spaces.
  const title = `${firstName}_${lastName}`.replace(/\s+/g, '_');
  // Encode the title so characters such as '?', '#' or '/' coming from the
  // request cannot alter the path, query or fragment of the fetched URL.
  const url = `https://en.wikipedia.org/wiki/${encodeURIComponent(title)}`;

  // Fetch Wikipedia page
  const response = await fetch(url);
  if (!response.ok) {
    // An error page (e.g. 404) carries no biography data; skip parsing it.
    return {};
  }
  const html = await response.text();

  // Parse HTML with jsdom
  const dom = new jsdom(html);
  const infobox = dom.window.document.querySelector('.infobox.biography.vcard');
  if (!infobox) {
    return {};
  }

  // Extract data from table rows; rows lacking either a label or a data cell
  // (images, section headers, ...) are skipped.
  const details = {};
  infobox.querySelectorAll('tr').forEach((row) => {
    const label = row.querySelector('.infobox-label');
    const data = row.querySelector('.infobox-data');
    if (label && data) {
      details[label.textContent.trim()] = data.textContent.trim();
    }
  });
  return details;
};
Response:
{
"Born": "March 14, 1879",
"Died": "April 18, 1955",
"Alma mater": "Swiss Federal Polytechnic (Diploma)",
"Known for": "Theory of relativity, E=mc², Einstein field equations",
...
}