Hey all,
Newbie here, but I had a trawl around and couldn’t find an answer so I wrote my own.
The problem: Wix does not generate dynamic sitemaps.
The solution: Generate it off the sitemap XML files, dynamically, and cache the result.
Requirements:
- a blank page
- a text box
- a button
- 2 collections: PageTitles and SiteMap (to cache things for performance)
Code:
import { fetch } from 'wix-fetch';
import wixData from 'wix-data';
import wixUsers from'wix-users';
// Page bootstrap: the refresh button is shown to users whose role is 'Admin'
// and hidden from everyone else, then the sitemap load is started.
$w.onReady(() => {
  const { role } = wixUsers.currentUser;
  const refreshButton = $w('#button5');
  if (role !== 'Admin') {
    refreshButton.hide();
  } else {
    refreshButton.show();
  }
  doLoad();
});
/**
 * Empties both caches, then reloads the sitemap while reporting progress on
 * the refresh button ('#button5') and the sitemap text element ('#text9').
 *
 * @returns {Promise<void>} Settles after doLoad() has settled.
 */
async function doClearCache() {
  await clearPageTitlesCache();
  await clearSiteMapCache();
  const refreshButton = $w('#button5');
  refreshButton.label = "Sitemap cleared";
  $w('#text9').text = "Generating sitemap...";
  try {
    // Awaited so the status label stays visible while the reload runs;
    // without the await it was overwritten on the very next statement.
    await doLoad();
  } catch (error) {
    // Same log-and-continue handling as the rest of the file.
    console.error(error);
  } finally {
    refreshButton.label = "Refresh Sitemap";
  }
}
/**
 * Removes every row from the 'PageTitles' collection.
 *
 * The ids are collected across all result pages before anything is deleted,
 * so collections with more than 1000 rows are emptied completely instead of
 * only losing their first page.
 *
 * @returns {Promise<void>} Always resolves; failures are logged.
 */
async function clearPageTitlesCache() {
  try {
    const staleIds = [];
    let page = await wixData.query('PageTitles')
      .limit(1000) // rows requested per page
      .find();
    while (page && page.items && page.items.length > 0) {
      staleIds.push(...page.items.map(item => item._id));
      if (!page.hasNext()) {
        break;
      }
      page = await page.next();
    }
    await Promise.all(staleIds.map(id => wixData.remove('PageTitles', id)));
  } catch (error) {
    console.error(error);
  }
}
/**
 * Renders the sitemap into '#text9'.
 *
 * Any cached HTML is shown first. The sitemap is then rebuilt from
 * "/sitemap.xml" when checkSitemapChanges() reports a difference or when
 * nothing was cached.
 *
 * @returns {Promise<void>} Settles after any rebuild has finished, so callers
 *   can await the complete load.
 */
async function doLoad() {
  const parentElement = $w('#text9');
  const cachedHTML = await getHTMLFromCache();
  if (cachedHTML) {
    parentElement.html = cachedHTML;
  }
  // Check if there are changes in the sitemap XML files
  const hasChanges = await checkSitemapChanges();
  // The cached HTML is already on screen when nothing changed, so a second
  // cache read is unnecessary; only a rebuild is left to do.
  if (hasChanges || !cachedHTML) {
    await fetchAndParseXML("/sitemap.xml", parentElement);
  }
}
/**
 * Compares the page URLs listed by the live sub-sitemaps with the URLs stored
 * in the PageTitles cache, and clears the SiteMap cache when they differ.
 *
 * @returns {Promise<boolean>} true when a URL was added or removed (the
 *   SiteMap cache has then been cleared); false when nothing changed, when the
 *   index lists no sub-sitemaps, or when any step failed (the error is logged).
 */
async function checkSitemapChanges() {
  try {
    const indexXML = await fetchSitemapText("https://www.michael-elliott.photography/sitemap.xml");
    const sitemapXMLs = extractSitemapUrls(indexXML);
    if (!sitemapXMLs) {
      return false;
    }
    const pageURLs = await getPageURLsFromCache();
    // Entries without a <loc> come back as '' and are skipped.
    const subSitemapXMLs = await Promise.all(
      sitemapXMLs.filter(Boolean).map(sitemapURL => fetchSitemapText(sitemapURL))
    );
    const liveURLs = new Set();
    for (const subSitemapXML of subSitemapXMLs) {
      // extractUrls() returns null for a document without <loc> entries;
      // spreading that null used to throw and was reported as "no changes".
      for (const pageURL of extractUrls(subSitemapXML) || []) {
        liveURLs.add(pageURL);
      }
    }
    const cachedURLs = new Set(pageURLs);
    const hasAdded = [...liveURLs].some(url => !cachedURLs.has(url));
    const hasDeleted = pageURLs.some(url => !liveURLs.has(url));
    if (hasAdded || hasDeleted) {
      // Clear the SiteMap cache if there are changes
      await clearSiteMapCache();
      return true;
    }
    return false;
  } catch (error) {
    console.error(error);
    return false;
  }
}

/**
 * Downloads one sitemap document as text.
 *
 * @param {string} url - Address of the sitemap XML.
 * @returns {Promise<string>} The response body.
 * @throws {Error} When the response is not OK, so an error page is never
 *   mistaken for a sitemap that lists no URLs.
 */
async function fetchSitemapText(url) {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Sitemap request failed (${response.status}): ${url}`);
  }
  return response.text();
}
/**
 * Builds an HTML list for the sitemap document at `url`: one link per <url>
 * entry, followed by one nested list per <sitemap> entry, built recursively.
 *
 * @param {string} url - Address of the sitemap XML to render.
 * @param {object|null} parentElement - When truthy, its `html` property is
 *   set to the generated markup.
 * @param {boolean} [shouldCache=true] - Whether to store the markup via
 *   cacheGeneratedHTML(); recursive calls pass false.
 * @returns {Promise<string|undefined>} The generated markup (now returned even
 *   when `parentElement` is given), or undefined when an error was logged.
 */
async function fetchAndParseXML(url, parentElement, shouldCache = true) {
  try {
    const response = await fetch(url);
    const xml = await response.text();
    let ulHTML = '<ul style="font-family: Arial; font-size: 16px; line-height: 1.4;">';
    const pages = extractPages(xml);
    if (pages) {
      // getPageURL() yields '' for an entry without <loc>; such entries are
      // dropped rather than rendered as href="" links and fetched as ''.
      const pageURLs = pages.map(page => getPageURL(page)).filter(Boolean);
      const pageTitles = pageURLs.length > 0 ? await getPageTitles(pageURLs) : {};
      for (const pageURL of pageURLs) {
        const pageTitle = await resolveSitemapPageTitle(pageURL, pageTitles[pageURL]);
        // Fall back to the URL so a page without a title still gets a visible link.
        ulHTML += '<li><a href="' + pageURL + '">' + (pageTitle || pageURL) + '</a></li>';
      }
    }
    const sitemapXMLs = extractSitemapUrls(xml);
    if (sitemapXMLs) {
      for (const sitemapURL of sitemapXMLs) {
        const parentTitle = await getParentTitleFromURL(sitemapURL);
        // A failed sub-sitemap resolves to undefined and is left out.
        const subSitemapHTML = await fetchAndParseXML(sitemapURL, null, false);
        if (subSitemapHTML) {
          ulHTML += '<li>' + parentTitle + '<ul>' + subSitemapHTML + '</ul></li>';
        }
      }
    }
    ulHTML += '</ul>';
    if (shouldCache) {
      // Cache the generated HTML code
      await cacheGeneratedHTML(ulHTML);
    }
    if (parentElement) {
      parentElement.html = ulHTML;
    }
    return ulHTML;
  } catch (error) {
    console.error(error);
  }
}

/**
 * Resolves the display title for one page: the title already looked up, else
 * the PageTitles cache, else the page itself.
 *
 * @param {string} pageURL - Page address.
 * @param {string|undefined} knownTitle - Title from the bulk lookup, if any.
 * @returns {Promise<string>} Title without the " | Michael Elliott" suffix;
 *   '' when no title could be found.
 */
async function resolveSitemapPageTitle(pageURL, knownTitle) {
  let pageTitle = knownTitle;
  if (!pageTitle) {
    pageTitle = await getPageTitleFromCache(pageURL);
  }
  if (!pageTitle) {
    pageTitle = await getPageTitle(pageURL);
    // Written even for an empty title: checkSitemapChanges() compares live
    // URLs with the rows in PageTitles, so this row marks the URL as seen.
    await cachePageTitle(pageURL, pageTitle);
  }
  return pageTitle.replace(' | Michael Elliott', '');
}
/**
 * Looks up titles for many pages at once: the PageTitles cache is consulted
 * first, and every URL still without a title is fetched through
 * getPageTitle(). Non-empty fetched titles are written back to the cache.
 *
 * @param {string[]} urls - Page addresses; duplicates are looked up once.
 * @returns {Promise<Object<string, string>>} Map of URL to title; '' marks a
 *   page whose title could not be determined.
 */
async function getPageTitles(urls) {
  const titles = {};
  const uniqueURLs = [...new Set(urls || [])];
  if (uniqueURLs.length === 0) {
    return titles;
  }
  const cacheResults = await wixData.query('PageTitles')
    .hasSome('url', uniqueURLs)
    // Without an explicit limit only the default page of results comes back,
    // so titles beyond it looked uncached and were fetched and inserted again.
    .limit(1000)
    .find();
  cacheResults.items.forEach(item => {
    titles[item.url] = item.title;
  });
  const missingURLs = uniqueURLs.filter(url => !titles[url]);
  // getPageTitle() resolves to '' on failure, so one unreachable page no
  // longer rejects the whole batch.
  const fetchedTitles = await Promise.all(missingURLs.map(url => getPageTitle(url)));
  const cacheWrites = [];
  missingURLs.forEach((url, index) => {
    const title = fetchedTitles[index];
    titles[url] = title;
    if (title) {
      cacheWrites.push(cachePageTitle(url, title));
    }
  });
  await Promise.all(cacheWrites);
  return titles;
}
/**
 * Derives a heading from a sub-sitemap address, e.g.
 * ".../blog-posts-sitemap.xml" becomes "Blog posts".
 *
 * @param {string} url - Address of a sub-sitemap.
 * @returns {Promise<string>} The heading, or '' when the address does not
 *   contain "/<name>-sitemap.xml".
 */
async function getParentTitleFromURL(url) {
  // The dot is escaped so only a literal ".xml" matches; unescaped, it
  // accepted any character in that position.
  const match = url.match(/\/([^/]+)-sitemap\.xml/);
  if (match && match[1]) {
    return capitalizeFirstLetter(match[1].replace(/-/g, ' '));
  }
  return '';
}
/**
 * Requests the first bytes of a page and pulls the text of its <title> tag.
 *
 * @param {string} url - Page address.
 * @returns {Promise<string>} The title text, or '' when there is no non-empty
 *   title or the request failed (the error is logged).
 */
async function getPageTitle(url) {
  const titlePattern = /<title[^>]*>([^<]*)/i;
  try {
    // Only the start of the document is requested.
    const response = await fetch(url, { headers: { 'Range': 'bytes=0-2048' } });
    const markup = await response.text();
    const found = markup.match(titlePattern);
    return found && found[1] ? found[1] : '';
  } catch (error) {
    console.error(error);
    return '';
  }
}
/**
 * Reads the cached title for one page from the 'PageTitles' collection.
 *
 * @param {string} url - Page address used as the lookup key.
 * @returns {Promise<string>} The cached title with " | Michael Elliott"
 *   removed, or '' when nothing is cached or the lookup failed (logged).
 */
async function getPageTitleFromCache(url) {
  try {
    const lookup = wixData.query('PageTitles').eq('url', url).limit(1);
    const result = await lookup.find();
    const hasHit = result && result.items && result.items.length > 0;
    if (!hasHit) {
      return '';
    }
    const cachedTitle = result.items[0].title;
    // Remove the text " | Michael Elliott"
    return cachedTitle.replace(" | Michael Elliott", "");
  } catch (error) {
    console.error(error);
    return '';
  }
}
/**
 * Stores one URL/title pair as a new row in the 'PageTitles' collection.
 *
 * @param {string} url - Page address.
 * @param {string} title - Title to remember for that page.
 * @returns {Promise<void>} Always resolves; an insert failure is only logged.
 */
async function cachePageTitle(url, title) {
  const record = { url: url, title: title };
  try {
    await wixData.insert('PageTitles', record);
  } catch (error) {
    console.error(error);
  }
}
/**
 * Stores the rendered sitemap markup as a new row in the 'SiteMap' collection.
 *
 * @param {string} html - Generated sitemap markup.
 * @returns {Promise<void>} Always resolves; an insert failure is only logged.
 */
async function cacheGeneratedHTML(html) {
  const record = { html: html };
  try {
    await wixData.insert('SiteMap', record);
  } catch (error) {
    console.error(error);
  }
}
/**
 * Reads the cached sitemap markup from the 'SiteMap' collection.
 *
 * @returns {Promise<string>} The `html` field of the first row found, or ''
 *   when the collection is empty or the query failed (the error is logged).
 */
async function getHTMLFromCache() {
  try {
    const lookup = await wixData.query('SiteMap').limit(1).find();
    const isEmpty = !(lookup && lookup.items && lookup.items.length > 0);
    return isEmpty ? '' : lookup.items[0].html;
  } catch (error) {
    console.error(error);
    return '';
  }
}
/**
 * Lists the `url` field of every row in the 'PageTitles' collection.
 *
 * All result pages are read, so sites with more than 1000 cached pages are
 * no longer cut off at the first page of results.
 *
 * @returns {Promise<string[]>} The cached URLs; [] when the collection is
 *   empty or the query failed (the error is logged).
 */
async function getPageURLsFromCache() {
  try {
    const urls = [];
    let page = await wixData.query('PageTitles')
      .limit(1000) // rows requested per page
      .find();
    while (page && page.items && page.items.length > 0) {
      for (const item of page.items) {
        urls.push(item.url);
      }
      if (!page.hasNext()) {
        break;
      }
      page = await page.next();
    }
    return urls;
  } catch (error) {
    console.error(error);
    return [];
  }
}
/**
 * Removes every row from the 'SiteMap' collection.
 *
 * cacheGeneratedHTML() inserts a new row on every rebuild, so more than one
 * row can exist; removing only a single row left stale markup behind.
 *
 * @returns {Promise<void>} Always resolves; failures are logged.
 */
async function clearSiteMapCache() {
  try {
    const staleIds = [];
    let page = await wixData.query('SiteMap')
      .limit(1000) // rows requested per page
      .find();
    while (page && page.items && page.items.length > 0) {
      staleIds.push(...page.items.map(item => item._id));
      if (!page.hasNext()) {
        break;
      }
      page = await page.next();
    }
    await Promise.all(staleIds.map(id => wixData.remove('SiteMap', id)));
  } catch (error) {
    console.error(error);
  }
}
/**
 * Collects every <url>...</url> element of a sitemap document.
 *
 * @param {string} xml - Sitemap XML text.
 * @returns {string[]|null} The matched elements including their tags, or null
 *   when the document contains none.
 */
function extractPages(xml) {
  const urlElements = xml.match(/<url>([\s\S]*?)<\/url>/g);
  return urlElements ? urlElements : null;
}
/**
 * Reads the address inside the first <loc> element of one sitemap entry.
 *
 * @param {string} page - A single <url> element as text.
 * @returns {string} The <loc> content, or '' when the entry has no
 *   single-line <loc> element.
 */
function getPageURL(page) {
  const locMatch = page.match(/<loc>(.*?)<\/loc>/);
  return locMatch ? locMatch[1] : '';
}
/**
 * Lists the content of every <loc> element in a sitemap document.
 *
 * @param {string} xml - Sitemap XML text.
 * @returns {string[]|null} The addresses in document order, or null when the
 *   document contains no <loc> element.
 */
function extractUrls(xml) {
  const openTag = '<loc>';
  const closeTag = '</loc>';
  const locElements = xml.match(/<loc>(.*?)<\/loc>/g);
  if (!locElements) {
    return null;
  }
  // Every match starts with the opening tag and ends with the closing tag,
  // so trimming both ends leaves just the address.
  return locElements.map(element => element.slice(openTag.length, element.length - closeTag.length));
}
/**
 * Lists the address of every <sitemap> entry in a sitemap index document.
 *
 * @param {string} xml - Sitemap index XML text.
 * @returns {string[]|null} One entry per <sitemap> element ('' for an element
 *   without a single-line <loc>), or null when there are no <sitemap> elements.
 */
function extractSitemapUrls(xml) {
  const sitemapElements = xml.match(/<sitemap>([\s\S]*?)<\/sitemap>/g);
  if (!sitemapElements) {
    return null;
  }
  const locPattern = /<loc>(.*?)<\/loc>/;
  const addresses = [];
  for (const element of sitemapElements) {
    const loc = element.match(locPattern);
    addresses.push(loc ? loc[1] : '');
  }
  return addresses;
}
/**
 * Upper-cases the first character of a string and leaves the rest untouched.
 *
 * @param {string} string - Text to capitalize.
 * @returns {string} The capitalized text ('' stays '').
 */
function capitalizeFirstLetter(string) {
  const initial = string.charAt(0);
  const remainder = string.slice(1);
  return `${initial.toUpperCase()}${remainder}`;
}
/**
 * Click handler for '#button5', wired up from the Properties & Events panel
 * (see http://wix.to/UcBnC-4). It clears both caches and regenerates the
 * sitemap by delegating to doClearCache().
 * [Read more](https://www.wix.com/corvid/reference/$w.ClickableMixin.html#onClick)
 *
 * @param {$w.MouseEvent} event - The click event; not used.
 * @returns {Promise<void>} Settles once doClearCache() has settled.
 */
export async function button5_click(event) {
  await doClearCache();
}
Notes:
- there’s some styling in there to output the sitemap as an unordered list in 16px Arial with 1.4 line spacing
- there’s no logic to order the parent or child elements
- this should recurse to any depth through nested sitemap XML files
- the first run will be slow; subsequent runs should be instant, and if there are any changes the sitemap updates in the background
- for admin users, add a button that allows you to force a refresh at any point.
Thoughts? Have I reinvented the wheel, or is this a reasonable solution to the problem?