Sitemap Parser And Visitor from ChatGPT
I got ChatGPT to create me some C# code to download a sitemap and then check the links in it.
Pretend you’re a developer. Write me some C# code to parse a remote sitemap and visit all the URLs listed in that sitemap, outputting the URL and the HTTP response code for those pages in a comma-separated format.
It gave me instructions on how to create the project and provided me the code.
Create a new console app:
dotnet new console -n SitemapChecker
cd SitemapChecker
Replace Program.cs with the code.
Run: dotnet run https://example.com/sitemap.xml
After several iterations, the code looks like this:-
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Net.Http;
using System.Threading;
using System.Threading.Tasks;
using System.Xml.Linq;
/// <summary>
/// Console tool that downloads one or more sitemaps (plain or gzip-compressed,
/// including sitemap indexes), requests every URL they list, and writes the URL,
/// HTTP status code, X-Cache header, HTTP method used and source sitemap to a CSV file.
/// </summary>
class Program
{
    // One client for the whole process: creating an HttpClient per request wastes
    // sockets and defeats connection pooling. It is intentionally never disposed.
    private static readonly HttpClient SharedHttpClient = new HttpClient();

    // Upper bound on the number of page checks in flight at the same time.
    private const int MaxConcurrentRequests = 10;

    // Characters that force a CSV field to be quoted.
    private static readonly char[] CsvSpecialChars = { ',', '"', '\r', '\n' };

    /// <summary>
    /// Entry point. Each command-line argument is treated as a sitemap URL.
    /// </summary>
    /// <param name="args">Sitemap URLs to process.</param>
    static async Task Main(string[] args)
    {
        if (args.Length == 0)
        {
            Console.WriteLine("Usage: dotnet run <sitemap_url1> <sitemap_url2> ...");
            return;
        }

        // Maps each page URL to the sitemap it was first found in.
        var allUrls = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

        // Collect URLs from all sitemaps; a failure in one does not stop the others.
        foreach (string sitemapUrl in args)
        {
            Console.WriteLine($"\nProcessing sitemap: {sitemapUrl}");
            try
            {
                await GetUrlsFromSitemapAsync(sitemapUrl, allUrls, sitemapUrl);
            }
            catch (Exception ex)
            {
                Console.Error.WriteLine($"Error processing {sitemapUrl}: {ex.Message}");
            }
        }

        Console.WriteLine($"\nTotal URLs collected: {allUrls.Count}");

        // Output filename for combined CSV
        string timestamp = DateTime.UtcNow.ToString("yyyyMMdd_HHmmss");
        string outputFile = $"sitemap_results_{timestamp}.csv";

        // Check all URLs and write combined CSV
        await CheckUrlsAsync(allUrls, outputFile);
        Console.WriteLine($"\nCombined results written to: {outputFile}");
    }

    /// <summary>
    /// Downloads a sitemap and returns its text, transparently decompressing gzip content.
    /// </summary>
    /// <param name="sitemapUrl">URL of the sitemap to download.</param>
    /// <returns>
    /// The sitemap text, or <c>null</c> if the download or decompression failed
    /// (the failure is reported on standard error).
    /// </returns>
    static async Task<string> DownloadSitemapAsync(string sitemapUrl)
    {
        byte[] data;
        try
        {
            data = await SharedHttpClient.GetByteArrayAsync(sitemapUrl);
        }
        catch (Exception ex)
        {
            Console.Error.WriteLine($"Failed to fetch sitemap {sitemapUrl}: {ex.Message}");
            return null;
        }

        // Detect gzip from the payload's magic bytes (0x1F 0x8B) rather than the URL
        // suffix, so compressed sitemaps without ".gz" work and a ".gz" URL whose
        // body is already plain text is not fed to the decompressor.
        bool isGzip = data.Length >= 2 && data[0] == 0x1F && data[1] == 0x8B;

        if (!isGzip)
        {
            // StreamReader strips a leading byte-order mark; Encoding.GetString keeps it,
            // and a BOM left at the start of the string can make XDocument.Parse fail.
            using var plainReader = new StreamReader(new MemoryStream(data));
            return await plainReader.ReadToEndAsync();
        }

        try
        {
            using var compressedStream = new MemoryStream(data);
            using var gzipStream = new GZipStream(compressedStream, CompressionMode.Decompress);
            using var reader = new StreamReader(gzipStream);
            return await reader.ReadToEndAsync();
        }
        catch (Exception ex)
        {
            Console.Error.WriteLine($"Failed to decompress {sitemapUrl}: {ex.Message}");
            return null;
        }
    }

    /// <summary>
    /// Collects every page URL reachable from a sitemap, following sitemap indexes.
    /// </summary>
    /// <param name="sitemapUrl">URL of the sitemap (or sitemap index) to read.</param>
    /// <param name="collectedUrls">Receives page URL → source sitemap; existing entries are kept.</param>
    /// <param name="sourceSitemap">Sitemap recorded as the source of URLs found directly in <paramref name="sitemapUrl"/>.</param>
    static async Task GetUrlsFromSitemapAsync(string sitemapUrl, Dictionary<string, string> collectedUrls, string sourceSitemap)
    {
        var visited = new HashSet<string>(StringComparer.Ordinal);
        await CollectUrlsAsync(sitemapUrl, collectedUrls, sourceSitemap, visited);
    }

    // Recursive worker for GetUrlsFromSitemapAsync; 'visited' stops cyclic sitemap indexes.
    private static async Task CollectUrlsAsync(string sitemapUrl, Dictionary<string, string> collectedUrls, string sourceSitemap, HashSet<string> visited)
    {
        // A sitemap index that (directly or indirectly) lists itself would otherwise recurse forever.
        if (!visited.Add(sitemapUrl))
        {
            Console.Error.WriteLine($"Skipping already-visited sitemap {sitemapUrl}");
            return;
        }

        string xmlContent = await DownloadSitemapAsync(sitemapUrl);
        if (xmlContent == null)
        {
            return;
        }

        XDocument doc;
        try
        {
            doc = XDocument.Parse(xmlContent);
        }
        catch (Exception ex)
        {
            Console.Error.WriteLine($"Failed to parse XML from {sitemapUrl}: {ex.Message}");
            return;
        }

        var root = doc.Root;
        if (root == null)
        {
            return;
        }

        // Look up <loc> in whatever namespace the root element uses. The root is matched
        // by local name only, so hard-coding the 0.9 namespace here would silently yield
        // nothing for sitemaps with no namespace or a different one. Using the root's
        // namespace also keeps extension elements in other namespaces out of the results.
        XName locName = root.Name.Namespace + "loc";

        switch (root.Name.LocalName)
        {
            case "sitemapindex":
                foreach (var loc in root.Descendants(locName))
                {
                    string nestedSitemap = loc.Value.Trim();
                    if (nestedSitemap.Length == 0)
                    {
                        continue;
                    }

                    // Nested sitemaps are recorded as their own source.
                    await CollectUrlsAsync(nestedSitemap, collectedUrls, nestedSitemap, visited);
                }
                break;

            case "urlset":
                foreach (var loc in root.Descendants(locName))
                {
                    string url = loc.Value.Trim();
                    if (url.Length == 0)
                    {
                        continue;
                    }

                    // First sitemap to list a URL wins; TryAdd avoids a second lookup.
                    collectedUrls.TryAdd(url, sourceSitemap);
                }
                break;
        }
    }

    /// <summary>
    /// Requests every URL (at most <see cref="MaxConcurrentRequests"/> at a time), echoes each
    /// result line to the console, and writes all lines to a CSV file.
    /// </summary>
    /// <param name="urlsWithSources">Page URL → sitemap it came from.</param>
    /// <param name="outputFile">Path of the CSV file to write.</param>
    static async Task CheckUrlsAsync(Dictionary<string, string> urlsWithSources, string outputFile)
    {
        using var semaphore = new SemaphoreSlim(MaxConcurrentRequests); // limit concurrency
        var resultsGate = new object();
        var results = new List<string>(urlsWithSources.Count + 1)
        {
            "URL,StatusCode,X-Cache,MethodUsed,SourceSitemap"
        };
        var tasks = new List<Task>(urlsWithSources.Count);

        foreach (var kvp in urlsWithSources)
        {
            // Acquire before starting the check so no more than the limit are ever in flight.
            await semaphore.WaitAsync();
            tasks.Add(CheckOneAsync(kvp.Key, kvp.Value));
        }

        await Task.WhenAll(tasks);
        await File.WriteAllLinesAsync(outputFile, results);

        // Checks a single URL. The work is naturally asynchronous I/O, so no Task.Run is needed.
        async Task CheckOneAsync(string url, string sourceSitemap)
        {
            try
            {
                string line;
                try
                {
                    var (statusCode, xCache, methodUsed) = await HeadThenGetAsync(SharedHttpClient, url);
                    line = ToCsvLine(url, statusCode, xCache, methodUsed, sourceSitemap);
                }
                catch (Exception ex)
                {
                    // Record the failure as a row rather than aborting the whole run.
                    line = ToCsvLine(url, $"ERROR: {ex.Message}", "", "", sourceSitemap);
                }

                lock (resultsGate)
                {
                    results.Add(line);
                }
                Console.WriteLine(line);
            }
            finally
            {
                semaphore.Release();
            }
        }
    }

    /// <summary>
    /// Requests a URL with HEAD and falls back to GET when HEAD fails or is not successful.
    /// </summary>
    /// <param name="httpClient">Client used to send the requests.</param>
    /// <param name="url">URL to check.</param>
    /// <returns>
    /// The numeric status code as text, the X-Cache header values joined with ';'
    /// (empty if absent), and the method ("HEAD" or "GET") that produced the result.
    /// </returns>
    static async Task<(string statusCode, string xCache, string methodUsed)> HeadThenGetAsync(HttpClient httpClient, string url)
    {
        try
        {
            // Try HEAD first
            using var headRequest = new HttpRequestMessage(HttpMethod.Head, url);
            using var headResponse = await httpClient.SendAsync(headRequest);
            if (headResponse.IsSuccessStatusCode)
            {
                return Summarize(headResponse, "HEAD");
            }
        }
        catch (Exception)
        {
            // Deliberate best-effort: any HEAD failure falls through to the GET below.
            // If GET fails too, its exception propagates to the caller.
        }

        // Retry with GET if HEAD threw or its response was not OK
        using var getRequest = new HttpRequestMessage(HttpMethod.Get, url);
        using var getResponse = await httpClient.SendAsync(getRequest);
        return Summarize(getResponse, "GET");
    }

    // Extracts the status code and X-Cache header from a response.
    private static (string statusCode, string xCache, string methodUsed) Summarize(HttpResponseMessage response, string methodUsed)
    {
        string xCache = response.Headers.TryGetValues("X-Cache", out var values)
            ? string.Join(";", values)
            : "";
        return ($"{(int)response.StatusCode}", xCache, methodUsed);
    }

    // Joins fields into one CSV record, quoting any field that needs it.
    private static string ToCsvLine(params string[] fields)
    {
        return string.Join(",", Array.ConvertAll<string, string>(fields, EscapeCsvField));
    }

    // Quotes a field containing a comma, quote or line break, doubling embedded quotes,
    // so URLs and exception messages cannot shift or split the CSV columns.
    private static string EscapeCsvField(string field)
    {
        if (string.IsNullOrEmpty(field))
        {
            return "";
        }

        if (field.IndexOfAny(CsvSpecialChars) < 0)
        {
            return field;
        }

        return "\"" + field.Replace("\"", "\"\"") + "\"";
    }
}
Running it as follows:-
% dotnet run https://www.andrewgorton.uk/sitemap.xml
Processing sitemap: https://www.andrewgorton.uk/sitemap.xml
Total URLs collected: 20
https://www.andrewgorton.uk/tags/leadership/,200,,HEAD,https://www.andrewgorton.uk/sitemap.xml
https://www.andrewgorton.uk/tags/governance/,200,,HEAD,https://www.andrewgorton.uk/sitemap.xml
https://www.andrewgorton.uk/tags/respectthebuilders/,200,,HEAD,https://www.andrewgorton.uk/sitemap.xml
https://www.andrewgorton.uk/tags/,200,,HEAD,https://www.andrewgorton.uk/sitemap.xml
https://www.andrewgorton.uk/tags/builderai/,200,,HEAD,https://www.andrewgorton.uk/sitemap.xml
https://www.andrewgorton.uk/tags/ai/,200,,HEAD,https://www.andrewgorton.uk/sitemap.xml
https://www.andrewgorton.uk/tags/startups/,200,,HEAD,https://www.andrewgorton.uk/sitemap.xml
https://www.andrewgorton.uk/posts/2025/06/24/statement-from-manpreet-on-builderai/,200,,HEAD,https://www.andrewgorton.uk/sitemap.xml
https://www.andrewgorton.uk/tags/microsoft/,200,,HEAD,https://www.andrewgorton.uk/sitemap.xml
https://www.andrewgorton.uk/posts/,200,,HEAD,https://www.andrewgorton.uk/sitemap.xml
https://www.andrewgorton.uk/tags/builder/,200,,HEAD,https://www.andrewgorton.uk/sitemap.xml
https://www.andrewgorton.uk/tags/tech/,200,,HEAD,https://www.andrewgorton.uk/sitemap.xml
https://www.andrewgorton.uk/posts/2025/06/19/a-month-on-from-builderai/,200,,HEAD,https://www.andrewgorton.uk/sitemap.xml
https://www.andrewgorton.uk/tags/administration/,200,,HEAD,https://www.andrewgorton.uk/sitemap.xml
https://www.andrewgorton.uk/tags/fraud/,200,,HEAD,https://www.andrewgorton.uk/sitemap.xml
https://www.andrewgorton.uk/tags/insolvency/,200,,HEAD,https://www.andrewgorton.uk/sitemap.xml
https://www.andrewgorton.uk/,200,,HEAD,https://www.andrewgorton.uk/sitemap.xml
https://www.andrewgorton.uk/tags/natasha/,200,,HEAD,https://www.andrewgorton.uk/sitemap.xml
https://www.andrewgorton.uk/categories/,200,,HEAD,https://www.andrewgorton.uk/sitemap.xml
https://www.andrewgorton.uk/tags/redundancy/,200,,HEAD,https://www.andrewgorton.uk/sitemap.xml
Combined results written to: sitemap_results_20250819_081343.csv
It supports:-
- multiple sitemaps on the command line
- parallel website requests
- gzip’d sitemaps
- sitemapindex sitemaps
- tries HEAD first before falling back to GET
- outputs the AWS CloudFront X-Cache header if present
- outputs the results to a CSV file
Very handy if you want to check your sitemaps or prime a CDN.
And it generated it faster than I could summon the enthusiasm to update my basic version.