<?xml version="1.0" encoding="UTF-8"?>
<!-- Sitemap (sitemaps.org protocol 0.9) for www.reinforced.info.
     Extension namespaces (news/xhtml/image/video) are declared by the
     generator but unused in this document; kept for compatibility. -->
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
        xmlns:news="http://www.google.com/schemas/sitemap-news/0.9"
        xmlns:xhtml="http://www.w3.org/1999/xhtml"
        xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"
        xmlns:video="http://www.google.com/schemas/sitemap-video/1.1">
  <!-- Index pages: no lastmod (omission is permitted by the protocol). -->
  <url>
    <loc>https://www.reinforced.info/archive</loc>
    <changefreq>daily</changefreq>
  </url>
  <url>
    <loc>https://www.reinforced.info/about</loc>
    <changefreq>weekly</changefreq>
  </url>
  <!-- Posts: lastmod in W3C date format (YYYY-MM-DD), newest first. -->
  <url>
    <loc>https://www.reinforced.info/p/positive-gradients-negative-gradients</loc>
    <lastmod>2025-12-19</lastmod>
    <changefreq>monthly</changefreq>
  </url>
  <url>
    <loc>https://www.reinforced.info/p/bandits-vs-reinforcement-learning</loc>
    <lastmod>2024-04-30</lastmod>
    <changefreq>monthly</changefreq>
  </url>
  <url>
    <loc>https://www.reinforced.info/p/reward-model-overoptimization</loc>
    <lastmod>2024-04-08</lastmod>
    <changefreq>monthly</changefreq>
  </url>
  <url>
    <loc>https://www.reinforced.info/p/reward-modeling-for-rlhf</loc>
    <lastmod>2024-01-10</lastmod>
    <changefreq>monthly</changefreq>
  </url>
  <url>
    <loc>https://www.reinforced.info/p/hello-world</loc>
    <lastmod>2024-01-07</lastmod>
    <changefreq>monthly</changefreq>
  </url>
</urlset>