<?xml version="1.0" encoding="utf-8" standalone="yes"?><urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml"><url><loc>https://www.jonam.io/journal/inference-engineering/product-ideas/docvault/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/inference-engineering/research-topics/position-invariant-document-kv-cache/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/inference-engineering/product-ideas/prefillx/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/inference-engineering/research-topics/temporal-turboquant-kv-tiering/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/inference-engineering/product-ideas/infergrid/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/inference-engineering/research-topics/roofline-adaptive-inference-scheduler/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/inference-engineering/product-ideas/draftos/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/inference-engineering/research-topics/speculative-prefill/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/inference-engineering/research-topics/quantization-divergence-hallucination-signal/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/inference-engineering/product-ideas/sloguard/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/inference-engineering/product-ideas/haloscoreai/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/inference-engineering/research-topics/online-eagle-draft-learning/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/inference-engineering/product-ideas/distillaudit/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/inference-engineering/research-topics/slo-aware-kv-cache-tiering/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/inference-engineering/research-topics/attention-head-similarity-pruning/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/inference-engineering/product-ideas/convocache/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/inference-engineering/product-ideas/specdraft-cloud/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/inference-engineering/research-topics/unlearning-layer-in-attention/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/inference-engineering/research-topics/hardware-aware-inference-cpu-ideas/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/inference-engineering/product-ideas/neuraledge/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/inference-engineering/research-topics/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/inference-engineering/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/inference-engineering/product-ideas/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/adapters/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/assistant/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/attention/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/audit/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/categories/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/compiler/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/compliance/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/cpu/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/distillation/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/docvault/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/eagle/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/edge-ai/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/enterprise/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/governance/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/gpu/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/hallucination/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/hardware/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/hbm/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/heads/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/inference/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/categories/inference-engineering/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/journal/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/kv-cache/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/llm/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/llm-systems/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/long-context/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/memory/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/observability/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/online-learning/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/pagedattention/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/prefill/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/prefix-caching/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/products/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/pruning/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/quantization/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/rag/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/research/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/robotics/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/roofline/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/rope/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/safety/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/scheduler/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/serving/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/slo/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/speculative-decoding/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/startup/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/systems/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/systolic-arrays/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/thermal/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/tokens-per-dollar/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/turboquant/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/uncertainty/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/unlearning/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/vllm/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/yc/</loc><lastmod>2026-05-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/ai/</loc><lastmod>2026-04-30T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/files/Inference_Engineering_Lecture_3.pdf</loc><lastmod>2026-04-30T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/files/Inference_Engineering_Lecture_2.pdf</loc><lastmod>2026-04-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/build-llm-from-scratch/code-an-llm-tokenizer-from-scratch/</loc><lastmod>2025-11-01T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/tokenizer/</loc><lastmod>2025-11-01T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/stages/</loc><lastmod>2025-10-30T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/build-llm-from-scratch/stages-of-building-an-llm-from-scratch/</loc><lastmod>2025-10-30T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/gpt-3/</loc><lastmod>2025-10-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/build-llm-from-scratch/how-does-gpt-3-really-work/</loc><lastmod>2025-10-28T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/transformers/</loc><lastmod>2025-10-26T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/build-llm-from-scratch/transformers/</loc><lastmod>2025-10-26T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/fine-tuning/</loc><lastmod>2025-10-24T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/pre-training/</loc><lastmod>2025-10-24T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/build-llm-from-scratch/pretraining-vs-finetuning/</loc><lastmod>2025-10-24T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/build-llm-from-scratch/large-language-models-basics/</loc><lastmod>2025-10-22T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/llm-basics/</loc><lastmod>2025-10-22T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/introduction/</loc><lastmod>2025-10-20T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/build-llm-from-scratch/introduction/</loc><lastmod>2025-10-20T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/posts/nanochat/</loc><lastmod>2025-10-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/categories/internal/</loc><lastmod>2025-10-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/nanochat/</loc><lastmod>2025-10-18T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/build-llm-from-scratch/</loc><lastmod>2025-10-15T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/categories/build-llm-from-scratch/</loc><lastmod>2025-10-15T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/about/</loc><lastmod>2025-10-12T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/categories/external/</loc><lastmod>2025-10-11T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/literature/</loc><lastmod>2025-10-11T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/categories/ml/</loc><lastmod>2024-02-08T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/mlexpert/</loc><lastmod>2024-02-08T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/machine-learning/ml-crash-course/</loc><lastmod>2024-02-08T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/tags/notes/</loc><lastmod>2024-02-08T00:00:00+00:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/resume/</loc><lastmod>2022-06-13T21:28:51+01:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/files/</loc><lastmod>2022-06-13T20:55:37+01:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/journal/</loc><lastmod>2022-06-13T20:55:37+01:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/posts/</loc><lastmod>2022-06-13T20:55:37+01:00</lastmod><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/authors/</loc><changefreq>daily</changefreq><priority>0.5</priority></url><url><loc>https://www.jonam.io/series/</loc><changefreq>daily</changefreq><priority>0.5</priority></url></urlset>