How your OS finds a file among millions in microseconds — and why a 50-year-old data structure is still the right tool for the job.
Imagine you have a directory with 10 million files. When you run open("report.pdf"), the OS needs to find the inode number for that filename. How?
The naive approach — a flat list you scan linearly — is O(n). With 10 million files and a disk seek costing ~10ms, that's potentially minutes. Even a sorted array with binary search does O(log n) comparisons, but each comparison might require a separate disk read if the array doesn't fit in RAM. That's still thousands of seeks.
A B-Tree solves this by packing many keys into a single node (one disk page), so each disk read eliminates a huge number of candidates at once. A B-Tree of order 100 over 10 million files needs at most log₁₀₀(10,000,000) ≈ 3.5 → 4 disk reads to find any file.
// Why B-Trees exist: the cost model for disk-based lookups const DISK_SEEK_MS = 10; // ~10ms per random disk seek (HDD) const RAM_LOOKUP_NS = 100; // ~100ns per random RAM access const DISK_RATIO = (DISK_SEEK_MS * 1e6) / RAM_LOOKUP_NS; // 100,000× function costModel(nFiles, btreeOrder = 100) { const linearSeeks = nFiles; // scan every entry const binarySeeks = Math.ceil(Math.log2(nFiles)); // binary search, 1 seek/compare if not in RAM const btreeSeeks = Math.ceil(Math.log(nFiles) / Math.log(btreeOrder)); // one seek per level return { linearSeeks, binarySeeks, btreeSeeks }; } const sizes = [1_000, 100_000, 10_000_000]; for (const n of sizes) { const { linearSeeks, binarySeeks, btreeSeeks } = costModel(n); console.log(`\n--- ${n.toLocaleString()} files ---`); console.log(` Linear scan: ${linearSeeks.toLocaleString()} seeks = ${(linearSeeks * DISK_SEEK_MS / 1000).toFixed(1)}s`); console.log(` Binary search: ${binarySeeks} seeks = ${(binarySeeks * DISK_SEEK_MS).toFixed(0)}ms`); console.log(` B-Tree (t=100): ${btreeSeeks} seeks = ${(btreeSeeks * DISK_SEEK_MS).toFixed(0)}ms`); } console.log(`\nDisk is ${DISK_RATIO.toLocaleString()}× slower than RAM per access.`); console.log("B-Tree packs ~100+ keys per node → each seek eliminates 100× more candidates.");
A B-Tree of order t (minimum degree) is a self-balancing tree where every node holds between t−1 and 2t−1 keys, and every leaf is at the same depth. Each key in an internal node acts as a separator: everything in the left subtree is smaller, everything in the right is larger.
Each node maps directly onto one disk page (typically 4KB or 16KB). When the OS reads a node, it gets all its keys in one I/O operation. A filesystem with 4KB pages and 8-byte keys can fit ~500 keys per node — meaning each disk read eliminates 500 candidates from the search space.
// A B-Tree node — the unit that maps to one disk page class BTreeNode { constructor(isLeaf = true) { this.keys = []; // sorted array of keys this.children = []; // child node references (empty if leaf) this.isLeaf = isLeaf; // In a real filesystem, each child is a disk block number, // and values map keys → inode numbers. this.values = []; // leaf: key→value (e.g. filename→inode) } isFull(t) { return this.keys.length === 2 * t - 1; } diskSize(keyBytes = 8, ptrBytes = 8) { // How many bytes does this node occupy on disk? const keySpace = this.keys.length * keyBytes; const ptrSpace = this.children.length * ptrBytes; const overhead = 32; // node header: isLeaf, keyCount, etc. return keySpace + ptrSpace + overhead; } } // Verify the B-Tree invariants for a given tree function checkInvariants(node, t, depth = 0, isRoot = true) { const min = isRoot ? 1 : t - 1; const max = 2 * t - 1; console.log(`Depth ${depth}: ${node.keys.length} keys [${node.keys.join(",")}] leaf=${node.isLeaf}`); console.assert(node.keys.length >= min, `Too few keys at depth ${depth}`); console.assert(node.keys.length <= max, `Too many keys at depth ${depth}`); // Keys must be sorted for (let i = 1; i < node.keys.length; i++) console.assert(node.keys[i] > node.keys[i-1], "Keys not sorted"); if (!node.isLeaf) { console.assert(node.children.length === node.keys.length + 1, "Children count must be keys+1"); for (const child of node.children) checkInvariants(child, t, depth + 1, false); } } // Build a small valid B-Tree manually (t=2, so 1-3 keys per non-root node) const root = new BTreeNode(false); root.keys = [20, 40]; const l1 = new BTreeNode(); l1.keys = [10, 15]; const l2 = new BTreeNode(); l2.keys = [25, 30]; const l3 = new BTreeNode(); l3.keys = [45, 50]; root.children = [l1, l2, l3]; console.log("=== Checking invariants ==="); checkInvariants(root, 2); console.log("All invariants satisfied."); console.log(`\nRoot disk size: ${root.diskSize()} bytes`); console.log(`Leaf disk size: ${l1.diskSize()} bytes`);
To find a key, start at the root and at each node binary-search the key array. If found, return the value. Otherwise, follow the appropriate child pointer — the subtree between the two nearest separator keys — and repeat. Stop at a leaf: if the key isn't there, it doesn't exist.
Each level requires exactly one disk read. The height of a B-Tree with n keys and minimum degree t is at most log_t((n+1)/2). For t=100 and n=10 million, that's ≤4 levels.
// B-Tree search — returns value AND number of disk reads function search(node, key, reads = 0) { reads++; // each node access = one disk page read // Binary search within this node's keys let lo = 0, hi = node.keys.length - 1; while (lo <= hi) { const mid = (lo + hi) >> 1; if (key < node.keys[mid]) hi = mid - 1; else if (key > node.keys[mid]) lo = mid + 1; else return { found: true, value: node.values?.[mid], reads }; } // `lo` is now the index of the child subtree to follow if (node.isLeaf) return { found: false, reads }; // not in tree return search(node.children[lo], key, reads); } // Build a 3-level B-Tree and time some searches function buildTree() { const root = { keys:[30,60], isLeaf:false, children:[ { keys:[10,20], isLeaf:false, children:[ { keys:[5], isLeaf:true, values:["inode_5"] }, { keys:[15], isLeaf:true, values:["inode_15"] }, { keys:[25], isLeaf:true, values:["inode_25"] }, ]}, { keys:[40,50], isLeaf:false, children:[ { keys:[35], isLeaf:true, values:["inode_35"] }, { keys:[45], isLeaf:true, values:["inode_45"] }, { keys:[55], isLeaf:true, values:["inode_55"] }, ]}, { keys:[70,80], isLeaf:false, children:[ { keys:[65], isLeaf:true, values:["inode_65"] }, { keys:[75], isLeaf:true, values:["inode_75"] }, { keys:[85], isLeaf:true, values:["inode_85"] }, ]}, ]}; return root; } const tree = buildTree(); const queries = [45, 85, 15, 99, 30]; for (const q of queries) { const r = search(tree, q); const status = r.found ? `found → ${r.value}` : "not found"; console.log(`search(${String(q).padStart(3)}): ${status} [${r.reads} disk reads]`); }
Inserting always starts at the correct leaf. If the leaf has room (keys.length < 2t−1), insert there and done. If the leaf is full, it must be split: divide its keys in half, promote the median key up to the parent, and create two new nodes from the halves.
This split can cascade upward — if the parent is also full, it splits too, and so on. If the root splits, a new root is created and the tree grows taller. This is the only way a B-Tree grows in height — always at the top, keeping all leaves at the same depth.
// B-Tree insertion — the real algorithm with splitting const t = 2; // minimum degree: nodes hold 1-3 keys, root holds 1-3 keys function makeNode(isLeaf, keys = [], children = [], values = []) { return { isLeaf, keys: [...keys], children: [...children], values: [...values] }; } function splitChild(parent, childIndex) { // Split parent.children[childIndex] — it must be full (2t-1 keys) const full = parent.children[childIndex]; const mid = t - 1; // median index // Right half becomes new node const right = makeNode(full.isLeaf, full.keys.slice(mid + 1), full.isLeaf ? [] : full.children.slice(mid + 1), full.values.slice(mid + 1) ); // Median key gets promoted to parent const medianKey = full.keys[mid]; parent.keys.splice(childIndex, 0, medianKey); parent.children.splice(childIndex + 1, 0, right); // Left half stays in original node (truncate) full.keys = full.keys.slice(0, mid); full.values = full.values.slice(0, mid); if (!full.isLeaf) full.children = full.children.slice(0, mid + 1); console.log(` Split: promoted key=${medianKey}, left=[${full.keys}], right=[${right.keys}]`); } function insertNonFull(node, key, value) { let i = node.keys.length - 1; if (node.isLeaf) { // Shift keys right and insert in sorted position node.keys.push(0); node.values.push(null); while (i >= 0 && key < node.keys[i]) { node.keys[i+1] = node.keys[i]; node.values[i+1] = node.values[i]; i--; } node.keys[i+1] = key; node.values[i+1] = value; } else { // Find correct child, split it if full, recurse while (i >= 0 && key < node.keys[i]) i--; i++; if (node.children[i].keys.length === 2 * t - 1) { splitChild(node, i); if (key > node.keys[i]) i++; } insertNonFull(node.children[i], key, value); } } function insert(tree, key, value) { console.log(`Inserting key=${key}`); if (tree.root.keys.length === 2 * t - 1) { // Root is full — tree grows taller const newRoot = makeNode(false, [], [tree.root]); splitChild(newRoot, 0); tree.root = newRoot; console.log(" Root split → tree height increased"); } insertNonFull(tree.root, key, value); } function printTree(node, depth = 0) { console.log(" ".repeat(depth) + `[${node.keys.join(",")}]` + (node.isLeaf ? " ←leaf" : "")); if (!node.isLeaf) node.children.forEach(c => printTree(c, depth+1)); } const tree = { root: makeNode(true) }; const keys = [10, 20, 5, 6, 12, 30, 7, 17]; for (const k of keys) { insert(tree, k, `inode_${k}`); } console.log("\nFinal tree:"); printTree(tree.root);
Deletion is the most complex operation. After removing a key from a leaf, the leaf might underflow — have fewer than t−1 keys. To fix this, the node tries to borrow a key from an adjacent sibling (rotating through the parent). If the sibling has no spare keys, the node and its sibling merge into one node, pulling down the separator key from the parent. This can cascade up to the root.
// B-Tree deletion — handles underflow via borrow or merge const t = 2; // minimum degree function borrowFromLeft(parent, idx) { // parent.children[idx] is short — steal from left sibling const child = parent.children[idx]; const left = parent.children[idx - 1]; // Rotate: left's last key → parent → child's front child.keys.unshift(parent.keys[idx - 1]); parent.keys[idx - 1] = left.keys.pop(); if (!child.isLeaf) child.children.unshift(left.children.pop()); console.log(` Borrowed from left sibling. Parent sep: ${parent.keys[idx-1]}`); } function borrowFromRight(parent, idx) { // parent.children[idx] is short — steal from right sibling const child = parent.children[idx]; const right = parent.children[idx + 1]; child.keys.push(parent.keys[idx]); parent.keys[idx] = right.keys.shift(); if (!child.isLeaf) child.children.push(right.children.shift()); console.log(` Borrowed from right sibling. Parent sep: ${parent.keys[idx]}`); } function merge(parent, idx) { // Merge children[idx] and children[idx+1], pulling down parent.keys[idx] const left = parent.children[idx]; const right = parent.children[idx + 1]; const sep = parent.keys.splice(idx, 1)[0]; parent.children.splice(idx + 1, 1); left.keys.push(sep, ...right.keys); if (!left.isLeaf) left.children.push(...right.children); console.log(` Merged: pulled down sep=${sep}, merged node=[${left.keys}]`); } function deleteKey(node, key, isRoot = true) { const idx = node.keys.findIndex(k => k >= key); if (node.isLeaf) { if (node.keys[idx] === key) { node.keys.splice(idx, 1); console.log(` Deleted ${key} from leaf`); } else { console.log(` Key ${key} not found`); } return; } if (idx < node.keys.length && node.keys[idx] === key) { // Key is in this internal node — replace with in-order predecessor let pred = node.children[idx]; while (!pred.isLeaf) pred = pred.children[pred.children.length - 1]; const predKey = pred.keys[pred.keys.length - 1]; node.keys[idx] = predKey; console.log(` Replacing internal key ${key} with predecessor ${predKey}`); deleteKey(node.children[idx], predKey, false); return; } // Key is in subtree — ensure child has enough keys before descending const childIdx = idx === -1 ? node.children.length - 1 : idx; const child = node.children[childIdx]; if (child.keys.length < t) { // Child has minimum keys — fix it before descending if (childIdx > 0 && node.children[childIdx-1].keys.length >= t) borrowFromLeft(node, childIdx); else if (childIdx < node.children.length-1 && node.children[childIdx+1].keys.length >= t) borrowFromRight(node, childIdx); else merge(node, childIdx > 0 ? childIdx - 1 : 0); } deleteKey(node.children[childIdx > 0 && child.keys.length < t ? childIdx-1 : childIdx], key, false); } function printTree(node, d=0) { console.log(" ".repeat(d)+`[${node.keys}]`+(node.isLeaf?" ←leaf":"")); if(!node.isLeaf) node.children.forEach(c=>printTree(c,d+1)); } // Build a tree, then delete from it const root = { isLeaf:false, keys:[20,40], children:[ {isLeaf:true,keys:[10,15]}, {isLeaf:true,keys:[25,30]}, {isLeaf:true,keys:[45,50]}, ] }; console.log("Before:"); printTree(root); console.log("\nDeleting 10:"); deleteKey(root, 10); console.log("\nDeleting 25:"); deleteKey(root, 25); console.log("\nAfter:"); printTree(root);
Real filesystems often use B-Trees or close relatives, but not all in exactly the same place or with exactly the same key/value layout. Here's a rough map for three major filesystems:
Across these systems, the key is whatever that particular structure is indexing: a filename, a filename hash, a catalog key, or a logical block offset. The associated value is metadata directly, or a pointer to metadata or data extents. The common idea is still logarithmic lookup with high fan-out nodes.
A common pattern is a two-stage lookup. First, a directory index or catalog structure maps a human-facing name to file metadata. Then, the file's block-mapping metadata resolves logical block offset → physical disk block. The exact structures differ across ext4, HFS+, and NTFS, but this indirection is what lets files be sparse, fragmented, and still efficiently addressable.
// Simplified two-level filesystem lookup: directory index + extent lookup // Representative of real filesystems, not a byte-for-byte model of ext4/HFS+/NTFS class Inode { constructor(id, size, mode = "rw-r--r--") { this.id = id; this.size = size; // file size in bytes this.mode = mode; this.nlinks = 1; this.modified = new Date().toISOString().slice(0,19); // Extent tree: maps logical block [start,end) → physical block start // In a real B-Tree the key is the logical offset this.extents = []; // [{logStart, logEnd, physStart}] } addExtent(logStart, length, physStart) { this.extents.push({ logStart, logEnd: logStart + length, physStart }); this.extents.sort((a,b) => a.logStart - b.logStart); } logicalToPhysical(logBlock) { // Binary search the extent tree — O(log e) where e = number of extents let lo = 0, hi = this.extents.length - 1; while (lo <= hi) { const mid = (lo + hi) >> 1; const e = this.extents[mid]; if (logBlock < e.logStart) hi = mid - 1; else if (logBlock >= e.logEnd) lo = mid + 1; else return e.physStart + (logBlock - e.logStart); } return null; // sparse file: block not allocated (reads as zeros) } } class SimpleFS { constructor() { this.dirTree = new Map(); // filename → inode (our B-Tree, simplified) this.inodes = new Map(); // inode id → Inode this.nextIno = 2; // inode 1 is reserved for root dir } create(filename, size) { const ino = this.nextIno++; const node = new Inode(ino, size); // Allocate extents (simplified: one contiguous run per file) const blocks = Math.ceil(size / 4096); // 4KB blocks node.addExtent(0, blocks, ino * 10); // fake physical address this.dirTree.set(filename, ino); this.inodes.set(ino, node); return ino; } open(filename) { // Step 1: directory B-Tree lookup → inode number const ino = this.dirTree.get(filename); if (!ino) throw new Error(`ENOENT: ${filename}`); // Step 2: inode lookup → file metadata const inode = this.inodes.get(ino); console.log(`open("${filename}")`); console.log(` dir B-Tree lookup → inode=${ino}`); console.log(` inode: size=${inode.size}B, mode=${inode.mode}, extents=${inode.extents.length}`); return inode; } read(inode, logBlock) { // Step 3: extent B-Tree lookup → physical block const phys = inode.logicalToPhysical(logBlock); if (phys === null) { console.log(` read block ${logBlock}: sparse (unallocated) → returns zeros`); } else { console.log(` extent lookup: logical block ${logBlock} → physical block ${phys}`); } return phys; } } const fs = new SimpleFS(); fs.create("report.pdf", 524288); // 512KB fs.create("notes.txt", 4096); // exactly 1 block fs.create("database.db", 10485760); // 10MB console.log("=== Full lookup chain ==="); const f = fs.open("report.pdf"); fs.read(f, 0); // first block fs.read(f, 63); // last block of 512KB file fs.read(f, 200); // beyond EOF (sparse) console.log("\ntry { fs.open(\"missing.txt\") } ->"); try { fs.open("missing.txt") } catch(e) { console.log(` ${e.message}`) };
Putting it all together: a structurally complete in-memory filesystem backed by a real B-Tree. It supports mkdir, create, cat, ls, stat, find, rm, mv, cd, pwd, and tree. Each directory is a B-Tree keyed by filename. Files store their data as byte arrays.
// A complete in-memory filesystem backed by a B-Tree // Every directory is a BTree; files store Buffer (Uint8Array) data const T = 2; // B-Tree minimum degree class BTree { constructor() { this.root = { isLeaf:true, keys:[], vals:[], children:[] }; } _search(node, key) { let i = node.keys.findIndex(k => k >= key); if (i === -1) i = node.keys.length; if (node.keys[i] === key) return { node, i }; if (node.isLeaf) return null; return this._search(node.children[i], key); } get(key) { const r = this._search(this.root, key); return r ? r.node.vals[r.i] : undefined; } has(key) { return !!this._search(this.root, key); } _splitChild(parent, ci) { const s = parent.children[ci], mid = T-1; const r = { isLeaf:s.isLeaf, keys:s.keys.splice(mid+1), vals:s.vals.splice(mid+1), children: s.isLeaf?[]:s.children.splice(mid+1) }; parent.keys.splice(ci,0,s.keys.pop()); parent.vals.splice(ci,0,s.vals.pop()); parent.children.splice(ci+1,0,r); } _insertNF(node, key, val) { let i = node.keys.length-1; if (node.isLeaf) { node.keys.push(null); node.vals.push(null); while (i>=0 && key<node.keys[i]) { node.keys[i+1]=node.keys[i]; node.vals[i+1]=node.vals[i]; i--; } node.keys[i+1]=key; node.vals[i+1]=val; } else { while (i>=0 && key<node.keys[i]) i--; i++; if (node.children[i].keys.length===2*T-1) { this._splitChild(node,i); if (key>node.keys[i]) i++; } this._insertNF(node.children[i],key,val); } } set(key, val) { const ex = this._search(this.root, key); if (ex) { ex.node.vals[ex.i] = val; return; } if (this.root.keys.length === 2*T-1) { const nr = {isLeaf:false,keys:[],vals:[],children:[this.root]}; this._splitChild(nr,0); this.root = nr; } this._insertNF(this.root,key,val); } keys() { const res = [], walk = n => { if (n.isLeaf) { res.push(...n.keys); return; } n.children.forEach((c,i)=>{ walk(c); if(i<n.keys.length) res.push(n.keys[i]); }); }; walk(this.root); return res; } } class MiniFS { constructor() { this.inodes = new Map(); this.nextIno = 1; // Root directory: inode 1 this._makeDir(null, "/"); } _makeDir(parentIno, name) { const ino = this.nextIno++; this.inodes.set(ino, { type:"dir", name, ino, parentIno, tree: new BTree(), ctime:new Date() }); if (parentIno !== null) this.inodes.get(parentIno).tree.set(name, ino); return ino; } _resolve(path) { const parts = path.split("/").filter(Boolean); let cur = 1; // start at root for (const p of parts) { const dir = this.inodes.get(cur); if (!dir || dir.type !== "dir") throw new Error(`Not a directory: ${p}`); const child = dir.tree.get(p); if (!child) throw new Error(`ENOENT: ${path}`); cur = child; } return cur; } mkdir(path) { const parts = path.split("/").filter(Boolean); const name = parts.pop(); const parentIno = this._resolve("/" + parts.join("/")); const ino = this._makeDir(parentIno, name); return `created dir ${path} (inode ${ino})`; } create(path, content = "") { const parts = path.split("/").filter(Boolean); const name = parts.pop(); const parentIno = this._resolve("/" + parts.join("/")); const ino = this.nextIno++; const data = new TextEncoder().encode(content); this.inodes.set(ino, { type:"file", name, ino, parentIno, data, ctime:new Date() }); this.inodes.get(parentIno).tree.set(name, ino); return `created ${path} (inode ${ino}, ${data.length} bytes)`; } read(path) { const node = this.inodes.get(this._resolve(path)); if (node.type !== "file") throw new Error(`Is a directory: ${path}`); return new TextDecoder().decode(node.data); } ls(path = "/") { const dir = this.inodes.get(this._resolve(path)); if (dir.type !== "dir") throw new Error(`Not a directory`); return dir.tree.keys().map(name => { const child = this.inodes.get(dir.tree.get(name)); return child.type === "dir" ? ` ${name}/` : ` ${name} (${child.data.length}B)`; }); } stat(path) { const node = this.inodes.get(this._resolve(path)); return [ ` inode: ${node.ino}`, ` type: ${node.type}`, ` size: ${node.data ? node.data.length : 0} bytes`, ` parent: ${node.parentIno}`, ].join("\n"); } find(dir = "/", needle, results = []) { const node = this.inodes.get(this._resolve(dir)); if (node.type !== "dir") return results; for (const name of node.tree.keys()) { const childPath = dir.replace(/\/$/,"") + "/" + name; if (name.includes(needle)) results.push(childPath); const child = this.inodes.get(node.tree.get(name)); if (child.type === "dir") this.find(childPath, needle, results); } return results; } } // Demo: build a directory structure const fs = new MiniFS(); console.log(fs.mkdir("/home")); console.log(fs.mkdir("/home/alice")); console.log(fs.mkdir("/home/alice/docs")); console.log(fs.create("/home/alice/readme.txt", "Hello, filesystem!")); console.log(fs.create("/home/alice/docs/report.pdf", "%PDF-1.4 ...")); console.log("\nls /home/alice:"); console.log(fs.ls("/home/alice").join("\n")); console.log("\nread /home/alice/readme.txt:"); console.log(" " + fs.read("/home/alice/readme.txt")); console.log("\nstat /home/alice/docs/report.pdf:"); console.log(fs.stat("/home/alice/docs/report.pdf")); console.log("\nfind / -name 'report':"); console.log(fs.find("/", "report").map(p=>` ${p}`).join("\n"));
The B-Tree is one of the most consequential data structures in systems programming. These are excellent next steps: