1 module btrfs; 2 3 import core.stdc.errno; 4 import core.sys.posix.sys.ioctl; 5 import core.sys.posix.sys.stat; 6 import core.sys.posix.sys.types; 7 8 import std.algorithm.comparison; 9 import std.exception; 10 import std.math; 11 import std.string; 12 13 import ae.utils.bitmanip; 14 import ae.utils.math : eq; 15 16 import btrfs.c.ioctl; 17 import btrfs.c.kerncompat; 18 import btrfs.c.kernel_shared.ctree; 19 import btrfs.c.kernel_lib.sizes; 20 21 private 22 { 23 alias __fsword_t = uint; 24 struct fsid_t { int[2] __val; } 25 struct statfs_t 26 { 27 __fsword_t f_type; 28 __fsword_t f_bsize; 29 fsblkcnt_t f_blocks; 30 fsblkcnt_t f_bfree; 31 fsblkcnt_t f_bavail; 32 fsfilcnt_t f_files; 33 fsfilcnt_t f_ffree; 34 fsid_t f_fsid; 35 __fsword_t f_namelen; 36 __fsword_t f_frsize; 37 __fsword_t f_flags; 38 __fsword_t[5] f_spare; 39 ubyte[4096 - 88] __unknown; // D - seems to vary A LOT across platforms / libcs 40 } 41 extern(C) int fstatfs(int fd, statfs_t* buf); 42 } 43 44 enum BTRFS_SUPER_MAGIC = 0x9123683E; 45 46 /// Returns true is fd is on a btrfs filesystem. 47 bool isBTRFS(int fd) 48 { 49 statfs_t sfs; 50 fstatfs(fd, &sfs).eq(0).errnoEnforce("fstatfs"); 51 return sfs.f_type == BTRFS_SUPER_MAGIC; 52 } 53 54 /// Returns true is fd is the root of a btrfs subvolume. 55 bool isSubvolume(int fd) 56 { 57 stat_t st; 58 fstat(fd, &st).eq(0).errnoEnforce("fstat"); 59 return st.st_ino == BTRFS_FIRST_FREE_OBJECTID; 60 } 61 62 /// Returns the subvolume ID containing the file open in fd. 63 u64 getSubvolumeID(int fd) 64 { 65 btrfs_ioctl_ino_lookup_args args; 66 args.treeid = 0; 67 args.objectid = BTRFS_FIRST_FREE_OBJECTID; 68 ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args).eq(0).errnoEnforce("ino lookup"); 69 return args.treeid; 70 } 71 72 btrfs_ioctl_fs_info_args getInfo(int fd) 73 { 74 btrfs_ioctl_fs_info_args args; 75 ioctl(fd, BTRFS_IOC_FS_INFO, &args).eq(0).errnoEnforce("fs info"); 76 return args; 77 } 78 79 btrfs_ioctl_dev_info_args getDevInfo(int fd, u64 devid) 80 { 81 btrfs_ioctl_dev_info_args args; 82 args.devid = devid; 83 ioctl(fd, BTRFS_IOC_DEV_INFO, &args).eq(0).errnoEnforce("dev info"); 84 return args; 85 } 86 87 btrfs_ioctl_dev_info_args[] getDevices(int fd) 88 { 89 btrfs_ioctl_dev_info_args[] result; 90 91 auto info = fd.getInfo(); 92 93 foreach (i; 0 .. info.max_id + 1) 94 { 95 btrfs_ioctl_dev_info_args devInfo; 96 try 97 devInfo = fd.getDevInfo(i); 98 catch (ErrnoException e) 99 if (e.errno == ENODEV) 100 continue; 101 else 102 throw e; 103 result ~= devInfo; 104 } 105 106 enforce(result.length == info.num_devices, "Unexpected number of devices"); 107 return result; 108 } 109 110 enum __u64[2] treeSearchAllObjectIDs = [0, -1]; 111 enum __u64[2] treeSearchAllOffsets = [0, -1]; 112 enum __u64[2] treeSearchAllTransIDs = [0, -1]; 113 114 /// Raw tree search 115 void treeSearch( 116 /// Handle to the filesystem 117 int fd, 118 /// Tree to search in 119 __u64 treeID, 120 /// Min and max (inclusive) object IDs to search 121 __u64[2] objectIDs, 122 /// Min and max (inclusive) types to search 123 __u8[2] types, 124 /// Min and max (inclusive) offsets to search 125 __u64[2] offsets, 126 /// Min and max (inclusive) transaction IDs to search 127 __u64[2] transIDs, 128 /// Callback receiving the search results 129 scope void delegate( 130 /// Search result header, containing the object and transaction ID, offset and type 131 const ref btrfs_ioctl_search_header header, 132 /// Raw data - must be cast to the correct type 133 const void[] data, 134 ) callback, 135 ) 136 { 137 // Lexicographic ordering for tree search 138 static union Key 139 { 140 struct Fields 141 { 142 align(1): 143 BigEndian!__u64 objectID; 144 BigEndian!__u8 type; 145 BigEndian!__u64 offset; 146 } 147 static assert(Fields.sizeof == 0x11); 148 149 Fields fields; 150 ubyte[Fields.sizeof] bytes; 151 152 this(__u64 objectID, __u8 type, __u64 offset) 153 { 154 fields = Fields( 155 BigEndian!__u64(objectID), 156 BigEndian!__u8 (type), 157 BigEndian!__u64(offset), 158 ); 159 } 160 161 void opUnary(string op : "++")() 162 { 163 foreach_reverse (ref b; bytes) 164 if (++b != 0) 165 return; // No overflow (otherwise, continue to carry the 1) 166 assert(false, "Search key overflow"); 167 } 168 169 bool opBinary(string op)(const ref Key o) const 170 if (is(typeof(mixin(`0` ~ op ~ `0`)) == bool)) 171 { 172 return mixin(`bytes` ~ op ~ `o.bytes`); 173 } 174 175 int opCmp(const ref Key o) const 176 { 177 return cmp(bytes[], o.bytes[]); 178 } 179 } 180 181 Key min = Key(objectIDs[0], types[0], offsets[0]); 182 Key max = Key(objectIDs[1], types[1], offsets[1]); 183 184 btrfs_ioctl_search_args args; 185 btrfs_ioctl_search_key* sk = &args.key; 186 187 sk.tree_id = treeID; 188 sk.max_objectid = max.fields.objectID; 189 sk.max_type = max.fields.type; 190 sk.max_offset = max.fields.offset; 191 sk.min_transid = transIDs[0]; 192 sk.max_transid = transIDs[1]; 193 sk.nr_items = 4096; 194 195 do 196 { 197 sk.min_objectid = min.fields.objectID; 198 sk.min_type = min.fields.type ; 199 sk.min_offset = min.fields.offset ; 200 ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args).eq(0).errnoEnforce("tree search"); 201 202 if (sk.nr_items == 0) 203 break; 204 205 ulong off = 0; 206 btrfs_ioctl_search_header* sh; 207 foreach (i; 0 .. sk.nr_items) 208 { 209 sh = cast(btrfs_ioctl_search_header *)(args.buf.ptr + off); 210 off += (*sh).sizeof; 211 auto data = (args.buf.ptr + off)[0 .. sh.len]; 212 off += sh.len; 213 214 if ( 215 objectIDs[0] <= sh.objectid && sh.objectid <= objectIDs[1] && 216 types [0] <= sh.type && sh.type <= types [1] && 217 offsets [0] <= sh.offset && sh.offset <= offsets [1] && 218 transIDs [0] <= sh.transid && sh.transid <= transIDs [1] 219 ) 220 callback(*sh, data); 221 } 222 223 assert(sh.type < 256); 224 min.fields.objectID = sh.objectid; 225 min.fields.type = cast(__u8)sh.type; 226 min.fields.offset = sh.offset; 227 min++; 228 } 229 while (min <= max); 230 } 231 232 /// Typed tree search 233 void treeSearch( 234 /// BTRFS type identifier to search for 235 __u8 btrfsType, 236 /// Structure type describing the data 237 Type, 238 )( 239 /// Handle to the filesystem 240 int fd, 241 /// Tree to search in 242 __u64 treeID, 243 /// Min and max (inclusive) object IDs to search 244 __u64[2] objectIDs, 245 /// Min and max (inclusive) offsets to search 246 __u64[2] offsets, 247 /// Min and max (inclusive) transaction IDs to search 248 __u64[2] transIDs, 249 /// Callback receiving the search results 250 scope void delegate( 251 /// Search result header, containing the object and transaction ID, and offset 252 const ref btrfs_ioctl_search_header header, 253 /// Search result data 254 /// For variable-length structures, additional data can be 255 /// accessed by reading past the end of this variable. 256 /// `header.len` indicates the real total size of the data. 257 const ref Type data, 258 ) callback, 259 ) 260 { 261 __u8[2] types = [btrfsType, btrfsType]; 262 treeSearch( 263 fd, treeID, objectIDs, types, offsets, transIDs, 264 (const ref btrfs_ioctl_search_header header, const void[] data) 265 { 266 callback(header, *cast(Type*)data.ptr); 267 } 268 ); 269 } 270 271 /// Enumerate all chunks in the filesystem. 272 void enumerateChunks( 273 /// Handle to the filesystem 274 int fd, 275 /// Result callback 276 scope void delegate( 277 /// Chunk logical address 278 u64 offset, 279 /// Chunk info 280 /// Stripes can be indexed according to num_stripes 281 const ref btrfs_chunk chunk, 282 ) callback, 283 ) 284 { 285 treeSearch!( 286 BTRFS_CHUNK_ITEM_KEY, 287 btrfs_chunk, 288 )( 289 fd, 290 BTRFS_CHUNK_TREE_OBJECTID, 291 treeSearchAllObjectIDs, 292 treeSearchAllOffsets, 293 treeSearchAllTransIDs, 294 (const ref btrfs_ioctl_search_header header, const ref btrfs_chunk chunk) 295 { 296 callback(header.offset, chunk); 297 } 298 ); 299 } 300 301 /// Enumerate all device allocations in the filesystem. 302 /// This is roughly the inverse of enumerateChunks. 303 void enumerateDevExtents( 304 /// Handle to the filesystem 305 int fd, 306 /// Result callback 307 scope void delegate( 308 /// Device ID 309 u64 devid, 310 /// Extent physical address 311 u64 offset, 312 /// Extent info 313 const ref btrfs_dev_extent chunk, 314 ) callback, 315 /// Filter device. Default is all devices. 316 __u64[2] devIDs = treeSearchAllObjectIDs, 317 ) 318 { 319 treeSearch!( 320 BTRFS_DEV_EXTENT_KEY, 321 btrfs_dev_extent, 322 )( 323 fd, 324 BTRFS_DEV_TREE_OBJECTID, 325 devIDs, 326 treeSearchAllOffsets, 327 treeSearchAllTransIDs, 328 (const ref btrfs_ioctl_search_header header, const ref btrfs_dev_extent chunk) 329 { 330 callback(header.objectid, header.offset, chunk); 331 } 332 ); 333 } 334 335 private ubyte[SZ_64K] logicalInoBuf; 336 337 /// Get inode at this logical offset 338 void logicalIno( 339 /// File descriptor to the root (subvolume) containing the inode 340 int fd, 341 /// Logical offset to resolve 342 u64 logical, 343 /// Result callback 344 scope void delegate( 345 /// The inode 346 u64 inode, 347 /// The offset within the inode file 348 /// Will be zero if `ignoreOffset` is true 349 u64 offset, 350 /// The filesystem root ID containing the inode 351 u64 root, 352 ) callback, 353 /// Ignore the offset when querying extent ownership 354 /// If this particular offset is not in use by any file but the extent is, 355 /// this allows querying which file is pinning the offset. 356 bool ignoreOffset = false, 357 /// Query buffer 358 ubyte[] buf = logicalInoBuf[], 359 ) 360 { 361 u64 flags = 0; 362 if (ignoreOffset) 363 flags |= BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET; 364 365 assert(buf.length > btrfs_data_container.sizeof); 366 auto inodes = cast(btrfs_data_container*)buf.ptr; 367 368 int request = BTRFS_IOC_LOGICAL_INO; 369 if (buf.length > SZ_64K || flags != 0) 370 request = BTRFS_IOC_LOGICAL_INO_V2; 371 372 btrfs_ioctl_logical_ino_args loi; 373 loi.logical = logical; 374 loi.size = buf.length; 375 loi.flags = flags; 376 loi.inodes = cast(__u64)inodes; 377 378 ioctl(fd, request, &loi).eq(0).errnoEnforce("logical ino"); 379 for (auto i = 0; i < inodes.elem_cnt; i += 3) 380 { 381 u64 inum = inodes.val.ptr[i]; 382 u64 offset = inodes.val.ptr[i+1]; 383 u64 root = inodes.val.ptr[i+2]; 384 385 callback(inum, offset, root); 386 } 387 } 388 389 /// Obtain all paths for an inode. 390 void inoPaths( 391 /// Handle to the filesystem 392 int fd, 393 /// The inode 394 u64 inode, 395 /// Callback for receiving file names 396 scope void delegate(char[] fn) callback, 397 ) 398 { 399 union Buf 400 { 401 btrfs_data_container container; 402 ubyte[0x10000] buf; 403 } 404 Buf fspath = void; 405 406 btrfs_ioctl_ino_path_args ipa; 407 ipa.inum = inode; 408 ipa.size = fspath.sizeof; 409 ipa.fspath = ptr_to_u64(&fspath); 410 411 ioctl(fd, BTRFS_IOC_INO_PATHS, &ipa).eq(0).errnoEnforce("ino paths"); 412 413 foreach (i; 0 .. fspath.container.elem_cnt) 414 { 415 auto ptr = fspath.buf.ptr; 416 ptr += fspath.container.val.offsetof; 417 ptr += fspath.container.val.ptr[i]; 418 auto str = cast(char *)ptr; 419 callback(fromStringz(str)); 420 } 421 } 422 423 /// Obtains the relative path of a given filesystem object for the given filesystem root. 424 void inoLookup( 425 /// Handle to the filesystem 426 int fd, 427 /// Tree ID containing the filesystem object 428 u64 treeID, 429 /// Filesystem object 430 u64 objectID, 431 /// Callback receiving the relative path (it ends with /) 432 scope void delegate(char[] fn) callback, 433 ) 434 { 435 btrfs_ioctl_ino_lookup_args args; 436 args.treeid = treeID; 437 args.objectid = objectID; 438 439 ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args).eq(0).errnoEnforce("ino lookup"); 440 args.name[$-1] = 0; 441 callback(fromStringz(args.name.ptr)); 442 } 443 444 /// Find a root's parent root, and where it is within it 445 void findRootBackRef( 446 /// Handle to the filesystem 447 int fd, 448 /// The child root ID whose parent to find 449 __u64 rootID, 450 /// Result callback 451 scope void delegate( 452 /// The parent root ID 453 __u64 parentRootID, 454 /// The directory ID (within the parent root) containing the child 455 __u64 dirID, 456 /// The sequence of the child entry within the directory 457 __u64 sequence, 458 /// The base file name of the child within the directory 459 char[] name, 460 ) callback, 461 ) 462 { 463 treeSearch!( 464 BTRFS_ROOT_BACKREF_KEY, 465 btrfs_root_ref, 466 )( 467 fd, 468 BTRFS_ROOT_TREE_OBJECTID, 469 [rootID, rootID], 470 treeSearchAllOffsets, 471 treeSearchAllTransIDs, 472 (const ref btrfs_ioctl_search_header header, const ref btrfs_root_ref data) 473 { 474 auto parentRoot = header.offset; 475 auto name = (cast(char*)(&data + 1))[0 .. data.name_len]; 476 callback(parentRoot, data.dirid, data.sequence, name); 477 } 478 ); 479 } 480 481 /// Clone a file range 482 void cloneRange( 483 /// Source file descriptor 484 int srcFile, 485 /// Offset in source file to clone from 486 ulong srcOffset, 487 /// Target file descriptor 488 int dstFile, 489 /// Offset in target file to clone over 490 ulong dstOffset, 491 /// Number of bytes to clone 492 ulong length, 493 ) 494 { 495 btrfs_ioctl_clone_range_args args; 496 497 args.src_fd = srcFile; 498 args.src_offset = srcOffset; 499 args.src_length = length; 500 args.dest_offset = dstOffset; 501 502 int ret = ioctl(dstFile, BTRFS_IOC_CLONE_RANGE, &args); 503 errnoEnforce(ret >= 0, "ioctl(BTRFS_IOC_CLONE_RANGE)"); 504 } 505 506 version (btrfsUnittest) 507 unittest 508 { 509 if (!checkBtrfs()) 510 return; 511 import std.range, std.random, std.algorithm, std.file; 512 import std.stdio : File; 513 enum blockSize = 16*1024; // TODO: detect 514 auto data = blockSize.iota.map!(n => uniform!ubyte).array(); 515 std.file.write("test1.bin", data); 516 scope(exit) remove("test1.bin"); 517 auto f1 = File("test1.bin", "rb"); 518 scope(exit) remove("test2.bin"); 519 auto f2 = File("test2.bin", "wb"); 520 cloneRange(f1.fileno, 0, f2.fileno, 0, blockSize); 521 f2.close(); 522 f1.close(); 523 assert(std.file.read("test2.bin") == data); 524 } 525 526 struct Extent 527 { 528 int fd; 529 ulong offset; 530 } 531 532 struct SameExtentResult 533 { 534 ulong totalBytesDeduped; 535 } 536 537 SameExtentResult sameExtent(in Extent[] extents, ulong length) 538 { 539 assert(extents.length >= 2, "Need at least 2 extents to deduplicate"); 540 541 auto buf = new ubyte[ 542 btrfs_ioctl_same_args.sizeof + 543 btrfs_ioctl_same_extent_info.sizeof * extents.length]; 544 auto same = cast(btrfs_ioctl_same_args*) buf.ptr; 545 546 same.length = length; 547 same.logical_offset = extents[0].offset; 548 enforce(extents.length < ushort.max, "Too many extents"); 549 same.dest_count = cast(ushort)(extents.length - 1); 550 551 foreach (i, ref extent; extents[1..$]) 552 { 553 same.info.ptr[i].fd = extent.fd; 554 same.info.ptr[i].logical_offset = extent.offset; 555 same.info.ptr[i].status = -1; 556 } 557 558 int ret = ioctl(extents[0].fd, BTRFS_IOC_FILE_EXTENT_SAME, same); 559 errnoEnforce(ret >= 0, "ioctl(BTRFS_IOC_FILE_EXTENT_SAME)"); 560 561 SameExtentResult result; 562 563 foreach (i, ref extent; extents[1..$]) 564 { 565 auto status = same.info.ptr[i].status; 566 if (status) 567 { 568 enforce(status != BTRFS_SAME_DATA_DIFFERS, 569 "Extent #%d differs".format(i+1)); 570 errno = -status; 571 errnoEnforce(false, 572 "Deduplicating extent #%d returned status %d".format(i+1, status)); 573 } 574 result.totalBytesDeduped += same.info.ptr[i].bytes_deduped; 575 } 576 577 return result; 578 } 579 580 version (btrfsUnittest) 581 unittest 582 { 583 if (!checkBtrfs()) 584 return; 585 import std.range, std.random, std.algorithm, std.file; 586 import std.stdio : File; 587 enum blockSize = 16*1024; // TODO: detect 588 auto data = blockSize.iota.map!(n => uniform!ubyte).array(); 589 std.file.write("test1.bin", data); 590 scope(exit) remove("test1.bin"); 591 std.file.write("test2.bin", data); 592 scope(exit) remove("test2.bin"); 593 594 { 595 auto f1 = File("test1.bin", "r+b"); 596 auto f2 = File("test2.bin", "r+b"); 597 sameExtent([ 598 Extent(f1.fileno, 0), 599 Extent(f2.fileno, 0), 600 ], blockSize); 601 } 602 603 { 604 data[0]++; 605 std.file.write("test2.bin", data); 606 auto f1 = File("test1.bin", "r+b"); 607 auto f2 = File("test2.bin", "r+b"); 608 assertThrown!Exception(sameExtent([ 609 Extent(f1.fileno, 0), 610 Extent(f2.fileno, 0), 611 ], blockSize)); 612 } 613 } 614 615 version (btrfsUnittest) 616 { 617 import ae.sys.file; 618 import std.stdio : stderr; 619 620 bool checkBtrfs(string moduleName = __MODULE__)() 621 { 622 auto fs = getPathFilesystem("."); 623 if (fs != "btrfs") 624 { 625 stderr.writefln("Current filesystem is %s, not btrfs, skipping %s test.", fs, moduleName); 626 return false; 627 } 628 return true; 629 } 630 } 631 632 /// Delete a btrfs subvolume by its ID. 633 void deleteSubvolume( 634 /// File descriptor of the filesystem. 635 int fd, 636 /// The subvolume's tree ID. 637 u64 subvolumeID, 638 ) 639 { 640 btrfs_ioctl_vol_args_v2 args; 641 args.flags = BTRFS_SUBVOL_SPEC_BY_ID; 642 args.subvolid = subvolumeID; 643 644 int ret = ioctl(fd, BTRFS_IOC_SNAP_DESTROY_V2, &args); 645 errnoEnforce(ret >= 0, "ioctl(BTRFS_IOC_SNAP_DESTROY_V2)"); 646 }