1 module btrfs; 2 3 import core.stdc.errno; 4 import core.sys.posix.sys.ioctl; 5 import core.sys.posix.sys.stat; 6 import core.sys.posix.sys.types; 7 8 import std.algorithm.comparison; 9 import std.exception; 10 import std.math; 11 import std.string; 12 13 import ae.utils.bitmanip; 14 import ae.utils.math : eq; 15 16 import btrfs.c.ioctl; 17 import btrfs.c.kerncompat; 18 import btrfs.c.kernel_shared.ctree; 19 import btrfs.c.kernel_lib.sizes; 20 21 private 22 { 23 alias __fsword_t = uint; 24 struct fsid_t { int[2] __val; } 25 struct statfs_t 26 { 27 __fsword_t f_type; 28 __fsword_t f_bsize; 29 fsblkcnt_t f_blocks; 30 fsblkcnt_t f_bfree; 31 fsblkcnt_t f_bavail; 32 fsfilcnt_t f_files; 33 fsfilcnt_t f_ffree; 34 fsid_t f_fsid; 35 __fsword_t f_namelen; 36 __fsword_t f_frsize; 37 __fsword_t f_flags; 38 __fsword_t[5] f_spare; 39 ubyte[4096 - 88] __unknown; // D - seems to vary A LOT across platforms / libcs 40 } 41 extern(C) int fstatfs(int fd, statfs_t* buf); 42 } 43 44 enum BTRFS_SUPER_MAGIC = 0x9123683E; 45 46 /// Returns true is fd is on a btrfs filesystem. 47 bool isBTRFS(int fd) 48 { 49 statfs_t sfs; 50 fstatfs(fd, &sfs).eq(0).errnoEnforce("fstatfs"); 51 return sfs.f_type == BTRFS_SUPER_MAGIC; 52 } 53 54 /// Returns true is fd is the root of a btrfs subvolume. 55 bool isSubvolume(int fd) 56 { 57 stat_t st; 58 fstat(fd, &st).eq(0).errnoEnforce("fstat"); 59 return st.st_ino == BTRFS_FIRST_FREE_OBJECTID; 60 } 61 62 /// Returns the subvolume ID containing the file open in fd. 63 u64 getSubvolumeID(int fd) 64 { 65 btrfs_ioctl_ino_lookup_args args; 66 args.treeid = 0; 67 args.objectid = BTRFS_FIRST_FREE_OBJECTID; 68 ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args).eq(0).errnoEnforce("ino lookup"); 69 return args.treeid; 70 } 71 72 enum __u64[2] treeSearchAllObjectIDs = [0, -1]; 73 enum __u64[2] treeSearchAllOffsets = [0, -1]; 74 enum __u64[2] treeSearchAllTransIDs = [0, -1]; 75 76 /// Raw tree search 77 void treeSearch( 78 /// Handle to the filesystem 79 int fd, 80 /// Tree to search in 81 __u64 treeID, 82 /// Min and max (inclusive) object IDs to search 83 __u64[2] objectIDs, 84 /// Min and max (inclusive) types to search 85 __u8[2] types, 86 /// Min and max (inclusive) offsets to search 87 __u64[2] offsets, 88 /// Min and max (inclusive) transaction IDs to search 89 __u64[2] transIDs, 90 /// Callback receiving the search results 91 scope void delegate( 92 /// Search result header, containing the object and transaction ID, offset and type 93 const ref btrfs_ioctl_search_header header, 94 /// Raw data - must be cast to the correct type 95 const void[] data, 96 ) callback, 97 ) 98 { 99 // Lexicographic ordering for tree search 100 static union Key 101 { 102 struct Fields 103 { 104 align(1): 105 BigEndian!__u64 objectID; 106 BigEndian!__u8 type; 107 BigEndian!__u64 offset; 108 } 109 static assert(Fields.sizeof == 0x11); 110 111 Fields fields; 112 ubyte[Fields.sizeof] bytes; 113 114 this(__u64 objectID, __u8 type, __u64 offset) 115 { 116 fields = Fields( 117 BigEndian!__u64(objectID), 118 BigEndian!__u8 (type), 119 BigEndian!__u64(offset), 120 ); 121 } 122 123 void opUnary(string op : "++")() 124 { 125 foreach_reverse (ref b; bytes) 126 if (++b != 0) 127 return; // No overflow (otherwise, continue to carry the 1) 128 assert(false, "Search key overflow"); 129 } 130 131 bool opBinary(string op)(const ref Key o) const 132 if (is(typeof(mixin(`0` ~ op ~ `0`)) == bool)) 133 { 134 return mixin(`bytes` ~ op ~ `o.bytes`); 135 } 136 137 int opCmp(const ref Key o) const 138 { 139 return cmp(bytes[], o.bytes[]); 140 } 141 } 142 143 Key min = Key(objectIDs[0], types[0], offsets[0]); 144 Key max = Key(objectIDs[1], types[1], offsets[1]); 145 146 btrfs_ioctl_search_args args; 147 btrfs_ioctl_search_key* sk = &args.key; 148 149 sk.tree_id = treeID; 150 sk.max_objectid = max.fields.objectID; 151 sk.max_type = max.fields.type; 152 sk.max_offset = max.fields.offset; 153 sk.min_transid = transIDs[0]; 154 sk.max_transid = transIDs[1]; 155 sk.nr_items = 4096; 156 157 do 158 { 159 sk.min_objectid = min.fields.objectID; 160 sk.min_type = min.fields.type ; 161 sk.min_offset = min.fields.offset ; 162 ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args).eq(0).errnoEnforce("tree search"); 163 164 if (sk.nr_items == 0) 165 break; 166 167 ulong off = 0; 168 btrfs_ioctl_search_header* sh; 169 foreach (i; 0 .. sk.nr_items) 170 { 171 sh = cast(btrfs_ioctl_search_header *)(args.buf.ptr + off); 172 off += (*sh).sizeof; 173 auto data = (args.buf.ptr + off)[0 .. sh.len]; 174 off += sh.len; 175 176 if ( 177 objectIDs[0] <= sh.objectid && sh.objectid <= objectIDs[1] && 178 types [0] <= sh.type && sh.type <= types [1] && 179 offsets [0] <= sh.offset && sh.offset <= offsets [1] && 180 transIDs [0] <= sh.transid && sh.transid <= transIDs [1] 181 ) 182 callback(*sh, data); 183 } 184 185 assert(sh.type < 256); 186 min.fields.objectID = sh.objectid; 187 min.fields.type = cast(__u8)sh.type; 188 min.fields.offset = sh.offset; 189 min++; 190 } 191 while (min <= max); 192 } 193 194 /// Typed tree search 195 void treeSearch( 196 /// BTRFS type identifier to search for 197 __u8 btrfsType, 198 /// Structure type describing the data 199 Type, 200 )( 201 /// Handle to the filesystem 202 int fd, 203 /// Tree to search in 204 __u64 treeID, 205 /// Min and max (inclusive) object IDs to search 206 __u64[2] objectIDs, 207 /// Min and max (inclusive) offsets to search 208 __u64[2] offsets, 209 /// Min and max (inclusive) transaction IDs to search 210 __u64[2] transIDs, 211 /// Callback receiving the search results 212 scope void delegate( 213 /// Search result header, containing the object and transaction ID, and offset 214 const ref btrfs_ioctl_search_header header, 215 /// Search result data 216 /// For variable-length structures, additional data can be 217 /// accessed by reading past the end of this variable. 218 /// `header.len` indicates the real total size of the data. 219 const ref Type data, 220 ) callback, 221 ) 222 { 223 __u8[2] types = [btrfsType, btrfsType]; 224 treeSearch( 225 fd, treeID, objectIDs, types, offsets, transIDs, 226 (const ref btrfs_ioctl_search_header header, const void[] data) 227 { 228 callback(header, *cast(Type*)data.ptr); 229 } 230 ); 231 } 232 233 /// Enumerate all chunks in the filesystem. 234 void enumerateChunks( 235 /// Handle to the filesystem 236 int fd, 237 /// Result callback 238 scope void delegate( 239 /// Chunk logical address 240 u64 offset, 241 /// Chunk info 242 /// Stripes can be indexed according to num_stripes 243 const ref btrfs_chunk chunk, 244 ) callback, 245 ) 246 { 247 treeSearch!( 248 BTRFS_CHUNK_ITEM_KEY, 249 btrfs_chunk, 250 )( 251 fd, 252 BTRFS_CHUNK_TREE_OBJECTID, 253 treeSearchAllObjectIDs, 254 treeSearchAllOffsets, 255 treeSearchAllTransIDs, 256 (const ref btrfs_ioctl_search_header header, const ref btrfs_chunk chunk) 257 { 258 callback(header.offset, chunk); 259 } 260 ); 261 } 262 263 private ubyte[SZ_64K] logicalInoBuf; 264 265 /// Get inode at this logical offset 266 void logicalIno( 267 /// File descriptor to the root (subvolume) containing the inode 268 int fd, 269 /// Logical offset to resolve 270 u64 logical, 271 /// Result callback 272 scope void delegate( 273 /// The inode 274 u64 inode, 275 /// The offset within the inode file 276 /// Will be zero if `ignoreOffset` is true 277 u64 offset, 278 /// The filesystem root ID containing the inode 279 u64 root, 280 ) callback, 281 /// Ignore the offset when querying extent ownership 282 /// If this particular offset is not in use by any file but the extent is, 283 /// this allows querying which file is pinning the offset. 284 bool ignoreOffset = false, 285 /// Query buffer 286 ubyte[] buf = logicalInoBuf[], 287 ) 288 { 289 u64 flags = 0; 290 if (ignoreOffset) 291 flags |= BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET; 292 293 assert(buf.length > btrfs_data_container.sizeof); 294 auto inodes = cast(btrfs_data_container*)buf.ptr; 295 296 ulong request = BTRFS_IOC_LOGICAL_INO; 297 if (buf.length > SZ_64K || flags != 0) 298 request = BTRFS_IOC_LOGICAL_INO_V2; 299 300 btrfs_ioctl_logical_ino_args loi; 301 loi.logical = logical; 302 loi.size = buf.length; 303 loi.flags = flags; 304 loi.inodes = cast(__u64)inodes; 305 306 ioctl(fd, request, &loi).eq(0).errnoEnforce("logical ino"); 307 for (auto i = 0; i < inodes.elem_cnt; i += 3) 308 { 309 u64 inum = inodes.val.ptr[i]; 310 u64 offset = inodes.val.ptr[i+1]; 311 u64 root = inodes.val.ptr[i+2]; 312 313 callback(inum, offset, root); 314 } 315 } 316 317 /// Obtain all paths for an inode. 318 void inoPaths( 319 /// Handle to the filesystem 320 int fd, 321 /// The inode 322 u64 inode, 323 /// Callback for receiving file names 324 scope void delegate(char[] fn) callback, 325 ) 326 { 327 union Buf 328 { 329 btrfs_data_container container; 330 ubyte[0x10000] buf; 331 } 332 Buf fspath; 333 334 btrfs_ioctl_ino_path_args ipa; 335 ipa.inum = inode; 336 ipa.size = fspath.sizeof; 337 ipa.fspath = ptr_to_u64(&fspath); 338 339 ioctl(fd, BTRFS_IOC_INO_PATHS, &ipa).eq(0).errnoEnforce("ino paths"); 340 341 foreach (i; 0 .. fspath.container.elem_cnt) 342 { 343 auto ptr = fspath.buf.ptr; 344 ptr += fspath.container.val.offsetof; 345 ptr += fspath.container.val.ptr[i]; 346 auto str = cast(char *)ptr; 347 callback(fromStringz(str)); 348 } 349 } 350 351 /// Obtains the relative path of a given filesystem object for the given filesystem root. 352 void inoLookup( 353 /// Handle to the filesystem 354 int fd, 355 /// Tree ID containing the filesystem object 356 u64 treeID, 357 /// Filesystem object 358 u64 objectID, 359 /// Callback receiving the relative path (it ends with /) 360 scope void delegate(char[] fn) callback, 361 ) 362 { 363 btrfs_ioctl_ino_lookup_args args; 364 args.treeid = treeID; 365 args.objectid = objectID; 366 367 ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args).eq(0).errnoEnforce("ino lookup"); 368 args.name[$-1] = 0; 369 callback(fromStringz(args.name.ptr)); 370 } 371 372 /// Find a root's parent root, and where it is within it 373 void findRootBackRef( 374 /// Handle to the filesystem 375 int fd, 376 /// The child root ID whose parent to find 377 __u64 rootID, 378 /// Result callback 379 scope void delegate( 380 /// The parent root ID 381 __u64 parentRootID, 382 /// The directory ID (within the parent root) containing the child 383 __u64 dirID, 384 /// The sequence of the child entry within the directory 385 __u64 sequence, 386 /// The base file name of the child within the directory 387 char[] name, 388 ) callback, 389 ) 390 { 391 treeSearch!( 392 BTRFS_ROOT_BACKREF_KEY, 393 btrfs_root_ref, 394 )( 395 fd, 396 BTRFS_ROOT_TREE_OBJECTID, 397 [rootID, rootID], 398 treeSearchAllOffsets, 399 treeSearchAllTransIDs, 400 (const ref btrfs_ioctl_search_header header, const ref btrfs_root_ref data) 401 { 402 auto parentRoot = header.offset; 403 auto name = (cast(char*)(&data + 1))[0 .. data.name_len]; 404 callback(parentRoot, data.dirid, data.sequence, name); 405 } 406 ); 407 } 408 409 /// Clone a file range 410 void cloneRange( 411 /// Source file descriptor 412 int srcFile, 413 /// Offset in source file to clone from 414 ulong srcOffset, 415 /// Target file descriptor 416 int dstFile, 417 /// Offset in target file to clone over 418 ulong dstOffset, 419 /// Number of bytes to clone 420 ulong length, 421 ) 422 { 423 btrfs_ioctl_clone_range_args args; 424 425 args.src_fd = srcFile; 426 args.src_offset = srcOffset; 427 args.src_length = length; 428 args.dest_offset = dstOffset; 429 430 int ret = ioctl(dstFile, BTRFS_IOC_CLONE_RANGE, &args); 431 errnoEnforce(ret >= 0, "ioctl(BTRFS_IOC_CLONE_RANGE)"); 432 } 433 434 version (btrfsUnittest) 435 unittest 436 { 437 if (!checkBtrfs()) 438 return; 439 import std.range, std.random, std.algorithm, std.file; 440 import std.stdio : File; 441 enum blockSize = 16*1024; // TODO: detect 442 auto data = blockSize.iota.map!(n => uniform!ubyte).array(); 443 std.file.write("test1.bin", data); 444 scope(exit) remove("test1.bin"); 445 auto f1 = File("test1.bin", "rb"); 446 scope(exit) remove("test2.bin"); 447 auto f2 = File("test2.bin", "wb"); 448 cloneRange(f1.fileno, 0, f2.fileno, 0, blockSize); 449 f2.close(); 450 f1.close(); 451 assert(std.file.read("test2.bin") == data); 452 } 453 454 struct Extent 455 { 456 int fd; 457 ulong offset; 458 } 459 460 struct SameExtentResult 461 { 462 ulong totalBytesDeduped; 463 } 464 465 SameExtentResult sameExtent(in Extent[] extents, ulong length) 466 { 467 assert(extents.length >= 2, "Need at least 2 extents to deduplicate"); 468 469 auto buf = new ubyte[ 470 btrfs_ioctl_same_args.sizeof + 471 btrfs_ioctl_same_extent_info.sizeof * extents.length]; 472 auto same = cast(btrfs_ioctl_same_args*) buf.ptr; 473 474 same.length = length; 475 same.logical_offset = extents[0].offset; 476 enforce(extents.length < ushort.max, "Too many extents"); 477 same.dest_count = cast(ushort)(extents.length - 1); 478 479 foreach (i, ref extent; extents[1..$]) 480 { 481 same.info.ptr[i].fd = extent.fd; 482 same.info.ptr[i].logical_offset = extent.offset; 483 same.info.ptr[i].status = -1; 484 } 485 486 int ret = ioctl(extents[0].fd, BTRFS_IOC_FILE_EXTENT_SAME, same); 487 errnoEnforce(ret >= 0, "ioctl(BTRFS_IOC_FILE_EXTENT_SAME)"); 488 489 SameExtentResult result; 490 491 foreach (i, ref extent; extents[1..$]) 492 { 493 auto status = same.info.ptr[i].status; 494 if (status) 495 { 496 enforce(status != BTRFS_SAME_DATA_DIFFERS, 497 "Extent #%d differs".format(i+1)); 498 errno = -status; 499 errnoEnforce(false, 500 "Deduplicating extent #%d returned status %d".format(i+1, status)); 501 } 502 result.totalBytesDeduped += same.info.ptr[i].bytes_deduped; 503 } 504 505 return result; 506 } 507 508 version (btrfsUnittest) 509 unittest 510 { 511 if (!checkBtrfs()) 512 return; 513 import std.range, std.random, std.algorithm, std.file; 514 import std.stdio : File; 515 enum blockSize = 16*1024; // TODO: detect 516 auto data = blockSize.iota.map!(n => uniform!ubyte).array(); 517 std.file.write("test1.bin", data); 518 scope(exit) remove("test1.bin"); 519 std.file.write("test2.bin", data); 520 scope(exit) remove("test2.bin"); 521 522 { 523 auto f1 = File("test1.bin", "r+b"); 524 auto f2 = File("test2.bin", "r+b"); 525 sameExtent([ 526 Extent(f1.fileno, 0), 527 Extent(f2.fileno, 0), 528 ], blockSize); 529 } 530 531 { 532 data[0]++; 533 std.file.write("test2.bin", data); 534 auto f1 = File("test1.bin", "r+b"); 535 auto f2 = File("test2.bin", "r+b"); 536 assertThrown!Exception(sameExtent([ 537 Extent(f1.fileno, 0), 538 Extent(f2.fileno, 0), 539 ], blockSize)); 540 } 541 } 542 543 version (btrfsUnittest) 544 { 545 import ae.sys.file; 546 import std.stdio : stderr; 547 548 bool checkBtrfs(string moduleName = __MODULE__)() 549 { 550 auto fs = getPathFilesystem("."); 551 if (fs != "btrfs") 552 { 553 stderr.writefln("Current filesystem is %s, not btrfs, skipping %s test.", fs, moduleName); 554 return false; 555 } 556 return true; 557 } 558 }