1 module btrfs;
2 
3 import core.stdc.errno;
4 import core.sys.posix.sys.ioctl;
5 import core.sys.posix.sys.stat;
6 import core.sys.posix.sys.types;
7 
8 import std.algorithm.comparison;
9 import std.exception;
10 import std.math;
11 import std.string;
12 
13 import ae.utils.bitmanip;
14 import ae.utils.math : eq;
15 
16 import btrfs.c.ioctl;
17 import btrfs.c.kerncompat;
18 import btrfs.c.kernel_shared.ctree;
19 import btrfs.c.kernel_lib.sizes;
20 
21 private
22 {
23 	alias __fsword_t = uint;
24 	struct fsid_t { int[2] __val; }
25 	struct statfs_t
26 	{
27 		__fsword_t f_type;
28 		__fsword_t f_bsize;
29 		fsblkcnt_t f_blocks;
30 		fsblkcnt_t f_bfree;
31 		fsblkcnt_t f_bavail;
32 		fsfilcnt_t f_files;
33 		fsfilcnt_t f_ffree;
34 		fsid_t     f_fsid;
35 		__fsword_t f_namelen;
36 		__fsword_t f_frsize;
37 		__fsword_t f_flags;
38 		__fsword_t[5] f_spare;
39 		ubyte[4096 - 88] __unknown; // D - seems to vary A LOT across platforms / libcs
40 	}
41 	extern(C) int fstatfs(int fd, statfs_t* buf);
42 }
43 
44 enum BTRFS_SUPER_MAGIC = 0x9123683E;
45 
46 /// Returns true is fd is on a btrfs filesystem.
47 bool isBTRFS(int fd)
48 {
49 	statfs_t sfs;
50 	fstatfs(fd, &sfs).eq(0).errnoEnforce("fstatfs");
51 	return sfs.f_type == BTRFS_SUPER_MAGIC;
52 }
53 
54 /// Returns true is fd is the root of a btrfs subvolume.
55 bool isSubvolume(int fd)
56 {
57 	stat_t st;
58 	fstat(fd, &st).eq(0).errnoEnforce("fstat");
59 	return st.st_ino == BTRFS_FIRST_FREE_OBJECTID;
60 }
61 
62 /// Returns the subvolume ID containing the file open in fd.
63 u64 getSubvolumeID(int fd)
64 {
65 	btrfs_ioctl_ino_lookup_args args;
66 	args.treeid = 0;
67 	args.objectid = BTRFS_FIRST_FREE_OBJECTID;
68 	ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args).eq(0).errnoEnforce("ino lookup");
69 	return args.treeid;
70 }
71 
72 btrfs_ioctl_fs_info_args getInfo(int fd)
73 {
74 	btrfs_ioctl_fs_info_args args;
75 	ioctl(fd, BTRFS_IOC_FS_INFO, &args).eq(0).errnoEnforce("fs info");
76 	return args;
77 }
78 
79 btrfs_ioctl_dev_info_args getDevInfo(int fd, u64 devid)
80 {
81 	btrfs_ioctl_dev_info_args args;
82 	args.devid = devid;
83 	ioctl(fd, BTRFS_IOC_DEV_INFO, &args).eq(0).errnoEnforce("dev info");
84 	return args;
85 }
86 
87 btrfs_ioctl_dev_info_args[] getDevices(int fd)
88 {
89 	btrfs_ioctl_dev_info_args[] result;
90 
91 	auto info = fd.getInfo();
92 
93 	foreach (i; 0 .. info.max_id + 1)
94 	{
95 		btrfs_ioctl_dev_info_args devInfo;
96 		try
97 			devInfo = fd.getDevInfo(i);
98 		catch (ErrnoException e)
99 			if (e.errno == ENODEV)
100 				continue;
101 			else
102 				throw e;
103 		result ~= devInfo;
104 	}
105 
106 	enforce(result.length == info.num_devices, "Unexpected number of devices");
107 	return result;
108 }
109 
110 enum __u64[2] treeSearchAllObjectIDs = [0, -1];
111 enum __u64[2] treeSearchAllOffsets   = [0, -1];
112 enum __u64[2] treeSearchAllTransIDs  = [0, -1];
113 
114 /// Raw tree search
115 void treeSearch(
116 	/// Handle to the filesystem
117 	int fd,
118 	/// Tree to search in
119 	__u64 treeID,
120 	/// Min and max (inclusive) object IDs to search
121 	__u64[2] objectIDs,
122 	/// Min and max (inclusive) types to search
123 	__u8[2] types,
124 	/// Min and max (inclusive) offsets to search
125 	__u64[2] offsets,
126 	/// Min and max (inclusive) transaction IDs to search
127 	__u64[2] transIDs,
128 	/// Callback receiving the search results
129 	scope void delegate(
130 		/// Search result header, containing the object and transaction ID, offset and type
131 		const ref btrfs_ioctl_search_header header,
132 		/// Raw data - must be cast to the correct type
133 		const void[] data,
134 	) callback,
135 )
136 {
137 	// Lexicographic ordering for tree search
138 	static union Key
139 	{
140 		struct Fields
141 		{
142 		align(1):
143 			BigEndian!__u64 objectID;
144 			BigEndian!__u8  type;
145 			BigEndian!__u64 offset;
146 		}
147 		static assert(Fields.sizeof == 0x11);
148 
149 		Fields fields;
150 		ubyte[Fields.sizeof] bytes;
151 
152 		this(__u64 objectID, __u8 type, __u64 offset)
153 		{
154 			fields = Fields(
155 				BigEndian!__u64(objectID),
156 				BigEndian!__u8 (type),
157 				BigEndian!__u64(offset),
158 			);
159 		}
160 
161 		void opUnary(string op : "++")()
162 		{
163 			foreach_reverse (ref b; bytes)
164 				if (++b != 0)
165 					return; // No overflow (otherwise, continue to carry the 1)
166 			assert(false, "Search key overflow");
167 		}
168 
169 		bool opBinary(string op)(const ref Key o) const
170 		if (is(typeof(mixin(`0` ~ op ~ `0`)) == bool))
171 		{
172 			return mixin(`bytes` ~ op ~ `o.bytes`);
173 		}
174 
175 		int opCmp(const ref Key o) const
176 		{
177 			return cmp(bytes[], o.bytes[]);
178 		}
179 	}
180 
181 	Key min = Key(objectIDs[0], types[0], offsets[0]);
182 	Key max = Key(objectIDs[1], types[1], offsets[1]);
183 
184 	btrfs_ioctl_search_args args;
185 	btrfs_ioctl_search_key* sk = &args.key;
186 
187 	sk.tree_id = treeID;
188 	sk.max_objectid = max.fields.objectID;
189 	sk.max_type     = max.fields.type;
190 	sk.max_offset   = max.fields.offset;
191 	sk.min_transid  = transIDs[0];
192 	sk.max_transid  = transIDs[1];
193 	sk.nr_items = 4096;
194 
195 	do
196 	{
197 		sk.min_objectid = min.fields.objectID;
198 		sk.min_type     = min.fields.type    ;
199 		sk.min_offset   = min.fields.offset  ;
200 		ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args).eq(0).errnoEnforce("tree search");
201 
202 		if (sk.nr_items == 0)
203 			break;
204 
205 		ulong off = 0;
206 		btrfs_ioctl_search_header* sh;
207 		foreach (i; 0 .. sk.nr_items)
208 		{
209 			sh = cast(btrfs_ioctl_search_header *)(args.buf.ptr + off);
210 			off += (*sh).sizeof;
211 			auto data = (args.buf.ptr + off)[0 .. sh.len];
212 			off += sh.len;
213 
214 			if (
215 				objectIDs[0] <= sh.objectid && sh.objectid <= objectIDs[1] &&
216 				types    [0] <= sh.type     && sh.type     <= types    [1] &&
217 				offsets  [0] <= sh.offset   && sh.offset   <= offsets  [1] &&
218 				transIDs [0] <= sh.transid  && sh.transid  <= transIDs [1]
219 			)
220 				callback(*sh, data);
221 		}
222 
223 		assert(sh.type < 256);
224 		min.fields.objectID = sh.objectid;
225 		min.fields.type     = cast(__u8)sh.type;
226 		min.fields.offset   = sh.offset;
227 		min++;
228 	}
229 	while (min <= max);
230 }
231 
232 /// Typed tree search
233 void treeSearch(
234 	/// BTRFS type identifier to search for
235 	__u8 btrfsType,
236 	/// Structure type describing the data
237 	Type,
238 )(
239 	/// Handle to the filesystem
240 	int fd,
241 	/// Tree to search in
242 	__u64 treeID,
243 	/// Min and max (inclusive) object IDs to search
244 	__u64[2] objectIDs,
245 	/// Min and max (inclusive) offsets to search
246 	__u64[2] offsets,
247 	/// Min and max (inclusive) transaction IDs to search
248 	__u64[2] transIDs,
249 	/// Callback receiving the search results
250 	scope void delegate(
251 		/// Search result header, containing the object and transaction ID, and offset
252 		const ref btrfs_ioctl_search_header header,
253 		/// Search result data
254 		/// For variable-length structures, additional data can be
255 		/// accessed by reading past the end of this variable.
256 		/// `header.len` indicates the real total size of the data.
257 		const ref Type data,
258 	) callback,
259 )
260 {
261 	__u8[2] types = [btrfsType, btrfsType];
262 	treeSearch(
263 		fd, treeID, objectIDs, types, offsets, transIDs,
264 		(const ref btrfs_ioctl_search_header header, const void[] data)
265 		{
266 			callback(header, *cast(Type*)data.ptr);
267 		}
268 	);
269 }
270 
271 /// Enumerate all chunks in the filesystem.
272 void enumerateChunks(
273 	/// Handle to the filesystem
274 	int fd,
275 	/// Result callback
276 	scope void delegate(
277 		/// Chunk logical address
278 		u64 offset,
279 		/// Chunk info
280 		/// Stripes can be indexed according to num_stripes
281 		const ref btrfs_chunk chunk,
282 	) callback,
283 )
284 {
285 	treeSearch!(
286 		BTRFS_CHUNK_ITEM_KEY,
287 		btrfs_chunk,
288 	)(
289 		fd,
290 		BTRFS_CHUNK_TREE_OBJECTID,
291 		treeSearchAllObjectIDs,
292 		treeSearchAllOffsets,
293 		treeSearchAllTransIDs,
294 		(const ref btrfs_ioctl_search_header header, const ref btrfs_chunk chunk)
295 		{
296 			callback(header.offset, chunk);
297 		}
298 	);
299 }
300 
301 /// Enumerate all device allocations in the filesystem.
302 /// This is roughly the inverse of enumerateChunks.
303 void enumerateDevExtents(
304 	/// Handle to the filesystem
305 	int fd,
306 	/// Result callback
307 	scope void delegate(
308 		/// Device ID
309 		u64 devid,
310 		/// Extent physical address
311 		u64 offset,
312 		/// Extent info
313 		const ref btrfs_dev_extent chunk,
314 	) callback,
315 	/// Filter device. Default is all devices.
316 	__u64[2] devIDs = treeSearchAllObjectIDs,
317 )
318 {
319 	treeSearch!(
320 		BTRFS_DEV_EXTENT_KEY,
321 		btrfs_dev_extent,
322 	)(
323 		fd,
324 		BTRFS_DEV_TREE_OBJECTID,
325 		devIDs,
326 		treeSearchAllOffsets,
327 		treeSearchAllTransIDs,
328 		(const ref btrfs_ioctl_search_header header, const ref btrfs_dev_extent chunk)
329 		{
330 			callback(header.objectid, header.offset, chunk);
331 		}
332 	);
333 }
334 
335 private ubyte[SZ_64K] logicalInoBuf;
336 
337 /// Get inode at this logical offset
338 void logicalIno(
339 	/// File descriptor to the root (subvolume) containing the inode
340 	int fd,
341 	/// Logical offset to resolve
342 	u64 logical,
343 	/// Result callback
344 	scope void delegate(
345 		/// The inode
346 		u64 inode,
347 		/// The offset within the inode file
348 		/// Will be zero if `ignoreOffset` is true
349 		u64 offset,
350 		/// The filesystem root ID containing the inode
351 		u64 root,
352 	) callback,
353 	/// Ignore the offset when querying extent ownership
354 	/// If this particular offset is not in use by any file but the extent is,
355 	/// this allows querying which file is pinning the offset.
356 	bool ignoreOffset = false,
357 	/// Query buffer
358 	ubyte[] buf = logicalInoBuf[],
359 )
360 {
361 	u64 flags = 0;
362 	if (ignoreOffset)
363 		flags |= BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET;
364 
365 	assert(buf.length > btrfs_data_container.sizeof);
366 	auto inodes = cast(btrfs_data_container*)buf.ptr;
367 
368 	ulong request = BTRFS_IOC_LOGICAL_INO;
369 	if (buf.length > SZ_64K || flags != 0)
370 		request = BTRFS_IOC_LOGICAL_INO_V2;
371 
372 	btrfs_ioctl_logical_ino_args loi;
373 	loi.logical = logical;
374 	loi.size = buf.length;
375 	loi.flags = flags;
376 	loi.inodes = cast(__u64)inodes;
377 
378 	ioctl(fd, request, &loi).eq(0).errnoEnforce("logical ino");
379 	for (auto i = 0; i < inodes.elem_cnt; i += 3)
380 	{
381 		u64 inum   = inodes.val.ptr[i];
382 		u64 offset = inodes.val.ptr[i+1];
383 		u64 root   = inodes.val.ptr[i+2];
384 
385 		callback(inum, offset, root);
386 	}
387 }
388 
389 /// Obtain all paths for an inode.
390 void inoPaths(
391 	/// Handle to the filesystem
392 	int fd,
393 	/// The inode
394 	u64 inode,
395 	/// Callback for receiving file names
396 	scope void delegate(char[] fn) callback,
397 )
398 {
399 	union Buf
400 	{
401 		btrfs_data_container container;
402 		ubyte[0x10000] buf;
403 	}
404 	Buf fspath;
405 
406 	btrfs_ioctl_ino_path_args ipa;
407 	ipa.inum = inode;
408 	ipa.size = fspath.sizeof;
409 	ipa.fspath = ptr_to_u64(&fspath);
410 
411 	ioctl(fd, BTRFS_IOC_INO_PATHS, &ipa).eq(0).errnoEnforce("ino paths");
412 
413 	foreach (i; 0 .. fspath.container.elem_cnt)
414 	{
415 		auto ptr = fspath.buf.ptr;
416 		ptr += fspath.container.val.offsetof;
417 		ptr += fspath.container.val.ptr[i];
418 		auto str = cast(char *)ptr;
419 		callback(fromStringz(str));
420 	}
421 }
422 
423 /// Obtains the relative path of a given filesystem object for the given filesystem root.
424 void inoLookup(
425 	/// Handle to the filesystem
426 	int fd,
427 	/// Tree ID containing the filesystem object
428 	u64 treeID,
429 	/// Filesystem object
430 	u64 objectID,
431 	/// Callback receiving the relative path (it ends with /)
432 	scope void delegate(char[] fn) callback,
433 )
434 {
435 	btrfs_ioctl_ino_lookup_args args;
436 	args.treeid = treeID;
437 	args.objectid = objectID;
438 
439 	ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args).eq(0).errnoEnforce("ino lookup");
440 	args.name[$-1] = 0;
441 	callback(fromStringz(args.name.ptr));
442 }
443 
444 /// Find a root's parent root, and where it is within it
445 void findRootBackRef(
446 	/// Handle to the filesystem
447 	int fd,
448 	/// The child root ID whose parent to find
449 	__u64 rootID,
450 	/// Result callback
451 	scope void delegate(
452 		/// The parent root ID
453 		__u64 parentRootID,
454 		/// The directory ID (within the parent root) containing the child
455 		__u64 dirID,
456 		/// The sequence of the child entry within the directory
457 		__u64 sequence,
458 		/// The base file name of the child within the directory
459 		char[] name,
460 	) callback,
461 )
462 {
463 	treeSearch!(
464 		BTRFS_ROOT_BACKREF_KEY,
465 		btrfs_root_ref,
466 	)(
467 		fd,
468 		BTRFS_ROOT_TREE_OBJECTID,
469 		[rootID, rootID],
470 		treeSearchAllOffsets,
471 		treeSearchAllTransIDs,
472 		(const ref btrfs_ioctl_search_header header, const ref btrfs_root_ref data)
473 		{
474 			auto parentRoot = header.offset;
475 			auto name = (cast(char*)(&data + 1))[0 .. data.name_len];
476 			callback(parentRoot, data.dirid, data.sequence, name);
477 		}
478 	);
479 }
480 
481 /// Clone a file range
482 void cloneRange(
483 	/// Source file descriptor
484 	int srcFile,
485 	/// Offset in source file to clone from
486 	ulong srcOffset,
487 	/// Target file descriptor
488 	int dstFile,
489 	/// Offset in target file to clone over
490 	ulong dstOffset,
491 	/// Number of bytes to clone
492 	ulong length,
493 )
494 {
495 	btrfs_ioctl_clone_range_args args;
496 
497 	args.src_fd = srcFile;
498 	args.src_offset = srcOffset;
499 	args.src_length = length;
500 	args.dest_offset = dstOffset;
501 
502 	int ret = ioctl(dstFile, BTRFS_IOC_CLONE_RANGE, &args);
503 	errnoEnforce(ret >= 0, "ioctl(BTRFS_IOC_CLONE_RANGE)");
504 }
505 
506 version (btrfsUnittest)
507 unittest
508 {
509 	if (!checkBtrfs())
510 		return;
511 	import std.range, std.random, std.algorithm, std.file;
512 	import std.stdio : File;
513 	enum blockSize = 16*1024; // TODO: detect
514 	auto data = blockSize.iota.map!(n => uniform!ubyte).array();
515 	std.file.write("test1.bin", data);
516 	scope(exit) remove("test1.bin");
517 	auto f1 = File("test1.bin", "rb");
518 	scope(exit) remove("test2.bin");
519 	auto f2 = File("test2.bin", "wb");
520 	cloneRange(f1.fileno, 0, f2.fileno, 0, blockSize);
521 	f2.close();
522 	f1.close();
523 	assert(std.file.read("test2.bin") == data);
524 }
525 
526 struct Extent
527 {
528 	int fd;
529 	ulong offset;
530 }
531 
532 struct SameExtentResult
533 {
534 	ulong totalBytesDeduped;
535 }
536 
537 SameExtentResult sameExtent(in Extent[] extents, ulong length)
538 {
539 	assert(extents.length >= 2, "Need at least 2 extents to deduplicate");
540 
541 	auto buf = new ubyte[
542 		      btrfs_ioctl_same_args.sizeof +
543 		      btrfs_ioctl_same_extent_info.sizeof * extents.length];
544 	auto same = cast(btrfs_ioctl_same_args*) buf.ptr;
545 
546 	same.length = length;
547 	same.logical_offset = extents[0].offset;
548 	enforce(extents.length < ushort.max, "Too many extents");
549 	same.dest_count = cast(ushort)(extents.length - 1);
550 
551 	foreach (i, ref extent; extents[1..$])
552 	{
553 		same.info.ptr[i].fd = extent.fd;
554 		same.info.ptr[i].logical_offset = extent.offset;
555 		same.info.ptr[i].status = -1;
556 	}
557 
558 	int ret = ioctl(extents[0].fd, BTRFS_IOC_FILE_EXTENT_SAME, same);
559 	errnoEnforce(ret >= 0, "ioctl(BTRFS_IOC_FILE_EXTENT_SAME)");
560 
561 	SameExtentResult result;
562 
563 	foreach (i, ref extent; extents[1..$])
564 	{
565 		auto status = same.info.ptr[i].status;
566 		if (status)
567 		{
568 			enforce(status != BTRFS_SAME_DATA_DIFFERS,
569 				"Extent #%d differs".format(i+1));
570 			errno = -status;
571 			errnoEnforce(false,
572 				"Deduplicating extent #%d returned status %d".format(i+1, status));
573 		}
574 		result.totalBytesDeduped += same.info.ptr[i].bytes_deduped;
575 	}
576 
577 	return result;
578 }
579 
580 version (btrfsUnittest)
581 unittest
582 {
583 	if (!checkBtrfs())
584 		return;
585 	import std.range, std.random, std.algorithm, std.file;
586 	import std.stdio : File;
587 	enum blockSize = 16*1024; // TODO: detect
588 	auto data = blockSize.iota.map!(n => uniform!ubyte).array();
589 	std.file.write("test1.bin", data);
590 	scope(exit) remove("test1.bin");
591 	std.file.write("test2.bin", data);
592 	scope(exit) remove("test2.bin");
593 
594 	{
595 		auto f1 = File("test1.bin", "r+b");
596 		auto f2 = File("test2.bin", "r+b");
597 		sameExtent([
598 			Extent(f1.fileno, 0),
599 			Extent(f2.fileno, 0),
600 		], blockSize);
601 	}
602 
603 	{
604 		data[0]++;
605 		std.file.write("test2.bin", data);
606 		auto f1 = File("test1.bin", "r+b");
607 		auto f2 = File("test2.bin", "r+b");
608 		assertThrown!Exception(sameExtent([
609 			Extent(f1.fileno, 0),
610 			Extent(f2.fileno, 0),
611 		], blockSize));
612 	}
613 }
614 
615 version (btrfsUnittest)
616 {
617 	import ae.sys.file;
618 	import std.stdio : stderr;
619 
620 	bool checkBtrfs(string moduleName = __MODULE__)()
621 	{
622 		auto fs = getPathFilesystem(".");
623 		if (fs != "btrfs")
624 		{
625 			stderr.writefln("Current filesystem is %s, not btrfs, skipping %s test.", fs, moduleName);
626 			return false;
627 		}
628 		return true;
629 	}
630 }