1 module btrfs;
2 
3 import core.stdc.errno;
4 import core.sys.posix.sys.ioctl;
5 import core.sys.posix.sys.stat;
6 import core.sys.posix.sys.types;
7 
8 import std.algorithm.comparison;
9 import std.exception;
10 import std.math;
11 import std.string;
12 
13 import ae.utils.bitmanip;
14 import ae.utils.math : eq;
15 
16 import btrfs.c.ioctl;
17 import btrfs.c.kerncompat;
18 import btrfs.c.kernel_shared.ctree;
19 import btrfs.c.kernel_lib.sizes;
20 
21 private
22 {
23 	alias __fsword_t = uint;
24 	struct fsid_t { int[2] __val; }
25 	struct statfs_t
26 	{
27 		__fsword_t f_type;
28 		__fsword_t f_bsize;
29 		fsblkcnt_t f_blocks;
30 		fsblkcnt_t f_bfree;
31 		fsblkcnt_t f_bavail;
32 		fsfilcnt_t f_files;
33 		fsfilcnt_t f_ffree;
34 		fsid_t     f_fsid;
35 		__fsword_t f_namelen;
36 		__fsword_t f_frsize;
37 		__fsword_t f_flags;
38 		__fsword_t[5] f_spare;
39 		ubyte[4096 - 88] __unknown; // D - seems to vary A LOT across platforms / libcs
40 	}
41 	extern(C) int fstatfs(int fd, statfs_t* buf);
42 }
43 
44 enum BTRFS_SUPER_MAGIC = 0x9123683E;
45 
46 /// Returns true is fd is on a btrfs filesystem.
47 bool isBTRFS(int fd)
48 {
49 	statfs_t sfs;
50 	fstatfs(fd, &sfs).eq(0).errnoEnforce("fstatfs");
51 	return sfs.f_type == BTRFS_SUPER_MAGIC;
52 }
53 
54 /// Returns true is fd is the root of a btrfs subvolume.
55 bool isSubvolume(int fd)
56 {
57 	stat_t st;
58 	fstat(fd, &st).eq(0).errnoEnforce("fstat");
59 	return st.st_ino == BTRFS_FIRST_FREE_OBJECTID;
60 }
61 
62 /// Returns the subvolume ID containing the file open in fd.
63 u64 getSubvolumeID(int fd)
64 {
65 	btrfs_ioctl_ino_lookup_args args;
66 	args.treeid = 0;
67 	args.objectid = BTRFS_FIRST_FREE_OBJECTID;
68 	ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args).eq(0).errnoEnforce("ino lookup");
69 	return args.treeid;
70 }
71 
72 enum __u64[2] treeSearchAllObjectIDs = [0, -1];
73 enum __u64[2] treeSearchAllOffsets   = [0, -1];
74 enum __u64[2] treeSearchAllTransIDs  = [0, -1];
75 
76 /// Raw tree search
77 void treeSearch(
78 	/// Handle to the filesystem
79 	int fd,
80 	/// Tree to search in
81 	__u64 treeID,
82 	/// Min and max (inclusive) object IDs to search
83 	__u64[2] objectIDs,
84 	/// Min and max (inclusive) types to search
85 	__u8[2] types,
86 	/// Min and max (inclusive) offsets to search
87 	__u64[2] offsets,
88 	/// Min and max (inclusive) transaction IDs to search
89 	__u64[2] transIDs,
90 	/// Callback receiving the search results
91 	scope void delegate(
92 		/// Search result header, containing the object and transaction ID, offset and type
93 		const ref btrfs_ioctl_search_header header,
94 		/// Raw data - must be cast to the correct type
95 		const void[] data,
96 	) callback,
97 )
98 {
99 	// Lexicographic ordering for tree search
100 	static union Key
101 	{
102 		struct Fields
103 		{
104 		align(1):
105 			BigEndian!__u64 objectID;
106 			BigEndian!__u8  type;
107 			BigEndian!__u64 offset;
108 		}
109 		static assert(Fields.sizeof == 0x11);
110 
111 		Fields fields;
112 		ubyte[Fields.sizeof] bytes;
113 
114 		this(__u64 objectID, __u8 type, __u64 offset)
115 		{
116 			fields = Fields(
117 				BigEndian!__u64(objectID),
118 				BigEndian!__u8 (type),
119 				BigEndian!__u64(offset),
120 			);
121 		}
122 
123 		void opUnary(string op : "++")()
124 		{
125 			foreach_reverse (ref b; bytes)
126 				if (++b != 0)
127 					return; // No overflow (otherwise, continue to carry the 1)
128 			assert(false, "Search key overflow");
129 		}
130 
131 		bool opBinary(string op)(const ref Key o) const
132 		if (is(typeof(mixin(`0` ~ op ~ `0`)) == bool))
133 		{
134 			return mixin(`bytes` ~ op ~ `o.bytes`);
135 		}
136 
137 		int opCmp(const ref Key o) const
138 		{
139 			return cmp(bytes[], o.bytes[]);
140 		}
141 	}
142 
143 	Key min = Key(objectIDs[0], types[0], offsets[0]);
144 	Key max = Key(objectIDs[1], types[1], offsets[1]);
145 
146 	btrfs_ioctl_search_args args;
147 	btrfs_ioctl_search_key* sk = &args.key;
148 
149 	sk.tree_id = treeID;
150 	sk.max_objectid = max.fields.objectID;
151 	sk.max_type     = max.fields.type;
152 	sk.max_offset   = max.fields.offset;
153 	sk.min_transid  = transIDs[0];
154 	sk.max_transid  = transIDs[1];
155 	sk.nr_items = 4096;
156 
157 	do
158 	{
159 		sk.min_objectid = min.fields.objectID;
160 		sk.min_type     = min.fields.type    ;
161 		sk.min_offset   = min.fields.offset  ;
162 		ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args).eq(0).errnoEnforce("tree search");
163 
164 		if (sk.nr_items == 0)
165 			break;
166 
167 		ulong off = 0;
168 		btrfs_ioctl_search_header* sh;
169 		foreach (i; 0 .. sk.nr_items)
170 		{
171 			sh = cast(btrfs_ioctl_search_header *)(args.buf.ptr + off);
172 			off += (*sh).sizeof;
173 			auto data = (args.buf.ptr + off)[0 .. sh.len];
174 			off += sh.len;
175 
176 			if (
177 				objectIDs[0] <= sh.objectid && sh.objectid <= objectIDs[1] &&
178 				types    [0] <= sh.type     && sh.type     <= types    [1] &&
179 				offsets  [0] <= sh.offset   && sh.offset   <= offsets  [1] &&
180 				transIDs [0] <= sh.transid  && sh.transid  <= transIDs [1]
181 			)
182 				callback(*sh, data);
183 		}
184 
185 		assert(sh.type < 256);
186 		min.fields.objectID = sh.objectid;
187 		min.fields.type     = cast(__u8)sh.type;
188 		min.fields.offset   = sh.offset;
189 		min++;
190 	}
191 	while (min <= max);
192 }
193 
194 /// Typed tree search
195 void treeSearch(
196 	/// BTRFS type identifier to search for
197 	__u8 btrfsType,
198 	/// Structure type describing the data
199 	Type,
200 )(
201 	/// Handle to the filesystem
202 	int fd,
203 	/// Tree to search in
204 	__u64 treeID,
205 	/// Min and max (inclusive) object IDs to search
206 	__u64[2] objectIDs,
207 	/// Min and max (inclusive) offsets to search
208 	__u64[2] offsets,
209 	/// Min and max (inclusive) transaction IDs to search
210 	__u64[2] transIDs,
211 	/// Callback receiving the search results
212 	scope void delegate(
213 		/// Search result header, containing the object and transaction ID, and offset
214 		const ref btrfs_ioctl_search_header header,
215 		/// Search result data
216 		/// For variable-length structures, additional data can be
217 		/// accessed by reading past the end of this variable.
218 		/// `header.len` indicates the real total size of the data.
219 		const ref Type data,
220 	) callback,
221 )
222 {
223 	__u8[2] types = [btrfsType, btrfsType];
224 	treeSearch(
225 		fd, treeID, objectIDs, types, offsets, transIDs,
226 		(const ref btrfs_ioctl_search_header header, const void[] data)
227 		{
228 			callback(header, *cast(Type*)data.ptr);
229 		}
230 	);
231 }
232 
233 /// Enumerate all chunks in the filesystem.
234 void enumerateChunks(
235 	/// Handle to the filesystem
236 	int fd,
237 	/// Result callback
238 	scope void delegate(
239 		/// Chunk logical address
240 		u64 offset,
241 		/// Chunk info
242 		/// Stripes can be indexed according to num_stripes
243 		const ref btrfs_chunk chunk,
244 	) callback,
245 )
246 {
247 	treeSearch!(
248 		BTRFS_CHUNK_ITEM_KEY,
249 		btrfs_chunk,
250 	)(
251 		fd,
252 		BTRFS_CHUNK_TREE_OBJECTID,
253 		treeSearchAllObjectIDs,
254 		treeSearchAllOffsets,
255 		treeSearchAllTransIDs,
256 		(const ref btrfs_ioctl_search_header header, const ref btrfs_chunk chunk)
257 		{
258 			callback(header.offset, chunk);
259 		}
260 	);
261 }
262 
263 private ubyte[SZ_64K] logicalInoBuf;
264 
265 /// Get inode at this logical offset
266 void logicalIno(
267 	/// File descriptor to the root (subvolume) containing the inode
268 	int fd,
269 	/// Logical offset to resolve
270 	u64 logical,
271 	/// Result callback
272 	scope void delegate(
273 		/// The inode
274 		u64 inode,
275 		/// The offset within the inode file
276 		/// Will be zero if `ignoreOffset` is true
277 		u64 offset,
278 		/// The filesystem root ID containing the inode
279 		u64 root,
280 	) callback,
281 	/// Ignore the offset when querying extent ownership
282 	/// If this particular offset is not in use by any file but the extent is,
283 	/// this allows querying which file is pinning the offset.
284 	bool ignoreOffset = false,
285 	/// Query buffer
286 	ubyte[] buf = logicalInoBuf[],
287 )
288 {
289 	u64 flags = 0;
290 	if (ignoreOffset)
291 		flags |= BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET;
292 
293 	assert(buf.length > btrfs_data_container.sizeof);
294 	auto inodes = cast(btrfs_data_container*)buf.ptr;
295 
296 	ulong request = BTRFS_IOC_LOGICAL_INO;
297 	if (buf.length > SZ_64K || flags != 0)
298 		request = BTRFS_IOC_LOGICAL_INO_V2;
299 
300 	btrfs_ioctl_logical_ino_args loi;
301 	loi.logical = logical;
302 	loi.size = buf.length;
303 	loi.flags = flags;
304 	loi.inodes = cast(__u64)inodes;
305 
306 	ioctl(fd, request, &loi).eq(0).errnoEnforce("logical ino");
307 	for (auto i = 0; i < inodes.elem_cnt; i += 3)
308 	{
309 		u64 inum   = inodes.val.ptr[i];
310 		u64 offset = inodes.val.ptr[i+1];
311 		u64 root   = inodes.val.ptr[i+2];
312 
313 		callback(inum, offset, root);
314 	}
315 }
316 
317 /// Obtain all paths for an inode.
318 void inoPaths(
319 	/// Handle to the filesystem
320 	int fd,
321 	/// The inode
322 	u64 inode,
323 	/// Callback for receiving file names
324 	scope void delegate(char[] fn) callback,
325 )
326 {
327 	union Buf
328 	{
329 		btrfs_data_container container;
330 		ubyte[0x10000] buf;
331 	}
332 	Buf fspath;
333 
334 	btrfs_ioctl_ino_path_args ipa;
335 	ipa.inum = inode;
336 	ipa.size = fspath.sizeof;
337 	ipa.fspath = ptr_to_u64(&fspath);
338 
339 	ioctl(fd, BTRFS_IOC_INO_PATHS, &ipa).eq(0).errnoEnforce("ino paths");
340 
341 	foreach (i; 0 .. fspath.container.elem_cnt)
342 	{
343 		auto ptr = fspath.buf.ptr;
344 		ptr += fspath.container.val.offsetof;
345 		ptr += fspath.container.val.ptr[i];
346 		auto str = cast(char *)ptr;
347 		callback(fromStringz(str));
348 	}
349 }
350 
351 /// Obtains the relative path of a given filesystem object for the given filesystem root.
352 void inoLookup(
353 	/// Handle to the filesystem
354 	int fd,
355 	/// Tree ID containing the filesystem object
356 	u64 treeID,
357 	/// Filesystem object
358 	u64 objectID,
359 	/// Callback receiving the relative path (it ends with /)
360 	scope void delegate(char[] fn) callback,
361 )
362 {
363 	btrfs_ioctl_ino_lookup_args args;
364 	args.treeid = treeID;
365 	args.objectid = objectID;
366 
367 	ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args).eq(0).errnoEnforce("ino lookup");
368 	args.name[$-1] = 0;
369 	callback(fromStringz(args.name.ptr));
370 }
371 
372 /// Find a root's parent root, and where it is within it
373 void findRootBackRef(
374 	/// Handle to the filesystem
375 	int fd,
376 	/// The child root ID whose parent to find
377 	__u64 rootID,
378 	/// Result callback
379 	scope void delegate(
380 		/// The parent root ID
381 		__u64 parentRootID,
382 		/// The directory ID (within the parent root) containing the child
383 		__u64 dirID,
384 		/// The sequence of the child entry within the directory
385 		__u64 sequence,
386 		/// The base file name of the child within the directory
387 		char[] name,
388 	) callback,
389 )
390 {
391 	treeSearch!(
392 		BTRFS_ROOT_BACKREF_KEY,
393 		btrfs_root_ref,
394 	)(
395 		fd,
396 		BTRFS_ROOT_TREE_OBJECTID,
397 		[rootID, rootID],
398 		treeSearchAllOffsets,
399 		treeSearchAllTransIDs,
400 		(const ref btrfs_ioctl_search_header header, const ref btrfs_root_ref data)
401 		{
402 			auto parentRoot = header.offset;
403 			auto name = (cast(char*)(&data + 1))[0 .. data.name_len];
404 			callback(parentRoot, data.dirid, data.sequence, name);
405 		}
406 	);
407 }
408 
409 /// Clone a file range
410 void cloneRange(
411 	/// Source file descriptor
412 	int srcFile,
413 	/// Offset in source file to clone from
414 	ulong srcOffset,
415 	/// Target file descriptor
416 	int dstFile,
417 	/// Offset in target file to clone over
418 	ulong dstOffset,
419 	/// Number of bytes to clone
420 	ulong length,
421 )
422 {
423 	btrfs_ioctl_clone_range_args args;
424 
425 	args.src_fd = srcFile;
426 	args.src_offset = srcOffset;
427 	args.src_length = length;
428 	args.dest_offset = dstOffset;
429 
430 	int ret = ioctl(dstFile, BTRFS_IOC_CLONE_RANGE, &args);
431 	errnoEnforce(ret >= 0, "ioctl(BTRFS_IOC_CLONE_RANGE)");
432 }
433 
434 version (btrfsUnittest)
435 unittest
436 {
437 	if (!checkBtrfs())
438 		return;
439 	import std.range, std.random, std.algorithm, std.file;
440 	import std.stdio : File;
441 	enum blockSize = 16*1024; // TODO: detect
442 	auto data = blockSize.iota.map!(n => uniform!ubyte).array();
443 	std.file.write("test1.bin", data);
444 	scope(exit) remove("test1.bin");
445 	auto f1 = File("test1.bin", "rb");
446 	scope(exit) remove("test2.bin");
447 	auto f2 = File("test2.bin", "wb");
448 	cloneRange(f1.fileno, 0, f2.fileno, 0, blockSize);
449 	f2.close();
450 	f1.close();
451 	assert(std.file.read("test2.bin") == data);
452 }
453 
454 struct Extent
455 {
456 	int fd;
457 	ulong offset;
458 }
459 
460 struct SameExtentResult
461 {
462 	ulong totalBytesDeduped;
463 }
464 
465 SameExtentResult sameExtent(in Extent[] extents, ulong length)
466 {
467 	assert(extents.length >= 2, "Need at least 2 extents to deduplicate");
468 
469 	auto buf = new ubyte[
470 		      btrfs_ioctl_same_args.sizeof +
471 		      btrfs_ioctl_same_extent_info.sizeof * extents.length];
472 	auto same = cast(btrfs_ioctl_same_args*) buf.ptr;
473 
474 	same.length = length;
475 	same.logical_offset = extents[0].offset;
476 	enforce(extents.length < ushort.max, "Too many extents");
477 	same.dest_count = cast(ushort)(extents.length - 1);
478 
479 	foreach (i, ref extent; extents[1..$])
480 	{
481 		same.info.ptr[i].fd = extent.fd;
482 		same.info.ptr[i].logical_offset = extent.offset;
483 		same.info.ptr[i].status = -1;
484 	}
485 
486 	int ret = ioctl(extents[0].fd, BTRFS_IOC_FILE_EXTENT_SAME, same);
487 	errnoEnforce(ret >= 0, "ioctl(BTRFS_IOC_FILE_EXTENT_SAME)");
488 
489 	SameExtentResult result;
490 
491 	foreach (i, ref extent; extents[1..$])
492 	{
493 		auto status = same.info.ptr[i].status;
494 		if (status)
495 		{
496 			enforce(status != BTRFS_SAME_DATA_DIFFERS,
497 				"Extent #%d differs".format(i+1));
498 			errno = -status;
499 			errnoEnforce(false,
500 				"Deduplicating extent #%d returned status %d".format(i+1, status));
501 		}
502 		result.totalBytesDeduped += same.info.ptr[i].bytes_deduped;
503 	}
504 
505 	return result;
506 }
507 
508 version (btrfsUnittest)
509 unittest
510 {
511 	if (!checkBtrfs())
512 		return;
513 	import std.range, std.random, std.algorithm, std.file;
514 	import std.stdio : File;
515 	enum blockSize = 16*1024; // TODO: detect
516 	auto data = blockSize.iota.map!(n => uniform!ubyte).array();
517 	std.file.write("test1.bin", data);
518 	scope(exit) remove("test1.bin");
519 	std.file.write("test2.bin", data);
520 	scope(exit) remove("test2.bin");
521 
522 	{
523 		auto f1 = File("test1.bin", "r+b");
524 		auto f2 = File("test2.bin", "r+b");
525 		sameExtent([
526 			Extent(f1.fileno, 0),
527 			Extent(f2.fileno, 0),
528 		], blockSize);
529 	}
530 
531 	{
532 		data[0]++;
533 		std.file.write("test2.bin", data);
534 		auto f1 = File("test1.bin", "r+b");
535 		auto f2 = File("test2.bin", "r+b");
536 		assertThrown!Exception(sameExtent([
537 			Extent(f1.fileno, 0),
538 			Extent(f2.fileno, 0),
539 		], blockSize));
540 	}
541 }
542 
543 version (btrfsUnittest)
544 {
545 	import ae.sys.file;
546 	import std.stdio : stderr;
547 
548 	bool checkBtrfs(string moduleName = __MODULE__)()
549 	{
550 		auto fs = getPathFilesystem(".");
551 		if (fs != "btrfs")
552 		{
553 			stderr.writefln("Current filesystem is %s, not btrfs, skipping %s test.", fs, moduleName);
554 			return false;
555 		}
556 		return true;
557 	}
558 }