Showing 40 changed files with 20 additions and 9,567 deletions.
- +6 −0 .gitmodules
- +8 −7 Makefile
- +1 −1 crates/binutils
- +1 −1 crates/coreutils
- +1 −0 crates/extra
- +1 −1 crates/extrautils
- +1 −1 crates/games
- +1 −0 crates/zfs
- +0 −126 crates/zfs/arcache.rs
- +0 −333 crates/zfs/avl.rs
- +0 −56 crates/zfs/block_ptr.rs
- +0 −1,935 crates/zfs/dmu_objset.rs
- +0 −85 crates/zfs/dnode.rs
- +0 −92 crates/zfs/dsl_dataset.rs
- +0 −37 crates/zfs/dsl_dir.rs
- +0 −17 crates/zfs/dsl_pool.rs
- +0 −42 crates/zfs/dvaddr.rs
- +0 −16 crates/zfs/from_bytes.rs
- +0 −147 crates/zfs/lzjb.rs
- +0 −621 crates/zfs/main.rs
- +0 −587 crates/zfs/metaslab.rs
- +0 −385 crates/zfs/nvpair.rs
- +0 −266 crates/zfs/nvstream.rs
- +0 −319 crates/zfs/spa.rs
- +0 −207 crates/zfs/space_map.rs
- +0 −371 crates/zfs/taskq.rs
- +0 −5 crates/zfs/txg.rs
- +0 −47 crates/zfs/uberblock.rs
- +0 −74 crates/zfs/util.rs
- +0 −506 crates/zfs/vdev.rs
- +0 −34 crates/zfs/vdev_file.rs
- +0 −1,011 crates/zfs/vdev_label.rs
- +0 −682 crates/zfs/vdev_queue.rs
- +0 −145 crates/zfs/xdr/mem_ops.rs
- +0 −5 crates/zfs/xdr/mod.rs
- +0 −219 crates/zfs/xdr/xdr.rs
- +0 −190 crates/zfs/zap.rs
- +0 −38 crates/zfs/zfs.rs
- +0 −8 crates/zfs/zil_header.rs
- +0 −950 crates/zfs/zio.rs
.gitmodules (6 changed lines)
Makefile (15 changed lines)
crates/binutils (2 changed lines)
| @@ -1 +1 @@ | ||
| -Subproject commit 3232642e98433e882148f296416023e1f22b9bda | ||
| +Subproject commit 5599724eab8b28705f6b2b66145fdcb7e4ce2d4d |
crates/coreutils (2 changed lines)
| @@ -1 +1 @@ | ||
| -Subproject commit 41eef0ff8a18f0011f373e6b78fb199a25ef2926 | ||
| +Subproject commit 3a666f3b7ddf682c342363d9583b887a360d3dab |
crates/extra (1 changed line)
| @@ -0,0 +1 @@ | ||
| +Subproject commit dd01a09283df73e8e62a6fa59ede41897459dcbd |
crates/extrautils (2 changed lines)
| @@ -1 +1 @@ | ||
| -Subproject commit 90e803f249803d93e081b71c553177c4befd6f18 | ||
| +Subproject commit b1ebde2e1a5e3cac977d076d4df04b7a76f06ff3 |
crates/games (2 changed lines)
| @@ -1 +1 @@ | ||
| -Subproject commit 98ffb8e0a2c471252e5a922f8dd6a335388d7a10 | ||
| +Subproject commit eb52fcb69b59957bd3bfdf6a6ff37234788bf521 |
crates/zfs (1 changed line)
| @@ -0,0 +1 @@ | ||
| +Subproject commit 066a57daef9f86c59018867d904e9fb15d3ddec7 |
crates/zfs/arcache.rs (126 changed lines)
| @@ -1,126 +0,0 @@ | ||
| -use std::collections::{BTreeMap, VecDeque}; | ||
| - | ||
| -use super::dvaddr::DVAddr; | ||
| -use super::zio; | ||
| - | ||
| -/// MRU - Most Recently Used cache | ||
| -struct Mru { | ||
| - map: BTreeMap<DVAddr, Vec<u8>>, | ||
| - queue: VecDeque<DVAddr>, // Oldest DVAddrs are at the end | ||
| - size: usize, // Max mru cache size in blocks | ||
| - used: usize, // Number of used blocks in mru cache | ||
| -} | ||
| - | ||
| -impl Mru { | ||
| - pub fn new() -> Self { | ||
| - Mru { | ||
| - map: BTreeMap::new(), | ||
| - queue: VecDeque::new(), | ||
| - size: 1000, | ||
| - used: 0, | ||
| - } | ||
| - } | ||
| - | ||
| - pub fn cache_block(&mut self, dva: &DVAddr, block: Vec<u8>) -> Result<Vec<u8>, String> { | ||
| - // If necessary, make room for the block in the cache | ||
| - while self.used + (dva.asize() as usize) > self.size { | ||
| - let last_dva = match self.queue.pop_back() { | ||
| - Some(dva) => dva, | ||
| - None => return Err("No more ARC MRU items to free".to_string()), | ||
| - }; | ||
| - self.map.remove(&last_dva); | ||
| - self.used -= last_dva.asize() as usize; | ||
| - } | ||
| - | ||
| - // Add the block to the cache | ||
| - self.used += dva.asize() as usize; | ||
| - self.map.insert(*dva, block); | ||
| - self.queue.push_front(*dva); | ||
| - Ok(self.map.get(dva).unwrap().clone()) | ||
| - } | ||
| -} | ||
| - | ||
| -/// MFU - Most Frequently Used cache | ||
| -struct Mfu { | ||
| - // TODO: Keep track of use counts. So mfu_map becomes (use_count: u64, Vec<u8>). Reset the use | ||
| - // count every once in a while. For instance, every 1000 reads. This will probably end up being | ||
| - // a knob for the user. | ||
| - // TODO: Keep track of minimum frequency and corresponding DVA | ||
| - map: BTreeMap<DVAddr, (u64, Vec<u8>)>, | ||
| - size: usize, // Max mfu cache size in blocks | ||
| - used: usize, // Number of used bytes in mfu cache | ||
| -} | ||
| - | ||
| -impl Mfu { | ||
| - pub fn new() -> Self { | ||
| - Mfu { | ||
| - map: BTreeMap::new(), | ||
| - size: 1000, | ||
| - used: 0, | ||
| - } | ||
| - } | ||
| - | ||
| - pub fn cache_block(&mut self, dva: &DVAddr, block: Vec<u8>) -> Result<Vec<u8>, String> { | ||
| - { | ||
| - let mut lowest_freq = ::std::u64::MAX; | ||
| - let mut lowest_dva: Result<DVAddr, String> = Err("No valid DVA found.".to_string()); | ||
| - | ||
| - for (&dva_key, &(freq, _)) in self.map.iter() { | ||
| - if freq < lowest_freq { | ||
| - lowest_freq = freq; | ||
| - lowest_dva = Ok(dva_key); | ||
| - } | ||
| - } | ||
| - | ||
| - self.map.remove(&try!(lowest_dva)); | ||
| - } | ||
| - | ||
| - // Add the block to the cache | ||
| - self.used += dva.asize() as usize; | ||
| - self.map.insert(*dva, (2, block)); | ||
| - Ok(self.map.get(dva).unwrap().1.clone()) | ||
| - } | ||
| -} | ||
| - | ||
| -// Our implementation of the Adaptive Replacement Cache (ARC) is set up to allocate | ||
| -// its buffer on the heap rather than in a private pool thing. This makes it much | ||
| -// simpler to implement, but defers the fragmentation problem to the heap allocator. | ||
| -// We named the type `ArCache` to avoid confusion with Rust's `Arc` reference type. | ||
| -pub struct ArCache { | ||
| - mru: Mru, | ||
| - mfu: Mfu, | ||
| -} | ||
| - | ||
| -impl ArCache { | ||
| - pub fn new() -> Self { | ||
| - ArCache { | ||
| - mru: Mru::new(), | ||
| - mfu: Mfu::new(), | ||
| - } | ||
| - } | ||
| - | ||
| - pub fn read(&mut self, reader: &mut zio::Reader, dva: &DVAddr) -> Result<Vec<u8>, String> { | ||
| - if let Some(block) = self.mru.map.remove(dva) { | ||
| - self.mfu.map.insert(*dva, (0, block.clone())); | ||
| - | ||
| - // Block is cached | ||
| - return Ok(block); | ||
| - } | ||
| - if let Some(block) = self.mfu.map.get_mut(dva) { | ||
| - // Block is cached | ||
| - if block.0 > 1000 { | ||
| - block.0 = 0; | ||
| - } else { | ||
| - block.0 += 1; | ||
| - } | ||
| - | ||
| - return Ok(block.1.clone()); | ||
| - } | ||
| - | ||
| - // Block isn't cached, have to read it from disk | ||
| - let block = reader.read(dva.sector() as usize, dva.asize() as usize); | ||
| - | ||
| - // Blocks start in MRU cache | ||
| - self.mru.cache_block(dva, block) | ||
| - } | ||
| -} |
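The MRU half of the cache deleted above evicts from the back of its queue until the incoming block fits. A minimal standalone sketch of that eviction policy (the `MiniMru` type and plain integer keys are illustrative stand-ins for the original `Mru` and `DVAddr`, not part of the crate):

```rust
use std::collections::{BTreeMap, VecDeque};

/// Simplified MRU cache mirroring the eviction loop in `Mru::cache_block`:
/// the oldest keys sit at the back of `queue` and are evicted until the new
/// block fits within `size`.
struct MiniMru {
    map: BTreeMap<u64, (usize, Vec<u8>)>, // key -> (allocated size, block data)
    queue: VecDeque<u64>,                 // most recent at the front, oldest at the back
    size: usize,                          // capacity in blocks
    used: usize,                          // blocks currently cached
}

impl MiniMru {
    fn cache_block(&mut self, key: u64, asize: usize, block: Vec<u8>) -> Result<(), String> {
        // Make room for the new block by dropping the least recently used entries.
        while self.used + asize > self.size {
            let oldest = self.queue.pop_back().ok_or("No more MRU items to free")?;
            let (freed, _) = self.map.remove(&oldest).ok_or("queue/map out of sync")?;
            self.used -= freed;
        }
        self.used += asize;
        self.map.insert(key, (asize, block));
        self.queue.push_front(key);
        Ok(())
    }
}

fn main() {
    let mut mru = MiniMru { map: BTreeMap::new(), queue: VecDeque::new(), size: 2, used: 0 };
    mru.cache_block(1, 1, vec![0xAA]).unwrap();
    mru.cache_block(2, 1, vec![0xBB]).unwrap();
    mru.cache_block(3, 1, vec![0xCC]).unwrap(); // evicts key 1, the oldest entry
    assert!(!mru.map.contains_key(&1));
    assert!(mru.map.contains_key(&2) && mru.map.contains_key(&3));
}
```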
crates/zfs/avl.rs (333 changed lines)
| @@ -1,333 +0,0 @@ | ||
| -use std::rc::Rc; | ||
| - | ||
| -pub struct Node<T> { | ||
| - value: T, | ||
| - left: Option<usize>, // ID for left node | ||
| - right: Option<usize>, // ID for right node | ||
| -} | ||
| - | ||
| -impl<T> Node<T> { | ||
| - pub fn value(&self) -> &T { | ||
| - &self.value | ||
| - } | ||
| - pub fn left<K>(&self, tree: &Tree<T, K>) -> Option<NodeId> { | ||
| - self.left.map(|l| { | ||
| - NodeId { | ||
| - index: l, | ||
| - time_stamp: tree.nodes[l].time_stamp, | ||
| - } | ||
| - }) | ||
| - } | ||
| - pub fn right<K>(&self, tree: &Tree<T, K>) -> Option<NodeId> { | ||
| - self.right.map(|r| { | ||
| - NodeId { | ||
| - index: r, | ||
| - time_stamp: tree.nodes[r].time_stamp, | ||
| - } | ||
| - }) | ||
| - } | ||
| -} | ||
| - | ||
| -/// ///////////////////////////////////////////////////////////////////////////////////////////////// | ||
| -#[derive(Copy, Clone)] | ||
| -pub struct NodeId { | ||
| - index: usize, | ||
| - time_stamp: u64, | ||
| -} | ||
| - | ||
| -impl NodeId { | ||
| - pub fn get<'a, T, K>(&self, avl: &'a Tree<T, K>) -> &'a Node<T> { | ||
| - let ref slot = avl.nodes[self.index]; | ||
| - if slot.time_stamp == self.time_stamp { | ||
| - slot.node.as_ref().unwrap() | ||
| - } else { | ||
| - panic!("NodeId had invalid time_stamp"); | ||
| - } | ||
| - } | ||
| - | ||
| - pub fn try_get<'a, T, K>(&self, avl: &'a Tree<T, K>) -> Option<&'a Node<T>> { | ||
| - avl.nodes | ||
| - .get(self.index) | ||
| - .and_then(|slot| { | ||
| - if slot.time_stamp == self.time_stamp { | ||
| - slot.node.as_ref() | ||
| - } else { | ||
| - None | ||
| - } | ||
| - }) | ||
| - } | ||
| - | ||
| - pub fn get_mut<'a, T, K>(&self, avl: &'a mut Tree<T, K>) -> &'a mut Node<T> { | ||
| - let ref mut slot = avl.nodes[self.index]; | ||
| - if slot.time_stamp == self.time_stamp { | ||
| - slot.node.as_mut().unwrap() | ||
| - } else { | ||
| - panic!("NodeId had invalid time_stamp"); | ||
| - } | ||
| - } | ||
| - | ||
| - pub fn try_get_mut<'a, T, K>(&self, avl: &'a mut Tree<T, K>) -> Option<&'a mut Node<T>> { | ||
| - avl.nodes | ||
| - .get_mut(self.index) | ||
| - .and_then(|slot| { | ||
| - if slot.time_stamp == self.time_stamp { | ||
| - slot.node.as_mut() | ||
| - } else { | ||
| - None | ||
| - } | ||
| - }) | ||
| - } | ||
| -} | ||
| - | ||
| -/// ///////////////////////////////////////////////////////////////////////////////////////////////// | ||
| - | ||
| -pub struct Tree<T, K> { | ||
| - root: Option<usize>, // Index of the root node | ||
| - nodes: Vec<Slot<T>>, | ||
| - free_list: Vec<usize>, | ||
| - key: Rc<Fn(&T) -> K>, | ||
| -} | ||
| - | ||
| -impl<T, K: PartialOrd> Tree<T, K> { | ||
| - pub fn new(key: Rc<Fn(&T) -> K>) -> Self { | ||
| - Tree { | ||
| - root: None, | ||
| - nodes: Vec::new(), | ||
| - free_list: Vec::new(), | ||
| - key: key, | ||
| - } | ||
| - } | ||
| - | ||
| - // Inserts a value into the tree, keeping it balanced. Lesser values will be stored on | ||
| - // the left, while greater values will be stored on the right. No duplicates are allowed. | ||
| - pub fn insert(&mut self, value: T) { | ||
| - let root = self.root; | ||
| - self.root = Some(self._insert(value, root)); | ||
| - } | ||
| - | ||
| - pub fn in_order<F: Fn(&Node<T>)>(&self, f: F) { | ||
| - if let Some(root) = self.root { | ||
| - self._in_order(&f, root); | ||
| - } | ||
| - } | ||
| - | ||
| - /// Good ol' binary search. Returns immutable reference | ||
| - pub fn find(&self, key: K) -> Option<&T> { | ||
| - let root = self.root; | ||
| - self._find(key, root) | ||
| - } | ||
| - | ||
| - /// Good ol' binary search. Returns a mutable reference | ||
| - pub fn find_mut(&mut self, key: K) -> Option<&mut T> { | ||
| - let root = self.root; | ||
| - self._find_mut(key, root) | ||
| - } | ||
| - | ||
| - // Implementation of insert | ||
| - fn _insert(&mut self, value: T, node: Option<usize>) -> usize { | ||
| - let node = match node { | ||
| - Some(node) => { | ||
| - // Node exists, check which way to branch. | ||
| - if (self.key)(&value) == (self.key)(&self.node(node).value) { | ||
| - return node; | ||
| - } else if (self.key)(&value) < (self.key)(&self.node(node).value) { | ||
| - let l = self.node(node).left; | ||
| - self.node_mut(node).left = Some(self._insert(value, l)); | ||
| - } else if (self.key)(&value) > (self.key)(&self.node(node).value) { | ||
| - let r = self.node(node).right; | ||
| - self.node_mut(node).right = Some(self._insert(value, r)); | ||
| - } | ||
| - | ||
| - node | ||
| - } | ||
| - None => { | ||
| - // The node doesn't exist, create it here. | ||
| - self.allocate_node(value) | ||
| - } | ||
| - }; | ||
| - | ||
| - self.rebalance(node) | ||
| - } | ||
| - | ||
| - pub fn _in_order<F: Fn(&Node<T>)>(&self, f: &F, node: usize) { | ||
| - if let Some(l) = self.node(node).left { | ||
| - self._in_order(f, l); | ||
| - } | ||
| - f(self.node(node)); | ||
| - if let Some(r) = self.node(node).right { | ||
| - self._in_order(f, r); | ||
| - } | ||
| - } | ||
| - | ||
| - pub fn _find(&self, key: K, node: Option<usize>) -> Option<&T> { | ||
| - node.and_then(|n| { | ||
| - if (self.key)(&self.node(n).value) < key { | ||
| - let left = self.node(n).left; | ||
| - self._find(key, left) | ||
| - } else if (self.key)(&self.node(n).value) > key { | ||
| - let right = self.node(n).right; | ||
| - self._find(key, right) | ||
| - } else { | ||
| - // Found it! | ||
| - Some(&self.node(n).value) | ||
| - } | ||
| - }) | ||
| - } | ||
| - | ||
| - pub fn _find_mut(&mut self, key: K, node: Option<usize>) -> Option<&mut T> { | ||
| - match node { | ||
| - Some(n) => { | ||
| - if (self.key)(&self.node(n).value) < key { | ||
| - let left = self.node(n).left; | ||
| - self._find_mut(key, left) | ||
| - } else if (self.key)(&self.node(n).value) > key { | ||
| - let right = self.node(n).right; | ||
| - self._find_mut(key, right) | ||
| - } else { | ||
| - // Found it! | ||
| - Some(&mut self.node_mut(n).value) | ||
| - } | ||
| - } | ||
| - None => None, | ||
| - } | ||
| - } | ||
| - | ||
| - // Performs a left rotation on a tree/subtree. | ||
| - // Returns the replace the specified node with | ||
| - fn rotate_left(&mut self, node: usize) -> usize { | ||
| - // Keep track of the original node positions | ||
| - // For a rotate left, the right child node must exist | ||
| - let r = self.node(node).right.unwrap(); | ||
| - let rl = self.node(r).left; | ||
| - | ||
| - let ret = r; | ||
| - self.node_mut(node).right = rl; | ||
| - self.node_mut(ret).left = Some(node); | ||
| - | ||
| - ret | ||
| - } | ||
| - | ||
| - // Performs a right rotation on a tree/subtree. | ||
| - // Returns the replace the specified node with | ||
| - fn rotate_right(&mut self, node: usize) -> usize { | ||
| - // Keep track of the original node positions | ||
| - // For a rotate right, the left child node must exist | ||
| - let l = self.node(node).left.unwrap(); | ||
| - let lr = self.node(l).right; | ||
| - | ||
| - let ret = l; | ||
| - self.node_mut(node).left = lr; | ||
| - self.node_mut(ret).right = Some(node); | ||
| - | ||
| - ret | ||
| - } | ||
| - | ||
| - // Performs a left-right double rotation on a tree/subtree. | ||
| - fn rotate_leftright(&mut self, node: usize) -> usize { | ||
| - let l = self.node(node).left.unwrap(); | ||
| - let new_l = self.rotate_left(l); // Left node needs to exist | ||
| - self.node_mut(node).left = Some(new_l); | ||
| - self.rotate_right(node) | ||
| - } | ||
| - | ||
| - // Performs a right-left double rotation on a tree/subtree. | ||
| - fn rotate_rightleft(&mut self, node: usize) -> usize { | ||
| - let r = self.node(node).right.unwrap(); | ||
| - let new_r = self.rotate_right(r); // Right node needs to exist | ||
| - self.node_mut(node).right = Some(new_r); | ||
| - self.rotate_left(node) | ||
| - } | ||
| - | ||
| - // Rebalances the provided node and returns the node to replace it with if rotations | ||
| - // occur | ||
| - fn rebalance(&mut self, node: usize) -> usize { | ||
| - let balance = self.height(self.node(node).left) - self.height(self.node(node).right); | ||
| - if balance == 2 { | ||
| - // left | ||
| - let lbalance = self.height(self.node(self.node(node).left.unwrap()).left) - | ||
| - self.height(self.node(self.node(node).left.unwrap()).right); | ||
| - if lbalance == 0 || lbalance == 1 { | ||
| - // left left - need to rotate right | ||
| - return self.rotate_right(node); | ||
| - } else if lbalance == -1 { | ||
| - // left right | ||
| - return self.rotate_leftright(node); // function name is just a coincidence | ||
| - } | ||
| - } else if balance == -2 { | ||
| - // right | ||
| - let rbalance = self.height(self.node(self.node(node).right.unwrap()).left) - | ||
| - self.height(self.node(self.node(node).right.unwrap()).right); | ||
| - if rbalance == 1 { | ||
| - // right left | ||
| - return self.rotate_rightleft(node); // function name is just a coincidence | ||
| - } else if rbalance == 0 || rbalance == -1 { | ||
| - // right right - need to rotate left | ||
| - return self.rotate_left(node); | ||
| - } | ||
| - } | ||
| - | ||
| - node | ||
| - } | ||
| - | ||
| - // height gets the height of a tree or subtree | ||
| - fn height(&self, node: Option<usize>) -> i64 { | ||
| - match node { | ||
| - Some(node) => { | ||
| - let left_height = self.height(self.node(node).left); | ||
| - let right_height = self.height(self.node(node).right); | ||
| - | ||
| - if left_height > right_height { | ||
| - left_height + 1 | ||
| - } else { | ||
| - right_height + 1 | ||
| - } | ||
| - } | ||
| - None => -1, | ||
| - } | ||
| - } | ||
| - | ||
| - fn allocate_node(&mut self, value: T) -> usize { | ||
| - match self.free_list.pop() { | ||
| - Some(index) => { | ||
| - self.nodes[index].time_stamp += 1; | ||
| - index | ||
| - } | ||
| - None => { | ||
| - // No free slots, create a new one | ||
| - let index = self.nodes.len(); | ||
| - self.nodes.push(Slot { | ||
| - time_stamp: 0, | ||
| - node: Some(Node { | ||
| - value: value, | ||
| - left: None, | ||
| - right: None, | ||
| - }), | ||
| - }); | ||
| - index | ||
| - } | ||
| - } | ||
| - } | ||
| - | ||
| - fn free_node(&mut self, index: usize) -> Node<T> { | ||
| - self.free_list.push(index); | ||
| - | ||
| - // NOTE: We unwrap here, because we trust that `id` points to a valid node, because | ||
| - // only we can create and free Nodes and their NodeIds | ||
| - self.nodes[index].node.take().unwrap() | ||
| - } | ||
| - | ||
| - fn node(&self, index: usize) -> &Node<T> { | ||
| - self.nodes[index].node.as_ref().unwrap() | ||
| - } | ||
| - | ||
| - fn node_mut(&mut self, index: usize) -> &mut Node<T> { | ||
| - self.nodes[index].node.as_mut().unwrap() | ||
| - } | ||
| -} | ||
| - | ||
| -/// ///////////////////////////////////////////////////////////////////////////////////////////////// | ||
| - | ||
| -struct Slot<T> { | ||
| - time_stamp: u64, | ||
| - node: Option<Node<T>>, | ||
| -} |
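The `Slot`/`NodeId` pair that closes the file above is a generational-index scheme: freeing a slot bumps its `time_stamp`, so stale `NodeId`s stop resolving instead of silently aliasing a reused slot. A minimal standalone sketch of just that idea, with the AVL logic stripped out (the `Arena` name and the string payloads are illustrative, not part of the original crate):

```rust
/// Each slot carries a generation counter; a `NodeId` is only valid while the
/// generations match, exactly like `Slot`/`NodeId` in the deleted tree.
struct Slot<T> {
    time_stamp: u64,
    value: Option<T>,
}

#[derive(Copy, Clone)]
struct NodeId {
    index: usize,
    time_stamp: u64,
}

struct Arena<T> {
    slots: Vec<Slot<T>>,
    free_list: Vec<usize>,
}

impl<T> Arena<T> {
    fn new() -> Self {
        Arena { slots: Vec::new(), free_list: Vec::new() }
    }

    fn allocate(&mut self, value: T) -> NodeId {
        match self.free_list.pop() {
            Some(index) => {
                // Reuse a freed slot and bump its generation so old ids go stale.
                self.slots[index].time_stamp += 1;
                self.slots[index].value = Some(value);
                NodeId { index, time_stamp: self.slots[index].time_stamp }
            }
            None => {
                self.slots.push(Slot { time_stamp: 0, value: Some(value) });
                NodeId { index: self.slots.len() - 1, time_stamp: 0 }
            }
        }
    }

    fn free(&mut self, id: NodeId) -> Option<T> {
        if self.try_get(id).is_some() {
            self.free_list.push(id.index);
            self.slots[id.index].value.take()
        } else {
            None
        }
    }

    fn try_get(&self, id: NodeId) -> Option<&T> {
        self.slots.get(id.index).and_then(|slot| {
            if slot.time_stamp == id.time_stamp { slot.value.as_ref() } else { None }
        })
    }
}

fn main() {
    let mut arena = Arena::new();
    let a = arena.allocate("first");
    arena.free(a).expect("slot was live");
    let b = arena.allocate("second"); // reuses the slot under a new generation
    assert!(arena.try_get(a).is_none()); // the stale id no longer resolves
    assert_eq!(arena.try_get(b), Some(&"second"));
}
```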
crates/zfs/block_ptr.rs (56 changed lines)
| @@ -1,56 +0,0 @@ | ||
| -use super::from_bytes::FromBytes; | ||
| -use super::dvaddr::DVAddr; | ||
| - | ||
| -#[derive(Copy, Clone, Debug)] | ||
| -#[repr(packed)] | ||
| -pub struct BlockPtr { | ||
| - pub dvas: [DVAddr; 3], | ||
| - pub flags_size: u64, | ||
| - pub padding: [u64; 3], | ||
| - pub birth_txg: u64, | ||
| - pub fill_count: u64, | ||
| - pub checksum: [u64; 4], | ||
| -} | ||
| - | ||
| -impl BlockPtr { | ||
| - pub fn level(&self) -> u64 { | ||
| - (self.flags_size >> 56) & 0x7F | ||
| - } | ||
| - | ||
| - pub fn object_type(&self) -> u64 { | ||
| - (self.flags_size >> 48) & 0xFF | ||
| - } | ||
| - | ||
| - pub fn checksum(&self) -> u64 { | ||
| - (self.flags_size >> 40) & 0xFF | ||
| - } | ||
| - | ||
| - pub fn compression(&self) -> u64 { | ||
| - (self.flags_size >> 32) & 0xFF | ||
| - } | ||
| - | ||
| - pub fn lsize(&self) -> u64 { | ||
| - (self.flags_size & 0xFFFF) + 1 | ||
| - } | ||
| - | ||
| - pub fn psize(&self) -> u64 { | ||
| - ((self.flags_size >> 16) & 0xFFFF) + 1 | ||
| - } | ||
| -} | ||
| - | ||
| -impl FromBytes for BlockPtr {} | ||
| - | ||
| -#[derive(Copy, Clone, Debug)] | ||
| -#[repr(packed)] | ||
| -pub struct Gang { | ||
| - pub bps: [BlockPtr; 3], | ||
| - pub padding: [u64; 14], | ||
| - pub magic: u64, | ||
| - pub checksum: u64, | ||
| -} | ||
| - | ||
| -impl Gang { | ||
| - pub fn magic() -> u64 { | ||
| - return 0x117a0cb17ada1002; | ||
| - } | ||
| -} |
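All of the `BlockPtr` accessors deleted above decode bit fields of the single `flags_size` word. A standalone sketch that packs and then unpacks the same layout (the `pack_flags_size` helper and the sample values are illustrative only):

```rust
// Pack the BlockPtr flag fields into one u64 using the layout the accessors
// above decode: level in bits 56..62, object type in 48..55, checksum type in
// 40..47, compression in 32..39, (psize - 1) in 16..31, (lsize - 1) in 0..15.
fn pack_flags_size(level: u64, object_type: u64, checksum: u64,
                   compression: u64, psize: u64, lsize: u64) -> u64 {
    ((level & 0x7F) << 56)
        | ((object_type & 0xFF) << 48)
        | ((checksum & 0xFF) << 40)
        | ((compression & 0xFF) << 32)
        | (((psize - 1) & 0xFFFF) << 16)
        | ((lsize - 1) & 0xFFFF)
}

fn main() {
    // Arbitrary sample: an L0 block, compression type 3 (lzjb), 8 sectors
    // physical, 16 sectors logical.
    let flags_size = pack_flags_size(0, 0x13, 0x07, 3, 8, 16);

    // Decode with the same expressions as BlockPtr::level(), lsize(), etc.
    assert_eq!((flags_size >> 56) & 0x7F, 0); // level
    assert_eq!((flags_size >> 48) & 0xFF, 0x13); // object type
    assert_eq!((flags_size >> 40) & 0xFF, 0x07); // checksum type
    assert_eq!((flags_size >> 32) & 0xFF, 3); // compression
    assert_eq!(((flags_size >> 16) & 0xFFFF) + 1, 8); // psize
    assert_eq!((flags_size & 0xFFFF) + 1, 16); // lsize
    println!("flags_size = {:#018x}", flags_size);
}
```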
crates/zfs/dmu_objset.rs (1,935 changed lines: 0 additions, 1,935 deletions). Diff not shown because it is too large; please use a local Git client to view these changes.
crates/zfs/dnode.rs (85 changed lines)
| @@ -1,85 +0,0 @@ | ||
| -use std::fmt; | ||
| -use std::mem; | ||
| - | ||
| -use super::block_ptr::BlockPtr; | ||
| -use super::from_bytes::FromBytes; | ||
| -use super::zil_header::ZilHeader; | ||
| - | ||
| -#[repr(u8)] | ||
| -#[derive(Debug, Eq, PartialEq)] | ||
| -pub enum ObjectType { | ||
| - None, | ||
| - ObjectDirectory, | ||
| - ObjectArray, | ||
| - PackedNvList, | ||
| - NvListSize, | ||
| - BlockPtrList, | ||
| - BlockPtrListHdr, | ||
| - SpaceMapHeader, | ||
| - SpaceMap, | ||
| - IntentLog, | ||
| - DNode, | ||
| - ObjSet, | ||
| - DataSet, | ||
| - DataSetChildMap, | ||
| - ObjSetSnapMap, | ||
| - DslProps, | ||
| - DslObjSet, | ||
| - ZNode, | ||
| - Acl, | ||
| - PlainFileContents, | ||
| - DirectoryContents, | ||
| - MasterNode, | ||
| - DeleteQueue, | ||
| - ZVol, | ||
| - ZVolProp, | ||
| -} | ||
| - | ||
| -#[repr(packed)] | ||
| -pub struct DNodePhys { | ||
| - pub object_type: ObjectType, | ||
| - pub indblkshift: u8, // ln2(indirect block size) | ||
| - pub nlevels: u8, // 1=blkptr->data blocks | ||
| - pub nblkptr: u8, // length of blkptr | ||
| - pub bonus_type: u8, // type of data in bonus buffer | ||
| - pub checksum: u8, // ZIO_CHECKSUM type | ||
| - pub compress: u8, // ZIO_COMPRESS type | ||
| - pub flags: u8, // DNODE_FLAG_* | ||
| - pub data_blk_sz_sec: u16, // data block size in 512b sectors | ||
| - pub bonus_len: u16, // length of bonus | ||
| - pub pad2: [u8; 4], | ||
| - | ||
| - // accounting is protected by dirty_mtx | ||
| - pub maxblkid: u64, // largest allocated block ID | ||
| - pub used: u64, // bytes (or sectors) of disk space | ||
| - | ||
| - pub pad3: [u64; 4], | ||
| - | ||
| - blkptr_bonus: [u8; 448], | ||
| -} | ||
| - | ||
| -impl DNodePhys { | ||
| - pub fn get_blockptr<'a>(&self, i: usize) -> &'a BlockPtr { | ||
| - unsafe { mem::transmute(&self.blkptr_bonus[i * 128]) } | ||
| - } | ||
| - | ||
| - pub fn get_bonus(&self) -> &[u8] { | ||
| - &self.blkptr_bonus[(self.nblkptr as usize) * 128..] | ||
| - } | ||
| -} | ||
| - | ||
| -impl FromBytes for DNodePhys {} | ||
| - | ||
| -impl fmt::Debug for DNodePhys { | ||
| - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
| - try!(write!(f, | ||
| - "DNodePhys {{ object_type: {:?}, nlevels: {:X}, nblkptr: {:X}, bonus_type: \ | ||
| - {:X}, bonus_len: {:X}}}\n", | ||
| - self.object_type, | ||
| - self.nlevels, | ||
| - self.nblkptr, | ||
| - self.bonus_type, | ||
| - self.bonus_len)); | ||
| - Ok(()) | ||
| - } | ||
| -} |
crates/zfs/dsl_dataset.rs (92 changed lines)
| @@ -1,92 +0,0 @@ | ||
| -use super::block_ptr::BlockPtr; | ||
| -use super::from_bytes::FromBytes; | ||
| - | ||
| -#[repr(packed)] | ||
| -pub struct DslDatasetPhys { | ||
| - pub dir_obj: u64, // DMU_OT_DSL_DIR | ||
| - pub prev_snap_obj: u64, // DMU_OT_DSL_DATASET | ||
| - pub prev_snap_txg: u64, | ||
| - pub next_snap_obj: u64, // DMU_OT_DSL_DATASET | ||
| - pub snapnames_zapobj: u64, // DMU_OT_DSL_DS_SNAP_MAP 0 for snaps | ||
| - pub num_children: u64, // clone/snap children, ==0 for head | ||
| - pub creation_time: u64, // seconds since 1970 | ||
| - pub creation_txg: u64, | ||
| - pub deadlist_obj: u64, // DMU_OT_DEADLIST | ||
| - // ds_referenced_bytes, ds_compressed_bytes, and ds_uncompressed_bytes | ||
| - // include all blocks referenced by this dataset, including those | ||
| - // shared with any other datasets. | ||
| - // | ||
| - pub referenced_bytes: u64, | ||
| - pub compressed_bytes: u64, | ||
| - pub uncompressed_bytes: u64, | ||
| - pub unique_bytes: u64, // only relevant to snapshots | ||
| - // The ds_fsid_guid is a 56-bit ID that can change to avoid | ||
| - // collisions. The ds_guid is a 64-bit ID that will never | ||
| - // change, so there is a small probability that it will collide. | ||
| - // | ||
| - pub fsid_guid: u64, | ||
| - pub guid: u64, | ||
| - pub flags: u64, // DS_FLAG_* | ||
| - pub bp: BlockPtr, | ||
| - pub next_clones_obj: u64, // DMU_OT_DSL_CLONES | ||
| - pub props_obj: u64, // DMU_OT_DSL_PROPS for snaps | ||
| - pub userrefs_obj: u64, // DMU_OT_USERREFS | ||
| - pad: [u64; 5], // pad out to 320 bytes for good measure | ||
| -} | ||
| - | ||
| -impl FromBytes for DslDatasetPhys {} | ||
| - | ||
| -//------------------------------------------------------------------------------------------------// | ||
| - | ||
| -// struct DslDataset { | ||
| -// dmu_buf_user_t ds_dbu, | ||
| -// | ||
| -// Immutable: | ||
| -// dsl_dir *ds_dir, | ||
| -// dmu_buf_t *ds_dbuf, | ||
| -// object: u64, | ||
| -// fsid_guid: u64, | ||
| -// is_snapshot: bool, | ||
| -// | ||
| -// only used in syncing context, only valid for non-snapshots: | ||
| -// dsl_dataset *ds_prev, | ||
| -// bookmarks: u64, // DMU_OTN_ZAP_METADATA | ||
| -// large_blocks: bool, | ||
| -// need_large_blocks: bool, | ||
| -// | ||
| -// has internal locking: | ||
| -// dsl_deadlist_t ds_deadlist, | ||
| -// bplist_t ds_pending_deadlist, | ||
| -// | ||
| -// protected by lock on pool's dp_dirty_datasets list | ||
| -// txg_node_t ds_dirty_link, | ||
| -// list_node_t ds_synced_link, | ||
| -// | ||
| -// ds_phys->ds_<accounting> is also protected by ds_lock. | ||
| -// Protected by ds_lock: | ||
| -// kmutex_t ds_lock, | ||
| -// objset_t *ds_objset, | ||
| -// ds_userrefs: u64, | ||
| -// void *ds_owner, | ||
| -// | ||
| -// Long holds prevent the ds from being destroyed, they allow the | ||
| -// ds to remain held even after dropping the dp_config_rwlock. | ||
| -// Owning counts as a long hold. See the comments above | ||
| -// dsl_pool_hold() for details. | ||
| -// refcount_t ds_longholds, | ||
| -// | ||
| -// no locking, only for making guesses | ||
| -// ds_trysnap_txg: u64, | ||
| -// | ||
| -// for objset_open() | ||
| -// kmutex_t ds_opening_lock, | ||
| -// | ||
| -// ds_reserved: u64, // cached refreservation | ||
| -// ds_quota: u64, // cached refquota | ||
| -// | ||
| -// kmutex_t ds_sendstream_lock, | ||
| -// list_t ds_sendstreams, | ||
| -// | ||
| -// Protected by ds_lock, keep at end of struct for better locality | ||
| -// char ds_snapname[MAXNAMELEN], | ||
| -// } |
crates/zfs/dsl_dir.rs (37 changed lines)
| @@ -1,37 +0,0 @@ | ||
| -use super::from_bytes::FromBytes; | ||
| - | ||
| -const DD_USED_NUM: usize = 5; // The number of variants in DslDirUsed | ||
| - | ||
| -pub enum DslDirUsed { | ||
| - Head = 0, | ||
| - Snap, | ||
| - Child, | ||
| - ChildReserve, | ||
| - RefReserve, | ||
| -} | ||
| - | ||
| -#[repr(packed)] | ||
| -pub struct DslDirPhys { | ||
| - pub creation_time: u64, // not actually used | ||
| - pub head_dataset_obj: u64, | ||
| - pub parent_obj: u64, | ||
| - pub origin_obj: u64, | ||
| - pub child_dir_zapobj: u64, | ||
| - // how much space our children are accounting for, for leaf | ||
| - // datasets, == physical space used by fs + snaps | ||
| - pub used_bytes: u64, | ||
| - pub compressed_bytes: u64, | ||
| - pub uncompressed_bytes: u64, | ||
| - // Administrative quota setting | ||
| - pub quota: u64, | ||
| - // Administrative reservation setting | ||
| - pub reserved: u64, | ||
| - pub props_zapobj: u64, | ||
| - pub deleg_zapobj: u64, // dataset delegation permissions | ||
| - pub flags: u64, | ||
| - pub used_breakdown: [u64; DD_USED_NUM], | ||
| - pub clones: u64, // dsl_dir objects | ||
| - pub pad: [u64; 13], // pad out to 256 bytes for good measure | ||
| -} | ||
| - | ||
| -impl FromBytes for DslDirPhys {} |
crates/zfs/dsl_pool.rs (17 changed lines)
| @@ -1,17 +0,0 @@ | ||
| -use super::spa; | ||
| -use super::zfs; | ||
| - | ||
| -pub struct DslPool { | ||
| - // Immutable | ||
| - root_dir_obj: u64, | ||
| -} | ||
| - | ||
| -impl DslPool { | ||
| - pub fn init(spa: &mut spa::Spa, txg: u64) -> zfs::Result<Self> { | ||
| - Self::open_impl(spa, txg) | ||
| - } | ||
| - | ||
| - fn open_impl(spa: &mut spa::Spa, txg: u64) -> zfs::Result<Self> { | ||
| - Ok(DslPool { root_dir_obj: 0 }) | ||
| - } | ||
| -} |
crates/zfs/dvaddr.rs (42 changed lines)
| @@ -1,42 +0,0 @@ | ||
| -use std::fmt; | ||
| - | ||
| -#[derive(Copy, Clone, Eq, Hash, Ord, PartialEq, PartialOrd)] | ||
| -#[repr(packed)] | ||
| -pub struct DVAddr { | ||
| - pub vdev: u64, | ||
| - pub offset: u64, | ||
| -} | ||
| - | ||
| -impl DVAddr { | ||
| - /// Sector address is the offset plus two vdev labels and one boot block (4 MB, or 8192 sectors) | ||
| - pub fn sector(&self) -> u64 { | ||
| - self.offset() + 0x2000 | ||
| - } | ||
| - | ||
| - pub fn gang(&self) -> bool { | ||
| - if self.offset & 0x8000000000000000 == 1 { | ||
| - true | ||
| - } else { | ||
| - false | ||
| - } | ||
| - } | ||
| - | ||
| - pub fn offset(&self) -> u64 { | ||
| - self.offset & 0x7FFFFFFFFFFFFFFF | ||
| - } | ||
| - | ||
| - pub fn asize(&self) -> u64 { | ||
| - (self.vdev & 0xFFFFFF) + 1 | ||
| - } | ||
| -} | ||
| - | ||
| -impl fmt::Debug for DVAddr { | ||
| - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
| - try!(write!(f, | ||
| - "DVAddr {{ offset: {:X}, gang: {}, asize: {:X} }}\n", | ||
| - self.offset(), | ||
| - self.gang(), | ||
| - self.asize())); | ||
| - Ok(()) | ||
| - } | ||
| -} |
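A `DVAddr` keeps its allocated size in the low 24 bits of `vdev` and a gang flag in the top bit of `offset`; `sector()` then skips the two leading vdev labels plus the boot block (0x2000 sectors). A standalone sketch of that decoding with made-up sample words:

```rust
// Decode a ZFS DVA word pair with the same shifts and masks as the accessors
// above. The sample words are made up, chosen only to exercise each field.
fn main() {
    let vdev_word: u64 = 0x0000_0000_0000_000F; // low 24 bits hold asize - 1
    let offset_word: u64 = 0x8000_0000_0000_1234; // top bit: gang flag, rest: offset

    let asize = (vdev_word & 0xFFFFFF) + 1; // DVAddr::asize()
    let offset = offset_word & 0x7FFF_FFFF_FFFF_FFFF; // DVAddr::offset()
    // Gang flag is the most significant bit of the offset word. (The deleted
    // gang() compares the masked value against 1, so it can never report true.)
    let gang = (offset_word & 0x8000_0000_0000_0000) != 0;
    // DVAddr::sector(): skip two vdev labels plus the boot block (0x2000 sectors).
    let sector = offset + 0x2000;

    assert_eq!(asize, 0x10);
    assert_eq!(offset, 0x1234);
    assert!(gang);
    assert_eq!(sector, 0x3234);
    println!("asize={} offset={:#x} gang={} sector={:#x}", asize, offset, gang, sector);
}
```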
crates/zfs/from_bytes.rs (16 changed lines)
| @@ -1,16 +0,0 @@ | ||
| -use std::{mem, ptr}; | ||
| - | ||
| -pub trait FromBytes: Sized { | ||
| - fn from_bytes(data: &[u8]) -> Result<Self, String> { | ||
| - if data.len() >= mem::size_of::<Self>() { | ||
| - let s = unsafe { ptr::read(data.as_ptr() as *const Self) }; | ||
| - Ok(s) | ||
| - } else { | ||
| - Err(format!("Error: bytes length of {} not long enough for the byte size of {}", | ||
| - data.len(), | ||
| - mem::size_of::<Self>())) | ||
| - } | ||
| - } | ||
| -} | ||
| - | ||
| -impl FromBytes for u64 {} |
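`FromBytes` is how the deleted code reinterprets raw on-disk bytes as `#[repr(packed)]` structs; packed layout means alignment 1, so the unaligned `ptr::read` is sound for those types. A standalone round-trip using a made-up record in place of something like `DNodePhys` (assumes a little-endian host, as the rest of the code does):

```rust
use std::{mem, ptr};

// Same default method as the deleted trait: reinterpret the start of a byte
// slice as Self, provided the slice holds at least size_of::<Self>() bytes.
trait FromBytes: Sized {
    fn from_bytes(data: &[u8]) -> Result<Self, String> {
        if data.len() >= mem::size_of::<Self>() {
            // The implementing types here are #[repr(packed)] plain data, so
            // their alignment is 1 and this read cannot be misaligned.
            Ok(unsafe { ptr::read(data.as_ptr() as *const Self) })
        } else {
            Err(format!("need {} bytes, got {}", mem::size_of::<Self>(), data.len()))
        }
    }
}

// Made-up on-disk record used only for this demonstration.
#[repr(packed)]
struct Demo {
    magic: u32,
    count: u16,
}

impl FromBytes for Demo {}

fn main() {
    // Six bytes of little-endian data: magic = 0xDEADBEEF, count = 3.
    let raw = [0xEFu8, 0xBE, 0xAD, 0xDE, 0x03, 0x00];
    let demo = Demo::from_bytes(&raw).unwrap();
    // Copy the fields out of the packed struct before comparing them.
    let (magic, count) = (demo.magic, demo.count);
    assert_eq!(magic, 0xDEADBEEF);
    assert_eq!(count, 3);
    println!("magic={:#x}, count={}", magic, count);
}
```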
crates/zfs/lzjb.rs (147 changed lines)
| @@ -1,147 +0,0 @@ | ||
| -const NBBY: usize = 8; // Number of bits per byte | ||
| -const MATCH_BITS: usize = 6; | ||
| -const MATCH_MIN: usize = 3; | ||
| -const MATCH_MAX: usize = ((1 << MATCH_BITS) + (MATCH_MIN - 1)); | ||
| -const OFFSET_MASK: usize = ((1 << (16 - MATCH_BITS)) - 1); | ||
| -const LEMPEL_SIZE: usize = 1024; | ||
| - | ||
| -/// LZJB compress the bytes in `src` into `dst` | ||
| -pub fn compress(src: &[u8], dst: &mut [u8]) -> usize { | ||
| - let mut src_i = 0; // Current index in src | ||
| - let mut dst_i = 0; // Current index in dst | ||
| - | ||
| - // We place 1 extra byte preceding every 8 bytes. Each bit in this byte is | ||
| - // a flag that corresponds to one of the 8 bytes that delimit it. If the | ||
| - // flag is set, the byte is a copy item. If the flag is 0, it is a literal | ||
| - // item. We'll call this the copy flag. | ||
| - | ||
| - // Stores the index of the current copy flag in dst | ||
| - let mut copymap = 0; | ||
| - | ||
| - // The current bit in the byte pointed at by `copymap` | ||
| - let mut copymask: usize = 1 << (NBBY - 1); | ||
| - | ||
| - // This is our cache | ||
| - let mut lempel = [0usize; LEMPEL_SIZE]; | ||
| - | ||
| - while src_i < src.len() { | ||
| - copymask <<= 1; | ||
| - if copymask == (1 << NBBY) { | ||
| - // We've reached the end of our 8-byte cycle | ||
| - if dst_i >= dst.len() - 1 - 2 * NBBY { | ||
| - // If we've reached the last two bytes, we're done | ||
| - return src.len(); | ||
| - } | ||
| - // Not done yet, reset the cycle | ||
| - copymask = 1; | ||
| - copymap = dst_i; // Point to our new copy flag byte | ||
| - dst[dst_i] = 0; // Place the new (initially clear) copy flag byte | ||
| - dst_i += 1; | ||
| - } | ||
| - | ||
| - if src_i > src.len() - MATCH_MAX { | ||
| - // Nearing the end of the data, don't bother searching for matches, | ||
| - // just copy. | ||
| - dst[dst_i] = src[src_i]; | ||
| - src_i += 1; | ||
| - dst_i += 1; | ||
| - continue; | ||
| - } | ||
| - | ||
| - // Compute hash of current 3 byte slice. It will be the index to our | ||
| - // cache | ||
| - let mut hash = ((src[src_i] as usize) << 16) + ((src[src_i + 1] as usize) << 8) + | ||
| - (src[src_i + 2] as usize); | ||
| - hash += hash >> 9; | ||
| - hash += hash >> 5; | ||
| - let hp = (hash as usize) & (LEMPEL_SIZE - 1); | ||
| - | ||
| - // Look up the current 3 byte slice in the cache. We'll verify that it's | ||
| - // a valid entry later. | ||
| - let offset = (src_i - lempel[hp]) & OFFSET_MASK; | ||
| - let cpy = src_i - offset; | ||
| - | ||
| - // Set the current 3 byte slice as the most recent sighting of it in the | ||
| - // cache | ||
| - lempel[hp] = src_i; | ||
| - | ||
| - // Check that the cached item is valid | ||
| - if src_i >= offset && cpy != src_i && src[src_i] == src[cpy] && | ||
| - src[src_i + 1] == src[cpy + 1] && src[src_i + 2] == src[cpy + 2] { | ||
| - // This cache item is valid, write a copy item | ||
| - dst[copymap] |= copymask as u8; // Set the | ||
| - | ||
| - // Find the full length of this match. Since it was in the hash, | ||
| - // we know the match length is at least 3. | ||
| - let mut mlen = MATCH_MIN; | ||
| - while mlen < MATCH_MAX { | ||
| - if src[src_i + mlen] != src[cpy + mlen] { | ||
| - break; | ||
| - } | ||
| - mlen += 1; | ||
| - } | ||
| - | ||
| - // Place the match length portion of the copy item | ||
| - dst[dst_i] = (((mlen - MATCH_MIN) << (NBBY - MATCH_BITS)) | (offset >> NBBY)) as u8; | ||
| - dst_i += 1; | ||
| - | ||
| - // Place the offset portion of the copy item | ||
| - dst[dst_i] = offset as u8; | ||
| - dst_i += 1; | ||
| - | ||
| - // Now we get to skip the repeated sequence! | ||
| - src_i += mlen; | ||
| - } else { | ||
| - // Not a real cache entry, don't make a copy item | ||
| - dst[dst_i] = src[src_i]; | ||
| - dst_i += 1; | ||
| - src_i += 1; | ||
| - } | ||
| - } | ||
| - | ||
| - return dst_i; | ||
| -} | ||
| - | ||
| -pub fn decompress(src: &[u8], dst: &mut [u8]) -> bool { | ||
| - let mut src_i = 0; | ||
| - let mut dst_i = 0; | ||
| - let mut copymap: u8 = 0; | ||
| - let mut copymask: usize = 1 << (NBBY - 1); | ||
| - | ||
| - while dst_i < dst.len() { | ||
| - copymask <<= 1; | ||
| - if copymask == (1 << NBBY) { | ||
| - // Finished another 8-byte loop, repeat | ||
| - copymask = 1; // Reset the copy mask | ||
| - copymap = src[src_i]; // Current byte is the new copymap | ||
| - src_i += 1; | ||
| - } | ||
| - if (copymap & (copymask as u8)) != 0 { | ||
| - // Found a copy item | ||
| - let mlen = ((src[src_i] as usize) >> (NBBY - MATCH_BITS)) + MATCH_MIN; | ||
| - let offset = (((src[src_i] as usize) << NBBY) | (src[src_i + 1] as usize)) & | ||
| - OFFSET_MASK; | ||
| - src_i += 2; | ||
| - if dst_i < offset { | ||
| - // Copy item points to invalid index, error | ||
| - return false; | ||
| - } | ||
| - let mut cpy = dst_i - offset; | ||
| - for _ in 0..mlen { | ||
| - if dst_i >= dst.len() { | ||
| - // Reached the end of the destination buffer, can't copy anymore | ||
| - break; | ||
| - } | ||
| - dst[dst_i] = dst[cpy]; | ||
| - dst_i += 1; | ||
| - cpy += 1; | ||
| - } | ||
| - } else { | ||
| - // It's a literal item, copy it directly | ||
| - dst[dst_i] = src[src_i]; | ||
| - dst_i += 1; | ||
| - src_i += 1; | ||
| - } | ||
| - } | ||
| - return true; | ||
| -} |
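Each LZJB copy item emitted by `compress` and consumed by `decompress` above packs a match length and a 10-bit back-reference offset into two bytes. A standalone round-trip of just that encoding, reusing the same constants and shift/mask expressions (the two helper functions are illustrative only):

```rust
const NBBY: usize = 8; // bits per byte
const MATCH_BITS: usize = 6; // bits used for the match length
const MATCH_MIN: usize = 3; // shortest encodable match
const OFFSET_MASK: usize = (1 << (16 - MATCH_BITS)) - 1; // 10-bit offset

/// Encode a copy item the way `compress` does: match length in the top
/// MATCH_BITS of the first byte, the offset split across the remaining bits.
fn encode_copy(mlen: usize, offset: usize) -> [u8; 2] {
    [
        (((mlen - MATCH_MIN) << (NBBY - MATCH_BITS)) | (offset >> NBBY)) as u8,
        offset as u8,
    ]
}

/// Decode a copy item the way `decompress` does.
fn decode_copy(item: [u8; 2]) -> (usize, usize) {
    let mlen = ((item[0] as usize) >> (NBBY - MATCH_BITS)) + MATCH_MIN;
    let offset = (((item[0] as usize) << NBBY) | (item[1] as usize)) & OFFSET_MASK;
    (mlen, offset)
}

fn main() {
    // A 17-byte match that starts 300 bytes back in the output.
    let item = encode_copy(17, 300);
    assert_eq!(decode_copy(item), (17, 300));
    println!("copy item bytes: {:?}", item);
}
```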
crates/zfs/main.rs (621 changed lines)
| @@ -1,621 +0,0 @@ | ||
| -// To use this, please install zfs-fuse | ||
| -use std::{mem, str}; | ||
| -use std::fs::File; | ||
| -use std::io::{Read, Write, stdin, stdout}; | ||
| -use std::rc::Rc; | ||
| - | ||
| -use self::arcache::ArCache; | ||
| -use self::dnode::{DNodePhys, ObjectType}; | ||
| -use self::dmu_objset::ObjectSetPhys; | ||
| -use self::block_ptr::BlockPtr; | ||
| -use self::dsl_dataset::DslDatasetPhys; | ||
| -use self::dsl_dir::DslDirPhys; | ||
| -use self::from_bytes::FromBytes; | ||
| -use self::nvpair::NvValue; | ||
| -use self::space_map::SpaceMapPhys; | ||
| -use self::uberblock::Uberblock; | ||
| -use self::vdev::VdevLabel; | ||
| - | ||
| -macro_rules! readln { | ||
| - () => ({ | ||
| - let mut buffer = String::new(); | ||
| - match stdin().read_line(&mut buffer) { | ||
| - Ok(_) => Some(buffer), | ||
| - Err(_) => None | ||
| - } | ||
| - }); | ||
| -} | ||
| - | ||
| -pub mod arcache; | ||
| -pub mod avl; | ||
| -pub mod block_ptr; | ||
| -pub mod dmu_objset; | ||
| -pub mod dnode; | ||
| -pub mod dsl_dataset; | ||
| -pub mod dsl_dir; | ||
| -pub mod dsl_pool; | ||
| -pub mod dvaddr; | ||
| -pub mod from_bytes; | ||
| -pub mod lzjb; | ||
| -pub mod metaslab; | ||
| -pub mod nvpair; | ||
| -pub mod nvstream; | ||
| -pub mod spa; | ||
| -pub mod space_map; | ||
| -pub mod taskq; | ||
| -pub mod txg; | ||
| -pub mod uberblock; | ||
| -pub mod util; | ||
| -pub mod vdev; | ||
| -pub mod vdev_file; | ||
| -pub mod xdr; | ||
| -pub mod zap; | ||
| -pub mod zfs; | ||
| -pub mod zil_header; | ||
| -pub mod zio; | ||
| - | ||
| -pub struct ZfsReader { | ||
| - pub zio: zio::Reader, | ||
| - pub arc: ArCache, | ||
| -} | ||
| - | ||
| -impl ZfsReader { | ||
| - pub fn read_block(&mut self, block_ptr: &BlockPtr) -> Result<Vec<u8>, String> { | ||
| - let data = self.arc.read(&mut self.zio, &block_ptr.dvas[0]); | ||
| - match block_ptr.compression() { | ||
| - 2 => { | ||
| - // compression off | ||
| - data | ||
| - } | ||
| - 1 | 3 => { | ||
| - // lzjb compression | ||
| - let mut decompressed = vec![0; (block_ptr.lsize()*512) as usize]; | ||
| - lzjb::decompress(&match data { | ||
| - Ok(data) => data, | ||
| - Err(e) => return Err(e), | ||
| - }, | ||
| - &mut decompressed); | ||
| - Ok(decompressed) | ||
| - } | ||
| - u => Err(format!("Error: Unknown compression type {}", u)), | ||
| - } | ||
| - } | ||
| - | ||
| - pub fn read_type<T: FromBytes>(&mut self, block_ptr: &BlockPtr) -> Result<T, String> { | ||
| - let data = self.read_block(block_ptr); | ||
| - data.and_then(|data| T::from_bytes(&data[..])) | ||
| - } | ||
| - | ||
| - pub fn read_type_array<T: FromBytes>(&mut self, | ||
| - block_ptr: &BlockPtr, | ||
| - offset: usize) | ||
| - -> Result<T, String> { | ||
| - let data = self.read_block(block_ptr); | ||
| - data.and_then(|data| T::from_bytes(&data[offset * mem::size_of::<T>()..])) | ||
| - } | ||
| - | ||
| - pub fn uber(&mut self, _: &[u8]) -> Result<Uberblock, String> { | ||
| - let mut newest_uberblock: Option<Uberblock> = None; | ||
| - for i in 0..128 { | ||
| - // let ub_len = 2*512; | ||
| - // let ub_start = i * ub_len; | ||
| - // let ub_end = ub_start + ub_len; | ||
| - // if let Ok(uberblock) = Uberblock::from_bytes(&uberblocks[ub_start..ub_end]) { | ||
| - if let Ok(uberblock) = Uberblock::from_bytes(&self.zio.read(256 + i * 2, 2)) { | ||
| - let newest = match newest_uberblock { | ||
| - Some(previous) => { | ||
| - if uberblock.txg > previous.txg { | ||
| - // Found a newer uberblock | ||
| - true | ||
| - } else { | ||
| - false | ||
| - } | ||
| - } | ||
| - // No uberblock yet, so first one we find is the newest | ||
| - None => true, | ||
| - }; | ||
| - | ||
| - if newest { | ||
| - newest_uberblock = Some(uberblock); | ||
| - } | ||
| - } | ||
| - } | ||
| - | ||
| - match newest_uberblock { | ||
| - Some(uberblock) => Ok(uberblock), | ||
| - None => Err("Failed to find valid uberblock".to_string()), | ||
| - } | ||
| - } | ||
| -} | ||
| - | ||
| -#[derive(Copy, Clone, PartialEq)] | ||
| -pub enum ZfsTraverse { | ||
| - ThisDir, | ||
| - Done, | ||
| -} | ||
| - | ||
| -pub struct Zfs { | ||
| - pub reader: ZfsReader, | ||
| - pub uberblock: Uberblock, // The active uberblock | ||
| - pub mos: ObjectSetPhys, | ||
| - fs_objset: ObjectSetPhys, | ||
| - master_node: DNodePhys, | ||
| - root: u64, | ||
| -} | ||
| - | ||
| -impl Zfs { | ||
| - pub fn new(disk: File) -> Result<Self, String> { | ||
| - let mut zfs_reader = ZfsReader { | ||
| - zio: zio::Reader { disk: disk }, | ||
| - arc: ArCache::new(), | ||
| - }; | ||
| - | ||
| - // Read vdev label | ||
| - // let vdev_label = Box::new(try!(VdevLabel::from_bytes(&zfs_reader.zio.read(0, 256 * 2)))); | ||
| - // let mut xdr = xdr::MemOps::new(&mut vdev_label.nv_pairs); | ||
| - // let nv_list = try!(nvstream::decode_nv_list(&mut xdr).map_err(|e| format!("{:?}", e))); | ||
| - // let vdev_tree = | ||
| - // match nv_list.find("vdev_tree") { | ||
| - // Some(vdev_tree) => { | ||
| - // vdev_tree | ||
| - // }, | ||
| - // None => { | ||
| - // return Err("No vdev_tree in vdev label nvpairs".to_string()); | ||
| - // }, | ||
| - // }; | ||
| - // | ||
| - // let vdev_tree = | ||
| - // if let NvValue::NvList(ref vdev_tree) = *vdev_tree { | ||
| - // vdev_tree | ||
| - // } else { | ||
| - // return Err("vdev_tree is not NvValue::NvList".to_string()); | ||
| - // }; | ||
| - | ||
| - | ||
| - // Get the active uberblock | ||
| - // let uberblock = try!(zfs_reader.uber(&vdev_label.uberblocks)); | ||
| - let uberblock = try!(zfs_reader.uber(&[])); | ||
| - | ||
| - // let mos_dva = uberblock.rootbp.dvas[0]; | ||
| - let mos: ObjectSetPhys = try!(zfs_reader.read_type(&uberblock.rootbp)); | ||
| - let mos_bp1 = mos.meta_dnode.get_blockptr(0); | ||
| - | ||
| - // 2nd dnode in MOS points at the root dataset zap | ||
| - let dnode1: DNodePhys = try!(zfs_reader.read_type_array(&mos_bp1, 1)); | ||
| - | ||
| - let root_ds_bp = dnode1.get_blockptr(0); | ||
| - let root_ds: zap::MZapWrapper = try!(zfs_reader.read_type(root_ds_bp)); | ||
| - | ||
| - let root_ds_dnode: DNodePhys = | ||
| - try!(zfs_reader.read_type_array(&mos_bp1, root_ds.chunks[0].value as usize)); | ||
| - | ||
| - let dsl_dir = try!(DslDirPhys::from_bytes(root_ds_dnode.get_bonus())); | ||
| - let head_ds_dnode: DNodePhys = | ||
| - try!(zfs_reader.read_type_array(&mos_bp1, dsl_dir.head_dataset_obj as usize)); | ||
| - | ||
| - let root_dataset = try!(DslDatasetPhys::from_bytes(head_ds_dnode.get_bonus())); | ||
| - | ||
| - let fs_objset: ObjectSetPhys = try!(zfs_reader.read_type(&root_dataset.bp)); | ||
| - | ||
| - let mut indirect: BlockPtr = try!(zfs_reader.read_type_array(fs_objset.meta_dnode | ||
| - .get_blockptr(0), | ||
| - 0)); | ||
| - while indirect.level() > 0 { | ||
| - indirect = try!(zfs_reader.read_type_array(&indirect, 0)); | ||
| - } | ||
| - | ||
| - // Master node is always the second object in the object set | ||
| - let master_node: DNodePhys = try!(zfs_reader.read_type_array(&indirect, 1)); | ||
| - let master_node_zap: zap::MZapWrapper = | ||
| - try!(zfs_reader.read_type(master_node.get_blockptr(0))); | ||
| - | ||
| - // Find the ROOT zap entry | ||
| - let mut root = None; | ||
| - for chunk in &master_node_zap.chunks { | ||
| - if chunk.name() == Some("ROOT") { | ||
| - root = Some(chunk.value); | ||
| - break; | ||
| - } | ||
| - } | ||
| - | ||
| - let root = match root { | ||
| - Some(root) => Ok(root), | ||
| - None => Err("Error: failed to get the ROOT".to_string()), | ||
| - }; | ||
| - | ||
| - Ok(Zfs { | ||
| - reader: zfs_reader, | ||
| - uberblock: uberblock, | ||
| - mos: mos, | ||
| - fs_objset: fs_objset, | ||
| - master_node: master_node, | ||
| - root: try!(root), | ||
| - }) | ||
| - } | ||
| - | ||
| - pub fn traverse<F, T>(&mut self, mut f: F) -> Option<T> | ||
| - where F: FnMut(&mut Self, | ||
| - &str, | ||
| - usize, | ||
| - &mut DNodePhys, | ||
| - &BlockPtr, | ||
| - &mut Option<T>) | ||
| - -> Option<ZfsTraverse> | ||
| - { | ||
| - // Given the fs_objset and the object id of the root directory, we can traverse the | ||
| - // directory tree. | ||
| - // TODO: Cache object id of paths | ||
| - // TODO: Calculate path through objset blockptr tree to use | ||
| - let mut indirect: BlockPtr = self.reader | ||
| - .read_type_array(self.fs_objset | ||
| - .meta_dnode | ||
| - .get_blockptr(0), | ||
| - 0) | ||
| - .unwrap(); | ||
| - while indirect.level() > 0 { | ||
| - indirect = self.reader.read_type_array(&indirect, 0).unwrap(); | ||
| - } | ||
| - // Set the cur_node to the root node, located at an L0 indirect block | ||
| - let root = self.root as usize; | ||
| - let mut cur_node: DNodePhys = self.reader | ||
| - .read_type_array(&indirect, self.root as usize) | ||
| - .unwrap(); | ||
| - let mut result = None; | ||
| - if f(self, "", root, &mut cur_node, &indirect, &mut result) == Some(ZfsTraverse::Done) { | ||
| - return result; | ||
| - } | ||
| - 'traverse: loop { | ||
| - // Directory dnodes point at zap objects. File/directory names are mapped to their | ||
| - // fs_objset object ids. | ||
| - let dir_contents: zap::MZapWrapper = self.reader | ||
| - .read_type(cur_node.get_blockptr(0)) | ||
| - .unwrap(); | ||
| - let mut next_dir = None; | ||
| - for chunk in &dir_contents.chunks { | ||
| - match chunk.name() { | ||
| - Some(chunk_name) => { | ||
| - // Stop once we get to a null entry | ||
| - if chunk_name.is_empty() { | ||
| - break; | ||
| - } | ||
| - | ||
| - let traverse = f(self, | ||
| - chunk_name, | ||
| - chunk.value as usize, | ||
| - &mut cur_node, | ||
| - &indirect, | ||
| - &mut result); | ||
| - if let Some(traverse) = traverse { | ||
| - match traverse { | ||
| - ZfsTraverse::ThisDir => { | ||
| - // Found the folder we were looking for | ||
| - next_dir = Some(chunk.value); | ||
| - break; | ||
| - } | ||
| - ZfsTraverse::Done => { | ||
| - break 'traverse; | ||
| - } | ||
| - } | ||
| - } | ||
| - } | ||
| - None => { | ||
| - // Invalid directory name | ||
| - return None; | ||
| - } | ||
| - } | ||
| - } | ||
| - if next_dir.is_none() { | ||
| - break; | ||
| - } | ||
| - } | ||
| - result | ||
| - } | ||
| - | ||
| - pub fn read_file(&mut self, path: &str) -> Option<Vec<u8>> { | ||
| - let path = path.trim_matches('/'); // Robust against different url styles | ||
| - let path_end_index = path.rfind('/').map(|i| i + 1).unwrap_or(0); | ||
| - let path_end = &path[path_end_index..]; | ||
| - let mut folder_iter = path.split('/'); | ||
| - let mut folder = folder_iter.next(); | ||
| - | ||
| - let file_contents = self.traverse(|zfs, name, node_id, node, indirect, result| { | ||
| - let mut this_dir = false; | ||
| - if let Some(folder) = folder { | ||
| - if name == folder { | ||
| - *node = zfs.reader | ||
| - .read_type_array(indirect, node_id as usize) | ||
| - .unwrap(); | ||
| - if name == path_end { | ||
| - if node.object_type != ObjectType::PlainFileContents { | ||
| - // Not a file | ||
| - return Some(ZfsTraverse::Done); | ||
| - } | ||
| - // Found the file | ||
| - let file_contents = zfs.reader | ||
| - .read_block(node.get_blockptr(0)) | ||
| - .unwrap(); | ||
| - // TODO: Read file size from ZPL rather than look for terminating 0 | ||
| - let file_contents: Vec<u8> = file_contents.into_iter() | ||
| - .take_while(|c| *c != 0) | ||
| - .collect(); | ||
| - *result = Some(file_contents); | ||
| - return Some(ZfsTraverse::Done); | ||
| - } | ||
| - this_dir = true; | ||
| - } | ||
| - } | ||
| - if this_dir { | ||
| - if node.object_type != ObjectType::DirectoryContents { | ||
| - // Not a folder | ||
| - return Some(ZfsTraverse::Done); | ||
| - } | ||
| - folder = folder_iter.next(); | ||
| - return Some(ZfsTraverse::ThisDir); | ||
| - } | ||
| - None | ||
| - }); | ||
| - | ||
| - file_contents | ||
| - } | ||
| - | ||
| - pub fn ls(&mut self, path: &str) -> Option<Vec<String>> { | ||
| - let path = path.trim_matches('/'); // Robust against different url styles | ||
| - let path_end_index = path.rfind('/').map(|i| i + 1).unwrap_or(0); | ||
| - let path_end = &path[path_end_index..]; | ||
| - let mut folder_iter = path.split('/'); | ||
| - let mut folder = folder_iter.next(); | ||
| - | ||
| - let file_contents = self.traverse(|zfs, name, node_id, node, indirect, result| { | ||
| - let mut this_dir = false; | ||
| - if let Some(folder) = folder { | ||
| - if name == folder { | ||
| - if folder == path_end { | ||
| - *node = zfs.reader | ||
| - .read_type_array(indirect, node_id as usize) | ||
| - .unwrap(); | ||
| - let dir_contents: zap::MZapWrapper = zfs.reader | ||
| - .read_type(node.get_blockptr(0)) | ||
| - .unwrap(); | ||
| - | ||
| - let ls: Vec<String> = dir_contents.chunks | ||
| - .iter() | ||
| - .map(|x| { | ||
| - if x.value & 0xF000000000000000 == | ||
| - 0x4000000000000000 { | ||
| - x.name().unwrap().to_string() + | ||
| - "/" | ||
| - } else { | ||
| - x.name().unwrap().to_string() | ||
| - } | ||
| - }) | ||
| - .take_while(|x| !x.is_empty()) | ||
| - .collect(); | ||
| - *result = Some(ls); | ||
| - return Some(ZfsTraverse::Done); | ||
| - } | ||
| - this_dir = true; | ||
| - } | ||
| - } | ||
| - if this_dir { | ||
| - folder = folder_iter.next(); | ||
| - return Some(ZfsTraverse::ThisDir); | ||
| - } | ||
| - None | ||
| - }); | ||
| - | ||
| - file_contents | ||
| - } | ||
| -} | ||
| - | ||
| -// TODO: Find a way to remove all the to_string's | ||
| -fn main() { | ||
| - println!("Type open zfs.img to open the image file"); | ||
| - | ||
| - let mut zfs_option: Option<Zfs> = None; | ||
| - | ||
| - 'reading: loop { | ||
| - print!("# "); | ||
| - stdout().flush(); | ||
| - | ||
| - if let Some(line) = readln!() { | ||
| - let args: Vec<String> = line.trim().split(' ').map(|arg| arg.to_string()).collect(); | ||
| - | ||
| - if let Some(command) = args.get(0) { | ||
| - let mut close = false; | ||
| - match zfs_option { | ||
| - Some(ref mut zfs) => { | ||
| - if command == "uber" { | ||
| - let ref uberblock = zfs.uberblock; | ||
| - // 128 KB of ubers after 128 KB of other stuff | ||
| - println!("Newest Uberblock {:X}", zfs.uberblock.magic); | ||
| - println!("Version {}", uberblock.version); | ||
| - println!("TXG {}", uberblock.txg); | ||
| - println!("GUID {:X}", uberblock.guid_sum); | ||
| - println!("Timestamp {}", uberblock.timestamp); | ||
| - println!("ROOTBP[0] {:?}", uberblock.rootbp.dvas[0]); | ||
| - println!("ROOTBP[1] {:?}", uberblock.rootbp.dvas[1]); | ||
| - println!("ROOTBP[2] {:?}", uberblock.rootbp.dvas[2]); | ||
| - } else if command == "spa_import" { | ||
| - let mut nvpairs_buffer = zfs.reader.zio.read(32, 224); | ||
| - let mut xdr = xdr::MemOps::new(&mut nvpairs_buffer); | ||
| - let nv_list = nvstream::decode_nv_list(&mut xdr).unwrap(); | ||
| - let name = nv_list.get::<&String>("name").unwrap().clone(); | ||
| - let spa = spa::Spa::import(name, nv_list).unwrap(); | ||
| - } else if command == "vdev_label" { | ||
| - match VdevLabel::from_bytes(&zfs.reader.zio.read(0, 256 * 2)) { | ||
| - Ok(ref mut vdev_label) => { | ||
| - let mut xdr = xdr::MemOps::new(&mut vdev_label.nv_pairs); | ||
| - let nv_list = nvstream::decode_nv_list(&mut xdr).unwrap(); | ||
| - println!("Got nv_list:\n{:?}", nv_list); | ||
| - match nv_list.find("vdev_tree") { | ||
| - Some(vdev_tree) => { | ||
| - println!("Got vdev_tree"); | ||
| - | ||
| - let vdev_tree = if let NvValue::NvList(ref vdev_tree) = | ||
| - *vdev_tree { | ||
| - Some(vdev_tree) | ||
| - } else { | ||
| - None | ||
| - }; | ||
| - | ||
| - match vdev_tree.unwrap().find("metaslab_array") { | ||
| - Some(metaslab_array) => { | ||
| - println!("Got metaslab_array"); | ||
| - if let NvValue::Uint64(metaslab_array) = | ||
| - *metaslab_array { | ||
| - // Get metaslab array dnode | ||
| - let metaslab_array = metaslab_array as usize; | ||
| - let ma_dnode: Result<DNodePhys, String> = | ||
| - zfs.reader | ||
| - .read_type_array(zfs.mos | ||
| - .meta_dnode | ||
| - .get_blockptr(0), | ||
| - metaslab_array); | ||
| - let ma_dnode = ma_dnode.unwrap(); // TODO | ||
| - | ||
| - // Get a spacemap object id | ||
| - let sm_id: Result<u64, String> = | ||
| - zfs.reader.read_type_array(ma_dnode.get_blockptr(0), 0); | ||
| - let sm_id = sm_id.unwrap(); // TODO | ||
| - | ||
| - let sm_dnode: Result<DNodePhys, String> = | ||
| - zfs.reader | ||
| - .read_type_array(zfs.mos | ||
| - .meta_dnode | ||
| - .get_blockptr(0), | ||
| - sm_id as usize); | ||
| - let sm_dnode = sm_dnode.unwrap(); // TODO | ||
| - let space_map_phys = SpaceMapPhys::from_bytes(sm_dnode.get_bonus()).unwrap(); // TODO | ||
| - let space_map: Result<Vec<u8>, String> = | ||
| - zfs.reader | ||
| - .read_block(sm_dnode.get_blockptr(0)); | ||
| - | ||
| - println!("got space map id: {:?}", sm_id); | ||
| - println!("got space map dnode: {:?}", sm_dnode); | ||
| - println!("got space map phys: {:?}", | ||
| - space_map_phys); | ||
| - // println!("got space map: {:?}", &space_map.unwrap()[0..64]); | ||
| - | ||
| - let mut range_tree: avl::Tree<space_map::Entry, | ||
| - u64> = | ||
| - avl::Tree::new(Rc::new(|x| x.offset())); | ||
| - // space_map::load_space_map_avl(&space_map::SpaceMap { size: 30 }, | ||
| - // &mut range_tree, | ||
| - // &space_map.unwrap(), | ||
| - // space_map::MapType::Alloc).unwrap(); | ||
| - } else { | ||
| - println!("Invalid metaslab_array NvValue \ | ||
| - type. Expected Uint64."); | ||
| - } | ||
| - } | ||
| - None => { | ||
| - println!("No `metaslab_array` in vdev_tree"); | ||
| - } | ||
| - }; | ||
| - } | ||
| - None => { | ||
| - println!("No `vdev_tree` in vdev_label nvpairs"); | ||
| - } | ||
| - } | ||
| - } | ||
| - Err(e) => { | ||
| - println!("Couldn't read vdev_label: {}", e); | ||
| - } | ||
| - } | ||
| - } else if command == "file" { | ||
| - match args.get(1) { | ||
| - Some(arg) => { | ||
| - let file = zfs.read_file(arg); | ||
| - match file { | ||
| - Some(file) => { | ||
| - println!("File contents: {}", | ||
| - str::from_utf8(&file).unwrap()); | ||
| - } | ||
| - None => println!("Failed to read file"), | ||
| - } | ||
| - } | ||
| - None => println!("Usage: file <path>"), | ||
| - } | ||
| - } else if command == "ls" { | ||
| - match args.get(1) { | ||
| - Some(arg) => { | ||
| - let ls = zfs.ls(arg); | ||
| - match ls { | ||
| - Some(ls) => { | ||
| - for item in &ls { | ||
| - print!("{}\t", item); | ||
| - } | ||
| - } | ||
| - None => println!("Failed to read directory"), | ||
| - } | ||
| - } | ||
| - None => println!("Usage: ls <path>"), | ||
| - } | ||
| - } else if command == "dump" { | ||
| - match args.get(1) { | ||
| - Some(arg) => { | ||
| - if let Ok(sector) = arg.parse::<usize>() { | ||
| - println!("Dump sector: {}", sector); | ||
| - | ||
| - let data = zfs.reader.zio.read(sector, 1); | ||
| - for i in 0..data.len() { | ||
| - if i % 32 == 0 { | ||
| - print!("\n{:X}:", i); | ||
| - } | ||
| - if let Some(byte) = data.get(i) { | ||
| - print!(" {:X}", *byte); | ||
| - } else { | ||
| - println!(" !"); | ||
| - } | ||
| - } | ||
| - print!("\n"); | ||
| - } else { | ||
| - println!("Sector not a number"); | ||
| - } | ||
| - } | ||
| - None => println!("No sector specified!"), | ||
| - } | ||
| - } else if command == "close" { | ||
| - println!("Closing"); | ||
| - close = true; | ||
| - } else if command == "exit" { | ||
| - break 'reading; | ||
| - } else { | ||
| - println!("Commands: uber vdev_label file ls dump close exit"); | ||
| - } | ||
| - } | ||
| - None => { | ||
| - if command == "open" { | ||
| - match args.get(1) { | ||
| - Some(arg) => { | ||
| - match File::open(arg) { | ||
| - Ok(file) => { | ||
| - let zfs = Zfs::new(file); | ||
| - if let Err(ref e) = zfs { | ||
| - println!("Error: {:?}", e); | ||
| - } else { | ||
| - println!("Open: {}", arg); | ||
| - } | ||
| - zfs_option = zfs.ok(); | ||
| - } | ||
| - Err(err) => println!("Failed to open {}: {}", arg, err), | ||
| - } | ||
| - } | ||
| - None => println!("No file specified!"), | ||
| - } | ||
| - } else if command == "exit" { | ||
| - break 'reading; | ||
| - } else { | ||
| - println!("Commands: open exit"); | ||
| - } | ||
| - } | ||
| - } | ||
| - if close { | ||
| - zfs_option = None; | ||
| - } | ||
| - } | ||
| - } else { | ||
| - break 'reading; | ||
| - } | ||
| - } | ||
| -} |
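The uberblock scan in `ZfsReader::uber` above boils down to keeping the candidate with the largest `txg` (on ties the loop keeps the first maximum it finds). The same selection expressed over plain data, with an illustrative `Uber` struct standing in for the real `Uberblock`:

```rust
/// Illustrative stand-in for the field that matters to uberblock selection.
#[derive(Clone, Copy)]
struct Uber {
    txg: u64,
}

/// Keep the candidate with the highest transaction group number.
fn newest(candidates: &[Uber]) -> Option<Uber> {
    candidates.iter().copied().max_by_key(|u| u.txg)
}

fn main() {
    let candidates = [Uber { txg: 10 }, Uber { txg: 42 }, Uber { txg: 7 }];
    assert_eq!(newest(&candidates).unwrap().txg, 42);
}
```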
crates/zfs/metaslab.rs (587 changed lines)
| @@ -1,587 +0,0 @@ | ||
| -use std::cmp; | ||
| -use std::rc::Rc; | ||
| - | ||
| -use super::avl; | ||
| -use super::dmu_objset::ObjectSet; | ||
| -use super::space_map::{self, Segment, SpaceMap}; | ||
| -use super::taskq::{self, Taskq}; | ||
| -use super::txg; | ||
| -use util; | ||
| -use super::vdev; | ||
| -use super::zfs; | ||
| - | ||
| -// A metaslab class encompasses a category of allocatable top-level vdevs. | ||
| -// Each top-level vdev is associated with a metaslab group which defines | ||
| -// the allocatable region for that vdev. Examples of these categories include | ||
| -// "normal" for data block allocations (i.e. main pool allocations) or "log" | ||
| -// for allocations designated for intent log devices (i.e. slog devices). | ||
| -// When a block allocation is requested from the SPA it is associated with a | ||
| -// metaslab_class_t, and only top-level vdevs (i.e. metaslab groups) belonging | ||
| -// to the class can be used to satisfy that request. Allocations are done | ||
| -// by traversing the metaslab groups that are linked off of the `rotor` field. | ||
| -// This rotor points to the next metaslab group where allocations will be | ||
| -// attempted. Allocating a block is a 3 step process -- select the metaslab | ||
| -// group, select the metaslab, and then allocate the block. The metaslab | ||
| -// class defines the low-level block allocator that will be used as the | ||
| -// final step in allocation. These allocators are pluggable allowing each class | ||
| -// to use a block allocator that best suits that class. | ||
| -// | ||
| -pub struct MetaslabClass { | ||
| - // spa: *Spa, | ||
| - // rotor: *MetaslabGroup, | ||
| - ops: Rc<MetaslabOps>, | ||
| - aliquot: u64, | ||
| - alloc_groups: u64, // # of allocatable groups | ||
| - alloc: u64, // total allocated space | ||
| - deferred: u64, // total deferred frees | ||
| - space: u64, // total space (alloc + free) | ||
| - dspace: u64, /* total deflated space | ||
| - * histogram: [u64, RANGE_TREE_HISTOGRAM_SIZE], | ||
| - * fastwrite_lock: kmutex_t, */ | ||
| -} | ||
| - | ||
| -impl MetaslabClass { | ||
| - pub fn create(ops: Rc<MetaslabOps>) -> MetaslabClass { | ||
| - // mutex_init(&mc->mc_fastwrite_lock, NULL, MUTEX_DEFAULT, NULL); | ||
| - | ||
| - MetaslabClass { | ||
| - // rotor: NULL, | ||
| - ops: ops, | ||
| - aliquot: 0, | ||
| - alloc_groups: 0, | ||
| - alloc: 0, | ||
| - deferred: 0, | ||
| - space: 0, | ||
| - dspace: 0, | ||
| - } | ||
| - } | ||
| -} | ||
| - | ||
| -// Metaslab groups encapsulate all the allocatable regions (i.e. metaslabs) | ||
| -// of a top-level vdev. They are linked togther to form a circular linked | ||
| -// list and can belong to only one metaslab class. Metaslab groups may become | ||
| -// ineligible for allocations for a number of reasons such as limited free | ||
| -// space, fragmentation, or going offline. When this happens the allocator will | ||
| -// simply find the next metaslab group in the linked list and attempt | ||
| -// to allocate from that group instead. | ||
| -// | ||
| -pub struct MetaslabGroup { | ||
| - // lock: kmutex_t, | ||
| - metaslab_tree: avl::Tree<MetaslabAvlNode, (u64, u64)>, | ||
| - aliquot: u64, | ||
| - allocatable: bool, // can we allocate? | ||
| - free_capacity: u64, // percentage free | ||
| - bias: i64, | ||
| - activation_count: i64, | ||
| - ms_class: Rc<MetaslabClass>, | ||
| - // vdev: vdev::TreeIndex, | ||
| - taskq: Taskq, | ||
| - // prev: *MetaslabGroup, | ||
| - // next: *MetaslabGroup, | ||
| - fragmentation: u64, // histogram: [u64; RANGE_TREE_HISTOGRAM_SIZE], | ||
| -} | ||
| - | ||
| -impl MetaslabGroup { | ||
| - pub fn create(ms_class: Rc<MetaslabClass>) -> Self { | ||
| - let metaslab_key = Rc::new(|ms: &MetaslabAvlNode| (ms.weight, ms.start)); | ||
| - let taskq = Taskq::new("metaslab_group_taskq".to_string(), | ||
| - // metaslab_load_pct | ||
| - 4, | ||
| - 10, | ||
| - -1i64 as u64, | ||
| - // TASKQ_THREADS_CPU_PCT | TASKQ_DYNAMIC | ||
| - 0); | ||
| - | ||
| - MetaslabGroup { | ||
| - // lock: kmutex_t, | ||
| - metaslab_tree: avl::Tree::new(metaslab_key), | ||
| - aliquot: 0, | ||
| - allocatable: false, // can we allocate? | ||
| - free_capacity: 0, // percentage free | ||
| - bias: 0, | ||
| - activation_count: 0, | ||
| - ms_class: ms_class, | ||
| - // vdev: vdev, | ||
| - taskq: taskq, | ||
| - // prev: *MetaslabGroup, | ||
| - // next: *MetaslabGroup, | ||
| - fragmentation: 0, // histogram: [0; RANGE_TREE_HISTOGRAM_SIZE], | ||
| - } | ||
| - } | ||
| - | ||
| - pub fn add(&mut self, index: usize, m: &Metaslab) { | ||
| - self.metaslab_tree.insert(MetaslabAvlNode { | ||
| - index: index, | ||
| - start: m.start, | ||
| - weight: m.weight, | ||
| - }); | ||
| - } | ||
| - | ||
| - pub fn activate(&mut self) { | ||
| - // metaslab_class_t *mc = self.class; | ||
| - // metaslab_group_t *mgprev, *mgnext; | ||
| - // | ||
| - // assert!(spa_config_held(ms_class.spa, SCL_ALLOC, RW_WRITER)); | ||
| - // | ||
| - // assert!(ms_class.rotor != mg); | ||
| - // assert!(self.prev == NULL); | ||
| - // assert!(self.next == NULL); | ||
| - // assert!(self.activation_count <= 0); | ||
| - // | ||
| - // if (++self.activation_count <= 0) | ||
| - // return; | ||
| - // | ||
| - // self.aliquot = metaslab_aliquot * cmp::max(1, self.vdev->vdev_children); | ||
| - // metaslab_group_alloc_update(mg); | ||
| - // | ||
| - // if (mgprev = ms_class.rotor) == NULL { | ||
| - // self.prev = mg; | ||
| - // self.next = mg; | ||
| - // } else { | ||
| - // mgnext = mgprev->mg_next; | ||
| - // self.prev = mgprev; | ||
| - // self.next = mgnext; | ||
| - // mgprev->mg_next = mg; | ||
| - // mgnext->mg_prev = mg; | ||
| - // } | ||
| - // ms_class.rotor = mg; | ||
| - } | ||
| -} | ||
| - | ||
| -/// ///////////////////////////////////////////////////////////////////////////////////////////////// | ||
| - | ||
| -// This value defines the number of elements in the lbas array. The value | ||
| -// of 64 was chosen as it covers all power of 2 buckets up to UINT64_MAX. | ||
| -// This is the equivalent of highbit(UINT64_MAX). | ||
| -const MAX_LBAS: usize = 64; | ||
| - | ||
| -// Each metaslab maintains a set of in-core trees to track metaslab operations. | ||
| -// The in-core free tree (ms_tree) contains the current list of free segments. | ||
| -// As blocks are allocated, the allocated segments are removed from the ms_tree | ||
| -// and added to a per txg allocation tree (ms_alloctree). As blocks are freed, | ||
| -// they are added to the per txg free tree (ms_freetree). These per txg | ||
| -// trees allow us to process all allocations and frees in syncing context | ||
| -// where it is safe to update the on-disk space maps. One additional in-core | ||
| -// tree is maintained to track deferred frees (ms_defertree). Once a block | ||
| -// is freed it will move from the ms_freetree to the ms_defertree. A deferred | ||
| -// free means that a block has been freed but cannot be used by the pool | ||
| -// until TXG_DEFER_SIZE transaction groups later. For example, a block | ||
| -// that is freed in txg 50 will not be available for reallocation until | ||
| -// txg 52 (50 + TXG_DEFER_SIZE). This provides a safety net for uberblock | ||
| -// rollback. A pool could be safely rolled back TXG_DEFER_SIZE | ||
| -// transaction groups and ensure that no block has been reallocated. | ||
| -// | ||
| -// The simplified transition diagram looks like this: | ||
| -// | ||
| -// | ||
| -// ALLOCATE | ||
| -// | | ||
| -// V | ||
| -// free segment (tree) --------> alloc_tree ----> (write to space map) | ||
| -// ^ | ||
| -// | | ||
| -// | free_tree <--- FREE | ||
| -// | | | ||
| -// | | | ||
| -// | | | ||
| -// +----------- defer_tree <-------+---------> (write to space map) | ||
| -// | ||
| -// | ||
| -// Each metaslab's space is tracked in a single space map in the MOS, | ||
| -// which is only updated in syncing context. Each time we sync a txg, | ||
| -// we append the allocs and frees from that txg to the space map. | ||
| -// The pool space is only updated once all metaslabs have finished syncing. | ||
| -// | ||
| -// To load the in-core free tree we read the space map from disk. | ||
| -// This object contains a series of alloc and free records that are | ||
| -// combined to make up the list of all free segments in this metaslab. These | ||
| -// segments are represented in-core by the ms_tree and are stored in an | ||
| -// AVL tree. | ||
| -// | ||
| -// As the space map grows (as a result of the appends) it will | ||
| -// eventually become space-inefficient. When the metaslab's in-core free tree | ||
| -// is zfs_condense_pct/100 times the size of the minimal on-disk | ||
| -// representation, we rewrite it in its minimized form. If a metaslab | ||
| -// needs to condense then we must set the condensing flag to ensure | ||
| -// that allocations are not performed on the metaslab that is being written. | ||
| -// | ||
| - | ||
| -pub struct Metaslab { | ||
| - // lock: kmutex_t, | ||
| - // load_cv: kcondvar_t, | ||
| - space_map: Option<SpaceMap>, | ||
| - ops: Rc<MetaslabOps>, | ||
| - id: u64, | ||
| - start: u64, | ||
| - size: u64, | ||
| - fragmentation: u64, | ||
| - | ||
| - // Sorted by start | ||
| - alloc_tree: Vec<avl::Tree<space_map::Segment, u64>>, // txg::TXG_SIZE | ||
| - free_tree: Vec<avl::Tree<space_map::Segment, u64>>, // txg::TXG_SIZE | ||
| - defer_tree: Vec<avl::Tree<space_map::Segment, u64>>, // txg::DEFER_SIZE | ||
| - tree: avl::Tree<space_map::Segment, u64>, | ||
| - | ||
| - condensing: bool, | ||
| - condense_wanted: bool, | ||
| - loaded: bool, | ||
| - loading: bool, | ||
| - | ||
| - defer_space: i64, // sum of defermap[] space | ||
| - weight: u64, // weight vs others in group | ||
| - access_txg: u64, | ||
| - | ||
| - // The metaslab block allocators can optionally use a size-ordered | ||
| - // range tree and/or an array of LBAs. Not all allocators use | ||
| - // this functionality. The size_tree should always contain the | ||
| - // same number of segments as the tree. The only difference | ||
| - // is that the size_tree is ordered by segment sizes. | ||
| - size_tree: avl::Tree<space_map::Segment, u64>, // Sorted by size | ||
| - lbas: [u64; MAX_LBAS], /* group: *MetaslabGroup, | ||
| - * avl_node_t ms_group_node, // node in metaslab group tree | ||
| - * txg_node_t ms_txg_node, // per-txg dirty metaslab links */ | ||
| -} | ||
| - | ||
| -impl Metaslab { | ||
| - pub fn new(ops: Rc<MetaslabOps>, | ||
| - id: u64, | ||
| - start: u64, | ||
| - size: u64, | ||
| - space_map: Option<SpaceMap>) | ||
| - -> Self { | ||
| - let seg_key_start = Rc::new(|seg: &Segment| seg.start); | ||
| - let seg_key_size = Rc::new(|seg: &Segment| seg.size); | ||
| - | ||
| - Metaslab { | ||
| - // lock: kmutex_t, | ||
| - // load_cv: kcondvar_t, | ||
| - space_map: space_map, | ||
| - ops: ops, | ||
| - id: id, | ||
| - start: start, | ||
| - size: size, | ||
| - fragmentation: 0, | ||
| - | ||
| - alloc_tree: (0..txg::TXG_SIZE).map(|x| avl::Tree::new(seg_key_start.clone())).collect(), | ||
| - free_tree: (0..txg::TXG_SIZE).map(|x| avl::Tree::new(seg_key_start.clone())).collect(), | ||
| - defer_tree: (0..txg::DEFER_SIZE) | ||
| - .map(|x| avl::Tree::new(seg_key_start.clone())) | ||
| - .collect(), | ||
| - tree: avl::Tree::new(seg_key_start), | ||
| - | ||
| - condensing: false, | ||
| - condense_wanted: false, | ||
| - loaded: false, | ||
| - loading: false, | ||
| - | ||
| - defer_space: 0, | ||
| - weight: 0, | ||
| - access_txg: 0, | ||
| - | ||
| - size_tree: avl::Tree::new(seg_key_size), | ||
| - lbas: [0; MAX_LBAS], /* group: *MetaslabGroup, | ||
| - * avl_node_t ms_group_node, // node in metaslab group tree | ||
| - * txg_node_t ms_txg_node, // per-txg dirty metaslab links */ | ||
| - } | ||
| - } | ||
| - | ||
| - pub fn init(mos: &mut ObjectSet, | ||
| - vdev: &mut vdev::Vdev, | ||
| - id: u64, | ||
| - object: u64, | ||
| - txg: u64) | ||
| - -> zfs::Result<Self> { | ||
| - // We assume this is a top-level vdev | ||
| - let vdev_top = try!(vdev.top.as_mut().ok_or(zfs::Error::Invalid)); | ||
| - | ||
| - // mutex_init(&ms.lock, NULL, MUTEX_DEFAULT, NULL); | ||
| - // cv_init(&ms->ms_load_cv, NULL, CV_DEFAULT, NULL); | ||
| - let start = id << vdev_top.ms_shift; | ||
| - let size = 1 << vdev_top.ms_shift; | ||
| - | ||
| - // We only open space map objects that already exist. All others | ||
| - // will be opened when we finally allocate an object for it. | ||
| - let space_map = if object != 0 { | ||
| - Some(try!(SpaceMap::open(mos, | ||
| - object, | ||
| - start, | ||
| - size, | ||
| - vdev.ashift as u8 /* , &ms.lock */))) | ||
| - } else { | ||
| - None | ||
| - }; | ||
| - | ||
| - let mut metaslab = Self::new(vdev_top.ms_group.ms_class.ops.clone(), | ||
| - id, | ||
| - start, | ||
| - size, | ||
| - space_map); | ||
| - | ||
| - vdev_top.ms_group.add(id as usize, &metaslab); | ||
| - | ||
| - // metaslab.fragmentation = metaslab_fragmentation(metaslab); | ||
| - | ||
| - // If we're opening an existing pool (txg == 0) or creating | ||
| - // a new one (txg == TXG_INITIAL), all space is available now. | ||
| - // If we're adding space to an existing pool, the new space | ||
| - // does not become available until after this txg has synced. | ||
| - if txg <= txg::TXG_INITIAL as u64 { | ||
| - // metaslab_sync_done(metaslab, 0); | ||
| - } | ||
| - | ||
| - // If metaslab_debug_load is set and we're initializing a metaslab | ||
| -        // that has an allocated space_map object then load its space | ||
| -        // map so that we can verify frees. | ||
| - // if metaslab_debug_load && metaslab.space_map.is_some() { | ||
| - // try!(metaslab.load()); | ||
| - // } | ||
| - | ||
| - | ||
| - // if txg != 0 { | ||
| - // vdev.dirty(0, NULL, txg); | ||
| - // vdev.dirty(vdev::DIRTY_METASLAB, ms, txg); | ||
| - // } | ||
| - | ||
| - Ok(metaslab) | ||
| - } | ||
| - | ||
| - pub fn load(&mut self) -> zfs::Result<()> { | ||
| - let mut result = Ok(()); | ||
| - // assert!(MUTEX_HELD(&self.lock)); | ||
| - assert!(!self.loaded); | ||
| - assert!(!self.loading); | ||
| - | ||
| - self.loading = true; | ||
| - | ||
| - // If the space map has not been allocated yet, then treat | ||
| - // all the space in the metaslab as free and add it to the | ||
| - // tree. | ||
| - if let Some(ref mut space_map) = self.space_map { | ||
| - // result = space_map.load(&mut self.tree, space_map::AllocType::Free); | ||
| - } else { | ||
| - self.tree.insert(Segment { | ||
| - start: self.start, | ||
| - size: self.size, | ||
| - }); | ||
| - } | ||
| - | ||
| - self.loaded = result.is_ok(); | ||
| - self.loading = false; | ||
| - | ||
| - if self.loaded { | ||
| - for t in 0..txg::DEFER_SIZE { | ||
| - // self.defer_tree[t].in_order(range_tree_remove, self.tree); | ||
| - } | ||
| - } | ||
| - // cv_broadcast(&self.load_cv); | ||
| - result | ||
| - } | ||
| - | ||
| - pub fn load_wait(&self) { | ||
| - while self.loading { | ||
| - assert!(!self.loaded); | ||
| - // cv_wait(&msp->ms_load_cv, &msp->ms_lock); | ||
| - } | ||
| - } | ||
| - | ||
| - fn activate(&mut self, activation_weight: u64) -> zfs::Result<()> { | ||
| - // TODO | ||
| - // assert!(MUTEX_HELD(&self.lock)); | ||
| - // | ||
| - // if self.weight & METASLAB_ACTIVE_MASK == 0 { | ||
| - // self.load_wait(); | ||
| - // if !self.loaded { | ||
| - // if let Err(e) = self.load() { | ||
| - // metaslab_group_sort(self.group, msp, 0); | ||
| - // return Err(e); | ||
| - // } | ||
| - // } | ||
| - // | ||
| - // metaslab_group_sort(self.group, self, self.weight | activation_weight); | ||
| - // } | ||
| - // assert!(self.loaded); | ||
| - // assert!(self.weight & METASLAB_ACTIVE_MASK); | ||
| - | ||
| - | ||
| - Ok(()) | ||
| - } | ||
| -} | ||
| - | ||
| -/// ///////////////////////////////////////////////////////////////////////////////////////////////// | ||
| - | ||
| -pub struct MetaslabOps { | ||
| - pub alloc: fn(ms: &mut Metaslab, size: u64) -> u64, | ||
| -} | ||
| - | ||
| -/// ///////////////////////////////////////////////////////////////////////////////////////////////// | ||
| - | ||
| -// The first-fit block allocator | ||
| -pub fn ff_alloc(ms: &mut Metaslab, size: u64) -> u64 { | ||
| - // Find the largest power of 2 block size that evenly divides the | ||
| - // requested size. This is used to try to allocate blocks with similar | ||
| - // alignment from the same area of the metaslab (i.e. same cursor | ||
| -    // bucket) but it does not guarantee that other allocation sizes | ||
| -    // won't exist in the same region. | ||
| - let align = size & -(size as i64) as u64; | ||
| - let ref mut cursor = ms.lbas[(util::highbit64(align) - 1) as usize]; | ||
| - let ref mut tree = ms.tree; | ||
| - | ||
| - // return metaslab_block_picker(tree, cursor, size, align); | ||
| - return 0; | ||
| -} | ||
| - | ||
| -/// ///////////////////////////////////////////////////////////////////////////////////////////////// | ||
| -// This is a helper function that can be used by the allocator to find | ||
| -// a suitable block to allocate. This will search the specified AVL | ||
| -// tree looking for a block that matches the specified criteria. | ||
| -// fn metaslab_block_picker(tree: &mut avl::Tree, cursor: &mut u64, size: u64, align: u64) -> u64 { | ||
| -// range_seg_t *rs, rsearch; | ||
| -// avl_index_t where; | ||
| -// | ||
| -// rsearch.rs_start = *cursor; | ||
| -// rsearch.rs_end = *cursor + size; | ||
| -// | ||
| -// rs = tree.find(&rsearch, &where); | ||
| -// if rs == NULL { | ||
| -// rs = tree.nearest(where, AVL_AFTER); | ||
| -// } | ||
| -// | ||
| -// while rs != NULL { | ||
| -// let offset: u64 = util::p2roundup(rs->rs_start, align); | ||
| -// | ||
| -// if offset + size <= rs->rs_end { | ||
| -// cursor = offset + size; | ||
| -// return (offset); | ||
| -// } | ||
| -// rs = AVL_NEXT(t, rs); | ||
| -// } | ||
| -// | ||
| -// If we know we've searched the whole map (*cursor == 0), give up. | ||
| -// Otherwise, reset the cursor to the beginning and try again. | ||
| -// if *cursor == 0 { | ||
| -// return (-1ULL); | ||
| -// } | ||
| -// | ||
| -// cursor = 0; | ||
| -// return metaslab_block_picker(tree, cursor, size, align); | ||
| -// } | ||
| -/// ///////////////////////////////////////////////////////////////////////////////////////////////// | ||
| - | ||
| -struct MetaslabAvlNode { | ||
| - index: usize, | ||
| - weight: u64, | ||
| - start: u64, | ||
| -} | ||
| - | ||
| -/// ///////////////////////////////////////////////////////////////////////////////////////////////// | ||
| - | ||
| -// Allow allocations to switch to gang blocks quickly. We do this to | ||
| -// avoid having to load lots of space_maps in a given txg. There are, | ||
| -// however, some cases where we want to avoid "fast" ganging and instead | ||
| -// we want to do an exhaustive search of all metaslabs on this device. | ||
| -// Currently we don't allow any gang, slog, or dump device related allocations | ||
| -// to "fast" gang. | ||
| -// fn can_fast_gang(flags) -> bool { | ||
| -// (flags) & (METASLAB_GANG_CHILD | METASLAB_GANG_HEADER | METASLAB_GANG_AVOID) == 0 | ||
| -// } | ||
| - | ||
| - | ||
| -const METASLAB_WEIGHT_PRIMARY: u64 = 1 << 63; | ||
| -const METASLAB_WEIGHT_SECONDARY: u64 = 1 << 62; | ||
| -const METASLAB_ACTIVE_MASK: u64 = METASLAB_WEIGHT_PRIMARY | METASLAB_WEIGHT_SECONDARY; | ||
| - | ||
| -// Metaslab granularity, in bytes. This is roughly similar to what would be | ||
| -// referred to as the "stripe size" in traditional RAID arrays. In normal | ||
| -// operation, we will try to write this amount of data to a top-level vdev | ||
| -// before moving on to the next one. | ||
| -static metaslab_aliquot: usize = 512 << 10; | ||
| - | ||
| -// static metaslab_gang_bang: u64 = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */ | ||
| - | ||
| -// The in-core space map representation is more compact than its on-disk form. | ||
| -// The zfs_condense_pct determines how much more compact the in-core | ||
| -// space_map representation must be before we compact it on-disk. | ||
| -// Values should be greater than or equal to 100. | ||
| -static zfs_condense_pct: isize = 200; | ||
| - | ||
| -// Condensing a metaslab is not guaranteed to actually reduce the amount of | ||
| -// space used on disk. In particular, a space map uses data in increments of | ||
| -// MAX(1 << ashift, space_map_blksz), so a metaslab might use the | ||
| -// same number of blocks after condensing. Since the goal of condensing is to | ||
| -// reduce the number of IOPs required to read the space map, we only want to | ||
| -// condense when we can be sure we will reduce the number of blocks used by the | ||
| -// space map. Unfortunately, we cannot precisely compute whether or not this is | ||
| -// the case in metaslab_should_condense since we are holding ms_lock. Instead, | ||
| -// we apply the following heuristic: do not condense a spacemap unless the | ||
| -// uncondensed size consumes greater than zfs_metaslab_condense_block_threshold | ||
| -// blocks. | ||
| -static zfs_metaslab_condense_block_threshold: isize = 4; | ||
| - | ||
| -// The zfs_mg_noalloc_threshold defines which metaslab groups should | ||
| -// be eligible for allocation. The value is defined as a percentage of | ||
| -// free space. Metaslab groups that have more free space than | ||
| -// zfs_mg_noalloc_threshold are always eligible for allocations. Once | ||
| -// a metaslab group's free space is less than or equal to the | ||
| -// zfs_mg_noalloc_threshold the allocator will avoid allocating to that | ||
| -// group unless all groups in the pool have reached zfs_mg_noalloc_threshold. | ||
| -// Once all groups in the pool reach zfs_mg_noalloc_threshold then all | ||
| -// groups are allowed to accept allocations. Gang blocks are always | ||
| -// eligible to allocate on any metaslab group. The default value of 0 means | ||
| -// no metaslab group will be excluded based on this criterion. | ||
| -static zfs_mg_noalloc_threshold: isize = 0; | ||
| - | ||
| -// Metaslab groups are considered eligible for allocations if their | ||
| -// fragmentation metric (measured as a percentage) is less than or equal to | ||
| -// zfs_mg_fragmentation_threshold. If a metaslab group exceeds this threshold | ||
| -// then it will be skipped unless all metaslab groups within the metaslab | ||
| -// class have also crossed this threshold. | ||
| -static zfs_mg_fragmentation_threshold: isize = 85; | ||
| - | ||
| -// Allow metaslabs to keep their active state as long as their fragmentation | ||
| -// percentage is less than or equal to zfs_metaslab_fragmentation_threshold. An | ||
| -// active metaslab that exceeds this threshold will no longer keep its active | ||
| -// status allowing better metaslabs to be selected. | ||
| -static zfs_metaslab_fragmentation_threshold: isize = 70; | ||
| - | ||
| -// When set will load all metaslabs when pool is first opened. | ||
| -static metaslab_debug_load: isize = 0; | ||
| - | ||
| -// When set will prevent metaslabs from being unloaded. | ||
| -static metaslab_debug_unload: isize = 0; | ||
| - | ||
| -// Minimum size which forces the dynamic allocator to change | ||
| -// its allocation strategy. Once the space map cannot satisfy | ||
| -// an allocation of this size then it switches to using a more | ||
| -// aggressive strategy (i.e. search by size rather than offset). | ||
| -// static metaslab_df_alloc_threshold: u64 = SPA_MAXBLOCKSIZE; | ||
| - | ||
| -// The minimum free space, in percent, which must be available | ||
| -// in a space map to continue allocations in a first-fit fashion. | ||
| -// Once the space_map's free space drops below this level we dynamically | ||
| -// switch to using best-fit allocations. | ||
| -static metaslab_df_free_pct: isize = 4; | ||
| - | ||
| -// Percentage of all cpus that can be used by the metaslab taskq. | ||
| -static metaslab_load_pct: isize = 50; | ||
| - | ||
| -// Determines how many txgs a metaslab may remain loaded without having any | ||
| -// allocations from it. As long as a metaslab continues to be used we will | ||
| -// keep it loaded. | ||
| -static metaslab_unload_delay: usize = txg::TXG_SIZE * 2; | ||
| - | ||
| -// Max number of metaslabs per group to preload. | ||
| -// static metaslab_preload_limit: isize = SPA_DVAS_PER_BP; | ||
| - | ||
| -// Enable/disable preloading of metaslab. | ||
| -static metaslab_preload_enabled: bool = true; | ||
| - | ||
| -// Enable/disable fragmentation weighting on metaslabs. | ||
| -static metaslab_fragmentation_factor_enabled: bool = true; | ||
| - | ||
| -// Enable/disable lba weighting (i.e. outer tracks are given preference). | ||
| -static metaslab_lba_weighting_enabled: bool = true; | ||
| - | ||
| -// Enable/disable metaslab group biasing. | ||
| -static metaslab_bias_enabled: bool = true; | ||
| - | ||
| -// static uint64_t metaslab_fragmentation(metaslab_t *); |
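
The ff_alloc allocator and the commented-out metaslab_block_picker above describe the same idea: keep one cursor per power-of-two alignment bucket, scan the free-segment tree from that cursor, and wrap around once before giving up. A minimal sketch of that search, assuming a plain sorted slice of (start, size) segments in place of the crate's AVL tree, with a local p2roundup written out instead of the util module's helper:

```rust
use std::cmp;

/// Round `x` up to the next multiple of `align` (a power of two).
fn p2roundup(x: u64, align: u64) -> u64 {
    (x + align - 1) & !(align - 1)
}

/// First-fit search over a sorted free list, resetting the cursor once
/// when the end of the map is reached, as in the comment above.
fn first_fit(segments: &[(u64, u64)], cursor: &mut u64, size: u64, align: u64) -> Option<u64> {
    for &(start, seg_size) in segments {
        if start + seg_size <= *cursor {
            continue; // segment ends before the cursor, skip it
        }
        let offset = p2roundup(cmp::max(start, *cursor), align);
        if offset + size <= start + seg_size {
            *cursor = offset + size;
            return Some(offset);
        }
    }
    // Nothing found past the cursor: reset once and retry from the start.
    if *cursor != 0 {
        *cursor = 0;
        return first_fit(segments, cursor, size, align);
    }
    None
}

fn main() {
    let free = [(0u64, 4096u64), (16384, 8192)];
    let mut cursor = 0u64;
    assert_eq!(first_fit(&free, &mut cursor, 1024, 512), Some(0));
    assert_eq!(first_fit(&free, &mut cursor, 1024, 512), Some(1024));
}
```

The two asserts show the cursor advancing so that successive same-alignment allocations come from the same region, which is the point of the per-alignment cursor buckets.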
385
crates/zfs/nvpair.rs
| @@ -1,385 +0,0 @@ | ||
| -use std::fmt; | ||
| - | ||
| -// nvp implementation version | ||
| -pub const NV_VERSION: i32 = 0; | ||
| - | ||
| -// nvlist header | ||
| -// #[derive(Debug)] | ||
| -pub struct NvList { | ||
| - pub version: i32, | ||
| - pub nvflag: u32, // persistent flags | ||
| - pub pairs: Vec<(String, NvValue)>, | ||
| -} | ||
| - | ||
| -impl NvList { | ||
| - pub fn new(nvflag: u32) -> Self { | ||
| - NvList { | ||
| - version: NV_VERSION, | ||
| - nvflag: nvflag, | ||
| - pairs: Vec::new(), | ||
| - } | ||
| - } | ||
| - | ||
| - pub fn add(&mut self, name: String, value: NvValue) { | ||
| - self.pairs.push((name, value)); | ||
| - } | ||
| - | ||
| - pub fn find(&self, name: &str) -> Option<&NvValue> { | ||
| - for pair in &self.pairs { | ||
| - if pair.0 == name { | ||
| - return Some(&pair.1); | ||
| - } | ||
| - } | ||
| - None | ||
| - } | ||
| - | ||
| - pub fn find_mut(&mut self, name: &str) -> Option<&mut NvValue> { | ||
| - for pair in &mut self.pairs { | ||
| - if pair.0 == name { | ||
| - return Some(&mut pair.1); | ||
| - } | ||
| - } | ||
| - None | ||
| - } | ||
| - | ||
| - pub fn get<'a, T: GetNvValue<'a>>(&'a self, name: &str) -> Option<T> { | ||
| - self.find(name).and_then(|x| GetNvValue::get(x)) | ||
| - } | ||
| -} | ||
| - | ||
| -impl fmt::Debug for NvList { | ||
| - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
| - try!(write!(f, | ||
| - "NvList {{ version: {:X}, nvflag: {:X}, pairs: [\n", | ||
| - self.version, | ||
| - self.nvflag)); | ||
| - for &(ref name, ref value) in &self.pairs { | ||
| - if name.is_empty() { | ||
| - break; | ||
| - } | ||
| - try!(write!(f, "{} : {:?}\n", name, value)); | ||
| - } | ||
| - try!(write!(f, "] }}\n")); | ||
| - Ok(()) | ||
| - } | ||
| -} | ||
| - | ||
| -// TODO Auto implement Debug. format! currently crashes with big u32 values | ||
| -// #[derive(Debug)] | ||
| -pub enum NvValue { | ||
| - Unknown, | ||
| - Boolean, | ||
| - Byte(u8), | ||
| - Int16(i16), | ||
| - Uint16(u16), | ||
| - Int32(i32), | ||
| - Uint32(u32), | ||
| - Int64(i64), | ||
| - Uint64(u64), | ||
| - String(String), | ||
| - ByteArray(Vec<u8>), | ||
| - Int16Array(Vec<i16>), | ||
| - Uint16Array(Vec<u16>), | ||
| - Int32Array(Vec<i32>), | ||
| - Uint32Array(Vec<u32>), | ||
| - Int64Array(Vec<i64>), | ||
| - Uint64Array(Vec<u64>), | ||
| - StringArray(Vec<String>), | ||
| - HrTime(i64), | ||
| - NvList(NvList), | ||
| - NvListArray(Vec<NvList>), | ||
| - BooleanValue(bool), | ||
| - Int8(i8), | ||
| - Uint8(u8), | ||
| - BooleanArray(Vec<bool>), | ||
| - Int8Array(Vec<i8>), | ||
| - Uint8Array(Vec<u8>), | ||
| -} | ||
| - | ||
| -impl NvValue { | ||
| - pub fn data_type(&self) -> DataType { | ||
| - match *self { | ||
| - NvValue::Unknown => DataType::Unknown, | ||
| - NvValue::Boolean => DataType::Boolean, | ||
| - NvValue::Byte(_) => DataType::Byte, | ||
| - NvValue::Int16(_) => DataType::Int16, | ||
| - NvValue::Uint16(_) => DataType::Uint16, | ||
| - NvValue::Int32(_) => DataType::Int32, | ||
| - NvValue::Uint32(_) => DataType::Uint32, | ||
| - NvValue::Int64(_) => DataType::Int64, | ||
| - NvValue::Uint64(_) => DataType::Uint64, | ||
| - NvValue::String(_) => DataType::String, | ||
| - NvValue::ByteArray(_) => DataType::ByteArray, | ||
| - NvValue::Int16Array(_) => DataType::Int16Array, | ||
| - NvValue::Uint16Array(_) => DataType::Uint16Array, | ||
| - NvValue::Int32Array(_) => DataType::Int32Array, | ||
| - NvValue::Uint32Array(_) => DataType::Uint32Array, | ||
| - NvValue::Int64Array(_) => DataType::Int64Array, | ||
| - NvValue::Uint64Array(_) => DataType::Uint64Array, | ||
| - NvValue::StringArray(_) => DataType::StringArray, | ||
| - NvValue::HrTime(_) => DataType::HrTime, | ||
| - NvValue::NvList(_) => DataType::NvList, | ||
| - NvValue::NvListArray(_) => DataType::NvListArray, | ||
| - NvValue::BooleanValue(_) => DataType::BooleanValue, | ||
| - NvValue::Int8(_) => DataType::Int8, | ||
| - NvValue::Uint8(_) => DataType::Uint8, | ||
| - NvValue::BooleanArray(_) => DataType::BooleanArray, | ||
| - NvValue::Int8Array(_) => DataType::Int8Array, | ||
| - NvValue::Uint8Array(_) => DataType::Uint8Array, | ||
| - } | ||
| - } | ||
| - | ||
| - pub fn num_elements(&self) -> usize { | ||
| - match *self { | ||
| - NvValue::Unknown => 1, | ||
| - NvValue::Boolean => 1, | ||
| - NvValue::Byte(_) => 1, | ||
| - NvValue::Int16(_) => 1, | ||
| - NvValue::Uint16(_) => 1, | ||
| - NvValue::Int32(_) => 1, | ||
| - NvValue::Uint32(_) => 1, | ||
| - NvValue::Int64(_) => 1, | ||
| - NvValue::Uint64(_) => 1, | ||
| - NvValue::String(_) => 1, | ||
| - NvValue::ByteArray(ref a) => a.len(), | ||
| - NvValue::Int16Array(ref a) => a.len(), | ||
| - NvValue::Uint16Array(ref a) => a.len(), | ||
| - NvValue::Int32Array(ref a) => a.len(), | ||
| - NvValue::Uint32Array(ref a) => a.len(), | ||
| - NvValue::Int64Array(ref a) => a.len(), | ||
| - NvValue::Uint64Array(ref a) => a.len(), | ||
| - NvValue::StringArray(ref a) => a.len(), | ||
| - NvValue::HrTime(_) => 1, | ||
| - NvValue::NvList(_) => 1, | ||
| - NvValue::NvListArray(ref a) => a.len(), | ||
| - NvValue::BooleanValue(_) => 1, | ||
| - NvValue::Int8(_) => 1, | ||
| - NvValue::Uint8(_) => 1, | ||
| - NvValue::BooleanArray(ref a) => a.len(), | ||
| - NvValue::Int8Array(ref a) => a.len(), | ||
| - NvValue::Uint8Array(ref a) => a.len(), | ||
| - } | ||
| - } | ||
| -} | ||
| - | ||
| -impl fmt::Debug for NvValue { | ||
| - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
| - match *self { | ||
| - NvValue::Int64(v) => write!(f, "Int64(0x{:X})", v), | ||
| - NvValue::Uint64(v) => write!(f, "Uint64(0x{:X})", v), | ||
| - NvValue::NvList(ref v) => write!(f, "NvList({:?})", v), | ||
| - NvValue::NvListArray(ref v) => { | ||
| - try!(write!(f, "NvListArray([")); | ||
| - for nv_list in v { | ||
| - try!(write!(f, "NvList({:?})", nv_list)); | ||
| - } | ||
| - write!(f, "])") | ||
| - } | ||
| - NvValue::String(ref v) => write!(f, "String({})", v), | ||
| -            _ => write!(f, "{:?}", self.data_type()), // fall back to the type name, avoiding recursion on self | ||
| - } | ||
| - } | ||
| -} | ||
| - | ||
| -#[derive(Copy, Clone, Debug)] | ||
| -pub enum DataType { | ||
| - Unknown = 0, | ||
| - Boolean, | ||
| - Byte, | ||
| - Int16, | ||
| - Uint16, | ||
| - Int32, | ||
| - Uint32, | ||
| - Int64, | ||
| - Uint64, | ||
| - String, | ||
| - ByteArray, | ||
| - Int16Array, | ||
| - Uint16Array, | ||
| - Int32Array, | ||
| - Uint32Array, | ||
| - Int64Array, | ||
| - Uint64Array, | ||
| - StringArray, | ||
| - HrTime, | ||
| - NvList, | ||
| - NvListArray, | ||
| - BooleanValue, | ||
| - Int8, | ||
| - Uint8, | ||
| - BooleanArray, | ||
| - Int8Array, | ||
| - Uint8Array, | ||
| -} | ||
| - | ||
| -impl DataType { | ||
| - pub fn from_u8(u: u8) -> Option<DataType> { | ||
| - match u { | ||
| - 0 => Some(DataType::Unknown), | ||
| - 1 => Some(DataType::Boolean), | ||
| - 2 => Some(DataType::Byte), | ||
| - 3 => Some(DataType::Int16), | ||
| - 4 => Some(DataType::Uint16), | ||
| - 5 => Some(DataType::Int32), | ||
| - 6 => Some(DataType::Uint32), | ||
| - 7 => Some(DataType::Int64), | ||
| - 8 => Some(DataType::Uint64), | ||
| - 9 => Some(DataType::String), | ||
| - 10 => Some(DataType::ByteArray), | ||
| - 11 => Some(DataType::Int16Array), | ||
| - 12 => Some(DataType::Uint16Array), | ||
| - 13 => Some(DataType::Int32Array), | ||
| - 14 => Some(DataType::Uint32Array), | ||
| - 15 => Some(DataType::Int64Array), | ||
| - 16 => Some(DataType::Uint64Array), | ||
| - 17 => Some(DataType::StringArray), | ||
| - 18 => Some(DataType::HrTime), | ||
| - 19 => Some(DataType::NvList), | ||
| - 20 => Some(DataType::NvListArray), | ||
| - 21 => Some(DataType::BooleanValue), | ||
| - 22 => Some(DataType::Int8), | ||
| - 23 => Some(DataType::Uint8), | ||
| - 24 => Some(DataType::BooleanArray), | ||
| - 25 => Some(DataType::Int8Array), | ||
| - 26 => Some(DataType::Uint8Array), | ||
| - _ => None, | ||
| - } | ||
| - } | ||
| - | ||
| - pub fn to_u8(self) -> u8 { | ||
| - match self { | ||
| - DataType::Unknown => 0, | ||
| - DataType::Boolean => 1, | ||
| - DataType::Byte => 2, | ||
| - DataType::Int16 => 3, | ||
| - DataType::Uint16 => 4, | ||
| - DataType::Int32 => 5, | ||
| - DataType::Uint32 => 6, | ||
| - DataType::Int64 => 7, | ||
| - DataType::Uint64 => 8, | ||
| - DataType::String => 9, | ||
| - DataType::ByteArray => 10, | ||
| - DataType::Int16Array => 11, | ||
| - DataType::Uint16Array => 12, | ||
| - DataType::Int32Array => 13, | ||
| - DataType::Uint32Array => 14, | ||
| - DataType::Int64Array => 15, | ||
| - DataType::Uint64Array => 16, | ||
| - DataType::StringArray => 17, | ||
| - DataType::HrTime => 18, | ||
| - DataType::NvList => 19, | ||
| - DataType::NvListArray => 20, | ||
| - DataType::BooleanValue => 21, | ||
| - DataType::Int8 => 22, | ||
| - DataType::Uint8 => 23, | ||
| - DataType::BooleanArray => 24, | ||
| - DataType::Int8Array => 25, | ||
| - DataType::Uint8Array => 26, | ||
| - } | ||
| - } | ||
| -} | ||
| - | ||
| -/// ///////////////////////////////////////////////////////////////////////////////////////////////// | ||
| - | ||
| -pub trait GetNvValue<'a>: Sized { | ||
| - fn get(value: &'a NvValue) -> Option<Self>; | ||
| -} | ||
| - | ||
| -impl<'a> GetNvValue<'a> for bool { | ||
| - fn get(value: &'a NvValue) -> Option<Self> { | ||
| - match *value { | ||
| - NvValue::BooleanValue(v) => Some(v), | ||
| - _ => None, | ||
| - } | ||
| - } | ||
| -} | ||
| - | ||
| -impl<'a> GetNvValue<'a> for u8 { | ||
| - fn get(value: &'a NvValue) -> Option<Self> { | ||
| - match *value { | ||
| - NvValue::Byte(v) => Some(v), | ||
| - _ => None, | ||
| - } | ||
| - } | ||
| -} | ||
| - | ||
| -impl<'a> GetNvValue<'a> for u16 { | ||
| - fn get(value: &'a NvValue) -> Option<Self> { | ||
| - match *value { | ||
| - NvValue::Uint16(v) => Some(v), | ||
| - _ => None, | ||
| - } | ||
| - } | ||
| -} | ||
| - | ||
| -impl<'a> GetNvValue<'a> for u32 { | ||
| - fn get(value: &'a NvValue) -> Option<Self> { | ||
| - match *value { | ||
| - NvValue::Uint32(v) => Some(v), | ||
| - _ => None, | ||
| - } | ||
| - } | ||
| -} | ||
| - | ||
| -impl<'a> GetNvValue<'a> for u64 { | ||
| - fn get(value: &'a NvValue) -> Option<Self> { | ||
| - match *value { | ||
| - NvValue::Uint64(v) => Some(v), | ||
| - _ => None, | ||
| - } | ||
| - } | ||
| -} | ||
| - | ||
| -impl<'a> GetNvValue<'a> for i16 { | ||
| - fn get(value: &'a NvValue) -> Option<Self> { | ||
| - match *value { | ||
| - NvValue::Int16(v) => Some(v), | ||
| - _ => None, | ||
| - } | ||
| - } | ||
| -} | ||
| - | ||
| -impl<'a> GetNvValue<'a> for i32 { | ||
| - fn get(value: &'a NvValue) -> Option<Self> { | ||
| - match *value { | ||
| - NvValue::Int32(v) => Some(v), | ||
| - _ => None, | ||
| - } | ||
| - } | ||
| -} | ||
| - | ||
| -impl<'a> GetNvValue<'a> for i64 { | ||
| - fn get(value: &'a NvValue) -> Option<Self> { | ||
| - match *value { | ||
| - NvValue::Int64(v) => Some(v), | ||
| - _ => None, | ||
| - } | ||
| - } | ||
| -} | ||
| - | ||
| -impl<'a> GetNvValue<'a> for &'a String { | ||
| - fn get(value: &'a NvValue) -> Option<Self> { | ||
| - match *value { | ||
| - NvValue::String(ref v) => Some(v), | ||
| - _ => None, | ||
| - } | ||
| - } | ||
| -} | ||
| - | ||
| -impl<'a> GetNvValue<'a> for &'a NvList { | ||
| - fn get(value: &'a NvValue) -> Option<Self> { | ||
| - match *value { | ||
| - NvValue::NvList(ref v) => Some(v), | ||
| - _ => None, | ||
| - } | ||
| - } | ||
| -} | ||
| - | ||
| -impl<'a> GetNvValue<'a> for &'a Vec<NvList> { | ||
| - fn get(value: &'a NvValue) -> Option<Self> { | ||
| - match *value { | ||
| - NvValue::NvListArray(ref v) => Some(v), | ||
| - _ => None, | ||
| - } | ||
| - } | ||
| -} |
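
The NvList removed above is just an ordered Vec of (name, value) pairs, with typed lookups going through the GetNvValue trait. A short usage sketch, assuming the nvpair module from this diff is in scope; the 0x1 flag mirrors NV_UNIQUE_NAME from nvstream.rs:

```rust
use nvpair::{NvList, NvValue};

fn main() {
    // Build a list and add a few pairs.
    let mut config = NvList::new(0x1); // NV_UNIQUE_NAME
    config.add("pool_guid".to_string(), NvValue::Uint64(0xDEADBEEF));
    config.add("name".to_string(), NvValue::String("tank".to_string()));

    // `get` picks the right NvValue variant based on the requested type.
    let guid: Option<u64> = config.get("pool_guid");
    let name: Option<&String> = config.get("name");
    assert_eq!(guid, Some(0xDEADBEEF));
    assert_eq!(name.map(|s| s.as_str()), Some("tank"));
}
```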
266
crates/zfs/nvstream.rs
| @@ -1,266 +0,0 @@ | ||
| -use std::mem; | ||
| - | ||
| -use super::nvpair::{DataType, NV_VERSION, NvList, NvValue}; | ||
| -use super::xdr; | ||
| - | ||
| -// nvlist pack encoding | ||
| -const NV_ENCODE_NATIVE: u8 = 0; | ||
| -const NV_ENCODE_XDR: u8 = 1; | ||
| - | ||
| -// nvlist pack endian | ||
| -const NV_BIG_ENDIAN: u8 = 0; | ||
| -const NV_LITTLE_ENDIAN: u8 = 1; | ||
| - | ||
| -// nvlist persistent unique name flags, stored in nvl_nvflags | ||
| -const NV_UNIQUE_NAME: u32 = 0x1; | ||
| -const NV_UNIQUE_NAME_TYPE: u32 = 0x2; | ||
| - | ||
| -// nvlist lookup pairs related flags | ||
| -const NV_FLAG_NOENTOK: isize = 0x1; | ||
| - | ||
| -// NvList XDR format: | ||
| -// - header (encoding and endian): 4 bytes | ||
| -// - nvl version: 4 bytes | ||
| -// - nv flags: 4 bytes | ||
| -// - nv pairs: | ||
| -// - encoded size: 4 bytes | ||
| -// - decoded size: 4 bytes | ||
| -//  - name: xdr string | len: 4 bytes, data: len rounded up to a multiple of 4 bytes | ||
| -// - data type: 4 bytes | ||
| -// - num elements: 4 bytes | ||
| -// - data | ||
| -// - 2 terminating zeros: 4 bytes | ||
| -// | ||
| -// NOTE: XDR aligns all of the smaller integer types to be 4 bytes, so `encode_u8` is actually | ||
| -// writing 4 bytes | ||
| -// | ||
| -// I don't know why the ZFS developers decided to use i32s everywhere, even for clearly | ||
| -// unsigned things like array lengths. | ||
| - | ||
| -/// Name value stream header | ||
| -#[derive(Debug)] | ||
| -pub struct NvsHeader { | ||
| - encoding: u8, // nvs encoding method | ||
| - endian: u8, // nvs endian | ||
| - reserved1: u8, // reserved for future use | ||
| - reserved2: u8, // reserved for future use | ||
| -} | ||
| - | ||
| -/// Encodes a NvList in XDR format | ||
| -pub fn encode_nv_list(xdr: &mut xdr::Xdr, nv_list: &NvList) -> xdr::XdrResult<()> { | ||
| - try!(encode_nv_list_header(xdr)); | ||
| - | ||
| - // Encode version and nvflag | ||
| - try!(xdr.encode_i32(nv_list.version)); | ||
| - try!(xdr.encode_u32(nv_list.nvflag)); | ||
| - | ||
| - // Encode the pairs | ||
| - for &(ref name, ref value) in &nv_list.pairs { | ||
| - // Encode name | ||
| - // let encoded_size = 0; | ||
| - // let decoded_size = 0; | ||
| - try!(xdr.encode_string(name)); | ||
| - | ||
| - // TODO | ||
| - | ||
| - // Encode data type | ||
| - try!(xdr.encode_u8(value.data_type().to_u8())); | ||
| - | ||
| - // Encode the number of elements | ||
| - try!(xdr.encode_i32(value.num_elements() as i32)); | ||
| - | ||
| - // Encode the value | ||
| - } | ||
| - | ||
| - // Encode 2 terminating zeros | ||
| - try!(xdr.encode_i32(0)); | ||
| - try!(xdr.encode_i32(0)); | ||
| - Ok(()) | ||
| -} | ||
| - | ||
| -fn encode_nv_list_header(xdr: &mut xdr::Xdr) -> xdr::XdrResult<()> { | ||
| - let header = NvsHeader { | ||
| - encoding: NV_ENCODE_XDR, | ||
| - endian: NV_LITTLE_ENDIAN, | ||
| - reserved1: 0, | ||
| - reserved2: 0, | ||
| - }; | ||
| - let header_bytes: [u8; 4] = unsafe { mem::transmute(header) }; | ||
| - try!(xdr.encode_opaque(&header_bytes)); | ||
| - Ok(()) | ||
| -} | ||
| - | ||
| -/// Decodes a NvList in XDR format | ||
| -pub fn decode_nv_list(xdr: &mut xdr::Xdr) -> xdr::XdrResult<NvList> { | ||
| - try!(decode_nv_list_header(xdr)); | ||
| - | ||
| - decode_nv_list_embedded(xdr) | ||
| -} | ||
| - | ||
| -pub fn decode_nv_list_embedded(xdr: &mut xdr::Xdr) -> xdr::XdrResult<NvList> { | ||
| - // Decode version and nvflag | ||
| - let version = try!(xdr.decode_i32()); | ||
| - let nvflag = try!(xdr.decode_u32()); | ||
| - | ||
| - // TODO: Give an actual error | ||
| - if version != NV_VERSION { | ||
| - return Err(xdr::XdrError); | ||
| - } | ||
| - | ||
| - let mut nv_list = NvList::new(nvflag); | ||
| - | ||
| - // Decode the pairs | ||
| - loop { | ||
| -        // Decode encoded/decoded sizes | ||
| - let encoded_size = try!(xdr.decode_u32()); | ||
| - let decoded_size = try!(xdr.decode_u32()); | ||
| - | ||
| - // Check for 2 terminating zeros | ||
| - if encoded_size == 0 && decoded_size == 0 { | ||
| - break; | ||
| - } | ||
| - | ||
| - // Decode name | ||
| - let name = try!(xdr.decode_string()); | ||
| - | ||
| - // Decode data type | ||
| - let data_type = match DataType::from_u8(try!(xdr.decode_u8())) { | ||
| - Some(dt) => dt, | ||
| - None => { | ||
| - return Err(xdr::XdrError); | ||
| - } | ||
| - }; | ||
| - | ||
| - // Decode the number of elements | ||
| - let num_elements = try!(xdr.decode_i32()) as usize; | ||
| - | ||
| - // Decode the value | ||
| - let value = try!(decode_nv_value(xdr, data_type, num_elements)); | ||
| - | ||
| - // Add the value to the list | ||
| - nv_list.pairs.push((name, value)); | ||
| - } | ||
| - | ||
| - Ok(nv_list) | ||
| -} | ||
| - | ||
| -fn decode_nv_list_header(xdr: &mut xdr::Xdr) -> xdr::XdrResult<()> { | ||
| - let mut bytes: [u8; 4] = [0; 4]; | ||
| - try!(xdr.decode_opaque(&mut bytes)); | ||
| - let header: NvsHeader = unsafe { mem::transmute(bytes) }; | ||
| - | ||
| - if header.encoding != NV_ENCODE_XDR { | ||
| - return Err(xdr::XdrError); | ||
| - } | ||
| - Ok(()) | ||
| -} | ||
| - | ||
| -fn decode_nv_value(xdr: &mut xdr::Xdr, | ||
| - data_type: DataType, | ||
| - num_elements: usize) | ||
| - -> xdr::XdrResult<NvValue> { | ||
| - match data_type { | ||
| - DataType::Unknown => Ok(NvValue::Unknown), | ||
| - DataType::Boolean => Ok(NvValue::Boolean), | ||
| - DataType::Byte => Ok(NvValue::Byte(try!(xdr.decode_u8()))), | ||
| - DataType::Int16 => Ok(NvValue::Int16(try!(xdr.decode_i16()))), | ||
| - DataType::Uint16 => Ok(NvValue::Uint16(try!(xdr.decode_u16()))), | ||
| - DataType::Int32 => Ok(NvValue::Int32(try!(xdr.decode_i32()))), | ||
| - DataType::Uint32 => Ok(NvValue::Uint32(try!(xdr.decode_u32()))), | ||
| - DataType::Int64 => Ok(NvValue::Int64(try!(xdr.decode_i64()))), | ||
| - DataType::Uint64 => Ok(NvValue::Uint64(try!(xdr.decode_u64()))), | ||
| - DataType::String => Ok(NvValue::String(try!(xdr.decode_string()))), | ||
| - DataType::ByteArray => { | ||
| - let mut v = vec![0; num_elements]; | ||
| - for v in &mut v { | ||
| - *v = try!(xdr.decode_u8()); | ||
| - } | ||
| - Ok(NvValue::ByteArray(v)) | ||
| - } | ||
| - DataType::Int16Array => { | ||
| - let mut v = vec![0; num_elements]; | ||
| - for v in &mut v { | ||
| - *v = try!(xdr.decode_i16()); | ||
| - } | ||
| - Ok(NvValue::Int16Array(v)) | ||
| - } | ||
| - DataType::Uint16Array => { | ||
| - let mut v = vec![0; num_elements]; | ||
| - for v in &mut v { | ||
| - *v = try!(xdr.decode_u16()); | ||
| - } | ||
| - Ok(NvValue::Uint16Array(v)) | ||
| - } | ||
| - DataType::Int32Array => { | ||
| - let mut v = vec![0; num_elements]; | ||
| - for v in &mut v { | ||
| - *v = try!(xdr.decode_i32()); | ||
| - } | ||
| - Ok(NvValue::Int32Array(v)) | ||
| - } | ||
| - DataType::Uint32Array => { | ||
| - let mut v = vec![0; num_elements]; | ||
| - for v in &mut v { | ||
| - *v = try!(xdr.decode_u32()); | ||
| - } | ||
| - Ok(NvValue::Uint32Array(v)) | ||
| - } | ||
| - DataType::Int64Array => { | ||
| - let mut v = vec![0; num_elements]; | ||
| - for v in &mut v { | ||
| - *v = try!(xdr.decode_i64()); | ||
| - } | ||
| - Ok(NvValue::Int64Array(v)) | ||
| - } | ||
| - DataType::Uint64Array => { | ||
| - let mut v = vec![0; num_elements]; | ||
| - for v in &mut v { | ||
| - *v = try!(xdr.decode_u64()); | ||
| - } | ||
| - Ok(NvValue::Uint64Array(v)) | ||
| - } | ||
| -        DataType::StringArray => { | ||
| -            let mut v = vec![String::new(); num_elements]; | ||
| -            for v in &mut v { | ||
| -                *v = try!(xdr.decode_string()); | ||
| -            } | ||
| -            Ok(NvValue::StringArray(v)) | ||
| -        } | ||
| - DataType::HrTime => Ok(NvValue::HrTime(try!(xdr.decode_i64()))), | ||
| - DataType::NvList => { | ||
| - let nv_list = try!(decode_nv_list_embedded(xdr)); | ||
| - Ok(NvValue::NvList(nv_list)) | ||
| - } | ||
| - DataType::NvListArray => { | ||
| - let mut v = Vec::with_capacity(num_elements); | ||
| - for _ in 0..num_elements { | ||
| - v.push(try!(decode_nv_list_embedded(xdr))); | ||
| - } | ||
| - Ok(NvValue::NvListArray(v)) | ||
| - } | ||
| - DataType::BooleanValue => Ok(NvValue::BooleanValue(try!(xdr.decode_bool()))), | ||
| - DataType::Int8 => Ok(NvValue::Int8(try!(xdr.decode_i8()))), | ||
| - DataType::Uint8 => Ok(NvValue::Uint8(try!(xdr.decode_u8()))), | ||
| - DataType::BooleanArray => { | ||
| - let mut v = vec![false; num_elements]; | ||
| - for v in &mut v { | ||
| - *v = try!(xdr.decode_bool()); | ||
| - } | ||
| - Ok(NvValue::BooleanArray(v)) | ||
| - } | ||
| - DataType::Int8Array => { | ||
| - let mut v = vec![0; num_elements]; | ||
| - for v in &mut v { | ||
| - *v = try!(xdr.decode_i8()); | ||
| - } | ||
| - Ok(NvValue::Int8Array(v)) | ||
| - } | ||
| - DataType::Uint8Array => { | ||
| - let mut v = vec![0; num_elements]; | ||
| - for v in &mut v { | ||
| - *v = try!(xdr.decode_u8()); | ||
| - } | ||
| - Ok(NvValue::Uint8Array(v)) | ||
| - } | ||
| - } | ||
| -} |
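
One detail worth calling out in the XDR layout notes at the top of nvstream.rs: string and opaque payloads are padded to a 4-byte boundary, so a pair's name costs a 4-byte length word plus the name bytes rounded up to a multiple of 4. An illustrative helper (not part of the crate) for that padded size:

```rust
/// Bytes used by an XDR-encoded string: 4-byte length word + data padded to 4.
fn xdr_string_size(name: &str) -> usize {
    4 + (name.len() + 3) / 4 * 4
}

fn main() {
    assert_eq!(xdr_string_size("guid"), 8);       // 4 + 4
    assert_eq!(xdr_string_size("vdev_tree"), 16); // 4 + (9 padded to 12)
}
```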
319
crates/zfs/spa.rs
| @@ -1,319 +0,0 @@ | ||
| -use std::cmp; | ||
| -use std::rc::Rc; | ||
| - | ||
| -use super::avl; | ||
| -use super::dmu_objset::ObjectSet; | ||
| -use super::dsl_pool; | ||
| -use super::metaslab::{self, MetaslabClass}; | ||
| -use super::nvpair::{NvList, NvValue}; | ||
| -use super::taskq::Taskq; | ||
| -use super::txg; | ||
| -use super::uberblock::Uberblock; | ||
| -use super::vdev; | ||
| -use super::zfs; | ||
| -use super::zio; | ||
| - | ||
| -pub enum ImportType { | ||
| - Existing, | ||
| - Assemble, | ||
| -} | ||
| - | ||
| -// Storage pool allocator | ||
| -pub struct Spa { | ||
| - name: String, // Pool name | ||
| - config: NvList, | ||
| - state: zfs::PoolState, | ||
| - load_state: zfs::SpaLoadState, | ||
| - zio_taskq: Vec<Vec<SpaTaskqs>>, | ||
| - // dsl_pool: DslPool, | ||
| - normal_class: Rc<MetaslabClass>, // normal data class | ||
| - log_class: Rc<MetaslabClass>, // intent log data class | ||
| - first_txg: u64, | ||
| - mos: ObjectSet, | ||
| - vdev_tree: vdev::Tree, | ||
| - root_vdev: vdev::TreeIndex, | ||
| - // ubsync: Uberblock, // Last synced uberblock | ||
| - // uberblock: Uberblock, // Current active uberblock | ||
| - did: u64, // if procp != p0, did of t1 | ||
| -} | ||
| - | ||
| -impl Spa { | ||
| - pub fn create(name: String, nvroot: &NvList) -> zfs::Result<Self> { | ||
| - let mut config = NvList::new(0); | ||
| - config.add("name".to_string(), NvValue::String(name.clone())); | ||
| - Self::new(name, config, vdev::AllocType::Add) | ||
| - } | ||
| - | ||
| - pub fn import(name: String, config: NvList) -> zfs::Result<Self> { | ||
| - let load_state = zfs::SpaLoadState::Import; | ||
| - | ||
| - // note that mos_config is true - we trust the user's config in this case | ||
| - let mut spa = try!(Self::load(name, config, load_state, ImportType::Existing, true)); | ||
| - | ||
| - spa.activate(); | ||
| - | ||
| - Ok(spa) | ||
| - } | ||
| - | ||
| - // pub fn open(&mut self) -> zfs::Result<()> { | ||
| - // let load_state = zfs::SpaLoadState::Open; | ||
| - // if self.state == zfs::PoolState::Uninitialized { | ||
| - // First time opening | ||
| - // self.activate(); | ||
| - // try!(self.load(load_state, ImportType::Existing, false)); | ||
| - // } | ||
| - // | ||
| - // Ok(()) | ||
| - // } | ||
| - | ||
| - fn new(name: String, config: NvList, vdev_alloc_type: vdev::AllocType) -> zfs::Result<Self> { | ||
| - let metaslab_ops = Rc::new(metaslab::MetaslabOps { alloc: metaslab::ff_alloc }); | ||
| - let normal_class = Rc::new(MetaslabClass::create(metaslab_ops.clone())); | ||
| - let log_class = Rc::new(MetaslabClass::create(metaslab_ops)); | ||
| - | ||
| - // Parse vdev tree | ||
| - let mut vdev_tree = vdev::Tree::new(); | ||
| - let root_vdev = { | ||
| - let nvroot: &NvList = try!(config.get("vdev_tree").ok_or(zfs::Error::Invalid)); | ||
| - try!(vdev_tree.parse(&normal_class, nvroot, None, vdev_alloc_type)) | ||
| - }; | ||
| - | ||
| - Ok(Spa { | ||
| - name: name, | ||
| - config: config, | ||
| - state: zfs::PoolState::Uninitialized, | ||
| - load_state: zfs::SpaLoadState::None, | ||
| - zio_taskq: Vec::new(), | ||
| - // dsl_pool: blah, | ||
| - normal_class: normal_class, | ||
| - log_class: log_class, | ||
| - first_txg: 0, | ||
| - mos: ObjectSet, | ||
| - vdev_tree: vdev_tree, | ||
| - root_vdev: root_vdev, | ||
| - did: 0, | ||
| - }) | ||
| - } | ||
| - | ||
| - fn load(name: String, | ||
| - config: NvList, | ||
| - load_state: zfs::SpaLoadState, | ||
| - import_type: ImportType, | ||
| - mos_config: bool) | ||
| - -> zfs::Result<Self> { | ||
| - let pool_guid = try!(config.get("pool_guid").ok_or(zfs::Error::Invalid)); | ||
| - | ||
| - let mut spa = try!(Self::load_impl(name, | ||
| - pool_guid, | ||
| - config, | ||
| - load_state, | ||
| - import_type, | ||
| - mos_config)); | ||
| - spa.load_state = zfs::SpaLoadState::None; | ||
| - | ||
| - Ok(spa) | ||
| - } | ||
| - | ||
| - /// mosconfig: Whether `config` came from on-disk MOS and so is trusted, or was user-made and so | ||
| - /// is untrusted. | ||
| - fn load_impl(name: String, | ||
| - pool_guid: u64, | ||
| - config: NvList, | ||
| - load_state: zfs::SpaLoadState, | ||
| - import_type: ImportType, | ||
| - mos_config: bool) | ||
| - -> zfs::Result<Self> { | ||
| - // Determine the vdev allocation type from import type | ||
| - let vdev_alloc_type = match import_type { | ||
| - ImportType::Existing => vdev::AllocType::Load, | ||
| - ImportType::Assemble => vdev::AllocType::Split, | ||
| - }; | ||
| - | ||
| - let mut spa = try!(Self::new(name, config, vdev_alloc_type)); | ||
| - spa.load_state = load_state; | ||
| - | ||
| - // Create "The Godfather" zio to hold all async IOs | ||
| - // spa.spa_async_zio_root = kmem_alloc(max_ncpus * sizeof (void *), KM_SLEEP); | ||
| - // for i in 0..max_ncpus { | ||
| - // spa.async_zio_root[i] = | ||
| - // Zio::root(spa, None, None, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); | ||
| - // } | ||
| - | ||
| - | ||
| - // TODO: Try to open all vdevs, loading each label in the process. | ||
| - | ||
| - // TODO | ||
| - // Find the best uberblock. | ||
| - // vdev_uberblock_load(rvd, ub, &label); | ||
| - | ||
| - // If we weren't able to find a single valid uberblock, return failure. | ||
| - // if ub.txg == 0 { | ||
| - // return spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO); | ||
| - // } | ||
| - | ||
| - | ||
| - // Initialize internal structures | ||
| - spa.state = zfs::PoolState::Active; | ||
| - // spa.ubsync = spa.uberblock; | ||
| - // spa.verify_min_txg = | ||
| - // if spa.extreme_rewind { | ||
| - // txg::TXG_INITIAL - 1 | ||
| - // } else { | ||
| - // spa.last_synced_txg() - txg::DEFER_SIZE - 1; | ||
| - // }; | ||
| - // spa.first_txg = | ||
| - // if spa.last_ubsync_txg { spa.last_ubsync_txg } else { spa.last_synced_txg() + 1 }; | ||
| - // spa.claim_max_txg = spa.first_txg; | ||
| - // spa.prev_software_version = ub.software_version; | ||
| - | ||
| - // spa.dsl_pool = try!(dsl_pool::DslPool::init(&mut spa, spa.first_txg)); | ||
| - // if error { return spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO); } | ||
| - // spa.meta_objset = spa.dsl_pool.meta_objset; | ||
| - | ||
| - // Load stuff for the top-level and leaf vdevs | ||
| - spa.vdev_tree.load(&mut spa.mos, spa.root_vdev); | ||
| - | ||
| - Ok(spa) | ||
| - } | ||
| - | ||
| - fn activate(&mut self) { | ||
| - // assert!(self.state == zfs::PoolState::Uninitialized); | ||
| - | ||
| - self.state = zfs::PoolState::Active; | ||
| - | ||
| - // TODO: maybe start the spa thread | ||
| - | ||
| - self.create_zio_taskqs(); | ||
| - | ||
| - self.did = 0; | ||
| - } | ||
| - | ||
| - // fn taskqs_init(&mut self, t: zio::Type, q: zio::TaskqType) { | ||
| - // const zio_taskq_info_t *ztip = &zio_taskqs[t][q]; | ||
| - // zti_modes mode = ztip.mode; | ||
| - // let value = ztip.value; | ||
| - // let count = ztip.count; | ||
| - // let ref tqs = self.zio_taskq[t][q]; | ||
| - // let flags = TASKQ_DYNAMIC; | ||
| - // let mut batch: bool = false; | ||
| - // | ||
| - // if mode == ZTI_MODE_NULL { | ||
| - // tqs.count = 0; | ||
| - // tqs.taskq = NULL; | ||
| - // return; | ||
| - // } | ||
| - // | ||
| - // assert!(count > 0); | ||
| - // | ||
| - // tqs.count = count; | ||
| - // tqs.taskq = kmem_alloc(count * sizeof (taskq_t *), KM_SLEEP); | ||
| - // | ||
| - // match mode { | ||
| - // ZTI_MODE_FIXED => { | ||
| - // assert!(value >= 1); | ||
| - // value = cmp::max(value, 1); | ||
| - // }, | ||
| - // ZTI_MODE_BATCH => { | ||
| - // batch = true; | ||
| - // flags |= TASKQ_THREADS_CPU_PCT; | ||
| - // value = zio_taskq_batch_pct; | ||
| - // }, | ||
| - // _ => { | ||
| - // panic!("unrecognized mode for %s_%s taskq (%u:%u) in spa_activate()", | ||
| - // zio_type_name[t], zio_taskq_types[q], mode, value); | ||
| - // }, | ||
| - // } | ||
| - // | ||
| - // for i in 0..count { | ||
| - // taskq_t *tq; | ||
| - // char name[32]; | ||
| - // | ||
| - // if (count > 1) { | ||
| - // snprintf(name, sizeof (name), "%s_%s_%u", | ||
| - // zio_type_name[t], zio_taskq_types[q], i); | ||
| - // } else { | ||
| - // snprintf(name, sizeof (name), "%s_%s", | ||
| - // zio_type_name[t], zio_taskq_types[q]); | ||
| - // } | ||
| - // | ||
| - // if zio_taskq_sysdc && spa->spa_proc != &p0 { | ||
| - // if batch { | ||
| - // flags |= TASKQ_DC_BATCH; | ||
| - // } | ||
| - // | ||
| - // tq = taskq_create_sysdc(name, value, 50, INT_MAX, | ||
| - // spa->spa_proc, zio_taskq_basedc, flags); | ||
| - // } else { | ||
| - // pri_t pri = maxclsyspri; | ||
| - // The write issue taskq can be extremely CPU | ||
| - // intensive. Run it at slightly less important | ||
| - // priority than the other taskqs. Under Linux this | ||
| - // means incrementing the priority value on platforms | ||
| - // like illumos it should be decremented. | ||
| - // if (t == ZIO_TYPE_WRITE && q == ZIO_TASKQ_ISSUE) | ||
| - // pri += 1; | ||
| - // | ||
| - // tq = taskq_create_proc(name, value, pri, 50, | ||
| - // INT_MAX, spa->spa_proc, flags); | ||
| - // } | ||
| - // | ||
| - // tqs->taskq[i] = tq; | ||
| - // } | ||
| - // } | ||
| - | ||
| - fn create_zio_taskqs(&mut self) { | ||
| - for t in 0..zio::NUM_TYPES { | ||
| - for q in 0..zio::NUM_TASKQ_TYPES { | ||
| - // self.taskqs_init(t, q); | ||
| - } | ||
| - } | ||
| - } | ||
| - | ||
| - fn last_synced_txg(&self) -> u64 { | ||
| - // TODO | ||
| - // self.ubsync.ub_txg | ||
| - 0 | ||
| - } | ||
| - | ||
| - fn first_txg(&self) -> u64 { | ||
| - self.first_txg | ||
| - } | ||
| -} | ||
| - | ||
| -/// ///////////////////////////////////////////////////////////////////////////////////////////////// | ||
| - | ||
| -struct ZioTaskqInfo { | ||
| - // mode: zti_modes_t, | ||
| - value: usize, | ||
| - count: usize, | ||
| -} | ||
| - | ||
| -struct SpaTaskqs { | ||
| - count: usize, | ||
| - taskq: Vec<Vec<Taskq>>, | ||
| -} | ||
| - | ||
| -/// ///////////////////////////////////////////////////////////////////////////////////////////////// | ||
| - | ||
| -pub struct SpaNamespace { | ||
| - // TODO: Use &str instead of String as key type. Lifetimes are hard. | ||
| - avl: avl::Tree<Spa, String>, // AVL tree of Spa sorted by name | ||
| -} | ||
| - | ||
| -impl SpaNamespace { | ||
| - pub fn new() -> Self { | ||
| - SpaNamespace { avl: avl::Tree::new(Rc::new(|x| x.name.clone())) } | ||
| - } | ||
| - | ||
| - pub fn add(&mut self, spa: Spa) { | ||
| - self.avl.insert(spa); | ||
| - } | ||
| - | ||
| - pub fn find(&self, name: String) -> Option<&Spa> { | ||
| - self.avl.find(name) | ||
| - } | ||
| - | ||
| - pub fn find_mut(&mut self, name: String) -> Option<&mut Spa> { | ||
| - self.avl.find_mut(name) | ||
| - } | ||
| -} |
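
The load path above pulls everything it needs out of the config NvList: load() looks up "pool_guid", Spa::new() looks up "vdev_tree", and Spa::create() seeds "name". A hypothetical sketch of the smallest config those lookups would accept, built with the nvpair API from this same diff; a real import still needs a fully populated vdev tree, and the "type" key inside it is assumed here purely for illustration:

```rust
use nvpair::{NvList, NvValue};

// Keys mirror the lookups in spa.rs: "pool_guid", "name", "vdev_tree".
fn build_import_config() -> NvList {
    let mut vdev_tree = NvList::new(0);
    // Hypothetical leaf description; real trees come from vdev labels.
    vdev_tree.add("type".to_string(), NvValue::String("disk".to_string()));

    let mut config = NvList::new(0);
    config.add("pool_guid".to_string(), NvValue::Uint64(0x1234));
    config.add("name".to_string(), NvValue::String("tank".to_string()));
    config.add("vdev_tree".to_string(), NvValue::NvList(vdev_tree));
    config
}

fn main() {
    println!("{:?}", build_import_config());
}
```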
207
crates/zfs/space_map.rs
| @@ -1,207 +0,0 @@ | ||
| -use std::{fmt, mem}; | ||
| - | ||
| -use super::avl; | ||
| -use super::dmu_objset::ObjectSet; | ||
| -use super::from_bytes::FromBytes; | ||
| -use super::zfs; | ||
| - | ||
| -const SPACE_MAP_HISTOGRAM_SIZE: usize = 32; | ||
| - | ||
| -/// The `SpaceMapPhys` is the on-disk representation of the space map. | ||
| -/// Consumers of space maps should never reference any of the members of this | ||
| -/// structure directly. These members may only be updated in syncing context. | ||
| -/// | ||
| -/// Note the smp_object is no longer used but remains in the structure | ||
| -/// for backward compatibility. | ||
| -/// | ||
| -/// The smp_histogram maintains a histogram of free regions. Each | ||
| -/// bucket, smp_histogram[i], contains the number of free regions | ||
| -/// whose size is: | ||
| -/// 2^(i+sm_shift) <= size of free region in bytes < 2^(i+sm_shift+1) | ||
| -#[derive(Debug)] | ||
| -pub struct SpaceMapPhys { | ||
| - object: u64, // on-disk space map object | ||
| - objsize: u64, // size of the object | ||
| - alloc: u64, /* space allocated from the map | ||
| - * pad: [u64; 5], // reserved | ||
| - * histogram: [u64; SPACE_MAP_HISTOGRAM_SIZE], */ | ||
| -} | ||
| - | ||
| -impl FromBytes for SpaceMapPhys {} | ||
| - | ||
| -pub struct SpaceMap { | ||
| - start: u64, // start of map | ||
| - size: u64, // size of map | ||
| - shift: u8, // unit shift | ||
| - length: u64, // synced length | ||
| - alloc: u64, // synced space allocated | ||
| - // os: *ObjectSet, // objset for this map | ||
| - object: u64, // object id for this map | ||
| - blksz: u32, // block size for space map | ||
| - // dbuf: *dmu_dbuf_t, // space_map_phys_t dbuf | ||
| - phys: SpaceMapPhys, // on-disk space map | ||
| -} | ||
| - | ||
| -impl SpaceMap { | ||
| - /// Returns SpaceMapPhys, Dbuf, and block size | ||
| - // TODO | ||
| - // fn open_impl(os: &mut ObjectSet, object: u64) -> zfs::Result<(SpaceMapPhys, dmu::Dbuf, u64)> { | ||
| - // let dbuf = try!(dmu_bonus_hold(os, object, sm)); | ||
| - // | ||
| - // let (block_size, num_blocks) = dmu_object_size_from_db(dbuf); | ||
| - // let phys = SpaceMapPhys::from_bytes(dbuf.data); | ||
| - // | ||
| - // Ok((phys, dbuf, block_size)) | ||
| - // } | ||
| - | ||
| - | ||
| - pub fn open(os: &mut ObjectSet, | ||
| - object: u64, | ||
| - start: u64, | ||
| - size: u64, | ||
| - shift: u8) | ||
| - -> zfs::Result<Self> { | ||
| - assert!(object != 0); | ||
| - | ||
| - // TODO | ||
| - // let (phys, dbuf, block_size) = try!(Self::open_impl(os, object)); | ||
| - let phys = SpaceMapPhys { | ||
| - object: 0, // on-disk space map object | ||
| - objsize: 0, // size of the object | ||
| - alloc: 0, // space allocated from the map | ||
| - }; | ||
| - let block_size = 0; | ||
| - | ||
| - let mut space_map = SpaceMap { | ||
| - start: start, | ||
| - size: size, | ||
| - shift: shift, | ||
| - // os: os, | ||
| - object: object, | ||
| - length: 0, | ||
| - alloc: 0, | ||
| - blksz: block_size, | ||
| - // dbuf: dbuf, | ||
| - phys: phys, | ||
| - }; | ||
| - | ||
| - Ok(space_map) | ||
| - } | ||
| - | ||
| - pub fn load_avl(&self, | ||
| - tree: &mut avl::Tree<Segment, u64>, | ||
| - bytes: &[u8], | ||
| - map_type: MapType) | ||
| - -> Result<(), String> { | ||
| - for i in 0..(self.size as usize) { | ||
| - let entry = Entry::from_bytes(&bytes[i * mem::size_of::<Entry>()..]).unwrap(); | ||
| - let entry_map_type = match entry.map_type() { | ||
| - Some(map_type) => map_type, | ||
| - None => { | ||
| - return Err("Invalid map type".to_string()); | ||
| - } | ||
| - }; | ||
| - if entry.debug() != 1 && entry_map_type == map_type { | ||
| - // it's not a debug entry and it's the right map type, add it to the tree | ||
| - tree.insert(Segment::from_entry(&entry)); | ||
| - } | ||
| - } | ||
| - tree.in_order(|node| { | ||
| - println!("{:?}", node.value()); | ||
| - }); | ||
| - | ||
| - Ok(()) | ||
| - } | ||
| -} | ||
| - | ||
| -/// ///////////////////////////////////////////////////////////////////////////////////////////////// | ||
| -#[derive(Copy, Clone, Debug, PartialEq)] | ||
| -pub enum MapType { | ||
| - Alloc = 0, | ||
| - Free = 1, | ||
| -} | ||
| - | ||
| -impl MapType { | ||
| - pub fn from_u64(u: u64) -> Option<Self> { | ||
| - match u { | ||
| - 0 => Some(MapType::Alloc), | ||
| - 1 => Some(MapType::Free), | ||
| - _ => None, | ||
| - } | ||
| - } | ||
| -} | ||
| - | ||
| -#[derive(Copy, Clone)] | ||
| -pub struct Entry(u64); | ||
| - | ||
| -impl FromBytes for Entry {} | ||
| - | ||
| -impl Entry { | ||
| - pub fn debug(&self) -> u64 { | ||
| - (self.0 >> 63) & 0x1 // 1 bit long | ||
| - } | ||
| - | ||
| - // Non-debug entries | ||
| - | ||
| - pub fn size(&self) -> u64 { | ||
| - self.0 & 0x7FFF // 15 bits long | ||
| - } | ||
| - | ||
| - pub fn map_type(&self) -> Option<MapType> { | ||
| - MapType::from_u64((self.0 >> 15) & 0x1) // 1 bit long | ||
| - } | ||
| - | ||
| - pub fn offset(&self) -> u64 { | ||
| -        (self.0 >> 16) & 0x7FFFFFFFFFFF // 47 bits long | ||
| - } | ||
| - | ||
| - // Debug entries | ||
| - | ||
| - pub fn action(&self) -> u64 { | ||
| - (self.0 >> 60) & 0x7 // 3 bits long | ||
| - } | ||
| - | ||
| - pub fn sync_pass(&self) -> u64 { | ||
| - (self.0 >> 50) & 0x3FF // 10 bits long | ||
| - } | ||
| - | ||
| - pub fn txg(&self) -> u64 { | ||
| -        self.0 & 0x3FFFFFFFFFFFF // 50 bits long | ||
| - } | ||
| -} | ||
| - | ||
| -impl fmt::Debug for Entry { | ||
| - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
| - if self.debug() == 1 { | ||
| - try!(write!(f, | ||
| - "DEBUG: action:0x{:X} sync_pass:{:X} txg:0x{:X}", | ||
| - self.action(), | ||
| - self.sync_pass(), | ||
| - self.txg())); | ||
| - } else { | ||
| - try!(write!(f, | ||
| - "ENTRY: size:0x{:X} map_type:{:?} offset:0x{:X}", | ||
| - self.size(), | ||
| - self.map_type(), | ||
| - self.offset())); | ||
| - } | ||
| - Ok(()) | ||
| - } | ||
| -} | ||
| - | ||
| - | ||
| -/// ///////////////////////////////////////////////////////////////////////////////////////////////// | ||
| -#[derive(Debug)] | ||
| -pub struct Segment { | ||
| - pub start: u64, | ||
| - pub size: u64, | ||
| -} | ||
| - | ||
| -impl Segment { | ||
| - fn from_entry(entry: &Entry) -> Self { | ||
| - Segment { | ||
| - start: entry.offset(), | ||
| - size: entry.size(), | ||
| - } | ||
| - } | ||
| -} |
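For reference, a minimal sketch of how a raw space map entry word decodes under the bit layout implemented by Entry above; decode_entry and the sample value are illustrative only, not part of the crate:

    fn decode_entry(raw: u64) {
        if (raw >> 63) & 0x1 == 1 {
            // Debug entry: action (3 bits), sync pass (10 bits), txg (50 bits)
            let action = (raw >> 60) & 0x7;
            let sync_pass = (raw >> 50) & 0x3FF;
            let txg = raw & 0x3FFFFFFFFFFFF;
            println!("DEBUG action={} sync_pass={} txg={}", action, sync_pass, txg);
        } else {
            // Regular entry: offset (47 bits), map type (1 bit), size (15 bits)
            let offset = (raw >> 16) & 0x7FFFFFFFFFFF;
            let map_type = (raw >> 15) & 0x1; // 0 = alloc, 1 = free
            let size = raw & 0x7FFF;
            println!("ENTRY offset={} map_type={} size={}", offset, map_type, size);
        }
    }

    fn main() {
        // A free segment at offset 0x10, spanning 0x8 units.
        let raw = (0x10u64 << 16) | (1 << 15) | 0x8;
        decode_entry(raw);
    }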
371
crates/zfs/taskq.rs
| @@ -1,371 +0,0 @@ | ||
| -use std::cmp; | ||
| -// use std::collections::VecDeque; | ||
| -// use std::sync::mpsc::{channel, Sender, Receiver}; | ||
| -use std::thread; | ||
| - | ||
| -use super::zfs; | ||
| - | ||
| -const TQENT_FLAG_PREALLOC: u64 = 0x1; // taskq_dispatch_ent used | ||
| - | ||
| -const TASKQ_PREPOPULATE: u64 = 0x0001; | ||
| -const TASKQ_CPR_SAFE: u64 = 0x0002; // Use CPR safe protocol | ||
| -const TASKQ_DYNAMIC: u64 = 0x0004; // Use dynamic thread scheduling | ||
| -const TASKQ_THREADS_CPU_PCT: u64 = 0x0008; // Scale # threads by # cpus | ||
| -const TASKQ_DC_BATCH: u64 = 0x0010; // Mark threads as batch | ||
| - | ||
| -// const TQ_SLEEP: u64 = KM_SLEEP; // Can block for memory | ||
| -// const TQ_NOSLEEP: u64 = KM_NOSLEEP; // Cannot block for memory; may fail | ||
| -const TQ_NOQUEUE: u64 = 0x02; // Do not enqueue if can't dispatch | ||
| -const TQ_FRONT: u64 = 0x08; // Queue in front | ||
| - | ||
| -const TASKQ_ACTIVE: u64 = 0x00010000; | ||
| - | ||
| -pub type TaskFn = Box<FnMut()>; | ||
| - | ||
| -pub struct Taskq { | ||
| - name: String, | ||
| - // kmutex_t lock, | ||
| - // krwlock_t threadlock, | ||
| - // kcondvar_t dispatch_cv, | ||
| - // kcondvar_t wait_cv,*/ | ||
| - // threads: Vec<Sender<Task>>, | ||
| - flags: u64, | ||
| - active: u16, | ||
| - num_threads: u16, | ||
| - num_alloc: u64, | ||
| - min_alloc: u64, | ||
| - max_alloc: u64, | ||
| - next_task_id: usize, | ||
| - // kcondvar_t max_alloc_cv, | ||
| - max_alloc_wait: i64, /* taskq_ent_t *freelist, | ||
| - * task_queue: VecDeque<Task>, */ | ||
| -} | ||
| - | ||
| -impl Taskq { | ||
| - pub fn new(name: String, | ||
| - mut num_threads: u16, | ||
| - min_alloc: u64, | ||
| - max_alloc: u64, | ||
| - flags: u64) | ||
| - -> Self { | ||
| - // taskq_t *tq = kmem_zalloc(sizeof (taskq_t), KM_SLEEP); | ||
| - | ||
| - // if flags & TASKQ_THREADS_CPU_PCT != 0 { | ||
| - // int pct; | ||
| - // assert!(num_threads >= 0); | ||
| - // assert!(num_threads <= 100); | ||
| - // pct = cmp::min(num_threads, 100); | ||
| - // pct = cmp::max(pct, 0); | ||
| - // | ||
| - // num_threads = (sysconf(_SC_NPROCESSORS_ONLN) * pct) / 100; | ||
| - // num_threads = cmp::max(num_threads, 1); /* need at least 1 thread */ | ||
| - // } else { | ||
| - // assert!(num_threads >= 1); | ||
| - // } | ||
| - | ||
| - // rw_init(&tq.threadlock, NULL, RW_DEFAULT, NULL); | ||
| - // mutex_init(&tq.lock, NULL, MUTEX_DEFAULT, NULL); | ||
| - // cv_init(&tq.dispatch_cv, NULL, CV_DEFAULT, NULL); | ||
| - // cv_init(&tq.wait_cv, NULL, CV_DEFAULT, NULL); | ||
| - // cv_init(&tq.max_alloc_cv, NULL, CV_DEFAULT, NULL); | ||
| - // tq.task.next: &tq.task; | ||
| - // tq.task.prev: &tq.task; | ||
| - | ||
| - // if flags & TASKQ_PREPOPULATE != 0 { | ||
| - // mutex_enter(&tq.lock); | ||
| - // while (min_alloc-- > 0) | ||
| - // task_free(tq, task_alloc(tq, KM_SLEEP)); | ||
| - // mutex_exit(&tq.lock); | ||
| - // } | ||
| - | ||
| - // let mut threads = Vec::new(); | ||
| - // for _ in 0..num_threads { | ||
| - // let (task_t, task_r) = channel(); | ||
| - // threads.push(task_t); | ||
| - // thread::spawn(|| { taskq_thread(task_r) }); | ||
| - // tq.thread_list[t] = thread_create(NULL, 0, taskq_thread, tq, TS_RUN, NULL, 0, pri); | ||
| - // VERIFIY(tq.thread_list[t]); | ||
| - // } | ||
| - | ||
| - Taskq { | ||
| - name: name, | ||
| - // threads: threads, | ||
| - flags: flags | TASKQ_ACTIVE, | ||
| - active: num_threads, | ||
| - num_threads: num_threads, | ||
| - num_alloc: 0, | ||
| - min_alloc: min_alloc, | ||
| - max_alloc: max_alloc, | ||
| - next_task_id: 0, | ||
| - max_alloc_wait: 0, // task_queue: VecDeque::new(), | ||
| - } | ||
| - } | ||
| - | ||
| - // fn alloc_task(&mut self, tqflags: u64) -> Self { | ||
| - // taskq_ent_t *t; | ||
| - // | ||
| - // loop { | ||
| - // if (t = self.freelist) != NULL && self.num_alloc >= self.min_alloc { | ||
| - // There's a free Task in the free_list | ||
| - // assert!(t.flags & TQENT_FLAG_PREALLOC == 0); | ||
| - // self.freelist = t.next; | ||
| - // } else { | ||
| - // if (self.num_alloc >= self.max_alloc) { | ||
| - // if tqflags & KM_SLEEP == 0 { | ||
| - // return NULL; | ||
| - // } | ||
| - // | ||
| - // We don't want to exceed max_alloc, but we can't | ||
| - // wait for other tasks to complete (and thus free up | ||
| - // task structures) without risking deadlock with | ||
| - // the caller. So, we just delay for one second | ||
| - // to throttle the allocation rate. If we have tasks | ||
| - // complete before one second timeout expires then | ||
| - // taskq_ent_free will signal us and we will | ||
| - // immediately retry the allocation. | ||
| - // self.max_alloc_wait += 1; | ||
| - // let rv = cv_timedwait(&self.max_alloc_cv, &self.lock, ddi_get_lbolt() + hz); | ||
| - // self.max_alloc_wait -= 1; | ||
| - // if rv > 0 { | ||
| - // continue; | ||
| - // } | ||
| - // } | ||
| - // mutex_exit(&self.lock); | ||
| - // | ||
| - // t = kmem_alloc(sizeof (taskq_ent_t), tqflags); | ||
| - // | ||
| - // mutex_enter(&self.lock); | ||
| - // if t != NULL { | ||
| - // Make sure we start without any flags | ||
| - // t.flags = 0; | ||
| - // self.num_alloc++; | ||
| - // } | ||
| - // } | ||
| - // | ||
| - // break; | ||
| - // } | ||
| - // return t; | ||
| - // } | ||
| - | ||
| - // fn task_free(taskq_t *tq, taskq_ent_t *t) { | ||
| - // if (tq->tq_nalloc <= tq->tq_min_alloc) { | ||
| - // t->tqent_next = tq->tq_freelist; | ||
| - // tq->tq_freelist = t; | ||
| - // } else { | ||
| - // tq->tq_nalloc--; | ||
| - // mutex_exit(&tq->tq_lock); | ||
| - // kmem_free(t, sizeof (taskq_ent_t)); | ||
| - // mutex_enter(&tq->tq_lock); | ||
| - // } | ||
| - // | ||
| - // if (tq->tq_max_alloc_wait) { | ||
| - // cv_signal(&tq->tq_max_alloc_cv); | ||
| - // } | ||
| - // } | ||
| - | ||
| - fn taskq_dispatch(&mut self, func: TaskFn, flags: u64) -> TaskId { | ||
| - // self.threads[0].send(Task { func: func, flags: flags }); | ||
| - let index = self.next_task_id; | ||
| - self.next_task_id += 1; | ||
| - TaskId(index) | ||
| - } | ||
| - | ||
| - // fn taskq_dispatch(&mut self, func: TaskFn, flags: u64) -> TaskId { | ||
| - // taskq_ent_t *t; | ||
| - // | ||
| - // if taskq_now { | ||
| - // func(arg); | ||
| - // return 1; | ||
| - // } | ||
| - // | ||
| - // mutex_enter(&self.lock); | ||
| - // assert!(self.flags & TASKQ_ACTIVE); | ||
| - // if (t = self.alloc_task(tqflags)) == NULL { | ||
| - // mutex_exit(&self.lock); | ||
| - // return 0; | ||
| - // } | ||
| - // if tqflags & TQ_FRONT != 0 { | ||
| - // t.next = self.task.next; | ||
| - // t.prev = &self.task; | ||
| - // } else { | ||
| - // t.next = &self.task; | ||
| - // t.prev = self.task.prev; | ||
| - // } | ||
| - // t.next.prev = t; | ||
| - // t.prev.next = t; | ||
| - // t.func = func; | ||
| - // t.flags = 0; | ||
| - // cv_signal(&self.dispatch_cv); | ||
| - // mutex_exit(&self.lock); | ||
| - // return 1; | ||
| - // } | ||
| - // | ||
| - // taskqid_t | ||
| - // taskq_dispatch_delay(taskq_t *tq, task_func_t func, uint_t tqflags, | ||
| - // clock_t expire_time) | ||
| - // { | ||
| - // return 0; | ||
| - // } | ||
| - | ||
| - // pub fn empty_ent(&self) -> bool { | ||
| - // self.next == NULL | ||
| - // } | ||
| - | ||
| - // fn taskq_init_ent(taskq_ent_t *t) { | ||
| - // t.next = NULL; | ||
| - // t.prev = NULL; | ||
| - // t.func = NULL; | ||
| - // t.flags = 0; | ||
| - // } | ||
| - | ||
| - // fn taskq_dispatch_ent(taskq_t *tq, task_func_t func, uint_t flags, taskq_ent_t *t) { | ||
| - // assert!(func != NULL); | ||
| - // | ||
| - // Mark it as a prealloc'd task. This is important | ||
| - // to ensure that we don't free it later. | ||
| - // t.flags |= TQENT_FLAG_PREALLOC; | ||
| - // Enqueue the task to the underlying queue. | ||
| - // mutex_enter(&tq.lock); | ||
| - // | ||
| - // if (flags & TQ_FRONT) { | ||
| - // t.next = tq.task.next; | ||
| - // t.prev = &tq.task; | ||
| - // } else { | ||
| - // t.next = &tq.task; | ||
| - // t.prev = tq.task.prev; | ||
| - // } | ||
| - // t.next.prev = t; | ||
| - // t.prev.next = t; | ||
| - // t.func = func; | ||
| - // cv_signal(&tq.dispatch_cv); | ||
| - // mutex_exit(&tq.lock); | ||
| - // } | ||
| - | ||
| - // fn wait(&self) { | ||
| - // mutex_enter(&tq.lock); | ||
| - // while tq.task.next != &tq.task || tq.active > 0 { | ||
| - // cv_wait(&tq.wait_cv, &tq.lock); | ||
| - // } | ||
| - // mutex_exit(&tq.lock); | ||
| - // } | ||
| - // | ||
| - // fn wait_id(&self, id: TaskId) { | ||
| - // self.wait(); | ||
| - // } | ||
| - // | ||
| - // fn wait_outstanding(&self, id: TaskId) { | ||
| - // self.wait(); | ||
| - // } | ||
| - // | ||
| - // fn destroy(&mut self) { | ||
| - // int num_threads = tq->tq_num_threads; | ||
| - // | ||
| - // taskq_wait(tq); | ||
| - // | ||
| - // mutex_enter(&tq->tq_lock); | ||
| - // | ||
| - // tq->tq_flags &= ~TASKQ_ACTIVE; | ||
| - // cv_broadcast(&tq->tq_dispatch_cv); | ||
| - // | ||
| - // while tq->tq_num_threads > 0 { | ||
| - // cv_wait(&tq->tq_wait_cv, &tq->tq_lock); | ||
| - // } | ||
| - // | ||
| - // tq.min_alloc = 0; | ||
| - // while (tq.num_alloc != 0) { | ||
| - // ASSERT(tq->tq_freelist != NULL); | ||
| - // task_free(tq, task_alloc(tq, KM_SLEEP)); | ||
| - // } | ||
| - // | ||
| - // mutex_exit(&tq->tq_lock); | ||
| - // | ||
| - // kmem_free(tq->tq_thread_list, num_threads * sizeof (kthread_t *)); | ||
| - // | ||
| - // rw_destroy(&tq->tq_threadlock); | ||
| - // mutex_destroy(&tq->tq_lock); | ||
| - // cv_destroy(&tq->tq_dispatch_cv); | ||
| - // cv_destroy(&tq->tq_wait_cv); | ||
| - // cv_destroy(&tq->tq_max_alloc_cv); | ||
| - // | ||
| - // kmem_free(tq, sizeof (taskq_t)); | ||
| - // } | ||
| - // | ||
| - // pub fn member(&self, thread_id: ThreadId) -> bool { | ||
| - // for i in 0..self.num_threads { | ||
| - // if self.thread_list[i] == t { | ||
| - // return true; | ||
| - // } | ||
| - // } | ||
| - // | ||
| - // false | ||
| - // } | ||
| - | ||
| - pub fn cancel_id(&mut self, id: TaskId) -> zfs::Result<()> { | ||
| - Err(zfs::Error::NoEntity) | ||
| - } | ||
| -} | ||
| - | ||
| -// fn system_taskq_init() { | ||
| -// system_taskq = taskq_create("system_taskq", 64, maxclsyspri, 4, 512, | ||
| -// TASKQ_DYNAMIC | TASKQ_PREPOPULATE); | ||
| -// } | ||
| -// | ||
| -// fn system_taskq_fini() { | ||
| -// taskq_destroy(system_taskq); | ||
| -// system_taskq = NULL; // defensive | ||
| -// } | ||
| - | ||
| -//-------------------------------------------------------------------------------------------------// | ||
| - | ||
| -pub struct TaskId(usize); | ||
| - | ||
| -struct Task { | ||
| - // taskq_ent *next; | ||
| - // taskq_ent *prev; | ||
| - func: Box<FnMut()>, | ||
| - flags: u64, | ||
| -} | ||
| - | ||
| -//-------------------------------------------------------------------------------------------------// | ||
| - | ||
| -// fn taskq_thread(task_r: Receiver<Task>) { | ||
| -// while let Ok(task) = task_r.recv() { | ||
| -// (task.func)(); | ||
| -// } | ||
| -// } | ||
| - | ||
| -// fn taskq_thread(task_r: Receiver<Task>) { | ||
| -// taskq_t *tq = arg; | ||
| -// taskq_ent_t *t; | ||
| -// | ||
| -// mutex_enter(&tq.lock); | ||
| -// while tq.flags & TASKQ_ACTIVE != 0 { | ||
| -// if (t = tq.task.next) == &tq.task { | ||
| -// tq.active -= 1; | ||
| -// if tq.active == 0 { | ||
| -// cv_broadcast(&tq.wait_cv); | ||
| -// } | ||
| -// cv_wait(&tq.dispatch_cv, &tq.lock); | ||
| -// tq.active++; | ||
| -// continue; | ||
| -// } | ||
| -// t.prev.next = t.next; | ||
| -// t.next.prev = t.prev; | ||
| -// t.next = NULL; | ||
| -// t.prev = NULL; | ||
| -// mutex_exit(&tq.lock); | ||
| -// | ||
| -// rw_enter(&tq.threadlock, RW_READER); | ||
| -// t.func(t.arg); | ||
| -// rw_exit(&tq.threadlock); | ||
| -// | ||
| -// mutex_enter(&tq.lock); | ||
| -// if !t.flags & TQENT_FLAG_PREALLOC != 0 { | ||
| -// task_free(tq, t); | ||
| -// } | ||
| -// } | ||
| -// tq.num_threads--; | ||
| -// cv_broadcast(&tq.wait_cv); | ||
| -// mutex_exit(&tq.lock); | ||
| -// thread_exit(); | ||
| -// } |
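The commented-out code above hints at a design where each worker thread drains tasks from a channel. A minimal, self-contained sketch of that idea using std::sync::mpsc (the worker function and closure here are illustrative, not the crate's eventual implementation):

    use std::sync::mpsc::{channel, Receiver};
    use std::thread;

    type TaskFn = Box<dyn FnMut() + Send>;

    fn worker(rx: Receiver<TaskFn>) {
        // Run tasks until every Sender has been dropped.
        while let Ok(mut task) = rx.recv() {
            task();
        }
    }

    fn main() {
        let (tx, rx) = channel::<TaskFn>();
        let handle = thread::spawn(move || worker(rx));
        tx.send(Box::new(|| println!("task ran"))).unwrap();
        drop(tx); // close the queue so the worker exits
        handle.join().unwrap();
    }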
5
crates/zfs/txg.rs
| @@ -1,5 +0,0 @@ | ||
| -pub const DEFER_SIZE: usize = 2; | ||
| - | ||
| -pub const TXG_SIZE: usize = 4; | ||
| - | ||
| -pub const TXG_INITIAL: usize = TXG_SIZE; |
47
crates/zfs/uberblock.rs
| @@ -1,47 +0,0 @@ | ||
| -use std::{mem, ptr}; | ||
| - | ||
| -use super::from_bytes::FromBytes; | ||
| -use super::block_ptr::BlockPtr; | ||
| - | ||
| -const UBERBLOCK_MAGIC: u64 = 0x00bab10c; // oo-ba-bloc! | ||
| -pub const UBERBLOCK_SHIFT: u64 = 10; // up to 1K | ||
| - | ||
| -#[derive(Copy, Clone, Debug)] | ||
| -#[repr(packed)] | ||
| -pub struct Uberblock { | ||
| - pub magic: u64, | ||
| - pub version: u64, | ||
| - pub txg: u64, | ||
| - pub guid_sum: u64, | ||
| - pub timestamp: u64, | ||
| - pub rootbp: BlockPtr, | ||
| -} | ||
| - | ||
| -impl Uberblock { | ||
| - pub fn magic_little() -> u64 { | ||
| - return 0x0cb1ba00; | ||
| - } | ||
| - | ||
| - pub fn magic_big() -> u64 { | ||
| - return 0x00bab10c; | ||
| - } | ||
| -} | ||
| - | ||
| -impl FromBytes for Uberblock { | ||
| - fn from_bytes(data: &[u8]) -> Result<Self, String> { | ||
| - if data.len() >= mem::size_of::<Uberblock>() { | ||
| - let uberblock = unsafe { ptr::read(data.as_ptr() as *const Uberblock) }; | ||
| - if uberblock.magic == Uberblock::magic_little() { | ||
| - Ok(uberblock) | ||
| - } else if uberblock.magic == Uberblock::magic_big() { | ||
| - Ok(uberblock) | ||
| - } else { | ||
| - Err("Error: Invalid uberblock magic number".to_string()) | ||
| - } | ||
| - } else { | ||
| - Err(format!("Error: Need {} bytes to read uberblock, only {} in buffer", | ||
| - mem::size_of::<Uberblock>(), | ||
| - data.len())) | ||
| - } | ||
| - } | ||
| -} |
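ZFS keeps multiple uberblock copies in a ring inside each label and imports from the newest valid one. A hedged sketch of that selection, assuming the Uberblock type, its FromBytes impl, and UBERBLOCK_SHIFT above are in scope (newest_uberblock is an illustrative helper; real ZFS also verifies the checksum before trusting a slot):

    fn newest_uberblock(ring: &[u8]) -> Option<Uberblock> {
        let slot = 1usize << UBERBLOCK_SHIFT; // 1 KiB slots
        let mut best: Option<Uberblock> = None;
        for chunk in ring.chunks(slot) {
            if let Ok(ub) = Uberblock::from_bytes(chunk) {
                // Keep the candidate with the highest transaction group number.
                if best.map_or(true, |b| ub.txg > b.txg) {
                    best = Some(ub);
                }
            }
        }
        best
    }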
74
crates/zfs/util.rs
| @@ -1,74 +0,0 @@ | ||
| - | ||
| -// Compatibility macros/typedefs needed for Solaris -> Linux port | ||
| -pub fn p2_align(x: u64, align: u64) -> u64 { | ||
| - x & -(align as i64) as u64 | ||
| -} | ||
| - | ||
| -fn p2_cross(x: u64, y: u64, align: u64) -> bool { | ||
| - x ^ y > align - 1 | ||
| -} | ||
| - | ||
| -fn p2_round_up(x: u64, align: u64) -> u64 { | ||
| - ((x - 1) | (align - 1)) + 1 | ||
| -} | ||
| - | ||
| -fn p2_boundary(off: u64, len: u64, align: u64) -> bool { | ||
| - (off ^ (off + len - 1)) > (align - 1) | ||
| -} | ||
| - | ||
| -fn p2_phase(x: u64, align: u64) -> u64 { | ||
| - x & (align - 1) | ||
| -} | ||
| - | ||
| -fn p2_nphase(x: u64, align: u64) -> u64 { | ||
| - -(x as i64) as u64 & (align - 1) | ||
| -} | ||
| - | ||
| -fn p2_nphase_typed(x: u64, align: u64) -> u64 { | ||
| - -(x as i64) as u64 & (align - 1) | ||
| -} | ||
| - | ||
| -fn is_p2(x: u64) -> bool { | ||
| - x & (x - 1) == 0 | ||
| -} | ||
| - | ||
| -fn is_p2_aligned(v: u64, a: u64) -> bool { | ||
| - v & (a - 1) == 0 | ||
| -} | ||
| - | ||
| -pub fn highbit64(u: u64) -> u32 { | ||
| - 63 - u.leading_zeros() | ||
| -} | ||
| - | ||
| -// Typed version of the P2* macros. These macros should be used to ensure | ||
| -// that the result is correctly calculated based on the data type of (x), | ||
| -// which is passed in as the last argument, regardless of the data | ||
| -// type of the alignment. For example, if (x) is of type uint64_t, | ||
| -// and we want to round it up to a page boundary using "PAGESIZE" as | ||
| -// the alignment, we can do either | ||
| -// P2ROUNDUP(x, (uint64_t)PAGESIZE) | ||
| -// or | ||
| -// P2ROUNDUP_TYPED(x, PAGESIZE, uint64_t) | ||
| -// | ||
| -// #define P2ALIGN_TYPED(x, align, type) \ | ||
| -// ((type)(x) & -(type)(align)) | ||
| -// #define P2PHASE_TYPED(x, align, type) \ | ||
| -// ((type)(x) & ((type)(align) - 1)) | ||
| -// #define P2NPHASE_TYPED(x, align, type) \ | ||
| -// (-(type)(x) & ((type)(align) - 1)) | ||
| -// #define P2ROUNDUP_TYPED(x, align, type) \ | ||
| -// ((((type)(x) - 1) | ((type)(align) - 1)) + 1) | ||
| -// #define P2END_TYPED(x, align, type) \ | ||
| -// (-(~(type)(x) & -(type)(align))) | ||
| -// #define P2PHASEUP_TYPED(x, align, phase, type) \ | ||
| -// ((type)(phase) - (((type)(phase) - (type)(x)) & -(type)(align))) | ||
| -// #define P2CROSS_TYPED(x, y, align, type) \ | ||
| -// (((type)(x) ^ (type)(y)) > (type)(align) - 1) | ||
| -// #define P2SAMEHIGHBIT_TYPED(x, y, type) \ | ||
| -// (((type)(x) ^ (type)(y)) < ((type)(x) & (type)(y))) | ||
| -// | ||
| -// | ||
| -// avoid any possibility of clashing with <stddef.h> version | ||
| -// #if defined(_KERNEL) && !defined(_KMEMUSER) && !defined(offsetof) | ||
| -// #define offsetof(s, m) ((size_t)(&(((s *)0)->m))) | ||
| -// #endif |
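A small worked example of the power-of-two helpers above, with arbitrary values and assuming the helpers are callable from the example (several are private to this module):

    fn main() {
        let x: u64 = 5000;
        let align: u64 = 4096;
        assert_eq!(p2_align(x, align), 4096);    // round down to the boundary
        assert_eq!(p2_round_up(x, align), 8192); // ((5000 - 1) | 4095) + 1
        assert_eq!(p2_phase(x, align), 904);     // offset within the 4 KiB block
        assert_eq!(highbit64(align), 12);        // 4096 == 1 << 12
    }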
506
crates/zfs/vdev.rs
| @@ -1,506 +0,0 @@ | ||
| -use std::{cmp, mem}; | ||
| -use std::rc::Rc; | ||
| - | ||
| -use super::dmu_objset::ObjectSet; | ||
| -use super::from_bytes::FromBytes; | ||
| -use super::metaslab::{Metaslab, MetaslabClass, MetaslabGroup}; | ||
| -use super::nvpair::{NvList, NvValue}; | ||
| -use super::uberblock; | ||
| -use super::util; | ||
| -use super::vdev_file::VdevFile; | ||
| -use super::zfs; | ||
| - | ||
| -#[repr(packed)] | ||
| -pub struct VdevLabel { | ||
| - pub blank: [u8; 8 * 1024], | ||
| - pub boot_header: [u8; 8 * 1024], | ||
| - pub nv_pairs: [u8; 112 * 1024], | ||
| - pub uberblocks: [u8; 128 * 1024], | ||
| -} | ||
| - | ||
| -impl FromBytes for VdevLabel {} | ||
| - | ||
| -/// ///////////////////////////////////////////////////////////////////////////////////////////////// | ||
| - | ||
| -pub trait IVdevOps { | ||
| - /// Returns (size, max_size, ashift) | ||
| - fn open(&mut self, vdev: &mut Vdev) -> zfs::Result<(u64, u64, u64)>; | ||
| - | ||
| - fn close(&mut self, vdev: &mut Vdev); | ||
| - | ||
| - /// Default asize function: return the MAX of psize with the asize of all children. This is | ||
| - /// what's used by anything other than RAID-Z. | ||
| - fn asize(&mut self, vdev: &mut Vdev, psize: u64) -> u64; | ||
| - | ||
| - fn hold(&mut self, vdev: &mut Vdev); | ||
| - | ||
| - fn release(&mut self, vdev: &mut Vdev); | ||
| -} | ||
| - | ||
| -/// ///////////////////////////////////////////////////////////////////////////////////////////////// | ||
| - | ||
| -pub struct VdevOps { | ||
| - pub ops: Box<IVdevOps>, | ||
| - // io_start: fn(&zio::Zio), | ||
| - // io_done: fn(&zio::Zio), | ||
| - // state_change: fn(), | ||
| - vdev_type: String, | ||
| - is_leaf: bool, | ||
| -} | ||
| - | ||
| -impl VdevOps { | ||
| - pub fn vdev_type(&self) -> &str { | ||
| - self.vdev_type.as_ref() | ||
| - } | ||
| - pub fn is_leaf(&self) -> bool { | ||
| - self.is_leaf | ||
| - } | ||
| -} | ||
| - | ||
| -fn load_ops(vdev_type: &str, nv: &NvList) -> zfs::Result<VdevOps> { | ||
| - match vdev_type { | ||
| - "disk" => { | ||
| - Ok(VdevOps { | ||
| - ops: Box::new(try!(VdevFile::load(nv))), | ||
| - vdev_type: "disk".to_string(), | ||
| - is_leaf: true, | ||
| - }) | ||
| - } | ||
| - _ => Err(zfs::Error::Invalid), | ||
| - } | ||
| -} | ||
| - | ||
| -/// ///////////////////////////////////////////////////////////////////////////////////////////////// | ||
| -#[derive(Copy, Clone, Debug, PartialEq)] | ||
| -pub enum AllocType { | ||
| - Load = 0, | ||
| - Add, | ||
| - Spare, | ||
| - L2Cache, | ||
| - RootPool, | ||
| - Split, | ||
| - Attach, | ||
| -} | ||
| - | ||
| -/// ///////////////////////////////////////////////////////////////////////////////////////////////// | ||
| - | ||
| -/// States are ordered from least to most healthy. | ||
| -/// Vdevs `CannotOpen` and worse are considered unusable. | ||
| -#[derive(Copy, Clone, Debug, PartialEq)] | ||
| -pub enum State { | ||
| - Unknown, // Uninitialized vdev | ||
| - Closed, // Not currently open | ||
| - Offline, // Not allowed to open | ||
| - Removed, // Explicitly removed from the system | ||
| -    CannotOpen, // Tried to open, but failed | ||
| - Faulted, // External request to fault device | ||
| - Degraded, // Replicated vdev with unhealthy kids | ||
| - Healthy, // Presumed good | ||
| -} | ||
| - | ||
| -/// ///////////////////////////////////////////////////////////////////////////////////////////////// | ||
| - | ||
| -// Stuff that only top level vdevs have | ||
| -pub struct Top { | ||
| - pub ms_array: u64, // object ID of metaslab array in MOS | ||
| - pub ms_shift: u64, // metaslab shift | ||
| - pub ms_group: MetaslabGroup, // metaslab group | ||
| - pub metaslabs: Vec<Metaslab>, // in-memory metaslab array | ||
| - pub is_hole: bool, | ||
| - pub removing: bool, // device is being removed? | ||
| -} | ||
| - | ||
| -impl Top { | ||
| - pub fn new(ms_array: u64, ms_shift: u64, ms_group: MetaslabGroup) -> Self { | ||
| - Top { | ||
| - ms_array: ms_array, | ||
| - ms_shift: ms_shift, | ||
| - ms_group: ms_group, | ||
| - metaslabs: vec![], | ||
| - is_hole: false, // TODO: zol checks vdev_ops for this, but idk what to do yet | ||
| - removing: false, | ||
| - } | ||
| - } | ||
| -} | ||
| - | ||
| -/// ///////////////////////////////////////////////////////////////////////////////////////////////// | ||
| - | ||
| -pub struct Leaf { | ||
| - whole_disk: u64, | ||
| -} | ||
| - | ||
| -impl Leaf { | ||
| - pub fn new() -> Self { | ||
| - Leaf { whole_disk: 0 } | ||
| - } | ||
| -} | ||
| - | ||
| -/// ///////////////////////////////////////////////////////////////////////////////////////////////// | ||
| - | ||
| -// Note that a vdev can be a top-level, a leaf, both, or neither | ||
| -pub struct Vdev { | ||
| - id: u64, // child number in vdev parent | ||
| - guid: u64, // unique ID for this vdev | ||
| - guid_sum: u64, // self guid + all child guids | ||
| - orig_guid: u64, // orig. guid prior to remove | ||
| - asize: u64, // allocatable device capacity | ||
| - min_asize: u64, // min acceptable asize | ||
| - max_asize: u64, // max acceptable asize | ||
| - pub ashift: u64, // block alignment shift | ||
| - state: State, | ||
| - prev_state: State, | ||
| - pub ops: VdevOps, | ||
| - parent: Option<TreeIndex>, | ||
| - top_vdev: Option<TreeIndex>, | ||
| - children: Vec<TreeIndex>, | ||
| - create_txg: u64, // txg when top-level was added | ||
| - | ||
| - pub top: Option<Top>, | ||
| - pub leaf: Option<Leaf>, | ||
| -} | ||
| - | ||
| -impl Vdev { | ||
| - pub fn new(id: u64, | ||
| - guid: Option<u64>, | ||
| - ashift: u64, | ||
| - ops: VdevOps, | ||
| - create_txg: u64, | ||
| - vdev_top: Option<Top>) | ||
| - -> Self { | ||
| - let guid = guid.unwrap_or_else(|| { | ||
| - // TODO: generate a guid | ||
| - 0 | ||
| - }); | ||
| - | ||
| - // TODO vdev_queue_init | ||
| - | ||
| - Vdev { | ||
| - id: id, | ||
| - guid: guid, | ||
| - guid_sum: guid, // No children yet, so guid_sum is just my guid | ||
| - orig_guid: 0, | ||
| - asize: 0, | ||
| - min_asize: 0, | ||
| - max_asize: 0, | ||
| - ashift: ashift, | ||
| - state: State::Closed, | ||
| - prev_state: State::Unknown, | ||
| - ops: ops, | ||
| - parent: None, | ||
| - top_vdev: None, | ||
| - children: Vec::new(), | ||
| - create_txg: create_txg, | ||
| - | ||
| - top: vdev_top, | ||
| - leaf: None, | ||
| - } | ||
| - } | ||
| - | ||
| - pub fn load(normal_class: &Rc<MetaslabClass>, | ||
| - nv: &NvList, | ||
| - id: u64, | ||
| - parent: Option<TreeIndex>, | ||
| - vdev_tree: &Tree, | ||
| - alloc_type: AllocType) | ||
| - -> zfs::Result<Self> { | ||
| - let vdev_type = try!(nv.get::<&String>("type").ok_or(zfs::Error::Invalid)).clone(); | ||
| - | ||
| - let ops = try!(load_ops(vdev_type.as_ref(), nv)); | ||
| - | ||
| - if alloc_type == AllocType::Load { | ||
| - // Verify the provided id matches the id written in the MOS | ||
| - let label_id: u64 = try!(nv.get("id").ok_or(zfs::Error::Invalid)); | ||
| - if label_id != id { | ||
| - return Err(zfs::Error::Invalid); | ||
| - } | ||
| - } | ||
| - | ||
| - // If this is some sort of load, then we read the guid from the nvpairs. Otherwise, | ||
| - // Vdev::new will generate one for us | ||
| - let guid = match alloc_type { | ||
| - AllocType::Load | AllocType::Spare | AllocType::L2Cache | AllocType::RootPool => { | ||
| - Some(try!(nv.get("guid").ok_or(zfs::Error::Invalid))) | ||
| - } | ||
| - _ => None, | ||
| - }; | ||
| - | ||
| - let create_txg = try!(nv.get("create_txg").ok_or(zfs::Error::Invalid)); | ||
| - let ashift = try!(nv.get("ashift").ok_or(zfs::Error::Invalid)); | ||
| - | ||
| - let mut vdev_top = None; | ||
| - | ||
| - // If we're a top-level vdev, try to load the allocation parameters, | ||
| - // create the metaslab group, and create the vdev::Top | ||
| - if let Some(parent) = parent { | ||
| - if parent.get(vdev_tree).parent.is_none() { | ||
| - let mut ms_array = 0; | ||
| - let mut ms_shift = 0; | ||
| - if alloc_type == AllocType::Load || alloc_type == AllocType::Split { | ||
| - ms_array = try!(nv.get("metaslab_array").ok_or(zfs::Error::Invalid)); | ||
| - ms_shift = try!(nv.get("metaslab_shift").ok_or(zfs::Error::Invalid)); | ||
| - // let asize = try!(nv.get("asize").ok_or(zfs::Error::Invalid)); | ||
| - // let removing = try!(nv.get("removing").ok_or(zfs::Error::Invalid)); | ||
| - } | ||
| - | ||
| - if alloc_type != AllocType::Attach { | ||
| - assert!(alloc_type == AllocType::Load || alloc_type == AllocType::Add || | ||
| - alloc_type == AllocType::Split || | ||
| - alloc_type == AllocType::RootPool); | ||
| - let ms_group = MetaslabGroup::create(normal_class.clone()); | ||
| - | ||
| - vdev_top = Some(Top::new(ms_array, ms_shift, ms_group)); | ||
| - } | ||
| - } | ||
| - } | ||
| - | ||
| - let mut vdev = Self::new(id, guid, ashift, ops, create_txg, vdev_top); | ||
| - vdev.parent = parent; | ||
| - | ||
| - Ok(vdev) | ||
| - } | ||
| - | ||
| - fn open(&mut self) -> zfs::Result<()> { | ||
| - Ok(()) | ||
| - } | ||
| - | ||
| - fn metaslab_init(&mut self, mos: &mut ObjectSet, txg: u64) -> zfs::Result<()> { | ||
| - // We assume this is a top-level vdev | ||
| - let ref mut top = try!(self.top.as_mut().ok_or(zfs::Error::Invalid)); | ||
| - | ||
| - let old_count = top.metaslabs.len(); | ||
| - let new_count = (self.asize >> top.ms_shift) as usize; | ||
| - | ||
| - // assert!(txg == 0 || spa_config_held(spa, SCL_ALLOC, RW_WRITER)); | ||
| - | ||
| - // Return if vdev isn't being allocated from yet | ||
| - if top.ms_shift == 0 { | ||
| - return Ok(()); | ||
| - } | ||
| - assert!(!top.is_hole); // Must not be a hole | ||
| - | ||
| - // Compute the raidz-deflation ratio. Note, we hard-code | ||
| - // in 128k (1 << 17) because it is the "typical" blocksize. | ||
| - // Even though SPA_MAXBLOCKSIZE changed, this algorithm can not change, | ||
| - // otherwise it would inconsistently account for existing bp's. | ||
| - // vd->vdev_deflate_ratio = (1 << 17) / (vdev_psize_to_asize(vd, 1 << 17) >> SPA_MINBLOCKSHIFT); | ||
| - | ||
| - assert!(old_count <= new_count); | ||
| - | ||
| - for m in old_count..new_count { | ||
| - let object: u64 = 0; | ||
| - | ||
| - if txg == 0 { | ||
| - // try!(dmu_read(mos, top.ms_array, m * mem::size_of::<u64>(), | ||
| - // mem::size_of::<u64>(), &object, DMU_READ_PREFETCH)); | ||
| - } | ||
| - | ||
| - // let metaslab = try!(Metaslab::init(mos, self, m as u64, object, txg)); | ||
| - // top.metaslabs.push(metaslab); | ||
| - } | ||
| - | ||
| - // if (txg == 0) | ||
| - // spa_config_enter(spa, SCL_ALLOC, FTAG, RW_WRITER); | ||
| - | ||
| - // If the vdev is being removed we don't activate | ||
| - // the metaslabs since we want to ensure that no new | ||
| - // allocations are performed on this device. | ||
| - if old_count == 0 && !top.removing { | ||
| - // metaslab_group_activate(vd.mg); | ||
| - } | ||
| - | ||
| - // if (txg == 0) | ||
| - // spa_config_exit(spa, SCL_ALLOC, FTAG); | ||
| - | ||
| - Ok(()) | ||
| - } | ||
| - | ||
| - // Get the minimum allocatable size. We define the allocatable size as | ||
| - // the vdev's asize rounded to the nearest metaslab. This allows us to | ||
| - // replace or attach devices which don't have the same physical size but | ||
| - // can still satisfy the same number of allocations. | ||
| - // fn get_min_asize(&self, parent: Option<&Vdev>) -> u64 { | ||
| - // vdev_t *pvd = vd->vdev_parent; | ||
| - // | ||
| - // If our parent is NULL (inactive spare or cache) or is the root, | ||
| - // just return our own asize. | ||
| - // if self.parent.is_none() { | ||
| - // return self.asize; | ||
| - // } | ||
| - // | ||
| - // The top-level vdev just returns the allocatable size rounded | ||
| - // to the nearest metaslab. | ||
| - // if let Some(ref top) = self.top { | ||
| - // return util::p2_align(self.asize, 1u64 << top.ms_shift); | ||
| - // } | ||
| - // | ||
| - // The allocatable space for a raidz vdev is N * sizeof(smallest child), | ||
| - // so each child must provide at least 1/Nth of its asize. | ||
| - // if pvd->vdev_ops == &vdev_raidz_ops { | ||
| - // return pvd->vdev_min_asize / pvd->vdev_children; | ||
| - // } | ||
| - // | ||
| - // pvd->vdev_min_asize | ||
| - // } | ||
| - | ||
| - | ||
| - // pub fn dirty(&mut self, flags: u64, void *arg, txg: u64) { | ||
| - // We assume this is a top-level vdev | ||
| - // let ref top = self.top.unwrap(); | ||
| - // | ||
| - // assert!(self == self.top_vdev); | ||
| - // assert!(!self.is_hole); | ||
| - // assert!(util::is_p2(flags)); | ||
| - // assert!(spa_writeable(self.spa)); | ||
| - // | ||
| - // if flags & DIRTY_METASLAB { | ||
| - // txg_list_add(&self.ms_list, arg, txg); | ||
| - // } | ||
| - // | ||
| - // if flags & DIRTY_DTL { | ||
| - // txg_list_add(&self.dtl_list, arg, txg); | ||
| - // } | ||
| - // | ||
| - // txg_list_add(&self.spa.vdev_txg_list, self, txg); | ||
| - // } | ||
| - | ||
| - pub fn uberblock_shift(&self) -> u64 { | ||
| - cmp::min(cmp::max(self.ashift, uberblock::UBERBLOCK_SHIFT), | ||
| - MAX_UBERBLOCK_SHIFT) | ||
| - } | ||
| - | ||
| - pub fn uberblock_count(&self) -> u64 { | ||
| - UBERBLOCK_RING >> self.uberblock_shift() | ||
| - } | ||
| - | ||
| - // pub fn uberblock_offset(&self, n) -> u64 { | ||
| - // offsetof(vdev_label_t, vl_uberblock[n << self.uberblock_shift()]) | ||
| - // } | ||
| - | ||
| - pub fn uberblock_size(&self) -> u64 { | ||
| - 1 << self.uberblock_shift() | ||
| - } | ||
| -} | ||
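As a worked example of the uberblock ring arithmetic above, take a hypothetical leaf vdev with 4 KiB sectors (ashift = 12): the shift clamps to 12, each slot is 4096 bytes, and the 128 KiB ring holds 32 uberblocks:

    fn main() {
        let uberblock_ring: u64 = 128 << 10;     // UBERBLOCK_RING
        let ashift: u64 = 12;                    // hypothetical 4 KiB-sector leaf
        let shift = ashift.max(10).min(13);      // clamp between UBERBLOCK_SHIFT and MAX_UBERBLOCK_SHIFT
        assert_eq!(1u64 << shift, 4096);         // uberblock_size()
        assert_eq!(uberblock_ring >> shift, 32); // uberblock_count()
    }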
| - | ||
| -/// ///////////////////////////////////////////////////////////////////////////////////////////////// | ||
| -#[derive(Copy, Clone, PartialEq)] | ||
| -pub struct TreeIndex(usize); | ||
| - | ||
| -impl TreeIndex { | ||
| - pub fn get<'a>(&self, tree: &'a Tree) -> &'a Vdev { | ||
| - tree.nodes[self.0].as_ref().unwrap() | ||
| - } | ||
| - | ||
| - pub fn get_mut<'a>(&self, tree: &'a mut Tree) -> &'a mut Vdev { | ||
| - tree.nodes[self.0].as_mut().unwrap() | ||
| - } | ||
| -} | ||
| - | ||
| -/// ///////////////////////////////////////////////////////////////////////////////////////////////// | ||
| - | ||
| -pub struct Tree { | ||
| - nodes: Vec<Option<Vdev>>, | ||
| - free: Vec<usize>, | ||
| -} | ||
| - | ||
| -impl Tree { | ||
| - pub fn new() -> Self { | ||
| - Tree { | ||
| - nodes: Vec::new(), | ||
| - free: Vec::new(), | ||
| - } | ||
| - } | ||
| - | ||
| - pub fn add(&mut self, vdev: Vdev) -> TreeIndex { | ||
| - let parent = vdev.parent; | ||
| - let guid = vdev.guid; | ||
| - | ||
| - // Add the vdev node | ||
| - let index = TreeIndex(match self.free.pop() { | ||
| - Some(free_index) => { | ||
| - self.nodes[free_index] = Some(vdev); | ||
| - free_index | ||
| - } | ||
| - None => { | ||
| - self.nodes.push(Some(vdev)); | ||
| - self.nodes.len() - 1 | ||
| - } | ||
| - }); | ||
| - | ||
| - index.get_mut(self).top_vdev = parent.map(|parent| { | ||
| - parent.get(self).top_vdev.unwrap_or(index) | ||
| - }); | ||
| - | ||
| - if let Some(parent) = parent { | ||
| - parent.get_mut(self).guid_sum += guid; | ||
| - parent.get_mut(self).children.push(index); | ||
| - } | ||
| - | ||
| - index | ||
| - } | ||
| - | ||
| - pub fn parse(&mut self, | ||
| - normal_class: &Rc<MetaslabClass>, | ||
| - nv: &NvList, | ||
| - parent: Option<TreeIndex>, | ||
| - alloc_type: AllocType) | ||
| - -> zfs::Result<TreeIndex> { | ||
| - let vdev = try!(Vdev::load(normal_class, nv, 0, parent, self, alloc_type)); | ||
| - let index = self.add(vdev); | ||
| - | ||
| - // Done parsing if this is a leaf | ||
| - if index.get(self).ops.is_leaf() { | ||
| - return Ok(index); | ||
| - } | ||
| - | ||
| - // Get the vdev's children | ||
| - let children: &Vec<NvList> = try!(nv.get("children").ok_or(zfs::Error::Invalid)); | ||
| - | ||
| - for child in children { | ||
| -            try!(self.parse(normal_class, child, Some(index), alloc_type)); | ||
| - } | ||
| - | ||
| - Ok(index) | ||
| - } | ||
| - | ||
| - pub fn load(&mut self, mos: &mut ObjectSet, root: TreeIndex) { | ||
| - // We use an iterative solution because of borrowing issues | ||
| - let mut queue = vec![root]; | ||
| - | ||
| - while let Some(index) = queue.pop() { | ||
| - let vdev = index.get_mut(self); | ||
| - | ||
| - // Recursively load all children | ||
| - for child in &vdev.children { | ||
| - queue.push(*child); | ||
| - } | ||
| - | ||
| - // Load metaslabs for top-level vdevs | ||
| - // if let Some(ref top) = vdev.top { | ||
| - if vdev.top.is_some() { | ||
| - // if !top.is_hole { | ||
| - if vdev.ashift == 0 || vdev.asize == 0 || vdev.metaslab_init(mos, 0).is_err() { | ||
| - // TODO: Set vdev state to error | ||
| - } | ||
| - // } | ||
| - } | ||
| - | ||
| - // TODO: Load DTL for leaf vdevs | ||
| - } | ||
| - } | ||
| -} | ||
| - | ||
| -/// ///////////////////////////////////////////////////////////////////////////////////////////////// | ||
| - | ||
| -const DIRTY_METASLAB: u64 = 0x01; | ||
| -const DIRTY_DTL: u64 = 0x02; | ||
| - | ||
| -const RAIDZ_MAXPARITY: usize = 3; | ||
| - | ||
| -const PAD_SIZE: u64 = 8 << 10; | ||
| -// 2 padding areas (vl_pad1 and vl_pad2) to skip | ||
| -const SKIP_SIZE: u64 = PAD_SIZE * 2; | ||
| -const PHYS_SIZE: u64 = 112 << 10; | ||
| -const UBERBLOCK_RING: u64 = 128 << 10; | ||
| - | ||
| -// The largest uberblock we support is 8k. | ||
| -const MAX_UBERBLOCK_SHIFT: u64 = 13; |
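The Tree/TreeIndex pair above is an index-based arena: vdevs refer to their parents and children by slot index rather than by reference, which keeps a mutable graph workable under the borrow checker. A self-contained sketch of the same pattern with a hypothetical Node type (not the crate's Vdev):

    struct Node {
        value: u64,
        children: Vec<usize>,
    }

    struct Arena {
        nodes: Vec<Option<Node>>,
        free: Vec<usize>,
    }

    impl Arena {
        fn new() -> Self {
            Arena { nodes: Vec::new(), free: Vec::new() }
        }

        // Reuse a freed slot if one exists, otherwise grow the vector.
        fn add(&mut self, node: Node) -> usize {
            match self.free.pop() {
                Some(i) => { self.nodes[i] = Some(node); i }
                None => { self.nodes.push(Some(node)); self.nodes.len() - 1 }
            }
        }

        fn get_mut(&mut self, i: usize) -> &mut Node {
            self.nodes[i].as_mut().unwrap()
        }
    }

    fn main() {
        let mut arena = Arena::new();
        let root = arena.add(Node { value: 1, children: vec![] });
        let child = arena.add(Node { value: 2, children: vec![] });
        arena.get_mut(root).children.push(child); // link by index, not by reference
        assert_eq!(arena.get_mut(root).children.len(), 1);
    }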
34
crates/zfs/vdev_file.rs
| @@ -1,34 +0,0 @@ | ||
| -use super::nvpair::NvList; | ||
| -use super::{vdev, zfs}; | ||
| - | ||
| -pub struct VdevFile { | ||
| - path: String, | ||
| -} | ||
| - | ||
| -impl VdevFile { | ||
| - pub fn load(nv: &NvList) -> zfs::Result<Self> { | ||
| - Ok(VdevFile { path: try!(nv.get::<&String>("path").ok_or(zfs::Error::Invalid)).clone() }) | ||
| - } | ||
| - | ||
| - // pub fn io_start(zio: &zio::Zio); | ||
| - | ||
| - // pub fn io_done(zio: &zio::Zio); | ||
| - | ||
| - // pub fn state_change(); | ||
| -} | ||
| - | ||
| -impl vdev::IVdevOps for VdevFile { | ||
| - fn open(&mut self, vdev: &mut vdev::Vdev) -> zfs::Result<(u64, u64, u64)> { | ||
| - Ok((0, 0, 0)) | ||
| - } | ||
| - | ||
| - fn close(&mut self, vdev: &mut vdev::Vdev) {} | ||
| - | ||
| - fn asize(&mut self, vdev: &mut vdev::Vdev, psize: u64) -> u64 { | ||
| - 0 | ||
| - } | ||
| - | ||
| - fn hold(&mut self, vdev: &mut vdev::Vdev) {} | ||
| - | ||
| - fn release(&mut self, vdev: &mut vdev::Vdev) {} | ||
| -} |
1,011
crates/zfs/vdev_label.rs
| @@ -1,1011 +0,0 @@ | ||
| -use std::mem; | ||
| - | ||
| -use super::vdev::VdevLabel; | ||
| - | ||
| -// vdev_dirty() flags | ||
| -const VDD_METASLAB: u64 = 0x01; | ||
| -const VDD_DTL: u64 = 0x02; | ||
| - | ||
| -// Offset of embedded boot loader region on each label | ||
| -const VDEV_BOOT_OFFSET: usize = 2 * mem::size_of::<VdevLabel>(); | ||
| -// Size of embedded boot loader region on each label. | ||
| -// The total size of the first two labels plus the boot area is 4MB. | ||
| -const VDEV_BOOT_SIZE: usize = 7 << 19; // 3.5M | ||
| - | ||
| -// Size of label regions at the start and end of each leaf device. | ||
| -const VDEV_LABEL_START_SIZE: usize = (2 * mem::size_of::<VdevLabel>() + VDEV_BOOT_SIZE); | ||
| -const VDEV_LABEL_END_SIZE: usize = (2 * mem::size_of::<VdevLabel>()); | ||
| -const VDEV_LABELS: u8 = 4; | ||
| -const VDEV_BEST_LABEL: u8 = VDEV_LABELS; | ||
| - | ||
| -// Basic routines to read and write from a vdev label. | ||
| -// Used throughout the rest of this file. | ||
| -fn vdev_label_offset(psize: u64, l: u8, offset: u64) -> u64 { | ||
| -    assert!(offset < mem::size_of::<VdevLabel>() as u64); | ||
| - //assert!(P2PHASE_TYPED(psize, mem::size_of::<VdevLabel>(), u64) == 0); | ||
| - | ||
| - offset + (l as u64) * (mem::size_of::<VdevLabel>() as u64) + | ||
| - if l < VDEV_LABELS / 2 { | ||
| - 0 | ||
| - } else { | ||
| - psize - (VDEV_LABELS as u64) * (mem::size_of::<VdevLabel>() as u64) | ||
| - } | ||
| -} | ||
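A worked example of the placement computed by vdev_label_offset above, assuming the 256 KiB VdevLabel from vdev.rs and a hypothetical 1 GiB device: labels 0 and 1 sit at the front of the device and labels 2 and 3 at the back:

    fn main() {
        let label_size: u64 = 256 * 1024; // size_of::<VdevLabel>()
        let psize: u64 = 1 << 30;         // hypothetical 1 GiB device
        let offset = |l: u64| {
            l * label_size + (if l < 2 { 0 } else { psize - 4 * label_size })
        };
        assert_eq!(offset(0), 0);
        assert_eq!(offset(1), 256 * 1024);
        assert_eq!(offset(2), psize - 2 * label_size);
        assert_eq!(offset(3), psize - label_size);
    }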
| - | ||
| -// Returns back the vdev label associated with the passed in offset. | ||
| -fn vdev_label_number(psize: u64, mut offset: u64) -> Option<u64> { | ||
| -    if offset >= psize - VDEV_LABEL_END_SIZE as u64 { | ||
| -        offset -= psize - VDEV_LABEL_END_SIZE as u64; | ||
| - offset += ((VDEV_LABELS as u64) / 2) * (mem::size_of::<VdevLabel>() as u64); | ||
| - } | ||
| - let l = offset / (mem::size_of::<VdevLabel>() as u64); | ||
| - if l < (VDEV_LABELS as u64) { | ||
| - Some(l) | ||
| - } else { | ||
| - None | ||
| - } | ||
| -} | ||
| - | ||
| -fn vdev_label_read(zio_t *zio, vdev_t *vd, l: u8, void *buf, offset: u64, | ||
| - size: u64, zio_done_func_t *done, void *private, flags: u64) { | ||
| - //assert!(spa_config_held(zio->io_spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); | ||
| - //assert!(flags & ZIO_FLAG_CONFIG_WRITER); | ||
| - | ||
| - Zio::read_phys(zio, vd, vdev_label_offset(vd.psize, l, offset), | ||
| - size, buf, ZIO_CHECKSUM_LABEL, done, private, | ||
| - zio::Priority::SyncRead, flags, true).no_wait(); | ||
| -} | ||
| - | ||
| -static void | ||
| -vdev_label_write(zio_t *zio, vdev_t *vd, l: u8, void *buf, uint64_t offset, | ||
| - uint64_t size, zio_done_func_t *done, void *private, int flags) | ||
| -{ | ||
| - assert!(spa_config_held(zio->io_spa, SCL_ALL, RW_WRITER) == SCL_ALL || | ||
| - (spa_config_held(zio->io_spa, SCL_CONFIG | SCL_STATE, RW_READER) == | ||
| - (SCL_CONFIG | SCL_STATE) && | ||
| - dsl_pool_sync_context(spa_get_dsl(zio->io_spa)))); | ||
| - assert!(flags & ZIO_FLAG_CONFIG_WRITER); | ||
| - | ||
| - zio.write_phys(vd, vdev_label_offset(vd->vdev_psize, l, offset), | ||
| - size, buf, ZIO_CHECKSUM_LABEL, done, private, | ||
| - ZIO_PRIORITY_SYNC_WRITE, flags, true).no_wait(); | ||
| -} | ||
| - | ||
| -// Generate the nvlist representing this vdev's config. | ||
| -fn vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, vdev_config_flag_t flags) -> NvList { | ||
| - let nv = NvList::new(0); | ||
| - | ||
| - nv.add("type".to_string(), NvValue::String(vd.ops.vdev_type)); | ||
| - if !(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) { | ||
| - nv.add("id".to_string(), NvValue::Uint64(vd.id)); | ||
| - } | ||
| - nv.add("guid".to_string(), NvValue::Uint64(vd.guid)); | ||
| - | ||
| - if (vd->vdev_path != NULL) | ||
| - fnvlist_add_string(nv, ZPOOL_CONFIG_PATH, vd->vdev_path); | ||
| - | ||
| - if (vd->vdev_devid != NULL) | ||
| - fnvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vd->vdev_devid); | ||
| - | ||
| - if (vd->vdev_physpath != NULL) | ||
| - fnvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH, | ||
| - vd->vdev_physpath); | ||
| - | ||
| - if (vd->vdev_fru != NULL) | ||
| - fnvlist_add_string(nv, ZPOOL_CONFIG_FRU, vd->vdev_fru); | ||
| - | ||
| - if (vd->vdev_nparity != 0) { | ||
| - assert!(strcmp(vd->vdev_ops->vdev_op_type, | ||
| - VDEV_TYPE_RAIDZ) == 0); | ||
| - | ||
| - // Make sure someone hasn't managed to sneak a fancy new vdev | ||
| - // into a crufty old storage pool. | ||
| - assert!(vd->vdev_nparity == 1 || | ||
| - (vd->vdev_nparity <= 2 && | ||
| - spa_version(spa) >= SPA_VERSION_RAIDZ2) || | ||
| - (vd->vdev_nparity <= 3 && | ||
| - spa_version(spa) >= SPA_VERSION_RAIDZ3)); | ||
| - | ||
| - // Note that we'll add the nparity tag even on storage pools | ||
| - // that only support a single parity device -- older software | ||
| - // will just ignore it. | ||
| - fnvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, vd->vdev_nparity); | ||
| - } | ||
| - | ||
| - if (vd->vdev_wholedisk != -1ULL) | ||
| - fnvlist_add_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, | ||
| - vd->vdev_wholedisk); | ||
| - | ||
| - if (vd->vdev_not_present) | ||
| - fnvlist_add_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, 1); | ||
| - | ||
| - if (vd->vdev_isspare) | ||
| - fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_SPARE, 1); | ||
| - | ||
| - if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) && | ||
| - vd == vd->vdev_top) { | ||
| - fnvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY, | ||
| - vd->vdev_ms_array); | ||
| - fnvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT, | ||
| - vd->vdev_ms_shift); | ||
| - fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASHIFT, vd->vdev_ashift); | ||
| - fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASIZE, | ||
| - vd->vdev_asize); | ||
| - fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_LOG, vd->vdev_islog); | ||
| - if (vd->vdev_removing) | ||
| - fnvlist_add_uint64(nv, ZPOOL_CONFIG_REMOVING, | ||
| - vd->vdev_removing); | ||
| - } | ||
| - | ||
| - if (vd->vdev_dtl_sm != NULL) { | ||
| - fnvlist_add_uint64(nv, ZPOOL_CONFIG_DTL, | ||
| - space_map_object(vd->vdev_dtl_sm)); | ||
| - } | ||
| - | ||
| - if (vd->vdev_crtxg) | ||
| - fnvlist_add_uint64(nv, ZPOOL_CONFIG_CREATE_TXG, vd->vdev_crtxg); | ||
| - | ||
| - if (getstats) { | ||
| - vdev_stat_t vs; | ||
| - pool_scan_stat_t ps; | ||
| - | ||
| - vdev_get_stats(vd, &vs); | ||
| - fnvlist_add_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, | ||
| - (uint64_t *)&vs, sizeof (vs) / sizeof (uint64_t)); | ||
| - | ||
| - // provide either current or previous scan information | ||
| - if (spa_scan_get_stats(spa, &ps) == 0) { | ||
| - fnvlist_add_uint64_array(nv, | ||
| - ZPOOL_CONFIG_SCAN_STATS, (uint64_t *)&ps, | ||
| - sizeof (pool_scan_stat_t) / sizeof (uint64_t)); | ||
| - } | ||
| - } | ||
| - | ||
| - if (!vd->vdev_ops->vdev_op_leaf) { | ||
| - nvlist_t **child; | ||
| - int c, idx; | ||
| - | ||
| - assert!(!vd->vdev_ishole); | ||
| - | ||
| - child = kmem_alloc(vd->vdev_children * sizeof (nvlist_t *), | ||
| - KM_SLEEP); | ||
| - | ||
| - for (c = 0, idx = 0; c < vd->vdev_children; c++) { | ||
| - vdev_t *cvd = vd->vdev_child[c]; | ||
| - | ||
| - // If we're generating an nvlist of removing | ||
| - // vdevs then skip over any device which is | ||
| - // not being removed. | ||
| - if ((flags & VDEV_CONFIG_REMOVING) && | ||
| - !cvd->vdev_removing) | ||
| - continue; | ||
| - | ||
| - child[idx++] = vdev_config_generate(spa, cvd, | ||
| - getstats, flags); | ||
| - } | ||
| - | ||
| - if (idx) { | ||
| - fnvlist_add_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, | ||
| - child, idx); | ||
| - } | ||
| - | ||
| - for (c = 0; c < idx; c++) | ||
| - nvlist_free(child[c]); | ||
| - | ||
| - kmem_free(child, vd->vdev_children * sizeof (nvlist_t *)); | ||
| - | ||
| - } else { | ||
| - const char *aux = NULL; | ||
| - | ||
| - if (vd->vdev_offline && !vd->vdev_tmpoffline) | ||
| - fnvlist_add_uint64(nv, ZPOOL_CONFIG_OFFLINE, true); | ||
| - if (vd->vdev_resilver_txg != 0) | ||
| - fnvlist_add_uint64(nv, ZPOOL_CONFIG_RESILVER_TXG, | ||
| - vd->vdev_resilver_txg); | ||
| - if (vd->vdev_faulted) | ||
| - fnvlist_add_uint64(nv, ZPOOL_CONFIG_FAULTED, true); | ||
| - if (vd->vdev_degraded) | ||
| - fnvlist_add_uint64(nv, ZPOOL_CONFIG_DEGRADED, true); | ||
| - if (vd->vdev_removed) | ||
| - fnvlist_add_uint64(nv, ZPOOL_CONFIG_REMOVED, true); | ||
| - if (vd->vdev_unspare) | ||
| - fnvlist_add_uint64(nv, ZPOOL_CONFIG_UNSPARE, true); | ||
| - if (vd->vdev_ishole) | ||
| - fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_HOLE, true); | ||
| - | ||
| - switch (vd->vdev_stat.vs_aux) { | ||
| - case VDEV_AUX_ERR_EXCEEDED: | ||
| - aux = "err_exceeded"; | ||
| - break; | ||
| - | ||
| - case VDEV_AUX_EXTERNAL: | ||
| - aux = "external"; | ||
| - break; | ||
| - } | ||
| - | ||
| - if (aux != NULL) | ||
| - fnvlist_add_string(nv, ZPOOL_CONFIG_AUX_STATE, aux); | ||
| - | ||
| - if (vd->vdev_splitting && vd->vdev_orig_guid != 0LL) { | ||
| - fnvlist_add_uint64(nv, ZPOOL_CONFIG_ORIG_GUID, | ||
| - vd->vdev_orig_guid); | ||
| - } | ||
| - } | ||
| - | ||
| - return (nv); | ||
| -} | ||
| - | ||
| -// Generate a view of the top-level vdevs. If we currently have holes | ||
| -// in the namespace, then generate an array which contains a list of holey | ||
| -// vdevs. Additionally, add the number of top-level children that currently | ||
| -// exist. | ||
| -void | ||
| -vdev_top_config_generate(spa_t *spa, nvlist_t *config) | ||
| -{ | ||
| - vdev_t *rvd = spa->spa_root_vdev; | ||
| - uint64_t *array; | ||
| - uint_t c, idx; | ||
| - | ||
| - array = kmem_alloc(rvd->vdev_children * sizeof (uint64_t), KM_SLEEP); | ||
| - | ||
| - for (c = 0, idx = 0; c < rvd->vdev_children; c++) { | ||
| - vdev_t *tvd = rvd->vdev_child[c]; | ||
| - | ||
| - if (tvd->vdev_ishole) | ||
| - array[idx++] = c; | ||
| - } | ||
| - | ||
| - if (idx) { | ||
| - VERIFY(nvlist_add_uint64_array(config, ZPOOL_CONFIG_HOLE_ARRAY, | ||
| - array, idx) == 0); | ||
| - } | ||
| - | ||
| - VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN, | ||
| - rvd->vdev_children) == 0); | ||
| - | ||
| - kmem_free(array, rvd->vdev_children * sizeof (uint64_t)); | ||
| -} | ||
| - | ||
| -// Returns the configuration from the label of the given vdev. For vdevs | ||
| -// which don't have a txg value stored on their label (i.e. spares/cache) | ||
| -// or have not been completely initialized (txg = 0) just return | ||
| -// the configuration from the first valid label we find. Otherwise, | ||
| -// find the most up-to-date label that does not exceed the specified | ||
| -// 'txg' value. | ||
| -fn vdev_label_read_config(vdev_t *vd, uint64_t txg) -> NvList { | ||
| - spa_t *spa = vd->vdev_spa; | ||
| - nvlist_t *config = NULL; | ||
| - vdev_phys_t *vp; | ||
| - uint64_t best_txg = 0; | ||
| - int error = 0; | ||
| - int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | | ||
| - ZIO_FLAG_SPECULATIVE; | ||
| - int l; | ||
| - | ||
| - assert!(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); | ||
| - | ||
| - if (!vdev_readable(vd)) | ||
| - return (NULL); | ||
| - | ||
| - vp = zio_buf_alloc(sizeof (vdev_phys_t)); | ||
| - | ||
| -retry: | ||
| - for (l = 0; l < VDEV_LABELS; l++) { | ||
| - nvlist_t *label = NULL; | ||
| - | ||
| - let zio = Zio::root(spa, None, None, flags); | ||
| - | ||
| - vdev_label_read(zio, vd, l, vp, | ||
| - offsetof(vdev_label_t, vl_vdev_phys), | ||
| - sizeof (vdev_phys_t), NULL, NULL, flags); | ||
| - | ||
| - if (zio_wait(zio) == 0 && | ||
| - nvlist_unpack(vp->vp_nvlist, sizeof (vp->vp_nvlist), | ||
| - &label, 0) == 0) { | ||
| - uint64_t label_txg = 0; | ||
| - | ||
| - // Auxiliary vdevs won't have txg values in their | ||
| - // labels and newly added vdevs may not have been | ||
| - // completely initialized so just return the | ||
| - // configuration from the first valid label we | ||
| - // encounter. | ||
| - error = nvlist_lookup_uint64(label, | ||
| - ZPOOL_CONFIG_POOL_TXG, &label_txg); | ||
| - if ((error || label_txg == 0) && !config) { | ||
| - config = label; | ||
| - break; | ||
| - } else if (label_txg <= txg && label_txg > best_txg) { | ||
| - best_txg = label_txg; | ||
| - nvlist_free(config); | ||
| - config = fnvlist_dup(label); | ||
| - } | ||
| - } | ||
| - | ||
| - if (label != NULL) { | ||
| - nvlist_free(label); | ||
| - label = NULL; | ||
| - } | ||
| - } | ||
| - | ||
| - if (config == NULL && !(flags & ZIO_FLAG_TRYHARD)) { | ||
| - flags |= ZIO_FLAG_TRYHARD; | ||
| - goto retry; | ||
| - } | ||
| - | ||
| - zio_buf_free(vp, sizeof (vdev_phys_t)); | ||
| - | ||
| - return (config); | ||
| -} | ||
| - | ||
| -// Determine if a device is in use. The 'spare_guid' parameter will be filled | ||
| -// in with the device guid if this spare is active elsewhere on the system. | ||
| -vdev_inuse(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason, | ||
| - uint64_t *spare_guid, uint64_t *l2cache_guid) -> bool { | ||
| - spa_t *spa = vd->vdev_spa; | ||
| - uint64_t state, pool_guid, device_guid, txg, spare_pool; | ||
| - uint64_t vdtxg = 0; | ||
| - nvlist_t *label; | ||
| - | ||
| - if (spare_guid) | ||
| - *spare_guid = 0ULL; | ||
| - if (l2cache_guid) | ||
| - *l2cache_guid = 0ULL; | ||
| - | ||
| - // Read the label, if any, and perform some basic sanity checks. | ||
| - if ((label = vdev_label_read_config(vd, -1ULL)) == NULL) | ||
| - return (false); | ||
| - | ||
| - nvlist_lookup_uint64(label, ZPOOL_CONFIG_CREATE_TXG, &vdtxg); | ||
| - | ||
| - if nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, &state) != 0 || | ||
| - nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &device_guid) != 0 { | ||
| - nvlist_free(label); | ||
| - return (false); | ||
| - } | ||
| - | ||
| - if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && | ||
| - (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID, | ||
| - &pool_guid) != 0 || | ||
| - nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG, | ||
| - &txg) != 0)) { | ||
| - nvlist_free(label); | ||
| - return (false); | ||
| - } | ||
| - | ||
| - nvlist_free(label); | ||
| - | ||
| - // Check to see if this device indeed belongs to the pool it claims to | ||
| - // be a part of. The only way this is allowed is if the device is a hot | ||
| - // spare (which we check for later on). | ||
| - if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && | ||
| - !spa_guid_exists(pool_guid, device_guid) && | ||
| - !spa_spare_exists(device_guid, NULL, NULL) && | ||
| - !spa_l2cache_exists(device_guid, NULL)) | ||
| - return (false); | ||
| - | ||
| - // If the transaction group is zero, then this an initialized (but | ||
| - // unused) label. This is only an error if the create transaction | ||
| - // on-disk is the same as the one we're using now, in which case the | ||
| - // user has attempted to add the same vdev multiple times in the same | ||
| - // transaction. | ||
| - if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && | ||
| - txg == 0 && vdtxg == crtxg) | ||
| - return (true); | ||
| - | ||
| - // Check to see if this is a spare device. We do an explicit check for | ||
| - // spa_has_spare() here because it may be on our pending list of spares | ||
| - // to add. We also check if it is an l2cache device. | ||
| - if (spa_spare_exists(device_guid, &spare_pool, NULL) || | ||
| - spa_has_spare(spa, device_guid)) { | ||
| - if (spare_guid) | ||
| - *spare_guid = device_guid; | ||
| - | ||
| - switch (reason) { | ||
| - case VDEV_LABEL_CREATE: | ||
| - case VDEV_LABEL_L2CACHE: | ||
| - return (true); | ||
| - | ||
| - case VDEV_LABEL_REPLACE: | ||
| - return (!spa_has_spare(spa, device_guid) || | ||
| - spare_pool != 0ULL); | ||
| - | ||
| - case VDEV_LABEL_SPARE: | ||
| - return (spa_has_spare(spa, device_guid)); | ||
| - default: | ||
| - break; | ||
| - } | ||
| - } | ||
| - | ||
| - // Check to see if this is an l2cache device. | ||
| - if (spa_l2cache_exists(device_guid, NULL)) | ||
| - return true; | ||
| - | ||
| - // We can't rely on a pool's state if it's been imported | ||
| - // read-only. Instead we look to see if the pools is marked | ||
| - // read-only in the namespace and set the state to active. | ||
| - if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && | ||
| - (spa = spa_by_guid(pool_guid, device_guid)) != NULL && | ||
| - spa_mode(spa) == FREAD) | ||
| - state = POOL_STATE_ACTIVE; | ||
| - | ||
| - // If the device is marked ACTIVE, then this device is in use by another | ||
| - // pool on the system. | ||
| - return (state == POOL_STATE_ACTIVE); | ||
| -} | ||
| - | ||
| -// Initialize a vdev label. We check to make sure each leaf device is not in | ||
| -// use, and writable. We put down an initial label which we will later | ||
| -// overwrite with a complete label. Note that it's important to do this | ||
| -// sequentially, not in parallel, so that we catch cases of multiple use of the | ||
| -// same leaf vdev in the vdev we're creating -- e.g. mirroring a disk with | ||
| -// itself. | ||
| -int | ||
| -vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) | ||
| -{ | ||
| - spa_t *spa = vd->vdev_spa; | ||
| - nvlist_t *label; | ||
| - vdev_phys_t *vp; | ||
| - char *pad2; | ||
| - uberblock_t *ub; | ||
| - zio_t *zio; | ||
| - char *buf; | ||
| - size_t buflen; | ||
| - int error; | ||
| - uint64_t spare_guid = 0, l2cache_guid = 0; | ||
| - int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL; | ||
| - int c, l; | ||
| - vdev_t *pvd; | ||
| - | ||
| - assert!(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); | ||
| - | ||
| - for (c = 0; c < vd->vdev_children; c++) | ||
| - if ((error = vdev_label_init(vd->vdev_child[c], | ||
| - crtxg, reason)) != 0) | ||
| - return (error); | ||
| - | ||
| - // Track the creation time for this vdev | ||
| - vd->vdev_crtxg = crtxg; | ||
| - | ||
| - if (!vd->vdev_ops->vdev_op_leaf || !spa_writeable(spa)) | ||
| - return (0); | ||
| - | ||
| - // Dead vdevs cannot be initialized. | ||
| - if (vdev_is_dead(vd)) | ||
| - return (SET_ERROR(EIO)); | ||
| - | ||
| - // Determine if the vdev is in use. | ||
| - if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_SPLIT && | ||
| - vdev_inuse(vd, crtxg, reason, &spare_guid, &l2cache_guid)) | ||
| - return (SET_ERROR(EBUSY)); | ||
| - | ||
| - // If this is a request to add or replace a spare or l2cache device | ||
| - // that is in use elsewhere on the system, then we must update the | ||
| - // guid (which was initialized to a random value) to reflect the | ||
| - // actual GUID (which is shared between multiple pools). | ||
| - if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_L2CACHE && | ||
| - spare_guid != 0ULL) { | ||
| - uint64_t guid_delta = spare_guid - vd->vdev_guid; | ||
| - | ||
| - vd->vdev_guid += guid_delta; | ||
| - | ||
| - for (pvd = vd; pvd != NULL; pvd = pvd->vdev_parent) | ||
| - pvd->vdev_guid_sum += guid_delta; | ||
| - | ||
| - // If this is a replacement, then we want to fallthrough to the | ||
| - // rest of the code. If we're adding a spare, then it's already | ||
| - // labeled appropriately and we can just return. | ||
| - if (reason == VDEV_LABEL_SPARE) | ||
| - return (0); | ||
| - assert!(reason == VDEV_LABEL_REPLACE || | ||
| - reason == VDEV_LABEL_SPLIT); | ||
| - } | ||
| - | ||
| - if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_SPARE && | ||
| - l2cache_guid != 0ULL) { | ||
| - uint64_t guid_delta = l2cache_guid - vd->vdev_guid; | ||
| - | ||
| - vd->vdev_guid += guid_delta; | ||
| - | ||
| - for (pvd = vd; pvd != NULL; pvd = pvd->vdev_parent) | ||
| - pvd->vdev_guid_sum += guid_delta; | ||
| - | ||
| - // If this is a replacement, then we want to fall through to the | ||
| - // rest of the code. If we're adding an l2cache, then it's | ||
| - // already labeled appropriately and we can just return. | ||
| - if (reason == VDEV_LABEL_L2CACHE) | ||
| - return (0); | ||
| - assert!(reason == VDEV_LABEL_REPLACE); | ||
| - } | ||
| - | ||
| - // Initialize its label. | ||
| - vp = zio_buf_alloc(sizeof (vdev_phys_t)); | ||
| - bzero(vp, sizeof (vdev_phys_t)); | ||
| - | ||
| - // Generate a label describing the pool and our top-level vdev. | ||
| - // We mark it as being from txg 0 to indicate that it's not | ||
| - // really part of an active pool just yet. The labels will | ||
| - // be written again with a meaningful txg by spa_sync(). | ||
| - // For inactive hot spares, we generate a special label that | ||
| - // identifies as a mutually shared hot spare. We write the | ||
| - // label if we are adding a hot spare, or if we are removing an | ||
| - // active hot spare (in which case we want to revert the | ||
| - // labels). | ||
| - if (reason == VDEV_LABEL_SPARE || | ||
| - (reason == VDEV_LABEL_REMOVE && vd->vdev_isspare)) { | ||
| - VERIFY(nvlist_alloc(&label, NV_UNIQUE_NAME, KM_SLEEP) == 0); | ||
| - | ||
| - VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_VERSION, | ||
| - spa_version(spa)) == 0); | ||
| - VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_POOL_STATE, | ||
| - POOL_STATE_SPARE) == 0); | ||
| - VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_GUID, | ||
| - vd->vdev_guid) == 0); | ||
| - } else if (reason == VDEV_LABEL_L2CACHE || | ||
| - (reason == VDEV_LABEL_REMOVE && vd->vdev_isl2cache)) { | ||
| - // For level 2 ARC devices, add a special label. | ||
| - VERIFY(nvlist_alloc(&label, NV_UNIQUE_NAME, KM_SLEEP) == 0); | ||
| - | ||
| - VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_VERSION, | ||
| - spa_version(spa)) == 0); | ||
| - VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_POOL_STATE, | ||
| - POOL_STATE_L2CACHE) == 0); | ||
| - VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_GUID, | ||
| - vd->vdev_guid) == 0); | ||
| - } else { | ||
| - uint64_t txg = 0ULL; | ||
| - | ||
| - if (reason == VDEV_LABEL_SPLIT) | ||
| - txg = spa->spa_uberblock.ub_txg; | ||
| - label = spa_config_generate(spa, vd, txg, false); | ||
| - | ||
| - // Add our creation time. This allows us to detect multiple | ||
| - // vdev uses as described above, and automatically expires if we | ||
| - // fail. | ||
| - VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_CREATE_TXG, | ||
| - crtxg) == 0); | ||
| - } | ||
| - | ||
| - buf = vp->vp_nvlist; | ||
| - buflen = sizeof (vp->vp_nvlist); | ||
| - | ||
| - error = nvlist_pack(label, &buf, &buflen, NV_ENCODE_XDR, KM_SLEEP); | ||
| - if (error != 0) { | ||
| - nvlist_free(label); | ||
| - zio_buf_free(vp, sizeof (vdev_phys_t)); | ||
| - /* EFAULT means nvlist_pack ran out of room */ | ||
| - return (error == EFAULT ? ENAMETOOLONG : EINVAL); | ||
| - } | ||
| - | ||
| - // Initialize uberblock template. | ||
| - ub = zio_buf_alloc(VDEV_UBERBLOCK_RING); | ||
| - bzero(ub, VDEV_UBERBLOCK_RING); | ||
| - *ub = spa->spa_uberblock; | ||
| - ub->ub_txg = 0; | ||
| - | ||
| - // Initialize the 2nd padding area. | ||
| - pad2 = zio_buf_alloc(VDEV_PAD_SIZE); | ||
| - bzero(pad2, VDEV_PAD_SIZE); | ||
| - | ||
| - // Write everything in parallel. | ||
| -retry: | ||
| - zio = zio_root(spa, NULL, NULL, flags); | ||
| - | ||
| - for (l = 0; l < VDEV_LABELS; l++) { | ||
| - | ||
| - vdev_label_write(zio, vd, l, vp, | ||
| - offsetof(vdev_label_t, vl_vdev_phys), | ||
| - sizeof (vdev_phys_t), NULL, NULL, flags); | ||
| - | ||
| - // Skip the 1st padding area. | ||
| - // Zero out the 2nd padding area where it might have | ||
| - // leftover data from a previous filesystem format. | ||
| - vdev_label_write(zio, vd, l, pad2, | ||
| - offsetof(vdev_label_t, vl_pad2), | ||
| - VDEV_PAD_SIZE, NULL, NULL, flags); | ||
| - | ||
| - vdev_label_write(zio, vd, l, ub, | ||
| - offsetof(vdev_label_t, vl_uberblock), | ||
| - VDEV_UBERBLOCK_RING, NULL, NULL, flags); | ||
| - } | ||
| - | ||
| - error = zio_wait(zio); | ||
| - | ||
| - if (error != 0 && !(flags & ZIO_FLAG_TRYHARD)) { | ||
| - flags |= ZIO_FLAG_TRYHARD; | ||
| - goto retry; | ||
| - } | ||
| - | ||
| - nvlist_free(label); | ||
| - zio_buf_free(pad2, VDEV_PAD_SIZE); | ||
| - zio_buf_free(ub, VDEV_UBERBLOCK_RING); | ||
| - zio_buf_free(vp, sizeof (vdev_phys_t)); | ||
| - | ||
| - // If this vdev hasn't been previously identified as a spare, then we | ||
| - // mark it as such only if a) we are labeling it as a spare, or b) it | ||
| - // exists as a spare elsewhere in the system. Do the same for | ||
| - // level 2 ARC devices. | ||
| - if (error == 0 && !vd->vdev_isspare && | ||
| - (reason == VDEV_LABEL_SPARE || | ||
| - spa_spare_exists(vd->vdev_guid, NULL, NULL))) | ||
| - spa_spare_add(vd); | ||
| - | ||
| - if (error == 0 && !vd->vdev_isl2cache && | ||
| - (reason == VDEV_LABEL_L2CACHE || | ||
| - spa_l2cache_exists(vd->vdev_guid, NULL))) | ||
| - spa_l2cache_add(vd); | ||
| - | ||
| - return (error); | ||
| -} | ||
| - | ||
| -// ========================================================================== | ||
| -// uberblock load/sync | ||
| -// ========================================================================== | ||
| - | ||
| -// Consider the following situation: txg is safely synced to disk. We've | ||
| -// written the first uberblock for txg + 1, and then we lose power. When we | ||
| -// come back up, we fail to see the uberblock for txg + 1 because, say, | ||
| -// it was on a mirrored device and the replica to which we wrote txg + 1 | ||
| -// is now offline. If we then make some changes and sync txg + 1, and then | ||
| -// the missing replica comes back, then for a few seconds we'll have two | ||
| -// conflicting uberblocks on disk with the same txg. The solution is simple: | ||
| -// among uberblocks with equal txg, choose the one with the latest timestamp. | ||
| -fn uberblock_compare(a: &Uberblock, b: &Uberblock) -> i64 { | ||
| - if a.txg < b.txg { | ||
| - return -1; | ||
| - } | ||
| - if a.txg > b.txg { | ||
| - return 1; | ||
| - } | ||
| - | ||
| - if a.timestamp < b.timestamp { | ||
| - return -1; | ||
| - } | ||
| - if a.timestamp > b.timestamp { | ||
| - return 1; | ||
| - } | ||
| - | ||
| - 0 | ||
| -} | ||
| - | ||
| -struct ubl_cbdata { | ||
| - uberblock_t *ubl_ubbest; /* Best uberblock */ | ||
| - vdev_t *ubl_vd; /* vdev associated with the above */ | ||
| -}; | ||
| - | ||
| -fn uberblock_load_done(zio_t *zio) { | ||
| - vdev_t *vd = zio->vd; | ||
| - spa_t *spa = zio->spa; | ||
| - zio_t *rio = zio->private; | ||
| - uberblock_t *ub = zio->data; | ||
| - struct ubl_cbdata *cbp = rio->private; | ||
| - | ||
| - //assert!(zio.size == VDEV_UBERBLOCK_SIZE(vd)); | ||
| - | ||
| - if (zio->error == 0 && uberblock_verify(ub) == 0) { | ||
| - mutex_enter(&rio->lock); | ||
| - if (ub->ub_txg <= spa->spa_load_max_txg && | ||
| - uberblock_compare(ub, cbp->ubl_ubbest) > 0) { | ||
| - // Keep track of the vdev in which this uberblock | ||
| - // was found. We will use this information later | ||
| - // to obtain the config nvlist associated with | ||
| - // this uberblock. | ||
| - *cbp->ubl_ubbest = *ub; | ||
| - cbp->ubl_vd = vd; | ||
| - } | ||
| - mutex_exit(&rio->lock); | ||
| - } | ||
| - | ||
| - zbuf_free(zio->data, zio->size); | ||
| -} | ||
| - | ||
| -fn uberblock_load_impl(zio: &Zio, vdev_t *vd, int flags, struct ubl_cbdata *cbp) { | ||
| - for c in 0..vd->vdev_children { | ||
| - uberblock_load_impl(zio, vd.vdev_child[c], flags, cbp); | ||
| - } | ||
| - | ||
| - if vd.ops.vdev_op_leaf && vdev_readable(vd) { | ||
| - for l in 0..VDEV_LABELS { | ||
| - for n in 0..VDEV_UBERBLOCK_COUNT(vd) { | ||
| - vdev_label_read(zio, vd, l, zio_buf_alloc(VDEV_UBERBLOCK_SIZE(vd)), | ||
| - VDEV_UBERBLOCK_OFFSET(vd, n), VDEV_UBERBLOCK_SIZE(vd), | ||
| - uberblock_load_done, zio, flags); | ||
| - } | ||
| - } | ||
| - } | ||
| -} | ||
| - | ||
| -// Reads the 'best' uberblock from disk along with its associated | ||
| -// configuration. First, we read the uberblock array of each label of each | ||
| -// vdev, keeping track of the uberblock with the highest txg in each array. | ||
| -// Then, we read the configuration from the same vdev as the best uberblock. | ||
| -fn uberblock_load(vdev_t *rvd, ub: &mut Uberblock, nvlist_t **config) { | ||
| - spa_t *spa = rvd->vdev_spa; | ||
| - struct ubl_cbdata cb; | ||
| - let flags = zio::FLAG_CONFIG_WRITER | zio::FLAG_CANFAIL | | ||
| - zio::FLAG_SPECULATIVE | zio::FLAG_TRYHARD; | ||
| - | ||
| - assert!(ub); | ||
| - assert!(config); | ||
| - | ||
| - bzero(ub, sizeof (uberblock_t)); | ||
| - *config = NULL; | ||
| - | ||
| - cb.ubl_ubbest = ub; | ||
| - cb.ubl_vd = NULL; | ||
| - | ||
| - spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); | ||
| - let zio = Zio::root(spa, None, &cb, flags); | ||
| - uberblock_load_impl(zio, rvd, flags, &cb); | ||
| - zio.wait(); | ||
| - | ||
| - // It's possible that the best uberblock was discovered on a label | ||
| - // that has a configuration which was written in a future txg. | ||
| - // Search all labels on this vdev to find the configuration that | ||
| - // matches the txg for our uberblock. | ||
| - if (cb.ubl_vd != NULL) | ||
| - *config = label_read_config(cb.ubl_vd, ub->ub_txg); | ||
| - spa_config_exit(spa, SCL_ALL, FTAG); | ||
| -} | ||
| - | ||
| -// On success, increment root zio's count of good writes. | ||
| -// We only get credit for writes to known-visible vdevs; see spa_vdev_add(). | ||
| -fn vdev_uberblock_sync_done(zio_t *zio) { | ||
| - uint64_t *good_writes = zio->io_private; | ||
| - | ||
| - if (zio->io_error == 0 && zio->io_vd->vdev_top->vdev_ms_array != 0) | ||
| - atomic_add_64(good_writes, 1); | ||
| -} | ||
| - | ||
| -// Write the uberblock to all labels of all leaves of the specified vdev. | ||
| -fn vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd, int flags) { | ||
| - uberblock_t *ubbuf; | ||
| - int c, l, n; | ||
| - | ||
| - for (c = 0; c < vd->vdev_children; c++) { | ||
| - vdev_uberblock_sync(zio, ub, vd->vdev_child[c], flags); | ||
| - } | ||
| - | ||
| - if !vd->vdev_ops->vdev_op_leaf { | ||
| - return; | ||
| - } | ||
| - | ||
| - if !vdev_writeable(vd) { | ||
| - return; | ||
| - } | ||
| - | ||
| - n = ub->ub_txg & (VDEV_UBERBLOCK_COUNT(vd) - 1); | ||
| - | ||
| - ubbuf = zio_buf_alloc(VDEV_UBERBLOCK_SIZE(vd)); | ||
| - bzero(ubbuf, VDEV_UBERBLOCK_SIZE(vd)); | ||
| - *ubbuf = *ub; | ||
| - | ||
| - for (l = 0; l < VDEV_LABELS; l++) { | ||
| - vdev_label_write(zio, vd, l, ubbuf, | ||
| - VDEV_UBERBLOCK_OFFSET(vd, n), VDEV_UBERBLOCK_SIZE(vd), | ||
| - vdev_uberblock_sync_done, zio->io_private, | ||
| - flags | ZIO_FLAG_DONT_PROPAGATE); | ||
| - } | ||
| - | ||
| - zio_buf_free(ubbuf, VDEV_UBERBLOCK_SIZE(vd)); | ||
| -} | ||
| - | ||
| -// Sync the uberblocks to all vdevs in svd[] | ||
| -fn vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags) -> zfs::Result<()> { | ||
| - spa_t *spa = svd[0]->vdev_spa; | ||
| - zio_t *zio; | ||
| - uint64_t good_writes = 0; | ||
| - int v; | ||
| - | ||
| - zio = zio_root(spa, NULL, &good_writes, flags); | ||
| - | ||
| - for (v = 0; v < svdcount; v++) | ||
| - vdev_uberblock_sync(zio, ub, svd[v], flags); | ||
| - | ||
| - (void) zio_wait(zio); | ||
| - | ||
| - // Flush the uberblocks to disk. This ensures that the odd labels | ||
| - // are no longer needed (because the new uberblocks and the even | ||
| - // labels are safely on disk), so it is safe to overwrite them. | ||
| - zio = zio_root(spa, NULL, NULL, flags); | ||
| - | ||
| - for (v = 0; v < svdcount; v++) | ||
| - zio_flush(zio, svd[v]); | ||
| - | ||
| - (void) zio_wait(zio); | ||
| - | ||
| - return (good_writes >= 1 ? 0 : EIO); | ||
| -} | ||
| - | ||
| -// On success, increment the count of good writes for our top-level vdev. | ||
| -fn vdev_label_sync_done(zio_t *zio) { | ||
| - uint64_t *good_writes = zio->io_private; | ||
| - | ||
| - if (zio->io_error == 0) | ||
| - atomic_add_64(good_writes, 1); | ||
| -} | ||
| - | ||
| -// If there weren't enough good writes, indicate failure to the parent. | ||
| -fn vdev_label_sync_top_done(zio_t *zio) { | ||
| - uint64_t *good_writes = zio->io_private; | ||
| - | ||
| - if (*good_writes == 0) | ||
| - zio->io_error = SET_ERROR(EIO); | ||
| - | ||
| - kmem_free(good_writes, sizeof (uint64_t)); | ||
| -} | ||
| - | ||
| -// We ignore errors for log and cache devices, simply free the private data. | ||
| -fn vdev_label_sync_ignore_done(zio_t *zio) { | ||
| - kmem_free(zio->io_private, sizeof (uint64_t)); | ||
| -} | ||
| - | ||
| -// Write all even or odd labels to all leaves of the specified vdev. | ||
| -fn vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg, int flags) { | ||
| - nvlist_t *label; | ||
| - vdev_phys_t *vp; | ||
| - char *buf; | ||
| - size_t buflen; | ||
| - int c; | ||
| - | ||
| - for (c = 0; c < vd->vdev_children; c++) | ||
| - vdev_label_sync(zio, vd->vdev_child[c], l, txg, flags); | ||
| - | ||
| - if (!vd->vdev_ops->vdev_op_leaf) | ||
| - return; | ||
| - | ||
| - if (!vdev_writeable(vd)) | ||
| - return; | ||
| - | ||
| - // Generate a label describing the top-level config to which we belong. | ||
| - label = spa_config_generate(vd->vdev_spa, vd, txg, false); | ||
| - | ||
| - vp = zio_buf_alloc(sizeof (vdev_phys_t)); | ||
| - bzero(vp, sizeof (vdev_phys_t)); | ||
| - | ||
| - buf = vp->vp_nvlist; | ||
| - buflen = sizeof (vp->vp_nvlist); | ||
| - | ||
| - if (!nvlist_pack(label, &buf, &buflen, NV_ENCODE_XDR, KM_SLEEP)) { | ||
| - for (; l < VDEV_LABELS; l += 2) { | ||
| - vdev_label_write(zio, vd, l, vp, | ||
| - offsetof(vdev_label_t, vl_vdev_phys), | ||
| - sizeof (vdev_phys_t), | ||
| - vdev_label_sync_done, zio->io_private, | ||
| - flags | ZIO_FLAG_DONT_PROPAGATE); | ||
| - } | ||
| - } | ||
| - | ||
| - zio_buf_free(vp, sizeof (vdev_phys_t)); | ||
| - nvlist_free(label); | ||
| -} | ||
| - | ||
| -fn vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags) -> zfs::Result<()> { | ||
| - list_t *dl = &spa->spa_config_dirty_list; | ||
| - vdev_t *vd; | ||
| - zio_t *zio; | ||
| - int error; | ||
| - | ||
| - // Write the new labels to disk. | ||
| - zio = zio_root(spa, NULL, NULL, flags); | ||
| - | ||
| - for (vd = list_head(dl); vd != NULL; vd = list_next(dl, vd)) { | ||
| - uint64_t *good_writes; | ||
| - zio_t *vio; | ||
| - | ||
| - assert!(!vd->vdev_ishole); | ||
| - | ||
| - good_writes = kmem_zalloc(sizeof (uint64_t), KM_SLEEP); | ||
| - vio = zio_null(zio, spa, NULL, | ||
| - (vd->vdev_islog || vd->vdev_aux != NULL) ? | ||
| - vdev_label_sync_ignore_done : vdev_label_sync_top_done, | ||
| - good_writes, flags); | ||
| - vdev_label_sync(vio, vd, l, txg, flags); | ||
| - vio.no_wait(); | ||
| - } | ||
| - | ||
| - error = zio.wait(); | ||
| - | ||
| - // Flush the new labels to disk. | ||
| - zio = zio.root(spa, None, None, flags); | ||
| - | ||
| - for (vd = list_head(dl); vd != NULL; vd = list_next(dl, vd)) { | ||
| - zio.flush(vd); | ||
| - } | ||
| - | ||
| - zio.wait(); | ||
| - | ||
| - return (error); | ||
| -} | ||
| - | ||
| -// Sync the uberblock and any changes to the vdev configuration. | ||
| -// | ||
| -// The order of operations is carefully crafted to ensure that | ||
| -// if the system panics or loses power at any time, the state on disk | ||
| -// is still transactionally consistent. The in-line comments below | ||
| -// describe the failure semantics at each stage. | ||
| -// | ||
| -// Moreover, vdev_config_sync() is designed to be idempotent: if it fails | ||
| -// at any time, you can just call it again, and it will resume its work. | ||
| -fn config_sync(vdev_t **svd, int svdcount, uint64_t txg, boolean_t tryhard) -> zfs::Result<()> { | ||
| - spa_t *spa = svd[0]->vdev_spa; | ||
| - uberblock_t *ub = &spa->spa_uberblock; | ||
| - vdev_t *vd; | ||
| - int error; | ||
| - int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL; | ||
| - | ||
| - // Normally, we don't want to try too hard to write every label and | ||
| - // uberblock. If there is a flaky disk, we don't want the rest of the | ||
| - // sync process to block while we retry. But if we can't write a | ||
| - // single label out, we should retry with ZIO_FLAG_TRYHARD before | ||
| - // bailing out and declaring the pool faulted. | ||
| - if tryhard { | ||
| - flags |= ZIO_FLAG_TRYHARD; | ||
| - } | ||
| - | ||
| - assert!(ub->ub_txg <= txg); | ||
| - | ||
| - // If this isn't a resync due to I/O errors, | ||
| - // and nothing changed in this transaction group, | ||
| - // and the vdev configuration hasn't changed, | ||
| - // then there's nothing to do. | ||
| - if ub->ub_txg < txg && | ||
| - uberblock_update(ub, spa->spa_root_vdev, txg) == false && | ||
| - list_is_empty(&spa->spa_config_dirty_list) { | ||
| - return 0; | ||
| - } | ||
| - | ||
| - if txg > spa_freeze_txg(spa) { | ||
| - return 0; | ||
| - } | ||
| - | ||
| - assert!(txg <= spa->spa_final_txg); | ||
| - | ||
| - // Flush the write cache of every disk that's been written to | ||
| - // in this transaction group. This ensures that all blocks | ||
| - // written in this txg will be committed to stable storage | ||
| - // before any uberblock that references them. | ||
| - let zio = Zio::root(spa, None, None, flags); | ||
| - | ||
| - for (vd = txg_list_head(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)); vd; | ||
| - vd = txg_list_next(&spa->spa_vdev_txg_list, vd, TXG_CLEAN(txg))) | ||
| - zio.flush(vd); | ||
| - | ||
| - zio.wait(); | ||
| - | ||
| - // Sync out the even labels (L0, L2) for every dirty vdev. If the | ||
| - // system dies in the middle of this process, that's OK: all of the | ||
| - // even labels that made it to disk will be newer than any uberblock, | ||
| - // and will therefore be considered invalid. The odd labels (L1, L3), | ||
| - // which have not yet been touched, will still be valid. We flush | ||
| - // the new labels to disk to ensure that all even-label updates | ||
| - // are committed to stable storage before the uberblock update. | ||
| - if (error = vdev_label_sync_list(spa, 0, txg, flags)) != 0 { | ||
| - return error; | ||
| - } | ||
| - | ||
| - // Sync the uberblocks to all vdevs in svd[]. | ||
| - // If the system dies in the middle of this step, there are two cases | ||
| - // to consider, and the on-disk state is consistent either way: | ||
| - // | ||
| - // (1) If none of the new uberblocks made it to disk, then the | ||
| - // previous uberblock will be the newest, and the odd labels | ||
| - // (which had not yet been touched) will be valid with respect | ||
| - // to that uberblock. | ||
| - // | ||
| - // (2) If one or more new uberblocks made it to disk, then they | ||
| - // will be the newest, and the even labels (which had all | ||
| - // been successfully committed) will be valid with respect | ||
| - // to the new uberblocks. | ||
| - if (error = vdev_uberblock_sync_list(svd, svdcount, ub, flags)) != 0 { | ||
| - return error; | ||
| - } | ||
| - | ||
| - // Sync out odd labels for every dirty vdev. If the system dies | ||
| - // in the middle of this process, the even labels and the new | ||
| - // uberblocks will suffice to open the pool. The next time | ||
| - // the pool is opened, the first thing we'll do -- before any | ||
| - // user data is modified -- is mark every vdev dirty so that | ||
| - // all labels will be brought up to date. We flush the new labels | ||
| - // to disk to ensure that all odd-label updates are committed to | ||
| - // stable storage before the next transaction group begins. | ||
| - vdev_label_sync_list(spa, 1, txg, flags) | ||
| -} |
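The removed vdev_label.rs code above encodes the uberblock selection rule its comments describe: among candidate uberblocks read from the labels, the highest txg wins, and among equal txgs the newest timestamp wins. Below is a minimal standalone sketch of that rule, assuming a simplified `Uberblock` with only the `txg` and `timestamp` fields the comparison cares about (the real on-disk structure carries more); it is illustrative only, not part of the removed crate.

```rust
// Illustrative sketch: a simplified uberblock with just the two fields
// used by the selection rule.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct Uberblock {
    txg: u64,
    timestamp: u64,
}

// Highest txg wins; among equal txgs, the newest timestamp wins.
fn best_uberblock(candidates: &[Uberblock]) -> Option<Uberblock> {
    candidates.iter().copied().max_by_key(|ub| (ub.txg, ub.timestamp))
}

fn main() {
    let labels = [
        Uberblock { txg: 41, timestamp: 100 },
        Uberblock { txg: 42, timestamp: 90 }, // same txg, written earlier
        Uberblock { txg: 42, timestamp: 95 }, // same txg, written later: chosen
    ];
    assert_eq!(
        best_uberblock(&labels),
        Some(Uberblock { txg: 42, timestamp: 95 })
    );
}
```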
682
crates/zfs/vdev_queue.rs
| @@ -1,682 +0,0 @@ | ||
| -use super::zio; | ||
| - | ||
| -// ZFS IO Scheduler | ||
| -// --------------- | ||
| -// | ||
| -// ZFS issues IO operations to leaf vdevs to satisfy and complete zios. The | ||
| -// IO scheduler determines when and in what order those operations are | ||
| -// issued. The IO scheduler divides operations into five IO classes | ||
| -// prioritized in the following order: sync read, sync write, async read, | ||
| -// async write, and scrub/resilver. Each queue defines the minimum and | ||
| -// maximum number of concurrent operations that may be issued to the device. | ||
| -// In addition, the device has an aggregate maximum. Note that the sum of the | ||
| -// per-queue minimums must not exceed the aggregate maximum. If the | ||
| -// sum of the per-queue maximums exceeds the aggregate maximum, then the | ||
| -// number of active IOs may reach zfs_vdev_max_active, in which case no | ||
| -// further IOs will be issued regardless of whether all per-queue | ||
| -// minimums have been met. | ||
| -// | ||
| -// For many physical devices, throughput increases with the number of | ||
| -// concurrent operations, but latency typically suffers. Further, physical | ||
| -// devices typically have a limit at which more concurrent operations have no | ||
| -// effect on throughput or can actually cause it to decrease. | ||
| -// | ||
| -// The scheduler selects the next operation to issue by first looking for an | ||
| -// IO class whose minimum has not been satisfied. Once all are satisfied and | ||
| -// the aggregate maximum has not been hit, the scheduler looks for classes | ||
| -// whose maximum has not been satisfied. Iteration through the IO classes is | ||
| -// done in the order specified above. No further operations are issued if the | ||
| -// aggregate maximum number of concurrent operations has been hit or if there | ||
| -// are no operations queued for an IO class that has not hit its maximum. | ||
| -// Every time an IO is queued or an operation completes, the IO scheduler | ||
| -// looks for new operations to issue. | ||
| -// | ||
| -// All IO classes have a fixed maximum number of outstanding operations | ||
| -// except for the async write class. Asynchronous writes represent the data | ||
| -// that is committed to stable storage during the syncing stage for | ||
| -// transaction groups (see txg.c). Transaction groups enter the syncing state | ||
| -// periodically so the number of queued async writes will quickly burst up and | ||
| -// then bleed down to zero. Rather than servicing them as quickly as possible, | ||
| -// the IO scheduler changes the maximum number of active async write IOs | ||
| -// according to the amount of dirty data in the pool (see dsl_pool.c). Since | ||
| -// both throughput and latency typically increase with the number of | ||
| -// concurrent operations issued to physical devices, reducing the burstiness | ||
| -// in the number of concurrent operations also stabilizes the response time of | ||
| -// operations from other -- and in particular synchronous -- queues. In broad | ||
| -// strokes, the IO scheduler will issue more concurrent operations from the | ||
| -// async write queue as there's more dirty data in the pool. | ||
| -// | ||
| -// Async Writes | ||
| -// | ||
| -// The number of concurrent operations issued for the async write IO class | ||
| -// follows a piece-wise linear function defined by a few adjustable points. | ||
| -// | ||
| -// | o---------| <-- zfs_vdev_async_write_max_active | ||
| -// ^ | /^ | | ||
| -// | | / | | | ||
| -// active | / | | | ||
| -// IO | / | | | ||
| -// count | / | | | ||
| -// | / | | | ||
| -// |------------o | | <-- zfs_vdev_async_write_min_active | ||
| -// 0|____________^______|_________| | ||
| -// 0% | | 100% of zfs_dirty_data_max | ||
| -// | | | ||
| -// | `-- zfs_vdev_async_write_active_max_dirty_percent | ||
| -// `--------- zfs_vdev_async_write_active_min_dirty_percent | ||
| -// | ||
| -// Until the amount of dirty data exceeds a minimum percentage of the dirty | ||
| -// data allowed in the pool, the IO scheduler will limit the number of | ||
| -// concurrent operations to the minimum. As that threshold is crossed, the | ||
| -// number of concurrent operations issued increases linearly to the maximum at | ||
| -// the specified maximum percentage of the dirty data allowed in the pool. | ||
| -// | ||
| -// Ideally, the amount of dirty data on a busy pool will stay in the sloped | ||
| -// part of the function between zfs_vdev_async_write_active_min_dirty_percent | ||
| -// and zfs_vdev_async_write_active_max_dirty_percent. If it exceeds the | ||
| -// maximum percentage, this indicates that the rate of incoming data is | ||
| -// greater than the rate that the backend storage can handle. In this case, we | ||
| -// must further throttle incoming writes (see dmu_tx_delay() for details). | ||
| - | ||
| -// The aggregate maximum number of IOs active to each device. It must be at | ||
| -// least the sum of each queue's min_active. | ||
| -uint32_t zfs_vdev_max_active = 1000; | ||
| - | ||
| -// Per-queue limits on the number of IOs active to each device. If the | ||
| -// number of active IOs is < zfs_vdev_max_active, then the min_active comes | ||
| -// into play. We will send min_active from each queue, and then select from | ||
| -// queues in the order defined by zio_priority_t. | ||
| -// | ||
| -// In general, smaller max_active's will lead to lower latency of synchronous | ||
| -// operations. Larger max_active's may lead to higher overall throughput, | ||
| -// depending on underlying storage. | ||
| -// | ||
| -// The ratio of the queues' max_actives determines the balance of performance | ||
| -// between reads, writes, and scrubs. E.g., increasing | ||
| -// zfs_vdev_scrub_max_active will cause the scrub or resilver to complete | ||
| -// more quickly, but reads and writes to have higher latency and lower | ||
| -// throughput. | ||
| -uint32_t zfs_vdev_sync_read_min_active = 10; | ||
| -uint32_t zfs_vdev_sync_read_max_active = 10; | ||
| -uint32_t zfs_vdev_sync_write_min_active = 10; | ||
| -uint32_t zfs_vdev_sync_write_max_active = 10; | ||
| -uint32_t zfs_vdev_async_read_min_active = 1; | ||
| -uint32_t zfs_vdev_async_read_max_active = 3; | ||
| -uint32_t zfs_vdev_async_write_min_active = 1; | ||
| -uint32_t zfs_vdev_async_write_max_active = 10; | ||
| -uint32_t zfs_vdev_scrub_min_active = 1; | ||
| -uint32_t zfs_vdev_scrub_max_active = 2; | ||
| - | ||
| -// When the pool has less than zfs_vdev_async_write_active_min_dirty_percent | ||
| -// dirty data, use zfs_vdev_async_write_min_active. When it has more than | ||
| -// zfs_vdev_async_write_active_max_dirty_percent, use | ||
| -// zfs_vdev_async_write_max_active. The value is linearly interpolated | ||
| -// between min and max. | ||
| -int zfs_vdev_async_write_active_min_dirty_percent = 30; | ||
| -int zfs_vdev_async_write_active_max_dirty_percent = 60; | ||
| - | ||
| -// To reduce IOPs, we aggregate small adjacent IOs into one large IO. | ||
| -// For read IOs, we also aggregate across small adjacency gaps; for writes | ||
| -// we include spans of optional IOs to aid aggregation at the disk even when | ||
| -// they aren't able to help us aggregate at this level. | ||
| -int zfs_vdev_aggregation_limit = SPA_OLD_MAXBLOCKSIZE; | ||
| -int zfs_vdev_read_gap_limit = 32 << 10; | ||
| -int zfs_vdev_write_gap_limit = 4 << 10; | ||
| - | ||
| -fn vdev_queue_offset_compare(const void *x1, const void *x2) -> i32 { | ||
| - const zio_t *z1 = x1; | ||
| - const zio_t *z2 = x2; | ||
| - | ||
| - if z1.offset < z2.offset { | ||
| - return -1; | ||
| - } | ||
| - if z1.offset > z2.offset { | ||
| - return 1; | ||
| - } | ||
| - | ||
| - if z1 < z2 { | ||
| - return -1; | ||
| - } | ||
| - if z1 > z2 { | ||
| - return 1; | ||
| - } | ||
| - | ||
| - return 0; | ||
| -} | ||
| - | ||
| -static inline avl_tree_t * | ||
| -vdev_queue_class_tree(vdev_queue_t *vq, zio_priority_t p) | ||
| -{ | ||
| - return (&vq->vq_class[p].vqc_queued_tree); | ||
| -} | ||
| - | ||
| -static inline avl_tree_t * | ||
| -vdev_queue_type_tree(vdev_queue_t *vq, zio_type_t t) | ||
| -{ | ||
| - assert!(t == ZIO_TYPE_READ || t == ZIO_TYPE_WRITE); | ||
| - if t == ZIO_TYPE_READ { | ||
| - return &vq->vq_read_offset_tree; | ||
| - } else { | ||
| - return &vq->vq_write_offset_tree; | ||
| - } | ||
| -} | ||
| - | ||
| -int | ||
| -vdev_queue_timestamp_compare(const void *x1, const void *x2) | ||
| -{ | ||
| - const zio_t *z1 = x1; | ||
| - const zio_t *z2 = x2; | ||
| - | ||
| - if (z1->io_timestamp < z2->io_timestamp) | ||
| - return (-1); | ||
| - if (z1->io_timestamp > z2->io_timestamp) | ||
| - return (1); | ||
| - | ||
| - if (z1 < z2) | ||
| - return (-1); | ||
| - if (z1 > z2) | ||
| - return (1); | ||
| - | ||
| - return (0); | ||
| -} | ||
| - | ||
| -static int | ||
| -vdev_queue_class_min_active(zio_priority_t p) | ||
| -{ | ||
| - switch (p) { | ||
| - case ZIO_PRIORITY_SYNC_READ: | ||
| - return (zfs_vdev_sync_read_min_active); | ||
| - case ZIO_PRIORITY_SYNC_WRITE: | ||
| - return (zfs_vdev_sync_write_min_active); | ||
| - case ZIO_PRIORITY_ASYNC_READ: | ||
| - return (zfs_vdev_async_read_min_active); | ||
| - case ZIO_PRIORITY_ASYNC_WRITE: | ||
| - return (zfs_vdev_async_write_min_active); | ||
| - case ZIO_PRIORITY_SCRUB: | ||
| - return (zfs_vdev_scrub_min_active); | ||
| - default: | ||
| - panic("invalid priority %u", p); | ||
| - return (0); | ||
| - } | ||
| -} | ||
| - | ||
| -static int | ||
| -vdev_queue_max_async_writes(spa_t *spa) | ||
| -{ | ||
| - int writes; | ||
| - uint64_t dirty = spa->spa_dsl_pool->dp_dirty_total; | ||
| - uint64_t min_bytes = zfs_dirty_data_max * | ||
| - zfs_vdev_async_write_active_min_dirty_percent / 100; | ||
| - uint64_t max_bytes = zfs_dirty_data_max * | ||
| - zfs_vdev_async_write_active_max_dirty_percent / 100; | ||
| - | ||
| - // Sync tasks correspond to interactive user actions. To reduce the | ||
| - // execution time of those actions we push data out as fast as possible. | ||
| - if (spa_has_pending_synctask(spa)) { | ||
| - return zfs_vdev_async_write_max_active; | ||
| - } | ||
| - | ||
| - if dirty < min_bytes { | ||
| - return zfs_vdev_async_write_min_active; | ||
| - } | ||
| - if dirty > max_bytes { | ||
| - return zfs_vdev_async_write_max_active; | ||
| - } | ||
| - | ||
| - // linear interpolation: | ||
| - // slope = (max_writes - min_writes) / (max_bytes - min_bytes) | ||
| - // move right by min_bytes | ||
| - // move up by min_writes | ||
| - writes = (dirty - min_bytes) * | ||
| - (zfs_vdev_async_write_max_active - zfs_vdev_async_write_min_active) / | ||
| - (max_bytes - min_bytes) + zfs_vdev_async_write_min_active; | ||
| - assert!(writes >= zfs_vdev_async_write_min_active); | ||
| - assert!(writes <= zfs_vdev_async_write_max_active); | ||
| - return (writes); | ||
| -} | ||
| - | ||
| -fn vdev_queue_class_max_active(spa_t *spa, p: zio::Priority) -> int { | ||
| - match p { | ||
| - zio::Priority::SyncRead => zfs_vdev_sync_read_max_active, | ||
| - zio::Priority::SyncWrite => zfs_vdev_sync_write_max_active, | ||
| - zio::Priority::AsyncRead => zfs_vdev_async_read_max_active, | ||
| - zio::Priority::AsyncWrite => vdev_queue_max_async_writes(spa), | ||
| - zio::Priority::Scrub => zfs_vdev_scrub_max_active, | ||
| - _ => { | ||
| - panic!("invalid priority {}", p); | ||
| - 0 | ||
| - } | ||
| - } | ||
| -} | ||
| - | ||
| -// Return the IO class to issue from, or ZIO_PRIORITY_MAX_QUEUEABLE if | ||
| -// there is no eligible class. | ||
| -static zio_priority_t | ||
| -vdev_queue_class_to_issue(vdev_queue_t *vq) | ||
| -{ | ||
| - spa_t *spa = vq->vq_vdev->vdev_spa; | ||
| - zio_priority_t p; | ||
| - | ||
| - if (avl_numnodes(&vq->vq_active_tree) >= zfs_vdev_max_active) | ||
| - return (ZIO_PRIORITY_NUM_QUEUEABLE); | ||
| - | ||
| - for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) { | ||
| - if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 && | ||
| - vq->vq_class[p].vqc_active < | ||
| - vdev_queue_class_min_active(p)) | ||
| - return (p); | ||
| - } | ||
| - | ||
| - // If we haven't found a queue, look for one that hasn't reached its | ||
| - // maximum # outstanding IOs. | ||
| - for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) { | ||
| - if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 && | ||
| - vq->vq_class[p].vqc_active < | ||
| - vdev_queue_class_max_active(spa, p)) | ||
| - return (p); | ||
| - } | ||
| - | ||
| - return (ZIO_PRIORITY_NUM_QUEUEABLE); | ||
| -} | ||
| - | ||
| -void | ||
| -vdev_queue_init(vdev_t *vd) | ||
| -{ | ||
| - vdev_queue_t *vq = &vd->vdev_queue; | ||
| - zio_priority_t p; | ||
| - | ||
| - mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL); | ||
| - vq->vq_vdev = vd; | ||
| - taskq_init_ent(&vd->vdev_queue.vq_io_search.io_tqent); | ||
| - | ||
| - avl_create(&vq->vq_active_tree, vdev_queue_offset_compare, | ||
| - sizeof (zio_t), offsetof(struct zio, io_queue_node)); | ||
| - avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_READ), | ||
| - vdev_queue_offset_compare, sizeof (zio_t), | ||
| - offsetof(struct zio, io_offset_node)); | ||
| - avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE), | ||
| - vdev_queue_offset_compare, sizeof (zio_t), | ||
| - offsetof(struct zio, io_offset_node)); | ||
| - | ||
| - for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) { | ||
| - int (*compfn) (const void *, const void *); | ||
| - | ||
| - // The synchronous IO queues are dispatched in FIFO rather | ||
| - // than LBA order. This provides more consistent latency for | ||
| - // these IOs. | ||
| - if (p == ZIO_PRIORITY_SYNC_READ || p == ZIO_PRIORITY_SYNC_WRITE) | ||
| - compfn = vdev_queue_timestamp_compare; | ||
| - else | ||
| - compfn = vdev_queue_offset_compare; | ||
| - avl_create(vdev_queue_class_tree(vq, p), compfn, | ||
| - sizeof (zio_t), offsetof(struct zio, io_queue_node)); | ||
| - } | ||
| -} | ||
| - | ||
| -void | ||
| -vdev_queue_fini(vdev_t *vd) | ||
| -{ | ||
| - vdev_queue_t *vq = &vd->vdev_queue; | ||
| - zio_priority_t p; | ||
| - | ||
| - for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) | ||
| - avl_destroy(vdev_queue_class_tree(vq, p)); | ||
| - avl_destroy(&vq->vq_active_tree); | ||
| - avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_READ)); | ||
| - avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE)); | ||
| - | ||
| - mutex_destroy(&vq->vq_lock); | ||
| -} | ||
| - | ||
| -static void | ||
| -vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio) | ||
| -{ | ||
| - spa_t *spa = zio->io_spa; | ||
| - spa_stats_history_t *ssh = &spa->spa_stats.io_history; | ||
| - | ||
| - assert!(zio->io_priority < ZIO_PRIORITY_NUM_QUEUEABLE); | ||
| - avl_add(vdev_queue_class_tree(vq, zio->io_priority), zio); | ||
| - avl_add(vdev_queue_type_tree(vq, zio->io_type), zio); | ||
| - | ||
| - if (ssh->kstat != NULL) { | ||
| - mutex_enter(&ssh->lock); | ||
| - kstat_waitq_enter(ssh->kstat->ks_data); | ||
| - mutex_exit(&ssh->lock); | ||
| - } | ||
| -} | ||
| - | ||
| -static void | ||
| -vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio) | ||
| -{ | ||
| - spa_t *spa = zio->io_spa; | ||
| - spa_stats_history_t *ssh = &spa->spa_stats.io_history; | ||
| - | ||
| - assert!(zio->io_priority < ZIO_PRIORITY_NUM_QUEUEABLE); | ||
| - avl_remove(vdev_queue_class_tree(vq, zio->io_priority), zio); | ||
| - avl_remove(vdev_queue_type_tree(vq, zio->io_type), zio); | ||
| - | ||
| - if (ssh->kstat != NULL) { | ||
| - mutex_enter(&ssh->lock); | ||
| - kstat_waitq_exit(ssh->kstat->ks_data); | ||
| - mutex_exit(&ssh->lock); | ||
| - } | ||
| -} | ||
| - | ||
| -static void | ||
| -vdev_queue_pending_add(vdev_queue_t *vq, zio_t *zio) | ||
| -{ | ||
| - spa_t *spa = zio->io_spa; | ||
| - spa_stats_history_t *ssh = &spa->spa_stats.io_history; | ||
| - | ||
| - ASSERT(MUTEX_HELD(&vq->vq_lock)); | ||
| - assert!(zio->io_priority < ZIO_PRIORITY_NUM_QUEUEABLE); | ||
| - vq->vq_class[zio->io_priority].vqc_active++; | ||
| - avl_add(&vq->vq_active_tree, zio); | ||
| - | ||
| - if (ssh->kstat != NULL) { | ||
| - mutex_enter(&ssh->lock); | ||
| - kstat_runq_enter(ssh->kstat->ks_data); | ||
| - mutex_exit(&ssh->lock); | ||
| - } | ||
| -} | ||
| - | ||
| -static void | ||
| -vdev_queue_pending_remove(vdev_queue_t *vq, zio_t *zio) | ||
| -{ | ||
| - spa_t *spa = zio->io_spa; | ||
| - spa_stats_history_t *ssh = &spa->spa_stats.io_history; | ||
| - | ||
| - ASSERT(MUTEX_HELD(&vq->vq_lock)); | ||
| - assert!(zio->io_priority < ZIO_PRIORITY_NUM_QUEUEABLE); | ||
| - vq->vq_class[zio->io_priority].vqc_active--; | ||
| - avl_remove(&vq->vq_active_tree, zio); | ||
| - | ||
| - if (ssh->kstat != NULL) { | ||
| - kstat_io_t *ksio = ssh->kstat->ks_data; | ||
| - | ||
| - mutex_enter(&ssh->lock); | ||
| - kstat_runq_exit(ksio); | ||
| - if (zio->io_type == ZIO_TYPE_READ) { | ||
| - ksio->reads++; | ||
| - ksio->nread += zio->io_size; | ||
| - } else if (zio->io_type == ZIO_TYPE_WRITE) { | ||
| - ksio->writes++; | ||
| - ksio->nwritten += zio->io_size; | ||
| - } | ||
| - mutex_exit(&ssh->lock); | ||
| - } | ||
| -} | ||
| - | ||
| -fn vdev_queue_agg_io_done(aio: &mut Zio) { | ||
| - if (aio.zio_type == ZIO_TYPE_READ) { | ||
| - zio_t *pio; | ||
| - while (pio = zio_walk_parents(aio)) != NULL { | ||
| - bcopy(aio.data + (pio.offset - aio.offset), pio.data, pio.size); | ||
| - } | ||
| - } | ||
| - | ||
| - zio_buf_free(aio.data, aio.size); | ||
| -} | ||
| - | ||
| -// Compute the range spanned by two IOs, which is the endpoint of the last | ||
| -// (lio->io_offset + lio->io_size) minus start of the first (fio->io_offset). | ||
| -// Conveniently, the gap between fio and lio is given by -IO_SPAN(lio, fio); | ||
| -// thus fio and lio are adjacent if and only if IO_SPAN(lio, fio) == 0. | ||
| -#define IO_SPAN(fio, lio) ((lio)->io_offset + (lio)->io_size - (fio)->io_offset) | ||
| -#define IO_GAP(fio, lio) (-IO_SPAN(lio, fio)) | ||
| - | ||
| -static zio_t * | ||
| -vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) | ||
| -{ | ||
| - zio_t *first, *last, *aio, *dio, *mandatory, *nio; | ||
| - uint64_t maxgap = 0; | ||
| - uint64_t size; | ||
| - boolean_t stretch = B_FALSE; | ||
| - avl_tree_t *t = vdev_queue_type_tree(vq, zio->io_type); | ||
| - enum zio_flag flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT; | ||
| - | ||
| - if (zio->io_flags & ZIO_FLAG_DONT_AGGREGATE) | ||
| - return (NULL); | ||
| - | ||
| - // Prevent users from setting the zfs_vdev_aggregation_limit | ||
| - // tuning larger than SPA_MAXBLOCKSIZE. | ||
| - zfs_vdev_aggregation_limit = | ||
| - MIN(zfs_vdev_aggregation_limit, SPA_MAXBLOCKSIZE); | ||
| - | ||
| - first = last = zio; | ||
| - | ||
| - if (zio->io_type == ZIO_TYPE_READ) | ||
| - maxgap = zfs_vdev_read_gap_limit; | ||
| - | ||
| - // We can aggregate IOs that are sufficiently adjacent and of | ||
| - // the same flavor, as expressed by the AGG_INHERIT flags. | ||
| - // The latter requirement is necessary so that certain | ||
| - // attributes of the IO, such as whether it's a normal IO | ||
| - // or a scrub/resilver, can be preserved in the aggregate. | ||
| - // We can include optional IOs, but don't allow them | ||
| - // to begin a range as they add no benefit in that situation. | ||
| - | ||
| - // We keep track of the last non-optional IO. | ||
| - mandatory = (first->io_flags & ZIO_FLAG_OPTIONAL) ? NULL : first; | ||
| - | ||
| - // Walk backwards through sufficiently contiguous IOs | ||
| - // recording the last non-optional IO. | ||
| - while ((dio = AVL_PREV(t, first)) != NULL && | ||
| - (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags && | ||
| - IO_SPAN(dio, last) <= zfs_vdev_aggregation_limit && | ||
| - IO_GAP(dio, first) <= maxgap) { | ||
| - first = dio; | ||
| - if (mandatory == NULL && !(first->io_flags & ZIO_FLAG_OPTIONAL)) | ||
| - mandatory = first; | ||
| - } | ||
| - | ||
| - // Skip any initial optional IOs. | ||
| - while ((first->io_flags & ZIO_FLAG_OPTIONAL) && first != last) { | ||
| - first = AVL_NEXT(t, first); | ||
| - ASSERT(first != NULL); | ||
| - } | ||
| - | ||
| - | ||
| - // Walk forward through sufficiently contiguous IOs. | ||
| - while ((dio = AVL_NEXT(t, last)) != NULL && | ||
| - (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags && | ||
| - IO_SPAN(first, dio) <= zfs_vdev_aggregation_limit && | ||
| - IO_GAP(last, dio) <= maxgap) { | ||
| - last = dio; | ||
| - if (!(last->io_flags & ZIO_FLAG_OPTIONAL)) | ||
| - mandatory = last; | ||
| - } | ||
| - | ||
| - // Now that we've established the range of the IO aggregation | ||
| - // we must decide what to do with trailing optional IOs. | ||
| - // For reads, there's nothing to do. While we are unable to | ||
| - // aggregate further, it's possible that a trailing optional | ||
| - // IO would allow the underlying device to aggregate with | ||
| - // subsequent IOs. We must therefore determine if the next | ||
| - // non-optional IO is close enough to make aggregation | ||
| - // worthwhile. | ||
| - if (zio->io_type == ZIO_TYPE_WRITE && mandatory != NULL) { | ||
| - zio_t *nio = last; | ||
| - while ((dio = AVL_NEXT(t, nio)) != NULL && | ||
| - IO_GAP(nio, dio) == 0 && | ||
| - IO_GAP(mandatory, dio) <= zfs_vdev_write_gap_limit) { | ||
| - nio = dio; | ||
| - if (!(nio->io_flags & ZIO_FLAG_OPTIONAL)) { | ||
| - stretch = B_TRUE; | ||
| - break; | ||
| - } | ||
| - } | ||
| - } | ||
| - | ||
| - if (stretch) { | ||
| - // This may be a no-op. | ||
| - dio = AVL_NEXT(t, last); | ||
| - dio->io_flags &= ~ZIO_FLAG_OPTIONAL; | ||
| - } else { | ||
| - while (last != mandatory && last != first) { | ||
| - ASSERT(last->io_flags & ZIO_FLAG_OPTIONAL); | ||
| - last = AVL_PREV(t, last); | ||
| - ASSERT(last != NULL); | ||
| - } | ||
| - } | ||
| - | ||
| - if (first == last) | ||
| - return (NULL); | ||
| - | ||
| - size = IO_SPAN(first, last); | ||
| - assert!(size <= zfs_vdev_aggregation_limit); | ||
| - | ||
| - aio = zio_vdev_delegated_io(first->io_vd, first->io_offset, | ||
| - zio_buf_alloc(size), size, first->io_type, zio->io_priority, | ||
| - flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE, | ||
| - vdev_queue_agg_io_done, NULL); | ||
| - aio->io_timestamp = first->io_timestamp; | ||
| - | ||
| - nio = first; | ||
| - do { | ||
| - dio = nio; | ||
| - nio = AVL_NEXT(t, dio); | ||
| - assert!(dio->io_type == aio->io_type); | ||
| - | ||
| - if (dio->io_flags & ZIO_FLAG_NODATA) { | ||
| - assert!(dio->io_type == ZIO_TYPE_WRITE); | ||
| - bzero((char *)aio->io_data + (dio->io_offset - | ||
| - aio->io_offset), dio->io_size); | ||
| - } else if (dio->io_type == ZIO_TYPE_WRITE) { | ||
| - bcopy(dio->io_data, (char *)aio->io_data + | ||
| - (dio->io_offset - aio->io_offset), | ||
| - dio->io_size); | ||
| - } | ||
| - | ||
| - zio_add_child(dio, aio); | ||
| - vdev_queue_io_remove(vq, dio); | ||
| - zio_vdev_io_bypass(dio); | ||
| - zio_execute(dio); | ||
| - } while (dio != last); | ||
| - | ||
| - return (aio); | ||
| -} | ||
| - | ||
| -fn vdev_queue_io_to_issue(vdev_queue_t *vq) -> Option<Zio> { | ||
| - zio_t *zio, *aio; | ||
| - zio_priority_t p; | ||
| - avl_index_t idx; | ||
| - avl_tree_t *tree; | ||
| - | ||
| -again: | ||
| - ASSERT(MUTEX_HELD(&vq->vq_lock)); | ||
| - | ||
| - p = vdev_queue_class_to_issue(vq); | ||
| - | ||
| - if (p == ZIO_PRIORITY_NUM_QUEUEABLE) { | ||
| - // No eligible queued IOs | ||
| - return (NULL); | ||
| - } | ||
| - | ||
| - // For LBA-ordered queues (async / scrub), issue the IO which follows | ||
| - // the most recently issued IO in LBA (offset) order. | ||
| - // | ||
| - // For FIFO queues (sync), issue the IO with the lowest timestamp. | ||
| - tree = vdev_queue_class_tree(vq, p); | ||
| - vq->vq_io_search.io_timestamp = 0; | ||
| - vq->vq_io_search.io_offset = vq->vq_last_offset + 1; | ||
| - //VERIFY(avl_find(tree, &vq->vq_io_search, &idx) == NULL); | ||
| - zio = avl_nearest(tree, idx, AVL_AFTER); | ||
| - if (zio == NULL) | ||
| - zio = avl_first(tree); | ||
| - assert!(zio->io_priority == p); | ||
| - | ||
| - aio = vdev_queue_aggregate(vq, zio); | ||
| - if (aio != NULL) | ||
| - zio = aio; | ||
| - else | ||
| - vdev_queue_io_remove(vq, zio); | ||
| - | ||
| - // If the IO is or was optional and therefore has no data, we need to | ||
| - // simply discard it. We need to drop the vdev queue's lock to avoid a | ||
| - // deadlock that we could encounter since this IO will complete | ||
| - // immediately. | ||
| - if (zio->io_flags & ZIO_FLAG_NODATA) { | ||
| - mutex_exit(&vq->vq_lock); | ||
| - zio_vdev_io_bypass(zio); | ||
| - zio_execute(zio); | ||
| - mutex_enter(&vq->vq_lock); | ||
| - goto again; | ||
| - } | ||
| - | ||
| - vdev_queue_pending_add(vq, zio); | ||
| - vq->vq_last_offset = zio->io_offset; | ||
| - | ||
| - return (zio); | ||
| -} | ||
| - | ||
| -pub fn vdev_queue_io(zio_t *zio) -> Option<Zio> { | ||
| - vdev_queue_t *vq = &zio.vd.vdev_queue; | ||
| - | ||
| - if zio->io_flags & ZIO_FLAG_DONT_QUEUE != 0 { | ||
| - return zio; | ||
| - } | ||
| - | ||
| - // Child IOs inherit their parent's priority, which might | ||
| - // not match the child's IO type. Fix it up here. | ||
| - if zio.zio_type == ZIO_TYPE_READ { | ||
| - if zio->io_priority != ZIO_PRIORITY_SYNC_READ && | ||
| - zio->io_priority != ZIO_PRIORITY_ASYNC_READ && | ||
| - zio->io_priority != ZIO_PRIORITY_SCRUB | ||
| - { | ||
| - zio->io_priority = ZIO_PRIORITY_ASYNC_READ; | ||
| - } | ||
| - } else { | ||
| - assert!(zio.zio_type == ZIO_TYPE_WRITE); | ||
| - if (zio.priority != ZIO_PRIORITY_SYNC_WRITE && | ||
| - zio.priority != ZIO_PRIORITY_ASYNC_WRITE) | ||
| - zio.priority = ZIO_PRIORITY_ASYNC_WRITE; | ||
| - } | ||
| - | ||
| - zio->io_flags |= ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE; | ||
| - | ||
| - mutex_enter(&vq->vq_lock); | ||
| - zio.timestamp = gethrtime(); | ||
| - vdev_queue_io_add(vq, zio); | ||
| - let nio = vdev_queue_io_to_issue(vq); | ||
| - mutex_exit(&vq->vq_lock); | ||
| - | ||
| - if let Some(nio) = nio { | ||
| - if nio.done == vdev_queue_agg_io_done { | ||
| - nio.no_wait(); | ||
| - return None; | ||
| - } | ||
| - } | ||
| - | ||
| - nio | ||
| -} | ||
| - | ||
| -fn vdev_queue_io_done(zio_t *zio) { | ||
| - vdev_queue_t *vq = &zio->io_vd->vdev_queue; | ||
| - zio_t *nio; | ||
| - | ||
| - if zio_injection_enabled { | ||
| - delay(SEC_TO_TICK(zio_handle_io_delay(zio))); | ||
| - } | ||
| - | ||
| - mutex_enter(&vq->vq_lock); | ||
| - | ||
| - vdev_queue_pending_remove(vq, zio); | ||
| - | ||
| - zio.delta = gethrtime() - zio.timestamp; | ||
| - vq.io_complete_ts = gethrtime(); | ||
| - vq.io_delta_ts = vq.io_complete_ts - zio.timestamp; | ||
| - | ||
| - while (nio = vdev_queue_io_to_issue(vq)) != NULL { | ||
| - mutex_exit(&vq->vq_lock); | ||
| - if (nio.done == vdev_queue_agg_io_done) { | ||
| - nio.no_wait(); | ||
| - } else { | ||
| - zio_vdev_io_reissue(nio); | ||
| - nio.execute(); | ||
| - } | ||
| - mutex_enter(&vq.lock); | ||
| - } | ||
| - | ||
| - mutex_exit(&vq->vq_lock); | ||
| -} |
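The scheduler comments in the removed vdev_queue.rs describe a piece-wise linear mapping from the pool's dirty-data level to the async-write queue's max_active. Below is a standalone sketch of that interpolation, mirroring the default tunable values from the deleted file (min 1, max 10, knees at 30% and 60% dirty); the 1 GiB dirty-data cap in the example is an assumption for illustration, not a ZFS default.

```rust
// Defaults mirrored from the removed vdev_queue.rs tunables.
const ASYNC_WRITE_MIN_ACTIVE: u64 = 1;
const ASYNC_WRITE_MAX_ACTIVE: u64 = 10;
const ACTIVE_MIN_DIRTY_PERCENT: u64 = 30;
const ACTIVE_MAX_DIRTY_PERCENT: u64 = 60;

/// `dirty` and `dirty_data_max` are in bytes; returns how many async-write
/// IOs the scheduler would allow in flight at this dirty-data level.
fn max_async_writes(dirty: u64, dirty_data_max: u64) -> u64 {
    let min_bytes = dirty_data_max * ACTIVE_MIN_DIRTY_PERCENT / 100;
    let max_bytes = dirty_data_max * ACTIVE_MAX_DIRTY_PERCENT / 100;

    if dirty <= min_bytes {
        return ASYNC_WRITE_MIN_ACTIVE;
    }
    if dirty >= max_bytes {
        return ASYNC_WRITE_MAX_ACTIVE;
    }
    // Linear interpolation between the two knee points (integer arithmetic).
    ASYNC_WRITE_MIN_ACTIVE
        + (dirty - min_bytes) * (ASYNC_WRITE_MAX_ACTIVE - ASYNC_WRITE_MIN_ACTIVE)
            / (max_bytes - min_bytes)
}

fn main() {
    let dirty_data_max: u64 = 1 << 30; // assumed 1 GiB cap for the example

    assert_eq!(max_async_writes(0, dirty_data_max), 1);
    assert_eq!(max_async_writes(dirty_data_max, dirty_data_max), 10);
    // Halfway between the 30% and 60% knees (45% dirty) interpolates to 5.
    assert_eq!(max_async_writes(dirty_data_max * 45 / 100, dirty_data_max), 5);
}
```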
145
crates/zfs/xdr/mem_ops.rs
| @@ -1,145 +0,0 @@ | ||
| -use std::{mem, ptr}; | ||
| - | ||
| -use super::{XdrOps, XdrError, XdrResult}; | ||
| - | ||
| -pub struct MemOps<'a> { | ||
| - pos: usize, | ||
| - buffer: &'a mut [u8], | ||
| -} | ||
| - | ||
| -impl<'a> MemOps<'a> { | ||
| - pub fn new(buffer: &'a mut [u8]) -> Self { | ||
| - MemOps { | ||
| - pos: 0, | ||
| - buffer: buffer, | ||
| - } | ||
| - } | ||
| -} | ||
| - | ||
| -// Xdr encodes things in big endian and values are aligned at 4 bytes. For example, a u8 would take | ||
| -// up 4 bytes when serialized. | ||
| -impl<'a> XdrOps for MemOps<'a> { | ||
| - fn get_i64(&mut self) -> XdrResult<i64> { | ||
| - if self.pos >= self.buffer.len() { | ||
| - Err(XdrError) | ||
| - } else if self.buffer.len() - self.pos < 8 { | ||
| - Err(XdrError) | ||
| - } else { | ||
| - let d: &i64 = unsafe { mem::transmute(&self.buffer[self.pos]) }; | ||
| - // let val_d = i64::from_be(*d); | ||
| - self.pos += 8; | ||
| - Ok(i64::from_be(*d)) | ||
| - } | ||
| - } | ||
| - | ||
| - fn put_i64(&mut self, l: i64) -> XdrResult<()> { | ||
| - if self.pos >= self.buffer.len() || self.buffer.len() - self.pos < 8 { | ||
| - // Buffer is too small | ||
| - return Err(XdrError); | ||
| - } | ||
| - | ||
| - let d: &mut i64 = unsafe { mem::transmute(&mut self.buffer[self.pos]) }; | ||
| - *d = l.to_be(); | ||
| - self.pos += 8; | ||
| - Ok(()) | ||
| - } | ||
| - | ||
| - fn get_i32(&mut self) -> XdrResult<i32> { | ||
| - if self.pos >= self.buffer.len() { | ||
| - Err(XdrError) | ||
| - } else if self.buffer.len() - self.pos < 4 { | ||
| - Err(XdrError) | ||
| - } else { | ||
| - let d: &i32 = unsafe { mem::transmute(&self.buffer[self.pos]) }; | ||
| - self.pos += 4; | ||
| - Ok(i32::from_be(*d)) | ||
| - } | ||
| - } | ||
| - | ||
| - fn put_i32(&mut self, i: i32) -> XdrResult<()> { | ||
| - if self.pos >= self.buffer.len() || self.buffer.len() - self.pos < 4 { | ||
| - // Buffer is too small | ||
| - return Err(XdrError); | ||
| - } | ||
| - | ||
| - let d: &mut i32 = unsafe { mem::transmute(&mut self.buffer[self.pos]) }; | ||
| - *d = i.to_be(); | ||
| - self.pos += 4; | ||
| - Ok(()) | ||
| - } | ||
| - | ||
| - fn get_bytes(&mut self, bytes: &mut [u8]) -> XdrResult<()> { | ||
| - if bytes.is_empty() { | ||
| - return Ok(()); | ||
| - } | ||
| - if self.pos >= self.buffer.len() { | ||
| - Err(XdrError) | ||
| - } else if self.buffer.len() - self.pos < bytes.len() { | ||
| - Err(XdrError) | ||
| - } else { | ||
| - // Technically the upper bound on this slice doesn't have to be there | ||
| - let src = self.buffer[self.pos..self.pos + bytes.len()].as_ptr(); | ||
| - let dst = bytes.as_mut_ptr(); | ||
| - unsafe { | ||
| - ptr::copy(src, dst, bytes.len()); | ||
| - } | ||
| - self.pos += bytes.len(); | ||
| - | ||
| - Ok(()) | ||
| - } | ||
| - } | ||
| - | ||
| - fn put_bytes(&mut self, bytes: &[u8]) -> XdrResult<()> { | ||
| - if self.pos >= self.buffer.len() || self.buffer.len() - self.pos < bytes.len() { | ||
| - // Buffer is too small | ||
| - return Err(XdrError); | ||
| - } | ||
| - | ||
| - let src = bytes.as_ptr(); | ||
| - // Technically the upper bound on this slice doesn't have to be there | ||
| - let dst = self.buffer[self.pos..self.pos + bytes.len()].as_mut_ptr(); | ||
| - unsafe { | ||
| - ptr::copy(src, dst, bytes.len()); | ||
| - } | ||
| - self.pos += bytes.len(); | ||
| - | ||
| - Ok(()) | ||
| - } | ||
| - | ||
| - fn get_pos(&self) -> usize { | ||
| - self.pos | ||
| - } | ||
| - | ||
| - fn set_pos(&mut self, new_pos: usize) -> XdrResult<()> { | ||
| - self.pos = new_pos; | ||
| - Ok(()) | ||
| - } | ||
| -} | ||
| - | ||
| -#[test] | ||
| -fn test_mem_ops_i64() { | ||
| - let mut buf: [u8; 8] = [0, 0, 0, 0, 0, 0, 1, 1]; | ||
| - let mut mem_ops = MemOps::new(&mut buf); | ||
| - assert!(mem_ops.get_i64().unwrap() == 257); | ||
| -} | ||
| - | ||
| -#[test] | ||
| -fn test_mem_ops_i64_and_back() { | ||
| - let mut buf = [0u8; 8]; | ||
| - let mut mem_ops = MemOps::new(&mut buf); | ||
| - mem_ops.put_i64(424242).unwrap(); | ||
| - mem_ops.set_pos(0).unwrap(); | ||
| - assert!(mem_ops.get_i64().unwrap() == 424242); | ||
| -} | ||
| - | ||
| -#[test] | ||
| -fn test_mem_ops_i32() { | ||
| - let mut buf: [u8; 4] = [0, 0, 1, 1]; | ||
| - let mut mem_ops = MemOps::new(&mut buf); | ||
| - assert!(mem_ops.get_i32().unwrap() == 257); | ||
| -} | ||
| - | ||
| -#[test] | ||
| -fn test_mem_ops_i32_and_back() { | ||
| - let mut buf = [0u8; 4]; | ||
| - let mut mem_ops = MemOps::new(&mut buf); | ||
| - mem_ops.put_i32(424242).unwrap(); | ||
| - mem_ops.set_pos(0).unwrap(); | ||
| - assert!(mem_ops.get_i32().unwrap() == 424242); | ||
| -} |
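The removed MemOps reads and writes integers through `mem::transmute` of possibly-unaligned pointers. The same big-endian, 4-byte-aligned XDR framing can be expressed with the standard `to_be_bytes`/`from_be_bytes` helpers; the sketch below is illustrative only, and the helper names (`put_u32_be`, `get_u32_be`, `put_opaque`) are made up for this example rather than taken from the crate.

```rust
use std::convert::TryInto;

// Append a 4-byte big-endian integer, as XDR requires.
fn put_u32_be(buf: &mut Vec<u8>, v: u32) {
    buf.extend_from_slice(&v.to_be_bytes());
}

// Read a 4-byte big-endian integer at `pos`, advancing it on success.
fn get_u32_be(buf: &[u8], pos: &mut usize) -> Option<u32> {
    let bytes: [u8; 4] = buf.get(*pos..*pos + 4)?.try_into().ok()?;
    *pos += 4;
    Some(u32::from_be_bytes(bytes))
}

// XDR pads opaque data so every item occupies a multiple of 4 bytes.
fn put_opaque(buf: &mut Vec<u8>, data: &[u8]) {
    buf.extend_from_slice(data);
    let pad = (4 - data.len() % 4) % 4;
    buf.extend(std::iter::repeat(0u8).take(pad));
}

fn main() {
    let mut buf = Vec::new();
    put_u32_be(&mut buf, 257);
    put_opaque(&mut buf, b"abc"); // 3 data bytes plus 1 pad byte
    assert_eq!(buf, [0, 0, 1, 1, b'a', b'b', b'c', 0]);

    let mut pos = 0;
    assert_eq!(get_u32_be(&buf, &mut pos), Some(257));
}
```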
5
crates/zfs/xdr/mod.rs
| @@ -1,5 +0,0 @@ | ||
| -pub use self::xdr::*; | ||
| -pub use self::mem_ops::MemOps; | ||
| - | ||
| -pub mod xdr; | ||
| -pub mod mem_ops; |
219
crates/zfs/xdr/xdr.rs
| @@ -1,219 +0,0 @@ | ||
| -// use std::*; | ||
| - | ||
| -#[derive(Debug)] | ||
| -pub struct XdrError; | ||
| - | ||
| -pub type XdrResult<T> = Result<T, XdrError>; | ||
| - | ||
| -pub enum XdrOp { | ||
| - Encode, | ||
| - Decode, | ||
| - Free, | ||
| -} | ||
| - | ||
| -// TODO: Return `XdrResult` instead | ||
| -pub trait XdrOps { | ||
| - /// Get a i64 from underlying stream | ||
| - fn get_i64(&mut self) -> XdrResult<i64>; | ||
| - | ||
| - /// Put a i64 to underlying stream | ||
| - fn put_i64(&mut self, l: i64) -> XdrResult<()>; | ||
| - | ||
| - /// Get a i32 from underlying stream | ||
| - fn get_i32(&mut self) -> XdrResult<i32>; | ||
| - | ||
| - /// Put a i32 to underlying stream | ||
| - fn put_i32(&mut self, i: i32) -> XdrResult<()>; | ||
| - | ||
| - /// Get some bytes from the underlying stream | ||
| - fn get_bytes(&mut self, bytes: &mut [u8]) -> XdrResult<()>; | ||
| - | ||
| - /// Put some bytes into the underlying stream | ||
| - fn put_bytes(&mut self, bytes: &[u8]) -> XdrResult<()>; | ||
| - | ||
| - /// Returns bytes off from beginning | ||
| - fn get_pos(&self) -> usize; | ||
| - | ||
| - /// Lets you reposition the stream | ||
| - fn set_pos(&mut self, offset: usize) -> XdrResult<()>; | ||
| - | ||
| -// TODO: Not sure if we'll need this? | ||
| -// Buf quick ptr to buffered data | ||
| -// fn inline(&mut self, len: usize) -> *mut i32; | ||
| - | ||
| -// TODO: Not sure if we'll need this? | ||
| -// Change, retrieve client info | ||
| -// fn control(&mut self, req: isize, op: void *); | ||
| -} | ||
| - | ||
| -pub trait Xdr { | ||
| - fn encode_bool(&mut self, i: bool) -> XdrResult<()>; | ||
| - fn decode_bool(&mut self) -> XdrResult<bool>; | ||
| - | ||
| - fn encode_i8(&mut self, i: i8) -> XdrResult<()>; | ||
| - fn decode_i8(&mut self) -> XdrResult<i8>; | ||
| - | ||
| - fn encode_u8(&mut self, u: u8) -> XdrResult<()>; | ||
| - fn decode_u8(&mut self) -> XdrResult<u8>; | ||
| - | ||
| - fn encode_i16(&mut self, i: i16) -> XdrResult<()>; | ||
| - fn decode_i16(&mut self) -> XdrResult<i16>; | ||
| - | ||
| - fn encode_u16(&mut self, u: u16) -> XdrResult<()>; | ||
| - fn decode_u16(&mut self) -> XdrResult<u16>; | ||
| - | ||
| - fn encode_i32(&mut self, i: i32) -> XdrResult<()>; | ||
| - fn decode_i32(&mut self) -> XdrResult<i32>; | ||
| - | ||
| - fn encode_u32(&mut self, u: u32) -> XdrResult<()>; | ||
| - fn decode_u32(&mut self) -> XdrResult<u32>; | ||
| - | ||
| - fn encode_i64(&mut self, i: i64) -> XdrResult<()>; | ||
| - fn decode_i64(&mut self) -> XdrResult<i64>; | ||
| - | ||
| - fn encode_u64(&mut self, u: u64) -> XdrResult<()>; | ||
| - fn decode_u64(&mut self) -> XdrResult<u64>; | ||
| - | ||
| - fn encode_opaque(&mut self, bytes: &[u8]) -> XdrResult<()>; | ||
| - fn decode_opaque(&mut self, bytes: &mut [u8]) -> XdrResult<()>; | ||
| - | ||
| - fn encode_bytes(&mut self, bytes: &[u8]) -> XdrResult<()>; | ||
| - fn decode_bytes(&mut self) -> XdrResult<Vec<u8>>; | ||
| - | ||
| - fn encode_string(&mut self, string: &String) -> XdrResult<()>; | ||
| - fn decode_string(&mut self) -> XdrResult<String>; | ||
| -} | ||
| - | ||
| -impl<T: XdrOps> Xdr for T { | ||
| - fn encode_bool(&mut self, b: bool) -> XdrResult<()> { | ||
| - let i = match b { | ||
| - false => 0, | ||
| - true => 1, | ||
| - }; | ||
| - self.put_i32(i) | ||
| - } | ||
| - | ||
| - fn decode_bool(&mut self) -> XdrResult<bool> { | ||
| - let i = try!(self.get_i32()); | ||
| - match i { | ||
| - 0 => Ok(false), | ||
| - 1 => Ok(true), | ||
| - _ => Err(XdrError), | ||
| - } | ||
| - } | ||
| - | ||
| - fn encode_i8(&mut self, i: i8) -> XdrResult<()> { | ||
| - self.put_i32(i as i32) | ||
| - } | ||
| - | ||
| - fn decode_i8(&mut self) -> XdrResult<i8> { | ||
| - self.get_i32().map(|x| x as i8) | ||
| - } | ||
| - | ||
| - fn encode_u8(&mut self, u: u8) -> XdrResult<()> { | ||
| - self.put_i32(u as i32) | ||
| - } | ||
| - | ||
| - fn decode_u8(&mut self) -> XdrResult<u8> { | ||
| - self.get_i32().map(|x| x as u8) | ||
| - } | ||
| - | ||
| - fn encode_i16(&mut self, i: i16) -> XdrResult<()> { | ||
| - self.put_i32(i as i32) | ||
| - } | ||
| - | ||
| - fn decode_i16(&mut self) -> XdrResult<i16> { | ||
| - self.get_i32().map(|x| x as i16) | ||
| - } | ||
| - | ||
| - fn encode_u16(&mut self, u: u16) -> XdrResult<()> { | ||
| - self.put_i32(u as i32) | ||
| - } | ||
| - | ||
| - fn decode_u16(&mut self) -> XdrResult<u16> { | ||
| - self.get_i32().map(|x| x as u16) | ||
| - } | ||
| - | ||
| - fn encode_i32(&mut self, i: i32) -> XdrResult<()> { | ||
| - self.put_i32(i) | ||
| - } | ||
| - | ||
| - fn decode_i32(&mut self) -> XdrResult<i32> { | ||
| - self.get_i32() | ||
| - } | ||
| - | ||
| - fn encode_u32(&mut self, u: u32) -> XdrResult<()> { | ||
| - self.put_i32(u as i32) | ||
| - } | ||
| - | ||
| - fn decode_u32(&mut self) -> XdrResult<u32> { | ||
| - self.get_i32().map(|x| x as u32) | ||
| - } | ||
| - | ||
| - fn encode_i64(&mut self, i: i64) -> XdrResult<()> { | ||
| - self.put_i64(i) | ||
| - } | ||
| - | ||
| - fn decode_i64(&mut self) -> XdrResult<i64> { | ||
| - self.get_i64() | ||
| - } | ||
| - | ||
| - fn encode_u64(&mut self, u: u64) -> XdrResult<()> { | ||
| - self.put_i64(u as i64) | ||
| - } | ||
| - | ||
| - fn decode_u64(&mut self) -> XdrResult<u64> { | ||
| - self.get_i64().map(|x| x as u64) | ||
| - } | ||
| - | ||
| - fn encode_opaque(&mut self, bytes: &[u8]) -> XdrResult<()> { | ||
| - // XDR byte strings always have len%4 == 0 | ||
| - let crud: [u8; 4] = [0; 4]; | ||
| - let mut round_up = bytes.len() % 4; | ||
| - if round_up > 0 { | ||
| - round_up = 4 - round_up; | ||
| - } | ||
| - try!(self.put_bytes(bytes)); | ||
| - try!(self.put_bytes(&crud[0..round_up])); | ||
| - Ok(()) | ||
| - } | ||
| - | ||
| - fn decode_opaque(&mut self, bytes: &mut [u8]) -> XdrResult<()> { | ||
| - // XDR byte strings always have len%4 == 0 | ||
| - let mut crud: [u8; 4] = [0; 4]; | ||
| - let mut round_up = bytes.len() % 4; | ||
| - if round_up > 0 { | ||
| - round_up = 4 - round_up; | ||
| - } | ||
| - try!(self.get_bytes(bytes)); | ||
| - try!(self.get_bytes(&mut crud[0..round_up])); | ||
| - Ok(()) | ||
| - } | ||
| - | ||
| - fn encode_bytes(&mut self, bytes: &[u8]) -> XdrResult<()> { | ||
| - try!(self.encode_u32(bytes.len() as u32)); | ||
| - self.encode_opaque(bytes) | ||
| - } | ||
| - | ||
| - fn decode_bytes(&mut self) -> XdrResult<Vec<u8>> { | ||
| - let count = try!(self.decode_u32()); | ||
| - let mut bytes = vec![0; count as usize]; | ||
| - try!(self.decode_opaque(&mut bytes[..])); | ||
| - Ok(bytes) | ||
| - } | ||
| - | ||
| - fn encode_string(&mut self, string: &String) -> XdrResult<()> { | ||
| - try!(self.encode_u32(string.as_bytes().len() as u32)); | ||
| - self.encode_opaque(string.as_bytes()) | ||
| - } | ||
| - | ||
| - fn decode_string(&mut self) -> XdrResult<String> { | ||
| - let count = try!(self.decode_u32()); | ||
| - if count > 1024 { | ||
| - return Err(XdrError); | ||
| - } | ||
| - let mut bytes = vec![0; count as usize]; | ||
| - try!(self.decode_opaque(&mut bytes[..])); | ||
| - String::from_utf8(bytes).map_err(|_| XdrError) | ||
| - } | ||
| -} |
190
crates/zfs/zap.rs
| @@ -1,190 +0,0 @@ | ||
| -use std::{fmt, mem, ptr, str}; | ||
| -use std::io::Seek; | ||
| - | ||
| -use super::from_bytes::FromBytes; | ||
| - | ||
| -const MZAP_ENT_LEN: usize = 64; | ||
| -const MZAP_NAME_LEN: usize = MZAP_ENT_LEN - 8 - 4 - 2; | ||
| - | ||
| -#[repr(u64)] | ||
| -#[derive(Copy, Clone, Debug)] | ||
| -pub enum ZapObjectType { | ||
| - Micro = (1 << 63) + 3, | ||
| - Header = (1 << 63) + 1, | ||
| - Leaf = 1 << 63, | ||
| -} | ||
| - | ||
| -/// Microzap | ||
| -#[repr(packed)] | ||
| -pub struct MZapPhys { | ||
| - pub block_type: ZapObjectType, // ZapObjectType::Micro | ||
| - pub salt: u64, | ||
| - pub norm_flags: u64, | ||
| - pad: [u64; 5], | ||
| -} | ||
| - | ||
| -pub struct MZapWrapper { | ||
| - pub phys: MZapPhys, | ||
| - pub chunks: Vec<MZapEntPhys>, // variable size depending on block size | ||
| -} | ||
| - | ||
| -impl FromBytes for MZapWrapper { | ||
| - fn from_bytes(data: &[u8]) -> Result<Self, String> { | ||
| - if data.len() >= mem::size_of::<MZapPhys>() { | ||
| - // Read the first part of the mzap -- its base phys struct | ||
| - let mzap_phys = unsafe { ptr::read(data.as_ptr() as *const MZapPhys) }; | ||
| - // Read the mzap entries, aka chunks | ||
| - let mut mzap_entries = Vec::new(); | ||
| - let num_entries = (data.len() - mem::size_of::<MZapPhys>()) / | ||
| - mem::size_of::<MZapEntPhys>(); | ||
| - for i in 0..num_entries { | ||
| - let entry_pos = mem::size_of::<MZapPhys>() + i * mem::size_of::<MZapEntPhys>(); | ||
| - let mzap_ent = unsafe { | ||
| - ptr::read(data[entry_pos..].as_ptr() as *const MZapEntPhys) | ||
| - }; | ||
| - mzap_entries.push(mzap_ent); | ||
| - } | ||
| - Ok(MZapWrapper { | ||
| - phys: mzap_phys, | ||
| - chunks: mzap_entries, | ||
| - }) | ||
| - } else { | ||
| - Err("Error: needs a proper error message".to_string()) | ||
| - } | ||
| - } | ||
| -} | ||
| - | ||
| -impl fmt::Debug for MZapWrapper { | ||
| - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
| - try!(write!(f, | ||
| - "MZapPhys {{\nblock_type: {:?},\nsalt: {:X},\nnorm_flags: {:X},\nchunk: [\n", | ||
| - self.phys.block_type, | ||
| - self.phys.salt, | ||
| - self.phys.norm_flags)); | ||
| - for chunk in &self.chunks { | ||
| - try!(write!(f, "{:?}\n", chunk)); | ||
| - } | ||
| - try!(write!(f, "] }}\n")); | ||
| - Ok(()) | ||
| - } | ||
| -} | ||
| - | ||
| -#[repr(packed)] | ||
| -pub struct MZapEntPhys { | ||
| - pub value: u64, | ||
| - pub cd: u32, | ||
| - pub pad: u16, | ||
| - pub name: [u8; MZAP_NAME_LEN], | ||
| -} | ||
| - | ||
| -impl MZapEntPhys { | ||
| - pub fn name(&self) -> Option<&str> { | ||
| - let mut len = 0; | ||
| - for c in &self.name[..] { | ||
| - if *c == 0 { | ||
| - break; | ||
| - } | ||
| - len += 1; | ||
| - } | ||
| - | ||
| - str::from_utf8(&self.name[..len]).ok() | ||
| - } | ||
| -} | ||
| - | ||
| -impl fmt::Debug for MZapEntPhys { | ||
| - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
| - try!(write!(f, | ||
| - "MZapEntPhys {{\nvalue: {:X},\ncd: {:X},\nname: ", | ||
| - self.value, | ||
| - self.cd)); | ||
| - for i in 0..MZAP_NAME_LEN { | ||
| - if self.name[i] == 0 { | ||
| - break; | ||
| - } | ||
| - try!(write!(f, "{}", self.name[i] as char)); | ||
| - } | ||
| - try!(write!(f, "\n}}\n")); | ||
| - Ok(()) | ||
| - } | ||
| -} | ||
| - | ||
| -/// Fatzap | ||
| -#[repr(packed)] | ||
| -pub struct ZapPhys { | ||
| - pub block_type: ZapObjectType, // ZapObjectType::Header | ||
| - pub magic: u64, | ||
| - pub ptr_table: ZapTablePhys, | ||
| - pub free_block: u64, | ||
| - pub num_leafs: u64, | ||
| - pub num_entries: u64, | ||
| - pub salt: u64, | ||
| - pub pad: [u64; 8181], | ||
| - pub leafs: [u64; 8192], | ||
| -} | ||
| - | ||
| -#[repr(packed)] | ||
| -pub struct ZapTablePhys { | ||
| - pub block: u64, | ||
| - pub num_blocks: u64, | ||
| - pub shift: u64, | ||
| - pub next_block: u64, | ||
| - pub block_copied: u64, | ||
| -} | ||
| - | ||
| -const ZAP_LEAF_MAGIC: u32 = 0x2AB1EAF; | ||
| -const ZAP_LEAF_CHUNKSIZE: usize = 24; | ||
| - | ||
| -// The amount of space within the chunk available for the array is: | ||
| -// chunk size - space for type (1) - space for next pointer (2) | ||
| -const ZAP_LEAF_ARRAY_BYTES: usize = ZAP_LEAF_CHUNKSIZE - 3; | ||
| - | ||
| -// pub struct ZapLeafPhys { | ||
| -// pub header: ZapLeafHeader, | ||
| -// hash: [u16; ZAP_LEAF_HASH_NUMENTRIES], | ||
| -// union zap_leaf_chunk { | ||
| -// entry, | ||
| -// array, | ||
| -// free, | ||
| -// } chunks[ZapLeafChunk; ZAP_LEAF_NUMCHUNKS], | ||
| -// } | ||
| - | ||
| -#[repr(packed)] | ||
| -pub struct ZapLeafHeader { | ||
| - pub block_type: ZapObjectType, // ZapObjectType::Leaf | ||
| - pub next: u64, | ||
| - pub prefix: u64, | ||
| - pub magic: u32, | ||
| - pub n_free: u16, | ||
| - pub n_entries: u16, | ||
| - pub prefix_len: u16, | ||
| - pub free_list: u16, | ||
| - pad2: [u8; 12], | ||
| -} | ||
| - | ||
| -#[repr(packed)] | ||
| -struct ZapLeafEntry { | ||
| - leaf_type: u8, | ||
| - int_size: u8, | ||
| - next: u16, | ||
| - name_chunk: u16, | ||
| - name_length: u16, | ||
| - value_chunk: u16, | ||
| - value_length: u16, | ||
| - cd: u16, | ||
| - pad: [u8; 2], | ||
| - hash: u64, | ||
| -} | ||
| - | ||
| -#[repr(packed)] | ||
| -struct ZapLeafArray { | ||
| - leaf_type: u8, | ||
| - array: [u8; ZAP_LEAF_ARRAY_BYTES], | ||
| - next: u16, | ||
| -} | ||
| - | ||
| -#[repr(packed)] | ||
| -struct ZapLeafFree { | ||
| - free_type: u8, | ||
| - pad: [u8; ZAP_LEAF_ARRAY_BYTES], | ||
| - next: u16, | ||
| -} |
38
crates/zfs/zfs.rs
| @@ -1,38 +0,0 @@ | ||
| -use std::result; | ||
| - | ||
| -/// The error type used throughout ZFS | ||
| -#[derive(Copy, Clone, Debug, PartialEq)] | ||
| -pub enum Error { | ||
| - NoEntity, | ||
| - Invalid, | ||
| -} | ||
| - | ||
| -/// The Result type used throughout ZFS | ||
| -pub type Result<T> = result::Result<T, Error>; | ||
| - | ||
| -/// The following states are written to disk as part of the normal | ||
| -/// SPA lifecycle: Active, Exported, Destroyed, Spare, L2Cache. The remaining | ||
| -/// states are software abstractions used at various levels to communicate | ||
| -/// pool state. | ||
| -#[derive(Copy, Clone, PartialEq)] | ||
| -pub enum PoolState { | ||
| - Active = 0, // In active use | ||
| - Exported, // Explicitly exported | ||
| - Destroyed, // Explicitly destroyed | ||
| - Spare, // Reserved for hot spare use | ||
| - L2Cache, // Level 2 ARC device | ||
| - Uninitialized, // Internal spa_t state | ||
| - Unavailable, // Internal libzfs state | ||
| - PotentiallyActive, // Internal libzfs state | ||
| -} | ||
| - | ||
| -/// Internal SPA load state. Used by FMA diagnosis engine. | ||
| -#[derive(Copy, Clone, PartialEq)] | ||
| -pub enum SpaLoadState { | ||
| - None, // no load in progress | ||
| - Open, // normal open | ||
| - Import, // import in progress | ||
| - TryImport, // tryimport in progress | ||
| - Recover, // recovery requested | ||
| - Error, // load failed | ||
| -} |
8
crates/zfs/zil_header.rs
| @@ -1,8 +0,0 @@ | ||
| -use super::block_ptr::BlockPtr; | ||
| - | ||
| -#[repr(packed)] | ||
| -pub struct ZilHeader { | ||
| - claim_txg: u64, | ||
| - replay_seq: u64, | ||
| - log: BlockPtr, | ||
| -} |
950
crates/zfs/zio.rs
| @@ -1,950 +0,0 @@ | ||
| -use std::{mem, ptr}; | ||
| -use std::fs::File; | ||
| -use std::io::{Read, Seek, SeekFrom, Write}; | ||
| - | ||
| -use super::avl; | ||
| -use super::block_ptr::BlockPtr; | ||
| -use super::dvaddr::DVAddr; | ||
| -use super::from_bytes::FromBytes; | ||
| -use super::lzjb; | ||
| -use super::uberblock::Uberblock; | ||
| -use super::zfs; | ||
| - | ||
| -pub struct Reader { | ||
| - pub disk: File, | ||
| -} | ||
| - | ||
| -impl Reader { | ||
| - // TODO: Error handling | ||
| - pub fn read(&mut self, start: usize, length: usize) -> Vec<u8> { | ||
| - let mut ret: Vec<u8> = vec![0; length*512]; | ||
| - | ||
| - self.disk.seek(SeekFrom::Start(start as u64 * 512)); | ||
| - self.disk.read(&mut ret); | ||
| - | ||
| - return ret; | ||
| - } | ||
| - | ||
| - pub fn write(&mut self, block: usize, data: &[u8; 512]) { | ||
| - self.disk.seek(SeekFrom::Start(block as u64 * 512)); | ||
| - self.disk.write(data); | ||
| - } | ||
| - | ||
| - pub fn read_dva(&mut self, dva: &DVAddr) -> Vec<u8> { | ||
| - self.read(dva.sector() as usize, dva.asize() as usize) | ||
| - } | ||
| - | ||
| - pub fn read_block(&mut self, block_ptr: &BlockPtr) -> Result<Vec<u8>, String> { | ||
| - let data = self.read_dva(&block_ptr.dvas[0]); | ||
| - match block_ptr.compression() { | ||
| - 2 => { | ||
| - // compression off | ||
| - Ok(data) | ||
| - } | ||
| - 1 | 3 => { | ||
| - // lzjb compression | ||
| - let mut decompressed = vec![0; (block_ptr.lsize()*512) as usize]; | ||
| - lzjb::decompress(&data, &mut decompressed); | ||
| - Ok(decompressed) | ||
| - } | ||
| - _ => Err("Error: not enough bytes".to_string()), | ||
| - } | ||
| - } | ||
| - | ||
| - pub fn read_type<T: FromBytes>(&mut self, block_ptr: &BlockPtr) -> Result<T, String> { | ||
| - let data = self.read_block(block_ptr); | ||
| - data.and_then(|data| T::from_bytes(&data[..])) | ||
| - } | ||
| - | ||
| - pub fn read_type_array<T: FromBytes>(&mut self, | ||
| - block_ptr: &BlockPtr, | ||
| - offset: usize) | ||
| - -> Result<T, String> { | ||
| - let data = self.read_block(block_ptr); | ||
| - data.and_then(|data| T::from_bytes(&data[offset * mem::size_of::<T>()..])) | ||
| - } | ||
| - | ||
| - pub fn uber(&mut self) -> Result<Uberblock, String> { | ||
| - let mut newest_uberblock: Option<Uberblock> = None; | ||
| - for i in 0..128 { | ||
| - if let Ok(uberblock) = Uberblock::from_bytes(&self.read(256 + i * 2, 2)) { | ||
| - let newest = match newest_uberblock { | ||
| - Some(previous) => { | ||
| - if uberblock.txg > previous.txg { | ||
| - // Found a newer uberblock | ||
| - true | ||
| - } else { | ||
| - false | ||
| - } | ||
| - } | ||
| - // No uberblock yet, so first one we find is the newest | ||
| - None => true, | ||
| - }; | ||
| - | ||
| - if newest { | ||
| - newest_uberblock = Some(uberblock); | ||
| - } | ||
| - } | ||
| - } | ||
| - | ||
| - match newest_uberblock { | ||
| - Some(uberblock) => Ok(uberblock), | ||
| - None => Err("Failed to find valid uberblock".to_string()), | ||
| - } | ||
| - } | ||
| -} | ||
| - | ||
| -/// ///////////////////////////////////////////////////////////////////////////////////////////////// | ||
| - | ||
| -// pub struct Zio { | ||
| -// Core information about this IO | ||
| -// bookmark: ZBookmarkPhys, | ||
| -// prop: ZioProp, | ||
| -// zio_type: Type, | ||
| -// child_type: Child, | ||
| -// int io_cmd, | ||
| -// priority: Priority, | ||
| -// reexecute: u8, | ||
| -// state: [u8; NUM_WAIT_TYPES], | ||
| -// txg: u64, | ||
| -// spa_t *io_spa, | ||
| -// blkptr_t *io_bp, | ||
| -// blkptr_t *io_bp_override, | ||
| -// bp_copy: BlockPtr, | ||
| -// list_t io_parent_list, | ||
| -// list_t io_child_list, | ||
| -// zio_link_t *io_walk_link, | ||
| -// zio_t *logical, | ||
| -// zio_transform_t *io_transform_stack, | ||
| -// | ||
| -// Callback info | ||
| -// ready: DoneFunc, | ||
| -// physdone: DoneFunc, | ||
| -// done: DoneFunc, | ||
| -// private: *void, | ||
| -// prev_space_delta: i64, // DMU private | ||
| -// bp_orig: BlockPtr, | ||
| -// | ||
| -// Data represented by this IO | ||
| -// void *data, | ||
| -// void *orig_data, | ||
| -// size: u64, | ||
| -// orig_size: u64, | ||
| -// | ||
| -// Stuff for the vdev stack | ||
| -// vdev_t *vd, | ||
| -// void *io_vsd, | ||
| -// const zio_vsd_ops_t *io_vsd_ops, | ||
| -// | ||
| -// offset: u64, | ||
| -// timestamp: hrtime_t, // submitted at | ||
| -// delta: hrtime_t, // vdev queue service delta | ||
| -// delay: u64, // vdev disk service delta (ticks) | ||
| -// queue_node: avl::NodeId, | ||
| -// offset_node: avl::NodeId, | ||
| -// | ||
| -// Internal pipeline state | ||
| -// flags: Flag, | ||
| -// stage: State, | ||
| -// pipeline: State, | ||
| -// orig_flags: ZioFlag, | ||
| -// orig_stage: State, | ||
| -// orig_pipeline: State, | ||
| -// error: zfs::Error, | ||
| -// child_error: [zfs::Error; NUM_CHILD_TYPES], | ||
| -// children: [[u64; NUM_WAIT_TYPES]; NUM_CHILD_TYPES], | ||
| -// child_count: u64, | ||
| -// phys_children: u64, | ||
| -// parent_count: u64, | ||
| -// uint64_t *stall, | ||
| -// zio_t *gang_leader, | ||
| -// zio_gang_node_t *gang_tree, | ||
| -// void *executor, | ||
| -// void *waiter, | ||
| -// kmutex_t lock, | ||
| -// kcondvar_t cv,*/ | ||
| -// | ||
| -// FMA state | ||
| -// zio_cksum_report_t *io_cksum_report, | ||
| -// uint64_t io_ena, | ||
| -// | ||
| -// Taskq dispatching state | ||
| -// tqent: TaskqEnt, | ||
| -// } | ||
| -// | ||
| -// impl Zio { | ||
| -// pub fn root(spa: Option<&Spa>, zio_done_func_t *done, void *private, flags: Flag) -> Self { | ||
| -// Self::null(None, spa, None, done, private, flags) | ||
| -// } | ||
| -// | ||
| -// pub fn read(zio_t *pio, spa_t *spa, const blkptr_t *bp, | ||
| -// void *data, uint64_t size, zio_done_func_t *done, void *private, | ||
| -// zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb) -> Self { | ||
| -// zfs_blkptr_verify(spa, bp); | ||
| -// | ||
| -// let pipeline = | ||
| -// if flags & ZIO_FLAG_DDT_CHILD { | ||
| -// ZIO_DDT_CHILD_READ_PIPELINE | ||
| -// } else { ZIO_READ_PIPELINE }; | ||
| -// | ||
| -// Self::create(pio, spa, BP_PHYSICAL_BIRTH(bp), bp, | ||
| -// data, size, done, private, | ||
| -// Type::Read, priority, flags, None, 0, zb, | ||
| -// State::Open, pipeline) | ||
| -// } | ||
| -// | ||
| -// fn null(pio: Option<&Zio>, spa: Option<&Spa>, vd: Option<&vdev::Vdev>, zio_done_func_t *done, | ||
| -// void *private, flags: Flag) -> Self { | ||
| -// Self::create(pio, spa, 0, None, None, 0, done, private, | ||
| -// Type::Null, Priority::Now, flags, vd, 0, None, | ||
| -// State::Open, ZIO_INTERLOCK_PIPELINE) | ||
| -// } | ||
| -// | ||
| -// fn create(zio_t *pio, spa_t *spa, txg: u64, bp: Option<&BlockPtr>, | ||
| -// void *data, size: u64, zio_done_func_t *done, void *private, | ||
| -// zio_type: Type, priority: Priority, flags: Flag, | ||
| -// vd: Option<&vdev::Vdev>, offset: u64, zb: Option<&ZBookmarkPhys>, | ||
| -// stage: State, pipeline: State)-> Self { | ||
| -// assert!(size <= SPA_MAXBLOCKSIZE); | ||
| -// assert!(util::p2_phase(size, SPA_MINBLOCKSIZE) == 0); | ||
| -// assert!(util::p2_phase(offset, SPA_MINBLOCKSIZE) == 0); | ||
| -// | ||
| -// assert!(!vd || spa_config_held(spa, SCL_STATE_ALL, RW_READER)); | ||
| -// assert!(!bp || !(flags & ZIO_FLAG_CONFIG_WRITER)); | ||
| -// assert!(vd || stage == ZIO_STAGE_OPEN); | ||
| -// | ||
| -// zio = kmem_cache_alloc(zcache, KM_SLEEP); | ||
| -// bzero(zio, sizeof (zt)); | ||
| -// | ||
| -// mutex_init(&zio->lock, NULL, MUTEX_DEFAULT, NULL); | ||
| -// cv_init(&zio->cv, NULL, CV_DEFAULT, NULL); | ||
| -// | ||
| -// list_create(&zio->parent_list, sizeof (zlink_t), | ||
| -// offsetof(zlink_t, zl_parent_node)); | ||
| -// list_create(&zio->child_list, sizeof (zlink_t), | ||
| -// offsetof(zlink_t, zl_child_node)); | ||
| -// | ||
| -// let child_type = | ||
| -// if vd.is_some() { | ||
| -// Child::Vdev | ||
| -// } else if flags & ZIO_FLAG_GANG_CHILD { | ||
| -// Child::Gang | ||
| -// } else if flags & ZIO_FLAG_DDT_CHILD { | ||
| -// Child::Ddt | ||
| -// } else { | ||
| -// Child::Logical | ||
| -// }; | ||
| -// | ||
| -// if let Some(bp) = bp { | ||
| -// zio.bp = (blkptr_t *)bp; | ||
| -// zio.bp_copy = *bp; | ||
| -// zio.bp_orig = *bp; | ||
| -// if zio_type != Type::Write || child_type == Child::Ddt { | ||
| -// zio.bp = &zio.bp_copy; // so caller can free | ||
| -// } | ||
| -// if child_type == Child::Logical { | ||
| -// zio.logical = zio; | ||
| -// } | ||
| -// if child_type > Child::Gang && BP_IS_GANG(bp) { | ||
| -// pipeline |= ZIO_GANG_STAGES; | ||
| -// } | ||
| -// } | ||
| -// | ||
| -// if zb != NULL { | ||
| -// zio.bookmark = *zb; | ||
| -// } | ||
| -// | ||
| -// if let Some(pio) = pio { | ||
| -// if zio.logical == NULL { | ||
| -// zio.logical = pio.logical; | ||
| -// } | ||
| -// if zio.child_type == Child::Gang { | ||
| -// zio.gang_leader = pio.gang_leader; | ||
| -// } | ||
| -// Self::add_child(pio, zio); | ||
| -// } | ||
| -// | ||
| -// taskq::taskq_init_ent(&zio->tqent); | ||
| -// | ||
| -// Zio { | ||
| -// child_type: child_type, | ||
| -// spa: spa, | ||
| -// txg: txg, | ||
| -// done: done, | ||
| -// private: private, | ||
| -// zio_type: zio_type, | ||
| -// priority: priority, | ||
| -// vd: vd, | ||
| -// offset: offset, | ||
| -// | ||
| -// data: data, | ||
| -// orig_data: data, | ||
| -// size: size, | ||
| -// orig_size: size, | ||
| -// | ||
| -// flags: flags, | ||
| -// orig_flags: flags, | ||
| -// stage: stage, | ||
| -// orig_stage: stage, | ||
| -// pipeline: pipeline, | ||
| -// orig_pipeline: pipeline, | ||
| -// | ||
| -// state: [stage >= State::Ready, | ||
| -// state >= State::Done], | ||
| -// } | ||
| -// } | ||
| -// | ||
| -// fn read_phys(zio_t *pio, vdev_t *vd, offset: u64, size: u64, | ||
| -// void *data, int checksum, zio_done_func_t *done, void *private, | ||
| -// priority: Priority, zio_flag flags, labels: bool) -> Zio { | ||
| -// assert!(vd->vdev_children == 0); | ||
| -// assert!(!labels || offset + size <= VDEV_LABEL_START_SIZE || | ||
| -// offset >= vd.vdev_psize - VDEV_LABEL_END_SIZE); | ||
| -// assert!(offset + size <= vd.vdev_psize); | ||
| -// | ||
| -// let mut zio = Self::create(pio, vd.vdev_spa, 0, NULL, data, size, done, private, | ||
| -// Type::Read, priority, flags | ZIO_FLAG_PHYSICAL, vd, offset, | ||
| -// NULL, State::Open, ZIO_READ_PHYS_PIPELINE); | ||
| -// | ||
| -// zio.prop.checksum = checksum; | ||
| -// | ||
| -// zio | ||
| -// } | ||
| -// | ||
| -// ========================================================================== | ||
| -// Parent/Child relationships | ||
| -// ========================================================================== | ||
| -// | ||
| -// fn add_child(parent: &mut Zio, child: &mut Zio) { | ||
| -// zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_SLEEP); | ||
| -// int w; | ||
| -// | ||
| -// Logical I/Os can have logical, gang, or vdev children. | ||
| -// Gang I/Os can have gang or vdev children. | ||
| -// Vdev I/Os can only have vdev children. | ||
| -// The following assert captures all of these constraints. | ||
| -// assert!(cio->io_child_type <= pio->io_child_type); | ||
| -// | ||
| -// zl.parent = parent; | ||
| -// zl.child = child; | ||
| -// | ||
| -// mutex_enter(&child.lock); | ||
| -// mutex_enter(&parent.lock); | ||
| -// | ||
| -// assert!(parent.state[WaitType::Done] == 0); | ||
| -// | ||
| -// for w in 0..NUM_WAIT_TYPES { | ||
| -// parent.children[child.child_type][w] += !child.state[w]; | ||
| -// } | ||
| -// | ||
| -// list_insert_head(&pio->io_child_list, zl); | ||
| -// list_insert_head(&cio->io_parent_list, zl); | ||
| -// | ||
| -// parent.child_count += 1; | ||
| -// child.parent_count += 1; | ||
| -// | ||
| -// mutex_exit(&pio->io_lock); | ||
| -// mutex_exit(&cio->io_lock); | ||
| -// } | ||
| -// | ||
| -// ========================================================================== | ||
| -// Execute the IO pipeline | ||
| -// ========================================================================== | ||
| -// | ||
| -// fn taskq_dispatch(&mut self, mut tq_type: TaskqType, cut_in_line: bool) { | ||
| -// let spa = self.spa; | ||
| -// let flags = if cut_in_line { TQ_FRONT } else { 0 }; | ||
| -// | ||
| -// let zio_type = | ||
| -// if self.flags & (FLAG_CONFIG_WRITER | FLAG_PROBE) != 0 { | ||
| -// If we're a config writer or a probe, the normal issue and | ||
| -// interrupt threads may all be blocked waiting for the config lock. | ||
| -// In this case, select the otherwise-unused taskq for ZIO_TYPE_NULL. | ||
| -// Type::Null | ||
| -// } else if self.zio_type == Type::Write && self.vd.is_some() && self.vd.vdev_aux { | ||
| -// A similar issue exists for the L2ARC write thread until L2ARC 2.0. | ||
| -// Type::Null | ||
| -// } else { | ||
| -// self.zio_type | ||
| -// }; | ||
| -// | ||
| -// If this is a high priority IO, then use the high priority taskq if | ||
| -// available. | ||
| -// if self.priority == Priority::Now && spa->spa_zio_taskq[t][tq_type + 1].stqs_count != 0 { | ||
| -// tq_type += 1; | ||
| -// } | ||
| -// | ||
| -// assert!(tq_type < NUM_TASKQ_TYPES); | ||
| -// | ||
| -// NB: We are assuming that the zio can only be dispatched | ||
| -// to a single taskq at a time. It would be a grievous error | ||
| -// to dispatch the zio to another taskq at the same time. | ||
| -// assert!(taskq_empty_ent(&zio.tqent)); | ||
| -// spa.taskq_dispatch_ent(zio_type, tq_type, Box::new(|| { self.execute() }), flags, &self.tqent); | ||
| -// } | ||
| -// | ||
| -// fn taskq_member(&self, TaskqType q) -> bool { | ||
| -// let spa = self.spa; | ||
| -// | ||
| -// for t in 0..NUM_ZIO_TYPES { | ||
| -// let tqs = &spa.zio_taskq[t][q]; | ||
| -// for i in 0..tqs.count { | ||
| -// if tqs.taskq[i].member(self.executor) { | ||
| -// return true; | ||
| -// } | ||
| -// } | ||
| -// } | ||
| -// | ||
| -// false | ||
| -// } | ||
| -// | ||
| -// fn issue_async(&self) -> PipelineFlow { | ||
| -// self.taskq_dispatch(TaskqType::Issue, false); | ||
| -// | ||
| -// PipelineFlow::Stop | ||
| -// } | ||
| -// | ||
| -// fn interrupt(&self) { | ||
| -// self.taskq_dispatch(TaskqType::Interrupt, false); | ||
| -// } | ||
| -// | ||
| -// Execute the I/O pipeline until one of the following occurs: | ||
| -// (1) the I/O completes; (2) the pipeline stalls waiting for | ||
| -// dependent child I/Os; (3) the I/O issues, so we're waiting | ||
| -// for an I/O completion interrupt; (4) the I/O is delegated by | ||
| -// vdev-level caching or aggregation; (5) the I/O is deferred | ||
| -// due to vdev-level queueing; (6) the I/O is handed off to | ||
| -// another thread. In all cases, the pipeline stops whenever | ||
| -// there's no CPU work; it never burns a thread in cv_wait_io(). | ||
| -// | ||
| -// There's no locking on io_stage because there's no legitimate way | ||
| -// for multiple threads to be attempting to process the same I/O. | ||
| -// fn execute(&mut self) { | ||
| -// self.executor = curthread; | ||
| -// | ||
| -// while self.stage < State::Done { | ||
| -// let mut stage = self.stage; | ||
| -// | ||
| -// assert!(!MUTEX_HELD(&self.io_lock)); | ||
| -// assert!(ISP2(stage)); | ||
| -// assert!(self.stall == NULL); | ||
| -// while stage & self.pipeline == 0 { | ||
| -// stage <<= 1; | ||
| -// } | ||
| -// | ||
| -// assert!(stage <= State::Done); | ||
| -// | ||
| -// let cut = | ||
| -// match stage { | ||
| -// State::VdevIoStart => REQUEUE_IO_START_CUT_IN_LINE, | ||
| -// _ => false, | ||
| -// }; | ||
| -// | ||
| -// If we are in interrupt context and this pipeline stage | ||
| -// will grab a config lock that is held across IO, | ||
| -// or may wait for an IO that needs an interrupt thread | ||
| -// to complete, issue async to avoid deadlock. | ||
| -// | ||
| -// For VDEV_IO_START, we cut in line so that the io will | ||
| -// be sent to disk promptly. | ||
| -// if stage & BLOCKING_STAGES != 0 && self.vd.is_none() && self.taskq_member(TaskqType::Interrupt) { | ||
| -// self.taskq_dispatch(TaskqType::Issue, cut); | ||
| -// return; | ||
| -// } | ||
| -// | ||
| -// If we executing in the context of the tx_sync_thread, | ||
| -// or we are performing pool initialization outside of a | ||
| -// zio_taskq[ZIO_TASKQ_ISSUE|ZIO_TASKQ_ISSUE_HIGH] context. | ||
| -// Then issue the zio asynchronously to minimize stack usage | ||
| -// for these deep call paths. | ||
| -// let dp = self.spa.get_dsl_pool(); | ||
| -// if (dp && curthread == dp.tx.tx_sync_thread) || | ||
| -// (dp && dp.spa.is_initializing() && !self.taskq_member(TaskqType::Issue) && | ||
| -// !self.taskq_member(TaskqType::IssueHigh)) { | ||
| -// self.taskq_dispatch(TaskqType::Issue, cut); | ||
| -// return; | ||
| -// }*/ | ||
| -// | ||
| -// self.stage = stage; | ||
| -// let rv = pipeline_stages[highbit64(stage) - 1](self); | ||
| -// | ||
| -// if rv == PipelineFlow::Stop { | ||
| -// return; | ||
| -// } | ||
| -// | ||
| -// assert!(rv == PipelineFlow::Continue); | ||
| -// } | ||
| -// } | ||
| -// | ||
| -// pub fn wait(&self) -> zfs::Result<()> { | ||
| -// assert!(self.stage == State::Open); | ||
| -// assert!(self.executor == NULL); | ||
| -// | ||
| -// self.waiter = curthread; | ||
| -// | ||
| -// self.execute(); | ||
| -// | ||
| -// mutex_enter(&self.lock); | ||
| -// while self.executor != NULL { | ||
| -// cv_wait_io(&self.cv, &self.lock); | ||
| -// } | ||
| -// mutex_exit(&self.lock); | ||
| -// | ||
| -// let error = self.error; | ||
| -// self.destroy(); | ||
| -// | ||
| -// Ok(()) | ||
| -// } | ||
| -// | ||
| -// fn no_wait(&mut self) { | ||
| -// assert!(self.executor == NULL); | ||
| -// | ||
| -// if self.child_type == Child::Logical && self.unique_parent() == NULL { | ||
| -// This is a logical async I/O with no parent to wait for it. | ||
| -// We add it to the spa_async_root_zio "Godfather" I/O which | ||
| -// will ensure they complete prior to unloading the pool. | ||
| -// kpreempt_disable(); | ||
| -// let pio = self.spa.async_zio_root[CPU_SEQID]; | ||
| -// kpreempt_enable(); | ||
| -// | ||
| -// Self::add_child(pio, self); | ||
| -// } | ||
| -// | ||
| -// self.execute(); | ||
| -// } | ||
| -// | ||
| -// ///////////////////////////////////////////////////////////////////////////////////////////// | ||
| -// Pipeline stages | ||
| -// ///////////////////////////////////////////////////////////////////////////////////////////// | ||
| -// | ||
| -// fn read_bp_init(zio_t *zio) -> PipelineFlow { | ||
| -// blkptr_t *bp = zio.bp; | ||
| -// | ||
| -// if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF && | ||
| -// zio.child_type == Child::Logical && | ||
| -// !(zio->io_flags & ZIO_FLAG_RAW)) { | ||
| -// uint64_t psize = BP_IS_EMBEDDED(bp) ? BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp); | ||
| -// void *cbuf = zio_buf_alloc(psize); | ||
| -// | ||
| -// zio_push_transform(zio, cbuf, psize, psize, zio_decompress); | ||
| -// } | ||
| -// | ||
| -// if BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA { | ||
| -// zio.pipeline = ZIO_INTERLOCK_PIPELINE; | ||
| -// decode_embedded_bp_compressed(bp, zio->io_data); | ||
| -// } else { | ||
| -// ASSERT(!BP_IS_EMBEDDED(bp)); | ||
| -// } | ||
| -// | ||
| -// if !DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) && BP_GET_LEVEL(bp) == 0 { | ||
| -// zio.flags |= ZIO_FLAG_DONT_CACHE; | ||
| -// } | ||
| -// | ||
| -// if BP_GET_TYPE(bp) == DMU_OT_DDT_ZAP { | ||
| -// zio.flags |= ZIO_FLAG_DONT_CACHE; | ||
| -// } | ||
| -// | ||
| -// if BP_GET_DEDUP(bp) && zio.child_type == Child::Logical { | ||
| -// zio.pipeline = ZIO_DDT_READ_PIPELINE; | ||
| -// } | ||
| -// | ||
| -// return PipelineFlow::Continue; | ||
| -// } | ||
| -// | ||
| -// Issue an I/O to the underlying vdev. Typically the issue pipeline | ||
| -// stops after this stage and will resume upon I/O completion. | ||
| -// However, there are instances where the vdev layer may need to | ||
| -// continue the pipeline when an I/O was not issued. Since the I/O | ||
| -// that was sent to the vdev layer might be different than the one | ||
| -// currently active in the pipeline (see vdev_queue_io()), we explicitly | ||
| -// force the underlying vdev layers to call either zio_execute() or | ||
| -// zio_interrupt() to ensure that the pipeline continues with the correct I/O. | ||
| -// fn vdev_io_start(zio_t *zio) -> PipelineFlow { | ||
| -// vdev_t *vd = zio.vd; | ||
| -// spa_t *spa = zio.spa; | ||
| -// | ||
| -// assert!(zio.error == 0); | ||
| -// assert!(zio.child_error[Child::Vdev] == 0); | ||
| -// | ||
| -// if vd == NULL { | ||
| -// if zio.flags & ZIO_FLAG_CONFIG_WRITER == 0 { | ||
| -// spa_config_enter(spa, SCL_ZIO, zio, RW_READER); | ||
| -// } | ||
| -// | ||
| -// The mirror_ops handle multiple DVAs in a single BP. | ||
| -// vdev_mirror_ops.vdev_op_start(zio); | ||
| -// return PipelineFlow::Stop; | ||
| -// } | ||
| -// | ||
| -// We keep track of time-sensitive I/Os so that the scan thread | ||
| -// can quickly react to certain workloads. In particular, we care | ||
| -// about non-scrubbing, top-level reads and writes with the following | ||
| -// characteristics: | ||
| -// - synchronous writes of user data to non-slog devices | ||
| -// - any reads of user data | ||
| -// When these conditions are met, adjust the timestamp of spa_last_io | ||
| -// which allows the scan thread to adjust its workload accordingly. | ||
| -// if zio.flags & ZIO_FLAG_SCAN_THREAD == 0 && zio.bp != NULL && vd == vd.top_vdev && | ||
| -// !vd.is_log && zio.bookmark.objset != DMU_META_OBJSET && zio.txg != spa.syncing_txg() { | ||
| -// let old = spa.spa_last_io; | ||
| -// let new = ddi_get_lbolt64(); | ||
| -// if old != new { | ||
| -// atomic_cas_64(&spa.spa_last_io, old, new); | ||
| -// } | ||
| -// } | ||
| -// | ||
| -// let align = 1 << vd.top_vdev.ashift; | ||
| -// | ||
| -// if zio.flags & ZIO_FLAG_PHYSICAL == 0 && util::p2_phase(zio.size, align) != 0 { | ||
| -// Transform logical writes to be a full physical block size. | ||
| -// let asize = util::p2_round_up(zio.size, align); | ||
| -// char *abuf = zio_buf_alloc(asize); | ||
| -// assert!(vd == vd.vdev_top); | ||
| -// if (zio.zio_type == Type::Write) { | ||
| -// bcopy(zio.data, abuf, zio.size); | ||
| -// bzero(abuf + zio.size, asize - zio.size); | ||
| -// } | ||
| -// zio_push_transform(zio, abuf, asize, asize, zsubblock); | ||
| -// } | ||
| -// | ||
| -// If this is not a physical io, make sure that it is properly aligned | ||
| -// before proceeding. | ||
| -// if zio.flags & ZIO_FLAG_PHYSICAL == 0 { | ||
| -// assert!(util::p2_phase(zio.offset, align) == 0); | ||
| -// assert!(util::p2_phase(zio.size, align) == 0); | ||
| -// } else { | ||
| -// For physical writes, we allow 512b aligned writes and assume | ||
| -// the device will perform a read-modify-write as necessary. | ||
| -// assert!(util::p2_phase(zio.offset, SPA_MINBLOCKSIZE) == 0); | ||
| -// assert!(util::p2_phase(zio.size, SPA_MINBLOCKSIZE) == 0); | ||
| -// } | ||
| -// | ||
| -// VERIFY(zio.zio_type != Type::Write || spa_writeable(spa)); | ||
| -// | ||
| -// If this is a repair I/O, and there's no self-healing involved -- | ||
| -// that is, we're just resilvering what we expect to resilver -- | ||
| -// then don't do the I/O unless zio's txg is actually in vd's DTL. | ||
| -// This prevents spurious resilvering with nested replication. | ||
| -// For example, given a mirror of mirrors, (A+B)+(C+D), if only | ||
| -// A is out of date, we'll read from C+D, then use the data to | ||
| -// resilver A+B -- but we don't actually want to resilver B, just A. | ||
| -// The top-level mirror has no way to know this, so instead we just | ||
| -// discard unnecessary repairs as we work our way down the vdev tree. | ||
| -// The same logic applies to any form of nested replication: | ||
| -// ditto + mirror, RAID-Z + replacing, etc. This covers them all. | ||
| -// if (zio.flags & ZIO_FLAG_IO_REPAIR != 0 && | ||
| -// zio.flags & ZIO_FLAG_SELF_HEAL == 0 && | ||
| -// zio.txg != 0 && /* not a delegated i/o */ | ||
| -// !vdev_dtl_contains(vd, DTL_PARTIAL, zio.txg, 1)) { | ||
| -// assert!(zio.zio_type == Type::Write); | ||
| -// zio_vdev_bypass(zio); | ||
| -// return PipelineFlow::Continue; | ||
| -// } | ||
| -// | ||
| -// if vd.ops.is_leaf() && (zio.zio_type == Type::Read || zio.zio_type == Type::Write) { | ||
| -// if zio.zio_type == Type::Read && vdev_cache_read(zio) { | ||
| -// return PipelineFlow::Continue; | ||
| -// } | ||
| -// | ||
| -// if (zio = vdev_queue_io(zio)) == NULL { | ||
| -// return PipelineFlow::Stop; | ||
| -// } | ||
| -// | ||
| -// if !vdev_accessible(vd, zio) { | ||
| -// zio.error = SET_ERROR(ENXIO); | ||
| -// zio.interrupt(); | ||
| -// return PipelineFlow::Stop; | ||
| -// } | ||
| -// } | ||
| -// | ||
| -// (vd.ops.io_start)(zio); | ||
| -// PipelineFlow::Stop | ||
| -// } | ||
| -// | ||
| -// fn vdev_io_done(zio: &mut Zio) -> PipelineFlow { | ||
| -// vdev_t *vd = zio.vd; | ||
| -// vdev_ops_t *ops = vd ? vd->vdev_ops : &vdev_mirror_ops; | ||
| -// let mut unexpected_error = false; | ||
| -// | ||
| -// if zio.wait_for_children(Child::Vdev, WaitType::Done) { | ||
| -// return PipelineFlow::Stop; | ||
| -// } | ||
| -// | ||
| -// assert!(zio.zio_type == Type::Read || zio.zio_type == Type::Write); | ||
| -// | ||
| -// if vd != NULL && vd.ops.is_leaf() { | ||
| -// vdev_queue_io_done(zio); | ||
| -// | ||
| -// if zio.zio_type == Type::Write { | ||
| -// vdev_cache_write(zio); | ||
| -// } | ||
| -// | ||
| -// if zio_injection_enabled && zio.error == 0 { | ||
| -// zio.error = zio_handle_device_injection(vd, zio, EIO); | ||
| -// } | ||
| -// | ||
| -// if zio_injection_enabled && zio.error == 0 { | ||
| -// zio.error = zio_handle_label_injection(zio, EIO); | ||
| -// }*/ | ||
| -// | ||
| -// if zio.error { | ||
| -// if !vdev_accessible(vd, zio) { | ||
| -// zio.error = SET_ERROR(ENXIO); | ||
| -// } else { | ||
| -// unexpected_error = true; | ||
| -// } | ||
| -// } | ||
| -// } | ||
| -// | ||
| -// (ops.io_done)(zio); | ||
| -// | ||
| -// if unexpected_error { | ||
| -// VERIFY(vdev_probe(vd, zio) == NULL); | ||
| -// } | ||
| -// | ||
| -// PipelineFlow::Continue | ||
| -// } | ||
| -// } | ||
| - | ||
| -/// ///////////////////////////////////////////////////////////////////////////////////////////////// | ||
| - | ||
| -// A bookmark is a four-tuple <objset, object, level, blkid> that uniquely | ||
| -// identifies any block in the pool. By convention, the meta-objset (MOS) | ||
| -// is objset 0, and the meta-dnode is object 0. This covers all blocks | ||
| -// except root blocks and ZIL blocks, which are defined as follows: | ||
| -// | ||
| -// Root blocks (objset_phys_t) are object 0, level -1: <objset, 0, -1, 0>. | ||
| -// ZIL blocks are bookmarked <objset, 0, -2, blkid == ZIL sequence number>. | ||
| -// dmu_sync()ed ZIL data blocks are bookmarked <objset, object, -2, blkid>. | ||
| -// | ||
| -// Note: this structure is called a bookmark because its original purpose | ||
| -// was to remember where to resume a pool-wide traverse. | ||
| -// | ||
| -// Note: this structure is passed between userland and the kernel, and is | ||
| -// stored on disk (by virtue of being incorporated into other on-disk | ||
| -// structures, e.g. dsl_scan_phys_t). | ||
| -// | ||
| -struct ZbookmarkPhys { | ||
| - objset: u64, | ||
| - object: u64, | ||
| - level: i64, | ||
| - blkid: u64, | ||
| -} | ||
| - | ||
| -const REQUEUE_IO_START_CUT_IN_LINE: bool = true; | ||
| -pub const NUM_CHILD_TYPES: usize = 4; | ||
| -pub const NUM_WAIT_TYPES: usize = 2; | ||
| -pub const NUM_TYPES: usize = 6; | ||
| -pub const NUM_TASKQ_TYPES: usize = 4; | ||
| - | ||
| -// Default Linux timeout for a sd device. | ||
| -// const ZIO_DELAY_MAX = (30 * MILLISEC); | ||
| - | ||
| -// const ZIO_FAILURE_MODE_WAIT = 0; | ||
| -// const ZIO_FAILURE_MODE_CONTINUE = 1; | ||
| -// const ZIO_FAILURE_MODE_PANIC = 2; | ||
| - | ||
| -// pub enum TaskqType { | ||
| -// Issue = 0, | ||
| -// IssueHigh, | ||
| -// Interrupt, | ||
| -// InterruptHigh, | ||
| -// } | ||
| -// | ||
| -// #[derive(Copy, Clone, PartialEq)] | ||
| -// enum Priority { | ||
| -// SyncRead, | ||
| -// SyncWrite, // ZIL | ||
| -// AsyncRead, // prefetch | ||
| -// AsyncWrite, // spa_sync() | ||
| -// Scrub, // asynchronous scrub/resilver reads | ||
| -// NumQueueable, | ||
| -// | ||
| -// Now // non-queued io (e.g. free) | ||
| -// } | ||
| -// | ||
| -// #[derive(Copy, Clone, PartialEq)] | ||
| -// pub enum Type { | ||
| -// Null = 0, | ||
| -// Read, | ||
| -// Write, | ||
| -// Free, | ||
| -// Claim, | ||
| -// IoCtl, | ||
| -// } | ||
| -// | ||
| -// const FLAG_AGG_INHERIT: u64 = Flag::CanFail - 1; | ||
| -// const FLAG_DDT_INHERIT: u64 = Flag::IoRetry - 1; | ||
| -// const FLAG_GANG_INHERIT: u64 = Flag::IoRetry - 1; | ||
| -// const FLAG_VDEV_INHERIT: u64 = Flag::DontQueue - 1; | ||
| -// | ||
| -// const NUM_PIPE_STAGES: usize = 22; | ||
| -// | ||
| -// type PipeStageFn = fn(&mut Zio) -> zfs::Result<()>; | ||
| -// static pipeline_stages: [Option<PipeStageFn>; NUM_PIPE_STAGES] = | ||
| -// [None, | ||
| -// Some(Zio::read_bp_init), | ||
| -// None,//Some(Zio::free_bp_init), | ||
| -// Some(Zio::issue_async), | ||
| -// None,//Some(Zio::write_bp_init), | ||
| -// None,//Some(Zio::checksum_generate), | ||
| -// None,//Some(Zio::nop_write), | ||
| -// None,//Some(Zio::ddt_read_start), | ||
| -// None,//Some(Zio::ddt_read_done), | ||
| -// None,//Some(Zio::ddt_write), | ||
| -// None,//Some(Zio::ddt_free), | ||
| -// None,//Some(Zio::gang_assemble), | ||
| -// None,//Some(Zio::gang_issue), | ||
| -// None,//Some(Zio::dva_allocate), | ||
| -// None,//Some(Zio::dva_free), | ||
| -// None,//Some(Zio::dva_claim), | ||
| -// Some(Zio::ready), | ||
| -// Some(Zio::vdev_io_start), | ||
| -// Some(Zio::vdev_io_done), | ||
| -// Some(Zio::vdev_io_assess), | ||
| -// Some(Zio::checksum_verify), | ||
| -// Some(Zio::done)]; | ||
| -// | ||
| -// #[derive(Copy, Clone, PartialEq)] | ||
| -// enum PipelineFlow { | ||
| -// Continue = 0x100, | ||
| -// Stop = 0x101, | ||
| -// } | ||
| -// | ||
| -// #[derive(Copy, Clone, PartialEq)] | ||
| -// enum Flag { | ||
| -// Flags inherited by gang, ddt, and vdev children, | ||
| -// and that must be equal for two zios to aggregate | ||
| -// DontAggregate = 1 << 0, | ||
| -// IoRepair = 1 << 1, | ||
| -// SelfHeal = 1 << 2, | ||
| -// Resilver = 1 << 3, | ||
| -// Scrub = 1 << 4, | ||
| -// ScanThread = 1 << 5, | ||
| -// Physical = 1 << 6, | ||
| -// | ||
| -// Flags inherited by ddt, gang, and vdev children. | ||
| -// CanFail = 1 << 7, // must be first for INHERIT | ||
| -// Speculative = 1 << 8, | ||
| -// ConfigWriter = 1 << 9, | ||
| -// DontRetry = 1 << 10, | ||
| -// DontCache = 1 << 11, | ||
| -// NoData = 1 << 12, | ||
| -// InduceDamage = 1 << 13, | ||
| -// | ||
| -// Flags inherited by vdev children. | ||
| -// IoRetry = 1 << 14, /* must be first for INHERIT */ | ||
| -// Probe = 1 << 15, | ||
| -// TryHard = 1 << 16, | ||
| -// Optional = 1 << 17, | ||
| -// | ||
| -// Flags not inherited by any children. | ||
| -// DontQueue = 1 << 18, /* must be first for INHERIT */ | ||
| -// DontPropagate = 1 << 19, | ||
| -// IoBypass = 1 << 20, | ||
| -// IoRewrite = 1 << 21, | ||
| -// Raw = 1 << 22, | ||
| -// GangChild = 1 << 23, | ||
| -// DdtChild = 1 << 24, | ||
| -// GodFather = 1 << 25, | ||
| -// NopWrite = 1 << 26, | ||
| -// ReExecuted = 1 << 27, | ||
| -// Delegated = 1 << 28, | ||
| -// FastWrite = 1 << 29, | ||
| -// }; | ||
| -// | ||
| -// #[derive(Copy, Clone, PartialEq)] | ||
| -// enum Child { | ||
| -// Vdev = 0, | ||
| -// Gang, | ||
| -// Ddt, | ||
| -// Logical, | ||
| -// }; | ||
| -// | ||
| -// #[repr(u8)] | ||
| -// enum WaitType { | ||
| -// Ready = 0, | ||
| -// Done, | ||
| -// }; | ||
| -// | ||
| -// zio pipeline stage definitions | ||
| -// enum Stage { | ||
| -// Open = 1 << 0, // RWFCI | ||
| -// | ||
| -// ReadBpInit = 1 << 1, // R---- | ||
| -// FreeBpInit = 1 << 2, // --F-- | ||
| -// IssueAsync = 1 << 3, // RWF-- | ||
| -// WriteBpInit = 1 << 4, // -W--- | ||
| -// | ||
| -// ChecksumGenerate = 1 << 5, // -W--- | ||
| -// | ||
| -// NopWrite = 1 << 6, // -W--- | ||
| -// | ||
| -// DdtReadStart = 1 << 7, // R---- | ||
| -// DdtReadDone = 1 << 8, // R---- | ||
| -// DdtWrite = 1 << 9, // -W--- | ||
| -// DdtFree = 1 << 10, // --F-- | ||
| -// | ||
| -// GangAssemble = 1 << 11, // RWFC- | ||
| -// GangIssue = 1 << 12, // RWFC- | ||
| -// | ||
| -// DvaAllocate = 1 << 13, // -W--- | ||
| -// DvaFree = 1 << 14, // --F-- | ||
| -// DvaClaim = 1 << 15, // ---C- | ||
| -// | ||
| -// Ready = 1 << 16, // RWFCI | ||
| -// | ||
| -// VdevIoStart = 1 << 17, // RW--I | ||
| -// VdevIoDone = 1 << 18, // RW--I | ||
| -// VdevIoAssess = 1 << 19, // RW--I | ||
| -// | ||
| -// ChecksumVerify = 1 << 20, // R---- | ||
| -// | ||
| -// Done = 1 << 21, // RWFCI | ||
| -// }; | ||
| -// | ||
| -// const INTERLOCK_STAGES = STAGE_READY | STAGE_DONE; | ||
| -// | ||
| -// const INTERLOCK_PIPELINE = INTERLOCK_STAGES | ||
| -// | ||
| -// const VDEV_IO_STAGES = STAGE_VDEV_IO_START | | ||
| -// STAGE_VDEV_IO_DONE | STAGE_VDEV_IO_ASSESS; | ||
| -// | ||
| -// const VDEV_CHILD_PIPELINE = VDEV_IO_STAGES | STAGE_DONE; | ||
| -// | ||
| -// const READ_COMMON_STAGES = INTERLOCK_STAGES | VDEV_IO_STAGES | STAGE_CHECKSUM_VERIFY | ||
| -// | ||
| -// const READ_PHYS_PIPELINE = READ_COMMON_STAGES | ||
| -// | ||
| -// const READ_PIPELINE = READ_COMMON_STAGES | STAGE_READ_BP_INIT | ||
| -// | ||
| -// const DDT_CHILD_READ_PIPELINE = READ_COMMON_STAGES; | ||
| -// | ||
| -// const DDT_READ_PIPELINE = INTERLOCK_STAGES | STAGE_READ_BP_INIT | STAGE_DDT_READ_START | STAGE_DDT_READ_DONE; | ||
| -// | ||
| -// const WRITE_COMMON_STAGES = INTERLOCK_STAGES | VDEV_IO_STAGES | STAGE_ISSUE_ASYNC | STAGE_CHECKSUM_GENERATE; | ||
| -// | ||
| -// const WRITE_PHYS_PIPELINE = WRITE_COMMON_STAGES; | ||
| -// | ||
| -// const REWRITE_PIPELINE = WRITE_COMMON_STAGES | STAGE_WRITE_BP_INIT; | ||
| -// | ||
| -// const WRITE_PIPELINE = WRITE_COMMON_STAGES | STAGE_WRITE_BP_INIT | STAGE_DVA_ALLOCATE; | ||
| -// | ||
| -// const DDT_CHILD_WRITE_PIPELINE = INTERLOCK_STAGES | VDEV_IO_STAGES | STAGE_DVA_ALLOCATE; | ||
| -// | ||
| -// const DDT_WRITE_PIPELINE = INTERLOCK_STAGES | STAGE_ISSUE_ASYNC | | ||
| -// STAGE_WRITE_BP_INIT | STAGE_CHECKSUM_GENERATE | | ||
| -// STAGE_DDT_WRITE; | ||
| -// | ||
| -// const GANG_STAGES = STAGE_GANG_ASSEMBLE | STAGE_GANG_ISSUE; | ||
| -// | ||
| -// const FREE_PIPELINE = INTERLOCK_STAGES | STAGE_FREE_BP_INIT | STAGE_DVA_FREE; | ||
| -// | ||
| -// const DDT_FREE_PIPELINE = INTERLOCK_STAGES | STAGE_FREE_BP_INIT | STAGE_ISSUE_ASYNC | STAGE_DDT_FREE; | ||
| -// | ||
| -// const CLAIM_PIPELINE = INTERLOCK_STAGES | STAGE_DVA_CLAIM; | ||
| -// | ||
| -// const IOCTL_PIPELINE = INTERLOCK_STAGES | STAGE_VDEV_IO_START | STAGE_VDEV_IO_ASSESS; | ||
| -// | ||
| -// const BLOCKING_STAGES = STAGE_DVA_ALLOCATE | STAGE_DVA_CLAIM | STAGE_VDEV_IO_START; | ||
| -// |