
WIP: Update to include libextra

1 parent 171bac8 · commit cdd351160f2354e81bdb541c3cc955355843ef36 · committed by @jackpot51
6 .gitmodules
@@ -40,3 +40,9 @@
[submodule "installer"]
path = installer
url = https://github.com/redox-os/installer.git
+[submodule "crates/zfs"]
+ path = crates/zfs
+ url = https://github.com/redox-os/zfs.git
+[submodule "crates/extra"]
+ path = crates/extra
+ url = https://github.com/redox-os/libextra.git
15 Makefile
@@ -158,10 +158,10 @@ apps: filesystem/apps/calculator/main.bin \
filesystem/apps/terminal/main.bin \
filesystem/apps/viewer/main.bin
-$(BUILD)/libcoreutils.rlib: crates/coreutils/src/lib.rs crates/coreutils/src/*.rs $(BUILD)/libstd.rlib
- $(RUSTC) $(RUSTCFLAGS) --crate-name coreutils --crate-type lib -o $@ $<
+$(BUILD)/libextra.rlib: crates/extra/src/lib.rs crates/extra/src/*.rs $(BUILD)/libstd.rlib
+ $(RUSTC) $(RUSTCFLAGS) --crate-name extra --crate-type lib -o $@ $<
-filesystem/bin/%: crates/coreutils/src/bin/%.rs $(BUILD)/crt0.o $(BUILD)/libcoreutils.rlib
+filesystem/bin/%: crates/coreutils/src/bin/%.rs $(BUILD)/crt0.o $(BUILD)/libextra.rlib
mkdir -p filesystem/bin
$(RUSTC) $(RUSTCFLAGS) --crate-type bin -o $@ $<
@@ -188,14 +188,15 @@ coreutils: \
filesystem/bin/shutdown \
filesystem/bin/sleep \
filesystem/bin/tail \
+ filesystem/bin/test \
filesystem/bin/touch \
filesystem/bin/true \
filesystem/bin/wc \
filesystem/bin/yes
- #TODO: filesystem/bin/env filesystem/bin/test
+ #TODO: filesystem/bin/env
-$(BUILD)/libbinutils.rlib: crates/binutils/src/lib.rs crates/binutils/src/*.rs $(BUILD)/libcoreutils.rlib
+$(BUILD)/libbinutils.rlib: crates/binutils/src/lib.rs crates/binutils/src/*.rs $(BUILD)/libextra.rlib
$(RUSTC) $(RUSTCFLAGS) --crate-name binutils --crate-type lib -o $@ $<
filesystem/bin/%: crates/binutils/src/bin/%.rs $(BUILD)/crt0.o $(BUILD)/libbinutils.rlib
@@ -210,7 +211,7 @@ binutils: \
$(BUILD)/libtermion.rlib: crates/termion/src/lib.rs crates/termion/src/*.rs $(BUILD)/libstd.rlib
$(RUSTC) $(RUSTCFLAGS) --crate-name termion --crate-type lib -o $@ $< --cfg 'feature="nightly"'
-filesystem/bin/%: crates/extrautils/src/bin/%.rs $(BUILD)/crt0.o $(BUILD)/libcoreutils.rlib $(BUILD)/libtermion.rlib
+filesystem/bin/%: crates/extrautils/src/bin/%.rs $(BUILD)/crt0.o $(BUILD)/libextra.rlib $(BUILD)/libtermion.rlib
mkdir -p filesystem/bin
$(RUSTC) $(RUSTCFLAGS) --crate-type bin -o $@ $<
@@ -223,7 +224,7 @@ extrautils: \
filesystem/bin/rem
#TODO: filesystem/bin/mtxt
-filesystem/bin/%: crates/games/src/%/main.rs crates/games/src/%/*.rs $(BUILD)/crt0.o $(BUILD)/libcoreutils.rlib $(BUILD)/libtermion.rlib
+filesystem/bin/%: crates/games/src/%/main.rs crates/games/src/%/*.rs $(BUILD)/crt0.o $(BUILD)/libextra.rlib $(BUILD)/libtermion.rlib
mkdir -p filesystem/bin
$(RUSTC) $(RUSTCFLAGS) --crate-type bin -o $@ $<
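
For illustration (not part of this commit): the pattern rules above now list $(BUILD)/libextra.rlib rather than $(BUILD)/libcoreutils.rlib as a prerequisite, so a binary built by the filesystem/bin/% rule can link the `extra` crate named by the --crate-name flag. A minimal, hypothetical sketch; nothing from the crate's real API appears in this diff, so none of it is used here:

```rust
// Hypothetical sketch only: a small utility built by the filesystem/bin/% rule,
// linking the `extra` rlib produced by the $(BUILD)/libextra.rlib rule above.
extern crate extra;

use std::env;
use std::io::{self, Write};

fn main() {
    let stdout = io::stdout();
    let mut out = stdout.lock();
    // Echo arguments back, one per line, as a stand-in for a real utility.
    for arg in env::args().skip(1) {
        writeln!(out, "{}", arg).expect("failed to write to stdout");
    }
}
```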
2 crates/binutils
@@ -1 +1 @@
-Subproject commit 3232642e98433e882148f296416023e1f22b9bda
+Subproject commit 5599724eab8b28705f6b2b66145fdcb7e4ce2d4d
2 crates/coreutils
@@ -1 +1 @@
-Subproject commit 41eef0ff8a18f0011f373e6b78fb199a25ef2926
+Subproject commit 3a666f3b7ddf682c342363d9583b887a360d3dab
1 crates/extra
@@ -0,0 +1 @@
+Subproject commit dd01a09283df73e8e62a6fa59ede41897459dcbd
2 crates/extrautils
@@ -1 +1 @@
-Subproject commit 90e803f249803d93e081b71c553177c4befd6f18
+Subproject commit b1ebde2e1a5e3cac977d076d4df04b7a76f06ff3
2 crates/games
@@ -1 +1 @@
-Subproject commit 98ffb8e0a2c471252e5a922f8dd6a335388d7a10
+Subproject commit eb52fcb69b59957bd3bfdf6a6ff37234788bf521
1 crates/zfs
@@ -0,0 +1 @@
+Subproject commit 066a57daef9f86c59018867d904e9fb15d3ddec7
126 crates/zfs/arcache.rs
@@ -1,126 +0,0 @@
-use std::collections::{BTreeMap, VecDeque};
-
-use super::dvaddr::DVAddr;
-use super::zio;
-
-/// MRU - Most Recently Used cache
-struct Mru {
- map: BTreeMap<DVAddr, Vec<u8>>,
- queue: VecDeque<DVAddr>, // Oldest DVAddrs are at the end
- size: usize, // Max mru cache size in blocks
- used: usize, // Number of used blocks in mru cache
-}
-
-impl Mru {
- pub fn new() -> Self {
- Mru {
- map: BTreeMap::new(),
- queue: VecDeque::new(),
- size: 1000,
- used: 0,
- }
- }
-
- pub fn cache_block(&mut self, dva: &DVAddr, block: Vec<u8>) -> Result<Vec<u8>, String> {
- // If necessary, make room for the block in the cache
- while self.used + (dva.asize() as usize) > self.size {
- let last_dva = match self.queue.pop_back() {
- Some(dva) => dva,
- None => return Err("No more ARC MRU items to free".to_string()),
- };
- self.map.remove(&last_dva);
- self.used -= last_dva.asize() as usize;
- }
-
- // Add the block to the cache
- self.used += dva.asize() as usize;
- self.map.insert(*dva, block);
- self.queue.push_front(*dva);
- Ok(self.map.get(dva).unwrap().clone())
- }
-}
-
-/// MFU - Most Frequently Used cache
-struct Mfu {
- // TODO: Keep track of use counts. So mfu_map becomes (use_count: u64, Vec<u8>). Reset the use
- // count every once in a while. For instance, every 1000 reads. This will probably end up being
- // a knob for the user.
- // TODO: Keep track of minimum frequency and corresponding DVA
- map: BTreeMap<DVAddr, (u64, Vec<u8>)>,
- size: usize, // Max mfu cache size in blocks
- used: usize, // Number of used blocks in mfu cache
-}
-
-impl Mfu {
- pub fn new() -> Self {
- Mfu {
- map: BTreeMap::new(),
- size: 1000,
- used: 0,
- }
- }
-
- pub fn cache_block(&mut self, dva: &DVAddr, block: Vec<u8>) -> Result<Vec<u8>, String> {
- {
- let mut lowest_freq = ::std::u64::MAX;
- let mut lowest_dva: Result<DVAddr, String> = Err("No valid DVA found.".to_string());
-
- for (&dva_key, &(freq, _)) in self.map.iter() {
- if freq < lowest_freq {
- lowest_freq = freq;
- lowest_dva = Ok(dva_key);
- }
- }
-
- self.map.remove(&try!(lowest_dva));
- }
-
- // Add the block to the cache
- self.used += dva.asize() as usize;
- self.map.insert(*dva, (2, block));
- Ok(self.map.get(dva).unwrap().1.clone())
- }
-}
-
-// Our implementation of the Adaptive Replacement Cache (ARC) is set up to allocate
-// its buffer on the heap rather than in a private pool thing. This makes it much
-// simpler to implement, but defers the fragmentation problem to the heap allocator.
-// We named the type `ArCache` to avoid confusion with Rust's `Arc` reference type.
-pub struct ArCache {
- mru: Mru,
- mfu: Mfu,
-}
-
-impl ArCache {
- pub fn new() -> Self {
- ArCache {
- mru: Mru::new(),
- mfu: Mfu::new(),
- }
- }
-
- pub fn read(&mut self, reader: &mut zio::Reader, dva: &DVAddr) -> Result<Vec<u8>, String> {
- if let Some(block) = self.mru.map.remove(dva) {
- self.mfu.map.insert(*dva, (0, block.clone()));
-
- // Block is cached
- return Ok(block);
- }
- if let Some(block) = self.mfu.map.get_mut(dva) {
- // Block is cached
- if block.0 > 1000 {
- block.0 = 0;
- } else {
- block.0 += 1;
- }
-
- return Ok(block.1.clone());
- }
-
- // Block isn't cached, have to read it from disk
- let block = reader.read(dva.sector() as usize, dva.asize() as usize);
-
- // Blocks start in MRU cache
- self.mru.cache_block(dva, block)
- }
-}
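
For illustration (not part of this commit), the MRU bookkeeping described in the comments above (newest blocks pushed to the front of the queue, oldest evicted from the back until the new block fits), shown in isolation:

```rust
// Standalone illustration of the MRU policy described above.
use std::collections::{BTreeMap, VecDeque};

fn main() {
    let capacity = 4usize; // max cached blocks, analogous to Mru::size
    let mut used = 0usize; // blocks currently cached, analogous to Mru::used
    let mut map: BTreeMap<u64, Vec<u8>> = BTreeMap::new();
    let mut queue: VecDeque<u64> = VecDeque::new();

    for key in 0u64..6 {
        // Evict the oldest entries from the back until the new block fits.
        while used + 1 > capacity {
            let oldest = queue.pop_back().expect("no more items to evict");
            map.remove(&oldest);
            used -= 1;
        }
        // Newest entry goes to the front of the queue.
        map.insert(key, vec![0u8; 512]);
        queue.push_front(key);
        used += 1;
    }

    // Blocks 0 and 1 were evicted; 2..=5 remain, newest first.
    assert_eq!(queue, vec![5u64, 4, 3, 2]);
}
```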
333 crates/zfs/avl.rs
@@ -1,333 +0,0 @@
-use std::rc::Rc;
-
-pub struct Node<T> {
- value: T,
- left: Option<usize>, // ID for left node
- right: Option<usize>, // ID for right node
-}
-
-impl<T> Node<T> {
- pub fn value(&self) -> &T {
- &self.value
- }
- pub fn left<K>(&self, tree: &Tree<T, K>) -> Option<NodeId> {
- self.left.map(|l| {
- NodeId {
- index: l,
- time_stamp: tree.nodes[l].time_stamp,
- }
- })
- }
- pub fn right<K>(&self, tree: &Tree<T, K>) -> Option<NodeId> {
- self.right.map(|r| {
- NodeId {
- index: r,
- time_stamp: tree.nodes[r].time_stamp,
- }
- })
- }
-}
-
-/// /////////////////////////////////////////////////////////////////////////////////////////////////
-#[derive(Copy, Clone)]
-pub struct NodeId {
- index: usize,
- time_stamp: u64,
-}
-
-impl NodeId {
- pub fn get<'a, T, K>(&self, avl: &'a Tree<T, K>) -> &'a Node<T> {
- let ref slot = avl.nodes[self.index];
- if slot.time_stamp == self.time_stamp {
- slot.node.as_ref().unwrap()
- } else {
- panic!("NodeId had invalid time_stamp");
- }
- }
-
- pub fn try_get<'a, T, K>(&self, avl: &'a Tree<T, K>) -> Option<&'a Node<T>> {
- avl.nodes
- .get(self.index)
- .and_then(|slot| {
- if slot.time_stamp == self.time_stamp {
- slot.node.as_ref()
- } else {
- None
- }
- })
- }
-
- pub fn get_mut<'a, T, K>(&self, avl: &'a mut Tree<T, K>) -> &'a mut Node<T> {
- let ref mut slot = avl.nodes[self.index];
- if slot.time_stamp == self.time_stamp {
- slot.node.as_mut().unwrap()
- } else {
- panic!("NodeId had invalid time_stamp");
- }
- }
-
- pub fn try_get_mut<'a, T, K>(&self, avl: &'a mut Tree<T, K>) -> Option<&'a mut Node<T>> {
- avl.nodes
- .get_mut(self.index)
- .and_then(|slot| {
- if slot.time_stamp == self.time_stamp {
- slot.node.as_mut()
- } else {
- None
- }
- })
- }
-}
-
-/// /////////////////////////////////////////////////////////////////////////////////////////////////
-
-pub struct Tree<T, K> {
- root: Option<usize>, // Index of the root node
- nodes: Vec<Slot<T>>,
- free_list: Vec<usize>,
- key: Rc<Fn(&T) -> K>,
-}
-
-impl<T, K: PartialOrd> Tree<T, K> {
- pub fn new(key: Rc<Fn(&T) -> K>) -> Self {
- Tree {
- root: None,
- nodes: Vec::new(),
- free_list: Vec::new(),
- key: key,
- }
- }
-
- // Inserts a value into the tree, keeping it balanced. Lesser values will be stored on
- // the left, while greater values will be stored on the right. No duplicates are allowed.
- pub fn insert(&mut self, value: T) {
- let root = self.root;
- self.root = Some(self._insert(value, root));
- }
-
- pub fn in_order<F: Fn(&Node<T>)>(&self, f: F) {
- if let Some(root) = self.root {
- self._in_order(&f, root);
- }
- }
-
- /// Good ol' binary search. Returns immutable reference
- pub fn find(&self, key: K) -> Option<&T> {
- let root = self.root;
- self._find(key, root)
- }
-
- /// Good ol' binary search. Returns a mutable reference
- pub fn find_mut(&mut self, key: K) -> Option<&mut T> {
- let root = self.root;
- self._find_mut(key, root)
- }
-
- // Implementation of insert
- fn _insert(&mut self, value: T, node: Option<usize>) -> usize {
- let node = match node {
- Some(node) => {
- // Node exists, check which way to branch.
- if (self.key)(&value) == (self.key)(&self.node(node).value) {
- return node;
- } else if (self.key)(&value) < (self.key)(&self.node(node).value) {
- let l = self.node(node).left;
- self.node_mut(node).left = Some(self._insert(value, l));
- } else if (self.key)(&value) > (self.key)(&self.node(node).value) {
- let r = self.node(node).right;
- self.node_mut(node).right = Some(self._insert(value, r));
- }
-
- node
- }
- None => {
- // The node doesn't exist, create it here.
- self.allocate_node(value)
- }
- };
-
- self.rebalance(node)
- }
-
- pub fn _in_order<F: Fn(&Node<T>)>(&self, f: &F, node: usize) {
- if let Some(l) = self.node(node).left {
- self._in_order(f, l);
- }
- f(self.node(node));
- if let Some(r) = self.node(node).right {
- self._in_order(f, r);
- }
- }
-
- pub fn _find(&self, key: K, node: Option<usize>) -> Option<&T> {
- node.and_then(|n| {
- if key < (self.key)(&self.node(n).value) {
- let left = self.node(n).left;
- self._find(key, left)
- } else if key > (self.key)(&self.node(n).value) {
- let right = self.node(n).right;
- self._find(key, right)
- } else {
- // Found it!
- Some(&self.node(n).value)
- }
- })
- }
-
- pub fn _find_mut(&mut self, key: K, node: Option<usize>) -> Option<&mut T> {
- match node {
- Some(n) => {
- if key < (self.key)(&self.node(n).value) {
- let left = self.node(n).left;
- self._find_mut(key, left)
- } else if key > (self.key)(&self.node(n).value) {
- let right = self.node(n).right;
- self._find_mut(key, right)
- } else {
- // Found it!
- Some(&mut self.node_mut(n).value)
- }
- }
- None => None,
- }
- }
-
- // Performs a left rotation on a tree/subtree.
- // Returns the node to replace the specified node with
- fn rotate_left(&mut self, node: usize) -> usize {
- // Keep track of the original node positions
- // For a rotate left, the right child node must exist
- let r = self.node(node).right.unwrap();
- let rl = self.node(r).left;
-
- let ret = r;
- self.node_mut(node).right = rl;
- self.node_mut(ret).left = Some(node);
-
- ret
- }
-
- // Performs a right rotation on a tree/subtree.
- // Returns the node to replace the specified node with
- fn rotate_right(&mut self, node: usize) -> usize {
- // Keep track of the original node positions
- // For a rotate right, the left child node must exist
- let l = self.node(node).left.unwrap();
- let lr = self.node(l).right;
-
- let ret = l;
- self.node_mut(node).left = lr;
- self.node_mut(ret).right = Some(node);
-
- ret
- }
-
- // Performs a left-right double rotation on a tree/subtree.
- fn rotate_leftright(&mut self, node: usize) -> usize {
- let l = self.node(node).left.unwrap();
- let new_l = self.rotate_left(l); // Left node needs to exist
- self.node_mut(node).left = Some(new_l);
- self.rotate_right(node)
- }
-
- // Performs a right-left double rotation on a tree/subtree.
- fn rotate_rightleft(&mut self, node: usize) -> usize {
- let r = self.node(node).right.unwrap();
- let new_r = self.rotate_right(r); // Right node needs to exist
- self.node_mut(node).right = Some(new_r);
- self.rotate_left(node)
- }
-
- // Rebalances the provided node and returns the node to replace it with if rotations
- // occur
- fn rebalance(&mut self, node: usize) -> usize {
- let balance = self.height(self.node(node).left) - self.height(self.node(node).right);
- if balance == 2 {
- // left
- let lbalance = self.height(self.node(self.node(node).left.unwrap()).left) -
- self.height(self.node(self.node(node).left.unwrap()).right);
- if lbalance == 0 || lbalance == 1 {
- // left left - need to rotate right
- return self.rotate_right(node);
- } else if lbalance == -1 {
- // left right
- return self.rotate_leftright(node); // function name is just a coincidence
- }
- } else if balance == -2 {
- // right
- let rbalance = self.height(self.node(self.node(node).right.unwrap()).left) -
- self.height(self.node(self.node(node).right.unwrap()).right);
- if rbalance == 1 {
- // right left
- return self.rotate_rightleft(node); // function name is just a coincidence
- } else if rbalance == 0 || rbalance == -1 {
- // right right - need to rotate left
- return self.rotate_left(node);
- }
- }
-
- node
- }
-
- // height gets the height of a tree or subtree
- fn height(&self, node: Option<usize>) -> i64 {
- match node {
- Some(node) => {
- let left_height = self.height(self.node(node).left);
- let right_height = self.height(self.node(node).right);
-
- if left_height > right_height {
- left_height + 1
- } else {
- right_height + 1
- }
- }
- None => -1,
- }
- }
-
- fn allocate_node(&mut self, value: T) -> usize {
- match self.free_list.pop() {
- Some(index) => {
- self.nodes[index].time_stamp += 1;
- index
- }
- None => {
- // No free slots, create a new one
- let index = self.nodes.len();
- self.nodes.push(Slot {
- time_stamp: 0,
- node: Some(Node {
- value: value,
- left: None,
- right: None,
- }),
- });
- index
- }
- }
- }
-
- fn free_node(&mut self, index: usize) -> Node<T> {
- self.free_list.push(index);
-
- // NOTE: We unwrap here, because we trust that `id` points to a valid node, because
- // only we can create and free Nodes and their NodeIds
- self.nodes[index].node.take().unwrap()
- }
-
- fn node(&self, index: usize) -> &Node<T> {
- self.nodes[index].node.as_ref().unwrap()
- }
-
- fn node_mut(&mut self, index: usize) -> &mut Node<T> {
- self.nodes[index].node.as_mut().unwrap()
- }
-}
-
-/// /////////////////////////////////////////////////////////////////////////////////////////////////
-
-struct Slot<T> {
- time_stamp: u64,
- node: Option<Node<T>>,
-}
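
For illustration (not part of this commit), a small usage sketch of the Tree API above, assuming the types above are in scope; values are keyed by their first tuple field, and in_order visits them in ascending key order:

```rust
// Illustrative only: exercising Tree::insert and Tree::in_order from above.
use std::rc::Rc;

fn demo() {
    // Key each (id, name) pair by its numeric id.
    let mut tree: Tree<(u64, &'static str), u64> =
        Tree::new(Rc::new(|v: &(u64, &'static str)| v.0));
    tree.insert((3, "three"));
    tree.insert((1, "one"));
    tree.insert((2, "two"));

    // Prints the values in ascending key order: 1, 2, 3.
    tree.in_order(|node| {
        let &(id, name) = node.value();
        println!("{} -> {}", id, name);
    });
}
```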
56 crates/zfs/block_ptr.rs
@@ -1,56 +0,0 @@
-use super::from_bytes::FromBytes;
-use super::dvaddr::DVAddr;
-
-#[derive(Copy, Clone, Debug)]
-#[repr(packed)]
-pub struct BlockPtr {
- pub dvas: [DVAddr; 3],
- pub flags_size: u64,
- pub padding: [u64; 3],
- pub birth_txg: u64,
- pub fill_count: u64,
- pub checksum: [u64; 4],
-}
-
-impl BlockPtr {
- pub fn level(&self) -> u64 {
- (self.flags_size >> 56) & 0x7F
- }
-
- pub fn object_type(&self) -> u64 {
- (self.flags_size >> 48) & 0xFF
- }
-
- pub fn checksum(&self) -> u64 {
- (self.flags_size >> 40) & 0xFF
- }
-
- pub fn compression(&self) -> u64 {
- (self.flags_size >> 32) & 0xFF
- }
-
- pub fn lsize(&self) -> u64 {
- (self.flags_size & 0xFFFF) + 1
- }
-
- pub fn psize(&self) -> u64 {
- ((self.flags_size >> 16) & 0xFFFF) + 1
- }
-}
-
-impl FromBytes for BlockPtr {}
-
-#[derive(Copy, Clone, Debug)]
-#[repr(packed)]
-pub struct Gang {
- pub bps: [BlockPtr; 3],
- pub padding: [u64; 14],
- pub magic: u64,
- pub checksum: u64,
-}
-
-impl Gang {
- pub fn magic() -> u64 {
- return 0x117a0cb17ada1002;
- }
-}
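
For illustration (not part of this commit), a worked example of the bit packing these accessors decode; the field values are invented, and both size fields are stored as n - 1 (the read path in main.rs below multiplies lsize() by 512, treating it as a count of 512-byte sectors):

```rust
// Illustrative only: packing and unpacking the flags_size word used by BlockPtr.
fn main() {
    // level 1, object type 0x13, checksum 7, compression 2,
    // psize = 8 sectors, lsize = 32 sectors (both stored as n - 1).
    let flags_size: u64 = (1u64 << 56)
        | (0x13u64 << 48)
        | (7u64 << 40)
        | (2u64 << 32)
        | ((8u64 - 1) << 16)
        | (32u64 - 1);

    assert_eq!((flags_size >> 56) & 0x7F, 1); // level()
    assert_eq!((flags_size >> 48) & 0xFF, 0x13); // object_type()
    assert_eq!((flags_size >> 40) & 0xFF, 7); // checksum()
    assert_eq!((flags_size >> 32) & 0xFF, 2); // compression()
    assert_eq!(((flags_size >> 16) & 0xFFFF) + 1, 8); // psize() in sectors
    assert_eq!((flags_size & 0xFFFF) + 1, 32); // lsize() in sectors

    println!("logical size = {} bytes", ((flags_size & 0xFFFF) + 1) * 512);
}
```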
1,935 crates/zfs/dmu_objset.rs
0 additions, 1,935 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
85 crates/zfs/dnode.rs
@@ -1,85 +0,0 @@
-use std::fmt;
-use std::mem;
-
-use super::block_ptr::BlockPtr;
-use super::from_bytes::FromBytes;
-use super::zil_header::ZilHeader;
-
-#[repr(u8)]
-#[derive(Debug, Eq, PartialEq)]
-pub enum ObjectType {
- None,
- ObjectDirectory,
- ObjectArray,
- PackedNvList,
- NvListSize,
- BlockPtrList,
- BlockPtrListHdr,
- SpaceMapHeader,
- SpaceMap,
- IntentLog,
- DNode,
- ObjSet,
- DataSet,
- DataSetChildMap,
- ObjSetSnapMap,
- DslProps,
- DslObjSet,
- ZNode,
- Acl,
- PlainFileContents,
- DirectoryContents,
- MasterNode,
- DeleteQueue,
- ZVol,
- ZVolProp,
-}
-
-#[repr(packed)]
-pub struct DNodePhys {
- pub object_type: ObjectType,
- pub indblkshift: u8, // ln2(indirect block size)
- pub nlevels: u8, // 1=blkptr->data blocks
- pub nblkptr: u8, // length of blkptr
- pub bonus_type: u8, // type of data in bonus buffer
- pub checksum: u8, // ZIO_CHECKSUM type
- pub compress: u8, // ZIO_COMPRESS type
- pub flags: u8, // DNODE_FLAG_*
- pub data_blk_sz_sec: u16, // data block size in 512b sectors
- pub bonus_len: u16, // length of bonus
- pub pad2: [u8; 4],
-
- // accounting is protected by dirty_mtx
- pub maxblkid: u64, // largest allocated block ID
- pub used: u64, // bytes (or sectors) of disk space
-
- pub pad3: [u64; 4],
-
- blkptr_bonus: [u8; 448],
-}
-
-impl DNodePhys {
- pub fn get_blockptr<'a>(&self, i: usize) -> &'a BlockPtr {
- unsafe { mem::transmute(&self.blkptr_bonus[i * 128]) }
- }
-
- pub fn get_bonus(&self) -> &[u8] {
- &self.blkptr_bonus[(self.nblkptr as usize) * 128..]
- }
-}
-
-impl FromBytes for DNodePhys {}
-
-impl fmt::Debug for DNodePhys {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- try!(write!(f,
- "DNodePhys {{ object_type: {:?}, nlevels: {:X}, nblkptr: {:X}, bonus_type: \
- {:X}, bonus_len: {:X}}}\n",
- self.object_type,
- self.nlevels,
- self.nblkptr,
- self.bonus_type,
- self.bonus_len));
- Ok(())
- }
-}
92 crates/zfs/dsl_dataset.rs
@@ -1,92 +0,0 @@
-use super::block_ptr::BlockPtr;
-use super::from_bytes::FromBytes;
-
-#[repr(packed)]
-pub struct DslDatasetPhys {
- pub dir_obj: u64, // DMU_OT_DSL_DIR
- pub prev_snap_obj: u64, // DMU_OT_DSL_DATASET
- pub prev_snap_txg: u64,
- pub next_snap_obj: u64, // DMU_OT_DSL_DATASET
- pub snapnames_zapobj: u64, // DMU_OT_DSL_DS_SNAP_MAP 0 for snaps
- pub num_children: u64, // clone/snap children, ==0 for head
- pub creation_time: u64, // seconds since 1970
- pub creation_txg: u64,
- pub deadlist_obj: u64, // DMU_OT_DEADLIST
- // ds_referenced_bytes, ds_compressed_bytes, and ds_uncompressed_bytes
- // include all blocks referenced by this dataset, including those
- // shared with any other datasets.
- //
- pub referenced_bytes: u64,
- pub compressed_bytes: u64,
- pub uncompressed_bytes: u64,
- pub unique_bytes: u64, // only relevant to snapshots
- // The ds_fsid_guid is a 56-bit ID that can change to avoid
- // collisions. The ds_guid is a 64-bit ID that will never
- // change, so there is a small probability that it will collide.
- //
- pub fsid_guid: u64,
- pub guid: u64,
- pub flags: u64, // DS_FLAG_*
- pub bp: BlockPtr,
- pub next_clones_obj: u64, // DMU_OT_DSL_CLONES
- pub props_obj: u64, // DMU_OT_DSL_PROPS for snaps
- pub userrefs_obj: u64, // DMU_OT_USERREFS
- pad: [u64; 5], // pad out to 320 bytes for good measure
-}
-
-impl FromBytes for DslDatasetPhys {}
-
-//------------------------------------------------------------------------------------------------//
-
-// struct DslDataset {
-// dmu_buf_user_t ds_dbu,
-//
-// Immutable:
-// dsl_dir *ds_dir,
-// dmu_buf_t *ds_dbuf,
-// object: u64,
-// fsid_guid: u64,
-// is_snapshot: bool,
-//
-// only used in syncing context, only valid for non-snapshots:
-// dsl_dataset *ds_prev,
-// bookmarks: u64, // DMU_OTN_ZAP_METADATA
-// large_blocks: bool,
-// need_large_blocks: bool,
-//
-// has internal locking:
-// dsl_deadlist_t ds_deadlist,
-// bplist_t ds_pending_deadlist,
-//
-// protected by lock on pool's dp_dirty_datasets list
-// txg_node_t ds_dirty_link,
-// list_node_t ds_synced_link,
-//
-// ds_phys->ds_<accounting> is also protected by ds_lock.
-// Protected by ds_lock:
-// kmutex_t ds_lock,
-// objset_t *ds_objset,
-// ds_userrefs: u64,
-// void *ds_owner,
-//
-// Long holds prevent the ds from being destroyed, they allow the
-// ds to remain held even after dropping the dp_config_rwlock.
-// Owning counts as a long hold. See the comments above
-// dsl_pool_hold() for details.
-// refcount_t ds_longholds,
-//
-// no locking, only for making guesses
-// ds_trysnap_txg: u64,
-//
-// for objset_open()
-// kmutex_t ds_opening_lock,
-//
-// ds_reserved: u64, // cached refreservation
-// ds_quota: u64, // cached refquota
-//
-// kmutex_t ds_sendstream_lock,
-// list_t ds_sendstreams,
-//
-// Protected by ds_lock, keep at end of struct for better locality
-// char ds_snapname[MAXNAMELEN],
-// }
37 crates/zfs/dsl_dir.rs
@@ -1,37 +0,0 @@
-use super::from_bytes::FromBytes;
-
-const DD_USED_NUM: usize = 5; // The number of variants in DslDirUsed
-
-pub enum DslDirUsed {
- Head = 0,
- Snap,
- Child,
- ChildReserve,
- RefReserve,
-}
-
-#[repr(packed)]
-pub struct DslDirPhys {
- pub creation_time: u64, // not actually used
- pub head_dataset_obj: u64,
- pub parent_obj: u64,
- pub origin_obj: u64,
- pub child_dir_zapobj: u64,
- // how much space our children are accounting for, for leaf
- // datasets, == physical space used by fs + snaps
- pub used_bytes: u64,
- pub compressed_bytes: u64,
- pub uncompressed_bytes: u64,
- // Administrative quota setting
- pub quota: u64,
- // Administrative reservation setting
- pub reserved: u64,
- pub props_zapobj: u64,
- pub deleg_zapobj: u64, // dataset delegation permissions
- pub flags: u64,
- pub used_breakdown: [u64; DD_USED_NUM],
- pub clones: u64, // dsl_dir objects
- pub pad: [u64; 13], // pad out to 256 bytes for good measure
-}
-
-impl FromBytes for DslDirPhys {}
17 crates/zfs/dsl_pool.rs
@@ -1,17 +0,0 @@
-use super::spa;
-use super::zfs;
-
-pub struct DslPool {
- // Immutable
- root_dir_obj: u64,
-}
-
-impl DslPool {
- pub fn init(spa: &mut spa::Spa, txg: u64) -> zfs::Result<Self> {
- Self::open_impl(spa, txg)
- }
-
- fn open_impl(spa: &mut spa::Spa, txg: u64) -> zfs::Result<Self> {
- Ok(DslPool { root_dir_obj: 0 })
- }
-}
42 crates/zfs/dvaddr.rs
@@ -1,42 +0,0 @@
-use std::fmt;
-
-#[derive(Copy, Clone, Eq, Hash, Ord, PartialEq, PartialOrd)]
-#[repr(packed)]
-pub struct DVAddr {
- pub vdev: u64,
- pub offset: u64,
-}
-
-impl DVAddr {
- /// Sector address is the offset plus two vdev labels and one boot block (4 MB, or 8192 sectors)
- pub fn sector(&self) -> u64 {
- self.offset() + 0x2000
- }
-
- pub fn gang(&self) -> bool {
- if self.offset & 0x8000000000000000 != 0 {
- true
- } else {
- false
- }
- }
-
- pub fn offset(&self) -> u64 {
- self.offset & 0x7FFFFFFFFFFFFFFF
- }
-
- pub fn asize(&self) -> u64 {
- (self.vdev & 0xFFFFFF) + 1
- }
-}
-
-impl fmt::Debug for DVAddr {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- try!(write!(f,
- "DVAddr {{ offset: {:X}, gang: {}, asize: {:X} }}\n",
- self.offset(),
- self.gang(),
- self.asize()));
- Ok(())
- }
-}
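
For illustration (not part of this commit), a worked example of the unpacking above; the 4 MB of labels and boot block mentioned in the sector() comment is 4 MB / 512 B = 8192 = 0x2000 sectors, and the field values below are invented:

```rust
// Illustrative only: decoding a DVAddr the same way the methods above do.
fn main() {
    let vdev: u64 = 0x0000_0000_0000_0007; // low 24 bits hold asize - 1
    let offset: u64 = 0x0000_0000_0000_1000; // top bit (gang flag) clear

    let gang = offset & 0x8000_0000_0000_0000 != 0;
    let off = offset & 0x7FFF_FFFF_FFFF_FFFF;
    let sector = off + 0x2000; // skip 4 MB of labels/boot block = 8192 sectors
    let asize = (vdev & 0xFFFFFF) + 1;

    assert!(!gang);
    assert_eq!(sector, 0x3000);
    assert_eq!(asize, 8);
    println!("sector {:#x}, asize {} sectors", sector, asize);
}
```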
16 crates/zfs/from_bytes.rs
@@ -1,16 +0,0 @@
-use std::{mem, ptr};
-
-pub trait FromBytes: Sized {
- fn from_bytes(data: &[u8]) -> Result<Self, String> {
- if data.len() >= mem::size_of::<Self>() {
- let s = unsafe { ptr::read(data.as_ptr() as *const Self) };
- Ok(s)
- } else {
- Err(format!("Error: bytes length of {} not long enough for the byte size of {}",
- data.len(),
- mem::size_of::<Self>()))
- }
- }
-}
-
-impl FromBytes for u64 {}
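
For illustration (not part of this commit), the same default-method pattern applied to a small packed struct; the trait is repeated here so the sketch stands alone, and the Pair struct and byte values are invented:

```rust
// Illustrative only: the FromBytes trick above applied to a small packed struct.
use std::{mem, ptr};

trait FromBytes: Sized {
    fn from_bytes(data: &[u8]) -> Result<Self, String> {
        if data.len() >= mem::size_of::<Self>() {
            Ok(unsafe { ptr::read(data.as_ptr() as *const Self) })
        } else {
            Err(format!("need {} bytes, got {}", mem::size_of::<Self>(), data.len()))
        }
    }
}

#[derive(Copy, Clone)]
#[repr(packed)]
struct Pair {
    a: u32,
    b: u32,
}

impl FromBytes for Pair {}

fn main() {
    // On a little-endian host these bytes decode to a = 1, b = 2.
    let raw = [1u8, 0, 0, 0, 2, 0, 0, 0];
    let pair = Pair::from_bytes(&raw).unwrap();
    let (a, b) = (pair.a, pair.b); // copy out of the packed struct before printing
    println!("a = {}, b = {}", a, b);
}
```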
147 crates/zfs/lzjb.rs
@@ -1,147 +0,0 @@
-const NBBY: usize = 8; // Number of bits per byte
-const MATCH_BITS: usize = 6;
-const MATCH_MIN: usize = 3;
-const MATCH_MAX: usize = ((1 << MATCH_BITS) + (MATCH_MIN - 1));
-const OFFSET_MASK: usize = ((1 << (16 - MATCH_BITS)) - 1);
-const LEMPEL_SIZE: usize = 1024;
-
-/// LZJB compress the bytes in `src` into `dst`
-pub fn compress(src: &[u8], dst: &mut [u8]) -> usize {
- let mut src_i = 0; // Current index in src
- let mut dst_i = 0; // Current index in dst
-
- // We place 1 extra byte preceding every 8 bytes. Each bit in this byte is
- // a flag that corresponds to one of the 8 bytes that delimit it. If the
- // flag is set, the byte is a copy item. If the flag is 0, it is a literal
- // item. We'll call this the copy flag.
-
- // Stores the index of the current copy flag in dst
- let mut copymap = 0;
-
- // The current bit in the byte pointed at by `copymap`
- let mut copymask: usize = 1 << (NBBY - 1);
-
- // This is our cache
- let mut lempel = [0usize; LEMPEL_SIZE];
-
- while src_i < src.len() {
- copymask <<= 1;
- if copymask == (1 << NBBY) {
- // We've reached the end of our 8-byte cycle
- if dst_i >= dst.len() - 1 - 2 * NBBY {
- // Not enough room left in dst for another worst-case cycle (one flag
- // byte plus eight 2-byte copy items); return src.len() to signal incompressible data
- return src.len();
- }
- // Not done yet, reset the cycle
- copymask = 1;
- copymap = dst_i; // Point to our new copy flag byte
- dst[dst_i] = 0; // Place the new (initially clear) copy flag byte
- dst_i += 1;
- }
-
- if src_i > src.len() - MATCH_MAX {
- // Nearing the end of the data, don't bother searching for matches,
- // just copy.
- dst[dst_i] = src[src_i];
- src_i += 1;
- dst_i += 1;
- continue;
- }
-
- // Compute hash of current 3 byte slice. It will be the index to our
- // cache
- let mut hash = ((src[src_i] as usize) << 16) + ((src[src_i + 1] as usize) << 8) +
- (src[src_i + 2] as usize);
- hash += hash >> 9;
- hash += hash >> 5;
- let hp = (hash as usize) & (LEMPEL_SIZE - 1);
-
- // Look up the current 3 byte slice in the cache. We'll verify that it's
- // a valid entry later.
- let offset = (src_i - lempel[hp]) & OFFSET_MASK;
- let cpy = src_i - offset;
-
- // Set the current 3 byte slice as the most recent sighting of it in the
- // cache
- lempel[hp] = src_i;
-
- // Check that the cached item is valid
- if src_i >= offset && cpy != src_i && src[src_i] == src[cpy] &&
- src[src_i + 1] == src[cpy + 1] && src[src_i + 2] == src[cpy + 2] {
- // This cache item is valid, write a copy item
- dst[copymap] |= copymask as u8; // Set the copy flag bit for this item
-
- // Find the full length of this match. Since it was in the hash,
- // we know the match length is at least 3.
- let mut mlen = MATCH_MIN;
- while mlen < MATCH_MAX {
- if src[src_i + mlen] != src[cpy + mlen] {
- break;
- }
- mlen += 1;
- }
-
- // Place the match length portion of the copy item
- dst[dst_i] = (((mlen - MATCH_MIN) << (NBBY - MATCH_BITS)) | (offset >> NBBY)) as u8;
- dst_i += 1;
-
- // Place the offset portion of the copy item
- dst[dst_i] = offset as u8;
- dst_i += 1;
-
- // Now we get to skip the repeated sequence!
- src_i += mlen;
- } else {
- // Not a real cache entry, don't make a copy item
- dst[dst_i] = src[src_i];
- dst_i += 1;
- src_i += 1;
- }
- }
-
- return dst_i;
-}
-
-pub fn decompress(src: &[u8], dst: &mut [u8]) -> bool {
- let mut src_i = 0;
- let mut dst_i = 0;
- let mut copymap: u8 = 0;
- let mut copymask: usize = 1 << (NBBY - 1);
-
- while dst_i < dst.len() {
- copymask <<= 1;
- if copymask == (1 << NBBY) {
- // Finished another 8-byte loop, repeat
- copymask = 1; // Reset the copy mask
- copymap = src[src_i]; // Current byte is the new copymap
- src_i += 1;
- }
- if (copymap & (copymask as u8)) != 0 {
- // Found a copy item
- let mlen = ((src[src_i] as usize) >> (NBBY - MATCH_BITS)) + MATCH_MIN;
- let offset = (((src[src_i] as usize) << NBBY) | (src[src_i + 1] as usize)) &
- OFFSET_MASK;
- src_i += 2;
- if dst_i < offset {
- // Copy item points to invalid index, error
- return false;
- }
- let mut cpy = dst_i - offset;
- for _ in 0..mlen {
- if dst_i >= dst.len() {
- // Reached the end of the destination buffer, can't copy anymore
- break;
- }
- dst[dst_i] = dst[cpy];
- dst_i += 1;
- cpy += 1;
- }
- } else {
- // It's a literal item, copy it directly
- dst[dst_i] = src[src_i];
- dst_i += 1;
- src_i += 1;
- }
- }
- return true;
-}
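
For illustration (not part of this commit), a round trip through the two functions above, assuming they are in scope. The input is longer than MATCH_MAX so the early-exit arithmetic in compress never underflows, and the caller must supply the original length when decompressing:

```rust
// Illustrative only: compressing and decompressing a repetitive buffer.
fn main() {
    // 128 bytes of a 4-byte repeating pattern: highly compressible.
    let src: Vec<u8> = b"abcd".iter().cycle().take(128).cloned().collect();

    let mut compressed = vec![0u8; src.len()];
    let clen = compress(&src, &mut compressed);
    if clen >= src.len() {
        println!("input did not compress; store it as-is");
        return;
    }

    // The original length is not stored in the stream; the caller supplies it.
    let mut restored = vec![0u8; src.len()];
    assert!(decompress(&compressed[..clen], &mut restored));
    assert_eq!(restored, src);
    println!("{} bytes -> {} bytes", src.len(), clen);
}
```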
621 crates/zfs/main.rs
@@ -1,621 +0,0 @@
-// To use this, please install zfs-fuse
-use std::{mem, str};
-use std::fs::File;
-use std::io::{Read, Write, stdin, stdout};
-use std::rc::Rc;
-
-use self::arcache::ArCache;
-use self::dnode::{DNodePhys, ObjectType};
-use self::dmu_objset::ObjectSetPhys;
-use self::block_ptr::BlockPtr;
-use self::dsl_dataset::DslDatasetPhys;
-use self::dsl_dir::DslDirPhys;
-use self::from_bytes::FromBytes;
-use self::nvpair::NvValue;
-use self::space_map::SpaceMapPhys;
-use self::uberblock::Uberblock;
-use self::vdev::VdevLabel;
-
-macro_rules! readln {
- () => ({
- let mut buffer = String::new();
- match stdin().read_line(&mut buffer) {
- Ok(_) => Some(buffer),
- Err(_) => None
- }
- });
-}
-
-pub mod arcache;
-pub mod avl;
-pub mod block_ptr;
-pub mod dmu_objset;
-pub mod dnode;
-pub mod dsl_dataset;
-pub mod dsl_dir;
-pub mod dsl_pool;
-pub mod dvaddr;
-pub mod from_bytes;
-pub mod lzjb;
-pub mod metaslab;
-pub mod nvpair;
-pub mod nvstream;
-pub mod spa;
-pub mod space_map;
-pub mod taskq;
-pub mod txg;
-pub mod uberblock;
-pub mod util;
-pub mod vdev;
-pub mod vdev_file;
-pub mod xdr;
-pub mod zap;
-pub mod zfs;
-pub mod zil_header;
-pub mod zio;
-
-pub struct ZfsReader {
- pub zio: zio::Reader,
- pub arc: ArCache,
-}
-
-impl ZfsReader {
- pub fn read_block(&mut self, block_ptr: &BlockPtr) -> Result<Vec<u8>, String> {
- let data = self.arc.read(&mut self.zio, &block_ptr.dvas[0]);
- match block_ptr.compression() {
- 2 => {
- // compression off
- data
- }
- 1 | 3 => {
- // lzjb compression
- let mut decompressed = vec![0; (block_ptr.lsize()*512) as usize];
- lzjb::decompress(&match data {
- Ok(data) => data,
- Err(e) => return Err(e),
- },
- &mut decompressed);
- Ok(decompressed)
- }
- u => Err(format!("Error: Unknown compression type {}", u)),
- }
- }
-
- pub fn read_type<T: FromBytes>(&mut self, block_ptr: &BlockPtr) -> Result<T, String> {
- let data = self.read_block(block_ptr);
- data.and_then(|data| T::from_bytes(&data[..]))
- }
-
- pub fn read_type_array<T: FromBytes>(&mut self,
- block_ptr: &BlockPtr,
- offset: usize)
- -> Result<T, String> {
- let data = self.read_block(block_ptr);
- data.and_then(|data| T::from_bytes(&data[offset * mem::size_of::<T>()..]))
- }
-
- pub fn uber(&mut self, _: &[u8]) -> Result<Uberblock, String> {
- let mut newest_uberblock: Option<Uberblock> = None;
- for i in 0..128 {
- // let ub_len = 2*512;
- // let ub_start = i * ub_len;
- // let ub_end = ub_start + ub_len;
- // if let Ok(uberblock) = Uberblock::from_bytes(&uberblocks[ub_start..ub_end]) {
- if let Ok(uberblock) = Uberblock::from_bytes(&self.zio.read(256 + i * 2, 2)) {
- let newest = match newest_uberblock {
- Some(previous) => {
- if uberblock.txg > previous.txg {
- // Found a newer uberblock
- true
- } else {
- false
- }
- }
- // No uberblock yet, so first one we find is the newest
- None => true,
- };
-
- if newest {
- newest_uberblock = Some(uberblock);
- }
- }
- }
-
- match newest_uberblock {
- Some(uberblock) => Ok(uberblock),
- None => Err("Failed to find valid uberblock".to_string()),
- }
- }
-}
-
-#[derive(Copy, Clone, PartialEq)]
-pub enum ZfsTraverse {
- ThisDir,
- Done,
-}
-
-pub struct Zfs {
- pub reader: ZfsReader,
- pub uberblock: Uberblock, // The active uberblock
- pub mos: ObjectSetPhys,
- fs_objset: ObjectSetPhys,
- master_node: DNodePhys,
- root: u64,
-}
-
-impl Zfs {
- pub fn new(disk: File) -> Result<Self, String> {
- let mut zfs_reader = ZfsReader {
- zio: zio::Reader { disk: disk },
- arc: ArCache::new(),
- };
-
- // Read vdev label
- // let vdev_label = Box::new(try!(VdevLabel::from_bytes(&zfs_reader.zio.read(0, 256 * 2))));
- // let mut xdr = xdr::MemOps::new(&mut vdev_label.nv_pairs);
- // let nv_list = try!(nvstream::decode_nv_list(&mut xdr).map_err(|e| format!("{:?}", e)));
- // let vdev_tree =
- // match nv_list.find("vdev_tree") {
- // Some(vdev_tree) => {
- // vdev_tree
- // },
- // None => {
- // return Err("No vdev_tree in vdev label nvpairs".to_string());
- // },
- // };
- //
- // let vdev_tree =
- // if let NvValue::NvList(ref vdev_tree) = *vdev_tree {
- // vdev_tree
- // } else {
- // return Err("vdev_tree is not NvValue::NvList".to_string());
- // };
-
-
- // Get the active uberblock
- // let uberblock = try!(zfs_reader.uber(&vdev_label.uberblocks));
- let uberblock = try!(zfs_reader.uber(&[]));
-
- // let mos_dva = uberblock.rootbp.dvas[0];
- let mos: ObjectSetPhys = try!(zfs_reader.read_type(&uberblock.rootbp));
- let mos_bp1 = mos.meta_dnode.get_blockptr(0);
-
- // 2nd dnode in MOS points at the root dataset zap
- let dnode1: DNodePhys = try!(zfs_reader.read_type_array(&mos_bp1, 1));
-
- let root_ds_bp = dnode1.get_blockptr(0);
- let root_ds: zap::MZapWrapper = try!(zfs_reader.read_type(root_ds_bp));
-
- let root_ds_dnode: DNodePhys =
- try!(zfs_reader.read_type_array(&mos_bp1, root_ds.chunks[0].value as usize));
-
- let dsl_dir = try!(DslDirPhys::from_bytes(root_ds_dnode.get_bonus()));
- let head_ds_dnode: DNodePhys =
- try!(zfs_reader.read_type_array(&mos_bp1, dsl_dir.head_dataset_obj as usize));
-
- let root_dataset = try!(DslDatasetPhys::from_bytes(head_ds_dnode.get_bonus()));
-
- let fs_objset: ObjectSetPhys = try!(zfs_reader.read_type(&root_dataset.bp));
-
- let mut indirect: BlockPtr = try!(zfs_reader.read_type_array(fs_objset.meta_dnode
- .get_blockptr(0),
- 0));
- while indirect.level() > 0 {
- indirect = try!(zfs_reader.read_type_array(&indirect, 0));
- }
-
- // Master node is always the second object in the object set
- let master_node: DNodePhys = try!(zfs_reader.read_type_array(&indirect, 1));
- let master_node_zap: zap::MZapWrapper =
- try!(zfs_reader.read_type(master_node.get_blockptr(0)));
-
- // Find the ROOT zap entry
- let mut root = None;
- for chunk in &master_node_zap.chunks {
- if chunk.name() == Some("ROOT") {
- root = Some(chunk.value);
- break;
- }
- }
-
- let root = match root {
- Some(root) => Ok(root),
- None => Err("Error: failed to get the ROOT".to_string()),
- };
-
- Ok(Zfs {
- reader: zfs_reader,
- uberblock: uberblock,
- mos: mos,
- fs_objset: fs_objset,
- master_node: master_node,
- root: try!(root),
- })
- }
-
- pub fn traverse<F, T>(&mut self, mut f: F) -> Option<T>
- where F: FnMut(&mut Self,
- &str,
- usize,
- &mut DNodePhys,
- &BlockPtr,
- &mut Option<T>)
- -> Option<ZfsTraverse>
- {
- // Given the fs_objset and the object id of the root directory, we can traverse the
- // directory tree.
- // TODO: Cache object id of paths
- // TODO: Calculate path through objset blockptr tree to use
- let mut indirect: BlockPtr = self.reader
- .read_type_array(self.fs_objset
- .meta_dnode
- .get_blockptr(0),
- 0)
- .unwrap();
- while indirect.level() > 0 {
- indirect = self.reader.read_type_array(&indirect, 0).unwrap();
- }
- // Set the cur_node to the root node, located at an L0 indirect block
- let root = self.root as usize;
- let mut cur_node: DNodePhys = self.reader
- .read_type_array(&indirect, self.root as usize)
- .unwrap();
- let mut result = None;
- if f(self, "", root, &mut cur_node, &indirect, &mut result) == Some(ZfsTraverse::Done) {
- return result;
- }
- 'traverse: loop {
- // Directory dnodes point at zap objects. File/directory names are mapped to their
- // fs_objset object ids.
- let dir_contents: zap::MZapWrapper = self.reader
- .read_type(cur_node.get_blockptr(0))
- .unwrap();
- let mut next_dir = None;
- for chunk in &dir_contents.chunks {
- match chunk.name() {
- Some(chunk_name) => {
- // Stop once we get to a null entry
- if chunk_name.is_empty() {
- break;
- }
-
- let traverse = f(self,
- chunk_name,
- chunk.value as usize,
- &mut cur_node,
- &indirect,
- &mut result);
- if let Some(traverse) = traverse {
- match traverse {
- ZfsTraverse::ThisDir => {
- // Found the folder we were looking for
- next_dir = Some(chunk.value);
- break;
- }
- ZfsTraverse::Done => {
- break 'traverse;
- }
- }
- }
- }
- None => {
- // Invalid directory name
- return None;
- }
- }
- }
- if next_dir.is_none() {
- break;
- }
- }
- result
- }
-
- pub fn read_file(&mut self, path: &str) -> Option<Vec<u8>> {
- let path = path.trim_matches('/'); // Robust against different url styles
- let path_end_index = path.rfind('/').map(|i| i + 1).unwrap_or(0);
- let path_end = &path[path_end_index..];
- let mut folder_iter = path.split('/');
- let mut folder = folder_iter.next();
-
- let file_contents = self.traverse(|zfs, name, node_id, node, indirect, result| {
- let mut this_dir = false;
- if let Some(folder) = folder {
- if name == folder {
- *node = zfs.reader
- .read_type_array(indirect, node_id as usize)
- .unwrap();
- if name == path_end {
- if node.object_type != ObjectType::PlainFileContents {
- // Not a file
- return Some(ZfsTraverse::Done);
- }
- // Found the file
- let file_contents = zfs.reader
- .read_block(node.get_blockptr(0))
- .unwrap();
- // TODO: Read file size from ZPL rather than look for terminating 0
- let file_contents: Vec<u8> = file_contents.into_iter()
- .take_while(|c| *c != 0)
- .collect();
- *result = Some(file_contents);
- return Some(ZfsTraverse::Done);
- }
- this_dir = true;
- }
- }
- if this_dir {
- if node.object_type != ObjectType::DirectoryContents {
- // Not a folder
- return Some(ZfsTraverse::Done);
- }
- folder = folder_iter.next();
- return Some(ZfsTraverse::ThisDir);
- }
- None
- });
-
- file_contents
- }
-
- pub fn ls(&mut self, path: &str) -> Option<Vec<String>> {
- let path = path.trim_matches('/'); // Robust against different url styles
- let path_end_index = path.rfind('/').map(|i| i + 1).unwrap_or(0);
- let path_end = &path[path_end_index..];
- let mut folder_iter = path.split('/');
- let mut folder = folder_iter.next();
-
- let file_contents = self.traverse(|zfs, name, node_id, node, indirect, result| {
- let mut this_dir = false;
- if let Some(folder) = folder {
- if name == folder {
- if folder == path_end {
- *node = zfs.reader
- .read_type_array(indirect, node_id as usize)
- .unwrap();
- let dir_contents: zap::MZapWrapper = zfs.reader
- .read_type(node.get_blockptr(0))
- .unwrap();
-
- let ls: Vec<String> = dir_contents.chunks
- .iter()
- .map(|x| {
- if x.value & 0xF000000000000000 ==
- 0x4000000000000000 {
- x.name().unwrap().to_string() +
- "/"
- } else {
- x.name().unwrap().to_string()
- }
- })
- .take_while(|x| !x.is_empty())
- .collect();
- *result = Some(ls);
- return Some(ZfsTraverse::Done);
- }
- this_dir = true;
- }
- }
- if this_dir {
- folder = folder_iter.next();
- return Some(ZfsTraverse::ThisDir);
- }
- None
- });
-
- file_contents
- }
-}
-
-// TODO: Find a way to remove all the to_string's
-fn main() {
- println!("Type open zfs.img to open the image file");
-
- let mut zfs_option: Option<Zfs> = None;
-
- 'reading: loop {
- print!("# ");
- stdout().flush();
-
- if let Some(line) = readln!() {
- let args: Vec<String> = line.trim().split(' ').map(|arg| arg.to_string()).collect();
-
- if let Some(command) = args.get(0) {
- let mut close = false;
- match zfs_option {
- Some(ref mut zfs) => {
- if command == "uber" {
- let ref uberblock = zfs.uberblock;
- // 128 KB of ubers after 128 KB of other stuff
- println!("Newest Uberblock {:X}", zfs.uberblock.magic);
- println!("Version {}", uberblock.version);
- println!("TXG {}", uberblock.txg);
- println!("GUID {:X}", uberblock.guid_sum);
- println!("Timestamp {}", uberblock.timestamp);
- println!("ROOTBP[0] {:?}", uberblock.rootbp.dvas[0]);
- println!("ROOTBP[1] {:?}", uberblock.rootbp.dvas[1]);
- println!("ROOTBP[2] {:?}", uberblock.rootbp.dvas[2]);
- } else if command == "spa_import" {
- let mut nvpairs_buffer = zfs.reader.zio.read(32, 224);
- let mut xdr = xdr::MemOps::new(&mut nvpairs_buffer);
- let nv_list = nvstream::decode_nv_list(&mut xdr).unwrap();
- let name = nv_list.get::<&String>("name").unwrap().clone();
- let spa = spa::Spa::import(name, nv_list).unwrap();
- } else if command == "vdev_label" {
- match VdevLabel::from_bytes(&zfs.reader.zio.read(0, 256 * 2)) {
- Ok(ref mut vdev_label) => {
- let mut xdr = xdr::MemOps::new(&mut vdev_label.nv_pairs);
- let nv_list = nvstream::decode_nv_list(&mut xdr).unwrap();
- println!("Got nv_list:\n{:?}", nv_list);
- match nv_list.find("vdev_tree") {
- Some(vdev_tree) => {
- println!("Got vdev_tree");
-
- let vdev_tree = if let NvValue::NvList(ref vdev_tree) =
- *vdev_tree {
- Some(vdev_tree)
- } else {
- None
- };
-
- match vdev_tree.unwrap().find("metaslab_array") {
- Some(metaslab_array) => {
- println!("Got metaslab_array");
- if let NvValue::Uint64(metaslab_array) =
- *metaslab_array {
- // Get metaslab array dnode
- let metaslab_array = metaslab_array as usize;
- let ma_dnode: Result<DNodePhys, String> =
- zfs.reader
- .read_type_array(zfs.mos
- .meta_dnode
- .get_blockptr(0),
- metaslab_array);
- let ma_dnode = ma_dnode.unwrap(); // TODO
-
- // Get a spacemap object id
- let sm_id: Result<u64, String> =
- zfs.reader.read_type_array(ma_dnode.get_blockptr(0), 0);
- let sm_id = sm_id.unwrap(); // TODO
-
- let sm_dnode: Result<DNodePhys, String> =
- zfs.reader
- .read_type_array(zfs.mos
- .meta_dnode
- .get_blockptr(0),
- sm_id as usize);
- let sm_dnode = sm_dnode.unwrap(); // TODO
- let space_map_phys = SpaceMapPhys::from_bytes(sm_dnode.get_bonus()).unwrap(); // TODO
- let space_map: Result<Vec<u8>, String> =
- zfs.reader
- .read_block(sm_dnode.get_blockptr(0));
-
- println!("got space map id: {:?}", sm_id);
- println!("got space map dnode: {:?}", sm_dnode);
- println!("got space map phys: {:?}",
- space_map_phys);
- // println!("got space map: {:?}", &space_map.unwrap()[0..64]);
-
- let mut range_tree: avl::Tree<space_map::Entry,
- u64> =
- avl::Tree::new(Rc::new(|x| x.offset()));
- // space_map::load_space_map_avl(&space_map::SpaceMap { size: 30 },
- // &mut range_tree,
- // &space_map.unwrap(),
- // space_map::MapType::Alloc).unwrap();
- } else {
- println!("Invalid metaslab_array NvValue \
- type. Expected Uint64.");
- }
- }
- None => {
- println!("No `metaslab_array` in vdev_tree");
- }
- };
- }
- None => {
- println!("No `vdev_tree` in vdev_label nvpairs");
- }
- }
- }
- Err(e) => {
- println!("Couldn't read vdev_label: {}", e);
- }
- }
- } else if command == "file" {
- match args.get(1) {
- Some(arg) => {
- let file = zfs.read_file(arg);
- match file {
- Some(file) => {
- println!("File contents: {}",
- str::from_utf8(&file).unwrap());
- }
- None => println!("Failed to read file"),
- }
- }
- None => println!("Usage: file <path>"),
- }
- } else if command == "ls" {
- match args.get(1) {
- Some(arg) => {
- let ls = zfs.ls(arg);
- match ls {
- Some(ls) => {
- for item in &ls {
- print!("{}\t", item);
- }
- }
- None => println!("Failed to read directory"),
- }
- }
- None => println!("Usage: ls <path>"),
- }
- } else if command == "dump" {
- match args.get(1) {
- Some(arg) => {
- if let Ok(sector) = arg.parse::<usize>() {
- println!("Dump sector: {}", sector);
-
- let data = zfs.reader.zio.read(sector, 1);
- for i in 0..data.len() {
- if i % 32 == 0 {
- print!("\n{:X}:", i);
- }
- if let Some(byte) = data.get(i) {
- print!(" {:X}", *byte);
- } else {
- println!(" !");
- }
- }
- print!("\n");
- } else {
- println!("Sector not a number");
- }
- }
- None => println!("No sector specified!"),
- }
- } else if command == "close" {
- println!("Closing");
- close = true;
- } else if command == "exit" {
- break 'reading;
- } else {
- println!("Commands: uber vdev_label file ls dump close exit");
- }
- }
- None => {
- if command == "open" {
- match args.get(1) {
- Some(arg) => {
- match File::open(arg) {
- Ok(file) => {
- let zfs = Zfs::new(file);
- if let Err(ref e) = zfs {
- println!("Error: {:?}", e);
- } else {
- println!("Open: {}", arg);
- }
- zfs_option = zfs.ok();
- }
- Err(err) => println!("Failed to open {}: {}", arg, err),
- }
- }
- None => println!("No file specified!"),
- }
- } else if command == "exit" {
- break 'reading;
- } else {
- println!("Commands: open exit");
- }
- }
- }
- if close {
- zfs_option = None;
- }
- }
- } else {
- break 'reading;
- }
- }
-}
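
For illustration (not part of this commit), a sketch that drives the Zfs API above directly instead of through the interactive prompt; zfs.img matches the image name mentioned in main(), while the file path queried here is invented:

```rust
// Hypothetical usage sketch of Zfs::new, Zfs::ls and Zfs::read_file from above.
use std::fs::File;

fn demo() -> Result<(), String> {
    let disk = File::open("zfs.img").map_err(|e| format!("{}", e))?;
    let mut zfs = Zfs::new(disk)?;

    if let Some(entries) = zfs.ls("/") {
        println!("root directory: {:?}", entries);
    }
    if let Some(bytes) = zfs.read_file("home/readme.md") {
        println!("{}", String::from_utf8_lossy(&bytes));
    }
    Ok(())
}
```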
587 crates/zfs/metaslab.rs
@@ -1,587 +0,0 @@
-use std::cmp;
-use std::rc::Rc;
-
-use super::avl;
-use super::dmu_objset::ObjectSet;
-use super::space_map::{self, Segment, SpaceMap};
-use super::taskq::{self, Taskq};
-use super::txg;
-use util;
-use super::vdev;
-use super::zfs;
-
-// A metaslab class encompasses a category of allocatable top-level vdevs.
-// Each top-level vdev is associated with a metaslab group which defines
-// the allocatable region for that vdev. Examples of these categories include
-// "normal" for data block allocations (i.e. main pool allocations) or "log"
-// for allocations designated for intent log devices (i.e. slog devices).
-// When a block allocation is requested from the SPA it is associated with a
-// metaslab_class_t, and only top-level vdevs (i.e. metaslab groups) belonging
-// to the class can be used to satisfy that request. Allocations are done
-// by traversing the metaslab groups that are linked off of the `rotor` field.
-// This rotor points to the next metaslab group where allocations will be
-// attempted. Allocating a block is a 3 step process -- select the metaslab
-// group, select the metaslab, and then allocate the block. The metaslab
-// class defines the low-level block allocator that will be used as the
-// final step in allocation. These allocators are pluggable allowing each class
-// to use a block allocator that best suits that class.
-//
-pub struct MetaslabClass {
- // spa: *Spa,
- // rotor: *MetaslabGroup,
- ops: Rc<MetaslabOps>,
- aliquot: u64,
- alloc_groups: u64, // # of allocatable groups
- alloc: u64, // total allocated space
- deferred: u64, // total deferred frees
- space: u64, // total space (alloc + free)
- dspace: u64, /* total deflated space
- * histogram: [u64, RANGE_TREE_HISTOGRAM_SIZE],
- * fastwrite_lock: kmutex_t, */
-}
-
-impl MetaslabClass {
- pub fn create(ops: Rc<MetaslabOps>) -> MetaslabClass {
- // mutex_init(&mc->mc_fastwrite_lock, NULL, MUTEX_DEFAULT, NULL);
-
- MetaslabClass {
- // rotor: NULL,
- ops: ops,
- aliquot: 0,
- alloc_groups: 0,
- alloc: 0,
- deferred: 0,
- space: 0,
- dspace: 0,
- }
- }
-}
-
-// Metaslab groups encapsulate all the allocatable regions (i.e. metaslabs)
-// of a top-level vdev. They are linked together to form a circular linked
-// list and can belong to only one metaslab class. Metaslab groups may become
-// ineligible for allocations for a number of reasons such as limited free
-// space, fragmentation, or going offline. When this happens the allocator will
-// simply find the next metaslab group in the linked list and attempt
-// to allocate from that group instead.
-//
-pub struct MetaslabGroup {
- // lock: kmutex_t,
- metaslab_tree: avl::Tree<MetaslabAvlNode, (u64, u64)>,
- aliquot: u64,
- allocatable: bool, // can we allocate?
- free_capacity: u64, // percentage free
- bias: i64,
- activation_count: i64,
- ms_class: Rc<MetaslabClass>,
- // vdev: vdev::TreeIndex,
- taskq: Taskq,
- // prev: *MetaslabGroup,
- // next: *MetaslabGroup,
- fragmentation: u64, // histogram: [u64; RANGE_TREE_HISTOGRAM_SIZE],
-}
-
-impl MetaslabGroup {
- pub fn create(ms_class: Rc<MetaslabClass>) -> Self {
- let metaslab_key = Rc::new(|ms: &MetaslabAvlNode| (ms.weight, ms.start));
- let taskq = Taskq::new("metaslab_group_taskq".to_string(),
- // metaslab_load_pct
- 4,
- 10,
- -1i64 as u64,
- // TASKQ_THREADS_CPU_PCT | TASKQ_DYNAMIC
- 0);
-
- MetaslabGroup {
- // lock: kmutex_t,
- metaslab_tree: avl::Tree::new(metaslab_key),
- aliquot: 0,
- allocatable: false, // can we allocate?
- free_capacity: 0, // percentage free
- bias: 0,
- activation_count: 0,
- ms_class: ms_class,
- // vdev: vdev,
- taskq: taskq,
- // prev: *MetaslabGroup,
- // next: *MetaslabGroup,
- fragmentation: 0, // histogram: [0; RANGE_TREE_HISTOGRAM_SIZE],
- }
- }
-
- pub fn add(&mut self, index: usize, m: &Metaslab) {
- self.metaslab_tree.insert(MetaslabAvlNode {
- index: index,
- start: m.start,
- weight: m.weight,
- });
- }
-
- pub fn activate(&mut self) {
- // metaslab_class_t *mc = self.class;
- // metaslab_group_t *mgprev, *mgnext;
- //
- // assert!(spa_config_held(ms_class.spa, SCL_ALLOC, RW_WRITER));
- //
- // assert!(ms_class.rotor != mg);
- // assert!(self.prev == NULL);
- // assert!(self.next == NULL);
- // assert!(self.activation_count <= 0);
- //
- // if (++self.activation_count <= 0)
- // return;
- //
- // self.aliquot = metaslab_aliquot * cmp::max(1, self.vdev->vdev_children);
- // metaslab_group_alloc_update(mg);
- //
- // if (mgprev = ms_class.rotor) == NULL {
- // self.prev = mg;
- // self.next = mg;
- // } else {
- // mgnext = mgprev->mg_next;
- // self.prev = mgprev;
- // self.next = mgnext;
- // mgprev->mg_next = mg;
- // mgnext->mg_prev = mg;
- // }
- // ms_class.rotor = mg;
- }
-}
-
-/// /////////////////////////////////////////////////////////////////////////////////////////////////
-
-// This value defines the number of elements in the lbas array. The value
-// of 64 was chosen as it covers all power of 2 buckets up to UINT64_MAX.
-// This is the equivalent of highbit(UINT64_MAX).
-const MAX_LBAS: usize = 64;
-
-// Each metaslab maintains a set of in-core trees to track metaslab operations.
-// The in-core free tree (ms_tree) contains the current list of free segments.
-// As blocks are allocated, the allocated segment are removed from the ms_tree
-// and added to a per txg allocation tree (ms_alloctree). As blocks are freed,
-// they are added to the per txg free tree (ms_freetree). These per txg
-// trees allow us to process all allocations and frees in syncing context
-// where it is safe to update the on-disk space maps. One additional in-core
-// tree is maintained to track deferred frees (ms_defertree). Once a block
-// is freed it will move from the ms_freetree to the ms_defertree. A deferred
-// free means that a block has been freed but cannot be used by the pool
-// until TXG_DEFER_SIZE transactions groups later. For example, a block
-// that is freed in txg 50 will not be available for reallocation until
-// txg 52 (50 + TXG_DEFER_SIZE). This provides a safety net for uberblock
-// rollback. A pool could be safely rolled back TXG_DEFER_SIZE
-// transactions groups and ensure that no block has been reallocated.
-//
-// The simplified transition diagram looks like this:
-//
-//
-// ALLOCATE
-// |
-// V
-// free segment (tree) --------> alloc_tree ----> (write to space map)
-// ^
-// |
-// | free_tree <--- FREE
-// | |
-// | |
-// | |
-// +----------- defer_tree <-------+---------> (write to space map)
-//
-//
-// Each metaslab's space is tracked in a single space map in the MOS,
-// which is only updated in syncing context. Each time we sync a txg,
-// we append the allocs and frees from that txg to the space map.
-// The pool space is only updated once all metaslabs have finished syncing.
-//
-// To load the in-core free tree we read the space map from disk.
-// This object contains a series of alloc and free records that are
-// combined to make up the list of all free segments in this metaslab. These
-// segments are represented in-core by the ms_tree and are stored in an
-// AVL tree.
-//
-// As the space map grows (as a result of the appends) it will
-// eventually become space-inefficient. When the metaslab's in-core free tree
-// is zfs_condense_pct/100 times the size of the minimal on-disk
-// representation, we rewrite it in its minimized form. If a metaslab
-// needs to condense then we must set the condensing flag to ensure
-// that allocations are not performed on the metaslab that is being written.
-//
-
-pub struct Metaslab {
- // lock: kmutex_t,
- // load_cv: kcondvar_t,
- space_map: Option<SpaceMap>,
- ops: Rc<MetaslabOps>,
- id: u64,
- start: u64,
- size: u64,
- fragmentation: u64,
-
- // Sorted by start
- alloc_tree: Vec<avl::Tree<space_map::Segment, u64>>, // txg::TXG_SIZE
- free_tree: Vec<avl::Tree<space_map::Segment, u64>>, // txg::TXG_SIZE
- defer_tree: Vec<avl::Tree<space_map::Segment, u64>>, // txg::DEFER_SIZE
- tree: avl::Tree<space_map::Segment, u64>,
-
- condensing: bool,
- condense_wanted: bool,
- loaded: bool,
- loading: bool,
-
- defer_space: i64, // sum of defermap[] space
- weight: u64, // weight vs others in group
- access_txg: u64,
-
- // The metaslab block allocators can optionally use a size-ordered
- // range tree and/or an array of LBAs. Not all allocators use
- // this functionality. The size_tree should always contain the
- // same number of segments as the tree. The only difference
- // is that the size_tree is ordered by segment sizes.
- size_tree: avl::Tree<space_map::Segment, u64>, // Sorted by size
- lbas: [u64; MAX_LBAS], /* group: *MetaslabGroup,
- * avl_node_t ms_group_node, // node in metaslab group tree
- * txg_node_t ms_txg_node, // per-txg dirty metaslab links */
-}
-
-impl Metaslab {
- pub fn new(ops: Rc<MetaslabOps>,
- id: u64,
- start: u64,
- size: u64,
- space_map: Option<SpaceMap>)
- -> Self {
- let seg_key_start = Rc::new(|seg: &Segment| seg.start);
- let seg_key_size = Rc::new(|seg: &Segment| seg.size);
-
- Metaslab {
- // lock: kmutex_t,
- // load_cv: kcondvar_t,
- space_map: space_map,
- ops: ops,
- id: id,
- start: start,
- size: size,
- fragmentation: 0,
-
- alloc_tree: (0..txg::TXG_SIZE).map(|x| avl::Tree::new(seg_key_start.clone())).collect(),
- free_tree: (0..txg::TXG_SIZE).map(|x| avl::Tree::new(seg_key_start.clone())).collect(),
- defer_tree: (0..txg::DEFER_SIZE)
- .map(|x| avl::Tree::new(seg_key_start.clone()))
- .collect(),
- tree: avl::Tree::new(seg_key_start),
-
- condensing: false,
- condense_wanted: false,
- loaded: false,
- loading: false,
-
- defer_space: 0,
- weight: 0,
- access_txg: 0,
-
- size_tree: avl::Tree::new(seg_key_size),
- lbas: [0; MAX_LBAS], /* group: *MetaslabGroup,
- * avl_node_t ms_group_node, // node in metaslab group tree
- * txg_node_t ms_txg_node, // per-txg dirty metaslab links */
- }
- }
-
- pub fn init(mos: &mut ObjectSet,
- vdev: &mut vdev::Vdev,
- id: u64,
- object: u64,
- txg: u64)
- -> zfs::Result<Self> {
- // We assume this is a top-level vdev
- let vdev_top = try!(vdev.top.as_mut().ok_or(zfs::Error::Invalid));
-
- // mutex_init(&ms.lock, NULL, MUTEX_DEFAULT, NULL);
- // cv_init(&ms->ms_load_cv, NULL, CV_DEFAULT, NULL);
- let start = id << vdev_top.ms_shift;
- let size = 1 << vdev_top.ms_shift;
-
- // We only open space map objects that already exist. All others
- // will be opened when we finally allocate an object for it.
- let space_map = if object != 0 {
- Some(try!(SpaceMap::open(mos,
- object,
- start,
- size,
- vdev.ashift as u8 /* , &ms.lock */)))
- } else {
- None
- };
-
- let mut metaslab = Self::new(vdev_top.ms_group.ms_class.ops.clone(),
- id,
- start,
- size,
- space_map);
-
- vdev_top.ms_group.add(id as usize, &metaslab);
-
- // metaslab.fragmentation = metaslab_fragmentation(metaslab);
-
- // If we're opening an existing pool (txg == 0) or creating
- // a new one (txg == TXG_INITIAL), all space is available now.
- // If we're adding space to an existing pool, the new space
- // does not become available until after this txg has synced.
- if txg <= txg::TXG_INITIAL as u64 {
- // metaslab_sync_done(metaslab, 0);
- }
-
- // If metaslab_debug_load is set and we're initializing a metaslab
- // that has an allocated space_map object, then load its space
- // map so that we can verify frees.
- // if metaslab_debug_load && metaslab.space_map.is_some() {
- // try!(metaslab.load());
- // }
-
-
- // if txg != 0 {
- // vdev.dirty(0, NULL, txg);
- // vdev.dirty(vdev::DIRTY_METASLAB, ms, txg);
- // }
-
- Ok(metaslab)
- }
-
- pub fn load(&mut self) -> zfs::Result<()> {
- let mut result = Ok(());
- // assert!(MUTEX_HELD(&self.lock));
- assert!(!self.loaded);
- assert!(!self.loading);
-
- self.loading = true;
-
- // If the space map has not been allocated yet, then treat
- // all the space in the metaslab as free and add it to the
- // tree.
- if let Some(ref mut space_map) = self.space_map {
- // result = space_map.load(&mut self.tree, space_map::AllocType::Free);
- } else {
- self.tree.insert(Segment {
- start: self.start,
- size: self.size,
- });
- }
-
- self.loaded = result.is_ok();
- self.loading = false;
-
- if self.loaded {
- for t in 0..txg::DEFER_SIZE {
- // self.defer_tree[t].in_order(range_tree_remove, self.tree);
- }
- }
- // cv_broadcast(&self.load_cv);
- result
- }
-
- pub fn load_wait(&self) {
- while self.loading {
- assert!(!self.loaded);
- // cv_wait(&msp->ms_load_cv, &msp->ms_lock);
- }
- }
-
- fn activate(&mut self, activation_weight: u64) -> zfs::Result<()> {
- // TODO
- // assert!(MUTEX_HELD(&self.lock));
- //
- // if self.weight & METASLAB_ACTIVE_MASK == 0 {
- // self.load_wait();
- // if !self.loaded {
- // if let Err(e) = self.load() {
- // metaslab_group_sort(self.group, msp, 0);
- // return Err(e);
- // }
- // }
- //
- // metaslab_group_sort(self.group, self, self.weight | activation_weight);
- // }
- // assert!(self.loaded);
- // assert!(self.weight & METASLAB_ACTIVE_MASK);
-
-
- Ok(())
- }
-}
-
-/// /////////////////////////////////////////////////////////////////////////////////////////////////
-
-pub struct MetaslabOps {
- pub alloc: fn(ms: &mut Metaslab, size: u64) -> u64,
-}
-
-/// /////////////////////////////////////////////////////////////////////////////////////////////////
-
-// The first-fit block allocator
-pub fn ff_alloc(ms: &mut Metaslab, size: u64) -> u64 {
- // Find the largest power of 2 block size that evenly divides the
- // requested size. This is used to try to allocate blocks with similar
- // alignment from the same area of the metaslab (i.e. same cursor
-    // bucket), but it does not guarantee that allocations of other sizes
-    // will not exist in the same region.
- let align = size & -(size as i64) as u64;
- let ref mut cursor = ms.lbas[(util::highbit64(align) - 1) as usize];
- let ref mut tree = ms.tree;
-
- // return metaslab_block_picker(tree, cursor, size, align);
- return 0;
-}
-
-/// /////////////////////////////////////////////////////////////////////////////////////////////////
-// This is a helper function that can be used by the allocator to find
-// a suitable block to allocate. This will search the specified AVL
-// tree looking for a block that matches the specified criteria.
-// fn metaslab_block_picker(tree: &mut avl::Tree, cursor: &mut u64, size: u64, align: u64) -> u64 {
-// range_seg_t *rs, rsearch;
-// avl_index_t where;
-//
-// rsearch.rs_start = *cursor;
-// rsearch.rs_end = *cursor + size;
-//
-// rs = tree.find(&rsearch, &where);
-// if rs == NULL {
-// rs = tree.nearest(where, AVL_AFTER);
-// }
-//
-// while rs != NULL {
-// let offset: u64 = util::p2roundup(rs->rs_start, align);
-//
-// if offset + size <= rs->rs_end {
-// cursor = offset + size;
-// return (offset);
-// }
-// rs = AVL_NEXT(t, rs);
-// }
-//
-// If we know we've searched the whole map (*cursor == 0), give up.
-// Otherwise, reset the cursor to the beginning and try again.
-// if *cursor == 0 {
-// return (-1ULL);
-// }
-//
-// cursor = 0;
-// return metaslab_block_picker(tree, cursor, size, align);
-// }
-/// /////////////////////////////////////////////////////////////////////////////////////////////////
-
-struct MetaslabAvlNode {
- index: usize,
- weight: u64,
- start: u64,
-}
-
-/// /////////////////////////////////////////////////////////////////////////////////////////////////
-
-// Allow allocations to switch to gang blocks quickly. We do this to
-// avoid having to load lots of space_maps in a given txg. There are,
-// however, some cases where we want to avoid "fast" ganging and instead
-// we want to do an exhaustive search of all metaslabs on this device.
-// Currently we don't allow any gang, slog, or dump device related allocations
-// to "fast" gang.
-// fn can_fast_gang(flags) -> bool {
-// (flags) & (METASLAB_GANG_CHILD | METASLAB_GANG_HEADER | METASLAB_GANG_AVOID) == 0
-// }
-
-
-const METASLAB_WEIGHT_PRIMARY: u64 = 1 << 63;
-const METASLAB_WEIGHT_SECONDARY: u64 = 1 << 62;
-const METASLAB_ACTIVE_MASK: u64 = METASLAB_WEIGHT_PRIMARY | METASLAB_WEIGHT_SECONDARY;
-
-// Metaslab granularity, in bytes. This is roughly similar to what would be
-// referred to as the "stripe size" in traditional RAID arrays. In normal
-// operation, we will try to write this amount of data to a top-level vdev
-// before moving on to the next one.
-static metaslab_aliquot: usize = 512 << 10;
-
-// static metaslab_gang_bang: u64 = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */
-
-// The in-core space map representation is more compact than its on-disk form.
-// The zfs_condense_pct determines how much more compact the in-core
-// space_map representation must be before we compact it on-disk.
-// Values should be greater than or equal to 100.
-static zfs_condense_pct: isize = 200;
-
-// Condensing a metaslab is not guaranteed to actually reduce the amount of
-// space used on disk. In particular, a space map uses data in increments of
-// MAX(1 << ashift, space_map_blksz), so a metaslab might use the
-// same number of blocks after condensing. Since the goal of condensing is to
-// reduce the number of IOPs required to read the space map, we only want to
-// condense when we can be sure we will reduce the number of blocks used by the
-// space map. Unfortunately, we cannot precisely compute whether or not this is
-// the case in metaslab_should_condense since we are holding ms_lock. Instead,
-// we apply the following heuristic: do not condense a spacemap unless the
-// uncondensed size consumes greater than zfs_metaslab_condense_block_threshold
-// blocks.
-static zfs_metaslab_condense_block_threshold: isize = 4;
-
-// The zfs_mg_noalloc_threshold defines which metaslab groups should
-// be eligible for allocation. The value is defined as a percentage of
-// free space. Metaslab groups that have more free space than
-// zfs_mg_noalloc_threshold are always eligible for allocations. Once
-// a metaslab group's free space is less than or equal to the
-// zfs_mg_noalloc_threshold the allocator will avoid allocating to that
-// group unless all groups in the pool have reached zfs_mg_noalloc_threshold.
-// Once all groups in the pool reach zfs_mg_noalloc_threshold then all
-// groups are allowed to accept allocations. Gang blocks are always
-// eligible to allocate on any metaslab group. The default value of 0 means
-// no metaslab group will be excluded based on this criterion.
-static zfs_mg_noalloc_threshold: isize = 0;
-
-// Metaslab groups are considered eligible for allocations if their
-// fragmentation metric (measured as a percentage) is less than or equal to
-// zfs_mg_fragmentation_threshold. If a metaslab group exceeds this threshold
-// then it will be skipped unless all metaslab groups within the metaslab
-// class have also crossed this threshold.
-static zfs_mg_fragmentation_threshold: isize = 85;
-
-// Allow metaslabs to keep their active state as long as their fragmentation
-// percentage is less than or equal to zfs_metaslab_fragmentation_threshold. An
-// active metaslab that exceeds this threshold will no longer keep its active
-// status allowing better metaslabs to be selected.
-static zfs_metaslab_fragmentation_threshold: isize = 70;
-
-// When set will load all metaslabs when pool is first opened.
-static metaslab_debug_load: isize = 0;
-
-// When set will prevent metaslabs from being unloaded.
-static metaslab_debug_unload: isize = 0;
-
-// Minimum size which forces the dynamic allocator to change
-// its allocation strategy. Once the space map cannot satisfy
-// an allocation of this size, it switches to using a more
-// aggressive strategy (i.e. search by size rather than offset).
-// static metaslab_df_alloc_threshold: u64 = SPA_MAXBLOCKSIZE;
-
-// The minimum free space, in percent, which must be available
-// in a space map to continue allocations in a first-fit fashion.
-// Once the space_map's free space drops below this level we dynamically
-// switch to using best-fit allocations.
-static metaslab_df_free_pct: isize = 4;
-
-// Percentage of all cpus that can be used by the metaslab taskq.
-static metaslab_load_pct: isize = 50;
-
-// Determines how many txgs a metaslab may remain loaded without having any
-// allocations from it. As long as a metaslab continues to be used we will
-// keep it loaded.
-static metaslab_unload_delay: usize = txg::TXG_SIZE * 2;
-
-// Max number of metaslabs per group to preload.
-// static metaslab_preload_limit: isize = SPA_DVAS_PER_BP;
-
-// Enable/disable preloading of metaslab.
-static metaslab_preload_enabled: bool = true;
-
-// Enable/disable fragmentation weighting on metaslabs.
-static metaslab_fragmentation_factor_enabled: bool = true;
-
-// Enable/disable lba weighting (i.e. outer tracks are given preference).
-static metaslab_lba_weighting_enabled: bool = true;
-
-// Enable/disable metaslab group biasing.
-static metaslab_bias_enabled: bool = true;
-
-// static uint64_t metaslab_fragmentation(metaslab_t *);
View
385 crates/zfs/nvpair.rs
@@ -1,385 +0,0 @@
-use std::fmt;
-
-// nvp implementation version
-pub const NV_VERSION: i32 = 0;
-
-// nvlist header
-// #[derive(Debug)]
-pub struct NvList {
- pub version: i32,
- pub nvflag: u32, // persistent flags
- pub pairs: Vec<(String, NvValue)>,
-}
-
-impl NvList {
- pub fn new(nvflag: u32) -> Self {
- NvList {
- version: NV_VERSION,
- nvflag: nvflag,
- pairs: Vec::new(),
- }
- }
-
- pub fn add(&mut self, name: String, value: NvValue) {
- self.pairs.push((name, value));
- }
-
- pub fn find(&self, name: &str) -> Option<&NvValue> {
- for pair in &self.pairs {
- if pair.0 == name {
- return Some(&pair.1);
- }
- }
- None
- }
-
- pub fn find_mut(&mut self, name: &str) -> Option<&mut NvValue> {
- for pair in &mut self.pairs {
- if pair.0 == name {
- return Some(&mut pair.1);
- }
- }
- None
- }
-
- pub fn get<'a, T: GetNvValue<'a>>(&'a self, name: &str) -> Option<T> {
- self.find(name).and_then(|x| GetNvValue::get(x))
- }
-}
-
-impl fmt::Debug for NvList {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- try!(write!(f,
- "NvList {{ version: {:X}, nvflag: {:X}, pairs: [\n",
- self.version,
- self.nvflag));
- for &(ref name, ref value) in &self.pairs {
- if name.is_empty() {
- break;
- }
- try!(write!(f, "{} : {:?}\n", name, value));
- }
- try!(write!(f, "] }}\n"));
- Ok(())
- }
-}
-
-// TODO Auto implement Debug. format! currently crashes with big u32 values
-// #[derive(Debug)]
-pub enum NvValue {
- Unknown,
- Boolean,
- Byte(u8),
- Int16(i16),
- Uint16(u16),
- Int32(i32),
- Uint32(u32),
- Int64(i64),
- Uint64(u64),
- String(String),
- ByteArray(Vec<u8>),
- Int16Array(Vec<i16>),
- Uint16Array(Vec<u16>),
- Int32Array(Vec<i32>),
- Uint32Array(Vec<u32>),
- Int64Array(Vec<i64>),
- Uint64Array(Vec<u64>),
- StringArray(Vec<String>),
- HrTime(i64),
- NvList(NvList),
- NvListArray(Vec<NvList>),
- BooleanValue(bool),
- Int8(i8),
- Uint8(u8),
- BooleanArray(Vec<bool>),
- Int8Array(Vec<i8>),
- Uint8Array(Vec<u8>),
-}
-
-impl NvValue {
- pub fn data_type(&self) -> DataType {
- match *self {
- NvValue::Unknown => DataType::Unknown,
- NvValue::Boolean => DataType::Boolean,
- NvValue::Byte(_) => DataType::Byte,
- NvValue::Int16(_) => DataType::Int16,
- NvValue::Uint16(_) => DataType::Uint16,
- NvValue::Int32(_) => DataType::Int32,
- NvValue::Uint32(_) => DataType::Uint32,
- NvValue::Int64(_) => DataType::Int64,
- NvValue::Uint64(_) => DataType::Uint64,
- NvValue::String(_) => DataType::String,
- NvValue::ByteArray(_) => DataType::ByteArray,
- NvValue::Int16Array(_) => DataType::Int16Array,
- NvValue::Uint16Array(_) => DataType::Uint16Array,
- NvValue::Int32Array(_) => DataType::Int32Array,
- NvValue::Uint32Array(_) => DataType::Uint32Array,
- NvValue::Int64Array(_) => DataType::Int64Array,
- NvValue::Uint64Array(_) => DataType::Uint64Array,
- NvValue::StringArray(_) => DataType::StringArray,
- NvValue::HrTime(_) => DataType::HrTime,
- NvValue::NvList(_) => DataType::NvList,
- NvValue::NvListArray(_) => DataType::NvListArray,
- NvValue::BooleanValue(_) => DataType::BooleanValue,
- NvValue::Int8(_) => DataType::Int8,
- NvValue::Uint8(_) => DataType::Uint8,
- NvValue::BooleanArray(_) => DataType::BooleanArray,
- NvValue::Int8Array(_) => DataType::Int8Array,
- NvValue::Uint8Array(_) => DataType::Uint8Array,
- }
- }
-
- pub fn num_elements(&self) -> usize {
- match *self {
- NvValue::Unknown => 1,
- NvValue::Boolean => 1,
- NvValue::Byte(_) => 1,
- NvValue::Int16(_) => 1,
- NvValue::Uint16(_) => 1,
- NvValue::Int32(_) => 1,
- NvValue::Uint32(_) => 1,
- NvValue::Int64(_) => 1,
- NvValue::Uint64(_) => 1,
- NvValue::String(_) => 1,
- NvValue::ByteArray(ref a) => a.len(),
- NvValue::Int16Array(ref a) => a.len(),
- NvValue::Uint16Array(ref a) => a.len(),
- NvValue::Int32Array(ref a) => a.len(),
- NvValue::Uint32Array(ref a) => a.len(),
- NvValue::Int64Array(ref a) => a.len(),
- NvValue::Uint64Array(ref a) => a.len(),
- NvValue::StringArray(ref a) => a.len(),
- NvValue::HrTime(_) => 1,
- NvValue::NvList(_) => 1,
- NvValue::NvListArray(ref a) => a.len(),
- NvValue::BooleanValue(_) => 1,
- NvValue::Int8(_) => 1,
- NvValue::Uint8(_) => 1,
- NvValue::BooleanArray(ref a) => a.len(),
- NvValue::Int8Array(ref a) => a.len(),
- NvValue::Uint8Array(ref a) => a.len(),
- }
- }
-}
-
-impl fmt::Debug for NvValue {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- match *self {
- NvValue::Int64(v) => write!(f, "Int64(0x{:X})", v),
- NvValue::Uint64(v) => write!(f, "Uint64(0x{:X})", v),
- NvValue::NvList(ref v) => write!(f, "NvList({:?})", v),
- NvValue::NvListArray(ref v) => {
- try!(write!(f, "NvListArray(["));
- for nv_list in v {
- try!(write!(f, "NvList({:?})", nv_list));
- }
- write!(f, "])")
- }
- NvValue::String(ref v) => write!(f, "String({})", v),
-            // Fall back to the data type; `{:?}` on `self` here would recurse into this same impl.
-            _ => write!(f, "{:?}", self.data_type()),
- }
- }
-}
-
-#[derive(Copy, Clone, Debug)]
-pub enum DataType {
- Unknown = 0,
- Boolean,
- Byte,
- Int16,
- Uint16,
- Int32,
- Uint32,
- Int64,
- Uint64,
- String,
- ByteArray,
- Int16Array,
- Uint16Array,
- Int32Array,
- Uint32Array,
- Int64Array,
- Uint64Array,
- StringArray,
- HrTime,
- NvList,
- NvListArray,
- BooleanValue,
- Int8,
- Uint8,
- BooleanArray,
- Int8Array,
- Uint8Array,
-}
-
-impl DataType {
- pub fn from_u8(u: u8) -> Option<DataType> {
- match u {
- 0 => Some(DataType::Unknown),
- 1 => Some(DataType::Boolean),
- 2 => Some(DataType::Byte),
- 3 => Some(DataType::Int16),
- 4 => Some(DataType::Uint16),
- 5 => Some(DataType::Int32),
- 6 => Some(DataType::Uint32),
- 7 => Some(DataType::Int64),
- 8 => Some(DataType::Uint64),
- 9 => Some(DataType::String),
- 10 => Some(DataType::ByteArray),
- 11 => Some(DataType::Int16Array),
- 12 => Some(DataType::Uint16Array),
- 13 => Some(DataType::Int32Array),
- 14 => Some(DataType::Uint32Array),
- 15 => Some(DataType::Int64Array),
- 16 => Some(DataType::Uint64Array),
- 17 => Some(DataType::StringArray),
- 18 => Some(DataType::HrTime),
- 19 => Some(DataType::NvList),
- 20 => Some(DataType::NvListArray),
- 21 => Some(DataType::BooleanValue),
- 22 => Some(DataType::Int8),
- 23 => Some(DataType::Uint8),
- 24 => Some(DataType::BooleanArray),
- 25 => Some(DataType::Int8Array),
- 26 => Some(DataType::Uint8Array),
- _ => None,
- }
- }
-
- pub fn to_u8(self) -> u8 {
- match self {
- DataType::Unknown => 0,
- DataType::Boolean => 1,
- DataType::Byte => 2,
- DataType::Int16 => 3,
- DataType::Uint16 => 4,
- DataType::Int32 => 5,
- DataType::Uint32 => 6,
- DataType::Int64 => 7,
- DataType::Uint64 => 8,
- DataType::String => 9,
- DataType::ByteArray => 10,
- DataType::Int16Array => 11,
- DataType::Uint16Array => 12,
- DataType::Int32Array => 13,
- DataType::Uint32Array => 14,
- DataType::Int64Array => 15,
- DataType::Uint64Array => 16,
- DataType::StringArray => 17,
- DataType::HrTime => 18,
- DataType::NvList => 19,
- DataType::NvListArray => 20,
- DataType::BooleanValue => 21,
- DataType::Int8 => 22,
- DataType::Uint8 => 23,
- DataType::BooleanArray => 24,
- DataType::Int8Array => 25,
- DataType::Uint8Array => 26,
- }
- }
-}
-
-/// /////////////////////////////////////////////////////////////////////////////////////////////////
-
-pub trait GetNvValue<'a>: Sized {
- fn get(value: &'a NvValue) -> Option<Self>;
-}
-
-impl<'a> GetNvValue<'a> for bool {
- fn get(value: &'a NvValue) -> Option<Self> {
- match *value {
- NvValue::BooleanValue(v) => Some(v),
- _ => None,
- }
- }
-}
-
-impl<'a> GetNvValue<'a> for u8 {
- fn get(value: &'a NvValue) -> Option<Self> {
- match *value {
- NvValue::Byte(v) => Some(v),
- _ => None,
- }
- }
-}
-
-impl<'a> GetNvValue<'a> for u16 {
- fn get(value: &'a NvValue) -> Option<Self> {
- match *value {
- NvValue::Uint16(v) => Some(v),
- _ => None,
- }
- }
-}
-
-impl<'a> GetNvValue<'a> for u32 {
- fn get(value: &'a NvValue) -> Option<Self> {
- match *value {
- NvValue::Uint32(v) => Some(v),
- _ => None,
- }
- }
-}
-
-impl<'a> GetNvValue<'a> for u64 {
- fn get(value: &'a NvValue) -> Option<Self> {
- match *value {
- NvValue::Uint64(v) => Some(v),
- _ => None,
- }
- }
-}
-
-impl<'a> GetNvValue<'a> for i16 {
- fn get(value: &'a NvValue) -> Option<Self> {
- match *value {
- NvValue::Int16(v) => Some(v),
- _ => None,
- }
- }
-}
-
-impl<'a> GetNvValue<'a> for i32 {
- fn get(value: &'a NvValue) -> Option<Self> {
- match *value {
- NvValue::Int32(v) => Some(v),
- _ => None,
- }
- }
-}
-
-impl<'a> GetNvValue<'a> for i64 {
- fn get(value: &'a NvValue) -> Option<Self> {
- match *value {
- NvValue::Int64(v) => Some(v),
- _ => None,
- }
- }
-}
-
-impl<'a> GetNvValue<'a> for &'a String {
- fn get(value: &'a NvValue) -> Option<Self> {
- match *value {
- NvValue::String(ref v) => Some(v),
- _ => None,
- }
- }
-}
-
-impl<'a> GetNvValue<'a> for &'a NvList {
- fn get(value: &'a NvValue) -> Option<Self> {
- match *value {
- NvValue::NvList(ref v) => Some(v),
- _ => None,
- }
- }
-}
-
-impl<'a> GetNvValue<'a> for &'a Vec<NvList> {
- fn get(value: &'a NvValue) -> Option<Self> {
- match *value {
- NvValue::NvListArray(ref v) => Some(v),
- _ => None,
- }
- }
-}
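NvList::get above picks which enum variant to extract from the caller's annotated return type via the GetNvValue trait. The following is a reduced, standalone illustration of that pattern (the type and field names here are simplified stand-ins, not the crate's own), compilable on its own.

// Reduced sketch of the typed-lookup pattern behind NvList::get.
enum Value {
    Uint64(u64),
    Text(String),
}

trait GetValue: Sized {
    fn get(value: &Value) -> Option<Self>;
}

impl GetValue for u64 {
    fn get(value: &Value) -> Option<Self> {
        match *value {
            Value::Uint64(v) => Some(v),
            _ => None,
        }
    }
}

struct List {
    pairs: Vec<(String, Value)>,
}

impl List {
    // The return type annotation at the call site selects the impl.
    fn get<T: GetValue>(&self, name: &str) -> Option<T> {
        self.pairs.iter().find(|p| p.0 == name).and_then(|p| GetValue::get(&p.1))
    }
}

fn main() {
    let list = List {
        pairs: vec![("pool_guid".to_string(), Value::Uint64(42)),
                    ("name".to_string(), Value::Text("tank".to_string()))],
    };
    let guid: Option<u64> = list.get("pool_guid");
    assert_eq!(guid, Some(42));
    println!("pool_guid = {:?}", guid);
}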
View
266 crates/zfs/nvstream.rs
@@ -1,266 +0,0 @@
-use std::mem;
-
-use super::nvpair::{DataType, NV_VERSION, NvList, NvValue};
-use super::xdr;
-
-// nvlist pack encoding
-const NV_ENCODE_NATIVE: u8 = 0;
-const NV_ENCODE_XDR: u8 = 1;
-
-// nvlist pack endian
-const NV_BIG_ENDIAN: u8 = 0;
-const NV_LITTLE_ENDIAN: u8 = 1;
-
-// nvlist persistent unique name flags, stored in nvl_nvflags
-const NV_UNIQUE_NAME: u32 = 0x1;
-const NV_UNIQUE_NAME_TYPE: u32 = 0x2;
-
-// nvlist lookup pairs related flags
-const NV_FLAG_NOENTOK: isize = 0x1;
-
-// NvList XDR format:
-// - header (encoding and endian): 4 bytes
-// - nvl version: 4 bytes
-// - nv flags: 4 bytes
-// - nv pairs:
-// - encoded size: 4 bytes
-// - decoded size: 4 bytes
-// - name: xdr string | len: 4 bytes, data: len+(4 - len%4) bytes
-// - data type: 4 bytes
-// - num elements: 4 bytes
-// - data
-// - 2 terminating zeros: 4 bytes
-//
-// NOTE: XDR aligns all of the smaller integer types to be 4 bytes, so `encode_u8` is actually
-// writing 4 bytes
-//
-// I don't know why the ZFS developers decided to use i32's everywhere, even for clearly
-// unsigned things like array lengths.
-
-/// Name value stream header
-#[derive(Debug)]
-pub struct NvsHeader {
- encoding: u8, // nvs encoding method
- endian: u8, // nvs endian
- reserved1: u8, // reserved for future use
- reserved2: u8, // reserved for future use
-}
-
-/// Encodes a NvList in XDR format
-pub fn encode_nv_list(xdr: &mut xdr::Xdr, nv_list: &NvList) -> xdr::XdrResult<()> {
- try!(encode_nv_list_header(xdr));
-
- // Encode version and nvflag
- try!(xdr.encode_i32(nv_list.version));
- try!(xdr.encode_u32(nv_list.nvflag));
-
- // Encode the pairs
- for &(ref name, ref value) in &nv_list.pairs {
- // Encode name
- // let encoded_size = 0;
- // let decoded_size = 0;
- try!(xdr.encode_string(name));
-
- // TODO
-
- // Encode data type
- try!(xdr.encode_u8(value.data_type().to_u8()));
-
- // Encode the number of elements
- try!(xdr.encode_i32(value.num_elements() as i32));
-
- // Encode the value
- }
-
- // Encode 2 terminating zeros
- try!(xdr.encode_i32(0));
- try!(xdr.encode_i32(0));
- Ok(())
-}
-
-fn encode_nv_list_header(xdr: &mut xdr::Xdr) -> xdr::XdrResult<()> {
- let header = NvsHeader {
- encoding: NV_ENCODE_XDR,
- endian: NV_LITTLE_ENDIAN,
- reserved1: 0,
- reserved2: 0,
- };
- let header_bytes: [u8; 4] = unsafe { mem::transmute(header) };
- try!(xdr.encode_opaque(&header_bytes));
- Ok(())
-}
-
-/// Decodes a NvList in XDR format
-pub fn decode_nv_list(xdr: &mut xdr::Xdr) -> xdr::XdrResult<NvList> {
- try!(decode_nv_list_header(xdr));
-
- decode_nv_list_embedded(xdr)
-}
-
-pub fn decode_nv_list_embedded(xdr: &mut xdr::Xdr) -> xdr::XdrResult<NvList> {
- // Decode version and nvflag
- let version = try!(xdr.decode_i32());
- let nvflag = try!(xdr.decode_u32());
-
- // TODO: Give an actual error
- if version != NV_VERSION {
- return Err(xdr::XdrError);
- }
-
- let mut nv_list = NvList::new(nvflag);
-
- // Decode the pairs
- loop {
-        // Decode the encoded/decoded sizes
- let encoded_size = try!(xdr.decode_u32());
- let decoded_size = try!(xdr.decode_u32());
-
- // Check for 2 terminating zeros
- if encoded_size == 0 && decoded_size == 0 {
- break;
- }
-
- // Decode name
- let name = try!(xdr.decode_string());
-
- // Decode data type
- let data_type = match DataType::from_u8(try!(xdr.decode_u8())) {
- Some(dt) => dt,
- None => {
- return Err(xdr::XdrError);
- }
- };
-
- // Decode the number of elements
- let num_elements = try!(xdr.decode_i32()) as usize;
-
- // Decode the value
- let value = try!(decode_nv_value(xdr, data_type, num_elements));
-
- // Add the value to the list
- nv_list.pairs.push((name, value));
- }
-
- Ok(nv_list)
-}
-
-fn decode_nv_list_header(xdr: &mut xdr::Xdr) -> xdr::XdrResult<()> {
- let mut bytes: [u8; 4] = [0; 4];
- try!(xdr.decode_opaque(&mut bytes));
- let header: NvsHeader = unsafe { mem::transmute(bytes) };
-
- if header.encoding != NV_ENCODE_XDR {
- return Err(xdr::XdrError);
- }
- Ok(())
-}
-
-fn decode_nv_value(xdr: &mut xdr::Xdr,
- data_type: DataType,
- num_elements: usize)
- -> xdr::XdrResult<NvValue> {
- match data_type {
- DataType::Unknown => Ok(NvValue::Unknown),
- DataType::Boolean => Ok(NvValue::Boolean),
- DataType::Byte => Ok(NvValue::Byte(try!(xdr.decode_u8()))),
- DataType::Int16 => Ok(NvValue::Int16(try!(xdr.decode_i16()))),
- DataType::Uint16 => Ok(NvValue::Uint16(try!(xdr.decode_u16()))),
- DataType::Int32 => Ok(NvValue::Int32(try!(xdr.decode_i32()))),
- DataType::Uint32 => Ok(NvValue::Uint32(try!(xdr.decode_u32()))),
- DataType::Int64 => Ok(NvValue::Int64(try!(xdr.decode_i64()))),
- DataType::Uint64 => Ok(NvValue::Uint64(try!(xdr.decode_u64()))),
- DataType::String => Ok(NvValue::String(try!(xdr.decode_string()))),
- DataType::ByteArray => {
- let mut v = vec![0; num_elements];
- for v in &mut v {
- *v = try!(xdr.decode_u8());
- }
- Ok(NvValue::ByteArray(v))
- }
- DataType::Int16Array => {
- let mut v = vec![0; num_elements];
- for v in &mut v {
- *v = try!(xdr.decode_i16());
- }
- Ok(NvValue::Int16Array(v))
- }
- DataType::Uint16Array => {
- let mut v = vec![0; num_elements];
- for v in &mut v {
- *v = try!(xdr.decode_u16());
- }
- Ok(NvValue::Uint16Array(v))
- }
- DataType::Int32Array => {
- let mut v = vec![0; num_elements];
- for v in &mut v {
- *v = try!(xdr.decode_i32());
- }
- Ok(NvValue::Int32Array(v))
- }
- DataType::Uint32Array => {
- let mut v = vec![0; num_elements];
- for v in &mut v {
- *v = try!(xdr.decode_u32());
- }
- Ok(NvValue::Uint32Array(v))
- }
- DataType::Int64Array => {
- let mut v = vec![0; num_elements];
- for v in &mut v {
- *v = try!(xdr.decode_i64());
- }
- Ok(NvValue::Int64Array(v))
- }
- DataType::Uint64Array => {
- let mut v = vec![0; num_elements];
- for v in &mut v {
- *v = try!(xdr.decode_u64());
- }
- Ok(NvValue::Uint64Array(v))
- }
-        DataType::StringArray => {
-            let mut v = Vec::with_capacity(num_elements);
-            for _ in 0..num_elements {
-                v.push(try!(xdr.decode_string()));
-            }
-            Ok(NvValue::StringArray(v))
-        }
- DataType::HrTime => Ok(NvValue::HrTime(try!(xdr.decode_i64()))),
- DataType::NvList => {
- let nv_list = try!(decode_nv_list_embedded(xdr));
- Ok(NvValue::NvList(nv_list))
- }
- DataType::NvListArray => {
- let mut v = Vec::with_capacity(num_elements);
- for _ in 0..num_elements {
- v.push(try!(decode_nv_list_embedded(xdr)));
- }
- Ok(NvValue::NvListArray(v))
- }
- DataType::BooleanValue => Ok(NvValue::BooleanValue(try!(xdr.decode_bool()))),
- DataType::Int8 => Ok(NvValue::Int8(try!(xdr.decode_i8()))),
- DataType::Uint8 => Ok(NvValue::Uint8(try!(xdr.decode_u8()))),
- DataType::BooleanArray => {
- let mut v = vec![false; num_elements];
- for v in &mut v {
- *v = try!(xdr.decode_bool());
- }
- Ok(NvValue::BooleanArray(v))
- }
- DataType::Int8Array => {
- let mut v = vec![0; num_elements];
- for v in &mut v {
- *v = try!(xdr.decode_i8());
- }
- Ok(NvValue::Int8Array(v))
- }
- DataType::Uint8Array => {
- let mut v = vec![0; num_elements];
- for v in &mut v {
- *v = try!(xdr.decode_u8());
- }
- Ok(NvValue::Uint8Array(v))
- }
- }
-}
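The 4-byte stream header that encode_nv_list_header builds via an unsafe transmute is small enough to write out by hand. The standalone sketch below shows the same layout (encoding byte, endian byte, two reserved bytes), reusing the constant values defined at the top of crates/zfs/nvstream.rs; it is an illustration, not the crate's code.

// Sketch of the nvlist stream header layout, without the transmute.
const NV_ENCODE_XDR: u8 = 1;
const NV_LITTLE_ENDIAN: u8 = 1;

fn nvs_header_bytes() -> [u8; 4] {
    // encoding, endian, reserved1, reserved2
    [NV_ENCODE_XDR, NV_LITTLE_ENDIAN, 0, 0]
}

fn main() {
    assert_eq!(nvs_header_bytes(), [1, 1, 0, 0]);
    // decode_nv_list_header above rejects any stream whose first byte is not NV_ENCODE_XDR.
    println!("{:?}", nvs_header_bytes());
}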
View
319 crates/zfs/spa.rs
@@ -1,319 +0,0 @@
-use std::cmp;
-use std::rc::Rc;
-
-use super::avl;
-use super::dmu_objset::ObjectSet;
-use super::dsl_pool;
-use super::metaslab::{self, MetaslabClass};
-use super::nvpair::{NvList, NvValue};
-use super::taskq::Taskq;
-use super::txg;
-use super::uberblock::Uberblock;
-use super::vdev;
-use super::zfs;
-use super::zio;
-
-pub enum ImportType {
- Existing,
- Assemble,
-}
-
-// Storage pool allocator
-pub struct Spa {
- name: String, // Pool name
- config: NvList,
- state: zfs::PoolState,
- load_state: zfs::SpaLoadState,
- zio_taskq: Vec<Vec<SpaTaskqs>>,
- // dsl_pool: DslPool,
- normal_class: Rc<MetaslabClass>, // normal data class
- log_class: Rc<MetaslabClass>, // intent log data class
- first_txg: u64,
- mos: ObjectSet,
- vdev_tree: vdev::Tree,
- root_vdev: vdev::TreeIndex,
- // ubsync: Uberblock, // Last synced uberblock
- // uberblock: Uberblock, // Current active uberblock
- did: u64, // if procp != p0, did of t1
-}
-
-impl Spa {
- pub fn create(name: String, nvroot: &NvList) -> zfs::Result<Self> {
- let mut config = NvList::new(0);
- config.add("name".to_string(), NvValue::String(name.clone()));
- Self::new(name, config, vdev::AllocType::Add)
- }
-
- pub fn import(name: String, config: NvList) -> zfs::Result<Self> {
- let load_state = zfs::SpaLoadState::Import;
-
- // note that mos_config is true - we trust the user's config in this case
- let mut spa = try!(Self::load(name, config, load_state, ImportType::Existing, true));
-
- spa.activate();
-
- Ok(spa)
- }
-
- // pub fn open(&mut self) -> zfs::Result<()> {
- // let load_state = zfs::SpaLoadState::Open;
- // if self.state == zfs::PoolState::Uninitialized {
- // First time opening
- // self.activate();
- // try!(self.load(load_state, ImportType::Existing, false));
- // }
- //
- // Ok(())
- // }
-
- fn new(name: String, config: NvList, vdev_alloc_type: vdev::AllocType) -> zfs::Result<Self> {
- let metaslab_ops = Rc::new(metaslab::MetaslabOps { alloc: metaslab::ff_alloc });
- let normal_class = Rc::new(MetaslabClass::create(metaslab_ops.clone()));
- let log_class = Rc::new(MetaslabClass::create(metaslab_ops));
-
- // Parse vdev tree
- let mut vdev_tree = vdev::Tree::new();
- let root_vdev = {
- let nvroot: &NvList = try!(config.get("vdev_tree").ok_or(zfs::Error::Invalid));
- try!(vdev_tree.parse(&normal_class, nvroot, None, vdev_alloc_type))
- };
-
- Ok(Spa {
- name: name,
- config: config,
- state: zfs::PoolState::Uninitialized,
- load_state: zfs::SpaLoadState::None,
- zio_taskq: Vec::new(),
- // dsl_pool: blah,
- normal_class: normal_class,
- log_class: log_class,
- first_txg: 0,
- mos: ObjectSet,
- vdev_tree: vdev_tree,
- root_vdev: root_vdev,
- did: 0,
- })
- }
-
- fn load(name: String,
- config: NvList,
- load_state: zfs::SpaLoadState,
- import_type: ImportType,
- mos_config: bool)
- -> zfs::Result<Self> {
- let pool_guid = try!(config.get("pool_guid").ok_or(zfs::Error::Invalid));
-
- let mut spa = try!(Self::load_impl(name,
- pool_guid,
- config,
- load_state,
- import_type,
- mos_config));
- spa.load_state = zfs::SpaLoadState::None;
-
- Ok(spa)
- }
-
-    /// mos_config: Whether `config` came from on-disk MOS and so is trusted, or was user-made and so
-    /// is untrusted.
- fn load_impl(name: String,
- pool_guid: u64,
- config: NvList,
- load_state: zfs::SpaLoadState,
- import_type: ImportType,
- mos_config: bool)
- -> zfs::Result<Self> {
- // Determine the vdev allocation type from import type
- let vdev_alloc_type = match import_type {
- ImportType::Existing => vdev::AllocType::Load,
- ImportType::Assemble => vdev::AllocType::Split,
- };
-
- let mut spa = try!(Self::new(name, config, vdev_alloc_type));
- spa.load_state = load_state;
-
- // Create "The Godfather" zio to hold all async IOs
- // spa.spa_async_zio_root = kmem_alloc(max_ncpus * sizeof (void *), KM_SLEEP);
- // for i in 0..max_ncpus {
- // spa.async_zio_root[i] =
- // Zio::root(spa, None, None, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER);
- // }
-
-
- // TODO: Try to open all vdevs, loading each label in the process.
-
- // TODO
- // Find the best uberblock.
- // vdev_uberblock_load(rvd, ub, &label);
-
- // If we weren't able to find a single valid uberblock, return failure.
- // if ub.txg == 0 {
- // return spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO);
- // }
-
-
- // Initialize internal structures
- spa.state = zfs::PoolState::Active;
- // spa.ubsync = spa.uberblock;
- // spa.verify_min_txg =
- // if spa.extreme_rewind {
- // txg::TXG_INITIAL - 1
- // } else {
- // spa.last_synced_txg() - txg::DEFER_SIZE - 1;
- // };
- // spa.first_txg =
- // if spa.last_ubsync_txg { spa.last_ubsync_txg } else { spa.last_synced_txg() + 1 };
- // spa.claim_max_txg = spa.first_txg;
- // spa.prev_software_version = ub.software_version;
-
- // spa.dsl_pool = try!(dsl_pool::DslPool::init(&mut spa, spa.first_txg));
- // if error { return spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO); }
- // spa.meta_objset = spa.dsl_pool.meta_objset;
-
- // Load stuff for the top-level and leaf vdevs
- spa.vdev_tree.load(&mut spa.mos, spa.root_vdev);
-
- Ok(spa)
- }
-
- fn activate(&mut self) {
- // assert!(self.state == zfs::PoolState::Uninitialized);
-
- self.state = zfs::PoolState::Active;
-
- // TODO: maybe start the spa thread
-
- self.create_zio_taskqs();
-
- self.did = 0;
- }
-
- // fn taskqs_init(&mut self, t: zio::Type, q: zio::TaskqType) {
- // const zio_taskq_info_t *ztip = &zio_taskqs[t][q];
- // zti_modes mode = ztip.mode;
- // let value = ztip.value;
- // let count = ztip.count;
- // let ref tqs = self.zio_taskq[t][q];
- // let flags = TASKQ_DYNAMIC;
- // let mut batch: bool = false;
- //
- // if mode == ZTI_MODE_NULL {
- // tqs.count = 0;
- // tqs.taskq = NULL;
- // return;
- // }
- //
- // assert!(count > 0);
- //
- // tqs.count = count;
- // tqs.taskq = kmem_alloc(count * sizeof (taskq_t *), KM_SLEEP);
- //
- // match mode {
- // ZTI_MODE_FIXED => {
- // assert!(value >= 1);
- // value = cmp::max(value, 1);
- // },
- // ZTI_MODE_BATCH => {
- // batch = true;
- // flags |= TASKQ_THREADS_CPU_PCT;
- // value = zio_taskq_batch_pct;
- // },
- // _ => {
- // panic!("unrecognized mode for %s_%s taskq (%u:%u) in spa_activate()",
- // zio_type_name[t], zio_taskq_types[q], mode, value);
- // },
- // }
- //
- // for i in 0..count {
- // taskq_t *tq;
- // char name[32];
- //
- // if (count > 1) {
- // snprintf(name, sizeof (name), "%s_%s_%u",
- // zio_type_name[t], zio_taskq_types[q], i);
- // } else {
- // snprintf(name, sizeof (name), "%s_%s",
- // zio_type_name[t], zio_taskq_types[q]);
- // }
- //
- // if zio_taskq_sysdc && spa->spa_proc != &p0 {
- // if batch {
- // flags |= TASKQ_DC_BATCH;
- // }
- //
- // tq = taskq_create_sysdc(name, value, 50, INT_MAX,
- // spa->spa_proc, zio_taskq_basedc, flags);
- // } else {
- // pri_t pri = maxclsyspri;
- // The write issue taskq can be extremely CPU
- // intensive. Run it at slightly less important
- // priority than the other taskqs. Under Linux this
-    // means incrementing the priority value; on platforms
-    // like illumos it should be decremented.
- // if (t == ZIO_TYPE_WRITE && q == ZIO_TASKQ_ISSUE)
- // pri += 1;
- //
- // tq = taskq_create_proc(name, value, pri, 50,
- // INT_MAX, spa->spa_proc, flags);
- // }
- //
- // tqs->taskq[i] = tq;
- // }
- // }
-
- fn create_zio_taskqs(&mut self) {
- for t in 0..zio::NUM_TYPES {
- for q in 0..zio::NUM_TASKQ_TYPES {
- // self.taskqs_init(t, q);
- }
- }
- }
-
- fn last_synced_txg(&self) -> u64 {
- // TODO
- // self.ubsync.ub_txg
- 0
- }
-
- fn first_txg(&self) -> u64 {
- self.first_txg
- }
-}
-
-/// /////////////////////////////////////////////////////////////////////////////////////////////////
-
-struct ZioTaskqInfo {
- // mode: zti_modes_t,
- value: usize,
- count: usize,
-}
-
-struct SpaTaskqs {
- count: usize,
- taskq: Vec<Vec<Taskq>>,
-}
-
-/// /////////////////////////////////////////////////////////////////////////////////////////////////
-
-pub struct SpaNamespace {
- // TODO: Use &str instead of String as key type. Lifetimes are hard.
- avl: avl::Tree<Spa, String>, // AVL tree of Spa sorted by name
-}
-
-impl SpaNamespace {
- pub fn new() -> Self {
- SpaNamespace { avl: avl::Tree::new(Rc::new(|x| x.name.clone())) }
- }
-
- pub fn add(&mut self, spa: Spa) {
- self.avl.insert(spa);
- }
-
- pub fn find(&self, name: String) -> Option<&Spa> {
- self.avl.find(name)
- }
-
- pub fn find_mut(&mut self, name: String) -> Option<&mut Spa> {
- self.avl.find_mut(name)
- }
-}
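SpaNamespace above keeps every imported pool in a tree keyed by its name string. As a rough stand-in for the crate's avl::Tree, the same lookup shape can be sketched with std's BTreeMap; the types below are simplified placeholders for illustration only.

// Sketch of a name-keyed pool namespace using a std collection.
use std::collections::BTreeMap;

struct Spa {
    name: String,
}

struct SpaNamespace {
    by_name: BTreeMap<String, Spa>,
}

impl SpaNamespace {
    fn new() -> Self {
        SpaNamespace { by_name: BTreeMap::new() }
    }

    fn add(&mut self, spa: Spa) {
        self.by_name.insert(spa.name.clone(), spa);
    }

    fn find(&self, name: &str) -> Option<&Spa> {
        self.by_name.get(name)
    }
}

fn main() {
    let mut ns = SpaNamespace::new();
    ns.add(Spa { name: "tank".to_string() });
    assert!(ns.find("tank").is_some());
    assert!(ns.find("backup").is_none());
}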
View
207 crates/zfs/space_map.rs
@@ -1,207 +0,0 @@
-use std::{fmt, mem};
-
-use super::avl;
-use super::dmu_objset::ObjectSet;
-use super::from_bytes::FromBytes;
-use super::zfs;
-
-const SPACE_MAP_HISTOGRAM_SIZE: usize = 32;
-
-/// The `SpaceMapPhys` is the on-disk representation of the space map.
-/// Consumers of space maps should never reference any of the members of this
-/// structure directly. These members may only be updated in syncing context.
-///
-/// Note the smp_object is no longer used but remains in the structure
-/// for backward compatibility.
-///
-/// The smp_histogram maintains a histogram of free regions. Each
-/// bucket, smp_histogram[i], contains the number of free regions
-/// whose size is:
-/// 2^(i+sm_shift) <= size of free region in bytes < 2^(i+sm_shift+1)
-#[derive(Debug)]
-pub struct SpaceMapPhys {
- object: u64, // on-disk space map object
- objsize: u64, // size of the object
- alloc: u64, /* space allocated from the map
- * pad: [u64; 5], // reserved
- * histogram: [u64; SPACE_MAP_HISTOGRAM_SIZE], */
-}
-
-impl FromBytes for SpaceMapPhys {}
-
-pub struct SpaceMap {
- start: u64, // start of map
- size: u64, // size of map
- shift: u8, // unit shift
- length: u64, // synced length
- alloc: u64, // synced space allocated
- // os: *ObjectSet, // objset for this map
- object: u64, // object id for this map
- blksz: u32, // block size for space map
- // dbuf: *dmu_dbuf_t, // space_map_phys_t dbuf
- phys: SpaceMapPhys, // on-disk space map
-}
-
-impl SpaceMap {
- /// Returns SpaceMapPhys, Dbuf, and block size
- // TODO
- // fn open_impl(os: &mut ObjectSet, object: u64) -> zfs::Result<(SpaceMapPhys, dmu::Dbuf, u64)> {
- // let dbuf = try!(dmu_bonus_hold(os, object, sm));
- //
- // let (block_size, num_blocks) = dmu_object_size_from_db(dbuf);
- // let phys = SpaceMapPhys::from_bytes(dbuf.data);
- //
- // Ok((phys, dbuf, block_size))
- // }
-
-
- pub fn open(os: &mut ObjectSet,
- object: u64,
- start: u64,
- size: u64,
- shift: u8)
- -> zfs::Result<Self> {
- assert!(object != 0);
-
- // TODO
- // let (phys, dbuf, block_size) = try!(Self::open_impl(os, object));
- let phys = SpaceMapPhys {
- object: 0, // on-disk space map object
- objsize: 0, // size of the object
- alloc: 0, // space allocated from the map
- };
- let block_size = 0;
-
- let mut space_map = SpaceMap {
- start: start,
- size: size,
- shift: shift,
- // os: os,
- object: object,
- length: 0,
- alloc: 0,
- blksz: block_size,
- // dbuf: dbuf,
- phys: phys,
- };
-
- Ok(space_map)
- }
-
- pub fn load_avl(&self,
- tree: &mut avl::Tree<Segment, u64>,
- bytes: &[u8],
- map_type: MapType)
- -> Result<(), String> {
- for i in 0..(self.size as usize) {
- let entry = Entry::from_bytes(&bytes[i * mem::size_of::<Entry>()..]).unwrap();
- let entry_map_type = match entry.map_type() {
- Some(map_type) => map_type,
- None => {
- return Err("Invalid map type".to_string());
- }
- };
- if entry.debug() != 1 && entry_map_type == map_type {
- // it's not a debug entry and it's the right map type, add it to the tree
- tree.insert(Segment::from_entry(&entry));
- }
- }
- tree.in_order(|node| {
- println!("{:?}", node.value());
- });
-
- Ok(())
- }
-}
-
-/// /////////////////////////////////////////////////////////////////////////////////////////////////
-#[derive(Copy, Clone, Debug, PartialEq)]
-pub enum MapType {
- Alloc = 0,
- Free = 1,
-}
-
-impl MapType {
- pub fn from_u64(u: u64) -> Option<Self> {
- match u {
- 0 => Some(MapType::Alloc),
- 1 => Some(MapType::Free),
- _ => None,
- }
- }
-}
-
-#[derive(Copy, Clone)]
-pub struct Entry(u64);
-
-impl FromBytes for Entry {}
-
-impl Entry {
- pub fn debug(&self) -> u64 {
- (self.0 >> 63) & 0x1 // 1 bit long
- }
-
- // Non-debug entries
-
- pub fn size(&self) -> u64 {
- self.0 & 0x7FFF // 15 bits long
- }
-
- pub fn map_type(&self) -> Option<MapType> {
- MapType::from_u64((self.0 >> 15) & 0x1) // 1 bit long
- }
-
- pub fn offset(&self) -> u64 {
-        (self.0 >> 16) & 0x7FFFFFFFFFFF // 47 bits long
- }
-
- // Debug entries
-
- pub fn action(&self) -> u64 {
- (self.0 >> 60) & 0x7 // 3 bits long
- }
-
- pub fn sync_pass(&self) -> u64 {
- (self.0 >> 50) & 0x3FF // 10 bits long
- }
-
- pub fn txg(&self) -> u64 {
-        self.0 & 0x3FFFFFFFFFFFF // 50 bits long
- }
-}
-
-impl fmt::Debug for Entry {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- if self.debug() == 1 {
- try!(write!(f,
- "DEBUG: action:0x{:X} sync_pass:{:X} txg:0x{:X}",
- self.action(),
- self.sync_pass(),
- self.txg()));
- } else {
- try!(write!(f,
- "ENTRY: size:0x{:X} map_type:{:?} offset:0x{:X}",
- self.size(),
- self.map_type(),
- self.offset()));
- }
- Ok(())
- }
-}
-
-
-/// /////////////////////////////////////////////////////////////////////////////////////////////////
-#[derive(Debug)]
-pub struct Segment {
- pub start: u64,
- pub size: u64,
-}
-
-impl Segment {
- fn from_entry(entry: &Entry) -> Self {
- Segment {
- start: entry.offset(),
- size: entry.size(),
- }
- }
-}
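The Entry accessors above slice a single u64 into a 1-bit debug flag, a 47-bit offset, a 1-bit map type, and a 15-bit size. This self-contained sketch packs one non-debug entry by hand so the field boundaries are visible; it mirrors the bit positions used by Entry but is not part of the removed file.

// Sketch of the on-disk space map entry bit layout decoded by Entry above.
fn pack_entry(offset: u64, is_free: bool, size: u64) -> u64 {
    assert!(offset < (1 << 47));
    assert!(size < (1 << 15));
    // The debug bit (bit 63) stays 0 for ordinary entries.
    (offset << 16) | ((is_free as u64) << 15) | size
}

fn main() {
    let raw = pack_entry(0x1234, true, 0x10);
    assert_eq!((raw >> 63) & 0x1, 0);                     // not a debug entry
    assert_eq!((raw >> 16) & 0x7FFF_FFFF_FFFF, 0x1234);   // offset, 47 bits
    assert_eq!((raw >> 15) & 0x1, 1);                     // MapType::Free
    assert_eq!(raw & 0x7FFF, 0x10);                       // size, 15 bits
    println!("entry = 0x{:016X}", raw);
}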
View
371 crates/zfs/taskq.rs
@@ -1,371 +0,0 @@
-use std::cmp;
-// use std::collections::VecDeque;
-// use std::sync::mpsc::{channel, Sender, Receiver};
-use std::thread;
-
-use super::zfs;
-
-const TQENT_FLAG_PREALLOC: u64 = 0x1; // taskq_dispatch_ent used
-
-const TASKQ_PREPOPULATE: u64 = 0x0001;
-const TASKQ_CPR_SAFE: u64 = 0x0002; // Use CPR safe protocol
-const TASKQ_DYNAMIC: u64 = 0x0004; // Use dynamic thread scheduling
-const TASKQ_THREADS_CPU_PCT: u64 = 0x0008; // Scale # threads by # cpus
-const TASKQ_DC_BATCH: u64 = 0x0010; // Mark threads as batch
-
-// const TQ_SLEEP: u64 = KM_SLEEP; // Can block for memory
-// const TQ_NOSLEEP: u64 = KM_NOSLEEP; // Cannot block for memory; may fail
-const TQ_NOQUEUE: u64 = 0x02; // Do not enqueue if can't dispatch
-const TQ_FRONT: u64 = 0x08; // Queue in front
-
-const TASKQ_ACTIVE: u64 = 0x00010000;
-
-pub type TaskFn = Box<FnMut()>;
-
-pub struct Taskq {
- name: String,
- // kmutex_t lock,
- // krwlock_t threadlock,
- // kcondvar_t dispatch_cv,
- // kcondvar_t wait_cv,*/
- // threads: Vec<Sender<Task>>,
- flags: u64,
- active: u16,
- num_threads: u16,
- num_alloc: u64,
- min_alloc: u64,
- max_alloc: u64,
- next_task_id: usize,
- // kcondvar_t max_alloc_cv,
- max_alloc_wait: i64, /* taskq_ent_t *freelist,
- * task_queue: VecDeque<Task>, */
-}
-
-impl Taskq {
- pub fn new(name: String,
- mut num_threads: u16,
- min_alloc: u64,
- max_alloc: u64,
- flags: u64)
- -> Self {
- // taskq_t *tq = kmem_zalloc(sizeof (taskq_t), KM_SLEEP);
-
- // if flags & TASKQ_THREADS_CPU_PCT != 0 {
- // int pct;
- // assert!(num_threads >= 0);
- // assert!(num_threads <= 100);
- // pct = cmp::min(num_threads, 100);
- // pct = cmp::max(pct, 0);
- //
- // num_threads = (sysconf(_SC_NPROCESSORS_ONLN) * pct) / 100;
- // num_threads = cmp::max(num_threads, 1); /* need at least 1 thread */
- // } else {
- // assert!(num_threads >= 1);
- // }
-
- // rw_init(&tq.threadlock, NULL, RW_DEFAULT, NULL);
- // mutex_init(&tq.lock, NULL, MUTEX_DEFAULT, NULL);
- // cv_init(&tq.dispatch_cv, NULL, CV_DEFAULT, NULL);
- // cv_init(&tq.wait_cv, NULL, CV_DEFAULT, NULL);
- // cv_init(&tq.max_alloc_cv, NULL, CV_DEFAULT, NULL);
- // tq.task.next: &tq.task;
- // tq.task.prev: &tq.task;
-
- // if flags & TASKQ_PREPOPULATE != 0 {
- // mutex_enter(&tq.lock);
- // while (min_alloc-- > 0)
- // task_free(tq, task_alloc(tq, KM_SLEEP));
- // mutex_exit(&tq.lock);
- // }
-
- // let mut threads = Vec::new();
- // for _ in 0..num_threads {
- // let (task_t, task_r) = channel();
- // threads.push(task_t);
- // thread::spawn(|| { taskq_thread(task_r) });
- // tq.thread_list[t] = thread_create(NULL, 0, taskq_thread, tq, TS_RUN, NULL, 0, pri);
- // VERIFIY(tq.thread_list[t]);
- // }
-
- Taskq {
- name: name,
- // threads: threads,
- flags: flags | TASKQ_ACTIVE,
- active: num_threads,
- num_threads: num_threads,
- num_alloc: 0,
- min_alloc: min_alloc,
- max_alloc: max_alloc,
- next_task_id: 0,
- max_alloc_wait: 0, // task_queue: VecDeque::new(),
- }
- }
-
- // fn alloc_task(&mut self, tqflags: u64) -> Self {
- // taskq_ent_t *t;
- //
- // loop {
- // if (t = self.freelist) != NULL && self.num_alloc >= self.min_alloc {
- // There's a free Task in the free_list
- // assert!(t.flags & TQENT_FLAG_PREALLOC == 0);
- // self.freelist = t.next;
- // } else {
- // if (self.num_alloc >= self.max_alloc) {
- // if tqflags & KM_SLEEP == 0 {
- // return NULL;
- // }
- //
- // We don't want to exceed max_alloc, but we can't
- // wait for other tasks to complete (and thus free up
- // task structures) without risking deadlock with
- // the caller. So, we just delay for one second
- // to throttle the allocation rate. If we have tasks
- // complete before one second timeout expires then
- // taskq_ent_free will signal us and we will
- // immediately retry the allocation.
- // self.max_alloc_wait += 1;
- // let rv = cv_timedwait(&self.max_alloc_cv, &self.lock, ddi_get_lbolt() + hz);
- // self.max_alloc_wait -= 1;
- // if rv > 0 {
- // continue;
- // }
- // }
- // mutex_exit(&self.lock);
- //
- // t = kmem_alloc(sizeof (taskq_ent_t), tqflags);
- //
- // mutex_enter(&self.lock);
- // if t != NULL {
- // Make sure we start without any flags
- // t.flags = 0;
- // self.num_alloc++;
- // }
- // }
- //
- // break;
- // }
- // return t;
- // }
-
- // fn task_free(taskq_t *tq, taskq_ent_t *t) {
- // if (tq->tq_nalloc <= tq->tq_min_alloc) {
- // t->tqent_next = tq->tq_freelist;
- // tq->tq_freelist = t;
- // } else {
- // tq->tq_nalloc--;
- // mutex_exit(&tq->tq_lock);
- // kmem_free(t, sizeof (taskq_ent_t));
- // mutex_enter(&tq->tq_lock);
- // }
- //
- // if (tq->tq_max_alloc_wait) {
- // cv_signal(&tq->tq_max_alloc_cv);
- // }
- // }
-
- fn taskq_dispatch(&mut self, func: TaskFn, flags: u64) -> TaskId {
- // self.threads[0].send(Task { func: func, flags: flags });
- let index = self.next_task_id;
- self.next_task_id += 1;
- TaskId(index)
- }
-
- // fn taskq_dispatch(&mut self, func: TaskFn, flags: u64) -> TaskId {
- // taskq_ent_t *t;
- //
- // if taskq_now {
- // func(arg);
- // return 1;
- // }
- //
- // mutex_enter(&self.lock);
- // assert!(self.flags & TASKQ_ACTIVE);
- // if (t = self.alloc_task(tqflags)) == NULL {
- // mutex_exit(&self.lock);
- // return 0;
- // }
- // if tqflags & TQ_FRONT != 0 {
- // t.next = self.task.next;
- // t.prev = &self.task;
- // } else {
- // t.next = &self.task;
- // t.prev = self.task.prev;
- // }
- // t.next.prev = t;
- // t.prev.next = t;
- // t.func = func;
- // t.flags = 0;
- // cv_signal(&self.dispatch_cv);
- // mutex_exit(&self.lock);
- // return 1;
- // }
- //
- // taskqid_t
- // taskq_dispatch_delay(taskq_t *tq, task_func_t func, uint_t tqflags,
- // clock_t expire_time)
- // {
- // return 0;
- // }
-
- // pub fn empty_ent(&self) -> bool {
- // self.next == NULL
- // }
-
- // fn taskq_init_ent(taskq_ent_t *t) {
- // t.next = NULL;
- // t.prev = NULL;
- // t.func = NULL;
- // t.flags = 0;
- // }
-
- // fn taskq_dispatch_ent(taskq_t *tq, task_func_t func, uint_t flags, taskq_ent_t *t) {
- // assert!(func != NULL);
- //
- // Mark it as a prealloc'd task. This is important
- // to ensure that we don't free it later.
- // t.flags |= TQENT_FLAG_PREALLOC;
- // Enqueue the task to the underlying queue.
- // mutex_enter(&tq.lock);
- //
- // if (flags & TQ_FRONT) {
- // t.next = tq.task.next;
- // t.prev = &tq.task;
- // } else {
- // t.next = &tq.task;
- // t.prev = tq.task.prev;
- // }
- // t.next.prev = t;
- // t.prev.next = t;
- // t.func = func;
- // cv_signal(&tq.dispatch_cv);
- // mutex_exit(&tq.lock);
- // }
-
- // fn wait(&self) {
- // mutex_enter(&tq.lock);
- // while tq.task.next != &tq.task || tq.active > 0 {
- // cv_wait(&tq.wait_cv, &tq.lock);
- // }
- // mutex_exit(&tq.lock);
- // }
- //
- // fn wait_id(&self, id: TaskId) {
- // self.wait();
- // }
- //
- // fn wait_outstanding(&self, id: TaskId) {
- // self.wait();
- // }
- //
- // fn destroy(&mut self) {
- // int num_threads = tq->tq_num_threads;
- //
- // taskq_wait(tq);
- //
- // mutex_enter(&tq->tq_lock);
- //
- // tq->tq_flags &= ~TASKQ_ACTIVE;
- // cv_broadcast(&tq->tq_dispatch_cv);
- //
- // while tq->tq_num_threads > 0 {
- // cv_wait(&tq->tq_wait_cv, &tq->tq_lock);
- // }
- //
- // tq.min_alloc = 0;
- // while (tq.num_alloc != 0) {
- // ASSERT(tq->tq_freelist != NULL);
- // task_free(tq, task_alloc(tq, KM_SLEEP));
- // }
- //
- // mutex_exit(&tq->tq_lock);
- //
- // kmem_free(tq->tq_thread_list, num_threads * sizeof (kthread_t *));
- //
- // rw_destroy(&tq->tq_threadlock);
- // mutex_destroy(&tq->tq_lock);
- // cv_destroy(&tq->tq_dispatch_cv);
- // cv_destroy(&tq->tq_wait_cv);
- // cv_destroy(&tq->tq_max_alloc_cv);
- //
- // kmem_free(tq, sizeof (taskq_t));
- // }
- //
- // pub fn member(&self, thread_id: ThreadId) -> bool {
- // for i in 0..self.num_threads {
- // if self.thread_list[i] == t {
- // return true;
- // }
- // }
- //
- // false
- // }
-
- pub fn cancel_id(&mut self, id: TaskId) -> zfs::Result<()> {
- Err(zfs::Error::NoEntity)
- }
-}
-
-// fn system_taskq_init() {
-// system_taskq = taskq_create("system_taskq", 64, maxclsyspri, 4, 512,
-// TASKQ_DYNAMIC | TASKQ_PREPOPULATE);
-// }
-//
-// fn system_taskq_fini() {
-// taskq_destroy(system_taskq);
-// system_taskq = NULL; // defensive
-// }
-
-//-------------------------------------------------------------------------------------------------//
-
-pub struct TaskId(usize);
-
-struct Task {
- // taskq_ent *next;
- // taskq_ent *prev;
- func: Box<FnMut()>,
- flags: u64,
-}
-
-//-------------------------------------------------------------------------------------------------//
-
-// fn taskq_thread(task_r: Receiver<Task>) {
-// while let Ok(task) = task_r.recv() {
-// (task.func)();
-// }
-// }
-
-// fn taskq_thread(task_r: Receiver<Task>) {
-// taskq_t *tq = arg;
-// taskq_ent_t *t;
-//
-// mutex_enter(&tq.lock);
-// while tq.flags & TASKQ_ACTIVE != 0 {
-// if (t = tq.task.next) == &tq.task {
-// tq.active -= 1;
-// if tq.active == 0 {
-// cv_broadcast(&tq.wait_cv);
-// }
-// cv_wait(&tq.dispatch_cv, &tq.lock);
-// tq.active++;
-// continue;
-// }
-// t.prev.next = t.next;
-// t.next.prev = t.prev;
-// t.next = NULL;
-// t.prev = NULL;
-// mutex_exit(&tq.lock);
-//
-// rw_enter(&tq.threadlock, RW_READER);
-// t.func(t.arg);
-// rw_exit(&tq.threadlock);
-//
-// mutex_enter(&tq.lock);
-// if !t.flags & TQENT_FLAG_PREALLOC != 0 {
-// task_free(tq, t);
-// }
-// }
-// tq.num_threads--;
-// cv_broadcast(&tq.wait_cv);
-// mutex_exit(&tq.lock);
-// thread_exit();
-// }
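The commented-out scaffolding above gestures at a channel-per-worker design: a Sender<Task> per thread, with each worker draining its Receiver. The following is a minimal standalone sketch of that shape on std's mpsc channels; it adds Send to the boxed closure so tasks can cross threads and is not the crate's eventual implementation.

// Minimal worker-queue sketch in the spirit of the commented-out design.
use std::sync::mpsc::{channel, Sender};
use std::thread;

// Each task is just a boxed closure, mirroring `TaskFn` above (plus Send).
type TaskFn = Box<dyn FnMut() + Send>;

struct Task {
    func: TaskFn,
}

fn spawn_worker() -> (Sender<Task>, thread::JoinHandle<()>) {
    let (tx, rx) = channel::<Task>();
    let handle = thread::spawn(move || {
        // The worker drains tasks until every Sender has been dropped.
        while let Ok(mut task) = rx.recv() {
            (task.func)();
        }
    });
    (tx, handle)
}

fn main() {
    let (tx, handle) = spawn_worker();
    tx.send(Task { func: Box::new(|| println!("task ran")) }).unwrap();
    drop(tx); // close the queue so the worker exits
    handle.join().unwrap();
}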
View
5 crates/zfs/txg.rs
@@ -1,5 +0,0 @@
-pub const DEFER_SIZE: usize = 2;
-
-pub const TXG_SIZE: usize = 4;
-
-pub const TXG_INITIAL: usize = TXG_SIZE;
View
47 crates/zfs/uberblock.rs
@@ -1,47 +0,0 @@
-use std::{mem, ptr};
-
-use super::from_bytes::FromBytes;
-use super::block_ptr::BlockPtr;
-
-const UBERBLOCK_MAGIC: u64 = 0x00bab10c; // oo-ba-bloc!
-pub const UBERBLOCK_SHIFT: u64 = 10; // up to 1K
-
-#[derive(Copy, Clone, Debug)]
-#[repr(packed)]
-pub struct Uberblock {
- pub magic: u64,
- pub version: u64,
- pub txg: u64,
- pub guid_sum: u64,
- pub timestamp: u64,
- pub rootbp: BlockPtr,
-}
-
-impl Uberblock {
- pub fn magic_little() -> u64 {
- return 0x0cb1ba00;
- }
-
- pub fn magic_big() -> u64 {
- return 0x00bab10c;
- }
-}
-
-impl FromBytes for Uberblock {
- fn from_bytes(data: &[u8]) -> Result<Self, String> {
- if data.len() >= mem::size_of::<Uberblock>() {
- let uberblock = unsafe { ptr::read(data.as_ptr() as *const Uberblock) };
- if uberblock.magic == Uberblock::magic_little() {
- Ok(uberblock)
- } else if uberblock.magic == Uberblock::magic_big() {
- Ok(uberblock)
- } else {
- Err("Error: Invalid uberblock magic number".to_string())
- }
- } else {
- Err(format!("Error: Need {} bytes to read uberblock, only {} in buffer",
- mem::size_of::<Uberblock>(),
- data.len()))
- }
- }
-}
View
74 crates/zfs/util.rs
@@ -1,74 +0,0 @@
-
-// Compatibility macros/typedefs needed for Solaris -> Linux port
-pub fn p2_align(x: u64, align: u64) -> u64 {
- x & -(align as i64) as u64
-}
-
-fn p2_cross(x: u64, y: u64, align: u64) -> bool {
- x ^ y > align - 1
-}
-
-fn p2_round_up(x: u64, align: u64) -> u64 {
- ((x - 1) | (align - 1)) + 1
-}
-
-fn p2_boundary(off: u64, len: u64, align: u64) -> bool {
- (off ^ (off + len - 1)) > (align - 1)
-}
-
-fn p2_phase(x: u64, align: u64) -> u64 {
- x & (align - 1)
-}
-
-fn p2_nphase(x: u64, align: u64) -> u64 {
- -(x as i64) as u64 & (align - 1)
-}
-
-fn p2_nphase_typed(x: u64, align: u64) -> u64 {
- -(x as i64) as u64 & (align - 1)
-}
-
-fn is_p2(x: u64) -> bool {
- x & (x - 1) == 0
-}
-
-fn is_p2_aligned(v: u64, a: u64) -> bool {
- v & (a - 1) == 0
-}
-
-pub fn highbit64(u: u64) -> u32 {
- 63 - u.leading_zeros()
-}
-
-// Typed version of the P2* macros. These macros should be used to ensure
-// that the result is correctly calculated based on the data type of (x),
-// which is passed in as the last argument, regardless of the data
-// type of the alignment. For example, if (x) is of type uint64_t,
-// and we want to round it up to a page boundary using "PAGESIZE" as
-// the alignment, we can do either
-// P2ROUNDUP(x, (uint64_t)PAGESIZE)
-// or
-// P2ROUNDUP_TYPED(x, PAGESIZE, uint64_t)
-//
-// #define P2ALIGN_TYPED(x, align, type) \
-// ((type)(x) & -(type)(align))
-// #define P2PHASE_TYPED(x, align, type) \
-// ((type)(x) & ((type)(align) - 1))
-// #define P2NPHASE_TYPED(x, align, type) \
-// (-(type)(x) & ((type)(align) - 1))
-// #define P2ROUNDUP_TYPED(x, align, type) \
-// ((((type)(x) - 1) | ((type)(align) - 1)) + 1)
-// #define P2END_TYPED(x, align, type) \
-// (-(~(type)(x) & -(type)(align)))
-// #define P2PHASEUP_TYPED(x, align, phase, type) \
-// ((type)(phase) - (((type)(phase) - (type)(x)) & -(type)(align)))
-// #define P2CROSS_TYPED(x, y, align, type) \
-// (((type)(x) ^ (type)(y)) > (type)(align) - 1)
-// #define P2SAMEHIGHBIT_TYPED(x, y, type) \
-// (((type)(x) ^ (type)(y)) < ((type)(x) & (type)(y)))
-//
-//
-// avoid any possibility of clashing with <stddef.h> version
-// #if defined(_KERNEL) && !defined(_KMEMUSER) && !defined(offsetof)
-// #define offsetof(s, m) ((size_t)(&(((s *)0)->m)))
-// #endif
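Worked values for the power-of-two helpers above, re-declared here so the snippet compiles on its own: p2_align rounds an offset down to an alignment boundary, p2_round_up rounds it up, and highbit64 returns the index of the highest set bit.

// Worked examples for the P2 bit tricks with a 4 KiB (0x1000) alignment.
fn p2_align(x: u64, align: u64) -> u64 {
    x & (align as i64).wrapping_neg() as u64
}

fn p2_round_up(x: u64, align: u64) -> u64 {
    ((x - 1) | (align - 1)) + 1
}

fn highbit64(u: u64) -> u32 {
    63 - u.leading_zeros()
}

fn main() {
    assert_eq!(p2_align(0x1234, 0x1000), 0x1000);    // round down to 4 KiB
    assert_eq!(p2_round_up(0x1234, 0x1000), 0x2000); // round up to 4 KiB
    assert_eq!(highbit64(0x1000), 12);               // 0x1000 == 2^12
    println!("ok");
}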
View
506 crates/zfs/vdev.rs
@@ -1,506 +0,0 @@
-use std::{cmp, mem};
-use std::rc::Rc;
-
-use super::dmu_objset::ObjectSet;
-use super::from_bytes::FromBytes;
-use super::metaslab::{Metaslab, MetaslabClass, MetaslabGroup};
-use super::nvpair::{NvList, NvValue};
-use super::uberblock;
-use super::util;
-use super::vdev_file::VdevFile;
-use super::zfs;
-
-#[repr(packed)]
-pub struct VdevLabel {
- pub blank: [u8; 8 * 1024],
- pub boot_header: [u8; 8 * 1024],
- pub nv_pairs: [u8; 112 * 1024],
- pub uberblocks: [u8; 128 * 1024],
-}
-
-impl FromBytes for VdevLabel {}
-
-/// /////////////////////////////////////////////////////////////////////////////////////////////////
-
-pub trait IVdevOps {
- /// Returns (size, max_size, ashift)
- fn open(&mut self, vdev: &mut Vdev) -> zfs::Result<(u64, u64, u64)>;
-
- fn close(&mut self, vdev: &mut Vdev);
-
- /// Default asize function: return the MAX of psize with the asize of all children. This is
- /// what's used by anything other than RAID-Z.
- fn asize(&mut self, vdev: &mut Vdev, psize: u64) -> u64;
-
- fn hold(&mut self, vdev: &mut Vdev);
-
- fn release(&mut self, vdev: &mut Vdev);
-}
-
-/// /////////////////////////////////////////////////////////////////////////////////////////////////
-
-pub struct VdevOps {
- pub ops: Box<IVdevOps>,
- // io_start: fn(&zio::Zio),
- // io_done: fn(&zio::Zio),
- // state_change: fn(),
- vdev_type: String,
- is_leaf: bool,
-}
-
-impl VdevOps {
- pub fn vdev_type(&self) -> &str {
- self.vdev_type.as_ref()
- }
- pub fn is_leaf(&self) -> bool {
- self.is_leaf
- }
-}
-
-fn load_ops(vdev_type: &str, nv: &NvList) -> zfs::Result<VdevOps> {
- match vdev_type {
- "disk" => {
- Ok(VdevOps {
- ops: Box::new(try!(VdevFile::load(nv))),
- vdev_type: "disk".to_string(),
- is_leaf: true,
- })
- }
- _ => Err(zfs::Error::Invalid),
- }
-}
-
-/// /////////////////////////////////////////////////////////////////////////////////////////////////
-#[derive(Copy, Clone, Debug, PartialEq)]
-pub enum AllocType {
- Load = 0,
- Add,
- Spare,
- L2Cache,
- RootPool,
- Split,
- Attach,
-}
-
-/// /////////////////////////////////////////////////////////////////////////////////////////////////
-
-/// States are ordered from least to most healthy.
-/// Vdevs `CannotOpen` and worse are considered unusable.
-#[derive(Copy, Clone, Debug, PartialEq)]
-pub enum State {
- Unknown, // Uninitialized vdev
- Closed, // Not currently open
- Offline, // Not allowed to open
- Removed, // Explicitly removed from the system
-    CannotOpen, // Tried to open, but failed
- Faulted, // External request to fault device
- Degraded, // Replicated vdev with unhealthy kids
- Healthy, // Presumed good
-}
-
-/// /////////////////////////////////////////////////////////////////////////////////////////////////
-
-// Stuff that only top level vdevs have
-pub struct Top {
- pub ms_array: u64, // object ID of metaslab array in MOS
- pub ms_shift: u64, // metaslab shift
- pub ms_group: MetaslabGroup, // metaslab group
- pub metaslabs: Vec<Metaslab>, // in-memory metaslab array
- pub is_hole: bool,
- pub removing: bool, // device is being removed?
-}
-
-impl Top {
- pub fn new(ms_array: u64, ms_shift: u64, ms_group: MetaslabGroup) -> Self {
- Top {
- ms_array: ms_array,
- ms_shift: ms_shift,
- ms_group: ms_group,
- metaslabs: vec![],
- is_hole: false, // TODO: zol checks vdev_ops for this, but idk what to do yet
- removing: false,
- }
- }
-}
-
-/// /////////////////////////////////////////////////////////////////////////////////////////////////
-
-pub struct Leaf {
- whole_disk: u64,
-}
-
-impl Leaf {
- pub fn new() -> Self {
- Leaf { whole_disk: 0 }
- }
-}
-
-/// /////////////////////////////////////////////////////////////////////////////////////////////////
-
-// Note that a vdev can be a top-level, a leaf, both, or neither
-pub struct Vdev {
- id: u64, // child number in vdev parent
- guid: u64, // unique ID for this vdev
- guid_sum: u64, // self guid + all child guids
- orig_guid: u64, // orig. guid prior to remove
- asize: u64, // allocatable device capacity
- min_asize: u64, // min acceptable asize
- max_asize: u64, // max acceptable asize
- pub ashift: u64, // block alignment shift
- state: State,
- prev_state: State,
- pub ops: VdevOps,
- parent: Option<TreeIndex>,
- top_vdev: Option<TreeIndex>,
- children: Vec<TreeIndex>,
- create_txg: u64, // txg when top-level was added
-
- pub top: Option<Top>,
- pub leaf: Option<Leaf>,
-}
-
-impl Vdev {
- pub fn new(id: u64,
- guid: Option<u64>,
- ashift: u64,
- ops: VdevOps,
- create_txg: u64,
- vdev_top: Option<Top>)
- -> Self {
- let guid = guid.unwrap_or_else(|| {
- // TODO: generate a guid
- 0
- });
-
- // TODO vdev_queue_init
-
- Vdev {
- id: id,
- guid: guid,
- guid_sum: guid, // No children yet, so guid_sum is just my guid
- orig_guid: 0,
- asize: 0,
- min_asize: 0,
- max_asize: 0,
- ashift: ashift,
- state: State::Closed,
- prev_state: State::Unknown,
- ops: ops,
- parent: None,
- top_vdev: None,
- children: Vec::new(),
- create_txg: create_txg,
-
- top: vdev_top,
- leaf: None,
- }
- }
-
- pub fn load(normal_class: &Rc<MetaslabClass>,
- nv: &NvList,
- id: u64,
- parent: Option<TreeIndex>,
- vdev_tree: &Tree,
- alloc_type: AllocType)
- -> zfs::Result<Self> {
- let vdev_type = try!(nv.get::<&String>("type").ok_or(zfs::Error::Invalid)).clone();
-
- let ops = try!(load_ops(vdev_type.as_ref(), nv));
-
- if alloc_type == AllocType::Load {
- // Verify the provided id matches the id written in the MOS
- let label_id: u64 = try!(nv.get("id").ok_or(zfs::Error::Invalid));
- if label_id != id {
- return Err(zfs::Error::Invalid);
- }
- }
-
- // If this is some sort of load, then we read the guid from the nvpairs. Otherwise,
- // Vdev::new will generate one for us
- let guid = match alloc_type {
- AllocType::Load | AllocType::Spare | AllocType::L2Cache | AllocType::RootPool => {
- Some(try!(nv.get("guid").ok_or(zfs::Error::Invalid)))
- }
- _ => None,
- };
-
- let create_txg = try!(nv.get("create_txg").ok_or(zfs::Error::Invalid));
- let ashift = try!(nv.get("ashift").ok_or(zfs::Error::Invalid));
-
- let mut vdev_top = None;
-
- // If we're a top-level vdev, try to load the allocation parameters,
- // create the metaslab group, and create the vdev::Top
- if let Some(parent) = parent {
- if parent.get(vdev_tree).parent.is_none() {
- let mut ms_array = 0;
- let mut ms_shift = 0;
- if alloc_type == AllocType::Load || alloc_type == AllocType::Split {
- ms_array = try!(nv.get("metaslab_array").ok_or(zfs::Error::Invalid));
- ms_shift = try!(nv.get("metaslab_shift").ok_or(zfs::Error::Invalid));
- // let asize = try!(nv.get("asize").ok_or(zfs::Error::Invalid));
- // let removing = try!(nv.get("removing").ok_or(zfs::Error::Invalid));
- }
-
- if alloc_type != AllocType::Attach {
- assert!(alloc_type == AllocType::Load || alloc_type == AllocType::Add ||
- alloc_type == AllocType::Split ||
- alloc_type == AllocType::RootPool);
- let ms_group = MetaslabGroup::create(normal_class.clone());
-
- vdev_top = Some(Top::new(ms_array, ms_shift, ms_group));
- }
- }
- }
-
- let mut vdev = Self::new(id, guid, ashift, ops, create_txg, vdev_top);
- vdev.parent = parent;
-
- Ok(vdev)
- }
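-
-    // A sketch of the nvlist shape `load` expects for a leaf vdev, built with
-    // the same NvList/NvValue API used in vdev_label.rs. The values here are
-    // made up, the "file"/"path" entries assume a file-backed vdev, and the
-    // "children" array (nested configs for interior vdevs) is omitted. This
-    // assumes NvValue is imported alongside NvList from the nvpair module.
-    #[allow(dead_code)]
-    fn example_leaf_config() -> NvList {
-        let mut nv = NvList::new(0);
-        nv.add("type".to_string(), NvValue::String("file".to_string()));
-        nv.add("path".to_string(), NvValue::String("/path/to/vdev.img".to_string()));
-        nv.add("id".to_string(), NvValue::Uint64(0));
-        nv.add("guid".to_string(), NvValue::Uint64(0x1234_5678_9abc_def0));
-        nv.add("create_txg".to_string(), NvValue::Uint64(4));
-        nv.add("ashift".to_string(), NvValue::Uint64(12));
-        nv
-    }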
-
- fn open(&mut self) -> zfs::Result<()> {
- Ok(())
- }
-
- fn metaslab_init(&mut self, mos: &mut ObjectSet, txg: u64) -> zfs::Result<()> {
- // We assume this is a top-level vdev
- let ref mut top = try!(self.top.as_mut().ok_or(zfs::Error::Invalid));
-
- let old_count = top.metaslabs.len();
- let new_count = (self.asize >> top.ms_shift) as usize;
-
- // assert!(txg == 0 || spa_config_held(spa, SCL_ALLOC, RW_WRITER));
-
- // Return if vdev isn't being allocated from yet
- if top.ms_shift == 0 {
- return Ok(());
- }
- assert!(!top.is_hole); // Must not be a hole
-
- // Compute the raidz-deflation ratio. Note, we hard-code
- // in 128k (1 << 17) because it is the "typical" blocksize.
- // Even though SPA_MAXBLOCKSIZE changed, this algorithm can not change,
- // otherwise it would inconsistently account for existing bp's.
- // vd->vdev_deflate_ratio = (1 << 17) / (vdev_psize_to_asize(vd, 1 << 17) >> SPA_MINBLOCKSHIFT);
-
- assert!(old_count <= new_count);
-
- for m in old_count..new_count {
- let object: u64 = 0;
-
- if txg == 0 {
- // try!(dmu_read(mos, top.ms_array, m * mem::size_of::<u64>(),
- // mem::size_of::<u64>(), &object, DMU_READ_PREFETCH));
- }
-
- // let metaslab = try!(Metaslab::init(mos, self, m as u64, object, txg));
- // top.metaslabs.push(metaslab);
- }
-
- // if (txg == 0)
- // spa_config_enter(spa, SCL_ALLOC, FTAG, RW_WRITER);
-
- // If the vdev is being removed we don't activate
- // the metaslabs since we want to ensure that no new
- // allocations are performed on this device.
- if old_count == 0 && !top.removing {
- // metaslab_group_activate(vd.mg);
- }
-
- // if (txg == 0)
- // spa_config_exit(spa, SCL_ALLOC, FTAG);
-
- Ok(())
- }
-
- // Get the minimum allocatable size. We define the allocatable size as
- // the vdev's asize rounded to the nearest metaslab. This allows us to
- // replace or attach devices which don't have the same physical size but
- // can still satisfy the same number of allocations.
- // fn get_min_asize(&self, parent: Option<&Vdev>) -> u64 {
- // vdev_t *pvd = vd->vdev_parent;
- //
- // If our parent is NULL (inactive spare or cache) or is the root,
- // just return our own asize.
- // if self.parent.is_none() {
- // return self.asize;
- // }
- //
- // The top-level vdev just returns the allocatable size rounded
- // to the nearest metaslab.
- // if let Some(ref top) = self.top {
- // return util::p2_align(self.asize, 1u64 << top.ms_shift);
- // }
- //
- // The allocatable space for a raidz vdev is N * sizeof(smallest child),
- // so each child must provide at least 1/Nth of its asize.
- // if pvd->vdev_ops == &vdev_raidz_ops {
- // return pvd->vdev_min_asize / pvd->vdev_children;
- // }
- //
- // pvd->vdev_min_asize
- // }
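-
-    // A minimal sketch of the rounding described in the comment above: the
-    // allocatable size is the asize rounded down to a whole number of
-    // metaslabs. `p2_align_sketch` stands in for the `util::p2_align` helper
-    // referenced in the commented-out code; the numbers are only examples.
-    // With hypothetical 1 GiB metaslabs (ms_shift = 30), an asize of
-    // 5 GiB + 123456 bytes rounds down to exactly 5 GiB.
-    #[allow(dead_code)]
-    fn p2_align_sketch(x: u64, align: u64) -> u64 {
-        debug_assert!(align.is_power_of_two());
-        x & !(align - 1)
-    }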
-
-
- // pub fn dirty(&mut self, flags: u64, void *arg, txg: u64) {
- // We assume this is a top-level vdev
- // let ref top = self.top.unwrap();
- //
- // assert!(self == self.top_vdev);
- // assert!(!self.is_hole);
- // assert!(util::is_p2(flags));
- // assert!(spa_writeable(self.spa));
- //
- // if flags & DIRTY_METASLAB {
- // txg_list_add(&self.ms_list, arg, txg);
- // }
- //
- // if flags & DIRTY_DTL {
- // txg_list_add(&self.dtl_list, arg, txg);
- // }
- //
- // txg_list_add(&self.spa.vdev_txg_list, self, txg);
- // }
-
- pub fn uberblock_shift(&self) -> u64 {
- cmp::min(cmp::max(self.ashift, uberblock::UBERBLOCK_SHIFT),
- MAX_UBERBLOCK_SHIFT)
- }
-
- pub fn uberblock_count(&self) -> u64 {
- UBERBLOCK_RING >> self.uberblock_shift()
- }
-
- // pub fn uberblock_offset(&self, n) -> u64 {
- // offsetof(vdev_label_t, vl_uberblock[n << self.uberblock_shift()])
- // }
-
- pub fn uberblock_size(&self) -> u64 {
- 1 << self.uberblock_shift()
- }
-}
-
-/// /////////////////////////////////////////////////////////////////////////////////////////////////
-#[derive(Copy, Clone, PartialEq)]
-pub struct TreeIndex(usize);
-
-impl TreeIndex {
- pub fn get<'a>(&self, tree: &'a Tree) -> &'a Vdev {
- tree.nodes[self.0].as_ref().unwrap()
- }
-
- pub fn get_mut<'a>(&self, tree: &'a mut Tree) -> &'a mut Vdev {
- tree.nodes[self.0].as_mut().unwrap()
- }
-}
-
-/// /////////////////////////////////////////////////////////////////////////////////////////////////
-
-pub struct Tree {
- nodes: Vec<Option<Vdev>>,
- free: Vec<usize>,
-}
-
-impl Tree {
- pub fn new() -> Self {
- Tree {
- nodes: Vec::new(),
- free: Vec::new(),
- }
- }
-
- pub fn add(&mut self, vdev: Vdev) -> TreeIndex {
- let parent = vdev.parent;
- let guid = vdev.guid;
-
- // Add the vdev node
- let index = TreeIndex(match self.free.pop() {
- Some(free_index) => {
- self.nodes[free_index] = Some(vdev);
- free_index
- }
- None => {
- self.nodes.push(Some(vdev));
- self.nodes.len() - 1
- }
- });
-
- // Look up the parent's top-level vdev before taking the mutable borrow below
- let top_vdev = parent.map(|parent| parent.get(self).top_vdev.unwrap_or(index));
- index.get_mut(self).top_vdev = top_vdev;
-
- if let Some(parent) = parent {
- parent.get_mut(self).guid_sum += guid;
- parent.get_mut(self).children.push(index);
- }
-
- index
- }
-
- pub fn parse(&mut self,
- normal_class: &Rc<MetaslabClass>,
- nv: &NvList,
- parent: Option<TreeIndex>,
- alloc_type: AllocType)
- -> zfs::Result<TreeIndex> {
- let vdev = try!(Vdev::load(normal_class, nv, 0, parent, self, alloc_type));
- let index = self.add(vdev);
-
- // Done parsing if this is a leaf
- if index.get(self).ops.is_leaf() {
- return Ok(index);
- }
-
- // Get the vdev's children
- let children: &Vec<NvList> = try!(nv.get("children").ok_or(zfs::Error::Invalid));
-
- for child in children {
- try!(self.parse(normal_class, child, Some(index), alloc_type));
- }
-
- Ok(index)
- }
-
- pub fn load(&mut self, mos: &mut ObjectSet, root: TreeIndex) {
- // We use an iterative solution because of borrowing issues
- let mut queue = vec![root];
-
- while let Some(index) = queue.pop() {
- let vdev = index.get_mut(self);
-
- // Recursively load all children
- for child in &vdev.children {
- queue.push(*child);
- }
-
- // Load metaslabs for top-level vdevs
- // if let Some(ref top) = vdev.top {
- if vdev.top.is_some() {
- // if !top.is_hole {
- if vdev.ashift == 0 || vdev.asize == 0 || vdev.metaslab_init(mos, 0).is_err() {
- // TODO: Set vdev state to error
- }
- // }
- }
-
- // TODO: Load DTL for leaf vdevs
- }
- }
-}
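-
-// A small, self-contained sketch of the index-arena pattern `Tree` and
-// `TreeIndex` use above: nodes live in a `Vec<Option<T>>`, freed slots are
-// recycled through a free list, and links between nodes are plain indices
-// rather than references, which sidesteps the borrowing issues mentioned in
-// `Tree::load`. The `Arena` name is hypothetical and only for illustration.
-#[allow(dead_code)]
-struct Arena<T> {
-    nodes: Vec<Option<T>>,
-    free: Vec<usize>,
-}
-
-#[allow(dead_code)]
-impl<T> Arena<T> {
-    fn add(&mut self, value: T) -> usize {
-        match self.free.pop() {
-            Some(index) => {
-                self.nodes[index] = Some(value);
-                index
-            }
-            None => {
-                self.nodes.push(Some(value));
-                self.nodes.len() - 1
-            }
-        }
-    }
-
-    fn remove(&mut self, index: usize) -> Option<T> {
-        let value = self.nodes[index].take();
-        if value.is_some() {
-            self.free.push(index);
-        }
-        value
-    }
-}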
-
-/// /////////////////////////////////////////////////////////////////////////////////////////////////
-
-const DIRTY_METASLAB: u64 = 0x01;
-const DIRTY_DTL: u64 = 0x02;
-
-const RAIDZ_MAXPARITY: usize = 3;
-
-const PAD_SIZE: u64 = 8 << 10;
-// 2 padding areas (vl_pad1 and vl_pad2) to skip
-const SKIP_SIZE: u64 = PAD_SIZE * 2;
-const PHYS_SIZE: u64 = 112 << 10;
-const UBERBLOCK_RING: u64 = 128 << 10;
-
-// The largest uberblock we support is 8k.
-const MAX_UBERBLOCK_SHIFT: u64 = 13;
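-
-// A small worked example of what the constants above imply, as a sketch. The
-// four regions of a label (the two 8K pads, the 112K nvlist area, and the
-// 128K uberblock ring) add up to a 256K label. The ring is split into
-// fixed-size slots: with a hypothetical ashift of 12 (4K sectors), and
-// assuming uberblock::UBERBLOCK_SHIFT is 10 as in upstream ZFS,
-// uberblock_shift() yields 12, so the ring holds 128K >> 12 = 32 uberblocks
-// of 4K each.
-#[allow(dead_code)]
-fn label_layout_sketch() {
-    let label_size = SKIP_SIZE + PHYS_SIZE + UBERBLOCK_RING;
-    assert_eq!(label_size, 256 << 10);
-
-    let ub_shift = 12; // what uberblock_shift() returns for ashift = 12
-    assert_eq!(UBERBLOCK_RING >> ub_shift, 32);
-}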
View
34 crates/zfs/vdev_file.rs
@@ -1,34 +0,0 @@
-use super::nvpair::NvList;
-use super::{vdev, zfs};
-
-pub struct VdevFile {
- path: String,
-}
-
-impl VdevFile {
- pub fn load(nv: &NvList) -> zfs::Result<Self> {
- Ok(VdevFile { path: try!(nv.get::<&String>("path").ok_or(zfs::Error::Invalid)).clone() })
- }
-
- // pub fn io_start(zio: &zio::Zio);
-
- // pub fn io_done(zio: &zio::Zio);
-
- // pub fn state_change();
-}
-
-impl vdev::IVdevOps for VdevFile {
- fn open(&mut self, vdev: &mut vdev::Vdev) -> zfs::Result<(u64, u64, u64)> {
- Ok((0, 0, 0))
- }
-
- fn close(&mut self, vdev: &mut vdev::Vdev) {}
-
- fn asize(&mut self, vdev: &mut vdev::Vdev, psize: u64) -> u64 {
- 0
- }
-
- fn hold(&mut self, vdev: &mut vdev::Vdev) {}
-
- fn release(&mut self, vdev: &mut vdev::Vdev) {}
-}
View
1,011 crates/zfs/vdev_label.rs
@@ -1,1011 +0,0 @@
-use std::mem;
-
-use super::vdev::VdevLabel;
-
-// vdev_dirty() flags
-const VDD_METASLAB: u64 = 0x01;
-const VDD_DTL: u64 = 0x02;
-
-// Offset of embedded boot loader region on each label
-const VDEV_BOOT_OFFSET: usize = 2 * mem::size_of::<VdevLabel>();
-// Size of embedded boot loader region on each label.
-// The total size of the first two labels plus the boot area is 4MB.
-const VDEV_BOOT_SIZE: usize = 7 << 19; // 3.5M
-
-// Size of label regions at the start and end of each leaf device.
-const VDEV_LABEL_START_SIZE: usize = (2 * mem::size_of::<VdevLabel>() + VDEV_BOOT_SIZE);
-const VDEV_LABEL_END_SIZE: usize = (2 * mem::size_of::<VdevLabel>());
-const VDEV_LABELS: u8 = 4;
-const VDEV_BEST_LABEL: u8 = VDEV_LABELS;
-
-// Basic routines to read and write from a vdev label.
-// Used throughout the rest of this file.
-fn vdev_label_offset(psize: u64, l: u8, offset: u64) -> u64 {
- assert!(offset < mem::size_of::<VdevLabel>() as u64);
- //assert!(P2PHASE_TYPED(psize, mem::size_of::<VdevLabel>(), u64) == 0);
-
- offset + (l as u64) * (mem::size_of::<VdevLabel>() as u64) +
- if l < VDEV_LABELS / 2 {
- 0
- } else {
- psize - (VDEV_LABELS as u64) * (mem::size_of::<VdevLabel>() as u64)
- }
-}
-
-// Returns the vdev label associated with the given offset.
-fn vdev_label_number(psize: u64, mut offset: u64) -> Option<u64> {
- if offset >= psize - VDEV_LABEL_END_SIZE as u64 {
- offset -= psize - VDEV_LABEL_END_SIZE as u64;
- offset += ((VDEV_LABELS as u64) / 2) * (mem::size_of::<VdevLabel>() as u64);
- }
- let l = offset / (mem::size_of::<VdevLabel>() as u64);
- if l < (VDEV_LABELS as u64) {
- Some(l)
- } else {
- None
- }
-}
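-
-// A sketch of where vdev_label_offset puts the four labels, using a
-// hypothetical 1 GiB device and a 256K label (the real size comes from
-// mem::size_of::<VdevLabel>()). Labels 0 and 1 sit at the front of the
-// device and labels 2 and 3 at the very end, so copies survive damage to
-// either end of the disk.
-#[allow(dead_code)]
-fn label_placement_sketch() {
-    let label_size: u64 = 256 << 10;
-    let psize: u64 = 1 << 30;
-    let offset_of = |l: u64| {
-        l * label_size + if l < 2 { 0 } else { psize - 4 * label_size }
-    };
-    assert_eq!(offset_of(0), 0);
-    assert_eq!(offset_of(1), label_size);
-    assert_eq!(offset_of(2), psize - 2 * label_size);
-    assert_eq!(offset_of(3), psize - label_size);
-}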
-
-fn vdev_label_read(zio_t *zio, vdev_t *vd, l: u8, void *buf, offset: u64,
- size: u64, zio_done_func_t *done, void *private, flags: u64) {
- //assert!(spa_config_held(zio->io_spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
- //assert!(flags & ZIO_FLAG_CONFIG_WRITER);
-
- Zio::read_phys(zio, vd, vdev_label_offset(vd.psize, l, offset),
- size, buf, ZIO_CHECKSUM_LABEL, done, private,
- zio::Priority::SyncRead, flags, true).no_wait();
-}
-
-static void
-vdev_label_write(zio_t *zio, vdev_t *vd, l: u8, void *buf, uint64_t offset,
- uint64_t size, zio_done_func_t *done, void *private, int flags)
-{
- assert!(spa_config_held(zio->io_spa, SCL_ALL, RW_WRITER) == SCL_ALL ||
- (spa_config_held(zio->io_spa, SCL_CONFIG | SCL_STATE, RW_READER) ==
- (SCL_CONFIG | SCL_STATE) &&
- dsl_pool_sync_context(spa_get_dsl(zio->io_spa))));
- assert!(flags & ZIO_FLAG_CONFIG_WRITER);
-
- zio.write_phys(vd, vdev_label_offset(vd->vdev_psize, l, offset),
- size, buf, ZIO_CHECKSUM_LABEL, done, private,
- ZIO_PRIORITY_SYNC_WRITE, flags, true).no_wait();
-}
-
-// Generate the nvlist representing this vdev's config.
-fn vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, vdev_config_flag_t flags) -> NvList {
- let nv = NvList::new(0);
-
- nv.add("type".to_string(), NvValue::String(vd.ops.vdev_type));
- if !(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) {
- nv.add("id".to_string(), NvValue::Uint64(vd.id));
- }
- nv.add("guid".to_string(), NvValue::Uint64(vd.guid));
-
- if (vd->vdev_path != NULL)
- fnvlist_add_string(nv, ZPOOL_CONFIG_PATH, vd->vdev_path);
-
- if (vd->vdev_devid != NULL)
- fnvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vd->vdev_devid);
-
- if (vd->vdev_physpath != NULL)
- fnvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH,
- vd->vdev_physpath);
-
- if (vd->vdev_fru != NULL)
- fnvlist_add_string(nv, ZPOOL_CONFIG_FRU, vd->vdev_fru);
-
- if (vd->vdev_nparity != 0) {
- assert!(strcmp(vd->vdev_ops->vdev_op_type,
- VDEV_TYPE_RAIDZ) == 0);
-
- // Make sure someone hasn't managed to sneak a fancy new vdev
- // into a crufty old storage pool.
- assert!(vd->vdev_nparity == 1 ||
- (vd->vdev_nparity <= 2 &&
- spa_version(spa) >= SPA_VERSION_RAIDZ2) ||
- (vd->vdev_nparity <= 3 &&
- spa_version(spa) >= SPA_VERSION_RAIDZ3));
-
- // Note that we'll add the nparity tag even on storage pools
- // that only support a single parity device -- older software
- // will just ignore it.
- fnvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, vd->vdev_nparity);
- }
-
- if (vd->vdev_wholedisk != -1ULL)
- fnvlist_add_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
- vd->vdev_wholedisk);
-
- if (vd->vdev_not_present)
- fnvlist_add_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, 1);
-
- if (vd->vdev_isspare)
- fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_SPARE, 1);
-
- if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) &&
- vd == vd->vdev_top) {
- fnvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY,
- vd->vdev_ms_array);
- fnvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT,
- vd->vdev_ms_shift);
- fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASHIFT, vd->vdev_ashift);
- fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASIZE,
- vd->vdev_asize);
- fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_LOG, vd->vdev_islog);
- if (vd->vdev_removing)
- fnvlist_add_uint64(nv, ZPOOL_CONFIG_REMOVING,
- vd->vdev_removing);
- }
-
- if (vd->vdev_dtl_sm != NULL) {
- fnvlist_add_uint64(nv, ZPOOL_CONFIG_DTL,
- space_map_object(vd->vdev_dtl_sm));
- }
-
- if (vd->vdev_crtxg)
- fnvlist_add_uint64(nv, ZPOOL_CONFIG_CREATE_TXG, vd->vdev_crtxg);
-
- if (getstats) {
- vdev_stat_t vs;
- pool_scan_stat_t ps;
-
- vdev_get_stats(vd, &vs);
- fnvlist_add_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
- (uint64_t *)&vs, sizeof (vs) / sizeof (uint64_t));
-
- // provide either current or previous scan information
- if (spa_scan_get_stats(spa, &ps) == 0) {
- fnvlist_add_uint64_array(nv,
- ZPOOL_CONFIG_SCAN_STATS, (uint64_t *)&ps,
- sizeof (pool_scan_stat_t) / sizeof (uint64_t));
- }
- }
-
- if (!vd->vdev_ops->vdev_op_leaf) {
- nvlist_t **child;
- int c, idx;
-
- assert!(!vd->vdev_ishole);
-
- child = kmem_alloc(vd->vdev_children * sizeof (nvlist_t *),
- KM_SLEEP);
-
- for (c = 0, idx = 0; c < vd->vdev_children; c++) {
- vdev_t *cvd = vd->vdev_child[c];
-
- // If we're generating an nvlist of removing
- // vdevs then skip over any device which is
- // not being removed.
- if ((flags & VDEV_CONFIG_REMOVING) &&
- !cvd->vdev_removing)
- continue;
-
- child[idx++] = vdev_config_generate(spa, cvd,
- getstats, flags);
- }
-
- if (idx) {
- fnvlist_add_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
- child, idx);
- }
-
- for (c = 0; c < idx; c++)
- nvlist_free(child[c]);
-
- kmem_free(child, vd->vdev_children * sizeof (nvlist_t *));
-
- } else {
- const char *aux = NULL;
-
- if (vd->vdev_offline && !vd->vdev_tmpoffline)
- fnvlist_add_uint64(nv, ZPOOL_CONFIG_OFFLINE, true);
- if (vd->vdev_resilver_txg != 0)
- fnvlist_add_uint64(nv, ZPOOL_CONFIG_RESILVER_TXG,
- vd->vdev_resilver_txg);
- if (vd->vdev_faulted)
- fnvlist_add_uint64(nv, ZPOOL_CONFIG_FAULTED, true);
- if (vd->vdev_degraded)
- fnvlist_add_uint64(nv, ZPOOL_CONFIG_DEGRADED, true);
- if (vd->vdev_removed)
- fnvlist_add_uint64(nv, ZPOOL_CONFIG_REMOVED, true);
- if (vd->vdev_unspare)
- fnvlist_add_uint64(nv, ZPOOL_CONFIG_UNSPARE, true);
- if (vd->vdev_ishole)
- fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_HOLE, true);
-
- switch (vd->vdev_stat.vs_aux) {
- case VDEV_AUX_ERR_EXCEEDED:
- aux = "err_exceeded";
- break;
-
- case VDEV_AUX_EXTERNAL:
- aux = "external";
- break;
- }
-
- if (aux != NULL)
- fnvlist_add_string(nv, ZPOOL_CONFIG_AUX_STATE, aux);
-
- if (vd->vdev_splitting && vd->vdev_orig_guid != 0LL) {
- fnvlist_add_uint64(nv, ZPOOL_CONFIG_ORIG_GUID,
- vd->vdev_orig_guid);
- }
- }
-
- return (nv);
-}
-
-// Generate a view of the top-level vdevs. If we currently have holes
-// in the namespace, then generate an array which contains a list of holey
-// vdevs. Additionally, add the number of top-level children that currently
-// exist.
-void
-vdev_top_config_generate(spa_t *spa, nvlist_t *config)
-{
- vdev_t *rvd = spa->spa_root_vdev;
- uint64_t *array;
- uint_t c, idx;
-
- array = kmem_alloc(rvd->vdev_children * sizeof (uint64_t), KM_SLEEP);
-
- for (c = 0, idx = 0; c < rvd->vdev_children; c++) {
- vdev_t *tvd = rvd->vdev_child[c];
-
- if (tvd->vdev_ishole)
- array[idx++] = c;
- }
-
- if (idx) {
- VERIFY(nvlist_add_uint64_array(config, ZPOOL_CONFIG_HOLE_ARRAY,
- array, idx) == 0);
- }
-
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN,
- rvd->vdev_children) == 0);
-
- kmem_free(array, rvd->vdev_children * sizeof (uint64_t));
-}
-
-// Returns the configuration from the label of the given vdev. For vdevs
-// which don't have a txg value stored on their label (i.e. spares/cache)
-// or have not been completely initialized (txg = 0) just return
-// the configuration from the first valid label we find. Otherwise,
-// find the most up-to-date label that does not exceed the specified
-// 'txg' value.
-fn vdev_label_read_config(vdev_t *vd, uint64_t txg) -> NvList {
- spa_t *spa = vd->vdev_spa;
- nvlist_t *config = NULL;
- vdev_phys_t *vp;
- uint64_t best_txg = 0;
- int error = 0;
- int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
- ZIO_FLAG_SPECULATIVE;
- int l;
-
- assert!(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
-
- if (!vdev_readable(vd))
- return (NULL);
-
- vp = zio_buf_alloc(sizeof (vdev_phys_t));
-
-retry:
- for (l = 0; l < VDEV_LABELS; l++) {
- nvlist_t *label = NULL;
-
- let zio = Zio::root(spa, None, None, flags);
-
- vdev_label_read(zio, vd, l, vp,
- offsetof(vdev_label_t, vl_vdev_phys),
- sizeof (vdev_phys_t), NULL, NULL, flags);
-
- if (zio_wait(zio) == 0 &&
- nvlist_unpack(vp->vp_nvlist, sizeof (vp->vp_nvlist),
- &label, 0) == 0) {
- uint64_t label_txg = 0;
-
- // Auxiliary vdevs won't have txg values in their
- // labels and newly added vdevs may not have been
- // completely initialized so just return the
- // configuration from the first valid label we
- // encounter.
- error = nvlist_lookup_uint64(label,
- ZPOOL_CONFIG_POOL_TXG, &label_txg);
- if ((error || label_txg == 0) && !config) {
- config = label;
- break;
- } else if (label_txg <= txg && label_txg > best_txg) {
- best_txg = label_txg;
- nvlist_free(config);
- config = fnvlist_dup(label);
- }
- }
-
- if (label != NULL) {
- nvlist_free(label);
- label = NULL;
- }
- }
-
- if (config == NULL && !(flags & ZIO_FLAG_TRYHARD)) {
- flags |= ZIO_FLAG_TRYHARD;
- goto retry;
- }
-
- zio_buf_free(vp, sizeof (vdev_phys_t));
-
- return (config);
-}
-
-// Determine if a device is in use. The 'spare_guid' parameter will be filled
-// in with the device guid if this spare is active elsewhere on the system.
-fn vdev_inuse(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason,
- uint64_t *spare_guid, uint64_t *l2cache_guid) -> bool {
- spa_t *spa = vd->vdev_spa;
- uint64_t state, pool_guid, device_guid, txg, spare_pool;
- uint64_t vdtxg = 0;
- nvlist_t *label;
-
- if (spare_guid)
- *spare_guid = 0ULL;
- if (l2cache_guid)
- *l2cache_guid = 0ULL;
-
- // Read the label, if any, and perform some basic sanity checks.
- if ((label = vdev_label_read_config(vd, -1ULL)) == NULL)
- return (false);
-
- nvlist_lookup_uint64(label, ZPOOL_CONFIG_CREATE_TXG, &vdtxg);
-
- if nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, &state) != 0 ||
- nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &device_guid) != 0 {
- nvlist_free(label);
- return (false);
- }
-
- if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
- (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID,
- &pool_guid) != 0 ||
- nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG,
- &txg) != 0)) {
- nvlist_free(label);
- return (false);
- }
-
- nvlist_free(label);
-
- // Check to see if this device indeed belongs to the pool it claims to
- // be a part of. The only way this is allowed is if the device is a hot
- // spare (which we check for later on).
- if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
- !spa_guid_exists(pool_guid, device_guid) &&
- !spa_spare_exists(device_guid, NULL, NULL) &&
- !spa_l2cache_exists(device_guid, NULL))
- return (false);
-
-// If the transaction group is zero, then this is an initialized (but
- // unused) label. This is only an error if the create transaction
- // on-disk is the same as the one we're using now, in which case the
- // user has attempted to add the same vdev multiple times in the same
- // transaction.
- if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
- txg == 0 && vdtxg == crtxg)
- return (true);
-
- // Check to see if this is a spare device. We do an explicit check for
- // spa_has_spare() here because it may be on our pending list of spares
- // to add. We also check if it is an l2cache device.
- if (spa_spare_exists(device_guid, &spare_pool, NULL) ||
- spa_has_spare(spa, device_guid)) {
- if (spare_guid)
- *spare_guid = device_guid;
-
- switch (reason) {
- case VDEV_LABEL_CREATE:
- case VDEV_LABEL_L2CACHE:
- return (true);
-
- case VDEV_LABEL_REPLACE:
- return (!spa_has_spare(spa, device_guid) ||
- spare_pool != 0ULL);
-
- case VDEV_LABEL_SPARE:
- return (spa_has_spare(spa, device_guid));
- default:
- break;
- }
- }
-
- // Check to see if this is an l2cache device.
- if (spa_l2cache_exists(device_guid, NULL))
- return true;
-
- // We can't rely on a pool's state if it's been imported
-// read-only. Instead we look to see if the pool is marked
- // read-only in the namespace and set the state to active.
- if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
- (spa = spa_by_guid(pool_guid, device_guid)) != NULL &&
- spa_mode(spa) == FREAD)
- state = POOL_STATE_ACTIVE;
-
- // If the device is marked ACTIVE, then this device is in use by another
- // pool on the system.
- return (state == POOL_STATE_ACTIVE);
-}
-
-// Initialize a vdev label. We check to make sure each leaf device is not in
-// use, and writable. We put down an initial label which we will later
-// overwrite with a complete label. Note that it's important to do this
-// sequentially, not in parallel, so that we catch cases of multiple use of the
-// same leaf vdev in the vdev we're creating -- e.g. mirroring a disk with
-// itself.
-int
-vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
-{
- spa_t *spa = vd->vdev_spa;
- nvlist_t *label;
- vdev_phys_t *vp;
- char *pad2;
- uberblock_t *ub;
- zio_t *zio;
- char *buf;
- size_t buflen;
- int error;
- uint64_t spare_guid = 0, l2cache_guid = 0;
- int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL;
- int c, l;
- vdev_t *pvd;
-
- assert!(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
-
- for (c = 0; c < vd->vdev_children; c++)
- if ((error = vdev_label_init(vd->vdev_child[c],
- crtxg, reason)) != 0)
- return (error);
-
- // Track the creation time for this vdev
- vd->vdev_crtxg = crtxg;
-
- if (!vd->vdev_ops->vdev_op_leaf || !spa_writeable(spa))
- return (0);
-
- // Dead vdevs cannot be initialized.
- if (vdev_is_dead(vd))
- return (SET_ERROR(EIO));
-
- // Determine if the vdev is in use.
- if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_SPLIT &&
- vdev_inuse(vd, crtxg, reason, &spare_guid, &l2cache_guid))
- return (SET_ERROR(EBUSY));
-
- // If this is a request to add or replace a spare or l2cache device
- // that is in use elsewhere on the system, then we must update the
- // guid (which was initialized to a random value) to reflect the
- // actual GUID (which is shared between multiple pools).
- if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_L2CACHE &&
- spare_guid != 0ULL) {
- uint64_t guid_delta = spare_guid - vd->vdev_guid;
-
- vd->vdev_guid += guid_delta;
-
- for (pvd = vd; pvd != NULL; pvd = pvd->vdev_parent)
- pvd->vdev_guid_sum += guid_delta;
-
- // If this is a replacement, then we want to fallthrough to the
- // rest of the code. If we're adding a spare, then it's already
- // labeled appropriately and we can just return.
- if (reason == VDEV_LABEL_SPARE)
- return (0);
- assert!(reason == VDEV_LABEL_REPLACE ||
- reason == VDEV_LABEL_SPLIT);
- }
-
- if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_SPARE &&
- l2cache_guid != 0ULL) {
- uint64_t guid_delta = l2cache_guid - vd->vdev_guid;
-
- vd->vdev_guid += guid_delta;
-
- for (pvd = vd; pvd != NULL; pvd = pvd->vdev_parent)
- pvd->vdev_guid_sum += guid_delta;
-
- // If this is a replacement, then we want to fallthrough to the
- // rest of the code. If we're adding an l2cache, then it's
- // already labeled appropriately and we can just return.
- if (reason == VDEV_LABEL_L2CACHE)
- return (0);
- assert!(reason == VDEV_LABEL_REPLACE);
- }
-
- // Initialize its label.
- vp = zio_buf_alloc(sizeof (vdev_phys_t));
- bzero(vp, sizeof (vdev_phys_t));
-
- // Generate a label describing the pool and our top-level vdev.
- // We mark it as being from txg 0 to indicate that it's not
- // really part of an active pool just yet. The labels will
- // be written again with a meaningful txg by spa_sync().
- if (reason == VDEV_LABEL_SPARE ||
- (reason == VDEV_LABEL_REMOVE && vd->vdev_isspare)) {
- // For inactive hot spares, we generate a special label that
- // identifies as a mutually shared hot spare. We write the
- // label if we are adding a hot spare, or if we are removing an
- // active hot spare (in which case we want to revert the
- // labels).
- VERIFY(nvlist_alloc(&label, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-
- VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_VERSION,
- spa_version(spa)) == 0);
- VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_POOL_STATE,
- POOL_STATE_SPARE) == 0);
- VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_GUID,
- vd->vdev_guid) == 0);
- } else if (reason == VDEV_LABEL_L2CACHE ||
- (reason == VDEV_LABEL_REMOVE && vd->vdev_isl2cache)) {
- // For level 2 ARC devices, add a special label.
- VERIFY(nvlist_alloc(&label, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-
- VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_VERSION,
- spa_version(spa)) == 0);
- VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_POOL_STATE,
- POOL_STATE_L2CACHE) == 0);
- VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_GUID,
- vd->vdev_guid) == 0);
- } else {
- uint64_t txg = 0ULL;
-
- if (reason == VDEV_LABEL_SPLIT)
- txg = spa->spa_uberblock.ub_txg;
- label = spa_config_generate(spa, vd, txg, false);
-
- // Add our creation time. This allows us to detect multiple
- // vdev uses as described above, and automatically expires if we
- // fail.
- VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_CREATE_TXG,
- crtxg) == 0);
- }
-
- buf = vp->vp_nvlist;
- buflen = sizeof (vp->vp_nvlist);
-
- error = nvlist_pack(label, &buf, &buflen, NV_ENCODE_XDR, KM_SLEEP);
- if (error != 0) {
- nvlist_free(label);
- zio_buf_free(vp, sizeof (vdev_phys_t));
- /* EFAULT means nvlist_pack ran out of room */
- return (error == EFAULT ? ENAMETOOLONG : EINVAL);
- }
-
- // Initialize uberblock template.
- ub = zio_buf_alloc(VDEV_UBERBLOCK_RING);
- bzero(ub, VDEV_UBERBLOCK_RING);
- *ub = spa->spa_uberblock;
- ub->ub_txg = 0;
-
- // Initialize the 2nd padding area.
- pad2 = zio_buf_alloc(VDEV_PAD_SIZE);
- bzero(pad2, VDEV_PAD_SIZE);
-
- // Write everything in parallel.
-retry:
- zio = zio_root(spa, NULL, NULL, flags);
-
- for (l = 0; l < VDEV_LABELS; l++) {
-
- vdev_label_write(zio, vd, l, vp,
- offsetof(vdev_label_t, vl_vdev_phys),
- sizeof (vdev_phys_t), NULL, NULL, flags);
-
- // Skip the 1st padding area.
- // Zero out the 2nd padding area where it might have
- // left over data from previous filesystem format.
- vdev_label_write(zio, vd, l, pad2,
- offsetof(vdev_label_t, vl_pad2),
- VDEV_PAD_SIZE, NULL, NULL, flags);
-
- vdev_label_write(zio, vd, l, ub,
- offsetof(vdev_label_t, vl_uberblock),
- VDEV_UBERBLOCK_RING, NULL, NULL, flags);
- }
-
- error = zio_wait(zio);
-
- if (error != 0 && !(flags & ZIO_FLAG_TRYHARD)) {
- flags |= ZIO_FLAG_TRYHARD;
- goto retry;
- }
-
- nvlist_free(label);
- zio_buf_free(pad2, VDEV_PAD_SIZE);
- zio_buf_free(ub, VDEV_UBERBLOCK_RING);
- zio_buf_free(vp, sizeof (vdev_phys_t));
-
- // If this vdev hasn't been previously identified as a spare, then we
- // mark it as such only if a) we are labeling it as a spare, or b) it
- // exists as a spare elsewhere in the system. Do the same for
- // level 2 ARC devices.
- if (error == 0 && !vd->vdev_isspare &&
- (reason == VDEV_LABEL_SPARE ||
- spa_spare_exists(vd->vdev_guid, NULL, NULL)))
- spa_spare_add(vd);
-
- if (error == 0 && !vd->vdev_isl2cache &&
- (reason == VDEV_LABEL_L2CACHE ||
- spa_l2cache_exists(vd->vdev_guid, NULL)))
- spa_l2cache_add(vd);
-
- return (error);
-}
-
-// ==========================================================================
-// uberblock load/sync
-// ==========================================================================
-
-// Consider the following situation: txg is safely synced to disk. We've
-// written the first uberblock for txg + 1, and then we lose power. When we
-// come back up, we fail to see the uberblock for txg + 1 because, say,
-// it was on a mirrored device and the replica to which we wrote txg + 1
-// is now offline. If we then make some changes and sync txg + 1, and then
-// the missing replica comes back, then for a few seconds we'll have two
-// conflicting uberblocks on disk with the same txg. The solution is simple:
-// among uberblocks with equal txg, choose the one with the latest timestamp.
-fn uberblock_compare(a: &Uberblock, b: &Uberblock) -> i64 {
- if a.txg < b.txg {
- return -1;
- }
- if a.txg > b.txg {
- return 1;
- }
-
- if a.timestamp < b.timestamp {
- return -1;
- }
- if a.timestamp > b.timestamp {
- return 1;
- }
-
- 0
-}
-
-struct ubl_cbdata {
- uberblock_t *ubl_ubbest; /* Best uberblock */
- vdev_t *ubl_vd; /* vdev associated with the above */
-};
-
-fn uberblock_load_done(zio_t *zio) {
- vdev_t *vd = zio->vd;
- spa_t *spa = zio->spa;
- zio_t *rio = zio->private;
- uberblock_t *ub = zio->data;
- struct ubl_cbdata *cbp = rio->private;
-
- //assert!(zio.size == VDEV_UBERBLOCK_SIZE(vd));
-
- if (zio->error == 0 && uberblock_verify(ub) == 0) {
- mutex_enter(&rio->lock);
- if (ub->ub_txg <= spa->spa_load_max_txg &&
- uberblock_compare(ub, cbp->ubl_ubbest) > 0) {
- // Keep track of the vdev in which this uberblock
- // was found. We will use this information later
- // to obtain the config nvlist associated with
- // this uberblock.
- *cbp->ubl_ubbest = *ub;
- cbp->ubl_vd = vd;
- }
- mutex_exit(&rio->lock);
- }
-
- zio_buf_free(zio->data, zio->size);
-}
-
-fn uberblock_load_impl(zio: &Zio, vdev_t *vd, int flags, struct ubl_cbdata *cbp) {
- for c in 0..vd->vdev_children {
- uberblock_load_impl(zio, vd.vdev_child[c], flags, cbp);
- }
-
- if vd.ops.vdev_op_leaf && vdev_readable(vd) {
- for l in 0..VDEV_LABELS {
- for n in 0..VDEV_UBERBLOCK_COUNT(vd) {
- vdev_label_read(zio, vd, l, zio_buf_alloc(VDEV_UBERBLOCK_SIZE(vd)),
- VDEV_UBERBLOCK_OFFSET(vd, n), VDEV_UBERBLOCK_SIZE(vd),
- uberblock_load_done, zio, flags);
- }
- }
- }
-}
-
-// Reads the 'best' uberblock from disk along with its associated
-// configuration. First, we read the uberblock array of each label of each
-// vdev, keeping track of the uberblock with the highest txg in each array.
-// Then, we read the configuration from the same vdev as the best uberblock.
-fn uberblock_load(vdev_t *rvd, ub: &Uberblock, nvlist_t **config) -> Option<Uberblock> {
- spa_t *spa = rvd->vdev_spa;
- struct ubl_cbdata cb;
- let flags = zio::FLAG_CONFIG_WRITER | zio::FLAG_CANFAIL |
- zio::FLAG_SPECULATIVE | zio::FLAG_TRYHARD;
-
- assert!(ub);
- assert!(config);
-
- bzero(ub, sizeof (uberblock_t));
- *config = NULL;
-
- cb.ubl_ubbest = ub;
- cb.ubl_vd = NULL;
-
- spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
- let zio = Zio::root(spa, None, &cb, flags);
- uberblock_load_impl(zio, rvd, flags, &cb);
- zio.wait();
-
- // It's possible that the best uberblock was discovered on a label
- // that has a configuration which was written in a future txg.
- // Search all labels on this vdev to find the configuration that
- // matches the txg for our uberblock.
- if (cb.ubl_vd != NULL)
- *config = vdev_label_read_config(cb.ubl_vd, ub->ub_txg);
- spa_config_exit(spa, SCL_ALL, FTAG);
-}
-
-// On success, increment root zio's count of good writes.
-// We only get credit for writes to known-visible vdevs; see spa_vdev_add().
-fn vdev_uberblock_sync_done(zio_t *zio) {
- uint64_t *good_writes = zio->io_private;
-
- if (zio->io_error == 0 && zio->io_vd->vdev_top->vdev_ms_array != 0)
- atomic_add_64(good_writes, 1);
-}
-
-// Write the uberblock to all labels of all leaves of the specified vdev.
-fn vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd, int flags) {
- uberblock_t *ubbuf;
- int c, l, n;
-
- for (c = 0; c < vd->vdev_children; c++) {
- vdev_uberblock_sync(zio, ub, vd->vdev_child[c], flags);
- }
-
- if !vd->vdev_ops->vdev_op_leaf {
- return;
- }
-
- if !vdev_writeable(vd) {
- return;
- }
-
- n = ub->ub_txg & (VDEV_UBERBLOCK_COUNT(vd) - 1);
-
- ubbuf = zio_buf_alloc(VDEV_UBERBLOCK_SIZE(vd));
- bzero(ubbuf, VDEV_UBERBLOCK_SIZE(vd));
- *ubbuf = *ub;
-
- for (l = 0; l < VDEV_LABELS; l++) {
- vdev_label_write(zio, vd, l, ubbuf,
- VDEV_UBERBLOCK_OFFSET(vd, n), VDEV_UBERBLOCK_SIZE(vd),
- vdev_uberblock_sync_done, zio->io_private,
- flags | ZIO_FLAG_DONT_PROPAGATE);
- }
-
- zio_buf_free(ubbuf, VDEV_UBERBLOCK_SIZE(vd));
-}
-
-// Sync the uberblocks to all vdevs in svd[]
-fn vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags) -> zfs::Result<()> {
- spa_t *spa = svd[0]->vdev_spa;
- zio_t *zio;
- uint64_t good_writes = 0;
- int v;
-
- zio = zio_root(spa, NULL, &good_writes, flags);
-
- for (v = 0; v < svdcount; v++)
- vdev_uberblock_sync(zio, ub, svd[v], flags);
-
- (void) zio_wait(zio);
-
- // Flush the uberblocks to disk. This ensures that the odd labels
- // are no longer needed (because the new uberblocks and the even
- // labels are safely on disk), so it is safe to overwrite them.
- zio = zio_root(spa, NULL, NULL, flags);
-
- for (v = 0; v < svdcount; v++)
- zio_flush(zio, svd[v]);
-
- (void) zio_wait(zio);
-
- return (good_writes >= 1 ? 0 : EIO);
-}
-
-// On success, increment the count of good writes for our top-level vdev.
-fn vdev_label_sync_done(zio_t *zio) {
- uint64_t *good_writes = zio->io_private;
-
- if (zio->io_error == 0)
- atomic_add_64(good_writes, 1);
-}
-
-// If there weren't enough good writes, indicate failure to the parent.
-fn vdev_label_sync_top_done(zio_t *zio) {
- uint64_t *good_writes = zio->io_private;
-
- if (*good_writes == 0)
- zio->io_error = SET_ERROR(EIO);
-
- kmem_free(good_writes, sizeof (uint64_t));
-}
-
-// We ignore errors for log and cache devices, simply free the private data.
-fn vdev_label_sync_ignore_done(zio_t *zio) {
- kmem_free(zio->io_private, sizeof (uint64_t));
-}
-
-// Write all even or odd labels to all leaves of the specified vdev.
-fn vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg, int flags) {
- nvlist_t *label;
- vdev_phys_t *vp;
- char *buf;
- size_t buflen;
- int c;
-
- for (c = 0; c < vd->vdev_children; c++)
- vdev_label_sync(zio, vd->vdev_child[c], l, txg, flags);
-
- if (!vd->vdev_ops->vdev_op_leaf)
- return;
-
- if (!vdev_writeable(vd))
- return;
-
- // Generate a label describing the top-level config to which we belong.
- label = spa_config_generate(vd->vdev_spa, vd, txg, false);
-
- vp = zio_buf_alloc(sizeof (vdev_phys_t));
- bzero(vp, sizeof (vdev_phys_t));
-
- buf = vp->vp_nvlist;
- buflen = sizeof (vp->vp_nvlist);
-
- if (!nvlist_pack(label, &buf, &buflen, NV_ENCODE_XDR, KM_SLEEP)) {
- for (; l < VDEV_LABELS; l += 2) {
- vdev_label_write(zio, vd, l, vp,
- offsetof(vdev_label_t, vl_vdev_phys),
- sizeof (vdev_phys_t),
- vdev_label_sync_done, zio->io_private,
- flags | ZIO_FLAG_DONT_PROPAGATE);
- }
- }
-
- zio_buf_free(vp, sizeof (vdev_phys_t));
- nvlist_free(label);
-}
-
-fn vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags) -> zfs::Result<()> {
- list_t *dl = &spa->spa_config_dirty_list;
- vdev_t *vd;
- zio_t *zio;
- int error;
-
- // Write the new labels to disk.
- zio = zio_root(spa, NULL, NULL, flags);
-
- for (vd = list_head(dl); vd != NULL; vd = list_next(dl, vd)) {
- uint64_t *good_writes;
- zio_t *vio;
-
- assert!(!vd->vdev_ishole);
-
- good_writes = kmem_zalloc(sizeof (uint64_t), KM_SLEEP);
- vio = zio_null(zio, spa, NULL,
- (vd->vdev_islog || vd->vdev_aux != NULL) ?
- vdev_label_sync_ignore_done : vdev_label_sync_top_done,
- good_writes, flags);
- vdev_label_sync(vio, vd, l, txg, flags);
- vio.no_wait();
- }
-
- error = zio.wait();
-
- // Flush the new labels to disk.
- zio = zio.root(spa, None, None, flags);
-
- for (vd = list_head(dl); vd != NULL; vd = list_next(dl, vd)) {
- zio.flush(vd);
- }
-
- zio.wait();
-
- return (error);
-}
-
-// Sync the uberblock and any changes to the vdev configuration.
-//
-// The order of operations is carefully crafted to ensure that
-// if the system panics or loses power at any time, the state on disk
-// is still transactionally consistent. The in-line comments below
-// describe the failure semantics at each stage.
-//
-// Moreover, vdev_config_sync() is designed to be idempotent: if it fails
-// at any time, you can just call it again, and it will resume its work.
-fn config_sync(vdev_t **svd, int svdcount, uint64_t txg, boolean_t tryhard) -> zfs::Result<()> {
- spa_t *spa = svd[0]->vdev_spa;
- uberblock_t *ub = &spa->spa_uberblock;
- vdev_t *vd;
- int error;
- int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL;
-
- // Normally, we don't want to try too hard to write every label and
- // uberblock. If there is a flaky disk, we don't want the rest of the
- // sync process to block while we retry. But if we can't write a
- // single label out, we should retry with ZIO_FLAG_TRYHARD before
- // bailing out and declaring the pool faulted.
- if tryhard {
- flags |= ZIO_FLAG_TRYHARD;
- }
-
- assert!(ub->ub_txg <= txg);
-
- // If this isn't a resync due to I/O errors,
- // and nothing changed in this transaction group,
- // and the vdev configuration hasn't changed,
- // then there's nothing to do.
- if ub->ub_txg < txg &&
- uberblock_update(ub, spa->spa_root_vdev, txg) == false &&
- list_is_empty(&spa->spa_config_dirty_list) {
- return 0;
- }
-
- if txg > spa_freeze_txg(spa) {
- return 0;
- }
-
- assert!(txg <= spa->spa_final_txg);
-
- // Flush the write cache of every disk that's been written to
- // in this transaction group. This ensures that all blocks
- // written in this txg will be committed to stable storage
- // before any uberblock that references them.
- let zio = Zio::root(spa, None, None, flags);
-
- for (vd = txg_list_head(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)); vd;
- vd = txg_list_next(&spa->spa_vdev_txg_list, vd, TXG_CLEAN(txg)))
- zio.flush(vd);
-
- zio.wait();
-
- // Sync out the even labels (L0, L2) for every dirty vdev. If the
- // system dies in the middle of this process, that's OK: all of the
- // even labels that made it to disk will be newer than any uberblock,
- // and will therefore be considered invalid. The odd labels (L1, L3),
- // which have not yet been touched, will still be valid. We flush
- // the new labels to disk to ensure that all even-label updates
- // are committed to stable storage before the uberblock update.
- try!(vdev_label_sync_list(spa, 0, txg, flags));
-
- // Sync the uberblocks to all vdevs in svd[].
- // If the system dies in the middle of this step, there are two cases
- // to consider, and the on-disk state is consistent either way:
- //
- // (1) If none of the new uberblocks made it to disk, then the
- // previous uberblock will be the newest, and the odd labels
- // (which had not yet been touched) will be valid with respect
- // to that uberblock.
- //
- // (2) If one or more new uberblocks made it to disk, then they
- // will be the newest, and the even labels (which had all
- // been successfully committed) will be valid with respect
- // to the new uberblocks.
- try!(vdev_uberblock_sync_list(svd, svdcount, ub, flags));
-
- // Sync out odd labels for every dirty vdev. If the system dies
- // in the middle of this process, the even labels and the new
- // uberblocks will suffice to open the pool. The next time
- // the pool is opened, the first thing we'll do -- before any
- // user data is modified -- is mark every vdev dirty so that
- // all labels will be brought up to date. We flush the new labels
- // to disk to ensure that all odd-label updates are committed to
- // stable storage before the next transaction group begins.
- vdev_label_sync_list(spa, 1, txg, flags)
-}
View
682 crates/zfs/vdev_queue.rs
@@ -1,682 +0,0 @@
-use super::zio;
-
-// ZFS IO Scheduler
-// ---------------
-//
-// ZFS issues IO operations to leaf vdevs to satisfy and complete zios. The
-// IO scheduler determines when and in what order those operations are
-// issued. The IO scheduler divides operations into five IO classes
-// prioritized in the following order: sync read, sync write, async read,
-// async write, and scrub/resilver. Each queue defines the minimum and
-// maximum number of concurrent operations that may be issued to the device.
-// In addition, the device has an aggregate maximum. Note that the sum of the
-// per-queue minimums must not exceed the aggregate maximum. If the
-// sum of the per-queue maximums exceeds the aggregate maximum, then the
-// number of active IOs may reach zfs_vdev_max_active, in which case no
-// further IOs will be issued regardless of whether all per-queue
-// minimums have been met.
-//
-// For many physical devices, throughput increases with the number of
-// concurrent operations, but latency typically suffers. Further, physical
-// devices typically have a limit at which more concurrent operations have no
-// effect on throughput or can actually cause it to decrease.
-//
-// The scheduler selects the next operation to issue by first looking for an
-// IO class whose minimum has not been satisfied. Once all are satisfied and
-// the aggregate maximum has not been hit, the scheduler looks for classes
-// whose maximum has not been satisfied. Iteration through the IO classes is
-// done in the order specified above. No further operations are issued if the
-// aggregate maximum number of concurrent operations has been hit or if there
-// are no operations queued for an IO class that has not hit its maximum.
-// Every time an IO is queued or an operation completes, the IO scheduler
-// looks for new operations to issue.
-//
-// All IO classes have a fixed maximum number of outstanding operations
-// except for the async write class. Asynchronous writes represent the data
-// that is committed to stable storage during the syncing stage for
-// transaction groups (see txg.c). Transaction groups enter the syncing state
-// periodically so the number of queued async writes will quickly burst up and
-// then bleed down to zero. Rather than servicing them as quickly as possible,
-// the IO scheduler changes the maximum number of active async write IOs
-// according to the amount of dirty data in the pool (see dsl_pool.c). Since
-// both throughput and latency typically increase with the number of
-// concurrent operations issued to physical devices, reducing the burstiness
-// in the number of concurrent operations also stabilizes the response time of
-// operations from other -- and in particular synchronous -- queues. In broad
-// strokes, the IO scheduler will issue more concurrent operations from the
-// async write queue as there's more dirty data in the pool.
-//
-// Async Writes
-//
-// The number of concurrent operations issued for the async write IO class
-// follows a piece-wise linear function defined by a few adjustable points.
-//
-//         |                   o---------| <-- zfs_vdev_async_write_max_active
-//    ^    |                  /^         |
-//    |    |                 / |         |
-//  active |                /  |         |
-//    IO   |               /   |         |
-//   count |              /    |         |
-//         |             /     |         |
-//         |------------o      |         | <-- zfs_vdev_async_write_min_active
-//        0|____________^______|_________|
-//         0%           |      |       100% of zfs_dirty_data_max
-//                      |      |
-//                      |      `-- zfs_vdev_async_write_active_max_dirty_percent
-//                      `--------- zfs_vdev_async_write_active_min_dirty_percent
-//
-// Until the amount of dirty data exceeds a minimum percentage of the dirty
-// data allowed in the pool, the IO scheduler will limit the number of
-// concurrent operations to the minimum. As that threshold is crossed, the
-// number of concurrent operations issued increases linearly to the maximum at
-// the specified maximum percentage of the dirty data allowed in the pool.
-//
-// Ideally, the amount of dirty data on a busy pool will stay in the sloped
-// part of the function between zfs_vdev_async_write_active_min_dirty_percent
-// and zfs_vdev_async_write_active_max_dirty_percent. If it exceeds the
-// maximum percentage, this indicates that the rate of incoming data is
-// greater than the rate that the backend storage can handle. In this case, we
-// must further throttle incoming writes (see dmu_tx_delay() for details).
-
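-// A minimal, self-contained sketch of the piece-wise linear function drawn
-// above, using made-up limits (1..10 active writes, thresholds at 30% and 60%
-// of a hypothetical dirty-data limit). The real tunables and the pool-aware
-// version are in vdev_queue_max_async_writes() below.
-#[allow(dead_code)]
-fn async_write_max_active_sketch(dirty: u64, dirty_max: u64) -> u64 {
-    let (min_active, max_active) = (1u64, 10u64);
-    let min_bytes = dirty_max * 30 / 100;
-    let max_bytes = dirty_max * 60 / 100;
-
-    if dirty <= min_bytes {
-        min_active
-    } else if dirty >= max_bytes {
-        max_active
-    } else {
-        // Linear interpolation between the two thresholds
-        min_active + (dirty - min_bytes) * (max_active - min_active) / (max_bytes - min_bytes)
-    }
-}
-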
-// The maximum number of IOs active to each device. Ideally, this will be >=
-// the sum of each queue's max_active. It must be at least the sum of each
-// queue's min_active.
-uint32_t zfs_vdev_max_active = 1000;
-
-// Per-queue limits on the number of IOs active to each device. If the
-// number of active IOs is < zfs_vdev_max_active, then the min_active comes
-// into play. We will send min_active from each queue, and then select from
-// queues in the order defined by zio_priority_t.
-//
-// In general, smaller max_active's will lead to lower latency of synchronous
-// operations. Larger max_active's may lead to higher overall throughput,
-// depending on underlying storage.
-//
-// The ratio of the queues' max_actives determines the balance of performance
-// between reads, writes, and scrubs. E.g., increasing
-// zfs_vdev_scrub_max_active will cause the scrub or resilver to complete
-// more quickly, but reads and writes to have higher latency and lower
-// throughput.
-uint32_t zfs_vdev_sync_read_min_active = 10;
-uint32_t zfs_vdev_sync_read_max_active = 10;
-uint32_t zfs_vdev_sync_write_min_active = 10;
-uint32_t zfs_vdev_sync_write_max_active = 10;
-uint32_t zfs_vdev_async_read_min_active = 1;
-uint32_t zfs_vdev_async_read_max_active = 3;
-uint32_t zfs_vdev_async_write_min_active = 1;
-uint32_t zfs_vdev_async_write_max_active = 10;
-uint32_t zfs_vdev_scrub_min_active = 1;
-uint32_t zfs_vdev_scrub_max_active = 2;
-
-// When the pool has less than zfs_vdev_async_write_active_min_dirty_percent
-// dirty data, use zfs_vdev_async_write_min_active. When it has more than
-// zfs_vdev_async_write_active_max_dirty_percent, use
-// zfs_vdev_async_write_max_active. The value is linearly interpolated
-// between min and max.
-int zfs_vdev_async_write_active_min_dirty_percent = 30;
-int zfs_vdev_async_write_active_max_dirty_percent = 60;
-
-// To reduce IOPs, we aggregate small adjacent IOs into one large IO.
-// For read IOs, we also aggregate across small adjacency gaps; for writes
-// we include spans of optional IOs to aid aggregation at the disk even when
-// they aren't able to help us aggregate at this level.
-int zfs_vdev_aggregation_limit = SPA_OLD_MAXBLOCKSIZE;
-int zfs_vdev_read_gap_limit = 32 << 10;
-int zfs_vdev_write_gap_limit = 4 << 10;
-
-fn vdev_queue_offset_compare(const void *x1, const void *x2) -> i32 {
- const zio_t *z1 = x1;
- const zio_t *z2 = x2;
-
- if z1.offset < z2.offset {
- return -1;
- }
- if z1.offset > z2.offset {
- return 1;
- }
-
- if z1 < z2 {
- return -1;
- }
- if z1 > z2 {
- return 1;
- }
-
- return 0;
-}
-
-static inline avl_tree_t *
-vdev_queue_class_tree(vdev_queue_t *vq, zio_priority_t p)
-{
- return (&vq->vq_class[p].vqc_queued_tree);
-}
-
-static inline avl_tree_t *
-vdev_queue_type_tree(vdev_queue_t *vq, zio_type_t t)
-{
- assert!(t == ZIO_TYPE_READ || t == ZIO_TYPE_WRITE);
- if t == ZIO_TYPE_READ {
- return &vq->vq_read_offset_tree;
- } else {
- return &vq->vq_write_offset_tree;
- }
-}
-
-int
-vdev_queue_timestamp_compare(const void *x1, const void *x2)
-{
- const zio_t *z1 = x1;
- const zio_t *z2 = x2;
-
- if (z1->io_timestamp < z2->io_timestamp)
- return (-1);
- if (z1->io_timestamp > z2->io_timestamp)
- return (1);
-
- if (z1 < z2)
- return (-1);
- if (z1 > z2)
- return (1);
-
- return (0);
-}
-
-static int
-vdev_queue_class_min_active(zio_priority_t p)
-{
- switch (p) {
- case ZIO_PRIORITY_SYNC_READ:
- return (zfs_vdev_sync_read_min_active);
- case ZIO_PRIORITY_SYNC_WRITE:
- return (zfs_vdev_sync_write_min_active);
- case ZIO_PRIORITY_ASYNC_READ:
- return (zfs_vdev_async_read_min_active);
- case ZIO_PRIORITY_ASYNC_WRITE:
- return (zfs_vdev_async_write_min_active);
- case ZIO_PRIORITY_SCRUB:
- return (zfs_vdev_scrub_min_active);
- default:
- panic("invalid priority %u", p);
- return (0);
- }
-}
-
-static int
-vdev_queue_max_async_writes(spa_t *spa)
-{
- int writes;
- uint64_t dirty = spa->spa_dsl_pool->dp_dirty_total;
- uint64_t min_bytes = zfs_dirty_data_max *
- zfs_vdev_async_write_active_min_dirty_percent / 100;
- uint64_t max_bytes = zfs_dirty_data_max *
- zfs_vdev_async_write_active_max_dirty_percent / 100;
-
- // Sync tasks correspond to interactive user actions. To reduce the
- // execution time of those actions we push data out as fast as possible.
- if (spa_has_pending_synctask(spa)) {
- return zfs_vdev_async_write_max_active;
- }
-
- if dirty < min_bytes {
- return zfs_vdev_async_write_min_active;
- }
- if dirty > max_bytes {
- return zfs_vdev_async_write_max_active;
- }
-
- // linear interpolation:
- // slope = (max_writes - min_writes) / (max_bytes - min_bytes)
- // move right by min_bytes
- // move up by min_writes
- writes = (dirty - min_bytes) *
- (zfs_vdev_async_write_max_active - zfs_vdev_async_write_min_active) /
- (max_bytes - min_bytes) + zfs_vdev_async_write_min_active;
- assert!(writes >= zfs_vdev_async_write_min_active);
- assert!(writes <= zfs_vdev_async_write_max_active);
- return (writes);
-}
-
-fn vdev_queue_class_max_active(spa_t *spa, p: zio::Priority) -> int {
- match p {
- zio::Priority::SyncRead => zfs_vdev_sync_read_max_active,
- zio::Priority::SyncWrite => zfs_vdev_sync_write_max_active,
- zio::Priority::AsyncRead => zfs_vdev_async_read_max_active,
- zio::Priority::AsyncWrite => vdev_queue_max_async_writes(spa),
- zio::Priority::Scrub => zfs_vdev_scrub_max_active,
- _ => {
- panic!("invalid priority {}", p);
- 0
- }
- }
-}
-
-// Return the IO class to issue from, or ZIO_PRIORITY_MAX_QUEUEABLE if
-// there is no eligible class.
-static zio_priority_t
-vdev_queue_class_to_issue(vdev_queue_t *vq)
-{
- spa_t *spa = vq->vq_vdev->vdev_spa;
- zio_priority_t p;
-
- if (avl_numnodes(&vq->vq_active_tree) >= zfs_vdev_max_active)
- return (ZIO_PRIORITY_NUM_QUEUEABLE);
-
- for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
- if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 &&
- vq->vq_class[p].vqc_active <
- vdev_queue_class_min_active(p))
- return (p);
- }
-
- // If we haven't found a queue, look for one that hasn't reached its
- // maximum # outstanding IOs.
- for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
- if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 &&
- vq->vq_class[p].vqc_active <
- vdev_queue_class_max_active(spa, p))
- return (p);
- }
-
- return (ZIO_PRIORITY_NUM_QUEUEABLE);
-}
-
-void
-vdev_queue_init(vdev_t *vd)
-{
- vdev_queue_t *vq = &vd->vdev_queue;
- zio_priority_t p;
-
- mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL);
- vq->vq_vdev = vd;
- taskq_init_ent(&vd->vdev_queue.vq_io_search.io_tqent);
-
- avl_create(&vq->vq_active_tree, vdev_queue_offset_compare,
- sizeof (zio_t), offsetof(struct zio, io_queue_node));
- avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_READ),
- vdev_queue_offset_compare, sizeof (zio_t),
- offsetof(struct zio, io_offset_node));
- avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE),
- vdev_queue_offset_compare, sizeof (zio_t),
- offsetof(struct zio, io_offset_node));
-
- for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
- int (*compfn) (const void *, const void *);
-
- // The synchronous IO queues are dispatched in FIFO rather
- // than LBA order. This provides more consistent latency for
- // these IOs.
- if (p == ZIO_PRIORITY_SYNC_READ || p == ZIO_PRIORITY_SYNC_WRITE)
- compfn = vdev_queue_timestamp_compare;
- else
- compfn = vdev_queue_offset_compare;
- avl_create(vdev_queue_class_tree(vq, p), compfn,
- sizeof (zio_t), offsetof(struct zio, io_queue_node));
- }
-}
-
-void
-vdev_queue_fini(vdev_t *vd)
-{
- vdev_queue_t *vq = &vd->vdev_queue;
- zio_priority_t p;
-
- for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++)
- avl_destroy(vdev_queue_class_tree(vq, p));
- avl_destroy(&vq->vq_active_tree);
- avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_READ));
- avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE));
-
- mutex_destroy(&vq->vq_lock);
-}
-
-static void
-vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio)
-{
- spa_t *spa = zio->io_spa;
- spa_stats_history_t *ssh = &spa->spa_stats.io_history;
-
- assert!(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
- avl_add(vdev_queue_class_tree(vq, zio->io_priority), zio);
- avl_add(vdev_queue_type_tree(vq, zio->io_type), zio);
-
- if (ssh->kstat != NULL) {
- mutex_enter(&ssh->lock);
- kstat_waitq_enter(ssh->kstat->ks_data);
- mutex_exit(&ssh->lock);
- }
-}
-
-static void
-vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio)
-{
- spa_t *spa = zio->io_spa;
- spa_stats_history_t *ssh = &spa->spa_stats.io_history;
-
- assert!(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
- avl_remove(vdev_queue_class_tree(vq, zio->io_priority), zio);
- avl_remove(vdev_queue_type_tree(vq, zio->io_type), zio);
-
- if (ssh->kstat != NULL) {
- mutex_enter(&ssh->lock);
- kstat_waitq_exit(ssh->kstat->ks_data);
- mutex_exit(&ssh->lock);
- }
-}
-
-static void
-vdev_queue_pending_add(vdev_queue_t *vq, zio_t *zio)
-{
- spa_t *spa = zio->io_spa;
- spa_stats_history_t *ssh = &spa->spa_stats.io_history;
-
- ASSERT(MUTEX_HELD(&vq->vq_lock));
- assert!(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
- vq->vq_class[zio->io_priority].vqc_active++;
- avl_add(&vq->vq_active_tree, zio);
-
- if (ssh->kstat != NULL) {
- mutex_enter(&ssh->lock);
- kstat_runq_enter(ssh->kstat->ks_data);
- mutex_exit(&ssh->lock);
- }
-}
-
-static void
-vdev_queue_pending_remove(vdev_queue_t *vq, zio_t *zio)
-{
- spa_t *spa = zio->io_spa;
- spa_stats_history_t *ssh = &spa->spa_stats.io_history;
-
- ASSERT(MUTEX_HELD(&vq->vq_lock));
- assert!(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
- vq->vq_class[zio->io_priority].vqc_active--;
- avl_remove(&vq->vq_active_tree, zio);
-
- if (ssh->kstat != NULL) {
- kstat_io_t *ksio = ssh->kstat->ks_data;
-
- mutex_enter(&ssh->lock);
- kstat_runq_exit(ksio);
- if (zio->io_type == ZIO_TYPE_READ) {
- ksio->reads++;
- ksio->nread += zio->io_size;
- } else if (zio->io_type == ZIO_TYPE_WRITE) {
- ksio->writes++;
- ksio->nwritten += zio->io_size;
- }
- mutex_exit(&ssh->lock);
- }
-}
-
-fn vdev_queue_agg_io_done(aio: &mut Zio) {
- if (aio.zio_type == ZIO_TYPE_READ) {
- zio_t *pio;
- while (pio = zio_walk_parents(aio)) != NULL {
- bcopy(aio.data + (pio.offset - aio.offset), pio.data, pio.size);
- }
- }
-
- zio_buf_free(aio.data, aio.size);
-}
-
-// Compute the range spanned by two IOs, which is the endpoint of the last
-// (lio->io_offset + lio->io_size) minus start of the first (fio->io_offset).
-// Conveniently, the gap between fio and lio is given by -IO_SPAN(lio, fio);
-// thus fio and lio are adjacent if and only if IO_SPAN(lio, fio) == 0.
-#define IO_SPAN(fio, lio) ((lio)->io_offset + (lio)->io_size - (fio)->io_offset)
-#define IO_GAP(fio, lio) (-IO_SPAN(lio, fio))
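-
-// The same span/gap arithmetic as a Rust sketch, with a worked example: two
-// IOs at offsets 0..4K and 6K..10K span 10240 bytes and leave a 2048-byte
-// gap between them; adjacent IOs have a gap of 0. The (offset, size) pairs
-// are hypothetical stand-ins for io_offset/io_size, and signed arithmetic is
-// used so an overlap shows up as a negative gap.
-#[allow(dead_code)]
-fn io_span(first: (i64, i64), last: (i64, i64)) -> i64 {
-    last.0 + last.1 - first.0
-}
-
-#[allow(dead_code)]
-fn io_gap(first: (i64, i64), last: (i64, i64)) -> i64 {
-    // io_gap((0, 4096), (6144, 4096)) == 2048
-    -io_span(last, first)
-}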
-
-static zio_t *
-vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
-{
- zio_t *first, *last, *aio, *dio, *mandatory, *nio;
- uint64_t maxgap = 0;
- uint64_t size;
- boolean_t stretch = B_FALSE;
- avl_tree_t *t = vdev_queue_type_tree(vq, zio->io_type);
- enum zio_flag flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT;
-
- if (zio->io_flags & ZIO_FLAG_DONT_AGGREGATE)
- return (NULL);
-
- // Prevent users from setting the zfs_vdev_aggregation_limit
- // tuning larger than SPA_MAXBLOCKSIZE.
- zfs_vdev_aggregation_limit =
- MIN(zfs_vdev_aggregation_limit, SPA_MAXBLOCKSIZE);
-
- first = last = zio;
-
- if (zio->io_type == ZIO_TYPE_READ)
- maxgap = zfs_vdev_read_gap_limit;
-
- // We can aggregate IOs that are sufficiently adjacent and of
- // the same flavor, as expressed by the AGG_INHERIT flags.
- // The latter requirement is necessary so that certain
- // attributes of the IO, such as whether it's a normal IO
- // or a scrub/resilver, can be preserved in the aggregate.
- // We can include optional IOs, but don't allow them
- // to begin a range as they add no benefit in that situation.
-
- // We keep track of the last non-optional IO.
- mandatory = (first->io_flags & ZIO_FLAG_OPTIONAL) ? NULL : first;
-
- // Walk backwards through sufficiently contiguous IOs
- // recording the last non-optional IO.
- while ((dio = AVL_PREV(t, first)) != NULL &&
- (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
- IO_SPAN(dio, last) <= zfs_vdev_aggregation_limit &&
- IO_GAP(dio, first) <= maxgap) {
- first = dio;
- if (mandatory == NULL && !(first->io_flags & ZIO_FLAG_OPTIONAL))
- mandatory = first;
- }
-
- // Skip any initial optional IOs.
- while ((first->io_flags & ZIO_FLAG_OPTIONAL) && first != last) {
- first = AVL_NEXT(t, first);
- ASSERT(first != NULL);
- }
-
-
- // Walk forward through sufficiently contiguous IOs.
- while ((dio = AVL_NEXT(t, last)) != NULL &&
- (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
- IO_SPAN(first, dio) <= zfs_vdev_aggregation_limit &&
- IO_GAP(last, dio) <= maxgap) {
- last = dio;
- if (!(last->io_flags & ZIO_FLAG_OPTIONAL))
- mandatory = last;
- }
-
- // Now that we've established the range of the IO aggregation
- // we must decide what to do with trailing optional IOs.
- // For reads, there's nothing to do. While we are unable to
- // aggregate further, it's possible that a trailing optional
- // IO would allow the underlying device to aggregate with
- // subsequent IOs. We must therefore determine if the next
- // non-optional IO is close enough to make aggregation
- // worthwhile.
- if (zio->io_type == ZIO_TYPE_WRITE && mandatory != NULL) {
- zio_t *nio = last;
- while ((dio = AVL_NEXT(t, nio)) != NULL &&
- IO_GAP(nio, dio) == 0 &&
- IO_GAP(mandatory, dio) <= zfs_vdev_write_gap_limit) {
- nio = dio;
- if (!(nio->io_flags & ZIO_FLAG_OPTIONAL)) {
- stretch = B_TRUE;
- break;
- }
- }
- }
-
- if (stretch) {
- // This may be a no-op.
- dio = AVL_NEXT(t, last);
- dio->io_flags &= ~ZIO_FLAG_OPTIONAL;
- } else {
- while (last != mandatory && last != first) {
- ASSERT(last->io_flags & ZIO_FLAG_OPTIONAL);
- last = AVL_PREV(t, last);
- ASSERT(last != NULL);
- }
- }
-
- if (first == last)
- return (NULL);
-
- size = IO_SPAN(first, last);
- assert!(size <= zfs_vdev_aggregation_limit);
-
- aio = zio_vdev_delegated_io(first->io_vd, first->io_offset,
- zio_buf_alloc(size), size, first->io_type, zio->io_priority,
- flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE,
- vdev_queue_agg_io_done, NULL);
- aio->io_timestamp = first->io_timestamp;
-
- nio = first;
- do {
- dio = nio;
- nio = AVL_NEXT(t, dio);
- assert!(dio->io_type == aio->io_type);
-
- if (dio->io_flags & ZIO_FLAG_NODATA) {
- assert!(dio->io_type == ZIO_TYPE_WRITE);
- bzero((char *)aio->io_data + (dio->io_offset -
- aio->io_offset), dio->io_size);
- } else if (dio->io_type == ZIO_TYPE_WRITE) {
- bcopy(dio->io_data, (char *)aio->io_data +
- (dio->io_offset - aio->io_offset),
- dio->io_size);
- }
-
- zio_add_child(dio, aio);
- vdev_queue_io_remove(vq, dio);
- zio_vdev_io_bypass(dio);
- zio_execute(dio);
- } while (dio != last);
-
- return (aio);
-}
-
-fn vdev_queue_io_to_issue(vdev_queue_t *vq) -> Option<Zio> {
- zio_t *zio, *aio;
- zio_priority_t p;
- avl_index_t idx;
- avl_tree_t *tree;
-
-again:
- ASSERT(MUTEX_HELD(&vq->vq_lock));
-
- p = vdev_queue_class_to_issue(vq);
-
- if (p == ZIO_PRIORITY_NUM_QUEUEABLE) {
- // No eligible queued IOs
- return (NULL);
- }
-
- // For LBA-ordered queues (async / scrub), issue the IO which follows
- // the most recently issued IO in LBA (offset) order.
- //
- // For FIFO queues (sync), issue the IO with the lowest timestamp.
- tree = vdev_queue_class_tree(vq, p);
- vq->vq_io_search.io_timestamp = 0;
- vq->vq_io_search.io_offset = vq->vq_last_offset + 1;
- //VERIFY(avl_find(tree, &vq->vq_io_search, &idx) == NULL);
- zio = avl_nearest(tree, idx, AVL_AFTER);
- if (zio == NULL)
- zio = avl_first(tree);
- assert!(zio->io_priority == p);
-
- aio = vdev_queue_aggregate(vq, zio);
- if (aio != NULL)
- zio = aio;
- else
- vdev_queue_io_remove(vq, zio);
-
- // If the IO is or was optional and therefore has no data, we need to
- // simply discard it. We need to drop the vdev queue's lock to avoid a
- // deadlock that we could encounter since this IO will complete
- // immediately.
- if (zio->io_flags & ZIO_FLAG_NODATA) {
- mutex_exit(&vq->vq_lock);
- zio_vdev_io_bypass(zio);
- zio_execute(zio);
- mutex_enter(&vq->vq_lock);
- goto again;
- }
-
- vdev_queue_pending_add(vq, zio);
- vq->vq_last_offset = zio->io_offset;
-
- return (zio);
-}
-
-pub fn vdev_queue_io(zio_t *zio) -> Option<Zio> {
- vdev_queue_t *vq = &zio.vd.vdev_queue;
-
- if zio->io_flags & ZIO_FLAG_DONT_QUEUE != 0 {
- return zio;
- }
-
- // Children IOs inherit their parent's priority, which might
- // not match the child's IO type. Fix it up here.
- if zio.zio_type == ZIO_TYPE_READ {
- if zio->io_priority != ZIO_PRIORITY_SYNC_READ &&
- zio->io_priority != ZIO_PRIORITY_ASYNC_READ &&
- zio->io_priority != ZIO_PRIORITY_SCRUB
- {
- zio->io_priority = ZIO_PRIORITY_ASYNC_READ;
- }
- } else {
- assert!(zio.zio_type == ZIO_TYPE_WRITE);
- if (zio.priority != ZIO_PRIORITY_SYNC_WRITE &&
- zio.priority != ZIO_PRIORITY_ASYNC_WRITE)
- zio.priority = ZIO_PRIORITY_ASYNC_WRITE;
- }
-
- zio->io_flags |= ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE;
-
- mutex_enter(&vq->vq_lock);
- zio.timestamp = gethrtime();
- vdev_queue_io_add(vq, zio);
- let nio = vdev_queue_io_to_issue(vq);
- mutex_exit(&vq->vq_lock);
-
- if let Some(nio) = nio {
- if nio.done == vdev_queue_agg_io_done {
- nio.no_wait();
- return None;
- }
- }
-
- nio
-}
-
-fn vdev_queue_io_done(zio_t *zio) {
- vdev_queue_t *vq = &zio->io_vd->vdev_queue;
- zio_t *nio;
-
- if zio_injection_enabled {
- delay(SEC_TO_TICK(zio_handle_io_delay(zio)));
- }
-
- mutex_enter(&vq->vq_lock);
-
- vdev_queue_pending_remove(vq, zio);
-
- zio.delta = gethrtime() - zio.timestamp;
- vq.io_complete_ts = gethrtime();
- vq.io_delta_ts = vq.io_complete_ts - zio.timestamp;
-
- while (nio = vdev_queue_io_to_issue(vq)) != NULL {
- mutex_exit(&vq->vq_lock);
- if (nio.done == vdev_queue_agg_io_done) {
- nio.no_wait();
- } else {
- zio_vdev_io_reissue(nio);
- nio.execute();
- }
- mutex_enter(&vq.lock);
- }
-
- mutex_exit(&vq->vq_lock);
-}
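
The aggregation routine removed above merges offset-adjacent IOs of the same flavor, up to zfs_vdev_aggregation_limit. Here is a minimal sketch of that idea in safe Rust, assuming a simplified (offset, size) representation instead of the real zio_t; the function name and signature are illustrative only:

    /// Merge offset-adjacent (offset, size) requests into aggregates no
    /// larger than `limit`. Input must be sorted by offset, non-overlapping.
    fn aggregate(requests: &[(u64, u64)], limit: u64) -> Vec<(u64, u64)> {
        let mut out: Vec<(u64, u64)> = Vec::new();
        for &(offset, size) in requests {
            // Extend the current aggregate if this request starts exactly
            // where it ends and the combined size stays within the limit.
            let extend = match out.last() {
                Some(&(last_off, last_size)) => {
                    last_off + last_size == offset && last_size + size <= limit
                }
                None => false,
            };
            if extend {
                out.last_mut().unwrap().1 += size;
            } else {
                out.push((offset, size));
            }
        }
        out
    }

    fn main() {
        // Two adjacent 4 KiB requests merge; the third is too far away.
        let merged = aggregate(&[(0, 4096), (4096, 4096), (65536, 4096)], 131072);
        assert_eq!(merged, vec![(0, 8192), (65536, 4096)]);
    }
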
View
145 crates/zfs/xdr/mem_ops.rs
@@ -1,145 +0,0 @@
-use std::{mem, ptr};
-
-use super::{XdrOps, XdrError, XdrResult};
-
-pub struct MemOps<'a> {
- pos: usize,
- buffer: &'a mut [u8],
-}
-
-impl<'a> MemOps<'a> {
- pub fn new(buffer: &'a mut [u8]) -> Self {
- MemOps {
- pos: 0,
- buffer: buffer,
- }
- }
-}
-
-// Xdr encodes things in big endian and values are aligned at 4 bytes. For example, a u8 would take
-// up 4 bytes when serialized.
-impl<'a> XdrOps for MemOps<'a> {
- fn get_i64(&mut self) -> XdrResult<i64> {
- if self.pos >= self.buffer.len() {
- Err(XdrError)
- } else if self.buffer.len() - self.pos < 8 {
- Err(XdrError)
- } else {
- let d: &i64 = unsafe { mem::transmute(&self.buffer[self.pos]) };
- // let val_d = i64::from_be(*d);
- self.pos += 8;
- Ok(i64::from_be(*d))
- }
- }
-
- fn put_i64(&mut self, l: i64) -> XdrResult<()> {
- if self.pos >= self.buffer.len() || self.buffer.len() - self.pos < 8 {
- // Buffer is too small
- return Err(XdrError);
- }
-
- let d: &mut i64 = unsafe { mem::transmute(&mut self.buffer[self.pos]) };
- *d = l.to_be();
- self.pos += 8;
- Ok(())
- }
-
- fn get_i32(&mut self) -> XdrResult<i32> {
- if self.pos >= self.buffer.len() {
- Err(XdrError)
- } else if self.buffer.len() - self.pos < 4 {
- Err(XdrError)
- } else {
- let d: &i32 = unsafe { mem::transmute(&self.buffer[self.pos]) };
- self.pos += 4;
- Ok(i32::from_be(*d))
- }
- }
-
- fn put_i32(&mut self, i: i32) -> XdrResult<()> {
- if self.pos >= self.buffer.len() || self.buffer.len() - self.pos < 4 {
- // Buffer is too small
- return Err(XdrError);
- }
-
- let d: &mut i32 = unsafe { mem::transmute(&mut self.buffer[self.pos]) };
- *d = i.to_be();
- self.pos += 4;
- Ok(())
- }
-
- fn get_bytes(&mut self, bytes: &mut [u8]) -> XdrResult<()> {
- if bytes.is_empty() {
- return Ok(());
- }
- if self.pos >= self.buffer.len() {
- Err(XdrError)
- } else if self.buffer.len() - self.pos < bytes.len() {
- Err(XdrError)
- } else {
- // Technically the upper bound on this slice doesn't have to be there
- let src = self.buffer[self.pos..self.pos + bytes.len()].as_ptr();
- let dst = bytes.as_mut_ptr();
- unsafe {
- ptr::copy(src, dst, bytes.len());
- }
- self.pos += bytes.len();
-
- Ok(())
- }
- }
-
- fn put_bytes(&mut self, bytes: &[u8]) -> XdrResult<()> {
- if self.pos >= self.buffer.len() || self.buffer.len() - self.pos < bytes.len() {
- // Buffer is too small
- return Err(XdrError);
- }
-
- let src = bytes.as_ptr();
- // Technically the upper bound on this slice doesn't have to be there
- let dst = self.buffer[self.pos..self.pos + bytes.len()].as_mut_ptr();
- unsafe {
- ptr::copy(src, dst, bytes.len());
- }
- self.pos += bytes.len();
-
- Ok(())
- }
-
- fn get_pos(&self) -> usize {
- self.pos
- }
-
- fn set_pos(&mut self, new_pos: usize) -> XdrResult<()> {
- self.pos = new_pos;
- Ok(())
- }
-}
-
-#[test]
-fn test_mem_ops_i64() {
- // [0, 0, 0, 0, 0, 0, 1, 1] decodes to 257 as a big-endian i64.
- let mut buffer = [0u8, 0, 0, 0, 0, 0, 1, 1];
- let mut mem_ops = MemOps::new(&mut buffer);
- assert_eq!(mem_ops.get_i64().unwrap(), 257);
-}
-
-#[test]
-fn test_mem_ops_i64_and_back() {
- let mut buffer = [0u8; 8];
- let mut mem_ops = MemOps::new(&mut buffer);
- mem_ops.put_i64(424242).unwrap();
- mem_ops.set_pos(0).unwrap();
- assert_eq!(mem_ops.get_i64().unwrap(), 424242);
-}
-
-#[test]
-fn test_mem_ops_i32() {
- // [0, 0, 1, 1] decodes to 257 as a big-endian i32.
- let mut buffer = [0u8, 0, 1, 1];
- let mut mem_ops = MemOps::new(&mut buffer);
- assert_eq!(mem_ops.get_i32().unwrap(), 257);
-}
-
-#[test]
-fn test_mem_ops_i32_and_back() {
- let mut buffer = [0u8; 4];
- let mut mem_ops = MemOps::new(&mut buffer);
- mem_ops.put_i32(424242).unwrap();
- mem_ops.set_pos(0).unwrap();
- assert_eq!(mem_ops.get_i32().unwrap(), 424242);
-}
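
A hedged usage sketch of the buffer-backed XdrOps implementation above, showing the 4-byte stepping described in the module comment (it assumes MemOps and the XdrOps trait are in scope; it has not been verified against the rest of the crate):

    #[test]
    fn xdr_values_are_four_byte_aligned() {
        // Every i32 occupies 4 bytes and every i64 occupies 8 bytes.
        let mut buf = [0u8; 12];
        let mut ops = MemOps::new(&mut buf);

        ops.put_i32(1).unwrap();
        ops.put_i64(2).unwrap();
        assert_eq!(ops.get_pos(), 12); // 4 bytes for the i32, 8 for the i64

        ops.set_pos(0).unwrap();
        assert_eq!(ops.get_i32().unwrap(), 1);
        assert_eq!(ops.get_i64().unwrap(), 2);
    }
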
View
5 crates/zfs/xdr/mod.rs
@@ -1,5 +0,0 @@
-pub use self::xdr::*;
-pub use self::mem_ops::MemOps;
-
-pub mod xdr;
-pub mod mem_ops;
View
219 crates/zfs/xdr/xdr.rs
@@ -1,219 +0,0 @@
-// use std::*;
-
-#[derive(Debug)]
-pub struct XdrError;
-
-pub type XdrResult<T> = Result<T, XdrError>;
-
-pub enum XdrOp {
- Encode,
- Decode,
- Free,
-}
-
-// TODO: Return `XdrResult` instead
-pub trait XdrOps {
- /// Get an i64 from the underlying stream
- fn get_i64(&mut self) -> XdrResult<i64>;
-
- /// Put an i64 to the underlying stream
- fn put_i64(&mut self, l: i64) -> XdrResult<()>;
-
- /// Get an i32 from the underlying stream
- fn get_i32(&mut self) -> XdrResult<i32>;
-
- /// Put an i32 to the underlying stream
- fn put_i32(&mut self, i: i32) -> XdrResult<()>;
-
- /// Get some bytes from the underlying stream
- fn get_bytes(&mut self, bytes: &mut [u8]) -> XdrResult<()>;
-
- /// Put some bytes into the underlying stream
- fn put_bytes(&mut self, bytes: &[u8]) -> XdrResult<()>;
-
- /// Returns the offset in bytes from the beginning of the stream
- fn get_pos(&self) -> usize;
-
- /// Lets you reposition the stream
- fn set_pos(&mut self, offset: usize) -> XdrResult<()>;
-
-// TODO: Not sure if we'll need this?
-// Buf quick ptr to buffered data
-// fn inline(&mut self, len: usize) -> *mut i32;
-
-// TODO: Not sure if we'll need this?
-// Change, retrieve client info
-// fn control(&mut self, req: isize, op: void *);
-}
-
-pub trait Xdr {
- fn encode_bool(&mut self, i: bool) -> XdrResult<()>;
- fn decode_bool(&mut self) -> XdrResult<bool>;
-
- fn encode_i8(&mut self, i: i8) -> XdrResult<()>;
- fn decode_i8(&mut self) -> XdrResult<i8>;
-
- fn encode_u8(&mut self, u: u8) -> XdrResult<()>;
- fn decode_u8(&mut self) -> XdrResult<u8>;
-
- fn encode_i16(&mut self, i: i16) -> XdrResult<()>;
- fn decode_i16(&mut self) -> XdrResult<i16>;
-
- fn encode_u16(&mut self, u: u16) -> XdrResult<()>;
- fn decode_u16(&mut self) -> XdrResult<u16>;
-
- fn encode_i32(&mut self, i: i32) -> XdrResult<()>;
- fn decode_i32(&mut self) -> XdrResult<i32>;
-
- fn encode_u32(&mut self, u: u32) -> XdrResult<()>;
- fn decode_u32(&mut self) -> XdrResult<u32>;
-
- fn encode_i64(&mut self, i: i64) -> XdrResult<()>;
- fn decode_i64(&mut self) -> XdrResult<i64>;
-
- fn encode_u64(&mut self, u: u64) -> XdrResult<()>;
- fn decode_u64(&mut self) -> XdrResult<u64>;
-
- fn encode_opaque(&mut self, bytes: &[u8]) -> XdrResult<()>;
- fn decode_opaque(&mut self, bytes: &mut [u8]) -> XdrResult<()>;
-
- fn encode_bytes(&mut self, bytes: &[u8]) -> XdrResult<()>;
- fn decode_bytes(&mut self) -> XdrResult<Vec<u8>>;
-
- fn encode_string(&mut self, string: &String) -> XdrResult<()>;
- fn decode_string(&mut self) -> XdrResult<String>;
-}
-
-impl<T: XdrOps> Xdr for T {
- fn encode_bool(&mut self, b: bool) -> XdrResult<()> {
- let i = match b {
- false => 0,
- true => 1,
- };
- self.put_i32(i)
- }
-
- fn decode_bool(&mut self) -> XdrResult<bool> {
- let i = try!(self.get_i32());
- match i {
- 0 => Ok(false),
- 1 => Ok(true),
- _ => Err(XdrError),
- }
- }
-
- fn encode_i8(&mut self, i: i8) -> XdrResult<()> {
- self.put_i32(i as i32)
- }
-
- fn decode_i8(&mut self) -> XdrResult<i8> {
- self.get_i32().map(|x| x as i8)
- }
-
- fn encode_u8(&mut self, u: u8) -> XdrResult<()> {
- self.put_i32(u as i32)
- }
-
- fn decode_u8(&mut self) -> XdrResult<u8> {
- self.get_i32().map(|x| x as u8)
- }
-
- fn encode_i16(&mut self, i: i16) -> XdrResult<()> {
- self.put_i32(i as i32)
- }
-
- fn decode_i16(&mut self) -> XdrResult<i16> {
- self.get_i32().map(|x| x as i16)
- }
-
- fn encode_u16(&mut self, u: u16) -> XdrResult<()> {
- self.put_i32(u as i32)
- }
-
- fn decode_u16(&mut self) -> XdrResult<u16> {
- self.get_i32().map(|x| x as u16)
- }
-
- fn encode_i32(&mut self, i: i32) -> XdrResult<()> {
- self.put_i32(i)
- }
-
- fn decode_i32(&mut self) -> XdrResult<i32> {
- self.get_i32()
- }
-
- fn encode_u32(&mut self, u: u32) -> XdrResult<()> {
- self.put_i32(u as i32)
- }
-
- fn decode_u32(&mut self) -> XdrResult<u32> {
- self.get_i32().map(|x| x as u32)
- }
-
- fn encode_i64(&mut self, i: i64) -> XdrResult<()> {
- self.put_i64(i)
- }
-
- fn decode_i64(&mut self) -> XdrResult<i64> {
- self.get_i64()
- }
-
- fn encode_u64(&mut self, u: u64) -> XdrResult<()> {
- self.put_i64(u as i64)
- }
-
- fn decode_u64(&mut self) -> XdrResult<u64> {
- self.get_i64().map(|x| x as u64)
- }
-
- fn encode_opaque(&mut self, bytes: &[u8]) -> XdrResult<()> {
- // XDR byte strings always have len%4 == 0
- let crud: [u8; 4] = [0; 4];
- let mut round_up = bytes.len() % 4;
- if round_up > 0 {
- round_up = 4 - round_up;
- }
- try!(self.put_bytes(bytes));
- try!(self.put_bytes(&crud[0..round_up]));
- Ok(())
- }
-
- fn decode_opaque(&mut self, bytes: &mut [u8]) -> XdrResult<()> {
- // XDR byte strings always have len%4 == 0
- let mut crud: [u8; 4] = [0; 4];
- let mut round_up = bytes.len() % 4;
- if round_up > 0 {
- round_up = 4 - round_up;
- }
- try!(self.get_bytes(bytes));
- try!(self.get_bytes(&mut crud[0..round_up]));
- Ok(())
- }
-
- fn encode_bytes(&mut self, bytes: &[u8]) -> XdrResult<()> {
- try!(self.encode_u32(bytes.len() as u32));
- self.encode_opaque(bytes)
- }
-
- fn decode_bytes(&mut self) -> XdrResult<Vec<u8>> {
- let count = try!(self.decode_u32());
- let mut bytes = vec![0; count as usize];
- try!(self.decode_opaque(&mut bytes[..]));
- Ok(bytes)
- }
-
- fn encode_string(&mut self, string: &String) -> XdrResult<()> {
- try!(self.encode_u32(string.as_bytes().len() as u32));
- self.encode_opaque(string.as_bytes())
- }
-
- fn decode_string(&mut self) -> XdrResult<String> {
- let count = try!(self.decode_u32());
- if count > 1024 {
- return Err(XdrError);
- }
- let mut bytes = vec![0; count as usize];
- try!(self.decode_opaque(&mut bytes[..]));
- String::from_utf8(bytes).map_err(|_| XdrError)
- }
-}
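
A small illustration of the padding rule enforced by encode_opaque/encode_bytes above, written against this trait and the MemOps backend from the previous file (an editorial sketch, not a test that shipped with the crate):

    #[test]
    fn opaque_data_is_padded_to_four_bytes() {
        // Assumes MemOps, XdrOps, and Xdr are in scope.
        // encode_bytes writes a 4-byte length word, then the payload rounded
        // up to a multiple of 4, so 5 bytes of payload occupy 4 + 8 = 12.
        let mut buf = [0u8; 16];
        let mut ops = MemOps::new(&mut buf);

        ops.encode_bytes(b"hello").unwrap();
        assert_eq!(ops.get_pos(), 12);

        ops.set_pos(0).unwrap();
        assert_eq!(ops.decode_bytes().unwrap(), b"hello".to_vec());
    }
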
View
190 crates/zfs/zap.rs
@@ -1,190 +0,0 @@
-use std::{fmt, mem, ptr, str};
-use std::io::Seek;
-
-use super::from_bytes::FromBytes;
-
-const MZAP_ENT_LEN: usize = 64;
-const MZAP_NAME_LEN: usize = MZAP_ENT_LEN - 8 - 4 - 2;
-
-#[repr(u64)]
-#[derive(Copy, Clone, Debug)]
-pub enum ZapObjectType {
- Micro = (1 << 63) + 3,
- Header = (1 << 63) + 1,
- Leaf = 1 << 63,
-}
-
-/// Microzap
-#[repr(packed)]
-pub struct MZapPhys {
- pub block_type: ZapObjectType, // ZapObjectType::Micro
- pub salt: u64,
- pub norm_flags: u64,
- pad: [u64; 5],
-}
-
-pub struct MZapWrapper {
- pub phys: MZapPhys,
- pub chunks: Vec<MZapEntPhys>, // variable size depending on block size
-}
-
-impl FromBytes for MZapWrapper {
- fn from_bytes(data: &[u8]) -> Result<Self, String> {
- if data.len() >= mem::size_of::<MZapPhys>() {
- // Read the first part of the mzap -- its base phys struct
- let mzap_phys = unsafe { ptr::read(data.as_ptr() as *const MZapPhys) };
- // Read the mzap entries, aka chunks
- let mut mzap_entries = Vec::new();
- let num_entries = (data.len() - mem::size_of::<MZapPhys>()) /
- mem::size_of::<MZapEntPhys>();
- for i in 0..num_entries {
- let entry_pos = mem::size_of::<MZapPhys>() + i * mem::size_of::<MZapEntPhys>();
- let mzap_ent = unsafe {
- ptr::read(data[entry_pos..].as_ptr() as *const MZapEntPhys)
- };
- mzap_entries.push(mzap_ent);
- }
- Ok(MZapWrapper {
- phys: mzap_phys,
- chunks: mzap_entries,
- })
- } else {
- Err("Error: needs a proper error message".to_string())
- }
- }
-}
-
-impl fmt::Debug for MZapWrapper {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- try!(write!(f,
- "MZapPhys {{\nblock_type: {:?},\nsalt: {:X},\nnorm_flags: {:X},\nchunk: [\n",
- self.phys.block_type,
- self.phys.salt,
- self.phys.norm_flags));
- for chunk in &self.chunks {
- try!(write!(f, "{:?}\n", chunk));
- }
- try!(write!(f, "] }}\n"));
- Ok(())
- }
-}
-
-#[repr(packed)]
-pub struct MZapEntPhys {
- pub value: u64,
- pub cd: u32,
- pub pad: u16,
- pub name: [u8; MZAP_NAME_LEN],
-}
-
-impl MZapEntPhys {
- pub fn name(&self) -> Option<&str> {
- let mut len = 0;
- for c in &self.name[..] {
- if *c == 0 {
- break;
- }
- len += 1;
- }
-
- str::from_utf8(&self.name[..len]).ok()
- }
-}
-
-impl fmt::Debug for MZapEntPhys {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- try!(write!(f,
- "MZapEntPhys {{\nvalue: {:X},\ncd: {:X},\nname: ",
- self.value,
- self.cd));
- for i in 0..MZAP_NAME_LEN {
- if self.name[i] == 0 {
- break;
- }
- try!(write!(f, "{}", self.name[i] as char));
- }
- try!(write!(f, "\n}}\n"));
- Ok(())
- }
-}
-
-/// Fatzap
-#[repr(packed)]
-pub struct ZapPhys {
- pub block_type: ZapObjectType, // ZapObjectType::Header
- pub magic: u64,
- pub ptr_table: ZapTablePhys,
- pub free_block: u64,
- pub num_leafs: u64,
- pub num_entries: u64,
- pub salt: u64,
- pub pad: [u64; 8181],
- pub leafs: [u64; 8192],
-}
-
-#[repr(packed)]
-pub struct ZapTablePhys {
- pub block: u64,
- pub num_blocks: u64,
- pub shift: u64,
- pub next_block: u64,
- pub block_copied: u64,
-}
-
-const ZAP_LEAF_MAGIC: u32 = 0x2AB1EAF;
-const ZAP_LEAF_CHUNKSIZE: usize = 24;
-
-// The amount of space within the chunk available for the array is:
-// chunk size - space for type (1) - space for next pointer (2)
-const ZAP_LEAF_ARRAY_BYTES: usize = ZAP_LEAF_CHUNKSIZE - 3;
-
-// pub struct ZapLeafPhys {
-// pub header: ZapLeafHeader,
-// hash: [u16; ZAP_LEAF_HASH_NUMENTRIES],
-// union zap_leaf_chunk {
-// entry,
-// array,
-// free,
-// } chunks[ZapLeafChunk; ZAP_LEAF_NUMCHUNKS],
-// }
-
-#[repr(packed)]
-pub struct ZapLeafHeader {
- pub block_type: ZapObjectType, // ZapObjectType::Leaf
- pub next: u64,
- pub prefix: u64,
- pub magic: u32,
- pub n_free: u16,
- pub n_entries: u16,
- pub prefix_len: u16,
- pub free_list: u16,
- pad2: [u8; 12],
-}
-
-#[repr(packed)]
-struct ZapLeafEntry {
- leaf_type: u8,
- int_size: u8,
- next: u16,
- name_chunk: u16,
- name_length: u16,
- value_chunk: u16,
- value_length: u16,
- cd: u16,
- pad: [u8; 2],
- hash: u64,
-}
-
-#[repr(packed)]
-struct ZapLeafArray {
- leaf_type: u8,
- array: [u8; ZAP_LEAF_ARRAY_BYTES],
- next: u16,
-}
-
-#[repr(packed)]
-struct ZapLeafFree {
- free_type: u8,
- pad: [u8; ZAP_LEAF_ARRAY_BYTES],
- next: u16,
-}
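
As a hedged illustration of how the microzap structures above could be consumed, here is a small lookup helper (the mzap_lookup function is this sketch's invention, not part of the removed module):

    /// Hypothetical helper: linear lookup of `name` in a parsed microzap.
    /// Returns the 64-bit value of the first chunk whose entry name matches.
    fn mzap_lookup(mzap: &MZapWrapper, name: &str) -> Option<u64> {
        mzap.chunks
            .iter()
            .find(|chunk| chunk.name() == Some(name))
            .map(|chunk| chunk.value)
    }
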
View
38 crates/zfs/zfs.rs
@@ -1,38 +0,0 @@
-use std::result;
-
-/// The error type used throughout ZFS
-#[derive(Copy, Clone, Debug, PartialEq)]
-pub enum Error {
- NoEntity,
- Invalid,
-}
-
-/// The Result type used throughout ZFS
-pub type Result<T> = result::Result<T, Error>;
-
-/// The following states are written to disk as part of the normal
-/// SPA lifecycle: Active, Exported, Destroyed, Spare, L2Cache. The remaining
-/// states are software abstractions used at various levels to communicate
-/// pool state.
-#[derive(Copy, Clone, PartialEq)]
-pub enum PoolState {
- Active = 0, // In active use
- Exported, // Explicitly exported
- Destroyed, // Explicitly destroyed
- Spare, // Reserved for hot spare use
- L2Cache, // Level 2 ARC device
- Uninitialized, // Internal spa_t state
- Unavailable, // Internal libzfs state
- PotentiallyActive, // Internal libzfs state
-}
-
-/// Internal SPA load state. Used by FMA diagnosis engine.
-#[derive(Copy, Clone, PartialEq)]
-pub enum SpaLoadState {
- None, // no load in progress
- Open, // normal open
- Import, // import in progress
- TryImport, // tryimport in progress
- Recover, // recovery requested
- Error, // load failed
-}
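
A brief sketch of how the crate-wide Result and Error aliases above might be used by a caller (check_pool is a hypothetical helper, shown only for illustration):

    // Map a pool state to the crate's Result/Error types defined above.
    fn check_pool(state: PoolState) -> Result<()> {
        match state {
            // Only pools in active use or cleanly exported are acceptable here.
            PoolState::Active | PoolState::Exported => Ok(()),
            _ => Err(Error::Invalid),
        }
    }
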
View
8 crates/zfs/zil_header.rs
@@ -1,8 +0,0 @@
-use super::block_ptr::BlockPtr;
-
-#[repr(packed)]
-pub struct ZilHeader {
- claim_txg: u64,
- replay_seq: u64,
- log: BlockPtr,
-}
View
950 crates/zfs/zio.rs
@@ -1,950 +0,0 @@
-use std::{mem, ptr};
-use std::fs::File;
-use std::io::{Read, Seek, SeekFrom, Write};
-
-use super::avl;
-use super::block_ptr::BlockPtr;
-use super::dvaddr::DVAddr;
-use super::from_bytes::FromBytes;
-use super::lzjb;
-use super::uberblock::Uberblock;
-use super::zfs;
-
-pub struct Reader {
- pub disk: File,
-}
-
-impl Reader {
- // TODO: Error handling
- pub fn read(&mut self, start: usize, length: usize) -> Vec<u8> {
- let mut ret: Vec<u8> = vec![0; length*512];
-
- self.disk.seek(SeekFrom::Start(start as u64 * 512));
- self.disk.read(&mut ret);
-
- return ret;
- }
-
- pub fn write(&mut self, block: usize, data: &[u8; 512]) {
- self.disk.seek(SeekFrom::Start(block as u64 * 512));
- self.disk.write(data);
- }
-
- pub fn read_dva(&mut self, dva: &DVAddr) -> Vec<u8> {
- self.read(dva.sector() as usize, dva.asize() as usize)
- }
-
- pub fn read_block(&mut self, block_ptr: &BlockPtr) -> Result<Vec<u8>, String> {
- let data = self.read_dva(&block_ptr.dvas[0]);
- match block_ptr.compression() {
- 2 => {
- // compression off
- Ok(data)
- }
- 1 | 3 => {
- // lzjb compression
- let mut decompressed = vec![0; (block_ptr.lsize()*512) as usize];
- lzjb::decompress(&data, &mut decompressed);
- Ok(decompressed)
- }
- _ => Err("Error: not enough bytes".to_string()),
- }
- }
-
- pub fn read_type<T: FromBytes>(&mut self, block_ptr: &BlockPtr) -> Result<T, String> {
- let data = self.read_block(block_ptr);
- data.and_then(|data| T::from_bytes(&data[..]))
- }
-
- pub fn read_type_array<T: FromBytes>(&mut self,
- block_ptr: &BlockPtr,
- offset: usize)
- -> Result<T, String> {
- let data = self.read_block(block_ptr);
- data.and_then(|data| T::from_bytes(&data[offset * mem::size_of::<T>()..]))
- }
-
- pub fn uber(&mut self) -> Result<Uberblock, String> {
- let mut newest_uberblock: Option<Uberblock> = None;
- for i in 0..128 {
- if let Ok(uberblock) = Uberblock::from_bytes(&self.read(256 + i * 2, 2)) {
- let newest = match newest_uberblock {
- Some(previous) => {
- if uberblock.txg > previous.txg {
- // Found a newer uberblock
- true
- } else {
- false
- }
- }
- // No uberblock yet, so first one we find is the newest
- None => true,
- };
-
- if newest {
- newest_uberblock = Some(uberblock);
- }
- }
- }
-
- match newest_uberblock {
- Some(uberblock) => Ok(uberblock),
- None => Err("Failed to find valid uberblock".to_string()),
- }
- }
-}
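
A hedged usage sketch of the Reader above: open a raw disk image, pick the newest uberblock, and issue a raw sector read. The image path is illustrative, and the txg field is assumed to be accessible from the caller, as its use in uber() suggests:

    use std::fs::File;

    fn main() {
        let disk = File::open("zfs.img").expect("failed to open disk image");
        let mut reader = Reader { disk: disk };

        // uber() scans the 128 uberblock slots and keeps the highest txg.
        let uberblock = reader.uber().expect("no valid uberblock found");
        println!("newest uberblock txg: {}", uberblock.txg);

        // Raw reads are sector-granular: (start sector, length in sectors).
        let label = reader.read(0, 2);
        assert_eq!(label.len(), 2 * 512);
    }
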
-
-/// /////////////////////////////////////////////////////////////////////////////////////////////////
-
-// pub struct Zio {
-// Core information about this IO
-// bookmark: ZBookmarkPhys,
-// prop: ZioProp,
-// zio_type: Type,
-// child_type: Child,
-// int io_cmd,
-// priority: Priority,
-// reexecute: u8,
-// state: [u8; NUM_WAIT_TYPES],
-// txg: u64,
-// spa_t *io_spa,
-// blkptr_t *io_bp,
-// blkptr_t *io_bp_override,
-// bp_copy: BlockPtr,
-// list_t io_parent_list,
-// list_t io_child_list,
-// zio_link_t *io_walk_link,
-// zio_t *logical,
-// zio_transform_t *io_transform_stack,
-//
-// Callback info
-// ready: DoneFunc,
-// physdone: DoneFunc,
-// done: DoneFunc,
-// private: *void,
-// prev_space_delta: i64, // DMU private
-// bp_orig: BlockPtr,
-//
-// Data represented by this IO
-// void *data,
-// void *orig_data,
-// size: u64,
-// orig_size: u64,
-//
-// Stuff for the vdev stack
-// vdev_t *vd,
-// void *io_vsd,
-// const zio_vsd_ops_t *io_vsd_ops,
-//
-// offset: u64,
-// timestamp: hrtime_t, // submitted at
-// delta: hrtime_t, // vdev queue service delta
-// delay: u64, // vdev disk service delta (ticks)
-// queue_node: avl::NodeId,
-// offset_node: avl::NodeId,
-//
-// Internal pipeline state
-// flags: Flag,
-// stage: State,
-// pipeline: State,
-// orig_flags: ZioFlag,
-// orig_stage: State,
-// orig_pipeline: State,
-// error: zfs::Error,
-// child_error: [zfs::Error; NUM_CHILD_TYPES],
-// children: [[u64; NUM_WAIT_TYPES]; NUM_CHILD_TYPES],
-// child_count: u64,
-// phys_children: u64,
-// parent_count: u64,
-// uint64_t *stall,
-// zio_t *gang_leader,
-// zio_gang_node_t *gang_tree,
-// void *executor,
-// void *waiter,
-// kmutex_t lock,
-// kcondvar_t cv,*/
-//
-// FMA state
-// zio_cksum_report_t *io_cksum_report,
-// uint64_t io_ena,
-//
-// Taskq dispatching state
-// tqent: TaskqEnt,
-// }
-//
-// impl Zio {
-// pub fn root(spa: Option<&Spa>, zio_done_func_t *done, void *private, flags: Flag) -> Self {
-// Self::null(None, spa, None, done, private, flags)
-// }
-//
-// pub fn read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
-// void *data, uint64_t size, zio_done_func_t *done, void *private,
-// zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb) -> Self {
-// zfs_blkptr_verify(spa, bp);
-//
-// let pipeline =
-// if flags & ZIO_FLAG_DDT_CHILD {
-// ZIO_DDT_CHILD_READ_PIPELINE
-// } else { ZIO_READ_PIPELINE };
-//
-// Self::create(pio, spa, BP_PHYSICAL_BIRTH(bp), bp,
-// data, size, done, private,
-// Type::Read, priority, flags, None, 0, zb,
-// State::Open, pipeline)
-// }
-//
-// fn null(pio: Option<&Zio>, spa: Option<&Spa>, vd: Option<&vdev::Vdev>, zio_done_func_t *done,
-// void *private, flags: Flag) -> Self {
-// Self::create(pio, spa, 0, None, None, 0, done, private,
-// Type::Null, Priority::Now, flags, vd, 0, None,
-// State::Open, ZIO_INTERLOCK_PIPELINE)
-// }
-//
-// fn create(zio_t *pio, spa_t *spa, txg: u64, bp: Option<&BlockPtr>,
-// void *data, size: u64, zio_done_func_t *done, void *private,
-// zio_type: Type, priority: Priority, flags: Flag,
-// vd: Option<&vdev::Vdev>, offset: u64, zb: Option<&ZBookmarkPhys>,
-// stage: State, pipeline: State)-> Self {
-// assert!(size <= SPA_MAXBLOCKSIZE);
-// assert!(util::p2_phase(size, SPA_MINBLOCKSIZE) == 0);
-// assert!(util::p2_phase(offset, SPA_MINBLOCKSIZE) == 0);
-//
-// assert!(!vd || spa_config_held(spa, SCL_STATE_ALL, RW_READER));
-// assert!(!bp || !(flags & ZIO_FLAG_CONFIG_WRITER));
-// assert!(vd || stage == ZIO_STAGE_OPEN);
-//
-// zio = kmem_cache_alloc(zcache, KM_SLEEP);
-// bzero(zio, sizeof (zt));
-//
-// mutex_init(&zio->lock, NULL, MUTEX_DEFAULT, NULL);
-// cv_init(&zio->cv, NULL, CV_DEFAULT, NULL);
-//
-// list_create(&zio->parent_list, sizeof (zlink_t),
-// offsetof(zlink_t, zl_parent_node));
-// list_create(&zio->child_list, sizeof (zlink_t),
-// offsetof(zlink_t, zl_child_node));
-//
-// let child_type =
-// if vd.is_some() {
-// Child::Vdev
-// } else if flags & ZIO_FLAG_GANG_CHILD {
-// Child::Gang
-// } else if flags & ZIO_FLAG_DDT_CHILD {
-// Child::Ddt
-// } else {
-// Child::Logical
-// };
-//
-// if let Some(bp) = bp {
-// zio.bp = (blkptr_t *)bp;
-// zio.bp_copy = *bp;
-// zio.bp_orig = *bp;
-// if zio_type != Type::Write || child_type == Child::Ddt {
-// zio.bp = &zio.bp_copy; // so caller can free
-// }
-// if child_type == Child::Logical {
-// zio.logical = zio;
-// }
-// if child_type > Child::Gang && BP_IS_GANG(bp) {
-// pipeline |= ZIO_GANG_STAGES;
-// }
-// }
-//
-// if zb != NULL {
-// zio.bookmark = *zb;
-// }
-//
-// if let Some(pio) = pio {
-// if zio.logical == NULL {
-// zio.logical = pio.logical;
-// }
-// if zio.child_type == Child::Gang {
-// zio.gang_leader = pio.gang_leader;
-// }
-// Self::add_child(pio, zio);
-// }
-//
-// taskq::taskq_init_ent(&zio->tqent);
-//
-// Zio {
-// child_type: child_type,
-// spa: spa,
-// txg: txg,
-// done: done,
-// private: private,
-// zio_type: zio_type,
-// priority: priority,
-// vd: vd,
-// offset: offset,
-//
-// data: data,
-// orig_data: data,
-// size: size,
-// orig_size: size,
-//
-// flags: flags,
-// orig_flags: flags,
-// stage: stage,
-// orig_stage: stage,
-// pipeline: pipeline,
-// orig_pipeline: pipeline,
-//
-// state: [stage >= State::Ready,
-// state >= State::Done],
-// }
-// }
-//
-// fn read_phys(zio_t *pio, vdev_t *vd, offset: u64, size: u64,
-// void *data, int checksum, zio_done_func_t *done, void *private,
-// priority: Priority, zio_flag flags, labels: bool) -> Zio {
-// assert!(vd->vdev_children == 0);
-// assert!(!labels || offset + size <= VDEV_LABEL_START_SIZE ||
-// offset >= vd.vdev_psize - VDEV_LABEL_END_SIZE);
-// assert!(offset + size <= vd.vdev_psize);
-//
-// let mut zio = Self::create(pio, vd.vdev_spa, 0, NULL, data, size, done, private,
-// Type::Read, priority, flags | ZIO_FLAG_PHYSICAL, vd, offset,
-// NULL, State::Open, ZIO_READ_PHYS_PIPELINE);
-//
-// zio.prop.checksum = checksum;
-//
-// zio
-// }
-//
-// ==========================================================================
-// Parent/Child relationships
-// ==========================================================================
-//
-// fn add_child(parent: &mut Zio, child: &mut Zio) {
-// zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_SLEEP);
-// int w;
-//
-// Logical I/Os can have logical, gang, or vdev children.
-// Gang I/Os can have gang or vdev children.
-// Vdev I/Os can only have vdev children.
-// The following assert captures all of these constraints.
-// assert!(cio->io_child_type <= pio->io_child_type);
-//
-// zl.parent = parent;
-// zl.child = child;
-//
-// mutex_enter(&child.lock);
-// mutex_enter(&parent.lock);
-//
-// assert!(parent.state[WaitType::Done] == 0);
-//
-// for w in 0..NUM_WAIT_TYPES {
-// parent.children[child.child_type][w] += !child.state[w];
-// }
-//
-// list_insert_head(&pio->io_child_list, zl);
-// list_insert_head(&cio->io_parent_list, zl);
-//
-// parent.child_count += 1;
-// child.parent_count += 1;
-//
-// mutex_exit(&pio->io_lock);
-// mutex_exit(&cio->io_lock);
-// }
-//
-// ==========================================================================
-// Execute the IO pipeline
-// ==========================================================================
-//
-// fn taskq_dispatch(&mut self, mut tq_type: TaskqType, cut_in_line: bool) {
-// let spa = self.spa;
-// let flags = if cut_in_line { TQ_FRONT } else { 0 };
-//
-// let zio_type =
-// if self.flags & (FLAG_CONFIG_WRITER | FLAG_PROBE) != 0 {
-// If we're a config writer or a probe, the normal issue and
-// interrupt threads may all be blocked waiting for the config lock.
-// In this case, select the otherwise-unused taskq for ZIO_TYPE_NULL.
-// Type::Null
-// } else if self.zio_type == Type::Write && self.vd.is_some() && self.vd.vdev_aux {
-// A similar issue exists for the L2ARC write thread until L2ARC 2.0.
-// Type::Null
-// } else {
-// self.zio_type
-// };
-//
-// If this is a high priority IO, then use the high priority taskq if
-// available.
-// if self.priority == Priority::Now && spa->spa_zio_taskq[t][tq_type + 1].stqs_count != 0 {
-// tq_type += 1;
-// }
-//
-// assert!(tq_type < NUM_TASKQ_TYPES);
-//
-// NB: We are assuming that the zio can only be dispatched
-// to a single taskq at a time. It would be a grievous error
-// to dispatch the zio to another taskq at the same time.
-// assert!(taskq_empty_ent(&zio.tqent));
-// spa.taskq_dispatch_ent(zio_type, tq_type, Box::new(|| { self.execute() }), flags, &self.tqent);
-// }
-//
-// fn taskq_member(&self, TaskqType q) -> bool {
-// let spa = self.spa;
-//
-// for t in 0..NUM_ZIO_TYPES {
-// let tqs = &spa.zio_taskq[t][q];
-// for i in 0..tqs.count {
-// if tqs.taskq[i].member(self.executor) {
-// return true;
-// }
-// }
-// }
-//
-// false
-// }
-//
-// fn issue_async(&self) -> PipelineFlow {
-// self.taskq_dispatch(TaskqType::Issue, false);
-//
-// PipelineFlow::Stop
-// }
-//
-// fn interrupt(&self) {
-// self.taskq_dispatch(TaskqType::Interrupt, false);
-// }
-//
-// Execute the I/O pipeline until one of the following occurs:
-// (1) the I/O completes; (2) the pipeline stalls waiting for
-// dependent child I/Os; (3) the I/O issues, so we're waiting
-// for an I/O completion interrupt; (4) the I/O is delegated by
-// vdev-level caching or aggregation; (5) the I/O is deferred
-// due to vdev-level queueing; (6) the I/O is handed off to
-// another thread. In all cases, the pipeline stops whenever
-// there's no CPU work; it never burns a thread in cv_wait_io().
-//
-// There's no locking on io_stage because there's no legitimate way
-// for multiple threads to be attempting to process the same I/O.
-// fn execute(&mut self) {
-// self.executor = curthread;
-//
-// while self.stage < State::Done {
-// let mut stage = self.stage;
-//
-// assert!(!MUTEX_HELD(&self.io_lock));
-// assert!(ISP2(stage));
-// assert!(self.stall == NULL);
-// while stage & self.pipeline == 0 {
-// stage <<= 1;
-// }
-//
-// assert!(stage <= State::Done);
-//
-// let cut =
-// match stage {
-// State::VdevIoStart => REQUEUE_IO_START_CUT_IN_LINE,
-// _ => false,
-// };
-//
-// If we are in interrupt context and this pipeline stage
-// will grab a config lock that is held across IO,
-// or may wait for an IO that needs an interrupt thread
-// to complete, issue async to avoid deadlock.
-//
-// For VDEV_IO_START, we cut in line so that the io will
-// be sent to disk promptly.
-// if stage & BLOCKING_STAGES != 0 && self.vd.is_none() && self.taskq_member(TaskqType::Interrupt) {
-// self.taskq_dispatch(TaskqType::Issue, cut);
-// return;
-// }
-//
-// If we executing in the context of the tx_sync_thread,
-// or we are performing pool initialization outside of a
-// zio_taskq[ZIO_TASKQ_ISSUE|ZIO_TASKQ_ISSUE_HIGH] context.
-// Then issue the zio asynchronously to minimize stack usage
-// for these deep call paths.
-// let dp = self.spa.get_dsl_pool();
-// if (dp && curthread == dp.tx.tx_sync_thread) ||
-// (dp && dp.spa.is_initializing() && !self.taskq_member(TaskqType::Issue) &&
-// !self.taskq_member(TaskqType::IssueHigh)) {
-// self.taskq_dispatch(TaskqType::Issue, cut);
-// return;
-// }*/
-//
-// self.stage = stage;
-// let rv = pipeline_stages[highbit64(stage) - 1](self);
-//
-// if rv == PipelineFlow::Stop {
-// return;
-// }
-//
-// assert!(rv == PipelineFlow::Continue);
-// }
-// }
-//
-// pub fn wait(&self) -> zfs::Result<()> {
-// assert!(self.stage == State::Open);
-// assert!(self.executor == NULL);
-//
-// self.waiter = curthread;
-//
-// self.execute();
-//
-// mutex_enter(&self.lock);
-// while self.executor != NULL {
-// cv_wait_io(&self.cv, &self.lock);
-// }
-// mutex_exit(&self.lock);
-//
-// let error = self.error;
-// self.destroy();
-//
-// Ok(())
-// }
-//
-// fn no_wait(&mut self) {
-// assert!(self.executor == NULL);
-//
-// if self.child_type == Child::Logical && self.unique_parent() == NULL {
-// This is a logical async I/O with no parent to wait for it.
-// We add it to the spa_async_root_zio "Godfather" I/O which
-// will ensure they complete prior to unloading the pool.
-// kpreempt_disable();
-// let pio = self.spa.async_zio_root[CPU_SEQID];
-// kpreempt_enable();
-//
-// Self::add_child(pio, self);
-// }
-//
-// self.execute();
-// }
-//
-// /////////////////////////////////////////////////////////////////////////////////////////////
-// Pipeline stages
-// /////////////////////////////////////////////////////////////////////////////////////////////
-//
-// fn read_bp_init(zio_t *zio) -> PipelineFlow {
-// blkptr_t *bp = zio.bp;
-//
-// if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF &&
-// zio.child_type == Child::Logical &&
-// !(zio->io_flags & ZIO_FLAG_RAW)) {
-// uint64_t psize = BP_IS_EMBEDDED(bp) ? BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp);
-// void *cbuf = zio_buf_alloc(psize);
-//
-// zio_push_transform(zio, cbuf, psize, psize, zio_decompress);
-// }
-//
-// if BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA {
-// zio.pipeline = ZIO_INTERLOCK_PIPELINE;
-// decode_embedded_bp_compressed(bp, zio->io_data);
-// } else {
-// ASSERT(!BP_IS_EMBEDDED(bp));
-// }
-//
-// if !DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) && BP_GET_LEVEL(bp) == 0 {
-// zio.flags |= ZIO_FLAG_DONT_CACHE;
-// }
-//
-// if BP_GET_TYPE(bp) == DMU_OT_DDT_ZAP {
-// zio.flags |= ZIO_FLAG_DONT_CACHE;
-// }
-//
-// if BP_GET_DEDUP(bp) && zio.child_type == Child::Logical {
-// zio.pipeline = ZIO_DDT_READ_PIPELINE;
-// }
-//
-// return PipelineFlow::Continue;
-// }
-//
-// Issue an I/O to the underlying vdev. Typically the issue pipeline
-// stops after this stage and will resume upon I/O completion.
-// However, there are instances where the vdev layer may need to
-// continue the pipeline when an I/O was not issued. Since the I/O
-// that was sent to the vdev layer might be different than the one
-// currently active in the pipeline (see vdev_queue_io()), we explicitly
-// force the underlying vdev layers to call either zio_execute() or
-// zio_interrupt() to ensure that the pipeline continues with the correct I/O.
-// fn vdev_io_start(zio_t *zio) -> PipelineFlow {
-// vdev_t *vd = zio.vd;
-// spa_t *spa = zio.spa;
-//
-// assert!(zio.error == 0);
-// assert!(zio.child_error[Child::Vdev] == 0);
-//
-// if vd == NULL {
-// if zio.flags & ZIO_FLAG_CONFIG_WRITER == 0 {
-// spa_config_enter(spa, SCL_ZIO, zio, RW_READER);
-// }
-//
-// The mirror_ops handle multiple DVAs in a single BP.
-// vdev_mirror_ops.vdev_op_start(zio);
-// return PipelineFlow::Stop;
-// }
-//
-// We keep track of time-sensitive I/Os so that the scan thread
-// can quickly react to certain workloads. In particular, we care
-// about non-scrubbing, top-level reads and writes with the following
-// characteristics:
-// - synchronous writes of user data to non-slog devices
-// - any reads of user data
-// When these conditions are met, adjust the timestamp of spa_last_io
-// which allows the scan thread to adjust its workload accordingly.
-// if zio.flags & ZIO_FLAG_SCAN_THREAD == 0 && zio.bp != NULL && vd == vd.top_vdev &&
-// !vd.is_log && zio.bookmark.objset != DMU_META_OBJSET && zio.txg != spa.syncing_txg() {
-// let old = spa.spa_last_io;
-// let new = ddi_get_lbolt64();
-// if old != new {
-// atomic_cas_64(&spa.spa_last_io, old, new);
-// }
-// }
-//
-// let align = 1 << vd.top_vdev.ashift;
-//
-// if zio.flags & ZIO_FLAG_PHYSICAL == 0 && util::p2_phase(zio.size, align) != 0 {
-// Transform logical writes to be a full physical block size.
-// let asize = util::p2_round_up(zio.size, align);
-// char *abuf = zio_buf_alloc(asize);
-// assert!(vd == vd.vdev_top);
-// if (zio.zio_type == Type::Write) {
-// bcopy(zio.data, abuf, zio.size);
-// bzero(abuf + zio.size, asize - zio.size);
-// }
-// zio_push_transform(zio, abuf, asize, asize, zsubblock);
-// }
-//
-// If this is not a physical io, make sure that it is properly aligned
-// before proceeding.
-// if zio.flags & ZIO_FLAG_PHYSICAL == 0 {
-// assert!(util::p2_phase(zio.offset, align) == 0);
-// assert!(util::p2_phase(zio.size, align) == 0);
-// } else {
-// For physical writes, we allow 512b aligned writes and assume
-// the device will perform a read-modify-write as necessary.
-// assert!(util::p2_phase(zio.offset, SPA_MINBLOCKSIZE) == 0);
-// assert!(util::p2_phase(zio.size, SPA_MINBLOCKSIZE) == 0);
-// }
-//
-// VERIFY(zio.zio_type != Type::Write || spa_writeable(spa));
-//
-// If this is a repair I/O, and there's no self-healing involved --
-// that is, we're just resilvering what we expect to resilver --
-// then don't do the I/O unless zio's txg is actually in vd's DTL.
-// This prevents spurious resilvering with nested replication.
-// For example, given a mirror of mirrors, (A+B)+(C+D), if only
-// A is out of date, we'll read from C+D, then use the data to
-// resilver A+B -- but we don't actually want to resilver B, just A.
-// The top-level mirror has no way to know this, so instead we just
-// discard unnecessary repairs as we work our way down the vdev tree.
-// The same logic applies to any form of nested replication:
-// ditto + mirror, RAID-Z + replacing, etc. This covers them all.
-// if (zio.flags & ZIO_FLAG_IO_REPAIR != 0 &&
-// zio.flags & ZIO_FLAG_SELF_HEAL == 0 &&
-// zio.txg != 0 && /* not a delegated i/o */
-// !vdev_dtl_contains(vd, DTL_PARTIAL, zio.txg, 1)) {
-// assert!(zio.zio_type == Type::Write);
-// zio_vdev_bypass(zio);
-// return PipelineFlow::Continue;
-// }
-//
-// if vd.ops.is_leaf() && (zio.zio_type == Type::Read || zio.zio_type == Type::Write) {
-// if zio.zio_type == Type::Read && vdev_cache_read(zio) {
-// return PipelineFlow::Continue;
-// }
-//
-// if (zio = vdev_queue_io(zio)) == NULL {
-// return PipelineFlow::Stop;
-// }
-//
-// if !vdev_accessible(vd, zio) {
-// zio.error = SET_ERROR(ENXIO);
-// zio.interrupt();
-// return PipelineFlow::Stop;
-// }
-// }
-//
-// (vd.ops.io_start)(zio);
-// PipelineFlow::Stop
-// }
-//
-// fn vdev_io_done(zio: &mut Zio) -> PipelineFlow {
-// vdev_t *vd = zio.vd;
-// vdev_ops_t *ops = vd ? vd->vdev_ops : &vdev_mirror_ops;
-// let mut unexpected_error = false;
-//
-// if zio.wait_for_children(Child::Vdev, WaitType::Done) {
-// return PipelineFlow::Stop;
-// }
-//
-// assert!(zio.zio_type == Type::Read || zio.zio_type == Type::Write);
-//
-// if vd != NULL && vd.ops.is_leaf() {
-// vdev_queue_io_done(zio);
-//
-// if zio.zio_type == Type::Write {
-// vdev_cache_write(zio);
-// }
-//
-// if zio_injection_enabled && zio.error == 0 {
-// zio.error = zio_handle_device_injection(vd, zio, EIO);
-// }
-//
-// if zio_injection_enabled && zio.error == 0 {
-// zio.error = zio_handle_label_injection(zio, EIO);
-// }*/
-//
-// if zio.error {
-// if !vdev_accessible(vd, zio) {
-// zio.error = SET_ERROR(ENXIO);
-// } else {
-// unexpected_error = true;
-// }
-// }
-// }
-//
-// (ops.io_done)(zio);
-//
-// if unexpected_error {
-// VERIFY(vdev_probe(vd, zio) == NULL);
-// }
-//
-// PipelineFlow::Continue
-// }
-// }
-
-/// /////////////////////////////////////////////////////////////////////////////////////////////////
-
-// A bookmark is a four-tuple <objset, object, level, blkid> that uniquely
-// identifies any block in the pool. By convention, the meta-objset (MOS)
-// is objset 0, and the meta-dnode is object 0. This covers all blocks
-// except root blocks and ZIL blocks, which are defined as follows:
-//
-// Root blocks (objset_phys_t) are object 0, level -1: <objset, 0, -1, 0>.
-// ZIL blocks are bookmarked <objset, 0, -2, blkid == ZIL sequence number>.
-// dmu_sync()ed ZIL data blocks are bookmarked <objset, object, -2, blkid>.
-//
-// Note: this structure is called a bookmark because its original purpose
-// was to remember where to resume a pool-wide traverse.
-//
-// Note: this structure is passed between userland and the kernel, and is
-// stored on disk (by virtue of being incorporated into other on-disk
-// structures, e.g. dsl_scan_phys_t).
-//
-struct ZbookmarkPhys {
- objset: u64,
- object: u64,
- level: i64,
- blkid: u64,
-}
-
-const REQUEUE_IO_START_CUT_IN_LINE: bool = true;
-pub const NUM_CHILD_TYPES: usize = 4;
-pub const NUM_WAIT_TYPES: usize = 2;
-pub const NUM_TYPES: usize = 6;
-pub const NUM_TASKQ_TYPES: usize = 4;
-
-// Default Linux timeout for a sd device.
-// const ZIO_DELAY_MAX = (30 * MILLISEC);
-
-// const ZIO_FAILURE_MODE_WAIT = 0;
-// const ZIO_FAILURE_MODE_CONTINUE = 1;
-// const ZIO_FAILURE_MODE_PANIC = 2;
-
-// pub enum TaskqType {
-// Issue = 0,
-// IssueHigh,
-// Interrupt,
-// InterruptHigh,
-// }
-//
-// #[derive(Copy, Clone, PartialEq)]
-// enum Priority {
-// SyncRead,
-// SyncWrite, // ZIL
-// AsyncRead, // prefetch
-// AsyncWrite, // spa_sync()
-// Scrub, // asynchronous scrub/resilver reads
-// NumQueueable,
-//
-// Now // non-queued io (e.g. free)
-// }
-//
-// #[derive(Copy, Clone, PartialEq)]
-// pub enum Type {
-// Null = 0,
-// Read,
-// Write,
-// Free,
-// Claim,
-// IoCtl,
-// }
-//
-// const FLAG_AGG_INHERIT: u64 = Flag::CanFail - 1;
-// const FLAG_DDT_INHERIT: u64 = Flag::IoRetry - 1;
-// const FLAG_GANG_INHERIT: u64 = Flag::IoRetry - 1;
-// const FLAG_VDEV_INHERIT: u64 = Flag::DontQueue - 1;
-//
-// const NUM_PIPE_STAGES: usize = 22;
-//
-// type PipeStageFn = fn(&mut Zio) -> zfs::Result<()>;
-// static pipeline_stages: [Option<PipeStageFn>; NUM_PIPE_STAGES] =
-// [None,
-// Some(Zio::read_bp_init),
-// None,//Some(Zio::free_bp_init),
-// Some(Zio::issue_async),
-// None,//Some(Zio::write_bp_init),
-// None,//Some(Zio::checksum_generate),
-// None,//Some(Zio::nop_write),
-// None,//Some(Zio::ddt_read_start),
-// None,//Some(Zio::ddt_read_done),
-// None,//Some(Zio::ddt_write),
-// None,//Some(Zio::ddt_free),
-// None,//Some(Zio::gang_assemble),
-// None,//Some(Zio::gang_issue),
-// None,//Some(Zio::dva_allocate),
-// None,//Some(Zio::dva_free),
-// None,//Some(Zio::dva_claim),
-// Some(Zio::ready),
-// Some(Zio::vdev_io_start),
-// Some(Zio::vdev_io_done),
-// Some(Zio::vdev_io_assess),
-// Some(Zio::checksum_verify),
-// Some(Zio::done)];
-//
-// #[derive(Copy, Clone, PartialEq)]
-// enum PipelineFlow {
-// Continue = 0x100,
-// Stop = 0x101,
-// }
-//
-// #[derive(Copy, Clone, PartialEq)]
-// enum Flag {
-// Flags inherited by gang, ddt, and vdev children,
-// and that must be equal for two zios to aggregate
-// DontAggregate = 1 << 0,
-// IoRepair = 1 << 1,
-// SelfHeal = 1 << 2,
-// Resilver = 1 << 3,
-// Scrub = 1 << 4,
-// ScanThread = 1 << 5,
-// Physical = 1 << 6,
-//
-// Flags inherited by ddt, gang, and vdev children.
-// CanFail = 1 << 7, // must be first for INHERIT
-// Speculative = 1 << 8,
-// ConfigWriter = 1 << 9,
-// DontRetry = 1 << 10,
-// DontCache = 1 << 11,
-// NoData = 1 << 12,
-// InduceDamage = 1 << 13,
-//
-// Flags inherited by vdev children.
-// IoRetry = 1 << 14, /* must be first for INHERIT */
-// Probe = 1 << 15,
-// TryHard = 1 << 16,
-// Optional = 1 << 17,
-//
-// Flags not inherited by any children.
-// DontQueue = 1 << 18, /* must be first for INHERIT */
-// DontPropagate = 1 << 19,
-// IoBypass = 1 << 20,
-// IoRewrite = 1 << 21,
-// Raw = 1 << 22,
-// GangChild = 1 << 23,
-// DdtChild = 1 << 24,
-// GodFather = 1 << 25,
-// NopWrite = 1 << 26,
-// ReExecuted = 1 << 27,
-// Delegated = 1 << 28,
-// FastWrite = 1 << 29,
-// };
-//
-// #[derive(Copy, Clone, PartialEq)]
-// enum Child {
-// Vdev = 0,
-// Gang,
-// Ddt,
-// Logical,
-// };
-//
-// #[repr(u8)]
-// enum WaitType {
-// Ready = 0,
-// Done,
-// };
-//
-// zio pipeline stage definitions
-// enum Stage {
-// Open = 1 << 0, // RWFCI
-//
-// ReadBpInit = 1 << 1, // R----
-// FreeBpInit = 1 << 2, // --F--
-// IssueAsync = 1 << 3, // RWF--
-// WriteBpInit = 1 << 4, // -W---
-//
-// ChecksumGenerate = 1 << 5, // -W---
-//
-// NopWrite = 1 << 6, // -W---
-//
-// DdtReadStart = 1 << 7, // R----
-// DdtReadDone = 1 << 8, // R----
-// DdtWrite = 1 << 9, // -W---
-// DdtFree = 1 << 10, // --F--
-//
-// GangAssemble = 1 << 11, // RWFC-
-// GangIssue = 1 << 12, // RWFC-
-//
-// DvaAllocate = 1 << 13, // -W---
-// DvaFree = 1 << 14, // --F--
-// DvaClaim = 1 << 15, // ---C-
-//
-// Ready = 1 << 16, // RWFCI
-//
-// VdevIoStart = 1 << 17, // RW--I
-// VdevIoDone = 1 << 18, // RW--I
-// VdevIoAssess = 1 << 19, // RW--I
-//
-// ChecksumVerify = 1 << 20, // R----
-//
-// Done = 1 << 21, // RWFCI
-// };
-//
-// const INTERLOCK_STAGES = STAGE_READY | STAGE_DONE;
-//
-// const INTERLOCK_PIPELINE = INTERLOCK_STAGES
-//
-// const VDEV_IO_STAGES = STAGE_VDEV_IO_START |
-// STAGE_VDEV_IO_DONE | STAGE_VDEV_IO_ASSESS;
-//
-// const VDEV_CHILD_PIPELINE = VDEV_IO_STAGES | STAGE_DONE;
-//
-// const READ_COMMON_STAGES = INTERLOCK_STAGES | VDEV_IO_STAGES | STAGE_CHECKSUM_VERIFY
-//
-// const READ_PHYS_PIPELINE = READ_COMMON_STAGES
-//
-// const READ_PIPELINE = READ_COMMON_STAGES | STAGE_READ_BP_INIT
-//
-// const DDT_CHILD_READ_PIPELINE = READ_COMMON_STAGES;
-//
-// const DDT_READ_PIPELINE = INTERLOCK_STAGES | STAGE_READ_BP_INIT | STAGE_DDT_READ_START | STAGE_DDT_READ_DONE;
-//
-// const WRITE_COMMON_STAGES = INTERLOCK_STAGES | VDEV_IO_STAGES | STAGE_ISSUE_ASYNC | STAGE_CHECKSUM_GENERATE;
-//
-// const WRITE_PHYS_PIPELINE = WRITE_COMMON_STAGES;
-//
-// const REWRITE_PIPELINE = WRITE_COMMON_STAGES | STAGE_WRITE_BP_INIT;
-//
-// const WRITE_PIPELINE = WRITE_COMMON_STAGES | STAGE_WRITE_BP_INIT | STAGE_DVA_ALLOCATE;
-//
-// const DDT_CHILD_WRITE_PIPELINE = INTERLOCK_STAGES | VDEV_IO_STAGES | STAGE_DVA_ALLOCATE;
-//
-// const DDT_WRITE_PIPELINE = INTERLOCK_STAGES | STAGE_ISSUE_ASYNC |
-// STAGE_WRITE_BP_INIT | STAGE_CHECKSUM_GENERATE |
-// STAGE_DDT_WRITE;
-//
-// const GANG_STAGES = STAGE_GANG_ASSEMBLE | STAGE_GANG_ISSUE;
-//
-// const FREE_PIPELINE = INTERLOCK_STAGES | STAGE_FREE_BP_INIT | STAGE_DVA_FREE;
-//
-// const DDT_FREE_PIPELINE = INTERLOCK_STAGES | STAGE_FREE_BP_INIT | STAGE_ISSUE_ASYNC | STAGE_DDT_FREE;
-//
-// const CLAIM_PIPELINE = INTERLOCK_STAGES | STAGE_DVA_CLAIM;
-//
-// const IOCTL_PIPELINE = INTERLOCK_STAGES | STAGE_VDEV_IO_START | STAGE_VDEV_IO_ASSESS;
-//
-// const BLOCKING_STAGES = STAGE_DVA_ALLOCATE | STAGE_DVA_CLAIM | STAGE_VDEV_IO_START;
-//

