Mini Shell
"""
Module for managing BCache sets

BCache is a block-level caching mechanism similar to ZFS L2ARC/ZIL, dm-cache and fscache.
It works by formatting one block device as a cache set, then adding backend devices
(which need to be formatted as such) to the set and activating them.

It has been available in the mainline Linux kernel since 3.10.

This module needs the bcache userspace tools to function.

.. versionadded:: 2016.3.0
"""
import logging
import os
import re
import time
import salt.utils.path
log = logging.getLogger(__name__)

# Short level names accepted by the ``log_lvl`` argument of the private
# helpers below, mapped to the numeric levels handed to ``log.log``.
# NOTE(review): ``logging.TRACE`` is not a stdlib level; presumably Salt's
# logging setup registers it before this module is loaded - confirm.
LOG = {
    "trace": logging.TRACE,
    "debug": logging.DEBUG,
    "info": logging.INFO,
    "warn": logging.WARNING,
    "error": logging.ERROR,
    "crit": logging.CRITICAL,
}

# Functions defined with a trailing underscore and the name each is called
# by on the CLI (e.g. ``attach_`` is used as ``bcache.attach``).
__func_alias__ = {
    "attach_": "attach",
    "config_": "config",
    "super_": "super",
}

# True when the ``blkdiscard`` binary can be found on this system
HAS_BLKDISCARD = salt.utils.path.which("blkdiscard") is not None
def __virtual__():
    """
    Only work when make-bcache is installed

    :return: True if the ``make-bcache`` binary can be located, False otherwise
    """
    return salt.utils.path.which("make-bcache") is not None
def uuid(dev=None):
    """
    Return the bcache UUID of a block device.
    If no device is given, the Cache UUID is returned.

    CLI Example:

    .. code-block:: bash

        salt '*' bcache.uuid
        salt '*' bcache.uuid /dev/sda
        salt '*' bcache.uuid bcache0

    :param dev: device to look up; the cache set itself if None
    :return: the UUID, or False when it could not be determined
    """
    # Any failure below (no bcache directory, device not attached, ...) is
    # deliberately reported as False; callers test for exactly that value.
    try:
        if dev is None:
            # take the first directory in /sys/fs/bcache and return its basename;
            # only the top level of the walk is needed, so stop after one step
            _, dirs, _ = next(iter(salt.utils.path.os_walk("/sys/fs/bcache/")))
            return dirs[0]
        # basename of the /sys/block/{dev}/bcache/cache symlink target
        return os.path.basename(_bcsys(dev, "cache"))
    except Exception:  # pylint: disable=broad-except
        return False
def attach_(dev=None):
    """
    Attach a backing device to a cache set.
    If no dev is given, all backing devices will be attached.

    CLI Example:

    .. code-block:: bash

        salt '*' bcache.attach sdc
        salt '*' bcache.attach /dev/bcache1

    :param dev: backing device to attach; every known backing device if None
    :return: bool or None if nuttin' happened;
        without dev, a dict of per-device results (None when it would be empty)
    """
    cache = uuid()
    if not cache:
        log.error("No cache to attach %s to", dev)
        return False

    if dev is None:
        # Recurse once per device that reports a "cache" entry in its status.
        res = {}
        for bdev, data in status(alldevs=True).items():
            if "cache" in data:
                res[bdev] = attach_(bdev)
        return res if res else None

    bcache = uuid(dev)
    if bcache:
        if bcache == cache:
            log.info("%s is already attached to bcache %s, doing nothing", dev, cache)
            return None
        # Attached to some other set: it has to let go of that one first.
        elif not detach(dev):
            return False

    log.debug("Attaching %s to bcache %s", dev, cache)

    # NOTE(review): the leading arguments of this call and the log level of
    # the wait below were lost from the damaged source; they are restored
    # after the pattern used by detach() - confirm against a pristine copy.
    if not _bcsys(
        dev, "attach", cache, "error", f"Error attaching {dev} to bcache {cache}"
    ):
        return False

    return _wait(
        lambda: uuid(dev) == cache,
        "error",
        f"{dev} received attach to bcache {cache}, but did not comply",
    )
def detach(dev=None):
    """
    Detach a backing device(s) from a cache set
    If no dev is given, all backing devices will be detached.

    Detaching a backing device will flush its write cache.
    This should leave the underlying device in a consistent state, but might take a while.

    CLI Example:

    .. code-block:: bash

        salt '*' bcache.detach sdc
        salt '*' bcache.detach bcache1

    :param dev: backing device to detach; every known backing device if None
    :return: True/False for a single device;
        without dev, a dict of per-device results (None when it would be empty)
    """
    if dev is None:
        # Recurse once per device that reports a "cache" entry in its status.
        res = {}
        for bdev, data in status(alldevs=True).items():
            if "cache" in data:
                res[bdev] = detach(bdev)
        return res if res else None

    log.debug("Detaching %s", dev)
    if not _bcsys(dev, "detach", "goaway", "error", f"Error detaching {dev}"):
        return False

    # NOTE(review): the log level and tries arguments were lost from the
    # damaged source. "error" and 300 mirror the wait stop() performs on the
    # cache set; the larger budget allows for the flush mentioned above.
    return _wait(
        lambda: uuid(dev) is False,
        "error",
        f"{dev} received detach, but did not comply",
        300,
    )
def start():
    """
    Trigger a start of the full bcache system through udev.

    CLI Example:

    .. code-block:: bash

        salt '*' bcache.start

    :return: True once a cache set UUID can be read, False otherwise
    """
    if not _run_all("udevadm trigger", "error", "Error starting bcache: %s"):
        return False

    # NOTE(review): the log level argument was lost from the damaged source;
    # "warn" is an assumption based on the wording of the message.
    return bool(
        _wait(
            lambda: uuid() is not False,
            "warn",
            "Bcache system started, but no active cache set found.",
        )
    )
def stop(dev=None):
    """
    Stop a bcache device
    If no device is given, all backing devices will be detached from the cache, which will subsequently be stopped.

    .. warning::
        'Stop' on an individual backing device means hard-stop;
        no attempt at flushing will be done and the bcache device will seemingly 'disappear' from the device lists

    CLI Example:

    .. code-block:: bash

        salt '*' bcache.stop

    :param dev: single device to hard-stop; the whole cache set if None
    :return: True/False, or None when no cache set was found to stop
    """
    if dev is not None:
        log.warning("Stopping %s, device will only reappear after reregistering!", dev)
        if not _bcsys(dev, "stop", "goaway", "error", f"Error stopping {dev}"):
            return False
        # NOTE(review): level and tries were lost from the damaged source;
        # they mirror the cache-set wait at the end of this function.
        return _wait(
            lambda: _sysfs_attr(_bcpath(dev)) is False,
            "error",
            f"Device {dev} did not stop",
            300,
        )

    cache = uuid()
    if not cache:
        log.warning("bcache already stopped?")
        return None

    # Detach the backing devices first, then stop the set itself.
    if not _alltrue(detach()):
        return False
    elif not _fssys("stop", "goaway", "error", "Error stopping cache"):
        return False

    return _wait(lambda: uuid() is False, "error", "Cache did not stop", 300)
def back_make(dev, cache_mode="writeback", force=False, attach=True, bucket_size=None):
    """
    Create a backing device for attachment to a set.
    Because the block size must be the same, a cache set already needs to exist.

    CLI Example:

    .. code-block:: bash

        salt '*' bcache.back_make sdc cache_mode=writeback attach=True

    :param cache_mode: writethrough, writeback, writearound or none.
    :param force: Overwrite existing bcaches
    :param attach: Immediately attach the backing device to the set
    :param bucket_size: Size of a bucket (see kernel doc)
    :return: False on any failure; the result of the attach when ``attach``
        is set, True otherwise
    """
    # pylint: disable=too-many-return-statements
    cache = uuid()

    if not cache:
        log.error("No bcache set found")
        return False
    elif _sysfs_attr(_bcpath(dev)):
        # The device already carries a bcache: only continue when forced,
        # and get it out of the way (detach + stop) before reformatting.
        if not force:
            log.error(
                "%s already contains a bcache. Wipe it manually or use force", dev
            )
            return False
        elif uuid(dev) and not detach(dev):
            return False
        elif not stop(dev):
            return False

    dev = _devpath(dev)
    block_size = _size_map(_fssys("block_size"))
    # You might want to override, we pick the cache set's as sane default
    if bucket_size is None:
        bucket_size = _size_map(_fssys("bucket_size"))

    cmd = "make-bcache --block {} --bucket {} --{} --bdev {}".format(
        block_size, bucket_size, cache_mode, dev
    )
    if force:
        cmd += " --wipe-bcache"

    if not _run_all(cmd, "error", f"Error creating backing device {dev}: %s"):
        return False
    # NOTE(review): the path/value/level arguments of the next two calls were
    # lost from the damaged source. Writing the device path to the kernel's
    # bcache ``register`` file is the documented registration interface;
    # confirm both calls against a pristine copy.
    elif not _sysfs_attr(
        "/sys/fs/bcache/register",
        dev,
        "error",
        f"Error registering backing device {dev}",
    ):
        return False
    elif not _wait(
        lambda: _sysfs_attr(_bcpath(dev)) is not False,
        "error",
        f"Backing device {dev} did not register",
    ):
        return False
    elif attach:
        return attach_(dev)

    return True
def cache_make(
    dev, reserved=None, force=False, block_size=None, bucket_size=None, attach=True
):
    """
    Create BCache cache on a block device.
    If blkdiscard is available the entire device will be properly cleared in advance.

    CLI Example:

    .. code-block:: bash

        salt '*' bcache.cache_make sdb reserved=10% block_size=4096

    :param reserved: if dev is a full device, create a partition table with this size empty.

        .. note::
              this increases the amount of reserved space available to SSD garbage collectors,
              potentially (vastly) increasing performance
    :param block_size: Block size of the cache; defaults to devices' logical block size
    :param bucket_size: Bucket size handed to make-bcache; omitted from the command when unset
    :param force: Overwrite existing BCache sets
    :param attach: Attach all existing backend devices immediately
    :return: False on any failure; the combined attach result when ``attach``
        is set, True otherwise
    """
    # TODO: multiple devs == md jbod

    # pylint: disable=too-many-return-statements
    # ---------------- Preflight checks ----------------
    cache = uuid()
    if cache:
        if not force:
            log.error("BCache cache %s is already on the system", cache)
            return False
        # From here on ``cache`` names the device backing the existing set.
        cache = _bdev()

    dev = _devbase(dev)
    udev = __salt__["udev.env"](dev)

    if (
        "ID_FS_TYPE" in udev
        or (udev.get("DEVTYPE", None) != "partition" and "ID_PART_TABLE_TYPE" in udev)
    ) and not force:
        log.error("%s already contains data, wipe first or force", dev)
        return False
    elif reserved is not None and udev.get("DEVTYPE", None) != "disk":
        log.error("Need a partitionable blockdev for reserved to work")
        return False

    _, block, _ = _sizes(dev)

    if bucket_size is None:
        # TODO: bucket from _sizes() makes no sense
        bucket_size = False
    if block_size is None:
        block_size = block

    # ---------------- Still here, start doing destructive stuff ----------------
    if cache:
        if not stop():
            return False
        # Wipe the current cache device as well,
        # forever ruining any chance of it accidentally popping up again
        elif not _wipe(cache):
            return False

    # Can't do enough wiping
    if not _wipe(dev):
        return False

    if reserved:
        cmd = (
            "parted -m -s -a optimal -- "
            "/dev/{0} mklabel gpt mkpart bcache-reserved 1M {1} mkpart bcache {1} 100%".format(
                dev, reserved
            )
        )
        # if wipe was incomplete & part layout remains the same,
        # this is one condition set where udev would make it accidentally popup again
        if not _run_all(cmd, "error", f"Error creating bcache partitions on {dev}: %s"):
            return False
        # The cache goes on the second partition created above.
        dev = f"{dev}2"

    # ---------------- Finally, create a cache ----------------
    cmd = f"make-bcache --cache /dev/{dev} --block {block_size} --wipe-bcache"

    # Actually bucket_size should always have a value, but for testing 0 is possible as well
    if bucket_size:
        cmd += f" --bucket {bucket_size}"

    if not _run_all(cmd, "error", f"Error creating cache {dev}: %s"):
        return False
    # NOTE(review): the log level argument was lost from the damaged source;
    # "error" follows the other failure paths of this function.
    elif not _wait(
        lambda: uuid() is not False,
        "error",
        f"Cache {dev} seemingly created OK, but FS did not activate",
    ):
        return False

    if attach:
        return _alltrue(attach_())
    return True
def config_(dev=None, **kwargs):
    """
    Show or update config of a bcache device.

    If no device is given, operate on the cache set itself.

    CLI Example:

    .. code-block:: bash

        salt '*' bcache.config
        salt '*' bcache.config bcache1
        salt '*' bcache.config errors=panic journal_delay_ms=150
        salt '*' bcache.config bcache1 cache_mode=writeback writeback_percent=15

    :param dev: device to operate on; the cache set if None
    :param kwargs: settings to write; without any, the current config is returned
    :return: config or True/False
    """
    spath = _fspath() if dev is None else _bcpath(dev)
    if not spath:
        # _fspath() yields False when no cache set is active; joining paths
        # onto that would raise, so report the failure instead.
        log.error("No active bcache set found")
        return False

    # filter out 'hidden' kwargs added by our favourite orchestration system
    updates = {key: val for key, val in kwargs.items() if not key.startswith("__")}

    if updates:
        endres = 0
        for key, val in updates.items():
            # NOTE(review): the value and log level arguments were lost from
            # the damaged source; "warn" is an assumption - confirm.
            # bool() keeps the tally numeric whatever the write returns.
            endres += bool(
                _sysfs_attr(
                    [spath, key],
                    val,
                    "warn",
                    f"Failed to update {os.path.join(spath, key)} with {val}",
                )
            )
        return endres > 0

    result = {}
    data = _sysfs_parse(spath, config=True, internals=True, options=True)
    for key in ("other_ro", "inter_ro"):
        data.pop(key, None)

    # NOTE(review): the body of this loop was lost from the damaged source;
    # merging every remaining category into one flat dict is the evident
    # intent, since ``result`` is what gets returned.
    for section in data.values():
        result.update(section)

    return result
def status(stats=False, config=False, internals=False, superblock=False, alldevs=False):
    """
    Show the full status of the BCache system and optionally all its involved devices

    CLI Example:

    .. code-block:: bash

        salt '*' bcache.status
        salt '*' bcache.status stats=True
        salt '*' bcache.status internals=True alldevs=True

    :param stats: include statistics
    :param config: include settings
    :param internals: include internals
    :param superblock: include superblock
    :param alldevs: return the status of every device found, keyed by device,
        instead of only the cache device's status
    """
    bdevs = []
    for _, links, _ in salt.utils.path.os_walk("/sys/block/"):
        for block in links:
            # NOTE(review): the statement under this test and the append
            # below were lost from the damaged source. Skipping the bcacheN
            # devices and collecting every directory that holds a "bcache"
            # entry is reconstructed from how ``bdevs`` is consumed below;
            # confirm against a pristine copy.
            if "bcache" in block:
                continue

            for spath, sdirs, _ in salt.utils.path.os_walk(
                f"/sys/block/{block}", followlinks=False
            ):
                if "bcache" in sdirs:
                    bdevs.append(os.path.basename(spath))

    statii = {}
    for bcache in bdevs:
        statii[bcache] = device(bcache, stats, config, internals, superblock)

    cuuid = uuid()
    cdev = _bdev()
    if cdev:
        count = 0
        for dev, info in statii.items():
            if dev == cdev:
                continue
            # it's a backing dev; device() returns False on failure, so only
            # look into real status dicts
            if isinstance(info, dict) and info.get("cache") == cuuid:
                count += 1
        statii[cdev]["attached_backing_devices"] = count

        if not alldevs:
            statii = statii[cdev]

    return statii
# NOTE(review): this function reached review without indentation, without
# its docstring delimiters and with several lines missing. The code lines
# below are left exactly as found; the NOTE(review) comments mark what must
# be restored from a pristine copy before the function can run.
# NOTE(review): the docstring text documents ":param settings:", but the
# parameter is named ``config``.
def device(dev, stats=False, config=False, internals=False, superblock=False):
Check the state of a single bcache device
CLI Example:
.. code-block:: bash
salt '*' bcache.device bcache0
salt '*' bcache.device /dev/sdc stats=True
:param stats: include statistics
:param settings: include all settings
:param internals: include all internals
:param superblock: include superblock info
result = {}
# Give up (after logging at error level) when the sysfs.attr lookup on the
# device's bcache path comes back falsy.
# NOTE(review): the closing "):" of this call is missing.
if not _sysfs_attr(
_bcpath(dev), None, "error", f"{dev} is not a bcache fo any kind"
return False
elif _bcsys(dev, "set"):
# ---------------- It's the cache itself ----------------
result["uuid"] = uuid()
# NOTE(review): the entries of this list and its closing bracket are missing.
base_attr = [
# ---------------- Parse through both the blockdev & the FS ----------------
result.update(_sysfs_parse(_bcpath(dev), base_attr, stats, config, internals))
result.update(_sysfs_parse(_fspath(), base_attr, stats, config, internals))
# NOTE(review): an "else:" is missing here; what follows is the other branch.
# ---------------- It's a backing device ----------------
back_uuid = uuid(dev)
# NOTE(review): the visible uuid() returns a string or False, never None,
# so this test always passes as written.
if back_uuid is not None:
result["cache"] = back_uuid
# NOTE(review): the "try:" before the next line and the body of the
# "except" after it are missing.
result["dev"] = os.path.basename(_bcsys(dev, "dev"))
except Exception: # pylint: disable=broad-except
result["bdev"] = _bdev(dev)
base_attr = ["cache_mode", "running", "state", "writeback_running"]
base_path = _bcpath(dev)
result.update(_sysfs_parse(base_path, base_attr, stats, config, internals))
# NOTE(review): result["state"] is read next, while _sysfs_parse files the
# base attributes under a "base" key; presumably a line flattening
# result["base"] into result is missing here (and in the cache branch) - confirm.
# ---------------- Modifications ----------------
state = [result["state"]]
# NOTE(review): the bodies of the next three tests (and any "else"
# branches) are missing; presumably they appended labels to ``state``.
if result.pop("running"):
if "writeback_running" in result:
if result.pop("writeback_running"):
result["state"] = state
# ---------------- Statistics ----------------
# Strips the "stats_"/"cache_" prefixes from the statistic names and nests
# "group/name" entries one level deep.
# NOTE(review): ``statres`` is the very dict being iterated, so the pop/insert
# below mutates it mid-iteration; iterate over a list() of the keys when
# repairing. An "else:" is also missing before the attr.split line.
if "stats" in result:
replre = r"(stats|cache)_"
statres = result["stats"]
for attr in result["stats"]:
if "/" not in attr:
key = re.sub(replre, "", attr)
statres[key] = statres.pop(attr)
stat, key = attr.split("/", 1)
stat = re.sub(replre, "", stat)
key = re.sub(replre, "", key)
if stat not in statres:
statres[stat] = {}
statres[stat][key] = statres.pop(attr)
result["stats"] = statres
# ---------------- Internals ----------------
# Merges the read-only and read-write internals, drops the "internal..."
# prefix and nests the "btree_*" / "writeback_*" entries.
# NOTE(review): same mutation-while-iterating hazard as above.
if internals:
interres = result.pop("inter_ro", {})
interres.update(result.pop("inter_rw", {}))
if interres:
for key in interres:
if key.startswith("internal"):
nkey = re.sub(r"internal[s/]*", "", key)
interres[nkey] = interres.pop(key)
key = nkey
if key.startswith(("btree", "writeback")):
mkey, skey = re.split(r"_", key, maxsplit=1)
if mkey not in interres:
interres[mkey] = {}
interres[mkey][skey] = interres.pop(key)
result["internals"] = interres
# ---------------- Config ----------------
# Nests the "writeback_*" settings under a "writeback" key.
# NOTE(review): same mutation-while-iterating hazard as above.
if config:
configres = result["config"]
for key in configres:
if key.startswith("writeback"):
mkey, skey = re.split(r"_", key, maxsplit=1)
if mkey not in configres:
configres[mkey] = {}
configres[mkey][skey] = configres.pop(key)
result["config"] = configres
# ---------------- Superblock ----------------
if superblock:
result["superblock"] = super_(dev)
return result
def super_(dev):
    """
    Read out BCache SuperBlock

    CLI Example:

    .. code-block:: bash

        salt '*' bcache.super bcache0
        salt '*' bcache.super /dev/sdc

    :param dev: device whose superblock is shown
    :return: dict of superblock sections, each a dict of values
        (``(value, annotation)`` tuples where the tool prints an annotation);
        False when the superblock could not be read
    """
    dev = _devpath(dev)
    ret = {}

    # NOTE(review): the log level argument was lost from the damaged source;
    # "error" follows the other "Error ..." messages passed to _run_all.
    res = _run_all(
        f"bcache-super-show {dev}",
        "error",
        f"Error reading superblock on {dev}: %s",
    )
    if not res:
        return False
    if not isinstance(res, str):
        # _run_all answers True when the command succeeded without output
        return ret

    for line in res.splitlines():
        # "<section>.<key>   <value> [<annotation>]"
        fields = line.split(None, 1)
        if len(fields) != 2:
            continue
        key, val = fields[0], fields[1].strip()

        mval = None
        if " " in val:
            rval, mval = (part.strip() for part in re.split(r"[\s]+", val, maxsplit=1))
            # drop the first and last character around the annotation
            mval = mval[1:-1]
        else:
            rval = val

        # int, then float, then yes/no; anything else stays a string
        try:
            rval = int(rval)
        except ValueError:
            try:
                rval = float(rval)
            except ValueError:
                if rval == "yes":
                    rval = True
                elif rval == "no":
                    rval = False

        pkey, sep, skey = key.partition(".")
        if not sep:
            # no "<section>." prefix: nothing to file this line under
            continue
        section = ret.setdefault(pkey, {})
        section[skey] = rval if mval is None else (rval, mval)

    return ret
# -------------------------------- HELPER FUNCTIONS --------------------------------
def _devbase(dev):
Basename of just about any dev
dev = os.path.realpath(os.path.expandvars(dev))
dev = os.path.basename(dev)
return dev
def _devpath(dev):
    """
    Return /dev name of just about any dev

    :return: /dev/devicename
    """
    return os.path.join("/dev", _devbase(dev))
def _syspath(dev):
    """
    Full SysFS path of a device

    Names made of v/h/s, further letters and a trailing number (e.g. ``sda1``)
    are placed below their parent (``sda/sda1``); any other name is used as is.
    """
    dev = _devbase(dev)
    dev = re.sub(r"^([vhs][a-z]+)([0-9]+)", r"\1/\1\2", dev)

    # name = re.sub(r'^([a-z]+)(?<!(bcache|md|dm))([0-9]+)', r'\1/\1\2', name)
    return os.path.join("/sys/block/", dev)
def _bdev(dev=None):
    """
    Resolve a bcacheX or cache to a real dev

    :param dev: device to resolve; the active cache set's ``cache0`` if None
    :return: basename of bcache dev, or False when nothing could be resolved
    """
    if dev is None:
        dev = _fssys("cache0")
    else:
        dev = _bcpath(dev)

    if not dev:
        return False
    # the device is the directory that contains the resolved path
    return _devbase(os.path.dirname(dev))
def _bcpath(dev):
    """
    Full SysFS path of a bcache device

    :return: the ``bcache`` entry below the device's SysFS path
    """
    return os.path.join(_syspath(dev), "bcache")
def _fspath():
    """
    :return: path of active bcache, or False when no cache UUID is found
    """
    cuuid = uuid()
    if not cuuid:
        return False
    return os.path.join("/sys/fs/bcache/", cuuid)
def _fssys(name, value=None, log_lvl=None, log_msg=None):
    """
    Simple wrapper to interface with bcache SysFS

    :param name: attribute below the active cache set's SysFS directory
    :param value: value to write; None only reads
    :param log_lvl: key of ``LOG`` to log ``log_msg`` at when the result is falsy
    :param log_msg: message logged on a falsy result
    :return: result of the SysFS access, False when no cache set is active
    """
    fspath = _fspath()
    if not fspath:
        return False
    return _sysfs_attr([fspath, name], value, log_lvl, log_msg)
def _bcsys(dev, name, value=None, log_lvl=None, log_msg=None):
    """
    Simple wrapper to interface with backing devs SysFS

    :param dev: device whose ``bcache`` SysFS directory is addressed
    :param name: attribute inside that directory
    :param value: value to write; None only reads
    :param log_lvl: key of ``LOG`` to log ``log_msg`` at when the result is falsy
    :param log_msg: message logged on a falsy result
    :return: result of the SysFS access
    """
    return _sysfs_attr([_bcpath(dev), name], value, log_lvl, log_msg)
def _sysfs_attr(name, value=None, log_lvl=None, log_msg=None):
    """
    Simple wrapper with logging around sysfs.attr

    :param name: SysFS path, or a list of path components to be joined
    :param value: passed through to ``sysfs.attr`` unchanged
    :param log_lvl: key of ``LOG``; together with ``log_msg`` enables logging
        of a falsy result
    :param log_msg: message logged on a falsy result
    :return: whatever ``sysfs.attr`` returned
    """
    parts = [name] if isinstance(name, str) else name

    res = __salt__["sysfs.attr"](os.path.join(*parts), value)
    if not res and log_lvl is not None and log_msg is not None:
        log.log(LOG[log_lvl], log_msg)
    return res
# NOTE(review): this function reached review without indentation, without
# its docstring delimiters and with several lines missing. The code lines
# below are left exactly as found; the NOTE(review) comments mark what must
# be restored from a pristine copy before the function can run.
# NOTE(review): the closing "):" of the signature is missing.
def _sysfs_parse(
path, base_attr=None, stats=False, config=False, internals=False, options=False
Helper function for parsing BCache's SysFS interface
result = {}
# ---------------- Parse through the interfaces list ----------------
intfs = __salt__["sysfs.interfaces"](path)
# Actions, we ignore
del intfs["w"]
# -------- Sorting hat --------
# ``binkeys`` names the categories the caller asked for; an interface is
# only kept when its category has a bin in ``bintf``.
binkeys = []
if internals:
binkeys.extend(["inter_ro", "inter_rw"])
# NOTE(review): the bodies of the next two tests are missing; presumably
# they add "config" and "stats" to ``binkeys`` (both names are used as bins
# further down) - confirm.
if config:
if stats:
bintf = {}
for key in binkeys:
bintf[key] = []
# Read-only interfaces: "internal*" go to inter_ro, names containing
# "stats" go to stats.
# NOTE(review): an "else:" is missing before the fallback assignment, and
# the body of "if key in bintf:" is missing (presumably appending ``intf``
# to the chosen bin) - confirm.
for intf in intfs["r"]:
if intf.startswith("internal"):
key = "inter_ro"
elif "stats" in intf:
key = "stats"
# What to do with these???
# I'll utilize 'inter_ro' as 'misc' as well
key = "inter_ro"
if key in bintf:
# Read-write interfaces: "internal*" go to inter_rw.
# NOTE(review): an "else:" is missing before ``key = "config"``, and the
# body of "if key in bintf:" is missing here as well.
for intf in intfs["rw"]:
if intf.startswith("internal"):
key = "inter_rw"
key = "config"
if key in bintf:
# Attributes requested as ``base_attr`` are removed from every other bin
# and collected under their own "base" bin.
if base_attr is not None:
for intf in bintf:
bintf[intf] = [sintf for sintf in bintf[intf] if sintf not in base_attr]
bintf["base"] = base_attr
# NOTE(review): the entries of the "stats" list and the closing brackets of
# ``mods`` are missing.
mods = {
"stats": [
# NOTE(review): the body of "if modt not in bintf:" and the body of
# "if mod in intflist:" are missing; ``found`` is never filled in the
# visible code.
for modt, modlist in mods.items():
found = []
if modt not in bintf:
for mod in modlist:
for intflist in bintf.values():
if mod in intflist:
bintf[modt] += found
# -------- Fetch SysFS vals --------
bintflist = [intf for iflist in bintf.values() for intf in iflist]
# NOTE(review): the function name inside __salt__[...] was lost (empty key).
result.update(__salt__[""](bintflist, path))
# -------- Parse through well known string lists --------
# Turns "key: value" lines into a dict.
# NOTE(review): the names in this tuple and its closing "):" are missing,
# as are the "try:" lines paired with the two "except" clauses and the body
# of the inner "except".
for strlist in (
if strlist in result:
listres = {}
for line in result[strlist].split("\n"):
key, val = line.split(":", 1)
val = val.strip()
val = int(val)
except Exception: # pylint: disable=broad-except
val = float(val)
except Exception: # pylint: disable=broad-except
listres[key.strip()] = val
result[strlist] = listres
# -------- Parse through selection lists --------
# NOTE(review): the call prefix on the assignment below was lost;
# presumably a regex search that extracts the bracketed choice - confirm.
if not options:
for sellist in ("cache_mode", "cache_replacement_policy", "errors"):
if sellist in result:
result[sellist] ="\[(.+)\]", result[sellist]).groups()[0]
# -------- Parse through well known bools --------
for boolkey in ("running", "writeback_running", "congested"):
if boolkey in result:
result[boolkey] = bool(result[boolkey])
# -------- Recategorize results --------
# Regroups the fetched values by bin; bins that end up empty are left out.
bresult = {}
for iftype, intflist in bintf.items():
ifres = {}
for intf in intflist:
if intf in result:
ifres[intf] = result.pop(intf)
if ifres:
bresult[iftype] = ifres
return bresult
def _size_map(size):
Map Bcache's size strings to real bytes
# I know, I know, EAFP.
# But everything else is reason for None
if not isinstance(size, int):
if"[Kk]", size):
size = 1024 * float(re.sub(r"[Kk]", "", size))
elif"[Mm]", size):
size = 1024**2 * float(re.sub(r"[Mm]", "", size))
size = int(size)
return size
except Exception: # pylint: disable=broad-except
return None
def _sizes(dev):
    """
    Return neigh useless sizing info about a blockdev

    :return: (512 times the ``size`` attribute, i.e. the total size in bytes
        as consumed by ``_wipe``; hardware sector size; maximum discard size
        in bytes)
    """
    dev = _devbase(dev)

    # NOTE(review): this call was lost from the damaged source (only
    # ``__salt__[""](`` survived), along with two truncated tuples that
    # nothing in this function read. ``sysfs.read`` and its arguments are
    # reconstructed from the keys looked up below - confirm against a
    # pristine copy.
    sysfs = __salt__["sysfs.read"](
        (
            "size",
            "queue/hw_sector_size",
            "../queue/hw_sector_size",
            "queue/discard_max_bytes",
            "../queue/discard_max_bytes",
        ),
        _syspath(dev),
    )

    # TODO: makes no sense
    # First of all, it has to be a power of 2
    # Secondly, this returns 4GiB - 512b on Intel 3500's for some weird reason
    # discard_granularity seems in bytes, resolves to 512b ???
    # max_hw_sectors_kb???
    # There's also discard_max_hw_bytes more recently
    # See:
    # Also, I cant find any docs yet regarding bucket sizes;
    # it's supposed to be discard_max_hw_bytes,
    # but no way to figure that one reliably out apparently
    # Each attribute is looked up under the device's own "queue" first and
    # under "../queue" as fallback.
    discard = sysfs.get(
        "queue/discard_max_bytes", sysfs.get("../queue/discard_max_bytes", None)
    )
    block = sysfs.get(
        "queue/hw_sector_size", sysfs.get("../queue/hw_sector_size", None)
    )

    return 512 * sysfs["size"], block, discard
def _wipe(dev):
    """
    Destructively wipe a block device, with ``blkdiscard`` when the device
    reports a discard size and the binary is available, with ``dd`` otherwise.

    :param dev: device to wipe
    :return: True when at least one wipe command succeeded, False when none
        did, None when the device's SysFS properties could not be read
    """
    endres = 0
    dev = _devbase(dev)

    size, _, discard = _sizes(dev)

    if discard is None:
        log.error("Unable to read SysFS props for %s", dev)
        return None
    elif not discard:
        log.warning("%s seems unable to discard", dev)
        wiper = "dd"
    elif not HAS_BLKDISCARD:
        log.warning(
            "blkdiscard binary not available, properly wipe the dev manually for"
            " optimal results"
        )
        wiper = "dd"
    else:
        wiper = "blkdiscard"

    wipe_failmsg = f"Error wiping {dev}: %s"
    if wiper == "dd":
        blocks = 4
        cmd = f"dd if=/dev/zero of=/dev/{dev} bs=1M count={blocks}"
        # _run_all may hand back stdout instead of True; count successes only
        endres += bool(_run_all(cmd, "warn", wipe_failmsg))

        # Some stuff (<cough>GPT</cough>) writes stuff at the end of a dev as well
        # dd needs a whole number of blocks, so use integer division
        cmd += f" seek={int(size) // 1024**2 - blocks}"
        endres += bool(_run_all(cmd, "warn", wipe_failmsg))

    elif wiper == "blkdiscard":
        cmd = f"blkdiscard /dev/{dev}"
        endres += bool(_run_all(cmd, "warn", wipe_failmsg))
        # TODO: fix annoying bug failing blkdiscard by trying to discard 1 sector past blkdev
        endres = 1

    return endres > 0
def _wait(lfunc, log_lvl=None, log_msg=None, tries=10):
Wait for lfunc to be True
:return: True if lfunc succeeded within tries, False if it didn't
i = 0
while i < tries:
if lfunc():
return True
i += 1
if log_lvl is not None:
log.log(LOG[log_lvl], log_msg)
return False
def _run_all(cmd, log_lvl=None, log_msg=None, exitcode=0):
    """
    Simple wrapper around cmd.run_all
    log_msg can contain %s for stderr

    :param cmd: command line handed to ``cmd.run_all``
    :param log_lvl: key of ``LOG`` to log at when the command fails
    :param log_msg: logging format string; stderr is passed as its only argument
    :param exitcode: return code that counts as success
    :return: True or stdout, False if retcode wasn't exitcode
    """
    res = __salt__["cmd.run_all"](cmd)
    if res["retcode"] == exitcode:
        # non-empty stdout wins, otherwise a plain True
        return res["stdout"] or True

    if log_lvl is not None:
        log.log(LOG[log_lvl], log_msg, res["stderr"])
    return False
def _alltrue(resdict):
if resdict is None:
return True
return len([val for val in resdict.values() if val]) > 0
Zerion Mini Shell 1.0