Mini Shell
# coding=utf-8
#
# Copyright © Cloud Linux GmbH & Cloud Linux Software, Inc 2010-2023 All Rights Reserved
#
# Licensed under CLOUD LINUX LICENSE AGREEMENT
# http://cloudlinux.com/docs/LICENSE.TXT
import itertools
import math
from typing import Any, Callable
from .utils import bootstrap_gen
from ._logs import logger
from .common import (
AdjustStepData,
empty_stats,
empty_usage,
GetNormalLimits,
InvalidStateError,
)
from .overload import OverloadCheckResult
from .lves_tracker import LvesTracker
from .lve_sm import LveStateManager
class StepCalculator:
    """
    Computes how many LVEs should be bursted or unbursted on an iteration.

    Two independent counters are kept: one for "enable" (burst) steps and one
    for "disable" (unburst) steps. Asking for one kind of step resets the
    counter of the other kind.
    """

    # Hard upper limit for the enable step size.
    MAX_STEP = 50

    def __init__(self, overload_threshold: float) -> None:
        """
        :param float overload_threshold: server load at which the enable step
            size is bounded down to 0
        """
        self._last_enable_step = 0
        self._last_disable_step = 0
        self._overload_threshold = overload_threshold

    def get_enable_step(self, server_load: float) -> int:
        """
        Get current enable step size.

        We start with 1 and increase step size by 1 on each iteration.
        Step size is bounded by two factors:
        1. MAX_STEP
        2. descending linear function (y = k * x + b) depending on server_load:
           step_size = -1 * MAX_STEP / overload_threshold * server_load + MAX_STEP

        A non-positive overload_threshold makes the linear function
        undefined (division by zero) or ascending, so in that case
        the upper bound is taken to be 0 and the returned step is 0.

        Calling this method resets the disable step counter.

        :param float server_load: server load estimation (from 0 to 1)
        :return int: step size, 0 <= step size <= MAX_STEP
        """
        self._last_disable_step = 0
        next_step_size = min(self._last_enable_step + 1, self.MAX_STEP)

        if self._overload_threshold > 0:
            # Step size is bounded by a descending linear function
            # (MAX_STEP at 0, 0 at overload_threshold).
            upper_bound = math.ceil(
                -1 * self.MAX_STEP / self._overload_threshold * server_load + self.MAX_STEP
            )
        else:
            # No meaningful descending bound exists for a non-positive threshold;
            # be conservative instead of raising ZeroDivisionError.
            upper_bound = 0

        # Keep the bound within 0 <= upper_bound <= MAX_STEP.
        upper_bound = min(max(upper_bound, 0), self.MAX_STEP)

        step_size = min(next_step_size, upper_bound)
        self._last_enable_step = step_size
        return step_size

    def get_disable_step(self, lves_count: int) -> int:
        """
        Get current disable step size.

        The step doubles on every consecutive call (starting from 1) and is
        capped by lves_count. Calling this method resets the enable step counter.

        :param int lves_count: upper limit for the returned step
        :return int: step size
        """
        self._last_enable_step = 0
        doubled = max(self._last_disable_step * 2, 1)
        self._last_disable_step = min(doubled, lves_count)
        return self._last_disable_step
class Adjuster:
    """
    Decides, on every `step` call, which LVEs to burst and which to unburst.

    The algorithm itself lives in a generator created once in `__init__`;
    `step` feeds it one `AdjustStepData` message per call.
    """

    def __init__(
        self,
        lves_tracker: LvesTracker,
        get_normal_limits: GetNormalLimits,
        step_calculator: StepCalculator,
        is_server_overloaded: Callable[[], OverloadCheckResult],
        fail_fast: bool = True
    ) -> None:
        """
        :param lves_tracker: tracker queried for the LVE sets
            (quota_exceeded, unbursted, bursted, overusing) and updated on each step
        :param get_normal_limits: callable returning normal limits by LVE id
        :param step_calculator: provides enable/disable step sizes
        :param is_server_overloaded: callable returning the overload check result
        :param fail_fast: when True, inconsistencies raise instead of being logged
        """
        # Set before the generator is created so the attribute always exists
        # by the time the generator's error handler could assign to it.
        self._step_exception: Exception | None = None
        # NOTE(review): `.send` is called with a real message right away, so
        # `bootstrap_gen` presumably advances the generator to its first `yield` - confirm.
        self._step = self._create_algorithm_gen(
            lves_tracker=lves_tracker,
            step_calculator=step_calculator,
            get_normal_limits=get_normal_limits,
            is_server_overloaded=is_server_overloaded,
            fail_fast=fail_fast,
        ).send

    def step(self, adjust_step_data: AdjustStepData) -> Any:
        """
        Run one iteration of the algorithm.

        :param adjust_step_data: message passed into the algorithm generator
        :raises Exception: whatever exception the iteration stored while running
        """
        self._step(adjust_step_data)
        # The generator never lets exceptions escape (that would terminate it);
        # it stores them instead and they are re-raised here.
        if self._step_exception is not None:
            exc, self._step_exception = self._step_exception, None
            raise exc

    @bootstrap_gen
    def _create_algorithm_gen(
        self,
        lves_tracker: LvesTracker,
        step_calculator: StepCalculator,
        get_normal_limits: GetNormalLimits,
        is_server_overloaded: Callable[[], OverloadCheckResult],
        fail_fast: bool,
    ):
        """
        Endless generator implementing the adjust algorithm.

        Each received `AdjustStepData` triggers one iteration: the tracker is
        updated, burst/unburst candidates are selected and the corresponding
        commands are sent to their state managers. Any exception raised during
        an iteration is stored in `self._step_exception` and the loop continues.
        """
        while True:
            try:
                # TODO(vlebedev): Implement cooldown period for LVEs that were unbursted externally?
                msg = yield
                assert isinstance(msg, AdjustStepData)

                try:
                    normal_limits_by_lve = get_normal_limits()
                except Exception:
                    # Best effort: proceed with no normal limits rather than skip the step.
                    logger.exception('Failed to get normal limits')
                    normal_limits_by_lve = {}

                # TODO(vlebedev): This update call does not belong to adjuster - move it out of here.
                lves_tracker.update(
                    now=msg.now,
                    normal_limits_by_id=normal_limits_by_lve,
                    stats_by_id=msg.stats,
                    usages_by_id=msg.lve_usages_by_id,
                )

                to_burst = set[LveStateManager]()
                # LVEs that exceeded quota and are not yet unbursted are always unbursted.
                to_unburst = set[LveStateManager](
                    lves_tracker.quota_exceeded - lves_tracker.unbursted,
                )

                is_overloaded: OverloadCheckResult = is_server_overloaded()
                if is_overloaded:
                    currently_bursted = lves_tracker.bursted
                    currently_overusing = lves_tracker.overusing
                    step = step_calculator.get_disable_step(len(currently_bursted))
                    logger.info('Server is overloaded - searching %s candidates to unburst', step)
                    if step > len(to_unburst):
                        missing_num = step - len(to_unburst)
                        logger.debug(
                            'Server is overloaded and there is not enough LVEs going to be unbursted '
                            'due to quota being exceeded - trying to find %s more candidates to unburst',
                            missing_num,
                        )
                        # Overusing LVEs are taken first, then the remaining bursted ones.
                        overusing = set(currently_overusing)
                        to_unburst.update(itertools.islice(
                            itertools.chain(overusing, set(currently_bursted) - overusing),
                            missing_num,
                        ))
                elif (burst_candidates := lves_tracker.unbursted - lves_tracker.quota_exceeded):

                    def min_unutilized_ratio(manager):
                        # unutilized_*_ratio = (*_limit - *_usage) / *_limit
                        # NOTE(review): a zero cpu/io limit (including whatever `empty_stats`
                        # holds) would raise ZeroDivisionError here - confirm limits are non-zero.
                        stats = msg.stats.get(manager.lve_id, empty_stats)
                        usage = msg.lve_usages_by_id.get(manager.lve_id, empty_usage)
                        return min(
                            (stats.cpu - usage.cpu_usage) / stats.cpu,
                            (stats.io - usage.io_usage) / stats.io,
                        )

                    # Sort by min(unutilized_cpu_ratio, unutilized_io_ratio):
                    # the more of the allowed io or cpu capacity is already used,
                    # the higher the priority for bursting.
                    burst_candidates = sorted(burst_candidates, key=min_unutilized_ratio)
                    step = step_calculator.get_enable_step(is_overloaded.server_load)
                    logger.debug('Server has spare resources - trying to find %s more candidates to burst', step)
                    to_burst.update(itertools.islice(burst_candidates, step))

                if fail_fast and not to_burst.isdisjoint(to_unburst):
                    raise AssertionError('LVE can`t be bursted and unbursted simultaneously!')

                for managers, cmd in [
                    (to_burst, LveStateManager.Burst(now=msg.now)),
                    (to_unburst, LveStateManager.Unburst(now=msg.now)),
                ]:
                    for manager in managers:
                        try:
                            manager.step(cmd)
                        except InvalidStateError as e:
                            if fail_fast:
                                raise
                            logger.exception('LVE "%s": Failed to execute "%s"!', manager.lve_id, cmd, exc_info=e)
            except Exception as e:
                # Keep the generator alive; `step` re-raises the stored exception.
                self._step_exception = e
Zerion Mini Shell 1.0