# Mini Shell
# Directory: /lib/0xtools/
# Current file: //lib/0xtools/psnproc.py
# psn -- Linux Process Snapper by Tanel Poder [https://0x.tools]
# Copyright 2019-2021 Tanel Poder
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# SPDX-License-Identifier: GPL-2.0-or-later
# structures defining /proc
import os, os.path
import re
import platform
# Clock ticks per second as reported by sysconf(SC_CLK_TCK); the stat source
# divides tick counters by this value to produce its *_sec columns.
system_timer_hz = os.sysconf('SC_CLK_TCK')
class ProcSource:
    """One /proc file plus the recipe for turning its contents into rows.

    Each entry of ``available_columns`` is a tuple
    ``(name, type, source_index[, transform])``:

    * ``name``         -- column name
    * ``type``         -- schema type; ``None`` excludes the column from the schema
    * ``source_index`` -- position of the raw value in the parsed sample
    * ``transform``    -- optional callable used instead of ``type`` to convert
      the raw value

    ``path`` is a %-format string taking ``pid`` (or ``(pid, task)`` when
    ``task_level`` is true).  ``read_samples(fh)`` returns a list of raw samples
    read from the open file, ``parse_sample(source, raw_sample)`` turns one raw
    sample into an indexable sequence of raw values.
    """

    def __init__(self, name, path, available_columns, stored_column_names, task_level=False, read_samples=lambda f: [f.read()], parse_sample=lambda self, sample: sample.split()):
        self.name = name
        self.path = path
        self.available_columns = available_columns
        self.task_level = task_level
        self.read_samples = read_samples
        self.parse_sample = parse_sample

        self.set_stored_columns(stored_column_names)

    def set_stored_columns(self, stored_column_names):
        """Select which columns are extracted and rebuild the derived schema.

        A falsy ``stored_column_names`` selects every available column.
        Sets ``stored_column_names``, ``schema_columns``, ``schema_extract``
        (list of ``(source_index, converter)`` pairs) and ``insert_sql``.
        """
        col_name_i, schema_type_i, source_i, transform_i = range(4)
        self.stored_column_names = stored_column_names or [c[0] for c in self.available_columns]

        # columns every row starts with, regardless of the source
        sample_cols = [('event_time', str), ('pid', int), ('task', int)]
        sample_col_names = dict(sample_cols)

        # source columns: requested, not shadowing a sample column, and typed
        source_cols = [
            c for c in self.available_columns
            if c[col_name_i] in self.stored_column_names
            and c[col_name_i] not in sample_col_names
            and c[1] is not None
        ]
        self.schema_columns = sample_cols + source_cols

        column_indexes = dict([(c[col_name_i], c[source_i]) for c in self.available_columns])

        schema_extract_idx = [column_indexes[c[col_name_i]] for c in source_cols]
        # a 3-tuple converts with its schema type, a 4-tuple with its transform
        schema_extract_convert = [c[schema_type_i] if len(c) == 3 else c[transform_i] for c in source_cols]
        self.schema_extract = list(zip(schema_extract_idx, schema_extract_convert))

        self.insert_sql = "INSERT INTO '%s' VALUES (%s)" % (self.name, ','.join(['?' for i in self.schema_columns]))

    def sample(self, event_time, pid, task):
        """Read the /proc file for ``pid``/``task`` and return a list of rows.

        Each row is ``[event_time, pid, task] + converted source columns``.
        Errors from opening the file propagate to the caller.  ``ValueError``
        and ``IndexError`` raised while building rows are reported on stdout
        and re-raised.
        """
        sample_path = self.path % (pid, task) if self.task_level else self.path % pid

        with open(sample_path) as f:
            raw_samples = self.read_samples(f)

            def create_row_sample(raw_sample):
                full_sample = self.parse_sample(self, raw_sample)

                # some syscall-specific code pushed down to general sampling function
                # call readlink() to get the file name for system calls that have a file descriptor as arg0
                filename = ''
                if self.name == 'syscall':
                    # special case: kernel threads show all-zero "syscall" on newer kernels like 4.x
                    # otherwise it incorrectly looks like that kernel is in a "read" syscall (id=0 on x86_64)
                    if full_sample[0] == '-1' or full_sample == ['0', '0x0', '0x0', '0x0', '0x0', '0x0', '0x0', '0x0', '0x0']:
                        full_sample = ['kernel_thread', '0x0', '0x0', '0x0', '0x0', '0x0', '0x0', '0x0', '0x0']

                    # string version of syscall number, "running" or "kernel_thread"
                    syscall_id = full_sample[0]

                    if syscall_id in syscalls_with_fd_arg:
                        try:
                            arg0 = int(full_sample[1], 16)
                            # a hacky way for avoiding reading false file descriptors for kernel threads on older kernels
                            # (like 2.6.32) that show "syscall 0x0" for kernel threads + some random false arguments.
                            # TODO refactor this and kernel_thread translation above
                            if arg0 <= 65536:
                                filename = os.readlink("/proc/%s/fd/%s" % (pid, arg0)) + " " + special_fds.get(arg0, '')
                            else:
                                filename = 'fd over 65536'
                        except OSError:
                            # file has been closed or process has disappeared
                            filename = '-'

                # the resolved file name becomes the last raw value of the sample
                full_sample += (filename,)

                return [event_time, pid, task] + [convert(full_sample[idx]) for idx, convert in self.schema_extract]

            try:
                return [create_row_sample(rs) for rs in raw_samples]
            except (ValueError, IndexError):
                print('problem parsing', self.name, 'sample:')
                print(raw_samples)
                print()
                raise
### stat ###
# process_state_name = {
# 'R': 'Running (ON CPU)',
# 'S': 'Sleeping (Interruptible)',
# 'D': 'Disk (Uninterruptible)',
# 'Z': 'Zombie',
# 'T': 'Traced/Stopped',
# 'W': 'Paging'
# }
# Human-readable names for the one-letter task state codes found in
# /proc/<pid>/stat; see fs/proc/array.c in the Linux kernel sources
# (https://github.com/torvalds/linux/blob/master/fs/proc/array.c).
# State W (paging) is not used in kernels 2.6.x onwards.
process_state_name = dict([
    ('R', 'Running (ON CPU)'),        # 0x00
    ('S', 'Sleep (Interruptible)'),   # 0x01
    ('D', 'Disk (Uninterruptible)'),  # 0x02
    ('T', '(stopped)'),               # 0x04
    ('t', '(tracing stop)'),          # 0x08
    ('X', '(dead)'),                  # 0x10
    ('Z', '(zombie)'),                # 0x20
    ('P', '(parked)'),                # 0x40
    # states beyond TASK_REPORT:
    ('I', '(idle)'),                  # 0x80
])
def parse_stat_sample(proc_source, sample):
    """Split the contents of a /proc/<pid>/stat file into its fields.

    The comm field is wrapped in parentheses and may itself contain spaces
    and parentheses, so it is taken as everything from the first '(' to the
    LAST ')' of the sample; the text before and after it is split on
    whitespace.  ``proc_source`` is unused (kept for the parse_sample
    calling convention).

    Returns a list of strings; the comm entry keeps its parentheses.
    """
    comm_start = sample.find('(')
    comm_end = sample.rfind(')')
    if comm_start == -1 or comm_end < comm_start:
        # no parenthesised comm field: fall back to plain whitespace splitting
        return sample.split()

    leading = sample[:comm_start].split()
    comm = sample[comm_start:comm_end + 1]
    trailing = sample[comm_end + 1:].split()
    return leading + [comm] + trailing
# Matches runs of digits; used to collapse numbered names (e.g. worker
# threads) into one "name*" group.  Raw string: '\d' in a plain literal is an
# invalid escape sequence.
trim_comm = re.compile(r'\d+')

# /proc/<pid>/task/<tid>/stat: one (name, type, field index[, transform]) tuple
# per column.  Columns typed None are never extracted.  Several columns may
# share a field index and differ only in their transform.
stat = ProcSource('stat', '/proc/%s/task/%s/stat', [
    ('pid', int, 0),
    ('comm', str, 1, lambda c: trim_comm.sub('*', c)),   # comm with digit runs replaced by '*'
    ('comm2', str, 1),                                   # comm exactly as reported
    ('state_id', str, 2),
    ('state', str, 2, lambda state_id: process_state_name.get(state_id, state_id)),
    ('ppid', int, 3),
    ('pgrp', int, 4),
    ('session', int, 5),
    ('tty_nr', int, 6),
    ('tpgid', int, 7),
    ('flags', None, 8),
    ('minflt', int, 9),
    ('cminflt', int, 10),
    ('majflt', int, 11),
    ('cmajflt', int, 12),
    ('utime', int, 13),
    ('stime', int, 14),
    ('cutime', int, 15),
    ('cstime', int, 16),
    # NOTE(review): declared int, but '/' is true division on Python 3 so
    # these transforms yield floats -- confirm whether that is intended
    ('utime_sec', int, 13, lambda v: int(v) / system_timer_hz),
    ('stime_sec', int, 14, lambda v: int(v) / system_timer_hz),
    ('cutime_sec', int, 15, lambda v: int(v) / system_timer_hz),
    ('cstime_sec', int, 16, lambda v: int(v) / system_timer_hz),
    ('priority', int, 17),
    ('nice', int, 18),
    ('num_threads', int, 19),
    ('itrealvalue', None, 20),
    ('starttime', int, 21),
    ('vsize', int, 22),
    ('rss', int, 23),
    ('rsslim', str, 24),
    ('startcode', None, 25),
    ('endcode', None, 26),
    ('startstack', None, 27),
    ('kstkesp', None, 28),
    ('kstkeip', None, 29),
    ('signal', None, 30),
    ('blocked', None, 31),
    ('sigignore', None, 32),
    ('sigcatch', None, 33),
    ('wchan', None, 34),
    ('nswap', None, 35),
    ('cnswap', None, 36),
    ('exit_signal', int, 37),
    ('processor', int, 38),
    ('rt_priority', int, 39),
    ('policy', None, 40),
    ('delayacct_blkio_ticks', int, 41),
    ('guest_time', int, 42),
    ('cgust_time', int, 43),   # (sic) spelling kept: column names are user-visible
    ('start_data', None, 44),
    ('end_data', None, 45),
    ('start_brk', None, 46),
    ('arg_start', None, 47),
    ('arg_end', None, 48),
    ('env_start', None, 49),
    ('env_end', None, 50),
    ('exit_code', int, 51),
], None,
    task_level=True,
    parse_sample=parse_stat_sample)
### status ###
def parse_status_sample(proc_source, sample):
    """Parse the "Name: value" lines of a /proc/<pid>/status file.

    Values are returned positionally, in the order of
    ``proc_source.available_columns``.  A line matches a column when its
    lower-cased field name, or that name plus ``_kb``, equals the column name.

    * Known fields that the kernel did not report (e.g. the Vm* lines of a
      kernel thread) are filled with the string form of the column type's
      default (``'0'`` for int, ``''`` for str) so that both plain type
      conversions and string-based transforms can process them.
    * Fields that do not correspond to any remaining column (newer kernels
      add lines such as CoreDumping or NoNewPrivs) are skipped instead of
      consuming the remaining columns.
    * Columns after the last matched field are not padded, so the result may
      be shorter than ``available_columns``.

    Returns a list of strings.
    """
    columns = proc_source.available_columns
    n_columns = len(columns)
    sample_values = []

    for line in sample.split('\n'):
        line_tokens = line.split()
        if not line_tokens:
            continue  # blank or whitespace-only line

        n = line_tokens[0][:-1].lower()  # drop the trailing ':'
        v = ' '.join(line_tokens[1:])
        wanted = (n, n + '_kb')

        # first not-yet-filled column this field belongs to (order is assumed stable)
        target = next((i for i in range(len(sample_values), n_columns) if columns[i][0] in wanted), None)
        if target is None:
            continue  # field unknown to this schema: ignore it

        # columns skipped over were not reported by the kernel: use type defaults
        while len(sample_values) < target:
            parse_fn = columns[len(sample_values)][1]
            sample_values.append(str(parse_fn()))
        sample_values.append(v)

    return sample_values
def _status_first_int(v):
    """Return the leading number of a status value such as '1234 kB' or '0 0 0 0'."""
    return int(v.split()[0])


# /proc/<pid>/status (process level): columns are listed in the order the
# status parser expects the fields to appear in the file.
status = ProcSource(
    'status',
    '/proc/%s/status',
    [
        ('name', str, 0),
        ('umask', str, 1),
        ('state', str, 2),  # remove duplicate with stat
        ('tgid', int, 3),
        ('ngid', int, 4),
        ('pid', int, 5),
        ('ppid', int, 6),  # remove duplicate with stat
        ('tracerpid', int, 7),
        ('uid', int, 8, _status_first_int),
        ('gid', int, 9, _status_first_int),
        ('fdsize', int, 10),
        ('groups', str, 11),
        ('nstgid', str, 12),
        ('nspid', str, 13),
        ('nspgid', str, 14),
        ('nssid', str, 15),
        ('vmpeak_kb', int, 16, _status_first_int),
        ('vmsize_kb', int, 17, _status_first_int),
        ('vmlck_kb', int, 18, _status_first_int),
        ('vmpin_kb', int, 19, _status_first_int),
        ('vmhwm_kb', int, 20, _status_first_int),
        ('vmrss_kb', int, 21, _status_first_int),
        ('rssanon_kb', int, 22, _status_first_int),
        ('rssfile_kb', int, 23, _status_first_int),
        ('rssshmem_kb', int, 24, _status_first_int),
        ('vmdata_kb', int, 25, _status_first_int),
        ('vmstk_kb', int, 26, _status_first_int),
        ('vmexe_kb', int, 27, _status_first_int),
        ('vmlib_kb', int, 28, _status_first_int),
        ('vmpte_kb', int, 29, _status_first_int),
        ('vmpmd_kb', int, 30, _status_first_int),
        ('vmswap_kb', int, 31, _status_first_int),
        ('hugetlbpages_kb', int, 32, _status_first_int),
        ('threads', int, 33),
        ('sigq', str, 34),
        ('sigpnd', str, 35),
        ('shdpnd', str, 36),
        ('sigblk', str, 37),
        ('sigign', str, 38),
        ('sigcgt', str, 39),
        ('capinh', str, 40),
        ('capprm', str, 41),
        ('capeff', str, 42),
        ('capbnd', str, 43),
        ('capamb', str, 44),
        ('seccomp', int, 45),
        ('cpus_allowed', str, 46),
        ('cpus_allowed_list', str, 47),
        ('mems_allowed', str, 48),
        ('mems_allowed_list', str, 49),
        ('voluntary_ctxt_switches', int, 50),
        ('nonvoluntary_ctxt_switches', int, 51),
    ],
    None,
    task_level=False,
    parse_sample=parse_status_sample,
)
### syscall ###
def extract_system_call_ids(unistd_64_fh):
    """Build a {syscall id string: syscall name} map from a unistd header.

    ``unistd_64_fh`` is an open file-like object providing ``readlines()``.
    Only three-token ``#define <name> <id>`` lines whose name starts with
    ``__NR_`` or ``__NR3264_`` are used, for example::

        #define __NR_mount 40
        #define __NR3264_truncate 45

    The map is pre-seeded with the pseudo ids 'running', '-1' and
    'kernel_thread'.  When an id is defined more than once, ``__NR3264_``
    definitions take precedence over ``__NR_`` ones and, within one prefix,
    the later line wins.
    """
    syscall_id_to_name = {'running': '[running]', '-1': '[kernel_direct]', 'kernel_thread': '[kernel_thread]'}

    # Read the header once: a file handle is exhausted after the first pass,
    # so re-reading it per prefix would silently skip the second prefix.
    lines = unistd_64_fh.readlines()

    for name_prefix in ['__NR_', '__NR3264_']:
        for line in lines:
            tokens = line.split()
            if len(tokens) != 3 or tokens[0] != '#define':
                continue
            _, s_name, s_id = tokens
            if s_name.startswith(name_prefix):
                syscall_id_to_name[s_id] = s_name[len(name_prefix):]

    return syscall_id_to_name
# x86_64 is assumed unless platform.machine() reports aarch64
def get_system_call_names():
    """Return the {syscall id string: name} map from the first readable unistd header.

    Candidate header locations depend on ``platform.machine()``; on
    non-aarch64 machines the list also includes ``syscall_64_<kernel
    version>.h`` and ``syscall_64.h`` next to this module.

    Raises ``Exception`` when none of the candidates can be opened.
    """
    psn_dir = os.path.dirname(os.path.realpath(__file__))
    kernel_ver = platform.release().split('-')[0]

    # this probably needs to be improved for better platform support
    if platform.machine() == 'aarch64':
        unistd_64_paths = ['/usr/include/asm-generic/unistd.h']
    else:
        unistd_64_paths = [
            '/usr/include/asm/unistd_64.h',
            '/usr/include/x86_64-linux-gnu/asm/unistd_64.h',
            '/usr/include/asm-x86_64/unistd.h',
            '/usr/include/asm/unistd.h',
            psn_dir + '/syscall_64_' + kernel_ver + '.h',
            psn_dir + '/syscall_64.h',
        ]

    for path in unistd_64_paths:
        try:
            with open(path) as f:
                return extract_system_call_ids(f)
        except IOError:
            # this candidate is missing or unreadable: try the next one
            continue

    raise Exception('unistd_64.h not found in ' + ' or '.join(unistd_64_paths) + '.\n You may need to "yum install kernel-headers" or "apt-get install libc6-dev"\n until this dependency is removed in a newer pSnapper version')
# id (string) -> name map, loaded once at import time
syscall_id_to_name = get_system_call_names()

# reverse map: syscall name -> id string
syscall_name_to_id = {call_name: call_id for call_id, call_name in syscall_id_to_name.items()}

# ids of the syscalls for which we can look up a filename from the fd
# argument; names missing from the header all collapse into 'N/A'
syscalls_with_fd_arg = {
    syscall_name_to_id.get(call_name, 'N/A')
    for call_name in (
        'read',
        'write',
        'pread64',
        'pwrite64',
        'fsync',
        'fdatasync',
        'recvfrom',
        'sendto',
        'recvmsg',
        'sendmsg',
        'epoll_wait',
        'ioctl',
        'accept',
        'accept4',
        'getdents',
        'getdents64',
        'unlinkat',
        'fstat',
        'fstatfs',
        'newfstatat',
        'openat',
        'readv',
        'writev',
        'preadv',
        'pwritev',
        'preadv2',
        'pwritev2',
    )
}

# labels appended to the resolved file name of the standard descriptors
special_fds = {0: '(stdin) ', 1: '(stdout)', 2: '(stderr)'}
def parse_syscall_sample(proc_source, sample):
    """Split a /proc/<pid>/task/<tid>/syscall sample into its fields.

    A sample whose first field is 'running' is expanded to a 9-element tuple
    (six empty argument strings, then None for the last two slots); any other
    sample is returned as the list of its whitespace-separated tokens.
    ``proc_source`` is unused.
    """
    fields = sample.split()
    if fields[0] != 'running':
        return fields
    return ('running',) + ('',) * 6 + (None, None)
# Matches runs of digits; used to collapse socket/pipe inode numbers and
# numbered file names into '*'.  Raw string: '\d' in a plain literal is an
# invalid escape sequence.
trim_socket = re.compile(r'\d+')

# /proc/<pid>/task/<tid>/syscall.  Field 9 is the file name that
# ProcSource.sample() appends after resolving the fd in arg0.
syscall = ProcSource('syscall', '/proc/%s/task/%s/syscall', [
    # 'running' maps to -2; the 'kernel_thread' marker that sample() substitutes
    # for '-1' / all-zero samples maps to -1 instead of failing in int()
    ('syscall_id', int, 0, lambda sn: -2 if sn == 'running' else -1 if sn == 'kernel_thread' else int(sn)),
    # convert syscall_id via unistd_64.h into call name; ids missing from the
    # header are shown as-is rather than raising KeyError
    ('syscall', str, 0, lambda sn: syscall_id_to_name.get(sn, sn)),
    ('arg0', str, 1),
    ('arg1', str, 2),
    ('arg2', str, 3),
    ('arg3', str, 4),
    ('arg4', str, 5),
    ('arg5', str, 6),
    ('esp', None, 7),  # stack pointer
    ('eip', None, 8),  # program counter/instruction pointer
    # file name, with digits masked only for sockets and pipes
    ('filename', str, 9, lambda fn: trim_socket.sub('*', fn) if fn.split(':')[0] in ['socket', 'pipe'] else fn),
    # file name exactly as resolved
    ('filename2', str, 9),
    # file name with every digit run masked
    ('filenamesum', str, 9, lambda fn: trim_socket.sub('*', fn)),
    # filename if syscall has fd as arg0
    ('basename', str, 9, lambda fn: trim_socket.sub('*', fn) if fn.split(':')[0] in ['socket', 'pipe'] else os.path.basename(fn)),
    ('dirname', str, 9, lambda fn: trim_socket.sub('*', fn) if fn.split(':')[0] in ['socket', 'pipe'] else os.path.dirname(fn)),
], None,
    task_level=True, parse_sample=parse_syscall_sample)
### get file name from file descriptor ###
#filename = ProcSource('fd', '/proc/%s/task/%s/fd', [('wchan', str, 0)], ['wchan'], task_level=True)
### process cmdline args ###
def parse_cmdline_sample(proc_source, sample):
    """Return a one-element list holding the text before the first NUL byte.

    The cmdline entry may contain spaces and is NUL-terminated; surrounding
    whitespace is stripped because some programs (postgres) pad their command
    line with extra spaces.  ``proc_source`` is unused.
    """
    first_entry, _, _ = sample.partition('\000')
    return [first_entry.strip()]
# /proc/<pid>/task/<tid>/cmdline: a single stored column
cmdline = ProcSource(
    'cmdline',
    '/proc/%s/task/%s/cmdline',
    [('cmdline', str, 0)],
    ['cmdline'],
    task_level=True,
    parse_sample=parse_cmdline_sample,
)
### wchan ###
# /proc/<pid>/task/<tid>/wchan: a single stored column, parsed with the
# default whitespace split
wchan = ProcSource(
    'wchan',
    '/proc/%s/task/%s/wchan',
    [('wchan', str, 0)],
    ['wchan'],
    task_level=True,
)
### io ###
def parse_io_sample(proc_source, sample):
    """Return the second token of every line of the sample.

    Empty lines (such as the one after the trailing newline) contribute an
    empty string.  ``proc_source`` is unused.
    """
    values = []
    for line in sample.split('\n'):
        if line:
            values.append(line.split()[1])
        else:
            values.append('')
    return values
# /proc/<pid>/task/<tid>/io: all counters are ints, one per line of the file
io = ProcSource(
    'io',
    '/proc/%s/task/%s/io',
    [
        (counter_name, int, position)
        for position, counter_name in enumerate((
            'rchar',
            'wchar',
            'syscr',
            'syscw',
            'read_bytes',
            'write_bytes',
            'cancelled_write_bytes',
        ))
    ],
    None,
    task_level=True,
    parse_sample=parse_io_sample,
)
### net/dev ### (not accounted at process level)
def read_net_samples(fh):
    """Return the lines of the open file minus the first two (header) lines."""
    lines = fh.readlines()
    del lines[:2]
    return lines
def parse_net_sample(proc_source, sample):
    """Split one interface line into tokens, dropping the last character
    (the ':' separator) of the first token.  ``proc_source`` is unused."""
    parts = sample.split()
    return [parts[0][:-1]] + parts[1:]
# /proc/<pid>/task/<tid>/net/dev: one sample (row) per interface line.
# The path takes both pid and task, so the source must be task-level;
# without task_level=True the path would be formatted with pid only and
# fail with "not enough arguments for format string".
net = ProcSource('net', '/proc/%s/task/%s/net/dev', [
    ('iface', str, 0),
    ('rx_bytes', str, 1),
    ('rx_packets', str, 2),
    ('rx_errs', str, 3),
    ('rx_drop', str, 4),
    ('rx_fifo', str, 5),
    ('rx_frame', str, 6),
    ('rx_compressed', str, 7),
    ('rx_multicast', str, 8),
    ('tx_bytes', str, 9),
    ('tx_packets', str, 10),
    ('tx_errs', str, 11),
    ('tx_drop', str, 12),
    ('tx_fifo', str, 13),
    ('tx_colls', str, 14),
    ('tx_carrier', str, 15),
    ('tx_compressed', str, 16),
], None,
    task_level=True,
    read_samples=read_net_samples,
    parse_sample=parse_net_sample)
### stack ###
def read_stack_samples(fh):
    """Read a kernel stack listing and return it as one 'a()->b()' string.

    The lines are processed in reverse order and the first line of that
    reversed order (the 0xfffffffffffff top frame on kernels that print one)
    is dropped.  The function name is the second space-separated token of a
    line, cut at the first '+'.  Generic syscall-entry frames are left out.

    Returns a one-element list; the element is '-' when no frame remains.
    """
    ignored_frames = [
        'entry_SYSCALL_64_after_hwframe', 'do_syscall_64', 'el0t_64_sync_handler',
        'el0_svc', 'do_el0_svc', 'el0_svc_common.constprop.0', 'invoke_syscall.constprop.0',
    ]

    frames = []
    for stack_line in fh.readlines()[::-1][1:]:
        func = stack_line.split(' ')[1].split('+')[0]
        if func in ignored_frames:
            continue
        frames.append(func + '()')

    return ['->'.join(frames) or '-']
# /proc/<pid>/task/<tid>/stack: the whole stack is one 'kstack' string built
# by read_stack_samples
stack = ProcSource(
    'stack',
    '/proc/%s/task/%s/stack',
    [('kstack', str, 0)],
    None,
    task_level=True,
    read_samples=read_stack_samples,
)
### smaps ###
def read_smaps_samples(fh):
    """Split an smaps listing into one text chunk per mapping.

    A chunk ends with the line that starts with 'VmFlags'; lines after the
    last such line are discarded.
    """
    samples = []
    pending_lines = []
    for line in fh.readlines():
        pending_lines.append(line)
        if line.startswith('VmFlags'):
            samples.append(''.join(pending_lines))
            pending_lines = []
    return samples
def parse_smaps_sample(proc_source, sample):
    """Parse one smaps mapping (header line ... VmFlags line) into positional values.

    Layout of the result, matching ``proc_source.available_columns``:

    * the first five header tokens, then the rest of the header joined with
      spaces (the path name, possibly empty);
    * one value per "Name: value [unit]" line, placed at the column named
      ``<name lower-cased>_kb``;
    * the flags of the last line, joined with spaces, in the final column.

    Columns for which the kernel reported no line get the default value of
    their type (``parse_fn()``).  Lines that match no remaining column are
    skipped, and lines without a unit (e.g. "THPeligible: 0") are accepted.
    """
    columns = proc_source.available_columns
    n_columns = len(columns)

    sample_lines = [l for l in sample.split('\n') if l != '']

    header_tokens = sample_lines[0].split()
    sample_values = header_tokens[:5]
    sample_values.append(' '.join(header_tokens[5:]))

    for line in sample_lines[1:-1]:
        tokens = line.split()
        if len(tokens) < 2:
            continue  # no value on this line
        n = tokens[0][:-1].lower() + '_kb'  # drop the trailing ':'
        kb = tokens[1]

        # first not-yet-filled column with this name (order is assumed stable)
        target = next((i for i in range(len(sample_values), n_columns) if columns[i][0] == n), None)
        if target is None:
            continue  # field unknown to this schema: ignore it

        # columns skipped over were not reported by the kernel: use type defaults
        while len(sample_values) < target:
            parse_fn = columns[len(sample_values)][1]
            sample_values.append(parse_fn())
        sample_values.append(kb)

    # pad up to, but not including, the final (flags) column
    while len(sample_values) < n_columns - 1:
        parse_fn = columns[len(sample_values)][1]
        sample_values.append(parse_fn())

    sample_values.append(' '.join(sample_lines[-1].split()[1:]))
    return sample_values
# /proc/<pid>/smaps (process level): one sample (row) per mapping.  Columns
# are numbered in listing order; the first six come from the mapping header
# line and the last one from the VmFlags line.
# NOTE(review): 'shared_hugetld_kb' / 'private_hugetld_kb' look like
# misspellings of "hugetlb"; the parser matches lower-cased field names
# against these column names -- confirm against real smaps output before
# renaming, since column names are user-visible.
smaps = ProcSource(
    'smaps',
    '/proc/%s/smaps',
    [
        (column_name, column_type, position)
        for position, (column_name, column_type) in enumerate([
            ('address_range', str),
            ('perms', str),
            ('offset', str),
            ('dev', str),
            ('inode', int),
            ('pathname', str),
            ('size_kb', int),
            ('rss_kb', int),
            ('pss_kb', int),
            ('shared_clean_kb', int),
            ('shared_dirty_kb', int),
            ('private_clean_kb', int),
            ('private_dirty_kb', int),
            ('referenced_kb', int),
            ('anonymous_kb', int),
            ('anonhugepages_kb', int),
            ('shmempmdmapped_kb', int),
            ('shared_hugetld_kb', int),
            ('private_hugetld_kb', int),
            ('swap_kb', int),
            ('swappss_kb', int),
            ('kernelpagesize_kb', int),
            ('mmupagesize_kb', int),
            ('locked_kb', int),
            ('vmflags', str),
        ])
    ],
    None,
    task_level=False,
    read_samples=read_smaps_samples,
    parse_sample=parse_smaps_sample,
)
# sources exported for sampling; `net` is not part of this list
all_sources = [
    stat,
    status,
    syscall,
    wchan,
    io,
    smaps,
    stack,
    cmdline,
]
# Zerion Mini Shell 1.0