#!/bin/bash
# nlp-awk.sh
# Reads an Apache log file and summarizes it
# 0.8.5
# Written by: Ryan Cunningham (ryanc@inmotionhosting.com)
# 0.8.3: Fix for URLs that have spaces in them which breaks absolutely everything
# 0.8.4: Works properly with mawk or gawk
# 0.8.5: Changed FS to a regex in order to compensate for requests that contain
# quotation marks - 10M lines in 1:46.59
# Output is as follows:
# 0 - Number of hits by hour
# 1 - HTTP response codes
# 2 - IPs
# 3 - User Agents
# 4 - Requests
# 5 - Requests for non-static content, query strings stripped off
# Choose the input source from the first argument: the literal word "stdin"
# selects /dev/stdin, any other value is used unchanged as the log file path.
case "$1" in
  stdin)
    FILE=/dev/stdin
    ;;
  *)
    FILE=$1
    ;;
esac
# Optional second argument: passed to awk as the "day" regex that the first
# field of a log line must match. Missing or empty means an empty filter.
DAY=${2:-}
# Pick the awk interpreter: mawk when it is installed, otherwise plain awk.
# "command -v" only looks the name up. The previous "command mawk" actually ran
# mawk without a program, which exits non-zero, so mawk was never selected.
if command -v mawk >/dev/null 2>&1; then
  INT=mawk
else
  INT=awk
fi
# Summarize the log in one awk pass and print "|"-separated records, ordered by
# their second field (numeric, descending).
#   $INT  - awk interpreter to run
#   $DAY  - regex the first field of a line must match (empty matches all lines)
#   $FILE - log path; when empty, no file operand is passed and awk reads stdin
# ${FILE:+"$FILE"} keeps a path containing spaces as a single word without
# handing awk an empty operand.
# stderr of both commands is sent to /dev/null so nothing is printed to the
# controlling terminal.
"$INT" -v "day=$DAY" '
BEGIN {
  # A field boundary is a space followed by a double quote, or a double quote
  # followed by a space.
  FS = " \"|\" "
  OFS = "|"
  # Create every array up front. The placeholder "" keys stay at 0 and are
  # skipped by the non-zero checks in END.
  a[0] = ""; b[0] = ""; c[0] = ""
  req[""] = 0; ip[""] = 0; ua[""] = 0; rcode[""] = 0
  hits[""] = 0; nstatic[""] = 0; refs[""] = 0
}

# With NF < 2 the index NF-2 is negative, which is a fatal error in awk, and
# with NF == 2 it names the whole line. Neither holds a status field, so such
# lines are skipped instead of aborting the run or polluting the counts.
NF < 3 { next }

# Only lines whose first field matches the day regex are counted.
$1 ~ day {
  ## The first field and last three should always be usable
  # Split fields by spaces
  split($1, a, " ")
  split($(NF-2), c, " ")

  # if fields dont split cleanly on FS
  if (NF != 5) {
    # This is ugly but it should almost never have to actually be run
    lbound = index($0, "]") + 3
    $2 = substr($0, lbound, index($0, $(NF-2)) - lbound - 2)
  }

  blen = split($2, b, " ")
  if ($2 != "-") {
    # The request up to, but not including, its last space-separated word.
    mth_res = substr($2, 1, index($2, b[blen]) - 2)
    # Strip the query string: keep everything before the first "?".
    # substr() is called with start 1 because a start of 0 is counted
    # differently between awk implementations and can drop one character.
    qpos = index($2, "?")
    if (qpos > 1) {
      uri = substr($2, 1, qpos - 1)
    } else {
      uri = mth_res
    }
  } else {
    mth_res = $2
    uri = $2
  }

  ip_addr = a[1]
  # 14 characters starting at the second character of the 4th word of $1
  # (presumably dd/Mon/yyyy:hh of the bracketed timestamp).
  hourly = substr(a[4], 2, 14)
  res_code = c[1]
  u_agent = $NF

  ip[ip_addr]++
  hits[hourly]++
  req[res_code OFS mth_res]++
  rcode[res_code]++
  ua[u_agent]++
  nstatic[res_code OFS uri]++
}

END {
  # Sections 0 and 1 carry a fixed number in the second field instead of a
  # count; their count is the last field.
  for (i in hits) { if (hits[i] != 0) { print 0 OFS 10000000 OFS i OFS hits[i] } }
  for (i in rcode) { if (rcode[i] != 0) { print 1 OFS 20000000 OFS i OFS rcode[i] } }
  for (i in ip) { if (ip[i] != 0) { print 2 OFS ip[i] OFS i } }
  for (i in ua) { if (ua[i] != 0) { print 3 OFS ua[i] OFS i } }
  for (i in req) { if (req[i] != 0) { print 4 OFS req[i] OFS i } }
  # for (i in refs) { if (refs[i] != 0) { print 6 OFS refs[i] OFS i; } }
  # Section 5 leaves out keys that end in a static-content extension.
  for (i in nstatic) {
    if (match(tolower(i), /(jpg|jpeg|gif|png|ico|txt|pdf|swf|xml|css|js)$/) == 0) {
      if (nstatic[i] != 0) { print 5 OFS nstatic[i] OFS i }
    }
  }
}' ${FILE:+"$FILE"} 2>/dev/null | sort -t'|' -k2,2rn 2>/dev/null
# (stray file-viewer footer, not part of nlp-awk.sh) Zerion Mini Shell 1.0