BEGIN {
FS = ",";
# testing attack types from Wenke Lee
category["apache2."] = 2
category["back."] = 2
category["buffer_overflow."] = 3
category["ftp_write."] = 4
category["guess_passwd."] = 4
category["httptunnel."] = 4
category["httptunnel."] = 3
category["imap."] = 4
category["ipsweep."] = 1
category["land."] = 2
category["loadmodule."] = 3
category["mailbomb."] = 2
category["mscan."] = 1
category["multihop."] = 4
category["multihop."] = 3 # note that this is a duplicate
category["named."] = 4
category["neptune."] = 2
category["nmap."] = 1
category["perl."] = 3
category["phf."] = 4
category["pod."] = 2
category["portsweep."] = 1
category["processtable."] = 2
category["ps."] = 3
category["rootkit."] = 3
category["saint."] = 1
category["satan."] = 1
category["sendmail."] = 4
category["smurf."] = 2
category["snmpgetattack."] = 4
category["snmpguess."] = 4
category["sqlattack."] = 3
category["teardrop."] = 2
category["udpstorm."] = 2
category["warezmaster."] = 2
category["worm."] = 4
category["xlock."] = 4
category["xsnoop."] = 4
category["xterm."] = 3
# training attack types
category["back."] = 2
category["buffer_overflow."] = 3
category["ftp_write."] = 4
category["guess_passwd."] = 4
category["imap."] = 4
category["ipsweep."] = 1
category["land."] = 2
category["loadmodule."] = 3
category["multihop."] = 4
category["neptune."] = 2
category["nmap."] = 1
category["perl."] = 3
category["phf."] = 4
category["pod."] = 2
category["portsweep."] = 1
category["rootkit."] = 3
category["satan."] = 1
category["smurf."] = 2
category["spy."] = 4
category["teardrop."] = 2
category["warezclient."] = 4
category["warezmaster."] = 4 # note that this contradicts
the category above
# initialize matrix cost[actual,predicted]
# note that all other entries of the cost matrix are zero, so the
computed
# total cost is too low if either actual or predicted is ever out-of-range
for (i = 0; i <= 4; i++)
for (j = 0; j <= 4; j++)
if (i == j) cost[i,j] = 0;
else cost[i,j] = 2;
cost[0,1] = 1;
cost[1,0] = 1;
cost[2,1] = 1;
cost[3,0] = 3;
cost[4,0] = 4;
total = 0
} # end of BEGIN
{
label = $NF;
# heuristic to identify incorrectly labeled records
# if (label == "normal." && $3 == "ecr_i") { disputed++;
label = "smurf." }
if (label == label+0) cat = label+0; # works for submitted
entries
else if (label == "normal.") cat = 0;
else if (label in category) cat = category[label];
else cat = 9;
total++
count[cat]++;
for (j = 0; j <= 4; j++) basecost[j] += cost[cat,j];
print cat;
}
END {
print disputed, "records changed from normal to smurf";
for (j = 0; j <= 4; j++) basecost[j] /= total;
for (cat in count)
print cat ": " 100*count[cat]/total "%%\t base cost", basecost[cat];
print "total", total;
}