Evaluate.py
import pandas as pd
import scipy.special
import sys


def evaluate(ground_truth, parsed_result):
    """
    Given the output structured file and the ground truth structured file, return the F1-measure
    of clustering and the parsing accuracy of log parsing.
    :param ground_truth: ground truth file location
    :param parsed_result: output structured file location
    :return: F1-measure of clustering and parsing accuracy
    """
    df_ground_truth = pd.read_csv(ground_truth)
    df_parsed_log = pd.read_csv(parsed_result)
    # Keep only rows whose ground truth EventTemplate is non-null (drop invalid templates)
    non_null_log_ids = df_ground_truth[~df_ground_truth['EventTemplate'].isnull()].index
    df_ground_truth = df_ground_truth.loc[non_null_log_ids]
    df_parsed_log = df_parsed_log.loc[non_null_log_ids]
    (precision, recall, f_measure, accuracy) = get_accuracy(df_ground_truth['EventTemplate'],
                                                            df_parsed_log['EventTemplate'])
    return f_measure, accuracy
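
# Illustration (hypothetical file names and data, not part of the original script): the two
# CSV files must align row-for-row and each contain an 'EventTemplate' column, for example:
#
#   LineId,Content,EventTemplate
#   1,Connection from 10.0.0.1,Connection from <*>
#   2,Connection from 10.0.0.2,Connection from <*>
#
#   f_measure, accuracy = evaluate('ground_truth.csv', 'parsed.csv')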

def get_accuracy(series_ground_truth, series_parsedlog, debug=False):
    """
    Given the ground truth and output templates as pandas Series, calculate precision, recall,
    F1-measure and parsing accuracy.
    :param series_ground_truth: ground truth templates as a pandas Series
    :param series_parsedlog: output templates as a pandas Series
    :param debug: whether to print the event ids of incorrectly parsed messages
    :return: precision, recall, F1-measure and parsing accuracy
    """
    series_ground_truth_value_counts = series_ground_truth.value_counts()
    real_pairs = 0  # number of message pairs that share a template in the ground truth
    for count in series_ground_truth_value_counts:
        if count > 1:
            real_pairs += scipy.special.comb(count, 2)

    series_parsed_log_value_counts = series_parsedlog.value_counts()
    parsed_pairs = 0  # number of message pairs that share a template in the parsed output
    for count in series_parsed_log_value_counts:
        if count > 1:
            parsed_pairs += scipy.special.comb(count, 2)

    accurate_pairs = 0
    accurate_events = 0  # number of log messages that are parsed correctly
    for parsed_eventId in series_parsed_log_value_counts.index:
        # log ids that were assigned parsed_eventId in the results output file
        log_ids = series_parsedlog[series_parsedlog == parsed_eventId].index
        series_ground_truth_logId_value_counts = series_ground_truth[log_ids].value_counts()
        error_eventIds = (parsed_eventId, series_ground_truth_logId_value_counts.index.tolist())
        error = True
        # A parsed cluster counts as correct only if it maps to exactly one ground truth
        # template and covers every message belonging to that template
        if series_ground_truth_logId_value_counts.size == 1:
            ground_truth_event_id = series_ground_truth_logId_value_counts.index[0]
            if log_ids.size == series_ground_truth[series_ground_truth == ground_truth_event_id].size:
                accurate_events += log_ids.size
                error = False
        if error and debug:
            print('(parsed_eventId, ground_truth_event_id) =', error_eventIds, 'failed', log_ids.size, 'messages')
        for count in series_ground_truth_logId_value_counts:
            if count > 1:
                accurate_pairs += scipy.special.comb(count, 2)

    precision = float(accurate_pairs) / parsed_pairs
    recall = float(accurate_pairs) / real_pairs
    f_measure = 2 * precision * recall / (precision + recall)
    accuracy = float(accurate_events) / series_ground_truth.size
    return precision, recall, f_measure, accuracy
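
# Worked example (illustrative only): for ground truth templates ['A', 'A', 'A', 'B', 'B'],
# value_counts() gives {A: 3, B: 2}, so real_pairs = comb(3, 2) + comb(2, 2) = 3 + 1 = 4.
# A parser that lumps all five messages into one cluster yields parsed_pairs = comb(5, 2) = 10
# and accurate_pairs = 3 + 1 = 4, hence precision = 0.4, recall = 1.0 and F1 ≈ 0.57, while
# parsing accuracy = 0.0 because no cluster maps one-to-one onto a ground truth template.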

if __name__ == '__main__':
    DATASET = sys.argv[1]
    THRESHOLD = sys.argv[2]
    TYPE = sys.argv[3]
    ground_truth_path = 'ground_truth/' + DATASET + '_2k.log_structured.csv'
    # results_ is for manual inverted index results
    output_path = 'results/' + TYPE + '/' + THRESHOLD + '/' + DATASET + '_structured.csv'
    f_measure, accuracy = evaluate(ground_truth_path, output_path)
    print('F1_measure: %.4f, Parsing_Accuracy: %.4f' % (f_measure, accuracy))
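
# Example invocation (hypothetical dataset name, threshold and parser type):
#
#   python Evaluate.py HDFS 0.5 inverted_index
#
# reads ground_truth/HDFS_2k.log_structured.csv, compares it against
# results/inverted_index/0.5/HDFS_structured.csv, and prints the F1-measure
# and parsing accuracy.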