-
Notifications
You must be signed in to change notification settings - Fork 13
Expand file tree
/
Copy patheval.py
More file actions
36 lines (29 loc) · 1.35 KB
/
eval.py
File metadata and controls
36 lines (29 loc) · 1.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import os
import logging
import argparse
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Marker tokens that are skipped (neither counted nor scored) when they
# appear in the answer sequence.
_IGNORED_TOKENS = frozenset(("<s>", "</s>", "<EOL>", "<pad>"))


def _read_lines(path):
    """Return every line of the text file at *path*, closing the file afterwards."""
    with open(path, "r") as handle:
        return handle.readlines()


def _count_token_matches(preds, gts):
    """Count scored tokens and exact matches over paired prediction/answer lines.

    Each line is split on whitespace. A position is scored only when the
    answer token is not one of ``_IGNORED_TOKENS``; it counts as correct when
    the prediction token equals the answer token.

    Args:
        preds: Prediction lines.
        gts: Answer lines, paired by position with ``preds``.

    Returns:
        A ``(total, correct)`` tuple of integer counts.

    Raises:
        AssertionError: If a prediction line and its answer line contain a
            different number of tokens.
    """
    total = 0
    correct = 0
    for pred_line, gt_line in zip(preds, gts):
        pred_tokens = pred_line.split()
        gt_tokens = gt_line.split()
        # Raised explicitly (not via ``assert``) so the check still runs under
        # ``python -O`` while callers keep seeing the same exception type.
        if len(pred_tokens) != len(gt_tokens):
            raise AssertionError(
                f"Sequence length of prediction and answer are not equal, {len(pred_tokens)}: {len(gt_tokens)}"
            )
        for predicted, expected in zip(pred_tokens, gt_tokens):
            if expected in _IGNORED_TOKENS:
                continue
            total += 1
            if predicted == expected:
                correct += 1
    return total, correct


def main():
    """Compute and log token-level accuracy of predictions against answers.

    Reads the command line: ``--answers/-a`` and ``--predictions/-p`` are both
    required and name text files whose lines are whitespace-separated tokens.
    The number of scored tokens and the accuracy (a percentage rounded to two
    decimals) are logged at INFO level. When no token is scored, the accuracy
    is reported as ``0.0`` instead of failing with a division by zero.

    Raises:
        AssertionError: If the two files have a different number of lines, or
            if a pair of lines has a different number of tokens.
    """
    parser = argparse.ArgumentParser(description='Evaluate leaderboard predictions for code completion (token level).')
    parser.add_argument('--answers', '-a', required=True, help="filename of the labels, in txt format.")
    parser.add_argument('--predictions', '-p', required=True, help="filename of the leaderboard predictions, in txt format.")
    args = parser.parse_args()

    # Context-managed reads so both file handles are closed deterministically.
    preds = _read_lines(args.predictions)
    gts = _read_lines(args.answers)

    # Explicit raise instead of ``assert``: without this check ``zip`` would
    # silently truncate misaligned inputs when assertions are disabled.
    if len(preds) != len(gts):
        raise AssertionError(
            f"Samples of predictions and answers are not equal, {len(preds)}: {len(gts)}"
        )

    total, correct = _count_token_matches(preds, gts)
    # Guard the division: inputs with no scored token yield an accuracy of 0.0.
    accuracy = correct / total * 100 if total else 0.0
    logger.info(f"Total {total} tokens, accuracy: {round(accuracy, 2)}")


# Run the evaluation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()