Add evaluation dashboard generator

This script isn’t well-coded but serves its purpose. It shows the
current state of the benchmark and aggregated results as a website.
This commit is contained in:
Patrick Lühne 2017-12-01 14:12:36 +01:00
parent f08b17298d
commit 519338232c
Signed by: patrick
GPG Key ID: 05F3611E97A70ABF
3 changed files with 887 additions and 0 deletions

319
evaluate.py Executable file
View File

@ -0,0 +1,319 @@
#!/usr/bin/python3
import math
import os
import re
import subprocess
import sys
import time
import yaml
import pprint
# Presumably an RGB colour triple (three 0-255 components) -- TODO confirm.
# No code in the visible part of this script reads it.
gray = (186, 189, 182)
def executeCommand(command, stdin = None, cwd = None):
    """Run an external command and capture everything it prints.

    Args:
        command: Argument list handed to subprocess.Popen (no shell involved).
        stdin: Optional text; it is UTF-8 encoded and written to the child's
            standard input. When None, no pipe is created for stdin.
        cwd: Optional working directory for the child process.

    Returns:
        A (stdout, stderr, exitCode) tuple. Both streams are decoded as UTF-8;
        bytes that are not valid UTF-8 become U+FFFD instead of aborting the
        whole run with a UnicodeDecodeError.
    """
    hasInput = stdin is not None

    with subprocess.Popen(
        command,
        stdout = subprocess.PIPE,
        stderr = subprocess.PIPE,
        stdin = subprocess.PIPE if hasInput else None,
        cwd = cwd,
    ) as process:
        # communicate() drains both pipes concurrently, so a chatty child
        # cannot dead-lock on a full pipe buffer.
        stdout, stderr = process.communicate(input = stdin.encode("utf-8") if hasInput else None)
        exitCode = process.returncode

    return (
        stdout.decode("utf-8", errors = "replace"),
        stderr.decode("utf-8", errors = "replace"),
        exitCode,
    )
def git(command, cwd, enforce = False):
    """Run `git <command...>` inside `cwd`.

    Args:
        command: List of git arguments (without the leading "git").
        cwd: Directory in which git is executed (may be None).
        enforce: When true, a failing git call raises instead of only
            being reported.

    Raises:
        RuntimeError: git exited with a non-zero status and `enforce` is set.
    """
    _, errorOutput, status = executeCommand(["git"] + command, cwd = cwd)

    if status == 0:
        return

    # Failures are always reported on stderr, even when they are tolerated.
    print(errorOutput, file = sys.stderr)

    if enforce:
        raise RuntimeError("git error")
def initRepo(config):
    """Make sure the local storage checkout exists and is up to date.

    Reads config["storage"]: "local" (checkout directory), "remote" (clone
    URL, only used when the directory is missing) and "branches" (mapping
    whose values are branch names).
    """
    storage = config["storage"]
    localDir = storage["local"]

    # First run: there is no checkout yet, so cloning must succeed.
    if not os.path.isdir(localDir):
        git(["clone", storage["remote"], localDir], None, enforce = True)

    git(["fetch"], cwd = localDir)

    # Bring every configured branch up to date; a failing checkout is fatal,
    # a failing pull is only reported.
    for branch in storage["branches"].values():
        git(["checkout", branch], cwd = localDir, enforce = True)
        git(["pull"], cwd = localDir)
def readBenchmarkConfig(config):
    """Load the instance list and the configurations from the config branch.

    Returns:
        {"configurations": <parsed configurations.yml>,
         "instances": <parsed instances.yml>}; every configuration's
        "options" (a list of lists in the file) is flattened into one list.
    """
    initRepo(config)

    storage = config["storage"]
    localDir = storage["local"]

    git(["checkout", storage["branches"]["config"]], cwd = localDir, enforce = True)

    def loadYAML(filename):
        # NOTE(review): yaml.CLoader is the C counterpart of the full
        # yaml.Loader, not of the safe loader; consider CSafeLoader if the
        # repository content is not fully trusted -- confirm.
        with open(os.path.join(localDir, filename), "r") as stream:
            return yaml.load(stream, Loader=yaml.CLoader)

    instances = loadYAML("instances.yml")
    configurations = loadYAML("configurations.yml")

    # Each configuration lists its options as groups; join the groups.
    for configuration in configurations["configurations"]:
        flattened = []
        for optionGroup in configuration["options"]:
            flattened.extend(optionGroup)
        configuration["options"] = flattened

    return {"configurations": configurations, "instances": instances}
def outputFilenames(configuration, instance, config):
    """Return the relative paths of the three files written for one job.

    The paths have the form "<configuration id>/<ipc>_<domain>_<instance>"
    followed by ".out", ".err" and ".env". `config` is accepted but not used.

    Returns:
        Dict with the keys "outputFile", "errorFile" and "environmentFile".
    """
    instanceID = "_".join((instance["ipc"], instance["domain"], str(instance["instance"])))

    def pathFor(extension):
        return os.path.join(configuration["id"], instanceID + extension)

    return {
        "outputFile": pathFor(".out"),
        "errorFile": pathFor(".err"),
        "environmentFile": pathFor(".env"),
    }
def jobKey(configuration, instance):
    """Return the (configuration id, ipc, domain, instance) tuple of a job."""
    configurationID = configuration["id"]
    ipc, domain, number = (instance[field] for field in ("ipc", "domain", "instance"))
    return (configurationID, ipc, domain, number)
def instanceKey(instance):
    """Return the (ipc, domain, instance) tuple identifying an instance."""
    return tuple(instance[field] for field in ("ipc", "domain", "instance"))
def addResult(results, configuration, instance, result):
    """Store `result` in `results` under [configuration id][instance key].

    The per-configuration dict is created on first use; `results` is
    modified in place and nothing is returned.
    """
    perConfiguration = results.setdefault(configuration["id"], {})
    perConfiguration[instanceKey(instance)] = result
def result(results, configuration, instance):
    """Look up the stored result of `instance` under `configuration`.

    Raises:
        KeyError: nothing was stored for that configuration/instance pair.
    """
    perConfiguration = results[configuration["id"]]
    return perConfiguration[instanceKey(instance)]
def mix(color1, color2, t):
    """Linearly interpolate the first three components of two colors.

    `t` = 0 yields `color1`, `t` = 1 yields `color2`.

    Returns:
        A 3-tuple with the blended components.
    """
    return tuple(color1[channel] * (1 - t) + color2[channel] * t for channel in range(3))
def resultColor(result, config):
    """Map a runtime onto a color of the `colors` scale.

    Runtimes <= 0 map to the first color, runtimes at or above
    config["limits"]["time"] to the last one; everything in between is
    placed on the scale by (result / limit) ** 0.2 and blended between the
    two neighbouring entries with mix().

    NOTE(review): `colors` is not defined anywhere in the visible part of
    this file, so calling this function as-is presumably raises NameError --
    confirm whether it is dead code.
    """
    if result <= 0:
        return colors[0]

    timeLimit = config["limits"]["time"]

    if result >= timeLimit:
        return colors[-1]

    lastIndex = len(colors) - 1
    position = (result / timeLimit) ** 0.2
    position *= lastIndex

    lower = min(math.floor(position), lastIndex)
    fraction = position - lower

    # Exactly on a scale entry: no blending necessary.
    if fraction <= 0:
        return colors[lower]
    if fraction >= 1:
        return colors[lower + 1]

    return mix(colors[lower], colors[lower + 1], fraction)
# Markers searched for in a job's .err file. They are compiled once at import
# time instead of once per (instance, configuration) pair.
_finishedRE = re.compile(r"^FINISHED CPU", re.M)
_runtimeRE = re.compile(r'<time name="ALL">(.*)</time>', re.M)
_timeoutRE = re.compile(r"^TIMEOUT CPU", re.M)
_memoutRE = re.compile(r"^MEM CPU", re.M)
_exitCodeRE = re.compile(r"^# exit code: (\d+)$", re.M)


def _parseRunResult(errors, config):
    """Classify one job from the text of its .err file.

    Checks are made in this order: non-zero exit code, finished, timeout,
    memory-out.

    Returns:
        {"text": ..., "color": ...} where "color" is a fraction in [0, 1]
        (None for errors), or None when the log contains none of the known
        markers or lacks the runtime of a finished job.
    """
    exitCode = _exitCodeRE.search(errors)

    if exitCode and int(exitCode.group(1)) != 0:
        return {"text": "error", "color": None}

    if _finishedRE.search(errors):
        runtime = _runtimeRE.search(errors)

        if runtime is None:
            # Finished according to the marker, but no runtime was logged.
            return None

        # The logged value is divided by 1000 before being compared against
        # the time limit (presumably milliseconds -> seconds).
        value = float(runtime.group(1)) / 1000
        # Clamp so that the fraction always selects one of the result-0 ..
        # result-100 classes and never turns complex for a negative runtime.
        fraction = min(1.0, max(0.0, value / config["limits"]["time"]) ** 0.2)
        return {"text": str(value), "color": fraction}

    if _timeoutRE.search(errors):
        return {"text": "> " + str(config["limits"]["time"]), "color": 1.0}

    if _memoutRE.search(errors):
        return {"text": "> " + str(config["limits"]["memory"] / 1000000) + " GB", "color": 1.0}

    # Inconclusive log: report it as "no result" rather than reusing the
    # values left over from the previously processed job.
    return None


def collectResults(config):
    """Read the benchmark definition and the result of every job.

    Checks out the results branch and evaluates the ".err" file of each
    (instance, configuration) pair. A pair whose ".out", ".err" or ".env"
    file is missing, or whose log is inconclusive, is stored as None.

    Returns:
        (configurations, instances, results) where `results` maps
        configuration id -> instance key -> result dict or None.
    """
    benchmarkConfig = readBenchmarkConfig(config)

    dataDir = config["storage"]["local"]

    # checkout results branch
    git(["checkout", config["storage"]["branches"]["results"]], cwd = dataDir, enforce = True)

    configurations = benchmarkConfig["configurations"]["configurations"]
    instances = benchmarkConfig["instances"]

    results = {}

    for instanceSet in instances.values():
        for instance in instanceSet:
            for configuration in configurations:
                filenames = outputFilenames(configuration, instance, config)
                outputFile, errorFile, environmentFile = (
                    os.path.join(dataDir, filenames[key])
                    for key in ("outputFile", "errorFile", "environmentFile")
                )

                # All three files must exist before a job counts as done.
                if not all(os.path.exists(path) for path in (outputFile, errorFile, environmentFile)):
                    addResult(results, configuration, instance, None)
                    continue

                with open(errorFile, "r") as errorOutput:
                    errors = errorOutput.read()

                addResult(results, configuration, instance, _parseRunResult(errors, config))

    return configurations, instances, results
def aggregateResults(configurations, instanceSetID, instanceSet, instances, results, unsolvedRuntime = 900.0):
    """Aggregate per-instance results by (ipc, domain) and in total.

    Args:
        configurations: List of configuration dicts ("id", "instanceSets").
        instanceSetID: Name of the instance set being aggregated; only
            configurations listing it in "instanceSets" are considered.
        instanceSet: The instances belonging to that set.
        instances: All instance sets (accepted but not used here).
        results: Result store as filled by addResult().
        unsolvedRuntime: Runtime charged for a job whose result text is not
            a plain number (error, timeout, memory-out). Defaults to the
            previously hard-coded 900.0.

    Returns:
        Dict mapping (ipc, domain) -- plus the extra key ("total", "") -- to
        {configuration id: {"instances solved", "average runtime",
        "results"}}. Jobs without a result (None) are skipped entirely.
    """
    def emptyEntry():
        return {"instances solved": 0, "average runtime": None, "results": []}

    totalKey = ("total", "")
    aggregatedResults = {totalKey: {}}

    for instance in instanceSet:
        ipcDomain = (instance["ipc"], instance["domain"])
        domainResults = aggregatedResults.setdefault(ipcDomain, {})

        for configuration in configurations:
            if instanceSetID not in configuration["instanceSets"]:
                continue

            configurationID = configuration["id"]
            # Every job is counted once for its domain and once for the total.
            entries = (
                domainResults.setdefault(configurationID, emptyEntry()),
                aggregatedResults[totalKey].setdefault(configurationID, emptyEntry()),
            )

            r = result(results, configuration, instance)

            if r is None:
                continue

            # Only a result text that parses as a number is a solved
            # instance; "error" and "> ..." texts fall back to the penalty.
            try:
                value = float(r["text"])
                solved = True
            except (TypeError, ValueError):
                value = unsolvedRuntime
                solved = False

            for entry in entries:
                if solved:
                    entry["instances solved"] += 1
                entry["results"].append(value)

    for domainResults in aggregatedResults.values():
        for entry in domainResults.values():
            runtimes = entry["results"]
            # max(1, ...) keeps the average of an empty list at 0 instead of
            # dividing by zero.
            entry["average runtime"] = sum(runtimes) / max(1, len(runtimes))

    return aggregatedResults
def requiresInstance(configuration, instance, instances):
    """Tell whether `instance` belongs to an instance set of `configuration`.

    Args:
        configuration: Dict whose "instanceSets" lists instance set names.
        instance: The instance to look for.
        instances: Mapping of instance set name -> list of instances.

    Returns:
        True as soon as one of the configuration's sets contains the
        instance, False if none does.

    Raises:
        RuntimeError: a set named by the configuration is not defined in
            `instances` (raised when that set is reached during the search).
    """
    for requiredInstanceSet in configuration["instanceSets"]:
        if requiredInstanceSet not in instances:
            raise RuntimeError("undefined instance set “" + requiredInstanceSet + "”")

        if instance in instances[requiredInstanceSet]:
            return True

    return False
def renderResultsTable(configurations, instanceSetID, instanceSet, instances, results):
    """Print the detailed per-instance HTML table of one instance set.

    One column is emitted per configuration that lists `instanceSetID` in
    its "instanceSets", one row per instance. Cells are classed "tbd" (no
    result), "error", or "result result-N" with N in 0..100 selecting the
    background gradient step. `instances` is accepted but not used.

    All output goes to stdout; nothing is returned.
    """
    # Function-scope import so that this block brings its own dependency.
    import html

    def columnHeader(configuration):
        return ("<th><div title=\"" + html.escape(str(configuration["options"])) + "\">"
            + html.escape(configuration["id"]) + "</div></th>")

    shownConfigurations = [
        configuration for configuration in configurations
        if instanceSetID in configuration["instanceSets"]
    ]

    print("<h2>" + html.escape(instanceSetID) + " (detailed results)</h2><table><thead><tr><th>IPC</th><th>domain</th><th>instance</th>")

    for configuration in shownConfigurations:
        print(columnHeader(configuration))

    # Close the header row and section before the body starts.
    print("</tr></thead><tbody>")

    for instance in instanceSet:
        print("<tr><td class=\"col-header\">" + html.escape(instance["ipc"])
            + "</td><td class=\"col-header\">" + html.escape(instance["domain"])
            + "</td><td class=\"col-header\">" + html.escape(str(instance["instance"])) + "</td>")

        for configuration in shownConfigurations:
            r = result(results, configuration, instance)

            if not r:
                print("<td class=\"tbd\">")
            elif r["text"] == "error":
                print("<td class=\"error\">")
                print(r["text"])
            else:
                # The stylesheet only defines result-0 .. result-100.
                step = min(100, max(0, int(r["color"] * 100)))
                print("<td class=\"result result-" + str(step) + "\">")
                # Texts such as "> 900" contain markup characters.
                print(html.escape(r["text"]))

            print("</td>")

        print("</tr>")

    print("</tbody></table>")
def renderAggregatedResultsTable(type, configurations, instanceSetID, instanceSet, instances, results, config):
    """Print an aggregated HTML table (per IPC/domain plus total).

    Args:
        type: Which aggregate to show: "average runtime" or
            "instances solved" (any other value is treated like the latter).
        configurations: Configuration dicts; only those listing
            `instanceSetID` in "instanceSets" get a column.
        instanceSetID: Name of the instance set.
        instanceSet: Instances of that set.
        instances: All instance sets (passed through to aggregateResults).
        results: Result store as filled by addResult().
        config: Global configuration; config["limits"]["time"] scales the
            color of average runtimes.

    All output goes to stdout; nothing is returned.
    """
    # Function-scope import so that this block brings its own dependency.
    import html

    aggregatedResults = aggregateResults(configurations, instanceSetID, instanceSet, instances, results)

    if not aggregatedResults:
        print("<!-- error -->")
        return

    shownConfigurations = [
        configuration for configuration in configurations
        if instanceSetID in configuration["instanceSets"]
    ]

    print("<h2>" + html.escape(instanceSetID) + " (" + html.escape(type) + ")</h2><table><thead><tr><th>IPC</th><th>domain</th>")

    for configuration in shownConfigurations:
        print("<th><div title=\"" + html.escape(str(configuration["options"])) + "\">"
            + html.escape(configuration["id"]) + "</div></th>")

    print("</tr></thead><tbody>")

    for ipcDomain, domainResults in sorted(aggregatedResults.items(), key = lambda item: item[0]):
        print("<tr><td class=\"col-header\">" + html.escape(ipcDomain[0])
            + "</td><td class=\"col-header\">" + html.escape(ipcDomain[1]) + "</td>")

        for configuration in shownConfigurations:
            # An entry may be absent altogether, e.g. in the total row of an
            # empty instance set.
            entry = domainResults.get(configuration["id"])

            if not entry or not entry["results"]:
                print("<td class=\"tbd\"></td>")
                continue

            r = entry[type]
            count = len(entry["results"])

            if type == "average runtime":
                text = "%.2f" % r
                value = (r / config["limits"]["time"]) ** 0.2
            else:
                text = "%d/%d" % (r, count)
                # More solved instances -> lower (better) gradient position.
                value = 1.0 - r / count

            # The stylesheet only defines result-0 .. result-100.
            step = min(100, max(0, int(value * 100)))
            print("<td class=\"result result-" + str(step) + "\">" + text + "</td>")

        print("</tr>")

    print("</tbody></table>")
def main():
    """Generate the dashboard: read config.yml, collect results, print HTML.

    The page is written to stdout. For every instance set three tables are
    printed: instances solved, average runtime, and the detailed results.
    """
    # NOTE(review): yaml.CLoader is the C counterpart of the full
    # yaml.Loader, not of the safe loader; consider CSafeLoader if
    # config.yml can come from an untrusted source -- confirm.
    with open("config.yml", "r") as stream:
        config = yaml.load(stream, Loader=yaml.CLoader)

    configurations, instances, results = collectResults(config)

    print("<!DOCTYPE html><html lang=\"en\"><head><title>TPLP benchmark results</title><meta charset=\"UTF-8\"><link rel=\"stylesheet\" href=\"style.css?v=2\" type=\"text/css\"></head><body><main><h1>TPLP Benchmark Results</h1><div class=\"footnote\">last updated at " + time.strftime("%Y-%m-%d %H:%M:%S %z") + "</div>")

    for instanceSetID, instanceSet in instances.items():
        renderAggregatedResultsTable("instances solved", configurations, instanceSetID, instanceSet, instances, results, config)
        renderAggregatedResultsTable("average runtime", configurations, instanceSetID, instanceSet, instances, results, config)
        renderResultsTable(configurations, instanceSetID, instanceSet, instances, results)

    print("</main></body></html>")


# Only generate the page when executed as a script, so that importing this
# module does not touch the repository or print anything.
if __name__ == "__main__":
    main()

BIN
gradient.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 364 B

568
style.css Normal file
View File

@ -0,0 +1,568 @@
/* Base typography applied to every element. */
*
{
font-family: "Source Sans Pro", sans-serif;
hyphens: auto;
}
/* Vertical spacing above the page title and each table heading. */
h1, h2
{
margin-top: 5rem;
}
/* Centered content column with a maximum width. */
main
{
max-width: 1600px;
margin-left: auto;
margin-right: auto;
}
/* Header cells: this rule currently has no active declarations; the only
   content is the disabled rotation below. */
th
{
/*transform: rotate(-90.0deg);
white-space: nowrap;*/
}
tbody
{
font-size: 10pt;
text-align: center;
}
/* Cells for jobs without a result. */
td.tbd
{
background-color: rgb(238, 238, 236);
}
/* Fixed width for table cells ... */
td
{
width: 75px;
}
/* ... except the leading label columns, which inherit their width. */
td.col-header
{
width: inherit;
}
/* Small grey text, used for the "last updated at" line. */
div.footnote
{
font-size: 10pt;
color: #808080;
}
/* Cells for jobs that ended with an error. */
td.error
{
background-color: rgb(226, 60, 33);
color: rgb(148, 17, 0);
}
/* Result cells take their color from gradient.png, repeated horizontally;
   the td.result-N classes move the image's vertical position. */
td.result
{
background-image: url("gradient.png");
background-position: 0% 0%;
background-repeat: repeat-x;
}
/* Gradient steps: td.result-N places the background image of td.result at
   N% vertically, for N = 0 .. 100 (one class per whole percent). */
td.result-0 { background-position: 0% 0%; }
td.result-1 { background-position: 0% 1%; }
td.result-2 { background-position: 0% 2%; }
td.result-3 { background-position: 0% 3%; }
td.result-4 { background-position: 0% 4%; }
td.result-5 { background-position: 0% 5%; }
td.result-6 { background-position: 0% 6%; }
td.result-7 { background-position: 0% 7%; }
td.result-8 { background-position: 0% 8%; }
td.result-9 { background-position: 0% 9%; }
td.result-10 { background-position: 0% 10%; }
td.result-11 { background-position: 0% 11%; }
td.result-12 { background-position: 0% 12%; }
td.result-13 { background-position: 0% 13%; }
td.result-14 { background-position: 0% 14%; }
td.result-15 { background-position: 0% 15%; }
td.result-16 { background-position: 0% 16%; }
td.result-17 { background-position: 0% 17%; }
td.result-18 { background-position: 0% 18%; }
td.result-19 { background-position: 0% 19%; }
td.result-20 { background-position: 0% 20%; }
td.result-21 { background-position: 0% 21%; }
td.result-22 { background-position: 0% 22%; }
td.result-23 { background-position: 0% 23%; }
td.result-24 { background-position: 0% 24%; }
td.result-25 { background-position: 0% 25%; }
td.result-26 { background-position: 0% 26%; }
td.result-27 { background-position: 0% 27%; }
td.result-28 { background-position: 0% 28%; }
td.result-29 { background-position: 0% 29%; }
td.result-30 { background-position: 0% 30%; }
td.result-31 { background-position: 0% 31%; }
td.result-32 { background-position: 0% 32%; }
td.result-33 { background-position: 0% 33%; }
td.result-34 { background-position: 0% 34%; }
td.result-35 { background-position: 0% 35%; }
td.result-36 { background-position: 0% 36%; }
td.result-37 { background-position: 0% 37%; }
td.result-38 { background-position: 0% 38%; }
td.result-39 { background-position: 0% 39%; }
td.result-40 { background-position: 0% 40%; }
td.result-41 { background-position: 0% 41%; }
td.result-42 { background-position: 0% 42%; }
td.result-43 { background-position: 0% 43%; }
td.result-44 { background-position: 0% 44%; }
td.result-45 { background-position: 0% 45%; }
td.result-46 { background-position: 0% 46%; }
td.result-47 { background-position: 0% 47%; }
td.result-48 { background-position: 0% 48%; }
td.result-49 { background-position: 0% 49%; }
td.result-50 { background-position: 0% 50%; }
td.result-51 { background-position: 0% 51%; }
td.result-52 { background-position: 0% 52%; }
td.result-53 { background-position: 0% 53%; }
td.result-54 { background-position: 0% 54%; }
td.result-55 { background-position: 0% 55%; }
td.result-56 { background-position: 0% 56%; }
td.result-57 { background-position: 0% 57%; }
td.result-58 { background-position: 0% 58%; }
td.result-59 { background-position: 0% 59%; }
td.result-60 { background-position: 0% 60%; }
td.result-61 { background-position: 0% 61%; }
td.result-62 { background-position: 0% 62%; }
td.result-63 { background-position: 0% 63%; }
td.result-64 { background-position: 0% 64%; }
td.result-65 { background-position: 0% 65%; }
td.result-66 { background-position: 0% 66%; }
td.result-67 { background-position: 0% 67%; }
td.result-68 { background-position: 0% 68%; }
td.result-69 { background-position: 0% 69%; }
td.result-70 { background-position: 0% 70%; }
td.result-71 { background-position: 0% 71%; }
td.result-72 { background-position: 0% 72%; }
td.result-73 { background-position: 0% 73%; }
td.result-74 { background-position: 0% 74%; }
td.result-75 { background-position: 0% 75%; }
td.result-76 { background-position: 0% 76%; }
td.result-77 { background-position: 0% 77%; }
td.result-78 { background-position: 0% 78%; }
td.result-79 { background-position: 0% 79%; }
td.result-80 { background-position: 0% 80%; }
td.result-81 { background-position: 0% 81%; }
td.result-82 { background-position: 0% 82%; }
td.result-83 { background-position: 0% 83%; }
td.result-84 { background-position: 0% 84%; }
td.result-85 { background-position: 0% 85%; }
td.result-86 { background-position: 0% 86%; }
td.result-87 { background-position: 0% 87%; }
td.result-88 { background-position: 0% 88%; }
td.result-89 { background-position: 0% 89%; }
td.result-90 { background-position: 0% 90%; }
td.result-91 { background-position: 0% 91%; }
td.result-92 { background-position: 0% 92%; }
td.result-93 { background-position: 0% 93%; }
td.result-94 { background-position: 0% 94%; }
td.result-95 { background-position: 0% 95%; }
td.result-96 { background-position: 0% 96%; }
td.result-97 { background-position: 0% 97%; }
td.result-98 { background-position: 0% 98%; }
td.result-99 { background-position: 0% 99%; }
td.result-100 { background-position: 0% 100%; }