From 8f80ddca4953881d856e14adc8de5185499ad0b5 Mon Sep 17 00:00:00 2001
From: Valentin Seitz
Date: Mon, 6 May 2024 14:04:48 +0200
Subject: [PATCH] add talp html stuff

---
 gitlab/download_artifacts.py       | 101 +++++++++
 gitlab/requirements.txt            |   1 +
 talp_report/requirements.txt       |   3 +
 talp_report/talp_add_to_db.py      | 106 +++++++++
 talp_report/talp_html_report.py    | 126 +++++++++++
 talp_report/talp_report.jinja      | 135 ++++++++++++
 talp_report/talp_time_series.jinja | 325 +++++++++++++++++++++++++++++
 talp_report/talp_time_series.py    | 150 +++++++++++++
 talp_report/talp_variables.py      |  14 ++
 9 files changed, 961 insertions(+)
 create mode 100644 gitlab/download_artifacts.py
 create mode 100644 gitlab/requirements.txt
 create mode 100644 talp_report/requirements.txt
 create mode 100644 talp_report/talp_add_to_db.py
 create mode 100644 talp_report/talp_html_report.py
 create mode 100644 talp_report/talp_report.jinja
 create mode 100644 talp_report/talp_time_series.jinja
 create mode 100644 talp_report/talp_time_series.py
 create mode 100644 talp_report/talp_variables.py

diff --git a/gitlab/download_artifacts.py b/gitlab/download_artifacts.py
new file mode 100644
index 00000000..566eadf0
--- /dev/null
+++ b/gitlab/download_artifacts.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+"""
+Script to automagically choose a suitable authentication method and download the .zip archive of the last successful
+execution of that jobname on the branch: ref-name
+Requires python-gitlab to be installed
+
+Default behaviour: create empty zip file
+"""
+
+import os
+import argparse
+import gitlab
+import logging
+
+# A zip archive without entries: only the "end of central directory" record
+EMPTY_ZIP = b'PK\x05\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
+
+
+def download_artifacts(gl, project_name, job_name, ref_name, gitlab_token=None, output_file=None):
+    """
+    Write the artifacts archive of the given job to output_file.
+
+    gl is the gitlab.Gitlab client, project_name has the form namespace/project,
+    job_name is the CI job (surrounding whitespace is stripped) and ref_name the
+    branch. gitlab_token is not used and only kept for existing callers.
+
+    If the project or the artifacts cannot be retrieved the reason is logged and
+    an empty zip archive is written instead, so output_file always exists.
+    """
+    artifacts_file = EMPTY_ZIP
+
+    try:
+        project = gl.projects.get(project_name)
+    except gitlab.exceptions.GitlabGetError as e:
+        logging.error("Failed to get project details: %s", str(e))
+        project = None
+
+    if project is not None:
+        logging.debug("Found project ID: %s", project.id)
+        # Get artifacts for the job
+        try:
+            artifacts_file = project.artifacts.download(
+                ref_name=ref_name, job=job_name.strip())
+        except Exception as e:
+            # Deliberately best effort: the pipeline continues without artifacts
+            logging.critical(
+                "Wasnt able to download artifacts (%s), continue with a empty zip directory", e)
+
+    with open(output_file, 'wb') as f:
+        f.write(artifacts_file)
+    if artifacts_file is not EMPTY_ZIP:
+        # Only claim success once the archive is really on disk
+        logging.info("Artifacts downloaded successfully: %s", output_file)
+
+
+def main():
+    """Parse the command line, pick the authentication method and download the artifacts."""
+    parser = argparse.ArgumentParser(
+        description="Download artifacts from the latest successful GitLab CI job")
+    parser.add_argument(
+        "--gitlab-url", help="GitLab URL (the thing before /api/*)", required=True)
+    parser.add_argument(
+        "--project-name", help="GitLab project name (format: namespace/project)", required=True)
+    parser.add_argument(
+        "--job-name", help="Name of the GitLab CI job from which to download the artifact", required=True)
+    parser.add_argument(
+        "--output-file", help="Output file name for downloaded artifacts", required=True)
+    parser.add_argument(
+        "--ref-name", help="GitLab branch name", default="main")
+    parser.add_argument(
+        "--gitlab-token", help="Personal GitLab access token (if not specified it will try to use a job_token)")
+    parser.add_argument(
+        "--log-level", help="Logging level (DEBUG, INFO, WARNING, ERROR)", default="INFO")
+
+    args = parser.parse_args()
+
+    log_level = getattr(logging, args.log_level.upper(), None)
+    if not isinstance(log_level, int):
+        raise ValueError("Invalid log level: %s" % args.log_level)
+
+    logging.basicConfig(level=log_level)
+
+    # Never print or log a token: the job log is not a place for secrets
+    job_gitlab_token = os.getenv("CI_JOB_TOKEN")
+    if args.gitlab_token:
+        logging.debug("Using the personal access token variant")
+        gl = gitlab.Gitlab(args.gitlab_url, private_token=args.gitlab_token)
+        gl.auth()
+    elif job_gitlab_token:
+        logging.debug("Using the CI_JOB_TOKEN variant")
+        gl = gitlab.Gitlab(args.gitlab_url, job_token=job_gitlab_token)
+    else:
+        logging.debug("No token found, using anonymous access")
+        gl = gitlab.Gitlab(args.gitlab_url)
+
+    download_artifacts(gl, args.project_name, args.job_name,
+                       args.ref_name, args.gitlab_token, args.output_file)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/gitlab/requirements.txt b/gitlab/requirements.txt
new file mode 100644
index 00000000..67de9d95
--- /dev/null
+++ b/gitlab/requirements.txt
@@ -0,0 +1 @@
+python-gitlab
\ No newline at end of file
diff --git a/talp_report/requirements.txt b/talp_report/requirements.txt
new file mode 100644
index 00000000..1cbe6fb7
--- /dev/null
+++ b/talp_report/requirements.txt
@@ -0,0 +1,3 @@
+Jinja2
+requests
+pandas
\ No newline at end of file
diff --git a/talp_report/talp_add_to_db.py b/talp_report/talp_add_to_db.py
new file mode 100644
index 00000000..02c057ca
--- /dev/null
+++ b/talp_report/talp_add_to_db.py
@@ -0,0 +1,106 @@
+"""
+Add the content of a talp.json file (and optional metadata) as a new row to
+the local SQLite time series database.
+"""
+import argparse
+import json
+import os
+import sys
+from datetime import datetime
+import sqlite3
+
+from talp_variables import TALP_TABLE_NAME, TALP_TABLE_COLUMNS_WITH_DATATYPES, TALP_TABLE_COLUMNS
+
+# Text format of the timestamp column, as parsed by talp_time_series.py
+TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S.%f"
+
+
+def insert_data(conn, timestamp, talp_output, metadata):
+    """
+    Insert one row into the TALP table, creating table and index if needed.
+
+    conn is an open sqlite3 connection and is closed before returning.
+    timestamp may be a datetime (stored as text in TIMESTAMP_FORMAT) or a string.
+    talp_output and metadata are stored as JSON text.
+
+    Returns True if the row was committed and False on a database error.
+    """
+    if isinstance(timestamp, datetime):
+        # Convert explicitly: the implicit datetime adapter of sqlite3 is
+        # deprecated since Python 3.12 and omits the microseconds when they are 0
+        timestamp = timestamp.strftime(TIMESTAMP_FORMAT)
+
+    try:
+        cursor = conn.cursor()
+
+        # Create a table if it doesn't exist
+        cursor.execute(
+            f"CREATE TABLE IF NOT EXISTS {TALP_TABLE_NAME} {TALP_TABLE_COLUMNS_WITH_DATATYPES}")
+
+        # Create an index on the timestamp column
+        cursor.execute(
+            f"CREATE INDEX IF NOT EXISTS idx_timestamp ON {TALP_TABLE_NAME} (timestamp)")
+
+        # Insert data into the table, JSON objects are stored in string format
+        cursor.execute(f"INSERT INTO {TALP_TABLE_NAME} {TALP_TABLE_COLUMNS} VALUES (?, ?, ?)", (
+            timestamp, json.dumps(talp_output), json.dumps(metadata)))
+
+        conn.commit()
+        print("INFO: Data inserted successfully")
+        return True
+    except sqlite3.Error as e:
+        print("ERROR:", e)
+        return False
+    finally:
+        # Close the connection
+        conn.close()
+
+
+def load_json(path):
+    """Return the parsed content of the JSON file at path."""
+    with open(path, 'r') as json_file:
+        return json.load(json_file)
+
+
+def main():
+    """Command line entry point; returns the process exit code."""
+    # Parse command-line arguments
+    parser = argparse.ArgumentParser(
+        description='Add talp.json to the local time series database')
+    parser.add_argument('-t', '--talp', dest='talp',
+                        help='talp.json file to be added', required=True)
+    parser.add_argument('-m', '--metadata', dest='metadata',
+                        help='metadata.json file to be added', required=False)
+    parser.add_argument('-db', '--database', dest='database',
+                        help='TALP.db file. If not specified a new one will be generated', required=False)
+    # TODO add timestamp mechanism
+    args = parser.parse_args()
+
+    # Check if the JSON files exist
+    for json_path in (args.talp, args.metadata):
+        if json_path and not os.path.exists(json_path):
+            print(
+                f"Error: The specified JSON file '{json_path}' does not exist.")
+            return 1
+
+    # Read the inputs before connecting, so a broken input neither creates a
+    # database file nor leaves a connection open
+    try:
+        talp_output = load_json(args.talp)
+        metadata = load_json(args.metadata) if args.metadata else {}
+    except json.JSONDecodeError as e:
+        print(f"Error decoding JSON: {e}")
+        return 1
+
+    # Set output
+    DB_FILE = args.database if args.database else "TALP.db"
+
+    # Connect to database
+    conn = sqlite3.connect(DB_FILE)
+
+    inserted = insert_data(conn, datetime.now(), talp_output, metadata)
+    return 0 if inserted else 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/talp_report/talp_html_report.py b/talp_report/talp_html_report.py
new file mode 100644
index 00000000..bfe87e83
--- /dev/null
+++ b/talp_report/talp_html_report.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python
+"""
+Render a HTML summary of a talp.json file and optionally a shields.io badge
+showing the parallel efficiency.
+"""
+
+import argparse
+import json
+import os
+import sys
+from jinja2 import Environment, FileSystemLoader
+from urllib.request import urlopen, Request
+from talp_variables import TALP_DEFAULT_REGION_NAME, TALP_TEMPLATE_PATH
+
+
+def render_template(directory, template_name, **context):
+    """Render the Jinja2 template template_name found in directory with the given context."""
+    # Set up Jinja2 environment and load the template
+    env = Environment(loader=FileSystemLoader(directory))
+    template = env.get_template(template_name)
+
+    # Render the template with the provided context
+    return template.render(context)
+
+
+def create_badge(parallel_eff):
+    """
+    Download the parallel efficiency badge from shields.io and return it as bytes.
+
+    The badge is red below 0.6, orange below 0.8 and green otherwise.
+    Raises OSError (e.g. urllib.error.URLError) if the download fails.
+    """
+    if parallel_eff < 0.6:
+        color = "red"
+    elif parallel_eff < 0.8:
+        color = "orange"
+    else:
+        color = "green"
+    badge_url = f"https://img.shields.io/badge/Parallel_efficiency-{parallel_eff}-{color}"
+
+    request = Request(url=badge_url, headers={'User-Agent': 'Mozilla'})
+    # The timeout keeps the script from hanging on a stalled connection
+    with urlopen(request, timeout=30) as response:
+        return response.read()
+
+
+def main():
+    """Command line entry point; returns the process exit code."""
+    # Parse command-line arguments
+    parser = argparse.ArgumentParser(
+        description='Render HTML summary of the talp.json')
+    parser.add_argument('input', help='Path to the TALP JSON file')
+    parser.add_argument('-o', '--output', dest='output',
+                        help='Name of the html file beeing generated. If not specified [input].html will be chosen', required=False)
+    parser.add_argument('-b', '--badge', dest='badge', default="parallel_efficiency.svg",
+                        help="Create a [badge].svg using shields.io. Internet access needed")
+    args = parser.parse_args()
+
+    # Check if the JSON file exists
+    if not os.path.exists(args.input):
+        print(
+            f"Error: The specified JSON file '{args.input}' does not exist.")
+        return 1
+
+    # Set output
+    if args.output:
+        OUTPUT_FILE = args.output
+        if not args.output.endswith('.html'):
+            OUTPUT_FILE += ".html"
+            print(f"Info: Appending .html to '{args.output}'")
+        # Check if the HTML file (under its final name) exists
+        if os.path.exists(OUTPUT_FILE):
+            print(f"Info: Overwriting '{OUTPUT_FILE}'")
+    else:
+        # Strip only a trailing .json, not every ".json" within the path
+        OUTPUT_FILE = args.input
+        if OUTPUT_FILE.endswith(".json"):
+            OUTPUT_FILE = OUTPUT_FILE[:-len(".json")]
+        OUTPUT_FILE += ".html"
+
+    # Load data from the JSON file
+    with open(args.input, 'r') as json_file:
+        try:
+            raw_data = json.load(json_file)
+        except json.JSONDecodeError as e:
+            print(f"Error decoding JSON: {e}")
+            return 1
+
+    # Check if the popMetrics are there (the key may be missing altogether)
+    pop_metric_regions = raw_data.get('popMetrics') if isinstance(raw_data, dict) else None
+    if not pop_metric_regions:
+        print(
+            f"No popMetrics found in TALP json. Try re-running DLB with arguments --talp --talp-summary=pop-metrics --talp-file={args.input}")
+        return 1
+
+    # Render the template with the data
+    rendered_html = render_template(
+        TALP_TEMPLATE_PATH, 'talp_report.jinja', regions=pop_metric_regions)
+
+    # Save or display the rendered HTML as needed
+    with open(OUTPUT_FILE, 'w') as f:
+        f.write(rendered_html)
+
+    if args.badge:
+        if not args.badge.endswith(".svg"):
+            args.badge += ".svg"
+        # Get the global efficiency
+        parallel_efficiency = 0.0
+        for region in pop_metric_regions:
+            if region['name'] == TALP_DEFAULT_REGION_NAME:
+                parallel_efficiency = region['parallelEfficiency']
+
+        try:
+            badge = create_badge(parallel_efficiency)
+        except OSError as e:
+            # The report is already written, only the badge is missing
+            print(f"Error: Could not download the badge: {e}")
+            return 1
+        with open(args.badge, 'wb') as f:
+            f.write(badge)
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/talp_report/talp_report.jinja b/talp_report/talp_report.jinja
new file mode 100644
index 00000000..1a7373d8
--- /dev/null
+++ b/talp_report/talp_report.jinja
@@ -0,0 +1,135 @@
+ + + + + + DLB-TALP Performance Report + + + + + + + + + + + + + + +
+ DLB Logo +
+ +

Performance Report

+ + + + + + + + + + + + + + + {% for entry in regions %} + + + + + + + + + + + {% endfor %} + +
NameElapsed Time [s]Average IPCParallel EfficiencyCommunication EfficiencyLoad BalanceLB InLB Out
{{ entry.name }}{{ entry.elapsedTime / 1e9}}{{ entry.averageIPC }}{{ entry.parallelEfficiency }}{{ entry.communicationEfficiency }}{{ entry.loadBalance }}{{ entry.lbIn }}{{ entry.lbOut }}
+ +

Metrics overview

+ For up to date overview of the computed metrics you can consult the DLB User Guide + The coloring above follows the following rules: + + + + + + + + + + + + + + + + + + + + + +
Average IPCParallel Efficiency
< 1 < 0.6
> 1
< 2
> 0.6
< 0.8
> 2 > 0.8
+ + + \ No newline at end of file diff --git a/talp_report/talp_time_series.jinja b/talp_report/talp_time_series.jinja new file mode 100644 index 00000000..84eef2cc --- /dev/null +++ b/talp_report/talp_time_series.jinja @@ -0,0 +1,325 @@ + + + + + + Your Website + + + + + + + + + + +
+
+

Continuous performance analysis results

+

for GENEX on RAVEN

+
+
+ + +
+
+
+

Select Regions

+
+
+
+ +
+
+
+
+

Elapsed Time

+
+
+
+

Parallel Efficiency

+
+
+
+

Average IPC

+
+
+
+ +
+ + + + + + + + + + + + +
diff --git a/talp_report/talp_time_series.py b/talp_report/talp_time_series.py
new file mode 100644
index 00000000..4d1fccd0
--- /dev/null
+++ b/talp_report/talp_time_series.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python
+
+import argparse
+import json
+import pandas as pd
+import os
+from contextlib import closing
+from jinja2 import Environment, FileSystemLoader
+import sqlite3
+from datetime import datetime
+
+from talp_variables import TALP_DB_COLUMN_METADATA, TALP_DB_COLUMN_TALP_OUPUT, TALP_DB_COLUMN_TIMESTAMP, TALP_DEFAULT_REGION_NAME, TALP_TABLE_NAME, TALP_TEMPLATE_PATH
+
+
+def date_time_to_string(datetime):
+    """Format a datetime object as 'DD.MM.YYYY HH:MM'."""
+    return datetime.strftime("%d.%m.%Y %H:%M")
+
+
+def render_template(directory, template_name, **context):
+    """Render the Jinja2 template template_name found in directory with the given context."""
+    env = Environment(loader=FileSystemLoader(directory))
+    template = env.get_template(template_name)
+    return template.render(context)
+
+
+def load_pop_metrics(df):
+    """Parse the TALP JSON of every row and return its 'popMetrics' list, one per row."""
+    return [json.loads(talp_output)['popMetrics']
+            for talp_output in df[TALP_DB_COLUMN_TALP_OUPUT].tolist()]
+
+
+def extract_region_names_from_df(df):
+    """Return the names of all regions found in any row, in order of first appearance."""
+    # A dict keeps the insertion order, a set would shuffle the regions between runs
+    region_names = {}
+    for pop_metrics in load_pop_metrics(df):
+        for entry in pop_metrics:
+            region_names[entry['name']] = None
+    return list(region_names)
+
+
+def get_formatted_timestamps(df):
+    """Return the timestamp of every row as display string."""
+    # fromisoformat() also accepts timestamps stored without fractional seconds
+    return [date_time_to_string(datetime.fromisoformat(timestamp))
+            for timestamp in df[TALP_DB_COLUMN_TIMESTAMP].tolist()]
+
+
+def extract_metadata_from_df(df):
+    """Return a dict mapping each formatted timestamp to the metadata of that row plus a 'date' key."""
+    # NOTE(review): the keys have minute resolution, rows recorded within the
+    # same minute end up in one entry and the later row wins
+    metadata_obj = {}
+    timestamps = get_formatted_timestamps(df)
+    metadatas = df[TALP_DB_COLUMN_METADATA].tolist()
+
+    for timestamp, metadata in zip(timestamps, metadatas):
+        metadata_obj[timestamp] = json.loads(metadata)
+        metadata_obj[timestamp]['date'] = timestamp
+
+    return metadata_obj
+
+
+def pack_series_data(name, data):
+    """Wrap the data points of one region into the series dict passed to the template."""
+    return {
+        'name': name,
+        'type': 'line',
+        'data': data}
+
+
+def extract_dataseries(df, metric):
+    """
+    Return one series per region with the values of metric.
+
+    Each series holds exactly one value per row of df. The value is None when
+    the region or the metric is missing in that row, which keeps every series
+    aligned with the timestamps.
+    """
+    all_pop_metrics = load_pop_metrics(df)
+    series = []
+
+    for region in extract_region_names_from_df(df):
+        data = []
+        for pop_metrics in all_pop_metrics:
+            value = None
+            for entry in pop_metrics:
+                if entry['name'] == region:
+                    value = entry.get(metric)
+                    break
+            if value is None:
+                print(
+                    f"WHOOPS no '{metric}' for region '{region}' in one of the rows, appending none")
+            data.append(value)
+        series.append(pack_series_data(region, data))
+
+    return series
+
+
+def main():
+    """Command line entry point: read TALP.db and write the HTML page ([input].html by default)."""
+    # Parse command-line arguments
+    parser = argparse.ArgumentParser(
+        description='Render HTML summary of the historic talp data')
+    parser.add_argument('input', help='Path to the TALP.db file')
+    parser.add_argument('-o', '--output', dest='output',
+                        help='Name of the html file beeing generated. If not specified [input].html will be chosen', required=False)
+    args = parser.parse_args()
+
+    # Check if the SQLite file exists
+    if not os.path.exists(args.input):
+        print(
+            f"Error: The specified SQLITE file '{args.input}' does not exist.")
+        return
+
+    # Set output
+    if args.output:
+        OUTPUT_FILE = args.output
+        if not args.output.endswith('.html'):
+            OUTPUT_FILE += ".html"
+            print(f"Info: Appending .html to '{args.output}'")
+        # Check if the HTML file (under its final name) exists
+        if os.path.exists(OUTPUT_FILE):
+            print(f"Info: Overwriting '{OUTPUT_FILE}'")
+    else:
+        OUTPUT_FILE = args.input + ".html"
+
+    # Read in the data, oldest row first; closing() releases the database file
+    with closing(sqlite3.connect(args.input)) as conn:
+        df = pd.read_sql(
+            f"SELECT * FROM {TALP_TABLE_NAME} ORDER BY {TALP_DB_COLUMN_TIMESTAMP}", conn)
+
+    # Render the template with the data
+    rendered_html = render_template(TALP_TEMPLATE_PATH, 'talp_time_series.jinja',
+                                    timestamps=get_formatted_timestamps(df),
+                                    region_names=extract_region_names_from_df(df),
+                                    metadata=extract_metadata_from_df(df),
+                                    pe_series=extract_dataseries(df, 'parallelEfficiency'),
+                                    et_series=extract_dataseries(df, 'elapsedTime'),
+                                    ipc_series=extract_dataseries(df, 'averageIPC'),
+                                    default_region_name=TALP_DEFAULT_REGION_NAME)
+
+    # Save or display the rendered HTML as needed
+    with open(OUTPUT_FILE, 'w') as f:
+        f.write(rendered_html)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/talp_report/talp_variables.py b/talp_report/talp_variables.py
new file mode 100644
index 00000000..628ae926
--- /dev/null
+++ b/talp_report/talp_variables.py
@@ -0,0 +1,14 @@
+"""
+File declaring some global scoped variables we rely to be there in our scripts
+"""
+import pathlib
+
+TALP_TABLE_NAME = "talp_data"
+TALP_DB_COLUMN_TALP_OUPUT = "talp_ouput"  # sic: renaming the column would break databases created so far
+TALP_DB_COLUMN_TIMESTAMP = "timestamp"
+TALP_DB_COLUMN_METADATA = "metadata"
+TALP_DEFAULT_REGION_NAME = "MPI Execution"
+
+TALP_TEMPLATE_PATH = pathlib.Path(__file__).parent.resolve()
+TALP_TABLE_COLUMNS_WITH_DATATYPES = f"({TALP_DB_COLUMN_TIMESTAMP} TIMESTAMP, {TALP_DB_COLUMN_TALP_OUPUT} TEXT, {TALP_DB_COLUMN_METADATA} TEXT)"
+TALP_TABLE_COLUMNS = f"({TALP_DB_COLUMN_TIMESTAMP} ,{TALP_DB_COLUMN_TALP_OUPUT}, {TALP_DB_COLUMN_METADATA})"
-- 
GitLab