#!/bin/bash
set -u

# Work out the directory that holds this script, one step at a time:
# resolve symlinks to the real script path, take its directory part, then
# normalise it with cd/pwd. The sqlite3 binary is expected to sit in that
# same directory.
DIR="$(readlink -f "${BASH_SOURCE[0]}")"
DIR="$(dirname "$DIR")"
DIR="$(cd "$DIR" && pwd)"
MYSQLITE3="${DIR}/sqlite3"

# Exactly one positional argument is expected: the SQLite DB file to analyse.
if (( $# != 1 )); then
    echo " usage: ${BASH_SOURCE[0]} [SQLite DB file exported from NSys QDREP file]"
    exit 1
fi

# The argument must name an existing regular file.
[[ -f "$1" ]] || {
    echo "$1 file not found. Exiting"
    exit 1
}

# Reject anything that does not look like an SQLite 3 database.
# The file format (https://www.sqlite.org/draft/fileformat.html) begins with the
# text "SQLite format 3" followed by a NUL byte. The NUL is turned into a newline,
# which the command substitution strips, leaving only the text to compare.
HEADER=$(head -c 16 "$1" | tr '\0' '\n')
if [[ "$HEADER" != "SQLite format 3" ]]; then
    echo "$1 is not an SQlite DB file. Exiting."
    exit 1
fi

# Check if the DB contains CUDA kernel data and/or CUDA memory operation data;
# if not, exit.
# Each query prints 1 when the named table is listed in sqlite_master, else 0.
# The exit status of every query is checked: if the sqlite3 binary is missing or
# not executable, or the DB cannot be read, the captured output is empty. The
# arithmetic below would treat that as 0 and the script would wrongly report
# "CUDA trace data was not collected." with a success exit code.
if ! KERNEL=$("$MYSQLITE3" "$1" "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='CUPTI_ACTIVITY_KIND_KERNEL'") ||
   ! MEMCPY=$("$MYSQLITE3" "$1" "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='CUPTI_ACTIVITY_KIND_MEMCPY'") ||
   ! MEMSET=$("$MYSQLITE3" "$1" "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='CUPTI_ACTIVITY_KIND_MEMSET'")
then
    echo "Failed to query $1 using $MYSQLITE3. Exiting." >&2
    exit 1
fi
TOTAL_TABLES=$((KERNEL + MEMCPY + MEMSET))

# None of the three CUPTI tables exist: nothing to summarise. This is not an error.
if [ "$TOTAL_TABLES" -eq 0 ]
then
    echo "CUDA trace data was not collected."
    exit 0
fi

# Build the cudaKernelStats summary table if kernel data is present and the
# table has not been created yet. The sqlite_master lookup prints the table
# name only when the table exists, so any other output means the per-kernel
# aggregates still have to be generated.
if (( KERNEL > 0 )); then
    kernel_stats_table=$("$MYSQLITE3" "$1" "SELECT name FROM sqlite_master WHERE type='table' AND name='cudaKernelStats';")
    if [[ "$kernel_stats_table" != "cudaKernelStats" ]]; then
        printf "\nGenerating CUDA Kernel Statistics..."

        # One row per demangledName: instance count plus min/max/avg/sum of (end-start).
        # The quoted delimiter passes the SQL to sqlite3 verbatim.
        "$MYSQLITE3" "$1" <<'GenerateKernelSummary'
    PRAGMA SYNCHRONOUS=OFF;
    CREATE TABLE cudaKernelStats (demangledName INTEGER, num INTEGER, min INTEGER, max INTEGER, avg INTEGER, total INTEGER);
    INSERT INTO cudaKernelStats SELECT demangledName, count(demangledName), min(end-start), max(end-start), avg(end-start), sum(end-start)
        FROM CUPTI_ACTIVITY_KIND_KERNEL GROUP BY demangledName;
GenerateKernelSummary
    fi
fi

# Print the kernel report: one row per kernel with its share of the total kernel
# time, total time, instance count and average/min/max duration, ordered by total
# time descending. Kernel names are resolved through the StringIds table.
if [ "$KERNEL" -gt 0 ]
then
    printf "\nCUDA Kernel Statistics (nanoseconds)\n\n"
    # The grand total is computed by a scalar subquery inside the statement rather
    # than fetched into a shell variable and spliced into the SQL text. When
    # cudaKernelStats has no rows, sum(total) is NULL, which sqlite3 prints as an
    # empty string, and the spliced statement would be a syntax error.
    # The quoted here-doc delimiter stops the shell from expanding anything in the SQL.
    "$MYSQLITE3" -column -header "$1" <<'KernelStatsReport'
.width -7 -14 -10 -14 -14 -14 333
SELECT
    round((total*100.0)/(SELECT sum(total) FROM cudaKernelStats),1) as 'Time(%)', total as 'Total Time',
    num as Instances, round(avg,1) as 'Average', min as 'Minimum',
    max as 'Maximum', value as Name
    FROM cudaKernelStats INNER JOIN StringIds ON StringIds.id = cudaKernelStats.demangledName ORDER BY total DESC;
KernelStatsReport
    printf "\n"
fi

# Create and fill the memory-operation summary tables when memcpy and/or memset
# data is present and cudaMemoryOperationTimeStats does not exist yet. The
# sqlite_master lookup prints the table name only when the table exists.
if (( MEMCPY > 0 || MEMSET > 0 )); then
    memop_stats_table=$("$MYSQLITE3" "$1" "SELECT name FROM sqlite_master WHERE type='table' AND name='cudaMemoryOperationTimeStats';")
    if [[ "$memop_stats_table" != "cudaMemoryOperationTimeStats" ]]; then
        printf "\n\nGenerating CUDA Memory Operation Statistics..."

        # Lookup table mapping copyKind ids to display names, plus the two
        # (initially empty) result tables for time and byte statistics.
        "$MYSQLITE3" "$1" <<'GenerateMemoryTables'
    PRAGMA SYNCHRONOUS=OFF;
    CREATE TABLE MemcpyOperationStrings (id INTEGER, name TEXT);
    INSERT INTO MemcpyOperationStrings (id, name) VALUES (0, '[CUDA memcpy Unknown]'),
        (1, '[CUDA memcpy HtoD]'), (2, '[CUDA memcpy DtoH]'), (3, '[CUDA memcpy HtoA]'),
        (4, '[CUDA memcpy AtoH]'), (5, '[CUDA memcpy AtoA]'), (6, '[CUDA memcpy AtoD]'),
        (7, '[CUDA memcpy DtoA]'), (8, '[CUDA memcpy DtoD]'), (9, '[CUDA memcpy HtoH]'),
        (10, '[CUDA memcpy PtoP]'), (11, '[CUDA Unified Memory memcpy HtoD]'),
        (12, '[CUDA Unified Memory memcpy DtoH]'), (13, '[CUDA Unified Memory memcpy DtoD]');
    CREATE TABLE cudaMemoryOperationTimeStats (num INTEGER, min INTEGER, max INTEGER, avg INTEGER, total INTEGER, name TEXT);
    CREATE TABLE cudaMemoryOperationByteStats (num INTEGER, min INTEGER, max INTEGER, avg INTEGER, total INTEGER, name TEXT);
GenerateMemoryTables

        # Memcpy rows: one per copyKind, named through MemcpyOperationStrings.
        if (( MEMCPY > 0 )); then
            "$MYSQLITE3" "$1" <<'GenerateMemcpyData'
    PRAGMA SYNCHRONOUS=OFF;
    INSERT INTO cudaMemoryOperationTimeStats SELECT count(copyKind), min(end-start), max(end-start), avg(end-start), sum(end-start), name as Name
        FROM CUPTI_ACTIVITY_KIND_MEMCPY
        INNER JOIN MemcpyOperationStrings ON MemcpyOperationStrings.id = CUPTI_ACTIVITY_KIND_MEMCPY.copyKind
        GROUP BY copyKind;
    INSERT INTO cudaMemoryOperationByteStats SELECT count(copyKind), min(bytes), max(bytes), avg(bytes), sum(bytes), name as Name
        FROM CUPTI_ACTIVITY_KIND_MEMCPY
        INNER JOIN MemcpyOperationStrings ON MemcpyOperationStrings.id = CUPTI_ACTIVITY_KIND_MEMCPY.copyKind
        GROUP BY copyKind;
GenerateMemcpyData
        fi

        # Memset rows: a single aggregate row labelled '[CUDA memset]' in each table.
        if (( MEMSET > 0 )); then
            "$MYSQLITE3" "$1" <<'GenerateMemsetData'
    PRAGMA SYNCHRONOUS=OFF;
    INSERT INTO cudaMemoryOperationTimeStats SELECT count(*), min(end-start), max(end-start), avg(end-start), sum(end-start), '[CUDA memset]'
        FROM CUPTI_ACTIVITY_KIND_MEMSET;
    INSERT INTO cudaMemoryOperationByteStats SELECT count(*), min(bytes), max(bytes), avg(bytes), sum(bytes), '[CUDA memset]'
        FROM CUPTI_ACTIVITY_KIND_MEMSET;
GenerateMemsetData
        fi
    fi
fi

# Print the memory-operation reports: first the time statistics (nanoseconds),
# then the size statistics (KiB). Rows with num = 0 are filtered out of both.
if [ "$MEMCPY" -gt 0 ] || [ "$MEMSET" -gt 0 ]
then
    printf "\nCUDA Memory Operation Statistics (nanoseconds)\n\n"
    # The grand total is computed by a scalar subquery inside the statement rather
    # than fetched into a shell variable and spliced into the SQL text. When the
    # stats table holds no non-NULL totals (e.g. only an empty memset table was
    # summarised), sum(total) is NULL, which sqlite3 prints as an empty string,
    # and the spliced statement would be a syntax error.
    # The quoted here-doc delimiter stops the shell from expanding anything in the SQL.
    "$MYSQLITE3" -column -header "$1" <<'MemoryTimeReport'
.width -7 -14 -10 -14 -14 -14 80
SELECT
    round((total*100.0)/(SELECT sum(total) FROM cudaMemoryOperationTimeStats),1) as 'Time(%)', total as 'Total Time',
    num as Operations, round(avg,1) as 'Average', min as 'Minimum',
    max as 'Maximum', name as Name
    FROM cudaMemoryOperationTimeStats WHERE num > 0 ORDER BY total DESC;
MemoryTimeReport
    printf "\n\nCUDA Memory Operation Statistics (KiB)\n\n"
    # The printf format is a single-quoted SQL string literal. Double-quoted string
    # literals are a legacy SQLite misfeature that sqlite3 shells may reject,
    # reading "%.3f" as a column name instead.
    "$MYSQLITE3" -column -header "$1" <<'MemoryByteReport'
.width -19 -14 -19 -17 -19 80
SELECT
    printf('%.3f', total*1.0/1024) as 'Total',
    num as Operations,
    printf('%.3f', avg*1.0/1024) as 'Average',
    printf('%.3f', min*1.0/1024) as 'Minimum',
    printf('%.3f', max*1.0/1024) as 'Maximum',
    name as Name
    FROM cudaMemoryOperationByteStats WHERE num > 0 ORDER BY total DESC;
MemoryByteReport
fi

printf "\n\n"
