#!/bin/bash
#
# CUDA API & GPU summary report: runs one SQL query against an SQLite
# database and prints per-operation timing statistics (CUDA API calls,
# kernels, memcpy and memset operations) as CSV with a header row.
#
# Arguments:
#   $1    path to the SQLite database file (required)
#   $2..  optional; the word "base" groups kernels by their base name
#         instead of the templated (demangled) name
#
# Exit codes set by this script:
#   25  no arguments were given (usage text is printed)
#   26  the database file is missing or does not start with the SQLite header
#   27  none of the CUPTI runtime/kernel/memcpy/memset tables is present

# Treat expansion of an unset variable as an error
set -u

# Keyword recognised as the optional "use kernel base names" argument
ARG_BASE=base

# Write the help text for this report to stdout: the report name line, the
# meaning of the optional argument, the output columns and a description.
# Globals:   ARG_BASE (read) - spelling of the optional argument
# Arguments: none are used
# Outputs:   help text on stdout
print_usage()
{
    # Resolve the displayed script name once, before emitting the text
    local SCRIPT_NAME
    SCRIPT_NAME=$(basename "$0")

    cat << EOF
${SCRIPT_NAME}[:${ARG_BASE}] -- CUDA API & GPU Summary (CUDA API + kernels + memory ops)

    ${ARG_BASE} - Optional argument, if given, will cause summary to be over the
           base name of the kernel, rather than the templated name.

    Output: All time values given in nanoseconds
        Time(%) : Percentage of "Total Time"
        Total Time : The total time used by all executions of this kernel
        Instances: The number of executions of this object
        Average : The average execution time of this kernel
        Minimum : The smallest execution time of this kernel
        Maximum : The largest execution time of this kernel
        Category : The category of the operation
        Operation : The name of the kernel

    This report provides a summary of CUDA API calls, kernels and memory
    operations, and their execution times. Note that the "Time(%)"
    column is calculated using a summation of the "Total Time" column,
    and represents that API call's, kernel's, or memory operation's
    percent of the execution time of the APIs, kernels and memory
    operations listed, and not a percentage of the application wall or
    CPU execution time.

    This report combines data from the "cudaapisum", "gpukernsum", and
    "gpumemsizesum" reports.  It is very similar to profile section of
    "nvprof --dependency-analysis".
EOF
}

### BEGIN include inc_setup ###

# Exit codes used by this script
EXIT_HELP=25      # no arguments given; usage text was printed
EXIT_DB=26        # database file missing or not an SQLite file
EXIT_NODATA=27    # database holds none of the tables this report needs

# Without at least one argument there is nothing to do: print help and leave
if (( $# < 1 ))
then
    print_usage "${BASH_SOURCE[0]}"
    exit ${EXIT_HELP}
fi

# First argument is the database file
DATABASE="$1"

# The database must be an existing regular file
[ -f "${DATABASE}" ] || exit ${EXIT_DB}

# The file must begin with the SQLite magic string.
# The sqlite3 file format is defined at https://sqlite.org/fileformat.html
# The NUL that terminates the 16-byte header is turned into a newline, which
# the command substitution then strips.
DB_FILE_HEADER=$(head -c 16 "$DATABASE" | tr '\0' '\n')
[ "${DB_FILE_HEADER}" = "SQLite format 3" ] || exit ${EXIT_DB}

# Helper function for error messages
# Print an error message on stderr.
# Arguments: any number of words; they are joined with single spaces
# Outputs:   the joined message followed by a newline, on stderr
function echoerr() # accepts multiple args
{
    # printf instead of echo: echo would treat a leading -n/-e/-E argument
    # as an option and drop it from the message.
    local IFS=' '
    printf '%s\n' "$*" >&2
}

# Setup standard vars

# If we were run by nsys, the path to the preferred sqlite3 should have been
# passed as an env-var.  If not, hope the user has it in their path.
SQLITE3="${NSYS_STATS_SCRIPTS_SQLITE:-sqlite3}"
SQLITE3OPTS="-header -csv -readonly"

# Run sqlite3 with the standard options against the database.
# Globals:   SQLITE3, SQLITE3OPTS, DATABASE (read)
# Arguments: optional extra sqlite3 arguments (e.g. an SQL string); they are
#            passed through unchanged
# Inputs:    stdin is inherited, so SQL can be supplied with a here-document
# Returns:   the exit status of sqlite3
function run_sqlite()
{
    # SQLITE3OPTS is left unquoted on purpose so it splits into separate
    # options; the program and database paths are quoted so spaces, globs
    # and shell metacharacters in them are passed through literally.
    "${SQLITE3}" ${SQLITE3OPTS} "${DATABASE}" "$@"
}

# Kept as a variable so that call sites of the form `${RUN_SQLITE} << EOF`
# keep working.  It used to hold an eval'ed command string, which re-parsed
# the database path and broke (or executed) paths containing repeated
# spaces, quotes, globs, \$(...) or backticks.
RUN_SQLITE="run_sqlite"

### END include inc_setup ###

### BEGIN: include from inc_table_exists ###

# Names of all tables and views in the database; filled on first use
TABLE_EXISTS_TABLES=( )

# Report whether a table or view with the given name exists in the database.
# Globals:   SQLITE3, SQLITE3OPTS, DATABASE (read)
#            TABLE_EXISTS_TABLES (read; filled on the first call)
# Arguments: $1 - exact table or view name
# Outputs:   "true" or "false" on stdout
# Returns:   1 when the name exists, 0 when it does not.  This is the reverse
#            of the usual shell convention and is kept for compatibility;
#            callers should test the printed word, not the status.
# Note:      when called inside \$(...), the function runs in a subshell, so
#            the filled list is not retained by the calling shell.
function table_exists()
{
    local TABLE_NAME=$1
    local TABLE

    if [ "${#TABLE_EXISTS_TABLES[@]}" -eq 0 ]
    then
        # -noheader keeps the column header ("name") out of the list and
        # -list avoids CSV quoting of names; both override the earlier
        # -header/-csv in SQLITE3OPTS.  Reading line by line stores each name
        # verbatim instead of word-splitting and glob-expanding the output.
        while IFS= read -r TABLE
        do
            if [ -n "${TABLE}" ]
            then
                TABLE_EXISTS_TABLES+=( "${TABLE}" )
            fi
        done < <("${SQLITE3}" ${SQLITE3OPTS} -noheader -list "${DATABASE}" \
                "SELECT name FROM sqlite_master WHERE type = 'table' OR type = 'view'")
    fi

    # The ${var+...} form keeps an empty list from tripping `set -u` on
    # older bash releases.
    for TABLE in ${TABLE_EXISTS_TABLES[@]+"${TABLE_EXISTS_TABLES[@]}"}
    do
        if [ "${TABLE}" = "${TABLE_NAME}" ]
        then
            echo "true"
            return 1
        fi
    done
    echo "false"
    return 0
}

### END: include from inc_table_exists ###

### BEGIN include inc_helper_cte ###

# SQL fragments defining lookup CTEs, for pasting after a WITH keyword; each
# ends with a trailing comma.  They are read verbatim from quoted
# here-documents (leading blank line and final newline included).  `read`
# reports end-of-input with a non-zero status here, hence the `|| true`.

# Memory kind id -> display name
IFS= read -r -d '' MemKindStrsCTE << 'EOF' || true

    MemKindStrs (id, name) AS (
    VALUES
        (0,     'Pageable'),
        (1,     'Pinned'),
        (2,     'Device'),
        (3,     'Array'),
        (4,     'Unknown')
    ),
EOF

# Memcpy copy-kind id -> display name
IFS= read -r -d '' MemcpyOperStrsCTE << 'EOF' || true

    MemcpyOperStrs (id, name) AS (
    VALUES
        (0,     '[CUDA memcpy Unknown]'),
        (1,     '[CUDA memcpy HtoD]'),
        (2,     '[CUDA memcpy DtoH]'),
        (3,     '[CUDA memcpy HtoA]'),
        (4,     '[CUDA memcpy AtoH]'),
        (5,     '[CUDA memcpy AtoA]'),
        (6,     '[CUDA memcpy AtoD]'),
        (7,     '[CUDA memcpy DtoA]'),
        (8,     '[CUDA memcpy DtoD]'),
        (9,     '[CUDA memcpy HtoH]'),
        (10,    '[CUDA memcpy PtoP]'),
        (11,    '[CUDA Unified Memory memcpy HtoD]'),
        (12,    '[CUDA Unified Memory memcpy DtoH]'),
        (13,    '[CUDA Unified Memory memcpy DtoD]')
    ),
EOF

### END include inc_helper_cte ###


# Column of the kernel table that supplies the kernel name: the demangled
# (templated) name by default, the short base name when the ARG_BASE keyword
# appears among the arguments that follow the database path.
NAME_COL_NAME=demangledName

for ARG in "${@:2}"
do
    case "${ARG}" in
        "${ARG_BASE}")
            NAME_COL_NAME=shortName
            ;;
    esac
done

# Sub-SELECTs (separated by "UNION ALL" entries) that together form the body
# of the apigpu CTE.  One sub-SELECT is added per table present in the
# database; each yields the columns name, duration and category.
QUERY=()

# Append one sub-SELECT to QUERY, putting "UNION ALL" in front of it when
# QUERY already holds an earlier sub-SELECT.
# Arguments: $1 - SQL text of the sub-SELECT
function append_subquery()
{
    if (( ${#QUERY[@]} > 0 ))
    then
        QUERY+=("UNION ALL")
    fi
    QUERY+=("$1")
}

# CUDA runtime API calls.  When the six characters at the end of the API name
# start with "_v", those six characters are dropped from the name.
if [[ "$(table_exists "CUPTI_ACTIVITY_KIND_RUNTIME")" == "true" ]]
then
    Q="
        SELECT
            CASE substr(str.value, -6, 2)
                WHEN '_v' THEN substr(str.value, 1, length(str.value)-6)
                ELSE str.value
            END AS name,
            rt.end - rt.start AS duration,
            'CUDA_API' AS category
        FROM
            CUPTI_ACTIVITY_KIND_RUNTIME AS rt
        LEFT OUTER JOIN
            StringIds AS str
            ON str.id = rt.nameId
    "
    append_subquery "$Q"
fi

# GPU kernels, named through the column selected in NAME_COL_NAME
if [[ "$(table_exists "CUPTI_ACTIVITY_KIND_KERNEL")" == "true" ]]
then
    Q="
        SELECT
            str.value AS name,
            kern.end - kern.start AS duration,
            'CUDA_KERNEL' AS category
        FROM
            CUPTI_ACTIVITY_KIND_KERNEL AS kern
        LEFT OUTER JOIN
            StringIds AS str
            ON str.id = kern.${NAME_COL_NAME}
    "
    append_subquery "$Q"
fi

# Memory copies, named by copy kind through the MemcpyOperStrs lookup CTE
if [[ "$(table_exists "CUPTI_ACTIVITY_KIND_MEMCPY")" == "true" ]]
then
    Q="
        SELECT
            mos.name AS name,
            mcpy.end - mcpy.start AS duration,
            'MEMORY_OPER' AS category
        FROM
            CUPTI_ACTIVITY_KIND_MEMCPY as mcpy
        JOIN
            MemcpyOperStrs AS mos
            ON mos.id = mcpy.copyKind
    "
    append_subquery "$Q"
fi

# Memory sets, all reported under one fixed name
if [[ "$(table_exists "CUPTI_ACTIVITY_KIND_MEMSET")" == "true" ]]
then
    Q="
        SELECT
            '[CUDA memset]' AS name,
            end - start AS duration,
            'MEMORY_OPER' AS category
        FROM
            CUPTI_ACTIVITY_KIND_MEMSET
    "
    append_subquery "$Q"
fi


# None of the four tables exists: report it and stop
if (( ${#QUERY[@]} == 0 ))
then
    echoerr "$DATABASE does not contain CUDA API, GPU kernel, nor memory operations data."
    exit ${EXIT_NODATA}
fi

# Run the report query and print the result on stdout.
#   apigpu  - every collected operation as (name, duration, category)
#   summary - per-operation aggregates: total, count, average, min, max
#   totals  - sum of all per-operation totals, the base for "Time(%)"
# Rows are sorted by total time, largest first.
#
# summary groups by name AND category.  Grouping by name alone merged
# operations from different categories that share a name and reported an
# arbitrary category for the merged row.
${RUN_SQLITE} << EOF

WITH
    ${MemcpyOperStrsCTE}
    apigpu AS (
        ${QUERY[@]}
    ),
    summary AS (
        SELECT
            name AS name,
            category AS category,
            sum(duration) AS total,
            count(*) AS num,
            avg(duration) AS avg,
            min(duration) AS min,
            max(duration) AS max
        FROM
            apigpu
        GROUP BY 1, 2
    ),
    totals AS (
        SELECT sum(total) AS total
        FROM summary
    )
SELECT
    round(summary.total * 100.0 / totals.total, 1) AS "Time(%)",
    summary.total AS "Total Time (ns)",
    summary.num AS "Instances",
    round(summary.avg, 1) AS "Average",
    summary.min AS "Minimum",
    summary.max AS "Maximum",
    summary.category AS "Category",
    summary.name AS "Operation"
FROM
    summary
JOIN
    totals
ORDER BY 2 DESC
;

EOF
