#!/bin/bash
#===============================================================================
# Copyright (C) 2024 Intel Corporation
#
# This software and the related documents are Intel copyrighted  materials,  and
# your use of  them is  governed by the  express license  under which  they were
# provided to you (License).  Unless the License provides otherwise, you may not
# use, modify, copy, publish, distribute,  disclose or transmit this software or
# the related documents without Intel's prior written permission.
#
# This software and the related documents  are provided as  is,  with no express
# or implied  warranties,  other  than those  that are  expressly stated  in the
# License.
#===============================================================================

#
# Configure and build the non-MPI HPCG binaries for the NOMPI_PVC setup.
#
# Each command is echoed before it is executed.  The steps are:
#   1. ./configure NOMPI_PVC
#   2. make -j8                               (hpcg binary)
#   3. make -j8 MKLROOT=<...> test_kernels    (kernel test binaries)
#
# Globals:
#   MKLROOT (read) - forwarded to make when building the test kernels
# Returns:
#   0 when every step succeeds; otherwise the exit status of the first
#   failing step (later steps are skipped and the "Finished Build" banner
#   is not printed).
#
function build_nompi_hpcg {
    echo '========= Start Build ==============='
    echo "./configure NOMPI_PVC"
    # A bare `return` propagates the status of the command that just failed.
    ./configure NOMPI_PVC || return

    #
    # build hpcg binary (doesn't depend on Intel oneMKL library)
    #
    echo "make -j8"
    make -j8 || return

    #
    # build functional and performance test kernels for fundamental HPCG kernels
    # (functional test validates against Intel oneMKL library)
    #
    echo "make -j8 MKLROOT=${MKLROOT} test_kernels"
    # Quoted so that an MKLROOT containing whitespace stays a single argument.
    make -j8 "MKLROOT=${MKLROOT}" test_kernels || return

    #
    # BASIC_PROFILING adds some event waits and overhead to collect wall times
    # for different segments of the CG algorithm, which are output in several
    # .json files upon completion.  Should not be used for official runs, but
    # may be helpful for profiling and debugging.
    #

    #echo "make -j8 MKLROOT=${MKLROOT} BASIC_PROFILING=yes"
    #make -j8 "MKLROOT=${MKLROOT}" BASIC_PROFILING=yes || return

    echo '========= Finished Build ============'
}

#
# Run the non-MPI HPCG benchmark binary with a fixed problem configuration.
#
# Prints diagnostic information (sycl-ls output, icpx version, MKLROOT and
# SYCL_QUEUE_THREAD_POOL_SIZE), exports the SYCL / OpenMP runtime settings,
# echoes the command line and then executes the selected binary.
#
# Globals:
#   MKLROOT, SYCL_QUEUE_THREAD_POOL_SIZE (read, only for display)
#   SYCL_DEVICE_ALLOWLIST, ONEAPI_DEVICE_SELECTOR, OMP_NUM_THREADS,
#   OMP_PROC_BIND, OMP_PLACES (exported)
# Returns:
#   The exit status of the benchmark executable.
#
function run_nompi_hpcg {
    # Run parameters are kept local so they do not leak into the caller.
    local nx ny nz exe run_time_in_seconds runRealRef nthreads
    local rc=0

    # setup runtime configuration
    #
    export SYCL_DEVICE_ALLOWLIST=
    #export ZE_AFFINITY_MASK=1
    #export ONEAPI_DEVICE_SELECTOR="opencl:gpu"
    export ONEAPI_DEVICE_SELECTOR="level_zero:gpu"
    #unset ONEAPI_DEVICE_SELECTOR

    echo '========= Driver versions ==========='
    sycl-ls

    echo '========= ICX SYCL version =========='
    icpx --version

    echo '========= oneMKL PATH ==============='
    echo "MKLROOT = ${MKLROOT}"

    echo '========= Other Settings ============'
    echo "export SYCL_QUEUE_THREAD_POOL_SIZE=${SYCL_QUEUE_THREAD_POOL_SIZE}"

    echo '========= Start Run ================='

    # Problem size; uncomment the nprob lines for a cubic problem instead.
    #nprob=16
    #nprob=128
    #nprob=256
    #nprob=320
    #nx=${nprob}
    #ny=${nprob}
    #nz=${nprob}
    nx=512
    ny=512
    nz=256

    # Binary to run (path is relative to the current working directory).
    exe=bin/xhpcg_nompi_pvc
    #exe=bin/xhpcg_nompi_pvc_test_kernels
    #exe=bin/xhpcg_nompi_pvc_bench_kernels

    run_time_in_seconds=60  # official submittable runs need this to be >= 1800

    #runRealRef=0 # good for faster completion of runs with only 50 iters (no comparison to reference CG)
    runRealRef=1 # official runs need to use this (may require >= 50 iters to match
                 # reference CG convergence after 50 iters, but will only count as 50)
    #runRealRef=2 # For small ranks (1,2,4 with size 320x320x320 or 1,2,4,8,12 with
                  # size 512x512x256), offers a shorter path to measuring reference
                  # code convergence after 50 iters of CG_ref to match with optimized
                  # CG (may require >= 50 iters, but will only count as 50)

    nthreads=28 # for reference code openmp threads

    #
    # Configure OpenMP on node, if relevant
    #
    export OMP_NUM_THREADS=${nthreads}
    export OMP_PROC_BIND=close
    export OMP_PLACES=threads

    #
    # runtime parameter summary
    #

    echo " ======== ${exe} =============="
    echo " ===       nthreads: ${nthreads}"
    echo " ===   run-real-ref: ${runRealRef}"
    echo " ===      prob_size: ${nx} x ${ny} x ${nz}"
    echo " ===       run_time: ${run_time_in_seconds}"

    # run executable with specified options
    echo "${exe} --nx=${nx} --ny=${ny} --nz=${nz} -t${run_time_in_seconds} --run-real-ref=${runRealRef}"
    # Remember the benchmark's status: the banner below would otherwise
    # overwrite it and the function would always report success.
    "${exe}" "--nx=${nx}" "--ny=${ny}" "--nz=${nz}" "-t${run_time_in_seconds}" "--run-real-ref=${runRealRef}" || rc=$?

    echo '========= Finished ================='
    return "${rc}"
}



#
# Set up the environment with the required dependencies, then build and run.
# (It is recommended to update to the latest released versions; the versions
# below are the minimal dependencies.)
#

#
# Select Intel(R) oneAPI Math Kernel Library (oneMKL) version
#
source /opt/intel/oneapi/mkl/2025.0.1/env/vars.sh intel64
# Note: MKLROOT must be defined, and LD_LIBRARY_PATH must contain the path to
# the oneMKL shared libraries
# (export LD_LIBRARY_PATH=${MKLROOT}/lib:${LD_LIBRARY_PATH})

#
# Select Intel(R) oneAPI DPC++ Compiler (should be compatible with oneMKL version)
#
# The install prefix is spelled "oneapi" (lowercase) like the other components;
# the path is case-sensitive on typical Linux filesystems.
source /opt/intel/oneapi/compiler/2025.0.1/env/vars.sh intel64

#
# Select Intel(R) oneAPI DPC++ Library (oneDPL) version
#
source /opt/intel/oneapi/dpl/2022.7.1/env/vars.sh intel64



#
# build hpcg binaries (stop here if the build step reports a failure, so a
# missing or stale binary is not benchmarked)
#
build_nompi_hpcg || exit

#
# run hpcg binaries
#
run_nompi_hpcg
