/*******************************************************************************
* Copyright (C) 2021 Intel Corporation
*
* This software and the related documents are Intel copyrighted  materials,  and
* your use of  them is  governed by the  express license  under which  they were
* provided to you (License).  Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute,  disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents  are provided as  is,  with no express
* or implied  warranties,  other  than those  that are  expressly stated  in the
* License.
*******************************************************************************/

/*
*  Content:
*      dgetrf_batch_strided / dgetri_oop_batch_strided OpenMP Offload Example
*******************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#include "mkl.h"
#include "mkl_omp_offload.h"

/* printf conversion used for every MKL_INT value this example prints.
 * "%4d" is the default; "%4lld" is selected when MKL_ILP64 is defined
 * (presumably MKL_INT is a 64-bit integer in that mode -- confirm against
 * the MKL type headers). */
#ifndef MKL_ILP64
  #define FMT "%4d"
#else
  #define FMT "%4lld"
#endif


/*
 * Example driver.
 *
 * Builds a batch of random square matrices with boosted diagonals, computes
 * their LU factorizations with dgetrf_batch_strided and then their inverses
 * (out of place, into a separate buffer) with dgetri_oop_batch_strided. Both
 * routines are invoked through "#pragma omp dispatch" inside an
 * "omp target data" region that maps the host buffers.
 *
 * Returns 0 on success, 1 if any buffer could not be allocated, otherwise the
 * number of non-zero info entries reported by the routine that failed
 * (the inversion step is skipped when the factorization reports an error).
 */
int main(int argc, char **argv)
{
    // Command-line arguments are not used by this example.
    (void)argc;
    (void)argv;

    // Batched LU inverse parameters
    MKL_INT n   = 10;
    MKL_INT lda = n;
    MKL_INT ldainv = n;
    MKL_INT stride_a = lda*n;
    MKL_INT stride_ainv = ldainv*n;
    MKL_INT stride_ipiv = n;
    MKL_INT batch_size = 4;

    printf("\n===========================================================================\n");
    printf(" Compute the matrix inverse of the matrices in A. Store the results in Ainv\n");
    printf("===========================================================================\n");
    printf("  Batch Size:                          " FMT "\n", batch_size);
    printf("  Matrix Order:                        " FMT "\n", n);
    printf("  Leading dimension for A matrices:    " FMT "\n", lda);
    printf("  Leading dimension for Ainv matrices: " FMT "\n", ldainv);
    printf("  Stride for A matrices:               " FMT "\n", stride_a);
    printf("  Stride for Ainv matrices:            " FMT "\n", stride_ainv);
    printf("  Stride for pivot arrays:             " FMT "\n", stride_ipiv);
    printf("===========================================================================\n");

    // Allocate required memory (64-byte aligned)
    MKL_INT total_a_size       = stride_a    * batch_size;
    MKL_INT total_ipiv_size    = stride_ipiv * batch_size;
    MKL_INT total_ainv_size    = stride_ainv * batch_size;
    double*  a      = (double *)mkl_malloc(sizeof(double) * total_a_size, 64);
    double*  ainv   = (double *)mkl_malloc(sizeof(double) * total_ainv_size, 64);
    MKL_INT* ipiv   = (MKL_INT *)mkl_malloc(sizeof(MKL_INT) * total_ipiv_size, 64);
    MKL_INT* info   = (MKL_INT *)mkl_malloc(sizeof(MKL_INT) * batch_size, 64);
    if ( !a || !ainv || !ipiv || !info ) {
        printf("\n     ERROR. Failed memory allocation \n");
        // Release whichever buffers were successfully allocated before bailing out.
        mkl_free(a);
        mkl_free(ainv);
        mkl_free(ipiv);
        mkl_free(info);
        return 1;
    }

    // Random initialization of matrices (column-major, one matrix every stride_a elements)
    for (MKL_INT imat = 0; imat < batch_size; imat++) {
      MKL_INT a_off = imat*stride_a;
      for (MKL_INT col = 0; col < n; col++) {
          for (MKL_INT row = 0; row < n; row++) {
              // Random number in the interval [-0.5, 0.5]
              a[a_off + row + col*lda] = ((double) rand() / (double) RAND_MAX) - 0.5;
          }
      }

      // Make diagonal entries larger to ensure matrix is well-conditioned
      for (MKL_INT row = 0; row < n; row++) {
          a[a_off + row + row*lda] += 5.0;
      }
    }

    // Compute LU factorization via OpenMP offload.
    // On entry, A contains the input matrix, on exit it contains the LU factorization
    #pragma omp target data map(a[0:total_a_size], ipiv[0:total_ipiv_size], info[0:batch_size])
    {
        #pragma omp dispatch
        dgetrf_batch_strided(&n, &n, a, &lda, &stride_a, ipiv, &stride_ipiv, &batch_size, info);
    }
    printf("\nFinished call to dgetrf_batch_strided\n");

    // Count the matrices whose factorization reported a non-zero info value.
    MKL_INT exit_status = 0;
    for (MKL_INT imat = 0; imat < batch_size; imat++) {
        if(info[imat]) {
            printf("dgetrf_batch_strided failed: Matrix " FMT " returned with info=" FMT "\n", imat, info[imat]);
            exit_status++;
        }
    }

    // Compute the matrix inverse via OpenMP offload. On exit, the inverse is stored in Ainv.
    // Skipped when any factorization failed, since the inversion consumes the LU factors.
    if(exit_status == 0) {
        #pragma omp target data map(a[0:total_a_size], ipiv[0:total_ipiv_size], ainv[0:total_ainv_size], info[0:batch_size])
        {
            #pragma omp dispatch
            dgetri_oop_batch_strided(&n, a, &lda, &stride_a, ipiv, &stride_ipiv, ainv, &ldainv, &stride_ainv, &batch_size, info);
        }
        printf("\nFinished call to dgetri_oop_batch_strided\n");

        for (MKL_INT imat = 0; imat < batch_size; imat++) {
            if(info[imat]) {
                printf("dgetri_oop_batch_strided offload failed: Matrix " FMT " returned with info=" FMT "\n", imat, info[imat]);
                exit_status++;
            }
        }
    }

    // Cleanup
    mkl_free(a);
    mkl_free(ipiv);
    mkl_free(ainv);
    mkl_free(info);

    if (exit_status) {
        printf("\n\n===============================\nExample executed with errors.\n===============================\n\n");
    } else {
        printf("\n\n===============================\nExample executed successfully.\n===============================\n\n");
    }
    return (int)exit_status;
}
