CLBlast/test/correctness/misc/preprocessor.cpp


// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
//   Cedric Nugteren <www.cedricnugteren.nl>
//
// This file contains the tests for the simple integrated OpenCL pre-processor
//
// =================================================================================================

#include <string>
#include <vector>
#include <unordered_map>
#include <random>
#include <iostream>

#include "utilities/utilities.hpp"
#include "utilities/compile.hpp"
#include "kernel_preprocessor.hpp"

namespace clblast {
// =================================================================================================

bool TestDefines() {
  const auto source1 =
  R"(
  #define VAR1
  #define VAR2 32
  #if VAR2 == 32
    #ifndef VAR1
      #define ERROR
      #ifdef VAR1
        #define ERROR
      #endif
    #else
      #if VAR2 == 32 || VAR3 == 4
        #define SUCCESS
      #else
        #define ERROR
      #endif
      #define SUCCESS
    #endif
  #endif
  #ifndef VAR3
    #define SUCCESS
  #else
    #define ERROR
  #endif
  )";
  const auto expected1 =
  "  #define VAR1\n"
  "  #define VAR2 32\n"
  "        #define SUCCESS\n"
  "      #define SUCCESS\n"
  "    #define SUCCESS\n"
  "  \n";
  const auto result1 = PreprocessKernelSource(source1);
  return result1 == expected1;
}
// =================================================================================================

bool TestArrayToRegisterPromotion() {
  const auto source1 =
      R"(#define WPT 2
inline void SetValues(int float, float values[WPT],
                      const float k) {
  #pragma unroll
  for (int i = 0; i < WPT; i += 1) {
    values[i] = k + j;
  }
}
__kernel void ExampleKernel() {
  #pragma promote_to_registers
  float values[WPT];
  #pragma unroll
  for (int i = 0; i < WPT; i += 1) {
    values[i] = 0.0f;
  }
  SetValues(12.3f, values, -3.9f);
}
)";
  const auto expected1 =
      R"(#define WPT 2
inline void SetValues(int float, float values_0, float values_1,
                      const float k) {
  {
    values_0 = k + j;
  }
  {
    values_1 = k + j;
  }
}
__kernel void ExampleKernel() {
  float values_0;
  float values_1;
  {
    values_0 = 0.0f;
  }
  {
    values_1 = 0.0f;
  }
  SetValues(12.3f, values_0, values_1, -3.9f);
}
)";
  const auto result1 = PreprocessKernelSource(source1);
  return result1 == expected1;
}

// =================================================================================================

bool TestKernel(const Device& device, const Context& context,
                const std::string &kernel_name, const std::string &kernel_source,
                const Precision precision) {

  fprintf(stdout, "* Testing simple OpenCL pre-processor for '%s'\n", kernel_name.c_str());

  // Verifies that the current kernel compiles properly (assumes so, otherwise throws an error)
  auto compiler_options_ref = std::vector<std::string>();
  const auto program_ref = CompileFromSource(kernel_source, precision, kernel_name,
                                             device, context, compiler_options_ref, 2);

  // Compiles the same kernel, but now with the pre-processor enabled
  try {
    auto compiler_options = std::vector<std::string>();
    const auto program = CompileFromSource(kernel_source, precision, kernel_name,
                                           device, context, compiler_options, 1);
    return true;
  } catch (const CLCudaAPIBuildError &e) {
    fprintf(stdout, "* ERROR: Compilation warnings/errors with pre-processed kernel, status %d\n",
            e.status());
    return false;
  } catch (const Error<std::runtime_error> &e) {
    fprintf(stdout, "* ERROR: Pre-processor error, message:\n%s\n", e.what());
    return false;
  }
}

// =================================================================================================

size_t RunPreprocessor(int argc, char *argv[], const bool silent, const Precision precision) {
  auto errors = size_t{0};
  auto passed = size_t{0};

  // Retrieves the arguments
  auto help = std::string{"Options given/available:\n"};
  auto arguments = RetrieveCommandLineArguments(argc, argv);
  const auto platform_id = GetArgument(arguments, help, kArgPlatform, ConvertArgument(std::getenv("CLBLAST_PLATFORM"), size_t{0}));
  const auto device_id = GetArgument(arguments, help, kArgDevice, ConvertArgument(std::getenv("CLBLAST_DEVICE"), size_t{0}));
  if (!silent) { fprintf(stdout, "\n* %s\n", help.c_str()); }

  // Initializes OpenCL
  const auto platform = Platform(platform_id);
  const auto device = Device(platform, device_id);
  const auto context = Context(device);

  // Basic tests
  if (TestDefines()) { passed++; } else { errors++; }
  if (TestArrayToRegisterPromotion()) { passed++; } else { errors++; }

  // XAXPY
  const auto xaxpy_sources =
    "#define WPT 2\n"
    #include "../src/kernels/level1/level1.opencl"
    #include "../src/kernels/level1/xaxpy.opencl"
  ;
  if (TestKernel(device, context, "XaxpyFastest", xaxpy_sources, precision)) { passed++; } else { errors++; }

  // XGER
  const auto xger_sources =
    "#define WPT 2\n"
    #include "../src/kernels/level2/level2.opencl"
    #include "../src/kernels/level2/xger.opencl"
  ;
  if (TestKernel(device, context, "Xger", xger_sources, precision)) { passed++; } else { errors++; }

  // XGEMV
  const auto xgemv_sources =
    "#define WPT1 2\n"
    "#define WPT2 2\n"
    "#define WPT3 2\n"
    "#define UNROLL1 4\n"
    "#define VW2 2\n"
    #include "../src/kernels/level2/xgemv.opencl"
    #include "../src/kernels/level2/xgemv_fast.opencl"
  ;
  if (TestKernel(device, context, "XgemvFast", xgemv_sources, precision)) { passed++; } else { errors++; }

  // CopyFast
  const auto copy_fast_sources =
    "#define COPY_WPT 2\n"
    #include "../src/kernels/level3/level3.opencl"
    #include "../src/kernels/level3/copy_fast.opencl"
  ;
  if (TestKernel(device, context, "CopyMatrixFast", copy_fast_sources, precision)) { passed++; } else { errors++; }

  // CopyPad
  const auto copy_pad_sources =
    "#define PAD_WPTX 2\n"
    "#define PAD_WPTY 2\n"
    #include "../src/kernels/level3/level3.opencl"
    #include "../src/kernels/level3/copy_pad.opencl"
  ;
  if (TestKernel(device, context, "CopyPadMatrix", copy_pad_sources, precision)) { passed++; } else { errors++; }

  // TransposeFast
  const auto transpose_fast_sources =
    "#define TRA_WPT 2\n"
    #include "../src/kernels/level3/level3.opencl"
    #include "../src/kernels/level3/transpose_fast.opencl"
  ;
  if (TestKernel(device, context, "TransposeMatrixFast", transpose_fast_sources, precision)) { passed++; } else { errors++; }

  // TransposePad
  const auto transpose_pad_sources =
    "#define PADTRA_WPT 2\n"
    #include "../src/kernels/level3/level3.opencl"
    #include "../src/kernels/level3/transpose_pad.opencl"
  ;
  if (TestKernel(device, context, "TransposePadMatrix", transpose_pad_sources, precision)) { passed++; } else { errors++; }

  // GEMM (in-direct)
  const auto gemm_sources =
    "#define KWI 2\n"
    "#define MWG 16\n"
    "#define NWG 16\n"
    "#define SA 1\n"
    #include "../src/kernels/level3/xgemm_part1.opencl"
    #include "../src/kernels/level3/xgemm_part2.opencl"
    #include "../src/kernels/level3/xgemm_part3.opencl"
  ;
  if (TestKernel(device, context, "Xgemm", gemm_sources, precision)) { passed++; } else { errors++; }

  // GEMM (direct)
  const auto gemm_direct_sources =
    "#define KWID 2\n"
    "#define WGD 16\n"
    #include "../src/kernels/level3/xgemm_direct_part1.opencl"
    #include "../src/kernels/level3/xgemm_direct_part2.opencl"
    #include "../src/kernels/level3/xgemm_direct_part3.opencl"
  ;
  if (TestKernel(device, context, "XgemmDirectTN", gemm_direct_sources, precision)) { passed++; } else { errors++; }

  // Prints and returns the statistics
  std::cout << std::endl;
  std::cout << "    " << passed << " test(s) passed" << std::endl;
  std::cout << "    " << errors << " test(s) failed" << std::endl;
  std::cout << std::endl;
  return errors;
}

// =================================================================================================
} // namespace clblast

// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
  auto errors = size_t{0};
  errors += clblast::RunPreprocessor(argc, argv, false, clblast::Precision::kSingle);
  if (errors > 0) { return 1; } else { return 0; }
}

// =================================================================================================