#=============================================================================
#   CMake build system files
#
#   Copyright (c) 2014 pocl developers
#
#   Permission is hereby granted, free of charge, to any person obtaining a copy
#   of this software and associated documentation files (the "Software"), to deal
#   in the Software without restriction, including without limitation the rights
#   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#   copies of the Software, and to permit persons to whom the Software is
#   furnished to do so, subject to the following conditions:
#
#   The above copyright notice and this permission notice shall be included in
#   all copies or substantial portions of the Software.
#
#   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
#   THE SOFTWARE.
#
#=============================================================================

# Load the bitcode_rules module before anything else in this file.
include(bitcode_rules)

# Base kernel source list: exactly one of the three pre-defined source lists
# is used, depending on which math library backend is enabled.
if(ENABLE_VECMATHLIB)
  set(KERNEL_SOURCES ${SOURCES_WITH_VML})
elseif(ENABLE_SLEEF)
  set(KERNEL_SOURCES ${SOURCES_WITH_SLEEF})
else()
  set(KERNEL_SOURCES ${SOURCES_WITHOUT_VML})
endif()

# mem_fence.c is built for every configuration.
list(APPEND KERNEL_SOURCES mem_fence.c)

# OpenCL 2.0 atomics sources are only considered when the host device
# OpenCL version number is greater than 199.
if(HOST_DEVICE_CL_VERSION GREATER 199)
  if(X86_64 OR I386)
    # x86: the .ll implementation is chosen by LLVM version and by whether
    # fake address space IDs are in use; LLVM 3.6 gets no atomics at all.
    if(LLVM_3_6)
      message(STATUS "OpenCL 2.0 atomics only works with LLVM >= 3.7")
    elseif(LLVM_OLDER_THAN_3_9)
      list(APPEND KERNEL_SOURCES
           "svm_atomics_x86_64.ll" "svm_atomics.cl")
    elseif(LLVM_3_9 AND POCL_USE_FAKE_ADDR_SPACE_IDS)
      list(APPEND KERNEL_SOURCES
           "svm_atomics_x86_64_llvm3_9.ll" "svm_atomics.cl")
    else()
      list(APPEND KERNEL_SOURCES
           "svm_atomics_x86_64_no_fake_asids.ll" "svm_atomics.cl")
    endif()
  elseif(MIPS)
    # Nothing is added on MIPS; only a notice is printed.
    message(STATUS "OpenCL 2.0 atomics are currently broken on MIPS")
  else()
    # Every other architecture falls back to the generic host implementation.
    message(STATUS "Using generic OpenCL 2.0 atomics. Might or might not break your build.")
    list(APPEND KERNEL_SOURCES "svm_atomics_host.cl" "svm_atomics.cl")
  endif()
endif()

# Prepend the host-specific OpenCL C flags to whatever KERNEL_CL_FLAGS
# already contains. "-Xclang" applies to the "-cl-std=..." argument that
# immediately follows it, so the two must stay adjacent.
set(KERNEL_CL_FLAGS
  -Wall
  -Wno-unused-local-typedef
  -Xclang "-cl-std=CL${HOST_DEVICE_CL_STD}"
  "-D__OPENCL_C_VERSION__=${HOST_DEVICE_CL_VERSION}"
  ${KERNEL_CL_FLAGS})

# Turn the space-separated host flag strings into CMake lists.
separate_arguments(HOST_CLANG_FLAGS)
separate_arguments(HOST_LLC_FLAGS)

# Device flags: the OpenCL version define followed by the extension defines,
# assembled as one space-separated string and then split into a list.
set(DEVICE_CL_FLAGS
    "-D__OPENCL_VERSION__=${HOST_DEVICE_CL_VERSION} ${HOST_DEVICE_EXTENSION_DEFINES}")
separate_arguments(DEVICE_CL_FLAGS)

# x86_distro_variant_to_flags(VARIANT OUT_LLC_FLAGS OUT_CLANG_FLAGS)
#
# Translates an x86 kernel-library variant name into CPU selection flags.
#
#   VARIANT         - either one of the instruction-set aliases listed below,
#                     or any other string, which is used verbatim as CPU name
#   OUT_LLC_FLAGS   - name of the caller variable that receives "-mcpu=<cpu>"
#   OUT_CLANG_FLAGS - name of the caller variable that receives
#                     "${CLANG_MARCH_FLAG}<cpu>"
#
# CLANG_MARCH_FLAG is read from the calling scope.
function(x86_distro_variant_to_flags VARIANT OUT_LLC_FLAGS OUT_CLANG_FLAGS)

  # Parallel lists: the alias at index i maps to the CPU name at index i.
  set(VARIANT_ALIASES
      sse2      ssse3  sse41   avx          avx_f16c   avx_fma4  avx2     avx512)
  set(VARIANT_CPUS
      athlon64  core2  penryn  sandybridge  ivybridge  bdver1    haswell  skylake-avx512)

  # Unknown variants are passed through unchanged as the CPU name.
  set(CPU_NAME "${VARIANT}")

  list(FIND VARIANT_ALIASES "${VARIANT}" ALIAS_INDEX)
  if(NOT ALIAS_INDEX EQUAL -1)
    list(GET VARIANT_CPUS ${ALIAS_INDEX} CPU_NAME)
  endif()

  set(${OUT_LLC_FLAGS} "-mcpu=${CPU_NAME}" PARENT_SCOPE)
  set(${OUT_CLANG_FLAGS} "${CLANG_MARCH_FLAG}${CPU_NAME}" PARENT_SCOPE)
endfunction()

###############################################################################

# compile_sleef(VARIANT SLEEF_CONFIG SLEEF_CONFIG_NEW SLEEF_BC)
#
# Registers the build of the SLEEF math library as LLVM bitcode for one CPU
# variant and, at configure time, appends feature macros to the SLEEF
# configuration file.
#
#   VARIANT          - CPU variant name; used for the output subdirectory and
#                      log messages, and forwarded to compile_sleef_c_to_bc()
#   SLEEF_CONFIG     - path of the header passed via "-include" when compiling
#                      sleef_glue_auto.c
#   SLEEF_CONFIG_NEW - path of the file that "#define ..." lines are appended
#                      to (the caller is expected to have created it)
#   SLEEF_BC         - NAME of the variable in the caller's scope that
#                      receives the path of the linked sleef.bc
#
# Reads CLANG, CLANG_FLAGS, LLVM_LINK, ENABLE_FP64, ARM32 and X86 from the
# calling scope.
function(compile_sleef VARIANT SLEEF_CONFIG SLEEF_CONFIG_NEW SLEEF_BC)

  # The 4th argument carries the output variable's name. Remember it so the
  # result is stored where the caller asked for it instead of in a
  # hard-coded "SLEEF_BC".
  set(OUT_BC_VAR "${SLEEF_BC}")

  unset(BC_FILE_LIST)

  set(EXTRA_FLAGS "-DDORENAME;-DPURE_C;-I${CMAKE_SOURCE_DIR}/lib/kernel/sleef/include")

# disabled - this code uses libm
#    compile_sleef_c_to_bc("c" "sleef/libm/sleef_builtin.c"
#                          "${VARIANT}" BC_FILE_LIST ${EXTRA_FLAGS})
  compile_sleef_c_to_bc("c" "sleef/libm/sleefsp.c"
                        "${VARIANT}" BC_FILE_LIST ${EXTRA_FLAGS})

  if(ENABLE_FP64)
    compile_sleef_c_to_bc("c" "sleef/libm/sleefdp.c"
                          "${VARIANT}" BC_FILE_LIST ${EXTRA_FLAGS})
  endif()

  # The glue code is compiled with the per-variant config header
  # force-included instead of the usual EXTRA_FLAGS.
  compile_sleef_c_to_bc("c" "sleef/libm/sleef_glue_auto.c"
                        "${VARIANT}" BC_FILE_LIST "-include" "${SLEEF_CONFIG}")

  # Sources of the small probe programs used for the per-vector-size
  # compile checks below.
  file(READ "${CMAKE_SOURCE_DIR}/lib/kernel/sleef/test.c" TEST_SRC)
  file(READ "${CMAKE_SOURCE_DIR}/lib/kernel/sleef/fma_test.c" FMA_TEST_SRC)

  if(ENABLE_FP64)
    file(APPEND "${SLEEF_CONFIG_NEW}" "#define SLEEF_DOUBLE_AVAILABLE\n")
  endif()
  # current SLEEF code does not have code for
  # ARM32 NEON double vectors (if they even exist)
  if(NOT ARM32)
    file(APPEND "${SLEEF_CONFIG_NEW}" "#define SLEEF_DOUBLE_VEC_AVAILABLE\n")
  endif()
  # this is workaround for an inconsistency problem in SLEEF.
  # certain functions (ldexp, ilogb, pown) with double2 type argument
  # take/return int2 types. There are no int2 vectors on x86(-64)
  # but there are on ARM, and the vint type definition in SLEEF is
  # different - on x86 it's defined to be 128bit int32 vector,
  # but that's not the case on ARM where it is a 64bit int32 vector.
  if(X86)
    file(APPEND "${SLEEF_CONFIG_NEW}" "#define SLEEF_VINT_IS_VLONG\n")
  endif()

  foreach(VECSIZE "128" "256" "512")

    set(EXTRA_FLAGS "-DDORENAME;-DVEC${VECSIZE}")

    # Probe: can the SIMD code for this vector width be compiled at all?
    custom_try_compile_any(1 "${CLANG}" "c" "${TEST_SRC}" RES
      ${CLANG_FLAGS} ${EXTRA_FLAGS} "-c")

    # "if(RES EQUAL 0)" stays well-formed even when RES is empty or unset,
    # unlike "if(${RES} EQUAL ...)".
    if(RES EQUAL 0)
      compile_sleef_c_to_bc("v${VECSIZE}" "sleef/libm/sleefsimdsp.c"
                            "${VARIANT}" BC_FILE_LIST ${EXTRA_FLAGS})
      if(NOT ARM32)
        compile_sleef_c_to_bc("v${VECSIZE}" "sleef/libm/sleefsimddp.c"
                              "${VARIANT}" BC_FILE_LIST ${EXTRA_FLAGS})
      endif()
      message(STATUS "${VARIANT} SLEEF: ${VECSIZE}bit vectors available.")

      file(APPEND "${SLEEF_CONFIG_NEW}"
           "#define SLEEF_VEC_${VECSIZE}_AVAILABLE\n")

    else()
      message(STATUS "${VARIANT} SLEEF: ${VECSIZE}bit vectors NOT available.")
    endif()

    # Probe: does the FMA test program compile for this vector width?
    # This check runs regardless of the outcome of the probe above.
    custom_try_compile_any(1 "${CLANG}" "c" "${FMA_TEST_SRC}" RES
      ${CLANG_FLAGS} ${EXTRA_FLAGS} "-c")
    if(RES EQUAL 0)
      file(APPEND "${SLEEF_CONFIG_NEW}"
           "#define HAVE_FMA32_${VECSIZE}\n#define HAVE_FMA64_${VECSIZE}\n")
      message(STATUS "${VARIANT} SLEEF: ${VECSIZE}bit hardware FMA available.")
    endif()

  endforeach()

  # All per-file bitcode objects are linked into <bindir>/<variant>/sleef.bc.
  file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${VARIANT}")
  set(BC_PATH "${CMAKE_CURRENT_BINARY_DIR}/${VARIANT}/sleef.bc")
  set(${OUT_BC_VAR} "${BC_PATH}" PARENT_SCOPE)

  message(STATUS "${VARIANT} SLEEF bc list: ${BC_FILE_LIST}")
  message(STATUS "${VARIANT} SLEEF bc: ${BC_PATH}")

  add_custom_command( OUTPUT "${BC_PATH}"
    DEPENDS ${BC_FILE_LIST}
    COMMAND "${LLVM_LINK}" "-o" "${BC_PATH}" ${BC_FILE_LIST}
    COMMENT "Linking & optimizing SLEEF for ${VARIANT}: ${BC_PATH}"
    VERBATIM)

endfunction()

###############################################################################

# Build, register and install one kernel library bitcode file per entry of
# KERNELLIB_HOST_CPU_VARIANTS.
foreach(CPU_VARIANT IN LISTS KERNELLIB_HOST_CPU_VARIANTS)

  # The special name "native" is replaced by the value of LLC_HOST_CPU.
  # Exact comparison: MATCHES would also hit any name merely containing
  # the substring "native".
  if(CPU_VARIANT STREQUAL "native")
    set(VARIANT "${LLC_HOST_CPU}")
  else()
    set(VARIANT "${CPU_VARIANT}")
  endif()

  # x86 accepts instruction-set aliases (sse2, avx2, ...); everywhere else
  # the variant name is used directly as the CPU name.
  if(X86_64 OR I386)
    x86_distro_variant_to_flags("${VARIANT}" LLC_CPUFLAGS CLANG_CPUFLAGS)
  else()
    set(CLANG_CPUFLAGS "${CLANG_MARCH_FLAG}${VARIANT}")
    set(LLC_CPUFLAGS "-mcpu=${VARIANT}")
  endif()

  separate_arguments(CLANG_CPUFLAGS)
  separate_arguments(LLC_CPUFLAGS)

  # CLANG_FLAGS is read by compile_sleef() through variable scope.
  # NOTE(review): CLANG_FLAGS / LLC_FLAGS are presumably also consumed by
  # make_kernel_bc() the same way - confirm in bitcode_rules.
  set(CLANG_FLAGS ${HOST_CLANG_FLAGS} ${CLANG_CPUFLAGS}
                  "-emit-llvm" "-ffp-contract=off")

  if(POCL_USE_FAKE_ADDR_SPACE_IDS)
    list(APPEND CLANG_FLAGS "-Xclang" "-ffake-address-space-map"
                            "-DPOCL_USE_FAKE_ADDR_SPACE_IDS")
  endif()

  set(LLC_FLAGS ${HOST_LLC_FLAGS} ${LLC_CPUFLAGS})

  if(ENABLE_SLEEF)

    # write SLEEF config for this CPU: it is (re)generated into the ".new"
    # file on every configure run and copied over the real header at build
    # time only when its content changed.
    set(SLEEF_CONFIG "${CMAKE_BINARY_DIR}/sleef_config_temp_${VARIANT}.h")
    set(SLEEF_CONFIG_NEW "${SLEEF_CONFIG}.new")
    set(STR "/* SLEEF library configuration for ${VARIANT} CPU */ \n")
    file(WRITE "${SLEEF_CONFIG_NEW}" "${STR}")

    # compile SLEEF library for the cpu variant
    unset(SLEEF_BC)
    compile_sleef("${VARIANT}" "${SLEEF_CONFIG}" "${SLEEF_CONFIG_NEW}" SLEEF_BC)

    # BYPRODUCTS exists since CMake 3.2; "NOT VERSION_LESS" also accepts
    # 3.2.0 itself, which "VERSION_GREATER 3.2" would reject.
    unset(EXTRA_PARAMS)
    if(NOT CMAKE_VERSION VERSION_LESS "3.2")
      set(EXTRA_PARAMS BYPRODUCTS "${SLEEF_CONFIG}")
    endif()

    # The witness file is the tracked output, so the header's timestamp only
    # changes when copy_if_different actually copies. DEPENDS makes the rule
    # re-run after a reconfigure rewrote the ".new" file; without it the
    # header would never be refreshed once the witness exists.
    add_custom_command(
      OUTPUT "${SLEEF_CONFIG}.witness"
      ${EXTRA_PARAMS}
      COMMAND ${CMAKE_COMMAND} -E copy_if_different
            "${SLEEF_CONFIG_NEW}" "${SLEEF_CONFIG}"
      COMMAND ${CMAKE_COMMAND} -E touch "${SLEEF_CONFIG}.witness"
      DEPENDS "${SLEEF_CONFIG_NEW}"
      VERBATIM
    )
    add_custom_target("sleef_config_${VARIANT}" DEPENDS "${SLEEF_CONFIG}.witness")

    # compile kernel
    make_kernel_bc(KERNEL_BC "${OCL_KERNEL_TARGET}-${VARIANT}" "${VARIANT}"
                   1 "${SLEEF_BC}" "${SLEEF_CONFIG}" ${KERNEL_SOURCES})

  else()
    make_kernel_bc(KERNEL_BC "${OCL_KERNEL_TARGET}-${VARIANT}" "${VARIANT}"
                   0 0 0 ${KERNEL_SOURCES})
  endif()

  # just debug
  message(STATUS "Host Kernel BC for \"${VARIANT}\": ${KERNEL_BC}")

  # Publish the accumulated bitcode list to the enclosing scope.
  list(APPEND KERNEL_BC_LIST "${KERNEL_BC}")
  set(KERNEL_BC_LIST "${KERNEL_BC_LIST}" PARENT_SCOPE)

  # a target is needed...
  add_custom_target("kernel_host_${VARIANT}" DEPENDS ${KERNEL_BC})

  # Publish the accumulated target list to the enclosing scope as well.
  list(APPEND KERNEL_TARGET_LIST "kernel_host_${VARIANT}")
  set(KERNEL_TARGET_LIST "${KERNEL_TARGET_LIST}" PARENT_SCOPE)

  install(FILES "${KERNEL_BC}"
          DESTINATION "${POCL_INSTALL_PRIVATE_DATADIR}")

endforeach()
