/*
 * Copyright (C) 2018-2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
 * SPDX-License-Identifier: MIT
 */

#ifndef __BIFROST_PUBLIC_H_
#define __BIFROST_PUBLIC_H_

#include <stdint.h>
#include <string.h>
#include "compiler/nir/nir.h"
#include "panfrost/compiler/pan_compiler.h"
#include "util/u_dynarray.h"

/*
 * System values handed to a precompiled kernel.
 *
 * bifrost_precompiled_kernel_prepare_push_uniforms() copies this structure
 * verbatim to the start of the push-uniform buffer, so its size fixes where
 * the user arguments begin (BIFROST_PRECOMPILED_KERNEL_ARGS_OFFSET).
 *
 * The structure is 8-byte aligned, which also pads its size to a multiple of
 * 8 bytes.
 */
struct bifrost_precompiled_kernel_sysvals {
   /* Three unsigned components; presumably the dispatch's workgroup count
    * per dimension -- confirm against the code that fills it in.
    */
   struct {
      unsigned x, y, z;
   } num_workgroups;

   /* 64-bit address; presumably the GPU address of the printf buffer --
    * confirm against the caller.
    */
   uint64_t printf_buffer_address;
} __attribute__((aligned(8)));

/*
 * Byte layout of the push-uniform area used by precompiled kernels:
 *
 *    [SYSVALS_OFFSET, SYSVALS_OFFSET + SYSVALS_SIZE)   system values
 *    [ARGS_OFFSET,    ARGS_OFFSET + ARGS_SIZE)         user arguments
 */

/* Size in bytes of struct bifrost_precompiled_kernel_sysvals. */
#define BIFROST_PRECOMPILED_KERNEL_SYSVALS_SIZE                                \
   sizeof(struct bifrost_precompiled_kernel_sysvals)
/* The system values sit at the very start of the area. */
#define BIFROST_PRECOMPILED_KERNEL_SYSVALS_OFFSET (0)
/* User arguments start immediately after the system values. */
#define BIFROST_PRECOMPILED_KERNEL_ARGS_OFFSET                                 \
   (BIFROST_PRECOMPILED_KERNEL_SYSVALS_OFFSET +                                \
    BIFROST_PRECOMPILED_KERNEL_SYSVALS_SIZE)
/* Bytes left for user arguments out of a 512-byte total.
 * NOTE(review): 512 is presumably the total push-uniform budget -- confirm
 * where that limit comes from.
 */
#define BIFROST_PRECOMPILED_KERNEL_ARGS_SIZE                                   \
   (512 - BIFROST_PRECOMPILED_KERNEL_ARGS_OFFSET)

/*
 * Metadata describing one precompiled kernel, as assembled by
 * bifrost_precompiled_pack_kernel_info().
 */
struct bifrost_precompiled_kernel_info {
   /* By-value copy of the pan_shader_info passed to the pack helper. */
   struct pan_shader_info info;
   /* Copied from nir->info.workgroup_size[0], [1] and [2] respectively. */
   unsigned local_size_x;
   unsigned local_size_y;
   unsigned local_size_z;
   /* Copied from the `size` field of the binary's util_dynarray. */
   unsigned binary_size;
};

/*
 * Gather the metadata of a compiled kernel into a single value.
 *
 * Copies *info by value, records the three workgroup-size components from
 * nir->info.workgroup_size and the current size of the binary dynarray. None
 * of the arguments is modified.
 */
static inline struct bifrost_precompiled_kernel_info
bifrost_precompiled_pack_kernel_info(nir_shader *nir,
                                     struct pan_shader_info *info,
                                     struct util_dynarray *binary)
{
   struct bifrost_precompiled_kernel_info packed = {
      .info = *info,
      .local_size_x = nir->info.workgroup_size[0],
      .local_size_y = nir->info.workgroup_size[1],
      .local_size_z = nir->info.workgroup_size[2],
      .binary_size = binary->size,
   };

   return packed;
}

/*
 * Fill the push-uniform buffer of a precompiled kernel.
 *
 * The system values are copied to the start of dst, followed by
 * user_data_size bytes of user arguments at
 * BIFROST_PRECOMPILED_KERNEL_ARGS_OFFSET.
 *
 * dst must have room for BIFROST_PRECOMPILED_KERNEL_ARGS_OFFSET +
 * user_data_size bytes and must not overlap the sources.
 * user_data_size must not exceed BIFROST_PRECOMPILED_KERNEL_ARGS_SIZE; this is
 * only checked by an assertion.
 * user_data may be NULL when user_data_size is 0.
 */
static inline void
bifrost_precompiled_kernel_prepare_push_uniforms(
   void *dst, const void *user_data, size_t user_data_size,
   const struct bifrost_precompiled_kernel_sysvals *sysvals)
{
   assert(user_data_size <= BIFROST_PRECOMPILED_KERNEL_ARGS_SIZE);

   memcpy(dst, sysvals, sizeof(*sysvals));

   /* Passing a null pointer to memcpy is undefined even for a zero length,
    * so skip the copy entirely for kernels without user arguments.
    */
   if (user_data_size > 0) {
      memcpy((uint8_t *)dst + BIFROST_PRECOMPILED_KERNEL_ARGS_OFFSET,
             user_data, user_data_size);
   }
}

/*
 * NIR processing entry points. Only the declarations are visible in this
 * header; each takes the shader to operate on and a GPU identifier.
 * NOTE(review): the names suggest distinct pipeline stages (preprocess,
 * optimize, postprocess, texture lowering) -- confirm the required call order
 * against the implementation.
 */
void bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id);
void bifrost_optimize_nir(nir_shader *nir, unsigned gpu_id);
void bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id);
void bifrost_lower_texture_nir(nir_shader *nir, unsigned gpu_id);
void bifrost_lower_texture_late_nir(nir_shader *nir, unsigned gpu_id);

/* Debug queries taking no arguments; presumably driven by compiler debug
 * flags -- confirm against the implementation.
 */
bool bifrost_will_dump_shaders(void);
bool bifrost_want_debug_info(void);

/*
 * Compile a NIR shader. `inputs` is read-only (const). `binary` and `info`
 * are non-const, so presumably they receive the generated machine code and
 * the shader metadata -- confirm against the definition.
 */
void bifrost_compile_shader_nir(nir_shader *nir,
                                const struct pan_compile_inputs *inputs,
                                struct util_dynarray *binary,
                                struct pan_shader_info *info);

/* Varyings that valhal_writes_extended_fifo() checks for. */
#define VALHAL_EX_FIFO_VARYING_BITS \
   (VARYING_BIT_PSIZ | VARYING_BIT_LAYER | VARYING_BIT_PRIMITIVE_ID)

/*
 * Decide whether a shader writes the extended FIFO.
 *
 * outputs_written is the bitmask of varyings the shader writes. no_psiz
 * selects the variant that has its point size write patched out, and
 * multiview tells whether the shader is a multiview shader.
 *
 * Returns true when at least one extended-FIFO varying is effectively
 * written.
 */
static inline bool
valhal_writes_extended_fifo(uint64_t outputs_written,
                            bool no_psiz, bool multiview)
{
   const uint64_t written = outputs_written & VALHAL_EX_FIFO_VARYING_BITS;

   /* None of the extended-FIFO varyings is written at all. */
   if (!written)
      return false;

   /* Per-view output writes of multiview shaders are indexed using the FIFO
    * format, and those offsets are currently not patched in the no_psiz
    * variant. Hence the extended format is required here whether or not point
    * size matters.
    */
   if (multiview)
      return true;

   /* When not rendering points, the no_psiz variant has the point size write
    * patched out, so that write must not count.
    */
   const uint64_t ignored = no_psiz ? VARYING_BIT_PSIZ : 0;

   return (written & ~ignored) != 0;
}

/*
 * Define the NIR compiler options table for one architecture version as a
 * `static const nir_shader_compiler_options bifrost_nir_options_v<arch>`.
 *
 * `arch` must be a plain integer literal: it is pasted into the variable name
 * and also compared against version thresholds (lower_ffract, lower_hadd and
 * the 4x8 dot-product capabilities depend on it).
 *
 * The expansion deliberately does not end in a semicolon; the user supplies
 * it (`DEFINE_OPTIONS(6);`), so each use expands to exactly one declaration
 * with no stray empty declaration at file scope.
 */
#define DEFINE_OPTIONS(arch)                                                   \
   static const nir_shader_compiler_options bifrost_nir_options_v##arch = {    \
      .lower_scmp = true,                                                      \
      .lower_flrp16 = true,                                                    \
      .lower_flrp32 = true,                                                    \
      .lower_flrp64 = true,                                                    \
      .lower_ffract = arch < 11,                                               \
      .lower_fmod = true,                                                      \
      .lower_fdiv = true,                                                      \
      .lower_isign = true,                                                     \
      .lower_find_lsb = true,                                                  \
      .lower_ifind_msb = true,                                                 \
      .lower_fdph = true,                                                      \
      .lower_fsqrt = true,                                                     \
                                                                               \
      .lower_fsign = true,                                                     \
                                                                               \
      .lower_bitfield_insert = true,                                           \
      .lower_bitfield_extract = true,                                          \
      .lower_bitfield_extract8 = true,                                         \
      .lower_bitfield_extract16 = true,                                        \
      .lower_insert_byte = true,                                               \
                                                                               \
      .lower_pack_64_4x16 = true,                                              \
      .lower_pack_half_2x16 = true,                                            \
      .lower_pack_unorm_2x16 = true,                                           \
      .lower_pack_snorm_2x16 = true,                                           \
      .lower_pack_unorm_4x8 = true,                                            \
      .lower_pack_snorm_4x8 = true,                                            \
      .lower_unpack_half_2x16 = true,                                          \
      .lower_unpack_unorm_2x16 = true,                                         \
      .lower_unpack_snorm_2x16 = true,                                         \
      .lower_unpack_unorm_4x8 = true,                                          \
      .lower_unpack_snorm_4x8 = true,                                          \
      .has_pack_32_4x8 = true,                                                 \
                                                                               \
      .lower_doubles_options =                                                 \
         nir_lower_dmod, /* TODO: Don't lower supported 64-bit operations */   \
      .lower_int64_options = ~0, /* TODO: Use IMULD on v7 */                   \
      .lower_mul_high = true,                                                  \
      .lower_fisnormal = true,                                                 \
      .lower_uadd_carry = true,                                                \
      .lower_usub_borrow = true,                                               \
                                                                               \
      .has_isub = true,                                                        \
      .vectorize_vec2_16bit = true,                                            \
      .fuse_ffma16 = true,                                                     \
      .fuse_ffma32 = true,                                                     \
      .fuse_ffma64 = true,                                                     \
                                                                               \
      .lower_uniforms_to_ubo = true,                                           \
                                                                               \
      .has_cs_global_id = true,                                                \
      .lower_cs_local_index_to_id = true,                                      \
      .lower_device_index_to_zero = true,                                      \
      .max_unroll_iterations = 32,                                             \
      .max_samples = 16,                                                       \
      .force_indirect_unrolling =                                              \
         (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),     \
      .force_indirect_unrolling_sampler = true,                                \
      .scalarize_ddx = true,                                                   \
      .support_indirect_inputs = BITFIELD_BIT(MESA_SHADER_TESS_CTRL) |         \
                                 BITFIELD_BIT(MESA_SHADER_TESS_EVAL) |         \
                                 BITFIELD_BIT(MESA_SHADER_FRAGMENT),           \
      .lower_hadd = arch >= 11,                                                \
      .discard_is_demote = true,                                               \
      .has_udot_4x8 = arch >= 9,                                               \
      .has_udot_4x8_sat = arch >= 9,                                           \
      .has_sdot_4x8 = arch >= 9,                                               \
      .has_sdot_4x8_sat = arch >= 9,                                           \
   }

/* One options table per architecture version. */
DEFINE_OPTIONS(6);
DEFINE_OPTIONS(9);
DEFINE_OPTIONS(11);

#endif
