/*
 * Copyright 2007 VMware, Inc.
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/**
 * \file
 *
 * Common helper functions for PBO up- and downloads.
 */

#include "state_tracker/st_context.h"
#include "state_tracker/st_nir.h"
#include "state_tracker/st_pbo.h"
#include "state_tracker/st_cb_bufferobjects.h"

#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
#include "cso_cache/cso_context.h"
#include "tgsi/tgsi_ureg.h"
#include "util/u_format.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"

#include "compiler/nir/nir_builder.h"

/* Conversion to apply in the fragment shader. */
enum st_pbo_conversion {
   ST_PBO_CONVERT_NONE = 0,
   ST_PBO_CONVERT_UINT_TO_SINT,
   ST_PBO_CONVERT_SINT_TO_UINT,

   ST_NUM_PBO_CONVERSIONS
};

/* Final setup of buffer addressing information.
 *
 * buf_offset is in pixels.
 *
 * Returns false if something (e.g. alignment) prevents PBO upload/download.
 */
bool
st_pbo_addresses_setup(struct st_context *st,
                       struct pipe_resource *buf, intptr_t buf_offset,
                       struct st_pbo_addresses *addr)
{
   unsigned skip_pixels;

   /* Check alignment against texture buffer requirements. */
   {
      unsigned ofs = (buf_offset * addr->bytes_per_pixel) % st->ctx->Const.TextureBufferOffsetAlignment;
      if (ofs != 0) {
         if (ofs % addr->bytes_per_pixel != 0)
            return false;

         skip_pixels = ofs / addr->bytes_per_pixel;
         buf_offset -= skip_pixels;
      } else {
         skip_pixels = 0;
      }
   }

   assert(buf_offset >= 0);

   addr->buffer = buf;
   addr->first_element = buf_offset;
   addr->last_element = buf_offset + skip_pixels + addr->width - 1
         + (addr->height - 1 + (addr->depth - 1) * addr->image_height) * addr->pixels_per_row;

   if (addr->last_element - addr->first_element > st->ctx->Const.MaxTextureBufferSize - 1)
      return false;

   /* This should be ensured by Mesa before calling our callbacks */
   assert((addr->last_element + 1) * addr->bytes_per_pixel <= buf->width0);

   addr->constants.xoffset = -addr->xoffset + skip_pixels;
   addr->constants.yoffset = -addr->yoffset;
   addr->constants.stride = addr->pixels_per_row;
   addr->constants.image_size = addr->pixels_per_row * addr->image_height;
   addr->constants.layer_offset = 0;

   return true;
}

/* Validate and fill buffer addressing information based on GL pixelstore
 * attributes.
 *
 * Returns false if some aspect of the addressing (e.g. alignment) prevents
 * PBO upload/download.
 */
bool
st_pbo_addresses_pixelstore(struct st_context *st,
                            GLenum gl_target, bool skip_images,
                            const struct gl_pixelstore_attrib *store,
                            const void *pixels,
                            struct st_pbo_addresses *addr)
{
   struct pipe_resource *buf = st_buffer_object(store->BufferObj)->buffer;
   intptr_t buf_offset = (intptr_t) pixels;

   if (buf_offset % addr->bytes_per_pixel)
      return false;

   /* Convert to texels */
   buf_offset = buf_offset / addr->bytes_per_pixel;

   /* Determine image height */
   if (gl_target == GL_TEXTURE_1D_ARRAY) {
      addr->image_height = 1;
   } else {
      addr->image_height = store->ImageHeight > 0 ? store->ImageHeight : addr->height;
   }

   /* Compute the stride, taking store->Alignment into account */
   {
       unsigned pixels_per_row = store->RowLength > 0 ?
                           store->RowLength : addr->width;
       unsigned bytes_per_row = pixels_per_row * addr->bytes_per_pixel;
       unsigned remainder = bytes_per_row % store->Alignment;
       unsigned offset_rows;

       if (remainder > 0)
          bytes_per_row += store->Alignment - remainder;

       if (bytes_per_row % addr->bytes_per_pixel)
          return false;

       addr->pixels_per_row = bytes_per_row / addr->bytes_per_pixel;

       offset_rows = store->SkipRows;
       if (skip_images)
          offset_rows += addr->image_height * store->SkipImages;

       buf_offset += store->SkipPixels + addr->pixels_per_row * offset_rows;
   }

   if (!st_pbo_addresses_setup(st, buf, buf_offset, addr))
      return false;

   /* Support GL_PACK_INVERT_MESA */
   if (store->Invert) {
      addr->constants.xoffset += (addr->height - 1) * addr->constants.stride;
      addr->constants.stride = -addr->constants.stride;
   }

   return true;
}

/* For download from a framebuffer, we may have to invert the Y axis. The
 * setup is as follows:
 * - set viewport to inverted, so that the position sysval is correct for
 *   texel fetches
 * - this function adjusts the fragment shader's constant buffer to compute
 *   the correct destination addresses.
 */
void
st_pbo_addresses_invert_y(struct st_pbo_addresses *addr,
                          unsigned viewport_height)
{
   addr->constants.xoffset +=
      (viewport_height - 1 + 2 * addr->constants.yoffset) * addr->constants.stride;
   addr->constants.stride = -addr->constants.stride;
}

/* Setup all vertex pipeline state, rasterizer state, and fragment shader
 * constants, and issue the draw call for PBO upload/download.
 *
 * The caller is responsible for saving and restoring state, as well as for
 * setting other fragment shader state (fragment shader, samplers), and
 * framebuffer/viewport/DSA/blend state.
 */
bool
st_pbo_draw(struct st_context *st, const struct st_pbo_addresses *addr,
            unsigned surface_width, unsigned surface_height)
{
   struct cso_context *cso = st->cso_context;

   /* Setup vertex and geometry shaders */
   if (!st->pbo.vs) {
      st->pbo.vs = st_pbo_create_vs(st);
      if (!st->pbo.vs)
         return false;
   }

   if (addr->depth != 1 && st->pbo.use_gs && !st->pbo.gs) {
      st->pbo.gs = st_pbo_create_gs(st);
      if (!st->pbo.gs)
         return false;
   }

   cso_set_vertex_shader_handle(cso, st->pbo.vs);

   cso_set_geometry_shader_handle(cso, addr->depth != 1 ? st->pbo.gs : NULL);

   cso_set_tessctrl_shader_handle(cso, NULL);

   cso_set_tesseval_shader_handle(cso, NULL);

   /* Upload vertices */
   {
      struct pipe_vertex_buffer vbo = {0};
      struct pipe_vertex_element velem;

      float x0 = (float) addr->xoffset / surface_width * 2.0f - 1.0f;
      float y0 = (float) addr->yoffset / surface_height * 2.0f - 1.0f;
      float x1 = (float) (addr->xoffset + addr->width) / surface_width * 2.0f - 1.0f;
      float y1 = (float) (addr->yoffset + addr->height) / surface_height * 2.0f - 1.0f;

      float *verts = NULL;

      vbo.stride = 2 * sizeof(float);

      u_upload_alloc(st->pipe->stream_uploader, 0, 8 * sizeof(float), 4,
                     &vbo.buffer_offset, &vbo.buffer.resource, (void **) &verts);
      if (!verts)
         return false;

      verts[0] = x0;
      verts[1] = y0;
      verts[2] = x0;
      verts[3] = y1;
      verts[4] = x1;
      verts[5] = y0;
      verts[6] = x1;
      verts[7] = y1;

      u_upload_unmap(st->pipe->stream_uploader);

      velem.src_offset = 0;
      velem.instance_divisor = 0;
      velem.vertex_buffer_index = 0;
      velem.src_format = PIPE_FORMAT_R32G32_FLOAT;

      cso_set_vertex_elements(cso, 1, &velem);

      cso_set_vertex_buffers(cso, velem.vertex_buffer_index, 1, &vbo);

      pipe_resource_reference(&vbo.buffer.resource, NULL);
   }

   /* Upload constants */
   {
      struct pipe_constant_buffer cb;

      cb.buffer = NULL;
      cb.user_buffer = &addr->constants;
      cb.buffer_offset = 0;
      cb.buffer_size = sizeof(addr->constants);

      cso_set_constant_buffer(cso, PIPE_SHADER_FRAGMENT, 0, &cb);

      pipe_resource_reference(&cb.buffer, NULL);
   }

   /* Rasterizer state */
   cso_set_rasterizer(cso, &st->pbo.raster);

   /* Disable stream output */
   cso_set_stream_outputs(cso, 0, NULL, 0);

   if (addr->depth == 1) {
      cso_draw_arrays(cso, PIPE_PRIM_TRIANGLE_STRIP, 0, 4);
   } else {
      cso_draw_arrays_instanced(cso, PIPE_PRIM_TRIANGLE_STRIP,
                                0, 4, 0, addr->depth);
   }

   return true;
}

void *
st_pbo_create_vs(struct st_context *st)
{
   struct pipe_screen *pscreen = st->pipe->screen;
   bool use_nir = PIPE_SHADER_IR_NIR ==
      pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX,
                                PIPE_SHADER_CAP_PREFERRED_IR);

   if (use_nir) {
      unsigned inputs[] =  {  VERT_ATTRIB_POS, SYSTEM_VALUE_INSTANCE_ID, };
      unsigned outputs[] = { VARYING_SLOT_POS,       VARYING_SLOT_LAYER  };

      return st_nir_make_passthrough_shader(st, "st/pbo VS",
                                            MESA_SHADER_VERTEX,
                                            st->pbo.layers ? 2 : 1,
                                            inputs, outputs, NULL, (1 << 1));
   }

   struct ureg_program *ureg;
   struct ureg_src in_pos;
   struct ureg_src in_instanceid;
   struct ureg_dst out_pos;
   struct ureg_dst out_layer;

   ureg = ureg_create(PIPE_SHADER_VERTEX);
   if (!ureg)
      return NULL;

   in_pos = ureg_DECL_vs_input(ureg, TGSI_SEMANTIC_POSITION);

   out_pos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);

   if (st->pbo.layers) {
      in_instanceid = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_INSTANCEID, 0);

      if (!st->pbo.use_gs)
         out_layer = ureg_DECL_output(ureg, TGSI_SEMANTIC_LAYER, 0);
   }

   /* out_pos = in_pos */
   ureg_MOV(ureg, out_pos, in_pos);

   if (st->pbo.layers) {
      if (st->pbo.use_gs) {
         /* out_pos.z = i2f(gl_InstanceID) */
         ureg_I2F(ureg, ureg_writemask(out_pos, TGSI_WRITEMASK_Z),
                        ureg_scalar(in_instanceid, TGSI_SWIZZLE_X));
      } else {
         /* out_layer = gl_InstanceID */
         ureg_MOV(ureg, ureg_writemask(out_layer, TGSI_WRITEMASK_X),
                        ureg_scalar(in_instanceid, TGSI_SWIZZLE_X));
      }
   }

   ureg_END(ureg);

   return ureg_create_shader_and_destroy(ureg, st->pipe);
}

void *
st_pbo_create_gs(struct st_context *st)
{
   static const int zero = 0;
   struct ureg_program *ureg;
   struct ureg_dst out_pos;
   struct ureg_dst out_layer;
   struct ureg_src in_pos;
   struct ureg_src imm;
   unsigned i;

   ureg = ureg_create(PIPE_SHADER_GEOMETRY);
   if (!ureg)
      return NULL;

   ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, PIPE_PRIM_TRIANGLES);
   ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, PIPE_PRIM_TRIANGLE_STRIP);
   ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, 3);

   out_pos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
   out_layer = ureg_DECL_output(ureg, TGSI_SEMANTIC_LAYER, 0);

   in_pos = ureg_DECL_input(ureg, TGSI_SEMANTIC_POSITION, 0, 0, 1);

   imm = ureg_DECL_immediate_int(ureg, &zero, 1);

   for (i = 0; i < 3; ++i) {
      struct ureg_src in_pos_vertex = ureg_src_dimension(in_pos, i);

      /* out_pos = in_pos[i] */
      ureg_MOV(ureg, out_pos, in_pos_vertex);

      /* out_layer.x = f2i(in_pos[i].z) */
      ureg_F2I(ureg, ureg_writemask(out_layer, TGSI_WRITEMASK_X),
                     ureg_scalar(in_pos_vertex, TGSI_SWIZZLE_Z));

      ureg_EMIT(ureg, ureg_scalar(imm, TGSI_SWIZZLE_X));
   }

   ureg_END(ureg);

   return ureg_create_shader_and_destroy(ureg, st->pipe);
}

static void
build_conversion(struct ureg_program *ureg, const struct ureg_dst *temp,
                 enum st_pbo_conversion conversion)
{
   switch (conversion) {
   case ST_PBO_CONVERT_SINT_TO_UINT:
      ureg_IMAX(ureg, *temp, ureg_src(*temp), ureg_imm1i(ureg, 0));
      break;
   case ST_PBO_CONVERT_UINT_TO_SINT:
      ureg_UMIN(ureg, *temp, ureg_src(*temp), ureg_imm1u(ureg, (1u << 31) - 1));
      break;
   default:
      /* no-op */
      break;
   }
}

static const struct glsl_type *
sampler_type_for_target(enum pipe_texture_target target)
{
   bool is_array = target >= PIPE_TEXTURE_1D_ARRAY;
   static const enum glsl_sampler_dim dim[] = {
      [PIPE_BUFFER]             = GLSL_SAMPLER_DIM_BUF,
      [PIPE_TEXTURE_1D]         = GLSL_SAMPLER_DIM_1D,
      [PIPE_TEXTURE_2D]         = GLSL_SAMPLER_DIM_2D,
      [PIPE_TEXTURE_3D]         = GLSL_SAMPLER_DIM_3D,
      [PIPE_TEXTURE_CUBE]       = GLSL_SAMPLER_DIM_CUBE,
      [PIPE_TEXTURE_RECT]       = GLSL_SAMPLER_DIM_RECT,
      [PIPE_TEXTURE_1D_ARRAY]   = GLSL_SAMPLER_DIM_1D,
      [PIPE_TEXTURE_2D_ARRAY]   = GLSL_SAMPLER_DIM_2D,
      [PIPE_TEXTURE_CUBE_ARRAY] = GLSL_SAMPLER_DIM_CUBE,
   };

   return glsl_sampler_type(dim[target], false, is_array, GLSL_TYPE_FLOAT);
}

static void *
create_fs_nir(struct st_context *st,
              bool download,
              enum pipe_texture_target target,
              enum st_pbo_conversion conversion)
{
   struct pipe_screen *screen = st->pipe->screen;
   struct nir_builder b;
   const nir_shader_compiler_options *options =
      st->ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions;
   bool pos_is_sysval =
      screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL);

   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, options);

   nir_ssa_def *zero = nir_imm_int(&b, 0);

   /* param = [ -xoffset + skip_pixels, -yoffset, stride, image_height ] */
   nir_variable *param_var =
      nir_variable_create(b.shader, nir_var_uniform, glsl_vec4_type(), "param");
   b.shader->num_uniforms += 4;
   nir_ssa_def *param = nir_load_var(&b, param_var);

   nir_variable *fragcoord =
      nir_variable_create(b.shader, pos_is_sysval ? nir_var_system_value :
                          nir_var_shader_in, glsl_vec4_type(), "gl_FragCoord");
   fragcoord->data.location = pos_is_sysval ? SYSTEM_VALUE_FRAG_COORD
                                            : VARYING_SLOT_POS;
   nir_ssa_def *coord = nir_load_var(&b, fragcoord);

   nir_ssa_def *layer = NULL;
   if (st->pbo.layers && (!download || target == PIPE_TEXTURE_1D_ARRAY ||
                                       target == PIPE_TEXTURE_2D_ARRAY ||
                                       target == PIPE_TEXTURE_3D ||
                                       target == PIPE_TEXTURE_CUBE ||
                                       target == PIPE_TEXTURE_CUBE_ARRAY)) {
      nir_variable *var = nir_variable_create(b.shader, nir_var_shader_in,
                                              glsl_int_type(), "gl_Layer");
      var->data.location = VARYING_SLOT_LAYER;
      var->data.interpolation = INTERP_MODE_FLAT;
      layer = nir_load_var(&b, var);
   }

   /* offset_pos = param.xy + f2i(coord.xy) */
   nir_ssa_def *offset_pos =
      nir_iadd(&b, nir_channels(&b, param, TGSI_WRITEMASK_XY),
               nir_f2i32(&b, nir_channels(&b, coord, TGSI_WRITEMASK_XY)));

   /* addr = offset_pos.x + offset_pos.y * stride */
   nir_ssa_def *pbo_addr =
      nir_iadd(&b, nir_channel(&b, offset_pos, 0),
               nir_imul(&b, nir_channel(&b, offset_pos, 1),
                        nir_channel(&b, param, 2)));
   if (layer) {
      /* pbo_addr += image_height * layer */
      pbo_addr = nir_iadd(&b, pbo_addr,
                          nir_imul(&b, layer, nir_channel(&b, param, 3)));
   }

   nir_ssa_def *texcoord;
   if (download) {
      texcoord = nir_f2i32(&b, nir_channels(&b, coord, TGSI_WRITEMASK_XY));

      if (layer) {
         nir_ssa_def *src_layer = layer;

         if (target == PIPE_TEXTURE_3D) {
            nir_variable *layer_offset_var =
               nir_variable_create(b.shader, nir_var_uniform,
                                   glsl_int_type(), "layer_offset");
            b.shader->num_uniforms += 1;
            layer_offset_var->data.driver_location = 4;
            nir_ssa_def *layer_offset = nir_load_var(&b, layer_offset_var);

            src_layer = nir_iadd(&b, layer, layer_offset);
         }

         texcoord = nir_vec3(&b, nir_channel(&b, texcoord, 0),
                                 nir_channel(&b, texcoord, 1),
                                 src_layer);
      }
   } else {
      texcoord = pbo_addr;
   }

   nir_variable *tex_var =
      nir_variable_create(b.shader, nir_var_uniform,
                          sampler_type_for_target(target), "tex");
   tex_var->data.explicit_binding = true;
   tex_var->data.binding = 0;

   nir_deref_instr *tex_deref = nir_build_deref_var(&b, tex_var);

   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
   tex->op = nir_texop_txf;
   tex->sampler_dim = glsl_get_sampler_dim(tex_var->type);
   tex->coord_components =
      glsl_get_sampler_coordinate_components(tex_var->type);
   tex->dest_type = nir_type_float;
   tex->src[0].src_type = nir_tex_src_texture_deref;
   tex->src[0].src = nir_src_for_ssa(&tex_deref->dest.ssa);
   tex->src[1].src_type = nir_tex_src_sampler_deref;
   tex->src[1].src = nir_src_for_ssa(&tex_deref->dest.ssa);
   tex->src[2].src_type = nir_tex_src_coord;
   tex->src[2].src = nir_src_for_ssa(texcoord);
   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
   nir_builder_instr_insert(&b, &tex->instr);
   nir_ssa_def *result = &tex->dest.ssa;

   if (conversion == ST_PBO_CONVERT_SINT_TO_UINT)
      result = nir_imax(&b, result, zero);
   else if (conversion == ST_PBO_CONVERT_UINT_TO_SINT)
      result = nir_umin(&b, result, nir_imm_int(&b, (1u << 31) - 1));

   if (download) {
      nir_variable *img_var =
         nir_variable_create(b.shader, nir_var_uniform,
                             glsl_image_type(GLSL_SAMPLER_DIM_BUF, false,
                                             GLSL_TYPE_FLOAT), "img");
      img_var->data.image.access = ACCESS_NON_READABLE;
      img_var->data.explicit_binding = true;
      img_var->data.binding = 0;
      nir_deref_instr *img_deref = nir_build_deref_var(&b, img_var);
      nir_intrinsic_instr *intrin =
         nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
      intrin->src[0] = nir_src_for_ssa(&img_deref->dest.ssa);
      intrin->src[1] =
         nir_src_for_ssa(nir_vec4(&b, pbo_addr, zero, zero, zero));
      intrin->src[2] = nir_src_for_ssa(zero);
      intrin->src[3] = nir_src_for_ssa(result);
      intrin->num_components = 4;
      nir_builder_instr_insert(&b, &intrin->instr);
   } else {
      nir_variable *color =
         nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
                             "gl_FragColor");
      color->data.location = FRAG_RESULT_COLOR;

      nir_store_var(&b, color, result, TGSI_WRITEMASK_XYZW);
   }

   return st_nir_finish_builtin_shader(st, b.shader, download ?
                                       "st/pbo download FS" :
                                       "st/pbo upload FS");
}

static void *
create_fs_tgsi(struct st_context *st, bool download,
               enum pipe_texture_target target,
               enum st_pbo_conversion conversion)
{
   struct pipe_context *pipe = st->pipe;
   struct pipe_screen *screen = pipe->screen;
   struct ureg_program *ureg;
   bool have_layer;
   struct ureg_dst out;
   struct ureg_src sampler;
   struct ureg_src pos;
   struct ureg_src layer;
   struct ureg_src const0;
   struct ureg_src const1;
   struct ureg_dst temp0;

   have_layer =
      st->pbo.layers &&
      (!download || target == PIPE_TEXTURE_1D_ARRAY
                 || target == PIPE_TEXTURE_2D_ARRAY
                 || target == PIPE_TEXTURE_3D
                 || target == PIPE_TEXTURE_CUBE
                 || target == PIPE_TEXTURE_CUBE_ARRAY);

   ureg = ureg_create(PIPE_SHADER_FRAGMENT);
   if (!ureg)
      return NULL;

   if (!download) {
      out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
   } else {
      struct ureg_src image;

      /* writeonly images do not require an explicitly given format. */
      image = ureg_DECL_image(ureg, 0, TGSI_TEXTURE_BUFFER, PIPE_FORMAT_NONE,
                                    true, false);
      out = ureg_dst(image);
   }

   sampler = ureg_DECL_sampler(ureg, 0);
   if (screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL)) {
      pos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
   } else {
      pos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
                               TGSI_INTERPOLATE_LINEAR);
   }
   if (have_layer) {
      layer = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_LAYER, 0,
                                       TGSI_INTERPOLATE_CONSTANT);
   }
   const0  = ureg_DECL_constant(ureg, 0);
   const1  = ureg_DECL_constant(ureg, 1);
   temp0   = ureg_DECL_temporary(ureg);

   /* Note: const0 = [ -xoffset + skip_pixels, -yoffset, stride, image_height ] */

   /* temp0.xy = f2i(temp0.xy) */
   ureg_F2I(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY),
                  ureg_swizzle(pos,
                               TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
                               TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));

   /* temp0.xy = temp0.xy + const0.xy */
   ureg_UADD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY),
                   ureg_swizzle(ureg_src(temp0),
                                TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
                                TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y),
                   ureg_swizzle(const0,
                                TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
                                TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));

   /* temp0.x = const0.z * temp0.y + temp0.x */
   ureg_UMAD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_X),
                   ureg_scalar(const0, TGSI_SWIZZLE_Z),
                   ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_Y),
                   ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X));

   if (have_layer) {
      /* temp0.x = const0.w * layer + temp0.x */
      ureg_UMAD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_X),
                      ureg_scalar(const0, TGSI_SWIZZLE_W),
                      ureg_scalar(layer, TGSI_SWIZZLE_X),
                      ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X));
   }

   /* temp0.w = 0 */
   ureg_MOV(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_W), ureg_imm1u(ureg, 0));

   if (download) {
      struct ureg_dst temp1;
      struct ureg_src op[2];

      temp1 = ureg_DECL_temporary(ureg);

      /* temp1.xy = pos.xy */
      ureg_F2I(ureg, ureg_writemask(temp1, TGSI_WRITEMASK_XY), pos);

      /* temp1.zw = 0 */
      ureg_MOV(ureg, ureg_writemask(temp1, TGSI_WRITEMASK_ZW), ureg_imm1u(ureg, 0));

      if (have_layer) {
         struct ureg_dst temp1_layer =
            ureg_writemask(temp1, target == PIPE_TEXTURE_1D_ARRAY ? TGSI_WRITEMASK_Y
                                                                  : TGSI_WRITEMASK_Z);

         /* temp1.y/z = layer */
         ureg_MOV(ureg, temp1_layer, ureg_scalar(layer, TGSI_SWIZZLE_X));

         if (target == PIPE_TEXTURE_3D) {
            /* temp1.z += layer_offset */
            ureg_UADD(ureg, temp1_layer,
                            ureg_scalar(ureg_src(temp1), TGSI_SWIZZLE_Z),
                            ureg_scalar(const1, TGSI_SWIZZLE_X));
         }
      }

      /* temp1 = txf(sampler, temp1) */
      ureg_TXF(ureg, temp1, util_pipe_tex_to_tgsi_tex(target, 1),
                     ureg_src(temp1), sampler);

      build_conversion(ureg, &temp1, conversion);

      /* store(out, temp0, temp1) */
      op[0] = ureg_src(temp0);
      op[1] = ureg_src(temp1);
      ureg_memory_insn(ureg, TGSI_OPCODE_STORE, &out, 1, op, 2, 0,
                             TGSI_TEXTURE_BUFFER, PIPE_FORMAT_NONE);

      ureg_release_temporary(ureg, temp1);
   } else {
      /* out = txf(sampler, temp0.x) */
      ureg_TXF(ureg, temp0, TGSI_TEXTURE_BUFFER, ureg_src(temp0), sampler);

      build_conversion(ureg, &temp0, conversion);

      ureg_MOV(ureg, out, ureg_src(temp0));
   }

   ureg_release_temporary(ureg, temp0);

   ureg_END(ureg);

   return ureg_create_shader_and_destroy(ureg, pipe);
}

static void *
create_fs(struct st_context *st, bool download,
          enum pipe_texture_target target,
          enum st_pbo_conversion conversion)
{
   struct pipe_screen *pscreen = st->pipe->screen;
   bool use_nir = PIPE_SHADER_IR_NIR ==
      pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX,
                                PIPE_SHADER_CAP_PREFERRED_IR);

   if (use_nir)
      return create_fs_nir(st, download, target, conversion);

   return create_fs_tgsi(st, download, target, conversion);
}

static enum st_pbo_conversion
get_pbo_conversion(enum pipe_format src_format, enum pipe_format dst_format)
{
   if (util_format_is_pure_uint(src_format)) {
      if (util_format_is_pure_sint(dst_format))
         return ST_PBO_CONVERT_UINT_TO_SINT;
   } else if (util_format_is_pure_sint(src_format)) {
      if (util_format_is_pure_uint(dst_format))
         return ST_PBO_CONVERT_SINT_TO_UINT;
   }

   return ST_PBO_CONVERT_NONE;
}

void *
st_pbo_get_upload_fs(struct st_context *st,
                     enum pipe_format src_format,
                     enum pipe_format dst_format)
{
   STATIC_ASSERT(ARRAY_SIZE(st->pbo.upload_fs) == ST_NUM_PBO_CONVERSIONS);

   enum st_pbo_conversion conversion = get_pbo_conversion(src_format, dst_format);

   if (!st->pbo.upload_fs[conversion])
      st->pbo.upload_fs[conversion] = create_fs(st, false, 0, conversion);

   return st->pbo.upload_fs[conversion];
}

void *
st_pbo_get_download_fs(struct st_context *st, enum pipe_texture_target target,
                       enum pipe_format src_format,
                       enum pipe_format dst_format)
{
   STATIC_ASSERT(ARRAY_SIZE(st->pbo.download_fs) == ST_NUM_PBO_CONVERSIONS);
   assert(target < PIPE_MAX_TEXTURE_TYPES);

   enum st_pbo_conversion conversion = get_pbo_conversion(src_format, dst_format);

   if (!st->pbo.download_fs[conversion][target])
      st->pbo.download_fs[conversion][target] = create_fs(st, true, target, conversion);

   return st->pbo.download_fs[conversion][target];
}

void
st_init_pbo_helpers(struct st_context *st)
{
   struct pipe_context *pipe = st->pipe;
   struct pipe_screen *screen = pipe->screen;

   st->pbo.upload_enabled =
      screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OBJECTS) &&
      screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT) >= 1 &&
      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_INTEGERS);
   if (!st->pbo.upload_enabled)
      return;

   st->pbo.download_enabled =
      st->pbo.upload_enabled &&
      screen->get_param(screen, PIPE_CAP_SAMPLER_VIEW_TARGET) &&
      screen->get_param(screen, PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT) &&
      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
                                       PIPE_SHADER_CAP_MAX_SHADER_IMAGES) >= 1;

   st->pbo.rgba_only =
      screen->get_param(screen, PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY);

   if (screen->get_param(screen, PIPE_CAP_TGSI_INSTANCEID)) {
      if (screen->get_param(screen, PIPE_CAP_TGSI_VS_LAYER_VIEWPORT)) {
         st->pbo.layers = true;
      } else if (screen->get_param(screen, PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES) >= 3) {
         st->pbo.layers = true;
         st->pbo.use_gs = true;
      }
   }

   /* Blend state */
   memset(&st->pbo.upload_blend, 0, sizeof(struct pipe_blend_state));
   st->pbo.upload_blend.rt[0].colormask = PIPE_MASK_RGBA;

   /* Rasterizer state */
   memset(&st->pbo.raster, 0, sizeof(struct pipe_rasterizer_state));
   st->pbo.raster.half_pixel_center = 1;
}

void
st_destroy_pbo_helpers(struct st_context *st)
{
   unsigned i;

   for (i = 0; i < ARRAY_SIZE(st->pbo.upload_fs); ++i) {
      if (st->pbo.upload_fs[i]) {
         cso_delete_fragment_shader(st->cso_context, st->pbo.upload_fs[i]);
         st->pbo.upload_fs[i] = NULL;
      }
   }

   for (i = 0; i < ARRAY_SIZE(st->pbo.download_fs); ++i) {
      for (unsigned j = 0; j < ARRAY_SIZE(st->pbo.download_fs[0]); ++j) {
         if (st->pbo.download_fs[i][j]) {
            cso_delete_fragment_shader(st->cso_context, st->pbo.download_fs[i][j]);
            st->pbo.download_fs[i][j] = NULL;
         }
      }
   }

   if (st->pbo.gs) {
      cso_delete_geometry_shader(st->cso_context, st->pbo.gs);
      st->pbo.gs = NULL;
   }

   if (st->pbo.vs) {
      cso_delete_vertex_shader(st->cso_context, st->pbo.vs);
      st->pbo.vs = NULL;
   }
}
