Subversion Repositories Kolibri OS

Compare Revisions

Rev 3771 → Rev 3772

/drivers/video/Gallium/auxiliary/cso_cache/cso_cache.c
0,0 → 1,322
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/* Authors: Zack Rusin <zack@tungstengraphics.com>
*/
 
#include "util/u_debug.h"
 
#include "util/u_memory.h"
 
#include "cso_cache.h"
#include "cso_hash.h"
 
 
struct cso_cache {
struct cso_hash *hashes[CSO_CACHE_MAX];
int max_size;
 
cso_sanitize_callback sanitize_cb;
void *sanitize_data;
};
 
#if 1
static unsigned hash_key(const void *key, unsigned key_size)
{
unsigned *ikey = (unsigned *)key;
unsigned hash = 0, i;
 
assert(key_size % 4 == 0);
 
/* I'm sure this can be improved on:
*/
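/* Hedged worked example of this XOR-fold (hypothetical key values):
* an 8-byte key holding the words { 0x12345678, 0x0000000f }
* hashes to 0x12345678 ^ 0x0000000f == 0x12345677 */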
for (i = 0; i < key_size/4; i++)
hash ^= ikey[i];
 
return hash;
}
#else
static unsigned hash_key(const unsigned char *p, int n)
{
unsigned h = 0;
unsigned g;
 
while (n--) {
h = (h << 4) + *p++;
if ((g = (h & 0xf0000000)) != 0)
h ^= g >> 23;
h &= ~g;
}
return h;
}
#endif
 
unsigned cso_construct_key(void *item, int item_size)
{
return hash_key((item), item_size);
}
 
static INLINE struct cso_hash *_cso_hash_for_type(struct cso_cache *sc, enum cso_cache_type type)
{
struct cso_hash *hash;
hash = sc->hashes[type];
return hash;
}
 
static void delete_blend_state(void *state, void *data)
{
struct cso_blend *cso = (struct cso_blend *)state;
if (cso->delete_state)
cso->delete_state(cso->context, cso->data);
FREE(state);
}
 
static void delete_depth_stencil_state(void *state, void *data)
{
struct cso_depth_stencil_alpha *cso = (struct cso_depth_stencil_alpha *)state;
if (cso->delete_state)
cso->delete_state(cso->context, cso->data);
FREE(state);
}
 
static void delete_sampler_state(void *state, void *data)
{
struct cso_sampler *cso = (struct cso_sampler *)state;
if (cso->delete_state)
cso->delete_state(cso->context, cso->data);
FREE(state);
}
 
static void delete_rasterizer_state(void *state, void *data)
{
struct cso_rasterizer *cso = (struct cso_rasterizer *)state;
if (cso->delete_state)
cso->delete_state(cso->context, cso->data);
FREE(state);
}
 
static void delete_velements(void *state, void *data)
{
struct cso_velements *cso = (struct cso_velements *)state;
if (cso->delete_state)
cso->delete_state(cso->context, cso->data);
FREE(state);
}
 
static INLINE void delete_cso(void *state, enum cso_cache_type type)
{
switch (type) {
case CSO_BLEND:
delete_blend_state(state, 0);
break;
case CSO_SAMPLER:
delete_sampler_state(state, 0);
break;
case CSO_DEPTH_STENCIL_ALPHA:
delete_depth_stencil_state(state, 0);
break;
case CSO_RASTERIZER:
delete_rasterizer_state(state, 0);
break;
case CSO_VELEMENTS:
delete_velements(state, 0);
break;
default:
assert(0);
FREE(state);
}
}
 
 
static INLINE void sanitize_hash(struct cso_cache *sc,
struct cso_hash *hash,
enum cso_cache_type type,
int max_size)
{
if (sc->sanitize_cb)
sc->sanitize_cb(hash, type, max_size, sc->sanitize_data);
}
 
 
static INLINE void sanitize_cb(struct cso_hash *hash, enum cso_cache_type type,
int max_size, void *user_data)
{
/* if we're approaching the maximum size, remove a quarter of the entries;
* otherwise every subsequent call will go through the same eviction */
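/* e.g. (illustrative numbers) with max_size = 100 and hash_size = 120:
* to_remove = 120/4 + (120 - 100) = 50 entries get evicted */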
int hash_size = cso_hash_size(hash);
int max_entries = (max_size > hash_size) ? max_size : hash_size;
int to_remove = (max_size < max_entries) * max_entries/4;
if (hash_size > max_size)
to_remove += hash_size - max_size;
while (to_remove) {
/*remove elements until we're good */
/*fixme: currently we pick the nodes to remove at random*/
struct cso_hash_iter iter = cso_hash_first_node(hash);
void *cso = cso_hash_take(hash, cso_hash_iter_key(iter));
delete_cso(cso, type);
--to_remove;
}
}
 
struct cso_hash_iter
cso_insert_state(struct cso_cache *sc,
unsigned hash_key, enum cso_cache_type type,
void *state)
{
struct cso_hash *hash = _cso_hash_for_type(sc, type);
sanitize_hash(sc, hash, type, sc->max_size);
 
return cso_hash_insert(hash, hash_key, state);
}
 
struct cso_hash_iter
cso_find_state(struct cso_cache *sc,
unsigned hash_key, enum cso_cache_type type)
{
struct cso_hash *hash = _cso_hash_for_type(sc, type);
 
return cso_hash_find(hash, hash_key);
}
 
 
void *cso_hash_find_data_from_template( struct cso_hash *hash,
unsigned hash_key,
void *templ,
int size )
{
struct cso_hash_iter iter = cso_hash_find(hash, hash_key);
while (!cso_hash_iter_is_null(iter)) {
void *iter_data = cso_hash_iter_data(iter);
if (!memcmp(iter_data, templ, size)) {
/* We found a match
*/
return iter_data;
}
iter = cso_hash_iter_next(iter);
}
return NULL;
}
 
 
struct cso_hash_iter cso_find_state_template(struct cso_cache *sc,
unsigned hash_key, enum cso_cache_type type,
void *templ, unsigned size)
{
struct cso_hash_iter iter = cso_find_state(sc, hash_key, type);
while (!cso_hash_iter_is_null(iter)) {
void *iter_data = cso_hash_iter_data(iter);
if (!memcmp(iter_data, templ, size))
return iter;
iter = cso_hash_iter_next(iter);
}
return iter;
}
 
void * cso_take_state(struct cso_cache *sc,
unsigned hash_key, enum cso_cache_type type)
{
struct cso_hash *hash = _cso_hash_for_type(sc, type);
return cso_hash_take(hash, hash_key);
}
 
struct cso_cache *cso_cache_create(void)
{
struct cso_cache *sc = MALLOC_STRUCT(cso_cache);
int i;
if (sc == NULL)
return NULL;
 
sc->max_size = 4096;
for (i = 0; i < CSO_CACHE_MAX; i++)
sc->hashes[i] = cso_hash_create();
 
sc->sanitize_cb = sanitize_cb;
sc->sanitize_data = 0;
 
return sc;
}
 
void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type,
cso_state_callback func, void *user_data)
{
struct cso_hash *hash = _cso_hash_for_type(sc, type);
struct cso_hash_iter iter;
 
iter = cso_hash_first_node(hash);
while (!cso_hash_iter_is_null(iter)) {
void *state = cso_hash_iter_data(iter);
iter = cso_hash_iter_next(iter);
if (state) {
func(state, user_data);
}
}
}
 
void cso_cache_delete(struct cso_cache *sc)
{
int i;
assert(sc);
 
if (!sc)
return;
 
/* delete driver data */
cso_for_each_state(sc, CSO_BLEND, delete_blend_state, 0);
cso_for_each_state(sc, CSO_DEPTH_STENCIL_ALPHA, delete_depth_stencil_state, 0);
cso_for_each_state(sc, CSO_RASTERIZER, delete_rasterizer_state, 0);
cso_for_each_state(sc, CSO_SAMPLER, delete_sampler_state, 0);
cso_for_each_state(sc, CSO_VELEMENTS, delete_velements, 0);
 
for (i = 0; i < CSO_CACHE_MAX; i++)
cso_hash_delete(sc->hashes[i]);
 
FREE(sc);
}
 
void cso_set_maximum_cache_size(struct cso_cache *sc, int number)
{
int i;
 
sc->max_size = number;
 
for (i = 0; i < CSO_CACHE_MAX; i++)
sanitize_hash(sc, sc->hashes[i], i, sc->max_size);
}
 
int cso_maximum_cache_size(const struct cso_cache *sc)
{
return sc->max_size;
}
 
void cso_cache_set_sanitize_callback(struct cso_cache *sc,
cso_sanitize_callback cb,
void *user_data)
{
sc->sanitize_cb = cb;
sc->sanitize_data = user_data;
}
 
/drivers/video/Gallium/auxiliary/cso_cache/cso_cache.h
0,0 → 1,175
/**************************************************************************
*
* Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/**
* @file
* Constant State Object (CSO) cache.
*
* The basic idea is that the states are created via the
* create_state/bind_state/delete_state semantics. The driver is expected to
* perform as much of the Gallium state translation to its internal
* representation as possible during the create call. Gallium then has a caching
* mechanism where it stores the created states. When the pipeline needs an
* actual state change, a bind call is issued. In the bind call the driver
* gets its already translated representation.
*
* Those semantics mean that the driver doesn't do the repeated translations
* of states on every frame, but only once, when a new state is actually
* created.
*
* Even on hardware that doesn't do any kind of state cache, it makes the
* driver look a lot neater, plus it avoids all the redundant state
* translations on every frame.
*
* Currently our constant state objects are:
* - alpha test
* - blend
* - depth stencil
* - fragment shader
* - rasterizer (old setup)
* - sampler
* - vertex shader
* - vertex elements
*
* Things that are not constant state objects include:
* - blend_color
* - clip_state
* - clear_color_state
* - constant_buffer
* - feedback_state
* - framebuffer_state
* - polygon_stipple
* - scissor_state
* - texture_state
* - viewport_state
*
* @author Zack Rusin <zack@tungstengraphics.com>
*/
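/*
* Illustrative lookup-or-insert sketch using this cache (a hedged example,
* not a complete implementation; "cache" stands for a caller-created
* struct cso_cache * and "templ" for a caller-filled pipe_blend_state):
*
*    unsigned key = cso_construct_key(&templ, sizeof(templ));
*    struct cso_hash_iter iter =
*       cso_find_state_template(cache, key, CSO_BLEND, &templ, sizeof(templ));
*    if (cso_hash_iter_is_null(iter)) {
*       struct cso_blend *cso = MALLOC(sizeof(struct cso_blend));
*       ... fill cso->state, cso->data, cso->delete_state, cso->context ...
*       iter = cso_insert_state(cache, key, CSO_BLEND, cso);
*    }
*    void *driver_handle = ((struct cso_blend *)cso_hash_iter_data(iter))->data;
*/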
 
#ifndef CSO_CACHE_H
#define CSO_CACHE_H
 
#include "pipe/p_context.h"
#include "pipe/p_state.h"
 
/* cso_hash.h is necessary for cso_hash_iter, as MSVC requires structures
* returned by value to be fully defined */
#include "cso_hash.h"
 
 
#ifdef __cplusplus
extern "C" {
#endif
 
enum cso_cache_type {
CSO_RASTERIZER,
CSO_BLEND,
CSO_DEPTH_STENCIL_ALPHA,
CSO_SAMPLER,
CSO_VELEMENTS,
CSO_CACHE_MAX,
};
 
typedef void (*cso_state_callback)(void *ctx, void *obj);
 
typedef void (*cso_sanitize_callback)(struct cso_hash *hash,
enum cso_cache_type type,
int max_size,
void *user_data);
 
struct cso_cache;
 
struct cso_blend {
struct pipe_blend_state state;
void *data;
cso_state_callback delete_state;
struct pipe_context *context;
};
 
struct cso_depth_stencil_alpha {
struct pipe_depth_stencil_alpha_state state;
void *data;
cso_state_callback delete_state;
struct pipe_context *context;
};
 
struct cso_rasterizer {
struct pipe_rasterizer_state state;
void *data;
cso_state_callback delete_state;
struct pipe_context *context;
};
 
struct cso_sampler {
struct pipe_sampler_state state;
void *data;
cso_state_callback delete_state;
struct pipe_context *context;
};
 
struct cso_velems_state {
unsigned count;
struct pipe_vertex_element velems[PIPE_MAX_ATTRIBS];
};
 
struct cso_velements {
struct cso_velems_state state;
void *data;
cso_state_callback delete_state;
struct pipe_context *context;
};
 
unsigned cso_construct_key(void *item, int item_size);
 
struct cso_cache *cso_cache_create(void);
void cso_cache_delete(struct cso_cache *sc);
 
void cso_cache_set_sanitize_callback(struct cso_cache *sc,
cso_sanitize_callback cb,
void *user_data);
 
struct cso_hash_iter cso_insert_state(struct cso_cache *sc,
unsigned hash_key, enum cso_cache_type type,
void *state);
struct cso_hash_iter cso_find_state(struct cso_cache *sc,
unsigned hash_key, enum cso_cache_type type);
struct cso_hash_iter cso_find_state_template(struct cso_cache *sc,
unsigned hash_key, enum cso_cache_type type,
void *templ, unsigned size);
void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type,
cso_state_callback func, void *user_data);
void * cso_take_state(struct cso_cache *sc, unsigned hash_key,
enum cso_cache_type type);
 
void cso_set_maximum_cache_size(struct cso_cache *sc, int number);
int cso_maximum_cache_size(const struct cso_cache *sc);
 
#ifdef __cplusplus
}
#endif
 
#endif
/drivers/video/Gallium/auxiliary/cso_cache/cso_context.c
0,0 → 1,1431
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/**
* @file
*
* Wrap the cso cache & hash mechanisms in a simplified
* pipe-driver-specific interface.
*
* @author Zack Rusin <zack@tungstengraphics.com>
* @author Keith Whitwell <keith@tungstengraphics.com>
*/
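/*
* Illustrative save/set/restore sketch of the 1-deep save stack (assumes
* "pipe" is the driver's pipe_context and "blend_templ" is a caller-filled
* pipe_blend_state):
*
*    struct cso_context *cso = cso_create_context(pipe);
*    cso_save_blend(cso);
*    cso_set_blend(cso, &blend_templ);
*    ... issue a meta-op draw ...
*    cso_restore_blend(cso);
*    cso_release_all(cso);
*    cso_destroy_context(cso);
*/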
 
#include "pipe/p_state.h"
#include "util/u_draw.h"
#include "util/u_framebuffer.h"
#include "util/u_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_vbuf.h"
#include "tgsi/tgsi_parse.h"
 
#include "cso_cache/cso_context.h"
#include "cso_cache/cso_cache.h"
#include "cso_cache/cso_hash.h"
#include "cso_context.h"
 
 
/**
* Info related to samplers and sampler views.
* We have one of these for fragment samplers and another for vertex samplers.
*/
struct sampler_info
{
struct {
void *samplers[PIPE_MAX_SAMPLERS];
unsigned nr_samplers;
} hw;
 
void *samplers[PIPE_MAX_SAMPLERS];
unsigned nr_samplers;
 
void *samplers_saved[PIPE_MAX_SAMPLERS];
unsigned nr_samplers_saved;
 
struct pipe_sampler_view *views[PIPE_MAX_SAMPLERS];
unsigned nr_views;
 
struct pipe_sampler_view *views_saved[PIPE_MAX_SAMPLERS];
unsigned nr_views_saved;
};
 
 
 
struct cso_context {
struct pipe_context *pipe;
struct cso_cache *cache;
struct u_vbuf *vbuf;
 
boolean has_geometry_shader;
boolean has_streamout;
 
struct sampler_info samplers[PIPE_SHADER_TYPES];
 
struct pipe_vertex_buffer aux_vertex_buffer_current;
struct pipe_vertex_buffer aux_vertex_buffer_saved;
unsigned aux_vertex_buffer_index;
 
struct pipe_constant_buffer aux_constbuf_current[PIPE_SHADER_TYPES];
struct pipe_constant_buffer aux_constbuf_saved[PIPE_SHADER_TYPES];
 
unsigned nr_so_targets;
struct pipe_stream_output_target *so_targets[PIPE_MAX_SO_BUFFERS];
 
unsigned nr_so_targets_saved;
struct pipe_stream_output_target *so_targets_saved[PIPE_MAX_SO_BUFFERS];
 
/** Current and saved state.
* The saved state is used as a 1-deep stack.
*/
void *blend, *blend_saved;
void *depth_stencil, *depth_stencil_saved;
void *rasterizer, *rasterizer_saved;
void *fragment_shader, *fragment_shader_saved;
void *vertex_shader, *vertex_shader_saved;
void *geometry_shader, *geometry_shader_saved;
void *velements, *velements_saved;
struct pipe_query *render_condition, *render_condition_saved;
uint render_condition_mode, render_condition_mode_saved;
boolean render_condition_cond, render_condition_cond_saved;
 
struct pipe_clip_state clip;
struct pipe_clip_state clip_saved;
 
struct pipe_framebuffer_state fb, fb_saved;
struct pipe_viewport_state vp, vp_saved;
struct pipe_blend_color blend_color;
unsigned sample_mask, sample_mask_saved;
struct pipe_stencil_ref stencil_ref, stencil_ref_saved;
};
 
 
static boolean delete_blend_state(struct cso_context *ctx, void *state)
{
struct cso_blend *cso = (struct cso_blend *)state;
 
if (ctx->blend == cso->data)
return FALSE;
 
if (cso->delete_state)
cso->delete_state(cso->context, cso->data);
FREE(state);
return TRUE;
}
 
static boolean delete_depth_stencil_state(struct cso_context *ctx, void *state)
{
struct cso_depth_stencil_alpha *cso =
(struct cso_depth_stencil_alpha *)state;
 
if (ctx->depth_stencil == cso->data)
return FALSE;
 
if (cso->delete_state)
cso->delete_state(cso->context, cso->data);
FREE(state);
 
return TRUE;
}
 
static boolean delete_sampler_state(struct cso_context *ctx, void *state)
{
struct cso_sampler *cso = (struct cso_sampler *)state;
if (cso->delete_state)
cso->delete_state(cso->context, cso->data);
FREE(state);
return TRUE;
}
 
static boolean delete_rasterizer_state(struct cso_context *ctx, void *state)
{
struct cso_rasterizer *cso = (struct cso_rasterizer *)state;
 
if (ctx->rasterizer == cso->data)
return FALSE;
if (cso->delete_state)
cso->delete_state(cso->context, cso->data);
FREE(state);
return TRUE;
}
 
static boolean delete_vertex_elements(struct cso_context *ctx,
void *state)
{
struct cso_velements *cso = (struct cso_velements *)state;
 
if (ctx->velements == cso->data)
return FALSE;
 
if (cso->delete_state)
cso->delete_state(cso->context, cso->data);
FREE(state);
return TRUE;
}
 
 
static INLINE boolean delete_cso(struct cso_context *ctx,
void *state, enum cso_cache_type type)
{
switch (type) {
case CSO_BLEND:
return delete_blend_state(ctx, state);
case CSO_SAMPLER:
return delete_sampler_state(ctx, state);
case CSO_DEPTH_STENCIL_ALPHA:
return delete_depth_stencil_state(ctx, state);
case CSO_RASTERIZER:
return delete_rasterizer_state(ctx, state);
case CSO_VELEMENTS:
return delete_vertex_elements(ctx, state);
default:
assert(0);
FREE(state);
}
return FALSE;
}
 
static INLINE void
sanitize_hash(struct cso_hash *hash, enum cso_cache_type type,
int max_size, void *user_data)
{
struct cso_context *ctx = (struct cso_context *)user_data;
/* if we're approaching the maximum size, remove a quarter of the entries;
* otherwise every subsequent call will go through the same eviction */
int hash_size = cso_hash_size(hash);
int max_entries = (max_size > hash_size) ? max_size : hash_size;
int to_remove = (max_size < max_entries) * max_entries/4;
struct cso_hash_iter iter = cso_hash_first_node(hash);
if (hash_size > max_size)
to_remove += hash_size - max_size;
while (to_remove) {
/*remove elements until we're good */
/*fixme: currently we pick the nodes to remove at random*/
void *cso = cso_hash_iter_data(iter);
if (delete_cso(ctx, cso, type)) {
iter = cso_hash_erase(hash, iter);
--to_remove;
} else
iter = cso_hash_iter_next(iter);
}
}
 
static void cso_init_vbuf(struct cso_context *cso)
{
struct u_vbuf_caps caps;
 
u_vbuf_get_caps(cso->pipe->screen, &caps);
 
/* Install u_vbuf if there is anything unsupported. */
if (!caps.buffer_offset_unaligned ||
!caps.buffer_stride_unaligned ||
!caps.velem_src_offset_unaligned ||
!caps.format_fixed32 ||
!caps.format_float16 ||
!caps.format_float64 ||
!caps.format_norm32 ||
!caps.format_scaled32 ||
!caps.user_vertex_buffers) {
cso->vbuf = u_vbuf_create(cso->pipe, &caps,
cso->aux_vertex_buffer_index);
}
}
 
struct cso_context *cso_create_context( struct pipe_context *pipe )
{
struct cso_context *ctx = CALLOC_STRUCT(cso_context);
if (ctx == NULL)
goto out;
 
ctx->cache = cso_cache_create();
if (ctx->cache == NULL)
goto out;
cso_cache_set_sanitize_callback(ctx->cache,
sanitize_hash,
ctx);
 
ctx->pipe = pipe;
ctx->sample_mask_saved = ~0;
 
ctx->aux_vertex_buffer_index = 0; /* 0 for now */
 
cso_init_vbuf(ctx);
 
/* Enable for testing: */
if (0) cso_set_maximum_cache_size( ctx->cache, 4 );
 
if (pipe->screen->get_shader_param(pipe->screen, PIPE_SHADER_GEOMETRY,
PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0) {
ctx->has_geometry_shader = TRUE;
}
if (pipe->screen->get_param(pipe->screen,
PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS) != 0) {
ctx->has_streamout = TRUE;
}
 
return ctx;
 
out:
cso_destroy_context( ctx );
return NULL;
}
 
/**
* Prior to context destruction, this function unbinds all state objects.
*/
void cso_release_all( struct cso_context *ctx )
{
unsigned i, shader;
 
if (ctx->pipe) {
ctx->pipe->bind_blend_state( ctx->pipe, NULL );
ctx->pipe->bind_rasterizer_state( ctx->pipe, NULL );
ctx->pipe->bind_fragment_sampler_states( ctx->pipe, 0, NULL );
if (ctx->pipe->bind_vertex_sampler_states)
ctx->pipe->bind_vertex_sampler_states(ctx->pipe, 0, NULL);
ctx->pipe->bind_depth_stencil_alpha_state( ctx->pipe, NULL );
ctx->pipe->bind_fs_state( ctx->pipe, NULL );
ctx->pipe->bind_vs_state( ctx->pipe, NULL );
ctx->pipe->bind_vertex_elements_state( ctx->pipe, NULL );
ctx->pipe->set_fragment_sampler_views(ctx->pipe, 0, NULL);
if (ctx->pipe->set_vertex_sampler_views)
ctx->pipe->set_vertex_sampler_views(ctx->pipe, 0, NULL);
if (ctx->pipe->set_stream_output_targets)
ctx->pipe->set_stream_output_targets(ctx->pipe, 0, NULL, 0);
}
 
/* free fragment samplers, views */
for (shader = 0; shader < Elements(ctx->samplers); shader++) {
struct sampler_info *info = &ctx->samplers[shader];
for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
pipe_sampler_view_reference(&info->views[i], NULL);
pipe_sampler_view_reference(&info->views_saved[i], NULL);
}
}
 
util_unreference_framebuffer_state(&ctx->fb);
util_unreference_framebuffer_state(&ctx->fb_saved);
 
pipe_resource_reference(&ctx->aux_vertex_buffer_current.buffer, NULL);
pipe_resource_reference(&ctx->aux_vertex_buffer_saved.buffer, NULL);
 
for (i = 0; i < PIPE_SHADER_TYPES; i++) {
pipe_resource_reference(&ctx->aux_constbuf_current[i].buffer, NULL);
pipe_resource_reference(&ctx->aux_constbuf_saved[i].buffer, NULL);
}
 
for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
pipe_so_target_reference(&ctx->so_targets[i], NULL);
pipe_so_target_reference(&ctx->so_targets_saved[i], NULL);
}
 
if (ctx->cache) {
cso_cache_delete( ctx->cache );
ctx->cache = NULL;
}
}
 
 
/**
* Free the CSO context. NOTE: the state tracker should have previously called
* cso_release_all().
*/
void cso_destroy_context( struct cso_context *ctx )
{
if (ctx) {
if (ctx->vbuf)
u_vbuf_destroy(ctx->vbuf);
FREE( ctx );
}
}
 
 
/* These functions either find the state matching the given template
* in the cache, or they create a new state from the given
* template, insert it in the cache and return it.
*/
 
/*
* If the driver returns 0 from the create method, the data member of
* the cso is assigned to be the template itself.
*/
 
enum pipe_error cso_set_blend(struct cso_context *ctx,
const struct pipe_blend_state *templ)
{
unsigned key_size, hash_key;
struct cso_hash_iter iter;
void *handle;
 
key_size = templ->independent_blend_enable ?
sizeof(struct pipe_blend_state) :
(char *)&(templ->rt[1]) - (char *)templ;
hash_key = cso_construct_key((void*)templ, key_size);
iter = cso_find_state_template(ctx->cache, hash_key, CSO_BLEND,
(void*)templ, key_size);
 
if (cso_hash_iter_is_null(iter)) {
struct cso_blend *cso = MALLOC(sizeof(struct cso_blend));
if (!cso)
return PIPE_ERROR_OUT_OF_MEMORY;
 
memset(&cso->state, 0, sizeof cso->state);
memcpy(&cso->state, templ, key_size);
cso->data = ctx->pipe->create_blend_state(ctx->pipe, &cso->state);
cso->delete_state = (cso_state_callback)ctx->pipe->delete_blend_state;
cso->context = ctx->pipe;
 
iter = cso_insert_state(ctx->cache, hash_key, CSO_BLEND, cso);
if (cso_hash_iter_is_null(iter)) {
FREE(cso);
return PIPE_ERROR_OUT_OF_MEMORY;
}
 
handle = cso->data;
}
else {
handle = ((struct cso_blend *)cso_hash_iter_data(iter))->data;
}
 
if (ctx->blend != handle) {
ctx->blend = handle;
ctx->pipe->bind_blend_state(ctx->pipe, handle);
}
return PIPE_OK;
}
 
void cso_save_blend(struct cso_context *ctx)
{
assert(!ctx->blend_saved);
ctx->blend_saved = ctx->blend;
}
 
void cso_restore_blend(struct cso_context *ctx)
{
if (ctx->blend != ctx->blend_saved) {
ctx->blend = ctx->blend_saved;
ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend_saved);
}
ctx->blend_saved = NULL;
}
 
 
 
enum pipe_error
cso_set_depth_stencil_alpha(struct cso_context *ctx,
const struct pipe_depth_stencil_alpha_state *templ)
{
unsigned key_size = sizeof(struct pipe_depth_stencil_alpha_state);
unsigned hash_key = cso_construct_key((void*)templ, key_size);
struct cso_hash_iter iter = cso_find_state_template(ctx->cache,
hash_key,
CSO_DEPTH_STENCIL_ALPHA,
(void*)templ, key_size);
void *handle;
 
if (cso_hash_iter_is_null(iter)) {
struct cso_depth_stencil_alpha *cso =
MALLOC(sizeof(struct cso_depth_stencil_alpha));
if (!cso)
return PIPE_ERROR_OUT_OF_MEMORY;
 
memcpy(&cso->state, templ, sizeof(*templ));
cso->data = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe,
&cso->state);
cso->delete_state =
(cso_state_callback)ctx->pipe->delete_depth_stencil_alpha_state;
cso->context = ctx->pipe;
 
iter = cso_insert_state(ctx->cache, hash_key,
CSO_DEPTH_STENCIL_ALPHA, cso);
if (cso_hash_iter_is_null(iter)) {
FREE(cso);
return PIPE_ERROR_OUT_OF_MEMORY;
}
 
handle = cso->data;
}
else {
handle = ((struct cso_depth_stencil_alpha *)
cso_hash_iter_data(iter))->data;
}
 
if (ctx->depth_stencil != handle) {
ctx->depth_stencil = handle;
ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, handle);
}
return PIPE_OK;
}
 
void cso_save_depth_stencil_alpha(struct cso_context *ctx)
{
assert(!ctx->depth_stencil_saved);
ctx->depth_stencil_saved = ctx->depth_stencil;
}
 
void cso_restore_depth_stencil_alpha(struct cso_context *ctx)
{
if (ctx->depth_stencil != ctx->depth_stencil_saved) {
ctx->depth_stencil = ctx->depth_stencil_saved;
ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe,
ctx->depth_stencil_saved);
}
ctx->depth_stencil_saved = NULL;
}
 
 
 
enum pipe_error cso_set_rasterizer(struct cso_context *ctx,
const struct pipe_rasterizer_state *templ)
{
unsigned key_size = sizeof(struct pipe_rasterizer_state);
unsigned hash_key = cso_construct_key((void*)templ, key_size);
struct cso_hash_iter iter = cso_find_state_template(ctx->cache,
hash_key,
CSO_RASTERIZER,
(void*)templ, key_size);
void *handle = NULL;
 
if (cso_hash_iter_is_null(iter)) {
struct cso_rasterizer *cso = MALLOC(sizeof(struct cso_rasterizer));
if (!cso)
return PIPE_ERROR_OUT_OF_MEMORY;
 
memcpy(&cso->state, templ, sizeof(*templ));
cso->data = ctx->pipe->create_rasterizer_state(ctx->pipe, &cso->state);
cso->delete_state =
(cso_state_callback)ctx->pipe->delete_rasterizer_state;
cso->context = ctx->pipe;
 
iter = cso_insert_state(ctx->cache, hash_key, CSO_RASTERIZER, cso);
if (cso_hash_iter_is_null(iter)) {
FREE(cso);
return PIPE_ERROR_OUT_OF_MEMORY;
}
 
handle = cso->data;
}
else {
handle = ((struct cso_rasterizer *)cso_hash_iter_data(iter))->data;
}
 
if (ctx->rasterizer != handle) {
ctx->rasterizer = handle;
ctx->pipe->bind_rasterizer_state(ctx->pipe, handle);
}
return PIPE_OK;
}
 
void cso_save_rasterizer(struct cso_context *ctx)
{
assert(!ctx->rasterizer_saved);
ctx->rasterizer_saved = ctx->rasterizer;
}
 
void cso_restore_rasterizer(struct cso_context *ctx)
{
if (ctx->rasterizer != ctx->rasterizer_saved) {
ctx->rasterizer = ctx->rasterizer_saved;
ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rasterizer_saved);
}
ctx->rasterizer_saved = NULL;
}
 
 
void cso_set_fragment_shader_handle(struct cso_context *ctx, void *handle )
{
if (ctx->fragment_shader != handle) {
ctx->fragment_shader = handle;
ctx->pipe->bind_fs_state(ctx->pipe, handle);
}
}
 
void cso_delete_fragment_shader(struct cso_context *ctx, void *handle )
{
if (handle == ctx->fragment_shader) {
/* unbind before deleting */
ctx->pipe->bind_fs_state(ctx->pipe, NULL);
ctx->fragment_shader = NULL;
}
ctx->pipe->delete_fs_state(ctx->pipe, handle);
}
 
void cso_save_fragment_shader(struct cso_context *ctx)
{
assert(!ctx->fragment_shader_saved);
ctx->fragment_shader_saved = ctx->fragment_shader;
}
 
void cso_restore_fragment_shader(struct cso_context *ctx)
{
if (ctx->fragment_shader_saved != ctx->fragment_shader) {
ctx->pipe->bind_fs_state(ctx->pipe, ctx->fragment_shader_saved);
ctx->fragment_shader = ctx->fragment_shader_saved;
}
ctx->fragment_shader_saved = NULL;
}
 
 
void cso_set_vertex_shader_handle(struct cso_context *ctx, void *handle)
{
if (ctx->vertex_shader != handle) {
ctx->vertex_shader = handle;
ctx->pipe->bind_vs_state(ctx->pipe, handle);
}
}
 
void cso_delete_vertex_shader(struct cso_context *ctx, void *handle )
{
if (handle == ctx->vertex_shader) {
/* unbind before deleting */
ctx->pipe->bind_vs_state(ctx->pipe, NULL);
ctx->vertex_shader = NULL;
}
ctx->pipe->delete_vs_state(ctx->pipe, handle);
}
 
void cso_save_vertex_shader(struct cso_context *ctx)
{
assert(!ctx->vertex_shader_saved);
ctx->vertex_shader_saved = ctx->vertex_shader;
}
 
void cso_restore_vertex_shader(struct cso_context *ctx)
{
if (ctx->vertex_shader_saved != ctx->vertex_shader) {
ctx->pipe->bind_vs_state(ctx->pipe, ctx->vertex_shader_saved);
ctx->vertex_shader = ctx->vertex_shader_saved;
}
ctx->vertex_shader_saved = NULL;
}
 
 
void cso_set_framebuffer(struct cso_context *ctx,
const struct pipe_framebuffer_state *fb)
{
if (memcmp(&ctx->fb, fb, sizeof(*fb)) != 0) {
util_copy_framebuffer_state(&ctx->fb, fb);
ctx->pipe->set_framebuffer_state(ctx->pipe, fb);
}
}
 
void cso_save_framebuffer(struct cso_context *ctx)
{
util_copy_framebuffer_state(&ctx->fb_saved, &ctx->fb);
}
 
void cso_restore_framebuffer(struct cso_context *ctx)
{
if (memcmp(&ctx->fb, &ctx->fb_saved, sizeof(ctx->fb))) {
util_copy_framebuffer_state(&ctx->fb, &ctx->fb_saved);
ctx->pipe->set_framebuffer_state(ctx->pipe, &ctx->fb);
util_unreference_framebuffer_state(&ctx->fb_saved);
}
}
 
 
void cso_set_viewport(struct cso_context *ctx,
const struct pipe_viewport_state *vp)
{
if (memcmp(&ctx->vp, vp, sizeof(*vp))) {
ctx->vp = *vp;
ctx->pipe->set_viewport_states(ctx->pipe, 0, 1, vp);
}
}
 
void cso_save_viewport(struct cso_context *ctx)
{
ctx->vp_saved = ctx->vp;
}
 
 
void cso_restore_viewport(struct cso_context *ctx)
{
if (memcmp(&ctx->vp, &ctx->vp_saved, sizeof(ctx->vp))) {
ctx->vp = ctx->vp_saved;
ctx->pipe->set_viewport_states(ctx->pipe, 0, 1, &ctx->vp);
}
}
 
 
void cso_set_blend_color(struct cso_context *ctx,
const struct pipe_blend_color *bc)
{
if (memcmp(&ctx->blend_color, bc, sizeof(ctx->blend_color))) {
ctx->blend_color = *bc;
ctx->pipe->set_blend_color(ctx->pipe, bc);
}
}
 
void cso_set_sample_mask(struct cso_context *ctx, unsigned sample_mask)
{
if (ctx->sample_mask != sample_mask) {
ctx->sample_mask = sample_mask;
ctx->pipe->set_sample_mask(ctx->pipe, sample_mask);
}
}
 
void cso_save_sample_mask(struct cso_context *ctx)
{
ctx->sample_mask_saved = ctx->sample_mask;
}
 
void cso_restore_sample_mask(struct cso_context *ctx)
{
cso_set_sample_mask(ctx, ctx->sample_mask_saved);
}
 
void cso_set_stencil_ref(struct cso_context *ctx,
const struct pipe_stencil_ref *sr)
{
if (memcmp(&ctx->stencil_ref, sr, sizeof(ctx->stencil_ref))) {
ctx->stencil_ref = *sr;
ctx->pipe->set_stencil_ref(ctx->pipe, sr);
}
}
 
void cso_save_stencil_ref(struct cso_context *ctx)
{
ctx->stencil_ref_saved = ctx->stencil_ref;
}
 
 
void cso_restore_stencil_ref(struct cso_context *ctx)
{
if (memcmp(&ctx->stencil_ref, &ctx->stencil_ref_saved,
sizeof(ctx->stencil_ref))) {
ctx->stencil_ref = ctx->stencil_ref_saved;
ctx->pipe->set_stencil_ref(ctx->pipe, &ctx->stencil_ref);
}
}
 
void cso_set_render_condition(struct cso_context *ctx,
struct pipe_query *query,
boolean condition, uint mode)
{
struct pipe_context *pipe = ctx->pipe;
 
if (ctx->render_condition != query ||
ctx->render_condition_mode != mode ||
ctx->render_condition_cond != condition) {
pipe->render_condition(pipe, query, condition, mode);
ctx->render_condition = query;
ctx->render_condition_cond = condition;
ctx->render_condition_mode = mode;
}
}
 
void cso_save_render_condition(struct cso_context *ctx)
{
ctx->render_condition_saved = ctx->render_condition;
ctx->render_condition_cond_saved = ctx->render_condition_cond;
ctx->render_condition_mode_saved = ctx->render_condition_mode;
}
 
void cso_restore_render_condition(struct cso_context *ctx)
{
cso_set_render_condition(ctx, ctx->render_condition_saved,
ctx->render_condition_cond_saved,
ctx->render_condition_mode_saved);
}
 
void cso_set_geometry_shader_handle(struct cso_context *ctx, void *handle)
{
assert(ctx->has_geometry_shader || !handle);
 
if (ctx->has_geometry_shader && ctx->geometry_shader != handle) {
ctx->geometry_shader = handle;
ctx->pipe->bind_gs_state(ctx->pipe, handle);
}
}
 
void cso_delete_geometry_shader(struct cso_context *ctx, void *handle)
{
if (handle == ctx->geometry_shader) {
/* unbind before deleting */
ctx->pipe->bind_gs_state(ctx->pipe, NULL);
ctx->geometry_shader = NULL;
}
ctx->pipe->delete_gs_state(ctx->pipe, handle);
}
 
void cso_save_geometry_shader(struct cso_context *ctx)
{
if (!ctx->has_geometry_shader) {
return;
}
 
assert(!ctx->geometry_shader_saved);
ctx->geometry_shader_saved = ctx->geometry_shader;
}
 
void cso_restore_geometry_shader(struct cso_context *ctx)
{
if (!ctx->has_geometry_shader) {
return;
}
 
if (ctx->geometry_shader_saved != ctx->geometry_shader) {
ctx->pipe->bind_gs_state(ctx->pipe, ctx->geometry_shader_saved);
ctx->geometry_shader = ctx->geometry_shader_saved;
}
ctx->geometry_shader_saved = NULL;
}
 
/* clip state */
 
static INLINE void
clip_state_cpy(struct pipe_clip_state *dst,
const struct pipe_clip_state *src)
{
memcpy(dst->ucp, src->ucp, sizeof(dst->ucp));
}
 
static INLINE int
clip_state_cmp(const struct pipe_clip_state *a,
const struct pipe_clip_state *b)
{
return memcmp(a->ucp, b->ucp, sizeof(a->ucp));
}
 
void
cso_set_clip(struct cso_context *ctx,
const struct pipe_clip_state *clip)
{
if (clip_state_cmp(&ctx->clip, clip)) {
clip_state_cpy(&ctx->clip, clip);
ctx->pipe->set_clip_state(ctx->pipe, clip);
}
}
 
void
cso_save_clip(struct cso_context *ctx)
{
clip_state_cpy(&ctx->clip_saved, &ctx->clip);
}
 
void
cso_restore_clip(struct cso_context *ctx)
{
if (clip_state_cmp(&ctx->clip, &ctx->clip_saved)) {
clip_state_cpy(&ctx->clip, &ctx->clip_saved);
ctx->pipe->set_clip_state(ctx->pipe, &ctx->clip_saved);
}
}
 
enum pipe_error
cso_set_vertex_elements(struct cso_context *ctx,
unsigned count,
const struct pipe_vertex_element *states)
{
struct u_vbuf *vbuf = ctx->vbuf;
unsigned key_size, hash_key;
struct cso_hash_iter iter;
void *handle;
struct cso_velems_state velems_state;
 
if (vbuf) {
u_vbuf_set_vertex_elements(vbuf, count, states);
return PIPE_OK;
}
 
/* Need to include the count in the stored state data too.
* Otherwise the first 'count' pipe_vertex_elements of two states could be
* identical even if their counts differ, and there's no guarantee the hash
* would be different in that case either.
*/
key_size = sizeof(struct pipe_vertex_element) * count + sizeof(unsigned);
velems_state.count = count;
memcpy(velems_state.velems, states,
sizeof(struct pipe_vertex_element) * count);
hash_key = cso_construct_key((void*)&velems_state, key_size);
iter = cso_find_state_template(ctx->cache, hash_key, CSO_VELEMENTS,
(void*)&velems_state, key_size);
 
if (cso_hash_iter_is_null(iter)) {
struct cso_velements *cso = MALLOC(sizeof(struct cso_velements));
if (!cso)
return PIPE_ERROR_OUT_OF_MEMORY;
 
memcpy(&cso->state, &velems_state, key_size);
cso->data = ctx->pipe->create_vertex_elements_state(ctx->pipe, count,
&cso->state.velems[0]);
cso->delete_state =
(cso_state_callback) ctx->pipe->delete_vertex_elements_state;
cso->context = ctx->pipe;
 
iter = cso_insert_state(ctx->cache, hash_key, CSO_VELEMENTS, cso);
if (cso_hash_iter_is_null(iter)) {
FREE(cso);
return PIPE_ERROR_OUT_OF_MEMORY;
}
 
handle = cso->data;
}
else {
handle = ((struct cso_velements *)cso_hash_iter_data(iter))->data;
}
 
if (ctx->velements != handle) {
ctx->velements = handle;
ctx->pipe->bind_vertex_elements_state(ctx->pipe, handle);
}
return PIPE_OK;
}
 
void cso_save_vertex_elements(struct cso_context *ctx)
{
struct u_vbuf *vbuf = ctx->vbuf;
 
if (vbuf) {
u_vbuf_save_vertex_elements(vbuf);
return;
}
 
assert(!ctx->velements_saved);
ctx->velements_saved = ctx->velements;
}
 
void cso_restore_vertex_elements(struct cso_context *ctx)
{
struct u_vbuf *vbuf = ctx->vbuf;
 
if (vbuf) {
u_vbuf_restore_vertex_elements(vbuf);
return;
}
 
if (ctx->velements != ctx->velements_saved) {
ctx->velements = ctx->velements_saved;
ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->velements_saved);
}
ctx->velements_saved = NULL;
}
 
/* vertex buffers */
 
void cso_set_vertex_buffers(struct cso_context *ctx,
unsigned start_slot, unsigned count,
const struct pipe_vertex_buffer *buffers)
{
struct u_vbuf *vbuf = ctx->vbuf;
 
if (vbuf) {
u_vbuf_set_vertex_buffers(vbuf, start_slot, count, buffers);
return;
}
 
/* Save what's in the auxiliary slot, so that we can save and restore it
* for meta ops. */
if (start_slot <= ctx->aux_vertex_buffer_index &&
start_slot+count > ctx->aux_vertex_buffer_index) {
if (buffers) {
const struct pipe_vertex_buffer *vb =
buffers + (ctx->aux_vertex_buffer_index - start_slot);
 
pipe_resource_reference(&ctx->aux_vertex_buffer_current.buffer,
vb->buffer);
memcpy(&ctx->aux_vertex_buffer_current, vb,
sizeof(struct pipe_vertex_buffer));
}
else {
pipe_resource_reference(&ctx->aux_vertex_buffer_current.buffer,
NULL);
ctx->aux_vertex_buffer_current.user_buffer = NULL;
}
}
 
ctx->pipe->set_vertex_buffers(ctx->pipe, start_slot, count, buffers);
}
 
void cso_save_aux_vertex_buffer_slot(struct cso_context *ctx)
{
struct u_vbuf *vbuf = ctx->vbuf;
 
if (vbuf) {
u_vbuf_save_aux_vertex_buffer_slot(vbuf);
return;
}
 
pipe_resource_reference(&ctx->aux_vertex_buffer_saved.buffer,
ctx->aux_vertex_buffer_current.buffer);
memcpy(&ctx->aux_vertex_buffer_saved, &ctx->aux_vertex_buffer_current,
sizeof(struct pipe_vertex_buffer));
}
 
void cso_restore_aux_vertex_buffer_slot(struct cso_context *ctx)
{
struct u_vbuf *vbuf = ctx->vbuf;
 
if (vbuf) {
u_vbuf_restore_aux_vertex_buffer_slot(vbuf);
return;
}
 
cso_set_vertex_buffers(ctx, ctx->aux_vertex_buffer_index, 1,
&ctx->aux_vertex_buffer_saved);
pipe_resource_reference(&ctx->aux_vertex_buffer_saved.buffer, NULL);
}
 
unsigned cso_get_aux_vertex_buffer_slot(struct cso_context *ctx)
{
return ctx->aux_vertex_buffer_index;
}
 
 
/**************** fragment/vertex sampler view state *************************/
 
static enum pipe_error
single_sampler(struct cso_context *ctx,
struct sampler_info *info,
unsigned idx,
const struct pipe_sampler_state *templ)
{
void *handle = NULL;
 
if (templ != NULL) {
unsigned key_size = sizeof(struct pipe_sampler_state);
unsigned hash_key = cso_construct_key((void*)templ, key_size);
struct cso_hash_iter iter =
cso_find_state_template(ctx->cache,
hash_key, CSO_SAMPLER,
(void *) templ, key_size);
 
if (cso_hash_iter_is_null(iter)) {
struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler));
if (!cso)
return PIPE_ERROR_OUT_OF_MEMORY;
 
memcpy(&cso->state, templ, sizeof(*templ));
cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state);
cso->delete_state =
(cso_state_callback) ctx->pipe->delete_sampler_state;
cso->context = ctx->pipe;
 
iter = cso_insert_state(ctx->cache, hash_key, CSO_SAMPLER, cso);
if (cso_hash_iter_is_null(iter)) {
FREE(cso);
return PIPE_ERROR_OUT_OF_MEMORY;
}
 
handle = cso->data;
}
else {
handle = ((struct cso_sampler *)cso_hash_iter_data(iter))->data;
}
}
 
info->samplers[idx] = handle;
 
return PIPE_OK;
}
 
enum pipe_error
cso_single_sampler(struct cso_context *ctx,
unsigned shader_stage,
unsigned idx,
const struct pipe_sampler_state *templ)
{
return single_sampler(ctx, &ctx->samplers[shader_stage], idx, templ);
}
 
 
 
static void
single_sampler_done(struct cso_context *ctx, unsigned shader_stage)
{
struct sampler_info *info = &ctx->samplers[shader_stage];
unsigned i;
 
/* find highest non-null sampler */
for (i = PIPE_MAX_SAMPLERS; i > 0; i--) {
if (info->samplers[i - 1] != NULL)
break;
}
 
info->nr_samplers = i;
 
if (info->hw.nr_samplers != info->nr_samplers ||
memcmp(info->hw.samplers,
info->samplers,
info->nr_samplers * sizeof(void *)) != 0)
{
memcpy(info->hw.samplers,
info->samplers,
info->nr_samplers * sizeof(void *));
info->hw.nr_samplers = info->nr_samplers;
 
switch (shader_stage) {
case PIPE_SHADER_FRAGMENT:
ctx->pipe->bind_fragment_sampler_states(ctx->pipe,
info->nr_samplers,
info->samplers);
break;
case PIPE_SHADER_VERTEX:
ctx->pipe->bind_vertex_sampler_states(ctx->pipe,
info->nr_samplers,
info->samplers);
break;
case PIPE_SHADER_GEOMETRY:
ctx->pipe->bind_geometry_sampler_states(ctx->pipe,
info->nr_samplers,
info->samplers);
break;
default:
assert(!"bad shader type in single_sampler_done()");
}
}
}
 
void
cso_single_sampler_done(struct cso_context *ctx, unsigned shader_stage)
{
single_sampler_done(ctx, shader_stage);
}
 
 
/*
* If the function encounters any errors it will return the
* last one. This is done so that we always try to set as many samplers
* as possible.
*/
enum pipe_error
cso_set_samplers(struct cso_context *ctx,
unsigned shader_stage,
unsigned nr,
const struct pipe_sampler_state **templates)
{
struct sampler_info *info = &ctx->samplers[shader_stage];
unsigned i;
enum pipe_error temp, error = PIPE_OK;
 
/* TODO: fastpath
*/
 
for (i = 0; i < nr; i++) {
temp = single_sampler(ctx, info, i, templates[i]);
if (temp != PIPE_OK)
error = temp;
}
 
for ( ; i < info->nr_samplers; i++) {
temp = single_sampler(ctx, info, i, NULL);
if (temp != PIPE_OK)
error = temp;
}
 
single_sampler_done(ctx, shader_stage);
 
return error;
}
 
void
cso_save_samplers(struct cso_context *ctx, unsigned shader_stage)
{
struct sampler_info *info = &ctx->samplers[shader_stage];
info->nr_samplers_saved = info->nr_samplers;
memcpy(info->samplers_saved, info->samplers, sizeof(info->samplers));
}
 
 
void
cso_restore_samplers(struct cso_context *ctx, unsigned shader_stage)
{
struct sampler_info *info = &ctx->samplers[shader_stage];
info->nr_samplers = info->nr_samplers_saved;
memcpy(info->samplers, info->samplers_saved, sizeof(info->samplers));
single_sampler_done(ctx, shader_stage);
}
 
 
void
cso_set_sampler_views(struct cso_context *ctx,
unsigned shader_stage,
unsigned count,
struct pipe_sampler_view **views)
{
struct sampler_info *info = &ctx->samplers[shader_stage];
unsigned i;
 
/* reference new views */
for (i = 0; i < count; i++) {
pipe_sampler_view_reference(&info->views[i], views[i]);
}
/* unref extra old views, if any */
for (; i < info->nr_views; i++) {
pipe_sampler_view_reference(&info->views[i], NULL);
}
 
info->nr_views = count;
 
/* bind the new sampler views */
switch (shader_stage) {
case PIPE_SHADER_FRAGMENT:
ctx->pipe->set_fragment_sampler_views(ctx->pipe, count, info->views);
break;
case PIPE_SHADER_VERTEX:
ctx->pipe->set_vertex_sampler_views(ctx->pipe, count, info->views);
break;
case PIPE_SHADER_GEOMETRY:
ctx->pipe->set_geometry_sampler_views(ctx->pipe, count, info->views);
break;
default:
assert(!"bad shader type in cso_set_sampler_views()");
}
}
 
 
void
cso_save_sampler_views(struct cso_context *ctx, unsigned shader_stage)
{
struct sampler_info *info = &ctx->samplers[shader_stage];
unsigned i;
 
info->nr_views_saved = info->nr_views;
 
for (i = 0; i < info->nr_views; i++) {
assert(!info->views_saved[i]);
pipe_sampler_view_reference(&info->views_saved[i], info->views[i]);
}
}
 
 
void
cso_restore_sampler_views(struct cso_context *ctx, unsigned shader_stage)
{
struct sampler_info *info = &ctx->samplers[shader_stage];
unsigned i, nr_saved = info->nr_views_saved;
 
for (i = 0; i < nr_saved; i++) {
pipe_sampler_view_reference(&info->views[i], NULL);
/* move the reference from one pointer to another */
info->views[i] = info->views_saved[i];
info->views_saved[i] = NULL;
}
for (; i < info->nr_views; i++) {
pipe_sampler_view_reference(&info->views[i], NULL);
}
 
/* bind the old/saved sampler views */
switch (shader_stage) {
case PIPE_SHADER_FRAGMENT:
ctx->pipe->set_fragment_sampler_views(ctx->pipe, nr_saved, info->views);
break;
case PIPE_SHADER_VERTEX:
ctx->pipe->set_vertex_sampler_views(ctx->pipe, nr_saved, info->views);
break;
case PIPE_SHADER_GEOMETRY:
ctx->pipe->set_geometry_sampler_views(ctx->pipe, nr_saved, info->views);
break;
default:
assert(!"bad shader type in cso_restore_sampler_views()");
}
 
info->nr_views = nr_saved;
info->nr_views_saved = 0;
}
 
 
void
cso_set_stream_outputs(struct cso_context *ctx,
unsigned num_targets,
struct pipe_stream_output_target **targets,
unsigned append_bitmask)
{
struct pipe_context *pipe = ctx->pipe;
uint i;
 
if (!ctx->has_streamout) {
assert(num_targets == 0);
return;
}
 
if (ctx->nr_so_targets == 0 && num_targets == 0) {
/* Nothing to do. */
return;
}
 
/* reference new targets */
for (i = 0; i < num_targets; i++) {
pipe_so_target_reference(&ctx->so_targets[i], targets[i]);
}
/* unref extra old targets, if any */
for (; i < ctx->nr_so_targets; i++) {
pipe_so_target_reference(&ctx->so_targets[i], NULL);
}
 
pipe->set_stream_output_targets(pipe, num_targets, targets,
append_bitmask);
ctx->nr_so_targets = num_targets;
}
 
void
cso_save_stream_outputs(struct cso_context *ctx)
{
uint i;
 
if (!ctx->has_streamout) {
return;
}
 
ctx->nr_so_targets_saved = ctx->nr_so_targets;
 
for (i = 0; i < ctx->nr_so_targets; i++) {
assert(!ctx->so_targets_saved[i]);
pipe_so_target_reference(&ctx->so_targets_saved[i], ctx->so_targets[i]);
}
}
 
void
cso_restore_stream_outputs(struct cso_context *ctx)
{
struct pipe_context *pipe = ctx->pipe;
uint i;
 
if (!ctx->has_streamout) {
return;
}
 
if (ctx->nr_so_targets == 0 && ctx->nr_so_targets_saved == 0) {
/* Nothing to do. */
return;
}
 
for (i = 0; i < ctx->nr_so_targets_saved; i++) {
pipe_so_target_reference(&ctx->so_targets[i], NULL);
/* move the reference from one pointer to another */
ctx->so_targets[i] = ctx->so_targets_saved[i];
ctx->so_targets_saved[i] = NULL;
}
for (; i < ctx->nr_so_targets; i++) {
pipe_so_target_reference(&ctx->so_targets[i], NULL);
}
 
/* ~0 means append */
pipe->set_stream_output_targets(pipe, ctx->nr_so_targets_saved,
ctx->so_targets, ~0);
 
ctx->nr_so_targets = ctx->nr_so_targets_saved;
ctx->nr_so_targets_saved = 0;
}
 
/* constant buffers */
 
void
cso_set_constant_buffer(struct cso_context *cso, unsigned shader_stage,
unsigned index, struct pipe_constant_buffer *cb)
{
struct pipe_context *pipe = cso->pipe;
 
pipe->set_constant_buffer(pipe, shader_stage, index, cb);
 
if (index == 0) {
util_copy_constant_buffer(&cso->aux_constbuf_current[shader_stage], cb);
}
}
 
void
cso_set_constant_buffer_resource(struct cso_context *cso,
unsigned shader_stage,
unsigned index,
struct pipe_resource *buffer)
{
if (buffer) {
struct pipe_constant_buffer cb;
cb.buffer = buffer;
cb.buffer_offset = 0;
cb.buffer_size = buffer->width0;
cb.user_buffer = NULL;
cso_set_constant_buffer(cso, shader_stage, index, &cb);
} else {
cso_set_constant_buffer(cso, shader_stage, index, NULL);
}
}
 
void
cso_save_constant_buffer_slot0(struct cso_context *cso,
unsigned shader_stage)
{
util_copy_constant_buffer(&cso->aux_constbuf_saved[shader_stage],
&cso->aux_constbuf_current[shader_stage]);
}
 
void
cso_restore_constant_buffer_slot0(struct cso_context *cso,
unsigned shader_stage)
{
cso_set_constant_buffer(cso, shader_stage, 0,
&cso->aux_constbuf_saved[shader_stage]);
pipe_resource_reference(&cso->aux_constbuf_saved[shader_stage].buffer,
NULL);
}
 
/* drawing */
 
void
cso_set_index_buffer(struct cso_context *cso,
const struct pipe_index_buffer *ib)
{
struct u_vbuf *vbuf = cso->vbuf;
 
if (vbuf) {
u_vbuf_set_index_buffer(vbuf, ib);
} else {
struct pipe_context *pipe = cso->pipe;
pipe->set_index_buffer(pipe, ib);
}
}
 
void
cso_draw_vbo(struct cso_context *cso,
const struct pipe_draw_info *info)
{
struct u_vbuf *vbuf = cso->vbuf;
 
if (vbuf) {
u_vbuf_draw_vbo(vbuf, info);
} else {
struct pipe_context *pipe = cso->pipe;
pipe->draw_vbo(pipe, info);
}
}
 
void
cso_draw_arrays(struct cso_context *cso, uint mode, uint start, uint count)
{
struct pipe_draw_info info;
 
util_draw_init_info(&info);
 
info.mode = mode;
info.start = start;
info.count = count;
info.min_index = start;
info.max_index = start + count - 1;
 
cso_draw_vbo(cso, &info);
}
/drivers/video/Gallium/auxiliary/cso_cache/cso_context.h
0,0 → 1,239
/**************************************************************************
*
* Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
 
#ifndef CSO_CONTEXT_H
#define CSO_CONTEXT_H
 
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
 
 
#ifdef __cplusplus
extern "C" {
#endif
 
struct cso_context;
struct u_vbuf;
 
struct cso_context *cso_create_context( struct pipe_context *pipe );
 
void cso_release_all( struct cso_context *ctx );
 
void cso_destroy_context( struct cso_context *cso );
 
 
 
enum pipe_error cso_set_blend( struct cso_context *cso,
const struct pipe_blend_state *blend );
void cso_save_blend(struct cso_context *cso);
void cso_restore_blend(struct cso_context *cso);
 
 
 
enum pipe_error cso_set_depth_stencil_alpha( struct cso_context *cso,
const struct pipe_depth_stencil_alpha_state *dsa );
void cso_save_depth_stencil_alpha(struct cso_context *cso);
void cso_restore_depth_stencil_alpha(struct cso_context *cso);
 
 
 
enum pipe_error cso_set_rasterizer( struct cso_context *cso,
const struct pipe_rasterizer_state *rasterizer );
void cso_save_rasterizer(struct cso_context *cso);
void cso_restore_rasterizer(struct cso_context *cso);
 
 
enum pipe_error
cso_set_samplers(struct cso_context *cso,
unsigned shader_stage,
unsigned count,
const struct pipe_sampler_state **states);
 
void
cso_save_samplers(struct cso_context *cso, unsigned shader_stage);
 
void
cso_restore_samplers(struct cso_context *cso, unsigned shader_stage);
 
/* Alternate interface to support state trackers that like to modify
* samplers one at a time:
*/
enum pipe_error
cso_single_sampler(struct cso_context *cso,
unsigned shader_stage,
unsigned idx,
const struct pipe_sampler_state *states);
 
void
cso_single_sampler_done(struct cso_context *cso, unsigned shader_stage);
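/* Illustrative sketch of the one-at-a-time interface (assumes "sampler0"
* and "sampler1" are caller-filled pipe_sampler_state templates):
*
*    cso_single_sampler(cso, PIPE_SHADER_FRAGMENT, 0, &sampler0);
*    cso_single_sampler(cso, PIPE_SHADER_FRAGMENT, 1, &sampler1);
*    cso_single_sampler_done(cso, PIPE_SHADER_FRAGMENT);
*/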
 
 
enum pipe_error cso_set_vertex_elements(struct cso_context *ctx,
unsigned count,
const struct pipe_vertex_element *states);
void cso_save_vertex_elements(struct cso_context *ctx);
void cso_restore_vertex_elements(struct cso_context *ctx);
 
 
void cso_set_vertex_buffers(struct cso_context *ctx,
unsigned start_slot, unsigned count,
const struct pipe_vertex_buffer *buffers);
 
/* One vertex buffer slot is provided with the save/restore functionality.
* cso_context chooses the slot; it can be non-zero. */
void cso_save_aux_vertex_buffer_slot(struct cso_context *ctx);
void cso_restore_aux_vertex_buffer_slot(struct cso_context *ctx);
unsigned cso_get_aux_vertex_buffer_slot(struct cso_context *ctx);
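/* Illustrative meta-op sketch for the auxiliary slot (assumes "vb" is a
* caller-filled pipe_vertex_buffer):
*
*    cso_save_aux_vertex_buffer_slot(ctx);
*    cso_set_vertex_buffers(ctx, cso_get_aux_vertex_buffer_slot(ctx), 1, &vb);
*    ... draw ...
*    cso_restore_aux_vertex_buffer_slot(ctx);
*/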
 
 
void cso_set_stream_outputs(struct cso_context *ctx,
unsigned num_targets,
struct pipe_stream_output_target **targets,
unsigned append_bitmask);
void cso_save_stream_outputs(struct cso_context *ctx);
void cso_restore_stream_outputs(struct cso_context *ctx);
 
 
/*
* We don't provide shader caching in CSO. Most of the time the API provides
* object semantics for shaders anyway, and in the cases where it doesn't
* (e.g. Mesa's internally-generated texenv programs) it is up to
* the state tracker to implement its own specialized caching.
*/
 
void cso_set_fragment_shader_handle(struct cso_context *ctx, void *handle);
void cso_delete_fragment_shader(struct cso_context *ctx, void *handle );
void cso_save_fragment_shader(struct cso_context *cso);
void cso_restore_fragment_shader(struct cso_context *cso);
 
 
void cso_set_vertex_shader_handle(struct cso_context *ctx, void *handle);
void cso_delete_vertex_shader(struct cso_context *ctx, void *handle );
void cso_save_vertex_shader(struct cso_context *cso);
void cso_restore_vertex_shader(struct cso_context *cso);
 
 
void cso_set_geometry_shader_handle(struct cso_context *ctx, void *handle);
void cso_delete_geometry_shader(struct cso_context *ctx, void *handle);
void cso_save_geometry_shader(struct cso_context *cso);
void cso_restore_geometry_shader(struct cso_context *cso);
 
 
void cso_set_framebuffer(struct cso_context *cso,
const struct pipe_framebuffer_state *fb);
void cso_save_framebuffer(struct cso_context *cso);
void cso_restore_framebuffer(struct cso_context *cso);
 
 
void cso_set_viewport(struct cso_context *cso,
const struct pipe_viewport_state *vp);
void cso_save_viewport(struct cso_context *cso);
void cso_restore_viewport(struct cso_context *cso);
 
 
void cso_set_blend_color(struct cso_context *cso,
const struct pipe_blend_color *bc);
 
void cso_set_sample_mask(struct cso_context *cso, unsigned sample_mask);
void cso_save_sample_mask(struct cso_context *ctx);
void cso_restore_sample_mask(struct cso_context *ctx);
 
void cso_set_stencil_ref(struct cso_context *cso,
const struct pipe_stencil_ref *sr);
void cso_save_stencil_ref(struct cso_context *cso);
void cso_restore_stencil_ref(struct cso_context *cso);
 
void cso_set_render_condition(struct cso_context *cso,
struct pipe_query *query,
boolean condition, uint mode);
void cso_save_render_condition(struct cso_context *cso);
void cso_restore_render_condition(struct cso_context *cso);
 
 
/* clip state */
 
void
cso_set_clip(struct cso_context *cso,
const struct pipe_clip_state *clip);
 
void
cso_save_clip(struct cso_context *cso);
 
void
cso_restore_clip(struct cso_context *cso);
 
 
/* sampler view state */
 
void
cso_set_sampler_views(struct cso_context *cso,
unsigned shader_stage,
unsigned count,
struct pipe_sampler_view **views);
 
void
cso_save_sampler_views(struct cso_context *cso, unsigned shader_stage);
 
void
cso_restore_sampler_views(struct cso_context *cso, unsigned shader_stage);
 
 
/* constant buffers */
 
void cso_set_constant_buffer(struct cso_context *cso, unsigned shader_stage,
unsigned index, struct pipe_constant_buffer *cb);
void cso_set_constant_buffer_resource(struct cso_context *cso,
unsigned shader_stage,
unsigned index,
struct pipe_resource *buffer);
void cso_save_constant_buffer_slot0(struct cso_context *cso,
unsigned shader_stage);
void cso_restore_constant_buffer_slot0(struct cso_context *cso,
unsigned shader_stage);
 
 
/* drawing */
 
void
cso_set_index_buffer(struct cso_context *cso,
const struct pipe_index_buffer *ib);
 
void
cso_draw_vbo(struct cso_context *cso,
const struct pipe_draw_info *info);
 
/* helper drawing function */
void
cso_draw_arrays(struct cso_context *cso, uint mode, uint start, uint count);
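/* e.g. cso_draw_arrays(cso, PIPE_PRIM_TRIANGLES, 0, 3) draws a single
* triangle from the currently bound vertex buffers/elements */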
 
#ifdef __cplusplus
}
#endif
 
#endif
/drivers/video/Gallium/auxiliary/cso_cache/cso_hash.c
0,0 → 1,439
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/*
* Authors:
* Zack Rusin <zack@tungstengraphics.com>
*/
 
#include "util/u_debug.h"
#include "util/u_memory.h"
 
#include "cso_hash.h"
 
#define MAX(a, b) ((a > b) ? (a) : (b))
 
static const int MinNumBits = 4;
 
static const unsigned char prime_deltas[] = {
0, 0, 1, 3, 1, 5, 3, 3, 1, 9, 7, 5, 3, 9, 25, 3,
1, 21, 3, 21, 7, 15, 9, 5, 3, 29, 15, 0, 0, 0, 0, 0
};
 
static int primeForNumBits(int numBits)
{
return (1 << numBits) + prime_deltas[numBits];
}
 
/*
Returns the smallest integer n such that
primeForNumBits(n) >= hint.
*/
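/*
 * Worked example (illustrative): for hint = 100 the shift loop gives
 * numBits = 6; primeForNumBits(6) = 64 + 3 = 67 < 100, so the result is
 * bumped to 7, and primeForNumBits(7) = 128 + 3 = 131 >= 100.
 */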
static int countBits(int hint)
{
int numBits = 0;
int bits = hint;
 
while (bits > 1) {
bits >>= 1;
numBits++;
}
 
if (numBits >= (int)sizeof(prime_deltas)) {
numBits = sizeof(prime_deltas) - 1;
} else if (primeForNumBits(numBits) < hint) {
++numBits;
}
return numBits;
}
 
struct cso_node {
struct cso_node *next;
unsigned key;
void *value;
};
 
struct cso_hash_data {
struct cso_node *fakeNext;
struct cso_node **buckets;
int size;
int nodeSize;
short userNumBits;
short numBits;
int numBuckets;
};
 
struct cso_hash {
union {
struct cso_hash_data *d;
struct cso_node *e;
} data;
};
 
static void *cso_data_allocate_node(struct cso_hash_data *hash)
{
return MALLOC(hash->nodeSize);
}
 
static void cso_free_node(struct cso_node *node)
{
FREE(node);
}
 
static struct cso_node *
cso_hash_create_node(struct cso_hash *hash,
unsigned akey, void *avalue,
struct cso_node **anextNode)
{
struct cso_node *node = cso_data_allocate_node(hash->data.d);
 
if (!node)
return NULL;
 
node->key = akey;
node->value = avalue;
 
node->next = (struct cso_node*)(*anextNode);
*anextNode = node;
++hash->data.d->size;
return node;
}
 
static void cso_data_rehash(struct cso_hash_data *hash, int hint)
{
if (hint < 0) {
hint = countBits(-hint);
if (hint < MinNumBits)
hint = MinNumBits;
hash->userNumBits = (short)hint;
while (primeForNumBits(hint) < (hash->size >> 1))
++hint;
} else if (hint < MinNumBits) {
hint = MinNumBits;
}
 
if (hash->numBits != hint) {
struct cso_node *e = (struct cso_node *)(hash);
struct cso_node **oldBuckets = hash->buckets;
int oldNumBuckets = hash->numBuckets;
int i = 0;
 
hash->numBits = (short)hint;
hash->numBuckets = primeForNumBits(hint);
hash->buckets = MALLOC(sizeof(struct cso_node*) * hash->numBuckets);
for (i = 0; i < hash->numBuckets; ++i)
hash->buckets[i] = e;
 
for (i = 0; i < oldNumBuckets; ++i) {
struct cso_node *firstNode = oldBuckets[i];
while (firstNode != e) {
unsigned h = firstNode->key;
struct cso_node *lastNode = firstNode;
struct cso_node *afterLastNode;
struct cso_node **beforeFirstNode;
while (lastNode->next != e && lastNode->next->key == h)
lastNode = lastNode->next;
 
afterLastNode = lastNode->next;
beforeFirstNode = &hash->buckets[h % hash->numBuckets];
while (*beforeFirstNode != e)
beforeFirstNode = &(*beforeFirstNode)->next;
lastNode->next = *beforeFirstNode;
*beforeFirstNode = firstNode;
firstNode = afterLastNode;
}
}
FREE(oldBuckets);
}
}
 
static void cso_data_might_grow(struct cso_hash_data *hash)
{
if (hash->size >= hash->numBuckets)
cso_data_rehash(hash, hash->numBits + 1);
}
 
static void cso_data_has_shrunk(struct cso_hash_data *hash)
{
if (hash->size <= (hash->numBuckets >> 3) &&
hash->numBits > hash->userNumBits) {
int max = MAX(hash->numBits-2, hash->userNumBits);
cso_data_rehash(hash, max);
}
}
 
static struct cso_node *cso_data_first_node(struct cso_hash_data *hash)
{
struct cso_node *e = (struct cso_node *)(hash);
struct cso_node **bucket = hash->buckets;
int n = hash->numBuckets;
while (n--) {
if (*bucket != e)
return *bucket;
++bucket;
}
return e;
}
 
static struct cso_node **cso_hash_find_node(struct cso_hash *hash, unsigned akey)
{
struct cso_node **node;
 
if (hash->data.d->numBuckets) {
node = (struct cso_node **)(&hash->data.d->buckets[akey % hash->data.d->numBuckets]);
assert(*node == hash->data.e || (*node)->next);
while (*node != hash->data.e && (*node)->key != akey)
node = &(*node)->next;
} else {
node = (struct cso_node **)((const struct cso_node * const *)(&hash->data.e));
}
return node;
}
 
struct cso_hash_iter cso_hash_insert(struct cso_hash *hash,
unsigned key, void *data)
{
cso_data_might_grow(hash->data.d);
 
{
struct cso_node **nextNode = cso_hash_find_node(hash, key);
struct cso_node *node = cso_hash_create_node(hash, key, data, nextNode);
if (!node) {
struct cso_hash_iter null_iter = {hash, 0};
return null_iter;
}
 
{
struct cso_hash_iter iter = {hash, node};
return iter;
}
}
}
 
struct cso_hash * cso_hash_create(void)
{
struct cso_hash *hash = MALLOC_STRUCT(cso_hash);
if (!hash)
return NULL;
 
hash->data.d = MALLOC_STRUCT(cso_hash_data);
if (!hash->data.d) {
FREE(hash);
return NULL;
}
 
hash->data.d->fakeNext = 0;
hash->data.d->buckets = 0;
hash->data.d->size = 0;
hash->data.d->nodeSize = sizeof(struct cso_node);
hash->data.d->userNumBits = (short)MinNumBits;
hash->data.d->numBits = 0;
hash->data.d->numBuckets = 0;
 
return hash;
}
 
void cso_hash_delete(struct cso_hash *hash)
{
struct cso_node *e_for_x = (struct cso_node *)(hash->data.d);
struct cso_node **bucket = (struct cso_node **)(hash->data.d->buckets);
int n = hash->data.d->numBuckets;
while (n--) {
struct cso_node *cur = *bucket++;
while (cur != e_for_x) {
struct cso_node *next = cur->next;
cso_free_node(cur);
cur = next;
}
}
FREE(hash->data.d->buckets);
FREE(hash->data.d);
FREE(hash);
}
 
struct cso_hash_iter cso_hash_find(struct cso_hash *hash,
unsigned key)
{
struct cso_node **nextNode = cso_hash_find_node(hash, key);
struct cso_hash_iter iter = {hash, *nextNode};
return iter;
}
 
unsigned cso_hash_iter_key(struct cso_hash_iter iter)
{
if (!iter.node || iter.hash->data.e == iter.node)
return 0;
return iter.node->key;
}
 
void * cso_hash_iter_data(struct cso_hash_iter iter)
{
if (!iter.node || iter.hash->data.e == iter.node)
return 0;
return iter.node->value;
}
 
static struct cso_node *cso_hash_data_next(struct cso_node *node)
{
union {
struct cso_node *next;
struct cso_node *e;
struct cso_hash_data *d;
} a;
int start;
struct cso_node **bucket;
int n;
 
a.next = node->next;
if (!a.next) {
debug_printf("iterating beyond the last element\n");
return 0;
}
if (a.next->next)
return a.next;
 
start = (node->key % a.d->numBuckets) + 1;
bucket = a.d->buckets + start;
n = a.d->numBuckets - start;
while (n--) {
if (*bucket != a.e)
return *bucket;
++bucket;
}
return a.e;
}
 
 
static struct cso_node *cso_hash_data_prev(struct cso_node *node)
{
union {
struct cso_node *e;
struct cso_hash_data *d;
} a;
int start;
struct cso_node *sentinel;
struct cso_node **bucket;
 
a.e = node;
while (a.e->next)
a.e = a.e->next;
 
if (node == a.e)
start = a.d->numBuckets - 1;
else
start = node->key % a.d->numBuckets;
 
sentinel = node;
bucket = a.d->buckets + start;
while (start >= 0) {
if (*bucket != sentinel) {
struct cso_node *prev = *bucket;
while (prev->next != sentinel)
prev = prev->next;
return prev;
}
 
sentinel = a.e;
--bucket;
--start;
}
debug_printf("iterating backward beyond first element\n");
return a.e;
}
 
struct cso_hash_iter cso_hash_iter_next(struct cso_hash_iter iter)
{
struct cso_hash_iter next = {iter.hash, cso_hash_data_next(iter.node)};
return next;
}
 
int cso_hash_iter_is_null(struct cso_hash_iter iter)
{
if (!iter.node || iter.node == iter.hash->data.e)
return 1;
return 0;
}
 
void * cso_hash_take(struct cso_hash *hash,
unsigned akey)
{
struct cso_node **node = cso_hash_find_node(hash, akey);
if (*node != hash->data.e) {
void *t = (*node)->value;
struct cso_node *next = (*node)->next;
cso_free_node(*node);
*node = next;
--hash->data.d->size;
cso_data_has_shrunk(hash->data.d);
return t;
}
return 0;
}
 
struct cso_hash_iter cso_hash_iter_prev(struct cso_hash_iter iter)
{
struct cso_hash_iter prev = {iter.hash,
cso_hash_data_prev(iter.node)};
return prev;
}
 
struct cso_hash_iter cso_hash_first_node(struct cso_hash *hash)
{
struct cso_hash_iter iter = {hash, cso_data_first_node(hash->data.d)};
return iter;
}
 
int cso_hash_size(struct cso_hash *hash)
{
return hash->data.d->size;
}
 
struct cso_hash_iter cso_hash_erase(struct cso_hash *hash, struct cso_hash_iter iter)
{
struct cso_hash_iter ret = iter;
struct cso_node *node = iter.node;
struct cso_node **node_ptr;
 
if (node == hash->data.e)
return iter;
 
ret = cso_hash_iter_next(ret);
node_ptr = (struct cso_node**)(&hash->data.d->buckets[node->key % hash->data.d->numBuckets]);
while (*node_ptr != node)
node_ptr = &(*node_ptr)->next;
*node_ptr = node->next;
cso_free_node(node);
--hash->data.d->size;
return ret;
}
 
boolean cso_hash_contains(struct cso_hash *hash, unsigned key)
{
struct cso_node **node = cso_hash_find_node(hash, key);
return (*node != hash->data.e);
}
/drivers/video/Gallium/auxiliary/cso_cache/cso_hash.h
0,0 → 1,129
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/**
* @file
* Hash table implementation.
*
* This file provides a hash implementation that is capable of dealing
* with collisions. It stores colliding entries in a linked list. All
* functions operating on the hash return an iterator. The iterator
* itself points to the collision list. If there was no collision the
* list will have just one entry; otherwise client code should iterate
* over the entries to find the exact one among those that share the
* same key (e.g. memcmp could be used on the data to check that).
*
* @author Zack Rusin <zack@tungstengraphics.com>
*/
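 
/*
 * Illustrative sketch (not part of the original header): a lookup that walks
 * the collision list returned by cso_hash_find() and uses memcmp() to pick
 * the exact entry. "templ" and "size" are assumed caller-provided.
 *
 *    struct cso_hash_iter iter = cso_hash_find(hash, key);
 *    while (!cso_hash_iter_is_null(iter) && cso_hash_iter_key(iter) == key) {
 *       void *data = cso_hash_iter_data(iter);
 *       if (data && memcmp(data, templ, size) == 0)
 *          return data;
 *       iter = cso_hash_iter_next(iter);
 *    }
 */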
 
#ifndef CSO_HASH_H
#define CSO_HASH_H
 
#include "pipe/p_compiler.h"
 
#ifdef __cplusplus
extern "C" {
#endif
 
 
struct cso_hash;
struct cso_node;
 
 
struct cso_hash_iter {
struct cso_hash *hash;
struct cso_node *node;
};
 
 
struct cso_hash *cso_hash_create(void);
void cso_hash_delete(struct cso_hash *hash);
 
 
int cso_hash_size(struct cso_hash *hash);
 
 
/**
* Adds data with the given key to the hash. If an entry with the given
* key is already in the hash, the new entry is inserted before it in
* the collision list.
* Returns an iterator pointing to the inserted item in the hash.
*/
struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, unsigned key,
void *data);
/**
* Removes the item pointed to by the given iterator from the hash.
* Note that the data itself is not freed; if it was a malloc'ed pointer
* it will have to be freed by the caller after calling this function.
* Returns an iterator pointing to the item after the removed one in
* the hash.
*/
struct cso_hash_iter cso_hash_erase(struct cso_hash *hash, struct cso_hash_iter iter);
 
void *cso_hash_take(struct cso_hash *hash, unsigned key);
 
 
 
struct cso_hash_iter cso_hash_first_node(struct cso_hash *hash);
 
/**
* Return an iterator pointing to the first entry in the collision list.
*/
struct cso_hash_iter cso_hash_find(struct cso_hash *hash, unsigned key);
 
/**
* Returns true if a value with the given key exists in the hash.
*/
boolean cso_hash_contains(struct cso_hash *hash, unsigned key);
 
 
int cso_hash_iter_is_null(struct cso_hash_iter iter);
unsigned cso_hash_iter_key(struct cso_hash_iter iter);
void *cso_hash_iter_data(struct cso_hash_iter iter);
 
 
struct cso_hash_iter cso_hash_iter_next(struct cso_hash_iter iter);
struct cso_hash_iter cso_hash_iter_prev(struct cso_hash_iter iter);
 
 
/**
* Convenience routine that iterates over the collision list, doing a memory
* comparison to find the entry that is a direct copy of the given template,
* and returns that entry.
*/
void *cso_hash_find_data_from_template( struct cso_hash *hash,
unsigned hash_key,
void *templ,
int size );
 
 
#ifdef __cplusplus
}
#endif
 
#endif
/drivers/video/Gallium/auxiliary/os/os_time.c
35,14 → 35,8
 
#include "pipe/p_config.h"
 
#if defined(PIPE_OS_UNIX)
# include <time.h> /* timeval */
# include <sys/time.h> /* timeval */
#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
# include <windows.h>
#else
# error Unsupported OS
#endif
 
#include "os_time.h"
 
50,32 → 44,8
int64_t
os_time_get_nano(void)
{
#if defined(PIPE_OS_LINUX)
 
struct timespec tv;
clock_gettime(CLOCK_MONOTONIC, &tv);
return tv.tv_nsec + tv.tv_sec*INT64_C(1000000000);
 
#elif defined(PIPE_OS_UNIX)
 
struct timeval tv;
gettimeofday(&tv, NULL);
return tv.tv_usec*INT64_C(1000) + tv.tv_sec*INT64_C(1000000000);
 
#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
 
static LARGE_INTEGER frequency;
LARGE_INTEGER counter;
if(!frequency.QuadPart)
QueryPerformanceFrequency(&frequency);
QueryPerformanceCounter(&counter);
return counter.QuadPart*INT64_C(1000000000)/frequency.QuadPart;
 
#else
 
#error Unsupported OS
 
#endif
}
 
 
/drivers/video/Gallium/auxiliary/pipebuffer/pb_buffer.h
0,0 → 1,288
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/**
* \file
* Generic code for buffers.
*
* Behind a pipe buffer handle there can be DMA buffers, client (or user)
* buffers, regular malloc'ed buffers, etc. This file provides an abstract base
* buffer handle that allows the driver to cope with all those kinds of buffers
* in a more flexible way.
*
* A winsys driver is under no obligation to use this library, and a pipe
* driver should be completely agnostic about it.
*
* \author Jose Fonseca <jrfonseca@tungstengraphics.com>
*/
 
#ifndef PB_BUFFER_H_
#define PB_BUFFER_H_
 
 
#include "pipe/p_compiler.h"
#include "util/u_debug.h"
#include "util/u_inlines.h"
#include "pipe/p_defines.h"
 
 
#ifdef __cplusplus
extern "C" {
#endif
 
 
struct pb_vtbl;
struct pb_validate;
struct pipe_fence_handle;
 
 
#define PB_USAGE_CPU_READ (1 << 0)
#define PB_USAGE_CPU_WRITE (1 << 1)
#define PB_USAGE_GPU_READ (1 << 2)
#define PB_USAGE_GPU_WRITE (1 << 3)
#define PB_USAGE_UNSYNCHRONIZED (1 << 10)
#define PB_USAGE_DONTBLOCK (1 << 9)
 
#define PB_USAGE_CPU_READ_WRITE \
( PB_USAGE_CPU_READ | PB_USAGE_CPU_WRITE )
#define PB_USAGE_GPU_READ_WRITE \
( PB_USAGE_GPU_READ | PB_USAGE_GPU_WRITE )
#define PB_USAGE_WRITE \
( PB_USAGE_CPU_WRITE | PB_USAGE_GPU_WRITE )
 
/**
* Buffer description.
*
* Used when allocating the buffer.
*/
struct pb_desc
{
unsigned alignment;
unsigned usage;
};
 
 
/**
* Size. Regular (32bit) unsigned for now.
*/
typedef unsigned pb_size;
 
 
/**
* Base class for all pb_* buffers.
*/
struct pb_buffer
{
struct pipe_reference reference;
unsigned size;
unsigned alignment;
unsigned usage;
 
/**
* Pointer to the virtual function table.
*
* Avoid accessing this table directly. Use the inline functions below
* instead to avoid mistakes.
*/
const struct pb_vtbl *vtbl;
};
 
 
/**
* Virtual function table for the buffer storage operations.
*
* Note that creation is not done through this table.
*/
struct pb_vtbl
{
void (*destroy)( struct pb_buffer *buf );
 
/**
* Map the entire data store of a buffer object into the client's address space.
* flags is a bitmask of PB_USAGE_CPU_READ/WRITE.
*/
void *(*map)( struct pb_buffer *buf,
unsigned flags, void *flush_ctx );
void (*unmap)( struct pb_buffer *buf );
 
enum pipe_error (*validate)( struct pb_buffer *buf,
struct pb_validate *vl,
unsigned flags );
 
void (*fence)( struct pb_buffer *buf,
struct pipe_fence_handle *fence );
 
/**
* Get the base buffer and the offset.
*
* A buffer can be subdivided into smaller buffers. This method should return
* the underlying buffer, and the relative offset.
*
* Buffers without an underlying base buffer should return themselves, with
* a zero offset.
*
* Note that this will increase the reference count of the base buffer.
*/
void (*get_base_buffer)( struct pb_buffer *buf,
struct pb_buffer **base_buf,
pb_size *offset );
};
 
 
 
/* Accessor functions for pb->vtbl:
*/
static INLINE void *
pb_map(struct pb_buffer *buf,
unsigned flags, void *flush_ctx)
{
assert(buf);
if(!buf)
return NULL;
assert(pipe_is_referenced(&buf->reference));
return buf->vtbl->map(buf, flags, flush_ctx);
}
 
 
static INLINE void
pb_unmap(struct pb_buffer *buf)
{
assert(buf);
if(!buf)
return;
assert(pipe_is_referenced(&buf->reference));
buf->vtbl->unmap(buf);
}
 
 
static INLINE void
pb_get_base_buffer( struct pb_buffer *buf,
struct pb_buffer **base_buf,
pb_size *offset )
{
assert(buf);
if(!buf) {
base_buf = NULL;
offset = 0;
return;
}
assert(pipe_is_referenced(&buf->reference));
assert(buf->vtbl->get_base_buffer);
buf->vtbl->get_base_buffer(buf, base_buf, offset);
assert(*base_buf);
assert(*offset < (*base_buf)->size);
}
 
 
static INLINE enum pipe_error
pb_validate(struct pb_buffer *buf, struct pb_validate *vl, unsigned flags)
{
assert(buf);
if(!buf)
return PIPE_ERROR;
assert(buf->vtbl->validate);
return buf->vtbl->validate(buf, vl, flags);
}
 
 
static INLINE void
pb_fence(struct pb_buffer *buf, struct pipe_fence_handle *fence)
{
assert(buf);
if(!buf)
return;
assert(buf->vtbl->fence);
buf->vtbl->fence(buf, fence);
}
 
 
static INLINE void
pb_destroy(struct pb_buffer *buf)
{
assert(buf);
if(!buf)
return;
assert(!pipe_is_referenced(&buf->reference));
buf->vtbl->destroy(buf);
}
 
static INLINE void
pb_reference(struct pb_buffer **dst,
struct pb_buffer *src)
{
struct pb_buffer *old = *dst;
 
if (pipe_reference(&(*dst)->reference, &src->reference))
pb_destroy( old );
*dst = src;
}
 
 
/**
* Utility function to check whether the provided alignment is consistent with
* the requested one or not.
*/
static INLINE boolean
pb_check_alignment(pb_size requested, pb_size provided)
{
if(!requested)
return TRUE;
if(requested > provided)
return FALSE;
if(provided % requested != 0)
return FALSE;
return TRUE;
}
 
 
/**
* Utility function to check whether the provided usage flags are consistent
* with the requested ones or not.
*/
static INLINE boolean
pb_check_usage(unsigned requested, unsigned provided)
{
return (requested & provided) == requested ? TRUE : FALSE;
}
 
 
/**
* Malloc-based buffer to store data that can't be used by the graphics
* hardware.
*/
struct pb_buffer *
pb_malloc_buffer_create(pb_size size,
const struct pb_desc *desc);
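 
/*
 * Illustrative sketch (not part of the original header): allocating a small
 * malloc-backed buffer, filling it through the map/unmap interface and then
 * dropping the reference. The pb_desc values are example assumptions.
 *
 *    struct pb_desc desc = { 16, PB_USAGE_CPU_READ_WRITE };
 *    struct pb_buffer *buf = pb_malloc_buffer_create(4096, &desc);
 *    if (buf) {
 *       void *map = pb_map(buf, PB_USAGE_CPU_WRITE, NULL);
 *       if (map) {
 *          memset(map, 0, 4096);
 *          pb_unmap(buf);
 *       }
 *       pb_reference(&buf, NULL);
 *    }
 */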
 
 
#ifdef __cplusplus
}
#endif
 
#endif /*PB_BUFFER_H_*/
/drivers/video/Gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
0,0 → 1,1069
/**************************************************************************
*
* Copyright 2007-2010 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/**
* \file
* Implementation of fenced buffers.
*
* \author Jose Fonseca <jfonseca-at-vmware-dot-com>
* \author Thomas Hellström <thellstrom-at-vmware-dot-com>
*/
 
 
#include "pipe/p_config.h"
 
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
#include <unistd.h>
#include <sched.h>
#endif
 
#include "pipe/p_compiler.h"
#include "pipe/p_defines.h"
#include "util/u_debug.h"
#include "os/os_thread.h"
#include "util/u_memory.h"
#include "util/u_double_list.h"
 
#include "pb_buffer.h"
#include "pb_buffer_fenced.h"
#include "pb_bufmgr.h"
 
 
 
/**
* Convenience macro (type safe).
*/
#define SUPER(__derived) (&(__derived)->base)
 
 
struct fenced_manager
{
struct pb_manager base;
struct pb_manager *provider;
struct pb_fence_ops *ops;
 
/**
* Maximum buffer size that can be safely allocated.
*/
pb_size max_buffer_size;
 
/**
* Maximum cpu memory we can allocate before we start waiting for the
* GPU to idle.
*/
pb_size max_cpu_total_size;
 
/**
* Following members are mutable and protected by this mutex.
*/
pipe_mutex mutex;
 
/**
* Fenced buffer list.
*
* All fenced buffers are placed in this list, ordered from the oldest
* fence to the newest fence.
*/
struct list_head fenced;
pb_size num_fenced;
 
struct list_head unfenced;
pb_size num_unfenced;
 
/**
* How much temporary CPU memory is being used to hold unvalidated buffers.
*/
pb_size cpu_total_size;
};
 
 
/**
* Fenced buffer.
*
* Wrapper around a pipe buffer which adds fencing and reference counting.
*/
struct fenced_buffer
{
/*
* Immutable members.
*/
 
struct pb_buffer base;
struct fenced_manager *mgr;
 
/*
* Following members are mutable and protected by fenced_manager::mutex.
*/
 
struct list_head head;
 
/**
* Buffer with storage.
*/
struct pb_buffer *buffer;
pb_size size;
struct pb_desc desc;
 
/**
* Temporary CPU storage data. Used when there isn't enough GPU memory to
* store the buffer.
*/
void *data;
 
/**
* A bitmask of PB_USAGE_CPU/GPU_READ/WRITE describing the current
* buffer usage.
*/
unsigned flags;
 
unsigned mapcount;
 
struct pb_validate *vl;
unsigned validation_flags;
 
struct pipe_fence_handle *fence;
};
 
 
static INLINE struct fenced_manager *
fenced_manager(struct pb_manager *mgr)
{
assert(mgr);
return (struct fenced_manager *)mgr;
}
 
 
static INLINE struct fenced_buffer *
fenced_buffer(struct pb_buffer *buf)
{
assert(buf);
return (struct fenced_buffer *)buf;
}
 
 
static void
fenced_buffer_destroy_cpu_storage_locked(struct fenced_buffer *fenced_buf);
 
static enum pipe_error
fenced_buffer_create_cpu_storage_locked(struct fenced_manager *fenced_mgr,
struct fenced_buffer *fenced_buf);
 
static void
fenced_buffer_destroy_gpu_storage_locked(struct fenced_buffer *fenced_buf);
 
static enum pipe_error
fenced_buffer_create_gpu_storage_locked(struct fenced_manager *fenced_mgr,
struct fenced_buffer *fenced_buf,
boolean wait);
 
static enum pipe_error
fenced_buffer_copy_storage_to_gpu_locked(struct fenced_buffer *fenced_buf);
 
static enum pipe_error
fenced_buffer_copy_storage_to_cpu_locked(struct fenced_buffer *fenced_buf);
 
 
/**
* Dump the fenced buffer list.
*
* Useful to understand failures to allocate buffers.
*/
static void
fenced_manager_dump_locked(struct fenced_manager *fenced_mgr)
{
#ifdef DEBUG
struct pb_fence_ops *ops = fenced_mgr->ops;
struct list_head *curr, *next;
struct fenced_buffer *fenced_buf;
 
debug_printf("%10s %7s %8s %7s %10s %s\n",
"buffer", "size", "refcount", "storage", "fence", "signalled");
 
curr = fenced_mgr->unfenced.next;
next = curr->next;
while(curr != &fenced_mgr->unfenced) {
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
assert(!fenced_buf->fence);
debug_printf("%10p %7u %8u %7s\n",
(void *) fenced_buf,
fenced_buf->base.size,
p_atomic_read(&fenced_buf->base.reference.count),
fenced_buf->buffer ? "gpu" : (fenced_buf->data ? "cpu" : "none"));
curr = next;
next = curr->next;
}
 
curr = fenced_mgr->fenced.next;
next = curr->next;
while(curr != &fenced_mgr->fenced) {
int signaled;
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
assert(fenced_buf->buffer);
signaled = ops->fence_signalled(ops, fenced_buf->fence, 0);
debug_printf("%10p %7u %8u %7s %10p %s\n",
(void *) fenced_buf,
fenced_buf->base.size,
p_atomic_read(&fenced_buf->base.reference.count),
"gpu",
(void *) fenced_buf->fence,
signaled == 0 ? "y" : "n");
curr = next;
next = curr->next;
}
#else
(void)fenced_mgr;
#endif
}
 
 
static INLINE void
fenced_buffer_destroy_locked(struct fenced_manager *fenced_mgr,
struct fenced_buffer *fenced_buf)
{
assert(!pipe_is_referenced(&fenced_buf->base.reference));
 
assert(!fenced_buf->fence);
assert(fenced_buf->head.prev);
assert(fenced_buf->head.next);
LIST_DEL(&fenced_buf->head);
assert(fenced_mgr->num_unfenced);
--fenced_mgr->num_unfenced;
 
fenced_buffer_destroy_gpu_storage_locked(fenced_buf);
fenced_buffer_destroy_cpu_storage_locked(fenced_buf);
 
FREE(fenced_buf);
}
 
 
/**
* Add the buffer to the fenced list.
*
* Reference count should be incremented before calling this function.
*/
static INLINE void
fenced_buffer_add_locked(struct fenced_manager *fenced_mgr,
struct fenced_buffer *fenced_buf)
{
assert(pipe_is_referenced(&fenced_buf->base.reference));
assert(fenced_buf->flags & PB_USAGE_GPU_READ_WRITE);
assert(fenced_buf->fence);
 
p_atomic_inc(&fenced_buf->base.reference.count);
 
LIST_DEL(&fenced_buf->head);
assert(fenced_mgr->num_unfenced);
--fenced_mgr->num_unfenced;
LIST_ADDTAIL(&fenced_buf->head, &fenced_mgr->fenced);
++fenced_mgr->num_fenced;
}
 
 
/**
* Remove the buffer from the fenced list, and potentially destroy the buffer
* if the reference count reaches zero.
*
* Returns TRUE if the buffer was destroyed.
*/
static INLINE boolean
fenced_buffer_remove_locked(struct fenced_manager *fenced_mgr,
struct fenced_buffer *fenced_buf)
{
struct pb_fence_ops *ops = fenced_mgr->ops;
 
assert(fenced_buf->fence);
assert(fenced_buf->mgr == fenced_mgr);
 
ops->fence_reference(ops, &fenced_buf->fence, NULL);
fenced_buf->flags &= ~PB_USAGE_GPU_READ_WRITE;
 
assert(fenced_buf->head.prev);
assert(fenced_buf->head.next);
 
LIST_DEL(&fenced_buf->head);
assert(fenced_mgr->num_fenced);
--fenced_mgr->num_fenced;
 
LIST_ADDTAIL(&fenced_buf->head, &fenced_mgr->unfenced);
++fenced_mgr->num_unfenced;
 
if (p_atomic_dec_zero(&fenced_buf->base.reference.count)) {
fenced_buffer_destroy_locked(fenced_mgr, fenced_buf);
return TRUE;
}
 
return FALSE;
}
 
 
/**
* Wait for the fence to expire, and remove the buffer from the fenced list.
*
* This function will release and re-acquire the mutex, so any copy of mutable
* state must be discarded after calling it.
*/
static INLINE enum pipe_error
fenced_buffer_finish_locked(struct fenced_manager *fenced_mgr,
struct fenced_buffer *fenced_buf)
{
struct pb_fence_ops *ops = fenced_mgr->ops;
enum pipe_error ret = PIPE_ERROR;
 
#if 0
debug_warning("waiting for GPU");
#endif
 
assert(pipe_is_referenced(&fenced_buf->base.reference));
assert(fenced_buf->fence);
 
if(fenced_buf->fence) {
struct pipe_fence_handle *fence = NULL;
int finished;
boolean proceed;
 
ops->fence_reference(ops, &fence, fenced_buf->fence);
 
pipe_mutex_unlock(fenced_mgr->mutex);
 
finished = ops->fence_finish(ops, fenced_buf->fence, 0);
 
pipe_mutex_lock(fenced_mgr->mutex);
 
assert(pipe_is_referenced(&fenced_buf->base.reference));
 
/*
* Only proceed if the fence object didn't change in the meantime.
* Otherwise assume the work has already been carried out by another
* thread that re-acquired the lock before us.
*/
proceed = fence == fenced_buf->fence ? TRUE : FALSE;
 
ops->fence_reference(ops, &fence, NULL);
 
if(proceed && finished == 0) {
/*
* Remove from the fenced list
*/
 
boolean destroyed;
 
destroyed = fenced_buffer_remove_locked(fenced_mgr, fenced_buf);
 
/* TODO: remove subsequent buffers with the same fence? */
 
assert(!destroyed);
 
fenced_buf->flags &= ~PB_USAGE_GPU_READ_WRITE;
 
ret = PIPE_OK;
}
}
 
return ret;
}
 
 
/**
* Remove as many fenced buffers from the fenced list as possible.
*
* Returns TRUE if at least one buffer was removed.
*/
static boolean
fenced_manager_check_signalled_locked(struct fenced_manager *fenced_mgr,
boolean wait)
{
struct pb_fence_ops *ops = fenced_mgr->ops;
struct list_head *curr, *next;
struct fenced_buffer *fenced_buf;
struct pipe_fence_handle *prev_fence = NULL;
boolean ret = FALSE;
 
curr = fenced_mgr->fenced.next;
next = curr->next;
while(curr != &fenced_mgr->fenced) {
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
 
if(fenced_buf->fence != prev_fence) {
int signaled;
 
if (wait) {
signaled = ops->fence_finish(ops, fenced_buf->fence, 0);
 
/*
* Don't return just now. Instead preemptively check if the
* following buffers' fences already expired, without further waits.
*/
wait = FALSE;
}
else {
signaled = ops->fence_signalled(ops, fenced_buf->fence, 0);
}
 
if (signaled != 0) {
return ret;
}
 
prev_fence = fenced_buf->fence;
}
else {
/* This buffer's fence object is identical to the previous buffer's
* fence object, so no need to check the fence again.
*/
assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0);
}
 
fenced_buffer_remove_locked(fenced_mgr, fenced_buf);
 
ret = TRUE;
 
curr = next;
next = curr->next;
}
 
return ret;
}
 
 
/**
* Try to free some GPU memory by backing it up into CPU memory.
*
* Returns TRUE if at least one buffer was freed.
*/
static boolean
fenced_manager_free_gpu_storage_locked(struct fenced_manager *fenced_mgr)
{
struct list_head *curr, *next;
struct fenced_buffer *fenced_buf;
 
curr = fenced_mgr->unfenced.next;
next = curr->next;
while(curr != &fenced_mgr->unfenced) {
fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
 
/*
* We can only move storage if the buffer is not mapped and not
* validated.
*/
if(fenced_buf->buffer &&
!fenced_buf->mapcount &&
!fenced_buf->vl) {
enum pipe_error ret;
 
ret = fenced_buffer_create_cpu_storage_locked(fenced_mgr, fenced_buf);
if(ret == PIPE_OK) {
ret = fenced_buffer_copy_storage_to_cpu_locked(fenced_buf);
if(ret == PIPE_OK) {
fenced_buffer_destroy_gpu_storage_locked(fenced_buf);
return TRUE;
}
fenced_buffer_destroy_cpu_storage_locked(fenced_buf);
}
}
 
curr = next;
next = curr->next;
}
 
return FALSE;
}
 
 
/**
* Destroy CPU storage for this buffer.
*/
static void
fenced_buffer_destroy_cpu_storage_locked(struct fenced_buffer *fenced_buf)
{
if(fenced_buf->data) {
align_free(fenced_buf->data);
fenced_buf->data = NULL;
assert(fenced_buf->mgr->cpu_total_size >= fenced_buf->size);
fenced_buf->mgr->cpu_total_size -= fenced_buf->size;
}
}
 
 
/**
* Create CPU storage for this buffer.
*/
static enum pipe_error
fenced_buffer_create_cpu_storage_locked(struct fenced_manager *fenced_mgr,
struct fenced_buffer *fenced_buf)
{
assert(!fenced_buf->data);
if(fenced_buf->data)
return PIPE_OK;
 
if (fenced_mgr->cpu_total_size + fenced_buf->size > fenced_mgr->max_cpu_total_size)
return PIPE_ERROR_OUT_OF_MEMORY;
 
fenced_buf->data = align_malloc(fenced_buf->size, fenced_buf->desc.alignment);
if(!fenced_buf->data)
return PIPE_ERROR_OUT_OF_MEMORY;
 
fenced_mgr->cpu_total_size += fenced_buf->size;
 
return PIPE_OK;
}
 
 
/**
* Destroy the GPU storage.
*/
static void
fenced_buffer_destroy_gpu_storage_locked(struct fenced_buffer *fenced_buf)
{
if(fenced_buf->buffer) {
pb_reference(&fenced_buf->buffer, NULL);
}
}
 
 
/**
* Try to create GPU storage for this buffer.
*
* This function is a shorthand around pb_manager::create_buffer for
* fenced_buffer_create_gpu_storage_locked()'s benefit.
*/
static INLINE boolean
fenced_buffer_try_create_gpu_storage_locked(struct fenced_manager *fenced_mgr,
struct fenced_buffer *fenced_buf)
{
struct pb_manager *provider = fenced_mgr->provider;
 
assert(!fenced_buf->buffer);
 
fenced_buf->buffer = provider->create_buffer(fenced_mgr->provider,
fenced_buf->size,
&fenced_buf->desc);
return fenced_buf->buffer ? TRUE : FALSE;
}
 
 
/**
* Create GPU storage for this buffer.
*/
static enum pipe_error
fenced_buffer_create_gpu_storage_locked(struct fenced_manager *fenced_mgr,
struct fenced_buffer *fenced_buf,
boolean wait)
{
assert(!fenced_buf->buffer);
 
/*
* Check for signaled buffers before trying to allocate.
*/
fenced_manager_check_signalled_locked(fenced_mgr, FALSE);
 
fenced_buffer_try_create_gpu_storage_locked(fenced_mgr, fenced_buf);
 
/*
* Keep trying while there is some sort of progress:
* - fences are expiring,
* - or buffers are being swapped out from GPU memory into CPU memory.
*/
while(!fenced_buf->buffer &&
(fenced_manager_check_signalled_locked(fenced_mgr, FALSE) ||
fenced_manager_free_gpu_storage_locked(fenced_mgr))) {
fenced_buffer_try_create_gpu_storage_locked(fenced_mgr, fenced_buf);
}
 
if(!fenced_buf->buffer && wait) {
/*
* Same as before, but this time around, wait to free buffers if
* necessary.
*/
while(!fenced_buf->buffer &&
(fenced_manager_check_signalled_locked(fenced_mgr, TRUE) ||
fenced_manager_free_gpu_storage_locked(fenced_mgr))) {
fenced_buffer_try_create_gpu_storage_locked(fenced_mgr, fenced_buf);
}
}
 
if(!fenced_buf->buffer) {
if(0)
fenced_manager_dump_locked(fenced_mgr);
 
/* give up */
return PIPE_ERROR_OUT_OF_MEMORY;
}
 
return PIPE_OK;
}
 
 
static enum pipe_error
fenced_buffer_copy_storage_to_gpu_locked(struct fenced_buffer *fenced_buf)
{
uint8_t *map;
 
assert(fenced_buf->data);
assert(fenced_buf->buffer);
 
map = pb_map(fenced_buf->buffer, PB_USAGE_CPU_WRITE, NULL);
if(!map)
return PIPE_ERROR;
 
memcpy(map, fenced_buf->data, fenced_buf->size);
 
pb_unmap(fenced_buf->buffer);
 
return PIPE_OK;
}
 
 
static enum pipe_error
fenced_buffer_copy_storage_to_cpu_locked(struct fenced_buffer *fenced_buf)
{
const uint8_t *map;
 
assert(fenced_buf->data);
assert(fenced_buf->buffer);
 
map = pb_map(fenced_buf->buffer, PB_USAGE_CPU_READ, NULL);
if(!map)
return PIPE_ERROR;
 
memcpy(fenced_buf->data, map, fenced_buf->size);
 
pb_unmap(fenced_buf->buffer);
 
return PIPE_OK;
}
 
 
static void
fenced_buffer_destroy(struct pb_buffer *buf)
{
struct fenced_buffer *fenced_buf = fenced_buffer(buf);
struct fenced_manager *fenced_mgr = fenced_buf->mgr;
 
assert(!pipe_is_referenced(&fenced_buf->base.reference));
 
pipe_mutex_lock(fenced_mgr->mutex);
 
fenced_buffer_destroy_locked(fenced_mgr, fenced_buf);
 
pipe_mutex_unlock(fenced_mgr->mutex);
}
 
 
static void *
fenced_buffer_map(struct pb_buffer *buf,
unsigned flags, void *flush_ctx)
{
struct fenced_buffer *fenced_buf = fenced_buffer(buf);
struct fenced_manager *fenced_mgr = fenced_buf->mgr;
struct pb_fence_ops *ops = fenced_mgr->ops;
void *map = NULL;
 
pipe_mutex_lock(fenced_mgr->mutex);
 
assert(!(flags & PB_USAGE_GPU_READ_WRITE));
 
/*
* Serialize writes.
*/
while((fenced_buf->flags & PB_USAGE_GPU_WRITE) ||
((fenced_buf->flags & PB_USAGE_GPU_READ) &&
(flags & PB_USAGE_CPU_WRITE))) {
 
/*
* Don't wait for the GPU to finish accessing it, if blocking is forbidden.
*/
if((flags & PB_USAGE_DONTBLOCK) &&
ops->fence_signalled(ops, fenced_buf->fence, 0) != 0) {
goto done;
}
 
if (flags & PB_USAGE_UNSYNCHRONIZED) {
break;
}
 
/*
* Wait for the GPU to finish accessing. This will release and re-acquire
* the mutex, so all copies of mutable state must be discarded.
*/
fenced_buffer_finish_locked(fenced_mgr, fenced_buf);
}
 
if(fenced_buf->buffer) {
map = pb_map(fenced_buf->buffer, flags, flush_ctx);
}
else {
assert(fenced_buf->data);
map = fenced_buf->data;
}
 
if(map) {
++fenced_buf->mapcount;
fenced_buf->flags |= flags & PB_USAGE_CPU_READ_WRITE;
}
 
done:
pipe_mutex_unlock(fenced_mgr->mutex);
 
return map;
}
 
 
static void
fenced_buffer_unmap(struct pb_buffer *buf)
{
struct fenced_buffer *fenced_buf = fenced_buffer(buf);
struct fenced_manager *fenced_mgr = fenced_buf->mgr;
 
pipe_mutex_lock(fenced_mgr->mutex);
 
assert(fenced_buf->mapcount);
if(fenced_buf->mapcount) {
if (fenced_buf->buffer)
pb_unmap(fenced_buf->buffer);
--fenced_buf->mapcount;
if(!fenced_buf->mapcount)
fenced_buf->flags &= ~PB_USAGE_CPU_READ_WRITE;
}
 
pipe_mutex_unlock(fenced_mgr->mutex);
}
 
 
static enum pipe_error
fenced_buffer_validate(struct pb_buffer *buf,
struct pb_validate *vl,
unsigned flags)
{
struct fenced_buffer *fenced_buf = fenced_buffer(buf);
struct fenced_manager *fenced_mgr = fenced_buf->mgr;
enum pipe_error ret;
 
pipe_mutex_lock(fenced_mgr->mutex);
 
if(!vl) {
/* invalidate */
fenced_buf->vl = NULL;
fenced_buf->validation_flags = 0;
ret = PIPE_OK;
goto done;
}
 
assert(flags & PB_USAGE_GPU_READ_WRITE);
assert(!(flags & ~PB_USAGE_GPU_READ_WRITE));
flags &= PB_USAGE_GPU_READ_WRITE;
 
/* Buffer cannot be validated in two different lists */
if(fenced_buf->vl && fenced_buf->vl != vl) {
ret = PIPE_ERROR_RETRY;
goto done;
}
 
if(fenced_buf->vl == vl &&
(fenced_buf->validation_flags & flags) == flags) {
/* Nothing to do -- buffer already validated */
ret = PIPE_OK;
goto done;
}
 
/*
* Create and update GPU storage.
*/
if(!fenced_buf->buffer) {
assert(!fenced_buf->mapcount);
 
ret = fenced_buffer_create_gpu_storage_locked(fenced_mgr, fenced_buf, TRUE);
if(ret != PIPE_OK) {
goto done;
}
 
ret = fenced_buffer_copy_storage_to_gpu_locked(fenced_buf);
if(ret != PIPE_OK) {
fenced_buffer_destroy_gpu_storage_locked(fenced_buf);
goto done;
}
 
if(fenced_buf->mapcount) {
debug_printf("warning: validating a buffer while it is still mapped\n");
}
else {
fenced_buffer_destroy_cpu_storage_locked(fenced_buf);
}
}
 
ret = pb_validate(fenced_buf->buffer, vl, flags);
if (ret != PIPE_OK)
goto done;
 
fenced_buf->vl = vl;
fenced_buf->validation_flags |= flags;
 
done:
pipe_mutex_unlock(fenced_mgr->mutex);
 
return ret;
}
 
 
static void
fenced_buffer_fence(struct pb_buffer *buf,
struct pipe_fence_handle *fence)
{
struct fenced_buffer *fenced_buf = fenced_buffer(buf);
struct fenced_manager *fenced_mgr = fenced_buf->mgr;
struct pb_fence_ops *ops = fenced_mgr->ops;
 
pipe_mutex_lock(fenced_mgr->mutex);
 
assert(pipe_is_referenced(&fenced_buf->base.reference));
assert(fenced_buf->buffer);
 
if(fence != fenced_buf->fence) {
assert(fenced_buf->vl);
assert(fenced_buf->validation_flags);
 
if (fenced_buf->fence) {
boolean destroyed;
destroyed = fenced_buffer_remove_locked(fenced_mgr, fenced_buf);
assert(!destroyed);
}
if (fence) {
ops->fence_reference(ops, &fenced_buf->fence, fence);
fenced_buf->flags |= fenced_buf->validation_flags;
fenced_buffer_add_locked(fenced_mgr, fenced_buf);
}
 
pb_fence(fenced_buf->buffer, fence);
 
fenced_buf->vl = NULL;
fenced_buf->validation_flags = 0;
}
 
pipe_mutex_unlock(fenced_mgr->mutex);
}
 
 
static void
fenced_buffer_get_base_buffer(struct pb_buffer *buf,
struct pb_buffer **base_buf,
pb_size *offset)
{
struct fenced_buffer *fenced_buf = fenced_buffer(buf);
struct fenced_manager *fenced_mgr = fenced_buf->mgr;
 
pipe_mutex_lock(fenced_mgr->mutex);
 
/*
* This should only be called when the buffer is validated. Typically
* when processing relocations.
*/
assert(fenced_buf->vl);
assert(fenced_buf->buffer);
 
if(fenced_buf->buffer)
pb_get_base_buffer(fenced_buf->buffer, base_buf, offset);
else {
*base_buf = buf;
*offset = 0;
}
 
pipe_mutex_unlock(fenced_mgr->mutex);
}
 
 
static const struct pb_vtbl
fenced_buffer_vtbl = {
fenced_buffer_destroy,
fenced_buffer_map,
fenced_buffer_unmap,
fenced_buffer_validate,
fenced_buffer_fence,
fenced_buffer_get_base_buffer
};
 
 
/**
* Wrap a buffer in a fenced buffer.
*/
static struct pb_buffer *
fenced_bufmgr_create_buffer(struct pb_manager *mgr,
pb_size size,
const struct pb_desc *desc)
{
struct fenced_manager *fenced_mgr = fenced_manager(mgr);
struct fenced_buffer *fenced_buf;
enum pipe_error ret;
 
/*
* Don't stall the GPU, waste time evicting buffers, or waste memory
* trying to create a buffer that will most likely never fit into the
* graphics aperture.
*/
if(size > fenced_mgr->max_buffer_size) {
goto no_buffer;
}
 
fenced_buf = CALLOC_STRUCT(fenced_buffer);
if(!fenced_buf)
goto no_buffer;
 
pipe_reference_init(&fenced_buf->base.reference, 1);
fenced_buf->base.alignment = desc->alignment;
fenced_buf->base.usage = desc->usage;
fenced_buf->base.size = size;
fenced_buf->size = size;
fenced_buf->desc = *desc;
 
fenced_buf->base.vtbl = &fenced_buffer_vtbl;
fenced_buf->mgr = fenced_mgr;
 
pipe_mutex_lock(fenced_mgr->mutex);
 
/*
* Try to create GPU storage without stalling.
*/
ret = fenced_buffer_create_gpu_storage_locked(fenced_mgr, fenced_buf, FALSE);
 
/*
* Attempt to use CPU memory to avoid stalling the GPU.
*/
if(ret != PIPE_OK) {
ret = fenced_buffer_create_cpu_storage_locked(fenced_mgr, fenced_buf);
}
 
/*
* Create GPU storage, waiting for some to be available.
*/
if(ret != PIPE_OK) {
ret = fenced_buffer_create_gpu_storage_locked(fenced_mgr, fenced_buf, TRUE);
}
 
/*
* Give up.
*/
if(ret != PIPE_OK) {
goto no_storage;
}
 
assert(fenced_buf->buffer || fenced_buf->data);
 
LIST_ADDTAIL(&fenced_buf->head, &fenced_mgr->unfenced);
++fenced_mgr->num_unfenced;
pipe_mutex_unlock(fenced_mgr->mutex);
 
return &fenced_buf->base;
 
no_storage:
pipe_mutex_unlock(fenced_mgr->mutex);
FREE(fenced_buf);
no_buffer:
return NULL;
}
 
 
static void
fenced_bufmgr_flush(struct pb_manager *mgr)
{
struct fenced_manager *fenced_mgr = fenced_manager(mgr);
 
pipe_mutex_lock(fenced_mgr->mutex);
while(fenced_manager_check_signalled_locked(fenced_mgr, TRUE))
;
pipe_mutex_unlock(fenced_mgr->mutex);
 
assert(fenced_mgr->provider->flush);
if(fenced_mgr->provider->flush)
fenced_mgr->provider->flush(fenced_mgr->provider);
}
 
 
static void
fenced_bufmgr_destroy(struct pb_manager *mgr)
{
struct fenced_manager *fenced_mgr = fenced_manager(mgr);
 
pipe_mutex_lock(fenced_mgr->mutex);
 
/* Wait on outstanding fences */
while (fenced_mgr->num_fenced) {
pipe_mutex_unlock(fenced_mgr->mutex);
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
sched_yield();
#endif
pipe_mutex_lock(fenced_mgr->mutex);
while(fenced_manager_check_signalled_locked(fenced_mgr, TRUE))
;
}
 
#ifdef DEBUG
/*assert(!fenced_mgr->num_unfenced);*/
#endif
 
pipe_mutex_unlock(fenced_mgr->mutex);
pipe_mutex_destroy(fenced_mgr->mutex);
 
if(fenced_mgr->provider)
fenced_mgr->provider->destroy(fenced_mgr->provider);
 
fenced_mgr->ops->destroy(fenced_mgr->ops);
 
FREE(fenced_mgr);
}
 
 
struct pb_manager *
fenced_bufmgr_create(struct pb_manager *provider,
struct pb_fence_ops *ops,
pb_size max_buffer_size,
pb_size max_cpu_total_size)
{
struct fenced_manager *fenced_mgr;
 
if(!provider)
return NULL;
 
fenced_mgr = CALLOC_STRUCT(fenced_manager);
if (!fenced_mgr)
return NULL;
 
fenced_mgr->base.destroy = fenced_bufmgr_destroy;
fenced_mgr->base.create_buffer = fenced_bufmgr_create_buffer;
fenced_mgr->base.flush = fenced_bufmgr_flush;
 
fenced_mgr->provider = provider;
fenced_mgr->ops = ops;
fenced_mgr->max_buffer_size = max_buffer_size;
fenced_mgr->max_cpu_total_size = max_cpu_total_size;
 
LIST_INITHEAD(&fenced_mgr->fenced);
fenced_mgr->num_fenced = 0;
 
LIST_INITHEAD(&fenced_mgr->unfenced);
fenced_mgr->num_unfenced = 0;
 
pipe_mutex_init(fenced_mgr->mutex);
 
return &fenced_mgr->base;
}
/drivers/video/Gallium/auxiliary/pipebuffer/pb_buffer_fenced.h
0,0 → 1,104
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/**
* \file
* Buffer fencing.
*
* "Fenced buffers" is actually a misnomer. They should be referred as
* "fenceable buffers", i.e, buffers that can be fenced, but I couldn't find
* the word "fenceable" in the dictionary.
*
* A "fenced buffer" is a decorator around a normal buffer, which adds two
* special properties:
* - the ability for the destruction to be delayed by a fence;
* - reference counting.
*
* Usually DMA buffers have a life-time that will extend the life-time of its
* handle. The end-of-life is dictated by the fence signalling.
*
* Between the handle's destruction, and the fence signalling, the buffer is
* stored in a fenced buffer list.
*
* \author Jose Fonseca <jrfonseca@tungstengraphics.com>
*/
 
#ifndef PB_BUFFER_FENCED_H_
#define PB_BUFFER_FENCED_H_
 
 
#include "util/u_debug.h"
 
 
#ifdef __cplusplus
extern "C" {
#endif
 
 
struct pipe_fence_handle;
 
 
/**
* List of buffers which are awaiting fence signalling.
*/
struct fenced_buffer_list;
 
 
struct pb_fence_ops
{
void (*destroy)( struct pb_fence_ops *ops );
 
/** Set ptr = fence, with reference counting */
void (*fence_reference)( struct pb_fence_ops *ops,
struct pipe_fence_handle **ptr,
struct pipe_fence_handle *fence );
 
/**
* Checks whether the fence has been signalled.
* \param flag driver-specific meaning
* \return zero on success.
*/
int (*fence_signalled)( struct pb_fence_ops *ops,
struct pipe_fence_handle *fence,
unsigned flag );
 
/**
* Wait for the fence to finish.
* \param flag driver-specific meaning
* \return zero on success.
*/
int (*fence_finish)( struct pb_fence_ops *ops,
struct pipe_fence_handle *fence,
unsigned flag );
};
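 
/*
 * Illustrative sketch (not part of the original header): waiting for a
 * buffer's fence through the ops table while holding a local reference,
 * mirroring what the fenced buffer manager does. "buf_fence" is an assumed
 * caller-provided fence handle.
 *
 *    struct pipe_fence_handle *fence = NULL;
 *    ops->fence_reference(ops, &fence, buf_fence);
 *    if (ops->fence_finish(ops, fence, 0) != 0)
 *       debug_printf("fence wait failed\n");
 *    ops->fence_reference(ops, &fence, NULL);
 */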
 
 
#ifdef __cplusplus
}
#endif
 
#endif /*PB_BUFFER_FENCED_H_*/
/drivers/video/Gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
0,0 → 1,198
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/**
* \file
* Implementation of malloc-based buffers to store data that can't be processed
* by the hardware.
*
* \author Jose Fonseca <jrfonseca@tungstengraphics.com>
*/
 
 
#include "util/u_debug.h"
#include "util/u_memory.h"
#include "pb_buffer.h"
#include "pb_bufmgr.h"
 
 
struct malloc_buffer
{
struct pb_buffer base;
void *data;
};
 
 
extern const struct pb_vtbl malloc_buffer_vtbl;
 
static INLINE struct malloc_buffer *
malloc_buffer(struct pb_buffer *buf)
{
assert(buf);
if (!buf)
return NULL;
assert(buf->vtbl == &malloc_buffer_vtbl);
return (struct malloc_buffer *)buf;
}
 
 
static void
malloc_buffer_destroy(struct pb_buffer *buf)
{
align_free(malloc_buffer(buf)->data);
FREE(buf);
}
 
 
static void *
malloc_buffer_map(struct pb_buffer *buf,
unsigned flags,
void *flush_ctx)
{
return malloc_buffer(buf)->data;
}
 
 
static void
malloc_buffer_unmap(struct pb_buffer *buf)
{
/* No-op */
}
 
 
static enum pipe_error
malloc_buffer_validate(struct pb_buffer *buf,
struct pb_validate *vl,
unsigned flags)
{
assert(0);
return PIPE_ERROR;
}
 
 
static void
malloc_buffer_fence(struct pb_buffer *buf,
struct pipe_fence_handle *fence)
{
assert(0);
}
 
 
static void
malloc_buffer_get_base_buffer(struct pb_buffer *buf,
struct pb_buffer **base_buf,
pb_size *offset)
{
*base_buf = buf;
*offset = 0;
}
 
 
const struct pb_vtbl
malloc_buffer_vtbl = {
malloc_buffer_destroy,
malloc_buffer_map,
malloc_buffer_unmap,
malloc_buffer_validate,
malloc_buffer_fence,
malloc_buffer_get_base_buffer
};
 
 
struct pb_buffer *
pb_malloc_buffer_create(pb_size size,
const struct pb_desc *desc)
{
struct malloc_buffer *buf;
/* TODO: do a single allocation */
buf = CALLOC_STRUCT(malloc_buffer);
if(!buf)
return NULL;
 
pipe_reference_init(&buf->base.reference, 1);
buf->base.usage = desc->usage;
buf->base.size = size;
buf->base.alignment = desc->alignment;
buf->base.vtbl = &malloc_buffer_vtbl;
 
buf->data = align_malloc(size, desc->alignment < sizeof(void*) ? sizeof(void*) : desc->alignment);
if(!buf->data) {
FREE(buf);
return NULL;
}
 
return &buf->base;
}
 
 
static struct pb_buffer *
pb_malloc_bufmgr_create_buffer(struct pb_manager *mgr,
pb_size size,
const struct pb_desc *desc)
{
return pb_malloc_buffer_create(size, desc);
}
 
 
static void
pb_malloc_bufmgr_flush(struct pb_manager *mgr)
{
/* No-op */
}
 
 
static void
pb_malloc_bufmgr_destroy(struct pb_manager *mgr)
{
/* No-op */
}
 
 
static boolean
pb_malloc_bufmgr_is_buffer_busy( struct pb_manager *mgr,
struct pb_buffer *buf )
{
return FALSE;
}
 
 
static struct pb_manager
pb_malloc_bufmgr = {
pb_malloc_bufmgr_destroy,
pb_malloc_bufmgr_create_buffer,
pb_malloc_bufmgr_flush,
pb_malloc_bufmgr_is_buffer_busy
};
 
 
struct pb_manager *
pb_malloc_bufmgr_create(void)
{
return &pb_malloc_bufmgr;
}
/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr.h
0,0 → 1,218
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/**
* \file
* Buffer management.
*
* A buffer manager does only one basic thing: it creates buffers. Actually,
* "buffer factory" would probably be a more accurate description.
*
* You can chain buffer managers so that you can have finer-grained memory
* management and pooling.
*
* For example, for a simple batch buffer manager you would chain (see the
* illustrative sketch after this comment):
* - the native buffer manager, which provides DMA memory from the graphics
* memory space;
* - the pool buffer manager, which keeps around a pool of equally sized buffers
* to avoid the latency associated with the native buffer manager;
* - the fenced buffer manager, which delays buffer destruction until the
* moment the card finishes processing it.
*
* \author Jose Fonseca <jrfonseca@tungstengraphics.com>
*/
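 
/*
 * Illustrative sketch (not part of the original header) of the chain described
 * above, assuming "winsys_mgr" is the native manager supplied by the winsys
 * and "ops" its fence ops; counts and sizes are example assumptions.
 *
 *    struct pb_desc desc = { 4096, PB_USAGE_GPU_READ_WRITE };
 *    struct pb_manager *mgr = winsys_mgr;
 *    mgr = pool_bufmgr_create(mgr, 16, 256*1024, &desc);
 *    mgr = fenced_bufmgr_create(mgr, ops, 16*1024*1024, 8*1024*1024);
 */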
 
#ifndef PB_BUFMGR_H_
#define PB_BUFMGR_H_
 
 
#include "pb_buffer.h"
 
 
#ifdef __cplusplus
extern "C" {
#endif
 
 
struct pb_desc;
 
 
/**
* Abstract base class for all buffer managers.
*/
struct pb_manager
{
void
(*destroy)( struct pb_manager *mgr );
 
struct pb_buffer *
(*create_buffer)( struct pb_manager *mgr,
pb_size size,
const struct pb_desc *desc);
 
/**
* Flush all temporarily held buffers.
*
* Used mostly to aid debugging memory issues or to clean up resources when
* the drivers are long lived.
*/
void
(*flush)( struct pb_manager *mgr );
 
boolean
(*is_buffer_busy)( struct pb_manager *mgr,
struct pb_buffer *buf );
};
 
 
/**
* Malloc buffer provider.
*
* Simple wrapper around pb_malloc_buffer_create for convenience.
*/
struct pb_manager *
pb_malloc_bufmgr_create(void);
 
 
/**
* Static buffer pool sub-allocator.
*
* Manages the allocation of equally sized buffers. It does so by allocating
* a single big buffer and dividing it into equally sized buffers.
*
* It is meant to manage the allocation of batch buffer pools.
*/
struct pb_manager *
pool_bufmgr_create(struct pb_manager *provider,
pb_size n, pb_size size,
const struct pb_desc *desc);
 
 
/**
* Static sub-allocator based on the old memory manager.
*
* It manages buffers of different sizes. It does so by allocating a buffer
* with the size of the heap, and then using the old mm memory manager to manage
* that heap.
*/
struct pb_manager *
mm_bufmgr_create(struct pb_manager *provider,
pb_size size, pb_size align2);
 
/**
* Same as mm_bufmgr_create.
*
* The buffer will be released when the manager is destroyed.
*/
struct pb_manager *
mm_bufmgr_create_from_buffer(struct pb_buffer *buffer,
pb_size size, pb_size align2);
 
 
/**
* Slab sub-allocator.
*/
struct pb_manager *
pb_slab_manager_create(struct pb_manager *provider,
pb_size bufSize,
pb_size slabSize,
const struct pb_desc *desc);
 
/**
* Allows a range of buffer sizes by aggregating multiple slab sub-allocators
* with different bucket sizes.
*/
struct pb_manager *
pb_slab_range_manager_create(struct pb_manager *provider,
pb_size minBufSize,
pb_size maxBufSize,
pb_size slabSize,
const struct pb_desc *desc);
 
 
/**
* Time-based buffer cache.
*
* This manager keeps a cache of destroyed buffers for a time interval, so that
* they can be reused by subsequent allocations of compatible size and usage.
*/
struct pb_manager *
pb_cache_manager_create(struct pb_manager *provider,
unsigned usecs);
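 
/*
* Usage sketch for pb_cache_manager_create (illustrative value): keep
* destroyed buffers around for one second (the interval is expressed in
* microseconds) before really releasing them to the provider.
*
* struct pb_manager *cached = pb_cache_manager_create(provider, 1000000);
*/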
 
 
struct pb_fence_ops;
 
/**
* Fenced buffer manager.
*
* This manager is just meant for convenience. It wraps the buffers returned
* by another manager in fenced buffers, so that their destruction is delayed
* until the card has finished processing them.
*
* NOTE: the buffer manager that provides the buffers will be destroyed
* at the same time as this manager.
*/
struct pb_manager *
fenced_bufmgr_create(struct pb_manager *provider,
struct pb_fence_ops *ops,
pb_size max_buffer_size,
pb_size max_cpu_total_size);
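 
/**
* Alternative buffer provider.
*
* Tries provider1 first and falls back to provider2 when the first
* allocation fails (see pb_bufmgr_alt.c).
*/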
 
 
struct pb_manager *
pb_alt_manager_create(struct pb_manager *provider1,
struct pb_manager *provider2);
 
 
/**
* Ondemand buffer manager.
*
* Buffers are created in malloc'ed memory (fast and cached), and the contents
* are transferred to a buffer from the provider (typically in slow uncached
* memory) when there is an attempt to validate the buffer.
*
* Ideal for situations where one does not know beforehand whether a given
* buffer will actually be used by the hardware or not.
*/
struct pb_manager *
pb_ondemand_manager_create(struct pb_manager *provider);
 
 
/**
* Debug buffer manager to detect buffer under- and overflows.
*
* Under/overflow sizes should be a multiple of the largest alignment.
*/
struct pb_manager *
pb_debug_manager_create(struct pb_manager *provider,
pb_size underflow_size, pb_size overflow_size);
 
 
#ifdef __cplusplus
}
#endif
 
#endif /*PB_BUFMGR_H_*/
/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c
0,0 → 1,120
/**************************************************************************
*
* Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/**
* \file
* Allocate buffers from two alternative buffer providers.
*
* \author Jose Fonseca <jrfonseca@tungstengraphics.com>
*/
 
 
#include "pipe/p_compiler.h"
#include "util/u_debug.h"
#include "util/u_memory.h"
 
#include "pb_buffer.h"
#include "pb_bufmgr.h"
 
 
struct pb_alt_manager
{
struct pb_manager base;
 
struct pb_manager *provider1;
struct pb_manager *provider2;
};
 
 
static INLINE struct pb_alt_manager *
pb_alt_manager(struct pb_manager *mgr)
{
assert(mgr);
return (struct pb_alt_manager *)mgr;
}
 
 
static struct pb_buffer *
pb_alt_manager_create_buffer(struct pb_manager *_mgr,
pb_size size,
const struct pb_desc *desc)
{
struct pb_alt_manager *mgr = pb_alt_manager(_mgr);
struct pb_buffer *buf;
buf = mgr->provider1->create_buffer(mgr->provider1, size, desc);
if(buf)
return buf;
buf = mgr->provider2->create_buffer(mgr->provider2, size, desc);
return buf;
}
 
 
static void
pb_alt_manager_flush(struct pb_manager *_mgr)
{
struct pb_alt_manager *mgr = pb_alt_manager(_mgr);
assert(mgr->provider1->flush);
if(mgr->provider1->flush)
mgr->provider1->flush(mgr->provider1);
assert(mgr->provider2->flush);
if(mgr->provider2->flush)
mgr->provider2->flush(mgr->provider2);
}
 
 
static void
pb_alt_manager_destroy(struct pb_manager *mgr)
{
FREE(mgr);
}
 
 
struct pb_manager *
pb_alt_manager_create(struct pb_manager *provider1,
struct pb_manager *provider2)
{
struct pb_alt_manager *mgr;
 
if(!provider1 || !provider2)
return NULL;
mgr = CALLOC_STRUCT(pb_alt_manager);
if (!mgr)
return NULL;
 
mgr->base.destroy = pb_alt_manager_destroy;
mgr->base.create_buffer = pb_alt_manager_create_buffer;
mgr->base.flush = pb_alt_manager_flush;
mgr->provider1 = provider1;
mgr->provider2 = provider2;
return &mgr->base;
}
/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
0,0 → 1,411
/**************************************************************************
*
* Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/**
* \file
* Buffer cache.
*
* \author Jose Fonseca <jrfonseca-at-tungstengraphics-dot-com>
* \author Thomas Hellström <thomas-at-tungstengraphics-dot-com>
*/
 
 
#include "pipe/p_compiler.h"
#include "util/u_debug.h"
#include "os/os_thread.h"
#include "util/u_memory.h"
#include "util/u_double_list.h"
#include "util/u_time.h"
 
#include "pb_buffer.h"
#include "pb_bufmgr.h"
 
 
/**
* Convenience macro (type safe).
*/
#define SUPER(__derived) (&(__derived)->base)
 
 
struct pb_cache_manager;
 
 
/**
* Wrapper around a pipe buffer which adds delayed destruction.
*/
struct pb_cache_buffer
{
struct pb_buffer base;
struct pb_buffer *buffer;
struct pb_cache_manager *mgr;
 
/** Caching time interval */
int64_t start, end;
 
struct list_head head;
};
 
 
struct pb_cache_manager
{
struct pb_manager base;
 
struct pb_manager *provider;
unsigned usecs;
pipe_mutex mutex;
struct list_head delayed;
pb_size numDelayed;
};
 
 
static INLINE struct pb_cache_buffer *
pb_cache_buffer(struct pb_buffer *buf)
{
assert(buf);
return (struct pb_cache_buffer *)buf;
}
 
 
static INLINE struct pb_cache_manager *
pb_cache_manager(struct pb_manager *mgr)
{
assert(mgr);
return (struct pb_cache_manager *)mgr;
}
 
 
/**
* Actually destroy the buffer.
*/
static INLINE void
_pb_cache_buffer_destroy(struct pb_cache_buffer *buf)
{
struct pb_cache_manager *mgr = buf->mgr;
 
LIST_DEL(&buf->head);
assert(mgr->numDelayed);
--mgr->numDelayed;
assert(!pipe_is_referenced(&buf->base.reference));
pb_reference(&buf->buffer, NULL);
FREE(buf);
}
 
 
/**
* Free as many cache buffers from the list head as possible.
*/
static void
_pb_cache_buffer_list_check_free(struct pb_cache_manager *mgr)
{
struct list_head *curr, *next;
struct pb_cache_buffer *buf;
int64_t now;
now = os_time_get();
curr = mgr->delayed.next;
next = curr->next;
while(curr != &mgr->delayed) {
buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
 
if(!os_time_timeout(buf->start, buf->end, now))
break;
_pb_cache_buffer_destroy(buf);
 
curr = next;
next = curr->next;
}
}
 
 
static void
pb_cache_buffer_destroy(struct pb_buffer *_buf)
{
struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
struct pb_cache_manager *mgr = buf->mgr;
 
pipe_mutex_lock(mgr->mutex);
assert(!pipe_is_referenced(&buf->base.reference));
_pb_cache_buffer_list_check_free(mgr);
buf->start = os_time_get();
buf->end = buf->start + mgr->usecs;
LIST_ADDTAIL(&buf->head, &mgr->delayed);
++mgr->numDelayed;
pipe_mutex_unlock(mgr->mutex);
}
 
 
static void *
pb_cache_buffer_map(struct pb_buffer *_buf,
unsigned flags, void *flush_ctx)
{
struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
return pb_map(buf->buffer, flags, flush_ctx);
}
 
 
static void
pb_cache_buffer_unmap(struct pb_buffer *_buf)
{
struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
pb_unmap(buf->buffer);
}
 
 
static enum pipe_error
pb_cache_buffer_validate(struct pb_buffer *_buf,
struct pb_validate *vl,
unsigned flags)
{
struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
return pb_validate(buf->buffer, vl, flags);
}
 
 
static void
pb_cache_buffer_fence(struct pb_buffer *_buf,
struct pipe_fence_handle *fence)
{
struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
pb_fence(buf->buffer, fence);
}
 
 
static void
pb_cache_buffer_get_base_buffer(struct pb_buffer *_buf,
struct pb_buffer **base_buf,
pb_size *offset)
{
struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
pb_get_base_buffer(buf->buffer, base_buf, offset);
}
 
 
const struct pb_vtbl
pb_cache_buffer_vtbl = {
pb_cache_buffer_destroy,
pb_cache_buffer_map,
pb_cache_buffer_unmap,
pb_cache_buffer_validate,
pb_cache_buffer_fence,
pb_cache_buffer_get_base_buffer
};
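 
/*
* pb_cache_is_buffer_compat (below) decides whether a cached buffer can
* satisfy a new request: it returns 1 when the buffer is compatible and idle
* (reuse it), 0 when it is incompatible (keep scanning), and -1 when it is
* compatible but still busy on the hardware (the caller stops scanning).
*/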
 
 
static INLINE int
pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,
pb_size size,
const struct pb_desc *desc)
{
if(buf->base.size < size)
return 0;
 
/* be lenient with size */
if(buf->base.size >= 2*size)
return 0;
if(!pb_check_alignment(desc->alignment, buf->base.alignment))
return 0;
if(!pb_check_usage(desc->usage, buf->base.usage))
return 0;
 
if (buf->mgr->provider->is_buffer_busy) {
if (buf->mgr->provider->is_buffer_busy(buf->mgr->provider, buf->buffer))
return -1;
} else {
void *ptr = pb_map(buf->buffer, PB_USAGE_DONTBLOCK, NULL);
 
if (!ptr)
return -1;
 
pb_unmap(buf->buffer);
}
 
return 1;
}
 
 
static struct pb_buffer *
pb_cache_manager_create_buffer(struct pb_manager *_mgr,
pb_size size,
const struct pb_desc *desc)
{
struct pb_cache_manager *mgr = pb_cache_manager(_mgr);
struct pb_cache_buffer *buf;
struct pb_cache_buffer *curr_buf;
struct list_head *curr, *next;
int64_t now;
int ret = 0;
 
pipe_mutex_lock(mgr->mutex);
 
buf = NULL;
curr = mgr->delayed.next;
next = curr->next;
/* search in the expired buffers, freeing them in the process */
now = os_time_get();
while(curr != &mgr->delayed) {
curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
if(!buf && (ret = pb_cache_is_buffer_compat(curr_buf, size, desc)) > 0)
buf = curr_buf;
else if(os_time_timeout(curr_buf->start, curr_buf->end, now))
_pb_cache_buffer_destroy(curr_buf);
else
/* This buffer (and all hereafter) are still hot in cache */
break;
if (ret == -1)
break;
curr = next;
next = curr->next;
}
 
/* keep searching in the hot buffers */
if(!buf && ret != -1) {
while(curr != &mgr->delayed) {
curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
ret = pb_cache_is_buffer_compat(curr_buf, size, desc);
if (ret > 0) {
buf = curr_buf;
break;
}
if (ret == -1)
break;
/* no need to check the timeout here */
curr = next;
next = curr->next;
}
}
if(buf) {
LIST_DEL(&buf->head);
--mgr->numDelayed;
pipe_mutex_unlock(mgr->mutex);
/* Increase refcount */
pipe_reference_init(&buf->base.reference, 1);
return &buf->base;
}
pipe_mutex_unlock(mgr->mutex);
 
buf = CALLOC_STRUCT(pb_cache_buffer);
if(!buf)
return NULL;
buf->buffer = mgr->provider->create_buffer(mgr->provider, size, desc);
 
/* Empty the cache and try again. */
if (!buf->buffer) {
mgr->base.flush(&mgr->base);
buf->buffer = mgr->provider->create_buffer(mgr->provider, size, desc);
}
 
if(!buf->buffer) {
FREE(buf);
return NULL;
}
assert(pipe_is_referenced(&buf->buffer->reference));
assert(pb_check_alignment(desc->alignment, buf->buffer->alignment));
assert(pb_check_usage(desc->usage, buf->buffer->usage));
assert(buf->buffer->size >= size);
pipe_reference_init(&buf->base.reference, 1);
buf->base.alignment = buf->buffer->alignment;
buf->base.usage = buf->buffer->usage;
buf->base.size = buf->buffer->size;
buf->base.vtbl = &pb_cache_buffer_vtbl;
buf->mgr = mgr;
return &buf->base;
}
 
 
static void
pb_cache_manager_flush(struct pb_manager *_mgr)
{
struct pb_cache_manager *mgr = pb_cache_manager(_mgr);
struct list_head *curr, *next;
struct pb_cache_buffer *buf;
 
pipe_mutex_lock(mgr->mutex);
curr = mgr->delayed.next;
next = curr->next;
while(curr != &mgr->delayed) {
buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
_pb_cache_buffer_destroy(buf);
curr = next;
next = curr->next;
}
pipe_mutex_unlock(mgr->mutex);
assert(mgr->provider->flush);
if(mgr->provider->flush)
mgr->provider->flush(mgr->provider);
}
 
 
static void
pb_cache_manager_destroy(struct pb_manager *mgr)
{
pb_cache_manager_flush(mgr);
FREE(mgr);
}
 
 
struct pb_manager *
pb_cache_manager_create(struct pb_manager *provider,
unsigned usecs)
{
struct pb_cache_manager *mgr;
 
if(!provider)
return NULL;
mgr = CALLOC_STRUCT(pb_cache_manager);
if (!mgr)
return NULL;
 
mgr->base.destroy = pb_cache_manager_destroy;
mgr->base.create_buffer = pb_cache_manager_create_buffer;
mgr->base.flush = pb_cache_manager_flush;
mgr->provider = provider;
mgr->usecs = usecs;
LIST_INITHEAD(&mgr->delayed);
mgr->numDelayed = 0;
pipe_mutex_init(mgr->mutex);
return &mgr->base;
}
/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
0,0 → 1,497
/**************************************************************************
*
* Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/**
* \file
* Debug buffer manager to detect buffer under- and overflows.
*
* \author Jose Fonseca <jrfonseca@tungstengraphics.com>
*/
 
 
#include "pipe/p_compiler.h"
#include "util/u_debug.h"
#include "os/os_thread.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_double_list.h"
#include "util/u_time.h"
#include "util/u_debug_stack.h"
 
#include "pb_buffer.h"
#include "pb_bufmgr.h"
 
 
#ifdef DEBUG
 
 
#define PB_DEBUG_CREATE_BACKTRACE 8
#define PB_DEBUG_MAP_BACKTRACE 8
 
 
/**
* Convenience macro (type safe).
*/
#define SUPER(__derived) (&(__derived)->base)
 
 
struct pb_debug_manager;
 
 
/**
* Wrapper around a pipe buffer which adds delayed destruction.
*/
struct pb_debug_buffer
{
struct pb_buffer base;
struct pb_buffer *buffer;
struct pb_debug_manager *mgr;
pb_size underflow_size;
pb_size overflow_size;
 
struct debug_stack_frame create_backtrace[PB_DEBUG_CREATE_BACKTRACE];
 
pipe_mutex mutex;
unsigned map_count;
struct debug_stack_frame map_backtrace[PB_DEBUG_MAP_BACKTRACE];
struct list_head head;
};
 
 
struct pb_debug_manager
{
struct pb_manager base;
 
struct pb_manager *provider;
 
pb_size underflow_size;
pb_size overflow_size;
pipe_mutex mutex;
struct list_head list;
};
 
 
static INLINE struct pb_debug_buffer *
pb_debug_buffer(struct pb_buffer *buf)
{
assert(buf);
return (struct pb_debug_buffer *)buf;
}
 
 
static INLINE struct pb_debug_manager *
pb_debug_manager(struct pb_manager *mgr)
{
assert(mgr);
return (struct pb_debug_manager *)mgr;
}
 
 
static const uint8_t random_pattern[32] = {
0xaf, 0xcf, 0xa5, 0xa2, 0xc2, 0x63, 0x15, 0x1a,
0x7e, 0xe2, 0x7e, 0x84, 0x15, 0x49, 0xa2, 0x1e,
0x49, 0x63, 0xf5, 0x52, 0x74, 0x66, 0x9e, 0xc4,
0x6d, 0xcf, 0x2c, 0x4a, 0x74, 0xe6, 0xfd, 0x94
};
 
 
static INLINE void
fill_random_pattern(uint8_t *dst, pb_size size)
{
pb_size i = 0;
while(size--) {
*dst++ = random_pattern[i++];
i &= sizeof(random_pattern) - 1;
}
}
 
 
static INLINE boolean
check_random_pattern(const uint8_t *dst, pb_size size,
pb_size *min_ofs, pb_size *max_ofs)
{
boolean result = TRUE;
pb_size i;
*min_ofs = size;
*max_ofs = 0;
for(i = 0; i < size; ++i) {
if(*dst++ != random_pattern[i % sizeof(random_pattern)]) {
*min_ofs = MIN2(*min_ofs, i);
*max_ofs = MAX2(*max_ofs, i);
result = FALSE;
}
}
return result;
}
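 
/*
* Layout note: the underlying provider buffer is
*
* | underflow_size | base.size (user data) | overflow_size |
*
* pb_debug_buffer_fill writes random_pattern into the two pad regions and
* pb_debug_buffer_check verifies them on map/unmap/validate/destroy, dumping
* the creation backtrace when an under- or overflow is detected.
*/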
 
 
static void
pb_debug_buffer_fill(struct pb_debug_buffer *buf)
{
uint8_t *map;
map = pb_map(buf->buffer, PB_USAGE_CPU_WRITE, NULL);
assert(map);
if(map) {
fill_random_pattern(map, buf->underflow_size);
fill_random_pattern(map + buf->underflow_size + buf->base.size,
buf->overflow_size);
pb_unmap(buf->buffer);
}
}
 
 
/**
* Check for under/over flows.
*
* Should be called with the buffer unmapped.
*/
static void
pb_debug_buffer_check(struct pb_debug_buffer *buf)
{
uint8_t *map;
map = pb_map(buf->buffer,
PB_USAGE_CPU_READ |
PB_USAGE_UNSYNCHRONIZED, NULL);
assert(map);
if(map) {
boolean underflow, overflow;
pb_size min_ofs, max_ofs;
underflow = !check_random_pattern(map, buf->underflow_size,
&min_ofs, &max_ofs);
if(underflow) {
debug_printf("buffer underflow (offset -%u%s to -%u bytes) detected\n",
buf->underflow_size - min_ofs,
min_ofs == 0 ? "+" : "",
buf->underflow_size - max_ofs);
}
overflow = !check_random_pattern(map + buf->underflow_size + buf->base.size,
buf->overflow_size,
&min_ofs, &max_ofs);
if(overflow) {
debug_printf("buffer overflow (size %u plus offset %u to %u%s bytes) detected\n",
buf->base.size,
min_ofs,
max_ofs,
max_ofs == buf->overflow_size - 1 ? "+" : "");
}
if(underflow || overflow)
debug_backtrace_dump(buf->create_backtrace, PB_DEBUG_CREATE_BACKTRACE);
 
debug_assert(!underflow);
debug_assert(!overflow);
 
/* re-fill if not aborted */
if(underflow)
fill_random_pattern(map, buf->underflow_size);
if(overflow)
fill_random_pattern(map + buf->underflow_size + buf->base.size,
buf->overflow_size);
 
pb_unmap(buf->buffer);
}
}
 
 
static void
pb_debug_buffer_destroy(struct pb_buffer *_buf)
{
struct pb_debug_buffer *buf = pb_debug_buffer(_buf);
struct pb_debug_manager *mgr = buf->mgr;
assert(!pipe_is_referenced(&buf->base.reference));
pb_debug_buffer_check(buf);
 
pipe_mutex_lock(mgr->mutex);
LIST_DEL(&buf->head);
pipe_mutex_unlock(mgr->mutex);
 
pipe_mutex_destroy(buf->mutex);
pb_reference(&buf->buffer, NULL);
FREE(buf);
}
 
 
static void *
pb_debug_buffer_map(struct pb_buffer *_buf,
unsigned flags, void *flush_ctx)
{
struct pb_debug_buffer *buf = pb_debug_buffer(_buf);
void *map;
pb_debug_buffer_check(buf);
 
map = pb_map(buf->buffer, flags, flush_ctx);
if(!map)
return NULL;
 
pipe_mutex_lock(buf->mutex);
++buf->map_count;
debug_backtrace_capture(buf->map_backtrace, 1, PB_DEBUG_MAP_BACKTRACE);
pipe_mutex_unlock(buf->mutex);
 
return (uint8_t *)map + buf->underflow_size;
}
 
 
static void
pb_debug_buffer_unmap(struct pb_buffer *_buf)
{
struct pb_debug_buffer *buf = pb_debug_buffer(_buf);
pipe_mutex_lock(buf->mutex);
assert(buf->map_count);
if(buf->map_count)
--buf->map_count;
pipe_mutex_unlock(buf->mutex);
pb_unmap(buf->buffer);
pb_debug_buffer_check(buf);
}
 
 
static void
pb_debug_buffer_get_base_buffer(struct pb_buffer *_buf,
struct pb_buffer **base_buf,
pb_size *offset)
{
struct pb_debug_buffer *buf = pb_debug_buffer(_buf);
pb_get_base_buffer(buf->buffer, base_buf, offset);
*offset += buf->underflow_size;
}
 
 
static enum pipe_error
pb_debug_buffer_validate(struct pb_buffer *_buf,
struct pb_validate *vl,
unsigned flags)
{
struct pb_debug_buffer *buf = pb_debug_buffer(_buf);
pipe_mutex_lock(buf->mutex);
if(buf->map_count) {
debug_printf("%s: attempting to validate a mapped buffer\n", __FUNCTION__);
debug_printf("last map backtrace is\n");
debug_backtrace_dump(buf->map_backtrace, PB_DEBUG_MAP_BACKTRACE);
}
pipe_mutex_unlock(buf->mutex);
 
pb_debug_buffer_check(buf);
 
return pb_validate(buf->buffer, vl, flags);
}
 
 
static void
pb_debug_buffer_fence(struct pb_buffer *_buf,
struct pipe_fence_handle *fence)
{
struct pb_debug_buffer *buf = pb_debug_buffer(_buf);
pb_fence(buf->buffer, fence);
}
 
 
const struct pb_vtbl
pb_debug_buffer_vtbl = {
pb_debug_buffer_destroy,
pb_debug_buffer_map,
pb_debug_buffer_unmap,
pb_debug_buffer_validate,
pb_debug_buffer_fence,
pb_debug_buffer_get_base_buffer
};
 
 
static void
pb_debug_manager_dump_locked(struct pb_debug_manager *mgr)
{
struct list_head *curr, *next;
struct pb_debug_buffer *buf;
 
curr = mgr->list.next;
next = curr->next;
while(curr != &mgr->list) {
buf = LIST_ENTRY(struct pb_debug_buffer, curr, head);
 
debug_printf("buffer = %p\n", (void *) buf);
debug_printf(" .size = 0x%x\n", buf->base.size);
debug_backtrace_dump(buf->create_backtrace, PB_DEBUG_CREATE_BACKTRACE);
curr = next;
next = curr->next;
}
 
}
 
 
static struct pb_buffer *
pb_debug_manager_create_buffer(struct pb_manager *_mgr,
pb_size size,
const struct pb_desc *desc)
{
struct pb_debug_manager *mgr = pb_debug_manager(_mgr);
struct pb_debug_buffer *buf;
struct pb_desc real_desc;
pb_size real_size;
assert(size);
assert(desc->alignment);
 
buf = CALLOC_STRUCT(pb_debug_buffer);
if(!buf)
return NULL;
real_size = mgr->underflow_size + size + mgr->overflow_size;
real_desc = *desc;
real_desc.usage |= PB_USAGE_CPU_WRITE;
real_desc.usage |= PB_USAGE_CPU_READ;
 
buf->buffer = mgr->provider->create_buffer(mgr->provider,
real_size,
&real_desc);
if(!buf->buffer) {
FREE(buf);
#if 0
pipe_mutex_lock(mgr->mutex);
debug_printf("%s: failed to create buffer\n", __FUNCTION__);
if(!LIST_IS_EMPTY(&mgr->list))
pb_debug_manager_dump_locked(mgr);
pipe_mutex_unlock(mgr->mutex);
#endif
return NULL;
}
assert(pipe_is_referenced(&buf->buffer->reference));
assert(pb_check_alignment(real_desc.alignment, buf->buffer->alignment));
assert(pb_check_usage(real_desc.usage, buf->buffer->usage));
assert(buf->buffer->size >= real_size);
pipe_reference_init(&buf->base.reference, 1);
buf->base.alignment = desc->alignment;
buf->base.usage = desc->usage;
buf->base.size = size;
buf->base.vtbl = &pb_debug_buffer_vtbl;
buf->mgr = mgr;
 
buf->underflow_size = mgr->underflow_size;
buf->overflow_size = buf->buffer->size - buf->underflow_size - size;
debug_backtrace_capture(buf->create_backtrace, 1, PB_DEBUG_CREATE_BACKTRACE);
 
pb_debug_buffer_fill(buf);
pipe_mutex_init(buf->mutex);
pipe_mutex_lock(mgr->mutex);
LIST_ADDTAIL(&buf->head, &mgr->list);
pipe_mutex_unlock(mgr->mutex);
 
return &buf->base;
}
 
 
static void
pb_debug_manager_flush(struct pb_manager *_mgr)
{
struct pb_debug_manager *mgr = pb_debug_manager(_mgr);
assert(mgr->provider->flush);
if(mgr->provider->flush)
mgr->provider->flush(mgr->provider);
}
 
 
static void
pb_debug_manager_destroy(struct pb_manager *_mgr)
{
struct pb_debug_manager *mgr = pb_debug_manager(_mgr);
pipe_mutex_lock(mgr->mutex);
if(!LIST_IS_EMPTY(&mgr->list)) {
debug_printf("%s: unfreed buffers\n", __FUNCTION__);
pb_debug_manager_dump_locked(mgr);
}
pipe_mutex_unlock(mgr->mutex);
pipe_mutex_destroy(mgr->mutex);
mgr->provider->destroy(mgr->provider);
FREE(mgr);
}
 
 
struct pb_manager *
pb_debug_manager_create(struct pb_manager *provider,
pb_size underflow_size, pb_size overflow_size)
{
struct pb_debug_manager *mgr;
 
if(!provider)
return NULL;
mgr = CALLOC_STRUCT(pb_debug_manager);
if (!mgr)
return NULL;
 
mgr->base.destroy = pb_debug_manager_destroy;
mgr->base.create_buffer = pb_debug_manager_create_buffer;
mgr->base.flush = pb_debug_manager_flush;
mgr->provider = provider;
mgr->underflow_size = underflow_size;
mgr->overflow_size = overflow_size;
pipe_mutex_init(mgr->mutex);
LIST_INITHEAD(&mgr->list);
 
return &mgr->base;
}
 
 
#else /* !DEBUG */
 
 
struct pb_manager *
pb_debug_manager_create(struct pb_manager *provider,
pb_size underflow_size, pb_size overflow_size)
{
return provider;
}
 
 
#endif /* !DEBUG */
/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
0,0 → 1,320
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/**
* \file
* Buffer manager using the old texture memory manager.
*
* \author Jose Fonseca <jrfonseca@tungstengraphics.com>
*/
 
 
#include "pipe/p_defines.h"
#include "util/u_debug.h"
#include "os/os_thread.h"
#include "util/u_memory.h"
#include "util/u_double_list.h"
#include "util/u_mm.h"
#include "pb_buffer.h"
#include "pb_bufmgr.h"
 
 
/**
* Convenience macro (type safe).
*/
#define SUPER(__derived) (&(__derived)->base)
 
 
struct mm_pb_manager
{
struct pb_manager base;
pipe_mutex mutex;
pb_size size;
struct mem_block *heap;
pb_size align2;
struct pb_buffer *buffer;
void *map;
};
 
 
static INLINE struct mm_pb_manager *
mm_pb_manager(struct pb_manager *mgr)
{
assert(mgr);
return (struct mm_pb_manager *)mgr;
}
 
 
struct mm_buffer
{
struct pb_buffer base;
struct mm_pb_manager *mgr;
struct mem_block *block;
};
 
 
static INLINE struct mm_buffer *
mm_buffer(struct pb_buffer *buf)
{
assert(buf);
return (struct mm_buffer *)buf;
}
 
 
static void
mm_buffer_destroy(struct pb_buffer *buf)
{
struct mm_buffer *mm_buf = mm_buffer(buf);
struct mm_pb_manager *mm = mm_buf->mgr;
assert(!pipe_is_referenced(&mm_buf->base.reference));
pipe_mutex_lock(mm->mutex);
u_mmFreeMem(mm_buf->block);
FREE(mm_buf);
pipe_mutex_unlock(mm->mutex);
}
 
 
static void *
mm_buffer_map(struct pb_buffer *buf,
unsigned flags,
void *flush_ctx)
{
struct mm_buffer *mm_buf = mm_buffer(buf);
struct mm_pb_manager *mm = mm_buf->mgr;
 
/* XXX: it will be necessary to remap here to propagate flush_ctx */
 
return (unsigned char *) mm->map + mm_buf->block->ofs;
}
 
 
static void
mm_buffer_unmap(struct pb_buffer *buf)
{
/* No-op */
}
 
 
static enum pipe_error
mm_buffer_validate(struct pb_buffer *buf,
struct pb_validate *vl,
unsigned flags)
{
struct mm_buffer *mm_buf = mm_buffer(buf);
struct mm_pb_manager *mm = mm_buf->mgr;
return pb_validate(mm->buffer, vl, flags);
}
 
 
static void
mm_buffer_fence(struct pb_buffer *buf,
struct pipe_fence_handle *fence)
{
struct mm_buffer *mm_buf = mm_buffer(buf);
struct mm_pb_manager *mm = mm_buf->mgr;
pb_fence(mm->buffer, fence);
}
 
 
static void
mm_buffer_get_base_buffer(struct pb_buffer *buf,
struct pb_buffer **base_buf,
pb_size *offset)
{
struct mm_buffer *mm_buf = mm_buffer(buf);
struct mm_pb_manager *mm = mm_buf->mgr;
pb_get_base_buffer(mm->buffer, base_buf, offset);
*offset += mm_buf->block->ofs;
}
 
 
static const struct pb_vtbl
mm_buffer_vtbl = {
mm_buffer_destroy,
mm_buffer_map,
mm_buffer_unmap,
mm_buffer_validate,
mm_buffer_fence,
mm_buffer_get_base_buffer
};
 
 
static struct pb_buffer *
mm_bufmgr_create_buffer(struct pb_manager *mgr,
pb_size size,
const struct pb_desc *desc)
{
struct mm_pb_manager *mm = mm_pb_manager(mgr);
struct mm_buffer *mm_buf;
 
/* We don't handle alignments larger than the one initially set up */
assert(pb_check_alignment(desc->alignment, (pb_size)1 << mm->align2));
if(!pb_check_alignment(desc->alignment, (pb_size)1 << mm->align2))
return NULL;
pipe_mutex_lock(mm->mutex);
 
mm_buf = CALLOC_STRUCT(mm_buffer);
if (!mm_buf) {
pipe_mutex_unlock(mm->mutex);
return NULL;
}
 
pipe_reference_init(&mm_buf->base.reference, 1);
mm_buf->base.alignment = desc->alignment;
mm_buf->base.usage = desc->usage;
mm_buf->base.size = size;
mm_buf->base.vtbl = &mm_buffer_vtbl;
mm_buf->mgr = mm;
mm_buf->block = u_mmAllocMem(mm->heap, (int)size, (int)mm->align2, 0);
if(!mm_buf->block) {
#if 0
debug_printf("warning: heap full\n");
mmDumpMemInfo(mm->heap);
#endif
FREE(mm_buf);
pipe_mutex_unlock(mm->mutex);
return NULL;
}
/* Some sanity checks */
assert(0 <= (pb_size)mm_buf->block->ofs && (pb_size)mm_buf->block->ofs < mm->size);
assert(size <= (pb_size)mm_buf->block->size && (pb_size)mm_buf->block->ofs + (pb_size)mm_buf->block->size <= mm->size);
pipe_mutex_unlock(mm->mutex);
return SUPER(mm_buf);
}
 
 
static void
mm_bufmgr_flush(struct pb_manager *mgr)
{
/* No-op */
}
 
 
static void
mm_bufmgr_destroy(struct pb_manager *mgr)
{
struct mm_pb_manager *mm = mm_pb_manager(mgr);
pipe_mutex_lock(mm->mutex);
 
u_mmDestroy(mm->heap);
pb_unmap(mm->buffer);
pb_reference(&mm->buffer, NULL);
pipe_mutex_unlock(mm->mutex);
FREE(mgr);
}
 
 
struct pb_manager *
mm_bufmgr_create_from_buffer(struct pb_buffer *buffer,
pb_size size, pb_size align2)
{
struct mm_pb_manager *mm;
 
if(!buffer)
return NULL;
mm = CALLOC_STRUCT(mm_pb_manager);
if (!mm)
return NULL;
 
mm->base.destroy = mm_bufmgr_destroy;
mm->base.create_buffer = mm_bufmgr_create_buffer;
mm->base.flush = mm_bufmgr_flush;
 
mm->size = size;
mm->align2 = align2; /* base-2 log of the buffer alignment */
 
pipe_mutex_init(mm->mutex);
 
mm->buffer = buffer;
 
mm->map = pb_map(mm->buffer,
PB_USAGE_CPU_READ |
PB_USAGE_CPU_WRITE, NULL);
if(!mm->map)
goto failure;
 
mm->heap = u_mmInit(0, (int)size);
if (!mm->heap)
goto failure;
 
return SUPER(mm);
failure:
if(mm->heap)
u_mmDestroy(mm->heap);
if(mm->map)
pb_unmap(mm->buffer);
FREE(mm);
return NULL;
}
 
 
struct pb_manager *
mm_bufmgr_create(struct pb_manager *provider,
pb_size size, pb_size align2)
{
struct pb_buffer *buffer;
struct pb_manager *mgr;
struct pb_desc desc;
 
if(!provider)
return NULL;
memset(&desc, 0, sizeof(desc));
desc.alignment = 1 << align2;
buffer = provider->create_buffer(provider, size, &desc);
if (!buffer)
return NULL;
mgr = mm_bufmgr_create_from_buffer(buffer, size, align2);
if (!mgr) {
pb_reference(&buffer, NULL);
return NULL;
}
 
return mgr;
}
/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c
0,0 → 1,305
/**************************************************************************
*
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/**
* @file
* A variation of malloc buffers which get transferred to real graphics memory
* when there is an attempt to validate them.
*
* @author Jose Fonseca <jrfonseca@tungstengraphics.com>
*/
 
 
#include "util/u_debug.h"
#include "util/u_memory.h"
#include "pb_buffer.h"
#include "pb_bufmgr.h"
 
 
struct pb_ondemand_manager;
 
 
struct pb_ondemand_buffer
{
struct pb_buffer base;
struct pb_ondemand_manager *mgr;
/** Regular malloc'ed memory */
void *data;
unsigned mapcount;
/** Real buffer */
struct pb_buffer *buffer;
pb_size size;
struct pb_desc desc;
};
 
 
struct pb_ondemand_manager
{
struct pb_manager base;
struct pb_manager *provider;
};
 
 
extern const struct pb_vtbl pb_ondemand_buffer_vtbl;
 
static INLINE struct pb_ondemand_buffer *
pb_ondemand_buffer(struct pb_buffer *buf)
{
assert(buf);
if (!buf)
return NULL;
assert(buf->vtbl == &pb_ondemand_buffer_vtbl);
return (struct pb_ondemand_buffer *)buf;
}
 
static INLINE struct pb_ondemand_manager *
pb_ondemand_manager(struct pb_manager *mgr)
{
assert(mgr);
return (struct pb_ondemand_manager *)mgr;
}
 
 
static void
pb_ondemand_buffer_destroy(struct pb_buffer *_buf)
{
struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);
pb_reference(&buf->buffer, NULL);
align_free(buf->data);
FREE(buf);
}
 
 
static void *
pb_ondemand_buffer_map(struct pb_buffer *_buf,
unsigned flags, void *flush_ctx)
{
struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);
 
if(buf->buffer) {
assert(!buf->data);
return pb_map(buf->buffer, flags, flush_ctx);
}
else {
assert(buf->data);
++buf->mapcount;
return buf->data;
}
}
 
 
static void
pb_ondemand_buffer_unmap(struct pb_buffer *_buf)
{
struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);
 
if(buf->buffer) {
assert(!buf->data);
pb_unmap(buf->buffer);
}
else {
assert(buf->data);
assert(buf->mapcount);
if(buf->mapcount)
--buf->mapcount;
}
}
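 
/*
* pb_ondemand_buffer_instantiate (below) transfers the malloc'ed contents
* into a real buffer obtained from the provider. It is called lazily, on
* first validation or when the base buffer is queried; once the data has
* been copied the malloc'ed copy is released, provided the buffer is not
* currently mapped.
*/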
 
 
static enum pipe_error
pb_ondemand_buffer_instantiate(struct pb_ondemand_buffer *buf)
{
if(!buf->buffer) {
struct pb_manager *provider = buf->mgr->provider;
uint8_t *map;
assert(!buf->mapcount);
buf->buffer = provider->create_buffer(provider, buf->size, &buf->desc);
if(!buf->buffer)
return PIPE_ERROR_OUT_OF_MEMORY;
map = pb_map(buf->buffer, PB_USAGE_CPU_WRITE, NULL);
if(!map) {
pb_reference(&buf->buffer, NULL);
return PIPE_ERROR;
}
memcpy(map, buf->data, buf->size);
pb_unmap(buf->buffer);
if(!buf->mapcount) {
align_free(buf->data);
buf->data = NULL;
}
}
return PIPE_OK;
}
 
static enum pipe_error
pb_ondemand_buffer_validate(struct pb_buffer *_buf,
struct pb_validate *vl,
unsigned flags)
{
struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);
enum pipe_error ret;
 
assert(!buf->mapcount);
if(buf->mapcount)
return PIPE_ERROR;
 
ret = pb_ondemand_buffer_instantiate(buf);
if(ret != PIPE_OK)
return ret;
return pb_validate(buf->buffer, vl, flags);
}
 
 
static void
pb_ondemand_buffer_fence(struct pb_buffer *_buf,
struct pipe_fence_handle *fence)
{
struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);
assert(buf->buffer);
if(!buf->buffer)
return;
pb_fence(buf->buffer, fence);
}
 
 
static void
pb_ondemand_buffer_get_base_buffer(struct pb_buffer *_buf,
struct pb_buffer **base_buf,
pb_size *offset)
{
struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);
 
if(pb_ondemand_buffer_instantiate(buf) != PIPE_OK) {
assert(0);
*base_buf = &buf->base;
*offset = 0;
return;
}
 
pb_get_base_buffer(buf->buffer, base_buf, offset);
}
 
 
const struct pb_vtbl
pb_ondemand_buffer_vtbl = {
pb_ondemand_buffer_destroy,
pb_ondemand_buffer_map,
pb_ondemand_buffer_unmap,
pb_ondemand_buffer_validate,
pb_ondemand_buffer_fence,
pb_ondemand_buffer_get_base_buffer
};
 
 
static struct pb_buffer *
pb_ondemand_manager_create_buffer(struct pb_manager *_mgr,
pb_size size,
const struct pb_desc *desc)
{
struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr);
struct pb_ondemand_buffer *buf;
buf = CALLOC_STRUCT(pb_ondemand_buffer);
if(!buf)
return NULL;
 
pipe_reference_init(&buf->base.reference, 1);
buf->base.alignment = desc->alignment;
buf->base.usage = desc->usage;
buf->base.size = size;
buf->base.vtbl = &pb_ondemand_buffer_vtbl;
buf->mgr = mgr;
buf->data = align_malloc(size, desc->alignment < sizeof(void*) ? sizeof(void*) : desc->alignment);
if(!buf->data) {
FREE(buf);
return NULL;
}
buf->size = size;
buf->desc = *desc;
 
return &buf->base;
}
 
 
static void
pb_ondemand_manager_flush(struct pb_manager *_mgr)
{
struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr);
mgr->provider->flush(mgr->provider);
}
 
 
static void
pb_ondemand_manager_destroy(struct pb_manager *_mgr)
{
struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr);
 
FREE(mgr);
}
 
 
struct pb_manager *
pb_ondemand_manager_create(struct pb_manager *provider)
{
struct pb_ondemand_manager *mgr;
 
if(!provider)
return NULL;
mgr = CALLOC_STRUCT(pb_ondemand_manager);
if(!mgr)
return NULL;
mgr->base.destroy = pb_ondemand_manager_destroy;
mgr->base.create_buffer = pb_ondemand_manager_create_buffer;
mgr->base.flush = pb_ondemand_manager_flush;
mgr->provider = provider;
 
return &mgr->base;
}
/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
0,0 → 1,321
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
*
**************************************************************************/
 
/**
* \file
* Batch buffer pool management.
*
* \author Jose Fonseca <jrfonseca-at-tungstengraphics-dot-com>
* \author Thomas Hellström <thomas-at-tungstengraphics-dot-com>
*/
 
 
#include "pipe/p_compiler.h"
#include "util/u_debug.h"
#include "os/os_thread.h"
#include "pipe/p_defines.h"
#include "util/u_memory.h"
#include "util/u_double_list.h"
 
#include "pb_buffer.h"
#include "pb_bufmgr.h"
 
 
/**
* Convenience macro (type safe).
*/
#define SUPER(__derived) (&(__derived)->base)
 
 
struct pool_pb_manager
{
struct pb_manager base;
pipe_mutex mutex;
pb_size bufSize;
pb_size bufAlign;
pb_size numFree;
pb_size numTot;
struct list_head free;
struct pb_buffer *buffer;
void *map;
struct pool_buffer *bufs;
};
 
 
static INLINE struct pool_pb_manager *
pool_pb_manager(struct pb_manager *mgr)
{
assert(mgr);
return (struct pool_pb_manager *)mgr;
}
 
 
struct pool_buffer
{
struct pb_buffer base;
struct pool_pb_manager *mgr;
struct list_head head;
pb_size start;
};
 
 
static INLINE struct pool_buffer *
pool_buffer(struct pb_buffer *buf)
{
assert(buf);
return (struct pool_buffer *)buf;
}
 
 
 
static void
pool_buffer_destroy(struct pb_buffer *buf)
{
struct pool_buffer *pool_buf = pool_buffer(buf);
struct pool_pb_manager *pool = pool_buf->mgr;
assert(!pipe_is_referenced(&pool_buf->base.reference));
 
pipe_mutex_lock(pool->mutex);
LIST_ADD(&pool_buf->head, &pool->free);
pool->numFree++;
pipe_mutex_unlock(pool->mutex);
}
 
 
static void *
pool_buffer_map(struct pb_buffer *buf, unsigned flags, void *flush_ctx)
{
struct pool_buffer *pool_buf = pool_buffer(buf);
struct pool_pb_manager *pool = pool_buf->mgr;
void *map;
 
/* XXX: it will be necessary to remap here to propagate flush_ctx */
 
pipe_mutex_lock(pool->mutex);
map = (unsigned char *) pool->map + pool_buf->start;
pipe_mutex_unlock(pool->mutex);
return map;
}
 
 
static void
pool_buffer_unmap(struct pb_buffer *buf)
{
/* No-op */
}
 
 
static enum pipe_error
pool_buffer_validate(struct pb_buffer *buf,
struct pb_validate *vl,
unsigned flags)
{
struct pool_buffer *pool_buf = pool_buffer(buf);
struct pool_pb_manager *pool = pool_buf->mgr;
return pb_validate(pool->buffer, vl, flags);
}
 
 
static void
pool_buffer_fence(struct pb_buffer *buf,
struct pipe_fence_handle *fence)
{
struct pool_buffer *pool_buf = pool_buffer(buf);
struct pool_pb_manager *pool = pool_buf->mgr;
pb_fence(pool->buffer, fence);
}
 
 
static void
pool_buffer_get_base_buffer(struct pb_buffer *buf,
struct pb_buffer **base_buf,
pb_size *offset)
{
struct pool_buffer *pool_buf = pool_buffer(buf);
struct pool_pb_manager *pool = pool_buf->mgr;
pb_get_base_buffer(pool->buffer, base_buf, offset);
*offset += pool_buf->start;
}
 
 
static const struct pb_vtbl
pool_buffer_vtbl = {
pool_buffer_destroy,
pool_buffer_map,
pool_buffer_unmap,
pool_buffer_validate,
pool_buffer_fence,
pool_buffer_get_base_buffer
};
 
 
static struct pb_buffer *
pool_bufmgr_create_buffer(struct pb_manager *mgr,
pb_size size,
const struct pb_desc *desc)
{
struct pool_pb_manager *pool = pool_pb_manager(mgr);
struct pool_buffer *pool_buf;
struct list_head *item;
 
assert(size == pool->bufSize);
assert(pool->bufAlign % desc->alignment == 0);
pipe_mutex_lock(pool->mutex);
 
if (pool->numFree == 0) {
pipe_mutex_unlock(pool->mutex);
debug_printf("warning: out of fixed size buffer objects\n");
return NULL;
}
 
item = pool->free.next;
 
if (item == &pool->free) {
pipe_mutex_unlock(pool->mutex);
debug_printf("error: fixed size buffer pool corruption\n");
return NULL;
}
 
LIST_DEL(item);
--pool->numFree;
 
pipe_mutex_unlock(pool->mutex);
pool_buf = LIST_ENTRY(struct pool_buffer, item, head);
assert(!pipe_is_referenced(&pool_buf->base.reference));
pipe_reference_init(&pool_buf->base.reference, 1);
pool_buf->base.alignment = desc->alignment;
pool_buf->base.usage = desc->usage;
return SUPER(pool_buf);
}
 
 
static void
pool_bufmgr_flush(struct pb_manager *mgr)
{
/* No-op */
}
 
 
static void
pool_bufmgr_destroy(struct pb_manager *mgr)
{
struct pool_pb_manager *pool = pool_pb_manager(mgr);
pipe_mutex_lock(pool->mutex);
 
FREE(pool->bufs);
pb_unmap(pool->buffer);
pb_reference(&pool->buffer, NULL);
pipe_mutex_unlock(pool->mutex);
FREE(mgr);
}
 
 
struct pb_manager *
pool_bufmgr_create(struct pb_manager *provider,
pb_size numBufs,
pb_size bufSize,
const struct pb_desc *desc)
{
struct pool_pb_manager *pool;
struct pool_buffer *pool_buf;
pb_size i;
 
if(!provider)
return NULL;
pool = CALLOC_STRUCT(pool_pb_manager);
if (!pool)
return NULL;
 
pool->base.destroy = pool_bufmgr_destroy;
pool->base.create_buffer = pool_bufmgr_create_buffer;
pool->base.flush = pool_bufmgr_flush;
 
LIST_INITHEAD(&pool->free);
 
pool->numTot = numBufs;
pool->numFree = numBufs;
pool->bufSize = bufSize;
pool->bufAlign = desc->alignment;
pipe_mutex_init(pool->mutex);
 
pool->buffer = provider->create_buffer(provider, numBufs*bufSize, desc);
if (!pool->buffer)
goto failure;
 
pool->map = pb_map(pool->buffer,
PB_USAGE_CPU_READ |
PB_USAGE_CPU_WRITE, NULL);
if(!pool->map)
goto failure;
 
pool->bufs = (struct pool_buffer *)CALLOC(numBufs, sizeof(*pool->bufs));
if (!pool->bufs)
goto failure;
 
pool_buf = pool->bufs;
for (i = 0; i < numBufs; ++i) {
pipe_reference_init(&pool_buf->base.reference, 0);
pool_buf->base.alignment = 0;
pool_buf->base.usage = 0;
pool_buf->base.size = bufSize;
pool_buf->base.vtbl = &pool_buffer_vtbl;
pool_buf->mgr = pool;
pool_buf->start = i * bufSize;
LIST_ADDTAIL(&pool_buf->head, &pool->free);
pool_buf++;
}
 
return SUPER(pool);
failure:
FREE(pool->bufs);
if(pool->map)
pb_unmap(pool->buffer);
if(pool->buffer)
pb_reference(&pool->buffer, NULL);
FREE(pool);
return NULL;
}
/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
0,0 → 1,590
/**************************************************************************
*
* Copyright 2006-2008 Tungsten Graphics, Inc., Cedar Park, TX., USA
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
*
**************************************************************************/
 
/**
* @file
* S-lab pool implementation.
*
* @sa http://en.wikipedia.org/wiki/Slab_allocation
*
* @author Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
* @author Jose Fonseca <jrfonseca@tungstengraphics.com>
*/
 
#include "pipe/p_compiler.h"
#include "util/u_debug.h"
#include "os/os_thread.h"
#include "pipe/p_defines.h"
#include "util/u_memory.h"
#include "util/u_double_list.h"
#include "util/u_time.h"
 
#include "pb_buffer.h"
#include "pb_bufmgr.h"
 
 
struct pb_slab;
 
 
/**
* Buffer in a slab.
*
* Sub-allocation of a contiguous buffer.
*/
struct pb_slab_buffer
{
struct pb_buffer base;
struct pb_slab *slab;
struct list_head head;
unsigned mapCount;
/** Offset relative to the start of the slab buffer. */
pb_size start;
/** Use when validating, to signal that all mappings are finished */
/* TODO: Actually validation does not reach this stage yet */
pipe_condvar event;
};
 
 
/**
* Slab -- a contiguous piece of memory.
*/
struct pb_slab
{
struct list_head head;
struct list_head freeBuffers;
pb_size numBuffers;
pb_size numFree;
struct pb_slab_buffer *buffers;
struct pb_slab_manager *mgr;
/** Buffer from the provider */
struct pb_buffer *bo;
void *virtual;
};
 
 
/**
* It adds and removes slabs as needed in order to satisfy the allocation and
* destruction of individual buffers.
*/
struct pb_slab_manager
{
struct pb_manager base;
/** From where we get our buffers */
struct pb_manager *provider;
/** Size of the buffers we hand on downstream */
pb_size bufSize;
/** Size of the buffers we request upstream */
pb_size slabSize;
/**
* Alignment, usage to be used to allocate the slab buffers.
*
* We can only provide buffers which are consistent (in alignment, usage)
* with this description.
*/
struct pb_desc desc;
 
/**
* Partial slabs
*
* Full slabs are not stored in any list. Empty slabs are destroyed
* immediately.
*/
struct list_head slabs;
pipe_mutex mutex;
};
 
 
/**
* Wrapper around several slabs, therefore capable of handling buffers of
* multiple sizes.
*
* This buffer manager just dispatches buffer allocations to the appropriate slab
* manager, according to the requested buffer size, or bypasses the slab
* managers altogether for even larger sizes.
*
* The data of this structure remains constant after
* initialization and thus needs no mutex protection.
*/
struct pb_slab_range_manager
{
struct pb_manager base;
 
struct pb_manager *provider;
pb_size minBufSize;
pb_size maxBufSize;
/** @sa pb_slab_manager::desc */
struct pb_desc desc;
unsigned numBuckets;
pb_size *bucketSizes;
/** Array of pb_slab_manager, one for each bucket size */
struct pb_manager **buckets;
};
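 
/*
* Note: bucket sizes are successive powers of two starting at minBufSize.
* With the illustrative values minBufSize = 4096 and maxBufSize = 65536 the
* buckets are 4096, 8192, 16384, 32768 and 65536; requests that do not fit
* in any bucket fall through directly to the provider (see
* pb_slab_range_manager_create_buffer).
*/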
 
 
static INLINE struct pb_slab_buffer *
pb_slab_buffer(struct pb_buffer *buf)
{
assert(buf);
return (struct pb_slab_buffer *)buf;
}
 
 
static INLINE struct pb_slab_manager *
pb_slab_manager(struct pb_manager *mgr)
{
assert(mgr);
return (struct pb_slab_manager *)mgr;
}
 
 
static INLINE struct pb_slab_range_manager *
pb_slab_range_manager(struct pb_manager *mgr)
{
assert(mgr);
return (struct pb_slab_range_manager *)mgr;
}
 
 
/**
* Return a buffer to its slab's free list, and free the whole slab once all
* of its buffers have been returned.
*/
static void
pb_slab_buffer_destroy(struct pb_buffer *_buf)
{
struct pb_slab_buffer *buf = pb_slab_buffer(_buf);
struct pb_slab *slab = buf->slab;
struct pb_slab_manager *mgr = slab->mgr;
struct list_head *list = &buf->head;
 
pipe_mutex_lock(mgr->mutex);
assert(!pipe_is_referenced(&buf->base.reference));
buf->mapCount = 0;
 
LIST_DEL(list);
LIST_ADDTAIL(list, &slab->freeBuffers);
slab->numFree++;
 
if (slab->head.next == &slab->head)
LIST_ADDTAIL(&slab->head, &mgr->slabs);
 
/* If the slab becomes totally empty, free it */
if (slab->numFree == slab->numBuffers) {
list = &slab->head;
LIST_DELINIT(list);
pb_reference(&slab->bo, NULL);
FREE(slab->buffers);
FREE(slab);
}
 
pipe_mutex_unlock(mgr->mutex);
}
 
 
static void *
pb_slab_buffer_map(struct pb_buffer *_buf,
unsigned flags,
void *flush_ctx)
{
struct pb_slab_buffer *buf = pb_slab_buffer(_buf);
 
/* XXX: it will be necessary to remap here to propagate flush_ctx */
 
++buf->mapCount;
return (void *) ((uint8_t *) buf->slab->virtual + buf->start);
}
 
 
static void
pb_slab_buffer_unmap(struct pb_buffer *_buf)
{
struct pb_slab_buffer *buf = pb_slab_buffer(_buf);
 
--buf->mapCount;
if (buf->mapCount == 0)
pipe_condvar_broadcast(buf->event);
}
 
 
static enum pipe_error
pb_slab_buffer_validate(struct pb_buffer *_buf,
struct pb_validate *vl,
unsigned flags)
{
struct pb_slab_buffer *buf = pb_slab_buffer(_buf);
return pb_validate(buf->slab->bo, vl, flags);
}
 
 
static void
pb_slab_buffer_fence(struct pb_buffer *_buf,
struct pipe_fence_handle *fence)
{
struct pb_slab_buffer *buf = pb_slab_buffer(_buf);
pb_fence(buf->slab->bo, fence);
}
 
 
static void
pb_slab_buffer_get_base_buffer(struct pb_buffer *_buf,
struct pb_buffer **base_buf,
pb_size *offset)
{
struct pb_slab_buffer *buf = pb_slab_buffer(_buf);
pb_get_base_buffer(buf->slab->bo, base_buf, offset);
*offset += buf->start;
}
 
 
static const struct pb_vtbl
pb_slab_buffer_vtbl = {
pb_slab_buffer_destroy,
pb_slab_buffer_map,
pb_slab_buffer_unmap,
pb_slab_buffer_validate,
pb_slab_buffer_fence,
pb_slab_buffer_get_base_buffer
};
 
 
/**
* Create a new slab.
*
* Called when we ran out of free slabs.
*/
static enum pipe_error
pb_slab_create(struct pb_slab_manager *mgr)
{
struct pb_slab *slab;
struct pb_slab_buffer *buf;
unsigned numBuffers;
unsigned i;
enum pipe_error ret;
 
slab = CALLOC_STRUCT(pb_slab);
if (!slab)
return PIPE_ERROR_OUT_OF_MEMORY;
 
slab->bo = mgr->provider->create_buffer(mgr->provider, mgr->slabSize, &mgr->desc);
if(!slab->bo) {
ret = PIPE_ERROR_OUT_OF_MEMORY;
goto out_err0;
}
 
/* Note down the slab virtual address. All mappings are accessed directly
* through this address so it is required that the buffer is pinned. */
slab->virtual = pb_map(slab->bo,
PB_USAGE_CPU_READ |
PB_USAGE_CPU_WRITE, NULL);
if(!slab->virtual) {
ret = PIPE_ERROR_OUT_OF_MEMORY;
goto out_err1;
}
pb_unmap(slab->bo);
 
numBuffers = slab->bo->size / mgr->bufSize;
 
slab->buffers = CALLOC(numBuffers, sizeof(*slab->buffers));
if (!slab->buffers) {
ret = PIPE_ERROR_OUT_OF_MEMORY;
goto out_err1;
}
 
LIST_INITHEAD(&slab->head);
LIST_INITHEAD(&slab->freeBuffers);
slab->numBuffers = numBuffers;
slab->numFree = 0;
slab->mgr = mgr;
 
buf = slab->buffers;
for (i=0; i < numBuffers; ++i) {
pipe_reference_init(&buf->base.reference, 0);
buf->base.size = mgr->bufSize;
buf->base.alignment = 0;
buf->base.usage = 0;
buf->base.vtbl = &pb_slab_buffer_vtbl;
buf->slab = slab;
buf->start = i* mgr->bufSize;
buf->mapCount = 0;
pipe_condvar_init(buf->event);
LIST_ADDTAIL(&buf->head, &slab->freeBuffers);
slab->numFree++;
buf++;
}
 
/* Add this slab to the list of partial slabs */
LIST_ADDTAIL(&slab->head, &mgr->slabs);
 
return PIPE_OK;
 
out_err1:
pb_reference(&slab->bo, NULL);
out_err0:
FREE(slab);
return ret;
}
 
 
static struct pb_buffer *
pb_slab_manager_create_buffer(struct pb_manager *_mgr,
pb_size size,
const struct pb_desc *desc)
{
struct pb_slab_manager *mgr = pb_slab_manager(_mgr);
struct pb_slab_buffer *buf;
struct pb_slab *slab;
struct list_head *list;
 
/* check size */
assert(size <= mgr->bufSize);
if(size > mgr->bufSize)
return NULL;
/* check if we can provide the requested alignment */
assert(pb_check_alignment(desc->alignment, mgr->desc.alignment));
if(!pb_check_alignment(desc->alignment, mgr->desc.alignment))
return NULL;
assert(pb_check_alignment(desc->alignment, mgr->bufSize));
if(!pb_check_alignment(desc->alignment, mgr->bufSize))
return NULL;
 
assert(pb_check_usage(desc->usage, mgr->desc.usage));
if(!pb_check_usage(desc->usage, mgr->desc.usage))
return NULL;
 
pipe_mutex_lock(mgr->mutex);
/* Create a new slab, if we run out of partial slabs */
if (mgr->slabs.next == &mgr->slabs) {
(void) pb_slab_create(mgr);
if (mgr->slabs.next == &mgr->slabs) {
pipe_mutex_unlock(mgr->mutex);
return NULL;
}
}
/* Allocate the buffer from a partial (or just created) slab */
list = mgr->slabs.next;
slab = LIST_ENTRY(struct pb_slab, list, head);
/* If totally full remove from the partial slab list */
if (--slab->numFree == 0)
LIST_DELINIT(list);
 
list = slab->freeBuffers.next;
LIST_DELINIT(list);
 
pipe_mutex_unlock(mgr->mutex);
buf = LIST_ENTRY(struct pb_slab_buffer, list, head);
pipe_reference_init(&buf->base.reference, 1);
buf->base.alignment = desc->alignment;
buf->base.usage = desc->usage;
return &buf->base;
}
 
 
static void
pb_slab_manager_flush(struct pb_manager *_mgr)
{
struct pb_slab_manager *mgr = pb_slab_manager(_mgr);
 
assert(mgr->provider->flush);
if(mgr->provider->flush)
mgr->provider->flush(mgr->provider);
}
 
 
static void
pb_slab_manager_destroy(struct pb_manager *_mgr)
{
struct pb_slab_manager *mgr = pb_slab_manager(_mgr);
 
/* TODO: cleanup all allocated buffers */
FREE(mgr);
}
 
 
struct pb_manager *
pb_slab_manager_create(struct pb_manager *provider,
pb_size bufSize,
pb_size slabSize,
const struct pb_desc *desc)
{
struct pb_slab_manager *mgr;
 
mgr = CALLOC_STRUCT(pb_slab_manager);
if (!mgr)
return NULL;
 
mgr->base.destroy = pb_slab_manager_destroy;
mgr->base.create_buffer = pb_slab_manager_create_buffer;
mgr->base.flush = pb_slab_manager_flush;
 
mgr->provider = provider;
mgr->bufSize = bufSize;
mgr->slabSize = slabSize;
mgr->desc = *desc;
 
LIST_INITHEAD(&mgr->slabs);
pipe_mutex_init(mgr->mutex);
 
return &mgr->base;
}
 
 
static struct pb_buffer *
pb_slab_range_manager_create_buffer(struct pb_manager *_mgr,
pb_size size,
const struct pb_desc *desc)
{
struct pb_slab_range_manager *mgr = pb_slab_range_manager(_mgr);
pb_size bufSize;
pb_size reqSize = size;
unsigned i;
 
if(desc->alignment > reqSize)
reqSize = desc->alignment;
 
bufSize = mgr->minBufSize;
for (i = 0; i < mgr->numBuckets; ++i) {
if(bufSize >= reqSize)
return mgr->buckets[i]->create_buffer(mgr->buckets[i], size, desc);
bufSize *= 2;
}
 
/* Fall back to allocate a buffer object directly from the provider. */
return mgr->provider->create_buffer(mgr->provider, size, desc);
}
 
 
static void
pb_slab_range_manager_flush(struct pb_manager *_mgr)
{
struct pb_slab_range_manager *mgr = pb_slab_range_manager(_mgr);
 
/* Individual slabs don't hold any temporary buffers so no need to call them */
assert(mgr->provider->flush);
if(mgr->provider->flush)
mgr->provider->flush(mgr->provider);
}
 
 
static void
pb_slab_range_manager_destroy(struct pb_manager *_mgr)
{
struct pb_slab_range_manager *mgr = pb_slab_range_manager(_mgr);
unsigned i;
for (i = 0; i < mgr->numBuckets; ++i)
mgr->buckets[i]->destroy(mgr->buckets[i]);
FREE(mgr->buckets);
FREE(mgr->bucketSizes);
FREE(mgr);
}
 
 
struct pb_manager *
pb_slab_range_manager_create(struct pb_manager *provider,
pb_size minBufSize,
pb_size maxBufSize,
pb_size slabSize,
const struct pb_desc *desc)
{
struct pb_slab_range_manager *mgr;
pb_size bufSize;
unsigned i;
 
if(!provider)
return NULL;
mgr = CALLOC_STRUCT(pb_slab_range_manager);
if (!mgr)
goto out_err0;
 
mgr->base.destroy = pb_slab_range_manager_destroy;
mgr->base.create_buffer = pb_slab_range_manager_create_buffer;
mgr->base.flush = pb_slab_range_manager_flush;
 
mgr->provider = provider;
mgr->minBufSize = minBufSize;
mgr->maxBufSize = maxBufSize;
 
mgr->numBuckets = 1;
bufSize = minBufSize;
while(bufSize < maxBufSize) {
bufSize *= 2;
++mgr->numBuckets;
}
mgr->buckets = CALLOC(mgr->numBuckets, sizeof(*mgr->buckets));
if (!mgr->buckets)
goto out_err1;
 
bufSize = minBufSize;
for (i = 0; i < mgr->numBuckets; ++i) {
mgr->buckets[i] = pb_slab_manager_create(provider, bufSize, slabSize, desc);
if(!mgr->buckets[i])
goto out_err2;
bufSize *= 2;
}
 
return &mgr->base;
 
out_err2:
for (i = 0; i < mgr->numBuckets; ++i)
if(mgr->buckets[i])
mgr->buckets[i]->destroy(mgr->buckets[i]);
FREE(mgr->buckets);
out_err1:
FREE(mgr);
out_err0:
return NULL;
}
/drivers/video/Gallium/auxiliary/pipebuffer/pb_validate.c
0,0 → 1,192
/**************************************************************************
*
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/**
* @file
* Buffer validation.
*
* @author Jose Fonseca <jrfonseca@tungstengraphics.com>
*/
 
 
#include "pipe/p_compiler.h"
#include "pipe/p_defines.h"
#include "util/u_memory.h"
#include "util/u_debug.h"
 
#include "pb_buffer.h"
#include "pb_validate.h"
 
 
#define PB_VALIDATE_INITIAL_SIZE 1 /* 512 */
 
 
struct pb_validate_entry
{
struct pb_buffer *buf;
unsigned flags;
};
 
 
struct pb_validate
{
struct pb_validate_entry *entries;
unsigned used;
unsigned size;
};
 
 
enum pipe_error
pb_validate_add_buffer(struct pb_validate *vl,
struct pb_buffer *buf,
unsigned flags)
{
assert(buf);
if(!buf)
return PIPE_ERROR;
 
assert(flags & PB_USAGE_GPU_READ_WRITE);
assert(!(flags & ~PB_USAGE_GPU_READ_WRITE));
flags &= PB_USAGE_GPU_READ_WRITE;
 
/* We only need to store one reference for each buffer, so avoid storing
* consecutive references for the same buffer. It might not be the most
* common pattern, but it is easy to implement.
*/
if(vl->used && vl->entries[vl->used - 1].buf == buf) {
vl->entries[vl->used - 1].flags |= flags;
return PIPE_OK;
}
/* Grow the table */
if(vl->used == vl->size) {
unsigned new_size;
struct pb_validate_entry *new_entries;
new_size = vl->size * 2;
if(!new_size)
return PIPE_ERROR_OUT_OF_MEMORY;
 
new_entries = (struct pb_validate_entry *)REALLOC(vl->entries,
vl->size*sizeof(struct pb_validate_entry),
new_size*sizeof(struct pb_validate_entry));
if(!new_entries)
return PIPE_ERROR_OUT_OF_MEMORY;
memset(new_entries + vl->size, 0, (new_size - vl->size)*sizeof(struct pb_validate_entry));
vl->size = new_size;
vl->entries = new_entries;
}
assert(!vl->entries[vl->used].buf);
pb_reference(&vl->entries[vl->used].buf, buf);
vl->entries[vl->used].flags = flags;
++vl->used;
return PIPE_OK;
}
 
 
enum pipe_error
pb_validate_foreach(struct pb_validate *vl,
enum pipe_error (*callback)(struct pb_buffer *buf, void *data),
void *data)
{
unsigned i;
for(i = 0; i < vl->used; ++i) {
enum pipe_error ret;
ret = callback(vl->entries[i].buf, data);
if(ret != PIPE_OK)
return ret;
}
return PIPE_OK;
}
 
 
enum pipe_error
pb_validate_validate(struct pb_validate *vl)
{
unsigned i;
for(i = 0; i < vl->used; ++i) {
enum pipe_error ret;
ret = pb_validate(vl->entries[i].buf, vl, vl->entries[i].flags);
if(ret != PIPE_OK) {
while(i--)
pb_validate(vl->entries[i].buf, NULL, 0);
return ret;
}
}
 
return PIPE_OK;
}
 
 
void
pb_validate_fence(struct pb_validate *vl,
struct pipe_fence_handle *fence)
{
unsigned i;
for(i = 0; i < vl->used; ++i) {
pb_fence(vl->entries[i].buf, fence);
pb_reference(&vl->entries[i].buf, NULL);
}
vl->used = 0;
}
 
 
void
pb_validate_destroy(struct pb_validate *vl)
{
unsigned i;
for(i = 0; i < vl->used; ++i)
pb_reference(&vl->entries[i].buf, NULL);
FREE(vl->entries);
FREE(vl);
}
 
 
struct pb_validate *
pb_validate_create()
{
struct pb_validate *vl;
vl = CALLOC_STRUCT(pb_validate);
if(!vl)
return NULL;
vl->size = PB_VALIDATE_INITIAL_SIZE;
vl->entries = (struct pb_validate_entry *)CALLOC(vl->size, sizeof(struct pb_validate_entry));
if(!vl->entries) {
FREE(vl);
return NULL;
}
 
return vl;
}
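 
/* Editor's note: an illustrative sketch, not part of the original file,
 * showing the intended lifecycle of a validation list (create -> add ->
 * validate -> issue commands -> fence). The command submission step is only
 * hinted at here; it stands in for the driver's real submission path.
 */
#if 0
static enum pipe_error
example_submit(struct pb_validate *vl,
               struct pb_buffer *buf,
               struct pipe_fence_handle *fence)
{
   enum pipe_error ret;
 
   ret = pb_validate_add_buffer(vl, buf, PB_USAGE_GPU_READ);
   if (ret != PIPE_OK)
      return ret;
 
   ret = pb_validate_validate(vl);      /* right before issuing commands */
   if (ret != PIPE_OK)
      return ret;
 
   /* ... issue commands to the hardware, obtaining `fence` ... */
 
   pb_validate_fence(vl, fence);        /* right after issuing commands */
   return PIPE_OK;
}
#endif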
 
/drivers/video/Gallium/auxiliary/pipebuffer/pb_validate.h
0,0 → 1,97
/**************************************************************************
*
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/**
* @file
* Buffer validation.
*
* @author Jose Fonseca <jrfonseca@tungstengraphics.com>
*/
 
#ifndef PB_VALIDATE_H_
#define PB_VALIDATE_H_
 
 
#include "pipe/p_compiler.h"
#include "pipe/p_defines.h"
 
#ifdef __cplusplus
extern "C" {
#endif
 
 
struct pb_buffer;
struct pipe_fence_handle;
 
 
/**
* Buffer validation list.
*
* It holds a list of buffers to be validated and fenced when flushing.
*/
struct pb_validate;
 
 
enum pipe_error
pb_validate_add_buffer(struct pb_validate *vl,
struct pb_buffer *buf,
unsigned flags);
 
enum pipe_error
pb_validate_foreach(struct pb_validate *vl,
enum pipe_error (*callback)(struct pb_buffer *buf, void *data),
void *data);
 
/**
* Validate all buffers for hardware access.
*
* Should be called right before issuing commands to the hardware.
*/
enum pipe_error
pb_validate_validate(struct pb_validate *vl);
 
/**
* Fence all buffers and clear the list.
*
* Should be called right after issuing commands to the hardware.
*/
void
pb_validate_fence(struct pb_validate *vl,
struct pipe_fence_handle *fence);
 
struct pb_validate *
pb_validate_create(void);
 
void
pb_validate_destroy(struct pb_validate *vl);
 
 
#ifdef __cplusplus
}
#endif
 
#endif /*PB_VALIDATE_H_*/
/drivers/video/Gallium/auxiliary/rtasm/rtasm_cpu.c
0,0 → 1,67
/**************************************************************************
*
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "pipe/p_config.h"
#include "rtasm_cpu.h"
 
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 
#include "util/u_debug.h"
#include "util/u_cpu_detect.h"
 
DEBUG_GET_ONCE_BOOL_OPTION(nosse, "GALLIUM_NOSSE", FALSE);
 
static struct util_cpu_caps *get_cpu_caps(void)
{
util_cpu_detect();
return &util_cpu_caps;
}
 
int rtasm_cpu_has_sse(void)
{
return !debug_get_option_nosse() && get_cpu_caps()->has_sse;
}
 
int rtasm_cpu_has_sse2(void)
{
return !debug_get_option_nosse() && get_cpu_caps()->has_sse2;
}
 
 
#else
 
int rtasm_cpu_has_sse(void)
{
return 0;
}
 
int rtasm_cpu_has_sse2(void)
{
return 0;
}
 
#endif
/drivers/video/Gallium/auxiliary/rtasm/rtasm_cpu.h
0,0 → 1,42
/**************************************************************************
*
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/**
* @file
* Runtime detection of CPU capabilities.
*/
 
#ifndef _RTASM_CPU_H_
#define _RTASM_CPU_H_
 
 
int rtasm_cpu_has_sse(void);
 
int rtasm_cpu_has_sse2(void);
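 
/*
 * Editor's note (illustrative, not part of the original header): callers are
 * expected to branch once on these predicates and pick a code path, e.g.
 *
 *    if (rtasm_cpu_has_sse2())
 *       use the SSE2 code generator;
 *    else
 *       use the generic C fallback;
 *
 * Setting the GALLIUM_NOSSE environment variable (see rtasm_cpu.c) forces
 * the fallback even on SSE-capable processors.
 */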
 
 
#endif /* _RTASM_CPU_H_ */
/drivers/video/Gallium/auxiliary/rtasm/rtasm_execmem.c
0,0 → 1,102
/**************************************************************************
*
* Copyright (C) 1999-2005 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
 
/**
* \file rtasm_execmem.c
* Functions for allocating executable memory.
*
* \author Keith Whitwell
*/
 
 
#include "pipe/p_compiler.h"
#include "util/u_debug.h"
#include "os/os_thread.h"
#include "util/u_memory.h"
 
#include "rtasm_execmem.h"
 
#include "util/u_mm.h"
 
#define EXEC_HEAP_SIZE (4*1024*1024)
 
pipe_static_mutex(exec_mutex);
 
static struct mem_block *exec_heap = NULL;
static unsigned char *exec_mem = NULL;
 
 
static void
init_heap(void)
{
if (!exec_heap)
exec_heap = u_mmInit( 0, EXEC_HEAP_SIZE );
if (!exec_mem)
exec_mem = (unsigned char *) user_alloc(EXEC_HEAP_SIZE);
}
 
 
void *
rtasm_exec_malloc(size_t size)
{
struct mem_block *block = NULL;
void *addr = NULL;
 
pipe_mutex_lock(exec_mutex);
 
init_heap();
 
if (exec_heap) {
size = (size + 31) & ~31; /* next multiple of 32 bytes */
block = u_mmAllocMem( exec_heap, size, 5, 0 ); /* 5 -> 32-byte alignment */
}
 
if (block)
addr = exec_mem + block->ofs;
else
debug_printf("rtasm_exec_malloc failed\n");
pipe_mutex_unlock(exec_mutex);
return addr;
}
 
void
rtasm_exec_free(void *addr)
{
pipe_mutex_lock(exec_mutex);
 
if (exec_heap) {
struct mem_block *block = u_mmFindBlock(exec_heap, (unsigned char *)addr - exec_mem);
if (block)
u_mmFreeMem(block);
}
 
pipe_mutex_unlock(exec_mutex);
}
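 
/* Editor's note: an illustrative sketch, not part of the original file, of
 * the typical use of this allocator: reserve executable memory, copy freshly
 * generated machine code into it, call it through a function pointer, and
 * release it again. The code bytes themselves come from a generator such as
 * rtasm_x86sse.c.
 */
#if 0
typedef int (*example_fn)(void);
 
static int
example_run_generated_code(const unsigned char *code, size_t len)
{
   void *mem = rtasm_exec_malloc(len);
   int result;
 
   if (!mem)
      return -1;
 
   memcpy(mem, code, len);          /* copy the emitted instructions */
   result = ((example_fn)mem)();    /* jump into the buffer */
   rtasm_exec_free(mem);
   return result;
}
#endif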
 
/drivers/video/Gallium/auxiliary/rtasm/rtasm_execmem.h
0,0 → 1,46
/**************************************************************************
*
* Copyright (C) 1999-2005 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/**
* \file rtasm_execmem.h
* Functions for allocating executable memory.
*
* \author Keith Whitwell
*/
 
#ifndef _RTASM_EXECMEM_H_
#define _RTASM_EXECMEM_H_
 
#include "pipe/p_compiler.h"
 
 
extern void *
rtasm_exec_malloc( size_t size );
 
 
extern void
rtasm_exec_free( void *addr );
 
 
#endif
/drivers/video/Gallium/auxiliary/rtasm/rtasm_x86sse.c
0,0 → 1,2232
/**************************************************************************
*
* Copyright (C) 1999-2005 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "pipe/p_config.h"
#include "util/u_cpu_detect.h"
 
#if defined(PIPE_ARCH_X86) || (defined(PIPE_ARCH_X86_64) && !defined(__MINGW32__))
 
#include "pipe/p_compiler.h"
#include "util/u_debug.h"
#include "util/u_pointer.h"
 
#include "rtasm_execmem.h"
#include "rtasm_x86sse.h"
 
#define DISASSEM 0
#define X86_TWOB 0x0f
 
 
#define DUMP_SSE 0
 
 
void x86_print_reg( struct x86_reg reg )
{
if (reg.mod != mod_REG)
debug_printf( "[" );
switch( reg.file ) {
case file_REG32:
switch( reg.idx ) {
case reg_AX: debug_printf( "EAX" ); break;
case reg_CX: debug_printf( "ECX" ); break;
case reg_DX: debug_printf( "EDX" ); break;
case reg_BX: debug_printf( "EBX" ); break;
case reg_SP: debug_printf( "ESP" ); break;
case reg_BP: debug_printf( "EBP" ); break;
case reg_SI: debug_printf( "ESI" ); break;
case reg_DI: debug_printf( "EDI" ); break;
}
break;
case file_MMX:
debug_printf( "MMX%u", reg.idx );
break;
case file_XMM:
debug_printf( "XMM%u", reg.idx );
break;
case file_x87:
debug_printf( "fp%u", reg.idx );
break;
}
 
if (reg.mod == mod_DISP8 ||
reg.mod == mod_DISP32)
debug_printf("+%d", reg.disp);
 
if (reg.mod != mod_REG)
debug_printf( "]" );
}
 
#if DUMP_SSE
 
#define DUMP_START() debug_printf( "\n" )
#define DUMP_END() debug_printf( "\n" )
 
#define DUMP() do { \
const char *foo = __FUNCTION__; \
while (*foo && *foo != '_') \
foo++; \
if (*foo) \
foo++; \
debug_printf( "\n%4x %14s ", p->csr - p->store, foo ); \
} while (0)
 
#define DUMP_I( I ) do { \
DUMP(); \
debug_printf( "%u", I ); \
} while( 0 )
 
#define DUMP_R( R0 ) do { \
DUMP(); \
x86_print_reg( R0 ); \
} while( 0 )
 
#define DUMP_RR( R0, R1 ) do { \
DUMP(); \
x86_print_reg( R0 ); \
debug_printf( ", " ); \
x86_print_reg( R1 ); \
} while( 0 )
 
#define DUMP_RI( R0, I ) do { \
DUMP(); \
x86_print_reg( R0 ); \
debug_printf( ", %u", I ); \
} while( 0 )
 
#define DUMP_RRI( R0, R1, I ) do { \
DUMP(); \
x86_print_reg( R0 ); \
debug_printf( ", " ); \
x86_print_reg( R1 ); \
debug_printf( ", %u", I ); \
} while( 0 )
 
#else
 
#define DUMP_START()
#define DUMP_END()
#define DUMP( )
#define DUMP_I( I )
#define DUMP_R( R0 )
#define DUMP_RR( R0, R1 )
#define DUMP_RI( R0, I )
#define DUMP_RRI( R0, R1, I )
 
#endif
 
 
static void do_realloc( struct x86_function *p )
{
if (p->store == p->error_overflow) {
p->csr = p->store;
}
else if (p->size == 0) {
p->size = 1024;
p->store = rtasm_exec_malloc(p->size);
p->csr = p->store;
}
else {
uintptr_t used = pointer_to_uintptr( p->csr ) - pointer_to_uintptr( p->store );
unsigned char *tmp = p->store;
p->size *= 2;
p->store = rtasm_exec_malloc(p->size);
 
if (p->store) {
memcpy(p->store, tmp, used);
p->csr = p->store + used;
}
else {
p->csr = p->store;
}
 
rtasm_exec_free(tmp);
}
 
if (p->store == NULL) {
p->store = p->csr = p->error_overflow;
p->size = sizeof(p->error_overflow);
}
}
 
/* Emit bytes to the instruction stream:
*/
static unsigned char *reserve( struct x86_function *p, int bytes )
{
if (p->csr + bytes - p->store > (int) p->size)
do_realloc(p);
 
{
unsigned char *csr = p->csr;
p->csr += bytes;
return csr;
}
}
 
 
 
static void emit_1b( struct x86_function *p, char b0 )
{
char *csr = (char *)reserve(p, 1);
*csr = b0;
}
 
static void emit_1i( struct x86_function *p, int i0 )
{
int *icsr = (int *)reserve(p, sizeof(i0));
*icsr = i0;
}
 
static void emit_1ub( struct x86_function *p, unsigned char b0 )
{
unsigned char *csr = reserve(p, 1);
*csr++ = b0;
}
 
static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 )
{
unsigned char *csr = reserve(p, 2);
*csr++ = b0;
*csr++ = b1;
}
 
static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 )
{
unsigned char *csr = reserve(p, 3);
*csr++ = b0;
*csr++ = b1;
*csr++ = b2;
}
 
 
/* Build a modRM byte + possible displacement. No treatment of SIB
* indexing. BZZT - no way to encode an absolute address.
*
* This is the "/r" field in the x86 manuals...
*/
static void emit_modrm( struct x86_function *p,
struct x86_reg reg,
struct x86_reg regmem )
{
unsigned char val = 0;
assert(reg.mod == mod_REG);
/* TODO: support extended x86-64 registers */
assert(reg.idx < 8);
assert(regmem.idx < 8);
 
val |= regmem.mod << 6; /* mod field */
val |= reg.idx << 3; /* reg field */
val |= regmem.idx; /* r/m field */
emit_1ub(p, val);
 
/* Oh-oh we've stumbled into the SIB thing.
*/
if (regmem.file == file_REG32 &&
regmem.idx == reg_SP &&
regmem.mod != mod_REG) {
emit_1ub(p, 0x24); /* simplistic! */
}
 
switch (regmem.mod) {
case mod_REG:
case mod_INDIRECT:
break;
case mod_DISP8:
emit_1b(p, (char) regmem.disp);
break;
case mod_DISP32:
emit_1i(p, regmem.disp);
break;
default:
assert(0);
break;
}
}
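 
/* Editor's note (illustrative): for "mov eax, [ebx+8]" the register operand
 * is EAX (reg field 0) and the memory operand is EBX with an 8-bit
 * displacement (mod_DISP8 = 01b, r/m = 011b), so the function above emits
 * the modRM byte 01 000 011b = 0x43 followed by the displacement byte 0x08.
 */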
 
/* Emits the "/0".."/7" specialized versions of the modrm ("/r") bytes.
*/
static void emit_modrm_noreg( struct x86_function *p,
unsigned op,
struct x86_reg regmem )
{
struct x86_reg dummy = x86_make_reg(file_REG32, op);
emit_modrm(p, dummy, regmem);
}
 
/* Many x86 instructions have two opcodes to cope with the situations
* where the destination is a register or memory reference
* respectively. This function selects the correct opcode based on
* the arguments presented.
*/
static void emit_op_modrm( struct x86_function *p,
unsigned char op_dst_is_reg,
unsigned char op_dst_is_mem,
struct x86_reg dst,
struct x86_reg src )
{
switch (dst.mod) {
case mod_REG:
emit_1ub(p, op_dst_is_reg);
emit_modrm(p, dst, src);
break;
case mod_INDIRECT:
case mod_DISP32:
case mod_DISP8:
assert(src.mod == mod_REG);
emit_1ub(p, op_dst_is_mem);
emit_modrm(p, src, dst);
break;
default:
assert(0);
break;
}
}
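 
/* Editor's note (illustrative): x86_mov() passes 0x8b/0x89 here, so
 * "mov eax, [ebx]" picks opcode 0x8b (register destination) while
 * "mov [ebx], eax" swaps the modRM operands and picks opcode 0x89 (memory
 * destination), matching the Intel MOV r32,r/m32 and MOV r/m32,r32 forms.
 */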
 
 
 
 
 
 
 
/* Create and manipulate registers and regmem values:
*/
struct x86_reg x86_make_reg( enum x86_reg_file file,
enum x86_reg_name idx )
{
struct x86_reg reg;
 
reg.file = file;
reg.idx = idx;
reg.mod = mod_REG;
reg.disp = 0;
 
return reg;
}
 
struct x86_reg x86_make_disp( struct x86_reg reg,
int disp )
{
assert(reg.file == file_REG32);
 
if (reg.mod == mod_REG)
reg.disp = disp;
else
reg.disp += disp;
 
if (reg.disp == 0 && reg.idx != reg_BP)
reg.mod = mod_INDIRECT;
else if (reg.disp <= 127 && reg.disp >= -128)
reg.mod = mod_DISP8;
else
reg.mod = mod_DISP32;
 
return reg;
}
 
struct x86_reg x86_deref( struct x86_reg reg )
{
return x86_make_disp(reg, 0);
}
 
struct x86_reg x86_get_base_reg( struct x86_reg reg )
{
return x86_make_reg( reg.file, reg.idx );
}
 
int x86_get_label( struct x86_function *p )
{
return p->csr - p->store;
}
 
 
 
/***********************************************************************
* x86 instructions
*/
 
 
void x64_rexw(struct x86_function *p)
{
if(x86_target(p) != X86_32)
emit_1ub(p, 0x48);
}
 
void x86_jcc( struct x86_function *p,
enum x86_cc cc,
int label )
{
int offset = label - (x86_get_label(p) + 2);
DUMP_I(cc);
if (offset < 0) {
/*assert(p->csr - p->store > -offset);*/
if (p->csr - p->store <= -offset) {
/* probably out of memory (using the error_overflow buffer) */
return;
}
}
 
if (offset <= 127 && offset >= -128) {
emit_1ub(p, 0x70 + cc);
emit_1b(p, (char) offset);
}
else {
offset = label - (x86_get_label(p) + 6);
emit_2ub(p, 0x0f, 0x80 + cc);
emit_1i(p, offset);
}
}
 
/* Always use a 32bit offset for forward jumps:
*/
int x86_jcc_forward( struct x86_function *p,
enum x86_cc cc )
{
DUMP_I(cc);
emit_2ub(p, 0x0f, 0x80 + cc);
emit_1i(p, 0);
return x86_get_label(p);
}
 
int x86_jmp_forward( struct x86_function *p)
{
DUMP();
emit_1ub(p, 0xe9);
emit_1i(p, 0);
return x86_get_label(p);
}
 
int x86_call_forward( struct x86_function *p)
{
DUMP();
 
emit_1ub(p, 0xe8);
emit_1i(p, 0);
return x86_get_label(p);
}
 
/* Fixup offset from forward jump:
*/
void x86_fixup_fwd_jump( struct x86_function *p,
int fixup )
{
*(int *)(p->store + fixup - 4) = x86_get_label(p) - fixup;
}
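 
/* Editor's note: an illustrative sketch, not part of the original file, of
 * the usual forward-branch idiom built from the helpers above: emit the
 * conditional jump with a placeholder offset, emit the body, then patch the
 * 32-bit offset once the target is known. cc_E is the "equal/zero"
 * condition code declared in rtasm_x86sse.h.
 */
#if 0
static void
example_skip_body_if_zero(struct x86_function *p)
{
   int fixup;
 
   x86_test(p, x86_make_reg(file_REG32, reg_AX),
               x86_make_reg(file_REG32, reg_AX));
   fixup = x86_jcc_forward(p, cc_E);   /* jump over the body if EAX == 0 */
 
   /* ... emit the conditional body here ... */
 
   x86_fixup_fwd_jump(p, fixup);       /* patch the jump target */
}
#endif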
 
void x86_jmp( struct x86_function *p, int label)
{
DUMP_I( label );
emit_1ub(p, 0xe9);
emit_1i(p, label - x86_get_label(p) - 4);
}
 
void x86_call( struct x86_function *p, struct x86_reg reg)
{
DUMP_R( reg );
emit_1ub(p, 0xff);
emit_modrm_noreg(p, 2, reg);
}
 
 
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
{
DUMP_RI( dst, imm );
assert(dst.file == file_REG32);
assert(dst.mod == mod_REG);
emit_1ub(p, 0xb8 + dst.idx);
emit_1i(p, imm);
}
 
void x86_mov_imm( struct x86_function *p, struct x86_reg dst, int imm )
{
DUMP_RI( dst, imm );
if(dst.mod == mod_REG)
x86_mov_reg_imm(p, dst, imm);
else
{
emit_1ub(p, 0xc7);
emit_modrm_noreg(p, 0, dst);
emit_1i(p, imm);
}
}
 
void x86_mov16_imm( struct x86_function *p, struct x86_reg dst, uint16_t imm )
{
DUMP_RI( dst, imm );
emit_1ub(p, 0x66);
if(dst.mod == mod_REG)
{
emit_1ub(p, 0xb8 + dst.idx);
emit_2ub(p, imm & 0xff, imm >> 8);
}
else
{
emit_1ub(p, 0xc7);
emit_modrm_noreg(p, 0, dst);
emit_2ub(p, imm & 0xff, imm >> 8);
}
}
 
void x86_mov8_imm( struct x86_function *p, struct x86_reg dst, uint8_t imm )
{
DUMP_RI( dst, imm );
if(dst.mod == mod_REG)
{
emit_1ub(p, 0xb0 + dst.idx);
emit_1ub(p, imm);
}
else
{
emit_1ub(p, 0xc6);
emit_modrm_noreg(p, 0, dst);
emit_1ub(p, imm);
}
}
 
/**
* Immediate group 1 instructions.
*/
static INLINE void
x86_group1_imm( struct x86_function *p,
unsigned op, struct x86_reg dst, int imm )
{
assert(dst.file == file_REG32);
assert(dst.mod == mod_REG);
if(-0x80 <= imm && imm < 0x80) {
emit_1ub(p, 0x83);
emit_modrm_noreg(p, op, dst);
emit_1b(p, (char)imm);
}
else {
emit_1ub(p, 0x81);
emit_modrm_noreg(p, op, dst);
emit_1i(p, imm);
}
}
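 
/* Editor's note (illustrative): with dst = EAX and imm = 5, x86_add_imm()
 * below takes the short form and emits 83 C0 05 ("add eax, 5"); imm = 1000
 * falls outside the signed 8-bit range, so the long form 81 C0 E8 03 00 00
 * is emitted instead.
 */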
 
void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm )
{
DUMP_RI( dst, imm );
x86_group1_imm(p, 0, dst, imm);
}
 
void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm )
{
DUMP_RI( dst, imm );
x86_group1_imm(p, 1, dst, imm);
}
 
void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm )
{
DUMP_RI( dst, imm );
x86_group1_imm(p, 4, dst, imm);
}
 
void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm )
{
DUMP_RI( dst, imm );
x86_group1_imm(p, 5, dst, imm);
}
 
void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm )
{
DUMP_RI( dst, imm );
x86_group1_imm(p, 6, dst, imm);
}
 
void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm )
{
DUMP_RI( dst, imm );
x86_group1_imm(p, 7, dst, imm);
}
 
 
void x86_push( struct x86_function *p,
struct x86_reg reg )
{
DUMP_R( reg );
if (reg.mod == mod_REG)
emit_1ub(p, 0x50 + reg.idx);
else
{
emit_1ub(p, 0xff);
emit_modrm_noreg(p, 6, reg);
}
 
 
p->stack_offset += sizeof(void*);
}
 
void x86_push_imm32( struct x86_function *p,
int imm32 )
{
DUMP_I( imm32 );
emit_1ub(p, 0x68);
emit_1i(p, imm32);
 
p->stack_offset += sizeof(void*);
}
 
 
void x86_pop( struct x86_function *p,
struct x86_reg reg )
{
DUMP_R( reg );
assert(reg.mod == mod_REG);
emit_1ub(p, 0x58 + reg.idx);
p->stack_offset -= sizeof(void*);
}
 
void x86_inc( struct x86_function *p,
struct x86_reg reg )
{
DUMP_R( reg );
if(x86_target(p) == X86_32 && reg.mod == mod_REG)
{
emit_1ub(p, 0x40 + reg.idx);
return;
}
emit_1ub(p, 0xff);
emit_modrm_noreg(p, 0, reg);
}
 
void x86_dec( struct x86_function *p,
struct x86_reg reg )
{
DUMP_R( reg );
if(x86_target(p) == X86_32 && reg.mod == mod_REG)
{
emit_1ub(p, 0x48 + reg.idx);
return;
}
emit_1ub(p, 0xff);
emit_modrm_noreg(p, 1, reg);
}
 
void x86_ret( struct x86_function *p )
{
DUMP();
assert(p->stack_offset == 0);
emit_1ub(p, 0xc3);
}
 
void x86_retw( struct x86_function *p, unsigned short imm )
{
DUMP();
emit_3ub(p, 0xc2, imm & 0xff, (imm >> 8) & 0xff);
}
 
void x86_sahf( struct x86_function *p )
{
DUMP();
emit_1ub(p, 0x9e);
}
 
void x86_mov( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
/* special hack for reading arguments until we support x86-64 registers everywhere */
if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8))
{
uint8_t rex = 0x40;
if(dst.idx >= 8)
{
rex |= 4;
dst.idx -= 8;
}
if(src.idx >= 8)
{
rex |= 1;
src.idx -= 8;
}
emit_1ub(p, rex);
}
emit_op_modrm( p, 0x8b, 0x89, dst, src );
}
 
void x86_mov16( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_1ub(p, 0x66);
emit_op_modrm( p, 0x8b, 0x89, dst, src );
}
 
void x86_mov8( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_op_modrm( p, 0x8a, 0x88, dst, src );
}
 
void x64_mov64( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
uint8_t rex = 0x48;
DUMP_RR( dst, src );
assert(x86_target(p) != X86_32);
 
/* special hack for reading arguments until we support x86-64 registers everywhere */
if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8))
{
if(dst.idx >= 8)
{
rex |= 4;
dst.idx -= 8;
}
if(src.idx >= 8)
{
rex |= 1;
src.idx -= 8;
}
}
emit_1ub(p, rex);
emit_op_modrm( p, 0x8b, 0x89, dst, src );
}
 
void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
DUMP_RR( dst, src );
emit_2ub(p, 0x0f, 0xb6);
emit_modrm(p, dst, src);
}
 
void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
DUMP_RR( dst, src );
emit_2ub(p, 0x0f, 0xb7);
emit_modrm(p, dst, src);
}
 
void x86_cmovcc( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src,
enum x86_cc cc)
{
DUMP_RRI( dst, src, cc );
emit_2ub( p, 0x0f, 0x40 + cc );
emit_modrm( p, dst, src );
}
 
void x86_xor( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_op_modrm( p, 0x33, 0x31, dst, src );
}
 
void x86_cmp( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_op_modrm( p, 0x3b, 0x39, dst, src );
}
 
void x86_lea( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_1ub(p, 0x8d);
emit_modrm( p, dst, src );
}
 
void x86_test( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_1ub(p, 0x85);
emit_modrm( p, dst, src );
}
 
void x86_add( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_op_modrm(p, 0x03, 0x01, dst, src );
}
 
/* Calculate EAX * src, results in EDX:EAX.
*/
void x86_mul( struct x86_function *p,
struct x86_reg src )
{
DUMP_R( src );
emit_1ub(p, 0xf7);
emit_modrm_noreg(p, 4, src );
}
 
 
void x86_imul( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0xAF);
emit_modrm(p, dst, src);
}
 
 
void x86_sub( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_op_modrm(p, 0x2b, 0x29, dst, src );
}
 
void x86_or( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_op_modrm( p, 0x0b, 0x09, dst, src );
}
 
void x86_and( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_op_modrm( p, 0x23, 0x21, dst, src );
}
 
void x86_div( struct x86_function *p,
struct x86_reg src )
{
assert(src.file == file_REG32 && src.mod == mod_REG);
emit_op_modrm(p, 0xf7, 0, x86_make_reg(file_REG32, 6), src);
}
 
void x86_bswap( struct x86_function *p, struct x86_reg reg )
{
DUMP_R(reg);
assert(reg.file == file_REG32);
assert(reg.mod == mod_REG);
emit_2ub(p, 0x0f, 0xc8 + reg.idx);
}
 
void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm )
{
DUMP_RI(reg, imm);
if(imm == 1)
{
emit_1ub(p, 0xd1);
emit_modrm_noreg(p, 5, reg);
}
else
{
emit_1ub(p, 0xc1);
emit_modrm_noreg(p, 5, reg);
emit_1ub(p, imm);
}
}
 
void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm )
{
DUMP_RI(reg, imm);
if(imm == 1)
{
emit_1ub(p, 0xd1);
emit_modrm_noreg(p, 7, reg);
}
else
{
emit_1ub(p, 0xc1);
emit_modrm_noreg(p, 7, reg);
emit_1ub(p, imm);
}
}
 
void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm )
{
DUMP_RI(reg, imm);
if(imm == 1)
{
emit_1ub(p, 0xd1);
emit_modrm_noreg(p, 4, reg);
}
else
{
emit_1ub(p, 0xc1);
emit_modrm_noreg(p, 4, reg);
emit_1ub(p, imm);
}
}
 
 
/***********************************************************************
* SSE instructions
*/
 
void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr)
{
DUMP_R( ptr );
assert(ptr.mod != mod_REG);
emit_2ub(p, 0x0f, 0x18);
emit_modrm_noreg(p, 0, ptr);
}
 
void sse_prefetch0( struct x86_function *p, struct x86_reg ptr)
{
DUMP_R( ptr );
assert(ptr.mod != mod_REG);
emit_2ub(p, 0x0f, 0x18);
emit_modrm_noreg(p, 1, ptr);
}
 
void sse_prefetch1( struct x86_function *p, struct x86_reg ptr)
{
DUMP_R( ptr );
assert(ptr.mod != mod_REG);
emit_2ub(p, 0x0f, 0x18);
emit_modrm_noreg(p, 2, ptr);
}
 
void sse_movntps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src)
{
DUMP_RR( dst, src );
 
assert(dst.mod != mod_REG);
assert(src.mod == mod_REG);
emit_2ub(p, 0x0f, 0x2b);
emit_modrm(p, src, dst);
}
 
 
 
 
void sse_movss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_2ub(p, 0xF3, X86_TWOB);
emit_op_modrm( p, 0x10, 0x11, dst, src );
}
 
void sse_movaps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_1ub(p, X86_TWOB);
emit_op_modrm( p, 0x28, 0x29, dst, src );
}
 
void sse_movups( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_1ub(p, X86_TWOB);
emit_op_modrm( p, 0x10, 0x11, dst, src );
}
 
void sse_movhps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
assert(dst.mod != mod_REG || src.mod != mod_REG);
emit_1ub(p, X86_TWOB);
emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */
}
 
void sse_movlps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
assert(dst.mod != mod_REG || src.mod != mod_REG);
emit_1ub(p, X86_TWOB);
emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */
}
 
void sse_maxps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x5F);
emit_modrm( p, dst, src );
}
 
void sse_maxss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_3ub(p, 0xF3, X86_TWOB, 0x5F);
emit_modrm( p, dst, src );
}
 
void sse_divss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_3ub(p, 0xF3, X86_TWOB, 0x5E);
emit_modrm( p, dst, src );
}
 
void sse_minps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x5D);
emit_modrm( p, dst, src );
}
 
void sse_subps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x5C);
emit_modrm( p, dst, src );
}
 
void sse_mulps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x59);
emit_modrm( p, dst, src );
}
 
void sse_mulss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_3ub(p, 0xF3, X86_TWOB, 0x59);
emit_modrm( p, dst, src );
}
 
void sse_addps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x58);
emit_modrm( p, dst, src );
}
 
void sse_addss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_3ub(p, 0xF3, X86_TWOB, 0x58);
emit_modrm( p, dst, src );
}
 
void sse_andnps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x55);
emit_modrm( p, dst, src );
}
 
void sse_andps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x54);
emit_modrm( p, dst, src );
}
 
void sse_rsqrtps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x52);
emit_modrm( p, dst, src );
}
 
void sse_rsqrtss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_3ub(p, 0xF3, X86_TWOB, 0x52);
emit_modrm( p, dst, src );
 
}
 
void sse_movhlps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
assert(dst.mod == mod_REG && src.mod == mod_REG);
emit_2ub(p, X86_TWOB, 0x12);
emit_modrm( p, dst, src );
}
 
void sse_movlhps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
assert(dst.mod == mod_REG && src.mod == mod_REG);
emit_2ub(p, X86_TWOB, 0x16);
emit_modrm( p, dst, src );
}
 
void sse_orps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x56);
emit_modrm( p, dst, src );
}
 
void sse_xorps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x57);
emit_modrm( p, dst, src );
}
 
void sse_cvtps2pi( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
assert(dst.file == file_MMX &&
(src.file == file_XMM || src.mod != mod_REG));
 
p->need_emms = 1;
 
emit_2ub(p, X86_TWOB, 0x2d);
emit_modrm( p, dst, src );
}
 
void sse2_cvtdq2ps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x5b);
emit_modrm( p, dst, src );
}
 
 
/* Shufps can also be used to implement a reduced swizzle when dest ==
* arg0.
*/
void sse_shufps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src,
unsigned char shuf)
{
DUMP_RRI( dst, src, shuf );
emit_2ub(p, X86_TWOB, 0xC6);
emit_modrm(p, dst, src);
emit_1ub(p, shuf);
}
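 
/* Editor's note (illustrative): the immediate holds two bits per output
 * lane; the low two lanes of dst are selected from dst and the high two
 * from src. With dst == src and shuf = 0x1b (00 01 10 11b) the result is
 * the fully reversed vector (w, z, y, x), the "reduced swizzle" case noted
 * above.
 */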
 
void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
DUMP_RR( dst, src );
emit_2ub( p, X86_TWOB, 0x15 );
emit_modrm( p, dst, src );
}
 
void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
DUMP_RR( dst, src );
emit_2ub( p, X86_TWOB, 0x14 );
emit_modrm( p, dst, src );
}
 
void sse_cmpps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src,
enum sse_cc cc)
{
DUMP_RRI( dst, src, cc );
emit_2ub(p, X86_TWOB, 0xC2);
emit_modrm(p, dst, src);
emit_1ub(p, cc);
}
 
void sse_pmovmskb( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src)
{
DUMP_RR( dst, src );
emit_3ub(p, 0x66, X86_TWOB, 0xD7);
emit_modrm(p, dst, src);
}
 
void sse_movmskps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src)
{
DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x50);
emit_modrm(p, dst, src);
}
 
/***********************************************************************
* SSE2 instructions
*/
 
void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
DUMP_RR(dst, src);
emit_2ub(p, 0x66, 0x0f);
if(dst.mod == mod_REG && dst.file == file_REG32)
{
emit_1ub(p, 0x7e);
emit_modrm(p, src, dst);
}
else
{
emit_op_modrm(p, 0x6e, 0x7e, dst, src);
}
}
 
void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
DUMP_RR(dst, src);
switch (dst.mod) {
case mod_REG:
emit_3ub(p, 0xf3, 0x0f, 0x7e);
emit_modrm(p, dst, src);
break;
case mod_INDIRECT:
case mod_DISP32:
case mod_DISP8:
assert(src.mod == mod_REG);
emit_3ub(p, 0x66, 0x0f, 0xd6);
emit_modrm(p, src, dst);
break;
default:
assert(0);
break;
}
}
 
void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
DUMP_RR(dst, src);
emit_2ub(p, 0xf3, 0x0f);
emit_op_modrm(p, 0x6f, 0x7f, dst, src);
}
 
void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
DUMP_RR(dst, src);
emit_2ub(p, 0x66, 0x0f);
emit_op_modrm(p, 0x6f, 0x7f, dst, src);
}
 
void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
DUMP_RR(dst, src);
emit_2ub(p, 0xf2, 0x0f);
emit_op_modrm(p, 0x10, 0x11, dst, src);
}
 
void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
DUMP_RR(dst, src);
emit_2ub(p, 0x66, 0x0f);
emit_op_modrm(p, 0x10, 0x11, dst, src);
}
 
void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
DUMP_RR(dst, src);
emit_2ub(p, 0x66, 0x0f);
emit_op_modrm(p, 0x28, 0x29, dst, src);
}
 
/**
* Perform a reduced swizzle:
*/
void sse2_pshufd( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src,
unsigned char shuf)
{
DUMP_RRI( dst, src, shuf );
emit_3ub(p, 0x66, X86_TWOB, 0x70);
emit_modrm(p, dst, src);
emit_1ub(p, shuf);
}
 
void sse2_pshuflw( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src,
unsigned char shuf)
{
DUMP_RRI( dst, src, shuf );
emit_3ub(p, 0xf2, X86_TWOB, 0x70);
emit_modrm(p, dst, src);
emit_1ub(p, shuf);
}
 
void sse2_pshufhw( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src,
unsigned char shuf)
{
DUMP_RRI( dst, src, shuf );
emit_3ub(p, 0xf3, X86_TWOB, 0x70);
emit_modrm(p, dst, src);
emit_1ub(p, shuf);
}
 
void sse2_cvttps2dq( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_3ub( p, 0xF3, X86_TWOB, 0x5B );
emit_modrm( p, dst, src );
}
 
void sse2_cvtps2dq( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_3ub(p, 0x66, X86_TWOB, 0x5B);
emit_modrm( p, dst, src );
}
 
void sse2_cvtsd2ss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_3ub(p, 0xf2, 0x0f, 0x5a);
emit_modrm( p, dst, src );
}
 
void sse2_cvtpd2ps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_3ub(p, 0x66, 0x0f, 0x5a);
emit_modrm( p, dst, src );
}
 
void sse2_packssdw( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_3ub(p, 0x66, X86_TWOB, 0x6B);
emit_modrm( p, dst, src );
}
 
void sse2_packsswb( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_3ub(p, 0x66, X86_TWOB, 0x63);
emit_modrm( p, dst, src );
}
 
void sse2_packuswb( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_3ub(p, 0x66, X86_TWOB, 0x67);
emit_modrm( p, dst, src );
}
 
void sse2_punpcklbw( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_3ub(p, 0x66, X86_TWOB, 0x60);
emit_modrm( p, dst, src );
}
 
void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
DUMP_RR( dst, src );
emit_3ub(p, 0x66, 0x0f, 0x61);
emit_modrm( p, dst, src );
}
 
void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
DUMP_RR( dst, src );
emit_3ub(p, 0x66, 0x0f, 0x62);
emit_modrm( p, dst, src );
}
 
void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
DUMP_RR( dst, src );
emit_3ub(p, 0x66, 0x0f, 0x6c);
emit_modrm( p, dst, src );
}
 
void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
DUMP_RI(dst, imm);
emit_3ub(p, 0x66, 0x0f, 0x71);
emit_modrm_noreg(p, 6, dst);
emit_1ub(p, imm);
}
 
void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
DUMP_RI(dst, imm);
emit_3ub(p, 0x66, 0x0f, 0x72);
emit_modrm_noreg(p, 6, dst);
emit_1ub(p, imm);
}
 
void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
DUMP_RI(dst, imm);
emit_3ub(p, 0x66, 0x0f, 0x73);
emit_modrm_noreg(p, 6, dst);
emit_1ub(p, imm);
}
 
void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
DUMP_RI(dst, imm);
emit_3ub(p, 0x66, 0x0f, 0x71);
emit_modrm_noreg(p, 2, dst);
emit_1ub(p, imm);
}
 
void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
DUMP_RI(dst, imm);
emit_3ub(p, 0x66, 0x0f, 0x72);
emit_modrm_noreg(p, 2, dst);
emit_1ub(p, imm);
}
 
void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
DUMP_RI(dst, imm);
emit_3ub(p, 0x66, 0x0f, 0x73);
emit_modrm_noreg(p, 2, dst);
emit_1ub(p, imm);
}
 
void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
DUMP_RI(dst, imm);
emit_3ub(p, 0x66, 0x0f, 0x71);
emit_modrm_noreg(p, 4, dst);
emit_1ub(p, imm);
}
 
void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
DUMP_RI(dst, imm);
emit_3ub(p, 0x66, 0x0f, 0x72);
emit_modrm_noreg(p, 4, dst);
emit_1ub(p, imm);
}
 
void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
DUMP_RR(dst, src);
emit_3ub(p, 0x66, 0x0f, 0xeb);
emit_modrm(p, dst, src);
}
 
void sse2_rcpps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_2ub(p, X86_TWOB, 0x53);
emit_modrm( p, dst, src );
}
 
void sse2_rcpss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
emit_3ub(p, 0xF3, X86_TWOB, 0x53);
emit_modrm( p, dst, src );
}
 
/***********************************************************************
* x87 instructions
*/
static void note_x87_pop( struct x86_function *p )
{
p->x87_stack--;
assert(p->x87_stack >= 0);
}
 
static void note_x87_push( struct x86_function *p )
{
p->x87_stack++;
assert(p->x87_stack <= 7);
}
 
void x87_assert_stack_empty( struct x86_function *p )
{
assert (p->x87_stack == 0);
}
 
 
void x87_fist( struct x86_function *p, struct x86_reg dst )
{
DUMP_R( dst );
emit_1ub(p, 0xdb);
emit_modrm_noreg(p, 2, dst);
}
 
void x87_fistp( struct x86_function *p, struct x86_reg dst )
{
DUMP_R( dst );
emit_1ub(p, 0xdb);
emit_modrm_noreg(p, 3, dst);
note_x87_pop(p);
}
 
void x87_fild( struct x86_function *p, struct x86_reg arg )
{
DUMP_R( arg );
emit_1ub(p, 0xdf);
emit_modrm_noreg(p, 0, arg);
note_x87_push(p);
}
 
void x87_fldz( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xee);
note_x87_push(p);
}
 
 
void x87_fldcw( struct x86_function *p, struct x86_reg arg )
{
DUMP_R( arg );
assert(arg.file == file_REG32);
assert(arg.mod != mod_REG);
emit_1ub(p, 0xd9);
emit_modrm_noreg(p, 5, arg);
}
 
void x87_fld1( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xe8);
note_x87_push(p);
}
 
void x87_fldl2e( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xea);
note_x87_push(p);
}
 
void x87_fldln2( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xed);
note_x87_push(p);
}
 
void x87_fwait( struct x86_function *p )
{
DUMP();
emit_1ub(p, 0x9b);
}
 
void x87_fnclex( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xdb, 0xe2);
}
 
void x87_fclex( struct x86_function *p )
{
x87_fwait(p);
x87_fnclex(p);
}
 
void x87_fcmovb( struct x86_function *p, struct x86_reg arg )
{
DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xda, 0xc0+arg.idx);
}
 
void x87_fcmove( struct x86_function *p, struct x86_reg arg )
{
DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xda, 0xc8+arg.idx);
}
 
void x87_fcmovbe( struct x86_function *p, struct x86_reg arg )
{
DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xda, 0xd0+arg.idx);
}
 
void x87_fcmovnb( struct x86_function *p, struct x86_reg arg )
{
DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xdb, 0xc0+arg.idx);
}
 
void x87_fcmovne( struct x86_function *p, struct x86_reg arg )
{
DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xdb, 0xc8+arg.idx);
}
 
void x87_fcmovnbe( struct x86_function *p, struct x86_reg arg )
{
DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xdb, 0xd0+arg.idx);
}
 
 
 
static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg,
unsigned char dst0ub0,
unsigned char dst0ub1,
unsigned char arg0ub0,
unsigned char arg0ub1,
unsigned char argmem_noreg)
{
assert(dst.file == file_x87);
 
if (arg.file == file_x87) {
if (dst.idx == 0)
emit_2ub(p, dst0ub0, dst0ub1+arg.idx);
else if (arg.idx == 0)
emit_2ub(p, arg0ub0, arg0ub1+dst.idx); /* op ST(i), ST(0) form */
else
assert(0);
}
else if (dst.idx == 0) {
assert(arg.file == file_REG32);
emit_1ub(p, 0xd8);
emit_modrm_noreg(p, argmem_noreg, arg);
}
else
assert(0);
}
 
void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
DUMP_RR( dst, src );
x87_arith_op(p, dst, src,
0xd8, 0xc8,
0xdc, 0xc8,
4);
}
 
void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
DUMP_RR( dst, src );
x87_arith_op(p, dst, src,
0xd8, 0xe0,
0xdc, 0xe8,
4);
}
 
void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
DUMP_RR( dst, src );
x87_arith_op(p, dst, src,
0xd8, 0xe8,
0xdc, 0xe0,
5);
}
 
void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
DUMP_RR( dst, src );
x87_arith_op(p, dst, src,
0xd8, 0xc0,
0xdc, 0xc0,
0);
}
 
void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
DUMP_RR( dst, src );
x87_arith_op(p, dst, src,
0xd8, 0xf0,
0xdc, 0xf8,
6);
}
 
void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
DUMP_RR( dst, src );
x87_arith_op(p, dst, src,
0xd8, 0xf8,
0xdc, 0xf0,
7);
}
 
void x87_fmulp( struct x86_function *p, struct x86_reg dst )
{
DUMP_R( dst );
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xc8+dst.idx);
note_x87_pop(p);
}
 
void x87_fsubp( struct x86_function *p, struct x86_reg dst )
{
DUMP_R( dst );
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xe8+dst.idx);
note_x87_pop(p);
}
 
void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
{
DUMP_R( dst );
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xe0+dst.idx);
note_x87_pop(p);
}
 
void x87_faddp( struct x86_function *p, struct x86_reg dst )
{
DUMP_R( dst );
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xc0+dst.idx);
note_x87_pop(p);
}
 
void x87_fdivp( struct x86_function *p, struct x86_reg dst )
{
DUMP_R( dst );
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xf8+dst.idx);
note_x87_pop(p);
}
 
void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
{
DUMP_R( dst );
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xf0+dst.idx);
note_x87_pop(p);
}
 
void x87_ftst( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xe4);
}
 
void x87_fucom( struct x86_function *p, struct x86_reg arg )
{
DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xdd, 0xe0+arg.idx);
}
 
void x87_fucomp( struct x86_function *p, struct x86_reg arg )
{
DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xdd, 0xe8+arg.idx);
note_x87_pop(p);
}
 
void x87_fucompp( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xda, 0xe9);
note_x87_pop(p); /* pop twice */
note_x87_pop(p); /* pop twice */
}
 
void x87_fxch( struct x86_function *p, struct x86_reg arg )
{
DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xd9, 0xc8+arg.idx);
}
 
void x87_fabs( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xe1);
}
 
void x87_fchs( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xe0);
}
 
void x87_fcos( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xff);
}
 
 
void x87_fprndint( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xfc);
}
 
void x87_fscale( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xfd);
}
 
void x87_fsin( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xfe);
}
 
void x87_fsincos( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xfb);
}
 
void x87_fsqrt( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xfa);
}
 
void x87_fxtract( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xf4);
}
 
/* st0 = (2^st0)-1
*
* Restrictions: -1.0 <= st0 <= 1.0
*/
void x87_f2xm1( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xf0);
}
 
/* st1 = st1 * log2(st0);
* pop_stack;
*/
void x87_fyl2x( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xf1);
note_x87_pop(p);
}
 
/* st1 = st1 * log2(st0 + 1.0);
* pop_stack;
*
* A fast operation, with restrictions: -.29 < st0 < .29
*/
void x87_fyl2xp1( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xf9);
note_x87_pop(p);
}
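 
/* Editor's note: an illustrative sketch, not part of the original file,
 * showing the classic LG2 sequence built from the helpers above: fyl2x
 * computes st1 = st1 * log2(st0) and pops, so loading 1.0 first leaves
 * plain log2(x) on top of the stack. `src` is assumed to reference a
 * 32-bit float in memory.
 */
#if 0
static void
example_emit_lg2(struct x86_function *p, struct x86_reg src)
{
   x87_fld1(p);        /* st0 = 1.0                         */
   x87_fld(p, src);    /* st0 = x, st1 = 1.0                */
   x87_fyl2x(p);       /* st0 = 1.0 * log2(x), stack popped */
}
#endif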
 
 
void x87_fld( struct x86_function *p, struct x86_reg arg )
{
DUMP_R( arg );
if (arg.file == file_x87)
emit_2ub(p, 0xd9, 0xc0 + arg.idx);
else {
emit_1ub(p, 0xd9);
emit_modrm_noreg(p, 0, arg);
}
note_x87_push(p);
}
 
void x87_fst( struct x86_function *p, struct x86_reg dst )
{
DUMP_R( dst );
if (dst.file == file_x87)
emit_2ub(p, 0xdd, 0xd0 + dst.idx);
else {
emit_1ub(p, 0xd9);
emit_modrm_noreg(p, 2, dst);
}
}
 
void x87_fstp( struct x86_function *p, struct x86_reg dst )
{
DUMP_R( dst );
if (dst.file == file_x87)
emit_2ub(p, 0xdd, 0xd8 + dst.idx);
else {
emit_1ub(p, 0xd9);
emit_modrm_noreg(p, 3, dst);
}
note_x87_pop(p);
}
 
void x87_fpop( struct x86_function *p )
{
x87_fstp( p, x86_make_reg( file_x87, 0 ));
}
 
 
void x87_fcom( struct x86_function *p, struct x86_reg dst )
{
DUMP_R( dst );
if (dst.file == file_x87)
emit_2ub(p, 0xd8, 0xd0 + dst.idx);
else {
emit_1ub(p, 0xd8);
emit_modrm_noreg(p, 2, dst);
}
}
 
 
void x87_fcomp( struct x86_function *p, struct x86_reg dst )
{
DUMP_R( dst );
if (dst.file == file_x87)
emit_2ub(p, 0xd8, 0xd8 + dst.idx);
else {
emit_1ub(p, 0xd8);
emit_modrm_noreg(p, 3, dst);
}
note_x87_pop(p);
}
 
void x87_fcomi( struct x86_function *p, struct x86_reg arg )
{
DUMP_R( arg );
emit_2ub(p, 0xdb, 0xf0+arg.idx);
}
 
void x87_fcomip( struct x86_function *p, struct x86_reg arg )
{
DUMP_R( arg );
emit_2ub(p, 0xdf, 0xf0+arg.idx); /* FCOMIP is DF F0+i (FCOMI is DB F0+i) */
note_x87_pop(p);
}
 
 
void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
{
DUMP_R( dst );
assert(dst.file == file_REG32);
 
if (dst.idx == reg_AX &&
dst.mod == mod_REG)
emit_2ub(p, 0xdf, 0xe0);
else {
emit_1ub(p, 0xdd);
emit_modrm_noreg(p, 7, dst);
}
}
 
 
void x87_fnstcw( struct x86_function *p, struct x86_reg dst )
{
DUMP_R( dst );
assert(dst.file == file_REG32);
 
emit_1ub(p, 0x9b); /* WAIT -- needed? */
emit_1ub(p, 0xd9);
emit_modrm_noreg(p, 7, dst);
}
 
 
 
 
/***********************************************************************
* MMX instructions
*/
 
void mmx_emms( struct x86_function *p )
{
DUMP();
assert(p->need_emms);
emit_2ub(p, 0x0f, 0x77);
p->need_emms = 0;
}
 
void mmx_packssdw( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
assert(dst.file == file_MMX &&
(src.file == file_MMX || src.mod != mod_REG));
 
p->need_emms = 1;
 
emit_2ub(p, X86_TWOB, 0x6b);
emit_modrm( p, dst, src );
}
 
void mmx_packuswb( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
assert(dst.file == file_MMX &&
(src.file == file_MMX || src.mod != mod_REG));
 
p->need_emms = 1;
 
emit_2ub(p, X86_TWOB, 0x67);
emit_modrm( p, dst, src );
}
 
void mmx_movd( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
p->need_emms = 1;
emit_1ub(p, X86_TWOB);
emit_op_modrm( p, 0x6e, 0x7e, dst, src );
}
 
void mmx_movq( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
{
DUMP_RR( dst, src );
p->need_emms = 1;
emit_1ub(p, X86_TWOB);
emit_op_modrm( p, 0x6f, 0x7f, dst, src );
}
 
 
/***********************************************************************
* Helper functions
*/
 
 
void x86_cdecl_caller_push_regs( struct x86_function *p )
{
x86_push(p, x86_make_reg(file_REG32, reg_AX));
x86_push(p, x86_make_reg(file_REG32, reg_CX));
x86_push(p, x86_make_reg(file_REG32, reg_DX));
}
 
void x86_cdecl_caller_pop_regs( struct x86_function *p )
{
x86_pop(p, x86_make_reg(file_REG32, reg_DX));
x86_pop(p, x86_make_reg(file_REG32, reg_CX));
x86_pop(p, x86_make_reg(file_REG32, reg_AX));
}
 
 
struct x86_reg x86_fn_arg( struct x86_function *p,
unsigned arg )
{
switch(x86_target(p))
{
case X86_64_WIN64_ABI:
/* Microsoft uses a different calling convention than the rest of the world */
switch(arg)
{
case 1:
return x86_make_reg(file_REG32, reg_CX);
case 2:
return x86_make_reg(file_REG32, reg_DX);
case 3:
return x86_make_reg(file_REG32, reg_R8);
case 4:
return x86_make_reg(file_REG32, reg_R9);
default:
/* Win64 allocates stack slots as if it pushed the first 4 arguments too */
return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
p->stack_offset + arg * 8);
}
case X86_64_STD_ABI:
switch(arg)
{
case 1:
return x86_make_reg(file_REG32, reg_DI);
case 2:
return x86_make_reg(file_REG32, reg_SI);
case 3:
return x86_make_reg(file_REG32, reg_DX);
case 4:
return x86_make_reg(file_REG32, reg_CX);
case 5:
return x86_make_reg(file_REG32, reg_R8);
case 6:
return x86_make_reg(file_REG32, reg_R9);
default:
return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
p->stack_offset + (arg - 6) * 8); /* ??? */
}
case X86_32:
return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
p->stack_offset + arg * 4); /* ??? */
default:
assert(0 && "Unexpected x86 target ABI in x86_fn_arg");
return x86_make_reg(file_REG32, reg_CX); /* not used / silence warning */
}
}
 
static void x86_init_func_common( struct x86_function *p )
{
util_cpu_detect();
p->caps = 0;
if(util_cpu_caps.has_mmx)
p->caps |= X86_MMX;
if(util_cpu_caps.has_mmx2)
p->caps |= X86_MMX2;
if(util_cpu_caps.has_sse)
p->caps |= X86_SSE;
if(util_cpu_caps.has_sse2)
p->caps |= X86_SSE2;
if(util_cpu_caps.has_sse3)
p->caps |= X86_SSE3;
if(util_cpu_caps.has_sse4_1)
p->caps |= X86_SSE4_1;
p->csr = p->store;
DUMP_START();
}
 
void x86_init_func( struct x86_function *p )
{
p->size = 0;
p->store = NULL;
x86_init_func_common(p);
}
 
void x86_init_func_size( struct x86_function *p, unsigned code_size )
{
p->size = code_size;
p->store = rtasm_exec_malloc(code_size);
if (p->store == NULL) {
p->store = p->error_overflow;
}
x86_init_func_common(p);
}
 
void x86_release_func( struct x86_function *p )
{
if (p->store && p->store != p->error_overflow)
rtasm_exec_free(p->store);
 
p->store = NULL;
p->csr = NULL;
p->size = 0;
}
 
 
static INLINE x86_func
voidptr_to_x86_func(void *v)
{
union {
void *v;
x86_func f;
} u;
assert(sizeof(u.v) == sizeof(u.f));
u.v = v;
return u.f;
}
 
 
x86_func x86_get_func( struct x86_function *p )
{
DUMP_END();
if (DISASSEM && p->store)
debug_printf("disassemble %p %p\n", p->store, p->csr);
 
if (p->store == p->error_overflow)
return voidptr_to_x86_func(NULL);
else
return voidptr_to_x86_func(p->store);
}
 
#else
 
void x86sse_dummy( void );
 
void x86sse_dummy( void )
{
}
 
#endif
/drivers/video/Gallium/auxiliary/rtasm/rtasm_x86sse.h
0,0 → 1,416
/**************************************************************************
*
* Copyright (C) 1999-2005 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef _RTASM_X86SSE_H_
#define _RTASM_X86SSE_H_
 
#include "pipe/p_compiler.h"
#include "pipe/p_config.h"
 
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 
/* It is up to the caller to ensure that instructions issued are
* suitable for the host cpu. There are no checks made in this module
* for mmx/sse/sse2 support on the cpu.
*/
struct x86_reg {
unsigned file:2;
unsigned idx:4;
unsigned mod:2; /* mod_REG if this is just a register */
int disp:24; /* only +/- 23bits of offset - should be enough... */
};
 
#define X86_MMX 1
#define X86_MMX2 2
#define X86_SSE 4
#define X86_SSE2 8
#define X86_SSE3 0x10
#define X86_SSE4_1 0x20
 
struct x86_function {
unsigned caps;
unsigned size;
unsigned char *store;
unsigned char *csr;
 
unsigned stack_offset:16;
unsigned need_emms:8;
int x87_stack:8;
 
unsigned char error_overflow[4];
};
 
enum x86_reg_file {
file_REG32,
file_MMX,
file_XMM,
file_x87
};
 
/* Values for mod field of modr/m byte
*/
enum x86_reg_mod {
mod_INDIRECT,
mod_DISP8,
mod_DISP32,
mod_REG
};
 
enum x86_reg_name {
reg_AX,
reg_CX,
reg_DX,
reg_BX,
reg_SP,
reg_BP,
reg_SI,
reg_DI,
reg_R8,
reg_R9,
reg_R10,
reg_R11,
reg_R12,
reg_R13,
reg_R14,
reg_R15
};
 
 
enum x86_cc {
cc_O, /* overflow */
cc_NO, /* not overflow */
cc_NAE, /* not above or equal / carry */
cc_AE, /* above or equal / not carry */
cc_E, /* equal / zero */
cc_NE /* not equal / not zero */
};
 
enum sse_cc {
cc_Equal,
cc_LessThan,
cc_LessThanEqual,
cc_Unordered,
cc_NotEqual,
cc_NotLessThan,
cc_NotLessThanEqual,
cc_Ordered
};
 
#define cc_Z cc_E
#define cc_NZ cc_NE
 
 
/** generic pointer to function */
typedef void (*x86_func)(void);
 
 
/* Begin/end/retrieve function creation:
*/
 
enum x86_target
{
X86_32,
X86_64_STD_ABI,
X86_64_WIN64_ABI
};
 
/* make this read a member of x86_function if target != host is desired */
static INLINE enum x86_target x86_target( struct x86_function* p )
{
#ifdef PIPE_ARCH_X86
return X86_32;
#elif defined(_WIN64)
return X86_64_WIN64_ABI;
#elif defined(PIPE_ARCH_X86_64)
return X86_64_STD_ABI;
#endif
}
 
static INLINE unsigned x86_target_caps( struct x86_function* p )
{
return p->caps;
}
 
void x86_init_func( struct x86_function *p );
void x86_init_func_size( struct x86_function *p, unsigned code_size );
void x86_release_func( struct x86_function *p );
x86_func x86_get_func( struct x86_function *p );
 
/* Debugging:
*/
void x86_print_reg( struct x86_reg reg );
 
 
/* Create and manipulate registers and regmem values:
*/
struct x86_reg x86_make_reg( enum x86_reg_file file,
enum x86_reg_name idx );
 
struct x86_reg x86_make_disp( struct x86_reg reg,
int disp );
 
struct x86_reg x86_deref( struct x86_reg reg );
 
struct x86_reg x86_get_base_reg( struct x86_reg reg );
 
 
/* Labels, jumps and fixup:
*/
int x86_get_label( struct x86_function *p );
 
void x64_rexw(struct x86_function *p);
 
void x86_jcc( struct x86_function *p,
enum x86_cc cc,
int label );
 
int x86_jcc_forward( struct x86_function *p,
enum x86_cc cc );
 
int x86_jmp_forward( struct x86_function *p);
 
int x86_call_forward( struct x86_function *p);
 
void x86_fixup_fwd_jump( struct x86_function *p,
int fixup );
 
void x86_jmp( struct x86_function *p, int label );
 
/* void x86_call( struct x86_function *p, void (*label)() ); */
void x86_call( struct x86_function *p, struct x86_reg reg);
 
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm );
void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm );
void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm );
void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm );
void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm );
void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm );
void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm );
 
 
/* Macro for sse_shufps() and sse2_pshufd():
*/
#define SHUF(_x,_y,_z,_w) (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6))
#define SHUF_NOOP SHUF(0,1,2,3)
#define GET_SHUF(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
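/* For example (illustrative), SHUF(0,0,0,0) broadcasts component x and
* SHUF(3,2,1,0) reverses the four dwords:
*
*    sse_shufps(p, dst, dst, SHUF(0, 0, 0, 0));    -- dst = dst.xxxx
*    sse2_pshufd(p, dst, src, SHUF(3, 2, 1, 0));   -- dst = src.wzyx
*
* GET_SHUF() extracts one selector again: GET_SHUF(SHUF(3,2,1,0), 2) == 1.
*/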
 
void mmx_emms( struct x86_function *p );
void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 
void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 
void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvtsd2ss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvtpd2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 
void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
unsigned char shuf );
void sse2_pshuflw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
unsigned char shuf );
void sse2_pshufhw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
unsigned char shuf );
void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 
void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 
void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
 
void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
 
void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
 
void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
 
void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr);
void sse_prefetch0( struct x86_function *p, struct x86_reg ptr);
void sse_prefetch1( struct x86_function *p, struct x86_reg ptr);
 
void sse_movntps( struct x86_function *p, struct x86_reg dst, struct x86_reg src);
 
void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src,
enum sse_cc cc );
void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
unsigned char shuf );
void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src );
void sse_movmskps( struct x86_function *p, struct x86_reg dst, struct x86_reg src);
 
void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_cmovcc( struct x86_function *p, struct x86_reg dst, struct x86_reg src, enum x86_cc cc );
void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_dec( struct x86_function *p, struct x86_reg reg );
void x86_inc( struct x86_function *p, struct x86_reg reg );
void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x64_mov64( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mov8( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mov16( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mov_imm(struct x86_function *p, struct x86_reg dst, int imm );
void x86_mov8_imm(struct x86_function *p, struct x86_reg dst, uint8_t imm );
void x86_mov16_imm(struct x86_function *p, struct x86_reg dst, uint16_t imm );
void x86_mul( struct x86_function *p, struct x86_reg src );
void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_pop( struct x86_function *p, struct x86_reg reg );
void x86_push( struct x86_function *p, struct x86_reg reg );
void x86_push_imm32( struct x86_function *p, int imm );
void x86_ret( struct x86_function *p );
void x86_retw( struct x86_function *p, unsigned short imm );
void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_sahf( struct x86_function *p );
void x86_div( struct x86_function *p, struct x86_reg src );
void x86_bswap( struct x86_function *p, struct x86_reg src );
void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
 
void x86_cdecl_caller_push_regs( struct x86_function *p );
void x86_cdecl_caller_pop_regs( struct x86_function *p );
 
void x87_assert_stack_empty( struct x86_function *p );
 
void x87_f2xm1( struct x86_function *p );
void x87_fabs( struct x86_function *p );
void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_faddp( struct x86_function *p, struct x86_reg dst );
void x87_fchs( struct x86_function *p );
void x87_fclex( struct x86_function *p );
void x87_fcmovb( struct x86_function *p, struct x86_reg src );
void x87_fcmovbe( struct x86_function *p, struct x86_reg src );
void x87_fcmove( struct x86_function *p, struct x86_reg src );
void x87_fcmovnb( struct x86_function *p, struct x86_reg src );
void x87_fcmovnbe( struct x86_function *p, struct x86_reg src );
void x87_fcmovne( struct x86_function *p, struct x86_reg src );
void x87_fcom( struct x86_function *p, struct x86_reg dst );
void x87_fcomi( struct x86_function *p, struct x86_reg dst );
void x87_fcomip( struct x86_function *p, struct x86_reg dst );
void x87_fcomp( struct x86_function *p, struct x86_reg dst );
void x87_fcos( struct x86_function *p );
void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fdivp( struct x86_function *p, struct x86_reg dst );
void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fdivrp( struct x86_function *p, struct x86_reg dst );
void x87_fild( struct x86_function *p, struct x86_reg arg );
void x87_fist( struct x86_function *p, struct x86_reg dst );
void x87_fistp( struct x86_function *p, struct x86_reg dst );
void x87_fld( struct x86_function *p, struct x86_reg arg );
void x87_fld1( struct x86_function *p );
void x87_fldcw( struct x86_function *p, struct x86_reg arg );
void x87_fldl2e( struct x86_function *p );
void x87_fldln2( struct x86_function *p );
void x87_fldz( struct x86_function *p );
void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fmulp( struct x86_function *p, struct x86_reg dst );
void x87_fnclex( struct x86_function *p );
void x87_fprndint( struct x86_function *p );
void x87_fpop( struct x86_function *p );
void x87_fscale( struct x86_function *p );
void x87_fsin( struct x86_function *p );
void x87_fsincos( struct x86_function *p );
void x87_fsqrt( struct x86_function *p );
void x87_fst( struct x86_function *p, struct x86_reg dst );
void x87_fstp( struct x86_function *p, struct x86_reg dst );
void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fsubp( struct x86_function *p, struct x86_reg dst );
void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fsubrp( struct x86_function *p, struct x86_reg dst );
void x87_ftst( struct x86_function *p );
void x87_fxch( struct x86_function *p, struct x86_reg dst );
void x87_fxtract( struct x86_function *p );
void x87_fyl2x( struct x86_function *p );
void x87_fyl2xp1( struct x86_function *p );
void x87_fwait( struct x86_function *p );
void x87_fnstcw( struct x86_function *p, struct x86_reg dst );
void x87_fnstsw( struct x86_function *p, struct x86_reg dst );
void x87_fucompp( struct x86_function *p );
void x87_fucomp( struct x86_function *p, struct x86_reg arg );
void x87_fucom( struct x86_function *p, struct x86_reg arg );
 
 
 
/* Retrieve a reference to one of the function arguments, taking into
* account any push/pop activity. Note - doesn't track explicit
* manipulation of ESP by other instructions.
*/
struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg );
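/* For example (illustrative), loading the first two arguments into scratch
* registers works on all three supported ABIs:
*
*    x86_mov(p, x86_make_reg(file_REG32, reg_AX), x86_fn_arg(p, 1));
*    x86_mov(p, x86_make_reg(file_REG32, reg_DX), x86_fn_arg(p, 2));
*
* On X86_32 these read from the stack; on the 64-bit ABIs they become
* register-to-register moves (x64_mov64() exists for full 64-bit moves,
* e.g. pointer arguments).
*/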
 
#endif
#endif
/drivers/video/Gallium/auxiliary/translate/translate.c
0,0 → 1,55
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "pipe/p_config.h"
#include "pipe/p_state.h"
#include "translate.h"
 
struct translate *translate_create( const struct translate_key *key )
{
struct translate *translate = NULL;
 
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
translate = translate_sse2_create( key );
if (translate)
return translate;
#else
(void)translate;
#endif
 
return translate_generic_create( key );
}
 
boolean translate_is_output_format_supported(enum pipe_format format)
{
return translate_generic_is_output_format_supported(format);
}
/drivers/video/Gallium/auxiliary/translate/translate.h
0,0 → 1,160
/*
* Copyright 2008 Tungsten Graphics, inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
 
/**
* Vertex fetch/store/convert code. This functionality is used in two places:
* 1. Vertex fetch/convert - to grab vertex data from incoming vertex
* arrays and convert to format needed by vertex shaders.
* 2. Vertex store/emit - to convert simple float[][4] vertex attributes
* (which is the organization used throughout the draw/prim pipeline) to
* hardware-specific formats and emit into hardware vertex buffers.
*
*
* Authors:
* Keith Whitwell <keithw@tungstengraphics.com>
*/
 
#ifndef _TRANSLATE_H
#define _TRANSLATE_H
 
 
#include "pipe/p_compiler.h"
#include "pipe/p_format.h"
#include "pipe/p_state.h"
 
enum translate_element_type {
TRANSLATE_ELEMENT_NORMAL,
TRANSLATE_ELEMENT_INSTANCE_ID
};
 
struct translate_element
{
enum translate_element_type type;
enum pipe_format input_format;
enum pipe_format output_format;
unsigned input_buffer:8;
unsigned input_offset:24;
unsigned instance_divisor;
unsigned output_offset;
};
 
 
struct translate_key {
unsigned output_stride;
unsigned nr_elements;
struct translate_element element[PIPE_MAX_ATTRIBS + 1];
};
 
 
struct translate;
 
 
typedef void (PIPE_CDECL *run_elts_func)(struct translate *,
const unsigned *elts,
unsigned count,
unsigned start_instance,
unsigned instance_id,
void *output_buffer);
 
typedef void (PIPE_CDECL *run_elts16_func)(struct translate *,
const uint16_t *elts,
unsigned count,
unsigned start_instance,
unsigned instance_id,
void *output_buffer);
 
typedef void (PIPE_CDECL *run_elts8_func)(struct translate *,
const uint8_t *elts,
unsigned count,
unsigned start_instance,
unsigned instance_id,
void *output_buffer);
 
typedef void (PIPE_CDECL *run_func)(struct translate *,
unsigned start,
unsigned count,
unsigned start_instance,
unsigned instance_id,
void *output_buffer);
 
struct translate {
struct translate_key key;
 
void (*release)( struct translate * );
 
void (*set_buffer)( struct translate *,
unsigned i,
const void *ptr,
unsigned stride,
unsigned max_index );
 
run_elts_func run_elts;
run_elts16_func run_elts16;
run_elts8_func run_elts8;
run_func run;
};
 
 
 
struct translate *translate_create( const struct translate_key *key );
 
boolean translate_is_output_format_supported(enum pipe_format format);
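/* Minimal usage sketch (illustrative; the buffer pointer, stride and counts
* below are hypothetical):
*
*    struct translate_key key;
*    struct translate *t;
*
*    memset(&key, 0, sizeof(key));
*    key.output_stride = 4 * sizeof(float);
*    key.nr_elements = 1;
*    key.element[0].type = TRANSLATE_ELEMENT_NORMAL;
*    key.element[0].input_format = PIPE_FORMAT_R8G8B8A8_UNORM;
*    key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
*
*    t = translate_create(&key);
*    t->set_buffer(t, 0, vertices, vertex_stride, max_index);
*    t->run(t, 0, count, 0, 0, output_buffer);
*    t->release(t);
*/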
 
static INLINE int translate_keysize( const struct translate_key *key )
{
return 2 * sizeof(int) + key->nr_elements * sizeof(struct translate_element);
}
 
static INLINE int translate_key_compare( const struct translate_key *a,
const struct translate_key *b )
{
int keysize_a = translate_keysize(a);
int keysize_b = translate_keysize(b);
 
if (keysize_a != keysize_b) {
return keysize_a - keysize_b;
}
return memcmp(a, b, keysize_a);
}
 
 
static INLINE void translate_key_sanitize( struct translate_key *a )
{
int keysize = translate_keysize(a);
char *ptr = (char *)a;
memset(ptr + keysize, 0, sizeof(*a) - keysize);
}
 
 
/*******************************************************************************
* Private:
*/
struct translate *translate_sse2_create( const struct translate_key *key );
 
struct translate *translate_generic_create( const struct translate_key *key );
 
boolean translate_generic_is_output_format_supported(enum pipe_format format);
 
#endif
/drivers/video/Gallium/auxiliary/translate/translate_cache.c
0,0 → 1,106
/**************************************************************************
*
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "util/u_memory.h"
#include "pipe/p_state.h"
#include "translate.h"
#include "translate_cache.h"
 
#include "cso_cache/cso_cache.h"
#include "cso_cache/cso_hash.h"
 
struct translate_cache {
struct cso_hash *hash;
};
 
struct translate_cache * translate_cache_create( void )
{
struct translate_cache *cache = MALLOC_STRUCT(translate_cache);
if (cache == NULL) {
return NULL;
}
 
cache->hash = cso_hash_create();
return cache;
}
 
 
static INLINE void delete_translates(struct translate_cache *cache)
{
struct cso_hash *hash = cache->hash;
struct cso_hash_iter iter = cso_hash_first_node(hash);
while (!cso_hash_iter_is_null(iter)) {
struct translate *state = (struct translate*)cso_hash_iter_data(iter);
iter = cso_hash_iter_next(iter);
if (state) {
state->release(state);
}
}
}
 
void translate_cache_destroy(struct translate_cache *cache)
{
delete_translates(cache);
cso_hash_delete(cache->hash);
FREE(cache);
}
 
 
static INLINE unsigned translate_hash_key_size(struct translate_key *key)
{
unsigned size = sizeof(struct translate_key) -
sizeof(struct translate_element) * (PIPE_MAX_ATTRIBS - key->nr_elements);
return size;
}
 
static INLINE unsigned create_key(struct translate_key *key)
{
unsigned hash_key;
unsigned size = translate_hash_key_size(key);
/*debug_printf("key size = %d, (els = %d)\n",
size, key->nr_elements);*/
hash_key = cso_construct_key(key, size);
return hash_key;
}
 
struct translate * translate_cache_find(struct translate_cache *cache,
struct translate_key *key)
{
unsigned hash_key = create_key(key);
struct translate *translate = (struct translate*)
cso_hash_find_data_from_template(cache->hash,
hash_key,
key, sizeof(*key));
 
if (!translate) {
/* create/insert */
translate = translate_create(key);
cso_hash_insert(cache->hash, hash_key, translate);
}
 
return translate;
}
/drivers/video/Gallium/auxiliary/translate/translate_cache.h
0,0 → 1,54
/*
* Copyright 2008 Tungsten Graphics, inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef _TRANSLATE_CACHE_H
#define _TRANSLATE_CACHE_H
 
 
/*******************************************************************************
* Translate cache.
* Simply used to cache created translates. Avoids unnecessary creation of
* translates if one suitable for a given translate_key has already been
* created.
*
* Note: this functionality depends on (and requires) the CSO module.
*/
struct translate_cache;
 
struct translate_key;
struct translate;
 
struct translate_cache *translate_cache_create( void );
void translate_cache_destroy(struct translate_cache *cache);
 
/**
* Will try to find a translate structure matching the given key.
* If no such structure exists in the cache, the function will
* automatically create it, insert it into the cache and return
* the newly created version.
*
*/
struct translate *translate_cache_find(struct translate_cache *cache,
struct translate_key *key);
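/* Typical use (illustrative sketch):
*
*    struct translate_cache *cache = translate_cache_create();
*    struct translate *t = translate_cache_find(cache, &key);
*       ... use t as usual (set_buffer / run) ...
*    translate_cache_destroy(cache);
*
* The returned translate is owned by the cache, so the caller must not
* release it; translate_cache_destroy() releases every cached translate.
*/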
 
#endif
/drivers/video/Gallium/auxiliary/translate/translate_generic.c
0,0 → 1,998
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "util/u_memory.h"
#include "util/u_format.h"
#include "util/u_half.h"
#include "util/u_math.h"
#include "pipe/p_state.h"
#include "translate.h"
 
 
#define DRAW_DBG 0
 
typedef void (*fetch_func)(void *dst,
const uint8_t *src,
unsigned i, unsigned j);
typedef void (*emit_func)(const void *attrib, void *ptr);
 
 
 
struct translate_generic {
struct translate translate;
 
struct {
enum translate_element_type type;
 
fetch_func fetch;
unsigned buffer;
unsigned input_offset;
unsigned instance_divisor;
 
emit_func emit;
unsigned output_offset;
const uint8_t *input_ptr;
unsigned input_stride;
unsigned max_index;
 
/* this value is set to -1 if this is a normal element with output_format != input_format:
* in this case, u_format is used to do a full conversion
*
* this value is set to the format size in bytes if output_format == input_format or for 32-bit instance ids:
* in this case, memcpy is used to copy this number of bytes (see the example after this struct)
*/
int copy_size;
 
} attrib[PIPE_MAX_ATTRIBS];
 
unsigned nr_attrib;
};
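/* For example (illustrative): an element translating R32G32B32_FLOAT to the
* same format gets copy_size == 12 and emit == NULL, so generic_run_one()
* simply memcpy()s 12 bytes per vertex; R8G8B8A8_UNORM converted to
* R32G32B32A32_FLOAT keeps copy_size == -1 and takes the fetch() + emit()
* path instead.
*/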
 
 
static struct translate_generic *translate_generic( struct translate *translate )
{
return (struct translate_generic *)translate;
}
 
/**
* Fetch a dword[4] vertex attribute from memory, doing format/type
* conversion as needed.
*
* This is probably needed/duplicated elsewhere, e.g. format
* conversion, texture sampling etc.
*/
#define ATTRIB( NAME, SZ, SRCTYPE, DSTTYPE, TO ) \
static void \
emit_##NAME(const void *attrib, void *ptr) \
{ \
unsigned i; \
SRCTYPE *in = (SRCTYPE *)attrib; \
DSTTYPE *out = (DSTTYPE *)ptr; \
\
for (i = 0; i < SZ; i++) { \
out[i] = TO(in[i]); \
} \
}
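/* For instance, ATTRIB( R32G32_FLOAT, 2, float, float, TO_32_FLOAT ) below
* expands to an emit_R32G32_FLOAT() that copies two floats through
* unchanged, while the UNORM/SNORM/USCALED/SSCALED variants run each
* component through the corresponding TO_* conversion macro first.
*/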
 
 
#define TO_64_FLOAT(x) ((double) x)
#define TO_32_FLOAT(x) (x)
#define TO_16_FLOAT(x) util_float_to_half(x)
 
#define TO_8_USCALED(x) ((unsigned char) x)
#define TO_16_USCALED(x) ((unsigned short) x)
#define TO_32_USCALED(x) ((unsigned int) x)
 
#define TO_8_SSCALED(x) ((char) x)
#define TO_16_SSCALED(x) ((short) x)
#define TO_32_SSCALED(x) ((int) x)
 
#define TO_8_UNORM(x) ((unsigned char) (x * 255.0f))
#define TO_16_UNORM(x) ((unsigned short) (x * 65535.0f))
#define TO_32_UNORM(x) ((unsigned int) (x * 4294967295.0f))
 
#define TO_8_SNORM(x) ((char) (x * 127.0f))
#define TO_16_SNORM(x) ((short) (x * 32767.0f))
#define TO_32_SNORM(x) ((int) (x * 2147483647.0f))
 
#define TO_32_FIXED(x) ((int) (x * 65536.0f))
 
#define TO_INT(x) (x)
 
 
ATTRIB( R64G64B64A64_FLOAT, 4, float, double, TO_64_FLOAT )
ATTRIB( R64G64B64_FLOAT, 3, float, double, TO_64_FLOAT )
ATTRIB( R64G64_FLOAT, 2, float, double, TO_64_FLOAT )
ATTRIB( R64_FLOAT, 1, float, double, TO_64_FLOAT )
 
ATTRIB( R32G32B32A32_FLOAT, 4, float, float, TO_32_FLOAT )
ATTRIB( R32G32B32_FLOAT, 3, float, float, TO_32_FLOAT )
ATTRIB( R32G32_FLOAT, 2, float, float, TO_32_FLOAT )
ATTRIB( R32_FLOAT, 1, float, float, TO_32_FLOAT )
 
ATTRIB( R16G16B16A16_FLOAT, 4, float, ushort, TO_16_FLOAT )
ATTRIB( R16G16B16_FLOAT, 3, float, ushort, TO_16_FLOAT )
ATTRIB( R16G16_FLOAT, 2, float, ushort, TO_16_FLOAT )
ATTRIB( R16_FLOAT, 1, float, ushort, TO_16_FLOAT )
 
ATTRIB( R32G32B32A32_USCALED, 4, float, unsigned, TO_32_USCALED )
ATTRIB( R32G32B32_USCALED, 3, float, unsigned, TO_32_USCALED )
ATTRIB( R32G32_USCALED, 2, float, unsigned, TO_32_USCALED )
ATTRIB( R32_USCALED, 1, float, unsigned, TO_32_USCALED )
 
ATTRIB( R32G32B32A32_SSCALED, 4, float, int, TO_32_SSCALED )
ATTRIB( R32G32B32_SSCALED, 3, float, int, TO_32_SSCALED )
ATTRIB( R32G32_SSCALED, 2, float, int, TO_32_SSCALED )
ATTRIB( R32_SSCALED, 1, float, int, TO_32_SSCALED )
 
ATTRIB( R32G32B32A32_UNORM, 4, float, unsigned, TO_32_UNORM )
ATTRIB( R32G32B32_UNORM, 3, float, unsigned, TO_32_UNORM )
ATTRIB( R32G32_UNORM, 2, float, unsigned, TO_32_UNORM )
ATTRIB( R32_UNORM, 1, float, unsigned, TO_32_UNORM )
 
ATTRIB( R32G32B32A32_SNORM, 4, float, int, TO_32_SNORM )
ATTRIB( R32G32B32_SNORM, 3, float, int, TO_32_SNORM )
ATTRIB( R32G32_SNORM, 2, float, int, TO_32_SNORM )
ATTRIB( R32_SNORM, 1, float, int, TO_32_SNORM )
 
ATTRIB( R16G16B16A16_USCALED, 4, float, ushort, TO_16_USCALED )
ATTRIB( R16G16B16_USCALED, 3, float, ushort, TO_16_USCALED )
ATTRIB( R16G16_USCALED, 2, float, ushort, TO_16_USCALED )
ATTRIB( R16_USCALED, 1, float, ushort, TO_16_USCALED )
 
ATTRIB( R16G16B16A16_SSCALED, 4, float, short, TO_16_SSCALED )
ATTRIB( R16G16B16_SSCALED, 3, float, short, TO_16_SSCALED )
ATTRIB( R16G16_SSCALED, 2, float, short, TO_16_SSCALED )
ATTRIB( R16_SSCALED, 1, float, short, TO_16_SSCALED )
 
ATTRIB( R16G16B16A16_UNORM, 4, float, ushort, TO_16_UNORM )
ATTRIB( R16G16B16_UNORM, 3, float, ushort, TO_16_UNORM )
ATTRIB( R16G16_UNORM, 2, float, ushort, TO_16_UNORM )
ATTRIB( R16_UNORM, 1, float, ushort, TO_16_UNORM )
 
ATTRIB( R16G16B16A16_SNORM, 4, float, short, TO_16_SNORM )
ATTRIB( R16G16B16_SNORM, 3, float, short, TO_16_SNORM )
ATTRIB( R16G16_SNORM, 2, float, short, TO_16_SNORM )
ATTRIB( R16_SNORM, 1, float, short, TO_16_SNORM )
 
ATTRIB( R8G8B8A8_USCALED, 4, float, ubyte, TO_8_USCALED )
ATTRIB( R8G8B8_USCALED, 3, float, ubyte, TO_8_USCALED )
ATTRIB( R8G8_USCALED, 2, float, ubyte, TO_8_USCALED )
ATTRIB( R8_USCALED, 1, float, ubyte, TO_8_USCALED )
 
ATTRIB( R8G8B8A8_SSCALED, 4, float, char, TO_8_SSCALED )
ATTRIB( R8G8B8_SSCALED, 3, float, char, TO_8_SSCALED )
ATTRIB( R8G8_SSCALED, 2, float, char, TO_8_SSCALED )
ATTRIB( R8_SSCALED, 1, float, char, TO_8_SSCALED )
 
ATTRIB( R8G8B8A8_UNORM, 4, float, ubyte, TO_8_UNORM )
ATTRIB( R8G8B8_UNORM, 3, float, ubyte, TO_8_UNORM )
ATTRIB( R8G8_UNORM, 2, float, ubyte, TO_8_UNORM )
ATTRIB( R8_UNORM, 1, float, ubyte, TO_8_UNORM )
 
ATTRIB( R8G8B8A8_SNORM, 4, float, char, TO_8_SNORM )
ATTRIB( R8G8B8_SNORM, 3, float, char, TO_8_SNORM )
ATTRIB( R8G8_SNORM, 2, float, char, TO_8_SNORM )
ATTRIB( R8_SNORM, 1, float, char, TO_8_SNORM )
 
ATTRIB( R32G32B32A32_UINT, 4, uint32_t, unsigned, TO_INT )
ATTRIB( R32G32B32_UINT, 3, uint32_t, unsigned, TO_INT )
ATTRIB( R32G32_UINT, 2, uint32_t, unsigned, TO_INT )
ATTRIB( R32_UINT, 1, uint32_t, unsigned, TO_INT )
 
ATTRIB( R16G16B16A16_UINT, 4, uint32_t, ushort, TO_INT )
ATTRIB( R16G16B16_UINT, 3, uint32_t, ushort, TO_INT )
ATTRIB( R16G16_UINT, 2, uint32_t, ushort, TO_INT )
ATTRIB( R16_UINT, 1, uint32_t, ushort, TO_INT )
 
ATTRIB( R8G8B8A8_UINT, 4, uint32_t, ubyte, TO_INT )
ATTRIB( R8G8B8_UINT, 3, uint32_t, ubyte, TO_INT )
ATTRIB( R8G8_UINT, 2, uint32_t, ubyte, TO_INT )
ATTRIB( R8_UINT, 1, uint32_t, ubyte, TO_INT )
 
ATTRIB( R32G32B32A32_SINT, 4, int32_t, int, TO_INT )
ATTRIB( R32G32B32_SINT, 3, int32_t, int, TO_INT )
ATTRIB( R32G32_SINT, 2, int32_t, int, TO_INT )
ATTRIB( R32_SINT, 1, int32_t, int, TO_INT )
 
ATTRIB( R16G16B16A16_SINT, 4, int32_t, short, TO_INT )
ATTRIB( R16G16B16_SINT, 3, int32_t, short, TO_INT )
ATTRIB( R16G16_SINT, 2, int32_t, short, TO_INT )
ATTRIB( R16_SINT, 1, int32_t, short, TO_INT )
 
ATTRIB( R8G8B8A8_SINT, 4, int32_t, char, TO_INT )
ATTRIB( R8G8B8_SINT, 3, int32_t, char, TO_INT )
ATTRIB( R8G8_SINT, 2, int32_t, char, TO_INT )
ATTRIB( R8_SINT, 1, int32_t, char, TO_INT )
 
static void
emit_A8R8G8B8_UNORM( const void *attrib, void *ptr)
{
float *in = (float *)attrib;
ubyte *out = (ubyte *)ptr;
out[0] = TO_8_UNORM(in[3]);
out[1] = TO_8_UNORM(in[0]);
out[2] = TO_8_UNORM(in[1]);
out[3] = TO_8_UNORM(in[2]);
}
 
static void
emit_B8G8R8A8_UNORM( const void *attrib, void *ptr)
{
float *in = (float *)attrib;
ubyte *out = (ubyte *)ptr;
out[2] = TO_8_UNORM(in[0]);
out[1] = TO_8_UNORM(in[1]);
out[0] = TO_8_UNORM(in[2]);
out[3] = TO_8_UNORM(in[3]);
}
 
static void
emit_B10G10R10A2_UNORM( const void *attrib, void *ptr )
{
float *src = (float *)attrib; /* attrib holds the float[4] source; ptr is the packed output (see the ATTRIB emit convention) */
uint32_t value = 0;
value |= ((uint32_t)(CLAMP(src[2], 0, 1) * 0x3ff)) & 0x3ff;
value |= (((uint32_t)(CLAMP(src[1], 0, 1) * 0x3ff)) & 0x3ff) << 10;
value |= (((uint32_t)(CLAMP(src[0], 0, 1) * 0x3ff)) & 0x3ff) << 20;
value |= ((uint32_t)(CLAMP(src[3], 0, 1) * 0x3)) << 30;
#ifdef PIPE_ARCH_BIG_ENDIAN
value = util_bswap32(value);
#endif
*(uint32_t *)ptr = value;
}
 
static void
emit_B10G10R10A2_USCALED( const void *attrib, void *ptr )
{
float *src = (float *)attrib;
uint32_t value = 0;
value |= ((uint32_t)CLAMP(src[2], 0, 1023)) & 0x3ff;
value |= (((uint32_t)CLAMP(src[1], 0, 1023)) & 0x3ff) << 10;
value |= (((uint32_t)CLAMP(src[0], 0, 1023)) & 0x3ff) << 20;
value |= ((uint32_t)CLAMP(src[3], 0, 3)) << 30;
#ifdef PIPE_ARCH_BIG_ENDIAN
value = util_bswap32(value);
#endif
*(uint32_t *)ptr = value;
}
 
static void
emit_B10G10R10A2_SNORM( const void *attrib, void *ptr )
{
float *src = (float *)attrib;
uint32_t value = 0;
value |= (uint32_t)(((uint32_t)(CLAMP(src[2], -1, 1) * 0x1ff)) & 0x3ff) ;
value |= (uint32_t)((((uint32_t)(CLAMP(src[1], -1, 1) * 0x1ff)) & 0x3ff) << 10) ;
value |= (uint32_t)((((uint32_t)(CLAMP(src[0], -1, 1) * 0x1ff)) & 0x3ff) << 20) ;
value |= (uint32_t)(((uint32_t)(CLAMP(src[3], -1, 1) * 0x1)) << 30) ;
#ifdef PIPE_ARCH_BIG_ENDIAN
value = util_bswap32(value);
#endif
*(uint32_t *)ptr = value;
}
 
static void
emit_B10G10R10A2_SSCALED( const void *attrib, void *ptr )
{
float *src = (float *)attrib;
uint32_t value = 0;
value |= (uint32_t)(((uint32_t)CLAMP(src[2], -512, 511)) & 0x3ff) ;
value |= (uint32_t)((((uint32_t)CLAMP(src[1], -512, 511)) & 0x3ff) << 10) ;
value |= (uint32_t)((((uint32_t)CLAMP(src[0], -512, 511)) & 0x3ff) << 20) ;
value |= (uint32_t)(((uint32_t)CLAMP(src[3], -2, 1)) << 30) ;
#ifdef PIPE_ARCH_BIG_ENDIAN
value = util_bswap32(value);
#endif
*(uint32_t *)ptr = value;
}
 
static void
emit_R10G10B10A2_UNORM( const void *attrib, void *ptr )
{
float *src = (float *)attrib;
uint32_t value = 0;
value |= ((uint32_t)(CLAMP(src[0], 0, 1) * 0x3ff)) & 0x3ff;
value |= (((uint32_t)(CLAMP(src[1], 0, 1) * 0x3ff)) & 0x3ff) << 10;
value |= (((uint32_t)(CLAMP(src[2], 0, 1) * 0x3ff)) & 0x3ff) << 20;
value |= ((uint32_t)(CLAMP(src[3], 0, 1) * 0x3)) << 30;
#ifdef PIPE_ARCH_BIG_ENDIAN
value = util_bswap32(value);
#endif
*(uint32_t *)ptr = value;
}
 
static void
emit_R10G10B10A2_USCALED( const void *attrib, void *ptr )
{
float *src = (float *)attrib;
uint32_t value = 0;
value |= ((uint32_t)CLAMP(src[0], 0, 1023)) & 0x3ff;
value |= (((uint32_t)CLAMP(src[1], 0, 1023)) & 0x3ff) << 10;
value |= (((uint32_t)CLAMP(src[2], 0, 1023)) & 0x3ff) << 20;
value |= ((uint32_t)CLAMP(src[3], 0, 3)) << 30;
#ifdef PIPE_ARCH_BIG_ENDIAN
value = util_bswap32(value);
#endif
*(uint32_t *)ptr = value;
}
 
static void
emit_R10G10B10A2_SNORM( const void *attrib, void *ptr )
{
float *src = (float *)attrib;
uint32_t value = 0;
value |= (uint32_t)(((uint32_t)(CLAMP(src[0], -1, 1) * 0x1ff)) & 0x3ff) ;
value |= (uint32_t)((((uint32_t)(CLAMP(src[1], -1, 1) * 0x1ff)) & 0x3ff) << 10) ;
value |= (uint32_t)((((uint32_t)(CLAMP(src[2], -1, 1) * 0x1ff)) & 0x3ff) << 20) ;
value |= (uint32_t)(((uint32_t)(CLAMP(src[3], -1, 1) * 0x1)) << 30) ;
#ifdef PIPE_ARCH_BIG_ENDIAN
value = util_bswap32(value);
#endif
*(uint32_t *)ptr = value;
}
 
static void
emit_R10G10B10A2_SSCALED( const void *attrib, void *ptr)
{
float *src = (float *)attrib;
uint32_t value = 0;
value |= (uint32_t)(((uint32_t)CLAMP(src[0], -512, 511)) & 0x3ff) ;
value |= (uint32_t)((((uint32_t)CLAMP(src[1], -512, 511)) & 0x3ff) << 10) ;
value |= (uint32_t)((((uint32_t)CLAMP(src[2], -512, 511)) & 0x3ff) << 20) ;
value |= (uint32_t)(((uint32_t)CLAMP(src[3], -2, 1)) << 30) ;
#ifdef PIPE_ARCH_BIG_ENDIAN
value = util_bswap32(value);
#endif
*(uint32_t *)ptr = value;
}
 
static void
emit_NULL( const void *attrib, void *ptr )
{
/* do nothing is the only sensible option */
}
 
static emit_func get_emit_func( enum pipe_format format )
{
switch (format) {
case PIPE_FORMAT_R64_FLOAT:
return &emit_R64_FLOAT;
case PIPE_FORMAT_R64G64_FLOAT:
return &emit_R64G64_FLOAT;
case PIPE_FORMAT_R64G64B64_FLOAT:
return &emit_R64G64B64_FLOAT;
case PIPE_FORMAT_R64G64B64A64_FLOAT:
return &emit_R64G64B64A64_FLOAT;
 
case PIPE_FORMAT_R32_FLOAT:
return &emit_R32_FLOAT;
case PIPE_FORMAT_R32G32_FLOAT:
return &emit_R32G32_FLOAT;
case PIPE_FORMAT_R32G32B32_FLOAT:
return &emit_R32G32B32_FLOAT;
case PIPE_FORMAT_R32G32B32A32_FLOAT:
return &emit_R32G32B32A32_FLOAT;
 
case PIPE_FORMAT_R16_FLOAT:
return &emit_R16_FLOAT;
case PIPE_FORMAT_R16G16_FLOAT:
return &emit_R16G16_FLOAT;
case PIPE_FORMAT_R16G16B16_FLOAT:
return &emit_R16G16B16_FLOAT;
case PIPE_FORMAT_R16G16B16A16_FLOAT:
return &emit_R16G16B16A16_FLOAT;
 
case PIPE_FORMAT_R32_UNORM:
return &emit_R32_UNORM;
case PIPE_FORMAT_R32G32_UNORM:
return &emit_R32G32_UNORM;
case PIPE_FORMAT_R32G32B32_UNORM:
return &emit_R32G32B32_UNORM;
case PIPE_FORMAT_R32G32B32A32_UNORM:
return &emit_R32G32B32A32_UNORM;
 
case PIPE_FORMAT_R32_USCALED:
return &emit_R32_USCALED;
case PIPE_FORMAT_R32G32_USCALED:
return &emit_R32G32_USCALED;
case PIPE_FORMAT_R32G32B32_USCALED:
return &emit_R32G32B32_USCALED;
case PIPE_FORMAT_R32G32B32A32_USCALED:
return &emit_R32G32B32A32_USCALED;
 
case PIPE_FORMAT_R32_SNORM:
return &emit_R32_SNORM;
case PIPE_FORMAT_R32G32_SNORM:
return &emit_R32G32_SNORM;
case PIPE_FORMAT_R32G32B32_SNORM:
return &emit_R32G32B32_SNORM;
case PIPE_FORMAT_R32G32B32A32_SNORM:
return &emit_R32G32B32A32_SNORM;
 
case PIPE_FORMAT_R32_SSCALED:
return &emit_R32_SSCALED;
case PIPE_FORMAT_R32G32_SSCALED:
return &emit_R32G32_SSCALED;
case PIPE_FORMAT_R32G32B32_SSCALED:
return &emit_R32G32B32_SSCALED;
case PIPE_FORMAT_R32G32B32A32_SSCALED:
return &emit_R32G32B32A32_SSCALED;
 
case PIPE_FORMAT_R16_UNORM:
return &emit_R16_UNORM;
case PIPE_FORMAT_R16G16_UNORM:
return &emit_R16G16_UNORM;
case PIPE_FORMAT_R16G16B16_UNORM:
return &emit_R16G16B16_UNORM;
case PIPE_FORMAT_R16G16B16A16_UNORM:
return &emit_R16G16B16A16_UNORM;
 
case PIPE_FORMAT_R16_USCALED:
return &emit_R16_USCALED;
case PIPE_FORMAT_R16G16_USCALED:
return &emit_R16G16_USCALED;
case PIPE_FORMAT_R16G16B16_USCALED:
return &emit_R16G16B16_USCALED;
case PIPE_FORMAT_R16G16B16A16_USCALED:
return &emit_R16G16B16A16_USCALED;
 
case PIPE_FORMAT_R16_SNORM:
return &emit_R16_SNORM;
case PIPE_FORMAT_R16G16_SNORM:
return &emit_R16G16_SNORM;
case PIPE_FORMAT_R16G16B16_SNORM:
return &emit_R16G16B16_SNORM;
case PIPE_FORMAT_R16G16B16A16_SNORM:
return &emit_R16G16B16A16_SNORM;
 
case PIPE_FORMAT_R16_SSCALED:
return &emit_R16_SSCALED;
case PIPE_FORMAT_R16G16_SSCALED:
return &emit_R16G16_SSCALED;
case PIPE_FORMAT_R16G16B16_SSCALED:
return &emit_R16G16B16_SSCALED;
case PIPE_FORMAT_R16G16B16A16_SSCALED:
return &emit_R16G16B16A16_SSCALED;
 
case PIPE_FORMAT_R8_UNORM:
return &emit_R8_UNORM;
case PIPE_FORMAT_R8G8_UNORM:
return &emit_R8G8_UNORM;
case PIPE_FORMAT_R8G8B8_UNORM:
return &emit_R8G8B8_UNORM;
case PIPE_FORMAT_R8G8B8A8_UNORM:
return &emit_R8G8B8A8_UNORM;
 
case PIPE_FORMAT_R8_USCALED:
return &emit_R8_USCALED;
case PIPE_FORMAT_R8G8_USCALED:
return &emit_R8G8_USCALED;
case PIPE_FORMAT_R8G8B8_USCALED:
return &emit_R8G8B8_USCALED;
case PIPE_FORMAT_R8G8B8A8_USCALED:
return &emit_R8G8B8A8_USCALED;
 
case PIPE_FORMAT_R8_SNORM:
return &emit_R8_SNORM;
case PIPE_FORMAT_R8G8_SNORM:
return &emit_R8G8_SNORM;
case PIPE_FORMAT_R8G8B8_SNORM:
return &emit_R8G8B8_SNORM;
case PIPE_FORMAT_R8G8B8A8_SNORM:
return &emit_R8G8B8A8_SNORM;
 
case PIPE_FORMAT_R8_SSCALED:
return &emit_R8_SSCALED;
case PIPE_FORMAT_R8G8_SSCALED:
return &emit_R8G8_SSCALED;
case PIPE_FORMAT_R8G8B8_SSCALED:
return &emit_R8G8B8_SSCALED;
case PIPE_FORMAT_R8G8B8A8_SSCALED:
return &emit_R8G8B8A8_SSCALED;
 
case PIPE_FORMAT_B8G8R8A8_UNORM:
return &emit_B8G8R8A8_UNORM;
 
case PIPE_FORMAT_A8R8G8B8_UNORM:
return &emit_A8R8G8B8_UNORM;
 
case PIPE_FORMAT_R32_UINT:
return &emit_R32_UINT;
case PIPE_FORMAT_R32G32_UINT:
return &emit_R32G32_UINT;
case PIPE_FORMAT_R32G32B32_UINT:
return &emit_R32G32B32_UINT;
case PIPE_FORMAT_R32G32B32A32_UINT:
return &emit_R32G32B32A32_UINT;
 
case PIPE_FORMAT_R16_UINT:
return &emit_R16_UINT;
case PIPE_FORMAT_R16G16_UINT:
return &emit_R16G16_UINT;
case PIPE_FORMAT_R16G16B16_UINT:
return &emit_R16G16B16_UINT;
case PIPE_FORMAT_R16G16B16A16_UINT:
return &emit_R16G16B16A16_UINT;
 
case PIPE_FORMAT_R8_UINT:
return &emit_R8_UINT;
case PIPE_FORMAT_R8G8_UINT:
return &emit_R8G8_UINT;
case PIPE_FORMAT_R8G8B8_UINT:
return &emit_R8G8B8_UINT;
case PIPE_FORMAT_R8G8B8A8_UINT:
return &emit_R8G8B8A8_UINT;
 
case PIPE_FORMAT_R32_SINT:
return &emit_R32_SINT;
case PIPE_FORMAT_R32G32_SINT:
return &emit_R32G32_SINT;
case PIPE_FORMAT_R32G32B32_SINT:
return &emit_R32G32B32_SINT;
case PIPE_FORMAT_R32G32B32A32_SINT:
return &emit_R32G32B32A32_SINT;
 
case PIPE_FORMAT_R16_SINT:
return &emit_R16_SINT;
case PIPE_FORMAT_R16G16_SINT:
return &emit_R16G16_SINT;
case PIPE_FORMAT_R16G16B16_SINT:
return &emit_R16G16B16_SINT;
case PIPE_FORMAT_R16G16B16A16_SINT:
return &emit_R16G16B16A16_SINT;
 
case PIPE_FORMAT_R8_SINT:
return &emit_R8_SINT;
case PIPE_FORMAT_R8G8_SINT:
return &emit_R8G8_SINT;
case PIPE_FORMAT_R8G8B8_SINT:
return &emit_R8G8B8_SINT;
case PIPE_FORMAT_R8G8B8A8_SINT:
return &emit_R8G8B8A8_SINT;
 
case PIPE_FORMAT_B10G10R10A2_UNORM:
return &emit_B10G10R10A2_UNORM;
case PIPE_FORMAT_B10G10R10A2_USCALED:
return &emit_B10G10R10A2_USCALED;
case PIPE_FORMAT_B10G10R10A2_SNORM:
return &emit_B10G10R10A2_SNORM;
case PIPE_FORMAT_B10G10R10A2_SSCALED:
return &emit_B10G10R10A2_SSCALED;
 
case PIPE_FORMAT_R10G10B10A2_UNORM:
return &emit_R10G10B10A2_UNORM;
case PIPE_FORMAT_R10G10B10A2_USCALED:
return &emit_R10G10B10A2_USCALED;
case PIPE_FORMAT_R10G10B10A2_SNORM:
return &emit_R10G10B10A2_SNORM;
case PIPE_FORMAT_R10G10B10A2_SSCALED:
return &emit_R10G10B10A2_SSCALED;
 
default:
assert(0);
return &emit_NULL;
}
}
 
static ALWAYS_INLINE void PIPE_CDECL generic_run_one( struct translate_generic *tg,
unsigned elt,
unsigned start_instance,
unsigned instance_id,
void *vert )
{
unsigned nr_attrs = tg->nr_attrib;
unsigned attr;
 
for (attr = 0; attr < nr_attrs; attr++) {
float data[4];
uint8_t *dst = (uint8_t *)vert + tg->attrib[attr].output_offset;
 
if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
const uint8_t *src;
unsigned index;
int copy_size;
 
if (tg->attrib[attr].instance_divisor) {
index = start_instance;
index += (instance_id - start_instance) /
tg->attrib[attr].instance_divisor;
/* XXX we need to clamp the index here too, but to a
* per-array max value, not the draw->pt.max_index value
* that's being given to us via translate->set_buffer().
*/
}
else {
index = elt;
/* clamp to avoid going out of bounds */
index = MIN2(index, tg->attrib[attr].max_index);
}
 
src = tg->attrib[attr].input_ptr +
tg->attrib[attr].input_stride * index;
 
copy_size = tg->attrib[attr].copy_size;
if(likely(copy_size >= 0))
memcpy(dst, src, copy_size);
else
{
tg->attrib[attr].fetch( data, src, 0, 0 );
 
if (0)
debug_printf("Fetch linear attr %d from %p stride %d index %d: "
" %f, %f, %f, %f \n",
attr,
tg->attrib[attr].input_ptr,
tg->attrib[attr].input_stride,
index,
data[0], data[1],data[2], data[3]);
 
tg->attrib[attr].emit( data, dst );
}
} else {
if(likely(tg->attrib[attr].copy_size >= 0))
memcpy(dst, &instance_id, 4);
else
{
data[0] = (float)instance_id;
tg->attrib[attr].emit( data, dst );
}
}
}
}
 
/**
* Fetch vertex attributes for 'count' vertices.
*/
static void PIPE_CDECL generic_run_elts( struct translate *translate,
const unsigned *elts,
unsigned count,
unsigned start_instance,
unsigned instance_id,
void *output_buffer )
{
struct translate_generic *tg = translate_generic(translate);
char *vert = output_buffer;
unsigned i;
 
for (i = 0; i < count; i++) {
generic_run_one(tg, *elts++, start_instance, instance_id, vert);
vert += tg->translate.key.output_stride;
}
}
 
static void PIPE_CDECL generic_run_elts16( struct translate *translate,
const uint16_t *elts,
unsigned count,
unsigned start_instance,
unsigned instance_id,
void *output_buffer )
{
struct translate_generic *tg = translate_generic(translate);
char *vert = output_buffer;
unsigned i;
 
for (i = 0; i < count; i++) {
generic_run_one(tg, *elts++, start_instance, instance_id, vert);
vert += tg->translate.key.output_stride;
}
}
 
static void PIPE_CDECL generic_run_elts8( struct translate *translate,
const uint8_t *elts,
unsigned count,
unsigned start_instance,
unsigned instance_id,
void *output_buffer )
{
struct translate_generic *tg = translate_generic(translate);
char *vert = output_buffer;
unsigned i;
 
for (i = 0; i < count; i++) {
generic_run_one(tg, *elts++, start_instance, instance_id, vert);
vert += tg->translate.key.output_stride;
}
}
 
static void PIPE_CDECL generic_run( struct translate *translate,
unsigned start,
unsigned count,
unsigned start_instance,
unsigned instance_id,
void *output_buffer )
{
struct translate_generic *tg = translate_generic(translate);
char *vert = output_buffer;
unsigned i;
 
for (i = 0; i < count; i++) {
generic_run_one(tg, start + i, start_instance, instance_id, vert);
vert += tg->translate.key.output_stride;
}
}
 
 
static void generic_set_buffer( struct translate *translate,
unsigned buf,
const void *ptr,
unsigned stride,
unsigned max_index )
{
struct translate_generic *tg = translate_generic(translate);
unsigned i;
 
for (i = 0; i < tg->nr_attrib; i++) {
if (tg->attrib[i].buffer == buf) {
tg->attrib[i].input_ptr = ((const uint8_t *)ptr +
tg->attrib[i].input_offset);
tg->attrib[i].input_stride = stride;
tg->attrib[i].max_index = max_index;
}
}
}
 
 
static void generic_release( struct translate *translate )
{
/* Refcount?
*/
FREE(translate);
}
 
static boolean
is_legal_int_format_combo( const struct util_format_description *src,
const struct util_format_description *dst )
{
unsigned i;
unsigned nr = MIN2(src->nr_channels, dst->nr_channels);
 
for (i = 0; i < nr; i++) {
/* The signs must match. */
if (src->channel[i].type != dst->channel[i].type) {
return FALSE;
}
 
/* Integers must not lose precision at any point in the pipeline. */
if (src->channel[i].size > dst->channel[i].size) {
return FALSE;
}
}
return TRUE;
}
 
struct translate *translate_generic_create( const struct translate_key *key )
{
struct translate_generic *tg = CALLOC_STRUCT(translate_generic);
unsigned i;
 
if (tg == NULL)
return NULL;
 
tg->translate.key = *key;
tg->translate.release = generic_release;
tg->translate.set_buffer = generic_set_buffer;
tg->translate.run_elts = generic_run_elts;
tg->translate.run_elts16 = generic_run_elts16;
tg->translate.run_elts8 = generic_run_elts8;
tg->translate.run = generic_run;
 
for (i = 0; i < key->nr_elements; i++) {
const struct util_format_description *format_desc =
util_format_description(key->element[i].input_format);
 
assert(format_desc);
 
tg->attrib[i].type = key->element[i].type;
 
if (format_desc->channel[0].pure_integer) {
const struct util_format_description *out_format_desc =
util_format_description(key->element[i].output_format);
 
if (!is_legal_int_format_combo(format_desc, out_format_desc)) {
FREE(tg);
return NULL;
}
 
if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
assert(format_desc->fetch_rgba_sint);
tg->attrib[i].fetch = (fetch_func)format_desc->fetch_rgba_sint;
} else {
assert(format_desc->fetch_rgba_uint);
tg->attrib[i].fetch = (fetch_func)format_desc->fetch_rgba_uint;
}
} else {
assert(format_desc->fetch_rgba_float);
tg->attrib[i].fetch = (fetch_func)format_desc->fetch_rgba_float;
}
 
tg->attrib[i].buffer = key->element[i].input_buffer;
tg->attrib[i].input_offset = key->element[i].input_offset;
tg->attrib[i].instance_divisor = key->element[i].instance_divisor;
 
tg->attrib[i].output_offset = key->element[i].output_offset;
 
tg->attrib[i].copy_size = -1;
if (tg->attrib[i].type == TRANSLATE_ELEMENT_INSTANCE_ID)
{
if(key->element[i].output_format == PIPE_FORMAT_R32_USCALED
|| key->element[i].output_format == PIPE_FORMAT_R32_SSCALED)
tg->attrib[i].copy_size = 4;
}
else
{
if(key->element[i].input_format == key->element[i].output_format
&& format_desc->block.width == 1
&& format_desc->block.height == 1
&& !(format_desc->block.bits & 7))
tg->attrib[i].copy_size = format_desc->block.bits >> 3;
}
 
if(tg->attrib[i].copy_size < 0)
tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
else
tg->attrib[i].emit = NULL;
}
 
tg->nr_attrib = key->nr_elements;
 
 
return &tg->translate;
}
 
boolean translate_generic_is_output_format_supported(enum pipe_format format)
{
switch(format)
{
case PIPE_FORMAT_R64G64B64A64_FLOAT: return TRUE;
case PIPE_FORMAT_R64G64B64_FLOAT: return TRUE;
case PIPE_FORMAT_R64G64_FLOAT: return TRUE;
case PIPE_FORMAT_R64_FLOAT: return TRUE;
 
case PIPE_FORMAT_R32G32B32A32_FLOAT: return TRUE;
case PIPE_FORMAT_R32G32B32_FLOAT: return TRUE;
case PIPE_FORMAT_R32G32_FLOAT: return TRUE;
case PIPE_FORMAT_R32_FLOAT: return TRUE;
 
case PIPE_FORMAT_R16G16B16A16_FLOAT: return TRUE;
case PIPE_FORMAT_R16G16B16_FLOAT: return TRUE;
case PIPE_FORMAT_R16G16_FLOAT: return TRUE;
case PIPE_FORMAT_R16_FLOAT: return TRUE;
 
case PIPE_FORMAT_R32G32B32A32_USCALED: return TRUE;
case PIPE_FORMAT_R32G32B32_USCALED: return TRUE;
case PIPE_FORMAT_R32G32_USCALED: return TRUE;
case PIPE_FORMAT_R32_USCALED: return TRUE;
 
case PIPE_FORMAT_R32G32B32A32_SSCALED: return TRUE;
case PIPE_FORMAT_R32G32B32_SSCALED: return TRUE;
case PIPE_FORMAT_R32G32_SSCALED: return TRUE;
case PIPE_FORMAT_R32_SSCALED: return TRUE;
 
case PIPE_FORMAT_R32G32B32A32_UNORM: return TRUE;
case PIPE_FORMAT_R32G32B32_UNORM: return TRUE;
case PIPE_FORMAT_R32G32_UNORM: return TRUE;
case PIPE_FORMAT_R32_UNORM: return TRUE;
 
case PIPE_FORMAT_R32G32B32A32_SNORM: return TRUE;
case PIPE_FORMAT_R32G32B32_SNORM: return TRUE;
case PIPE_FORMAT_R32G32_SNORM: return TRUE;
case PIPE_FORMAT_R32_SNORM: return TRUE;
 
case PIPE_FORMAT_R16G16B16A16_USCALED: return TRUE;
case PIPE_FORMAT_R16G16B16_USCALED: return TRUE;
case PIPE_FORMAT_R16G16_USCALED: return TRUE;
case PIPE_FORMAT_R16_USCALED: return TRUE;
 
case PIPE_FORMAT_R16G16B16A16_SSCALED: return TRUE;
case PIPE_FORMAT_R16G16B16_SSCALED: return TRUE;
case PIPE_FORMAT_R16G16_SSCALED: return TRUE;
case PIPE_FORMAT_R16_SSCALED: return TRUE;
 
case PIPE_FORMAT_R16G16B16A16_UNORM: return TRUE;
case PIPE_FORMAT_R16G16B16_UNORM: return TRUE;
case PIPE_FORMAT_R16G16_UNORM: return TRUE;
case PIPE_FORMAT_R16_UNORM: return TRUE;
 
case PIPE_FORMAT_R16G16B16A16_SNORM: return TRUE;
case PIPE_FORMAT_R16G16B16_SNORM: return TRUE;
case PIPE_FORMAT_R16G16_SNORM: return TRUE;
case PIPE_FORMAT_R16_SNORM: return TRUE;
 
case PIPE_FORMAT_R8G8B8A8_USCALED: return TRUE;
case PIPE_FORMAT_R8G8B8_USCALED: return TRUE;
case PIPE_FORMAT_R8G8_USCALED: return TRUE;
case PIPE_FORMAT_R8_USCALED: return TRUE;
 
case PIPE_FORMAT_R8G8B8A8_SSCALED: return TRUE;
case PIPE_FORMAT_R8G8B8_SSCALED: return TRUE;
case PIPE_FORMAT_R8G8_SSCALED: return TRUE;
case PIPE_FORMAT_R8_SSCALED: return TRUE;
 
case PIPE_FORMAT_R8G8B8A8_UNORM: return TRUE;
case PIPE_FORMAT_R8G8B8_UNORM: return TRUE;
case PIPE_FORMAT_R8G8_UNORM: return TRUE;
case PIPE_FORMAT_R8_UNORM: return TRUE;
 
case PIPE_FORMAT_R8G8B8A8_SNORM: return TRUE;
case PIPE_FORMAT_R8G8B8_SNORM: return TRUE;
case PIPE_FORMAT_R8G8_SNORM: return TRUE;
case PIPE_FORMAT_R8_SNORM: return TRUE;
 
case PIPE_FORMAT_A8R8G8B8_UNORM: return TRUE;
case PIPE_FORMAT_B8G8R8A8_UNORM: return TRUE;
 
case PIPE_FORMAT_R32G32B32A32_UINT: return TRUE;
case PIPE_FORMAT_R32G32B32_UINT: return TRUE;
case PIPE_FORMAT_R32G32_UINT: return TRUE;
case PIPE_FORMAT_R32_UINT: return TRUE;
 
case PIPE_FORMAT_R16G16B16A16_UINT: return TRUE;
case PIPE_FORMAT_R16G16B16_UINT: return TRUE;
case PIPE_FORMAT_R16G16_UINT: return TRUE;
case PIPE_FORMAT_R16_UINT: return TRUE;
 
case PIPE_FORMAT_R8G8B8A8_UINT: return TRUE;
case PIPE_FORMAT_R8G8B8_UINT: return TRUE;
case PIPE_FORMAT_R8G8_UINT: return TRUE;
case PIPE_FORMAT_R8_UINT: return TRUE;
 
case PIPE_FORMAT_R32G32B32A32_SINT: return TRUE;
case PIPE_FORMAT_R32G32B32_SINT: return TRUE;
case PIPE_FORMAT_R32G32_SINT: return TRUE;
case PIPE_FORMAT_R32_SINT: return TRUE;
 
case PIPE_FORMAT_R16G16B16A16_SINT: return TRUE;
case PIPE_FORMAT_R16G16B16_SINT: return TRUE;
case PIPE_FORMAT_R16G16_SINT: return TRUE;
case PIPE_FORMAT_R16_SINT: return TRUE;
 
case PIPE_FORMAT_R8G8B8A8_SINT: return TRUE;
case PIPE_FORMAT_R8G8B8_SINT: return TRUE;
case PIPE_FORMAT_R8G8_SINT: return TRUE;
case PIPE_FORMAT_R8_SINT: return TRUE;
 
case PIPE_FORMAT_B10G10R10A2_UNORM: return TRUE;
case PIPE_FORMAT_B10G10R10A2_USCALED: return TRUE;
case PIPE_FORMAT_B10G10R10A2_SNORM: return TRUE;
case PIPE_FORMAT_B10G10R10A2_SSCALED: return TRUE;
 
case PIPE_FORMAT_R10G10B10A2_UNORM: return TRUE;
case PIPE_FORMAT_R10G10B10A2_USCALED: return TRUE;
case PIPE_FORMAT_R10G10B10A2_SNORM: return TRUE;
case PIPE_FORMAT_R10G10B10A2_SSCALED: return TRUE;
 
default: return FALSE;
}
}
/drivers/video/Gallium/auxiliary/translate/translate_sse.c
0,0 → 1,1573
/*
* Copyright 2003 Tungsten Graphics, inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Keith Whitwell <keithw@tungstengraphics.com>
*/
 
 
#include "pipe/p_config.h"
#include "pipe/p_compiler.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_format.h"
 
#include "translate.h"
 
 
#if (defined(PIPE_ARCH_X86) || (defined(PIPE_ARCH_X86_64) && !defined(__MINGW32__))) && !defined(PIPE_SUBSYSTEM_EMBEDDED)
 
#include "rtasm/rtasm_cpu.h"
#include "rtasm/rtasm_x86sse.h"
 
 
#define X 0
#define Y 1
#define Z 2
#define W 3
 
 
struct translate_buffer {
const void *base_ptr;
uintptr_t stride;
unsigned max_index;
};
 
struct translate_buffer_variant {
unsigned buffer_index;
unsigned instance_divisor;
void *ptr; /* updated either per vertex or per instance */
};
 
 
#define ELEMENT_BUFFER_INSTANCE_ID 1001
 
#define NUM_CONSTS 7
 
enum
{
CONST_IDENTITY,
CONST_INV_127,
CONST_INV_255,
CONST_INV_32767,
CONST_INV_65535,
CONST_INV_2147483647,
CONST_255
};
 
#define C(v) {(float)(v), (float)(v), (float)(v), (float)(v)}
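/* One four-wide float constant per entry of the enum above: CONST_IDENTITY
* is (0, 0, 0, 1) and is used to pad missing components, the CONST_INV_*
* values are normalization factors for the matching integer widths, and
* CONST_255 scales normalized floats up to byte range.
*/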
static float consts[NUM_CONSTS][4] = {
{0, 0, 0, 1},
C(1.0 / 127.0),
C(1.0 / 255.0),
C(1.0 / 32767.0),
C(1.0 / 65535.0),
C(1.0 / 2147483647.0),
C(255.0)
};
#undef C
 
struct translate_sse {
struct translate translate;
 
struct x86_function linear_func;
struct x86_function elt_func;
struct x86_function elt16_func;
struct x86_function elt8_func;
struct x86_function *func;
 
PIPE_ALIGN_VAR(16) float consts[NUM_CONSTS][4];
int8_t reg_to_const[16];
int8_t const_to_reg[NUM_CONSTS];
 
struct translate_buffer buffer[PIPE_MAX_ATTRIBS];
unsigned nr_buffers;
 
/* Multiple buffer variants can map to a single buffer. */
struct translate_buffer_variant buffer_variant[PIPE_MAX_ATTRIBS];
unsigned nr_buffer_variants;
 
/* Multiple elements can map to a single buffer variant. */
unsigned element_to_buffer_variant[PIPE_MAX_ATTRIBS];
 
boolean use_instancing;
unsigned instance_id;
unsigned start_instance;
 
/* these are actually known values, but putting them in a struct
* like this is helpful to keep them in sync across the file.
*/
struct x86_reg tmp_EAX;
struct x86_reg tmp2_EDX;
struct x86_reg src_ECX;
struct x86_reg idx_ESI; /* either start+i or &elt[i] */
struct x86_reg machine_EDI;
struct x86_reg outbuf_EBX;
struct x86_reg count_EBP; /* decrements to zero */
};
 
static int get_offset( const void *a, const void *b )
{
return (const char *)b - (const char *)a;
}
 
static struct x86_reg get_const( struct translate_sse *p, unsigned id)
{
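/* Return an XMM register holding constant 'id', loading it from p->consts
* (addressed through machine_EDI) if it is not already cached. XMM2..XMM7
* serve as the constant cache, tracked by reg_to_const / const_to_reg;
* XMM0 and XMM1 are left free as scratch registers.
*/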
struct x86_reg reg;
unsigned i;
 
if(p->const_to_reg[id] >= 0)
return x86_make_reg(file_XMM, p->const_to_reg[id]);
 
for(i = 2; i < 8; ++i)
{
if(p->reg_to_const[i] < 0)
break;
}
 
/* TODO: be smarter here */
if(i == 8)
--i;
 
reg = x86_make_reg(file_XMM, i);
 
if(p->reg_to_const[i] >= 0)
p->const_to_reg[p->reg_to_const[i]] = -1;
 
p->reg_to_const[i] = id;
p->const_to_reg[id] = i;
 
/* TODO: this should happen outside the loop, if possible */
sse_movaps(p->func, reg,
x86_make_disp(p->machine_EDI,
get_offset(p, &p->consts[id][0])));
 
return reg;
}
 
/* load the data in a SSE2 register, padding with zeros */
static boolean emit_load_sse2( struct translate_sse *p,
struct x86_reg data,
struct x86_reg src,
unsigned size)
{
struct x86_reg tmpXMM = x86_make_reg(file_XMM, 1);
struct x86_reg tmp = p->tmp_EAX;
switch(size)
{
case 1:
x86_movzx8(p->func, tmp, src);
sse2_movd(p->func, data, tmp);
break;
case 2:
x86_movzx16(p->func, tmp, src);
sse2_movd(p->func, data, tmp);
break;
case 3:
x86_movzx8(p->func, tmp, x86_make_disp(src, 2));
x86_shl_imm(p->func, tmp, 16);
x86_mov16(p->func, tmp, src);
sse2_movd(p->func, data, tmp);
break;
case 4:
sse2_movd(p->func, data, src);
break;
case 6:
sse2_movd(p->func, data, src);
x86_movzx16(p->func, tmp, x86_make_disp(src, 4));
sse2_movd(p->func, tmpXMM, tmp);
sse2_punpckldq(p->func, data, tmpXMM);
break;
case 8:
sse2_movq(p->func, data, src);
break;
case 12:
sse2_movq(p->func, data, src);
sse2_movd(p->func, tmpXMM, x86_make_disp(src, 8));
sse2_punpcklqdq(p->func, data, tmpXMM);
break;
case 16:
sse2_movdqu(p->func, data, src);
break;
default:
return FALSE;
}
return TRUE;
}
 
/* this value can be passed for the out_chans argument */
#define CHANNELS_0001 5
 
/* This function loads 'chans' float values and pads the register
* with zeroes up to at least 'out_chans' components.
*
* If out_chans is set to CHANNELS_0001, the fourth value is
* padded with 1 instead. Only pass this value when chans < 4;
* otherwise the results are undefined.
*/
static void emit_load_float32( struct translate_sse *p,
struct x86_reg data,
struct x86_reg arg0,
unsigned out_chans,
unsigned chans)
{
switch(chans)
{
case 1:
/* a 0 0 0
* a 0 0 1
*/
sse_movss(p->func, data, arg0);
if(out_chans == CHANNELS_0001)
sse_orps(p->func, data, get_const(p, CONST_IDENTITY) );
break;
case 2:
/* 0 0 0 1
* a b 0 1
*/
if(out_chans == CHANNELS_0001)
sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X, Y, Z, W) );
else if(out_chans > 2)
sse_movlhps(p->func, data, get_const(p, CONST_IDENTITY) );
sse_movlps(p->func, data, arg0);
break;
case 3:
/* Have to jump through some hoops:
*
* c 0 0 0
* c 0 0 1 if out_chans == CHANNELS_0001
* 0 0 c 0/1
* a b c 0/1
*/
sse_movss(p->func, data, x86_make_disp(arg0, 8));
if(out_chans == CHANNELS_0001)
sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X,Y,Z,W) );
sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) );
sse_movlps(p->func, data, arg0);
break;
case 4:
sse_movups(p->func, data, arg0);
break;
}
}
 
/* This function behaves like emit_load_float32, but loads
* 64-bit floating point numbers and converts them to 32-bit
* ones.
*/
static void emit_load_float64to32( struct translate_sse *p,
struct x86_reg data,
struct x86_reg arg0,
unsigned out_chans,
unsigned chans)
{
struct x86_reg tmpXMM = x86_make_reg(file_XMM, 1);
switch(chans)
{
case 1:
sse2_movsd(p->func, data, arg0);
if(out_chans > 1)
sse2_cvtpd2ps(p->func, data, data);
else
sse2_cvtsd2ss(p->func, data, data);
if(out_chans == CHANNELS_0001)
sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X, Y, Z, W) );
break;
case 2:
sse2_movupd(p->func, data, arg0);
sse2_cvtpd2ps(p->func, data, data);
if(out_chans == CHANNELS_0001)
sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X, Y, Z, W) );
else if(out_chans > 2)
sse_movlhps(p->func, data, get_const(p, CONST_IDENTITY) );
break;
case 3:
sse2_movupd(p->func, data, arg0);
sse2_cvtpd2ps(p->func, data, data);
sse2_movsd(p->func, tmpXMM, x86_make_disp(arg0, 16));
if(out_chans > 3)
sse2_cvtpd2ps(p->func, tmpXMM, tmpXMM);
else
sse2_cvtsd2ss(p->func, tmpXMM, tmpXMM);
sse_movlhps(p->func, data, tmpXMM);
if(out_chans == CHANNELS_0001)
sse_orps(p->func, data, get_const(p, CONST_IDENTITY) );
break;
case 4:
sse2_movupd(p->func, data, arg0);
sse2_cvtpd2ps(p->func, data, data);
sse2_movupd(p->func, tmpXMM, x86_make_disp(arg0, 16));
sse2_cvtpd2ps(p->func, tmpXMM, tmpXMM);
sse_movlhps(p->func, data, tmpXMM);
break;
}
}
 
static void emit_mov64(struct translate_sse *p, struct x86_reg dst_gpr, struct x86_reg dst_xmm, struct x86_reg src_gpr, struct x86_reg src_xmm)
{
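/* Copy 8 bytes between the given operands: a single 64-bit GPR move on
* x86-64 targets, otherwise an XMM movq (SSE2) or movlps (SSE) on 32-bit.
*/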
if(x86_target(p->func) != X86_32)
x64_mov64(p->func, dst_gpr, src_gpr);
else
{
/* TODO: when/on which CPUs is SSE2 actually better than SSE? */
if(x86_target_caps(p->func) & X86_SSE2)
sse2_movq(p->func, dst_xmm, src_xmm);
else
sse_movlps(p->func, dst_xmm, src_xmm);
}
}
 
static void emit_load64(struct translate_sse *p, struct x86_reg dst_gpr, struct x86_reg dst_xmm, struct x86_reg src)
{
emit_mov64(p, dst_gpr, dst_xmm, src, src);
}
 
static void emit_store64(struct translate_sse *p, struct x86_reg dst, struct x86_reg src_gpr, struct x86_reg src_xmm)
{
emit_mov64(p, dst, dst, src_gpr, src_xmm);
}
 
static void emit_mov128(struct translate_sse *p, struct x86_reg dst, struct x86_reg src)
{
if(x86_target_caps(p->func) & X86_SSE2)
sse2_movdqu(p->func, dst, src);
else
sse_movups(p->func, dst, src);
}
 
/* TODO: this uses unaligned accesses liberally, which is great on Nehalem,
* but may or may not be good on older processors
* TODO: may perhaps want to use non-temporal stores here if possible
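*
* Copies 'size' bytes from src to dst inline: sizes below 8 use plain GPR
* moves, larger sizes use 64- and 128-bit XMM moves when SSE is available,
* and a 4-byte GPR loop is the non-SSE fallback.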
*/
static void emit_memcpy(struct translate_sse *p, struct x86_reg dst, struct x86_reg src, unsigned size)
{
struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
struct x86_reg dataXMM2 = x86_make_reg(file_XMM, 1);
struct x86_reg dataGPR = p->tmp_EAX;
struct x86_reg dataGPR2 = p->tmp2_EDX;
 
if(size < 8)
{
switch (size)
{
case 1:
x86_mov8(p->func, dataGPR, src);
x86_mov8(p->func, dst, dataGPR);
break;
case 2:
x86_mov16(p->func, dataGPR, src);
x86_mov16(p->func, dst, dataGPR);
break;
case 3:
x86_mov16(p->func, dataGPR, src);
x86_mov8(p->func, dataGPR2, x86_make_disp(src, 2));
x86_mov16(p->func, dst, dataGPR);
x86_mov8(p->func, x86_make_disp(dst, 2), dataGPR2);
break;
case 4:
x86_mov(p->func, dataGPR, src);
x86_mov(p->func, dst, dataGPR);
break;
case 6:
x86_mov(p->func, dataGPR, src);
x86_mov16(p->func, dataGPR2, x86_make_disp(src, 4));
x86_mov(p->func, dst, dataGPR);
x86_mov16(p->func, x86_make_disp(dst, 4), dataGPR2);
break;
}
}
else if(!(x86_target_caps(p->func) & X86_SSE))
{
unsigned i = 0;
assert((size & 3) == 0);
for(i = 0; i < size; i += 4)
{
x86_mov(p->func, dataGPR, x86_make_disp(src, i));
x86_mov(p->func, x86_make_disp(dst, i), dataGPR);
}
}
else
{
switch(size)
{
case 8:
emit_load64(p, dataGPR, dataXMM, src);
emit_store64(p, dst, dataGPR, dataXMM);
break;
case 12:
emit_load64(p, dataGPR2, dataXMM, src);
x86_mov(p->func, dataGPR, x86_make_disp(src, 8));
emit_store64(p, dst, dataGPR2, dataXMM);
x86_mov(p->func, x86_make_disp(dst, 8), dataGPR);
break;
case 16:
emit_mov128(p, dataXMM, src);
emit_mov128(p, dst, dataXMM);
break;
case 24:
emit_mov128(p, dataXMM, src);
emit_load64(p, dataGPR, dataXMM2, x86_make_disp(src, 16));
emit_mov128(p, dst, dataXMM);
emit_store64(p, x86_make_disp(dst, 16), dataGPR, dataXMM2);
break;
case 32:
emit_mov128(p, dataXMM, src);
emit_mov128(p, dataXMM2, x86_make_disp(src, 16));
emit_mov128(p, dst, dataXMM);
emit_mov128(p, x86_make_disp(dst, 16), dataXMM2);
break;
default:
assert(0);
}
}
}
 
static boolean translate_attr_convert( struct translate_sse *p,
const struct translate_element *a,
struct x86_reg src,
struct x86_reg dst)
 
{
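/* Emit code converting one attribute from a->input_format to
* a->output_format. Three main paths are handled: integer or float input
* expanded to a 32-bit float output (SSE/SSE2, with normalization and
* swizzling), 8-bit channels widened to 16-bit channels (SSE2), and formats
* with an identical channel layout that only need per-channel moves,
* constant fills or a byte swap. A final special case packs
* R32G32B32A32_FLOAT into RGBA8/BGRA8 UNORM. Returns FALSE when no code
* path applies.
*/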
const struct util_format_description* input_desc = util_format_description(a->input_format);
const struct util_format_description* output_desc = util_format_description(a->output_format);
unsigned i;
boolean id_swizzle = TRUE;
unsigned swizzle[4] = {UTIL_FORMAT_SWIZZLE_NONE, UTIL_FORMAT_SWIZZLE_NONE, UTIL_FORMAT_SWIZZLE_NONE, UTIL_FORMAT_SWIZZLE_NONE};
unsigned needed_chans = 0;
unsigned imms[2] = {0, 0x3f800000};
 
if(a->output_format == PIPE_FORMAT_NONE || a->input_format == PIPE_FORMAT_NONE)
return FALSE;
 
if(input_desc->channel[0].size & 7)
return FALSE;
 
if(input_desc->colorspace != output_desc->colorspace)
return FALSE;
 
for(i = 1; i < input_desc->nr_channels; ++i)
{
if(memcmp(&input_desc->channel[i], &input_desc->channel[0], sizeof(input_desc->channel[0])))
return FALSE;
}
 
for(i = 1; i < output_desc->nr_channels; ++i)
{
if(memcmp(&output_desc->channel[i], &output_desc->channel[0], sizeof(output_desc->channel[0])))
return FALSE;
}
 
for(i = 0; i < output_desc->nr_channels; ++i)
{
if(output_desc->swizzle[i] < 4)
swizzle[output_desc->swizzle[i]] = input_desc->swizzle[i];
}
 
if((x86_target_caps(p->func) & X86_SSE) && (0
|| a->output_format == PIPE_FORMAT_R32_FLOAT
|| a->output_format == PIPE_FORMAT_R32G32_FLOAT
|| a->output_format == PIPE_FORMAT_R32G32B32_FLOAT
|| a->output_format == PIPE_FORMAT_R32G32B32A32_FLOAT))
{
struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
 
for(i = 0; i < output_desc->nr_channels; ++i)
{
if(swizzle[i] == UTIL_FORMAT_SWIZZLE_0 && i >= input_desc->nr_channels)
swizzle[i] = i;
}
 
for(i = 0; i < output_desc->nr_channels; ++i)
{
if(swizzle[i] < 4)
needed_chans = MAX2(needed_chans, swizzle[i] + 1);
if(swizzle[i] < UTIL_FORMAT_SWIZZLE_0 && swizzle[i] != i)
id_swizzle = FALSE;
}
 
if(needed_chans > 0)
{
switch(input_desc->channel[0].type)
{
case UTIL_FORMAT_TYPE_UNSIGNED:
if(!(x86_target_caps(p->func) & X86_SSE2))
return FALSE;
emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3);
 
/* TODO: add support for SSE4.1 pmovzx */
switch(input_desc->channel[0].size)
{
case 8:
/* TODO: this may be inefficient because CONST_IDENTITY is used both as a float and as an integer register */
sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY));
sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY));
break;
case 16:
sse2_punpcklwd(p->func, dataXMM, get_const(p, CONST_IDENTITY));
break;
case 32: /* we lose precision here */
sse2_psrld_imm(p->func, dataXMM, 1);
break;
default:
return FALSE;
}
sse2_cvtdq2ps(p->func, dataXMM, dataXMM);
if(input_desc->channel[0].normalized)
{
struct x86_reg factor;
switch(input_desc->channel[0].size)
{
case 8:
factor = get_const(p, CONST_INV_255);
break;
case 16:
factor = get_const(p, CONST_INV_65535);
break;
case 32:
factor = get_const(p, CONST_INV_2147483647);
break;
default:
assert(0);
factor.disp = 0;
factor.file = 0;
factor.idx = 0;
factor.mod = 0;
break;
}
sse_mulps(p->func, dataXMM, factor);
}
else if(input_desc->channel[0].size == 32)
sse_addps(p->func, dataXMM, dataXMM); /* compensate for the bit we threw away to fit u32 into s32 */
break;
case UTIL_FORMAT_TYPE_SIGNED:
if(!(x86_target_caps(p->func) & X86_SSE2))
return FALSE;
emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3);
 
/* TODO: add support for SSE4.1 pmovsx */
switch(input_desc->channel[0].size)
{
case 8:
sse2_punpcklbw(p->func, dataXMM, dataXMM);
sse2_punpcklbw(p->func, dataXMM, dataXMM);
sse2_psrad_imm(p->func, dataXMM, 24);
break;
case 16:
sse2_punpcklwd(p->func, dataXMM, dataXMM);
sse2_psrad_imm(p->func, dataXMM, 16);
break;
case 32: /* we lose precision here */
break;
default:
return FALSE;
}
sse2_cvtdq2ps(p->func, dataXMM, dataXMM);
if(input_desc->channel[0].normalized)
{
struct x86_reg factor;
switch(input_desc->channel[0].size)
{
case 8:
factor = get_const(p, CONST_INV_127);
break;
case 16:
factor = get_const(p, CONST_INV_32767);
break;
case 32:
factor = get_const(p, CONST_INV_2147483647);
break;
default:
assert(0);
factor.disp = 0;
factor.file = 0;
factor.idx = 0;
factor.mod = 0;
break;
}
sse_mulps(p->func, dataXMM, factor);
}
break;
 
case UTIL_FORMAT_TYPE_FLOAT:
if(input_desc->channel[0].size != 32 && input_desc->channel[0].size != 64)
return FALSE;
if(swizzle[3] == UTIL_FORMAT_SWIZZLE_1 && input_desc->nr_channels <= 3)
{
swizzle[3] = UTIL_FORMAT_SWIZZLE_W;
needed_chans = CHANNELS_0001;
}
switch(input_desc->channel[0].size)
{
case 32:
emit_load_float32(p, dataXMM, src, needed_chans, input_desc->nr_channels);
break;
case 64: /* we lose precision here */
if(!(x86_target_caps(p->func) & X86_SSE2))
return FALSE;
emit_load_float64to32(p, dataXMM, src, needed_chans, input_desc->nr_channels);
break;
default:
return FALSE;
}
break;
default:
return FALSE;
}
 
if(!id_swizzle)
sse_shufps(p->func, dataXMM, dataXMM, SHUF(swizzle[0], swizzle[1], swizzle[2], swizzle[3]) );
}
 
if(output_desc->nr_channels >= 4
&& swizzle[0] < UTIL_FORMAT_SWIZZLE_0
&& swizzle[1] < UTIL_FORMAT_SWIZZLE_0
&& swizzle[2] < UTIL_FORMAT_SWIZZLE_0
&& swizzle[3] < UTIL_FORMAT_SWIZZLE_0
)
sse_movups(p->func, dst, dataXMM);
else
{
if(output_desc->nr_channels >= 2
&& swizzle[0] < UTIL_FORMAT_SWIZZLE_0
&& swizzle[1] < UTIL_FORMAT_SWIZZLE_0)
sse_movlps(p->func, dst, dataXMM);
else
{
if(swizzle[0] < UTIL_FORMAT_SWIZZLE_0)
sse_movss(p->func, dst, dataXMM);
else
x86_mov_imm(p->func, dst, imms[swizzle[0] - UTIL_FORMAT_SWIZZLE_0]);
 
if(output_desc->nr_channels >= 2)
{
if(swizzle[1] < UTIL_FORMAT_SWIZZLE_0)
{
sse_shufps(p->func, dataXMM, dataXMM, SHUF(1, 1, 2, 3));
sse_movss(p->func, x86_make_disp(dst, 4), dataXMM);
}
else
x86_mov_imm(p->func, x86_make_disp(dst, 4), imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0]);
}
}
 
if(output_desc->nr_channels >= 3)
{
if(output_desc->nr_channels >= 4
&& swizzle[2] < UTIL_FORMAT_SWIZZLE_0
&& swizzle[3] < UTIL_FORMAT_SWIZZLE_0)
sse_movhps(p->func, x86_make_disp(dst, 8), dataXMM);
else
{
if(swizzle[2] < UTIL_FORMAT_SWIZZLE_0)
{
sse_shufps(p->func, dataXMM, dataXMM, SHUF(2, 2, 2, 3));
sse_movss(p->func, x86_make_disp(dst, 8), dataXMM);
}
else
x86_mov_imm(p->func, x86_make_disp(dst, 8), imms[swizzle[2] - UTIL_FORMAT_SWIZZLE_0]);
 
if(output_desc->nr_channels >= 4)
{
if(swizzle[3] < UTIL_FORMAT_SWIZZLE_0)
{
sse_shufps(p->func, dataXMM, dataXMM, SHUF(3, 3, 3, 3));
sse_movss(p->func, x86_make_disp(dst, 12), dataXMM);
}
else
x86_mov_imm(p->func, x86_make_disp(dst, 12), imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0]);
}
}
}
}
return TRUE;
}
else if((x86_target_caps(p->func) & X86_SSE2) && input_desc->channel[0].size == 8 && output_desc->channel[0].size == 16
&& output_desc->channel[0].normalized == input_desc->channel[0].normalized
&& (0
|| (input_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED && output_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED)
|| (input_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED && output_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
|| (input_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED && output_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
))
{
struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
struct x86_reg tmpXMM = x86_make_reg(file_XMM, 1);
struct x86_reg tmp = p->tmp_EAX;
unsigned imms[2] = {0, 1};
 
for(i = 0; i < output_desc->nr_channels; ++i)
{
if(swizzle[i] == UTIL_FORMAT_SWIZZLE_0 && i >= input_desc->nr_channels)
swizzle[i] = i;
}
 
for(i = 0; i < output_desc->nr_channels; ++i)
{
if(swizzle[i] < 4)
needed_chans = MAX2(needed_chans, swizzle[i] + 1);
if(swizzle[i] < UTIL_FORMAT_SWIZZLE_0 && swizzle[i] != i)
id_swizzle = FALSE;
}
 
if(needed_chans > 0)
{
emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3);
 
switch(input_desc->channel[0].type)
{
case UTIL_FORMAT_TYPE_UNSIGNED:
if(input_desc->channel[0].normalized)
{
sse2_punpcklbw(p->func, dataXMM, dataXMM);
if(output_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
sse2_psrlw_imm(p->func, dataXMM, 1);
}
else
sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY));
break;
case UTIL_FORMAT_TYPE_SIGNED:
if(input_desc->channel[0].normalized)
{
sse2_movq(p->func, tmpXMM, get_const(p, CONST_IDENTITY));
sse2_punpcklbw(p->func, tmpXMM, dataXMM);
sse2_psllw_imm(p->func, dataXMM, 9);
sse2_psrlw_imm(p->func, dataXMM, 8);
sse2_por(p->func, tmpXMM, dataXMM);
sse2_psrlw_imm(p->func, dataXMM, 7);
sse2_por(p->func, tmpXMM, dataXMM);
{
struct x86_reg t = dataXMM;
dataXMM = tmpXMM;
tmpXMM = t;
}
}
else
{
sse2_punpcklbw(p->func, dataXMM, dataXMM);
sse2_psraw_imm(p->func, dataXMM, 8);
}
break;
default:
assert(0);
}
 
if(output_desc->channel[0].normalized)
imms[1] = (output_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) ? 0xffff : 0x7fff;
 
if(!id_swizzle)
sse2_pshuflw(p->func, dataXMM, dataXMM, (swizzle[0] & 3) | ((swizzle[1] & 3) << 2) | ((swizzle[2] & 3) << 4) | ((swizzle[3] & 3) << 6));
}
 
if(output_desc->nr_channels >= 4
&& swizzle[0] < UTIL_FORMAT_SWIZZLE_0
&& swizzle[1] < UTIL_FORMAT_SWIZZLE_0
&& swizzle[2] < UTIL_FORMAT_SWIZZLE_0
&& swizzle[3] < UTIL_FORMAT_SWIZZLE_0
)
sse2_movq(p->func, dst, dataXMM);
else
{
if(swizzle[0] < UTIL_FORMAT_SWIZZLE_0)
{
if(output_desc->nr_channels >= 2 && swizzle[1] < UTIL_FORMAT_SWIZZLE_0)
sse2_movd(p->func, dst, dataXMM);
else
{
sse2_movd(p->func, tmp, dataXMM);
x86_mov16(p->func, dst, tmp);
if(output_desc->nr_channels >= 2)
x86_mov16_imm(p->func, x86_make_disp(dst, 2), imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0]);
}
}
else
{
if(output_desc->nr_channels >= 2 && swizzle[1] >= UTIL_FORMAT_SWIZZLE_0)
x86_mov_imm(p->func, dst, (imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0] << 16) | imms[swizzle[0] - UTIL_FORMAT_SWIZZLE_0]);
else
{
x86_mov16_imm(p->func, dst, imms[swizzle[0] - UTIL_FORMAT_SWIZZLE_0]);
if(output_desc->nr_channels >= 2)
{
sse2_movd(p->func, tmp, dataXMM);
x86_shr_imm(p->func, tmp, 16);
x86_mov16(p->func, x86_make_disp(dst, 2), tmp);
}
}
}
 
if(output_desc->nr_channels >= 3)
{
if(swizzle[2] < UTIL_FORMAT_SWIZZLE_0)
{
if(output_desc->nr_channels >= 4 && swizzle[3] < UTIL_FORMAT_SWIZZLE_0)
{
sse2_psrlq_imm(p->func, dataXMM, 32);
sse2_movd(p->func, x86_make_disp(dst, 4), dataXMM);
}
else
{
sse2_psrlq_imm(p->func, dataXMM, 32);
sse2_movd(p->func, tmp, dataXMM);
x86_mov16(p->func, x86_make_disp(dst, 4), tmp);
if(output_desc->nr_channels >= 4)
{
x86_mov16_imm(p->func, x86_make_disp(dst, 6), imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0]);
}
}
}
else
{
if(output_desc->nr_channels >= 4 && swizzle[3] >= UTIL_FORMAT_SWIZZLE_0)
x86_mov_imm(p->func, x86_make_disp(dst, 4), (imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0] << 16) | imms[swizzle[2] - UTIL_FORMAT_SWIZZLE_0]);
else
{
x86_mov16_imm(p->func, x86_make_disp(dst, 4), imms[swizzle[2] - UTIL_FORMAT_SWIZZLE_0]);
 
if(output_desc->nr_channels >= 4)
{
sse2_psrlq_imm(p->func, dataXMM, 48);
sse2_movd(p->func, tmp, dataXMM);
x86_mov16(p->func, x86_make_disp(dst, 6), tmp);
}
}
}
}
}
return TRUE;
}
else if(!memcmp(&output_desc->channel[0], &input_desc->channel[0], sizeof(output_desc->channel[0])))
{
struct x86_reg tmp = p->tmp_EAX;
unsigned i;
if(input_desc->channel[0].size == 8 && input_desc->nr_channels == 4 && output_desc->nr_channels == 4
&& swizzle[0] == UTIL_FORMAT_SWIZZLE_W
&& swizzle[1] == UTIL_FORMAT_SWIZZLE_Z
&& swizzle[2] == UTIL_FORMAT_SWIZZLE_Y
&& swizzle[3] == UTIL_FORMAT_SWIZZLE_X)
{
/* TODO: support movbe */
x86_mov(p->func, tmp, src);
x86_bswap(p->func, tmp);
x86_mov(p->func, dst, tmp);
return TRUE;
}
 
for(i = 0; i < output_desc->nr_channels; ++i)
{
switch(output_desc->channel[0].size)
{
case 8:
if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0)
{
unsigned v = 0;
if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1)
{
switch(output_desc->channel[0].type)
{
case UTIL_FORMAT_TYPE_UNSIGNED:
v = output_desc->channel[0].normalized ? 0xff : 1;
break;
case UTIL_FORMAT_TYPE_SIGNED:
v = output_desc->channel[0].normalized ? 0x7f : 1;
break;
default:
return FALSE;
}
}
x86_mov8_imm(p->func, x86_make_disp(dst, i * 1), v);
}
else
{
x86_mov8(p->func, tmp, x86_make_disp(src, swizzle[i] * 1));
x86_mov8(p->func, x86_make_disp(dst, i * 1), tmp);
}
break;
case 16:
if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0)
{
unsigned v = 0;
if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1)
{
switch(output_desc->channel[1].type)
{
case UTIL_FORMAT_TYPE_UNSIGNED:
v = output_desc->channel[1].normalized ? 0xffff : 1;
break;
case UTIL_FORMAT_TYPE_SIGNED:
v = output_desc->channel[1].normalized ? 0x7fff : 1;
break;
case UTIL_FORMAT_TYPE_FLOAT:
v = 0x3c00;
break;
default:
return FALSE;
}
}
x86_mov16_imm(p->func, x86_make_disp(dst, i * 2), v);
}
else if(swizzle[i] == UTIL_FORMAT_SWIZZLE_0)
x86_mov16_imm(p->func, x86_make_disp(dst, i * 2), 0);
else
{
x86_mov16(p->func, tmp, x86_make_disp(src, swizzle[i] * 2));
x86_mov16(p->func, x86_make_disp(dst, i * 2), tmp);
}
break;
case 32:
if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0)
{
unsigned v = 0;
if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1)
{
switch(output_desc->channel[1].type)
{
case UTIL_FORMAT_TYPE_UNSIGNED:
v = output_desc->channel[1].normalized ? 0xffffffff : 1;
break;
case UTIL_FORMAT_TYPE_SIGNED:
v = output_desc->channel[1].normalized ? 0x7fffffff : 1;
break;
case UTIL_FORMAT_TYPE_FLOAT:
v = 0x3f800000;
break;
default:
return FALSE;
}
}
x86_mov_imm(p->func, x86_make_disp(dst, i * 4), v);
}
else
{
x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 4));
x86_mov(p->func, x86_make_disp(dst, i * 4), tmp);
}
break;
case 64:
if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0)
{
unsigned l = 0;
unsigned h = 0;
if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1)
{
switch(output_desc->channel[1].type)
{
case UTIL_FORMAT_TYPE_UNSIGNED:
h = output_desc->channel[1].normalized ? 0xffffffff : 0;
l = output_desc->channel[1].normalized ? 0xffffffff : 1;
break;
case UTIL_FORMAT_TYPE_SIGNED:
h = output_desc->channel[1].normalized ? 0x7fffffff : 0;
l = output_desc->channel[1].normalized ? 0xffffffff : 1;
break;
case UTIL_FORMAT_TYPE_FLOAT:
h = 0x3ff00000;
l = 0;
break;
default:
return FALSE;
}
}
x86_mov_imm(p->func, x86_make_disp(dst, i * 8), l);
x86_mov_imm(p->func, x86_make_disp(dst, i * 8 + 4), h);
}
else
{
if(x86_target_caps(p->func) & X86_SSE)
{
struct x86_reg tmpXMM = x86_make_reg(file_XMM, 0);
emit_load64(p, tmp, tmpXMM, x86_make_disp(src, swizzle[i] * 8));
emit_store64(p, x86_make_disp(dst, i * 8), tmp, tmpXMM);
}
else
{
x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 8));
x86_mov(p->func, x86_make_disp(dst, i * 8), tmp);
x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 8 + 4));
x86_mov(p->func, x86_make_disp(dst, i * 8 + 4), tmp);
}
}
break;
default:
return FALSE;
}
}
return TRUE;
}
/* special case for draw's EMIT_4UB (RGBA) and EMIT_4UB_BGRA */
else if((x86_target_caps(p->func) & X86_SSE2) &&
a->input_format == PIPE_FORMAT_R32G32B32A32_FLOAT && (0
|| a->output_format == PIPE_FORMAT_B8G8R8A8_UNORM
|| a->output_format == PIPE_FORMAT_R8G8B8A8_UNORM
))
{
struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
 
/* load */
sse_movups(p->func, dataXMM, src);
 
if (a->output_format == PIPE_FORMAT_B8G8R8A8_UNORM)
sse_shufps(p->func, dataXMM, dataXMM, SHUF(2,1,0,3));
 
/* scale by 255.0 */
sse_mulps(p->func, dataXMM, get_const(p, CONST_255));
 
/* pack and emit */
sse2_cvtps2dq(p->func, dataXMM, dataXMM);
sse2_packssdw(p->func, dataXMM, dataXMM);
sse2_packuswb(p->func, dataXMM, dataXMM);
sse2_movd(p->func, dst, dataXMM);
 
return TRUE;
}
 
return FALSE;
}
 
static boolean translate_attr( struct translate_sse *p,
const struct translate_element *a,
struct x86_reg src,
struct x86_reg dst)
{
if(a->input_format == a->output_format)
{
emit_memcpy(p, dst, src, util_format_get_stride(a->input_format, 1));
return TRUE;
}
 
return translate_attr_convert(p, a, src, dst);
}
 
static boolean init_inputs( struct translate_sse *p,
unsigned index_size )
{
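/* Emitted once before the vertex loop: compute the starting source pointer
* (base_ptr + stride * index) for every buffer variant that is read linearly
* or per instance. Indexed, non-instanced variants are addressed per element
* in get_buffer_ptr() instead.
*/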
unsigned i;
struct x86_reg instance_id = x86_make_disp(p->machine_EDI,
get_offset(p, &p->instance_id));
struct x86_reg start_instance = x86_make_disp(p->machine_EDI,
get_offset(p, &p->start_instance));
 
for (i = 0; i < p->nr_buffer_variants; i++) {
struct translate_buffer_variant *variant = &p->buffer_variant[i];
struct translate_buffer *buffer = &p->buffer[variant->buffer_index];
 
if (!index_size || variant->instance_divisor) {
struct x86_reg buf_max_index = x86_make_disp(p->machine_EDI,
get_offset(p, &buffer->max_index));
struct x86_reg buf_stride = x86_make_disp(p->machine_EDI,
get_offset(p, &buffer->stride));
struct x86_reg buf_ptr = x86_make_disp(p->machine_EDI,
get_offset(p, &variant->ptr));
struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDI,
get_offset(p, &buffer->base_ptr));
struct x86_reg elt = p->idx_ESI;
struct x86_reg tmp_EAX = p->tmp_EAX;
 
/* Calculate pointer to first attrib:
* base_ptr + stride * index, where index depends on instance divisor
*/
if (variant->instance_divisor) {
/* Start with instance = instance_id
* which is true if divisor is 1.
*/
x86_mov(p->func, tmp_EAX, instance_id);
 
if (variant->instance_divisor != 1) {
struct x86_reg tmp_EDX = p->tmp2_EDX;
struct x86_reg tmp_ECX = p->src_ECX;
 
/* instance_num = instance_id - start_instance */
x86_mov(p->func, tmp_EDX, start_instance);
x86_sub(p->func, tmp_EAX, tmp_EDX);
 
/* TODO: Add x86_shr() to rtasm and use it whenever
* instance divisor is power of two.
*/
x86_xor(p->func, tmp_EDX, tmp_EDX);
x86_mov_reg_imm(p->func, tmp_ECX, variant->instance_divisor);
x86_div(p->func, tmp_ECX); /* EAX = EDX:EAX / ECX */
 
/* instance = (instance_id - start_instance) / divisor +
* start_instance
*/
x86_mov(p->func, tmp_EDX, start_instance);
x86_add(p->func, tmp_EAX, tmp_EDX);
}
 
/* XXX we need to clamp the index here too, but to a
* per-array max value, not the draw->pt.max_index value
* that's being given to us via translate->set_buffer().
*/
} else {
x86_mov(p->func, tmp_EAX, elt);
 
/* Clamp to max_index
*/
x86_cmp(p->func, tmp_EAX, buf_max_index);
x86_cmovcc(p->func, tmp_EAX, buf_max_index, cc_AE);
}
 
x86_imul(p->func, tmp_EAX, buf_stride);
x64_rexw(p->func);
x86_add(p->func, tmp_EAX, buf_base_ptr);
 
x86_cmp(p->func, p->count_EBP, p->tmp_EAX);
 
/* In the linear case, keep the buffer pointer instead of the
* index number.
*/
if (!index_size && p->nr_buffer_variants == 1)
{
x64_rexw(p->func);
x86_mov(p->func, elt, tmp_EAX);
}
else
{
x64_rexw(p->func);
x86_mov(p->func, buf_ptr, tmp_EAX);
}
}
}
 
return TRUE;
}
 
 
static struct x86_reg get_buffer_ptr( struct translate_sse *p,
unsigned index_size,
unsigned var_idx,
struct x86_reg elt )
{
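/* Return an operand addressing the current vertex for buffer variant
* 'var_idx': the stored instance id for ELEMENT_BUFFER_INSTANCE_ID, the
* pointer prepared by init_inputs() for linear or instanced data, or
* base_ptr + clamped_index * stride computed here for indexed draws.
*/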
if (var_idx == ELEMENT_BUFFER_INSTANCE_ID) {
return x86_make_disp(p->machine_EDI,
get_offset(p, &p->instance_id));
}
if (!index_size && p->nr_buffer_variants == 1) {
return p->idx_ESI;
}
else if (!index_size || p->buffer_variant[var_idx].instance_divisor) {
struct x86_reg ptr = p->src_ECX;
struct x86_reg buf_ptr =
x86_make_disp(p->machine_EDI,
get_offset(p, &p->buffer_variant[var_idx].ptr));
x64_rexw(p->func);
x86_mov(p->func, ptr, buf_ptr);
return ptr;
}
else {
struct x86_reg ptr = p->src_ECX;
const struct translate_buffer_variant *variant = &p->buffer_variant[var_idx];
 
struct x86_reg buf_stride =
x86_make_disp(p->machine_EDI,
get_offset(p, &p->buffer[variant->buffer_index].stride));
 
struct x86_reg buf_base_ptr =
x86_make_disp(p->machine_EDI,
get_offset(p, &p->buffer[variant->buffer_index].base_ptr));
 
struct x86_reg buf_max_index =
x86_make_disp(p->machine_EDI,
get_offset(p, &p->buffer[variant->buffer_index].max_index));
 
 
 
/* Calculate pointer to current attrib:
*/
switch(index_size)
{
case 1:
x86_movzx8(p->func, ptr, elt);
break;
case 2:
x86_movzx16(p->func, ptr, elt);
break;
case 4:
x86_mov(p->func, ptr, elt);
break;
}
 
/* Clamp to max_index
*/
x86_cmp(p->func, ptr, buf_max_index);
x86_cmovcc(p->func, ptr, buf_max_index, cc_AE);
 
x86_imul(p->func, ptr, buf_stride);
x64_rexw(p->func);
x86_add(p->func, ptr, buf_base_ptr);
return ptr;
}
}
 
 
 
static boolean incr_inputs( struct translate_sse *p,
unsigned index_size )
{
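/* Emitted at the bottom of the vertex loop: advance the linear buffer
* pointer(s) by their stride, or step the element pointer by index_size
* bytes for indexed draws. Instanced variants keep their pointer unchanged.
*/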
if (!index_size && p->nr_buffer_variants == 1) {
struct x86_reg stride = x86_make_disp(p->machine_EDI,
get_offset(p, &p->buffer[0].stride));
 
if (p->buffer_variant[0].instance_divisor == 0) {
x64_rexw(p->func);
x86_add(p->func, p->idx_ESI, stride);
sse_prefetchnta(p->func, x86_make_disp(p->idx_ESI, 192));
}
}
else if (!index_size) {
unsigned i;
 
/* Is this worthwhile??
*/
for (i = 0; i < p->nr_buffer_variants; i++) {
struct translate_buffer_variant *variant = &p->buffer_variant[i];
struct x86_reg buf_ptr = x86_make_disp(p->machine_EDI,
get_offset(p, &variant->ptr));
struct x86_reg buf_stride = x86_make_disp(p->machine_EDI,
get_offset(p, &p->buffer[variant->buffer_index].stride));
 
if (variant->instance_divisor == 0) {
x86_mov(p->func, p->tmp_EAX, buf_stride);
x64_rexw(p->func);
x86_add(p->func, p->tmp_EAX, buf_ptr);
if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192));
x64_rexw(p->func);
x86_mov(p->func, buf_ptr, p->tmp_EAX);
}
}
}
else {
x64_rexw(p->func);
x86_lea(p->func, p->idx_ESI, x86_make_disp(p->idx_ESI, index_size));
}
return TRUE;
}
 
 
/* Build run( struct translate *machine,
* unsigned start,
* unsigned count,
* unsigned start_instance,
* unsigned instance_id,
* void *output_buffer )
* or
* run_elts( struct translate *machine,
* unsigned *elts,
* unsigned count,
* unsigned start_instance,
* unsigned instance_id,
* void *output_buffer )
*
* Lots of hardcoding
*
* EBX -- pointer to current output vertex
* ECX -- pointer to current source attribute
*
*/
static boolean build_vertex_emit( struct translate_sse *p,
struct x86_function *func,
unsigned index_size )
{
int fixup, label;
unsigned j;
 
memset(p->reg_to_const, 0xff, sizeof(p->reg_to_const));
memset(p->const_to_reg, 0xff, sizeof(p->const_to_reg));
 
p->tmp_EAX = x86_make_reg(file_REG32, reg_AX);
p->idx_ESI = x86_make_reg(file_REG32, reg_SI);
p->outbuf_EBX = x86_make_reg(file_REG32, reg_BX);
p->machine_EDI = x86_make_reg(file_REG32, reg_DI);
p->count_EBP = x86_make_reg(file_REG32, reg_BP);
p->tmp2_EDX = x86_make_reg(file_REG32, reg_DX);
p->src_ECX = x86_make_reg(file_REG32, reg_CX);
 
p->func = func;
 
x86_init_func(p->func);
 
if(x86_target(p->func) == X86_64_WIN64_ABI)
{
/* the ABI guarantees a 16-byte aligned 32-byte "shadow space" above the return address */
sse2_movdqa(p->func, x86_make_disp(x86_make_reg(file_REG32, reg_SP), 8), x86_make_reg(file_XMM, 6));
sse2_movdqa(p->func, x86_make_disp(x86_make_reg(file_REG32, reg_SP), 24), x86_make_reg(file_XMM, 7));
}
 
x86_push(p->func, p->outbuf_EBX);
x86_push(p->func, p->count_EBP);
 
/* on non-Win64 x86-64, these are already in the right registers */
if(x86_target(p->func) != X86_64_STD_ABI)
{
x86_push(p->func, p->machine_EDI);
x86_push(p->func, p->idx_ESI);
 
x86_mov(p->func, p->machine_EDI, x86_fn_arg(p->func, 1));
x86_mov(p->func, p->idx_ESI, x86_fn_arg(p->func, 2));
}
 
x86_mov(p->func, p->count_EBP, x86_fn_arg(p->func, 3));
 
if(x86_target(p->func) != X86_32)
x64_mov64(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 6));
else
x86_mov(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 6));
 
/* Load instance ID.
*/
if (p->use_instancing) {
x86_mov(p->func,
p->tmp2_EDX,
x86_fn_arg(p->func, 4));
x86_mov(p->func,
x86_make_disp(p->machine_EDI, get_offset(p, &p->start_instance)),
p->tmp2_EDX);
 
x86_mov(p->func,
p->tmp_EAX,
x86_fn_arg(p->func, 5));
x86_mov(p->func,
x86_make_disp(p->machine_EDI, get_offset(p, &p->instance_id)),
p->tmp_EAX);
}
 
/* Get vertex count, compare to zero
*/
x86_xor(p->func, p->tmp_EAX, p->tmp_EAX);
x86_cmp(p->func, p->count_EBP, p->tmp_EAX);
fixup = x86_jcc_forward(p->func, cc_E);
 
/* always load, needed or not:
*/
init_inputs(p, index_size);
 
/* Note address for loop jump
*/
label = x86_get_label(p->func);
{
struct x86_reg elt = !index_size ? p->idx_ESI : x86_deref(p->idx_ESI);
int last_variant = -1;
struct x86_reg vb;
 
for (j = 0; j < p->translate.key.nr_elements; j++) {
const struct translate_element *a = &p->translate.key.element[j];
unsigned variant = p->element_to_buffer_variant[j];
 
/* Figure out source pointer address:
*/
if (variant != last_variant) {
last_variant = variant;
vb = get_buffer_ptr(p, index_size, variant, elt);
}
if (!translate_attr( p, a,
x86_make_disp(vb, a->input_offset),
x86_make_disp(p->outbuf_EBX, a->output_offset)))
return FALSE;
}
 
/* Next output vertex:
*/
x64_rexw(p->func);
x86_lea(p->func,
p->outbuf_EBX,
x86_make_disp(p->outbuf_EBX,
p->translate.key.output_stride));
 
/* Incr index
*/
incr_inputs( p, index_size );
}
 
/* decr count, loop if not zero
*/
x86_dec(p->func, p->count_EBP);
x86_jcc(p->func, cc_NZ, label);
 
/* Exit mmx state?
*/
if (p->func->need_emms)
mmx_emms(p->func);
 
/* Land forward jump here:
*/
x86_fixup_fwd_jump(p->func, fixup);
 
/* Pop regs and return
*/
if(x86_target(p->func) != X86_64_STD_ABI)
{
x86_pop(p->func, p->idx_ESI);
x86_pop(p->func, p->machine_EDI);
}
 
x86_pop(p->func, p->count_EBP);
x86_pop(p->func, p->outbuf_EBX);
 
if(x86_target(p->func) == X86_64_WIN64_ABI)
{
sse2_movdqa(p->func, x86_make_reg(file_XMM, 6), x86_make_disp(x86_make_reg(file_REG32, reg_SP), 8));
sse2_movdqa(p->func, x86_make_reg(file_XMM, 7), x86_make_disp(x86_make_reg(file_REG32, reg_SP), 24));
}
x86_ret(p->func);
 
return TRUE;
}
 
 
 
 
 
 
static void translate_sse_set_buffer( struct translate *translate,
unsigned buf,
const void *ptr,
unsigned stride,
unsigned max_index )
{
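/* Runtime entry point called between draws: store the vertex buffer pointer,
* stride and max_index where the generated code expects to read them.
*/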
struct translate_sse *p = (struct translate_sse *)translate;
 
if (buf < p->nr_buffers) {
p->buffer[buf].base_ptr = (char *)ptr;
p->buffer[buf].stride = stride;
p->buffer[buf].max_index = max_index;
}
 
if (0) debug_printf("%s %d/%d: %p %d\n",
__FUNCTION__, buf,
p->nr_buffers,
ptr, stride);
}
 
 
static void translate_sse_release( struct translate *translate )
{
struct translate_sse *p = (struct translate_sse *)translate;
 
x86_release_func( &p->elt8_func );
x86_release_func( &p->elt16_func );
x86_release_func( &p->elt_func );
x86_release_func( &p->linear_func );
 
os_free_aligned(p);
}
 
 
struct translate *translate_sse2_create( const struct translate_key *key )
{
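/* Build the SSE translate object: map each vertex element to a buffer
* variant (one per input_buffer / instance_divisor pair), then generate four
* versions of the vertex loop - linear plus 8-, 16- and 32-bit indexed -
* with the rtasm x86 assembler.
*
* Illustrative use by a caller (not code from this file):
*
* struct translate *t = translate_sse2_create(&key);
* t->set_buffer(t, 0, vb_ptr, vb_stride, max_index);
* t->run(t, start, count, start_instance, instance_id, out);
* t->release(t);
*/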
struct translate_sse *p = NULL;
unsigned i;
 
/* this is misnamed, it actually refers to whether rtasm is enabled or not */
if (!rtasm_cpu_has_sse())
goto fail;
 
p = os_malloc_aligned(sizeof(struct translate_sse), 16);
if (p == NULL)
goto fail;
memset(p, 0, sizeof(*p));
memcpy(p->consts, consts, sizeof(consts));
 
p->translate.key = *key;
p->translate.release = translate_sse_release;
p->translate.set_buffer = translate_sse_set_buffer;
 
for (i = 0; i < key->nr_elements; i++) {
if (key->element[i].type == TRANSLATE_ELEMENT_NORMAL) {
unsigned j;
 
p->nr_buffers = MAX2(p->nr_buffers, key->element[i].input_buffer + 1);
 
if (key->element[i].instance_divisor) {
p->use_instancing = TRUE;
}
 
/*
* Map vertex element to vertex buffer variant.
*/
for (j = 0; j < p->nr_buffer_variants; j++) {
if (p->buffer_variant[j].buffer_index == key->element[i].input_buffer &&
p->buffer_variant[j].instance_divisor == key->element[i].instance_divisor) {
break;
}
}
if (j == p->nr_buffer_variants) {
p->buffer_variant[j].buffer_index = key->element[i].input_buffer;
p->buffer_variant[j].instance_divisor = key->element[i].instance_divisor;
p->nr_buffer_variants++;
}
p->element_to_buffer_variant[i] = j;
} else {
assert(key->element[i].type == TRANSLATE_ELEMENT_INSTANCE_ID);
 
p->element_to_buffer_variant[i] = ELEMENT_BUFFER_INSTANCE_ID;
}
}
 
if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers);
 
if (!build_vertex_emit(p, &p->linear_func, 0))
goto fail;
 
if (!build_vertex_emit(p, &p->elt_func, 4))
goto fail;
 
if (!build_vertex_emit(p, &p->elt16_func, 2))
goto fail;
 
if (!build_vertex_emit(p, &p->elt8_func, 1))
goto fail;
 
p->translate.run = (run_func) x86_get_func(&p->linear_func);
if (p->translate.run == NULL)
goto fail;
 
p->translate.run_elts = (run_elts_func) x86_get_func(&p->elt_func);
if (p->translate.run_elts == NULL)
goto fail;
 
p->translate.run_elts16 = (run_elts16_func) x86_get_func(&p->elt16_func);
if (p->translate.run_elts16 == NULL)
goto fail;
 
p->translate.run_elts8 = (run_elts8_func) x86_get_func(&p->elt8_func);
if (p->translate.run_elts8 == NULL)
goto fail;
 
return &p->translate;
 
fail:
if (p)
translate_sse_release( &p->translate );
 
return NULL;
}
 
 
 
#else
 
struct translate *translate_sse2_create( const struct translate_key *key )
{
return NULL;
}
 
#endif
/drivers/video/Gallium/include/pipe/p_config.h
212,10 → 212,6
#define PIPE_OS_UNIX
#endif
 
#if defined(_WIN32) || defined(WIN32)
#define PIPE_OS_WINDOWS
#endif
 
#if defined(__HAIKU__)
#define PIPE_OS_HAIKU
#define PIPE_OS_UNIX