/* drivers/video/Gallium/auxiliary/cso_cache/cso_cache.c */
/************************************************************************** |
* |
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/* Authors: Zack Rusin <zack@tungstengraphics.com> |
*/ |
#include "util/u_debug.h" |
#include "util/u_memory.h" |
#include "cso_cache.h" |
#include "cso_hash.h" |
/* Central cache object: one hash table per kind of constant state object. */
struct cso_cache {
   struct cso_hash *hashes[CSO_CACHE_MAX];  /* one table per cso_cache_type */
   int max_size;                            /* soft cap on entries per table */
   cso_sanitize_callback sanitize_cb;       /* eviction-policy hook */
   void *sanitize_data;                     /* opaque argument for sanitize_cb */
};
/**
 * Hash a state template by XOR-folding it as an array of 32-bit words.
 * \param key       pointer to the state template
 * \param key_size  size in bytes; must be a multiple of 4
 * \return XOR of all 32-bit words in the key
 *
 * An alternative shift-based hash used to live here behind "#if 1/#else";
 * the dead branch has been removed.
 */
static unsigned hash_key(const void *key, unsigned key_size)
{
   const unsigned *ikey = (const unsigned *)key;
   unsigned hash = 0, i;

   assert(key_size % 4 == 0);

   /* I'm sure this can be improved on: */
   for (i = 0; i < key_size / 4; i++)
      hash ^= ikey[i];

   return hash;
}
/* Public entry point: hash the raw bytes of a state template. */
unsigned cso_construct_key(void *item, int item_size)
{
   return hash_key(item, item_size);
}
static INLINE struct cso_hash *_cso_hash_for_type(struct cso_cache *sc, enum cso_cache_type type) |
{ |
struct cso_hash *hash; |
hash = sc->hashes[type]; |
return hash; |
} |
/* Destructor for cached blend CSOs: release the driver object, then the
 * wrapper.  The unused 'data' parameter keeps the cso_state_callback shape. */
static void delete_blend_state(void *state, void *data)
{
   struct cso_blend *cso = (struct cso_blend *)state;
   if (cso->delete_state)
      cso->delete_state(cso->context, cso->data);
   FREE(state);
}
/* Destructor for cached depth/stencil/alpha CSOs (see delete_blend_state). */
static void delete_depth_stencil_state(void *state, void *data)
{
   struct cso_depth_stencil_alpha *cso = (struct cso_depth_stencil_alpha *)state;
   if (cso->delete_state)
      cso->delete_state(cso->context, cso->data);
   FREE(state);
}
/* Destructor for cached sampler CSOs (see delete_blend_state). */
static void delete_sampler_state(void *state, void *data)
{
   struct cso_sampler *cso = (struct cso_sampler *)state;
   if (cso->delete_state)
      cso->delete_state(cso->context, cso->data);
   FREE(state);
}
/* Destructor for cached rasterizer CSOs (see delete_blend_state). */
static void delete_rasterizer_state(void *state, void *data)
{
   struct cso_rasterizer *cso = (struct cso_rasterizer *)state;
   if (cso->delete_state)
      cso->delete_state(cso->context, cso->data);
   FREE(state);
}
/* Destructor for cached vertex-element CSOs (see delete_blend_state). */
static void delete_velements(void *state, void *data)
{
   struct cso_velements *cso = (struct cso_velements *)state;
   if (cso->delete_state)
      cso->delete_state(cso->context, cso->data);
   FREE(state);
}
/* Dispatch to the per-type destructor.  An unknown type asserts in debug
 * builds but still frees the wrapper so release builds don't leak. */
static INLINE void delete_cso(void *state, enum cso_cache_type type)
{
   switch (type) {
   case CSO_BLEND:
      delete_blend_state(state, 0);
      break;
   case CSO_SAMPLER:
      delete_sampler_state(state, 0);
      break;
   case CSO_DEPTH_STENCIL_ALPHA:
      delete_depth_stencil_state(state, 0);
      break;
   case CSO_RASTERIZER:
      delete_rasterizer_state(state, 0);
      break;
   case CSO_VELEMENTS:
      delete_velements(state, 0);
      break;
   default:
      assert(0);
      FREE(state);
   }
}
/* Invoke the installed eviction policy, if any, on one table. */
static INLINE void sanitize_hash(struct cso_cache *sc,
                                 struct cso_hash *hash,
                                 enum cso_cache_type type,
                                 int max_size)
{
   if (sc->sanitize_cb)
      sc->sanitize_cb(hash, type, max_size, sc->sanitize_data);
}
/* Default eviction policy: if we're approaching the maximum size, remove a
 * fourth of the entries, otherwise every subsequent call would go through
 * the same trimming work again. */
static INLINE void sanitize_cb(struct cso_hash *hash, enum cso_cache_type type,
                               int max_size, void *user_data)
{
   int hash_size = cso_hash_size(hash);
   int max_entries = (max_size > hash_size) ? max_size : hash_size;
   /* (max_size < max_entries) is 0 or 1: only trim once over the cap */
   int to_remove = (max_size < max_entries) * max_entries/4;
   if (hash_size > max_size)
      to_remove += hash_size - max_size;
   while (to_remove) {
      /* remove elements until we're good */
      /* FIXME: currently we pick the nodes to remove at random */
      struct cso_hash_iter iter = cso_hash_first_node(hash);
      void *cso = cso_hash_take(hash, cso_hash_iter_key(iter));
      delete_cso(cso, type);
      --to_remove;
   }
}
/* Insert a state wrapper under hash_key, evicting first if the table is at
 * its size limit.  Returns an iterator to the inserted node (null on OOM). */
struct cso_hash_iter
cso_insert_state(struct cso_cache *sc,
                 unsigned hash_key, enum cso_cache_type type,
                 void *state)
{
   struct cso_hash *hash = _cso_hash_for_type(sc, type);
   sanitize_hash(sc, hash, type, sc->max_size);

   return cso_hash_insert(hash, hash_key, state);
}
struct cso_hash_iter |
cso_find_state(struct cso_cache *sc, |
unsigned hash_key, enum cso_cache_type type) |
{ |
struct cso_hash *hash = _cso_hash_for_type(sc, type); |
return cso_hash_find(hash, hash_key); |
} |
/* Walk all entries colliding on hash_key and return the first whose first
 * 'size' bytes match 'templ' exactly, or NULL if none do. */
void *cso_hash_find_data_from_template( struct cso_hash *hash,
                                        unsigned hash_key,
                                        void *templ,
                                        int size )
{
   struct cso_hash_iter iter = cso_hash_find(hash, hash_key);
   while (!cso_hash_iter_is_null(iter)) {
      void *iter_data = cso_hash_iter_data(iter);
      if (!memcmp(iter_data, templ, size)) {
         /* We found a match */
         return iter_data;
      }
      iter = cso_hash_iter_next(iter);
   }
   return NULL;
}
/* Like cso_find_state(), but resolve hash collisions by comparing the first
 * 'size' bytes of each candidate against 'templ'.  Returns a null iterator
 * when no exact match exists. */
struct cso_hash_iter cso_find_state_template(struct cso_cache *sc,
                                             unsigned hash_key, enum cso_cache_type type,
                                             void *templ, unsigned size)
{
   struct cso_hash_iter iter = cso_find_state(sc, hash_key, type);
   while (!cso_hash_iter_is_null(iter)) {
      void *iter_data = cso_hash_iter_data(iter);
      if (!memcmp(iter_data, templ, size))
         return iter;
      iter = cso_hash_iter_next(iter);
   }
   return iter;
}
void * cso_take_state(struct cso_cache *sc, |
unsigned hash_key, enum cso_cache_type type) |
{ |
struct cso_hash *hash = _cso_hash_for_type(sc, type); |
return cso_hash_take(hash, hash_key); |
} |
struct cso_cache *cso_cache_create(void) |
{ |
struct cso_cache *sc = MALLOC_STRUCT(cso_cache); |
int i; |
if (sc == NULL) |
return NULL; |
sc->max_size = 4096; |
for (i = 0; i < CSO_CACHE_MAX; i++) |
sc->hashes[i] = cso_hash_create(); |
sc->sanitize_cb = sanitize_cb; |
sc->sanitize_data = 0; |
return sc; |
} |
/* Apply 'func' to every entry of the given type.  The iterator is advanced
 * before the callback runs, so 'func' may safely free 'state'. */
void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type,
                        cso_state_callback func, void *user_data)
{
   struct cso_hash *hash = _cso_hash_for_type(sc, type);
   struct cso_hash_iter iter;

   iter = cso_hash_first_node(hash);
   while (!cso_hash_iter_is_null(iter)) {
      void *state = cso_hash_iter_data(iter);
      iter = cso_hash_iter_next(iter);
      if (state) {
         func(state, user_data);
      }
   }
}
/* Destroy the cache: free every cached driver object, then the hash
 * tables, then the cache itself.  NULL is tolerated (no-op). */
void cso_cache_delete(struct cso_cache *sc)
{
   int i;
   assert(sc);
   if (!sc)
      return;

   /* delete driver data */
   cso_for_each_state(sc, CSO_BLEND, delete_blend_state, 0);
   cso_for_each_state(sc, CSO_DEPTH_STENCIL_ALPHA, delete_depth_stencil_state, 0);
   cso_for_each_state(sc, CSO_RASTERIZER, delete_rasterizer_state, 0);
   cso_for_each_state(sc, CSO_SAMPLER, delete_sampler_state, 0);
   cso_for_each_state(sc, CSO_VELEMENTS, delete_velements, 0);

   for (i = 0; i < CSO_CACHE_MAX; i++)
      cso_hash_delete(sc->hashes[i]);

   FREE(sc);
}
/* Set the per-table entry cap and immediately trim every table down to it. */
void cso_set_maximum_cache_size(struct cso_cache *sc, int number)
{
   int i;

   sc->max_size = number;

   for (i = 0; i < CSO_CACHE_MAX; i++)
      sanitize_hash(sc, sc->hashes[i], i, sc->max_size);
}
/* Return the current per-table entry cap. */
int cso_maximum_cache_size(const struct cso_cache *sc)
{
   return sc->max_size;
}
/* Install a custom eviction policy, replacing the default sanitize_cb. */
void cso_cache_set_sanitize_callback(struct cso_cache *sc,
                                     cso_sanitize_callback cb,
                                     void *user_data)
{
   sc->sanitize_cb = cb;
   sc->sanitize_data = user_data;
}
/* drivers/video/Gallium/auxiliary/cso_cache/cso_cache.h */
/************************************************************************** |
* |
* Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/** |
* @file |
* Constant State Object (CSO) cache. |
* |
* The basic idea is that the states are created via the |
* create_state/bind_state/delete_state semantics. The driver is expected to |
* perform as much of the Gallium state translation to whatever its internal |
* representation is during the create call. Gallium then has a caching |
* mechanism where it stores the created states. When the pipeline needs an |
* actual state change, a bind call is issued. In the bind call the driver |
* gets its already translated representation. |
* |
* Those semantics mean that the driver doesn't do the repeated translations |
* of states on every frame, but only once, when a new state is actually |
* created. |
* |
* Even on hardware that doesn't do any kind of state cache, it makes the |
* driver look a lot neater, plus it avoids all the redundant state |
* translations on every frame. |
* |
* Currently our constant state objects are: |
* - alpha test |
* - blend |
* - depth stencil |
* - fragment shader |
* - rasterizer (old setup) |
* - sampler |
* - vertex shader |
* - vertex elements |
* |
* Things that are not constant state objects include: |
* - blend_color |
* - clip_state |
* - clear_color_state |
* - constant_buffer |
* - feedback_state |
* - framebuffer_state |
* - polygon_stipple |
* - scissor_state |
* - texture_state |
* - viewport_state |
* |
* @author Zack Rusin <zack@tungstengraphics.com> |
*/ |
#ifndef CSO_CACHE_H |
#define CSO_CACHE_H |
#include "pipe/p_context.h" |
#include "pipe/p_state.h" |
/* cso_hash.h is necessary for cso_hash_iter, as MSVC requires structures |
* returned by value to be fully defined */ |
#include "cso_hash.h" |
#ifdef __cplusplus |
extern "C" { |
#endif |
/* Kinds of state object the cache can hold.  CSO_CACHE_MAX is the table
 * count, not a real type. */
enum cso_cache_type {
   CSO_RASTERIZER,
   CSO_BLEND,
   CSO_DEPTH_STENCIL_ALPHA,
   CSO_SAMPLER,
   CSO_VELEMENTS,
   CSO_CACHE_MAX,
};
/* Generic per-state callback: used both as a destructor for driver objects
 * and as the visitor for cso_for_each_state(). */
typedef void (*cso_state_callback)(void *ctx, void *obj);

/* Eviction-policy hook: trim 'hash' toward 'max_size' entries. */
typedef void (*cso_sanitize_callback)(struct cso_hash *hash,
                                      enum cso_cache_type type,
                                      int max_size,
                                      void *user_data);

struct cso_cache;
/* Cached wrapper for each state kind.  The template ('state') comes first
 * so the wrapper itself can be memcmp'd against a template (see
 * cso_find_state_template); 'data' is the driver-created handle, and
 * delete_state/context are what's needed to destroy it later. */
struct cso_blend {
   struct pipe_blend_state state;
   void *data;                       /* driver handle */
   cso_state_callback delete_state;  /* destroys 'data' */
   struct pipe_context *context;     /* context that created 'data' */
};

struct cso_depth_stencil_alpha {
   struct pipe_depth_stencil_alpha_state state;
   void *data;
   cso_state_callback delete_state;
   struct pipe_context *context;
};

struct cso_rasterizer {
   struct pipe_rasterizer_state state;
   void *data;
   cso_state_callback delete_state;
   struct pipe_context *context;
};

struct cso_sampler {
   struct pipe_sampler_state state;
   void *data;
   cso_state_callback delete_state;
   struct pipe_context *context;
};

/* Vertex-element templates carry an explicit count plus the element array. */
struct cso_velems_state {
   unsigned count;
   struct pipe_vertex_element velems[PIPE_MAX_ATTRIBS];
};

struct cso_velements {
   struct cso_velems_state state;
   void *data;
   cso_state_callback delete_state;
   struct pipe_context *context;
};
/* Hash a state template's raw bytes (item_size must be a multiple of 4). */
unsigned cso_construct_key(void *item, int item_size);

/* Create/destroy the cache.  cso_cache_delete frees all cached objects. */
struct cso_cache *cso_cache_create(void);
void cso_cache_delete(struct cso_cache *sc);

/* Replace the default eviction policy. */
void cso_cache_set_sanitize_callback(struct cso_cache *sc,
                                     cso_sanitize_callback cb,
                                     void *user_data);

/* Insert a wrapper (evicting first if over the cap); returns its iterator. */
struct cso_hash_iter cso_insert_state(struct cso_cache *sc,
                                      unsigned hash_key, enum cso_cache_type type,
                                      void *state);
/* Look up by hash only (collisions not resolved). */
struct cso_hash_iter cso_find_state(struct cso_cache *sc,
                                    unsigned hash_key, enum cso_cache_type type);
/* Look up by hash, resolving collisions via memcmp against 'templ'. */
struct cso_hash_iter cso_find_state_template(struct cso_cache *sc,
                                             unsigned hash_key, enum cso_cache_type type,
                                             void *templ, unsigned size);
/* Visit every entry of a type; the callback may free the entry. */
void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type,
                        cso_state_callback func, void *user_data);
/* Remove an entry and hand ownership to the caller. */
void * cso_take_state(struct cso_cache *sc, unsigned hash_key,
                      enum cso_cache_type type);

/* Get/set the per-table entry cap; setting trims oversized tables. */
void cso_set_maximum_cache_size(struct cso_cache *sc, int number);
int cso_maximum_cache_size(const struct cso_cache *sc);
#ifdef __cplusplus |
} |
#endif |
#endif |
/* drivers/video/Gallium/auxiliary/cso_cache/cso_context.c */
/************************************************************************** |
* |
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/** |
* @file |
* |
* Wrap the cso cache & hash mechanisms in a simplified |
* pipe-driver-specific interface. |
* |
* @author Zack Rusin <zack@tungstengraphics.com> |
* @author Keith Whitwell <keith@tungstengraphics.com> |
*/ |
#include "pipe/p_state.h" |
#include "util/u_draw.h" |
#include "util/u_framebuffer.h" |
#include "util/u_inlines.h" |
#include "util/u_math.h" |
#include "util/u_memory.h" |
#include "util/u_vbuf.h" |
#include "tgsi/tgsi_parse.h" |
#include "cso_cache/cso_context.h" |
#include "cso_cache/cso_cache.h" |
#include "cso_cache/cso_hash.h" |
#include "cso_context.h" |
/**
 * Info related to samplers and sampler views.
 * We have one of these for fragment samplers and another for vertex samplers.
 */
struct sampler_info
{
   struct {
      /* NOTE(review): presumably the state last flushed to the driver;
       * the flush path is outside this chunk — confirm before relying on it. */
      void *samplers[PIPE_MAX_SAMPLERS];
      unsigned nr_samplers;
   } hw;

   /* current CSO-level samplers */
   void *samplers[PIPE_MAX_SAMPLERS];
   unsigned nr_samplers;

   /* 1-deep save stack for samplers */
   void *samplers_saved[PIPE_MAX_SAMPLERS];
   unsigned nr_samplers_saved;

   /* current sampler views (reference-counted) */
   struct pipe_sampler_view *views[PIPE_MAX_SAMPLERS];
   unsigned nr_views;

   /* 1-deep save stack for sampler views */
   struct pipe_sampler_view *views_saved[PIPE_MAX_SAMPLERS];
   unsigned nr_views_saved;
};
/* Per-pipe-context wrapper: tracks what is currently bound so redundant
 * bind calls can be skipped, plus a 1-deep save/restore stack per state. */
struct cso_context {
   struct pipe_context *pipe;   /* the wrapped driver context */
   struct cso_cache *cache;     /* cache of created state objects */

   struct u_vbuf *vbuf;         /* vertex-buffer fallback; NULL if unneeded */

   boolean has_geometry_shader; /* capabilities queried at create time */
   boolean has_streamout;

   struct sampler_info samplers[PIPE_SHADER_TYPES];

   struct pipe_vertex_buffer aux_vertex_buffer_current;
   struct pipe_vertex_buffer aux_vertex_buffer_saved;
   unsigned aux_vertex_buffer_index;

   struct pipe_constant_buffer aux_constbuf_current[PIPE_SHADER_TYPES];
   struct pipe_constant_buffer aux_constbuf_saved[PIPE_SHADER_TYPES];

   unsigned nr_so_targets;
   struct pipe_stream_output_target *so_targets[PIPE_MAX_SO_BUFFERS];

   unsigned nr_so_targets_saved;
   struct pipe_stream_output_target *so_targets_saved[PIPE_MAX_SO_BUFFERS];

   /** Current and saved state.
    * The saved state is used as a 1-deep stack.
    */
   void *blend, *blend_saved;
   void *depth_stencil, *depth_stencil_saved;
   void *rasterizer, *rasterizer_saved;
   void *fragment_shader, *fragment_shader_saved;
   void *vertex_shader, *vertex_shader_saved;
   void *geometry_shader, *geometry_shader_saved;
   void *velements, *velements_saved;
   struct pipe_query *render_condition, *render_condition_saved;
   uint render_condition_mode, render_condition_mode_saved;
   boolean render_condition_cond, render_condition_cond_saved;

   struct pipe_clip_state clip;
   struct pipe_clip_state clip_saved;

   struct pipe_framebuffer_state fb, fb_saved;
   struct pipe_viewport_state vp, vp_saved;
   struct pipe_blend_color blend_color;
   unsigned sample_mask, sample_mask_saved;
   struct pipe_stencil_ref stencil_ref, stencil_ref_saved;
};
/* Try to free a cached blend CSO.  Refuses (returns FALSE) if it is the
 * currently bound state; otherwise destroys the driver object and wrapper. */
static boolean delete_blend_state(struct cso_context *ctx, void *state)
{
   struct cso_blend *cso = (struct cso_blend *)state;
   if (ctx->blend == cso->data)
      return FALSE;
   if (cso->delete_state)
      cso->delete_state(cso->context, cso->data);
   FREE(state);
   return TRUE;
}
/* Try to free a cached DSA CSO; refuses if currently bound. */
static boolean delete_depth_stencil_state(struct cso_context *ctx, void *state)
{
   struct cso_depth_stencil_alpha *cso =
      (struct cso_depth_stencil_alpha *)state;

   if (ctx->depth_stencil == cso->data)
      return FALSE;

   if (cso->delete_state)
      cso->delete_state(cso->context, cso->data);
   FREE(state);

   return TRUE;
}
/* Free a cached sampler CSO.  NOTE(review): unlike the other delete_*
 * helpers here, this one performs no currently-bound check before freeing —
 * verify that sampler handles are never evicted while bound. */
static boolean delete_sampler_state(struct cso_context *ctx, void *state)
{
   struct cso_sampler *cso = (struct cso_sampler *)state;
   if (cso->delete_state)
      cso->delete_state(cso->context, cso->data);
   FREE(state);
   return TRUE;
}
/* Try to free a cached rasterizer CSO; refuses if currently bound. */
static boolean delete_rasterizer_state(struct cso_context *ctx, void *state)
{
   struct cso_rasterizer *cso = (struct cso_rasterizer *)state;
   if (ctx->rasterizer == cso->data)
      return FALSE;
   if (cso->delete_state)
      cso->delete_state(cso->context, cso->data);
   FREE(state);
   return TRUE;
}
/* Try to free a cached vertex-elements CSO; refuses if currently bound. */
static boolean delete_vertex_elements(struct cso_context *ctx,
                                      void *state)
{
   struct cso_velements *cso = (struct cso_velements *)state;

   if (ctx->velements == cso->data)
      return FALSE;

   if (cso->delete_state)
      cso->delete_state(cso->context, cso->data);
   FREE(state);
   return TRUE;
}
/* Dispatch to the per-type destructor.  Returns TRUE if the entry was freed,
 * FALSE if it is still in use (and for unknown types, which assert). */
static INLINE boolean delete_cso(struct cso_context *ctx,
                                 void *state, enum cso_cache_type type)
{
   switch (type) {
   case CSO_BLEND:
      return delete_blend_state(ctx, state);
   case CSO_SAMPLER:
      return delete_sampler_state(ctx, state);
   case CSO_DEPTH_STENCIL_ALPHA:
      return delete_depth_stencil_state(ctx, state);
   case CSO_RASTERIZER:
      return delete_rasterizer_state(ctx, state);
   case CSO_VELEMENTS:
      return delete_vertex_elements(ctx, state);
   default:
      assert(0);
      FREE(state);
   }
   return FALSE;
}
static INLINE void |
sanitize_hash(struct cso_hash *hash, enum cso_cache_type type, |
int max_size, void *user_data) |
{ |
struct cso_context *ctx = (struct cso_context *)user_data; |
/* if we're approach the maximum size, remove fourth of the entries |
* otherwise every subsequent call will go through the same */ |
int hash_size = cso_hash_size(hash); |
int max_entries = (max_size > hash_size) ? max_size : hash_size; |
int to_remove = (max_size < max_entries) * max_entries/4; |
struct cso_hash_iter iter = cso_hash_first_node(hash); |
if (hash_size > max_size) |
to_remove += hash_size - max_size; |
while (to_remove) { |
/*remove elements until we're good */ |
/*fixme: currently we pick the nodes to remove at random*/ |
void *cso = cso_hash_iter_data(iter); |
if (delete_cso(ctx, cso, type)) { |
iter = cso_hash_erase(hash, iter); |
--to_remove; |
} else |
iter = cso_hash_iter_next(iter); |
} |
} |
/* Install the u_vbuf vertex-buffer translation layer if the driver lacks
 * any of the capabilities it would otherwise need. */
static void cso_init_vbuf(struct cso_context *cso)
{
   struct u_vbuf_caps caps;

   u_vbuf_get_caps(cso->pipe->screen, &caps);

   /* Install u_vbuf if there is anything unsupported. */
   if (!caps.buffer_offset_unaligned ||
       !caps.buffer_stride_unaligned ||
       !caps.velem_src_offset_unaligned ||
       !caps.format_fixed32 ||
       !caps.format_float16 ||
       !caps.format_float64 ||
       !caps.format_norm32 ||
       !caps.format_scaled32 ||
       !caps.user_vertex_buffers) {
      cso->vbuf = u_vbuf_create(cso->pipe, &caps,
                                cso->aux_vertex_buffer_index);
   }
}
/* Create a CSO context wrapping 'pipe'.  Queries driver capabilities and
 * sets up the state cache; returns NULL on allocation failure. */
struct cso_context *cso_create_context( struct pipe_context *pipe )
{
   struct cso_context *ctx = CALLOC_STRUCT(cso_context);
   if (ctx == NULL)
      goto out;

   ctx->cache = cso_cache_create();
   if (ctx->cache == NULL)
      goto out;
   cso_cache_set_sanitize_callback(ctx->cache,
                                   sanitize_hash,
                                   ctx);

   ctx->pipe = pipe;
   ctx->sample_mask_saved = ~0;     /* all samples enabled */
   ctx->aux_vertex_buffer_index = 0; /* 0 for now */

   cso_init_vbuf(ctx);

   /* Enable for testing: */
   if (0) cso_set_maximum_cache_size( ctx->cache, 4 );

   if (pipe->screen->get_shader_param(pipe->screen, PIPE_SHADER_GEOMETRY,
                                      PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0) {
      ctx->has_geometry_shader = TRUE;
   }
   if (pipe->screen->get_param(pipe->screen,
                               PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS) != 0) {
      ctx->has_streamout = TRUE;
   }

   return ctx;

out:
   /* cso_destroy_context tolerates NULL and a partially built ctx */
   cso_destroy_context( ctx );
   return NULL;
}
/**
 * Prior to context destruction, this function unbinds all state objects
 * from the pipe context and drops every reference the CSO context holds
 * (sampler views, framebuffers, buffers, stream-output targets), then
 * destroys the cache (which frees all cached driver objects).
 */
void cso_release_all( struct cso_context *ctx )
{
   unsigned i, shader;

   if (ctx->pipe) {
      /* unbind everything so cached objects can be freed safely */
      ctx->pipe->bind_blend_state( ctx->pipe, NULL );
      ctx->pipe->bind_rasterizer_state( ctx->pipe, NULL );
      ctx->pipe->bind_fragment_sampler_states( ctx->pipe, 0, NULL );
      if (ctx->pipe->bind_vertex_sampler_states)
         ctx->pipe->bind_vertex_sampler_states(ctx->pipe, 0, NULL);
      ctx->pipe->bind_depth_stencil_alpha_state( ctx->pipe, NULL );
      ctx->pipe->bind_fs_state( ctx->pipe, NULL );
      ctx->pipe->bind_vs_state( ctx->pipe, NULL );
      ctx->pipe->bind_vertex_elements_state( ctx->pipe, NULL );
      ctx->pipe->set_fragment_sampler_views(ctx->pipe, 0, NULL);
      if (ctx->pipe->set_vertex_sampler_views)
         ctx->pipe->set_vertex_sampler_views(ctx->pipe, 0, NULL);
      if (ctx->pipe->set_stream_output_targets)
         ctx->pipe->set_stream_output_targets(ctx->pipe, 0, NULL, 0);
   }

   /* free fragment samplers, views */
   for (shader = 0; shader < Elements(ctx->samplers); shader++) {
      struct sampler_info *info = &ctx->samplers[shader];
      for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
         pipe_sampler_view_reference(&info->views[i], NULL);
         pipe_sampler_view_reference(&info->views_saved[i], NULL);
      }
   }

   util_unreference_framebuffer_state(&ctx->fb);
   util_unreference_framebuffer_state(&ctx->fb_saved);

   pipe_resource_reference(&ctx->aux_vertex_buffer_current.buffer, NULL);
   pipe_resource_reference(&ctx->aux_vertex_buffer_saved.buffer, NULL);

   for (i = 0; i < PIPE_SHADER_TYPES; i++) {
      pipe_resource_reference(&ctx->aux_constbuf_current[i].buffer, NULL);
      pipe_resource_reference(&ctx->aux_constbuf_saved[i].buffer, NULL);
   }

   for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
      pipe_so_target_reference(&ctx->so_targets[i], NULL);
      pipe_so_target_reference(&ctx->so_targets_saved[i], NULL);
   }

   if (ctx->cache) {
      cso_cache_delete( ctx->cache );
      ctx->cache = NULL;
   }
}
/** |
* Free the CSO context. NOTE: the state tracker should have previously called |
* cso_release_all(). |
*/ |
void cso_destroy_context( struct cso_context *ctx ) |
{ |
if (ctx) { |
if (ctx->vbuf) |
u_vbuf_destroy(ctx->vbuf); |
FREE( ctx ); |
} |
} |
/* The cso_set_* functions will either find the state of the given template
 * in the cache or they will create a new state from the given
 * template, insert it in the cache and return it.
 */

/*
 * If the driver returns 0 from the create method then they will assign
 * the data member of the cso to be the template itself.
 */

/* Find-or-create a blend state matching 'templ' and bind it (skipping the
 * bind if it is already current).  Returns PIPE_ERROR_OUT_OF_MEMORY on
 * allocation failure. */
enum pipe_error cso_set_blend(struct cso_context *ctx,
                              const struct pipe_blend_state *templ)
{
   unsigned key_size, hash_key;
   struct cso_hash_iter iter;
   void *handle;

   /* With independent blend disabled, only rt[0] matters, so hash/compare
    * just up to rt[1] to avoid spurious misses on the ignored tail. */
   key_size = templ->independent_blend_enable ?
      sizeof(struct pipe_blend_state) :
      (char *)&(templ->rt[1]) - (char *)templ;
   hash_key = cso_construct_key((void*)templ, key_size);
   iter = cso_find_state_template(ctx->cache, hash_key, CSO_BLEND,
                                  (void*)templ, key_size);

   if (cso_hash_iter_is_null(iter)) {
      struct cso_blend *cso = MALLOC(sizeof(struct cso_blend));
      if (!cso)
         return PIPE_ERROR_OUT_OF_MEMORY;

      /* zero first so the partial key_size copy leaves the tail defined */
      memset(&cso->state, 0, sizeof cso->state);
      memcpy(&cso->state, templ, key_size);
      cso->data = ctx->pipe->create_blend_state(ctx->pipe, &cso->state);
      cso->delete_state = (cso_state_callback)ctx->pipe->delete_blend_state;
      cso->context = ctx->pipe;

      iter = cso_insert_state(ctx->cache, hash_key, CSO_BLEND, cso);
      if (cso_hash_iter_is_null(iter)) {
         FREE(cso);
         return PIPE_ERROR_OUT_OF_MEMORY;
      }

      handle = cso->data;
   }
   else {
      handle = ((struct cso_blend *)cso_hash_iter_data(iter))->data;
   }

   if (ctx->blend != handle) {
      ctx->blend = handle;
      ctx->pipe->bind_blend_state(ctx->pipe, handle);
   }
   return PIPE_OK;
}
void cso_save_blend(struct cso_context *ctx) |
{ |
assert(!ctx->blend_saved); |
ctx->blend_saved = ctx->blend; |
} |
void cso_restore_blend(struct cso_context *ctx) |
{ |
if (ctx->blend != ctx->blend_saved) { |
ctx->blend = ctx->blend_saved; |
ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend_saved); |
} |
ctx->blend_saved = NULL; |
} |
/* Find-or-create a depth/stencil/alpha state matching 'templ' and bind it
 * (skipping the bind if already current). */
enum pipe_error
cso_set_depth_stencil_alpha(struct cso_context *ctx,
                            const struct pipe_depth_stencil_alpha_state *templ)
{
   unsigned key_size = sizeof(struct pipe_depth_stencil_alpha_state);
   unsigned hash_key = cso_construct_key((void*)templ, key_size);
   struct cso_hash_iter iter = cso_find_state_template(ctx->cache,
                                                       hash_key,
                                                       CSO_DEPTH_STENCIL_ALPHA,
                                                       (void*)templ, key_size);
   void *handle;

   if (cso_hash_iter_is_null(iter)) {
      struct cso_depth_stencil_alpha *cso =
         MALLOC(sizeof(struct cso_depth_stencil_alpha));
      if (!cso)
         return PIPE_ERROR_OUT_OF_MEMORY;

      memcpy(&cso->state, templ, sizeof(*templ));
      cso->data = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe,
                                                              &cso->state);
      cso->delete_state =
         (cso_state_callback)ctx->pipe->delete_depth_stencil_alpha_state;
      cso->context = ctx->pipe;

      iter = cso_insert_state(ctx->cache, hash_key,
                              CSO_DEPTH_STENCIL_ALPHA, cso);
      if (cso_hash_iter_is_null(iter)) {
         FREE(cso);
         return PIPE_ERROR_OUT_OF_MEMORY;
      }

      handle = cso->data;
   }
   else {
      handle = ((struct cso_depth_stencil_alpha *)
                cso_hash_iter_data(iter))->data;
   }

   if (ctx->depth_stencil != handle) {
      ctx->depth_stencil = handle;
      ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, handle);
   }
   return PIPE_OK;
}
/* Push the current DSA state onto the 1-deep save stack. */
void cso_save_depth_stencil_alpha(struct cso_context *ctx)
{
   assert(!ctx->depth_stencil_saved);
   ctx->depth_stencil_saved = ctx->depth_stencil;
}
/* Pop the saved DSA state, rebinding only if it differs from current. */
void cso_restore_depth_stencil_alpha(struct cso_context *ctx)
{
   if (ctx->depth_stencil != ctx->depth_stencil_saved) {
      ctx->depth_stencil = ctx->depth_stencil_saved;
      ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe,
                                                ctx->depth_stencil_saved);
   }
   ctx->depth_stencil_saved = NULL;
}
/* Find-or-create a rasterizer state matching 'templ' and bind it
 * (skipping the bind if already current). */
enum pipe_error cso_set_rasterizer(struct cso_context *ctx,
                                   const struct pipe_rasterizer_state *templ)
{
   unsigned key_size = sizeof(struct pipe_rasterizer_state);
   unsigned hash_key = cso_construct_key((void*)templ, key_size);
   struct cso_hash_iter iter = cso_find_state_template(ctx->cache,
                                                       hash_key,
                                                       CSO_RASTERIZER,
                                                       (void*)templ, key_size);
   void *handle = NULL;

   if (cso_hash_iter_is_null(iter)) {
      struct cso_rasterizer *cso = MALLOC(sizeof(struct cso_rasterizer));
      if (!cso)
         return PIPE_ERROR_OUT_OF_MEMORY;

      memcpy(&cso->state, templ, sizeof(*templ));
      cso->data = ctx->pipe->create_rasterizer_state(ctx->pipe, &cso->state);
      cso->delete_state =
         (cso_state_callback)ctx->pipe->delete_rasterizer_state;
      cso->context = ctx->pipe;

      iter = cso_insert_state(ctx->cache, hash_key, CSO_RASTERIZER, cso);
      if (cso_hash_iter_is_null(iter)) {
         FREE(cso);
         return PIPE_ERROR_OUT_OF_MEMORY;
      }

      handle = cso->data;
   }
   else {
      handle = ((struct cso_rasterizer *)cso_hash_iter_data(iter))->data;
   }

   if (ctx->rasterizer != handle) {
      ctx->rasterizer = handle;
      ctx->pipe->bind_rasterizer_state(ctx->pipe, handle);
   }
   return PIPE_OK;
}
/* Push the current rasterizer state onto the 1-deep save stack. */
void cso_save_rasterizer(struct cso_context *ctx)
{
   assert(!ctx->rasterizer_saved);
   ctx->rasterizer_saved = ctx->rasterizer;
}
/* Pop the saved rasterizer state, rebinding only if it differs. */
void cso_restore_rasterizer(struct cso_context *ctx)
{
   if (ctx->rasterizer != ctx->rasterizer_saved) {
      ctx->rasterizer = ctx->rasterizer_saved;
      ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rasterizer_saved);
   }
   ctx->rasterizer_saved = NULL;
}
void cso_set_fragment_shader_handle(struct cso_context *ctx, void *handle ) |
{ |
if (ctx->fragment_shader != handle) { |
ctx->fragment_shader = handle; |
ctx->pipe->bind_fs_state(ctx->pipe, handle); |
} |
} |
/* Destroy a fragment shader handle, unbinding it first if it is current. */
void cso_delete_fragment_shader(struct cso_context *ctx, void *handle )
{
   if (handle == ctx->fragment_shader) {
      /* unbind before deleting */
      ctx->pipe->bind_fs_state(ctx->pipe, NULL);
      ctx->fragment_shader = NULL;
   }
   ctx->pipe->delete_fs_state(ctx->pipe, handle);
}
/* Push the current fragment shader onto the 1-deep save stack. */
void cso_save_fragment_shader(struct cso_context *ctx)
{
   assert(!ctx->fragment_shader_saved);
   ctx->fragment_shader_saved = ctx->fragment_shader;
}
/* Pop the saved fragment shader, rebinding only if it differs. */
void cso_restore_fragment_shader(struct cso_context *ctx)
{
   if (ctx->fragment_shader_saved != ctx->fragment_shader) {
      ctx->pipe->bind_fs_state(ctx->pipe, ctx->fragment_shader_saved);
      ctx->fragment_shader = ctx->fragment_shader_saved;
   }
   ctx->fragment_shader_saved = NULL;
}
void cso_set_vertex_shader_handle(struct cso_context *ctx, void *handle) |
{ |
if (ctx->vertex_shader != handle) { |
ctx->vertex_shader = handle; |
ctx->pipe->bind_vs_state(ctx->pipe, handle); |
} |
} |
/* Destroy a vertex shader handle, unbinding it first if it is current. */
void cso_delete_vertex_shader(struct cso_context *ctx, void *handle )
{
   if (handle == ctx->vertex_shader) {
      /* unbind before deleting */
      ctx->pipe->bind_vs_state(ctx->pipe, NULL);
      ctx->vertex_shader = NULL;
   }
   ctx->pipe->delete_vs_state(ctx->pipe, handle);
}
/* Push the current vertex shader onto the 1-deep save stack. */
void cso_save_vertex_shader(struct cso_context *ctx)
{
   assert(!ctx->vertex_shader_saved);
   ctx->vertex_shader_saved = ctx->vertex_shader;
}
/* Pop the saved vertex shader, rebinding only if it differs. */
void cso_restore_vertex_shader(struct cso_context *ctx)
{
   if (ctx->vertex_shader_saved != ctx->vertex_shader) {
      ctx->pipe->bind_vs_state(ctx->pipe, ctx->vertex_shader_saved);
      ctx->vertex_shader = ctx->vertex_shader_saved;
   }
   ctx->vertex_shader_saved = NULL;
}
/* Set the framebuffer state, skipping the driver call when unchanged.
 * util_copy_framebuffer_state manages the surface references. */
void cso_set_framebuffer(struct cso_context *ctx,
                         const struct pipe_framebuffer_state *fb)
{
   if (memcmp(&ctx->fb, fb, sizeof(*fb)) != 0) {
      util_copy_framebuffer_state(&ctx->fb, fb);
      ctx->pipe->set_framebuffer_state(ctx->pipe, fb);
   }
}
/* Save the current framebuffer state (takes surface references). */
void cso_save_framebuffer(struct cso_context *ctx)
{
   util_copy_framebuffer_state(&ctx->fb_saved, &ctx->fb);
}
void cso_restore_framebuffer(struct cso_context *ctx) |
{ |
if (memcmp(&ctx->fb, &ctx->fb_saved, sizeof(ctx->fb))) { |
util_copy_framebuffer_state(&ctx->fb, &ctx->fb_saved); |
ctx->pipe->set_framebuffer_state(ctx->pipe, &ctx->fb); |
util_unreference_framebuffer_state(&ctx->fb_saved); |
} |
} |
void cso_set_viewport(struct cso_context *ctx, |
const struct pipe_viewport_state *vp) |
{ |
if (memcmp(&ctx->vp, vp, sizeof(*vp))) { |
ctx->vp = *vp; |
ctx->pipe->set_viewport_states(ctx->pipe, 0, 1, vp); |
} |
} |
/* Remember the current viewport for a later cso_restore_viewport(). */
void cso_save_viewport(struct cso_context *ctx)
{
   ctx->vp_saved = ctx->vp;
}
void cso_restore_viewport(struct cso_context *ctx) |
{ |
if (memcmp(&ctx->vp, &ctx->vp_saved, sizeof(ctx->vp))) { |
ctx->vp = ctx->vp_saved; |
ctx->pipe->set_viewport_states(ctx->pipe, 0, 1, &ctx->vp); |
} |
} |
void cso_set_blend_color(struct cso_context *ctx, |
const struct pipe_blend_color *bc) |
{ |
if (memcmp(&ctx->blend_color, bc, sizeof(ctx->blend_color))) { |
ctx->blend_color = *bc; |
ctx->pipe->set_blend_color(ctx->pipe, bc); |
} |
} |
void cso_set_sample_mask(struct cso_context *ctx, unsigned sample_mask) |
{ |
if (ctx->sample_mask != sample_mask) { |
ctx->sample_mask = sample_mask; |
ctx->pipe->set_sample_mask(ctx->pipe, sample_mask); |
} |
} |
/* Remember the current sample mask for a later restore. */
void cso_save_sample_mask(struct cso_context *ctx)
{
   ctx->sample_mask_saved = ctx->sample_mask;
}

/* Reinstate the sample mask captured by cso_save_sample_mask()
 * (cso_set_sample_mask already avoids redundant driver calls). */
void cso_restore_sample_mask(struct cso_context *ctx)
{
   cso_set_sample_mask(ctx, ctx->sample_mask_saved);
}
void cso_set_stencil_ref(struct cso_context *ctx, |
const struct pipe_stencil_ref *sr) |
{ |
if (memcmp(&ctx->stencil_ref, sr, sizeof(ctx->stencil_ref))) { |
ctx->stencil_ref = *sr; |
ctx->pipe->set_stencil_ref(ctx->pipe, sr); |
} |
} |
/* Remember the current stencil ref for a later
 * cso_restore_stencil_ref(). */
void cso_save_stencil_ref(struct cso_context *ctx)
{
   ctx->stencil_ref_saved = ctx->stencil_ref;
}
void cso_restore_stencil_ref(struct cso_context *ctx) |
{ |
if (memcmp(&ctx->stencil_ref, &ctx->stencil_ref_saved, |
sizeof(ctx->stencil_ref))) { |
ctx->stencil_ref = ctx->stencil_ref_saved; |
ctx->pipe->set_stencil_ref(ctx->pipe, &ctx->stencil_ref); |
} |
} |
void cso_set_render_condition(struct cso_context *ctx, |
struct pipe_query *query, |
boolean condition, uint mode) |
{ |
struct pipe_context *pipe = ctx->pipe; |
if (ctx->render_condition != query || |
ctx->render_condition_mode != mode || |
ctx->render_condition_cond != condition) { |
pipe->render_condition(pipe, query, condition, mode); |
ctx->render_condition = query; |
ctx->render_condition_cond = condition; |
ctx->render_condition_mode = mode; |
} |
} |
/* Remember the current render condition (query, condition flag, mode)
 * for a later restore. */
void cso_save_render_condition(struct cso_context *ctx)
{
   ctx->render_condition_saved = ctx->render_condition;
   ctx->render_condition_cond_saved = ctx->render_condition_cond;
   ctx->render_condition_mode_saved = ctx->render_condition_mode;
}

/* Reinstate the render condition captured by
 * cso_save_render_condition() (redundant binds are filtered by
 * cso_set_render_condition). */
void cso_restore_render_condition(struct cso_context *ctx)
{
   cso_set_render_condition(ctx, ctx->render_condition_saved,
                            ctx->render_condition_cond_saved,
                            ctx->render_condition_mode_saved);
}
void cso_set_geometry_shader_handle(struct cso_context *ctx, void *handle) |
{ |
assert(ctx->has_geometry_shader || !handle); |
if (ctx->has_geometry_shader && ctx->geometry_shader != handle) { |
ctx->geometry_shader = handle; |
ctx->pipe->bind_gs_state(ctx->pipe, handle); |
} |
} |
void cso_delete_geometry_shader(struct cso_context *ctx, void *handle) |
{ |
if (handle == ctx->geometry_shader) { |
/* unbind before deleting */ |
ctx->pipe->bind_gs_state(ctx->pipe, NULL); |
ctx->geometry_shader = NULL; |
} |
ctx->pipe->delete_gs_state(ctx->pipe, handle); |
} |
void cso_save_geometry_shader(struct cso_context *ctx) |
{ |
if (!ctx->has_geometry_shader) { |
return; |
} |
assert(!ctx->geometry_shader_saved); |
ctx->geometry_shader_saved = ctx->geometry_shader; |
} |
void cso_restore_geometry_shader(struct cso_context *ctx) |
{ |
if (!ctx->has_geometry_shader) { |
return; |
} |
if (ctx->geometry_shader_saved != ctx->geometry_shader) { |
ctx->pipe->bind_gs_state(ctx->pipe, ctx->geometry_shader_saved); |
ctx->geometry_shader = ctx->geometry_shader_saved; |
} |
ctx->geometry_shader_saved = NULL; |
} |
/* clip state */

/* Copy only the user clip planes -- that is the only part of
 * pipe_clip_state this module tracks. */
static INLINE void
clip_state_cpy(struct pipe_clip_state *dst,
               const struct pipe_clip_state *src)
{
   memcpy(dst->ucp, src->ucp, sizeof(dst->ucp));
}

/* Compare the user clip planes of two clip states; returns non-zero
 * when they differ (memcmp semantics). */
static INLINE int
clip_state_cmp(const struct pipe_clip_state *a,
               const struct pipe_clip_state *b)
{
   return memcmp(a->ucp, b->ucp, sizeof(a->ucp));
}
void |
cso_set_clip(struct cso_context *ctx, |
const struct pipe_clip_state *clip) |
{ |
if (clip_state_cmp(&ctx->clip, clip)) { |
clip_state_cpy(&ctx->clip, clip); |
ctx->pipe->set_clip_state(ctx->pipe, clip); |
} |
} |
/* Remember the current clip state for a later cso_restore_clip(). */
void
cso_save_clip(struct cso_context *ctx)
{
   clip_state_cpy(&ctx->clip_saved, &ctx->clip);
}
void |
cso_restore_clip(struct cso_context *ctx) |
{ |
if (clip_state_cmp(&ctx->clip, &ctx->clip_saved)) { |
clip_state_cpy(&ctx->clip, &ctx->clip_saved); |
ctx->pipe->set_clip_state(ctx->pipe, &ctx->clip_saved); |
} |
} |
/**
 * Bind a vertex element layout, creating and caching the driver CSO on
 * first use.  Delegates entirely to u_vbuf when it is active.
 *
 * \param count   number of entries in \p states
 * \param states  vertex element descriptions
 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
 */
enum pipe_error
cso_set_vertex_elements(struct cso_context *ctx,
                        unsigned count,
                        const struct pipe_vertex_element *states)
{
   struct u_vbuf *vbuf = ctx->vbuf;
   unsigned key_size, hash_key;
   struct cso_hash_iter iter;
   void *handle;
   struct cso_velems_state velems_state;

   /* the vertex buffer manager owns this state when present */
   if (vbuf) {
      u_vbuf_set_vertex_elements(vbuf, count, states);
      return PIPE_OK;
   }

   /* Need to include the count into the stored state data too.
    * Otherwise first few count pipe_vertex_elements could be identical
    * even if count is different, and there's no guarantee the hash would
    * be different in that case neither.
    */
   key_size = sizeof(struct pipe_vertex_element) * count + sizeof(unsigned);
   velems_state.count = count;
   memcpy(velems_state.velems, states,
          sizeof(struct pipe_vertex_element) * count);
   hash_key = cso_construct_key((void*)&velems_state, key_size);
   iter = cso_find_state_template(ctx->cache, hash_key, CSO_VELEMENTS,
                                  (void*)&velems_state, key_size);

   if (cso_hash_iter_is_null(iter)) {
      /* cache miss: create the driver object and insert it */
      struct cso_velements *cso = MALLOC(sizeof(struct cso_velements));
      if (!cso)
         return PIPE_ERROR_OUT_OF_MEMORY;

      memcpy(&cso->state, &velems_state, key_size);
      cso->data = ctx->pipe->create_vertex_elements_state(ctx->pipe, count,
                                                      &cso->state.velems[0]);
      cso->delete_state =
         (cso_state_callback) ctx->pipe->delete_vertex_elements_state;
      cso->context = ctx->pipe;

      iter = cso_insert_state(ctx->cache, hash_key, CSO_VELEMENTS, cso);
      if (cso_hash_iter_is_null(iter)) {
         FREE(cso);
         return PIPE_ERROR_OUT_OF_MEMORY;
      }
      handle = cso->data;
   }
   else {
      handle = ((struct cso_velements *)cso_hash_iter_data(iter))->data;
   }

   /* bind only on change */
   if (ctx->velements != handle) {
      ctx->velements = handle;
      ctx->pipe->bind_vertex_elements_state(ctx->pipe, handle);
   }
   return PIPE_OK;
}
void cso_save_vertex_elements(struct cso_context *ctx) |
{ |
struct u_vbuf *vbuf = ctx->vbuf; |
if (vbuf) { |
u_vbuf_save_vertex_elements(vbuf); |
return; |
} |
assert(!ctx->velements_saved); |
ctx->velements_saved = ctx->velements; |
} |
void cso_restore_vertex_elements(struct cso_context *ctx) |
{ |
struct u_vbuf *vbuf = ctx->vbuf; |
if (vbuf) { |
u_vbuf_restore_vertex_elements(vbuf); |
return; |
} |
if (ctx->velements != ctx->velements_saved) { |
ctx->velements = ctx->velements_saved; |
ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->velements_saved); |
} |
ctx->velements_saved = NULL; |
} |
/* vertex buffers */

/**
 * Set vertex buffers on a range of slots.  If the range covers the
 * context's auxiliary slot, a referenced copy of that buffer is kept so
 * it can be saved/restored around meta operations.
 */
void cso_set_vertex_buffers(struct cso_context *ctx,
                            unsigned start_slot, unsigned count,
                            const struct pipe_vertex_buffer *buffers)
{
   struct u_vbuf *vbuf = ctx->vbuf;

   if (vbuf) {
      u_vbuf_set_vertex_buffers(vbuf, start_slot, count, buffers);
      return;
   }

   /* Save what's in the auxiliary slot, so that we can save and restore it
    * for meta ops. */
   if (start_slot <= ctx->aux_vertex_buffer_index &&
       start_slot+count > ctx->aux_vertex_buffer_index) {
      if (buffers) {
         const struct pipe_vertex_buffer *vb =
               buffers + (ctx->aux_vertex_buffer_index - start_slot);

         /* take the reference first; the struct copy below then writes
          * the same (already-referenced) pointer */
         pipe_resource_reference(&ctx->aux_vertex_buffer_current.buffer,
                                 vb->buffer);
         memcpy(&ctx->aux_vertex_buffer_current, vb,
                sizeof(struct pipe_vertex_buffer));
      }
      else {
         pipe_resource_reference(&ctx->aux_vertex_buffer_current.buffer,
                                 NULL);
         ctx->aux_vertex_buffer_current.user_buffer = NULL;
      }
   }

   ctx->pipe->set_vertex_buffers(ctx->pipe, start_slot, count, buffers);
}
/* Snapshot the auxiliary vertex buffer slot (takes a buffer
 * reference); delegates to u_vbuf when it is active. */
void cso_save_aux_vertex_buffer_slot(struct cso_context *ctx)
{
   struct u_vbuf *vbuf = ctx->vbuf;

   if (vbuf) {
      u_vbuf_save_aux_vertex_buffer_slot(vbuf);
      return;
   }

   /* reference first, then copy the whole struct (the pointer written
    * by memcpy is the one just referenced) */
   pipe_resource_reference(&ctx->aux_vertex_buffer_saved.buffer,
                           ctx->aux_vertex_buffer_current.buffer);
   memcpy(&ctx->aux_vertex_buffer_saved, &ctx->aux_vertex_buffer_current,
          sizeof(struct pipe_vertex_buffer));
}

/* Rebind the saved auxiliary vertex buffer and release the saved
 * reference; delegates to u_vbuf when it is active. */
void cso_restore_aux_vertex_buffer_slot(struct cso_context *ctx)
{
   struct u_vbuf *vbuf = ctx->vbuf;

   if (vbuf) {
      u_vbuf_restore_aux_vertex_buffer_slot(vbuf);
      return;
   }

   cso_set_vertex_buffers(ctx, ctx->aux_vertex_buffer_index, 1,
                          &ctx->aux_vertex_buffer_saved);
   pipe_resource_reference(&ctx->aux_vertex_buffer_saved.buffer, NULL);
}
/* Return the index of the vertex buffer slot reserved for meta ops. */
unsigned cso_get_aux_vertex_buffer_slot(struct cso_context *ctx)
{
   return ctx->aux_vertex_buffer_index;
}
/**************** fragment/vertex sampler view state *************************/

/**
 * Look up (or create and cache) the driver sampler CSO matching
 * \p templ and store its handle in info->samplers[idx].  A NULL
 * template stores a NULL handle.  Nothing is bound here; see
 * single_sampler_done().
 */
static enum pipe_error
single_sampler(struct cso_context *ctx,
               struct sampler_info *info,
               unsigned idx,
               const struct pipe_sampler_state *templ)
{
   void *handle = NULL;

   if (templ != NULL) {
      unsigned key_size = sizeof(struct pipe_sampler_state);
      unsigned hash_key = cso_construct_key((void*)templ, key_size);
      struct cso_hash_iter iter =
         cso_find_state_template(ctx->cache,
                                 hash_key, CSO_SAMPLER,
                                 (void *) templ, key_size);

      if (cso_hash_iter_is_null(iter)) {
         /* cache miss: create the driver object and insert it */
         struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler));
         if (!cso)
            return PIPE_ERROR_OUT_OF_MEMORY;

         memcpy(&cso->state, templ, sizeof(*templ));
         cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state);
         cso->delete_state =
            (cso_state_callback) ctx->pipe->delete_sampler_state;
         cso->context = ctx->pipe;

         iter = cso_insert_state(ctx->cache, hash_key, CSO_SAMPLER, cso);
         if (cso_hash_iter_is_null(iter)) {
            FREE(cso);
            return PIPE_ERROR_OUT_OF_MEMORY;
         }
         handle = cso->data;
      }
      else {
         handle = ((struct cso_sampler *)cso_hash_iter_data(iter))->data;
      }
   }

   info->samplers[idx] = handle;

   return PIPE_OK;
}
/* Public wrapper: stage a single sampler state for one shader stage;
 * call cso_single_sampler_done() to commit the whole set. */
enum pipe_error
cso_single_sampler(struct cso_context *ctx,
                   unsigned shader_stage,
                   unsigned idx,
                   const struct pipe_sampler_state *templ)
{
   return single_sampler(ctx, &ctx->samplers[shader_stage], idx, templ);
}
/**
 * Flush the shadowed sampler array to the driver for the given shader
 * stage, binding only when the count or a handle actually changed.
 */
static void
single_sampler_done(struct cso_context *ctx, unsigned shader_stage)
{
   struct sampler_info *info = &ctx->samplers[shader_stage];
   unsigned i;

   /* find highest non-null sampler */
   for (i = PIPE_MAX_SAMPLERS; i > 0; i--) {
      if (info->samplers[i - 1] != NULL)
         break;
   }

   info->nr_samplers = i;

   /* skip the driver call when the hardware shadow already matches */
   if (info->hw.nr_samplers != info->nr_samplers ||
       memcmp(info->hw.samplers,
              info->samplers,
              info->nr_samplers * sizeof(void *)) != 0)
   {
      memcpy(info->hw.samplers,
             info->samplers,
             info->nr_samplers * sizeof(void *));
      info->hw.nr_samplers = info->nr_samplers;

      switch (shader_stage) {
      case PIPE_SHADER_FRAGMENT:
         ctx->pipe->bind_fragment_sampler_states(ctx->pipe,
                                                 info->nr_samplers,
                                                 info->samplers);
         break;
      case PIPE_SHADER_VERTEX:
         ctx->pipe->bind_vertex_sampler_states(ctx->pipe,
                                               info->nr_samplers,
                                               info->samplers);
         break;
      case PIPE_SHADER_GEOMETRY:
         ctx->pipe->bind_geometry_sampler_states(ctx->pipe,
                                                 info->nr_samplers,
                                                 info->samplers);
         break;
      default:
         assert(!"bad shader type in single_sampler_done()");
      }
   }
}

/* Public wrapper: commit samplers staged via cso_single_sampler(). */
void
cso_single_sampler_done(struct cso_context *ctx, unsigned shader_stage)
{
   single_sampler_done(ctx, shader_stage);
}
/*
 * Set all samplers for one shader stage in one call.  If the function
 * encounters any errors it will return the last one -- done to always
 * try to set as many samplers as possible.
 */
enum pipe_error
cso_set_samplers(struct cso_context *ctx,
                 unsigned shader_stage,
                 unsigned nr,
                 const struct pipe_sampler_state **templates)
{
   struct sampler_info *info = &ctx->samplers[shader_stage];
   unsigned i;
   enum pipe_error temp, error = PIPE_OK;

   /* TODO: fastpath
    */
   for (i = 0; i < nr; i++) {
      temp = single_sampler(ctx, info, i, templates[i]);
      if (temp != PIPE_OK)
         error = temp;
   }

   /* clear out any previously-set samplers beyond the new count */
   for ( ; i < info->nr_samplers; i++) {
      temp = single_sampler(ctx, info, i, NULL);
      if (temp != PIPE_OK)
         error = temp;
   }

   single_sampler_done(ctx, shader_stage);

   return error;
}
/* Snapshot the sampler CSO handles for one shader stage. */
void
cso_save_samplers(struct cso_context *ctx, unsigned shader_stage)
{
   struct sampler_info *info = &ctx->samplers[shader_stage];
   info->nr_samplers_saved = info->nr_samplers;
   memcpy(info->samplers_saved, info->samplers, sizeof(info->samplers));
}

/* Reinstate the snapshot taken by cso_save_samplers() and rebind. */
void
cso_restore_samplers(struct cso_context *ctx, unsigned shader_stage)
{
   struct sampler_info *info = &ctx->samplers[shader_stage];
   info->nr_samplers = info->nr_samplers_saved;
   memcpy(info->samplers, info->samplers_saved, sizeof(info->samplers));
   single_sampler_done(ctx, shader_stage);
}
/**
 * Set sampler views for one shader stage, taking a reference on each
 * new view and releasing the views beyond the new count.
 */
void
cso_set_sampler_views(struct cso_context *ctx,
                      unsigned shader_stage,
                      unsigned count,
                      struct pipe_sampler_view **views)
{
   struct sampler_info *info = &ctx->samplers[shader_stage];
   unsigned i;

   /* reference new views */
   for (i = 0; i < count; i++) {
      pipe_sampler_view_reference(&info->views[i], views[i]);
   }
   /* unref extra old views, if any */
   for (; i < info->nr_views; i++) {
      pipe_sampler_view_reference(&info->views[i], NULL);
   }

   info->nr_views = count;

   /* bind the new sampler views */
   switch (shader_stage) {
   case PIPE_SHADER_FRAGMENT:
      ctx->pipe->set_fragment_sampler_views(ctx->pipe, count, info->views);
      break;
   case PIPE_SHADER_VERTEX:
      ctx->pipe->set_vertex_sampler_views(ctx->pipe, count, info->views);
      break;
   case PIPE_SHADER_GEOMETRY:
      ctx->pipe->set_geometry_sampler_views(ctx->pipe, count, info->views);
      break;
   default:
      assert(!"bad shader type in cso_set_sampler_views()");
   }
}
/* Snapshot the bound sampler views for one stage, holding an extra
 * reference in views_saved[]. */
void
cso_save_sampler_views(struct cso_context *ctx, unsigned shader_stage)
{
   struct sampler_info *info = &ctx->samplers[shader_stage];
   unsigned i;

   info->nr_views_saved = info->nr_views;

   for (i = 0; i < info->nr_views; i++) {
      assert(!info->views_saved[i]);
      pipe_sampler_view_reference(&info->views_saved[i], info->views[i]);
   }
}

/* Reinstate the views captured by cso_save_sampler_views(): the saved
 * references are moved (not re-referenced) back into views[], any
 * current views beyond the saved count are released, and the result is
 * rebound. */
void
cso_restore_sampler_views(struct cso_context *ctx, unsigned shader_stage)
{
   struct sampler_info *info = &ctx->samplers[shader_stage];
   unsigned i, nr_saved = info->nr_views_saved;

   for (i = 0; i < nr_saved; i++) {
      pipe_sampler_view_reference(&info->views[i], NULL);
      /* move the reference from one pointer to another */
      info->views[i] = info->views_saved[i];
      info->views_saved[i] = NULL;
   }
   for (; i < info->nr_views; i++) {
      pipe_sampler_view_reference(&info->views[i], NULL);
   }

   /* bind the old/saved sampler views */
   switch (shader_stage) {
   case PIPE_SHADER_FRAGMENT:
      ctx->pipe->set_fragment_sampler_views(ctx->pipe, nr_saved, info->views);
      break;
   case PIPE_SHADER_VERTEX:
      ctx->pipe->set_vertex_sampler_views(ctx->pipe, nr_saved, info->views);
      break;
   case PIPE_SHADER_GEOMETRY:
      ctx->pipe->set_geometry_sampler_views(ctx->pipe, nr_saved, info->views);
      break;
   default:
      assert(!"bad shader type in cso_restore_sampler_views()");
   }

   info->nr_views = nr_saved;
   info->nr_views_saved = 0;
}
/**
 * Bind stream output targets, referencing the new ones and releasing
 * any old targets beyond the new count.  No-op (asserting
 * num_targets == 0) when the driver lacks streamout.
 */
void
cso_set_stream_outputs(struct cso_context *ctx,
                       unsigned num_targets,
                       struct pipe_stream_output_target **targets,
                       unsigned append_bitmask)
{
   struct pipe_context *pipe = ctx->pipe;
   uint i;

   if (!ctx->has_streamout) {
      assert(num_targets == 0);
      return;
   }

   if (ctx->nr_so_targets == 0 && num_targets == 0) {
      /* Nothing to do. */
      return;
   }

   /* reference new targets */
   for (i = 0; i < num_targets; i++) {
      pipe_so_target_reference(&ctx->so_targets[i], targets[i]);
   }
   /* unref extra old targets, if any */
   for (; i < ctx->nr_so_targets; i++) {
      pipe_so_target_reference(&ctx->so_targets[i], NULL);
   }

   pipe->set_stream_output_targets(pipe, num_targets, targets,
                                   append_bitmask);
   ctx->nr_so_targets = num_targets;
}
/* Snapshot the bound stream output targets (takes references); no-op
 * without streamout support. */
void
cso_save_stream_outputs(struct cso_context *ctx)
{
   uint i;

   if (!ctx->has_streamout) {
      return;
   }

   ctx->nr_so_targets_saved = ctx->nr_so_targets;

   for (i = 0; i < ctx->nr_so_targets; i++) {
      assert(!ctx->so_targets_saved[i]);
      pipe_so_target_reference(&ctx->so_targets_saved[i], ctx->so_targets[i]);
   }
}

/* Reinstate the targets captured by cso_save_stream_outputs(): saved
 * references are moved back, extras released, and the set is rebound
 * in append mode. */
void
cso_restore_stream_outputs(struct cso_context *ctx)
{
   struct pipe_context *pipe = ctx->pipe;
   uint i;

   if (!ctx->has_streamout) {
      return;
   }

   if (ctx->nr_so_targets == 0 && ctx->nr_so_targets_saved == 0) {
      /* Nothing to do. */
      return;
   }

   for (i = 0; i < ctx->nr_so_targets_saved; i++) {
      pipe_so_target_reference(&ctx->so_targets[i], NULL);
      /* move the reference from one pointer to another */
      ctx->so_targets[i] = ctx->so_targets_saved[i];
      ctx->so_targets_saved[i] = NULL;
   }
   for (; i < ctx->nr_so_targets; i++) {
      pipe_so_target_reference(&ctx->so_targets[i], NULL);
   }

   /* ~0 means append */
   pipe->set_stream_output_targets(pipe, ctx->nr_so_targets_saved,
                                   ctx->so_targets, ~0);

   ctx->nr_so_targets = ctx->nr_so_targets_saved;
   ctx->nr_so_targets_saved = 0;
}
/* constant buffers */

/**
 * Set a constant buffer.  Slot 0 is additionally shadowed so it can be
 * saved/restored around meta operations.
 */
void
cso_set_constant_buffer(struct cso_context *cso, unsigned shader_stage,
                        unsigned index, struct pipe_constant_buffer *cb)
{
   struct pipe_context *pipe = cso->pipe;

   pipe->set_constant_buffer(pipe, shader_stage, index, cb);

   if (index == 0) {
      util_copy_constant_buffer(&cso->aux_constbuf_current[shader_stage], cb);
   }
}
void |
cso_set_constant_buffer_resource(struct cso_context *cso, |
unsigned shader_stage, |
unsigned index, |
struct pipe_resource *buffer) |
{ |
if (buffer) { |
struct pipe_constant_buffer cb; |
cb.buffer = buffer; |
cb.buffer_offset = 0; |
cb.buffer_size = buffer->width0; |
cb.user_buffer = NULL; |
cso_set_constant_buffer(cso, shader_stage, index, &cb); |
} else { |
cso_set_constant_buffer(cso, shader_stage, index, NULL); |
} |
} |
/* Snapshot constant-buffer slot 0 for one shader stage (takes a
 * buffer reference). */
void
cso_save_constant_buffer_slot0(struct cso_context *cso,
                               unsigned shader_stage)
{
   util_copy_constant_buffer(&cso->aux_constbuf_saved[shader_stage],
                             &cso->aux_constbuf_current[shader_stage]);
}

/* Rebind the saved slot-0 constant buffer and release the saved
 * reference. */
void
cso_restore_constant_buffer_slot0(struct cso_context *cso,
                                  unsigned shader_stage)
{
   cso_set_constant_buffer(cso, shader_stage, 0,
                           &cso->aux_constbuf_saved[shader_stage]);
   pipe_resource_reference(&cso->aux_constbuf_saved[shader_stage].buffer,
                           NULL);
}
/* drawing */ |
void |
cso_set_index_buffer(struct cso_context *cso, |
const struct pipe_index_buffer *ib) |
{ |
struct u_vbuf *vbuf = cso->vbuf; |
if (vbuf) { |
u_vbuf_set_index_buffer(vbuf, ib); |
} else { |
struct pipe_context *pipe = cso->pipe; |
pipe->set_index_buffer(pipe, ib); |
} |
} |
void |
cso_draw_vbo(struct cso_context *cso, |
const struct pipe_draw_info *info) |
{ |
struct u_vbuf *vbuf = cso->vbuf; |
if (vbuf) { |
u_vbuf_draw_vbo(vbuf, info); |
} else { |
struct pipe_context *pipe = cso->pipe; |
pipe->draw_vbo(pipe, info); |
} |
} |
/* helper drawing function */

/**
 * Draw a simple non-indexed range of vertices.
 * \param mode   primitive type (PIPE_PRIM_x)
 * \param start  first vertex index
 * \param count  number of vertices
 */
void
cso_draw_arrays(struct cso_context *cso, uint mode, uint start, uint count)
{
   struct pipe_draw_info info;

   util_draw_init_info(&info);

   info.mode = mode;
   info.start = start;
   info.count = count;
   info.min_index = start;
   info.max_index = start + count - 1;

   cso_draw_vbo(cso, &info);
}
/drivers/video/Gallium/auxiliary/cso_cache/cso_context.h |
---|
0,0 → 1,239 |
/************************************************************************** |
* |
* Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
#ifndef CSO_CONTEXT_H |
#define CSO_CONTEXT_H |
#include "pipe/p_context.h" |
#include "pipe/p_state.h" |
#include "pipe/p_defines.h" |
#ifdef __cplusplus |
extern "C" { |
#endif |
struct cso_context; |
struct u_vbuf; |
struct cso_context *cso_create_context( struct pipe_context *pipe ); |
void cso_release_all( struct cso_context *ctx ); |
void cso_destroy_context( struct cso_context *cso ); |
enum pipe_error cso_set_blend( struct cso_context *cso, |
const struct pipe_blend_state *blend ); |
void cso_save_blend(struct cso_context *cso); |
void cso_restore_blend(struct cso_context *cso); |
enum pipe_error cso_set_depth_stencil_alpha( struct cso_context *cso, |
const struct pipe_depth_stencil_alpha_state *dsa ); |
void cso_save_depth_stencil_alpha(struct cso_context *cso); |
void cso_restore_depth_stencil_alpha(struct cso_context *cso); |
enum pipe_error cso_set_rasterizer( struct cso_context *cso, |
const struct pipe_rasterizer_state *rasterizer ); |
void cso_save_rasterizer(struct cso_context *cso); |
void cso_restore_rasterizer(struct cso_context *cso); |
enum pipe_error |
cso_set_samplers(struct cso_context *cso, |
unsigned shader_stage, |
unsigned count, |
const struct pipe_sampler_state **states); |
void |
cso_save_samplers(struct cso_context *cso, unsigned shader_stage); |
void |
cso_restore_samplers(struct cso_context *cso, unsigned shader_stage); |
/* Alternate interface to support state trackers that like to modify |
* samplers one at a time: |
*/ |
enum pipe_error |
cso_single_sampler(struct cso_context *cso, |
unsigned shader_stage, |
unsigned count, |
const struct pipe_sampler_state *states); |
void |
cso_single_sampler_done(struct cso_context *cso, unsigned shader_stage); |
enum pipe_error cso_set_vertex_elements(struct cso_context *ctx, |
unsigned count, |
const struct pipe_vertex_element *states); |
void cso_save_vertex_elements(struct cso_context *ctx); |
void cso_restore_vertex_elements(struct cso_context *ctx); |
void cso_set_vertex_buffers(struct cso_context *ctx, |
unsigned start_slot, unsigned count, |
const struct pipe_vertex_buffer *buffers); |
/* One vertex buffer slot is provided with the save/restore functionality. |
* cso_context chooses the slot, it can be non-zero. */ |
void cso_save_aux_vertex_buffer_slot(struct cso_context *ctx); |
void cso_restore_aux_vertex_buffer_slot(struct cso_context *ctx); |
unsigned cso_get_aux_vertex_buffer_slot(struct cso_context *ctx); |
void cso_set_stream_outputs(struct cso_context *ctx, |
unsigned num_targets, |
struct pipe_stream_output_target **targets, |
unsigned append_bitmask); |
void cso_save_stream_outputs(struct cso_context *ctx); |
void cso_restore_stream_outputs(struct cso_context *ctx); |
/* |
* We don't provide shader caching in CSO. Most of the time the api provides |
* object semantics for shaders anyway, and the cases where it doesn't |
* (eg mesa's internally-generated texenv programs), it will be up to |
* the state tracker to implement their own specialized caching. |
*/ |
void cso_set_fragment_shader_handle(struct cso_context *ctx, void *handle); |
void cso_delete_fragment_shader(struct cso_context *ctx, void *handle ); |
void cso_save_fragment_shader(struct cso_context *cso); |
void cso_restore_fragment_shader(struct cso_context *cso); |
void cso_set_vertex_shader_handle(struct cso_context *ctx, void *handle); |
void cso_delete_vertex_shader(struct cso_context *ctx, void *handle ); |
void cso_save_vertex_shader(struct cso_context *cso); |
void cso_restore_vertex_shader(struct cso_context *cso); |
void cso_set_geometry_shader_handle(struct cso_context *ctx, void *handle); |
void cso_delete_geometry_shader(struct cso_context *ctx, void *handle); |
void cso_save_geometry_shader(struct cso_context *cso); |
void cso_restore_geometry_shader(struct cso_context *cso); |
void cso_set_framebuffer(struct cso_context *cso, |
const struct pipe_framebuffer_state *fb); |
void cso_save_framebuffer(struct cso_context *cso); |
void cso_restore_framebuffer(struct cso_context *cso); |
void cso_set_viewport(struct cso_context *cso, |
const struct pipe_viewport_state *vp); |
void cso_save_viewport(struct cso_context *cso); |
void cso_restore_viewport(struct cso_context *cso); |
void cso_set_blend_color(struct cso_context *cso, |
const struct pipe_blend_color *bc); |
void cso_set_sample_mask(struct cso_context *cso, unsigned stencil_mask); |
void cso_save_sample_mask(struct cso_context *ctx); |
void cso_restore_sample_mask(struct cso_context *ctx); |
void cso_set_stencil_ref(struct cso_context *cso, |
const struct pipe_stencil_ref *sr); |
void cso_save_stencil_ref(struct cso_context *cso); |
void cso_restore_stencil_ref(struct cso_context *cso); |
void cso_set_render_condition(struct cso_context *cso, |
struct pipe_query *query, |
boolean condition, uint mode); |
void cso_save_render_condition(struct cso_context *cso); |
void cso_restore_render_condition(struct cso_context *cso); |
/* clip state */ |
void |
cso_set_clip(struct cso_context *cso, |
const struct pipe_clip_state *clip); |
void |
cso_save_clip(struct cso_context *cso); |
void |
cso_restore_clip(struct cso_context *cso); |
/* sampler view state */ |
void |
cso_set_sampler_views(struct cso_context *cso, |
unsigned shader_stage, |
unsigned count, |
struct pipe_sampler_view **views); |
void |
cso_save_sampler_views(struct cso_context *cso, unsigned shader_stage); |
void |
cso_restore_sampler_views(struct cso_context *cso, unsigned shader_stage); |
/* constant buffers */ |
void cso_set_constant_buffer(struct cso_context *cso, unsigned shader_stage, |
unsigned index, struct pipe_constant_buffer *cb); |
void cso_set_constant_buffer_resource(struct cso_context *cso, |
unsigned shader_stage, |
unsigned index, |
struct pipe_resource *buffer); |
void cso_save_constant_buffer_slot0(struct cso_context *cso, |
unsigned shader_stage); |
void cso_restore_constant_buffer_slot0(struct cso_context *cso, |
unsigned shader_stage); |
/* drawing */ |
void |
cso_set_index_buffer(struct cso_context *cso, |
const struct pipe_index_buffer *ib); |
void |
cso_draw_vbo(struct cso_context *cso, |
const struct pipe_draw_info *info); |
/* helper drawing function */ |
void |
cso_draw_arrays(struct cso_context *cso, uint mode, uint start, uint count); |
#ifdef __cplusplus |
} |
#endif |
#endif |
/drivers/video/Gallium/auxiliary/cso_cache/cso_hash.c |
---|
0,0 → 1,439 |
/************************************************************************** |
* |
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/* |
* Authors: |
* Zack Rusin <zack@tungstengraphics.com> |
*/ |
#include "util/u_debug.h" |
#include "util/u_memory.h" |
#include "cso_hash.h" |
/* Fully parenthesized to avoid operator-precedence surprises when the
 * arguments are expressions (the original expanded `a > b` unguarded,
 * so e.g. MAX(1 & 2, 1) parsed as 1 & (2 > 1)).  Arguments are still
 * evaluated twice; avoid side effects. */
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
/* Smallest table size used is primeForNumBits(MinNumBits) buckets. */
static const int MinNumBits = 4;

/* Delta from 2^n to a prime just above it, indexed by n; used to pick
 * prime-ish bucket counts.  Entries beyond index 26 are 0 (plain
 * powers of two). */
static const unsigned char prime_deltas[] = {
    0,  0,  1,  3,  1,  5,  3,  3,  1,  9,  7,  5,  3,  9, 25,  3,
    1, 21,  3, 21,  7, 15,  9,  5,  3, 29, 15,  0,  0,  0,  0,  0
};

/* Return a prime (or power of two for large numBits) just above
 * 2^numBits; numBits must index within prime_deltas. */
static int primeForNumBits(int numBits)
{
   return (1 << numBits) + prime_deltas[numBits];
}
/* |
Returns the smallest integer n such that |
primeForNumBits(n) >= hint. |
*/ |
static int countBits(int hint) |
{ |
int numBits = 0; |
int bits = hint; |
while (bits > 1) { |
bits >>= 1; |
numBits++; |
} |
if (numBits >= (int)sizeof(prime_deltas)) { |
numBits = sizeof(prime_deltas) - 1; |
} else if (primeForNumBits(numBits) < hint) { |
++numBits; |
} |
return numBits; |
} |
/* One hash entry; colliding entries are chained through 'next'.  The
 * hash table pointer itself is cast to a cso_node and used as the
 * end-of-chain sentinel (see cso_data_rehash). */
struct cso_node {
   struct cso_node *next;
   unsigned key;
   void *value;
};

struct cso_hash_data {
   struct cso_node *fakeNext;   /* first field: lets the struct pose as a
                                 * sentinel cso_node when cast */
   struct cso_node **buckets;   /* array of numBuckets chain heads */
   int size;                    /* number of stored entries */
   int nodeSize;                /* allocation size of one node */
   short userNumBits;           /* caller-requested minimum bit count */
   short numBits;               /* current bit count */
   int numBuckets;              /* primeForNumBits(numBits) */
};

/* Handle type: the same pointer viewed either as the table data or as
 * the sentinel node. */
struct cso_hash {
   union {
      struct cso_hash_data *d;
      struct cso_node *e;
   } data;
};
/* Allocate storage for one hash node (nodeSize bytes). */
static void *cso_data_allocate_node(struct cso_hash_data *hash)
{
   return MALLOC(hash->nodeSize);
}

/* Release a node previously created with cso_hash_create_node(). */
static void cso_free_node(struct cso_node *node)
{
   FREE(node);
}
static struct cso_node * |
cso_hash_create_node(struct cso_hash *hash, |
unsigned akey, void *avalue, |
struct cso_node **anextNode) |
{ |
struct cso_node *node = cso_data_allocate_node(hash->data.d); |
if (!node) |
return NULL; |
node->key = akey; |
node->value = avalue; |
node->next = (struct cso_node*)(*anextNode); |
*anextNode = node; |
++hash->data.d->size; |
return node; |
} |
/**
 * Resize the bucket array to primeForNumBits(hint) buckets and
 * redistribute all nodes.
 *
 * A negative hint means "expect at least -hint entries": it is
 * converted to a bit count, recorded as the user's preferred minimum,
 * and grown until the table can hold half the current size.  No-op if
 * the resulting bit count equals the current one.
 */
static void cso_data_rehash(struct cso_hash_data *hash, int hint)
{
   if (hint < 0) {
      hint = countBits(-hint);
      if (hint < MinNumBits)
         hint = MinNumBits;
      hash->userNumBits = (short)hint;
      while (primeForNumBits(hint) < (hash->size >> 1))
         ++hint;
   } else if (hint < MinNumBits) {
      hint = MinNumBits;
   }

   if (hash->numBits != hint) {
      /* the table itself, cast to a node, is the end-of-chain sentinel */
      struct cso_node *e = (struct cso_node *)(hash);
      struct cso_node **oldBuckets = hash->buckets;
      int oldNumBuckets = hash->numBuckets;
      int i = 0;

      hash->numBits = (short)hint;
      hash->numBuckets = primeForNumBits(hint);
      hash->buckets = MALLOC(sizeof(struct cso_node*) * hash->numBuckets);
      for (i = 0; i < hash->numBuckets; ++i)
         hash->buckets[i] = e;

      for (i = 0; i < oldNumBuckets; ++i) {
         struct cso_node *firstNode = oldBuckets[i];
         while (firstNode != e) {
            unsigned h = firstNode->key;
            struct cso_node *lastNode = firstNode;
            struct cso_node *afterLastNode;
            struct cso_node **beforeFirstNode;

            /* nodes with equal keys are adjacent; move whole runs to
             * preserve their relative order */
            while (lastNode->next != e && lastNode->next->key == h)
               lastNode = lastNode->next;

            afterLastNode = lastNode->next;

            /* append the run at the tail of its new bucket's chain */
            beforeFirstNode = &hash->buckets[h % hash->numBuckets];
            while (*beforeFirstNode != e)
               beforeFirstNode = &(*beforeFirstNode)->next;
            lastNode->next = *beforeFirstNode;
            *beforeFirstNode = firstNode;
            firstNode = afterLastNode;
         }
      }
      FREE(oldBuckets);
   }
}
static void cso_data_might_grow(struct cso_hash_data *hash) |
{ |
if (hash->size >= hash->numBuckets) |
cso_data_rehash(hash, hash->numBits + 1); |
} |
static void cso_data_has_shrunk(struct cso_hash_data *hash) |
{ |
if (hash->size <= (hash->numBuckets >> 3) && |
hash->numBits > hash->userNumBits) { |
int max = MAX(hash->numBits-2, hash->userNumBits); |
cso_data_rehash(hash, max); |
} |
} |
static struct cso_node *cso_data_first_node(struct cso_hash_data *hash) |
{ |
struct cso_node *e = (struct cso_node *)(hash); |
struct cso_node **bucket = hash->buckets; |
int n = hash->numBuckets; |
while (n--) { |
if (*bucket != e) |
return *bucket; |
++bucket; |
} |
return e; |
} |
/**
 * Return the address of the chain slot for 'akey': the slot of the first
 * node whose key equals akey, or the slot holding the end-of-chain sentinel
 * when the key is absent.
 *
 * hash->data is a union: data.e aliases data.d so the hash data block acts
 * as the sentinel node; *node == hash->data.e therefore means "not found".
 */
static struct cso_node **cso_hash_find_node(struct cso_hash *hash, unsigned akey)
{
   struct cso_node **node;

   if (hash->data.d->numBuckets) {
      /* Walk the bucket chain until the sentinel or a matching key. */
      node = (struct cso_node **)(&hash->data.d->buckets[akey % hash->data.d->numBuckets]);
      assert(*node == hash->data.e || (*node)->next);
      while (*node != hash->data.e && (*node)->key != akey)
         node = &(*node)->next;
   } else {
      /* No buckets allocated yet: hand back the sentinel slot itself. */
      node = (struct cso_node **)((const struct cso_node * const *)(&hash->data.e));
   }
   return node;
}
struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, |
unsigned key, void *data) |
{ |
cso_data_might_grow(hash->data.d); |
{ |
struct cso_node **nextNode = cso_hash_find_node(hash, key); |
struct cso_node *node = cso_hash_create_node(hash, key, data, nextNode); |
if (!node) { |
struct cso_hash_iter null_iter = {hash, 0}; |
return null_iter; |
} |
{ |
struct cso_hash_iter iter = {hash, node}; |
return iter; |
} |
} |
} |
struct cso_hash * cso_hash_create(void) |
{ |
struct cso_hash *hash = MALLOC_STRUCT(cso_hash); |
if (!hash) |
return NULL; |
hash->data.d = MALLOC_STRUCT(cso_hash_data); |
if (!hash->data.d) { |
FREE(hash); |
return NULL; |
} |
hash->data.d->fakeNext = 0; |
hash->data.d->buckets = 0; |
hash->data.d->size = 0; |
hash->data.d->nodeSize = sizeof(struct cso_node); |
hash->data.d->userNumBits = (short)MinNumBits; |
hash->data.d->numBits = 0; |
hash->data.d->numBuckets = 0; |
return hash; |
} |
void cso_hash_delete(struct cso_hash *hash) |
{ |
struct cso_node *e_for_x = (struct cso_node *)(hash->data.d); |
struct cso_node **bucket = (struct cso_node **)(hash->data.d->buckets); |
int n = hash->data.d->numBuckets; |
while (n--) { |
struct cso_node *cur = *bucket++; |
while (cur != e_for_x) { |
struct cso_node *next = cur->next; |
cso_free_node(cur); |
cur = next; |
} |
} |
FREE(hash->data.d->buckets); |
FREE(hash->data.d); |
FREE(hash); |
} |
struct cso_hash_iter cso_hash_find(struct cso_hash *hash, |
unsigned key) |
{ |
struct cso_node **nextNode = cso_hash_find_node(hash, key); |
struct cso_hash_iter iter = {hash, *nextNode}; |
return iter; |
} |
/* Key stored at the iterator position; 0 for a null/end iterator. */
unsigned cso_hash_iter_key(struct cso_hash_iter iter)
{
   if (iter.node && iter.node != iter.hash->data.e)
      return iter.node->key;
   return 0;
}
/* Value stored at the iterator position; NULL for a null/end iterator. */
void * cso_hash_iter_data(struct cso_hash_iter iter)
{
   if (iter.node && iter.node != iter.hash->data.e)
      return iter.node->value;
   return 0;
}
/**
 * Advance to the node following 'node' in iteration order.
 *
 * Chains end at the sentinel, which is the cso_hash_data block itself
 * viewed as a node (its first member, fakeNext, is NULL).  The union below
 * exploits that layout: when node->next turns out to be the sentinel,
 * 'a.e' is the sentinel and 'a.d' gives access to the bucket array, so the
 * scan can resume at the bucket after the one 'node' lives in.
 */
static struct cso_node *cso_hash_data_next(struct cso_node *node)
{
   union {
      struct cso_node *next;
      struct cso_node *e;
      struct cso_hash_data *d;
   } a;
   int start;
   struct cso_node **bucket;
   int n;

   a.next = node->next;
   if (!a.next) {
      /* 'node' was already the sentinel: there is nothing after the end. */
      debug_printf("iterating beyond the last element\n");
      return 0;
   }
   if (a.next->next)
      return a.next;   /* still inside the same chain */

   /* a.next is the sentinel; continue with the next non-empty bucket. */
   start = (node->key % a.d->numBuckets) + 1;
   bucket = a.d->buckets + start;
   n = a.d->numBuckets - start;
   while (n--) {
      if (*bucket != a.e)
         return *bucket;
      ++bucket;
   }
   /* No more elements: return the sentinel, i.e. the end iterator. */
   return a.e;
}
/**
 * Step back to the node preceding 'node' in iteration order.
 *
 * First follows next pointers to the chain's sentinel (the cso_hash_data
 * block viewed as a node), which exposes the bucket array through the
 * union.  Then scans buckets backwards for the node that links to 'node'
 * (or, when 'node' is the sentinel/end iterator, for the very last node).
 */
static struct cso_node *cso_hash_data_prev(struct cso_node *node)
{
   union {
      struct cso_node *e;
      struct cso_hash_data *d;
   } a;
   int start;
   struct cso_node *sentinel;
   struct cso_node **bucket;

   /* Run to the end of the chain to reach the hash data / sentinel. */
   a.e = node;
   while (a.e->next)
      a.e = a.e->next;

   if (node == a.e)
      start = a.d->numBuckets - 1;    /* end iterator: start at last bucket */
   else
      start = node->key % a.d->numBuckets;

   sentinel = node;
   bucket = a.d->buckets + start;
   while (start >= 0) {
      if (*bucket != sentinel) {
         /* Walk this chain to the node immediately before 'sentinel'. */
         struct cso_node *prev = *bucket;
         while (prev->next != sentinel)
            prev = prev->next;
         return prev;
      }
      /* For earlier buckets, chains terminate at the real sentinel. */
      sentinel = a.e;
      --bucket;
      --start;
   }
   debug_printf("iterating backward beyond first element\n");
   return a.e;
}
struct cso_hash_iter cso_hash_iter_next(struct cso_hash_iter iter) |
{ |
struct cso_hash_iter next = {iter.hash, cso_hash_data_next(iter.node)}; |
return next; |
} |
/* Nonzero when the iterator points nowhere or at the end sentinel. */
int cso_hash_iter_is_null(struct cso_hash_iter iter)
{
   return (!iter.node || iter.node == iter.hash->data.e) ? 1 : 0;
}
void * cso_hash_take(struct cso_hash *hash, |
unsigned akey) |
{ |
struct cso_node **node = cso_hash_find_node(hash, akey); |
if (*node != hash->data.e) { |
void *t = (*node)->value; |
struct cso_node *next = (*node)->next; |
cso_free_node(*node); |
*node = next; |
--hash->data.d->size; |
cso_data_has_shrunk(hash->data.d); |
return t; |
} |
return 0; |
} |
struct cso_hash_iter cso_hash_iter_prev(struct cso_hash_iter iter) |
{ |
struct cso_hash_iter prev = {iter.hash, |
cso_hash_data_prev(iter.node)}; |
return prev; |
} |
struct cso_hash_iter cso_hash_first_node(struct cso_hash *hash) |
{ |
struct cso_hash_iter iter = {hash, cso_data_first_node(hash->data.d)}; |
return iter; |
} |
int cso_hash_size(struct cso_hash *hash) |
{ |
return hash->data.d->size; |
} |
/**
 * Remove the node the iterator points at and return an iterator to the
 * following node.  The stored value is NOT freed; that is the caller's
 * responsibility.  Passing the end iterator is a no-op.
 */
struct cso_hash_iter cso_hash_erase(struct cso_hash *hash, struct cso_hash_iter iter)
{
   struct cso_hash_iter ret = iter;
   struct cso_node *node = iter.node;
   struct cso_node **node_ptr;

   if (node == hash->data.e)
      return iter;    /* end iterator: nothing to erase */

   /* Advance first -- the current node is about to be freed. */
   ret = cso_hash_iter_next(ret);

   /* Find the link pointing at 'node' and splice it out of the chain. */
   node_ptr = (struct cso_node**)(&hash->data.d->buckets[node->key % hash->data.d->numBuckets]);
   while (*node_ptr != node)
      node_ptr = &(*node_ptr)->next;
   *node_ptr = node->next;
   cso_free_node(node);
   --hash->data.d->size;
   return ret;
}
boolean cso_hash_contains(struct cso_hash *hash, unsigned key) |
{ |
struct cso_node **node = cso_hash_find_node(hash, key); |
return (*node != hash->data.e); |
} |
/drivers/video/Gallium/auxiliary/cso_cache/cso_hash.h |
---|
0,0 → 1,129 |
/************************************************************************** |
* |
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/** |
* @file |
* Hash table implementation. |
* |
* This file provides a hash implementation that is capable of dealing |
* with collisions. It stores colliding entries in linked list. All |
* functions operating on the hash return an iterator. The iterator |
* itself points to the collision list. If there wasn't any collision |
* the list will have just one entry, otherwise client code should |
* iterate over the entries to find the exact entry among ones that |
* had the same key (e.g. memcmp could be used on the data to check |
* that) |
* |
* @author Zack Rusin <zack@tungstengraphics.com> |
*/ |
#ifndef CSO_HASH_H |
#define CSO_HASH_H |
#include "pipe/p_compiler.h" |
#ifdef __cplusplus |
extern "C" { |
#endif |
struct cso_hash; |
struct cso_node; |
/* Iterator into a cso_hash: 'node' points at the current entry, or at the
 * hash's internal sentinel when the iterator is at the end / is null. */
struct cso_hash_iter {
   struct cso_hash *hash;
   struct cso_node *node;
};
struct cso_hash *cso_hash_create(void); |
void cso_hash_delete(struct cso_hash *hash); |
int cso_hash_size(struct cso_hash *hash); |
/** |
* Adds a data with the given key to the hash. If entry with the given |
* key is already in the hash, this current entry is inserted before it
* in the collision list. |
* Function returns iterator pointing to the inserted item in the hash. |
*/ |
struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, unsigned key, |
void *data); |
/** |
* Removes the item pointed to by the current iterator from the hash. |
* Note that the data itself is not erased and if it was a malloc'ed pointer
* it will have to be freed after calling this function by the caller.
* Function returns iterator pointing to the item after the removed one in |
* the hash. |
*/ |
struct cso_hash_iter cso_hash_erase(struct cso_hash *hash, struct cso_hash_iter iter); |
void *cso_hash_take(struct cso_hash *hash, unsigned key); |
struct cso_hash_iter cso_hash_first_node(struct cso_hash *hash); |
/** |
* Return an iterator pointing to the first entry in the collision list. |
*/ |
struct cso_hash_iter cso_hash_find(struct cso_hash *hash, unsigned key); |
/** |
* Returns true if a value with the given key exists in the hash |
*/ |
boolean cso_hash_contains(struct cso_hash *hash, unsigned key); |
int cso_hash_iter_is_null(struct cso_hash_iter iter); |
unsigned cso_hash_iter_key(struct cso_hash_iter iter); |
void *cso_hash_iter_data(struct cso_hash_iter iter); |
struct cso_hash_iter cso_hash_iter_next(struct cso_hash_iter iter); |
struct cso_hash_iter cso_hash_iter_prev(struct cso_hash_iter iter); |
/** |
* Convenience routine to iterate over the collision list while doing a memory |
* comparison to see which entry in the list is a direct copy of our template |
* and returns that entry. |
*/ |
void *cso_hash_find_data_from_template( struct cso_hash *hash, |
unsigned hash_key, |
void *templ, |
int size ); |
#ifdef __cplusplus |
} |
#endif |
#endif |
/drivers/video/Gallium/auxiliary/os/os_time.c |
---|
35,14 → 35,8 |
#include "pipe/p_config.h" |
#if defined(PIPE_OS_UNIX) |
# include <time.h> /* timeval */ |
# include <sys/time.h> /* timeval */ |
#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) |
# include <windows.h> |
#else |
# error Unsupported OS |
#endif |
#include "os_time.h" |
50,32 → 44,8 |
/**
 * Return a time stamp in nanoseconds.
 *
 * Backed by clock_gettime(CLOCK_MONOTONIC) on Linux, gettimeofday() on
 * other Unixes (wall-clock based, so subject to clock adjustments), and
 * QueryPerformanceCounter() on Windows.
 */
int64_t
os_time_get_nano(void)
{
#if defined(PIPE_OS_LINUX)
   struct timespec tv;
   clock_gettime(CLOCK_MONOTONIC, &tv);
   return tv.tv_nsec + tv.tv_sec*INT64_C(1000000000);
#elif defined(PIPE_OS_UNIX)
   struct timeval tv;
   gettimeofday(&tv, NULL);
   /* usec -> nsec, sec -> nsec */
   return tv.tv_usec*INT64_C(1000) + tv.tv_sec*INT64_C(1000000000);
#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
   static LARGE_INTEGER frequency;
   LARGE_INTEGER counter;
   /* The performance-counter frequency is fixed; query it only once. */
   if(!frequency.QuadPart)
      QueryPerformanceFrequency(&frequency);
   QueryPerformanceCounter(&counter);
   /* NOTE(review): counter * 1e9 can overflow int64 after long uptimes on
    * high-frequency counters -- confirm this is acceptable here. */
   return counter.QuadPart*INT64_C(1000000000)/frequency.QuadPart;
#else
#error Unsupported OS
#endif
}
/drivers/video/Gallium/auxiliary/pipebuffer/pb_buffer.h |
---|
0,0 → 1,288 |
/************************************************************************** |
* |
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/** |
* \file |
* Generic code for buffers. |
* |
* Behind a pipe buffer handle there can be DMA buffers, client (or user)
* buffers, regular malloced buffers, etc. This file provides an abstract base |
* buffer handle that allows the driver to cope with all those kinds of buffers |
* in a more flexible way. |
* |
* There is no obligation of a winsys driver to use this library. And a pipe |
* driver should be completely agnostic about it.
* |
* \author Jose Fonseca <jrfonseca@tungstengraphics.com> |
*/ |
#ifndef PB_BUFFER_H_ |
#define PB_BUFFER_H_ |
#include "pipe/p_compiler.h" |
#include "util/u_debug.h" |
#include "util/u_inlines.h" |
#include "pipe/p_defines.h" |
#ifdef __cplusplus |
extern "C" { |
#endif |
struct pb_vtbl; |
struct pb_validate; |
struct pipe_fence_handle; |
#define PB_USAGE_CPU_READ (1 << 0) |
#define PB_USAGE_CPU_WRITE (1 << 1) |
#define PB_USAGE_GPU_READ (1 << 2) |
#define PB_USAGE_GPU_WRITE (1 << 3) |
#define PB_USAGE_UNSYNCHRONIZED (1 << 10) |
#define PB_USAGE_DONTBLOCK (1 << 9) |
#define PB_USAGE_CPU_READ_WRITE \ |
( PB_USAGE_CPU_READ | PB_USAGE_CPU_WRITE ) |
#define PB_USAGE_GPU_READ_WRITE \ |
( PB_USAGE_GPU_READ | PB_USAGE_GPU_WRITE ) |
#define PB_USAGE_WRITE \ |
( PB_USAGE_CPU_WRITE | PB_USAGE_GPU_WRITE ) |
/** |
* Buffer description. |
* |
* Used when allocating the buffer. |
*/ |
struct pb_desc
{
   unsigned alignment;   /* required byte alignment of the buffer storage */
   unsigned usage;       /* PB_USAGE_* bitmask */
};
/** |
* Size. Regular (32bit) unsigned for now. |
*/ |
typedef unsigned pb_size; |
/** |
* Base class for all pb_* buffers. |
*/ |
struct pb_buffer
{
   struct pipe_reference reference;   /* reference count */
   unsigned size;                     /* buffer size */
   unsigned alignment;                /* alignment of the storage */
   unsigned usage;                    /* PB_USAGE_* bitmask */

   /**
    * Pointer to the virtual function table.
    *
    * Avoid accessing this table directly. Use the inline functions below
    * instead to avoid mistakes.
    */
   const struct pb_vtbl *vtbl;
};
/** |
* Virtual function table for the buffer storage operations. |
* |
* Note that creation is not done through this table. |
*/ |
struct pb_vtbl
{
   /* Free the buffer and its storage; refcount must already be zero. */
   void (*destroy)( struct pb_buffer *buf );

   /**
    * Map the entire data store of a buffer object into the client's address.
    * flags is bitmask of PB_USAGE_CPU_READ/WRITE.
    */
   void *(*map)( struct pb_buffer *buf,
                 unsigned flags, void *flush_ctx );

   void (*unmap)( struct pb_buffer *buf );

   /* Prepare the buffer for submission under validation list 'vl'. */
   enum pipe_error (*validate)( struct pb_buffer *buf,
                                struct pb_validate *vl,
                                unsigned flags );

   /* Associate a fence with the buffer after submission. */
   void (*fence)( struct pb_buffer *buf,
                  struct pipe_fence_handle *fence );

   /**
    * Get the base buffer and the offset.
    *
    * A buffer can be subdivided in smaller buffers. This method should return
    * the underlying buffer, and the relative offset.
    *
    * Buffers without an underlying base buffer should return themselves, with
    * a zero offset.
    *
    * Note that this will increase the reference count of the base buffer.
    */
   void (*get_base_buffer)( struct pb_buffer *buf,
                            struct pb_buffer **base_buf,
                            pb_size *offset );
};
/* Accessor functions for pb->vtbl: |
*/ |
static INLINE void * |
pb_map(struct pb_buffer *buf, |
unsigned flags, void *flush_ctx) |
{ |
assert(buf); |
if(!buf) |
return NULL; |
assert(pipe_is_referenced(&buf->reference)); |
return buf->vtbl->map(buf, flags, flush_ctx); |
} |
static INLINE void |
pb_unmap(struct pb_buffer *buf) |
{ |
assert(buf); |
if(!buf) |
return; |
assert(pipe_is_referenced(&buf->reference)); |
buf->vtbl->unmap(buf); |
} |
static INLINE void |
pb_get_base_buffer( struct pb_buffer *buf, |
struct pb_buffer **base_buf, |
pb_size *offset ) |
{ |
assert(buf); |
if(!buf) { |
base_buf = NULL; |
offset = 0; |
return; |
} |
assert(pipe_is_referenced(&buf->reference)); |
assert(buf->vtbl->get_base_buffer); |
buf->vtbl->get_base_buffer(buf, base_buf, offset); |
assert(*base_buf); |
assert(*offset < (*base_buf)->size); |
} |
static INLINE enum pipe_error |
pb_validate(struct pb_buffer *buf, struct pb_validate *vl, unsigned flags) |
{ |
assert(buf); |
if(!buf) |
return PIPE_ERROR; |
assert(buf->vtbl->validate); |
return buf->vtbl->validate(buf, vl, flags); |
} |
static INLINE void |
pb_fence(struct pb_buffer *buf, struct pipe_fence_handle *fence) |
{ |
assert(buf); |
if(!buf) |
return; |
assert(buf->vtbl->fence); |
buf->vtbl->fence(buf, fence); |
} |
static INLINE void |
pb_destroy(struct pb_buffer *buf) |
{ |
assert(buf); |
if(!buf) |
return; |
assert(!pipe_is_referenced(&buf->reference)); |
buf->vtbl->destroy(buf); |
} |
/**
 * Make *dst reference src, releasing the reference previously held by *dst
 * and destroying the old buffer when its reference count drops to zero.
 *
 * NOTE(review): pipe_reference is called on &(*dst)->reference
 * unconditionally -- this assumes *dst is never NULL (or that
 * pipe_reference tolerates a NULL slot); confirm against callers.
 */
static INLINE void
pb_reference(struct pb_buffer **dst,
             struct pb_buffer *src)
{
   struct pb_buffer *old = *dst;

   if (pipe_reference(&(*dst)->reference, &src->reference))
      pb_destroy( old );
   *dst = src;
}
/** |
* Utility function to check whether the provided alignment is consistent with |
* the requested or not. |
*/ |
static INLINE boolean |
pb_check_alignment(pb_size requested, pb_size provided) |
{ |
if(!requested) |
return TRUE; |
if(requested > provided) |
return FALSE; |
if(provided % requested != 0) |
return FALSE; |
return TRUE; |
} |
/**
 * Utility function to check whether the provided usage flags are consistent
 * with the requested ones.
 */
static INLINE boolean |
pb_check_usage(unsigned requested, unsigned provided) |
{ |
return (requested & provided) == requested ? TRUE : FALSE; |
} |
/** |
* Malloc-based buffer to store data that can't be used by the graphics |
* hardware. |
*/ |
struct pb_buffer * |
pb_malloc_buffer_create(pb_size size, |
const struct pb_desc *desc); |
#ifdef __cplusplus |
} |
#endif |
#endif /*PB_BUFFER_H_*/ |
/drivers/video/Gallium/auxiliary/pipebuffer/pb_buffer_fenced.c |
---|
0,0 → 1,1069 |
/************************************************************************** |
* |
* Copyright 2007-2010 VMware, Inc. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/** |
* \file |
* Implementation of fenced buffers. |
* |
* \author Jose Fonseca <jfonseca-at-vmware-dot-com> |
* \author Thomas Hellström <thellstrom-at-vmware-dot-com> |
*/ |
#include "pipe/p_config.h" |
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) |
#include <unistd.h> |
#include <sched.h> |
#endif |
#include "pipe/p_compiler.h" |
#include "pipe/p_defines.h" |
#include "util/u_debug.h" |
#include "os/os_thread.h" |
#include "util/u_memory.h" |
#include "util/u_double_list.h" |
#include "pb_buffer.h" |
#include "pb_buffer_fenced.h" |
#include "pb_bufmgr.h" |
/** |
* Convenience macro (type safe). |
*/ |
#define SUPER(__derived) (&(__derived)->base) |
/* pb_manager that wraps another manager ('provider') and adds fencing:
 * buffers still in flight on the GPU stay on the fenced list until their
 * fence expires. */
struct fenced_manager
{
   struct pb_manager base;
   struct pb_manager *provider;   /* underlying manager that owns real storage */
   struct pb_fence_ops *ops;      /* fence reference/signalled/finish callbacks */

   /**
    * Maximum buffer size that can be safely allocated.
    */
   pb_size max_buffer_size;

   /**
    * Maximum cpu memory we can allocate before we start waiting for the
    * GPU to idle.
    */
   pb_size max_cpu_total_size;

   /**
    * Following members are mutable and protected by this mutex.
    */
   pipe_mutex mutex;

   /**
    * Fenced buffer list.
    *
    * All fenced buffers are placed in this list, ordered from the oldest
    * fence to the newest fence.
    */
   struct list_head fenced;
   pb_size num_fenced;

   struct list_head unfenced;     /* buffers with no pending fence */
   pb_size num_unfenced;

   /**
    * How much temporary CPU memory is being used to hold unvalidated buffers.
    */
   pb_size cpu_total_size;
};
/** |
* Fenced buffer. |
* |
* Wrapper around a pipe buffer which adds fencing and reference counting. |
*/ |
struct fenced_buffer
{
   /*
    * Immutable members.
    */

   struct pb_buffer base;          /* public pb_buffer interface */
   struct fenced_manager *mgr;     /* owning manager */

   /*
    * Following members are mutable and protected by fenced_manager::mutex.
    */

   /* Link in fenced_manager::fenced or ::unfenced. */
   struct list_head head;

   /**
    * Buffer with storage.
    */
   struct pb_buffer *buffer;       /* GPU storage (NULL when swapped out) */
   pb_size size;
   struct pb_desc desc;

   /**
    * Temporary CPU storage data. Used when there isn't enough GPU memory to
    * store the buffer.
    */
   void *data;

   /**
    * A bitmask of PB_USAGE_CPU/GPU_READ/WRITE describing the current
    * buffer usage.
    */
   unsigned flags;

   unsigned mapcount;              /* number of outstanding pb_map calls */

   struct pb_validate *vl;         /* non-NULL while on a validation list */
   unsigned validation_flags;

   struct pipe_fence_handle *fence; /* pending fence; NULL when unfenced */
};
/* Downcast: struct fenced_manager embeds pb_manager as its first member. */
static INLINE struct fenced_manager *
fenced_manager(struct pb_manager *mgr)
{
   struct fenced_manager *fenced_mgr = (struct fenced_manager *)mgr;
   assert(mgr);
   return fenced_mgr;
}
/* Downcast: struct fenced_buffer embeds pb_buffer as its first member. */
static INLINE struct fenced_buffer *
fenced_buffer(struct pb_buffer *buf)
{
   struct fenced_buffer *fenced_buf = (struct fenced_buffer *)buf;
   assert(buf);
   return fenced_buf;
}
static void |
fenced_buffer_destroy_cpu_storage_locked(struct fenced_buffer *fenced_buf); |
static enum pipe_error |
fenced_buffer_create_cpu_storage_locked(struct fenced_manager *fenced_mgr, |
struct fenced_buffer *fenced_buf); |
static void |
fenced_buffer_destroy_gpu_storage_locked(struct fenced_buffer *fenced_buf); |
static enum pipe_error |
fenced_buffer_create_gpu_storage_locked(struct fenced_manager *fenced_mgr, |
struct fenced_buffer *fenced_buf, |
boolean wait); |
static enum pipe_error |
fenced_buffer_copy_storage_to_gpu_locked(struct fenced_buffer *fenced_buf); |
static enum pipe_error |
fenced_buffer_copy_storage_to_cpu_locked(struct fenced_buffer *fenced_buf); |
/** |
* Dump the fenced buffer list. |
* |
* Useful to understand failures to allocate buffers. |
*/ |
/* Debug-build only dump of both buffer lists; caller holds the mutex. */
static void
fenced_manager_dump_locked(struct fenced_manager *fenced_mgr)
{
#ifdef DEBUG
   struct pb_fence_ops *ops = fenced_mgr->ops;
   struct list_head *curr, *next;
   struct fenced_buffer *fenced_buf;

   debug_printf("%10s %7s %8s %7s %10s %s\n",
                "buffer", "size", "refcount", "storage", "fence", "signalled");

   /* Unfenced buffers first: these have no fence column to print. */
   curr = fenced_mgr->unfenced.next;
   next = curr->next;
   while(curr != &fenced_mgr->unfenced) {
      fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
      assert(!fenced_buf->fence);
      debug_printf("%10p %7u %8u %7s\n",
                   (void *) fenced_buf,
                   fenced_buf->base.size,
                   p_atomic_read(&fenced_buf->base.reference.count),
                   fenced_buf->buffer ? "gpu" : (fenced_buf->data ? "cpu" : "none"));
      curr = next;
      next = curr->next;
   }

   /* Then fenced buffers, including their fence-signalled status. */
   curr = fenced_mgr->fenced.next;
   next = curr->next;
   while(curr != &fenced_mgr->fenced) {
      int signaled;
      fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
      assert(fenced_buf->buffer);
      signaled = ops->fence_signalled(ops, fenced_buf->fence, 0);
      /* fence_signalled returns 0 when the fence has already signalled. */
      debug_printf("%10p %7u %8u %7s %10p %s\n",
                   (void *) fenced_buf,
                   fenced_buf->base.size,
                   p_atomic_read(&fenced_buf->base.reference.count),
                   "gpu",
                   (void *) fenced_buf->fence,
                   signaled == 0 ? "y" : "n");
      curr = next;
      next = curr->next;
   }
#else
   (void)fenced_mgr;
#endif
}
/* Unlink from the unfenced list and release all storage.  Caller holds the
 * manager mutex; the buffer must be unreferenced and have no pending fence. */
static INLINE void
fenced_buffer_destroy_locked(struct fenced_manager *fenced_mgr,
                             struct fenced_buffer *fenced_buf)
{
   assert(!pipe_is_referenced(&fenced_buf->base.reference));
   assert(!fenced_buf->fence);

   assert(fenced_buf->head.prev);
   assert(fenced_buf->head.next);
   LIST_DEL(&fenced_buf->head);
   assert(fenced_mgr->num_unfenced);
   --fenced_mgr->num_unfenced;

   fenced_buffer_destroy_gpu_storage_locked(fenced_buf);
   fenced_buffer_destroy_cpu_storage_locked(fenced_buf);

   FREE(fenced_buf);
}
/** |
* Add the buffer to the fenced list. |
* |
* Reference count should be incremented before calling this function. |
*/ |
static INLINE void
fenced_buffer_add_locked(struct fenced_manager *fenced_mgr,
                         struct fenced_buffer *fenced_buf)
{
   assert(pipe_is_referenced(&fenced_buf->base.reference));
   assert(fenced_buf->flags & PB_USAGE_GPU_READ_WRITE);
   assert(fenced_buf->fence);

   /* The fenced list holds its own reference until the fence expires
    * (released in fenced_buffer_remove_locked). */
   p_atomic_inc(&fenced_buf->base.reference.count);

   /* Move from the unfenced list to the tail of the fenced list, which
    * keeps the fenced list ordered oldest-fence-first. */
   LIST_DEL(&fenced_buf->head);
   assert(fenced_mgr->num_unfenced);
   --fenced_mgr->num_unfenced;
   LIST_ADDTAIL(&fenced_buf->head, &fenced_mgr->fenced);
   ++fenced_mgr->num_fenced;
}
/** |
* Remove the buffer from the fenced list, and potentially destroy the buffer |
* if the reference count reaches zero. |
* |
* Returns TRUE if the buffer was destroyed.
*/ |
static INLINE boolean
fenced_buffer_remove_locked(struct fenced_manager *fenced_mgr,
                            struct fenced_buffer *fenced_buf)
{
   struct pb_fence_ops *ops = fenced_mgr->ops;

   assert(fenced_buf->fence);
   assert(fenced_buf->mgr == fenced_mgr);

   /* Drop the fence and clear the GPU-usage flags. */
   ops->fence_reference(ops, &fenced_buf->fence, NULL);
   fenced_buf->flags &= ~PB_USAGE_GPU_READ_WRITE;

   /* Move back to the unfenced list. */
   assert(fenced_buf->head.prev);
   assert(fenced_buf->head.next);
   LIST_DEL(&fenced_buf->head);
   assert(fenced_mgr->num_fenced);
   --fenced_mgr->num_fenced;
   LIST_ADDTAIL(&fenced_buf->head, &fenced_mgr->unfenced);
   ++fenced_mgr->num_unfenced;

   /* Release the reference the fenced list held (taken in add_locked);
    * destroy the buffer if that was the last reference. */
   if (p_atomic_dec_zero(&fenced_buf->base.reference.count)) {
      fenced_buffer_destroy_locked(fenced_mgr, fenced_buf);
      return TRUE;
   }
   return FALSE;
}
/** |
* Wait for the fence to expire, and remove it from the fenced list. |
* |
* This function will release and re-acquire the mutex, so any copy of mutable
* state must be discarded after calling it. |
*/ |
static INLINE enum pipe_error
fenced_buffer_finish_locked(struct fenced_manager *fenced_mgr,
                            struct fenced_buffer *fenced_buf)
{
   struct pb_fence_ops *ops = fenced_mgr->ops;
   enum pipe_error ret = PIPE_ERROR;

#if 0
   debug_warning("waiting for GPU");
#endif

   assert(pipe_is_referenced(&fenced_buf->base.reference));
   assert(fenced_buf->fence);

   if(fenced_buf->fence) {
      struct pipe_fence_handle *fence = NULL;
      int finished;
      boolean proceed;

      /* Hold an extra fence reference across the unlocked wait so we can
       * detect whether another thread replaced the buffer's fence. */
      ops->fence_reference(ops, &fence, fenced_buf->fence);

      /* The wait can block; drop the mutex around it. */
      pipe_mutex_unlock(fenced_mgr->mutex);

      finished = ops->fence_finish(ops, fenced_buf->fence, 0);

      pipe_mutex_lock(fenced_mgr->mutex);

      assert(pipe_is_referenced(&fenced_buf->base.reference));

      /*
       * Only proceed if the fence object didn't change in the meanwhile.
       * Otherwise assume the work has been already carried out by another
       * thread that re-acquired the lock before us.
       */
      proceed = fence == fenced_buf->fence ? TRUE : FALSE;

      ops->fence_reference(ops, &fence, NULL);

      /* fence_finish returns 0 on success. */
      if(proceed && finished == 0) {
         /*
          * Remove from the fenced list
          */

         boolean destroyed;

         destroyed = fenced_buffer_remove_locked(fenced_mgr, fenced_buf);

         /* TODO: remove consequents buffers with the same fence? */

         /* We hold a reference, so removal can never be the last one. */
         assert(!destroyed);

         fenced_buf->flags &= ~PB_USAGE_GPU_READ_WRITE;

         ret = PIPE_OK;
      }
   }

   return ret;
}
/** |
* Remove as many fenced buffers from the fenced list as possible. |
* |
* Returns TRUE if at least one buffer was removed. |
*/ |
static boolean
fenced_manager_check_signalled_locked(struct fenced_manager *fenced_mgr,
                                      boolean wait)
{
   struct pb_fence_ops *ops = fenced_mgr->ops;
   struct list_head *curr, *next;
   struct fenced_buffer *fenced_buf;
   struct pipe_fence_handle *prev_fence = NULL;
   boolean ret = FALSE;

   /* Save 'next' up front: removing a buffer invalidates 'curr'. */
   curr = fenced_mgr->fenced.next;
   next = curr->next;
   while(curr != &fenced_mgr->fenced) {
      fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);

      if(fenced_buf->fence != prev_fence) {
	 int signaled;

	 if (wait) {
	    signaled = ops->fence_finish(ops, fenced_buf->fence, 0);

	    /*
	     * Don't return just now. Instead preemptively check if the
	     * following buffers' fences already expired, without further waits.
	     */
	    wait = FALSE;
	 }
	 else {
	    signaled = ops->fence_signalled(ops, fenced_buf->fence, 0);
	 }

	 /* List is fence-ordered: once one fence is pending, stop. */
	 if (signaled != 0) {
	    return ret;
	 }

	 prev_fence = fenced_buf->fence;
      }
      else {
         /* This buffer's fence object is identical to the previous buffer's
          * fence object, so no need to check the fence again.
          */
         assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0);
      }

      fenced_buffer_remove_locked(fenced_mgr, fenced_buf);

      ret = TRUE;

      curr = next;
      next = curr->next;
   }

   return ret;
}
/** |
* Try to free some GPU memory by backing it up into CPU memory. |
* |
* Returns TRUE if at least one buffer was freed. |
*/ |
static boolean
fenced_manager_free_gpu_storage_locked(struct fenced_manager *fenced_mgr)
{
   struct list_head *curr, *next;
   struct fenced_buffer *fenced_buf;

   curr = fenced_mgr->unfenced.next;
   next = curr->next;
   while(curr != &fenced_mgr->unfenced) {
      fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);

      /*
       * We can only move storage if the buffer is not mapped and not
       * validated.
       */
      if(fenced_buf->buffer &&
         !fenced_buf->mapcount &&
         !fenced_buf->vl) {
         enum pipe_error ret;

         /* Create the CPU copy first; only drop GPU storage once the
          * contents have been safely copied over. */
         ret = fenced_buffer_create_cpu_storage_locked(fenced_mgr, fenced_buf);
         if(ret == PIPE_OK) {
            ret = fenced_buffer_copy_storage_to_cpu_locked(fenced_buf);
            if(ret == PIPE_OK) {
               fenced_buffer_destroy_gpu_storage_locked(fenced_buf);
               return TRUE;
            }
            /* Copy failed: undo the CPU allocation. */
            fenced_buffer_destroy_cpu_storage_locked(fenced_buf);
         }
      }

      curr = next;
      next = curr->next;
   }

   return FALSE;
}
/** |
* Destroy CPU storage for this buffer. |
*/ |
static void |
fenced_buffer_destroy_cpu_storage_locked(struct fenced_buffer *fenced_buf) |
{ |
if(fenced_buf->data) { |
align_free(fenced_buf->data); |
fenced_buf->data = NULL; |
assert(fenced_buf->mgr->cpu_total_size >= fenced_buf->size); |
fenced_buf->mgr->cpu_total_size -= fenced_buf->size; |
} |
} |
/** |
* Create CPU storage for this buffer. |
*/ |
static enum pipe_error |
fenced_buffer_create_cpu_storage_locked(struct fenced_manager *fenced_mgr, |
struct fenced_buffer *fenced_buf) |
{ |
assert(!fenced_buf->data); |
if(fenced_buf->data) |
return PIPE_OK; |
if (fenced_mgr->cpu_total_size + fenced_buf->size > fenced_mgr->max_cpu_total_size) |
return PIPE_ERROR_OUT_OF_MEMORY; |
fenced_buf->data = align_malloc(fenced_buf->size, fenced_buf->desc.alignment); |
if(!fenced_buf->data) |
return PIPE_ERROR_OUT_OF_MEMORY; |
fenced_mgr->cpu_total_size += fenced_buf->size; |
return PIPE_OK; |
} |
/** |
* Destroy the GPU storage. |
*/ |
static void |
fenced_buffer_destroy_gpu_storage_locked(struct fenced_buffer *fenced_buf) |
{ |
if(fenced_buf->buffer) { |
pb_reference(&fenced_buf->buffer, NULL); |
} |
} |
/** |
* Try to create GPU storage for this buffer. |
* |
* This function is a shorthand around pb_manager::create_buffer for |
* fenced_buffer_create_gpu_storage_locked()'s benefit. |
*/ |
static INLINE boolean |
fenced_buffer_try_create_gpu_storage_locked(struct fenced_manager *fenced_mgr, |
struct fenced_buffer *fenced_buf) |
{ |
struct pb_manager *provider = fenced_mgr->provider; |
assert(!fenced_buf->buffer); |
fenced_buf->buffer = provider->create_buffer(fenced_mgr->provider, |
fenced_buf->size, |
&fenced_buf->desc); |
return fenced_buf->buffer ? TRUE : FALSE; |
} |
/**
 * Create GPU storage for this buffer, evicting other storage if needed.
 *
 * Reclaims storage of already-signalled fences, then retries allocation
 * while forward progress is possible (fences expiring, or idle buffers
 * being swapped out to CPU memory). If \p wait is TRUE a second pass
 * additionally blocks on outstanding fences.
 *
 * Caller must hold the manager mutex.
 *
 * \return PIPE_OK on success; PIPE_ERROR_OUT_OF_MEMORY if no GPU storage
 *         could be obtained.
 */
static enum pipe_error
fenced_buffer_create_gpu_storage_locked(struct fenced_manager *fenced_mgr,
                                        struct fenced_buffer *fenced_buf,
                                        boolean wait)
{
   assert(!fenced_buf->buffer);

   /*
    * Check for signaled buffers before trying to allocate.
    */
   fenced_manager_check_signalled_locked(fenced_mgr, FALSE);

   fenced_buffer_try_create_gpu_storage_locked(fenced_mgr, fenced_buf);

   /*
    * Keep trying while there is some sort of progress:
    * - fences are expiring,
    * - or buffers are being swapped out from GPU memory into CPU memory.
    */
   while(!fenced_buf->buffer &&
         (fenced_manager_check_signalled_locked(fenced_mgr, FALSE) ||
          fenced_manager_free_gpu_storage_locked(fenced_mgr))) {
      fenced_buffer_try_create_gpu_storage_locked(fenced_mgr, fenced_buf);
   }

   if(!fenced_buf->buffer && wait) {
      /*
       * Same as before, but this time around, wait on fences to free
       * buffers if necessary.
       */
      while(!fenced_buf->buffer &&
            (fenced_manager_check_signalled_locked(fenced_mgr, TRUE) ||
             fenced_manager_free_gpu_storage_locked(fenced_mgr))) {
         fenced_buffer_try_create_gpu_storage_locked(fenced_mgr, fenced_buf);
      }
   }

   if(!fenced_buf->buffer) {
      /* Flip to 1 for diagnostics when debugging allocation failures. */
      if(0)
         fenced_manager_dump_locked(fenced_mgr);

      /* give up */
      return PIPE_ERROR_OUT_OF_MEMORY;
   }

   return PIPE_OK;
}
static enum pipe_error |
fenced_buffer_copy_storage_to_gpu_locked(struct fenced_buffer *fenced_buf) |
{ |
uint8_t *map; |
assert(fenced_buf->data); |
assert(fenced_buf->buffer); |
map = pb_map(fenced_buf->buffer, PB_USAGE_CPU_WRITE, NULL); |
if(!map) |
return PIPE_ERROR; |
memcpy(map, fenced_buf->data, fenced_buf->size); |
pb_unmap(fenced_buf->buffer); |
return PIPE_OK; |
} |
static enum pipe_error |
fenced_buffer_copy_storage_to_cpu_locked(struct fenced_buffer *fenced_buf) |
{ |
const uint8_t *map; |
assert(fenced_buf->data); |
assert(fenced_buf->buffer); |
map = pb_map(fenced_buf->buffer, PB_USAGE_CPU_READ, NULL); |
if(!map) |
return PIPE_ERROR; |
memcpy(fenced_buf->data, map, fenced_buf->size); |
pb_unmap(fenced_buf->buffer); |
return PIPE_OK; |
} |
/**
 * pb_buffer::destroy implementation.
 *
 * Called once the reference count has dropped to zero; takes the manager
 * mutex and delegates to fenced_buffer_destroy_locked().
 */
static void
fenced_buffer_destroy(struct pb_buffer *buf)
{
   struct fenced_buffer *fenced_buf = fenced_buffer(buf);
   struct fenced_manager *fenced_mgr = fenced_buf->mgr;

   assert(!pipe_is_referenced(&fenced_buf->base.reference));

   pipe_mutex_lock(fenced_mgr->mutex);
   fenced_buffer_destroy_locked(fenced_mgr, fenced_buf);
   pipe_mutex_unlock(fenced_mgr->mutex);
}
/**
 * pb_buffer::map implementation for fenced buffers.
 *
 * Maps either the GPU storage or the CPU shadow storage, whichever the
 * buffer currently has. May block waiting for the GPU unless
 * PB_USAGE_DONTBLOCK or PB_USAGE_UNSYNCHRONIZED is given.
 *
 * \return pointer to the mapped storage, or NULL on failure / would-block.
 */
static void *
fenced_buffer_map(struct pb_buffer *buf,
                  unsigned flags, void *flush_ctx)
{
   struct fenced_buffer *fenced_buf = fenced_buffer(buf);
   struct fenced_manager *fenced_mgr = fenced_buf->mgr;
   struct pb_fence_ops *ops = fenced_mgr->ops;
   void *map = NULL;

   pipe_mutex_lock(fenced_mgr->mutex);

   /* GPU-side mapping is not supported through this interface. */
   assert(!(flags & PB_USAGE_GPU_READ_WRITE));

   /*
    * Serialize writes: a CPU map must not overlap a pending GPU write,
    * and a CPU write must not overlap a pending GPU read.
    */
   while((fenced_buf->flags & PB_USAGE_GPU_WRITE) ||
         ((fenced_buf->flags & PB_USAGE_GPU_READ) &&
          (flags & PB_USAGE_CPU_WRITE))) {

      /*
       * Don't wait for the GPU to finish accessing it, if blocking is
       * forbidden. Non-zero fence_signalled means not yet signalled.
       */
      if((flags & PB_USAGE_DONTBLOCK) &&
          ops->fence_signalled(ops, fenced_buf->fence, 0) != 0) {
         goto done;
      }

      /* Caller explicitly opted out of synchronization. */
      if (flags & PB_USAGE_UNSYNCHRONIZED) {
         break;
      }

      /*
       * Wait for the GPU to finish accessing. This will release and
       * re-acquire the mutex, so all copies of mutable state must be
       * discarded.
       */
      fenced_buffer_finish_locked(fenced_mgr, fenced_buf);
   }

   if(fenced_buf->buffer) {
      map = pb_map(fenced_buf->buffer, flags, flush_ctx);
   }
   else {
      /* No GPU storage: the buffer must be living in CPU memory. */
      assert(fenced_buf->data);
      map = fenced_buf->data;
   }

   if(map) {
      ++fenced_buf->mapcount;
      fenced_buf->flags |= flags & PB_USAGE_CPU_READ_WRITE;
   }

done:
   pipe_mutex_unlock(fenced_mgr->mutex);

   return map;
}
static void |
fenced_buffer_unmap(struct pb_buffer *buf) |
{ |
struct fenced_buffer *fenced_buf = fenced_buffer(buf); |
struct fenced_manager *fenced_mgr = fenced_buf->mgr; |
pipe_mutex_lock(fenced_mgr->mutex); |
assert(fenced_buf->mapcount); |
if(fenced_buf->mapcount) { |
if (fenced_buf->buffer) |
pb_unmap(fenced_buf->buffer); |
--fenced_buf->mapcount; |
if(!fenced_buf->mapcount) |
fenced_buf->flags &= ~PB_USAGE_CPU_READ_WRITE; |
} |
pipe_mutex_unlock(fenced_mgr->mutex); |
} |
/**
 * pb_buffer::validate implementation for fenced buffers.
 *
 * Ensures the buffer has GPU storage (creating it and migrating the CPU
 * shadow copy if necessary), then validates the underlying buffer into
 * the given validation list. A NULL \p vl invalidates the buffer instead.
 *
 * \return PIPE_OK on success; PIPE_ERROR_RETRY if the buffer is already
 *         validated in a different list; other errors from storage
 *         creation/copy or pb_validate.
 */
static enum pipe_error
fenced_buffer_validate(struct pb_buffer *buf,
                       struct pb_validate *vl,
                       unsigned flags)
{
   struct fenced_buffer *fenced_buf = fenced_buffer(buf);
   struct fenced_manager *fenced_mgr = fenced_buf->mgr;
   enum pipe_error ret;

   pipe_mutex_lock(fenced_mgr->mutex);

   if(!vl) {
      /* invalidate */
      fenced_buf->vl = NULL;
      fenced_buf->validation_flags = 0;
      ret = PIPE_OK;
      goto done;
   }

   /* Only GPU usage flags are meaningful here. */
   assert(flags & PB_USAGE_GPU_READ_WRITE);
   assert(!(flags & ~PB_USAGE_GPU_READ_WRITE));
   flags &= PB_USAGE_GPU_READ_WRITE;

   /* Buffer cannot be validated in two different lists */
   if(fenced_buf->vl && fenced_buf->vl != vl) {
      ret = PIPE_ERROR_RETRY;
      goto done;
   }

   if(fenced_buf->vl == vl &&
      (fenced_buf->validation_flags & flags) == flags) {
      /* Nothing to do -- buffer already validated */
      ret = PIPE_OK;
      goto done;
   }

   /*
    * Create and update GPU storage.
    */
   if(!fenced_buf->buffer) {
      assert(!fenced_buf->mapcount);

      ret = fenced_buffer_create_gpu_storage_locked(fenced_mgr, fenced_buf, TRUE);
      if(ret != PIPE_OK) {
         goto done;
      }

      ret = fenced_buffer_copy_storage_to_gpu_locked(fenced_buf);
      if(ret != PIPE_OK) {
         fenced_buffer_destroy_gpu_storage_locked(fenced_buf);
         goto done;
      }

      /* Release the CPU shadow copy only when nobody has it mapped.
       * (Release builds can reach here with mapcount != 0 despite the
       * assert above.) */
      if(fenced_buf->mapcount) {
         debug_printf("warning: validating a buffer while it is still mapped\n");
      }
      else {
         fenced_buffer_destroy_cpu_storage_locked(fenced_buf);
      }
   }

   ret = pb_validate(fenced_buf->buffer, vl, flags);
   if (ret != PIPE_OK)
      goto done;

   fenced_buf->vl = vl;
   fenced_buf->validation_flags |= flags;

done:
   pipe_mutex_unlock(fenced_mgr->mutex);

   return ret;
}
/**
 * pb_buffer::fence implementation: (re)associate the buffer with a fence.
 *
 * Replaces any previous fence, moves the buffer between the unfenced and
 * fenced lists accordingly, promotes the pending validation flags to
 * actual usage flags, and clears the validation state.
 */
static void
fenced_buffer_fence(struct pb_buffer *buf,
                    struct pipe_fence_handle *fence)
{
   struct fenced_buffer *fenced_buf = fenced_buffer(buf);
   struct fenced_manager *fenced_mgr = fenced_buf->mgr;
   struct pb_fence_ops *ops = fenced_mgr->ops;

   pipe_mutex_lock(fenced_mgr->mutex);

   assert(pipe_is_referenced(&fenced_buf->base.reference));
   assert(fenced_buf->buffer);

   if(fence != fenced_buf->fence) {
      /* Only a validated buffer may be fenced. */
      assert(fenced_buf->vl);
      assert(fenced_buf->validation_flags);

      if (fenced_buf->fence) {
         boolean destroyed;
         /* Drop the old fence. The buffer must survive removal because
          * the caller still holds a reference (asserted above). */
         destroyed = fenced_buffer_remove_locked(fenced_mgr, fenced_buf);
         assert(!destroyed);
      }
      if (fence) {
         ops->fence_reference(ops, &fenced_buf->fence, fence);
         /* Pending GPU usage becomes actual usage. */
         fenced_buf->flags |= fenced_buf->validation_flags;
         fenced_buffer_add_locked(fenced_mgr, fenced_buf);
      }

      pb_fence(fenced_buf->buffer, fence);

      fenced_buf->vl = NULL;
      fenced_buf->validation_flags = 0;
   }

   pipe_mutex_unlock(fenced_mgr->mutex);
}
static void |
fenced_buffer_get_base_buffer(struct pb_buffer *buf, |
struct pb_buffer **base_buf, |
pb_size *offset) |
{ |
struct fenced_buffer *fenced_buf = fenced_buffer(buf); |
struct fenced_manager *fenced_mgr = fenced_buf->mgr; |
pipe_mutex_lock(fenced_mgr->mutex); |
/* |
* This should only be called when the buffer is validated. Typically |
* when processing relocations. |
*/ |
assert(fenced_buf->vl); |
assert(fenced_buf->buffer); |
if(fenced_buf->buffer) |
pb_get_base_buffer(fenced_buf->buffer, base_buf, offset); |
else { |
*base_buf = buf; |
*offset = 0; |
} |
pipe_mutex_unlock(fenced_mgr->mutex); |
} |
/**
 * Virtual function table binding the fenced buffer implementation to the
 * generic pb_buffer interface. Entry order must match struct pb_vtbl:
 * destroy, map, unmap, validate, fence, get_base_buffer.
 */
static const struct pb_vtbl
fenced_buffer_vtbl = {
      fenced_buffer_destroy,
      fenced_buffer_map,
      fenced_buffer_unmap,
      fenced_buffer_validate,
      fenced_buffer_fence,
      fenced_buffer_get_base_buffer
};
/**
 * Wrap a buffer in a fenced buffer.
 *
 * pb_manager::create_buffer implementation. Prefers GPU storage without
 * stalling, falls back to a CPU shadow allocation, and as a last resort
 * waits for GPU storage to become available.
 *
 * \return the new buffer, or NULL if the size exceeds max_buffer_size or
 *         no storage of either kind could be obtained.
 */
static struct pb_buffer *
fenced_bufmgr_create_buffer(struct pb_manager *mgr,
                            pb_size size,
                            const struct pb_desc *desc)
{
   struct fenced_manager *fenced_mgr = fenced_manager(mgr);
   struct fenced_buffer *fenced_buf;
   enum pipe_error ret;

   /*
    * Don't stall the GPU, waste time evicting buffers, or waste memory
    * trying to create a buffer that will most likely never fit into the
    * graphics aperture.
    */
   if(size > fenced_mgr->max_buffer_size) {
      goto no_buffer;
   }

   fenced_buf = CALLOC_STRUCT(fenced_buffer);
   if(!fenced_buf)
      goto no_buffer;

   pipe_reference_init(&fenced_buf->base.reference, 1);
   fenced_buf->base.alignment = desc->alignment;
   fenced_buf->base.usage = desc->usage;
   fenced_buf->base.size = size;
   fenced_buf->size = size;
   fenced_buf->desc = *desc;

   fenced_buf->base.vtbl = &fenced_buffer_vtbl;
   fenced_buf->mgr = fenced_mgr;

   pipe_mutex_lock(fenced_mgr->mutex);

   /*
    * Try to create GPU storage without stalling.
    */
   ret = fenced_buffer_create_gpu_storage_locked(fenced_mgr, fenced_buf, FALSE);

   /*
    * Attempt to use CPU memory to avoid stalling the GPU.
    */
   if(ret != PIPE_OK) {
      ret = fenced_buffer_create_cpu_storage_locked(fenced_mgr, fenced_buf);
   }

   /*
    * Create GPU storage, waiting for some to be available.
    */
   if(ret != PIPE_OK) {
      ret = fenced_buffer_create_gpu_storage_locked(fenced_mgr, fenced_buf, TRUE);
   }

   /*
    * Give up.
    */
   if(ret != PIPE_OK) {
      goto no_storage;
   }

   assert(fenced_buf->buffer || fenced_buf->data);

   /* New buffers start out on the unfenced list. */
   LIST_ADDTAIL(&fenced_buf->head, &fenced_mgr->unfenced);
   ++fenced_mgr->num_unfenced;
   pipe_mutex_unlock(fenced_mgr->mutex);

   return &fenced_buf->base;

no_storage:
   pipe_mutex_unlock(fenced_mgr->mutex);
   FREE(fenced_buf);
no_buffer:
   return NULL;
}
static void |
fenced_bufmgr_flush(struct pb_manager *mgr) |
{ |
struct fenced_manager *fenced_mgr = fenced_manager(mgr); |
pipe_mutex_lock(fenced_mgr->mutex); |
while(fenced_manager_check_signalled_locked(fenced_mgr, TRUE)) |
; |
pipe_mutex_unlock(fenced_mgr->mutex); |
assert(fenced_mgr->provider->flush); |
if(fenced_mgr->provider->flush) |
fenced_mgr->provider->flush(fenced_mgr->provider); |
} |
/**
 * pb_manager::destroy implementation.
 *
 * Spins — yielding the CPU where the platform supports it — until all
 * outstanding fences have signalled, then tears down the mutex, the
 * wrapped provider and the fence ops.
 */
static void
fenced_bufmgr_destroy(struct pb_manager *mgr)
{
   struct fenced_manager *fenced_mgr = fenced_manager(mgr);

   pipe_mutex_lock(fenced_mgr->mutex);

   /* Wait on outstanding fences */
   while (fenced_mgr->num_fenced) {
      /* Busy-wait: drop the lock so other threads can make progress,
       * yield, re-acquire, then reap any newly signalled fences. */
      pipe_mutex_unlock(fenced_mgr->mutex);
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
      sched_yield();
#endif
      pipe_mutex_lock(fenced_mgr->mutex);
      while(fenced_manager_check_signalled_locked(fenced_mgr, TRUE))
         ;
   }

#ifdef DEBUG
   /*assert(!fenced_mgr->num_unfenced);*/
#endif

   pipe_mutex_unlock(fenced_mgr->mutex);
   pipe_mutex_destroy(fenced_mgr->mutex);

   if(fenced_mgr->provider)
      fenced_mgr->provider->destroy(fenced_mgr->provider);

   fenced_mgr->ops->destroy(fenced_mgr->ops);

   FREE(fenced_mgr);
}
/**
 * Create a fenced buffer manager wrapping \p provider.
 *
 * \param provider            manager supplying the actual storage; it is
 *                            destroyed together with this manager.
 * \param ops                 fence operations. NOTE(review): not checked
 *                            for NULL, and the destroy path dereferences
 *                            it unconditionally — callers must pass a
 *                            valid pointer.
 * \param max_buffer_size     largest buffer this manager will create.
 * \param max_cpu_total_size  budget for CPU-side shadow storage.
 * \return the new manager, or NULL on bad provider / out of memory.
 */
struct pb_manager *
fenced_bufmgr_create(struct pb_manager *provider,
                     struct pb_fence_ops *ops,
                     pb_size max_buffer_size,
                     pb_size max_cpu_total_size)
{
   struct fenced_manager *fenced_mgr;

   if(!provider)
      return NULL;

   fenced_mgr = CALLOC_STRUCT(fenced_manager);
   if (!fenced_mgr)
      return NULL;

   fenced_mgr->base.destroy = fenced_bufmgr_destroy;
   fenced_mgr->base.create_buffer = fenced_bufmgr_create_buffer;
   fenced_mgr->base.flush = fenced_bufmgr_flush;

   fenced_mgr->provider = provider;
   fenced_mgr->ops = ops;
   fenced_mgr->max_buffer_size = max_buffer_size;
   fenced_mgr->max_cpu_total_size = max_cpu_total_size;

   LIST_INITHEAD(&fenced_mgr->fenced);
   fenced_mgr->num_fenced = 0;

   LIST_INITHEAD(&fenced_mgr->unfenced);
   fenced_mgr->num_unfenced = 0;

   pipe_mutex_init(fenced_mgr->mutex);

   return &fenced_mgr->base;
}
/drivers/video/Gallium/auxiliary/pipebuffer/pb_buffer_fenced.h |
---|
0,0 → 1,104 |
/************************************************************************** |
* |
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/** |
* \file |
* Buffer fencing. |
* |
* "Fenced buffers" is actually a misnomer. They should be referred as |
* "fenceable buffers", i.e, buffers that can be fenced, but I couldn't find |
* the word "fenceable" in the dictionary. |
* |
* A "fenced buffer" is a decorator around a normal buffer, which adds two |
* special properties: |
* - the ability for the destruction to be delayed by a fence; |
* - reference counting. |
* |
* Usually DMA buffers have a life-time that will extend the life-time of its |
* handle. The end-of-life is dictated by the fence signalling. |
* |
* Between the handle's destruction, and the fence signalling, the buffer is |
* stored in a fenced buffer list. |
* |
* \author Jose Fonseca <jrfonseca@tungstengraphics.com> |
*/ |
#ifndef PB_BUFFER_FENCED_H_ |
#define PB_BUFFER_FENCED_H_ |
#include "util/u_debug.h" |
#ifdef __cplusplus |
extern "C" { |
#endif |
struct pipe_fence_handle; |
/** |
* List of buffers which are awaiting fence signalling. |
*/ |
struct fenced_buffer_list; |
/**
 * Driver-provided fence operations used by the fenced buffer manager.
 */
struct pb_fence_ops
{
   /** Destroy this fence-ops object. */
   void (*destroy)( struct pb_fence_ops *ops );

   /** Set ptr = fence, with reference counting */
   void (*fence_reference)( struct pb_fence_ops *ops,
                            struct pipe_fence_handle **ptr,
                            struct pipe_fence_handle *fence );

   /**
    * Checks whether the fence has been signalled.
    * \param flag driver-specific meaning
    * \return zero on success (i.e. the fence has signalled).
    */
   int (*fence_signalled)( struct pb_fence_ops *ops,
                           struct pipe_fence_handle *fence,
                           unsigned flag );

   /**
    * Wait for the fence to finish.
    * \param flag driver-specific meaning
    * \return zero on success.
    */
   int (*fence_finish)( struct pb_fence_ops *ops,
                        struct pipe_fence_handle *fence,
                        unsigned flag );
};
#ifdef __cplusplus |
} |
#endif |
#endif /*PB_BUFFER_FENCED_H_*/ |
/drivers/video/Gallium/auxiliary/pipebuffer/pb_buffer_malloc.c |
---|
0,0 → 1,198 |
/************************************************************************** |
* |
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/** |
* \file |
* Implementation of malloc-based buffers to store data that can't be processed |
* by the hardware. |
* |
* \author Jose Fonseca <jrfonseca@tungstengraphics.com> |
*/ |
#include "util/u_debug.h" |
#include "util/u_memory.h" |
#include "pb_buffer.h" |
#include "pb_bufmgr.h" |
/**
 * Buffer backed by plain (aligned) malloc'ed memory.
 */
struct malloc_buffer
{
   struct pb_buffer base;   /**< Superclass; must be first (the downcast is a plain pointer cast). */
   void *data;              /**< align_malloc'ed backing store. */
};
extern const struct pb_vtbl malloc_buffer_vtbl; |
static INLINE struct malloc_buffer * |
malloc_buffer(struct pb_buffer *buf) |
{ |
assert(buf); |
if (!buf) |
return NULL; |
assert(buf->vtbl == &malloc_buffer_vtbl); |
return (struct malloc_buffer *)buf; |
} |
static void |
malloc_buffer_destroy(struct pb_buffer *buf) |
{ |
align_free(malloc_buffer(buf)->data); |
FREE(buf); |
} |
static void * |
malloc_buffer_map(struct pb_buffer *buf, |
unsigned flags, |
void *flush_ctx) |
{ |
return malloc_buffer(buf)->data; |
} |
/** pb_buffer::unmap — nothing to undo for malloc'ed memory. */
static void
malloc_buffer_unmap(struct pb_buffer *buf)
{
   /* No-op */
}
/**
 * pb_buffer::validate — malloc buffers cannot be used by the GPU;
 * reaching this is a caller bug (asserted in debug builds).
 */
static enum pipe_error
malloc_buffer_validate(struct pb_buffer *buf,
                       struct pb_validate *vl,
                       unsigned flags)
{
   assert(0);
   return PIPE_ERROR;
}
/**
 * pb_buffer::fence — malloc buffers are never fenced; reaching this is a
 * caller bug (asserted in debug builds).
 */
static void
malloc_buffer_fence(struct pb_buffer *buf,
                    struct pipe_fence_handle *fence)
{
   assert(0);
}
static void |
malloc_buffer_get_base_buffer(struct pb_buffer *buf, |
struct pb_buffer **base_buf, |
pb_size *offset) |
{ |
*base_buf = buf; |
*offset = 0; |
} |
/**
 * Virtual function table for malloc buffers. Entry order must match
 * struct pb_vtbl: destroy, map, unmap, validate, fence, get_base_buffer.
 */
const struct pb_vtbl
malloc_buffer_vtbl = {
      malloc_buffer_destroy,
      malloc_buffer_map,
      malloc_buffer_unmap,
      malloc_buffer_validate,
      malloc_buffer_fence,
      malloc_buffer_get_base_buffer
};
struct pb_buffer * |
pb_malloc_buffer_create(pb_size size, |
const struct pb_desc *desc) |
{ |
struct malloc_buffer *buf; |
/* TODO: do a single allocation */ |
buf = CALLOC_STRUCT(malloc_buffer); |
if(!buf) |
return NULL; |
pipe_reference_init(&buf->base.reference, 1); |
buf->base.usage = desc->usage; |
buf->base.size = size; |
buf->base.alignment = desc->alignment; |
buf->base.vtbl = &malloc_buffer_vtbl; |
buf->data = align_malloc(size, desc->alignment < sizeof(void*) ? sizeof(void*) : desc->alignment); |
if(!buf->data) { |
FREE(buf); |
return NULL; |
} |
return &buf->base; |
} |
static struct pb_buffer * |
pb_malloc_bufmgr_create_buffer(struct pb_manager *mgr, |
pb_size size, |
const struct pb_desc *desc) |
{ |
return pb_malloc_buffer_create(size, desc); |
} |
/** pb_manager::flush — nothing is cached, so nothing to flush. */
static void
pb_malloc_bufmgr_flush(struct pb_manager *mgr)
{
   /* No-op */
}
/** pb_manager::destroy — the manager is a static singleton; never freed. */
static void
pb_malloc_bufmgr_destroy(struct pb_manager *mgr)
{
   /* No-op */
}
/** pb_manager::is_buffer_busy — malloc buffers are never busy on the GPU. */
static boolean
pb_malloc_bufmgr_is_buffer_busy( struct pb_manager *mgr,
                                 struct pb_buffer *buf )
{
   return FALSE;
}
/**
 * Singleton malloc buffer manager. The manager is stateless, so one
 * static instance suffices. Initializer order must match struct
 * pb_manager: destroy, create_buffer, flush, is_buffer_busy.
 */
static struct pb_manager
pb_malloc_bufmgr = {
   pb_malloc_bufmgr_destroy,
   pb_malloc_bufmgr_create_buffer,
   pb_malloc_bufmgr_flush,
   pb_malloc_bufmgr_is_buffer_busy
};
/**
 * Return the malloc buffer manager singleton.
 *
 * The returned manager is static: its destroy() is a no-op and the
 * pointer may be shared freely.
 */
struct pb_manager *
pb_malloc_bufmgr_create(void)
{
   return &pb_malloc_bufmgr;
}
/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr.h |
---|
0,0 → 1,218 |
/************************************************************************** |
* |
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/** |
* \file |
* Buffer management. |
* |
 * A buffer manager does only one basic thing: it creates buffers. Actually,
 * "buffer factory" would probably be a more accurate description.
* |
* You can chain buffer managers so that you can have a finer grained memory |
* management and pooling. |
* |
* For example, for a simple batch buffer manager you would chain: |
* - the native buffer manager, which provides DMA memory from the graphics |
* memory space; |
* - the pool buffer manager, which keep around a pool of equally sized buffers |
* to avoid latency associated with the native buffer manager; |
 * - the fenced buffer manager, which will delay buffer destruction until
 *   the moment the card finishes processing it.
* |
* \author Jose Fonseca <jrfonseca@tungstengraphics.com> |
*/ |
#ifndef PB_BUFMGR_H_ |
#define PB_BUFMGR_H_ |
#include "pb_buffer.h" |
#ifdef __cplusplus |
extern "C" { |
#endif |
struct pb_desc; |
/**
 * Abstract base class for all buffer managers.
 */
struct pb_manager
{
   /** Destroy this manager and release the resources it holds. */
   void
   (*destroy)( struct pb_manager *mgr );

   /** Create a buffer of the given size matching \p desc; NULL on failure. */
   struct pb_buffer *
   (*create_buffer)( struct pb_manager *mgr,
                     pb_size size,
                     const struct pb_desc *desc);

   /**
    * Flush all temporary-held buffers.
    *
    * Used mostly to aid debugging memory issues or to clean up resources when
    * the drivers are long lived.
    */
   void
   (*flush)( struct pb_manager *mgr );

   /** Whether the buffer is still in use. May be left unset by some
    *  managers — check before calling. */
   boolean
   (*is_buffer_busy)( struct pb_manager *mgr,
                      struct pb_buffer *buf );
};
/** |
* Malloc buffer provider. |
* |
* Simple wrapper around pb_malloc_buffer_create for convenience. |
*/ |
struct pb_manager * |
pb_malloc_bufmgr_create(void); |
/** |
* Static buffer pool sub-allocator. |
* |
* Manages the allocation of equally sized buffers. It does so by allocating |
* a single big buffer and divide it equally sized buffers. |
* |
* It is meant to manage the allocation of batch buffer pools. |
*/ |
struct pb_manager * |
pool_bufmgr_create(struct pb_manager *provider, |
pb_size n, pb_size size, |
const struct pb_desc *desc); |
/** |
 * Static sub-allocator based on the old memory manager.
 *
 * It manages buffers of different sizes. It does so by allocating a buffer
* with the size of the heap, and then using the old mm memory manager to manage |
* that heap. |
*/ |
struct pb_manager * |
mm_bufmgr_create(struct pb_manager *provider, |
pb_size size, pb_size align2); |
/** |
* Same as mm_bufmgr_create. |
* |
* Buffer will be release when the manager is destroyed. |
*/ |
struct pb_manager * |
mm_bufmgr_create_from_buffer(struct pb_buffer *buffer, |
pb_size size, pb_size align2); |
/** |
* Slab sub-allocator. |
*/ |
struct pb_manager * |
pb_slab_manager_create(struct pb_manager *provider, |
pb_size bufSize, |
pb_size slabSize, |
const struct pb_desc *desc); |
/** |
* Allow a range of buffer size, by aggregating multiple slabs sub-allocators |
* with different bucket sizes. |
*/ |
struct pb_manager * |
pb_slab_range_manager_create(struct pb_manager *provider, |
pb_size minBufSize, |
pb_size maxBufSize, |
pb_size slabSize, |
const struct pb_desc *desc); |
/** |
* Time-based buffer cache. |
* |
* This manager keeps a cache of destroyed buffers during a time interval. |
*/ |
struct pb_manager * |
pb_cache_manager_create(struct pb_manager *provider, |
unsigned usecs); |
struct pb_fence_ops; |
/** |
* Fenced buffer manager. |
* |
 * This manager is just meant for convenience. It wraps the buffers returned
 * by another manager in fenced buffers, so that their destruction can be
 * delayed until the fences protecting them have signalled.
* |
* NOTE: the buffer manager that provides the buffers will be destroyed |
* at the same time. |
*/ |
struct pb_manager * |
fenced_bufmgr_create(struct pb_manager *provider, |
struct pb_fence_ops *ops, |
pb_size max_buffer_size, |
pb_size max_cpu_total_size); |
struct pb_manager * |
pb_alt_manager_create(struct pb_manager *provider1, |
struct pb_manager *provider2); |
/** |
* Ondemand buffer manager. |
* |
 * Buffers are created in malloc'ed memory (fast and cached), and the contents
 * are transferred to a buffer from the provider (typically in slow uncached
* memory) when there is an attempt to validate the buffer. |
* |
* Ideal for situations where one does not know before hand whether a given |
* buffer will effectively be used by the hardware or not. |
*/ |
struct pb_manager * |
pb_ondemand_manager_create(struct pb_manager *provider); |
/** |
* Debug buffer manager to detect buffer under- and overflows. |
* |
* Under/overflow sizes should be a multiple of the largest alignment |
*/ |
struct pb_manager * |
pb_debug_manager_create(struct pb_manager *provider, |
pb_size underflow_size, pb_size overflow_size); |
#ifdef __cplusplus |
} |
#endif |
#endif /*PB_BUFMGR_H_*/ |
/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c |
---|
0,0 → 1,120 |
/************************************************************************** |
* |
* Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/** |
* \file |
* Allocate buffers from two alternative buffer providers. |
* |
* \author Jose Fonseca <jrfonseca@tungstengraphics.com> |
*/ |
#include "pipe/p_compiler.h" |
#include "util/u_debug.h" |
#include "util/u_memory.h" |
#include "pb_buffer.h" |
#include "pb_bufmgr.h" |
/**
 * Manager that allocates from provider1, falling back to provider2.
 */
struct pb_alt_manager
{
   struct pb_manager base;       /**< Superclass; must be first (plain-cast downcast). */
   struct pb_manager *provider1; /**< Preferred provider, tried first. */
   struct pb_manager *provider2; /**< Fallback provider. */
};
/** Downcast a generic pb_manager to a pb_alt_manager (no runtime type check). */
static INLINE struct pb_alt_manager *
pb_alt_manager(struct pb_manager *mgr)
{
   assert(mgr);
   return (struct pb_alt_manager *)mgr;
}
static struct pb_buffer * |
pb_alt_manager_create_buffer(struct pb_manager *_mgr, |
pb_size size, |
const struct pb_desc *desc) |
{ |
struct pb_alt_manager *mgr = pb_alt_manager(_mgr); |
struct pb_buffer *buf; |
buf = mgr->provider1->create_buffer(mgr->provider1, size, desc); |
if(buf) |
return buf; |
buf = mgr->provider2->create_buffer(mgr->provider2, size, desc); |
return buf; |
} |
static void |
pb_alt_manager_flush(struct pb_manager *_mgr) |
{ |
struct pb_alt_manager *mgr = pb_alt_manager(_mgr); |
assert(mgr->provider1->flush); |
if(mgr->provider1->flush) |
mgr->provider1->flush(mgr->provider1); |
assert(mgr->provider2->flush); |
if(mgr->provider2->flush) |
mgr->provider2->flush(mgr->provider2); |
} |
/**
 * pb_manager::destroy implementation.
 *
 * NOTE(review): the wrapped providers are not destroyed here —
 * presumably they remain owned by the caller; confirm against call
 * sites before relying on this.
 */
static void
pb_alt_manager_destroy(struct pb_manager *mgr)
{
   FREE(mgr);
}
struct pb_manager * |
pb_alt_manager_create(struct pb_manager *provider1, |
struct pb_manager *provider2) |
{ |
struct pb_alt_manager *mgr; |
if(!provider1 || !provider2) |
return NULL; |
mgr = CALLOC_STRUCT(pb_alt_manager); |
if (!mgr) |
return NULL; |
mgr->base.destroy = pb_alt_manager_destroy; |
mgr->base.create_buffer = pb_alt_manager_create_buffer; |
mgr->base.flush = pb_alt_manager_flush; |
mgr->provider1 = provider1; |
mgr->provider2 = provider2; |
return &mgr->base; |
} |
/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c |
---|
0,0 → 1,411 |
/************************************************************************** |
* |
* Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/** |
* \file |
* Buffer cache. |
* |
* \author Jose Fonseca <jrfonseca-at-tungstengraphics-dot-com> |
* \author Thomas Hellström <thomas-at-tungstengraphics-dot-com> |
*/ |
#include "pipe/p_compiler.h" |
#include "util/u_debug.h" |
#include "os/os_thread.h" |
#include "util/u_memory.h" |
#include "util/u_double_list.h" |
#include "util/u_time.h" |
#include "pb_buffer.h" |
#include "pb_bufmgr.h" |
/**
 * Convenience macro (type safe): pointer to the embedded base object.
 *
 * The parameter was renamed from __derived: identifiers containing a
 * double underscore are reserved for the implementation (C11 7.1.3).
 */
#define SUPER(derived) (&(derived)->base)

struct pb_cache_manager;
/**
 * Wrapper around a pipe buffer which adds delayed destruction.
 */
struct pb_cache_buffer
{
   struct pb_buffer base;          /**< Superclass; must be first (plain-cast downcast). */
   struct pb_buffer *buffer;       /**< The wrapped buffer holding the real storage. */
   struct pb_cache_manager *mgr;   /**< Owning cache manager. */
   /** Caching time interval */
   int64_t start, end;
   struct list_head head;          /**< Node in pb_cache_manager::delayed. */
};
struct pb_cache_manager
{
   struct pb_manager base;      /**< Superclass; must be first. */
   struct pb_manager *provider; /**< Where the real buffers come from. */
   unsigned usecs;              /**< How long destroyed buffers stay cached. */
   pipe_mutex mutex;            /**< Protects the delayed list and counter. */
   struct list_head delayed;    /**< Destroyed-but-cached buffers, oldest first. */
   pb_size numDelayed;          /**< Number of entries on the delayed list. */
};
/** Downcast a generic pb_buffer to a pb_cache_buffer (no runtime type check). */
static INLINE struct pb_cache_buffer *
pb_cache_buffer(struct pb_buffer *buf)
{
   assert(buf);
   return (struct pb_cache_buffer *)buf;
}
/** Downcast a generic pb_manager to a pb_cache_manager (no runtime type check). */
static INLINE struct pb_cache_manager *
pb_cache_manager(struct pb_manager *mgr)
{
   assert(mgr);
   return (struct pb_cache_manager *)mgr;
}
/** |
* Actually destroy the buffer. |
*/ |
static INLINE void |
_pb_cache_buffer_destroy(struct pb_cache_buffer *buf) |
{ |
struct pb_cache_manager *mgr = buf->mgr; |
LIST_DEL(&buf->head); |
assert(mgr->numDelayed); |
--mgr->numDelayed; |
assert(!pipe_is_referenced(&buf->base.reference)); |
pb_reference(&buf->buffer, NULL); |
FREE(buf); |
} |
/**
 * Free as many cache buffers from the list head as possible.
 *
 * Walks the delayed list from the oldest entry and destroys every buffer
 * whose caching interval has expired; stops at the first unexpired one,
 * since the list is ordered by release time.
 * Caller must hold mgr->mutex.
 */
static void
_pb_cache_buffer_list_check_free(struct pb_cache_manager *mgr)
{
   struct list_head *curr, *next;
   struct pb_cache_buffer *buf;
   int64_t now;

   now = os_time_get();

   /* `next` is saved before destroying `curr` so iteration survives deletion. */
   curr = mgr->delayed.next;
   next = curr->next;
   while(curr != &mgr->delayed) {
      buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);

      if(!os_time_timeout(buf->start, buf->end, now))
         break;

      _pb_cache_buffer_destroy(buf);

      curr = next;
      next = curr->next;
   }
}
/**
 * "Destroy" hook for cached buffers: instead of freeing, park the buffer
 * on the manager's delayed list so it can be recycled for mgr->usecs
 * microseconds.  Expired entries are reaped opportunistically while the
 * lock is held.
 */
static void
pb_cache_buffer_destroy(struct pb_buffer *_buf)
{
   struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
   struct pb_cache_manager *mgr = buf->mgr;

   pipe_mutex_lock(mgr->mutex);
   assert(!pipe_is_referenced(&buf->base.reference));

   _pb_cache_buffer_list_check_free(mgr);

   /* Record the interval during which this buffer may be recycled. */
   buf->start = os_time_get();
   buf->end = buf->start + mgr->usecs;
   LIST_ADDTAIL(&buf->head, &mgr->delayed);
   ++mgr->numDelayed;
   pipe_mutex_unlock(mgr->mutex);
}
static void * |
pb_cache_buffer_map(struct pb_buffer *_buf, |
unsigned flags, void *flush_ctx) |
{ |
struct pb_cache_buffer *buf = pb_cache_buffer(_buf); |
return pb_map(buf->buffer, flags, flush_ctx); |
} |
static void |
pb_cache_buffer_unmap(struct pb_buffer *_buf) |
{ |
struct pb_cache_buffer *buf = pb_cache_buffer(_buf); |
pb_unmap(buf->buffer); |
} |
static enum pipe_error |
pb_cache_buffer_validate(struct pb_buffer *_buf, |
struct pb_validate *vl, |
unsigned flags) |
{ |
struct pb_cache_buffer *buf = pb_cache_buffer(_buf); |
return pb_validate(buf->buffer, vl, flags); |
} |
static void |
pb_cache_buffer_fence(struct pb_buffer *_buf, |
struct pipe_fence_handle *fence) |
{ |
struct pb_cache_buffer *buf = pb_cache_buffer(_buf); |
pb_fence(buf->buffer, fence); |
} |
static void |
pb_cache_buffer_get_base_buffer(struct pb_buffer *_buf, |
struct pb_buffer **base_buf, |
pb_size *offset) |
{ |
struct pb_cache_buffer *buf = pb_cache_buffer(_buf); |
pb_get_base_buffer(buf->buffer, base_buf, offset); |
} |
/** Virtual function table wired into every cached buffer's base.vtbl. */
const struct pb_vtbl
pb_cache_buffer_vtbl = {
      pb_cache_buffer_destroy,
      pb_cache_buffer_map,
      pb_cache_buffer_unmap,
      pb_cache_buffer_validate,
      pb_cache_buffer_fence,
      pb_cache_buffer_get_base_buffer
};
/**
 * Check whether a cached buffer can satisfy an allocation request.
 *
 * \return  1 if compatible and idle (reusable now);
 *          0 if incompatible in size/alignment/usage (keep searching);
 *         -1 if compatible but still busy (callers stop searching on this).
 */
static INLINE int
pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,
                          pb_size size,
                          const struct pb_desc *desc)
{
   if(buf->base.size < size)
      return 0;

   /* be lenient with size */
   if(buf->base.size >= 2*size)
      return 0;

   if(!pb_check_alignment(desc->alignment, buf->base.alignment))
      return 0;

   if(!pb_check_usage(desc->usage, buf->base.usage))
      return 0;

   if (buf->mgr->provider->is_buffer_busy) {
      if (buf->mgr->provider->is_buffer_busy(buf->mgr->provider, buf->buffer))
         return -1;
   } else {
      /* No busy query available: probe with a non-blocking map instead. */
      void *ptr = pb_map(buf->buffer, PB_USAGE_DONTBLOCK, NULL);

      if (!ptr)
         return -1;

      pb_unmap(buf->buffer);
   }

   return 1;
}
static struct pb_buffer * |
pb_cache_manager_create_buffer(struct pb_manager *_mgr, |
pb_size size, |
const struct pb_desc *desc) |
{ |
struct pb_cache_manager *mgr = pb_cache_manager(_mgr); |
struct pb_cache_buffer *buf; |
struct pb_cache_buffer *curr_buf; |
struct list_head *curr, *next; |
int64_t now; |
int ret = 0; |
pipe_mutex_lock(mgr->mutex); |
buf = NULL; |
curr = mgr->delayed.next; |
next = curr->next; |
/* search in the expired buffers, freeing them in the process */ |
now = os_time_get(); |
while(curr != &mgr->delayed) { |
curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head); |
if(!buf && (ret = pb_cache_is_buffer_compat(curr_buf, size, desc) > 0)) |
buf = curr_buf; |
else if(os_time_timeout(curr_buf->start, curr_buf->end, now)) |
_pb_cache_buffer_destroy(curr_buf); |
else |
/* This buffer (and all hereafter) are still hot in cache */ |
break; |
if (ret == -1) |
break; |
curr = next; |
next = curr->next; |
} |
/* keep searching in the hot buffers */ |
if(!buf && ret != -1) { |
while(curr != &mgr->delayed) { |
curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head); |
ret = pb_cache_is_buffer_compat(curr_buf, size, desc); |
if (ret > 0) { |
buf = curr_buf; |
break; |
} |
if (ret == -1) |
break; |
/* no need to check the timeout here */ |
curr = next; |
next = curr->next; |
} |
} |
if(buf) { |
LIST_DEL(&buf->head); |
--mgr->numDelayed; |
pipe_mutex_unlock(mgr->mutex); |
/* Increase refcount */ |
pipe_reference_init(&buf->base.reference, 1); |
return &buf->base; |
} |
pipe_mutex_unlock(mgr->mutex); |
buf = CALLOC_STRUCT(pb_cache_buffer); |
if(!buf) |
return NULL; |
buf->buffer = mgr->provider->create_buffer(mgr->provider, size, desc); |
/* Empty the cache and try again. */ |
if (!buf->buffer) { |
mgr->base.flush(&mgr->base); |
buf->buffer = mgr->provider->create_buffer(mgr->provider, size, desc); |
} |
if(!buf->buffer) { |
FREE(buf); |
return NULL; |
} |
assert(pipe_is_referenced(&buf->buffer->reference)); |
assert(pb_check_alignment(desc->alignment, buf->buffer->alignment)); |
assert(pb_check_usage(desc->usage, buf->buffer->usage)); |
assert(buf->buffer->size >= size); |
pipe_reference_init(&buf->base.reference, 1); |
buf->base.alignment = buf->buffer->alignment; |
buf->base.usage = buf->buffer->usage; |
buf->base.size = buf->buffer->size; |
buf->base.vtbl = &pb_cache_buffer_vtbl; |
buf->mgr = mgr; |
return &buf->base; |
} |
/**
 * Drop every cached buffer immediately (regardless of expiry), then
 * forward the flush to the provider.
 */
static void
pb_cache_manager_flush(struct pb_manager *_mgr)
{
   struct pb_cache_manager *mgr = pb_cache_manager(_mgr);
   struct list_head *curr, *next;
   struct pb_cache_buffer *buf;

   pipe_mutex_lock(mgr->mutex);
   /* `next` is saved before destroying `curr` so iteration survives deletion. */
   curr = mgr->delayed.next;
   next = curr->next;
   while(curr != &mgr->delayed) {
      buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
      _pb_cache_buffer_destroy(buf);
      curr = next;
      next = curr->next;
   }
   pipe_mutex_unlock(mgr->mutex);

   assert(mgr->provider->flush);
   if(mgr->provider->flush)
      mgr->provider->flush(mgr->provider);
}
static void |
pb_cache_manager_destroy(struct pb_manager *mgr) |
{ |
pb_cache_manager_flush(mgr); |
FREE(mgr); |
} |
struct pb_manager * |
pb_cache_manager_create(struct pb_manager *provider, |
unsigned usecs) |
{ |
struct pb_cache_manager *mgr; |
if(!provider) |
return NULL; |
mgr = CALLOC_STRUCT(pb_cache_manager); |
if (!mgr) |
return NULL; |
mgr->base.destroy = pb_cache_manager_destroy; |
mgr->base.create_buffer = pb_cache_manager_create_buffer; |
mgr->base.flush = pb_cache_manager_flush; |
mgr->provider = provider; |
mgr->usecs = usecs; |
LIST_INITHEAD(&mgr->delayed); |
mgr->numDelayed = 0; |
pipe_mutex_init(mgr->mutex); |
return &mgr->base; |
} |
/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c |
---|
0,0 → 1,497 |
/************************************************************************** |
* |
* Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/** |
* \file |
* Debug buffer manager to detect buffer under- and overflows. |
* |
* \author Jose Fonseca <jrfonseca@tungstengraphics.com> |
*/ |
#include "pipe/p_compiler.h" |
#include "util/u_debug.h" |
#include "os/os_thread.h" |
#include "util/u_math.h" |
#include "util/u_memory.h" |
#include "util/u_double_list.h" |
#include "util/u_time.h" |
#include "util/u_debug_stack.h" |
#include "pb_buffer.h" |
#include "pb_bufmgr.h" |
#ifdef DEBUG |
#define PB_DEBUG_CREATE_BACKTRACE 8 |
#define PB_DEBUG_MAP_BACKTRACE 8 |
/** |
* Convenience macro (type safe). |
*/ |
#define SUPER(__derived) (&(__derived)->base) |
struct pb_debug_manager; |
/**
 * Wrapper around a pipe buffer which adds guard bands to detect
 * under- and overflows.
 */
struct pb_debug_buffer
{
   struct pb_buffer base;        /**< Must be first: downcast via pb_debug_buffer(). */
   struct pb_buffer *buffer;     /**< Real buffer (sized with the guard bands). */
   struct pb_debug_manager *mgr; /**< Owning debug manager. */
   pb_size underflow_size;       /**< Guard band size before the user region. */
   pb_size overflow_size;        /**< Guard band size after the user region. */
   /** Backtrace captured at creation, dumped when corruption is found. */
   struct debug_stack_frame create_backtrace[PB_DEBUG_CREATE_BACKTRACE];
   pipe_mutex mutex;             /**< Protects map_count and map_backtrace. */
   unsigned map_count;           /**< Outstanding map() calls. */
   /** Backtrace of the most recent map, for validate-while-mapped reports. */
   struct debug_stack_frame map_backtrace[PB_DEBUG_MAP_BACKTRACE];
   struct list_head head;        /**< Link in pb_debug_manager::list. */
};
/** Manager that wraps every provider buffer with guard-band checking. */
struct pb_debug_manager
{
   struct pb_manager base;      /**< Must be first: downcast via pb_debug_manager(). */
   struct pb_manager *provider; /**< Manager that actually allocates buffers. */
   pb_size underflow_size;      /**< Guard band placed before each buffer. */
   pb_size overflow_size;       /**< Guard band placed after each buffer. */
   pipe_mutex mutex;            /**< Protects `list`. */
   struct list_head list;       /**< All live debug buffers (for leak dumps). */
};
/** Downcast (safe because pb_debug_buffer embeds pb_buffer as first member). */
static INLINE struct pb_debug_buffer *
pb_debug_buffer(struct pb_buffer *buf)
{
   assert(buf);
   return (struct pb_debug_buffer *)buf;
}
/** Downcast (safe because pb_debug_manager embeds pb_manager as first member). */
static INLINE struct pb_debug_manager *
pb_debug_manager(struct pb_manager *mgr)
{
   assert(mgr);
   return (struct pb_debug_manager *)mgr;
}
/* Fixed 32-byte pattern written into the guard bands.  The size is a
 * power of two: fill_random_pattern relies on that for its index mask. */
static const uint8_t random_pattern[32] = {
   0xaf, 0xcf, 0xa5, 0xa2, 0xc2, 0x63, 0x15, 0x1a,
   0x7e, 0xe2, 0x7e, 0x84, 0x15, 0x49, 0xa2, 0x1e,
   0x49, 0x63, 0xf5, 0x52, 0x74, 0x66, 0x9e, 0xc4,
   0x6d, 0xcf, 0x2c, 0x4a, 0x74, 0xe6, 0xfd, 0x94
};
static INLINE void |
fill_random_pattern(uint8_t *dst, pb_size size) |
{ |
pb_size i = 0; |
while(size--) { |
*dst++ = random_pattern[i++]; |
i &= sizeof(random_pattern) - 1; |
} |
} |
static INLINE boolean |
check_random_pattern(const uint8_t *dst, pb_size size, |
pb_size *min_ofs, pb_size *max_ofs) |
{ |
boolean result = TRUE; |
pb_size i; |
*min_ofs = size; |
*max_ofs = 0; |
for(i = 0; i < size; ++i) { |
if(*dst++ != random_pattern[i % sizeof(random_pattern)]) { |
*min_ofs = MIN2(*min_ofs, i); |
*max_ofs = MAX2(*max_ofs, i); |
result = FALSE; |
} |
} |
return result; |
} |
/** Write the guard pattern into both guard bands of a freshly created buffer. */
static void
pb_debug_buffer_fill(struct pb_debug_buffer *buf)
{
   uint8_t *map;

   map = pb_map(buf->buffer, PB_USAGE_CPU_WRITE, NULL);
   assert(map);
   if(map) {
      /* Leading band: [0, underflow_size); trailing band starts right
       * after the user-visible region. */
      fill_random_pattern(map, buf->underflow_size);
      fill_random_pattern(map + buf->underflow_size + buf->base.size,
                          buf->overflow_size);
      pb_unmap(buf->buffer);
   }
}
/**
 * Check for under/over flows.
 *
 * Verifies both guard bands, reports any corruption with offsets and the
 * creation backtrace, asserts in debug builds, then rewrites the pattern
 * so the same corruption is not reported twice.
 *
 * Should be called with the buffer unmaped.
 */
static void
pb_debug_buffer_check(struct pb_debug_buffer *buf)
{
   uint8_t *map;

   /* UNSYNCHRONIZED: we only read the guard bands, never the user data. */
   map = pb_map(buf->buffer,
                PB_USAGE_CPU_READ |
                PB_USAGE_UNSYNCHRONIZED, NULL);
   assert(map);
   if(map) {
      boolean underflow, overflow;
      pb_size min_ofs, max_ofs;

      underflow = !check_random_pattern(map, buf->underflow_size,
                                        &min_ofs, &max_ofs);
      if(underflow) {
         /* Offsets are reported relative to the start of the user region,
          * hence the negative formatting. */
         debug_printf("buffer underflow (offset -%u%s to -%u bytes) detected\n",
                      buf->underflow_size - min_ofs,
                      min_ofs == 0 ? "+" : "",
                      buf->underflow_size - max_ofs);
      }

      overflow = !check_random_pattern(map + buf->underflow_size + buf->base.size,
                                       buf->overflow_size,
                                       &min_ofs, &max_ofs);
      if(overflow) {
         debug_printf("buffer overflow (size %u plus offset %u to %u%s bytes) detected\n",
                      buf->base.size,
                      min_ofs,
                      max_ofs,
                      max_ofs == buf->overflow_size - 1 ? "+" : "");
      }

      if(underflow || overflow)
         debug_backtrace_dump(buf->create_backtrace, PB_DEBUG_CREATE_BACKTRACE);

      debug_assert(!underflow);
      debug_assert(!overflow);

      /* re-fill if not aborted */
      if(underflow)
         fill_random_pattern(map, buf->underflow_size);
      if(overflow)
         fill_random_pattern(map + buf->underflow_size + buf->base.size,
                             buf->overflow_size);

      pb_unmap(buf->buffer);
   }
}
/**
 * Destroy a debug buffer: run a final guard-band check, unlink it from
 * the manager's tracking list, then release the real buffer and wrapper.
 */
static void
pb_debug_buffer_destroy(struct pb_buffer *_buf)
{
   struct pb_debug_buffer *buf = pb_debug_buffer(_buf);
   struct pb_debug_manager *mgr = buf->mgr;

   assert(!pipe_is_referenced(&buf->base.reference));

   pb_debug_buffer_check(buf);

   pipe_mutex_lock(mgr->mutex);
   LIST_DEL(&buf->head);
   pipe_mutex_unlock(mgr->mutex);

   pipe_mutex_destroy(buf->mutex);

   pb_reference(&buf->buffer, NULL);
   FREE(buf);
}
static void * |
pb_debug_buffer_map(struct pb_buffer *_buf, |
unsigned flags, void *flush_ctx) |
{ |
struct pb_debug_buffer *buf = pb_debug_buffer(_buf); |
void *map; |
pb_debug_buffer_check(buf); |
map = pb_map(buf->buffer, flags, flush_ctx); |
if(!map) |
return NULL; |
if(map) { |
pipe_mutex_lock(buf->mutex); |
++buf->map_count; |
debug_backtrace_capture(buf->map_backtrace, 1, PB_DEBUG_MAP_BACKTRACE); |
pipe_mutex_unlock(buf->mutex); |
} |
return (uint8_t *)map + buf->underflow_size; |
} |
/** Unmap the buffer, drop the map count, and re-check the guard bands. */
static void
pb_debug_buffer_unmap(struct pb_buffer *_buf)
{
   struct pb_debug_buffer *buf = pb_debug_buffer(_buf);

   pipe_mutex_lock(buf->mutex);
   assert(buf->map_count);
   if(buf->map_count)
      --buf->map_count;
   pipe_mutex_unlock(buf->mutex);

   pb_unmap(buf->buffer);

   pb_debug_buffer_check(buf);
}
static void |
pb_debug_buffer_get_base_buffer(struct pb_buffer *_buf, |
struct pb_buffer **base_buf, |
pb_size *offset) |
{ |
struct pb_debug_buffer *buf = pb_debug_buffer(_buf); |
pb_get_base_buffer(buf->buffer, base_buf, offset); |
*offset += buf->underflow_size; |
} |
/**
 * Validate the underlying buffer, warning (with the last map backtrace)
 * if the buffer is still mapped, and re-checking the guard bands.
 */
static enum pipe_error
pb_debug_buffer_validate(struct pb_buffer *_buf,
                         struct pb_validate *vl,
                         unsigned flags)
{
   struct pb_debug_buffer *buf = pb_debug_buffer(_buf);

   pipe_mutex_lock(buf->mutex);
   if(buf->map_count) {
      debug_printf("%s: attempting to validate a mapped buffer\n", __FUNCTION__);
      debug_printf("last map backtrace is\n");
      debug_backtrace_dump(buf->map_backtrace, PB_DEBUG_MAP_BACKTRACE);
   }
   pipe_mutex_unlock(buf->mutex);

   pb_debug_buffer_check(buf);

   return pb_validate(buf->buffer, vl, flags);
}
static void |
pb_debug_buffer_fence(struct pb_buffer *_buf, |
struct pipe_fence_handle *fence) |
{ |
struct pb_debug_buffer *buf = pb_debug_buffer(_buf); |
pb_fence(buf->buffer, fence); |
} |
/** Virtual function table wired into every debug buffer's base.vtbl. */
const struct pb_vtbl
pb_debug_buffer_vtbl = {
      pb_debug_buffer_destroy,
      pb_debug_buffer_map,
      pb_debug_buffer_unmap,
      pb_debug_buffer_validate,
      pb_debug_buffer_fence,
      pb_debug_buffer_get_base_buffer
};
/**
 * Dump every live debug buffer (pointer, size, creation backtrace).
 * Caller must hold mgr->mutex.
 */
static void
pb_debug_manager_dump_locked(struct pb_debug_manager *mgr)
{
   struct list_head *curr, *next;
   struct pb_debug_buffer *buf;

   curr = mgr->list.next;
   next = curr->next;
   while(curr != &mgr->list) {
      buf = LIST_ENTRY(struct pb_debug_buffer, curr, head);

      debug_printf("buffer = %p\n", (void *) buf);
      debug_printf("    .size = 0x%x\n", buf->base.size);
      debug_backtrace_dump(buf->create_backtrace, PB_DEBUG_CREATE_BACKTRACE);

      curr = next;
      next = curr->next;
   }
}
/**
 * Create a debug buffer: allocate a provider buffer enlarged by both guard
 * bands, fill the bands with the guard pattern, record a creation
 * backtrace and track the buffer for leak reporting.
 *
 * The wrapper advertises the caller's requested size/alignment/usage, not
 * the enlarged real ones.
 */
static struct pb_buffer *
pb_debug_manager_create_buffer(struct pb_manager *_mgr,
                               pb_size size,
                               const struct pb_desc *desc)
{
   struct pb_debug_manager *mgr = pb_debug_manager(_mgr);
   struct pb_debug_buffer *buf;
   struct pb_desc real_desc;
   pb_size real_size;

   assert(size);
   assert(desc->alignment);

   buf = CALLOC_STRUCT(pb_debug_buffer);
   if(!buf)
      return NULL;

   real_size = mgr->underflow_size + size + mgr->overflow_size;
   real_desc = *desc;
   /* Guard bands must be CPU-accessible for filling/checking. */
   real_desc.usage |= PB_USAGE_CPU_WRITE;
   real_desc.usage |= PB_USAGE_CPU_READ;

   buf->buffer = mgr->provider->create_buffer(mgr->provider,
                                              real_size,
                                              &real_desc);
   if(!buf->buffer) {
      FREE(buf);
#if 0
      /* Disabled: dump live buffers on allocation failure. */
      pipe_mutex_lock(mgr->mutex);
      debug_printf("%s: failed to create buffer\n", __FUNCTION__);
      if(!LIST_IS_EMPTY(&mgr->list))
         pb_debug_manager_dump_locked(mgr);
      pipe_mutex_unlock(mgr->mutex);
#endif
      return NULL;
   }

   assert(pipe_is_referenced(&buf->buffer->reference));
   assert(pb_check_alignment(real_desc.alignment, buf->buffer->alignment));
   assert(pb_check_usage(real_desc.usage, buf->buffer->usage));
   assert(buf->buffer->size >= real_size);

   pipe_reference_init(&buf->base.reference, 1);
   buf->base.alignment = desc->alignment;
   buf->base.usage = desc->usage;
   buf->base.size = size;
   buf->base.vtbl = &pb_debug_buffer_vtbl;
   buf->mgr = mgr;

   buf->underflow_size = mgr->underflow_size;
   /* Whatever slack the provider added beyond real_size extends the
    * trailing guard band. */
   buf->overflow_size = buf->buffer->size - buf->underflow_size - size;

   debug_backtrace_capture(buf->create_backtrace, 1, PB_DEBUG_CREATE_BACKTRACE);

   pb_debug_buffer_fill(buf);

   pipe_mutex_init(buf->mutex);

   pipe_mutex_lock(mgr->mutex);
   LIST_ADDTAIL(&buf->head, &mgr->list);
   pipe_mutex_unlock(mgr->mutex);

   return &buf->base;
}
static void |
pb_debug_manager_flush(struct pb_manager *_mgr) |
{ |
struct pb_debug_manager *mgr = pb_debug_manager(_mgr); |
assert(mgr->provider->flush); |
if(mgr->provider->flush) |
mgr->provider->flush(mgr->provider); |
} |
/**
 * Destroy the debug manager, dumping any still-live (leaked) buffers,
 * then destroying the provider it owns.
 */
static void
pb_debug_manager_destroy(struct pb_manager *_mgr)
{
   struct pb_debug_manager *mgr = pb_debug_manager(_mgr);

   pipe_mutex_lock(mgr->mutex);
   if(!LIST_IS_EMPTY(&mgr->list)) {
      debug_printf("%s: unfreed buffers\n", __FUNCTION__);
      pb_debug_manager_dump_locked(mgr);
   }
   pipe_mutex_unlock(mgr->mutex);

   pipe_mutex_destroy(mgr->mutex);
   /* NOTE: unlike the cache manager, this manager owns and destroys its
    * provider. */
   mgr->provider->destroy(mgr->provider);
   FREE(mgr);
}
struct pb_manager * |
pb_debug_manager_create(struct pb_manager *provider, |
pb_size underflow_size, pb_size overflow_size) |
{ |
struct pb_debug_manager *mgr; |
if(!provider) |
return NULL; |
mgr = CALLOC_STRUCT(pb_debug_manager); |
if (!mgr) |
return NULL; |
mgr->base.destroy = pb_debug_manager_destroy; |
mgr->base.create_buffer = pb_debug_manager_create_buffer; |
mgr->base.flush = pb_debug_manager_flush; |
mgr->provider = provider; |
mgr->underflow_size = underflow_size; |
mgr->overflow_size = overflow_size; |
pipe_mutex_init(mgr->mutex); |
LIST_INITHEAD(&mgr->list); |
return &mgr->base; |
} |
#else /* !DEBUG */ |
/** Non-debug builds: no wrapping; the provider itself is returned. */
struct pb_manager *
pb_debug_manager_create(struct pb_manager *provider,
                        pb_size underflow_size, pb_size overflow_size)
{
   return provider;
}
#endif /* !DEBUG */ |
/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c |
---|
0,0 → 1,320 |
/************************************************************************** |
* |
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/** |
* \file |
* Buffer manager using the old texture memory manager. |
* |
* \author Jose Fonseca <jrfonseca@tungstengraphics.com> |
*/ |
#include "pipe/p_defines.h" |
#include "util/u_debug.h" |
#include "os/os_thread.h" |
#include "util/u_memory.h" |
#include "util/u_double_list.h" |
#include "util/u_mm.h" |
#include "pb_buffer.h" |
#include "pb_bufmgr.h" |
/** |
* Convenience macro (type safe). |
*/ |
#define SUPER(__derived) (&(__derived)->base) |
/** Sub-allocating manager that carves buffers out of one big mapped buffer. */
struct mm_pb_manager
{
   struct pb_manager base;  /**< Must be first: downcast via mm_pb_manager(). */

   pipe_mutex mutex;        /**< Protects the heap and allocations. */

   pb_size size;            /**< Total size of the managed region. */
   struct mem_block *heap;  /**< u_mm heap tracking free/used ranges. */

   pb_size align2;          /**< log2 of the allocation alignment. */

   struct pb_buffer *buffer; /**< The single backing buffer. */
   void *map;                /**< Persistent CPU mapping of `buffer`. */
};
/** Downcast (safe because mm_pb_manager embeds pb_manager as first member). */
static INLINE struct mm_pb_manager *
mm_pb_manager(struct pb_manager *mgr)
{
   assert(mgr);
   return (struct mm_pb_manager *)mgr;
}
/** A sub-allocated range of the manager's backing buffer. */
struct mm_buffer
{
   struct pb_buffer base;    /**< Must be first: downcast via mm_buffer(). */
   struct mm_pb_manager *mgr; /**< Owning sub-allocator. */
   struct mem_block *block;   /**< Heap block holding offset/size. */
};
/** Downcast (safe because mm_buffer embeds pb_buffer as first member). */
static INLINE struct mm_buffer *
mm_buffer(struct pb_buffer *buf)
{
   assert(buf);
   return (struct mm_buffer *)buf;
}
/** Return the sub-allocated block to the heap and free the wrapper. */
static void
mm_buffer_destroy(struct pb_buffer *buf)
{
   struct mm_buffer *mm_buf = mm_buffer(buf);
   struct mm_pb_manager *mm = mm_buf->mgr;

   assert(!pipe_is_referenced(&mm_buf->base.reference));

   pipe_mutex_lock(mm->mutex);
   u_mmFreeMem(mm_buf->block);
   FREE(mm_buf);
   pipe_mutex_unlock(mm->mutex);
}
/**
 * Map by offsetting into the manager's persistent mapping.
 * Note that `flags` and `flush_ctx` are currently ignored.
 */
static void *
mm_buffer_map(struct pb_buffer *buf,
              unsigned flags,
              void *flush_ctx)
{
   struct mm_buffer *mm_buf = mm_buffer(buf);
   struct mm_pb_manager *mm = mm_buf->mgr;

   /* XXX: it will be necessary to remap here to propagate flush_ctx */

   return (unsigned char *) mm->map + mm_buf->block->ofs;
}
/** No-op: the backing buffer stays persistently mapped. */
static void
mm_buffer_unmap(struct pb_buffer *buf)
{
   /* No-op */
}
static enum pipe_error |
mm_buffer_validate(struct pb_buffer *buf, |
struct pb_validate *vl, |
unsigned flags) |
{ |
struct mm_buffer *mm_buf = mm_buffer(buf); |
struct mm_pb_manager *mm = mm_buf->mgr; |
return pb_validate(mm->buffer, vl, flags); |
} |
static void |
mm_buffer_fence(struct pb_buffer *buf, |
struct pipe_fence_handle *fence) |
{ |
struct mm_buffer *mm_buf = mm_buffer(buf); |
struct mm_pb_manager *mm = mm_buf->mgr; |
pb_fence(mm->buffer, fence); |
} |
static void |
mm_buffer_get_base_buffer(struct pb_buffer *buf, |
struct pb_buffer **base_buf, |
pb_size *offset) |
{ |
struct mm_buffer *mm_buf = mm_buffer(buf); |
struct mm_pb_manager *mm = mm_buf->mgr; |
pb_get_base_buffer(mm->buffer, base_buf, offset); |
*offset += mm_buf->block->ofs; |
} |
/** Virtual function table wired into every mm buffer's base.vtbl. */
static const struct pb_vtbl
mm_buffer_vtbl = {
      mm_buffer_destroy,
      mm_buffer_map,
      mm_buffer_unmap,
      mm_buffer_validate,
      mm_buffer_fence,
      mm_buffer_get_base_buffer
};
/**
 * Sub-allocate `size` bytes from the managed heap.
 *
 * Fails (returns NULL) if the requested alignment exceeds the manager's
 * configured alignment or the heap is full.
 */
static struct pb_buffer *
mm_bufmgr_create_buffer(struct pb_manager *mgr,
                        pb_size size,
                        const struct pb_desc *desc)
{
   struct mm_pb_manager *mm = mm_pb_manager(mgr);
   struct mm_buffer *mm_buf;

   /* We don't handle alignments larger then the one initially setup */
   assert(pb_check_alignment(desc->alignment, (pb_size)1 << mm->align2));
   if(!pb_check_alignment(desc->alignment, (pb_size)1 << mm->align2))
      return NULL;

   pipe_mutex_lock(mm->mutex);

   mm_buf = CALLOC_STRUCT(mm_buffer);
   if (!mm_buf) {
      pipe_mutex_unlock(mm->mutex);
      return NULL;
   }

   pipe_reference_init(&mm_buf->base.reference, 1);
   mm_buf->base.alignment = desc->alignment;
   mm_buf->base.usage = desc->usage;
   mm_buf->base.size = size;

   mm_buf->base.vtbl = &mm_buffer_vtbl;

   mm_buf->mgr = mm;

   /* u_mmAllocMem takes the log2 alignment directly. */
   mm_buf->block = u_mmAllocMem(mm->heap, (int)size, (int)mm->align2, 0);
   if(!mm_buf->block) {
#if 0
      debug_printf("warning: heap full\n");
      mmDumpMemInfo(mm->heap);
#endif
      FREE(mm_buf);
      pipe_mutex_unlock(mm->mutex);
      return NULL;
   }

   /* Some sanity checks */
   assert(0 <= (pb_size)mm_buf->block->ofs && (pb_size)mm_buf->block->ofs < mm->size);
   assert(size <= (pb_size)mm_buf->block->size && (pb_size)mm_buf->block->ofs + (pb_size)mm_buf->block->size <= mm->size);

   pipe_mutex_unlock(mm->mutex);
   return SUPER(mm_buf);
}
/** No-op: sub-allocations are released immediately, nothing is cached. */
static void
mm_bufmgr_flush(struct pb_manager *mgr)
{
   /* No-op */
}
static void |
mm_bufmgr_destroy(struct pb_manager *mgr) |
{ |
struct mm_pb_manager *mm = mm_pb_manager(mgr); |
pipe_mutex_lock(mm->mutex); |
u_mmDestroy(mm->heap); |
pb_unmap(mm->buffer); |
pb_reference(&mm->buffer, NULL); |
pipe_mutex_unlock(mm->mutex); |
FREE(mgr); |
} |
/**
 * Create a sub-allocating manager over an existing buffer.
 *
 * Takes (shared) ownership of \p buffer, maps it persistently for CPU
 * read/write, and initializes a u_mm heap over its first \p size bytes.
 * \param align2  log2 of the allocation alignment.
 */
struct pb_manager *
mm_bufmgr_create_from_buffer(struct pb_buffer *buffer,
                             pb_size size, pb_size align2)
{
   struct mm_pb_manager *mm;

   if(!buffer)
      return NULL;

   mm = CALLOC_STRUCT(mm_pb_manager);
   if (!mm)
      return NULL;

   mm->base.destroy = mm_bufmgr_destroy;
   mm->base.create_buffer = mm_bufmgr_create_buffer;
   mm->base.flush = mm_bufmgr_flush;

   mm->size = size;
   mm->align2 = align2; /* log2 of the alignment, e.g. 6 for 64-byte */

   pipe_mutex_init(mm->mutex);

   mm->buffer = buffer;

   mm->map = pb_map(mm->buffer,
                    PB_USAGE_CPU_READ |
                    PB_USAGE_CPU_WRITE, NULL);
   if(!mm->map)
      goto failure;

   mm->heap = u_mmInit(0, (int)size);
   if (!mm->heap)
      goto failure;

   return SUPER(mm);

   /* Unwind partial construction.  NOTE(review): the mutex is not
    * destroyed on this path -- confirm whether pipe_mutex_destroy is
    * needed here on all platforms. */
failure:
   if(mm->heap)
      u_mmDestroy(mm->heap);
   if(mm->map)
      pb_unmap(mm->buffer);
   FREE(mm);
   return NULL;
}
struct pb_manager * |
mm_bufmgr_create(struct pb_manager *provider, |
pb_size size, pb_size align2) |
{ |
struct pb_buffer *buffer; |
struct pb_manager *mgr; |
struct pb_desc desc; |
if(!provider) |
return NULL; |
memset(&desc, 0, sizeof(desc)); |
desc.alignment = 1 << align2; |
buffer = provider->create_buffer(provider, size, &desc); |
if (!buffer) |
return NULL; |
mgr = mm_bufmgr_create_from_buffer(buffer, size, align2); |
if (!mgr) { |
pb_reference(&buffer, NULL); |
return NULL; |
} |
return mgr; |
} |
/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c |
---|
0,0 → 1,305 |
/************************************************************************** |
* |
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/** |
* @file |
* A variation of malloc buffers which get transferred to real graphics memory |
* when there is an attempt to validate them. |
* |
* @author Jose Fonseca <jrfonseca@tungstengraphics.com> |
*/ |
#include "util/u_debug.h" |
#include "util/u_memory.h" |
#include "pb_buffer.h" |
#include "pb_bufmgr.h" |
struct pb_ondemand_manager; |
/** Malloc-backed buffer that is promoted to a real buffer on validation. */
struct pb_ondemand_buffer
{
   struct pb_buffer base;          /**< Must be first: downcast via pb_ondemand_buffer(). */

   struct pb_ondemand_manager *mgr; /**< Owning manager. */

   /** Regular malloc'ed memory (NULL once instantiated and unmapped). */
   void *data;
   unsigned mapcount;              /**< Outstanding maps of `data`. */

   /** Real buffer (NULL until instantiation). */
   struct pb_buffer *buffer;
   pb_size size;                   /**< Requested size, kept for instantiation. */
   struct pb_desc desc;            /**< Requested desc, kept for instantiation. */
};
/** Manager whose buffers live in malloc memory until first validated. */
struct pb_ondemand_manager
{
   struct pb_manager base;      /**< Must be first: downcast via pb_ondemand_manager(). */

   struct pb_manager *provider; /**< Supplies the real buffers on instantiation. */
};
extern const struct pb_vtbl pb_ondemand_buffer_vtbl; |
/**
 * Downcast, with an extra vtbl check that the buffer really is an
 * on-demand buffer (safe because pb_buffer is the first member).
 */
static INLINE struct pb_ondemand_buffer *
pb_ondemand_buffer(struct pb_buffer *buf)
{
   assert(buf);
   if (!buf)
      return NULL;
   assert(buf->vtbl == &pb_ondemand_buffer_vtbl);
   return (struct pb_ondemand_buffer *)buf;
}
/** Downcast (safe because pb_ondemand_manager embeds pb_manager first). */
static INLINE struct pb_ondemand_manager *
pb_ondemand_manager(struct pb_manager *mgr)
{
   assert(mgr);
   return (struct pb_ondemand_manager *)mgr;
}
/** Release the real buffer (if instantiated) and the malloc'ed shadow copy. */
static void
pb_ondemand_buffer_destroy(struct pb_buffer *_buf)
{
   struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);

   pb_reference(&buf->buffer, NULL);

   /* data came from align_malloc(); align_free(NULL) is assumed safe here. */
   align_free(buf->data);

   FREE(buf);
}
/**
 * Map the buffer: delegate to the real buffer if instantiated, otherwise
 * return the malloc'ed shadow copy (tracking the map count).
 */
static void *
pb_ondemand_buffer_map(struct pb_buffer *_buf,
                       unsigned flags, void *flush_ctx)
{
   struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);

   if(buf->buffer) {
      /* Once instantiated, `data` has been released. */
      assert(!buf->data);
      return pb_map(buf->buffer, flags, flush_ctx);
   }
   else {
      assert(buf->data);
      ++buf->mapcount;
      return buf->data;
   }
}
/** Unmap: delegate to the real buffer, or just drop the shadow map count. */
static void
pb_ondemand_buffer_unmap(struct pb_buffer *_buf)
{
   struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);

   if(buf->buffer) {
      assert(!buf->data);
      pb_unmap(buf->buffer);
   }
   else {
      assert(buf->data);
      assert(buf->mapcount);
      if(buf->mapcount)
         --buf->mapcount;
   }
}
static enum pipe_error |
pb_ondemand_buffer_instantiate(struct pb_ondemand_buffer *buf) |
{ |
if(!buf->buffer) { |
struct pb_manager *provider = buf->mgr->provider; |
uint8_t *map; |
assert(!buf->mapcount); |
buf->buffer = provider->create_buffer(provider, buf->size, &buf->desc); |
if(!buf->buffer) |
return PIPE_ERROR_OUT_OF_MEMORY; |
map = pb_map(buf->buffer, PB_USAGE_CPU_READ, NULL); |
if(!map) { |
pb_reference(&buf->buffer, NULL); |
return PIPE_ERROR; |
} |
memcpy(map, buf->data, buf->size); |
pb_unmap(buf->buffer); |
if(!buf->mapcount) { |
FREE(buf->data); |
buf->data = NULL; |
} |
} |
return PIPE_OK; |
} |
/**
 * Validate: instantiate the real buffer first (must not be mapped),
 * then delegate validation to it.
 */
static enum pipe_error
pb_ondemand_buffer_validate(struct pb_buffer *_buf,
                            struct pb_validate *vl,
                            unsigned flags)
{
   struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);
   enum pipe_error ret;

   /* Cannot hand off to the GPU while the shadow copy is mapped. */
   assert(!buf->mapcount);
   if(buf->mapcount)
      return PIPE_ERROR;

   ret = pb_ondemand_buffer_instantiate(buf);
   if(ret != PIPE_OK)
      return ret;

   return pb_validate(buf->buffer, vl, flags);
}
/** Fence the real buffer; a no-op if the buffer was never instantiated. */
static void
pb_ondemand_buffer_fence(struct pb_buffer *_buf,
                         struct pipe_fence_handle *fence)
{
   struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);

   /* Fencing only makes sense after instantiation (i.e. after validate). */
   assert(buf->buffer);
   if(!buf->buffer)
      return;

   pb_fence(buf->buffer, fence);
}
/**
 * Resolve the hardware buffer/offset, instantiating the real buffer if
 * needed.  On instantiation failure, falls back to self with offset 0.
 */
static void
pb_ondemand_buffer_get_base_buffer(struct pb_buffer *_buf,
                                   struct pb_buffer **base_buf,
                                   pb_size *offset)
{
   struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);

   if(pb_ondemand_buffer_instantiate(buf) != PIPE_OK) {
      assert(0);
      *base_buf = &buf->base;
      *offset = 0;
      return;
   }

   pb_get_base_buffer(buf->buffer, base_buf, offset);
}
/** Virtual function table wired into every on-demand buffer's base.vtbl. */
const struct pb_vtbl
pb_ondemand_buffer_vtbl = {
      pb_ondemand_buffer_destroy,
      pb_ondemand_buffer_map,
      pb_ondemand_buffer_unmap,
      pb_ondemand_buffer_validate,
      pb_ondemand_buffer_fence,
      pb_ondemand_buffer_get_base_buffer
};
/**
 * Create an on-demand buffer backed by aligned malloc memory; the real
 * provider buffer is only created later, on first validation.
 */
static struct pb_buffer *
pb_ondemand_manager_create_buffer(struct pb_manager *_mgr,
                                  pb_size size,
                                  const struct pb_desc *desc)
{
   struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr);
   struct pb_ondemand_buffer *buf;

   buf = CALLOC_STRUCT(pb_ondemand_buffer);
   if(!buf)
      return NULL;

   pipe_reference_init(&buf->base.reference, 1);
   buf->base.alignment = desc->alignment;
   buf->base.usage = desc->usage;
   buf->base.size = size;
   buf->base.vtbl = &pb_ondemand_buffer_vtbl;
   buf->mgr = mgr;

   /* Clamp alignment up to pointer size, as align_malloc requires. */
   buf->data = align_malloc(size, desc->alignment < sizeof(void*) ? sizeof(void*) : desc->alignment);
   if(!buf->data) {
      FREE(buf);
      return NULL;
   }

   /* Remember the request so instantiation can replay it later. */
   buf->size = size;
   buf->desc = *desc;

   return &buf->base;
}
static void |
pb_ondemand_manager_flush(struct pb_manager *_mgr) |
{ |
struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr); |
mgr->provider->flush(mgr->provider); |
} |
/**
 * Free the manager wrapper.
 * NOTE(review): the provider is intentionally NOT destroyed here (the
 * caller keeps ownership) -- confirm callers account for this.
 */
static void
pb_ondemand_manager_destroy(struct pb_manager *_mgr)
{
   struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr);

   FREE(mgr);
}
struct pb_manager * |
pb_ondemand_manager_create(struct pb_manager *provider) |
{ |
struct pb_ondemand_manager *mgr; |
if(!provider) |
return NULL; |
mgr = CALLOC_STRUCT(pb_ondemand_manager); |
if(!mgr) |
return NULL; |
mgr->base.destroy = pb_ondemand_manager_destroy; |
mgr->base.create_buffer = pb_ondemand_manager_create_buffer; |
mgr->base.flush = pb_ondemand_manager_flush; |
mgr->provider = provider; |
return &mgr->base; |
} |
/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c |
---|
0,0 → 1,321 |
/************************************************************************** |
* |
* Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* |
**************************************************************************/ |
/** |
* \file |
* Batch buffer pool management. |
* |
* \author Jose Fonseca <jrfonseca-at-tungstengraphics-dot-com> |
* \author Thomas Hellström <thomas-at-tungstengraphics-dot-com> |
*/ |
#include "pipe/p_compiler.h" |
#include "util/u_debug.h" |
#include "os/os_thread.h" |
#include "pipe/p_defines.h" |
#include "util/u_memory.h" |
#include "util/u_double_list.h" |
#include "pb_buffer.h" |
#include "pb_bufmgr.h" |
/** |
* Convenience macro (type safe). |
*/ |
#define SUPER(__derived) (&(__derived)->base) |
/**
 * Fixed-size buffer pool manager.
 *
 * Sub-allocates equally-sized slots out of one big backing buffer
 * obtained from the provider.
 */
struct pool_pb_manager
{
   struct pb_manager base;

   pipe_mutex mutex;

   pb_size bufSize;    /* size of each slot */
   pb_size bufAlign;   /* alignment of the slots */

   pb_size numFree;    /* slots currently on the free list */
   pb_size numTot;     /* total number of slots */

   struct list_head free;      /* list of free pool_buffers */

   struct pb_buffer *buffer;   /* backing buffer from the provider */
   void *map;                  /* persistent CPU mapping of 'buffer' */

   struct pool_buffer *bufs;   /* array of numTot pool_buffer structs */
};
static INLINE struct pool_pb_manager * |
pool_pb_manager(struct pb_manager *mgr) |
{ |
assert(mgr); |
return (struct pool_pb_manager *)mgr; |
} |
/** A fixed-size sub-allocation out of a pool_pb_manager's backing buffer. */
struct pool_buffer
{
   struct pb_buffer base;
   struct pool_pb_manager *mgr;   /* owning pool */
   struct list_head head;         /* node on the pool's free list */
   pb_size start;                 /* byte offset into the pool's buffer */
};
static INLINE struct pool_buffer * |
pool_buffer(struct pb_buffer *buf) |
{ |
assert(buf); |
return (struct pool_buffer *)buf; |
} |
/**
 * Return a pool buffer to the manager's free list.
 *
 * Pool buffers are never really freed; they just go back to the pool.
 */
static void
pool_buffer_destroy(struct pb_buffer *buf)
{
   struct pool_buffer *pool_buf = pool_buffer(buf);
   struct pool_pb_manager *pool = pool_buf->mgr;

   /* Destruction happens only when the last reference has been dropped. */
   assert(!pipe_is_referenced(&pool_buf->base.reference));

   pipe_mutex_lock(pool->mutex);
   LIST_ADD(&pool_buf->head, &pool->free);
   pool->numFree++;
   pipe_mutex_unlock(pool->mutex);
}
static void * |
pool_buffer_map(struct pb_buffer *buf, unsigned flags, void *flush_ctx) |
{ |
struct pool_buffer *pool_buf = pool_buffer(buf); |
struct pool_pb_manager *pool = pool_buf->mgr; |
void *map; |
/* XXX: it will be necessary to remap here to propagate flush_ctx */ |
pipe_mutex_lock(pool->mutex); |
map = (unsigned char *) pool->map + pool_buf->start; |
pipe_mutex_unlock(pool->mutex); |
return map; |
} |
/**
 * Unmap is a no-op: the pool keeps its backing buffer mapped for its
 * whole lifetime (see pool_bufmgr_create).
 */
static void
pool_buffer_unmap(struct pb_buffer *buf)
{
   /* No-op */
}
static enum pipe_error |
pool_buffer_validate(struct pb_buffer *buf, |
struct pb_validate *vl, |
unsigned flags) |
{ |
struct pool_buffer *pool_buf = pool_buffer(buf); |
struct pool_pb_manager *pool = pool_buf->mgr; |
return pb_validate(pool->buffer, vl, flags); |
} |
static void |
pool_buffer_fence(struct pb_buffer *buf, |
struct pipe_fence_handle *fence) |
{ |
struct pool_buffer *pool_buf = pool_buffer(buf); |
struct pool_pb_manager *pool = pool_buf->mgr; |
pb_fence(pool->buffer, fence); |
} |
static void |
pool_buffer_get_base_buffer(struct pb_buffer *buf, |
struct pb_buffer **base_buf, |
pb_size *offset) |
{ |
struct pool_buffer *pool_buf = pool_buffer(buf); |
struct pool_pb_manager *pool = pool_buf->mgr; |
pb_get_base_buffer(pool->buffer, base_buf, offset); |
*offset += pool_buf->start; |
} |
/**
 * Virtual function table for pool buffers.
 *
 * Positional initializers: destroy, map, unmap, validate, fence,
 * get_base_buffer.
 */
static const struct pb_vtbl
pool_buffer_vtbl = {
   pool_buffer_destroy,
   pool_buffer_map,
   pool_buffer_unmap,
   pool_buffer_validate,
   pool_buffer_fence,
   pool_buffer_get_base_buffer
};
/**
 * Hand out a fixed-size buffer from the pool's free list.
 *
 * \return NULL if the pool is exhausted (or its bookkeeping is corrupted).
 */
static struct pb_buffer *
pool_bufmgr_create_buffer(struct pb_manager *mgr,
                          pb_size size,
                          const struct pb_desc *desc)
{
   struct pool_pb_manager *pool = pool_pb_manager(mgr);
   struct pool_buffer *pool_buf;
   struct list_head *item;

   /* This manager only supports one buffer size/alignment. */
   assert(size == pool->bufSize);
   assert(pool->bufAlign % desc->alignment == 0);

   pipe_mutex_lock(pool->mutex);

   if (pool->numFree == 0) {
      pipe_mutex_unlock(pool->mutex);
      debug_printf("warning: out of fixed size buffer objects\n");
      return NULL;
   }

   item = pool->free.next;

   /* numFree != 0 but an empty free list means the two got out of sync. */
   if (item == &pool->free) {
      pipe_mutex_unlock(pool->mutex);
      debug_printf("error: fixed size buffer pool corruption\n");
      return NULL;
   }

   LIST_DEL(item);
   --pool->numFree;

   pipe_mutex_unlock(pool->mutex);

   pool_buf = LIST_ENTRY(struct pool_buffer, item, head);

   /* Resurrect the slot with a single reference for the caller. */
   assert(!pipe_is_referenced(&pool_buf->base.reference));
   pipe_reference_init(&pool_buf->base.reference, 1);

   pool_buf->base.alignment = desc->alignment;
   pool_buf->base.usage = desc->usage;

   return SUPER(pool_buf);
}
/**
 * Flush is a no-op for the fixed-size pool.
 */
static void
pool_bufmgr_flush(struct pb_manager *mgr)
{
   /* No-op */
}
/**
 * Destroy the pool manager and release its backing buffer.
 */
static void
pool_bufmgr_destroy(struct pb_manager *mgr)
{
   struct pool_pb_manager *pool = pool_pb_manager(mgr);

   pipe_mutex_lock(pool->mutex);

   /* NOTE(review): any pool_buffer still handed out would dangle after
    * this; callers are expected to have released all buffers first. */
   FREE(pool->bufs);
   pb_unmap(pool->buffer);
   pb_reference(&pool->buffer, NULL);

   pipe_mutex_unlock(pool->mutex);

   FREE(mgr);
}
/**
 * Create a manager that sub-allocates one big buffer from \p provider
 * into \p numBufs fixed-size slots of \p bufSize bytes each.
 *
 * \return NULL if \p provider is NULL or any allocation fails.
 */
struct pb_manager *
pool_bufmgr_create(struct pb_manager *provider,
                   pb_size numBufs,
                   pb_size bufSize,
                   const struct pb_desc *desc)
{
   struct pool_pb_manager *pool;
   struct pool_buffer *pool_buf;
   pb_size i;

   if(!provider)
      return NULL;

   pool = CALLOC_STRUCT(pool_pb_manager);
   if (!pool)
      return NULL;

   pool->base.destroy = pool_bufmgr_destroy;
   pool->base.create_buffer = pool_bufmgr_create_buffer;
   pool->base.flush = pool_bufmgr_flush;

   LIST_INITHEAD(&pool->free);

   pool->numTot = numBufs;
   pool->numFree = numBufs;
   pool->bufSize = bufSize;
   pool->bufAlign = desc->alignment;

   pipe_mutex_init(pool->mutex);

   /* One big backing buffer holds all the fixed-size slots. */
   pool->buffer = provider->create_buffer(provider, numBufs*bufSize, desc);
   if (!pool->buffer)
      goto failure;

   /* Keep the backing buffer mapped for the manager's whole lifetime;
    * pool_buffer_map only offsets into this mapping. */
   pool->map = pb_map(pool->buffer,
                      PB_USAGE_CPU_READ |
                      PB_USAGE_CPU_WRITE, NULL);
   if(!pool->map)
      goto failure;

   pool->bufs = (struct pool_buffer *)CALLOC(numBufs, sizeof(*pool->bufs));
   if (!pool->bufs)
      goto failure;

   /* Carve the backing buffer into slots, all initially free. */
   pool_buf = pool->bufs;
   for (i = 0; i < numBufs; ++i) {
      pipe_reference_init(&pool_buf->base.reference, 0);
      pool_buf->base.alignment = 0;
      pool_buf->base.usage = 0;
      pool_buf->base.size = bufSize;
      pool_buf->base.vtbl = &pool_buffer_vtbl;
      pool_buf->mgr = pool;
      pool_buf->start = i * bufSize;
      LIST_ADDTAIL(&pool_buf->head, &pool->free);
      pool_buf++;
   }

   return SUPER(pool);

failure:
   /* Unwind whatever was set up; FREE/pb_reference handle NULL safely. */
   FREE(pool->bufs);
   if(pool->map)
      pb_unmap(pool->buffer);
   if(pool->buffer)
      pb_reference(&pool->buffer, NULL);
   FREE(pool);
   return NULL;
}
/drivers/video/Gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c |
---|
0,0 → 1,590 |
/************************************************************************** |
* |
* Copyright 2006-2008 Tungsten Graphics, Inc., Cedar Park, TX., USA |
* All Rights Reserved. |
* |
* Permission is hereby granted, FREE of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* |
**************************************************************************/ |
/** |
* @file |
* S-lab pool implementation. |
* |
* @sa http://en.wikipedia.org/wiki/Slab_allocation |
* |
* @author Thomas Hellstrom <thomas-at-tungstengraphics-dot-com> |
* @author Jose Fonseca <jrfonseca@tungstengraphics.com> |
*/ |
#include "pipe/p_compiler.h" |
#include "util/u_debug.h" |
#include "os/os_thread.h" |
#include "pipe/p_defines.h" |
#include "util/u_memory.h" |
#include "util/u_double_list.h" |
#include "util/u_time.h" |
#include "pb_buffer.h" |
#include "pb_bufmgr.h" |
struct pb_slab;

/**
 * Buffer in a slab.
 *
 * Sub-allocation of a contiguous buffer.
 */
struct pb_slab_buffer
{
   struct pb_buffer base;

   struct pb_slab *slab;     /* slab this buffer belongs to */

   struct list_head head;    /* node on the slab's free-buffer list */

   unsigned mapCount;        /* outstanding map() calls */

   /** Offset relative to the start of the slab buffer. */
   pb_size start;

   /** Use when validating, to signal that all mappings are finished */
   /* TODO: Actually validation does not reach this stage yet */
   pipe_condvar event;
};
/**
 * Slab -- a contiguous piece of memory.
 */
struct pb_slab
{
   struct list_head head;          /* node on the manager's partial-slab list */
   struct list_head freeBuffers;   /* list of free pb_slab_buffers */
   pb_size numBuffers;             /* total sub-buffers in this slab */
   pb_size numFree;                /* sub-buffers currently free */

   struct pb_slab_buffer *buffers; /* array of numBuffers sub-buffers */
   struct pb_slab_manager *mgr;    /* owning manager */

   /** Buffer from the provider */
   struct pb_buffer *bo;

   void *virtual;                  /* CPU address of the slab's memory */
};
/**
 * It adds/removes slabs as needed in order to meet the allocation/destruction
 * of individual buffers.
 */
struct pb_slab_manager
{
   struct pb_manager base;

   /** From where we get our buffers */
   struct pb_manager *provider;

   /** Size of the buffers we hand on downstream */
   pb_size bufSize;

   /** Size of the buffers we request upstream */
   pb_size slabSize;

   /**
    * Alignment, usage to be used to allocate the slab buffers.
    *
    * We can only provide buffers which are consistent (in alignment, usage)
    * with this description.
    */
   struct pb_desc desc;

   /**
    * Partial slabs
    *
    * Full slabs are not stored in any list. Empty slabs are destroyed
    * immediately.
    */
   struct list_head slabs;

   pipe_mutex mutex;   /* protects 'slabs' and the slabs' free lists */
};
/**
 * Wrapper around several slabs, therefore capable of handling buffers of
 * multiple sizes.
 *
 * This buffer manager just dispatches buffer allocations to the appropriate slab
 * manager, according to the requested buffer size, or bypasses the slab
 * managers altogether for even greater sizes.
 *
 * The data of this structure remains constant after
 * initialization and thus needs no mutex protection.
 */
struct pb_slab_range_manager
{
   struct pb_manager base;

   struct pb_manager *provider;   /* for requests larger than maxBufSize */

   pb_size minBufSize;   /* smallest bucket size */
   pb_size maxBufSize;   /* largest bucket size */

   /** @sa pb_slab_manager::desc */
   struct pb_desc desc;

   unsigned numBuckets;   /* power-of-two sizes from min to max */
   /* NOTE(review): never allocated in this file, only FREE'd in destroy
    * (FREE(NULL) is safe) -- appears to be an unused leftover field. */
   pb_size *bucketSizes;

   /** Array of pb_slab_manager, one for each bucket size */
   struct pb_manager **buckets;
};
static INLINE struct pb_slab_buffer * |
pb_slab_buffer(struct pb_buffer *buf) |
{ |
assert(buf); |
return (struct pb_slab_buffer *)buf; |
} |
static INLINE struct pb_slab_manager * |
pb_slab_manager(struct pb_manager *mgr) |
{ |
assert(mgr); |
return (struct pb_slab_manager *)mgr; |
} |
static INLINE struct pb_slab_range_manager * |
pb_slab_range_manager(struct pb_manager *mgr) |
{ |
assert(mgr); |
return (struct pb_slab_range_manager *)mgr; |
} |
/**
 * Delete a buffer from the slab delayed list and put
 * it on the slab FREE list.
 */
static void
pb_slab_buffer_destroy(struct pb_buffer *_buf)
{
   struct pb_slab_buffer *buf = pb_slab_buffer(_buf);
   struct pb_slab *slab = buf->slab;
   struct pb_slab_manager *mgr = slab->mgr;
   struct list_head *list = &buf->head;

   pipe_mutex_lock(mgr->mutex);

   /* Destruction happens only when the last reference has been dropped. */
   assert(!pipe_is_referenced(&buf->base.reference));

   buf->mapCount = 0;

   /* Move the buffer back onto its slab's free list. */
   LIST_DEL(list);
   LIST_ADDTAIL(list, &slab->freeBuffers);
   slab->numFree++;

   /* A slab that was full (and hence unlisted) becomes partial again. */
   if (slab->head.next == &slab->head)
      LIST_ADDTAIL(&slab->head, &mgr->slabs);

   /* If the slab becomes totally empty, free it */
   if (slab->numFree == slab->numBuffers) {
      list = &slab->head;
      LIST_DELINIT(list);
      pb_reference(&slab->bo, NULL);
      FREE(slab->buffers);
      FREE(slab);
   }

   pipe_mutex_unlock(mgr->mutex);
}
static void * |
pb_slab_buffer_map(struct pb_buffer *_buf, |
unsigned flags, |
void *flush_ctx) |
{ |
struct pb_slab_buffer *buf = pb_slab_buffer(_buf); |
/* XXX: it will be necessary to remap here to propagate flush_ctx */ |
++buf->mapCount; |
return (void *) ((uint8_t *) buf->slab->virtual + buf->start); |
} |
static void |
pb_slab_buffer_unmap(struct pb_buffer *_buf) |
{ |
struct pb_slab_buffer *buf = pb_slab_buffer(_buf); |
--buf->mapCount; |
if (buf->mapCount == 0) |
pipe_condvar_broadcast(buf->event); |
} |
static enum pipe_error |
pb_slab_buffer_validate(struct pb_buffer *_buf, |
struct pb_validate *vl, |
unsigned flags) |
{ |
struct pb_slab_buffer *buf = pb_slab_buffer(_buf); |
return pb_validate(buf->slab->bo, vl, flags); |
} |
static void |
pb_slab_buffer_fence(struct pb_buffer *_buf, |
struct pipe_fence_handle *fence) |
{ |
struct pb_slab_buffer *buf = pb_slab_buffer(_buf); |
pb_fence(buf->slab->bo, fence); |
} |
static void |
pb_slab_buffer_get_base_buffer(struct pb_buffer *_buf, |
struct pb_buffer **base_buf, |
pb_size *offset) |
{ |
struct pb_slab_buffer *buf = pb_slab_buffer(_buf); |
pb_get_base_buffer(buf->slab->bo, base_buf, offset); |
*offset += buf->start; |
} |
/**
 * Virtual function table for slab buffers.
 *
 * Positional initializers: destroy, map, unmap, validate, fence,
 * get_base_buffer.
 */
static const struct pb_vtbl
pb_slab_buffer_vtbl = {
   pb_slab_buffer_destroy,
   pb_slab_buffer_map,
   pb_slab_buffer_unmap,
   pb_slab_buffer_validate,
   pb_slab_buffer_fence,
   pb_slab_buffer_get_base_buffer
};
/**
 * Create a new slab.
 *
 * Called when we ran out of free slabs.
 */
static enum pipe_error
pb_slab_create(struct pb_slab_manager *mgr)
{
   struct pb_slab *slab;
   struct pb_slab_buffer *buf;
   unsigned numBuffers;
   unsigned i;
   enum pipe_error ret;

   slab = CALLOC_STRUCT(pb_slab);
   if (!slab)
      return PIPE_ERROR_OUT_OF_MEMORY;

   slab->bo = mgr->provider->create_buffer(mgr->provider, mgr->slabSize, &mgr->desc);
   if(!slab->bo) {
      ret = PIPE_ERROR_OUT_OF_MEMORY;
      goto out_err0;
   }

   /* Note down the slab virtual address. All mappings are accessed directly
    * through this address so it is required that the buffer is pinned. */
   slab->virtual = pb_map(slab->bo,
                          PB_USAGE_CPU_READ |
                          PB_USAGE_CPU_WRITE, NULL);
   if(!slab->virtual) {
      ret = PIPE_ERROR_OUT_OF_MEMORY;
      goto out_err1;
   }
   /* NOTE(review): the buffer is unmapped right away but slab->virtual is
    * dereferenced later (pb_slab_buffer_map); this relies on the provider
    * keeping the mapping valid after pb_unmap -- confirm. */
   pb_unmap(slab->bo);

   numBuffers = slab->bo->size / mgr->bufSize;

   slab->buffers = CALLOC(numBuffers, sizeof(*slab->buffers));
   if (!slab->buffers) {
      ret = PIPE_ERROR_OUT_OF_MEMORY;
      goto out_err1;
   }

   LIST_INITHEAD(&slab->head);
   LIST_INITHEAD(&slab->freeBuffers);

   slab->numBuffers = numBuffers;
   slab->numFree = 0;
   slab->mgr = mgr;

   /* Initialize each sub-buffer and chain it onto the slab's free list. */
   buf = slab->buffers;
   for (i=0; i < numBuffers; ++i) {
      pipe_reference_init(&buf->base.reference, 0);
      buf->base.size = mgr->bufSize;
      buf->base.alignment = 0;
      buf->base.usage = 0;
      buf->base.vtbl = &pb_slab_buffer_vtbl;
      buf->slab = slab;
      buf->start = i* mgr->bufSize;
      buf->mapCount = 0;
      pipe_condvar_init(buf->event);
      LIST_ADDTAIL(&buf->head, &slab->freeBuffers);
      slab->numFree++;
      buf++;
   }

   /* Add this slab to the list of partial slabs */
   LIST_ADDTAIL(&slab->head, &mgr->slabs);

   return PIPE_OK;

out_err1:
   pb_reference(&slab->bo, NULL);
out_err0:
   FREE(slab);
   return ret;
}
static struct pb_buffer * |
pb_slab_manager_create_buffer(struct pb_manager *_mgr, |
pb_size size, |
const struct pb_desc *desc) |
{ |
struct pb_slab_manager *mgr = pb_slab_manager(_mgr); |
static struct pb_slab_buffer *buf; |
struct pb_slab *slab; |
struct list_head *list; |
/* check size */ |
assert(size <= mgr->bufSize); |
if(size > mgr->bufSize) |
return NULL; |
/* check if we can provide the requested alignment */ |
assert(pb_check_alignment(desc->alignment, mgr->desc.alignment)); |
if(!pb_check_alignment(desc->alignment, mgr->desc.alignment)) |
return NULL; |
assert(pb_check_alignment(desc->alignment, mgr->bufSize)); |
if(!pb_check_alignment(desc->alignment, mgr->bufSize)) |
return NULL; |
assert(pb_check_usage(desc->usage, mgr->desc.usage)); |
if(!pb_check_usage(desc->usage, mgr->desc.usage)) |
return NULL; |
pipe_mutex_lock(mgr->mutex); |
/* Create a new slab, if we run out of partial slabs */ |
if (mgr->slabs.next == &mgr->slabs) { |
(void) pb_slab_create(mgr); |
if (mgr->slabs.next == &mgr->slabs) { |
pipe_mutex_unlock(mgr->mutex); |
return NULL; |
} |
} |
/* Allocate the buffer from a partial (or just created) slab */ |
list = mgr->slabs.next; |
slab = LIST_ENTRY(struct pb_slab, list, head); |
/* If totally full remove from the partial slab list */ |
if (--slab->numFree == 0) |
LIST_DELINIT(list); |
list = slab->freeBuffers.next; |
LIST_DELINIT(list); |
pipe_mutex_unlock(mgr->mutex); |
buf = LIST_ENTRY(struct pb_slab_buffer, list, head); |
pipe_reference_init(&buf->base.reference, 1); |
buf->base.alignment = desc->alignment; |
buf->base.usage = desc->usage; |
return &buf->base; |
} |
static void |
pb_slab_manager_flush(struct pb_manager *_mgr) |
{ |
struct pb_slab_manager *mgr = pb_slab_manager(_mgr); |
assert(mgr->provider->flush); |
if(mgr->provider->flush) |
mgr->provider->flush(mgr->provider); |
} |
/**
 * Destroy the slab manager structure.
 */
static void
pb_slab_manager_destroy(struct pb_manager *_mgr)
{
   struct pb_slab_manager *mgr = pb_slab_manager(_mgr);

   /* TODO: cleanup all allocated buffers */
   FREE(mgr);
}
struct pb_manager * |
pb_slab_manager_create(struct pb_manager *provider, |
pb_size bufSize, |
pb_size slabSize, |
const struct pb_desc *desc) |
{ |
struct pb_slab_manager *mgr; |
mgr = CALLOC_STRUCT(pb_slab_manager); |
if (!mgr) |
return NULL; |
mgr->base.destroy = pb_slab_manager_destroy; |
mgr->base.create_buffer = pb_slab_manager_create_buffer; |
mgr->base.flush = pb_slab_manager_flush; |
mgr->provider = provider; |
mgr->bufSize = bufSize; |
mgr->slabSize = slabSize; |
mgr->desc = *desc; |
LIST_INITHEAD(&mgr->slabs); |
pipe_mutex_init(mgr->mutex); |
return &mgr->base; |
} |
static struct pb_buffer * |
pb_slab_range_manager_create_buffer(struct pb_manager *_mgr, |
pb_size size, |
const struct pb_desc *desc) |
{ |
struct pb_slab_range_manager *mgr = pb_slab_range_manager(_mgr); |
pb_size bufSize; |
pb_size reqSize = size; |
unsigned i; |
if(desc->alignment > reqSize) |
reqSize = desc->alignment; |
bufSize = mgr->minBufSize; |
for (i = 0; i < mgr->numBuckets; ++i) { |
if(bufSize >= reqSize) |
return mgr->buckets[i]->create_buffer(mgr->buckets[i], size, desc); |
bufSize *= 2; |
} |
/* Fall back to allocate a buffer object directly from the provider. */ |
return mgr->provider->create_buffer(mgr->provider, size, desc); |
} |
static void |
pb_slab_range_manager_flush(struct pb_manager *_mgr) |
{ |
struct pb_slab_range_manager *mgr = pb_slab_range_manager(_mgr); |
/* Individual slabs don't hold any temporary buffers so no need to call them */ |
assert(mgr->provider->flush); |
if(mgr->provider->flush) |
mgr->provider->flush(mgr->provider); |
} |
static void |
pb_slab_range_manager_destroy(struct pb_manager *_mgr) |
{ |
struct pb_slab_range_manager *mgr = pb_slab_range_manager(_mgr); |
unsigned i; |
for (i = 0; i < mgr->numBuckets; ++i) |
mgr->buckets[i]->destroy(mgr->buckets[i]); |
FREE(mgr->buckets); |
FREE(mgr->bucketSizes); |
FREE(mgr); |
} |
/**
 * Create a range manager: one slab manager per power-of-two bucket between
 * \p minBufSize and \p maxBufSize; larger requests go straight to the
 * provider.
 *
 * \return NULL if \p provider is NULL or any allocation fails.
 */
struct pb_manager *
pb_slab_range_manager_create(struct pb_manager *provider,
                             pb_size minBufSize,
                             pb_size maxBufSize,
                             pb_size slabSize,
                             const struct pb_desc *desc)
{
   struct pb_slab_range_manager *mgr;
   pb_size bufSize;
   unsigned i;

   if(!provider)
      return NULL;

   mgr = CALLOC_STRUCT(pb_slab_range_manager);
   if (!mgr)
      goto out_err0;

   mgr->base.destroy = pb_slab_range_manager_destroy;
   mgr->base.create_buffer = pb_slab_range_manager_create_buffer;
   mgr->base.flush = pb_slab_range_manager_flush;

   mgr->provider = provider;
   mgr->minBufSize = minBufSize;
   mgr->maxBufSize = maxBufSize;

   /* Count the power-of-two buckets needed to cover [minBufSize, maxBufSize]. */
   mgr->numBuckets = 1;
   bufSize = minBufSize;
   while(bufSize < maxBufSize) {
      bufSize *= 2;
      ++mgr->numBuckets;
   }

   mgr->buckets = CALLOC(mgr->numBuckets, sizeof(*mgr->buckets));
   if (!mgr->buckets)
      goto out_err1;

   /* One slab manager per bucket size. */
   bufSize = minBufSize;
   for (i = 0; i < mgr->numBuckets; ++i) {
      mgr->buckets[i] = pb_slab_manager_create(provider, bufSize, slabSize, desc);
      if(!mgr->buckets[i])
         goto out_err2;
      bufSize *= 2;
   }

   return &mgr->base;

out_err2:
   /* Tear down any buckets created before the failure. */
   for (i = 0; i < mgr->numBuckets; ++i)
      if(mgr->buckets[i])
         mgr->buckets[i]->destroy(mgr->buckets[i]);
   FREE(mgr->buckets);
out_err1:
   FREE(mgr);
out_err0:
   return NULL;
}
/drivers/video/Gallium/auxiliary/pipebuffer/pb_validate.c |
---|
0,0 → 1,192 |
/************************************************************************** |
* |
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/** |
* @file |
* Buffer validation. |
* |
* @author Jose Fonseca <jrfonseca@tungstengraphics.com> |
*/ |
#include "pipe/p_compiler.h" |
#include "pipe/p_defines.h" |
#include "util/u_memory.h" |
#include "util/u_debug.h" |
#include "pb_buffer.h" |
#include "pb_validate.h" |
#define PB_VALIDATE_INITIAL_SIZE 1 /* 512 */

/** One buffer on the validation list, with its accumulated usage flags. */
struct pb_validate_entry
{
   struct pb_buffer *buf;
   unsigned flags;
};

/** Growable array of buffers to validate and fence. */
struct pb_validate
{
   struct pb_validate_entry *entries;
   unsigned used;   /* number of valid entries */
   unsigned size;   /* allocated capacity of 'entries' */
};
/**
 * Append a buffer (with its GPU usage flags) to the validation list.
 *
 * The list holds a reference on the buffer until pb_validate_fence() or
 * pb_validate_destroy() releases it.
 */
enum pipe_error
pb_validate_add_buffer(struct pb_validate *vl,
                       struct pb_buffer *buf,
                       unsigned flags)
{
   assert(buf);
   if(!buf)
      return PIPE_ERROR;

   /* Only GPU read/write flags are meaningful here. */
   assert(flags & PB_USAGE_GPU_READ_WRITE);
   assert(!(flags & ~PB_USAGE_GPU_READ_WRITE));
   flags &= PB_USAGE_GPU_READ_WRITE;

   /* We only need to store one reference for each buffer, so avoid storing
    * consecutive references for the same buffer. It might not be the most
    * common pattern, but it is easy to implement.
    */
   if(vl->used && vl->entries[vl->used - 1].buf == buf) {
      vl->entries[vl->used - 1].flags |= flags;
      return PIPE_OK;
   }

   /* Grow the table */
   if(vl->used == vl->size) {
      unsigned new_size;
      struct pb_validate_entry *new_entries;

      new_size = vl->size * 2;
      if(!new_size)
         return PIPE_ERROR_OUT_OF_MEMORY;

      new_entries = (struct pb_validate_entry *)REALLOC(vl->entries,
                                                        vl->size*sizeof(struct pb_validate_entry),
                                                        new_size*sizeof(struct pb_validate_entry));
      if(!new_entries)
         return PIPE_ERROR_OUT_OF_MEMORY;

      /* Zero the newly added tail so the !buf assertion below holds. */
      memset(new_entries + vl->size, 0, (new_size - vl->size)*sizeof(struct pb_validate_entry));

      vl->size = new_size;
      vl->entries = new_entries;
   }

   assert(!vl->entries[vl->used].buf);
   pb_reference(&vl->entries[vl->used].buf, buf);
   vl->entries[vl->used].flags = flags;
   ++vl->used;

   return PIPE_OK;
}
enum pipe_error |
pb_validate_foreach(struct pb_validate *vl, |
enum pipe_error (*callback)(struct pb_buffer *buf, void *data), |
void *data) |
{ |
unsigned i; |
for(i = 0; i < vl->used; ++i) { |
enum pipe_error ret; |
ret = callback(vl->entries[i].buf, data); |
if(ret != PIPE_OK) |
return ret; |
} |
return PIPE_OK; |
} |
/**
 * Validate all buffers for hardware access.
 *
 * On failure, un-validates the buffers validated so far (by passing a
 * NULL validation list) and returns the error.
 */
enum pipe_error
pb_validate_validate(struct pb_validate *vl)
{
   unsigned i;

   for(i = 0; i < vl->used; ++i) {
      enum pipe_error ret;
      ret = pb_validate(vl->entries[i].buf, vl, vl->entries[i].flags);
      if(ret != PIPE_OK) {
         /* Roll back everything validated before the failure. */
         while(i--)
            pb_validate(vl->entries[i].buf, NULL, 0);
         return ret;
      }
   }

   return PIPE_OK;
}
void |
pb_validate_fence(struct pb_validate *vl, |
struct pipe_fence_handle *fence) |
{ |
unsigned i; |
for(i = 0; i < vl->used; ++i) { |
pb_fence(vl->entries[i].buf, fence); |
pb_reference(&vl->entries[i].buf, NULL); |
} |
vl->used = 0; |
} |
void |
pb_validate_destroy(struct pb_validate *vl) |
{ |
unsigned i; |
for(i = 0; i < vl->used; ++i) |
pb_reference(&vl->entries[i].buf, NULL); |
FREE(vl->entries); |
FREE(vl); |
} |
struct pb_validate * |
pb_validate_create() |
{ |
struct pb_validate *vl; |
vl = CALLOC_STRUCT(pb_validate); |
if(!vl) |
return NULL; |
vl->size = PB_VALIDATE_INITIAL_SIZE; |
vl->entries = (struct pb_validate_entry *)CALLOC(vl->size, sizeof(struct pb_validate_entry)); |
if(!vl->entries) { |
FREE(vl); |
return NULL; |
} |
return vl; |
} |
/drivers/video/Gallium/auxiliary/pipebuffer/pb_validate.h |
---|
0,0 → 1,97 |
/************************************************************************** |
* |
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/** |
* @file |
* Buffer validation. |
* |
* @author Jose Fonseca <jrfonseca@tungstengraphics.com> |
*/ |
#ifndef PB_VALIDATE_H_
#define PB_VALIDATE_H_

#include "pipe/p_compiler.h"
#include "pipe/p_defines.h"

#ifdef __cplusplus
extern "C" {
#endif

struct pb_buffer;
struct pipe_fence_handle;

/**
 * Buffer validation list.
 *
 * It holds a list of buffers to be validated and fenced when flushing.
 */
struct pb_validate;

/**
 * Add a buffer (with GPU read/write usage flags) to the list.
 *
 * The list keeps a reference on the buffer until it is fenced or the
 * list is destroyed.
 */
enum pipe_error
pb_validate_add_buffer(struct pb_validate *vl,
                       struct pb_buffer *buf,
                       unsigned flags);

/** Invoke \p callback on every listed buffer; stops at the first error. */
enum pipe_error
pb_validate_foreach(struct pb_validate *vl,
                    enum pipe_error (*callback)(struct pb_buffer *buf, void *data),
                    void *data);

/**
 * Validate all buffers for hardware access.
 *
 * Should be called right before issuing commands to the hardware.
 */
enum pipe_error
pb_validate_validate(struct pb_validate *vl);

/**
 * Fence all buffers and clear the list.
 *
 * Should be called right after issuing commands to the hardware.
 */
void
pb_validate_fence(struct pb_validate *vl,
                  struct pipe_fence_handle *fence);

/** Create an empty validation list; NULL on allocation failure. */
struct pb_validate *
pb_validate_create(void);

/** Release all held buffer references and free the list. */
void
pb_validate_destroy(struct pb_validate *vl);

#ifdef __cplusplus
}
#endif

#endif /*PB_VALIDATE_H_*/
/drivers/video/Gallium/auxiliary/rtasm/rtasm_cpu.c |
---|
0,0 → 1,67 |
/************************************************************************** |
* |
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
#include "pipe/p_config.h" |
#include "rtasm_cpu.h" |
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) |
#include "util/u_debug.h" |
#include "util/u_cpu_detect.h" |
DEBUG_GET_ONCE_BOOL_OPTION(nosse, "GALLIUM_NOSSE", FALSE); |
/* Run CPU detection, then expose the global capability structure.
 * (util_cpu_detect() is presumably idempotent -- TODO confirm.) */
static struct util_cpu_caps *get_cpu_caps(void)
{
   util_cpu_detect();
   return &util_cpu_caps;
}
int rtasm_cpu_has_sse(void) |
{ |
return !debug_get_option_nosse() && get_cpu_caps()->has_sse; |
} |
int rtasm_cpu_has_sse2(void) |
{ |
return !debug_get_option_nosse() && get_cpu_caps()->has_sse2; |
} |
#else |
/* Non-x86 build: SSE is never available. */
int rtasm_cpu_has_sse(void)
{
   return 0;
}
/* Non-x86 build: SSE2 is never available. */
int rtasm_cpu_has_sse2(void)
{
   return 0;
}
#endif |
/drivers/video/Gallium/auxiliary/rtasm/rtasm_cpu.h |
---|
0,0 → 1,42 |
/************************************************************************** |
* |
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/** |
* @file |
* Runtime detection of CPU capabilities. |
*/ |
#ifndef _RTASM_CPU_H_ |
#define _RTASM_CPU_H_ |
int rtasm_cpu_has_sse(void); |
int rtasm_cpu_has_sse2(void); |
#endif /* _RTASM_CPU_H_ */ |
/drivers/video/Gallium/auxiliary/rtasm/rtasm_execmem.c |
---|
0,0 → 1,102 |
/************************************************************************** |
* |
* Copyright (C) 1999-2005 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/** |
* \file exemem.c |
* Functions for allocating executable memory. |
* |
* \author Keith Whitwell |
*/ |
#include "pipe/p_compiler.h" |
#include "util/u_debug.h" |
#include "os/os_thread.h" |
#include "util/u_memory.h" |
#include "rtasm_execmem.h" |
#include "util/u_mm.h" |
#define EXEC_HEAP_SIZE (4*1024*1024) |
pipe_static_mutex(exec_mutex); |
static struct mem_block *exec_heap = NULL; |
static unsigned char *exec_mem = NULL; |
/* Lazily create the executable-memory pool: a u_mm heap describing |
 * EXEC_HEAP_SIZE bytes, backed by one user_alloc() region. |
 * Caller must hold exec_mutex. |
 * NOTE(review): user_alloc() pages are not guaranteed executable on |
 * W^X platforms -- this generic path assumes the OS allows it; confirm |
 * a platform-specific (mmap PROT_EXEC) variant is used where needed. |
 */ |
static void |
init_heap(void) |
{ |
if (!exec_heap) |
exec_heap = u_mmInit( 0, EXEC_HEAP_SIZE ); |
if (!exec_mem) |
exec_mem = (unsigned char *) user_alloc(EXEC_HEAP_SIZE); |
} |
/** |
 * Allocate executable memory. |
 * |
 * \param size  bytes requested; rounded up to a multiple of 32 and |
 *              returned 32-byte aligned. |
 * \return pointer into the executable heap, or NULL on failure. |
 * |
 * Thread-safe: serialized on exec_mutex. |
 */ |
void * |
rtasm_exec_malloc(size_t size) |
{ |
struct mem_block *block = NULL; |
void *addr = NULL; |
pipe_mutex_lock(exec_mutex); |
init_heap(); |
if (exec_heap) { |
size = (size + 31) & ~31;  /* next multiple of 32 bytes */ |
block = u_mmAllocMem( exec_heap, size, 5, 0 ); /* 5 -> 32-byte alignment */ |
} |
/* Bug fix: the old code computed exec_mem + block->ofs even when the |
 * backing store (exec_mem) failed to allocate, returning a bogus |
 * non-NULL pointer derived from a NULL base. |
 */ |
if (block && exec_mem) |
addr = exec_mem + block->ofs; |
else { |
if (block) |
u_mmFreeMem(block);  /* don't leak the heap range we cannot back */ |
debug_printf("rtasm_exec_malloc failed\n"); |
} |
pipe_mutex_unlock(exec_mutex); |
return addr; |
} |
/** |
 * Release a block previously returned by rtasm_exec_malloc(). |
 * Passing NULL is a safe no-op (mirrors free() semantics). |
 * |
 * Thread-safe: serialized on exec_mutex. |
 */ |
void |
rtasm_exec_free(void *addr) |
{ |
pipe_mutex_lock(exec_mutex); |
/* Guard added: (addr - exec_mem) on a NULL addr is undefined pointer |
 * arithmetic; skip the lookup entirely for NULL. |
 */ |
if (addr && exec_heap) { |
struct mem_block *block = u_mmFindBlock(exec_heap, (unsigned char *)addr - exec_mem); |
if (block) |
u_mmFreeMem(block); |
} |
pipe_mutex_unlock(exec_mutex); |
} |
/drivers/video/Gallium/auxiliary/rtasm/rtasm_execmem.h |
---|
0,0 → 1,46 |
/************************************************************************** |
* |
* Copyright (C) 1999-2005 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/** |
* \file exemem.c |
* Functions for allocating executable memory. |
* |
* \author Keith Whitwell |
*/ |
#ifndef _RTASM_EXECMEM_H_ |
#define _RTASM_EXECMEM_H_ |
#include "pipe/p_compiler.h" |
extern void * |
rtasm_exec_malloc( size_t size ); |
extern void |
rtasm_exec_free( void *addr ); |
#endif |
/drivers/video/Gallium/auxiliary/rtasm/rtasm_x86sse.c |
---|
0,0 → 1,2232 |
/************************************************************************** |
* |
* Copyright (C) 1999-2005 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
#include "pipe/p_config.h" |
#include "util/u_cpu_detect.h" |
#if defined(PIPE_ARCH_X86) || (defined(PIPE_ARCH_X86_64) && !defined(__MINGW32__)) |
#include "pipe/p_compiler.h" |
#include "util/u_debug.h" |
#include "util/u_pointer.h" |
#include "rtasm_execmem.h" |
#include "rtasm_x86sse.h" |
#define DISASSEM 0 |
#define X86_TWOB 0x0f |
#define DUMP_SSE 0 |
/* Pretty-print one operand for the DUMP_* debug macros: register name, |
 * or a bracketed memory reference with its displacement. |
 * NOTE(review): extended (idx >= 8) REG32 registers print nothing -- |
 * only the 8 classic names are handled here. |
 */ |
void x86_print_reg( struct x86_reg reg ) |
{ |
if (reg.mod != mod_REG) |
debug_printf( "[" ); |
switch( reg.file ) { |
case file_REG32: |
switch( reg.idx ) { |
case reg_AX: debug_printf( "EAX" ); break; |
case reg_CX: debug_printf( "ECX" ); break; |
case reg_DX: debug_printf( "EDX" ); break; |
case reg_BX: debug_printf( "EBX" ); break; |
case reg_SP: debug_printf( "ESP" ); break; |
case reg_BP: debug_printf( "EBP" ); break; |
case reg_SI: debug_printf( "ESI" ); break; |
case reg_DI: debug_printf( "EDI" ); break; |
} |
break; |
case file_MMX: |
debug_printf( "MMX%u", reg.idx ); |
break; |
case file_XMM: |
debug_printf( "XMM%u", reg.idx ); |
break; |
case file_x87: |
debug_printf( "fp%u", reg.idx ); |
break; |
} |
if (reg.mod == mod_DISP8 || |
reg.mod == mod_DISP32) |
debug_printf("+%d", reg.disp); |
if (reg.mod != mod_REG) |
debug_printf( "]" ); |
} |
/* Debug tracing macros.  With DUMP_SSE enabled each emitter logs the |
 * current code offset, a mnemonic derived from the emitter's function |
 * name (text after the first '_'), and its operands.  Otherwise they |
 * all expand to nothing. |
 */ |
#if DUMP_SSE |
#define DUMP_START() debug_printf( "\n" ) |
#define DUMP_END() debug_printf( "\n" ) |
#define DUMP() do { \ |
const char *foo = __FUNCTION__; \ |
while (*foo && *foo != '_') \ |
foo++; \ |
if (*foo) \ |
foo++; \ |
debug_printf( "\n%4x %14s ", p->csr - p->store, foo ); \ |
} while (0) |
#define DUMP_I( I ) do { \ |
DUMP(); \ |
debug_printf( "%u", I ); \ |
} while( 0 ) |
#define DUMP_R( R0 ) do { \ |
DUMP(); \ |
x86_print_reg( R0 ); \ |
} while( 0 ) |
#define DUMP_RR( R0, R1 ) do { \ |
DUMP(); \ |
x86_print_reg( R0 ); \ |
debug_printf( ", " ); \ |
x86_print_reg( R1 ); \ |
} while( 0 ) |
#define DUMP_RI( R0, I ) do { \ |
DUMP(); \ |
x86_print_reg( R0 ); \ |
debug_printf( ", %u", I ); \ |
} while( 0 ) |
#define DUMP_RRI( R0, R1, I ) do { \ |
DUMP(); \ |
x86_print_reg( R0 ); \ |
debug_printf( ", " ); \ |
x86_print_reg( R1 ); \ |
debug_printf( ", %u", I ); \ |
} while( 0 ) |
#else |
/* Tracing disabled: all DUMP_* forms are no-ops. */ |
#define DUMP_START() |
#define DUMP_END() |
#define DUMP( ) |
#define DUMP_I( I ) |
#define DUMP_R( R0 ) |
#define DUMP_RR( R0, R1 ) |
#define DUMP_RI( R0, I ) |
#define DUMP_RRI( R0, R1, I ) |
#endif |
/* Grow (or first-allocate) the function's code buffer. |
 * Three cases: already on the error_overflow scratch buffer (just reset |
 * the cursor), first allocation (1 KB), or doubling with a copy of the |
 * bytes emitted so far.  If executable memory cannot be obtained we fall |
 * back to the small error_overflow buffer so later emits don't crash; |
 * note the emitted code is discarded in that case (csr reset to store). |
 */ |
static void do_realloc( struct x86_function *p ) |
{ |
if (p->store == p->error_overflow) { |
p->csr = p->store; |
} |
else if (p->size == 0) { |
p->size = 1024; |
p->store = rtasm_exec_malloc(p->size); |
p->csr = p->store; |
} |
else { |
/* Double the buffer and preserve the bytes already emitted. */ |
uintptr_t used = pointer_to_uintptr( p->csr ) - pointer_to_uintptr( p->store ); |
unsigned char *tmp = p->store; |
p->size *= 2; |
p->store = rtasm_exec_malloc(p->size); |
if (p->store) { |
memcpy(p->store, tmp, used); |
p->csr = p->store + used; |
} |
else { |
p->csr = p->store; |
} |
rtasm_exec_free(tmp); |
} |
/* Out of executable memory: degrade to the overflow scratch buffer. */ |
if (p->store == NULL) { |
p->store = p->csr = p->error_overflow; |
p->size = sizeof(p->error_overflow); |
} |
} |
/* Emit bytes to the instruction stream: |
 */ |
/* Reserve 'bytes' bytes at the cursor and advance it. |
 * NOTE(review): after do_realloc() the space is not re-checked; this |
 * assumes 'bytes' is tiny relative to the (at least 1 KB) buffer. |
 */ |
static unsigned char *reserve( struct x86_function *p, int bytes ) |
{ |
if (p->csr + bytes - p->store > (int) p->size) |
do_realloc(p); |
{ |
unsigned char *csr = p->csr; |
p->csr += bytes; |
return csr; |
} |
} |
/* Emit one signed byte. */ |
static void emit_1b( struct x86_function *p, char b0 ) |
{ |
char *csr = (char *)reserve(p, 1); |
*csr = b0; |
} |
/* Emit a 32-bit immediate.  The store may be unaligned; fine on x86, |
 * which is the only target of this emitter. |
 */ |
static void emit_1i( struct x86_function *p, int i0 ) |
{ |
int *icsr = (int *)reserve(p, sizeof(i0)); |
*icsr = i0; |
} |
/* Emit one unsigned byte. */ |
static void emit_1ub( struct x86_function *p, unsigned char b0 ) |
{ |
unsigned char *csr = reserve(p, 1); |
*csr++ = b0; |
} |
/* Emit two unsigned bytes. */ |
static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 ) |
{ |
unsigned char *csr = reserve(p, 2); |
*csr++ = b0; |
*csr++ = b1; |
} |
/* Emit three unsigned bytes. */ |
static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 ) |
{ |
unsigned char *csr = reserve(p, 3); |
*csr++ = b0; |
*csr++ = b1; |
*csr++ = b2; |
} |
/* Build a modRM byte + possible displacement. No treatment of SIB |
 * indexing. BZZT - no way to encode an absolute address. |
 * |
 * This is the "/r" field in the x86 manuals... |
 */ |
static void emit_modrm( struct x86_function *p, |
struct x86_reg reg, |
struct x86_reg regmem ) |
{ |
unsigned char val = 0; |
assert(reg.mod == mod_REG); |
/* TODO: support extended x86-64 registers */ |
assert(reg.idx < 8); |
assert(regmem.idx < 8); |
val |= regmem.mod << 6; /* mod field */ |
val |= reg.idx << 3; /* reg field */ |
val |= regmem.idx; /* r/m field */ |
emit_1ub(p, val); |
/* Oh-oh we've stumbled into the SIB thing. |
 */ |
/* r/m == ESP forces a SIB byte; 0x24 means base=ESP, no index. */ |
if (regmem.file == file_REG32 && |
regmem.idx == reg_SP && |
regmem.mod != mod_REG) { |
emit_1ub(p, 0x24); /* simplistic! */ |
} |
/* Trailing displacement, sized by the mod field. */ |
switch (regmem.mod) { |
case mod_REG: |
case mod_INDIRECT: |
break; |
case mod_DISP8: |
emit_1b(p, (char) regmem.disp); |
break; |
case mod_DISP32: |
emit_1i(p, regmem.disp); |
break; |
default: |
assert(0); |
break; |
} |
} |
/* Emits the "/0".."/7" specialized versions of the modrm ("/r") bytes. |
 */ |
static void emit_modrm_noreg( struct x86_function *p, |
unsigned op, |
struct x86_reg regmem ) |
{ |
struct x86_reg dummy = x86_make_reg(file_REG32, op); |
emit_modrm(p, dummy, regmem); |
} |
/* Many x86 instructions have two opcodes to cope with the situations |
 * where the destination is a register or memory reference |
 * respectively. This function selects the correct opcode based on |
 * the arguments presented. |
 */ |
static void emit_op_modrm( struct x86_function *p, |
unsigned char op_dst_is_reg, |
unsigned char op_dst_is_mem, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
switch (dst.mod) { |
case mod_REG: |
emit_1ub(p, op_dst_is_reg); |
emit_modrm(p, dst, src); |
break; |
case mod_INDIRECT: |
case mod_DISP32: |
case mod_DISP8: |
assert(src.mod == mod_REG); |
emit_1ub(p, op_dst_is_mem); |
emit_modrm(p, src, dst); |
break; |
default: |
assert(0); |
break; |
} |
} |
/* Create and manipulate registers and regmem values: |
 */ |
/* Build a plain (direct, mod=REG) register operand. */ |
struct x86_reg x86_make_reg( enum x86_reg_file file, |
enum x86_reg_name idx ) |
{ |
struct x86_reg reg; |
reg.file = file; |
reg.idx = idx; |
reg.mod = mod_REG; |
reg.disp = 0; |
return reg; |
} |
/* Turn a register into a memory operand [reg + disp], choosing the |
 * smallest displacement encoding.  EBP with disp 0 still needs DISP8 |
 * because mod=00/r/m=EBP would mean absolute disp32 instead. |
 */ |
struct x86_reg x86_make_disp( struct x86_reg reg, |
int disp ) |
{ |
assert(reg.file == file_REG32); |
if (reg.mod == mod_REG) |
reg.disp = disp; |
else |
reg.disp += disp; |
if (reg.disp == 0 && reg.idx != reg_BP) |
reg.mod = mod_INDIRECT; |
else if (reg.disp <= 127 && reg.disp >= -128) |
reg.mod = mod_DISP8; |
else |
reg.mod = mod_DISP32; |
return reg; |
} |
/* Memory operand [reg] -- shorthand for a zero displacement. */ |
struct x86_reg x86_deref( struct x86_reg reg ) |
{ |
return x86_make_disp(reg, 0); |
} |
/* Strip any memory addressing, yielding the bare base register. */ |
struct x86_reg x86_get_base_reg( struct x86_reg reg ) |
{ |
return x86_make_reg( reg.file, reg.idx ); |
} |
/* Current code offset from the start of the buffer; used as a label. */ |
int x86_get_label( struct x86_function *p ) |
{ |
return p->csr - p->store; |
} |
/*********************************************************************** |
* x86 instructions |
*/ |
/* Emit a REX.W prefix (0x48) on 64-bit targets; no-op on 32-bit. */ |
void x64_rexw(struct x86_function *p) |
{ |
if(x86_target(p) != X86_32) |
emit_1ub(p, 0x48); |
} |
/* Conditional jump to a backward (or already-known) label, using the |
 * short (rel8) form when the offset fits, else the near (rel32) form. |
 */ |
void x86_jcc( struct x86_function *p, |
enum x86_cc cc, |
int label ) |
{ |
int offset = label - (x86_get_label(p) + 2); |
DUMP_I(cc); |
if (offset < 0) { |
/*assert(p->csr - p->store > -offset);*/ |
if (p->csr - p->store <= -offset) { |
/* probably out of memory (using the error_overflow buffer) */ |
return; |
} |
} |
if (offset <= 127 && offset >= -128) { |
emit_1ub(p, 0x70 + cc); |
emit_1b(p, (char) offset); |
} |
else { |
/* Near form is 6 bytes, so recompute the relative offset. */ |
offset = label - (x86_get_label(p) + 6); |
emit_2ub(p, 0x0f, 0x80 + cc); |
emit_1i(p, offset); |
} |
} |
/* Always use a 32bit offset for forward jumps: |
 */ |
/* Emit Jcc rel32 with a zero placeholder; returns the fixup label to |
 * pass to x86_fixup_fwd_jump() once the target is known. |
 */ |
int x86_jcc_forward( struct x86_function *p, |
enum x86_cc cc ) |
{ |
DUMP_I(cc); |
emit_2ub(p, 0x0f, 0x80 + cc); |
emit_1i(p, 0); |
return x86_get_label(p); |
} |
/* Emit JMP rel32 with a zero placeholder; returns the fixup label. */ |
int x86_jmp_forward( struct x86_function *p) |
{ |
DUMP(); |
emit_1ub(p, 0xe9); |
emit_1i(p, 0); |
return x86_get_label(p); |
} |
/* Emit CALL rel32 with a zero placeholder; returns the fixup label. */ |
int x86_call_forward( struct x86_function *p) |
{ |
DUMP(); |
emit_1ub(p, 0xe8); |
emit_1i(p, 0); |
return x86_get_label(p); |
} |
/* Fixup offset from forward jump: |
 */ |
/* Patch the rel32 placeholder (the 4 bytes just before 'fixup') to |
 * target the current position. |
 */ |
void x86_fixup_fwd_jump( struct x86_function *p, |
int fixup ) |
{ |
*(int *)(p->store + fixup - 4) = x86_get_label(p) - fixup; |
} |
/* JMP rel32 to a known label (offset relative to end of instruction). */ |
void x86_jmp( struct x86_function *p, int label) |
{ |
DUMP_I( label ); |
emit_1ub(p, 0xe9); |
emit_1i(p, label - x86_get_label(p) - 4); |
} |
/* Indirect CALL through a register or memory operand (FF /2). */ |
void x86_call( struct x86_function *p, struct x86_reg reg) |
{ |
DUMP_R( reg ); |
emit_1ub(p, 0xff); |
emit_modrm_noreg(p, 2, reg); |
} |
/* MOV r32, imm32 (B8+r form; register destination only). */ |
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) |
{ |
DUMP_RI( dst, imm ); |
assert(dst.file == file_REG32); |
assert(dst.mod == mod_REG); |
emit_1ub(p, 0xb8 + dst.idx); |
emit_1i(p, imm); |
} |
/* MOV r/m32, imm32 -- register or memory destination. */ |
void x86_mov_imm( struct x86_function *p, struct x86_reg dst, int imm ) |
{ |
DUMP_RI( dst, imm ); |
if(dst.mod == mod_REG) |
x86_mov_reg_imm(p, dst, imm); |
else |
{ |
emit_1ub(p, 0xc7); |
emit_modrm_noreg(p, 0, dst); |
emit_1i(p, imm); |
} |
} |
/* MOV r/m16, imm16 (0x66 operand-size prefix). */ |
void x86_mov16_imm( struct x86_function *p, struct x86_reg dst, uint16_t imm ) |
{ |
DUMP_RI( dst, imm ); |
emit_1ub(p, 0x66); |
if(dst.mod == mod_REG) |
{ |
emit_1ub(p, 0xb8 + dst.idx); |
emit_2ub(p, imm & 0xff, imm >> 8); |
} |
else |
{ |
emit_1ub(p, 0xc7); |
emit_modrm_noreg(p, 0, dst); |
emit_2ub(p, imm & 0xff, imm >> 8); |
} |
} |
/* MOV r/m8, imm8. */ |
void x86_mov8_imm( struct x86_function *p, struct x86_reg dst, uint8_t imm ) |
{ |
DUMP_RI( dst, imm ); |
if(dst.mod == mod_REG) |
{ |
emit_1ub(p, 0xb0 + dst.idx); |
emit_1ub(p, imm); |
} |
else |
{ |
emit_1ub(p, 0xc6); |
emit_modrm_noreg(p, 0, dst); |
emit_1ub(p, imm); |
} |
} |
/** |
 * Immediate group 1 instructions. |
 * Opcode 0x83 with a sign-extended imm8 when the value fits, else |
 * 0x81 with imm32; 'op' selects the operation via the modrm /n field. |
 */ |
static INLINE void |
x86_group1_imm( struct x86_function *p, |
unsigned op, struct x86_reg dst, int imm ) |
{ |
assert(dst.file == file_REG32); |
assert(dst.mod == mod_REG); |
if(-0x80 <= imm && imm < 0x80) { |
emit_1ub(p, 0x83); |
emit_modrm_noreg(p, op, dst); |
emit_1b(p, (char)imm); |
} |
else { |
emit_1ub(p, 0x81); |
emit_modrm_noreg(p, op, dst); |
emit_1i(p, imm); |
} |
} |
/* ADD r32, imm (group-1 /0). */ |
void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm ) |
{ |
DUMP_RI( dst, imm ); |
x86_group1_imm(p, 0, dst, imm); |
} |
/* OR r32, imm (group-1 /1). */ |
void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm ) |
{ |
DUMP_RI( dst, imm ); |
x86_group1_imm(p, 1, dst, imm); |
} |
/* AND r32, imm (group-1 /4). */ |
void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm ) |
{ |
DUMP_RI( dst, imm ); |
x86_group1_imm(p, 4, dst, imm); |
} |
/* SUB r32, imm (group-1 /5). */ |
void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm ) |
{ |
DUMP_RI( dst, imm ); |
x86_group1_imm(p, 5, dst, imm); |
} |
/* XOR r32, imm (group-1 /6). */ |
void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm ) |
{ |
DUMP_RI( dst, imm ); |
x86_group1_imm(p, 6, dst, imm); |
} |
/* CMP r32, imm (group-1 /7). */ |
void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm ) |
{ |
DUMP_RI( dst, imm ); |
x86_group1_imm(p, 7, dst, imm); |
} |
/* PUSH register (50+r) or memory operand (FF /6); tracks stack depth |
 * in p->stack_offset for the x86_ret() sanity check. |
 */ |
void x86_push( struct x86_function *p, |
struct x86_reg reg ) |
{ |
DUMP_R( reg ); |
if (reg.mod == mod_REG) |
emit_1ub(p, 0x50 + reg.idx); |
else |
{ |
emit_1ub(p, 0xff); |
emit_modrm_noreg(p, 6, reg); |
} |
p->stack_offset += sizeof(void*); |
} |
/* PUSH imm32 (0x68). */ |
void x86_push_imm32( struct x86_function *p, |
int imm32 ) |
{ |
DUMP_I( imm32 ); |
emit_1ub(p, 0x68); |
emit_1i(p, imm32); |
p->stack_offset += sizeof(void*); |
} |
/* POP into a register (58+r); memory destinations unsupported. */ |
void x86_pop( struct x86_function *p, |
struct x86_reg reg ) |
{ |
DUMP_R( reg ); |
assert(reg.mod == mod_REG); |
emit_1ub(p, 0x58 + reg.idx); |
p->stack_offset -= sizeof(void*); |
} |
/* INC: short 40+r form on 32-bit only (those bytes are REX prefixes |
 * on x86-64), otherwise FF /0. |
 */ |
void x86_inc( struct x86_function *p, |
struct x86_reg reg ) |
{ |
DUMP_R( reg ); |
if(x86_target(p) == X86_32 && reg.mod == mod_REG) |
{ |
emit_1ub(p, 0x40 + reg.idx); |
return; |
} |
emit_1ub(p, 0xff); |
emit_modrm_noreg(p, 0, reg); |
} |
/* DEC: short 48+r form on 32-bit only, otherwise FF /1. */ |
void x86_dec( struct x86_function *p, |
struct x86_reg reg ) |
{ |
DUMP_R( reg ); |
if(x86_target(p) == X86_32 && reg.mod == mod_REG) |
{ |
emit_1ub(p, 0x48 + reg.idx); |
return; |
} |
emit_1ub(p, 0xff); |
emit_modrm_noreg(p, 1, reg); |
} |
/* RET; asserts pushes and pops are balanced at this point. */ |
void x86_ret( struct x86_function *p ) |
{ |
DUMP(); |
assert(p->stack_offset == 0); |
emit_1ub(p, 0xc3); |
} |
/* RET imm16 -- pop 'imm' extra bytes of arguments on return. */ |
void x86_retw( struct x86_function *p, unsigned short imm ) |
{ |
DUMP(); |
emit_3ub(p, 0xc2, imm & 0xff, (imm >> 8) & 0xff); |
} |
/* SAHF -- load flags from AH. */ |
void x86_sahf( struct x86_function *p ) |
{ |
DUMP(); |
emit_1ub(p, 0x9e); |
} |
/* 32-bit MOV between registers/memory (8B/89). */ |
void x86_mov( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
/* special hack for reading arguments until we support x86-64 registers everywhere */ |
if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8)) |
{ |
uint8_t rex = 0x40; |
if(dst.idx >= 8) |
{ |
rex |= 4;  /* REX.R -- extends the modrm reg field */ |
dst.idx -= 8; |
} |
if(src.idx >= 8) |
{ |
rex |= 1;  /* REX.B -- extends the modrm r/m field */ |
src.idx -= 8; |
} |
emit_1ub(p, rex); |
} |
emit_op_modrm( p, 0x8b, 0x89, dst, src ); |
} |
/* 16-bit MOV (0x66 prefix + 8B/89). */ |
void x86_mov16( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_1ub(p, 0x66); |
emit_op_modrm( p, 0x8b, 0x89, dst, src ); |
} |
/* 8-bit MOV (8A/88). */ |
void x86_mov8( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_op_modrm( p, 0x8a, 0x88, dst, src ); |
} |
/* 64-bit MOV (REX.W + 8B/89); only valid on x86-64 targets. */ |
void x64_mov64( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
uint8_t rex = 0x48; |
DUMP_RR( dst, src ); |
assert(x86_target(p) != X86_32); |
/* special hack for reading arguments until we support x86-64 registers everywhere */ |
if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8)) |
{ |
if(dst.idx >= 8) |
{ |
rex |= 4; |
dst.idx -= 8; |
} |
if(src.idx >= 8) |
{ |
rex |= 1; |
src.idx -= 8; |
} |
} |
emit_1ub(p, rex); |
emit_op_modrm( p, 0x8b, 0x89, dst, src ); |
} |
/* MOVZX r32, r/m8 (0F B6) -- zero-extend a byte. */ |
void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_2ub(p, 0x0f, 0xb6); |
emit_modrm(p, dst, src); |
} |
/* MOVZX r32, r/m16 (0F B7) -- zero-extend a word. */ |
void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_2ub(p, 0x0f, 0xb7); |
emit_modrm(p, dst, src); |
} |
/* CMOVcc (0F 40+cc) -- conditional move into dst when cc holds. */ |
void x86_cmovcc( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src, |
enum x86_cc cc) |
{ |
DUMP_RRI( dst, src, cc ); |
emit_2ub( p, 0x0f, 0x40 + cc ); |
emit_modrm( p, dst, src ); |
} |
/* XOR (33/31). */ |
void x86_xor( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_op_modrm( p, 0x33, 0x31, dst, src ); |
} |
/* CMP (3B/39) -- sets flags only. */ |
void x86_cmp( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_op_modrm( p, 0x3b, 0x39, dst, src ); |
} |
/* LEA (8D) -- load the effective address of src into dst. */ |
void x86_lea( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_1ub(p, 0x8d); |
emit_modrm( p, dst, src ); |
} |
/* TEST (85) -- AND operands, set flags, discard result. */ |
void x86_test( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_1ub(p, 0x85); |
emit_modrm( p, dst, src ); |
} |
/* ADD (03/01). */ |
void x86_add( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_op_modrm(p, 0x03, 0x01, dst, src ); |
} |
/* Calculate EAX * src, results in EDX:EAX. |
 */ |
/* Unsigned multiply: MUL r/m32 (F7 /4). */ |
void x86_mul( struct x86_function *p, |
struct x86_reg src ) |
{ |
DUMP_R( src ); |
emit_1ub(p, 0xf7); |
emit_modrm_noreg(p, 4, src ); |
} |
/* Signed multiply, two-operand form: IMUL r32, r/m32 (0F AF). */ |
void x86_imul( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_2ub(p, X86_TWOB, 0xAF); |
emit_modrm(p, dst, src); |
} |
/* SUB (2B/29). */ |
void x86_sub( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_op_modrm(p, 0x2b, 0x29, dst, src ); |
} |
/* OR (0B/09). */ |
void x86_or( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_op_modrm( p, 0x0b, 0x09, dst, src ); |
} |
/* AND (23/21). */ |
void x86_and( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_op_modrm( p, 0x23, 0x21, dst, src ); |
} |
/* Unsigned divide of EDX:EAX by src (F7 /6): quotient in EAX, |
 * remainder in EDX.  Register source only. |
 */ |
void x86_div( struct x86_function *p, |
struct x86_reg src ) |
{ |
DUMP_R( src );  /* added: every sibling emitter traces; div was silent */ |
assert(src.file == file_REG32 && src.mod == mod_REG); |
emit_op_modrm(p, 0xf7, 0, x86_make_reg(file_REG32, 6), src); |
} |
/* BSWAP r32 (0F C8+r) -- byte-swap a register in place. */ |
void x86_bswap( struct x86_function *p, struct x86_reg reg ) |
{ |
DUMP_R(reg); |
assert(reg.file == file_REG32); |
assert(reg.mod == mod_REG); |
emit_2ub(p, 0x0f, 0xc8 + reg.idx); |
} |
/* SHR r/m32, imm: D1 /5 for shift-by-1, else C1 /5 + imm8. */ |
void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ) |
{ |
DUMP_RI(reg, imm); |
if(imm == 1) |
{ |
emit_1ub(p, 0xd1); |
emit_modrm_noreg(p, 5, reg); |
} |
else |
{ |
emit_1ub(p, 0xc1); |
emit_modrm_noreg(p, 5, reg); |
emit_1ub(p, imm); |
} |
} |
/* SAR r/m32, imm (arithmetic right shift): D1 /7 or C1 /7 + imm8. */ |
void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ) |
{ |
DUMP_RI(reg, imm); |
if(imm == 1) |
{ |
emit_1ub(p, 0xd1); |
emit_modrm_noreg(p, 7, reg); |
} |
else |
{ |
emit_1ub(p, 0xc1); |
emit_modrm_noreg(p, 7, reg); |
emit_1ub(p, imm); |
} |
} |
/* SHL r/m32, imm: D1 /4 or C1 /4 + imm8. */ |
void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ) |
{ |
DUMP_RI(reg, imm); |
if(imm == 1) |
{ |
emit_1ub(p, 0xd1); |
emit_modrm_noreg(p, 4, reg); |
} |
else |
{ |
emit_1ub(p, 0xc1); |
emit_modrm_noreg(p, 4, reg); |
emit_1ub(p, imm); |
} |
} |
/*********************************************************************** |
* SSE instructions |
*/ |
/* PREFETCHNTA m8 (0F 18 /0) -- non-temporal prefetch hint. */ |
void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr) |
{ |
DUMP_R( ptr ); |
assert(ptr.mod != mod_REG); |
emit_2ub(p, 0x0f, 0x18); |
emit_modrm_noreg(p, 0, ptr); |
} |
/* PREFETCHT0 m8 (0F 18 /1). */ |
void sse_prefetch0( struct x86_function *p, struct x86_reg ptr) |
{ |
DUMP_R( ptr ); |
assert(ptr.mod != mod_REG); |
emit_2ub(p, 0x0f, 0x18); |
emit_modrm_noreg(p, 1, ptr); |
} |
/* PREFETCHT1 m8 (0F 18 /2). */ |
void sse_prefetch1( struct x86_function *p, struct x86_reg ptr) |
{ |
DUMP_R( ptr ); |
assert(ptr.mod != mod_REG); |
emit_2ub(p, 0x0f, 0x18); |
emit_modrm_noreg(p, 2, ptr); |
} |
/* MOVNTPS m128, xmm (0F 2B) -- non-temporal store; memory dst only. */ |
void sse_movntps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src) |
{ |
DUMP_RR( dst, src ); |
assert(dst.mod != mod_REG); |
assert(src.mod == mod_REG); |
emit_2ub(p, 0x0f, 0x2b); |
emit_modrm(p, src, dst); |
} |
/* MOVSS (F3 0F 10/11) -- move scalar single. */ |
void sse_movss( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_2ub(p, 0xF3, X86_TWOB); |
emit_op_modrm( p, 0x10, 0x11, dst, src ); |
} |
/* MOVAPS (0F 28/29) -- aligned packed-single move. */ |
void sse_movaps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_1ub(p, X86_TWOB); |
emit_op_modrm( p, 0x28, 0x29, dst, src ); |
} |
/* MOVUPS (0F 10/11) -- unaligned packed-single move. */ |
void sse_movups( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_1ub(p, X86_TWOB); |
emit_op_modrm( p, 0x10, 0x11, dst, src ); |
} |
/* MOVHPS (0F 16/17) -- high quadword <-> memory; one side must be |
 * a memory operand (reg,reg would encode MOVLHPS). |
 */ |
void sse_movhps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
assert(dst.mod != mod_REG || src.mod != mod_REG); |
emit_1ub(p, X86_TWOB); |
emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */ |
} |
/* MOVLPS (0F 12/13) -- low quadword <-> memory; one side must be |
 * a memory operand (reg,reg would encode MOVHLPS). |
 */ |
void sse_movlps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
assert(dst.mod != mod_REG || src.mod != mod_REG); |
emit_1ub(p, X86_TWOB); |
emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */ |
} |
/* Packed/scalar single-precision arithmetic.  All follow the same |
 * pattern: optional F3 (scalar) prefix, 0F, opcode, modrm(dst, src); |
 * dst must be an XMM register. |
 */ |
/* MAXPS (0F 5F). */ |
void sse_maxps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_2ub(p, X86_TWOB, 0x5F); |
emit_modrm( p, dst, src ); |
} |
/* MAXSS (F3 0F 5F). */ |
void sse_maxss( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_3ub(p, 0xF3, X86_TWOB, 0x5F); |
emit_modrm( p, dst, src ); |
} |
/* DIVSS (F3 0F 5E). */ |
void sse_divss( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_3ub(p, 0xF3, X86_TWOB, 0x5E); |
emit_modrm( p, dst, src ); |
} |
/* MINPS (0F 5D). */ |
void sse_minps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_2ub(p, X86_TWOB, 0x5D); |
emit_modrm( p, dst, src ); |
} |
/* SUBPS (0F 5C). */ |
void sse_subps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_2ub(p, X86_TWOB, 0x5C); |
emit_modrm( p, dst, src ); |
} |
/* MULPS (0F 59). */ |
void sse_mulps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_2ub(p, X86_TWOB, 0x59); |
emit_modrm( p, dst, src ); |
} |
/* MULSS (F3 0F 59). */ |
void sse_mulss( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_3ub(p, 0xF3, X86_TWOB, 0x59); |
emit_modrm( p, dst, src ); |
} |
/* ADDPS (0F 58). */ |
void sse_addps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_2ub(p, X86_TWOB, 0x58); |
emit_modrm( p, dst, src ); |
} |
/* ADDSS (F3 0F 58). */ |
void sse_addss( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_3ub(p, 0xF3, X86_TWOB, 0x58); |
emit_modrm( p, dst, src ); |
} |
/* ANDNPS (0F 55) -- dst = ~dst & src. */ |
void sse_andnps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_2ub(p, X86_TWOB, 0x55); |
emit_modrm( p, dst, src ); |
} |
/* ANDPS (0F 54). */ |
void sse_andps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_2ub(p, X86_TWOB, 0x54); |
emit_modrm( p, dst, src ); |
} |
/* RSQRTPS (0F 52) -- approximate packed reciprocal square root. */ |
void sse_rsqrtps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_2ub(p, X86_TWOB, 0x52); |
emit_modrm( p, dst, src ); |
} |
/* RSQRTSS (F3 0F 52) -- approximate scalar reciprocal square root. */ |
void sse_rsqrtss( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_3ub(p, 0xF3, X86_TWOB, 0x52); |
emit_modrm( p, dst, src ); |
} |
/* MOVHLPS (0F 12, reg-reg only) -- src high quad -> dst low quad. */ |
void sse_movhlps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
assert(dst.mod == mod_REG && src.mod == mod_REG); |
emit_2ub(p, X86_TWOB, 0x12); |
emit_modrm( p, dst, src ); |
} |
/* MOVLHPS (0F 16, reg-reg only) -- src low quad -> dst high quad. */ |
void sse_movlhps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
assert(dst.mod == mod_REG && src.mod == mod_REG); |
emit_2ub(p, X86_TWOB, 0x16); |
emit_modrm( p, dst, src ); |
} |
/* ORPS (0F 56). */ |
void sse_orps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_2ub(p, X86_TWOB, 0x56); |
emit_modrm( p, dst, src ); |
} |
/* XORPS (0F 57). */ |
void sse_xorps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_2ub(p, X86_TWOB, 0x57); |
emit_modrm( p, dst, src ); |
} |
/* CVTPS2PI (0F 2D) -- packed single -> MMX ints; sets need_emms so |
 * the epilogue knows to emit EMMS after MMX use. |
 */ |
void sse_cvtps2pi( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
assert(dst.file == file_MMX && |
(src.file == file_XMM || src.mod != mod_REG)); |
p->need_emms = 1; |
emit_2ub(p, X86_TWOB, 0x2d); |
emit_modrm( p, dst, src ); |
} |
/* CVTDQ2PS (0F 5B, SSE2) -- packed int32 -> packed single. */ |
void sse2_cvtdq2ps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_2ub(p, X86_TWOB, 0x5b); |
emit_modrm( p, dst, src ); |
} |
/* Shufps can also be used to implement a reduced swizzle when dest == |
 * arg0. |
 */ |
/* SHUFPS (0F C6 + imm8). */ |
void sse_shufps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src, |
unsigned char shuf) |
{ |
DUMP_RRI( dst, src, shuf ); |
emit_2ub(p, X86_TWOB, 0xC6); |
emit_modrm(p, dst, src); |
emit_1ub(p, shuf); |
} |
/* UNPCKHPS (0F 15) -- interleave high halves. */ |
void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_2ub( p, X86_TWOB, 0x15 ); |
emit_modrm( p, dst, src ); |
} |
/* UNPCKLPS (0F 14) -- interleave low halves. */ |
void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) |
{ |
DUMP_RR( dst, src ); |
emit_2ub( p, X86_TWOB, 0x14 ); |
emit_modrm( p, dst, src ); |
} |
/* CMPPS (0F C2 + imm8 predicate from enum sse_cc). */ |
void sse_cmpps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src, |
enum sse_cc cc) |
{ |
DUMP_RRI( dst, src, cc ); |
emit_2ub(p, X86_TWOB, 0xC2); |
emit_modrm(p, dst, src); |
emit_1ub(p, cc); |
} |
/* PMOVMSKB (66 0F D7) -- byte sign-mask of src into a GP register. */ |
void sse_pmovmskb( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src) |
{ |
DUMP_RR( dst, src ); |
emit_3ub(p, 0x66, X86_TWOB, 0xD7); |
emit_modrm(p, dst, src); |
} |
/* MOVMSKPS (0F 50) -- packed-single sign-mask into a GP register. */ |
void sse_movmskps( struct x86_function *p, |
struct x86_reg dst, |
struct x86_reg src) |
{ |
DUMP_RR( dst, src ); |
emit_2ub(p, X86_TWOB, 0x50); |
emit_modrm(p, dst, src); |
} |
/*********************************************************************** |
* SSE2 instructions |
*/ |
/* MOVD (66 0F 6E/7E) -- 32 bits between GP/memory and XMM.  A GP |
 * register destination needs the store form with operands swapped. |
 */ |
void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) |
{ |
DUMP_RR(dst, src); |
emit_2ub(p, 0x66, 0x0f); |
if(dst.mod == mod_REG && dst.file == file_REG32) |
{ |
emit_1ub(p, 0x7e); |
emit_modrm(p, src, dst); |
} |
else |
{ |
emit_op_modrm(p, 0x6e, 0x7e, dst, src); |
} |
} |
/* MOVQ -- F3 0F 7E loads into a register, 66 0F D6 stores to memory. */ |
void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) |
{ |
DUMP_RR(dst, src); |
switch (dst.mod) { |
case mod_REG: |
emit_3ub(p, 0xf3, 0x0f, 0x7e); |
emit_modrm(p, dst, src); |
break; |
case mod_INDIRECT: |
case mod_DISP32: |
case mod_DISP8: |
assert(src.mod == mod_REG); |
emit_3ub(p, 0x66, 0x0f, 0xd6); |
emit_modrm(p, src, dst); |
break; |
default: |
assert(0); |
break; |
} |
} |
/* MOVDQU (F3 0F 6F/7F) -- unaligned 128-bit integer move. */ |
void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) |
{ |
DUMP_RR(dst, src); |
emit_2ub(p, 0xf3, 0x0f); |
emit_op_modrm(p, 0x6f, 0x7f, dst, src); |
} |
/* MOVDQA (66 0F 6F/7F) -- aligned 128-bit integer move. */ |
void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) |
{ |
DUMP_RR(dst, src); |
emit_2ub(p, 0x66, 0x0f); |
emit_op_modrm(p, 0x6f, 0x7f, dst, src); |
} |
/* MOVSD (F2 0F 10/11) -- scalar double move. */ |
void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) |
{ |
DUMP_RR(dst, src); |
emit_2ub(p, 0xf2, 0x0f); |
emit_op_modrm(p, 0x10, 0x11, dst, src); |
} |
/* MOVUPD (66 0F 10/11) -- unaligned packed-double move. */ |
void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) |
{ |
DUMP_RR(dst, src); |
emit_2ub(p, 0x66, 0x0f); |
emit_op_modrm(p, 0x10, 0x11, dst, src); |
} |
/* MOVAPD (66 0F 28/29) -- aligned packed-double move. */ |
void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) |
{ |
DUMP_RR(dst, src); |
emit_2ub(p, 0x66, 0x0f); |
emit_op_modrm(p, 0x28, 0x29, dst, src); |
} |
/**
 * Perform a reduced swizzle:
 */
/* PSHUFD dst, src, imm8: 66 0F 70 /r ib — dword shuffle */
void sse2_pshufd( struct x86_function *p,
                  struct x86_reg dst,
                  struct x86_reg src,
                  unsigned char shuf)
{
   DUMP_RRI( dst, src, shuf );
   emit_3ub(p, 0x66, X86_TWOB, 0x70);
   emit_modrm(p, dst, src);
   emit_1ub(p, shuf);
}

/* PSHUFLW dst, src, imm8: F2 0F 70 /r ib — shuffle the low 4 words */
void sse2_pshuflw( struct x86_function *p,
                   struct x86_reg dst,
                   struct x86_reg src,
                   unsigned char shuf)
{
   DUMP_RRI( dst, src, shuf );
   emit_3ub(p, 0xf2, X86_TWOB, 0x70);
   emit_modrm(p, dst, src);
   emit_1ub(p, shuf);
}

/* PSHUFHW dst, src, imm8: F3 0F 70 /r ib — shuffle the high 4 words */
void sse2_pshufhw( struct x86_function *p,
                   struct x86_reg dst,
                   struct x86_reg src,
                   unsigned char shuf)
{
   DUMP_RRI( dst, src, shuf );
   emit_3ub(p, 0xf3, X86_TWOB, 0x70);
   emit_modrm(p, dst, src);
   emit_1ub(p, shuf);
}
/* CVTTPS2DQ: F3 0F 5B — packed float -> int32 with truncation */
void sse2_cvttps2dq( struct x86_function *p,
                     struct x86_reg dst,
                     struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub( p, 0xF3, X86_TWOB, 0x5B );
   emit_modrm( p, dst, src );
}

/* CVTPS2DQ: 66 0F 5B — packed float -> int32, current rounding mode */
void sse2_cvtps2dq( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0x66, X86_TWOB, 0x5B);
   emit_modrm( p, dst, src );
}

/* CVTSD2SS: F2 0F 5A — scalar double -> scalar float */
void sse2_cvtsd2ss( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0xf2, 0x0f, 0x5a);
   emit_modrm( p, dst, src );
}

/* CVTPD2PS: 66 0F 5A — packed double -> packed float */
void sse2_cvtpd2ps( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0x66, 0x0f, 0x5a);
   emit_modrm( p, dst, src );
}

/* PACKSSDW: 66 0F 6B — dwords -> signed-saturated words */
void sse2_packssdw( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0x66, X86_TWOB, 0x6B);
   emit_modrm( p, dst, src );
}

/* PACKSSWB: 66 0F 63 — words -> signed-saturated bytes */
void sse2_packsswb( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0x66, X86_TWOB, 0x63);
   emit_modrm( p, dst, src );
}

/* PACKUSWB: 66 0F 67 — words -> unsigned-saturated bytes */
void sse2_packuswb( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0x66, X86_TWOB, 0x67);
   emit_modrm( p, dst, src );
}
/* PUNPCKLBW: 66 0F 60 — interleave low bytes of dst and src */
void sse2_punpcklbw( struct x86_function *p,
                     struct x86_reg dst,
                     struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0x66, X86_TWOB, 0x60);
   emit_modrm( p, dst, src );
}

/* PUNPCKLWD: 66 0F 61 — interleave low words */
void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0x66, 0x0f, 0x61);
   emit_modrm( p, dst, src );
}

/* PUNPCKLDQ: 66 0F 62 — interleave low dwords */
void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0x66, 0x0f, 0x62);
   emit_modrm( p, dst, src );
}

/* PUNPCKLQDQ: 66 0F 6C — interleave low qwords */
void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0x66, 0x0f, 0x6c);
   emit_modrm( p, dst, src );
}
/* Immediate-count SIMD shifts.  The opcode group (66 0F 71/72/73) picks
 * the element width (word/dword/qword); the /reg field of the modrm byte
 * picks the operation: /6 = shift left, /2 = logical right, /4 = arithmetic
 * right.
 */

/* PSLLW xmm, imm8: 66 0F 71 /6 ib */
void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
   DUMP_RI(dst, imm);
   emit_3ub(p, 0x66, 0x0f, 0x71);
   emit_modrm_noreg(p, 6, dst);
   emit_1ub(p, imm);
}

/* PSLLD xmm, imm8: 66 0F 72 /6 ib */
void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
   DUMP_RI(dst, imm);
   emit_3ub(p, 0x66, 0x0f, 0x72);
   emit_modrm_noreg(p, 6, dst);
   emit_1ub(p, imm);
}

/* PSLLQ xmm, imm8: 66 0F 73 /6 ib */
void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
   DUMP_RI(dst, imm);
   emit_3ub(p, 0x66, 0x0f, 0x73);
   emit_modrm_noreg(p, 6, dst);
   emit_1ub(p, imm);
}

/* PSRLW xmm, imm8: 66 0F 71 /2 ib */
void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
   DUMP_RI(dst, imm);
   emit_3ub(p, 0x66, 0x0f, 0x71);
   emit_modrm_noreg(p, 2, dst);
   emit_1ub(p, imm);
}

/* PSRLD xmm, imm8: 66 0F 72 /2 ib */
void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
   DUMP_RI(dst, imm);
   emit_3ub(p, 0x66, 0x0f, 0x72);
   emit_modrm_noreg(p, 2, dst);
   emit_1ub(p, imm);
}

/* PSRLQ xmm, imm8: 66 0F 73 /2 ib */
void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
   DUMP_RI(dst, imm);
   emit_3ub(p, 0x66, 0x0f, 0x73);
   emit_modrm_noreg(p, 2, dst);
   emit_1ub(p, imm);
}

/* PSRAW xmm, imm8: 66 0F 71 /4 ib */
void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
   DUMP_RI(dst, imm);
   emit_3ub(p, 0x66, 0x0f, 0x71);
   emit_modrm_noreg(p, 4, dst);
   emit_1ub(p, imm);
}

/* PSRAD xmm, imm8: 66 0F 72 /4 ib */
void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
   DUMP_RI(dst, imm);
   emit_3ub(p, 0x66, 0x0f, 0x72);
   emit_modrm_noreg(p, 4, dst);
   emit_1ub(p, imm);
}
/* POR: 66 0F EB — bitwise OR of 128-bit registers */
void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR(dst, src);
   emit_3ub(p, 0x66, 0x0f, 0xeb);
   emit_modrm(p, dst, src);
}

/* NOTE(review): despite the sse2_ prefix, 0F 53 is RCPPS, an SSE1
 * instruction (packed approximate reciprocal).
 */
void sse2_rcpps( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_2ub(p, X86_TWOB, 0x53);
   emit_modrm( p, dst, src );
}

/* F3 0F 53 is RCPSS (scalar approximate reciprocal, also SSE1). */
void sse2_rcpss( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0xF3, X86_TWOB, 0x53);
   emit_modrm( p, dst, src );
}
/***********************************************************************
 * x87 instructions
 */

/* Track a pop of the shadow x87 stack depth counter (debug aid only;
 * does not emit code).
 */
static void note_x87_pop( struct x86_function *p )
{
   p->x87_stack--;
   assert(p->x87_stack >= 0);
}

/* Track a push of the shadow x87 stack depth counter (max 8 slots). */
static void note_x87_push( struct x86_function *p )
{
   p->x87_stack++;
   assert(p->x87_stack <= 7);
}

/* Debug check: every x87 push emitted so far has been matched by a pop. */
void x87_assert_stack_empty( struct x86_function *p )
{
   assert (p->x87_stack == 0);
}
/* FIST m32int: DB /2 — store st(0) as 32-bit integer */
void x87_fist( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   emit_1ub(p, 0xdb);
   emit_modrm_noreg(p, 2, dst);
}

/* FISTP m32int: DB /3 — store st(0) as 32-bit integer and pop */
void x87_fistp( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   emit_1ub(p, 0xdb);
   emit_modrm_noreg(p, 3, dst);
   note_x87_pop(p);
}

/* DF /0 is FILD m16int (16-bit load).
 * NOTE(review): asymmetric with x87_fist/x87_fistp above, which use the
 * 32-bit DB forms — confirm callers really pass 16-bit memory operands.
 */
void x87_fild( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   emit_1ub(p, 0xdf);
   emit_modrm_noreg(p, 0, arg);
   note_x87_push(p);
}

/* FLDZ: D9 EE — push +0.0 */
void x87_fldz( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xee);
   note_x87_push(p);
}

/* FLDCW m16: D9 /5 — load the x87 control word (memory operand only) */
void x87_fldcw( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   assert(arg.file == file_REG32);
   assert(arg.mod != mod_REG);
   emit_1ub(p, 0xd9);
   emit_modrm_noreg(p, 5, arg);
}

/* FLD1: D9 E8 — push +1.0 */
void x87_fld1( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xe8);
   note_x87_push(p);
}

/* FLDL2E: D9 EA — push log2(e) */
void x87_fldl2e( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xea);
   note_x87_push(p);
}

/* FLDLN2: D9 ED — push ln(2) */
void x87_fldln2( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xed);
   note_x87_push(p);
}

/* WAIT/FWAIT: 9B — wait for pending x87 exceptions */
void x87_fwait( struct x86_function *p )
{
   DUMP();
   emit_1ub(p, 0x9b);
}

/* FNCLEX: DB E2 — clear exception flags without waiting */
void x87_fnclex( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xdb, 0xe2);
}

/* FCLEX = FWAIT + FNCLEX (the waiting form) */
void x87_fclex( struct x86_function *p )
{
   x87_fwait(p);
   x87_fnclex(p);
}
/* x87 conditional moves: st(0) <- st(i) when the condition holds.
 * DA-row = B/E/BE predicates, DB-row = negated forms.
 */

/* FCMOVB: DA C0+i */
void x87_fcmovb( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   assert(arg.file == file_x87);
   emit_2ub(p, 0xda, 0xc0+arg.idx);
}

/* FCMOVE: DA C8+i */
void x87_fcmove( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   assert(arg.file == file_x87);
   emit_2ub(p, 0xda, 0xc8+arg.idx);
}

/* FCMOVBE: DA D0+i */
void x87_fcmovbe( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   assert(arg.file == file_x87);
   emit_2ub(p, 0xda, 0xd0+arg.idx);
}

/* FCMOVNB: DB C0+i */
void x87_fcmovnb( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   assert(arg.file == file_x87);
   emit_2ub(p, 0xdb, 0xc0+arg.idx);
}

/* FCMOVNE: DB C8+i */
void x87_fcmovne( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   assert(arg.file == file_x87);
   emit_2ub(p, 0xdb, 0xc8+arg.idx);
}

/* FCMOVNBE: DB D0+i */
void x87_fcmovnbe( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   assert(arg.file == file_x87);
   emit_2ub(p, 0xdb, 0xd0+arg.idx);
}
static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg, |
unsigned char dst0ub0, |
unsigned char dst0ub1, |
unsigned char arg0ub0, |
unsigned char arg0ub1, |
unsigned char argmem_noreg) |
{ |
assert(dst.file == file_x87); |
if (arg.file == file_x87) { |
if (dst.idx == 0) |
emit_2ub(p, dst0ub0, dst0ub1+arg.idx); |
else if (arg.idx == 0) |
emit_2ub(p, arg0ub0, arg0ub1+arg.idx); |
else |
assert(0); |
} |
else if (dst.idx == 0) { |
assert(arg.file == file_REG32); |
emit_1ub(p, 0xd8); |
emit_modrm_noreg(p, argmem_noreg, arg); |
} |
else |
assert(0); |
} |
/* FMUL: D8 C8+i / DC C8+i / D8 /4 (m32) */
void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR( dst, src );
   x87_arith_op(p, dst, src,
                0xd8, 0xc8,
                0xdc, 0xc8,
                4);
}

/* FSUB: D8 E0+i / DC E8+i / D8 /4
 * NOTE(review): the memory-form /reg here is 4 (FMUL's slot); the Intel
 * encoding for FSUB m32 is D8 /4 only for... confirm — FSUB m32real is
 * D8 /4? SDM says FSUB m32real = D8 /4 is incorrect; verify against SDM
 * (FSUB m32real = D8 /4 vs /5) before relying on the memory form.
 */
void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR( dst, src );
   x87_arith_op(p, dst, src,
                0xd8, 0xe0,
                0xdc, 0xe8,
                4);
}

/* FSUBR: D8 E8+i / DC E0+i / D8 /5 (m32) */
void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR( dst, src );
   x87_arith_op(p, dst, src,
                0xd8, 0xe8,
                0xdc, 0xe0,
                5);
}

/* FADD: D8 C0+i / DC C0+i / D8 /0 (m32) */
void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR( dst, src );
   x87_arith_op(p, dst, src,
                0xd8, 0xc0,
                0xdc, 0xc0,
                0);
}

/* FDIV: D8 F0+i / DC F8+i / D8 /6 (m32) */
void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR( dst, src );
   x87_arith_op(p, dst, src,
                0xd8, 0xf0,
                0xdc, 0xf8,
                6);
}

/* FDIVR: D8 F8+i / DC F0+i / D8 /7 (m32) */
void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR( dst, src );
   x87_arith_op(p, dst, src,
                0xd8, 0xf8,
                0xdc, 0xf0,
                7);
}
/* Popping arithmetic forms: op st(i), st(0) then pop.  All use the DE
 * opcode row; dst must be st(1)..st(7).
 */

/* FMULP: DE C8+i */
void x87_fmulp( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   assert(dst.file == file_x87);
   assert(dst.idx >= 1);
   emit_2ub(p, 0xde, 0xc8+dst.idx);
   note_x87_pop(p);
}

/* FSUBP: DE E8+i */
void x87_fsubp( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   assert(dst.file == file_x87);
   assert(dst.idx >= 1);
   emit_2ub(p, 0xde, 0xe8+dst.idx);
   note_x87_pop(p);
}

/* FSUBRP: DE E0+i */
void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   assert(dst.file == file_x87);
   assert(dst.idx >= 1);
   emit_2ub(p, 0xde, 0xe0+dst.idx);
   note_x87_pop(p);
}

/* FADDP: DE C0+i */
void x87_faddp( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   assert(dst.file == file_x87);
   assert(dst.idx >= 1);
   emit_2ub(p, 0xde, 0xc0+dst.idx);
   note_x87_pop(p);
}

/* FDIVP: DE F8+i */
void x87_fdivp( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   assert(dst.file == file_x87);
   assert(dst.idx >= 1);
   emit_2ub(p, 0xde, 0xf8+dst.idx);
   note_x87_pop(p);
}

/* FDIVRP: DE F0+i */
void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   assert(dst.file == file_x87);
   assert(dst.idx >= 1);
   emit_2ub(p, 0xde, 0xf0+dst.idx);
   note_x87_pop(p);
}
/* FTST: D9 E4 — compare st(0) with 0.0, set x87 condition codes */
void x87_ftst( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xe4);
}

/* FUCOM st(i): DD E0+i — unordered compare with st(0) */
void x87_fucom( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   assert(arg.file == file_x87);
   emit_2ub(p, 0xdd, 0xe0+arg.idx);
}

/* FUCOMP st(i): DD E8+i — unordered compare, pop once */
void x87_fucomp( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   assert(arg.file == file_x87);
   emit_2ub(p, 0xdd, 0xe8+arg.idx);
   note_x87_pop(p);
}

/* FUCOMPP: DA E9 — unordered compare st(0) with st(1), pop both */
void x87_fucompp( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xda, 0xe9);
   note_x87_pop(p);             /* pop twice */
   note_x87_pop(p);             /* pop twice */
}
/* FXCH st(i): D9 C8+i — swap st(0) with st(i) */
void x87_fxch( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   assert(arg.file == file_x87);
   emit_2ub(p, 0xd9, 0xc8+arg.idx);
}

/* FABS: D9 E1 */
void x87_fabs( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xe1);
}

/* FCHS: D9 E0 — negate st(0) */
void x87_fchs( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xe0);
}

/* FCOS: D9 FF */
void x87_fcos( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xff);
}

/* FRNDINT: D9 FC — round st(0) to integer per current rounding mode */
void x87_fprndint( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xfc);
}

/* FSCALE: D9 FD — st(0) *= 2^trunc(st(1)) */
void x87_fscale( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xfd);
}

/* FSIN: D9 FE */
void x87_fsin( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xfe);
}

/* FSINCOS: D9 FB — replaces st(0) with sin, pushes cos */
void x87_fsincos( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xfb);
}

/* FSQRT: D9 FA */
void x87_fsqrt( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xfa);
}

/* FXTRACT: D9 F4 — split st(0) into exponent and significand */
void x87_fxtract( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xf4);
}
/* st0 = (2^st0)-1
 *
 * Restrictions: -1.0 <= st0 <= 1.0
 */
/* F2XM1: D9 F0 */
void x87_f2xm1( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xf0);
}

/* st1 = st1 * log2(st0);
 * pop_stack;
 */
/* FYL2X: D9 F1 */
void x87_fyl2x( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xf1);
   note_x87_pop(p);
}

/* st1 = st1 * log2(st0 + 1.0);
 * pop_stack;
 *
 * A fast operation, with restrictions: -.29 < st0 < .29
 */
/* FYL2XP1: D9 F9 */
void x87_fyl2xp1( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xf9);
   note_x87_pop(p);
}
/* FLD: D9 C0+i (register) or D9 /0 (m32real) — push a value */
void x87_fld( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   if (arg.file == file_x87)
      emit_2ub(p, 0xd9, 0xc0 + arg.idx);
   else {
      emit_1ub(p, 0xd9);
      emit_modrm_noreg(p, 0, arg);
   }
   note_x87_push(p);
}

/* FST: DD D0+i (register) or D9 /2 (m32real) — store st(0), no pop */
void x87_fst( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   if (dst.file == file_x87)
      emit_2ub(p, 0xdd, 0xd0 + dst.idx);
   else {
      emit_1ub(p, 0xd9);
      emit_modrm_noreg(p, 2, dst);
   }
}

/* FSTP: DD D8+i (register) or D9 /3 (m32real) — store st(0) and pop */
void x87_fstp( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   if (dst.file == file_x87)
      emit_2ub(p, 0xdd, 0xd8 + dst.idx);
   else {
      emit_1ub(p, 0xd9);
      emit_modrm_noreg(p, 3, dst);
   }
   note_x87_pop(p);
}

/* Discard st(0): FSTP st(0) */
void x87_fpop( struct x86_function *p )
{
   x87_fstp( p, x86_make_reg( file_x87, 0 ));
}
/* FCOM: D8 D0+i (register) or D8 /2 (m32real) — compare with st(0) */
void x87_fcom( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   if (dst.file == file_x87)
      emit_2ub(p, 0xd8, 0xd0 + dst.idx);
   else {
      emit_1ub(p, 0xd8);
      emit_modrm_noreg(p, 2, dst);
   }
}

/* FCOMP: D8 D8+i (register) or D8 /3 (m32real) — compare, then pop */
void x87_fcomp( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   if (dst.file == file_x87)
      emit_2ub(p, 0xd8, 0xd8 + dst.idx);
   else {
      emit_1ub(p, 0xd8);
      emit_modrm_noreg(p, 3, dst);
   }
   note_x87_pop(p);
}
/* FCOMI st(0), st(i): DB F0+i — compare and set EFLAGS (no pop) */
void x87_fcomi( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   emit_2ub(p, 0xdb, 0xf0+arg.idx);
}
/* FCOMIP st(0), st(i): compare, set EFLAGS, then pop.
 * Bug fix: the opcode row is DF (DF F0+i); DB F0+i is the non-popping
 * FCOMI, which would leave the real x87 stack one deeper than the
 * shadow counter believes after note_x87_pop().
 */
void x87_fcomip( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   emit_2ub(p, 0xdf, 0xf0+arg.idx);
   note_x87_pop(p);
}
/* FNSTSW: DF E0 (to AX) or DD /7 (to m16) — store the status word */
void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   assert(dst.file == file_REG32);

   if (dst.idx == reg_AX &&
       dst.mod == mod_REG)
      emit_2ub(p, 0xdf, 0xe0);
   else {
      emit_1ub(p, 0xdd);
      emit_modrm_noreg(p, 7, dst);
   }
}

/* FSTCW m16 (9B D9 /7): WAIT prefix + FNSTCW — store the control word */
void x87_fnstcw( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   assert(dst.file == file_REG32);

   emit_1ub(p, 0x9b);           /* WAIT -- needed? */
   emit_1ub(p, 0xd9);
   emit_modrm_noreg(p, 7, dst);
}
/***********************************************************************
 * MMX instructions
 */

/* EMMS: 0F 77 — leave MMX state; clears the need_emms flag that the
 * mm-register emitters below set.
 */
void mmx_emms( struct x86_function *p )
{
   DUMP();
   assert(p->need_emms);
   emit_2ub(p, 0x0f, 0x77);
   p->need_emms = 0;
}

/* PACKSSDW mm, mm/m64: 0F 6B */
void mmx_packssdw( struct x86_function *p,
                   struct x86_reg dst,
                   struct x86_reg src )
{
   DUMP_RR( dst, src );
   assert(dst.file == file_MMX &&
          (src.file == file_MMX || src.mod != mod_REG));

   p->need_emms = 1;

   emit_2ub(p, X86_TWOB, 0x6b);
   emit_modrm( p, dst, src );
}

/* PACKUSWB mm, mm/m64: 0F 67 */
void mmx_packuswb( struct x86_function *p,
                   struct x86_reg dst,
                   struct x86_reg src )
{
   DUMP_RR( dst, src );
   assert(dst.file == file_MMX &&
          (src.file == file_MMX || src.mod != mod_REG));

   p->need_emms = 1;

   emit_2ub(p, X86_TWOB, 0x67);
   emit_modrm( p, dst, src );
}

/* MOVD: 0F 6E (load) / 0F 7E (store) */
void mmx_movd( struct x86_function *p,
               struct x86_reg dst,
               struct x86_reg src )
{
   DUMP_RR( dst, src );
   p->need_emms = 1;
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x6e, 0x7e, dst, src );
}

/* MOVQ mm: 0F 6F (load) / 0F 7F (store) */
void mmx_movq( struct x86_function *p,
               struct x86_reg dst,
               struct x86_reg src )
{
   DUMP_RR( dst, src );
   p->need_emms = 1;
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x6f, 0x7f, dst, src );
}
/***********************************************************************
 * Helper functions
 */

/* Save the cdecl caller-saved GP registers (EAX, ECX, EDX) around a call. */
void x86_cdecl_caller_push_regs( struct x86_function *p )
{
   x86_push(p, x86_make_reg(file_REG32, reg_AX));
   x86_push(p, x86_make_reg(file_REG32, reg_CX));
   x86_push(p, x86_make_reg(file_REG32, reg_DX));
}

/* Restore the registers saved above, in reverse push order. */
void x86_cdecl_caller_pop_regs( struct x86_function *p )
{
   x86_pop(p, x86_make_reg(file_REG32, reg_DX));
   x86_pop(p, x86_make_reg(file_REG32, reg_CX));
   x86_pop(p, x86_make_reg(file_REG32, reg_AX));
}
/* Return the location (register or stack slot) of the generated
 * function's arg'th incoming argument, per the target calling
 * convention.  arg is 1-based.
 */
struct x86_reg x86_fn_arg( struct x86_function *p,
                           unsigned arg )
{
   switch(x86_target(p))
   {
   case X86_64_WIN64_ABI:
      /* Microsoft uses a different calling convention than the rest of the world */
      switch(arg)
      {
      case 1:
         return x86_make_reg(file_REG32, reg_CX);
      case 2:
         return x86_make_reg(file_REG32, reg_DX);
      case 3:
         return x86_make_reg(file_REG32, reg_R8);
      case 4:
         return x86_make_reg(file_REG32, reg_R9);
      default:
         /* Win64 allocates stack slots as if it pushed the first 4 arguments too */
         return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
                              p->stack_offset + arg * 8);
      }
   case X86_64_STD_ABI:
      /* System V AMD64: first six integer args in registers */
      switch(arg)
      {
      case 1:
         return x86_make_reg(file_REG32, reg_DI);
      case 2:
         return x86_make_reg(file_REG32, reg_SI);
      case 3:
         return x86_make_reg(file_REG32, reg_DX);
      case 4:
         return x86_make_reg(file_REG32, reg_CX);
      case 5:
         return x86_make_reg(file_REG32, reg_R8);
      case 6:
         return x86_make_reg(file_REG32, reg_R9);
      default:
         return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
                              p->stack_offset + (arg - 6) * 8);     /* ??? */
      }
   case X86_32:
      /* everything on the stack; stack_offset accounts for pushes so far */
      return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
                           p->stack_offset + arg * 4);      /* ??? */
   default:
      assert(0 && "Unexpected x86 target ABI in x86_fn_arg");
      return x86_make_reg(file_REG32, reg_CX); /* not used / silence warning */
   }
}
/* Shared init: probe CPU features into p->caps and point the code
 * cursor (csr) at the start of the store buffer.
 */
static void x86_init_func_common( struct x86_function *p )
{
   util_cpu_detect();
   p->caps = 0;
   if(util_cpu_caps.has_mmx)
      p->caps |= X86_MMX;
   if(util_cpu_caps.has_mmx2)
      p->caps |= X86_MMX2;
   if(util_cpu_caps.has_sse)
      p->caps |= X86_SSE;
   if(util_cpu_caps.has_sse2)
      p->caps |= X86_SSE2;
   if(util_cpu_caps.has_sse3)
      p->caps |= X86_SSE3;
   if(util_cpu_caps.has_sse4_1)
      p->caps |= X86_SSE4_1;
   p->csr = p->store;
   DUMP_START();
}

/* Initialize with no code buffer (size 0, store NULL). */
void x86_init_func( struct x86_function *p )
{
   p->size = 0;
   p->store = NULL;
   x86_init_func_common(p);
}

/* Initialize with an executable buffer of code_size bytes.  On
 * allocation failure, fall back to the tiny error_overflow scratch
 * area so emission doesn't crash; x86_get_func() later reports NULL.
 */
void x86_init_func_size( struct x86_function *p, unsigned code_size )
{
   p->size = code_size;
   p->store = rtasm_exec_malloc(code_size);
   if (p->store == NULL) {
      p->store = p->error_overflow;
   }
   x86_init_func_common(p);
}

/* Free the executable buffer (unless it's the in-struct fallback). */
void x86_release_func( struct x86_function *p )
{
   if (p->store && p->store != p->error_overflow)
      rtasm_exec_free(p->store);

   p->store = NULL;
   p->csr = NULL;
   p->size = 0;
}
/* Convert a data pointer to a function pointer through a union, since a
 * direct cast between object and function pointers is not portable C.
 */
static INLINE x86_func
voidptr_to_x86_func(void *v)
{
   union {
      void *v;
      x86_func f;
   } u;
   assert(sizeof(u.v) == sizeof(u.f));
   u.v = v;
   return u.f;
}

/* Finish code generation and return the function entry point, or NULL
 * if emission overflowed into the error_overflow fallback buffer.
 */
x86_func x86_get_func( struct x86_function *p )
{
   DUMP_END();
   if (DISASSEM && p->store)
      debug_printf("disassemble %p %p\n", p->store, p->csr);

   if (p->store == p->error_overflow)
      return voidptr_to_x86_func(NULL);
   else
      return voidptr_to_x86_func(p->store);
}
#else

/* Non-x86 build: keep the translation unit non-empty. */
void x86sse_dummy( void );
void x86sse_dummy( void )
{
}

#endif
/drivers/video/Gallium/auxiliary/rtasm/rtasm_x86sse.h |
---|
0,0 → 1,416 |
/************************************************************************** |
* |
* Copyright (C) 1999-2005 Brian Paul All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
#ifndef _RTASM_X86SSE_H_ |
#define _RTASM_X86SSE_H_ |
#include "pipe/p_compiler.h" |
#include "pipe/p_config.h" |
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) |
/* It is up to the caller to ensure that instructions issued are
 * suitable for the host cpu.  There are no checks made in this module
 * for mmx/sse/sse2 support on the cpu.
 */
struct x86_reg {
   unsigned file:2;             /* enum x86_reg_file */
   unsigned idx:4;              /* register number within the file */
   unsigned mod:2;              /* mod_REG if this is just a register */
   int disp:24;                 /* only +/- 23bits of offset - should be enough... */
};

/* CPU capability bits for x86_function::caps */
#define X86_MMX 1
#define X86_MMX2 2
#define X86_SSE 4
#define X86_SSE2 8
#define X86_SSE3 0x10
#define X86_SSE4_1 0x20

struct x86_function {
   unsigned caps;               /* detected X86_* capability bits */
   unsigned size;               /* size of the code buffer */
   unsigned char *store;        /* start of the executable code buffer */
   unsigned char *csr;          /* current emit position within store */
   unsigned stack_offset:16;    /* bytes pushed since function entry */
   unsigned need_emms:8;        /* set when MMX regs are dirty */
   int x87_stack:8;             /* shadow x87 stack depth (debug checks) */
   unsigned char error_overflow[4];  /* fallback store on alloc failure */
};
/* Register files addressable by struct x86_reg */
enum x86_reg_file {
   file_REG32,
   file_MMX,
   file_XMM,
   file_x87
};

/* Values for mod field of modr/m byte
 */
enum x86_reg_mod {
   mod_INDIRECT,
   mod_DISP8,
   mod_DISP32,
   mod_REG
};

/* GP register numbers; R8-R15 are only meaningful on x86-64 */
enum x86_reg_name {
   reg_AX,
   reg_CX,
   reg_DX,
   reg_BX,
   reg_SP,
   reg_BP,
   reg_SI,
   reg_DI,
   reg_R8,
   reg_R9,
   reg_R10,
   reg_R11,
   reg_R12,
   reg_R13,
   reg_R14,
   reg_R15
};

/* Condition codes for x86_jcc / x86_cmovcc */
enum x86_cc {
   cc_O,                        /* overflow */
   cc_NO,                       /* not overflow */
   cc_NAE,                      /* not above or equal / carry */
   cc_AE,                       /* above or equal / not carry */
   cc_E,                        /* equal / zero */
   cc_NE                        /* not equal / not zero */
};

/* Comparison predicates for sse_cmpps (imm8 values 0..7) */
enum sse_cc {
   cc_Equal,
   cc_LessThan,
   cc_LessThanEqual,
   cc_Unordered,
   cc_NotEqual,
   cc_NotLessThan,
   cc_NotLessThanEqual,
   cc_Ordered
};

#define cc_Z cc_E
#define cc_NZ cc_NE

/** generic pointer to function */
typedef void (*x86_func)(void);

/* Begin/end/retrieve function creation:
 */

/* Target ABI selection for generated code */
enum x86_target
{
   X86_32,
   X86_64_STD_ABI,
   X86_64_WIN64_ABI
};
/* make this read a member of x86_function if target != host is desired */
/* Compile-time choice of target ABI; the outer PIPE_ARCH guard
 * guarantees one of the branches is taken.
 */
static INLINE enum x86_target x86_target( struct x86_function* p )
{
#ifdef PIPE_ARCH_X86
   return X86_32;
#elif defined(_WIN64)
   return X86_64_WIN64_ABI;
#elif defined(PIPE_ARCH_X86_64)
   return X86_64_STD_ABI;
#endif
}
/* Capability bits (X86_MMX, X86_SSE, ...) detected at init time. */
static INLINE unsigned x86_target_caps( struct x86_function* p )
{
   return p->caps;
}
void x86_init_func( struct x86_function *p ); |
void x86_init_func_size( struct x86_function *p, unsigned code_size ); |
void x86_release_func( struct x86_function *p ); |
x86_func x86_get_func( struct x86_function *p ); |
/* Debugging: |
*/ |
void x86_print_reg( struct x86_reg reg ); |
/* Create and manipulate registers and regmem values: |
*/ |
struct x86_reg x86_make_reg( enum x86_reg_file file, |
enum x86_reg_name idx ); |
struct x86_reg x86_make_disp( struct x86_reg reg, |
int disp ); |
struct x86_reg x86_deref( struct x86_reg reg ); |
struct x86_reg x86_get_base_reg( struct x86_reg reg ); |
/* Labels, jumps and fixup: |
*/ |
int x86_get_label( struct x86_function *p ); |
void x64_rexw(struct x86_function *p); |
void x86_jcc( struct x86_function *p, |
enum x86_cc cc, |
int label ); |
int x86_jcc_forward( struct x86_function *p, |
enum x86_cc cc ); |
int x86_jmp_forward( struct x86_function *p); |
int x86_call_forward( struct x86_function *p); |
void x86_fixup_fwd_jump( struct x86_function *p, |
int fixup ); |
void x86_jmp( struct x86_function *p, int label ); |
/* void x86_call( struct x86_function *p, void (*label)() ); */ |
void x86_call( struct x86_function *p, struct x86_reg reg); |
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ); |
void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm ); |
void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm ); |
void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm ); |
void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm ); |
void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm ); |
void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm ); |
/* Macro for sse_shufps() and sse2_pshufd():
 * Packs four 2-bit lane selectors into a shuffle imm8.
 */
#define SHUF(_x,_y,_z,_w) (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6))
/* Identity shuffle.  Bug fix: this was spelled RSW(0,1,2,3), but no RSW
 * macro exists anywhere in this module, so any use failed to compile.
 */
#define SHUF_NOOP SHUF(0,1,2,3)
#define GET_SHUF(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
void mmx_emms( struct x86_function *p ); |
void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_cvtsd2ss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_cvtpd2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, |
unsigned char shuf ); |
void sse2_pshuflw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, |
unsigned char shuf ); |
void sse2_pshufhw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, |
unsigned char shuf ); |
void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); |
void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); |
void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); |
void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); |
void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); |
void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); |
void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); |
void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); |
void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse2_pshuflw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm ); |
void sse2_pshufhw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm ); |
void sse2_pshufd( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm ); |
void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr); |
void sse_prefetch0( struct x86_function *p, struct x86_reg ptr); |
void sse_prefetch1( struct x86_function *p, struct x86_reg ptr); |
void sse_movntps( struct x86_function *p, struct x86_reg dst, struct x86_reg src); |
void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, |
enum sse_cc cc ); |
void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, |
unsigned char shuf ); |
void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src ); |
void sse_movmskps( struct x86_function *p, struct x86_reg dst, struct x86_reg src); |
void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void x86_cmovcc( struct x86_function *p, struct x86_reg dst, struct x86_reg src, enum x86_cc cc ); |
void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void x86_dec( struct x86_function *p, struct x86_reg reg ); |
void x86_inc( struct x86_function *p, struct x86_reg reg ); |
void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void x64_mov64( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void x86_mov8( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void x86_mov16( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void x86_mov_imm(struct x86_function *p, struct x86_reg dst, int imm ); |
void x86_mov8_imm(struct x86_function *p, struct x86_reg dst, uint8_t imm ); |
void x86_mov16_imm(struct x86_function *p, struct x86_reg dst, uint16_t imm ); |
void x86_mul( struct x86_function *p, struct x86_reg src ); |
void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void x86_pop( struct x86_function *p, struct x86_reg reg ); |
void x86_push( struct x86_function *p, struct x86_reg reg ); |
void x86_push_imm32( struct x86_function *p, int imm ); |
void x86_ret( struct x86_function *p ); |
void x86_retw( struct x86_function *p, unsigned short imm ); |
void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); |
void x86_sahf( struct x86_function *p ); |
void x86_div( struct x86_function *p, struct x86_reg src ); |
void x86_bswap( struct x86_function *p, struct x86_reg src ); |
void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ); |
void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ); |
void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ); |
void x86_cdecl_caller_push_regs( struct x86_function *p ); |
void x86_cdecl_caller_pop_regs( struct x86_function *p ); |
void x87_assert_stack_empty( struct x86_function *p ); |
void x87_f2xm1( struct x86_function *p ); |
void x87_fabs( struct x86_function *p ); |
void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); |
void x87_faddp( struct x86_function *p, struct x86_reg dst ); |
void x87_fchs( struct x86_function *p ); |
void x87_fclex( struct x86_function *p ); |
void x87_fcmovb( struct x86_function *p, struct x86_reg src ); |
void x87_fcmovbe( struct x86_function *p, struct x86_reg src ); |
void x87_fcmove( struct x86_function *p, struct x86_reg src ); |
void x87_fcmovnb( struct x86_function *p, struct x86_reg src ); |
void x87_fcmovnbe( struct x86_function *p, struct x86_reg src ); |
void x87_fcmovne( struct x86_function *p, struct x86_reg src ); |
void x87_fcom( struct x86_function *p, struct x86_reg dst ); |
void x87_fcomi( struct x86_function *p, struct x86_reg dst ); |
void x87_fcomip( struct x86_function *p, struct x86_reg dst ); |
void x87_fcomp( struct x86_function *p, struct x86_reg dst ); |
void x87_fcos( struct x86_function *p ); |
void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); |
void x87_fdivp( struct x86_function *p, struct x86_reg dst ); |
void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); |
void x87_fdivrp( struct x86_function *p, struct x86_reg dst ); |
void x87_fild( struct x86_function *p, struct x86_reg arg ); |
void x87_fist( struct x86_function *p, struct x86_reg dst ); |
void x87_fistp( struct x86_function *p, struct x86_reg dst ); |
void x87_fld( struct x86_function *p, struct x86_reg arg ); |
void x87_fld1( struct x86_function *p ); |
void x87_fldcw( struct x86_function *p, struct x86_reg arg ); |
void x87_fldl2e( struct x86_function *p ); |
void x87_fldln2( struct x86_function *p ); |
void x87_fldz( struct x86_function *p ); |
void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); |
void x87_fmulp( struct x86_function *p, struct x86_reg dst ); |
void x87_fnclex( struct x86_function *p ); |
void x87_fprndint( struct x86_function *p ); |
void x87_fpop( struct x86_function *p ); |
void x87_fscale( struct x86_function *p ); |
void x87_fsin( struct x86_function *p ); |
void x87_fsincos( struct x86_function *p ); |
void x87_fsqrt( struct x86_function *p ); |
void x87_fst( struct x86_function *p, struct x86_reg dst ); |
void x87_fstp( struct x86_function *p, struct x86_reg dst ); |
void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); |
void x87_fsubp( struct x86_function *p, struct x86_reg dst ); |
void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); |
void x87_fsubrp( struct x86_function *p, struct x86_reg dst ); |
void x87_ftst( struct x86_function *p ); |
void x87_fxch( struct x86_function *p, struct x86_reg dst ); |
void x87_fxtract( struct x86_function *p ); |
void x87_fyl2x( struct x86_function *p ); |
void x87_fyl2xp1( struct x86_function *p ); |
void x87_fwait( struct x86_function *p ); |
void x87_fnstcw( struct x86_function *p, struct x86_reg dst ); |
void x87_fnstsw( struct x86_function *p, struct x86_reg dst ); |
void x87_fucompp( struct x86_function *p ); |
void x87_fucomp( struct x86_function *p, struct x86_reg arg ); |
void x87_fucom( struct x86_function *p, struct x86_reg arg ); |
/* Retrieve a reference to one of the function arguments, taking into |
* account any push/pop activity. Note - doesn't track explicit |
* manipulation of ESP by other instructions. |
*/ |
struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg ); |
#endif |
#endif |
/drivers/video/Gallium/auxiliary/translate/translate.c |
---|
0,0 → 1,55 |
/************************************************************************** |
* |
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/* |
* Authors: |
* Keith Whitwell <keith@tungstengraphics.com> |
*/ |
#include "pipe/p_config.h" |
#include "pipe/p_state.h" |
#include "translate.h" |
struct translate *translate_create( const struct translate_key *key )
{
   /* Prefer the SSE2 code generator on x86/x86-64; fall back to the
    * generic C implementation when it is unavailable or declines the key.
    */
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   struct translate *sse2 = translate_sse2_create( key );
   if (sse2 != NULL)
      return sse2;
#endif
   return translate_generic_create( key );
}
boolean translate_is_output_format_supported(enum pipe_format format)
{
   /* The generic implementation is always available as a fallback, so a
    * format is supported exactly when the generic path can emit it.
    */
   boolean supported = translate_generic_is_output_format_supported(format);
   return supported;
}
/drivers/video/Gallium/auxiliary/translate/translate.h |
---|
0,0 → 1,160 |
/* |
* Copyright 2008 Tungsten Graphics, inc. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. |
*/ |
/** |
* Vertex fetch/store/convert code. This functionality is used in two places: |
* 1. Vertex fetch/convert - to grab vertex data from incoming vertex |
* arrays and convert to format needed by vertex shaders. |
* 2. Vertex store/emit - to convert simple float[][4] vertex attributes |
* (which is the organization used throughout the draw/prim pipeline) to |
* hardware-specific formats and emit into hardware vertex buffers. |
* |
* |
* Authors: |
* Keith Whitwell <keithw@tungstengraphics.com> |
*/ |
#ifndef _TRANSLATE_H |
#define _TRANSLATE_H |
#include "pipe/p_compiler.h" |
#include "pipe/p_format.h" |
#include "pipe/p_state.h" |
/* How a translate element obtains its value. */
enum translate_element_type {
   TRANSLATE_ELEMENT_NORMAL,       /* fetched/converted from an input buffer */
   TRANSLATE_ELEMENT_INSTANCE_ID   /* derived from the instance id (see copy_size note in translate_generic.c) */
};
/* Description of a single vertex element (attribute) to translate. */
struct translate_element
{
   enum translate_element_type type;   /* normal attribute or instance id */
   enum pipe_format input_format;      /* format of the source data */
   enum pipe_format output_format;     /* format to emit into the output vertex */
   unsigned input_buffer:8;            /* index of the source vertex buffer */
   unsigned input_offset:24;           /* byte offset within an input vertex */
   unsigned instance_divisor;          /* presumably non-zero selects per-instance stepping -- confirm against users */
   unsigned output_offset;             /* byte offset within an output vertex */
};
/* Complete description of a translation; used as the hash/cache key.
 * Only the first nr_elements entries of element[] are meaningful --
 * see translate_keysize()/translate_key_sanitize() below.
 */
struct translate_key {
   unsigned output_stride;      /* byte stride between output vertices */
   unsigned nr_elements;        /* number of used entries in element[] */
   /* NOTE(review): array is PIPE_MAX_ATTRIBS + 1 -- the extra slot's
    * purpose is not visible here; confirm before shrinking.
    */
   struct translate_element element[PIPE_MAX_ATTRIBS + 1];
};
struct translate;

/* Translate vertices selected by an array of 32-bit indices. */
typedef void (PIPE_CDECL *run_elts_func)(struct translate *,
                                         const unsigned *elts,
                                         unsigned count,
                                         unsigned start_instance,
                                         unsigned instance_id,
                                         void *output_buffer);

/* Translate vertices selected by an array of 16-bit indices. */
typedef void (PIPE_CDECL *run_elts16_func)(struct translate *,
                                           const uint16_t *elts,
                                           unsigned count,
                                           unsigned start_instance,
                                           unsigned instance_id,
                                           void *output_buffer);

/* Translate vertices selected by an array of 8-bit indices. */
typedef void (PIPE_CDECL *run_elts8_func)(struct translate *,
                                          const uint8_t *elts,
                                          unsigned count,
                                          unsigned start_instance,
                                          unsigned instance_id,
                                          void *output_buffer);

/* Translate a linear range of vertices [start, start + count). */
typedef void (PIPE_CDECL *run_func)(struct translate *,
                                    unsigned start,
                                    unsigned count,
                                    unsigned start_instance,
                                    unsigned instance_id,
                                    void *output_buffer);
/* Base "class" for a translation object.  Concrete implementations
 * (generic C, SSE2) fill in the function pointers.
 */
struct translate {
   struct translate_key key;    /* the key this object was created for */

   /* Destructor: releases the object and any generated code. */
   void (*release)( struct translate * );

   /* Bind input vertex buffer 'i' before calling any run function. */
   void (*set_buffer)( struct translate *,
                       unsigned i,
                       const void *ptr,
                       unsigned stride,
                       unsigned max_index );

   run_elts_func run_elts;      /* indexed (32-bit) translation */
   run_elts16_func run_elts16;  /* indexed (16-bit) translation */
   run_elts8_func run_elts8;    /* indexed (8-bit) translation */
   run_func run;                /* linear-range translation */
};
struct translate *translate_create( const struct translate_key *key ); |
boolean translate_is_output_format_supported(enum pipe_format format); |
static INLINE int translate_keysize( const struct translate_key *key ) |
{ |
return 2 * sizeof(int) + key->nr_elements * sizeof(struct translate_element); |
} |
/* Order/compare two keys.  Keys with different used sizes can never be
 * equal; otherwise compare the used portion bytewise (the unused tail
 * is expected to be zeroed via translate_key_sanitize()).
 */
static INLINE int translate_key_compare( const struct translate_key *a,
                                         const struct translate_key *b )
{
   const int size_a = translate_keysize(a);
   const int size_b = translate_keysize(b);

   if (size_a == size_b)
      return memcmp(a, b, size_a);
   return size_a - size_b;
}
static INLINE void translate_key_sanitize( struct translate_key *a ) |
{ |
int keysize = translate_keysize(a); |
char *ptr = (char *)a; |
memset(ptr + keysize, 0, sizeof(*a) - keysize); |
} |
/******************************************************************************* |
* Private: |
*/ |
struct translate *translate_sse2_create( const struct translate_key *key ); |
struct translate *translate_generic_create( const struct translate_key *key ); |
boolean translate_generic_is_output_format_supported(enum pipe_format format); |
#endif |
/drivers/video/Gallium/auxiliary/translate/translate_cache.c |
---|
0,0 → 1,106 |
/************************************************************************** |
* |
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
#include "util/u_memory.h" |
#include "pipe/p_state.h" |
#include "translate.h" |
#include "translate_cache.h" |
#include "cso_cache/cso_cache.h" |
#include "cso_cache/cso_hash.h" |
/* A cache of translate objects, keyed by a hash of their translate_key. */
struct translate_cache {
   struct cso_hash *hash;   /* hash(translate_key) -> struct translate * */
};
struct translate_cache * translate_cache_create( void ) |
{ |
struct translate_cache *cache = MALLOC_STRUCT(translate_cache); |
if (cache == NULL) { |
return NULL; |
} |
cache->hash = cso_hash_create(); |
return cache; |
} |
static INLINE void delete_translates(struct translate_cache *cache) |
{ |
struct cso_hash *hash = cache->hash; |
struct cso_hash_iter iter = cso_hash_first_node(hash); |
while (!cso_hash_iter_is_null(iter)) { |
struct translate *state = (struct translate*)cso_hash_iter_data(iter); |
iter = cso_hash_iter_next(iter); |
if (state) { |
state->release(state); |
} |
} |
} |
void translate_cache_destroy(struct translate_cache *cache) |
{ |
delete_translates(cache); |
cso_hash_delete(cache->hash); |
FREE(cache); |
} |
static INLINE unsigned translate_hash_key_size(struct translate_key *key) |
{ |
unsigned size = sizeof(struct translate_key) - |
sizeof(struct translate_element) * (PIPE_MAX_ATTRIBS - key->nr_elements); |
return size; |
} |
/* Hash the used portion of the key into a cso hash key. */
static INLINE unsigned create_key(struct translate_key *key)
{
   return cso_construct_key(key, translate_hash_key_size(key));
}
struct translate * translate_cache_find(struct translate_cache *cache, |
struct translate_key *key) |
{ |
unsigned hash_key = create_key(key); |
struct translate *translate = (struct translate*) |
cso_hash_find_data_from_template(cache->hash, |
hash_key, |
key, sizeof(*key)); |
if (!translate) { |
/* create/insert */ |
translate = translate_create(key); |
cso_hash_insert(cache->hash, hash_key, translate); |
} |
return translate; |
} |
/drivers/video/Gallium/auxiliary/translate/translate_cache.h |
---|
0,0 → 1,54 |
/* |
* Copyright 2008 Tungsten Graphics, inc. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#ifndef _TRANSLATE_CACHE_H |
#define _TRANSLATE_CACHE_H |
/******************************************************************************* |
* Translate cache. |
 * Simply used to cache created translates. Avoids unnecessary creation of |
* translate's if one suitable for a given translate_key has already been |
* created. |
* |
* Note: this functionality depends and requires the CSO module. |
*/ |
struct translate_cache; |
struct translate_key; |
struct translate; |
struct translate_cache *translate_cache_create( void ); |
void translate_cache_destroy(struct translate_cache *cache); |
/** |
* Will try to find a translate structure matched by the given key. |
* If such a structure doesn't exist in the cache the function |
* will automatically create it, insert it in the cache and |
* return the created version. |
* |
*/ |
struct translate *translate_cache_find(struct translate_cache *cache, |
struct translate_key *key); |
#endif |
/drivers/video/Gallium/auxiliary/translate/translate_generic.c |
---|
0,0 → 1,998 |
/************************************************************************** |
* |
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/* |
* Authors: |
* Keith Whitwell <keith@tungstengraphics.com> |
*/ |
#include "util/u_memory.h" |
#include "util/u_format.h" |
#include "util/u_half.h" |
#include "util/u_math.h" |
#include "pipe/p_state.h" |
#include "translate.h" |
#define DRAW_DBG 0

/* Fetch one attribute from 'src' and convert it to the canonical
 * intermediate representation in 'dst'.  i/j are forwarded to the
 * u_format fetch routines -- presumably element/texel coordinates;
 * confirm against the fetch call sites.
 */
typedef void (*fetch_func)(void *dst,
                           const uint8_t *src,
                           unsigned i, unsigned j);
/* Convert the canonical attribute in 'attrib' and store it at 'ptr'. */
typedef void (*emit_func)(const void *attrib, void *ptr);

/* Generic (plain C) implementation of struct translate. */
struct translate_generic {
   struct translate translate;   /* base struct; must be first (code casts between the two) */
   struct {
      enum translate_element_type type;   /* normal attribute or instance id */
      fetch_func fetch;                   /* fetch+convert from input format */
      unsigned buffer;                    /* input vertex buffer index */
      unsigned input_offset;              /* byte offset within an input vertex */
      unsigned instance_divisor;          /* per-instance stepping divisor */
      emit_func emit;                     /* convert+store to output format */
      unsigned output_offset;             /* byte offset within an output vertex */
      const uint8_t *input_ptr;           /* current input buffer base */
      unsigned input_stride;              /* byte stride between input vertices */
      unsigned max_index;                 /* last valid input index */
      /* this value is set to -1 if this is a normal element with output_format != input_format:
       * in this case, u_format is used to do a full conversion
       *
       * this value is set to the format size in bytes if output_format == input_format or for 32-bit instance ids:
       * in this case, memcpy is used to copy this amount of bytes
       */
      int copy_size;
   } attrib[PIPE_MAX_ATTRIBS];
   unsigned nr_attrib;                    /* number of used attrib[] entries */
};
/* Downcast the base struct to the generic implementation.  Valid
 * because struct translate is the first member of translate_generic.
 */
static struct translate_generic *translate_generic( struct translate *xlate )
{
   return (struct translate_generic *) xlate;
}
/** |
* Fetch a dword[4] vertex attribute from memory, doing format/type |
* conversion as needed. |
* |
 * This is probably needed/duplicated elsewhere, e.g. format |
* conversion, texture sampling etc. |
*/ |
/* ATTRIB(NAME, SZ, SRCTYPE, DSTTYPE, TO) expands to an emit_<NAME>()
 * function that reads SZ values of SRCTYPE from 'attrib' and stores
 * them as DSTTYPE through 'ptr', converting each value with the TO()
 * macro.  (No comments inside the macro body: it is line-continued.)
 */
#define ATTRIB( NAME, SZ, SRCTYPE, DSTTYPE, TO ) \
static void \
emit_##NAME(const void *attrib, void *ptr) \
{ \
   unsigned i; \
   SRCTYPE *in = (SRCTYPE *)attrib; \
   DSTTYPE *out = (DSTTYPE *)ptr; \
 \
   for (i = 0; i < SZ; i++) { \
      out[i] = TO(in[i]); \
   } \
}
/* Scalar conversion helpers used by the emit_* functions.
 * Fix: every macro argument is now fully parenthesized so the macros
 * expand correctly for compound arguments such as TO_8_UNORM(a + b)
 * (CERT PRE01-C).  Behavior is unchanged for all existing call sites,
 * which pass simple expressions like in[i].
 */
#define TO_64_FLOAT(x)   ((double) (x))
#define TO_32_FLOAT(x)   (x)
#define TO_16_FLOAT(x)   util_float_to_half(x)
#define TO_8_USCALED(x)  ((unsigned char) (x))
#define TO_16_USCALED(x) ((unsigned short) (x))
#define TO_32_USCALED(x) ((unsigned int) (x))
#define TO_8_SSCALED(x)  ((char) (x))
#define TO_16_SSCALED(x) ((short) (x))
#define TO_32_SSCALED(x) ((int) (x))
#define TO_8_UNORM(x)    ((unsigned char) ((x) * 255.0f))
#define TO_16_UNORM(x)   ((unsigned short) ((x) * 65535.0f))
#define TO_32_UNORM(x)   ((unsigned int) ((x) * 4294967295.0f))
#define TO_8_SNORM(x)    ((char) ((x) * 127.0f))
#define TO_16_SNORM(x)   ((short) ((x) * 32767.0f))
#define TO_32_SNORM(x)   ((int) ((x) * 2147483647.0f))
#define TO_32_FIXED(x)   ((int) ((x) * 65536.0f))
#define TO_INT(x)        (x)
/* Instantiate one emit_<FORMAT>() per supported output format.  Float
 * formats read float[SZ] from the canonical attribute; the pure
 * integer (UINT/SINT) formats read uint32_t[SZ]/int32_t[SZ].
 */
/* float outputs (64/32/16 bit) */
ATTRIB( R64G64B64A64_FLOAT, 4, float, double, TO_64_FLOAT )
ATTRIB( R64G64B64_FLOAT, 3, float, double, TO_64_FLOAT )
ATTRIB( R64G64_FLOAT, 2, float, double, TO_64_FLOAT )
ATTRIB( R64_FLOAT, 1, float, double, TO_64_FLOAT )
ATTRIB( R32G32B32A32_FLOAT, 4, float, float, TO_32_FLOAT )
ATTRIB( R32G32B32_FLOAT, 3, float, float, TO_32_FLOAT )
ATTRIB( R32G32_FLOAT, 2, float, float, TO_32_FLOAT )
ATTRIB( R32_FLOAT, 1, float, float, TO_32_FLOAT )
ATTRIB( R16G16B16A16_FLOAT, 4, float, ushort, TO_16_FLOAT )
ATTRIB( R16G16B16_FLOAT, 3, float, ushort, TO_16_FLOAT )
ATTRIB( R16G16_FLOAT, 2, float, ushort, TO_16_FLOAT )
ATTRIB( R16_FLOAT, 1, float, ushort, TO_16_FLOAT )
/* 32-bit scaled/normalized integer outputs */
ATTRIB( R32G32B32A32_USCALED, 4, float, unsigned, TO_32_USCALED )
ATTRIB( R32G32B32_USCALED, 3, float, unsigned, TO_32_USCALED )
ATTRIB( R32G32_USCALED, 2, float, unsigned, TO_32_USCALED )
ATTRIB( R32_USCALED, 1, float, unsigned, TO_32_USCALED )
ATTRIB( R32G32B32A32_SSCALED, 4, float, int, TO_32_SSCALED )
ATTRIB( R32G32B32_SSCALED, 3, float, int, TO_32_SSCALED )
ATTRIB( R32G32_SSCALED, 2, float, int, TO_32_SSCALED )
ATTRIB( R32_SSCALED, 1, float, int, TO_32_SSCALED )
ATTRIB( R32G32B32A32_UNORM, 4, float, unsigned, TO_32_UNORM )
ATTRIB( R32G32B32_UNORM, 3, float, unsigned, TO_32_UNORM )
ATTRIB( R32G32_UNORM, 2, float, unsigned, TO_32_UNORM )
ATTRIB( R32_UNORM, 1, float, unsigned, TO_32_UNORM )
ATTRIB( R32G32B32A32_SNORM, 4, float, int, TO_32_SNORM )
ATTRIB( R32G32B32_SNORM, 3, float, int, TO_32_SNORM )
ATTRIB( R32G32_SNORM, 2, float, int, TO_32_SNORM )
ATTRIB( R32_SNORM, 1, float, int, TO_32_SNORM )
/* 16-bit scaled/normalized integer outputs */
ATTRIB( R16G16B16A16_USCALED, 4, float, ushort, TO_16_USCALED )
ATTRIB( R16G16B16_USCALED, 3, float, ushort, TO_16_USCALED )
ATTRIB( R16G16_USCALED, 2, float, ushort, TO_16_USCALED )
ATTRIB( R16_USCALED, 1, float, ushort, TO_16_USCALED )
ATTRIB( R16G16B16A16_SSCALED, 4, float, short, TO_16_SSCALED )
ATTRIB( R16G16B16_SSCALED, 3, float, short, TO_16_SSCALED )
ATTRIB( R16G16_SSCALED, 2, float, short, TO_16_SSCALED )
ATTRIB( R16_SSCALED, 1, float, short, TO_16_SSCALED )
ATTRIB( R16G16B16A16_UNORM, 4, float, ushort, TO_16_UNORM )
ATTRIB( R16G16B16_UNORM, 3, float, ushort, TO_16_UNORM )
ATTRIB( R16G16_UNORM, 2, float, ushort, TO_16_UNORM )
ATTRIB( R16_UNORM, 1, float, ushort, TO_16_UNORM )
ATTRIB( R16G16B16A16_SNORM, 4, float, short, TO_16_SNORM )
ATTRIB( R16G16B16_SNORM, 3, float, short, TO_16_SNORM )
ATTRIB( R16G16_SNORM, 2, float, short, TO_16_SNORM )
ATTRIB( R16_SNORM, 1, float, short, TO_16_SNORM )
/* 8-bit scaled/normalized integer outputs */
ATTRIB( R8G8B8A8_USCALED, 4, float, ubyte, TO_8_USCALED )
ATTRIB( R8G8B8_USCALED, 3, float, ubyte, TO_8_USCALED )
ATTRIB( R8G8_USCALED, 2, float, ubyte, TO_8_USCALED )
ATTRIB( R8_USCALED, 1, float, ubyte, TO_8_USCALED )
ATTRIB( R8G8B8A8_SSCALED, 4, float, char, TO_8_SSCALED )
ATTRIB( R8G8B8_SSCALED, 3, float, char, TO_8_SSCALED )
ATTRIB( R8G8_SSCALED, 2, float, char, TO_8_SSCALED )
ATTRIB( R8_SSCALED, 1, float, char, TO_8_SSCALED )
ATTRIB( R8G8B8A8_UNORM, 4, float, ubyte, TO_8_UNORM )
ATTRIB( R8G8B8_UNORM, 3, float, ubyte, TO_8_UNORM )
ATTRIB( R8G8_UNORM, 2, float, ubyte, TO_8_UNORM )
ATTRIB( R8_UNORM, 1, float, ubyte, TO_8_UNORM )
ATTRIB( R8G8B8A8_SNORM, 4, float, char, TO_8_SNORM )
ATTRIB( R8G8B8_SNORM, 3, float, char, TO_8_SNORM )
ATTRIB( R8G8_SNORM, 2, float, char, TO_8_SNORM )
ATTRIB( R8_SNORM, 1, float, char, TO_8_SNORM )
/* pure integer outputs: copied straight through with TO_INT */
ATTRIB( R32G32B32A32_UINT, 4, uint32_t, unsigned, TO_INT )
ATTRIB( R32G32B32_UINT, 3, uint32_t, unsigned, TO_INT )
ATTRIB( R32G32_UINT, 2, uint32_t, unsigned, TO_INT )
ATTRIB( R32_UINT, 1, uint32_t, unsigned, TO_INT )
ATTRIB( R16G16B16A16_UINT, 4, uint32_t, ushort, TO_INT )
ATTRIB( R16G16B16_UINT, 3, uint32_t, ushort, TO_INT )
ATTRIB( R16G16_UINT, 2, uint32_t, ushort, TO_INT )
ATTRIB( R16_UINT, 1, uint32_t, ushort, TO_INT )
ATTRIB( R8G8B8A8_UINT, 4, uint32_t, ubyte, TO_INT )
ATTRIB( R8G8B8_UINT, 3, uint32_t, ubyte, TO_INT )
ATTRIB( R8G8_UINT, 2, uint32_t, ubyte, TO_INT )
ATTRIB( R8_UINT, 1, uint32_t, ubyte, TO_INT )
ATTRIB( R32G32B32A32_SINT, 4, int32_t, int, TO_INT )
ATTRIB( R32G32B32_SINT, 3, int32_t, int, TO_INT )
ATTRIB( R32G32_SINT, 2, int32_t, int, TO_INT )
ATTRIB( R32_SINT, 1, int32_t, int, TO_INT )
ATTRIB( R16G16B16A16_SINT, 4, int32_t, short, TO_INT )
ATTRIB( R16G16B16_SINT, 3, int32_t, short, TO_INT )
ATTRIB( R16G16_SINT, 2, int32_t, short, TO_INT )
ATTRIB( R16_SINT, 1, int32_t, short, TO_INT )
ATTRIB( R8G8B8A8_SINT, 4, int32_t, char, TO_INT )
ATTRIB( R8G8B8_SINT, 3, int32_t, char, TO_INT )
ATTRIB( R8G8_SINT, 2, int32_t, char, TO_INT )
ATTRIB( R8_SINT, 1, int32_t, char, TO_INT )
static void |
emit_A8R8G8B8_UNORM( const void *attrib, void *ptr) |
{ |
float *in = (float *)attrib; |
ubyte *out = (ubyte *)ptr; |
out[0] = TO_8_UNORM(in[3]); |
out[1] = TO_8_UNORM(in[0]); |
out[2] = TO_8_UNORM(in[1]); |
out[3] = TO_8_UNORM(in[2]); |
} |
static void |
emit_B8G8R8A8_UNORM( const void *attrib, void *ptr) |
{ |
float *in = (float *)attrib; |
ubyte *out = (ubyte *)ptr; |
out[2] = TO_8_UNORM(in[0]); |
out[1] = TO_8_UNORM(in[1]); |
out[0] = TO_8_UNORM(in[2]); |
out[3] = TO_8_UNORM(in[3]); |
} |
static void |
emit_B10G10R10A2_UNORM( const void *attrib, void *ptr ) |
{ |
float *src = (float *)ptr; |
uint32_t value = 0; |
value |= ((uint32_t)(CLAMP(src[2], 0, 1) * 0x3ff)) & 0x3ff; |
value |= (((uint32_t)(CLAMP(src[1], 0, 1) * 0x3ff)) & 0x3ff) << 10; |
value |= (((uint32_t)(CLAMP(src[0], 0, 1) * 0x3ff)) & 0x3ff) << 20; |
value |= ((uint32_t)(CLAMP(src[3], 0, 1) * 0x3)) << 30; |
#ifdef PIPE_ARCH_BIG_ENDIAN |
value = util_bswap32(value); |
#endif |
*(uint32_t *)attrib = value; |
} |
static void |
emit_B10G10R10A2_USCALED( const void *attrib, void *ptr ) |
{ |
float *src = (float *)ptr; |
uint32_t value = 0; |
value |= ((uint32_t)CLAMP(src[2], 0, 1023)) & 0x3ff; |
value |= (((uint32_t)CLAMP(src[1], 0, 1023)) & 0x3ff) << 10; |
value |= (((uint32_t)CLAMP(src[0], 0, 1023)) & 0x3ff) << 20; |
value |= ((uint32_t)CLAMP(src[3], 0, 3)) << 30; |
#ifdef PIPE_ARCH_BIG_ENDIAN |
value = util_bswap32(value); |
#endif |
*(uint32_t *)attrib = value; |
} |
static void |
emit_B10G10R10A2_SNORM( const void *attrib, void *ptr ) |
{ |
float *src = (float *)ptr; |
uint32_t value = 0; |
value |= (uint32_t)(((uint32_t)(CLAMP(src[2], -1, 1) * 0x1ff)) & 0x3ff) ; |
value |= (uint32_t)((((uint32_t)(CLAMP(src[1], -1, 1) * 0x1ff)) & 0x3ff) << 10) ; |
value |= (uint32_t)((((uint32_t)(CLAMP(src[0], -1, 1) * 0x1ff)) & 0x3ff) << 20) ; |
value |= (uint32_t)(((uint32_t)(CLAMP(src[3], -1, 1) * 0x1)) << 30) ; |
#ifdef PIPE_ARCH_BIG_ENDIAN |
value = util_bswap32(value); |
#endif |
*(uint32_t *)attrib = value; |
} |
static void |
emit_B10G10R10A2_SSCALED( const void *attrib, void *ptr ) |
{ |
float *src = (float *)ptr; |
uint32_t value = 0; |
value |= (uint32_t)(((uint32_t)CLAMP(src[2], -512, 511)) & 0x3ff) ; |
value |= (uint32_t)((((uint32_t)CLAMP(src[1], -512, 511)) & 0x3ff) << 10) ; |
value |= (uint32_t)((((uint32_t)CLAMP(src[0], -512, 511)) & 0x3ff) << 20) ; |
value |= (uint32_t)(((uint32_t)CLAMP(src[3], -2, 1)) << 30) ; |
#ifdef PIPE_ARCH_BIG_ENDIAN |
value = util_bswap32(value); |
#endif |
*(uint32_t *)attrib = value; |
} |
static void |
emit_R10G10B10A2_UNORM( const void *attrib, void *ptr ) |
{ |
float *src = (float *)ptr; |
uint32_t value = 0; |
value |= ((uint32_t)(CLAMP(src[0], 0, 1) * 0x3ff)) & 0x3ff; |
value |= (((uint32_t)(CLAMP(src[1], 0, 1) * 0x3ff)) & 0x3ff) << 10; |
value |= (((uint32_t)(CLAMP(src[2], 0, 1) * 0x3ff)) & 0x3ff) << 20; |
value |= ((uint32_t)(CLAMP(src[3], 0, 1) * 0x3)) << 30; |
#ifdef PIPE_ARCH_BIG_ENDIAN |
value = util_bswap32(value); |
#endif |
*(uint32_t *)attrib = value; |
} |
static void |
emit_R10G10B10A2_USCALED( const void *attrib, void *ptr ) |
{ |
float *src = (float *)ptr; |
uint32_t value = 0; |
value |= ((uint32_t)CLAMP(src[0], 0, 1023)) & 0x3ff; |
value |= (((uint32_t)CLAMP(src[1], 0, 1023)) & 0x3ff) << 10; |
value |= (((uint32_t)CLAMP(src[2], 0, 1023)) & 0x3ff) << 20; |
value |= ((uint32_t)CLAMP(src[3], 0, 3)) << 30; |
#ifdef PIPE_ARCH_BIG_ENDIAN |
value = util_bswap32(value); |
#endif |
*(uint32_t *)attrib = value; |
} |
static void |
emit_R10G10B10A2_SNORM( const void *attrib, void *ptr ) |
{ |
float *src = (float *)ptr; |
uint32_t value = 0; |
value |= (uint32_t)(((uint32_t)(CLAMP(src[0], -1, 1) * 0x1ff)) & 0x3ff) ; |
value |= (uint32_t)((((uint32_t)(CLAMP(src[1], -1, 1) * 0x1ff)) & 0x3ff) << 10) ; |
value |= (uint32_t)((((uint32_t)(CLAMP(src[2], -1, 1) * 0x1ff)) & 0x3ff) << 20) ; |
value |= (uint32_t)(((uint32_t)(CLAMP(src[3], -1, 1) * 0x1)) << 30) ; |
#ifdef PIPE_ARCH_BIG_ENDIAN |
value = util_bswap32(value); |
#endif |
*(uint32_t *)attrib = value; |
} |
static void |
emit_R10G10B10A2_SSCALED( const void *attrib, void *ptr) |
{ |
float *src = (float *)ptr; |
uint32_t value = 0; |
value |= (uint32_t)(((uint32_t)CLAMP(src[0], -512, 511)) & 0x3ff) ; |
value |= (uint32_t)((((uint32_t)CLAMP(src[1], -512, 511)) & 0x3ff) << 10) ; |
value |= (uint32_t)((((uint32_t)CLAMP(src[2], -512, 511)) & 0x3ff) << 20) ; |
value |= (uint32_t)(((uint32_t)CLAMP(src[3], -2, 1)) << 30) ; |
#ifdef PIPE_ARCH_BIG_ENDIAN |
value = util_bswap32(value); |
#endif |
*(uint32_t *)attrib = value; |
} |
/* No-op emit used for output formats that carry no data.
 * Doing nothing is the only sensible option here.
 */
static void
emit_NULL( const void *attrib, void *ptr )
{
}
/**
 * Map an output pipe_format to the software routine that packs a fetched
 * float[4] attribute into that format.
 *
 * \param format  desired output vertex format
 * \return pointer to the matching emit_* function; emit_NULL (after an
 *         assertion failure in debug builds) for unsupported formats.
 */
static emit_func get_emit_func( enum pipe_format format )
{
   switch (format) {
   /* 64-bit float formats */
   case PIPE_FORMAT_R64_FLOAT:
      return &emit_R64_FLOAT;
   case PIPE_FORMAT_R64G64_FLOAT:
      return &emit_R64G64_FLOAT;
   case PIPE_FORMAT_R64G64B64_FLOAT:
      return &emit_R64G64B64_FLOAT;
   case PIPE_FORMAT_R64G64B64A64_FLOAT:
      return &emit_R64G64B64A64_FLOAT;
   /* 32-bit float formats */
   case PIPE_FORMAT_R32_FLOAT:
      return &emit_R32_FLOAT;
   case PIPE_FORMAT_R32G32_FLOAT:
      return &emit_R32G32_FLOAT;
   case PIPE_FORMAT_R32G32B32_FLOAT:
      return &emit_R32G32B32_FLOAT;
   case PIPE_FORMAT_R32G32B32A32_FLOAT:
      return &emit_R32G32B32A32_FLOAT;
   /* 16-bit (half) float formats */
   case PIPE_FORMAT_R16_FLOAT:
      return &emit_R16_FLOAT;
   case PIPE_FORMAT_R16G16_FLOAT:
      return &emit_R16G16_FLOAT;
   case PIPE_FORMAT_R16G16B16_FLOAT:
      return &emit_R16G16B16_FLOAT;
   case PIPE_FORMAT_R16G16B16A16_FLOAT:
      return &emit_R16G16B16A16_FLOAT;
   /* 32-bit normalized/scaled integer formats */
   case PIPE_FORMAT_R32_UNORM:
      return &emit_R32_UNORM;
   case PIPE_FORMAT_R32G32_UNORM:
      return &emit_R32G32_UNORM;
   case PIPE_FORMAT_R32G32B32_UNORM:
      return &emit_R32G32B32_UNORM;
   case PIPE_FORMAT_R32G32B32A32_UNORM:
      return &emit_R32G32B32A32_UNORM;
   case PIPE_FORMAT_R32_USCALED:
      return &emit_R32_USCALED;
   case PIPE_FORMAT_R32G32_USCALED:
      return &emit_R32G32_USCALED;
   case PIPE_FORMAT_R32G32B32_USCALED:
      return &emit_R32G32B32_USCALED;
   case PIPE_FORMAT_R32G32B32A32_USCALED:
      return &emit_R32G32B32A32_USCALED;
   case PIPE_FORMAT_R32_SNORM:
      return &emit_R32_SNORM;
   case PIPE_FORMAT_R32G32_SNORM:
      return &emit_R32G32_SNORM;
   case PIPE_FORMAT_R32G32B32_SNORM:
      return &emit_R32G32B32_SNORM;
   case PIPE_FORMAT_R32G32B32A32_SNORM:
      return &emit_R32G32B32A32_SNORM;
   case PIPE_FORMAT_R32_SSCALED:
      return &emit_R32_SSCALED;
   case PIPE_FORMAT_R32G32_SSCALED:
      return &emit_R32G32_SSCALED;
   case PIPE_FORMAT_R32G32B32_SSCALED:
      return &emit_R32G32B32_SSCALED;
   case PIPE_FORMAT_R32G32B32A32_SSCALED:
      return &emit_R32G32B32A32_SSCALED;
   /* 16-bit normalized/scaled integer formats */
   case PIPE_FORMAT_R16_UNORM:
      return &emit_R16_UNORM;
   case PIPE_FORMAT_R16G16_UNORM:
      return &emit_R16G16_UNORM;
   case PIPE_FORMAT_R16G16B16_UNORM:
      return &emit_R16G16B16_UNORM;
   case PIPE_FORMAT_R16G16B16A16_UNORM:
      return &emit_R16G16B16A16_UNORM;
   case PIPE_FORMAT_R16_USCALED:
      return &emit_R16_USCALED;
   case PIPE_FORMAT_R16G16_USCALED:
      return &emit_R16G16_USCALED;
   case PIPE_FORMAT_R16G16B16_USCALED:
      return &emit_R16G16B16_USCALED;
   case PIPE_FORMAT_R16G16B16A16_USCALED:
      return &emit_R16G16B16A16_USCALED;
   case PIPE_FORMAT_R16_SNORM:
      return &emit_R16_SNORM;
   case PIPE_FORMAT_R16G16_SNORM:
      return &emit_R16G16_SNORM;
   case PIPE_FORMAT_R16G16B16_SNORM:
      return &emit_R16G16B16_SNORM;
   case PIPE_FORMAT_R16G16B16A16_SNORM:
      return &emit_R16G16B16A16_SNORM;
   case PIPE_FORMAT_R16_SSCALED:
      return &emit_R16_SSCALED;
   case PIPE_FORMAT_R16G16_SSCALED:
      return &emit_R16G16_SSCALED;
   case PIPE_FORMAT_R16G16B16_SSCALED:
      return &emit_R16G16B16_SSCALED;
   case PIPE_FORMAT_R16G16B16A16_SSCALED:
      return &emit_R16G16B16A16_SSCALED;
   /* 8-bit normalized/scaled integer formats */
   case PIPE_FORMAT_R8_UNORM:
      return &emit_R8_UNORM;
   case PIPE_FORMAT_R8G8_UNORM:
      return &emit_R8G8_UNORM;
   case PIPE_FORMAT_R8G8B8_UNORM:
      return &emit_R8G8B8_UNORM;
   case PIPE_FORMAT_R8G8B8A8_UNORM:
      return &emit_R8G8B8A8_UNORM;
   case PIPE_FORMAT_R8_USCALED:
      return &emit_R8_USCALED;
   case PIPE_FORMAT_R8G8_USCALED:
      return &emit_R8G8_USCALED;
   case PIPE_FORMAT_R8G8B8_USCALED:
      return &emit_R8G8B8_USCALED;
   case PIPE_FORMAT_R8G8B8A8_USCALED:
      return &emit_R8G8B8A8_USCALED;
   case PIPE_FORMAT_R8_SNORM:
      return &emit_R8_SNORM;
   case PIPE_FORMAT_R8G8_SNORM:
      return &emit_R8G8_SNORM;
   case PIPE_FORMAT_R8G8B8_SNORM:
      return &emit_R8G8B8_SNORM;
   case PIPE_FORMAT_R8G8B8A8_SNORM:
      return &emit_R8G8B8A8_SNORM;
   case PIPE_FORMAT_R8_SSCALED:
      return &emit_R8_SSCALED;
   case PIPE_FORMAT_R8G8_SSCALED:
      return &emit_R8G8_SSCALED;
   case PIPE_FORMAT_R8G8B8_SSCALED:
      return &emit_R8G8B8_SSCALED;
   case PIPE_FORMAT_R8G8B8A8_SSCALED:
      return &emit_R8G8B8A8_SSCALED;
   /* swizzled 8-bit color formats */
   case PIPE_FORMAT_B8G8R8A8_UNORM:
      return &emit_B8G8R8A8_UNORM;
   case PIPE_FORMAT_A8R8G8B8_UNORM:
      return &emit_A8R8G8B8_UNORM;
   /* pure (unconverted) unsigned integer formats */
   case PIPE_FORMAT_R32_UINT:
      return &emit_R32_UINT;
   case PIPE_FORMAT_R32G32_UINT:
      return &emit_R32G32_UINT;
   case PIPE_FORMAT_R32G32B32_UINT:
      return &emit_R32G32B32_UINT;
   case PIPE_FORMAT_R32G32B32A32_UINT:
      return &emit_R32G32B32A32_UINT;
   case PIPE_FORMAT_R16_UINT:
      return &emit_R16_UINT;
   case PIPE_FORMAT_R16G16_UINT:
      return &emit_R16G16_UINT;
   case PIPE_FORMAT_R16G16B16_UINT:
      return &emit_R16G16B16_UINT;
   case PIPE_FORMAT_R16G16B16A16_UINT:
      return &emit_R16G16B16A16_UINT;
   case PIPE_FORMAT_R8_UINT:
      return &emit_R8_UINT;
   case PIPE_FORMAT_R8G8_UINT:
      return &emit_R8G8_UINT;
   case PIPE_FORMAT_R8G8B8_UINT:
      return &emit_R8G8B8_UINT;
   case PIPE_FORMAT_R8G8B8A8_UINT:
      return &emit_R8G8B8A8_UINT;
   /* pure (unconverted) signed integer formats */
   case PIPE_FORMAT_R32_SINT:
      return &emit_R32_SINT;
   case PIPE_FORMAT_R32G32_SINT:
      return &emit_R32G32_SINT;
   case PIPE_FORMAT_R32G32B32_SINT:
      return &emit_R32G32B32_SINT;
   case PIPE_FORMAT_R32G32B32A32_SINT:
      return &emit_R32G32B32A32_SINT;
   case PIPE_FORMAT_R16_SINT:
      return &emit_R16_SINT;
   case PIPE_FORMAT_R16G16_SINT:
      return &emit_R16G16_SINT;
   case PIPE_FORMAT_R16G16B16_SINT:
      return &emit_R16G16B16_SINT;
   case PIPE_FORMAT_R16G16B16A16_SINT:
      return &emit_R16G16B16A16_SINT;
   case PIPE_FORMAT_R8_SINT:
      return &emit_R8_SINT;
   case PIPE_FORMAT_R8G8_SINT:
      return &emit_R8G8_SINT;
   case PIPE_FORMAT_R8G8B8_SINT:
      return &emit_R8G8B8_SINT;
   case PIPE_FORMAT_R8G8B8A8_SINT:
      return &emit_R8G8B8A8_SINT;
   /* 10/10/10/2 packed formats */
   case PIPE_FORMAT_B10G10R10A2_UNORM:
      return &emit_B10G10R10A2_UNORM;
   case PIPE_FORMAT_B10G10R10A2_USCALED:
      return &emit_B10G10R10A2_USCALED;
   case PIPE_FORMAT_B10G10R10A2_SNORM:
      return &emit_B10G10R10A2_SNORM;
   case PIPE_FORMAT_B10G10R10A2_SSCALED:
      return &emit_B10G10R10A2_SSCALED;
   case PIPE_FORMAT_R10G10B10A2_UNORM:
      return &emit_R10G10B10A2_UNORM;
   case PIPE_FORMAT_R10G10B10A2_USCALED:
      return &emit_R10G10B10A2_USCALED;
   case PIPE_FORMAT_R10G10B10A2_SNORM:
      return &emit_R10G10B10A2_SNORM;
   case PIPE_FORMAT_R10G10B10A2_SSCALED:
      return &emit_R10G10B10A2_SSCALED;
   default:
      /* should have been rejected earlier by
       * translate_generic_is_output_format_supported() */
      assert(0);
      return &emit_NULL;
   }
}
/**
 * Fetch, convert and store all attributes of one vertex.
 *
 * \param tg              translate context
 * \param elt             vertex index (used by non-instanced elements)
 * \param start_instance  first instance of the current draw
 * \param instance_id     instance currently being emitted
 * \param vert            destination slot in the output vertex buffer
 */
static ALWAYS_INLINE void PIPE_CDECL generic_run_one( struct translate_generic *tg,
                                                      unsigned elt,
                                                      unsigned start_instance,
                                                      unsigned instance_id,
                                                      void *vert )
{
   unsigned nr_attrs = tg->nr_attrib;
   unsigned attr;

   for (attr = 0; attr < nr_attrs; attr++) {
      float data[4];
      uint8_t *dst = (uint8_t *)vert + tg->attrib[attr].output_offset;

      if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
         const uint8_t *src;
         unsigned index;
         int copy_size;

         if (tg->attrib[attr].instance_divisor) {
            /* instanced attribute: advance once every 'divisor' instances */
            index = start_instance;
            index += (instance_id - start_instance) /
                     tg->attrib[attr].instance_divisor;
            /* XXX we need to clamp the index here too, but to a
             * per-array max value, not the draw->pt.max_index value
             * that's being given to us via translate->set_buffer().
             */
         }
         else {
            index = elt;
            /* clamp to avoid going out of bounds */
            index = MIN2(index, tg->attrib[attr].max_index);
         }

         src = tg->attrib[attr].input_ptr +
               tg->attrib[attr].input_stride * index;

         /* copy_size >= 0 means input and output formats are identical
          * byte-aligned formats, so a plain memcpy suffices */
         copy_size = tg->attrib[attr].copy_size;
         if (likely(copy_size >= 0))
            memcpy(dst, src, copy_size);
         else {
            tg->attrib[attr].fetch( data, src, 0, 0 );

            if (0)
               debug_printf("Fetch linear attr %d from %p stride %d index %d: "
                            " %f, %f, %f, %f \n",
                            attr,
                            tg->attrib[attr].input_ptr,
                            tg->attrib[attr].input_stride,
                            index,
                            data[0], data[1], data[2], data[3]);

            tg->attrib[attr].emit( data, dst );
         }
      } else {
         /* TRANSLATE_ELEMENT_INSTANCE_ID */
         if (likely(tg->attrib[attr].copy_size >= 0)) {
            /* BUGFIX: store the raw instance id into the output vertex
             * (dst); the original copied into the scratch 'data' array,
             * which was never written anywhere, leaving dst untouched. */
            memcpy(dst, &instance_id, 4);
         }
         else {
            data[0] = (float)instance_id;
            tg->attrib[attr].emit( data, dst );
         }
      }
   }
}
/** |
* Fetch vertex attributes for 'count' vertices. |
*/ |
static void PIPE_CDECL generic_run_elts( struct translate *translate, |
const unsigned *elts, |
unsigned count, |
unsigned start_instance, |
unsigned instance_id, |
void *output_buffer ) |
{ |
struct translate_generic *tg = translate_generic(translate); |
char *vert = output_buffer; |
unsigned i; |
for (i = 0; i < count; i++) { |
generic_run_one(tg, *elts++, start_instance, instance_id, vert); |
vert += tg->translate.key.output_stride; |
} |
} |
/**
 * Translate 'count' vertices addressed by a 16-bit index list.
 */
static void PIPE_CDECL generic_run_elts16( struct translate *translate,
                                           const uint16_t *elts,
                                           unsigned count,
                                           unsigned start_instance,
                                           unsigned instance_id,
                                           void *output_buffer )
{
   struct translate_generic *tg = translate_generic(translate);
   const unsigned stride = tg->translate.key.output_stride;
   char *out = (char *)output_buffer;
   unsigned n;

   for (n = 0; n < count; n++, out += stride)
      generic_run_one(tg, elts[n], start_instance, instance_id, out);
}
/**
 * Translate 'count' vertices addressed by an 8-bit index list.
 */
static void PIPE_CDECL generic_run_elts8( struct translate *translate,
                                          const uint8_t *elts,
                                          unsigned count,
                                          unsigned start_instance,
                                          unsigned instance_id,
                                          void *output_buffer )
{
   struct translate_generic *tg = translate_generic(translate);
   const unsigned stride = tg->translate.key.output_stride;
   char *out = (char *)output_buffer;
   unsigned n;

   for (n = 0; n < count; n++, out += stride)
      generic_run_one(tg, elts[n], start_instance, instance_id, out);
}
/**
 * Translate a linear range of vertices: start .. start + count - 1.
 */
static void PIPE_CDECL generic_run( struct translate *translate,
                                    unsigned start,
                                    unsigned count,
                                    unsigned start_instance,
                                    unsigned instance_id,
                                    void *output_buffer )
{
   struct translate_generic *tg = translate_generic(translate);
   const unsigned stride = tg->translate.key.output_stride;
   char *out = (char *)output_buffer;
   unsigned n;

   for (n = 0; n < count; n++, out += stride)
      generic_run_one(tg, start + n, start_instance, instance_id, out);
}
static void generic_set_buffer( struct translate *translate, |
unsigned buf, |
const void *ptr, |
unsigned stride, |
unsigned max_index ) |
{ |
struct translate_generic *tg = translate_generic(translate); |
unsigned i; |
for (i = 0; i < tg->nr_attrib; i++) { |
if (tg->attrib[i].buffer == buf) { |
tg->attrib[i].input_ptr = ((const uint8_t *)ptr + |
tg->attrib[i].input_offset); |
tg->attrib[i].input_stride = stride; |
tg->attrib[i].max_index = max_index; |
} |
} |
} |
/**
 * Destroy the translate object.  There is no reference counting;
 * the caller owns the object outright.
 */
static void generic_release( struct translate *translate )
{
   FREE(translate);
}
static boolean |
is_legal_int_format_combo( const struct util_format_description *src, |
const struct util_format_description *dst ) |
{ |
unsigned i; |
unsigned nr = MIN2(src->nr_channels, dst->nr_channels); |
for (i = 0; i < nr; i++) { |
/* The signs must match. */ |
if (src->channel[i].type != dst->channel[i].type) { |
return FALSE; |
} |
/* Integers must not lose precision at any point in the pipeline. */ |
if (src->channel[i].size > dst->channel[i].size) { |
return FALSE; |
} |
} |
return TRUE; |
} |
/**
 * Create the generic (plain C) implementation of the translate interface
 * for the given vertex layout key.
 *
 * \param key  describes input/output formats, offsets and divisors per element
 * \return new translate object, or NULL on allocation failure or if a
 *         pure-integer element would lose sign/precision on conversion.
 */
struct translate *translate_generic_create( const struct translate_key *key )
{
   struct translate_generic *tg = CALLOC_STRUCT(translate_generic);
   unsigned i;

   if (tg == NULL)
      return NULL;

   tg->translate.key = *key;
   tg->translate.release = generic_release;
   tg->translate.set_buffer = generic_set_buffer;
   tg->translate.run_elts = generic_run_elts;
   tg->translate.run_elts16 = generic_run_elts16;
   tg->translate.run_elts8 = generic_run_elts8;
   tg->translate.run = generic_run;

   for (i = 0; i < key->nr_elements; i++) {
      const struct util_format_description *format_desc =
            util_format_description(key->element[i].input_format);

      assert(format_desc);

      tg->attrib[i].type = key->element[i].type;

      /* Pure integers must not round-trip through float: pick the
       * sint/uint fetch routine for them, float fetch otherwise. */
      if (format_desc->channel[0].pure_integer) {
         const struct util_format_description *out_format_desc =
               util_format_description(key->element[i].output_format);

         /* reject combinations that would flip sign or drop bits */
         if (!is_legal_int_format_combo(format_desc, out_format_desc)) {
            FREE(tg);
            return NULL;
         }

         if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
            assert(format_desc->fetch_rgba_sint);
            tg->attrib[i].fetch = (fetch_func)format_desc->fetch_rgba_sint;
         } else {
            assert(format_desc->fetch_rgba_uint);
            tg->attrib[i].fetch = (fetch_func)format_desc->fetch_rgba_uint;
         }
      } else {
         assert(format_desc->fetch_rgba_float);
         tg->attrib[i].fetch = (fetch_func)format_desc->fetch_rgba_float;
      }

      tg->attrib[i].buffer = key->element[i].input_buffer;
      tg->attrib[i].input_offset = key->element[i].input_offset;
      tg->attrib[i].instance_divisor = key->element[i].instance_divisor;
      tg->attrib[i].output_offset = key->element[i].output_offset;

      /* copy_size >= 0 enables the fast memcpy path in generic_run_one */
      tg->attrib[i].copy_size = -1;
      if (tg->attrib[i].type == TRANSLATE_ELEMENT_INSTANCE_ID)
      {
         /* raw 32-bit instance id can be copied directly */
         if(key->element[i].output_format == PIPE_FORMAT_R32_USCALED
            || key->element[i].output_format == PIPE_FORMAT_R32_SSCALED)
            tg->attrib[i].copy_size = 4;
      }
      else
      {
         /* identical formats with 1x1 byte-aligned blocks need no
          * conversion at all */
         if(key->element[i].input_format == key->element[i].output_format
            && format_desc->block.width == 1
            && format_desc->block.height == 1
            && !(format_desc->block.bits & 7))
            tg->attrib[i].copy_size = format_desc->block.bits >> 3;
      }

      if(tg->attrib[i].copy_size < 0)
         tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
      else
         tg->attrib[i].emit = NULL;
   }

   tg->nr_attrib = key->nr_elements;

   return &tg->translate;
}
boolean translate_generic_is_output_format_supported(enum pipe_format format) |
{ |
switch(format) |
{ |
case PIPE_FORMAT_R64G64B64A64_FLOAT: return TRUE; |
case PIPE_FORMAT_R64G64B64_FLOAT: return TRUE; |
case PIPE_FORMAT_R64G64_FLOAT: return TRUE; |
case PIPE_FORMAT_R64_FLOAT: return TRUE; |
case PIPE_FORMAT_R32G32B32A32_FLOAT: return TRUE; |
case PIPE_FORMAT_R32G32B32_FLOAT: return TRUE; |
case PIPE_FORMAT_R32G32_FLOAT: return TRUE; |
case PIPE_FORMAT_R32_FLOAT: return TRUE; |
case PIPE_FORMAT_R16G16B16A16_FLOAT: return TRUE; |
case PIPE_FORMAT_R16G16B16_FLOAT: return TRUE; |
case PIPE_FORMAT_R16G16_FLOAT: return TRUE; |
case PIPE_FORMAT_R16_FLOAT: return TRUE; |
case PIPE_FORMAT_R32G32B32A32_USCALED: return TRUE; |
case PIPE_FORMAT_R32G32B32_USCALED: return TRUE; |
case PIPE_FORMAT_R32G32_USCALED: return TRUE; |
case PIPE_FORMAT_R32_USCALED: return TRUE; |
case PIPE_FORMAT_R32G32B32A32_SSCALED: return TRUE; |
case PIPE_FORMAT_R32G32B32_SSCALED: return TRUE; |
case PIPE_FORMAT_R32G32_SSCALED: return TRUE; |
case PIPE_FORMAT_R32_SSCALED: return TRUE; |
case PIPE_FORMAT_R32G32B32A32_UNORM: return TRUE; |
case PIPE_FORMAT_R32G32B32_UNORM: return TRUE; |
case PIPE_FORMAT_R32G32_UNORM: return TRUE; |
case PIPE_FORMAT_R32_UNORM: return TRUE; |
case PIPE_FORMAT_R32G32B32A32_SNORM: return TRUE; |
case PIPE_FORMAT_R32G32B32_SNORM: return TRUE; |
case PIPE_FORMAT_R32G32_SNORM: return TRUE; |
case PIPE_FORMAT_R32_SNORM: return TRUE; |
case PIPE_FORMAT_R16G16B16A16_USCALED: return TRUE; |
case PIPE_FORMAT_R16G16B16_USCALED: return TRUE; |
case PIPE_FORMAT_R16G16_USCALED: return TRUE; |
case PIPE_FORMAT_R16_USCALED: return TRUE; |
case PIPE_FORMAT_R16G16B16A16_SSCALED: return TRUE; |
case PIPE_FORMAT_R16G16B16_SSCALED: return TRUE; |
case PIPE_FORMAT_R16G16_SSCALED: return TRUE; |
case PIPE_FORMAT_R16_SSCALED: return TRUE; |
case PIPE_FORMAT_R16G16B16A16_UNORM: return TRUE; |
case PIPE_FORMAT_R16G16B16_UNORM: return TRUE; |
case PIPE_FORMAT_R16G16_UNORM: return TRUE; |
case PIPE_FORMAT_R16_UNORM: return TRUE; |
case PIPE_FORMAT_R16G16B16A16_SNORM: return TRUE; |
case PIPE_FORMAT_R16G16B16_SNORM: return TRUE; |
case PIPE_FORMAT_R16G16_SNORM: return TRUE; |
case PIPE_FORMAT_R16_SNORM: return TRUE; |
case PIPE_FORMAT_R8G8B8A8_USCALED: return TRUE; |
case PIPE_FORMAT_R8G8B8_USCALED: return TRUE; |
case PIPE_FORMAT_R8G8_USCALED: return TRUE; |
case PIPE_FORMAT_R8_USCALED: return TRUE; |
case PIPE_FORMAT_R8G8B8A8_SSCALED: return TRUE; |
case PIPE_FORMAT_R8G8B8_SSCALED: return TRUE; |
case PIPE_FORMAT_R8G8_SSCALED: return TRUE; |
case PIPE_FORMAT_R8_SSCALED: return TRUE; |
case PIPE_FORMAT_R8G8B8A8_UNORM: return TRUE; |
case PIPE_FORMAT_R8G8B8_UNORM: return TRUE; |
case PIPE_FORMAT_R8G8_UNORM: return TRUE; |
case PIPE_FORMAT_R8_UNORM: return TRUE; |
case PIPE_FORMAT_R8G8B8A8_SNORM: return TRUE; |
case PIPE_FORMAT_R8G8B8_SNORM: return TRUE; |
case PIPE_FORMAT_R8G8_SNORM: return TRUE; |
case PIPE_FORMAT_R8_SNORM: return TRUE; |
case PIPE_FORMAT_A8R8G8B8_UNORM: return TRUE; |
case PIPE_FORMAT_B8G8R8A8_UNORM: return TRUE; |
case PIPE_FORMAT_R32G32B32A32_UINT: return TRUE; |
case PIPE_FORMAT_R32G32B32_UINT: return TRUE; |
case PIPE_FORMAT_R32G32_UINT: return TRUE; |
case PIPE_FORMAT_R32_UINT: return TRUE; |
case PIPE_FORMAT_R16G16B16A16_UINT: return TRUE; |
case PIPE_FORMAT_R16G16B16_UINT: return TRUE; |
case PIPE_FORMAT_R16G16_UINT: return TRUE; |
case PIPE_FORMAT_R16_UINT: return TRUE; |
case PIPE_FORMAT_R8G8B8A8_UINT: return TRUE; |
case PIPE_FORMAT_R8G8B8_UINT: return TRUE; |
case PIPE_FORMAT_R8G8_UINT: return TRUE; |
case PIPE_FORMAT_R8_UINT: return TRUE; |
case PIPE_FORMAT_R32G32B32A32_SINT: return TRUE; |
case PIPE_FORMAT_R32G32B32_SINT: return TRUE; |
case PIPE_FORMAT_R32G32_SINT: return TRUE; |
case PIPE_FORMAT_R32_SINT: return TRUE; |
case PIPE_FORMAT_R16G16B16A16_SINT: return TRUE; |
case PIPE_FORMAT_R16G16B16_SINT: return TRUE; |
case PIPE_FORMAT_R16G16_SINT: return TRUE; |
case PIPE_FORMAT_R16_SINT: return TRUE; |
case PIPE_FORMAT_R8G8B8A8_SINT: return TRUE; |
case PIPE_FORMAT_R8G8B8_SINT: return TRUE; |
case PIPE_FORMAT_R8G8_SINT: return TRUE; |
case PIPE_FORMAT_R8_SINT: return TRUE; |
case PIPE_FORMAT_B10G10R10A2_UNORM: return TRUE; |
case PIPE_FORMAT_B10G10R10A2_USCALED: return TRUE; |
case PIPE_FORMAT_B10G10R10A2_SNORM: return TRUE; |
case PIPE_FORMAT_B10G10R10A2_SSCALED: return TRUE; |
case PIPE_FORMAT_R10G10B10A2_UNORM: return TRUE; |
case PIPE_FORMAT_R10G10B10A2_USCALED: return TRUE; |
case PIPE_FORMAT_R10G10B10A2_SNORM: return TRUE; |
case PIPE_FORMAT_R10G10B10A2_SSCALED: return TRUE; |
default: return FALSE; |
} |
} |
/drivers/video/Gallium/auxiliary/translate/translate_sse.c |
---|
0,0 → 1,1573 |
/* |
* Copyright 2003 Tungsten Graphics, inc. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* on the rights to use, copy, modify, merge, publish, distribute, sub |
* license, and/or sell copies of the Software, and to permit persons to whom |
* the Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
* TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
* USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Keith Whitwell <keithw@tungstengraphics.com> |
*/ |
#include "pipe/p_config.h" |
#include "pipe/p_compiler.h" |
#include "util/u_memory.h" |
#include "util/u_math.h" |
#include "util/u_format.h" |
#include "translate.h" |
#if (defined(PIPE_ARCH_X86) || (defined(PIPE_ARCH_X86_64) && !defined(__MINGW32__))) && !defined(PIPE_SUBSYSTEM_EMBEDDED) |
#include "rtasm/rtasm_cpu.h" |
#include "rtasm/rtasm_x86sse.h" |
#define X 0 |
#define Y 1 |
#define Z 2 |
#define W 3 |
/* One bound vertex buffer, as seen by the generated code. */
struct translate_buffer {
   const void *base_ptr;   /* start of the buffer in memory */
   uintptr_t stride;       /* byte distance between consecutive vertices */
   unsigned max_index;     /* largest valid vertex index (for clamping) */
};
/* A (buffer, instance divisor) pairing; several variants may reference
 * the same underlying buffer. */
struct translate_buffer_variant {
   unsigned buffer_index;       /* index into translate_sse::buffer[] */
   unsigned instance_divisor;   /* 0 = per-vertex, N = advance every N instances */
   void *ptr;                   /* updated either per vertex or per instance */
};
/* Pseudo buffer index marking elements that source the instance id
 * instead of a real vertex buffer. */
#define ELEMENT_BUFFER_INSTANCE_ID 1001

#define NUM_CONSTS 7

/* Identifiers of the per-lane float constants that generated SSE code
 * may load via get_const(). */
enum
{
   CONST_IDENTITY,        /* {0, 0, 0, 1} — used for padding missing channels */
   CONST_INV_127,         /* 1/127  — snorm8 scale */
   CONST_INV_255,         /* 1/255  — unorm8 scale */
   CONST_INV_32767,       /* 1/32767 — snorm16 scale */
   CONST_INV_65535,       /* 1/65535 — unorm16 scale */
   CONST_INV_2147483647,  /* 1/(2^31 - 1) — 32-bit norm scale */
   CONST_255              /* 255.0 */
};

/* Broadcast a scalar across all four SIMD lanes. */
#define C(v) {(float)(v), (float)(v), (float)(v), (float)(v)}
static float consts[NUM_CONSTS][4] = {
   {0, 0, 0, 1},
   C(1.0 / 127.0),
   C(1.0 / 255.0),
   C(1.0 / 32767.0),
   C(1.0 / 65535.0),
   C(1.0 / 2147483647.0),
   C(255.0)
};
#undef C
/* State for the SSE/SSE2 code-generating implementation of translate. */
struct translate_sse {
   struct translate translate;           /* base class / public interface */

   struct x86_function linear_func;      /* generated code: linear run() */
   struct x86_function elt_func;         /* generated code: 32-bit elts */
   struct x86_function elt16_func;       /* generated code: 16-bit elts */
   struct x86_function elt8_func;        /* generated code: 8-bit elts */
   struct x86_function *func;            /* function currently being built */

   PIPE_ALIGN_VAR(16) float consts[NUM_CONSTS][4];  /* per-machine copy of the constant table */
   int8_t reg_to_const[16];              /* XMM reg -> cached constant id, or -1 */
   int8_t const_to_reg[NUM_CONSTS];      /* constant id -> XMM reg, or -1 */

   struct translate_buffer buffer[PIPE_MAX_ATTRIBS];
   unsigned nr_buffers;

   /* Multiple buffer variants can map to a single buffer. */
   struct translate_buffer_variant buffer_variant[PIPE_MAX_ATTRIBS];
   unsigned nr_buffer_variants;

   /* Multiple elements can map to a single buffer variant. */
   unsigned element_to_buffer_variant[PIPE_MAX_ATTRIBS];

   boolean use_instancing;               /* any element has an instance divisor */
   unsigned instance_id;
   unsigned start_instance;

   /* these are actually known values, but putting them in a struct
    * like this is helpful to keep them in sync across the file.
    */
   struct x86_reg tmp_EAX;
   struct x86_reg tmp2_EDX;
   struct x86_reg src_ECX;
   struct x86_reg idx_ESI;     /* either start+i or &elt[i] */
   struct x86_reg machine_EDI; /* pointer to this struct at run time */
   struct x86_reg outbuf_EBX;
   struct x86_reg count_EBP;   /* decrements to zero */
};
/* Byte offset of 'b' relative to 'a' (b - a as raw addresses). */
static int get_offset( const void *a, const void *b )
{
   const char *base = (const char *)a;
   const char *member = (const char *)b;
   return (int)(member - base);
}
/**
 * Return an XMM register holding constant 'id', emitting a load from the
 * machine struct (addressed through EDI) if the constant is not already
 * cached in a register.
 *
 * XMM2..XMM7 act as the constant cache; XMM0/XMM1 are left free as
 * scratch for the load/emit helpers.
 */
static struct x86_reg get_const( struct translate_sse *p, unsigned id)
{
   struct x86_reg reg;
   unsigned i;

   /* already resident in a register? */
   if(p->const_to_reg[id] >= 0)
      return x86_make_reg(file_XMM, p->const_to_reg[id]);

   /* find a free cache register */
   for(i = 2; i < 8; ++i)
   {
      if(p->reg_to_const[i] < 0)
         break;
   }

   /* TODO: be smarter here */
   if(i == 8)
      --i;   /* all busy: evict the constant in XMM7 */

   reg = x86_make_reg(file_XMM, i);

   /* invalidate the mapping of whatever constant lived here before */
   if(p->reg_to_const[i] >= 0)
      p->const_to_reg[p->reg_to_const[i]] = -1;

   p->reg_to_const[i] = id;
   p->const_to_reg[id] = i;

   /* TODO: this should happen outside the loop, if possible */
   sse_movaps(p->func, reg,
              x86_make_disp(p->machine_EDI,
                            get_offset(p, &p->consts[id][0])));

   return reg;
}
/* load the data in a SSE2 register, padding with zeros */
/**
 * Emit SSE2 code that loads 'size' bytes from 'src' into XMM register
 * 'data', zero-filling the remaining bytes of the register.
 *
 * Clobbers EAX (p->tmp_EAX) and XMM1 for the odd sizes.
 *
 * \return FALSE if 'size' is not one of the supported byte counts.
 */
static boolean emit_load_sse2( struct translate_sse *p,
                               struct x86_reg data,
                               struct x86_reg src,
                               unsigned size)
{
   struct x86_reg tmpXMM = x86_make_reg(file_XMM, 1);
   struct x86_reg tmp = p->tmp_EAX;
   switch(size)
   {
   case 1:
      /* zero-extend a single byte through EAX */
      x86_movzx8(p->func, tmp, src);
      sse2_movd(p->func, data, tmp);
      break;
   case 2:
      x86_movzx16(p->func, tmp, src);
      sse2_movd(p->func, data, tmp);
      break;
   case 3:
      /* assemble byte 2 and the low word into one dword in EAX */
      x86_movzx8(p->func, tmp, x86_make_disp(src, 2));
      x86_shl_imm(p->func, tmp, 16);
      x86_mov16(p->func, tmp, src);
      sse2_movd(p->func, data, tmp);
      break;
   case 4:
      sse2_movd(p->func, data, src);
      break;
   case 6:
      /* low dword plus a zero-extended word, interleaved together */
      sse2_movd(p->func, data, src);
      x86_movzx16(p->func, tmp, x86_make_disp(src, 4));
      sse2_movd(p->func, tmpXMM, tmp);
      sse2_punpckldq(p->func, data, tmpXMM);
      break;
   case 8:
      sse2_movq(p->func, data, src);
      break;
   case 12:
      /* low qword plus the third dword, combined with punpcklqdq */
      sse2_movq(p->func, data, src);
      sse2_movd(p->func, tmpXMM, x86_make_disp(src, 8));
      sse2_punpcklqdq(p->func, data, tmpXMM);
      break;
   case 16:
      sse2_movdqu(p->func, data, src);
      break;
   default:
      return FALSE;
   }
   return TRUE;
}
/* this value can be passed for the out_chans argument */
#define CHANNELS_0001 5

/* this function will load #chans float values, and will
 * pad the register with zeroes at least up to out_chans.
 *
 * If out_chans is set to CHANNELS_0001, then the fourth
 * value will be padded with 1. Only pass this value if
 * chans < 4 or results are undefined.
 */
static void emit_load_float32( struct translate_sse *p,
                               struct x86_reg data,
                               struct x86_reg arg0,
                               unsigned out_chans,
                               unsigned chans)
{
   switch(chans)
   {
   case 1:
      /* a 0 0 0
       * a 0 0 1
       */
      sse_movss(p->func, data, arg0);
      if(out_chans == CHANNELS_0001)
         sse_orps(p->func, data, get_const(p, CONST_IDENTITY) );
      break;
   case 2:
      /* 0 0 0 1
       * a b 0 1
       */
      if(out_chans == CHANNELS_0001)
         sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X, Y, Z, W) );
      else if(out_chans > 2)
         sse_movlhps(p->func, data, get_const(p, CONST_IDENTITY) );
      sse_movlps(p->func, data, arg0);
      break;
   case 3:
      /* Have to jump through some hoops:
       *
       * c 0 0 0
       * c 0 0 1 if out_chans == CHANNELS_0001
       * 0 0 c 0/1
       * a b c 0/1
       */
      sse_movss(p->func, data, x86_make_disp(arg0, 8));
      if(out_chans == CHANNELS_0001)
         sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X,Y,Z,W) );
      sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) );
      sse_movlps(p->func, data, arg0);
      break;
   case 4:
      sse_movups(p->func, data, arg0);
      break;
   }
}
/* this function behaves like emit_load_float32, but loads
   64-bit floating point numbers, converting them to 32-bit
   ones.  Clobbers XMM1 for the 3-channel case. */
static void emit_load_float64to32( struct translate_sse *p,
                                   struct x86_reg data,
                                   struct x86_reg arg0,
                                   unsigned out_chans,
                                   unsigned chans)
{
   struct x86_reg tmpXMM = x86_make_reg(file_XMM, 1);
   switch(chans)
   {
   case 1:
      sse2_movsd(p->func, data, arg0);
      /* cvtpd2ps zeroes the upper lanes, cvtsd2ss leaves them alone */
      if(out_chans > 1)
         sse2_cvtpd2ps(p->func, data, data);
      else
         sse2_cvtsd2ss(p->func, data, data);
      if(out_chans == CHANNELS_0001)
         sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X, Y, Z, W) );
      break;
   case 2:
      sse2_movupd(p->func, data, arg0);
      sse2_cvtpd2ps(p->func, data, data);
      if(out_chans == CHANNELS_0001)
         sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X, Y, Z, W) );
      else if(out_chans > 2)
         sse_movlhps(p->func, data, get_const(p, CONST_IDENTITY) );
      break;
   case 3:
      /* convert the first two doubles, then the third separately,
       * and merge the halves with movlhps */
      sse2_movupd(p->func, data, arg0);
      sse2_cvtpd2ps(p->func, data, data);
      sse2_movsd(p->func, tmpXMM, x86_make_disp(arg0, 16));
      if(out_chans > 3)
         sse2_cvtpd2ps(p->func, tmpXMM, tmpXMM);
      else
         sse2_cvtsd2ss(p->func, tmpXMM, tmpXMM);
      sse_movlhps(p->func, data, tmpXMM);
      if(out_chans == CHANNELS_0001)
         sse_orps(p->func, data, get_const(p, CONST_IDENTITY) );
      break;
   case 4:
      sse2_movupd(p->func, data, arg0);
      sse2_cvtpd2ps(p->func, data, data);
      sse2_movupd(p->func, tmpXMM, x86_make_disp(arg0, 16));
      sse2_cvtpd2ps(p->func, tmpXMM, tmpXMM);
      sse_movlhps(p->func, data, tmpXMM);
      break;
   }
}
/**
 * Emit a 64-bit move: through a GPR on x86-64, through an XMM register
 * (SSE2 movq or SSE movlps) on 32-bit x86.
 */
static void emit_mov64(struct translate_sse *p, struct x86_reg dst_gpr, struct x86_reg dst_xmm, struct x86_reg src_gpr, struct x86_reg src_xmm)
{
   if(x86_target(p->func) != X86_32)
      x64_mov64(p->func, dst_gpr, src_gpr);
   else
   {
      /* TODO: when/on which CPUs is SSE2 actually better than SSE? */
      if(x86_target_caps(p->func) & X86_SSE2)
         sse2_movq(p->func, dst_xmm, src_xmm);
      else
         sse_movlps(p->func, dst_xmm, src_xmm);
   }
}
/* Emit a 64-bit load from memory into a GPR (x86-64) or XMM (x86-32). */
static void emit_load64(struct translate_sse *p, struct x86_reg dst_gpr, struct x86_reg dst_xmm, struct x86_reg src)
{
   emit_mov64(p, dst_gpr, dst_xmm, src, src);
}
/* Emit a 64-bit store to memory from a GPR (x86-64) or XMM (x86-32). */
static void emit_store64(struct translate_sse *p, struct x86_reg dst, struct x86_reg src_gpr, struct x86_reg src_xmm)
{
   emit_mov64(p, dst, dst, src_gpr, src_xmm);
}
/* Emit an unaligned 128-bit move (SSE2 movdqu if available, else movups). */
static void emit_mov128(struct translate_sse *p, struct x86_reg dst, struct x86_reg src)
{
   if(x86_target_caps(p->func) & X86_SSE2)
      sse2_movdqu(p->func, dst, src);
   else
      sse_movups(p->func, dst, src);
}
/* TODO: this uses unaligned accesses liberally, which is great on Nehalem,
 * but may or may not be good on older processors
 * TODO: may perhaps want to use non-temporal stores here if possible
 */
/**
 * Emit code that copies 'size' bytes from 'src' to 'dst'.
 *
 * Small sizes (< 8) go through the GPRs; larger ones use SSE/XMM when
 * available, otherwise a dword-at-a-time GPR loop (size must then be a
 * multiple of 4).  Clobbers EAX, EDX, XMM0 and XMM1.
 */
static void emit_memcpy(struct translate_sse *p, struct x86_reg dst, struct x86_reg src, unsigned size)
{
   struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
   struct x86_reg dataXMM2 = x86_make_reg(file_XMM, 1);
   struct x86_reg dataGPR = p->tmp_EAX;
   struct x86_reg dataGPR2 = p->tmp2_EDX;

   if(size < 8)
   {
      switch (size)
      {
      case 1:
         x86_mov8(p->func, dataGPR, src);
         x86_mov8(p->func, dst, dataGPR);
         break;
      case 2:
         x86_mov16(p->func, dataGPR, src);
         x86_mov16(p->func, dst, dataGPR);
         break;
      case 3:
         /* word + trailing byte */
         x86_mov16(p->func, dataGPR, src);
         x86_mov8(p->func, dataGPR2, x86_make_disp(src, 2));
         x86_mov16(p->func, dst, dataGPR);
         x86_mov8(p->func, x86_make_disp(dst, 2), dataGPR2);
         break;
      case 4:
         x86_mov(p->func, dataGPR, src);
         x86_mov(p->func, dst, dataGPR);
         break;
      case 6:
         /* dword + trailing word */
         x86_mov(p->func, dataGPR, src);
         x86_mov16(p->func, dataGPR2, x86_make_disp(src, 4));
         x86_mov(p->func, dst, dataGPR);
         x86_mov16(p->func, x86_make_disp(dst, 4), dataGPR2);
         break;
      }
   }
   else if(!(x86_target_caps(p->func) & X86_SSE))
   {
      /* no SSE at all: copy dword by dword through EAX */
      unsigned i = 0;
      assert((size & 3) == 0);
      for(i = 0; i < size; i += 4)
      {
         x86_mov(p->func, dataGPR, x86_make_disp(src, i));
         x86_mov(p->func, x86_make_disp(dst, i), dataGPR);
      }
   }
   else
   {
      switch(size)
      {
      case 8:
         emit_load64(p, dataGPR, dataXMM, src);
         emit_store64(p, dst, dataGPR, dataXMM);
         break;
      case 12:
         /* qword + trailing dword; loads before stores so src may alias dst */
         emit_load64(p, dataGPR2, dataXMM, src);
         x86_mov(p->func, dataGPR, x86_make_disp(src, 8));
         emit_store64(p, dst, dataGPR2, dataXMM);
         x86_mov(p->func, x86_make_disp(dst, 8), dataGPR);
         break;
      case 16:
         emit_mov128(p, dataXMM, src);
         emit_mov128(p, dst, dataXMM);
         break;
      case 24:
         emit_mov128(p, dataXMM, src);
         emit_load64(p, dataGPR, dataXMM2, x86_make_disp(src, 16));
         emit_mov128(p, dst, dataXMM);
         emit_store64(p, x86_make_disp(dst, 16), dataGPR, dataXMM2);
         break;
      case 32:
         emit_mov128(p, dataXMM, src);
         emit_mov128(p, dataXMM2, x86_make_disp(src, 16));
         emit_mov128(p, dst, dataXMM);
         emit_mov128(p, x86_make_disp(dst, 16), dataXMM2);
         break;
      default:
         assert(0);
      }
   }
}
static boolean translate_attr_convert( struct translate_sse *p, |
const struct translate_element *a, |
struct x86_reg src, |
struct x86_reg dst) |
{ |
const struct util_format_description* input_desc = util_format_description(a->input_format); |
const struct util_format_description* output_desc = util_format_description(a->output_format); |
unsigned i; |
boolean id_swizzle = TRUE; |
unsigned swizzle[4] = {UTIL_FORMAT_SWIZZLE_NONE, UTIL_FORMAT_SWIZZLE_NONE, UTIL_FORMAT_SWIZZLE_NONE, UTIL_FORMAT_SWIZZLE_NONE}; |
unsigned needed_chans = 0; |
unsigned imms[2] = {0, 0x3f800000}; |
if(a->output_format == PIPE_FORMAT_NONE || a->input_format == PIPE_FORMAT_NONE) |
return FALSE; |
if(input_desc->channel[0].size & 7) |
return FALSE; |
if(input_desc->colorspace != output_desc->colorspace) |
return FALSE; |
for(i = 1; i < input_desc->nr_channels; ++i) |
{ |
if(memcmp(&input_desc->channel[i], &input_desc->channel[0], sizeof(input_desc->channel[0]))) |
return FALSE; |
} |
for(i = 1; i < output_desc->nr_channels; ++i) |
{ |
if(memcmp(&output_desc->channel[i], &output_desc->channel[0], sizeof(output_desc->channel[0]))) |
return FALSE; |
} |
for(i = 0; i < output_desc->nr_channels; ++i) |
{ |
if(output_desc->swizzle[i] < 4) |
swizzle[output_desc->swizzle[i]] = input_desc->swizzle[i]; |
} |
if((x86_target_caps(p->func) & X86_SSE) && (0 |
|| a->output_format == PIPE_FORMAT_R32_FLOAT |
|| a->output_format == PIPE_FORMAT_R32G32_FLOAT |
|| a->output_format == PIPE_FORMAT_R32G32B32_FLOAT |
|| a->output_format == PIPE_FORMAT_R32G32B32A32_FLOAT)) |
{ |
struct x86_reg dataXMM = x86_make_reg(file_XMM, 0); |
for(i = 0; i < output_desc->nr_channels; ++i) |
{ |
if(swizzle[i] == UTIL_FORMAT_SWIZZLE_0 && i >= input_desc->nr_channels) |
swizzle[i] = i; |
} |
for(i = 0; i < output_desc->nr_channels; ++i) |
{ |
if(swizzle[i] < 4) |
needed_chans = MAX2(needed_chans, swizzle[i] + 1); |
if(swizzle[i] < UTIL_FORMAT_SWIZZLE_0 && swizzle[i] != i) |
id_swizzle = FALSE; |
} |
if(needed_chans > 0) |
{ |
switch(input_desc->channel[0].type) |
{ |
case UTIL_FORMAT_TYPE_UNSIGNED: |
if(!(x86_target_caps(p->func) & X86_SSE2)) |
return FALSE; |
emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3); |
/* TODO: add support for SSE4.1 pmovzx */ |
switch(input_desc->channel[0].size) |
{ |
case 8: |
/* TODO: this may be inefficient due to get_identity() being used both as a float and integer register */ |
sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY)); |
sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY)); |
break; |
case 16: |
sse2_punpcklwd(p->func, dataXMM, get_const(p, CONST_IDENTITY)); |
break; |
case 32: /* we lose precision here */ |
sse2_psrld_imm(p->func, dataXMM, 1); |
break; |
default: |
return FALSE; |
} |
sse2_cvtdq2ps(p->func, dataXMM, dataXMM); |
if(input_desc->channel[0].normalized) |
{ |
struct x86_reg factor; |
switch(input_desc->channel[0].size) |
{ |
case 8: |
factor = get_const(p, CONST_INV_255); |
break; |
case 16: |
factor = get_const(p, CONST_INV_65535); |
break; |
case 32: |
factor = get_const(p, CONST_INV_2147483647); |
break; |
default: |
assert(0); |
factor.disp = 0; |
factor.file = 0; |
factor.idx = 0; |
factor.mod = 0; |
break; |
} |
sse_mulps(p->func, dataXMM, factor); |
} |
else if(input_desc->channel[0].size == 32) |
sse_addps(p->func, dataXMM, dataXMM); /* compensate for the bit we threw away to fit u32 into s32 */ |
break; |
case UTIL_FORMAT_TYPE_SIGNED: |
if(!(x86_target_caps(p->func) & X86_SSE2)) |
return FALSE; |
emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3); |
/* TODO: add support for SSE4.1 pmovsx */ |
switch(input_desc->channel[0].size) |
{ |
case 8: |
sse2_punpcklbw(p->func, dataXMM, dataXMM); |
sse2_punpcklbw(p->func, dataXMM, dataXMM); |
sse2_psrad_imm(p->func, dataXMM, 24); |
break; |
case 16: |
sse2_punpcklwd(p->func, dataXMM, dataXMM); |
sse2_psrad_imm(p->func, dataXMM, 16); |
break; |
case 32: /* we lose precision here */ |
break; |
default: |
return FALSE; |
} |
sse2_cvtdq2ps(p->func, dataXMM, dataXMM); |
if(input_desc->channel[0].normalized) |
{ |
struct x86_reg factor; |
switch(input_desc->channel[0].size) |
{ |
case 8: |
factor = get_const(p, CONST_INV_127); |
break; |
case 16: |
factor = get_const(p, CONST_INV_32767); |
break; |
case 32: |
factor = get_const(p, CONST_INV_2147483647); |
break; |
default: |
assert(0); |
factor.disp = 0; |
factor.file = 0; |
factor.idx = 0; |
factor.mod = 0; |
break; |
} |
sse_mulps(p->func, dataXMM, factor); |
} |
break; |
break; |
case UTIL_FORMAT_TYPE_FLOAT: |
if(input_desc->channel[0].size != 32 && input_desc->channel[0].size != 64) |
return FALSE; |
if(swizzle[3] == UTIL_FORMAT_SWIZZLE_1 && input_desc->nr_channels <= 3) |
{ |
swizzle[3] = UTIL_FORMAT_SWIZZLE_W; |
needed_chans = CHANNELS_0001; |
} |
switch(input_desc->channel[0].size) |
{ |
case 32: |
emit_load_float32(p, dataXMM, src, needed_chans, input_desc->nr_channels); |
break; |
case 64: /* we lose precision here */ |
if(!(x86_target_caps(p->func) & X86_SSE2)) |
return FALSE; |
emit_load_float64to32(p, dataXMM, src, needed_chans, input_desc->nr_channels); |
break; |
default: |
return FALSE; |
} |
break; |
default: |
return FALSE; |
} |
if(!id_swizzle) |
sse_shufps(p->func, dataXMM, dataXMM, SHUF(swizzle[0], swizzle[1], swizzle[2], swizzle[3]) ); |
} |
if(output_desc->nr_channels >= 4 |
&& swizzle[0] < UTIL_FORMAT_SWIZZLE_0 |
&& swizzle[1] < UTIL_FORMAT_SWIZZLE_0 |
&& swizzle[2] < UTIL_FORMAT_SWIZZLE_0 |
&& swizzle[3] < UTIL_FORMAT_SWIZZLE_0 |
) |
sse_movups(p->func, dst, dataXMM); |
else |
{ |
if(output_desc->nr_channels >= 2 |
&& swizzle[0] < UTIL_FORMAT_SWIZZLE_0 |
&& swizzle[1] < UTIL_FORMAT_SWIZZLE_0) |
sse_movlps(p->func, dst, dataXMM); |
else |
{ |
if(swizzle[0] < UTIL_FORMAT_SWIZZLE_0) |
sse_movss(p->func, dst, dataXMM); |
else |
x86_mov_imm(p->func, dst, imms[swizzle[0] - UTIL_FORMAT_SWIZZLE_0]); |
if(output_desc->nr_channels >= 2) |
{ |
if(swizzle[1] < UTIL_FORMAT_SWIZZLE_0) |
{ |
sse_shufps(p->func, dataXMM, dataXMM, SHUF(1, 1, 2, 3)); |
sse_movss(p->func, x86_make_disp(dst, 4), dataXMM); |
} |
else |
x86_mov_imm(p->func, x86_make_disp(dst, 4), imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0]); |
} |
} |
if(output_desc->nr_channels >= 3) |
{ |
if(output_desc->nr_channels >= 4 |
&& swizzle[2] < UTIL_FORMAT_SWIZZLE_0 |
&& swizzle[3] < UTIL_FORMAT_SWIZZLE_0) |
sse_movhps(p->func, x86_make_disp(dst, 8), dataXMM); |
else |
{ |
if(swizzle[2] < UTIL_FORMAT_SWIZZLE_0) |
{ |
sse_shufps(p->func, dataXMM, dataXMM, SHUF(2, 2, 2, 3)); |
sse_movss(p->func, x86_make_disp(dst, 8), dataXMM); |
} |
else |
x86_mov_imm(p->func, x86_make_disp(dst, 8), imms[swizzle[2] - UTIL_FORMAT_SWIZZLE_0]); |
if(output_desc->nr_channels >= 4) |
{ |
if(swizzle[3] < UTIL_FORMAT_SWIZZLE_0) |
{ |
sse_shufps(p->func, dataXMM, dataXMM, SHUF(3, 3, 3, 3)); |
sse_movss(p->func, x86_make_disp(dst, 12), dataXMM); |
} |
else |
x86_mov_imm(p->func, x86_make_disp(dst, 12), imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0]); |
} |
} |
} |
} |
return TRUE; |
} |
else if((x86_target_caps(p->func) & X86_SSE2) && input_desc->channel[0].size == 8 && output_desc->channel[0].size == 16 |
&& output_desc->channel[0].normalized == input_desc->channel[0].normalized |
&& (0 |
|| (input_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED && output_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) |
|| (input_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED && output_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) |
|| (input_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED && output_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) |
)) |
{ |
struct x86_reg dataXMM = x86_make_reg(file_XMM, 0); |
struct x86_reg tmpXMM = x86_make_reg(file_XMM, 1); |
struct x86_reg tmp = p->tmp_EAX; |
unsigned imms[2] = {0, 1}; |
for(i = 0; i < output_desc->nr_channels; ++i) |
{ |
if(swizzle[i] == UTIL_FORMAT_SWIZZLE_0 && i >= input_desc->nr_channels) |
swizzle[i] = i; |
} |
for(i = 0; i < output_desc->nr_channels; ++i) |
{ |
if(swizzle[i] < 4) |
needed_chans = MAX2(needed_chans, swizzle[i] + 1); |
if(swizzle[i] < UTIL_FORMAT_SWIZZLE_0 && swizzle[i] != i) |
id_swizzle = FALSE; |
} |
if(needed_chans > 0) |
{ |
emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3); |
switch(input_desc->channel[0].type) |
{ |
case UTIL_FORMAT_TYPE_UNSIGNED: |
if(input_desc->channel[0].normalized) |
{ |
sse2_punpcklbw(p->func, dataXMM, dataXMM); |
if(output_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) |
sse2_psrlw_imm(p->func, dataXMM, 1); |
} |
else |
sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY)); |
break; |
case UTIL_FORMAT_TYPE_SIGNED: |
if(input_desc->channel[0].normalized) |
{ |
sse2_movq(p->func, tmpXMM, get_const(p, CONST_IDENTITY)); |
sse2_punpcklbw(p->func, tmpXMM, dataXMM); |
sse2_psllw_imm(p->func, dataXMM, 9); |
sse2_psrlw_imm(p->func, dataXMM, 8); |
sse2_por(p->func, tmpXMM, dataXMM); |
sse2_psrlw_imm(p->func, dataXMM, 7); |
sse2_por(p->func, tmpXMM, dataXMM); |
{ |
struct x86_reg t = dataXMM; |
dataXMM = tmpXMM; |
tmpXMM = t; |
} |
} |
else |
{ |
sse2_punpcklbw(p->func, dataXMM, dataXMM); |
sse2_psraw_imm(p->func, dataXMM, 8); |
} |
break; |
default: |
assert(0); |
} |
if(output_desc->channel[0].normalized) |
imms[1] = (output_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) ? 0xffff : 0x7ffff; |
if(!id_swizzle) |
sse2_pshuflw(p->func, dataXMM, dataXMM, (swizzle[0] & 3) | ((swizzle[1] & 3) << 2) | ((swizzle[2] & 3) << 4) | ((swizzle[3] & 3) << 6)); |
} |
if(output_desc->nr_channels >= 4 |
&& swizzle[0] < UTIL_FORMAT_SWIZZLE_0 |
&& swizzle[1] < UTIL_FORMAT_SWIZZLE_0 |
&& swizzle[2] < UTIL_FORMAT_SWIZZLE_0 |
&& swizzle[3] < UTIL_FORMAT_SWIZZLE_0 |
) |
sse2_movq(p->func, dst, dataXMM); |
else |
{ |
if(swizzle[0] < UTIL_FORMAT_SWIZZLE_0) |
{ |
if(output_desc->nr_channels >= 2 && swizzle[1] < UTIL_FORMAT_SWIZZLE_0) |
sse2_movd(p->func, dst, dataXMM); |
else |
{ |
sse2_movd(p->func, tmp, dataXMM); |
x86_mov16(p->func, dst, tmp); |
if(output_desc->nr_channels >= 2) |
x86_mov16_imm(p->func, x86_make_disp(dst, 2), imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0]); |
} |
} |
else |
{ |
if(output_desc->nr_channels >= 2 && swizzle[1] >= UTIL_FORMAT_SWIZZLE_0) |
x86_mov_imm(p->func, dst, (imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0] << 16) | imms[swizzle[0] - UTIL_FORMAT_SWIZZLE_0]); |
else |
{ |
x86_mov16_imm(p->func, dst, imms[swizzle[0] - UTIL_FORMAT_SWIZZLE_0]); |
if(output_desc->nr_channels >= 2) |
{ |
sse2_movd(p->func, tmp, dataXMM); |
x86_shr_imm(p->func, tmp, 16); |
x86_mov16(p->func, x86_make_disp(dst, 2), tmp); |
} |
} |
} |
if(output_desc->nr_channels >= 3) |
{ |
if(swizzle[2] < UTIL_FORMAT_SWIZZLE_0) |
{ |
if(output_desc->nr_channels >= 4 && swizzle[3] < UTIL_FORMAT_SWIZZLE_0) |
{ |
sse2_psrlq_imm(p->func, dataXMM, 32); |
sse2_movd(p->func, x86_make_disp(dst, 4), dataXMM); |
} |
else |
{ |
sse2_psrlq_imm(p->func, dataXMM, 32); |
sse2_movd(p->func, tmp, dataXMM); |
x86_mov16(p->func, x86_make_disp(dst, 4), tmp); |
if(output_desc->nr_channels >= 4) |
{ |
x86_mov16_imm(p->func, x86_make_disp(dst, 6), imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0]); |
} |
} |
} |
else |
{ |
if(output_desc->nr_channels >= 4 && swizzle[3] >= UTIL_FORMAT_SWIZZLE_0) |
x86_mov_imm(p->func, x86_make_disp(dst, 4), (imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0] << 16) | imms[swizzle[2] - UTIL_FORMAT_SWIZZLE_0]); |
else |
{ |
x86_mov16_imm(p->func, x86_make_disp(dst, 4), imms[swizzle[2] - UTIL_FORMAT_SWIZZLE_0]); |
if(output_desc->nr_channels >= 4) |
{ |
sse2_psrlq_imm(p->func, dataXMM, 48); |
sse2_movd(p->func, tmp, dataXMM); |
x86_mov16(p->func, x86_make_disp(dst, 6), tmp); |
} |
} |
} |
} |
} |
return TRUE; |
} |
else if(!memcmp(&output_desc->channel[0], &input_desc->channel[0], sizeof(output_desc->channel[0]))) |
{ |
struct x86_reg tmp = p->tmp_EAX; |
unsigned i; |
if(input_desc->channel[0].size == 8 && input_desc->nr_channels == 4 && output_desc->nr_channels == 4 |
&& swizzle[0] == UTIL_FORMAT_SWIZZLE_W |
&& swizzle[1] == UTIL_FORMAT_SWIZZLE_Z |
&& swizzle[2] == UTIL_FORMAT_SWIZZLE_Y |
&& swizzle[3] == UTIL_FORMAT_SWIZZLE_X) |
{ |
/* TODO: support movbe */ |
x86_mov(p->func, tmp, src); |
x86_bswap(p->func, tmp); |
x86_mov(p->func, dst, tmp); |
return TRUE; |
} |
for(i = 0; i < output_desc->nr_channels; ++i) |
{ |
switch(output_desc->channel[0].size) |
{ |
case 8: |
if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0) |
{ |
unsigned v = 0; |
if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1) |
{ |
switch(output_desc->channel[0].type) |
{ |
case UTIL_FORMAT_TYPE_UNSIGNED: |
v = output_desc->channel[0].normalized ? 0xff : 1; |
break; |
case UTIL_FORMAT_TYPE_SIGNED: |
v = output_desc->channel[0].normalized ? 0x7f : 1; |
break; |
default: |
return FALSE; |
} |
} |
x86_mov8_imm(p->func, x86_make_disp(dst, i * 1), v); |
} |
else |
{ |
x86_mov8(p->func, tmp, x86_make_disp(src, swizzle[i] * 1)); |
x86_mov8(p->func, x86_make_disp(dst, i * 1), tmp); |
} |
break; |
case 16: |
if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0) |
{ |
unsigned v = 0; |
if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1) |
{ |
switch(output_desc->channel[1].type) |
{ |
case UTIL_FORMAT_TYPE_UNSIGNED: |
v = output_desc->channel[1].normalized ? 0xffff : 1; |
break; |
case UTIL_FORMAT_TYPE_SIGNED: |
v = output_desc->channel[1].normalized ? 0x7fff : 1; |
break; |
case UTIL_FORMAT_TYPE_FLOAT: |
v = 0x3c00; |
break; |
default: |
return FALSE; |
} |
} |
x86_mov16_imm(p->func, x86_make_disp(dst, i * 2), v); |
} |
else if(swizzle[i] == UTIL_FORMAT_SWIZZLE_0) |
x86_mov16_imm(p->func, x86_make_disp(dst, i * 2), 0); |
else |
{ |
x86_mov16(p->func, tmp, x86_make_disp(src, swizzle[i] * 2)); |
x86_mov16(p->func, x86_make_disp(dst, i * 2), tmp); |
} |
break; |
case 32: |
if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0) |
{ |
unsigned v = 0; |
if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1) |
{ |
switch(output_desc->channel[1].type) |
{ |
case UTIL_FORMAT_TYPE_UNSIGNED: |
v = output_desc->channel[1].normalized ? 0xffffffff : 1; |
break; |
case UTIL_FORMAT_TYPE_SIGNED: |
v = output_desc->channel[1].normalized ? 0x7fffffff : 1; |
break; |
case UTIL_FORMAT_TYPE_FLOAT: |
v = 0x3f800000; |
break; |
default: |
return FALSE; |
} |
} |
x86_mov_imm(p->func, x86_make_disp(dst, i * 4), v); |
} |
else |
{ |
x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 4)); |
x86_mov(p->func, x86_make_disp(dst, i * 4), tmp); |
} |
break; |
case 64: |
if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0) |
{ |
unsigned l = 0; |
unsigned h = 0; |
if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1) |
{ |
switch(output_desc->channel[1].type) |
{ |
case UTIL_FORMAT_TYPE_UNSIGNED: |
h = output_desc->channel[1].normalized ? 0xffffffff : 0; |
l = output_desc->channel[1].normalized ? 0xffffffff : 1; |
break; |
case UTIL_FORMAT_TYPE_SIGNED: |
h = output_desc->channel[1].normalized ? 0x7fffffff : 0; |
l = output_desc->channel[1].normalized ? 0xffffffff : 1; |
break; |
case UTIL_FORMAT_TYPE_FLOAT: |
h = 0x3ff00000; |
l = 0; |
break; |
default: |
return FALSE; |
} |
} |
x86_mov_imm(p->func, x86_make_disp(dst, i * 8), l); |
x86_mov_imm(p->func, x86_make_disp(dst, i * 8 + 4), h); |
} |
else |
{ |
if(x86_target_caps(p->func) & X86_SSE) |
{ |
struct x86_reg tmpXMM = x86_make_reg(file_XMM, 0); |
emit_load64(p, tmp, tmpXMM, x86_make_disp(src, swizzle[i] * 8)); |
emit_store64(p, x86_make_disp(dst, i * 8), tmp, tmpXMM); |
} |
else |
{ |
x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 8)); |
x86_mov(p->func, x86_make_disp(dst, i * 8), tmp); |
x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 8 + 4)); |
x86_mov(p->func, x86_make_disp(dst, i * 8 + 4), tmp); |
} |
} |
break; |
default: |
return FALSE; |
} |
} |
return TRUE; |
} |
/* special case for draw's EMIT_4UB (RGBA) and EMIT_4UB_BGRA */ |
else if((x86_target_caps(p->func) & X86_SSE2) && |
a->input_format == PIPE_FORMAT_R32G32B32A32_FLOAT && (0 |
|| a->output_format == PIPE_FORMAT_B8G8R8A8_UNORM |
|| a->output_format == PIPE_FORMAT_R8G8B8A8_UNORM |
)) |
{ |
struct x86_reg dataXMM = x86_make_reg(file_XMM, 0); |
/* load */ |
sse_movups(p->func, dataXMM, src); |
if (a->output_format == PIPE_FORMAT_B8G8R8A8_UNORM) |
sse_shufps(p->func, dataXMM, dataXMM, SHUF(2,1,0,3)); |
/* scale by 255.0 */ |
sse_mulps(p->func, dataXMM, get_const(p, CONST_255)); |
/* pack and emit */ |
sse2_cvtps2dq(p->func, dataXMM, dataXMM); |
sse2_packssdw(p->func, dataXMM, dataXMM); |
sse2_packuswb(p->func, dataXMM, dataXMM); |
sse2_movd(p->func, dst, dataXMM); |
return TRUE; |
} |
return FALSE; |
} |
static boolean translate_attr( struct translate_sse *p, |
const struct translate_element *a, |
struct x86_reg src, |
struct x86_reg dst) |
{ |
if(a->input_format == a->output_format) |
{ |
emit_memcpy(p, dst, src, util_format_get_stride(a->input_format, 1)); |
return TRUE; |
} |
return translate_attr_convert(p, a, src, dst); |
} |
/* Emit code that initializes the per-variant input pointers before the
 * main vertex loop.
 *
 * For linear (non-indexed) fetches and for instanced elements the source
 * pointer can be computed once up front as base_ptr + stride * index,
 * where the index is either the (clamped) start element or a value
 * derived from the instance id and the instance divisor.  Indexed,
 * non-instanced buffers are instead resolved per element in
 * get_buffer_ptr().
 */
static boolean init_inputs( struct translate_sse *p,
                            unsigned index_size )
{
   unsigned i;
   struct x86_reg instance_id = x86_make_disp(p->machine_EDI,
                                              get_offset(p, &p->instance_id));
   struct x86_reg start_instance = x86_make_disp(p->machine_EDI,
                                                 get_offset(p, &p->start_instance));

   for (i = 0; i < p->nr_buffer_variants; i++) {
      struct translate_buffer_variant *variant = &p->buffer_variant[i];
      struct translate_buffer *buffer = &p->buffer[variant->buffer_index];

      if (!index_size || variant->instance_divisor) {
         struct x86_reg buf_max_index = x86_make_disp(p->machine_EDI,
                                                      get_offset(p, &buffer->max_index));
         struct x86_reg buf_stride = x86_make_disp(p->machine_EDI,
                                                   get_offset(p, &buffer->stride));
         struct x86_reg buf_ptr = x86_make_disp(p->machine_EDI,
                                                get_offset(p, &variant->ptr));
         struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDI,
                                                     get_offset(p, &buffer->base_ptr));
         struct x86_reg elt = p->idx_ESI;
         struct x86_reg tmp_EAX = p->tmp_EAX;

         /* Calculate pointer to first attrib:
          *   base_ptr + stride * index, where index depends on instance divisor
          */
         if (variant->instance_divisor) {
            /* Start with instance = instance_id
             * which is true if divisor is 1.
             */
            x86_mov(p->func, tmp_EAX, instance_id);

            if (variant->instance_divisor != 1) {
               struct x86_reg tmp_EDX = p->tmp2_EDX;
               struct x86_reg tmp_ECX = p->src_ECX;

               /* instance_num = instance_id - start_instance */
               x86_mov(p->func, tmp_EDX, start_instance);
               x86_sub(p->func, tmp_EAX, tmp_EDX);

               /* TODO: Add x86_shr() to rtasm and use it whenever
                * instance divisor is power of two.
                */
               x86_xor(p->func, tmp_EDX, tmp_EDX);   /* zero EDX for the EDX:EAX divide */
               x86_mov_reg_imm(p->func, tmp_ECX, variant->instance_divisor);
               x86_div(p->func, tmp_ECX);    /* EAX = EDX:EAX / ECX */

               /* instance = (instance_id - start_instance) / divisor +
                * start_instance
                */
               x86_mov(p->func, tmp_EDX, start_instance);
               x86_add(p->func, tmp_EAX, tmp_EDX);
            }

            /* XXX we need to clamp the index here too, but to a
             * per-array max value, not the draw->pt.max_index value
             * that's being given to us via translate->set_buffer().
             */
         } else {
            x86_mov(p->func, tmp_EAX, elt);

            /* Clamp to max_index
             */
            x86_cmp(p->func, tmp_EAX, buf_max_index);
            x86_cmovcc(p->func, tmp_EAX, buf_max_index, cc_AE);
         }

         x86_imul(p->func, tmp_EAX, buf_stride);
         x64_rexw(p->func);   /* widen the following add to 64-bit on x86-64 */
         x86_add(p->func, tmp_EAX, buf_base_ptr);
         /* NOTE(review): no conditional emitted here appears to consume
          * the flags from this compare -- looks like a leftover; confirm
          * against upstream before removing. */
         x86_cmp(p->func, p->count_EBP, p->tmp_EAX);

         /* In the linear case, keep the buffer pointer instead of the
          * index number.
          */
         if (!index_size && p->nr_buffer_variants == 1)
         {
            x64_rexw(p->func);
            x86_mov(p->func, elt, tmp_EAX);
         }
         else
         {
            x64_rexw(p->func);
            x86_mov(p->func, buf_ptr, tmp_EAX);
         }
      }
   }

   return TRUE;
}
static struct x86_reg get_buffer_ptr( struct translate_sse *p, |
unsigned index_size, |
unsigned var_idx, |
struct x86_reg elt ) |
{ |
if (var_idx == ELEMENT_BUFFER_INSTANCE_ID) { |
return x86_make_disp(p->machine_EDI, |
get_offset(p, &p->instance_id)); |
} |
if (!index_size && p->nr_buffer_variants == 1) { |
return p->idx_ESI; |
} |
else if (!index_size || p->buffer_variant[var_idx].instance_divisor) { |
struct x86_reg ptr = p->src_ECX; |
struct x86_reg buf_ptr = |
x86_make_disp(p->machine_EDI, |
get_offset(p, &p->buffer_variant[var_idx].ptr)); |
x64_rexw(p->func); |
x86_mov(p->func, ptr, buf_ptr); |
return ptr; |
} |
else { |
struct x86_reg ptr = p->src_ECX; |
const struct translate_buffer_variant *variant = &p->buffer_variant[var_idx]; |
struct x86_reg buf_stride = |
x86_make_disp(p->machine_EDI, |
get_offset(p, &p->buffer[variant->buffer_index].stride)); |
struct x86_reg buf_base_ptr = |
x86_make_disp(p->machine_EDI, |
get_offset(p, &p->buffer[variant->buffer_index].base_ptr)); |
struct x86_reg buf_max_index = |
x86_make_disp(p->machine_EDI, |
get_offset(p, &p->buffer[variant->buffer_index].max_index)); |
/* Calculate pointer to current attrib: |
*/ |
switch(index_size) |
{ |
case 1: |
x86_movzx8(p->func, ptr, elt); |
break; |
case 2: |
x86_movzx16(p->func, ptr, elt); |
break; |
case 4: |
x86_mov(p->func, ptr, elt); |
break; |
} |
/* Clamp to max_index |
*/ |
x86_cmp(p->func, ptr, buf_max_index); |
x86_cmovcc(p->func, ptr, buf_max_index, cc_AE); |
x86_imul(p->func, ptr, buf_stride); |
x64_rexw(p->func); |
x86_add(p->func, ptr, buf_base_ptr); |
return ptr; |
} |
} |
/* Emit code that advances the input pointer(s)/index at the end of
 * each iteration of the generated vertex loop.
 */
static boolean incr_inputs( struct translate_sse *p,
                            unsigned index_size )
{
   if (!index_size && p->nr_buffer_variants == 1) {
      /* Single linear buffer: ESI holds the buffer pointer itself,
       * so just step it by the stride. */
      struct x86_reg stride = x86_make_disp(p->machine_EDI,
                                            get_offset(p, &p->buffer[0].stride));

      if (p->buffer_variant[0].instance_divisor == 0) {
         x64_rexw(p->func);
         x86_add(p->func, p->idx_ESI, stride);
         sse_prefetchnta(p->func, x86_make_disp(p->idx_ESI, 192));
      }
   }
   else if (!index_size) {
      unsigned i;

      /* Is this worthwhile??
       */
      for (i = 0; i < p->nr_buffer_variants; i++) {
         struct translate_buffer_variant *variant = &p->buffer_variant[i];
         struct x86_reg buf_ptr = x86_make_disp(p->machine_EDI,
                                                get_offset(p, &variant->ptr));
         struct x86_reg buf_stride = x86_make_disp(p->machine_EDI,
                                                   get_offset(p, &p->buffer[variant->buffer_index].stride));

         /* Instanced variants keep the same pointer across vertices. */
         if (variant->instance_divisor == 0) {
            x86_mov(p->func, p->tmp_EAX, buf_stride);
            x64_rexw(p->func);
            x86_add(p->func, p->tmp_EAX, buf_ptr);
            if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192));
            x64_rexw(p->func);
            x86_mov(p->func, buf_ptr, p->tmp_EAX);
         }
      }
   }
   else {
      /* Indexed path: step ESI to the next element index. */
      x64_rexw(p->func);
      x86_lea(p->func, p->idx_ESI, x86_make_disp(p->idx_ESI, index_size));
   }

   return TRUE;
}
/* Build run( struct translate *machine,
 *            unsigned start,
 *            unsigned count,
 *            void *output_buffer )
 * or
 * run_elts( struct translate *machine,
 *           unsigned *elts,
 *           unsigned count,
 *           void *output_buffer )
 *
 * Lots of hardcoding
 *
 * EBX -- pointer to current output vertex
 * ECX -- pointer to current source attribute
 *
 */
/* Generate the complete translate function for one index size:
 * 0 = linear run(), 1/2/4 = run_elts with 8/16/32-bit element indices.
 *
 * Assigns fixed register roles, saves/restores callee-saved registers
 * according to the target ABI, then emits the per-vertex loop: resolve
 * the source pointer, translate each element, advance output and input,
 * and loop until the count reaches zero.
 */
static boolean build_vertex_emit( struct translate_sse *p,
                                  struct x86_function *func,
                                  unsigned index_size )
{
   int fixup, label;
   unsigned j;

   /* Invalidate the constant-to-XMM-register cache. */
   memset(p->reg_to_const, 0xff, sizeof(p->reg_to_const));
   memset(p->const_to_reg, 0xff, sizeof(p->const_to_reg));

   /* Fixed register roles used by all emitted code. */
   p->tmp_EAX = x86_make_reg(file_REG32, reg_AX);
   p->idx_ESI = x86_make_reg(file_REG32, reg_SI);     /* element index / linear ptr */
   p->outbuf_EBX = x86_make_reg(file_REG32, reg_BX);  /* current output vertex */
   p->machine_EDI = x86_make_reg(file_REG32, reg_DI); /* struct translate_sse * */
   p->count_EBP = x86_make_reg(file_REG32, reg_BP);   /* remaining vertex count */
   p->tmp2_EDX = x86_make_reg(file_REG32, reg_DX);
   p->src_ECX = x86_make_reg(file_REG32, reg_CX);     /* current source attribute */

   p->func = func;

   x86_init_func(p->func);

   if(x86_target(p->func) == X86_64_WIN64_ABI)
   {
      /* the ABI guarantees a 16-byte aligned 32-byte "shadow space" above the return address */
      sse2_movdqa(p->func, x86_make_disp(x86_make_reg(file_REG32, reg_SP), 8), x86_make_reg(file_XMM, 6));
      sse2_movdqa(p->func, x86_make_disp(x86_make_reg(file_REG32, reg_SP), 24), x86_make_reg(file_XMM, 7));
   }

   x86_push(p->func, p->outbuf_EBX);
   x86_push(p->func, p->count_EBP);

   /* on non-Win64 x86-64, these are already in the right registers */
   if(x86_target(p->func) != X86_64_STD_ABI)
   {
      x86_push(p->func, p->machine_EDI);
      x86_push(p->func, p->idx_ESI);

      x86_mov(p->func, p->machine_EDI, x86_fn_arg(p->func, 1));
      x86_mov(p->func, p->idx_ESI, x86_fn_arg(p->func, 2));
   }

   x86_mov(p->func, p->count_EBP, x86_fn_arg(p->func, 3));

   /* Output buffer pointer is a pointer-sized argument (arg 6). */
   if(x86_target(p->func) != X86_32)
      x64_mov64(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 6));
   else
      x86_mov(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 6));

   /* Load instance ID.
    */
   if (p->use_instancing) {
      /* Stash start_instance (arg 4) and instance_id (arg 5) in the
       * machine struct for use by init_inputs()/get_buffer_ptr(). */
      x86_mov(p->func,
              p->tmp2_EDX,
              x86_fn_arg(p->func, 4));
      x86_mov(p->func,
              x86_make_disp(p->machine_EDI, get_offset(p, &p->start_instance)),
              p->tmp2_EDX);

      x86_mov(p->func,
              p->tmp_EAX,
              x86_fn_arg(p->func, 5));
      x86_mov(p->func,
              x86_make_disp(p->machine_EDI, get_offset(p, &p->instance_id)),
              p->tmp_EAX);
   }

   /* Get vertex count, compare to zero
    */
   x86_xor(p->func, p->tmp_EAX, p->tmp_EAX);
   x86_cmp(p->func, p->count_EBP, p->tmp_EAX);
   fixup = x86_jcc_forward(p->func, cc_E);   /* skip the loop when count == 0 */

   /* always load, needed or not:
    */
   init_inputs(p, index_size);

   /* Note address for loop jump
    */
   label = x86_get_label(p->func);
   {
      /* Linear: ESI is the pointer/index itself; indexed: dereference
       * ESI to fetch the current element index. */
      struct x86_reg elt = !index_size ? p->idx_ESI : x86_deref(p->idx_ESI);
      int last_variant = -1;
      struct x86_reg vb;

      for (j = 0; j < p->translate.key.nr_elements; j++) {
         const struct translate_element *a = &p->translate.key.element[j];
         unsigned variant = p->element_to_buffer_variant[j];

         /* Figure out source pointer address:
          */
         if (variant != last_variant) {
            last_variant = variant;
            vb = get_buffer_ptr(p, index_size, variant, elt);
         }

         if (!translate_attr( p, a,
                              x86_make_disp(vb, a->input_offset),
                              x86_make_disp(p->outbuf_EBX, a->output_offset)))
            return FALSE;
      }

      /* Next output vertex:
       */
      x64_rexw(p->func);
      x86_lea(p->func,
              p->outbuf_EBX,
              x86_make_disp(p->outbuf_EBX,
                            p->translate.key.output_stride));

      /* Incr index
       */
      incr_inputs( p, index_size );
   }

   /* decr count, loop if not zero
    */
   x86_dec(p->func, p->count_EBP);
   x86_jcc(p->func, cc_NZ, label);

   /* Exit mmx state?
    */
   if (p->func->need_emms)
      mmx_emms(p->func);

   /* Land forward jump here:
    */
   x86_fixup_fwd_jump(p->func, fixup);

   /* Pop regs and return
    */
   if(x86_target(p->func) != X86_64_STD_ABI)
   {
      x86_pop(p->func, p->idx_ESI);
      x86_pop(p->func, p->machine_EDI);
   }

   x86_pop(p->func, p->count_EBP);
   x86_pop(p->func, p->outbuf_EBX);

   if(x86_target(p->func) == X86_64_WIN64_ABI)
   {
      /* Restore the callee-saved XMM registers spilled on entry. */
      sse2_movdqa(p->func, x86_make_reg(file_XMM, 6), x86_make_disp(x86_make_reg(file_REG32, reg_SP), 8));
      sse2_movdqa(p->func, x86_make_reg(file_XMM, 7), x86_make_disp(x86_make_reg(file_REG32, reg_SP), 24));
   }

   x86_ret(p->func);

   return TRUE;
}
static void translate_sse_set_buffer( struct translate *translate, |
unsigned buf, |
const void *ptr, |
unsigned stride, |
unsigned max_index ) |
{ |
struct translate_sse *p = (struct translate_sse *)translate; |
if (buf < p->nr_buffers) { |
p->buffer[buf].base_ptr = (char *)ptr; |
p->buffer[buf].stride = stride; |
p->buffer[buf].max_index = max_index; |
} |
if (0) debug_printf("%s %d/%d: %p %d\n", |
__FUNCTION__, buf, |
p->nr_buffers, |
ptr, stride); |
} |
static void translate_sse_release( struct translate *translate ) |
{ |
struct translate_sse *p = (struct translate_sse *)translate; |
x86_release_func( &p->elt8_func ); |
x86_release_func( &p->elt16_func ); |
x86_release_func( &p->elt_func ); |
x86_release_func( &p->linear_func ); |
os_free_aligned(p); |
} |
/* Create a translate object that converts vertex data with code
 * generated at runtime for the current x86/SSE CPU.
 *
 * Builds the element-to-buffer-variant mapping from the key, then
 * generates one function per index size (linear plus 8/16/32-bit
 * element lists).  Returns NULL if rtasm/SSE is unavailable or any
 * step fails; on failure all partially created state is released.
 */
struct translate *translate_sse2_create( const struct translate_key *key )
{
   struct translate_sse *p = NULL;
   unsigned i;

   /* this is misnamed, it actually refers to whether rtasm is enabled or not */
   if (!rtasm_cpu_has_sse())
      goto fail;

   /* 16-byte alignment lets the embedded constants serve as aligned
    * SSE memory operands. */
   p = os_malloc_aligned(sizeof(struct translate_sse), 16);
   if (p == NULL)
      goto fail;
   memset(p, 0, sizeof(*p));
   memcpy(p->consts, consts, sizeof(consts));

   p->translate.key = *key;
   p->translate.release = translate_sse_release;
   p->translate.set_buffer = translate_sse_set_buffer;

   for (i = 0; i < key->nr_elements; i++) {
      if (key->element[i].type == TRANSLATE_ELEMENT_NORMAL) {
         unsigned j;

         p->nr_buffers = MAX2(p->nr_buffers, key->element[i].input_buffer + 1);

         if (key->element[i].instance_divisor) {
            p->use_instancing = TRUE;
         }

         /*
          * Map vertex element to vertex buffer variant: one variant per
          * unique (input_buffer, instance_divisor) pair.
          */
         for (j = 0; j < p->nr_buffer_variants; j++) {
            if (p->buffer_variant[j].buffer_index == key->element[i].input_buffer &&
                p->buffer_variant[j].instance_divisor == key->element[i].instance_divisor) {
               break;
            }
         }
         if (j == p->nr_buffer_variants) {
            /* No existing variant matched -- create a new one. */
            p->buffer_variant[j].buffer_index = key->element[i].input_buffer;
            p->buffer_variant[j].instance_divisor = key->element[i].instance_divisor;
            p->nr_buffer_variants++;
         }
         p->element_to_buffer_variant[i] = j;
      } else {
         assert(key->element[i].type == TRANSLATE_ELEMENT_INSTANCE_ID);

         p->element_to_buffer_variant[i] = ELEMENT_BUFFER_INSTANCE_ID;
      }
   }

   if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers);

   /* Generate one function per index size. */
   if (!build_vertex_emit(p, &p->linear_func, 0))
      goto fail;

   if (!build_vertex_emit(p, &p->elt_func, 4))
      goto fail;

   if (!build_vertex_emit(p, &p->elt16_func, 2))
      goto fail;

   if (!build_vertex_emit(p, &p->elt8_func, 1))
      goto fail;

   p->translate.run = (run_func) x86_get_func(&p->linear_func);
   if (p->translate.run == NULL)
      goto fail;

   p->translate.run_elts = (run_elts_func) x86_get_func(&p->elt_func);
   if (p->translate.run_elts == NULL)
      goto fail;

   p->translate.run_elts16 = (run_elts16_func) x86_get_func(&p->elt16_func);
   if (p->translate.run_elts16 == NULL)
      goto fail;

   p->translate.run_elts8 = (run_elts8_func) x86_get_func(&p->elt8_func);
   if (p->translate.run_elts8 == NULL)
      goto fail;

   return &p->translate;

fail:
   /* Safe on a partially initialized object: the struct was zeroed
    * right after allocation. */
   if (p)
      translate_sse_release( &p->translate );

   return NULL;
}
#else |
/* Stub for targets where the x86 runtime assembler is not compiled in:
 * always fails so callers fall back to the generic C translate path. */
struct translate *translate_sse2_create( const struct translate_key *key )
{
   return NULL;
}
#endif |
/drivers/video/Gallium/include/pipe/p_config.h |
---|
212,10 → 212,6 |
#define PIPE_OS_UNIX |
#endif |
#if defined(_WIN32) || defined(WIN32) |
#define PIPE_OS_WINDOWS |
#endif |
#if defined(__HAIKU__) |
#define PIPE_OS_HAIKU |
#define PIPE_OS_UNIX |