bufferkdtree (C sources)
C source code for the Python bufferkdtree implementation
 All Classes Files Functions Variables Typedefs Macros
Functions
gpu_opencl.c File Reference
#include "include/gpu_opencl.h"
Include dependency graph for gpu_opencl.c:

Functions

void init_opencl_devices (TREE_RECORD *tree_record, TREE_PARAMETERS *params)
 
void free_opencl_devices (TREE_RECORD *tree_record, TREE_PARAMETERS *params)
 
void free_train_buffers_gpu (TREE_RECORD *tree_record, TREE_PARAMETERS *params)
 
void free_query_buffers_gpu (TREE_RECORD *tree_record, TREE_PARAMETERS *params)
 
void allocate_memory_opencl_devices (TREE_RECORD *tree_record, TREE_PARAMETERS *params)
 
void process_all_buffers_gpu (TREE_RECORD *tree_record, TREE_PARAMETERS *params)
 
void process_buffers_brute_force_gpu (TREE_RECORD *tree_record, TREE_PARAMETERS *params, INT_TYPE all_brute)
 
INT_TYPE retrieve_indices_from_buffers_gpu (TREE_RECORD *tree_record, TREE_PARAMETERS *params, INT_TYPE all_brute, INT_TYPE *tindices_removed, INT_TYPE *fr_indices, INT_TYPE *to_indices)
 
void process_buffers_brute_force_in_chunks_gpu (TREE_RECORD *tree_record, TREE_PARAMETERS *params, INT_TYPE all_brute, INT_TYPE *tindices_removed, INT_TYPE n_tindices_removed, INT_TYPE *fr_indices, INT_TYPE *to_indices)
 
void do_brute_force_all_leaves_FIRST_gpu (INT_TYPE *test_indices, INT_TYPE n_test_indices, INT_TYPE *fr_indices, INT_TYPE *to_indices, TREE_RECORD *tree_record, TREE_PARAMETERS *params, INT_TYPE n_device_train_patterns, cl_mem device_train_patterns, INT_TYPE chunk_offset, INT_TYPE all_brute, INT_TYPE current_chunk)
 
void do_brute_force_all_leaves_SECOND_gpu (INT_TYPE *test_indices, INT_TYPE n_test_indices, INT_TYPE *fr_indices, INT_TYPE *to_indices, TREE_RECORD *tree_record, TREE_PARAMETERS *params, INT_TYPE n_device_train_patterns, cl_mem device_train_patterns, INT_TYPE chunk_offset, INT_TYPE all_brute, INT_TYPE current_chunk)
 
void find_leaf_idx_batch_gpu (INT_TYPE *all_next_indices, INT_TYPE num_all_next_indices, INT_TYPE *ret_vals, TREE_RECORD *tree_record, TREE_PARAMETERS *params)
 
void get_distances_and_indices_gpu (TREE_RECORD *tree_record, TREE_PARAMETERS *params)
 
void write_sorted_training_patterns_gpu (TREE_RECORD *tree_record, TREE_PARAMETERS *params)
 
void init_train_patterns_buffers (TREE_RECORD *tree_record, TREE_PARAMETERS *params, INT_TYPE chunk, INT_TYPE n_indices)
 
void copy_train_patterns_to_device (TREE_RECORD *tree_record, TREE_PARAMETERS *params, INT_TYPE chunk, INT_TYPE start_idx, INT_TYPE end_idx)
 
void free_train_patterns_device (TREE_RECORD *tree_record, TREE_PARAMETERS *params, INT_TYPE chunk)
 
int training_chunks_inactive (TREE_RECORD *tree_record, TREE_PARAMETERS *params)
 

Function Documentation

void allocate_memory_opencl_devices ( TREE_RECORD *tree_record,
TREE_PARAMETERS *params
)

Allocates memory for testing phase.

Parameters
*tree_recordPointer to struct instance storing the model
*paramsPointer to struct instance storing all model parameters
void copy_train_patterns_to_device ( TREE_RECORD *tree_record,
TREE_PARAMETERS *params,
INT_TYPE  chunk,
INT_TYPE  start_idx,
INT_TYPE  end_idx
)
Parameters
*tree_recordPointer to struct instance storing the model
*paramsPointer to struct instance storing all model parameters
chunkSpecifies the chunk 0 or 1
start_idxThe start index
end_idxThe end index
void do_brute_force_all_leaves_FIRST_gpu ( INT_TYPE *test_indices,
INT_TYPE  n_test_indices,
INT_TYPE *fr_indices,
INT_TYPE *to_indices,
TREE_RECORD *tree_record,
TREE_PARAMETERS *params,
INT_TYPE  n_device_train_patterns,
cl_mem  device_train_patterns,
INT_TYPE  chunk_offset,
INT_TYPE  all_brute,
INT_TYPE  current_chunk
)

Apply brute-force approach for all leaves (first stage)

Parameters
*test_indicesArray of test indices to be processed
n_test_indicesNumber of test indices
*fr_indicesArray of "from" indices, one for each test index
*to_indicesArray of "to" indices, one for each test index
*tree_recordPointer to struct instance storing the model
*paramsPointer to struct instance storing all model parameters
n_device_train_patternsNumber of training patterns on device
device_train_patternsPointer to training patterns
chunk_offsetOffset (for indices) for the given chunk
all_bruteFlag specifying if all indices should be processed completely via brute force
current_chunkThe current chunk (id)
void do_brute_force_all_leaves_SECOND_gpu ( INT_TYPE *test_indices,
INT_TYPE  n_test_indices,
INT_TYPE *fr_indices,
INT_TYPE *to_indices,
TREE_RECORD *tree_record,
TREE_PARAMETERS *params,
INT_TYPE  n_device_train_patterns,
cl_mem  device_train_patterns,
INT_TYPE  chunk_offset,
INT_TYPE  all_brute,
INT_TYPE  current_chunk
)

Second stage of the brute-force processing

Parameters
*test_indicesArray of test indices to be processed
n_test_indicesNumber of test indices
*fr_indicesArray of "from" indices, one for each test index
*to_indicesArray of "to" indices, one for each test index
*tree_recordPointer to struct instance storing the model
*paramsPointer to struct instance storing all model parameters
n_device_train_patternsNumber of training patterns on device
device_train_patternsPointer to training patterns
chunk_offsetOffset (for indices) for the given chunk
all_bruteFlag specifying if all indices should be processed completely via brute force
current_chunkThe current chunk (id)
void find_leaf_idx_batch_gpu ( INT_TYPE *all_next_indices,
INT_TYPE  num_all_next_indices,
INT_TYPE *ret_vals,
TREE_RECORD *tree_record,
TREE_PARAMETERS *params
)

Finds the next leaf indices for all test patterns indexed by all_next_indices.

Parameters
*all_next_indicesArray containing all indices that need to be processed next
num_all_next_indicesNumber of indices
*ret_valsArray containing the next leaf ids for each index
*tree_recordPointer to struct instance storing the model
*paramsPointer to struct instance storing all model parameters
void free_opencl_devices ( TREE_RECORD *tree_record,
TREE_PARAMETERS *params
)

After having performed all queries: Free memory etc.

Parameters
*tree_recordPointer to struct instance storing the model
*paramsPointer to struct instance storing all model parameters
void free_query_buffers_gpu ( TREE_RECORD *tree_record,
TREE_PARAMETERS *params
)

Free buffers needed for querying phase.

Parameters
*tree_recordPointer to struct instance storing the model
*paramsPointer to struct instance storing all model parameters
void free_train_buffers_gpu ( TREE_RECORD *tree_record,
TREE_PARAMETERS *params
)

Free buffers used during training.

Parameters
*tree_recordPointer to struct instance storing the model
*paramsPointer to struct instance storing all model parameters
void free_train_patterns_device ( TREE_RECORD *tree_record,
TREE_PARAMETERS *params,
INT_TYPE  chunk
)

Releases training patterns on device

Parameters
*tree_recordPointer to struct instance storing the model
*paramsPointer to struct instance storing all model parameters
chunkSpecifies the chunk 0 or 1
void get_distances_and_indices_gpu ( TREE_RECORD *tree_record,
TREE_PARAMETERS *params
)

Copies the arrays dist_min_global and idx_min_global from GPU to CPU. Updates the distances and indices (w.r.t. the original indices).

Parameters
*tree_recordPointer to struct instance storing the model
*paramsPointer to struct instance storing all model parameters
void init_opencl_devices ( TREE_RECORD *tree_record,
TREE_PARAMETERS *params
)

Initializes all devices at the beginning of the querying process.

Parameters
*tree_recordPointer to struct instance storing the model
*paramsPointer to struct instance storing all model parameters
void init_train_patterns_buffers ( TREE_RECORD *tree_record,
TREE_PARAMETERS *params,
INT_TYPE  chunk,
INT_TYPE  n_indices
)
Parameters
*tree_recordPointer to struct instance storing the model
*paramsPointer to struct instance storing all model parameters
chunkSpecifies the chunk 0 or 1
n_indicesNumber of indices for which space shall be allocated
void process_all_buffers_gpu ( TREE_RECORD *tree_record,
TREE_PARAMETERS *params
)

Processes all buffers on the GPU. It is important that only INDICES are moved from the CPU to the GPU (only CPU->GPU; GPU->CPU is not necessary). Further, the global distances and the indices are updated ON THE GPU.

Parameters
*tree_recordPointer to struct instance storing the model
*paramsPointer to struct instance storing all model parameters
void process_buffers_brute_force_gpu ( TREE_RECORD *tree_record,
TREE_PARAMETERS *params,
INT_TYPE  all_brute
)

Helper function that processes all buffers via different methods, specified via the flag 'all_brute'.

Parameters
*tree_recordPointer to struct instance storing the model
*paramsPointer to struct instance storing all model parameters
all_bruteFlag specifying if all indices should be processed completely via brute force
void process_buffers_brute_force_in_chunks_gpu ( TREE_RECORD *tree_record,
TREE_PARAMETERS *params,
INT_TYPE  all_brute,
INT_TYPE *tindices_removed,
INT_TYPE  n_tindices_removed,
INT_TYPE *fr_indices,
INT_TYPE *to_indices
)

Processes all indices stored in all buffers

Parameters
*tree_recordPointer to struct instance storing the model
*paramsPointer to struct instance storing all model parameters
all_bruteFlag specifying if all indices should be processed completely via brute force
*tindices_removedArray containing the test indices afterwards
n_tindices_removedNumber of indices
*fr_indicesArray of "from" indices
*to_indicesArray of "to" indices
INT_TYPE retrieve_indices_from_buffers_gpu ( TREE_RECORD *tree_record,
TREE_PARAMETERS *params,
INT_TYPE  all_brute,
INT_TYPE *tindices_removed,
INT_TYPE *fr_indices,
INT_TYPE *to_indices
)

Retrieves all indices from all buffers

Parameters
*tree_recordPointer to struct instance storing the model
*paramsPointer to struct instance storing all model parameters
all_bruteFlag specifying if all indices should be processed completely via brute force
*tindices_removedArray containing the test indices afterwards
*fr_indicesArray of "from" indices
*to_indicesArray of "to" indices
int training_chunks_inactive ( TREE_RECORD *tree_record,
TREE_PARAMETERS *params
)

Helper method that checks whether training chunks are used or not.

Parameters
*tree_recordPointer to struct instance storing the model
*paramsPointer to struct instance storing all model parameters
void write_sorted_training_patterns_gpu ( TREE_RECORD *tree_record,
TREE_PARAMETERS *params
)

Writes the training patterns in a specific ordering (needed for coalesced data access on the GPU etc.).

Parameters
*tree_recordPointer to struct instance storing the model
*paramsPointer to struct instance storing all model parameters