bufferkdtree (C sources)
C source code for the Python bufferkdtree implementation
|
#include "include/gpu_opencl.h"
void allocate_memory_opencl_devices | ( | TREE_RECORD * | tree_record, |
TREE_PARAMETERS * | params | ||
) |
Allocates memory for testing phase.
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
void copy_train_patterns_to_device | ( | TREE_RECORD * | tree_record, |
TREE_PARAMETERS * | params, | ||
INT_TYPE | chunk, | ||
INT_TYPE | start_idx, | ||
INT_TYPE | end_idx | ||
) |
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
chunk | Specifies the chunk 0 or 1 |
start_idx | The start index |
end_idx | The end index |
void do_brute_force_all_leaves_FIRST_gpu | ( | INT_TYPE * | test_indices, |
INT_TYPE | n_test_indices, | ||
INT_TYPE * | fr_indices, | ||
INT_TYPE * | to_indices, | ||
TREE_RECORD * | tree_record, | ||
TREE_PARAMETERS * | params, | ||
INT_TYPE | n_device_train_patterns, | ||
cl_mem | device_train_patterns, | ||
INT_TYPE | chunk_offset, | ||
INT_TYPE | all_brute, | ||
INT_TYPE | current_chunk | ||
) |
Apply brute-force approach for all leaves (first stage)
*test_indices | Array of test indices to be processed |
n_test_indices | Number of test indices |
*fr_indices | Array of "from" indices, one for each test index |
*to_indices | Array of "to" indices, one for each test index |
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
n_device_train_patterns | Number of training patterns on device |
device_train_patterns | Pointer to training patterns |
chunk_offset | Offset (for indices) for the given chunk |
all_brute | Flag specifying whether all indices should be processed completely via brute force |
current_chunk | The current chunk (id) |
void do_brute_force_all_leaves_SECOND_gpu | ( | INT_TYPE * | test_indices, |
INT_TYPE | n_test_indices, | ||
INT_TYPE * | fr_indices, | ||
INT_TYPE * | to_indices, | ||
TREE_RECORD * | tree_record, | ||
TREE_PARAMETERS * | params, | ||
INT_TYPE | n_device_train_patterns, | ||
cl_mem | device_train_patterns, | ||
INT_TYPE | chunk_offset, | ||
INT_TYPE | all_brute, | ||
INT_TYPE | current_chunk | ||
) |
Second stage of the brute-force processing
*test_indices | Array of test indices to be processed |
n_test_indices | Number of test indices |
*fr_indices | Array of "from" indices, one for each test index |
*to_indices | Array of "to" indices, one for each test index |
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
n_device_train_patterns | Number of training patterns on device |
device_train_patterns | Pointer to training patterns |
chunk_offset | Offset (for indices) for the given chunk |
all_brute | Flag specifying whether all indices should be processed completely via brute force |
current_chunk | The current chunk (id) |
void find_leaf_idx_batch_gpu | ( | INT_TYPE * | all_next_indices, |
INT_TYPE | num_all_next_indices, | ||
INT_TYPE * | ret_vals, | ||
TREE_RECORD * | tree_record, | ||
TREE_PARAMETERS * | params | ||
) |
Finds the next leaf indices for all test patterns indexed by all_next_indices.
*all_next_indices | Array containing all indices that need to be processed next |
num_all_next_indices | Number of indices |
*ret_vals | Array containing the next leaf ids for each index |
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
void free_opencl_devices | ( | TREE_RECORD * | tree_record, |
TREE_PARAMETERS * | params | ||
) |
After having performed all queries: Free memory etc.
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
void free_query_buffers_gpu | ( | TREE_RECORD * | tree_record, |
TREE_PARAMETERS * | params | ||
) |
Free buffers needed for querying phase.
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
void free_train_buffers_gpu | ( | TREE_RECORD * | tree_record, |
TREE_PARAMETERS * | params | ||
) |
Free buffers used during training.
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
void free_train_patterns_device | ( | TREE_RECORD * | tree_record, |
TREE_PARAMETERS * | params, | ||
INT_TYPE | chunk | ||
) |
Releases training patterns on device
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
chunk | Specifies the chunk 0 or 1 |
void get_distances_and_indices_gpu | ( | TREE_RECORD * | tree_record, |
TREE_PARAMETERS * | params | ||
) |
Copies the arrays dist_min_global and idx_min_global from GPU to CPU. Updates the distances and indices (w.r.t. the original indices).
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
void init_opencl_devices | ( | TREE_RECORD * | tree_record, |
TREE_PARAMETERS * | params | ||
) |
Initializes all devices at the beginning of the querying process.
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
void init_train_patterns_buffers | ( | TREE_RECORD * | tree_record, |
TREE_PARAMETERS * | params, | ||
INT_TYPE | chunk, | ||
INT_TYPE | n_indices | ||
) |
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
chunk | Specifies the chunk 0 or 1 |
n_indices | Number of indices for which space shall be allocated |
void process_all_buffers_gpu | ( | TREE_RECORD * | tree_record, |
TREE_PARAMETERS * | params | ||
) |
Processes all buffers on the GPU. It is important that only INDICES are moved from the CPU to the GPU (only CPU->GPU; GPU->CPU is not necessary). Further, the global distances and the indices are updated ON THE GPU.
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
void process_buffers_brute_force_gpu | ( | TREE_RECORD * | tree_record, |
TREE_PARAMETERS * | params, | ||
INT_TYPE | all_brute | ||
) |
Helper function that processes all buffers via different methods, specified via the flag 'all_brute'.
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
all_brute | Flag specifying whether all indices should be processed completely via brute force |
void process_buffers_brute_force_in_chunks_gpu | ( | TREE_RECORD * | tree_record, |
TREE_PARAMETERS * | params, | ||
INT_TYPE | all_brute, | ||
INT_TYPE * | tindices_removed, | ||
INT_TYPE | n_tindices_removed, | ||
INT_TYPE * | fr_indices, | ||
INT_TYPE * | to_indices | ||
) |
Processes all indices stored in all buffers
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
all_brute | Flag specifying whether all indices should be processed completely via brute force |
*tindices_removed | Array containing the test indices afterwards |
n_tindices_removed | Number of indices |
*fr_indices | Array of "from" indices |
*to_indices | Array of "to" indices |
INT_TYPE retrieve_indices_from_buffers_gpu | ( | TREE_RECORD * | tree_record, |
TREE_PARAMETERS * | params, | ||
INT_TYPE | all_brute, | ||
INT_TYPE * | tindices_removed, | ||
INT_TYPE * | fr_indices, | ||
INT_TYPE * | to_indices | ||
) |
Retrieves all indices from all buffers
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
all_brute | Flag specifying whether all indices should be processed completely via brute force |
*tindices_removed | Array containing the test indices afterwards |
*fr_indices | Array of "from" indices |
*to_indices | Array of "to" indices |
int training_chunks_inactive | ( | TREE_RECORD * | tree_record, |
TREE_PARAMETERS * | params | ||
) |
Helper method that checks whether training chunks are used or not
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
void write_sorted_training_patterns_gpu | ( | TREE_RECORD * | tree_record, |
TREE_PARAMETERS * | params | ||
) |
Writes the training patterns in a specific ordering (needed for coalesced data access on the GPU etc.).
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |