|
bufferkdtree (C sources)
C source code for the Python bufferkdtree implementation
|
#include "include/gpu_opencl.h"
| void allocate_memory_opencl_devices | ( | TREE_RECORD * | tree_record, |
| TREE_PARAMETERS * | params | ||
| ) |
Allocates memory for testing phase.
| *tree_record | Pointer to struct instance storing the model |
| *params | Pointer to struct instance storing all model parameters |
| void copy_train_patterns_to_device | ( | TREE_RECORD * | tree_record, |
| TREE_PARAMETERS * | params, | ||
| INT_TYPE | chunk, | ||
| INT_TYPE | start_idx, | ||
| INT_TYPE | end_idx | ||
| ) |
| *tree_record | Pointer to struct instance storing the model |
| *params | Pointer to struct instance storing all model parameters |
| chunk | Specifies the chunk 0 or 1 |
| start_idx | The start index |
| end_idx | The end index |
| void do_brute_force_all_leaves_FIRST_gpu | ( | INT_TYPE * | test_indices, |
| INT_TYPE | n_test_indices, | ||
| INT_TYPE * | fr_indices, | ||
| INT_TYPE * | to_indices, | ||
| TREE_RECORD * | tree_record, | ||
| TREE_PARAMETERS * | params, | ||
| INT_TYPE | n_device_train_patterns, | ||
| cl_mem | device_train_patterns, | ||
| INT_TYPE | chunk_offset, | ||
| INT_TYPE | all_brute, | ||
| INT_TYPE | current_chunk | ||
| ) |
Apply brute-force approach for all leaves (first stage)
| *test_indices | Array of test indices to be processed |
| n_test_indices | Number of test indices |
| *fr_indices | Array of "from" indices, one for each test index |
| *to_indices | Array of "to" indices, one for each test index |
| *tree_record | Pointer to struct instance storing the model |
| *params | Pointer to struct instance storing all model parameters |
| n_device_train_patterns | Number of training patterns on device |
| device_train_patterns | Pointer to training patterns |
| chunk_offset | Offset (for indices) for the given chunk |
| all_brute | Flag specifying if all indices should be processed completely via brute force |
| current_chunk | The current chunk (id) |
| void do_brute_force_all_leaves_SECOND_gpu | ( | INT_TYPE * | test_indices, |
| INT_TYPE | n_test_indices, | ||
| INT_TYPE * | fr_indices, | ||
| INT_TYPE * | to_indices, | ||
| TREE_RECORD * | tree_record, | ||
| TREE_PARAMETERS * | params, | ||
| INT_TYPE | n_device_train_patterns, | ||
| cl_mem | device_train_patterns, | ||
| INT_TYPE | chunk_offset, | ||
| INT_TYPE | all_brute, | ||
| INT_TYPE | current_chunk | ||
| ) |
Second stage of the brute-force processing
| *test_indices | Array of test indices to be processed |
| n_test_indices | Number of test indices |
| *fr_indices | Array of "from" indices, one for each test index |
| *to_indices | Array of "to" indices, one for each test index |
| *tree_record | Pointer to struct instance storing the model |
| *params | Pointer to struct instance storing all model parameters |
| n_device_train_patterns | Number of training patterns on device |
| device_train_patterns | Pointer to training patterns |
| chunk_offset | Offset (for indices) for the given chunk |
| all_brute | Flag specifying if all indices should be processed completely via brute force |
| current_chunk | The current chunk (id) |
| void find_leaf_idx_batch_gpu | ( | INT_TYPE * | all_next_indices, |
| INT_TYPE | num_all_next_indices, | ||
| INT_TYPE * | ret_vals, | ||
| TREE_RECORD * | tree_record, | ||
| TREE_PARAMETERS * | params | ||
| ) |
Finds the next leaf indices for all test patterns indexed by all_next_indices.
| *all_next_indices | Array containing all indices that need to be processed next |
| num_all_next_indices | Number of indices |
| *ret_vals | Array containing the next leaf ids for each index |
| *tree_record | Pointer to struct instance storing the model |
| *params | Pointer to struct instance storing all model parameters |
| void free_opencl_devices | ( | TREE_RECORD * | tree_record, |
| TREE_PARAMETERS * | params | ||
| ) |
After having performed all queries: Free memory etc.
| *tree_record | Pointer to struct instance storing the model |
| *params | Pointer to struct instance storing all model parameters |
| void free_query_buffers_gpu | ( | TREE_RECORD * | tree_record, |
| TREE_PARAMETERS * | params | ||
| ) |
Free buffers needed for querying phase.
| *tree_record | Pointer to struct instance storing the model |
| *params | Pointer to struct instance storing all model parameters |
| void free_train_buffers_gpu | ( | TREE_RECORD * | tree_record, |
| TREE_PARAMETERS * | params | ||
| ) |
Free buffers used during training.
| *tree_record | Pointer to struct instance storing the model |
| *params | Pointer to struct instance storing all model parameters |
| void free_train_patterns_device | ( | TREE_RECORD * | tree_record, |
| TREE_PARAMETERS * | params, | ||
| INT_TYPE | chunk | ||
| ) |
Releases training patterns on device
| *tree_record | Pointer to struct instance storing the model |
| *params | Pointer to struct instance storing all model parameters |
| chunk | Specifies the chunk 0 or 1 |
| void get_distances_and_indices_gpu | ( | TREE_RECORD * | tree_record, |
| TREE_PARAMETERS * | params | ||
| ) |
Copies the arrays dist_min_global and idx_min_global from GPU to CPU. Updates the distances and indices (w.r.t. the original indices).
| *tree_record | Pointer to struct instance storing the model |
| *params | Pointer to struct instance storing all model parameters |
| void init_opencl_devices | ( | TREE_RECORD * | tree_record, |
| TREE_PARAMETERS * | params | ||
| ) |
Initializes all devices at the beginning of the querying process.
| *tree_record | Pointer to struct instance storing the model |
| *params | Pointer to struct instance storing all model parameters |
| void init_train_patterns_buffers | ( | TREE_RECORD * | tree_record, |
| TREE_PARAMETERS * | params, | ||
| INT_TYPE | chunk, | ||
| INT_TYPE | n_indices | ||
| ) |
| *tree_record | Pointer to struct instance storing the model |
| *params | Pointer to struct instance storing all model parameters |
| chunk | Specifies the chunk 0 or 1 |
| n_indices | Number of indices for which space shall be allocated |
| void process_all_buffers_gpu | ( | TREE_RECORD * | tree_record, |
| TREE_PARAMETERS * | params | ||
| ) |
Processes all buffers on the GPU. It is important that only INDICES are moved from the CPU to the GPU (only CPU->GPU; GPU->CPU is not necessary). Further, the global distances and the indices are updated ON THE GPU.
| *tree_record | Pointer to struct instance storing the model |
| *params | Pointer to struct instance storing all model parameters |
| void process_buffers_brute_force_gpu | ( | TREE_RECORD * | tree_record, |
| TREE_PARAMETERS * | params, | ||
| INT_TYPE | all_brute | ||
| ) |
Helper function that processes all buffers via different methods, specified via the flag 'all_brute'.
| *tree_record | Pointer to struct instance storing the model |
| *params | Pointer to struct instance storing all model parameters |
| all_brute | Flag specifying if all indices should be processed completely via brute force |
| void process_buffers_brute_force_in_chunks_gpu | ( | TREE_RECORD * | tree_record, |
| TREE_PARAMETERS * | params, | ||
| INT_TYPE | all_brute, | ||
| INT_TYPE * | tindices_removed, | ||
| INT_TYPE | n_tindices_removed, | ||
| INT_TYPE * | fr_indices, | ||
| INT_TYPE * | to_indices | ||
| ) |
Processes all indices stored in all buffers
| *tree_record | Pointer to struct instance storing the model |
| *params | Pointer to struct instance storing all model parameters |
| all_brute | Flag specifying if all indices should be processed completely via brute force |
| *tindices_removed | Array containing the test indices afterwards |
| n_tindices_removed | Number of indices |
| *fr_indices | Array of "from" indices |
| *to_indices | Array of "to" indices |
| INT_TYPE retrieve_indices_from_buffers_gpu | ( | TREE_RECORD * | tree_record, |
| TREE_PARAMETERS * | params, | ||
| INT_TYPE | all_brute, | ||
| INT_TYPE * | tindices_removed, | ||
| INT_TYPE * | fr_indices, | ||
| INT_TYPE * | to_indices | ||
| ) |
Retrieves all indices from all buffers
| *tree_record | Pointer to struct instance storing the model |
| *params | Pointer to struct instance storing all model parameters |
| all_brute | Flag specifying if all indices should be processed completely via brute force |
| *tindices_removed | Array containing the test indices afterwards |
| *fr_indices | Array of "from" indices |
| *to_indices | Array of "to" indices |
| int training_chunks_inactive | ( | TREE_RECORD * | tree_record, |
| TREE_PARAMETERS * | params | ||
| ) |
Helper method that checks whether training chunks are used or not
| *tree_record | Pointer to struct instance storing the model |
| *params | Pointer to struct instance storing all model parameters |
| void write_sorted_training_patterns_gpu | ( | TREE_RECORD * | tree_record, |
| TREE_PARAMETERS * | params | ||
| ) |
Writes the training patterns in a specific ordering (needed for coalesced data access on the GPU etc.).
| *tree_record | Pointer to struct instance storing the model |
| *params | Pointer to struct instance storing all model parameters |
1.8.6