#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <CL/cl.h>
#include <math.h>
#include "base.h"
#include "types.h"
#include "util.h"
#include "../../../include/opencl.h"
Go to the source code of this file.
|
void | init_opencl_devices (TREE_RECORD *tree_record, TREE_PARAMETERS *params) |
|
void | free_opencl_devices (TREE_RECORD *tree_record, TREE_PARAMETERS *params) |
|
void | free_train_buffers_gpu (TREE_RECORD *tree_record, TREE_PARAMETERS *params) |
|
void | free_query_buffers_gpu (TREE_RECORD *tree_record, TREE_PARAMETERS *params) |
|
void | allocate_memory_opencl_devices (TREE_RECORD *tree_record, TREE_PARAMETERS *params) |
|
void | process_all_buffers_gpu (TREE_RECORD *tree_record, TREE_PARAMETERS *params) |
|
void | process_buffers_brute_force_gpu (TREE_RECORD *tree_record, TREE_PARAMETERS *params, INT_TYPE all_brute) |
|
INT_TYPE | retrieve_indices_from_buffers_gpu (TREE_RECORD *tree_record, TREE_PARAMETERS *params, INT_TYPE all_brute, INT_TYPE *tindices_removed, INT_TYPE *fr_indices, INT_TYPE *to_indices) |
|
void | process_buffers_brute_force_in_chunks_gpu (TREE_RECORD *tree_record, TREE_PARAMETERS *params, INT_TYPE all_brute, INT_TYPE *tindices_removed, INT_TYPE n_tindices_removed, INT_TYPE *fr_indices, INT_TYPE *to_indices) |
|
void | do_brute_force_all_leaves_FIRST_gpu (INT_TYPE *test_indices, INT_TYPE n_test_indices, INT_TYPE *fr_indices, INT_TYPE *to_indices, TREE_RECORD *tree_record, TREE_PARAMETERS *params, INT_TYPE n_device_train_patterns, cl_mem device_train_patterns, INT_TYPE chunk_offset, INT_TYPE all_brute, INT_TYPE current_chunk) |
|
void | do_brute_force_all_leaves_SECOND_gpu (INT_TYPE *test_indices, INT_TYPE n_test_indices, INT_TYPE *fr_indices, INT_TYPE *to_indices, TREE_RECORD *tree_record, TREE_PARAMETERS *params, INT_TYPE n_device_train_patterns, cl_mem device_train_patterns, INT_TYPE chunk_offset, INT_TYPE all_brute, INT_TYPE current_chunk) |
|
void | find_leaf_idx_batch_gpu (INT_TYPE *all_next_indices, INT_TYPE num_all_next_indices, INT_TYPE *ret_vals, TREE_RECORD *tree_record, TREE_PARAMETERS *params) |
|
void | get_distances_and_indices_gpu (TREE_RECORD *tree_record, TREE_PARAMETERS *params) |
|
void | write_sorted_training_patterns_gpu (TREE_RECORD *tree_record, TREE_PARAMETERS *params) |
|
void | init_train_patterns_buffers (TREE_RECORD *tree_record, TREE_PARAMETERS *params, INT_TYPE chunk, INT_TYPE n_indices) |
|
void | copy_train_patterns_to_device (TREE_RECORD *tree_record, TREE_PARAMETERS *params, INT_TYPE chunk, INT_TYPE start_idx, INT_TYPE end_idx) |
|
void | free_train_patterns_device (TREE_RECORD *tree_record, TREE_PARAMETERS *params, INT_TYPE chunk) |
|
int | training_chunks_inactive (TREE_RECORD *tree_record, TREE_PARAMETERS *params) |
|
Allocates memory for testing phase.
- Parameters
-
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
- Parameters
-
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
chunk | Specifies the chunk 0 or 1 |
start_idx | The start index |
end_idx | The end index |
void do_brute_force_all_leaves_FIRST_gpu |
( |
INT_TYPE * |
test_indices, |
|
|
INT_TYPE |
n_test_indices, |
|
|
INT_TYPE * |
fr_indices, |
|
|
INT_TYPE * |
to_indices, |
|
|
TREE_RECORD * |
tree_record, |
|
|
TREE_PARAMETERS * |
params, |
|
|
INT_TYPE |
n_device_train_patterns, |
|
|
cl_mem |
device_train_patterns, |
|
|
INT_TYPE |
chunk_offset, |
|
|
INT_TYPE |
all_brute, |
|
|
INT_TYPE |
current_chunk |
|
) |
| |
Apply brute-force approach for all leaves (first stage)
- Parameters
-
*test_indices | Array of test indices to be processed |
n_test_indices | Number of test indices |
*fr_indices | Array of "from" indices, one for each test index |
*to_indices | Array of "to" indices, one for each test index |
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
n_device_train_patterns | Number of training patterns on device |
device_train_patterns | Pointer to training patterns |
chunk_offset | Offset (for indices) for the given chunk |
all_brute | Flag specifying if all indices should be processed completely via brute force |
current_chunk | The current chunk (id) |
void do_brute_force_all_leaves_SECOND_gpu |
( |
INT_TYPE * |
test_indices, |
|
|
INT_TYPE |
n_test_indices, |
|
|
INT_TYPE * |
fr_indices, |
|
|
INT_TYPE * |
to_indices, |
|
|
TREE_RECORD * |
tree_record, |
|
|
TREE_PARAMETERS * |
params, |
|
|
INT_TYPE |
n_device_train_patterns, |
|
|
cl_mem |
device_train_patterns, |
|
|
INT_TYPE |
chunk_offset, |
|
|
INT_TYPE |
all_brute, |
|
|
INT_TYPE |
current_chunk |
|
) |
| |
Second stage of the brute-force processing
- Parameters
-
*test_indices | Array of test indices to be processed |
n_test_indices | Number of test indices |
*fr_indices | Array of "from" indices, one for each test index |
*to_indices | Array of "to" indices, one for each test index |
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
n_device_train_patterns | Number of training patterns on device |
device_train_patterns | Pointer to training patterns |
chunk_offset | Offset (for indices) for the given chunk |
all_brute | Flag specifying if all indices should be processed completely via brute force |
current_chunk | The current chunk (id) |
Finds the next leaf indices for all test patterns indexed by all_next_indices.
- Parameters
-
*all_next_indices | Array containing all indices that need to be processed next |
num_all_next_indices | Number of indices |
*ret_vals | Array containing the next leaf ids for each index |
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
After having performed all queries: Free memory etc.
- Parameters
-
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
Free buffers needed for querying phase.
- Parameters
-
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
Free buffers used during training.
- Parameters
-
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
Releases training patterns on device
- Parameters
-
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
chunk | Specifies the chunk 0 or 1 |
Copies the arrays dist_min_global and idx_min_global from GPU to CPU. Updates the distances and indices (w.r.t. the original indices).
- Parameters
-
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
Initializes all devices at the beginning of the querying process.
- Parameters
-
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
- Parameters
-
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
chunk | Specifies the chunk 0 or 1 |
n_indices | Number of indices for which space shall be allocated |
Processes all buffers on the GPU. It is important that only INDICES are moved from the CPU to the GPU (only CPU->GPU; GPU->CPU is not necessary). Further, the global distances and the indices are updated ON THE GPU.
- Parameters
-
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
Helper function that processes all buffers via different methods, specified via the flag 'all_brute'.
- Parameters
-
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
all_brute | Flag specifying if all indices should be processed completely via brute force |
Processes all indices stored in all buffers
- Parameters
-
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
all_brute | Flag specifying if all indices should be processed completely via brute force |
*tindices_removed | Array containing the test indices afterwards |
n_tindices_removed | Number of indices |
*fr_indices | Array of "from" indices |
*to_indices | Array of "to" indices |
Retrieves all indices from all buffers
- Parameters
-
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
all_brute | Flag specifying if all indices should be processed completely via brute force |
*tindices_removed | Array containing the test indices afterwards |
*fr_indices | Array of "from" indices |
*to_indices | Array of "to" indices |
Helper method that checks if training chunks are used or not
- Parameters
-
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |
Writes the training patterns in a specific ordering (needed for coalesced data access on the GPU, etc.).
- Parameters
-
*tree_record | Pointer to struct instance storing the model |
*params | Pointer to struct instance storing all model parameters |