bufferkdtree (C sources)
C source code for the Python bufferkdtree implementation
 All Classes Files Functions Variables Typedefs Macros
types.h
Go to the documentation of this file.
1 /*
2  * types.h
3  *
4  * Copyright (C) 2013-2016 Fabian Gieseke <fabian.gieseke@di.ku.dk>
5  * License: GPL v2
6  *
7  */
8 
9 #ifndef NEIGHBORS_BUFFER_KD_TREE_TYPES_H_
10 #define NEIGHBORS_BUFFER_KD_TREE_TYPES_H_
11 
12 #include "../../../include/timing.h"
13 #include "../../../include/float.h"
14 #include "../../../include/opencl.h"
15 
16 #include <string.h>
17 
18 // helper macros
19 #define STR_HELPER(x) #x
20 #define STR(x) STR_HELPER(x)
21 
22 #define SPLITTING_TYPE_CYCLIC 0
23 #define SPLITTING_TYPE_LONGEST_BOX 1
24 
25 // 1024*1024*1024 or 1E9
26 #define MEM_GB 1.073741824E9
27 
28 #define DO_ALL_BRUTE_NO 0
29 #define DO_ALL_BRUTE_YES 1
30 
31 #define TRAIN_CHUNK_0 0
32 #define TRAIN_CHUNK_1 1
33 
34 #ifndef MAX_CHUNK_BRUTE_KERNEL
35 #define MAX_CHUNK_BRUTE_KERNEL 1048576
36 #endif
37 
38 // workgroup sizes
39 #ifndef WORKGROUP_SIZE_BRUTE
40 #define WORKGROUP_SIZE_BRUTE 256
41 #endif
42 
43 #ifndef WORKGROUP_SIZE_LEAVES
44 #define WORKGROUP_SIZE_LEAVES 32
45 #endif
46 
47 #ifndef WORKGROUP_SIZE_UPDATE
48 #define WORKGROUP_SIZE_UPDATE 16
49 #endif
50 
51 #ifndef WORKGROUP_SIZE_COPY_INIT
52 #define WORKGROUP_SIZE_COPY_INIT 32
53 #endif
54 
55 #ifndef WORKGROUP_SIZE_COMBINE
56 #define WORKGROUP_SIZE_COMBINE 64
57 #endif
58 
59 #ifndef WORKGROUP_SIZE_TEST_SUBSET
60 #define WORKGROUP_SIZE_TEST_SUBSET 32
61 #endif
62 
63 #ifndef WORKGROUP_SIZE_COPY_DISTS_INDICES
64 #define WORKGROUP_SIZE_COPY_DISTS_INDICES 32
65 #endif
66 
67 #define LEAF_WIDTH 2
68 
69 // if changed -> modify init_extern, swig interface, and OpenCL kernels
70 #define INT_TYPE int
71 #define UINT_TYPE unsigned int
72 
73 // struct for storing a single node
74 typedef struct tree_node {
75 
76  int axis;
78 
79 } TREE_NODE;
80 
81 typedef struct {
87 
88 // struct for input parameters
89 typedef struct tree_parameters {
90 
99 
100  // do brute force as soon as only bf_remaining_threshold elements are in test queue
102 
103  // the number of desired training chunks
105 
108 
110 
111 // struct for input parameters
112 typedef struct tree_record {
113 
114  // array containing pairs of patterns and (original) indices
115  void *XtrainI;
116 
117  // in-place sorted training patterns
119 
120  // in-place sorted training indices
122 
123  // dimension of patterns
125 
126  // number of training patters
128 
129  // test patterns
131 
132  // number of test patterns
134 
135  // top tree stored as array nodes (median values w.r.t. axes)
137 
138  // number of (internal) nodes
140 
141  // leaves of the kdtree (left,right for each leaf)
143 
144  // number of leaves
146 
147  // max number of visits in tree (needed for kernels)
149 
150  // size of circular buffers for query indices (i.e., how many indices we can store at each leaf)
152 
153  // threshold for the buffers: we have to have many indices in the leaf in
154  // order to make the processing efficient. When should we empty a buffer?
156 
157  // estimate: how many elements can be put into the buffers in each round?
159 
160  // flag if one buffer is almost full
162 
163  // test buffers
165 
166  // queue "reinsert"
168 
169  // queue "insert", implemented as counter current_test_index
171 
172  // all stacks
174 
175  // all depths
177 
178  // all idxs
180 
181  // global distances to be returned by a query
183 
184  // global indices to be returned by a query
186 
187  // return values of find_leaf_batch
189 
190  // counter for leaf calls
192 
193  // counter for empty buffer calls
195 
196  // global opencl variables
197  cl_platform_id gpu_platform;
198  cl_device_id gpu_device;
199  cl_context gpu_context;
200  cl_command_queue gpu_command_queue;
201  cl_command_queue gpu_command_queue_chunk_0;
202  cl_command_queue gpu_command_queue_chunk_1;
204 
205  // kernels
206  cl_kernel brute_nn_kernel;
211  cl_kernel init_dists_kernel;
215 
216  // train buffers
218  cl_mem device_train_patterns_chunk_0; // n_train_chunks * dXtrain * sizeof(FLOAT_TYPE)
219  cl_mem device_train_patterns_chunk_1; // n_train_chunks * dXtrain * sizeof(FLOAT_TYPE)
223  cl_mem device_nodes; // n_nodes * sizeof(FLOAT_TYPE)
224  cl_mem device_leave_bounds; // n_leaves * 2 * sizeof(FLOAT_TYPE)
225 
226  // test buffers
227  cl_mem device_test_patterns; // nXtest * dXtest * sizeof(FLOAT_TYPE)
228  cl_mem device_d_mins; // nXtest * n_neighbors * sizeof(FLOAT_TYPE)
229  cl_mem device_idx_mins; // nXtest * n_neighbors * sizeof(INT_TYPE)
230  cl_mem device_all_stacks; // nXtest * tree_depth * sizeof(INT_TYPE)
231  cl_mem device_all_depths; // nXtest * sizeof(INT_TYPE)
232  cl_mem device_all_idxs; // nXtest * sizeof(INT_TYPE)
233  cl_mem device_idx_mins_tmp; // nXtest * n_neighbors * sizeof(FLOAT_TYPE)
234  cl_mem device_dist_mins_tmp; // nXtest * n_neighbors * sizeof(INT_TYPE)
235  cl_mem device_test_patterns_subset_tmp; // nXtest * dXtest * sizeof(FLOAT_TYPE)
236 
237  // tmp
238  cl_mem device_test_indices_removed_from_all_buffers; // n_test_indices * sizeof(INT_TYPE)
239  cl_mem device_all_next_indices; // tree_record->approx_number_of_avail_buffer_slots * sizeof(INT_TYPE)
240  cl_mem device_ret_vals; // tree_record->approx_number_of_avail_buffer_slots * sizeof(INT_TYPE)
241  cl_mem device_fr_indices; // nXtest * sizeof(INT_TYPE)
242  cl_mem device_to_indices; // nXtest * sizeof(INT_TYPE)
243 
244  // Memory Consumption (roughly)
245  // train: 2* n_train_chunks * dXtrain * sizeof(FLOAT_TYPE)
246  // + n_nodes * sizeof(FLOAT_TYPE)
247  // + n_leaves * 2 * sizeof(FLOAT_TYPE)
248  // test: nXtest * sizeof(FLOAT_TYPE) * (2 * dXtest + 2 * n_neighbors)
249  // nXtest * sizeof(INT_TYPE) * (2 * n_neighbors + tree_depth + 2)
250 
251  // tree_record->approx_number_of_avail_buffer_slots = 5 * tree_record->leaves_initial_buffer_sizes < 5 * 2^20 = 5E6
252  // tmp: nXtest * 3 * sizeof(INT_TYPE) + 5E6 * sizeof(INT_TYPE)
253 
255 
258 
259 } TREE_RECORD;
260 
261 #ifndef USE_GPU
262 #define USE_GPU 0
263 #endif
264 
265 
266 #define MIN(a,b) (((a)<(b))?(a):(b))
267 #define MAX(a,b) (((a)>(b))?(a):(b))
268 
269 #endif /* NEIGHBORS_BUFFER_KD_TREE_TYPES_H_ */
cl_mem device_dist_mins_tmp
Definition: types.h:234
cl_context gpu_context
Definition: types.h:199
cl_mem device_idx_mins_tmp
Definition: types.h:233
FLOAT_TYPE * Xtrain_sorted
Definition: types.h:118
cl_device_id gpu_device
Definition: types.h:198
INT_TYPE splitting_type
Definition: types.h:94
cl_platform_id gpu_platform
Definition: types.h:197
cl_kernel find_leaves_kernel
Definition: types.h:209
UINT_TYPE approx_number_of_avail_buffer_slots
Definition: types.h:158
INT_TYPE n_patts_per_chunk
Definition: types.h:222
INT_TYPE * items
Definition: types.h:85
cl_mem device_test_patterns_subset_tmp
Definition: types.h:235
cl_mem device_ret_vals
Definition: types.h:240
Definition: types.h:81
INT_TYPE empty_all_buffers_calls
Definition: types.h:194
INT_TYPE current_chunk_id
Definition: types.h:217
int device_query_buffers_allocated
Definition: types.h:254
cl_mem device_idx_mins
Definition: types.h:229
UINT_TYPE bf_remaining_threshold
Definition: types.h:101
cl_kernel brute_nn_kernel
Definition: types.h:206
INT_TYPE * all_depths
Definition: types.h:176
cl_mem device_test_indices_removed_from_all_buffers
Definition: types.h:238
Definition: types.h:89
cl_mem device_all_stacks
Definition: types.h:230
cl_mem device_test_patterns
Definition: types.h:227
cl_mem device_all_idxs
Definition: types.h:232
INT_TYPE dXtrain
Definition: types.h:124
INT_TYPE size
Definition: types.h:82
INT_TYPE n_neighbors
Definition: types.h:91
Definition: types.h:74
cl_command_queue gpu_command_queue_chunk_1
Definition: types.h:202
DEVICE_INFOS device_infos
Definition: types.h:203
INT_TYPE max_visited
Definition: types.h:148
circular_buffer queue_reinsert
Definition: types.h:167
cl_kernel retrieve_dist_kernel
Definition: types.h:208
FLOAT_TYPE allowed_test_mem_percent
Definition: types.h:107
circular_buffer ** buffers
Definition: types.h:164
INT_TYPE tree_depth
Definition: types.h:92
INT_TYPE leaves_buffer_sizes_threshold
Definition: types.h:155
INT_TYPE verbosity_level
Definition: types.h:96
INT_TYPE * Itrain_sorted
Definition: types.h:121
cl_mem device_to_indices
Definition: types.h:242
cl_mem device_fr_indices
Definition: types.h:241
INT_TYPE n_train_chunks
Definition: types.h:104
INT_TYPE start
Definition: types.h:83
INT_TYPE counters[10]
Definition: types.h:257
void * XtrainI
Definition: types.h:115
#define UINT_TYPE
Definition: types.h:71
struct tree_record TREE_RECORD
FLOAT_TYPE * dist_mins_global
Definition: types.h:182
INT_TYPE * all_stacks
Definition: types.h:173
struct tree_parameters TREE_PARAMETERS
Definition: timing.h:31
FLOAT_TYPE * Xtest
Definition: types.h:130
struct tree_node TREE_NODE
cl_mem device_all_depths
Definition: types.h:231
FLOAT_TYPE * leaves
Definition: types.h:142
Definition: types.h:112
TIMER timers[30]
Definition: types.h:256
cl_mem device_leave_bounds
Definition: types.h:224
INT_TYPE n_nodes
Definition: types.h:139
cl_kernel update_dist_kernel
Definition: types.h:207
INT_TYPE end
Definition: types.h:84
INT_TYPE * all_idxs
Definition: types.h:179
FLOAT_TYPE * host_pinned_train_patterns_chunk_0
Definition: types.h:220
cl_mem device_train_patterns_chunk_0
Definition: types.h:218
cl_command_queue gpu_command_queue
Definition: types.h:200
INT_TYPE leaves_initial_buffer_sizes
Definition: types.h:151
FLOAT_TYPE allowed_train_mem_percent_chunk
Definition: types.h:106
INT_TYPE num_threads
Definition: types.h:93
cl_mem device_train_patterns_chunk_1
Definition: types.h:219
INT_TYPE platform_id
Definition: types.h:97
cl_mem device_nodes
Definition: types.h:223
TREE_NODE * nodes
Definition: types.h:136
FLOAT_TYPE splitting_value
Definition: types.h:77
cl_kernel init_depths_idxs_kernel
Definition: types.h:213
INT_TYPE * idx_mins_global
Definition: types.h:185
char * kernels_source_directory
Definition: types.h:95
cl_mem device_all_next_indices
Definition: types.h:239
cl_kernel generate_test_subset_kernel
Definition: types.h:210
cl_command_queue gpu_command_queue_chunk_0
Definition: types.h:201
int axis
Definition: types.h:76
INT_TYPE current_test_index
Definition: types.h:170
cl_mem device_d_mins
Definition: types.h:228
UINT_TYPE buffer_full_warning
Definition: types.h:161
cl_kernel init_dists_kernel
Definition: types.h:211
INT_TYPE find_leaf_idx_calls
Definition: types.h:191
#define INT_TYPE
Definition: types.h:70
Definition: opencl.h:30
INT_TYPE device_id
Definition: types.h:98
INT_TYPE * leaf_indices_batch_ret_vals
Definition: types.h:188
#define FLOAT_TYPE
Definition: float.h:17
cl_kernel compute_final_dists_idxs_kernel
Definition: types.h:214
INT_TYPE nXtrain
Definition: types.h:127
cl_kernel init_stacks_kernel
Definition: types.h:212
INT_TYPE n_leaves
Definition: types.h:145
FLOAT_TYPE * host_pinned_train_patterns_chunk_1
Definition: types.h:221
UINT_TYPE nXtest
Definition: types.h:133