MLPACK  1.0.7
lsh_search.hpp
Go to the documentation of this file.
1 
38 #ifndef __MLPACK_METHODS_NEIGHBOR_SEARCH_LSH_SEARCH_HPP
39 #define __MLPACK_METHODS_NEIGHBOR_SEARCH_LSH_SEARCH_HPP
40 
41 #include <mlpack/core.hpp>
42 #include <vector>
43 #include <string>
44 
47 
48 namespace mlpack {
49 namespace neighbor {
50 
58 template<typename SortPolicy = NearestNeighborSort>
59 class LSHSearch
60 {
61  public:
83  LSHSearch(const arma::mat& referenceSet,
84  const arma::mat& querySet,
85  const size_t numProj,
86  const size_t numTables,
87  const double hashWidth = 0.0,
88  const size_t secondHashSize = 99901,
89  const size_t bucketSize = 500);
90 
111  LSHSearch(const arma::mat& referenceSet,
112  const size_t numProj,
113  const size_t numTables,
114  const double hashWidth = 0.0,
115  const size_t secondHashSize = 99901,
116  const size_t bucketSize = 500);
117 
136  void Search(const size_t k,
137  arma::Mat<size_t>& resultingNeighbors,
138  arma::mat& distances,
139  const size_t numTablesToSearch = 0);
140 
141  private:
155  void BuildHash();
156 
168  void ReturnIndicesFromTable(const size_t queryIndex,
169  arma::uvec& referenceIndices,
170  size_t numTablesToSearch);
171 
179  double BaseCase(const size_t queryIndex, const size_t referenceIndex);
180 
193  void InsertNeighbor(const size_t queryIndex, const size_t pos,
194  const size_t neighbor, const double distance);
195 
196  private:
198  const arma::mat& referenceSet;
199 
201  const arma::mat& querySet;
202 
204  const size_t numProj;
205 
207  const size_t numTables;
208 
210  std::vector<arma::mat> projections; // should be [numProj x dims] x numTables
211 
213  arma::mat offsets; // should be numProj x numTables
214 
216  double hashWidth;
217 
219  const size_t secondHashSize;
220 
222  arma::vec secondHashWeights;
223 
225  const size_t bucketSize;
226 
229 
231  arma::Mat<size_t> secondHashTable;
232 
235  arma::Col<size_t> bucketContentSize;
236 
239  arma::Col<size_t> bucketRowInHashTable;
240 
242  arma::mat* distancePtr;
243 
245  arma::Mat<size_t>* neighborPtr;
246 }; // class LSHSearch
247 
248 }; // namespace neighbor
249 }; // namespace mlpack
250 
251 // Include implementation.
252 #include "lsh_search_impl.hpp"
253 
254 #endif
const arma::mat & referenceSet
Reference dataset.
Definition: lsh_search.hpp:198
const arma::mat & querySet
Query dataset (may not be given).
Definition: lsh_search.hpp:201
void BuildHash()
This function builds a hash table with two levels of hashing as presented in the paper.
const size_t secondHashSize
The big prime representing the size of the second hash.
Definition: lsh_search.hpp:219
metric::SquaredEuclideanDistance metric
Instantiation of the metric.
Definition: lsh_search.hpp:228
std::vector< arma::mat > projections
The std::vector containing the projection matrix of each table.
Definition: lsh_search.hpp:210
const size_t bucketSize
The bucket size of the second hash.
Definition: lsh_search.hpp:225
arma::Col< size_t > bucketContentSize
The number of elements present in each hash bucket; should be secondHashSize.
Definition: lsh_search.hpp:235
arma::Mat< size_t > * neighborPtr
The pointer to the nearest neighbor indices.
Definition: lsh_search.hpp:245
double BaseCase(const size_t queryIndex, const size_t referenceIndex)
This is a helper function that computes the distance of the query to the neighbor candidates and appr...
The LSHSearch class – This class builds a hash on the reference set and uses this hash to compute t...
Definition: lsh_search.hpp:59
const size_t numProj
The number of projections.
Definition: lsh_search.hpp:204
arma::mat offsets
The list of the offset &#39;b&#39; for each of the projection for each table.
Definition: lsh_search.hpp:213
The L_p metric for arbitrary integer p, with an option to take the root.
Definition: lmetric.hpp:73
LSHSearch(const arma::mat &referenceSet, const arma::mat &querySet, const size_t numProj, const size_t numTables, const double hashWidth=0.0, const size_t secondHashSize=99901, const size_t bucketSize=500)
This function initializes the LSH class.
void InsertNeighbor(const size_t queryIndex, const size_t pos, const size_t neighbor, const double distance)
This is a helper function that efficiently inserts better neighbor candidates into an existing set of...
const size_t numTables
The number of hash tables.
Definition: lsh_search.hpp:207
arma::Mat< size_t > secondHashTable
The final hash table; should be (&lt; secondHashSize) x bucketSize.
Definition: lsh_search.hpp:231
void ReturnIndicesFromTable(const size_t queryIndex, arma::uvec &referenceIndices, size_t numTablesToSearch)
This function takes a query and hashes it into each of the hash tables to get keys for the query and ...
arma::vec secondHashWeights
The weights of the second hash.
Definition: lsh_search.hpp:222
void Search(const size_t k, arma::Mat< size_t > &resultingNeighbors, arma::mat &distances, const size_t numTablesToSearch=0)
Compute the nearest neighbors and store the output in the given matrices.
double hashWidth
The hash width.
Definition: lsh_search.hpp:216
arma::Col< size_t > bucketRowInHashTable
For a particular hash value, points to the row in secondHashTable corresponding to this value...
Definition: lsh_search.hpp:239
arma::mat * distancePtr
The pointer to the nearest neighbor distances.
Definition: lsh_search.hpp:242