My library GeographicLib contains a class NearestNeighbor
which implements the vantage-point trees. This works for any data
where there's a true distance metric; the geodesic distance obviously
satisfies this criterion.
Basically you initialize the class with your L
locations
and then for each of E
events you
ask for the nearest location. The breakdown on the computational
costs is as follows
set-up cost: L log L
cost per query: log L
cost of all queries: E log L
total cost: (L + E) log L
(These are base-2 logs.) Here's code that will do the lookup. Running this on 30k locations and 1M events takes about 40 secs and involved a total of 16M geodesic distance calculations. (The brute force way would take about 21 hours.)
// Read lat/lon locations from locations.txt and lat/lon events from
// events.txt. For each event print to closest.txt: the index for the
// closest location and the distance to it.
// Needs GeographicLib 1.47 or later
// compile and link with
// g++ -o closest closest.cpp -lGeographic \
// -L/usr/local/lib -Wl,-rpath=/usr/local/lib
#include <iostream>
#include <vector>
#include <fstream>
#include <GeographicLib/NearestNeighbor.hpp>
#include <GeographicLib/Geodesic.hpp>
using namespace std;
using namespace GeographicLib;
// A structure to hold a geographic coordinate.
struct pos {
double lat, lon;
pos(double lat = 0, double lon = 0)
: lat(lat), lon(lon) {}
};
// A class to compute the distance between 2 positions.
class DistanceCalculator {
private:
Geodesic _geod;
public:
explicit DistanceCalculator(const Geodesic& geod)
: _geod(geod) {}
double operator() (const pos& a, const pos& b) const {
double s12;
_geod.Inverse(a.lat, a.lon, b.lat, b.lon, s12);
return s12;
}
};
int main() {
// Define a distance function object
DistanceCalculator distance(Geodesic::WGS84());
// Read in locations
vector<pos> locs;
{
double lat, lon;
ifstream is("locations.txt");
while (is >> lat >> lon)
locs.push_back(pos(lat, lon));
}
// Construct NearestNeighbor object
NearestNeighbor<double, pos, DistanceCalculator>
locationset(locs, distance);
ifstream is("events.txt");
ofstream os("closest.txt");
double lat, lon, d;
vector<int> k;
while (is >> lat >> lon) {
pos event(lat, lon);
d = locationset.Search(locs, distance, event, k);
os << k[0] << " " << d << "\n";
}
}