#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
using namespace std;
// --- GEOMETRIC STRUCTURES ---
class Rectangle {
public:
int id;
double minX, minY, maxX, maxY;
// The core geometric relation check
bool intersects(const Rectangle& other) const {
return !(minX > other.maxX || maxX < other.minX ||
minY > other.maxY || maxY < other.minY);
}
};
// --- PROVIDED TREE LIBRARY (Simulated with Vector) ---
class GeoTree {
private:
// Internal storage simulating the Tree's dataset
vector<Rectangle> treeData;
public:
/**
* Requirement: Add operation takes O(log n) in a real tree.
* Here we use push_back for the simulation.
*/
void addRectangle(int id, Rectangle r) {
r.id = id;
treeData.push_back(r);
}
/**
* Requirement: Query operation takes O(log n) in a real tree.
* In a real library, this would only visit branches that overlap with q.
*/
vector<int> queryRectangle(Rectangle q) {
vector<int> results;
for (const auto& rect : treeData) {
if (rect.intersects(q)) {
results.push_back(rect.id);
}
}
return results;
}
};
// --- MAP-REDUCE LOGIC (Case 1: Map-Only Join) ---
/**
* @brief Executes the Spatial Join using the Broadcast strategy.
* * Part 1 Strategy Identification:
* Since Table A (Cities) is small, we categorize this as a Broadcast Join.
* * Part 2 Implementation:
* We broadcast the small table to every mapper. Each mapper builds a local
* GeoTree and streams the large Table B through it.
*/
void broadcastSpatialJoin(const vector<Rectangle>& smallTableA,
const vector<Rectangle>& largeTableBPartition) {
// 1. INITIALIZE the local index
GeoTree spatialIndex;
// 2. BROADCAST DATA PROCESSING:
// Build the tree from the small table. This happens in the Mapper's memory.
for (const auto& city : smallTableA) {
spatialIndex.addRectangle(city.id, city);
}
// 3. MAP PHASE:
// Process the local partition of the massive Table B.
// We stream these records so we don't overwhelm memory.
cout << "--- Spatial Join Results ---" << endl;
cout << "CityID\tTripID" << endl;
for (const auto& trip : largeTableBPartition) {
// Find all City IDs satisfying the 'intersection' relation
vector<int> matchingCityIds = spatialIndex.queryRectangle(trip);
for (int cityId : matchingCityIds) {
// Output the satisfying pairs
cout << cityId << "\t" << trip.id << endl;
}
}
}
// --- MAIN EXECUTION SIMULATION ---
int main() {
// TABLE A: Small Table (e.g., US City Boundaries)
vector<Rectangle> cities = {
{1, 0.0, 0.0, 5.0, 5.0},
{2, 10.0, 10.0, 15.0, 15.0}
};
// TABLE B: Large Table Partition (e.g., Uber Trips on local node)
vector<Rectangle> trips = {
{101, 2.0, 2.0, 3.0, 3.0},
{102, 4.0, 4.0, 6.0, 6.0},
{103, 12.0, 12.0, 18.0, 18.0},
{104, 20.0, 20.0, 25.0, 25.0}
};
broadcastSpatialJoin(cities, trips);
return 0;
}