fork download
  1. #include <iostream>
  2. #include <vector>
  3. #include <string>
  4. #include <algorithm>
  5.  
  6. using namespace std;
  7.  
  8. // --- GEOMETRIC STRUCTURES ---
  9.  
  10. struct Rectangle {
  11. int id;
  12. double minX, minY, maxX, maxY;
  13.  
  14. // Helper to check intersection between two rectangles
  15. bool intersects(const Rectangle& other) const {
  16. return !(minX > other.maxX || maxX < other.minX ||
  17. minY > other.maxY || maxY < other.minY);
  18. }
  19. };
  20.  
  21. // --- PROVIDED TREE LIBRARY (Simulated) ---
  22.  
  23. class GeoTree {
  24. private:
  25. // In a real library, this would be a pointer to an R-Tree or Quad-Tree structure
  26. vector<Rectangle> treeData;
  27.  
  28. public:
  29. // Requirement: Add operation takes O(log n)
  30. void addRectangle(int id, Rectangle r) {
  31. r.id = id;
  32. treeData.push_back(r);
  33. // Note: In a real R-Tree, this would involve rebalancing the tree nodes.
  34. }
  35.  
  36. // Requirement: Query operation takes O(log n)
  37. vector<int> queryRectangle(Rectangle q) {
  38. vector<int> results;
  39. // In a real tree library, this traverses only relevant branches
  40. for (const auto& rect : treeData) {
  41. if (rect.intersects(q)) {
  42. results.push_back(rect.id);
  43. }
  44. }
  45. return results;
  46. }
  47. };
  48.  
  49. // --- MAP-REDUCE LOGIC (Case 1: Map-Only Join) ---
  50.  
  51. /**
  52.  * @brief Performs a Spatial Join using the Broadcast strategy.
  53.  * * Case 1: Table A is small enough to fit in memory.
  54.  * We build the spatial index (Tree) once and then stream Table B.
  55.  */
  56. void broadcastSpatialJoin(const vector<Rectangle>& smallTableA,
  57. const vector<Rectangle>& largeTableBPartition) {
  58.  
  59. // 1. INITIALIZE the Tree
  60. GeoTree spatialIndex;
  61.  
  62. // 2. BROADCAST PHASE: Build the tree from the small table (e.g., Cities)
  63. // This happens once per Mapper worker.
  64. for (const auto& city : smallTableA) {
  65. spatialIndex.addRectangle(city.id, city);
  66. }
  67.  
  68.  
  69.  
  70. // 3. MAP PHASE: Stream the large table (e.g., Uber Trips)
  71. // We check each trip against the indexed cities.
  72. cout << "--- Spatial Join Results ---" << endl;
  73. cout << "CityID\tTripID" << endl;
  74.  
  75. for (const auto& trip : largeTableBPartition) {
  76. // Query returns all City IDs that the current trip intersects
  77. vector<int> matchingCityIds = spatialIndex.queryRectangle(trip);
  78.  
  79. for (int cityId : matchingCityIds) {
  80. // "Emit" the join result
  81. cout << cityId << "\t" << trip.id << endl;
  82. }
  83. }
  84. }
  85.  
  86. // --- MAIN EXECUTION ---
  87.  
  88. int main() {
  89. // Dataset A: Small Table (e.g., City Boundaries)
  90. // Format: {ID, minX, minY, maxX, maxY}
  91. vector<Rectangle> cities = {
  92. {1, 0.0, 0.0, 5.0, 5.0}, // City A
  93. {2, 10.0, 10.0, 15.0, 15.0} // City B
  94. };
  95.  
  96. // Dataset B: Large Table Partition (e.g., Uber Trips)
  97. vector<Rectangle> trips = {
  98. {101, 2.0, 2.0, 3.0, 3.0}, // Inside City A
  99. {102, 4.0, 4.0, 6.0, 6.0}, // Intersects City A
  100. {103, 12.0, 12.0, 18.0, 18.0}, // Intersects City B
  101. {104, 20.0, 20.0, 25.0, 25.0} // Matches nothing
  102. };
  103.  
  104. // Run the distributed join simulation
  105. broadcastSpatialJoin(cities, trips);
  106.  
  107. return 0;
  108. }
Success #stdin #stdout 0.01s 5288KB
stdin
Standard input is empty
stdout
--- Spatial Join Results ---
CityID	TripID
1	101
1	102
2	103