fork download
  1. #include <iostream>
  2. #include <vector>
  3. #include <string>
  4. #include <algorithm>
  5.  
  6. using namespace std;
  7.  
  8. // --- GEOMETRIC STRUCTURES ---
  9.  
  10. class Rectangle {
  11. public:
  12. int id;
  13. double minX, minY, maxX, maxY;
  14.  
  15. // The core geometric relation check
  16. bool intersects(const Rectangle& other) const {
  17. return !(minX > other.maxX || maxX < other.minX ||
  18. minY > other.maxY || maxY < other.minY);
  19. }
  20. };
  21.  
  22. // --- PROVIDED TREE LIBRARY (Simulated with Vector) ---
  23.  
  24. class GeoTree {
  25. private:
  26. // Internal storage simulating the Tree's dataset
  27. vector<Rectangle> treeData;
  28.  
  29. public:
  30. /**
  31.   * Requirement: Add operation takes O(log n) in a real tree.
  32.   * Here we use push_back for the simulation.
  33.   */
  34. void addRectangle(int id, Rectangle r) {
  35. r.id = id;
  36. treeData.push_back(r);
  37. }
  38.  
  39. /**
  40.   * Requirement: Query operation takes O(log n) in a real tree.
  41.   * In a real library, this would only visit branches that overlap with q.
  42.   */
  43. vector<int> queryRectangle(Rectangle q) {
  44. vector<int> results;
  45. for (const auto& rect : treeData) {
  46. if (rect.intersects(q)) {
  47. results.push_back(rect.id);
  48. }
  49. }
  50. return results;
  51. }
  52. };
  53.  
  54. // --- MAP-REDUCE LOGIC (Case 1: Map-Only Join) ---
  55.  
  56. /**
  57.  * @brief Executes the Spatial Join using the Broadcast strategy.
  58.  * * Part 1 Strategy Identification:
  59.  * Since Table A (Cities) is small, we categorize this as a Broadcast Join.
  60.  * * Part 2 Implementation:
  61.  * We broadcast the small table to every mapper. Each mapper builds a local
  62.  * GeoTree and streams the large Table B through it.
  63.  */
  64. void broadcastSpatialJoin(const vector<Rectangle>& smallTableA,
  65. const vector<Rectangle>& largeTableBPartition) {
  66.  
  67. // 1. INITIALIZE the local index
  68. GeoTree spatialIndex;
  69.  
  70. // 2. BROADCAST DATA PROCESSING:
  71. // Build the tree from the small table. This happens in the Mapper's memory.
  72. for (const auto& city : smallTableA) {
  73. spatialIndex.addRectangle(city.id, city);
  74. }
  75.  
  76.  
  77.  
  78. // 3. MAP PHASE:
  79. // Process the local partition of the massive Table B.
  80. // We stream these records so we don't overwhelm memory.
  81. cout << "--- Spatial Join Results ---" << endl;
  82. cout << "CityID\tTripID" << endl;
  83.  
  84. for (const auto& trip : largeTableBPartition) {
  85. // Find all City IDs satisfying the 'intersection' relation
  86. vector<int> matchingCityIds = spatialIndex.queryRectangle(trip);
  87.  
  88. for (int cityId : matchingCityIds) {
  89. // Output the satisfying pairs
  90. cout << cityId << "\t" << trip.id << endl;
  91. }
  92. }
  93. }
  94.  
  95. // --- MAIN EXECUTION SIMULATION ---
  96.  
  97. int main() {
  98. // TABLE A: Small Table (e.g., US City Boundaries)
  99. vector<Rectangle> cities = {
  100. {1, 0.0, 0.0, 5.0, 5.0},
  101. {2, 10.0, 10.0, 15.0, 15.0}
  102. };
  103.  
  104. // TABLE B: Large Table Partition (e.g., Uber Trips on local node)
  105. vector<Rectangle> trips = {
  106. {101, 2.0, 2.0, 3.0, 3.0},
  107. {102, 4.0, 4.0, 6.0, 6.0},
  108. {103, 12.0, 12.0, 18.0, 18.0},
  109. {104, 20.0, 20.0, 25.0, 25.0}
  110. };
  111.  
  112. broadcastSpatialJoin(cities, trips);
  113.  
  114. return 0;
  115. }
Success #stdin #stdout 0.01s 5288KB
stdin
Standard input is empty
stdout
--- Spatial Join Results ---
CityID	TripID
1	101
1	102
2	103