fork download
  1. <?php
  2.  
  3. function curl_fetch($url) {
  4. $ch = curl_init($url);
  5. CURLOPT_RETURNTRANSFER => true,
  6. CURLOPT_FOLLOWLOCATION => true,
  7. CURLOPT_SSL_VERIFYPEER => false,
  8. CURLOPT_USERAGENT => "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
  9. CURLOPT_TIMEOUT => 10,
  10. ]);
  11.  
  12. $html = curl_exec($ch);
  13.  
  14. if (curl_errno($ch)) {
  15. echo "cURL error: " . curl_error($ch) . "\n";
  16. curl_close($ch);
  17. return false;
  18. }
  19.  
  20. $statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
  21. curl_close($ch);
  22.  
  23. if ($statusCode !== 200) {
  24. echo "Failed to fetch: $url (HTTP $statusCode)\n";
  25. return false;
  26. }
  27.  
  28. return $html;
  29. }
  30.  
  31. function find_first_product_url($homepageHtml, $baseUrl) {
  32. $dom = new DOMDocument();
  33. @$dom->loadHTML($homepageHtml);
  34. $xpath = new DOMXPath($dom);
  35.  
  36. $links = $xpath->query("//a[contains(@href, '/products/')]");
  37. foreach ($links as $link) {
  38. $href = $link->getAttribute("href");
  39. if (strpos($href, '/products/') !== false) {
  40. // Normalize URL
  41. if (strpos($href, 'http') !== 0) {
  42. $href = rtrim($baseUrl, '/') . '/' . ltrim($href, '/');
  43. }
  44. return $href;
  45. }
  46. }
  47.  
  48. return null;
  49. }
  50.  
  51. function extract_clean_review_text($html) {
  52. $dom = new DOMDocument();
  53. @$dom->loadHTML($html);
  54.  
  55. $xpath = new DOMXPath($dom);
  56. $reviews = [];
  57.  
  58. // Known Shopify review selectors
  59. $selectors = [
  60. "//*[contains(@class, 'loox-review-content')]",
  61. "//*[contains(@class, 'jdgm-rev__body')]",
  62. "//*[contains(@class, 'yotpo-review-content')]",
  63. "//*[contains(@class, 'spr-review-content')]",
  64. "//*[contains(@class, 'review')]",
  65. "//*[contains(@class, 'customer-review')]"
  66. ];
  67.  
  68. foreach ($selectors as $selector) {
  69. $nodes = $xpath->query($selector);
  70. foreach ($nodes as $node) {
  71. $text = trim($node->textContent);
  72. if (!empty($text)) {
  73. $reviews[] = $text;
  74. }
  75. }
  76.  
  77. if (!empty($reviews)) {
  78. break;
  79. }
  80. }
  81.  
  82. return $reviews;
  83. }
  84.  
  85. // ---------------- MAIN SCRIPT ----------------
  86. $shopifyStore = "https://w...content-available-to-author-only...o.uk";
  87.  
  88. echo "Fetching homepage: $shopifyStore\n";
  89. $homeHtml = curl_fetch($shopifyStore);
  90. if (!$homeHtml) exit;
  91.  
  92. $productUrl = find_first_product_url($homeHtml, $shopifyStore);
  93. if (!$productUrl) {
  94. die("No product URLs found on the homepage.\n");
  95. }
  96.  
  97. echo "Found product URL: $productUrl\n";
  98. $productHtml = curl_fetch($productUrl);
  99. if (!$productHtml) exit;
  100.  
  101. $reviews = extract_clean_review_text($productHtml);
  102.  
  103. if (empty($reviews)) {
  104. echo "No reviews found on the product page.\n";
  105. } else {
  106. echo "Found " . count($reviews) . " reviews:\n\n";
  107. foreach ($reviews as $i => $review) {
  108. echo ($i + 1) . ". " . $review . "\n\n";
  109. }
  110. }
  111.  
Success #stdin #stdout 0.04s 26528KB
stdin
Standard input is empty
stdout
Fetching homepage: https://w...content-available-to-author-only...o.uk
cURL error: Could not resolve host: www.nakeddog.co.uk