#!/usr/bin/env php
<?php
// Analytics audit report: recounts shares, RSS clicks and per-hour pageviews
// from the raw visit log, compares them with the stored summary, flags
// suspicious traffic patterns, and prints a plain-text report to stdout.
//
// NOTE(review): the opening section of this script is missing from the text
// under review. Everything between the shebang line and the end of the hourly
// tally survived only as this fragment:
//
//     = 0 && $hour < 24) { $byHour[$hour]++; } } }
//
// Restore that section (in place of this note) before running the script.
// The code below reads the following variables and expects the missing
// section to have defined them:
//   $visits          list of raw event records (arrays with 'type', and
//                    depending on the event 'platform', 'userAgent', 'visitorId')
//   $pageviews       list of pageview records ('visitorId', 'page',
//                    'timestamp', 'userAgent')
//   $summary         stored summary ('total', 'new', 'returning', 'rss',
//                    'shares', 'byHour')
//   $pageviewCount, $newCount, $returningCount
//                    counts recomputed from the raw data
//   $byHour          pageview counts indexed by hour 0..23

// 4 + 5. Count shares per platform and RSS clicks in one pass over all events.
$shares = ['mastodon' => 0, 'bluesky' => 0, 'copy' => 0];
$rssClicks = 0;
foreach ($visits as $visit) {
    $type = $visit['type'] ?? null;
    if ($type === 'share') {
        $platform = $visit['platform'] ?? null;
        // Only the platforms seeded above are tallied; anything else is ignored.
        if (is_string($platform) && isset($shares[$platform])) {
            $shares[$platform]++;
        }
    } elseif ($type === 'rss_click') {
        $rssClicks++;
    }
}

// 6. Identify potential issues
$issues = [];

// Duplicate pageviews: same visitor, same page, less than 5 seconds apart.
// Pageviews are grouped per (visitor, page) first so that only timestamps
// inside one group are compared, instead of every pageview against every other.
$duplicateWindow = 5;
$pageGroups = [];
foreach ($pageviews as $visit) {
    $visitorId = $visit['visitorId'] ?? null;
    $page = $visit['page'] ?? null;
    // serialize() keeps value types apart (1 vs "1"), matching a strict === comparison.
    $groupKey = serialize([$visitorId, $page]);
    if (!isset($pageGroups[$groupKey])) {
        $pageGroups[$groupKey] = ['visitor' => $visitorId, 'page' => $page, 'times' => []];
    }
    $pageGroups[$groupKey]['times'][] = $visit['timestamp'] ?? 0;
}

$duplicates = [];
foreach ($pageGroups as $group) {
    $times = $group['times'];
    sort($times);
    $total = count($times);
    for ($i = 0; $i < $total; $i++) {
        // Timestamps are sorted, so the inner scan can stop at the first one
        // outside the window; every pair inside the window is still recorded.
        for ($j = $i + 1; $j < $total && ($times[$j] - $times[$i]) < $duplicateWindow; $j++) {
            $duplicates[] = [
                'visitor' => $group['visitor'],
                'page' => $group['page'],
                'time1' => date('H:i:s', $times[$i]),
                'time2' => date('H:i:s', $times[$j]),
            ];
        }
    }
}

if (count($duplicates) > 0) {
    $issues[] = "Found " . count($duplicates) . " potential duplicate pageviews (same visitor, same page, <5s apart)";
}

// Suspicious user agents (common bots). The substrings are plain literals, so
// they can be joined into one case-insensitive alternation without quoting.
// 'bot' already covers names such as duckduckbot, twitterbot and linkedinbot.
$botSignatures = [
    'bot',
    'crawler',
    'spider',
    'scraper',
    'google',
    'bing',
    'yahoo',
    'facebookexternalhit',
];
$botPattern = '/' . implode('|', $botSignatures) . '/i';

$botCount = 0;
$botVisitors = [];
foreach ($visits as $visit) {
    $ua = $visit['userAgent'] ?? '';
    if (is_string($ua) && preg_match($botPattern, $ua) === 1) {
        $botCount++;
        $botVisitors[$visit['visitorId'] ?? ''] = true;
    }
}

if ($botCount > 0) {
    $issues[] = "Found {$botCount} visits from potential bots/crawlers (" . count($botVisitors) . " unique visitors)";
}

// Rapid-fire visits (potential scripted access): consecutive pageviews of one
// visitor that are less than 2 seconds apart.
$rapidWindow = 2;
$timesByVisitor = [];
foreach ($pageviews as $visit) {
    $timesByVisitor[$visit['visitorId'] ?? ''][] = $visit['timestamp'] ?? 0;
}

$rapidVisits = [];
foreach ($timesByVisitor as $vid => $times) {
    sort($times);
    $total = count($times);
    for ($i = 1; $i < $total; $i++) {
        if ($times[$i] - $times[$i - 1] < $rapidWindow) {
            $rapidVisits[$vid] = ($rapidVisits[$vid] ?? 0) + 1;
        }
    }
}

if (count($rapidVisits) > 0) {
    $issues[] = "Found " . count($rapidVisits) . " visitors with rapid-fire pageviews (<2s apart)";
}

// Summary vs raw data discrepancies. Summary fields are read once with a
// default of 0 so that a missing key is reported as a mismatch rather than
// raising an undefined-index warning; the same values feed the display below.
$summaryTotal = $summary['total'] ?? 0;
$summaryNew = $summary['new'] ?? 0;
$summaryReturning = $summary['returning'] ?? 0;
$summaryRss = $summary['rss'] ?? 0;
$summaryShareCounts = [
    'mastodon' => $summary['shares']['mastodon'] ?? 0,
    'bluesky' => $summary['shares']['bluesky'] ?? 0,
    'copy' => $summary['shares']['copy'] ?? 0,
];

$discrepancies = [];

// Loose comparison (!=) is kept on purpose: the stored values may not have the
// same scalar type as the freshly computed integer counts.
if ($summaryTotal != $pageviewCount) {
    $discrepancies[] = sprintf(
        "Total visits mismatch: Summary=%d, Raw count=%d (diff: %+d)",
        $summaryTotal,
        $pageviewCount,
        $summaryTotal - $pageviewCount
    );
}

if ($summaryNew != $newCount) {
    $discrepancies[] = sprintf(
        "New visitors mismatch: Summary=%d, Raw count=%d (diff: %+d)",
        $summaryNew,
        $newCount,
        $summaryNew - $newCount
    );
}

if ($summaryReturning != $returningCount) {
    $discrepancies[] = sprintf(
        "Returning visitors mismatch: Summary=%d, Raw count=%d (diff: %+d)",
        $summaryReturning,
        $returningCount,
        $summaryReturning - $returningCount
    );
}

// Compare hourly data; one differing hour is enough to report the mismatch.
for ($h = 0; $h < 24; $h++) {
    if (($summary['byHour'][$h] ?? 0) != ($byHour[$h] ?? 0)) {
        $discrepancies[] = "Hourly distribution differs between summary and raw data";
        break;
    }
}

// Compare shares
foreach (['mastodon', 'bluesky', 'copy'] as $platform) {
    $summaryShares = $summaryShareCounts[$platform];
    $rawShares = $shares[$platform] ?? 0;
    if ($summaryShares != $rawShares) {
        $discrepancies[] = sprintf(
            "Shares ({$platform}) mismatch: Summary=%d, Raw count=%d (diff: %+d)",
            $summaryShares,
            $rawShares,
            $summaryShares - $rawShares
        );
    }
}

// Compare RSS
if ($summaryRss != $rssClicks) {
    $discrepancies[] = sprintf(
        "RSS clicks mismatch: Summary=%d, Raw count=%d (diff: %+d)",
        $summaryRss,
        $rssClicks,
        $summaryRss - $rssClicks
    );
}

// Display results
echo "SUMMARY DATA:\n";
echo " Total visits: " . $summaryTotal . "\n";
echo " New visitors: " . $summaryNew . "\n";
echo " Returning visitors: " . $summaryReturning . "\n";
echo " RSS clicks: " . $summaryRss . "\n";
echo " Shares: Mastodon=" . $summaryShareCounts['mastodon']
    . ", Bluesky=" . $summaryShareCounts['bluesky']
    . ", Copy=" . $summaryShareCounts['copy'] . "\n\n";

echo "RAW DATA COUNT:\n";
echo " Total pageviews: {$pageviewCount}\n";
echo " Unique new visitors: {$newCount}\n";
echo " Unique returning visitors: {$returningCount}\n";
echo " RSS clicks: {$rssClicks}\n";
echo " Shares: Mastodon={$shares['mastodon']}, Bluesky={$shares['bluesky']}, Copy={$shares['copy']}\n";
echo " Total visits (all types): " . count($visits) . "\n\n";

if (count($discrepancies) > 0) {
    echo "⚠️ DISCREPANCIES FOUND:\n";
    foreach ($discrepancies as $disc) {
        echo " - {$disc}\n";
    }
    echo "\n";
} else {
    echo "✓ Summary and raw data match!\n\n";
}

if (count($issues) > 0) {
    echo "⚠️ POTENTIAL ISSUES:\n";
    foreach ($issues as $issue) {
        echo " - {$issue}\n";
    }
    echo "\n";
} else {
    echo "✓ No obvious issues detected.\n\n";
}

// Show top visitors
echo "TOP VISITORS (by pageview count):\n";
$visitorCounts = [];
$firstUserAgent = [];
foreach ($pageviews as $visit) {
    $vid = $visit['visitorId'] ?? '';
    if (!isset($visitorCounts[$vid])) {
        $visitorCounts[$vid] = 0;
        // Remember the user agent of the visitor's first pageview here, under
        // the same array key as the counter. PHP casts numeric-string keys to
        // int, so looking the visit up again later with === would miss them.
        $firstUserAgent[$vid] = $visit['userAgent'] ?? 'Unknown';
    }
    $visitorCounts[$vid]++;
}
arsort($visitorCounts);

foreach (array_slice($visitorCounts, 0, 10, true) as $vid => $count) {
    $ua = substr($firstUserAgent[$vid], 0, 50);
    printf(" %s: %d pageviews (UA: %s...)\n", substr((string) $vid, 0, 30), $count, $ua);
}
echo "\n";

// Show hourly breakdown
echo "HOURLY BREAKDOWN (from raw data):\n";
// Busiest hour, computed once; at least 1 so the bar scaling never divides by zero.
$peak = max(1, empty($byHour) ? 0 : max($byHour));
for ($h = 0; $h < 24; $h++) {
    $count = $byHour[$h] ?? 0;
    // Bars are scaled so the busiest hour is 50 characters wide.
    $bar = str_repeat('█', min(50, (int) ($count / $peak * 50)));
    printf(" %02d:00 %5d %s\n", $h, $count, $bar);
}
echo "\n";

// Accuracy notes
echo "ACCURACY CONSIDERATIONS:\n";
echo " ✓ Data is recalculated from raw timestamps (hourly stats are accurate)\n";
echo " ⚠ Bot traffic is NOT filtered (may inflate numbers)\n";
echo " ⚠ Ad blockers may prevent tracking (may deflate numbers)\n";
echo " ⚠ Self-visits are NOT filtered\n";
echo " ⚠ JavaScript-disabled browsers won't be tracked\n";
echo " ⚠ Privacy tools may block localStorage (affects visitor ID)\n";
echo " ⚠ New/Returning is calculated per-day, not across days\n";
echo " ⚠ Multiple tabs/devices = multiple visitors\n";
echo "\n";
?>