WebsiteTemplate/api/analytics.php
2026-01-25 11:33:37 -04:00

337 lines
12 KiB
PHP
Executable File

<?php
// Every response produced by this endpoint is JSON.
header('Content-Type: application/json');

// All date()/strtotime() calls below are evaluated in this zone; change it
// if the reports should follow a different server timezone.
date_default_timezone_set('UTC');

// Loopback addresses that are always let through. The web server is expected
// to filter by IP as well (per the original note); this is a second check.
$allowedIPs = ['127.0.0.1', '::1'];

// Address of the connecting peer, or an empty string when none is provided.
$clientIP = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';
/**
 * Check whether an IPv4 address lies inside a CIDR range.
 *
 * @param string $ip    Dotted-quad IPv4 address to test.
 * @param string $range CIDR block such as "10.0.0.0/8". A bare address
 *                      (no "/prefix") is treated as a /32.
 * @return bool True when $ip falls inside $range. False for anything that is
 *              not valid IPv4 input (IPv6, malformed strings, bad prefix).
 */
function ipInRange($ip, $range) {
    if (!is_string($ip) || !is_string($range)) {
        return false;
    }

    $parts = explode('/', $range, 2);
    $ipLong = ip2long($ip);
    $subnetLong = ip2long($parts[0]);

    // ip2long() returns false for IPv6 and malformed input. Left unchecked,
    // false is coerced to 0 by the bitwise AND and would match 0.x ranges.
    if ($ipLong === false || $subnetLong === false) {
        return false;
    }

    $prefix = isset($parts[1]) ? (int)$parts[1] : 32;
    if ($prefix < 0 || $prefix > 32) {
        // A prefix above 32 would turn into a negative shift, which throws.
        return false;
    }

    // A /0 prefix matches everything; otherwise keep the top $prefix bits.
    $maskLong = $prefix === 0 ? 0 : (-1 << (32 - $prefix));

    return ($ipLong & $maskLong) === ($subnetLong & $maskLong);
}
// Decide whether the caller is on loopback or a private/reserved network.
//
// The address must first be a syntactically valid IP. filter_var() with the
// NO_PRIV_RANGE/NO_RES_RANGE flags returns false for malformed input as well
// as for private/reserved addresses, so without this guard an empty or
// garbage REMOTE_ADDR would be treated as "local" and let through.
$isValidIP = filter_var($clientIP, FILTER_VALIDATE_IP) !== false;
$isLocal = $isValidIP && (
    in_array($clientIP, $allowedIPs, true) ||
    filter_var($clientIP, FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE) === false ||
    ipInRange($clientIP, '10.0.0.0/8') ||
    ipInRange($clientIP, '172.16.0.0/12') ||
    ipInRange($clientIP, '192.168.0.0/16')
);

// Anyone else gets a 403 and nothing more.
if (!$isLocal) {
    http_response_code(403);
    echo json_encode(['error' => 'Access denied']);
    exit;
}
// Requested reporting window. Both values are concatenated into file names
// and fed to strtotime() further down, so only real YYYY-MM-DD calendar
// dates are accepted. An empty or missing value counts as "not supplied":
// `date` then defaults to today and `endDate` to null (single-day mode).
$date = (isset($_GET['date']) && $_GET['date'] !== '') ? $_GET['date'] : date('Y-m-d');
$endDate = (isset($_GET['endDate']) && $_GET['endDate'] !== '') ? $_GET['endDate'] : null;

$isCalendarDate = function ($value) {
    // \z (not $) so a trailing newline cannot slip through.
    return is_string($value)
        && preg_match('/^(\d{4})-(\d{2})-(\d{2})\z/', $value, $parts) === 1
        && checkdate((int)$parts[2], (int)$parts[3], (int)$parts[1]);
};

if (!$isCalendarDate($date) || ($endDate !== null && !$isCalendarDate($endDate))) {
    http_response_code(400);
    echo json_encode(['error' => 'Invalid date']);
    exit;
}

// Storage locations, relative to this script.
$dataDir = __DIR__ . '/../data/analytics';
$reactionsDir = __DIR__ . '/../data/reactions';
/**
 * List every calendar day from $startDate to $endDate, inclusive.
 *
 * Both arguments are parsed with strtotime() and the result is formatted
 * with date(), so the script's default timezone applies to both steps.
 *
 * @param string $startDate First day of the range (any strtotime() format).
 * @param string $endDate   Last day of the range (any strtotime() format).
 * @return string[] Dates as 'Y-m-d' strings in ascending order. Empty when
 *                  either bound cannot be parsed or the end precedes the start.
 */
function getDatesInRange($startDate, $endDate) {
    $current = strtotime($startDate);
    $end = strtotime($endDate);

    // strtotime() returns false on unparsable input. Used as a timestamp,
    // false acts as 0, so the loop would walk from 1970 up to the end date.
    if ($current === false || $end === false) {
        return [];
    }

    $dates = [];
    while ($current <= $end) {
        $dates[] = date('Y-m-d', $current);
        $current = strtotime('+1 day', $current);
    }
    return $dates;
}
// Baseline shape of the summary. Values loaded from disk are layered on top
// of this, so every key is present even when a file is missing or partial.
$data = [
    'total' => 0,
    'new' => 0,
    'returning' => 0,
    'byHour' => array_fill(0, 24, 0),
    'shares' => ['mastodon' => 0, 'bluesky' => 0, 'copy' => 0],
    'rss' => 0
];

/**
 * Decode a JSON file into an array.
 *
 * @param string $path File to read.
 * @return array Decoded content, or [] when the file is missing, unreadable,
 *               not valid JSON, or does not decode to an array.
 */
function loadJsonArray($path) {
    if (!file_exists($path)) {
        return [];
    }
    $raw = file_get_contents($path);
    if ($raw === false) {
        return [];
    }
    $decoded = json_decode($raw, true);
    return is_array($decoded) ? $decoded : [];
}

/**
 * Count 'pageview' visits per hour of day from their timestamps.
 *
 * Hours are taken from date(), i.e. in the script's default timezone.
 *
 * @param array $visits Visit records; entries without a 'pageview' type or a
 *                      numeric 'timestamp' are ignored.
 * @return int[] 24 counters indexed 0-23.
 */
function tallyPageviewsByHour($visits) {
    $byHour = array_fill(0, 24, 0);
    foreach ($visits as $visit) {
        if (!is_array($visit) || !isset($visit['type'], $visit['timestamp'])) {
            continue;
        }
        if ($visit['type'] !== 'pageview' || !is_numeric($visit['timestamp'])) {
            continue;
        }
        $byHour[(int)date('G', (int)$visit['timestamp'])]++;
    }
    return $byHour;
}

if ($endDate && $endDate >= $date) {
    // Date range mode: add up the per-day summaries and gather all visits.
    $visitChunks = [];
    foreach (getDatesInRange($date, $endDate) as $dayDate) {
        $dayData = loadJsonArray($dataDir . '/summary_' . $dayDate . '.json');

        $data['total'] += (int)($dayData['total'] ?? 0);
        $data['new'] += (int)($dayData['new'] ?? 0);
        $data['returning'] += (int)($dayData['returning'] ?? 0);
        $data['rss'] += (int)($dayData['rss'] ?? 0);

        // Sum every platform key as stored, including legacy names, so the
        // range total covers the same keys as a single-day summary.
        if (isset($dayData['shares']) && is_array($dayData['shares'])) {
            foreach ($dayData['shares'] as $platform => $count) {
                if (is_numeric($count)) {
                    $data['shares'][$platform] = ($data['shares'][$platform] ?? 0) + (int)$count;
                }
            }
        }

        // Visits are loaded whether or not the day's summary was usable.
        $dayVisits = loadJsonArray($dataDir . '/visits_' . $dayDate . '.json');
        if ($dayVisits) {
            $visitChunks[] = $dayVisits;
        }
    }
    $visits = $visitChunks ? array_merge(...$visitChunks) : [];
} else {
    // Single date mode: layer the stored summary over the defaults.
    $data = array_replace($data, loadJsonArray($dataDir . '/summary_' . $date . '.json'));
    $visits = loadJsonArray($dataDir . '/visits_' . $date . '.json');
}

// The hourly histogram is always rebuilt from the raw visit timestamps, so
// any stored 'byHour' values are ignored.
$data['byHour'] = tallyPageviewsByHour($visits);
// Summaries may lack an RSS counter; default it to zero.
$data['rss'] = $data['rss'] ?? 0;

// A reader counts as an active subscriber when its last recorded fetch
// happened within the past seven days.
$readersFile = $dataDir . '/rss_readers.json';
$activeRssSubscribers = 0;
if (file_exists($readersFile)) {
    $readers = json_decode(file_get_contents($readersFile), true) ?: [];
    $weekInSeconds = 7 * 24 * 60 * 60;
    $sevenDaysAgo = time() - $weekInSeconds;
    foreach ($readers as $reader) {
        if (!isset($reader['lastFetch'])) {
            continue;
        }
        if ($reader['lastFetch'] >= $sevenDaysAgo) {
            $activeRssSubscribers += 1;
        }
    }
}
$data['activeRssSubscribers'] = $activeRssSubscribers;
// Get 10 most recent unique IP addresses with geolocation
/**
 * Look up approximate location details for an IP address via ip-api.com.
 *
 * Addresses that fail public-IP validation (private, reserved, or malformed)
 * are never sent to the service and get a fixed "Local" placeholder instead.
 *
 * @param string $ip Address to look up.
 * @return array Keys 'country', 'city', 'isp'. A successful lookup also has
 *               'countryCode', 'region' and 'ip'; a failed lookup has 'ip'.
 */
function getGeolocation($ip) {
    $publicOnly = FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE;
    if (filter_var($ip, FILTER_VALIDATE_IP, $publicOnly) === false) {
        return ['country' => 'Local', 'city' => 'Private Network', 'isp' => ''];
    }

    // Returned whenever the service cannot be reached or reports no success.
    $unknown = ['country' => 'Unknown', 'city' => 'Unknown', 'isp' => 'Unknown', 'ip' => $ip];

    // Free ip-api.com endpoint; the original notes a 45 requests/minute limit.
    $endpoint = "http://ip-api.com/json/{$ip}?fields=status,country,countryCode,city,region,isp,query";
    $httpOptions = [
        'http' => [
            'timeout' => 2,
            'method' => 'GET',
            'header' => 'User-Agent: Analytics/1.0'
        ]
    ];

    // Warnings are suppressed: a failed request just yields the fallback.
    $body = @file_get_contents($endpoint, false, stream_context_create($httpOptions));
    if (!$body) {
        return $unknown;
    }

    $payload = json_decode($body, true);
    $succeeded = $payload && isset($payload['status']) && $payload['status'] === 'success';
    if (!$succeeded) {
        return $unknown;
    }

    return [
        'country' => $payload['country'] ?? 'Unknown',
        'countryCode' => $payload['countryCode'] ?? '',
        'city' => $payload['city'] ?? 'Unknown',
        'region' => $payload['region'] ?? '',
        'isp' => $payload['isp'] ?? 'Unknown',
        'ip' => $payload['query'] ?? $ip
    ];
}
// Reduce the visit log to one entry per IP, keeping the details of that
// IP's most recent visit.
$ipVisits = [];
foreach ($visits as $visit) {
    $ip = $visit['ip'] ?? '';
    if (!is_string($ip) || $ip === '') {
        continue;
    }

    $seenAt = $visit['timestamp'] ?? 0;
    if (isset($ipVisits[$ip]) && $seenAt <= $ipVisits[$ip]['timestamp']) {
        // An equally recent or newer visit from this IP is already recorded.
        continue;
    }

    // First sighting, or a newer visit: timestamp, page and user agent are
    // replaced together so they always describe the same visit.
    $ipVisits[$ip] = [
        'ip' => $ip,
        'timestamp' => $seenAt,
        'page' => $visit['page'] ?? '',
        'userAgent' => $visit['userAgent'] ?? ''
    ];
}

// Newest first. The spaceship operator always returns an int, unlike
// subtraction, which misorders timestamps that are not plain integers.
usort($ipVisits, function ($a, $b) {
    return $b['timestamp'] <=> $a['timestamp'];
});

// Only the ten most recent addresses are reported.
$recentIPs = array_slice($ipVisits, 0, 10);
// Resolve a location for each recent IP, using an on-disk cache so repeat
// requests do not hit the lookup service again.
$geoCacheFile = $dataDir . '/geo_cache.json';
$geoCache = [];
if (file_exists($geoCacheFile)) {
    $decodedGeoCache = json_decode(file_get_contents($geoCacheFile), true);
    if (is_array($decodedGeoCache)) {
        $geoCache = $decodedGeoCache;
    }
}

$geoCacheTtl = 86400;   // Cached entries are reused for 24 hours.
$geoCacheDirty = false; // Set once an entry has been added or refreshed.

$recentVisitors = [];
foreach ($recentIPs as $ipVisit) {
    $ip = $ipVisit['ip'];

    // Malformed cache entries (missing keys, wrong type) count as a miss.
    $cached = $geoCache[$ip] ?? null;
    $isFresh = is_array($cached)
        && isset($cached['data'], $cached['cached_at'])
        && (time() - (int)$cached['cached_at']) < $geoCacheTtl;

    if ($isFresh) {
        $geo = $cached['data'];
    } else {
        $geo = getGeolocation($ip);
        $geoCache[$ip] = ['data' => $geo, 'cached_at' => time()];
        $geoCacheDirty = true;
        // Small pause between lookups to respect the API rate limit.
        usleep(200000); // 0.2 second delay
    }

    $recentVisitors[] = [
        'ip' => $ip,
        'timestamp' => $ipVisit['timestamp'],
        // ISO 8601 in UTC; the frontend converts to local time.
        'time' => gmdate('Y-m-d\TH:i:s\Z', (int)$ipVisit['timestamp']),
        'page' => $ipVisit['page'],
        'country' => $geo['country'] ?? 'Unknown',
        'countryCode' => $geo['countryCode'] ?? '',
        'city' => $geo['city'] ?? 'Unknown',
        'region' => $geo['region'] ?? '',
        'isp' => $geo['isp'] ?? 'Unknown'
    ];
}

// Write the cache only when something changed. LOCK_EX serialises concurrent
// writers, and a failed encode must not replace the file with an empty one.
if ($geoCacheDirty) {
    $encodedGeoCache = json_encode($geoCache, JSON_PRETTY_PRINT);
    if ($encodedGeoCache !== false) {
        file_put_contents($geoCacheFile, $encodedGeoCache, LOCK_EX);
    }
}

$data['recentVisitors'] = $recentVisitors;
// Fold share counts into the three platform buckets the response reports.
// Older summaries used 'twitter' and 'facebook'; those are counted under
// 'mastodon' and 'bluesky' respectively. Any other key is dropped.
$shareBucketFor = [
    'twitter' => 'mastodon',
    'mastodon' => 'mastodon',
    'facebook' => 'bluesky',
    'bluesky' => 'bluesky',
    'copy' => 'copy'
];

$normalizedShares = ['mastodon' => 0, 'bluesky' => 0, 'copy' => 0];
if (isset($data['shares'])) {
    foreach ($data['shares'] as $platform => $count) {
        if (isset($shareBucketFor[$platform])) {
            $normalizedShares[$shareBucketFor[$platform]] += $count;
        }
    }
}
$data['shares'] = $normalizedShares;
// Load the list of blog posts.
$postsFile = __DIR__ . '/../blog/data/posts.json';
$posts = [];
if (file_exists($postsFile)) {
    $decodedPosts = json_decode(file_get_contents($postsFile), true);
    if (is_array($decodedPosts)) {
        $posts = $decodedPosts;
    }
}

// Extract share events from the loaded visits once (single day or whole
// range), instead of rescanning every visit for every post.
$shareEvents = [];
foreach ($visits as $visit) {
    if (!isset($visit['type'], $visit['page']) || $visit['type'] !== 'share' || !is_string($visit['page'])) {
        continue;
    }
    $platform = $visit['platform'] ?? 'copy';
    if (is_string($platform)) {
        $shareEvents[] = ['page' => $visit['page'], 'platform' => $platform];
    }
}

// Build reaction and share totals for each post.
$blogStats = [];
foreach ($posts as $post) {
    if (!is_array($post) || !isset($post['id'])) {
        // Without an id there is no reaction file or anchor to match.
        continue;
    }
    $postId = $post['id'];

    // Reaction counts come from the post's own file when it has a usable
    // 'counts' array; otherwise the zeroed defaults are reported.
    $reactions = ['like' => 0, 'love' => 0, 'helpful' => 0];
    $reactionFile = $reactionsDir . '/post_' . $postId . '.json';
    if (file_exists($reactionFile)) {
        $reactionData = json_decode(file_get_contents($reactionFile), true);
        if (isset($reactionData['counts']) && is_array($reactionData['counts'])) {
            $reactions = $reactionData['counts'];
        }
    }

    // Match the post's anchor exactly. A plain substring test would count
    // shares of '#post-10' towards post 1 as well. The lookahead assumes ids
    // consist of word characters and hyphens - TODO confirm against posts.json.
    $anchorPattern = '/#post-' . preg_quote((string)$postId, '/') . '(?![\w-])/';
    $postShares = ['mastodon' => 0, 'bluesky' => 0, 'copy' => 0];
    foreach ($shareEvents as $event) {
        if (isset($postShares[$event['platform']]) && preg_match($anchorPattern, $event['page']) === 1) {
            $postShares[$event['platform']]++;
        }
    }

    $blogStats[] = [
        'id' => $postId,
        'title' => $post['title'] ?? '',
        'reactions' => $reactions,
        'shares' => $postShares,
        'totalReactions' => array_sum($reactions),
        'totalShares' => array_sum($postShares)
    ];
}
$data['blogPosts'] = $blogStats;
// Emit the payload. json_encode() returns false when the data cannot be
// encoded (for example invalid UTF-8 in a stored string). Report that as a
// 500 with an error body rather than an empty 200 response.
$responseBody = json_encode(['success' => true, 'data' => $data]);
if ($responseBody === false) {
    http_response_code(500);
    $responseBody = json_encode(['error' => 'Failed to encode analytics data: ' . json_last_error_msg()]);
}
echo $responseBody;
?>