Error Handling Strategy

Why Error Handling Matters

Proper error handling ensures you never lose monitoring data, even when networks fail or APIs are unavailable.

Key Principles

Never let monitoring failures affect your main application logic
Save failed payloads locally for later retry
Implement exponential backoff for retries
Log errors for debugging but don't crash on monitoring failures

Offline Mode & Failed Payload Storage

Python Implementation

Store failed payloads locally and retry later

import json
import os
import time
import uuid
from datetime import datetime

import requests

class SEERClient:
    """Client for the SEER monitoring API with an on-disk fallback.

    Every send is best-effort: when a request fails for any reason, the
    payload is written to ``offline_dir`` as a JSON file so it can be
    replayed later, and the method returns ``False`` instead of raising.
    """

    def __init__(self, api_key, offline_dir="failed_payloads"):
        """Store credentials and make sure the offline directory exists.

        Args:
            api_key: Value sent verbatim in the ``Authorization`` header.
            offline_dir: Directory that receives payloads which could not
                be delivered. A relative path is resolved against the
                current working directory.

        Raises:
            OSError: If the offline directory cannot be created.
        """
        self.api_key = api_key
        self.offline_dir = offline_dir
        self.base_url = "https://api.seer.ansrstudio.com"

        # exist_ok=True avoids the check-then-create race between two
        # processes that start at the same time.
        os.makedirs(self.offline_dir, exist_ok=True)

    def _save_offline(self, endpoint, payload):
        """Save a failed payload to disk for later retry.

        The file is named ``<endpoint>_<unix-millis>_<random-hex>.json``.
        The random suffix keeps two failures that happen within the same
        millisecond from overwriting each other.

        Args:
            endpoint: Endpoint name used as the filename prefix
                (for example ``"monitoring"`` or ``"heartbeat"``).
            payload: JSON-serializable object to persist.

        Returns:
            The path of the written file, or ``None`` if the payload could
            not be saved. This method never raises, because it runs while
            another failure is already being handled.
        """
        timestamp = int(time.time() * 1000)
        filename = f"{endpoint}_{timestamp}_{uuid.uuid4().hex}.json"
        filepath = os.path.join(self.offline_dir, filename)

        try:
            # Serialize before opening the file so an unserializable
            # payload cannot leave a truncated file behind.
            data = json.dumps(payload)
            # The directory may have been removed since construction.
            os.makedirs(self.offline_dir, exist_ok=True)
            with open(filepath, "w", encoding="utf-8") as f:
                f.write(data)
        except Exception as e:
            # Monitoring must never take the host application down.
            print(f"Could not save failed payload to {filepath}: {e}")
            return None

        print(f"Saved failed payload to {filepath}")
        return filepath

    def _post(self, endpoint, payload, description):
        """POST ``payload`` to ``endpoint``, saving it offline on failure.

        Args:
            endpoint: Path segment appended to ``base_url``; also used as
                the offline filename prefix.
            payload: JSON-serializable request body.
            description: Human-readable name used in the failure message.

        Returns:
            ``True`` if the server answered with a non-error status,
            ``False`` otherwise.
        """
        try:
            response = requests.post(
                f"{self.base_url}/{endpoint}",
                headers={
                    "Authorization": self.api_key,
                    "Content-Type": "application/json",
                },
                json=payload,
                timeout=10,
            )
            response.raise_for_status()
            return True
        except Exception as e:
            # Deliberately broad: this is the boundary that shields the
            # caller from any monitoring failure.
            print(f"Failed to send {description}: {e}")
            self._save_offline(endpoint, payload)
            return False

    def send_monitoring_data(self, payload):
        """Send monitoring data with offline fallback.

        Args:
            payload: JSON-serializable monitoring payload.

        Returns:
            ``True`` on success, ``False`` if the request failed (in which
            case the payload is saved to the offline directory).
        """
        return self._post("monitoring", payload, "monitoring data")

    def send_heartbeat(self, pipeline_id):
        """Send heartbeat with offline fallback.

        Args:
            pipeline_id: Identifier sent as ``{"pipeline_id": ...}``.

        Returns:
            ``True`` on success, ``False`` if the request failed (in which
            case the payload is saved to the offline directory).
        """
        return self._post("heartbeat", {"pipeline_id": pipeline_id}, "heartbeat")

Replaying Failed Payloads

Automatic Retry Logic

Replay all saved payloads when connectivity is restored

def replay_failed_payloads(
    api_key,
    offline_dir="failed_payloads",
    base_url="https://api.seer.ansrstudio.com",
):
    """Replay all failed payloads from offline storage.

    Each ``<endpoint>_*.json`` file in ``offline_dir`` is POSTed to the
    matching endpoint. A file is deleted only after the server accepts it;
    on any error the file is left in place for the next run.

    Args:
        api_key: Value sent verbatim in the ``Authorization`` header.
        offline_dir: Directory holding the saved payloads. The default
            matches the default ``offline_dir`` of ``SEERClient``, so both
            resolve against the current working directory.
        base_url: Root URL of the API, without a trailing slash.

    Returns:
        None. A summary line is printed when the run finishes.
    """
    try:
        # Sorted so that runs are deterministic; within one endpoint the
        # timestamp in the filename gives oldest-first order.
        filenames = sorted(os.listdir(offline_dir))
    except FileNotFoundError:
        # Nothing has ever failed, so there is nothing to replay.
        return

    headers = {
        "Authorization": api_key,
        "Content-Type": "application/json",
    }
    endpoints = ("monitoring", "heartbeat")

    replayed = 0
    failed = 0

    for filename in filenames:
        filepath = os.path.join(offline_dir, filename)

        # Determine the endpoint from the filename prefix before touching
        # the file, so unrelated files and subdirectories are skipped
        # rather than reported as failures.
        endpoint = next(
            (name for name in endpoints if filename.startswith(f"{name}_")),
            None,
        )
        if (
            endpoint is None
            or not filename.endswith(".json")
            or not os.path.isfile(filepath)
        ):
            continue

        try:
            with open(filepath, "r", encoding="utf-8") as f:
                payload = json.load(f)

            # Attempt to send
            response = requests.post(
                f"{base_url}/{endpoint}",
                headers=headers,
                json=payload,
                timeout=10,
            )
            response.raise_for_status()

            # Success - remove the file
            os.remove(filepath)
            replayed += 1
            print(f"✓ Replayed {filename}")

        except Exception as e:
            # Deliberately broad: one bad file or a network error must not
            # stop the remaining files from being tried.
            failed += 1
            print(f"✗ Failed to replay {filename}: {e}")
            # Leave the file for next retry

    print(f"Replay complete: {replayed} succeeded, {failed} failed")

# Usage: run this periodically or at script startup. The __main__ guard keeps
# a plain import of this module from triggering network requests.
if __name__ == "__main__":
    replay_failed_payloads("your_api_key_here")

Best Practices Checklist

Wrap API calls in try/except blocks
Never let monitoring failures crash your application
Use timeouts on all requests
Set reasonable timeouts (5-10 seconds) to prevent hanging
Implement exponential backoff
Wait progressively longer between retry attempts
Store failed payloads locally
Save to disk for later replay when connectivity returns
Log errors for debugging
Keep logs of failures to identify patterns
Test failure scenarios
Simulate network failures to verify your error handling works

Error Handling Best Practices

Error Handling Strategy

Key Principles

Offline Mode & Failed Payload Storage

Replaying Failed Payloads

Best Practices Checklist