← Back to Documentation

Error Handling Best Practices

Comprehensive strategies for handling errors and ensuring reliable monitoring

Error Handling Strategy

Why Error Handling Matters
Proper error handling ensures you never lose monitoring data, even when networks fail or APIs are unavailable

Key Principles

  • Never let monitoring failures affect your main application logic
  • Save failed payloads locally for later retry
  • Implement exponential backoff for retries
  • Log errors for debugging but don't crash on monitoring failures

Offline Mode & Failed Payload Storage

Python Implementation
Store failed payloads locally and retry later
import os
import json
import time
import requests
from datetime import datetime

class SEERClient:
    """Client for the SEER monitoring API with an on-disk offline fallback.

    Every ``send_*`` method is best-effort: it returns ``True`` when the API
    accepted the request and ``False`` otherwise, and it never raises. When a
    request fails, the payload is written to ``offline_dir`` as
    ``<endpoint>_<timestamp>_<random>.json`` so it can be replayed later.

    Args:
        api_key: Value sent verbatim in the ``Authorization`` header.
        offline_dir: Directory where undeliverable payloads are stored.
    """

    def __init__(self, api_key, offline_dir="failed_payloads"):
        self.api_key = api_key
        self.offline_dir = offline_dir
        self.base_url = "https://api.seer.ansrstudio.com"

        # exist_ok=True avoids the check-then-create race when several
        # processes construct a client at the same time.
        os.makedirs(self.offline_dir, exist_ok=True)

    def _save_offline(self, endpoint, payload):
        """Persist an undeliverable payload to disk for a later retry.

        This runs inside the ``except`` handlers of the ``send_*`` methods, so
        it must not raise: a payload that cannot be serialized or written is
        reported and dropped instead of crashing the caller.

        Args:
            endpoint: Endpoint name without a leading slash; it becomes the
                filename prefix used to route the payload on replay.
            payload: JSON-serializable data to store.

        Returns:
            ``True`` if the payload was written, ``False`` otherwise.
        """
        # Serialize first so a bad payload never leaves a file behind.
        try:
            serialized = json.dumps(payload)
        except (TypeError, ValueError) as e:
            print(f"Could not serialize failed payload for {endpoint}: {e}")
            return False

        # The random suffix keeps two failures within the same millisecond
        # from overwriting each other.
        timestamp = int(time.time() * 1000)
        suffix = os.urandom(4).hex()
        filename = f"{endpoint}_{timestamp}_{suffix}.json"
        filepath = os.path.join(self.offline_dir, filename)
        tmp_path = filepath + ".tmp"

        try:
            # Re-create the directory in case it was removed after __init__.
            os.makedirs(self.offline_dir, exist_ok=True)
            # Write to a temporary name and rename, so a concurrent replay
            # never reads a half-written .json file.
            with open(tmp_path, "w", encoding="utf-8") as f:
                f.write(serialized)
            os.replace(tmp_path, filepath)
        except OSError as e:
            print(f"Could not save failed payload to {filepath}: {e}")
            try:
                os.remove(tmp_path)
            except OSError:
                pass  # nothing was created, or it cannot be cleaned up
            return False

        print(f"Saved failed payload to {filepath}")
        return True

    def _post(self, endpoint, payload, description):
        """POST ``payload`` to ``endpoint``, saving it offline on any failure.

        Args:
            endpoint: Endpoint name without a leading slash.
            payload: JSON-serializable request body.
            description: Human-readable name used in the failure message.

        Returns:
            ``True`` on a successful (non-error) response, ``False`` otherwise.
        """
        try:
            response = requests.post(
                f"{self.base_url}/{endpoint}",
                headers={
                    "Authorization": self.api_key,
                    "Content-Type": "application/json",
                },
                json=payload,
                timeout=10,
            )
            response.raise_for_status()
            return True
        except Exception as e:
            # Deliberately broad: this is the boundary that keeps monitoring
            # failures from propagating into the host application.
            print(f"Failed to send {description}: {e}")
            self._save_offline(endpoint, payload)
            return False

    def send_monitoring_data(self, payload):
        """Send monitoring data; on failure store it offline and return False."""
        return self._post("monitoring", payload, "monitoring data")

    def send_heartbeat(self, pipeline_id):
        """Send a heartbeat for ``pipeline_id``; on failure store it offline."""
        return self._post("heartbeat", {"pipeline_id": pipeline_id}, "heartbeat")

Replaying Failed Payloads

Automatic Retry Logic
Replay all saved payloads when connectivity is restored
def replay_failed_payloads(api_key, offline_dir="failed_payloads"):
    """Replay payloads saved in offline storage and delete those that succeed.

    Only files named ``<endpoint>_*.json`` with a known endpoint
    (``monitoring`` or ``heartbeat``) are considered; anything else in the
    directory is ignored. A file whose replay fails for any reason is left in
    place so the next run can try again.

    Args:
        api_key: Value sent verbatim in the ``Authorization`` header.
        offline_dir: Directory to scan. The default is the same relative path
            that ``SEERClient`` uses by default, so both resolve to the same
            place when run from the same working directory.

    Returns:
        None. Progress and a final summary are printed.
    """
    if not os.path.isdir(offline_dir):
        return

    base_url = "https://api.seer.ansrstudio.com"
    endpoints = ("monitoring", "heartbeat")
    headers = {
        "Authorization": api_key,
        "Content-Type": "application/json",
    }

    replayed = 0
    failed = 0

    # Sorted so each run processes files in a stable, repeatable order.
    for filename in sorted(os.listdir(offline_dir)):
        # Route by filename before touching the file, so stray files are
        # skipped rather than parsed and reported as failures.
        if not filename.endswith(".json"):
            continue
        endpoint = next(
            (name for name in endpoints if filename.startswith(f"{name}_")),
            None,
        )
        if endpoint is None:
            continue

        filepath = os.path.join(offline_dir, filename)
        try:
            with open(filepath, "r", encoding="utf-8") as f:
                payload = json.load(f)

            response = requests.post(
                f"{base_url}/{endpoint}",
                headers=headers,
                json=payload,
                timeout=10,
            )
            response.raise_for_status()

            # Delivered: the stored copy is no longer needed.
            os.remove(filepath)
        except Exception as e:
            # Deliberately broad: one bad file must not abort the whole pass.
            failed += 1
            print(f"✗ Failed to replay {filename}: {e}")
            continue

        replayed += 1
        print(f"✓ Replayed {filename}")

    print(f"Replay complete: {replayed} succeeded, {failed} failed")

# Usage: run this periodically or once at script startup.
# Replace the placeholder string below with your real API key.
replay_failed_payloads("your_api_key_here")

Best Practices Checklist

  • Wrap API calls in try/except blocks

    Never let monitoring failures crash your application

  • Use timeouts on all requests

    Set reasonable timeouts (5-10 seconds) to prevent hanging

  • Implement exponential backoff

    Wait progressively longer between retry attempts

  • Store failed payloads locally

    Save to disk for later replay when connectivity returns

  • Log errors for debugging

    Keep logs of failures to identify patterns

  • Test failure scenarios

    Simulate network failures to verify your error handling works