""" Deduplication service Implements deduplication strategy based on commit hash + branch """ import asyncio import hashlib import json from typing import Optional, Dict, Any from datetime import datetime, timedelta import structlog from redis import asyncio as aioredis from app.config import get_settings logger = structlog.get_logger() class DeduplicationService: """Deduplication service""" def __init__(self, redis_client: aioredis.Redis): self.redis = redis_client self.settings = get_settings() self.cache_prefix = "webhook:dedup:" async def is_duplicate(self, dedup_key: str) -> bool: """ Check if the event is a duplicate Args: dedup_key: deduplication key (commit_hash:branch) Returns: bool: True if duplicate, False if new event """ if not self.settings.deduplication.enabled: return False try: cache_key = f"{self.cache_prefix}{dedup_key}" # Check if in cache exists = await self.redis.exists(cache_key) if exists: logger.info("Duplicate event detected", dedup_key=dedup_key) return True # Record new event await self._record_event(cache_key, dedup_key) logger.info("New event recorded", dedup_key=dedup_key) return False except Exception as e: logger.error("Error checking duplication", dedup_key=dedup_key, error=str(e)) # Allow through on error to avoid blocking return False async def _record_event(self, cache_key: str, dedup_key: str): """Record event to cache""" try: # Set cache, TTL is deduplication window ttl = self.settings.deduplication.cache_ttl await self.redis.setex(cache_key, ttl, json.dumps({ "dedup_key": dedup_key, "timestamp": datetime.utcnow().isoformat(), "ttl": ttl })) # Also record to window cache window_key = f"{self.cache_prefix}window:{dedup_key}" window_ttl = self.settings.deduplication.window_seconds await self.redis.setex(window_key, window_ttl, "1") except Exception as e: logger.error("Error recording event", cache_key=cache_key, error=str(e)) async def get_event_info(self, dedup_key: str) -> Optional[Dict[str, Any]]: """Get event info""" try: cache_key = f"{self.cache_prefix}{dedup_key}" data = await self.redis.get(cache_key) if data: return json.loads(data) return None except Exception as e: logger.error("Error getting event info", dedup_key=dedup_key, error=str(e)) return None async def clear_event(self, dedup_key: str) -> bool: """Clear event record""" try: cache_key = f"{self.cache_prefix}{dedup_key}" window_key = f"{self.cache_prefix}window:{dedup_key}" # Delete both cache keys await self.redis.delete(cache_key, window_key) logger.info("Event cleared", dedup_key=dedup_key) return True except Exception as e: logger.error("Error clearing event", dedup_key=dedup_key, error=str(e)) return False async def get_stats(self) -> Dict[str, Any]: """Get deduplication statistics""" try: # Get all deduplication keys pattern = f"{self.cache_prefix}*" keys = await self.redis.keys(pattern) # Count different types of keys total_keys = len(keys) window_keys = len([k for k in keys if b"window:" in k]) event_keys = total_keys - window_keys # Get config info config = { "enabled": self.settings.deduplication.enabled, "window_seconds": self.settings.deduplication.window_seconds, "cache_ttl": self.settings.deduplication.cache_ttl, "strategy": self.settings.deduplication.strategy } return { "total_keys": total_keys, "window_keys": window_keys, "event_keys": event_keys, "config": config, "timestamp": datetime.utcnow().isoformat() } except Exception as e: logger.error("Error getting deduplication stats", error=str(e)) return {"error": str(e)} async def cleanup_expired_events(self) -> int: """Clean up expired events""" try: pattern = f"{self.cache_prefix}*" keys = await self.redis.keys(pattern) cleaned_count = 0 for key in keys: # Check TTL ttl = await self.redis.ttl(key) if ttl <= 0: await self.redis.delete(key) cleaned_count += 1 if cleaned_count > 0: logger.info("Cleaned up expired events", count=cleaned_count) return cleaned_count except Exception as e: logger.error("Error cleaning up expired events", error=str(e)) return 0 def generate_dedup_key(self, commit_hash: str, branch: str) -> str: """ Generate deduplication key Args: commit_hash: commit hash branch: branch name Returns: str: deduplication key """ if self.settings.deduplication.strategy == "commit_branch": return f"{commit_hash}:{branch}" elif self.settings.deduplication.strategy == "commit_only": return commit_hash elif self.settings.deduplication.strategy == "branch_only": return branch else: # Default use commit_hash:branch return f"{commit_hash}:{branch}" async def is_in_window(self, dedup_key: str) -> bool: """ Check if in deduplication time window Args: dedup_key: deduplication key Returns: bool: True if in window """ try: window_key = f"{self.cache_prefix}window:{dedup_key}" exists = await self.redis.exists(window_key) return bool(exists) except Exception as e: logger.error("Error checking window", dedup_key=dedup_key, error=str(e)) return False # Global deduplication service instance _dedup_service: Optional[DeduplicationService] = None def get_deduplication_service() -> DeduplicationService: """Get deduplication service instance""" global _dedup_service if _dedup_service is None: # Should get Redis client from dependency injection # In actual use, should be passed in raise RuntimeError("DeduplicationService not initialized") return _dedup_service def set_deduplication_service(service: DeduplicationService): """Set deduplication service instance""" global _dedup_service _dedup_service = service