#!/usr/bin/env python3
"""
Batch Operations Service

This service provides efficient batch operations for soft deletion and recovery
of multiple shots and assets. It optimizes database operations by using bulk
updates and proper transaction management.
"""

import logging
import os
import sys
import time
from datetime import datetime, timezone
from typing import List, Dict, Any, Optional, Tuple, Callable, Iterable

from sqlalchemy.orm import Session
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy import func, and_, or_

# Make the project root importable before pulling in the project packages.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from models.shot import Shot
from models.asset import Asset
from models.task import Task, Submission, Review, ProductionNote, TaskAttachment
from models.user import User
from models.activity import Activity, ActivityType
from services.shot_soft_deletion import ShotSoftDeletionService, DeletionResult
from services.asset_soft_deletion import AssetSoftDeletionService, AssetDeletionResult
from services.recovery_service import RecoveryService, RecoveryResult

logger = logging.getLogger(__name__)

# Models that reference a task directly through ``task_id``, keyed by the name
# used for them in the per-item count dictionaries and in the preview
# (``estimated_<key>``). Reviews are handled separately because they are
# reached through ``Review.submission_id``.
_TASK_CHILD_MODELS = (
    ('submissions', Submission),
    ('attachments', TaskAttachment),
    ('notes', ProductionNote),
)


def _utcnow() -> datetime:
    """Return the current UTC time as a naive datetime.

    Produces the same kind of value as ``datetime.utcnow()`` (naive, UTC)
    without relying on that deprecated call.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None)


class BatchDeletionItem:
    """Information about an item in a batch deletion."""

    def __init__(self):
        self.id: int = 0
        self.name: str = ""
        self.type: str = ""  # 'shot' or 'asset'
        self.success: bool = False
        self.error: Optional[str] = None
        # Rows soft-deleted for this item, keyed by
        # 'tasks' / 'submissions' / 'attachments' / 'notes' / 'reviews'.
        self.deleted_counts: Dict[str, int] = {}


class BatchDeletionResult:
    """Result of a batch deletion operation."""

    def __init__(self):
        self.total_items: int = 0
        self.successful_deletions: int = 0
        self.failed_deletions: int = 0
        self.operation_duration: float = 0.0  # seconds
        self.items: List[BatchDeletionItem] = []

        # Aggregate counts
        self.total_deleted_tasks: int = 0
        self.total_deleted_submissions: int = 0
        self.total_deleted_attachments: int = 0
        self.total_deleted_notes: int = 0
        self.total_deleted_reviews: int = 0


class BatchRecoveryItem:
    """Information about an item in a batch recovery."""

    def __init__(self):
        self.id: int = 0
        self.name: str = ""
        self.type: str = ""  # 'shot' or 'asset'
        self.success: bool = False
        self.error: Optional[str] = None
        # Rows restored for this item, keyed by
        # 'tasks' / 'submissions' / 'attachments' / 'notes' / 'reviews'.
        self.recovered_counts: Dict[str, int] = {}


class BatchRecoveryResult:
    """Result of a batch recovery operation."""

    def __init__(self):
        self.total_items: int = 0
        self.successful_recoveries: int = 0
        self.failed_recoveries: int = 0
        self.operation_duration: float = 0.0  # seconds
        self.items: List[BatchRecoveryItem] = []

        # Aggregate counts
        self.total_recovered_tasks: int = 0
        self.total_recovered_submissions: int = 0
        self.total_recovered_attachments: int = 0
        self.total_recovered_notes: int = 0
        self.total_recovered_reviews: int = 0


class _BatchItemError(Exception):
    """Raised internally when a single item cannot be processed (e.g. not found)."""


class _ItemOutcome:
    """Internal per-item record produced by the batch runner."""

    __slots__ = ("item_id", "name", "counts", "error")

    def __init__(self, item_id: int, error: Optional[str] = None):
        self.item_id = item_id
        self.name: str = ""
        self.counts: Dict[str, int] = {}
        self.error = error  # None means the item succeeded


class BatchOperationsService:
    """Service for efficient batch operations on shots and assets."""

    def __init__(self):
        self.shot_deletion_service = ShotSoftDeletionService()
        self.asset_deletion_service = AssetSoftDeletionService()
        self.recovery_service = RecoveryService()

    # ------------------------------------------------------------------
    # Public batch operations
    # ------------------------------------------------------------------

    def batch_delete_shots(self, shot_ids: List[int], db: Session, current_user: User,
                           batch_size: int = 50) -> BatchDeletionResult:
        """
        Efficiently delete multiple shots in batches.

        Each shot is soft-deleted together with its tasks and their
        submissions, attachments, notes and reviews. Every shot is processed
        inside its own SAVEPOINT so a failure cannot leave a half-deleted shot
        behind, and the session is committed once per batch.

        Args:
            shot_ids: List of shot IDs to delete
            db: Database session
            current_user: User performing the deletion
            batch_size: Number of shots to process in each batch
                (values below 1 are treated as 1)

        Returns:
            BatchDeletionResult with operation details
        """
        started = time.perf_counter()
        outcomes = self._run_in_batches(
            shot_ids, db, batch_size,
            lambda shot_id: self._soft_delete_parent(
                db, Shot, Task.shot_id, shot_id,
                self.shot_deletion_service, "Shot", current_user),
        )
        return self._build_deletion_result(outcomes, "shot", started)

    def batch_delete_assets(self, asset_ids: List[int], db: Session, current_user: User,
                            batch_size: int = 50) -> BatchDeletionResult:
        """
        Efficiently delete multiple assets in batches.

        Each asset is soft-deleted together with its tasks and their
        submissions, attachments, notes and reviews. Every asset is processed
        inside its own SAVEPOINT so a failure cannot leave a half-deleted
        asset behind, and the session is committed once per batch.

        Args:
            asset_ids: List of asset IDs to delete
            db: Database session
            current_user: User performing the deletion
            batch_size: Number of assets to process in each batch
                (values below 1 are treated as 1)

        Returns:
            BatchDeletionResult with operation details
        """
        started = time.perf_counter()
        outcomes = self._run_in_batches(
            asset_ids, db, batch_size,
            lambda asset_id: self._soft_delete_parent(
                db, Asset, Task.asset_id, asset_id,
                self.asset_deletion_service, "Asset", current_user),
        )
        return self._build_deletion_result(outcomes, "asset", started)

    def batch_recover_shots(self, shot_ids: List[int], db: Session, current_user: User,
                            batch_size: int = 50) -> BatchRecoveryResult:
        """
        Efficiently recover multiple shots in batches.

        Clears ``deleted_at`` / ``deleted_by`` on each soft-deleted shot, on
        its soft-deleted tasks and on the soft-deleted submissions,
        attachments, notes and reviews below those tasks.

        Args:
            shot_ids: List of shot IDs to recover
            db: Database session
            current_user: User performing the recovery (accepted for interface
                symmetry; not used by the recovery itself)
            batch_size: Number of shots to process in each batch
                (values below 1 are treated as 1)

        Returns:
            BatchRecoveryResult with operation details
        """
        started = time.perf_counter()
        outcomes = self._run_in_batches(
            shot_ids, db, batch_size,
            lambda shot_id: self._restore_parent(db, Shot, Task.shot_id, shot_id, "Shot"),
        )
        return self._build_recovery_result(outcomes, "shot", started)

    def batch_recover_assets(self, asset_ids: List[int], db: Session, current_user: User,
                             batch_size: int = 50) -> BatchRecoveryResult:
        """
        Efficiently recover multiple assets in batches.

        Clears ``deleted_at`` / ``deleted_by`` on each soft-deleted asset, on
        its soft-deleted tasks and on the soft-deleted submissions,
        attachments, notes and reviews below those tasks.

        Args:
            asset_ids: List of asset IDs to recover
            db: Database session
            current_user: User performing the recovery (accepted for interface
                symmetry; not used by the recovery itself)
            batch_size: Number of assets to process in each batch
                (values below 1 are treated as 1)

        Returns:
            BatchRecoveryResult with operation details
        """
        started = time.perf_counter()
        outcomes = self._run_in_batches(
            asset_ids, db, batch_size,
            lambda asset_id: self._restore_parent(db, Asset, Task.asset_id, asset_id, "Asset"),
        )
        return self._build_recovery_result(outcomes, "asset", started)

    # ------------------------------------------------------------------
    # Shared machinery for the four batch operations
    # ------------------------------------------------------------------

    @staticmethod
    def _run_in_batches(item_ids: Iterable[int], db: Session, batch_size: int,
                        process_item: Callable[[int], Tuple[str, Dict[str, int]]]
                        ) -> List[_ItemOutcome]:
        """Run ``process_item`` for every id, committing once per batch.

        Args:
            item_ids: IDs to process, in order.
            db: Database session.
            batch_size: Number of items per commit; values below 1 become 1.
            process_item: Callable returning ``(name, counts)`` on success and
                raising on failure.

        Returns:
            Exactly one outcome per id, in input order. An outcome whose
            ``error`` is None was processed *and* committed.
        """
        ids = list(item_ids)
        outcomes: List[_ItemOutcome] = []
        committed = 0  # outcomes[:committed] belong to batches already committed

        try:
            # A step of 0 would make range() raise and a negative step would
            # silently process nothing, so clamp to at least one item.
            step = max(1, batch_size)

            for start in range(0, len(ids), step):
                for item_id in ids[start:start + step]:
                    outcome = _ItemOutcome(item_id)
                    try:
                        # SAVEPOINT per item: if anything fails, only this
                        # item's writes are rolled back and the rest of the
                        # batch stays usable. Requires SAVEPOINT support from
                        # the database backend.
                        with db.begin_nested():
                            outcome.name, outcome.counts = process_item(item_id)
                    except _BatchItemError as exc:
                        # Expected per-item condition (not found / wrong state).
                        outcome.error = str(exc)
                    except Exception as exc:
                        logger.exception("Batch item %s failed", item_id)
                        # Releasing the savepoint can fail after the outcome was
                        # filled in, so reset it before recording the error.
                        outcome.name, outcome.counts = "", {}
                        outcome.error = str(exc)
                    outcomes.append(outcome)

                # Commit batch to avoid long-running transactions
                db.commit()
                committed = len(outcomes)
        except Exception as exc:
            logger.exception("Batch operation aborted")
            db.rollback()
            message = f"Batch operation failed: {str(exc)}"

            # Items of the batch that failed to commit were rolled back, so
            # they must not be reported as successful.
            for outcome in outcomes[committed:]:
                if outcome.error is None:
                    outcome.counts = {}
                    outcome.error = message

            # Mark remaining items as failed
            for item_id in ids[len(outcomes):]:
                outcomes.append(_ItemOutcome(item_id, error=message))

        return outcomes

    @staticmethod
    def _cascade_deletion_state(db: Session, owner_filter: Any, restore: bool,
                                deleted_at: Optional[datetime],
                                deleted_by: Optional[int]) -> Dict[str, int]:
        """Write ``deleted_at`` / ``deleted_by`` on a parent's tasks and their children.

        Args:
            db: Database session.
            owner_filter: SQL expression selecting the parent's tasks
                (e.g. ``Task.shot_id == 5``).
            restore: False to touch only rows that are not deleted yet
                (deletion); True to touch only rows that are currently deleted
                (recovery).
            deleted_at: Value written to ``deleted_at`` (None when restoring).
            deleted_by: Value written to ``deleted_by`` (None when restoring).

        Returns:
            Number of rows updated, keyed by 'tasks', 'submissions',
            'attachments', 'notes' and 'reviews'.
        """
        def in_source_state(model):
            if restore:
                return model.deleted_at.isnot(None)
            return model.deleted_at.is_(None)

        def new_values(model):
            return {model.deleted_at: deleted_at, model.deleted_by: deleted_by}

        counts = {'tasks': 0, 'submissions': 0, 'attachments': 0, 'notes': 0, 'reviews': 0}

        # The ids must be collected before the UPDATE below changes which
        # tasks match the state filter.
        task_ids = [row.id for row in db.query(Task.id).filter(
            owner_filter, in_source_state(Task)
        ).all()]

        counts['tasks'] = db.query(Task).filter(
            owner_filter, in_source_state(Task)
        ).update(new_values(Task), synchronize_session=False)

        if not task_ids:
            return counts

        for key, model in _TASK_CHILD_MODELS:
            counts[key] = db.query(model).filter(
                model.task_id.in_(task_ids), in_source_state(model)
            ).update(new_values(model), synchronize_session=False)

        # Reviews are reached through every submission of the affected tasks,
        # whatever that submission's own deletion state is.
        submission_ids = [row.id for row in db.query(Submission.id).filter(
            Submission.task_id.in_(task_ids)
        ).all()]
        if submission_ids:
            counts['reviews'] = db.query(Review).filter(
                Review.submission_id.in_(submission_ids), in_source_state(Review)
            ).update(new_values(Review), synchronize_session=False)

        return counts

    def _soft_delete_parent(self, db: Session, model: Any, owner_column: Any, parent_id: int,
                            info_service: Any, label: str,
                            current_user: User) -> Tuple[str, Dict[str, int]]:
        """Soft-delete one shot/asset and everything below it.

        The cascade is performed here directly; ``info_service`` is only used
        for its ``get_deletion_info`` pre-check.

        Returns:
            ``(name, counts)`` of the deleted parent.

        Raises:
            _BatchItemError: If the parent does not exist or is already deleted.
        """
        not_found = f"{label} not found or already deleted"

        # Get deletion info first
        if not info_service.get_deletion_info(parent_id, db):
            raise _BatchItemError(not_found)

        # One timestamp for the parent and all of its children.
        deleted_at = _utcnow()

        parent = db.query(model).filter(
            model.id == parent_id,
            model.deleted_at.is_(None)
        ).first()
        if parent is None:
            raise _BatchItemError(not_found)

        counts = self._cascade_deletion_state(
            db, owner_column == parent_id, restore=False,
            deleted_at=deleted_at, deleted_by=current_user.id)

        parent.deleted_at = deleted_at
        parent.deleted_by = current_user.id
        db.flush()  # surface database errors while still inside the savepoint

        return parent.name, counts

    def _restore_parent(self, db: Session, model: Any, owner_column: Any, parent_id: int,
                        label: str) -> Tuple[str, Dict[str, int]]:
        """Restore one soft-deleted shot/asset and everything below it.

        Returns:
            ``(name, counts)`` of the restored parent.

        Raises:
            _BatchItemError: If the parent does not exist or is not deleted.
        """
        parent = db.query(model).filter(
            model.id == parent_id,
            model.deleted_at.isnot(None)
        ).first()
        if parent is None:
            raise _BatchItemError(f"{label} not found or not deleted")

        # NOTE(review): every soft-deleted child is restored, including rows
        # that may have been deleted on their own before the parent was -
        # confirm this is the intended recovery semantics.
        counts = self._cascade_deletion_state(
            db, owner_column == parent_id, restore=True,
            deleted_at=None, deleted_by=None)

        parent.deleted_at = None
        parent.deleted_by = None
        db.flush()  # surface database errors while still inside the savepoint

        return parent.name, counts

    @staticmethod
    def _build_deletion_result(outcomes: List[_ItemOutcome], item_type: str,
                               started: float) -> BatchDeletionResult:
        """Turn runner outcomes into the public ``BatchDeletionResult``."""
        result = BatchDeletionResult()
        result.total_items = len(outcomes)

        for outcome in outcomes:
            item = BatchDeletionItem()
            item.id = outcome.item_id
            item.type = item_type
            item.name = outcome.name
            item.error = outcome.error
            item.success = outcome.error is None
            result.items.append(item)

            if not item.success:
                result.failed_deletions += 1
                continue

            item.deleted_counts = outcome.counts
            result.successful_deletions += 1
            result.total_deleted_tasks += outcome.counts['tasks']
            result.total_deleted_submissions += outcome.counts['submissions']
            result.total_deleted_attachments += outcome.counts['attachments']
            result.total_deleted_notes += outcome.counts['notes']
            result.total_deleted_reviews += outcome.counts['reviews']

        # Monotonic clock: unaffected by wall-clock adjustments.
        result.operation_duration = time.perf_counter() - started
        return result

    @staticmethod
    def _build_recovery_result(outcomes: List[_ItemOutcome], item_type: str,
                               started: float) -> BatchRecoveryResult:
        """Turn runner outcomes into the public ``BatchRecoveryResult``."""
        result = BatchRecoveryResult()
        result.total_items = len(outcomes)

        for outcome in outcomes:
            item = BatchRecoveryItem()
            item.id = outcome.item_id
            item.type = item_type
            item.name = outcome.name
            item.error = outcome.error
            item.success = outcome.error is None
            result.items.append(item)

            if not item.success:
                result.failed_recoveries += 1
                continue

            item.recovered_counts = outcome.counts
            result.successful_recoveries += 1
            result.total_recovered_tasks += outcome.counts['tasks']
            result.total_recovered_submissions += outcome.counts['submissions']
            result.total_recovered_attachments += outcome.counts['attachments']
            result.total_recovered_notes += outcome.counts['notes']
            result.total_recovered_reviews += outcome.counts['reviews']

        # Monotonic clock: unaffected by wall-clock adjustments.
        result.operation_duration = time.perf_counter() - started
        return result

    # ------------------------------------------------------------------
    # Preview and sizing
    # ------------------------------------------------------------------

    def get_batch_deletion_preview(self, shot_ids: List[int], asset_ids: List[int],
                                   db: Session) -> Dict[str, Any]:
        """
        Get preview information for a batch deletion operation.

        The returned dictionary always contains the same keys and only plain
        integers. If a database error occurs, the values gathered up to that
        point are returned and the error is logged.

        Args:
            shot_ids: List of shot IDs to delete
            asset_ids: List of asset IDs to delete
            db: Database session

        Returns:
            Dictionary with preview information
        """
        preview: Dict[str, Any] = {
            'total_shots': len(shot_ids),
            'total_assets': len(asset_ids),
            'total_items': len(shot_ids) + len(asset_ids),
            'estimated_tasks': 0,
            'estimated_submissions': 0,
            'estimated_attachments': 0,
            'estimated_notes': 0,
            'estimated_reviews': 0,
        }

        # Kept outside the response: sets are not JSON serializable, only
        # their sizes are reported.
        affected_users = set()
        projects_affected = set()

        try:
            all_task_ids: List[int] = []

            for owner_column, owner_ids in ((Task.shot_id, shot_ids),
                                            (Task.asset_id, asset_ids)):
                if not owner_ids:
                    continue

                active_tasks = (owner_column.in_(owner_ids), Task.deleted_at.is_(None))

                owner_task_ids = [row.id for row in db.query(Task.id).filter(
                    *active_tasks
                ).all()]
                preview['estimated_tasks'] += len(owner_task_ids)
                all_task_ids.extend(owner_task_ids)

                user_rows = db.query(Task.assigned_user_id).filter(
                    *active_tasks,
                    Task.assigned_user_id.isnot(None)
                ).distinct().all()
                affected_users.update(row[0] for row in user_rows)

            if shot_ids:
                # NOTE(review): this collects episode ids while the asset
                # branch below collects project ids, so the two id spaces are
                # mixed in one set - confirm how shots map to projects.
                shot_projects = db.query(Shot.episode_id).join(Shot.episode).filter(
                    Shot.id.in_(shot_ids)
                ).distinct().all()
                projects_affected.update(row[0] for row in shot_projects)

            if asset_ids:
                asset_projects = db.query(Asset.project_id).filter(
                    Asset.id.in_(asset_ids)
                ).distinct().all()
                projects_affected.update(row[0] for row in asset_projects)

            if all_task_ids:
                # Count submissions, attachments and notes
                for key, model in _TASK_CHILD_MODELS:
                    preview[f'estimated_{key}'] = db.query(model).filter(
                        model.task_id.in_(all_task_ids),
                        model.deleted_at.is_(None)
                    ).count()

                # Count reviews
                submission_ids = [row.id for row in db.query(Submission.id).filter(
                    Submission.task_id.in_(all_task_ids),
                    Submission.deleted_at.is_(None)
                ).all()]
                if submission_ids:
                    preview['estimated_reviews'] = db.query(Review).filter(
                        Review.submission_id.in_(submission_ids),
                        Review.deleted_at.is_(None)
                    ).count()
        except SQLAlchemyError:
            # Best effort: fall back to whatever was gathered so far.
            logger.exception("Could not compute the full batch deletion preview")

        # Always report counts, even after an error.
        preview['affected_users_count'] = len(affected_users)
        preview['projects_affected_count'] = len(projects_affected)

        return preview

    def optimize_batch_size(self, total_items: int, estimated_related_items: int) -> int:
        """
        Calculate optimal batch size based on the number of items and their complexity.

        Args:
            total_items: Total number of items to process
            estimated_related_items: Estimated number of related items (tasks, submissions, etc.)

        Returns:
            Optimal batch size; never larger than ``total_items`` and always at
            least 1 so it is a valid batch size even when there is nothing to
            process
        """
        # Base batch size
        base_batch_size = 50

        if estimated_related_items == 0:
            # No related items, can use larger batches
            cap = 100
        else:
            # Average number of related rows per item.
            complexity_factor = estimated_related_items / total_items if total_items > 0 else 1

            if complexity_factor > 100:
                # Very complex items, use smaller batches
                cap = 10
            elif complexity_factor > 50:
                # Moderately complex items
                cap = 25
            else:
                # Simple items, use base batch size
                cap = base_batch_size

        return max(1, min(cap, total_items))