LinkDesk/backend/services/batch_operations.py

902 lines
39 KiB
Python

#!/usr/bin/env python3
"""
Batch Operations Service
This service provides efficient batch operations for soft deletion and recovery
of multiple shots and assets. It optimizes database operations by using bulk
updates and proper transaction management.
"""
import os
import sys

from dataclasses import dataclass, field
from datetime import datetime
from typing import List, Dict, Any, Optional, Tuple

from sqlalchemy import func, and_, or_
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import Session

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from models.shot import Shot
from models.asset import Asset
from models.task import Task, Submission, Review, ProductionNote, TaskAttachment
from models.user import User
from models.activity import Activity, ActivityType
from services.shot_soft_deletion import ShotSoftDeletionService, DeletionResult
from services.asset_soft_deletion import AssetSoftDeletionService, AssetDeletionResult
from services.recovery_service import RecoveryService, RecoveryResult
@dataclass
class BatchDeletionItem:
    """Outcome of deleting a single item within a batch deletion.

    ``deleted_counts`` maps related-table labels ('tasks', 'submissions',
    'attachments', 'notes', 'reviews') to the number of rows soft-deleted
    alongside this item.
    """
    id: int = 0                    # primary key of the shot/asset
    name: str = ""                 # display name, filled in on success
    type: str = ""                 # 'shot' or 'asset'
    success: bool = False          # True when the item was deleted
    error: Optional[str] = None    # failure reason when success is False
    # default_factory gives each instance its own dict (never shared).
    deleted_counts: Dict[str, int] = field(default_factory=dict)
@dataclass
class BatchDeletionResult:
    """Aggregate result of a batch deletion operation."""
    total_items: int = 0               # number of IDs requested
    successful_deletions: int = 0      # items deleted without error
    failed_deletions: int = 0          # items that failed (incl. not found)
    operation_duration: float = 0.0    # wall-clock seconds for the whole batch
    # Per-item outcomes, in processing order; one entry per requested ID.
    items: List["BatchDeletionItem"] = field(default_factory=list)
    # Aggregate counts across all successfully deleted items.
    total_deleted_tasks: int = 0
    total_deleted_submissions: int = 0
    total_deleted_attachments: int = 0
    total_deleted_notes: int = 0
    total_deleted_reviews: int = 0
@dataclass
class BatchRecoveryItem:
    """Outcome of recovering a single item within a batch recovery.

    ``recovered_counts`` maps related-table labels ('tasks', 'submissions',
    'attachments', 'notes', 'reviews') to the number of rows restored
    alongside this item.
    """
    id: int = 0                    # primary key of the shot/asset
    name: str = ""                 # display name, filled in on success
    type: str = ""                 # 'shot' or 'asset'
    success: bool = False          # True when the item was recovered
    error: Optional[str] = None    # failure reason when success is False
    # default_factory gives each instance its own dict (never shared).
    recovered_counts: Dict[str, int] = field(default_factory=dict)
@dataclass
class BatchRecoveryResult:
    """Aggregate result of a batch recovery operation."""
    total_items: int = 0               # number of IDs requested
    successful_recoveries: int = 0     # items recovered without error
    failed_recoveries: int = 0         # items that failed (incl. not found)
    operation_duration: float = 0.0    # wall-clock seconds for the whole batch
    # Per-item outcomes, in processing order; one entry per requested ID.
    items: List["BatchRecoveryItem"] = field(default_factory=list)
    # Aggregate counts across all successfully recovered items.
    total_recovered_tasks: int = 0
    total_recovered_submissions: int = 0
    total_recovered_attachments: int = 0
    total_recovered_notes: int = 0
    total_recovered_reviews: int = 0
class BatchOperationsService:
    """Service for efficient batch operations on shots and assets.

    Soft deletion / recovery cascades from each shot or asset to its tasks
    and to each task's submissions, attachments, production notes, and
    reviews.  Items are processed in fixed-size batches with one commit per
    batch (keeps transactions short), and every item runs inside a SAVEPOINT
    so a failing item cannot leave its partial updates behind in a batch
    that later commits.
    """

    def __init__(self):
        # Delegate services used for existence checks and single-item logic.
        self.shot_deletion_service = ShotSoftDeletionService()
        self.asset_deletion_service = AssetSoftDeletionService()
        self.recovery_service = RecoveryService()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def batch_delete_shots(self, shot_ids: List[int], db: Session, current_user: User,
                           batch_size: int = 50) -> BatchDeletionResult:
        """Efficiently soft-delete multiple shots in batches.

        Args:
            shot_ids: List of shot IDs to delete
            db: Database session
            current_user: User performing the deletion
            batch_size: Number of shots to process in each batch

        Returns:
            BatchDeletionResult with per-item outcomes and aggregate counts
        """
        return self._run_batch_deletion(
            shot_ids, db, current_user, batch_size,
            model=Shot, fk_column=Task.shot_id, item_type="shot",
            info_service=self.shot_deletion_service)

    def batch_delete_assets(self, asset_ids: List[int], db: Session, current_user: User,
                            batch_size: int = 50) -> BatchDeletionResult:
        """Efficiently soft-delete multiple assets in batches.

        Args:
            asset_ids: List of asset IDs to delete
            db: Database session
            current_user: User performing the deletion
            batch_size: Number of assets to process in each batch

        Returns:
            BatchDeletionResult with per-item outcomes and aggregate counts
        """
        return self._run_batch_deletion(
            asset_ids, db, current_user, batch_size,
            model=Asset, fk_column=Task.asset_id, item_type="asset",
            info_service=self.asset_deletion_service)

    def batch_recover_shots(self, shot_ids: List[int], db: Session, current_user: User,
                            batch_size: int = 50) -> BatchRecoveryResult:
        """Efficiently recover multiple soft-deleted shots in batches.

        Args:
            shot_ids: List of shot IDs to recover
            db: Database session
            current_user: User performing the recovery (kept for API parity;
                recovery clears the deletion stamps rather than re-stamping)
            batch_size: Number of shots to process in each batch

        Returns:
            BatchRecoveryResult with per-item outcomes and aggregate counts
        """
        return self._run_batch_recovery(
            shot_ids, db, batch_size,
            model=Shot, fk_column=Task.shot_id, item_type="shot")

    def batch_recover_assets(self, asset_ids: List[int], db: Session, current_user: User,
                             batch_size: int = 50) -> BatchRecoveryResult:
        """Efficiently recover multiple soft-deleted assets in batches.

        Args:
            asset_ids: List of asset IDs to recover
            db: Database session
            current_user: User performing the recovery (kept for API parity)
            batch_size: Number of assets to process in each batch

        Returns:
            BatchRecoveryResult with per-item outcomes and aggregate counts
        """
        return self._run_batch_recovery(
            asset_ids, db, batch_size,
            model=Asset, fk_column=Task.asset_id, item_type="asset")

    def get_batch_deletion_preview(self, shot_ids: List[int], asset_ids: List[int],
                                   db: Session) -> Dict[str, Any]:
        """Get preview information for a batch deletion operation.

        Args:
            shot_ids: List of shot IDs to delete
            asset_ids: List of asset IDs to delete
            db: Database session

        Returns:
            JSON-serializable dict with total/estimated counts plus
            'affected_users_count' and 'projects_affected_count'.  On a
            database error the preview is best-effort: counts gathered
            before the error are kept, the rest stay at their defaults.
        """
        preview: Dict[str, Any] = {
            'total_shots': len(shot_ids),
            'total_assets': len(asset_ids),
            'total_items': len(shot_ids) + len(asset_ids),
            'estimated_tasks': 0,
            'estimated_submissions': 0,
            'estimated_attachments': 0,
            'estimated_notes': 0,
            'estimated_reviews': 0,
        }
        # Tracked in locals (not in the dict) so the response never leaks a
        # raw, non-JSON-serializable set, even when a query below fails.
        affected_users: set = set()
        projects_affected: set = set()
        try:
            if shot_ids:
                preview['estimated_tasks'] += db.query(Task).filter(
                    Task.shot_id.in_(shot_ids),
                    Task.deleted_at.is_(None)
                ).count()
                shot_users = db.query(Task.assigned_user_id).filter(
                    Task.shot_id.in_(shot_ids),
                    Task.deleted_at.is_(None),
                    Task.assigned_user_id.isnot(None)
                ).distinct().all()
                affected_users.update(u[0] for u in shot_users)
                shot_projects = db.query(Shot.episode_id).join(Shot.episode).filter(
                    Shot.id.in_(shot_ids)
                ).distinct().all()
                projects_affected.update(p[0] for p in shot_projects)
            if asset_ids:
                preview['estimated_tasks'] += db.query(Task).filter(
                    Task.asset_id.in_(asset_ids),
                    Task.deleted_at.is_(None)
                ).count()
                asset_users = db.query(Task.assigned_user_id).filter(
                    Task.asset_id.in_(asset_ids),
                    Task.deleted_at.is_(None),
                    Task.assigned_user_id.isnot(None)
                ).distinct().all()
                affected_users.update(u[0] for u in asset_users)
                asset_projects = db.query(Asset.project_id).filter(
                    Asset.id.in_(asset_ids)
                ).distinct().all()
                projects_affected.update(p[0] for p in asset_projects)
            # Collect every live task ID so related rows can be counted once.
            all_task_ids: List[int] = []
            if shot_ids:
                all_task_ids.extend(t.id for t in db.query(Task.id).filter(
                    Task.shot_id.in_(shot_ids),
                    Task.deleted_at.is_(None)
                ).all())
            if asset_ids:
                all_task_ids.extend(t.id for t in db.query(Task.id).filter(
                    Task.asset_id.in_(asset_ids),
                    Task.deleted_at.is_(None)
                ).all())
            if all_task_ids:
                preview['estimated_submissions'] = db.query(Submission).filter(
                    Submission.task_id.in_(all_task_ids),
                    Submission.deleted_at.is_(None)
                ).count()
                preview['estimated_attachments'] = db.query(TaskAttachment).filter(
                    TaskAttachment.task_id.in_(all_task_ids),
                    TaskAttachment.deleted_at.is_(None)
                ).count()
                preview['estimated_notes'] = db.query(ProductionNote).filter(
                    ProductionNote.task_id.in_(all_task_ids),
                    ProductionNote.deleted_at.is_(None)
                ).count()
                submission_ids = [s.id for s in db.query(Submission.id).filter(
                    Submission.task_id.in_(all_task_ids),
                    Submission.deleted_at.is_(None)
                ).all()]
                if submission_ids:
                    preview['estimated_reviews'] = db.query(Review).filter(
                        Review.submission_id.in_(submission_ids),
                        Review.deleted_at.is_(None)
                    ).count()
        except SQLAlchemyError:
            # Best-effort preview: keep whatever was counted before the error.
            pass
        preview['affected_users_count'] = len(affected_users)
        preview['projects_affected_count'] = len(projects_affected)
        return preview

    def optimize_batch_size(self, total_items: int, estimated_related_items: int) -> int:
        """Calculate the optimal batch size for a batch operation.

        Args:
            total_items: Total number of items to process
            estimated_related_items: Estimated number of related rows
                (tasks, submissions, etc.) across all items

        Returns:
            Batch size, never larger than total_items:
            100 when there are no related rows, 10 / 25 for very / moderately
            complex items (>100 / >50 related rows per item), else 50.
        """
        base_batch_size = 50
        if estimated_related_items == 0:
            # No cascading work per item, so larger batches are safe.
            return min(100, total_items)
        # Average number of related rows each item drags along.
        complexity_factor = (estimated_related_items / total_items
                             if total_items > 0 else 1)
        if complexity_factor > 100:
            return min(10, total_items)
        elif complexity_factor > 50:
            return min(25, total_items)
        else:
            return min(base_batch_size, total_items)

    # ------------------------------------------------------------------
    # Internal helpers (shared by the shot and asset code paths)
    # ------------------------------------------------------------------

    def _run_batch_deletion(self, entity_ids: List[int], db: Session, current_user: User,
                            batch_size: int, model, fk_column, item_type: str,
                            info_service) -> BatchDeletionResult:
        """Drive a batch soft deletion over ``model`` rows.

        ``fk_column`` is the Task column linking tasks to the entity
        (Task.shot_id or Task.asset_id); ``info_service`` supplies the
        pre-deletion existence check.  Outcomes are recorded only after each
        batch commits, so a failed commit marks the whole batch as failed
        instead of leaving stale "successful" entries.
        """
        start_time = datetime.utcnow()
        result = BatchDeletionResult()
        result.total_items = len(entity_ids)
        committed = 0  # number of IDs whose outcome has been durably committed
        try:
            for offset in range(0, len(entity_ids), batch_size):
                chunk = entity_ids[offset:offset + batch_size]
                batch_items = [
                    self._delete_one(entity_id, db, current_user,
                                     model, fk_column, item_type, info_service)
                    for entity_id in chunk
                ]
                # One commit per batch keeps transactions short.
                db.commit()
                for item in batch_items:
                    result.items.append(item)
                    if item.success:
                        result.successful_deletions += 1
                        counts = item.deleted_counts
                        result.total_deleted_tasks += counts.get('tasks', 0)
                        result.total_deleted_submissions += counts.get('submissions', 0)
                        result.total_deleted_attachments += counts.get('attachments', 0)
                        result.total_deleted_notes += counts.get('notes', 0)
                        result.total_deleted_reviews += counts.get('reviews', 0)
                    else:
                        result.failed_deletions += 1
                committed += len(chunk)
        except Exception as e:
            db.rollback()
            # Everything not yet committed (current batch included) failed.
            for entity_id in entity_ids[committed:]:
                item = BatchDeletionItem()
                item.id = entity_id
                item.type = item_type
                item.success = False
                item.error = f"Batch operation failed: {str(e)}"
                result.items.append(item)
                result.failed_deletions += 1
        result.operation_duration = (datetime.utcnow() - start_time).total_seconds()
        return result

    def _run_batch_recovery(self, entity_ids: List[int], db: Session,
                            batch_size: int, model, fk_column,
                            item_type: str) -> BatchRecoveryResult:
        """Drive a batch recovery over ``model`` rows (see _run_batch_deletion)."""
        start_time = datetime.utcnow()
        result = BatchRecoveryResult()
        result.total_items = len(entity_ids)
        committed = 0  # number of IDs whose outcome has been durably committed
        try:
            for offset in range(0, len(entity_ids), batch_size):
                chunk = entity_ids[offset:offset + batch_size]
                batch_items = [
                    self._recover_one(entity_id, db, model, fk_column, item_type)
                    for entity_id in chunk
                ]
                db.commit()
                for item in batch_items:
                    result.items.append(item)
                    if item.success:
                        result.successful_recoveries += 1
                        counts = item.recovered_counts
                        result.total_recovered_tasks += counts.get('tasks', 0)
                        result.total_recovered_submissions += counts.get('submissions', 0)
                        result.total_recovered_attachments += counts.get('attachments', 0)
                        result.total_recovered_notes += counts.get('notes', 0)
                        result.total_recovered_reviews += counts.get('reviews', 0)
                    else:
                        result.failed_recoveries += 1
                committed += len(chunk)
        except Exception as e:
            db.rollback()
            for entity_id in entity_ids[committed:]:
                item = BatchRecoveryItem()
                item.id = entity_id
                item.type = item_type
                item.success = False
                item.error = f"Batch operation failed: {str(e)}"
                result.items.append(item)
                result.failed_recoveries += 1
        result.operation_duration = (datetime.utcnow() - start_time).total_seconds()
        return result

    def _delete_one(self, entity_id: int, db: Session, current_user: User,
                    model, fk_column, item_type: str, info_service) -> BatchDeletionItem:
        """Soft-delete a single entity and its related rows.

        Runs inside a SAVEPOINT so a failure rolls back this item's partial
        updates without disturbing the rest of the batch.  Never raises;
        failures are reported on the returned item.
        """
        item = BatchDeletionItem()
        item.id = entity_id
        item.type = item_type
        try:
            savepoint = db.begin_nested()
            try:
                entity = None
                # Existence check mirrors the single-item deletion services.
                if info_service.get_deletion_info(entity_id, db):
                    entity = db.query(model).filter(
                        model.id == entity_id,
                        model.deleted_at.is_(None)
                    ).first()
                if entity is None:
                    savepoint.rollback()
                    item.success = False
                    item.error = f"{item_type.capitalize()} not found or already deleted"
                    return item
                deleted_at = datetime.utcnow()
                counts = self._cascade_mark(db, fk_column, entity_id,
                                            deleted_at, current_user.id)
                entity.deleted_at = deleted_at
                entity.deleted_by = current_user.id
                db.flush()
                savepoint.commit()
            except Exception:
                savepoint.rollback()
                raise
            item.success = True
            item.name = entity.name
            item.deleted_counts = counts
        except Exception as e:
            item.success = False
            item.error = str(e)
        return item

    def _recover_one(self, entity_id: int, db: Session,
                     model, fk_column, item_type: str) -> BatchRecoveryItem:
        """Recover a single soft-deleted entity and its related rows.

        SAVEPOINT semantics as in _delete_one; never raises.
        """
        item = BatchRecoveryItem()
        item.id = entity_id
        item.type = item_type
        try:
            savepoint = db.begin_nested()
            try:
                entity = db.query(model).filter(
                    model.id == entity_id,
                    model.deleted_at.isnot(None)
                ).first()
                if entity is None:
                    savepoint.rollback()
                    item.success = False
                    item.error = f"{item_type.capitalize()} not found or not deleted"
                    return item
                # deleted_at=None clears the stamps on currently-deleted rows.
                counts = self._cascade_mark(db, fk_column, entity_id, None, None)
                entity.deleted_at = None
                entity.deleted_by = None
                db.flush()
                savepoint.commit()
            except Exception:
                savepoint.rollback()
                raise
            item.success = True
            item.name = entity.name
            item.recovered_counts = counts
        except Exception as e:
            item.success = False
            item.error = str(e)
        return item

    def _cascade_mark(self, db: Session, fk_column, entity_id: int,
                      deleted_at, deleted_by) -> Dict[str, int]:
        """Bulk-update the soft-deletion state of one entity's related rows.

        Touches the tasks linked to the entity via ``fk_column`` and each
        task's submissions, attachments, production notes, and reviews.
        A non-None ``deleted_at``/``deleted_by`` stamps currently-live rows
        (deletion); ``None`` clears currently-deleted rows (recovery).

        Returns:
            Dict of updated-row counts keyed by 'tasks', 'submissions',
            'attachments', 'notes', 'reviews'.
        """
        if deleted_at is None:
            def eligible(column):  # recovering: touch rows that are deleted
                return column.isnot(None)
        else:
            def eligible(column):  # deleting: touch rows that are live
                return column.is_(None)

        counts = {'tasks': 0, 'submissions': 0, 'attachments': 0,
                  'notes': 0, 'reviews': 0}
        # Capture the affected task IDs before the bulk update; with
        # synchronize_session=False the session is not refreshed, so the
        # IDs must come from the database directly.
        task_ids = [row.id for row in db.query(Task.id).filter(
            fk_column == entity_id,
            eligible(Task.deleted_at)
        ).all()]
        counts['tasks'] = db.query(Task).filter(
            fk_column == entity_id,
            eligible(Task.deleted_at)
        ).update({
            Task.deleted_at: deleted_at,
            Task.deleted_by: deleted_by
        }, synchronize_session=False)
        if task_ids:
            counts['submissions'] = db.query(Submission).filter(
                Submission.task_id.in_(task_ids),
                eligible(Submission.deleted_at)
            ).update({
                Submission.deleted_at: deleted_at,
                Submission.deleted_by: deleted_by
            }, synchronize_session=False)
            counts['attachments'] = db.query(TaskAttachment).filter(
                TaskAttachment.task_id.in_(task_ids),
                eligible(TaskAttachment.deleted_at)
            ).update({
                TaskAttachment.deleted_at: deleted_at,
                TaskAttachment.deleted_by: deleted_by
            }, synchronize_session=False)
            counts['notes'] = db.query(ProductionNote).filter(
                ProductionNote.task_id.in_(task_ids),
                eligible(ProductionNote.deleted_at)
            ).update({
                ProductionNote.deleted_at: deleted_at,
                ProductionNote.deleted_by: deleted_by
            }, synchronize_session=False)
            # Reviews hang off submissions, not tasks, so resolve the
            # submission IDs first (any deletion state, as in the original).
            submission_ids = [row.id for row in db.query(Submission.id).filter(
                Submission.task_id.in_(task_ids)
            ).all()]
            if submission_ids:
                counts['reviews'] = db.query(Review).filter(
                    Review.submission_id.in_(submission_ids),
                    eligible(Review.deleted_at)
                ).update({
                    Review.deleted_at: deleted_at,
                    Review.deleted_by: deleted_by
                }, synchronize_session=False)
        return counts