""" Data Consistency Service for Shot/Asset Task Status Optimization This service ensures data consistency between individual task updates and aggregated views, and provides real-time update propagation mechanisms. """ from typing import Dict, List, Optional, Any, Set from sqlalchemy.orm import Session from sqlalchemy import and_, or_, func from datetime import datetime import json import logging from models.task import Task from models.shot import Shot from models.asset import Asset from models.project import Project from models.episode import Episode from schemas.shot import TaskStatusInfo logger = logging.getLogger(__name__) class DataConsistencyService: """Service for maintaining data consistency between individual task updates and aggregated views.""" def __init__(self, db: Session): self.db = db def validate_task_aggregation_consistency(self, entity_id: int, entity_type: str) -> Dict[str, Any]: """ Validate that aggregated task data matches individual task records. Args: entity_id: ID of the shot or asset entity_type: 'shot' or 'asset' Returns: Dict containing validation results and any inconsistencies found """ logger.info(f"Validating task aggregation consistency for {entity_type} {entity_id}") # Get individual task records if entity_type == 'shot': tasks = self.db.query(Task).filter( and_( Task.shot_id == entity_id, Task.deleted_at.is_(None) ) ).all() entity = self.db.query(Shot).filter(Shot.id == entity_id).first() elif entity_type == 'asset': tasks = self.db.query(Task).filter( and_( Task.asset_id == entity_id, Task.deleted_at.is_(None) ) ).all() entity = self.db.query(Asset).filter(Asset.id == entity_id).first() else: raise ValueError(f"Invalid entity_type: {entity_type}") if not entity: return { 'valid': False, 'error': f'{entity_type.title()} not found', 'inconsistencies': [] } # Build expected aggregated data from individual tasks expected_task_status = {} expected_task_details = [] # Get all task types for the project project = self.db.query(Project).filter(Project.id == entity.project_id).first() if not project: return { 'valid': False, 'error': 'Project not found', 'inconsistencies': [] } # Get standard and custom task types if entity_type == 'shot': standard_types = ["layout", "animation", "simulation", "lighting", "compositing"] custom_types = project.custom_shot_task_types or [] else: # asset standard_types = ["modeling", "surfacing", "rigging"] custom_types = project.custom_asset_task_types or [] all_task_types = standard_types + custom_types # Initialize all task types as not_started for task_type in all_task_types: expected_task_status[task_type] = "not_started" # Update with actual task data for task in tasks: expected_task_status[task.task_type] = task.status expected_task_details.append(TaskStatusInfo( task_type=task.task_type, status=task.status, task_id=task.id, assigned_user_id=task.assigned_user_id )) # Get current aggregated data using the optimized query if entity_type == 'shot': aggregated_data = self._get_shot_aggregated_data(entity_id) else: aggregated_data = self._get_asset_aggregated_data(entity_id) # Compare expected vs actual aggregated data inconsistencies = [] # Check task_status consistency for task_type, expected_status in expected_task_status.items(): actual_status = aggregated_data.get('task_status', {}).get(task_type) if actual_status != expected_status: inconsistencies.append({ 'type': 'task_status_mismatch', 'task_type': task_type, 'expected': expected_status, 'actual': actual_status }) # Check task_details consistency actual_task_details = aggregated_data.get('task_details', []) expected_task_ids = {detail.task_id for detail in expected_task_details} actual_task_ids = {detail.get('task_id') for detail in actual_task_details if detail.get('task_id')} if expected_task_ids != actual_task_ids: inconsistencies.append({ 'type': 'task_details_mismatch', 'expected_task_ids': list(expected_task_ids), 'actual_task_ids': list(actual_task_ids), 'missing_tasks': list(expected_task_ids - actual_task_ids), 'extra_tasks': list(actual_task_ids - expected_task_ids) }) return { 'valid': len(inconsistencies) == 0, 'entity_id': entity_id, 'entity_type': entity_type, 'inconsistencies': inconsistencies, 'expected_task_status': expected_task_status, 'actual_task_status': aggregated_data.get('task_status', {}), 'validation_timestamp': datetime.utcnow().isoformat() } def _get_shot_aggregated_data(self, shot_id: int) -> Dict[str, Any]: """Get aggregated task data for a shot using the optimized query.""" from sqlalchemy.orm import joinedload, selectinload shot_with_tasks = ( self.db.query(Shot) .filter(and_(Shot.id == shot_id, Shot.deleted_at.is_(None))) .outerjoin(Task, (Task.shot_id == Shot.id) & (Task.deleted_at.is_(None))) .options( joinedload(Shot.episode).joinedload(Episode.project), selectinload(Shot.tasks).options(selectinload(Task.assigned_user)) ) .add_columns( Task.id.label('task_id'), Task.task_type, Task.status.label('task_status'), Task.assigned_user_id, Task.updated_at.label('task_updated_at') ) .all() ) if not shot_with_tasks: return {'task_status': {}, 'task_details': []} # Process the results similar to the optimized list_shots implementation shot = shot_with_tasks[0][0] project = shot.episode.project if shot.episode else None if not project: return {'task_status': {}, 'task_details': []} # Get task types standard_types = ["layout", "animation", "simulation", "lighting", "compositing"] custom_types = project.custom_shot_task_types or [] all_task_types = standard_types + custom_types # Initialize task status task_status = {} for task_type in all_task_types: task_status[task_type] = "not_started" # Build task details task_details = [] for row in shot_with_tasks: task_id = row[1] task_type = row[2] task_status_val = row[3] assigned_user_id = row[4] if task_id is not None: task_status[task_type] = task_status_val task_details.append({ 'task_type': task_type, 'status': task_status_val, 'task_id': task_id, 'assigned_user_id': assigned_user_id }) return { 'task_status': task_status, 'task_details': task_details } def _get_asset_aggregated_data(self, asset_id: int) -> Dict[str, Any]: """Get aggregated task data for an asset using the optimized query.""" from sqlalchemy.orm import joinedload, selectinload asset_with_tasks = ( self.db.query(Asset) .filter(and_(Asset.id == asset_id, Asset.deleted_at.is_(None))) .outerjoin(Task, (Task.asset_id == Asset.id) & (Task.deleted_at.is_(None))) .options( joinedload(Asset.project), selectinload(Asset.tasks).options(selectinload(Task.assigned_user)) ) .add_columns( Task.id.label('task_id'), Task.task_type, Task.status.label('task_status'), Task.assigned_user_id, Task.updated_at.label('task_updated_at') ) .all() ) if not asset_with_tasks: return {'task_status': {}, 'task_details': []} # Process the results similar to the optimized list_assets implementation asset = asset_with_tasks[0][0] project = asset.project if not project: return {'task_status': {}, 'task_details': []} # Get task types standard_types = ["modeling", "surfacing", "rigging"] custom_types = project.custom_asset_task_types or [] all_task_types = standard_types + custom_types # Initialize task status task_status = {} for task_type in all_task_types: task_status[task_type] = "not_started" # Build task details task_details = [] for row in asset_with_tasks: task_id = row[1] task_type = row[2] task_status_val = row[3] assigned_user_id = row[4] if task_id is not None: task_status[task_type] = task_status_val task_details.append({ 'task_type': task_type, 'status': task_status_val, 'task_id': task_id, 'assigned_user_id': assigned_user_id }) return { 'task_status': task_status, 'task_details': task_details } def validate_bulk_consistency(self, entity_ids: List[int], entity_type: str) -> Dict[str, Any]: """ Validate consistency for multiple entities at once. Args: entity_ids: List of shot or asset IDs entity_type: 'shot' or 'asset' Returns: Dict containing bulk validation results """ logger.info(f"Validating bulk consistency for {len(entity_ids)} {entity_type}s") results = [] total_inconsistencies = 0 for entity_id in entity_ids: validation_result = self.validate_task_aggregation_consistency(entity_id, entity_type) results.append(validation_result) if not validation_result['valid']: total_inconsistencies += len(validation_result.get('inconsistencies', [])) return { 'total_entities': len(entity_ids), 'valid_entities': sum(1 for r in results if r['valid']), 'invalid_entities': sum(1 for r in results if not r['valid']), 'total_inconsistencies': total_inconsistencies, 'results': results, 'validation_timestamp': datetime.utcnow().isoformat() } def propagate_task_update(self, task_id: int, old_status: Optional[str] = None, new_status: Optional[str] = None) -> Dict[str, Any]: """ Propagate task status changes to ensure real-time updates in aggregated data. Args: task_id: ID of the updated task old_status: Previous task status (optional) new_status: New task status (optional) Returns: Dict containing propagation results """ logger.info(f"Propagating task update for task {task_id}") # Get the task and its parent entity task = self.db.query(Task).filter(Task.id == task_id).first() if not task: return { 'success': False, 'error': 'Task not found', 'task_id': task_id } # Determine entity type and ID if task.shot_id: entity_type = 'shot' entity_id = task.shot_id elif task.asset_id: entity_type = 'asset' entity_id = task.asset_id else: return { 'success': False, 'error': 'Task is not associated with a shot or asset', 'task_id': task_id } # Validate consistency after the update validation_result = self.validate_task_aggregation_consistency(entity_id, entity_type) # Log the propagation propagation_log = { 'task_id': task_id, 'entity_type': entity_type, 'entity_id': entity_id, 'old_status': old_status, 'new_status': new_status or task.status, 'consistency_valid': validation_result['valid'], 'inconsistencies': validation_result.get('inconsistencies', []), 'timestamp': datetime.utcnow().isoformat() } logger.info(f"Task update propagated: {json.dumps(propagation_log)}") return { 'success': True, 'task_id': task_id, 'entity_type': entity_type, 'entity_id': entity_id, 'validation_result': validation_result, 'propagation_log': propagation_log } def get_consistency_report(self, project_id: Optional[int] = None) -> Dict[str, Any]: """ Generate a comprehensive consistency report for shots and assets. Args: project_id: Optional project ID to filter by Returns: Dict containing comprehensive consistency report """ logger.info(f"Generating consistency report for project {project_id}") # Get all shots and assets shot_query = self.db.query(Shot.id).filter(Shot.deleted_at.is_(None)) asset_query = self.db.query(Asset.id).filter(Asset.deleted_at.is_(None)) if project_id: shot_query = shot_query.filter(Shot.project_id == project_id) asset_query = asset_query.filter(Asset.project_id == project_id) shot_ids = [row[0] for row in shot_query.all()] asset_ids = [row[0] for row in asset_query.all()] # Validate consistency for all entities shot_results = self.validate_bulk_consistency(shot_ids, 'shot') if shot_ids else {'total_entities': 0, 'valid_entities': 0, 'invalid_entities': 0, 'total_inconsistencies': 0, 'results': []} asset_results = self.validate_bulk_consistency(asset_ids, 'asset') if asset_ids else {'total_entities': 0, 'valid_entities': 0, 'invalid_entities': 0, 'total_inconsistencies': 0, 'results': []} return { 'project_id': project_id, 'shots': shot_results, 'assets': asset_results, 'summary': { 'total_entities': shot_results['total_entities'] + asset_results['total_entities'], 'valid_entities': shot_results['valid_entities'] + asset_results['valid_entities'], 'invalid_entities': shot_results['invalid_entities'] + asset_results['invalid_entities'], 'total_inconsistencies': shot_results['total_inconsistencies'] + asset_results['total_inconsistencies'], 'consistency_percentage': ( (shot_results['valid_entities'] + asset_results['valid_entities']) / max(1, shot_results['total_entities'] + asset_results['total_entities']) ) * 100 }, 'report_timestamp': datetime.utcnow().isoformat() } def create_data_consistency_service(db: Session) -> DataConsistencyService: """Factory function to create a DataConsistencyService instance.""" return DataConsistencyService(db)