# LinkDesk/backend/services/data_consistency.py
"""
Data Consistency Service for Shot/Asset Task Status Optimization
This service ensures data consistency between individual task updates and aggregated views,
and provides real-time update propagation mechanisms.
"""
from typing import Dict, List, Optional, Any, Set
from sqlalchemy.orm import Session
from sqlalchemy import and_, or_, func
from datetime import datetime
import json
import logging
from models.task import Task
from models.shot import Shot
from models.asset import Asset
from models.project import Project
from models.episode import Episode
from schemas.shot import TaskStatusInfo
logger = logging.getLogger(__name__)
class DataConsistencyService:
    """Service for maintaining data consistency between individual task updates and aggregated views."""

    def __init__(self, db: Session):
        # The session is caller-owned; this service only reads and never
        # commits, flushes, or closes it.
        self.db = db

    def validate_task_aggregation_consistency(self, entity_id: int, entity_type: str) -> Dict[str, Any]:
        """
        Validate that aggregated task data matches individual task records.

        Args:
            entity_id: ID of the shot or asset
            entity_type: 'shot' or 'asset'

        Returns:
            Dict containing validation results and any inconsistencies found

        Raises:
            ValueError: If entity_type is neither 'shot' nor 'asset'.
        """
        logger.info(f"Validating task aggregation consistency for {entity_type} {entity_id}")

        # Fetch the ground truth: the entity's non-deleted task rows.
        if entity_type == 'shot':
            tasks = self.db.query(Task).filter(
                and_(
                    Task.shot_id == entity_id,
                    Task.deleted_at.is_(None)
                )
            ).all()
            entity = self.db.query(Shot).filter(Shot.id == entity_id).first()
        elif entity_type == 'asset':
            tasks = self.db.query(Task).filter(
                and_(
                    Task.asset_id == entity_id,
                    Task.deleted_at.is_(None)
                )
            ).all()
            entity = self.db.query(Asset).filter(Asset.id == entity_id).first()
        else:
            raise ValueError(f"Invalid entity_type: {entity_type}")

        if not entity:
            return {
                'valid': False,
                'error': f'{entity_type.title()} not found',
                'inconsistencies': []
            }

        # Build the expected aggregated view from the individual task rows.
        expected_task_status: Dict[str, str] = {}
        expected_task_details: List[TaskStatusInfo] = []

        # The project defines which custom task types apply to this entity.
        project = self.db.query(Project).filter(Project.id == entity.project_id).first()
        if not project:
            return {
                'valid': False,
                'error': 'Project not found',
                'inconsistencies': []
            }

        # Standard task types are hard-coded per entity kind; custom types
        # come from the project configuration.
        if entity_type == 'shot':
            standard_types = ["layout", "animation", "simulation", "lighting", "compositing"]
            custom_types = project.custom_shot_task_types or []
        else:  # asset
            standard_types = ["modeling", "surfacing", "rigging"]
            custom_types = project.custom_asset_task_types or []
        all_task_types = standard_types + custom_types

        # Every known task type defaults to "not_started" until a task row
        # says otherwise.
        for task_type in all_task_types:
            expected_task_status[task_type] = "not_started"

        # Overlay the actual task data (also records any task whose type is
        # not in the configured list).
        for task in tasks:
            expected_task_status[task.task_type] = task.status
            expected_task_details.append(TaskStatusInfo(
                task_type=task.task_type,
                status=task.status,
                task_id=task.id,
                assigned_user_id=task.assigned_user_id
            ))

        # Get the current aggregated data via the same optimized query the
        # list endpoints use, so we validate what callers actually see.
        if entity_type == 'shot':
            aggregated_data = self._get_shot_aggregated_data(entity_id)
        else:
            aggregated_data = self._get_asset_aggregated_data(entity_id)

        inconsistencies: List[Dict[str, Any]] = []

        # Per-task-type status comparison.
        for task_type, expected_status in expected_task_status.items():
            actual_status = aggregated_data.get('task_status', {}).get(task_type)
            if actual_status != expected_status:
                inconsistencies.append({
                    'type': 'task_status_mismatch',
                    'task_type': task_type,
                    'expected': expected_status,
                    'actual': actual_status
                })

        # Task-id set comparison between expected and aggregated details.
        actual_task_details = aggregated_data.get('task_details', [])
        expected_task_ids = {detail.task_id for detail in expected_task_details}
        actual_task_ids = {detail.get('task_id') for detail in actual_task_details if detail.get('task_id')}
        if expected_task_ids != actual_task_ids:
            inconsistencies.append({
                'type': 'task_details_mismatch',
                'expected_task_ids': list(expected_task_ids),
                'actual_task_ids': list(actual_task_ids),
                'missing_tasks': list(expected_task_ids - actual_task_ids),
                'extra_tasks': list(actual_task_ids - expected_task_ids)
            })

        return {
            'valid': len(inconsistencies) == 0,
            'entity_id': entity_id,
            'entity_type': entity_type,
            'inconsistencies': inconsistencies,
            'expected_task_status': expected_task_status,
            'actual_task_status': aggregated_data.get('task_status', {}),
            'validation_timestamp': datetime.utcnow().isoformat()
        }

    def _get_shot_aggregated_data(self, shot_id: int) -> Dict[str, Any]:
        """Get aggregated task data for a shot using the optimized query.

        Returns a dict with 'task_status' (task_type -> status, defaulted to
        "not_started") and 'task_details' (one dict per joined task row).
        """
        from sqlalchemy.orm import joinedload, selectinload

        # Outer join so a shot with zero tasks still yields one row
        # (with NULL task columns).
        shot_with_tasks = (
            self.db.query(Shot)
            .filter(and_(Shot.id == shot_id, Shot.deleted_at.is_(None)))
            .outerjoin(Task, (Task.shot_id == Shot.id) & (Task.deleted_at.is_(None)))
            .options(
                joinedload(Shot.episode).joinedload(Episode.project),
                selectinload(Shot.tasks).options(selectinload(Task.assigned_user))
            )
            .add_columns(
                Task.id.label('task_id'),
                Task.task_type,
                Task.status.label('task_status'),
                Task.assigned_user_id,
                Task.updated_at.label('task_updated_at')
            )
            .all()
        )
        if not shot_with_tasks:
            return {'task_status': {}, 'task_details': []}

        # Process the results similar to the optimized list_shots implementation.
        shot = shot_with_tasks[0][0]
        project = shot.episode.project if shot.episode else None
        if not project:
            return {'task_status': {}, 'task_details': []}

        # Standard shot task types plus the project's custom ones.
        standard_types = ["layout", "animation", "simulation", "lighting", "compositing"]
        custom_types = project.custom_shot_task_types or []
        all_task_types = standard_types + custom_types

        task_status = {task_type: "not_started" for task_type in all_task_types}

        # Each row is (Shot, task_id, task_type, status, assigned_user_id,
        # task_updated_at); task columns are None when no task matched.
        task_details = []
        for row in shot_with_tasks:
            task_id = row[1]
            task_type = row[2]
            task_status_val = row[3]
            assigned_user_id = row[4]
            if task_id is not None:
                task_status[task_type] = task_status_val
                task_details.append({
                    'task_type': task_type,
                    'status': task_status_val,
                    'task_id': task_id,
                    'assigned_user_id': assigned_user_id
                })

        return {
            'task_status': task_status,
            'task_details': task_details
        }

    def _get_asset_aggregated_data(self, asset_id: int) -> Dict[str, Any]:
        """Get aggregated task data for an asset using the optimized query.

        Returns a dict with 'task_status' (task_type -> status, defaulted to
        "not_started") and 'task_details' (one dict per joined task row).
        """
        from sqlalchemy.orm import joinedload, selectinload

        # Outer join so an asset with zero tasks still yields one row
        # (with NULL task columns).
        asset_with_tasks = (
            self.db.query(Asset)
            .filter(and_(Asset.id == asset_id, Asset.deleted_at.is_(None)))
            .outerjoin(Task, (Task.asset_id == Asset.id) & (Task.deleted_at.is_(None)))
            .options(
                joinedload(Asset.project),
                selectinload(Asset.tasks).options(selectinload(Task.assigned_user))
            )
            .add_columns(
                Task.id.label('task_id'),
                Task.task_type,
                Task.status.label('task_status'),
                Task.assigned_user_id,
                Task.updated_at.label('task_updated_at')
            )
            .all()
        )
        if not asset_with_tasks:
            return {'task_status': {}, 'task_details': []}

        # Process the results similar to the optimized list_assets implementation.
        asset = asset_with_tasks[0][0]
        project = asset.project
        if not project:
            return {'task_status': {}, 'task_details': []}

        # Standard asset task types plus the project's custom ones.
        standard_types = ["modeling", "surfacing", "rigging"]
        custom_types = project.custom_asset_task_types or []
        all_task_types = standard_types + custom_types

        task_status = {task_type: "not_started" for task_type in all_task_types}

        # Each row is (Asset, task_id, task_type, status, assigned_user_id,
        # task_updated_at); task columns are None when no task matched.
        task_details = []
        for row in asset_with_tasks:
            task_id = row[1]
            task_type = row[2]
            task_status_val = row[3]
            assigned_user_id = row[4]
            if task_id is not None:
                task_status[task_type] = task_status_val
                task_details.append({
                    'task_type': task_type,
                    'status': task_status_val,
                    'task_id': task_id,
                    'assigned_user_id': assigned_user_id
                })

        return {
            'task_status': task_status,
            'task_details': task_details
        }

    def validate_bulk_consistency(self, entity_ids: List[int], entity_type: str) -> Dict[str, Any]:
        """
        Validate consistency for multiple entities at once.

        Args:
            entity_ids: List of shot or asset IDs
            entity_type: 'shot' or 'asset'

        Returns:
            Dict containing bulk validation results
        """
        logger.info(f"Validating bulk consistency for {len(entity_ids)} {entity_type}s")
        results = []
        total_inconsistencies = 0
        for entity_id in entity_ids:
            validation_result = self.validate_task_aggregation_consistency(entity_id, entity_type)
            results.append(validation_result)
            if not validation_result['valid']:
                total_inconsistencies += len(validation_result.get('inconsistencies', []))
        return {
            'total_entities': len(entity_ids),
            'valid_entities': sum(1 for r in results if r['valid']),
            'invalid_entities': sum(1 for r in results if not r['valid']),
            'total_inconsistencies': total_inconsistencies,
            'results': results,
            'validation_timestamp': datetime.utcnow().isoformat()
        }

    def propagate_task_update(self, task_id: int, old_status: Optional[str] = None, new_status: Optional[str] = None) -> Dict[str, Any]:
        """
        Propagate task status changes to ensure real-time updates in aggregated data.

        Args:
            task_id: ID of the updated task
            old_status: Previous task status (optional)
            new_status: New task status (optional)

        Returns:
            Dict containing propagation results
        """
        logger.info(f"Propagating task update for task {task_id}")

        task = self.db.query(Task).filter(Task.id == task_id).first()
        if not task:
            return {
                'success': False,
                'error': 'Task not found',
                'task_id': task_id
            }

        # A task belongs to exactly one parent: a shot or an asset.
        if task.shot_id:
            entity_type = 'shot'
            entity_id = task.shot_id
        elif task.asset_id:
            entity_type = 'asset'
            entity_id = task.asset_id
        else:
            return {
                'success': False,
                'error': 'Task is not associated with a shot or asset',
                'task_id': task_id
            }

        # Re-validate the parent's aggregation after the update so callers
        # learn immediately whether the change left things consistent.
        validation_result = self.validate_task_aggregation_consistency(entity_id, entity_type)

        propagation_log = {
            'task_id': task_id,
            'entity_type': entity_type,
            'entity_id': entity_id,
            'old_status': old_status,
            # Fall back to the task's current status when no new_status given.
            'new_status': new_status or task.status,
            'consistency_valid': validation_result['valid'],
            'inconsistencies': validation_result.get('inconsistencies', []),
            'timestamp': datetime.utcnow().isoformat()
        }
        logger.info(f"Task update propagated: {json.dumps(propagation_log)}")

        return {
            'success': True,
            'task_id': task_id,
            'entity_type': entity_type,
            'entity_id': entity_id,
            'validation_result': validation_result,
            'propagation_log': propagation_log
        }

    def get_consistency_report(self, project_id: Optional[int] = None) -> Dict[str, Any]:
        """
        Generate a comprehensive consistency report for shots and assets.

        Args:
            project_id: Optional project ID to filter by

        Returns:
            Dict containing comprehensive consistency report
        """
        logger.info(f"Generating consistency report for project {project_id}")

        # Collect all non-deleted shot/asset IDs, optionally project-scoped.
        shot_query = self.db.query(Shot.id).filter(Shot.deleted_at.is_(None))
        asset_query = self.db.query(Asset.id).filter(Asset.deleted_at.is_(None))
        # Explicit None check: a truthy test would silently skip the filter
        # for project_id == 0.
        if project_id is not None:
            shot_query = shot_query.filter(Shot.project_id == project_id)
            asset_query = asset_query.filter(Asset.project_id == project_id)
        shot_ids = [row[0] for row in shot_query.all()]
        asset_ids = [row[0] for row in asset_query.all()]

        # Empty result sets short-circuit to an all-zero summary shape.
        shot_results = self.validate_bulk_consistency(shot_ids, 'shot') if shot_ids else {'total_entities': 0, 'valid_entities': 0, 'invalid_entities': 0, 'total_inconsistencies': 0, 'results': []}
        asset_results = self.validate_bulk_consistency(asset_ids, 'asset') if asset_ids else {'total_entities': 0, 'valid_entities': 0, 'invalid_entities': 0, 'total_inconsistencies': 0, 'results': []}

        return {
            'project_id': project_id,
            'shots': shot_results,
            'assets': asset_results,
            'summary': {
                'total_entities': shot_results['total_entities'] + asset_results['total_entities'],
                'valid_entities': shot_results['valid_entities'] + asset_results['valid_entities'],
                'invalid_entities': shot_results['invalid_entities'] + asset_results['invalid_entities'],
                'total_inconsistencies': shot_results['total_inconsistencies'] + asset_results['total_inconsistencies'],
                # max(1, ...) guards the division when there are no entities.
                'consistency_percentage': (
                    (shot_results['valid_entities'] + asset_results['valid_entities']) /
                    max(1, shot_results['total_entities'] + asset_results['total_entities'])
                ) * 100
            },
            'report_timestamp': datetime.utcnow().isoformat()
        }
def create_data_consistency_service(db: Session) -> DataConsistencyService:
    """Factory function to create a DataConsistencyService instance.

    Args:
        db: Caller-owned SQLAlchemy session the service will read from.

    Returns:
        A new DataConsistencyService bound to *db*.
    """
    return DataConsistencyService(db)