data_manager.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661
  1. """
  2. Data Manager Module
  3. Handles all data persistence operations for analysis results.
  4. Manages database operations and file storage for analysis data.
  5. """
  6. import json
  7. import logging
  8. import sqlite3
  9. import shutil
  10. from datetime import datetime, timezone
  11. from pathlib import Path
  12. from typing import Optional, Dict, List, Any
  13. from .db_schema import get_database_connection, close_database_connection, init_database
  14. from .config import DATABASE_PATH, ANALYSES_DIR
  15. logger = logging.getLogger(__name__)
  16. class DataManager:
  17. """
  18. Manages persistent storage of analysis data.
  19. Handles:
  20. - Database operations for metadata
  21. - File system storage for binary files
  22. - Analysis record lifecycle
  23. """
  24. def __init__(self):
  25. """Initialize the data manager."""
  26. self.db_path = DATABASE_PATH
  27. self.analyses_dir = ANALYSES_DIR
  28. # Initialize database
  29. init_database(self.db_path)
  30. logger.info(f"DataManager initialized with database: {self.db_path}")
  31. def create_analysis(self, report_id: str, device_id: str) -> Optional[int]:
  32. """
  33. Create a new analysis record.
  34. Args:
  35. report_id: Unique report identifier (e.g., "DUR-20250115-143022")
  36. device_id: Device identifier
  37. Returns:
  38. int: Analysis ID or None if failed
  39. """
  40. try:
  41. conn = get_database_connection(self.db_path)
  42. if not conn:
  43. logger.error("Failed to connect to database")
  44. return None
  45. cursor = conn.cursor()
  46. cursor.execute("""
  47. INSERT INTO analyses (report_id, timestamp, device_id)
  48. VALUES (?, ?, ?)
  49. """, (report_id, datetime.now(), device_id))
  50. analysis_id = cursor.lastrowid
  51. conn.commit()
  52. close_database_connection(conn)
  53. # Create directory structure for this analysis
  54. analysis_dir = self.analyses_dir / report_id
  55. (analysis_dir / "inputs").mkdir(parents=True, exist_ok=True)
  56. (analysis_dir / "results").mkdir(parents=True, exist_ok=True)
  57. (analysis_dir / "reports").mkdir(parents=True, exist_ok=True)
  58. logger.info(f"Created analysis record: {report_id} (ID: {analysis_id})")
  59. return analysis_id
  60. except Exception as e:
  61. logger.error(f"Error creating analysis: {e}")
  62. return None
  63. def save_input_file(self, analysis_id: int, input_type: str, original_path: str) -> bool:
  64. """
  65. Save an input file and record it in the database.
  66. Args:
  67. analysis_id: Analysis ID
  68. input_type: Type of input ('dslr_side', 'dslr_top', 'multispectral', 'thermal', 'audio')
  69. original_path: Path to the original file
  70. Returns:
  71. bool: True if successful
  72. """
  73. try:
  74. original_file = Path(original_path)
  75. if not original_file.exists():
  76. logger.error(f"Input file not found: {original_path}")
  77. return False
  78. # Get analysis record to find report_id
  79. conn = get_database_connection(self.db_path)
  80. if not conn:
  81. logger.error("Failed to connect to database")
  82. return False
  83. cursor = conn.cursor()
  84. cursor.execute("SELECT report_id FROM analyses WHERE id = ?", (analysis_id,))
  85. result = cursor.fetchone()
  86. close_database_connection(conn)
  87. if not result:
  88. logger.error(f"Analysis not found: {analysis_id}")
  89. return False
  90. report_id = result[0]
  91. # Determine destination filename
  92. # Map input_type to file extension if needed
  93. filename = original_file.name
  94. dest_dir = self.analyses_dir / report_id / "inputs"
  95. dest_path = dest_dir / filename
  96. # Copy file
  97. shutil.copy2(original_file, dest_path)
  98. file_size = dest_path.stat().st_size
  99. # Record in database
  100. conn = get_database_connection(self.db_path)
  101. if not conn:
  102. logger.error("Failed to connect to database")
  103. return False
  104. cursor = conn.cursor()
  105. relative_path = f"inputs/{filename}"
  106. cursor.execute("""
  107. INSERT INTO analysis_inputs (analysis_id, input_type, original_path, saved_path, file_size)
  108. VALUES (?, ?, ?, ?, ?)
  109. """, (analysis_id, input_type, original_path, relative_path, file_size))
  110. conn.commit()
  111. close_database_connection(conn)
  112. logger.info(f"Saved input file: {input_type} -> {relative_path}")
  113. return True
  114. except Exception as e:
  115. logger.error(f"Error saving input file: {e}")
  116. return False
  117. def save_result(self, analysis_id: int, model_type: str, result_dict: Dict[str, Any]) -> bool:
  118. """
  119. Save model result to database.
  120. Args:
  121. analysis_id: Analysis ID
  122. model_type: Type of model ('defect', 'locule', 'maturity', 'shape', 'audio')
  123. result_dict: Dictionary containing result data:
  124. - predicted_class: Predicted class name
  125. - confidence: Confidence value (0-1)
  126. - probabilities: Dict of class probabilities
  127. - processing_time: Time in seconds
  128. - metadata: Additional model-specific data (optional)
  129. Returns:
  130. bool: True if successful
  131. """
  132. try:
  133. predicted_class = result_dict.get('predicted_class', 'Unknown')
  134. confidence = result_dict.get('confidence', 0.0)
  135. probabilities = result_dict.get('probabilities', {})
  136. processing_time = result_dict.get('processing_time', 0.0)
  137. metadata = result_dict.get('metadata', {})
  138. # Ensure confidence is 0-1 range
  139. if confidence > 1.0:
  140. confidence = confidence / 100.0
  141. # Convert to JSON strings
  142. probabilities_json = json.dumps(probabilities)
  143. metadata_json = json.dumps(metadata)
  144. conn = get_database_connection(self.db_path)
  145. if not conn:
  146. logger.error("Failed to connect to database")
  147. return False
  148. cursor = conn.cursor()
  149. cursor.execute("""
  150. INSERT INTO analysis_results
  151. (analysis_id, model_type, predicted_class, confidence, probabilities, processing_time, metadata)
  152. VALUES (?, ?, ?, ?, ?, ?, ?)
  153. """, (analysis_id, model_type, predicted_class, confidence, probabilities_json, processing_time, metadata_json))
  154. conn.commit()
  155. close_database_connection(conn)
  156. logger.info(f"Saved result: {model_type} -> {predicted_class} ({confidence*100:.1f}%)")
  157. return True
  158. except Exception as e:
  159. logger.error(f"Error saving result: {e}")
  160. return False
  161. def save_visualization(self, analysis_id: int, viz_type: str, image_data: Any, format_ext: str = 'png') -> bool:
  162. """
  163. Save visualization image to file system.
  164. Args:
  165. analysis_id: Analysis ID
  166. viz_type: Type of visualization ('defect_annotated', 'locule_annotated', etc.)
  167. image_data: Image data (QImage, numpy array, or file path)
  168. format_ext: File extension for saved image
  169. Returns:
  170. bool: True if successful
  171. """
  172. try:
  173. from PyQt5.QtGui import QImage, QPixmap
  174. import numpy as np
  175. # Get report_id
  176. conn = get_database_connection(self.db_path)
  177. if not conn:
  178. logger.error("Failed to connect to database")
  179. return False
  180. cursor = conn.cursor()
  181. cursor.execute("SELECT report_id FROM analyses WHERE id = ?", (analysis_id,))
  182. result = cursor.fetchone()
  183. close_database_connection(conn)
  184. if not result:
  185. logger.error(f"Analysis not found: {analysis_id}")
  186. return False
  187. report_id = result[0]
  188. results_dir = self.analyses_dir / report_id / "results"
  189. # Generate filename
  190. filename = f"{viz_type}.{format_ext}"
  191. file_path = results_dir / filename
  192. # Save based on image_data type
  193. if isinstance(image_data, str):
  194. # It's a file path, copy it
  195. shutil.copy2(image_data, file_path)
  196. elif isinstance(image_data, QImage):
  197. # PyQt QImage
  198. pixmap = QPixmap.fromImage(image_data)
  199. pixmap.save(str(file_path))
  200. elif isinstance(image_data, QPixmap):
  201. # PyQt QPixmap
  202. image_data.save(str(file_path))
  203. elif isinstance(image_data, np.ndarray):
  204. # Numpy array - save as PNG using OpenCV
  205. import cv2
  206. # Convert RGB to BGR if needed
  207. if len(image_data.shape) == 3 and image_data.shape[2] == 3:
  208. image_data = cv2.cvtColor(image_data, cv2.COLOR_RGB2BGR)
  209. cv2.imwrite(str(file_path), image_data)
  210. else:
  211. logger.error(f"Unsupported image data type: {type(image_data)}")
  212. return False
  213. # Record in database
  214. conn = get_database_connection(self.db_path)
  215. if not conn:
  216. logger.error("Failed to connect to database")
  217. return False
  218. cursor = conn.cursor()
  219. relative_path = f"results/{filename}"
  220. cursor.execute("""
  221. INSERT INTO analysis_visualizations (analysis_id, visualization_type, file_path)
  222. VALUES (?, ?, ?)
  223. """, (analysis_id, viz_type, relative_path))
  224. conn.commit()
  225. close_database_connection(conn)
  226. logger.info(f"Saved visualization: {viz_type} -> {relative_path}")
  227. return True
  228. except Exception as e:
  229. logger.error(f"Error saving visualization: {e}")
  230. return False
  231. def finalize_analysis(self, analysis_id: int, overall_grade: str, grade_description: str, total_time: float) -> bool:
  232. """
  233. Finalize an analysis record with results.
  234. Args:
  235. analysis_id: Analysis ID
  236. overall_grade: Grade ('A', 'B', or 'C')
  237. grade_description: Description of the grade
  238. total_time: Total processing time in seconds
  239. Returns:
  240. bool: True if successful
  241. """
  242. try:
  243. conn = get_database_connection(self.db_path)
  244. if not conn:
  245. logger.error("Failed to connect to database")
  246. return False
  247. cursor = conn.cursor()
  248. cursor.execute("""
  249. UPDATE analyses
  250. SET overall_grade = ?, grade_description = ?, processing_time = ?
  251. WHERE id = ?
  252. """, (overall_grade, grade_description, total_time, analysis_id))
  253. conn.commit()
  254. close_database_connection(conn)
  255. logger.info(f"Finalized analysis: ID={analysis_id}, Grade={overall_grade}")
  256. return True
  257. except Exception as e:
  258. logger.error(f"Error finalizing analysis: {e}")
  259. return False
  260. def get_analysis(self, report_id: str) -> Optional[Dict[str, Any]]:
  261. """
  262. Retrieve complete analysis data.
  263. Args:
  264. report_id: Report ID to retrieve
  265. Returns:
  266. Dict with analysis data or None if not found
  267. """
  268. try:
  269. conn = get_database_connection(self.db_path)
  270. if not conn:
  271. logger.error("Failed to connect to database")
  272. return None
  273. cursor = conn.cursor()
  274. # Get analysis record
  275. cursor.execute("""
  276. SELECT id, report_id, timestamp, device_id, overall_grade, grade_description, processing_time, created_at
  277. FROM analyses
  278. WHERE report_id = ?
  279. """, (report_id,))
  280. analysis_row = cursor.fetchone()
  281. if not analysis_row:
  282. logger.warning(f"Analysis not found: {report_id}")
  283. close_database_connection(conn)
  284. return None
  285. analysis_id = analysis_row[0]
  286. # Get inputs
  287. cursor.execute("""
  288. SELECT input_type, original_path, saved_path, file_size
  289. FROM analysis_inputs
  290. WHERE analysis_id = ?
  291. ORDER BY created_at
  292. """, (analysis_id,))
  293. inputs = [dict(zip(['input_type', 'original_path', 'saved_path', 'file_size'], row))
  294. for row in cursor.fetchall()]
  295. # Get results
  296. cursor.execute("""
  297. SELECT model_type, predicted_class, confidence, probabilities, processing_time, metadata
  298. FROM analysis_results
  299. WHERE analysis_id = ?
  300. ORDER BY created_at
  301. """, (analysis_id,))
  302. results = []
  303. for row in cursor.fetchall():
  304. result_dict = {
  305. 'model_type': row[0],
  306. 'predicted_class': row[1],
  307. 'confidence': row[2],
  308. 'probabilities': json.loads(row[3] or '{}'),
  309. 'processing_time': row[4],
  310. 'metadata': json.loads(row[5] or '{}'),
  311. }
  312. results.append(result_dict)
  313. # Get visualizations
  314. cursor.execute("""
  315. SELECT visualization_type, file_path
  316. FROM analysis_visualizations
  317. WHERE analysis_id = ?
  318. ORDER BY created_at
  319. """, (analysis_id,))
  320. visualizations = [dict(zip(['visualization_type', 'file_path'], row))
  321. for row in cursor.fetchall()]
  322. close_database_connection(conn)
  323. return {
  324. 'id': analysis_id,
  325. 'report_id': analysis_row[1],
  326. 'timestamp': analysis_row[2],
  327. 'device_id': analysis_row[3],
  328. 'overall_grade': analysis_row[4],
  329. 'grade_description': analysis_row[5],
  330. 'processing_time': analysis_row[6],
  331. 'created_at': analysis_row[7],
  332. 'inputs': inputs,
  333. 'results': results,
  334. 'visualizations': visualizations,
  335. }
  336. except Exception as e:
  337. logger.error(f"Error retrieving analysis: {e}")
  338. return None
  339. def list_recent_analyses(self, limit: int = 50) -> List[Dict[str, Any]]:
  340. """
  341. Get list of recent analyses.
  342. Args:
  343. limit: Maximum number of analyses to return
  344. Returns:
  345. List of analysis records with created_at formatted in local timezone
  346. """
  347. try:
  348. conn = get_database_connection(self.db_path)
  349. if not conn:
  350. logger.error("Failed to connect to database")
  351. return []
  352. cursor = conn.cursor()
  353. cursor.execute("""
  354. SELECT id, report_id, timestamp, device_id, overall_grade, processing_time, created_at
  355. FROM analyses
  356. ORDER BY created_at DESC
  357. LIMIT ?
  358. """, (limit,))
  359. analyses = []
  360. for row in cursor.fetchall():
  361. # Convert created_at from UTC to local timezone
  362. created_at = row[6]
  363. if created_at:
  364. try:
  365. # Parse the UTC datetime string from database
  366. utc_datetime = datetime.strptime(created_at, "%Y-%m-%d %H:%M:%S")
  367. # Mark it as UTC
  368. utc_datetime = utc_datetime.replace(tzinfo=timezone.utc)
  369. # Convert to local timezone
  370. local_datetime = utc_datetime.astimezone()
  371. # Format for display
  372. created_at = local_datetime.strftime("%Y-%m-%d %H:%M:%S")
  373. except Exception as e:
  374. # If parsing fails, keep original value
  375. logger.debug(f"Failed to parse datetime: {e}")
  376. pass
  377. analyses.append({
  378. 'id': row[0],
  379. 'report_id': row[1],
  380. 'timestamp': row[2],
  381. 'device_id': row[3],
  382. 'overall_grade': row[4],
  383. 'processing_time': row[5],
  384. 'created_at': created_at,
  385. })
  386. close_database_connection(conn)
  387. return analyses
  388. except Exception as e:
  389. logger.error(f"Error listing analyses: {e}")
  390. return []
  391. def get_analysis_file_path(self, report_id: str, relative_path: str) -> Optional[Path]:
  392. """
  393. Get full path to a file stored in analysis folder.
  394. Args:
  395. report_id: Report ID
  396. relative_path: Relative path (e.g., 'inputs/image.jpg' or 'results/defect_annotated.png')
  397. Returns:
  398. Full file path or None if not found
  399. """
  400. file_path = self.analyses_dir / report_id / relative_path
  401. if file_path.exists():
  402. return file_path
  403. return None
  404. def export_analysis_to_dict(self, report_id: str) -> Optional[Dict[str, Any]]:
  405. """
  406. Export analysis data for reports display.
  407. Converts file paths to full paths for use in UI.
  408. Args:
  409. report_id: Report ID to export
  410. Returns:
  411. Analysis data with full file paths or None if not found
  412. """
  413. analysis = self.get_analysis(report_id)
  414. if not analysis:
  415. return None
  416. # Convert relative paths to full paths
  417. for input_item in analysis['inputs']:
  418. if input_item['saved_path']:
  419. full_path = self.get_analysis_file_path(report_id, input_item['saved_path'])
  420. input_item['full_path'] = str(full_path) if full_path else None
  421. for viz_item in analysis['visualizations']:
  422. if viz_item['file_path']:
  423. full_path = self.get_analysis_file_path(report_id, viz_item['file_path'])
  424. viz_item['full_path'] = str(full_path) if full_path else None
  425. return analysis
  426. def get_daily_analysis_count(self) -> int:
  427. """
  428. Get count of analyses created today.
  429. Returns:
  430. int: Number of analyses created today (UTC date)
  431. """
  432. try:
  433. conn = get_database_connection(self.db_path)
  434. if not conn:
  435. logger.error("Failed to connect to database")
  436. return 0
  437. cursor = conn.cursor()
  438. # Get today's date in UTC
  439. today = datetime.now(timezone.utc).date()
  440. # Query analyses created today
  441. cursor.execute("""
  442. SELECT COUNT(*)
  443. FROM analyses
  444. WHERE DATE(created_at) = ?
  445. """, (str(today),))
  446. result = cursor.fetchone()
  447. close_database_connection(conn)
  448. count = result[0] if result else 0
  449. logger.info(f"Daily analysis count: {count}")
  450. return count
  451. except Exception as e:
  452. logger.error(f"Error getting daily analysis count: {e}")
  453. return 0
  454. def get_average_processing_time(self, limit: int = 100) -> float:
  455. """
  456. Get average processing time from recent analyses.
  457. Args:
  458. limit: Maximum number of recent analyses to consider
  459. Returns:
  460. float: Average processing time in seconds (0.0 if no data)
  461. """
  462. try:
  463. conn = get_database_connection(self.db_path)
  464. if not conn:
  465. logger.error("Failed to connect to database")
  466. return 0.0
  467. cursor = conn.cursor()
  468. # Query average processing time from recent analyses
  469. cursor.execute("""
  470. SELECT AVG(processing_time)
  471. FROM (
  472. SELECT processing_time
  473. FROM analyses
  474. WHERE processing_time IS NOT NULL AND processing_time > 0
  475. ORDER BY created_at DESC
  476. LIMIT ?
  477. )
  478. """, (limit,))
  479. result = cursor.fetchone()
  480. close_database_connection(conn)
  481. avg_time = float(result[0]) if result and result[0] else 0.0
  482. logger.info(f"Average processing time: {avg_time:.2f}s")
  483. return avg_time
  484. except Exception as e:
  485. logger.error(f"Error getting average processing time: {e}")
  486. return 0.0
  487. def get_model_accuracy_stats(self, limit: int = 100) -> Dict[str, float]:
  488. """
  489. Get average confidence scores per model type from recent results.
  490. This calculates the average confidence (as a proxy for accuracy) for each model
  491. from the most recent analyses.
  492. Args:
  493. limit: Maximum number of recent analyses to consider
  494. Returns:
  495. Dict mapping model type to average confidence percentage (0-100)
  496. Example: {'audio': 94.2, 'defect': 87.5, 'locule': 91.8, 'maturity': 88.3, 'shape': 89.1}
  497. """
  498. try:
  499. conn = get_database_connection(self.db_path)
  500. if not conn:
  501. logger.error("Failed to connect to database")
  502. return {}
  503. cursor = conn.cursor()
  504. # Get most recent analysis IDs
  505. cursor.execute("""
  506. SELECT id FROM analyses
  507. ORDER BY created_at DESC
  508. LIMIT ?
  509. """, (limit,))
  510. recent_analysis_ids = [row[0] for row in cursor.fetchall()]
  511. if not recent_analysis_ids:
  512. close_database_connection(conn)
  513. logger.info("No analyses found for accuracy calculation")
  514. return {}
  515. # Format IDs for SQL IN clause
  516. ids_placeholder = ','.join('?' * len(recent_analysis_ids))
  517. # Query average confidence per model type
  518. cursor.execute(f"""
  519. SELECT model_type, AVG(confidence * 100)
  520. FROM analysis_results
  521. WHERE analysis_id IN ({ids_placeholder})
  522. GROUP BY model_type
  523. """, recent_analysis_ids)
  524. results = cursor.fetchall()
  525. close_database_connection(conn)
  526. # Build result dictionary
  527. accuracy_stats = {}
  528. for model_type, avg_confidence in results:
  529. if model_type and avg_confidence is not None:
  530. accuracy_stats[model_type] = round(avg_confidence, 1)
  531. logger.info(f"Model accuracy stats: {accuracy_stats}")
  532. return accuracy_stats
  533. except Exception as e:
  534. logger.error(f"Error getting model accuracy stats: {e}")
  535. return {}