"""
Spectrogram Preview Dialog

Enlarged view of an audio spectrogram together with a waveform plot and
audio playback controls.
"""
import os
import time
from typing import Optional

# Matplotlib for waveform rendering (backend must be selected before the
# backend module is imported)
import matplotlib
matplotlib.use('agg')
from matplotlib.figure import Figure
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
import numpy as np
from PyQt5.QtWidgets import (
    QDialog, QVBoxLayout, QHBoxLayout, QLabel, QPushButton, QSlider, QWidget, QGridLayout
)
from PyQt5.QtCore import Qt, QUrl, QTime
from PyQt5.QtGui import QPixmap, QImage, QFont
from PyQt5.QtMultimedia import QMediaPlayer, QMediaContent
from scipy.io import wavfile

  20. class SpectrogramPreviewDialog(QDialog):
  21. """
  22. Dialog that shows an enlarged spectrogram, a waveform visualization,
  23. and audio playback controls (play/pause + seek slider).
  24. """
  25. def __init__(self, spectrogram_pixmap: QPixmap, audio_path: str = None, parent: QWidget = None):
  26. super().__init__(parent)
  27. self.setWindowTitle("Audio Preview")
  28. self.setModal(True)
  29. self.spectrogram_pixmap = spectrogram_pixmap
  30. self.audio_path = audio_path
  31. self.player = None
  32. if self.audio_path:
  33. self.player = QMediaPlayer(self)
  34. self.player.setMedia(QMediaContent(QUrl.fromLocalFile(self.audio_path)))
  35. self._init_ui()
  36. def _get_audio_metadata(self, path: str) -> dict:
  37. """Extract comprehensive audio metadata"""
  38. metadata = {}
  39. try:
  40. # Basic file info
  41. metadata['filename'] = os.path.basename(path)
  42. metadata['file_size'] = self._format_file_size(os.path.getsize(path))
  43. # Audio file info
  44. sr, data = wavfile.read(path)
  45. metadata['sample_rate'] = "{:,} Hz".format(sr)
  46. metadata['duration'] = len(data) / sr
  47. metadata['duration_str'] = self._format_duration(metadata['duration'])
  48. metadata['channels'] = 2 if data.ndim > 1 else 1
  49. metadata['bit_depth'] = data.dtype.itemsize * 8
  50. metadata['total_samples'] = len(data)
  51. metadata['bitrate'] = self._calculate_bitrate(path, metadata['duration'])
  52. except Exception as e:
  53. metadata['error'] = str(e)
  54. return metadata
  55. def _format_file_size(self, size_bytes: int) -> str:
  56. """Format file size in human readable format"""
  57. for unit in ['B', 'KB', 'MB', 'GB']:
  58. if size_bytes < 1024.0:
  59. return f"{size_bytes:.1f} {unit}"
  60. size_bytes /= 1024.0
  61. return f"{size_bytes:.1f} TB"
  62. def _format_duration(self, seconds: float) -> str:
  63. """Format duration in human readable format"""
  64. hours = int(seconds // 3600)
  65. minutes = int((seconds % 3600) // 60)
  66. seconds = seconds % 60
  67. if hours > 0:
  68. return f"{hours}:{minutes:02d}:{seconds:02.1f}"
  69. else:
  70. return f"{minutes}:{seconds:02.1f}"
  71. def _calculate_bitrate(self, path: str, duration: float) -> str:
  72. """Calculate approximate bitrate"""
  73. try:
  74. file_size_bits = os.path.getsize(path) * 8
  75. bitrate = file_size_bits / duration
  76. return f"{bitrate/1000:.0f} kbps"
  77. except:
  78. return "Unknown"
  79. def _create_metadata_widget(self) -> QWidget:
  80. """Create a widget displaying audio metadata in a grid layout"""
  81. widget = QWidget()
  82. widget.setStyleSheet("background-color: #f8f9fa; padding: 15px; border-bottom: 1px solid #dee2e6;")
  83. layout = QGridLayout(widget)
  84. layout.setSpacing(10)
  85. # Define metadata fields to display
  86. fields = [
  87. ("File Size", self.metadata.get('file_size', 'Unknown')),
  88. ("Duration", self.metadata.get('duration_str', 'Unknown')),
  89. ("Sample Rate", self.metadata.get('sample_rate', 'Unknown')),
  90. ("Channels", str(self.metadata.get('channels', 'Unknown'))),
  91. ("Bit Depth", f"{self.metadata.get('bit_depth', 'Unknown')} bits"),
  92. ("Total Samples", "{:,}".format(self.metadata.get('total_samples', 'Unknown'))),
  93. ("Bitrate", self.metadata.get('bitrate', 'Unknown')),
  94. ]
  95. # Create labels for each field
  96. for i, (label, value) in enumerate(fields):
  97. row = i // 2
  98. col = (i % 2) * 2
  99. # Field label
  100. field_label = QLabel(f"{label}:")
  101. field_label.setStyleSheet("font-weight: bold; color: #495057; font-size: 11px;")
  102. layout.addWidget(field_label, row, col)
  103. # Field value
  104. value_label = QLabel(str(value))
  105. value_label.setStyleSheet("color: #212529; font-size: 11px; background-color: white; padding: 2px 6px; border-radius: 3px;")
  106. layout.addWidget(value_label, row, col + 1)
  107. return widget
  108. def _init_ui(self):
  109. layout = QVBoxLayout(self)
  110. layout.setContentsMargins(0, 0, 0, 0)
  111. layout.setSpacing(0)
  112. # Header with filename
  113. self.metadata = self._get_audio_metadata(self.audio_path) if self.audio_path else {}
  114. filename = self.metadata.get('filename', 'Unknown Audio File')
  115. # Main header with filename
  116. header = QLabel(f"Audio Spectrogram Preview - {filename}")
  117. header.setAlignment(Qt.AlignCenter)
  118. header.setStyleSheet(
  119. "QLabel { background-color: #2c3e50; color: white; font-weight: bold; font-size: 14px; padding: 12px; }"
  120. )
  121. layout.addWidget(header)
  122. # Metadata section
  123. if self.audio_path:
  124. # Metadata section title
  125. metadata_title = QLabel("📊 Audio File Information")
  126. metadata_title.setStyleSheet(
  127. "QLabel { color: #2c3e50; font-weight: bold; font-size: 13px; padding: 8px; background-color: #ecf0f1; }"
  128. )
  129. layout.addWidget(metadata_title)
  130. if 'error' not in self.metadata:
  131. metadata_widget = self._create_metadata_widget()
  132. layout.addWidget(metadata_widget)
  133. else:
  134. # Show error message if metadata extraction failed
  135. error_label = QLabel(f"Could not read audio file: {self.metadata['error']}")
  136. error_label.setStyleSheet("color: #e74c3c; background-color: #fdf2f2; padding: 10px; border: 1px solid #f5c6cb;")
  137. error_label.setAlignment(Qt.AlignCenter)
  138. layout.addWidget(error_label)
  139. # Spectrogram section
  140. spectrogram_title = QLabel("🔍 Spectrogram Analysis")
  141. spectrogram_title.setStyleSheet(
  142. "QLabel { color: #2c3e50; font-weight: bold; font-size: 13px; padding: 8px; background-color: #ecf0f1; }"
  143. )
  144. layout.addWidget(spectrogram_title)
  145. # Spectrogram (enlarged)
  146. self.spectrogram_label = QLabel()
  147. self.spectrogram_label.setAlignment(Qt.AlignCenter)
  148. self.spectrogram_label.setStyleSheet("background-color: #2c3e50; padding: 10px;")
  149. spec_scaled = self.spectrogram_pixmap.scaled(1100, 500, Qt.KeepAspectRatio, Qt.SmoothTransformation)
  150. self.spectrogram_label.setPixmap(spec_scaled)
  151. layout.addWidget(self.spectrogram_label)
  152. # Waveform section
  153. waveform_title = QLabel("📈 Waveform Visualization")
  154. waveform_title.setStyleSheet(
  155. "QLabel { color: #2c3e50; font-weight: bold; font-size: 13px; padding: 8px; background-color: #ecf0f1; }"
  156. )
  157. layout.addWidget(waveform_title)
  158. # Waveform area
  159. self.waveform_label = QLabel()
  160. self.waveform_label.setAlignment(Qt.AlignCenter)
  161. self.waveform_label.setStyleSheet("background-color: #ffffff; padding: 10px; border-top: 1px solid #ecf0f1;")
  162. layout.addWidget(self.waveform_label)
  163. # Controls section
  164. controls_title = QLabel("🎵 Audio Playback Controls")
  165. controls_title.setStyleSheet(
  166. "QLabel { color: #2c3e50; font-weight: bold; font-size: 13px; padding: 8px; background-color: #ecf0f1; }"
  167. )
  168. layout.addWidget(controls_title)
  169. # Controls
  170. controls = QHBoxLayout()
  171. controls.setContentsMargins(10, 10, 10, 10)
  172. controls.setSpacing(10)
  173. self.play_btn = QPushButton("Play")
  174. self.play_btn.setFixedHeight(30)
  175. self.play_btn.setFont(QFont("Arial", 10, QFont.Bold))
  176. self.play_btn.setStyleSheet("QPushButton { background-color: #27ae60; color: white; border: none; padding: 6px 12px; }")
  177. self.play_btn.clicked.connect(self._toggle_play)
  178. controls.addWidget(self.play_btn)
  179. self.position_slider = QSlider(Qt.Horizontal)
  180. self.position_slider.setRange(0, 0)
  181. self.position_slider.sliderMoved.connect(self._set_position)
  182. controls.addWidget(self.position_slider, 1)
  183. layout.addLayout(controls)
  184. # Footer
  185. close_btn = QPushButton("Close (ESC)")
  186. close_btn.setFixedHeight(28)
  187. close_btn.clicked.connect(self.accept)
  188. layout.addWidget(close_btn)
  189. # Load waveform and connect player
  190. if self.audio_path:
  191. self._render_waveform(self.audio_path)
  192. if self.player is not None:
  193. self.player.positionChanged.connect(self._on_position_changed)
  194. self.player.durationChanged.connect(self._on_duration_changed)
  195. # Size
  196. self.resize(1200, 900)
  197. def _render_waveform(self, path: str):
  198. try:
  199. sr, data = wavfile.read(path)
  200. if data.ndim > 1:
  201. data = data.mean(axis=1)
  202. # Normalize for plotting
  203. data = data.astype(np.float64)
  204. if np.max(np.abs(data)) > 0:
  205. data = data / np.max(np.abs(data))
  206. # Create matplotlib figure
  207. fig = Figure(figsize=(11, 2.8), dpi=100)
  208. ax = fig.add_subplot(111)
  209. times = np.linspace(0, len(data) / sr, num=len(data))
  210. ax.plot(times, data, color="#3498db", linewidth=0.6)
  211. ax.set_xlim(0, times[-1] if len(times) > 0 else 1)
  212. ax.set_ylim(-1.05, 1.05)
  213. ax.set_xlabel("Time (s)")
  214. ax.set_ylabel("Amplitude")
  215. ax.grid(True, alpha=0.2)
  216. fig.tight_layout()
  217. canvas = FigureCanvas(fig)
  218. canvas.draw()
  219. w, h = fig.get_size_inches() * fig.get_dpi()
  220. w, h = int(w), int(h)
  221. img = QImage(canvas.buffer_rgba(), w, h, QImage.Format_ARGB32)
  222. img = img.rgbSwapped()
  223. self.waveform_label.setPixmap(QPixmap(img))
  224. except Exception:
  225. # Fallback text if waveform can't be rendered
  226. self.waveform_label.setText("Waveform preview unavailable.")
  227. def _toggle_play(self):
  228. if not self.player:
  229. return
  230. if self.player.state() == QMediaPlayer.PlayingState:
  231. self.player.pause()
  232. self.play_btn.setText("Play")
  233. else:
  234. self.player.play()
  235. self.play_btn.setText("Pause")
  236. def _set_position(self, position: int):
  237. if self.player:
  238. self.player.setPosition(position)
  239. def _on_position_changed(self, position: int):
  240. self.position_slider.setValue(position)
  241. def _on_duration_changed(self, duration: int):
  242. self.position_slider.setRange(0, duration)
  243. def keyPressEvent(self, event):
  244. if event.key() == Qt.Key_Escape:
  245. self.accept()
  246. else:
  247. super().keyPressEvent(event)