"""
Spectrogram Preview Dialog

Enlarged view for the audio spectrogram with waveform and audio playback controls.
"""

from typing import Optional

from PyQt5.QtWidgets import (
    QDialog, QVBoxLayout, QHBoxLayout, QLabel, QPushButton, QSlider, QWidget, QGridLayout
)
from PyQt5.QtCore import Qt, QUrl, QTime
from PyQt5.QtGui import QPixmap, QImage, QFont
from PyQt5.QtMultimedia import QMediaPlayer, QMediaContent

import numpy as np
from scipy.io import wavfile
import os
import time

# Matplotlib for waveform rendering
import matplotlib
matplotlib.use('agg')
from matplotlib.figure import Figure
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas


class SpectrogramPreviewDialog(QDialog):
    """
    Dialog that shows an enlarged spectrogram, a waveform visualization,
    and audio playback controls (play/pause + seek slider).

    The audio file is read with ``scipy.io.wavfile``, so metadata and the
    waveform are only available for files that reader can parse; any failure
    is reported in the dialog instead of being raised.
    """

    # Shared look for the section title bars.
    _SECTION_TITLE_STYLE = (
        "QLabel { color: #2c3e50; font-weight: bold; font-size: 13px; padding: 8px; background-color: #ecf0f1; }"
    )

    def __init__(self, spectrogram_pixmap: QPixmap, audio_path: Optional[str] = None,
                 parent: Optional[QWidget] = None):
        """
        Args:
            spectrogram_pixmap: Pre-rendered spectrogram image to enlarge.
            audio_path: Optional path of the audio file. When omitted, the
                metadata and waveform sections stay empty and the playback
                controls are disabled.
            parent: Optional parent widget.
        """
        super().__init__(parent)
        self.setWindowTitle("Audio Preview")
        self.setModal(True)

        self.spectrogram_pixmap = spectrogram_pixmap
        self.audio_path = audio_path
        self.metadata = {}

        self.player = None
        if self.audio_path:
            self.player = QMediaPlayer(self)
            # QUrl.fromLocalFile() yields a relative URL for a relative path,
            # so resolve the path first.
            media_url = QUrl.fromLocalFile(os.path.abspath(self.audio_path))
            self.player.setMedia(QMediaContent(media_url))

        self._init_ui()

    # ------------------------------------------------------------------
    # Metadata helpers
    # ------------------------------------------------------------------
    def _get_audio_metadata(self, path: str) -> dict:
        """
        Extract audio metadata for ``path``.

        Returns:
            A dict with the keys ``filename``, ``file_size``, ``sample_rate``,
            ``duration``, ``duration_str``, ``channels``, ``bit_depth``,
            ``total_samples`` and ``bitrate``. If anything fails, the dict
            contains an ``error`` key with the exception text (keys filled in
            before the failure are kept).
        """
        metadata = {}
        try:
            # Basic file info
            metadata['filename'] = os.path.basename(path)
            metadata['file_size'] = self._format_file_size(os.path.getsize(path))

            # Audio file info
            sr, data = wavfile.read(path)
            total_samples = int(data.shape[0])
            metadata['sample_rate'] = "{:,} Hz".format(sr)
            metadata['duration'] = total_samples / sr
            metadata['duration_str'] = self._format_duration(metadata['duration'])
            # Multi-channel data is (samples, channels); report the real count
            # rather than assuming stereo.
            metadata['channels'] = int(data.shape[1]) if data.ndim > 1 else 1
            # NOTE: this is the width of the in-memory sample type, which may
            # be wider than the on-disk sample width.
            metadata['bit_depth'] = data.dtype.itemsize * 8
            metadata['total_samples'] = total_samples
            metadata['bitrate'] = self._calculate_bitrate(path, metadata['duration'])
        except Exception as e:
            # UI boundary: any read/parse problem is shown to the user by
            # _init_ui instead of crashing the dialog.
            metadata['error'] = str(e)

        return metadata

    def _format_file_size(self, size_bytes: int) -> str:
        """Format a byte count in human readable form (e.g. ``"1.5 KB"``)."""
        size = float(size_bytes)
        for unit in ['B', 'KB', 'MB', 'GB']:
            if size < 1024.0:
                return f"{size:.1f} {unit}"
            size /= 1024.0
        return f"{size:.1f} TB"

    def _format_duration(self, seconds: float) -> str:
        """
        Format a duration as ``M:SS.s`` or ``H:MM:SS.s``.

        The value is rounded to tenths of a second *before* it is split into
        fields, so the seconds field is always zero-padded and never shows
        ``60.0``.
        """
        total_tenths = int(round(max(float(seconds), 0.0) * 10))
        whole_seconds, tenths = divmod(total_tenths, 10)
        total_minutes, secs = divmod(whole_seconds, 60)
        hours, minutes = divmod(total_minutes, 60)

        seconds_text = f"{secs:02d}.{tenths}"
        if hours > 0:
            return f"{hours}:{minutes:02d}:{seconds_text}"
        return f"{minutes}:{seconds_text}"

    def _calculate_bitrate(self, path: str, duration: float) -> str:
        """
        Calculate the approximate bitrate from file size and duration.

        Returns ``"Unknown"`` when the file cannot be stat'ed or the duration
        is not a positive number.
        """
        try:
            if duration <= 0:
                return "Unknown"
            file_size_bits = os.path.getsize(path) * 8
            bitrate = file_size_bits / duration
        except (OSError, TypeError, ZeroDivisionError):
            return "Unknown"
        return f"{bitrate/1000:.0f} kbps"

    def _create_metadata_widget(self) -> QWidget:
        """Create a widget displaying ``self.metadata`` in a two-column grid."""
        widget = QWidget()
        widget.setStyleSheet("background-color: #f8f9fa; padding: 15px; border-bottom: 1px solid #dee2e6;")

        layout = QGridLayout(widget)
        layout.setSpacing(10)

        meta = self.metadata
        unknown = 'Unknown'

        # These two need formatting that only makes sense for real numbers;
        # fall back to plain "Unknown" when the value is missing.
        bit_depth = meta.get('bit_depth')
        bit_depth_text = f"{bit_depth} bits" if bit_depth is not None else unknown
        total_samples = meta.get('total_samples')
        if isinstance(total_samples, (int, np.integer)):
            total_samples_text = "{:,}".format(int(total_samples))
        else:
            total_samples_text = unknown

        # Define metadata fields to display
        fields = [
            ("File Size", meta.get('file_size', unknown)),
            ("Duration", meta.get('duration_str', unknown)),
            ("Sample Rate", meta.get('sample_rate', unknown)),
            ("Channels", str(meta.get('channels', unknown))),
            ("Bit Depth", bit_depth_text),
            ("Total Samples", total_samples_text),
            ("Bitrate", meta.get('bitrate', unknown)),
        ]

        # Two fields per row; each field takes a label column and a value column.
        for i, (label, value) in enumerate(fields):
            row, pair = divmod(i, 2)
            col = pair * 2

            # Field label
            field_label = QLabel(f"{label}:")
            field_label.setStyleSheet("font-weight: bold; color: #495057; font-size: 11px;")
            layout.addWidget(field_label, row, col)

            # Field value
            value_label = QLabel(str(value))
            value_label.setStyleSheet(
                "color: #212529; font-size: 11px; background-color: white; padding: 2px 6px; border-radius: 3px;"
            )
            layout.addWidget(value_label, row, col + 1)

        return widget

    # ------------------------------------------------------------------
    # UI construction
    # ------------------------------------------------------------------
    def _make_section_title(self, text: str) -> QLabel:
        """Return a section title bar label with the shared styling."""
        title = QLabel(text)
        title.setStyleSheet(self._SECTION_TITLE_STYLE)
        return title

    def _build_controls(self) -> QHBoxLayout:
        """Create the play/pause button and seek slider row."""
        controls = QHBoxLayout()
        controls.setContentsMargins(10, 10, 10, 10)
        controls.setSpacing(10)

        self.play_btn = QPushButton("Play")
        self.play_btn.setFixedHeight(30)
        self.play_btn.setFont(QFont("Arial", 10, QFont.Bold))
        self.play_btn.setStyleSheet(
            "QPushButton { background-color: #27ae60; color: white; border: none; padding: 6px 12px; }"
        )
        self.play_btn.clicked.connect(self._toggle_play)
        controls.addWidget(self.play_btn)

        self.position_slider = QSlider(Qt.Horizontal)
        self.position_slider.setRange(0, 0)
        self.position_slider.sliderMoved.connect(self._set_position)
        controls.addWidget(self.position_slider, 1)

        # Without a player the controls cannot do anything; make that visible.
        has_player = self.player is not None
        self.play_btn.setEnabled(has_player)
        self.position_slider.setEnabled(has_player)

        return controls

    def _init_ui(self):
        """Build the dialog: header, metadata, spectrogram, waveform, controls."""
        layout = QVBoxLayout(self)
        layout.setContentsMargins(0, 0, 0, 0)
        layout.setSpacing(0)

        # Header with filename
        self.metadata = self._get_audio_metadata(self.audio_path) if self.audio_path else {}
        filename = self.metadata.get('filename', 'Unknown Audio File')

        # Main header with filename
        header = QLabel(f"Audio Spectrogram Preview - {filename}")
        header.setAlignment(Qt.AlignCenter)
        header.setStyleSheet(
            "QLabel { background-color: #2c3e50; color: white; font-weight: bold; font-size: 14px; padding: 12px; }"
        )
        layout.addWidget(header)

        # Metadata section
        if self.audio_path:
            layout.addWidget(self._make_section_title("📊 Audio File Information"))

            if 'error' not in self.metadata:
                layout.addWidget(self._create_metadata_widget())
            else:
                # Show error message if metadata extraction failed
                error_label = QLabel(f"Could not read audio file: {self.metadata['error']}")
                error_label.setStyleSheet(
                    "color: #e74c3c; background-color: #fdf2f2; padding: 10px; border: 1px solid #f5c6cb;"
                )
                error_label.setAlignment(Qt.AlignCenter)
                layout.addWidget(error_label)

        # Spectrogram section
        layout.addWidget(self._make_section_title("🔍 Spectrogram Analysis"))

        # Spectrogram (enlarged)
        self.spectrogram_label = QLabel()
        self.spectrogram_label.setAlignment(Qt.AlignCenter)
        pixmap = self.spectrogram_pixmap
        if pixmap is not None and not pixmap.isNull():
            self.spectrogram_label.setStyleSheet("background-color: #2c3e50; padding: 10px;")
            spec_scaled = pixmap.scaled(1100, 500, Qt.KeepAspectRatio, Qt.SmoothTransformation)
            self.spectrogram_label.setPixmap(spec_scaled)
        else:
            # Nothing to scale; show a readable placeholder on the dark panel.
            self.spectrogram_label.setStyleSheet("background-color: #2c3e50; color: white; padding: 10px;")
            self.spectrogram_label.setText("Spectrogram unavailable.")
        layout.addWidget(self.spectrogram_label)

        # Waveform section
        layout.addWidget(self._make_section_title("📈 Waveform Visualization"))

        # Waveform area
        self.waveform_label = QLabel()
        self.waveform_label.setAlignment(Qt.AlignCenter)
        self.waveform_label.setStyleSheet(
            "background-color: #ffffff; padding: 10px; border-top: 1px solid #ecf0f1;"
        )
        layout.addWidget(self.waveform_label)

        # Controls section
        layout.addWidget(self._make_section_title("🎵 Audio Playback Controls"))
        layout.addLayout(self._build_controls())

        # Footer
        close_btn = QPushButton("Close (ESC)")
        close_btn.setFixedHeight(28)
        close_btn.clicked.connect(self.accept)
        layout.addWidget(close_btn)

        # Load waveform and connect player
        if self.audio_path:
            self._render_waveform(self.audio_path)
        if self.player is not None:
            self.player.positionChanged.connect(self._on_position_changed)
            self.player.durationChanged.connect(self._on_duration_changed)
            # Keeps the button label right when playback stops on its own
            # (e.g. end of media), not only when the user clicks.
            self.player.stateChanged.connect(self._on_state_changed)

        # Size
        self.resize(1200, 900)

    def _render_waveform(self, path: str):
        """
        Render the waveform of ``path`` into ``self.waveform_label``.

        Channels are averaged to mono and the signal is peak-normalised to
        [-1, 1]. On any failure a fallback text is shown instead.
        """
        try:
            sr, data = wavfile.read(path)

            if data.size == 0:
                self.waveform_label.setText("Waveform preview unavailable.")
                return

            if data.dtype == np.uint8:
                # 8-bit PCM is unsigned with silence at 128; centre it so the
                # normalisation below is not dominated by the DC offset.
                data = data.astype(np.float64) - 128.0
            else:
                data = data.astype(np.float64)
            if data.ndim > 1:
                data = data.mean(axis=1)

            # Normalize for plotting
            peak = np.max(np.abs(data))
            if peak > 0:
                data = data / peak

            # Create matplotlib figure
            fig = Figure(figsize=(11, 2.8), dpi=100)
            ax = fig.add_subplot(111)
            # Sample k is at k / sr seconds.
            times = np.arange(len(data)) / sr
            ax.plot(times, data, color="#3498db", linewidth=0.6)
            ax.set_xlim(0, len(data) / sr)
            ax.set_ylim(-1.05, 1.05)
            ax.set_xlabel("Time (s)")
            ax.set_ylabel("Amplitude")
            ax.grid(True, alpha=0.2)
            fig.tight_layout()

            canvas = FigureCanvas(fig)
            canvas.draw()

            # Take the size from the rendered buffer itself so it always
            # matches the pixel data, whatever the canvas scaling is.
            rgba = np.ascontiguousarray(canvas.buffer_rgba())
            height, width = rgba.shape[:2]
            pixel_bytes = rgba.tobytes()
            # Format_RGBA8888 is byte-ordered R,G,B,A, matching buffer_rgba().
            # QImage does not own the buffer, so copy() before it goes away.
            img = QImage(pixel_bytes, width, height, 4 * width, QImage.Format_RGBA8888).copy()
            self.waveform_label.setPixmap(QPixmap.fromImage(img))
        except Exception:
            # Fallback text if waveform can't be rendered
            self.waveform_label.setText("Waveform preview unavailable.")

    # ------------------------------------------------------------------
    # Playback
    # ------------------------------------------------------------------
    def _toggle_play(self):
        """Toggle between playing and paused; the label follows stateChanged."""
        if not self.player:
            return
        if self.player.state() == QMediaPlayer.PlayingState:
            self.player.pause()
        else:
            self.player.play()

    def _on_state_changed(self, state):
        """Update the button label to reflect the player's actual state."""
        self.play_btn.setText("Pause" if state == QMediaPlayer.PlayingState else "Play")

    def _set_position(self, position: int):
        """Seek the player to ``position`` (milliseconds) from the slider."""
        if self.player:
            self.player.setPosition(position)

    def _on_position_changed(self, position: int):
        """Move the slider with playback, unless the user is dragging it."""
        if not self.position_slider.isSliderDown():
            self.position_slider.setValue(position)

    def _on_duration_changed(self, duration: int):
        """Set the slider range once the media duration is known."""
        self.position_slider.setRange(0, duration)

    # ------------------------------------------------------------------
    # Dialog lifecycle
    # ------------------------------------------------------------------
    def done(self, result):
        """Stop playback whenever the dialog is closed (accept/reject/close)."""
        if self.player is not None:
            self.player.stop()
        super().done(result)

    def keyPressEvent(self, event):
        """Close the dialog (accepting it) when Escape is pressed."""
        if event.key() == Qt.Key_Escape:
            self.accept()
        else:
            super().keyPressEvent(event)