Spaces:
Sleeping
Sleeping
| """ | |
| Visualization module. | |
| Provides functions to render HTML visualizations of word alignment between reference and hypothesis texts, | |
| and to generate the complete results HTML page with an embedded audio element and progress status. | |
| """ | |
| from itertools import zip_longest | |
| from jiwer import process_words | |
| import hashlib | |
def render_visualize_jiwer_result_html(ref: str, hyp: str, title: str = "", model_id: str = None) -> str:
    """
    Generate an HTML visualization of the alignment between reference and hypothesis texts.

    The two texts are aligned with jiwer's ``process_words``. Each alignment step is
    rendered as one column: the reference word on top and the hypothesis word below.
    Every word span carries a ``data-ref-pos`` attribute holding the index of the
    reference word it is linked to; inserted hypothesis words are linked to the
    preceding reference word (or to position 0 at the very beginning).

    Words taken from ``ref``/``hyp`` and the ``model_id`` attribute value are
    HTML-escaped, so quotes, ``<``, ``>`` and ``&`` in a transcript cannot break
    the generated markup.

    Args:
        ref: The reference text.
        hyp: The hypothesis (transcribed) text.
        title: A title for the evaluation block (e.g., model name).
            NOTE: inserted into the markup verbatim (not escaped).
        model_id: A unique identifier for the model, emitted as the
            ``data-model-id`` attribute. When None, the first 8 hex digits of
            the MD5 of ``title`` are used.

    Returns:
        An HTML string visualizing word-level alignments and error metrics.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    # Derive a stable identifier from the title if none was provided.
    if model_id is None:
        model_id = hashlib.md5(title.encode()).hexdigest()[:8]
    model_id_attr = escape(str(model_id))

    # Process word alignment via jiwer.
    word_output = process_words(ref, hyp)
    all_ref_words = word_output.references[0]
    all_hyp_words = word_output.hypotheses[0]

    # Placeholder cells used where one side of a column has no word.
    missing_hyp_cell = '<span style="background-color: #ffb3d7; padding: 0 4px;"> </span>'
    missing_ref_cell = '<span> </span>'

    columns = []  # (ref_cell_html, hyp_cell_html, linked_ref_position)
    ref_position = 0  # Index of the next reference word to be emitted
    for chunk in word_output.alignments[0]:
        if chunk.type == "equal":
            for word in all_ref_words[chunk.ref_start_idx : chunk.ref_end_idx]:
                safe_word = escape(word)
                ref_cell = f'<span class="word-item ref-word" data-ref-pos="{ref_position}" data-ref-word="{safe_word}">{safe_word}</span>'
                hyp_cell = f'<span class="word-item hyp-word" data-ref-pos="{ref_position}" data-ref-word="{safe_word}">{safe_word}</span>'
                columns.append((ref_cell, hyp_cell, ref_position))
                ref_position += 1
        elif chunk.type == "delete":
            for word in all_ref_words[chunk.ref_start_idx : chunk.ref_end_idx]:
                safe_word = escape(word)
                ref_cell = f'<span class="word-item ref-word" data-ref-pos="{ref_position}" data-ref-word="{safe_word}">{safe_word}</span>'
                columns.append((ref_cell, missing_hyp_cell, ref_position))
                ref_position += 1
        elif chunk.type == "insert":
            # Inserted words are linked to the previous reference position
            # (position 0 when nothing has been emitted yet). ref_position is
            # NOT advanced because no reference word is consumed.
            last_ref_pos = max(0, ref_position - 1)
            for word in all_hyp_words[chunk.hyp_start_idx : chunk.hyp_end_idx]:
                safe_word = escape(word)
                hyp_cell = f'<span class="word-item hyp-word" data-ref-pos="{last_ref_pos}" data-inserted="true" style="background-color: #99f7c8; padding: 0 4px;">{safe_word}</span>'
                columns.append((missing_ref_cell, hyp_cell, last_ref_pos))
        elif chunk.type == "substitute":
            ref_words = all_ref_words[chunk.ref_start_idx : chunk.ref_end_idx]
            hyp_words = all_hyp_words[chunk.hyp_start_idx : chunk.hyp_end_idx]
            # zip_longest pads the shorter side with "" so unequal spans are still rendered.
            for ref_word, hyp_word in zip_longest(ref_words, hyp_words, fillvalue=""):
                if ref_word:  # Only advance the position for actual reference words
                    safe_ref = escape(ref_word)
                    ref_cell = f'<span class="word-item ref-word" data-ref-pos="{ref_position}" data-ref-word="{safe_ref}" style="background-color: #dddddd;">{safe_ref}</span>'
                    if hyp_word:
                        hyp_cell = f'<span class="word-item hyp-word" data-ref-pos="{ref_position}" data-subst="true" style="background-color: #ffc04d; padding: 0 4px;">{escape(hyp_word)}</span>'
                    else:
                        hyp_cell = missing_hyp_cell
                    columns.append((ref_cell, hyp_cell, ref_position))
                    ref_position += 1
                elif hyp_word:  # Extra hypothesis word with no reference pair
                    # Link to the previous reference position.
                    last_ref_pos = max(0, ref_position - 1)
                    hyp_cell = f'<span class="word-item hyp-word" data-ref-pos="{last_ref_pos}" data-inserted="true" style="background-color: #99f7c8; padding: 0 4px;">{escape(hyp_word)}</span>'
                    columns.append((missing_ref_cell, hyp_cell, last_ref_pos))

    # Header row: metrics on one side, title in the middle, operation counts on the other.
    metrics_results_str = f"WER: {word_output.wer * 100:0.04f}%, WIL: {word_output.wil * 100:0.04f}%"
    summary_operations_str = f"Subs: {word_output.substitutions}, Dels: {word_output.deletions}, Insrt: {word_output.insertions}"
    header_block = (
        f"<div dir='ltr' class='model-result' data-model-id='{model_id_attr}' style='font-size: 1.25em; margin-bottom: 10px; display: flex; justify-content: space-between; gap: 1.5em;'>"
        f"<div style='flex: 0 0 content;'>{metrics_results_str}</div>"
        f"<div>{title}</div>"
        f"<div style='flex: 0 0 content;'>{summary_operations_str}</div></div>"
    )

    # One flex item per alignment column; joined once instead of repeated concatenation.
    word_pairs = [
        (
            f'<div class="word-pair" data-ref-pos="{ref_pos}" style="display: flex; flex-direction: column; align-items: center; border-bottom: 1px solid grey; '
            'padding-left: 1em; font-family: monospace;">'
            f'<div style="text-align: center;">{ref_cell}</div>'
            f'<div style="text-align: center;">{hyp_cell}</div>'
            '</div>'
        )
        for ref_cell, hyp_cell, ref_pos in columns
    ]
    flex_container = (
        f'<div class="word-alignment-container" data-model-id="{model_id_attr}" style="display: flex; flex-wrap: wrap; margin-bottom: 10px;">'
        + "".join(word_pairs)
        + '</div>'
    )

    html_blocks = [header_block, flex_container]
    return (
        f'<div class="model-block" data-model-id="{model_id_attr}" style="background: white; color: black; margin-bottom: 20px;">'
        + "\n".join(html_blocks)
        + '</div>'
    )
# Inline script that scrolls the results container to its bottom.
_AUTO_SCROLL_JS = """
    <script type="text/javascript">
    document.getElementById('results-container').scrollTop = document.getElementById('results-container').scrollHeight;
    </script>
    """

# Button that reloads the page.
_REFRESH_PAGE_CONTROL = """
    <button onclick="location.reload();">Refresh Page</button>
    """

# JavaScript for reference-based word highlighting with sticky (click) selection.
# Hovering a word underlines every ``.word-item`` sharing its ``data-ref-pos`` in
# blue; clicking makes the underline sticky (red) until it is clicked again or
# the user clicks outside any word.
_HIGHLIGHTING_JS = """
    <script type="text/javascript">
    document.addEventListener('DOMContentLoaded', function() {
        // Track the currently selected reference position
        let selectedRefPos = null;
        // Helper function to apply highlighting
        function highlightPosition(refPos, isSticky = false) {
            // Apply highlighting style
            const highlightStyle = 'underline';
            // Highlight all elements with the matching reference position
            document.querySelectorAll(`.word-item[data-ref-pos="${refPos}"]`).forEach(el => {
                el.style.textDecoration = highlightStyle;
                el.style.textDecorationThickness = '2px';
                el.style.textDecorationColor = isSticky ? 'red' : 'blue';
            });
        }
        // Helper function to remove highlighting
        function removeHighlighting(refPos) {
            // Don't remove highlighting if this is the selected position
            if (refPos === selectedRefPos) return;
            document.querySelectorAll(`.word-item[data-ref-pos="${refPos}"]`).forEach(el => {
                el.style.textDecoration = 'none';
            });
        }
        // Helper function to clear all sticky highlighting
        function clearStickyHighlighting() {
            if (selectedRefPos !== null) {
                document.querySelectorAll(`.word-item[data-ref-pos="${selectedRefPos}"]`).forEach(el => {
                    el.style.textDecoration = 'none';
                });
                selectedRefPos = null;
            }
        }
        // Use event delegation for all word-alignment-containers
        document.querySelectorAll('.word-alignment-container').forEach(container => {
            // Mouseover (replaces mouseenter on individual elements)
            container.addEventListener('mouseover', function(event) {
                const target = event.target.closest('.word-item');
                if (!target) return;
                const refPos = target.dataset.refPos;
                if (!refPos) return;
                // Keep the sticky (red) style when hovering the selected position,
                // otherwise the hover would repaint it blue and never restore it.
                highlightPosition(refPos, refPos === selectedRefPos);
            });
            // Mouseout (replaces mouseleave on individual elements)
            container.addEventListener('mouseout', function(event) {
                const target = event.target.closest('.word-item');
                if (!target) return;
                const refPos = target.dataset.refPos;
                if (!refPos) return;
                removeHighlighting(refPos);
            });
            // Click for sticky highlighting
            container.addEventListener('click', function(event) {
                const target = event.target.closest('.word-item');
                if (!target) return;
                const refPos = target.dataset.refPos;
                if (!refPos) return;
                // If this position is already selected, clear it
                if (selectedRefPos === refPos) {
                    clearStickyHighlighting();
                } else {
                    // Clear any existing sticky highlighting
                    clearStickyHighlighting();
                    // Set new selected position
                    selectedRefPos = refPos;
                    // Apply sticky highlighting
                    highlightPosition(refPos, true);
                }
            });
        });
        // Add a click handler on the document to clear sticky highlighting when clicking elsewhere
        document.addEventListener('click', function(e) {
            // If the click wasn't on a word item or word pair, clear sticky highlighting
            if (!e.target.closest('.word-item') && !e.target.closest('.word-pair') && selectedRefPos !== null) {
                clearStickyHighlighting();
            }
        });
    });
    </script>
    """

# CSS for hover effects on word spans.
_HIGHLIGHTING_CSS = """
    <style>
    .word-item {
        cursor: pointer;
        transition: all 0.2s;
    }
    </style>
    """


def generate_results_html(dataset_description: str, html_blocks: list, audio_file: str, timestamp: str, progress: tuple = None) -> str:
    """
    Generate the complete HTML results page including an audio player, all evaluation blocks, and progress status.

    The page is emitted with ``dir="rtl"`` and ``lang="he"``. It contains, in order:
    a heading, an optional progress line, a "Refresh Page" button, an ``<audio>``
    player, the concatenated evaluation blocks inside a scrollable
    ``#results-container``, and the word-highlighting script. While ``progress``
    reports fewer models done than total, a script that scrolls the container to
    the bottom is appended as well.

    Args:
        dataset_description: A string describing the dataset.
            NOTE: inserted into the page verbatim (not escaped).
        html_blocks: A list of HTML strings (one per model evaluation).
        audio_file: The filename of the saved audio sample; HTML-escaped before
            being placed in the ``src`` attribute.
        timestamp: The timestamp string used in titles.
            NOTE: inserted into the page verbatim (not escaped).
        progress: A tuple (done, total) indicating the number of models evaluated so far.
            When falsy, no progress line is shown.

    Returns:
        A complete HTML document as a string.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    progress_html = ""
    auto_scroll_to_bottom_on_load = ""
    if progress:
        done, total = progress
        progress_html = f"<div style='margin-bottom:20px;'><strong>Progress:</strong> {done} of {total} models evaluated.</div>"
        # Only auto-scroll while the evaluation is still in progress.
        if done < total:
            auto_scroll_to_bottom_on_load = _AUTO_SCROLL_JS

    # Escape the attribute value so quotes or ampersands in the filename
    # cannot terminate or corrupt the src attribute.
    audio_src = escape(str(audio_file))
    audio_element = f"""
    <div style="margin-bottom: 20px;">
        <audio controls>
            <source src="{audio_src}" type="audio/mp3">
            Your browser does not support the audio element.
        </audio>
    </div>
    """

    blocks_html = "".join(html_blocks)
    results_html = f"""
    <html dir="rtl" lang="he">
    <head>
    <meta charset="utf-8">
    <title>Evaluation Results - {dataset_description} - {timestamp}</title>
    {_HIGHLIGHTING_CSS}
    </head>
    <body>
    <h3>Evaluation Results - {dataset_description} - {timestamp}</h3>
    {progress_html}{_REFRESH_PAGE_CONTROL}
    {audio_element}
    <div id="results-container" style="max-height: 80vh; overflow-y: auto;">
    {blocks_html}
    </div>
    {_HIGHLIGHTING_JS}
    {auto_scroll_to_bottom_on_load}
    </body>
    </html>
    """
    return results_html