Spaces:

yoad
/

visualize_eval_results

Sleeping

visualize_eval_results / src /visual_eval /visualization.py

Yoad

First commit with actual logic

2f5cf2f 8 months ago

12.8 kB

	"""
	Visualization module.
	Provides functions to render HTML visualizations of word alignment between reference and hypothesis texts,
	and to generate the complete results HTML page with an embedded audio element and progress status.
	"""

	from itertools import zip_longest
	from jiwer import process_words
	import hashlib

	def render_visualize_jiwer_result_html(ref: str, hyp: str, title: str = "", model_id: str = None) -> str:
	    """
	    Generate an HTML visualization of the alignment between reference and hypothesis texts.

	    The two texts are aligned with ``jiwer.process_words``. Every aligned word becomes one
	    column holding the reference word on top and the hypothesis word below:

	    * equal      - both words rendered without a background colour.
	    * delete     - reference word on top, an empty #ffb3d7 placeholder below.
	    * insert     - empty cell on top, hypothesis word on #99f7c8 below.
	    * substitute - reference word on #dddddd, hypothesis word on #ffc04d.

	    Each word span carries ``data-ref-pos`` (index of the reference word it belongs to);
	    inserted words reuse the position of the preceding reference word (0 at the start).

	    All words, the title and the model id are HTML-escaped before being embedded, so
	    characters such as ``"``, ``<`` or ``&`` cannot break attributes or inject markup.

	    Args:
	        ref: The reference text.
	        hyp: The hypothesis (transcribed) text.
	        title: A title for the evaluation block (e.g., model name).
	        model_id: A unique identifier for the model (written to ``data-model-id``).
	            When None, the first 8 hex digits of the MD5 of ``title`` are used.

	    Returns:
	        An HTML string visualizing word-level alignments and error metrics.
	    """
	    # Function-scope import: keeps the block self-contained without touching file imports.
	    from html import escape

	    # Derive a short, stable identifier from the title when none is provided.
	    if model_id is None:
	        model_id = hashlib.md5(title.encode()).hexdigest()[:8]

	    # str() keeps the previous tolerance for non-string values in the f-strings.
	    safe_title = escape(str(title))
	    safe_model_id = escape(str(model_id))

	    # Placeholders use an explicit &nbsp; so the cell keeps its line height even
	    # where a plain space would be collapsed by the browser.
	    empty_ref_cell = '<span>&nbsp;</span>'
	    deleted_hyp_cell = '<span style="background-color: #ffb3d7; padding: 0 4px;">&nbsp;</span>'

	    # Process word alignment via jiwer (single sentence pair -> index 0).
	    word_output = process_words(ref, hyp)
	    ref_tokens = word_output.references[0]
	    hyp_tokens = word_output.hypotheses[0]

	    columns = []  # (ref_cell_html, hyp_cell_html, ref_position) per rendered column
	    ref_position = 0  # Index of the next reference word to be rendered

	    for chunk in word_output.alignments[0]:
	        if chunk.type == "equal":
	            for word in ref_tokens[chunk.ref_start_idx : chunk.ref_end_idx]:
	                safe_word = escape(word)
	                ref_cell = f'<span class="word-item ref-word" data-ref-pos="{ref_position}" data-ref-word="{safe_word}">{safe_word}</span>'
	                hyp_cell = f'<span class="word-item hyp-word" data-ref-pos="{ref_position}" data-ref-word="{safe_word}">{safe_word}</span>'
	                columns.append((ref_cell, hyp_cell, ref_position))
	                ref_position += 1

	        elif chunk.type == "delete":
	            for word in ref_tokens[chunk.ref_start_idx : chunk.ref_end_idx]:
	                safe_word = escape(word)
	                ref_cell = f'<span class="word-item ref-word" data-ref-pos="{ref_position}" data-ref-word="{safe_word}">{safe_word}</span>'
	                columns.append((ref_cell, deleted_hyp_cell, ref_position))
	                ref_position += 1

	        elif chunk.type == "insert":
	            # Inserted words are linked to the previous reference position
	            # (position 0 when the insertion happens before any reference word).
	            last_ref_pos = max(0, ref_position - 1)
	            for word in hyp_tokens[chunk.hyp_start_idx : chunk.hyp_end_idx]:
	                hyp_cell = f'<span class="word-item hyp-word" data-ref-pos="{last_ref_pos}" data-inserted="true" style="background-color: #99f7c8; padding: 0 4px;">{escape(word)}</span>'
	                columns.append((empty_ref_cell, hyp_cell, last_ref_pos))
	            # Note: ref_position is NOT incremented for inserts.

	        elif chunk.type == "substitute":
	            ref_words = ref_tokens[chunk.ref_start_idx : chunk.ref_end_idx]
	            hyp_words = hyp_tokens[chunk.hyp_start_idx : chunk.hyp_end_idx]

	            # zip_longest tolerates chunks whose two sides differ in length.
	            for ref_word, hyp_word in zip_longest(ref_words, hyp_words, fillvalue=""):
	                if ref_word:  # Only advance the position for actual reference words
	                    safe_ref = escape(ref_word)
	                    ref_cell = f'<span class="word-item ref-word" data-ref-pos="{ref_position}" data-ref-word="{safe_ref}" style="background-color: #dddddd;">{safe_ref}</span>'
	                    if hyp_word:
	                        hyp_cell = f'<span class="word-item hyp-word" data-ref-pos="{ref_position}" data-subst="true" style="background-color: #ffc04d; padding: 0 4px;">{escape(hyp_word)}</span>'
	                    else:
	                        hyp_cell = deleted_hyp_cell
	                    columns.append((ref_cell, hyp_cell, ref_position))
	                    ref_position += 1
	                elif hyp_word:  # Extra hypothesis word with no reference pair
	                    last_ref_pos = max(0, ref_position - 1)
	                    hyp_cell = f'<span class="word-item hyp-word" data-ref-pos="{last_ref_pos}" data-inserted="true" style="background-color: #99f7c8; padding: 0 4px;">{escape(hyp_word)}</span>'
	                    columns.append((empty_ref_cell, hyp_cell, last_ref_pos))

	    # Header row: metrics on one side, title in the middle, operation counts on the other.
	    metrics_results_str = f"WER: {word_output.wer * 100:0.04f}%, WIL: {word_output.wil * 100:0.04f}%"
	    summary_operations_str = f"Subs: {word_output.substitutions}, Dels: {word_output.deletions}, Insrt: {word_output.insertions}"

	    header_html = (
	        f"<div dir='ltr' class='model-result' data-model-id='{safe_model_id}' style='font-size: 1.25em; margin-bottom: 10px; display: flex; justify-content: space-between; gap: 1.5em;'>"
	        f"<div style='flex: 0 0 content;'>{metrics_results_str}</div>"
	        f"<div>{safe_title}</div>"
	        f"<div style='flex: 0 0 content;'>{summary_operations_str}</div></div>"
	    )

	    # One flex item per aligned column; joined once instead of repeated string +=.
	    pair_cells = [
	        f'<div class="word-pair" data-ref-pos="{ref_pos}" style="display: flex; flex-direction: column; align-items: center; border-bottom: 1px solid grey; '
	        'padding-left: 1em; font-family: monospace;">'
	        f'<div style="text-align: center;">{ref_cell}</div>'
	        f'<div style="text-align: center;">{hyp_cell}</div>'
	        '</div>'
	        for ref_cell, hyp_cell, ref_pos in columns
	    ]
	    flex_container = (
	        f'<div class="word-alignment-container" data-model-id="{safe_model_id}" style="display: flex; flex-wrap: wrap; margin-bottom: 10px;">'
	        + "".join(pair_cells)
	        + '</div>'
	    )

	    return (
	        f'<div class="model-block" data-model-id="{safe_model_id}" style="background: white; color: black; margin-bottom: 20px;">'
	        + "\n".join([header_html, flex_container])
	        + '</div>'
	    )

	def generate_results_html(dataset_description: str, html_blocks: list, audio_file: str, timestamp: str, progress: tuple = None) -> str:
	    """
	    Generate the complete HTML results page including an audio player, all evaluation blocks, and progress status.

	    ``dataset_description``, ``timestamp`` and ``audio_file`` are HTML-escaped before being
	    embedded. ``html_blocks`` are inserted verbatim because they are already HTML.

	    The page also embeds a script that underlines every ``.word-item`` sharing the
	    ``data-ref-pos`` of the hovered word (blue) or of the clicked word (red, kept until
	    the same word is clicked again or a click lands outside any word).

	    Args:
	        dataset_description: A string describing the dataset.
	        html_blocks: A list of HTML strings (one per model evaluation).
	        audio_file: The filename of the saved audio sample (used as the audio ``src``).
	        timestamp: The timestamp string used in titles.
	        progress: A tuple (done, total) indicating the number of models evaluated so far.
	            When falsy, no progress line is rendered.

	    Returns:
	        A complete HTML document as a string.
	    """
	    # Function-scope import: keeps the block self-contained without touching file imports.
	    from html import escape

	    safe_description = escape(str(dataset_description))
	    safe_timestamp = escape(str(timestamp))
	    safe_audio_src = escape(str(audio_file))

	    progress_html = ""
	    auto_scroll_to_bottom_on_load = ""
	    if progress:
	        done, total = progress
	        progress_html = f"<div style='margin-bottom:20px;'><strong>Progress:</strong> {done} of {total} models evaluated.</div>"
	        if done < total:
	            # While evaluation is still running, jump to the newest (last) result block.
	            auto_scroll_to_bottom_on_load = """
	<script type="text/javascript">
	    document.getElementById('results-container').scrollTop = document.getElementById('results-container').scrollHeight;
	</script>
	"""

	    refresh_page_control = """
	<button onclick="location.reload();">Refresh Page</button>
	"""
	    audio_element = f"""
	<div style="margin-bottom: 20px;">
	    <audio controls>
	        <source src="{safe_audio_src}" type="audio/mp3">
	        Your browser does not support the audio element.
	    </audio>
	</div>
	"""

	    # JavaScript for reference-based word highlighting with sticky functionality.
	    # Plain (non-f) string: the script itself contains braces and ${...} templates.
	    highlighting_js = """
	<script type="text/javascript">
	document.addEventListener('DOMContentLoaded', function() {
	    // Track the currently selected reference position
	    let selectedRefPos = null;

	    // Helper function to apply highlighting
	    function highlightPosition(refPos, isSticky = false) {
	        // Apply highlighting style
	        const highlightStyle = 'underline';

	        // Highlight all elements with the matching reference position
	        document.querySelectorAll(`.word-item[data-ref-pos="${refPos}"]`).forEach(el => {
	            el.style.textDecoration = highlightStyle;
	            el.style.textDecorationThickness = '2px';
	            el.style.textDecorationColor = isSticky ? 'red' : 'blue';
	        });
	    }

	    // Helper function to remove highlighting
	    function removeHighlighting(refPos) {
	        // Don't remove highlighting if this is the selected position
	        if (refPos === selectedRefPos) return;

	        document.querySelectorAll(`.word-item[data-ref-pos="${refPos}"]`).forEach(el => {
	            el.style.textDecoration = 'none';
	        });
	    }

	    // Helper function to clear all sticky highlighting
	    function clearStickyHighlighting() {
	        if (selectedRefPos !== null) {
	            document.querySelectorAll(`.word-item[data-ref-pos="${selectedRefPos}"]`).forEach(el => {
	                el.style.textDecoration = 'none';
	            });

	            selectedRefPos = null;
	        }
	    }

	    // Use event delegation for all word-alignment-containers
	    document.querySelectorAll('.word-alignment-container').forEach(container => {
	        // Mouseover (replaces mouseenter on individual elements)
	        container.addEventListener('mouseover', function(event) {
	            const target = event.target.closest('.word-item');
	            if (!target) return;

	            const refPos = target.dataset.refPos;
	            if (!refPos) return;

	            // Hovering the selected position must keep its sticky (red) colour;
	            // mouseout never restores it, so it must not be repainted blue here.
	            highlightPosition(refPos, refPos === selectedRefPos);
	        });

	        // Mouseout (replaces mouseleave on individual elements)
	        container.addEventListener('mouseout', function(event) {
	            const target = event.target.closest('.word-item');
	            if (!target) return;

	            const refPos = target.dataset.refPos;
	            if (!refPos) return;

	            removeHighlighting(refPos);
	        });

	        // Click for sticky highlighting
	        container.addEventListener('click', function(event) {
	            const target = event.target.closest('.word-item');
	            if (!target) return;

	            const refPos = target.dataset.refPos;
	            if (!refPos) return;

	            // If this position is already selected, clear it
	            if (selectedRefPos === refPos) {
	                clearStickyHighlighting();
	            } else {
	                // Clear any existing sticky highlighting
	                clearStickyHighlighting();

	                // Set new selected position
	                selectedRefPos = refPos;

	                // Apply sticky highlighting
	                highlightPosition(refPos, true);
	            }
	        });
	    });

	    // Add a click handler on the document to clear sticky highlighting when clicking elsewhere
	    document.addEventListener('click', function(e) {
	        // If the click wasn't on a word item or word pair, clear sticky highlighting
	        if (!e.target.closest('.word-item') && !e.target.closest('.word-pair') && selectedRefPos !== null) {
	            clearStickyHighlighting();
	        }
	    });
	});
	</script>
	"""

	    # CSS for hover effects
	    highlighting_css = """
	<style>
	    .word-item {
	        cursor: pointer;
	        transition: all 0.2s;
	    }
	</style>
	"""

	    # Pre-rendered model blocks are trusted HTML and are embedded as-is.
	    blocks_html = "".join(html_blocks)

	    results_html = f"""
	<html dir="rtl" lang="he">
	<head>
	    <meta charset="utf-8">
	    <title>Evaluation Results - {safe_description} - {safe_timestamp}</title>
	    {highlighting_css}
	</head>
	<body>
	    <h3>Evaluation Results - {safe_description} - {safe_timestamp}</h3>
	    {progress_html}{refresh_page_control}
	    {audio_element}
	    <div id="results-container" style="max-height: 80vh; overflow-y: auto;">
	        {blocks_html}
	    </div>
	    {highlighting_js}
	    {auto_scroll_to_bottom_on_load}
	</body>
	</html>
	"""
	    return results_html