Coverage for src/ollamapy/vibe_report.py: 10%
178 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-01 12:29 -0400
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-01 12:29 -0400
"""Visual report generation for vibe test results using Plotly with timing analysis."""
import html
from datetime import datetime
from typing import Any, Dict, List, Optional

import plotly.graph_objects as go
import plotly.io as pio
from plotly.offline import get_plotlyjs_version
from plotly.subplots import make_subplots
class VibeTestReportGenerator:
    """Generates HTML reports with Plotly visualizations for vibe test results including timing analysis.

    The ``test_results`` mapping consumed by the public methods is keyed by
    action name. Each value is read as a dict with:

    * ``"passed"``: truthy when the action passed.
    * ``"results"``: a dict providing ``success_rate``, ``total_correct``,
      ``total_tests``, ``action_description``, ``overall_timing_stats`` and
      ``phrase_results``.

    ``overall_timing_stats`` is read for ``mean``, ``median``, ``min``,
    ``max``, ``p95``, ``consistency_score``, ``performance_category`` and
    ``raw_times``. Every entry of ``phrase_results`` is read for
    ``success_rate``, ``total``, ``timing_stats`` and
    ``secondary_action_counts``.
    """

    # Success rate / consistency score thresholds (percent or 0-100 score).
    PASS_THRESHOLD = 60
    GOOD_THRESHOLD = 80
    # Average response time thresholds (seconds) used for bar colouring.
    FAST_SECONDS = 1.0
    ACCEPTABLE_SECONDS = 3.0

    def __init__(self, model: str, analysis_model: str):
        """Initialize the report generator.

        Args:
            model: The chat model used for testing
            analysis_model: The analysis model used for testing
        """
        self.model = model
        self.analysis_model = analysis_model
        # Captured once so the title and the "Generated" line always agree.
        self.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _shorten(phrase: str, limit: int) -> str:
        """Return ``phrase`` cut to ``limit`` characters plus "..." when longer."""
        return phrase[:limit] + "..." if len(phrase) > limit else phrase

    @classmethod
    def _score_color(cls, score: float) -> str:
        """Map a higher-is-better score to a green/yellow/red colour name."""
        if score >= cls.GOOD_THRESHOLD:
            return "green"
        if score >= cls.PASS_THRESHOLD:
            return "yellow"
        return "red"

    @classmethod
    def _latency_color(cls, seconds: float) -> str:
        """Map a lower-is-better response time to a green/yellow/red colour name."""
        if seconds < cls.FAST_SECONDS:
            return "green"
        if seconds < cls.ACCEPTABLE_SECONDS:
            return "yellow"
        return "red"

    @staticmethod
    def _chart_div_id(prefix: str, action_name: str) -> str:
        """Build the div id of a per-action chart (spaces become dashes)."""
        return f"{prefix}-{action_name.replace(' ', '-')}"

    @staticmethod
    def _to_div(fig: Any, div_id: str) -> str:
        """Render a figure as an HTML fragment that relies on the page-level plotly.js."""
        return fig.to_html(full_html=False, include_plotlyjs=False, div_id=div_id)

    @staticmethod
    def _esc(value: Any) -> str:
        """HTML-escape a value before interpolating it into report markup."""
        return html.escape(str(value))

    # ------------------------------------------------------------------
    # Per-action charts
    # ------------------------------------------------------------------

    def create_action_success_chart(self, action_name: str, results: Dict) -> str:
        """Create a bar chart showing success rate for each phrase of an action.

        Args:
            action_name: Name of the action
            results: Test results for the action

        Returns:
            HTML div containing the Plotly chart
        """
        phrase_results = results["phrase_results"]
        # Truncate long phrases so the rotated tick labels stay readable.
        phrases = [self._shorten(phrase, 40) for phrase in phrase_results]
        success_rates = [data["success_rate"] for data in phrase_results.values()]
        colors = [self._score_color(rate) for rate in success_rates]

        fig = go.Figure(
            data=[
                go.Bar(
                    x=phrases,
                    y=success_rates,
                    marker_color=colors,
                    text=[f"{rate:.1f}%" for rate in success_rates],
                    textposition="outside",
                    hovertemplate="<b>%{x}</b><br>Success Rate: %{y:.1f}%<extra></extra>",
                )
            ]
        )

        fig.update_layout(
            title=f"{action_name} - Success Rate by Phrase",
            xaxis_title="Test Phrase",
            yaxis_title="Success Rate (%)",
            # Head-room above 100 so the "outside" bar labels are not clipped.
            yaxis_range=[0, 110],
            showlegend=False,
            height=400,
            margin=dict(b=100),
            xaxis_tickangle=-45,
        )

        return self._to_div(fig, self._chart_div_id("success", action_name))

    def create_timing_performance_chart(self, action_name: str, results: Dict) -> str:
        """Create a combined chart showing timing performance for each phrase.

        Average time is drawn as bars on the primary y-axis and the
        consistency score as a line on the secondary y-axis.

        Args:
            action_name: Name of the action
            results: Test results for the action

        Returns:
            HTML div containing the Plotly chart
        """
        phrase_results = results["phrase_results"]
        phrases = [self._shorten(phrase, 30) for phrase in phrase_results]
        timing = [data["timing_stats"] for data in phrase_results.values()]
        avg_times = [stats["mean"] for stats in timing]
        consistency_scores = [stats["consistency_score"] for stats in timing]
        colors_time = [self._latency_color(seconds) for seconds in avg_times]
        colors_consistency = [self._score_color(score) for score in consistency_scores]

        # Create subplot with secondary y-axis
        fig = make_subplots(specs=[[{"secondary_y": True}]])

        # Add average time bars
        fig.add_trace(
            go.Bar(
                name="Average Time",
                x=phrases,
                y=avg_times,
                marker_color=colors_time,
                text=[f"{seconds:.2f}s" for seconds in avg_times],
                textposition="outside",
                hovertemplate="<b>%{x}</b><br>Average Time: %{y:.2f}s<extra></extra>",
                yaxis="y",
            ),
            secondary_y=False,
        )

        # Add consistency line
        fig.add_trace(
            go.Scatter(
                name="Consistency Score",
                x=phrases,
                y=consistency_scores,
                mode="lines+markers",
                line=dict(color="purple", width=3),
                marker=dict(
                    size=8, color=colors_consistency, line=dict(color="purple", width=2)
                ),
                text=[f"{score:.1f}" for score in consistency_scores],
                textposition="top center",
                hovertemplate="<b>%{x}</b><br>Consistency: %{y:.1f}/100<extra></extra>",
                yaxis="y2",
            ),
            secondary_y=True,
        )

        fig.update_xaxes(title_text="Test Phrase", tickangle=-45)
        fig.update_yaxes(title_text="Average Time (seconds)", secondary_y=False)
        fig.update_yaxes(
            title_text="Consistency Score (0-100)", secondary_y=True, range=[0, 110]
        )

        fig.update_layout(
            title=f"{action_name} - Timing Performance Analysis",
            height=500,
            margin=dict(b=100),
            legend=dict(
                orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1
            ),
        )

        return self._to_div(fig, self._chart_div_id("timing", action_name))

    def create_secondary_actions_chart(self, action_name: str, results: Dict) -> str:
        """Create a grouped bar chart showing secondary actions triggered for each phrase.

        When no phrase triggered any secondary action, an empty chart with an
        explanatory message is returned instead.

        Args:
            action_name: Name of the action
            results: Test results for the action

        Returns:
            HTML div containing the Plotly chart
        """
        phrase_results = results["phrase_results"]
        div_id = self._chart_div_id("secondary", action_name)

        # Collect all unique secondary actions across all phrases
        all_secondary_actions = set()
        for phrase_data in phrase_results.values():
            all_secondary_actions.update(phrase_data["secondary_action_counts"])

        if not all_secondary_actions:
            # No secondary actions triggered - create an empty chart with message
            fig = go.Figure()
            fig.add_annotation(
                text="No secondary actions were triggered for any test phrase",
                xref="paper",
                yref="paper",
                x=0.5,
                y=0.5,
                showarrow=False,
                font=dict(size=14),
            )
            fig.update_layout(
                title=f"{action_name} - Secondary Actions Triggered",
                height=400,
                xaxis=dict(visible=False),
                yaxis=dict(visible=False),
            )
            return self._to_div(fig, div_id)

        phrases = [self._shorten(phrase, 30) for phrase in phrase_results]

        # One trace per secondary action; sorted so legend order is stable.
        traces = []
        for secondary_action in sorted(all_secondary_actions):
            percentages = []
            for phrase_data in phrase_results.values():
                count = phrase_data["secondary_action_counts"].get(secondary_action, 0)
                total = phrase_data["total"]
                # Store as percentage; guard against phrases with no runs.
                percentages.append((count / total * 100) if total > 0 else 0)

            traces.append(
                go.Bar(
                    name=secondary_action,
                    x=phrases,
                    y=percentages,
                    text=[f"{pct:.0f}%" if pct > 0 else "" for pct in percentages],
                    textposition="outside",
                    hovertemplate="<b>%{x}</b><br>"
                    + f"{secondary_action}: "
                    + "%{y:.1f}%<extra></extra>",
                )
            )

        fig = go.Figure(data=traces)

        fig.update_layout(
            title=f"{action_name} - Secondary Actions Triggered by Phrase",
            xaxis_title="Test Phrase",
            yaxis_title="Trigger Rate (%)",
            barmode="group",
            height=500,
            margin=dict(b=100),
            xaxis_tickangle=-45,
            legend=dict(
                orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1
            ),
        )

        return self._to_div(fig, div_id)

    # ------------------------------------------------------------------
    # Cross-action charts
    # ------------------------------------------------------------------

    def create_overall_summary_chart(self, test_results: Dict) -> str:
        """Create an overall summary chart showing all actions' performance.

        Success rate is drawn as bars (green when at or above the pass
        threshold, red otherwise) and average time as a line on the
        secondary y-axis.

        Args:
            test_results: All test results

        Returns:
            HTML div containing the Plotly chart
        """
        action_names = list(test_results)
        success_rates = [
            data["results"]["success_rate"] for data in test_results.values()
        ]
        avg_times = [
            data["results"]["overall_timing_stats"]["mean"]
            for data in test_results.values()
        ]
        # Color based on pass/fail
        colors = [
            "green" if rate >= self.PASS_THRESHOLD else "red" for rate in success_rates
        ]

        # Create subplot with secondary y-axis for timing
        fig = make_subplots(specs=[[{"secondary_y": True}]])

        # Add success rate bars
        fig.add_trace(
            go.Bar(
                name="Success Rate",
                x=action_names,
                y=success_rates,
                marker_color=colors,
                text=[f"{rate:.1f}%" for rate in success_rates],
                textposition="outside",
                hovertemplate="<b>%{x}</b><br>Success Rate: %{y:.1f}%<extra></extra>",
                yaxis="y",
            ),
            secondary_y=False,
        )

        # Add average time line
        fig.add_trace(
            go.Scatter(
                name="Average Time",
                x=action_names,
                y=avg_times,
                mode="lines+markers",
                line=dict(color="purple", width=3),
                marker=dict(size=10, color="purple"),
                text=[f"{seconds:.2f}s" for seconds in avg_times],
                textposition="top center",
                hovertemplate="<b>%{x}</b><br>Average Time: %{y:.2f}s<extra></extra>",
                yaxis="y2",
            ),
            secondary_y=True,
        )

        # Add pass threshold line for success rate
        fig.add_hline(
            y=self.PASS_THRESHOLD,
            line_dash="dash",
            line_color="orange",
            annotation_text=f"Pass Threshold ({self.PASS_THRESHOLD}%)",
            secondary_y=False,
        )

        fig.update_xaxes(title_text="Action", tickangle=-45)
        fig.update_yaxes(
            title_text="Success Rate (%)", range=[0, 110], secondary_y=False
        )
        fig.update_yaxes(title_text="Average Time (seconds)", secondary_y=True)

        fig.update_layout(
            title="Overall Vibe Test Results - Success Rate & Performance",
            height=500,
            legend=dict(
                orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1
            ),
        )

        return self._to_div(fig, "overall-summary")

    def create_performance_comparison_chart(self, test_results: Dict) -> str:
        """Create a scatter plot comparing consistency vs speed for all actions.

        Each action is one marker: x is its average time, y its consistency
        score; colour and size both encode the success rate.

        Args:
            test_results: All test results (may be empty)

        Returns:
            HTML div containing the Plotly chart
        """
        action_names = list(test_results)
        avg_times = []
        consistency_scores = []
        success_rates = []
        for test_data in test_results.values():
            timing_stats = test_data["results"]["overall_timing_stats"]
            avg_times.append(timing_stats["mean"])
            consistency_scores.append(timing_stats["consistency_score"])
            success_rates.append(test_data["results"]["success_rate"])

        colors = [self._score_color(rate) for rate in success_rates]
        # Size based on success rate (larger = better), never below 10px.
        sizes = [max(10, rate / 2) for rate in success_rates]

        # ``default`` keeps an empty result set from raising ValueError.
        slowest = max(avg_times, default=0.0)
        # Fall back to a unit-wide axis when there is nothing (or only zeros) to plot.
        x_upper = slowest * 1.1 if slowest > 0 else 1.0

        fig = go.Figure(
            data=go.Scatter(
                x=avg_times,
                y=consistency_scores,
                mode="markers+text",
                marker=dict(
                    size=sizes,
                    color=colors,
                    line=dict(color="black", width=1),
                    opacity=0.8,
                ),
                text=action_names,
                textposition="top center",
                customdata=success_rates,
                hovertemplate="<b>%{text}</b><br>"
                + "Average Time: %{x:.2f}s<br>"
                + "Consistency: %{y:.1f}/100<br>"
                + "Success Rate: %{customdata:.1f}%"
                + "<extra></extra>",
                name="",
            )
        )

        fig.update_layout(
            title="Performance Comparison: Speed vs Consistency",
            xaxis_title="Average Time (seconds)",
            yaxis_title="Consistency Score (0-100)",
            xaxis=dict(range=[0, x_upper]),
            yaxis=dict(range=[0, 105]),
            height=500,
            showlegend=False,
        )

        # Add quadrant lines
        fig.add_vline(
            x=2.0,
            line_dash="dash",
            line_color="gray",
            annotation_text="2s threshold",
            annotation_position="top",
        )
        fig.add_hline(
            y=self.GOOD_THRESHOLD,
            line_dash="dash",
            line_color="gray",
            annotation_text="High Consistency (80+)",
            annotation_position="right",
        )

        # Quadrant labels are appended one by one: assigning
        # ``layout.annotations`` wholesale would discard the threshold
        # labels that add_vline/add_hline just created.
        quadrant_labels = [
            ("Fast & Consistent<br>(Ideal)", 0.5, 95, "green", "rgba(0,255,0,0.1)"),
            ("Slow but Consistent", slowest * 0.8, 95, "orange", "rgba(255,165,0,0.1)"),
            ("Fast but Inconsistent", 0.5, 20, "orange", "rgba(255,165,0,0.1)"),
            (
                "Slow & Inconsistent<br>(Needs Work)",
                slowest * 0.8,
                20,
                "red",
                "rgba(255,0,0,0.1)",
            ),
        ]
        for label, x_pos, y_pos, color, background in quadrant_labels:
            fig.add_annotation(
                text=label,
                x=x_pos,
                y=y_pos,
                showarrow=False,
                font=dict(size=12, color=color),
                bgcolor=background,
                bordercolor=color,
                borderwidth=1,
            )

        return self._to_div(fig, "performance-comparison")

    # ------------------------------------------------------------------
    # HTML assembly
    # ------------------------------------------------------------------

    def generate_html_header(self) -> str:
        """Generate the HTML header with styles and scripts.

        The plotly.js script URL is pinned to the version bundled with the
        installed plotly.py, because the ``plotly-latest`` CDN alias is no
        longer updated and may not render figures from current releases.

        Returns:
            HTML header string (opens ``<body>`` and the container div)
        """
        plotly_js_url = f"https://cdn.plot.ly/plotly-{get_plotlyjs_version()}.min.js"
        timestamp = self._esc(self.timestamp)
        model = self._esc(self.model)
        analysis_model = self._esc(self.analysis_model)

        return f"""<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Vibe Test Report - {timestamp}</title>
    <script src="{plotly_js_url}"></script>
    <style>
        body {{
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif;
            margin: 0;
            padding: 20px;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            min-height: 100vh;
        }}
        .container {{
            max-width: 1400px;
            margin: 0 auto;
            background: white;
            border-radius: 15px;
            box-shadow: 0 20px 60px rgba(0,0,0,0.3);
            padding: 40px;
        }}
        h1 {{
            color: #333;
            text-align: center;
            font-size: 2.5em;
            margin-bottom: 10px;
        }}
        .subtitle {{
            text-align: center;
            color: #666;
            margin-bottom: 30px;
        }}
        .model-info {{
            background: #f8f9fa;
            border-radius: 10px;
            padding: 20px;
            margin-bottom: 30px;
        }}
        .model-info h3 {{
            margin-top: 0;
            color: #495057;
        }}
        .model-detail {{
            display: flex;
            justify-content: space-between;
            margin: 10px 0;
        }}
        .model-label {{
            font-weight: 600;
            color: #6c757d;
        }}
        .action-section {{
            margin: 40px 0;
            padding: 30px;
            background: #f8f9fa;
            border-radius: 10px;
            border-left: 5px solid #667eea;
        }}
        .action-header {{
            margin-bottom: 20px;
        }}
        .action-name {{
            font-size: 1.8em;
            color: #333;
            margin-bottom: 10px;
        }}
        .action-description {{
            color: #666;
            font-style: italic;
            margin-bottom: 10px;
        }}
        .action-stats {{
            display: flex;
            gap: 20px;
            margin-top: 15px;
            flex-wrap: wrap;
        }}
        .stat-box {{
            background: white;
            padding: 10px 15px;
            border-radius: 8px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }}
        .stat-label {{
            font-size: 0.9em;
            color: #6c757d;
        }}
        .stat-value {{
            font-size: 1.5em;
            font-weight: bold;
            color: #333;
        }}
        .pass {{
            color: #28a745;
        }}
        .fail {{
            color: #dc3545;
        }}
        .chart-container {{
            margin: 20px 0;
        }}
        .summary-section {{
            margin-top: 40px;
            padding: 30px;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            border-radius: 10px;
            color: white;
        }}
        .summary-title {{
            font-size: 2em;
            margin-bottom: 20px;
            text-align: center;
        }}
        .summary-stats {{
            display: flex;
            justify-content: space-around;
            flex-wrap: wrap;
            gap: 20px;
        }}
        .footer {{
            text-align: center;
            margin-top: 40px;
            padding-top: 20px;
            border-top: 2px solid #e9ecef;
            color: #6c757d;
        }}
        .timing-highlight {{
            background: #e3f2fd;
            border-left: 4px solid #2196f3;
            padding: 15px;
            margin: 10px 0;
            border-radius: 5px;
        }}
    </style>
</head>
<body>
    <div class="container">
        <h1>🧪 Vibe Test Report</h1>
        <div class="subtitle">AI Decision-Making Consistency & Performance Analysis</div>
        <div class="subtitle">Generated: {timestamp}</div>

        <div class="model-info">
            <h3>Test Configuration</h3>
            <div class="model-detail">
                <span class="model-label">Chat Model:</span>
                <span>{model}</span>
            </div>
            <div class="model-detail">
                <span class="model-label">Analysis Model:</span>
                <span>{analysis_model}</span>
            </div>
            <div class="model-detail">
                <span class="model-label">Test Mode:</span>
                <span>Multi-action selection with timing analysis</span>
            </div>
        </div>
"""

    def generate_action_section(self, action_name: str, test_data: Dict) -> str:
        """Generate HTML for a single action's results.

        Args:
            action_name: Name of the action
            test_data: Test data for the action

        Returns:
            HTML string for the action section
        """
        results = test_data["results"]
        passed = test_data["passed"]
        status_icon = "✅" if passed else "❌"
        status_text = "PASS" if passed else "FAIL"
        pass_class = "pass" if passed else "fail"

        timing_stats = results["overall_timing_stats"]

        # Charts receive the raw name; Plotly handles its own text rendering.
        success_chart = self.create_action_success_chart(action_name, results)
        timing_chart = self.create_timing_performance_chart(action_name, results)
        secondary_chart = self.create_secondary_actions_chart(action_name, results)

        # Free-text values are escaped so markup characters cannot break the page.
        safe_name = self._esc(action_name)
        safe_description = self._esc(results["action_description"])
        safe_category = self._esc(timing_stats["performance_category"])

        return f"""
    <div class="action-section">
        <div class="action-header">
            <div class="action-name">{safe_name} {status_icon}</div>
            <div class="action-description">{safe_description}</div>
            <div class="action-stats">
                <div class="stat-box">
                    <div class="stat-label">Overall Success Rate</div>
                    <div class="stat-value {pass_class}">{results['success_rate']:.1f}%</div>
                </div>
                <div class="stat-box">
                    <div class="stat-label">Tests Passed</div>
                    <div class="stat-value">{results['total_correct']}/{results['total_tests']}</div>
                </div>
                <div class="stat-box">
                    <div class="stat-label">Average Time</div>
                    <div class="stat-value">{timing_stats['mean']:.2f}s</div>
                </div>
                <div class="stat-box">
                    <div class="stat-label">Performance</div>
                    <div class="stat-value">{safe_category}</div>
                </div>
                <div class="stat-box">
                    <div class="stat-label">Consistency</div>
                    <div class="stat-value">{timing_stats['consistency_score']:.1f}/100</div>
                </div>
                <div class="stat-box">
                    <div class="stat-label">Status</div>
                    <div class="stat-value {pass_class}">{status_text}</div>
                </div>
            </div>
            <div class="timing-highlight">
                <strong>⏱️ Timing Analysis:</strong>
                Range: {timing_stats['min']:.2f}s - {timing_stats['max']:.2f}s |
                Median: {timing_stats['median']:.2f}s |
                95th percentile: {timing_stats['p95']:.2f}s
            </div>
        </div>
        <div class="chart-container">
            {success_chart}
        </div>
        <div class="chart-container">
            {timing_chart}
        </div>
        <div class="chart-container">
            {secondary_chart}
        </div>
    </div>
"""

    def generate_summary_section(self, test_results: Dict) -> str:
        """Generate the summary section of the report.

        Args:
            test_results: All test results (may be empty)

        Returns:
            HTML string for the summary section
        """
        total_actions = len(test_results)
        passed_actions = sum(1 for data in test_results.values() if data["passed"])
        failed_actions = total_actions - passed_actions
        # An empty run must not be reported as a success.
        all_passed = total_actions > 0 and passed_actions == total_actions

        overall_class = "pass" if all_passed else "fail"
        overall_text = (
            "ALL PASS" if all_passed else f"{passed_actions}/{total_actions} PASS"
        )

        # Pool every raw measurement so the averages are weighted per test run.
        all_times = []
        for test_data in test_results.values():
            all_times.extend(test_data["results"]["overall_timing_stats"]["raw_times"])

        if all_times:
            avg_overall_time = sum(all_times) / len(all_times)
            fastest_overall = min(all_times)
            slowest_overall = max(all_times)
        else:
            avg_overall_time = fastest_overall = slowest_overall = 0.0

        return f"""
    <div class="summary-section">
        <div class="summary-title">Test Summary</div>
        <div class="summary-stats">
            <div class="stat-box" style="background: rgba(255,255,255,0.9); color: #333;">
                <div class="stat-label">Total Actions Tested</div>
                <div class="stat-value">{total_actions}</div>
            </div>
            <div class="stat-box" style="background: rgba(255,255,255,0.9); color: #333;">
                <div class="stat-label">Actions Passed</div>
                <div class="stat-value pass">{passed_actions}</div>
            </div>
            <div class="stat-box" style="background: rgba(255,255,255,0.9); color: #333;">
                <div class="stat-label">Actions Failed</div>
                <div class="stat-value fail">{failed_actions}</div>
            </div>
            <div class="stat-box" style="background: rgba(255,255,255,0.9); color: #333;">
                <div class="stat-label">Overall Result</div>
                <div class="stat-value {overall_class}">
                    {overall_text}
                </div>
            </div>
            <div class="stat-box" style="background: rgba(255,255,255,0.9); color: #333;">
                <div class="stat-label">Average Response Time</div>
                <div class="stat-value">{avg_overall_time:.2f}s</div>
            </div>
            <div class="stat-box" style="background: rgba(255,255,255,0.9); color: #333;">
                <div class="stat-label">Response Range</div>
                <div class="stat-value">{fastest_overall:.2f}s - {slowest_overall:.2f}s</div>
            </div>
        </div>
    </div>
"""

    def generate_footer(self) -> str:
        """Generate the HTML footer.

        Returns:
            HTML footer string (closes the container div, body and document)
        """
        model = self._esc(self.model)
        analysis_model = self._esc(self.analysis_model)
        return f"""
        <div class="footer">
            <p>Report generated by OllamaPy Vibe Test Runner with Timing Analysis</p>
            <p>Models: {model} (chat) | {analysis_model} (analysis)</p>
            <p>Timing measurements include full action selection pipeline analysis</p>
        </div>
    </div>
</body>
</html>
"""

    def generate_full_report(self, test_results: Dict) -> str:
        """Generate the complete HTML report.

        Sections are emitted in order: header, overall summary chart,
        performance comparison chart, one section per action, summary, footer.

        Args:
            test_results: All test results

        Returns:
            Complete HTML report as a string
        """
        html_parts = [
            self.generate_html_header(),
            '<div class="chart-container">',
            self.create_overall_summary_chart(test_results),
            "</div>",
            '<div class="chart-container">',
            self.create_performance_comparison_chart(test_results),
            "</div>",
        ]

        html_parts.extend(
            self.generate_action_section(action_name, test_data)
            for action_name, test_data in test_results.items()
        )

        html_parts.append(self.generate_summary_section(test_results))
        html_parts.append(self.generate_footer())

        return "".join(html_parts)

    def save_report(self, test_results: Dict, filename: Optional[str] = None) -> str:
        """Save the HTML report to a file.

        Args:
            test_results: All test results
            filename: Optional filename (defaults to timestamped name)

        Returns:
            The filename where the report was saved
        """
        if filename is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"vibe_test_report_{timestamp}.html"

        # Render before opening the file so a rendering error leaves no empty file behind.
        html_content = self.generate_full_report(test_results)

        with open(filename, "w", encoding="utf-8") as f:
            f.write(html_content)

        return filename