Coverage for src/ollamapy/vibe_report.py: 10%

178 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-01 12:29 -0400

1"""Visual report generation for vibe test results using Plotly with timing analysis.""" 

2 

3from typing import Dict, List, Any 

4from datetime import datetime 

5import plotly.graph_objects as go 

6from plotly.subplots import make_subplots 

7import plotly.io as pio 

8 

9 

10class VibeTestReportGenerator: 

11 """Generates HTML reports with Plotly visualizations for vibe test results including timing analysis.""" 

12 

13 def __init__(self, model: str, analysis_model: str): 

14 """Initialize the report generator. 

15 

16 Args: 

17 model: The chat model used for testing 

18 analysis_model: The analysis model used for testing 

19 """ 

20 self.model = model 

21 self.analysis_model = analysis_model 

22 self.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 

23 

24 def create_action_success_chart(self, action_name: str, results: Dict) -> str: 

25 """Create a bar chart showing success rate for each phrase of an action. 

26 

27 Args: 

28 action_name: Name of the action 

29 results: Test results for the action 

30 

31 Returns: 

32 HTML div containing the Plotly chart 

33 """ 

34 phrases = [] 

35 success_rates = [] 

36 colors = [] 

37 

38 for phrase, data in results["phrase_results"].items(): 

39 # Truncate long phrases for display 

40 display_phrase = phrase[:40] + "..." if len(phrase) > 40 else phrase 

41 phrases.append(display_phrase) 

42 success_rates.append(data["success_rate"]) 

43 # Color based on success rate 

44 if data["success_rate"] >= 80: 

45 colors.append("green") 

46 elif data["success_rate"] >= 60: 

47 colors.append("yellow") 

48 else: 

49 colors.append("red") 

50 

51 fig = go.Figure( 

52 data=[ 

53 go.Bar( 

54 x=phrases, 

55 y=success_rates, 

56 marker_color=colors, 

57 text=[f"{rate:.1f}%" for rate in success_rates], 

58 textposition="outside", 

59 hovertemplate="<b>%{x}</b><br>Success Rate: %{y:.1f}%<extra></extra>", 

60 ) 

61 ] 

62 ) 

63 

64 fig.update_layout( 

65 title=f"{action_name} - Success Rate by Phrase", 

66 xaxis_title="Test Phrase", 

67 yaxis_title="Success Rate (%)", 

68 yaxis_range=[0, 110], 

69 showlegend=False, 

70 height=400, 

71 margin=dict(b=100), 

72 xaxis_tickangle=-45, 

73 ) 

74 

75 # Convert to HTML div 

76 return fig.to_html( 

77 full_html=False, 

78 include_plotlyjs=False, 

79 div_id=f"success-{action_name.replace(' ', '-')}", 

80 ) 

81 

82 def create_timing_performance_chart(self, action_name: str, results: Dict) -> str: 

83 """Create a combined chart showing timing performance for each phrase. 

84 

85 Args: 

86 action_name: Name of the action 

87 results: Test results for the action 

88 

89 Returns: 

90 HTML div containing the Plotly chart 

91 """ 

92 phrases = [] 

93 avg_times = [] 

94 consistency_scores = [] 

95 colors_time = [] 

96 colors_consistency = [] 

97 

98 for phrase, data in results["phrase_results"].items(): 

99 display_phrase = phrase[:30] + "..." if len(phrase) > 30 else phrase 

100 phrases.append(display_phrase) 

101 

102 timing_stats = data["timing_stats"] 

103 avg_times.append(timing_stats["mean"]) 

104 consistency_scores.append(timing_stats["consistency_score"]) 

105 

106 # Color coding for average time 

107 if timing_stats["mean"] < 1.0: 

108 colors_time.append("green") 

109 elif timing_stats["mean"] < 3.0: 

110 colors_time.append("yellow") 

111 else: 

112 colors_time.append("red") 

113 

114 # Color coding for consistency 

115 if timing_stats["consistency_score"] >= 80: 

116 colors_consistency.append("green") 

117 elif timing_stats["consistency_score"] >= 60: 

118 colors_consistency.append("yellow") 

119 else: 

120 colors_consistency.append("red") 

121 

122 # Create subplot with secondary y-axis 

123 fig = make_subplots(specs=[[{"secondary_y": True}]]) 

124 

125 # Add average time bars 

126 fig.add_trace( 

127 go.Bar( 

128 name="Average Time", 

129 x=phrases, 

130 y=avg_times, 

131 marker_color=colors_time, 

132 text=[f"{time:.2f}s" for time in avg_times], 

133 textposition="outside", 

134 hovertemplate="<b>%{x}</b><br>Average Time: %{y:.2f}s<extra></extra>", 

135 yaxis="y", 

136 ), 

137 secondary_y=False, 

138 ) 

139 

140 # Add consistency line 

141 fig.add_trace( 

142 go.Scatter( 

143 name="Consistency Score", 

144 x=phrases, 

145 y=consistency_scores, 

146 mode="lines+markers", 

147 line=dict(color="purple", width=3), 

148 marker=dict( 

149 size=8, color=colors_consistency, line=dict(color="purple", width=2) 

150 ), 

151 text=[f"{score:.1f}" for score in consistency_scores], 

152 textposition="top center", 

153 hovertemplate="<b>%{x}</b><br>Consistency: %{y:.1f}/100<extra></extra>", 

154 yaxis="y2", 

155 ), 

156 secondary_y=True, 

157 ) 

158 

159 # Set x-axis title 

160 fig.update_xaxes(title_text="Test Phrase", tickangle=-45) 

161 

162 # Set y-axes titles 

163 fig.update_yaxes(title_text="Average Time (seconds)", secondary_y=False) 

164 fig.update_yaxes( 

165 title_text="Consistency Score (0-100)", secondary_y=True, range=[0, 110] 

166 ) 

167 

168 fig.update_layout( 

169 title=f"{action_name} - Timing Performance Analysis", 

170 height=500, 

171 margin=dict(b=100), 

172 legend=dict( 

173 orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1 

174 ), 

175 ) 

176 

177 return fig.to_html( 

178 full_html=False, 

179 include_plotlyjs=False, 

180 div_id=f"timing-{action_name.replace(' ', '-')}", 

181 ) 

182 

183 def create_secondary_actions_chart(self, action_name: str, results: Dict) -> str: 

184 """Create a grouped bar chart showing secondary actions triggered for each phrase. 

185 

186 Args: 

187 action_name: Name of the action 

188 results: Test results for the action 

189 

190 Returns: 

191 HTML div containing the Plotly chart 

192 """ 

193 # Collect all unique secondary actions across all phrases 

194 all_secondary_actions = set() 

195 for phrase_data in results["phrase_results"].values(): 

196 all_secondary_actions.update(phrase_data["secondary_action_counts"].keys()) 

197 

198 if not all_secondary_actions: 

199 # No secondary actions triggered - create an empty chart with message 

200 fig = go.Figure() 

201 fig.add_annotation( 

202 text="No secondary actions were triggered for any test phrase", 

203 xref="paper", 

204 yref="paper", 

205 x=0.5, 

206 y=0.5, 

207 showarrow=False, 

208 font=dict(size=14), 

209 ) 

210 fig.update_layout( 

211 title=f"{action_name} - Secondary Actions Triggered", 

212 height=400, 

213 xaxis=dict(visible=False), 

214 yaxis=dict(visible=False), 

215 ) 

216 return fig.to_html( 

217 full_html=False, 

218 include_plotlyjs=False, 

219 div_id=f"secondary-{action_name.replace(' ', '-')}", 

220 ) 

221 

222 # Prepare data for grouped bar chart 

223 phrases = [] 

224 traces = [] 

225 

226 for phrase, data in results["phrase_results"].items(): 

227 display_phrase = phrase[:30] + "..." if len(phrase) > 30 else phrase 

228 phrases.append(display_phrase) 

229 

230 # Create a trace for each secondary action 

231 for secondary_action in sorted(all_secondary_actions): 

232 counts = [] 

233 for phrase_data in results["phrase_results"].values(): 

234 count = phrase_data["secondary_action_counts"].get(secondary_action, 0) 

235 total = phrase_data["total"] 

236 # Store as percentage 

237 percentage = (count / total * 100) if total > 0 else 0 

238 counts.append(percentage) 

239 

240 traces.append( 

241 go.Bar( 

242 name=secondary_action, 

243 x=phrases, 

244 y=counts, 

245 text=[f"{c:.0f}%" if c > 0 else "" for c in counts], 

246 textposition="outside", 

247 hovertemplate="<b>%{x}</b><br>" 

248 + f"{secondary_action}: " 

249 + "%{y:.1f}%<extra></extra>", 

250 ) 

251 ) 

252 

253 fig = go.Figure(data=traces) 

254 

255 fig.update_layout( 

256 title=f"{action_name} - Secondary Actions Triggered by Phrase", 

257 xaxis_title="Test Phrase", 

258 yaxis_title="Trigger Rate (%)", 

259 barmode="group", 

260 height=500, 

261 margin=dict(b=100), 

262 xaxis_tickangle=-45, 

263 legend=dict( 

264 orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1 

265 ), 

266 ) 

267 

268 return fig.to_html( 

269 full_html=False, 

270 include_plotlyjs=False, 

271 div_id=f"secondary-{action_name.replace(' ', '-')}", 

272 ) 

273 

274 def create_overall_summary_chart(self, test_results: Dict) -> str: 

275 """Create an overall summary chart showing all actions' performance. 

276 

277 Args: 

278 test_results: All test results 

279 

280 Returns: 

281 HTML div containing the Plotly chart 

282 """ 

283 action_names = [] 

284 success_rates = [] 

285 avg_times = [] 

286 colors = [] 

287 

288 for action_name, test_data in test_results.items(): 

289 action_names.append(action_name) 

290 rate = test_data["results"]["success_rate"] 

291 avg_time = test_data["results"]["overall_timing_stats"]["mean"] 

292 success_rates.append(rate) 

293 avg_times.append(avg_time) 

294 

295 # Color based on pass/fail 

296 if rate >= 60: 

297 colors.append("green") 

298 else: 

299 colors.append("red") 

300 

301 # Create subplot with secondary y-axis for timing 

302 fig = make_subplots(specs=[[{"secondary_y": True}]]) 

303 

304 # Add success rate bars 

305 fig.add_trace( 

306 go.Bar( 

307 name="Success Rate", 

308 x=action_names, 

309 y=success_rates, 

310 marker_color=colors, 

311 text=[f"{rate:.1f}%" for rate in success_rates], 

312 textposition="outside", 

313 hovertemplate="<b>%{x}</b><br>Success Rate: %{y:.1f}%<extra></extra>", 

314 yaxis="y", 

315 ), 

316 secondary_y=False, 

317 ) 

318 

319 # Add average time line 

320 fig.add_trace( 

321 go.Scatter( 

322 name="Average Time", 

323 x=action_names, 

324 y=avg_times, 

325 mode="lines+markers", 

326 line=dict(color="purple", width=3), 

327 marker=dict(size=10, color="purple"), 

328 text=[f"{time:.2f}s" for time in avg_times], 

329 textposition="top center", 

330 hovertemplate="<b>%{x}</b><br>Average Time: %{y:.2f}s<extra></extra>", 

331 yaxis="y2", 

332 ), 

333 secondary_y=True, 

334 ) 

335 

336 # Add pass threshold line for success rate 

337 fig.add_hline( 

338 y=60, 

339 line_dash="dash", 

340 line_color="orange", 

341 annotation_text="Pass Threshold (60%)", 

342 secondary_y=False, 

343 ) 

344 

345 # Set x-axis title 

346 fig.update_xaxes(title_text="Action", tickangle=-45) 

347 

348 # Set y-axes titles 

349 fig.update_yaxes( 

350 title_text="Success Rate (%)", range=[0, 110], secondary_y=False 

351 ) 

352 fig.update_yaxes(title_text="Average Time (seconds)", secondary_y=True) 

353 

354 fig.update_layout( 

355 title="Overall Vibe Test Results - Success Rate & Performance", 

356 height=500, 

357 legend=dict( 

358 orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1 

359 ), 

360 ) 

361 

362 return fig.to_html( 

363 full_html=False, include_plotlyjs=False, div_id="overall-summary" 

364 ) 

365 

366 def create_performance_comparison_chart(self, test_results: Dict) -> str: 

367 """Create a scatter plot comparing consistency vs speed for all actions. 

368 

369 Args: 

370 test_results: All test results 

371 

372 Returns: 

373 HTML div containing the Plotly chart 

374 """ 

375 action_names = [] 

376 avg_times = [] 

377 consistency_scores = [] 

378 success_rates = [] 

379 colors = [] 

380 sizes = [] 

381 

382 for action_name, test_data in test_results.items(): 

383 timing_stats = test_data["results"]["overall_timing_stats"] 

384 action_names.append(action_name) 

385 avg_times.append(timing_stats["mean"]) 

386 consistency_scores.append(timing_stats["consistency_score"]) 

387 success_rate = test_data["results"]["success_rate"] 

388 success_rates.append(success_rate) 

389 

390 # Color based on success rate 

391 if success_rate >= 80: 

392 colors.append("green") 

393 elif success_rate >= 60: 

394 colors.append("yellow") 

395 else: 

396 colors.append("red") 

397 

398 # Size based on success rate (larger = better) 

399 sizes.append(max(10, success_rate / 2)) 

400 

401 fig = go.Figure( 

402 data=go.Scatter( 

403 x=avg_times, 

404 y=consistency_scores, 

405 mode="markers+text", 

406 marker=dict( 

407 size=sizes, 

408 color=colors, 

409 line=dict(color="black", width=1), 

410 opacity=0.8, 

411 ), 

412 text=action_names, 

413 textposition="top center", 

414 customdata=success_rates, 

415 hovertemplate="<b>%{text}</b><br>" 

416 + "Average Time: %{x:.2f}s<br>" 

417 + "Consistency: %{y:.1f}/100<br>" 

418 + "Success Rate: %{customdata:.1f}%" 

419 + "<extra></extra>", 

420 name="", 

421 ) 

422 ) 

423 

424 # Add quadrant lines 

425 fig.add_vline( 

426 x=2.0, 

427 line_dash="dash", 

428 line_color="gray", 

429 annotation_text="2s threshold", 

430 annotation_position="top", 

431 ) 

432 fig.add_hline( 

433 y=80, 

434 line_dash="dash", 

435 line_color="gray", 

436 annotation_text="High Consistency (80+)", 

437 annotation_position="right", 

438 ) 

439 

440 fig.update_layout( 

441 title="Performance Comparison: Speed vs Consistency", 

442 xaxis_title="Average Time (seconds)", 

443 yaxis_title="Consistency Score (0-100)", 

444 xaxis=dict(range=[0, max(avg_times) * 1.1]), 

445 yaxis=dict(range=[0, 105]), 

446 height=500, 

447 showlegend=False, 

448 annotations=[ 

449 dict( 

450 text="Fast & Consistent<br>(Ideal)", 

451 x=0.5, 

452 y=95, 

453 showarrow=False, 

454 font=dict(size=12, color="green"), 

455 bgcolor="rgba(0,255,0,0.1)", 

456 bordercolor="green", 

457 borderwidth=1, 

458 ), 

459 dict( 

460 text="Slow but Consistent", 

461 x=max(avg_times) * 0.8, 

462 y=95, 

463 showarrow=False, 

464 font=dict(size=12, color="orange"), 

465 bgcolor="rgba(255,165,0,0.1)", 

466 bordercolor="orange", 

467 borderwidth=1, 

468 ), 

469 dict( 

470 text="Fast but Inconsistent", 

471 x=0.5, 

472 y=20, 

473 showarrow=False, 

474 font=dict(size=12, color="orange"), 

475 bgcolor="rgba(255,165,0,0.1)", 

476 bordercolor="orange", 

477 borderwidth=1, 

478 ), 

479 dict( 

480 text="Slow & Inconsistent<br>(Needs Work)", 

481 x=max(avg_times) * 0.8, 

482 y=20, 

483 showarrow=False, 

484 font=dict(size=12, color="red"), 

485 bgcolor="rgba(255,0,0,0.1)", 

486 bordercolor="red", 

487 borderwidth=1, 

488 ), 

489 ], 

490 ) 

491 

492 return fig.to_html( 

493 full_html=False, include_plotlyjs=False, div_id="performance-comparison" 

494 ) 

495 

496 def generate_html_header(self) -> str: 

497 """Generate the HTML header with styles and scripts. 

498 

499 Returns: 

500 HTML header string 

501 """ 

502 return f"""<!DOCTYPE html> 

503<html lang="en"> 

504<head> 

505 <meta charset="utf-8"> 

506 <meta name="viewport" content="width=device-width, initial-scale=1.0"> 

507 <title>Vibe Test Report - {self.timestamp}</title> 

508 <script src="https://cdn.plot.ly/plotly-latest.min.js"></script> 

509 <style> 

510 body {{ 

511 font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif; 

512 margin: 0; 

513 padding: 20px; 

514 background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); 

515 min-height: 100vh; 

516 }} 

517 .container {{ 

518 max-width: 1400px; 

519 margin: 0 auto; 

520 background: white; 

521 border-radius: 15px; 

522 box-shadow: 0 20px 60px rgba(0,0,0,0.3); 

523 padding: 40px; 

524 }} 

525 h1 {{ 

526 color: #333; 

527 text-align: center; 

528 font-size: 2.5em; 

529 margin-bottom: 10px; 

530 }} 

531 .subtitle {{ 

532 text-align: center; 

533 color: #666; 

534 margin-bottom: 30px; 

535 }} 

536 .model-info {{ 

537 background: #f8f9fa; 

538 border-radius: 10px; 

539 padding: 20px; 

540 margin-bottom: 30px; 

541 }} 

542 .model-info h3 {{ 

543 margin-top: 0; 

544 color: #495057; 

545 }} 

546 .model-detail {{ 

547 display: flex; 

548 justify-content: space-between; 

549 margin: 10px 0; 

550 }} 

551 .model-label {{ 

552 font-weight: 600; 

553 color: #6c757d; 

554 }} 

555 .action-section {{ 

556 margin: 40px 0; 

557 padding: 30px; 

558 background: #f8f9fa; 

559 border-radius: 10px; 

560 border-left: 5px solid #667eea; 

561 }} 

562 .action-header {{ 

563 margin-bottom: 20px; 

564 }} 

565 .action-name {{ 

566 font-size: 1.8em; 

567 color: #333; 

568 margin-bottom: 10px; 

569 }} 

570 .action-description {{ 

571 color: #666; 

572 font-style: italic; 

573 margin-bottom: 10px; 

574 }} 

575 .action-stats {{ 

576 display: flex; 

577 gap: 20px; 

578 margin-top: 15px; 

579 flex-wrap: wrap; 

580 }} 

581 .stat-box {{ 

582 background: white; 

583 padding: 10px 15px; 

584 border-radius: 8px; 

585 box-shadow: 0 2px 4px rgba(0,0,0,0.1); 

586 }} 

587 .stat-label {{ 

588 font-size: 0.9em; 

589 color: #6c757d; 

590 }} 

591 .stat-value {{ 

592 font-size: 1.5em; 

593 font-weight: bold; 

594 color: #333; 

595 }} 

596 .pass {{ 

597 color: #28a745; 

598 }} 

599 .fail {{ 

600 color: #dc3545; 

601 }} 

602 .chart-container {{ 

603 margin: 20px 0; 

604 }} 

605 .summary-section {{ 

606 margin-top: 40px; 

607 padding: 30px; 

608 background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); 

609 border-radius: 10px; 

610 color: white; 

611 }} 

612 .summary-title {{ 

613 font-size: 2em; 

614 margin-bottom: 20px; 

615 text-align: center; 

616 }} 

617 .summary-stats {{ 

618 display: flex; 

619 justify-content: space-around; 

620 flex-wrap: wrap; 

621 gap: 20px; 

622 }} 

623 .footer {{ 

624 text-align: center; 

625 margin-top: 40px; 

626 padding-top: 20px; 

627 border-top: 2px solid #e9ecef; 

628 color: #6c757d; 

629 }} 

630 .timing-highlight {{ 

631 background: #e3f2fd; 

632 border-left: 4px solid #2196f3; 

633 padding: 15px; 

634 margin: 10px 0; 

635 border-radius: 5px; 

636 }} 

637 </style> 

638</head> 

639<body> 

640 <div class="container"> 

641 <h1>🧪 Vibe Test Report</h1> 

642 <div class="subtitle">AI Decision-Making Consistency & Performance Analysis</div> 

643 <div class="subtitle">Generated: {self.timestamp}</div> 

644  

645 <div class="model-info"> 

646 <h3>Test Configuration</h3> 

647 <div class="model-detail"> 

648 <span class="model-label">Chat Model:</span> 

649 <span>{self.model}</span> 

650 </div> 

651 <div class="model-detail"> 

652 <span class="model-label">Analysis Model:</span> 

653 <span>{self.analysis_model}</span> 

654 </div> 

655 <div class="model-detail"> 

656 <span class="model-label">Test Mode:</span> 

657 <span>Multi-action selection with timing analysis</span> 

658 </div> 

659 </div> 

660""" 

661 

662 def generate_action_section(self, action_name: str, test_data: Dict) -> str: 

663 """Generate HTML for a single action's results. 

664 

665 Args: 

666 action_name: Name of the action 

667 test_data: Test data for the action 

668 

669 Returns: 

670 HTML string for the action section 

671 """ 

672 results = test_data["results"] 

673 passed = test_data["passed"] 

674 status_icon = "✅" if passed else "❌" 

675 pass_class = "pass" if passed else "fail" 

676 

677 # Get timing stats 

678 timing_stats = results["overall_timing_stats"] 

679 

680 # Generate charts 

681 success_chart = self.create_action_success_chart(action_name, results) 

682 timing_chart = self.create_timing_performance_chart(action_name, results) 

683 secondary_chart = self.create_secondary_actions_chart(action_name, results) 

684 

685 return f""" 

686 <div class="action-section"> 

687 <div class="action-header"> 

688 <div class="action-name">{action_name} {status_icon}</div> 

689 <div class="action-description">{results['action_description']}</div> 

690 <div class="action-stats"> 

691 <div class="stat-box"> 

692 <div class="stat-label">Overall Success Rate</div> 

693 <div class="stat-value {pass_class}">{results['success_rate']:.1f}%</div> 

694 </div> 

695 <div class="stat-box"> 

696 <div class="stat-label">Tests Passed</div> 

697 <div class="stat-value">{results['total_correct']}/{results['total_tests']}</div> 

698 </div> 

699 <div class="stat-box"> 

700 <div class="stat-label">Average Time</div> 

701 <div class="stat-value">{timing_stats['mean']:.2f}s</div> 

702 </div> 

703 <div class="stat-box"> 

704 <div class="stat-label">Performance</div> 

705 <div class="stat-value">{timing_stats['performance_category']}</div> 

706 </div> 

707 <div class="stat-box"> 

708 <div class="stat-label">Consistency</div> 

709 <div class="stat-value">{timing_stats['consistency_score']:.1f}/100</div> 

710 </div> 

711 <div class="stat-box"> 

712 <div class="stat-label">Status</div> 

713 <div class="stat-value {pass_class}">{'PASS' if passed else 'FAIL'}</div> 

714 </div> 

715 </div> 

716 <div class="timing-highlight"> 

717 <strong>⏱️ Timing Analysis:</strong>  

718 Range: {timing_stats['min']:.2f}s - {timing_stats['max']:.2f}s |  

719 Median: {timing_stats['median']:.2f}s |  

720 95th percentile: {timing_stats['p95']:.2f}s 

721 </div> 

722 </div> 

723 <div class="chart-container"> 

724 {success_chart} 

725 </div> 

726 <div class="chart-container"> 

727 {timing_chart} 

728 </div> 

729 <div class="chart-container"> 

730 {secondary_chart} 

731 </div> 

732 </div> 

733""" 

734 

735 def generate_summary_section(self, test_results: Dict) -> str: 

736 """Generate the summary section of the report. 

737 

738 Args: 

739 test_results: All test results 

740 

741 Returns: 

742 HTML string for the summary section 

743 """ 

744 total_actions = len(test_results) 

745 passed_actions = sum(1 for data in test_results.values() if data["passed"]) 

746 failed_actions = total_actions - passed_actions 

747 all_passed = passed_actions == total_actions 

748 

749 # Calculate overall timing stats 

750 all_times = [] 

751 for test_data in test_results.values(): 

752 all_times.extend(test_data["results"]["overall_timing_stats"]["raw_times"]) 

753 

754 if all_times: 

755 avg_overall_time = sum(all_times) / len(all_times) 

756 fastest_overall = min(all_times) 

757 slowest_overall = max(all_times) 

758 else: 

759 avg_overall_time = fastest_overall = slowest_overall = 0.0 

760 

761 return f""" 

762 <div class="summary-section"> 

763 <div class="summary-title">Test Summary</div> 

764 <div class="summary-stats"> 

765 <div class="stat-box" style="background: rgba(255,255,255,0.9); color: #333;"> 

766 <div class="stat-label">Total Actions Tested</div> 

767 <div class="stat-value">{total_actions}</div> 

768 </div> 

769 <div class="stat-box" style="background: rgba(255,255,255,0.9); color: #333;"> 

770 <div class="stat-label">Actions Passed</div> 

771 <div class="stat-value pass">{passed_actions}</div> 

772 </div> 

773 <div class="stat-box" style="background: rgba(255,255,255,0.9); color: #333;"> 

774 <div class="stat-label">Actions Failed</div> 

775 <div class="stat-value fail">{failed_actions}</div> 

776 </div> 

777 <div class="stat-box" style="background: rgba(255,255,255,0.9); color: #333;"> 

778 <div class="stat-label">Overall Result</div> 

779 <div class="stat-value {'pass' if all_passed else 'fail'}"> 

780 {'ALL PASS' if all_passed else f'{passed_actions}/{total_actions} PASS'} 

781 </div> 

782 </div> 

783 <div class="stat-box" style="background: rgba(255,255,255,0.9); color: #333;"> 

784 <div class="stat-label">Average Response Time</div> 

785 <div class="stat-value">{avg_overall_time:.2f}s</div> 

786 </div> 

787 <div class="stat-box" style="background: rgba(255,255,255,0.9); color: #333;"> 

788 <div class="stat-label">Response Range</div> 

789 <div class="stat-value">{fastest_overall:.2f}s - {slowest_overall:.2f}s</div> 

790 </div> 

791 </div> 

792 </div> 

793""" 

794 

795 def generate_footer(self) -> str: 

796 """Generate the HTML footer. 

797 

798 Returns: 

799 HTML footer string 

800 """ 

801 return f""" 

802 <div class="footer"> 

803 <p>Report generated by OllamaPy Vibe Test Runner with Timing Analysis</p> 

804 <p>Models: {self.model} (chat) | {self.analysis_model} (analysis)</p> 

805 <p>Timing measurements include full action selection pipeline analysis</p> 

806 </div> 

807 </div> 

808</body> 

809</html> 

810""" 

811 

812 def generate_full_report(self, test_results: Dict) -> str: 

813 """Generate the complete HTML report. 

814 

815 Args: 

816 test_results: All test results 

817 

818 Returns: 

819 Complete HTML report as a string 

820 """ 

821 # Start with header 

822 html_parts = [self.generate_html_header()] 

823 

824 # Add overall summary chart 

825 html_parts.append('<div class="chart-container">') 

826 html_parts.append(self.create_overall_summary_chart(test_results)) 

827 html_parts.append("</div>") 

828 

829 # Add performance comparison chart 

830 html_parts.append('<div class="chart-container">') 

831 html_parts.append(self.create_performance_comparison_chart(test_results)) 

832 html_parts.append("</div>") 

833 

834 # Add each action section 

835 for action_name, test_data in test_results.items(): 

836 html_parts.append(self.generate_action_section(action_name, test_data)) 

837 

838 # Add summary section 

839 html_parts.append(self.generate_summary_section(test_results)) 

840 

841 # Add footer 

842 html_parts.append(self.generate_footer()) 

843 

844 return "".join(html_parts) 

845 

846 def save_report(self, test_results: Dict, filename: str = None) -> str: 

847 """Save the HTML report to a file. 

848 

849 Args: 

850 test_results: All test results 

851 filename: Optional filename (defaults to timestamped name) 

852 

853 Returns: 

854 The filename where the report was saved 

855 """ 

856 if filename is None: 

857 timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") 

858 filename = f"vibe_test_report_{timestamp}.html" 

859 

860 html_content = self.generate_full_report(test_results) 

861 

862 with open(filename, "w", encoding="utf-8") as f: 

863 f.write(html_content) 

864 

865 return filename