Coverage for src/ollamapy/ai_query.py: 69%

178 statements  

coverage.py v7.10.6, created at 2025-09-01 12:29 -0400

1""" 

2Enhanced OllamaPy with 4 Query Types, Response Parser, Context Compression, and Templated Prompting 

3Integrating the best features from todollama's AI system 

4""" 

5 

6import json 

7import logging 

8import re 

9import time 

10from dataclasses import dataclass 

11from typing import Dict, List, Optional, Tuple, Generator 

12 

13from .ollama_client import OllamaClient 

14 

15# Configure logging 

16logging.basicConfig(level=logging.INFO) 

17logger = logging.getLogger(__name__) 

18 

19 

20# ============================================================================ 

21# DATA CLASSES FOR QUERY RESULTS 

22# ============================================================================ 

23 

24 

25@dataclass 

26class MultipleChoiceResult: 

27 """Result from a multiple choice query with lettered answers""" 

28 

29 letter: str # A, B, C, etc. 

30 index: int # 0, 1, 2, etc. 

31 value: str # The actual option text 

32 confidence: float 

33 raw: str 

34 context_compressed: bool = False 

35 compression_rounds: int = 0 

36 

37 

38@dataclass 

39class SingleWordResult: 

40 """Result from a single continuous string query (no whitespace allowed)""" 

41 

42 word: str # The extracted continuous string without any whitespace 

43 confidence: float 

44 raw: str 

45 context_compressed: bool = False 

46 compression_rounds: int = 0 

47 

48 

49@dataclass 

50class OpenResult: 

51 """Result from an open essay-style response query""" 

52 

53 content: str 

54 raw: str 

55 context_compressed: bool = False 

56 compression_rounds: int = 0 

57 

58 

59@dataclass 

60class FileWriteResult: 

61 """Result from a file write query""" 

62 

63 content: str 

64 raw: str 

65 context_compressed: bool = False 

66 compression_rounds: int = 0 

67 

68 
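

# --- Illustrative sketch (not part of the original module) -----------------
# The result types above are plain dataclasses, so they can be constructed
# and inspected directly in tests; the field values here are made up.
def _demo_result_dataclass() -> None:
    result = MultipleChoiceResult(
        letter="B", index=1, value="green", confidence=0.9, raw="B"
    )
    assert result.value == "green"
    assert not result.context_compressed  # compression metadata defaults to off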

# ============================================================================
# RESPONSE PARSER
# ============================================================================


class ResponseParser:
    """Parse and extract information from AI responses"""

    @staticmethod
    def extract_code_blocks(text: str) -> List[Tuple[Optional[str], str]]:
        """Extract code blocks from text. Returns list of (language, code) tuples."""
        blocks = []

        # Pattern for fenced code blocks with optional language
        pattern = r"```(?:(\w+))?\n(.*?)\n```"
        matches = re.finditer(pattern, text, re.DOTALL)

        for match in matches:
            language = match.group(1)
            code = match.group(2)
            blocks.append((language, code))

        return blocks

    @staticmethod
    def parse_multiple_choice(
        response: str, options: List[str]
    ) -> Tuple[str, int, float]:
        """Parse multiple choice response to extract letter, index, and confidence"""
        # Look for single letter A-Z
        letter_match = re.search(r"\b([A-Z])\b", response.upper().strip())

        if letter_match:
            letter = letter_match.group(1)
            index = ord(letter) - ord("A")
            if 0 <= index < len(options):
                confidence = 0.9  # High confidence for clear letter match
                return letter, index, confidence

        # Fallback: try to match option text
        response_clean = response.lower().strip()
        for i, option in enumerate(options):
            if option.lower() in response_clean:
                letter = chr(ord("A") + i)
                confidence = 0.7  # Medium confidence for text match
                return letter, i, confidence

        # Default to first option with low confidence
        logger.warning(f"Could not parse multiple choice response: {response}")
        return "A", 0, 0.3

    @staticmethod
    def parse_single_word(response: str) -> Tuple[str, float]:
        """Parse single continuous string response (no whitespace allowed)"""
        cleaned = response.strip()

        # Extract the first continuous alphanumeric string
        match = re.search(r"^([a-zA-Z0-9_-]+)", cleaned)

        if match:
            word = match.group(1)
            confidence = 0.9 if word == cleaned else 0.7
            return word, confidence

        # Fallback: try to extract any alphanumeric sequence
        fallback_match = re.search(r"([a-zA-Z0-9]+)", cleaned)
        if fallback_match:
            word = fallback_match.group(1)
            return word, 0.5

        return "unknown", 0.3

    @staticmethod
    def clean_file_content(response: str) -> str:
        """Clean response for file content"""
        content = response.strip()

        # Remove markdown code block markers if present
        if content.startswith("```"):
            lines = content.split("\n")
            lines = lines[1:]  # Remove first line
            if lines and lines[-1].strip() == "```":
                lines = lines[:-1]  # Remove last line
            content = "\n".join(lines)

        return content.strip()
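

# --- Illustrative sketch (not part of the original module) -----------------
# How the static parsers behave on typical model replies; the inputs are
# made up and the expected values follow from the code above.
def _demo_response_parser() -> None:
    # A bare letter parses with high confidence.
    letter, index, confidence = ResponseParser.parse_multiple_choice(
        "B", ["red", "green", "blue"]
    )
    assert (letter, index, confidence) == ("B", 1, 0.9)

    # A clean single token parses with high confidence.
    word, confidence = ResponseParser.parse_single_word("helloworld")
    assert (word, confidence) == ("helloworld", 0.9)

    # Fenced blocks come back as (language, code) tuples.
    blocks = ResponseParser.extract_code_blocks("```python\nprint('hi')\n```")
    assert blocks == [("python", "print('hi')")]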

# ============================================================================
# CONTEXT COMPRESSOR
# ============================================================================


class ContextCompressor:
    """Compress large contexts to fit within model limits"""

    def __init__(self, client: OllamaClient, model: str):
        self.client = client
        self.model = model
        self.max_context = client.get_model_context_size(model)
        self.usable_context = int(
            self.max_context * 0.7
        )  # Reserve 30% for prompt/response

    def needs_compression(self, text: str) -> bool:
        """Check if text needs compression"""
        # Rough estimate: 1 token ≈ 4 characters
        estimated_tokens = len(text) / 4
        return estimated_tokens > self.usable_context

    def compress(self, text: str, query: str, max_rounds: int = 3) -> Tuple[str, int]:
        """Compress text focusing on query relevance"""
        if not self.needs_compression(text):
            return text, 0

        rounds = 0
        current_text = text

        while self.needs_compression(current_text) and rounds < max_rounds:
            rounds += 1
            logger.info(f"Compression round {rounds} - Size: {len(current_text)} chars")

            # Split into chunks
            chunks = self._split_into_chunks(current_text)
            compressed_chunks = []

            for i, chunk in enumerate(chunks):
                prompt = f"""Compress the following text, keeping ONLY information relevant to this query:

"{query}"

Remove all irrelevant details, examples, and redundancy.
Keep technical details, names, and specific information related to the query.

Text to compress:
{chunk}

Compressed version (be aggressive in removing irrelevant content):"""

                compressed = self.client.generate(self.model, prompt)
                compressed_chunks.append(compressed)
                logger.debug(f"Chunk {i+1}/{len(chunks)} compressed")

            current_text = "\n\n".join(compressed_chunks)

            # Check compression ratio
            ratio = len(current_text) / len(text)
            logger.info(f"Compression ratio: {ratio:.2%}")

            if ratio > 0.9:  # Less than 10% reduction
                logger.warning("Compression ineffective, stopping")
                break

        return current_text, rounds

    def _split_into_chunks(self, text: str, chunk_size: int = 2000) -> List[str]:
        """Split text into manageable chunks"""
        words = text.split()
        chunks = []
        current_chunk: List[str] = []
        current_size = 0

        for word in words:
            word_size = len(word) + 1  # +1 for space
            if current_size + word_size > chunk_size and current_chunk:
                chunks.append(" ".join(current_chunk))
                current_chunk = [word]
                current_size = word_size
            else:
                current_chunk.append(word)
                current_size += word_size

        if current_chunk:
            chunks.append(" ".join(current_chunk))

        return chunks
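

# --- Illustrative sketch (not part of the original module) -----------------
# ContextCompressor only relies on two client behaviors, so a tiny stub is
# enough to exercise the chunk-and-compress loop offline. The stub's context
# size and canned reply are made up.
def _demo_compressor_with_stub() -> None:
    class _StubClient:
        def get_model_context_size(self, model: str) -> int:
            return 100  # pretend the model has a 100-token window

        def generate(self, model: str, prompt: str, **kwargs) -> str:
            return "compressed"  # pretend the model shrank the chunk

    compressor = ContextCompressor(_StubClient(), "stub-model")  # type: ignore[arg-type]
    long_text = "lorem " * 1000  # ~6000 chars, far past the stub's window
    assert compressor.needs_compression(long_text)
    compressed_text, rounds = compressor.compress(long_text, "what matters here?")
    assert rounds == 1 and compressed_text  # one round collapses the stub text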

# ============================================================================
# AI QUERY INTERFACE WITH 4 QUERY TYPES
# ============================================================================


class AIQuery:
    """Enhanced AI query interface with 4 distinct query types and templated prompts"""

    # Query Templates
    TEMPLATES = {
        "multiple_choice": """Based on the context provided, answer the following question by selecting the best option.

Context: {context}

Question: {question}

Options:
{options}

Instructions:
- Choose the BEST answer from the options above
- Respond with ONLY the letter (A, B, C, etc.) of your chosen answer
- Do not include explanations or additional text
- Be decisive and select exactly one option

Your answer:""",
        "single_word": """Based on the context provided, answer the following question with a single continuous string.

Context: {context}

Question: {question}

CRITICAL OUTPUT REQUIREMENTS:
- Output EXACTLY ONE continuous string with NO spaces, NO tabs, NO newlines
- Do NOT add quotes, apostrophes, backticks, or ANY punctuation marks
- The output will be read LITERALLY character-by-character as raw text
- If your answer would normally be "hello world", output: helloworld
- NO whitespace characters allowed ANYWHERE in your response

Your answer:""",
        "open": """Write a comprehensive response to the following prompt.

Context: {context}

Prompt: {prompt}

Instructions:
- Provide a detailed, well-structured response
- Use clear reasoning and examples where appropriate
- Write in a professional, informative tone
- Structure your response logically with proper flow

Your response:""",
        "file_write": """Generate the complete content for a file based on the requirements below.

Context: {context}

Requirements: {requirements}

Instructions:
- Generate ONLY the file content, no explanations
- Include all necessary components as specified
- Use proper formatting and syntax
- Do not include markdown code blocks or backticks
- Start immediately with the actual file content

File content:""",
    }

    def __init__(self, client: OllamaClient, model: str = "gemma3:4b"):
        self.client = client
        self.model = model
        self.parser = ResponseParser()
        self.compressor = ContextCompressor(client, model)

    def multiple_choice(
        self,
        question: str,
        options: List[str],
        context: str = "",
        auto_compress: bool = True,
        show_context: bool = True,
    ) -> MultipleChoiceResult:
        """Ask AI to choose from multiple options with lettered answers"""

        # Handle context compression if needed
        compressed_context = context
        compression_rounds = 0

        if auto_compress and context:
            compressed_context, compression_rounds = self.compressor.compress(
                context, question
            )

        # Format options with letters
        formatted_options = "\n".join(
            [f"{chr(ord('A') + i)}. {option}" for i, option in enumerate(options)]
        )

        # Build prompt from template
        prompt = self.TEMPLATES["multiple_choice"].format(
            context=(
                compressed_context
                if compressed_context
                else "No additional context provided"
            ),
            question=question,
            options=formatted_options,
        )

        # Get response with context monitoring
        response = self.client.generate(self.model, prompt, show_context=show_context)

        # Parse response
        letter, index, confidence = self.parser.parse_multiple_choice(response, options)

        return MultipleChoiceResult(
            letter=letter,
            index=index,
            value=options[index] if 0 <= index < len(options) else options[0],
            confidence=confidence,
            raw=response,
            context_compressed=compression_rounds > 0,
            compression_rounds=compression_rounds,
        )

    def single_word(
        self,
        question: str,
        context: str = "",
        auto_compress: bool = True,
        show_context: bool = True,
    ) -> SingleWordResult:
        """Ask AI for a single continuous-string response"""

        # Handle context compression if needed
        compressed_context = context
        compression_rounds = 0

        if auto_compress and context:
            compressed_context, compression_rounds = self.compressor.compress(
                context, question
            )

        # Build prompt from template
        prompt = self.TEMPLATES["single_word"].format(
            context=(
                compressed_context
                if compressed_context
                else "No additional context provided"
            ),
            question=question,
        )

        # Get response with context monitoring
        response = self.client.generate(self.model, prompt, show_context=show_context)

        # Parse response
        word, confidence = self.parser.parse_single_word(response)

        return SingleWordResult(
            word=word,
            confidence=confidence,
            raw=response,
            context_compressed=compression_rounds > 0,
            compression_rounds=compression_rounds,
        )

    def open(
        self,
        prompt: str,
        context: str = "",
        auto_compress: bool = True,
        show_context: bool = True,
    ) -> OpenResult:
        """Ask AI for an open-ended detailed response"""

        # Handle context compression if needed
        compressed_context = context
        compression_rounds = 0

        if auto_compress and context:
            compressed_context, compression_rounds = self.compressor.compress(
                context, prompt
            )

        # Build prompt from template
        full_prompt = self.TEMPLATES["open"].format(
            context=(
                compressed_context
                if compressed_context
                else "No additional context provided"
            ),
            prompt=prompt,
        )

        # Get response with context monitoring
        response = self.client.generate(
            self.model, full_prompt, show_context=show_context
        )

        return OpenResult(
            content=response.strip(),
            raw=response,
            context_compressed=compression_rounds > 0,
            compression_rounds=compression_rounds,
        )

    def file_write(
        self,
        requirements: str,
        context: str = "",
        auto_compress: bool = True,
        show_context: bool = True,
    ) -> FileWriteResult:
        """Ask AI to generate file content"""

        # Handle context compression if needed
        compressed_context = context
        compression_rounds = 0

        if auto_compress and context:
            compressed_context, compression_rounds = self.compressor.compress(
                context, requirements
            )

        # Build prompt from template
        prompt = self.TEMPLATES["file_write"].format(
            context=(
                compressed_context
                if compressed_context
                else "No additional context provided"
            ),
            requirements=requirements,
        )

        # Get response with context monitoring
        response = self.client.generate(self.model, prompt, show_context=show_context)

        # Clean the response
        content = self.parser.clean_file_content(response)

        return FileWriteResult(
            content=content,
            raw=response,
            context_compressed=compression_rounds > 0,
            compression_rounds=compression_rounds,
        )
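

# --- Usage sketch (not part of the original module) -------------------------
# End-to-end use of the four query types. This assumes a reachable local
# Ollama server and that OllamaClient can be constructed without arguments;
# adjust the client construction to your environment.
def _demo_ai_query() -> None:
    client = OllamaClient()  # assumption: default constructor connects locally
    ai = AIQuery(client, model="gemma3:4b")

    choice = ai.multiple_choice(
        question="Which word names a color?",
        options=["run", "blue", "seven"],
    )
    print(choice.letter, choice.value, f"confidence={choice.confidence:.1f}")

    word = ai.single_word(question="Name the capital of France.")
    print(word.word)

    essay = ai.open(prompt="Explain context compression in two sentences.")
    print(essay.content)

    config = ai.file_write(requirements="A minimal JSON config with a 'name' key.")
    print(config.content)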