1use std::path::PathBuf;
67
68use regex::Regex;
69
70use crate::{AirError, Result, dev_log};
71use crate::Indexing::State::CreateState::{FileIndex, FileMetadata};
73
74pub const MAX_SEARCH_RESULTS_DEFAULT:u32 = 100;
76
77#[derive(Debug, Clone)]
79pub struct SearchQuery {
80 pub query:String,
82 pub mode:SearchMode,
84 pub case_sensitive:bool,
86 pub whole_word:bool,
88 pub regex:Option<Regex>,
90 pub max_results:u32,
92 pub page:u32,
94}
95
96#[derive(Debug, Clone, PartialEq)]
98pub enum SearchMode {
99 Literal,
101 Regex,
103 Fuzzy,
105 Exact,
107}
108
109#[derive(Debug, Clone)]
111pub struct SearchResult {
112 pub path:String,
114 pub file_name:String,
116 pub matches:Vec<SearchMatch>,
118 pub relevance:f64,
120 pub language:Option<String>,
122}
123
124#[derive(Debug, Clone)]
126pub struct SearchMatch {
127 pub line_number:u32,
129 pub line_content:String,
131 pub match_start:usize,
133 pub match_end:usize,
135 pub context_before:Vec<String>,
137 pub context_after:Vec<String>,
139}
140
141#[derive(Debug, Clone)]
143pub struct PaginatedSearchResults {
144 pub results:Vec<SearchResult>,
146 pub total_count:u32,
148 pub page:u32,
150 pub total_pages:u32,
152 pub page_size:u32,
154}
155
156impl IntoIterator for PaginatedSearchResults {
157 type Item = SearchResult;
158 type IntoIter = std::vec::IntoIter<SearchResult>;
159
160 fn into_iter(self) -> Self::IntoIter { self.results.into_iter() }
161}
162
163impl<'a> IntoIterator for &'a PaginatedSearchResults {
164 type Item = &'a SearchResult;
165 type IntoIter = std::slice::Iter<'a, SearchResult>;
166
167 fn into_iter(self) -> Self::IntoIter { self.results.iter() }
168}
169
170pub async fn QueryIndexSearch(
182 index:&FileIndex,
183 query:SearchQuery,
184 path_filter:Option<String>,
185 language_filter:Option<String>,
186) -> Result<PaginatedSearchResults> {
187 dev_log!(
188 "indexing",
189 "[QueryIndex] Searching for: '{}' (mode: {:?})",
190 query.query,
191 query.mode
192 );
193 let sanitized_query = SanitizeSearchQuery(&query.query)?;
195
196 let case_sensitive = query.case_sensitive;
198 let whole_word = query.whole_word;
199 let max_results = if query.max_results == 0 {
200 MAX_SEARCH_RESULTS_DEFAULT
201 } else {
202 query.max_results.min(1000) };
204
205 let mut all_results = Vec::new();
206
207 match query.mode {
209 SearchMode::Literal => {
210 QueryIndexLiteral(
211 &sanitized_query,
212 case_sensitive,
213 whole_word,
214 path_filter.as_deref(),
215 language_filter.as_deref(),
216 index,
217 &mut all_results,
218 )
219 .await;
220 },
221 SearchMode::Regex => {
222 if let Some(regex) = &query.regex {
223 QueryIndexRegex(
224 regex,
225 path_filter.as_deref(),
226 language_filter.as_deref(),
227 index,
228 &mut all_results,
229 )
230 .await;
231 } else {
232 if let Ok(regex) = Regex::new(&sanitized_query) {
234 QueryIndexRegex(
235 ®ex,
236 path_filter.as_deref(),
237 language_filter.as_deref(),
238 index,
239 &mut all_results,
240 )
241 .await;
242 }
243 }
244 },
245 SearchMode::Fuzzy => {
246 QueryIndexFuzzy(
247 &sanitized_query,
248 case_sensitive,
249 path_filter.as_deref(),
250 language_filter.as_deref(),
251 index,
252 &mut all_results,
253 )
254 .await;
255 },
256 SearchMode::Exact => {
257 QueryIndexExact(
258 &sanitized_query,
259 case_sensitive,
260 path_filter.as_deref(),
261 language_filter.as_deref(),
262 index,
263 &mut all_results,
264 )
265 .await;
266 },
267 }
268
269 all_results.sort_by(|a, b| b.relevance.partial_cmp(&a.relevance).unwrap());
271
272 let total_count = all_results.len() as u32;
274 let total_pages = if max_results == 0 { 0 } else { total_count.div_ceil(max_results) };
275 let page = query.page.min(total_pages.saturating_sub(1));
276
277 let start = (page * max_results) as usize;
279 let end = ((page + 1) * max_results).min(total_count) as usize;
280 let page_results = all_results[start..end].to_vec();
281
282 dev_log!(
283 "indexing",
284 "[QueryIndex] Search completed: {} total results, page {} of {}",
285 total_count,
286 page + 1,
287 total_pages
288 );
289
290 Ok(PaginatedSearchResults { results:page_results, total_count, page, total_pages, page_size:max_results })
291}
292
293pub fn SanitizeSearchQuery(query:&str) -> Result<String> {
295 let sanitized:String = query.chars().filter(|c| *c != '\0' && !c.is_control()).collect();
297
298 if sanitized.len() > 1000 {
300 return Err(AirError::Validation(
301 "Search query exceeds maximum length of 1000 characters".to_string(),
302 ));
303 }
304
305 Ok(sanitized)
306}
307
308async fn QueryIndexLiteral(
310 query:&str,
311 case_sensitive:bool,
312 whole_word:bool,
313 path_filter:Option<&str>,
314 language_filter:Option<&str>,
315 index:&FileIndex,
316 results:&mut Vec<SearchResult>,
317) {
318 let search_query = if case_sensitive { query.to_string() } else { query.to_lowercase() };
319
320 if let Some(file_paths) = index.content_index.get(&search_query.to_lowercase()) {
322 for file_path in file_paths {
323 if let Some(metadata) = index.files.get(file_path) {
324 if MatchesFilters(file_path, metadata, path_filter, language_filter) {
325 if let Ok(search_result) =
326 FindMatchesInFile(file_path, &search_query, case_sensitive, whole_word, index).await
327 {
328 if !search_result.matches.is_empty() {
329 results.push(search_result);
330 }
331 }
332 }
333 }
334 }
335 }
336
337 for (file_path, metadata) in &index.files {
339 if results.len() >= 1000 {
340 break;
341 }
342
343 let file_name = file_path.file_name().unwrap_or_default().to_string_lossy().to_string();
344
345 let name_to_search = if case_sensitive { file_name.clone() } else { file_name.to_lowercase() };
346
347 if name_to_search.contains(&search_query) {
348 if MatchesFilters(file_path, metadata, path_filter, language_filter) {
349 results.push(SearchResult {
351 path:file_path.to_string_lossy().to_string(),
352 file_name,
353 matches:Vec::new(),
354 relevance:0.3,
355 language:metadata.language.clone(),
356 });
357 }
358 }
359 }
360}
361
362async fn QueryIndexRegex(
364 regex:&Regex,
365 path_filter:Option<&str>,
366 language_filter:Option<&str>,
367 index:&FileIndex,
368 results:&mut Vec<SearchResult>,
369) {
370 for (file_path, metadata) in &index.files {
371 if results.len() >= 1000 {
372 break;
373 }
374
375 if !MatchesFilters(file_path, metadata, path_filter, language_filter) {
376 continue;
377 }
378
379 if let Ok(content) = tokio::fs::read_to_string(file_path).await {
380 let matches = FindRegexMatches(&content, regex);
381
382 if !matches.is_empty() {
383 let relevance = CalculateRelevance(&matches, metadata);
384
385 results.push(SearchResult {
386 path:file_path.to_string_lossy().to_string(),
387 file_name:file_path.file_name().unwrap_or_default().to_string_lossy().to_string(),
388 matches,
389 relevance,
390 language:metadata.language.clone(),
391 });
392 }
393 }
394 }
395}
396
397async fn QueryIndexFuzzy(
399 query:&str,
400 case_sensitive:bool,
401 path_filter:Option<&str>,
402 language_filter:Option<&str>,
403 index:&FileIndex,
404 results:&mut Vec<SearchResult>,
405) {
406 let query_lower = query.to_lowercase();
407
408 for (file_path, metadata) in &index.files {
409 if results.len() >= 1000 {
410 break;
411 }
412
413 if !MatchesFilters(file_path, metadata, path_filter, language_filter) {
414 continue;
415 }
416
417 if let Ok(content) = tokio::fs::read_to_string(file_path).await {
418 const MAX_FUZZY_DISTANCE:usize = 2;
419 let matches = FindFuzzyMatches(&content, &query_lower, case_sensitive, MAX_FUZZY_DISTANCE);
420
421 if !matches.is_empty() {
422 let relevance = CalculateRelevance(&matches, metadata) * 0.8; results.push(SearchResult {
425 path:file_path.to_string_lossy().to_string(),
426 file_name:file_path.file_name().unwrap_or_default().to_string_lossy().to_string(),
427 matches,
428 relevance,
429 language:metadata.language.clone(),
430 });
431 }
432 }
433 }
434}
435
436async fn QueryIndexExact(
438 query:&str,
439 _case_sensitive:bool,
440 path_filter:Option<&str>,
441 language_filter:Option<&str>,
442 index:&FileIndex,
443 results:&mut Vec<SearchResult>,
444) {
445 for (file_path, metadata) in &index.files {
446 if results.len() >= 1000 {
447 break;
448 }
449
450 if !MatchesFilters(file_path, metadata, path_filter, language_filter) {
451 continue;
452 }
453
454 if let Ok(content) = tokio::fs::read_to_string(file_path).await {
455 let matches = FindExactMatches(&content, query);
456
457 if !matches.is_empty() {
458 let relevance = CalculateRelevance(&matches, metadata) * 1.1; results.push(SearchResult {
461 path:file_path.to_string_lossy().to_string(),
462 file_name:file_path.file_name().unwrap_or_default().to_string_lossy().to_string(),
463 matches,
464 relevance,
465 language:metadata.language.clone(),
466 });
467 }
468 }
469 }
470}
471
472async fn FindMatchesInFile(
474 file_path:&PathBuf,
475 query:&str,
476 case_sensitive:bool,
477 whole_word:bool,
478 index:&FileIndex,
479) -> Result<SearchResult> {
480 let content = tokio::fs::read_to_string(file_path)
481 .await
482 .map_err(|e| AirError::FileSystem(format!("Failed to read file: {}", e)))?;
483
484 let metadata = index
485 .files
486 .get(file_path)
487 .ok_or_else(|| AirError::Internal("File metadata not found in index".to_string()))?;
488
489 let matches = FindMatchesWithContext(&content, query, case_sensitive, whole_word);
490 let relevance = CalculateRelevance(&matches, metadata);
491
492 Ok(SearchResult {
493 path:file_path.to_string_lossy().to_string(),
494 file_name:file_path.file_name().unwrap_or_default().to_string_lossy().to_string(),
495 matches,
496 relevance,
497 language:metadata.language.clone(),
498 })
499}
500
501fn FindMatchesWithContext(content:&str, query:&str, case_sensitive:bool, whole_word:bool) -> Vec<SearchMatch> {
503 let mut matches = Vec::new();
504 let lines:Vec<&str> = content.lines().collect();
505
506 let search_in = |line:&str| -> Option<(usize, usize)> {
507 let line_to_search = if case_sensitive { line.to_string() } else { line.to_lowercase() };
508
509 let query_to_find = if case_sensitive { query.to_string() } else { query.to_lowercase() };
510
511 let start = if whole_word {
512 FindWholeWordMatch(&line_to_search, &query_to_find)
513 } else {
514 line_to_search.find(&query_to_find)
515 };
516
517 start.map(|s| (s, s + query.len()))
518 };
519
520 for (line_idx, line) in lines.iter().enumerate() {
521 let line_number = line_idx as u32 + 1;
522
523 if let Some((match_start, match_end)) = search_in(line) {
524 let context_start = line_idx.saturating_sub(2);
526 let context_end = (line_idx + 3).min(lines.len());
527
528 let context_before = lines[context_start..line_idx].iter().map(|s| s.to_string()).collect();
529
530 let context_after = lines[line_idx + 1..context_end].iter().map(|s| s.to_string()).collect();
531
532 matches.push(SearchMatch {
533 line_number,
534 line_content:line.to_string(),
535 match_start,
536 match_end,
537 context_before,
538 context_after,
539 });
540 }
541 }
542
543 matches
544}
545
546fn FindWholeWordMatch(line:&str, word:&str) -> Option<usize> {
548 let mut start = 0;
549
550 while let Some(pos) = line[start..].find(word) {
551 let actual_pos = start + pos;
552
553 let valid_before = actual_pos == 0
555 || line
556 .chars()
557 .nth(actual_pos - 1)
558 .map_or(true, |c| !c.is_alphanumeric() && c != '_');
559
560 let match_end = actual_pos + word.len();
562 let valid_after =
563 match_end == line.len() || line.chars().nth(match_end).map_or(true, |c| !c.is_alphanumeric() && c != '_');
564
565 if valid_before && valid_after {
566 return Some(actual_pos);
567 }
568
569 start = actual_pos + 1;
570 }
571
572 None
573}
574
575fn FindRegexMatches(content:&str, regex:&Regex) -> Vec<SearchMatch> {
577 let mut matches = Vec::new();
578 let lines:Vec<&str> = content.lines().collect();
579
580 for (line_idx, line) in lines.iter().enumerate() {
581 let line_number = line_idx as u32 + 1;
582
583 for mat in regex.find_iter(line) {
584 matches.push(SearchMatch {
585 line_number,
586 line_content:line.to_string(),
587 match_start:mat.start(),
588 match_end:mat.end(),
589 context_before:Vec::new(),
590 context_after:Vec::new(),
591 });
592 }
593 }
594
595 matches
596}
597
598fn FindFuzzyMatches(content:&str, query:&str, case_sensitive:bool, max_distance:usize) -> Vec<SearchMatch> {
600 let mut matches = Vec::new();
601 let lines:Vec<&str> = content.lines().collect();
602
603 for (line_idx, line) in lines.iter().enumerate() {
604 let line_number = line_idx as u32 + 1;
605 let line_to_search = if case_sensitive { line.to_string() } else { line.to_lowercase() };
606
607 if let Some(pos) = line_to_search.find(query) {
609 let distance = CalculateLevenshteinDistance(&line_to_search[pos..pos.saturating_add(query.len())], query);
611
612 if distance <= max_distance {
613 matches.push(SearchMatch {
614 line_number,
615 line_content:line.to_string(),
616 match_start:pos,
617 match_end:pos + query.len(),
618 context_before:Vec::new(),
619 context_after:Vec::new(),
620 });
621 }
622 }
623 }
624
625 matches
626}
627
628fn FindExactMatches(content:&str, query:&str) -> Vec<SearchMatch> { FindMatchesWithContext(content, query, true, true) }
630
631fn CalculateLevenshteinDistance(s1:&str, s2:&str) -> usize {
633 let s1_chars:Vec<char> = s1.chars().collect();
634 let s2_chars:Vec<char> = s2.chars().collect();
635 let len1 = s1_chars.len();
636 let len2 = s2_chars.len();
637
638 let mut dp = vec![vec![0usize; len2 + 1]; len1 + 1];
640
641 for i in 0..=len1 {
643 dp[i][0] = i;
644 }
645 for j in 0..=len2 {
646 dp[0][j] = j;
647 }
648
649 for i in 1..=len1 {
651 for j in 1..=len2 {
652 if s1_chars[i - 1] == s2_chars[j - 1] {
653 dp[i][j] = dp[i - 1][j - 1];
654 } else {
655 dp[i][j] = 1 + [
656 dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1], ]
660 .into_iter()
661 .min()
662 .unwrap();
663 }
664 }
665 }
666
667 dp[len1][len2]
668}
669
670fn CalculateRelevance(matches:&[SearchMatch], metadata:&FileMetadata) -> f64 {
672 let match_count = matches.len();
673 let line_count = metadata.line_count.unwrap_or(1) as f64;
674
675 let mut relevance = (match_count as f64 / line_count) * 10.0;
677
678 if match_count > 0 {
680 relevance += (match_count as f64).log10() * 0.5;
681 }
682
683 let days_old = (chrono::Utc::now() - metadata.modified).num_days() as f64;
685 relevance += 1.0 / (days_old + 1.0).max(1.0);
686
687 relevance.min(10.0).max(0.0)
688}
689
690pub fn MatchesFilters(
692 file_path:&PathBuf,
693 metadata:&FileMetadata,
694 path_filter:Option<&str>,
695 language_filter:Option<&str>,
696) -> bool {
697 if let Some(search_path) = path_filter {
699 if !file_path.to_string_lossy().contains(search_path) {
700 return false;
701 }
702 }
703
704 if let Some(lang) = language_filter {
706 if metadata.language.as_deref() != Some(lang) {
707 return false;
708 }
709 }
710
711 true
712}