Skip to main content

AirLibrary/Indexing/
mod.rs

1//! # File Indexing and Search Service
2//!
3//! ## File: Indexing/mod.rs
4//!
5//! ## Role in Air Architecture
6//!
7//! Provides comprehensive file indexing, search, and content analysis
8//! capabilities for the Land ecosystem, inspired by and compatible with
9//! Visual Studio Code's search service.
10//!
11//! ## Primary Responsibility
12//!
13//! Facade module for the Indexing service, exposing the public API for
14//! file indexing, search, and symbol extraction operations.
15//!
16//! ## Secondary Responsibilities
17//!
//! - Re-export public types from submodules
19//! - Provide unified FileIndexer API
20//! - Coordinate between indexing subsystems
21//!
22//! ## Dependencies
23//!
24//! **External Crates:**
25//! - `regex` - Regular expression search patterns
26//! - `serde` - Serialization for index storage
27//! - `tokio` - Async runtime for all operations
28//! - `notify` - File system watching
29//! - `chrono` - Timestamp management
30//!
31//! **Internal Modules:**
32//! - `crate::Result` - Error handling type
33//! - `crate::AirError` - Error types
34//! - `crate::ApplicationState::ApplicationState` - Application state
35//! - `crate::Configuration::ConfigurationManager` - Configuration management
36//!
37//! ## Dependents
38//!
39//! - `Indexing::FileIndexer` - Main indexer implementation
40//! - `Vine::Server::AirVinegRPCService` - gRPC integration
41//!
42//! ## VSCode Integration
43//!
44//! This service integrates with VSCode's search and file service architecture:
45//!
46//! - References: vs/workbench/services/search
47//! - File Service: vs/workbench/services/files
48//!
49//! The indexing system supports VSCode features:
50//! - **Outline View**: Symbol extraction for class/function navigation
51//! - **Go to Symbol**: Cross-file symbol search and lookup
52//! - **Search Integration**: File content and name search with regex support
53//! - **Workspace Search**: Multi-workspace index sharing
54//!
55//! ## FUTURE Enhancements
56//!
57//! - [ ] Implement full ripgrep integration for ultra-fast text search
58//! - [ ] Add project-level search with workspace awareness
59//! - [ ] Implement search query caching
//! - [ ] Add fuzzy search with typo tolerance
61//! - [ ] Implement search history and recent queries
62//! - [ ] Add search result preview with context
63//! - [ ] Implement parallel indexing for large directories
64
65// Modules - file-based (no inline definitions)
66pub mod State;
67pub mod Scan;
68pub mod Process;
69pub mod Language;
70pub mod Store;
71pub mod Watch;
72pub mod Background;
73
74// Import types and functions needed for the FileIndexer implementation
75use std::{collections::HashMap, path::PathBuf, sync::Arc};
76
77use tokio::sync::{Mutex, RwLock};
78
79use crate::{
80	AirError,
81	ApplicationState::ApplicationState,
82	Configuration::ConfigurationManager,
83	Indexing::{
84		Scan::{
85			ScanDirectory::{ScanAndRemoveDeleted, ScanDirectoriesParallel},
86			ScanFile::IndexFileInternal,
87		},
88		State::UpdateState::{UpdateIndexMetadata, ValidateIndexConsistency},
89		Store::{
90			QueryIndex::{PaginatedSearchResults, QueryIndexSearch, SearchQuery},
91			StoreEntry::{BackupCorruptedIndex, EnsureIndexDirectory, LoadOrCreateIndex, SaveIndex},
92			UpdateIndex::UpdateFileContent,
93		},
94	},
95	Result,
96	dev_log,
97};
98// Import types from submodules with explicit full paths
99use crate::Indexing::State::CreateState::{CreateNewIndex, FileIndex, FileMetadata, SymbolInfo, SymbolLocation};
100
/// Upper bound on the number of indexing operations allowed to run at once.
///
/// Used as the permit count of the indexer's semaphore and handed to the
/// parallel directory scan.
const MAX_PARALLEL_INDEXING:usize = 10;
103
/// Summary figures reported by a single directory indexing run
#[derive(Clone, Debug)]
pub struct IndexResult {
	/// How many files were indexed without error
	pub files_indexed:u32,
	/// Combined size, in bytes, of the files that were indexed
	pub total_size:u64,
	/// Wall-clock duration of the run, in seconds
	pub duration_seconds:f64,
	/// How many symbols were extracted across all indexed files
	pub symbols_extracted:u32,
	/// How many files could not be indexed
	pub files_with_errors:u32,
}
118
119/// Index statistics
120#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
121pub struct IndexStatistics {
122	pub file_count:u32,
123	pub total_size:u64,
124	pub total_symbols:u32,
125	pub language_counts:HashMap<String, u32>,
126	pub last_updated:chrono::DateTime<chrono::Utc>,
127	pub index_version:String,
128}
129
130/// File indexer implementation with comprehensive search capabilities
131///
132/// This indexer provides:
133/// - Incremental file watching with real-time updates
134/// - Multi-mode search (literal, regex, fuzzy)
135/// - Symbol extraction for VSCode Outline View
136/// - Language detection for syntax highlighting
137/// - Index corruption detection and recovery
138/// - Parallel indexing with resource limits
139pub struct FileIndexer {
140	/// Application state
141	AppState:Arc<ApplicationState>,
142
143	/// File index with metadata and symbols
144	file_index:Arc<RwLock<FileIndex>>,
145
146	/// Index storage directory
147	index_directory:PathBuf,
148
149	/// File watcher for incremental updates
150	file_watcher:Arc<Mutex<Option<notify::RecommendedWatcher>>>,
151
152	/// Semaphore for limiting parallel indexing operations
153	indexing_semaphore:Arc<tokio::sync::Semaphore>,
154
155	/// Index corruption detection state
156	corruption_detected:Arc<Mutex<bool>>,
157}
158
159impl FileIndexer {
160	/// Create a new file indexer with comprehensive setup
161	///
162	/// Initializes the indexer with:
163	/// - Index directory creation
164	/// - Existing index loading or fresh creation
165	/// - Index corruption detection
166	/// - Service status initialization
167	pub async fn new(AppState:Arc<ApplicationState>) -> Result<Self> {
168		let config = &AppState.Configuration.Indexing;
169
170		// Expand index directory path with validation
171		let index_directory = Self::ValidateAndExpandPath(&config.IndexDirectory)?;
172
173		// Create index directory if it doesn't exist with error handling
174		EnsureIndexDirectory(&index_directory).await?;
175
176		// Load or create index with corruption detection
177		let file_index = LoadOrCreateIndex(&index_directory).await?;
178
179		let indexer = Self {
180			AppState:AppState.clone(),
181			file_index:Arc::new(RwLock::new(file_index)),
182			index_directory:index_directory.clone(),
183			file_watcher:Arc::new(Mutex::new(None)),
184			indexing_semaphore:Arc::new(tokio::sync::Semaphore::new(MAX_PARALLEL_INDEXING)),
185			corruption_detected:Arc::new(Mutex::new(false)),
186		};
187
188		// Verify index integrity
189		indexer.VerifyIndexIntegrity().await?;
190
191		// Initialize service status
192		indexer
193			.AppState
194			.UpdateServiceStatus("indexing", crate::ApplicationState::ServiceStatus::Running)
195			.await
196			.map_err(|e| AirError::Internal(e.to_string()))?;
197
198		dev_log!(
199			"indexing",
200			"[FileIndexer] Initialized with index directory: {}",
201			index_directory.display()
202		);
203		Ok(indexer)
204	}
205
206	/// Validate and expand path with traversal protection
207	fn ValidateAndExpandPath(path:&str) -> Result<PathBuf> {
208		let expanded = ConfigurationManager::ExpandPath(path)?;
209
210		// Prevent path traversal attacks
211		let path_str = expanded.to_string_lossy();
212		if path_str.contains("..") {
213			return Err(AirError::FileSystem("Path contains invalid traversal sequence".to_string()));
214		}
215
216		Ok(expanded)
217	}
218
219	/// Verify index integrity and detect corruption
220	async fn VerifyIndexIntegrity(&self) -> Result<()> {
221		let index = self.file_index.read().await;
222
223		// Check consistency
224		ValidateIndexConsistency(&index)?;
225
226		// Verify all indexed files exist
227		let mut missing_files = 0;
228		for file_path in index.files.keys() {
229			if !file_path.exists() {
230				missing_files += 1;
231			}
232		}
233
234		if missing_files > 0 {
235			dev_log!("indexing", "warn: [FileIndexer] Found {} missing files in index", missing_files);
236		}
237
238		dev_log!("indexing", "[FileIndexer] Index integrity verified successfully");
239		Ok(())
240	}
241
242	/// Index a directory with comprehensive validation and parallel processing
243	pub async fn IndexDirectory(&self, path:String, patterns:Vec<String>) -> Result<IndexResult> {
244		let start_time = std::time::Instant::now();
245
246		dev_log!("indexing", "[FileIndexer] Starting directory index: {}", path);
247		let config = &self.AppState.Configuration.Indexing;
248
249		// Scan directory
250		let (files_to_index, _scan_result) =
251			ScanDirectoriesParallel(vec![path.clone()], patterns.clone(), config, MAX_PARALLEL_INDEXING).await?;
252
253		// Index files in parallel
254		// Variables cloned for use in async task
255		let _index_arc = self.file_index.clone();
256		let semaphore = self.indexing_semaphore.clone();
257		let config_clone = config.clone();
258		let mut index_tasks = Vec::new();
259
260		for file_path in files_to_index {
261			let permit = semaphore.clone().acquire_owned().await.unwrap();
262			let config_for_task = config_clone.clone();
263
264			let task = tokio::spawn(async move {
265				let _permit = permit;
266				IndexFileInternal(&file_path, &config_for_task, &[]).await
267			});
268
269			index_tasks.push(task);
270		}
271
272		// Collect results
273		let mut index = self.file_index.write().await;
274		let mut indexed_paths = std::collections::HashSet::new();
275		let mut files_indexed = 0u32;
276		let mut total_size = 0u64;
277		let mut symbols_extracted = 0u32;
278		let mut files_with_errors = 0u32;
279
280		for task in index_tasks {
281			match task.await {
282				Ok(Ok((metadata, symbols))) => {
283					let file_path = metadata.path.clone();
284
285					index.files.insert(file_path.clone(), metadata.clone());
286					indexed_paths.insert(file_path.clone());
287
288					// Index content for search
289					if let Err(e) = UpdateFileContent(&mut index, &file_path, &metadata).await {
290						dev_log!(
291							"indexing",
292							"warn: [FileIndexer] Failed to index content for {}: {}",
293							file_path.display(),
294							e
295						);
296					}
297
298					// Index symbols
299					index.file_symbols.insert(file_path.clone(), symbols.clone());
300					symbols_extracted += symbols.len() as u32;
301
302					// Update symbol index
303					for symbol in symbols {
304						index
305							.symbol_index
306							.entry(symbol.name.clone())
307							.or_insert_with(Vec::new)
308							.push(SymbolLocation { file_path:file_path.clone(), line:symbol.line, symbol });
309					}
310
311					files_indexed += 1;
312					total_size += metadata.size;
313				},
314				Ok(Err(_)) => {
315					files_with_errors += 1;
316				},
317				Err(e) => {
318					dev_log!("indexing", "error: [FileIndexer] Indexing task failed: {}", e);
319					files_with_errors += 1;
320				},
321			}
322		}
323
324		// Remove files that were indexed before but no longer exist
325		ScanAndRemoveDeleted(&mut index, &Self::ValidateAndExpandPath(&path)?).await?;
326
327		// Update index metadata
328		UpdateIndexMetadata(&mut index)?;
329
330		// Save index to disk
331		SaveIndex(&self.index_directory, &index).await?;
332
333		let duration = start_time.elapsed().as_secs_f64();
334
335		dev_log!(
336			"indexing",
337			"[FileIndexer] Indexing completed: {} files, {} bytes, {} symbols, {} errors in {:.2}s",
338			files_indexed,
339			total_size,
340			symbols_extracted,
341			files_with_errors,
342			duration
343		);
344
345		Ok(IndexResult {
346			files_indexed,
347			total_size,
348			duration_seconds:duration,
349			symbols_extracted,
350			files_with_errors,
351		})
352	}
353
354	/// Search files with multiple modes
355	pub async fn SearchFiles(
356		&self,
357		query:SearchQuery,
358		path:Option<String>,
359		language:Option<String>,
360	) -> Result<PaginatedSearchResults> {
361		let index = self.file_index.read().await;
362		QueryIndexSearch(&index, query, path, language).await
363	}
364
365	/// Search symbols across all files (for VSCode Go to Symbol)
366	pub async fn SearchSymbols(&self, query:&str, max_results:u32) -> Result<Vec<SymbolInfo>> {
367		let index = self.file_index.read().await;
368		let query_lower = query.to_lowercase();
369		let mut results = Vec::new();
370
371		for (symbol_name, locations) in &index.symbol_index {
372			if symbol_name.to_lowercase().contains(&query_lower) {
373				for loc in locations.iter().take(max_results as usize) {
374					results.push(loc.symbol.clone());
375					if results.len() >= max_results as usize {
376						break;
377					}
378				}
379			}
380		}
381
382		Ok(results)
383	}
384
385	/// Get symbols for a specific file (for VSCode Outline View)
386	pub async fn GetFileSymbols(&self, file_path:&PathBuf) -> Result<Vec<SymbolInfo>> {
387		let index = self.file_index.read().await;
388		Ok(index.file_symbols.get(file_path).cloned().unwrap_or_default())
389	}
390
391	/// Get file information
392	pub async fn GetFileInfo(&self, path:String) -> Result<Option<FileMetadata>> {
393		let file_path = Self::ValidateAndExpandPath(&path)?;
394		let index = self.file_index.read().await;
395
396		Ok(index.files.get(&file_path).cloned())
397	}
398
399	/// Get index statistics
400	pub async fn GetIndexStatistics(&self) -> Result<IndexStatistics> {
401		let index = self.file_index.read().await;
402
403		let mut language_counts:HashMap<String, u32> = HashMap::new();
404		let total_size = index.files.values().map(|m| m.size).sum();
405		let total_symbols = index.files.values().map(|m| m.symbol_count).sum();
406
407		for metadata in index.files.values() {
408			if let Some(lang) = &metadata.language {
409				*language_counts.entry(lang.clone()).or_insert(0) += 1;
410			}
411		}
412
413		Ok(IndexStatistics {
414			file_count:index.files.len() as u32,
415			total_size,
416			total_symbols,
417			language_counts,
418			last_updated:index.last_updated,
419			index_version:index.index_version.clone(),
420		})
421	}
422
423	/// Recover corrupted index
424	pub async fn recover_from_corruption(&self) -> Result<()> {
425		dev_log!("indexing", "[FileIndexer] Recovering from corrupted index...");
426		// Backup corrupted index
427		BackupCorruptedIndex(&self.index_directory).await?;
428
429		// Create new index
430		let new_index = CreateNewIndex();
431		*self.file_index.write().await = new_index;
432
433		// Clear corruption flag
434		*self.corruption_detected.lock().await = false;
435
436		dev_log!("indexing", "[FileIndexer] Index recovery completed");
437		Ok(())
438	}
439}
440
441impl Clone for FileIndexer {
442	fn clone(&self) -> Self {
443		Self {
444			AppState:self.AppState.clone(),
445			file_index:self.file_index.clone(),
446			index_directory:self.index_directory.clone(),
447			file_watcher:self.file_watcher.clone(),
448			indexing_semaphore:self.indexing_semaphore.clone(),
449			corruption_detected:self.corruption_detected.clone(),
450		}
451	}
452}