1pub mod State;
67pub mod Scan;
68pub mod Process;
69pub mod Language;
70pub mod Store;
71pub mod Watch;
72pub mod Background;
73
74use std::{collections::HashMap, path::PathBuf, sync::Arc};
76
77use tokio::sync::{Mutex, RwLock};
78
79use crate::{
80 AirError,
81 ApplicationState::ApplicationState,
82 Configuration::ConfigurationManager,
83 Indexing::{
84 Scan::{
85 ScanDirectory::{ScanAndRemoveDeleted, ScanDirectoriesParallel},
86 ScanFile::IndexFileInternal,
87 },
88 State::UpdateState::{UpdateIndexMetadata, ValidateIndexConsistency},
89 Store::{
90 QueryIndex::{PaginatedSearchResults, QueryIndexSearch, SearchQuery},
91 StoreEntry::{BackupCorruptedIndex, EnsureIndexDirectory, LoadOrCreateIndex, SaveIndex},
92 UpdateIndex::UpdateFileContent,
93 },
94 },
95 Result,
96 dev_log,
97};
98use crate::Indexing::State::CreateState::{CreateNewIndex, FileIndex, FileMetadata, SymbolInfo, SymbolLocation};
100
/// Concurrency limit for indexing work.
///
/// Used as the permit count of `FileIndexer::indexing_semaphore` and passed as
/// the last argument to `ScanDirectoriesParallel` in `IndexDirectory`.
const MAX_PARALLEL_INDEXING:usize = 10;
103
/// Summary of one `FileIndexer::IndexDirectory` run.
#[derive(Debug, Clone)]
pub struct IndexResult {
	/// Number of files whose indexing task succeeded and whose data was merged
	/// into the index.
	pub files_indexed:u32,
	/// Sum of the `size` metadata of every successfully indexed file.
	pub total_size:u64,
	/// Wall-clock duration of the whole run, in seconds.
	pub duration_seconds:f64,
	/// Total number of symbols returned for the successfully indexed files.
	pub symbols_extracted:u32,
	/// Number of files whose indexing task returned an error or failed to join.
	pub files_with_errors:u32,
}
118
/// Aggregate figures about the current in-memory index, as produced by
/// `FileIndexer::GetIndexStatistics`. Serializable with serde.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct IndexStatistics {
	/// Number of file entries in the index.
	pub file_count:u32,
	/// Sum of the `size` field over all indexed files.
	pub total_size:u64,
	/// Sum of the `symbol_count` field over all indexed files.
	pub total_symbols:u32,
	/// Number of indexed files per language; files without a detected language
	/// are not counted.
	pub language_counts:HashMap<String, u32>,
	/// Copy of the index's `last_updated` timestamp.
	pub last_updated:chrono::DateTime<chrono::Utc>,
	/// Copy of the index's `index_version` string.
	pub index_version:String,
}
129
/// Handle to the file index: owns the shared in-memory index plus the state
/// needed to build, query and persist it. Every shared field is behind an
/// `Arc`, so clones of a `FileIndexer` operate on the same underlying data.
pub struct FileIndexer {
	/// Shared application state; the indexing configuration is read from it and
	/// the "indexing" service status is reported through it.
	AppState:Arc<ApplicationState>,

	/// The in-memory index. Queries take the read lock; `IndexDirectory` and
	/// `recover_from_corruption` take the write lock.
	file_index:Arc<RwLock<FileIndex>>,

	/// Directory the index is loaded from and saved to.
	index_directory:PathBuf,

	/// Optional filesystem watcher. Initialised to `None` in `new`; nothing in
	/// the visible part of this file installs a watcher.
	file_watcher:Arc<Mutex<Option<notify::RecommendedWatcher>>>,

	/// Limits the number of concurrently running per-file indexing tasks to
	/// `MAX_PARALLEL_INDEXING`.
	indexing_semaphore:Arc<tokio::sync::Semaphore>,

	/// Corruption flag. Starts as `false` and is reset to `false` by
	/// `recover_from_corruption`; where it is set to `true` is not visible here.
	corruption_detected:Arc<Mutex<bool>>,
}
158
159impl FileIndexer {
160 pub async fn new(AppState:Arc<ApplicationState>) -> Result<Self> {
168 let config = &AppState.Configuration.Indexing;
169
170 let index_directory = Self::ValidateAndExpandPath(&config.IndexDirectory)?;
172
173 EnsureIndexDirectory(&index_directory).await?;
175
176 let file_index = LoadOrCreateIndex(&index_directory).await?;
178
179 let indexer = Self {
180 AppState:AppState.clone(),
181 file_index:Arc::new(RwLock::new(file_index)),
182 index_directory:index_directory.clone(),
183 file_watcher:Arc::new(Mutex::new(None)),
184 indexing_semaphore:Arc::new(tokio::sync::Semaphore::new(MAX_PARALLEL_INDEXING)),
185 corruption_detected:Arc::new(Mutex::new(false)),
186 };
187
188 indexer.VerifyIndexIntegrity().await?;
190
191 indexer
193 .AppState
194 .UpdateServiceStatus("indexing", crate::ApplicationState::ServiceStatus::Running)
195 .await
196 .map_err(|e| AirError::Internal(e.to_string()))?;
197
198 dev_log!(
199 "indexing",
200 "[FileIndexer] Initialized with index directory: {}",
201 index_directory.display()
202 );
203 Ok(indexer)
204 }
205
206 fn ValidateAndExpandPath(path:&str) -> Result<PathBuf> {
208 let expanded = ConfigurationManager::ExpandPath(path)?;
209
210 let path_str = expanded.to_string_lossy();
212 if path_str.contains("..") {
213 return Err(AirError::FileSystem("Path contains invalid traversal sequence".to_string()));
214 }
215
216 Ok(expanded)
217 }
218
219 async fn VerifyIndexIntegrity(&self) -> Result<()> {
221 let index = self.file_index.read().await;
222
223 ValidateIndexConsistency(&index)?;
225
226 let mut missing_files = 0;
228 for file_path in index.files.keys() {
229 if !file_path.exists() {
230 missing_files += 1;
231 }
232 }
233
234 if missing_files > 0 {
235 dev_log!("indexing", "warn: [FileIndexer] Found {} missing files in index", missing_files);
236 }
237
238 dev_log!("indexing", "[FileIndexer] Index integrity verified successfully");
239 Ok(())
240 }
241
242 pub async fn IndexDirectory(&self, path:String, patterns:Vec<String>) -> Result<IndexResult> {
244 let start_time = std::time::Instant::now();
245
246 dev_log!("indexing", "[FileIndexer] Starting directory index: {}", path);
247 let config = &self.AppState.Configuration.Indexing;
248
249 let (files_to_index, _scan_result) =
251 ScanDirectoriesParallel(vec![path.clone()], patterns.clone(), config, MAX_PARALLEL_INDEXING).await?;
252
253 let _index_arc = self.file_index.clone();
256 let semaphore = self.indexing_semaphore.clone();
257 let config_clone = config.clone();
258 let mut index_tasks = Vec::new();
259
260 for file_path in files_to_index {
261 let permit = semaphore.clone().acquire_owned().await.unwrap();
262 let config_for_task = config_clone.clone();
263
264 let task = tokio::spawn(async move {
265 let _permit = permit;
266 IndexFileInternal(&file_path, &config_for_task, &[]).await
267 });
268
269 index_tasks.push(task);
270 }
271
272 let mut index = self.file_index.write().await;
274 let mut indexed_paths = std::collections::HashSet::new();
275 let mut files_indexed = 0u32;
276 let mut total_size = 0u64;
277 let mut symbols_extracted = 0u32;
278 let mut files_with_errors = 0u32;
279
280 for task in index_tasks {
281 match task.await {
282 Ok(Ok((metadata, symbols))) => {
283 let file_path = metadata.path.clone();
284
285 index.files.insert(file_path.clone(), metadata.clone());
286 indexed_paths.insert(file_path.clone());
287
288 if let Err(e) = UpdateFileContent(&mut index, &file_path, &metadata).await {
290 dev_log!(
291 "indexing",
292 "warn: [FileIndexer] Failed to index content for {}: {}",
293 file_path.display(),
294 e
295 );
296 }
297
298 index.file_symbols.insert(file_path.clone(), symbols.clone());
300 symbols_extracted += symbols.len() as u32;
301
302 for symbol in symbols {
304 index
305 .symbol_index
306 .entry(symbol.name.clone())
307 .or_insert_with(Vec::new)
308 .push(SymbolLocation { file_path:file_path.clone(), line:symbol.line, symbol });
309 }
310
311 files_indexed += 1;
312 total_size += metadata.size;
313 },
314 Ok(Err(_)) => {
315 files_with_errors += 1;
316 },
317 Err(e) => {
318 dev_log!("indexing", "error: [FileIndexer] Indexing task failed: {}", e);
319 files_with_errors += 1;
320 },
321 }
322 }
323
324 ScanAndRemoveDeleted(&mut index, &Self::ValidateAndExpandPath(&path)?).await?;
326
327 UpdateIndexMetadata(&mut index)?;
329
330 SaveIndex(&self.index_directory, &index).await?;
332
333 let duration = start_time.elapsed().as_secs_f64();
334
335 dev_log!(
336 "indexing",
337 "[FileIndexer] Indexing completed: {} files, {} bytes, {} symbols, {} errors in {:.2}s",
338 files_indexed,
339 total_size,
340 symbols_extracted,
341 files_with_errors,
342 duration
343 );
344
345 Ok(IndexResult {
346 files_indexed,
347 total_size,
348 duration_seconds:duration,
349 symbols_extracted,
350 files_with_errors,
351 })
352 }
353
354 pub async fn SearchFiles(
356 &self,
357 query:SearchQuery,
358 path:Option<String>,
359 language:Option<String>,
360 ) -> Result<PaginatedSearchResults> {
361 let index = self.file_index.read().await;
362 QueryIndexSearch(&index, query, path, language).await
363 }
364
365 pub async fn SearchSymbols(&self, query:&str, max_results:u32) -> Result<Vec<SymbolInfo>> {
367 let index = self.file_index.read().await;
368 let query_lower = query.to_lowercase();
369 let mut results = Vec::new();
370
371 for (symbol_name, locations) in &index.symbol_index {
372 if symbol_name.to_lowercase().contains(&query_lower) {
373 for loc in locations.iter().take(max_results as usize) {
374 results.push(loc.symbol.clone());
375 if results.len() >= max_results as usize {
376 break;
377 }
378 }
379 }
380 }
381
382 Ok(results)
383 }
384
385 pub async fn GetFileSymbols(&self, file_path:&PathBuf) -> Result<Vec<SymbolInfo>> {
387 let index = self.file_index.read().await;
388 Ok(index.file_symbols.get(file_path).cloned().unwrap_or_default())
389 }
390
391 pub async fn GetFileInfo(&self, path:String) -> Result<Option<FileMetadata>> {
393 let file_path = Self::ValidateAndExpandPath(&path)?;
394 let index = self.file_index.read().await;
395
396 Ok(index.files.get(&file_path).cloned())
397 }
398
399 pub async fn GetIndexStatistics(&self) -> Result<IndexStatistics> {
401 let index = self.file_index.read().await;
402
403 let mut language_counts:HashMap<String, u32> = HashMap::new();
404 let total_size = index.files.values().map(|m| m.size).sum();
405 let total_symbols = index.files.values().map(|m| m.symbol_count).sum();
406
407 for metadata in index.files.values() {
408 if let Some(lang) = &metadata.language {
409 *language_counts.entry(lang.clone()).or_insert(0) += 1;
410 }
411 }
412
413 Ok(IndexStatistics {
414 file_count:index.files.len() as u32,
415 total_size,
416 total_symbols,
417 language_counts,
418 last_updated:index.last_updated,
419 index_version:index.index_version.clone(),
420 })
421 }
422
423 pub async fn recover_from_corruption(&self) -> Result<()> {
425 dev_log!("indexing", "[FileIndexer] Recovering from corrupted index...");
426 BackupCorruptedIndex(&self.index_directory).await?;
428
429 let new_index = CreateNewIndex();
431 *self.file_index.write().await = new_index;
432
433 *self.corruption_detected.lock().await = false;
435
436 dev_log!("indexing", "[FileIndexer] Index recovery completed");
437 Ok(())
438 }
439}
440
441impl Clone for FileIndexer {
442 fn clone(&self) -> Self {
443 Self {
444 AppState:self.AppState.clone(),
445 file_index:self.file_index.clone(),
446 index_directory:self.index_directory.clone(),
447 file_watcher:self.file_watcher.clone(),
448 indexing_semaphore:self.indexing_semaphore.clone(),
449 corruption_detected:self.corruption_detected.clone(),
450 }
451 }
452}