Skip to main content

AirLibrary/Indexing/Watch/
WatchFile.rs

1//! # WatchFile
2//!
3//! ## File: Indexing/Watch/WatchFile.rs
4//!
5//! ## Role in Air Architecture
6//!
7//! Provides file watching functionality for the File Indexer service,
8//! handling file system events for incremental index updates.
9//!
10//! ## Primary Responsibility
11//!
12//! Handle file system change events and trigger index updates for
13//! created, modified, and deleted files.
14//!
15//! ## Secondary Responsibilities
16//!
17//! - File creation event handling
18//! - File modification event handling
19//! - File deletion event handling
20//! - Directory change event handling
21//! - Event debouncing for rapid changes
22//!
23//! ## Dependencies
24//!
25//! **External Crates:**
26//! - `notify` - File system watching
27//! - `tokio` - Async runtime for event handling
28//!
29//! **Internal Modules:**
30//! - `crate::Result` - Error handling type
31//! - `crate::AirError` - Error types
32//! - `super::super::FileIndex` - Index structure definitions
33//! - `super::super::Store::UpdateIndex` - Index update operations
34//!
35//! ## Dependents
36//!
37//! - `Indexing::Background::StartWatcher` - Watcher setup and management
38//! - `Indexing::mod::FileIndexer` - Main file indexer implementation
39//!
40//! ## VSCode Pattern Reference
41//!
42//! Inspired by VSCode's file watching in
43//! `src/vs/base/node/watcher/`
44//!
45//! ## Security Considerations
46//!
47//! - Path validation before watching
48//! - Symbolic link following disabled
49//! - Permission checking on watch paths
50//!
51//! ## Performance Considerations
52//!
53//! - Event debouncing prevents excessive updates
54//! - Batch processing of multiple events
55//! - Efficient event filtering
56//!
57//! ## Error Handling Strategy
58//!
59//! Event operations log warnings for individual errors and continue,
60//! ensuring a single event failure doesn't stop the watcher.
61//!
62//! ## Thread Safety
63//!
64//! Event handlers acquire write locks on shared state and process
65//! events asynchronously to avoid blocking the watcher loop.
66
67use std::path::PathBuf;
68
69use tokio::sync::{Mutex, RwLock};
70
71use crate::{AirError, Configuration::IndexingConfig, Indexing::State::CreateState::FileIndex, Result, dev_log};
72
73/// Handle file watcher event for incremental indexing
74///
75/// This function processes file system events and updates the index
76/// accordingly:
77/// - File Created: Index the new file
78/// - File Modified: Re-index the modified file
79/// - File Removed: Remove from index
80pub async fn HandleFileEvent(event:notify::Event, index_arc:&RwLock<FileIndex>, config:&IndexingConfig) -> Result<()> {
81	match event.kind {
82		notify::EventKind::Create(notify::event::CreateKind::File) => {
83			for path in event.paths {
84				dev_log!("indexing", "[WatchFile] File created: {}", path.display());
85				let mut index = index_arc.write().await;
86				if let Err(e) = crate::Indexing::Store::UpdateIndex::UpdateSingleFile(&mut index, &path, config).await {
87					dev_log!(
88						"indexing",
89						"warn: [WatchFile] Failed to index new file {}: {}",
90						path.display(),
91						e
92					);
93				}
94			}
95		},
96		notify::EventKind::Modify(notify::event::ModifyKind::Data(_))
97		| notify::EventKind::Modify(notify::event::ModifyKind::Name(notify::event::RenameMode::Both)) => {
98			for path in event.paths {
99				dev_log!("indexing", "[WatchFile] File modified: {}", path.display());
100				let mut index = index_arc.write().await;
101				if let Err(e) = crate::Indexing::Store::UpdateIndex::UpdateSingleFile(&mut index, &path, config).await {
102					dev_log!(
103						"indexing",
104						"warn: [WatchFile] Failed to re-index modified file {}: {}",
105						path.display(),
106						e
107					);
108				}
109			}
110		},
111		notify::EventKind::Remove(notify::event::RemoveKind::File) => {
112			for path in event.paths {
113				dev_log!("indexing", "[WatchFile] File removed: {}", path.display());
114				let mut index = index_arc.write().await;
115				if let Err(e) = crate::Indexing::State::UpdateState::RemoveFileFromIndex(&mut index, &path) {
116					dev_log!(
117						"indexing",
118						"warn: [WatchFile] Failed to remove file from index {}: {}",
119						path.display(),
120						e
121					);
122				}
123			}
124		},
125		notify::EventKind::Create(notify::event::CreateKind::Folder) => {
126			for path in event.paths {
127				dev_log!("indexing", "[WatchFile] Directory created: {}", path.display()); // Directories themselves don't need indexing, just their
128				// contents
129			}
130		},
131		notify::EventKind::Remove(notify::event::RemoveKind::Folder) => {
132			for path in event.paths {
133				dev_log!("indexing", "[WatchFile] Directory removed: {}", path.display()); // Remove all files from this directory
134				let mut index = index_arc.write().await;
135				let mut paths_to_remove = Vec::new();
136				for indexed_path in index.files.keys() {
137					if indexed_path.starts_with(&path) {
138						paths_to_remove.push(indexed_path.clone());
139					}
140				}
141				for indexed_path in paths_to_remove {
142					if let Err(e) = crate::Indexing::State::UpdateState::RemoveFileFromIndex(&mut index, &indexed_path)
143					{
144						dev_log!(
145							"indexing",
146							"warn: [WatchFile] Failed to remove file {}: {}",
147							indexed_path.display(),
148							e
149						);
150					}
151				}
152			}
153		},
154		_ => {
155			// Ignore other event types
156			dev_log!("indexing", "ignored event kind: {:?}", event.kind);
157		},
158	}
159
160	Ok(())
161}
162
163/// Debounced file change handler
164///
165/// Prevents rapid successive changes from causing excessive re-indexing
166pub struct DebouncedEventHandler {
167	pending_changes:Mutex<std::collections::HashMap<PathBuf, FileChangeInfo>>,
168}
169
170impl DebouncedEventHandler {
171	pub fn new() -> Self { Self { pending_changes:Mutex::new(std::collections::HashMap::new()) } }
172
173	/// Add a file change event
174	pub async fn AddChange(&self, path:PathBuf, change_type:FileChangeType) {
175		let mut pending = self.pending_changes.lock().await;
176
177		let now = std::time::Instant::now();
178
179		match pending.get_mut(&path) {
180			Some(change_info) => {
181				change_info.last_seen = now;
182				change_info.change_type = change_type.max(change_info.change_type);
183				change_info.suppressed_count += 1;
184			},
185			None => {
186				pending.insert(
187					path.clone(),
188					FileChangeInfo { path:path.clone(), change_type, last_seen:now, suppressed_count:0 },
189				);
190			},
191		}
192	}
193
194	/// Process pending changes older than the specified duration
195	pub async fn ProcessPendingChanges(
196		&self,
197		age_cutoff:std::time::Duration,
198		index_arc:&RwLock<FileIndex>,
199		config:&IndexingConfig,
200	) -> Result<Vec<ProcessedChange>> {
201		let mut processed = Vec::new();
202		let expired_paths = {
203			let mut pending = self.pending_changes.lock().await;
204			let mut expired = Vec::new();
205
206			for (path, change_info) in pending.iter() {
207				if change_info.last_seen.elapsed() >= age_cutoff {
208					expired.push((path.clone(), change_info.clone()));
209				}
210			}
211
212			// Remove expired entries
213			for (path, _) in &expired {
214				pending.remove(path);
215			}
216
217			expired
218		};
219
220		for (path, change_info) in expired_paths {
221			dev_log!(
222				"indexing",
223				"[WatchFile] Processing debounced change for {} (suppressed: {})",
224				path.display(),
225				change_info.suppressed_count
226			);
227
228			let result = match change_info.change_type {
229				FileChangeType::Created => {
230					let mut index = index_arc.write().await;
231					crate::Indexing::Store::UpdateIndex::UpdateSingleFile(&mut index, &path, config)
232						.await
233						.map(|_| ProcessedChangeResult::Success)
234						.unwrap_or(ProcessedChangeResult::Failed)
235				},
236				FileChangeType::Modified => {
237					let mut index = index_arc.write().await;
238					super::super::Store::UpdateIndex::UpdateSingleFile(&mut index, &path, config)
239						.await
240						.map(|_| ProcessedChangeResult::Success)
241						.unwrap_or(ProcessedChangeResult::Failed)
242				},
243				FileChangeType::Removed => {
244					let mut index = index_arc.write().await;
245					crate::Indexing::State::UpdateState::RemoveFileFromIndex(&mut index, &path)
246						.map(|_| ProcessedChangeResult::Success)
247						.unwrap_or(ProcessedChangeResult::Failed)
248				},
249			};
250
251			processed.push(ProcessedChange {
252				path,
253				change_type:change_info.change_type,
254				suppressed_count:change_info.suppressed_count,
255				result,
256			});
257		}
258
259		Ok(processed)
260	}
261
262	/// Clear all pending changes
263	pub async fn ClearPending(&self) -> usize {
264		let mut pending = self.pending_changes.lock().await;
265		let count = pending.len();
266		pending.clear();
267		count
268	}
269
270	/// Get the number of pending changes
271	pub async fn PendingCount(&self) -> usize {
272		let pending = self.pending_changes.lock().await;
273		pending.len()
274	}
275}
276
277impl Default for DebouncedEventHandler {
278	fn default() -> Self { Self::new() }
279}
280
/// Kind of change recorded for a path while it is being debounced.
///
/// The variants are declared in ascending precedence, so the derived ordering
/// is `Created < Modified < Removed`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum FileChangeType {
	Created,
	Modified,
	Removed,
}

impl FileChangeType {
	/// Return whichever of the two changes has the higher precedence.
	///
	/// `Removed` outranks `Modified`, which outranks `Created`.
	pub fn max(self, other:Self) -> Self {
		// Declaration order doubles as precedence, so the derived comparison is enough.
		if other > self { other } else { self }
	}
}
299
300/// File change information for debouncing
301#[derive(Debug, Clone)]
302struct FileChangeInfo {
303	#[allow(dead_code)]
304	path:PathBuf,
305	change_type:FileChangeType,
306	last_seen:std::time::Instant,
307	suppressed_count:usize,
308}
309
/// Outcome of applying one debounced change to the index.
///
/// Implements `PartialEq`/`Eq` so callers can compare outcomes directly
/// (`result == ProcessedChangeResult::Success`) or use them in `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ProcessedChangeResult {
	/// The index update for the path succeeded.
	Success,
	/// The index update for the path returned an error.
	Failed,
}
316
317/// Describes a processed file change
318#[derive(Debug, Clone)]
319pub struct ProcessedChange {
320	pub path:PathBuf,
321	pub change_type:FileChangeType,
322	pub suppressed_count:usize,
323	pub result:ProcessedChangeResult,
324}
325
326/// Convert notify event kind to FileChangeType
327pub fn EventKindToChangeType(kind:notify::EventKind) -> Option<FileChangeType> {
328	match kind {
329		notify::EventKind::Create(_) => Some(FileChangeType::Created),
330		notify::EventKind::Modify(_) => Some(FileChangeType::Modified),
331		notify::EventKind::Remove(_) => Some(FileChangeType::Removed),
332		_ => None,
333	}
334}
335
/// Check whether a path should be watched, i.e. matches none of the ignore patterns.
///
/// A pattern matches when either:
/// - it occurs as a literal substring of the (lossily converted) path, or
/// - it contains `*` and matches a single path component as a wildcard
///   pattern, where `*` stands for any run of characters (so `*.pyc` matches
///   `pkg/mod.pyc`).
///
/// Returns `true` when no pattern matches, including when `ignored_patterns`
/// is empty.
pub fn ShouldWatchPath(path:&PathBuf, ignored_patterns:&[String]) -> bool {
	let path_str = path.to_string_lossy();

	let is_ignored = ignored_patterns.iter().any(|pattern| {
		if path_str.contains(pattern.as_str()) {
			return true;
		}

		// A literal substring test can never satisfy patterns such as `*.tmp`,
		// so wildcard patterns are additionally matched per path component.
		pattern.contains('*')
			&& path
				.components()
				.any(|component| MatchesWildcard(pattern, &component.as_os_str().to_string_lossy()))
	});

	!is_ignored
}

/// Match `text` against `pattern`, where each `*` matches any (possibly empty) run of characters.
fn MatchesWildcard(pattern:&str, text:&str) -> bool {
	let mut literals = pattern.split('*');

	// `split` always yields at least one item: the literal before the first `*`.
	let head = literals.next().unwrap_or("");
	let mut middle:Vec<&str> = literals.collect();

	let tail = match middle.pop() {
		Some(tail) => tail,
		// No `*` in the pattern at all: only an exact match counts.
		None => return text == head,
	};

	let mut remaining = match text.strip_prefix(head) {
		Some(rest) => rest,
		None => return false,
	};

	// Literals between two `*` must appear in order; taking the leftmost
	// occurrence leaves the most text for the literals that follow.
	for literal in middle {
		match remaining.find(literal) {
			Some(at) => remaining = &remaining[at + literal.len()..],
			None => return false,
		}
	}

	remaining.ends_with(tail)
}
349
/// Build the default list of ignore patterns for file watching.
///
/// Returns a freshly allocated list on every call, in the order given below.
pub fn GetDefaultIgnoredPatterns() -> Vec<String> {
	const DEFAULT_IGNORED:&[&str] = &[
		"node_modules",
		"target",
		".git",
		".svn",
		".hg",
		".bzr",
		"dist",
		"build",
		".next",
		".nuxt",
		"__pycache__",
		"*.pyc",
		".venv",
		"venv",
		"env",
		".env",
		".idea",
		".vscode",
		".DS_Store",
		"Thumbs.db",
		"*.swp",
		"*.tmp",
	];

	DEFAULT_IGNORED.iter().copied().map(String::from).collect()
}
377
378/// Validate that a watch path exists and is accessible
379pub fn ValidateWatchPath(path:&PathBuf) -> Result<()> {
380	if !path.exists() {
381		return Err(AirError::FileSystem(format!("Watch path does not exist: {}", path.display())));
382	}
383
384	if !path.is_dir() {
385		return Err(AirError::FileSystem(format!(
386			"Watch path is not a directory: {}",
387			path.display()
388		)));
389	}
390
391	// Check read access
392	std::fs::read_dir(path)
393		.map_err(|e| AirError::FileSystem(format!("Cannot access watch path {}: {}", path.display(), e)))?;
394
395	Ok(())
396}