AirLibrary/Indexing/Scan/
ScanDirectory.rs1use std::{path::Path, sync::Arc};
68
69use tokio::sync::Semaphore;
70
71use crate::{
72 AirError,
73 Configuration::IndexingConfig,
74 Indexing::{Scan::ScanFile::ValidateFileAccess, State::CreateState::FileIndex},
75 Result,
76 dev_log,
77};
78
/// Summary counters returned alongside the file list by a directory scan.
#[derive(Clone, Debug)]
pub struct ScanDirectoryResult {
	/// Number of files accepted for indexing.
	pub files_found:u32,
	/// Number of files deliberately left out (for example oversized files or
	/// files that match none of the include patterns).
	pub files_skipped:u32,
	/// Number of entries that could not be walked, inspected, or accessed.
	pub errors:u32,
	/// Combined size, in bytes, of the accepted files.
	pub total_size:u64,
}
91
92pub async fn ScanDirectory(
102 path:&str,
103 patterns:Vec<String>,
104 config:&IndexingConfig,
105 _max_parallel:usize,
106) -> Result<(Vec<std::path::PathBuf>, ScanDirectoryResult)> {
107 let directory_path = crate::Configuration::ConfigurationManager::ExpandPath(path)?;
108
109 if !directory_path.exists() {
111 return Err(AirError::FileSystem(format!("Directory does not exist: {}", path)));
112 }
113
114 if !directory_path.is_dir() {
115 return Err(AirError::FileSystem(format!("Path is not a directory: {}", path)));
116 }
117
118 CheckDirectoryPermissions(&directory_path).await?;
120
121 let include_patterns = if patterns.is_empty() { config.FileTypes.clone() } else { patterns };
123
124 let walker = ignore::WalkBuilder::new(&directory_path)
126 .max_depth(Some(10)) .hidden(false)
128 .follow_links(false) .build();
130
131 let mut files_to_scan:Vec<std::path::PathBuf> = Vec::new();
132 let mut files_found = 0u32;
133 let mut files_skipped = 0u32;
134 let mut errors = 0u32;
135 let mut total_size = 0u64;
136
137 for result in walker {
139 match result {
140 Ok(entry) => {
141 if entry.file_type().map(|ft| ft.is_file()).unwrap_or(false) {
143 let file_path = entry.path().to_path_buf();
144
145 if entry.path_is_symlink() {
147 dev_log!("indexing", "[ScanDirectory] Skipping symlink: {}", file_path.display());
148 files_skipped += 1;
149 continue;
150 }
151
152 if let Ok(metadata) = entry.metadata() {
154 let file_size = metadata.len();
155
156 if file_size > config.MaxFileSizeMb as u64 * 1024 * 1024 {
157 dev_log!(
158 "indexing",
159 "warn: [ScanDirectory] Skipping oversized file: {} ({} bytes)",
160 file_path.display(),
161 file_size
162 );
163 files_skipped += 1;
164 continue;
165 }
166
167 if MatchesPatterns(&file_path, &include_patterns) {
169 if ValidateFileAccess(&file_path).await {
171 files_to_scan.push(file_path);
172 files_found += 1;
173 total_size += file_size;
174 } else {
175 dev_log!(
176 "indexing",
177 "warn: [ScanDirectory] Cannot access file (permission denied): {}",
178 file_path.display()
179 );
180 errors += 1;
181 }
182 } else {
183 files_skipped += 1;
184 }
185 } else {
186 errors += 1;
187 }
188 }
189 },
190 Err(e) => {
191 dev_log!("indexing", "warn: [ScanDirectory] Error walking directory: {}", e);
192 errors += 1;
193 },
194 }
195 }
196
197 dev_log!(
198 "indexing",
199 "[ScanDirectory] Directory scan completed: {} files, {} skipped, {} errors, {} bytes",
200 files_found,
201 files_skipped,
202 errors,
203 total_size
204 );
205
206 Ok((
207 files_to_scan,
208 ScanDirectoryResult { files_found, files_skipped, errors, total_size },
209 ))
210}
211
212pub async fn ScanAndRemoveDeleted(index:&mut FileIndex, directory_path:&Path) -> Result<u32> {
214 let mut paths_to_remove = Vec::new();
215 let all_paths:Vec<_> = index.files.keys().cloned().collect();
216
217 for path in all_paths {
218 if !path.exists() && path.starts_with(directory_path) {
219 paths_to_remove.push(path.clone());
220 }
221 }
222
223 let removed_count = paths_to_remove.len();
224 for path in paths_to_remove {
225 index.files.remove(&path);
226 index.file_symbols.remove(&path);
227
228 for (_, locations) in index.symbol_index.iter_mut() {
230 locations.retain(|loc| loc.file_path != path);
231 }
232
233 for (_, files) in index.content_index.iter_mut() {
235 files.retain(|p| p != &path);
236 }
237 }
238
239 Ok(removed_count as u32)
240}
241
242async fn CheckDirectoryPermissions(path:&Path) -> Result<()> {
244 tokio::task::spawn_blocking({
245 let path = path.to_path_buf();
246 move || {
247 std::fs::read_dir(&path)
248 .map_err(|e| AirError::FileSystem(format!("Cannot read directory {}: {}", path.display(), e)))?;
249 Ok(())
250 }
251 })
252 .await?
253}
254
255pub fn MatchesPatterns(file_path:&std::path::Path, patterns:&[String]) -> bool {
257 if patterns.is_empty() {
258 return true;
259 }
260
261 let file_name = file_path.file_name().unwrap_or_default().to_string_lossy().to_string();
262
263 for pattern in patterns {
264 if MatchesPattern(&file_name, pattern) {
265 return true;
266 }
267 }
268
269 false
270}
271
/// Tests a single file name against one pattern.
///
/// Two pattern forms are understood:
/// - `*.ext` matches any name ending in `.ext` (the dot is part of the suffix,
///   so `*.rs` accepts `main.rs` but not `users`).
/// - anything else must equal the file name exactly.
pub fn MatchesPattern(filename:&str, pattern:&str) -> bool {
	match pattern.strip_prefix('*') {
		// Keep the leading dot in the suffix; comparing only the bare extension
		// would let names that merely end in the same letters slip through.
		Some(suffix) if suffix.starts_with('.') => filename.ends_with(suffix),
		_ => filename == pattern,
	}
}
281
/// Returns the built-in list of names and patterns (dependency and build
/// directories, VCS metadata, virtual environments, editor folders, OS
/// artefacts) offered as default excludes, in a fixed order.
pub fn GetDefaultExcludePatterns() -> Vec<String> {
	const DEFAULT_EXCLUDES:&[&str] = &[
		"node_modules",
		"target",
		".git",
		".svn",
		".hg",
		".bzr",
		"dist",
		"build",
		".next",
		".nuxt",
		"__pycache__",
		"*.pyc",
		".venv",
		"venv",
		"env",
		".env",
		".idea",
		".vscode",
		".DS_Store",
		"Thumbs.db",
	];

	DEFAULT_EXCLUDES.iter().map(|entry| (*entry).to_string()).collect()
}
307
308pub async fn ScanDirectoriesParallel(
310 directories:Vec<String>,
311 patterns:Vec<String>,
312 config:&IndexingConfig,
313 max_parallel:usize,
314) -> Result<(Vec<std::path::PathBuf>, ScanDirectoryResult)> {
315 let semaphore = Arc::new(Semaphore::new(max_parallel));
316 let mut all_files = Vec::new();
317 let mut total_result = ScanDirectoryResult { files_found:0, files_skipped:0, errors:0, total_size:0 };
318
319 let mut scan_tasks = Vec::new();
320
321 for directory in directories {
322 let permit = semaphore.clone().acquire_owned().await.unwrap();
323 let config_clone = config.clone();
324 let patterns_clone = patterns.clone();
325
326 let task = tokio::spawn(async move {
327 let _permit = permit;
328 ScanDirectory(&directory, patterns_clone, &config_clone, max_parallel).await
329 });
330
331 scan_tasks.push(task);
332 }
333
334 for task in scan_tasks {
336 match task.await {
337 Ok(Ok((files, result))) => {
338 all_files.extend(files);
339 total_result.files_found += result.files_found;
340 total_result.files_skipped += result.files_skipped;
341 total_result.errors += result.errors;
342 total_result.total_size += result.total_size;
343 },
344 Ok(Err(e)) => {
345 dev_log!("indexing", "error: [ScanDirectory] Parallel scan failed: {}", e);
346 total_result.errors += 1;
347 },
348 Err(e) => {
349 dev_log!("indexing", "error: [ScanDirectory] Parallel task panicked: {}", e);
350 total_result.errors += 1;
351 },
352 }
353 }
354
355 Ok((all_files, total_result))
356}
357
358pub async fn GetDirectoryStatistics(path:&str, max_depth:Option<usize>) -> Result<DirectoryStatistics> {
360 let directory_path = crate::Configuration::ConfigurationManager::ExpandPath(path)?;
361
362 if !directory_path.exists() || !directory_path.is_dir() {
363 return Err(AirError::FileSystem(format!("Invalid directory: {}", path)));
364 }
365
366 let mut file_count = 0u64;
367 let mut total_size = 0u64;
368 let mut directory_count = 0u64;
369 let mut hidden_count = 0u64;
370
371 let walker = ignore::WalkBuilder::new(&directory_path)
372 .max_depth(max_depth)
373 .hidden(true)
374 .follow_links(false)
375 .build();
376
377 for entry in walker.flatten() {
378 let file_type = entry.file_type().expect("Failed to get file type");
379
380 if file_type.is_file() {
381 file_count += 1;
382 if let Ok(metadata) = entry.metadata() {
383 total_size += metadata.len();
384 }
385 } else if file_type.is_dir() {
386 directory_count += 1;
387 }
388
389 if entry.depth() > 0
390 && entry
391 .path()
392 .components()
393 .any(|c| c.as_os_str().to_string_lossy().starts_with('.'))
394 {
395 hidden_count += 1;
396 }
397 }
398
399 Ok(DirectoryStatistics { file_count, directory_count, hidden_count, total_size })
400}
401
/// Totals gathered by `GetDirectoryStatistics` for one directory tree.
#[derive(Clone, Debug)]
pub struct DirectoryStatistics {
	/// Number of regular files encountered.
	pub file_count:u64,
	/// Number of directories encountered.
	pub directory_count:u64,
	/// Number of entries counted as hidden (a path component starting with `.`).
	pub hidden_count:u64,
	/// Combined size, in bytes, of the files whose metadata could be read.
	pub total_size:u64,
}