AirLibrary/Indexing/Scan/
ScanFile.rs1use std::{
72 path::PathBuf,
73 time::{Duration, Instant},
74};
75
76use crate::dev_log;
78use crate::{
79 AirError,
80 Configuration::IndexingConfig,
81 Indexing::{
82 Process::{
83 ExtractSymbols::ExtractSymbols,
84 ProcessContent::{DetectEncoding, DetectLanguage, DetectMimeType},
85 },
86 State::CreateState::{FileMetadata, SymbolInfo},
87 },
88 Result,
89};
90
91pub async fn IndexFileInternal(
103 file_path:&PathBuf,
104 config:&IndexingConfig,
105 _patterns:&[String],
106) -> Result<(FileMetadata, Vec<SymbolInfo>)> {
107 let start_time = Instant::now();
108
109 let metadata = std::fs::metadata(file_path)
111 .map_err(|e| AirError::FileSystem(format!("Failed to get file metadata: {}", e)))?;
112
113 let modified = metadata
115 .modified()
116 .map_err(|e| AirError::FileSystem(format!("Failed to get modification time: {}", e)))?;
117
118 let modified_time = chrono::DateTime::<chrono::Utc>::from(modified);
119
120 let file_size = metadata.len();
122 if file_size > config.MaxFileSizeMb as u64 * 1024 * 1024 {
123 return Err(AirError::FileSystem(format!(
124 "File size {} exceeds limit {} MB",
125 file_size, config.MaxFileSizeMb
126 )));
127 }
128
129 let content = tokio::time::timeout(Duration::from_secs(30), tokio::fs::read(file_path))
131 .await
132 .map_err(|_| AirError::FileSystem(format!("Timeout reading file: {} (30s limit)", file_path.display())))?
133 .map_err(|e| AirError::FileSystem(format!("Failed to read file: {}", e)))?;
134
135 let is_symlink = std::fs::symlink_metadata(file_path)
137 .map(|m| m.file_type().is_symlink())
138 .unwrap_or(false);
139
140 let checksum = CalculateChecksum(&content);
142
143 let encoding = DetectEncoding(&content);
145
146 let mime_type = DetectMimeType(file_path, &content);
148
149 let language = DetectLanguage(file_path);
151
152 let line_count = if mime_type.starts_with("text/") {
154 Some(content.iter().filter(|&&b| b == b'\n').count() as u32 + 1)
155 } else {
156 None
157 };
158
159 let symbols = if let Some(lang) = &language {
161 ExtractSymbols(file_path, &content, lang).await?
162 } else {
163 Vec::new()
164 };
165
166 let permissions = GetPermissionsString(&metadata);
167
168 let elapsed = start_time.elapsed();
169
170 dev_log!(
171 "indexing",
172 "indexed {} in {}ms ({} symbols)",
173 file_path.display(),
174 elapsed.as_millis(),
175 symbols.len()
176 );
177
178 Ok((
179 FileMetadata {
180 path:file_path.clone(),
181 size:file_size,
182 modified:modified_time,
183 mime_type,
184 language,
185 line_count,
186 checksum,
187 is_symlink,
188 permissions,
189 encoding,
190 indexed_at:chrono::Utc::now(),
191 symbol_count:symbols.len() as u32,
192 },
193 symbols,
194 ))
195}
196
197pub async fn ValidateFileAccess(file_path:&PathBuf) -> bool {
199 tokio::task::spawn_blocking({
200 let file_path = file_path.to_path_buf();
201 move || {
202 let can_access = std::fs::metadata(&file_path).is_ok();
204 if can_access {
205 std::fs::File::open(&file_path).is_ok()
207 } else {
208 false
209 }
210 }
211 })
212 .await
213 .unwrap_or(false)
214}
215
216pub fn CalculateChecksum(content:&[u8]) -> String {
218 use sha2::{Digest, Sha256};
219 let mut hasher = Sha256::new();
220 hasher.update(content);
221 format!("{:x}", hasher.finalize())
222}
223
/// Renders the owner/group/other permission bits of `metadata` as a
/// nine-character string such as `rw-r--r--`.
///
/// Each position shows `r`, `w` or `x` when the matching mode bit is set
/// and `-` otherwise.
#[cfg(unix)]
pub fn GetPermissionsString(metadata:&std::fs::Metadata) -> String {
    use std::os::unix::fs::PermissionsExt;

    // (mask, symbol) pairs in display order: owner, group, other.
    const BIT_SYMBOLS:[(u32, char); 9] = [
        (0o400, 'r'),
        (0o200, 'w'),
        (0o100, 'x'),
        (0o040, 'r'),
        (0o020, 'w'),
        (0o010, 'x'),
        (0o004, 'r'),
        (0o002, 'w'),
        (0o001, 'x'),
    ];

    let mode_bits = metadata.permissions().mode();

    BIT_SYMBOLS
        .iter()
        .map(|&(mask, symbol)| if mode_bits & mask != 0 { symbol } else { '-' })
        .collect()
}
246
/// Fallback for targets without Unix mode bits.
///
/// Returns an all-dash placeholder that is nine characters wide, the same
/// width as the `rwxrwxrwx`-style string produced by the Unix variant, so
/// consumers always see a fixed-width permissions column.
#[cfg(not(unix))]
pub fn GetPermissionsString(_metadata:&std::fs::Metadata) -> String { "---------".to_string() }
250
251pub async fn ScanFileMetadata(file_path:&PathBuf) -> Result<FileMetadata> {
253 let metadata = std::fs::metadata(file_path)
254 .map_err(|e| AirError::FileSystem(format!("Failed to get file metadata: {}", e)))?;
255
256 let modified = metadata
257 .modified()
258 .map_err(|e| AirError::FileSystem(format!("Failed to get modification time: {}", e)))?;
259
260 let modified_time = chrono::DateTime::<chrono::Utc>::from(modified);
261
262 Ok(FileMetadata {
263 path:file_path.clone(),
264 size:metadata.len(),
265 modified:modified_time,
266 mime_type:"application/octet-stream".to_string(),
267 language:None,
268 line_count:None,
269 checksum:String::new(),
270 is_symlink:metadata.file_type().is_symlink(),
271 permissions:GetPermissionsString(&metadata),
272 encoding:None,
273 indexed_at:chrono::Utc::now(),
274 symbol_count:0,
275 })
276}
277
278pub fn FileModifiedSince(file_path:&PathBuf, last_indexed:chrono::DateTime<chrono::Utc>) -> Result<bool> {
280 let metadata = std::fs::metadata(file_path)
281 .map_err(|e| AirError::FileSystem(format!("Failed to get file metadata: {}", e)))?;
282
283 let modified = metadata
284 .modified()
285 .map_err(|e| AirError::FileSystem(format!("Failed to get modification time: {}", e)))?;
286
287 let modified_time = chrono::DateTime::<chrono::Utc>::from(modified);
288
289 Ok(modified_time > last_indexed)
290}
291
292pub async fn GetFileSize(file_path:&PathBuf) -> Result<u64> {
294 tokio::task::spawn_blocking({
295 let file_path = file_path.to_path_buf();
296 move || {
297 let metadata = std::fs::metadata(&file_path)
298 .map_err(|e| AirError::FileSystem(format!("Failed to get file metadata: {}", e)))?;
299 Ok(metadata.len())
300 }
301 })
302 .await?
303}
304
305pub fn IsTextFile(metadata:&FileMetadata) -> bool {
307 metadata.mime_type.starts_with("text/")
308 || metadata.mime_type.contains("json")
309 || metadata.mime_type.contains("xml")
310 || metadata.mime_type.contains("yaml")
311 || metadata.mime_type.contains("toml")
312 || metadata.language.is_some()
313}
314
315pub fn IsBinaryFile(metadata:&FileMetadata) -> bool {
317 !IsTextFile(metadata)
318 || metadata.mime_type == "application/octet-stream"
319 || metadata.mime_type == "application/zip"
320 || metadata.mime_type == "application/x-tar"
321 || metadata.mime_type == "application/x-gzip"
322 || metadata.mime_type == "application/x-bzip2"
323}