Skip to main content

AirLibrary/Indexing/Store/
StoreEntry.rs

1//! # StoreEntry
2//!
3//! ## File: Indexing/Store/StoreEntry.rs
4//!
5//! ## Role in Air Architecture
6//!
7//! Provides index storage functionality for the File Indexer service,
8//! handling serialization and persistence of the file index to disk.
9//!
10//! ## Primary Responsibility
11//!
12//! Store the file index to disk with atomic writes and corruption recovery
13//! mechanisms.
14//!
15//! ## Secondary Responsibilities
16//!
17//! - Load index from disk with validation
18//! - Backup corrupted indexes automatically
19//! - Atomic writes using temp files
20//! - Index integrity verification
21//!
22//! ## Dependencies
23//!
24//! **External Crates:**
25//! - `serde_json` - JSON serialization/deserialization
26//! - `tokio` - Async file I/O operations
27//!
28//! **Internal Modules:**
29//! - `crate::Result` - Error handling type
30//! - `crate::AirError` - Error types
31//! - `super::super::FileIndex` - Index structure definitions
32//! - `super::super::State::CreateState` - State creation utilities
33//!
34//! ## Dependents
35//!
36//! - `Indexing::mod::FileIndexer` - Main file indexer implementation
37//!
38//! ## VSCode Pattern Reference
39//!
40//! Inspired by VSCode's index storage in
41//! `src/vs/workbench/services/search/common/`
42//!
43//! ## Security Considerations
44//!
45//! - Atomic writes prevent partial index corruption
46//! - Permission checking on index directory
47//! - Path traversal protection
48//!
49//! ## Performance Considerations
50//!
51//! - Temp file pattern for atomic writes
52//! - Lazy loading of in-memory index
53//! - Efficient serialization with serde
54//!
55//! ## Error Handling Strategy
56//!
57//! Storage operations return detailed error messages for failures and
58//! automatically backup corrupted indexes when loading fails.
59//!
60//! ## Thread Safety
61//!
62//! Storage operations use async file I/O and return results that can be
63//! safely merged into shared `Arc<RwLock<>>` state.
64
65use std::path::{Path, PathBuf};
66
67use crate::{AirError, Indexing::State::CreateState::FileIndex, Result, dev_log};
68
69/// Save index to disk with atomic write
70pub async fn SaveIndex(index_directory:&Path, index:&FileIndex) -> Result<()> {
71	let index_file = index_directory.join("file_index.json");
72	let temp_file = index_directory.join("file_index.json.tmp");
73
74	let content = serde_json::to_string_pretty(index)
75		.map_err(|e| AirError::Serialization(format!("Failed to serialize index: {}", e)))?;
76
77	// Write to temp file first
78	tokio::fs::write(&temp_file, content)
79		.await
80		.map_err(|e| AirError::FileSystem(format!("Failed to write temp index file: {}", e)))?;
81
82	// Atomic rename
83	tokio::fs::rename(&temp_file, &index_file)
84		.await
85		.map_err(|e| AirError::FileSystem(format!("Failed to rename index file: {}", e)))?;
86
87	dev_log!(
88		"indexing",
89		"[StoreEntry] Index saved to: {} ({} files, {} symbols)",
90		index_file.display(),
91		index.files.len(),
92		index.symbol_index.len()
93	);
94
95	Ok(())
96}
97
98/// Load index from disk with corruption detection
99pub async fn LoadIndex(index_directory:&Path) -> Result<FileIndex> {
100	let index_file = index_directory.join("file_index.json");
101
102	if !index_file.exists() {
103		return Err(AirError::FileSystem(format!(
104			"Index file does not exist: {}",
105			index_file.display()
106		)));
107	}
108
109	let content = tokio::fs::read_to_string(&index_file)
110		.await
111		.map_err(|e| AirError::FileSystem(format!("Failed to read index file: {}", e)))?;
112
113	let index:FileIndex = serde_json::from_str(&content)
114		.map_err(|e| AirError::Serialization(format!("Failed to parse index file: {}", e)))?;
115
116	// Verify index structure
117	if index.index_version.is_empty() || index.index_checksum.is_empty() {
118		return Err(AirError::Serialization("Index missing version or checksum".to_string()));
119	}
120
121	// Verify index checksum
122	use crate::Indexing::State::CreateState::CalculateIndexChecksum;
123	let expected_checksum = CalculateIndexChecksum(&index)?;
124	if index.index_checksum != expected_checksum {
125		return Err(AirError::Serialization(format!(
126			"Index checksum mismatch: expected {}, got {}",
127			expected_checksum, index.index_checksum
128		)));
129	}
130
131	Ok(index)
132}
133
134/// Load or create index with corruption detection
135pub async fn LoadOrCreateIndex(index_directory:&Path) -> Result<FileIndex> {
136	let index_file = index_directory.join("file_index.json");
137
138	if index_file.exists() {
139		// Try to load existing index
140		match LoadIndex(index_directory).await {
141			Ok(index) => {
142				dev_log!("indexing", "[StoreEntry] Loaded index with {} files", index.files.len());
143				Ok(index)
144			},
145			Err(e) => {
146				dev_log!(
147					"indexing",
148					"warn: [StoreEntry] Failed to load index (may be corrupted): {}. Creating new index.",
149					e
150				);
151				// Backup corrupted index
152				BackupCorruptedIndex(index_directory).await?;
153				Ok(CreateNewIndex())
154			},
155		}
156	} else {
157		// Create new index
158		Ok(CreateNewIndex())
159	}
160}
161
162/// Create a new empty index
163fn CreateNewIndex() -> FileIndex {
164	use crate::Indexing::State::CreateState::CreateNewIndex as StateCreateNewIndex;
165	StateCreateNewIndex()
166}
167
168/// Ensure index directory exists with proper error handling
169pub async fn EnsureIndexDirectory(index_directory:&Path) -> Result<()> {
170	tokio::fs::create_dir_all(index_directory).await.map_err(|e| {
171		AirError::Configuration(format!("Failed to create index directory {}: {}", index_directory.display(), e))
172	})?;
173	Ok(())
174}
175
176/// Backup corrupted index before creating new one
177pub async fn BackupCorruptedIndex(index_directory:&Path) -> Result<()> {
178	let index_file = index_directory.join("file_index.json");
179	let backup_file = index_directory.join(format!("file_index.corrupted.{}.json", chrono::Utc::now().timestamp()));
180
181	if !index_file.exists() {
182		return Ok(());
183	}
184
185	// Rename corrupted file to backup
186	tokio::fs::rename(&index_file, &backup_file)
187		.await
188		.map_err(|e| AirError::FileSystem(format!("Failed to backup corrupted index: {}", e)))?;
189
190	dev_log!(
191		"indexing",
192		"[StoreEntry] Backed up corrupted index to: {}",
193		backup_file.display()
194	);
195	Ok(())
196}
197
198/// Load index with automatic recovery on corruption
199pub async fn LoadIndexWithRecovery(index_directory:&Path, max_retries:usize) -> Result<FileIndex> {
200	let mut last_error = None;
201
202	for attempt in 0..max_retries {
203		match LoadOrCreateIndex(index_directory).await {
204			Ok(index) => {
205				if attempt > 0 {
206					dev_log!(
207						"indexing",
208						"[StoreEntry] Successfully loaded index after {} attempts",
209						attempt + 1
210					);
211				}
212				return Ok(index);
213			},
214			Err(e) => {
215				last_error = Some(e);
216				dev_log!("indexing", "warn: [StoreEntry] Load attempt {} failed", attempt + 1);
217				// Wait before retry
218				if attempt < max_retries - 1 {
219					tokio::time::sleep(tokio::time::Duration::from_millis(100 * (attempt + 1) as u64)).await;
220				}
221			},
222		}
223	}
224
225	Err(last_error.unwrap_or_else(|| AirError::Internal("Failed to load index after retries".to_string())))
226}
227
/// Return the location of `file_index.json` inside `index_directory`.
///
/// This only builds the path; it does not touch the filesystem.
pub fn GetIndexFilePath(index_directory:&Path) -> PathBuf {
	let mut index_file = index_directory.to_path_buf();
	index_file.push("file_index.json");
	index_file
}
230
231/// Check if index file exists and is readable
232pub async fn IndexFileExists(index_directory:&Path) -> Result<bool> {
233	let index_file = index_directory.join("file_index.json");
234
235	if !index_file.exists() {
236		return Ok(false);
237	}
238
239	// Try to read metadata to verify accessibility
240	match tokio::fs::metadata(&index_file).await {
241		Ok(_) => Ok(true),
242		Err(_) => Ok(false),
243	}
244}
245
246/// Get index file size in bytes
247pub async fn GetIndexFileSize(index_directory:&Path) -> Result<u64> {
248	let index_file = index_directory.join("file_index.json");
249
250	let metadata = tokio::fs::metadata(&index_file)
251		.await
252		.map_err(|e| AirError::FileSystem(format!("Failed to get index file metadata: {}", e)))?;
253
254	Ok(metadata.len())
255}
256
257/// Clean up old backup files
258pub async fn CleanupOldBackups(index_directory:&Path, keep_count:usize) -> Result<usize> {
259	let mut entries = tokio::fs::read_dir(index_directory)
260		.await
261		.map_err(|e| AirError::FileSystem(format!("Failed to read index directory: {}", e)))?;
262
263	let mut backups = Vec::new();
264
265	while let Some(entry) = entries
266		.next_entry()
267		.await
268		.map_err(|e| AirError::FileSystem(format!("Failed to read directory entry: {}", e)))?
269	{
270		let file_name = entry.file_name().to_string_lossy().to_string();
271
272		if file_name.starts_with("file_index.corrupted.") && file_name.ends_with(".json") {
273			if let Ok(metadata) = entry.metadata().await {
274				if let Ok(modified) = metadata.modified() {
275					backups.push((entry.path(), modified));
276				}
277			}
278		}
279	}
280
281	// Sort by modified time (oldest first)
282	backups.sort_by_key(|b| b.1);
283
284	let mut removed_count = 0;
285
286	// Remove old backups beyond keep_count
287	for (path, _) in backups.iter().take(backups.len().saturating_sub(keep_count)) {
288		match tokio::fs::remove_file(path).await {
289			Ok(_) => {
290				dev_log!("indexing", "[StoreEntry] Removed old backup: {}", path.display());
291				removed_count += 1;
292			},
293			Err(e) => {
294				dev_log!(
295					"indexing",
296					"warn: [StoreEntry] Failed to remove backup {}: {}",
297					path.display(),
298					e
299				);
300			},
301		}
302	}
303
304	Ok(removed_count)
305}
306
307/// Validate index file format before loading
308pub async fn ValidateIndexFormat(index_directory:&Path) -> Result<()> {
309	let index_file = index_directory.join("file_index.json");
310
311	let content = tokio::fs::read_to_string(&index_file)
312		.await
313		.map_err(|e| AirError::FileSystem(format!("Failed to read index file: {}", e)))?;
314
315	// Try to parse as JSON
316	let _:serde_json::Value = serde_json::from_str(&content)
317		.map_err(|e| AirError::Serialization(format!("Index file is not valid JSON: {}", e)))?;
318
319	Ok(())
320}