/**
 * Universal data cache for HydroLang
 * Caches data files from any source, in any format, with automatic storage
 * management (size limits, expiry, and periodic cleanup)
 */
/**
* Universal cache for all data sources
* @ignore
*/
export class HydroLangCache {
constructor() {
this.dbName = 'HydroLang_DataCache';
this.version = 1;
this.db = null;
this.maxCacheSize = 100 * 1024 * 1024 * 1024; // 100GB total
this.maxFileSize = 10 * 1024 * 1024 * 1024; // 10GB per file max
this.maxAge = 2 * 24 * 60 * 60 * 1000; // 2 days
this.cleanupInterval = 60 * 60 * 1000; // Cleanup every hour
this.lastCleanup = 0;
}
async init() {
if (this.db) return;
return new Promise((resolve, reject) => {
const request = indexedDB.open(this.dbName, this.version);
request.onerror = () => reject(request.error);
request.onsuccess = () => {
this.db = request.result;
console.log('HydroLang cache initialized');
resolve();
};
request.onupgradeneeded = (event) => {
const db = event.target.result;
// Single object store holding all cached entries: files, chunks, and runtime variables
if (!db.objectStoreNames.contains('files')) {
const store = db.createObjectStore('files', { keyPath: 'cacheKey' });
store.createIndex('url', 'url', { unique: false });
store.createIndex('source', 'source', { unique: false });
store.createIndex('dataset', 'dataset', { unique: false });
store.createIndex('timestamp', 'timestamp', { unique: false });
store.createIndex('size', 'size', { unique: false });
store.createIndex('format', 'format', { unique: false });
store.createIndex('source_dataset', ['source', 'dataset'], { unique: false });
store.createIndex('lastAccessed', 'lastAccessed', { unique: false });
}
};
});
}
generateCacheKey(url, params = {}) {
// Generate human-readable key based on request parameters
const source = params.source || 'unknown';
const dataType = params.datatype || 'data';
// Extract meaningful parameters from context
const context = globalThis._hydroCacheContext || {};
const args = context.params?.args || params.args || {};
// Build human-readable key parts
const keyParts = [source, dataType];
// Add location if available
if (args.lat !== undefined && args.lon !== undefined) {
keyParts.push(`lat${args.lat}`, `lon${args.lon}`);
} else if (args.latitude !== undefined && args.longitude !== undefined) {
keyParts.push(`lat${args.latitude}`, `lon${args.longitude}`);
}
// Add bbox if available
if (args.bbox) {
const bbox = Array.isArray(args.bbox) ? args.bbox.join('_') : args.bbox;
keyParts.push(`bbox${bbox}`);
}
// Add date range if available
if (args.startDate || args.startDT) {
const start = String(args.startDate || args.startDT).substring(0, 10); // YYYY-MM-DD
keyParts.push(`start${start}`);
}
if (args.endDate || args.endDT) {
const end = String(args.endDate || args.endDT).substring(0, 10);
keyParts.push(`end${end}`);
}
// Add timestamp for single-time requests
if (args.time || args.timestamp) {
const time = String(args.time || args.timestamp).substring(0, 10);
keyParts.push(`time${time}`);
}
// Add dataset/variable if available
if (args.dataset) {
keyParts.push(args.dataset);
}
if (args.variable) {
keyParts.push(args.variable);
}
// Add site ID if available (for USGS, etc.)
if (args.sites || args.site) {
keyParts.push(`site${args.sites || args.site}`);
}
// Join with underscores for readability, remove special characters
const cacheKey = keyParts.join('_').replace(/[^a-zA-Z0-9_\-\.]/g, '');
return cacheKey;
}
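// Illustrative sketch (not executed here): shape of a generated key, assuming no global
// cache context is set. The URL and args below are hypothetical placeholders.
//
//   const cache = new HydroLangCache();
//   const key = cache.generateCacheKey('https://example.com/data', {
//     source: 'usgs',
//     datatype: 'streamflow',
//     args: { sites: '05454500', startDate: '2024-01-01', endDate: '2024-01-31' }
//   });
//   // key -> 'usgs_streamflow_start2024-01-01_end2024-01-31_site05454500'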
// Generate chunk-specific cache key
generateChunkKey(baseKey, chunkIndex) {
// Use a stable key format that doesn't depend on totalChunks
// to avoid key mismatches when totalChunks is discovered mid-download
return `${baseKey}-chunk-${chunkIndex}`;
}
// Check for partial download and resume if possible
async checkPartialDownload(url, options = {}) {
if (!this.db) await this.init();
const baseKey = this.generateCacheKey(url, options.params || {});
const manifestKey = `${baseKey}-manifest`;
try {
const manifestEntry = await new Promise((resolve, reject) => {
const transaction = this.db.transaction(['files'], 'readonly');
const store = transaction.objectStore('files');
const request = store.get(manifestKey);
request.onsuccess = () => resolve(request.result);
request.onerror = () => reject(request.error);
});
if (!manifestEntry) return null;
const manifest = JSON.parse(new TextDecoder().decode(manifestEntry.data));
// Check which chunks are already downloaded
const downloadedChunks = [];
for (let i = 0; i < manifest.totalChunks; i++) {
const chunkKey = manifest.chunks[i];
try {
const chunkEntry = await new Promise((resolve, reject) => {
const transaction = this.db.transaction(['files'], 'readonly');
const store = transaction.objectStore('files');
const request = store.get(chunkKey);
request.onsuccess = () => resolve(request.result);
request.onerror = () => reject(request.error);
});
if (chunkEntry) downloadedChunks.push(i);
} catch (e) {
// Chunk not found
}
}
console.log(`Found partial download: ${downloadedChunks.length}/${manifest.totalChunks} chunks`);
return {
manifest,
downloadedChunks,
baseKey,
manifestKey
};
} catch (error) {
return null;
}
}
// Resume a partial chunked download
async resumeChunkedDownload(partialDownload, options = {}, chunkSize = 100 * 1024 * 1024) {
const { manifest, downloadedChunks, baseKey, manifestKey } = partialDownload;
const { url, totalChunks, totalSize } = manifest;
console.log(`Resuming download: ${downloadedChunks.length}/${totalChunks} chunks already downloaded`);
// Download missing chunks sequentially
const chunkBuffers = [];
const downloadedSet = new Set(downloadedChunks);
for (let i = 0; i < totalChunks; i++) {
if (downloadedSet.has(i)) {
// Chunk already downloaded, just retrieve it
try {
const chunkEntry = await new Promise((resolve, reject) => {
const transaction = this.db.transaction(['files'], 'readonly');
const store = transaction.objectStore('files');
const request = store.get(manifest.chunks[i]);
request.onsuccess = () => resolve(request.result);
request.onerror = () => reject(request.error);
});
chunkBuffers.push(chunkEntry.data);
console.log(`Using cached chunk ${i + 1}/${totalChunks}`);
} catch (error) {
console.error(`Failed to retrieve cached chunk ${i}:`, error);
throw error;
}
} else {
// Download missing chunk
const start = i * chunkSize;
const end = Math.min(start + chunkSize - 1, totalSize - 1);
try {
console.log(`Downloading missing chunk ${i + 1}/${totalChunks} (bytes ${start}-${end})`);
const rangeResponse = await globalThis._originalFetch(url, {
...options,
headers: {
...options.headers,
'Range': `bytes=${start}-${end}`
}
});
if (!rangeResponse.ok) {
throw new Error(`Range request failed: ${rangeResponse.status} ${rangeResponse.statusText}`);
}
const chunkData = await rangeResponse.arrayBuffer();
const chunkKey = manifest.chunks[i];
await this.put(chunkKey, chunkData, {
source: options.source || 'unknown',
dataset: options.dataset || 'unknown',
format: 'chunk',
isChunk: true,
chunkIndex: i,
totalChunks,
chunkStart: start,
chunkEnd: end,
baseKey,
originalUrl: url,
isChunkData: true
});
console.log(`Downloaded and cached chunk ${i + 1}/${totalChunks} (${(chunkData.byteLength / 1024 / 1024).toFixed(1)}MB)`);
chunkBuffers.push(chunkData);
} catch (error) {
console.error(`Failed to download chunk ${i}:`, error);
throw error;
}
}
}
// Concatenate all chunks for immediate response
const totalBuffer = new Uint8Array(totalSize);
let offset = 0;
for (const chunk of chunkBuffers) {
totalBuffer.set(new Uint8Array(chunk), offset);
offset += chunk.byteLength;
}
console.log(`Resumed download complete: ${(totalSize / 1024 / 1024).toFixed(1)}MB in ${totalChunks} chunks`);
// Store the assembled data with the main cache key for future requests
// Use the original URL from context, not the potentially proxied URL from manifest
const originalUrl = options.params?.originalUrl || url;
const cacheKey = this.generateCacheKey(originalUrl, {
source: options.source || 'unknown',
datatype: options.dataset || 'data'
});
await this.put(cacheKey, totalBuffer.buffer, {
source: options.source || 'unknown',
dataset: options.dataset || 'unknown',
dataType: options.datatype || 'data',
format: 'netcdf',
assembledFromChunks: true,
chunkManifest: manifestKey,
originalUrl: originalUrl,
totalSize: totalSize,
params: options.params
});
// Return virtual response
return {
ok: true,
status: 200,
statusText: 'OK',
headers: new Headers({
'content-type': 'application/octet-stream',
'content-length': totalSize.toString()
}),
arrayBuffer: async () => totalBuffer.buffer,
text: async () => new TextDecoder().decode(totalBuffer),
json: async () => JSON.parse(new TextDecoder().decode(totalBuffer)),
blob: async () => new Blob([totalBuffer.buffer])
};
}
// Download file in chunks with immediate storage
async downloadChunked(url, options = {}, chunkSize = 100 * 1024 * 1024) {
if (!this.db) await this.init();
// Check for partial download first
const partialDownload = await this.checkPartialDownload(url, options);
if (partialDownload) {
console.log('Resuming partial download');
return this.resumeChunkedDownload(partialDownload, options, chunkSize);
}
// Check if server supports range requests and get content length
const isS3Url = url.includes('s3.') || url.includes('amazonaws.com') || url.includes('amazon.com');
let acceptRanges = null;
let contentLength = null;
// Skip HEAD request for NOAA URLs or when skipSizeDetection is set
if (!url.includes('noaa.gov') && !options.params?.skipSizeDetection) {
try {
// Try HEAD request first for non-NOAA URLs to detect file size
const headResponse = await globalThis._originalFetch(url, { ...options, method: 'HEAD' });
if (headResponse.ok) {
acceptRanges = headResponse.headers.get('accept-ranges');
contentLength = parseInt(headResponse.headers.get('content-length'));
if (contentLength) {
console.log(`File size detected: ${(contentLength / 1024 / 1024).toFixed(1)}MB via HEAD`);
}
}
} catch (headError) {
console.warn('HEAD request failed:', headError.message);
}
}
// For NOAA URLs or when skipSizeDetection is set, assume range support
if (!contentLength && (url.includes('noaa.gov') || options.params?.skipSizeDetection)) {
acceptRanges = 'bytes'; // Assume range support
}
// For large files or cloud storage URLs, try range request to detect content length
else if (!contentLength && (isS3Url || options.expectedSize > 10 * 1024 * 1024) && !url.includes('noaa.gov') && !options.params?.skipSizeDetection) {
try {
console.log('Trying range request to detect file size...');
const rangeResponse = await globalThis._originalFetch(url, {
...options,
headers: {
...options.headers,
'Range': 'bytes=0-0' // Request just first byte
}
});
if (rangeResponse.ok && rangeResponse.status === 206) {
// Parse content-range header: "bytes 0-0/123456"
const contentRange = rangeResponse.headers.get('content-range');
if (contentRange) {
const match = contentRange.match(/\/(\d+)$/);
if (match) {
contentLength = parseInt(match[1]);
acceptRanges = 'bytes';
console.log(`File size detected: ${(contentLength / 1024 / 1024).toFixed(1)}MB via range request`);
}
}
}
} catch (rangeError) {
console.warn('Range request failed:', rangeError.message);
}
}
// For S3 URLs, assume range support even if content length detection failed
if (!contentLength && isS3Url) {
acceptRanges = 'bytes';
}
// For other URLs, only enable chunked download if we know the content length
else if (!contentLength && !url.includes('noaa.gov') && !options.params?.skipSizeDetection) {
console.warn('Could not determine content length, falling back to full download');
return globalThis._originalFetch(url, options);
}
// For very small files, don't bother with chunking (only if we know the size)
if (contentLength && contentLength <= chunkSize) {
return globalThis._originalFetch(url, options);
}
const baseKey = this.generateCacheKey(url, options.params || {});
let totalChunks = contentLength ? Math.ceil(contentLength / chunkSize) : null;
let actualTotalSize = contentLength;
// No separate manifest entry is written here; chunks are reassembled automatically by baseKey,
// so checkPartialDownload() can only resume downloads that stored a manifest
// Download and store chunks sequentially
const chunkBuffers = [];
let chunkIndex = 0;
let totalDownloaded = 0;
while (true) {
const start = chunkIndex * chunkSize;
let end;
if (contentLength) {
end = Math.min(start + chunkSize - 1, contentLength - 1);
} else {
// For unknown size, request chunkSize bytes
end = start + chunkSize - 1;
}
try {
const chunkDesc = contentLength ?
`chunk ${chunkIndex + 1}/${totalChunks} (bytes ${start}-${end})` :
`chunk ${chunkIndex + 1} (bytes ${start}-${end})`;
console.log(`Downloading ${chunkDesc}`);
const rangeResponse = await globalThis._originalFetch(url, {
...options,
headers: {
...options.headers,
'Range': `bytes=${start}-${end}`
}
});
if (!rangeResponse.ok) {
if (rangeResponse.status === 416) {
// Range not satisfiable - we've reached the end of the file
console.log(`Reached end of file at chunk ${chunkIndex}`);
break;
}
throw new Error(`Range request failed: ${rangeResponse.status} ${rangeResponse.statusText}`);
}
const chunkData = await rangeResponse.arrayBuffer();
// If we didn't know the total size, update it from content-range header
if (!contentLength) {
const contentRange = rangeResponse.headers.get('content-range');
if (contentRange) {
const match = contentRange.match(/\/(\d+)$/);
if (match) {
actualTotalSize = parseInt(match[1]);
totalChunks = Math.ceil(actualTotalSize / chunkSize);
}
}
}
const chunkKey = this.generateChunkKey(baseKey, chunkIndex);
await this.put(chunkKey, chunkData, {
cacheKey: chunkKey, // Use chunkKey as the cache key directly
source: options.source || 'unknown',
dataset: options.dataset || 'unknown',
format: 'chunk',
isChunk: true,
chunkIndex,
totalChunks: totalChunks || null,
chunkStart: start,
chunkEnd: start + chunkData.byteLength - 1,
baseKey,
originalUrl: url,
isChunkData: true
});
chunkBuffers.push(chunkData);
totalDownloaded += chunkData.byteLength;
// If this chunk is smaller than requested, we've reached the end
if (chunkData.byteLength < chunkSize) {
break;
}
chunkIndex++;
// Safety check - don't download more than a reasonable number of chunks
if (chunkIndex > 1000) {
console.warn('Too many chunks downloaded, stopping to prevent infinite loop');
break;
}
} catch (error) {
console.error(`Failed to download chunk ${chunkIndex}:`, error);
throw error;
}
}
// Concatenate all chunks for immediate response
const finalTotalSize = actualTotalSize || totalDownloaded;
const totalBuffer = new Uint8Array(finalTotalSize);
let offset = 0;
for (const chunk of chunkBuffers) {
totalBuffer.set(new Uint8Array(chunk), offset);
offset += chunk.byteLength;
}
// Store the assembled data with the main cache key for future requests
const originalUrl = options.params?.originalUrl || url;
const cacheKey = this.generateCacheKey(originalUrl, {
source: options.source || 'unknown',
datatype: options.dataset || 'data'
});
await this.put(cacheKey, totalBuffer.buffer, {
source: options.source || 'unknown',
dataset: options.dataset || 'unknown',
dataType: options.datatype || 'data',
format: 'netcdf',
assembledFromChunks: true,
totalSize: finalTotalSize,
originalUrl: originalUrl,
params: options.params
});
// Return a Response-like object
return {
ok: true,
status: 200,
statusText: 'OK',
headers: new Headers({
'content-type': 'application/octet-stream',
'content-length': finalTotalSize.toString()
}),
arrayBuffer: async () => totalBuffer.buffer,
text: async () => new TextDecoder().decode(totalBuffer),
json: async () => JSON.parse(new TextDecoder().decode(totalBuffer))
};
}
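// Illustrative sketch (not executed here): chunked download of a large remote file.
// Assumes globalThis._originalFetch points at the native fetch and that the server
// honors HTTP Range requests; the URL is a hypothetical placeholder.
//
//   const cache = new HydroLangCache();
//   await cache.init();
//   globalThis._originalFetch = fetch.bind(globalThis);
//   const response = await cache.downloadChunked(
//     'https://example.com/large-model-output.nc',
//     { source: 'example', dataset: 'forecast', params: {} },
//     50 * 1024 * 1024 // 50MB chunks
//   );
//   const bytes = await response.arrayBuffer();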
// Automatically reassemble chunks for a baseKey
async reassembleChunks(baseKey) {
if (!this.db) await this.init();
return new Promise((resolve, reject) => {
const transaction = this.db.transaction(['files'], 'readonly');
const store = transaction.objectStore('files');
// Find all chunks for this baseKey
const chunks = [];
const request = store.openCursor();
request.onsuccess = (event) => {
const cursor = event.target.result;
if (cursor) {
const entry = cursor.value;
// Check if this is a chunk for our baseKey
if (entry.baseKey === baseKey && entry.isChunk) {
chunks.push({
index: entry.chunkIndex,
data: entry.data,
size: entry.data.byteLength
});
}
cursor.continue();
} else {
// Finished scanning, now reassemble
if (chunks.length === 0) {
resolve(null);
return;
}
// Sort chunks by index
chunks.sort((a, b) => a.index - b.index);
// Concatenate
const totalSize = chunks.reduce((sum, chunk) => sum + chunk.size, 0);
const resultBuffer = new Uint8Array(totalSize);
let offset = 0;
for (const chunk of chunks) {
resultBuffer.set(new Uint8Array(chunk.data), offset);
offset += chunk.size;
}
resolve({
data: resultBuffer.buffer,
metadata: {
reassembled: true,
totalSize,
chunkCount: chunks.length,
baseKey
}
});
}
};
request.onerror = () => reject(request.error);
});
}
async get(cacheKey) {
if (!this.db) await this.init();
// First try to get as single file
const singleFile = await new Promise((resolve, reject) => {
const transaction = this.db.transaction(['files'], 'readonly');
const fileStore = transaction.objectStore('files');
const fileRequest = fileStore.get(cacheKey);
fileRequest.onsuccess = () => {
const entry = fileRequest.result;
if (entry && !entry.isChunk) {
const age = Date.now() - entry.timestamp;
if (age > this.maxAge) {
this.delete(cacheKey);
resolve(null);
} else {
resolve({
data: entry.data,
metadata: entry
});
}
} else {
resolve(null);
}
};
transaction.onerror = () => reject(transaction.error);
});
if (singleFile) return singleFile;
// If no single file, try to reassemble chunks automatically
const chunkedData = await this.reassembleChunks(cacheKey);
if (chunkedData) return chunkedData;
// Nothing found
return null;
}
async put(cacheKey, data, metadata) {
if (!this.db) await this.init();
// Check file size limit
if (data.byteLength > this.maxFileSize) {
console.warn(`File too large (${(data.byteLength / 1024 / 1024).toFixed(1)}MB), skipping cache`);
return;
}
// cacheKey is passed directly as the first parameter
const entry = {
cacheKey,
url: metadata.originalUrl || metadata.url || cacheKey, // Store original URL for reference
data,
dataType: metadata.dataType, // Add dataType to entry
timestamp: Date.now(),
lastAccessed: Date.now(),
...metadata
};
// Run cleanup if needed
await this.cleanupIfNeeded();
return new Promise((resolve, reject) => {
const transaction = this.db.transaction(['files'], 'readwrite');
const fileStore = transaction.objectStore('files');
// Store everything in files store only
const fileRequest = fileStore.put(entry);
transaction.oncomplete = () => {
console.log(`Cached: ${cacheKey} (${(data.byteLength / 1024 / 1024).toFixed(1)}MB)`);
resolve(cacheKey);
};
transaction.onerror = () => reject(transaction.error);
});
}
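// Illustrative sketch (not executed here): storing and retrieving a single entry.
// The key and metadata values are hypothetical.
//
//   const cache = new HydroLangCache();
//   await cache.init();
//   const payload = new TextEncoder().encode('{"flow": 42}').buffer;
//   await cache.put('usgs_streamflow_site05454500', payload, {
//     source: 'usgs',
//     dataset: 'streamflow',
//     format: 'json'
//   });
//   const hit = await cache.get('usgs_streamflow_site05454500');
//   if (hit) console.log(JSON.parse(new TextDecoder().decode(hit.data)));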
async putChunked(cacheKey, buffer, metadata, chunkSize = 100 * 1024 * 1024) {
if (!this.db) await this.init();
const totalSize = buffer.byteLength;
const totalChunks = Math.ceil(totalSize / chunkSize);
const baseKey = cacheKey;
console.log(`Storing large file as ${totalChunks} chunks: ${cacheKey}`);
// Store chunks
for (let i = 0; i < totalChunks; i++) {
const start = i * chunkSize;
const end = Math.min(start + chunkSize, totalSize);
const chunkData = buffer.slice(start, end);
const chunkKey = this.generateChunkKey(baseKey, i);
await this.put(chunkKey, chunkData, {
...metadata,
cacheKey: chunkKey,
format: 'chunk',
isChunk: true,
chunkIndex: i,
totalChunks,
chunkStart: start,
chunkEnd: end - 1,
baseKey,
isChunkData: true
});
}
// No separate main entry is stored for the assembled data. get() first looks for a
// non-chunk entry under this key, finds none, and falls back to reassembleChunks(),
// which collects every chunk whose baseKey matches and concatenates them in index order.
// Note: reassembleChunks() scans the whole store with a cursor rather than using an
// index on baseKey, which is simple but not optimal for very large caches.
return baseKey;
}
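// Illustrative sketch (not executed here): storing a buffer larger than one chunk,
// assuming an initialized HydroLangCache instance `cache`. The key is hypothetical.
// get() finds no single entry for the key and falls back to reassembleChunks().
//
//   const bigBuffer = new ArrayBuffer(250 * 1024 * 1024); // hypothetical 250MB payload
//   await cache.putChunked('noaa_gridded_example', bigBuffer, {
//     source: 'noaa',
//     dataset: 'example'
//   }, 100 * 1024 * 1024);
//   const assembled = await cache.get('noaa_gridded_example');
//   // assembled.metadata.reassembled === true; assembled.data holds the full 250MB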
async updateLastAccessed(cacheKey) {
if (!this.db) await this.init();
return new Promise((resolve, reject) => {
const transaction = this.db.transaction(['files'], 'readwrite');
const store = transaction.objectStore('files');
const request = store.get(cacheKey);
request.onsuccess = () => {
if (request.result) {
const entry = request.result;
entry.lastAccessed = Date.now();
store.put(entry);
}
resolve();
};
request.onerror = () => reject(request.error);
});
}
async cleanupIfNeeded() {
if (!this.db) await this.init();
const now = Date.now();
if (now - this.lastCleanup < this.cleanupInterval) return;
this.lastCleanup = now;
console.log('Running cache cleanup...');
try {
const stats = await this.getStats();
// Remove expired files
const expiredKeys = [];
for (const entry of stats.entries) {
if ((now - entry.timestamp) > this.maxAge) {
expiredKeys.push(entry.cacheKey);
}
}
for (const key of expiredKeys) {
await this.delete(key);
}
// Remove oldest files if still over size limit
if (stats.totalSize > this.maxCacheSize) {
const remainingStats = await this.getStats();
const sortedEntries = remainingStats.entries.sort((a, b) => a.lastAccessed - b.lastAccessed);
let sizeToRemove = remainingStats.totalSize - this.maxCacheSize;
for (const entry of sortedEntries) {
if (sizeToRemove <= 0) break;
await this.delete(entry.cacheKey);
sizeToRemove -= entry.size;
}
}
console.log('Cache cleanup complete');
} catch (error) {
console.warn('Cache cleanup failed:', error);
}
}
async delete(cacheKey) {
if (!this.db) await this.init();
return new Promise((resolve, reject) => {
const transaction = this.db.transaction(['files'], 'readwrite');
const fileStore = transaction.objectStore('files');
fileStore.delete(cacheKey);
transaction.oncomplete = () => resolve();
transaction.onerror = () => reject(transaction.error);
});
}
async getStats() {
if (!this.db) await this.init();
return new Promise((resolve, reject) => {
const transaction = this.db.transaction(['files'], 'readonly');
const store = transaction.objectStore('files');
const request = store.getAll();
request.onsuccess = () => {
const entries = request.result;
const totalSize = entries.reduce((sum, entry) => sum + (entry.data?.byteLength || 0), 0);
resolve({
totalFiles: entries.length,
totalSize,
totalSizeMB: (totalSize / 1024 / 1024).toFixed(1),
totalSizeGB: (totalSize / 1024 / 1024 / 1024).toFixed(2),
entries: entries.map(entry => ({
cacheKey: entry.cacheKey,
filename: entry.url?.split('/').pop() || 'unknown',
source: entry.source,
dataset: entry.dataset,
format: entry.format,
size: entry.data?.byteLength || 0,
sizeMB: ((entry.data?.byteLength || 0) / 1024 / 1024).toFixed(1),
timestamp: new Date(entry.timestamp).toISOString(),
lastAccessed: new Date(entry.lastAccessed).toISOString(),
age: Math.floor((Date.now() - entry.timestamp) / (24 * 60 * 60 * 1000))
}))
});
};
request.onerror = () => reject(request.error);
});
}
async clear() {
if (!this.db) await this.init();
return new Promise((resolve, reject) => {
const transaction = this.db.transaction(['files'], 'readwrite');
const fileStore = transaction.objectStore('files');
fileStore.clear();
transaction.oncomplete = () => {
console.log('Cleared all cached data');
resolve();
};
transaction.onerror = () => reject(transaction.error);
});
}
async list(options = {}) {
if (!this.db) await this.init();
return new Promise((resolve, reject) => {
const transaction = this.db.transaction(['files'], 'readonly');
const store = transaction.objectStore('files');
const request = store.getAll();
request.onsuccess = () => {
let entries = request.result;
// Filter by source if specified
if (options.source) {
entries = entries.filter(entry => entry.source === options.source);
}
// Filter by dataType if specified
if (options.dataType) {
entries = entries.filter(entry => entry.dataType === options.dataType);
}
// Filter out runtime variables unless explicitly requested
if (!options.includeVariables) {
entries = entries.filter(entry => !entry.cacheKey.startsWith('var_'));
}
// Map to user-friendly format
const results = entries.map(entry => ({
cacheKey: entry.cacheKey,
source: entry.source,
dataType: entry.dataType,
format: entry.format,
size: entry.size || entry.data?.byteLength || 0,
sizeFormatted: this.formatBytes(entry.size || entry.data?.byteLength || 0),
timestamp: entry.timestamp,
lastAccessed: entry.lastAccessed,
age: Date.now() - entry.timestamp,
ageFormatted: this.formatAge(Date.now() - entry.timestamp),
url: entry.url
}));
// Sort by most recently accessed
results.sort((a, b) => b.lastAccessed - a.lastAccessed);
resolve(results);
};
request.onerror = () => reject(request.error);
});
}
async getBySource(source) {
if (!this.db) await this.init();
return new Promise((resolve, reject) => {
const transaction = this.db.transaction(['files'], 'readonly');
const store = transaction.objectStore('files');
const request = store.getAll();
request.onsuccess = () => {
const entries = request.result;
const filtered = entries.filter(entry => entry.source === source);
resolve(filtered);
};
request.onerror = () => reject(request.error);
});
}
formatBytes(bytes) {
if (bytes === 0) return '0 B';
const k = 1024;
const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
const i = Math.floor(Math.log(bytes) / Math.log(k));
return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
}
formatAge(ms) {
const seconds = Math.floor(ms / 1000);
const minutes = Math.floor(seconds / 60);
const hours = Math.floor(minutes / 60);
const days = Math.floor(hours / 24);
if (days > 0) return `${days} day${days > 1 ? 's' : ''} ago`;
if (hours > 0) return `${hours} hour${hours > 1 ? 's' : ''} ago`;
if (minutes > 0) return `${minutes} minute${minutes > 1 ? 's' : ''} ago`;
return `${seconds} second${seconds > 1 ? 's' : ''} ago`;
}
// Runtime variable storage methods
async storeVariable(key, value, metadata = {}) {
if (!this.db) await this.init();
// Use a special cache key for variables
const cacheKey = `var_${key}`;
return new Promise((resolve, reject) => {
const transaction = this.db.transaction(['files'], 'readwrite');
const fileStore = transaction.objectStore('files');
// Convert value to ArrayBuffer for storage
let buffer;
if (value instanceof ArrayBuffer) {
buffer = value;
} else if (typeof value === 'string') {
buffer = new TextEncoder().encode(value).buffer;
} else {
// For objects/arrays, JSON stringify
const jsonStr = JSON.stringify(value);
buffer = new TextEncoder().encode(jsonStr).buffer;
}
// Store everything in files store
const fileRequest = fileStore.put({
cacheKey,
data: buffer,
url: `variable://${key}`,
source: 'runtime',
dataset: 'variables',
format: metadata.format || 'json',
digestible: true,
filename: key,
size: buffer.byteLength,
timestamp: Date.now(),
lastAccessed: Date.now(),
variableKey: key,
variableType: typeof value,
...metadata
});
transaction.oncomplete = () => {
console.log(`Stored runtime variable: ${key} (${buffer.byteLength} bytes)`);
resolve();
};
transaction.onerror = () => reject(transaction.error);
});
}
async getVariable(key) {
if (!this.db) await this.init();
const cacheKey = `var_${key}`;
return new Promise((resolve, reject) => {
const transaction = this.db.transaction(['files'], 'readonly');
const fileStore = transaction.objectStore('files');
// Get file data (everything is in files store now)
const fileRequest = fileStore.get(cacheKey);
fileRequest.onsuccess = () => {
const entry = fileRequest.result;
if (entry) {
// Update last accessed time
this.updateLastAccessed(cacheKey).catch(console.warn);
// Convert back to original type
let value;
const uint8Array = new Uint8Array(entry.data);
if (entry.variableType === 'string') {
value = new TextDecoder().decode(uint8Array);
} else if (entry.format === 'json' || entry.variableType === 'object') {
const jsonStr = new TextDecoder().decode(uint8Array);
try {
value = JSON.parse(jsonStr);
} catch (e) {
value = jsonStr; // Fallback to string if JSON parse fails
}
} else {
// Return as ArrayBuffer for binary data
value = entry.data;
}
resolve({
key: entry.variableKey,
value,
metadata: entry,
timestamp: entry.timestamp,
lastAccessed: entry.lastAccessed
});
} else {
resolve(null);
}
};
transaction.onerror = () => reject(transaction.error);
});
}
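// Illustrative sketch (not executed here): persisting a runtime variable between sessions,
// assuming an initialized HydroLangCache instance `cache`. The name and value are hypothetical.
//
//   await cache.storeVariable('lastRunConfig', { basin: 'iowa-river', step: 6 });
//   const stored = await cache.getVariable('lastRunConfig');
//   if (stored) console.log(stored.value.basin); // 'iowa-river'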
async deleteVariable(key) {
const cacheKey = `var_${key}`;
return await this.delete(cacheKey);
}
async listVariables() {
if (!this.db) await this.init();
return new Promise((resolve, reject) => {
const transaction = this.db.transaction(['files'], 'readonly');
const store = transaction.objectStore('files');
const request = store.getAll();
request.onsuccess = () => {
const allEntries = request.result;
const variables = allEntries
.filter(entry => entry.source === 'runtime' && entry.dataset === 'variables')
.map(entry => ({
key: entry.variableKey,
type: entry.variableType,
format: entry.format,
size: entry.data?.byteLength || 0,
timestamp: entry.timestamp,
lastAccessed: entry.lastAccessed
}));
resolve(variables);
};
request.onerror = () => reject(request.error);
});
}
// Save dataset results for later use
async saveDataset(key, data, metadata = {}) {
const datasetEntry = {
data,
savedAt: new Date().toISOString(),
...metadata
};
return await this.storeVariable(`dataset_${key}`, datasetEntry, {
format: 'dataset',
datasetKey: key
});
}
// Load saved dataset
async loadDataset(key) {
const result = await this.getVariable(`dataset_${key}`);
return result ? result.value : null;
}
// List saved datasets
async listDatasets() {
const allVars = await this.listVariables();
return allVars.filter(v => v.key.startsWith('dataset_')).map(v => ({
...v,
datasetKey: v.key.replace('dataset_', '')
}));
}
// Delete saved dataset
async deleteDataset(key) {
return await this.deleteVariable(`dataset_${key}`);
}
// Auto-save retrieved data (convenience method)
async saveRetrievedData(key, data, source, params = {}) {
const metadata = {
source,
params,
retrievedAt: new Date().toISOString(),
autoSaved: true
};
return await this.saveDataset(key, data, metadata);
}
// Force cache a raw file that was downloaded (manual caching)
async cacheRawFile(url, arrayBuffer, metadata = {}) {
try {
const buffer = arrayBuffer instanceof ArrayBuffer ? arrayBuffer : await arrayBuffer;
// Determine format
let format = 'unknown';
if (url.includes('.nc') || url.includes('netcdf')) format = 'netcdf';
else if (url.includes('.grb') || url.includes('.grib')) format = 'grib2';
else if (url.includes('.json')) format = 'json';
// Generate cache key using old method for manual caching
const cacheKey = this.generateCacheKey(url, {
source: metadata.source || 'manual',
datatype: metadata.dataset || 'raw-file'
});
await this.put(cacheKey, buffer, {
source: metadata.source || 'manual',
dataset: metadata.dataset || 'raw-file',
dataType: metadata.dataType || 'raw-file',
format: format,
digestible: format === 'json',
filename: url.split('/').pop() || 'data',
size: buffer.byteLength,
manuallyCached: true,
originalUrl: url,
...metadata
});
console.log(`Manually cached ${buffer.byteLength} bytes for: ${url}`);
return true;
} catch (error) {
console.error('Failed to manually cache file:', error);
return false;
}
}
}
// Global cache instance
const hydroLangCache = new HydroLangCache();
/**
 * Cache management API for global use
 * @ignore
 */
export const cacheAPI = {
// Get cache statistics
getStats: () => hydroLangCache.getStats(),
// Clear all cached data
clear: () => hydroLangCache.clear(),
// Get cache entries by source
getBySource: (source) => hydroLangCache.getBySource(source),
// Delete specific cache entry
delete: (cacheKey) => hydroLangCache.delete(cacheKey),
// Force cleanup
cleanup: () => hydroLangCache.cleanupIfNeeded(),
// Runtime variable storage
storeVariable: (key, value, metadata) => hydroLangCache.storeVariable(key, value, metadata),
getVariable: (key) => hydroLangCache.getVariable(key),
deleteVariable: (key) => hydroLangCache.deleteVariable(key),
listVariables: () => hydroLangCache.listVariables(),
// Dataset storage (for saving retrieved data)
saveDataset: (key, data, metadata) => hydroLangCache.saveDataset(key, data, metadata),
loadDataset: (key) => hydroLangCache.loadDataset(key),
listDatasets: () => hydroLangCache.listDatasets(),
deleteDataset: (key) => hydroLangCache.deleteDataset(key),
saveRetrievedData: (key, data, source, params) => hydroLangCache.saveRetrievedData(key, data, source, params),
cacheRawFile: (url, arrayBuffer, metadata) => hydroLangCache.cacheRawFile(url, arrayBuffer, metadata),
// Chunked file operations
putChunked: (url, arrayBuffer, metadata, chunkSize) => hydroLangCache.putChunked(url, arrayBuffer, metadata, chunkSize),
getChunked: (baseKey) => hydroLangCache.reassembleChunks(baseKey),
};
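// Illustrative sketch (not executed here): typical use of cacheAPI from application code.
// The import path, dataset key, and contents are hypothetical.
//
//   import { cacheAPI } from './cache.js';
//
//   await cacheAPI.saveDataset('iowa-river-2024', { timeseries: [1, 2, 3] }, { source: 'usgs' });
//   const dataset = await cacheAPI.loadDataset('iowa-river-2024');
//   const stats = await cacheAPI.getStats();
//   console.log(`${stats.totalFiles} files, ${stats.totalSizeMB} MB cached`);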
/**
* Generate a simple cache key for non-gridded data
* @ignore
*/
export function generateSimpleCacheKey(context = {}, url = '') {
const source = context.source || 'unknown';
const datatype = context.dataType || 'data';
// Use user-provided cache ID if available
if (context.cacheId) {
// Special handling for Zarr chunks to ensure uniqueness even with a single cacheId
// If we don't do this, all chunks would overwrite the same cache entry!
if (url.includes('.zarr/')) {
const chunkMatch = url.match(/\.zarr\/(.*)$/);
if (chunkMatch && chunkMatch[1]) {
// Append chunk path to user's ID (e.g. "my-storm-0.2.5")
return `${source}/${datatype}/${context.cacheId}-${chunkMatch[1].replace(/\//g, '-')}`;
}
}
return `${source}/${datatype}/${context.cacheId}`;
}
// Auto-generate a 4-character hash from the URL only (params are ignored so the same file always hits the cache)
const hashInput = url;
let hash = 0;
for (let i = 0; i < hashInput.length; i++) {
hash = ((hash << 5) - hash) + hashInput.charCodeAt(i);
hash = hash & hash;
}
const shortHash = Math.abs(hash).toString(36).substring(0, 4);
return `${source}/${datatype}/${shortHash}`;
}
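// Illustrative sketch (not executed here): keys produced by generateSimpleCacheKey.
// The context values and URLs are hypothetical.
//
//   generateSimpleCacheKey({ source: 'noaa', dataType: 'gridded', cacheId: 'storm-a' },
//     'https://example.com/output.nc');
//   // -> 'noaa/gridded/storm-a'
//
//   generateSimpleCacheKey({ source: 'noaa', dataType: 'gridded' },
//     'https://example.com/output.nc');
//   // -> 'noaa/gridded/<4-character hash of the URL>'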
/**
* Detect file format from URL or content type
* @ignore
*/
export function detectFormat(url, contentType = '') {
if (contentType.includes('application/json') || url.includes('outputFormat=application/json')) return 'json';
if (contentType.includes('application/xml') || contentType.includes('text/xml')) return 'xml';
if (contentType.includes('application/x-grib')) return 'grib2';
if (contentType.includes('application/x-netcdf')) return 'netcdf';
const urlPath = url.split('?')[0].toLowerCase();
if (urlPath.endsWith('.nc')) return 'netcdf';
if (urlPath.endsWith('.grib2') || urlPath.endsWith('.grb')) return 'grib2';
if (urlPath.endsWith('.h5') || urlPath.endsWith('.hdf5')) return 'hdf5';
if (urlPath.endsWith('.tif') || urlPath.endsWith('.tiff')) return 'geotiff';
if (urlPath.endsWith('.zip')) return 'archive';
if (url.includes('.zarr/')) {
const pathAfterZarr = url.split('.zarr/')[1];
if (pathAfterZarr && /^\d+\.\d+\.\d+$/.test(pathAfterZarr.split('/')[0])) {
return 'zarr-chunk';
}
return 'zarr';
}
return 'unknown';
}
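// Illustrative sketch (not executed here): format detection from URL and content type.
// The URLs are hypothetical.
//
//   detectFormat('https://example.com/model.nc');                  // 'netcdf'
//   detectFormat('https://example.com/store.zarr/0.1.2');          // 'zarr-chunk'
//   detectFormat('https://example.com/api', 'application/json');   // 'json'
//   detectFormat('https://example.com/unknown.bin');               // 'unknown'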
/**
* Fetch with caching support
* @ignore
*/
export async function cachedFetch(url, options = {}) {
const cache = globalThis.hydro?.cache;
// Safety check: if _originalFetch isn't set yet (e.g. called before init), use native fetch
// This prevents "globalThis._originalFetch is not a function" errors
const fetchFn = globalThis._originalFetch || globalThis.fetch;
if (!cache) return fetchFn(url, options);
const context = globalThis._hydroCacheContext || {};
const cacheKey = generateSimpleCacheKey(context, url);
// 1. CHECK CACHE FIRST (if caching is enabled)
// If context.cache is explicitly false, skip cache read
if (context.cache !== false) {
try {
const cached = await cache.get(cacheKey);
if (cached) {
console.log(`✓ Cache hit [${cacheKey}]`);
const format = cached.metadata?.format;
// Return appropriately based on format
if (format === 'json' || format === 'xml') {
return new Response(new Blob([cached.data], { type: cached.metadata.contentType }), {
status: cached.metadata.responseStatus || 200,
headers: { 'content-type': cached.metadata.contentType }
});
} else {
return cached.data; // Return ArrayBuffer for binary data
}
}
} catch (e) {
console.warn('Cache read failed:', e.message);
}
} else {
console.log(`Cache disabled for this request: ${url}`);
}
// 2. FETCH FROM SERVER (direct first, then proxies)
console.log(`Fetching from server: ${url}`);
let response;
let lastError;
// Determine if this should be a chunked download
// Don't chunk Zarr chunks (they're already individual files) or metadata files
const isZarrChunk = url.includes('.zarr/') && /\d+\.\d+\.\d+$/.test(url.split('.zarr/')[1]);
const isMetadataFile = url.includes('.zarray') || url.includes('.zattrs') || url.includes('.zgroup');
// Check if this is a WFS/API request that returns JSON/XML (should NOT be chunked)
const isWFSRequest = url.includes('service=WFS') || url.includes('service=WMS') ||
url.includes('/ows?') || url.includes('/wfs?') ||
url.includes('outputFormat=application/json') ||
url.includes('outputFormat=json');
// Check if this is a very large gridded data file that benefits from chunking
const isLargeGriddedFile = url.match(/\.(grib2|grb2|nc|hdf|hdf5)(\?|$)/i) &&
(url.includes('nomads.ncep.noaa.gov') || url.includes('amazonaws.com'));
// Only use chunked download if:
// 1. Explicitly requested via context.useChunkedDownload
// 2. Expected size is known to be very large (>100MB) AND not explicitly disabled
// 3. URL is a very large GRIB2/NetCDF from NOAA/AWS AND not explicitly disabled
// DO NOT chunk: WFS requests, Zarr chunks, metadata files, or smaller files (PRISM zips, GeoTIFFs, etc.)
// Explicit control via params.chunked:
//   true      -> force chunked download
//   false     -> disable chunked download
//   undefined -> auto-detect from the expected size and known large-file hosts
const explicitChunked = context.params?.chunked;
const shouldUseChunked = !isWFSRequest &&
!isZarrChunk &&
!isMetadataFile &&
(explicitChunked === true || /* Force enable */
(explicitChunked !== false && /* Not explicitly disabled */
(
context.useChunkedDownload || /* Legacy/Internal flag */
(context.expectedSize && context.expectedSize > 100 * 1024 * 1024) ||
isLargeGriddedFile
)
)
);
// Explicit proxy request
const proxyRequested = options.proxy === true || options.params?.proxy === true || context.params?.proxy === true;
// Skip direct fetch for URLs that will always fail with CORS, or when a proxy is explicitly requested
const skipDirectFetch = proxyRequested || url.includes('ncep.noaa.gov') || url.includes('nomads.ncep.noaa.gov') ||
url.includes('amazonaws.com') || url.includes('nasa.gov');
if (!skipDirectFetch) {
try {
if (shouldUseChunked) {
console.log('Using chunked download');
response = await cache.downloadChunked(url, {
...options,
source: context.source || 'unknown',
expectedSize: context.expectedSize,
params: context.params
});
} else {
response = await fetchFn(url, options);
}
// Check if successful - if not, try proxies
if (!response || !response.ok) {
response = null;
}
} catch (error) {
lastError = error;
console.warn('Direct fetch failed, trying proxies:', error.message);
response = null;
}
} else {
console.log('Skipping direct fetch for CORS-restricted URL, using proxies');
}
// Only try proxies if direct fetch failed or was skipped
if (!response && globalThis.hydro?.proxies) {
const proxies = globalThis.hydro.proxies;
// Try local proxies
if (proxies['local-proxy']?.endpoint) {
// Handle both single URL and array of URLs
const proxyEndpoints = Array.isArray(proxies['local-proxy'].endpoint)
? proxies['local-proxy'].endpoint
: [proxies['local-proxy'].endpoint];
for (const proxyBaseUrl of proxyEndpoints) {
try {
const proxiedUrl = proxyBaseUrl + url;
if (shouldUseChunked) {
response = await cache.downloadChunked(proxiedUrl, {
...options,
source: context.source || 'unknown',
expectedSize: url.includes('noaa.gov') ? 100 * 1024 * 1024 : context.expectedSize, // Assume large for NOAA
params: {
...context.params,
source: context.source || 'unknown',
originalUrl: url, // Preserve original URL for cache key generation
skipSizeDetection: url.includes('noaa.gov') // Skip HEAD requests for NOAA
}
});
} else {
response = await fetchFn(proxiedUrl, options);
}
break;
} catch (proxyError) {
lastError = proxyError;
}
}
}
// Try CORS proxy
if (!response && proxies['researchverse']) {
try {
const proxiedUrl = proxies['researchverse'].endpoint + url;
if (shouldUseChunked) {
response = await cache.downloadChunked(proxiedUrl, {
...options,
source: context.source || 'unknown',
expectedSize: url.includes('noaa.gov') ? 100 * 1024 * 1024 : context.expectedSize, // Assume large for NOAA
params: {
...context.params,
source: context.source || 'unknown',
originalUrl: url, // Preserve original URL for cache key generation
skipSizeDetection: url.includes('noaa.gov') // Skip HEAD requests for NOAA
}
});
} else {
response = await fetchFn(proxiedUrl, options);
}
console.log('CORS proxy succeeded');
} catch (proxyError) {
lastError = proxyError;
}
}
}
if (!response) {
throw new Error(`All fetch attempts failed: ${lastError?.message}`);
}
// 3. DETECT FORMAT ONCE
const contentType = response.headers.get('content-type') || '';
const format = detectFormat(url, contentType);
const isAPIResponse = (format === 'json' || format === 'xml');
// 4. CACHE THE DATA (chunked downloads already cached)
// context.cache !== false checks if caching is enabled (default is true)
if (!shouldUseChunked && context.cache !== false) {
try {
const buffer = await response.clone().arrayBuffer();
const isZarrChunk = (format === 'zarr-chunk');
const isLargeFile = !isZarrChunk && buffer.byteLength > 5 * 1024 * 1024;
const metadata = {
source: context.source || 'unknown',
dataset: context.dataset || context.source || 'unknown',
dataType: context.dataType || 'data',
format: format,
contentType: contentType,
digestible: isAPIResponse,
filename: url.split('/').pop() || 'data',
size: buffer.byteLength,
responseStatus: response.status,
responseOk: response.ok,
params: context.params || {},
isChunkData: isZarrChunk,
originalUrl: url // Store the actual fetch URL
};
if (isLargeFile) {
await cache.putChunked(cacheKey, buffer, { ...metadata, chunked: true });
} else {
await cache.put(cacheKey, buffer, metadata);
}
console.log(`✓ Cached [${cacheKey}]: ${format} (${(buffer.byteLength / 1024).toFixed(0)}KB)`);
} catch (e) {
console.warn('Caching failed:', e.message);
}
}
// 5. RETURN APPROPRIATELY
if (isAPIResponse) {
return response; // Return Response object for API calls
} else {
return await response.arrayBuffer(); // Return ArrayBuffer for binary data
}
}
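// Illustrative sketch (not executed here): calling cachedFetch directly. Assumes the
// HydroLang runtime has set globalThis.hydro.cache (otherwise cachedFetch falls back to
// plain fetch and returns a Response) and globalThis._originalFetch; the URL and context
// values are hypothetical.
//
//   globalThis._hydroCacheContext = {
//     source: 'example',
//     dataType: 'gridded',
//     params: { chunked: false }
//   };
//   const result = await cachedFetch('https://example.com/output.nc');
//   // For binary formats, result is an ArrayBuffer; for JSON/XML APIs it is a Response.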
// Export the cache class and instance
export { hydroLangCache as cache };