# Source code for granite_storage.gc

from __future__ import annotations

from collections.abc import Callable, Iterable
from dataclasses import dataclass

from granite_storage.manager import StorageManager
from granite_storage.models import StoredObjectRef


@dataclass
class GarbageCollectionReport:
    """Counters summarising one garbage-collection pass.

    Every counter starts at zero and is incremented in place by
    ``StorageGarbageCollector.collect`` while it runs.
    """

    # Locations yielded by the backend listing.
    scanned: int = 0
    # References that matched the storage key being collected.
    referenced: int = 0
    # Scanned locations that no matching reference pointed at.
    orphaned: int = 0
    # Orphaned locations for which a backend delete was issued.
    deleted: int = 0
class StorageGarbageCollector:
    """Find, and optionally delete, stored objects that nothing references.

    A pass compares the locations reported by a storage backend with the
    locations named by the references the application still holds.
    """

    def __init__(
        self,
        manager: StorageManager,
        iter_references: Callable[[], Iterable[StoredObjectRef | None]],
    ) -> None:
        """Remember the collaborators used by :meth:`collect`.

        Args:
            manager: Resolves a storage key to its policy and backend.
            iter_references: Zero-argument callable, invoked once per
                ``collect`` call, yielding the references currently in use.
                ``None`` entries are permitted and are skipped.
        """
        self.manager = manager
        self.iter_references = iter_references

    def collect(
        self, *, storage_key: str, dry_run: bool = True
    ) -> GarbageCollectionReport:
        """Scan one storage key for orphaned objects.

        Args:
            storage_key: Key whose policy/backend is scanned. Only
                references carrying this same key protect a location.
            dry_run: When true (the default) orphans are only counted;
                when false each orphan is deleted through the backend.

        Returns:
            A report with the number of locations scanned, matching
            references seen (duplicates pointing at the same location are
            each counted), orphans found, and deletions issued.

        Raises:
            Whatever the manager, the backend, or ``iter_references``
            raises; nothing is caught here.
        """
        policy = self.manager.get_policy(storage_key)
        backend = self.manager.get_backend_for_policy(policy)
        report = GarbageCollectionReport()

        referenced_locations: set[str] = set()
        for ref in self.iter_references():
            # Explicit ``None`` test rather than truthiness: a valid but
            # falsy reference must still protect its object from deletion.
            if ref is not None and ref.storage_key == storage_key:
                referenced_locations.add(ref.location)
                report.referenced += 1

        # NOTE(review): deletions are issued while the backend listing is
        # still being iterated -- confirm every backend tolerates that.
        for location in backend.iter_locations(prefix=policy.key_prefix):
            report.scanned += 1
            if location in referenced_locations:
                continue

            report.orphaned += 1
            if not dry_run:
                # Only the location is available from the listing, so size
                # and checksum are passed as 0 and "".
                backend.delete(
                    StoredObjectRef(
                        storage_key=storage_key,
                        backend=backend.backend_name,
                        location=location,
                        size=0,
                        checksum="",
                    )
                )
                report.deleted += 1

        return report