---
# AI Knowledge Distillation Platform Configuration
#
# Layout: one top-level key per subsystem (system, models, training, medical,
# tokens, database, server, logging, monitoring, security, features), followed
# by `environments`, which holds per-environment override values.

# System Configuration
system:
  # Memory management settings
  memory:
    max_memory_gb: 14.0  # Maximum memory usage (leave 2GB for system)
    chunk_size_mb: 500.0  # Chunk size for large model loading
    cleanup_threshold: 0.85  # Memory usage threshold for cleanup
    emergency_threshold: 0.95  # Emergency cleanup threshold

  # CPU optimization settings
  cpu:
    max_threads: 8  # Maximum number of threads
    use_intel_extension: true  # Use Intel Extension for PyTorch if available
    enable_mkl: true  # Enable Intel MKL
    enable_openmp: true  # Enable OpenMP

  # Storage settings
  storage:
    cache_dir: "./cache"
    models_dir: "./models"
    database_dir: "./database"
    logs_dir: "./logs"
    temp_dir: "./temp"
    max_cache_size_gb: 20.0  # Maximum cache size

# Model Loading Configuration
models:
  # Default settings for model loading
  default_settings:
    torch_dtype: "float32"  # Use float32 for CPU
    low_cpu_mem_usage: true
    device_map: "cpu"
    trust_remote_code: false

  # Chunk loading settings
  chunk_loading:
    enabled: true
    max_chunk_size_mb: 500.0
    max_cached_chunks: 3
    auto_cleanup: true

  # Supported model types
  supported_formats:
    - ".pt"
    - ".pth"
    - ".bin"
    - ".safetensors"

  # Model size limits
  size_limits:
    small_model_mb: 1000  # Models under 1GB load normally
    large_model_mb: 2000  # Models over 2GB use chunking

# Training Configuration
training:
  # Default training parameters
  default_params:
    learning_rate: 0.0001
    batch_size: 4  # Small batch size for memory efficiency
    max_steps: 1000
    temperature: 3.0
    alpha: 0.7
    save_steps: 100
    eval_steps: 50

  # Memory optimization during training
  memory_optimization:
    gradient_accumulation_steps: 4
    gradient_checkpointing: true
    mixed_precision: false  # Disable for CPU
    dataloader_num_workers: 2

# Medical Datasets Configuration
medical:
  # Supported medical datasets
  datasets:
    roco_v2:
      repo_id: "eltorio/ROCOv2-radiology"
      streaming_supported: true
      estimated_size_gb: 8.5
    ct_rate:
      repo_id: "ibrahimhamamci/CT-RATE"
      streaming_supported: true
      estimated_size_gb: 12.3
    umie_datasets:
      repo_id: "lion-ai/umie_datasets"
      streaming_supported: true
      estimated_size_gb: 15.7

  # DICOM processing settings
  dicom:
    memory_limit_mb: 1000.0
    default_window_center: 40
    default_window_width: 400
    default_output_size: [512, 512]

  # Medical preprocessing settings
  preprocessing:
    target_size: [512, 512]
    normalize_images: true
    enhance_contrast: true

# Token Management Configuration
tokens:
  # Encryption settings
  encryption:
    key_file: ".token_key"
    algorithm: "Fernet"

  # Token types and their properties
  types:
    read:
      security_level: "medium"
      recommended_for: "development"
    write:
      security_level: "high"
      recommended_for: "production"
    fine_grained:
      security_level: "very_high"
      recommended_for: "enterprise"

# Database Configuration
database:
  # SQLite settings
  sqlite:
    database_dir: "./database"
    backup_interval_hours: 24
    cleanup_days: 30

  # Connection settings
  connection:
    timeout: 30
    check_same_thread: false

# Web Server Configuration
server:
  # FastAPI settings
  host: "0.0.0.0"
  port: 8000
  workers: 1  # Single worker for memory efficiency
  reload: false

  # CORS settings
  # NOTE(review): "*" for origins and headers is fully permissive; confirm this
  # is intended for the production environment as well as development.
  cors:
    allow_origins: ["*"]
    allow_methods: ["GET", "POST", "PUT", "DELETE"]
    allow_headers: ["*"]

  # Upload settings
  uploads:
    max_file_size_mb: 5000  # 5GB max file size
    allowed_extensions: [".pt", ".pth", ".bin", ".safetensors"]
    temp_dir: "./temp"

# Logging Configuration
logging:
  # Log levels
  level: "INFO"
  format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

  # File logging
  file:
    enabled: true
    filename: "logs/app.log"
    max_size_mb: 100
    backup_count: 5

  # Console logging
  console:
    enabled: true
    level: "INFO"

  # Specific logger levels
  loggers:
    uvicorn: "INFO"
    transformers: "WARNING"
    datasets: "WARNING"
    torch: "WARNING"

# Performance Monitoring
monitoring:
  # System metrics collection
  system_metrics:
    enabled: true
    interval_seconds: 30
    store_in_database: true

  # Memory monitoring
  memory_monitoring:
    enabled: true
    alert_threshold: 0.85
    emergency_threshold: 0.95

  # Performance recommendations
  recommendations:
    enabled: true
    check_interval_minutes: 5

# Security Configuration
security:
  # Token validation
  token_validation:
    enabled: true
    cache_results: true
    cache_duration_minutes: 60

  # File upload security
  # NOTE(review): max_file_size_mb here repeats server.uploads.max_file_size_mb;
  # presumably the two must be kept in sync - verify against the consumer.
  file_uploads:
    scan_uploads: true
    max_file_size_mb: 5000
    allowed_mime_types:
      - "application/octet-stream"
      - "application/x-pytorch"

# Feature Flags
features:
  # Advanced features
  memory_management: true
  chunk_loading: true
  cpu_optimization: true
  medical_datasets: true
  token_management: true

  # Experimental features
  experimental:
    auto_model_optimization: true
    progressive_loading: true
    smart_caching: true

# Environment-specific overrides
environments:
  development:
    logging:
      level: "DEBUG"
    server:
      reload: true

  production:
    logging:
      level: "INFO"
    server:
      reload: false
    security:
      token_validation:
        enabled: true