Imageomics · NetZissou · Jul 18, 2025 · Jul 19, 2025 · Jul 19, 2025 · Jul 19, 2025
diff --git a/configs/animal_detection/config_animal_detect_image_folder_template.yaml b/configs/animal_detection/config_animal_detect_image_folder_template.yaml
@@ -0,0 +1,152 @@
+# =============================================================================
+# Configuration File for Batch Animal Detection from Image Folders
+# =============================================================================
+# This configuration is optimized for animal detection on image files directly
+# from a directory structure using MegaDetector models.
+# -----------------------------------------------------------------------------
+
+# ---------------------------
+# Model Configuration
+# ---------------------------
+# MegaDetector model used for animal detection
+model:
+  weights: MDV6-yolov10-e-1280.pt      # MegaDetector model weights; available models are listed at:
+                                       # https://microsoft.github.io/CameraTraps/model_zoo/megadetector/
+
+# ---------------------------
+# Detection Parameters
+# ---------------------------
+confidence_threshold: 0.2   # Minimum confidence score for a detection to be reported
+                            # MegaDetector typically uses 0.2 as its default threshold
+                            # Lower values = more detections (including more false positives)
+                            # Higher values = fewer, more confident detections
+
+image_size: 1280            # Input image size for the model (square format)
+                            # Common values: 640, 1024, 1280
+                            # Larger sizes may improve detection accuracy but increase processing time and memory use
+
+# ---------------------------
+# DataLoader Configurations
+# ---------------------------
+batch_size: 16              # Number of images per batch (adjust based on GPU memory)
+                            # Animal detection (especially MegaDetector) can be memory-intensive
+                            # Start with a smaller batch size and increase it if memory allows
+num_workers: 20             # Number of worker processes for data loading (each one uses CPU and host RAM)
+prefetch_factor: 8          # Number of batches prefetched by each worker (prefetched batches are held in memory)
+
+# ---------------------------
+# Image Processing Settings
+# ---------------------------
+validate_images: false      # Set to true to check at startup that every image can be opened with PIL
+                            # Slower startup, but catches corrupted files early
+
+# How to generate unique IDs from image file paths
+uuid_mode: filename         # Options (examples show the resulting ID):
+                            # - "filename": image001.jpg (not unique if names repeat across subfolders)
+                            # - "relative": subfolder/image001.jpg
+                            # - "fullpath": /full/path/to/image001.jpg
+                            # - "hash": MD5 hash of the full path
+
+# ---------------------------
+# Distributed Processing
+# ---------------------------
+evenly_distribute: true     # Distribute files based on size for load balancing
+stagger: false              # Stagger worker start times to reduce file system load
+
+# ---------------------------
+# Output Configurations
+# ---------------------------
+max_rows_per_file: 100000             # Maximum number of result rows per output file
+                                      # Rows can be large: the detections column holds every detection for an image
+out_prefix: animal_detection_results  # Prefix for output file names
+
+# =============================================================================
+# USAGE EXAMPLE:
+# =============================================================================
+# python animal_detect.py /path/to/images /path/to/output --input_type images --config config_animal_detect_image_folder_template.yaml
+# =============================================================================
+
+# =============================================================================
+# IMAGE DIRECTORY REQUIREMENTS:
+# =============================================================================
+# Your image directory can have any structure:
+#
+# Flat structure:
+# /images/
+#   ├── image001.jpg
+#   ├── image002.png
+#   └── image003.jpeg
+#
+# Nested structure:
+# /images/
+#   ├── category1/
+#   │   ├── img1.jpg
+#   │   └── img2.png
+#   └── category2/
+#       ├── img3.jpg
+#       └── img4.png
+#
+# Supported formats: .jpg, .jpeg, .png, .bmp, .tif, .tiff, .webp
+# All images are automatically converted to RGB mode for processing.
+#
+# UUID GENERATION MODES:
+# - filename: Good for flat directories with unique filenames
+# - relative: Good for nested directories where path info is important
+# - fullpath: Good when you need absolute path traceability
+# - hash: Good for very long paths or when you want anonymized IDs
+# =============================================================================
+
+# =============================================================================
+# OUTPUT FORMAT:
+# =============================================================================
+# The script outputs Parquet files containing:
+# - uuid: Unique identifier for each image (based on uuid_mode)
+# - max_detection_score: Maximum confidence score across all detections (0.0 if no animals detected)
+# - num_detections: Total number of detections above threshold
+# - detections: JSON string containing detailed detection information
+#
+# Each detection includes:
+# - bbox: Absolute pixel coordinates [x1, y1, x2, y2]
+# - bbox_normalized: The same box as [x1, y1, x2, y2], scaled to the 0-1 range
+# - confidence: Detection confidence score
+# - class_id: Numeric class ID (0=animal, 1=person, 2=vehicle for MegaDetector)
+# - class_name: Human-readable class name
+#
+# Files are saved in: {output_dir}/detections/rank_{rank}/
+# Example output:
+# animal_detection_results_rank_0_0.parquet
+# animal_detection_results_rank_0_1.parquet
+# ...
+# =============================================================================
+
+# =============================================================================
+# PERFORMANCE TUNING GUIDELINES:
+# =============================================================================
+# 
+# GPU Memory Optimization:
+# - Reduce batch_size if running out of GPU memory
+# - MegaDetector can be memory-intensive, especially at high resolutions
+# - Consider using smaller image_size if memory is limited
+#
+# CPU/I-O Optimization:
+# - Increase num_workers for faster data loading (more workers use more CPU and host RAM; lower it if you hit OOM crashes)
+# - Increase prefetch_factor for better pipeline utilization
+#
+# Distributed Processing:
+# - Use evenly_distribute=true for better load balancing
+# - Set stagger=true if experiencing file system bottlenecks
+#
+# Detection Quality vs Speed:
+# - MegaDetector confidence_threshold of 0.2 is typically optimal according to the MegaDetector (CameraTraps) documentation
+# - Lower thresholds may find more animals but increase false positives
+# - Higher image_size improves accuracy but slows processing
+# - Choose appropriate model based on accuracy vs speed needs:
+#   * MegaDetectorV6-Ultralytics-YoloV10-Extra (the MDV6-yolov10-e-1280.pt weights above): Most accurate, best for wildlife
+#   * YOLOv8 variants: General purpose object detection
+#
+# Model-Specific Notes:
+# - MegaDetector is specifically trained for wildlife camera trap images
+# - It detects animals, people, and vehicles with high accuracy
+# - Works well on images from outdoor/natural settings
+# - May not perform as well on indoor or urban animal images
+# =============================================================================
diff --git a/configs/animal_detection/config_animal_detect_parquet_template.yaml b/configs/animal_detection/config_animal_detect_parquet_template.yaml
@@ -0,0 +1,191 @@
+# =============================================================================
+# Configuration File for Batch Animal Detection from Parquet Files
+# =============================================================================
+# This configuration is optimized for animal detection on Parquet files containing
+# encoded image data with metadata using MegaDetector models.
+# -----------------------------------------------------------------------------
+
+# ---------------------------
+# Model Configuration
+# ---------------------------
+# MegaDetector model used for animal detection
+model:
+  weights: md_v5a.0.0.pt      # MegaDetector model weights
+                              # Options:
+                              # - md_v5a.0.0.pt (MegaDetector v5a, recommended)
+                              # - md_v5b.0.0.pt (MegaDetector v5b, alternative)
+                              # - yolov8n.pt (YOLOv8 nano, general purpose)
+                              # - yolov8s.pt (YOLOv8 small, balanced)
+                              # - yolov8m.pt (YOLOv8 medium, more accurate)
+                              # - yolov8l.pt (YOLOv8 large, most accurate of the YOLOv8 options listed)
+                              # Custom-trained models are also supported
+                              # For MegaDetector models, see:
+                              # https://github.com/microsoft/CameraTraps
+
+# ---------------------------
+# Detection Parameters
+# ---------------------------
+confidence_threshold: 0.2   # Minimum confidence score for a detection to be reported
+                            # MegaDetector typically uses 0.2 as its default threshold
+                            # Lower values = more detections (including more false positives)
+                            # Higher values = fewer, more confident detections
+
+image_size: 1280            # Input image size for the model (square format)
+                            # MegaDetector v5 typically uses 1280
+                            # Common values: 640, 1024, 1280
+                            # Larger sizes may improve detection accuracy but increase processing time and memory use
+
+# ---------------------------
+# DataLoader Configurations
+# ---------------------------
+batch_size: 8               # Number of images per batch (adjust based on GPU memory)
+                            # Animal detection (especially MegaDetector) can be memory-intensive
+                            # Start with a smaller batch size and increase it if memory allows
+num_workers: 24             # Number of worker processes for data loading (each one uses CPU and host RAM)
+prefetch_factor: 8          # Number of batches prefetched by each worker (prefetched batches are held in memory)
+
+# ---------------------------
+# Parquet-Specific Settings
+# ---------------------------
+read_batch_size: 64         # Number of rows to read from Parquet at a time
+                            # Kept smaller than for embedding tasks because each image yields a larger detection output
+
+# Columns to read from Parquet files (every column listed here must exist in your data)
+read_columns:
+  - uuid              # [REQUIRED] Unique identifier for each image
+  - image             # [REQUIRED] Encoded image bytes (JPEG, PNG, etc.)
+  - original_size     # [OPTIONAL] Original image dimensions (remove this entry if the column is absent)
+  - resized_size      # [OPTIONAL] Resized image dimensions (remove this entry if the column is absent)
+
+# ---------------------------
+# Distributed Processing
+# ---------------------------
+evenly_distribute: true     # Distribute files based on size for load balancing
+stagger: false              # Stagger worker start times to reduce I/O congestion
+
+# ---------------------------
+# Output Configurations
+# ---------------------------
+max_rows_per_file: 5000             # Maximum number of result rows per output file
+                                    # Rows can be large: the detections column holds every detection for an image
+out_prefix: animal_detection_results  # Prefix for output file names
+
+# =============================================================================
+# USAGE EXAMPLES:
+# =============================================================================
+# 
+# For Parquet files:
+# python animal_detect.py /path/to/parquet_dir /path/to/output --input_type parquet --config config_animal_detect_parquet_template.yaml
+#
+# With file list:
+# python animal_detect.py /path/to/parquet_dir /path/to/output --input_type parquet --file_list files.txt --config config_animal_detect_parquet_template.yaml
+# =============================================================================
+
+# =============================================================================
+# PARQUET DATA REQUIREMENTS:
+# =============================================================================
+# Your Parquet files must contain:
+# 1. 'uuid' column: Unique string identifier for each image
+# 2. 'image' column: Image data encoded as bytes (e.g. written with PIL Image.save() into a BytesIO buffer)
+# 3. Optional metadata columns as specified in read_columns
+#
+# Example of creating compatible Parquet data:
+# ```python
+# import io
+# from PIL import Image
+# import pandas as pd
+# import pyarrow.parquet as pq
+# 
+# # Encode image to bytes
+# img = Image.open('image.jpg')
+# img_bytes = io.BytesIO()
+# img.save(img_bytes, format='JPEG')
+# img_bytes = img_bytes.getvalue()
+# 
+# # Create DataFrame
+# df = pd.DataFrame({
+#     'uuid': ['img_001'],
+#     'image': [img_bytes],
+#     'original_size': [(1024, 768)],
+#     'resized_size': [(1280, 1280)]
+# })
+# 
+# # Save to Parquet
+# df.to_parquet('images.parquet')
+# ```
+# =============================================================================
+
+# =============================================================================
+# OUTPUT FORMAT:
+# =============================================================================
+# The script outputs Parquet files containing:
+# - uuid: Unique identifier for each image (from input Parquet)
+# - max_detection_score: Maximum confidence score across all detections (0.0 if no animals detected)
+# - num_detections: Total number of detections above threshold
+# - detections: JSON string containing detailed detection information
+#
+# Each detection in the JSON includes:
+# - bbox: Absolute pixel coordinates [x1, y1, x2, y2]
+# - bbox_normalized: The same box as [x1, y1, x2, y2], scaled to the 0-1 range
+# - confidence: Detection confidence score
+# - class_id: Numeric class ID (0=animal, 1=person, 2=vehicle for MegaDetector)
+# - class_name: Human-readable class name
+#
+# Files are saved in: {output_dir}/detections/rank_{rank}/
+# Example output:
+# animal_detection_results_rank_0_0.parquet
+# animal_detection_results_rank_0_1.parquet
+# ...
+#
+# Example detection JSON structure:
+# [
+#   {
+#     "bbox": [120.5, 80.2, 340.8, 290.1],
+#     "bbox_normalized": [0.118, 0.078, 0.333, 0.284],
+#     "confidence": 0.85,
+#     "class_id": 0,
+#     "class_name": "animal"
+#   },
+#   {
+#     "bbox": [450.0, 200.0, 600.0, 400.0],
+#     "bbox_normalized": [0.440, 0.195, 0.586, 0.391],
+#     "confidence": 0.72,
+#     "class_id": 1,
+#     "class_name": "person"
+#   }
+# ]
+# =============================================================================
+
+# =============================================================================
+# PERFORMANCE TUNING GUIDELINES:
+# =============================================================================
+# 
+# GPU Memory Optimization:
+# - Reduce batch_size if running out of GPU memory
+# - MegaDetector can be memory-intensive, especially at high resolutions
+# - Consider using smaller image_size if memory is limited
+#
+# CPU/I-O Optimization:
+# - Increase num_workers for faster data loading (but watch CPU usage)
+# - Increase prefetch_factor for better pipeline utilization
+# - Adjust read_batch_size for optimal Parquet I/O performance
+#
+# Distributed Processing:
+# - Use evenly_distribute=true for better load balancing
+# - Set stagger=true if experiencing I/O bottlenecks
+#
+# Detection Quality vs Speed:
+# - MegaDetector confidence_threshold of 0.2 is typically optimal
+# - Lower thresholds may find more animals but increase false positives
+# - Higher image_size improves accuracy but slows processing
+# - Choose appropriate model based on accuracy vs speed needs:
+#   * md_v5a.0.0.pt: Best for wildlife/camera trap images
+#   * YOLOv8 variants: General purpose object detection
+#
+# Model-Specific Notes:
+# - MegaDetector is specifically trained for wildlife camera trap images
+# - It detects animals, people, and vehicles with high accuracy
+# - Works well on images from outdoor/natural settings
+# - May not perform as well on indoor or urban animal images
+# - Provides both detection and rough classification capabilities
+# =============================================================================
diff --git a/...s/config_embed_image_folder_template.yaml → ...d/config_embed_image_folder_template.yaml b/...s/config_embed_image_folder_template.yaml → ...d/config_embed_image_folder_template.yaml
diff --git a/configs/config_embed_parquet_template.yaml → .../embed/config_embed_parquet_template.yaml b/configs/config_embed_parquet_template.yaml → .../embed/config_embed_parquet_template.yaml