how to load dataset

3001c4c5 · William Qin · a961fcc4 · 3001c4c5 · 3001c4c5 · 3001c4c5
Commit 3001c4c5 authored 9 months ago by William Qin
--- a/MFF-pytorch/process_dataset.py
+++ b/MFF-pytorch/process_dataset.py
@@ -15,6 +15,7 @@ import os
 import pdb

 dataset_root = "/home/w29qin/Jester"
+file_lists_folder = "file_lists"

 with open(os.path.join(dataset_root, 'labels/labels.csv')) as f:
    lines = f.readlines()
@@ -23,15 +24,23 @@ for line in lines:
    line = line.rstrip()
    categories.append(line)
 categories = sorted(categories)
-with open(os.path.join(dataset_root, 'file_lists/category.txt'), 'w') as f:
+with open(os.path.join(dataset_root, file_lists_folder, 'category.txt'), 'w') as f:
    f.write('\n'.join(categories))

 dict_categories = {}
 for i, category in enumerate(categories):
    dict_categories[category] = i

-files_input = [os.path.join(dataset_root, 'labels/validation.csv'), os.path.join(dataset_root, 'labels/train.csv')]
-files_output = [os.path.join(dataset_root, 'file_lists/val_videofolder.txt'), os.path.join(dataset_root, 'file_lists/train_videofolder.txt')]
+files_input = [
+    os.path.join(dataset_root, 'labels/validation.csv'), 
+    os.path.join(dataset_root, 'labels/train.csv'),
+    os.path.join(dataset_root, 'labels/test-answers.csv'),
+]
+files_output = [
+    os.path.join(dataset_root, file_lists_folder, 'val_videofolder.txt'), 
+    os.path.join(dataset_root, file_lists_folder, 'train_videofolder.txt'),
+    os.path.join(dataset_root, file_lists_folder, 'test_videofolder.txt'),
+]
 for (filename_input, filename_output) in zip(files_input, files_output):
    with open(filename_input) as f:
        lines = f.readlines()

--- a/PAN-PyTorch/requirements.txt
+++ b/PAN-PyTorch/requirements.txt
+archspec @ file:///croot/archspec_1709217642129/work
+blessed==1.20.0
+boltons @ file:///work/perseverance-python-buildout/croot/boltons_1698851177130/work
+Brotli @ file:///work/perseverance-python-buildout/croot/brotli-split_1698805593785/work
+certifi @ file:///croot/certifi_1707229174982/work/certifi
+cffi @ file:///croot/cffi_1700254295673/work
+charset-normalizer @ file:///tmp/build/80754af9/charset-normalizer_1630003229654/work
+conda @ file:///croot/conda_1710772050586/work
+conda-content-trust @ file:///work/perseverance-python-buildout/croot/conda-content-trust_1698882886606/work
+conda-libmamba-solver @ file:///croot/conda-libmamba-solver_1702997573971/work/src
+conda-package-handling @ file:///work/perseverance-python-buildout/croot/conda-package-handling_1698851267218/work
+conda_package_streaming @ file:///work/perseverance-python-buildout/croot/conda-package-streaming_1698847176583/work
+cryptography @ file:///croot/cryptography_1702070282333/work
+distro @ file:///work/perseverance-python-buildout/croot/distro_1701732366176/work
 filelock==3.13.3
 fsspec==2024.3.1
+gpustat==1.1.1
+idna @ file:///work/perseverance-python-buildout/croot/idna_1698845632828/work
 Jinja2==3.1.3
 joblib==1.3.2
+jsonpatch @ file:///tmp/build/80754af9/jsonpatch_1615747632069/work
+jsonpointer==2.1
+libmambapy @ file:///work/perseverance-python-buildout/croot/mamba-split_1701744133524/work/libmambapy
+lmdb==1.4.1
 MarkupSafe==2.1.5
+menuinst @ file:///croot/menuinst_1706732933928/work
 mpmath==1.3.0
 networkx==3.2.1
 numpy==1.26.4
@@ -15,14 +36,25 @@ nvidia-cufft-cu12==11.0.2.54
 nvidia-curand-cu12==10.3.2.106
 nvidia-cusolver-cu12==11.4.5.107
 nvidia-cusparse-cu12==12.1.0.106
+nvidia-ml-py==12.535.133
 nvidia-nccl-cu12==2.19.3
 nvidia-nvjitlink-cu12==12.4.127
 nvidia-nvtx-cu12==12.1.105
 packaging==24.0
 pillow==10.3.0
+platformdirs @ file:///work/perseverance-python-buildout/croot/platformdirs_1701732573265/work
+pluggy @ file:///work/perseverance-python-buildout/croot/pluggy_1698805497733/work
 protobuf==5.26.1
+psutil==5.9.8
+pycosat @ file:///work/perseverance-python-buildout/croot/pycosat_1698863456259/work
+pycparser @ file:///tmp/build/80754af9/pycparser_1636541352034/work
+PySocks @ file:///work/perseverance-python-buildout/croot/pysocks_1698845478203/work
+requests @ file:///croot/requests_1707355572290/work
+ruamel.yaml @ file:///work/perseverance-python-buildout/croot/ruamel.yaml_1698863605521/work
 scikit-learn==1.4.1.post1
 scipy==1.13.0
+setuptools==68.2.2
+six==1.16.0
 sympy==1.12
 tensorboardX==2.6.2.2
 threadpoolctl==3.4.0
@@ -31,4 +63,9 @@ torchaudio==2.2.2
 torchvision==0.17.2
 tqdm==4.66.2
 triton==2.2.0
+truststore @ file:///work/perseverance-python-buildout/croot/truststore_1701735771625/work
 typing_extensions==4.10.0
+urllib3 @ file:///croot/urllib3_1707770551213/work
+wcwidth==0.2.13
+wheel==0.41.2
+zstandard @ file:///work/perseverance-python-buildout/croot/zstandard_1698847073368/work
--- a/README.md
+++ b/README.md
 # MTE546 Project

-## Getting started with Motion Fused Frames
+## Loading the dataset

 1. Download Jester dataset and extract the files following the instructions
-2. Move the dataset to `MFF-pytorch/datasets/jester/rgb`
-3. Change working directory to `MFF-pytorch`
-4. Create virtual environment and install `requirements.txt`
-5. Generate metafiles by preprocessing the dataset `python3 process_dataset.py`
-6. 
+2. Place the dataset somewhere of your choosing, e.g. `Jester`
+3. Move the videos to `Jester/rgb` and the labels to `Jester/labels`
+4. Change working directory to `MFF-pytorch`
+5. Create a virtual environment and install the packages listed in `requirements.txt`
+6. Edit `process_dataset.py` to use the path of your dataset
+7. Generate metafiles by preprocessing the dataset: `python3 process_dataset.py`
+8. Edit `PAN-PyTorch/ops/dataset_config.py` to use the path of your dataset and generated metafiles

 ## PAN PyTorch

@@ -19,12 +21,13 @@ In `PAN-PyTorch/ops/dataset.py` ensure line 124 is commented out to train on the
 self.video_list = self.video_list[a:b]
 ```
 #### Multiple non-overlapping models
-In `PAN-PyTorch/ops/dataset.py` ensure line 124 not commented out to train on a subset of the entire dataset. For each sub_model, change the `BAG_NUM` value to match the sub-model. The current method uses 5 sub_models, so modify the test split to accomodate different number of models.
+In `PAN-PyTorch/ops/dataset.py` ensure line 124 is not commented out to train on a subset of the entire dataset. For each sub_model, change the `BAG_NUM` value to match the sub-model. The current method uses 5 sub_models, so modify the test split to accommodate a different number of models.  
 In `PAN-PyTorch/main.py`, modify the store name to a custom name.

 ### Model Testing
-To test models, run `PAN-PyTorch/scripts/test/20bn/Lite.sh`
-In `PAN-PyTorch/test.py`: modify `bag_size` to match the number of models that will be run against eachother. In `checkpoints` definition, add the relevant checkpoints that are saved after training. Modify the store name to be unique to access the logs.
+To test models, run `PAN-PyTorch/scripts/test/20bn/Lite.sh`  
+In `PAN-PyTorch/test.py`: modify `bag_size` to match the number of models that will be run against each other. In the `checkpoints` definition, add the relevant checkpoints that are saved after training. Modify the store name to be unique to access the logs.  
+The `checkpoints` folder contains the best models from our experiments.  
 In `PAN-PyTorch/ops/dataset.py` ensure there is no dataset split.

 #### Testing Methods
@@ -32,4 +35,4 @@ In `PAN-PyTorch/test.py`, the accuracy measures are defined on this line:
 ```
 [prec1, prec5], conf = accuracy_owa(output, target, topk=(1, 5), alpha=args.lr)
 ```
-different accuracy measurement methods are proivided in 
\ No newline at end of file
+Different accuracy measurement methods are provided in `PAN-PyTorch/ops/utils.py`