feat: Add support for Weights and Biases

Use www.wandb.ai to track your experiments online and pick the best hyperparameters.
2020-10-30 15:19:54 -07:00
commit b205bc0cde
@@ -140,3 +140,6 @@ dmypy.json

 # Pyre type checker
 .pyre/
+
+# wandb log files
+wandb/
@@ -13,7 +13,7 @@ def get_parent_dir(n=1):
    """returns the n-th parent dicrectory of the current
    working directory"""
    current_path = os.path.dirname(os.path.abspath(__file__))
-    for k in range(n):
+    for _ in range(n):
        current_path = os.path.dirname(current_path)
    return current_path

@@ -29,6 +29,7 @@ import keras.backend as K
 from keras.layers import Input, Lambda
 from keras.models import Model
 from keras.optimizers import Adam
+
 from keras.callbacks import (
    TensorBoard,
    ModelCheckpoint,
@@ -152,6 +153,22 @@ if __name__ == "__main__":
        os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
        warnings.filterwarnings("ignore")

+    # Get WandB integration if setup
+    try:
+        import wandb
+        from wandb.integration.keras import WandbCallback  # type: ignore
+
+        wandb.ensure_configured()
+        if wandb.api.api_key is None:
+            _has_wandb = False
+            wandb.termwarn(
+                "W&B installed but not logged in.  Run `wandb login` or set the WANDB_API_KEY env variable."
+            )
+        else:
+            _has_wandb = False if os.getenv("WANDB_DISABLED") else True
+    except (ImportError, AttributeError):
+        _has_wandb = False
+
    np.random.seed(FLAGS.random_seed)

    log_dir = FLAGS.log_dir
@@ -208,96 +225,77 @@ if __name__ == "__main__":

    # Train with frozen layers first, to get a stable loss.
    # Adjust num epochs to your dataset. This step is enough to obtain a decent model.
-    if True:
-        model.compile(
-            optimizer=Adam(lr=1e-3),
-            loss={
-                # use custom yolo_loss Lambda layer.
-                "yolo_loss": lambda y_true, y_pred: y_pred
-            },
+    frozen_callbacks = [logging, checkpoint]
+
+    if _has_wandb:
+        wandb.init(
+            project="TrainYourOwnYOLO", config=vars(FLAGS), sync_tensorboard=False
        )
+        wandb_callback = WandbCallback(save_model=False)
+        frozen_callbacks.append(wandb_callback)

-        batch_size = 32
-        print(
-            "Train on {} samples, val on {} samples, with batch size {}.".format(
-                num_train, num_val, batch_size
-            )
+    model.compile(
+        optimizer=Adam(lr=1e-3),
+        loss={
+            # use custom yolo_loss Lambda layer.
+            "yolo_loss": lambda y_true, y_pred: y_pred
+        },
+    )
+
+    batch_size = 32
+    print(
+        "Train on {} samples, val on {} samples, with batch size {}.".format(
+            num_train, num_val, batch_size
        )
-        history = model.fit_generator(
-            data_generator_wrapper(
-                lines[:num_train], batch_size, input_shape, anchors, num_classes
-            ),
-            steps_per_epoch=max(1, num_train // batch_size),
-            validation_data=data_generator_wrapper(
-                lines[num_train:], batch_size, input_shape, anchors, num_classes
-            ),
-            validation_steps=max(1, num_val // batch_size),
-            epochs=epoch1,
-            initial_epoch=0,
-            callbacks=[logging, checkpoint],
-        )
-        model.save_weights(os.path.join(log_dir, "trained_weights_stage_1.h5"))
-
-        step1_train_loss = history.history["loss"]
-
-        file = open(os.path.join(log_dir_time, "step1_loss.npy"), "w")
-        with open(os.path.join(log_dir_time, "step1_loss.npy"), "w") as f:
-            for item in step1_train_loss:
-                f.write("%s\n" % item)
-        file.close()
-
-        step1_val_loss = np.array(history.history["val_loss"])
-
-        file = open(os.path.join(log_dir_time, "step1_val_loss.npy"), "w")
-        with open(os.path.join(log_dir_time, "step1_val_loss.npy"), "w") as f:
-            for item in step1_val_loss:
-                f.write("%s\n" % item)
-        file.close()
+    )
+    history = model.fit_generator(
+        data_generator_wrapper(
+            lines[:num_train], batch_size, input_shape, anchors, num_classes
+        ),
+        steps_per_epoch=max(1, num_train // batch_size),
+        validation_data=data_generator_wrapper(
+            lines[num_train:], batch_size, input_shape, anchors, num_classes
+        ),
+        validation_steps=max(1, num_val // batch_size),
+        epochs=epoch1,
+        initial_epoch=0,
+        callbacks=frozen_callbacks,
+    )
+    model.save_weights(os.path.join(log_dir, "trained_weights_stage_1.h5"))

    # Unfreeze and continue training, to fine-tune.
    # Train longer if the result is unsatisfactory.
-    if True:
-        for i in range(len(model.layers)):
-            model.layers[i].trainable = True
-        model.compile(
-            optimizer=Adam(lr=1e-4), loss={"yolo_loss": lambda y_true, y_pred: y_pred}
-        )  # recompile to apply the change
-        print("Unfreeze all layers.")

-        batch_size = (
-            4  # note that more GPU memory is required after unfreezing the body
+    full_callbacks = [logging, checkpoint, reduce_lr, early_stopping]
+
+    if _has_wandb:
+        full_callbacks.append(wandb_callback)
+
+    for i in range(len(model.layers)):
+        model.layers[i].trainable = True
+    model.compile(
+        optimizer=Adam(lr=1e-4), loss={"yolo_loss": lambda y_true, y_pred: y_pred}
+    )  # recompile to apply the change
+
+    print("Unfreeze all layers.")
+
+    batch_size = 4  # note that more GPU memory is required after unfreezing the body
+    print(
+        "Train on {} samples, val on {} samples, with batch size {}.".format(
+            num_train, num_val, batch_size
        )
-        print(
-            "Train on {} samples, val on {} samples, with batch size {}.".format(
-                num_train, num_val, batch_size
-            )
-        )
-        history = model.fit_generator(
-            data_generator_wrapper(
-                lines[:num_train], batch_size, input_shape, anchors, num_classes
-            ),
-            steps_per_epoch=max(1, num_train // batch_size),
-            validation_data=data_generator_wrapper(
-                lines[num_train:], batch_size, input_shape, anchors, num_classes
-            ),
-            validation_steps=max(1, num_val // batch_size),
-            epochs=epoch1 + epoch2,
-            initial_epoch=epoch1,
-            callbacks=[logging, checkpoint, reduce_lr, early_stopping],
-        )
-        model.save_weights(os.path.join(log_dir, "trained_weights_final.h5"))
-        step2_train_loss = history.history["loss"]
-
-        file = open(os.path.join(log_dir_time, "step2_loss.npy"), "w")
-        with open(os.path.join(log_dir_time, "step2_loss.npy"), "w") as f:
-            for item in step2_train_loss:
-                f.write("%s\n" % item)
-        file.close()
-
-        step2_val_loss = np.array(history.history["val_loss"])
-
-        file = open(os.path.join(log_dir_time, "step2_val_loss.npy"), "w")
-        with open(os.path.join(log_dir_time, "step2_val_loss.npy"), "w") as f:
-            for item in step2_val_loss:
-                f.write("%s\n" % item)
-        file.close()
+    )
+    history = model.fit_generator(
+        data_generator_wrapper(
+            lines[:num_train], batch_size, input_shape, anchors, num_classes
+        ),
+        steps_per_epoch=max(1, num_train // batch_size),
+        validation_data=data_generator_wrapper(
+            lines[num_train:], batch_size, input_shape, anchors, num_classes
+        ),
+        validation_steps=max(1, num_val // batch_size),
+        epochs=epoch1 + epoch2,
+        initial_epoch=epoch1,
+        callbacks=full_callbacks,
+    )
+    model.save_weights(os.path.join(log_dir, "trained_weights_final.h5"))
@@ -6,7 +6,7 @@ def get_parent_dir(n=1):
    """returns the n-th parent dicrectory of the current
    working directory"""
    current_path = os.path.dirname(os.path.abspath(__file__))
-    for k in range(n):
+    for _ in range(n):
        current_path = os.path.dirname(current_path)
    return current_path

@@ -48,7 +48,7 @@ def download_file_from_google_drive(id, destination):
 if __name__ == "__main__":
    import sys

-    if len(sys.argv) is not 3:
+    if len(sys.argv) != 3:
        print("Usage: python google_drive.py drive_file_id destination_file_path")
    else:
        # TAKE ID FROM SHAREABLE LINK
@@ -24,7 +24,7 @@ To build and test your YOLO object detection algorithm follow the below steps:

 ## Getting Started

-### NEW: Google Colab Tutorial <a href="https://colab.research.google.com/github/AntonMu/TrainYourOwnYOLO/blob/master/TrainYourOwnYOLO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
+### Google Colab Tutorial <a href="https://colab.research.google.com/github/AntonMu/TrainYourOwnYOLO/blob/master/TrainYourOwnYOLO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
 With Google Colab you can skip most of the set up steps and start training your own model right away. 

 ### Requisites
@@ -41,12 +41,12 @@ To speed up training, it is recommended to use a **GPU with CUDA** support. For
 #### Setting up Virtual Environment [Linux or Mac]

 Clone this repo with:
-```
+```bash
 git clone https://github.com/AntonMu/TrainYourOwnYOLO
 cd TrainYourOwnYOLO/
 ```
 Create Virtual **(Linux/Mac)** Environment:
-```
+```bash
 python3 -m venv env
 source env/bin/activate
 ```
@@ -57,7 +57,7 @@ Use the [Github Desktop GUI](https://desktop.github.com/) to clone this repo to

 Create Virtual **(Windows)** Environment:

-```
+```powershell
 py -m venv env
 .\env\Scripts\activate
 ```
@@ -67,7 +67,7 @@ Make sure that, from now on, you **run all commands from within your virtual env
 #### Install Required Packages [Windows, Mac or Linux]
 Install required packages (from within your virtual environment) via:

-```
+```bash
 pip install -r requirements.txt
 ```
 If this fails, you may have to upgrade your pip version first with `pip install pip --upgrade`.
@@ -75,7 +75,7 @@ If this fails, you may have to upgrade your pip version first with `pip install
 ## Quick Start (Inference only)
 To test the cat face detector on test images located in [`TrainYourOwnYOLO/Data/Source_Images/Test_Images`](/Data/Source_Images/Test_Images) run the `Minimal_Example.py` script in the root folder with:

-```
+```bash
 python Minimal_Example.py
 ```

@@ -96,9 +96,15 @@ To train your own custom YOLO object detector please follow the instructions det

 Each `*.py` script has various command line options that help tweak performance and change things such as input and output directories. All scripts are initialized with good default values that help accomplish all tasks as long as the original folder structure is preserved. To learn more about available command line options of a python script `<script_name.py>` run:

-```
+```bash
 python <script_name.py> -h
 ```
+### **NEW:** Weights and Biases
+TrainYourOwnYOLO supports [Weights & Biases](https://wandb.ai/home/) to track your experiments online. Sign up at [wandb.ai](https://wandb.ai/home) to get an API key and run:
+```bash
+wandb login <API_KEY>
+```
+where `<API_KEY>` is your Weights & Biases API key. 

 ## License

@@ -109,7 +115,7 @@ Unless explicitly stated otherwise at the top of a file, all code is licensed un
 0. If you encounter any error, please make sure you follow the instructions **exactly** (word by word). Once you are familiar with the code, you're welcome to modify it as needed but in order to minimize error, I encourage you to not deviate from the instructions above. If you would like to file an issue, please use the provided template and make sure to fill out all fields. 

 1. If you encounter a `FileNotFoundError`, `Module not found` or similar error, make sure that you did not change the folder structure. Your directory structure **must** look exactly like this: 
-    ```
+    ```text
    TrainYourOwnYOLO
    └─── 1_Image_Annotation
    └─── 2_Training
@@ -122,7 +128,7 @@ Unless explicitly stated otherwise at the top of a file, all code is licensed un
    Don't use spaces in file or folder names, i.e. instead of `my folder` use `my_folder`.

 2. If you are a Linux user and having trouble installing `*.snap` package files try:
-    ```
+    ```bash
    snap install --dangerous vott-2.1.0-linux.snap
    ```
    See [Snap Tutorial](https://tutorials.ubuntu.com/tutorial/advanced-snap-usage#2) for more information.
@@ -154,7 +160,7 @@ Under the following terms:
 
 Cite as:
 
-  ```
+  ```text
  @misc{TrainYourOwnYOLO,
    title={TrainYourOwnYOLO: Building a Custom Object Detector from Scratch},
    author={Anton Muehlemann},
@@ -58,6 +58,7 @@ toml==0.10.1
 typed-ast==1.4.1
 typing-extensions==3.7.4.3
 urllib3==1.25.10
+wandb==0.10.8
 Werkzeug==1.0.1
 wrapt==1.12.1
 zipp==3.1.0