Arquivos
cuneiform-sign-detection-code/experiments/sign_detector/finetune_sign_detector.ipynb
T
2020-11-19 12:18:53 +01:00

624 linhas
19 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Fine-tune sign detector network (in semi-supervised case)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from PIL import Image\n",
"from ast import literal_eval\n",
"import os.path\n",
"from tqdm import tqdm\n",
"import copy\n",
"\n",
"import torch\n",
"import torch.optim as optim\n",
"from torch.optim import lr_scheduler\n",
"import torch.utils.data as data\n",
"\n",
"from torchvision import transforms as trafos\n",
"import torchvision.transforms as transforms"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# for auto-reloading external modules\n",
"# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython\n",
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"relative_path = '../../'\n",
"# ensure that parent path is on the python path in order to have all packages available\n",
"import sys, os\n",
"parent_path = os.path.join(os.getcwd(), relative_path)\n",
"parent_path = os.path.realpath(parent_path) # os.path.abspath(...)\n",
"sys.path.insert(0, parent_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from lib.datasets.cunei_dataset_ssd import CuneiformSSD\n",
"\n",
"from lib.alignment.LineFragment import plot_boxes\n",
"from lib.utils.pytorch_utils import get_tensorboard_writer"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from lib.models.mobilenetv2_mod03 import MobileNetV2\n",
"from lib.models.mobilenetv2_fpn import MobileNetV2FPN\n",
"from lib.models.trained_model_loader import get_fpn_ssd_net\n",
"from lib.utils.torchcv.models.net import FPNSSD\n",
"from lib.utils.torchcv.loss.ssd_loss import SSDLoss"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import time\n",
"\n",
"# Optional delayed start: wait roughly `hh` hours before the notebook continues.\n",
"# The wait is split into 10-second naps (6 per minute * 60 minutes * hh) so that\n",
"# tqdm can display progress. With hh = 0.001 the nap count truncates to 0,\n",
"# i.e. the loop body never runs and the notebook continues immediately.\n",
"hh = 0.001\n",
"num_naps = int(6*60*hh)\n",
"for nap_idx in tqdm(range(num_naps)):\n",
"    time.sleep(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Config Basics"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"model_version = 'v001ft01'\n",
"\n",
"# config pretrained detector\n",
"pretrained_model_version = 'v001' # 'v191' \n",
"\n",
"# config datasets for training and testing\n",
"train_collections = ['train_D'] \n",
"test_collections = ['testEXT'] # ['test_full']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# config generated data\n",
"with_gen_data = False\n",
"\n",
"gen_model_version = 'v001' \n",
"\n",
"gen_folder = 'results_ssd/{}/'.format(gen_model_version) \n",
"gen_file_path = None\n",
"\n",
"gen_collections = ['saa01', 'saa05', 'saa08', 'saa10', 'saa13', 'saa16']\n",
"gen_collections += ['train']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# config backbone architecture\n",
"arch_opt = 1\n",
"arch_type = 'mobile'\n",
"width_mult = 0.625\n",
"\n",
"# config detector\n",
"with_64 = False\n",
"create_bg_class = False\n",
"img_size = 512\n",
"num_classes = 240\n",
"\n",
"# config schedule\n",
"num_epochs = 11 \n",
"lr_milestones = [60]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Build the remark that becomes part of the log name: joined training collections,\n",
"# pretrained detector version and, when generated data is used, the generator version.\n",
"remark_fields = [\"_\".join(train_collections), pretrained_model_version]\n",
"if with_gen_data:\n",
"    remark_template = '{}_fpnssd_mobilenetv2_{}_gen_{}'\n",
"    remark_fields.append(gen_model_version)\n",
"else:\n",
"    remark_template = '{}_fpnssd_mobilenetv2_{}'\n",
"version_remark = remark_template.format(*remark_fields)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Preparing Datasets"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"if with_gen_data:\n",
" from lib.utils.torchcv.box_coder_retina_lm import RetinaBoxCoder\n",
" from lib.utils.torchcv.transforms_lm.resize import resize_lm\n",
" from lib.utils.torchcv.transforms_lm.random_crop_tile import random_crop_tile_lm\n",
" from lib.utils.torchcv.transforms_lm.pad_gs import pad_lm\n",
"else:\n",
" from lib.utils.torchcv.box_coder_retina import RetinaBoxCoder\n",
" from lib.utils.torchcv.transforms.resize import resize\n",
" from lib.utils.torchcv.transforms.random_crop_tile import random_crop_tile\n",
" from lib.utils.torchcv.transforms.pad_gs import pad"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"box_coder = RetinaBoxCoder(create_bg_class=create_bg_class)\n",
"print('num_anchors', len(box_coder.anchor_boxes))\n",
"print('anchor areas', np.sqrt(box_coder.anchor_areas))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"if with_gen_data:\n",
"    def transform_train(img, boxes, labels, linemap):\n",
"        \"\"\"Training-time augmentation and target encoding for samples that carry a linemap.\n",
"\n",
"        Applies, in order: a random choice between color jitter and identity, `pad_lm`\n",
"        to (600, 600), `random_crop_tile_lm`, `resize_lm` to (img_size, img_size),\n",
"        tensor conversion plus normalization, and finally `box_coder.encode`.\n",
"\n",
"        Returns:\n",
"            (img, boxes, labels, linemap), where boxes/labels are the outputs of\n",
"            `box_coder.encode` and linemap has been passed through `transforms.ToTensor`.\n",
"        \"\"\"\n",
"        # alternative tried earlier: always apply transforms.ColorJitter(0.3,0.3,0,0)\n",
"        color_aug = transforms.RandomChoice([\n",
"            transforms.ColorJitter(0.5, 0.5, 0, 0),\n",
"            transforms.Lambda(lambda x: x),  # identity\n",
"        ])\n",
"        img = color_aug(img)\n",
"        img, linemap = pad_lm(img, linemap, (600, 600))\n",
"        img, boxes, labels, linemap = random_crop_tile_lm(\n",
"            img, boxes, labels, linemap, scale_range=[0.65, 1], max_aspect_ratio=1.35)\n",
"        img, boxes, linemap = resize_lm(\n",
"            img, boxes, linemap, size=(img_size, img_size), random_interpolation=True)\n",
"        to_normalized_tensor = transforms.Compose([\n",
"            transforms.ToTensor(),\n",
"            transforms.Normalize(mean=[0.5], std=[1.0]),\n",
"        ])\n",
"        img = to_normalized_tensor(img)\n",
"        boxes, labels = box_coder.encode(boxes, labels, linemap)\n",
"        linemap = transforms.ToTensor()(linemap)\n",
"        return img, boxes, labels, linemap\n",
"else:\n",
"    def transform_train(img, boxes, labels):\n",
"        \"\"\"Training-time augmentation and target encoding for samples without a linemap.\n",
"\n",
"        Applies, in order: a random choice between color jitter and identity, `pad`\n",
"        to (600, 600), `random_crop_tile`, `resize` to (img_size, img_size),\n",
"        tensor conversion plus normalization, and finally `box_coder.encode`.\n",
"\n",
"        Returns:\n",
"            (img, boxes, labels), where boxes/labels are the outputs of `box_coder.encode`.\n",
"        \"\"\"\n",
"        # alternative tried earlier: always apply transforms.ColorJitter(0.3,0.3,0,0)\n",
"        color_aug = transforms.RandomChoice([\n",
"            transforms.ColorJitter(0.5, 0.5, 0, 0),\n",
"            transforms.Lambda(lambda x: x),  # identity\n",
"        ])\n",
"        img = color_aug(img)\n",
"        img = pad(img, (600, 600))\n",
"        img, boxes, labels = random_crop_tile(\n",
"            img, boxes, labels, scale_range=[0.65, 1], max_aspect_ratio=1.35)\n",
"        img, boxes = resize(\n",
"            img, boxes, size=(img_size, img_size), random_interpolation=True)\n",
"        to_normalized_tensor = transforms.Compose([\n",
"            transforms.ToTensor(),\n",
"            transforms.Normalize(mean=[0.5], std=[1.0]),\n",
"        ])\n",
"        img = to_normalized_tensor(img)\n",
"        boxes, labels = box_coder.encode(boxes, labels)\n",
"        return img, boxes, labels"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"if with_gen_data:\n",
" trainset = CuneiformSSD(collections=train_collections, transform=transform_train, \n",
" gen_file_path=gen_file_path, gen_collections=gen_collections, gen_folder=gen_folder, \n",
" relative_path=relative_path, use_balanced_idx=False, use_linemaps=True, \n",
" remove_empty_tiles=False, min_align_ratio=0.2)\n",
"else:\n",
" trainset = CuneiformSSD(collections=train_collections, transform=transform_train,\n",
" gen_file_path=gen_file_path, relative_path=relative_path, use_linemaps=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# NOTE(review): both test transforms use a random crop and random_interpolation=True,\n",
"# so evaluation batches are presumably not identical between runs - confirm this is intended.\n",
"if with_gen_data:\n",
"    def transform_test(img, boxes, labels, linemap):\n",
"        \"\"\"Test-time preprocessing and target encoding for samples that carry a linemap.\n",
"\n",
"        Applies `random_crop_tile_lm` with a narrow scale range, `resize_lm` to\n",
"        (img_size, img_size), tensor conversion plus normalization, and `box_coder.encode`.\n",
"\n",
"        Returns:\n",
"            (img, boxes, labels, linemap), where boxes/labels are the outputs of\n",
"            `box_coder.encode` and linemap has been passed through `transforms.ToTensor`.\n",
"        \"\"\"\n",
"        img, boxes, labels, linemap = random_crop_tile_lm(\n",
"            img, boxes, labels, linemap, scale_range=[0.85, 0.86], max_aspect_ratio=1.001)\n",
"        img, boxes, linemap = resize_lm(\n",
"            img, boxes, linemap, size=(img_size, img_size), random_interpolation=True)\n",
"        to_normalized_tensor = transforms.Compose([\n",
"            transforms.ToTensor(),\n",
"            transforms.Normalize(mean=[0.5],std=[1.0]),\n",
"        ])\n",
"        img = to_normalized_tensor(img)\n",
"        boxes, labels = box_coder.encode(boxes, labels, linemap)\n",
"        linemap = transforms.ToTensor()(linemap)\n",
"        return img, boxes, labels, linemap\n",
"else:\n",
"    def transform_test(img, boxes, labels):\n",
"        \"\"\"Test-time preprocessing and target encoding for samples without a linemap.\n",
"\n",
"        Applies `random_crop_tile` with a narrow scale range, `resize` to\n",
"        (img_size, img_size), tensor conversion plus normalization, and `box_coder.encode`.\n",
"\n",
"        Returns:\n",
"            (img, boxes, labels), where boxes/labels are the outputs of `box_coder.encode`.\n",
"        \"\"\"\n",
"        img, boxes, labels = random_crop_tile(\n",
"            img, boxes, labels, scale_range=[0.85, 0.86], max_aspect_ratio=1.001)\n",
"        img, boxes = resize(\n",
"            img, boxes, size=(img_size, img_size), random_interpolation=True)\n",
"        to_normalized_tensor = transforms.Compose([\n",
"            transforms.ToTensor(),\n",
"            transforms.Normalize(mean=[0.5],std=[1.0]),\n",
"        ])\n",
"        img = to_normalized_tensor(img)\n",
"        boxes, labels = box_coder.encode(boxes, labels)\n",
"        return img, boxes, labels"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"if with_gen_data:\n",
" testset = CuneiformSSD(collections=test_collections, transform=transform_test,\n",
" gen_file_path=None, relative_path=relative_path, use_linemaps=True)\n",
"else:\n",
" testset = CuneiformSSD(collections=test_collections, transform=transform_test,\n",
" gen_file_path=None, relative_path=relative_path, use_linemaps=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"trainloader = data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=3)\n",
"testloader = data.DataLoader(testset, batch_size=32, shuffle=False, num_workers=3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Building Model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"device = 'cuda' if torch.cuda.is_available() else 'cpu'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# load FPN model from pretrained detector model\n",
"fpnssd_net = get_fpn_ssd_net(pretrained_model_version, device, arch_type, with_64, arch_opt, width_mult, \n",
" relative_path, num_classes, num_c=1)\n",
"fpnssd_net.train()\n",
"\n",
"# print model\n",
"print(fpnssd_net)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"### Test net\n",
"loc_preds, cls_preds = fpnssd_net(torch.randn(1, 1, img_size, img_size).to(device))\n",
"print(loc_preds.size(), cls_preds.size())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Optimization"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"criterion = SSDLoss(num_classes=num_classes)\n",
"#criterion = FocalLoss(num_classes=num_classes)\n",
"optimizer = optim.SGD(fpnssd_net.parameters(), lr=0.0001, momentum=0.9, weight_decay=1e-4)\n",
"\n",
"# lr policy\n",
"# scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.97)\n",
"scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=lr_milestones, gamma=0.1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Create the tensorboard writer; the run comment is the model version,\n",
"# followed by the version remark when that remark is non-empty.\n",
"comment_str = ('_{}'.format(model_version) if version_remark == ''\n",
"               else '_{}_{}'.format(model_version, version_remark))\n",
"logs_folder = '{}results/run_logs/detector'.format(relative_path)\n",
"writer = get_tensorboard_writer(logs_folder=logs_folder, comment=comment_str)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Training\n",
"best_loss = float('inf') # best test loss\n",
"best_epoch = 0\n",
"best_model_wts = copy.deepcopy(fpnssd_net.state_dict())\n",
"\n",
"\n",
"def train(epoch):\n",
"    \"\"\"Run one training epoch over `trainloader` and log the mean batch loss.\n",
"\n",
"    Uses the notebook globals fpnssd_net, trainloader, criterion, optimizer,\n",
"    scheduler, writer and device.\n",
"\n",
"    Args:\n",
"        epoch: epoch index; used as the step value of the logged scalar.\n",
"    \"\"\"\n",
"    fpnssd_net.train()\n",
"    train_loss = 0\n",
"\n",
"    for batch_idx, batch in enumerate(trainloader):\n",
"        # With generated data every batch has a fourth entry (the linemap) that the\n",
"        # loss does not consume, so only the first three entries are unpacked.\n",
"        inputs, loc_targets, cls_targets = batch[:3]\n",
"        inputs = inputs.to(device)\n",
"        loc_targets = loc_targets.to(device)\n",
"        cls_targets = cls_targets.to(device)\n",
"\n",
"        optimizer.zero_grad()\n",
"        loc_preds, cls_preds = fpnssd_net(inputs)\n",
"        loss = criterion(loc_preds, loc_targets, cls_preds, cls_targets)\n",
"        loss.backward()\n",
"        optimizer.step()\n",
"\n",
"        train_loss += loss.item()\n",
"        print('train_loss: %.3f | avg_loss: %.3f [%d/%d]'\n",
"              % (loss.item(), train_loss/(batch_idx+1), batch_idx+1, len(trainloader)))\n",
"\n",
"    # Advance the LR schedule only after this epoch's optimizer steps. Stepping the\n",
"    # scheduler before them makes every milestone take effect one epoch early.\n",
"    scheduler.step()\n",
"\n",
"    # write to logger\n",
"    phase = 'train'\n",
"    writer.add_scalar('data/{}/loss'.format(phase), train_loss / len(trainloader), epoch)\n",
"\n",
"def test(epoch):\n",
"    \"\"\"Evaluate on `testloader`, log the mean batch loss and checkpoint the best model.\n",
"\n",
"    Uses the notebook globals fpnssd_net, testloader, criterion, writer, device,\n",
"    relative_path and model_version, and updates the globals best_loss / best_epoch.\n",
"\n",
"    Args:\n",
"        epoch: epoch index; used as the step value of the logged scalar and\n",
"            recorded as best_epoch when a new best checkpoint is written.\n",
"    \"\"\"\n",
"    global best_loss\n",
"    global best_epoch\n",
"\n",
"    fpnssd_net.eval()\n",
"    test_loss = 0\n",
"    with torch.no_grad():\n",
"        for batch_idx, batch in enumerate(testloader):\n",
"            # With generated data every batch has a fourth entry (the linemap) that the\n",
"            # loss does not consume, so only the first three entries are unpacked.\n",
"            inputs, loc_targets, cls_targets = batch[:3]\n",
"            inputs = inputs.to(device)\n",
"            loc_targets = loc_targets.to(device)\n",
"            cls_targets = cls_targets.to(device)\n",
"\n",
"            loc_preds, cls_preds = fpnssd_net(inputs)\n",
"            loss = criterion(loc_preds, loc_targets, cls_preds, cls_targets)\n",
"            test_loss += loss.item()\n",
"            print('test_loss: %.3f | avg_loss: %.3f [%d/%d]'\n",
"                  % (loss.item(), test_loss/(batch_idx+1), batch_idx+1, len(testloader)))\n",
"\n",
"    # mean loss per batch; the same value is logged and used for model selection\n",
"    test_loss /= len(testloader)\n",
"\n",
"    # write to logger\n",
"    phase = 'test'\n",
"    writer.add_scalar('data/{}/loss'.format(phase), test_loss, epoch)\n",
"\n",
"    # Checkpoint the best model. Epochs 0-5 are never checkpointed and do not\n",
"    # update best_loss, so the first evaluation after epoch 5 always saves.\n",
"    if test_loss < best_loss and epoch > 5:\n",
"        weights_path = '{}results/weights/fpn_net_{}_best.pth'.format(relative_path, model_version)\n",
"        torch.save(fpnssd_net.state_dict(), weights_path)\n",
"        best_epoch = epoch\n",
"        best_loss = test_loss"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": true
},
"outputs": [],
"source": [
"for epoch in tqdm(range(num_epochs)):\n",
" print('\\nEpoch: %d' % epoch)\n",
" train(epoch)\n",
" if epoch % 2 == 0:\n",
" print('\\nTest')\n",
" test(epoch)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# '.4f' prints the loss with four decimals; a bare '4f' would only set a minimum field width of 4.\n",
"print('Best val Loss: {:.4f} at {}'.format(best_loss, best_epoch))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# choose model filename\n",
"weights_path = '{}results/weights/fpn_net_{}.pth'.format(relative_path, model_version)\n",
"# Save only the model parameters\n",
"torch.save(fpnssd_net.state_dict(), weights_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}