63234a81ef
Added some convenience features that make training the tiny model easier.
228 linhas
7.0 KiB
Python
228 linhas
7.0 KiB
Python
from os import path, makedirs
|
|
import pandas as pd
|
|
import numpy as np
|
|
import re
|
|
import os
|
|
from PIL import Image
|
|
from Get_File_Paths import GetFileList, ChangeToOtherMachine
|
|
|
|
|
|
def convert_vott_csv_to_yolo(
    vott_df,
    labeldict=dict(zip(["Cat_Face"], [0])),
    path="",
    target_name="data_train.txt",
    abs_path=False,
):
    """Write a YOLO training annotation file from a VoTT-style dataframe.

    Each output line has the form
    ``<image> xmin,ymin,xmax,ymax,code [xmin,ymin,xmax,ymax,code ...]``,
    with one line per run of consecutive rows that share the same ``image``
    value.

    Parameters
    ----------
    vott_df : pd.DataFrame
        Must contain the columns ``image``, ``xmin``, ``ymin``, ``xmax`` and
        ``ymax``, plus either ``code`` or ``label`` (and ``image_path`` when
        ``abs_path`` is True).  NOTE: the frame is modified in place: a
        ``code`` column is added when missing and the four coordinate
        columns are rounded to integers.
    labeldict : dict, optional
        Maps ``label`` values to class codes; only consulted when ``vott_df``
        has no ``code`` column.  It is never mutated.
    path : str, optional
        Directory prefix joined to ``image`` when ``abs_path`` is False.
    target_name : str, optional
        File the annotation text is written to (overwritten if it exists).
    abs_path : bool, optional
        If True, use the ``image_path`` column verbatim instead of
        ``os.path.join(path, image)``.

    Returns
    -------
    bool
        Always True once the file has been written.
    """
    # Encode labels according to labeldict if codes don't exist yet.
    if "code" not in vott_df.columns:
        vott_df["code"] = vott_df["label"].apply(lambda name: labeldict[name])

    # Round float coordinates to ints.
    for column in ("xmin", "ymin", "xmax", "ymax"):
        vott_df[column] = vott_df[column].apply(round)

    box_fields = ["xmin", "ymin", "xmax", "ymax", "code"]

    # Each entry is [image_reference, box, box, ...] for one output line.
    entries = []
    last_image = ""
    for _, row in vott_df.iterrows():
        box = ",".join(str(value) for value in row[box_fields].tolist())
        if entries and row["image"] == last_image:
            # Same image as the previous row: extend the current line.
            entries[-1].append(box)
        else:
            if abs_path:
                image_ref = row["image_path"]
            else:
                image_ref = os.path.join(path, row["image"])
            entries.append([image_ref, box])
        last_image = row["image"]

    # The context manager guarantees the handle is closed even if the write fails.
    with open(target_name, "w") as out_file:
        out_file.write("\n".join(" ".join(parts) for parts in entries))
    return True
|
|
|
|
|
|
def csv_from_xml(directory, path_name=""):
    """Collect bounding boxes from per-image XML annotations into one dataframe.

    For every ``.jpg`` returned by ``GetFileList(directory, ".jpg")`` the
    sibling ``.xml`` file (same name with ``.jpg`` replaced by ``.xml``) is
    read and scanned with regular expressions.  Coordinates in the XML are
    multiplied by the image width/height, so they are presumably stored as
    fractions of the image size (TODO confirm against the dataset).

    Parameters
    ----------
    directory : str
        Folder that is searched for images and annotations.
    path_name : str, optional
        If non-empty, it is joined in front of each image path to build the
        ``image_path`` column; otherwise the path from ``GetFileList`` is
        used unchanged.

    Returns
    -------
    pd.DataFrame or bool
        A frame with the columns ``image, xmin, ymin, xmax, ymax, label,
        code, image_path`` (empty if no images were found), or ``False``
        when the number of images and annotation files differ.
    """
    # First get all images and xml files from path and its subfolders
    image_paths = GetFileList(directory, ".jpg")
    xml_paths = GetFileList(directory, ".xml")
    if len(image_paths) != len(xml_paths):
        print("number of annotations doesnt match number of images")
        return False

    # Final column order: image name first, then the box data.
    columns = ["image", "xmin", "ymin", "xmax", "ymax", "label", "code", "image_path"]

    # Compile the patterns once instead of on every image.
    # NOTE(review): values inside <x> tags are used as *y* coordinates and
    # vice versa, exactly as in the original code -- presumably the dataset
    # stores them swapped; confirm before "fixing".
    y_pattern = re.compile(r"(?:x>\n)(.*)(?:\n</)")
    x_pattern = re.compile(r"(?:y>\n)(.*)(?:\n</)")
    code_pattern = re.compile(r"(?:label>\n)(.*)(?:\n</)")
    label_pattern = re.compile(r"(?:labelname>\n)(.*)(?:\n</)")

    frames = []
    for image in image_paths:
        target_filename = os.path.join(path_name, image) if path_name else image
        source_filename = os.path.join(directory, image)

        # Only the dimensions are needed, so avoid decoding the pixels and
        # make sure the file handle is released.
        with Image.open(source_filename) as source_image:
            x_size, y_size = source_image.size

        source_xml = image.replace(".jpg", ".xml")
        with open(source_xml, "r") as xml_file:
            txt = xml_file.read()

        # Matches alternate between the min and the max value of each box.
        y_vals = y_pattern.findall(txt)
        x_vals = x_pattern.findall(txt)

        df = pd.DataFrame()
        for column, values, size in (
            ("xmin", x_vals[::2], x_size),
            ("ymin", y_vals[::2], y_size),
            ("xmax", x_vals[1::2], x_size),
            ("ymax", y_vals[1::2], y_size),
        ):
            df[column] = values
            df[column] = df[column].astype(float) * size
        df["label"] = label_pattern.findall(txt)
        df["code"] = code_pattern.findall(txt)
        df["image_path"] = target_filename
        df["image"] = os.path.basename(target_filename)
        frames.append(df)

    if not frames:
        # Nothing to concatenate; return an empty frame with the usual schema.
        return pd.DataFrame(columns=columns)

    # DataFrame.append was removed in pandas 2.0; concat keeps the same
    # per-image row indices the repeated append produced.
    return pd.concat(frames)[columns]
|
|
|
|
|
|
def crop_and_save(
    image_df,
    target_path,
    target_file,
    one=True,
    label_dict={0: "house"},
    postfix="cropped",
):
    """Crop every bounding box in ``image_df`` out of its image and save it.

    Input dataframe format (as read from a VoTT csv export)::

        image   xmin ymin xmax ymax label  image_path
        im.jpg  0    10   100  500  house  /data/im.jpg

    Each crop is written to ``target_path`` as
    ``<image stem>_<postfix>_<label>_<counter>.jpg`` where ``counter``
    numbers consecutive crops that come from the same ``image_path``.

    Parameters
    ----------
    image_df : pd.DataFrame
        Frame with file names, bounding box info and label.  NOTE: it is
        modified in place (rows with missing values are dropped,
        ``image_path`` is rewritten by ``ChangeToOtherMachine`` and, when
        ``one`` is True, an ``x_centrality`` column is added and only one
        row per image is kept).
    target_path : str
        Directory the cropped images are saved to; created if missing.
    target_file : str
        CSV file that receives the list of saved crop paths.
    one : bool, optional
        If True, only the box whose horizontal center is closest to the
        image center is kept for each image.
    label_dict : dict, optional
        Maps integer labels to the name used in the output file name.
        It is never mutated.
    postfix : str, optional
        Tag inserted into every output file name.

    Returns
    -------
    bool
        True if completed successfully.
    """
    # Use os.* directly so this does not depend on the module-level ``path``
    # name, which the script section of this file rebinds to a string.
    os.makedirs(target_path, exist_ok=True)

    image_df.dropna(inplace=True)
    image_df["image_path"] = ChangeToOtherMachine(image_df["image_path"].values)

    def find_rel_position(row):
        """Return how far the box center is from the image's horizontal center."""
        with Image.open(row["image_path"]) as source_image:
            x_size, _ = source_image.size
        return abs((row["xmin"] + row["xmax"]) / 2 / x_size - 0.5)

    if one:
        # Assign as a plain list (positional).  Wrapping it in pd.Series
        # would align on a fresh 0..n-1 index, which no longer matches
        # image_df's index once dropna has removed rows.
        image_df["x_centrality"] = [
            find_rel_position(row) for _, row in image_df.iterrows()
        ]
        image_df.sort_values(["image", "x_centrality"], inplace=True)
        image_df.drop_duplicates(subset="image", keep="first", inplace=True)

    new_paths = []
    previous_name = ""
    counter = 0
    for _, row in image_df.iterrows():
        current_name = row["image_path"]
        # Number consecutive crops of the same source image 0, 1, 2, ...
        counter = counter + 1 if current_name == previous_name else 0

        label = row["label"]
        # Integer labels (including numpy integers coming out of pandas)
        # are translated to their names.
        if isinstance(label, (int, np.integer)):
            label = label_dict[label]

        # splitext handles extensions of any length, not just ".jpg".
        stem = os.path.splitext(row["image"])[0]
        image_name_cropped = "_".join([stem, postfix, label, str(counter)]) + ".jpg"
        new_path = os.path.join(target_path, image_name_cropped)

        with Image.open(current_name) as source_image:
            box = (row["xmin"], row["ymin"], row["xmax"], row["ymax"])
            source_image.crop(box).save(new_path)

        new_paths.append(new_path.replace("\\", "/"))
        previous_name = current_name

    pd.DataFrame(new_paths, columns=["image_path"]).to_csv(target_file)
    return True
|
|
|
|
|
|
if __name__ == "__main__":
    # Prepare the houses dataset for YOLO
    house_labels = {"house": 0}
    multi_df = pd.read_csv(
        "C:/Users/Anton/Documents/Insight/eq/EQ_new/Train_Housing_detector/2/vott-csv-export/Housing_cropping-export.csv"
    )
    multi_df.drop_duplicates(subset=None, keep="first", inplace=True)
    convert_vott_csv_to_yolo(
        multi_df,
        house_labels,
        path="/home/ubuntu/logohunter/data/houses/",
        target_name="data_train.txt",
    )

    # Prepare the windows dataset for YOLO
    # Deliberately not named ``path``: that would overwrite the ``path``
    # module imported from ``os`` at the top of this file.
    windows_dir = "C:/Users/Anton/Documents/Insight/eq/EQ_new/Train_Window_Detector/base"

    # Parse the annotations once and reuse the frame for both outputs.
    windows_df = csv_from_xml(windows_dir, "/home/ubuntu/logohunter/data/windows")
    if windows_df is False:
        # csv_from_xml has already printed why it gave up.
        raise SystemExit(1)

    # Written before the YOLO conversion, which rounds the coordinates in place.
    windows_df.to_csv(
        "C:/Users/Anton/Documents/Insight/eq/EQ_new/Train_Window_Detector/base/annotations.csv"
    )

    label_names = [
        "background",
        "facade",
        "molding",
        "cornice",
        "pillar",
        "window",
        "door",
        "sill",
        "blind",
        "balcony",
        "shop",
        "deco",
    ]
    labeldict = {name: code for code, name in enumerate(label_names)}

    # NOTE(review): no target_name is given, so this writes to the default
    # "data_train.txt" and overwrites the houses file produced above.
    convert_vott_csv_to_yolo(windows_df, labeldict)
|