feat: Add CSV Exporter (#757)

Adds CSV export provider
Esse commit está contido em:
Jacopo Mangiavacchi
2019-04-26 15:36:53 -07:00
commit de Wallace Breza
commit f29963c89e
14 arquivos alterados com 334 adições e 33 exclusões
+2
Ver Arquivo
@@ -195,8 +195,10 @@ Tagging and drawing regions is not possible while the video is playing.
Once assets have been labeled, they can be exported into a variety of formats:
* [Azure Custom Vision Service](https://azure.microsoft.com/en-us/services/cognitive-services/custom-vision-service/)
* [Microsoft Cognitive Toolkit (CNTK)](https://github.com/Microsoft/CNTK)
* TensorFlow (Pascal VOC and TFRecords)
* VoTT (generic JSON schema)
* Comma Separated Values (CSV)
In addition, users may choose to export
+32 -1
Ver Arquivo
@@ -1123,6 +1123,15 @@
"integrity": "sha512-NVQEMviDWjuen3UW+mU1J6fZ0WhOfG1yRce/2OTcbaz+fgmTw2cahx6N2wh0Yl+a+hg2UZj/oElZmtULWyGIsA==",
"dev": true
},
"@types/json2csv": {
"version": "4.4.0",
"resolved": "https://registry.npmjs.org/@types/json2csv/-/json2csv-4.4.0.tgz",
"integrity": "sha512-24S6hQGGsOZxTXbRyKvNaV5k882XTo9RX/LH6+RtVtimFNE2J0T/LWlru6BeEssByVA9/ZLif1PLk/8X8/qPCQ==",
"dev": true,
"requires": {
"@types/node": "*"
}
},
"@types/lodash": {
"version": "4.14.120",
"resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.14.120.tgz",
@@ -7310,7 +7319,8 @@
"ansi-regex": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-2.1.1.tgz",
"integrity": "sha1-w7M6te42DYbg5ijwRorn7yfWVN8="
"integrity": "sha1-w7M6te42DYbg5ijwRorn7yfWVN8=",
"optional": true
},
"aproba": {
"version": "1.2.0",
@@ -7782,6 +7792,7 @@
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-3.0.1.tgz",
"integrity": "sha1-ajhfuIU9lS1f8F0Oiq+UJ43GPc8=",
"optional": true,
"requires": {
"ansi-regex": "^2.0.0"
}
@@ -10355,6 +10366,16 @@
"resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz",
"integrity": "sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus="
},
"json2csv": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/json2csv/-/json2csv-4.5.0.tgz",
"integrity": "sha512-SfWprYRawoBsEHZyb1NvZz2qJpuLRDlSexHqtnHxFEFvzK83zgm7Bftq2miBODjRQG0O7PaHC5271Hfbu10P+w==",
"requires": {
"commander": "^2.15.1",
"jsonparse": "^1.3.1",
"lodash.get": "^4.4.2"
}
},
"json3": {
"version": "3.3.2",
"resolved": "https://registry.npmjs.org/json3/-/json3-3.3.2.tgz",
@@ -10378,6 +10399,11 @@
"resolved": "https://registry.npmjs.org/jsonify/-/jsonify-0.0.0.tgz",
"integrity": "sha1-LHS27kHZPKUbe1qu6PUDYx0lKnM="
},
"jsonparse": {
"version": "1.3.1",
"resolved": "https://registry.npmjs.org/jsonparse/-/jsonparse-1.3.1.tgz",
"integrity": "sha1-P02uSpH6wxX3EGL4UhzCOfE2YoA="
},
"jsprim": {
"version": "1.4.1",
"resolved": "https://registry.npmjs.org/jsprim/-/jsprim-1.4.1.tgz",
@@ -10613,6 +10639,11 @@
"integrity": "sha1-+wMJF/hqMTTlvJvsDWngAT3f7bI=",
"dev": true
},
"lodash.get": {
"version": "4.4.2",
"resolved": "https://registry.npmjs.org/lodash.get/-/lodash.get-4.4.2.tgz",
"integrity": "sha1-LRd/ZS+jHpObRDjVNBSZ36OCXpk="
},
"lodash.isarguments": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/lodash.isarguments/-/lodash.isarguments-3.1.0.tgz",
+2
Ver Arquivo
@@ -25,6 +25,7 @@
"dotenv": "^7.0.0",
"google-protobuf": "^3.6.1",
"jpeg-js": "^0.3.4",
"json2csv": "^4.5.0",
"lodash": "^4.17.11",
"md5.js": "^1.3.5",
"node-fetch": "^2.3.0",
@@ -98,6 +99,7 @@
"@types/dotenv": "^6.1.0",
"@types/enzyme": "^3.1.15",
"@types/jest": "23.3.9",
"@types/json2csv": "^4.4.0",
"@types/node": "10.12.7",
"@types/react": "16.7.6",
"@types/react-dom": "16.0.9",
+6 -5
Ver Arquivo
@@ -302,17 +302,15 @@ export const english: IAppStrings = {
title: "Test / Train Split",
description: "The test train split to use for exported data",
},
},
},
vottJson: {
displayName: "VoTT JSON",
properties: {
includeImages: {
title: "Include Images",
description: "Whether or not to include binary image assets in target connection",
},
},
},
vottJson: {
displayName: "VoTT JSON",
},
azureCV: {
displayName: "Azure Custom Vision Service",
regions: {
@@ -385,6 +383,9 @@ export const english: IAppStrings = {
cntk: {
displayName: "Microsoft Cognitive Toolkit (CNTK)",
},
csv: {
displayName: "Comma Separated Values (CSV)",
},
},
messages: {
saveSuccess: "Successfully saved export settings",
+6 -5
Ver Arquivo
@@ -305,17 +305,15 @@ export const spanish: IAppStrings = {
title: "La división para entrenar y comprobar",
description: "La división de datos para utilizar entre el entrenamiento y la comprobación",
},
},
},
vottJson: {
displayName: "VoTT JSON",
properties: {
includeImages: {
title: "Incluir imágenes",
description: "Si desea o no incluir activos de imagen binaria en la conexión de destino",
},
},
},
vottJson: {
displayName: "VoTT JSON",
},
azureCV: {
displayName: "Servicio de Visión Personalizada Azure",
regions: {
@@ -388,6 +386,9 @@ export const spanish: IAppStrings = {
cntk: {
displayName: "Microsoft Cognitive Toolkit (CNTK)",
},
csv: {
displayName: "Los valores separados por comas (CSV)",
},
},
messages: {
saveSuccess: "Configuración de exportación guardada correctamente",
+2 -2
Ver Arquivo
@@ -638,14 +638,14 @@ export default class MockFactory {
/**
 * Creates the array of IExportProviderRegistrationOptions for the export providers
 * vottJson, pascalVOC, azureCustomVision and csv (in that order).
 * @returns One registration per provider name, built by createExportProviderRegistration
 */
public static createExportProviderRegistrations(): IExportProviderRegistrationOptions[] {
    const providerNames = ["vottJson", "pascalVOC", "azureCustomVision", "csv"];
    return providerNames.map((providerName) => MockFactory.createExportProviderRegistration(providerName));
}
+6 -5
Ver Arquivo
@@ -299,17 +299,15 @@ export interface IAppStrings {
title: string,
description: string,
},
},
},
vottJson: {
displayName: string,
properties: {
includeImages: {
title: string,
description: string,
},
},
},
vottJson: {
displayName: string,
},
azureCV: {
displayName: string,
regions: {
@@ -382,6 +380,9 @@ export interface IAppStrings {
cntk: {
displayName: string,
},
csv: {
displayName: string,
},
},
messages: {
saveSuccess: string;
+28
Ver Arquivo
@@ -0,0 +1,28 @@
{
"type": "object",
"title": "${strings.export.providers.csv.displayName}",
"properties": {
"assetState": {
"type": "string",
"title": "${strings.export.providers.common.properties.assetState.title}",
"description": "${strings.export.providers.common.properties.assetState.description}",
"enum": [
"all",
"visited",
"tagged"
],
"default": "visited",
"enumNames": [
"${strings.export.providers.common.properties.assetState.options.all}",
"${strings.export.providers.common.properties.assetState.options.visited}",
"${strings.export.providers.common.properties.assetState.options.tagged}"
]
},
"includeImages": {
"type": "boolean",
"default": true,
"title": "${strings.export.providers.common.properties.includeImages.title}",
"description": "${strings.export.providers.common.properties.includeImages.description}"
}
}
}
+159
Ver Arquivo
@@ -0,0 +1,159 @@
import _ from "lodash";
import { CsvExportProvider, ICsvExportProviderOptions } from "./csv";
import registerProviders from "../../registerProviders";
import { ExportAssetState } from "./exportProvider";
import { ExportProviderFactory } from "./exportProviderFactory";
import {
IProject, IAssetMetadata, AssetState, IExportProviderOptions,
RegionType,
} from "../../models/applicationState";
import MockFactory from "../../common/mockFactory";
// Register a Jest mock for the asset service module; the tests below stub
// AssetService.prototype.getAssetMetadata on the mocked class.
jest.mock("../../services/assetService");
import { AssetService } from "../../services/assetService";
// Register a Jest mock for the local file system proxy module; the tests below read the
// arguments passed to writeText from the mocked instances.
jest.mock("../storage/localFileSystemProxy");
import { LocalFileSystemProxy } from "../storage/localFileSystemProxy";
import registerMixins from "../../registerMixins";
import HtmlFileReader from "../../common/htmlFileReader";
import { appInfo } from "../../common/appInfo";
import { AssetProviderFactory } from "../storage/assetProviderFactory";
import os from "os";
// Runs once when this test module is loaded, before any test executes.
// NOTE(review): presumably installs helpers such as forEachAsync that the provider calls - confirm.
registerMixins();
/**
 * Test suite for the CSV export provider.
 *
 * Verifies that the provider is registered with the export provider factory and that the
 * CSV text handed to the (mocked) storage provider contains the expected number of lines
 * for each asset-state filter.
 */
describe("CSV Format Export Provider", () => {
    // Assets returned by the mocked asset provider created in beforeAll
    const testAssets = MockFactory.createTestAssets(10, 1);

    // Project whose own asset list contains 2 tagged, 1 visited and 1 not-visited asset
    const testProject: IProject = {
        ...MockFactory.createTestProject(),
        assets: {
            "asset-1": MockFactory.createTestAsset("1", AssetState.Tagged),
            "asset-2": MockFactory.createTestAsset("2", AssetState.Tagged),
            "asset-3": MockFactory.createTestAsset("3", AssetState.Visited),
            "asset-4": MockFactory.createTestAsset("4", AssetState.NotVisited),
        },
        exportFormat: {
            providerType: "csv",
            providerOptions: {
                assetState: ExportAssetState.All,
            },
        },
    };

    // Must mirror the file name built by CsvExportProvider.export(), which replaces EVERY
    // whitespace character (/\s/g). A plain " " string pattern only replaces the first space
    // and would disagree with the provider for names containing more than one space.
    const expectedFileName = "vott-csv-export/" + testProject.name.replace(/\s/g, "-") + "-export.csv";

    beforeAll(() => {
        // Avoid real asset downloads
        HtmlFileReader.getAssetBlob = jest.fn(() => {
            return Promise.resolve(new Blob(["Some binary data"]));
        });

        // Every asset provider created during the tests resolves to the 10 test assets
        AssetProviderFactory.create = jest.fn(() => {
            return {
                getAssets: jest.fn(() => Promise.resolve(testAssets)),
            };
        });
    });

    beforeEach(() => {
        registerProviders();
    });

    it("Is defined", () => {
        expect(CsvExportProvider).toBeDefined();
    });

    it("Can be instantiated through the factory", () => {
        const options: IExportProviderOptions = {
            assetState: ExportAssetState.All,
        };

        const exportProvider = ExportProviderFactory.create("csv", testProject, options);
        expect(exportProvider).not.toBeNull();
        expect(exportProvider).toBeInstanceOf(CsvExportProvider);
    });

    describe("Export variations", () => {
        beforeEach(() => {
            // Every asset gets metadata with a single rectangular region carrying two tags,
            // so each exported asset contributes exactly two CSV rows.
            const assetServiceMock = AssetService as jest.Mocked<typeof AssetService>;
            assetServiceMock.prototype.getAssetMetadata = jest.fn((asset) => {
                const assetMetadata: IAssetMetadata = {
                    asset,
                    regions: [
                        {
                            id: "1",
                            type: RegionType.Rectangle,
                            tags: ["a", "b"],
                            boundingBox: {
                                left: 1,
                                top: 2,
                                width: 3,
                                height: 4,
                            },
                        },
                    ],
                    version: appInfo.version,
                };

                return Promise.resolve(assetMetadata);
            });

            // Reset recorded calls and instances so each test inspects only its own export
            const storageProviderMock = LocalFileSystemProxy as jest.Mock<LocalFileSystemProxy>;
            storageProviderMock.prototype.writeText.mockClear();
            storageProviderMock.prototype.writeBinary.mockClear();
            storageProviderMock.mockClear();
        });

        it("Exports all assets", async () => {
            const options: ICsvExportProviderOptions = {
                assetState: ExportAssetState.All,
                includeImages: false,
            };

            const exportProvider = new CsvExportProvider(testProject, options);
            await exportProvider.export();

            // Second argument of the first writeText call is the CSV text
            const storageProviderMock = LocalFileSystemProxy as any;
            const exportCsv = storageProviderMock.mock.instances[0].writeText.mock.calls[0][1];
            const records = exportCsv.split(os.EOL);

            // 10 assets - Each with 1 region and 2 tags (+ 1 header line)
            expect(records.length).toEqual(testAssets.length * 2 + 1);
            expect(LocalFileSystemProxy.prototype.writeText)
                .toBeCalledWith(expectedFileName, expect.any(String));
        });

        it("Exports only visited assets (includes tagged)", async () => {
            const options: ICsvExportProviderOptions = {
                assetState: ExportAssetState.Visited,
                includeImages: false,
            };

            const exportProvider = new CsvExportProvider(testProject, options);
            await exportProvider.export();

            const storageProviderMock = LocalFileSystemProxy as any;
            const exportCsv = storageProviderMock.mock.instances[0].writeText.mock.calls[0][1];
            const records = exportCsv.split(os.EOL);

            // 2 tagged / 1 visited assets - Each with 1 region and 2 tags (+ 1 header line)
            expect(records.length).toEqual(7);
        });

        it("Exports only tagged assets", async () => {
            const options: ICsvExportProviderOptions = {
                assetState: ExportAssetState.Tagged,
                includeImages: false,
            };

            const exportProvider = new CsvExportProvider(testProject, options);
            await exportProvider.export();

            const storageProviderMock = LocalFileSystemProxy as any;
            const exportCsv = storageProviderMock.mock.instances[0].writeText.mock.calls[0][1];
            const records = exportCsv.split(os.EOL);

            // 2 tagged - Each with 1 region and 2 tags (+ 1 header line)
            expect(records.length).toEqual(5);
        });
    });
});
+73
Ver Arquivo
@@ -0,0 +1,73 @@
import _ from "lodash";
import { ExportProvider } from "./exportProvider";
import { IProject, IExportProviderOptions } from "../../models/applicationState";
import Guard from "../../common/guard";
import HtmlFileReader from "../../common/htmlFileReader";
import json2csv, { Parser } from "json2csv";
/**
 * Options for CSV Export Provider.
 * Extends the common export provider options with a flag that controls image export.
 */
export interface ICsvExportProviderOptions extends IExportProviderOptions {
/**
 * Whether or not to include binary assets in target connection.
 * When true, the provider's export() also writes each exported asset's binary content
 * under the "vott-csv-export/" folder.
 */
includeImages: boolean;
}
/**
 * @name - CSV Format Export Provider
 * @description - Exports a project into a single CSV file that includes all configured assets
 */
export class CsvExportProvider extends ExportProvider<ICsvExportProviderOptions> {
    /**
     * @param project - Project to export
     * @param options - CSV export options; checked with Guard.null
     */
    constructor(project: IProject, options: ICsvExportProviderOptions) {
        super(project, options);
        Guard.null(options);
    }

    /**
     * Export project to CSV
     *
     * Writes one CSV file named "vott-csv-export/<project name>-export.csv", where every
     * whitespace character in the project name is replaced by "-". The file has the columns
     * image,xmin,ymin,xmax,ymax,label and one row per (region, tag) pair of each exported asset.
     * When the includeImages option is set, each asset's binary content is also written
     * into the same "vott-csv-export/" folder.
     *
     * @returns Promise that resolves once the images (if requested) and the CSV file are written
     */
    public async export(): Promise<void> {
        const exportFolder = "vott-csv-export";
        const results = await this.getAssetsForExport();

        // Write images in their own pass so that the CSV rows below are built synchronously,
        // in asset order. Pushing rows from inside the async callback (after awaited I/O) would
        // make row order depend on I/O completion order if forEachAsync runs callbacks
        // concurrently (its implementation is not visible from this file).
        if (this.options.includeImages) {
            await results.forEachAsync(async (assetMetadata) => {
                const arrayBuffer = await HtmlFileReader.getAssetArray(assetMetadata.asset);
                const assetFilePath = `${exportFolder}/${assetMetadata.asset.name}`;
                await this.storageProvider.writeBinary(assetFilePath, Buffer.from(arrayBuffer));
            });
        }

        // Build CSV Records
        // The CSV file itself must have the following format:
        // image,xmin,ymin,xmax,ymax,label
        // image_1.jpg,26,594,86,617,cat
        // image_1.jpg,599,528,612,541,car
        // image_2.jpg,393,477,430,552,dog
        const dataItems: Array<{
            image: string, xmin: number, ymin: number, xmax: number, ymax: number, label: string,
        }> = [];

        results.forEach((assetMetadata) => {
            assetMetadata.regions.forEach((region) => {
                // NOTE(review): assumes every region carries a boundingBox - confirm against IRegion
                const { left, top, width, height } = region.boundingBox;

                // A region with several tags produces one row per tag
                region.tags.forEach((tag) => {
                    dataItems.push({
                        image: assetMetadata.asset.name,
                        xmin: left,
                        ymin: top,
                        xmax: left + width,
                        ymax: top + height,
                        label: tag,
                    });
                });
            });
        });

        // Configure CSV options (column order of the output file)
        const csvOptions: json2csv.Options<{}> = {
            fields: ["image", "xmin", "ymin", "xmax", "ymax", "label"],
        };
        const csvParser = new Parser(csvOptions);
        const csvData = csvParser.parse(dataItems);

        // Save CSV
        const fileName = `${exportFolder}/${this.project.name.replace(/\s/g, "-")}-export.csv`;
        await this.storageProvider.writeText(fileName, csvData);
    }
}
+5
Ver Arquivo
@@ -0,0 +1,5 @@
{
"includeImages": {
"ui:widget": "checkbox"
}
}
+2 -2
Ver Arquivo
@@ -21,8 +21,8 @@
"includeImages": {
"type": "boolean",
"default": true,
"title": "${strings.export.providers.vottJson.properties.includeImages.title}",
"description": "${strings.export.providers.vottJson.properties.includeImages.description}"
"title": "${strings.export.providers.common.properties.includeImages.title}",
"description": "${strings.export.providers.common.properties.includeImages.description}"
}
}
}
+5 -13
Ver Arquivo
@@ -1,6 +1,6 @@
import _ from "lodash";
import { ExportProvider } from "./exportProvider";
import { IProject, IExportProviderOptions, IAssetMetadata } from "../../models/applicationState";
import { IProject, IExportProviderOptions } from "../../models/applicationState";
import Guard from "../../common/guard";
import { constants } from "../../common/constants";
import HtmlFileReader from "../../common/htmlFileReader";
@@ -31,17 +31,9 @@ export class VottJsonExportProvider extends ExportProvider<IVottJsonExportProvid
if (this.options.includeImages) {
await results.forEachAsync(async (assetMetadata) => {
return new Promise<void>(async (resolve) => {
const blob = await HtmlFileReader.getAssetBlob(assetMetadata.asset);
const assetFilePath = `vott-json-export/${assetMetadata.asset.name}`;
const fileReader = new FileReader();
fileReader.onload = async () => {
const buffer = Buffer.from(fileReader.result as ArrayBuffer);
await this.storageProvider.writeBinary(assetFilePath, buffer);
resolve();
};
fileReader.readAsArrayBuffer(blob);
});
const arrayBuffer = await HtmlFileReader.getAssetArray(assetMetadata.asset);
const assetFilePath = `vott-json-export/${assetMetadata.asset.name}`;
await this.storageProvider.writeBinary(assetFilePath, Buffer.from(arrayBuffer));
});
}
@@ -53,7 +45,7 @@ export class VottJsonExportProvider extends ExportProvider<IVottJsonExportProvid
delete exportObject.targetConnection;
delete exportObject.exportFormat;
const fileName = `vott-json-export/${this.project.name.replace(" ", "-")}${constants.exportFileExtension}`;
const fileName = `vott-json-export/${this.project.name.replace(/\s/g, "-")}${constants.exportFileExtension}`;
await this.storageProvider.writeText(fileName, JSON.stringify(exportObject, null, 4));
}
}
+6
Ver Arquivo
@@ -2,6 +2,7 @@ import { ExportProviderFactory } from "./providers/export/exportProviderFactory"
import { PascalVOCExportProvider } from "./providers/export/pascalVOC";
import { TFRecordsExportProvider } from "./providers/export/tensorFlowRecords";
import { VottJsonExportProvider } from "./providers/export/vottJson";
import { CsvExportProvider } from "./providers/export/csv";
import { AssetProviderFactory } from "./providers/storage/assetProviderFactory";
import { AzureBlobStorage } from "./providers/storage/azureBlobStorage";
import { BingImageSearch } from "./providers/storage/bingImageSearch";
@@ -74,6 +75,11 @@ export default function registerProviders() {
displayName: strings.export.providers.cntk.displayName,
factory: (project, options) => new CntkExportProvider(project, options),
});
ExportProviderFactory.register({
name: "csv",
displayName: strings.export.providers.csv.displayName,
factory: (project, options) => new CsvExportProvider(project, options),
});
registerToolbar();
}