TFRecords Reader and Builder-Reader integration tests (#517)

* Use boundingBox

* WIP: Adding TFRecords Reader

* Read TFRecords with multiple records

* Change length to a property

* Added TFRecords Builder-Reader integration tests

* Adding Guard and resolving other pr feedback
Esse commit está contido em:
Jacopo Mangiavacchi
2019-02-01 10:55:40 -08:00
commit de GitHub
commit 1627fa7033
6 arquivos alterados com 187 adições e 18 exclusões
+5
Ver Arquivo
@@ -3042,6 +3042,11 @@
"resolved": "https://registry.npmjs.org/buffer-indexof/-/buffer-indexof-1.1.1.tgz",
"integrity": "sha512-4/rOEg86jivtPTeOUUT61jJO1Ya1TrR/OkqCSZDyq84WJh3LuuiphBYJN+fm5xufIk4XAFcEwte/8WzC8If/1g=="
},
"buffer-reverse": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/buffer-reverse/-/buffer-reverse-1.0.1.tgz",
"integrity": "sha1-SSg8jvpvkBvAH6MwTQYCeXGuL2A="
},
"buffer-xor": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/buffer-xor/-/buffer-xor-1.0.3.tgz",
+4 -2
Ver Arquivo
@@ -8,10 +8,12 @@
"@types/snapsvg": "^0.4.35",
"axios": "^0.18.0",
"bootstrap": "^4.1.3",
"buffer-reverse": "^1.0.1",
"deepmerge": "^2.2.1",
"google-protobuf": "^3.6.1",
"lodash": "^4.17.11",
"md5.js": "^1.3.5",
"node-int64": "^0.4.0",
"react": "^16.7.0",
"react-dom": "^16.7.0",
"react-keydown": "^1.9.7",
@@ -27,8 +29,8 @@
"redux-thunk": "^2.3.0",
"rimraf": "^2.6.2",
"shortid": "^2.2.14",
"vott-ct": "^2.1.11",
"video-react": "^0.13.2"
"video-react": "^0.13.2",
"vott-ct": "^2.1.11"
},
"scripts": {
"start": "nf start -p 3000",
+5 -7
Ver Arquivo
@@ -176,19 +176,17 @@ export class TFRecordsJsonExportProvider extends ExportProvider<ITFRecordsJsonEx
}
private updateAssetTagArrays(element: IAssetMetadata, imageInfo: IImageInfo) {
element.regions.filter((region) => (region.type === RegionType.Rectangle ||
region.type === RegionType.Square) &&
region.points.length === 2)
element.regions.filter((region) => region.boundingBox)
.forEach((region) => {
region.tags.forEach((tag) => {
const index = this.project.tags.map((pTag) => pTag.name).indexOf(tag.name);
imageInfo.text.push(tag.name);
imageInfo.label.push(index);
imageInfo.xmin.push(region.points[0].x);
imageInfo.ymin.push(region.points[0].y);
imageInfo.xmax.push(region.points[1].x);
imageInfo.ymax.push(region.points[1].y);
imageInfo.xmin.push(region.boundingBox.left);
imageInfo.ymin.push(region.boundingBox.top);
imageInfo.xmax.push(region.boundingBox.left + region.boundingBox.width);
imageInfo.ymax.push(region.boundingBox.top + region.boundingBox.height);
imageInfo.difficult.push(0);
imageInfo.truncated.push(0);
imageInfo.view.push("Unspecified");
@@ -20,11 +20,16 @@
// const maskDelta uint32 = 0xa282ead8
// mask returns a masked representation of crc.
import Guard from "../../../common/guard";
import Int64 from "node-int64";
import reverse from "buffer-reverse";
/**
* @s - Buffer input
* @buffer - Buffer input
* @description - Calculate 32-bit CRC using the Castagnoli polynomial (0x1EDC6F41)
*/
export function crc32c(s: Buffer): number {
export function crc32c(buffer: Buffer): number {
Guard.null(buffer);
const polynomial = 0x1EDC6F41; // 0x04C11DB7 for crc32
const initialValue = 0xFFFFFFFF;
const finalXORValue = 0xFFFFFFFF;
@@ -55,8 +60,8 @@ export function crc32c(s: Buffer): number {
table[i] = reverse(c, 32);
}
for (i = 0; i < s.length; i++) {
c = s[i];
for (i = 0; i < buffer.length; i++) {
c = buffer[i];
if (c > 255) {
throw new RangeError();
}
@@ -68,10 +73,11 @@ export function crc32c(s: Buffer): number {
}
/**
* @s - Input CRC32 value
* @value - Input CRC32 value
* @description - Mask an input CRC32 value according to the TensorFlow TFRecords specs
*/
export function maskCrc(value: number): number {
Guard.null(value);
const kCrc32MaskDelta = 0xa282ead8;
const fourGb = Math.pow(2, 32);
@@ -79,10 +85,11 @@ export function maskCrc(value: number): number {
}
/**
* @s - Input number value
* @value - Input number value
* @description - Get a Buffer representation of a Int64 bit value
*/
export function getInt64Buffer(value: number): Buffer {
Guard.null(value);
const metadataBuffer = new ArrayBuffer(8);
const intArray = new Uint8Array(metadataBuffer, 0, 8);
const dataView = new DataView(metadataBuffer, 0, 8);
@@ -94,10 +101,11 @@ export function getInt64Buffer(value: number): Buffer {
}
/**
* @s - Input number value
* @value - Input number value
* @description - Get a Buffer representation of a Int32 bit value
*/
export function getInt32Buffer(value: number): Buffer {
Guard.null(value);
const fourGb = Math.pow(2, 32);
const value32 = value % fourGb;
@@ -111,10 +119,11 @@ export function getInt32Buffer(value: number): Buffer {
}
/**
* @s - Input string
* @str - Input string
* @description - Get a Uint8Array representation of an input string value
*/
export function textEncode(str: string): Uint8Array {
export function textEncode(str: string): Uint8Array {
Guard.null(str);
const utf8 = unescape(encodeURIComponent(str));
const result = new Uint8Array(utf8.length);
for (let i = 0; i < utf8.length; i++) {
@@ -122,3 +131,16 @@ export function textEncode(str: string): Uint8Array {
}
return result;
}
/**
 * @buffer - Input buffer (at least 8 bytes long)
 * @description - Decode the first 8 bytes of the buffer as an Int64 and return it as a number.
 * The bytes are reversed before being handed to Int64; the input buffer itself is left untouched.
 */
export function readInt64(buffer: Buffer): number {
    Guard.null(buffer);
    Guard.expression(buffer.length, (num) => num >= 8);

    // Only the leading 8 bytes take part in the conversion.
    const reversedBytes = reverse(buffer.slice(0, 8));

    // `true` lets toNumber return a value even when it cannot be represented exactly.
    return new Int64(reversedBytes, 0).toNumber(true);
}
@@ -0,0 +1,77 @@
import { TFRecordsBuilder, FeatureType } from "./tensorFlowBuilder";
import { TFRecordsReader } from "./tensorFlowReader";
describe("TFRecords Reader/Builder Integration test", () => {
    describe("Check Adding Single TFRecords", () => {
        // Feature names and the list property expected for each one, in insertion order.
        const expectedNames = ["feature/1", "feature/2", "feature/3"];
        const expectedLists = ["int64List", "floatList", "bytesList"];

        let builder: TFRecordsBuilder;

        beforeEach(() => {
            builder = new TFRecordsBuilder();
        });

        // Fills the current builder with the three sample features and serializes it.
        const serializeSampleRecord = () => {
            builder.addArrayFeature("feature/1", FeatureType.Int64, [1, 2]);
            builder.addArrayFeature("feature/2", FeatureType.Float, [1.0, 2.0]);
            builder.addArrayFeature("feature/3", FeatureType.String, ["1", "2"]);
            return builder.build();
        };

        // Asserts that a decoded record holds the three sample features, each with two values.
        const expectSampleRecord = (record: object) => {
            const featureMap = record["context"].featureMap;

            expect(featureMap.length).toEqual(3);

            expectedNames.forEach((name, index) => {
                expect(featureMap[index][0]).toEqual(name);
            });

            expectedLists.forEach((listKey, index) => {
                expect(featureMap[index][1][listKey]["valueList"].length).toEqual(2);
            });
        };

        it("Check single TFRecord", async () => {
            const record = serializeSampleRecord();

            const tfrecords = TFRecordsBuilder.buildTFRecords([record]);
            expect(tfrecords.length).toEqual(89);

            const reader = new TFRecordsReader(tfrecords);
            expect(reader.length).toEqual(1);

            const decoded = reader.toArray();
            expect(decoded.length).toEqual(1);

            expectSampleRecord(decoded[0]);
        });

        it("Check multiple TFRecords", async () => {
            const record = serializeSampleRecord();

            const tfrecords = TFRecordsBuilder.buildTFRecords([record, record]);
            expect(tfrecords.length).toEqual(178);

            const reader = new TFRecordsReader(tfrecords);
            expect(reader.length).toEqual(2);

            const decoded = reader.toArray();
            expect(decoded.length).toEqual(2);

            // Check First TFRecord
            expectSampleRecord(decoded[0]);

            // Check Second TFRecord
            expectSampleRecord(decoded[1]);
        });
    });
});
@@ -0,0 +1,65 @@
import Guard from "../../../common/guard";
import { TFRecordsImageMessage, Features, Feature, FeatureList,
BytesList, Int64List, FloatList } from "./tensorFlowRecordsProtoBuf_pb";
import { crc32c, maskCrc, getInt64Buffer, getInt32Buffer, textEncode, readInt64 } from "./tensorFlowHelpers";
/**
 * @name - TFRecords Read Class
 * @description - Read a TFRecords object
 *
 * The constructor walks the input buffer record by record. Each record is read as:
 *   - 8 bytes: data length (decoded with readInt64)
 *   - 4 bytes: masked CRC32C of the length bytes
 *   - N bytes: serialized TFRecordsImageMessage
 *   - 4 bytes: masked CRC32C of the data bytes
 *
 * Reading is best-effort: on the first truncated or corrupt record a message is
 * logged and parsing stops, keeping every record successfully read before it.
 */
export class TFRecordsReader {
    private imageMessages: TFRecordsImageMessage[];

    /**
     * @tfrecords - Buffer holding zero or more concatenated TFRecords
     * @description - Parse every record in the buffer, stopping at the first invalid one
     */
    constructor(tfrecords: Buffer) {
        Guard.null(tfrecords);

        this.imageMessages = [];

        const lengthSize = 8;
        const crcSize = 4;
        let position = 0;

        while (position < tfrecords.length) {
            // A record header needs the length field plus its CRC; without this check
            // readInt64 / readUInt32LE would throw on a truncated tail.
            if (position + lengthSize + crcSize > tfrecords.length) {
                console.log("Truncated TFRecord header");
                break;
            }

            const lengthBuffer = tfrecords.slice(position, position + lengthSize);
            const dataLength = readInt64(lengthBuffer);
            const lengthCrc = maskCrc(crc32c(lengthBuffer));
            position += lengthSize;

            const expectedLengthCrc = tfrecords.readUInt32LE(position);
            position += crcSize;

            if (lengthCrc !== expectedLengthCrc) {
                console.log("Wrong Length CRC");
                break;
            }

            // The declared payload and its trailing CRC must fit in the remaining bytes.
            // A negative length is rejected too, since it would move the cursor backwards.
            if (dataLength < 0 || position + dataLength + crcSize > tfrecords.length) {
                console.log("Truncated TFRecord data");
                break;
            }

            const dataBuffer = tfrecords.slice(position, position + dataLength);
            const dataCrc = maskCrc(crc32c(dataBuffer));
            position += dataLength;

            const expectedDataCrc = tfrecords.readUInt32LE(position);
            position += crcSize;

            if (dataCrc !== expectedDataCrc) {
                console.log("Wrong Data CRC");
                break;
            }

            // Deserialize TFRecord from dataBuffer
            const imageMessage: TFRecordsImageMessage = TFRecordsImageMessage.deserializeBinary(dataBuffer);

            this.imageMessages.push(imageMessage);
        }
    }

    /**
     * @description - Return the number of TFRecords read
     */
    get length(): number {
        return this.imageMessages.length;
    }

    /**
     * @description - Return the TFRecords in a JSON Object Array format
     */
    public toArray(): object[] {
        return this.imageMessages.map((imageMessage) => imageMessage.toObject());
    }
}