syntax = "proto3";

package org.nd4j.ir;

option java_outer_classname = "TensorNamespace";

// Using a subset of onnx, we define a small IR
// for defining nd4j operations.

// StringStringEntryProto follows the pattern for cross-proto-version maps.
// See https://developers.google.com/protocol-buffers/docs/proto3#maps
message StringStringEntryProto {
  // Entry key.
  string key = 1;
  // Entry value.
  string value = 2;
}

// Element data types that a tensor can hold.
enum DataType {
  UNDEFINED = 0;

  // Basic types.
  FLOAT = 1;   // float
  UINT8 = 2;   // uint8_t
  INT8 = 3;    // int8_t
  UINT16 = 4;  // uint16_t
  INT16 = 5;   // int16_t
  INT32 = 6;   // int32_t
  INT64 = 7;   // int64_t
  STRING = 8;  // string
  BOOL = 9;    // bool

  // IEEE754 half-precision floating-point format (16 bits wide).
  // This format has 1 sign bit, 5 exponent bits, and 10 mantissa bits.
  FLOAT16 = 10;

  DOUBLE = 11;
  UINT32 = 12;
  UINT64 = 13;
  COMPLEX64 = 14;   // complex with float32 real and imaginary components
  COMPLEX128 = 15;  // complex with float64 real and imaginary components

  // Non-IEEE floating-point format based on IEEE754 single-precision
  // floating-point number truncated to 16 bits.
  // This format has 1 sign bit, 8 exponent bits, and 7 mantissa bits.
  BFLOAT16 = 16;

  // Future extensions go here.
}

// Define the types.
message TypeProto {
  // Element type and shape of a tensor.
  message TensorDescriptor {
    // This field MUST NOT have the value of UNDEFINED
    // This field MUST be present for this version of the IR.
    DataType elem_type = 1;
    // Shape of the tensor.
    TensorShapeProto shape = 2;
  }

  oneof value {
    // The type of a tensor.
    TensorDescriptor tensor_type = 1;
  }
}

// Defines a tensor shape. A dimension can be either an integer value
// or a symbolic variable. A symbolic variable represents an unknown
// dimension.
message TensorShapeProto {
  // A single dimension: either a concrete size or a symbolic name.
  message Dimension {
    oneof value {
      // Integer value of the dimension.
      int64 dim_value = 1;
      // Name of the symbolic variable standing in for the dimension.
      string dim_param = 2;  // namespace Shape
    }
  }

  // The dimensions of the shape.
  repeated Dimension dim = 1;
}

// Defines information on value, including the name, the type, and
// the shape of the value.
message ValueInfoProto {
  // This field MUST be present in this version of the IR.
  string name = 1;  // namespace Value
  // This field MUST be present in this version of the IR.
  TypeProto type = 2;
  // A human-readable documentation for this value. Markdown is allowed.
  string doc_string = 3;
}

// Tensors
//
// A serialized tensor value.
message TensorProto {
  // The shape of the tensor.
  repeated int64 dims = 1;

  // The data type of the tensor.
  // This field MUST have a valid DataType value (the top-level DataType
  // enum of this file, stored here as its integer number).
  int32 data_type = 2;

  // For very large tensors, we may want to store them in chunks, in which
  // case the following fields will specify the segment that is stored in
  // the current TensorProto.
  // NOTE(review): whether `end` is inclusive or exclusive is not stated
  // here - confirm against the readers/writers of this message.
  message Segment {
    int64 begin = 1;
    int64 end = 2;
  }
  Segment segment = 3;

  // Tensor content must be organized in row-major order.
  //
  // Depending on the data_type field, exactly one of the fields below with
  // name ending in _data is used to store the elements of the tensor.

  // For float and complex64 values
  // Complex64 tensors are encoded as a single array of floats,
  // with the real components appearing in odd numbered positions,
  // and the corresponding imaginary component appearing in the
  // subsequent even numbered position. (e.g., [1.0 + 2.0i, 3.0 + 4.0i]
  // is encoded as [1.0, 2.0 ,3.0 ,4.0]
  // When this field is present, the data_type field MUST be FLOAT or COMPLEX64.
  repeated float float_data = 4 [packed = true];

  // For int32, uint8, int8, uint16, int16, bool, and float16 values
  // float16 values must be bit-wise converted to an uint16_t prior
  // to writing to the buffer.
  // When this field is present, the data_type field MUST be
  // INT32, INT16, INT8, UINT16, UINT8, BOOL, or FLOAT16
  repeated int32 int32_data = 5 [packed = true];

  // For strings.
  // Each element of string_data is a UTF-8 encoded Unicode
  // string. No trailing null, no leading BOM. The protobuf "string"
  // scalar type is not used to match ML community conventions.
  // When this field is present, the data_type field MUST be STRING
  repeated bytes string_data = 6;

  // For int64.
  // When this field is present, the data_type field MUST be INT64
  repeated int64 int64_data = 7 [packed = true];

  // Optionally, a name for the tensor.
  string name = 8;  // namespace Value

  // A human-readable documentation for this tensor. Markdown is allowed.
  string doc_string = 12;

  // Serializations can either use one of the fields above, or use this
  // raw bytes field. The only exception is the string case, where one is
  // required to store the content in the repeated bytes string_data field.
  //
  // When this raw_data field is used to store tensor value, elements MUST
  // be stored in fixed-width, little-endian order.
  // Floating-point data types MUST be stored in IEEE 754 format.
  // Complex64 elements must be written as two consecutive FLOAT values, real component first.
  // Complex128 elements must be written as two consecutive DOUBLE values, real component first.
  // Boolean type MUST be written one byte per tensor element (00000001 for true, 00000000 for false).
  //
  // Note: the advantage of specific field rather than the raw_data field is
  // that in some cases (e.g. int data), protobuf does a better packing via
  // variable length storage, and may lead to smaller binary footprint.
  // When this field is present, the data_type field MUST NOT be STRING or UNDEFINED
  bytes raw_data = 9;

  // Data can be stored inside the protobuf file using type-specific fields or raw_data.
  // Alternatively, raw bytes data can be stored in an external file, using the external_data field.
  // external_data stores key-value pairs describing data location. Recognized keys are:
  // - "location" (required) - POSIX filesystem path relative to the directory where the ONNX
  //                           protobuf model was stored
  // - "offset" (optional) - position of byte at which stored data begins. Integer stored as string.
  //                         Offset values SHOULD be multiples of 4096 (page size) to enable mmap support.
  // - "length" (optional) - number of bytes containing data. Integer stored as string.
  // - "checksum" (optional) - SHA1 digest of file specified under 'location' key.
  repeated StringStringEntryProto external_data = 13;

  // Location of the data for this tensor. MUST be one of:
  // - DEFAULT - data stored inside the protobuf message. Data is stored in raw_data (if set) otherwise in type-specified field.
  // - EXTERNAL - data stored in an external location as described by external_data field.
  enum DataLocation {
    DEFAULT = 0;
    EXTERNAL = 1;
  }

  // If value not set, data is stored in raw_data (if set) otherwise in type-specified field.
  DataLocation data_location = 14;

  // For double
  // Complex128 tensors are encoded as a single array of doubles,
  // with the real components appearing in odd numbered positions,
  // and the corresponding imaginary component appearing in the
  // subsequent even numbered position. (e.g., [1.0 + 2.0i, 3.0 + 4.0i]
  // is encoded as [1.0, 2.0 ,3.0 ,4.0]
  // When this field is present, the data_type field MUST be DOUBLE or COMPLEX128
  repeated double double_data = 10 [packed = true];

  // For uint64 and uint32 values
  // When this field is present, the data_type field MUST be
  // UINT32 or UINT64
  repeated uint64 uint64_data = 11 [packed = true];

  // For half values (tensorflow compatibility)
  repeated int32 half_val = 15 [packed = true];

  // Boolean values.
  repeated bool bool_val = 16 [packed = true];
}