diff --git a/Sources/SwiftNLP/Models/all-MiniLM-L6-v2.mlmodelc/coremldata.bin b/Sources/SwiftNLP/Models/all-MiniLM-L6-v2.mlmodelc/coremldata.bin deleted file mode 100644 index 7662df6ec5b705a2a360deebd803affbf1e4d3d5..0000000000000000000000000000000000000000 Binary files a/Sources/SwiftNLP/Models/all-MiniLM-L6-v2.mlmodelc/coremldata.bin and /dev/null differ diff --git a/Sources/SwiftNLP/Models/all-MiniLM-L6-v2.mlmodelc/metadata.json b/Sources/SwiftNLP/Models/all-MiniLM-L6-v2.mlmodelc/metadata.json deleted file mode 100644 index e360c29cebf89e7c71d7f9fa1baeade972f9ad20..0000000000000000000000000000000000000000 --- a/Sources/SwiftNLP/Models/all-MiniLM-L6-v2.mlmodelc/metadata.json +++ /dev/null @@ -1,83 +0,0 @@ -[ - { - "shortDescription" : "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search.", - "metadataOutputVersion" : "3.0", - "outputSchema" : [ - { - "hasShapeFlexibility" : "0", - "isOptional" : "0", - "dataType" : "Float32", - "formattedType" : "MultiArray (Float32)", - "shortDescription" : "--", - "shape" : "[]", - "name" : "embeddings", - "type" : "MultiArray" - } - ], - "version" : "--", - "modelParameters" : [ - - ], - "author" : "See: https:\/\/huggingface.co\/sentence-transformers\/all-MiniLM-L6-v2", - "specificationVersion" : 6, - "storagePrecision" : "Float16", - "license" : "Apache 2.0", - "mlProgramOperationTypeHistogram" : { - "Linear" : 36, - "Gelu" : 6, - "LayerNorm" : 13, - "SliceByIndex" : 1, - "Matmul" : 12, - "Sub" : 1, - "Transpose" : 24, - "Softmax" : 6, - "Mul" : 7, - "Cast" : 5, - "Reshape" : 24, - "Add" : 20, - "ExpandDims" : 2, - "Gather" : 1 - }, - "computePrecision" : "Mixed (Float16, Float32, Int32)", - "isUpdatable" : "0", - "availability" : { - "macOS" : "12.0", - "tvOS" : "15.0", - "visionOS" : "1.0", - "watchOS" : "8.0", - "iOS" : "15.0", - "macCatalyst" : "15.0" - }, - "modelType" : { - "name" : "MLModelType_mlProgram" - }, - "inputSchema" : [ - { - "hasShapeFlexibility" : "0", - "isOptional" : "0", - "dataType" : "Float32", - "formattedType" : "MultiArray (Float32 1 × 512)", - "shortDescription" : "--", - "shape" : "[1, 512]", - "name" : "input_ids", - "type" : "MultiArray" - }, - { - "hasShapeFlexibility" : "0", - "isOptional" : "0", - "dataType" : "Float32", - "formattedType" : "MultiArray (Float32 1 × 512)", - "shortDescription" : "--", - "shape" : "[1, 512]", - "name" : "attention_mask", - "type" : "MultiArray" - } - ], - "userDefinedMetadata" : { - "com.github.apple.coremltools.version" : "6.3.0", - "com.github.apple.coremltools.source" : "torch==2.0.0" - }, - "generatedClassName" : "all_MiniLM_L6_v2", - "method" : "predict" - } -] \ No newline at end of file diff --git a/Sources/SwiftNLP/Models/all-MiniLM-L6-v2.mlmodelc/model.mil b/Sources/SwiftNLP/Models/all-MiniLM-L6-v2.mlmodelc/model.mil deleted file mode 100644 index f9bc606075fee95933da419178cf388b4f8e3f41..0000000000000000000000000000000000000000 --- a/Sources/SwiftNLP/Models/all-MiniLM-L6-v2.mlmodelc/model.mil +++ /dev/null @@ -1,378 +0,0 @@ -program(1.0) -[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.0.0"}, {"coremltools-version", "6.3.0"}})] -{ - func main<ios15>(tensor<fp32, [1, 512]> attention_mask, tensor<fp32, [1, 512]> input_ids) { - tensor<int32, []> var_8 = const()[name = tensor<string, []>("op_8"), val = tensor<int32, []>(-1)]; - tensor<int32, [1]> var_33_axes_0 = const()[name = tensor<string, []>("op_33_axes_0"), val = tensor<int32, [1]>([1])]; - tensor<string, []> attention_mask_to_fp16_dtype_0 = const()[name = tensor<string, []>("attention_mask_to_fp16_dtype_0"), val = tensor<string, []>("fp16")]; - tensor<fp16, [1, 512]> cast_193 = cast(dtype = attention_mask_to_fp16_dtype_0, x = attention_mask); - tensor<fp16, [1, 1, 512]> var_33_cast = expand_dims(axes = var_33_axes_0, x = cast_193); - tensor<int32, [1]> var_34_axes_0 = const()[name = tensor<string, []>("op_34_axes_0"), val = tensor<int32, [1]>([2])]; - tensor<fp16, [1, 1, 1, 512]> var_34_cast = expand_dims(axes = var_34_axes_0, x = var_33_cast); - tensor<fp16, []> var_13_to_fp16 = const()[name = tensor<string, []>("op_13_to_fp16"), val = tensor<fp16, []>(0x1p+0)]; - tensor<fp16, [1, 1, 1, 512]> var_37_cast = sub(x = var_13_to_fp16, y = var_34_cast); - tensor<string, []> var_37_cast_to_fp32_dtype_0 = const()[name = tensor<string, []>("op_37_cast_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; - tensor<fp32, []> var_38 = const()[name = tensor<string, []>("op_38"), val = tensor<fp32, []>(-0x1.fffffep+127)]; - tensor<fp32, [1, 1, 1, 512]> cast_190 = cast(dtype = var_37_cast_to_fp32_dtype_0, x = var_37_cast); - tensor<fp32, [1, 1, 1, 512]> attention_mask_1 = mul(x = cast_190, y = var_38); - tensor<string, []> cast_0_dtype_0 = const()[name = tensor<string, []>("cast_0_dtype_0"), val = tensor<string, []>("int32")]; - tensor<int32, []> inputs_embeds_axis_0 = const()[name = tensor<string, []>("inputs_embeds_axis_0"), val = tensor<int32, []>(0)]; - tensor<fp16, [30522, 384]> model_embeddings_word_embeddings_weight_to_fp16 = const()[name = tensor<string, []>("model_embeddings_word_embeddings_weight_to_fp16"), val = tensor<fp16, [30522, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))]; - tensor<int32, [1, 512]> cast_189 = cast(dtype = cast_0_dtype_0, x = input_ids); - tensor<fp16, [1, 512, 384]> inputs_embeds_cast = gather(axis = inputs_embeds_axis_0, indices = cast_189, x = model_embeddings_word_embeddings_weight_to_fp16); - tensor<fp16, [1, 512, 384]> token_type_embeddings_1_to_fp16 = const()[name = tensor<string, []>("token_type_embeddings_1_to_fp16"), val = tensor<fp16, [1, 512, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23441024)))]; - tensor<fp16, [1, 512, 384]> embeddings_1_cast = add(x = inputs_embeds_cast, y = token_type_embeddings_1_to_fp16); - tensor<fp16, [1, 512, 384]> position_embeddings_1_to_fp16 = const()[name = tensor<string, []>("position_embeddings_1_to_fp16"), val = tensor<fp16, [1, 512, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23834304)))]; - tensor<fp16, [1, 512, 384]> input_5_cast = add(x = embeddings_1_cast, y = position_embeddings_1_to_fp16); - tensor<int32, [1]> input_7_axes_0 = const()[name = tensor<string, []>("input_7_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp16, [384]> model_embeddings_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_embeddings_LayerNorm_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24227584)))]; - tensor<fp16, [384]> model_embeddings_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_embeddings_LayerNorm_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24228416)))]; - tensor<fp16, []> var_10_to_fp16 = const()[name = tensor<string, []>("op_10_to_fp16"), val = tensor<fp16, []>(0x1p-24)]; - tensor<fp16, [1, 512, 384]> input_7_cast = layer_norm(axes = input_7_axes_0, beta = model_embeddings_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_embeddings_LayerNorm_weight_to_fp16, x = input_5_cast); - tensor<fp16, [384, 384]> model_encoder_layer_0_attention_self_query_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_0_attention_self_query_weight_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24229248)))]; - tensor<fp16, [384]> model_encoder_layer_0_attention_self_query_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_0_attention_self_query_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24524224)))]; - tensor<fp16, [1, 512, 384]> x_9_cast = linear(bias = model_encoder_layer_0_attention_self_query_bias_to_fp16, weight = model_encoder_layer_0_attention_self_query_weight_to_fp16, x = input_7_cast); - tensor<fp16, [384, 384]> model_encoder_layer_0_attention_self_key_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_0_attention_self_key_weight_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24525056)))]; - tensor<fp16, [384]> model_encoder_layer_0_attention_self_key_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_0_attention_self_key_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24820032)))]; - tensor<fp16, [1, 512, 384]> x_1_cast = linear(bias = model_encoder_layer_0_attention_self_key_bias_to_fp16, weight = model_encoder_layer_0_attention_self_key_weight_to_fp16, x = input_7_cast); - tensor<int32, [4]> var_93 = const()[name = tensor<string, []>("op_93"), val = tensor<int32, [4]>([1, 512, 12, 32])]; - tensor<fp16, [1, 512, 12, 32]> x_3_cast = reshape(shape = var_93, x = x_1_cast); - tensor<fp16, [384, 384]> model_encoder_layer_0_attention_self_value_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_0_attention_self_value_weight_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24820864)))]; - tensor<fp16, [384]> model_encoder_layer_0_attention_self_value_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_0_attention_self_value_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25115840)))]; - tensor<fp16, [1, 512, 384]> x_5_cast = linear(bias = model_encoder_layer_0_attention_self_value_bias_to_fp16, weight = model_encoder_layer_0_attention_self_value_weight_to_fp16, x = input_7_cast); - tensor<int32, [4]> var_102 = const()[name = tensor<string, []>("op_102"), val = tensor<int32, [4]>([1, 512, 12, 32])]; - tensor<fp16, [1, 512, 12, 32]> x_7_cast = reshape(shape = var_102, x = x_5_cast); - tensor<int32, [4]> var_104 = const()[name = tensor<string, []>("op_104"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> var_108 = const()[name = tensor<string, []>("op_108"), val = tensor<int32, [4]>([1, 512, 12, 32])]; - tensor<fp16, [1, 512, 12, 32]> x_11_cast = reshape(shape = var_108, x = x_9_cast); - tensor<bool, []> attention_scores_1_transpose_x_0 = const()[name = tensor<string, []>("attention_scores_1_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> attention_scores_1_transpose_y_0 = const()[name = tensor<string, []>("attention_scores_1_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<int32, [4]> transpose_6_perm_0 = const()[name = tensor<string, []>("transpose_6_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> transpose_7_perm_0 = const()[name = tensor<string, []>("transpose_7_perm_0"), val = tensor<int32, [4]>([0, 2, 3, 1])]; - tensor<fp16, [1, 12, 32, 512]> transpose_39 = transpose(perm = transpose_7_perm_0, x = x_3_cast); - tensor<fp16, [1, 12, 512, 32]> transpose_40 = transpose(perm = transpose_6_perm_0, x = x_11_cast); - tensor<fp16, [1, 12, 512, 512]> attention_scores_1_cast = matmul(transpose_x = attention_scores_1_transpose_x_0, transpose_y = attention_scores_1_transpose_y_0, x = transpose_40, y = transpose_39); - tensor<fp16, []> _inversed_attention_scores_3_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_attention_scores_3_y_0_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)]; - tensor<fp16, [1, 12, 512, 512]> _inversed_attention_scores_3_cast = mul(x = attention_scores_1_cast, y = _inversed_attention_scores_3_y_0_to_fp16); - tensor<fp16, [1, 12, 512, 512]> input_11_cast = add(x = _inversed_attention_scores_3_cast, y = cast_193); - tensor<fp16, [1, 12, 512, 512]> input_13_cast = softmax(axis = var_8, x = input_11_cast); - tensor<bool, []> context_layer_1_transpose_x_0 = const()[name = tensor<string, []>("context_layer_1_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> context_layer_1_transpose_y_0 = const()[name = tensor<string, []>("context_layer_1_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<fp16, [1, 12, 512, 32]> transpose_41 = transpose(perm = var_104, x = x_7_cast); - tensor<fp16, [1, 12, 512, 32]> context_layer_1_cast = matmul(transpose_x = context_layer_1_transpose_x_0, transpose_y = context_layer_1_transpose_y_0, x = input_13_cast, y = transpose_41); - tensor<int32, [4]> var_120 = const()[name = tensor<string, []>("op_120"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [3]> var_125 = const()[name = tensor<string, []>("op_125"), val = tensor<int32, [3]>([1, 512, 384])]; - tensor<fp16, [1, 512, 12, 32]> transpose_38 = transpose(perm = var_120, x = context_layer_1_cast); - tensor<fp16, [1, 512, 384]> input_15_cast = reshape(shape = var_125, x = transpose_38); - tensor<fp16, [384, 384]> model_encoder_layer_0_attention_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_0_attention_output_dense_weight_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25116672)))]; - tensor<fp16, [384]> model_encoder_layer_0_attention_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_0_attention_output_dense_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25411648)))]; - tensor<fp16, [1, 512, 384]> input_17_cast = linear(bias = model_encoder_layer_0_attention_output_dense_bias_to_fp16, weight = model_encoder_layer_0_attention_output_dense_weight_to_fp16, x = input_15_cast); - tensor<fp16, [1, 512, 384]> input_19_cast = add(x = input_17_cast, y = input_7_cast); - tensor<int32, [1]> input_21_axes_0 = const()[name = tensor<string, []>("input_21_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp16, [384]> model_encoder_layer_0_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_0_attention_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25412480)))]; - tensor<fp16, [384]> model_encoder_layer_0_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_0_attention_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25413312)))]; - tensor<fp16, [1, 512, 384]> input_21_cast = layer_norm(axes = input_21_axes_0, beta = model_encoder_layer_0_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_encoder_layer_0_attention_output_LayerNorm_weight_to_fp16, x = input_19_cast); - tensor<fp16, [1536, 384]> model_encoder_layer_0_intermediate_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_0_intermediate_dense_weight_to_fp16"), val = tensor<fp16, [1536, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25414144)))]; - tensor<fp16, [1536]> model_encoder_layer_0_intermediate_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_0_intermediate_dense_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(26593856)))]; - tensor<fp16, [1, 512, 1536]> input_23_cast = linear(bias = model_encoder_layer_0_intermediate_dense_bias_to_fp16, weight = model_encoder_layer_0_intermediate_dense_weight_to_fp16, x = input_21_cast); - tensor<string, []> input_25_mode_0 = const()[name = tensor<string, []>("input_25_mode_0"), val = tensor<string, []>("EXACT")]; - tensor<fp16, [1, 512, 1536]> input_25_cast = gelu(mode = input_25_mode_0, x = input_23_cast); - tensor<fp16, [384, 1536]> model_encoder_layer_0_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_0_output_dense_weight_to_fp16"), val = tensor<fp16, [384, 1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(26596992)))]; - tensor<fp16, [384]> model_encoder_layer_0_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_0_output_dense_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27776704)))]; - tensor<fp16, [1, 512, 384]> input_27_cast = linear(bias = model_encoder_layer_0_output_dense_bias_to_fp16, weight = model_encoder_layer_0_output_dense_weight_to_fp16, x = input_25_cast); - tensor<fp16, [1, 512, 384]> input_29_cast = add(x = input_27_cast, y = input_21_cast); - tensor<int32, [1]> input_31_axes_0 = const()[name = tensor<string, []>("input_31_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp16, [384]> model_encoder_layer_0_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_0_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27777536)))]; - tensor<fp16, [384]> model_encoder_layer_0_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_0_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27778368)))]; - tensor<fp16, [1, 512, 384]> input_31_cast = layer_norm(axes = input_31_axes_0, beta = model_encoder_layer_0_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_encoder_layer_0_output_LayerNorm_weight_to_fp16, x = input_29_cast); - tensor<fp16, [384, 384]> model_encoder_layer_1_attention_self_query_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_1_attention_self_query_weight_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27779200)))]; - tensor<fp16, [384]> model_encoder_layer_1_attention_self_query_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_1_attention_self_query_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28074176)))]; - tensor<fp16, [1, 512, 384]> x_21_cast = linear(bias = model_encoder_layer_1_attention_self_query_bias_to_fp16, weight = model_encoder_layer_1_attention_self_query_weight_to_fp16, x = input_31_cast); - tensor<fp16, [384, 384]> model_encoder_layer_1_attention_self_key_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_1_attention_self_key_weight_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28075008)))]; - tensor<fp16, [384]> model_encoder_layer_1_attention_self_key_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_1_attention_self_key_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28369984)))]; - tensor<fp16, [1, 512, 384]> x_13_cast = linear(bias = model_encoder_layer_1_attention_self_key_bias_to_fp16, weight = model_encoder_layer_1_attention_self_key_weight_to_fp16, x = input_31_cast); - tensor<int32, [4]> var_170 = const()[name = tensor<string, []>("op_170"), val = tensor<int32, [4]>([1, 512, 12, 32])]; - tensor<fp16, [1, 512, 12, 32]> x_15_cast = reshape(shape = var_170, x = x_13_cast); - tensor<fp16, [384, 384]> model_encoder_layer_1_attention_self_value_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_1_attention_self_value_weight_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28370816)))]; - tensor<fp16, [384]> model_encoder_layer_1_attention_self_value_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_1_attention_self_value_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28665792)))]; - tensor<fp16, [1, 512, 384]> x_17_cast = linear(bias = model_encoder_layer_1_attention_self_value_bias_to_fp16, weight = model_encoder_layer_1_attention_self_value_weight_to_fp16, x = input_31_cast); - tensor<int32, [4]> var_179 = const()[name = tensor<string, []>("op_179"), val = tensor<int32, [4]>([1, 512, 12, 32])]; - tensor<fp16, [1, 512, 12, 32]> x_19_cast = reshape(shape = var_179, x = x_17_cast); - tensor<int32, [4]> var_181 = const()[name = tensor<string, []>("op_181"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> var_185 = const()[name = tensor<string, []>("op_185"), val = tensor<int32, [4]>([1, 512, 12, 32])]; - tensor<fp16, [1, 512, 12, 32]> x_23_cast = reshape(shape = var_185, x = x_21_cast); - tensor<bool, []> attention_scores_5_transpose_x_0 = const()[name = tensor<string, []>("attention_scores_5_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> attention_scores_5_transpose_y_0 = const()[name = tensor<string, []>("attention_scores_5_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<int32, [4]> transpose_8_perm_0 = const()[name = tensor<string, []>("transpose_8_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> transpose_9_perm_0 = const()[name = tensor<string, []>("transpose_9_perm_0"), val = tensor<int32, [4]>([0, 2, 3, 1])]; - tensor<fp16, [1, 12, 32, 512]> transpose_35 = transpose(perm = transpose_9_perm_0, x = x_15_cast); - tensor<fp16, [1, 12, 512, 32]> transpose_36 = transpose(perm = transpose_8_perm_0, x = x_23_cast); - tensor<fp16, [1, 12, 512, 512]> attention_scores_5_cast = matmul(transpose_x = attention_scores_5_transpose_x_0, transpose_y = attention_scores_5_transpose_y_0, x = transpose_36, y = transpose_35); - tensor<fp16, []> _inversed_attention_scores_7_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_attention_scores_7_y_0_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)]; - tensor<fp16, [1, 12, 512, 512]> _inversed_attention_scores_7_cast = mul(x = attention_scores_5_cast, y = _inversed_attention_scores_7_y_0_to_fp16); - tensor<fp16, [1, 12, 512, 512]> input_33_cast = add(x = _inversed_attention_scores_7_cast, y = cast_193); - tensor<fp16, [1, 12, 512, 512]> input_35_cast = softmax(axis = var_8, x = input_33_cast); - tensor<bool, []> context_layer_5_transpose_x_0 = const()[name = tensor<string, []>("context_layer_5_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> context_layer_5_transpose_y_0 = const()[name = tensor<string, []>("context_layer_5_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<fp16, [1, 12, 512, 32]> transpose_37 = transpose(perm = var_181, x = x_19_cast); - tensor<fp16, [1, 12, 512, 32]> context_layer_5_cast = matmul(transpose_x = context_layer_5_transpose_x_0, transpose_y = context_layer_5_transpose_y_0, x = input_35_cast, y = transpose_37); - tensor<int32, [4]> var_197 = const()[name = tensor<string, []>("op_197"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [3]> var_202 = const()[name = tensor<string, []>("op_202"), val = tensor<int32, [3]>([1, 512, 384])]; - tensor<fp16, [1, 512, 12, 32]> transpose_34 = transpose(perm = var_197, x = context_layer_5_cast); - tensor<fp16, [1, 512, 384]> input_37_cast = reshape(shape = var_202, x = transpose_34); - tensor<fp16, [384, 384]> model_encoder_layer_1_attention_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_1_attention_output_dense_weight_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28666624)))]; - tensor<fp16, [384]> model_encoder_layer_1_attention_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_1_attention_output_dense_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28961600)))]; - tensor<fp16, [1, 512, 384]> input_39_cast = linear(bias = model_encoder_layer_1_attention_output_dense_bias_to_fp16, weight = model_encoder_layer_1_attention_output_dense_weight_to_fp16, x = input_37_cast); - tensor<fp16, [1, 512, 384]> input_41_cast = add(x = input_39_cast, y = input_31_cast); - tensor<int32, [1]> input_43_axes_0 = const()[name = tensor<string, []>("input_43_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp16, [384]> model_encoder_layer_1_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_1_attention_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28962432)))]; - tensor<fp16, [384]> model_encoder_layer_1_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_1_attention_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28963264)))]; - tensor<fp16, [1, 512, 384]> input_43_cast = layer_norm(axes = input_43_axes_0, beta = model_encoder_layer_1_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_encoder_layer_1_attention_output_LayerNorm_weight_to_fp16, x = input_41_cast); - tensor<fp16, [1536, 384]> model_encoder_layer_1_intermediate_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_1_intermediate_dense_weight_to_fp16"), val = tensor<fp16, [1536, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28964096)))]; - tensor<fp16, [1536]> model_encoder_layer_1_intermediate_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_1_intermediate_dense_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30143808)))]; - tensor<fp16, [1, 512, 1536]> input_45_cast = linear(bias = model_encoder_layer_1_intermediate_dense_bias_to_fp16, weight = model_encoder_layer_1_intermediate_dense_weight_to_fp16, x = input_43_cast); - tensor<string, []> input_47_mode_0 = const()[name = tensor<string, []>("input_47_mode_0"), val = tensor<string, []>("EXACT")]; - tensor<fp16, [1, 512, 1536]> input_47_cast = gelu(mode = input_47_mode_0, x = input_45_cast); - tensor<fp16, [384, 1536]> model_encoder_layer_1_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_1_output_dense_weight_to_fp16"), val = tensor<fp16, [384, 1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30146944)))]; - tensor<fp16, [384]> model_encoder_layer_1_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_1_output_dense_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31326656)))]; - tensor<fp16, [1, 512, 384]> input_49_cast = linear(bias = model_encoder_layer_1_output_dense_bias_to_fp16, weight = model_encoder_layer_1_output_dense_weight_to_fp16, x = input_47_cast); - tensor<fp16, [1, 512, 384]> input_51_cast = add(x = input_49_cast, y = input_43_cast); - tensor<int32, [1]> input_53_axes_0 = const()[name = tensor<string, []>("input_53_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp16, [384]> model_encoder_layer_1_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_1_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31327488)))]; - tensor<fp16, [384]> model_encoder_layer_1_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_1_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31328320)))]; - tensor<fp16, [1, 512, 384]> input_53_cast = layer_norm(axes = input_53_axes_0, beta = model_encoder_layer_1_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_encoder_layer_1_output_LayerNorm_weight_to_fp16, x = input_51_cast); - tensor<fp16, [384, 384]> model_encoder_layer_2_attention_self_query_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_2_attention_self_query_weight_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31329152)))]; - tensor<fp16, [384]> model_encoder_layer_2_attention_self_query_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_2_attention_self_query_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31624128)))]; - tensor<fp16, [1, 512, 384]> x_33_cast = linear(bias = model_encoder_layer_2_attention_self_query_bias_to_fp16, weight = model_encoder_layer_2_attention_self_query_weight_to_fp16, x = input_53_cast); - tensor<fp16, [384, 384]> model_encoder_layer_2_attention_self_key_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_2_attention_self_key_weight_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31624960)))]; - tensor<fp16, [384]> model_encoder_layer_2_attention_self_key_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_2_attention_self_key_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31919936)))]; - tensor<fp16, [1, 512, 384]> x_25_cast = linear(bias = model_encoder_layer_2_attention_self_key_bias_to_fp16, weight = model_encoder_layer_2_attention_self_key_weight_to_fp16, x = input_53_cast); - tensor<int32, [4]> var_247 = const()[name = tensor<string, []>("op_247"), val = tensor<int32, [4]>([1, 512, 12, 32])]; - tensor<fp16, [1, 512, 12, 32]> x_27_cast = reshape(shape = var_247, x = x_25_cast); - tensor<fp16, [384, 384]> model_encoder_layer_2_attention_self_value_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_2_attention_self_value_weight_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(31920768)))]; - tensor<fp16, [384]> model_encoder_layer_2_attention_self_value_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_2_attention_self_value_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32215744)))]; - tensor<fp16, [1, 512, 384]> x_29_cast = linear(bias = model_encoder_layer_2_attention_self_value_bias_to_fp16, weight = model_encoder_layer_2_attention_self_value_weight_to_fp16, x = input_53_cast); - tensor<int32, [4]> var_256 = const()[name = tensor<string, []>("op_256"), val = tensor<int32, [4]>([1, 512, 12, 32])]; - tensor<fp16, [1, 512, 12, 32]> x_31_cast = reshape(shape = var_256, x = x_29_cast); - tensor<int32, [4]> var_258 = const()[name = tensor<string, []>("op_258"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> var_262 = const()[name = tensor<string, []>("op_262"), val = tensor<int32, [4]>([1, 512, 12, 32])]; - tensor<fp16, [1, 512, 12, 32]> x_35_cast = reshape(shape = var_262, x = x_33_cast); - tensor<bool, []> attention_scores_9_transpose_x_0 = const()[name = tensor<string, []>("attention_scores_9_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> attention_scores_9_transpose_y_0 = const()[name = tensor<string, []>("attention_scores_9_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<int32, [4]> transpose_10_perm_0 = const()[name = tensor<string, []>("transpose_10_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> transpose_11_perm_0 = const()[name = tensor<string, []>("transpose_11_perm_0"), val = tensor<int32, [4]>([0, 2, 3, 1])]; - tensor<fp16, [1, 12, 32, 512]> transpose_31 = transpose(perm = transpose_11_perm_0, x = x_27_cast); - tensor<fp16, [1, 12, 512, 32]> transpose_32 = transpose(perm = transpose_10_perm_0, x = x_35_cast); - tensor<fp16, [1, 12, 512, 512]> attention_scores_9_cast = matmul(transpose_x = attention_scores_9_transpose_x_0, transpose_y = attention_scores_9_transpose_y_0, x = transpose_32, y = transpose_31); - tensor<fp16, []> _inversed_attention_scores_11_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_attention_scores_11_y_0_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)]; - tensor<fp16, [1, 12, 512, 512]> _inversed_attention_scores_11_cast = mul(x = attention_scores_9_cast, y = _inversed_attention_scores_11_y_0_to_fp16); - tensor<fp16, [1, 12, 512, 512]> input_55_cast = add(x = _inversed_attention_scores_11_cast, y = cast_193); - tensor<fp16, [1, 12, 512, 512]> input_57_cast = softmax(axis = var_8, x = input_55_cast); - tensor<bool, []> context_layer_9_transpose_x_0 = const()[name = tensor<string, []>("context_layer_9_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> context_layer_9_transpose_y_0 = const()[name = tensor<string, []>("context_layer_9_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<fp16, [1, 12, 512, 32]> transpose_33 = transpose(perm = var_258, x = x_31_cast); - tensor<fp16, [1, 12, 512, 32]> context_layer_9_cast = matmul(transpose_x = context_layer_9_transpose_x_0, transpose_y = context_layer_9_transpose_y_0, x = input_57_cast, y = transpose_33); - tensor<int32, [4]> var_274 = const()[name = tensor<string, []>("op_274"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [3]> var_279 = const()[name = tensor<string, []>("op_279"), val = tensor<int32, [3]>([1, 512, 384])]; - tensor<fp16, [1, 512, 12, 32]> transpose_30 = transpose(perm = var_274, x = context_layer_9_cast); - tensor<fp16, [1, 512, 384]> input_59_cast = reshape(shape = var_279, x = transpose_30); - tensor<fp16, [384, 384]> model_encoder_layer_2_attention_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_2_attention_output_dense_weight_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32216576)))]; - tensor<fp16, [384]> model_encoder_layer_2_attention_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_2_attention_output_dense_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32511552)))]; - tensor<fp16, [1, 512, 384]> input_61_cast = linear(bias = model_encoder_layer_2_attention_output_dense_bias_to_fp16, weight = model_encoder_layer_2_attention_output_dense_weight_to_fp16, x = input_59_cast); - tensor<fp16, [1, 512, 384]> input_63_cast = add(x = input_61_cast, y = input_53_cast); - tensor<int32, [1]> input_65_axes_0 = const()[name = tensor<string, []>("input_65_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp16, [384]> model_encoder_layer_2_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_2_attention_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32512384)))]; - tensor<fp16, [384]> model_encoder_layer_2_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_2_attention_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32513216)))]; - tensor<fp16, [1, 512, 384]> input_65_cast = layer_norm(axes = input_65_axes_0, beta = model_encoder_layer_2_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_encoder_layer_2_attention_output_LayerNorm_weight_to_fp16, x = input_63_cast); - tensor<fp16, [1536, 384]> model_encoder_layer_2_intermediate_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_2_intermediate_dense_weight_to_fp16"), val = tensor<fp16, [1536, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32514048)))]; - tensor<fp16, [1536]> model_encoder_layer_2_intermediate_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_2_intermediate_dense_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33693760)))]; - tensor<fp16, [1, 512, 1536]> input_67_cast = linear(bias = model_encoder_layer_2_intermediate_dense_bias_to_fp16, weight = model_encoder_layer_2_intermediate_dense_weight_to_fp16, x = input_65_cast); - tensor<string, []> input_69_mode_0 = const()[name = tensor<string, []>("input_69_mode_0"), val = tensor<string, []>("EXACT")]; - tensor<fp16, [1, 512, 1536]> input_69_cast = gelu(mode = input_69_mode_0, x = input_67_cast); - tensor<fp16, [384, 1536]> model_encoder_layer_2_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_2_output_dense_weight_to_fp16"), val = tensor<fp16, [384, 1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33696896)))]; - tensor<fp16, [384]> model_encoder_layer_2_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_2_output_dense_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34876608)))]; - tensor<fp16, [1, 512, 384]> input_71_cast = linear(bias = model_encoder_layer_2_output_dense_bias_to_fp16, weight = model_encoder_layer_2_output_dense_weight_to_fp16, x = input_69_cast); - tensor<fp16, [1, 512, 384]> input_73_cast = add(x = input_71_cast, y = input_65_cast); - tensor<int32, [1]> input_75_axes_0 = const()[name = tensor<string, []>("input_75_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp16, [384]> model_encoder_layer_2_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_2_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34877440)))]; - tensor<fp16, [384]> model_encoder_layer_2_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_2_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34878272)))]; - tensor<fp16, [1, 512, 384]> input_75_cast = layer_norm(axes = input_75_axes_0, beta = model_encoder_layer_2_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_encoder_layer_2_output_LayerNorm_weight_to_fp16, x = input_73_cast); - tensor<fp16, [384, 384]> model_encoder_layer_3_attention_self_query_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_3_attention_self_query_weight_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34879104)))]; - tensor<fp16, [384]> model_encoder_layer_3_attention_self_query_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_3_attention_self_query_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35174080)))]; - tensor<fp16, [1, 512, 384]> x_45_cast = linear(bias = model_encoder_layer_3_attention_self_query_bias_to_fp16, weight = model_encoder_layer_3_attention_self_query_weight_to_fp16, x = input_75_cast); - tensor<fp16, [384, 384]> model_encoder_layer_3_attention_self_key_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_3_attention_self_key_weight_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35174912)))]; - tensor<fp16, [384]> model_encoder_layer_3_attention_self_key_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_3_attention_self_key_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35469888)))]; - tensor<fp16, [1, 512, 384]> x_37_cast = linear(bias = model_encoder_layer_3_attention_self_key_bias_to_fp16, weight = model_encoder_layer_3_attention_self_key_weight_to_fp16, x = input_75_cast); - tensor<int32, [4]> var_324 = const()[name = tensor<string, []>("op_324"), val = tensor<int32, [4]>([1, 512, 12, 32])]; - tensor<fp16, [1, 512, 12, 32]> x_39_cast = reshape(shape = var_324, x = x_37_cast); - tensor<fp16, [384, 384]> model_encoder_layer_3_attention_self_value_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_3_attention_self_value_weight_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35470720)))]; - tensor<fp16, [384]> model_encoder_layer_3_attention_self_value_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_3_attention_self_value_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35765696)))]; - tensor<fp16, [1, 512, 384]> x_41_cast = linear(bias = model_encoder_layer_3_attention_self_value_bias_to_fp16, weight = model_encoder_layer_3_attention_self_value_weight_to_fp16, x = input_75_cast); - tensor<int32, [4]> var_333 = const()[name = tensor<string, []>("op_333"), val = tensor<int32, [4]>([1, 512, 12, 32])]; - tensor<fp16, [1, 512, 12, 32]> x_43_cast = reshape(shape = var_333, x = x_41_cast); - tensor<int32, [4]> var_335 = const()[name = tensor<string, []>("op_335"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> var_339 = const()[name = tensor<string, []>("op_339"), val = tensor<int32, [4]>([1, 512, 12, 32])]; - tensor<fp16, [1, 512, 12, 32]> x_47_cast = reshape(shape = var_339, x = x_45_cast); - tensor<bool, []> attention_scores_13_transpose_x_0 = const()[name = tensor<string, []>("attention_scores_13_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> attention_scores_13_transpose_y_0 = const()[name = tensor<string, []>("attention_scores_13_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<int32, [4]> transpose_12_perm_0 = const()[name = tensor<string, []>("transpose_12_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> transpose_13_perm_0 = const()[name = tensor<string, []>("transpose_13_perm_0"), val = tensor<int32, [4]>([0, 2, 3, 1])]; - tensor<fp16, [1, 12, 32, 512]> transpose_27 = transpose(perm = transpose_13_perm_0, x = x_39_cast); - tensor<fp16, [1, 12, 512, 32]> transpose_28 = transpose(perm = transpose_12_perm_0, x = x_47_cast); - tensor<fp16, [1, 12, 512, 512]> attention_scores_13_cast = matmul(transpose_x = attention_scores_13_transpose_x_0, transpose_y = attention_scores_13_transpose_y_0, x = transpose_28, y = transpose_27); - tensor<fp16, []> _inversed_attention_scores_15_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_attention_scores_15_y_0_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)]; - tensor<fp16, [1, 12, 512, 512]> _inversed_attention_scores_15_cast = mul(x = attention_scores_13_cast, y = _inversed_attention_scores_15_y_0_to_fp16); - tensor<fp16, [1, 12, 512, 512]> input_77_cast = add(x = _inversed_attention_scores_15_cast, y = cast_193); - tensor<fp16, [1, 12, 512, 512]> input_79_cast = softmax(axis = var_8, x = input_77_cast); - tensor<bool, []> context_layer_13_transpose_x_0 = const()[name = tensor<string, []>("context_layer_13_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> context_layer_13_transpose_y_0 = const()[name = tensor<string, []>("context_layer_13_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<fp16, [1, 12, 512, 32]> transpose_29 = transpose(perm = var_335, x = x_43_cast); - tensor<fp16, [1, 12, 512, 32]> context_layer_13_cast = matmul(transpose_x = context_layer_13_transpose_x_0, transpose_y = context_layer_13_transpose_y_0, x = input_79_cast, y = transpose_29); - tensor<int32, [4]> var_351 = const()[name = tensor<string, []>("op_351"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [3]> var_356 = const()[name = tensor<string, []>("op_356"), val = tensor<int32, [3]>([1, 512, 384])]; - tensor<fp16, [1, 512, 12, 32]> transpose_26 = transpose(perm = var_351, x = context_layer_13_cast); - tensor<fp16, [1, 512, 384]> input_81_cast = reshape(shape = var_356, x = transpose_26); - tensor<fp16, [384, 384]> model_encoder_layer_3_attention_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_3_attention_output_dense_weight_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35766528)))]; - tensor<fp16, [384]> model_encoder_layer_3_attention_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_3_attention_output_dense_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36061504)))]; - tensor<fp16, [1, 512, 384]> input_83_cast = linear(bias = model_encoder_layer_3_attention_output_dense_bias_to_fp16, weight = model_encoder_layer_3_attention_output_dense_weight_to_fp16, x = input_81_cast); - tensor<fp16, [1, 512, 384]> input_85_cast = add(x = input_83_cast, y = input_75_cast); - tensor<int32, [1]> input_87_axes_0 = const()[name = tensor<string, []>("input_87_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp16, [384]> model_encoder_layer_3_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_3_attention_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36062336)))]; - tensor<fp16, [384]> model_encoder_layer_3_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_3_attention_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36063168)))]; - tensor<fp16, [1, 512, 384]> input_87_cast = layer_norm(axes = input_87_axes_0, beta = model_encoder_layer_3_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_encoder_layer_3_attention_output_LayerNorm_weight_to_fp16, x = input_85_cast); - tensor<fp16, [1536, 384]> model_encoder_layer_3_intermediate_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_3_intermediate_dense_weight_to_fp16"), val = tensor<fp16, [1536, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36064000)))]; - tensor<fp16, [1536]> model_encoder_layer_3_intermediate_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_3_intermediate_dense_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37243712)))]; - tensor<fp16, [1, 512, 1536]> input_89_cast = linear(bias = model_encoder_layer_3_intermediate_dense_bias_to_fp16, weight = model_encoder_layer_3_intermediate_dense_weight_to_fp16, x = input_87_cast); - tensor<string, []> input_91_mode_0 = const()[name = tensor<string, []>("input_91_mode_0"), val = tensor<string, []>("EXACT")]; - tensor<fp16, [1, 512, 1536]> input_91_cast = gelu(mode = input_91_mode_0, x = input_89_cast); - tensor<fp16, [384, 1536]> model_encoder_layer_3_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_3_output_dense_weight_to_fp16"), val = tensor<fp16, [384, 1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37246848)))]; - tensor<fp16, [384]> model_encoder_layer_3_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_3_output_dense_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38426560)))]; - tensor<fp16, [1, 512, 384]> input_93_cast = linear(bias = model_encoder_layer_3_output_dense_bias_to_fp16, weight = model_encoder_layer_3_output_dense_weight_to_fp16, x = input_91_cast); - tensor<fp16, [1, 512, 384]> input_95_cast = add(x = input_93_cast, y = input_87_cast); - tensor<int32, [1]> input_97_axes_0 = const()[name = tensor<string, []>("input_97_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp16, [384]> model_encoder_layer_3_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_3_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38427392)))]; - tensor<fp16, [384]> model_encoder_layer_3_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_3_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38428224)))]; - tensor<fp16, [1, 512, 384]> input_97_cast = layer_norm(axes = input_97_axes_0, beta = model_encoder_layer_3_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_encoder_layer_3_output_LayerNorm_weight_to_fp16, x = input_95_cast); - tensor<fp16, [384, 384]> model_encoder_layer_4_attention_self_query_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_4_attention_self_query_weight_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38429056)))]; - tensor<fp16, [384]> model_encoder_layer_4_attention_self_query_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_4_attention_self_query_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38724032)))]; - tensor<fp16, [1, 512, 384]> x_57_cast = linear(bias = model_encoder_layer_4_attention_self_query_bias_to_fp16, weight = model_encoder_layer_4_attention_self_query_weight_to_fp16, x = input_97_cast); - tensor<fp16, [384, 384]> model_encoder_layer_4_attention_self_key_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_4_attention_self_key_weight_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38724864)))]; - tensor<fp16, [384]> model_encoder_layer_4_attention_self_key_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_4_attention_self_key_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39019840)))]; - tensor<fp16, [1, 512, 384]> x_49_cast = linear(bias = model_encoder_layer_4_attention_self_key_bias_to_fp16, weight = model_encoder_layer_4_attention_self_key_weight_to_fp16, x = input_97_cast); - tensor<int32, [4]> var_401 = const()[name = tensor<string, []>("op_401"), val = tensor<int32, [4]>([1, 512, 12, 32])]; - tensor<fp16, [1, 512, 12, 32]> x_51_cast = reshape(shape = var_401, x = x_49_cast); - tensor<fp16, [384, 384]> model_encoder_layer_4_attention_self_value_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_4_attention_self_value_weight_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39020672)))]; - tensor<fp16, [384]> model_encoder_layer_4_attention_self_value_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_4_attention_self_value_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39315648)))]; - tensor<fp16, [1, 512, 384]> x_53_cast = linear(bias = model_encoder_layer_4_attention_self_value_bias_to_fp16, weight = model_encoder_layer_4_attention_self_value_weight_to_fp16, x = input_97_cast); - tensor<int32, [4]> var_410 = const()[name = tensor<string, []>("op_410"), val = tensor<int32, [4]>([1, 512, 12, 32])]; - tensor<fp16, [1, 512, 12, 32]> x_55_cast = reshape(shape = var_410, x = x_53_cast); - tensor<int32, [4]> var_412 = const()[name = tensor<string, []>("op_412"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> var_416 = const()[name = tensor<string, []>("op_416"), val = tensor<int32, [4]>([1, 512, 12, 32])]; - tensor<fp16, [1, 512, 12, 32]> x_59_cast = reshape(shape = var_416, x = x_57_cast); - tensor<bool, []> attention_scores_17_transpose_x_0 = const()[name = tensor<string, []>("attention_scores_17_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> attention_scores_17_transpose_y_0 = const()[name = tensor<string, []>("attention_scores_17_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<int32, [4]> transpose_14_perm_0 = const()[name = tensor<string, []>("transpose_14_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> transpose_15_perm_0 = const()[name = tensor<string, []>("transpose_15_perm_0"), val = tensor<int32, [4]>([0, 2, 3, 1])]; - tensor<fp16, [1, 12, 32, 512]> transpose_23 = transpose(perm = transpose_15_perm_0, x = x_51_cast); - tensor<fp16, [1, 12, 512, 32]> transpose_24 = transpose(perm = transpose_14_perm_0, x = x_59_cast); - tensor<fp16, [1, 12, 512, 512]> attention_scores_17_cast = matmul(transpose_x = attention_scores_17_transpose_x_0, transpose_y = attention_scores_17_transpose_y_0, x = transpose_24, y = transpose_23); - tensor<fp16, []> _inversed_attention_scores_19_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_attention_scores_19_y_0_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)]; - tensor<fp16, [1, 12, 512, 512]> _inversed_attention_scores_19_cast = mul(x = attention_scores_17_cast, y = _inversed_attention_scores_19_y_0_to_fp16); - tensor<fp16, [1, 12, 512, 512]> input_99_cast = add(x = _inversed_attention_scores_19_cast, y = cast_193); - tensor<fp16, [1, 12, 512, 512]> input_101_cast = softmax(axis = var_8, x = input_99_cast); - tensor<bool, []> context_layer_17_transpose_x_0 = const()[name = tensor<string, []>("context_layer_17_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> context_layer_17_transpose_y_0 = const()[name = tensor<string, []>("context_layer_17_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<fp16, [1, 12, 512, 32]> transpose_25 = transpose(perm = var_412, x = x_55_cast); - tensor<fp16, [1, 12, 512, 32]> context_layer_17_cast = matmul(transpose_x = context_layer_17_transpose_x_0, transpose_y = context_layer_17_transpose_y_0, x = input_101_cast, y = transpose_25); - tensor<int32, [4]> var_428 = const()[name = tensor<string, []>("op_428"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [3]> var_433 = const()[name = tensor<string, []>("op_433"), val = tensor<int32, [3]>([1, 512, 384])]; - tensor<fp16, [1, 512, 12, 32]> transpose_22 = transpose(perm = var_428, x = context_layer_17_cast); - tensor<fp16, [1, 512, 384]> input_103_cast = reshape(shape = var_433, x = transpose_22); - tensor<fp16, [384, 384]> model_encoder_layer_4_attention_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_4_attention_output_dense_weight_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39316480)))]; - tensor<fp16, [384]> model_encoder_layer_4_attention_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_4_attention_output_dense_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39611456)))]; - tensor<fp16, [1, 512, 384]> input_105_cast = linear(bias = model_encoder_layer_4_attention_output_dense_bias_to_fp16, weight = model_encoder_layer_4_attention_output_dense_weight_to_fp16, x = input_103_cast); - tensor<fp16, [1, 512, 384]> input_107_cast = add(x = input_105_cast, y = input_97_cast); - tensor<int32, [1]> input_109_axes_0 = const()[name = tensor<string, []>("input_109_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp16, [384]> model_encoder_layer_4_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_4_attention_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39612288)))]; - tensor<fp16, [384]> model_encoder_layer_4_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_4_attention_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39613120)))]; - tensor<fp16, [1, 512, 384]> input_109_cast = layer_norm(axes = input_109_axes_0, beta = model_encoder_layer_4_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_encoder_layer_4_attention_output_LayerNorm_weight_to_fp16, x = input_107_cast); - tensor<fp16, [1536, 384]> model_encoder_layer_4_intermediate_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_4_intermediate_dense_weight_to_fp16"), val = tensor<fp16, [1536, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39613952)))]; - tensor<fp16, [1536]> model_encoder_layer_4_intermediate_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_4_intermediate_dense_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40793664)))]; - tensor<fp16, [1, 512, 1536]> input_111_cast = linear(bias = model_encoder_layer_4_intermediate_dense_bias_to_fp16, weight = model_encoder_layer_4_intermediate_dense_weight_to_fp16, x = input_109_cast); - tensor<string, []> input_113_mode_0 = const()[name = tensor<string, []>("input_113_mode_0"), val = tensor<string, []>("EXACT")]; - tensor<fp16, [1, 512, 1536]> input_113_cast = gelu(mode = input_113_mode_0, x = input_111_cast); - tensor<fp16, [384, 1536]> model_encoder_layer_4_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_4_output_dense_weight_to_fp16"), val = tensor<fp16, [384, 1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40796800)))]; - tensor<fp16, [384]> model_encoder_layer_4_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_4_output_dense_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41976512)))]; - tensor<fp16, [1, 512, 384]> input_115_cast = linear(bias = model_encoder_layer_4_output_dense_bias_to_fp16, weight = model_encoder_layer_4_output_dense_weight_to_fp16, x = input_113_cast); - tensor<fp16, [1, 512, 384]> input_117_cast = add(x = input_115_cast, y = input_109_cast); - tensor<int32, [1]> input_119_axes_0 = const()[name = tensor<string, []>("input_119_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp16, [384]> model_encoder_layer_4_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_4_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41977344)))]; - tensor<fp16, [384]> model_encoder_layer_4_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_4_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41978176)))]; - tensor<fp16, [1, 512, 384]> input_119_cast = layer_norm(axes = input_119_axes_0, beta = model_encoder_layer_4_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_encoder_layer_4_output_LayerNorm_weight_to_fp16, x = input_117_cast); - tensor<fp16, [384, 384]> model_encoder_layer_5_attention_self_query_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_5_attention_self_query_weight_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41979008)))]; - tensor<fp16, [384]> model_encoder_layer_5_attention_self_query_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_5_attention_self_query_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42273984)))]; - tensor<fp16, [1, 512, 384]> x_69_cast = linear(bias = model_encoder_layer_5_attention_self_query_bias_to_fp16, weight = model_encoder_layer_5_attention_self_query_weight_to_fp16, x = input_119_cast); - tensor<fp16, [384, 384]> model_encoder_layer_5_attention_self_key_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_5_attention_self_key_weight_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42274816)))]; - tensor<fp16, [384]> model_encoder_layer_5_attention_self_key_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_5_attention_self_key_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42569792)))]; - tensor<fp16, [1, 512, 384]> x_61_cast = linear(bias = model_encoder_layer_5_attention_self_key_bias_to_fp16, weight = model_encoder_layer_5_attention_self_key_weight_to_fp16, x = input_119_cast); - tensor<int32, [4]> var_478 = const()[name = tensor<string, []>("op_478"), val = tensor<int32, [4]>([1, 512, 12, 32])]; - tensor<fp16, [1, 512, 12, 32]> x_63_cast = reshape(shape = var_478, x = x_61_cast); - tensor<fp16, [384, 384]> model_encoder_layer_5_attention_self_value_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_5_attention_self_value_weight_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42570624)))]; - tensor<fp16, [384]> model_encoder_layer_5_attention_self_value_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_5_attention_self_value_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42865600)))]; - tensor<fp16, [1, 512, 384]> x_65_cast = linear(bias = model_encoder_layer_5_attention_self_value_bias_to_fp16, weight = model_encoder_layer_5_attention_self_value_weight_to_fp16, x = input_119_cast); - tensor<int32, [4]> var_487 = const()[name = tensor<string, []>("op_487"), val = tensor<int32, [4]>([1, 512, 12, 32])]; - tensor<fp16, [1, 512, 12, 32]> x_67_cast = reshape(shape = var_487, x = x_65_cast); - tensor<int32, [4]> var_489 = const()[name = tensor<string, []>("op_489"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> var_493 = const()[name = tensor<string, []>("op_493"), val = tensor<int32, [4]>([1, 512, 12, 32])]; - tensor<fp16, [1, 512, 12, 32]> x_cast = reshape(shape = var_493, x = x_69_cast); - tensor<bool, []> attention_scores_21_transpose_x_0 = const()[name = tensor<string, []>("attention_scores_21_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> attention_scores_21_transpose_y_0 = const()[name = tensor<string, []>("attention_scores_21_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<int32, [4]> transpose_16_perm_0 = const()[name = tensor<string, []>("transpose_16_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> transpose_17_perm_0 = const()[name = tensor<string, []>("transpose_17_perm_0"), val = tensor<int32, [4]>([0, 2, 3, 1])]; - tensor<fp16, [1, 12, 32, 512]> transpose_19 = transpose(perm = transpose_17_perm_0, x = x_63_cast); - tensor<fp16, [1, 12, 512, 32]> transpose_20 = transpose(perm = transpose_16_perm_0, x = x_cast); - tensor<fp16, [1, 12, 512, 512]> attention_scores_21_cast = matmul(transpose_x = attention_scores_21_transpose_x_0, transpose_y = attention_scores_21_transpose_y_0, x = transpose_20, y = transpose_19); - tensor<fp16, []> _inversed_attention_scores_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_attention_scores_y_0_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)]; - tensor<fp16, [1, 12, 512, 512]> _inversed_attention_scores_cast = mul(x = attention_scores_21_cast, y = _inversed_attention_scores_y_0_to_fp16); - tensor<string, []> attention_mask_to_fp16_dtype_1 = const()[name = tensor<string, []>("attention_mask_to_fp16_dtype_1"), val = tensor<string, []>("fp16")]; - tensor<fp16, [1, 1, 1, 512]> cast_53 = cast(dtype = attention_mask_to_fp16_dtype_1, x = attention_mask_1); - tensor<fp16, [1, 12, 512, 512]> input_121_cast = add(x = _inversed_attention_scores_cast, y = cast_53); - tensor<fp16, [1, 12, 512, 512]> input_123_cast = softmax(axis = var_8, x = input_121_cast); - tensor<bool, []> context_layer_21_transpose_x_0 = const()[name = tensor<string, []>("context_layer_21_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> context_layer_21_transpose_y_0 = const()[name = tensor<string, []>("context_layer_21_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<fp16, [1, 12, 512, 32]> transpose_21 = transpose(perm = var_489, x = x_67_cast); - tensor<fp16, [1, 12, 512, 32]> context_layer_21_cast = matmul(transpose_x = context_layer_21_transpose_x_0, transpose_y = context_layer_21_transpose_y_0, x = input_123_cast, y = transpose_21); - tensor<int32, [4]> var_505 = const()[name = tensor<string, []>("op_505"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [3]> var_510 = const()[name = tensor<string, []>("op_510"), val = tensor<int32, [3]>([1, 512, 384])]; - tensor<fp16, [1, 512, 12, 32]> transpose_18 = transpose(perm = var_505, x = context_layer_21_cast); - tensor<fp16, [1, 512, 384]> input_125_cast = reshape(shape = var_510, x = transpose_18); - tensor<fp16, [384, 384]> model_encoder_layer_5_attention_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_5_attention_output_dense_weight_to_fp16"), val = tensor<fp16, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(42866432)))]; - tensor<fp16, [384]> model_encoder_layer_5_attention_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_5_attention_output_dense_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43161408)))]; - tensor<fp16, [1, 512, 384]> input_127_cast = linear(bias = model_encoder_layer_5_attention_output_dense_bias_to_fp16, weight = model_encoder_layer_5_attention_output_dense_weight_to_fp16, x = input_125_cast); - tensor<fp16, [1, 512, 384]> input_129_cast = add(x = input_127_cast, y = input_119_cast); - tensor<int32, [1]> input_131_axes_0 = const()[name = tensor<string, []>("input_131_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp16, [384]> model_encoder_layer_5_attention_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_5_attention_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43162240)))]; - tensor<fp16, [384]> model_encoder_layer_5_attention_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_5_attention_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43163072)))]; - tensor<fp16, [1, 512, 384]> input_131_cast = layer_norm(axes = input_131_axes_0, beta = model_encoder_layer_5_attention_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_encoder_layer_5_attention_output_LayerNorm_weight_to_fp16, x = input_129_cast); - tensor<fp16, [1536, 384]> model_encoder_layer_5_intermediate_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_5_intermediate_dense_weight_to_fp16"), val = tensor<fp16, [1536, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43163904)))]; - tensor<fp16, [1536]> model_encoder_layer_5_intermediate_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_5_intermediate_dense_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44343616)))]; - tensor<fp16, [1, 512, 1536]> input_133_cast = linear(bias = model_encoder_layer_5_intermediate_dense_bias_to_fp16, weight = model_encoder_layer_5_intermediate_dense_weight_to_fp16, x = input_131_cast); - tensor<string, []> input_135_mode_0 = const()[name = tensor<string, []>("input_135_mode_0"), val = tensor<string, []>("EXACT")]; - tensor<fp16, [1, 512, 1536]> input_135_cast = gelu(mode = input_135_mode_0, x = input_133_cast); - tensor<fp16, [384, 1536]> model_encoder_layer_5_output_dense_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_5_output_dense_weight_to_fp16"), val = tensor<fp16, [384, 1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44346752)))]; - tensor<fp16, [384]> model_encoder_layer_5_output_dense_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_5_output_dense_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45526464)))]; - tensor<fp16, [1, 512, 384]> input_137_cast = linear(bias = model_encoder_layer_5_output_dense_bias_to_fp16, weight = model_encoder_layer_5_output_dense_weight_to_fp16, x = input_135_cast); - tensor<fp16, [1, 512, 384]> input_139_cast = add(x = input_137_cast, y = input_131_cast); - tensor<int32, [1]> hidden_states_axes_0 = const()[name = tensor<string, []>("hidden_states_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp16, [384]> model_encoder_layer_5_output_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_5_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45527296)))]; - tensor<fp16, [384]> model_encoder_layer_5_output_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("model_encoder_layer_5_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45528128)))]; - tensor<fp16, [1, 512, 384]> hidden_states_cast = layer_norm(axes = hidden_states_axes_0, beta = model_encoder_layer_5_output_LayerNorm_bias_to_fp16, epsilon = var_10_to_fp16, gamma = model_encoder_layer_5_output_LayerNorm_weight_to_fp16, x = input_139_cast); - tensor<int32, [3]> var_546_begin_0 = const()[name = tensor<string, []>("op_546_begin_0"), val = tensor<int32, [3]>([0, 0, 0])]; - tensor<int32, [3]> var_546_end_0 = const()[name = tensor<string, []>("op_546_end_0"), val = tensor<int32, [3]>([1, 1, 384])]; - tensor<bool, [3]> var_546_end_mask_0 = const()[name = tensor<string, []>("op_546_end_mask_0"), val = tensor<bool, [3]>([true, false, true])]; - tensor<bool, [3]> var_546_squeeze_mask_0 = const()[name = tensor<string, []>("op_546_squeeze_mask_0"), val = tensor<bool, [3]>([false, true, false])]; - tensor<fp16, [1, 384]> var_546_cast = slice_by_index(begin = var_546_begin_0, end = var_546_end_0, end_mask = var_546_end_mask_0, squeeze_mask = var_546_squeeze_mask_0, x = hidden_states_cast); - tensor<string, []> var_546_cast_to_fp32_dtype_0 = const()[name = tensor<string, []>("op_546_cast_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; - tensor<fp32, [1, 384]> embeddings = cast(dtype = var_546_cast_to_fp32_dtype_0, x = var_546_cast); - } -> (embeddings); -} \ No newline at end of file diff --git a/Sources/SwiftNLP/Models/all-MiniLM-L6-v2.mlmodelc/weights/weight.bin b/Sources/SwiftNLP/Models/all-MiniLM-L6-v2.mlmodelc/weights/weight.bin deleted file mode 100644 index f9d0a4c23111a03fb3051b244c38bd6d85c74b48..0000000000000000000000000000000000000000 Binary files a/Sources/SwiftNLP/Models/all-MiniLM-L6-v2.mlmodelc/weights/weight.bin and /dev/null differ diff --git a/Sources/SwiftNLP/Models/float32_model.mlmodelc/analytics/coremldata.bin b/Sources/SwiftNLP/Models/float32_model.mlmodelc/analytics/coremldata.bin deleted file mode 100644 index 8ba8d463c4dcdb85db8524a058f730f7f5fe5307..0000000000000000000000000000000000000000 Binary files a/Sources/SwiftNLP/Models/float32_model.mlmodelc/analytics/coremldata.bin and /dev/null differ diff --git a/Sources/SwiftNLP/Models/float32_model.mlmodelc/coremldata.bin b/Sources/SwiftNLP/Models/float32_model.mlmodelc/coremldata.bin deleted file mode 100644 index a5f7878d99d8b11a852842e3cbffb2a20e622057..0000000000000000000000000000000000000000 Binary files a/Sources/SwiftNLP/Models/float32_model.mlmodelc/coremldata.bin and /dev/null differ diff --git a/Sources/SwiftNLP/Models/float32_model.mlmodelc/metadata.json b/Sources/SwiftNLP/Models/float32_model.mlmodelc/metadata.json deleted file mode 100644 index e68a06a26ab46dde93b27942f06f9abae3afc5b6..0000000000000000000000000000000000000000 --- a/Sources/SwiftNLP/Models/float32_model.mlmodelc/metadata.json +++ /dev/null @@ -1,98 +0,0 @@ -[ - { - "metadataOutputVersion" : "3.0", - "shortDescription" : "thenlper\/gte-small (feature-extraction)", - "outputSchema" : [ - { - "hasShapeFlexibility" : "0", - "isOptional" : "0", - "dataType" : "Float32", - "formattedType" : "MultiArray (Float32 1 × 128 × 384)", - "shortDescription" : "Sequence of hidden-states at the output of the last layer of the model", - "shape" : "[1, 128, 384]", - "name" : "last_hidden_state", - "type" : "MultiArray" - }, - { - "hasShapeFlexibility" : "0", - "isOptional" : "0", - "dataType" : "Float32", - "formattedType" : "MultiArray (Float32 1 × 384)", - "shortDescription" : "Last layer hidden-state of the first token of the sequence", - "shape" : "[1, 384]", - "name" : "pooler_output", - "type" : "MultiArray" - } - ], - "storagePrecision" : "Float32", - "modelParameters" : [ - - ], - "specificationVersion" : 6, - "mlProgramOperationTypeHistogram" : { - "Linear" : 73, - "Gelu" : 12, - "LayerNorm" : 25, - "SliceByIndex" : 1, - "Matmul" : 24, - "Sub" : 1, - "Tanh" : 1, - "Transpose" : 48, - "Softmax" : 12, - "Mul" : 13, - "Cast" : 1, - "Reshape" : 48, - "Add" : 38, - "ExpandDims" : 2, - "Gather" : 1 - }, - "computePrecision" : "Mixed (Float32, Int32)", - "isUpdatable" : "0", - "availability" : { - "macOS" : "12.0", - "tvOS" : "15.0", - "visionOS" : "1.0", - "watchOS" : "8.0", - "iOS" : "15.0", - "macCatalyst" : "15.0" - }, - "modelType" : { - "name" : "MLModelType_mlProgram" - }, - "userDefinedMetadata" : { - "com.github.apple.coremltools.source" : "torch==2.1.0", - "com.github.apple.coremltools.version" : "7.1", - "transformers_version" : "4.28.1", - "com.github.apple.coremltools.source_dialect" : "TorchScript", - "co.huggingface.exporters.architecture" : "BertModel", - "co.huggingface.exporters.name" : "thenlper\/gte-small", - "co.huggingface.exporters.framework" : "pytorch", - "co.huggingface.exporters.task" : "feature-extraction", - "co.huggingface.exporters.precision" : "float32" - }, - "inputSchema" : [ - { - "hasShapeFlexibility" : "0", - "isOptional" : "0", - "dataType" : "Int32", - "formattedType" : "MultiArray (Int32 1 × 128)", - "shortDescription" : "Indices of input sequence tokens in the vocabulary", - "shape" : "[1, 128]", - "name" : "input_ids", - "type" : "MultiArray" - }, - { - "hasShapeFlexibility" : "0", - "isOptional" : "0", - "dataType" : "Int32", - "formattedType" : "MultiArray (Int32 1 × 128)", - "shortDescription" : "Mask to avoid performing attention on padding token indices (1 = not masked, 0 = masked)", - "shape" : "[1, 128]", - "name" : "attention_mask", - "type" : "MultiArray" - } - ], - "generatedClassName" : "float32_model", - "method" : "predict" - } -] \ No newline at end of file diff --git a/Sources/SwiftNLP/Models/float32_model.mlmodelc/model.mil b/Sources/SwiftNLP/Models/float32_model.mlmodelc/model.mil deleted file mode 100644 index f0c89a26e339b10c94771f51214454cfcbfa9871..0000000000000000000000000000000000000000 --- a/Sources/SwiftNLP/Models/float32_model.mlmodelc/model.mil +++ /dev/null @@ -1,710 +0,0 @@ -program(1.0) -[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.1"}})] -{ - func main<ios15>(tensor<int32, [1, 128]> attention_mask, tensor<int32, [1, 128]> input_ids) { - tensor<fp32, [30522, 384]> model_embeddings_word_embeddings_weight = const()[name = tensor<string, []>("model_embeddings_word_embeddings_weight"), val = tensor<fp32, [30522, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))]; - tensor<fp32, [384]> model_embeddings_LayerNorm_bias = const()[name = tensor<string, []>("model_embeddings_LayerNorm_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46881920)))]; - tensor<fp32, [384]> model_embeddings_LayerNorm_weight = const()[name = tensor<string, []>("model_embeddings_LayerNorm_weight"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46883520)))]; - tensor<fp32, [384]> model_encoder_layer_0_attention_self_query_bias = const()[name = tensor<string, []>("model_encoder_layer_0_attention_self_query_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46885120)))]; - tensor<fp32, [384, 384]> model_encoder_layer_0_attention_self_query_weight = const()[name = tensor<string, []>("model_encoder_layer_0_attention_self_query_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(46886720)))]; - tensor<fp32, [384]> model_encoder_layer_0_attention_self_key_bias = const()[name = tensor<string, []>("model_encoder_layer_0_attention_self_key_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47476608)))]; - tensor<fp32, [384, 384]> model_encoder_layer_0_attention_self_key_weight = const()[name = tensor<string, []>("model_encoder_layer_0_attention_self_key_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47478208)))]; - tensor<fp32, [384]> model_encoder_layer_0_attention_self_value_bias = const()[name = tensor<string, []>("model_encoder_layer_0_attention_self_value_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48068096)))]; - tensor<fp32, [384, 384]> model_encoder_layer_0_attention_self_value_weight = const()[name = tensor<string, []>("model_encoder_layer_0_attention_self_value_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48069696)))]; - tensor<fp32, [384]> model_encoder_layer_0_attention_output_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_0_attention_output_dense_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48659584)))]; - tensor<fp32, [384, 384]> model_encoder_layer_0_attention_output_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_0_attention_output_dense_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48661184)))]; - tensor<fp32, [384]> model_encoder_layer_0_attention_output_LayerNorm_bias = const()[name = tensor<string, []>("model_encoder_layer_0_attention_output_LayerNorm_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49251072)))]; - tensor<fp32, [384]> model_encoder_layer_0_attention_output_LayerNorm_weight = const()[name = tensor<string, []>("model_encoder_layer_0_attention_output_LayerNorm_weight"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49252672)))]; - tensor<fp32, [1536]> model_encoder_layer_0_intermediate_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_0_intermediate_dense_bias"), val = tensor<fp32, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49254272)))]; - tensor<fp32, [1536, 384]> model_encoder_layer_0_intermediate_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_0_intermediate_dense_weight"), val = tensor<fp32, [1536, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49260480)))]; - tensor<fp32, [384]> model_encoder_layer_0_output_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_0_output_dense_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51619840)))]; - tensor<fp32, [384, 1536]> model_encoder_layer_0_output_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_0_output_dense_weight"), val = tensor<fp32, [384, 1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51621440)))]; - tensor<fp32, [384]> model_encoder_layer_0_output_LayerNorm_bias = const()[name = tensor<string, []>("model_encoder_layer_0_output_LayerNorm_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53980800)))]; - tensor<fp32, [384]> model_encoder_layer_0_output_LayerNorm_weight = const()[name = tensor<string, []>("model_encoder_layer_0_output_LayerNorm_weight"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53982400)))]; - tensor<fp32, [384]> model_encoder_layer_1_attention_self_query_bias = const()[name = tensor<string, []>("model_encoder_layer_1_attention_self_query_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53984000)))]; - tensor<fp32, [384, 384]> model_encoder_layer_1_attention_self_query_weight = const()[name = tensor<string, []>("model_encoder_layer_1_attention_self_query_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53985600)))]; - tensor<fp32, [384]> model_encoder_layer_1_attention_self_key_bias = const()[name = tensor<string, []>("model_encoder_layer_1_attention_self_key_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54575488)))]; - tensor<fp32, [384, 384]> model_encoder_layer_1_attention_self_key_weight = const()[name = tensor<string, []>("model_encoder_layer_1_attention_self_key_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54577088)))]; - tensor<fp32, [384]> model_encoder_layer_1_attention_self_value_bias = const()[name = tensor<string, []>("model_encoder_layer_1_attention_self_value_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55166976)))]; - tensor<fp32, [384, 384]> model_encoder_layer_1_attention_self_value_weight = const()[name = tensor<string, []>("model_encoder_layer_1_attention_self_value_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55168576)))]; - tensor<fp32, [384]> model_encoder_layer_1_attention_output_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_1_attention_output_dense_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55758464)))]; - tensor<fp32, [384, 384]> model_encoder_layer_1_attention_output_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_1_attention_output_dense_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55760064)))]; - tensor<fp32, [384]> model_encoder_layer_1_attention_output_LayerNorm_bias = const()[name = tensor<string, []>("model_encoder_layer_1_attention_output_LayerNorm_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56349952)))]; - tensor<fp32, [384]> model_encoder_layer_1_attention_output_LayerNorm_weight = const()[name = tensor<string, []>("model_encoder_layer_1_attention_output_LayerNorm_weight"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56351552)))]; - tensor<fp32, [1536]> model_encoder_layer_1_intermediate_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_1_intermediate_dense_bias"), val = tensor<fp32, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56353152)))]; - tensor<fp32, [1536, 384]> model_encoder_layer_1_intermediate_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_1_intermediate_dense_weight"), val = tensor<fp32, [1536, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56359360)))]; - tensor<fp32, [384]> model_encoder_layer_1_output_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_1_output_dense_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(58718720)))]; - tensor<fp32, [384, 1536]> model_encoder_layer_1_output_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_1_output_dense_weight"), val = tensor<fp32, [384, 1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(58720320)))]; - tensor<fp32, [384]> model_encoder_layer_1_output_LayerNorm_bias = const()[name = tensor<string, []>("model_encoder_layer_1_output_LayerNorm_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(61079680)))]; - tensor<fp32, [384]> model_encoder_layer_1_output_LayerNorm_weight = const()[name = tensor<string, []>("model_encoder_layer_1_output_LayerNorm_weight"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(61081280)))]; - tensor<fp32, [384]> model_encoder_layer_2_attention_self_query_bias = const()[name = tensor<string, []>("model_encoder_layer_2_attention_self_query_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(61082880)))]; - tensor<fp32, [384, 384]> model_encoder_layer_2_attention_self_query_weight = const()[name = tensor<string, []>("model_encoder_layer_2_attention_self_query_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(61084480)))]; - tensor<fp32, [384]> model_encoder_layer_2_attention_self_key_bias = const()[name = tensor<string, []>("model_encoder_layer_2_attention_self_key_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(61674368)))]; - tensor<fp32, [384, 384]> model_encoder_layer_2_attention_self_key_weight = const()[name = tensor<string, []>("model_encoder_layer_2_attention_self_key_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(61675968)))]; - tensor<fp32, [384]> model_encoder_layer_2_attention_self_value_bias = const()[name = tensor<string, []>("model_encoder_layer_2_attention_self_value_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62265856)))]; - tensor<fp32, [384, 384]> model_encoder_layer_2_attention_self_value_weight = const()[name = tensor<string, []>("model_encoder_layer_2_attention_self_value_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62267456)))]; - tensor<fp32, [384]> model_encoder_layer_2_attention_output_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_2_attention_output_dense_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62857344)))]; - tensor<fp32, [384, 384]> model_encoder_layer_2_attention_output_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_2_attention_output_dense_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62858944)))]; - tensor<fp32, [384]> model_encoder_layer_2_attention_output_LayerNorm_bias = const()[name = tensor<string, []>("model_encoder_layer_2_attention_output_LayerNorm_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(63448832)))]; - tensor<fp32, [384]> model_encoder_layer_2_attention_output_LayerNorm_weight = const()[name = tensor<string, []>("model_encoder_layer_2_attention_output_LayerNorm_weight"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(63450432)))]; - tensor<fp32, [1536]> model_encoder_layer_2_intermediate_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_2_intermediate_dense_bias"), val = tensor<fp32, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(63452032)))]; - tensor<fp32, [1536, 384]> model_encoder_layer_2_intermediate_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_2_intermediate_dense_weight"), val = tensor<fp32, [1536, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(63458240)))]; - tensor<fp32, [384]> model_encoder_layer_2_output_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_2_output_dense_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(65817600)))]; - tensor<fp32, [384, 1536]> model_encoder_layer_2_output_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_2_output_dense_weight"), val = tensor<fp32, [384, 1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(65819200)))]; - tensor<fp32, [384]> model_encoder_layer_2_output_LayerNorm_bias = const()[name = tensor<string, []>("model_encoder_layer_2_output_LayerNorm_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(68178560)))]; - tensor<fp32, [384]> model_encoder_layer_2_output_LayerNorm_weight = const()[name = tensor<string, []>("model_encoder_layer_2_output_LayerNorm_weight"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(68180160)))]; - tensor<fp32, [384]> model_encoder_layer_3_attention_self_query_bias = const()[name = tensor<string, []>("model_encoder_layer_3_attention_self_query_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(68181760)))]; - tensor<fp32, [384, 384]> model_encoder_layer_3_attention_self_query_weight = const()[name = tensor<string, []>("model_encoder_layer_3_attention_self_query_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(68183360)))]; - tensor<fp32, [384]> model_encoder_layer_3_attention_self_key_bias = const()[name = tensor<string, []>("model_encoder_layer_3_attention_self_key_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(68773248)))]; - tensor<fp32, [384, 384]> model_encoder_layer_3_attention_self_key_weight = const()[name = tensor<string, []>("model_encoder_layer_3_attention_self_key_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(68774848)))]; - tensor<fp32, [384]> model_encoder_layer_3_attention_self_value_bias = const()[name = tensor<string, []>("model_encoder_layer_3_attention_self_value_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69364736)))]; - tensor<fp32, [384, 384]> model_encoder_layer_3_attention_self_value_weight = const()[name = tensor<string, []>("model_encoder_layer_3_attention_self_value_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69366336)))]; - tensor<fp32, [384]> model_encoder_layer_3_attention_output_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_3_attention_output_dense_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69956224)))]; - tensor<fp32, [384, 384]> model_encoder_layer_3_attention_output_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_3_attention_output_dense_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69957824)))]; - tensor<fp32, [384]> model_encoder_layer_3_attention_output_LayerNorm_bias = const()[name = tensor<string, []>("model_encoder_layer_3_attention_output_LayerNorm_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(70547712)))]; - tensor<fp32, [384]> model_encoder_layer_3_attention_output_LayerNorm_weight = const()[name = tensor<string, []>("model_encoder_layer_3_attention_output_LayerNorm_weight"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(70549312)))]; - tensor<fp32, [1536]> model_encoder_layer_3_intermediate_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_3_intermediate_dense_bias"), val = tensor<fp32, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(70550912)))]; - tensor<fp32, [1536, 384]> model_encoder_layer_3_intermediate_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_3_intermediate_dense_weight"), val = tensor<fp32, [1536, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(70557120)))]; - tensor<fp32, [384]> model_encoder_layer_3_output_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_3_output_dense_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(72916480)))]; - tensor<fp32, [384, 1536]> model_encoder_layer_3_output_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_3_output_dense_weight"), val = tensor<fp32, [384, 1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(72918080)))]; - tensor<fp32, [384]> model_encoder_layer_3_output_LayerNorm_bias = const()[name = tensor<string, []>("model_encoder_layer_3_output_LayerNorm_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(75277440)))]; - tensor<fp32, [384]> model_encoder_layer_3_output_LayerNorm_weight = const()[name = tensor<string, []>("model_encoder_layer_3_output_LayerNorm_weight"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(75279040)))]; - tensor<fp32, [384]> model_encoder_layer_4_attention_self_query_bias = const()[name = tensor<string, []>("model_encoder_layer_4_attention_self_query_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(75280640)))]; - tensor<fp32, [384, 384]> model_encoder_layer_4_attention_self_query_weight = const()[name = tensor<string, []>("model_encoder_layer_4_attention_self_query_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(75282240)))]; - tensor<fp32, [384]> model_encoder_layer_4_attention_self_key_bias = const()[name = tensor<string, []>("model_encoder_layer_4_attention_self_key_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(75872128)))]; - tensor<fp32, [384, 384]> model_encoder_layer_4_attention_self_key_weight = const()[name = tensor<string, []>("model_encoder_layer_4_attention_self_key_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(75873728)))]; - tensor<fp32, [384]> model_encoder_layer_4_attention_self_value_bias = const()[name = tensor<string, []>("model_encoder_layer_4_attention_self_value_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(76463616)))]; - tensor<fp32, [384, 384]> model_encoder_layer_4_attention_self_value_weight = const()[name = tensor<string, []>("model_encoder_layer_4_attention_self_value_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(76465216)))]; - tensor<fp32, [384]> model_encoder_layer_4_attention_output_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_4_attention_output_dense_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77055104)))]; - tensor<fp32, [384, 384]> model_encoder_layer_4_attention_output_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_4_attention_output_dense_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77056704)))]; - tensor<fp32, [384]> model_encoder_layer_4_attention_output_LayerNorm_bias = const()[name = tensor<string, []>("model_encoder_layer_4_attention_output_LayerNorm_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77646592)))]; - tensor<fp32, [384]> model_encoder_layer_4_attention_output_LayerNorm_weight = const()[name = tensor<string, []>("model_encoder_layer_4_attention_output_LayerNorm_weight"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77648192)))]; - tensor<fp32, [1536]> model_encoder_layer_4_intermediate_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_4_intermediate_dense_bias"), val = tensor<fp32, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77649792)))]; - tensor<fp32, [1536, 384]> model_encoder_layer_4_intermediate_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_4_intermediate_dense_weight"), val = tensor<fp32, [1536, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77656000)))]; - tensor<fp32, [384]> model_encoder_layer_4_output_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_4_output_dense_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80015360)))]; - tensor<fp32, [384, 1536]> model_encoder_layer_4_output_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_4_output_dense_weight"), val = tensor<fp32, [384, 1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80016960)))]; - tensor<fp32, [384]> model_encoder_layer_4_output_LayerNorm_bias = const()[name = tensor<string, []>("model_encoder_layer_4_output_LayerNorm_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(82376320)))]; - tensor<fp32, [384]> model_encoder_layer_4_output_LayerNorm_weight = const()[name = tensor<string, []>("model_encoder_layer_4_output_LayerNorm_weight"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(82377920)))]; - tensor<fp32, [384]> model_encoder_layer_5_attention_self_query_bias = const()[name = tensor<string, []>("model_encoder_layer_5_attention_self_query_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(82379520)))]; - tensor<fp32, [384, 384]> model_encoder_layer_5_attention_self_query_weight = const()[name = tensor<string, []>("model_encoder_layer_5_attention_self_query_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(82381120)))]; - tensor<fp32, [384]> model_encoder_layer_5_attention_self_key_bias = const()[name = tensor<string, []>("model_encoder_layer_5_attention_self_key_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(82971008)))]; - tensor<fp32, [384, 384]> model_encoder_layer_5_attention_self_key_weight = const()[name = tensor<string, []>("model_encoder_layer_5_attention_self_key_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(82972608)))]; - tensor<fp32, [384]> model_encoder_layer_5_attention_self_value_bias = const()[name = tensor<string, []>("model_encoder_layer_5_attention_self_value_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(83562496)))]; - tensor<fp32, [384, 384]> model_encoder_layer_5_attention_self_value_weight = const()[name = tensor<string, []>("model_encoder_layer_5_attention_self_value_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(83564096)))]; - tensor<fp32, [384]> model_encoder_layer_5_attention_output_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_5_attention_output_dense_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84153984)))]; - tensor<fp32, [384, 384]> model_encoder_layer_5_attention_output_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_5_attention_output_dense_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84155584)))]; - tensor<fp32, [384]> model_encoder_layer_5_attention_output_LayerNorm_bias = const()[name = tensor<string, []>("model_encoder_layer_5_attention_output_LayerNorm_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84745472)))]; - tensor<fp32, [384]> model_encoder_layer_5_attention_output_LayerNorm_weight = const()[name = tensor<string, []>("model_encoder_layer_5_attention_output_LayerNorm_weight"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84747072)))]; - tensor<fp32, [1536]> model_encoder_layer_5_intermediate_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_5_intermediate_dense_bias"), val = tensor<fp32, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84748672)))]; - tensor<fp32, [1536, 384]> model_encoder_layer_5_intermediate_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_5_intermediate_dense_weight"), val = tensor<fp32, [1536, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84754880)))]; - tensor<fp32, [384]> model_encoder_layer_5_output_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_5_output_dense_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(87114240)))]; - tensor<fp32, [384, 1536]> model_encoder_layer_5_output_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_5_output_dense_weight"), val = tensor<fp32, [384, 1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(87115840)))]; - tensor<fp32, [384]> model_encoder_layer_5_output_LayerNorm_bias = const()[name = tensor<string, []>("model_encoder_layer_5_output_LayerNorm_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(89475200)))]; - tensor<fp32, [384]> model_encoder_layer_5_output_LayerNorm_weight = const()[name = tensor<string, []>("model_encoder_layer_5_output_LayerNorm_weight"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(89476800)))]; - tensor<fp32, [384]> model_encoder_layer_6_attention_self_query_bias = const()[name = tensor<string, []>("model_encoder_layer_6_attention_self_query_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(89478400)))]; - tensor<fp32, [384, 384]> model_encoder_layer_6_attention_self_query_weight = const()[name = tensor<string, []>("model_encoder_layer_6_attention_self_query_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(89480000)))]; - tensor<fp32, [384]> model_encoder_layer_6_attention_self_key_bias = const()[name = tensor<string, []>("model_encoder_layer_6_attention_self_key_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(90069888)))]; - tensor<fp32, [384, 384]> model_encoder_layer_6_attention_self_key_weight = const()[name = tensor<string, []>("model_encoder_layer_6_attention_self_key_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(90071488)))]; - tensor<fp32, [384]> model_encoder_layer_6_attention_self_value_bias = const()[name = tensor<string, []>("model_encoder_layer_6_attention_self_value_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(90661376)))]; - tensor<fp32, [384, 384]> model_encoder_layer_6_attention_self_value_weight = const()[name = tensor<string, []>("model_encoder_layer_6_attention_self_value_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(90662976)))]; - tensor<fp32, [384]> model_encoder_layer_6_attention_output_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_6_attention_output_dense_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(91252864)))]; - tensor<fp32, [384, 384]> model_encoder_layer_6_attention_output_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_6_attention_output_dense_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(91254464)))]; - tensor<fp32, [384]> model_encoder_layer_6_attention_output_LayerNorm_bias = const()[name = tensor<string, []>("model_encoder_layer_6_attention_output_LayerNorm_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(91844352)))]; - tensor<fp32, [384]> model_encoder_layer_6_attention_output_LayerNorm_weight = const()[name = tensor<string, []>("model_encoder_layer_6_attention_output_LayerNorm_weight"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(91845952)))]; - tensor<fp32, [1536]> model_encoder_layer_6_intermediate_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_6_intermediate_dense_bias"), val = tensor<fp32, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(91847552)))]; - tensor<fp32, [1536, 384]> model_encoder_layer_6_intermediate_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_6_intermediate_dense_weight"), val = tensor<fp32, [1536, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(91853760)))]; - tensor<fp32, [384]> model_encoder_layer_6_output_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_6_output_dense_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(94213120)))]; - tensor<fp32, [384, 1536]> model_encoder_layer_6_output_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_6_output_dense_weight"), val = tensor<fp32, [384, 1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(94214720)))]; - tensor<fp32, [384]> model_encoder_layer_6_output_LayerNorm_bias = const()[name = tensor<string, []>("model_encoder_layer_6_output_LayerNorm_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(96574080)))]; - tensor<fp32, [384]> model_encoder_layer_6_output_LayerNorm_weight = const()[name = tensor<string, []>("model_encoder_layer_6_output_LayerNorm_weight"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(96575680)))]; - tensor<fp32, [384]> model_encoder_layer_7_attention_self_query_bias = const()[name = tensor<string, []>("model_encoder_layer_7_attention_self_query_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(96577280)))]; - tensor<fp32, [384, 384]> model_encoder_layer_7_attention_self_query_weight = const()[name = tensor<string, []>("model_encoder_layer_7_attention_self_query_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(96578880)))]; - tensor<fp32, [384]> model_encoder_layer_7_attention_self_key_bias = const()[name = tensor<string, []>("model_encoder_layer_7_attention_self_key_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(97168768)))]; - tensor<fp32, [384, 384]> model_encoder_layer_7_attention_self_key_weight = const()[name = tensor<string, []>("model_encoder_layer_7_attention_self_key_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(97170368)))]; - tensor<fp32, [384]> model_encoder_layer_7_attention_self_value_bias = const()[name = tensor<string, []>("model_encoder_layer_7_attention_self_value_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(97760256)))]; - tensor<fp32, [384, 384]> model_encoder_layer_7_attention_self_value_weight = const()[name = tensor<string, []>("model_encoder_layer_7_attention_self_value_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(97761856)))]; - tensor<fp32, [384]> model_encoder_layer_7_attention_output_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_7_attention_output_dense_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(98351744)))]; - tensor<fp32, [384, 384]> model_encoder_layer_7_attention_output_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_7_attention_output_dense_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(98353344)))]; - tensor<fp32, [384]> model_encoder_layer_7_attention_output_LayerNorm_bias = const()[name = tensor<string, []>("model_encoder_layer_7_attention_output_LayerNorm_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(98943232)))]; - tensor<fp32, [384]> model_encoder_layer_7_attention_output_LayerNorm_weight = const()[name = tensor<string, []>("model_encoder_layer_7_attention_output_LayerNorm_weight"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(98944832)))]; - tensor<fp32, [1536]> model_encoder_layer_7_intermediate_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_7_intermediate_dense_bias"), val = tensor<fp32, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(98946432)))]; - tensor<fp32, [1536, 384]> model_encoder_layer_7_intermediate_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_7_intermediate_dense_weight"), val = tensor<fp32, [1536, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(98952640)))]; - tensor<fp32, [384]> model_encoder_layer_7_output_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_7_output_dense_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(101312000)))]; - tensor<fp32, [384, 1536]> model_encoder_layer_7_output_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_7_output_dense_weight"), val = tensor<fp32, [384, 1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(101313600)))]; - tensor<fp32, [384]> model_encoder_layer_7_output_LayerNorm_bias = const()[name = tensor<string, []>("model_encoder_layer_7_output_LayerNorm_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(103672960)))]; - tensor<fp32, [384]> model_encoder_layer_7_output_LayerNorm_weight = const()[name = tensor<string, []>("model_encoder_layer_7_output_LayerNorm_weight"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(103674560)))]; - tensor<fp32, [384]> model_encoder_layer_8_attention_self_query_bias = const()[name = tensor<string, []>("model_encoder_layer_8_attention_self_query_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(103676160)))]; - tensor<fp32, [384, 384]> model_encoder_layer_8_attention_self_query_weight = const()[name = tensor<string, []>("model_encoder_layer_8_attention_self_query_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(103677760)))]; - tensor<fp32, [384]> model_encoder_layer_8_attention_self_key_bias = const()[name = tensor<string, []>("model_encoder_layer_8_attention_self_key_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(104267648)))]; - tensor<fp32, [384, 384]> model_encoder_layer_8_attention_self_key_weight = const()[name = tensor<string, []>("model_encoder_layer_8_attention_self_key_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(104269248)))]; - tensor<fp32, [384]> model_encoder_layer_8_attention_self_value_bias = const()[name = tensor<string, []>("model_encoder_layer_8_attention_self_value_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(104859136)))]; - tensor<fp32, [384, 384]> model_encoder_layer_8_attention_self_value_weight = const()[name = tensor<string, []>("model_encoder_layer_8_attention_self_value_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(104860736)))]; - tensor<fp32, [384]> model_encoder_layer_8_attention_output_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_8_attention_output_dense_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(105450624)))]; - tensor<fp32, [384, 384]> model_encoder_layer_8_attention_output_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_8_attention_output_dense_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(105452224)))]; - tensor<fp32, [384]> model_encoder_layer_8_attention_output_LayerNorm_bias = const()[name = tensor<string, []>("model_encoder_layer_8_attention_output_LayerNorm_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106042112)))]; - tensor<fp32, [384]> model_encoder_layer_8_attention_output_LayerNorm_weight = const()[name = tensor<string, []>("model_encoder_layer_8_attention_output_LayerNorm_weight"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106043712)))]; - tensor<fp32, [1536]> model_encoder_layer_8_intermediate_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_8_intermediate_dense_bias"), val = tensor<fp32, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106045312)))]; - tensor<fp32, [1536, 384]> model_encoder_layer_8_intermediate_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_8_intermediate_dense_weight"), val = tensor<fp32, [1536, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106051520)))]; - tensor<fp32, [384]> model_encoder_layer_8_output_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_8_output_dense_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(108410880)))]; - tensor<fp32, [384, 1536]> model_encoder_layer_8_output_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_8_output_dense_weight"), val = tensor<fp32, [384, 1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(108412480)))]; - tensor<fp32, [384]> model_encoder_layer_8_output_LayerNorm_bias = const()[name = tensor<string, []>("model_encoder_layer_8_output_LayerNorm_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110771840)))]; - tensor<fp32, [384]> model_encoder_layer_8_output_LayerNorm_weight = const()[name = tensor<string, []>("model_encoder_layer_8_output_LayerNorm_weight"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110773440)))]; - tensor<fp32, [384]> model_encoder_layer_9_attention_self_query_bias = const()[name = tensor<string, []>("model_encoder_layer_9_attention_self_query_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110775040)))]; - tensor<fp32, [384, 384]> model_encoder_layer_9_attention_self_query_weight = const()[name = tensor<string, []>("model_encoder_layer_9_attention_self_query_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110776640)))]; - tensor<fp32, [384]> model_encoder_layer_9_attention_self_key_bias = const()[name = tensor<string, []>("model_encoder_layer_9_attention_self_key_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(111366528)))]; - tensor<fp32, [384, 384]> model_encoder_layer_9_attention_self_key_weight = const()[name = tensor<string, []>("model_encoder_layer_9_attention_self_key_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(111368128)))]; - tensor<fp32, [384]> model_encoder_layer_9_attention_self_value_bias = const()[name = tensor<string, []>("model_encoder_layer_9_attention_self_value_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(111958016)))]; - tensor<fp32, [384, 384]> model_encoder_layer_9_attention_self_value_weight = const()[name = tensor<string, []>("model_encoder_layer_9_attention_self_value_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(111959616)))]; - tensor<fp32, [384]> model_encoder_layer_9_attention_output_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_9_attention_output_dense_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(112549504)))]; - tensor<fp32, [384, 384]> model_encoder_layer_9_attention_output_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_9_attention_output_dense_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(112551104)))]; - tensor<fp32, [384]> model_encoder_layer_9_attention_output_LayerNorm_bias = const()[name = tensor<string, []>("model_encoder_layer_9_attention_output_LayerNorm_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(113140992)))]; - tensor<fp32, [384]> model_encoder_layer_9_attention_output_LayerNorm_weight = const()[name = tensor<string, []>("model_encoder_layer_9_attention_output_LayerNorm_weight"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(113142592)))]; - tensor<fp32, [1536]> model_encoder_layer_9_intermediate_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_9_intermediate_dense_bias"), val = tensor<fp32, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(113144192)))]; - tensor<fp32, [1536, 384]> model_encoder_layer_9_intermediate_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_9_intermediate_dense_weight"), val = tensor<fp32, [1536, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(113150400)))]; - tensor<fp32, [384]> model_encoder_layer_9_output_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_9_output_dense_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(115509760)))]; - tensor<fp32, [384, 1536]> model_encoder_layer_9_output_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_9_output_dense_weight"), val = tensor<fp32, [384, 1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(115511360)))]; - tensor<fp32, [384]> model_encoder_layer_9_output_LayerNorm_bias = const()[name = tensor<string, []>("model_encoder_layer_9_output_LayerNorm_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(117870720)))]; - tensor<fp32, [384]> model_encoder_layer_9_output_LayerNorm_weight = const()[name = tensor<string, []>("model_encoder_layer_9_output_LayerNorm_weight"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(117872320)))]; - tensor<fp32, [384]> model_encoder_layer_10_attention_self_query_bias = const()[name = tensor<string, []>("model_encoder_layer_10_attention_self_query_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(117873920)))]; - tensor<fp32, [384, 384]> model_encoder_layer_10_attention_self_query_weight = const()[name = tensor<string, []>("model_encoder_layer_10_attention_self_query_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(117875520)))]; - tensor<fp32, [384]> model_encoder_layer_10_attention_self_key_bias = const()[name = tensor<string, []>("model_encoder_layer_10_attention_self_key_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(118465408)))]; - tensor<fp32, [384, 384]> model_encoder_layer_10_attention_self_key_weight = const()[name = tensor<string, []>("model_encoder_layer_10_attention_self_key_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(118467008)))]; - tensor<fp32, [384]> model_encoder_layer_10_attention_self_value_bias = const()[name = tensor<string, []>("model_encoder_layer_10_attention_self_value_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119056896)))]; - tensor<fp32, [384, 384]> model_encoder_layer_10_attention_self_value_weight = const()[name = tensor<string, []>("model_encoder_layer_10_attention_self_value_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119058496)))]; - tensor<fp32, [384]> model_encoder_layer_10_attention_output_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_10_attention_output_dense_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119648384)))]; - tensor<fp32, [384, 384]> model_encoder_layer_10_attention_output_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_10_attention_output_dense_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119649984)))]; - tensor<fp32, [384]> model_encoder_layer_10_attention_output_LayerNorm_bias = const()[name = tensor<string, []>("model_encoder_layer_10_attention_output_LayerNorm_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(120239872)))]; - tensor<fp32, [384]> model_encoder_layer_10_attention_output_LayerNorm_weight = const()[name = tensor<string, []>("model_encoder_layer_10_attention_output_LayerNorm_weight"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(120241472)))]; - tensor<fp32, [1536]> model_encoder_layer_10_intermediate_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_10_intermediate_dense_bias"), val = tensor<fp32, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(120243072)))]; - tensor<fp32, [1536, 384]> model_encoder_layer_10_intermediate_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_10_intermediate_dense_weight"), val = tensor<fp32, [1536, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(120249280)))]; - tensor<fp32, [384]> model_encoder_layer_10_output_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_10_output_dense_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(122608640)))]; - tensor<fp32, [384, 1536]> model_encoder_layer_10_output_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_10_output_dense_weight"), val = tensor<fp32, [384, 1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(122610240)))]; - tensor<fp32, [384]> model_encoder_layer_10_output_LayerNorm_bias = const()[name = tensor<string, []>("model_encoder_layer_10_output_LayerNorm_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(124969600)))]; - tensor<fp32, [384]> model_encoder_layer_10_output_LayerNorm_weight = const()[name = tensor<string, []>("model_encoder_layer_10_output_LayerNorm_weight"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(124971200)))]; - tensor<fp32, [384]> model_encoder_layer_11_attention_self_query_bias = const()[name = tensor<string, []>("model_encoder_layer_11_attention_self_query_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(124972800)))]; - tensor<fp32, [384, 384]> model_encoder_layer_11_attention_self_query_weight = const()[name = tensor<string, []>("model_encoder_layer_11_attention_self_query_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(124974400)))]; - tensor<fp32, [384]> model_encoder_layer_11_attention_self_key_bias = const()[name = tensor<string, []>("model_encoder_layer_11_attention_self_key_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(125564288)))]; - tensor<fp32, [384, 384]> model_encoder_layer_11_attention_self_key_weight = const()[name = tensor<string, []>("model_encoder_layer_11_attention_self_key_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(125565888)))]; - tensor<fp32, [384]> model_encoder_layer_11_attention_self_value_bias = const()[name = tensor<string, []>("model_encoder_layer_11_attention_self_value_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(126155776)))]; - tensor<fp32, [384, 384]> model_encoder_layer_11_attention_self_value_weight = const()[name = tensor<string, []>("model_encoder_layer_11_attention_self_value_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(126157376)))]; - tensor<fp32, [384]> model_encoder_layer_11_attention_output_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_11_attention_output_dense_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(126747264)))]; - tensor<fp32, [384, 384]> model_encoder_layer_11_attention_output_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_11_attention_output_dense_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(126748864)))]; - tensor<fp32, [384]> model_encoder_layer_11_attention_output_LayerNorm_bias = const()[name = tensor<string, []>("model_encoder_layer_11_attention_output_LayerNorm_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(127338752)))]; - tensor<fp32, [384]> model_encoder_layer_11_attention_output_LayerNorm_weight = const()[name = tensor<string, []>("model_encoder_layer_11_attention_output_LayerNorm_weight"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(127340352)))]; - tensor<fp32, [1536]> model_encoder_layer_11_intermediate_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_11_intermediate_dense_bias"), val = tensor<fp32, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(127341952)))]; - tensor<fp32, [1536, 384]> model_encoder_layer_11_intermediate_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_11_intermediate_dense_weight"), val = tensor<fp32, [1536, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(127348160)))]; - tensor<fp32, [384]> model_encoder_layer_11_output_dense_bias = const()[name = tensor<string, []>("model_encoder_layer_11_output_dense_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(129707520)))]; - tensor<fp32, [384, 1536]> model_encoder_layer_11_output_dense_weight = const()[name = tensor<string, []>("model_encoder_layer_11_output_dense_weight"), val = tensor<fp32, [384, 1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(129709120)))]; - tensor<fp32, [384]> model_encoder_layer_11_output_LayerNorm_bias = const()[name = tensor<string, []>("model_encoder_layer_11_output_LayerNorm_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(132068480)))]; - tensor<fp32, [384]> model_encoder_layer_11_output_LayerNorm_weight = const()[name = tensor<string, []>("model_encoder_layer_11_output_LayerNorm_weight"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(132070080)))]; - tensor<fp32, [384]> model_pooler_dense_bias = const()[name = tensor<string, []>("model_pooler_dense_bias"), val = tensor<fp32, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(132071680)))]; - tensor<fp32, [384, 384]> model_pooler_dense_weight = const()[name = tensor<string, []>("model_pooler_dense_weight"), val = tensor<fp32, [384, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(132073280)))]; - tensor<int32, []> var_8 = const()[name = tensor<string, []>("op_8"), val = tensor<int32, []>(-1)]; - tensor<fp32, []> var_10 = const()[name = tensor<string, []>("op_10"), val = tensor<fp32, []>(0x1.197998p-40)]; - tensor<fp32, []> var_13 = const()[name = tensor<string, []>("op_13"), val = tensor<fp32, []>(0x1p+0)]; - tensor<int32, [1]> var_34_axes_0 = const()[name = tensor<string, []>("op_34_axes_0"), val = tensor<int32, [1]>([1])]; - tensor<int32, [1, 1, 128]> var_34 = expand_dims(axes = var_34_axes_0, x = attention_mask)[name = tensor<string, []>("op_34")]; - tensor<int32, [1]> var_35_axes_0 = const()[name = tensor<string, []>("op_35_axes_0"), val = tensor<int32, [1]>([2])]; - tensor<int32, [1, 1, 1, 128]> var_35 = expand_dims(axes = var_35_axes_0, x = var_34)[name = tensor<string, []>("op_35")]; - tensor<string, []> var_37_dtype_0 = const()[name = tensor<string, []>("op_37_dtype_0"), val = tensor<string, []>("fp32")]; - tensor<fp32, [1, 1, 1, 128]> cast_75 = cast(dtype = var_37_dtype_0, x = var_35)[name = tensor<string, []>("cast_75")]; - tensor<fp32, [1, 1, 1, 128]> var_38 = sub(x = var_13, y = cast_75)[name = tensor<string, []>("op_38")]; - tensor<fp32, []> var_39 = const()[name = tensor<string, []>("op_39"), val = tensor<fp32, []>(-0x1.fffffep+127)]; - tensor<fp32, [1, 1, 1, 128]> attention_mask_1 = mul(x = var_38, y = var_39)[name = tensor<string, []>("attention_mask")]; - tensor<int32, []> inputs_embeds_axis_0 = const()[name = tensor<string, []>("inputs_embeds_axis_0"), val = tensor<int32, []>(0)]; - tensor<fp32, [1, 128, 384]> inputs_embeds = gather(axis = inputs_embeds_axis_0, indices = input_ids, x = model_embeddings_word_embeddings_weight)[name = tensor<string, []>("inputs_embeds")]; - tensor<fp32, [1, 128, 384]> token_type_embeddings_1 = const()[name = tensor<string, []>("token_type_embeddings_1"), val = tensor<fp32, [1, 128, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(132663168)))]; - tensor<fp32, [1, 128, 384]> embeddings_1 = add(x = inputs_embeds, y = token_type_embeddings_1)[name = tensor<string, []>("embeddings_1")]; - tensor<fp32, [1, 128, 384]> position_embeddings_1 = const()[name = tensor<string, []>("position_embeddings_1"), val = tensor<fp32, [1, 128, 384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(132859840)))]; - tensor<fp32, [1, 128, 384]> input_5 = add(x = embeddings_1, y = position_embeddings_1)[name = tensor<string, []>("input_5")]; - tensor<int32, [1]> input_7_axes_0 = const()[name = tensor<string, []>("input_7_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp32, [1, 128, 384]> input_7 = layer_norm(axes = input_7_axes_0, beta = model_embeddings_LayerNorm_bias, epsilon = var_10, gamma = model_embeddings_LayerNorm_weight, x = input_5)[name = tensor<string, []>("input_7")]; - tensor<fp32, [1, 128, 384]> linear_0 = linear(bias = model_encoder_layer_0_attention_self_query_bias, weight = model_encoder_layer_0_attention_self_query_weight, x = input_7)[name = tensor<string, []>("linear_0")]; - tensor<fp32, [1, 128, 384]> linear_1 = linear(bias = model_encoder_layer_0_attention_self_key_bias, weight = model_encoder_layer_0_attention_self_key_weight, x = input_7)[name = tensor<string, []>("linear_1")]; - tensor<int32, [4]> var_106 = const()[name = tensor<string, []>("op_106"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_3 = reshape(shape = var_106, x = linear_1)[name = tensor<string, []>("x_3")]; - tensor<fp32, [1, 128, 384]> linear_2 = linear(bias = model_encoder_layer_0_attention_self_value_bias, weight = model_encoder_layer_0_attention_self_value_weight, x = input_7)[name = tensor<string, []>("linear_2")]; - tensor<int32, [4]> var_115 = const()[name = tensor<string, []>("op_115"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_7 = reshape(shape = var_115, x = linear_2)[name = tensor<string, []>("x_7")]; - tensor<int32, [4]> var_117 = const()[name = tensor<string, []>("op_117"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> var_121 = const()[name = tensor<string, []>("op_121"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_11 = reshape(shape = var_121, x = linear_0)[name = tensor<string, []>("x_11")]; - tensor<bool, []> attention_scores_1_transpose_x_0 = const()[name = tensor<string, []>("attention_scores_1_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> attention_scores_1_transpose_y_0 = const()[name = tensor<string, []>("attention_scores_1_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<int32, [4]> transpose_36_perm_0 = const()[name = tensor<string, []>("transpose_36_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> transpose_37_perm_0 = const()[name = tensor<string, []>("transpose_37_perm_0"), val = tensor<int32, [4]>([0, 2, 3, 1])]; - tensor<fp32, [1, 12, 32, 128]> transpose_105 = transpose(perm = transpose_37_perm_0, x = x_3)[name = tensor<string, []>("transpose_105")]; - tensor<fp32, [1, 12, 128, 32]> transpose_106 = transpose(perm = transpose_36_perm_0, x = x_11)[name = tensor<string, []>("transpose_106")]; - tensor<fp32, [1, 12, 128, 128]> attention_scores_1 = matmul(transpose_x = attention_scores_1_transpose_x_0, transpose_y = attention_scores_1_transpose_y_0, x = transpose_106, y = transpose_105)[name = tensor<string, []>("attention_scores_1")]; - tensor<fp32, []> _inversed_attention_scores_3_y_0 = const()[name = tensor<string, []>("_inversed_attention_scores_3_y_0"), val = tensor<fp32, []>(0x1.6a09e6p-3)]; - tensor<fp32, [1, 12, 128, 128]> _inversed_attention_scores_3 = mul(x = attention_scores_1, y = _inversed_attention_scores_3_y_0)[name = tensor<string, []>("_inversed_attention_scores_3")]; - tensor<fp32, [1, 12, 128, 128]> input_11 = add(x = _inversed_attention_scores_3, y = attention_mask_1)[name = tensor<string, []>("input_11")]; - tensor<fp32, [1, 12, 128, 128]> input_13 = softmax(axis = var_8, x = input_11)[name = tensor<string, []>("input_13")]; - tensor<bool, []> context_layer_1_transpose_x_0 = const()[name = tensor<string, []>("context_layer_1_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> context_layer_1_transpose_y_0 = const()[name = tensor<string, []>("context_layer_1_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<fp32, [1, 12, 128, 32]> transpose_107 = transpose(perm = var_117, x = x_7)[name = tensor<string, []>("transpose_107")]; - tensor<fp32, [1, 12, 128, 32]> context_layer_1 = matmul(transpose_x = context_layer_1_transpose_x_0, transpose_y = context_layer_1_transpose_y_0, x = input_13, y = transpose_107)[name = tensor<string, []>("context_layer_1")]; - tensor<int32, [4]> var_133 = const()[name = tensor<string, []>("op_133"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [3]> var_138 = const()[name = tensor<string, []>("op_138"), val = tensor<int32, [3]>([1, 128, 384])]; - tensor<fp32, [1, 128, 12, 32]> transpose_104 = transpose(perm = var_133, x = context_layer_1)[name = tensor<string, []>("transpose_104")]; - tensor<fp32, [1, 128, 384]> input_15 = reshape(shape = var_138, x = transpose_104)[name = tensor<string, []>("input_15")]; - tensor<fp32, [1, 128, 384]> linear_3 = linear(bias = model_encoder_layer_0_attention_output_dense_bias, weight = model_encoder_layer_0_attention_output_dense_weight, x = input_15)[name = tensor<string, []>("linear_3")]; - tensor<fp32, [1, 128, 384]> input_19 = add(x = linear_3, y = input_7)[name = tensor<string, []>("input_19")]; - tensor<int32, [1]> input_21_axes_0 = const()[name = tensor<string, []>("input_21_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp32, [1, 128, 384]> input_21 = layer_norm(axes = input_21_axes_0, beta = model_encoder_layer_0_attention_output_LayerNorm_bias, epsilon = var_10, gamma = model_encoder_layer_0_attention_output_LayerNorm_weight, x = input_19)[name = tensor<string, []>("input_21")]; - tensor<fp32, [1, 128, 1536]> linear_4 = linear(bias = model_encoder_layer_0_intermediate_dense_bias, weight = model_encoder_layer_0_intermediate_dense_weight, x = input_21)[name = tensor<string, []>("linear_4")]; - tensor<string, []> input_25_mode_0 = const()[name = tensor<string, []>("input_25_mode_0"), val = tensor<string, []>("EXACT")]; - tensor<fp32, [1, 128, 1536]> input_25 = gelu(mode = input_25_mode_0, x = linear_4)[name = tensor<string, []>("input_25")]; - tensor<fp32, [1, 128, 384]> linear_5 = linear(bias = model_encoder_layer_0_output_dense_bias, weight = model_encoder_layer_0_output_dense_weight, x = input_25)[name = tensor<string, []>("linear_5")]; - tensor<fp32, [1, 128, 384]> input_29 = add(x = linear_5, y = input_21)[name = tensor<string, []>("input_29")]; - tensor<int32, [1]> input_31_axes_0 = const()[name = tensor<string, []>("input_31_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp32, [1, 128, 384]> input_31 = layer_norm(axes = input_31_axes_0, beta = model_encoder_layer_0_output_LayerNorm_bias, epsilon = var_10, gamma = model_encoder_layer_0_output_LayerNorm_weight, x = input_29)[name = tensor<string, []>("input_31")]; - tensor<fp32, [1, 128, 384]> linear_6 = linear(bias = model_encoder_layer_1_attention_self_query_bias, weight = model_encoder_layer_1_attention_self_query_weight, x = input_31)[name = tensor<string, []>("linear_6")]; - tensor<fp32, [1, 128, 384]> linear_7 = linear(bias = model_encoder_layer_1_attention_self_key_bias, weight = model_encoder_layer_1_attention_self_key_weight, x = input_31)[name = tensor<string, []>("linear_7")]; - tensor<int32, [4]> var_183 = const()[name = tensor<string, []>("op_183"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_15 = reshape(shape = var_183, x = linear_7)[name = tensor<string, []>("x_15")]; - tensor<fp32, [1, 128, 384]> linear_8 = linear(bias = model_encoder_layer_1_attention_self_value_bias, weight = model_encoder_layer_1_attention_self_value_weight, x = input_31)[name = tensor<string, []>("linear_8")]; - tensor<int32, [4]> var_192 = const()[name = tensor<string, []>("op_192"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_19 = reshape(shape = var_192, x = linear_8)[name = tensor<string, []>("x_19")]; - tensor<int32, [4]> var_194 = const()[name = tensor<string, []>("op_194"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> var_198 = const()[name = tensor<string, []>("op_198"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_23 = reshape(shape = var_198, x = linear_6)[name = tensor<string, []>("x_23")]; - tensor<bool, []> attention_scores_5_transpose_x_0 = const()[name = tensor<string, []>("attention_scores_5_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> attention_scores_5_transpose_y_0 = const()[name = tensor<string, []>("attention_scores_5_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<int32, [4]> transpose_38_perm_0 = const()[name = tensor<string, []>("transpose_38_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> transpose_39_perm_0 = const()[name = tensor<string, []>("transpose_39_perm_0"), val = tensor<int32, [4]>([0, 2, 3, 1])]; - tensor<fp32, [1, 12, 32, 128]> transpose_101 = transpose(perm = transpose_39_perm_0, x = x_15)[name = tensor<string, []>("transpose_101")]; - tensor<fp32, [1, 12, 128, 32]> transpose_102 = transpose(perm = transpose_38_perm_0, x = x_23)[name = tensor<string, []>("transpose_102")]; - tensor<fp32, [1, 12, 128, 128]> attention_scores_5 = matmul(transpose_x = attention_scores_5_transpose_x_0, transpose_y = attention_scores_5_transpose_y_0, x = transpose_102, y = transpose_101)[name = tensor<string, []>("attention_scores_5")]; - tensor<fp32, []> _inversed_attention_scores_7_y_0 = const()[name = tensor<string, []>("_inversed_attention_scores_7_y_0"), val = tensor<fp32, []>(0x1.6a09e6p-3)]; - tensor<fp32, [1, 12, 128, 128]> _inversed_attention_scores_7 = mul(x = attention_scores_5, y = _inversed_attention_scores_7_y_0)[name = tensor<string, []>("_inversed_attention_scores_7")]; - tensor<fp32, [1, 12, 128, 128]> input_33 = add(x = _inversed_attention_scores_7, y = attention_mask_1)[name = tensor<string, []>("input_33")]; - tensor<fp32, [1, 12, 128, 128]> input_35 = softmax(axis = var_8, x = input_33)[name = tensor<string, []>("input_35")]; - tensor<bool, []> context_layer_5_transpose_x_0 = const()[name = tensor<string, []>("context_layer_5_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> context_layer_5_transpose_y_0 = const()[name = tensor<string, []>("context_layer_5_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<fp32, [1, 12, 128, 32]> transpose_103 = transpose(perm = var_194, x = x_19)[name = tensor<string, []>("transpose_103")]; - tensor<fp32, [1, 12, 128, 32]> context_layer_5 = matmul(transpose_x = context_layer_5_transpose_x_0, transpose_y = context_layer_5_transpose_y_0, x = input_35, y = transpose_103)[name = tensor<string, []>("context_layer_5")]; - tensor<int32, [4]> var_210 = const()[name = tensor<string, []>("op_210"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [3]> var_215 = const()[name = tensor<string, []>("op_215"), val = tensor<int32, [3]>([1, 128, 384])]; - tensor<fp32, [1, 128, 12, 32]> transpose_100 = transpose(perm = var_210, x = context_layer_5)[name = tensor<string, []>("transpose_100")]; - tensor<fp32, [1, 128, 384]> input_37 = reshape(shape = var_215, x = transpose_100)[name = tensor<string, []>("input_37")]; - tensor<fp32, [1, 128, 384]> linear_9 = linear(bias = model_encoder_layer_1_attention_output_dense_bias, weight = model_encoder_layer_1_attention_output_dense_weight, x = input_37)[name = tensor<string, []>("linear_9")]; - tensor<fp32, [1, 128, 384]> input_41 = add(x = linear_9, y = input_31)[name = tensor<string, []>("input_41")]; - tensor<int32, [1]> input_43_axes_0 = const()[name = tensor<string, []>("input_43_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp32, [1, 128, 384]> input_43 = layer_norm(axes = input_43_axes_0, beta = model_encoder_layer_1_attention_output_LayerNorm_bias, epsilon = var_10, gamma = model_encoder_layer_1_attention_output_LayerNorm_weight, x = input_41)[name = tensor<string, []>("input_43")]; - tensor<fp32, [1, 128, 1536]> linear_10 = linear(bias = model_encoder_layer_1_intermediate_dense_bias, weight = model_encoder_layer_1_intermediate_dense_weight, x = input_43)[name = tensor<string, []>("linear_10")]; - tensor<string, []> input_47_mode_0 = const()[name = tensor<string, []>("input_47_mode_0"), val = tensor<string, []>("EXACT")]; - tensor<fp32, [1, 128, 1536]> input_47 = gelu(mode = input_47_mode_0, x = linear_10)[name = tensor<string, []>("input_47")]; - tensor<fp32, [1, 128, 384]> linear_11 = linear(bias = model_encoder_layer_1_output_dense_bias, weight = model_encoder_layer_1_output_dense_weight, x = input_47)[name = tensor<string, []>("linear_11")]; - tensor<fp32, [1, 128, 384]> input_51 = add(x = linear_11, y = input_43)[name = tensor<string, []>("input_51")]; - tensor<int32, [1]> input_53_axes_0 = const()[name = tensor<string, []>("input_53_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp32, [1, 128, 384]> input_53 = layer_norm(axes = input_53_axes_0, beta = model_encoder_layer_1_output_LayerNorm_bias, epsilon = var_10, gamma = model_encoder_layer_1_output_LayerNorm_weight, x = input_51)[name = tensor<string, []>("input_53")]; - tensor<fp32, [1, 128, 384]> linear_12 = linear(bias = model_encoder_layer_2_attention_self_query_bias, weight = model_encoder_layer_2_attention_self_query_weight, x = input_53)[name = tensor<string, []>("linear_12")]; - tensor<fp32, [1, 128, 384]> linear_13 = linear(bias = model_encoder_layer_2_attention_self_key_bias, weight = model_encoder_layer_2_attention_self_key_weight, x = input_53)[name = tensor<string, []>("linear_13")]; - tensor<int32, [4]> var_260 = const()[name = tensor<string, []>("op_260"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_27 = reshape(shape = var_260, x = linear_13)[name = tensor<string, []>("x_27")]; - tensor<fp32, [1, 128, 384]> linear_14 = linear(bias = model_encoder_layer_2_attention_self_value_bias, weight = model_encoder_layer_2_attention_self_value_weight, x = input_53)[name = tensor<string, []>("linear_14")]; - tensor<int32, [4]> var_269 = const()[name = tensor<string, []>("op_269"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_31 = reshape(shape = var_269, x = linear_14)[name = tensor<string, []>("x_31")]; - tensor<int32, [4]> var_271 = const()[name = tensor<string, []>("op_271"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> var_275 = const()[name = tensor<string, []>("op_275"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_35 = reshape(shape = var_275, x = linear_12)[name = tensor<string, []>("x_35")]; - tensor<bool, []> attention_scores_9_transpose_x_0 = const()[name = tensor<string, []>("attention_scores_9_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> attention_scores_9_transpose_y_0 = const()[name = tensor<string, []>("attention_scores_9_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<int32, [4]> transpose_40_perm_0 = const()[name = tensor<string, []>("transpose_40_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> transpose_41_perm_0 = const()[name = tensor<string, []>("transpose_41_perm_0"), val = tensor<int32, [4]>([0, 2, 3, 1])]; - tensor<fp32, [1, 12, 32, 128]> transpose_97 = transpose(perm = transpose_41_perm_0, x = x_27)[name = tensor<string, []>("transpose_97")]; - tensor<fp32, [1, 12, 128, 32]> transpose_98 = transpose(perm = transpose_40_perm_0, x = x_35)[name = tensor<string, []>("transpose_98")]; - tensor<fp32, [1, 12, 128, 128]> attention_scores_9 = matmul(transpose_x = attention_scores_9_transpose_x_0, transpose_y = attention_scores_9_transpose_y_0, x = transpose_98, y = transpose_97)[name = tensor<string, []>("attention_scores_9")]; - tensor<fp32, []> _inversed_attention_scores_11_y_0 = const()[name = tensor<string, []>("_inversed_attention_scores_11_y_0"), val = tensor<fp32, []>(0x1.6a09e6p-3)]; - tensor<fp32, [1, 12, 128, 128]> _inversed_attention_scores_11 = mul(x = attention_scores_9, y = _inversed_attention_scores_11_y_0)[name = tensor<string, []>("_inversed_attention_scores_11")]; - tensor<fp32, [1, 12, 128, 128]> input_55 = add(x = _inversed_attention_scores_11, y = attention_mask_1)[name = tensor<string, []>("input_55")]; - tensor<fp32, [1, 12, 128, 128]> input_57 = softmax(axis = var_8, x = input_55)[name = tensor<string, []>("input_57")]; - tensor<bool, []> context_layer_9_transpose_x_0 = const()[name = tensor<string, []>("context_layer_9_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> context_layer_9_transpose_y_0 = const()[name = tensor<string, []>("context_layer_9_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<fp32, [1, 12, 128, 32]> transpose_99 = transpose(perm = var_271, x = x_31)[name = tensor<string, []>("transpose_99")]; - tensor<fp32, [1, 12, 128, 32]> context_layer_9 = matmul(transpose_x = context_layer_9_transpose_x_0, transpose_y = context_layer_9_transpose_y_0, x = input_57, y = transpose_99)[name = tensor<string, []>("context_layer_9")]; - tensor<int32, [4]> var_287 = const()[name = tensor<string, []>("op_287"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [3]> var_292 = const()[name = tensor<string, []>("op_292"), val = tensor<int32, [3]>([1, 128, 384])]; - tensor<fp32, [1, 128, 12, 32]> transpose_96 = transpose(perm = var_287, x = context_layer_9)[name = tensor<string, []>("transpose_96")]; - tensor<fp32, [1, 128, 384]> input_59 = reshape(shape = var_292, x = transpose_96)[name = tensor<string, []>("input_59")]; - tensor<fp32, [1, 128, 384]> linear_15 = linear(bias = model_encoder_layer_2_attention_output_dense_bias, weight = model_encoder_layer_2_attention_output_dense_weight, x = input_59)[name = tensor<string, []>("linear_15")]; - tensor<fp32, [1, 128, 384]> input_63 = add(x = linear_15, y = input_53)[name = tensor<string, []>("input_63")]; - tensor<int32, [1]> input_65_axes_0 = const()[name = tensor<string, []>("input_65_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp32, [1, 128, 384]> input_65 = layer_norm(axes = input_65_axes_0, beta = model_encoder_layer_2_attention_output_LayerNorm_bias, epsilon = var_10, gamma = model_encoder_layer_2_attention_output_LayerNorm_weight, x = input_63)[name = tensor<string, []>("input_65")]; - tensor<fp32, [1, 128, 1536]> linear_16 = linear(bias = model_encoder_layer_2_intermediate_dense_bias, weight = model_encoder_layer_2_intermediate_dense_weight, x = input_65)[name = tensor<string, []>("linear_16")]; - tensor<string, []> input_69_mode_0 = const()[name = tensor<string, []>("input_69_mode_0"), val = tensor<string, []>("EXACT")]; - tensor<fp32, [1, 128, 1536]> input_69 = gelu(mode = input_69_mode_0, x = linear_16)[name = tensor<string, []>("input_69")]; - tensor<fp32, [1, 128, 384]> linear_17 = linear(bias = model_encoder_layer_2_output_dense_bias, weight = model_encoder_layer_2_output_dense_weight, x = input_69)[name = tensor<string, []>("linear_17")]; - tensor<fp32, [1, 128, 384]> input_73 = add(x = linear_17, y = input_65)[name = tensor<string, []>("input_73")]; - tensor<int32, [1]> input_75_axes_0 = const()[name = tensor<string, []>("input_75_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp32, [1, 128, 384]> input_75 = layer_norm(axes = input_75_axes_0, beta = model_encoder_layer_2_output_LayerNorm_bias, epsilon = var_10, gamma = model_encoder_layer_2_output_LayerNorm_weight, x = input_73)[name = tensor<string, []>("input_75")]; - tensor<fp32, [1, 128, 384]> linear_18 = linear(bias = model_encoder_layer_3_attention_self_query_bias, weight = model_encoder_layer_3_attention_self_query_weight, x = input_75)[name = tensor<string, []>("linear_18")]; - tensor<fp32, [1, 128, 384]> linear_19 = linear(bias = model_encoder_layer_3_attention_self_key_bias, weight = model_encoder_layer_3_attention_self_key_weight, x = input_75)[name = tensor<string, []>("linear_19")]; - tensor<int32, [4]> var_337 = const()[name = tensor<string, []>("op_337"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_39 = reshape(shape = var_337, x = linear_19)[name = tensor<string, []>("x_39")]; - tensor<fp32, [1, 128, 384]> linear_20 = linear(bias = model_encoder_layer_3_attention_self_value_bias, weight = model_encoder_layer_3_attention_self_value_weight, x = input_75)[name = tensor<string, []>("linear_20")]; - tensor<int32, [4]> var_346 = const()[name = tensor<string, []>("op_346"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_43 = reshape(shape = var_346, x = linear_20)[name = tensor<string, []>("x_43")]; - tensor<int32, [4]> var_348 = const()[name = tensor<string, []>("op_348"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> var_352 = const()[name = tensor<string, []>("op_352"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_47 = reshape(shape = var_352, x = linear_18)[name = tensor<string, []>("x_47")]; - tensor<bool, []> attention_scores_13_transpose_x_0 = const()[name = tensor<string, []>("attention_scores_13_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> attention_scores_13_transpose_y_0 = const()[name = tensor<string, []>("attention_scores_13_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<int32, [4]> transpose_42_perm_0 = const()[name = tensor<string, []>("transpose_42_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> transpose_43_perm_0 = const()[name = tensor<string, []>("transpose_43_perm_0"), val = tensor<int32, [4]>([0, 2, 3, 1])]; - tensor<fp32, [1, 12, 32, 128]> transpose_93 = transpose(perm = transpose_43_perm_0, x = x_39)[name = tensor<string, []>("transpose_93")]; - tensor<fp32, [1, 12, 128, 32]> transpose_94 = transpose(perm = transpose_42_perm_0, x = x_47)[name = tensor<string, []>("transpose_94")]; - tensor<fp32, [1, 12, 128, 128]> attention_scores_13 = matmul(transpose_x = attention_scores_13_transpose_x_0, transpose_y = attention_scores_13_transpose_y_0, x = transpose_94, y = transpose_93)[name = tensor<string, []>("attention_scores_13")]; - tensor<fp32, []> _inversed_attention_scores_15_y_0 = const()[name = tensor<string, []>("_inversed_attention_scores_15_y_0"), val = tensor<fp32, []>(0x1.6a09e6p-3)]; - tensor<fp32, [1, 12, 128, 128]> _inversed_attention_scores_15 = mul(x = attention_scores_13, y = _inversed_attention_scores_15_y_0)[name = tensor<string, []>("_inversed_attention_scores_15")]; - tensor<fp32, [1, 12, 128, 128]> input_77 = add(x = _inversed_attention_scores_15, y = attention_mask_1)[name = tensor<string, []>("input_77")]; - tensor<fp32, [1, 12, 128, 128]> input_79 = softmax(axis = var_8, x = input_77)[name = tensor<string, []>("input_79")]; - tensor<bool, []> context_layer_13_transpose_x_0 = const()[name = tensor<string, []>("context_layer_13_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> context_layer_13_transpose_y_0 = const()[name = tensor<string, []>("context_layer_13_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<fp32, [1, 12, 128, 32]> transpose_95 = transpose(perm = var_348, x = x_43)[name = tensor<string, []>("transpose_95")]; - tensor<fp32, [1, 12, 128, 32]> context_layer_13 = matmul(transpose_x = context_layer_13_transpose_x_0, transpose_y = context_layer_13_transpose_y_0, x = input_79, y = transpose_95)[name = tensor<string, []>("context_layer_13")]; - tensor<int32, [4]> var_364 = const()[name = tensor<string, []>("op_364"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [3]> var_369 = const()[name = tensor<string, []>("op_369"), val = tensor<int32, [3]>([1, 128, 384])]; - tensor<fp32, [1, 128, 12, 32]> transpose_92 = transpose(perm = var_364, x = context_layer_13)[name = tensor<string, []>("transpose_92")]; - tensor<fp32, [1, 128, 384]> input_81 = reshape(shape = var_369, x = transpose_92)[name = tensor<string, []>("input_81")]; - tensor<fp32, [1, 128, 384]> linear_21 = linear(bias = model_encoder_layer_3_attention_output_dense_bias, weight = model_encoder_layer_3_attention_output_dense_weight, x = input_81)[name = tensor<string, []>("linear_21")]; - tensor<fp32, [1, 128, 384]> input_85 = add(x = linear_21, y = input_75)[name = tensor<string, []>("input_85")]; - tensor<int32, [1]> input_87_axes_0 = const()[name = tensor<string, []>("input_87_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp32, [1, 128, 384]> input_87 = layer_norm(axes = input_87_axes_0, beta = model_encoder_layer_3_attention_output_LayerNorm_bias, epsilon = var_10, gamma = model_encoder_layer_3_attention_output_LayerNorm_weight, x = input_85)[name = tensor<string, []>("input_87")]; - tensor<fp32, [1, 128, 1536]> linear_22 = linear(bias = model_encoder_layer_3_intermediate_dense_bias, weight = model_encoder_layer_3_intermediate_dense_weight, x = input_87)[name = tensor<string, []>("linear_22")]; - tensor<string, []> input_91_mode_0 = const()[name = tensor<string, []>("input_91_mode_0"), val = tensor<string, []>("EXACT")]; - tensor<fp32, [1, 128, 1536]> input_91 = gelu(mode = input_91_mode_0, x = linear_22)[name = tensor<string, []>("input_91")]; - tensor<fp32, [1, 128, 384]> linear_23 = linear(bias = model_encoder_layer_3_output_dense_bias, weight = model_encoder_layer_3_output_dense_weight, x = input_91)[name = tensor<string, []>("linear_23")]; - tensor<fp32, [1, 128, 384]> input_95 = add(x = linear_23, y = input_87)[name = tensor<string, []>("input_95")]; - tensor<int32, [1]> input_97_axes_0 = const()[name = tensor<string, []>("input_97_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp32, [1, 128, 384]> input_97 = layer_norm(axes = input_97_axes_0, beta = model_encoder_layer_3_output_LayerNorm_bias, epsilon = var_10, gamma = model_encoder_layer_3_output_LayerNorm_weight, x = input_95)[name = tensor<string, []>("input_97")]; - tensor<fp32, [1, 128, 384]> linear_24 = linear(bias = model_encoder_layer_4_attention_self_query_bias, weight = model_encoder_layer_4_attention_self_query_weight, x = input_97)[name = tensor<string, []>("linear_24")]; - tensor<fp32, [1, 128, 384]> linear_25 = linear(bias = model_encoder_layer_4_attention_self_key_bias, weight = model_encoder_layer_4_attention_self_key_weight, x = input_97)[name = tensor<string, []>("linear_25")]; - tensor<int32, [4]> var_414 = const()[name = tensor<string, []>("op_414"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_51 = reshape(shape = var_414, x = linear_25)[name = tensor<string, []>("x_51")]; - tensor<fp32, [1, 128, 384]> linear_26 = linear(bias = model_encoder_layer_4_attention_self_value_bias, weight = model_encoder_layer_4_attention_self_value_weight, x = input_97)[name = tensor<string, []>("linear_26")]; - tensor<int32, [4]> var_423 = const()[name = tensor<string, []>("op_423"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_55 = reshape(shape = var_423, x = linear_26)[name = tensor<string, []>("x_55")]; - tensor<int32, [4]> var_425 = const()[name = tensor<string, []>("op_425"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> var_429 = const()[name = tensor<string, []>("op_429"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_59 = reshape(shape = var_429, x = linear_24)[name = tensor<string, []>("x_59")]; - tensor<bool, []> attention_scores_17_transpose_x_0 = const()[name = tensor<string, []>("attention_scores_17_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> attention_scores_17_transpose_y_0 = const()[name = tensor<string, []>("attention_scores_17_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<int32, [4]> transpose_44_perm_0 = const()[name = tensor<string, []>("transpose_44_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> transpose_45_perm_0 = const()[name = tensor<string, []>("transpose_45_perm_0"), val = tensor<int32, [4]>([0, 2, 3, 1])]; - tensor<fp32, [1, 12, 32, 128]> transpose_89 = transpose(perm = transpose_45_perm_0, x = x_51)[name = tensor<string, []>("transpose_89")]; - tensor<fp32, [1, 12, 128, 32]> transpose_90 = transpose(perm = transpose_44_perm_0, x = x_59)[name = tensor<string, []>("transpose_90")]; - tensor<fp32, [1, 12, 128, 128]> attention_scores_17 = matmul(transpose_x = attention_scores_17_transpose_x_0, transpose_y = attention_scores_17_transpose_y_0, x = transpose_90, y = transpose_89)[name = tensor<string, []>("attention_scores_17")]; - tensor<fp32, []> _inversed_attention_scores_19_y_0 = const()[name = tensor<string, []>("_inversed_attention_scores_19_y_0"), val = tensor<fp32, []>(0x1.6a09e6p-3)]; - tensor<fp32, [1, 12, 128, 128]> _inversed_attention_scores_19 = mul(x = attention_scores_17, y = _inversed_attention_scores_19_y_0)[name = tensor<string, []>("_inversed_attention_scores_19")]; - tensor<fp32, [1, 12, 128, 128]> input_99 = add(x = _inversed_attention_scores_19, y = attention_mask_1)[name = tensor<string, []>("input_99")]; - tensor<fp32, [1, 12, 128, 128]> input_101 = softmax(axis = var_8, x = input_99)[name = tensor<string, []>("input_101")]; - tensor<bool, []> context_layer_17_transpose_x_0 = const()[name = tensor<string, []>("context_layer_17_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> context_layer_17_transpose_y_0 = const()[name = tensor<string, []>("context_layer_17_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<fp32, [1, 12, 128, 32]> transpose_91 = transpose(perm = var_425, x = x_55)[name = tensor<string, []>("transpose_91")]; - tensor<fp32, [1, 12, 128, 32]> context_layer_17 = matmul(transpose_x = context_layer_17_transpose_x_0, transpose_y = context_layer_17_transpose_y_0, x = input_101, y = transpose_91)[name = tensor<string, []>("context_layer_17")]; - tensor<int32, [4]> var_441 = const()[name = tensor<string, []>("op_441"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [3]> var_446 = const()[name = tensor<string, []>("op_446"), val = tensor<int32, [3]>([1, 128, 384])]; - tensor<fp32, [1, 128, 12, 32]> transpose_88 = transpose(perm = var_441, x = context_layer_17)[name = tensor<string, []>("transpose_88")]; - tensor<fp32, [1, 128, 384]> input_103 = reshape(shape = var_446, x = transpose_88)[name = tensor<string, []>("input_103")]; - tensor<fp32, [1, 128, 384]> linear_27 = linear(bias = model_encoder_layer_4_attention_output_dense_bias, weight = model_encoder_layer_4_attention_output_dense_weight, x = input_103)[name = tensor<string, []>("linear_27")]; - tensor<fp32, [1, 128, 384]> input_107 = add(x = linear_27, y = input_97)[name = tensor<string, []>("input_107")]; - tensor<int32, [1]> input_109_axes_0 = const()[name = tensor<string, []>("input_109_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp32, [1, 128, 384]> input_109 = layer_norm(axes = input_109_axes_0, beta = model_encoder_layer_4_attention_output_LayerNorm_bias, epsilon = var_10, gamma = model_encoder_layer_4_attention_output_LayerNorm_weight, x = input_107)[name = tensor<string, []>("input_109")]; - tensor<fp32, [1, 128, 1536]> linear_28 = linear(bias = model_encoder_layer_4_intermediate_dense_bias, weight = model_encoder_layer_4_intermediate_dense_weight, x = input_109)[name = tensor<string, []>("linear_28")]; - tensor<string, []> input_113_mode_0 = const()[name = tensor<string, []>("input_113_mode_0"), val = tensor<string, []>("EXACT")]; - tensor<fp32, [1, 128, 1536]> input_113 = gelu(mode = input_113_mode_0, x = linear_28)[name = tensor<string, []>("input_113")]; - tensor<fp32, [1, 128, 384]> linear_29 = linear(bias = model_encoder_layer_4_output_dense_bias, weight = model_encoder_layer_4_output_dense_weight, x = input_113)[name = tensor<string, []>("linear_29")]; - tensor<fp32, [1, 128, 384]> input_117 = add(x = linear_29, y = input_109)[name = tensor<string, []>("input_117")]; - tensor<int32, [1]> input_119_axes_0 = const()[name = tensor<string, []>("input_119_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp32, [1, 128, 384]> input_119 = layer_norm(axes = input_119_axes_0, beta = model_encoder_layer_4_output_LayerNorm_bias, epsilon = var_10, gamma = model_encoder_layer_4_output_LayerNorm_weight, x = input_117)[name = tensor<string, []>("input_119")]; - tensor<fp32, [1, 128, 384]> linear_30 = linear(bias = model_encoder_layer_5_attention_self_query_bias, weight = model_encoder_layer_5_attention_self_query_weight, x = input_119)[name = tensor<string, []>("linear_30")]; - tensor<fp32, [1, 128, 384]> linear_31 = linear(bias = model_encoder_layer_5_attention_self_key_bias, weight = model_encoder_layer_5_attention_self_key_weight, x = input_119)[name = tensor<string, []>("linear_31")]; - tensor<int32, [4]> var_491 = const()[name = tensor<string, []>("op_491"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_63 = reshape(shape = var_491, x = linear_31)[name = tensor<string, []>("x_63")]; - tensor<fp32, [1, 128, 384]> linear_32 = linear(bias = model_encoder_layer_5_attention_self_value_bias, weight = model_encoder_layer_5_attention_self_value_weight, x = input_119)[name = tensor<string, []>("linear_32")]; - tensor<int32, [4]> var_500 = const()[name = tensor<string, []>("op_500"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_67 = reshape(shape = var_500, x = linear_32)[name = tensor<string, []>("x_67")]; - tensor<int32, [4]> var_502 = const()[name = tensor<string, []>("op_502"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> var_506 = const()[name = tensor<string, []>("op_506"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_71 = reshape(shape = var_506, x = linear_30)[name = tensor<string, []>("x_71")]; - tensor<bool, []> attention_scores_21_transpose_x_0 = const()[name = tensor<string, []>("attention_scores_21_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> attention_scores_21_transpose_y_0 = const()[name = tensor<string, []>("attention_scores_21_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<int32, [4]> transpose_46_perm_0 = const()[name = tensor<string, []>("transpose_46_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> transpose_47_perm_0 = const()[name = tensor<string, []>("transpose_47_perm_0"), val = tensor<int32, [4]>([0, 2, 3, 1])]; - tensor<fp32, [1, 12, 32, 128]> transpose_85 = transpose(perm = transpose_47_perm_0, x = x_63)[name = tensor<string, []>("transpose_85")]; - tensor<fp32, [1, 12, 128, 32]> transpose_86 = transpose(perm = transpose_46_perm_0, x = x_71)[name = tensor<string, []>("transpose_86")]; - tensor<fp32, [1, 12, 128, 128]> attention_scores_21 = matmul(transpose_x = attention_scores_21_transpose_x_0, transpose_y = attention_scores_21_transpose_y_0, x = transpose_86, y = transpose_85)[name = tensor<string, []>("attention_scores_21")]; - tensor<fp32, []> _inversed_attention_scores_23_y_0 = const()[name = tensor<string, []>("_inversed_attention_scores_23_y_0"), val = tensor<fp32, []>(0x1.6a09e6p-3)]; - tensor<fp32, [1, 12, 128, 128]> _inversed_attention_scores_23 = mul(x = attention_scores_21, y = _inversed_attention_scores_23_y_0)[name = tensor<string, []>("_inversed_attention_scores_23")]; - tensor<fp32, [1, 12, 128, 128]> input_121 = add(x = _inversed_attention_scores_23, y = attention_mask_1)[name = tensor<string, []>("input_121")]; - tensor<fp32, [1, 12, 128, 128]> input_123 = softmax(axis = var_8, x = input_121)[name = tensor<string, []>("input_123")]; - tensor<bool, []> context_layer_21_transpose_x_0 = const()[name = tensor<string, []>("context_layer_21_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> context_layer_21_transpose_y_0 = const()[name = tensor<string, []>("context_layer_21_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<fp32, [1, 12, 128, 32]> transpose_87 = transpose(perm = var_502, x = x_67)[name = tensor<string, []>("transpose_87")]; - tensor<fp32, [1, 12, 128, 32]> context_layer_21 = matmul(transpose_x = context_layer_21_transpose_x_0, transpose_y = context_layer_21_transpose_y_0, x = input_123, y = transpose_87)[name = tensor<string, []>("context_layer_21")]; - tensor<int32, [4]> var_518 = const()[name = tensor<string, []>("op_518"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [3]> var_523 = const()[name = tensor<string, []>("op_523"), val = tensor<int32, [3]>([1, 128, 384])]; - tensor<fp32, [1, 128, 12, 32]> transpose_84 = transpose(perm = var_518, x = context_layer_21)[name = tensor<string, []>("transpose_84")]; - tensor<fp32, [1, 128, 384]> input_125 = reshape(shape = var_523, x = transpose_84)[name = tensor<string, []>("input_125")]; - tensor<fp32, [1, 128, 384]> linear_33 = linear(bias = model_encoder_layer_5_attention_output_dense_bias, weight = model_encoder_layer_5_attention_output_dense_weight, x = input_125)[name = tensor<string, []>("linear_33")]; - tensor<fp32, [1, 128, 384]> input_129 = add(x = linear_33, y = input_119)[name = tensor<string, []>("input_129")]; - tensor<int32, [1]> input_131_axes_0 = const()[name = tensor<string, []>("input_131_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp32, [1, 128, 384]> input_131 = layer_norm(axes = input_131_axes_0, beta = model_encoder_layer_5_attention_output_LayerNorm_bias, epsilon = var_10, gamma = model_encoder_layer_5_attention_output_LayerNorm_weight, x = input_129)[name = tensor<string, []>("input_131")]; - tensor<fp32, [1, 128, 1536]> linear_34 = linear(bias = model_encoder_layer_5_intermediate_dense_bias, weight = model_encoder_layer_5_intermediate_dense_weight, x = input_131)[name = tensor<string, []>("linear_34")]; - tensor<string, []> input_135_mode_0 = const()[name = tensor<string, []>("input_135_mode_0"), val = tensor<string, []>("EXACT")]; - tensor<fp32, [1, 128, 1536]> input_135 = gelu(mode = input_135_mode_0, x = linear_34)[name = tensor<string, []>("input_135")]; - tensor<fp32, [1, 128, 384]> linear_35 = linear(bias = model_encoder_layer_5_output_dense_bias, weight = model_encoder_layer_5_output_dense_weight, x = input_135)[name = tensor<string, []>("linear_35")]; - tensor<fp32, [1, 128, 384]> input_139 = add(x = linear_35, y = input_131)[name = tensor<string, []>("input_139")]; - tensor<int32, [1]> input_141_axes_0 = const()[name = tensor<string, []>("input_141_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp32, [1, 128, 384]> input_141 = layer_norm(axes = input_141_axes_0, beta = model_encoder_layer_5_output_LayerNorm_bias, epsilon = var_10, gamma = model_encoder_layer_5_output_LayerNorm_weight, x = input_139)[name = tensor<string, []>("input_141")]; - tensor<fp32, [1, 128, 384]> linear_36 = linear(bias = model_encoder_layer_6_attention_self_query_bias, weight = model_encoder_layer_6_attention_self_query_weight, x = input_141)[name = tensor<string, []>("linear_36")]; - tensor<fp32, [1, 128, 384]> linear_37 = linear(bias = model_encoder_layer_6_attention_self_key_bias, weight = model_encoder_layer_6_attention_self_key_weight, x = input_141)[name = tensor<string, []>("linear_37")]; - tensor<int32, [4]> var_568 = const()[name = tensor<string, []>("op_568"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_75 = reshape(shape = var_568, x = linear_37)[name = tensor<string, []>("x_75")]; - tensor<fp32, [1, 128, 384]> linear_38 = linear(bias = model_encoder_layer_6_attention_self_value_bias, weight = model_encoder_layer_6_attention_self_value_weight, x = input_141)[name = tensor<string, []>("linear_38")]; - tensor<int32, [4]> var_577 = const()[name = tensor<string, []>("op_577"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_79 = reshape(shape = var_577, x = linear_38)[name = tensor<string, []>("x_79")]; - tensor<int32, [4]> var_579 = const()[name = tensor<string, []>("op_579"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> var_583 = const()[name = tensor<string, []>("op_583"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_83 = reshape(shape = var_583, x = linear_36)[name = tensor<string, []>("x_83")]; - tensor<bool, []> attention_scores_25_transpose_x_0 = const()[name = tensor<string, []>("attention_scores_25_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> attention_scores_25_transpose_y_0 = const()[name = tensor<string, []>("attention_scores_25_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<int32, [4]> transpose_48_perm_0 = const()[name = tensor<string, []>("transpose_48_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> transpose_49_perm_0 = const()[name = tensor<string, []>("transpose_49_perm_0"), val = tensor<int32, [4]>([0, 2, 3, 1])]; - tensor<fp32, [1, 12, 32, 128]> transpose_81 = transpose(perm = transpose_49_perm_0, x = x_75)[name = tensor<string, []>("transpose_81")]; - tensor<fp32, [1, 12, 128, 32]> transpose_82 = transpose(perm = transpose_48_perm_0, x = x_83)[name = tensor<string, []>("transpose_82")]; - tensor<fp32, [1, 12, 128, 128]> attention_scores_25 = matmul(transpose_x = attention_scores_25_transpose_x_0, transpose_y = attention_scores_25_transpose_y_0, x = transpose_82, y = transpose_81)[name = tensor<string, []>("attention_scores_25")]; - tensor<fp32, []> _inversed_attention_scores_27_y_0 = const()[name = tensor<string, []>("_inversed_attention_scores_27_y_0"), val = tensor<fp32, []>(0x1.6a09e6p-3)]; - tensor<fp32, [1, 12, 128, 128]> _inversed_attention_scores_27 = mul(x = attention_scores_25, y = _inversed_attention_scores_27_y_0)[name = tensor<string, []>("_inversed_attention_scores_27")]; - tensor<fp32, [1, 12, 128, 128]> input_143 = add(x = _inversed_attention_scores_27, y = attention_mask_1)[name = tensor<string, []>("input_143")]; - tensor<fp32, [1, 12, 128, 128]> input_145 = softmax(axis = var_8, x = input_143)[name = tensor<string, []>("input_145")]; - tensor<bool, []> context_layer_25_transpose_x_0 = const()[name = tensor<string, []>("context_layer_25_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> context_layer_25_transpose_y_0 = const()[name = tensor<string, []>("context_layer_25_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<fp32, [1, 12, 128, 32]> transpose_83 = transpose(perm = var_579, x = x_79)[name = tensor<string, []>("transpose_83")]; - tensor<fp32, [1, 12, 128, 32]> context_layer_25 = matmul(transpose_x = context_layer_25_transpose_x_0, transpose_y = context_layer_25_transpose_y_0, x = input_145, y = transpose_83)[name = tensor<string, []>("context_layer_25")]; - tensor<int32, [4]> var_595 = const()[name = tensor<string, []>("op_595"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [3]> var_600 = const()[name = tensor<string, []>("op_600"), val = tensor<int32, [3]>([1, 128, 384])]; - tensor<fp32, [1, 128, 12, 32]> transpose_80 = transpose(perm = var_595, x = context_layer_25)[name = tensor<string, []>("transpose_80")]; - tensor<fp32, [1, 128, 384]> input_147 = reshape(shape = var_600, x = transpose_80)[name = tensor<string, []>("input_147")]; - tensor<fp32, [1, 128, 384]> linear_39 = linear(bias = model_encoder_layer_6_attention_output_dense_bias, weight = model_encoder_layer_6_attention_output_dense_weight, x = input_147)[name = tensor<string, []>("linear_39")]; - tensor<fp32, [1, 128, 384]> input_151 = add(x = linear_39, y = input_141)[name = tensor<string, []>("input_151")]; - tensor<int32, [1]> input_153_axes_0 = const()[name = tensor<string, []>("input_153_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp32, [1, 128, 384]> input_153 = layer_norm(axes = input_153_axes_0, beta = model_encoder_layer_6_attention_output_LayerNorm_bias, epsilon = var_10, gamma = model_encoder_layer_6_attention_output_LayerNorm_weight, x = input_151)[name = tensor<string, []>("input_153")]; - tensor<fp32, [1, 128, 1536]> linear_40 = linear(bias = model_encoder_layer_6_intermediate_dense_bias, weight = model_encoder_layer_6_intermediate_dense_weight, x = input_153)[name = tensor<string, []>("linear_40")]; - tensor<string, []> input_157_mode_0 = const()[name = tensor<string, []>("input_157_mode_0"), val = tensor<string, []>("EXACT")]; - tensor<fp32, [1, 128, 1536]> input_157 = gelu(mode = input_157_mode_0, x = linear_40)[name = tensor<string, []>("input_157")]; - tensor<fp32, [1, 128, 384]> linear_41 = linear(bias = model_encoder_layer_6_output_dense_bias, weight = model_encoder_layer_6_output_dense_weight, x = input_157)[name = tensor<string, []>("linear_41")]; - tensor<fp32, [1, 128, 384]> input_161 = add(x = linear_41, y = input_153)[name = tensor<string, []>("input_161")]; - tensor<int32, [1]> input_163_axes_0 = const()[name = tensor<string, []>("input_163_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp32, [1, 128, 384]> input_163 = layer_norm(axes = input_163_axes_0, beta = model_encoder_layer_6_output_LayerNorm_bias, epsilon = var_10, gamma = model_encoder_layer_6_output_LayerNorm_weight, x = input_161)[name = tensor<string, []>("input_163")]; - tensor<fp32, [1, 128, 384]> linear_42 = linear(bias = model_encoder_layer_7_attention_self_query_bias, weight = model_encoder_layer_7_attention_self_query_weight, x = input_163)[name = tensor<string, []>("linear_42")]; - tensor<fp32, [1, 128, 384]> linear_43 = linear(bias = model_encoder_layer_7_attention_self_key_bias, weight = model_encoder_layer_7_attention_self_key_weight, x = input_163)[name = tensor<string, []>("linear_43")]; - tensor<int32, [4]> var_645 = const()[name = tensor<string, []>("op_645"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_87 = reshape(shape = var_645, x = linear_43)[name = tensor<string, []>("x_87")]; - tensor<fp32, [1, 128, 384]> linear_44 = linear(bias = model_encoder_layer_7_attention_self_value_bias, weight = model_encoder_layer_7_attention_self_value_weight, x = input_163)[name = tensor<string, []>("linear_44")]; - tensor<int32, [4]> var_654 = const()[name = tensor<string, []>("op_654"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_91 = reshape(shape = var_654, x = linear_44)[name = tensor<string, []>("x_91")]; - tensor<int32, [4]> var_656 = const()[name = tensor<string, []>("op_656"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> var_660 = const()[name = tensor<string, []>("op_660"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_95 = reshape(shape = var_660, x = linear_42)[name = tensor<string, []>("x_95")]; - tensor<bool, []> attention_scores_29_transpose_x_0 = const()[name = tensor<string, []>("attention_scores_29_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> attention_scores_29_transpose_y_0 = const()[name = tensor<string, []>("attention_scores_29_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<int32, [4]> transpose_50_perm_0 = const()[name = tensor<string, []>("transpose_50_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> transpose_51_perm_0 = const()[name = tensor<string, []>("transpose_51_perm_0"), val = tensor<int32, [4]>([0, 2, 3, 1])]; - tensor<fp32, [1, 12, 32, 128]> transpose_77 = transpose(perm = transpose_51_perm_0, x = x_87)[name = tensor<string, []>("transpose_77")]; - tensor<fp32, [1, 12, 128, 32]> transpose_78 = transpose(perm = transpose_50_perm_0, x = x_95)[name = tensor<string, []>("transpose_78")]; - tensor<fp32, [1, 12, 128, 128]> attention_scores_29 = matmul(transpose_x = attention_scores_29_transpose_x_0, transpose_y = attention_scores_29_transpose_y_0, x = transpose_78, y = transpose_77)[name = tensor<string, []>("attention_scores_29")]; - tensor<fp32, []> _inversed_attention_scores_31_y_0 = const()[name = tensor<string, []>("_inversed_attention_scores_31_y_0"), val = tensor<fp32, []>(0x1.6a09e6p-3)]; - tensor<fp32, [1, 12, 128, 128]> _inversed_attention_scores_31 = mul(x = attention_scores_29, y = _inversed_attention_scores_31_y_0)[name = tensor<string, []>("_inversed_attention_scores_31")]; - tensor<fp32, [1, 12, 128, 128]> input_165 = add(x = _inversed_attention_scores_31, y = attention_mask_1)[name = tensor<string, []>("input_165")]; - tensor<fp32, [1, 12, 128, 128]> input_167 = softmax(axis = var_8, x = input_165)[name = tensor<string, []>("input_167")]; - tensor<bool, []> context_layer_29_transpose_x_0 = const()[name = tensor<string, []>("context_layer_29_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> context_layer_29_transpose_y_0 = const()[name = tensor<string, []>("context_layer_29_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<fp32, [1, 12, 128, 32]> transpose_79 = transpose(perm = var_656, x = x_91)[name = tensor<string, []>("transpose_79")]; - tensor<fp32, [1, 12, 128, 32]> context_layer_29 = matmul(transpose_x = context_layer_29_transpose_x_0, transpose_y = context_layer_29_transpose_y_0, x = input_167, y = transpose_79)[name = tensor<string, []>("context_layer_29")]; - tensor<int32, [4]> var_672 = const()[name = tensor<string, []>("op_672"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [3]> var_677 = const()[name = tensor<string, []>("op_677"), val = tensor<int32, [3]>([1, 128, 384])]; - tensor<fp32, [1, 128, 12, 32]> transpose_76 = transpose(perm = var_672, x = context_layer_29)[name = tensor<string, []>("transpose_76")]; - tensor<fp32, [1, 128, 384]> input_169 = reshape(shape = var_677, x = transpose_76)[name = tensor<string, []>("input_169")]; - tensor<fp32, [1, 128, 384]> linear_45 = linear(bias = model_encoder_layer_7_attention_output_dense_bias, weight = model_encoder_layer_7_attention_output_dense_weight, x = input_169)[name = tensor<string, []>("linear_45")]; - tensor<fp32, [1, 128, 384]> input_173 = add(x = linear_45, y = input_163)[name = tensor<string, []>("input_173")]; - tensor<int32, [1]> input_175_axes_0 = const()[name = tensor<string, []>("input_175_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp32, [1, 128, 384]> input_175 = layer_norm(axes = input_175_axes_0, beta = model_encoder_layer_7_attention_output_LayerNorm_bias, epsilon = var_10, gamma = model_encoder_layer_7_attention_output_LayerNorm_weight, x = input_173)[name = tensor<string, []>("input_175")]; - tensor<fp32, [1, 128, 1536]> linear_46 = linear(bias = model_encoder_layer_7_intermediate_dense_bias, weight = model_encoder_layer_7_intermediate_dense_weight, x = input_175)[name = tensor<string, []>("linear_46")]; - tensor<string, []> input_179_mode_0 = const()[name = tensor<string, []>("input_179_mode_0"), val = tensor<string, []>("EXACT")]; - tensor<fp32, [1, 128, 1536]> input_179 = gelu(mode = input_179_mode_0, x = linear_46)[name = tensor<string, []>("input_179")]; - tensor<fp32, [1, 128, 384]> linear_47 = linear(bias = model_encoder_layer_7_output_dense_bias, weight = model_encoder_layer_7_output_dense_weight, x = input_179)[name = tensor<string, []>("linear_47")]; - tensor<fp32, [1, 128, 384]> input_183 = add(x = linear_47, y = input_175)[name = tensor<string, []>("input_183")]; - tensor<int32, [1]> input_185_axes_0 = const()[name = tensor<string, []>("input_185_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp32, [1, 128, 384]> input_185 = layer_norm(axes = input_185_axes_0, beta = model_encoder_layer_7_output_LayerNorm_bias, epsilon = var_10, gamma = model_encoder_layer_7_output_LayerNorm_weight, x = input_183)[name = tensor<string, []>("input_185")]; - tensor<fp32, [1, 128, 384]> linear_48 = linear(bias = model_encoder_layer_8_attention_self_query_bias, weight = model_encoder_layer_8_attention_self_query_weight, x = input_185)[name = tensor<string, []>("linear_48")]; - tensor<fp32, [1, 128, 384]> linear_49 = linear(bias = model_encoder_layer_8_attention_self_key_bias, weight = model_encoder_layer_8_attention_self_key_weight, x = input_185)[name = tensor<string, []>("linear_49")]; - tensor<int32, [4]> var_722 = const()[name = tensor<string, []>("op_722"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_99 = reshape(shape = var_722, x = linear_49)[name = tensor<string, []>("x_99")]; - tensor<fp32, [1, 128, 384]> linear_50 = linear(bias = model_encoder_layer_8_attention_self_value_bias, weight = model_encoder_layer_8_attention_self_value_weight, x = input_185)[name = tensor<string, []>("linear_50")]; - tensor<int32, [4]> var_731 = const()[name = tensor<string, []>("op_731"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_103 = reshape(shape = var_731, x = linear_50)[name = tensor<string, []>("x_103")]; - tensor<int32, [4]> var_733 = const()[name = tensor<string, []>("op_733"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> var_737 = const()[name = tensor<string, []>("op_737"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_107 = reshape(shape = var_737, x = linear_48)[name = tensor<string, []>("x_107")]; - tensor<bool, []> attention_scores_33_transpose_x_0 = const()[name = tensor<string, []>("attention_scores_33_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> attention_scores_33_transpose_y_0 = const()[name = tensor<string, []>("attention_scores_33_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<int32, [4]> transpose_52_perm_0 = const()[name = tensor<string, []>("transpose_52_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> transpose_53_perm_0 = const()[name = tensor<string, []>("transpose_53_perm_0"), val = tensor<int32, [4]>([0, 2, 3, 1])]; - tensor<fp32, [1, 12, 32, 128]> transpose_73 = transpose(perm = transpose_53_perm_0, x = x_99)[name = tensor<string, []>("transpose_73")]; - tensor<fp32, [1, 12, 128, 32]> transpose_74 = transpose(perm = transpose_52_perm_0, x = x_107)[name = tensor<string, []>("transpose_74")]; - tensor<fp32, [1, 12, 128, 128]> attention_scores_33 = matmul(transpose_x = attention_scores_33_transpose_x_0, transpose_y = attention_scores_33_transpose_y_0, x = transpose_74, y = transpose_73)[name = tensor<string, []>("attention_scores_33")]; - tensor<fp32, []> _inversed_attention_scores_35_y_0 = const()[name = tensor<string, []>("_inversed_attention_scores_35_y_0"), val = tensor<fp32, []>(0x1.6a09e6p-3)]; - tensor<fp32, [1, 12, 128, 128]> _inversed_attention_scores_35 = mul(x = attention_scores_33, y = _inversed_attention_scores_35_y_0)[name = tensor<string, []>("_inversed_attention_scores_35")]; - tensor<fp32, [1, 12, 128, 128]> input_187 = add(x = _inversed_attention_scores_35, y = attention_mask_1)[name = tensor<string, []>("input_187")]; - tensor<fp32, [1, 12, 128, 128]> input_189 = softmax(axis = var_8, x = input_187)[name = tensor<string, []>("input_189")]; - tensor<bool, []> context_layer_33_transpose_x_0 = const()[name = tensor<string, []>("context_layer_33_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> context_layer_33_transpose_y_0 = const()[name = tensor<string, []>("context_layer_33_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<fp32, [1, 12, 128, 32]> transpose_75 = transpose(perm = var_733, x = x_103)[name = tensor<string, []>("transpose_75")]; - tensor<fp32, [1, 12, 128, 32]> context_layer_33 = matmul(transpose_x = context_layer_33_transpose_x_0, transpose_y = context_layer_33_transpose_y_0, x = input_189, y = transpose_75)[name = tensor<string, []>("context_layer_33")]; - tensor<int32, [4]> var_749 = const()[name = tensor<string, []>("op_749"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [3]> var_754 = const()[name = tensor<string, []>("op_754"), val = tensor<int32, [3]>([1, 128, 384])]; - tensor<fp32, [1, 128, 12, 32]> transpose_72 = transpose(perm = var_749, x = context_layer_33)[name = tensor<string, []>("transpose_72")]; - tensor<fp32, [1, 128, 384]> input_191 = reshape(shape = var_754, x = transpose_72)[name = tensor<string, []>("input_191")]; - tensor<fp32, [1, 128, 384]> linear_51 = linear(bias = model_encoder_layer_8_attention_output_dense_bias, weight = model_encoder_layer_8_attention_output_dense_weight, x = input_191)[name = tensor<string, []>("linear_51")]; - tensor<fp32, [1, 128, 384]> input_195 = add(x = linear_51, y = input_185)[name = tensor<string, []>("input_195")]; - tensor<int32, [1]> input_197_axes_0 = const()[name = tensor<string, []>("input_197_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp32, [1, 128, 384]> input_197 = layer_norm(axes = input_197_axes_0, beta = model_encoder_layer_8_attention_output_LayerNorm_bias, epsilon = var_10, gamma = model_encoder_layer_8_attention_output_LayerNorm_weight, x = input_195)[name = tensor<string, []>("input_197")]; - tensor<fp32, [1, 128, 1536]> linear_52 = linear(bias = model_encoder_layer_8_intermediate_dense_bias, weight = model_encoder_layer_8_intermediate_dense_weight, x = input_197)[name = tensor<string, []>("linear_52")]; - tensor<string, []> input_201_mode_0 = const()[name = tensor<string, []>("input_201_mode_0"), val = tensor<string, []>("EXACT")]; - tensor<fp32, [1, 128, 1536]> input_201 = gelu(mode = input_201_mode_0, x = linear_52)[name = tensor<string, []>("input_201")]; - tensor<fp32, [1, 128, 384]> linear_53 = linear(bias = model_encoder_layer_8_output_dense_bias, weight = model_encoder_layer_8_output_dense_weight, x = input_201)[name = tensor<string, []>("linear_53")]; - tensor<fp32, [1, 128, 384]> input_205 = add(x = linear_53, y = input_197)[name = tensor<string, []>("input_205")]; - tensor<int32, [1]> input_207_axes_0 = const()[name = tensor<string, []>("input_207_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp32, [1, 128, 384]> input_207 = layer_norm(axes = input_207_axes_0, beta = model_encoder_layer_8_output_LayerNorm_bias, epsilon = var_10, gamma = model_encoder_layer_8_output_LayerNorm_weight, x = input_205)[name = tensor<string, []>("input_207")]; - tensor<fp32, [1, 128, 384]> linear_54 = linear(bias = model_encoder_layer_9_attention_self_query_bias, weight = model_encoder_layer_9_attention_self_query_weight, x = input_207)[name = tensor<string, []>("linear_54")]; - tensor<fp32, [1, 128, 384]> linear_55 = linear(bias = model_encoder_layer_9_attention_self_key_bias, weight = model_encoder_layer_9_attention_self_key_weight, x = input_207)[name = tensor<string, []>("linear_55")]; - tensor<int32, [4]> var_799 = const()[name = tensor<string, []>("op_799"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_111 = reshape(shape = var_799, x = linear_55)[name = tensor<string, []>("x_111")]; - tensor<fp32, [1, 128, 384]> linear_56 = linear(bias = model_encoder_layer_9_attention_self_value_bias, weight = model_encoder_layer_9_attention_self_value_weight, x = input_207)[name = tensor<string, []>("linear_56")]; - tensor<int32, [4]> var_808 = const()[name = tensor<string, []>("op_808"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_115 = reshape(shape = var_808, x = linear_56)[name = tensor<string, []>("x_115")]; - tensor<int32, [4]> var_810 = const()[name = tensor<string, []>("op_810"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> var_814 = const()[name = tensor<string, []>("op_814"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_119 = reshape(shape = var_814, x = linear_54)[name = tensor<string, []>("x_119")]; - tensor<bool, []> attention_scores_37_transpose_x_0 = const()[name = tensor<string, []>("attention_scores_37_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> attention_scores_37_transpose_y_0 = const()[name = tensor<string, []>("attention_scores_37_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<int32, [4]> transpose_54_perm_0 = const()[name = tensor<string, []>("transpose_54_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> transpose_55_perm_0 = const()[name = tensor<string, []>("transpose_55_perm_0"), val = tensor<int32, [4]>([0, 2, 3, 1])]; - tensor<fp32, [1, 12, 32, 128]> transpose_69 = transpose(perm = transpose_55_perm_0, x = x_111)[name = tensor<string, []>("transpose_69")]; - tensor<fp32, [1, 12, 128, 32]> transpose_70 = transpose(perm = transpose_54_perm_0, x = x_119)[name = tensor<string, []>("transpose_70")]; - tensor<fp32, [1, 12, 128, 128]> attention_scores_37 = matmul(transpose_x = attention_scores_37_transpose_x_0, transpose_y = attention_scores_37_transpose_y_0, x = transpose_70, y = transpose_69)[name = tensor<string, []>("attention_scores_37")]; - tensor<fp32, []> _inversed_attention_scores_39_y_0 = const()[name = tensor<string, []>("_inversed_attention_scores_39_y_0"), val = tensor<fp32, []>(0x1.6a09e6p-3)]; - tensor<fp32, [1, 12, 128, 128]> _inversed_attention_scores_39 = mul(x = attention_scores_37, y = _inversed_attention_scores_39_y_0)[name = tensor<string, []>("_inversed_attention_scores_39")]; - tensor<fp32, [1, 12, 128, 128]> input_209 = add(x = _inversed_attention_scores_39, y = attention_mask_1)[name = tensor<string, []>("input_209")]; - tensor<fp32, [1, 12, 128, 128]> input_211 = softmax(axis = var_8, x = input_209)[name = tensor<string, []>("input_211")]; - tensor<bool, []> context_layer_37_transpose_x_0 = const()[name = tensor<string, []>("context_layer_37_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> context_layer_37_transpose_y_0 = const()[name = tensor<string, []>("context_layer_37_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<fp32, [1, 12, 128, 32]> transpose_71 = transpose(perm = var_810, x = x_115)[name = tensor<string, []>("transpose_71")]; - tensor<fp32, [1, 12, 128, 32]> context_layer_37 = matmul(transpose_x = context_layer_37_transpose_x_0, transpose_y = context_layer_37_transpose_y_0, x = input_211, y = transpose_71)[name = tensor<string, []>("context_layer_37")]; - tensor<int32, [4]> var_826 = const()[name = tensor<string, []>("op_826"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [3]> var_831 = const()[name = tensor<string, []>("op_831"), val = tensor<int32, [3]>([1, 128, 384])]; - tensor<fp32, [1, 128, 12, 32]> transpose_68 = transpose(perm = var_826, x = context_layer_37)[name = tensor<string, []>("transpose_68")]; - tensor<fp32, [1, 128, 384]> input_213 = reshape(shape = var_831, x = transpose_68)[name = tensor<string, []>("input_213")]; - tensor<fp32, [1, 128, 384]> linear_57 = linear(bias = model_encoder_layer_9_attention_output_dense_bias, weight = model_encoder_layer_9_attention_output_dense_weight, x = input_213)[name = tensor<string, []>("linear_57")]; - tensor<fp32, [1, 128, 384]> input_217 = add(x = linear_57, y = input_207)[name = tensor<string, []>("input_217")]; - tensor<int32, [1]> input_219_axes_0 = const()[name = tensor<string, []>("input_219_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp32, [1, 128, 384]> input_219 = layer_norm(axes = input_219_axes_0, beta = model_encoder_layer_9_attention_output_LayerNorm_bias, epsilon = var_10, gamma = model_encoder_layer_9_attention_output_LayerNorm_weight, x = input_217)[name = tensor<string, []>("input_219")]; - tensor<fp32, [1, 128, 1536]> linear_58 = linear(bias = model_encoder_layer_9_intermediate_dense_bias, weight = model_encoder_layer_9_intermediate_dense_weight, x = input_219)[name = tensor<string, []>("linear_58")]; - tensor<string, []> input_223_mode_0 = const()[name = tensor<string, []>("input_223_mode_0"), val = tensor<string, []>("EXACT")]; - tensor<fp32, [1, 128, 1536]> input_223 = gelu(mode = input_223_mode_0, x = linear_58)[name = tensor<string, []>("input_223")]; - tensor<fp32, [1, 128, 384]> linear_59 = linear(bias = model_encoder_layer_9_output_dense_bias, weight = model_encoder_layer_9_output_dense_weight, x = input_223)[name = tensor<string, []>("linear_59")]; - tensor<fp32, [1, 128, 384]> input_227 = add(x = linear_59, y = input_219)[name = tensor<string, []>("input_227")]; - tensor<int32, [1]> input_229_axes_0 = const()[name = tensor<string, []>("input_229_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp32, [1, 128, 384]> input_229 = layer_norm(axes = input_229_axes_0, beta = model_encoder_layer_9_output_LayerNorm_bias, epsilon = var_10, gamma = model_encoder_layer_9_output_LayerNorm_weight, x = input_227)[name = tensor<string, []>("input_229")]; - tensor<fp32, [1, 128, 384]> linear_60 = linear(bias = model_encoder_layer_10_attention_self_query_bias, weight = model_encoder_layer_10_attention_self_query_weight, x = input_229)[name = tensor<string, []>("linear_60")]; - tensor<fp32, [1, 128, 384]> linear_61 = linear(bias = model_encoder_layer_10_attention_self_key_bias, weight = model_encoder_layer_10_attention_self_key_weight, x = input_229)[name = tensor<string, []>("linear_61")]; - tensor<int32, [4]> var_876 = const()[name = tensor<string, []>("op_876"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_123 = reshape(shape = var_876, x = linear_61)[name = tensor<string, []>("x_123")]; - tensor<fp32, [1, 128, 384]> linear_62 = linear(bias = model_encoder_layer_10_attention_self_value_bias, weight = model_encoder_layer_10_attention_self_value_weight, x = input_229)[name = tensor<string, []>("linear_62")]; - tensor<int32, [4]> var_885 = const()[name = tensor<string, []>("op_885"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_127 = reshape(shape = var_885, x = linear_62)[name = tensor<string, []>("x_127")]; - tensor<int32, [4]> var_887 = const()[name = tensor<string, []>("op_887"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> var_891 = const()[name = tensor<string, []>("op_891"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_131 = reshape(shape = var_891, x = linear_60)[name = tensor<string, []>("x_131")]; - tensor<bool, []> attention_scores_41_transpose_x_0 = const()[name = tensor<string, []>("attention_scores_41_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> attention_scores_41_transpose_y_0 = const()[name = tensor<string, []>("attention_scores_41_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<int32, [4]> transpose_56_perm_0 = const()[name = tensor<string, []>("transpose_56_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> transpose_57_perm_0 = const()[name = tensor<string, []>("transpose_57_perm_0"), val = tensor<int32, [4]>([0, 2, 3, 1])]; - tensor<fp32, [1, 12, 32, 128]> transpose_65 = transpose(perm = transpose_57_perm_0, x = x_123)[name = tensor<string, []>("transpose_65")]; - tensor<fp32, [1, 12, 128, 32]> transpose_66 = transpose(perm = transpose_56_perm_0, x = x_131)[name = tensor<string, []>("transpose_66")]; - tensor<fp32, [1, 12, 128, 128]> attention_scores_41 = matmul(transpose_x = attention_scores_41_transpose_x_0, transpose_y = attention_scores_41_transpose_y_0, x = transpose_66, y = transpose_65)[name = tensor<string, []>("attention_scores_41")]; - tensor<fp32, []> _inversed_attention_scores_43_y_0 = const()[name = tensor<string, []>("_inversed_attention_scores_43_y_0"), val = tensor<fp32, []>(0x1.6a09e6p-3)]; - tensor<fp32, [1, 12, 128, 128]> _inversed_attention_scores_43 = mul(x = attention_scores_41, y = _inversed_attention_scores_43_y_0)[name = tensor<string, []>("_inversed_attention_scores_43")]; - tensor<fp32, [1, 12, 128, 128]> input_231 = add(x = _inversed_attention_scores_43, y = attention_mask_1)[name = tensor<string, []>("input_231")]; - tensor<fp32, [1, 12, 128, 128]> input_233 = softmax(axis = var_8, x = input_231)[name = tensor<string, []>("input_233")]; - tensor<bool, []> context_layer_41_transpose_x_0 = const()[name = tensor<string, []>("context_layer_41_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> context_layer_41_transpose_y_0 = const()[name = tensor<string, []>("context_layer_41_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<fp32, [1, 12, 128, 32]> transpose_67 = transpose(perm = var_887, x = x_127)[name = tensor<string, []>("transpose_67")]; - tensor<fp32, [1, 12, 128, 32]> context_layer_41 = matmul(transpose_x = context_layer_41_transpose_x_0, transpose_y = context_layer_41_transpose_y_0, x = input_233, y = transpose_67)[name = tensor<string, []>("context_layer_41")]; - tensor<int32, [4]> var_903 = const()[name = tensor<string, []>("op_903"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [3]> var_908 = const()[name = tensor<string, []>("op_908"), val = tensor<int32, [3]>([1, 128, 384])]; - tensor<fp32, [1, 128, 12, 32]> transpose_64 = transpose(perm = var_903, x = context_layer_41)[name = tensor<string, []>("transpose_64")]; - tensor<fp32, [1, 128, 384]> input_235 = reshape(shape = var_908, x = transpose_64)[name = tensor<string, []>("input_235")]; - tensor<fp32, [1, 128, 384]> linear_63 = linear(bias = model_encoder_layer_10_attention_output_dense_bias, weight = model_encoder_layer_10_attention_output_dense_weight, x = input_235)[name = tensor<string, []>("linear_63")]; - tensor<fp32, [1, 128, 384]> input_239 = add(x = linear_63, y = input_229)[name = tensor<string, []>("input_239")]; - tensor<int32, [1]> input_241_axes_0 = const()[name = tensor<string, []>("input_241_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp32, [1, 128, 384]> input_241 = layer_norm(axes = input_241_axes_0, beta = model_encoder_layer_10_attention_output_LayerNorm_bias, epsilon = var_10, gamma = model_encoder_layer_10_attention_output_LayerNorm_weight, x = input_239)[name = tensor<string, []>("input_241")]; - tensor<fp32, [1, 128, 1536]> linear_64 = linear(bias = model_encoder_layer_10_intermediate_dense_bias, weight = model_encoder_layer_10_intermediate_dense_weight, x = input_241)[name = tensor<string, []>("linear_64")]; - tensor<string, []> input_245_mode_0 = const()[name = tensor<string, []>("input_245_mode_0"), val = tensor<string, []>("EXACT")]; - tensor<fp32, [1, 128, 1536]> input_245 = gelu(mode = input_245_mode_0, x = linear_64)[name = tensor<string, []>("input_245")]; - tensor<fp32, [1, 128, 384]> linear_65 = linear(bias = model_encoder_layer_10_output_dense_bias, weight = model_encoder_layer_10_output_dense_weight, x = input_245)[name = tensor<string, []>("linear_65")]; - tensor<fp32, [1, 128, 384]> input_249 = add(x = linear_65, y = input_241)[name = tensor<string, []>("input_249")]; - tensor<int32, [1]> input_251_axes_0 = const()[name = tensor<string, []>("input_251_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp32, [1, 128, 384]> input_251 = layer_norm(axes = input_251_axes_0, beta = model_encoder_layer_10_output_LayerNorm_bias, epsilon = var_10, gamma = model_encoder_layer_10_output_LayerNorm_weight, x = input_249)[name = tensor<string, []>("input_251")]; - tensor<fp32, [1, 128, 384]> linear_66 = linear(bias = model_encoder_layer_11_attention_self_query_bias, weight = model_encoder_layer_11_attention_self_query_weight, x = input_251)[name = tensor<string, []>("linear_66")]; - tensor<fp32, [1, 128, 384]> linear_67 = linear(bias = model_encoder_layer_11_attention_self_key_bias, weight = model_encoder_layer_11_attention_self_key_weight, x = input_251)[name = tensor<string, []>("linear_67")]; - tensor<int32, [4]> var_953 = const()[name = tensor<string, []>("op_953"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_135 = reshape(shape = var_953, x = linear_67)[name = tensor<string, []>("x_135")]; - tensor<fp32, [1, 128, 384]> linear_68 = linear(bias = model_encoder_layer_11_attention_self_value_bias, weight = model_encoder_layer_11_attention_self_value_weight, x = input_251)[name = tensor<string, []>("linear_68")]; - tensor<int32, [4]> var_962 = const()[name = tensor<string, []>("op_962"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x_139 = reshape(shape = var_962, x = linear_68)[name = tensor<string, []>("x_139")]; - tensor<int32, [4]> var_964 = const()[name = tensor<string, []>("op_964"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> var_968 = const()[name = tensor<string, []>("op_968"), val = tensor<int32, [4]>([1, 128, 12, 32])]; - tensor<fp32, [1, 128, 12, 32]> x = reshape(shape = var_968, x = linear_66)[name = tensor<string, []>("x")]; - tensor<bool, []> attention_scores_45_transpose_x_0 = const()[name = tensor<string, []>("attention_scores_45_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> attention_scores_45_transpose_y_0 = const()[name = tensor<string, []>("attention_scores_45_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<int32, [4]> transpose_58_perm_0 = const()[name = tensor<string, []>("transpose_58_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [4]> transpose_59_perm_0 = const()[name = tensor<string, []>("transpose_59_perm_0"), val = tensor<int32, [4]>([0, 2, 3, 1])]; - tensor<fp32, [1, 12, 32, 128]> transpose_61 = transpose(perm = transpose_59_perm_0, x = x_135)[name = tensor<string, []>("transpose_61")]; - tensor<fp32, [1, 12, 128, 32]> transpose_62 = transpose(perm = transpose_58_perm_0, x = x)[name = tensor<string, []>("transpose_62")]; - tensor<fp32, [1, 12, 128, 128]> attention_scores_45 = matmul(transpose_x = attention_scores_45_transpose_x_0, transpose_y = attention_scores_45_transpose_y_0, x = transpose_62, y = transpose_61)[name = tensor<string, []>("attention_scores_45")]; - tensor<fp32, []> _inversed_attention_scores_y_0 = const()[name = tensor<string, []>("_inversed_attention_scores_y_0"), val = tensor<fp32, []>(0x1.6a09e6p-3)]; - tensor<fp32, [1, 12, 128, 128]> _inversed_attention_scores = mul(x = attention_scores_45, y = _inversed_attention_scores_y_0)[name = tensor<string, []>("_inversed_attention_scores")]; - tensor<fp32, [1, 12, 128, 128]> input_253 = add(x = _inversed_attention_scores, y = attention_mask_1)[name = tensor<string, []>("input_253")]; - tensor<fp32, [1, 12, 128, 128]> input_255 = softmax(axis = var_8, x = input_253)[name = tensor<string, []>("input_255")]; - tensor<bool, []> context_layer_45_transpose_x_0 = const()[name = tensor<string, []>("context_layer_45_transpose_x_0"), val = tensor<bool, []>(false)]; - tensor<bool, []> context_layer_45_transpose_y_0 = const()[name = tensor<string, []>("context_layer_45_transpose_y_0"), val = tensor<bool, []>(false)]; - tensor<fp32, [1, 12, 128, 32]> transpose_63 = transpose(perm = var_964, x = x_139)[name = tensor<string, []>("transpose_63")]; - tensor<fp32, [1, 12, 128, 32]> context_layer_45 = matmul(transpose_x = context_layer_45_transpose_x_0, transpose_y = context_layer_45_transpose_y_0, x = input_255, y = transpose_63)[name = tensor<string, []>("context_layer_45")]; - tensor<int32, [4]> var_980 = const()[name = tensor<string, []>("op_980"), val = tensor<int32, [4]>([0, 2, 1, 3])]; - tensor<int32, [3]> var_985 = const()[name = tensor<string, []>("op_985"), val = tensor<int32, [3]>([1, 128, 384])]; - tensor<fp32, [1, 128, 12, 32]> transpose_60 = transpose(perm = var_980, x = context_layer_45)[name = tensor<string, []>("transpose_60")]; - tensor<fp32, [1, 128, 384]> input_257 = reshape(shape = var_985, x = transpose_60)[name = tensor<string, []>("input_257")]; - tensor<fp32, [1, 128, 384]> linear_69 = linear(bias = model_encoder_layer_11_attention_output_dense_bias, weight = model_encoder_layer_11_attention_output_dense_weight, x = input_257)[name = tensor<string, []>("linear_69")]; - tensor<fp32, [1, 128, 384]> input_261 = add(x = linear_69, y = input_251)[name = tensor<string, []>("input_261")]; - tensor<int32, [1]> input_263_axes_0 = const()[name = tensor<string, []>("input_263_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp32, [1, 128, 384]> input_263 = layer_norm(axes = input_263_axes_0, beta = model_encoder_layer_11_attention_output_LayerNorm_bias, epsilon = var_10, gamma = model_encoder_layer_11_attention_output_LayerNorm_weight, x = input_261)[name = tensor<string, []>("input_263")]; - tensor<fp32, [1, 128, 1536]> linear_70 = linear(bias = model_encoder_layer_11_intermediate_dense_bias, weight = model_encoder_layer_11_intermediate_dense_weight, x = input_263)[name = tensor<string, []>("linear_70")]; - tensor<string, []> input_267_mode_0 = const()[name = tensor<string, []>("input_267_mode_0"), val = tensor<string, []>("EXACT")]; - tensor<fp32, [1, 128, 1536]> input_267 = gelu(mode = input_267_mode_0, x = linear_70)[name = tensor<string, []>("input_267")]; - tensor<fp32, [1, 128, 384]> linear_71 = linear(bias = model_encoder_layer_11_output_dense_bias, weight = model_encoder_layer_11_output_dense_weight, x = input_267)[name = tensor<string, []>("linear_71")]; - tensor<fp32, [1, 128, 384]> input_271 = add(x = linear_71, y = input_263)[name = tensor<string, []>("input_271")]; - tensor<int32, [1]> hidden_states_axes_0 = const()[name = tensor<string, []>("hidden_states_axes_0"), val = tensor<int32, [1]>([-1])]; - tensor<fp32, [1, 128, 384]> last_hidden_state = layer_norm(axes = hidden_states_axes_0, beta = model_encoder_layer_11_output_LayerNorm_bias, epsilon = var_10, gamma = model_encoder_layer_11_output_LayerNorm_weight, x = input_271)[name = tensor<string, []>("hidden_states")]; - tensor<int32, [3]> input_273_begin_0 = const()[name = tensor<string, []>("input_273_begin_0"), val = tensor<int32, [3]>([0, 0, 0])]; - tensor<int32, [3]> input_273_end_0 = const()[name = tensor<string, []>("input_273_end_0"), val = tensor<int32, [3]>([1, 1, 384])]; - tensor<bool, [3]> input_273_end_mask_0 = const()[name = tensor<string, []>("input_273_end_mask_0"), val = tensor<bool, [3]>([true, false, true])]; - tensor<bool, [3]> input_273_squeeze_mask_0 = const()[name = tensor<string, []>("input_273_squeeze_mask_0"), val = tensor<bool, [3]>([false, true, false])]; - tensor<fp32, [1, 384]> input_273 = slice_by_index(begin = input_273_begin_0, end = input_273_end_0, end_mask = input_273_end_mask_0, squeeze_mask = input_273_squeeze_mask_0, x = last_hidden_state)[name = tensor<string, []>("input_273")]; - tensor<fp32, [1, 384]> linear_72 = linear(bias = model_pooler_dense_bias, weight = model_pooler_dense_weight, x = input_273)[name = tensor<string, []>("linear_72")]; - tensor<fp32, [1, 384]> pooler_output = tanh(x = linear_72)[name = tensor<string, []>("op_1020")]; - } -> (last_hidden_state, pooler_output); -} \ No newline at end of file diff --git a/Sources/SwiftNLP/Models/float32_model.mlmodelc/weights/weight.bin b/Sources/SwiftNLP/Models/float32_model.mlmodelc/weights/weight.bin deleted file mode 100644 index 71d86e38eb7ae4707228b8af0888c01ab9b7c4a5..0000000000000000000000000000000000000000 Binary files a/Sources/SwiftNLP/Models/float32_model.mlmodelc/weights/weight.bin and /dev/null differ