From f7686c69eef72226c02838569c4449b6ecd8afcf Mon Sep 17 00:00:00 2001
From: Jim Wallace <james.wallace@uwaterloo.ca>
Date: Fri, 5 Apr 2024 15:07:52 -0400
Subject: [PATCH] Added default init() to CoreMLEncoder

---
 Package.resolved                                 | 11 ++++++++++-
 Sources/SwiftNLP/2. Encoding/CoreMLEncoder.swift | 10 ++++++++++
 Tests/SwiftNLPTests/AllMiniLM_sampleTest.swift   |  2 +-
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/Package.resolved b/Package.resolved
index 7e9c86f8..a7b19d4f 100644
--- a/Package.resolved
+++ b/Package.resolved
@@ -5,7 +5,7 @@
       "kind" : "remoteSourceControl",
       "location" : "https://github.com/tannerdsilva/CLMDB.git",
       "state" : {
-        "revision" : "39e2f317c898824777365d5669f6fa7571541893",
+        "revision" : "30d45263c8e512b01ab073e77ebff09039bdc593",
         "version" : "0.9.31"
       }
     },
@@ -45,6 +45,15 @@
         "version" : "0.1.14"
       }
     },
+    {
+      "identity" : "surge",
+      "kind" : "remoteSourceControl",
+      "location" : "https://github.com/Jounce/Surge.git",
+      "state" : {
+        "revision" : "6e4a47e63da8801afe6188cf039e9f04eb577721",
+        "version" : "2.3.2"
+      }
+    },
     {
       "identity" : "swift-argument-parser",
       "kind" : "remoteSourceControl",
diff --git a/Sources/SwiftNLP/2. Encoding/CoreMLEncoder.swift b/Sources/SwiftNLP/2. Encoding/CoreMLEncoder.swift
index 751bc956..311ba600 100644
--- a/Sources/SwiftNLP/2. Encoding/CoreMLEncoder.swift	
+++ b/Sources/SwiftNLP/2. Encoding/CoreMLEncoder.swift	
@@ -39,9 +39,19 @@ public macro MODEL_VALIDATE_NAME_AND_SET_INPUT_SIZE() = #externalMacro(
 
 class CoreMLEncoder<Scalar: BinaryFloatingPoint & Codable>: SNLPEncoder {
     
+    
+    
     var zeroes: [Scalar]
+    var dimensions: UInt
     var model: String
         
+    required init() { // Argument-less initializer: gives each of the three stored properties a default value.
+      zeroes = [] // starts as an empty array
+      dimensions = 0 // starts at zero; NOTE(review): presumably set to the real embedding size elsewhere — confirm
+      model = "all-MiniLM-L6-v2" // default model name; encodeToken hands this to LLMEmbeddings(model_type:)
+    }
+    
+    
     func encodeToken(_ token: String) -> [Scalar] {
         let tokenization = LLMEmbeddings(model_type: self.model).tokenizer.tokenizeToIds(text: token) as! [Scalar]
         return tokenization
diff --git a/Tests/SwiftNLPTests/AllMiniLM_sampleTest.swift b/Tests/SwiftNLPTests/AllMiniLM_sampleTest.swift
index 0cdb0ce8..026cfd9e 100644
--- a/Tests/SwiftNLPTests/AllMiniLM_sampleTest.swift
+++ b/Tests/SwiftNLPTests/AllMiniLM_sampleTest.swift
@@ -34,7 +34,7 @@ final class BERT_test: XCTestCase {
             var query_embedding: [Float] = []
             var embedding_dim: Int = 384
            
-            var model = LLMEmbeddings(model_type: "gte-small")
+            var model = LLMEmbeddings(model_type: model)
            
             query_embedding = await model.encode(sentence: query[0])!
            
-- 
GitLab