From f7686c69eef72226c02838569c4449b6ecd8afcf Mon Sep 17 00:00:00 2001 From: Jim Wallace <james.wallace@uwaterloo.ca> Date: Fri, 5 Apr 2024 15:07:52 -0400 Subject: [PATCH] Added default init() to CoreMLEncoder --- Package.resolved | 11 ++++++++++- Sources/SwiftNLP/2. Encoding/CoreMLEncoder.swift | 10 ++++++++++ Tests/SwiftNLPTests/AllMiniLM_sampleTest.swift | 2 +- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/Package.resolved b/Package.resolved index 7e9c86f8..a7b19d4f 100644 --- a/Package.resolved +++ b/Package.resolved @@ -5,7 +5,7 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/tannerdsilva/CLMDB.git", "state" : { - "revision" : "39e2f317c898824777365d5669f6fa7571541893", + "revision" : "30d45263c8e512b01ab073e77ebff09039bdc593", "version" : "0.9.31" } }, @@ -45,6 +45,15 @@ "version" : "0.1.14" } }, + { + "identity" : "surge", + "kind" : "remoteSourceControl", + "location" : "https://github.com/Jounce/Surge.git", + "state" : { + "revision" : "6e4a47e63da8801afe6188cf039e9f04eb577721", + "version" : "2.3.2" + } + }, { "identity" : "swift-argument-parser", "kind" : "remoteSourceControl", diff --git a/Sources/SwiftNLP/2. Encoding/CoreMLEncoder.swift b/Sources/SwiftNLP/2. Encoding/CoreMLEncoder.swift index 751bc956..311ba600 100644 --- a/Sources/SwiftNLP/2. Encoding/CoreMLEncoder.swift +++ b/Sources/SwiftNLP/2. Encoding/CoreMLEncoder.swift @@ -39,9 +39,19 @@ public macro MODEL_VALIDATE_NAME_AND_SET_INPUT_SIZE() = #externalMacro( class CoreMLEncoder<Scalar: BinaryFloatingPoint & Codable>: SNLPEncoder { + + var zeroes: [Scalar] + var dimensions: UInt var model: String + required init() { + zeroes = [] + dimensions = 0 + model = "all-MiniLM-L6-v2" + } + + func encodeToken(_ token: String) -> [Scalar] { let tokenization = LLMEmbeddings(model_type: self.model).tokenizer.tokenizeToIds(text: token) as! [Scalar] return tokenization diff --git a/Tests/SwiftNLPTests/AllMiniLM_sampleTest.swift b/Tests/SwiftNLPTests/AllMiniLM_sampleTest.swift index 0cdb0ce8..026cfd9e 100644 --- a/Tests/SwiftNLPTests/AllMiniLM_sampleTest.swift +++ b/Tests/SwiftNLPTests/AllMiniLM_sampleTest.swift @@ -34,7 +34,7 @@ final class BERT_test: XCTestCase { var query_embedding: [Float] = [] var embedding_dim: Int = 384 - var model = LLMEmbeddings(model_type: "gte-small") + var model = LLMEmbeddings(model_type: model) query_embedding = await model.encode(sentence: query[0])! -- GitLab