diff --git a/Package.resolved b/Package.resolved
index 7e9c86f8b5f2f359860755f981e8a5e5f42c141f..a7b19d4ff9daa92c318f7b28b6bd8dc35c71d647 100644
--- a/Package.resolved
+++ b/Package.resolved
@@ -5,7 +5,7 @@
       "kind" : "remoteSourceControl",
       "location" : "https://github.com/tannerdsilva/CLMDB.git",
       "state" : {
-        "revision" : "39e2f317c898824777365d5669f6fa7571541893",
+        "revision" : "30d45263c8e512b01ab073e77ebff09039bdc593",
         "version" : "0.9.31"
       }
     },
@@ -45,6 +45,15 @@
         "version" : "0.1.14"
       }
     },
+    {
+      "identity" : "surge",
+      "kind" : "remoteSourceControl",
+      "location" : "https://github.com/Jounce/Surge.git",
+      "state" : {
+        "revision" : "6e4a47e63da8801afe6188cf039e9f04eb577721",
+        "version" : "2.3.2"
+      }
+    },
     {
       "identity" : "swift-argument-parser",
       "kind" : "remoteSourceControl",
diff --git a/Sources/SwiftNLP/2. Encoding/CoreMLEncoder.swift b/Sources/SwiftNLP/2. Encoding/CoreMLEncoder.swift
index 751bc956dc9d111390675a77dd671657b72df596..311ba60069be5c6f15f0ea4488bea8b1f3802ea1 100644
--- a/Sources/SwiftNLP/2. Encoding/CoreMLEncoder.swift
+++ b/Sources/SwiftNLP/2. Encoding/CoreMLEncoder.swift
@@ -39,9 +39,19 @@ public macro MODEL_VALIDATE_NAME_AND_SET_INPUT_SIZE() = #externalMacro(
 class CoreMLEncoder<Scalar: BinaryFloatingPoint & Codable>: SNLPEncoder {
+
+    var zeroes: [Scalar]
+    var dimensions: UInt
     var model: String
 
+    required init() {
+        zeroes = []
+        dimensions = 0
+        model = "all-MiniLM-L6-v2"
+    }
+
+
     func encodeToken(_ token: String) -> [Scalar] {
         let tokenization = LLMEmbeddings(model_type: self.model).tokenizer.tokenizeToIds(text: token) as! [Scalar]
         return tokenization
diff --git a/Tests/SwiftNLPTests/AllMiniLM_sampleTest.swift b/Tests/SwiftNLPTests/AllMiniLM_sampleTest.swift
index 0cdb0ce835f2609dd7c620552d1146316e5879e6..026cfd9ea7b16f2fd1f67cb03683080af2e2bf1d 100644
--- a/Tests/SwiftNLPTests/AllMiniLM_sampleTest.swift
+++ b/Tests/SwiftNLPTests/AllMiniLM_sampleTest.swift
@@ -34,7 +34,7 @@ final class BERT_test: XCTestCase {
         var query_embedding: [Float] = []
         var embedding_dim: Int = 384
-        var model = LLMEmbeddings(model_type: "gte-small")
+        var model = LLMEmbeddings(model_type: model)
         query_embedding = await model.encode(sentence: query[0])!
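
For reference, here is a minimal sketch of how the pieces touched by this diff might fit together in a test: the new `required init()` defaults on `CoreMLEncoder` and sentence encoding through `LLMEmbeddings`, as exercised in `AllMiniLM_sampleTest`. It is hypothetical and assumes the API surface exactly as shown above (`CoreMLEncoder<Float>()`, `LLMEmbeddings(model_type:)`, async optional `encode(sentence:)`) and the 384-dimensional output of all-MiniLM-L6-v2 used by the existing test; the test class name and sentence are made up for illustration.

```swift
import XCTest
@testable import SwiftNLP

// Illustrative sketch only: assumes CoreMLEncoder and LLMEmbeddings expose the API
// exactly as it appears in this diff (required init(), model/dimensions/zeroes
// properties, and LLMEmbeddings(model_type:).encode(sentence:)).
final class CoreMLEncoderUsageSketch: XCTestCase {

    func testDefaultEncoderConfiguration() async {
        // The new required init() supplies default state for the encoder.
        let encoder = CoreMLEncoder<Float>()
        XCTAssertEqual(encoder.model, "all-MiniLM-L6-v2")
        XCTAssertEqual(encoder.dimensions, 0)
        XCTAssertTrue(encoder.zeroes.isEmpty)

        // Sentence-level embeddings go through LLMEmbeddings, mirroring the updated
        // AllMiniLM_sampleTest; all-MiniLM-L6-v2 yields 384-dimensional vectors.
        let embeddings = LLMEmbeddings(model_type: encoder.model)
        let vector = await embeddings.encode(sentence: "SwiftNLP encodes sentences into vectors.")
        XCTAssertEqual(vector?.count, 384)
    }
}
```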