Skip to content
Snippets Groups Projects

Add interface for using generic CoreML LLMs

Merged Abhinav Jain requested to merge compile_cmd_line_generic_model_broken into main
Files
5
@@ -26,69 +26,99 @@ import Foundation
import CoreML
// NOTE(review): removed stray old-side diff line `struct CoreMLEncoder<Scalar: BinaryFloatingPoint>: SNLPEncoder {`
// — it duplicated the `class CoreMLEncoder` declaration below and left an unbalanced brace.

/// Expands to model-specific prediction code for the named input/attention-mask/output
/// variables. The arguments are the *names* (as strings) of local variables in the
/// caller's scope; the macro implementation lives in `SwiftNLPGenericLLMMacros`.
@freestanding(expression)
public macro MODEL_MAKE_PREDICTION(_ input_name: Any, _ attention_ids: Any, _ output_name: Any) = #externalMacro(
    module: "SwiftNLPGenericLLMMacros",
    type: "LLMModelPredictionCases")

/// Validates that `self.model` names a supported model and sets the matching input
/// dimension (expected to assign `self.inputDimention` — see `LLMEmbeddings.init`).
@freestanding(expression)
public macro MODEL_VALIDATE_NAME_AND_SET_INPUT_SIZE() = #externalMacro(
    module: "SwiftNLPGenericLLMMacros",
    type: "LLMModelNameValidation")
/// Encoder backed by a generic CoreML LLM, selected by name via `LLMEmbeddings`.
/// Defaults to `all_MiniLM_L6_v2` with 384-dimensional embeddings.
class CoreMLEncoder<Scalar: BinaryFloatingPoint & Codable>: SNLPEncoder {

    var zeroes: [Scalar]
    var dimensions: UInt
    // Model identifier passed through to `LLMEmbeddings(model_type:)`.
    var model: String

    required init() {
        zeroes = Array(repeating: Scalar(0), count: 384)
        dimensions = 384
        model = "all_MiniLM_L6_v2"
    }

    /// Tokenizes a single token string into its id sequence.
    func encodeToken(_ token: String) -> [Scalar] {
        // NOTE(review): `tokenizeToIds` presumably returns integer token ids; force-casting
        // the result to `[Scalar]` (a floating-point array) will trap at runtime — confirm
        // the tokenizer's return type and convert element-wise instead of using `as!`.
        let tokenization = LLMEmbeddings(model_type: self.model).tokenizer.tokenizeToIds(text: token) as! [Scalar]
        return tokenization
    }

    /// Encodes a full sentence into an embedding vector.
    func encodeSentence(_ sentence: String) -> [Scalar] {
        // NOTE(review): this casts the `Task` *handle* — not its awaited result — to
        // `[Scalar]`, which always traps at runtime (the MR branch name says "broken").
        // The fix needs an async interface or a synchronous bridge to await the task's
        // value; left as-is pending that design decision.
        let encoding = Task {
            await LLMEmbeddings(model_type: self.model).encode(sentence: sentence)
        } as! [Scalar]
        return encoding
    }
}
//@available(macOS 13.0, *)
//public class MiniLMEmbeddings {
// public let model: all_MiniLM_L6_v2
// public let tokenizer: BertTokenizer
// public let inputDimention: Int = 512
// public let outputDimention: Int = 384
//
// public init() {
// let modelConfig = MLModelConfiguration()
// modelConfig.computeUnits = .all
//
// do {
// self.model = try all_MiniLM_L6_v2(configuration: modelConfig)
// } catch {
// fatalError("Failed to load the Core ML model. Error: \(error.localizedDescription)")
// }
//
// self.tokenizer = BertTokenizer()
// }
//
// // MARK: - Dense Embeddings
//
// public func encode(sentence: String) async -> [Float]? {
// // Encode input text as bert tokens
// let inputTokens = tokenizer.buildModelTokens(sentence: sentence)
// let (inputIds, attentionMask) = tokenizer.buildModelInputs(from: inputTokens)
//
// // Send tokens through the MLModel
// let embeddings = generateEmbeddings(inputIds: inputIds, attentionMask: attentionMask)
//
// return embeddings
// }
//
// public func generateEmbeddings(inputIds: MLMultiArray, attentionMask: MLMultiArray) -> [Float]? {
// let inputFeatures = all_MiniLM_L6_v2Input(input_ids: inputIds, attention_mask: attentionMask)
//
// let output = try? model.prediction(input: inputFeatures)
// guard let embeddings = output?.embeddings else {
// return nil
// }
//
// var embeddingsArray = [Float]()
// for index in 0..<embeddings.count {
// let value = embeddings[index].floatValue
// embeddingsArray.append(Float(value))
// }
//
// return embeddingsArray
// }
//
//}
@available(macOS 13.0, *)
/// Generic CoreML LLM wrapper: validates the requested model name, sizes the
/// tokenizer to the model's input length, and produces dense float embeddings.
public class LLMEmbeddings {

    // Model name; consumed by the freestanding macros below.
    private let model: String
    public var tokenizer: BertTokenizer
    // NOTE(review): "Dimention" is a typo for "Dimension", but the name is part of the
    // public API and is assigned by MODEL_VALIDATE_NAME_AND_SET_INPUT_SIZE — renaming
    // would break the macro expansion and external callers, so it is kept.
    public var inputDimention: Int = 512 // 512 is a dummy value, correct value is set by the macro below
    public let outputDimention: Int = 384

    /// - Parameter model_type: name of a supported CoreML model; validated by macro.
    public init(model_type: String) {
        let modelConfig = MLModelConfiguration()
        modelConfig.computeUnits = .all

        self.model = model_type

        // Dummy initialization is needed here so all stored properties are set before
        // the macro expansion reads `self.model` (avoids a definite-initialization error).
        self.tokenizer = BertTokenizer(maxLen: self.inputDimention)

        // Validate the model name and set the correct input dimension.
        #MODEL_VALIDATE_NAME_AND_SET_INPUT_SIZE()

        // Reinitialize the tokenizer with the model's true input size.
        self.tokenizer = BertTokenizer(maxLen: self.inputDimention)
    }

    /// Encodes `sentence` to a dense embedding, or `nil` if prediction fails.
    public func encode(sentence: String) async -> [Float]? {
        // Encode input text as BERT tokens, then run them through the model.
        let inputTokens = tokenizer.buildModelTokens(sentence: sentence)
        let (inputIds, attentionMask) = tokenizer.buildModelInputs(from: inputTokens)
        return generateEmbeddings(inputIds: inputIds, attentionMask: attentionMask)
    }

    /// Runs the model prediction and flattens the output multi-array to `[Float]`.
    /// Returns `nil` when the underlying prediction produced no output.
    public func generateEmbeddings(inputIds: MLMultiArray, attentionMask: MLMultiArray) -> [Float]? {
        var output: MLMultiArray? = nil

        // The macro expands to a switch over supported models that assigns `output`.
        #MODEL_MAKE_PREDICTION("inputIds", "attentionMask", "output")

        // guard-let replaces the original `=== nil` identity check + force unwrap.
        guard let embeddings = output else {
            return nil
        }

        var embeddingsArray = [Float]()
        embeddingsArray.reserveCapacity(embeddings.count)
        for index in 0..<embeddings.count {
            embeddingsArray.append(embeddings[index].floatValue)
        }
        return embeddingsArray
    }
}
#endif
Loading