Commit 7fb12cbb authored by Abhinav Jain

broken commit

parent 95207e4a
1 merge request: !15 Add interface for using generic CoreML LLMs
Pipeline #114572 failed
Package.resolved
@@ -27,6 +27,15 @@
"version" : "2.2.0"
}
},
+ {
+ "identity" : "faissmobile",
+ "kind" : "remoteSourceControl",
+ "location" : "https://github.com/jkrukowski/FaissMobile",
+ "state" : {
+ "revision" : "9d5b9925305eea9398cc92ce4a8e51c8a4b043af",
+ "version" : "0.0.1"
+ }
+ },
{
"identity" : "similarity-topology",
"kind" : "remoteSourceControl",
@@ -36,6 +45,24 @@
"version" : "0.1.14"
}
},
+ {
+ "identity" : "swift-argument-parser",
+ "kind" : "remoteSourceControl",
+ "location" : "https://github.com/apple/swift-argument-parser",
+ "state" : {
+ "revision" : "c8ed701b513cf5177118a175d85fbbbcd707ab41",
+ "version" : "1.3.0"
+ }
+ },
+ {
+ "identity" : "swift-log",
+ "kind" : "remoteSourceControl",
+ "location" : "https://github.com/apple/swift-log",
+ "state" : {
+ "revision" : "e97a6fcb1ab07462881ac165fdbb37f067e205d5",
+ "version" : "1.5.4"
+ }
+ },
{
"identity" : "swift-numerics",
"kind" : "remoteSourceControl",
@@ -62,6 +89,24 @@
"revision" : "6d90636e22510c2f0798f9f8ff072109e345750a",
"version" : "1.1.0"
}
+ },
+ {
+ "identity" : "swiftfaiss",
+ "kind" : "remoteSourceControl",
+ "location" : "https://github.com/jkrukowski/SwiftFaiss.git",
+ "state" : {
+ "revision" : "d3831c1e9898695ae7f680b6353e48e873d3f1d3",
+ "version" : "0.0.8"
+ }
+ },
+ {
+ "identity" : "swiftformat",
+ "kind" : "remoteSourceControl",
+ "location" : "https://github.com/nicklockwood/SwiftFormat",
+ "state" : {
+ "revision" : "dbc9a4406d21cc52f16caf1e299172b097145e5e",
+ "version" : "0.53.3"
+ }
}
],
"version" : 2
Package.swift
@@ -15,6 +15,7 @@ let package = Package(
],
dependencies: [
.package(url: "https://github.com/jbadger3/SwiftAnnoy", .upToNextMajor(from: "1.0.1")),
.package(url: "https://github.com/jkrukowski/SwiftFaiss.git", from: "0.0.7"),
.package(url: "https://github.com/L1MeN9Yu/Elva", .upToNextMajor(from: "2.1.3")),
.package(url: "https://github.com/JadenGeller/similarity-topology", .upToNextMajor(from: "0.1.14")),
],
@@ -29,7 +30,8 @@ let package = Package(
],
resources: [
.process("Resources"),
.process("Resources/bert_vocab.txt"),
.process("Resources/glove.6B.50d.mmap"),
]
),
.testTarget(
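For context, resources declared with .process(...) in Package.swift are exposed through Bundle.module at runtime, which is how BertTokenizer further down locates its vocabulary. A minimal sketch of loading the two newly declared resources (the guard and the word count are illustrative, not part of the package):

import Foundation

// Both resource names come from the manifest above; .process() keeps
// them addressable by file name inside the package bundle.
guard let vocabURL = Bundle.module.url(forResource: "bert_vocab", withExtension: "txt"),
      let gloveURL = Bundle.module.url(forResource: "glove.6B.50d", withExtension: "mmap") else {
    fatalError("Bundled resources are missing")
}
// Mirrors the tokenizer's own loading style (it also uses try!).
let vocabText = try! String(contentsOf: vocabURL)
let vocabSize = vocabText.split(separator: "\n").count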
@@ -59,9 +59,9 @@ class CoreMLEncoder<Scalar: BinaryFloatingPoint & Codable>: SNLPEncoder {
@available(macOS 13.0, *)
public class MiniLMEmbeddings {
- private let model: GenericLLMModel
+ private let model: LLMModel
public let tokenizer: BertTokenizer
- public let inputDimention: Int = 512
+ public let inputDimention: Int = 128
public let outputDimention: Int = 384
public init() {
@@ -69,12 +69,12 @@ public class MiniLMEmbeddings {
modelConfig.computeUnits = .all
do {
- self.model = try GenericLLMModel(contentsOf: URL(fileURLWithPath: "Sources/SwiftNLP/Models/all-MiniLM-L6-v2.mlmodelc"), model_name: "all-MiniLM-L6-v2", input_size: inputDimention, output_size: outputDimention)
+ self.model = try LLMModel<all_MiniLM_L6_v2>()
} catch {
fatalError("Failed to load the Core ML model. Error: \(error.localizedDescription)")
}
- self.tokenizer = BertTokenizer()
+ self.tokenizer = BertTokenizer(maxLen: self.inputDimention)
}
// MARK: - Dense Embeddings
@@ -84,15 +84,18 @@ public class MiniLMEmbeddings {
let inputTokens = tokenizer.buildModelTokens(sentence: sentence)
let (inputIds, attentionMask) = tokenizer.buildModelInputs(from: inputTokens)
+ print(inputIds.count, attentionMask.count)
// Send tokens through the MLModel
let embeddings = generateEmbeddings(inputIds: inputIds, attentionMask: attentionMask)
+ print(inputIds.count, attentionMask.count)
return embeddings
}
public func generateEmbeddings(inputIds: MLMultiArray, attentionMask: MLMultiArray) -> [Float]? {
- let output : GenericLLMModelOutput? = try? model.prediction(input: GenericLLMModelInput(input_ids: inputIds, attention_mask: attentionMask))
+ let output : LLMModelOutput? = try? model.prediction(input: LLMModelInput(input_ids: inputIds, attention_mask: attentionMask))
guard let embeddings = output?.embeddings else {
return nil
}
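Taken together, the changes above mean MiniLMEmbeddings now loads its model through the generic wrapper instead of a hard-coded file path. A hypothetical call site, assuming the async encode(sentence:) that the test at the bottom of this diff already uses:

// The initializer takes no arguments now; the model and a 128-token
// BertTokenizer are set up internally.
let embedder = MiniLMEmbeddings()
let vector = await embedder.encode(sentence: "The quick brown fox jumps over the lazy dog")
print(vector?.count ?? 0)  // expected: 384, the declared outputDimention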
@@ -6,12 +6,12 @@ import CoreML
public class BertTokenizer {
private let basicTokenizer = BasicTokenizer()
private let wordpieceTokenizer: WordpieceTokenizer
- private let maxLen = 512
+ private var maxLen = 512
private let vocab: [String: Int]
private let ids_to_tokens: [Int: String]
- public init() {
+ public init(maxLen: Int) {
let url = Bundle.module.url(forResource: "bert_vocab", withExtension: "txt")!
let vocabTxt = try! String(contentsOf: url)
let tokens = vocabTxt.split(separator: "\n").map { String($0) }
@@ -24,6 +24,7 @@ public class BertTokenizer {
self.vocab = vocab
self.ids_to_tokens = ids_to_tokens
self.wordpieceTokenizer = WordpieceTokenizer(vocab: self.vocab)
+ self.maxLen = maxLen
}
public func buildModelTokens(sentence: String) -> [Int] {
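With maxLen now injected, the tokenizer can be sized to whatever the target model expects (128 here, down from the BERT default of 512). A minimal sketch using only the methods that appear in this diff; the sample sentence is arbitrary:

// maxLen must match the model's input width; MiniLMEmbeddings passes
// its inputDimention (128) when constructing the tokenizer.
let tokenizer = BertTokenizer(maxLen: 128)
let tokens = tokenizer.buildModelTokens(sentence: "Hello, world")
let (inputIds, attentionMask) = tokenizer.buildModelInputs(from: tokens)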
//
// all_MiniLM_L6_v2.swift
//
// This file was automatically generated and should not be edited.
//
import CoreML
/// Model Prediction Input Type
@available(macOS 12.0, iOS 15.0, tvOS 15.0, watchOS 8.0, *)
class all_MiniLM_L6_v2Input : MLFeatureProvider {
/// input_ids as 1 by 512 matrix of floats
var input_ids: MLMultiArray
/// attention_mask as 1 by 512 matrix of floats
var attention_mask: MLMultiArray
var featureNames: Set<String> {
get {
return ["input_ids", "attention_mask"]
}
}
func featureValue(for featureName: String) -> MLFeatureValue? {
if (featureName == "input_ids") {
return MLFeatureValue(multiArray: input_ids)
}
if (featureName == "attention_mask") {
return MLFeatureValue(multiArray: attention_mask)
}
return nil
}
init(input_ids: MLMultiArray, attention_mask: MLMultiArray) {
self.input_ids = input_ids
self.attention_mask = attention_mask
}
convenience init(input_ids: MLShapedArray<Float>, attention_mask: MLShapedArray<Float>) {
self.init(input_ids: MLMultiArray(input_ids), attention_mask: MLMultiArray(attention_mask))
}
}
/// Model Prediction Output Type
@available(macOS 12.0, iOS 15.0, tvOS 15.0, watchOS 8.0, *)
class all_MiniLM_L6_v2Output : MLFeatureProvider {
/// Source provided by CoreML
private let provider : MLFeatureProvider
/// embeddings as multidimensional array of floats
var embeddings: MLMultiArray {
return self.provider.featureValue(for: "embeddings")!.multiArrayValue!
}
/// embeddings as multidimensional array of floats
var embeddingsShapedArray: MLShapedArray<Float> {
return MLShapedArray<Float>(self.embeddings)
}
var featureNames: Set<String> {
return self.provider.featureNames
}
func featureValue(for featureName: String) -> MLFeatureValue? {
return self.provider.featureValue(for: featureName)
}
init(embeddings: MLMultiArray) {
self.provider = try! MLDictionaryFeatureProvider(dictionary: ["embeddings" : MLFeatureValue(multiArray: embeddings)])
}
init(features: MLFeatureProvider) {
self.provider = features
}
}
/// Class for model loading and prediction
@available(macOS 12.0, iOS 15.0, tvOS 15.0, watchOS 8.0, *)
class all_MiniLM_L6_v2 {
let model: MLModel
/// URL of model assuming it was installed in the same bundle as this class
class var urlOfModelInThisBundle : URL {
let bundle = Bundle(for: self)
return bundle.url(forResource: "all-MiniLM-L6-v2", withExtension:"mlmodelc")!
}
/**
Construct all_MiniLM_L6_v2 instance with an existing MLModel object.
Usually the application does not use this initializer unless it makes a subclass of all_MiniLM_L6_v2.
Such application may want to use `MLModel(contentsOfURL:configuration:)` and `all_MiniLM_L6_v2.urlOfModelInThisBundle` to create a MLModel object to pass-in.
- parameters:
- model: MLModel object
*/
init(model: MLModel) {
self.model = model
}
/**
Construct a model with configuration
- parameters:
- configuration: the desired model configuration
- throws: an NSError object that describes the problem
*/
convenience init(configuration: MLModelConfiguration = MLModelConfiguration()) throws {
try self.init(contentsOf: type(of:self).urlOfModelInThisBundle, configuration: configuration)
}
/**
Construct all_MiniLM_L6_v2 instance with explicit path to mlmodelc file
- parameters:
- modelURL: the file url of the model
- throws: an NSError object that describes the problem
*/
convenience init(contentsOf modelURL: URL) throws {
try self.init(model: MLModel(contentsOf: modelURL))
}
/**
Construct a model with URL of the .mlmodelc directory and configuration
- parameters:
- modelURL: the file url of the model
- configuration: the desired model configuration
- throws: an NSError object that describes the problem
*/
convenience init(contentsOf modelURL: URL, configuration: MLModelConfiguration) throws {
try self.init(model: MLModel(contentsOf: modelURL, configuration: configuration))
}
/**
Construct all_MiniLM_L6_v2 instance asynchronously with optional configuration.
Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
- parameters:
- configuration: the desired model configuration
- handler: the completion handler to be called when the model loading completes successfully or unsuccessfully
*/
class func load(configuration: MLModelConfiguration = MLModelConfiguration(), completionHandler handler: @escaping (Swift.Result<all_MiniLM_L6_v2, Error>) -> Void) {
return self.load(contentsOf: self.urlOfModelInThisBundle, configuration: configuration, completionHandler: handler)
}
/**
Construct all_MiniLM_L6_v2 instance asynchronously with optional configuration.
Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
- parameters:
- configuration: the desired model configuration
*/
class func load(configuration: MLModelConfiguration = MLModelConfiguration()) async throws -> all_MiniLM_L6_v2 {
return try await self.load(contentsOf: self.urlOfModelInThisBundle, configuration: configuration)
}
/**
Construct all_MiniLM_L6_v2 instance asynchronously with URL of the .mlmodelc directory with optional configuration.
Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
- parameters:
- modelURL: the URL to the model
- configuration: the desired model configuration
- handler: the completion handler to be called when the model loading completes successfully or unsuccessfully
*/
class func load(contentsOf modelURL: URL, configuration: MLModelConfiguration = MLModelConfiguration(), completionHandler handler: @escaping (Swift.Result<all_MiniLM_L6_v2, Error>) -> Void) {
MLModel.load(contentsOf: modelURL, configuration: configuration) { result in
switch result {
case .failure(let error):
handler(.failure(error))
case .success(let model):
handler(.success(all_MiniLM_L6_v2(model: model)))
}
}
}
/**
Construct all_MiniLM_L6_v2 instance asynchronously with URL of the .mlmodelc directory with optional configuration.
Model loading may take time when the model content is not immediately available (e.g. encrypted model). Use this factory method especially when the caller is on the main thread.
- parameters:
- modelURL: the URL to the model
- configuration: the desired model configuration
*/
class func load(contentsOf modelURL: URL, configuration: MLModelConfiguration = MLModelConfiguration()) async throws -> all_MiniLM_L6_v2 {
let model = try await MLModel.load(contentsOf: modelURL, configuration: configuration)
return all_MiniLM_L6_v2(model: model)
}
/**
Make a prediction using the structured interface
- parameters:
- input: the input to the prediction as all_MiniLM_L6_v2Input
- throws: an NSError object that describes the problem
- returns: the result of the prediction as all_MiniLM_L6_v2Output
*/
func prediction(input: all_MiniLM_L6_v2Input) throws -> all_MiniLM_L6_v2Output {
return try self.prediction(input: input, options: MLPredictionOptions())
}
/**
Make a prediction using the structured interface
- parameters:
- input: the input to the prediction as all_MiniLM_L6_v2Input
- options: prediction options
- throws: an NSError object that describes the problem
- returns: the result of the prediction as all_MiniLM_L6_v2Output
*/
func prediction(input: all_MiniLM_L6_v2Input, options: MLPredictionOptions) throws -> all_MiniLM_L6_v2Output {
let outFeatures = try model.prediction(from: input, options:options)
return all_MiniLM_L6_v2Output(features: outFeatures)
}
/**
Make an asynchronous prediction using the structured interface
- parameters:
- input: the input to the prediction as all_MiniLM_L6_v2Input
- options: prediction options
- throws: an NSError object that describes the problem
- returns: the result of the prediction as all_MiniLM_L6_v2Output
*/
@available(macOS 14.0, iOS 17.0, tvOS 17.0, watchOS 10.0, *)
func prediction(input: all_MiniLM_L6_v2Input, options: MLPredictionOptions = MLPredictionOptions()) async throws -> all_MiniLM_L6_v2Output {
let outFeatures = try await model.prediction(from: input, options:options)
return all_MiniLM_L6_v2Output(features: outFeatures)
}
/**
Make a prediction using the convenience interface
- parameters:
- input_ids as 1 by 512 matrix of floats
- attention_mask as 1 by 512 matrix of floats
- throws: an NSError object that describes the problem
- returns: the result of the prediction as all_MiniLM_L6_v2Output
*/
func prediction(input_ids: MLMultiArray, attention_mask: MLMultiArray) throws -> all_MiniLM_L6_v2Output {
let input_ = all_MiniLM_L6_v2Input(input_ids: input_ids, attention_mask: attention_mask)
return try self.prediction(input: input_)
}
/**
Make a prediction using the convenience interface
- parameters:
- input_ids as 1 by 512 matrix of floats
- attention_mask as 1 by 512 matrix of floats
- throws: an NSError object that describes the problem
- returns: the result of the prediction as all_MiniLM_L6_v2Output
*/
func prediction(input_ids: MLShapedArray<Float>, attention_mask: MLShapedArray<Float>) throws -> all_MiniLM_L6_v2Output {
let input_ = all_MiniLM_L6_v2Input(input_ids: input_ids, attention_mask: attention_mask)
return try self.prediction(input: input_)
}
/**
Make a batch prediction using the structured interface
- parameters:
- inputs: the inputs to the prediction as [all_MiniLM_L6_v2Input]
- options: prediction options
- throws: an NSError object that describes the problem
- returns: the result of the prediction as [all_MiniLM_L6_v2Output]
*/
func predictions(inputs: [all_MiniLM_L6_v2Input], options: MLPredictionOptions = MLPredictionOptions()) throws -> [all_MiniLM_L6_v2Output] {
let batchIn = MLArrayBatchProvider(array: inputs)
let batchOut = try model.predictions(from: batchIn, options: options)
var results : [all_MiniLM_L6_v2Output] = []
results.reserveCapacity(inputs.count)
for i in 0..<batchOut.count {
let outProvider = batchOut.features(at: i)
let result = all_MiniLM_L6_v2Output(features: outProvider)
results.append(result)
}
return results
}
}
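For reference, the generated wrapper is used like any other Core ML model class. A sketch, assuming inputIds and attentionMask are MLMultiArray values produced by BertTokenizer.buildModelInputs(from:) as above:

// Prefer the async factory when on the main thread, since model
// loading can block; prediction then runs synchronously.
let model = try await all_MiniLM_L6_v2.load()
let output = try model.prediction(input_ids: inputIds, attention_mask: attentionMask)
let embeddings: MLMultiArray = output.embeddings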
import CoreML
/// Model Prediction Input Type
@available(macOS 12.0, iOS 15.0, tvOS 15.0, watchOS 8.0, *)
class GenericLLMModelInput : MLFeatureProvider {
var input_ids: MLMultiArray
var attention_mask: MLMultiArray
var featureNames: Set<String> {
get {
return ["input_ids", "attention_mask"]
}
}
func featureValue(for featureName: String) -> MLFeatureValue? {
if (featureName == "input_ids") {
return MLFeatureValue(multiArray: input_ids)
}
if (featureName == "attention_mask") {
return MLFeatureValue(multiArray: attention_mask)
}
return nil
}
init(input_ids: MLMultiArray, attention_mask: MLMultiArray) {
self.input_ids = input_ids
self.attention_mask = attention_mask
}
convenience init(input_ids: MLShapedArray<Float>, attention_mask: MLShapedArray<Float>) {
self.init(input_ids: MLMultiArray(input_ids), attention_mask: MLMultiArray(attention_mask))
}
}
/// Model Prediction Output Type
@available(macOS 12.0, iOS 15.0, tvOS 15.0, watchOS 8.0, *)
class GenericLLMModelOutput : MLFeatureProvider {
private let provider : MLFeatureProvider
var embeddings: MLMultiArray {
return self.provider.featureValue(for: "embeddings")!.multiArrayValue!
}
var embeddingsShapedArray: MLShapedArray<Float> {
return MLShapedArray<Float>(self.embeddings)
}
var featureNames: Set<String> {
return self.provider.featureNames
}
func featureValue(for featureName: String) -> MLFeatureValue? {
return self.provider.featureValue(for: featureName)
}
init(embeddings: MLMultiArray) {
self.provider = try! MLDictionaryFeatureProvider(dictionary: ["embeddings" : MLFeatureValue(multiArray: embeddings)])
}
init(features: MLFeatureProvider) {
self.provider = features
}
}
/// Class for model loading and prediction
@available(macOS 12.0, iOS 15.0, tvOS 15.0, watchOS 8.0, *)
class GenericLLMModel {
let model: MLModel
let model_name: String
let input_size: Int
let output_size: Int
init(model: MLModel, model_name: String, input_size: Int, output_size: Int) {
self.model = model
self.model_name = model_name
self.input_size = input_size
self.output_size = output_size
}
convenience init(contentsOf modelURL: URL, model_name: String, input_size: Int, output_size: Int) throws {
try self.init(model: MLModel(contentsOf: modelURL), model_name: model_name, input_size: input_size, output_size: output_size)
}
convenience init(contentsOf modelURL: URL, configuration: MLModelConfiguration, model_name: String, input_size: Int, output_size: Int) throws {
try self.init(model: MLModel(contentsOf: modelURL, configuration: configuration), model_name: model_name, input_size: input_size, output_size: output_size)
}
class func load(contentsOf modelURL: URL, model_name: String, input_size: Int, output_size: Int, configuration: MLModelConfiguration = MLModelConfiguration()) async throws -> GenericLLMModel {
let model = try await MLModel.load(contentsOf: modelURL, configuration: configuration)
return GenericLLMModel(model: model, model_name: model_name, input_size: input_size, output_size: output_size)
}
func prediction(input: GenericLLMModelInput) throws -> GenericLLMModelOutput {
return try self.prediction(input: input, options: MLPredictionOptions())
}
func prediction(input: GenericLLMModelInput, options: MLPredictionOptions) throws -> GenericLLMModelOutput {
let outFeatures = try model.prediction(from: input, options:options)
return GenericLLMModelOutput(features: outFeatures)
}
@available(macOS 14.0, iOS 17.0, tvOS 17.0, watchOS 10.0, *)
func prediction(input: GenericLLMModelInput, options: MLPredictionOptions = MLPredictionOptions()) async throws -> GenericLLMModelOutput {
let outFeatures = try await model.prediction(from: input, options:options)
return GenericLLMModelOutput(features: outFeatures)
}
func prediction(input_ids: MLMultiArray, attention_mask: MLMultiArray) throws -> GenericLLMModelOutput {
let input_ = GenericLLMModelInput(input_ids: input_ids, attention_mask: attention_mask)
return try self.prediction(input: input_)
}
func prediction(input_ids: MLShapedArray<Float>, attention_mask: MLShapedArray<Float>) throws -> GenericLLMModelOutput {
let input_ = GenericLLMModelInput(input_ids: input_ids, attention_mask: attention_mask)
return try self.prediction(input: input_)
}
func predictions(inputs: [GenericLLMModelInput], options: MLPredictionOptions = MLPredictionOptions()) throws -> [GenericLLMModelOutput] {
for input in inputs {
assert(input.input_ids.count == self.input_size)
}
let batchIn = MLArrayBatchProvider(array: inputs)
let batchOut = try model.predictions(from: batchIn, options: options)
var results : [GenericLLMModelOutput] = []
results.reserveCapacity(inputs.count)
for i in 0..<batchOut.count {
let outProvider = batchOut.features(at: i)
let result = GenericLLMModelOutput(features: outProvider)
results.append(result)
}
return results
}
}
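By contrast, the generic wrapper is parameterized at runtime rather than generated per model. A hypothetical call site mirroring the initializer that MiniLMEmbeddings dropped above:

// input_size/output_size describe the model's expected shapes; the
// batch API (predictions(inputs:options:)) asserts input_size on each input.
let url = URL(fileURLWithPath: "Sources/SwiftNLP/Models/all-MiniLM-L6-v2.mlmodelc")
let generic = try GenericLLMModel(contentsOf: url,
                                  model_name: "all-MiniLM-L6-v2",
                                  input_size: 512,
                                  output_size: 384)
let out = try generic.prediction(input_ids: inputIds, attention_mask: attentionMask)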
@@ -35,7 +35,6 @@ final class BERT_test: XCTestCase {
var embedding_dim: Int = 384
var model = MiniLMEmbeddings()
query_embedding = await model.encode(sentence: query[0])!