diff --git a/Sources/SwiftNLP/2. Encoding/ContextFreeEncoder + File IO .swift b/Sources/SwiftNLP/2. Encoding/ContextFreeEncoder + File IO .swift index 58ea84c60f478be12b2946549faa937dea18d1fa..7113b9ce97e2a10180301dff288cd4e38de1da6f 100644 --- a/Sources/SwiftNLP/2. Encoding/ContextFreeEncoder + File IO .swift +++ b/Sources/SwiftNLP/2. Encoding/ContextFreeEncoder + File IO .swift @@ -168,6 +168,21 @@ extension ContextFreeEncoder { var hnsw = DeterministicSampleVectorIndex<[Scalar]>() +// while index < data.count { +// if let stringRange = data[index...].range(of: "\0".data(using: .utf8)!) { +// let keyData = data[index..<stringRange.lowerBound] +// if let key = String(data: keyData, encoding: .utf8) { +// index = stringRange.upperBound +// +// let valuesData = data[index..<(index + 50 * MemoryLayout<Scalar>.size)] +// let values = valuesData.withUnsafeBytes { Array($0.bindMemory(to: Scalar.self)) } +// hnsw.insert(values) +// } +// } else { +// break +// } +// } + for _ in 0..<count { let vectorData = data[index..<(index + 50 * MemoryLayout<Scalar>.size)] let vector = vectorData.withUnsafeBytes { Array($0.bindMemory(to: Scalar.self)) } @@ -186,6 +201,18 @@ extension ContextFreeEncoder { do { let content = try String(contentsOf: url, encoding: .utf8) let lines = content.split(separator: "\n") + +// var data = DeterministicSampleVectorIndex<[Scalar]>() +// +// for line in lines.dropFirst() { +// let tokens = line.split(separator: " ") +// let word = String(tokens[0]) +// let vector = tokens.dropFirst().compactMap { Scalar(Double($0)!) } +// data.insert(vector) +// } +// +// return data + var hnsw = DeterministicSampleVectorIndex<[Scalar]>() for line in lines { let vector = line.split(separator: " ").compactMap { Scalar(Double($0)!) } diff --git a/Tests/SwiftNLPTests/HNSWPipelineTest.swift b/Tests/SwiftNLPTests/HNSWPipelineTest.swift index 4c6d810f2b09db17b17fcbc89385f6c02c8234c1..a089bbcb260d0f83246b5a20d243253deecf523e 100644 --- a/Tests/SwiftNLPTests/HNSWPipelineTest.swift +++ b/Tests/SwiftNLPTests/HNSWPipelineTest.swift @@ -5,6 +5,7 @@ //@testable import SwiftNLP // //// MARK: See AllMiniLM_pipelineTest.swift +////TODO: Find where TestUtils comes from to fix // //final class HNSWPipelineTest: XCTestCase { //