From dc84a3d88e7f0a6bb888c39b4e42a60c21f57769 Mon Sep 17 00:00:00 2001
From: Mingchung Xia <mingchung.xia@gmail.com>
Date: Thu, 8 Feb 2024 11:46:49 -0500
Subject: [PATCH] Added commented-out alternatives for HNSW I/O

---
 .../ContextFreeEncoder + File IO .swift       | 27 +++++++++++++++++++
 Tests/SwiftNLPTests/HNSWPipelineTest.swift    |  1 +
 2 files changed, 28 insertions(+)

diff --git a/Sources/SwiftNLP/2. Encoding/ContextFreeEncoder + File IO .swift b/Sources/SwiftNLP/2. Encoding/ContextFreeEncoder + File IO .swift
index 58ea84c6..7113b9ce 100644
--- a/Sources/SwiftNLP/2. Encoding/ContextFreeEncoder + File IO .swift	
+++ b/Sources/SwiftNLP/2. Encoding/ContextFreeEncoder + File IO .swift	
@@ -168,6 +168,21 @@ extension ContextFreeEncoder {
             
             var hnsw = DeterministicSampleVectorIndex<[Scalar]>()
             
+//            while index < data.count {
+//                if let stringRange = data[index...].range(of: "\0".data(using: .utf8)!) {
+//                    let keyData = data[index..<stringRange.lowerBound]
+//                    if let key = String(data: keyData, encoding: .utf8) {
+//                        index = stringRange.upperBound
+//                        
+//                        let valuesData = data[index..<(index + 50 * MemoryLayout<Scalar>.size)]
+//                        let values = valuesData.withUnsafeBytes { Array($0.bindMemory(to: Scalar.self)) }
+//                        hnsw.insert(values)
+//                    }
+//                } else {
+//                    break
+//                }
+//            }
+            
             for _ in 0..<count {
                 let vectorData = data[index..<(index + 50 * MemoryLayout<Scalar>.size)]
                 let vector = vectorData.withUnsafeBytes { Array($0.bindMemory(to: Scalar.self)) }
@@ -186,6 +201,18 @@ extension ContextFreeEncoder {
         do {
             let content = try String(contentsOf: url, encoding: .utf8)
             let lines = content.split(separator: "\n")
+            
+//            var data = DeterministicSampleVectorIndex<[Scalar]>()
+//            
+//            for line in lines.dropFirst() {
+//                let tokens = line.split(separator: " ")
+//                let word = String(tokens[0])
+//                let vector = tokens.dropFirst().compactMap { Scalar(Double($0)!) }
+//                data.insert(vector)
+//            }
+//            
+//            return data
+            
             var hnsw = DeterministicSampleVectorIndex<[Scalar]>()
             for line in lines {
                 let vector = line.split(separator: " ").compactMap { Scalar(Double($0)!) }
diff --git a/Tests/SwiftNLPTests/HNSWPipelineTest.swift b/Tests/SwiftNLPTests/HNSWPipelineTest.swift
index 4c6d810f..a089bbcb 100644
--- a/Tests/SwiftNLPTests/HNSWPipelineTest.swift
+++ b/Tests/SwiftNLPTests/HNSWPipelineTest.swift
@@ -5,6 +5,7 @@
 //@testable import SwiftNLP
 //
 //// MARK: See AllMiniLM_pipelineTest.swift
+//// TODO: Find where TestUtils is defined so that this test can be fixed
 //
 //final class HNSWPipelineTest: XCTestCase {
 //
-- 
GitLab