diff --git a/.swiftpm/xcode/xcshareddata/xcschemes/SwiftNLP.xcscheme b/.swiftpm/xcode/xcshareddata/xcschemes/SwiftNLP.xcscheme
index e4605cf5d7cf8d17b5f78adc4664e832fb2c1b59..068446f9609b7359d88754ccaa6fa15f0b4010ce 100644
--- a/.swiftpm/xcode/xcshareddata/xcschemes/SwiftNLP.xcscheme
+++ b/.swiftpm/xcode/xcshareddata/xcschemes/SwiftNLP.xcscheme
@@ -79,7 +79,7 @@
       </Testables>
    </TestAction>
    <LaunchAction
-      buildConfiguration = "Debug"
+      buildConfiguration = "Release"
       selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
       selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
       launchStyle = "0"
diff --git a/Package.swift b/Package.swift
index be0eca8183d03a6856f4c9eb39eeae13e9f57bbc..9a079182aa7c2d6b97a0f44926ca64c3a85e008d 100644
--- a/Package.swift
+++ b/Package.swift
@@ -12,7 +12,13 @@ let package = Package(
     products: [
         .library(
             name: "SwiftNLP",
-            targets: ["SwiftNLP"]),
+            targets: ["SwiftNLP"]
+        ),
+        /// This is commented out to fix the gitlab pipeline, but must be uncommented when in use on macOS only.
+//        .executable(
+//            name: "SwiftNLPVisualizer",
+//            targets: ["SwiftNLPVisualizer"]
+//        ),
     ],
     dependencies: [
         .package(url: "https://github.com/jbadger3/SwiftAnnoy", .upToNextMajor(from: "1.0.1")),
@@ -36,8 +42,11 @@ let package = Package(
                 "SwiftNLPGenericLLMMacros",
                 .product(name: "HNSWAlgorithm", package: "similarity-topology"),
                 .product(name: "HNSWEphemeral", package: "similarity-topology"),
+                .product(name: "HNSWDurable", package: "similarity-topology", condition: .when(platforms: [.macOS])),
+                .product(name: "HNSWSample", package: "similarity-topology", condition: .when(platforms: [.macOS])),
+//                .product(name: "Nifty", package: "Nifty"),
                 .product(name: "ZSTD", package: "Elva"),
-                
+                .byName(name: "Surge", condition: .when(platforms: [.macOS])),
             ],
             resources: [
                 .process("Resources/bert_vocab.txt"),
diff --git a/README.md b/README.md
index 73eecd91f42c150a2b62b594f0736efad77579fc..6669d853255977a5b09555f336eea277ebac1058 100644
--- a/README.md
+++ b/README.md
@@ -48,7 +48,7 @@ print(topicModel)
         - [ ] Linux via PythonKit?
 
 - *Topic Modelling*
-    - [ ] Linear Algebra (e.g., [Nifty](https://github.com/philipce/nifty), [Surge](https://github.com/Jounce/Surge))
+    - [X] Linear Algebra (e.g., [Nifty](https://github.com/philipce/nifty), [Surge](https://github.com/Jounce/Surge))
     - [ ] Dimensionality Reduction (e.g., [t-SNE](https://github.com/emannuelOC/swift-tsne), [UMAP](https://github.com/LTLA/umappp))
     - [ ] Clustering (e.g., K-Means, HDBSCAN)
     - [ ] Topic models
@@ -62,6 +62,7 @@ This project is developed by a team of researchers from the [Human-Computer Inte
  - Peter Li
  - Adrian Davila
  - Henry Tian
+ - Mingchung Xia
 
 If you would like to contribute to the project, [contact Prof. Wallace](mailto:james.wallace@uwaterloo.ca) with "SwiftNLP" in the subject line, and mention one or more of the roadmap items above that you would like to work on. 
 
diff --git a/Sources/SwiftNLP/0. SNLP Internal Protocols/SNLPCorpus.swift b/Sources/SwiftNLP/0. SNLP Internal Protocols/SNLPCorpus.swift
index 51446ebaf85d1b290a28300a3856e5e94cd49931..4cd643a7e75f6d74ee8d9ab10173dcd61684a99c 100644
--- a/Sources/SwiftNLP/0. SNLP Internal Protocols/SNLPCorpus.swift	
+++ b/Sources/SwiftNLP/0. SNLP Internal Protocols/SNLPCorpus.swift	
@@ -23,26 +23,58 @@
 
 import Foundation
 
-protocol SNLPCorpus: Collection {
+protocol SNLPCorpus<Item> {
     
-    associatedtype Scalar: BinaryFloatingPoint & Codable
+    associatedtype Item: SNLPDataItem        
+    associatedtype Encoder: SNLPEncoder
+        
+    associatedtype DocumentStorage: RandomAccessCollection & RangeReplaceableCollection where DocumentStorage.Element == Item
+    associatedtype EmbeddingStorage: RandomAccessCollection  & RangeReplaceableCollection where EmbeddingStorage.Element == [Encoder.Scalar]
+        
+    var documents: DocumentStorage { get set }
+    var encodedDocuments: EmbeddingStorage { get set }
     
-    var zeroes: [Scalar] { get }
-    var count: Int { get }
+    var documentEncoder: Encoder { get }
     
-    mutating func addUntokenizedDocument(_ document: String)
-    mutating func addUntokenizedDocuments(_ documents: [String])
+    var zeroes: [Encoder.Scalar] { get }
+    var dimensions: UInt { get }
+    var count: UInt { get }
+    
+    mutating func addUntokenizedDocument(_ document: Item)
+    mutating func addUntokenizedDocuments(_ documents: [Item])
+    
+    func searchFor(_ query: String) -> [Item]
 }
 
+
+
 extension SNLPCorpus {
+       
+    
+    var zeroes: [Encoder.Scalar] { documentEncoder.zeroes }
+    var dimensions: UInt { documentEncoder.dimensions }
+    
+    var count: UInt { UInt(documents.count) }
     
     /**
         Adds a series of untokenized documents to the corpus, using default tokenization and text processing
      */
-    @inlinable
-    mutating func addUntokenizedDocuments(_ documents: [String]) {
+    mutating func addUntokenizedDocuments(_ documents: [Item]) {
         for d in documents {
             addUntokenizedDocument(d)
         }
+        
+        assert (documents.count == encodedDocuments.count)
+    }
+    
+    /**
+        Adds an untokenized document to the corpus, using default tokenization and text processing
+     */
+    mutating func addUntokenizedDocument(_ document: Item) {
+        documents.append(document)
+        encodedDocuments.append(documentEncoder.encodeSentence(document.fullText))
+        
+        assert( documents.count == encodedDocuments.count )
     }
+        
 }
diff --git a/Sources/SwiftNLP/0. SNLP Internal Protocols/SNLPEncoder.swift b/Sources/SwiftNLP/0. SNLP Internal Protocols/SNLPEncoder.swift
index c00df8edc5e3b37104005d54eae8687acb5eb392..386eaec6a7db3e2649ca0d8d990aa827705c8075 100644
--- a/Sources/SwiftNLP/0. SNLP Internal Protocols/SNLPEncoder.swift	
+++ b/Sources/SwiftNLP/0. SNLP Internal Protocols/SNLPEncoder.swift	
@@ -24,11 +24,14 @@
 import Foundation
 
 
-protocol SNLPEncoder: Codable {
+protocol SNLPEncoder<Scalar> {
     
-    associatedtype Scalar: BinaryFloatingPoint & Codable
+    associatedtype Scalar: BinaryFloatingPoint
             
     var zeroes: [Scalar] { get }
+    var dimensions: UInt { get }
+    
+    init()
     
     @inlinable
     func encodeToken(_ token: String) -> [Scalar]
diff --git a/Sources/SwiftNLP/1. Data Collection/DurableHNSWCorpus + Dictionary.swift b/Sources/SwiftNLP/1. Data Collection/DurableHNSWCorpus + Dictionary.swift
new file mode 100644
index 0000000000000000000000000000000000000000..78c77324a197d25f694ebc78ee8fda17c5cf7f2f
--- /dev/null
+++ b/Sources/SwiftNLP/1. Data Collection/DurableHNSWCorpus + Dictionary.swift	
@@ -0,0 +1,74 @@
+// Copyright (c) 2024 Jim Wallace
+//
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+//
+// Created by Mingchung Xia on 2024-02-26.
+//
+
+#if os(macOS)
+
+import Foundation
+
+extension DurableHNSWCorpus {
+    /// This extension is used for the dictionary operations
+    public struct DocumentVectorPair {
+        var untokenizedDocument: String
+        var vector: [Encoder.Scalar]
+        
+        init(untokenizedDocument: String, vector: [Encoder.Scalar]) {
+            self.untokenizedDocument = untokenizedDocument
+            self.vector = vector
+        }
+    }
+    
+    @inlinable
+    func getUntokenizedDocument(at key: Int) -> String {
+        if let pair = dictionary[key] {
+            return pair.untokenizedDocument
+        } else {
+            fatalError("Key \(key) not found in HNSW dictionary")
+        }
+    }
+    
+    @inlinable
+    func getVector(at key: Int) -> [Encoder.Scalar] {
+        if let pair = dictionary[key] {
+            return pair.vector
+        } else {
+            fatalError("Key \(key) not found in HNSW dictionary")
+        }
+    }
+    
+    @inlinable
+    func getDictionary() -> [Int: DocumentVectorPair] {
+        return dictionary
+    }
+    
+    func addDocumentVectorPair(at key: Int, document: String, vector: [Encoder.Scalar]) {
+        dictionary[key] = DocumentVectorPair(
+            untokenizedDocument: document,
+            vector: vector
+        )
+    }
+}
+
+#endif
diff --git a/Sources/SwiftNLP/1. Data Collection/DurableHNSWCorpus + File IO.swift b/Sources/SwiftNLP/1. Data Collection/DurableHNSWCorpus + File IO.swift
new file mode 100644
index 0000000000000000000000000000000000000000..ec7c64d389ae089954f397787d68e82722b521c6
--- /dev/null
+++ b/Sources/SwiftNLP/1. Data Collection/DurableHNSWCorpus + File IO.swift	
@@ -0,0 +1,153 @@
+// Copyright (c) 2024 Jim Wallace
+//
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+//
+// Created by Mingchung Xia on 2024-03-12.
+//
+
+#if os(macOS)
+
+import Foundation
+import System
+
+// MARK: This extension for saving and loading the memory map data of untokenized documents is currently used as a workaround
+// This is because loading the memory mapped data using CoreLMDB does not load the untokenized documents (and the other fields) of a DurableHNSWCorpus so in order to write and read from disk of the original data, we need to have this workaround
+// Eventually, all this code in this extension should be moved to the HNSWCorpusDataHandler after a general wrapper class for DurableHNSW and EmphemeralHNSW is made
+
+extension DurableHNSWCorpus {
+    /// Saves untokenized documents in dictionary to disk to Downloads directory
+    func saveDictionaryToDownloads(fileName: String) {
+        guard let downloadsURL = FileManager.default.urls(for: .downloadsDirectory, in: .userDomainMask).first else {
+            print("Could not find Downloads directory")
+            return
+        }
+        
+        let fileURL = downloadsURL.appendingPathComponent(fileName)
+        
+        saveDictionaryMemoryMap(url: fileURL)
+    }
+    
+    func saveDictionaryMemoryMap(url: URL) {
+        let fileManager = FileManager.default
+        if !fileManager.fileExists(atPath: url.path) {
+            fileManager.createFile(atPath: url.path, contents: nil, attributes: nil)
+        }
+        
+        do {
+            let fileHandle = try FileHandle(forWritingTo: url)
+            
+            let count = dictionary.count
+            let countData = withUnsafeBytes(of: count) { Data($0) }
+            fileHandle.write(countData)
+            
+            for (key, value) in dictionary {
+                let keyData = withUnsafeBytes(of: key) { Data($0) }
+                fileHandle.write(keyData)
+                
+                // Convert the untokenizedDocument (String) to Data
+                let documentData = value.untokenizedDocument.data(using: .utf8) ?? Data()
+                // Prefix the document data with its length to know how much to read when loading
+                let documentLengthData = withUnsafeBytes(of: documentData.count) { Data($0) }
+                fileHandle.write(documentLengthData)
+                fileHandle.write(documentData)
+                
+                // Convert the vector ([Scalar]) to Data
+                let vectorData = value.vector.withUnsafeBytes { Data($0) }
+                // Prefix the vector data with its length to know how much to read when loading
+                let vectorLengthData = withUnsafeBytes(of: value.vector.count) { Data($0) }
+                fileHandle.write(vectorLengthData)
+                fileHandle.write(vectorData)
+            }
+            
+            fileHandle.closeFile()
+        } catch {
+            print("Error writing dictionary to file: \(error)")
+        }
+    }
+    
+    static func readDictionaryFromDownloads(fileName: String, dimensions: Int = 50) -> HNSWDictionary {
+        guard let downloadsURL = FileManager.default.urls(for: .downloadsDirectory, in: .userDomainMask).first else {
+            print("Could not find Downloads directory")
+            return [:]
+        }
+        
+        let fileURL = downloadsURL.appendingPathComponent(fileName)
+        
+        return readDictionaryMemoryMap(fileURL, dimensions: dimensions)
+    }
+    
+    /// Width is the number of dimensions of the glove encoding
+    // TODO: Improve this to not need to take in a width, rather switch between the encoding / encoder
+    static func readDictionaryMemoryMap(_ url: URL, dimensions: Int = 50) -> HNSWDictionary {
+        var dictionary = HNSWDictionary()
+
+        do {
+            let data = try Data(contentsOf: url)
+            var index = 0
+
+            // Safely read the dictionary count
+            let countData = data.subdata(in: index..<index+MemoryLayout<Int>.size)
+            let count = countData.withUnsafeBytes { $0.load(as: Int.self) }
+            index += MemoryLayout<Int>.size
+
+            for _ in 0..<count {
+                // Safely read the key
+                let keyData = data.subdata(in: index..<index+MemoryLayout<Int>.size)
+                let key = keyData.withUnsafeBytes { $0.load(as: Int.self) }
+                index += MemoryLayout<Int>.size
+
+                // Read the document length and document
+                let documentLengthData = data.subdata(in: index..<index+MemoryLayout<Int>.size)
+                let documentLength = documentLengthData.withUnsafeBytes { $0.load(as: Int.self) }
+                index += MemoryLayout<Int>.size
+                let documentData = data.subdata(in: index..<index+documentLength)
+                guard let document = String(data: documentData, encoding: .utf8) else {
+                    print("Failed to decode string")
+                    continue // Skip this entry on failure
+                }
+                index += documentLength
+
+                // Read the vector
+                let vectorLengthData = data.subdata(in: index..<index+MemoryLayout<Int>.size)
+                let vectorLength = vectorLengthData.withUnsafeBytes { $0.load(as: Int.self) }
+                index += MemoryLayout<Int>.size
+
+                var vector = [Encoder.Scalar]()
+                for _ in 0..<vectorLength {
+                    let scalarData = data.subdata(in: index..<index+MemoryLayout<Encoder.Scalar>.size)
+                    let scalar = scalarData.withUnsafeBytes { $0.load(as: Encoder.Scalar.self) }
+                    vector.append(scalar)
+                    index += MemoryLayout<Encoder.Scalar>.size
+                }
+                
+                // Add the key-value pair to the dictionary
+                dictionary[key] = DocumentVectorPair(untokenizedDocument: document, vector: vector)
+            }
+        } catch {
+            print("Error reading dictionary from file: \(error)")
+        }
+
+        return dictionary
+    }
+}
+
+#endif
diff --git a/Sources/SwiftNLP/1. Data Collection/HNSWCorpus + Sequence.swift b/Sources/SwiftNLP/1. Data Collection/DurableHNSWCorpus + Sequence.swift
similarity index 59%
rename from Sources/SwiftNLP/1. Data Collection/HNSWCorpus + Sequence.swift
rename to Sources/SwiftNLP/1. Data Collection/DurableHNSWCorpus + Sequence.swift
index d4a88a33b350d5e42087ffa57258f0f9d5d2d86a..072b2b1d32ab05b38819f8905a9901cd2ac399d6 100644
--- a/Sources/SwiftNLP/1. Data Collection/HNSWCorpus + Sequence.swift	
+++ b/Sources/SwiftNLP/1. Data Collection/DurableHNSWCorpus + Sequence.swift	
@@ -20,37 +20,50 @@
 // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 // OTHER DEALINGS IN THE SOFTWARE.
+//
+// Created by Mingchung Xia on 2024-03-16.
+//
 
-extension HNSWCorpus: Sequence {
-        
-    typealias Element = [Scalar]
-        
-    
+#if os(macOS)
+
+import Foundation
+
+/// HNSWCorpus iterates through its dictionary of key to document vector pairs
+ 
+extension DurableHNSWCorpus: Sequence, Collection {
     // Sequence Protocol Requirements
     @inlinable
-    func makeIterator() -> Dictionary<Int, [Scalar]>.Values.Iterator {
-        return encodedDocuments.values.makeIterator()
+    func makeIterator() -> AnyIterator<DocumentVectorPair> {
+        var iterator = dictionary.values.makeIterator()
+        return AnyIterator {
+            return iterator.next()
+        }
     }
     
-    
     // Collection Protocol Requirements
     @inlinable
-    var startIndex: Dictionary<Int, [Scalar]>.Index {
-        return encodedDocuments.startIndex
+    var startIndex: Int {
+        return dictionary.keys.sorted().startIndex
     }
     
     @inlinable
-    var endIndex: Dictionary<Int, [Scalar]>.Index {
-        return encodedDocuments.endIndex
+    var endIndex: Int {
+        return dictionary.keys.sorted().endIndex
     }
     
     @inlinable
-    subscript(position: Dictionary<Int, [Scalar]>.Index) -> [Scalar] {
-        encodedDocuments.values[position]
+    subscript(position: Int) -> DocumentVectorPair {
+        let key = dictionary.keys.sorted()[position]
+        guard let pair = dictionary[key] else {
+            fatalError("Key \(key) not found in HNSW dictionary")
+        }
+        return pair
     }
     
     @inlinable
-    func index(after i: Dictionary<Int, [Scalar]>.Index) -> Dictionary<Int, [Scalar]>.Index {
-        return encodedDocuments.index(after: i)
+    func index(after i: Int) -> Int {
+        return dictionary.keys.sorted().index(after: i)
     }
 }
+
+#endif
diff --git a/Sources/SwiftNLP/1. Data Collection/DurableHNSWCorpus.swift b/Sources/SwiftNLP/1. Data Collection/DurableHNSWCorpus.swift
new file mode 100644
index 0000000000000000000000000000000000000000..00f49fdcf861e5debe31b564860376781fe3da5f
--- /dev/null
+++ b/Sources/SwiftNLP/1. Data Collection/DurableHNSWCorpus.swift	
@@ -0,0 +1,120 @@
+// Copyright (c) 2024 Jim Wallace
+//
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+//
+// Created by Mingchung Xia on 2024-02-26.
+//
+// MARK: DurableHNSWCorpus is only available on MacOS, not Linux
+
+#if os(macOS)
+
+import Foundation
+import CoreLMDB
+import CoreLMDBCoders
+
+// MARK: DurableHNSWCorpus cannot conform to SNLPCorpus under its current definition
+// This is because addingUntokenizedDocuments in a DurableHNSWCorpus requires an additional parameter (transaction) and can throw
+
+final class DurableHNSWCorpus<Item: SNLPDataItem, Encoder: SNLPEncoder>: SNLPCorpus where Encoder.Scalar: Codable & UnsafeMemoryLayoutStorableFloat {
+
+    
+
+    public typealias HNSWDictionary = [Int: DocumentVectorPair]
+    
+    
+    internal var documentEncoder: Encoder
+    internal var documents = ContiguousArray<Item>()
+    internal var encodedDocuments = ContiguousArray<[Encoder.Scalar]>()
+    
+    var index: DeterministicDurableVectorIndex<Encoder.Scalar>
+    
+    
+    private let ONE_GB: Int = 1_073_741_824
+    private let ONE_MB: Int = 1_048_576
+    private let ONE_KB: Int = 1_024
+    private let ONE_B:  Int = 1
+    private let DEFAULT_MAXREADERS: UInt32 = 126
+    private let DEFAULT_MAXDBS:     UInt32 = 10
+    
+    
+    
+    
+    // Keeps track of the original document for client code
+    var dictionary: HNSWDictionary = [:]
+
+    
+    
+    // typicalNeighbourhoodSize = 20 is a standard benchmark
+//    init(encoding: ContextFreeEncoder<Encoder.Scalar>.PreComputedEmbeddings, typicalNeighborhoodSize: Int = 20, namespace: String = "hnsw", in transaction: Transaction) throws {
+//        documentEncoder = ContextFreeEncoder<Encoder.Scalar>(source: encoding) as! Encoder
+//        
+//        index = try DeterministicDurableVectorIndex<Encoder.Scalar>(
+//            namespace: namespace,
+//            typicalNeighborhoodSize: typicalNeighborhoodSize,
+//            in: transaction
+//        )
+//    }
+    
+    init(encoder: Encoder = Encoder(), typicalNeighborhoodSize: Int = 20, namespace: String = "hnsw", in transaction: Transaction) throws {
+        documentEncoder = encoder
+        index = try DeterministicDurableVectorIndex<Encoder.Scalar>(
+            namespace: namespace,
+            typicalNeighborhoodSize: typicalNeighborhoodSize,
+            in: transaction
+        )
+    }
+    
+    
+    @inlinable
+    func addUntokenizedDocument(_ document: Item, in transaction: Transaction) throws {
+        /// forced unwrap as! [Scalar] is needed when we use SNLPEncoder but not ContextFreeEncoder
+        /// encodedDocuments.insert will insert and return the corresponding key (id)s
+        
+        documents.append(document)
+        encodedDocuments.append(documentEncoder.encodeSentence(document.fullText))
+        
+        assert( documents.count == encodedDocuments.count )
+        
+        let encodedVector = documentEncoder.encodeSentence(document.fullText)
+        let key = try index.insert(encodedVector, in: transaction)
+        addDocumentVectorPair(
+            at: key,
+            document: document.fullText,
+            vector: encodedVector
+        )
+    }
+    
+    func searchFor(_ query: String) -> [Item] {
+        return []
+    }
+    
+//    func searchFor(_ query: String, in transaction: Transaction) -> [Item] {
+//        let queryVector = documentEncoder.encodeToken(query)
+//        let results = try! index.find(near: queryVector, limit: 8, in: transaction)
+//        
+//        return results.map{ documents[$0.id] }
+//        return []
+//    }
+    
+}
+
+#endif
diff --git a/Sources/SwiftNLP/1. Data Collection/EphemeralHNSWCorpus + Codable.swift b/Sources/SwiftNLP/1. Data Collection/EphemeralHNSWCorpus + Codable.swift
new file mode 100644
index 0000000000000000000000000000000000000000..a7189dc6afcef3d4c7d3290bcec7f44bcb9285e0
--- /dev/null
+++ b/Sources/SwiftNLP/1. Data Collection/EphemeralHNSWCorpus + Codable.swift	
@@ -0,0 +1,44 @@
+//// Copyright (c) 2024 Jim Wallace
+////
+//// Permission is hereby granted, free of charge, to any person
+//// obtaining a copy of this software and associated documentation
+//// files (the "Software"), to deal in the Software without
+//// restriction, including without limitation the rights to use,
+//// copy, modify, merge, publish, distribute, sublicense, and/or sell
+//// copies of the Software, and to permit persons to whom the
+//// Software is furnished to do so, subject to the following
+//// conditions:
+////
+//// The above copyright notice and this permission notice shall be
+//// included in all copies or substantial portions of the Software.
+////
+//// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+//// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+//// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+//// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+//// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+//// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+//// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+//// OTHER DEALINGS IN THE SOFTWARE.
+////
+//// Created by Mingchung Xia on 2024-02-07.
+////
+//
+//import Foundation
+//
+//// MARK: Decodable conformance is in HNSWCorpus
+//
+//extension EphemeralHNSWCorpus: Codable {
+//    enum CodingKeys: String, CodingKey {
+//        case _documentEncoder
+//        case encodedDocuments
+//        case dictionary
+//    }
+//    
+//    func encode(to encoder: Encoder) throws {
+//        var container = encoder.container(keyedBy: CodingKeys.self)
+//        try container.encode(_documentEncoder, forKey: ._documentEncoder)
+//        try container.encode(encodedDocuments, forKey: .encodedDocuments)
+//        try container.encode(dictionary, forKey: .dictionary)
+//    }
+//}
diff --git a/Sources/SwiftNLP/1. Data Collection/EphemeralHNSWCorpus + Dictionary.swift b/Sources/SwiftNLP/1. Data Collection/EphemeralHNSWCorpus + Dictionary.swift
new file mode 100644
index 0000000000000000000000000000000000000000..a0a9c0cd10b92b7cafd45be8c871eb951ab98a8f
--- /dev/null
+++ b/Sources/SwiftNLP/1. Data Collection/EphemeralHNSWCorpus + Dictionary.swift	
@@ -0,0 +1,89 @@
+// Copyright (c) 2024 Jim Wallace
+//
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+//
+// Created by Mingchung Xia on 2024-02-14.
+//
+
+import Foundation
+
+extension EphemeralHNSWCorpus {
+    /// This extension is used for the dictionary operations
+    public struct DocumentVectorPair {
+        var untokenizedDocument: String
+        var vector: [Encoder.Scalar]
+        
+        init(untokenizedDocument: String, vector: [Encoder.Scalar]) {
+            self.untokenizedDocument = untokenizedDocument
+            self.vector = vector
+        }
+    }
+    
+    @inlinable
+    func getUntokenizedDocument(at key: Int) -> String {
+        if let pair = dictionary[key] {
+            return pair.untokenizedDocument
+        } else {
+            fatalError("Key \(key) not found in HNSW dictionary")
+        }
+    }
+    
+    @inlinable
+    func getVector(at key: Int) -> [Encoder.Scalar] {
+        if let pair = dictionary[key] {
+            return pair.vector
+        } else {
+            fatalError("Key \(key) not found in HNSW dictionary")
+        }
+    }
+    
+    @inlinable
+    func getDictionary() -> [Int: DocumentVectorPair] {
+        return dictionary
+    }
+    
+    func addDocumentVectorPair(at key: Int, document: String, vector: [Encoder.Scalar]) {
+        dictionary[key] = DocumentVectorPair(
+            untokenizedDocument: document,
+            vector: vector
+        )
+    }
+}
+
+extension EphemeralHNSWCorpus.DocumentVectorPair: Codable where Encoder.Scalar: Codable {
+    enum CodingKeys: String, CodingKey {
+        case untokenizedDocument
+        case vector
+    }
+    
+    internal init(from decoder: Decoder) throws {
+        let container = try decoder.container(keyedBy: CodingKeys.self)
+        untokenizedDocument = try container.decode(String.self, forKey: .untokenizedDocument)
+        vector = try container.decode([Encoder.Scalar].self, forKey: .vector)
+    }
+    
+//    internal func encode(to encoder: Encoder) throws {
+//        var container = encoder.container(keyedBy: CodingKeys.self)
+//        try container.encode(untokenizedDocument, forKey: .untokenizedDocument)
+//        try container.encode(vector, forKey: .vector)
+//    }
+}
diff --git a/Sources/SwiftNLP/1. Data Collection/DictionaryCorpus + Sequence.swift b/Sources/SwiftNLP/1. Data Collection/EphemeralHNSWCorpus + Sequence.swift
similarity index 59%
rename from Sources/SwiftNLP/1. Data Collection/DictionaryCorpus + Sequence.swift
rename to Sources/SwiftNLP/1. Data Collection/EphemeralHNSWCorpus + Sequence.swift
index c2bde96a43ad323dbb78065bc9e890c67ec3840c..e9670f71b295d37f8a5ba264b528caf80a3959d3 100644
--- a/Sources/SwiftNLP/1. Data Collection/DictionaryCorpus + Sequence.swift	
+++ b/Sources/SwiftNLP/1. Data Collection/EphemeralHNSWCorpus + Sequence.swift	
@@ -20,37 +20,47 @@
 // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 // OTHER DEALINGS IN THE SOFTWARE.
+//
+// Created by Mingchung Xia on 2024-02-14.
+//
 
-extension DictionaryCorpus: Sequence {
-        
-    typealias Element = [Scalar]
-        
-    
+import Foundation
+
+/// HNSWCorpus iterates through its dictionary of key to document vector pairs
+ 
+extension EphemeralHNSWCorpus: Sequence, Collection {
     // Sequence Protocol Requirements
     @inlinable
-    func makeIterator() -> Dictionary<Int, [Scalar]>.Values.Iterator {
-        return encodedDocuments.values.makeIterator()
+    func makeIterator() -> AnyIterator<DocumentVectorPair> {
+        var iterator = dictionary.values.makeIterator()
+        return AnyIterator {
+            return iterator.next()
+        }
     }
     
-    
     // Collection Protocol Requirements
     @inlinable
-    var startIndex: Dictionary<Int, [Scalar]>.Index {
-        return encodedDocuments.startIndex
+    var startIndex: Int {
+        return dictionary.keys.sorted().startIndex
     }
     
     @inlinable
-    var endIndex: Dictionary<Int, [Scalar]>.Index {
-        return encodedDocuments.endIndex
+    var endIndex: Int {
+        return dictionary.keys.sorted().endIndex
     }
     
     @inlinable
-    subscript(position: Dictionary<Int, [Scalar]>.Index) -> [Scalar] {
-        encodedDocuments.values[position]
+    subscript(position: Int) -> DocumentVectorPair {
+        let key = dictionary.keys.sorted()[position]
+        guard let pair = dictionary[key] else {
+            fatalError("Key \(key) not found in HNSW dictionary")
+        }
+        return pair
     }
     
     @inlinable
-    func index(after i: Dictionary<Int, [Scalar]>.Index) -> Dictionary<Int, [Scalar]>.Index {
-        return encodedDocuments.index(after: i)
+    func index(after i: Int) -> Int {
+        return dictionary.keys.sorted().index(after: i)
     }
 }
+
diff --git a/Sources/SwiftNLP/1. Data Collection/EphemeralHNSWCorpus.swift b/Sources/SwiftNLP/1. Data Collection/EphemeralHNSWCorpus.swift
new file mode 100644
index 0000000000000000000000000000000000000000..706ddb7e9286ca254b52b4898502c45a5015fa72
--- /dev/null
+++ b/Sources/SwiftNLP/1. Data Collection/EphemeralHNSWCorpus.swift	
@@ -0,0 +1,98 @@
+// Copyright (c) 2024 Jim Wallace
+//
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+//
+// The HNSW work is based on the original work of Jaden Geller
+// See the https://github.com/JadenGeller/similarity-topology.git
+// for reference. The code is used with permission from the author
+// under the MIT License.
+//
+// Created by Mingchung Xia on 2024-02-14.
+//
+
+import Foundation
+
+// MARK: Allow EphemeralHNSWCorpus to simply be used as HNSWCorpus
+typealias HNSWCorpus = EphemeralHNSWCorpus
+
+final class EphemeralHNSWCorpus<Item: SNLPDataItem,Encoder: SNLPEncoder>: SNLPCorpus where Encoder.Scalar: Codable {
+        
+
+    public typealias HNSWDictionary = [Int: DocumentVectorPair]
+    
+    
+    internal var documentEncoder: Encoder
+    internal var documents = ContiguousArray<Item>()
+    internal var encodedDocuments = ContiguousArray<[Encoder.Scalar]>()
+    
+    var index: DeterministicEphemeralVectorIndex<[Encoder.Scalar]>
+    
+    
+    // Keeps track of the original document for client code
+    var dictionary: HNSWDictionary = [:]
+
+    // typicalNeighbourhoodSize = 20 is a standard benchmark
+//    init(encoding: ContextFreeEncoder<Encoder.Scalar>.PreComputedEmbeddings,
+//         typicalNeighborhoodSize: Int = 20) {
+//        documentEncoder = ContextFreeEncoder(source: encoding) as! Encoder
+//        index = DeterministicEphemeralVectorIndex<[Encoder.Scalar]>(typicalNeighborhoodSize: typicalNeighborhoodSize)
+//    }
+    
+    init(encoder: Encoder = Encoder(), typicalNeighborhoodSize: Int = 20) {
+        documentEncoder = encoder
+        index = DeterministicEphemeralVectorIndex<[Encoder.Scalar]>(typicalNeighborhoodSize: typicalNeighborhoodSize)
+    }
+    
+//     // Decodable conformance
+//    required init(from decoder: Decoder) throws {
+//        let container = try decoder.container(keyedBy: CodingKeys.self)
+//        _documentEncoder = try container.decode(ContextFreeEncoder<Scalar>.self, forKey: ._documentEncoder)
+//        encodedDocuments = try container.decode(DeterministicEphemeralVectorIndex<[Scalar]>.self, forKey: .encodedDocuments)
+//        dictionary = try container.decode(HNSWDictionary.self, forKey: .dictionary)
+//    }
+    
+    @inlinable
+    func addUntokenizedDocument(_ document: Item) {
+        /// forced unwrap as! [Scalar] is needed when we use SNLPEncoder but not ContextFreeEncoder
+        /// encodedDocuments.insert will insert and return the corresponding key (id)
+        
+        documents.append(document)
+        encodedDocuments.append(documentEncoder.encodeSentence(document.fullText))
+        
+        assert( documents.count == encodedDocuments.count )
+        
+        let key = index.insert(documentEncoder.encodeSentence(document.fullText))
+        addDocumentVectorPair(
+            at: key,
+            document: document.fullText,
+            vector: index.base.vectors[key]
+        )
+    }
+    
+    func searchFor(_ query: String) -> [Item] {
+        
+        let queryVector = documentEncoder.encodeToken(query)
+        let results = try! index.find(near: queryVector, limit: 8)
+        
+        return results.map{ documents[$0.id] }
+    }
+}
diff --git a/Sources/SwiftNLP/1. Data Collection/HNSW/DeterministicDurableVectorIndex.swift b/Sources/SwiftNLP/1. Data Collection/HNSW/DeterministicDurableVectorIndex.swift
new file mode 100644
index 0000000000000000000000000000000000000000..2fc91aea485eb00a1a91541df6604177a35f4c92
--- /dev/null
+++ b/Sources/SwiftNLP/1. Data Collection/HNSW/DeterministicDurableVectorIndex.swift	
@@ -0,0 +1,86 @@
+// Copyright (c) 2024 Jim Wallace
+//
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+//
+// Created by Mingchung Xia on 2024-02-26.
+//
+
+#if os(macOS)
+
+import Foundation
+import PriorityHeapModule
+import PriorityHeapAlgorithms
+import SimilarityMetric
+import HNSWAlgorithm
+import HNSWDurable
+import CoreLMDB
+import CoreLMDBCoders
+
+// MARK: This uses the persistent DurableVectorIndex
+
+extension DurableVectorIndex {
+    public typealias Neighbor = NearbyVector<DurableVectorIndex.Accessor.CompoundKey, Metric.Vector, Metric.Similarity>
+}
+
+public struct DeterministicDurableVectorIndex<VectorComponent: UnsafeMemoryLayoutStorableFloat> where VectorComponent: Codable {
+    public typealias Vector = [VectorComponent]
+    public typealias Index = DurableVectorIndex<CartesianDistanceMetric<Vector>, Vector.Element>
+    public var base: Index
+    public var typicalNeighborhoodSize: Int
+    public var size: Int = 0 // TODO: This size is not set when read from LMDB
+    
+    private var rng: RandomNumberGenerator
+    
+    public init(namespace: String, typicalNeighborhoodSize: Int = 20, in transaction: Transaction) throws {
+        let metric = CartesianDistanceMetric<Vector>()
+        let config = Config.unstableDefault(typicalNeighborhoodSize: typicalNeighborhoodSize)
+        self.base = try Index(
+            namespace: namespace,
+            metric: metric,
+            config: config,
+            in: transaction
+        )
+        self.typicalNeighborhoodSize = typicalNeighborhoodSize
+        self.rng = SeedableRNG(seed: 1)
+    }
+    
+    public func find(near query: Vector, limit: Int, exact: Bool = false, in transaction: Transaction) throws -> [Index.Neighbor] {
+        if exact {
+            // TODO: Exact search logic
+            fatalError("Exact search logic for DeterministicDurableVectorIndex is not currently supported")
+        } else {
+            let accessor = try Index.Accessor(for: base, in: transaction)
+            return Array(try accessor.find(near: query, limit: limit))
+        }
+    }
+    
+    @discardableResult
+    public mutating func insert(_ vector: Vector, in transaction: Transaction) throws -> Int {
+        defer { size += 1 }
+        let accessor = try Index.Accessor(for: base, in: transaction)
+        let key = String(size)
+        accessor.insert(vector, forKey: key, using: &rng)
+        return self.size
+    }
+}
+
+#endif
diff --git a/Sources/SwiftNLP/1. Data Collection/HNSW/DeterministicEphemeralVectorIndex + Codable.swift b/Sources/SwiftNLP/1. Data Collection/HNSW/DeterministicEphemeralVectorIndex + Codable.swift
new file mode 100644
index 0000000000000000000000000000000000000000..8445929834592a51a2779a3cc89a31ef6ddca2a1
--- /dev/null
+++ b/Sources/SwiftNLP/1. Data Collection/HNSW/DeterministicEphemeralVectorIndex + Codable.swift	
@@ -0,0 +1,55 @@
+// Copyright (c) 2024 Jim Wallace
+//
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+//
+// Created by Mingchung Xia on 2024-02-07.
+//
+
+import Foundation
+
+extension DeterministicEphemeralVectorIndex: Encodable where Vector: Encodable {
+    enum CodingKeys: String, CodingKey {
+        case typicalNeighborhoodSize
+        case vectors
+    }
+
+    public func encode(to encoder: Encoder) throws {
+        var container = encoder.container(keyedBy: CodingKeys.self)
+        try container.encode(typicalNeighborhoodSize, forKey: .typicalNeighborhoodSize)
+        try container.encode(base.vectors, forKey: .vectors)
+    }
+}
+
+extension DeterministicEphemeralVectorIndex: Decodable where Vector: Decodable {
+    public init(from decoder: Decoder) throws {
+        let container = try decoder.container(keyedBy: CodingKeys.self)
+        let typicalNeighborhoodSize = try container.decode(Int.self, forKey: .typicalNeighborhoodSize)
+        let vectors = try container.decode([Vector].self, forKey: .vectors)
+
+        self.init(typicalNeighborhoodSize: typicalNeighborhoodSize)
+        for vector in vectors {
+            self.insert(vector)
+        }
+    }
+}
+
+
diff --git a/Sources/SwiftNLP/1. Data Collection/HNSW/DeterministicEphemeralVectorIndex.swift b/Sources/SwiftNLP/1. Data Collection/HNSW/DeterministicEphemeralVectorIndex.swift
new file mode 100644
index 0000000000000000000000000000000000000000..9634945fbd1273d8d09f0f85475854f9d3990372
--- /dev/null
+++ b/Sources/SwiftNLP/1. Data Collection/HNSW/DeterministicEphemeralVectorIndex.swift	
@@ -0,0 +1,80 @@
+// Copyright (c) 2024 Jim Wallace
+//
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+//
+// Created by Mingchung Xia on 2024-01-28.
+//
+
+import Foundation
+import PriorityHeapModule
+import PriorityHeapAlgorithms
+import HNSWAlgorithm
+import HNSWEphemeral
+
+// MARK: This uses the temporary EmphermalVectorIndex
+
+public struct DeterministicEphemeralVectorIndex<Vector: Collection & Codable> where Vector.Element: BinaryFloatingPoint {
+    
+    public typealias Index = EphemeralVectorIndex<Int, Int, CartesianDistanceMetric<Vector>, Void>
+    public var base: Index
+    public var typicalNeighborhoodSize: Int
+    
+    private var vectorRNG: RandomNumberGenerator
+    private var graphRNG: RandomNumberGenerator
+    
+    public init(typicalNeighborhoodSize: Int = 20) {
+        base = .init(metric: CartesianDistanceMetric<Vector>(), config: .unstableDefault(typicalNeighborhoodSize: typicalNeighborhoodSize))
+        self.typicalNeighborhoodSize = typicalNeighborhoodSize
+        self.vectorRNG = SeedableRNG(seed: 0)
+        self.graphRNG = SeedableRNG(seed: 1)
+    }
+    
+    public func find(near query: Vector, limit: Int, exact: Bool = false) throws -> [Index.Neighbor] {
+        if exact {
+            return Array(PriorityHeap(base.vectors.enumerated().map {
+                let similarity = base.metric.similarity(between: query, $0.element)
+                return NearbyVector(id: $0.offset, vector: $0.element, priority: similarity)
+            }).descending().prefix(limit))
+        } else {
+            return Array(try base.find(near: query, limit: limit))
+        }
+    }
+    
+    public mutating func generateRandom(range: ClosedRange<Double>) -> CGPoint {
+        CGPoint(
+            x: .random(in: range, using: &vectorRNG),
+            y: .random(in: range, using: &vectorRNG)
+        )
+    }
+    
+    @discardableResult
+    public mutating func insert(_ vector: Vector) -> Int {
+        let convertedVector: [Double] = vector.map{ Double($0) }
+        if let metricVector = convertedVector as? CartesianDistanceMetric<Vector>.Vector {
+            /// base.insert will returns a key and inserts the vector into the index
+            let key = base.insert(metricVector, using: &graphRNG)
+            return key
+        } else {
+            fatalError("Unable to get metric vector")
+        }
+    }
+}
diff --git a/Sources/SwiftNLP/1. Data Collection/HNSW/HNSWCorpusDataHandler.swift b/Sources/SwiftNLP/1. Data Collection/HNSW/HNSWCorpusDataHandler.swift
new file mode 100644
index 0000000000000000000000000000000000000000..9348babd686209a59caab30ea443e33ba838c0e7
--- /dev/null
+++ b/Sources/SwiftNLP/1. Data Collection/HNSW/HNSWCorpusDataHandler.swift	
@@ -0,0 +1,154 @@
+//// Copyright (c) 2024 Jim Wallace
+////
+//// Permission is hereby granted, free of charge, to any person
+//// obtaining a copy of this software and associated documentation
+//// files (the "Software"), to deal in the Software without
+//// restriction, including without limitation the rights to use,
+//// copy, modify, merge, publish, distribute, sublicense, and/or sell
+//// copies of the Software, and to permit persons to whom the
+//// Software is furnished to do so, subject to the following
+//// conditions:
+////
+//// The above copyright notice and this permission notice shall be
+//// included in all copies or substantial portions of the Software.
+////
+//// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+//// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+//// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+//// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+//// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+//// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+//// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+//// OTHER DEALINGS IN THE SOFTWARE.
+////
+//// Created by Mingchung Xia on 2024-02-13.
+////
+//
+//// MARK: This is outdated since we now have the presence of a DurableHNSWCorpus but still available for reference
+//
+//import Foundation
+//
+//final class HNSWCorpusDataHandler<Scalar: BinaryFloatingPoint & Codable> {
+//    var corpus: HNSWCorpus<Scalar>
+//    private var url: URL?
+//    
+//    init(corpus: HNSWCorpus<Scalar>, resource: String = "hnsw") {
+//        self.corpus = corpus
+////        self.url = Bundle.module.url(forResource: resource, withExtension: "mmap")
+//        if let downloadsDirectory = FileManager.default.urls(for: .downloadsDirectory, in: .userDomainMask).first {
+//            self.url = downloadsDirectory.appendingPathComponent(resource + ".mmap")
+//        }
+//    }
+//    
+//    /// It is very difficult to get the exact size of the corpus as every class also depends on other classes
+//    /// The size of the memory map may not even be correct if it only stores the vectors, and the vectors are really the only "important" part
+//    func getCorpusSize() -> Int {
+////        return heapSize(corpus)
+////        return class_getInstanceSize(type(of: corpus))
+////        return MemoryLayout.size(ofValue: corpus)
+//        var size = 0
+//        let data = corpus.encodedDocuments.base.vectors
+//        for vector in data {
+//            size += MemoryLayout.size(ofValue: vector)
+//        }
+//        return size
+//    }
+//    
+//    func getDictionarySize(includeKey: Bool = true) -> Int {
+//        var size = 0
+//        let data = corpus.getDictionary()
+//        for (key, documentVectorPair) in data {
+//            if includeKey { size += MemoryLayout.size(ofValue: key) }
+//            size += MemoryLayout.size(ofValue: documentVectorPair.untokenizedDocument)
+//            size += MemoryLayout.size(ofValue: documentVectorPair.vector)
+//        }
+//        return size
+//    }
+//    
+//    private func heapSize(_ obj: AnyObject) -> Int {
+//        return malloc_size(Unmanaged.passUnretained(obj).toOpaque())
+//    }
+//}
+//
+//extension HNSWCorpusDataHandler {
+//    func saveMemoryMap() {
+//        guard let url = url else {
+//            print("URL to resource not found")
+//            return
+//        }
+//        let fileManager = FileManager.default
+//        if !fileManager.fileExists(atPath: url.path) {
+//            fileManager.createFile(atPath: url.path, contents: nil, attributes: nil)
+//        }
+//        do {
+////            let fileHandle = try FileHandle(forWritingTo: url)
+////            
+////            let count = corpus.count
+////            let countData = withUnsafeBytes(of: count) { Data($0) }
+////            fileHandle.write(countData)
+////
+////            for pair in corpus {
+////                let documentData = pair.untokenizedDocument.utf8CString.withUnsafeBufferPointer { Data(buffer: $0) }
+////                fileHandle.write(documentData)
+////            }
+////            fileHandle.closeFile()
+//            
+//            print("Saving HNSW to file...")
+//            /// Using the Codable conformances
+//            let encoder = JSONEncoder()
+//            let encoded = try encoder.encode(corpus)
+//            try encoded.write(to: url)
+//        } catch {
+//            print("Error writing HNSW to file: \(error)")
+//        }
+//    }
+//    
+//    /// This saves only the untokenized documents dictionary map
+//    func saveDictionaryMemoryMap() {
+//        // TODO: Move from DurableHNSW extension once HNSW wrapper is created
+//    }
+//    
+//    // TODO: find out how to not rebuild the index
+//    static func loadMemoryMap(encoder: any SNLPEncoder, typicalNeighborhoodSize: Int = 20, resource: String = "hnsw") -> HNSWCorpus<Double> {
+//        guard let url = Bundle.module.url(forResource: resource, withExtension: "mmap") else {
+//            print("URL to resource not found")
+//            return HNSWCorpus(encoder: encoder, typicalNeighborhoodSize: typicalNeighborhoodSize)
+//        }
+//        
+//        var loadedCorpus = HNSWCorpus(encoder: encoder, typicalNeighborhoodSize: typicalNeighborhoodSize)
+//        
+//        do {
+////            let data = try Data(contentsOf: url, options: .alwaysMapped)
+////            let countData = data.prefix(MemoryLayout<Int>.size)
+////            let count: Int = countData.withUnsafeBytes { $0.load(as: Int.self) }
+////            var index = MemoryLayout<Int>.size
+////
+////            for _ in 0..<count {
+////                if let stringRange = data[index...].range(of: "\0".data(using: .utf8)!) {
+////                    let documentData = data[index..<stringRange.lowerBound]
+////                    if let document = String(data: documentData, encoding: .utf8) {
+////                        // Add the untokenized document to the corpus
+////                        loadedCorpus.addUntokenizedDocument(document)
+////                        index = stringRange.upperBound
+////                    }
+////                } else {
+////                    break
+////                }
+////            }
+//            
+//            /// Using the Codable conformances
+//            print("Loading HNSW from file...")
+//            let decoder = JSONDecoder()
+//            let data = try Data(contentsOf: url)
+//            loadedCorpus = try decoder.decode(HNSWCorpus<Double>.self, from: data)
+//        } catch {
+//            print("Error reading HNSW from file: \(error)")
+//        }
+//        return loadedCorpus
+//    }
+//    
+//    static func loadMemoryMap(encoding: ContextFreeEncoder<Scalar>.PreComputedEmbeddings, typicalNeighborhoodSize: Int = 20, resource: String = "hnsw") -> HNSWCorpus<Double> {
+//        let encoder = ContextFreeEncoder<Scalar>(source: encoding)
+//        return loadMemoryMap(encoder: encoder, typicalNeighborhoodSize: typicalNeighborhoodSize, resource: resource)
+//    }
+//}
diff --git a/Sources/SwiftNLP/1. Data Collection/HNSW/Metrics/CartesianDistanceMetric.swift b/Sources/SwiftNLP/1. Data Collection/HNSW/Metrics/CartesianDistanceMetric.swift
new file mode 100644
index 0000000000000000000000000000000000000000..a66009fc0fad321b3884783b43d1646b58f5d5ee
--- /dev/null
+++ b/Sources/SwiftNLP/1. Data Collection/HNSW/Metrics/CartesianDistanceMetric.swift	
@@ -0,0 +1,59 @@
+// Copyright (c) 2024 Jim Wallace
+//
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+//
+// Created by Mingchung Xia on 2024-01-28.
+//
+
+import Foundation
+import SimilarityMetric
+
+#if canImport(Surge) && canImport(Accelerate) && os(macOS)
+import Surge
+import Accelerate
+
+public struct CartesianDistanceMetric<Vector: Collection & Codable>: SimilarityMetric where Vector.Element: BinaryFloatingPoint {
+    public func similarity(between someItem: Vector, _ otherItem: Vector) -> Vector.Element {
+        return Vector.Element(Surge.distSq(someItem as! [Double], otherItem as! [Double]))
+    }
+}
+
+#else
+//import Nifty
+
+// MARK: Nifty is too outdated to retrofit our code, even after updating its swift-tools-version to 5.9.
+
+/// This implementation may be less efficient on Linux
+public struct CartesianDistanceMetric<Vector: Collection & Codable>: SimilarityMetric where Vector.Element: BinaryFloatingPoint {
+    public func similarity(between someItem: Vector, _ otherItem: Vector) -> Vector.Element {
+        var sum: Vector.Element = 0
+        
+        for (a, b) in zip(someItem, otherItem) {
+            let difference = a - b
+            sum += difference * difference
+        }
+        
+        return sum
+    }
+}
+
+#endif
diff --git a/Sources/SwiftNLP/1. Data Collection/HNSW/Metrics/CosineSimilarityMetric.swift b/Sources/SwiftNLP/1. Data Collection/HNSW/Metrics/CosineSimilarityMetric.swift
new file mode 100644
index 0000000000000000000000000000000000000000..b1a35e2ab8a1aa6ad160f29d9b70194219e62bec
--- /dev/null
+++ b/Sources/SwiftNLP/1. Data Collection/HNSW/Metrics/CosineSimilarityMetric.swift	
@@ -0,0 +1,73 @@
+// Copyright (c) 2024 Jim Wallace
+//
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+//
+// Created by Mingchung Xia on 2024-03-14.
+//
+
+import Foundation
+import SimilarityMetric
+
+#if canImport(Surge) && canImport(Accelerate) && os(macOS)
+import Surge
+import Accelerate
+
+public struct CosineSimilarityMetric<Vector: Collection & Codable>: SimilarityMetric where Vector.Element: BinaryFloatingPoint {
+    public func similarity(between someItem: Vector, _ otherItem: Vector) -> Vector.Element {
+        // Convert vectors to arrays of Double
+        let someItemDoubles = someItem.map { Double($0) }
+        let otherItemDoubles = otherItem.map { Double($0) }
+        
+        // Calculate dot product using Surge for cosine similarity numerator
+        let dotProduct = Surge.dot(someItemDoubles, otherItemDoubles)
+        
+        // Manually calculate magnitudes (norms) of the vectors for the denominator
+        let someItemMagnitude = sqrt(Surge.dot(someItemDoubles, someItemDoubles))
+        let otherItemMagnitude = sqrt(Surge.dot(otherItemDoubles, otherItemDoubles))
+        
+        // Calculate cosine similarity
+        let cosineSimilarity = dotProduct / (someItemMagnitude * otherItemMagnitude)
+        
+        // Convert back to type Vector.Element
+        return Vector.Element(cosineSimilarity)
+    }
+}
+
+#else
+//import Nifty
+
+// MARK: Nifty is too outdated to retrofit our code, even after updating its swift-tools-version to 5.9.
+
+/// This implementation may be less efficient on Linux
+public struct CosineSimilarityMetric<Vector: Collection & Codable>: SimilarityMetric where Vector.Element: BinaryFloatingPoint {
+    public func similarity(between someItem: Vector, _ otherItem: Vector) -> Vector.Element {
+        let dotProduct = zip(someItem, otherItem).reduce(0) { $0 + $1.0 * $1.1 }
+        let magnitudeSomeItem = sqrt(someItem.reduce(0) { $0 + $1 * $1 })
+        let magnitudeOtherItem = sqrt(otherItem.reduce(0) { $0 + $1 * $1 })
+        
+        let cosineSimilarity = dotProduct / (magnitudeSomeItem * magnitudeOtherItem)
+        
+        return cosineSimilarity
+    }
+}
+
+#endif
diff --git a/Sources/SwiftNLP/1. Data Collection/HNSW/RNG/MersenneTwisterRNG.swift b/Sources/SwiftNLP/1. Data Collection/HNSW/RNG/MersenneTwisterRNG.swift
new file mode 100644
index 0000000000000000000000000000000000000000..5eabd7ac276c25465bb25923fcbe6febc84a6975
--- /dev/null
+++ b/Sources/SwiftNLP/1. Data Collection/HNSW/RNG/MersenneTwisterRNG.swift	
@@ -0,0 +1,53 @@
+// Copyright (c) 2024 Jim Wallace
+//
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+//
+// See the https://github.com/JadenGeller/similarity-topology.git
+// for reference. The code is used with permission from the author
+// under the MIT License.
+//
+// Created by Mingchung Xia on 2024-01-28.
+//
+
+#if canImport(GameplayKit) && os(macOS)
+import Foundation
+import GameplayKit
+
+// MARK: GameplayKit provides a mersenne twister for RNG, but is not available on Linux
+// See https://github.com/quells/Squall package for alternative mersenne twister
+
+@available(macOS, introduced: 10.11)
+struct MersenneTwisterRNG: RandomNumberGenerator {
+    private let randomSource: GKMersenneTwisterRandomSource
+
+    init(seed: UInt64) {
+        randomSource = GKMersenneTwisterRandomSource(seed: seed)
+    }
+
+    mutating func next() -> UInt64 {
+        let upperBits = UInt64(UInt32(bitPattern: Int32(randomSource.nextInt()))) << 32
+        let lowerBits = UInt64(UInt32(bitPattern: Int32(randomSource.nextInt())))
+        return upperBits | lowerBits
+    }
+}
+
+#endif
diff --git a/Sources/SwiftNLP/1. Data Collection/DictionaryCorpus + RangeReplacableColleection.swift b/Sources/SwiftNLP/1. Data Collection/HNSW/RNG/SeedableRNG.swift
similarity index 76%
rename from Sources/SwiftNLP/1. Data Collection/DictionaryCorpus + RangeReplacableColleection.swift
rename to Sources/SwiftNLP/1. Data Collection/HNSW/RNG/SeedableRNG.swift
index 744bd8a6f5c2ab8d142ca0f3810616010bf315ef..bd7d52e7d8a2d962b48eee01c1e60973e8b2f5ae 100644
--- a/Sources/SwiftNLP/1. Data Collection/DictionaryCorpus + RangeReplacableColleection.swift	
+++ b/Sources/SwiftNLP/1. Data Collection/HNSW/RNG/SeedableRNG.swift	
@@ -20,10 +20,22 @@
 // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 // OTHER DEALINGS IN THE SOFTWARE.
+//
+// Created by Mingchung Xia on 2024-01-28.
+//
+
+import Foundation
+
+struct SeedableRNG: RandomNumberGenerator {
+    private var seed: UInt64
+
+    init(seed: UInt64) {
+        self.seed = seed
+    }
 
-//extension DictionaryCorpus: RangeReplaceableCollection {
-//        
-//    func replaceSubrange<C: Collection>(_ range: Range<DictionaryCorpus.Index>, with newElements: C) where DictionaryCorpus.Element == C.Element {
-//        
-//    }
-//}
+    mutating func next() -> UInt64 {
+        let lcg: UInt64 = 6364136223846793005
+        seed = lcg &* seed &+ 1
+        return seed
+    }
+}
diff --git a/Sources/SwiftNLP/1. Data Collection/HNSWCorpus.swift b/Sources/SwiftNLP/1. Data Collection/HNSWCorpus.swift
deleted file mode 100644
index 23bfa262a16f64865ce729ec36300cb6f7d7edf9..0000000000000000000000000000000000000000
--- a/Sources/SwiftNLP/1. Data Collection/HNSWCorpus.swift	
+++ /dev/null
@@ -1,101 +0,0 @@
-// Copyright (c) 2024 Jim Wallace
-//
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-//
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-import Foundation
-import PriorityHeapModule
-import PriorityHeapAlgorithms
-
-import SimilarityMetric
-import HNSWAlgorithm
-import HNSWEphemeral
-
-
-class HNSWCorpus<Scalar: BinaryFloatingPoint & Codable>: SNLPCorpus {
-            
-    var _documentEncoder: ContextFreeEncoder<Scalar>
-    var zeroes: [Scalar]
-    var count: Int { 0 }
-    
-    var encodedDocuments: [Int : [Scalar]] = [:] // TODO: This should be replaced by HNSW
-    
-    init(_documentEncoder: ContextFreeEncoder<Scalar>) {
-        self._documentEncoder = _documentEncoder
-        zeroes = Array(repeating: Scalar(0), count: 384)
-    }
-    
-    @inlinable
-    func addUntokenizedDocument(_ document: String) {
-        fatalError("HNSWCorpus not implemented yet. Get on it.")
-    }
-    
-//    var index = DeterministicSampleVectorIndex(typicalNeighborhoodSize: 20)
-//    for _ in 0..<100 {
-//        index.insertRandom(range: 0...1)
-//    }
-//    
-//    for i in 0..<10 {
-//        let sample = index.generateRandom(range: 0...1)
-//        print("iter \(i): \(sample)")
-//        let hnswResults = try! index.find(near: sample, limit: 10)
-//        let exactResult = try! index.find(near: sample, limit: 1, exact: true)
-//        XCTAssert(exactResult.contains(where: { $0.id == hnswResults[0].id }))
-//    }
-    
-}
-
-
-
-
-
-public struct DeterministicSampleVectorIndex<Vector: Collection & Codable> where Vector.Element: BinaryFloatingPoint {
-    public typealias Index = EphemeralVectorIndex<Int, Int, CartesianDistanceMetric<[Double]>, Void>
-    public var base: Index
-    
-    public init(typicalNeighborhoodSize: Int) {
-        base = .init(metric: .init(), config: .unstableDefault(typicalNeighborhoodSize: typicalNeighborhoodSize))
-    }
-    
-    public func find(near query: Vector, limit: Int, exact: Bool = false) throws -> [Index.Neighbor] {
-        if exact {
-            Array(PriorityHeap(base.vectors.enumerated().map {
-                let similarity = base.metric.similarity(between: query as! [Double], $0.element)
-                return NearbyVector(id: $0.offset, vector: $0.element, priority: similarity)
-            }).descending().prefix(limit))
-        } else {
-            Array(try base.find(near: query as! [Double], limit: limit))
-        }
-    }
-    
-}
-
-public struct CartesianDistanceMetric<Vector: Collection & Codable>: SimilarityMetric where Vector.Element: BinaryFloatingPoint{
-    public func similarity(between someItem: Vector, _ otherItem: Vector) -> Vector.Element {
-        // Naïve cartesian distance
-        let squaredSum = zip(someItem, otherItem)
-                .map { (x, y) in (x - y) * (x - y) }
-                .reduce(0, +)
-        
-        return sqrt(squaredSum)
-    }
-}
-
diff --git a/Sources/SwiftNLP/1. Data Collection/DictionaryCorpus.swift b/Sources/SwiftNLP/1. Data Collection/InMemoryCorpus.swift
similarity index 59%
rename from Sources/SwiftNLP/1. Data Collection/DictionaryCorpus.swift
rename to Sources/SwiftNLP/1. Data Collection/InMemoryCorpus.swift
index 902f859efadb7baaa2a1bdd7faa60361f98ad743..21b876afd9209d2cd812dc689a9c73a3ba686b58 100644
--- a/Sources/SwiftNLP/1. Data Collection/DictionaryCorpus.swift	
+++ b/Sources/SwiftNLP/1. Data Collection/InMemoryCorpus.swift	
@@ -23,26 +23,38 @@
 
 import Foundation
 
-class DictionaryCorpus<Scalar: BinaryFloatingPoint & Codable>: SNLPCorpus {
-                                                    
-    private var _documentEncoder: any SNLPEncoder
-    var zeroes: [Scalar] { _documentEncoder.zeroes as! [Scalar] }
+struct InMemoryCorpus<Item: SNLPDataItem, Encoder: SNLPEncoder>: SNLPCorpus  {
     
-    var encodedDocuments: [Int : [Scalar] ] = [:]
+    internal var documentEncoder: Encoder
+    internal var documents = ContiguousArray<Item>()
+    internal var encodedDocuments = ContiguousArray<[Encoder.Scalar]>()
+                
     var count: Int { encodedDocuments.count }
+            
     
+    init(item: Item = String(), encoder: Encoder = Encoder()) {
+        documentEncoder = encoder
+    }
         
-    init(encoding: ContextFreeEncoder<Scalar>.PreComputedEmbeddings, scalar: Scalar.Type = Double.self) {
-        _documentEncoder = ContextFreeEncoder(source: encoding)
+    init(encoder: Encoder) {
+        documentEncoder = encoder
     }
     
-    init(encoder: any SNLPEncoder, scalar: Scalar.Type = Double.self) {
-        _documentEncoder = encoder
-    }
+    
+    /*
+            Implements a naive search function ... better to use a more efficient data structure
+            TODO: Replace this with a brute force search implementation?
+     */
+    func searchFor(_ query: String) -> [Item] {
+        let q = documentEncoder.encodeSentence(query)
+
+        if let index = encodedDocuments.firstIndex(of: q) {
+            return [documents[index]]
+        }
         
-    @inlinable
-    func addUntokenizedDocument(_ document: String) {
-        encodedDocuments[ encodedDocuments.count ] = (_documentEncoder.encodeSentence(document) as! [Scalar])
+
+        return []
     }
     
+    
 }
diff --git a/Sources/SwiftNLP/1. Data Collection/String + SNLPDataItem.swift b/Sources/SwiftNLP/1. Data Collection/String + SNLPDataItem.swift
new file mode 100644
index 0000000000000000000000000000000000000000..33a0ddd2b83f99b5e93d66b18a44ce431ca50c07
--- /dev/null
+++ b/Sources/SwiftNLP/1. Data Collection/String + SNLPDataItem.swift	
@@ -0,0 +1,20 @@
+//
+//  File.swift
+//  
+//
+//  Created by Jim Wallace on 2024-04-03.
+//
+
+import Foundation
+
+/*
+        Provides a bare bones implementation of SNLPDataItem so that String can be used in test cases
+        - Not a particularly reliable set of defaults, but enough to work with text
+ */
+extension String: SNLPDataItem {
+    public var createdOn: Date { Date.distantFuture }
+    
+    public var id: String { self }
+
+    public var fullText: String { self }
+}
diff --git a/Sources/SwiftNLP/2. Encoding/ContextFreeEncoder + File IO .swift b/Sources/SwiftNLP/2. Encoding/ContextFreeEncoder + File IO .swift
index cd5b35ef7097c9c900f316609571e037ca94417e..5f186318c50b0a0ab949b494fcce9fbd10bec4bb 100644
--- a/Sources/SwiftNLP/2. Encoding/ContextFreeEncoder + File IO .swift	
+++ b/Sources/SwiftNLP/2. Encoding/ContextFreeEncoder + File IO .swift	
@@ -62,7 +62,7 @@ extension ContextFreeEncoder {
     
     // These use memory mapping to load the values in more quickly
     // TODO: Validate that this actually works on other systems... could easily be some issues
-    static func readDictionaryFromFile(_ url: URL) -> [String : [Scalar]] {
+    static func readDictionaryFromFile(_ url: URL, width: Int = 50) -> [String : [Scalar]] {
         
         //let fileURL = URL(fileURLWithPath: filename)
         var result: [String : [Scalar]]
@@ -87,7 +87,7 @@ extension ContextFreeEncoder {
                         index = stringRange.upperBound
                         
                         // Read the values
-                        let valuesData = data[index..<(index + 50 * MemoryLayout<Double>.size)]
+                        let valuesData = data[index..<(index + width * MemoryLayout<Double>.size)]
                         let values = valuesData.withUnsafeBytes { Array($0.bindMemory(to: Scalar.self)) }
                         
                         // Add the key-value pair to the dictionary
@@ -95,7 +95,7 @@ extension ContextFreeEncoder {
                         //debugPrint("\(key) -> \(values[0])")
                     }
                     
-                    index += 50 * MemoryLayout<Double>.size //TODO: Why is this magical 50 here? 
+                    index += width * MemoryLayout<Double>.size
                 } else {
                     break
                 }
diff --git a/Sources/SwiftNLP/2. Encoding/ContextFreeEncoder.swift b/Sources/SwiftNLP/2. Encoding/ContextFreeEncoder.swift
index b90ca71aec6cb8451bf4fc048e05c006ea09c093..897918385ce1f10b44efa63df973f641f8525db1 100644
--- a/Sources/SwiftNLP/2. Encoding/ContextFreeEncoder.swift	
+++ b/Sources/SwiftNLP/2. Encoding/ContextFreeEncoder.swift	
@@ -23,10 +23,10 @@
 
 import Foundation
 
-class ContextFreeEncoder<Scalar: BinaryFloatingPoint & Codable>: SNLPEncoder {
+struct ContextFreeEncoder<Scalar: BinaryFloatingPoint>: SNLPEncoder {
     
     var dictionary: [String : [Scalar]]
-    let width: Int
+    let dimensions: UInt
     var zeroes: [Scalar]
     
     var count: Int { dictionary.count }
@@ -34,25 +34,29 @@ class ContextFreeEncoder<Scalar: BinaryFloatingPoint & Codable>: SNLPEncoder {
     public enum PreComputedEmbeddings {
         case glove6B50d
         case glove6B100d
-        //case NLEmbedding
     }
     
-    init(source: PreComputedEmbeddings) {
+    init() {
+        self.init(source: .glove6B50d)
+    }
+    
+    
+    init(source: PreComputedEmbeddings  = .glove6B50d) {
         
         dictionary = Dictionary<String,[Scalar]>()
         
         var dictionaryToLoad: String
         switch source {
         case .glove6B50d:
-            width = 50
+            dimensions = 50
             dictionaryToLoad = "glove.6B.50d"
             
         case .glove6B100d:
-            width = 100
+            dimensions = 100
             dictionaryToLoad = "glove.6B.100d"
         }
         
-        zeroes = Array(repeating: Scalar(0), count: width) as! [Scalar]
+        zeroes = Array(repeating: Scalar(0), count: Int(dimensions))
         
         // Try to load locally first
         guard let url = Bundle.module.url(forResource: dictionaryToLoad, withExtension: "mmap") else {
@@ -62,7 +66,6 @@ class ContextFreeEncoder<Scalar: BinaryFloatingPoint & Codable>: SNLPEncoder {
             return
         }
         dictionary = ContextFreeEncoder<Scalar>.readDictionaryFromFile(url)
-        
     }
     
     subscript(_ token: String) -> [Scalar] {
@@ -77,7 +80,6 @@ class ContextFreeEncoder<Scalar: BinaryFloatingPoint & Codable>: SNLPEncoder {
     
     
     func encodeToken(_ token: String) -> [Scalar] {
-        //print("\(token) --> \(dictionary[token] ?? zeroes)")
         return dictionary[token] ?? zeroes
     }
     
@@ -89,7 +91,6 @@ class ContextFreeEncoder<Scalar: BinaryFloatingPoint & Codable>: SNLPEncoder {
                 result[i] += encoding[i]
             }
         }
-        //print("\(sentence) --> \(result)")
         return result
     }
 }
diff --git a/Sources/SwiftNLP/2. Encoding/NatualLanguageEncoder.swift b/Sources/SwiftNLP/2. Encoding/NatualLanguageEncoder.swift
index 5c1db99fee8119a02d6231fd727ffdad4f742dbb..98d32eaa199fa2e89db5d8ea02747358b9fe1391 100644
--- a/Sources/SwiftNLP/2. Encoding/NatualLanguageEncoder.swift	
+++ b/Sources/SwiftNLP/2. Encoding/NatualLanguageEncoder.swift	
@@ -25,10 +25,11 @@
 import Foundation
 import NaturalLanguage
 
-class NaturalLanguageEncoder<Scalar: BinaryFloatingPoint & Codable>: SNLPEncoder {
-
-    var zeroes: [Scalar] { Array(repeating: Scalar(0), count: 512) }
+struct NaturalLanguageEncoder<Scalar: BinaryFloatingPoint>: SNLPEncoder {
     
+    var dimensions: UInt = 512
+    var zeroes: [Scalar] { Array(repeating: Scalar(0), count: Int(dimensions)) }
+        
     @inlinable
     func encodeToken(_ token: String) -> [Scalar] {
         if let embedding = NLEmbedding.wordEmbedding(for: .english) {
diff --git a/Sources/SwiftNLP/2. Encoding/NaturalLanguageContextualEncoder.swift b/Sources/SwiftNLP/2. Encoding/NaturalLanguageContextualEncoder.swift
new file mode 100644
index 0000000000000000000000000000000000000000..7d6814a9438f60b00487e509959dc94e281eab4b
--- /dev/null
+++ b/Sources/SwiftNLP/2. Encoding/NaturalLanguageContextualEncoder.swift	
@@ -0,0 +1,55 @@
+// Copyright (c) 2024 Henry Tian, Jim Wallace
+//
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+
+//#if os(macOS)
+//import Foundation
+//import NaturalLanguage
+//
+//class NaturalLanguageContextualEncoder<Scalar: BinaryFloatingPoint & Codable>: SNLPEncoder {
+//
+//    var zeroes: [Scalar] { Array(repeating: Scalar(0), count: 512) }
+//    
+//
+//    @inlinable
+//    func encodeToken(_ token: String) -> [Scalar] {
+//        if let embedding = NLContextualEmbedding(language: .english) {
+//            return embedding.vector(for: token) as! [Scalar]
+//        }
+//        return zeroes
+//    }
+//    
+//    /**
+//        Adds a single untokenized document to the corpus, using default tokenization and text processing
+//     */
+//    @inlinable
+//    func encodeSentence(_ sentence: String) -> [Scalar] {
+//        if let embedding = NLEmbedding.sentenceEmbedding(for: .english) {
+//            if let result = embedding.vector(for: sentence) {
+//                return result as! [Scalar]
+//            }
+//        }
+//        return zeroes
+//    }
+//    
+//}
+//#endif
diff --git a/Sources/SwiftNLP/2. Encoding/OpenAIEncoder.swift b/Sources/SwiftNLP/2. Encoding/OpenAIEncoder.swift
index c6db861dee685da421b0f8ae17e702ef17ef9ddd..ff3ca6ad146dc47031940f620ffe797c836adeda 100644
--- a/Sources/SwiftNLP/2. Encoding/OpenAIEncoder.swift	
+++ b/Sources/SwiftNLP/2. Encoding/OpenAIEncoder.swift	
@@ -23,10 +23,14 @@
 
 import Foundation
 
-class OpenAIEncoder<Scalar: BinaryFloatingPoint & Codable>: SNLPAsyncEncoder {
-
-                    
+struct OpenAIEncoder<Scalar: BinaryFloatingPoint & Codable>: SNLPAsyncEncoder {
+    
     var zeroes: [Scalar]
+    var dimensions: UInt
+    
+    init() {
+        fatalError()
+    }
     
     func fetchEncodingForToken(_ token: String) async throws -> [Scalar] {
         fatalError("OpenAIEncoder not implemented. Get on it.")
diff --git a/Sources/SwiftNLPVisualizer/HNSWVisualizerApp.swift b/Sources/SwiftNLPVisualizer/HNSWVisualizerApp.swift
new file mode 100644
index 0000000000000000000000000000000000000000..a9d5d30f8cec668c162866db94618d95509c52e3
--- /dev/null
+++ b/Sources/SwiftNLPVisualizer/HNSWVisualizerApp.swift
@@ -0,0 +1,104 @@
+#if canImport(GameplayKit) && os(macOS)
+
+import SwiftUI
+import HNSWAlgorithm
+import HNSWSample
+
+// MARK: go to Product -> Scheme -> SwiftNLPVisualizer then run
+// TODO: Support this for SwiftNLP data structures instead of the sample
+
+struct GraphView: View {
+    let points: [(Int, CGPoint)]
+    let edges: [(CGPoint, CGPoint)]
+
+    var body: some View {
+        Canvas { context, size in
+            for (startPoint, endPoint) in edges {
+                var path = Path()
+                path.move(to: startPoint)
+                path.addLine(to: endPoint)
+                context.stroke(path, with: .color(.black), lineWidth: 1)
+            }
+            
+            for (id, point) in points {
+                context.fill(
+                    Circle().path(in: CGRect(x: point.x - 5, y: point.y - 5, dimensions: 10, height: 10)),
+                    with: .color(.blue)
+                )
+                context.draw(Text("\(id)").bold().foregroundColor(.red), in: CGRect(x: point.x, y: point.y, dimensions: 20, height: 20))
+            }
+        }
+        .frame(maxWidth: .infinity, maxHeight: .infinity)
+    }
+}
+
+extension DeterministicSampleVectorIndex {
+    func points(for level: Int) -> [(Int, CGPoint)] {
+        base.graph.keys(on: level).map { id in
+            (id, base.vectors[id])
+        }
+    }
+    func edges(for level: Int) -> [(CGPoint, CGPoint)] {
+        base.graph.keys(on: level).flatMap { id in
+            base.graph.neighborhood(on: level, around: id).map { neighbor in
+                return (base.vectors[id], base.vectors[neighbor])
+            }
+        }
+    }
+}
+
+struct VisualizerView: View {
+    @State var index = DeterministicSampleVectorIndex(typicalNeighborhoodSize: 6)
+    @State var angle: Angle = .zero
+    @State var updateCount = 0 // since index isn't observable!
+    
+    var body: some View {
+        VStack {
+            HStack {
+                Button("Add Data") {
+                    index.insertRandom(range: 0...500)
+                    updateCount += 1
+                }
+                Slider(value: $angle.degrees, in: 0...89)
+                    .frame(dimensions: 100)
+            }
+            .padding()
+            ScrollView {
+                VStack {
+                    let graph = index.base.graph
+                    ForEach(Array(sequence(state: graph.entry?.level, next: graph.descend)), id: \.self) { level in
+                        let _ = updateCount // to force an update
+                        Text("Level \(String(level))")
+                        GraphView(
+                            points: index.points(for: level),
+                            edges: index.edges(for: level)
+                        )
+                        .rotation3DEffect(angle, axis: (1, 0, 0), perspective: 0)
+                        .frame(dimensions: 600, height: 600, alignment: .top)
+                        .frame(dimensions: 600, height: 600 * cos(angle.radians))
+                        Divider()
+                    }
+                }
+            }
+        }
+    }
+}
+
+@main
+struct HNSWVisualizerApp: App {
+    @NSApplicationDelegateAdaptor(AppDelegate.self) var appDelegate
+
+    var body: some Scene {
+        WindowGroup {
+            VisualizerView()
+        }
+    }
+}
+
+class AppDelegate: NSObject, NSApplicationDelegate {
+    func applicationDidFinishLaunching(_ notification: Notification) {
+        NSApp.setActivationPolicy(.regular)
+    }
+}
+
+#endif
diff --git a/Tests/SwiftNLPTests/1. Data Collection/HNSW/DurableHNSWCorpusTests.swift b/Tests/SwiftNLPTests/1. Data Collection/HNSW/DurableHNSWCorpusTests.swift
new file mode 100644
index 0000000000000000000000000000000000000000..358e0f9e8bd68fb1ff8ace55215d2a905bf8c503
--- /dev/null
+++ b/Tests/SwiftNLPTests/1. Data Collection/HNSW/DurableHNSWCorpusTests.swift	
@@ -0,0 +1,230 @@
+#if os(macOS)
+import XCTest
+import Foundation
+import CoreLMDB
+import System
+@testable import SwiftNLP
+
+// MARK: These tests are not to be included within the pipeline
+
+final class DurableHNSWCorpusTests: XCTestCase {
+    /// This is used to skip these tests in the GitLab pipeline
+    override class var defaultTestSuite: XCTestSuite {
+        if ProcessInfo.processInfo.environment["SKIP_TESTS"] == "DurableHNSWCorpusTests" {
+            return XCTestSuite(name: "Empty")
+        }
+        return super.defaultTestSuite
+    }
+    
+    /// Setting up constants for environment
+    private let ONE_GB: Int = 1_073_741_824
+    private let ONE_MB: Int = 1_048_576
+    private let ONE_KB: Int = 1_024
+    private let ONE_B:  Int = 1
+    private let DEFAULT_MAXREADERS: UInt32 = 126
+    private let DEFAULT_MAXDBS:     UInt32 = 10
+    
+    /// Setting up working directory
+    private var workingDirectoryPath: FilePath!
+    
+    override func setUpWithError() throws {
+        try super.setUpWithError()
+        
+        let fileManager = FileManager.default
+        let directoryURL = fileManager.homeDirectoryForCurrentUser.appendingPathComponent("/Downloads/lmdb")
+        try fileManager.createDirectory(at: directoryURL, withIntermediateDirectories: true, attributes: nil)
+        workingDirectoryPath = FilePath(directoryURL.path)
+        
+        /// This commented out code alternatively works in the XCode bundle resource environment
+//        guard let resourcesPath = Bundle.module.resourcePath else { fatalError("Failed to find resource path.") }
+//        let resourcesDirectoryURL = URL(fileURLWithPath: resourcesPath).appendingPathComponent("lmdb")
+//        let fileManager = FileManager.default
+//        try fileManager.createDirectory(at: resourcesDirectoryURL, withIntermediateDirectories: true, attributes: nil)
+//        print("Resources directory: \(resourcesDirectoryURL)")
+//        workingDirectoryPath = FilePath(resourcesDirectoryURL.path)
+    }
+    
+    func testBuildBasicCorpus() throws {
+        let docs = [
+            "CNTK formerly known as Computational Network Toolkit",
+            "is a free easy-to-use open-source commercial-grade toolkit",
+            "that enable us to train deep learning algorithms to learn like the human brain."
+        ]
+        
+        /// Setting up the environment
+        let env = try Environment()
+        try env.setMapSize(ONE_GB)
+        try env.setMaxReaders(DEFAULT_MAXREADERS)
+        try env.setMaxDBs(DEFAULT_MAXDBS)
+        try env.open(path: workingDirectoryPath)
+        
+        /// Writing to LMDB
+        let transaction = try Transaction.begin(.write, in: env)
+
+        let corpus = try DurableHNSWCorpus<String,ContextFreeEncoder<Double>>(
+            namespace: "testBasicExample",
+            in: transaction
+        )
+        
+        for doc in docs {
+            try corpus.addUntokenizedDocument(doc, in: transaction)
+        }
+        
+        try transaction.commit()
+        
+        /// Reading from LMDB
+        let readTransaction = try Transaction.begin(.read, in: env)
+        
+        let _ = try DurableHNSWCorpus<String,ContextFreeEncoder<Double>>(
+            namespace: "testBasicExample",
+            in: readTransaction
+        )
+        
+        readTransaction.abort()
+        
+        // XCTAssert(readCorpus.count == 3)
+        /// readCorpus.count == 3 will fail because we have not fixed the bug with setting size upon reads
+        /// This is because size is only incremented when insertion is called but it is not called when read from disk!
+    }
+    
+    func testQueryBasicCorpus() async throws {
+        let docs = [
+            "The quick brown fox jumps over the lazy dog",
+            "I enjoy taking long walks along the beach at sunset",
+            "Advances in neural networks have enabled new AI capabilities",
+            "The stock market experienced a significant downturn last week",
+            "Cooking a good meal can be both an art and a science",
+            "The exploration of space is both challenging and rewarding",
+            "Machine learning models are becoming increasingly sophisticated",
+            "I love reading about history and ancient civilizations"
+        ]
+        
+        let query = "I like to read about new technology and artificial intelligence"
+        //let documentEncoder = ContextFreeEncoder<Double>(source: .glove6B50d)
+        
+        /// Setting up the environment
+        let env = try Environment()
+        try env.setMapSize(ONE_GB)
+        try env.setMaxReaders(DEFAULT_MAXREADERS)
+        try env.setMaxDBs(DEFAULT_MAXDBS)
+        try env.open(path: workingDirectoryPath)
+        
+        let transaction = try Transaction.begin(.write, in: env)
+        
+        /// Saving the memory map to disk
+        let corpus = try DurableHNSWCorpus<String,ContextFreeEncoder<Double>>(
+            namespace: "testBasicQueryExample",
+            in: transaction
+        )
+        
+        for doc in docs {
+            try corpus.addUntokenizedDocument(doc, in: transaction)
+        }
+        
+        corpus.saveDictionaryToDownloads(fileName: "dictionary.mmap")
+        
+        try transaction.commit()
+        
+        do {
+            let queryVector: [Double] = corpus.documentEncoder.encodeToken(query).map { Double($0) }
+            
+            /// Reading the memory map (and dictionary) from disk
+            let readTransaction = try Transaction.begin(.write, in: env)
+            
+            let readCorpus = try DurableHNSWCorpus<String,ContextFreeEncoder<Double>>(
+                namespace: "testBasicQueryExample",
+                in: readTransaction
+            )
+            
+            readCorpus.dictionary = DurableHNSWCorpus.readDictionaryFromDownloads(fileName: "dictionary.mmap") // TODO: move this to initializer?
+            
+            let result = try readCorpus.index.find(near: queryVector, limit: 8, in: transaction)
+            
+            for result in result {
+                let key = Int(result.id.foreignKey)!
+                print(readCorpus.getUntokenizedDocument(at: key))
+            }
+        } catch {
+            print("Error when trying corpus.encodedDocuments.find(): \(error)")
+        }
+        
+        try transaction.commit()
+    }
+    
+    func testBuildGuelphSubredditCorpus() async throws {
+        /// Generates the LMDB durable storage to disk but runs no tests otherwise
+    
+        /// Setting up the environment
+        let env = try Environment()
+        try env.setMapSize(ONE_GB)
+        try env.setMaxReaders(DEFAULT_MAXREADERS)
+        try env.setMaxDBs(DEFAULT_MAXDBS)
+        try env.open(path: workingDirectoryPath)
+        
+        /// Get subreddit data
+        guard let submissionsURL = Bundle.module.url(forResource: "Guelph_submissions", withExtension: "zst") else {
+            fatalError("Failed to find waterloo_submissions.zst in test bundle.")
+        }
+        guard let submissionsData = try? Data(contentsOf: submissionsURL) else {
+            fatalError("Failed to load waterloo_submissions.zst from test bundle.")
+        }
+
+        let (submissions, _ ): ([Submission],[Data]) = try await loadFromRedditArchive(submissionsData)
+        
+        let transaction = try Transaction.begin(.write, in: env)
+        
+        let documentEncoder = ContextFreeEncoder<Double>(source: .glove6B50d)
+        
+        let corpus = try DurableHNSWCorpus<String,ContextFreeEncoder>(
+            encoder: documentEncoder,
+            namespace: "subreddit_durable",
+            in: transaction
+        )
+
+        /// Add documents to corpus
+        for submission in submissions {
+            if let text = submission.selftext {
+                try corpus.addUntokenizedDocument(text, in: transaction)
+            }
+        }
+
+        /// Save dictionary to disk
+        corpus.saveDictionaryToDownloads(fileName: "dictionary.mmap")
+        
+        try transaction.commit()
+    }
+    
+    func testQueryGuelphSubredditCorpus() async throws {
+        let documentEncoder = ContextFreeEncoder<Double>(source: .glove6B50d)
+        
+        /// Setting up the environment
+        let env = try Environment()
+        try env.setMapSize(ONE_GB)
+        try env.setMaxReaders(DEFAULT_MAXREADERS)
+        try env.setMaxDBs(DEFAULT_MAXDBS)
+        try env.open(path: workingDirectoryPath)
+        
+        /// Reading the memory map (and dictionary) from disk
+        let transaction = try Transaction.begin(.read, in: env)
+        
+        let corpus = try DurableHNSWCorpus<String,ContextFreeEncoder>(
+            encoder: documentEncoder,
+            namespace: "subreddit_durable",
+            in: transaction
+        )
+        
+        corpus.dictionary = DurableHNSWCorpus.readDictionaryFromDownloads(fileName: "dictionary.mmap")
+        
+        let query = "I love waterloo and I love the geese."
+        let queryVector: [Double] = documentEncoder.encodeToken(query).map { Double($0) }
+        
+        let result = try corpus.index.find(near: queryVector, limit: 8, in: transaction)
+        
+        for result in result {
+            let key = Int(result.id.foreignKey)!
+            print(corpus.getUntokenizedDocument(at: key))
+        }
+    }
+}
+#endif
+
diff --git a/Tests/SwiftNLPTests/1. Data Collection/HNSW/EphemeralHNSWCorpusTests.swift b/Tests/SwiftNLPTests/1. Data Collection/HNSW/EphemeralHNSWCorpusTests.swift
new file mode 100644
index 0000000000000000000000000000000000000000..c0ac5382bd39d8f132864b527d1fe414eaf9a076
--- /dev/null
+++ b/Tests/SwiftNLPTests/1. Data Collection/HNSW/EphemeralHNSWCorpusTests.swift	
@@ -0,0 +1,215 @@
+#if os(macOS)
+import XCTest
+import Foundation
+import System
+@testable import SwiftNLP
+
+final class EphemeralHNSWCorpusTests: XCTestCase {
+    // MARK: EphemeralHNSWCorpus can also be used as its typealias HNSWCorpus
+    
+    // Load a small set of documents and confirm that corpus and dictionary are updated accordingly
+    func testBuildBasicCorpus() throws {
+        let docs = [
+            "CNTK formerly known as Computational Network Toolkit",
+            "is a free easy-to-use open-source commercial-grade toolkit",
+            "that enable us to train deep learning algorithms to learn like the human brain."
+         ]
+        
+        var corpus = HNSWCorpus<String,ContextFreeEncoder<Double>>()
+        corpus.addUntokenizedDocuments(docs)
+
+        XCTAssert(corpus.count == 3)
+        
+        /// Make sure none of our encodings are zero
+        for item in corpus {
+            XCTAssertNotEqual(item.vector, corpus.zeroes)
+        }
+    }
+    
+    // Load a bigger set of documents and confirm
+    func testBuildLargeCorpus() throws {
+        let twentyQuotes = [
+            "Imagination is more important than knowledge. - Albert Einstein",
+            "The greatest enemy of knowledge is not ignorance, it is the illusion of knowledge. - Stephen Hawking",
+            "If I have seen further it is by standing on the shoulders of giants. - Isaac Newton",
+            "The universe is a wondrous place! The faster you create unbreakable code, the faster the universe creates people that can break it. - Richard Feynman",
+            "Science is the belief in the ignorance of experts. - Richard Feynman",
+            "The saddest aspect of life right now is that science gathers knowledge faster than society gathers wisdom. - Isaac Asimov",
+            "Science is the poetry of reality. - Richard Dawkins",
+            "To raise new questions, new possibilities, to regard old problems from a new angle, requires creative imagination and marks real advance in science. - Albert Einstein",
+            "The scientist does not study nature because it is useful; he studies it because he delights in it, and he delights in it because it is beautiful. - Henri Poincaré",
+            "Nothing in life is to be feared, it is only to be understood. Now is the time to understand more, so that we may fear less. - Marie Curie",
+            "An experiment is a question which science poses to Nature, and a measurement is the recording of Nature’s answer. - Max Planck",
+            "If you wish to make an apple pie from scratch, you must first invent the universe. - Carl Sagan",
+            "The function of science fiction is not always to predict the future but sometimes to prevent it. - Frank Herbert",
+            "Science is what we understand well enough to explain to a computer. Art is everything else we do. - Donald Knuth",
+            "In science one tries to tell people, in such a way as to be understood by everyone, something that no one ever knew before. But in poetry, it's the exact opposite. - Paul Dirac",
+            "Science is a way of thinking much more than it is a body of knowledge. - Carl Sagan",
+            "Research is what I’m doing when I don’t know what I’m doing. - Wernher von Braun",
+            "The most beautiful thing we can experience is the mysterious. It is the source of all true art and science. - Albert Einstein",
+            "One, remember to look up at the stars and not down at your feet. Two, never give up work. Work gives you meaning and purpose and life is empty without it. Three, if you are lucky enough to find love, remember it is there and don't throw it away. - Stephen Hawking",
+            "All science is either physics or stamp collecting. - Ernest Rutherford"
+        ]
+        
+        var corpus = HNSWCorpus<String,ContextFreeEncoder<Double>>()
+        corpus.addUntokenizedDocuments(twentyQuotes)
+        
+        XCTAssertEqual(corpus.count, 20)
+        
+        /// Make sure none of our encodings are zero
+        for item in corpus {
+            XCTAssertNotEqual(item.vector, corpus.zeroes)
+        }
+    }
+    
+    func testBuildGuelphSubredditCorpus() async throws {
+        guard let submissionsURL = Bundle.module.url(forResource: "Guelph_submissions", withExtension: "zst") else {
+            fatalError("Failed to find waterloo_submissions.zst in test bundle.")
+        }
+        guard let submissionsData = try? Data(contentsOf: submissionsURL) else {
+            fatalError("Failed to load waterloo_submissions.zst from test bundle.")
+        }
+        
+        let (submissions, _ ): ([Submission],[Data]) = try await loadFromRedditArchive(submissionsData)
+        
+        let corpus = HNSWCorpus<String,ContextFreeEncoder<Double>>()
+        
+        for submission in submissions {
+            if let text = submission.selftext {
+                corpus.addUntokenizedDocument(text)
+            }
+        }
+
+        XCTAssert(corpus.count == 17999)
+    }
+    
+    // Load a small set of documents and confirm that corpus and dictionary are updated accordingly
+    func testQueryBasicCorpus() async throws {
+        let docs = [
+            "The quick brown fox jumps over the lazy dog",
+            "I enjoy taking long walks along the beach at sunset",
+            "Advances in neural networks have enabled new AI capabilities",
+            "The stock market experienced a significant downturn last week",
+            "Cooking a good meal can be both an art and a science",
+            "The exploration of space is both challenging and rewarding",
+            "Machine learning models are becoming increasingly sophisticated",
+            "I love reading about history and ancient civilizations"
+        ]
+
+        let query = "I like to read about new technology and artificial intelligence"
+ 
+        //let _documentEncoder = ContextFreeEncoder<Double>(source: .glove6B50d)
+        var corpus = HNSWCorpus<String,ContextFreeEncoder<Double>>()
+        corpus.addUntokenizedDocuments(docs)
+        
+        //do {
+            //let queryVector: [Double] = _documentEncoder.encodeToken(query).map { Double($0) }
+            //let results = try corpus.index.find(near: queryVector, limit: 8)
+            
+            let results = corpus.searchFor(query)
+            
+            for result in results {
+                print(result)
+            }
+        //} catch {
+        //    print("Error when trying corpus.encodedDocuments.find(): \(error)")
+        //}
+    }
+    
+    func testQueryLargeCorpus() async throws {
+        let docs = [
+            "Imagination is more important than knowledge. - Albert Einstein",
+            "The greatest enemy of knowledge is not ignorance, it is the illusion of knowledge. - Stephen Hawking",
+            "If I have seen further it is by standing on the shoulders of giants. - Isaac Newton",
+            "The universe is a wondrous place! The faster you create unbreakable code, the faster the universe creates people that can break it. - Richard Feynman",
+            "Science is the belief in the ignorance of experts. - Richard Feynman",
+            "The saddest aspect of life right now is that science gathers knowledge faster than society gathers wisdom. - Isaac Asimov",
+            "Science is the poetry of reality. - Richard Dawkins",
+            "To raise new questions, new possibilities, to regard old problems from a new angle, requires creative imagination and marks real advance in science. - Albert Einstein",
+            "The scientist does not study nature because it is useful; he studies it because he delights in it, and he delights in it because it is beautiful. - Henri Poincaré",
+            "Nothing in life is to be feared, it is only to be understood. Now is the time to understand more, so that we may fear less. - Marie Curie",
+            "An experiment is a question which science poses to Nature, and a measurement is the recording of Nature’s answer. - Max Planck",
+            "If you wish to make an apple pie from scratch, you must first invent the universe. - Carl Sagan",
+            "The function of science fiction is not always to predict the future but sometimes to prevent it. - Frank Herbert",
+            "Science is what we understand well enough to explain to a computer. Art is everything else we do. - Donald Knuth",
+            "In science one tries to tell people, in such a way as to be understood by everyone, something that no one ever knew before. But in poetry, it's the exact opposite. - Paul Dirac",
+            "Science is a way of thinking much more than it is a body of knowledge. - Carl Sagan",
+            "Research is what I’m doing when I don’t know what I’m doing. - Wernher von Braun",
+            "The most beautiful thing we can experience is the mysterious. It is the source of all true art and science. - Albert Einstein",
+            "One, remember to look up at the stars and not down at your feet. Two, never give up work. Work gives you meaning and purpose and life is empty without it. Three, if you are lucky enough to find love, remember it is there and don't throw it away. - Stephen Hawking",
+            "All science is either physics or stamp collecting. - Ernest Rutherford"
+        ]
+        
+        let query = "I love Albert Einstein!"
+        
+        var corpus = HNSWCorpus<String,ContextFreeEncoder<Double>>()
+        corpus.addUntokenizedDocuments(docs)
+        
+        
+        let results = corpus.searchFor(query)
+        
+        for result in results {
+            print(result)
+        }
+        
+    }
+    
+    func testQueryGuephSubredditCorpus() async throws {
+        guard let submissionsURL = Bundle.module.url(forResource: "Guelph_submissions", withExtension: "zst") else {
+            fatalError("Failed to find guelph_submissions.zst in test bundle.")
+        }
+        guard let submissionsData = try? Data(contentsOf: submissionsURL) else {
+            fatalError("Failed to load guelph_submissions.zst from test bundle.")
+        }
+        
+        let (submissions, _ ): ([Submission],[Data]) = try await loadFromRedditArchive(submissionsData)
+        
+        //let _documentEncoder = ContextFreeEncoder<Double>(source: .glove6B50d)
+        let corpus = HNSWCorpus<String,ContextFreeEncoder<Double>>(typicalNeighborhoodSize: 10)
+        
+        for submission in submissions {
+            if let text = submission.selftext {
+                corpus.addUntokenizedDocument(text)
+            }
+        }
+        
+        let query = "Mr. Goose is a very important figure at the University of Waterloo."
+
+        let results = corpus.searchFor(query)
+        
+        for result in results {
+            print(result)
+        }
+    }
+    
+    func testTypicalNeighborhoodSize() async throws {
+//        guard let submissionsURL = Bundle.module.url(forResource: "Guelph_submissions", withExtension: "zst") else {
+//            fatalError("Failed to find waterloo_submissions.zst in test bundle.")
+//        }
+//        guard let submissionsData = try? Data(contentsOf: submissionsURL) else {
+//            fatalError("Failed to load waterloo_submissions.zst from test bundle.")
+//        }
+//        
+//        let (submissions, _ ): ([Submission],[Data]) = try await loadFromRedditArchive(submissionsData)
+//        
+//        let typicalNeighborhoodSizes = [2, 8, 16, 32, 64, 128, 512, 1028]
+//        
+//        for typicalNeighborhoodSize in typicalNeighborhoodSizes {
+//            let startTime = Date()
+//            var corpus = HNSWCorpus(encoding: .glove6B50d, typicalNeighborhoodSize: typicalNeighborhoodSize)
+//            
+//            for submission in submissions {
+//                if let text = submission.selftext {
+//                    corpus.addUntokenizedDocument(text)
+//                }
+//            }
+//
+//            XCTAssert(corpus.count == 17999)
+//            
+//            let endTime = Date()
+//            print("Typical neighborhood size \(typicalNeighborhoodSize) took \(endTime.timeIntervalSince(startTime)) seconds.")
+//        }
+    }
+}
+#endif
+
diff --git a/Tests/SwiftNLPTests/2. Encoding/ContextFreeEncoderTests.swift b/Tests/SwiftNLPTests/2. Encoding/ContextFreeEncoderTests.swift
index 3142fc516974b5241e2e6748b1b35d8dc6784913..17ee2a4eee32c668691f324a6346555189063abe 100644
--- a/Tests/SwiftNLPTests/2. Encoding/ContextFreeEncoderTests.swift	
+++ b/Tests/SwiftNLPTests/2. Encoding/ContextFreeEncoderTests.swift	
@@ -13,13 +13,13 @@ final class ContextFreeEncoderTests: XCTestCase {
             "that enable us to train deep learning algorithms to learn like the human brain."
          ]
         
-        var corpus = DictionaryCorpus(encoding: .glove6B50d)
+        var corpus = InMemoryCorpus<String,ContextFreeEncoder<Double>>()
         corpus.addUntokenizedDocuments(docs)
         
         XCTAssert(corpus.encodedDocuments.count == 3)
         
         // Make sure none of our encodings are zero
-        for c in corpus {
+        for c in corpus.encodedDocuments {
             XCTAssertNotEqual(c, corpus.zeroes)
         }
     }
@@ -50,14 +50,14 @@ final class ContextFreeEncoderTests: XCTestCase {
             "All science is either physics or stamp collecting. - Ernest Rutherford"
         ]
         
-        var corpus = DictionaryCorpus(encoding: .glove6B50d)
+        var corpus = InMemoryCorpus<String,ContextFreeEncoder<Double>>()
         corpus.addUntokenizedDocuments(twentyQuotes)
         
         
         XCTAssertEqual(corpus.encodedDocuments.count, 20)
         
         // Make sure none of our encodings are zero
-        for c in corpus {
+        for c in corpus.encodedDocuments {
             XCTAssertNotEqual(c, corpus.zeroes)
         }
     }
@@ -65,17 +65,17 @@ final class ContextFreeEncoderTests: XCTestCase {
     func testSubreddit() async throws {
         
         guard let submissionsURL = Bundle.module.url(forResource: "Guelph_submissions", withExtension: "zst") else {
-            fatalError("Failed to find waterloo_submissions.zst in test bundle.")
+            fatalError("Failed to find guelph_submissions.zst in test bundle.")
         }
         guard let submissionsData = try? Data(contentsOf: submissionsURL) else {
-            fatalError("Failed to load waterloo_submissions.zst from test bundle.")
+            fatalError("Failed to load guelph_submissions.zst from test bundle.")
         }
         
         let (submissions, _ ): ([Submission],[Data]) = try await loadFromRedditArchive(submissionsData)
         
         //print("Errors: \(errors.count)")
         
-        let corpus = DictionaryCorpus(encoding: .glove6B50d)
+        var corpus = InMemoryCorpus<String,ContextFreeEncoder<Double>>()
         for submission in submissions {
             if let text = submission.selftext {
                 corpus.addUntokenizedDocument(text)
diff --git a/Tests/SwiftNLPTests/2. Encoding/NaturalLanguageEncoderTests.swift b/Tests/SwiftNLPTests/2. Encoding/NaturalLanguageEncoderTests.swift
index 50112696f02f32d1df5e765b47f49a34bdbe4268..c7b5ab391bbc8571ca5016fcc137764576979380 100644
--- a/Tests/SwiftNLPTests/2. Encoding/NaturalLanguageEncoderTests.swift	
+++ b/Tests/SwiftNLPTests/2. Encoding/NaturalLanguageEncoderTests.swift	
@@ -13,15 +13,14 @@ final class NaturalLanguageEncoderTests: XCTestCase {
             "is a free easy-to-use open-source commercial-grade toolkit",
             "that enable us to train deep learning algorithms to learn like the human brain."
          ]
-        
-        let encoder = NaturalLanguageEncoder<Double>()
-        var corpus = DictionaryCorpus(encoder: encoder)
+                
+        var corpus = InMemoryCorpus<String,NaturalLanguageEncoder<Double>>()
         corpus.addUntokenizedDocuments(docs)
         
         XCTAssert(corpus.encodedDocuments.count == 3)
         
         // Make sure none of our encodings are zero
-        for c in corpus {
+        for c in corpus.encodedDocuments {
             XCTAssertNotEqual(c, corpus.zeroes)
         }
     }
@@ -51,16 +50,15 @@ final class NaturalLanguageEncoderTests: XCTestCase {
             "One, remember to look up at the stars and not down at your feet. Two, never give up work. Work gives you meaning and purpose and life is empty without it. Three, if you are lucky enough to find love, remember it is there and don't throw it away. - Stephen Hawking",
             "All science is either physics or stamp collecting. - Ernest Rutherford"
         ]
-        
-        let encoder = NaturalLanguageEncoder<Double>()
-        var corpus = DictionaryCorpus(encoder: encoder)
+                
+        var corpus = InMemoryCorpus<String,NaturalLanguageEncoder<Double>>()
         corpus.addUntokenizedDocuments(twentyQuotes)
         
         
         XCTAssertEqual(corpus.encodedDocuments.count, 20)
         
         // Make sure none of our encodings are zero
-        for c in corpus {
+        for c in corpus.encodedDocuments {
             XCTAssertNotEqual(c, corpus.zeroes)
         }
     }
@@ -68,16 +66,16 @@ final class NaturalLanguageEncoderTests: XCTestCase {
     func testSubreddit() async throws {
         
         guard let submissionsURL = Bundle.module.url(forResource: "Guelph_submissions", withExtension: "zst") else {
-            fatalError("Failed to find waterloo_submissions.zst in test bundle.")
+            fatalError("Failed to find guelph_submissions.zst in test bundle.")
         }
         guard let submissionsData = try? Data(contentsOf: submissionsURL) else {
-            fatalError("Failed to load waterloo_submissions.zst from test bundle.")
+            fatalError("Failed to load guelph_submissions.zst from test bundle.")
         }
         
         let (submissions, _ ): ([Submission],[Data]) = try await loadFromRedditArchive(submissionsData)
         
         let encoder = NaturalLanguageEncoder<Double>()
-        var corpus = DictionaryCorpus(encoder: encoder)
+        var corpus = InMemoryCorpus<String,NaturalLanguageEncoder<Double>>(encoder: encoder)
         
         for submission in submissions {
             if let text = submission.selftext {
diff --git a/Tests/SwiftNLPTests/HNSWPipelineTest.swift b/Tests/SwiftNLPTests/HNSWPipelineTest.swift
new file mode 100644
index 0000000000000000000000000000000000000000..a089bbcb260d0f83246b5a20d243253deecf523e
--- /dev/null
+++ b/Tests/SwiftNLPTests/HNSWPipelineTest.swift
@@ -0,0 +1,136 @@
+//#if os(macOS)
+//import XCTest
+//import Foundation
+//import NaturalLanguage
+//@testable import SwiftNLP
+//
+//// MARK: See AllMiniLM_pipelineTest.swift
+////TODO: Find where TestUtils comes from to fix
+//
+//final class HNSWPipelineTest: XCTestCase {
+//
+//    // test fetching names of all the files
+//    func testFileNameFetching() throws {
+//        let redditCommentNames = TestUtils.getJsonFiles(prefix: "RC")
+//        print("reddit comment files: \(redditCommentNames)")
+//        let redditSubmissionNames = TestUtils.getJsonFiles(prefix: "RS")
+//        print("reddit submission files: \(redditSubmissionNames)")
+//    }
+//
+//    // test reading reddit submission json files into actual objects
+//    func testRedditSubmissions() throws {
+//        let redditSubmissionJson = TestUtils.loadAllRedditSubmission()
+//        for jsonData in redditSubmissionJson {
+//            let redditSubmission = TestUtils.readRedditSubmissionJson(json: jsonData)
+//            XCTAssertNotNil(redditSubmission, "Failed to decode RedditSubmissionData")
+//        }
+//    }
+//
+//    // test reading reddit comment json files into actual objects
+//    func testRedditComments() throws {
+//        let redditCommentJson = TestUtils.loadAllRedditComment()
+//        for jsonData in redditCommentJson {
+//            let redditComment = TestUtils.readRedditCommentJson(json: jsonData)
+//            XCTAssertNotNil(redditComment, "Failed to decode RedditCommentData")
+//        }
+//    }
+//
+//    func test20kDownload() async throws {
+//
+//        let result = try await downloadSubredditFromServer(subreddit: "StopGaming")
+//        print("Loaded \(result.count) threads from server.")
+//        if let random = result.randomElement() {
+//            let (key, value) = random
+//            print("Key: \(key), Value: \(value)")
+//        }
+//        XCTAssertEqual(result.count, 34829, "Failed to load subreddit data from https://reddit-top20k.cworld.ai")
+//
+//    }
+//
+//
+//    func testDocumentReading() async throws {
+//        // loads all json data for test documents
+//        let redditCommentJson = TestUtils.loadAllRedditComment()
+//        let redditSubmissionJson = TestUtils.loadAllRedditSubmission()
+//
+//        let redditComments = redditCommentJson.compactMap { TestUtils.readRedditCommentJson(json: $0)}
+//        let redditSubmissions = redditSubmissionJson.compactMap { TestUtils.readRedditSubmissionJson(json: $0) }
+//
+//        var bodies: [String] = []
+//
+//        // load all the reddit comments' body as comment to the document
+//        for comment in redditComments {
+//            //debugPrint("Processing \(comment.posts.count) comments")
+//
+//            for post in comment.posts {
+//                if let body = post.body {
+//                    bodies.append(body)
+//                }
+//            }
+//        }
+//
+//        for submission in redditSubmissions {
+//            //debugPrint("Processing \(submission.posts.count) submissions")
+//
+//            for post in submission.posts {
+//                if let p = post.selftext {
+//                    //debugPrint(p)
+//                    bodies.append(p)
+//                }
+//            }
+//        }
+//
+//        // Debug code
+////        bodies = Array(bodies.prefix(10))
+////        print(bodies)
+//
+//        // start to encode the db and query
+////        var database_embedding: [[Float]] = []
+////        var query_embedding: [Float] = []
+////        let query = "stop playing video games"
+////        var embedding_dim: Int = 384
+////        var model = MiniLMEmbeddings()
+////        query_embedding = await model.encode(sentence: query)!
+////
+////        var i = 1
+////        //append sentence embedding to database_embedding
+////        for string in bodies {
+////            if let vector = await model.encode(sentence: string) {
+////                database_embedding.append(vector)
+////                //print(i)
+////                i += 1
+////            } else {
+////                fatalError("Error occurred1")
+////            }
+////
+////        }
+////        
+//        let _documentEncoder = ContextFreeEncoder<Double>(source: .glove6B50d)
+//        var corpus = HNSWCorpus(encoder: _documentEncoder)
+//        corpus.addUntokenizedDocuments(bodies)
+//        
+//        let size = MemoryLayout.size(ofValue: corpus)
+//        print("Approximate memory footprint: \(size) bytes")
+//        
+//        do {
+//            print("Attempting to query corpus.encodedDocuments.find()...")
+//            let query = "stop playing video games"
+//            let queryVector = _documentEncoder.encodeToken(query)
+//            let results = try corpus.encodedDocuments.find(near: queryVector, limit: 10)
+//            print(results)
+//            print("Query completed!")
+//        } catch {
+//            print("Error when trying corpus.encodedDocuments.find(): \(error)")
+//        }
+//
+////        let index = AnnoyIndex<Float>(itemLength: embedding_dim, metric: .euclidean)
+////
+////        try? index.addItems(items: &database_embedding)
+////        try? index.build(numTrees: 50)
+////
+////        let results = index.getNNsForVector(vector: &query_embedding, neighbors: 10)
+////
+////        print(results)
+//    }
+//}
+//#endif