From cdc9f669d33d2c41291938a8ebefb978dfb29424 Mon Sep 17 00:00:00 2001
From: Mingchung Xia <mingchung.xia@gmail.com>
Date: Wed, 7 Feb 2024 21:12:46 -0500
Subject: [PATCH] Codable conformances for hnsw index

---
 ...rministicSampleVectorIndex + Codable.swift | 36 +++++++++++++++++++
 .../DeterministicSampleVectorIndex.swift      |  6 +++-
 .../HNSWCorpus + Codable.swift                | 25 +++++++++++++
 3 files changed, 66 insertions(+), 1 deletion(-)
 create mode 100644 Sources/SwiftNLP/1. Data Collection/DeterministicSampleVectorIndex + Codable.swift
 create mode 100644 Sources/SwiftNLP/1. Data Collection/HNSWCorpus + Codable.swift

diff --git a/Sources/SwiftNLP/1. Data Collection/DeterministicSampleVectorIndex + Codable.swift b/Sources/SwiftNLP/1. Data Collection/DeterministicSampleVectorIndex + Codable.swift
new file mode 100644
index 00000000..715c50e0
--- /dev/null
+++ b/Sources/SwiftNLP/1. Data Collection/DeterministicSampleVectorIndex + Codable.swift
@@ -0,0 +1,36 @@
+//
+// DeterministicSampleVectorIndex + Codable.swift
+//
+//
+// Created by Mingchung Xia on 2024-02-07.
+//
+
+import Foundation
+
+// MARK: - Encodable
+extension DeterministicSampleVectorIndex: Encodable where Vector: Encodable {
+    /// Keys of the keyed container used when encoding and decoding the index.
+    enum CodingKeys: String, CodingKey {
+        case typicalNeighborhoodSize, vectors
+    }
+
+    /// Encodes `typicalNeighborhoodSize` and `base.vectors` under their coding keys.
+    public func encode(to encoder: Encoder) throws {
+        var keyed = encoder.container(keyedBy: CodingKeys.self)
+        try keyed.encode(typicalNeighborhoodSize, forKey: .typicalNeighborhoodSize)
+        try keyed.encode(base.vectors, forKey: .vectors)
+    }
+}
+
+// MARK: - Decodable
+extension DeterministicSampleVectorIndex: Decodable where Vector: Decodable {
+    /// Decodes the neighborhood size and the vectors, then inserts each vector into a new index.
+    public init(from decoder: Decoder) throws {
+        let keyed = try decoder.container(keyedBy: CodingKeys.self)
+        let neighborhoodSize = try keyed.decode(Int.self, forKey: .typicalNeighborhoodSize)
+        let storedVectors = try keyed.decode([Vector].self, forKey: .vectors)
+        self.init(typicalNeighborhoodSize: neighborhoodSize)
+        for stored in storedVectors { insert(stored) }
+    }
+}
+
diff --git a/Sources/SwiftNLP/1. Data Collection/DeterministicSampleVectorIndex.swift b/Sources/SwiftNLP/1. Data Collection/DeterministicSampleVectorIndex.swift
index 5127d2a0..d11ba020 100644
--- a/Sources/SwiftNLP/1. Data Collection/DeterministicSampleVectorIndex.swift
+++ b/Sources/SwiftNLP/1. 
Data Collection/DeterministicSampleVectorIndex.swift
@@ -30,13 +30,17 @@ import PriorityHeapAlgorithms
 import HNSWAlgorithm
 import HNSWEphemeral
 
+// It may be useful to conform to Sequence and/or Collection
+
 public struct DeterministicSampleVectorIndex<Vector: Collection & Codable> where Vector.Element: BinaryFloatingPoint {
 
     public typealias Index = EphemeralVectorIndex<Int, Int, CartesianDistanceMetric<Vector>, Void>
     public var base: Index
+    public var typicalNeighborhoodSize: Int
 
-    public init(typicalNeighborhoodSize: Int) {
+    public init(typicalNeighborhoodSize: Int = 20) {
         base = .init(metric: CartesianDistanceMetric<Vector>(), config: .unstableDefault(typicalNeighborhoodSize: typicalNeighborhoodSize))
+        self.typicalNeighborhoodSize = typicalNeighborhoodSize
     }
 
     private var srng = SeedableRandomNumberGenerator(seed: 1)
diff --git a/Sources/SwiftNLP/1. Data Collection/HNSWCorpus + Codable.swift b/Sources/SwiftNLP/1. Data Collection/HNSWCorpus + Codable.swift
new file mode 100644
index 00000000..3b29b1e2
--- /dev/null
+++ b/Sources/SwiftNLP/1. Data Collection/HNSWCorpus + Codable.swift
@@ -0,0 +1,25 @@
+//
+// HNSWCorpus + Codable.swift
+//
+//
+// Created by Mingchung Xia on 2024-02-07.
+//
+
+//import Foundation
+
+// MARK: Decodable conformance is in HNSWCorpus
+
+//extension HNSWCorpus: Codable {
+//    enum CodingKeys: String, CodingKey {
+//        case _documentEncoder
+//        case zeroes
+//        case encodedDocuments
+//    }
+//
+//    func encode(to encoder: Encoder) throws {
+//        var container = encoder.container(keyedBy: CodingKeys.self)
+//        try container.encode(_documentEncoder, forKey: ._documentEncoder)
+//        try container.encode(zeroes, forKey: .zeroes)
+//        try container.encode(encodedDocuments, forKey: .encodedDocuments)
+//    }
+//}
-- 
GitLab