diff --git a/Sources/SwiftNLP/1. Data Collection/DeterministicSampleVectorIndex + Codable.swift b/Sources/SwiftNLP/1. Data Collection/DeterministicSampleVectorIndex + Codable.swift
new file mode 100644
index 0000000000000000000000000000000000000000..715c50e0ee73dff4ae64798737806c854aaa0d5a
--- /dev/null
+++ b/Sources/SwiftNLP/1. Data Collection/DeterministicSampleVectorIndex + Codable.swift
@@ -0,0 +1,53 @@
+//
+// DeterministicSampleVectorIndex + Codable.swift
+//
+//
+// Created by Mingchung Xia on 2024-02-07.
+//
+
+import Foundation
+
+// MARK: - Encodable
+
+extension DeterministicSampleVectorIndex: Encodable where Vector: Encodable {
+    /// Keys of the keyed container used by both `encode(to:)` and `init(from:)`.
+    enum CodingKeys: String, CodingKey {
+        case typicalNeighborhoodSize, vectors
+    }
+
+    /// Encodes `typicalNeighborhoodSize` and `base.vectors` into a keyed container.
+    ///
+    /// No other state of `base` is written; `init(from:)` re-inserts the stored vectors.
+    ///
+    /// - Parameter encoder: The encoder that supplies the keyed container.
+    /// - Throws: Any error raised while encoding either value.
+    public func encode(to encoder: Encoder) throws {
+        var keyed = encoder.container(keyedBy: CodingKeys.self)
+        try keyed.encode(typicalNeighborhoodSize, forKey: .typicalNeighborhoodSize)
+        try keyed.encode(base.vectors, forKey: .vectors)
+    }
+}
+
+// MARK: - Decodable
+
+extension DeterministicSampleVectorIndex: Decodable where Vector: Decodable {
+    /// Creates an index from a keyed container holding `typicalNeighborhoodSize` and `vectors`.
+    ///
+    /// Both values are decoded first; the index is then created with the decoded size and
+    /// each decoded vector is passed to `insert` in array order.
+    ///
+    /// - Parameter decoder: The decoder that supplies the keyed container.
+    /// - Throws: Any error raised while reading the container or decoding either value.
+    public init(from decoder: Decoder) throws {
+        let keyed = try decoder.container(keyedBy: CodingKeys.self)
+        let neighborhoodSize = try keyed.decode(Int.self, forKey: .typicalNeighborhoodSize)
+        let storedVectors = try keyed.decode([Vector].self, forKey: .vectors)
+
+        self.init(typicalNeighborhoodSize: neighborhoodSize)
+        for storedVector in storedVectors {
+            insert(storedVector)
+        }
+    }
+}
+
+
diff --git a/Sources/SwiftNLP/1. Data Collection/DeterministicSampleVectorIndex.swift b/Sources/SwiftNLP/1. Data Collection/DeterministicSampleVectorIndex.swift
index 5127d2a0aed0b4730c53ab21fa99b0f54803e3f7..d11ba0200b4f02d2613c7251f6c738dddaf981de 100644
--- a/Sources/SwiftNLP/1. Data Collection/DeterministicSampleVectorIndex.swift
+++ b/Sources/SwiftNLP/1. 
Data Collection/DeterministicSampleVectorIndex.swift @@ -30,13 +30,17 @@ import PriorityHeapAlgorithms import HNSWAlgorithm import HNSWEphemeral +// It may be useful to conform to Sequence and/or Collection + public struct DeterministicSampleVectorIndex<Vector: Collection & Codable> where Vector.Element: BinaryFloatingPoint { public typealias Index = EphemeralVectorIndex<Int, Int, CartesianDistanceMetric<Vector>, Void> public var base: Index + public var typicalNeighborhoodSize: Int - public init(typicalNeighborhoodSize: Int) { + public init(typicalNeighborhoodSize: Int = 20) { base = .init(metric: CartesianDistanceMetric<Vector>(), config: .unstableDefault(typicalNeighborhoodSize: typicalNeighborhoodSize)) + self.typicalNeighborhoodSize = typicalNeighborhoodSize } private var srng = SeedableRandomNumberGenerator(seed: 1) diff --git a/Sources/SwiftNLP/1. Data Collection/HNSWCorpus + Codable.swift b/Sources/SwiftNLP/1. Data Collection/HNSWCorpus + Codable.swift new file mode 100644 index 0000000000000000000000000000000000000000..3b29b1e222e93e5d766cea71dbc4c5aceb510795 --- /dev/null +++ b/Sources/SwiftNLP/1. Data Collection/HNSWCorpus + Codable.swift @@ -0,0 +1,25 @@ +// +// HNSWCorpus + Codable.swift +// +// +// Created by Mingchung Xia on 2024-02-07. +// + +//import Foundation + +// MARK: Decodable conformance is in HNSWCorpus + +//extension HNSWCorpus: Codable { +// enum CodingKeys: String, CodingKey { +// case _documentEncoder +// case zeroes +// case encodedDocuments +// } +// +// func encode(to encoder: Encoder) throws { +// var container = encoder.container(keyedBy: CodingKeys.self) +// try container.encode(_documentEncoder, forKey: ._documentEncoder) +// try container.encode(zeroes, forKey: .zeroes) +// try container.encode(encodedDocuments, forKey: .encodedDocuments) +// } +//}