Skip to content
Snippets Groups Projects
Commit 22e0de05 authored by Mingchung Xia
Browse files

Durable query for single lifecycle testcase

parent af61df12
No related branches found
No related tags found
1 merge request: !13 "HNSW Implementation with Testcases"
Pipeline #113938 failed
...@@ -26,6 +26,7 @@ class DurableHNSWCorpus/*<Scalar: BinaryFloatingPoint & Codable>: SNLPCorpus*/ { ...@@ -26,6 +26,7 @@ class DurableHNSWCorpus/*<Scalar: BinaryFloatingPoint & Codable>: SNLPCorpus*/ {
// typicalNeighbourhoodSize = 20 is a standard benchmark // typicalNeighbourhoodSize = 20 is a standard benchmark
init(encoding: ContextFreeEncoder<Scalar>.PreComputedEmbeddings, scalar: Scalar.Type = Double.self, typicalNeighborhoodSize: Int = 20, namespace: String = "durablehnsw", in transaction: Transaction) throws { init(encoding: ContextFreeEncoder<Scalar>.PreComputedEmbeddings, scalar: Scalar.Type = Double.self, typicalNeighborhoodSize: Int = 20, namespace: String = "durablehnsw", in transaction: Transaction) throws {
_documentEncoder = ContextFreeEncoder(source: encoding) _documentEncoder = ContextFreeEncoder(source: encoding)
encodedDocuments = try DeterministicDurableVectorIndex( encodedDocuments = try DeterministicDurableVectorIndex(
namespace: namespace, namespace: namespace,
typicalNeighborhoodSize: typicalNeighborhoodSize, typicalNeighborhoodSize: typicalNeighborhoodSize,
......
...@@ -64,7 +64,7 @@ public struct DeterministicDurableVectorIndex/*<Vector: Collection & Codable> wh ...@@ -64,7 +64,7 @@ public struct DeterministicDurableVectorIndex/*<Vector: Collection & Codable> wh
public func find(near query: Vector, limit: Int, exact: Bool = false, in transaction: Transaction) throws -> [Index.Neighbor] { public func find(near query: Vector, limit: Int, exact: Bool = false, in transaction: Transaction) throws -> [Index.Neighbor] {
if exact { if exact {
// TODO: Exact search logic // TODO: Exact search logic
fatalError("Exact search logic for DeterministicDurableVectorIndex is not supported") fatalError("Exact search logic for DeterministicDurableVectorIndex is not currently supported")
} else { } else {
let accessor = try Index.Accessor(for: base, in: transaction) let accessor = try Index.Accessor(for: base, in: transaction)
return Array(try accessor.find(near: query, limit: limit)) return Array(try accessor.find(near: query, limit: limit))
......
...@@ -31,16 +31,17 @@ final class HNSWTests: XCTestCase { ...@@ -31,16 +31,17 @@ final class HNSWTests: XCTestCase {
"that enable us to train deep learning algorithms to learn like the human brain." "that enable us to train deep learning algorithms to learn like the human brain."
] ]
let corpus = HNSWCorpusDataHandler<Double>.loadMemoryMap(encoding: .glove6B50d, resource: "hnsw_testbasicexample")
// var corpus = HNSWCorpus(encoding: .glove6B50d) // var corpus = HNSWCorpus(encoding: .glove6B50d)
// corpus.addUntokenizedDocuments(docs) // corpus.addUntokenizedDocuments(docs)
// //
// let dataHandler = HNSWCorpusDataHandler(corpus: corpus, resource: "hnsw_testbasicexample") let dataHandler = HNSWCorpusDataHandler(corpus: corpus, resource: "hnsw_testbasicexample")
// let corpusSize = dataHandler.getCorpusSize() let corpusSize = dataHandler.getCorpusSize()
// let dictionarySize = dataHandler.getDictionarySize(includeKey: false) // let dictionarySize = dataHandler.getDictionarySize(includeKey: false)
// print("Corpus size: \(corpusSize) bytes") // print("Corpus size: \(corpusSize) bytes")
// print("Dictionary size: \(dictionarySize) bytes") // print("Dictionary size: \(dictionarySize) bytes")
// dataHandler.saveMemoryMap() // dataHandler.saveMemoryMap()
let corpus = HNSWCorpusDataHandler<Double>.loadMemoryMap(encoding: .glove6B50d, resource: "hnsw_testbasicexample")
XCTAssert(corpus.count == 3) XCTAssert(corpus.count == 3)
...@@ -56,20 +57,23 @@ final class HNSWTests: XCTestCase { ...@@ -56,20 +57,23 @@ final class HNSWTests: XCTestCase {
"is a free easy-to-use open-source commercial-grade toolkit", "is a free easy-to-use open-source commercial-grade toolkit",
"that enable us to train deep learning algorithms to learn like the human brain." "that enable us to train deep learning algorithms to learn like the human brain."
] ]
/// Setting up working directory
let fileManager = FileManager.default
let directoryURL = fileManager.homeDirectoryForCurrentUser.appendingPathComponent("/Downloads/lmdb")
try fileManager.createDirectory(at: directoryURL, withIntermediateDirectories: true, attributes: nil)
let filepath = FilePath(directoryURL.path)
/// Setting up the environment /// Setting up the environment
let env = try Environment() let env = try Environment()
try env.setMapSize(1_073_741_824) /// 1 GB try env.setMapSize(1_073_741_824) /// 1 GB
try env.setMaxReaders(126) /// default try env.setMaxReaders(126) /// default
try env.setMaxDBs(10) try env.setMaxDBs(10)
try env.open(path: filepath)
let fileManager = FileManager.default // Writing to disk
let directoryURL = fileManager.homeDirectoryForCurrentUser.appendingPathComponent("/Downloads/lmdb")
try fileManager.createDirectory(at: directoryURL, withIntermediateDirectories: true, attributes: nil)
try env.open(path: FilePath(directoryURL.path))
let transaction = try Transaction.begin(.write, in: env) let transaction = try Transaction.begin(.write, in: env)
var corpus = try DurableHNSWCorpus( var corpus = try DurableHNSWCorpus(
encoding: .glove6B50d, encoding: .glove6B50d,
namespace: "testbasicexampledurable", namespace: "testbasicexampledurable",
...@@ -79,14 +83,23 @@ final class HNSWTests: XCTestCase { ...@@ -79,14 +83,23 @@ final class HNSWTests: XCTestCase {
for doc in docs { for doc in docs {
try corpus.addUntokenizedDocument(doc, in: transaction) try corpus.addUntokenizedDocument(doc, in: transaction)
} }
try transaction.commit() try transaction.commit()
// let transaction = try Transaction.begin(.read, in: env)
//
// transaction.abort()
// Reading from disk
XCTAssert(corpus.count == 3) let readTransaction = try Transaction.begin(.read, in: env)
let readCorpus = try DurableHNSWCorpus(
encoding: .glove6B50d,
namespace: "testbasicexampledurable",
in: readTransaction
)
readTransaction.abort()
// XCTAssert(corpus.count == 3)
XCTAssert(readCorpus.count == 3)
} }
...@@ -254,30 +267,49 @@ final class HNSWTests: XCTestCase { ...@@ -254,30 +267,49 @@ final class HNSWTests: XCTestCase {
let query = "I like to read about new technology and artificial intelligence" let query = "I like to read about new technology and artificial intelligence"
let _documentEncoder = ContextFreeEncoder<Double>(source: .glove6B50d) let _documentEncoder = ContextFreeEncoder<Double>(source: .glove6B50d)
/// Setting up working directory
let fileManager = FileManager.default
let directoryURL = fileManager.homeDirectoryForCurrentUser.appendingPathComponent("/Downloads/lmdb")
try fileManager.createDirectory(at: directoryURL, withIntermediateDirectories: true, attributes: nil)
let filepath = FilePath(directoryURL.path)
/// Setting up the environment /// Setting up the environment
let env = try Environment() let env = try Environment()
try env.setMapSize(1_073_741_824) /// 1 GB try env.setMapSize(1_073_741_824) /// 1 GB
try env.setMaxReaders(256) try env.setMaxReaders(126) /// default
try env.setMaxDBs(10) try env.setMaxDBs(10)
try env.open(path: filepath)
let fileManager = FileManager.default
let directoryURL = fileManager.temporaryDirectory.appendingPathComponent("lmdb")
try fileManager.createDirectory(at: directoryURL, withIntermediateDirectories: true, attributes: nil)
try env.open(path: FilePath(directoryURL.path))
let transaction = try Transaction.begin(.write, in: env) let transaction = try Transaction.begin(.write, in: env)
var corpus = try DurableHNSWCorpus(encoder: _documentEncoder, namespace: "testbasicqueryexampledurable", in: transaction) var corpus = try DurableHNSWCorpus(
encoder: _documentEncoder,
namespace: "testbasicqueryexampledurable",
in: transaction
)
for doc in docs { for doc in docs {
try corpus.addUntokenizedDocument(doc, in: transaction) try corpus.addUntokenizedDocument(doc, in: transaction)
} }
try transaction.commit()
do { do {
print("Attempting to query corpus.encodedDocuments.find()...") print("Attempting to query corpus.encodedDocuments.find()...")
let queryVector: [Double] = _documentEncoder.encodeToken(query).map { Double($0) } let queryVector: [Double] = _documentEncoder.encodeToken(query).map { Double($0) }
let result = try corpus.encodedDocuments.find(near: queryVector, limit: 8, in: transaction)
let readTransaction = try Transaction.begin(.write, in: env)
let readCorpus = try DurableHNSWCorpus(
encoder: _documentEncoder,
namespace: "testbasicqueryexampledurable",
in: readTransaction
)
// do not add documents here!
let result = try readCorpus.encodedDocuments.find(near: queryVector, limit: 8, in: transaction)
for result in result { for result in result {
let key = Int(result.id.foreignKey)! let key = Int(result.id.foreignKey)!
...@@ -288,9 +320,8 @@ final class HNSWTests: XCTestCase { ...@@ -288,9 +320,8 @@ final class HNSWTests: XCTestCase {
} catch { } catch {
print("Error when trying corpus.encodedDocuments.find(): \(error)") print("Error when trying corpus.encodedDocuments.find(): \(error)")
} }
transaction.reset()
try transaction.commit() try transaction.commit()
try transaction.renew()
} }
func testLargeQueryExample() async throws { func testLargeQueryExample() async throws {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment