Skip to content
Snippets Groups Projects
Commit 452de2bd authored by Mingchung Xia
Browse files

Added and cleaned tests

parent 1a377556
No related branches found
No related tags found
1 merge request: !13 HNSW Implementation with Testcases
Pipeline #115493 passed with warnings
......@@ -44,7 +44,7 @@ final class DurableHNSWCorpusTests: XCTestCase {
// workingDirectoryPath = FilePath(resourcesDirectoryURL.path)
}
func testBasicExample() throws {
func testBuildBasicCorpus() throws {
let docs = [
"CNTK formerly known as Computational Network Toolkit",
"is a free easy-to-use open-source commercial-grade toolkit",
......@@ -89,7 +89,7 @@ final class DurableHNSWCorpusTests: XCTestCase {
/// This is because size is only incremented when insertion is called but it is not called when read from disk!
}
func testBasicQueryExample() async throws {
func testQueryBasicCorpus() async throws {
let docs = [
"The quick brown fox jumps over the lazy dog",
"I enjoy taking long walks along the beach at sunset",
......@@ -155,7 +155,7 @@ final class DurableHNSWCorpusTests: XCTestCase {
try transaction.commit()
}
func testBuildSubredditCorpus() async throws {
func testBuildGuelphSubredditCorpus() async throws {
/// Generates the LMDB durable storage to disk but runs no tests otherwise
/// Setting up the environment
......@@ -198,7 +198,7 @@ final class DurableHNSWCorpusTests: XCTestCase {
try transaction.commit()
}
func testSubredditQueryExample() async throws {
func testQueryGuelphSubredditCorpus() async throws {
let _documentEncoder = ContextFreeEncoder<Double>(source: .glove6B50d)
/// Setting up the environment
......
......@@ -8,7 +8,7 @@ final class EphemeralHNSWCorpusTests: XCTestCase {
// MARK: EphemeralHNSWCorpus can also be used as its typealias HNSWCorpus
// Load a small set of documents and confirm that corpus and dictionary are updated accordingly
func testBasicExample() throws {
func testBuildBasicCorpus() throws {
let docs = [
"CNTK formerly known as Computational Network Toolkit",
"is a free easy-to-use open-source commercial-grade toolkit",
......@@ -27,7 +27,7 @@ final class EphemeralHNSWCorpusTests: XCTestCase {
}
// Load a bigger set of documents and confirm
func testLargeExample() throws {
func testBuildLargeCorpus() throws {
let twentyQuotes = [
"Imagination is more important than knowledge. - Albert Einstein",
"The greatest enemy of knowledge is not ignorance, it is the illusion of knowledge. - Stephen Hawking",
......@@ -62,7 +62,7 @@ final class EphemeralHNSWCorpusTests: XCTestCase {
}
}
func testSubreddit() async throws {
func testBuildGuelphSubredditCorpus() async throws {
guard let submissionsURL = Bundle.module.url(forResource: "Guelph_submissions", withExtension: "zst") else {
fatalError("Failed to find waterloo_submissions.zst in test bundle.")
}
......@@ -84,7 +84,7 @@ final class EphemeralHNSWCorpusTests: XCTestCase {
}
// Load a small set of documents and confirm that corpus and dictionary are updated accordingly
func testBasicQueryExample() async throws {
func testQueryBasicCorpus() async throws {
let docs = [
"The quick brown fox jumps over the lazy dog",
"I enjoy taking long walks along the beach at sunset",
......@@ -114,7 +114,7 @@ final class EphemeralHNSWCorpusTests: XCTestCase {
}
}
func testLargeQueryExample() async throws {
func testQueryLargeCorpus() async throws {
let docs = [
"Imagination is more important than knowledge. - Albert Einstein",
"The greatest enemy of knowledge is not ignorance, it is the illusion of knowledge. - Stephen Hawking",
......@@ -156,7 +156,7 @@ final class EphemeralHNSWCorpusTests: XCTestCase {
}
}
func testSubredditQueryExample() async throws {
func testQueryGuephSubredditCorpus() async throws {
guard let submissionsURL = Bundle.module.url(forResource: "Guelph_submissions", withExtension: "zst") else {
fatalError("Failed to find waterloo_submissions.zst in test bundle.")
}
......@@ -188,6 +188,35 @@ final class EphemeralHNSWCorpusTests: XCTestCase {
print("Error when trying corpus.encodedDocuments.find(): \(error)")
}
}
/// Benchmark-style test for the `typicalNeighborhoodSize` parameter of `HNSWCorpus`.
///
/// NOTE: every statement in the body is currently commented out, so this test
/// executes nothing and passes trivially.
///
/// As written, the disabled code would:
///   1. load `Guelph_submissions.zst` from the test bundle and decode it with
///      `loadFromRedditArchive`,
///   2. for each candidate neighborhood size, build a new `HNSWCorpus`
///      (`.glove6B50d` encoding) from every submission that has a `selftext`,
///   3. assert that the corpus contains 17999 documents, and
///   4. print the elapsed time for that neighborhood size.
func testTypicalNeighborhoodSize() async throws {
// NOTE(review): the fatalError messages below mention "waterloo_submissions.zst"
// although the resource requested is "Guelph_submissions" — align them when re-enabling.
// guard let submissionsURL = Bundle.module.url(forResource: "Guelph_submissions", withExtension: "zst") else {
// fatalError("Failed to find waterloo_submissions.zst in test bundle.")
// }
// guard let submissionsData = try? Data(contentsOf: submissionsURL) else {
// fatalError("Failed to load waterloo_submissions.zst from test bundle.")
// }
//
// let (submissions, _ ): ([Submission],[Data]) = try await loadFromRedditArchive(submissionsData)
//
// NOTE(review): 1028 is the only value in this list that is not a power of two —
// presumably 1024 was intended; confirm before re-enabling.
// let typicalNeighborhoodSizes = [2, 8, 16, 32, 64, 128, 512, 1028]
//
// for typicalNeighborhoodSize in typicalNeighborhoodSizes {
// let startTime = Date()
// var corpus = HNSWCorpus(encoding: .glove6B50d, typicalNeighborhoodSize: typicalNeighborhoodSize)
//
// for submission in submissions {
// if let text = submission.selftext {
// corpus.addUntokenizedDocument(text)
// }
// }
//
// The expected count is hard-coded for the bundled data set.
// XCTAssert(corpus.count == 17999)
//
// The end time is sampled after the assertion, so the reported duration includes it.
// let endTime = Date()
// print("Typical neighborhood size \(typicalNeighborhoodSize) took \(endTime.timeIntervalSince(startTime)) seconds.")
// }
}
}
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment