Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
Curio
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Jim Wallace
Curio
Commits
3a30f7f3
Commit
3a30f7f3
authored
1 year ago
by
Mingchung Xia
Browse files
Options
Downloads
Patches
Plain Diff
Minor comment changes
parent
7e892787
No related branches found
No related tags found
1 merge request
!13
HNSW Implementation with Testcases
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
Sources/SwiftNLP/1. Data Collection/HNSWCorpus.swift
+7
-7
7 additions, 7 deletions
Sources/SwiftNLP/1. Data Collection/HNSWCorpus.swift
with
7 additions
and
7 deletions
Sources/SwiftNLP/1. Data Collection/HNSWCorpus.swift
+
7
−
7
View file @
3a30f7f3
...
@@ -27,7 +27,7 @@ import PriorityHeapAlgorithms
...
@@ -27,7 +27,7 @@ import PriorityHeapAlgorithms
import
SimilarityMetric
import
SimilarityMetric
import
HNSWAlgorithm
import
HNSWAlgorithm
import
HNSWEphemeral
import
HNSWEphemeral
import
GameplayKit
//
/
Not available on Linux?
import
GameplayKit
// Not available on Linux?
- try to change to other random
class
HNSWCorpus
<
Scalar
:
BinaryFloatingPoint
&
Codable
>
:
SNLPCorpus
{
class
HNSWCorpus
<
Scalar
:
BinaryFloatingPoint
&
Codable
>
:
SNLPCorpus
{
...
@@ -36,9 +36,11 @@ class HNSWCorpus<Scalar: BinaryFloatingPoint & Codable>: SNLPCorpus {
...
@@ -36,9 +36,11 @@ class HNSWCorpus<Scalar: BinaryFloatingPoint & Codable>: SNLPCorpus {
var
zeroes
:
[
Scalar
]
var
zeroes
:
[
Scalar
]
var
count
:
Int
{
0
}
var
count
:
Int
{
0
}
//
/
typicalNeighborhoodSize = 20 is a standard benchmark
// typicalNeighborhoodSize = 20 is a standard benchmark
var
encodedDocuments
:
DeterministicSampleVectorIndex
=
DeterministicSampleVectorIndex
<
[
Scalar
]
>
(
typicalNeighborhoodSize
:
20
)
var
encodedDocuments
:
DeterministicSampleVectorIndex
=
DeterministicSampleVectorIndex
<
[
Scalar
]
>
(
typicalNeighborhoodSize
:
20
)
// Map from Key to documentId - similar to DictionaryCorpus
init
(
_documentEncoder
:
ContextFreeEncoder
<
Scalar
>
)
{
init
(
_documentEncoder
:
ContextFreeEncoder
<
Scalar
>
)
{
self
.
_documentEncoder
=
_documentEncoder
self
.
_documentEncoder
=
_documentEncoder
zeroes
=
Array
(
repeating
:
Scalar
(
0
),
count
:
384
)
zeroes
=
Array
(
repeating
:
Scalar
(
0
),
count
:
384
)
...
@@ -46,15 +48,13 @@ class HNSWCorpus<Scalar: BinaryFloatingPoint & Codable>: SNLPCorpus {
...
@@ -46,15 +48,13 @@ class HNSWCorpus<Scalar: BinaryFloatingPoint & Codable>: SNLPCorpus {
@inlinable
@inlinable
func
addUntokenizedDocument
(
_
document
:
String
)
{
func
addUntokenizedDocument
(
_
document
:
String
)
{
/// Forced cast to [Scalar] is unnecessary
encodedDocuments
.
insert
((
_documentEncoder
.
encodeSentence
(
document
)))
/// as! [Scalar] not needed
encodedDocuments
.
insert
((
_documentEncoder
.
encodeSentence
(
document
)))
}
}
}
}
public
struct
DeterministicSampleVectorIndex
<
Vector
:
Collection
&
Codable
>
where
Vector
.
Element
:
BinaryFloatingPoint
{
public
struct
DeterministicSampleVectorIndex
<
Vector
:
Collection
&
Codable
>
where
Vector
.
Element
:
BinaryFloatingPoint
{
/// EphemeralVectorIndex<Key: BinaryInteger, Level: BinaryInteger, Metric: SimilarityMetric, Metadata>
public
typealias
Index
=
EphemeralVectorIndex
<
Int
,
Int
,
CartesianDistanceMetric
<
Vector
>
,
Void
>
public
typealias
Index
=
EphemeralVectorIndex
<
Int
,
Int
,
CartesianDistanceMetric
<
Vector
>
,
Void
>
public
var
base
:
Index
public
var
base
:
Index
...
@@ -78,8 +78,7 @@ public struct DeterministicSampleVectorIndex<Vector: Collection & Codable> where
...
@@ -78,8 +78,7 @@ public struct DeterministicSampleVectorIndex<Vector: Collection & Codable> where
public
mutating
func
insert
(
_
vector
:
Vector
)
{
public
mutating
func
insert
(
_
vector
:
Vector
)
{
let
convertedVector
:
[
Double
]
=
vector
.
map
{
Double
(
$0
)
}
let
convertedVector
:
[
Double
]
=
vector
.
map
{
Double
(
$0
)
}
if
let
metricVector
=
convertedVector
as?
CartesianDistanceMetric
<
Vector
>.
Vector
{
if
let
metricVector
=
convertedVector
as?
CartesianDistanceMetric
<
Vector
>.
Vector
{
/// Returns a Key (unused)
base
.
insert
(
metricVector
,
using
:
&
graphRNG
)
/// returns an unused 'Key' type
base
.
insert
(
metricVector
,
using
:
&
graphRNG
)
}
else
{
}
else
{
fatalError
(
"Unable to get metric vector"
)
fatalError
(
"Unable to get metric vector"
)
}
}
...
@@ -97,6 +96,7 @@ public struct CartesianDistanceMetric<Vector: Collection & Codable>: SimilarityM
...
@@ -97,6 +96,7 @@ public struct CartesianDistanceMetric<Vector: Collection & Codable>: SimilarityM
}
}
struct
DeterministicRandomNumberGenerator
:
RandomNumberGenerator
{
struct
DeterministicRandomNumberGenerator
:
RandomNumberGenerator
{
// Try another package for this...
private
let
randomSource
:
GKMersenneTwisterRandomSource
private
let
randomSource
:
GKMersenneTwisterRandomSource
init
(
seed
:
UInt64
)
{
init
(
seed
:
UInt64
)
{
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment