Skip to content
Snippets Groups Projects
Commit 2792e9aa authored by Jim Wallace's avatar Jim Wallace
Browse files

Re-organized folders around BERTopic chain

parent 4f6c46eb
No related branches found
No related tags found
No related merge requests found
Pipeline #92184 passed
...@@ -38,10 +38,10 @@ class NLCorpus: SNLPCorpus { ...@@ -38,10 +38,10 @@ class NLCorpus: SNLPCorpus {
tokenizer.string = document tokenizer.string = document
var words: [String] = [] var words: [String] = []
// tokenizer.enumerateTokens(in: document.startIndex..<document.endIndex) { tokenRange, _ in tokenizer.enumerateTokens(in: document.startIndex..<document.endIndex) { tokenRange, _ in
// words.append(String(document[tokenRange])) words.append(String(document[tokenRange]))
// return true return true
// } }
addDocument(document: words) addDocument(document: words)
} }
......
...@@ -106,7 +106,7 @@ class SNLPBoWDictionary: SNLPDictionary { ...@@ -106,7 +106,7 @@ class SNLPBoWDictionary: SNLPDictionary {
// for document in documents { // for document in documents {
// result.append(documentToBagOfWords(document: document, allowUpdate: true)) // result.append(documentToBagOfWords(document: document, allowUpdate: true))
// } // }
//
return result return result
} }
......
/*
 Generated by ChatGPT on April 5th, 2023. Correctness has not been verified yet.
 */
import Foundation
/// A minimal k-means clusterer over dense `Double` vectors.
///
/// Each round assigns every point to its nearest centroid (Euclidean distance)
/// and then moves every centroid to the mean of the points assigned to it.
/// Initial centroids are drawn at random from the input, so cluster labels
/// (and, for ambiguous data, the partition itself) may differ between runs.
struct KMeans {
    /// Requested number of clusters. Clamped to the number of data points when clustering.
    let k: Int
    /// Upper bound on the number of assign/update rounds.
    let maxIterations: Int

    /// Clusters `data` and returns one cluster index per input point.
    ///
    /// - Parameter data: The points to cluster. All points are assumed to have the
    ///   same number of components as the first one; a shorter point will trap.
    /// - Returns: For each point, the index of its cluster in
    ///   `0..<min(k, data.count)`. Returns an empty array when `data` is empty,
    ///   when `k < 1`, or when `maxIterations < 1`.
    func cluster(data: [[Double]]) -> [Int] {
        // Never ask for more centroids than there are points to seed them from.
        let clusterCount = min(k, data.count)
        guard clusterCount > 0 else { return [] }

        var centroids = randomCentroids(data: data, k: clusterCount)
        var assignments = [Int]()
        for _ in 0..<max(0, maxIterations) {
            let updated = assignToCentroids(data: data, centroids: centroids)
            // Unchanged assignments produce unchanged centroids, so every
            // further round would be a no-op: stop early.
            if updated == assignments { break }
            assignments = updated
            centroids = updateCentroids(data: data, assignments: assignments, previous: centroids)
        }
        return assignments
    }

    /// Picks `k` distinct input points (by position) as the starting centroids.
    /// The caller guarantees `0 < k <= data.count`; `prefix` keeps this safe regardless.
    private func randomCentroids(data: [[Double]], k: Int) -> [[Double]] {
        return Array(data.shuffled().prefix(k))
    }

    /// Returns, for every point, the index of the closest centroid.
    /// Ties go to the lowest index; a centroid whose distance is NaN is never preferred.
    private func assignToCentroids(data: [[Double]], centroids: [[Double]]) -> [Int] {
        return data.map { point -> Int in
            var nearest = 0
            var shortest = Double.infinity
            for (index, centroid) in centroids.enumerated() {
                let candidate = distance(centroid, point)
                // Strict `<` keeps the first of several equally close centroids.
                if candidate < shortest {
                    nearest = index
                    shortest = candidate
                }
            }
            return nearest
        }
    }

    /// Recomputes each centroid as the mean of the points assigned to it.
    /// A cluster that received no points keeps its previous centroid instead of
    /// becoming NaN through a division by zero.
    private func updateCentroids(data: [[Double]], assignments: [Int], previous: [[Double]]) -> [[Double]] {
        let dimension = data.first?.count ?? 0
        var sums = [[Double]](repeating: [Double](repeating: 0.0, count: dimension), count: previous.count)
        var counts = [Int](repeating: 0, count: previous.count)
        for (point, label) in zip(data, assignments) {
            counts[label] += 1
            for axis in 0..<dimension {
                sums[label][axis] += point[axis]
            }
        }
        return sums.indices.map { label -> [Double] in
            guard counts[label] > 0 else { return previous[label] }
            let size = Double(counts[label])
            return sums[label].map { $0 / size }
        }
    }

    /// Euclidean distance between `a` and `b`, taken over their common prefix.
    private func distance(_ a: [Double], _ b: [Double]) -> Double {
        return zip(a, b).reduce(0.0) { partial, pair in
            let delta = pair.0 - pair.1
            return partial + delta * delta
        }.squareRoot()
    }
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment