summary refs log tree commit diff stats
path: root/semanticsearchscratchpad/create-embeddings.js
diff options
context:
space:
mode:
Diffstat (limited to 'semanticsearchscratchpad/create-embeddings.js')
-rw-r--r-- semanticsearchscratchpad/create-embeddings.js 84
1 files changed, 63 insertions, 21 deletions
diff --git a/semanticsearchscratchpad/create-embeddings.js b/semanticsearchscratchpad/create-embeddings.js
index bbb3a81..2d2c321 100644
--- a/semanticsearchscratchpad/create-embeddings.js
+++ b/semanticsearchscratchpad/create-embeddings.js
@@ -1,30 +1,72 @@
-onmessage = msg => {
- const lines = msg.data;
-
- const request = indexedDB.open("embeddings");
-
- request.onupgradeneeded = (event) => {
- console.log('onupgradeneeded')
- const db = event.target.result;
- db.createObjectStore('embeddings', {autoIncrement: true});
- };
-
- let embeddings;
- request.onsuccess = (event) => {
- const db = event.target.result;
- embeddings = db.transaction('embeddings', 'readwrite').objectStore('embeddings').getAll();
+
+//const yemb = (await emb('I really like curry.')).data;
+//
+//
+//console.log('out', xsembs.map(x => ({value:x.value, similarity: cosineSimilarity(x.embeddings, yemb)})));
+//
+//import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';
+
+//importScripts('./embeddings-lib.js');
+
+// Load the transformers library from the CDN at runtime with a dynamic import()
+// and continue once its `pipeline` and `env` exports are available.
+// NOTE(review): "[email protected]" in the URL below looks like an email-obfuscation
+// artifact of how this page was captured; the original presumably pinned a
+// package version ("transformers@<version>") — confirm before relying on this URL.
+import('https://cdn.jsdelivr.net/npm/@xenova/[email protected]')
+ .then(({pipeline, env}) => {
+
+// presumably stops the library from looking for model files locally — confirm
+// against the library's `env` documentation.
+env.allowLocalModels = false;
+
+//const extractor = await pipeline('embeddings', 'Xenova/all-MiniLM-L6-v2');
+//const emb = x => {postMessage(x) ; return extractor(x, {pooling:'mean', normalize:'true'})};
+// Build the 'embeddings' pipeline for the 'Xenova/all-MiniLM-L6-v2' model; the
+// promise it returns resolves to `extractor`, which the code below closes over.
+return pipeline('embeddings', 'Xenova/all-MiniLM-L6-v2')
+ .then(extractor => {
+
+// Run the loaded extractor on one input with mean pooling and normalization.
+// `normalize` is passed as a real boolean: the string 'true' used before only
+// worked because any non-empty string is truthy.
+const emb = x => extractor(x, {pooling: 'mean', normalize: true});
+
+/**
+ * Embed every entry of `xs` concurrently and pair each input with its vector.
+ *
+ * @param {Array} xs - inputs handed to `emb`, one call per entry.
+ * @returns {Promise<Array<{value: *, embeddings: *}>>} one record per input, in
+ *   input order; `embeddings` is the `data` field of the matching `emb` result.
+ */
+async function createEmbeddings(xs) {
+  const outputs = await Promise.all(xs.map(emb));
+  return outputs.map((output, index) => {
+    const record = {value: xs[index], embeddings: output.data};
+    return record;
+  });
+}
+
+/**
+ * Message handler: make sure embeddings for one group are stored in IndexedDB.
+ *
+ * Opens the "embeddings" database, reads every stored record and keeps those
+ * whose `group` matches. If there are none, it computes embeddings for
+ * `dataset` with createEmbeddings() and adds one record per entry, tagged with
+ * `group`. It returns nothing and posts no message when it is done.
+ *
+ * NOTE(review): the promise returned by createEmbeddings() has no rejection
+ * handler, so a failed embedding run is not reported by this function.
+ *
+ * @param {{data: {group: *, dataset: Array}}} message - incoming message; only
+ *   `data.group` and `data.dataset` are read.
+ */
+function generateEmbeddings({data: {group, dataset}}) {
+const request = indexedDB.open("embeddings");
+request.onerror = () => {
+ console.error("Why didn't you allow my web app to use IndexedDB?!");
+};
+
+// Schema setup: an auto-incrementing 'embeddings' store with a non-unique
+// index on the `value` property.
+request.onupgradeneeded = (event) => {
+ const db = event.target.result;
+ const objectStore = db.createObjectStore('embeddings', {autoIncrement: true});
+ objectStore.createIndex("value", "value", { unique: false });
+ };
+
+request.onsuccess = (event) => {
+ const db = event.target.result;
+ // `t` is the request returned by getAll(), not the transaction itself; its
+ // result is every record currently in the store.
+ let t = db.transaction('embeddings', 'readwrite').objectStore('embeddings').getAll();
+ t.onerror = () => console.error('transaction failed');
+ t.onsuccess = e => {
+ // Keep only the records that were stored for the requested group.
+ const embeddings = e.target.result.filter(x => x.group === group);
+ // filter() always returns an array, so only the length check can be true here.
if (!embeddings || !embeddings.length) {
- createEmbeddings(lines)
+ createEmbeddings(dataset)
.then(xsembs => {
- embeddings = xsembs;
const st = db
.transaction('embeddings', 'readwrite')
.objectStore('embeddings');
xsembs.forEach(emb => {
- console.log(emb);
- st.add(emb);
+ // Store the group with each record so the filter above can find it later.
+ st.add({...emb, group});
});
});
- }
- };
+ }
+}
+}
+
}
+
+// Install the handler first, then post {loaded: true}, so a sender that waits
+// for that message (presumably the page that started this worker) finds the
+// handler already in place.
+self.onmessage = generateEmbeddings;
+postMessage({loaded:true});
+// The handler is also the value the surrounding promise chain resolves to.
+return generateEmbeddings;
+ });
+ });
+
+
+// const generateEmbeddings = self.onmessage;
+//
+//export default generateEmbeddings;
+
+