// path: root/semanticsearchscratchpad/create-embeddings.js
// blob: 2d2c321250ec7ee349ce08da9fc2a6c3b723b2cd
//const yemb = (await emb('I really like curry.')).data;
//
//
//console.log('out', xsembs.map(x => ({value:x.value, similarity: cosineSimilarity(x.embeddings, yemb)})));
//
//import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';

//importScripts('./embeddings-lib.js');

// Worker bootstrap: load the transformers library with a dynamic import(),
// then build the embeddings pipeline. Everything defined below lives inside
// these two nested `.then` callbacks so it can close over `extractor`.
// NOTE(review): the package specifier in this URL (`[email protected]`) does not
// look like a valid `name@version` — confirm the intended
// `@xenova/transformers@<version>` before relying on it.
import('https://cdn.jsdelivr.net/npm/@xenova/[email protected]')
  .then(({pipeline, env}) => {

// Presumably forces models to be fetched remotely rather than looked up
// locally — confirm against the library's `env` documentation.
env.allowLocalModels = false;

//const extractor = await pipeline('embeddings', 'Xenova/all-MiniLM-L6-v2');
//const emb = x => {postMessage(x) ; return extractor(x, {pooling:'mean', normalize:'true'})};
// Returning the inner promise chains it onto the outer one, so the outer
// chain settles only once the pipeline has been created.
return pipeline('embeddings', 'Xenova/all-MiniLM-L6-v2')
  .then(extractor => {

/**
 * Run the loaded extractor on one input, requesting mean pooling and
 * normalization of the result.
 *
 * `normalize` is passed as a real boolean: the previous string `'true'` only
 * worked because any non-empty string is truthy (`'false'` would have
 * enabled normalization as well).
 *
 * @param {*} x - Input forwarded unchanged to `extractor`.
 * @returns {*} Whatever `extractor` returns (callers read `.data` from the
 *   resolved value).
 */
const emb = x => extractor(x, {pooling:'mean', normalize:true});

/**
 * Embed every entry of `xs` concurrently and pair each input with its result.
 *
 * @param {Array} xs - Inputs; each one is handed to `emb`.
 * @returns {Promise<Array<{value: *, embeddings: *}>>} One record per input,
 *   in input order, where `embeddings` is the `data` property of the value
 *   `emb` resolved to for that input.
 */
async function createEmbeddings(xs) {
    // Start all embedding calls first, then wait for the whole batch.
    const pending = xs.map(emb);
    const outputs = await Promise.all(pending);

    const records = [];
    for (const [index, output] of outputs.entries()) {
        records.push({value: xs[index], embeddings: output.data});
    }
    return records;
}

/**
 * Worker message handler: make sure the "embeddings" IndexedDB store holds
 * records tagged with `group`, computing and storing them from `dataset`
 * when no record with that group exists yet.
 *
 * All work happens asynchronously in IndexedDB / promise callbacks; failures
 * are reported with `console.error` and nothing is returned.
 *
 * @param {{data: {group: *, dataset: Array}}} message - Message event whose
 *   `data` carries the group tag and the inputs passed to `createEmbeddings`.
 */
function generateEmbeddings({data: {group, dataset}}) {
  const request = indexedDB.open("embeddings");

  request.onerror = () => {
    console.error("Why didn't you allow my web app to use IndexedDB?!", request.error);
  };

  // Runs only when the database is created/upgraded: set up the store and a
  // non-unique index on `value`.
  request.onupgradeneeded = (event) => {
    const db = event.target.result;
    const objectStore = db.createObjectStore('embeddings', {autoIncrement: true});
    objectStore.createIndex("value", "value", { unique: false });
  };

  request.onsuccess = (event) => {
    const db = event.target.result;

    // The existence check only reads, so a read-only transaction is enough.
    const lookup = db
      .transaction('embeddings', 'readonly')
      .objectStore('embeddings')
      .getAll();

    lookup.onerror = () => {
      console.error('transaction failed', lookup.error);
      db.close();
    };

    lookup.onsuccess = (e) => {
      const alreadyStored = e.target.result.some(x => x.group === group);
      if (alreadyStored) {
        db.close();
        return;
      }

      createEmbeddings(dataset)
        .then(xsembs => {
          // A fresh transaction is required here: the lookup transaction has
          // long finished by the time the embeddings are computed.
          const tx = db.transaction('embeddings', 'readwrite');
          tx.onabort = () => {
            console.error('storing embeddings aborted', tx.error);
          };
          const st = tx.objectStore('embeddings');
          xsembs.forEach(record => {
            st.add({...record, group});
          });
        })
        .catch(err => {
          console.error('failed to create or store embeddings', err);
        })
        // close() only takes effect once pending transactions complete, so
        // the queued adds above are still written.
        .finally(() => db.close());
    };
  };
}

// Route incoming worker messages to generateEmbeddings.
self.onmessage = generateEmbeddings;
// Tell the owner of this worker that the model is ready and messages will
// now be handled.
postMessage({loaded:true});
// Becomes the fulfilment value of the promise chain; nothing visible in this
// file consumes it.
return generateEmbeddings;
  });
  });
// NOTE(review): the chain above has no rejection handler, so a failed library
// or model load surfaces only as an unhandled rejection and `{loaded:true}`
// is never posted.


//   const generateEmbeddings = self.onmessage;
//
//export default generateEmbeddings;