diff options
Diffstat (limited to 'semanticsearchscratchpad')
-rw-r--r-- | semanticsearchscratchpad/create-embeddings.js | 50 | ||||
-rw-r--r-- | semanticsearchscratchpad/index.html | 131 | ||||
-rw-r--r-- | semanticsearchscratchpad/worker.js | 12 |
3 files changed, 193 insertions, 0 deletions
diff --git a/semanticsearchscratchpad/create-embeddings.js b/semanticsearchscratchpad/create-embeddings.js
new file mode 100644
index 0000000..bbb3a81
--- /dev/null
+++ b/semanticsearchscratchpad/create-embeddings.js
@@ -0,0 +1,50 @@
+/**
+ * Worker entry point: adds every record produced by `createEmbeddings(msg.data)`
+ * to the "embeddings" IndexedDB object store, unless the store already holds
+ * records.
+ *
+ * NOTE(review): `createEmbeddings` is not defined in this file; it has to be
+ * made available in the worker's scope before a message arrives -- confirm
+ * against whatever page ends up spawning this worker.
+ *
+ * @param {MessageEvent} msg - `msg.data` is handed to `createEmbeddings`.
+ */
+onmessage = msg => {
+  const lines = msg.data;
+
+  const request = indexedDB.open("embeddings");
+
+  request.onerror = () => {
+    console.error('opening the embeddings database failed', request.error);
+  };
+
+  request.onupgradeneeded = (event) => {
+    console.log('onupgradeneeded');
+    const db = event.target.result;
+    db.createObjectStore('embeddings', {autoIncrement: true});
+  };
+
+  request.onsuccess = (event) => {
+    const db = event.target.result;
+    // getAll() returns an IDBRequest, not an array: the stored records only
+    // become available as `result` once its success event has fired.
+    const stored = db.transaction('embeddings', 'readonly').objectStore('embeddings').getAll();
+    stored.onerror = () => console.error('reading stored embeddings failed', stored.error);
+    stored.onsuccess = () => {
+      if (stored.result.length) {
+        return; // the store is already populated
+      }
+      createEmbeddings(lines)
+        .then(xsembs => {
+          const st = db
+            .transaction('embeddings', 'readwrite')
+            .objectStore('embeddings');
+          xsembs.forEach(record => {
+            console.log(record);
+            st.add(record);
+          });
+        })
+        .catch(err => console.error('creating embeddings failed', err));
+    };
+  };
+};
diff --git a/semanticsearchscratchpad/index.html b/semanticsearchscratchpad/index.html
new file mode 100644
index 0000000..8a96b54
--- /dev/null
+++ b/semanticsearchscratchpad/index.html
@@ -0,0 +1,131 @@
+<div class="controls" tabindex="0">
+  <form>
+    <div>
+      <label for="number1">Multiply number 1: </label>
+      <input type="text" id="number1" value="0" />
+    </div>
+    <div>
+      <label for="number2">Multiply number 2: </label>
+      <input type="text" id="number2" value="0" />
+    </div>
+  </form>
+
+  <p class="result">Result: 0</p>
+</div>
+
+<script type="module">
+// NOTE(review): the version pin after "transformers" was garbled in this copy
+// of the patch; the unversioned URL resolves to the latest release -- re-pin it.
+import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers';
+env.allowLocalModels = false;
+
+const first = document.querySelector('#number1');
+const second = document.querySelector('#number2');
+const result = document.querySelector('.result');
+
+const pipe = await pipeline('embeddings');
+
+/** Embeds one text with the pipeline (mean pooling, normalized output). */
+const emb = x => pipe(x, {pooling: 'mean', normalize: true});
+
+/**
+ * Embeds every text in `xs`.
+ * @param {string[]} xs - texts to embed
+ * @returns {Promise<Array<{value: string, embeddings: *}>>} one record per
+ *   text, pairing it with the `data` of its pipeline output
+ */
+async function createEmbeddings(xs) {
+  const outputs = await Promise.all(xs.map(emb));
+  return outputs.map((x, i) => ({value: xs[i], embeddings: x.data}));
+}
+
+/** Dot product of two equal-length numeric vectors. */
+function dotp(x, y) {
+  return x.reduce((sum, xi, i) => sum + xi * y[i], 0);
+}
+
+/** Cosine similarity of two numeric vectors: A.B / (|A| |B|). */
+function cosineSimilarity(A, B) {
+  return dotp(A, B) / (Math.sqrt(dotp(A, A)) * Math.sqrt(dotp(B, B)));
+}
+
+const request = indexedDB.open("embeddings");
+request.onerror = (event) => {
+  console.error("Why didn't you allow my web app to use IndexedDB?!");
+};
+
+request.onupgradeneeded = (event) => {
+  console.log('onupgradeneeded');
+  const db = event.target.result;
+  db.createObjectStore('embeddings', {autoIncrement: true});
+};
+
+let embeddings;
+
+// On change of the first input, rank the stored texts against a fixed query.
+first.onchange = e => {
+  const request = indexedDB.open("embeddings");
+  request.onerror = () => console.error('opening the embeddings database failed', request.error);
+  request.onsuccess = (event) => {
+    const db = event.target.result;
+    const t = db.transaction('embeddings', 'readonly').objectStore('embeddings').getAll();
+    t.onerror = () => console.error('transaction failed');
+    t.onsuccess = e => {
+      embeddings = e.target.result;
+      if (embeddings && embeddings.length) {
+        emb('I really like curry.')
+          .then(yemb => {
+            const r = embeddings
+              .map(x => ({value: x.value, similarity: cosineSimilarity(x.embeddings, yemb.data)}));
+            console.log(r);
+            result.innerHTML = r
+              // The comparator must return a number (negative/zero/positive);
+              // a boolean result can never signal "a sorts before b".
+              .sort((a, b) => a.similarity - b.similarity)
+              .map(({value, similarity}) => `<div>${value}: ${similarity}</div>`)
+              .join('<br/>');
+          })
+          .catch(err => console.error('embedding the query failed', err));
+      } else {
+        console.error(embeddings);
+      }
+    };
+  };
+};
+
+// Seed the store with a few sample sentences when it is found empty.
+request.onsuccess = (event) => {
+  const db = event.target.result;
+  const t = db.transaction('embeddings', 'readonly').objectStore('embeddings').getAll();
+  t.onerror = () => console.error('transaction failed');
+  t.onsuccess = e => {
+    const stored = e.target.result;
+    if (!stored || !stored.length) {
+      createEmbeddings(['Jim likes curry', 'I really dislike potatoes', 'We went to Mars last week'])
+        .then(xsembs => {
+          const st = db
+            .transaction('embeddings', 'readwrite')
+            .objectStore('embeddings');
+          xsembs.forEach(record => {
+            console.log(record);
+            st.add(record);
+          });
+        })
+        .catch(err => console.error('creating embeddings failed', err));
+    }
+  };
+};
+
+const myWorker = new Worker("worker.js");
+
+second.onchange = function() {
+  myWorker.postMessage([first.value, second.value]);
+  console.log('Message posted to worker', [first.value, second.value]);
+};
+
+myWorker.onmessage = function(e) {
+  result.textContent = e.data;
+  console.log('Message received from worker', e.data);
+};
+
+</script>
diff --git a/semanticsearchscratchpad/worker.js b/semanticsearchscratchpad/worker.js
new file mode 100644
index 0000000..447ede5
--- /dev/null
+++ b/semanticsearchscratchpad/worker.js
@@ -0,0 +1,12 @@
+// Multiplies the two values posted by the page and posts back a result string.
+onmessage = function(e) {
+  console.log('Worker: Message received from main script');
+  const result = e.data[0] * e.data[1];
+  if (Number.isNaN(result)) {
+    postMessage('Please write two numbers');
+  } else {
+    const workerResult = 'Result: ' + result;
+    console.log('Worker: Posting message back to main script');
+    postMessage(workerResult);
+  }
+}