summaryrefslogtreecommitdiffstats
path: root/semanticsearchscratchpad
diff options
context:
space:
mode:
authordan <[email protected]>2023-06-07 17:20:38 -0400
committerdan <[email protected]>2023-06-07 17:20:38 -0400
commitace13cf4aa724c078a6a6f36a6f243cd4821a548 (patch)
tree4e6d9b520f25d4a44102dac49b0ece58f7ae6741 /semanticsearchscratchpad
parent68616f40cc7786ac2d001db4bada203bfe477bc4 (diff)
downloaddump-ace13cf4aa724c078a6a6f36a6f243cd4821a548.tar.gz
dump-ace13cf4aa724c078a6a6f36a6f243cd4821a548.tar.bz2
dump-ace13cf4aa724c078a6a6f36a6f243cd4821a548.zip
proto: semantic search with indexdb caching embeddings
Diffstat (limited to 'semanticsearchscratchpad')
-rw-r--r--semanticsearchscratchpad/create-embeddings.js30
-rw-r--r--semanticsearchscratchpad/index.html162
-rw-r--r--semanticsearchscratchpad/worker.js11
3 files changed, 203 insertions, 0 deletions
diff --git a/semanticsearchscratchpad/create-embeddings.js b/semanticsearchscratchpad/create-embeddings.js
new file mode 100644
index 0000000..bbb3a81
--- /dev/null
+++ b/semanticsearchscratchpad/create-embeddings.js
@@ -0,0 +1,30 @@
+onmessage = msg => {
+ const lines = msg.data;
+
+ const request = indexedDB.open("embeddings");
+
+ request.onupgradeneeded = (event) => {
+ console.log('onupgradeneeded')
+ const db = event.target.result;
+ db.createObjectStore('embeddings', {autoIncrement: true});
+ };
+
+ let embeddings;
+ request.onsuccess = (event) => {
+ const db = event.target.result;
+ embeddings = db.transaction('embeddings', 'readwrite').objectStore('embeddings').getAll();
+ if (!embeddings || !embeddings.length) {
+ createEmbeddings(lines)
+ .then(xsembs => {
+ embeddings = xsembs;
+ const st = db
+ .transaction('embeddings', 'readwrite')
+ .objectStore('embeddings');
+ xsembs.forEach(emb => {
+ console.log(emb);
+ st.add(emb);
+ });
+ });
+ }
+ };
+}
diff --git a/semanticsearchscratchpad/index.html b/semanticsearchscratchpad/index.html
new file mode 100644
index 0000000..8a96b54
--- /dev/null
+++ b/semanticsearchscratchpad/index.html
@@ -0,0 +1,162 @@
+ <div class="controls" tabindex="0">
+ <form>
+ <div>
+ <label for="number1">Multiply number 1: </label>
+ <input type="text" id="number1" value="0" />
+ </div>
+ <div>
+ <label for="number2">Multiply number 2: </label>
+ <input type="text" id="number2" value="0" />
+ </div>
+ </form>
+
+ <p class="result">Result: 0</p>
+ </div>
+
+<script type="module">
+import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';
+env.allowLocalModels = false;
+
+let pipe = await pipeline('embeddings');
+
+let emb = x => pipe(x, {pooling:'mean', normalize:'true'});
+
+async function createEmbeddings(xs) {
+ return (await Promise.all(xs.map(emb))).map((x,i) => ({value: xs[i], embeddings: x.data}));
+}
+
+function dotp(x, y) {
+ function dotp_sum(a, b) {
+ return a + b;
+ }
+ function dotp_times(_, i) {
+ return x[i] * y[i];
+ }
+ return x.map(dotp_times).reduce((a, v) => a + v, 0);
+}
+
+function cosineSimilarity(A,B){
+ var similarity = dotp(A, B) / (Math.sqrt(dotp(A,A)) * Math.sqrt(dotp(B,B)));
+ return similarity;
+}
+
+
+
+/** Open DB **/
+
+
+/****/
+
+
+
+//const yemb = (await emb('I really like curry.')).data;
+//
+//
+//console.log('out', xsembs.map(x => ({value:x.value, similarity: cosineSimilarity(x.embeddings, yemb)})));
+//
+
+/******/
+
+const request = indexedDB.open("embeddings");
+request.onerror = (event) => {
+ console.error("Why didn't you allow my web app to use IndexedDB?!");
+};
+
+request.onupgradeneeded = (event) => {
+ console.log('onupgradeneeded')
+ const db = event.target.result;
+ db.createObjectStore('embeddings', {autoIncrement: true});
+ };
+
+var embeddings;
+
+
+number1.onchange = e => {
+ const request = indexedDB.open("embeddings");
+ request.onsuccess = (event) => {
+ const db = event.target.result;
+ const t = db.transaction('embeddings', 'readwrite').objectStore('embeddings').getAll();
+ t.onsuccess = e => {
+ // embeddings.onsuccess = =>
+ embeddings = e.target.result;
+ if (embeddings && embeddings.length) {
+ emb('I really like curry.').then(yemb =>{
+ const r = embeddings
+ .map(x => ({value:x.value, similarity: cosineSimilarity(x.embeddings, yemb.data)}));
+ console.log(r)
+ result.innerHTML = r
+ .sort((a,b) => a.similarity > b.similarity)
+ .map(({value, similarity}) => `<div>${value}: ${similarity}</div>`)
+ .join('<br/>');
+ }
+ );
+ } else {
+ console.error(embeddings);
+ }
+ }
+ }
+}
+
+request.onsuccess = (event) => {
+ const db = event.target.result;
+ let t = db.transaction('embeddings', 'readwrite').objectStore('embeddings').getAll();
+ t.onerror = () => console.error('transaction failed');
+ t.onsuccess = e => {
+ const embeddings = e.target.result;
+ if (!embeddings || !embeddings.length) {
+ createEmbeddings(['Jim likes curry', 'I really dislike potatoes', 'We went to Mars last week'])
+ .then(xsembs => {
+ const st = db
+ .transaction('embeddings', 'readwrite')
+ .objectStore('embeddings');
+ xsembs.forEach(emb => {
+ console.log(emb);
+ st.add(emb);
+ });
+ });
+ }
+}
+}
+
+
+
+
+//request.onsuccess = (event) => {
+// console.log('onsuccess')
+// const db = event.target.result;
+// const st = db
+// .transaction('embeddings', 'readwrite')
+// .objectStore('embeddings');
+// xsembs.forEach(emb => {
+// console.log(emb);
+// st.add(emb);
+// });
+//};
+
+/******/
+
+const first = document.querySelector('#number1');
+const second = document.querySelector('#number2');
+
+const result = document.querySelector('.result');
+
+const myWorker = new Worker("worker.js");
+
+//first.onchange = function() {
+// myWorker.postMessage([first.value, second.value]);
+// console.log('Message posted to worker', [first.value, second.value]);
+//}
+
+second.onchange = function() {
+ myWorker.postMessage([first.value, second.value]);
+ console.log('Message posted to worker', [first.value, second.value]);
+}
+
+myWorker.onmessage = function(e) {
+ result.textContent = e.data;
+ console.log('Message received from worker', e.data);
+}
+
+</script>
+
+
diff --git a/semanticsearchscratchpad/worker.js b/semanticsearchscratchpad/worker.js
new file mode 100644
index 0000000..447ede5
--- /dev/null
+++ b/semanticsearchscratchpad/worker.js
@@ -0,0 +1,11 @@
+onmessage = function(e) {
+ console.log('Worker: Message received from main script');
+ const result = e.data[0] * e.data[1];
+ if (isNaN(result)) {
+ postMessage('Please write two numbers');
+ } else {
+ const workerResult = 'Result: ' + result;
+ console.log('Worker: Posting message back to main script');
+ postMessage(workerResult);
+ }
+}