summary refs log tree commit diff stats
diff options
context:
space:
mode:
author dan <[email protected]> 2023-06-07 21:19:21 -0400
committer dan <[email protected]> 2023-06-07 21:19:21 -0400
commit 5a5e2dd05623aad71d8343a5de0480cd8756f85b (patch)
tree 7357cde29f61eb43fb8fbee49b4c452f953742e0
parent ace13cf4aa724c078a6a6f36a6f243cd4821a548 (diff)
download dump-5a5e2dd05623aad71d8343a5de0480cd8756f85b.tar.gz
dump-5a5e2dd05623aad71d8343a5de0480cd8756f85b.tar.bz2
dump-5a5e2dd05623aad71d8343a5de0480cd8756f85b.zip
proto: working sem search with embedding done by workers
-rw-r--r-- semanticsearchscratchpad/create-embeddings.js 84
-rw-r--r-- semanticsearchscratchpad/index.html 144
2 files changed, 153 insertions, 75 deletions
diff --git a/semanticsearchscratchpad/create-embeddings.js b/semanticsearchscratchpad/create-embeddings.js
index bbb3a81..2d2c321 100644
--- a/semanticsearchscratchpad/create-embeddings.js
+++ b/semanticsearchscratchpad/create-embeddings.js
@@ -1,30 +1,72 @@
-onmessage = msg => {
- const lines = msg.data;
-
- const request = indexedDB.open("embeddings");
-
- request.onupgradeneeded = (event) => {
- console.log('onupgradeneeded')
- const db = event.target.result;
- db.createObjectStore('embeddings', {autoIncrement: true});
- };
-
- let embeddings;
- request.onsuccess = (event) => {
- const db = event.target.result;
- embeddings = db.transaction('embeddings', 'readwrite').objectStore('embeddings').getAll();
+
+//const yemb = (await emb('I really like curry.')).data;
+//
+//
+//console.log('out', xsembs.map(x => ({value:x.value, similarity: cosineSimilarity(x.embeddings, yemb)})));
+//
+//import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';
+
+//importScripts('./embeddings-lib.js');
+
+import('https://cdn.jsdelivr.net/npm/@xenova/[email protected]')
+ .then(({pipeline, env}) => {
+
+env.allowLocalModels = false;
+
+//const extractor = await pipeline('embeddings', 'Xenova/all-MiniLM-L6-v2');
+//const emb = x => {postMessage(x) ; return extractor(x, {pooling:'mean', normalize:'true'})};
+return pipeline('embeddings', 'Xenova/all-MiniLM-L6-v2')
+ .then(extractor => {
+
+const emb = x => extractor(x, {pooling:'mean', normalize:'true'});
+
+async function createEmbeddings(xs) {
+ return (await Promise.all(xs.map(emb))).map((x,i) => ({value: xs[i], embeddings: x.data}));
+}
+
+function generateEmbeddings({data: {group, dataset}}) {
+const request = indexedDB.open("embeddings");
+request.onerror = () => {
+ console.error("Why didn't you allow my web app to use IndexedDB?!");
+};
+
+request.onupgradeneeded = (event) => {
+ const db = event.target.result;
+ const objectStore = db.createObjectStore('embeddings', {autoIncrement: true});
+ objectStore.createIndex("value", "value", { unique: false });
+ };
+
+request.onsuccess = (event) => {
+ const db = event.target.result;
+ let t = db.transaction('embeddings', 'readwrite').objectStore('embeddings').getAll();
+ t.onerror = () => console.error('transaction failed');
+ t.onsuccess = e => {
+ const embeddings = e.target.result.filter(x => x.group === group);
if (!embeddings || !embeddings.length) {
- createEmbeddings(lines)
+ createEmbeddings(dataset)
.then(xsembs => {
- embeddings = xsembs;
const st = db
.transaction('embeddings', 'readwrite')
.objectStore('embeddings');
xsembs.forEach(emb => {
- console.log(emb);
- st.add(emb);
+ st.add({...emb, group});
});
});
- }
- };
+ }
+}
+}
+
}
+
+self.onmessage = generateEmbeddings;
+postMessage({loaded:true});
+return generateEmbeddings;
+ });
+ });
+
+
+// const generateEmbeddings = self.onmessage;
+//
+//export default generateEmbeddings;
+
+
diff --git a/semanticsearchscratchpad/index.html b/semanticsearchscratchpad/index.html
index 8a96b54..7657c94 100644
--- a/semanticsearchscratchpad/index.html
+++ b/semanticsearchscratchpad/index.html
@@ -10,20 +10,65 @@
</div>
</form>
- <p class="result">Result: 0</p>
+ <pre id="result"></pre>
</div>
+ <!-- <script src="dataset.js"></script> --!>
<script type="module">
+import {Stripe_1,
+Stripe_2,
+Stripe_3,
+Gmail_1,
+Gmail_2,
+Gmail_3,
+Alexa_1,
+Alexa_2,
+Alexa_3
+} from './dataset.js';
+
import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';
env.allowLocalModels = false;
+const extractor = await pipeline('embeddings', 'Xenova/all-MiniLM-L6-v2');
+const emb = x => extractor(x, {pooling:'mean', normalize:'true'});
+
+
+const s = {Stripe_1,
+Stripe_2,
+Stripe_3,
+Gmail_1,
+Gmail_2,
+Gmail_3,
+Alexa_1,
+Alexa_2,
+Alexa_3};
+
+
+//import genE from './create-embeddings.js';
+
+//genE({data: {group:'Stripe_1', dataset: Stripe_1}});
+
+Object.keys(s).map(k => {
+const embWorker = new Worker("create-embeddings.js");
+//const k = 'Gmail_1';
+embWorker.onmessage = ({data}) => {
+ if (data.loaded) {
+ embWorker.postMessage({group:k, dataset: s[k]});
+ console.log('Message posted to worker', {group:k, dataset: s[k]});
+ } else {
+ console.error(data);
+ }
+}
+});
-let pipe = await pipeline('embeddings');
-let emb = x => pipe(x, {pooling:'mean', normalize:'true'});
-async function createEmbeddings(xs) {
- return (await Promise.all(xs.map(emb))).map((x,i) => ({value: xs[i], embeddings: x.data}));
-}
+//let pipe = await pipeline('embeddings', 'Xenova/all-MiniLM-L6-v2');
+//
+//let emb = x => pipe(x, {pooling:'mean', normalize:'true'});
+//
+//async function createEmbeddings(xs) {
+// return (await Promise.all(xs.map(emb))).map((x,i) => ({value: xs[i], embeddings: x.data}));
+//}
function dotp(x, y) {
function dotp_sum(a, b) {
@@ -42,36 +87,28 @@ function cosineSimilarity(A,B){
-/** Open DB **/
-
-
-/****/
-
-
-
//const yemb = (await emb('I really like curry.')).data;
//
//
//console.log('out', xsembs.map(x => ({value:x.value, similarity: cosineSimilarity(x.embeddings, yemb)})));
//
-/******/
-
-const request = indexedDB.open("embeddings");
-request.onerror = (event) => {
- console.error("Why didn't you allow my web app to use IndexedDB?!");
-};
-
-request.onupgradeneeded = (event) => {
- console.log('onupgradeneeded')
- const db = event.target.result;
- db.createObjectStore('embeddings', {autoIncrement: true});
- };
-
+//const request = indexedDB.open("embeddings");
+//request.onerror = (event) => {
+// console.error("Why didn't you allow my web app to use IndexedDB?!");
+//};
+//
+//request.onupgradeneeded = (event) => {
+// console.log('onupgradeneeded')
+// const db = event.target.result;
+// const objectStore = db.createObjectStore('embeddings', {autoIncrement: true});
+// objectStore.createIndex("value", "value", { unique: false });
+// };
+//
var embeddings;
-
number1.onchange = e => {
+ const query = e.target.value;
const request = indexedDB.open("embeddings");
request.onsuccess = (event) => {
const db = event.target.result;
@@ -80,14 +117,14 @@ number1.onchange = e => {
// embeddings.onsuccess = =>
embeddings = e.target.result;
if (embeddings && embeddings.length) {
- emb('I really like curry.').then(yemb =>{
+ emb(query).then(yemb =>{
const r = embeddings
.map(x => ({value:x.value, similarity: cosineSimilarity(x.embeddings, yemb.data)}));
- console.log(r)
- result.innerHTML = r
- .sort((a,b) => a.similarity > b.similarity)
- .map(({value, similarity}) => `<div>${value}: ${similarity}</div>`)
- .join('<br/>');
+ result.textContent = r
+ .sort((a,b) => b.similarity - a.similarity)
+ .slice(0, 10)
+ .map(({value, similarity}) => `${similarity}: ${value}`)
+ .join('\n');
}
);
} else {
@@ -97,26 +134,26 @@ number1.onchange = e => {
}
}
-request.onsuccess = (event) => {
- const db = event.target.result;
- let t = db.transaction('embeddings', 'readwrite').objectStore('embeddings').getAll();
- t.onerror = () => console.error('transaction failed');
- t.onsuccess = e => {
- const embeddings = e.target.result;
- if (!embeddings || !embeddings.length) {
- createEmbeddings(['Jim likes curry', 'I really dislike potatoes', 'We went to Mars last week'])
- .then(xsembs => {
- const st = db
- .transaction('embeddings', 'readwrite')
- .objectStore('embeddings');
- xsembs.forEach(emb => {
- console.log(emb);
- st.add(emb);
- });
- });
- }
-}
-}
+//request.onsuccess = (event) => {
+// const db = event.target.result;
+// let t = db.transaction('embeddings', 'readwrite').objectStore('embeddings').getAll();
+// t.onerror = () => console.error('transaction failed');
+// t.onsuccess = e => {
+// const embeddings = e.target.result;
+// if (!embeddings || !embeddings.length) {
+// createEmbeddings(ds)
+// .then(xsembs => {
+// const st = db
+// .transaction('embeddings', 'readwrite')
+// .objectStore('embeddings');
+// xsembs.forEach(emb => {
+// console.log(emb);
+// st.add(emb);
+// });
+// });
+// }
+//}
+//}
@@ -138,7 +175,6 @@ request.onsuccess = (event) => {
const first = document.querySelector('#number1');
const second = document.querySelector('#number2');
-const result = document.querySelector('.result');
const myWorker = new Worker("worker.js");