summary refs log tree commit diff stats
path: root/semanticsearchscratchpad/index.html
diff options
context:
space:
mode:
Diffstat (limited to 'semanticsearchscratchpad/index.html')
-rw-r--r-- semanticsearchscratchpad/index.html 144
1 files changed, 90 insertions, 54 deletions
diff --git a/semanticsearchscratchpad/index.html b/semanticsearchscratchpad/index.html
index 8a96b54..7657c94 100644
--- a/semanticsearchscratchpad/index.html
+++ b/semanticsearchscratchpad/index.html
@@ -10,20 +10,65 @@
</div>
</form>
- <p class="result">Result: 0</p>
+ <pre id="result"></pre>
</div>
+ <!-- <script src="dataset.js"></script> -->
<script type="module">
+import {Stripe_1,
+Stripe_2,
+Stripe_3,
+Gmail_1,
+Gmail_2,
+Gmail_3,
+Alexa_1,
+Alexa_2,
+Alexa_3
+} from './dataset.js';
+
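import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers'; // NOTE(review): the original "@<version>" pin was lost to e-mail obfuscation in this listing — restore it from the repository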
env.allowLocalModels = false;
+const extractor = await pipeline('embeddings', 'Xenova/all-MiniLM-L6-v2');
+const emb = x => extractor(x, {pooling:'mean', normalize:'true'});
+
+
+const s = {Stripe_1,
+Stripe_2,
+Stripe_3,
+Gmail_1,
+Gmail_2,
+Gmail_3,
+Alexa_1,
+Alexa_2,
+Alexa_3};
+
+
+//import genE from './create-embeddings.js';
+
+//genE({data: {group:'Stripe_1', dataset: Stripe_1}});
+
+Object.keys(s).map(k => {
+const embWorker = new Worker("create-embeddings.js");
+//const k = 'Gmail_1';
+embWorker.onmessage = ({data}) => {
+ if (data.loaded) {
+ embWorker.postMessage({group:k, dataset: s[k]});
+ console.log('Message posted to worker', {group:k, dataset: s[k]});
+ } else {
+ console.error(data);
+ }
+}
+});
-let pipe = await pipeline('embeddings');
-let emb = x => pipe(x, {pooling:'mean', normalize:'true'});
-async function createEmbeddings(xs) {
- return (await Promise.all(xs.map(emb))).map((x,i) => ({value: xs[i], embeddings: x.data}));
-}
+//let pipe = await pipeline('embeddings', 'Xenova/all-MiniLM-L6-v2');
+//
+//let emb = x => pipe(x, {pooling:'mean', normalize:'true'});
+//
+//async function createEmbeddings(xs) {
+// return (await Promise.all(xs.map(emb))).map((x,i) => ({value: xs[i], embeddings: x.data}));
+//}
function dotp(x, y) {
function dotp_sum(a, b) {
@@ -42,36 +87,28 @@ function cosineSimilarity(A,B){
-/** Open DB **/
-
-
-/****/
-
-
-
//const yemb = (await emb('I really like curry.')).data;
//
//
//console.log('out', xsembs.map(x => ({value:x.value, similarity: cosineSimilarity(x.embeddings, yemb)})));
//
-/******/
-
-const request = indexedDB.open("embeddings");
-request.onerror = (event) => {
- console.error("Why didn't you allow my web app to use IndexedDB?!");
-};
-
-request.onupgradeneeded = (event) => {
- console.log('onupgradeneeded')
- const db = event.target.result;
- db.createObjectStore('embeddings', {autoIncrement: true});
- };
-
+//const request = indexedDB.open("embeddings");
+//request.onerror = (event) => {
+// console.error("Why didn't you allow my web app to use IndexedDB?!");
+//};
+//
+//request.onupgradeneeded = (event) => {
+// console.log('onupgradeneeded')
+// const db = event.target.result;
+// const objectStore = db.createObjectStore('embeddings', {autoIncrement: true});
+// objectStore.createIndex("value", "value", { unique: false });
+// };
+//
var embeddings;
-
number1.onchange = e => {
+ const query = e.target.value;
const request = indexedDB.open("embeddings");
request.onsuccess = (event) => {
const db = event.target.result;
@@ -80,14 +117,14 @@ number1.onchange = e => {
// embeddings.onsuccess = =>
embeddings = e.target.result;
if (embeddings && embeddings.length) {
- emb('I really like curry.').then(yemb =>{
+ emb(query).then(yemb =>{
const r = embeddings
.map(x => ({value:x.value, similarity: cosineSimilarity(x.embeddings, yemb.data)}));
- console.log(r)
- result.innerHTML = r
- .sort((a,b) => a.similarity > b.similarity)
- .map(({value, similarity}) => `<div>${value}: ${similarity}</div>`)
- .join('<br/>');
+ result.textContent = r
+ .sort((a,b) => b.similarity - a.similarity)
+ .slice(0, 10)
+ .map(({value, similarity}) => `${similarity}: ${value}`)
+ .join('\n');
}
);
} else {
@@ -97,26 +134,26 @@ number1.onchange = e => {
}
}
-request.onsuccess = (event) => {
- const db = event.target.result;
- let t = db.transaction('embeddings', 'readwrite').objectStore('embeddings').getAll();
- t.onerror = () => console.error('transaction failed');
- t.onsuccess = e => {
- const embeddings = e.target.result;
- if (!embeddings || !embeddings.length) {
- createEmbeddings(['Jim likes curry', 'I really dislike potatoes', 'We went to Mars last week'])
- .then(xsembs => {
- const st = db
- .transaction('embeddings', 'readwrite')
- .objectStore('embeddings');
- xsembs.forEach(emb => {
- console.log(emb);
- st.add(emb);
- });
- });
- }
-}
-}
+//request.onsuccess = (event) => {
+// const db = event.target.result;
+// let t = db.transaction('embeddings', 'readwrite').objectStore('embeddings').getAll();
+// t.onerror = () => console.error('transaction failed');
+// t.onsuccess = e => {
+// const embeddings = e.target.result;
+// if (!embeddings || !embeddings.length) {
+// createEmbeddings(ds)
+// .then(xsembs => {
+// const st = db
+// .transaction('embeddings', 'readwrite')
+// .objectStore('embeddings');
+// xsembs.forEach(emb => {
+// console.log(emb);
+// st.add(emb);
+// });
+// });
+// }
+//}
+//}
@@ -138,7 +175,6 @@ request.onsuccess = (event) => {
const first = document.querySelector('#number1');
const second = document.querySelector('#number2');
-const result = document.querySelector('.result');
const myWorker = new Worker("worker.js");