fix(core): allow core to be used as a utility, turning off some features that cause assertion errors. feature(ai): allow more flags
heapwolf committed Jun 10, 2024
1 parent 76ca14d commit 9304f50
Showing 17 changed files with 243 additions and 82 deletions.
72 changes: 64 additions & 8 deletions api/ai.js
@@ -43,13 +43,49 @@ import * as exports from './ai.js'
*/
export class LLM extends EventEmitter {
/**
* Constructs an LLM instance.
* @param {Object} [options] - The options for initializing the LLM.
* @param {string} [options.path] - The path to a valid model (.gguf).
* @param {string} [options.prompt] - The query that guides the model to generate a relevant and coherent responses.
* @param {string} [options.id] - The optional ID for the LLM instance.
* @throws {Error} If the model path is not provided.
* Constructs an LLM instance. Each parameter is designed to configure and control
* the behavior of the underlying large language model provided by llama.cpp.
* @param {Object} options - Configuration options for the LLM instance.
* @param {string} options.path - The file path to the model in .gguf format. This model file contains
* the weights and configuration necessary for initializing the language model.
* @param {string} options.prompt - The initial input text to the model, setting the context or query
* for generating responses. The model uses this as a starting point for text generation.
* @param {string} [options.id] - An optional unique identifier for this specific instance of the model,
* useful for tracking or referencing the model in multi-model setups.
* @param {number} [options.n_ctx=1024] - Specifies the maximum number of tokens that the model can consider
* for a single query. This is crucial for managing memory and computational
* efficiency. Exceeding the model's configuration may lead to errors or truncated outputs.
* @param {number} [options.n_threads=8] - The number of threads allocated for the model's computation,
* affecting performance and speed of response generation.
* @param {number} [options.temp=1.1] - Sampling temperature controls the randomness of predictions.
* Higher values increase diversity, potentially at the cost of coherence.
* @param {number} [options.max_tokens=512] - The upper limit on the number of tokens that the model can generate
* in response to a single prompt. This prevents runaway generations.
* @param {number} [options.n_gpu_layers=32] - The number of model layers offloaded to the GPU.
* Offloading more layers speeds up inference at the cost of GPU memory.
* @param {number} [options.n_keep=0] - The number of tokens from the start of the prompt to keep when the
* context window fills and older tokens are discarded, preserving the initial instructions.
* @param {number} [options.n_batch=0] - The size of processing batches. Larger batch sizes can reduce
* the time per token generation by parallelizing computations.
* @param {number} [options.n_predict=0] - Specifies how many forward predictions the model should make
* from the current state. This can pre-generate responses or calculate probabilities.
* @param {number} [options.grp_attn_n=0] - Group attention parameter 'N' modifies how attention mechanisms
* within the model are grouped and interact, affecting the model’s focus and accuracy.
* @param {number} [options.grp_attn_w=0] - Group attention parameter 'W' adjusts the width of each attention group,
* influencing the breadth of context considered by each attention group.
* @param {number} [options.seed=0] - A seed for the random number generator used in the model. Setting this ensures
* consistent results in model outputs, important for reproducibility in experiments.
* @param {number} [options.top_k=0] - Limits the model's output choices to the top 'k' most probable next words,
* reducing the risk of less likely, potentially nonsensical outputs.
* @param {float} [options.tok_p=0.0] - Top-p (nucleus) sampling threshold, filtering the token selection pool
* to only those whose cumulative probability exceeds this value, enhancing output relevance.
* @param {float} [options.min_p=0.0] - Sets a minimum probability filter for token generation, ensuring
* that generated tokens have at least this likelihood of being relevant or coherent.
* @param {float} [options.tfs_z=0.0] - Tail-free sampling parameter 'z', which prunes the low-probability tail
* of the token distribution; a value of 1.0 disables tail-free sampling.
* @throws {Error} Throws an error if the model path is not provided, as the model cannot initialize without it.
*/

constructor (options = {}) {
super()

@@ -63,8 +99,27 @@ export class LLM extends EventEmitter {

const opts = {
id: this.id,
path: this.path,
prompt: this.prompt,
path: this.path,
antiprompt: options.antiprompt,
conversation: options.conversation === true, // Convert to boolean, more idiomatic than String(true/false)
chatml: options.chatml === true,
instruct: options.instruct === true,
n_ctx: options.n_ctx || 1024, // Simplified, assuming default value of 1024 if not specified
n_threads: options.n_threads || 8,
temp: options.temp || 1.1, // Assuming `temp` should be a number, not a string
max_tokens: options.max_tokens || 512,
n_gpu_layers: options.n_gpu_layers || 32,
n_keep: options.n_keep || 0,
n_batch: options.n_batch || 0,
n_predict: options.n_predict || 0,
grp_attn_n: options.grp_attn_n || 0,
grp_attn_w: options.grp_attn_w || 0,
seed: options.seed || 0, // Default seed if not specified
top_k: options.top_k || 0, // Default top_k if not specified
tok_p: options.tok_p || 0.0, // Default tok_p if not specified
min_p: options.min_p || 0.0, // Default min_p if not specified
tfs_z: options.tfs_z || 0.0 // Default tfs_z if not specified
}

globalThis.addEventListener('data', event => {
@@ -92,7 +147,8 @@ export class LLM extends EventEmitter {
}
})

const result = ipc.sendSync('ai.llm.create', opts)
console.log('NEW LLM', opts)
const result = ipc.request('ai.llm.create', opts)

if (result.err) {
throw result.err
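For context, a minimal usage sketch of the constructor documented above. The module specifier, model path, and event name are assumptions for illustration; only the option names come from this diff.

import { LLM } from 'socket:ai'

// hypothetical model path; any local .gguf model file should work
const llm = new LLM({
  path: '/models/example-7b.Q4_K_M.gguf',
  prompt: 'You are a concise assistant.',
  n_ctx: 2048,
  n_threads: 8,
  temp: 0.8,
  max_tokens: 512
})

// assumed event name; the constructor above registers a global 'data' listener
// and the class extends EventEmitter, so per-instance events are expected
llm.on('data', token => console.log(token))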
11 changes: 7 additions & 4 deletions api/latica/api.js
@@ -55,6 +55,7 @@ async function api (options = {}, events, dgram) {
}

_peer.onData = (...args) => bus._emit('#data', ...args)
_peer.onDebug = (...args) => bus._emit('#debug', ...args)
_peer.onSend = (...args) => bus._emit('#send', ...args)
_peer.onFirewall = (...args) => bus._emit('#firewall', ...args)
_peer.onMulticast = (...args) => bus._emit('#multicast', ...args)
@@ -126,6 +127,8 @@ async function api (options = {}, events, dgram) {
bus.seal = (m, v = options.signingKeys) => _peer.seal(m, v)
bus.open = (m, v = options.signingKeys) => _peer.open(m, v)

bus.send = (...args) => _peer.send(...args)

bus.query = (...args) => _peer.query(...args)

const pack = async (eventName, value, opts = {}) => {
@@ -163,10 +166,10 @@ async function api (options = {}, events, dgram) {
const sub = bus.subclusters.get(scid)
if (!sub) return {}

try {
opened = await _peer.open(packet.message, scid)
} catch (err) {
sub._emit('warning', err)
const { err: errOpen, data: dataOpened } = await _peer.open(packet.message, scid)

if (errOpen) {
sub._emit('warning', errOpen)
return {}
}

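The unpack path above moves from try/catch to an error-as-value convention. A minimal sketch of the difference, assuming _peer.open now resolves to an { err, data } pair as the destructuring implies:

// before: a failed open threw, so the caller needed try/catch
// try { opened = await _peer.open(packet.message, scid) } catch (err) { sub._emit('warning', err) }

// after: failures travel in the return value and the happy path stays flat
const { err, data } = await _peer.open(packet.message, scid)
if (err) {
  sub._emit('warning', err)
  return {}
}
// data is the opened (decrypted) message, ready to unpack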
2 changes: 1 addition & 1 deletion api/latica/encryption.js
@@ -174,7 +174,7 @@ export class Encryption {
throw new Error('ENOTVERIFIED')
}

return Buffer.from(sodium.crypto_box_seal_open(ct, pk, sk))
return sodium.crypto_box_seal_open(ct, pk, sk)
}

/**
7 changes: 4 additions & 3 deletions api/latica/index.js
@@ -572,15 +572,15 @@ export class Peer {

await this.mcast(packet)
debug(this.peerId, `-> RESEND (packetId=${packetId})`)
if (this.onState) await this.onState(this.getState())
if (this.onState) this.onState(this.getState())
}
}

/**
* Get the serializable state of the peer (can be passed to the constructor or create method)
* @return {object}
*/
async getState () {
getState () {
this.config.clock = this.clock // save off the clock

const peers = this.peers.map(p => {
@@ -939,7 +939,7 @@ export class Peer {
})

debug(this.peerId, `-> JOIN (clusterId=${cid}, subclusterId=${scid}, clock=${packet.clock}/${this.clock})`)
if (this.onState) await this.onState(this.getState())
if (this.onState) this.onState(this.getState())

this.mcast(packet)
this.gate.set(packet.packetId.toString('hex'), 1)
@@ -1097,6 +1097,7 @@ export class Peer {

this.closing = true
this.socket.close()
this.probeSocket.close()

if (this.onClose) this.onClose()
}
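getState() is now synchronous and onState is invoked without await, so state callbacks should return quickly. A minimal consumer sketch, assuming an application-supplied persistState function that is not part of this diff:

peer.onState = (state) => {
  // state is the plain object returned by the now-synchronous getState();
  // the peer no longer awaits this callback, so defer any heavy work
  queueMicrotask(() => persistState(state))
}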
2 changes: 1 addition & 1 deletion api/latica/packets.js
@@ -320,7 +320,7 @@ export class Packet {
p.message = String(p.message)
}

if (p.message?.length > Packet.MESSAGE_BYTES) throw new Error('ETOOBIG')
if (p.message?.length > Packet.maxLength) throw new Error('ETOOBIG')

// we only have p.nextId when we know ahead of time, if it's empty that's fine.
if (p.packetId.length === 1 && p.packetId[0] === 0) {
6 changes: 4 additions & 2 deletions api/latica/proxy.js
@@ -10,7 +10,6 @@
* Protocol
*
*/
import { Deferred } from '../async.js'
import path from '../path.js'
const { pathname } = new URL(import.meta.url)

@@ -236,7 +235,10 @@ class PeerWorkerProxy {
}

const seq = ++this.#index
const d = new Deferred()
const { promise, resolve, reject } = Promise.withResolvers()
const d = promise
d.resolve = resolve
d.reject = reject

this.#channel.port1.postMessage(
{ prop, data, seq },
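The proxy above swaps the imported Deferred helper for the native Promise.withResolvers() (ES2024). A minimal sketch of the equivalence; the Deferred shape shown is typical, not necessarily the exact implementation that was removed:

// a typical Deferred-style helper: a promise plus its settlement functions
class Deferred {
  constructor () {
    this.promise = new Promise((resolve, reject) => {
      this.resolve = resolve
      this.reject = reject
    })
  }
}

// the platform now provides the same three pieces directly
const { promise, resolve, reject } = Promise.withResolvers()
setTimeout(() => resolve('done'), 10)
promise.then(value => console.log(value)) // logs 'done'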
1 change: 1 addition & 0 deletions api/latica/worker.js
@@ -21,6 +21,7 @@ globalThis.addEventListener('message', ({ data: source }) => {
case 'create': {
peer = new Peer(data, dgram)

peer.onDebug = (...args) => this.callMainThread('onDebug', args)
peer.onConnecting = (...args) => this.callMainThread('onConnecting', args)
peer.onConnection = (...args) => this.callMainThread('onConnection', args)
peer.onDisconnection = (...args) => this.callMainThread('onDisconnection', args)
2 changes: 2 additions & 0 deletions bin/update-network-protocol.sh
@@ -1,5 +1,7 @@
#!/bin/sh

npm link @socketsupply/latica

version="${1:-"1.0.23-0"}"

rm -rf api/latica.js || exit $?
12 changes: 8 additions & 4 deletions src/cli/cli.cc
@@ -43,16 +43,20 @@
#include <span>
#include <unordered_set>

#ifndef CMD_RUNNER
#define CMD_RUNNER
#endif

#ifndef SSC_CLI
#define SSC_CLI 1
#endif

#include "../extension/extension.hh"
#include "../core/core.hh"

#include "templates.hh"
#include "cli.hh"

#ifndef CMD_RUNNER
#define CMD_RUNNER
#endif

#ifndef SOCKET_RUNTIME_BUILD_TIME
#define SOCKET_RUNTIME_BUILD_TIME 0
#endif
4 changes: 4 additions & 0 deletions src/cli/cli.hh
@@ -6,6 +6,10 @@

#include <signal.h>

#ifndef SOCKET_CLI
#define SOCKET_CLI 1
#endif

namespace SSC::CLI {
inline void notify (int signal) {
#if !defined(_WIN32)
4 changes: 2 additions & 2 deletions src/core/core.hh
@@ -111,7 +111,7 @@ namespace SSC {
Thread *eventLoopThread = nullptr;
#endif

Core () :
Core (bool isUtility = false) :
#if !SOCKET_RUNTIME_PLATFORM_IOS
childProcess(this),
#endif
@@ -120,7 +120,7 @@
fs(this),
geolocation(this),
networkStatus(this),
notifications(this),
notifications(this, isUtility),
os(this),
platform(this),
timers(this),
3 changes: 2 additions & 1 deletion src/core/file_system_watcher.cc
@@ -125,7 +125,8 @@ namespace SSC {
// a loop may be configured for the instance already, perhaps here or
// manually by the caller
if (this->core == nullptr) {
this->core = new Core();
const bool isUtility = true;
this->core = new Core(isUtility);
this->ownsCore = true;
}

