chore: Update version to 1.1.9 and add Model Settings to Ollama settings page

Author: n4ze3m
Date: 2024-05-23 00:39:44 +05:30
parent d2afcc6a39
commit b3a455382c
13 changed files with 1271 additions and 18 deletions

src/models/utils/ollama.ts Normal file

@@ -0,0 +1,201 @@
import { IterableReadableStream } from "@langchain/core/utils/stream";
import type { StringWithAutocomplete } from "@langchain/core/utils/types";
import { BaseLanguageModelCallOptions } from "@langchain/core/language_models/base";
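
// Camel-cased generation options accepted by the wrapper classes; they map
// onto the snake_case keys Ollama expects (see OllamaRequestParams below).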
export interface OllamaInput {
embeddingOnly?: boolean;
f16KV?: boolean;
frequencyPenalty?: number;
headers?: Record<string, string>;
keepAlive?: string;
logitsAll?: boolean;
lowVram?: boolean;
mainGpu?: number;
model?: string;
baseUrl?: string;
mirostat?: number;
mirostatEta?: number;
mirostatTau?: number;
numBatch?: number;
numCtx?: number;
numGpu?: number;
numGqa?: number;
numKeep?: number;
numPredict?: number;
numThread?: number;
penalizeNewline?: boolean;
presencePenalty?: number;
repeatLastN?: number;
repeatPenalty?: number;
ropeFrequencyBase?: number;
ropeFrequencyScale?: number;
temperature?: number;
stop?: string[];
tfsZ?: number;
topK?: number;
topP?: number;
typicalP?: number;
useMLock?: boolean;
useMMap?: boolean;
vocabOnly?: boolean;
seed?: number;
format?: StringWithAutocomplete<"json">;
}
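
// JSON body sent to Ollama's /api/generate and /api/chat endpoints. The
// nested `options` object uses Ollama's native snake_case parameter names.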
export interface OllamaRequestParams {
model: string;
format?: StringWithAutocomplete<"json">;
images?: string[];
options: {
embedding_only?: boolean;
f16_kv?: boolean;
frequency_penalty?: number;
logits_all?: boolean;
low_vram?: boolean;
main_gpu?: number;
mirostat?: number;
mirostat_eta?: number;
mirostat_tau?: number;
num_batch?: number;
num_ctx?: number;
num_gpu?: number;
num_gqa?: number;
num_keep?: number;
num_thread?: number;
num_predict?: number;
penalize_newline?: boolean;
presence_penalty?: number;
repeat_last_n?: number;
repeat_penalty?: number;
rope_frequency_base?: number;
rope_frequency_scale?: number;
temperature?: number;
stop?: string[];
tfs_z?: number;
top_k?: number;
top_p?: number;
typical_p?: number;
use_mlock?: boolean;
use_mmap?: boolean;
vocab_only?: boolean;
};
}
export type OllamaMessage = {
role: StringWithAutocomplete<"user" | "assistant" | "system">;
content: string;
images?: string[];
};
export interface OllamaGenerateRequestParams extends OllamaRequestParams {
prompt: string;
}
export interface OllamaChatRequestParams extends OllamaRequestParams {
messages: OllamaMessage[];
}
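
// Fields common to every streamed chunk. The timing and token-count fields
// are only populated on the final chunk, where `done` is true.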
export type BaseOllamaGenerationChunk = {
model: string;
created_at: string;
done: boolean;
total_duration?: number;
load_duration?: number;
prompt_eval_count?: number;
prompt_eval_duration?: number;
eval_count?: number;
eval_duration?: number;
};
export type OllamaGenerationChunk = BaseOllamaGenerationChunk & {
response: string;
};
export type OllamaChatGenerationChunk = BaseOllamaGenerationChunk & {
message: OllamaMessage;
};
export type OllamaCallOptions = BaseLanguageModelCallOptions & {
headers?: Record<string, string>;
};
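
// Ollama streams results as newline-delimited JSON: each complete line of
// the response body is a standalone JSON object. This helper POSTs the
// request, buffers any partial trailing line between network chunks, and
// yields each parsed object as it arrives.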
async function* createOllamaStream(
url: string,
params: OllamaRequestParams,
options: OllamaCallOptions
) {
let formattedUrl = url;
if (formattedUrl.startsWith("http://localhost:")) {
// Node 18 has issues with resolving "localhost"
// See https://github.com/node-fetch/node-fetch/issues/1624
formattedUrl = formattedUrl.replace(
"http://localhost:",
"http://127.0.0.1:"
);
}
const response = await fetch(formattedUrl, {
method: "POST",
body: JSON.stringify(params),
headers: {
"Content-Type": "application/json",
...options.headers,
},
signal: options.signal,
});
if (!response.ok) {
let error;
const responseText = await response.text();
try {
const json = JSON.parse(responseText);
error = new Error(
`Ollama call failed with status code ${response.status}: ${json.error}`
);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
} catch (e: any) {
error = new Error(
`Ollama call failed with status code ${response.status}: ${responseText}`
);
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(error as any).response = response;
throw error;
}
if (!response.body) {
throw new Error(
"Could not begin Ollama stream. Please check the given URL and try again."
);
}
const stream = IterableReadableStream.fromReadableStream(response.body);
const decoder = new TextDecoder();
let extra = "";
for await (const chunk of stream) {
// Pass { stream: true } so a multi-byte UTF-8 sequence split across
// network chunks is carried over rather than decoded as U+FFFD.
const decoded = extra + decoder.decode(chunk, { stream: true });
const lines = decoded.split("\n");
extra = lines.pop() || "";
for (const line of lines) {
try {
yield JSON.parse(line);
} catch (e) {
console.warn(`Received a non-JSON parseable chunk: ${line}`);
}
}
}
}
export async function* createOllamaGenerateStream(
baseUrl: string,
params: OllamaGenerateRequestParams,
options: OllamaCallOptions
): AsyncGenerator<OllamaGenerationChunk> {
yield* createOllamaStream(`${baseUrl}/api/generate`, params, options);
}
export async function* createOllamaChatStream(
baseUrl: string,
params: OllamaChatRequestParams,
options: OllamaCallOptions
): AsyncGenerator<OllamaChatGenerationChunk> {
yield* createOllamaStream(`${baseUrl}/api/chat`, params, options);
}
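
For context, here is a minimal consumption sketch (not part of this commit). The base URL, model name, option values, and prompt are illustrative assumptions, and the import path depends on where the module is consumed from:

// Hypothetical usage of createOllamaChatStream (Node-flavored sketch).
import { createOllamaChatStream } from "./ollama"; // adjust path as needed

async function main() {
  const stream = createOllamaChatStream(
    "http://localhost:11434", // assumed local Ollama server
    {
      model: "llama3", // hypothetical model name
      messages: [{ role: "user", content: "Why is the sky blue?" }],
      options: { temperature: 0.7, num_ctx: 2048 },
    },
    {} // no extra headers or abort signal
  );
  // Print tokens as they stream in; the final chunk has done === true.
  for await (const chunk of stream) {
    if (!chunk.done) {
      process.stdout.write(chunk.message.content);
    }
  }
}

main();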