diff --git a/.env.example b/.env.example
index 426651a29..84381f848 100644
--- a/.env.example
+++ b/.env.example
@@ -33,6 +33,15 @@ AZURE_GPT4O_MINI_API_KEY=""
 AZURE_GPT4O_MINI_API_BASE=""
 AZURE_GPT4O_MINI_API_VERSION=""
 
+# ENABLE_LLAMA: Set to true to enable Llama as a language model provider
+ENABLE_LLAMA=false
+# LLAMA_API_BASE: The base URL for the Llama API (e.g., http://localhost:11434)
+LLAMA_API_BASE=""
+# LLAMA_MODEL_NAME: The model name to use (e.g., llama3.2-vision)
+LLAMA_MODEL_NAME=""
+# LLAMA_API_ROUTE: The API route for Llama (default: /api/chat)
+LLAMA_API_ROUTE=""
+
 # LLM_KEY: The chosen language model to use. This should be one of the models
 # provided by the enabled LLM providers (e.g., OPENAI_GPT4_TURBO, OPENAI_GPT4V, ANTHROPIC_CLAUDE3, AZURE_OPENAI_GPT4V).
 LLM_KEY=""
diff --git a/Dockerfile b/Dockerfile
index 1364616ba..c5b24c8e6 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -14,15 +14,21 @@ RUN playwright install-deps
 RUN playwright install
 RUN apt-get install -y xauth x11-apps netpbm && apt-get clean
 
+# Install dos2unix so the entrypoint script's line endings can be converted to Unix (LF) format
+RUN apt-get update && \
+    apt-get install -y dos2unix && \
+    apt-get clean
+
 COPY . /app
 
+# Convert the entrypoint script to Unix line endings and make it executable
+RUN dos2unix /app/entrypoint-skyvern.sh && \
+    chmod +x /app/entrypoint-skyvern.sh
+
 ENV PYTHONPATH="/app:$PYTHONPATH"
 ENV VIDEO_PATH=/data/videos
 ENV HAR_PATH=/data/har
 ENV LOG_PATH=/data/log
 ENV ARTIFACT_STORAGE_PATH=/data/artifacts
 
-COPY ./entrypoint-skyvern.sh /app/entrypoint-skyvern.sh
-RUN chmod +x /app/entrypoint-skyvern.sh
-
 CMD [ "/bin/bash", "/app/entrypoint-skyvern.sh" ]
diff --git a/docker-compose.yml b/docker-compose.yml
index 83844785d..03c1a666b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -21,9 +21,12 @@ services:
       retries: 5
 
   skyvern:
-    image: public.ecr.aws/skyvern/skyvern:latest
+    # Build the image locally from the repository Dockerfile instead of pulling the public image
+    build: 
+      context: .
+      dockerfile: Dockerfile
+    # Keep the rest of the configuration
     restart: on-failure
-    # comment out if you want to externally call skyvern API
     ports:
       - 8000:8000
     volumes:
@@ -35,18 +38,20 @@ services:
     environment:
       - DATABASE_STRING=postgresql+psycopg://skyvern:skyvern@postgres:5432/skyvern
       - BROWSER_TYPE=chromium-headful
-      - ENABLE_OPENAI=true
-      - OPENAI_API_KEY=<your_openai_key>
-      # If you want to use other LLM provider, like azure and anthropic:
-      # - ENABLE_ANTHROPIC=true
-      # - LLM_KEY=ANTHROPIC_CLAUDE3_OPUS
-      # - ANTHROPIC_API_KEY=<your_anthropic_key>
-      # - ENABLE_AZURE=true
-      # - LLM_KEY=AZURE_OPENAI
-      # - AZURE_DEPLOYMENT=<your_azure_deployment>
-      # - AZURE_API_KEY=<your_azure_api_key>
-      # - AZURE_API_BASE=<your_azure_api_base>
-      # - AZURE_API_VERSION=<your_azure_api_version>
+      - ENABLE_LLAMA=true
+      - LLM_KEY=LLAMA3
+      - LLAMA_API_BASE=http://192.168.1.65:11434
+      - LLAMA_MODEL_NAME=llama3.2-vision
+      - LLAMA_API_ROUTE=/api/chat
+      - ENABLE_OPENAI=false
+      - ENABLE_ANTHROPIC=false
+      - ENABLE_AZURE=false
+      - ENABLE_BEDROCK=false
+      - ENABLE_AZURE_GPT4O_MINI=false
+      - LLAMA_BASE_URL=http://192.168.1.65:11434
+      - LLAMA_MODEL=llama3.2-vision
+      - ENV=local
+      - SECONDARY_LLM_KEY=LLAMA3
     depends_on:
       postgres:
         condition: service_healthy
@@ -55,6 +60,8 @@ services:
       interval: 5s
       timeout: 5s
       retries: 5
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
 
   skyvern-ui:
     image: public.ecr.aws/skyvern/skyvern-ui:latest
diff --git a/setup.sh b/setup.sh
index 8b7a145ec..c4ef77331 100755
--- a/setup.sh
+++ b/setup.sh
@@ -9,7 +9,7 @@ log_event() {
 
 # Function to check if a command exists
 command_exists() {
-    command -v "$1" &> /dev/null
+    command -v "$1" &>/dev/null
 }
 
 ensure_required_commands() {
@@ -31,7 +31,7 @@ update_or_add_env_var() {
         sed -i.bak "s/^$key=.*/$key=$value/" .env && rm -f .env.bak
     else
         # Add new variable
-        echo "$key=$value" >> .env
+        echo "$key=$value" >>.env
     fi
 }
 
@@ -98,16 +98,25 @@ setup_llm_providers() {
         update_or_add_env_var "ENABLE_AZURE" "false"
     fi
 
+    echo "Do you want to enable Llama (y/n)?"
+    read enable_llama
+    if [[ "$enable_llama" == "y" ]]; then
+        read -p "Enter path to Llama model: " llama_model_path
+        update_or_add_env_var "ENABLE_LLAMA" "true"
+        update_or_add_env_var "LLAMA_MODEL_PATH" "$llama_model_path"
+        model_options+=("LLAMA_3_2_VISION")
+    fi
+
     # Model Selection
     if [ ${#model_options[@]} -eq 0 ]; then
         echo "No LLM providers enabled. You won't be able to run Skyvern unless you enable at least one provider. You can re-run this script to enable providers or manually update the .env file."
     else
         echo "Available LLM models based on your selections:"
         for i in "${!model_options[@]}"; do
-            echo "$((i+1)). ${model_options[$i]}"
+            echo "$((i + 1)). ${model_options[$i]}"
         done
         read -p "Choose a model by number (e.g., 1 for ${model_options[0]}): " model_choice
-        chosen_model=${model_options[$((model_choice-1))]}
+        chosen_model=${model_options[$((model_choice - 1))]}
         echo "Chosen LLM Model: $chosen_model"
         update_or_add_env_var "LLM_KEY" "$chosen_model"
     fi
@@ -115,7 +124,6 @@ setup_llm_providers() {
     echo "LLM provider configurations updated in .env."
 }
 
-
 # Function to initialize .env file
 initialize_env_file() {
     if [ -f ".env" ]; then
@@ -165,14 +173,16 @@ remove_poetry_env() {
 
 # Choose python version
 choose_python_version_or_fail() {
-  # https://github.com/python-poetry/poetry/issues/2117
-  # Py --list-paths 
+    # https://github.com/python-poetry/poetry/issues/2117
+    # Py --list-paths
     # This will output which paths are being used for Python 3.11
-  # Windows users need to poetry env use {{ Py --list-paths with 3.11}}
-  poetry env use python3.11 || { echo "Error: Python 3.11 is not installed. If you're on Windows, check out https://github.com/python-poetry/poetry/issues/2117 to unblock yourself"; exit 1; }
+    # Windows users need to poetry env use {{ Py --list-paths with 3.11}}
+    poetry env use python3.11 || {
+        echo "Error: Python 3.11 is not installed. If you're on Windows, check out https://github.com/python-poetry/poetry/issues/2117 to unblock yourself"
+        exit 1
+    }
 }
 
-
 # Function to install dependencies
 install_dependencies() {
     poetry install
@@ -211,9 +221,9 @@ setup_postgresql() {
             return 0
         fi
     fi
-    
+
     # Check if Docker is installed and running
-    if ! command_exists docker || ! docker info > /dev/null 2>&1; then
+    if ! command_exists docker || ! docker info >/dev/null 2>&1; then
         echo "Docker is not running or not installed. Please install or start Docker and try again."
         exit 1
     fi
@@ -221,7 +231,7 @@ setup_postgresql() {
     # Check if PostgreSQL is already running in a Docker container
     if docker ps | grep -q postgresql-container; then
         echo "PostgreSQL is already running in a Docker container."
-    else 
+    else
         # Attempt to install and start PostgreSQL using Docker
         echo "Attempting to install PostgreSQL via Docker..."
         docker run --name postgresql-container -e POSTGRES_HOST_AUTH_METHOD=trust -d -p 5432:5432 postgres:14
@@ -229,7 +239,7 @@ setup_postgresql() {
 
         # Wait for PostgreSQL to start
         echo "Waiting for PostgreSQL to start..."
-        sleep 20  # Adjust sleep time as necessary
+        sleep 20 # Adjust sleep time as necessary
     fi
 
     # Assuming docker exec works directly since we've checked Docker's status before
@@ -272,7 +282,7 @@ create_organization() {
     fi
 
     # Update the secrets-open-source.toml file
-    echo -e "[skyvern]\nconfigs = [\n    {\"env\" = \"local\", \"host\" = \"http://127.0.0.1:8000/api/v1\", \"orgs\" = [{name=\"Skyvern\", cred=\"$api_token\"}]}\n]" > .streamlit/secrets.toml
+    echo -e "[skyvern]\nconfigs = [\n    {\"env\" = \"local\", \"host\" = \"http://127.0.0.1:8000/api/v1\", \"orgs\" = [{name=\"Skyvern\", cred=\"$api_token\"}]}\n]" >.streamlit/secrets.toml
     echo ".streamlit/secrets.toml file updated with organization details."
 
     # Check if skyvern-frontend/.env exists and back it up
diff --git a/skyvern/__init__.py b/skyvern/__init__.py
index 502cde743..c6df0049c 100644
--- a/skyvern/__init__.py
+++ b/skyvern/__init__.py
@@ -2,6 +2,8 @@
 from ddtrace.filters import FilterRequestsOnUrl
 
 from skyvern.forge.sdk.forge_log import setup_logger
+from typing import Any, List
+from skyvern.forge.sdk.models import Step
 
 tracer.configure(
     settings={
@@ -11,3 +13,12 @@
     },
 )
 setup_logger()
+
+async def llama_handler(
+    prompt: str,
+    step: Step | None = None,
+    screenshots: list[bytes] | None = None,
+    parameters: dict[str, Any] | None = None,
+) -> dict[str, Any]:
+    # TODO: implement the Llama 3.2 vision API integration; this stub currently returns None.
+    ...
diff --git a/skyvern/config.py b/skyvern/config.py
index aed4b70ba..54d2e0e77 100644
--- a/skyvern/config.py
+++ b/skyvern/config.py
@@ -5,7 +5,26 @@
 
 
 class Settings(BaseSettings):
-    model_config = SettingsConfigDict(env_file=(".env", ".env.staging", ".env.prod"), extra="ignore")
+    # Use only model_config, not Config class
+    model_config = SettingsConfigDict(
+        env_file=".env",
+        env_file_encoding="utf-8",
+        extra="ignore"
+    )
+    
+    # Llama Configuration
+    ENABLE_LLAMA: bool = True
+    LLAMA_API_BASE: str = "http://192.168.1.65:11434"
+    LLAMA_MODEL_NAME: str = "llama3.2-vision"
+    LLAMA_API_ROUTE: str = "/api/chat"
+    LLM_KEY: str = "LLAMA3"
+    SECONDARY_LLM_KEY: str = "LLAMA3"
+
+    # Disable other providers
+    ENABLE_OPENAI: bool = False
+    ENABLE_ANTHROPIC: bool = False
+    ENABLE_AZURE: bool = False
+    ENABLE_BEDROCK: bool = False
 
     ADDITIONAL_MODULES: list[str] = []
 
@@ -18,6 +37,14 @@ class Settings(BaseSettings):
     BROWSER_SCREENSHOT_TIMEOUT_MS: int = 20000
     BROWSER_LOADING_TIMEOUT_MS: int = 120000
     OPTION_LOADING_TIMEOUT_MS: int = 600000
+    MAX_SCRAPING_RETRIES: int = 0
+    VIDEO_PATH: str | None = None
+    HAR_PATH: str | None = "./har"
+    LOG_PATH: str = "./log"
+    BROWSER_ACTION_TIMEOUT_MS: int = 5000
+    BROWSER_SCREENSHOT_TIMEOUT_MS: int = 20000
+    BROWSER_LOADING_TIMEOUT_MS: int = 120000
+    OPTION_LOADING_TIMEOUT_MS: int = 600000
     MAX_STEPS_PER_RUN: int = 75
     MAX_NUM_SCREENSHOTS: int = 10
     # Ratio should be between 0 and 1.
@@ -91,8 +118,8 @@ class Settings(BaseSettings):
     # LLM Configuration #
     #####################
     # ACTIVE LLM PROVIDER
-    LLM_KEY: str = "OPENAI_GPT4O"
-    SECONDARY_LLM_KEY: str | None = None
+    LLM_KEY: str = "LLAMA3"  # Change default from OPENAI_GPT4O
+    SECONDARY_LLM_KEY: str = "LLAMA3"  # Also set this to LLAMA3
     # COMMON
     LLM_CONFIG_TIMEOUT: int = 300
     LLM_CONFIG_MAX_TOKENS: int = 4096
@@ -126,6 +153,9 @@ class Settings(BaseSettings):
 
     SVG_MAX_LENGTH: int = 100000
 
+    # Debug flag (enabled by default)
+    DEBUG: bool = True
+
     def is_cloud_environment(self) -> bool:
         """
         :return: True if env is not local, else False
diff --git a/skyvern/forge/prompts.py b/skyvern/forge/prompts.py
index ce836d93e..cb7d42a9d 100644
--- a/skyvern/forge/prompts.py
+++ b/skyvern/forge/prompts.py
@@ -1,4 +1,5 @@
 from skyvern.forge.sdk.prompting import PromptEngine
 
 # Initialize the prompt engine
-prompt_engine = PromptEngine("skyvern")
+prompt_engine = PromptEngine("ollama")
+prompt_engine_llama = PromptEngine("ollama")
diff --git a/skyvern/forge/prompts/ollama/answer-user-detail-questions.j2 b/skyvern/forge/prompts/ollama/answer-user-detail-questions.j2
new file mode 100644
index 000000000..2d99c81c9
--- /dev/null
+++ b/skyvern/forge/prompts/ollama/answer-user-detail-questions.j2
@@ -0,0 +1,45 @@
+You are a JSON API endpoint that answers questions based on user details and goals. API endpoints ONLY return data - no explanations allowed.
+
+Purpose:
+- Answer user questions based on provided information
+- Use exact information from user details
+- Keep answers direct and concise
+- Fill in answers as JSON key-value pairs
+
+Input data:
+User's goal: {{ navigation_goal }}
+User's details: {{ navigation_payload }}
+User's questions: {{ queries_and_answers }}
+
+Instructions for answering:
+1. Read each question carefully
+2. Find relevant information in user's goal and details
+3. Provide only the exact information needed
+4. Include answers in the JSON response
+5. Keep answers direct - no explanations
+6. Use precise values from provided details
+
+CRITICAL FORMATTING RULES:
+1. Start response with { and end with }
+2. NO text before or after JSON
+3. NO markdown formatting or code blocks
+4. NO explanations, notes, or comments
+5. NO additional formatting or whitespace
+6. Response must be pure JSON only
+
+Response format (replace with actual answers):
+{
+  "question_1": "",
+  "question_2": "",
+  "question_3": ""
+}
+
+AUTOMATIC FAILURE TRIGGERS:
+- Text before the opening {
+- Text after the closing }
+- Explanations or markdown
+- Notes or comments
+- Code blocks or ```
+- Any content outside JSON structure
+
+These answers will be used to fill out information on a webpage automatically. Invalid format will cause system errors.
\ No newline at end of file
diff --git a/skyvern/forge/prompts/ollama/auto-completion-choose-option.j2 b/skyvern/forge/prompts/ollama/auto-completion-choose-option.j2
new file mode 100644
index 000000000..86d595e14
--- /dev/null
+++ b/skyvern/forge/prompts/ollama/auto-completion-choose-option.j2
@@ -0,0 +1,61 @@
+You are a JSON API endpoint for auto-completion analysis. API endpoints ONLY return data - no explanations allowed.
+
+Purpose:
+- Analyze auto-completion attempts for input fields
+- Evaluate suggested options against user goals
+- Select the most appropriate option
+- Return analysis in strict JSON format
+
+Auto-completion Detection Rules:
+1. Count as attempt if:
+   - Multiple suggestions appear
+   - Even "No results" messages indicate an attempt
+2. Valid suggestions must:
+   - Have an ID from provided HTML elements
+   - Contain meaningful content (not just "No results")
+   - Match user goals and context
+
+Analysis Requirements:
+1. Check for auto-completion presence
+2. Evaluate suggestion relevance
+3. Consider user goals and details
+4. Select best matching element
+5. Provide confidence ratings
+6. Use only existing element IDs
+
+Input Data:
+Context: Choose an auto-completion suggestion for "{{ field_information }}"
+Input value: {{ filled_value }}
+User goal: {{ navigation_goal }}
+User details: {{ navigation_payload_str }}
+HTML elements: {{ elements }}
+
+CRITICAL FORMATTING RULES:
+1. Start response with { and end with }
+2. NO text before or after JSON
+3. NO markdown formatting or code blocks
+4. NO explanations, notes, or comments
+5. Response must be pure JSON only
+6. Use exact format specified below
+
+Required Response Format:
+{
+    "auto_completion_attempt": false,  // true if attempt detected
+    "reasoning": "",                   // brief reason for decision
+    "confidence_float": 0.0,          // 0.0 to 1.0
+    "relevance_float": 0.0,           // 0.00 to 1.00
+    "value": "",                      // selected value
+    "id": null                        // element ID or null
+}
+
+AUTOMATIC FAILURE TRIGGERS:
+- Text before the opening {
+- Text after the closing }
+- Explanations or markdown
+- Notes or comments
+- Code blocks or ```
+- Any content outside JSON structure
+- Missing or extra fields
+- Invalid value types
+
+This response will be used for automated webpage interaction. Invalid format will cause system errors.
\ No newline at end of file
diff --git a/skyvern/forge/prompts/ollama/auto-completion-potential-answers.j2 b/skyvern/forge/prompts/ollama/auto-completion-potential-answers.j2
new file mode 100644
index 000000000..f51af90a6
--- /dev/null
+++ b/skyvern/forge/prompts/ollama/auto-completion-potential-answers.j2
@@ -0,0 +1,57 @@
+You are a JSON API endpoint for generating alternative input values. API endpoints ONLY return data - no explanations allowed.
+
+Purpose:
+- Generate 10 alternative values for failed auto-completion
+- Maintain same core meaning as original value
+- Provide variations that might match system expectations
+- Return strictly formatted JSON array of options
+
+Value Generation Rules:
+1. Create variations by:
+   - Using subset of original value
+   - Using superset of original value
+   - Summarizing original value
+   - Removing unnecessary details
+2. Each variation must:
+   - Keep core meaning intact
+   - Not add new information
+   - Be more concise when possible
+3. Order by relevance (highest to lowest)
+
+Input Data:
+Context: Choose an auto-completion suggestion for "{{ field_information }}"
+Current Value: {{ current_value }}
+
+CRITICAL FORMATTING RULES:
+1. Start response with { and end with }
+2. NO text before or after JSON
+3. NO markdown formatting or code blocks
+4. NO explanations, notes, or comments
+5. Response must be pure JSON only
+6. Exactly 10 variations required
+
+Required Response Format:
+{
+    "potential_values": [
+        {
+            "reasoning": "",              // brief explanation of relationship to original
+            "relevance_float": 0.00,      // 0.00 to 1.00, two decimal places
+            "value": ""                   // alternative value
+        }
+        // Repeat for total of 10 values
+    ]
+}
+
+AUTOMATIC FAILURE TRIGGERS:
+- Text before the opening {
+- Text after the closing }
+- Explanations or markdown
+- Notes or comments
+- Code blocks or ```
+- Any content outside JSON structure
+- Fewer or more than 10 values
+- Missing or extra fields
+- Invalid value types
+- Invalid relevance range
+
+This response will be used for automated value suggestion. Invalid format will cause system errors.
\ No newline at end of file
diff --git a/skyvern/forge/prompts/ollama/auto-completion-tweak-value.j2 b/skyvern/forge/prompts/ollama/auto-completion-tweak-value.j2
new file mode 100644
index 000000000..4d93afb1b
--- /dev/null
+++ b/skyvern/forge/prompts/ollama/auto-completion-tweak-value.j2
@@ -0,0 +1,55 @@
+You are a JSON API endpoint for value refinement after failed auto-completions. API endpoints ONLY return data - no explanations allowed.
+
+Purpose:
+- Analyze failed auto-completion attempts
+- Identify patterns in popup suggestions
+- Extract common concepts if present
+- Generate refined input value
+- Return analysis in strict JSON format
+
+Value Refinement Rules:
+1. Current value modifications:
+   - Must relate to original value
+   - Can be subset or superset
+   - Must maintain core meaning
+2. Popup element handling:
+   - Identify common patterns
+   - Extract shared concepts
+   - Don't copy exact values
+   - Use concept for guidance only
+
+Input Data:
+Context: Choose an auto-completion suggestion for "{{ field_information }}"
+Current Value: {{ current_value }}
+Tried Values: {{ tried_values }}
+Popped Elements: {{ popped_up_elements }}
+
+CRITICAL FORMATTING RULES:
+1. Start response with { and end with }
+2. NO text before or after JSON
+3. NO markdown formatting or code blocks
+4. NO explanations, notes, or comments
+5. Response must be pure JSON only
+6. Use exact format specified below
+
+Required Response Format:
+{
+    "is_any_popped_up_elements": false,    // true if popups detected
+    "common_concept": null,                // concept or null
+    "reasoning": "",                       // brief reason for changes
+    "confidence_float": 0.0,              // 0.0 to 1.0
+    "tweaked_value": ""                   // modified value
+}
+
+AUTOMATIC FAILURE TRIGGERS:
+- Text before the opening {
+- Text after the closing }
+- Explanations or markdown
+- Notes or comments
+- Code blocks or ```
+- Any content outside JSON structure
+- Missing or extra fields
+- Invalid value types
+- Direct copying of popup values
+
+This response will be used for automated value refinement. Invalid format will cause system errors.
\ No newline at end of file
diff --git a/skyvern/forge/prompts/ollama/check-user-goal.j2 b/skyvern/forge/prompts/ollama/check-user-goal.j2
new file mode 100644
index 000000000..7545b486d
--- /dev/null
+++ b/skyvern/forge/prompts/ollama/check-user-goal.j2
@@ -0,0 +1,57 @@
+You are a JSON API endpoint for analyzing goal completion status. API endpoints ONLY return data - no explanations allowed.
+
+Purpose:
+- Analyze webpage content against user goals
+- Check if user objective is complete
+- Evaluate page elements and content
+- Provide structured analysis in JSON
+- Return clear completion status
+
+Analysis Requirements:
+1. Page Information:
+   - Identify relevant page elements
+   - Extract useful content
+   - Match elements to user goal
+   - Document key findings
+2. Analysis Process:
+   - Compare page state to goal
+   - Evaluate completion criteria
+   - Check required elements
+   - Verify user details match
+3. Goal Status:
+   - Determine if goal is met
+   - Provide evidence-based decision
+   - Use strict true/false evaluation
+
+Input Data:
+Elements on page: {{ elements }}
+User Goal: {{ navigation_goal }}
+User Details: {{ navigation_payload }}
+
+CRITICAL FORMATTING RULES:
+1. Start response with { and end with }
+2. NO text before or after JSON
+3. NO markdown formatting or code blocks
+4. NO explanations, notes, or comments
+5. Response must be pure JSON only
+6. Use exact format specified below
+
+Required Response Format:
+{
+    "page_info": "",           // relevant page information and findings
+    "thoughts": "",            // analysis of goal completion evidence
+    "user_goal_achieved": false // true if goal completed, false if not
+}
+
+AUTOMATIC FAILURE TRIGGERS:
+- Text before the opening {
+- Text after the closing }
+- Explanations or markdown
+- Notes or comments
+- Code blocks or ```
+- Any content outside JSON structure
+- Missing or extra fields
+- Invalid value types
+- Incorrect boolean format
+
+This response will be used for automated goal verification. Invalid format will cause system errors.
\ No newline at end of file
diff --git a/skyvern/forge/prompts/ollama/css-shape-convert.j2 b/skyvern/forge/prompts/ollama/css-shape-convert.j2
new file mode 100644
index 000000000..6f5ec65ea
--- /dev/null
+++ b/skyvern/forge/prompts/ollama/css-shape-convert.j2
@@ -0,0 +1,46 @@
+You are a JSON API endpoint for visual element analysis. API endpoints ONLY return data - no explanations allowed.
+
+Purpose:
+- Analyze HTML element appearance
+- Identify visual shape and meaning
+- Provide confidence rating
+- Return analysis in strict JSON format
+
+Analysis Requirements:
+1. Shape Description:
+   - Brief, clear description
+   - Include visual appearance
+   - Include implied meaning
+   - Keep description concise
+2. Confidence Rating:
+   - Rate certainty of analysis
+   - Use 0.0 to 1.0 scale
+   - Consider clarity of shape
+   - Consider common usage
+
+CRITICAL FORMATTING RULES:
+1. Start response with { and end with }
+2. NO text before or after JSON
+3. NO markdown formatting or code blocks
+4. NO explanations, notes, or comments
+5. Response must be pure JSON only
+6. Use exact format specified below
+
+Required Response Format:
+{
+    "confidence_float": 0.0,    // 0.0 to 1.0
+    "shape": ""                 // brief description
+}
+
+AUTOMATIC FAILURE TRIGGERS:
+- Text before the opening {
+- Text after the closing }
+- Explanations or markdown
+- Notes or comments
+- Code blocks or ```
+- Any content outside JSON structure
+- Missing or extra fields
+- Invalid value types
+- Invalid confidence range
+
+This response will be used for automated element classification. Invalid format will cause system errors.
\ No newline at end of file
diff --git a/skyvern/forge/prompts/ollama/custom-select.j2 b/skyvern/forge/prompts/ollama/custom-select.j2
new file mode 100644
index 000000000..550f5429c
--- /dev/null
+++ b/skyvern/forge/prompts/ollama/custom-select.j2
@@ -0,0 +1,62 @@
+You are a JSON API endpoint for HTML element selection and input. API endpoints ONLY return data - no explanations allowed.
+
+Purpose:
+- Perform {{ "multi-level selection" if select_history else "selection" }} on webpage
+- Choose best matching element or input value
+- Consider user goals and context
+- Return decision in strict JSON format
+
+Selection Rules:
+1. Element Matching:
+   - Match to user goal and details
+   - Consider fallback options if needed
+   - Never select placeholders
+   - Skip loading indicators
+   - Required fields must have value{% if select_history %}
+   - Consider selection history
+   - Complete multi-level process{% endif %}
+
+2. Action Types:
+   - CLICK: Select existing option
+   - INPUT_TEXT: Search only if no valid options
+
+Input Data:
+Context: Select an option for "{{ field_information }}" ({{ "required" if required_field else "optional" }})
+{% if target_value %}Target Value: {{ target_value }}{% endif %}
+User Goal: {{ navigation_goal }}
+User Details: {{ navigation_payload_str }}
+Elements: {{ elements }}
+{% if select_history %}Selection History: {{ select_history }}{% endif %}
+
+CRITICAL FORMATTING RULES:
+1. Start response with { and end with }
+2. NO text before or after JSON
+3. NO markdown formatting or code blocks
+4. NO explanations, notes, or comments
+5. Response must be pure JSON only
+6. Use exact format specified below
+
+Required Response Format:
+{
+    "reasoning": "",              // brief reason for selection
+    "confidence_float": 0.0,      // 0.0 to 1.0
+    "id": "",                     // element ID from list
+    "action_type": "",           // "CLICK" or "INPUT_TEXT"
+    "value": ""{% if target_value %},
+    "relevant": false             // true if matches target{% endif %}
+}
+
+AUTOMATIC FAILURE TRIGGERS:
+- Text before the opening {
+- Text after the closing }
+- Explanations or markdown
+- Notes or comments
+- Code blocks or ```
+- Any content outside JSON structure
+- Missing or extra fields
+- Invalid value types
+- Invalid action_type values
+- Empty required fields
+- Placeholder selections
+
+This response will be used for automated form interaction. Invalid format will cause system errors.
\ No newline at end of file
diff --git a/skyvern/forge/prompts/ollama/extract-action.j2 b/skyvern/forge/prompts/ollama/extract-action.j2
new file mode 100644
index 000000000..ad1829091
--- /dev/null
+++ b/skyvern/forge/prompts/ollama/extract-action.j2
@@ -0,0 +1,55 @@
+EXECUTE THIS BROWSER TASK NOW:
+
+URL: {{ current_url }}
+GOAL: {{ navigation_goal }}
+ELEMENTS: {{ elements }}
+USER_INPUT: {{ navigation_payload_str }}
+
+RETURN JSON WITH THESE EXACT ACTIONS:
+1. INPUT_TEXT - For typing in search/text fields
+2. CLICK - For buttons and links
+3. SELECT_OPTION - For dropdowns
+4. CHECKBOX - For checkboxes
+5. WAIT - When waiting needed
+6. SOLVE_CAPTCHA - For captchas
+7. TERMINATE - If goal impossible
+8. COMPLETE - When goal achieved
+9. UPLOAD_FILE - For file uploads
+10. NULL_ACTION - When no action needed
+
+EXAMPLE RESPONSE:
+{
+    "user_goal_stage": "Starting search",
+    "user_goal_achieved": false,
+    "action_plan": "Search for product",
+    "actions": [
+        {
+            "reasoning": "Enter search term",
+            "user_detail_query": "What to search?", 
+            "user_detail_answer": "search term",
+            "confidence_float": 1.0,
+            "action_type": "INPUT_TEXT",
+            "id": "an HTML element id from 'ELEMENTS' provided above. Scan it and find the correct id for the action. An id in HTML looks like this: <div id='thisIsAnId'></div>",
+            "text": "search term",
+            "file_url": null,
+            "download": false,
+            "option": null
+        }
+    ]
+}
+
+STRICT RULES:
+1. Output JSON object only - nothing else
+2. No text before or after JSON
+3. No ```json or ``` markers
+4. No "Here is the response:"
+5. No explanations or notes
+6. No markdown formatting
+7. Just the raw JSON object
+8. ONLY use element_ids from the provided ELEMENTS list
+9. VERIFY element exists before including in action
+10. NO assumed/common element names like "search-field"
+11. Element IDs must match EXACTLY
+12. If no valid elements, return an empty actions list
+
+RESPOND NOW WITH JSON ONLY.
\ No newline at end of file
diff --git a/skyvern/forge/prompts/ollama/extract-information.j2 b/skyvern/forge/prompts/ollama/extract-information.j2
new file mode 100644
index 000000000..46de997b5
--- /dev/null
+++ b/skyvern/forge/prompts/ollama/extract-information.j2
@@ -0,0 +1,38 @@
+You are given a screenshot, user data extraction goal, the JSON schema for the output data format, and the current URL.
+
+Your task is to:
+
+Extract the requested information from the screenshot and output it in the specified JSON schema format:
+
+DO NOT USE:
+❌ Markdown formatting
+❌ Code blocks
+❌ Explanations
+❌ HTML analysis
+❌ Notes or comments
+
+REQUIRED FORMAT:
+{% if extracted_information_schema %}{{ extracted_information_schema }}{% else %}{
+    // Schema not provided - use minimal structure
+    "extracted_data": null
+}{% endif %}
+
+SYSTEM RULES:
+1. Start with {
+2. End with }
+3. Only pure JSON allowed
+4. No explanations or analysis
+5. Use null for missing data
+6. Follow schema exactly
+7. No markdown or formatting
+
+Input Data:
+GOAL={{ data_extraction_goal }}
+URL={{ current_url }}
+ELEMENTS={{ elements }}
+TEXT={{ extracted_text }}
+DETAILS={{ navigation_payload }}
+TIME={{ utc_datetime }}
+{% if error_code_mapping_str %}ERRORS={{ error_code_mapping_str }}{% endif %}
+
+SYSTEM WARNING: Response format violations will cause task termination.
\ No newline at end of file
diff --git a/skyvern/forge/prompts/ollama/generate-task.j2 b/skyvern/forge/prompts/ollama/generate-task.j2
new file mode 100644
index 000000000..336d1a0f9
--- /dev/null
+++ b/skyvern/forge/prompts/ollama/generate-task.j2
@@ -0,0 +1,65 @@
+You are a JSON API endpoint for browser task creation. API endpoints ONLY return data - no explanations allowed.
+
+Purpose:
+- Parse user task requirements
+- Generate automation schema
+- Define task goals and payloads
+- Return strict JSON format
+
+Task Requirements:
+1. Required Fields:
+   - url: HTTPS starting point
+   - suggested_title: Brief task description
+   - navigation_goal_reasoning: Why navigation needed
+   - is_navigation_goal_required: Navigation requirement flag
+   - data_extraction_goal_reasoning: Why extraction needed
+   - is_data_extraction_goal_required: Extraction requirement flag
+
+2. Optional Fields:
+   - navigation_goal: Action steps (if required)
+   - data_extraction_goal: Data targets (if required)
+   - navigation_payload: Required input data
+
+3. Validation Rules:
+   - At least one goal required
+   - Navigation goal needs completion criteria
+   - URLs must use HTTPS
+   - Use null for unused fields
+
+Input Data:
+User Prompt: {{ user_prompt }}
+
+CRITICAL FORMATTING RULES:
+1. Start response with { and end with }
+2. NO text before or after JSON
+3. NO markdown formatting or code blocks
+4. NO explanations, notes, or comments
+5. Response must be pure JSON only
+
+Required Response Format:
+{
+    "url": "",                               // required, HTTPS only
+    "suggested_title": "",                   // required, brief description
+    "navigation_goal_reasoning": "",         // required, why navigation needed
+    "is_navigation_goal_required": false,    // required boolean
+    "navigation_goal": null,                 // optional, include COMPLETE criteria
+    "data_extraction_goal_reasoning": "",    // required, why extraction needed
+    "is_data_extraction_goal_required": false, // required boolean
+    "data_extraction_goal": null,            // optional, data requirements
+    "navigation_payload": null               // optional, input data
+}
+
+AUTOMATIC FAILURE TRIGGERS:
+- Text before the opening {
+- Text after the closing }
+- Explanations or markdown
+- Notes or comments
+- Code blocks or ```
+- Any content outside JSON structure
+- Missing required fields
+- Invalid value types
+- Non-HTTPS URLs
+- No goals defined
+- Missing completion criteria
+
+This response will be used for automated task creation. Invalid format will cause system errors.
\ No newline at end of file
diff --git a/skyvern/forge/prompts/ollama/opened-dropdown-confirm.j2 b/skyvern/forge/prompts/ollama/opened-dropdown-confirm.j2
new file mode 100644
index 000000000..b4a21862f
--- /dev/null
+++ b/skyvern/forge/prompts/ollama/opened-dropdown-confirm.j2
@@ -0,0 +1,48 @@
+You are a JSON API endpoint for dropdown menu detection. API endpoints ONLY return data - no explanations allowed.
+
+Purpose:
+- Analyze screenshot for dropdown menu
+- Check for visible options
+- Exclude placeholder messages
+- Return analysis in strict JSON format
+
+Detection Rules:
+1. Valid Dropdown:
+   - Has visible options
+   - Options are selectable
+   - Menu is expanded
+
+2. Invalid Cases:
+   - "No results" messages
+   - "No match" indicators
+   - Placeholder text only
+   - "Please select" options
+   - Single dash (-) options
+   - "Select..." text
+
+CRITICAL FORMATTING RULES:
+1. Start response with { and end with }
+2. NO text before or after JSON
+3. NO markdown formatting or code blocks
+4. NO explanations, notes, or comments
+5. Response must be pure JSON only
+6. Use exact format specified below
+
+Required Response Format:
+{
+    "reasoning": "",                    // brief detection explanation
+    "is_opened_dropdown_menu": false    // true if valid dropdown detected
+}
+
+AUTOMATIC FAILURE TRIGGERS:
+- Text before the opening {
+- Text after the closing }
+- Explanations or markdown
+- Notes or comments
+- Code blocks or ```
+- Any content outside JSON structure
+- Missing or extra fields
+- Invalid value types
+- Invalid boolean format
+
+This response will be used for automated menu detection. Invalid format will cause system errors.
\ No newline at end of file
diff --git a/skyvern/forge/prompts/ollama/parse-input-or-select-context.j2 b/skyvern/forge/prompts/ollama/parse-input-or-select-context.j2
new file mode 100644
index 000000000..6a966b607
--- /dev/null
+++ b/skyvern/forge/prompts/ollama/parse-input-or-select-context.j2
@@ -0,0 +1,54 @@
+You are a JSON API endpoint for web field analysis. API endpoints ONLY return data - no explanations allowed.
+
+Purpose:
+- Analyze INPUT/SELECT element
+- Verify field properties
+- Cross-check with context
+- Return structured analysis
+
+Analysis Requirements:
+1. Field Checking:
+   - Element type verification
+   - Required status check
+   - Search functionality detection
+   - Field purpose identification
+
+2. Verification Process:
+   - Compare with context
+   - Check element attributes
+   - Validate field purpose
+   - Confirm requirements
+
+Input Data:
+Element ID: {{ element_id }}
+Action Reasoning: {{ action_reasoning }}
+Elements: {{ elements }}
+
+CRITICAL FORMATTING RULES:
+1. Start response with { and end with }
+2. NO text before or after JSON
+3. NO markdown formatting or code blocks
+4. NO explanations, notes, or comments
+5. Response must be pure JSON only
+6. Use exact format specified below
+
+Required Response Format:
+{
+    "thought": "",           // verification process description
+    "field": "",            // field purpose/name
+    "is_required": false,   // required field status
+    "is_search_bar": false  // search functionality status
+}
+
+AUTOMATIC FAILURE TRIGGERS:
+- Text before the opening {
+- Text after the closing }
+- Explanations or markdown
+- Notes or comments
+- Code blocks or ```
+- Any content outside JSON structure
+- Missing or extra fields
+- Invalid value types
+- Invalid boolean format
+
+This response will be used for automated field interaction. Invalid format will cause system errors.
\ No newline at end of file
diff --git a/skyvern/forge/prompts/ollama/summarize-max-steps-reason.j2 b/skyvern/forge/prompts/ollama/summarize-max-steps-reason.j2
new file mode 100644
index 000000000..e93656d1a
--- /dev/null
+++ b/skyvern/forge/prompts/ollama/summarize-max-steps-reason.j2
@@ -0,0 +1,55 @@
+You are a JSON API endpoint for task failure analysis. API endpoints ONLY return data - no explanations allowed.
+
+Purpose:
+- Analyze failed task completion
+- Review step history
+- Evaluate page state
+- Return structured analysis
+
+Analysis Requirements:
+1. Page Information:
+   - Current page state
+   - Relevant UI elements
+   - Error messages
+   - Progress indicators
+
+2. Step Analysis:
+   - Review {{ step_count }} steps taken
+   - Identify failure points
+   - Consider user goals
+   - Evaluate action results
+
+Input Data:
+User Goal: {{ navigation_goal }}
+User Details: {{ navigation_payload }}
+Steps Taken:
+{% for step in steps %}Step {{ step.order }}: {{ step.actions_result }}
+{% endfor %}
+
+CRITICAL FORMATTING RULES:
+1. Start response with { and end with }
+2. NO text before or after JSON
+3. NO markdown formatting or code blocks
+4. NO explanations, notes, or comments
+5. Response must be pure JSON only
+6. Use exact format specified below
+
+Required Response Format:
+{
+    "page_info": "",     // current page state analysis
+    "reasoning": ""      // failure cause analysis
+}
+
+AUTOMATIC FAILURE TRIGGERS:
+- Text before the opening {
+- Text after the closing }
+- Explanations or markdown
+- Notes or comments
+- Code blocks or ```
+- Any content outside JSON structure
+- Missing or extra fields
+- Invalid value types
+- Generic explanations
+- Missing step references
+
+This response will be used for automated failure analysis. Invalid format will cause system errors.
\ No newline at end of file
diff --git a/skyvern/forge/prompts/ollama/svg-convert.j2 b/skyvern/forge/prompts/ollama/svg-convert.j2
new file mode 100644
index 000000000..3aa7964d0
--- /dev/null
+++ b/skyvern/forge/prompts/ollama/svg-convert.j2
@@ -0,0 +1,14 @@
+You are given a svg element. You need to figure out what its shape means.
+SVG Element:
+```
+{{svg_element}}
+```
+
+MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing commas, no comments (//), no unnecessary quotes, etc.
+Reply in JSON format with the following keys:
+{
+    "confidence_float": float, // The confidence of the action. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
+    "shape": string, // A short description of the shape of SVG and its meaning
+}
+
+Leaving shape empty will cause errors. Do not do that. Please provide a valid shape. 
diff --git a/skyvern/forge/sdk/api/llm/api_handler_factory.py b/skyvern/forge/sdk/api/llm/api_handler_factory.py
index 5258ac21b..ac6323f9a 100644
--- a/skyvern/forge/sdk/api/llm/api_handler_factory.py
+++ b/skyvern/forge/sdk/api/llm/api_handler_factory.py
@@ -55,24 +55,30 @@ def get_llm_api_handler_with_router(llm_key: str) -> LLMAPIHandler:
         )
         main_model_group = llm_config.main_model_group
 
+        async def llm_api_handler(
+            prompt: str,
+            llm_key: str,
+            model: str,
+            messages: list[dict[str, Any]],
+            **parameters: dict[str, Any],
+        ) -> dict[str, Any]:
+            try:
+                response = await router.completion(
+                    model=model,
+                    messages=messages,
+                    **parameters,
+                )
+                return response
+            except Exception as e:
+                LOG.exception("LLM request failed unexpectedly", llm_key=llm_key)
+                raise LLMProviderError(llm_key) from e
+
         async def llm_api_handler_with_router_and_fallback(
             prompt: str,
             step: Step | None = None,
             screenshots: list[bytes] | None = None,
             parameters: dict[str, Any] | None = None,
         ) -> dict[str, Any]:
-            """
-            Custom LLM API handler that utilizes the LiteLLM router and fallbacks to OpenAI GPT-4 Vision.
-
-            Args:
-                prompt: The prompt to generate completions for.
-                step: The step object associated with the prompt.
-                screenshots: The screenshots associated with the prompt.
-                parameters: Additional parameters to be passed to the LLM router.
-
-            Returns:
-                The response from the LLM router.
-            """
             if parameters is None:
                 parameters = LLMAPIHandlerFactory.get_api_parameters(llm_config)
 
@@ -85,68 +91,26 @@ async def llm_api_handler_with_router_and_fallback(
                 for screenshot in screenshots or []:
                     await app.ARTIFACT_MANAGER.create_artifact(
                         step=step,
-                        artifact_type=ArtifactType.SCREENSHOT_LLM,
+                        artifact_type=ArtifactType.LLM_SCREENSHOT,
                         data=screenshot,
                     )
 
-            messages = await llm_messages_builder(prompt, screenshots, llm_config.add_assistant_prefix)
-            if step:
-                await app.ARTIFACT_MANAGER.create_artifact(
-                    step=step,
-                    artifact_type=ArtifactType.LLM_REQUEST,
-                    data=json.dumps(
-                        {
-                            "model": llm_key,
-                            "messages": messages,
-                            **parameters,
-                        }
-                    ).encode("utf-8"),
-                )
-            try:
-                response = await router.acompletion(model=main_model_group, messages=messages, **parameters)
-                LOG.info("LLM API call successful", llm_key=llm_key, model=llm_config.model_name)
-            except litellm.exceptions.APIError as e:
-                raise LLMProviderErrorRetryableTask(llm_key) from e
-            except ValueError as e:
-                LOG.exception(
-                    "LLM token limit exceeded",
-                    llm_key=llm_key,
-                    model=main_model_group,
-                )
-                raise LLMProviderErrorRetryableTask(llm_key) from e
-            except Exception as e:
-                LOG.exception(
-                    "LLM request failed unexpectedly",
-                    llm_key=llm_key,
-                    model=main_model_group,
-                )
-                raise LLMProviderError(llm_key) from e
+            messages = await llm_messages_builder(
+                prompt=prompt,
+                screenshots=screenshots,
+                add_assistant_prefix=llm_config.add_assistant_prefix,
+                is_llama=llm_config.model_name.startswith("ollama/"),
+            )
 
-            if step:
-                await app.ARTIFACT_MANAGER.create_artifact(
-                    step=step,
-                    artifact_type=ArtifactType.LLM_RESPONSE,
-                    data=response.model_dump_json(indent=2).encode("utf-8"),
-                )
-                llm_cost = litellm.completion_cost(completion_response=response)
-                prompt_tokens = response.get("usage", {}).get("prompt_tokens", 0)
-                completion_tokens = response.get("usage", {}).get("completion_tokens", 0)
-                await app.DATABASE.update_step(
-                    task_id=step.task_id,
-                    step_id=step.step_id,
-                    organization_id=step.organization_id,
-                    incremental_cost=llm_cost,
-                    incremental_input_tokens=prompt_tokens if prompt_tokens > 0 else None,
-                    incremental_output_tokens=completion_tokens if completion_tokens > 0 else None,
-                )
-            parsed_response = parse_api_response(response, llm_config.add_assistant_prefix)
-            if step:
-                await app.ARTIFACT_MANAGER.create_artifact(
-                    step=step,
-                    artifact_type=ArtifactType.LLM_RESPONSE_PARSED,
-                    data=json.dumps(parsed_response, indent=2).encode("utf-8"),
-                )
-            return parsed_response
+            response = await llm_api_handler(
+                prompt=prompt,
+                llm_key=llm_config.model_name,
+                model=llm_config.model_name,
+                messages=messages,
+                **parameters,
+            )
+
+            return response
 
         return llm_api_handler_with_router_and_fallback
 
@@ -186,11 +150,16 @@ async def llm_api_handler(
                         data=screenshot,
                     )
 
-            # TODO (kerem): instead of overriding the screenshots, should we just not take them in the first place?
             if not llm_config.supports_vision:
                 screenshots = None
 
-            messages = await llm_messages_builder(prompt, screenshots, llm_config.add_assistant_prefix)
+            messages = await llm_messages_builder(
+                prompt=prompt,
+                screenshots=screenshots,
+                add_assistant_prefix=llm_config.add_assistant_prefix,
+                is_llama=llm_config.model_name.startswith("ollama/"),
+            )
+
             if step:
                 await app.ARTIFACT_MANAGER.create_artifact(
                     step=step,
@@ -199,16 +168,12 @@ async def llm_api_handler(
                         {
                             "model": llm_config.model_name,
                             "messages": messages,
-                            # we're not using active_parameters here because it may contain sensitive information
                             **parameters,
                         }
                     ).encode("utf-8"),
                 )
             t_llm_request = time.perf_counter()
             try:
-                # TODO (kerem): add a timeout to this call
-                # TODO (kerem): add a retry mechanism to this call (acompletion_with_retries)
-                # TODO (kerem): use litellm fallbacks? https://litellm.vercel.app/docs/tutorials/fallbacks#how-does-completion_with_fallbacks-work
                 LOG.info("Calling LLM API", llm_key=llm_key, model=llm_config.model_name)
                 response = await litellm.acompletion(
                     model=llm_config.model_name,
@@ -237,7 +202,11 @@ async def llm_api_handler(
                     artifact_type=ArtifactType.LLM_RESPONSE,
                     data=response.model_dump_json(indent=2).encode("utf-8"),
                 )
-                llm_cost = litellm.completion_cost(completion_response=response)
+                # Skip cost calculation for local Ollama models
+                if not llm_config.model_name.startswith("ollama/"):
+                    llm_cost = litellm.completion_cost(completion_response=response)
+                else:
+                    llm_cost = 0.0  # Local models are free
                 prompt_tokens = response.get("usage", {}).get("prompt_tokens", 0)
                 completion_tokens = response.get("usage", {}).get("completion_tokens", 0)
                 await app.DATABASE.update_step(
@@ -248,7 +217,7 @@ async def llm_api_handler(
                     incremental_input_tokens=prompt_tokens if prompt_tokens > 0 else None,
                     incremental_output_tokens=completion_tokens if completion_tokens > 0 else None,
                 )
-            parsed_response = parse_api_response(response, llm_config.add_assistant_prefix)
+            parsed_response = parse_api_response(response, llm_config.add_assistant_prefix, llm_config.model_name.startswith("ollama/"))
             if step:
                 await app.ARTIFACT_MANAGER.create_artifact(
                     step=step,
@@ -271,3 +240,7 @@ def register_custom_handler(cls, llm_key: str, handler: LLMAPIHandler) -> None:
         if llm_key in cls._custom_handlers:
             raise DuplicateCustomLLMProviderError(llm_key)
         cls._custom_handlers[llm_key] = handler
+
+if SettingsManager.get_settings().ENABLE_LLAMA:
+    from .llama_handler import llama_handler
+    LLMAPIHandlerFactory.register_custom_handler("LLAMA3", llama_handler)
\ No newline at end of file
diff --git a/skyvern/forge/sdk/api/llm/config_registry.py b/skyvern/forge/sdk/api/llm/config_registry.py
index 1f4dc6af4..cdbd5c2ab 100644
--- a/skyvern/forge/sdk/api/llm/config_registry.py
+++ b/skyvern/forge/sdk/api/llm/config_registry.py
@@ -1,4 +1,5 @@
 import structlog
+import logging
 
 from skyvern.forge.sdk.api.llm.exceptions import (
     DuplicateLLMConfigError,
@@ -10,7 +11,34 @@
 from skyvern.forge.sdk.settings_manager import SettingsManager
 
 LOG = structlog.get_logger()
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
 
+# Debug output: print the Llama-related settings when this module is imported
+print("Initializing config registry...")
+
+settings = SettingsManager.get_settings()
+print("Config Registry Settings:", {
+    "ENABLE_LLAMA": settings.ENABLE_LLAMA,
+    "LLAMA_API_BASE": settings.LLAMA_API_BASE,
+    "LLAMA_MODEL_NAME": settings.LLAMA_MODEL_NAME,
+    "LLM_KEY": settings.LLM_KEY,
+    "ENV_FILE": settings.model_config.get('env_file', '.env')  # Use model_config instead of Config
+})
+
+# First check if any providers are enabled
+provider_check = any([
+    settings.ENABLE_OPENAI,
+    settings.ENABLE_ANTHROPIC,
+    settings.ENABLE_AZURE,
+    settings.ENABLE_BEDROCK,
+    settings.ENABLE_LLAMA,
+])
+print("Provider check result:", provider_check)
+
+if not provider_check:
+    print("No providers enabled, raising NoProviderEnabledError")
+    raise NoProviderEnabledError()
 
 class LLMConfigRegistry:
     _configs: dict[str, LLMRouterConfig | LLMConfig] = {}
@@ -43,19 +71,64 @@ def get_config(cls, llm_key: str) -> LLMRouterConfig | LLMConfig:
         return cls._configs[llm_key]
 
 
-# if none of the LLM providers are enabled, raise an error
-if not any(
-    [
-        SettingsManager.get_settings().ENABLE_OPENAI,
-        SettingsManager.get_settings().ENABLE_ANTHROPIC,
-        SettingsManager.get_settings().ENABLE_AZURE,
-        SettingsManager.get_settings().ENABLE_AZURE_GPT4O_MINI,
-        SettingsManager.get_settings().ENABLE_BEDROCK,
-    ]
-):
+# Debug-log the Llama-related settings before the provider check
+logger.debug("Current settings: %s", {
+    "ENABLE_LLAMA": SettingsManager.get_settings().ENABLE_LLAMA,
+    "LLAMA_API_BASE": SettingsManager.get_settings().LLAMA_API_BASE,
+    "LLAMA_MODEL_NAME": SettingsManager.get_settings().LLAMA_MODEL_NAME,
+    "LLM_KEY": SettingsManager.get_settings().LLM_KEY
+})
+
+# Debug-log the settings again, now including LLAMA_API_ROUTE and the env file
+logger.debug("Checking environment settings:")
+settings = SettingsManager.get_settings()
+logger.debug("Environment variables: %s", {
+    "ENABLE_LLAMA": settings.ENABLE_LLAMA,
+    "LLAMA_API_BASE": settings.LLAMA_API_BASE,
+    "LLAMA_MODEL_NAME": settings.LLAMA_MODEL_NAME,
+    "LLAMA_API_ROUTE": settings.LLAMA_API_ROUTE,
+    "LLM_KEY": settings.LLM_KEY,
+    "ENV_FILE": settings.model_config.get('env_file', '.env')
+})
+
+# First check if any providers are enabled
+if not any([
+    SettingsManager.get_settings().ENABLE_OPENAI,
+    SettingsManager.get_settings().ENABLE_ANTHROPIC,
+    SettingsManager.get_settings().ENABLE_AZURE,
+    SettingsManager.get_settings().ENABLE_BEDROCK,
+    SettingsManager.get_settings().ENABLE_LLAMA,  # Make sure Llama is included
+]):
     raise NoProviderEnabledError()
 
+# First register Llama configuration
+if SettingsManager.get_settings().ENABLE_LLAMA:
+    print("Registering Llama configuration...")
+    LLMConfigRegistry.register_config(
+        "LLAMA3",
+        LLMConfig(
+            model_name="ollama/llama3.2-vision",  # Move model name here with ollama/ prefix
+            required_env_vars=[],
+            supports_vision=True,
+            add_assistant_prefix=False,
+            max_output_tokens=16384,
+            litellm_params=LiteLLMParams(
+                api_base=settings.LLAMA_API_BASE,
+                api_key="",
+                model_info={
+                    "completion_route": "/api/chat"
+                }
+            )
+        )
+    )
+
+# Debug-log the registry contents after the Llama registration step
+logger.debug("Registered configs after Llama registration: %s", LLMConfigRegistry._configs)
+
+# Debug-log the registry contents again (repeats the call above)
+logger.debug("Registered configs: %s", LLMConfigRegistry._configs)
 
+# Then register other provider configurations
 if SettingsManager.get_settings().ENABLE_OPENAI:
     LLMConfigRegistry.register_config(
         "OPENAI_GPT4_TURBO",
diff --git a/skyvern/forge/sdk/api/llm/llama_handler.py b/skyvern/forge/sdk/api/llm/llama_handler.py
new file mode 100644
index 000000000..1b6190d19
--- /dev/null
+++ b/skyvern/forge/sdk/api/llm/llama_handler.py
@@ -0,0 +1,33 @@
+from typing import Any, Optional
+import aiohttp
+import base64
+import json
+from skyvern.forge.sdk.models import Step
+
+async def llama_handler(
+    prompt: str,
+    step: Step | None = None,
+    screenshots: list[bytes] | None = None,
+    parameters: dict[str, Any] | None = None,
+) -> dict[str, Any]:
+    """Send the prompt (and any screenshots) to Ollama's /api/chat using the llama3 model; return an OpenAI-style choices dict."""
+    async with aiohttp.ClientSession() as session:
+        payload = {
+            "model": "llama3",  # Using llama3 model name
+            "messages": [{"role": "user", "content": prompt}],
+            "stream": False
+        }
+        
+        if screenshots:
+            # Convert screenshots to base64 for vision tasks
+            payload["images"] = [base64.b64encode(img).decode('utf-8') for img in screenshots]
+            
+        async with session.post("http://192.168.1.65:11434/api/chat", json=payload) as response:
+            result = await response.json()
+            return {
+                "choices": [{
+                    "message": {
+                        "content": result["message"]["content"]
+                    }
+                }]
+            }
\ No newline at end of file
diff --git a/skyvern/forge/sdk/api/llm/utils.py b/skyvern/forge/sdk/api/llm/utils.py
index 1f7ba93d4..8bfcae6a3 100644
--- a/skyvern/forge/sdk/api/llm/utils.py
+++ b/skyvern/forge/sdk/api/llm/utils.py
@@ -16,60 +16,91 @@ async def llm_messages_builder(
     prompt: str,
     screenshots: list[bytes] | None = None,
     add_assistant_prefix: bool = False,
+    is_llama: bool = False,
 ) -> list[dict[str, Any]]:
-    messages: list[dict[str, Any]] = [
-        {
-            "type": "text",
-            "text": prompt,
+    if is_llama:
+        system_message = {
+            "role": "system",
+            "content": "You are a helpful assistant. You keep to the strict formatting rules. You are loved. You are appreciated. You are a good assistant."
         }
-    ]
-
-    if screenshots:
-        for screenshot in screenshots:
-            encoded_image = base64.b64encode(screenshot).decode("utf-8")
-            messages.append(
-                {
-                    "type": "image_url",
-                    "image_url": {
-                        "url": f"data:image/png;base64,{encoded_image}",
-                    },
-                }
-            )
-    # Anthropic models seems to struggle to always output a valid json object so we need to prefill the response to force it:
-    if add_assistant_prefix:
+        
+        # Build content array with images first
+        content = []
+        if screenshots:
+            for screenshot in screenshots:
+                encoded_image = base64.b64encode(screenshot).decode("utf-8")
+                # Use ollama's native format without data URI prefix
+                content.append({
+                    "type": "image",
+                    "data": encoded_image,
+                    "format": "png"
+                })
+        
+        # Add text prompt last
+        content.append({
+            "type": "text", 
+            "text": f"{prompt} OUTPUT JSON ONLY."
+        })
+        
         return [
-            {"role": "user", "content": messages},
-            {"role": "assistant", "content": "{"},
+            system_message,
+            {
+                "role": "user",
+                "content": content
+            }
         ]
-    return [{"role": "user", "content": messages}]
-
-
-def parse_api_response(response: litellm.ModelResponse, add_assistant_prefix: bool = False) -> dict[str, Any]:
+    else:
+        # Other models: system message, text-only user message, then one user message per screenshot
+        messages: list[dict[str, Any]] = [
+            {
+                "role": "system",
+                "content": "You are a helpful assistant."
+            },
+            {
+                "role": "user",
+                "content": prompt
+            }
+        ]
+        
+        if screenshots:
+            for screenshot in screenshots:
+                encoded_image = base64.b64encode(screenshot).decode("utf-8")
+                messages.append({
+                    "role": "user",
+                    "content": {
+                        "type": "image",
+                        "image_url": f"data:image/png;base64,{encoded_image}"
+                    }
+                })
+        
+        return messages
+    
+def parse_api_response(response: litellm.ModelResponse, add_assistant_prefix: bool = False, is_llama: bool = False) -> dict[str, Any]:
+    """Parse the response from the LLM API into a dictionary.
+    
+    Args:
+        response: The response from the LLM API
+        add_assistant_prefix: Whether to prepend "{" to the content before parsing
+        is_llama: Whether the response is from a Llama/Ollama model
+    
+    Returns:
+        The parsed response as a dictionary
+    """
     content = None
     try:
         content = response.choices[0].message.content
-        # Since we prefilled Anthropic response with "{" we need to add it back to the response to have a valid json object:
         if add_assistant_prefix:
             content = "{" + content
-        content = try_to_extract_json_from_markdown_format(content)
-        if not content:
-            raise EmptyLLMResponseError(str(response))
-        return commentjson.loads(content)
-    except Exception as e:
-        if content:
-            LOG.warning(
-                "Failed to parse LLM response. Will retry auto-fixing the response for unescaped quotes.",
-                exc_info=True,
-                content=content,
-            )
-            try:
-                return fix_and_parse_json_string(content)
-            except Exception as e2:
-                LOG.exception("Failed to auto-fix LLM response.", error=str(e2))
-                raise InvalidLLMResponseFormat(str(response)) from e2
 
-        raise InvalidLLMResponseFormat(str(response)) from e
+        if is_llama:
+            content = try_to_extract_json_from_markdown_format_llama(content)
 
+        return commentjson.loads(content)
+          
+                
+    except Exception as e:
+        LOG.error("Failed to parse LLM response.", content=content)
+        raise InvalidLLMResponseFormat(content) from e
 
 def fix_cutoff_json(json_string: str, error_position: int) -> dict[str, Any]:
     """
@@ -99,7 +130,6 @@ def fix_cutoff_json(json_string: str, error_position: int) -> dict[str, Any]:
     except Exception as e:
         raise InvalidLLMResponseFormat(json_string) from e
 
-
 def fix_unescaped_quotes_in_json(json_string: str) -> str:
     """
     Extracts the positions of quotation marks that define the JSON structure
@@ -154,7 +184,6 @@ def fix_unescaped_quotes_in_json(json_string: str) -> str:
 
     return json_string
 
-
 def fix_and_parse_json_string(json_string: str) -> dict[str, Any]:
     """
     Auto-fixes a JSON string by escaping unescaped quotes and ignoring the last action if the JSON is cutoff.
@@ -182,7 +211,6 @@ def fix_and_parse_json_string(json_string: str) -> dict[str, Any]:
             # Try to fix the cutoff JSON string and see if it can be parsed
             return fix_cutoff_json(json_string, error_position)
 
-
 def try_to_extract_json_from_markdown_format(text: str) -> str:
     pattern = r"```json\s*(.*?)\s*```"
     match = re.search(pattern, text, re.DOTALL)
@@ -190,3 +218,57 @@ def try_to_extract_json_from_markdown_format(text: str) -> str:
         return match.group(1)
     else:
         return text
+       
+def try_to_extract_json_from_markdown_format_llama(text: str) -> str:
+    try:
+        json.loads(text)
+        return text  # Return original if valid JSON
+    except json.JSONDecodeError:
+        pass  # Continue with fixes if invalid
+    
+    """Extract and fix JSON content from markdown code blocks."""
+    # First try to extract from a fenced block (``` or ```json)
+    json_pattern = r"```(?:json)?\s*([\s\S]*?)\s*```"
+    match = re.search(json_pattern, text, re.MULTILINE)
+    if match:
+        json_str = match.group(1).strip()
+    else:
+        # If no code blocks found, use the text as-is
+        json_str = text.strip()
+    
+    # Fix specific JSON formatting issues
+    json_str = re.sub(r'\}\}(\s*\])', '}]}', json_str)  # Fix double closing brace before array end
+    json_str = re.sub(r'\}\}\s*$', '}]}', json_str)     # Fix double closing brace at end
+    
+    # Balance brackets if still needed
+    open_curly = json_str.count('{')
+    close_curly = json_str.count('}')
+    open_square = json_str.count('[')
+    close_square = json_str.count(']')
+    
+    if open_curly > close_curly:
+        json_str += '}' * (open_curly - close_curly)
+    if open_square > close_square:
+        json_str += ']' * (open_square - close_square)
+    
+    # Validate JSON structure
+    try:
+        json.loads(json_str)
+        return json_str
+    except json.JSONDecodeError:
+        # If still invalid, try more aggressive fixes
+        json_str = re.sub(r'\}\s*\}\s*\]', '}]}', json_str)
+        return json_str
+
+def extract_json_from_response(response: str) -> dict:
+    """Extract JSON object from response text that may contain comments/explanations."""
+    # Find content between first { and last }
+    json_match = re.search(r'(\{.*\})', response, re.DOTALL)
+    if not json_match:
+        raise ValueError("No JSON object found in response")
+    
+    json_str = json_match.group(1)
+    try:
+        return json.loads(json_str)
+    except json.JSONDecodeError as e:
+        raise ValueError(f"Invalid JSON structure: {e}")
\ No newline at end of file
diff --git a/skyvern/forge/sdk/settings_manager.py b/skyvern/forge/sdk/settings_manager.py
index cf3c3cf34..e9bb5d743 100644
--- a/skyvern/forge/sdk/settings_manager.py
+++ b/skyvern/forge/sdk/settings_manager.py
@@ -1,13 +1,54 @@
 from skyvern.config import Settings
 from skyvern.config import settings as base_settings
+from pydantic import Field  # Import Field from pydantic
+from pydantic_settings import BaseSettings  # Import BaseSettings from pydantic_settings
 
 
-class SettingsManager:
-    __instance: Settings = base_settings
+class Settings(BaseSettings):
+    """LLM provider settings; each field is read from the env var of the same name."""
+    # NOTE(review): this class shadows the ``Settings`` imported from skyvern.config
+    # above; SettingsManager.get_settings() instantiates skyvern.config.Settings instead.
+    ENV: str = Field(default="local")
+    # Llama: LLAMA_API_BASE defaults to the localhost URL documented in .env.example
+    ENABLE_LLAMA: bool = Field(default=False)
+    LLAMA_API_BASE: str = Field(default="http://localhost:11434")
+    LLAMA_MODEL_NAME: str = Field(default="llama3.2-vision")
+    LLAMA_API_ROUTE: str = Field(default="/api/chat")
+    # Every other provider stays off unless its flag is set in the environment
+    ENABLE_OPENAI: bool = Field(default=False)
+    ENABLE_ANTHROPIC: bool = Field(default=False)
+    ENABLE_AZURE: bool = Field(default=False)
+    ENABLE_AZURE_GPT4O_MINI: bool = Field(default=False)
+    ENABLE_BEDROCK: bool = Field(default=False)
 
-    @staticmethod
-    def get_settings() -> Settings:
-        return SettingsManager.__instance
+    # LLM Configuration
+    LLM_KEY: str = Field(default="LLAMA3")
+    LLM_CONFIG_TIMEOUT: int = Field(default=300)
+    LLM_CONFIG_MAX_TOKENS: int = Field(default=16384)
+    LLM_CONFIG_TEMPERATURE: float = Field(default=0)
+
+    class Config:
+        env_file = ".env"
+        env_file_encoding = "utf-8"
+
+
+class SettingsManager:
+    _instance = None
+    
+    @staticmethod
+    def get_settings():
+        """Return the shared settings object, building it from ``.env`` on first use."""
+        if SettingsManager._instance is None:
+            import logging
+            from skyvern.config import Settings
+            # NOTE(review): the pre-change attribute was ``__instance``; confirm that
+            # set_settings() below assigns ``_instance`` too, or overrides are ignored.
+            SettingsManager._instance = Settings(_env_file=".env")
+            shown = ("ENABLE_LLAMA", "LLM_KEY", "LLAMA_API_BASE", "LLAMA_MODEL_NAME")
+            values = {key: getattr(SettingsManager._instance, key) for key in shown}
+            # Debug-level log instead of print(): keeps stdout clean for library users.
+            logging.getLogger(__name__).debug("Settings values: %s", values)
+        return SettingsManager._instance
 
     @staticmethod
     def set_settings(settings: Settings) -> None: