webclaw/setup.sh at main · 0xMassi/webclaw · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
#!/usr/bin/env bash
# setup.sh — Local setup for webclaw
#
# Checks prerequisites, builds binaries, configures .env,
# optionally installs Ollama, and wires up the MCP server.
#
# Usage:
#   ./setup.sh              # Interactive full setup
#   ./setup.sh --minimal    # Build only, skip configuration
#   ./setup.sh --check      # Check prerequisites without installing

# Strict mode: abort on any unhandled command failure (-e), treat expansion of
# an unset variable as an error (-u), and make a pipeline fail when any of its
# stages fails (pipefail). Several `cmd | filter || fallback` and
# `if cmd | tail` constructs further down rely on pipefail to see the status
# of the first stage.
set -euo pipefail

# ---------------------------------------------------------------------------
# Colors
# ---------------------------------------------------------------------------
# ANSI SGR sequences. They are single-quoted, so each variable holds the
# literal characters `\033[...m`; printf turns them into real escape bytes
# when it interprets them, which is why they are only ever used via printf.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
BOLD='\033[1m'
DIM='\033[2m'
RESET='\033[0m'

# Log helpers. Each prints a colored tag followed by all of its arguments
# joined into one line. error() writes to stderr, the others to stdout.
# The color variables are passed through %b so printf expands their
# backslash escapes without them being part of the format string.
info() {
    printf '%b[*]%b %s\n' "$BLUE" "$RESET" "$*"
}

success() {
    printf '%b[+]%b %s\n' "$GREEN" "$RESET" "$*"
}

warn() {
    printf '%b[!]%b %s\n' "$YELLOW" "$RESET" "$*"
}

error() {
    printf '%b[x]%b %s\n' "$RED" "$RESET" "$*" 1>&2
}

# Ask the user for a value and store it in a caller-named variable.
#   $1  name of the variable to assign (resolved in the caller's scope; it
#       must not be one of this function's own locals: var_name, prompt_text,
#       default, reply)
#   $2  prompt text
#   $3  optional default; shown in brackets and used when the reply is empty
# Reads one line from stdin and writes the prompt to stdout.
prompt() {
    local var_name="$1" prompt_text="$2" default="${3:-}"
    # Local reply buffer so nothing leaks into (or clobbers) the global scope.
    local reply=""

    if [[ -n "$default" ]]; then
        printf "${CYAN}    %s${DIM} [%s]${RESET}: " "$prompt_text" "$default"
    else
        printf "${CYAN}    %s${RESET}: " "$prompt_text"
    fi

    # read returns non-zero at EOF (closed or exhausted stdin). Under `set -e`
    # that would silently kill the script, so treat EOF as an empty reply and
    # fall back to the default instead.
    read -r reply || true
    printf -v "$var_name" '%s' "${reply:-$default}"
}

# Like prompt(), but the typed reply is not echoed to the terminal.
#   $1  name of the variable to assign (resolved in the caller's scope; it
#       must not be one of this function's own locals)
#   $2  prompt text
#   $3  optional default; shown in brackets and used when the reply is empty
# NOTE: a non-empty default is printed in clear text as part of the prompt,
# so do not pass a real secret as the default.
prompt_secret() {
    local var_name="$1" prompt_text="$2" default="${3:-}"
    # Local reply buffer so the secret never lingers in a global variable.
    local reply=""

    if [[ -n "$default" ]]; then
        printf "${CYAN}    %s${DIM} [%s]${RESET}: " "$prompt_text" "$default"
    else
        printf "${CYAN}    %s${RESET}: " "$prompt_text"
    fi

    # -s suppresses terminal echo. EOF is treated as an empty reply rather
    # than letting `set -e` abort the script.
    read -rs reply || true
    # The user's Enter key was not echoed, so emit the newline ourselves.
    echo
    printf -v "$var_name" '%s' "${reply:-$default}"
}

# Ask a yes/no question.
#   $1  prompt text
#   $2  default answer used when the reply is empty: "y" (default) or "n"
# Returns 0 for an affirmative answer (y / yes, any letter case) and
# non-zero for anything else. Reads one line from stdin.
prompt_yn() {
    local prompt_text="$1" default="${2:-y}"
    local hint="Y/n" reply=""

    # Show which answer Enter selects; accept "n", "N", "no", ...
    if [[ "$default" == [Nn]* ]]; then
        hint="y/N"
    fi

    printf "${CYAN}    %s${DIM} [%s]${RESET}: " "$prompt_text" "$hint"
    # EOF counts as an empty reply, i.e. the default.
    read -r reply || true
    reply="${reply:-$default}"

    # Accept the full word as well as the single letter: a user who types
    # "yes" clearly does not mean "no".
    [[ "$reply" =~ ^[Yy]([Ee][Ss])?$ ]]
}

# Absolute path of the directory that contains this script. Every
# repo-relative path used below (.env, proxies.txt, target/release/...) is
# built from it, so the script works regardless of the caller's cwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# ---------------------------------------------------------------------------
# Step 1: Check prerequisites
# ---------------------------------------------------------------------------
# Report which build/runtime tools are available.
#   $1  optional "--check": exit the script with status 1 when a required
#       tool is missing instead of returning to the caller.
# Required: rustc >= 1.85, cargo, git. Optional: Ollama (only reported).
# Returns 0 when every required tool is present, 1 otherwise. The script runs
# under `set -e`, so a bare call that returns 1 stops the setup.
check_prerequisites() {
    echo
    printf "${BOLD}${GREEN}  Step 1: Prerequisites${RESET}\n"
    echo

    local all_good=true

    # Rust
    if command -v rustc &>/dev/null; then
        local rust_version
        rust_version=$(rustc --version | awk '{print $2}')
        success "Rust $rust_version"

        # Check minimum version (1.85 for edition 2024). Parse with a regex so
        # suffixes such as "-nightly" or an unexpected format cannot trigger
        # an arithmetic error.
        if [[ "$rust_version" =~ ^([0-9]+)\.([0-9]+) ]]; then
            local major="${BASH_REMATCH[1]}" minor="${BASH_REMATCH[2]}"
            # 10# forces base 10 so a leading zero is never read as octal.
            if (( 10#$major < 1 || (10#$major == 1 && 10#$minor < 85) )); then
                warn "Rust 1.85+ required (edition 2024). Run: rustup update"
                all_good=false
            fi
        else
            warn "Could not parse Rust version '$rust_version' (1.85+ required)."
            all_good=false
        fi
    else
        error "Rust not found. Install: https://rustup.rs"
        all_good=false
    fi

    # cargo
    if command -v cargo &>/dev/null; then
        success "Cargo $(cargo --version | awk '{print $2}')"
    else
        error "Cargo not found (should come with Rust)"
        all_good=false
    fi

    # Ollama (optional)
    if command -v ollama &>/dev/null; then
        success "Ollama installed"
        if curl -sf http://localhost:11434/api/tags &>/dev/null; then
            success "Ollama is running"
            local models nl=$'\n'
            # Best effort: if python3 is missing or the response is not the
            # expected JSON, fall through to the "no models" message.
            models=$(curl -sf http://localhost:11434/api/tags | python3 -c "import sys,json; [print(m['name']) for m in json.load(sys.stdin).get('models',[])]" 2>/dev/null || echo "")
            if [[ -n "$models" ]]; then
                # One model per line -> "a, b, c".
                success "Models: ${models//$nl/, }"
            else
                warn "No models pulled yet"
            fi
        else
            warn "Ollama installed but not running. Start with: ollama serve"
        fi
    else
        warn "Ollama not found (optional — needed for local LLM features)"
    fi

    # Git
    if command -v git &>/dev/null; then
        success "Git $(git --version | awk '{print $3}')"
    else
        error "Git not found"
        all_good=false
    fi

    echo
    if [[ "$all_good" == true ]]; then
        success "All prerequisites met."
        return 0
    fi

    error "Some prerequisites are missing. Fix them before continuing."
    if [[ "${1:-}" == "--check" ]]; then
        exit 1
    fi
    # Explicit failure status instead of relying on the status of a trailing
    # `[[ ... ]] && exit` list.
    return 1
}

# ---------------------------------------------------------------------------
# Step 2: Build
# ---------------------------------------------------------------------------
# Build the release binaries with cargo and list what was produced.
# Changes the working directory to $SCRIPT_DIR (it stays there for the rest
# of the script). Exits the script with status 1 when the build fails.
build_binaries() {
    echo
    printf "${BOLD}${GREEN}  Step 2: Build${RESET}\n"
    echo

    info "Building release binaries (this may take a few minutes on first build)..."
    if ! cd "$SCRIPT_DIR"; then
        error "Cannot enter $SCRIPT_DIR"
        exit 1
    fi

    # Only the last lines of cargo's output are shown. The script's pipefail
    # setting makes this condition reflect cargo's exit status, not tail's.
    if ! cargo build --release 2>&1 | tail -5; then
        error "Build failed. Check the output above."
        exit 1
    fi
    echo

    # Report each expected binary individually. Listing all three with one
    # `ls` would fail as a whole (and, under `set -e` + pipefail, silently
    # kill the script) if a single binary were absent.
    local name path size
    local -a built=() missing=()
    for name in webclaw webclaw-server webclaw-mcp; do
        path="target/release/$name"
        if [[ -f "$path" ]]; then
            built+=("$path")
        else
            missing+=("$name")
        fi
    done

    if (( ${#built[@]} > 0 )); then
        success "Built ${#built[@]} binaries:"
        for path in "${built[@]}"; do
            # Field 5 of `ls -lh` is the human-readable file size.
            size=$(ls -lh -- "$path" | awk '{print $5}')
            printf "    %-20s %s\n" "$path" "$size"
        done
    fi
    if (( ${#missing[@]} > 0 )); then
        warn "Expected binaries not found in target/release: ${missing[*]}"
    fi
}

# ---------------------------------------------------------------------------
# Step 3: Configure .env
# ---------------------------------------------------------------------------
# Interactively collect settings and write them to $SCRIPT_DIR/.env.
# Asks before overwriting an existing file. The file is created with
# owner-only permissions because it holds API keys and the server auth key.
# An empty auth key reply generates a random 32-character hex key.
configure_env() {
    echo
    printf "${BOLD}${GREEN}  Step 3: Configuration${RESET}\n"
    echo

    local env_file="$SCRIPT_DIR/.env"
    local proxies="$SCRIPT_DIR/proxies.txt"

    if [[ -f "$env_file" ]]; then
        warn ".env already exists."
        if ! prompt_yn "Overwrite?"; then
            info "Keeping existing .env"
            return 0
        fi
    fi

    local ollama_model="qwen3:8b"
    local openai_key=""
    local anthropic_key=""
    local server_port="3000"
    local auth_key=""

    info "LLM configuration"
    prompt ollama_model "Ollama model (local)" "qwen3:8b"
    prompt_secret openai_key "OpenAI API key (optional, press enter to skip)" ""
    prompt_secret anthropic_key "Anthropic API key (optional, press enter to skip)" ""

    echo
    info "Proxy configuration"
    if [[ -f "$proxies" ]]; then
        local proxy_count
        # Count lines that are neither blank nor comments. `grep -c` prints
        # the count itself (including "0") and merely exits 1 when nothing
        # matched, so the fallback must not print a second number.
        proxy_count=$(grep -cvE '^[[:space:]]*(#|$)' "$proxies" 2>/dev/null || true)
        success "proxies.txt found with ${proxy_count:-0} proxies (auto-loaded)"
    else
        info "To use proxies, create proxies.txt with one proxy per line:"
        printf "    ${DIM}Format: host:port:user:pass${RESET}\n"
        printf "    ${DIM}cp proxies.example.txt proxies.txt${RESET}\n"
    fi

    echo
    info "Server configuration"
    prompt server_port "REST API port" "3000"
    prompt auth_key "API auth key (press enter to auto-generate)" ""

    if [[ -z "$auth_key" ]]; then
        if command -v openssl &>/dev/null; then
            auth_key=$(openssl rand -hex 16)
        else
            # 16 random bytes rendered as 32 hex characters. Reading a fixed
            # amount (rather than `tr </dev/urandom | head -c`) means no
            # stage is killed by SIGPIPE, which pipefail would turn into a
            # fatal error.
            auth_key=$(head -c 16 /dev/urandom | od -An -tx1 | tr -d ' \n')
        fi
        info "Generated auth key: $auth_key"
    fi

    # Create (or truncate) the file and restrict it to the owner *before*
    # any secret is written to it.
    : > "$env_file"
    chmod 600 "$env_file"

    {
        printf '%s\n' \
            "# webclaw configuration — generated by setup.sh" \
            "" \
            "# --- LLM Providers ---" \
            "OLLAMA_HOST=http://localhost:11434" \
            "OLLAMA_MODEL=$ollama_model"
        if [[ -n "$openai_key" ]]; then
            printf '%s\n' "OPENAI_API_KEY=$openai_key"
        fi
        if [[ -n "$anthropic_key" ]]; then
            printf '%s\n' "ANTHROPIC_API_KEY=$anthropic_key"
        fi
        # This step does not ask for a proxy file path, so only a commented
        # placeholder is written.
        printf '%s\n' \
            "" \
            "# --- Proxy ---" \
            "# WEBCLAW_PROXY_FILE=/path/to/proxies.txt" \
            "" \
            "# --- Server ---" \
            "WEBCLAW_PORT=$server_port" \
            "WEBCLAW_HOST=0.0.0.0" \
            "WEBCLAW_AUTH_KEY=$auth_key" \
            "" \
            "# --- Logging ---" \
            "WEBCLAW_LOG=info"
    } >> "$env_file"

    echo
    success ".env created."
}

# ---------------------------------------------------------------------------
# Step 4: Install Ollama (optional)
# ---------------------------------------------------------------------------
# Optionally install Ollama, start it, and pull the configured model.
# Every step is confirmed interactively; declining a step (or a step that
# cannot be completed) ends this function with status 0 so the remaining
# setup steps still run. The model name is read from OLLAMA_MODEL in
# $SCRIPT_DIR/.env and defaults to qwen3:8b.
setup_ollama() {
    echo
    printf "${BOLD}${GREEN}  Step 4: Ollama (Local LLM)${RESET}\n"
    echo

    local tags_url="http://localhost:11434/api/tags"

    if ! command -v ollama &>/dev/null; then
        info "Ollama is not installed."
        info "It's optional but needed for local LLM features (extract, summarize)."
        info "Without it, you can still use OpenAI/Anthropic APIs."
        echo
        if prompt_yn "Install Ollama?" "y"; then
            info "Installing Ollama..."
            if [[ "$(uname)" == "Darwin" ]]; then
                if command -v brew &>/dev/null; then
                    brew install ollama
                else
                    warn "Install Ollama manually: https://ollama.ai/download"
                    return 0
                fi
            else
                curl -fsSL https://ollama.ai/install.sh | sh
            fi
            success "Ollama installed."
        else
            info "Skipping Ollama. You can install later: https://ollama.ai"
            return 0
        fi
    fi

    # Check if running
    if ! curl -sf "$tags_url" &>/dev/null; then
        warn "Ollama is not running."
        if [[ "$(uname)" == "Darwin" ]]; then
            info "On macOS, open the Ollama app or run: ollama serve"
        else
            info "Start with: ollama serve"
        fi
        echo
        if ! prompt_yn "Start Ollama now?" "y"; then
            # Explicit status: a bare `return` here would hand back the
            # non-zero status of the declined prompt, and `set -e` would
            # then abort the whole setup.
            return 0
        fi

        nohup ollama serve &>/dev/null &
        # Poll instead of a single fixed sleep: the server may need more
        # than a moment to accept connections. Gives up after ~10 seconds.
        local tries=0 started=false
        while (( tries < 10 )); do
            sleep 1
            if curl -sf "$tags_url" &>/dev/null; then
                started=true
                break
            fi
            tries=$(( tries + 1 ))
        done
        if [[ "$started" == true ]]; then
            success "Ollama is running."
        else
            warn "Ollama didn't start. Start it manually and re-run setup."
            return 0
        fi
    fi

    # Pull model
    local model
    # -f2- keeps everything after the first "="; an absent file, a missing
    # key, or an empty value all fall back to the default.
    model=$(grep -m1 '^OLLAMA_MODEL=' "$SCRIPT_DIR/.env" 2>/dev/null | cut -d= -f2- || true)
    model="${model:-qwen3:8b}"

    local has_model
    # The model name is passed as an argument (sys.argv[1]) rather than
    # pasted into the Python source, so quotes or backslashes in it cannot
    # break or alter the program.
    has_model=$(curl -sf "$tags_url" | python3 -c '
import json, sys
wanted = sys.argv[1]
names = [m["name"] for m in json.load(sys.stdin).get("models", [])]
print("yes" if any(wanted in name for name in names) else "no")
' "$model" 2>/dev/null || echo "no")

    if [[ "$has_model" == "yes" ]]; then
        success "Model $model already available."
    else
        info "Model $model not found locally."
        if prompt_yn "Pull $model now? (this downloads ~5GB)" "y"; then
            if ollama pull "$model"; then
                success "Model $model ready."
            else
                warn "Could not pull $model. Retry later with: ollama pull $model"
            fi
        fi
    fi
}

# ---------------------------------------------------------------------------
# Step 5: Configure MCP server for Claude Desktop
# ---------------------------------------------------------------------------
# Register the webclaw-mcp binary in the Claude Desktop configuration.
# Adds/updates mcpServers.webclaw.command in claude_desktop_config.json
# (created as `{}` if absent) while keeping every other key. Needs python3
# for the JSON merge; when python3 is missing or the existing file is not
# valid JSON the file is left untouched and the manual snippet is printed.
setup_mcp() {
    echo
    printf "${BOLD}${GREEN}  Step 5: MCP Server (Claude Desktop integration)${RESET}\n"
    echo

    local mcp_binary="$SCRIPT_DIR/target/release/webclaw-mcp"
    if [[ ! -f "$mcp_binary" ]]; then
        warn "webclaw-mcp binary not found. Build first."
        return 0
    fi

    info "The MCP server lets Claude Desktop use webclaw's tools directly."
    info "Tools: scrape, crawl, map, batch, extract, summarize, diff, brand"
    echo

    if ! prompt_yn "Configure MCP server for Claude Desktop?" "y"; then
        info "Skipping MCP setup."
        info "You can configure it later by adding to your Claude Desktop config:"
        printf '    {"mcpServers": {"webclaw": {"command": "%s"}}}\n' "$mcp_binary"
        return 0
    fi

    if ! command -v python3 &>/dev/null; then
        warn "python3 not found; cannot edit the Claude Desktop config automatically."
        info "Add this to your Claude Desktop config manually:"
        printf '    {"mcpServers": {"webclaw": {"command": "%s"}}}\n' "$mcp_binary"
        return 0
    fi

    # Find Claude Desktop config
    local config_path=""
    if [[ "$(uname)" == "Darwin" ]]; then
        config_path="$HOME/Library/Application Support/Claude/claude_desktop_config.json"
    else
        config_path="$HOME/.config/claude/claude_desktop_config.json"
    fi

    if [[ ! -f "$config_path" ]]; then
        # Create config directory and file
        mkdir -p "$(dirname "$config_path")"
        echo '{}' > "$config_path"
        info "Created Claude Desktop config at: $config_path"
    fi

    # Check if webclaw is already configured. An empty file counts as `{}`.
    if python3 -c '
import json, sys
raw = sys.stdin.read().strip()
config = json.loads(raw) if raw else {}
sys.exit(0 if "webclaw" in config.get("mcpServers", {}) else 1)
' < "$config_path" 2>/dev/null; then
        warn "webclaw MCP server already configured in Claude Desktop."
        if ! prompt_yn "Update the path?" "y"; then
            return 0
        fi
    fi

    # Merge webclaw into mcpServers. The binary path is handed over as an
    # argument (sys.argv[1]) instead of being pasted into the Python source,
    # so quotes or backslashes in the path cannot break the program. The
    # merged document is produced in full before the file is overwritten.
    local updated
    if ! updated=$(python3 -c '
import json, sys
raw = sys.stdin.read().strip()
config = json.loads(raw) if raw else {}
config.setdefault("mcpServers", {})["webclaw"] = {"command": sys.argv[1]}
print(json.dumps(config, indent=2))
' "$mcp_binary" < "$config_path"); then
        error "Could not update $config_path (is it valid JSON?). Left it untouched."
        info "Add this to your Claude Desktop config manually:"
        printf '    {"mcpServers": {"webclaw": {"command": "%s"}}}\n' "$mcp_binary"
        return 0
    fi

    printf '%s\n' "$updated" > "$config_path"
    success "MCP server configured in Claude Desktop."
    info "Restart Claude Desktop to activate."
}

# ---------------------------------------------------------------------------
# Step 6: Smoke test
# ---------------------------------------------------------------------------
# Run the freshly built CLI against example.com as a sanity check, and, when
# an Ollama server answers on localhost:11434, also exercise --summarize.
# Failures are reported as warnings only; this function does not abort setup.
smoke_test() {
    echo
    printf "${BOLD}${GREEN}  Step 6: Smoke Test${RESET}\n"
    echo

    local webclaw="$SCRIPT_DIR/target/release/webclaw"

    info "Testing extraction..."
    local output
    # Decide by exit status. A "FAILED" sentinel appended to the captured
    # output would go unnoticed whenever the command printed something
    # before failing.
    if output=$("$webclaw" https://example.com --format llm 2>/dev/null); then
        local word_count
        word_count=$(wc -w <<< "$output" | tr -d ' ')
        success "Extracted example.com: $word_count words"
    else
        warn "Extraction test failed. Check your network connection."
    fi

    # Test Ollama if available
    if curl -sf http://localhost:11434/api/tags &>/dev/null; then
        info "Testing LLM summarization..."
        # Only the exit status matters here; the summary text is discarded.
        if "$webclaw" https://example.com --summarize &>/dev/null; then
            success "LLM summarization works."
        else
            warn "LLM test failed. Check Ollama and model availability."
        fi
    fi
}

# ---------------------------------------------------------------------------
# Summary
# ---------------------------------------------------------------------------
# Print the closing cheat sheet: how to invoke the CLI and the REST server,
# where the configuration and docs live, and a PATH hint. The port shown is
# the WEBCLAW_PORT value from $SCRIPT_DIR/.env; the `|| echo` fallback to
# 3000 fires when the grep/cut pipeline fails (the script enables pipefail).
print_summary() {
    local bin_dir="$SCRIPT_DIR/target/release"
    local api_port
    api_port=$(grep '^WEBCLAW_PORT=' "$SCRIPT_DIR/.env" 2>/dev/null | cut -d= -f2 || echo "3000")

    # Colors go through %b so their backslash escapes are expanded.
    printf '\n%b%b  Setup Complete%b\n\n' "$BOLD" "$GREEN" "$RESET"

    printf '  %bCLI:%b\n' "$BOLD" "$RESET"
    printf '    %s https://example.com --format llm\n\n' "$bin_dir/webclaw"

    printf '  %bREST API:%b\n' "$BOLD" "$RESET"
    printf '    %s\n' "$bin_dir/webclaw-server"
    printf '    curl http://localhost:%s/health\n\n' "$api_port"

    printf '  %bMCP Server:%b\n' "$BOLD" "$RESET"
    printf '    Configured in Claude Desktop (restart to activate)\n\n'

    printf '  %bConfig:%b  %s/.env\n' "$BOLD" "$RESET" "$SCRIPT_DIR"
    printf '  %bDocs:%b    %s/README.md\n\n' "$BOLD" "$RESET" "$SCRIPT_DIR"

    printf '  %bTip: Add to PATH for convenience:%b\n' "$DIM" "$RESET"
    # $PATH stays literal: it is meant to be pasted into the user's shell.
    printf '    export PATH="%s:$PATH"\n\n' "$bin_dir"
}

# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
# Entry point.
#   $1  optional mode:
#         (none)      interactive full setup
#         --minimal   check prerequisites and build only
#         --check     check prerequisites only, then exit
#         -h/--help   print usage and exit
# Any other value is rejected with exit status 2. Without this, a typo or
# `--help` would silently start the full interactive setup.
main() {
    local mode="${1:-}"

    case "$mode" in
        "" | --minimal | --check)
            ;;
        -h | --help)
            printf 'Usage: %s [--minimal | --check]\n' "${0##*/}"
            printf '  (no option)   Interactive full setup\n'
            printf '  --minimal     Build only, skip configuration\n'
            printf '  --check       Check prerequisites without installing\n'
            exit 0
            ;;
        *)
            error "Unknown option: $mode"
            printf 'Usage: %s [--minimal | --check]\n' "${0##*/}" >&2
            exit 2
            ;;
    esac

    echo
    printf "${BOLD}${GREEN}  webclaw — Local Setup${RESET}\n"
    printf "${DIM}  Web extraction toolkit for AI agents${RESET}\n"
    echo

    if [[ "$mode" == "--check" ]]; then
        # In --check mode check_prerequisites exits 1 itself on failure.
        check_prerequisites "--check"
        exit 0
    fi

    # Stop here when a required tool is missing; building would fail anyway.
    check_prerequisites || exit 1

    build_binaries

    if [[ "$mode" == "--minimal" ]]; then
        success "Minimal build complete. Run ./setup.sh for full configuration."
        exit 0
    fi

    configure_env
    setup_ollama
    setup_mcp
    smoke_test
    print_summary
}

# Run the setup, forwarding all command-line arguments unchanged.
main "$@"