diff --git a/.github/workflows/update.yml b/.github/workflows/update.yml index 892aacf2..21d9d5d2 100644 --- a/.github/workflows/update.yml +++ b/.github/workflows/update.yml @@ -49,6 +49,10 @@ jobs: run: go run ./cmd/ionet/main.go continue-on-error: true + - name: NEAR AI + run: go run ./cmd/nearai/main.go + continue-on-error: true + - name: OpenCode Go run: go run ./cmd/opencode-go/main.go continue-on-error: true diff --git a/Taskfile.yaml b/Taskfile.yaml index 07ba2dd9..1afb4184 100644 --- a/Taskfile.yaml +++ b/Taskfile.yaml @@ -64,6 +64,7 @@ tasks: - task: gen:cortecs - task: gen:huggingface - task: gen:ionet + - task: gen:nearai - task: gen:nebius - task: gen:neuralwatt - task: gen:opencode-go @@ -109,6 +110,11 @@ tasks: cmds: - go run cmd/ionet/main.go + gen:nearai: + desc: Generate NEAR AI Cloud provider configurations + cmds: + - go run cmd/nearai/main.go + gen:nebius: desc: Generate Nebius provider configurations cmds: diff --git a/cmd/nearai/main.go b/cmd/nearai/main.go new file mode 100644 index 00000000..c2130ec1 --- /dev/null +++ b/cmd/nearai/main.go @@ -0,0 +1,243 @@ +// Package main provides a command-line tool to fetch models from NEAR AI Cloud +// and generate a configuration file for the provider. 
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"log"
+	"math"
+	"net/http"
+	"os"
+	"slices"
+	"strings"
+	"time"
+
+	"charm.land/catwalk/pkg/catwalk"
+)
+
+// ModelsResponse is the JSON payload decoded from the NEAR AI Cloud
+// "/model/list" endpoint.
+type ModelsResponse struct {
+	Models []NearAIModel `json:"models"`
+}
+
+// NearAIModel is a single entry of the model list: its ID, per-token prices
+// and descriptive metadata.
+type NearAIModel struct {
+	ModelID               string       `json:"modelId"`
+	InputCostPerToken     PricingValue `json:"inputCostPerToken"`
+	OutputCostPerToken    PricingValue `json:"outputCostPerToken"`
+	CacheReadCostPerToken PricingValue `json:"cacheReadCostPerToken"`
+	Metadata              Metadata     `json:"metadata"`
+}
+
+// PricingValue is a per-token price expressed as a scaled integer.
+//
+// NOTE(review): costPer1M interprets the price as Amount * 10^-Scale in
+// Currency; confirm against the NEAR AI Cloud API documentation.
+type PricingValue struct {
+	Amount   int64  `json:"amount"`
+	Scale    int64  `json:"scale"`
+	Currency string `json:"currency"`
+}
+
+// Metadata carries the descriptive attributes of a model that this tool reads.
+type Metadata struct {
+	ContextLength        int64        `json:"contextLength"`
+	ModelDisplayName     string       `json:"modelDisplayName"`
+	Verifiable           bool         `json:"verifiable"`
+	AttestationSupported bool         `json:"attestationSupported"`
+	Architecture         Architecture `json:"architecture"`
+}
+
+// Architecture lists the input and output modalities declared for a model.
+type Architecture struct {
+	InputModalities  []string `json:"inputModalities"`
+	OutputModalities []string `json:"outputModalities"`
+}
+
+// fetchNearAIModels requests apiEndpoint+"/model/list" and decodes the JSON
+// response. It returns an error when the request cannot be built or sent, when
+// the server answers with a status other than 200, or when the body cannot be
+// decoded.
+func fetchNearAIModels(apiEndpoint string) (*ModelsResponse, error) {
+	client := &http.Client{Timeout: 30 * time.Second}
+	req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, apiEndpoint+"/model/list", nil)
+	if err != nil {
+		// Without this check a malformed endpoint leaves req nil and the
+		// Header.Set call below panics.
+		return nil, fmt.Errorf("creating request: %w", err)
+	}
+	req.Header.Set("User-Agent", "Crush-Client/1.0")
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("fetching models: %w", err)
+	}
+	defer resp.Body.Close() //nolint:errcheck
+
+	if resp.StatusCode != http.StatusOK {
+		// Best effort: the body is only used to enrich the error message.
+		body, _ := io.ReadAll(resp.Body)
+		return nil, fmt.Errorf("status %d: %s", resp.StatusCode, body)
+	}
+
+	var mr ModelsResponse
+	if err := json.NewDecoder(resp.Body).Decode(&mr); err != nil {
+		return nil, fmt.Errorf("decoding models response: %w", err)
+	}
+	return &mr, nil
+}
+
+// contains reports whether values holds want, ignoring case.
+func contains(values []string, want string) bool {
+	return slices.ContainsFunc(values, func(value string) bool {
+		return strings.EqualFold(value, want)
+	})
+}
+
+// isChatModel reports whether model should be listed as a chat model. It
+// rejects IDs containing "privacy-filter" or "reranker", models without a
+// positive context length, models that accept audio input, and models whose
+// output modalities include embedding or image or are declared without text.
+func isChatModel(model NearAIModel) bool {
+	id := strings.ToLower(model.ModelID)
+	if strings.Contains(id, "privacy-filter") || strings.Contains(id, "reranker") {
+		return false
+	}
+
+	if model.Metadata.ContextLength <= 0 {
+		return false
+	}
+
+	input := model.Metadata.Architecture.InputModalities
+	output := model.Metadata.Architecture.OutputModalities
+	if contains(input, "audio") {
+		return false
+	}
+	if contains(output, "embedding") || contains(output, "image") {
+		return false
+	}
+	// An empty output list is accepted; a non-empty one must include text.
+	if len(output) > 0 && !contains(output, "text") {
+		return false
+	}
+	return true
+}
+
+// costPer1M converts a per-token price into a price per one million tokens,
+// rounded to five decimal places. Prices in a currency other than USD (an
+// empty currency is accepted) yield 0.
+func costPer1M(cost PricingValue) float64 {
+	if cost.Currency != "" && cost.Currency != "USD" {
+		return 0
+	}
+	v := float64(cost.Amount) * math.Pow10(6-int(cost.Scale))
+	return math.Round(v*1e5) / 1e5
+}
+
+// displayName returns the display name from the model metadata when present.
+// Otherwise it derives one from the model ID: the part after the first "/"
+// (or the whole ID when there is none) with dashes replaced by spaces.
+func displayName(model NearAIModel) string {
+	if model.Metadata.ModelDisplayName != "" {
+		return model.Metadata.ModelDisplayName
+	}
+	if _, name, found := strings.Cut(model.ModelID, "/"); found {
+		return strings.ReplaceAll(name, "-", " ")
+	}
+	return strings.ReplaceAll(model.ModelID, "-", " ")
+}
+
+// defaultMaxTokens returns one tenth of contextWindow; windows smaller than 10
+// are returned unchanged.
+func defaultMaxTokens(contextWindow int64) int64 {
+	if contextWindow < 10 {
+		return contextWindow
+	}
+	return contextWindow / 10
+}
+
+// totalCost is the combined input and output price per one million tokens.
+func totalCost(m *catwalk.Model) float64 {
+	return m.CostPer1MIn + m.CostPer1MOut
+}
+
+// pickModelID returns the ID of the model preferred by better, or "" when
+// models is empty. better reports whether candidate should replace best, so
+// the earliest model wins whenever better never prefers a later one.
+func pickModelID(models []catwalk.Model, better func(candidate, best *catwalk.Model) bool) string {
+	if len(models) == 0 {
+		return ""
+	}
+	best := &models[0]
+	for i := 1; i < len(models); i++ {
+		if m := &models[i]; better(m, best) {
+			best = m
+		}
+	}
+	return best.ID
+}
+
+// bestLargeModelID returns the ID of the model with the highest combined
+// input and output cost, breaking ties by the larger context window. It
+// returns "" when models is empty.
+func bestLargeModelID(models []catwalk.Model) string {
+	return pickModelID(models, func(m, best *catwalk.Model) bool {
+		mCost, bestCost := totalCost(m), totalCost(best)
+		return mCost > bestCost || (mCost == bestCost && m.ContextWindow > best.ContextWindow)
+	})
+}
+
+// bestSmallModelID returns the ID of the model with the lowest combined input
+// and output cost, breaking ties by the smaller context window. It returns ""
+// when models is empty.
+func bestSmallModelID(models []catwalk.Model) string {
+	return pickModelID(models, func(m, best *catwalk.Model) bool {
+		mCost, bestCost := totalCost(m), totalCost(best)
+		return mCost < bestCost || (mCost == bestCost && m.ContextWindow < best.ContextWindow)
+	})
+}
+
+// main fetches the NEAR AI Cloud model list, keeps the chat models, picks the
+// default large and small models (restricted to models that are verifiable and
+// support attestation when any exist), sorts the models by name and writes the
+// provider to internal/providers/configs/nearai.json.
+func main() {
+	nearAIProvider := catwalk.Provider{
+		Name:        "NEAR AI Cloud",
+		ID:          catwalk.InferenceProviderNEARAI,
+		APIKey:      "$NEARAI_API_KEY",
+		APIEndpoint: "https://cloud-api.near.ai/v1",
+		Type:        catwalk.TypeOpenAICompat,
+		Models:      []catwalk.Model{},
+	}
+
+	modelsResp, err := fetchNearAIModels(nearAIProvider.APIEndpoint)
+	if err != nil {
+		// Fatalf rather than Fatal: Print-style formatting adds no space
+		// between a string operand and the error.
+		log.Fatalf("Error fetching NEAR AI Cloud models: %v", err)
+	}
+
+	var verifiableModels []catwalk.Model
+	for _, model := range modelsResp.Models {
+		if !isChatModel(model) {
+			continue
+		}
+
+		m := catwalk.Model{
+			ID:                 model.ModelID,
+			Name:               displayName(model),
+			CostPer1MIn:        costPer1M(model.InputCostPerToken),
+			CostPer1MOut:       costPer1M(model.OutputCostPerToken),
+			CostPer1MInCached:  costPer1M(model.CacheReadCostPerToken),
+			CostPer1MOutCached: 0,
+			ContextWindow:      model.Metadata.ContextLength,
+			DefaultMaxTokens:   defaultMaxTokens(model.Metadata.ContextLength),
+			CanReason:          false,
+			SupportsImages:     contains(model.Metadata.Architecture.InputModalities, "image"),
+		}
+
+		nearAIProvider.Models = append(nearAIProvider.Models, m)
+		if model.Metadata.Verifiable && model.Metadata.AttestationSupported {
+			verifiableModels = append(verifiableModels, m)
+		}
+		fmt.Printf("Added model %s with context window %d\n", model.ModelID, model.Metadata.ContextLength)
+	}
+
+	// Defaults are chosen before sorting, so ties resolve in API order.
+	defaultCandidates := nearAIProvider.Models
+	if len(verifiableModels) > 0 {
+		defaultCandidates = verifiableModels
+	}
+	nearAIProvider.DefaultLargeModelID = bestLargeModelID(defaultCandidates)
+	nearAIProvider.DefaultSmallModelID = bestSmallModelID(defaultCandidates)
+
+	slices.SortFunc(nearAIProvider.Models, func(a catwalk.Model, b catwalk.Model) int {
+		return strings.Compare(a.Name, b.Name)
+	})
+
+	data, err := json.MarshalIndent(nearAIProvider, "", " ")
+	if err != nil {
+		log.Fatalf("Error marshaling NEAR AI Cloud provider: %v", err)
+	}
+	data = append(data, '\n')
+
+	if err := os.WriteFile("internal/providers/configs/nearai.json", data, 0o600); err != nil {
+		log.Fatalf("Error writing NEAR AI Cloud provider config: %v", err)
+	}
+
+	fmt.Printf("Generated nearai.json with %d models\n", len(nearAIProvider.Models))
+}
diff --git a/internal/providers/configs/nearai.json b/internal/providers/configs/nearai.json new file mode 100644 index 00000000..d4d6c73e --- /dev/null +++ b/internal/providers/configs/nearai.json @@ -0,0 +1,407 @@ +{ + "name": "NEAR AI Cloud", + "id": "nearai", + "api_key": "$NEARAI_API_KEY", + "api_endpoint": "https://cloud-api.near.ai/v1", + "type": "openai-compat", + "default_large_model_id": "zai-org/GLM-5.1-FP8", + "default_small_model_id": "google/gemma-4-31B-it", + "models": [ + { + "id": "anthropic/claude-haiku-4-5", + "name": "Claude Haiku 4.5", + "cost_per_1m_in": 1, + "cost_per_1m_out": 5, + "cost_per_1m_in_cached": 0.1, + "cost_per_1m_out_cached": 0, + "context_window": 200000, + "default_max_tokens": 20000, + "can_reason": false, + "supports_attachments": true + }, + { + "id": "anthropic/claude-opus-4-6", + "name": "Claude Opus 4.6", + "cost_per_1m_in": 5, + "cost_per_1m_out": 25, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 200000, + "default_max_tokens": 20000, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "anthropic/claude-opus-4-7", + "name": "Claude Opus 4.7", + "cost_per_1m_in": 5, + "cost_per_1m_out": 25, + "cost_per_1m_in_cached": 0.5, + "cost_per_1m_out_cached": 0, + "context_window": 1000000, + "default_max_tokens": 100000, + "can_reason": false, + "supports_attachments": true + }, + { + "id": "anthropic/claude-sonnet-4-5", + "name": "Claude Sonnet 4.5", + "cost_per_1m_in": 3, + "cost_per_1m_out": 15.5, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 200000, + "default_max_tokens": 20000, + "can_reason": false, + "supports_attachments": false + }, + {
+ "id": "anthropic/claude-sonnet-4-6", + "name": "Claude Sonnet 4.6", + "cost_per_1m_in": 3, + "cost_per_1m_out": 15, + "cost_per_1m_in_cached": 0.3, + "cost_per_1m_out_cached": 0, + "context_window": 1000000, + "default_max_tokens": 100000, + "can_reason": false, + "supports_attachments": true + }, + { + "id": "zai-org/GLM-5.1-FP8", + "name": "GLM 5.1", + "cost_per_1m_in": 0.85, + "cost_per_1m_out": 3.3, + "cost_per_1m_in_cached": 0.17, + "cost_per_1m_out_cached": 0, + "context_window": 202752, + "default_max_tokens": 20275, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "openai/gpt-oss-120b", + "name": "GPT OSS 120B", + "cost_per_1m_in": 0.15, + "cost_per_1m_out": 0.55, + "cost_per_1m_in_cached": 0.03, + "cost_per_1m_out_cached": 0, + "context_window": 131000, + "default_max_tokens": 13100, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "openai/gpt-5-mini", + "name": "GPT-5 Mini", + "cost_per_1m_in": 0.25, + "cost_per_1m_out": 2, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 400000, + "default_max_tokens": 40000, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "openai/gpt-5-nano", + "name": "GPT-5 Nano", + "cost_per_1m_in": 0.05, + "cost_per_1m_out": 0.4, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 400000, + "default_max_tokens": 40000, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "openai/gpt-5.1", + "name": "GPT-5.1", + "cost_per_1m_in": 1.25, + "cost_per_1m_out": 10, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 400000, + "default_max_tokens": 40000, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "openai/gpt-5.4", + "name": "GPT-5.4", + "cost_per_1m_in": 2.5, + "cost_per_1m_out": 15, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 1050000, + "default_max_tokens": 105000, + "can_reason": false, + 
"supports_attachments": false + }, + { + "id": "openai/gpt-5.4-mini", + "name": "GPT-5.4 Mini", + "cost_per_1m_in": 0.75, + "cost_per_1m_out": 4.5, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 400000, + "default_max_tokens": 40000, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "openai/gpt-5.4-nano", + "name": "GPT-5.4 Nano", + "cost_per_1m_in": 0.2, + "cost_per_1m_out": 1.25, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 400000, + "default_max_tokens": 40000, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "openai/gpt-5.5", + "name": "GPT-5.5", + "cost_per_1m_in": 5, + "cost_per_1m_out": 30, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 1050000, + "default_max_tokens": 105000, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "google/gemini-2.5-flash", + "name": "Gemini 2.5 Flash", + "cost_per_1m_in": 0.3, + "cost_per_1m_out": 2.5, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 1000000, + "default_max_tokens": 100000, + "can_reason": false, + "supports_attachments": true + }, + { + "id": "google/gemini-2.5-flash-lite", + "name": "Gemini 2.5 Flash Lite", + "cost_per_1m_in": 0.1, + "cost_per_1m_out": 0.4, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 1048576, + "default_max_tokens": 104857, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "google/gemini-2.5-pro", + "name": "Gemini 2.5 Pro", + "cost_per_1m_in": 1.25, + "cost_per_1m_out": 10, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 1000000, + "default_max_tokens": 100000, + "can_reason": false, + "supports_attachments": true + }, + { + "id": "google/gemini-3-pro", + "name": "Gemini 3 Pro Preview", + "cost_per_1m_in": 1.25, + "cost_per_1m_out": 15, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + 
"context_window": 1000000, + "default_max_tokens": 100000, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "google/gemini-3.1-flash-lite", + "name": "Gemini 3.1 Flash Lite", + "cost_per_1m_in": 0.25, + "cost_per_1m_out": 1.5, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 1048576, + "default_max_tokens": 104857, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "google/gemini-3.5-flash", + "name": "Gemini 3.5 Flash", + "cost_per_1m_in": 1.5, + "cost_per_1m_out": 9, + "cost_per_1m_in_cached": 0.15, + "cost_per_1m_out_cached": 0, + "context_window": 1000000, + "default_max_tokens": 100000, + "can_reason": false, + "supports_attachments": true + }, + { + "id": "google/gemma-4-31B-it", + "name": "Gemma 4 31B Instruct", + "cost_per_1m_in": 0.13, + "cost_per_1m_out": 0.4, + "cost_per_1m_in_cached": 0.026, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 26214, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "openai/gpt-4.1", + "name": "OpenAI GPT-4.1", + "cost_per_1m_in": 2, + "cost_per_1m_out": 8, + "cost_per_1m_in_cached": 0.5, + "cost_per_1m_out_cached": 0, + "context_window": 1000000, + "default_max_tokens": 100000, + "can_reason": false, + "supports_attachments": true + }, + { + "id": "openai/gpt-4.1-mini", + "name": "OpenAI GPT-4.1 Mini", + "cost_per_1m_in": 0.4, + "cost_per_1m_out": 1.6, + "cost_per_1m_in_cached": 0.1, + "cost_per_1m_out_cached": 0, + "context_window": 1000000, + "default_max_tokens": 100000, + "can_reason": false, + "supports_attachments": true + }, + { + "id": "openai/gpt-4.1-nano", + "name": "OpenAI GPT-4.1 Nano", + "cost_per_1m_in": 0.1, + "cost_per_1m_out": 0.4, + "cost_per_1m_in_cached": 0.025, + "cost_per_1m_out_cached": 0, + "context_window": 1000000, + "default_max_tokens": 100000, + "can_reason": false, + "supports_attachments": true + }, + { + "id": "openai/gpt-5", + "name": "OpenAI GPT-5", + 
"cost_per_1m_in": 1.25, + "cost_per_1m_out": 10, + "cost_per_1m_in_cached": 0.125, + "cost_per_1m_out_cached": 0, + "context_window": 400000, + "default_max_tokens": 40000, + "can_reason": false, + "supports_attachments": true + }, + { + "id": "openai/gpt-5.2", + "name": "OpenAI GPT-5.2", + "cost_per_1m_in": 1.8, + "cost_per_1m_out": 15.5, + "cost_per_1m_in_cached": 0.18, + "cost_per_1m_out_cached": 0, + "context_window": 400000, + "default_max_tokens": 40000, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "openai/o3", + "name": "OpenAI o3", + "cost_per_1m_in": 2, + "cost_per_1m_out": 8, + "cost_per_1m_in_cached": 1, + "cost_per_1m_out_cached": 0, + "context_window": 200000, + "default_max_tokens": 20000, + "can_reason": false, + "supports_attachments": true + }, + { + "id": "openai/o4-mini", + "name": "OpenAI o4 Mini", + "cost_per_1m_in": 1.1, + "cost_per_1m_out": 4.4, + "cost_per_1m_in_cached": 0.55, + "cost_per_1m_out_cached": 0, + "context_window": 200000, + "default_max_tokens": 20000, + "can_reason": false, + "supports_attachments": true + }, + { + "id": "Qwen/Qwen3.6-35B-A3B-FP8", + "name": "Qwen 3.6 35B A3B FP8", + "cost_per_1m_in": 0.17, + "cost_per_1m_out": 1.1, + "cost_per_1m_in_cached": 0.056, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 26214, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "name": "Qwen3 30B A3B Instruct 2507", + "cost_per_1m_in": 0.15, + "cost_per_1m_out": 0.55, + "cost_per_1m_in_cached": 0.03, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 26214, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "Qwen/Qwen3-VL-30B-A3B-Instruct", + "name": "Qwen3-VL-30B-A3B-Instruct", + "cost_per_1m_in": 0.15, + "cost_per_1m_out": 0.55, + "cost_per_1m_in_cached": 0.03, + "cost_per_1m_out_cached": 0, + "context_window": 256000, + "default_max_tokens": 25600, + 
"can_reason": false, + "supports_attachments": true + }, + { + "id": "Qwen/Qwen3.5-122B-A10B", + "name": "Qwen3.5 122B A10B", + "cost_per_1m_in": 0.4, + "cost_per_1m_out": 3.2, + "cost_per_1m_in_cached": 0.08, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 13107, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "openai/o3-mini", + "name": "o3 Mini", + "cost_per_1m_in": 1.1, + "cost_per_1m_out": 4.4, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 200000, + "default_max_tokens": 20000, + "can_reason": false, + "supports_attachments": false + } + ] +} diff --git a/internal/providers/providers.go b/internal/providers/providers.go index f73d9717..c05a128c 100644 --- a/internal/providers/providers.go +++ b/internal/providers/providers.go @@ -66,6 +66,9 @@ var miniMaxConfig []byte //go:embed configs/minimax-china.json var miniMaxChinaConfig []byte +//go:embed configs/nearai.json +var nearAIConfig []byte + //go:embed configs/nebius.json var nebiusConfig []byte @@ -141,6 +144,7 @@ var providerRegistry = []ProviderFunc{ groqProvider, huggingFaceProvider, ioNetProvider, + nearAIProvider, nebiusProvider, neuralwattProvider, openCodeGoProvider, @@ -248,6 +252,10 @@ func miniMaxChinaProvider() catwalk.Provider { return loadProviderFromConfig(miniMaxChinaConfig) } +func nearAIProvider() catwalk.Provider { + return loadProviderFromConfig(nearAIConfig) +} + func nebiusProvider() catwalk.Provider { return loadProviderFromConfig(nebiusConfig) } diff --git a/pkg/catwalk/provider.go b/pkg/catwalk/provider.go index 55835825..ce8281fb 100644 --- a/pkg/catwalk/provider.go +++ b/pkg/catwalk/provider.go @@ -50,6 +50,7 @@ const ( InferenceProviderIoNet InferenceProvider = "ionet" InferenceProviderQiniuCloud InferenceProvider = "qiniucloud" InferenceProviderAvian InferenceProvider = "avian" + InferenceProviderNEARAI InferenceProvider = "nearai" InferenceProviderNebius InferenceProvider = "nebius" 
InferenceProviderNeuralwatt InferenceProvider = "neuralwatt" InferenceProviderOpenCodeZen InferenceProvider = "opencode-zen" @@ -127,6 +128,7 @@ func KnownProviders() []InferenceProvider { InferenceProviderMiniMaxChina, InferenceProviderQiniuCloud, InferenceProviderAvian, + InferenceProviderNEARAI, InferenceProviderNebius, InferenceProviderNeuralwatt, InferenceProviderOpenCodeZen,