File size: 3,247 Bytes
f9c67bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
/**
 * Calculate a max_tokens value from the provider's context window and the
 * estimated size of the input.
 *
 * The context window minus the input and a safety buffer gives the room left
 * for output. That room is mapped to a tier (32k / 16k / up to 8k). When less
 * than 2048 tokens remain, the safety buffer is shrunk to try to keep a usable
 * amount of output. The result is always a positive integer that leaves at
 * least 100 tokens of headroom when the context window allows it.
 *
 * @param selectedProvider - The selected provider object from getBestProvider;
 *   only its `context_length` property is read
 * @param inputTokens - Estimated input tokens (prompt + system message + context)
 * @param isStreaming - Whether this is a streaming request (uses a larger buffer)
 * @returns Optimal max_tokens value (integer, never less than 1)
 */
export function calculateMaxTokens(
  selectedProvider: any,
  inputTokens: number = 0,
  isStreaming: boolean = false
): number {
  // Fallback for unknown providers - use conservative default
  const fallbackMaxTokens = 4096;

  const contextLength = Number(selectedProvider?.context_length);
  if (!Number.isFinite(contextLength) || contextLength <= 0) {
    return fallbackMaxTokens;
  }

  // Treat a missing/NaN/negative estimate as "no input" rather than letting
  // it poison the arithmetic below.
  const usedTokens = Number.isNaN(inputTokens) ? 0 : Math.max(0, inputTokens);

  // Reserve buffer for safety and potential tokenization differences
  const safetyBuffer = isStreaming ? 1000 : 500;

  // Tokens left for output once input and buffer are accounted for
  const availableTokens = contextLength - usedTokens - safetyBuffer;

  // Pick an output tier: substantial output (e.g. HTML generation) when there
  // is plenty of room, moderate output (e.g. code editing) in the middle,
  // otherwise a conservative cap bounded by what is actually available.
  let targetTokens: number;
  if (availableTokens >= 32_000) {
    targetTokens = 32_000;
  } else if (availableTokens >= 16_000) {
    targetTokens = 16_000;
  } else {
    targetTokens = Math.min(8_000, availableTokens);
  }

  // Ensure we don't go below minimum viable output: if the normal buffer
  // leaves too little room, retry with a minimal buffer.
  const minimumViableOutput = 2048;
  if (targetTokens < minimumViableOutput) {
    const minimalBuffer = 200;
    targetTokens = Math.max(
      minimumViableOutput,
      contextLength - usedTokens - minimalBuffer
    );
  }

  // Final safety check - never exceed what the context window can hold.
  const hardCeiling = contextLength - usedTokens - 100;

  // The ceiling can be zero or negative when the input already fills the
  // context window; never hand the API a non-positive or fractional value.
  return Math.max(1, Math.floor(Math.min(targetTokens, hardCeiling)));
}

/**
 * Produce a rough token estimate for the textual parts of a request.
 *
 * English text averages about 4 characters per token; this helper divides by
 * 3.5 instead so the estimate errs on the high side. Real tokenizers will
 * differ, so treat the result as an approximation only.
 *
 * @param systemPrompt - System prompt content
 * @param userPrompt - User prompt content
 * @param additionalContext - Additional context (templates, pages, etc.)
 * @returns Estimated token count, rounded up to a whole number
 */
export function estimateInputTokens(
  systemPrompt: string = "",
  userPrompt: string = "",
  additionalContext: string = ""
): number {
  // Deliberately below the usual ~4 chars/token so we over-estimate.
  const charsPerToken = 3.5;

  let characterCount = 0;
  for (const segment of [systemPrompt, userPrompt, additionalContext]) {
    characterCount += segment.length;
  }

  return Math.ceil(characterCount / charsPerToken);
}

/**
 * Build the max_tokens portion of a request config, accounting for providers
 * that need special handling.
 *
 * @param selectedProvider - Provider object; only its `provider` name is read
 * @param baseMaxTokens - The max_tokens value to use for ordinary providers
 * @returns An empty object for "sambanova" (max_tokens is intentionally left
 *   unset for it), otherwise `{ max_tokens: baseMaxTokens }`
 */
export function getProviderSpecificConfig(selectedProvider: any, baseMaxTokens: number) {
  // SambaNova has specific limitations - don't set max_tokens
  if (selectedProvider?.provider === "sambanova") {
    return {};
  }

  return { max_tokens: baseMaxTokens };
}