|
| 1 | +package embedding |
| 2 | + |
| 3 | +import ( |
| 4 | + "encoding/json" |
| 5 | + "errors" |
| 6 | + "fmt" |
| 7 | + "net/http" |
| 8 | + "strings" |
| 9 | + |
| 10 | + "github.com/alibaba/higress/plugins/wasm-go/pkg/log" |
| 11 | + "github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper" |
| 12 | + "github.com/tidwall/gjson" |
| 13 | +) |
| 14 | + |
// Defaults applied by the Azure embedding provider.
const (
	// AZURE_ENDPOINT is the request path template; the "{model}" placeholder
	// is substituted with the model name when a request is built.
	AZURE_ENDPOINT = "/openai/deployments/{model}/embeddings"
	// AZURE_DEFAULT_MODEL_NAME is the model used when the provider
	// configuration does not name one.
	AZURE_DEFAULT_MODEL_NAME = "text-embedding-ada-002"
	// AZURE_PORT is the service port used when the provider configuration
	// leaves the port at zero.
	AZURE_PORT = 443
)
| 20 | + |
// azureProviderInitializer carries no state; it is the receiver for the Azure
// provider's InitConfig, ValidateConfig and CreateProvider methods.
type azureProviderInitializer struct{}
| 23 | + |
// azureProviderConfig holds the Azure-specific settings read from the plugin
// configuration.
//
// The "@Title"/"@Description" lines below are locale-tagged (zh-CN)
// annotations and are kept verbatim.
type azureProviderConfig struct {
	// apiKey is the API key of the text embedding service; it is sent in the
	// "api-key" request header.
	// @Title zh-CN 文本特征提取服务 API Key
	// @Description zh-CN 文本特征提取服务 API Key
	apiKey string
	// apiVersion is the API version of the text embedding service; it is sent
	// as the "api-version" query parameter.
	// @Title zh-CN 文本特征提取 api-version
	// @Description zh-CN 文本特征提取服务 api-version
	apiVersion string
}

// azureConfig is the package-level Azure configuration. It is populated by
// azureProviderInitializer.InitConfig and read when requests are built.
var azureConfig azureProviderConfig
| 34 | + |
| 35 | +func (c *azureProviderInitializer) InitConfig(json gjson.Result) { |
| 36 | + azureConfig.apiKey = json.Get("apiKey").String() |
| 37 | + azureConfig.apiVersion = json.Get("apiVersion").String() |
| 38 | +} |
| 39 | + |
| 40 | +func (c *azureProviderInitializer) ValidateConfig() error { |
| 41 | + if azureConfig.apiKey == "" { |
| 42 | + return errors.New("[Azure] apiKey is required") |
| 43 | + } |
| 44 | + if azureConfig.apiVersion == "" { |
| 45 | + return errors.New("[Azure] apiVersion is required") |
| 46 | + } |
| 47 | + return nil |
| 48 | +} |
| 49 | + |
| 50 | +func (t *azureProviderInitializer) CreateProvider(c ProviderConfig) (Provider, error) { |
| 51 | + if c.servicePort == 0 { |
| 52 | + c.servicePort = AZURE_PORT |
| 53 | + } |
| 54 | + |
| 55 | + if c.model == "" { |
| 56 | + c.model = AZURE_DEFAULT_MODEL_NAME |
| 57 | + } |
| 58 | + |
| 59 | + return &AzureProvider{ |
| 60 | + config: c, |
| 61 | + client: wrapper.NewClusterClient(wrapper.FQDNCluster{ |
| 62 | + FQDN: c.serviceName, |
| 63 | + Host: c.serviceHost, |
| 64 | + Port: c.servicePort, |
| 65 | + }), |
| 66 | + }, nil |
| 67 | +} |
| 68 | + |
// GetProviderType returns the provider type identifier for Azure,
// PROVIDER_TYPE_AZURE.
func (t *AzureProvider) GetProviderType() string {
	return PROVIDER_TYPE_AZURE
}
| 72 | + |
// AzureProvider is the embedding provider for Azure. Instances are built by
// azureProviderInitializer.CreateProvider.
type AzureProvider struct {
	// config is the provider configuration; its model and timeout are read
	// when requests are built and sent.
	config ProviderConfig
	// client is the HTTP client used to POST embedding requests.
	client wrapper.HttpClient
}
| 77 | + |
// AzureEmbeddingRequest is the JSON request body sent to the embeddings
// endpoint.
type AzureEmbeddingRequest struct {
	// Input is the text to embed, serialized as "input".
	Input string `json:"input"`
}
| 81 | + |
| 82 | +func (t *AzureProvider) constructParameters(text string) (string, [][2]string, []byte, error) { |
| 83 | + if text == "" { |
| 84 | + err := errors.New("queryString text cannot be empty") |
| 85 | + return "", nil, nil, err |
| 86 | + } |
| 87 | + |
| 88 | + data := AzureEmbeddingRequest{ |
| 89 | + Input: text, |
| 90 | + } |
| 91 | + |
| 92 | + requestBody, err := json.Marshal(data) |
| 93 | + if err != nil { |
| 94 | + log.Errorf("failed to marshal request data: %v", err) |
| 95 | + return "", nil, nil, err |
| 96 | + } |
| 97 | + |
| 98 | + model := t.config.model |
| 99 | + if model == "" { |
| 100 | + model = AZURE_DEFAULT_MODEL_NAME |
| 101 | + } |
| 102 | + |
| 103 | + // 拼接 endpoint |
| 104 | + endpoint := strings.Replace(AZURE_ENDPOINT, "{model}", model, 1) |
| 105 | + endpoint = endpoint + "?" + "api-version=" + azureConfig.apiVersion |
| 106 | + |
| 107 | + headers := [][2]string{ |
| 108 | + {"api-key", azureConfig.apiKey}, |
| 109 | + {"Content-Type", "application/json"}, |
| 110 | + } |
| 111 | + |
| 112 | + return endpoint, headers, requestBody, err |
| 113 | +} |
| 114 | + |
// AzureEmbeddingResponse is the JSON response body decoded by
// parseTextEmbedding.
type AzureEmbeddingResponse struct {
	// Object is the top-level "object" field of the response.
	Object string `json:"object"`
	// Model is the "model" field of the response.
	Model string `json:"model"`
	// Data is the "data" array; GetEmbedding uses the Embedding of its first
	// element.
	Data []struct {
		// Object is the "object" field of this entry.
		Object string `json:"object"`
		// Embedding is the embedding vector of this entry.
		Embedding []float64 `json:"embedding"`
		// Index is the "index" field of this entry.
		Index int `json:"index"`
	} `json:"data"`
}
| 124 | + |
| 125 | +func (t *AzureProvider) parseTextEmbedding(responseBody []byte) (*AzureEmbeddingResponse, error) { |
| 126 | + var resp AzureEmbeddingResponse |
| 127 | + if err := json.Unmarshal(responseBody, &resp); err != nil { |
| 128 | + return nil, fmt.Errorf("failed to parse response: %w", err) |
| 129 | + } |
| 130 | + return &resp, nil |
| 131 | +} |
| 132 | + |
| 133 | +func (t *AzureProvider) GetEmbedding( |
| 134 | + queryString string, |
| 135 | + ctx wrapper.HttpContext, |
| 136 | + callback func(emb []float64, err error)) error { |
| 137 | + embUrl, embHeaders, embRequestBody, err := t.constructParameters(queryString) |
| 138 | + if err != nil { |
| 139 | + log.Errorf("failed to construct parameters: %v", err) |
| 140 | + return err |
| 141 | + } |
| 142 | + |
| 143 | + var resp *AzureEmbeddingResponse |
| 144 | + err = t.client.Post(embUrl, embHeaders, embRequestBody, |
| 145 | + func(statusCode int, responseHeaders http.Header, responseBody []byte) { |
| 146 | + |
| 147 | + if statusCode != http.StatusOK { |
| 148 | + err = fmt.Errorf("failed to get embedding due to status code: %d, resp: %s", statusCode, responseBody) |
| 149 | + callback(nil, err) |
| 150 | + return |
| 151 | + } |
| 152 | + |
| 153 | + resp, err = t.parseTextEmbedding(responseBody) |
| 154 | + if err != nil { |
| 155 | + err = fmt.Errorf("failed to parse response: %v", err) |
| 156 | + callback(nil, err) |
| 157 | + return |
| 158 | + } |
| 159 | + |
| 160 | + log.Debugf("get embedding response: %d, %s", statusCode, responseBody) |
| 161 | + |
| 162 | + if len(resp.Data) == 0 { |
| 163 | + err = errors.New("no embedding found in response") |
| 164 | + callback(nil, err) |
| 165 | + return |
| 166 | + } |
| 167 | + |
| 168 | + callback(resp.Data[0].Embedding, nil) |
| 169 | + |
| 170 | + }, t.config.timeout) |
| 171 | + return err |
| 172 | +} |
0 commit comments