diff --git a/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/aliyun_bai_lian_model_provider.py b/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/aliyun_bai_lian_model_provider.py index 9dd8f97ab08..83d42a57cd5 100644 --- a/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/aliyun_bai_lian_model_provider.py +++ b/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/aliyun_bai_lian_model_provider.py @@ -13,13 +13,26 @@ ModelInfoManage from setting.models_provider.impl.aliyun_bai_lian_model_provider.credential.reranker import \ AliyunBaiLianRerankerCredential +from setting.models_provider.impl.aliyun_bai_lian_model_provider.credential.stt import AliyunBaiLianSTTModelCredential +from setting.models_provider.impl.aliyun_bai_lian_model_provider.credential.tts import AliyunBaiLianTTSModelCredential from setting.models_provider.impl.aliyun_bai_lian_model_provider.model.reranker import AliyunBaiLianReranker +from setting.models_provider.impl.aliyun_bai_lian_model_provider.model.stt import AliyunBaiLianSpeechToText +from setting.models_provider.impl.aliyun_bai_lian_model_provider.model.tts import AliyunBaiLianTextToSpeech from smartdoc.conf import PROJECT_DIR aliyun_bai_lian_model_credential = AliyunBaiLianRerankerCredential() +aliyun_bai_lian_tts_model_credential = AliyunBaiLianTTSModelCredential() +aliyun_bai_lian_stt_model_credential = AliyunBaiLianSTTModelCredential() + model_info_list = [ModelInfo('gte-rerank', '阿里巴巴通义实验室开发的GTE-Rerank文本排序系列模型,开发者可以通过LlamaIndex框架进行集成高质量文本检索、排序。', - ModelTypeConst.RERANKER, aliyun_bai_lian_model_credential, AliyunBaiLianReranker) + ModelTypeConst.RERANKER, aliyun_bai_lian_model_credential, AliyunBaiLianReranker), + ModelInfo('paraformer-realtime-v2', + '中文(含粤语等各种方言)、英文、日语、韩语支持多个语种自由切换', + ModelTypeConst.STT, aliyun_bai_lian_stt_model_credential, AliyunBaiLianSpeechToText), + ModelInfo('cosyvoice-v1', + 'CosyVoice基于新一代生成式语音大模型,能根据上下文预测情绪、语调、韵律等,具有更好的拟人效果', + ModelTypeConst.TTS, 
aliyun_bai_lian_tts_model_credential, AliyunBaiLianTextToSpeech), ] model_info_manage = ModelInfoManage.builder().append_model_info_list(model_info_list).build()
diff --git a/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/credential/stt.py b/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/credential/stt.py
new file mode 100644
index 00000000000..5c9290b1519
--- /dev/null
+++ b/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/credential/stt.py
@@ -0,0 +1,58 @@
+# coding=utf-8
+
+from typing import Dict
+
+from common import forms
+from common.exception.app_exception import AppApiException
+from common.forms import BaseForm
+from setting.models_provider.base_model_provider import BaseModelCredential, ValidCode
+
+
+class AliyunBaiLianSTTModelCredential(BaseForm, BaseModelCredential):
+    """Credential form for Aliyun BaiLian speech-to-text models: a single API key."""
+
+    api_key = forms.PasswordInputField("API Key", required=True)
+
+    def is_valid(self, model_type: str, model_name, model_credential: Dict[str, object], provider,
+                 raise_exception=False):
+        """Validate the credential by building the model and calling its ``check_auth()``.
+
+        :param model_type: model type; must equal a ``value`` in ``provider.get_model_type_list()``
+        :param model_name: model name passed to ``provider.get_model``
+        :param model_credential: credential dict; must contain ``api_key``
+        :param provider: provider used to list model types and to build the model
+        :param raise_exception: when truthy, raise instead of returning ``False``
+        :return: ``True`` if valid; ``False`` if invalid and ``raise_exception`` is falsy
+        :raises AppApiException: for an unsupported model type (always), for an
+            ``AppApiException`` raised during the auth check (always), and for a missing
+            field or any other auth-check failure when ``raise_exception`` is truthy
+        """
+        model_type_list = provider.get_model_type_list()
+        if not any(mt.get('value') == model_type for mt in model_type_list):
+            raise AppApiException(ValidCode.valid_error.value, f'{model_type} 模型类型不支持')
+
+        for key in ['api_key']:
+            if key not in model_credential:
+                if raise_exception:
+                    raise AppApiException(ValidCode.valid_error.value, f'{key} 字段为必填字段')
+                return False
+        try:
+            model = provider.get_model(model_type, model_name, model_credential)
+            model.check_auth()
+        except AppApiException:
+            # Application errors are re-raised unchanged, whatever raise_exception says.
+            raise
+        except Exception as e:
+            if raise_exception:
+                # Chain the cause so the original traceback is kept.
+                raise AppApiException(ValidCode.valid_error.value, f'校验失败,请检查参数是否正确: {str(e)}') from e
+            return False
+        return True
+
+    def encryption_dict(self, model: Dict[str, object]):
+        """Return a copy of ``model`` whose ``api_key`` is passed through the inherited ``encryption()``."""
+        return {**model, 'api_key': super().encryption(model.get('api_key', ''))}
+
+    def get_model_params_setting_form(self, model_name):
+        """Return ``None``: no parameter-setting form is defined for this model."""
+        pass
diff --git
a/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/credential/tts.py b/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/credential/tts.py
new file mode 100644
index 00000000000..fe54ddaba22
--- /dev/null
+++ b/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/credential/tts.py
@@ -0,0 +1,58 @@
+# coding=utf-8
+
+from typing import Dict
+
+from common import forms
+from common.exception.app_exception import AppApiException
+from common.forms import BaseForm
+from setting.models_provider.base_model_provider import BaseModelCredential, ValidCode
+
+
+class AliyunBaiLianTTSModelCredential(BaseForm, BaseModelCredential):
+    """Credential form for Aliyun BaiLian text-to-speech models: a single API key."""
+
+    api_key = forms.PasswordInputField("API Key", required=True)
+
+    def is_valid(self, model_type: str, model_name, model_credential: Dict[str, object], provider,
+                 raise_exception=False):
+        """Validate the credential by building the model and calling its ``check_auth()``.
+
+        :param model_type: model type; must equal a ``value`` in ``provider.get_model_type_list()``
+        :param model_name: model name passed to ``provider.get_model``
+        :param model_credential: credential dict; must contain ``api_key``
+        :param provider: provider used to list model types and to build the model
+        :param raise_exception: when truthy, raise instead of returning ``False``
+        :return: ``True`` if valid; ``False`` if invalid and ``raise_exception`` is falsy
+        :raises AppApiException: for an unsupported model type (always), for an
+            ``AppApiException`` raised during the auth check (always), and for a missing
+            field or any other auth-check failure when ``raise_exception`` is truthy
+        """
+        model_type_list = provider.get_model_type_list()
+        if not any(mt.get('value') == model_type for mt in model_type_list):
+            raise AppApiException(ValidCode.valid_error.value, f'{model_type} 模型类型不支持')
+
+        for key in ['api_key']:
+            if key not in model_credential:
+                if raise_exception:
+                    raise AppApiException(ValidCode.valid_error.value, f'{key} 字段为必填字段')
+                return False
+        try:
+            model = provider.get_model(model_type, model_name, model_credential)
+            model.check_auth()
+        except AppApiException:
+            # Application errors are re-raised unchanged, whatever raise_exception says.
+            raise
+        except Exception as e:
+            if raise_exception:
+                # Chain the cause so the original traceback is kept.
+                raise AppApiException(ValidCode.valid_error.value, f'校验失败,请检查参数是否正确: {str(e)}') from e
+            return False
+        return True
+
+    def encryption_dict(self, model: Dict[str, object]):
+        """Return a copy of ``model`` whose ``api_key`` is passed through the inherited ``encryption()``."""
+        return {**model, 'api_key': super().encryption(model.get('api_key', ''))}
+
+    def get_model_params_setting_form(self, model_name):
+        """Return ``None``: no parameter-setting form is defined for this model."""
+        pass
diff --git a/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/model/iat_mp3_16k.mp3 b/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/model/iat_mp3_16k.mp3
new file mode 100644
index
00000000000..75e744c8ff5
Binary files /dev/null and b/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/model/iat_mp3_16k.mp3 differ
diff --git a/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/model/stt.py b/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/model/stt.py
new file mode 100644
index 00000000000..89ebd508e25
--- /dev/null
+++ b/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/model/stt.py
@@ -0,0 +1,84 @@
+import os
+import tempfile
+from typing import Dict
+
+import dashscope
+from dashscope.audio.asr import (Recognition)
+
+from setting.models_provider.base_model_provider import MaxKBBaseModel
+from setting.models_provider.impl.base_stt import BaseSpeechToText
+
+
+class AliyunBaiLianSpeechToText(MaxKBBaseModel, BaseSpeechToText):
+    """Speech-to-text model that transcribes audio through DashScope ``Recognition``."""
+
+    api_key: str
+    model: str
+
+    def __init__(self, **kwargs):
+        """Store ``api_key`` and ``model`` from ``kwargs`` (``None`` when absent)."""
+        super().__init__(**kwargs)
+        self.api_key = kwargs.get('api_key')
+        self.model = kwargs.get('model')
+
+    @staticmethod
+    def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
+        """Build an instance from a model name and a credential dict.
+
+        :param model_type: not used by this method
+        :param model_name: stored as the instance's ``model``
+        :param model_credential: dict whose ``api_key`` entry is used
+        :param model_kwargs: ``max_tokens`` / ``temperature`` are forwarded to the
+            constructor when present and not ``None``; anything else is ignored
+        :return: a new ``AliyunBaiLianSpeechToText``
+        """
+        optional_params = {key: model_kwargs[key] for key in ('max_tokens', 'temperature')
+                           if model_kwargs.get(key) is not None}
+        return AliyunBaiLianSpeechToText(
+            model=model_name,
+            api_key=model_credential.get('api_key'),
+            **optional_params,
+        )
+
+    def check_auth(self):
+        """Check the credential by transcribing the sample MP3 next to this module.
+
+        Any exception raised by ``speech_to_text`` propagates to the caller.
+        """
+        cwd = os.path.dirname(os.path.abspath(__file__))
+        with open(os.path.join(cwd, 'iat_mp3_16k.mp3'), 'rb') as f:
+            self.speech_to_text(f)
+
+    def speech_to_text(self, audio_file):
+        """Transcribe an MP3 stream and return the recognized text.
+
+        :param audio_file: binary file-like object; read fully via ``read()``
+        :return: the ``text`` of every recognized sentence, concatenated
+        :raises Exception: when the recognition result's status code is not 200
+        """
+        # NOTE(review): this assigns a module-level attribute shared by every
+        # dashscope caller in the process - confirm concurrent use with
+        # different keys is safe.
+        dashscope.api_key = self.api_key
+        recognition = Recognition(model=self.model,
+                                  format='mp3',
+                                  sample_rate=16000,
+                                  callback=None)
+        # ``recognition.call`` is given a path, so spool the upload to a temp file.
+        temp_file = tempfile.NamedTemporaryFile(delete=False)
+        temp_file_path = temp_file.name
+        try:
+            # Write inside the ``try`` so a failed write still removes the file.
+            with temp_file:
+                temp_file.write(audio_file.read())
+            result = recognition.call(temp_file_path)
+            if result.status_code != 200:
+                # One formatted message; ``Exception('Error: ', msg)`` would
+                # stringify as a tuple.
+                raise Exception(f'Error: {result.message}')
+            # ``or []``: presumably ``get_sentence()`` can be empty/None when
+            # nothing was recognized - TODO confirm; that case yields ''.
+            return ''.join(sentence['text'] for sentence in result.get_sentence() or [])
+        finally:
+            # Always delete the temporary file.
+            os.remove(temp_file_path)
diff --git a/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/model/tts.py b/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/model/tts.py
new file mode 100644
index 00000000000..1e472a742af
--- /dev/null
+++ b/apps/setting/models_provider/impl/aliyun_bai_lian_model_provider/model/tts.py
@@ -0,0 +1,62 @@
+from typing import Dict
+
+import dashscope
+from dashscope.audio.tts_v2 import *
+
+from setting.models_provider.base_model_provider import MaxKBBaseModel
+from setting.models_provider.impl.base_tts import BaseTextToSpeech
+
+
+class AliyunBaiLianTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
+    """Text-to-speech model that synthesizes audio through DashScope ``SpeechSynthesizer``."""
+
+    api_key: str
+    model: str
+
+    def __init__(self, **kwargs):
+        """Store ``api_key`` and ``model`` from ``kwargs`` (``None`` when absent)."""
+        super().__init__(**kwargs)
+        self.api_key = kwargs.get('api_key')
+        self.model = kwargs.get('model')
+
+    @staticmethod
+    def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
+        """Build an instance from a model name and a credential dict.
+
+        :param model_type: not used by this method
+        :param model_name: stored as the instance's ``model``
+        :param model_credential: dict whose ``api_key`` entry is used
+        :param model_kwargs: ``max_tokens`` / ``temperature`` are forwarded to the
+            constructor when present and not ``None``; anything else is ignored
+        :return: a new ``AliyunBaiLianTextToSpeech``
+        """
+        optional_params = {key: model_kwargs[key] for key in ('max_tokens', 'temperature')
+                           if model_kwargs.get(key) is not None}
+        return AliyunBaiLianTextToSpeech(
+            model=model_name,
+            api_key=model_credential.get('api_key'),
+            **optional_params,
+        )
+
+    def check_auth(self):
+        """Check the credential by synthesizing a short fixed phrase."""
+        self.text_to_speech('你好')
+
+    def text_to_speech(self, text):
+        """Synthesize ``text`` with the fixed voice ``longxiaochun``.
+
+        :param text: text to synthesize
+        :return: the non-``str`` result of ``SpeechSynthesizer.call``
+        :raises Exception: when the call returns a ``str``, which is treated as
+            an error message
+        """
+        voice = "longxiaochun"
+        # NOTE(review): this assigns a module-level attribute shared by every
+        # dashscope caller in the process - confirm concurrent use with
+        # different keys is safe.
+        dashscope.api_key = self.api_key
+        synthesizer = SpeechSynthesizer(model=self.model, voice=voice)
+        audio = synthesizer.call(text)
+        if isinstance(audio, str):
+            raise Exception(audio)
+        return audio