From 01b2f8b4584f499af46ea14f02a4f4b529da0df8 Mon Sep 17 00:00:00 2001
From: thedragonsinn <98635854+thedragonsinn@users.noreply.github.com>
Date: Wed, 8 Jan 2025 14:38:20 +0530
Subject: [PATCH] feat `ai`: add lmodels cmd; rename stt->ts, load_history->lh,
 aichat->aic

AI commands can now take context from replied messages, and ocrv can hear
audio.
---
 app/plugins/ai/media_query.py | 69 +++++++++++++++++++++--------------
 app/plugins/ai/models.py      | 74 ++++++++++++++++++++++++++++-------
 app/plugins/ai/text_query.py  | 22 +++++---------
 3 files changed, 107 insertions(+), 58 deletions(-)
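Review note (not part of the patch): the common thread in the diffs below is that every AI command now falls back to the replied message when no inline input is given. `run_basic_check` accepts either source, `.ai` folds the replied text into its prompt, and the media commands substitute default prompts. A minimal sketch of the prompt side, assuming the bot's `Message` wrapper exposes `.input` and `.replied` the way the diffs use them; `build_prompt` is a hypothetical helper, not in the patch:

```python
def build_prompt(message) -> str:
    # Replied-message text is optional context; it may be empty.
    reply_text = message.replied.text if message.replied else ""
    # run_basic_check guarantees at least one of the two parts is non-empty.
    return f"{reply_text}\n\n{message.input}".strip()
```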
this may take a while") - if not (prompt and reply and (reply.video or reply.animation)): + if not (reply and (reply.video or reply.animation)): await message_response.edit("Reply to a video and give a prompt.") return - ai_response_text = await handle_video(prompt, reply) + ai_response_text, uploaded_files = await handle_video(prompt, reply) await message_response.edit(ai_response_text) + for uploaded_frame in uploaded_files: + await asyncio.to_thread(genai.delete_file, name=uploaded_frame.name) + @bot.add_cmd(cmd="aim") @run_basic_check @@ -146,14 +155,14 @@ async def download_file(file_name: str, message: Message) -> tuple[str, str]: return file_path, download_dir -async def handle_audio(prompt: str, message: Message): +async def handle_audio(prompt: str, message: Message, model=MODEL): audio = message.document or message.audio or message.voice file_name = getattr(audio, "file_name", "audio.aac") file_path, download_dir = await download_file(file_name, message) file_response = genai.upload_file(path=file_path) - response = await MODEL.generate_content_async([prompt, file_response]) + response = await model.generate_content_async([prompt, file_response]) response_text = get_response_text(response) genai.delete_file(name=file_response.name) @@ -162,15 +171,15 @@ async def handle_audio(prompt: str, message: Message): return response_text -async def handle_code(prompt: str, message: Message): +async def handle_code(prompt: str, message: Message, model=MODEL): file: BytesIO = await message.download(in_memory=True) text = file.getvalue().decode("utf-8") final_prompt = f"{text}\n\n{prompt}" - response = await MODEL.generate_content_async(final_prompt) + response = await model.generate_content_async(final_prompt) return get_response_text(response) -async def handle_photo(prompt: str, message: Message): +async def handle_photo(prompt: str, message: Message, model=MODEL): file = await message.download(in_memory=True) mime_type, _ = mimetypes.guess_type(file.name) @@ -178,34 +187,34 @@ async def handle_photo(prompt: str, message: Message): mime_type = "image/unknown" image_blob = glm.Blob(mime_type=mime_type, data=file.getvalue()) - response = await MODEL.generate_content_async([prompt, image_blob]) + response = await model.generate_content_async([prompt, image_blob]) return get_response_text(response) -async def handle_video(prompt: str, message: Message): +async def handle_video(prompt: str, message: Message, model=MODEL) -> tuple[str, list]: file_name = "v.mp4" file_path, download_dir = await download_file(file_name, message) output_path = os.path.join(download_dir, "output_frame_%04d.png") - ffmpeg_output_error = await run_shell_cmd( - f'ffmpeg -hide_banner -loglevel error -i {file_path} -vf "fps=1" {output_path}' + audio_path = os.path.join(download_dir, "audio.") + + await run_shell_cmd( + f'ffmpeg -hide_banner -loglevel error -i "{file_path}" -vf "fps=1" "{output_path}"' + f"&&" + f'ffmpeg -hide_banner -loglevel error -i "{file_path}" -map 0:a:1 -vn -acodec copy "{audio_path}%(ext)s"' ) - if ffmpeg_output_error: - return ffmpeg_output_error + prompt_n_uploaded_files = [prompt] - extracted_frames = glob.glob(f"{download_dir}/*png") - - uploaded_frames = [] - for frame in extracted_frames: + for frame in glob.glob(f"{download_dir}/*png"): uploaded_frame = await asyncio.to_thread(genai.upload_file, frame) - uploaded_frames.append(uploaded_frame) + prompt_n_uploaded_files.append(uploaded_frame) - response = await MODEL.generate_content_async([prompt, *uploaded_frames]) + for file in 
glob.glob(f"{audio_path}*"): + uploaded_file = await asyncio.to_thread(genai.upload_file, file) + prompt_n_uploaded_files.append(uploaded_file) + + response = await model.generate_content_async(prompt_n_uploaded_files) response_text = get_response_text(response) - - for uploaded_frame in uploaded_frames: - await asyncio.to_thread(genai.delete_file, name=uploaded_frame.name) - shutil.rmtree(download_dir, ignore_errors=True) - return response_text + return response_text, prompt_n_uploaded_files diff --git a/app/plugins/ai/models.py b/app/plugins/ai/models.py index 46690cc..0326334 100644 --- a/app/plugins/ai/models.py +++ b/app/plugins/ai/models.py @@ -2,13 +2,9 @@ from functools import wraps import google.generativeai as genai -from app import BOT, Message, extra_config - - -async def init_task(): - if extra_config.GEMINI_API_KEY: - genai.configure(api_key=extra_config.GEMINI_API_KEY) +from app import BOT, CustomDB, Message, extra_config +SETTINGS = CustomDB("COMMON_SETTINGS") GENERATION_CONFIG = {"temperature": 0.69, "max_output_tokens": 2048} @@ -19,19 +15,69 @@ SAFETY_SETTINGS = [ {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_ONLY_HIGH"}, ] +SYSTEM_INSTRUCTION = ( + "Answer precisely and in short unless specifically instructed otherwise." + "\nWhen asked related to code, do not comment the code and do not explain unless instructed." +) MODEL = genai.GenerativeModel( - model_name="models/gemini-1.5-flash", generation_config=GENERATION_CONFIG, safety_settings=SAFETY_SETTINGS, + system_instruction=SYSTEM_INSTRUCTION, ) -def run_basic_check(func): +async def init_task(): + if extra_config.GEMINI_API_KEY: + genai.configure(api_key=extra_config.GEMINI_API_KEY) - @wraps(func) + model_info = await SETTINGS.find_one({"_id": "gemini_model_info"}) or {} + model_name = model_info.get("model_name") + if model_name: + MODEL._model_name = model_name + + +@BOT.add_cmd(cmd="lmodels") +async def list_ai_models(bot: BOT, message: Message): + """ + CMD: LIST MODELS + INFO: List and change Gemini Models. + USAGE: .lmodels + """ + model_list = [ + model + for model in genai.list_models() + if "generateContent" in model.supported_generation_methods + ] + + mono_names = "".join([f"`{model}`" for model in model_list]) + update_str = ( + f"\n\nCurrent Model: {MODEL._model_name}" + "\n\nTo change to a different model," + "Reply to this message with the model name." + ) + + model_reply = await message.reply(mono_names + update_str, del_in=30, block=False) + response = await model_reply.get_response(timeout=10) + + if not response: + return + + if response.text not in model_list: + await response.edit( + f"Invalid Model... run {message.trigger}lams again" + ) + return + + await SETTINGS.add_data({"_id": "gemini_model_info", "model_name": response.text}) + await response.edit(f"{response.text} saved as model.") + await response.log() + MODEL._model_name = response.text + + +def run_basic_check(function): + @wraps(function) async def wrapper(bot: BOT, message: Message): - if not extra_config.GEMINI_API_KEY: await message.reply( "Gemini API KEY not found." 
@@ -40,21 +86,19 @@ def run_basic_check(func):
             )
             return
 
-        if not message.input:
-            await message.reply("Ask a Question.")
+        if not (message.input or message.replied):
+            await message.reply("Ask a Question | Reply to a Message")
             return
 
         try:
-            await func(bot, message)
+            await function(bot, message)
         except Exception as e:
-
             if "User location is not supported for the API use" in str(e):
                 await message.reply(
                     "Your server location doesn't allow gemini yet."
                     "\nIf you are on koyeb change your app region to Washington DC."
                 )
                 return
-
             raise
 
     return wrapper
diff --git a/app/plugins/ai/text_query.py b/app/plugins/ai/text_query.py
index 1b9bc88..fa18ecc 100644
--- a/app/plugins/ai/text_query.py
+++ b/app/plugins/ai/text_query.py
@@ -18,11 +18,11 @@ async def question(bot: BOT, message: Message):
     INFO: Ask a question to Gemini AI.
     USAGE: .ai what is the meaning of life.
     """
-
-    prompt = message.input
+    reply = message.replied
+    reply_text = reply.text if reply else ""
+    prompt = f"{reply_text}\n\n\n{message.input}".strip()
     response = await MODEL.generate_content_async(prompt)
-
     response_text = get_response_text(response)
 
     if not isinstance(message, Message):
@@ -39,14 +39,14 @@ async def question(bot: BOT, message: Message):
     )
 
 
-@bot.add_cmd(cmd="aichat")
+@bot.add_cmd(cmd="aic")
 @run_basic_check
 async def ai_chat(bot: BOT, message: Message):
     """
     CMD: AICHAT
     INFO: Have a Conversation with Gemini AI.
     USAGE:
-        .aichat hello
+        .aic hello
         keep replying to AI responses
         After 5 mins of Idle bot will export history and stop chat.
-        use .load_history to continue
+        use .lh to continue
@@ -55,14 +55,14 @@ async def ai_chat(bot: BOT, message: Message):
     await do_convo(chat=chat, message=message)
 
 
-@bot.add_cmd(cmd="load_history")
+@bot.add_cmd(cmd="lh")
 @run_basic_check
 async def history_chat(bot: BOT, message: Message):
     """
     CMD: LOAD_HISTORY
     INFO: Load a Conversation with Gemini AI from previous session.
     USAGE:
-        .load_history {question} [reply to history document]
+        .lh {question} [reply to history document]
     """
     reply = message.replied
 
@@ -73,7 +73,6 @@ async def history_chat(bot: BOT, message: Message):
         return
 
     resp = await message.reply("Loading History...")
-
     doc = await reply.download(in_memory=True)
     doc.seek(0)
 
@@ -87,7 +86,6 @@ async def do_convo(chat, message: Message):
     prompt = message.input
     reply_to_id = message.id
     chat_id = message.chat.id
-
     old_convo = CONVO_CACHE.get(message.unique_chat_user_id)
 
     if old_convo in Convo.CONVO_DICT[chat_id]:
@@ -107,11 +105,8 @@ async def do_convo(chat, message: Message):
         async with convo_obj:
             while True:
                 ai_response = await chat.send_message_async(prompt)
-
                 ai_response_text = get_response_text(ai_response)
-
                 text = f"**GEMINI AI**:\n\n{ai_response_text}"
-
                 _, prompt_message = await convo_obj.send_message(
                     text=text,
                     reply_to_id=reply_to_id,
@@ -119,6 +114,7 @@ async def do_convo(chat, message: Message):
                     get_response=True,
                 )
                 prompt, reply_to_id = prompt_message.text, prompt_message.id
+
     except TimeoutError:
         await export_history(chat, message)
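Review note (not part of the patch): `.aic` and `.lh` sit on top of google-generativeai's chat sessions; `do_convo` keeps feeding user replies into one session until the 5-minute idle timeout fires and the history is exported. The underlying API in isolation, assuming `genai.configure(api_key=...)` has been called; the model name and `demo_chat` are illustrative:

```python
import asyncio

import google.generativeai as genai


async def demo_chat() -> None:
    model = genai.GenerativeModel("models/gemini-1.5-flash")
    chat = model.start_chat(history=[])
    response = await chat.send_message_async("hello")
    print(response.text)
    # chat.history is the growing list of turns; serializing it is
    # presumably what export_history writes out and .lh reloads.


if __name__ == "__main__":
    asyncio.run(demo_chat())
```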