From 01b2f8b4584f499af46ea14f02a4f4b529da0df8 Mon Sep 17 00:00:00 2001
From: thedragonsinn <98635854+thedragonsinn@users.noreply.github.com>
Date: Wed, 8 Jan 2025 14:38:20 +0530
Subject: [PATCH] feat `ai`: add lmodels cmd; rename stt->ts, load_history->lh,
 aichat->aic

AI commands can now take context from replied messages, and ocrv can hear
audio.
---
 app/plugins/ai/media_query.py | 69 +++++++++++++++++++++--------------
 app/plugins/ai/models.py      | 74 ++++++++++++++++++++++++++++-------
 app/plugins/ai/text_query.py  | 22 +++++---------
 3 files changed, 107 insertions(+), 58 deletions(-)
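Review note (not part of the patch): the common thread in the diffs below is that every AI command now falls back to the replied message when no inline input is given. `run_basic_check` accepts either source, `.ai` folds the replied text into its prompt, and the media commands substitute default prompts. A minimal sketch of the prompt side, assuming the bot's `Message` wrapper exposes `.input` and `.replied` the way the diffs use them; `build_prompt` is a hypothetical helper, not in the patch:

```python
def build_prompt(message) -> str:
    # Replied-message text is optional context; it may be empty.
    reply_text = message.replied.text if message.replied else ""
    # run_basic_check guarantees at least one of the two parts is non-empty.
    return f"{reply_text}\n\n{message.input}".strip()
```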
this may take a while") - if not (prompt and reply and (reply.video or reply.animation)): + if not (reply and (reply.video or reply.animation)): await message_response.edit("Reply to a video and give a prompt.") return - ai_response_text = await handle_video(prompt, reply) + ai_response_text, uploaded_files = await handle_video(prompt, reply) await message_response.edit(ai_response_text) + for uploaded_frame in uploaded_files: + await asyncio.to_thread(genai.delete_file, name=uploaded_frame.name) + @bot.add_cmd(cmd="aim") @run_basic_check @@ -146,14 +155,14 @@ async def download_file(file_name: str, message: Message) -> tuple[str, str]: return file_path, download_dir -async def handle_audio(prompt: str, message: Message): +async def handle_audio(prompt: str, message: Message, model=MODEL): audio = message.document or message.audio or message.voice file_name = getattr(audio, "file_name", "audio.aac") file_path, download_dir = await download_file(file_name, message) file_response = genai.upload_file(path=file_path) - response = await MODEL.generate_content_async([prompt, file_response]) + response = await model.generate_content_async([prompt, file_response]) response_text = get_response_text(response) genai.delete_file(name=file_response.name) @@ -162,15 +171,15 @@ async def handle_audio(prompt: str, message: Message): return response_text -async def handle_code(prompt: str, message: Message): +async def handle_code(prompt: str, message: Message, model=MODEL): file: BytesIO = await message.download(in_memory=True) text = file.getvalue().decode("utf-8") final_prompt = f"{text}\n\n{prompt}" - response = await MODEL.generate_content_async(final_prompt) + response = await model.generate_content_async(final_prompt) return get_response_text(response) -async def handle_photo(prompt: str, message: Message): +async def handle_photo(prompt: str, message: Message, model=MODEL): file = await message.download(in_memory=True) mime_type, _ = mimetypes.guess_type(file.name) @@ -178,34 +187,34 @@ async def handle_photo(prompt: str, message: Message): mime_type = "image/unknown" image_blob = glm.Blob(mime_type=mime_type, data=file.getvalue()) - response = await MODEL.generate_content_async([prompt, image_blob]) + response = await model.generate_content_async([prompt, image_blob]) return get_response_text(response) -async def handle_video(prompt: str, message: Message): +async def handle_video(prompt: str, message: Message, model=MODEL) -> tuple[str, list]: file_name = "v.mp4" file_path, download_dir = await download_file(file_name, message) output_path = os.path.join(download_dir, "output_frame_%04d.png") - ffmpeg_output_error = await run_shell_cmd( - f'ffmpeg -hide_banner -loglevel error -i {file_path} -vf "fps=1" {output_path}' + audio_path = os.path.join(download_dir, "audio.") + + await run_shell_cmd( + f'ffmpeg -hide_banner -loglevel error -i "{file_path}" -vf "fps=1" "{output_path}"' + f"&&" + f'ffmpeg -hide_banner -loglevel error -i "{file_path}" -map 0:a:1 -vn -acodec copy "{audio_path}%(ext)s"' ) - if ffmpeg_output_error: - return ffmpeg_output_error + prompt_n_uploaded_files = [prompt] - extracted_frames = glob.glob(f"{download_dir}/*png") - - uploaded_frames = [] - for frame in extracted_frames: + for frame in glob.glob(f"{download_dir}/*png"): uploaded_frame = await asyncio.to_thread(genai.upload_file, frame) - uploaded_frames.append(uploaded_frame) + prompt_n_uploaded_files.append(uploaded_frame) - response = await MODEL.generate_content_async([prompt, *uploaded_frames]) + for file in 
glob.glob(f"{audio_path}*"): + uploaded_file = await asyncio.to_thread(genai.upload_file, file) + prompt_n_uploaded_files.append(uploaded_file) + + response = await model.generate_content_async(prompt_n_uploaded_files) response_text = get_response_text(response) - - for uploaded_frame in uploaded_frames: - await asyncio.to_thread(genai.delete_file, name=uploaded_frame.name) - shutil.rmtree(download_dir, ignore_errors=True) - return response_text + return response_text, prompt_n_uploaded_files diff --git a/app/plugins/ai/models.py b/app/plugins/ai/models.py index 46690cc..0326334 100644 --- a/app/plugins/ai/models.py +++ b/app/plugins/ai/models.py @@ -2,13 +2,9 @@ from functools import wraps import google.generativeai as genai -from app import BOT, Message, extra_config - - -async def init_task(): - if extra_config.GEMINI_API_KEY: - genai.configure(api_key=extra_config.GEMINI_API_KEY) +from app import BOT, CustomDB, Message, extra_config +SETTINGS = CustomDB("COMMON_SETTINGS") GENERATION_CONFIG = {"temperature": 0.69, "max_output_tokens": 2048} @@ -19,19 +15,69 @@ SAFETY_SETTINGS = [ {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_ONLY_HIGH"}, ] +SYSTEM_INSTRUCTION = ( + "Answer precisely and in short unless specifically instructed otherwise." + "\nWhen asked related to code, do not comment the code and do not explain unless instructed." +) MODEL = genai.GenerativeModel( - model_name="models/gemini-1.5-flash", generation_config=GENERATION_CONFIG, safety_settings=SAFETY_SETTINGS, + system_instruction=SYSTEM_INSTRUCTION, ) -def run_basic_check(func): +async def init_task(): + if extra_config.GEMINI_API_KEY: + genai.configure(api_key=extra_config.GEMINI_API_KEY) - @wraps(func) + model_info = await SETTINGS.find_one({"_id": "gemini_model_info"}) or {} + model_name = model_info.get("model_name") + if model_name: + MODEL._model_name = model_name + + +@BOT.add_cmd(cmd="lmodels") +async def list_ai_models(bot: BOT, message: Message): + """ + CMD: LIST MODELS + INFO: List and change Gemini Models. + USAGE: .lmodels + """ + model_list = [ + model + for model in genai.list_models() + if "generateContent" in model.supported_generation_methods + ] + + mono_names = "".join([f"`{model}`" for model in model_list]) + update_str = ( + f"\n\nCurrent Model: {MODEL._model_name}" + "\n\nTo change to a different model," + "Reply to this message with the model name." + ) + + model_reply = await message.reply(mono_names + update_str, del_in=30, block=False) + response = await model_reply.get_response(timeout=10) + + if not response: + return + + if response.text not in model_list: + await response.edit( + f"Invalid Model... run {message.trigger}lams again" + ) + return + + await SETTINGS.add_data({"_id": "gemini_model_info", "model_name": response.text}) + await response.edit(f"{response.text} saved as model.") + await response.log() + MODEL._model_name = response.text + + +def run_basic_check(function): + @wraps(function) async def wrapper(bot: BOT, message: Message): - if not extra_config.GEMINI_API_KEY: await message.reply( "Gemini API KEY not found." 
@@ -40,21 +86,19 @@ def run_basic_check(func):
             )
             return
 
-        if not message.input:
-            await message.reply("Ask a Question.")
+        if not (message.input or message.replied):
+            await message.reply("Ask a Question | Reply to a Message")
             return
 
         try:
-            await func(bot, message)
+            await function(bot, message)
         except Exception as e:
-
             if "User location is not supported for the API use" in str(e):
                 await message.reply(
                     "Your server location doesn't allow gemini yet."
                     "\nIf you are on koyeb change your app region to Washington DC."
                 )
                 return
-
             raise
 
     return wrapper
diff --git a/app/plugins/ai/text_query.py b/app/plugins/ai/text_query.py
index 1b9bc88..fa18ecc 100644
--- a/app/plugins/ai/text_query.py
+++ b/app/plugins/ai/text_query.py
@@ -18,11 +18,11 @@ async def question(bot: BOT, message: Message):
     INFO: Ask a question to Gemini AI.
     USAGE: .ai what is the meaning of life.
     """
-
-    prompt = message.input
+    reply = message.replied
+    reply_text = reply.text if reply else ""
+    prompt = f"{reply_text}\n\n\n{message.input}".strip()
     response = await MODEL.generate_content_async(prompt)
-
     response_text = get_response_text(response)
 
     if not isinstance(message, Message):
@@ -39,14 +39,14 @@ async def question(bot: BOT, message: Message):
     )
 
 
-@bot.add_cmd(cmd="aichat")
+@bot.add_cmd(cmd="aic")
 @run_basic_check
 async def ai_chat(bot: BOT, message: Message):
     """
     CMD: AICHAT
     INFO: Have a Conversation with Gemini AI.
     USAGE:
-        .aichat hello
+        .aic hello
         keep replying to AI responses
         After 5 mins of Idle bot will export history and stop chat.
-        use .load_history to continue
+        use .lh to continue
@@ -55,14 +55,14 @@ async def ai_chat(bot: BOT, message: Message):
     await do_convo(chat=chat, message=message)
 
 
-@bot.add_cmd(cmd="load_history")
+@bot.add_cmd(cmd="lh")
 @run_basic_check
 async def history_chat(bot: BOT, message: Message):
     """
     CMD: LOAD_HISTORY
     INFO: Load a Conversation with Gemini AI from previous session.
     USAGE:
-        .load_history {question} [reply to history document]
+        .lh {question} [reply to history document]
     """
     reply = message.replied
 
@@ -73,7 +73,6 @@ async def history_chat(bot: BOT, message: Message):
         return
 
     resp = await message.reply("Loading History...")
-
     doc = await reply.download(in_memory=True)
     doc.seek(0)
 
@@ -87,7 +86,6 @@ async def do_convo(chat, message: Message):
     prompt = message.input
     reply_to_id = message.id
     chat_id = message.chat.id
-
     old_convo = CONVO_CACHE.get(message.unique_chat_user_id)
 
     if old_convo in Convo.CONVO_DICT[chat_id]:
@@ -107,11 +105,8 @@ async def do_convo(chat, message: Message):
         async with convo_obj:
             while True:
                 ai_response = await chat.send_message_async(prompt)
-
                 ai_response_text = get_response_text(ai_response)
-
                 text = f"**GEMINI AI**:\n\n{ai_response_text}"
-
                 _, prompt_message = await convo_obj.send_message(
                     text=text,
                     reply_to_id=reply_to_id,
@@ -119,6 +114,7 @@ async def do_convo(chat, message: Message):
                     get_response=True,
                 )
                 prompt, reply_to_id = prompt_message.text, prompt_message.id
+
     except TimeoutError:
         await export_history(chat, message)
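Review note (not part of the patch): `.aic` and `.lh` sit on top of google-generativeai's chat sessions; `do_convo` keeps feeding user replies into one session until the 5-minute idle timeout fires and the history is exported. The underlying API in isolation, assuming `genai.configure(api_key=...)` has been called; the model name and `demo_chat` are illustrative:

```python
import asyncio

import google.generativeai as genai


async def demo_chat() -> None:
    model = genai.GenerativeModel("models/gemini-1.5-flash")
    chat = model.start_chat(history=[])
    response = await chat.send_message_async("hello")
    print(response.text)
    # chat.history is the growing list of turns; serializing it is
    # presumably what export_history writes out and .lh reloads.


if __name__ == "__main__":
    asyncio.run(demo_chat())
```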