feat ai: add lmodels cmd and assorted improvements

Rename stt -> ts, load_history -> lh, aichat -> aic. AI commands can now take context from replied messages, and ocrv can hear audio.
thedragonsinn
2025-01-08 14:38:20 +05:30
parent 343bc43080
commit 01b2f8b458
3 changed files with 104 additions and 55 deletions
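
The replied-message feature boils down to prepending the replied text to the user's prompt before it reaches Gemini. A minimal sketch of the idea, assuming this repo's Message accessors (message.input, message.replied); the triple-newline separator matches the .ai handler in the diff below:

    # Build the final prompt from an optional replied message.
    reply = message.replied
    reply_text = reply.text if reply else ""
    prompt = f"{reply_text}\n\n\n{message.input}".strip()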

View File

@@ -59,7 +59,7 @@ async def photo_query(bot: BOT, message: Message):
     await message_response.edit(ai_response_text)
 
 
-@bot.add_cmd(cmd="stt")
+@bot.add_cmd(cmd="ts")
 @run_basic_check
 async def audio_to_text(bot: BOT, message: Message):
     """
@@ -67,12 +67,17 @@ async def audio_to_text(bot: BOT, message: Message):
     INFO: Convert Audio files to text.
     USAGE: .stt [reply to audio file] summarise/transcribe the audio file.
     """
-    prompt = message.input
+    default_prompt = (
+        "Transcribe the audio file to english alphabets AS IS."
+        "\nTranslate it only if the audio is not in hindi/english."
+        "\nDo not summarise."
+    )
+    prompt = message.input or default_prompt
     reply = message.replied
     audio = reply.audio or reply.voice
     message_response = await message.reply("processing... this may take a while")
-    if not (prompt and reply and audio):
+    if not (reply and audio):
         await message_response.edit("Reply to an audio file and give a prompt.")
         return
@@ -88,17 +93,21 @@ async def video_to_text(bot: BOT, message: Message):
     INFO: Convert Video info to text.
     USAGE: .ocrv [reply to video file] summarise the video file.
     """
-    prompt = message.input
+    default_prompt = "Summarize the file"
+    prompt = message.input or default_prompt
     reply = message.replied
     message_response = await message.reply("processing... this may take a while")
-    if not (prompt and reply and (reply.video or reply.animation)):
+    if not (reply and (reply.video or reply.animation)):
         await message_response.edit("Reply to a video and give a prompt.")
         return
-    ai_response_text = await handle_video(prompt, reply)
+    ai_response_text, uploaded_files = await handle_video(prompt, reply)
     await message_response.edit(ai_response_text)
+    for uploaded_frame in uploaded_files:
+        await asyncio.to_thread(genai.delete_file, name=uploaded_frame.name)
 
 
 @bot.add_cmd(cmd="aim")
 @run_basic_check
@@ -146,14 +155,14 @@ async def download_file(file_name: str, message: Message) -> tuple[str, str]:
     return file_path, download_dir
 
 
-async def handle_audio(prompt: str, message: Message):
+async def handle_audio(prompt: str, message: Message, model=MODEL):
     audio = message.document or message.audio or message.voice
     file_name = getattr(audio, "file_name", "audio.aac")
     file_path, download_dir = await download_file(file_name, message)
     file_response = genai.upload_file(path=file_path)
-    response = await MODEL.generate_content_async([prompt, file_response])
+    response = await model.generate_content_async([prompt, file_response])
     response_text = get_response_text(response)
     genai.delete_file(name=file_response.name)
@@ -162,15 +171,15 @@ async def handle_audio(prompt: str, message: Message):
     return response_text
 
 
-async def handle_code(prompt: str, message: Message):
+async def handle_code(prompt: str, message: Message, model=MODEL):
     file: BytesIO = await message.download(in_memory=True)
     text = file.getvalue().decode("utf-8")
     final_prompt = f"{text}\n\n{prompt}"
-    response = await MODEL.generate_content_async(final_prompt)
+    response = await model.generate_content_async(final_prompt)
     return get_response_text(response)
 
 
-async def handle_photo(prompt: str, message: Message):
+async def handle_photo(prompt: str, message: Message, model=MODEL):
     file = await message.download(in_memory=True)
     mime_type, _ = mimetypes.guess_type(file.name)
@@ -178,34 +187,34 @@ async def handle_photo(prompt: str, message: Message):
         mime_type = "image/unknown"
     image_blob = glm.Blob(mime_type=mime_type, data=file.getvalue())
-    response = await MODEL.generate_content_async([prompt, image_blob])
+    response = await model.generate_content_async([prompt, image_blob])
     return get_response_text(response)
 
 
-async def handle_video(prompt: str, message: Message):
+async def handle_video(prompt: str, message: Message, model=MODEL) -> tuple[str, list]:
     file_name = "v.mp4"
     file_path, download_dir = await download_file(file_name, message)
     output_path = os.path.join(download_dir, "output_frame_%04d.png")
-    ffmpeg_output_error = await run_shell_cmd(
-        f'ffmpeg -hide_banner -loglevel error -i {file_path} -vf "fps=1" {output_path}'
-    )
-    if ffmpeg_output_error:
-        return ffmpeg_output_error
+    audio_path = os.path.join(download_dir, "audio.m4a")
+    await run_shell_cmd(
+        f'ffmpeg -hide_banner -loglevel error -i "{file_path}" -vf "fps=1" "{output_path}"'
+        f" && "
+        f'ffmpeg -hide_banner -loglevel error -i "{file_path}" -map 0:a:0 -vn -acodec copy "{audio_path}"'
+    )
-    extracted_frames = glob.glob(f"{download_dir}/*png")
-    uploaded_frames = []
-    for frame in extracted_frames:
-        uploaded_frame = await asyncio.to_thread(genai.upload_file, frame)
-        uploaded_frames.append(uploaded_frame)
-    response = await MODEL.generate_content_async([prompt, *uploaded_frames])
+    prompt_n_uploaded_files = [prompt]
+    for frame in glob.glob(f"{download_dir}/*png"):
+        uploaded_frame = await asyncio.to_thread(genai.upload_file, frame)
+        prompt_n_uploaded_files.append(uploaded_frame)
+    for file in glob.glob(f"{audio_path}*"):
+        uploaded_file = await asyncio.to_thread(genai.upload_file, file)
+        prompt_n_uploaded_files.append(uploaded_file)
+    response = await model.generate_content_async(prompt_n_uploaded_files)
     response_text = get_response_text(response)
-    for uploaded_frame in uploaded_frames:
-        await asyncio.to_thread(genai.delete_file, name=uploaded_frame.name)
     shutil.rmtree(download_dir, ignore_errors=True)
-    return response_text
+    return response_text, prompt_n_uploaded_files[1:]  # skip the prompt string
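
handle_video shells out to ffmpeg twice: once to sample one frame per second, once to copy out the first audio track. A standalone sketch of those two calls using asyncio's subprocess API in place of the repo's run_shell_cmd helper; paths are illustrative, and -acodec copy into .m4a assumes the source audio is AAC:

    import asyncio
    import os

    async def extract_media(file_path: str, download_dir: str) -> None:
        # One PNG per second of video, plus the first audio stream as-is.
        frames = os.path.join(download_dir, "output_frame_%04d.png")
        audio = os.path.join(download_dir, "audio.m4a")
        cmd = (
            f'ffmpeg -hide_banner -loglevel error -i "{file_path}" -vf "fps=1" "{frames}"'
            f" && "
            f'ffmpeg -hide_banner -loglevel error -i "{file_path}" -map 0:a:0 -vn -acodec copy "{audio}"'
        )
        proc = await asyncio.create_subprocess_shell(cmd)
        await proc.wait()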

View File

@@ -2,13 +2,9 @@ from functools import wraps
 
 import google.generativeai as genai
 
-from app import BOT, Message, extra_config
-
-
-async def init_task():
-    if extra_config.GEMINI_API_KEY:
-        genai.configure(api_key=extra_config.GEMINI_API_KEY)
+from app import BOT, CustomDB, Message, extra_config
+
+SETTINGS = CustomDB("COMMON_SETTINGS")
 
 GENERATION_CONFIG = {"temperature": 0.69, "max_output_tokens": 2048}
@@ -19,19 +15,69 @@ SAFETY_SETTINGS = [
     {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_ONLY_HIGH"},
 ]
 
+SYSTEM_INSTRUCTION = (
+    "Answer precisely and in short unless specifically instructed otherwise."
+    "\nWhen asked related to code, do not comment the code and do not explain unless instructed."
+)
+
 MODEL = genai.GenerativeModel(
     model_name="models/gemini-1.5-flash",
     generation_config=GENERATION_CONFIG,
     safety_settings=SAFETY_SETTINGS,
+    system_instruction=SYSTEM_INSTRUCTION,
 )
 
 
-def run_basic_check(func):
-    @wraps(func)
+async def init_task():
+    if extra_config.GEMINI_API_KEY:
+        genai.configure(api_key=extra_config.GEMINI_API_KEY)
+    model_info = await SETTINGS.find_one({"_id": "gemini_model_info"}) or {}
+    model_name = model_info.get("model_name")
+    if model_name:
+        MODEL._model_name = model_name
+
+
+@BOT.add_cmd(cmd="lmodels")
+async def list_ai_models(bot: BOT, message: Message):
+    """
+    CMD: LIST MODELS
+    INFO: List and change Gemini Models.
+    USAGE: .lmodels
+    """
+    model_list = [
+        model.name
+        for model in genai.list_models()
+        if "generateContent" in model.supported_generation_methods
+    ]
+    mono_names = "\n".join(f"`{model}`" for model in model_list)
+    update_str = (
+        f"\n\nCurrent Model: {MODEL._model_name}"
+        "\n\nTo change to a different model, "
+        "reply to this message with the model name."
+    )
+    model_reply = await message.reply(mono_names + update_str, del_in=30, block=False)
+    response = await model_reply.get_response(timeout=10)
+    if not response:
+        return
+    if response.text not in model_list:
+        await response.edit(
+            f"Invalid Model... run <code>{message.trigger}lmodels</code> again"
+        )
+        return
+    await SETTINGS.add_data({"_id": "gemini_model_info", "model_name": response.text})
+    await response.edit(f"{response.text} saved as model.")
+    await response.log()
+    MODEL._model_name = response.text
+
+
+def run_basic_check(function):
+    @wraps(function)
     async def wrapper(bot: BOT, message: Message):
@@ -40,21 +86,19 @@ def run_basic_check(func):
             )
             return
 
-        if not message.input:
-            await message.reply("Ask a Question.")
+        if not (message.input or message.replied):
+            await message.reply("<code>Ask a Question | Reply to a Message</code>")
             return
 
         try:
-            await func(bot, message)
+            await function(bot, message)
         except Exception as e:
             if "User location is not supported for the API use" in str(e):
                 await message.reply(
                     "Your server location doesn't allow gemini yet."
                     "\nIf you are on koyeb change your app region to Washington DC."
                 )
                 return
             raise
 
     return wrapper
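
The lmodels menu comes straight from the google-generativeai client: list every model and keep the ones that can serve generateContent. A runnable sketch of that query (the API key is a placeholder):

    import google.generativeai as genai

    genai.configure(api_key="YOUR_KEY")  # placeholder
    names = [
        model.name
        for model in genai.list_models()
        if "generateContent" in model.supported_generation_methods
    ]
    print("\n".join(names))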

View File

@@ -18,11 +18,11 @@ async def question(bot: BOT, message: Message):
     INFO: Ask a question to Gemini AI.
     USAGE: .ai what is the meaning of life.
     """
-    prompt = message.input
+    reply = message.replied
+    reply_text = reply.text if reply else ""
+    prompt = f"{reply_text}\n\n\n{message.input}".strip()
     response = await MODEL.generate_content_async(prompt)
     response_text = get_response_text(response)
 
     if not isinstance(message, Message):
@@ -39,14 +39,14 @@ async def question(bot: BOT, message: Message):
     )
 
 
-@bot.add_cmd(cmd="aichat")
+@bot.add_cmd(cmd="aic")
 @run_basic_check
 async def ai_chat(bot: BOT, message: Message):
     """
     CMD: AICHAT
     INFO: Have a Conversation with Gemini AI.
     USAGE:
-        .aichat hello
+        .aic hello
         keep replying to AI responses
     After 5 mins of Idle bot will export history and stop chat.
     use .load_history to continue
@@ -55,14 +55,14 @@ async def ai_chat(bot: BOT, message: Message):
     await do_convo(chat=chat, message=message)
 
 
-@bot.add_cmd(cmd="load_history")
+@bot.add_cmd(cmd="lh")
 @run_basic_check
 async def history_chat(bot: BOT, message: Message):
     """
     CMD: LOAD_HISTORY
     INFO: Load a Conversation with Gemini AI from previous session.
     USAGE:
-        .load_history {question} [reply to history document]
+        .lh {question} [reply to history document]
     """
     reply = message.replied
@@ -73,7 +73,6 @@ async def history_chat(bot: BOT, message: Message):
         return
     resp = await message.reply("<i>Loading History...</i>")
-
     doc = await reply.download(in_memory=True)
     doc.seek(0)
@@ -87,7 +86,6 @@ async def do_convo(chat, message: Message):
     prompt = message.input
     reply_to_id = message.id
     chat_id = message.chat.id
-
     old_convo = CONVO_CACHE.get(message.unique_chat_user_id)
 
     if old_convo in Convo.CONVO_DICT[chat_id]:
@@ -107,11 +105,8 @@ async def do_convo(chat, message: Message):
         async with convo_obj:
             while True:
                 ai_response = await chat.send_message_async(prompt)
                 ai_response_text = get_response_text(ai_response)
                 text = f"**GEMINI AI**:\n\n{ai_response_text}"
-
                 _, prompt_message = await convo_obj.send_message(
                     text=text,
                     reply_to_id=reply_to_id,
@@ -119,6 +114,7 @@ async def do_convo(chat, message: Message):
                     get_response=True,
                 )
                 prompt, reply_to_id = prompt_message.text, prompt_message.id
+
     except TimeoutError:
         await export_history(chat, message)
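
The chat loop's idle handling is the classic wait-with-timeout pattern: keep exchanging messages until no reply arrives in the window, then export the history. A minimal sketch, with asyncio.wait_for standing in for the repo's Convo machinery (get_reply and export_history are illustrative stand-ins; chat is a google-generativeai ChatSession):

    import asyncio

    async def chat_loop(chat, get_reply, export_history, prompt: str) -> None:
        try:
            while True:
                ai_response = await chat.send_message_async(prompt)
                # Wait up to 5 minutes for the user's next message.
                prompt = await asyncio.wait_for(get_reply(ai_response.text), timeout=300)
        except asyncio.TimeoutError:
            await export_history(chat)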