From 01b2f8b4584f499af46ea14f02a4f4b529da0df8 Mon Sep 17 00:00:00 2001
From: thedragonsinn <98635854+thedragonsinn@users.noreply.github.com>
Date: Wed, 8 Jan 2025 14:38:20 +0530
Subject: [PATCH] feat(ai): new lmodels cmd and many changes.
Rename stt->ts, load_history->lh, aichat->aic. AI commands can now get context from replied messages, and ocrv can hear audio.
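Example usage after this change (assuming the default "." command trigger; illustrative only):

    .ts [reply to a voice note]        - transcribe with the built-in default prompt
    .ai explain this [as a reply]      - the replied message's text is used as context
    .aic hello                         - start a conversation (previously .aichat)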
---
app/plugins/ai/media_query.py | 65 +++++++++++++++++-------------
app/plugins/ai/models.py | 74 ++++++++++++++++++++++++++++-------
app/plugins/ai/text_query.py | 20 ++++------
3 files changed, 104 insertions(+), 55 deletions(-)
diff --git a/app/plugins/ai/media_query.py b/app/plugins/ai/media_query.py
index 389d1c6..d073280 100644
--- a/app/plugins/ai/media_query.py
+++ b/app/plugins/ai/media_query.py
@@ -59,7 +59,7 @@ async def photo_query(bot: BOT, message: Message):
await message_response.edit(ai_response_text)
-@bot.add_cmd(cmd="stt")
+@bot.add_cmd(cmd="ts")
@run_basic_check
async def audio_to_text(bot: BOT, message: Message):
"""
@@ -67,12 +67,17 @@ async def audio_to_text(bot: BOT, message: Message):
INFO: Convert Audio files to text.
-    USAGE: .stt [reply to audio file] summarise/transcribe the audio file.
+    USAGE: .ts [reply to audio file] summarise/transcribe the audio file.
"""
- prompt = message.input
+ default_prompt = (
+        "Transcribe the audio file AS IS, using the English alphabet."
+        "\nTranslate it only if the audio is not in Hindi/English."
+ "\nDo not summarise."
+ )
+ prompt = message.input or default_prompt
reply = message.replied
-    audio = reply.audio or reply.voice
+    # short-circuit so a missing reply doesn't raise AttributeError
+    audio = reply and (reply.audio or reply.voice)
message_response = await message.reply("processing... this may take a while")
-    if not (prompt and reply and audio):
-        await message_response.edit("Reply to an audio file and give a prompt.")
+    if not (reply and audio):
+        await message_response.edit("Reply to an audio file.")
return
@@ -88,17 +93,21 @@ async def video_to_text(bot: BOT, message: Message):
INFO: Convert Video info to text.
USAGE: .ocrv [reply to video file] summarise the video file.
"""
- prompt = message.input
+ default_prompt = "Summarize the file"
+ prompt = message.input or default_prompt
reply = message.replied
message_response = await message.reply("processing... this may take a while")
-    if not (prompt and reply and (reply.video or reply.animation)):
-        await message_response.edit("Reply to a video and give a prompt.")
+    if not (reply and (reply.video or reply.animation)):
+        await message_response.edit("Reply to a video file.")
return
- ai_response_text = await handle_video(prompt, reply)
+ ai_response_text, uploaded_files = await handle_video(prompt, reply)
await message_response.edit(ai_response_text)
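+    # handle_video no longer deletes its uploads; clean up the returned files here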
+    for uploaded_file in uploaded_files:
+        await asyncio.to_thread(genai.delete_file, name=uploaded_file.name)
+
@bot.add_cmd(cmd="aim")
@run_basic_check
@@ -146,14 +155,14 @@ async def download_file(file_name: str, message: Message) -> tuple[str, str]:
return file_path, download_dir
-async def handle_audio(prompt: str, message: Message):
+async def handle_audio(prompt: str, message: Message, model=MODEL):
audio = message.document or message.audio or message.voice
file_name = getattr(audio, "file_name", "audio.aac")
file_path, download_dir = await download_file(file_name, message)
file_response = genai.upload_file(path=file_path)
- response = await MODEL.generate_content_async([prompt, file_response])
+ response = await model.generate_content_async([prompt, file_response])
response_text = get_response_text(response)
genai.delete_file(name=file_response.name)
@@ -162,15 +171,15 @@ async def handle_audio(prompt: str, message: Message):
return response_text
-async def handle_code(prompt: str, message: Message):
+async def handle_code(prompt: str, message: Message, model=MODEL):
file: BytesIO = await message.download(in_memory=True)
text = file.getvalue().decode("utf-8")
final_prompt = f"{text}\n\n{prompt}"
- response = await MODEL.generate_content_async(final_prompt)
+ response = await model.generate_content_async(final_prompt)
return get_response_text(response)
-async def handle_photo(prompt: str, message: Message):
+async def handle_photo(prompt: str, message: Message, model=MODEL):
file = await message.download(in_memory=True)
mime_type, _ = mimetypes.guess_type(file.name)
@@ -178,34 +187,34 @@ async def handle_photo(prompt: str, message: Message):
mime_type = "image/unknown"
image_blob = glm.Blob(mime_type=mime_type, data=file.getvalue())
- response = await MODEL.generate_content_async([prompt, image_blob])
+ response = await model.generate_content_async([prompt, image_blob])
return get_response_text(response)
-async def handle_video(prompt: str, message: Message):
+async def handle_video(prompt: str, message: Message, model=MODEL) -> tuple[str, list]:
file_name = "v.mp4"
file_path, download_dir = await download_file(file_name, message)
output_path = os.path.join(download_dir, "output_frame_%04d.png")
- ffmpeg_output_error = await run_shell_cmd(
- f'ffmpeg -hide_banner -loglevel error -i {file_path} -vf "fps=1" {output_path}'
+    # ffmpeg cannot template output extensions; a Matroska audio container is
+    # assumed here since it can hold nearly any codec with "-acodec copy"; the
+    # trailing "?" makes the audio map optional for videos with no audio track
+    audio_path = os.path.join(download_dir, "audio.mka")
+
+    await run_shell_cmd(
+        f'ffmpeg -hide_banner -loglevel error -i "{file_path}" -vf "fps=1" "{output_path}"'
+        f" && "
+        f'ffmpeg -hide_banner -loglevel error -i "{file_path}" -map "0:a:0?" -vn -acodec copy "{audio_path}"'
)
- if ffmpeg_output_error:
- return ffmpeg_output_error
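+    # build a single request payload: the prompt first, then every uploaded file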
+ prompt_n_uploaded_files = [prompt]
- extracted_frames = glob.glob(f"{download_dir}/*png")
-
- uploaded_frames = []
- for frame in extracted_frames:
+ for frame in glob.glob(f"{download_dir}/*png"):
uploaded_frame = await asyncio.to_thread(genai.upload_file, frame)
- uploaded_frames.append(uploaded_frame)
+ prompt_n_uploaded_files.append(uploaded_frame)
- response = await MODEL.generate_content_async([prompt, *uploaded_frames])
+    # the audio stream is optional, so upload it only if ffmpeg extracted one
+    if os.path.exists(audio_path):
+        uploaded_file = await asyncio.to_thread(genai.upload_file, audio_path)
+        prompt_n_uploaded_files.append(uploaded_file)
+
+ response = await model.generate_content_async(prompt_n_uploaded_files)
response_text = get_response_text(response)
-
- for uploaded_frame in uploaded_frames:
- await asyncio.to_thread(genai.delete_file, name=uploaded_frame.name)
-
shutil.rmtree(download_dir, ignore_errors=True)
- return response_text
+    # drop the prompt string; the caller only needs the uploaded files to delete
+    return response_text, prompt_n_uploaded_files[1:]
diff --git a/app/plugins/ai/models.py b/app/plugins/ai/models.py
index 46690cc..0326334 100644
--- a/app/plugins/ai/models.py
+++ b/app/plugins/ai/models.py
@@ -2,13 +2,9 @@ from functools import wraps
import google.generativeai as genai
-from app import BOT, Message, extra_config
-
-
-async def init_task():
- if extra_config.GEMINI_API_KEY:
- genai.configure(api_key=extra_config.GEMINI_API_KEY)
+from app import BOT, CustomDB, Message, extra_config
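+# DB collection used to persist the selected Gemini model across restarts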
+SETTINGS = CustomDB("COMMON_SETTINGS")
GENERATION_CONFIG = {"temperature": 0.69, "max_output_tokens": 2048}
@@ -19,19 +15,69 @@ SAFETY_SETTINGS = [
{"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_ONLY_HIGH"},
]
+SYSTEM_INSTRUCTION = (
+ "Answer precisely and in short unless specifically instructed otherwise."
+    "\nWhen asked about code, do not comment the code and do not explain it unless instructed."
+)
MODEL = genai.GenerativeModel(
- model_name="models/gemini-1.5-flash",
generation_config=GENERATION_CONFIG,
safety_settings=SAFETY_SETTINGS,
+ system_instruction=SYSTEM_INSTRUCTION,
)
-def run_basic_check(func):
+async def init_task():
+ if extra_config.GEMINI_API_KEY:
+ genai.configure(api_key=extra_config.GEMINI_API_KEY)
- @wraps(func)
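+    # restore the previously selected model from the DB; this pokes the private
+    # _model_name attribute instead of constructing a new GenerativeModel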
+ model_info = await SETTINGS.find_one({"_id": "gemini_model_info"}) or {}
+ model_name = model_info.get("model_name")
+ if model_name:
+ MODEL._model_name = model_name
+
+
+@BOT.add_cmd(cmd="lmodels")
+async def list_ai_models(bot: BOT, message: Message):
+ """
+    CMD: LMODELS
+ INFO: List and change Gemini Models.
+ USAGE: .lmodels
+ """
+    model_list = [
+        model.name
+        for model in genai.list_models()
+        if "generateContent" in model.supported_generation_methods
+    ]
+
+    mono_names = "\n".join(f"`{name}`" for name in model_list)
+ update_str = (
+ f"\n\nCurrent Model: {MODEL._model_name}"
+        "\n\nTo change to a different model, "
+        "reply to this message with the model name."
+ )
+
+ model_reply = await message.reply(mono_names + update_str, del_in=30, block=False)
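+    # get_response (custom helper) waits up to 10s for the user's reply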
+ response = await model_reply.get_response(timeout=10)
+
+ if not response:
+ return
+
+ if response.text not in model_list:
+        await response.edit(f"Invalid Model... run {message.trigger}lmodels again")
+ return
+
+ await SETTINGS.add_data({"_id": "gemini_model_info", "model_name": response.text})
+ await response.edit(f"{response.text} saved as model.")
+ await response.log()
+ MODEL._model_name = response.text
+
+
+def run_basic_check(function):
+ @wraps(function)
async def wrapper(bot: BOT, message: Message):
-
if not extra_config.GEMINI_API_KEY:
await message.reply(
"Gemini API KEY not found."
@@ -40,21 +86,19 @@ def run_basic_check(func):
)
return
- if not message.input:
- await message.reply("Ask a Question.")
+ if not (message.input or message.replied):
+ await message.reply("Ask a Question | Reply to a Message")
return
try:
- await func(bot, message)
+ await function(bot, message)
except Exception as e:
-
if "User location is not supported for the API use" in str(e):
await message.reply(
"Your server location doesn't allow gemini yet."
"\nIf you are on koyeb change your app region to Washington DC."
)
return
-
raise
return wrapper
diff --git a/app/plugins/ai/text_query.py b/app/plugins/ai/text_query.py
index 1b9bc88..fa18ecc 100644
--- a/app/plugins/ai/text_query.py
+++ b/app/plugins/ai/text_query.py
@@ -18,11 +18,11 @@ async def question(bot: BOT, message: Message):
INFO: Ask a question to Gemini AI.
USAGE: .ai what is the meaning of life.
"""
-
- prompt = message.input
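+    # prepend the replied message's text (if any) as context for the prompt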
+ reply = message.replied
+ reply_text = reply.text if reply else ""
+ prompt = f"{reply_text}\n\n\n{message.input}".strip()
response = await MODEL.generate_content_async(prompt)
-
response_text = get_response_text(response)
if not isinstance(message, Message):
@@ -39,14 +39,14 @@ async def question(bot: BOT, message: Message):
)
-@bot.add_cmd(cmd="aichat")
+@bot.add_cmd(cmd="aic")
@run_basic_check
async def ai_chat(bot: BOT, message: Message):
"""
CMD: AICHAT
INFO: Have a Conversation with Gemini AI.
USAGE:
- .aichat hello
+ .aic hello
keep replying to AI responses
-        After 5 mins of Idle bot will export history and stop chat.
-        use .load_history to continue
+        After 5 mins of idle, the bot will export the history and stop the chat.
+        Use .lh to continue.
@@ -55,14 +55,14 @@ async def ai_chat(bot: BOT, message: Message):
await do_convo(chat=chat, message=message)
-@bot.add_cmd(cmd="load_history")
+@bot.add_cmd(cmd="lh")
@run_basic_check
async def history_chat(bot: BOT, message: Message):
"""
CMD: LOAD_HISTORY
INFO: Load a Conversation with Gemini AI from previous session.
USAGE:
- .load_history {question} [reply to history document]
+ .lh {question} [reply to history document]
"""
reply = message.replied
@@ -73,7 +73,6 @@ async def history_chat(bot: BOT, message: Message):
return
resp = await message.reply("Loading History...")
-
doc = await reply.download(in_memory=True)
doc.seek(0)
@@ -87,7 +86,6 @@ async def do_convo(chat, message: Message):
prompt = message.input
reply_to_id = message.id
chat_id = message.chat.id
-
old_convo = CONVO_CACHE.get(message.unique_chat_user_id)
if old_convo in Convo.CONVO_DICT[chat_id]:
@@ -107,11 +105,8 @@ async def do_convo(chat, message: Message):
async with convo_obj:
while True:
ai_response = await chat.send_message_async(prompt)
-
ai_response_text = get_response_text(ai_response)
-
text = f"**GEMINI AI**:\n\n{ai_response_text}"
-
_, prompt_message = await convo_obj.send_message(
text=text,
reply_to_id=reply_to_id,
@@ -119,6 +114,7 @@ async def do_convo(chat, message: Message):
get_response=True,
)
prompt, reply_to_id = prompt_message.text, prompt_message.id
+
except TimeoutError:
await export_history(chat, message)