ARTICLE AD BOX
import webbrowser
import pyttsx4
import sounddevice as sd
import soundfile as sf
import pvporcupine
import numpy as np
import vosk
import json
import subprocess
PORCUPINE_ACCESS_KEY = "<access_key>"
porcupine = pvporcupine.create(
access_key=PORCUPINE_ACCESS_KEY,
keyword_paths=['R:\\Python\\Mega Projects\\The Eagle AI\\Hey-Eagle_en_windows_v3_0_0.ppn']
)
engine = pyttsx4.init()
voices = engine.getProperty('voices')
for voice in voices:
if "Male" in voice.id or "male" in voice.id:
engine.setProperty('voice', voice.id)
break
def speak(text, filename="output.wav"):
# Save speech to file
engine.save_to_file(text, filename)
engine.runAndWait()
# Read saved audio
data, fs = sf.read(filename)
# Play audio
sd.play(data, fs)
sd.wait()
def process_command(c):
if "opengoogle" in c.lower():
webbrowser.open("https://www.google.com")
if "openlinkedin" in c.lower():
webbrowser.open("https://www.linkedin.com")
if "openyoutube" in c.lower():
webbrowser.open("https://www.youtube.com")
if ("opengithub") or ("open gitub") in c.lower():
webbrowser.open("https://www.github.com")
if "openwhatsapp" in c.lower():
subprocess.run(['explorer.exe', 'shell:AppsFolder\5319275A.WhatsAppDesktop_cv1g1gvanyjgm!App'])
if "openreddit" in c.lower():
webbrowser.open("https://www.reddit.com")
if __name__ == "__main__":
speak("Initializing Eagle...")
speak("Initialized all the tools, libraries, modules and dependencies.")
try:
with sd.InputStream(channels=1, samplerate=porcupine.sample_rate, dtype='int16', blocksize=porcupine.frame_length) as stream:
print("Listening...")
while True:
pcm = stream.read(porcupine.frame_length)[0]
pcm = np.squeeze(pcm) # convert to 1D array
keyword_index = porcupine.process(pcm)
if keyword_index >= 0:
print("Wake word detected!")
speak("In service sir!")
# Record 5 seconds of audio after wake word
chunk_size = 1024
threshold = 500 # amplitude threshold to detect speech
silence_limit = 20 # how many silent chunks before stopping
silent_chunks = 0
recording = []
print("Recording command...")
with sd.InputStream(channels=1, samplerate=porcupine.sample_rate, dtype='int16', blocksize=chunk_size) as rec_stream:
while True:
data = rec_stream.read(chunk_size)[0]
data = np.squeeze(data)
recording.append(data)
if np.max(np.abs(data)) > threshold:
silent_chunks = 0
else:
silent_chunks += 1
if silent_chunks > silence_limit:
break
# Save recorded audio
recording = np.concatenate(recording)
sf.write("command.wav", recording, porcupine.sample_rate)
print("Command recorded: saved as command.wav")
# --------- Vosk Speech-to-Text ---------
model = vosk.Model("R:\\Python\\Mega Projects\\The Eagle AI\\vosk-model-small-en-us-0.15") # replace with your model path
rec = vosk.KaldiRecognizer(model, porcupine.sample_rate)
with sf.SoundFile("command.wav") as f:
while True:
data = f.read(4000, dtype='int16')
if len(data) == 0:
break
# Convert NumPy int16 array to bytes
data_bytes = data.tobytes()
if not rec.AcceptWaveform(data_bytes):
pass
else:
text_command = json.loads(rec.Result())["text"]
print("Recognized Command:", text_command)
break
try:
# Get final result from Vosk
result_json = rec.FinalResult() # use FinalResult() after finishing reading all audio
text_command = json.loads(result_json)["text"]
if text_command.strip():
text_command.strip().lower() # ensure it's not empty
print("Recognized Command:", text_command)
process_command(text_command)
except Exception as e:
print("Error processing the command:", e)
except KeyboardInterrupt:
print("Eagle shutting down...")
porcupine.delete()
I am trying to make a voice based AI app named Eagle AI using gemini 2.5 flash model(not used yet) which can do automation tasks like opening apps, websites and all in my pc.
But the problem I am facing is that the process_command(text_command) function is not running...like i called it but still it is not doing the automation tasks.
I'll be glad if someone helps me resolve the problem...
