command_recognizer/record_save.py at master · Conscious-AI/command_recognizer · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import pyaudio
import math
import struct
import wave
import time
import os
import sys
from argparse import ArgumentParser

import pandas as pd

sys.path.append('..')
# Audio configs vars
from audio_processing.audio_confs import *


# Scale factor applied to each signed 16-bit sample before squaring in
# Recorder.rms (maps the -32768..32767 range into [-1.0, 1.0)).
SHORT_NORMALIZE = (1.0/32768.0)
# Seconds added to the recording deadline every time a chunk reaches the
# loudness threshold; recording stops once this long passes without one.
TIMEOUT_LENGTH = 2


def printout(_str):
    """Print ``_str`` followed by a newline and flush stdout immediately.

    Flushing keeps progress messages visible in real time even when stdout
    is buffered (e.g. piped to another process).
    """
    print(_str, flush=True)


class Recorder:
    """Record loudness-triggered audio samples and save them as WAV files.

    For every row of the command CSV the recorder waits until the input
    level exceeds ``THRESHOLD``, records until the level has stayed below
    the threshold for ``TIMEOUT_LENGTH`` seconds, and writes the result to
    ``<root_dir>/<command_dir>/<initials>-<n>.wav``. This is repeated
    ``samples`` times per row.

    The audio settings (``FORMAT``, ``CHANNELS``, ``S_RATE``, ``S_WIDTH``,
    ``CHUNK``, ``THRESHOLD``) are module-level names, presumably supplied by
    the ``audio_confs`` star import.

    Attributes set by ``__init__``:
        p: the ``pyaudio.PyAudio`` instance.
        stream: the opened audio stream that chunks are read from.
        root_dir: directory under which per-command folders are created.
        samples: number of recordings to take per command.
        command_csv: the parsed CSV (``pandas.DataFrame``).
        command_dirs: first CSV column, the per-command directory names.
        command_labels: second CSV column, the text shown to the speaker.
        filename: path of the next WAV file, set by ``get_file_dir``.
    """

    @staticmethod
    def rms(frame):
        """Return the root-mean-square level of ``frame``, scaled by 1000.

        Args:
            frame: bytes holding signed 16-bit samples (struct format ``h``).

        Returns:
            The RMS of the normalised samples multiplied by 1000, or ``0.0``
            when ``frame`` does not contain a single complete sample.
        """
        # Integer division: a sample count is never fractional.
        count = len(frame) // S_WIDTH
        if count == 0:
            # An empty read carries no energy; avoid dividing by zero.
            return 0.0

        shorts = struct.unpack("%dh" % count, frame)
        sum_squares = sum(
            (sample * SHORT_NORMALIZE) ** 2 for sample in shorts)

        return math.sqrt(sum_squares / count) * 1000

    @staticmethod
    def get_file_dir(self, idx, i):
        """Create the directory for command ``idx`` and set ``self.filename``.

        This is deliberately kept as a static method that receives the
        instance explicitly, so existing ``obj.get_file_dir(obj, idx, i)``
        call sites keep working.

        Args:
            self: object providing ``root_dir`` and ``command_dirs``; its
                ``filename`` attribute is overwritten.
            idx: index of the command in ``self.command_dirs``.
            i: zero-based sample number; the file name uses ``i + 1``.
        """
        command_dir = self.command_dirs[idx]

        # File prefix = first letter of each underscore-separated word,
        # e.g. "turn_on_light" -> "tol". Empty words (from doubled or
        # leading/trailing underscores) are skipped instead of crashing.
        initials = ''.join(
            word[0] for word in command_dir.split('_') if word)

        # os.path.join inserts a separator only where one is missing, so the
        # file ends up inside the created directory whether or not root_dir
        # or the CSV entry carries a trailing slash.
        target_dir = os.path.join(self.root_dir, command_dir)
        os.makedirs(target_dir, exist_ok=True)

        self.filename = os.path.join(
            target_dir, '{}-{}.wav'.format(initials, i + 1))

    def __init__(self, root_dir, csv_file, samples):
        """Load the command list and open the audio stream.

        Args:
            root_dir: directory under which recordings are stored.
            csv_file: CSV whose first column holds directory names and whose
                second column holds the labels shown to the speaker.
            samples: number of recordings to take per command.
        """
        self.root_dir = root_dir
        self.samples = samples
        self.filename = None

        # Parse the CSV before touching the audio device so that a bad path
        # fails fast without leaving an open stream behind.
        self.command_csv = pd.read_csv(csv_file)
        self.command_dirs = self.command_csv.iloc[:, 0]
        self.command_labels = self.command_csv.iloc[:, 1]

        self.p = pyaudio.PyAudio()
        # NOTE(review): output=True is kept from the original although this
        # class never writes to the stream; confirm whether it is required.
        self.stream = self.p.open(format=FORMAT,
                                  channels=CHANNELS,
                                  rate=S_RATE,
                                  input=True,
                                  output=True,
                                  frames_per_buffer=CHUNK)

    def record(self):
        """Record until the input stays quiet for ``TIMEOUT_LENGTH`` seconds.

        Every chunk at or above ``THRESHOLD`` pushes the deadline back; all
        chunks read are concatenated and handed to ``write``.
        """
        printout('Audio detected, recording now ...')
        chunks = []
        # A monotonic clock keeps the timeout immune to wall-clock changes.
        deadline = time.monotonic() + TIMEOUT_LENGTH

        while time.monotonic() <= deadline:
            data = self.stream.read(CHUNK)
            if self.rms(data) >= THRESHOLD:
                deadline = time.monotonic() + TIMEOUT_LENGTH
            chunks.append(data)

        self.write(b''.join(chunks))

    def write(self, recording):
        """Write ``recording`` (raw sample bytes) to ``self.filename`` as WAV.

        Raises:
            RuntimeError: if no target file has been chosen yet.
        """
        if self.filename is None:
            raise RuntimeError(
                'get_file_dir() must be called before write()')

        printout('Saving audio sample ...')
        # The context manager closes the file even if writing fails.
        with wave.open(self.filename, 'wb') as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(self.p.get_sample_size(FORMAT))
            wf.setframerate(S_RATE)
            wf.writeframes(recording)
        printout('Written to file: {}'.format(self.filename))
        printout('Listening again ...')

    def listen(self):
        """Collect ``self.samples`` recordings for every command in the CSV."""
        for idx, label in enumerate(self.command_labels):
            printout('Listening for \"{}\" ...\n'.format(label))
            for i in range(self.samples):
                # Discard chunks until one is loud enough to start recording.
                # NOTE(review): the triggering chunk itself is not saved.
                while self.rms(self.stream.read(CHUNK)) <= THRESHOLD:
                    pass
                self.get_file_dir(self, idx, i)
                self.record()
        printout('Done.')

    def close(self):
        """Stop and close the audio stream and terminate PyAudio."""
        self.stream.stop_stream()
        self.stream.close()
        self.p.terminate()


# Command-line interface: where to store recordings, which CSV lists the
# commands, and how many samples to record per command.
parser = ArgumentParser(
    description='Records and saves a stream of audio as wavfile from default input device inside a root directory according to a csv file')

parser.add_argument('-r', '--root', dest='root', type=str, required=False,
                    default='./data/', help='Root Directory to store the audio files')
parser.add_argument('-c', '--csv', dest='csv', type=str, required=False,
                    default='./data/command_labels.csv', help='CSV file contaning the sentences')
parser.add_argument('-s', '--samples', dest='samples', type=int, required=False,
                    default=5, help='Number of audio samples to take per sentence')


def main(argv=None):
    """Parse the command line and run a full recording session.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    args = parser.parse_args(argv)
    recorder = Recorder(args.root, args.csv, args.samples)
    try:
        recorder.listen()
    finally:
        # Release the audio device even if the session is interrupted.
        recorder.stream.stop_stream()
        recorder.stream.close()
        recorder.p.terminate()


# Only record when executed as a script, so importing this module does not
# parse the command line or open the audio device.
if __name__ == '__main__':
    main()