Skip to content

Commit

Permalink
Merge pull request #175 from linto-ai/features/explicit_vad
Browse files Browse the repository at this point in the history
Support explicit start/end timestamps of speech activity detection (VAD) given by the user + add vad segments in the output
  • Loading branch information
Jeronymous authored Mar 3, 2024
2 parents 79cc85e + f2f17bd commit bdee5d3
Show file tree
Hide file tree
Showing 64 changed files with 9,053 additions and 8,363 deletions.
424 changes: 212 additions & 212 deletions tests/expected/corner_cases/accurate.tiny_apollo11.mp3.words.json

Large diffs are not rendered by default.

152 changes: 76 additions & 76 deletions tests/expected/corner_cases/issue24_empty.wav.words.json

Large diffs are not rendered by default.

40 changes: 20 additions & 20 deletions tests/expected/corner_cases/large-v2.accurate_gloria.mp3.words.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
50664
],
"temperature": 0.0,
"avg_logprob": -0.3579153639546941,
"avg_logprob": -0.35854845368460325,
"compression_ratio": 1.425414364640884,
"no_speech_prob": 0.24429768323898315,
"no_speech_prob": 0.24429647624492645,
"confidence": 0.452,
"words": [
{
Expand All @@ -37,7 +37,7 @@
"text": "my",
"start": 1.94,
"end": 2.7,
"confidence": 0.368
"confidence": 0.367
},
{
"text": "glorious",
Expand Down Expand Up @@ -92,9 +92,9 @@
50814
],
"temperature": 0.0,
"avg_logprob": -0.3579153639546941,
"avg_logprob": -0.35854845368460325,
"compression_ratio": 1.425414364640884,
"no_speech_prob": 0.24429768323898315,
"no_speech_prob": 0.24429647624492645,
"confidence": 0.68,
"words": [
{
Expand All @@ -113,7 +113,7 @@
"text": "okay.",
"start": 7.02,
"end": 7.64,
"confidence": 0.605
"confidence": 0.604
},
{
"text": "I",
Expand Down Expand Up @@ -160,9 +160,9 @@
50964
],
"temperature": 0.0,
"avg_logprob": -0.3579153639546941,
"avg_logprob": -0.35854845368460325,
"compression_ratio": 1.425414364640884,
"no_speech_prob": 0.24429768323898315,
"no_speech_prob": 0.24429647624492645,
"confidence": 0.571,
"words": [
{
Expand Down Expand Up @@ -261,16 +261,16 @@
51014
],
"temperature": 0.0,
"avg_logprob": -0.3579153639546941,
"avg_logprob": -0.35854845368460325,
"compression_ratio": 1.425414364640884,
"no_speech_prob": 0.24429768323898315,
"no_speech_prob": 0.24429647624492645,
"confidence": 0.333,
"words": [
{
"text": "Of",
"start": 11.5,
"end": 12.1,
"confidence": 0.123
"confidence": 0.124
},
{
"text": "course",
Expand Down Expand Up @@ -312,10 +312,10 @@
51114
],
"temperature": 0.0,
"avg_logprob": -0.3579153639546941,
"avg_logprob": -0.35854845368460325,
"compression_ratio": 1.425414364640884,
"no_speech_prob": 0.24429768323898315,
"confidence": 0.649,
"no_speech_prob": 0.24429647624492645,
"confidence": 0.65,
"words": [
{
"text": "No,",
Expand Down Expand Up @@ -381,9 +381,9 @@
51214
],
"temperature": 0.0,
"avg_logprob": -0.3579153639546941,
"avg_logprob": -0.35854845368460325,
"compression_ratio": 1.425414364640884,
"no_speech_prob": 0.24429768323898315,
"no_speech_prob": 0.24429647624492645,
"confidence": 0.69,
"words": [
{
Expand Down Expand Up @@ -477,9 +477,9 @@
51314
],
"temperature": 0.0,
"avg_logprob": -0.3579153639546941,
"avg_logprob": -0.35854845368460325,
"compression_ratio": 1.425414364640884,
"no_speech_prob": 0.24429768323898315,
"no_speech_prob": 0.24429647624492645,
"confidence": 0.529,
"words": [
{
Expand Down Expand Up @@ -522,9 +522,9 @@
51364
],
"temperature": 0.0,
"avg_logprob": -0.3579153639546941,
"avg_logprob": -0.35854845368460325,
"compression_ratio": 1.425414364640884,
"no_speech_prob": 0.24429768323898315,
"no_speech_prob": 0.24429647624492645,
"confidence": 0.02,
"words": [
{
Expand Down
Loading

0 comments on commit bdee5d3

Please sign in to comment.