From b1c1a3c2ad17c72da0665a07b9e0bd230c7d4095 Mon Sep 17 00:00:00 2001 From: uneeb Date: Sat, 9 May 2026 23:06:27 +0530 Subject: [PATCH 1/6] Add intelligent CC suggestion pipeline --- README.md | 122 ++++++ detect.py | 20 + requirements.txt | 8 + src/cc_detector/__init__.py | 16 + src/cc_detector/__main__.py | 6 + .../__pycache__/__init__.cpython-310.pyc | Bin 0 -> 880 bytes .../__pycache__/audio.cpython-310.pyc | Bin 0 -> 2637 bytes .../__pycache__/cli.cpython-310.pyc | Bin 0 -> 5446 bytes .../__pycache__/events.cpython-310.pyc | Bin 0 -> 2251 bytes .../__pycache__/export.cpython-310.pyc | Bin 0 -> 4109 bytes .../__pycache__/labels.cpython-310.pyc | Bin 0 -> 6486 bytes .../__pycache__/vad.cpython-310.pyc | Bin 0 -> 1807 bytes .../__pycache__/yamnet.cpython-310.pyc | Bin 0 -> 10028 bytes src/cc_detector/audio.py | 80 ++++ src/cc_detector/cli.py | 140 +++++++ src/cc_detector/events.py | 64 +++ src/cc_detector/export.py | 120 ++++++ src/cc_detector/labels.py | 207 ++++++++++ src/cc_detector/spectral.py | 7 + src/cc_detector/vad.py | 53 +++ src/cc_detector/yamnet.py | 377 ++++++++++++++++++ 21 files changed, 1220 insertions(+) create mode 100644 README.md create mode 100644 detect.py create mode 100644 requirements.txt create mode 100644 src/cc_detector/__init__.py create mode 100644 src/cc_detector/__main__.py create mode 100644 src/cc_detector/__pycache__/__init__.cpython-310.pyc create mode 100644 src/cc_detector/__pycache__/audio.cpython-310.pyc create mode 100644 src/cc_detector/__pycache__/cli.cpython-310.pyc create mode 100644 src/cc_detector/__pycache__/events.cpython-310.pyc create mode 100644 src/cc_detector/__pycache__/export.cpython-310.pyc create mode 100644 src/cc_detector/__pycache__/labels.cpython-310.pyc create mode 100644 src/cc_detector/__pycache__/vad.cpython-310.pyc create mode 100644 src/cc_detector/__pycache__/yamnet.cpython-310.pyc create mode 100644 src/cc_detector/audio.py create mode 100644 src/cc_detector/cli.py create mode 100644 src/cc_detector/events.py create mode 100644 src/cc_detector/export.py create mode 100644 src/cc_detector/labels.py create mode 100644 src/cc_detector/spectral.py create mode 100644 src/cc_detector/vad.py create mode 100644 src/cc_detector/yamnet.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..1c0c7d0 --- /dev/null +++ b/README.md @@ -0,0 +1,122 @@ +# PR: Intelligent CC Suggestion Tool — Module 1 Complete + +## Summary + +This PR delivers a fully working **Module 1** (Sound Event Detection → SRT/SLS output) and lays the architectural groundwork for Module 2 (Visual Reaction Detection). The pipeline accepts any video or audio file and produces closed-caption suggestions for meaningful non-speech audio events — without over-captioning ambient sounds. + +**What this PR includes:** +- Full YAMNet-based detection pipeline with a transient-aware 3-path filter (not just a confidence threshold) +- English + Hindi SRT/SLS/JSON/CSV export — no translation API, fully offline +- Silero VAD speech suppression so speech frames never become false CC events +- librosa onset pass to catch short transients (<0.2s) YAMNet's window misses +- Architectural groundwork for Module 2 visual reaction scoring + +--- + +## Pipeline Architecture + +``` +INPUT VIDEO + │ + ├──▶ AUDIO EXTRACTION (imageio-ffmpeg, no system install needed) + │ │ + │ ┌──────┴──────┐ + │ │ Silero VAD │──▶ speech intervals (suppressed from detection) + │ └─────────────┘ + │ │ + │ ┌──────▼──────────────────────────────────┐ + │ │ YAMNet · RMS gate · Blocklist │ + │ │ │ + │ │ Transient? ──YES──▶ accept immediately │ + │ │ │ (dog bark, gunshot,│ + │ │ NO door slam, glass…) │ + │ │ ▼ │ + │ │ Consensus voting (2/3 frames) │ + │ │ + onset check (engine, rain, crowd) │ + │ └──────────────────┬──────────────────────┘ + │ │ + │ librosa onset pass (catches <0.2s events) + │ │ + │ Merge · Deduplicate · Sort + │ + └──▶ SRT (EN + HI) · SLS · JSON · CSV +``` + +--- + +## Run + +```bash +python detect.py --input video.mp4 --srt outputs/cc_en.srt --srt-hi outputs/cc_hi.srt +``` + +📎 **Colab links:** https://colab.research.google.com/drive/1aAbBrZBw1xg8ASqS98lyCewVWRSZb_Bj?usp=sharing, +https://colab.research.google.com/drive/15kpMJkWYWQO0sBoJZhYFMqcRBbLLzVMy?usp=sharing + + +--- + +## Research: Benchmark Across 5 Model Families + +Before settling on YAMNet as the production solution, we benchmarked five model families. Here's what we found. + +--- + +### WAV2CLIP + CLAP — Not viable + +Both models embed audio into CLIP/text space and score against text prompts via cosine similarity. In theory, free-form labels; in practice: + +- **CLAP (HTSAT-base)** had repeated checkpoint/architecture mismatches — `laion_clap`'s `load_ckpt()` silently builds a different model width depending on `enable_fusion`, causing a `RuntimeError` on every load attempt across three configurations +- **WAV2CLIP** loaded but produced inconsistent, low-confidence labelling — it lives in CLIP's *visual* embedding space, which wasn't built for diverse audio +- **Verdict:** The cosine-similarity approach is brittle without a dedicated audio backbone. Not worth pursuing further. + +--- + +### PANNs CNN14 — Better mAP, but wrong fit for CC + +PANNs CNN14 (mAP 0.385 vs YAMNet's 0.306) is technically a stronger AudioSet model. A fair benchmark was run with blocklist off and thresholds matched to YAMNet sensitivity. + +**The problem:** PANNs is trained on AudioSet's full 527-class hierarchy including very broad meta-classes — `"Music"`, `"Animal"`, `"Sound"`. On a real video, over 1500 frames fired on these broad labels. They're not wrong, but they're not CC-worthy. With blocklist on, too many real events get suppressed as collateral; with it off, the output is noisy. + +PANNs' higher mAP comes from scoring those broad categories well. For CC specifically — where you want narrow, specific, actionable events — the broad-label training is a liability, not an asset. **YAMNet's narrower 521-class set, which looks like a weakness on paper, is actually an advantage here.** + +--- + +### Qwen2-Audio-7B — Most promising, fine-tune path forward + +Qwen2-Audio is a 7B audio-language model (Whisper-large-v2 encoder + LLM). Instead of cosine similarity or fixed class indices, it reasons about audio in natural language and returns structured JSON. + +**What stood out:** +- Contextual descriptions, not bare labels: `"glass breaking, likely a fight scene"` — directly useful for CC editors reviewing output +- Native Hindi/multilingual support — no separate translation step needed +- Zero-shot on any category; the label bank is a prompt, not a fixed classifier +- Self-reported confidence calibrated better than embedding-similarity scores + +**Limitation:** 7B params needs ~14GB VRAM at full precision; we ran 4-bit quantized on a T4 (fits in 15GB). Inference is ~3–5× slower than YAMNet. + +**Fine-tuning is the path forward.** Qwen2-Audio can be adapted to Indian content without starting from scratch: +1. Start from AudioSet classes YAMNet is trained on as the base — strong prior already exists +2. Augment with clips for underrepresented India-specific sounds: dhol, shehnai, auto-rickshaw horn, switch/click sounds, crowd chanting +3. Map new classes to existing AudioSet parents where possible (dhol → `Drum`, shehnai → `Wind instrument`) so existing weights transfer +4. Fine-tune only the output mapping / last few layers — audio encoder is already strong +5. A ~1000-clip augmented dataset fine-tunes in 2–3 hours on a T4 + +--- + +### Full Comparison Table + +| | YAMNet | PANNs CNN14 | CLAP | WAV2CLIP | Qwen2-Audio | +|---|---|---|---|---|---| +| AudioSet mAP | 0.306 | 0.385 | ~0.47 | ~0.40 | LLM-based | +| Parameters | 3.7M | 81M | 87M | ~60M | 7B | +| Label type | Fixed 521 | Fixed 527 | Free text | Free text | Free text + reasoning | +| Hindi support | manual map | manual map | via prompt | via prompt | native | +| India-specific sounds | poor | poor | moderate | poor | best (zero-shot) | +| False positive control | gates + blocklist | blocklist too aggressive | mAP ceiling | inconsistent | LLM reasoning | +| Speed | fastest | fast | fast | moderate | slow | +| Offline | ✅ | ✅ | ✅ | ✅ | ✅ (quantized) | +| **Verdict** | ✅ **production** | ❌ noisy for CC | ❌ load errors | ❌ low quality | 🔬 fine-tune target | + + + +cc @abinash-sketch @keerthiseelan-planetread \ No newline at end of file diff --git a/detect.py b/detect.py new file mode 100644 index 0000000..f0f9350 --- /dev/null +++ b/detect.py @@ -0,0 +1,20 @@ +""" +Root-level entry point. + + python detect.py --input video.mp4 [options] + +This simply re-exports the CLI main() so the tool can be run from +the project root without installing the package. +""" +from __future__ import annotations + +import sys +from pathlib import Path + +# Allow running from project root without pip install +sys.path.insert(0, str(Path(__file__).resolve().parent / "src")) + +from cc_detector.cli import main + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..4fdf112 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +tensorflow>=2.13.0 +tensorflow-hub>=0.14.0 +torch>=2.1.0 +torchaudio>=2.1.0 +soundfile>=0.13.1 +librosa>=0.10.0 +numpy>=1.26,<2.0 +imageio-ffmpeg>=0.6.0 diff --git a/src/cc_detector/__init__.py b/src/cc_detector/__init__.py new file mode 100644 index 0000000..846edca --- /dev/null +++ b/src/cc_detector/__init__.py @@ -0,0 +1,16 @@ +""" +cc_detector — Intelligent Closed Caption Suggestion Tool. +Module 1 MVP: YAMNet-based non-speech sound event detection. + +Improvements over baseline YAMNet approaches: + - 5-gate filtering pipeline (speech, RMS, spectral, harmonic, blocklist) + - Top-K consensus voting across frames before accepting an event + - Temporal smoothing with a sliding-window majority vote + - Librosa spectral harmonic gating suppresses tonal music artefacts + - Librosa onset detection catches short transients (<0.2s) YAMNet misses + - Dual SRT output: English + Hindi with tuple-based keyword matching + - JSON + CSV + SRT export with per-event metadata debug fields +""" + +__version__ = "0.2.0" +__author__ = "Govind Gupta" diff --git a/src/cc_detector/__main__.py b/src/cc_detector/__main__.py new file mode 100644 index 0000000..1b5d758 --- /dev/null +++ b/src/cc_detector/__main__.py @@ -0,0 +1,6 @@ +from __future__ import annotations +from .cli import main +import sys + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/src/cc_detector/__pycache__/__init__.cpython-310.pyc b/src/cc_detector/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..70b44fd663583d2a0822caf143daebe686fe55e7 GIT binary patch literal 880 zcmY*Y&2H2%5YDy>RDmKMVUAs~*)0ezRmG1Almc5&x2i-hk=L0`OcUF($GdD#+_~}+ zya*4`BfJF%#5ieDu@uW=d%kbx^B1$ZnI=45Wq;IG%0$g zy<+WjtyrHXgMRqRNvV{1frOHbgP&i%!GR{MYgM8q*sG1l0``O@Wi(7fS)y;3$Hvs7 zbYhB7BRqh~$%h^I_-{9&-+vj)o&4M{$KUflngY}DBd0rTq=NGs0oNeOSJ{+WZ zE&_3jdHy|deYSWz{eC*GOoQW~5ocp*t*Gmtzq`@<3t{OYwg3PC literal 0 HcmV?d00001 diff --git a/src/cc_detector/__pycache__/audio.cpython-310.pyc b/src/cc_detector/__pycache__/audio.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..666da9a7a3a241567f60b297cc37075713f39a60 GIT binary patch literal 2637 zcmZ`*-ESPX5hwTKZf|uu%Z?MWMlba;zw1wI zwQkKmt9R>m47v?FhTSRVcbmNSWcn%Uw)m-S#A<)Vk8pQ})%j@_EF)`cSlbzE3zyLi z#HLvDbHtkbwMTCEEPI=sW@kS4y03%w8aoTx8|)H0$IgRxj=jS!vbR7x&)iMaCe=S6 z_;96SNkRC55;RsxkrSFTk`|ODdA}2e_av7j$!MP^#ayqKmAwC+BwJO^QqIU$l2b7t zUwrmQk{3h{q~aM_TQjSO;Gb5B;H(p_q$y#=elC^ZG$UETcq+-|PgZWNb2Upglaz~s z++VpqOVVUZ6q2?fppwUmEc}A(emI0JdC>`4x1;2d*P(j}$X>b!GTXJXu(Dh_@;ligEb=@1d@CEadZHNeTypgQVtlAiZQi?c z=k~_k)$7syo7Y!wU#%Or;E~40*Ag|dyaKZ5_d)K{J#f`a;j!N-%1WN}uyXJE&D*gF z{p5vV@_|9n3@99RF<{7H7%<~NZd@JS;w+)7LKI@Sv_TUIr@g<;bCMbR$$S-pP@BU^ z(nyacsqCz`Jsm_*PBR`wI*cMX_9``cGm1W~XgYaPk0MsYQS=OnHz5=&j+W=Qi;T}# zIpSju zHR2~myga2>Z?IqfGxN}jcR|ykO;lvQS=Wn7MF5i61n}ZAhcgYg`BFIqWrTOo4hB8| zCV31jq8#QjPq`!X2s5A6Se*sRRUXV5ECf67L%(_GD1WCm0_?zV>alz1j@+F(Ywo(@ zeHE~2IKRf95o`U$*h6-T&HTZ+fXZL8)91kN@9_u&-p*A2h5$kW*Xo!FPF5cIXyrbE z63KuY2E-mGd5O9nR4?#^*=o5$ZS!yO#rLsA42a@F5&#$j>QE#Bb^x38^9>&95Mtpk z$*YPWKvM?gm4H{9H{WeLx?WPjb0u}a4-%=OVt44w4d70O?%ZAug2oyzm{EYWr(MVh z>}$oe1TgXll2leod&-cZ!Qt@qgv!jjAWnGe(D_xE{Qc|W#?dNRjIO6`g&*WtJpVqnEdotVl5 zjE`M=;ynmOTn3@tIAf-EWRK=YW#{KxnZEB{GtgrDLk^{2?-mDOOyyCXbuFQP+0qzXpP zBiu7gSRek)X85!KA&MC7JdzPIK+# zrFQ8Spd_OQ+JzjnJ+W+7`8LI{R{*DSy89g{m`(tEpBGW@$|B?lNyDV)Yayh)1#k#p z6zgCDkCruE(1oFFamiR~W6W8r0iGr8@pEIF#BsvX_yHFScIC_d zZF_+}6-6ihB1Dm8Z{6Iea+PGw<)Nu3IagChuL(lwLZ~hPdtb*&C>o4u(gra;$%;05!d0$af|F~q~k}~SbE#{+`hQxuVL!31`n!!i}TuQRjzBza(u{> zj0eypN(vsdh5LNM5{;H6d|o~$8to<)`Y~~$Moa5oq;neg0v+x}aDj4L#ldMPHKsq( zyGlp7tuuoqSTZnJiltfRQBsYi*}#BoXhUVWM|w||m$Kn4vJojOY>{n~ve7NF?HElN z7;Ee`e*86lN049>Y?4hqGJ3P`gk%@nJs{Zw$zJxxfW!=v>`k_BK(Zfg4zRZdBnQ#v z(10e>;EMzN3_Hw@Jc9Ad-6^qhLt(RQF4hAEIKLbX)oRuFNIcPYvB&@2WrWD_5`0t}Lz0{(R-i z^6bLO^`agbE#WnyA;$fhuv=uq(Fp9nX$5#2sx`;*{V-K3*>0;Hgj&hI78w^WEiHyq zmq=ntt2ObyNTW6Dw!>kqX~FNdVe>39g8MBVrJ6jj)+`YjtK4aY$x?|-Hr(#D0}SHN zN?rXj35%MT#6CnR#_Lww3Bp9F1e*>=Uu#P;RN;jPcl4{UncQtSw!c~?d0JMNR_$C|OAh8Qw#ZElJky1%C{nBQS#xKVH@WDUAhiAh*E7(8G zWHzjTZy9KAey$h}K_f02ywtE-J!QT>8=99%hAu7B=g^4x13l-(6uy5atIf?mW{-hRIy7M9qUz3kDZueK<>A4Mdxh(}JUfT~gOuj#s-= zlBz@8YH^oEiKTPr7niWjCdP`lc?wi96{Y*$EgIhH-lAPst@R79O#VYy<;A|yM>&hJ zD!NL4C`julQFlBmh;;Z;D5SQ2uZS7QpP!mt^_qOP?Q(u=w$^G{jx$?3-UxzV7RGqr zYPvj_^+k=i{(C-KbL?{Kew44?YTFL0O8+Yg7|y?lhoNe!rWtzj`EXiOlP|}#F_nG= z`O9h~`>AI^E2zWjsA{O<2!;~Vc+|*!_b-}d6t{HcGlMXfeee=bOGQU)`KzMc8c_K;`AC8o24gGk=RC_$cL z`$L6lE+`x7hT7FT+U?9-_(kU|!P%OK_tv8k9xpK0-3P?W8v`>i0I z5&!H-XZmeF1qw6p+rT((dS3CrU&lJ>_ZjX_6?ef&2@55%1`BtKQ_|35}$a-m{hs|1{-LJ z(RTNCa8Ix7^LzU{+B5aLsH`adI`Ay3)*a7ed2KcwYAAF5J z1dBNspCM|eAFJXUXnBa}eoN3ZkpHPCpLx|IUR5XkSQ7+arU4UXpfTV9E6K7f%V~lK zlAmWsQBJdCz4ACa0siFIBfS;8Ez8+Xn&}(b1?A@K=UR8T3kb!gJHzqVosrvf>>c#_ z8K`+lodR_Uca0s1tG?lAXk=0LE11v3mDAwhM6O^l~_LWAWIGH?*f?dvD;gC1g$yyf`|Un~|8D^U2S6w>s*p{Saq!vVn#j*m3gHvVJPzWQ zz)d&D$t^vB**#UU%zRhtXiwCqipa#}6YZ&T9R_nZb5~<3l+>Q9N0tF=5`N3E0~7&J zO_XrFbuJ**%5#wpEDYFVi9q`B_;DB$Mw!!q`utu?oOz!VAxEh_FceKotHuv$UHc)c z=3a&qP0M!6rxA&Osn3)*`-B;Z8_Mg}TDa%N{Os|Y=3o8BqdoqIxG9Zu`6{!#08#!ggUZvH`xV_ip1A;?7QY;^p2hK5+Ty~s zOIMc7g)5goUIGUl%|*iB{-%DWDOE@27+8+E4xnHLHc%Id_40h3y1c#fsMEat{Q-lmd{q)hJk+A?{K-I}8d67V(AL%QihMw<)S<{q*V+YzQ_R=)J{qn*8 z^FK_Sc^h^cYCcZXH%;&-%g5@)wjWjCB9LA0_>O zaUnzs)+s3KhNNAu+xMa&34W_F_*F!i>N0iVg#g(cp9Rh@U0L{O>Cy@^G_ou~&NRwW zLKA;wh>@(};uuAeMjxgIIy_|W&Fp6|k&Yxpj8N?`GN39sgg66=@~`{>QXO%MhIyA7 z=3Q^V6NvYy;tV}L9@kd5vAe!;R%D) zsPOu?7o4^92NaRu#-glrcd?2(79uz;H^m*Rg={`j?MUNmug52YA`gW||Ic{j)U37x zP=8FF#51YoRD>xl{UVo0BS}aj4HzXE=q#lXI#I^vFDWTa&HZb1Xc%Lp(KaVjj2-F@ zReP}eZ^nbdfqiQFK>@XUQED%8MiS%F4`bs;T%4ASLWiB;liGP%Yl1djZr+;l!q9Rr z3|)Ky#gXDu9wN38tuFUjN>++Hqg1uZyjryyWvkVC8(*n-wJIo06#MD1Opj0Sh*GrE zj(sb(>vVvRZ4!Yx=NgM51Cqh*GUL@n>O}++kGDWHn ooCxKALc)gh>DVsLP(<-5#W!#`X`qHQwlt?sYLl|e>V*UU1|;NA(f|Me literal 0 HcmV?d00001 diff --git a/src/cc_detector/__pycache__/events.cpython-310.pyc b/src/cc_detector/__pycache__/events.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a6bce35301e7b505ce42e9fd50a8ffd11aa3dbc5 GIT binary patch literal 2251 zcmZ`)OK%)S5T2gt-JP9%IB~4RNk}pTgt8*p4GAD*8H8jW5mJoACLC6ghR$@aJ!oEZ z_a?DrOGxW0zaa1fa^uMV;MP|zm;;w^$^lW;vum$OGNZ0n_f&UPebrNRI)MYv{Tu%# zzk&1zDvJ*VN60yGjcfLZb~wDkn>u{ep59p{N=$8%; zc4YvHh%~UER#~CNiM=fg;JDKC~yT|Fcq@k*m*w;@4;$aL_CT0 zwO*JBy@nkkil2OuQ#uU8^dp}hb-se@SA(# zW0`0n;i!k0v>q`nmgj|LIw^A1^Gp+iw9%N_I1wo~b*6A{(@-KVa<0G*8)#FPLQiCF zJf&q!(A(^(1NUKUU=<#KpdC&2AonZIluXG!(e579Wa>U}_uYLDG>>~$qce?It+r40 zTuz@-UfZKHUbDOo-#4DpUm8#yfA{j8J8O6Et__F%EXzWjWI}0{l_8u9vffYr^c*Mo z@$V0vX$o)M&nF( z!I45NpK`kd->XEff*Qj(@f;!gS1=1_L;%(9eaNTqxVv6)Z^kL`8rbVI-k5QJHs1su zj3{rxnl{LtQO)}4g1j`U^HX3yz3zyH@cHsjWYpwmz_J3CfQS4nKX*W(FIu3z$yfP# ztF^6mfnT&*hhOEF_+{v)F24fxdCM%oSvDaAQ%l)Hk(wZ8CBiNe`LVp6$Of(npoUUM zSWV-LoL5o+2plI|=73B2RxSR;$~ zWe_yfWY75?hr8-bUC4sFxdjvULBM6~iM$M&yaePKF;t0kY@t<=b6_?;pGe!6J#_TWg0eq?O)YPSf zk^tUCPD%Zr-Q|Tu&ptzJAhvD)&1LM@xqKVtCF4g?jx83MAd29soTMnXqv(eTOJ_4n zQMB+|iK2xJyn|a{YUMQ~h;8G;@2nJ3?^FRF$Nv#7!5ephU~kCtKu}QVE^vI$C&zEw zYm+uvCNA*#=z8{>%^tNEyA84m*M`_MjVR(pTtzjWz@H7Gl8ArPI{GC-_aic@P$RMQPFiiK)7w6>g(tJooPIIPs;^5v0rq4 zwLHfzcRpSSLyR)^*U$gR(t?jFB{%0Q6&E~~oK>S$m8wjzUMcffu`{DeaJIV1^0HEF zJ>{Zg!z|v!=~0M->vKA3KE$5un=(zR#m=E;qy_shiwmJX5;13;ILG?9=#S#QU^_d~ zq928%s8J4zjS@AIcz-!k!;vDL7%w*p%!oK=Ls=%Gs<4YKvKu8IL4~tyfSZ)PUWlvK z%2h+G=G8z5HQ$q|D0sGMjuJdoI8loARBVLdm31LE$zqFiFHyiQFi?_>xTsjnk}}UR z$g;E$>QRReZ9Vrl?uC9%W{s*sD-QJ>{P#XMFqJCl?}52h;;F~@q6uAY>m zLd~(N9LYq$xLjlrdaTx|!bwFqDmctco?#n^;BFGrHWNr7S%HP-iNUod}Y^@VFwCvn4nPFtwf^z zURK6RH>7|I3XCb4(AwRn0U^NmGKIot5R|3tF#_P2wQX0wX ze7MA}U0b?wW2w`b%kw<_yK7nL=l^`mN`Lzg%hK)}XZaI*>4`lD@f?p_?WVY@FX`I4 z!P9m4AZkdumUi+=+XHP&ZRgsq9$E6c7=ChbaZu)Baa0Jgx|j@yah5G6=le>j#U$x& z#yP;fSjmKXTquzMU5mm*g4rZETN1nwO|0j6=o#W7Qh97xwWb8h)`7HkyY08hn8{Zy1#w9|A=My4d zR@;bB8W#qrd(3L0(k#6aCxbo8UCqkmHkhGMaavSMW(8x{iErNArE76_|L|Ee{LJ4> zf>6c&1?S8H@P-n1LFQMAK72M{V7+VPo@oEt^JeYoJ=)r3JVRfgSi@WugqpSL%{L!8 zx(Uu9MCd@=1thxV?QdY2@Z14aJf;8}TWjb%vX>v(Iy6k$r8}T!i-Rx>E@W}O%HvCK zW8-7OVZ+)wX%Za8)4~N9EzYV-5bAX}#l0;yB}GpkUp#+#cSo?^+w$4`{3W*4JWYn~ zUEFdP&h@r}ox5GcGy?ddMx2CC{E_O-zm4+(Fn7s8u*=b`Zq%36etw&$Qa36jT)}lq z6r)@a^_u9CXYgL_=J8$H&CUIEn8ZV(-|isYlSLsZ7%=cGN$4qK>aIx=x(&V-s#_af zhxOITpW0h1(;{lgm#|;HLd~mml20iL43=s~tZUyGSi8_dd$jGKgz8DV!t=giK` z+Ma#X3GEl{S=+H2b{p+W_y+$z5y%g)_B= z`-FYMk-x-XLJ_+A5aCn28(3U`a1a(X!4nYRe$)$Vj&j#L)B`A$dGnsj1Ku*u4dL9W zt;O1}y-}9c{EA`>RVg^u6pd$4=qa^1Ka@!Af=3I7IEvIGW*{!AjzQ>yjczqigxC+P zht?mQJIJl(-jS`Fs_gPKQIVsag*S|W4%#KWKq`PKhnIRd%_PcW*48U^Os!RAvEQY; zOTQvDuHx7RSeQz#wkRr8y4|=+(saLv{e2!L2O{QiruO+LA6AjO=SU*qO_Xf>8yfDZ zl8}bGmB&#;bNIFVHO8VgI3r#anE^}<9pW$fQ+nf(J%%=gmY&`vw7MJc5oiyzCdtL} zA~lz2V_26&T^VmY2MZsQ&q~L`*n)sS&>$GxbASKZWCs??=S?2mb0;pCq4ChN;X80h zYvK`8jJ*fJL(4G4tDhjVdvNr&i~QW`SZ`X@`3-OfeSU*4jYZ+;htT98SJ3|k`fr4a zg~@V*4TY@1wgV?^Z^%WuBgG+H*et{b5w1LkJ{;OFN+~cN_EM4Y0>yp>_kwogDo)5u z%V6whVXHl=-L;!k>pFZNSTpKKJ=KLC-K#&pa{c{|o-!^R;9rc8Ixrn*CMgmle;)*# z-at3%=|jP~6U2De2wUqQc8(%^1AM^qU!Qs^2Xv@cSA<0Al00QHRAgU`g8FSQ)0jy( z6yhf9NJf%b{2C;b(k3e%Sh+VpivqLD%cR@YVYk~G(I35TS5jcen3|8!5UAB4OIJ-z zS)WMgsZ*>WyAP;K%wv!&3F77s`@+CbnPIWu0;+ucO}s?qh{<%G)t(NV0Qs%$gnrm^ R!uC_=_4d>7co5D6{{_D3Cg}hG literal 0 HcmV?d00001 diff --git a/src/cc_detector/__pycache__/labels.cpython-310.pyc b/src/cc_detector/__pycache__/labels.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4f5fba5ee5b7726ed04ba3757298d795f2b442b3 GIT binary patch literal 6486 zcmaJ_3w&GEdDlHUl4V(T4219~DHkXhgJXw~@-zWiR%11`WF(uAK&oBcb8X#Px>q^( z%8s?rCeRfarCUcc_M`OIrc*Yc8)IzEV52}E>(;H^y7zYX(%QYc_trhNNB95Em6H$% z&M*J_>AUBA-*>*(Irq4OgUJZ~t~l^rdwL`i`4;^+^DluPcSRzaH^m~6#fXUPj|fe$ znpS7~nHI64)rg4xsP>Syp9^lqL~Jr*M?_roJ)wz&!hVqiJ)kfp20;&rO~7Gs9`Jl| z0dTXz3l(lrc#*=3#U*%qsls=N%Rp~c_)bFTxm@VrU7_$wg;$ApfpfLOYs3iXYsDz= zI&nR4OyM>$4thdt2i_od0B;mKf$vs$leii5EedZHw>_bqAWgT6NpSBFyMT8pyi24( zPbthOoK~0>?*V_e!WoeR{a&#LIID0@h3B=jtIAI zdA@DxhGW-_nm495EZ-Xm$5v{txl*$|U;pNjk0)(kUo$*iF>PmA7w)oNG33f=U7o&H zwVa+UBc9$cq~-X!Su;Ej7D!7ZZ3iYBLU)&RziR1T!!qq9s2`bjmq&CgWMm2=N5VPd zNm^`VR0{?S$8mjw7Ub=^3cPNzVLL0j>MOvb%ZXt!_E1wsf63jP7pU7};o6YB11i7^c=;riGsg6~ z>$o*{xoLGTT(%rbVvt?OC|_3=c->2&LoLxVs~B!Lv|M8azUyA%HvP~L*_1|&7Va26 zk!QSW$_nP-6{gk=VRbJxt9DJ06eV%RR&*%w(hYlShFrGP%4lj8%j|Alvfg0IntP@( zLA2Ve2fWg3wF2HS96yL5xy))1gV&bvP)-hFo@1<7^l+sg#H+SxHXA`4-tcP{o^UWd zL*wwIXqiEuXJQ0|^?7#9a!e~oRE!Ed>MRHS6=@lMmB?frD?$o-buUPnuCr=MPq{f5 zloozetkCPNAtZ6jay-kw&-3Mhpnqz%kl8bvD=oD8OrxPMH=Uqw_hP;@Q&?yXUzmr_4A{=6()ry%6y?xOOc!?RQ|aO!dL>u7eHC|Y+nT%9+Va+Q z7R1A{3wYgf*Mev!y%0oA!^d=63P{csr&}plEE{9Gzh+r}YlPnDtFCF(>{c%m6$^~o zn660AC9~lEmF{4M{G%`AGJA5cvX7YL@R&)ptzxULNNbfHk}_7kmpHKX3`Cx%{{P0t0K7(r~sfsP=yCl3>l5eJ*d z{vckudl7`X2*n+;;VY(iDC?)t7Y*C8lK&J`94#mqi>9k-liL_LUS^Sg6-gxa@iRs(XZZgPI6 zkk9Hx@=iiRlMildz(5(*;TE-k^lZ8~*Gf_b>FPym$g8_6Rc1^vX>TPW@+1^p;B%cxUy<0bOy9PFv0(ZHUW!s0@Zs2QkdL^Mdu zrWbe5EI?txXf(j~P?zcXd9X{_)+SS~LkgOtTd62`(kSi)*a{bCE+o2Ze7R}+NZ44z zHXOG#By3YPFzBZ)wrMsETg%1l9D)Gv?9yqAOTjO5=~=y0Kyt{Nz>?btCI}uNxRoG7 zutso@;9i1Rf?WhN1n&m~@h}RqN}Tr+;J_M@4#5WrY6N2hO9V#=ZXn1K+(XbLc#zDArAQ)aKrt_s-Hos6-YblM6jWaq?Vo%V~J6GuAjpXjt-?X+J24KcBxuh+?gD2N~??Iqf48aSG`H91c5=umK` z2*^8z*?4@o)Bc>2Mrt69%j+=~n;a5=I;~r;1bsL!xwHdcsKj9WYn}E>ofD6D+MiWc zeF_t`AVEb)h1a}Tjf3d68hUcW;-^E4wIGUJqN%G<&vi~b+G#&`Zi^V2qpDpztJ1bs zn*D0xQ_2-T4Ao59-~@afiQm)xlu`_D!NwQW`kn!*^{vp@n5uSUp1r&bXZ4{@aG0~2 zDJA;lPWz}@A^DfdI?YJnpw^%g?Rf!9Lku7rzW6CcA0_cYJnRqHAbS0w4?7dhlMRUJ zMFOEiFrPtu-)7yYUaA^h6%9&=YNvUPGVmwW+K-2saSYla1_6W}^n+4EEmo6{kj|~= zoO4py@d6BwwuaQkrqi*5_7r(1X34u~ewaO>WccN&bKA_Zu~OHIsTSBuVUv`N-oY=3 zhX=sk5`MhXeqPy%M0^`BXmU51unEv<|JX*_!THBjXr{rJQ#67~Q;opdk)dz`W%S$1 zW->?9_t5k(wUIWM9wHsoTgL73dDGWh`-9g0652Aa&Hp7J8Z^PsPHtdMyOl=B34)08mYp%dYPGZdWNEi_p= zJppo|ofd!EqO&K&TG)lcFC)2w6y8X%onR+PqMoUG6Y>o04?d+Pcy4)`y!rGrxE{s0 zQS_?4oukWX@)nvF=1kZtsF+R_^SKqGX{&RO-a9MGJ0M6-k~G>dI&H*2i8@pLr;X3N?N_i-R%K20(-B07gRNB}8NZ3gw%%GRL5{8gb?9ROg zO`0@1Yzv6cljk1FS}C>HtCN6NMGICZgIJzFpg=zS(GVLK!7UtC1 z6W(eE(&S;95O(4>WHbvBr_MfcoOn(*W4IONiUfoLxCGjtRH`2%N#ScMC8kObnlp`;-X}t=~z}}!fZ6~tukMkoZ)~s8MNUz`8f<9)mpbK$fl*+bcthox)Q2Wy3$7|T{cO}(``q;VcU-0Z4fS$ z4BRx0#ysa^!17lZVf+GzDk*(!fOx?qR-+ccH78d>!Sg!j$g?BmTzHy!!Kgr<`=WS=9jR);+L|&mGM8}SFu0l?_z(%uV#P9uVDcnVSm7{Wxvly+3)e|*zfY| z+3)Z%_S<|L`z=1sev?nI-{9NXH~0zfZBfA`I1 zxbfKdQs`1<#$C@d5u?#)K>lP5=GU0-S0HG}xTMnOWP@n>j6HKOdye@!+&l5UXG1ci z1Nzh%vY|6{ht7aRWWXYK;6@%=1^)WMl**##LaLRvvZ*cYMcgzhIRyFJ&A>C`QWSZZ z%aAA1c>SD<)HFM}j72B~=4u*RmP*5}$j#WEkh{4Ofo~ekI>LGE)Sg&IRl5@e9`XZebFv6A$HqwQABCl zj#s)`>vkN6`%ym=x~*i4J1(?{A%-@Dv|Q{PFT^0m!Hs$kmKt-ZPuaz<@mF)qr7o>e zISZe=Fk$@mAXx;|OznqgFDxPr`6w+7Q_?s|rcGV-S>7zoI}DGcT=2wgHDJCn#RPG%vXIRX-`qz?O;yK0&juF7&JK|N3_H6H3N_o zfwWLM>;C~|WcUe!vI2%bAt*QC+0!8#&|`WNfN{a^bDB>L*fE^}DPs&+v<348vn_zG zSI9W>18`xeGE|&0xeOb59~XRQRc-34U6F|&3%T|&=oGeQcIXnHEu1yZ4(1be4%8KR zMh3t^C(8DLdDd|oeO`LU;LoqPaI%C~;3Jsy7_0pwKLasf5?Lrw9W*Lr=TKp}bPFYm z@okh^xLBRFxdYpPN^~34swnI+8w!4`;;}kw@DZMDnz1nCc^pFb3Byhay;+2zL~_bW zSd3?TmP7z8;jZuxyN0EQjlHU)E#_rKYpc*2%T%o4V^f%gOPbJen$(4E+mBly9qG$+umwh{_|VW767umf-jmZ}ccb{2NZdQWv%ZKyC*8!imjMhYXf-G$w?J%v58 zFJ0YR+gI49YreIxfBgXKWtm67u>IA?vhpjj;IM7}LDv5+VZXg_hz%4DvmJ#OnCTtC z^CBB$J0EI=qimY(W_uo5g=6ej*k{;5cIcs9$oVhLYwYkJ=xci6W%hzU%8s1X*cFyz zF9C9lU1c*Y&&C0nXV=&Xb`q`Qmo#>YoqnjX(`!cII6K2W!CqmnvQM(t*r(W8cJ6}| zyUs4L%MTNU6U>~|a&I=jWNBLZ&g9h@UpQgVtTV@7^6SE3zVM48;LeX9eAQVxo3M%qgKQoDaBg1o!G;g@EOdFqcoKZv=sGie5AHLnkOXwMrOb3=7K6oW~cPaa3u1 z)vGy`I?a3XwY1}$$UDWL9{TmL8M=$-PCAQ~Y8B(cO1)h5$4cC*`OhqeaVPUm*Z`W` zt2(8sC+dC}I%QAz4)+((jw$9nUJL4#qF1f1jPXiw-tp>{8sOz-Lriii?;K@8%@0MT z*zOERorW*Mqt3Dy5}tF_ptxAAgd&~32qJ`z*JxBLK5OHCzJt576bPW~R6^&CJJ%;? zXQwVmdUYu^R;+qJvFgqFRazUrV;$m*O4^J%i*+nE>XZo*uVx+87Nd?Yius%xLPz4Y ziQ_2ZcyqxLNHouVKvx-I5H?GtN|DYduShlfd{W(*x_a#v5Cvd``_&bK#&254-HRY_ zymA@yh83*ffkDK)?^G*uJP199b}kgj8$4L5Fu%R6jn$o!R}KBKMi5p6O#wJ!tx8a* zl|cP#a*j}Nzg`yeYBJy-)T=8ULOUXG8cmMDJ})+bN>FbTaa3Vj2ipNg1a+@E)+KkF zF@f!(zrloHz^d0Q&kLUu2B#(Sm0U$UFn-BvR)tdymVFKe!mQxZGtRM^CyMhp`{jTy z(jj9>e=Z0X!S;@GChzcC*qLL?*yAUT(=+DI%yn&A_9=hox} zKvsRoRnd2rLCkswfm3IkaAOuFZ;l6%ZTLQ>ouj!tUh(BrujHMZL1XL;ST_{D$4C+d z8cqR;I;$`UNa*@anH@k8<(bTy0bUpzG~nx7qT9>rBf#Xn4Tq~Op!Lr z*>ScKx?M5$S=`J0n%8ipB*y8Y*PxYLzrInQuf!P)>JkD|_TXDlJRZDx(LI0tx_fnc z#=Uawl6z(9*3^|)KNSz2f9t}eduw`j`i(17?uDsa)90sVSBb@91QU2ck&;9G0-}|SDcIDqDibSH$ zFM&!5&v~+(e~pTO3^qj9gt=}$(ps9Z)@_k^q_^})Ur#>L)>C+T zn1L3fthG#$Ug%xOtoKD|`R$>WC9><=BI}WULA%@2vLkc7KeCxQqCLW#_r`8xH4A@h zrmodfC$w;K8SV6U2T!DXT4Z`UaIn<+kv6O``+bWg#P-Ns)8E%v@>@3H7wM5n_~ueG zt4FTjEQvqbY*yky&AAEl5b%qTE$1>wmCvu*O;H+qHO{a~*r<9dZXIS~b%)a#MGOJ{ zhW=hP^CwEHiO_K($5~A|QVaWGZXoV+6`jF2%&*4VI-hNb*lzGjUBni#b8O;>VvDAY zrO5>2y>;VCoRz>Xu~P%Q8Qb+{t+5g(m|w)Cu^~#aQ3+zZT&h(4*uHd;US=^|iqjV= zMR5ZX>hm}Oo8s45Y*zhxE`d?UV_PD{_u^FJWCtUoG#{HhSpHDsqv-hP-1t0<*Lbt; z`*Y*PMgxX!ym-1SgcvUt+uIord6B+4M!n5kd0?4wrQ9egdx&y12VAMRn(O15LWglR>Q#I*CHdY4ljd=Q7O z8+gn}TepP1U_a8Cz5|@~2pkJeFlHVbfXgNLNxWjC-iWR69>;Oie6J3Qg<_@Q$4R*s z$vhtioFt1X#%P%3LNi}QEu_gDoqrOqpWwmo-zQ=tC9${cTGrBQrPP$lBYF2 zG8XOI+AO3;^sJ0^q=sf%)^bVD2En8*y~&%tQ-KPH-jx(5XGwTa?(hPjL7PzHu--l| zuPA#=irjJDBG2pE%#}OJWotqkOMUOv=A_f6oDk<2tS;I|9ggl%rwGG=RkTsjW9G=Gtrl5O84wxx3t zXXUGVviubQ@zYeiN`+)Cv*z88d#3IeeQ5`Bw&EmOf}f+}4HU8I*O#8=uCzPvg|1xU zO~8b^P-K#pZa_*BkekGlK{JDz@q}MS*Xv1|$htUvV=A_8Ois_l*6rz;3$cCv#6U<)_9_fpED-qfJ%_xD#VrT`deHP`q zj_AsGWH4ih+7f89SW@<*T(tQ0jc@7$e5$NHG60)8tKA!jjCIt>;s8HO;s$G`=7bg6 z)E|)r>6wwt!fBVWtxSAR*CC-}g#DL(vs;9ZAYk)}v*B(P{WuGs#@+Cfa#m~+F=Mmr z3x0%VOydOFUSclE>D2iwwVOc`V8ten4Sxxm3f2fJvKoZ`vrJQiwE8a2YY`(u;#&)f z*3vUjwEg-Zm?wj>Uw#`8(on})C}T_Ka{z5CuOCjt3FQgKNzz?7>@5dP+edV#4*EVS zor6|~%nSsI=DS9VjvNPBh68|_-97R{U;%?AMl>9~DUZuyDIA(Uq(SAKMq3ZQEGlt? zry>cB;~K(o!I(=20d*Wmdgd8RKpY9G=|3fvs(4On{MeSNL{jWSJ=X6(O}o@t1dnsf zd;K8>{*l&uh^ZctNPY(AWstDSbAp(Xh@u7J2`9_K8SPbH z%o?ITeH{ijG6h6wO&2gyDBJMNUNcaK(8{+wb(UPi{OdML;Vf0Hho##!net$8649R} zQDxiS?Ug5Pi3H4DKidxO8hDpZf5!~J3A`1RCB0w6=VP+1lIV|#N#k_e5ra(d+tfuH zk27uM9W*&)GofHE@;m6u^{Q!m2{=Q*X~{aS^wm-ja#@p0@FfD0(=HQaUngwQUy<_qZfLp;*aNNB4L3Aif)S4ru!Jjtsx zRZ{XACqdvFo3Ukl$ls>cfWV+xseTtl49?jgDY=1)7p1?G-MZ>~=napdu)ucoM@j|u z2u}hvQZ#A#XHfaG{yzTe050O$@*_d}G*N@RQC}}2Jy%eVv|CGE+HSR%+$}z<$uSlleXV3~(=vPZm%W}$fEA@(S-9HA3 zVHO3EtMixESBJLlG2elIX0tR_ZuV?@vkMx69%$MH3VvW@7Yw{DyiL4qye+&Fc-wd< z@lL$2eJhKtBQQ#;J)c|caghI^ZJ%4sI2-w?bE~}$WiI6A&#m@3${}=V{}QrG0twln z!Z}W=ihqHMr??4(K-|w=_dDp6%4`6N%X%Uo!}{2w$F%;(c=8;QsF5=WzcCF(XRaHk zU5d4uRjOxP_a6L5)x*C?%YSJr4iXz;YyJSmAK{VWn9{(vJ+iHB41X|D{g zhZCe?@cZwH$D~4Ta)FYcLQyWs-#<*`QgJV_M>}5=r#EspagQ?1Fs89huE1iPMK)7; zc!=-9Fz)XX(Os_8S+Fc8>trwEz9r-#x@jE#JGA;&sQ4-sU!&r8srWrAzD~tAP<)gZ z%|_LK`)OM+>TG4SQ6xGMS>5egdQuy-IpQ;@R6V=`b*e%W1pE4$_F1Eq{sPhf(D-oc z)^vRLwlcyJc9a<;N3TyL))Pn#WJPiz74@R*VLJSRZBaVf26(g8-`d{V9`&!M0X3kY zqwKKO+97(^wALUj($0$3+9fj4z#1hAKE@hDU(iKgG$ianaQm9k8W!1T7(spyd=8V} zi3TlgVH>kvfphO}yEhRHMSX)>Yee)%BT?UwhV(!$ur*q{qh7S^CO5RTM+`)J5IC8z zZL4yI9qk!*+?x=C>pR~;=m-x6!Pe(s1tCqg9JlxL+UJoBVM&&Hm;#o22}|sJAG2Hg zSx@Ugv=>NI@sDD9cmpw z;$lCun;QQciO+BwpJCuL679$M!(w-I81EON7x-fdv!@NSC)&Bb7c@BxIvrqXSY8`n z!(tztO^!n+O?$f{}lV9BX`xaVXjoW!8-M^zfjBI^2dj3@AFE{}-mTulNN$gF_oVyTLDA?!mk}B}Kk0 zXX=N)lN0tHoH53NZ<$4NwuYjCXb3#;67f^?(pG+AyCgro489pNt^m-;G9z zuh9RPjkHFi7r`sJXcW9MM!dpy<15GZ3~6jH+gFAK>F^5e?lyQ8X@%T-#!7D|u~65S zHStokGkOtY_Ok=<&m2hkaq+SkjgBL%GJxv|F&3S``($)78jN<7=#uDQb1y8mOa|O& z);rlXl}n;T{Bi|Z`))8l<}G_j)Famg$4Z9&a^z?H6{OZfq(JG?#Bs))n@?RI$(xW) z`2XBXkRLzzcFvhwkqPOp4wkby?VFdpGx@xGkxjauZbmw0Po)>eoY`(}L^*2D&56jx zIg-%0B-wmh<;+)*6R*_Ucy#e<+Xe+%Y|5(-8FPlt`hu>Sl)S$S6lz^~<^<^=hj#*)@5+(mfwXlJ8hMNs?<{qjXX*GSk|*4lE*D z9kwr6=(@v^>87U=_H>x=hHXZEIwhz@1sD{>Mg2EZ=SG=d(`5r$v4HO zGJK)PMTROekO7qw$XdY%RH-pOjPCL&`TP3`L<^rLaG4~tn{Z`AxBma?fb|xsQVdC!2~a&h_@-ny31DlJX*J$FvU;g z>CfVXc?lKr-Q{MbQ#X&6r<_e0KrLe&r>w~8@UxUup}Y#4^0A2+TxOzqk|v`tl9QJm z_Y(&rJAd)Uu&_ z{5Pq`M4Ck=8D$QcuTjTo>PULLOnC#vS@b^{_(Lk_e?0I$D&C>u2#TkWL-vo8EMPOu z8vi^&U8mwQ6~9FV35TR^Cm2>_QF+WMbjH2YHM*Ib;yl1V0;jx!OclvsE_W(UEqhDq zcA{T}5D0Iuza^wd{;wV?KSpcWUXxV<#aBs1-x+?CdT0qB|2v?Dh)haOth;fKs=7$s zrz`c6&sB6zNgff(fEE{bbQ|u@LrQQX*5QAMBkn-LSPywbo1x5p+)EBQT<%KI52Yj^ zb2pFtTAe~(c;Tu}+MYim_$0;O)wybHQ5yn4ude$cEjD&gDP zW^%gAjCEWQsPFdnEMRjcZo{-M5*2`4ZAUkWNa@7(y4g*Qgi;dObBTOHh8;}ho1}pJ zK5bh@^tef)8}q+JL)Z`TNR3b`DPyDzx|yK}K0z^=*>7a^teK_UvpIm)40;WHFXf-D z#4oIr$^cNzPc;Lj%u{0&Mj*v#8ClF@(3dPJOQ8PJKTl;bH`YwcIn$VvM*VVltDi_Cl1;hPrcRmFCq1E3`vj&2yiSlvUQ@#jDxkX5G^(H4 zFd^wbfHCTnVE%K!tWDEMY(g3}poC2>`IYV{85qj8uTh&sO@@eskcty!+t<6j6fr7Y zBj^4mK65AIr0cSvsAB9=6L;*0u{o(=ei%h;1fkS}B&qWHJ*2=D|ItSNXN*v(p(I^l zl5-%nsZ@$m!=;t3MDV$+^k{aY!Nt8OAE9-=M+N1}ltz{$O3~U0b=$`dOJJWV%S9)7^$QbH~nR*ZW?0*4GvuW%A literal 0 HcmV?d00001 diff --git a/src/cc_detector/audio.py b/src/cc_detector/audio.py new file mode 100644 index 0000000..25b96d6 --- /dev/null +++ b/src/cc_detector/audio.py @@ -0,0 +1,80 @@ +""" +Audio extraction and loading. + +Uses imageio-ffmpeg's bundled binary — no system FFmpeg required. +All downstream models (YAMNet, Silero VAD, librosa) expect 16 kHz mono. +""" + +from __future__ import annotations + +import subprocess +from pathlib import Path + +import imageio_ffmpeg +import librosa +import numpy as np +import soundfile as sf + +TARGET_SR = 16_000 # YAMNet + Silero VAD both expect 16 kHz mono +_FFMPEG = imageio_ffmpeg.get_ffmpeg_exe() + +SUPPORTED_VIDEO: frozenset[str] = frozenset( + {".mp4", ".mkv", ".mov", ".avi", ".webm", ".flv", ".ts", ".m2ts"} +) +SUPPORTED_AUDIO: frozenset[str] = frozenset( + {".wav", ".mp3", ".m4a", ".aac", ".flac", ".ogg", ".opus"} +) + + +class MediaError(RuntimeError): + """Raised when media extraction or audio loading fails.""" + + +def is_video(path: Path) -> bool: + return path.suffix.lower() in SUPPORTED_VIDEO + + +def is_audio(path: Path) -> bool: + return path.suffix.lower() in SUPPORTED_AUDIO + + +def extract_audio(media_path: Path, out_wav: Path) -> Path: + """ + Extract 16 kHz mono WAV from any video or audio file. + + Uses the imageio-ffmpeg bundled binary — callers need not install + system FFmpeg. Raises MediaError on failure. + """ + out_wav.parent.mkdir(parents=True, exist_ok=True) + cmd = [ + _FFMPEG, "-y", + "-i", str(media_path), + "-vn", + "-ac", "1", + "-ar", str(TARGET_SR), + "-f", "wav", + str(out_wav), + ] + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0: + raise MediaError( + f"FFmpeg failed on {media_path.name}:\n" + + (result.stderr[-600:] or "(no stderr)") + ) + return out_wav + + +def load_mono_f32(wav_path: Path) -> tuple[np.ndarray, int]: + """ + Load a WAV file as float32 mono numpy array. + Returns (samples, sample_rate). + Normalises amplitude to [-1, 1] if needed (YAMNet expects this range). + """ + audio, sr = sf.read(str(wav_path), dtype="float32", always_2d=False) + if audio.ndim > 1: + audio = audio.mean(axis=1) + # Normalise if raw PCM was decoded outside [-1, 1] + peak = np.abs(audio).max() + if peak > 1.0: + audio = audio / peak + return audio, int(sr) diff --git a/src/cc_detector/cli.py b/src/cc_detector/cli.py new file mode 100644 index 0000000..ad45cd6 --- /dev/null +++ b/src/cc_detector/cli.py @@ -0,0 +1,140 @@ +"""Command-line interface for the Intelligent CC Suggestion Tool.""" +from __future__ import annotations + +import argparse +import sys +import time +from pathlib import Path +from tempfile import TemporaryDirectory + +from .audio import extract_audio, is_video, is_audio, MediaError +from .export import write_srt, write_sls, write_json, write_csv +from .vad import get_speech_intervals +from .yamnet import detect + + +def _build_parser() -> argparse.ArgumentParser: + p = argparse.ArgumentParser( + prog="cc_detector", + description=( + "Intelligent CC Suggestion Tool — Module 1\n" + "YAMNet-based non-speech sound event detection → SRT/SLS/JSON/CSV" + ), + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + p.add_argument("--input", "-i", required=True, type=Path, metavar="FILE", + help="Input video or audio file") + p.add_argument("--json", type=Path, default=Path("outputs/events.json")) + p.add_argument("--csv", type=Path, default=Path("outputs/events.csv")) + p.add_argument("--srt", type=Path, default=Path("outputs/cc_english.srt")) + p.add_argument("--srt-hi", type=Path, default=Path("outputs/cc_hindi.srt")) + p.add_argument("--sls", type=Path, default=None) + p.add_argument("--sls-hi", type=Path, default=None) + p.add_argument("--keep-audio", type=Path, default=None) + + p.add_argument("--min-confidence", type=float, default=0.25, + help="YAMNet confidence threshold (default: 0.25)") + p.add_argument("--rms-threshold", type=float, default=0.010, + help="Silence gate (default: 0.010)") + p.add_argument("--merge-gap", type=float, default=1.5, + help="Merge gap seconds (default: 1.5)") + p.add_argument("--top-k", type=int, default=5, + help="Top-K labels per frame (default: 5)") + p.add_argument("--vad-threshold", type=float, default=0.50, + help="Silero VAD threshold (default: 0.50)") + p.add_argument("--consensus-window", type=int, default=3) + p.add_argument("--consensus-k", type=int, default=2) + p.add_argument("--no-onset-pass", action="store_true") + p.add_argument("--block-label", action="append", default=[], metavar="LABEL") + return p + + +def main(argv=None) -> int: + args = _build_parser().parse_args(argv) + + if not args.input.exists(): + print(f"[ERROR] Input not found: {args.input}", file=sys.stderr) + return 1 + if not (is_video(args.input) or is_audio(args.input)): + print(f"[ERROR] Unsupported format: {args.input.suffix}", file=sys.stderr) + return 1 + + if args.block_label: + from . import labels as lbl_module + extra = frozenset( + part.strip().lower() + for val in args.block_label + for part in val.split(",") if part.strip() + ) + lbl_module.BLOCKLIST = lbl_module.BLOCKLIST | extra + + t_start = time.time() + try: + with TemporaryDirectory() as tmpdir: + wav_path = args.keep_audio or Path(tmpdir) / "audio.wav" + + print(f"[1/4] Extracting audio from: {args.input.name}") + extract_audio(args.input, wav_path) + + print("[2/4] Running Silero VAD (speech suppression)...") + speech_intervals = get_speech_intervals( + str(wav_path), threshold=args.vad_threshold + ) + print(f" {len(speech_intervals)} speech segment(s) found") + + print("[3/4] Running YAMNet sound event detection...") + events, stats, infer_time = detect( + wav_path, speech_intervals, + conf_thresh = args.min_confidence, + rms_thresh = args.rms_threshold, + merge_gap = args.merge_gap, + top_k = args.top_k, + use_onset_pass = not args.no_onset_pass, + consensus_window = args.consensus_window, + consensus_k = args.consensus_k, + vad_tolerance = 0.35, + ) + print(f" YAMNet inference: {infer_time:.2f}s") + print(f" Gate stats: {stats}") + print(f" {len(events)} CC event(s) detected") + + print("[4/4] Exporting outputs...") + write_json(events, args.json) + write_csv(events, args.csv) + write_srt(events, args.srt, hindi=False) + write_srt(events, args.srt_hi, hindi=True) + if args.sls: write_sls(events, args.sls, hindi=False) + if args.sls_hi: write_sls(events, args.sls_hi, hindi=True) + + except MediaError as exc: + print(f"[ERROR] {exc}", file=sys.stderr); return 1 + except Exception as exc: + print(f"[ERROR] {exc}", file=sys.stderr) + import traceback; traceback.print_exc(); return 1 + + elapsed = time.time() - t_start + print() + print("=" * 60) + print(" CC DETECTION COMPLETE") + print("=" * 60) + print(f" Events detected : {len(events)}") + print(f" Total wall time : {elapsed:.1f}s") + print(f" YAMNet inference : {infer_time:.1f}s") + print() + print(" Output files:") + print(f" JSON : {args.json}") + print(f" CSV : {args.csv}") + print(f" SRT : {args.srt}") + print(f" SRT : {args.srt_hi} (Hindi)") + if args.sls: print(f" SLS : {args.sls}") + if args.sls_hi: print(f" SLS : {args.sls_hi} (Hindi)") + print() + + if events: + print(f" {'#':<4} {'Start':>8} {'End':>8} {'Label':<22} {'Conf':>6} {'Frames':>6} {'Src':<8} Hindi CC") + print(" " + "─" * 85) + for i, ev in enumerate(events, 1): + print(f" {i:<4} {ev.start_time:>7.2f}s {ev.end_time:>7.2f}s " + f"{ev.label:<22} {ev.confidence:>6.3f} {ev.frame_count:>6} " + f"{ev.onset_source:<8} {ev.caption_hi}") + return 0 diff --git a/src/cc_detector/events.py b/src/cc_detector/events.py new file mode 100644 index 0000000..23ffe66 --- /dev/null +++ b/src/cc_detector/events.py @@ -0,0 +1,64 @@ +""" +Core data model: SoundEvent dataclass. + +Every detected event flowing through the pipeline is represented as a +SoundEvent. The to_dict() method produces the export-ready payload for +JSON, CSV, and SRT writers. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field, asdict + + +def _fmt(seconds: float) -> str: + """HH:MM:SS.mmm timestamp string.""" + ms = int(round(seconds * 1000)) + h, r = divmod(ms, 3_600_000) + m, r = divmod(r, 60_000) + s, ms = divmod(r, 1_000) + return f"{h:02d}:{m:02d}:{s:02d}.{ms:03d}" + + +@dataclass +class SoundEvent: + label: str + caption_en: str + caption_hi: str + start_time: float + end_time: float + confidence: float + yamnet_raw: str + + frame_count: int = 1 + onset_source: str = "yamnet" + spectral_gate: bool = False + top_candidates: list = field(default_factory=list) + + @property + def duration(self) -> float: + return max(0.0, self.end_time - self.start_time) + + @property + def start_ts(self) -> str: + return _fmt(self.start_time) + + @property + def end_ts(self) -> str: + return _fmt(self.end_time) + + def to_dict(self) -> dict: + return { + "label": self.label, + "caption_en": self.caption_en, + "caption_hi": self.caption_hi, + "start_time": round(self.start_time, 3), + "end_time": round(self.end_time, 3), + "start_timestamp": self.start_ts, + "end_timestamp": self.end_ts, + "duration": round(self.duration, 3), + "confidence": round(self.confidence, 4), + "frame_count": self.frame_count, + "onset_source": self.onset_source, + "yamnet_raw": self.yamnet_raw, + } diff --git a/src/cc_detector/export.py b/src/cc_detector/export.py new file mode 100644 index 0000000..82d4bf5 --- /dev/null +++ b/src/cc_detector/export.py @@ -0,0 +1,120 @@ +""" +Export writers: SRT, SLS, JSON, CSV. + +SRT — industry-standard subtitle format (used by most video players) +SLS — Simple Lyrics/Subtitle format (used in PlanetRead Same Language Subtitling) +JSON — structured output for downstream processing / Module 2 handoff +CSV — spreadsheet-friendly for editor review + +Every SRT/SLS entry includes a comment line (starting with %) that carries +debug metadata (confidence, frame count, source model) so editors can +understand why a CC was suggested without opening a separate log file. +""" + +from __future__ import annotations + +import csv +import json +from pathlib import Path + +from .events import SoundEvent + + + +def _srt_ts(sec: float) -> str: + """Convert seconds to SRT timestamp: HH:MM:SS,mmm""" + ms = int(round(sec * 1000)) + h, r = divmod(ms, 3_600_000) + m, r = divmod(r, 60_000) + s, ms = divmod(r, 1_000) + return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}" + + +def _sls_ts(sec: float) -> str: + """Convert seconds to SLS timestamp: HH:MM:SS.mmm""" + return _srt_ts(sec).replace(",", ".") + + + + +def write_srt( + events: list[SoundEvent], + path: Path, + hindi: bool = False, +) -> None: + """ + Write events to an SRT subtitle file. + + Each subtitle block contains: + Line 1 — index + Line 2 — timestamp range + Line 3 — CC text (English or Hindi) + Line 4 — % metadata comment (conf / frames / source) + """ + path.parent.mkdir(parents=True, exist_ok=True) + events = sorted(events, key=lambda e: e.start_time) + lines = [] + + for i, ev in enumerate(events, 1): + start = _srt_ts(ev.start_time) + end = _srt_ts(max(ev.end_time + 0.5, ev.start_time + 2.0)) + text = ev.caption_hi if hindi else ev.caption_en + lines += [ + str(i), + f"{start} --> {end}", + text, + f"% conf={ev.confidence:.3f} frames={ev.frame_count} src={ev.onset_source}", + "", + ] + + path.write_text("\n".join(lines), encoding="utf-8") + + + +def write_sls( + events: list[SoundEvent], + path: Path, + hindi: bool = False, +) -> None: + """ + Write events to an SLS (Simple Lyrics Subtitle) file. + Format: [HH:MM:SS.mmm] CC text + """ + path.parent.mkdir(parents=True, exist_ok=True) + events = sorted(events, key=lambda e: e.start_time) + lines = [] + + for ev in events: + ts = _sls_ts(ev.start_time) + text = ev.caption_hi if hindi else ev.caption_en + lines.append(f"[{ts}] {text}") + + path.write_text("\n".join(lines), encoding="utf-8") + + + +def write_json(events: list[SoundEvent], path: Path) -> None: + """Write full event list to JSON (pretty-printed).""" + path.parent.mkdir(parents=True, exist_ok=True) + payload = [ev.to_dict() for ev in events] + path.write_text(json.dumps(payload, indent=2, ensure_ascii=False), + encoding="utf-8") + + + +_CSV_FIELDS = [ + "label", "caption_en", "caption_hi", + "start_time", "end_time", + "start_timestamp", "end_timestamp", "duration", + "confidence", "frame_count", "onset_source", "yamnet_raw", +] + + +def write_csv(events: list[SoundEvent], path: Path) -> None: + """Write events to CSV — one row per event.""" + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("w", newline="", encoding="utf-8") as fh: + writer = csv.DictWriter(fh, fieldnames=_CSV_FIELDS, extrasaction="ignore") + writer.writeheader() + for ev in events: + writer.writerow(ev.to_dict()) diff --git a/src/cc_detector/labels.py b/src/cc_detector/labels.py new file mode 100644 index 0000000..367a9ea --- /dev/null +++ b/src/cc_detector/labels.py @@ -0,0 +1,207 @@ +""" +Label system for YAMNet-based CC detection. + +Key fix from v0.2: removed 'domestic animals, pets' from blocklist — +it was blocking dog bark detections when YAMNet's parent class fired +instead of the specific 'Dog' or 'Bark' class. +""" + +from __future__ import annotations + + +BLOCKLIST: frozenset[str] = frozenset({ + "inside, small room", + "inside, large room or hall", + "inside, public space", + "outside, urban or manmade", + "outside, rural or natural", + "acoustic environment", + "reverberation", + "room acoustic", + "silence", + "noise", + "white noise", + "pink noise", + "static", + "hum", + "buzz", + "snake", + "speech", + "narration, monologue", + "male speech, man speaking", + "female speech, woman speaking", + "child speech, kid speaking", + "conversation", + "babbling", + "breathing", + "pant", + "snort", + "cough", + "belch", + "hiccup", + "sound effect", + "mechanisms", + "generic impact sounds", + "scratch", + "rattle", + "rustle", +}) + +LABEL_REMAPPING: dict[str, str] = { + "cap gun": "GUNSHOT", + "gunshot, gunfire": "GUNSHOT", + "machine gun": "RAPID GUNFIRE", + "fusillade": "RAPID GUNFIRE", + "explosion": "EXPLOSION", + "burst, pop": "POP", + "bang": "BANG", + "dog": "DOG BARK", + "bark": "DOG BARK", + "bow-wow": "DOG BARK", + "domestic animals, pets": "DOG BARK", + "animal": "DOG BARK", + "meow": "CAT", + "cat": "CAT", + "bird": "BIRD", + "chirp, tweet": "BIRD", + "bird vocalization, bird call, bird song": "BIRD", + "tick": "CLOCK TICKING", + "ticking": "CLOCK TICKING", + "clock": "CLOCK TICKING", + "chink, clink": "GLASS", + "glass": "GLASS", + "shatter": "GLASS BREAKING", + "door": "DOOR", + "door slam": "DOOR SLAM", + "slam": "DOOR SLAM", + "knock": "KNOCK", + "squeak": "SQUEAK", + "creak": "CREAK", + "vehicle horn, car horn, honking": "CAR HORN", + "honk": "CAR HORN", + "car": "VEHICLE", + "truck": "VEHICLE", + "motorcycle": "MOTORCYCLE", + "engine": "ENGINE", + "telephone": "PHONE RING", + "ringtone": "PHONE RING", + "alarm clock": "ALARM", + "fire alarm": "ALARM", + "smoke detector": "ALARM", + "alarm": "ALARM", + "siren": "SIREN", + "civil defense siren": "SIREN", + "rain": "RAIN", + "thunder": "THUNDER", + "thunderstorm": "THUNDER", + "wind": "WIND", + "fire": "FIRE", + "fireworks": "FIREWORKS", + "screaming": "SCREAM", + "shout": "SHOUT", + "laughter": "LAUGHTER", + "applause": "APPLAUSE", + "crying, sobbing": "CRYING", + "whimper": "CRYING", + "thump, thud": "THUD", + "stir": "STIRRING", + "chop": "SHARP IMPACT", + "ping": "PING", + "gears": "MECHANICAL", + "computer keyboard": "KEYBOARD", + "typewriter": "KEYBOARD", + "bell": "BELL", + "church bell": "BELL", + "doorbell": "DOORBELL", + "footsteps": "FOOTSTEPS", + "splash, splatter": "WATER SPLASH", + "water": "WATER", + "crowd": "CROWD", + "cheering": "CROWD CHEER", + "music": "MUSIC", + "drum": "DRUM", + "guitar": "MUSIC", + "piano": "MUSIC", +} + + +TRANSIENT_LABELS: frozenset[str] = frozenset({ + "DOG BARK", "CAT", "BIRD", "ANIMAL SOUND", "CLOCK TICKING", + "GLASS", "GLASS BREAKING", + "DOOR", "DOOR SLAM", "KNOCK", "DOORBELL", "SQUEAK", "CREAK", "BANG", + "GUNSHOT", "RAPID GUNFIRE", "EXPLOSION", "POP", + "SCREAM", "SHOUT", + "ALARM", "PHONE RING", + "THUD", "SHARP IMPACT", "IMPACT", "PING", + "FOOTSTEPS", "WATER SPLASH", + "BELL", "FIREWORKS", "LAUGHTER", "APPLAUSE", "KNOCK", +}) + +HINDI_CC_MAP: list[tuple[tuple[str, ...], str]] = [ + (("rapid gunfire", "machine gun", "fusillade"), "तेज़ गोलीबारी"), + (("gunshot", "gun", "rifle", "pistol", "bang"), "गोली की आवाज़"), + (("explosion", "blast", "bomb", "detonat"), "विस्फोट"), + (("firework",), "आतिशबाजी"), + (("pop",), "पॉप की आवाज़"), + (("scream", "shriek"), "चीख"), + (("shout", "yell"), "चिल्लाना"), + (("laughter", "laugh", "giggle", "chuckle"), "हँसी"), + (("applause", "clapping"), "तालियाँ"), + (("crying", "sobbing", "weeping", "whimper"), "रोने की आवाज़"), + (("crowd cheer", "cheer"), "भीड़ का जयकारा"), + (("crowd",), "भीड़ का शोर"), + (("glass breaking", "glass", "shatter"), "काँच की आवाज़"), + (("thud", "thump", "sharp impact", "impact"), "धमाके की आवाज़"), + (("knock",), "दस्तक"), + (("door slam", "door"), "दरवाज़े की आवाज़"), + (("squeak", "creak"), "चरचराहट"), + (("car horn", "horn", "honk"), "हॉर्न बजना"), + (("siren",), "सायरन"), + (("alarm",), "अलार्म"), + (("phone ring", "ringtone", "telephone"), "फ़ोन की घंटी"), + (("doorbell",), "डोरबेल"), + (("vehicle", "car", "truck", "motorcycle"), "वाहन की आवाज़"), + (("engine",), "इंजन की आवाज़"), + (("dog bark", "bark", "bow-wow", "dog", "animal"), "कुत्ते की आवाज़"), + (("cat",), "बिल्ली की आवाज़"), + (("bird",), "चिड़िया की आवाज़"), + (("clock ticking", "clock", "ticking"), "घड़ी की टिक-टिक"), + (("thunder",), "बिजली कड़कना"), + (("rain",), "बारिश"), + (("wind",), "हवा की आवाज़"), + (("fire",), "आग की आवाज़"), + (("water splash", "splash"), "पानी के छींटे"), + (("water",), "पानी की आवाज़"), + (("bell",), "घंटी"), + (("keyboard", "typing"), "टाइपिंग"), + (("drum",), "ढोल"), + (("music", "piano", "guitar"), "संगीत"), + (("footsteps",), "क़दमों की आवाज़"), + (("mechanical", "stirring"), "यांत्रिक आवाज़"), + (("ping",), "पिंग"), +] + + +def is_blocklisted(label: str) -> bool: + return label.lower() in BLOCKLIST + + +def remap_label(label: str) -> str: + return LABEL_REMAPPING.get(label.lower(), label.upper()) + + +def is_transient(canonical_label: str) -> bool: + """True if this label typically fires in 1-2 YAMNet frames.""" + return canonical_label.upper() in TRANSIENT_LABELS + + +def caption_en(label: str) -> str: + return f"[{label.lower()}]" + + +def caption_hi(label: str) -> str: + label_lower = label.lower() + for keywords, hindi in HINDI_CC_MAP: + if any(kw in label_lower for kw in keywords): + return f"[{hindi}]" + return f"[{label.upper()}]" \ No newline at end of file diff --git a/src/cc_detector/spectral.py b/src/cc_detector/spectral.py new file mode 100644 index 0000000..5ccaa2f --- /dev/null +++ b/src/cc_detector/spectral.py @@ -0,0 +1,7 @@ +"""Spectral helpers — kept minimal after v0.3 fixes removed the flatness gate.""" +from __future__ import annotations +import numpy as np + + +def rms(chunk: np.ndarray) -> float: + return float(np.sqrt(np.mean(chunk.astype(np.float32) ** 2))) diff --git a/src/cc_detector/vad.py b/src/cc_detector/vad.py new file mode 100644 index 0000000..a32eec2 --- /dev/null +++ b/src/cc_detector/vad.py @@ -0,0 +1,53 @@ +"""Silero VAD speech suppression — lazy-loaded singleton.""" +from __future__ import annotations +import torch +from .audio import TARGET_SR + +_vad_model = None +_read_audio = None +_get_ts = None + + +def _load() -> None: + global _vad_model, _read_audio, _get_ts + if _vad_model is not None: + return + model, utils = torch.hub.load( + repo_or_dir="snakers4/silero-vad", + model="silero_vad", + force_reload=False, + trust_repo=True, + verbose=False, + ) + get_ts, _, read_audio, *_ = utils + _vad_model = model + _get_ts = get_ts + _read_audio = read_audio + + +def get_speech_intervals( + wav_path: str, + sr: int = TARGET_SR, + threshold: float = 0.50, + min_silence_ms: int = 300, +) -> list[tuple[float, float]]: + _load() + wav = _read_audio(wav_path, sampling_rate=sr) + hits = _get_ts( + wav, _vad_model, + sampling_rate=sr, + threshold=threshold, + min_silence_duration_ms=min_silence_ms, + ) + return [(h["start"] / sr, h["end"] / sr) for h in hits] + + +def is_speech( + timestamp: float, + intervals: list[tuple[float, float]], + tolerance: float = 0.35, +) -> bool: + return any( + (s - tolerance) <= timestamp <= (e + tolerance) + for s, e in intervals + ) diff --git a/src/cc_detector/yamnet.py b/src/cc_detector/yamnet.py new file mode 100644 index 0000000..5f10e0f --- /dev/null +++ b/src/cc_detector/yamnet.py @@ -0,0 +1,377 @@ +""" +YAMNet sound event detector — v0.3 (fixed for short transient events). + +Root causes of missed dog bark / door slam in v0.2: + 1. consensus_k=2 killed single-frame transient events + 2. spectral flatness gate rejected harmonically-rich animal sounds + 3. 'domestic animals, pets' was in blocklist + +Fixes applied: + 1. Consensus voting is BYPASSED for transient-class labels + (dog bark, door, knock, glass, gunshot, etc.) + — a single frame above threshold is sufficient. + 2. Spectral flatness gate REMOVED entirely. + It was too aggressive and the librosa onset gate provides + sufficient false-positive protection. + 3. Onset strength gate is only applied to pure percussion labels, + not to tonal-transient events like dog bark or laughter. + 4. conf_thresh default lowered to 0.25 (matches working notebook). + 5. rms_thresh lowered to 0.010 to catch quiet background barks. + 6. VAD tolerance widened to 0.35 s to catch events at speech boundaries. + 7. Top-5 instead of top-3 candidates examined per frame. + 8. Onset transient pass now reports the YAMNet top-1 label for that + timestamp instead of always returning generic "IMPACT". +""" + +from __future__ import annotations + +import csv +import os +import time +import warnings +from collections import deque +from pathlib import Path + +import numpy as np +import tensorflow as tf +import tensorflow_hub as hub + +from .audio import TARGET_SR, load_mono_f32 +from .events import SoundEvent +from .labels import ( + is_blocklisted, remap_label, caption_en, caption_hi, + is_transient, +) +from .vad import is_speech + +os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "3") +os.environ.setdefault("CUDA_VISIBLE_DEVICES", "-1") +warnings.filterwarnings("ignore", category=UserWarning) + +YAMNET_URL = "https://tfhub.dev/google/yamnet/1" +YAMNET_FRAME_HOP = 0.48 # seconds between frames +YAMNET_FRAME_WIN = 0.96 # window size + +_yamnet_model = None +_yamnet_classes = None + + +def _load_yamnet(): + global _yamnet_model, _yamnet_classes + if _yamnet_model is not None: + return _yamnet_model, _yamnet_classes + print("Loading YAMNet from TensorFlow Hub...") + t0 = time.time() + model = hub.load(YAMNET_URL) + class_map_path = model.class_map_path().numpy().decode("utf-8") + classes = [] + with tf.io.gfile.GFile(class_map_path) as fh: + for row in csv.DictReader(fh): + classes.append(row["display_name"]) + _yamnet_model = model + _yamnet_classes = classes + print(f" YAMNet loaded in {time.time()-t0:.1f}s | {len(classes)} classes") + return model, classes + + +def _rms(chunk: np.ndarray) -> float: + return float(np.sqrt(np.mean(chunk.astype(np.float32) ** 2))) + + +def _has_onset(chunk: np.ndarray, sr: int, min_strength: float = 1.0) -> bool: + """ + True if the chunk contains a genuine energy onset. + Used ONLY for sustained ambient labels (engine, rain, crowd) where + we want to confirm the event actually started rather than was ongoing. + NOT applied to transient events (dog, door, etc.). + """ + if len(chunk) < 512: + return True # too short to assess — let through + try: + import librosa + env = librosa.onset.onset_strength( + y=chunk.astype(np.float32), sr=sr, hop_length=256 + ) + return float(np.max(env)) >= min_strength + except Exception: + return True # if librosa fails, don't block + + +# Labels where we require an energy onset (sustained ambient sounds that +# YAMNet sometimes fires on room tone if it's loud enough) +_REQUIRES_ONSET: frozenset[str] = frozenset({ + "ENGINE", "RAIN", "WIND", "FIRE", "CROWD", "MECHANICAL", "VEHICLE", +}) + + +def _merge_raw(raw: list[dict], gap_sec: float) -> list[dict]: + if not raw: + return [] + out = [] + cur = dict(raw[0]) + for ev in raw[1:]: + same = ev["label"] == cur["label"] + close = (ev["timestamp"] - cur["end"]) <= gap_sec + if same and close: + cur["end"] = ev["end"] + cur["frame_count"] = cur.get("frame_count", 1) + 1 + if ev["confidence"] > cur["confidence"]: + cur["confidence"] = ev["confidence"] + cur["top_candidates"] = ev.get("top_candidates", []) + else: + out.append(cur) + cur = dict(ev) + cur.setdefault("frame_count", 1) + out.append(cur) + return out + + +def _remove_overlaps(events: list[dict], min_gap: float = 0.5) -> list[dict]: + if not events: + return [] + events = sorted(events, key=lambda x: x["start"]) + clean = [events[0]] + for ev in events[1:]: + last = clean[-1] + if ev["start"] < last["end"] + min_gap: + if ev["confidence"] > last["confidence"]: + clean[-1] = ev + else: + clean.append(ev) + return clean + + +def _raw_to_events(raw: list[dict]) -> list[SoundEvent]: + out = [] + for r in raw: + label = r["label"] + end = r["end"] + if end - r["start"] < 1.0: + end = r["start"] + 1.0 + out.append(SoundEvent( + label = label, + caption_en = caption_en(label), + caption_hi = caption_hi(label), + start_time = round(r["start"], 3), + end_time = round(end, 3), + confidence = round(r["confidence"], 4), + yamnet_raw = r.get("yamnet_raw", label), + frame_count = r.get("frame_count", 1), + onset_source = r.get("onset_source", "yamnet"), + spectral_gate = False, + top_candidates= r.get("top_candidates", []), + )) + return sorted(out, key=lambda e: e.start_time) + + +class DetectionStats: + def __init__(self): + self.speech = 0 + self.silent = 0 + self.blocklist = 0 + self.low_conf = 0 + self.onset_fail = 0 + self.consensus = 0 + self.accepted = 0 + + def __repr__(self): + return ( + f"speech={self.speech} silent={self.silent} " + f"blocklist={self.blocklist} low_conf={self.low_conf} " + f"onset_fail={self.onset_fail} consensus={self.consensus} " + f"accepted={self.accepted}" + ) + + +def detect( + wav_path, + speech_intervals, + *, + conf_thresh: float = 0.25, + rms_thresh: float = 0.010, + merge_gap: float = 1.5, + top_k: int = 5, + use_onset_pass: bool = True, + consensus_window: int = 3, + consensus_k: int = 2, + vad_tolerance: float = 0.35, +) -> tuple[list[SoundEvent], DetectionStats, float]: + """ + Run YAMNet detection with transient-aware filtering. + + Key design: + - Transient events (dog, door, knock, glass, gunshot…) bypass consensus + voting. A single frame above conf_thresh is accepted. + - Sustained events (engine, rain, crowd…) require consensus_k hits in + consensus_window consecutive frames AND an energy onset check. + - No spectral flatness gate — it was killing legitimate animal sounds. + - Onset transient pass (librosa) labels events from YAMNet scores, + not a generic "IMPACT". + """ + wav_path = str(wav_path) + model, classes = _load_yamnet() + audio, sr = load_mono_f32(Path(wav_path)) + + FRAME_N = int(YAMNET_FRAME_HOP * sr) + + t0 = time.time() + waveform = tf.convert_to_tensor(audio, dtype=tf.float32) + scores, _, _ = model(waveform) + scores_np = scores.numpy() + infer_time = time.time() - t0 + + stats = DetectionStats() + raw: list[dict] = [] + + # Per-label sliding window for consensus + label_history: dict[str, deque] = {} + + for frame_idx, frame_scores in enumerate(scores_np): + ts = round(frame_idx * YAMNET_FRAME_HOP, 3) + + # Gate 1: VAD speech suppression + if is_speech(ts, speech_intervals, tolerance=vad_tolerance): + stats.speech += 1 + continue + + # Gate 2: RMS energy gate + s = frame_idx * FRAME_N + e = min(s + FRAME_N, len(audio)) + chunk = audio[s:e] + if _rms(chunk) < rms_thresh: + stats.silent += 1 + continue + + # Find best non-blocklisted label in top-K + top_indices = np.argsort(frame_scores)[::-1][:top_k] + top_candidates = [ + {"rank": i + 1, + "label": classes[idx], + "confidence": round(float(frame_scores[idx]), 4)} + for i, idx in enumerate(top_indices) + if float(frame_scores[idx]) >= 0.05 + ] + + chosen_raw = None + chosen_conf = 0.0 + for idx in top_indices: + raw_lbl = classes[idx] + conf = float(frame_scores[idx]) + if conf < conf_thresh: + break + if is_blocklisted(raw_lbl): + continue + chosen_raw = raw_lbl + chosen_conf = conf + break + + if chosen_raw is None: + stats.blocklist += 1 + continue + + canonical = remap_label(chosen_raw) + transient = is_transient(canonical) + + if transient: + # ── Transient path: accept immediately + stats.accepted += 1 + raw.append({ + "timestamp": ts, + "label": canonical, + "confidence": round(chosen_conf, 4), + "frame_dur": YAMNET_FRAME_HOP, + "start": ts, + "end": ts + YAMNET_FRAME_WIN, + "yamnet_raw": chosen_raw, + "onset_source": "yamnet", + "top_candidates": top_candidates, + "frame_count": 1, + }) + else: + + if canonical not in label_history: + label_history[canonical] = deque(maxlen=consensus_window) + label_history[canonical].append(True) + + votes = sum(label_history[canonical]) + if votes < consensus_k: + stats.consensus += 1 + continue + + # Gate 3: onset check for sustained labels + if canonical in _REQUIRES_ONSET: + if not _has_onset(chunk, sr): + stats.onset_fail += 1 + continue + + stats.accepted += 1 + raw.append({ + "timestamp": ts, + "label": canonical, + "confidence": round(chosen_conf, 4), + "frame_dur": YAMNET_FRAME_HOP, + "start": ts, + "end": ts + YAMNET_FRAME_WIN, + "yamnet_raw": chosen_raw, + "onset_source": "yamnet", + "top_candidates": top_candidates, + "frame_count": 1, + }) + + + transient_raw: list[dict] = [] + if use_onset_pass: + try: + import librosa + onset_times = librosa.onset.onset_detect( + y=audio.astype(np.float32), sr=sr, + units="time", delta=0.30, wait=4, + ) + for t in onset_times: + t = round(float(t), 3) + if is_speech(t, speech_intervals, tolerance=vad_tolerance): + continue + s = int(max(0, t - 0.05) * sr) + e = int(min(len(audio), t + 0.20) * sr) + if _rms(audio[s:e]) < 0.008: + continue + # Look up YAMNet's top-1 label at this timestamp + frame_idx = min(int(t / YAMNET_FRAME_HOP), len(scores_np) - 1) + onset_scores = scores_np[frame_idx] + top5 = np.argsort(onset_scores)[::-1][:5] + onset_label = None + onset_conf = 0.55 # default confidence for onset events + for idx in top5: + raw_lbl = classes[idx] + if is_blocklisted(raw_lbl): + continue + candidate_canonical = remap_label(raw_lbl) + candidate_conf = float(onset_scores[idx]) + # Only keep if it's a transient class + if is_transient(candidate_canonical) and candidate_conf >= 0.15: + onset_label = candidate_canonical + onset_conf = max(onset_conf, candidate_conf) + break + if onset_label is None: + onset_label = "IMPACT" + + transient_raw.append({ + "timestamp": t, + "label": onset_label, + "confidence": round(onset_conf, 4), + "frame_dur": 0.25, + "start": t, + "end": t + 0.5, + "yamnet_raw": "onset_transient", + "onset_source": "onset", + "top_candidates": [], + "frame_count": 1, + }) + except ImportError: + pass # librosa not installed — skip onset pass + + all_raw = sorted(raw + transient_raw, key=lambda x: x["timestamp"]) + merged = _remove_overlaps(_merge_raw(all_raw, merge_gap)) + events = _raw_to_events(merged) + + return events, stats, infer_time From e11cf7a0d025d827a112c1f09ca0699f730b87dc Mon Sep 17 00:00:00 2001 From: uneeb Date: Sun, 10 May 2026 10:14:10 +0530 Subject: [PATCH 2/6] Remove cache files --- .../__pycache__/__init__.cpython-310.pyc | Bin 880 -> 0 bytes .../__pycache__/audio.cpython-310.pyc | Bin 2637 -> 0 bytes src/cc_detector/__pycache__/cli.cpython-310.pyc | Bin 5446 -> 0 bytes .../__pycache__/events.cpython-310.pyc | Bin 2251 -> 0 bytes .../__pycache__/export.cpython-310.pyc | Bin 4109 -> 0 bytes .../__pycache__/labels.cpython-310.pyc | Bin 6486 -> 0 bytes src/cc_detector/__pycache__/vad.cpython-310.pyc | Bin 1807 -> 0 bytes .../__pycache__/yamnet.cpython-310.pyc | Bin 10028 -> 0 bytes 8 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 src/cc_detector/__pycache__/__init__.cpython-310.pyc delete mode 100644 src/cc_detector/__pycache__/audio.cpython-310.pyc delete mode 100644 src/cc_detector/__pycache__/cli.cpython-310.pyc delete mode 100644 src/cc_detector/__pycache__/events.cpython-310.pyc delete mode 100644 src/cc_detector/__pycache__/export.cpython-310.pyc delete mode 100644 src/cc_detector/__pycache__/labels.cpython-310.pyc delete mode 100644 src/cc_detector/__pycache__/vad.cpython-310.pyc delete mode 100644 src/cc_detector/__pycache__/yamnet.cpython-310.pyc diff --git a/src/cc_detector/__pycache__/__init__.cpython-310.pyc b/src/cc_detector/__pycache__/__init__.cpython-310.pyc deleted file mode 100644 index 70b44fd663583d2a0822caf143daebe686fe55e7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 880 zcmY*Y&2H2%5YDy>RDmKMVUAs~*)0ezRmG1Almc5&x2i-hk=L0`OcUF($GdD#+_~}+ zya*4`BfJF%#5ieDu@uW=d%kbx^B1$ZnI=45Wq;IG%0$g zy<+WjtyrHXgMRqRNvV{1frOHbgP&i%!GR{MYgM8q*sG1l0``O@Wi(7fS)y;3$Hvs7 zbYhB7BRqh~$%h^I_-{9&-+vj)o&4M{$KUflngY}DBd0rTq=NGs0oNeOSJ{+WZ zE&_3jdHy|deYSWz{eC*GOoQW~5ocp*t*Gmtzq`@<3t{OYwg3PC diff --git a/src/cc_detector/__pycache__/audio.cpython-310.pyc b/src/cc_detector/__pycache__/audio.cpython-310.pyc deleted file mode 100644 index 666da9a7a3a241567f60b297cc37075713f39a60..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2637 zcmZ`*-ESPX5hwTKZf|uu%Z?MWMlba;zw1wI zwQkKmt9R>m47v?FhTSRVcbmNSWcn%Uw)m-S#A<)Vk8pQ})%j@_EF)`cSlbzE3zyLi z#HLvDbHtkbwMTCEEPI=sW@kS4y03%w8aoTx8|)H0$IgRxj=jS!vbR7x&)iMaCe=S6 z_;96SNkRC55;RsxkrSFTk`|ODdA}2e_av7j$!MP^#ayqKmAwC+BwJO^QqIU$l2b7t zUwrmQk{3h{q~aM_TQjSO;Gb5B;H(p_q$y#=elC^ZG$UETcq+-|PgZWNb2Upglaz~s z++VpqOVVUZ6q2?fppwUmEc}A(emI0JdC>`4x1;2d*P(j}$X>b!GTXJXu(Dh_@;ligEb=@1d@CEadZHNeTypgQVtlAiZQi?c z=k~_k)$7syo7Y!wU#%Or;E~40*Ag|dyaKZ5_d)K{J#f`a;j!N-%1WN}uyXJE&D*gF z{p5vV@_|9n3@99RF<{7H7%<~NZd@JS;w+)7LKI@Sv_TUIr@g<;bCMbR$$S-pP@BU^ z(nyacsqCz`Jsm_*PBR`wI*cMX_9``cGm1W~XgYaPk0MsYQS=OnHz5=&j+W=Qi;T}# zIpSju zHR2~myga2>Z?IqfGxN}jcR|ykO;lvQS=Wn7MF5i61n}ZAhcgYg`BFIqWrTOo4hB8| zCV31jq8#QjPq`!X2s5A6Se*sRRUXV5ECf67L%(_GD1WCm0_?zV>alz1j@+F(Ywo(@ zeHE~2IKRf95o`U$*h6-T&HTZ+fXZL8)91kN@9_u&-p*A2h5$kW*Xo!FPF5cIXyrbE z63KuY2E-mGd5O9nR4?#^*=o5$ZS!yO#rLsA42a@F5&#$j>QE#Bb^x38^9>&95Mtpk z$*YPWKvM?gm4H{9H{WeLx?WPjb0u}a4-%=OVt44w4d70O?%ZAug2oyzm{EYWr(MVh z>}$oe1TgXll2leod&-cZ!Qt@qgv!jjAWnGe(D_xE{Qc|W#?dNRjIO6`g&*WtJpVqnEdotVl5 zjE`M=;ynmOTn3@tIAf-EWRK=YW#{KxnZEB{GtgrDLk^{2?-mDOOyyCXbuFQP+0qzXpP zBiu7gSRek)X85!KA&MC7JdzPIK+# zrFQ8Spd_OQ+JzjnJ+W+7`8LI{R{*DSy89g{m`(tEpBGW@$|B?lNyDV)Yayh)1#k#p z6zgCDkCruE(1oFFamiR~W6W8r0iGr8@pEIF#BsvX_yHFScIC_d zZF_+}6-6ihB1Dm8Z{6Iea+PGw<)Nu3IagChuL(lwLZ~hPdtb*&C>o4u(gra;$%;05!d0$af|F~q~k}~SbE#{+`hQxuVL!31`n!!i}TuQRjzBza(u{> zj0eypN(vsdh5LNM5{;H6d|o~$8to<)`Y~~$Moa5oq;neg0v+x}aDj4L#ldMPHKsq( zyGlp7tuuoqSTZnJiltfRQBsYi*}#BoXhUVWM|w||m$Kn4vJojOY>{n~ve7NF?HElN z7;Ee`e*86lN049>Y?4hqGJ3P`gk%@nJs{Zw$zJxxfW!=v>`k_BK(Zfg4zRZdBnQ#v z(10e>;EMzN3_Hw@Jc9Ad-6^qhLt(RQF4hAEIKLbX)oRuFNIcPYvB&@2WrWD_5`0t}Lz0{(R-i z^6bLO^`agbE#WnyA;$fhuv=uq(Fp9nX$5#2sx`;*{V-K3*>0;Hgj&hI78w^WEiHyq zmq=ntt2ObyNTW6Dw!>kqX~FNdVe>39g8MBVrJ6jj)+`YjtK4aY$x?|-Hr(#D0}SHN zN?rXj35%MT#6CnR#_Lww3Bp9F1e*>=Uu#P;RN;jPcl4{UncQtSw!c~?d0JMNR_$C|OAh8Qw#ZElJky1%C{nBQS#xKVH@WDUAhiAh*E7(8G zWHzjTZy9KAey$h}K_f02ywtE-J!QT>8=99%hAu7B=g^4x13l-(6uy5atIf?mW{-hRIy7M9qUz3kDZueK<>A4Mdxh(}JUfT~gOuj#s-= zlBz@8YH^oEiKTPr7niWjCdP`lc?wi96{Y*$EgIhH-lAPst@R79O#VYy<;A|yM>&hJ zD!NL4C`julQFlBmh;;Z;D5SQ2uZS7QpP!mt^_qOP?Q(u=w$^G{jx$?3-UxzV7RGqr zYPvj_^+k=i{(C-KbL?{Kew44?YTFL0O8+Yg7|y?lhoNe!rWtzj`EXiOlP|}#F_nG= z`O9h~`>AI^E2zWjsA{O<2!;~Vc+|*!_b-}d6t{HcGlMXfeee=bOGQU)`KzMc8c_K;`AC8o24gGk=RC_$cL z`$L6lE+`x7hT7FT+U?9-_(kU|!P%OK_tv8k9xpK0-3P?W8v`>i0I z5&!H-XZmeF1qw6p+rT((dS3CrU&lJ>_ZjX_6?ef&2@55%1`BtKQ_|35}$a-m{hs|1{-LJ z(RTNCa8Ix7^LzU{+B5aLsH`adI`Ay3)*a7ed2KcwYAAF5J z1dBNspCM|eAFJXUXnBa}eoN3ZkpHPCpLx|IUR5XkSQ7+arU4UXpfTV9E6K7f%V~lK zlAmWsQBJdCz4ACa0siFIBfS;8Ez8+Xn&}(b1?A@K=UR8T3kb!gJHzqVosrvf>>c#_ z8K`+lodR_Uca0s1tG?lAXk=0LE11v3mDAwhM6O^l~_LWAWIGH?*f?dvD;gC1g$yyf`|Un~|8D^U2S6w>s*p{Saq!vVn#j*m3gHvVJPzWQ zz)d&D$t^vB**#UU%zRhtXiwCqipa#}6YZ&T9R_nZb5~<3l+>Q9N0tF=5`N3E0~7&J zO_XrFbuJ**%5#wpEDYFVi9q`B_;DB$Mw!!q`utu?oOz!VAxEh_FceKotHuv$UHc)c z=3a&qP0M!6rxA&Osn3)*`-B;Z8_Mg}TDa%N{Os|Y=3o8BqdoqIxG9Zu`6{!#08#!ggUZvH`xV_ip1A;?7QY;^p2hK5+Ty~s zOIMc7g)5goUIGUl%|*iB{-%DWDOE@27+8+E4xnHLHc%Id_40h3y1c#fsMEat{Q-lmd{q)hJk+A?{K-I}8d67V(AL%QihMw<)S<{q*V+YzQ_R=)J{qn*8 z^FK_Sc^h^cYCcZXH%;&-%g5@)wjWjCB9LA0_>O zaUnzs)+s3KhNNAu+xMa&34W_F_*F!i>N0iVg#g(cp9Rh@U0L{O>Cy@^G_ou~&NRwW zLKA;wh>@(};uuAeMjxgIIy_|W&Fp6|k&Yxpj8N?`GN39sgg66=@~`{>QXO%MhIyA7 z=3Q^V6NvYy;tV}L9@kd5vAe!;R%D) zsPOu?7o4^92NaRu#-glrcd?2(79uz;H^m*Rg={`j?MUNmug52YA`gW||Ic{j)U37x zP=8FF#51YoRD>xl{UVo0BS}aj4HzXE=q#lXI#I^vFDWTa&HZb1Xc%Lp(KaVjj2-F@ zReP}eZ^nbdfqiQFK>@XUQED%8MiS%F4`bs;T%4ASLWiB;liGP%Yl1djZr+;l!q9Rr z3|)Ky#gXDu9wN38tuFUjN>++Hqg1uZyjryyWvkVC8(*n-wJIo06#MD1Opj0Sh*GrE zj(sb(>vVvRZ4!Yx=NgM51Cqh*GUL@n>O}++kGDWHn ooCxKALc)gh>DVsLP(<-5#W!#`X`qHQwlt?sYLl|e>V*UU1|;NA(f|Me diff --git a/src/cc_detector/__pycache__/events.cpython-310.pyc b/src/cc_detector/__pycache__/events.cpython-310.pyc deleted file mode 100644 index a6bce35301e7b505ce42e9fd50a8ffd11aa3dbc5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2251 zcmZ`)OK%)S5T2gt-JP9%IB~4RNk}pTgt8*p4GAD*8H8jW5mJoACLC6ghR$@aJ!oEZ z_a?DrOGxW0zaa1fa^uMV;MP|zm;;w^$^lW;vum$OGNZ0n_f&UPebrNRI)MYv{Tu%# zzk&1zDvJ*VN60yGjcfLZb~wDkn>u{ep59p{N=$8%; zc4YvHh%~UER#~CNiM=fg;JDKC~yT|Fcq@k*m*w;@4;$aL_CT0 zwO*JBy@nkkil2OuQ#uU8^dp}hb-se@SA(# zW0`0n;i!k0v>q`nmgj|LIw^A1^Gp+iw9%N_I1wo~b*6A{(@-KVa<0G*8)#FPLQiCF zJf&q!(A(^(1NUKUU=<#KpdC&2AonZIluXG!(e579Wa>U}_uYLDG>>~$qce?It+r40 zTuz@-UfZKHUbDOo-#4DpUm8#yfA{j8J8O6Et__F%EXzWjWI}0{l_8u9vffYr^c*Mo z@$V0vX$o)M&nF( z!I45NpK`kd->XEff*Qj(@f;!gS1=1_L;%(9eaNTqxVv6)Z^kL`8rbVI-k5QJHs1su zj3{rxnl{LtQO)}4g1j`U^HX3yz3zyH@cHsjWYpwmz_J3CfQS4nKX*W(FIu3z$yfP# ztF^6mfnT&*hhOEF_+{v)F24fxdCM%oSvDaAQ%l)Hk(wZ8CBiNe`LVp6$Of(npoUUM zSWV-LoL5o+2plI|=73B2RxSR;$~ zWe_yfWY75?hr8-bUC4sFxdjvULBM6~iM$M&yaePKF;t0kY@t<=b6_?;pGe!6J#_TWg0eq?O)YPSf zk^tUCPD%Zr-Q|Tu&ptzJAhvD)&1LM@xqKVtCF4g?jx83MAd29soTMnXqv(eTOJ_4n zQMB+|iK2xJyn|a{YUMQ~h;8G;@2nJ3?^FRF$Nv#7!5ephU~kCtKu}QVE^vI$C&zEw zYm+uvCNA*#=z8{>%^tNEyA84m*M`_MjVR(pTtzjWz@H7Gl8ArPI{GC-_aic@P$RMQPFiiK)7w6>g(tJooPIIPs;^5v0rq4 zwLHfzcRpSSLyR)^*U$gR(t?jFB{%0Q6&E~~oK>S$m8wjzUMcffu`{DeaJIV1^0HEF zJ>{Zg!z|v!=~0M->vKA3KE$5un=(zR#m=E;qy_shiwmJX5;13;ILG?9=#S#QU^_d~ zq928%s8J4zjS@AIcz-!k!;vDL7%w*p%!oK=Ls=%Gs<4YKvKu8IL4~tyfSZ)PUWlvK z%2h+G=G8z5HQ$q|D0sGMjuJdoI8loARBVLdm31LE$zqFiFHyiQFi?_>xTsjnk}}UR z$g;E$>QRReZ9Vrl?uC9%W{s*sD-QJ>{P#XMFqJCl?}52h;;F~@q6uAY>m zLd~(N9LYq$xLjlrdaTx|!bwFqDmctco?#n^;BFGrHWNr7S%HP-iNUod}Y^@VFwCvn4nPFtwf^z zURK6RH>7|I3XCb4(AwRn0U^NmGKIot5R|3tF#_P2wQX0wX ze7MA}U0b?wW2w`b%kw<_yK7nL=l^`mN`Lzg%hK)}XZaI*>4`lD@f?p_?WVY@FX`I4 z!P9m4AZkdumUi+=+XHP&ZRgsq9$E6c7=ChbaZu)Baa0Jgx|j@yah5G6=le>j#U$x& z#yP;fSjmKXTquzMU5mm*g4rZETN1nwO|0j6=o#W7Qh97xwWb8h)`7HkyY08hn8{Zy1#w9|A=My4d zR@;bB8W#qrd(3L0(k#6aCxbo8UCqkmHkhGMaavSMW(8x{iErNArE76_|L|Ee{LJ4> zf>6c&1?S8H@P-n1LFQMAK72M{V7+VPo@oEt^JeYoJ=)r3JVRfgSi@WugqpSL%{L!8 zx(Uu9MCd@=1thxV?QdY2@Z14aJf;8}TWjb%vX>v(Iy6k$r8}T!i-Rx>E@W}O%HvCK zW8-7OVZ+)wX%Za8)4~N9EzYV-5bAX}#l0;yB}GpkUp#+#cSo?^+w$4`{3W*4JWYn~ zUEFdP&h@r}ox5GcGy?ddMx2CC{E_O-zm4+(Fn7s8u*=b`Zq%36etw&$Qa36jT)}lq z6r)@a^_u9CXYgL_=J8$H&CUIEn8ZV(-|isYlSLsZ7%=cGN$4qK>aIx=x(&V-s#_af zhxOITpW0h1(;{lgm#|;HLd~mml20iL43=s~tZUyGSi8_dd$jGKgz8DV!t=giK` z+Ma#X3GEl{S=+H2b{p+W_y+$z5y%g)_B= z`-FYMk-x-XLJ_+A5aCn28(3U`a1a(X!4nYRe$)$Vj&j#L)B`A$dGnsj1Ku*u4dL9W zt;O1}y-}9c{EA`>RVg^u6pd$4=qa^1Ka@!Af=3I7IEvIGW*{!AjzQ>yjczqigxC+P zht?mQJIJl(-jS`Fs_gPKQIVsag*S|W4%#KWKq`PKhnIRd%_PcW*48U^Os!RAvEQY; zOTQvDuHx7RSeQz#wkRr8y4|=+(saLv{e2!L2O{QiruO+LA6AjO=SU*qO_Xf>8yfDZ zl8}bGmB&#;bNIFVHO8VgI3r#anE^}<9pW$fQ+nf(J%%=gmY&`vw7MJc5oiyzCdtL} zA~lz2V_26&T^VmY2MZsQ&q~L`*n)sS&>$GxbASKZWCs??=S?2mb0;pCq4ChN;X80h zYvK`8jJ*fJL(4G4tDhjVdvNr&i~QW`SZ`X@`3-OfeSU*4jYZ+;htT98SJ3|k`fr4a zg~@V*4TY@1wgV?^Z^%WuBgG+H*et{b5w1LkJ{;OFN+~cN_EM4Y0>yp>_kwogDo)5u z%V6whVXHl=-L;!k>pFZNSTpKKJ=KLC-K#&pa{c{|o-!^R;9rc8Ixrn*CMgmle;)*# z-at3%=|jP~6U2De2wUqQc8(%^1AM^qU!Qs^2Xv@cSA<0Al00QHRAgU`g8FSQ)0jy( z6yhf9NJf%b{2C;b(k3e%Sh+VpivqLD%cR@YVYk~G(I35TS5jcen3|8!5UAB4OIJ-z zS)WMgsZ*>WyAP;K%wv!&3F77s`@+CbnPIWu0;+ucO}s?qh{<%G)t(NV0Qs%$gnrm^ R!uC_=_4d>7co5D6{{_D3Cg}hG diff --git a/src/cc_detector/__pycache__/labels.cpython-310.pyc b/src/cc_detector/__pycache__/labels.cpython-310.pyc deleted file mode 100644 index 4f5fba5ee5b7726ed04ba3757298d795f2b442b3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6486 zcmaJ_3w&GEdDlHUl4V(T4219~DHkXhgJXw~@-zWiR%11`WF(uAK&oBcb8X#Px>q^( z%8s?rCeRfarCUcc_M`OIrc*Yc8)IzEV52}E>(;H^y7zYX(%QYc_trhNNB95Em6H$% z&M*J_>AUBA-*>*(Irq4OgUJZ~t~l^rdwL`i`4;^+^DluPcSRzaH^m~6#fXUPj|fe$ znpS7~nHI64)rg4xsP>Syp9^lqL~Jr*M?_roJ)wz&!hVqiJ)kfp20;&rO~7Gs9`Jl| z0dTXz3l(lrc#*=3#U*%qsls=N%Rp~c_)bFTxm@VrU7_$wg;$ApfpfLOYs3iXYsDz= zI&nR4OyM>$4thdt2i_od0B;mKf$vs$leii5EedZHw>_bqAWgT6NpSBFyMT8pyi24( zPbthOoK~0>?*V_e!WoeR{a&#LIID0@h3B=jtIAI zdA@DxhGW-_nm495EZ-Xm$5v{txl*$|U;pNjk0)(kUo$*iF>PmA7w)oNG33f=U7o&H zwVa+UBc9$cq~-X!Su;Ej7D!7ZZ3iYBLU)&RziR1T!!qq9s2`bjmq&CgWMm2=N5VPd zNm^`VR0{?S$8mjw7Ub=^3cPNzVLL0j>MOvb%ZXt!_E1wsf63jP7pU7};o6YB11i7^c=;riGsg6~ z>$o*{xoLGTT(%rbVvt?OC|_3=c->2&LoLxVs~B!Lv|M8azUyA%HvP~L*_1|&7Va26 zk!QSW$_nP-6{gk=VRbJxt9DJ06eV%RR&*%w(hYlShFrGP%4lj8%j|Alvfg0IntP@( zLA2Ve2fWg3wF2HS96yL5xy))1gV&bvP)-hFo@1<7^l+sg#H+SxHXA`4-tcP{o^UWd zL*wwIXqiEuXJQ0|^?7#9a!e~oRE!Ed>MRHS6=@lMmB?frD?$o-buUPnuCr=MPq{f5 zloozetkCPNAtZ6jay-kw&-3Mhpnqz%kl8bvD=oD8OrxPMH=Uqw_hP;@Q&?yXUzmr_4A{=6()ry%6y?xOOc!?RQ|aO!dL>u7eHC|Y+nT%9+Va+Q z7R1A{3wYgf*Mev!y%0oA!^d=63P{csr&}plEE{9Gzh+r}YlPnDtFCF(>{c%m6$^~o zn660AC9~lEmF{4M{G%`AGJA5cvX7YL@R&)ptzxULNNbfHk}_7kmpHKX3`Cx%{{P0t0K7(r~sfsP=yCl3>l5eJ*d z{vckudl7`X2*n+;;VY(iDC?)t7Y*C8lK&J`94#mqi>9k-liL_LUS^Sg6-gxa@iRs(XZZgPI6 zkk9Hx@=iiRlMildz(5(*;TE-k^lZ8~*Gf_b>FPym$g8_6Rc1^vX>TPW@+1^p;B%cxUy<0bOy9PFv0(ZHUW!s0@Zs2QkdL^Mdu zrWbe5EI?txXf(j~P?zcXd9X{_)+SS~LkgOtTd62`(kSi)*a{bCE+o2Ze7R}+NZ44z zHXOG#By3YPFzBZ)wrMsETg%1l9D)Gv?9yqAOTjO5=~=y0Kyt{Nz>?btCI}uNxRoG7 zutso@;9i1Rf?WhN1n&m~@h}RqN}Tr+;J_M@4#5WrY6N2hO9V#=ZXn1K+(XbLc#zDArAQ)aKrt_s-Hos6-YblM6jWaq?Vo%V~J6GuAjpXjt-?X+J24KcBxuh+?gD2N~??Iqf48aSG`H91c5=umK` z2*^8z*?4@o)Bc>2Mrt69%j+=~n;a5=I;~r;1bsL!xwHdcsKj9WYn}E>ofD6D+MiWc zeF_t`AVEb)h1a}Tjf3d68hUcW;-^E4wIGUJqN%G<&vi~b+G#&`Zi^V2qpDpztJ1bs zn*D0xQ_2-T4Ao59-~@afiQm)xlu`_D!NwQW`kn!*^{vp@n5uSUp1r&bXZ4{@aG0~2 zDJA;lPWz}@A^DfdI?YJnpw^%g?Rf!9Lku7rzW6CcA0_cYJnRqHAbS0w4?7dhlMRUJ zMFOEiFrPtu-)7yYUaA^h6%9&=YNvUPGVmwW+K-2saSYla1_6W}^n+4EEmo6{kj|~= zoO4py@d6BwwuaQkrqi*5_7r(1X34u~ewaO>WccN&bKA_Zu~OHIsTSBuVUv`N-oY=3 zhX=sk5`MhXeqPy%M0^`BXmU51unEv<|JX*_!THBjXr{rJQ#67~Q;opdk)dz`W%S$1 zW->?9_t5k(wUIWM9wHsoTgL73dDGWh`-9g0652Aa&Hp7J8Z^PsPHtdMyOl=B34)08mYp%dYPGZdWNEi_p= zJppo|ofd!EqO&K&TG)lcFC)2w6y8X%onR+PqMoUG6Y>o04?d+Pcy4)`y!rGrxE{s0 zQS_?4oukWX@)nvF=1kZtsF+R_^SKqGX{&RO-a9MGJ0M6-k~G>dI&H*2i8@pLr;X3N?N_i-R%K20(-B07gRNB}8NZ3gw%%GRL5{8gb?9ROg zO`0@1Yzv6cljk1FS}C>HtCN6NMGICZgIJzFpg=zS(GVLK!7UtC1 z6W(eE(&S;95O(4>WHbvBr_MfcoOn(*W4IONiUfoLxCGjtRH`2%N#ScMC8kObnlp`;-X}t=~z}}!fZ6~tukMkoZ)~s8MNUz`8f<9)mpbK$fl*+bcthox)Q2Wy3$7|T{cO}(``q;VcU-0Z4fS$ z4BRx0#ysa^!17lZVf+GzDk*(!fOx?qR-+ccH78d>!Sg!j$g?BmTzHy!!Kgr<`=WS=9jR);+L|&mGM8}SFu0l?_z(%uV#P9uVDcnVSm7{Wxvly+3)e|*zfY| z+3)Z%_S<|L`z=1sev?nI-{9NXH~0zfZBfA`I1 zxbfKdQs`1<#$C@d5u?#)K>lP5=GU0-S0HG}xTMnOWP@n>j6HKOdye@!+&l5UXG1ci z1Nzh%vY|6{ht7aRWWXYK;6@%=1^)WMl**##LaLRvvZ*cYMcgzhIRyFJ&A>C`QWSZZ z%aAA1c>SD<)HFM}j72B~=4u*RmP*5}$j#WEkh{4Ofo~ekI>LGE)Sg&IRl5@e9`XZebFv6A$HqwQABCl zj#s)`>vkN6`%ym=x~*i4J1(?{A%-@Dv|Q{PFT^0m!Hs$kmKt-ZPuaz<@mF)qr7o>e zISZe=Fk$@mAXx;|OznqgFDxPr`6w+7Q_?s|rcGV-S>7zoI}DGcT=2wgHDJCn#RPG%vXIRX-`qz?O;yK0&juF7&JK|N3_H6H3N_o zfwWLM>;C~|WcUe!vI2%bAt*QC+0!8#&|`WNfN{a^bDB>L*fE^}DPs&+v<348vn_zG zSI9W>18`xeGE|&0xeOb59~XRQRc-34U6F|&3%T|&=oGeQcIXnHEu1yZ4(1be4%8KR zMh3t^C(8DLdDd|oeO`LU;LoqPaI%C~;3Jsy7_0pwKLasf5?Lrw9W*Lr=TKp}bPFYm z@okh^xLBRFxdYpPN^~34swnI+8w!4`;;}kw@DZMDnz1nCc^pFb3Byhay;+2zL~_bW zSd3?TmP7z8;jZuxyN0EQjlHU)E#_rKYpc*2%T%o4V^f%gOPbJen$(4E+mBly9qG$+umwh{_|VW767umf-jmZ}ccb{2NZdQWv%ZKyC*8!imjMhYXf-G$w?J%v58 zFJ0YR+gI49YreIxfBgXKWtm67u>IA?vhpjj;IM7}LDv5+VZXg_hz%4DvmJ#OnCTtC z^CBB$J0EI=qimY(W_uo5g=6ej*k{;5cIcs9$oVhLYwYkJ=xci6W%hzU%8s1X*cFyz zF9C9lU1c*Y&&C0nXV=&Xb`q`Qmo#>YoqnjX(`!cII6K2W!CqmnvQM(t*r(W8cJ6}| zyUs4L%MTNU6U>~|a&I=jWNBLZ&g9h@UpQgVtTV@7^6SE3zVM48;LeX9eAQVxo3M%qgKQoDaBg1o!G;g@EOdFqcoKZv=sGie5AHLnkOXwMrOb3=7K6oW~cPaa3u1 z)vGy`I?a3XwY1}$$UDWL9{TmL8M=$-PCAQ~Y8B(cO1)h5$4cC*`OhqeaVPUm*Z`W` zt2(8sC+dC}I%QAz4)+((jw$9nUJL4#qF1f1jPXiw-tp>{8sOz-Lriii?;K@8%@0MT z*zOERorW*Mqt3Dy5}tF_ptxAAgd&~32qJ`z*JxBLK5OHCzJt576bPW~R6^&CJJ%;? zXQwVmdUYu^R;+qJvFgqFRazUrV;$m*O4^J%i*+nE>XZo*uVx+87Nd?Yius%xLPz4Y ziQ_2ZcyqxLNHouVKvx-I5H?GtN|DYduShlfd{W(*x_a#v5Cvd``_&bK#&254-HRY_ zymA@yh83*ffkDK)?^G*uJP199b}kgj8$4L5Fu%R6jn$o!R}KBKMi5p6O#wJ!tx8a* zl|cP#a*j}Nzg`yeYBJy-)T=8ULOUXG8cmMDJ})+bN>FbTaa3Vj2ipNg1a+@E)+KkF zF@f!(zrloHz^d0Q&kLUu2B#(Sm0U$UFn-BvR)tdymVFKe!mQxZGtRM^CyMhp`{jTy z(jj9>e=Z0X!S;@GChzcC*qLL?*yAUT(=+DI%yn&A_9=hox} zKvsRoRnd2rLCkswfm3IkaAOuFZ;l6%ZTLQ>ouj!tUh(BrujHMZL1XL;ST_{D$4C+d z8cqR;I;$`UNa*@anH@k8<(bTy0bUpzG~nx7qT9>rBf#Xn4Tq~Op!Lr z*>ScKx?M5$S=`J0n%8ipB*y8Y*PxYLzrInQuf!P)>JkD|_TXDlJRZDx(LI0tx_fnc z#=Uawl6z(9*3^|)KNSz2f9t}eduw`j`i(17?uDsa)90sVSBb@91QU2ck&;9G0-}|SDcIDqDibSH$ zFM&!5&v~+(e~pTO3^qj9gt=}$(ps9Z)@_k^q_^})Ur#>L)>C+T zn1L3fthG#$Ug%xOtoKD|`R$>WC9><=BI}WULA%@2vLkc7KeCxQqCLW#_r`8xH4A@h zrmodfC$w;K8SV6U2T!DXT4Z`UaIn<+kv6O``+bWg#P-Ns)8E%v@>@3H7wM5n_~ueG zt4FTjEQvqbY*yky&AAEl5b%qTE$1>wmCvu*O;H+qHO{a~*r<9dZXIS~b%)a#MGOJ{ zhW=hP^CwEHiO_K($5~A|QVaWGZXoV+6`jF2%&*4VI-hNb*lzGjUBni#b8O;>VvDAY zrO5>2y>;VCoRz>Xu~P%Q8Qb+{t+5g(m|w)Cu^~#aQ3+zZT&h(4*uHd;US=^|iqjV= zMR5ZX>hm}Oo8s45Y*zhxE`d?UV_PD{_u^FJWCtUoG#{HhSpHDsqv-hP-1t0<*Lbt; z`*Y*PMgxX!ym-1SgcvUt+uIord6B+4M!n5kd0?4wrQ9egdx&y12VAMRn(O15LWglR>Q#I*CHdY4ljd=Q7O z8+gn}TepP1U_a8Cz5|@~2pkJeFlHVbfXgNLNxWjC-iWR69>;Oie6J3Qg<_@Q$4R*s z$vhtioFt1X#%P%3LNi}QEu_gDoqrOqpWwmo-zQ=tC9${cTGrBQrPP$lBYF2 zG8XOI+AO3;^sJ0^q=sf%)^bVD2En8*y~&%tQ-KPH-jx(5XGwTa?(hPjL7PzHu--l| zuPA#=irjJDBG2pE%#}OJWotqkOMUOv=A_f6oDk<2tS;I|9ggl%rwGG=RkTsjW9G=Gtrl5O84wxx3t zXXUGVviubQ@zYeiN`+)Cv*z88d#3IeeQ5`Bw&EmOf}f+}4HU8I*O#8=uCzPvg|1xU zO~8b^P-K#pZa_*BkekGlK{JDz@q}MS*Xv1|$htUvV=A_8Ois_l*6rz;3$cCv#6U<)_9_fpED-qfJ%_xD#VrT`deHP`q zj_AsGWH4ih+7f89SW@<*T(tQ0jc@7$e5$NHG60)8tKA!jjCIt>;s8HO;s$G`=7bg6 z)E|)r>6wwt!fBVWtxSAR*CC-}g#DL(vs;9ZAYk)}v*B(P{WuGs#@+Cfa#m~+F=Mmr z3x0%VOydOFUSclE>D2iwwVOc`V8ten4Sxxm3f2fJvKoZ`vrJQiwE8a2YY`(u;#&)f z*3vUjwEg-Zm?wj>Uw#`8(on})C}T_Ka{z5CuOCjt3FQgKNzz?7>@5dP+edV#4*EVS zor6|~%nSsI=DS9VjvNPBh68|_-97R{U;%?AMl>9~DUZuyDIA(Uq(SAKMq3ZQEGlt? zry>cB;~K(o!I(=20d*Wmdgd8RKpY9G=|3fvs(4On{MeSNL{jWSJ=X6(O}o@t1dnsf zd;K8>{*l&uh^ZctNPY(AWstDSbAp(Xh@u7J2`9_K8SPbH z%o?ITeH{ijG6h6wO&2gyDBJMNUNcaK(8{+wb(UPi{OdML;Vf0Hho##!net$8649R} zQDxiS?Ug5Pi3H4DKidxO8hDpZf5!~J3A`1RCB0w6=VP+1lIV|#N#k_e5ra(d+tfuH zk27uM9W*&)GofHE@;m6u^{Q!m2{=Q*X~{aS^wm-ja#@p0@FfD0(=HQaUngwQUy<_qZfLp;*aNNB4L3Aif)S4ru!Jjtsx zRZ{XACqdvFo3Ukl$ls>cfWV+xseTtl49?jgDY=1)7p1?G-MZ>~=napdu)ucoM@j|u z2u}hvQZ#A#XHfaG{yzTe050O$@*_d}G*N@RQC}}2Jy%eVv|CGE+HSR%+$}z<$uSlleXV3~(=vPZm%W}$fEA@(S-9HA3 zVHO3EtMixESBJLlG2elIX0tR_ZuV?@vkMx69%$MH3VvW@7Yw{DyiL4qye+&Fc-wd< z@lL$2eJhKtBQQ#;J)c|caghI^ZJ%4sI2-w?bE~}$WiI6A&#m@3${}=V{}QrG0twln z!Z}W=ihqHMr??4(K-|w=_dDp6%4`6N%X%Uo!}{2w$F%;(c=8;QsF5=WzcCF(XRaHk zU5d4uRjOxP_a6L5)x*C?%YSJr4iXz;YyJSmAK{VWn9{(vJ+iHB41X|D{g zhZCe?@cZwH$D~4Ta)FYcLQyWs-#<*`QgJV_M>}5=r#EspagQ?1Fs89huE1iPMK)7; zc!=-9Fz)XX(Os_8S+Fc8>trwEz9r-#x@jE#JGA;&sQ4-sU!&r8srWrAzD~tAP<)gZ z%|_LK`)OM+>TG4SQ6xGMS>5egdQuy-IpQ;@R6V=`b*e%W1pE4$_F1Eq{sPhf(D-oc z)^vRLwlcyJc9a<;N3TyL))Pn#WJPiz74@R*VLJSRZBaVf26(g8-`d{V9`&!M0X3kY zqwKKO+97(^wALUj($0$3+9fj4z#1hAKE@hDU(iKgG$ianaQm9k8W!1T7(spyd=8V} zi3TlgVH>kvfphO}yEhRHMSX)>Yee)%BT?UwhV(!$ur*q{qh7S^CO5RTM+`)J5IC8z zZL4yI9qk!*+?x=C>pR~;=m-x6!Pe(s1tCqg9JlxL+UJoBVM&&Hm;#o22}|sJAG2Hg zSx@Ugv=>NI@sDD9cmpw z;$lCun;QQciO+BwpJCuL679$M!(w-I81EON7x-fdv!@NSC)&Bb7c@BxIvrqXSY8`n z!(tztO^!n+O?$f{}lV9BX`xaVXjoW!8-M^zfjBI^2dj3@AFE{}-mTulNN$gF_oVyTLDA?!mk}B}Kk0 zXX=N)lN0tHoH53NZ<$4NwuYjCXb3#;67f^?(pG+AyCgro489pNt^m-;G9z zuh9RPjkHFi7r`sJXcW9MM!dpy<15GZ3~6jH+gFAK>F^5e?lyQ8X@%T-#!7D|u~65S zHStokGkOtY_Ok=<&m2hkaq+SkjgBL%GJxv|F&3S``($)78jN<7=#uDQb1y8mOa|O& z);rlXl}n;T{Bi|Z`))8l<}G_j)Famg$4Z9&a^z?H6{OZfq(JG?#Bs))n@?RI$(xW) z`2XBXkRLzzcFvhwkqPOp4wkby?VFdpGx@xGkxjauZbmw0Po)>eoY`(}L^*2D&56jx zIg-%0B-wmh<;+)*6R*_Ucy#e<+Xe+%Y|5(-8FPlt`hu>Sl)S$S6lz^~<^<^=hj#*)@5+(mfwXlJ8hMNs?<{qjXX*GSk|*4lE*D z9kwr6=(@v^>87U=_H>x=hHXZEIwhz@1sD{>Mg2EZ=SG=d(`5r$v4HO zGJK)PMTROekO7qw$XdY%RH-pOjPCL&`TP3`L<^rLaG4~tn{Z`AxBma?fb|xsQVdC!2~a&h_@-ny31DlJX*J$FvU;g z>CfVXc?lKr-Q{MbQ#X&6r<_e0KrLe&r>w~8@UxUup}Y#4^0A2+TxOzqk|v`tl9QJm z_Y(&rJAd)Uu&_ z{5Pq`M4Ck=8D$QcuTjTo>PULLOnC#vS@b^{_(Lk_e?0I$D&C>u2#TkWL-vo8EMPOu z8vi^&U8mwQ6~9FV35TR^Cm2>_QF+WMbjH2YHM*Ib;yl1V0;jx!OclvsE_W(UEqhDq zcA{T}5D0Iuza^wd{;wV?KSpcWUXxV<#aBs1-x+?CdT0qB|2v?Dh)haOth;fKs=7$s zrz`c6&sB6zNgff(fEE{bbQ|u@LrQQX*5QAMBkn-LSPywbo1x5p+)EBQT<%KI52Yj^ zb2pFtTAe~(c;Tu}+MYim_$0;O)wybHQ5yn4ude$cEjD&gDP zW^%gAjCEWQsPFdnEMRjcZo{-M5*2`4ZAUkWNa@7(y4g*Qgi;dObBTOHh8;}ho1}pJ zK5bh@^tef)8}q+JL)Z`TNR3b`DPyDzx|yK}K0z^=*>7a^teK_UvpIm)40;WHFXf-D z#4oIr$^cNzPc;Lj%u{0&Mj*v#8ClF@(3dPJOQ8PJKTl;bH`YwcIn$VvM*VVltDi_Cl1;hPrcRmFCq1E3`vj&2yiSlvUQ@#jDxkX5G^(H4 zFd^wbfHCTnVE%K!tWDEMY(g3}poC2>`IYV{85qj8uTh&sO@@eskcty!+t<6j6fr7Y zBj^4mK65AIr0cSvsAB9=6L;*0u{o(=ei%h;1fkS}B&qWHJ*2=D|ItSNXN*v(p(I^l zl5-%nsZ@$m!=;t3MDV$+^k{aY!Nt8OAE9-=M+N1}ltz{$O3~U0b=$`dOJJWV%S9)7^$QbH~nR*ZW?0*4GvuW%A From c0435f05a6ea897b38d1cad4e6dcc0ebf6bb34ea Mon Sep 17 00:00:00 2001 From: Uneeb808 <150836183+Uneeb808@users.noreply.github.com> Date: Tue, 12 May 2026 21:19:40 +0530 Subject: [PATCH 3/6] Update __init__.py --- src/cc_detector/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cc_detector/__init__.py b/src/cc_detector/__init__.py index 846edca..4f0d68a 100644 --- a/src/cc_detector/__init__.py +++ b/src/cc_detector/__init__.py @@ -13,4 +13,4 @@ """ __version__ = "0.2.0" -__author__ = "Govind Gupta" + From f9c5feb02eae4d694e56a86361932318348904c0 Mon Sep 17 00:00:00 2001 From: Uneeb808 <150836183+Uneeb808@users.noreply.github.com> Date: Tue, 12 May 2026 21:22:02 +0530 Subject: [PATCH 4/6] Update README.md --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 1c0c7d0..e2402e5 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,6 @@ This PR delivers a fully working **Module 1** (Sound Event Detection → SRT/SLS ``` INPUT VIDEO - │ ├──▶ AUDIO EXTRACTION (imageio-ffmpeg, no system install needed) │ │ │ ┌──────┴──────┐ @@ -119,4 +118,4 @@ Qwen2-Audio is a 7B audio-language model (Whisper-large-v2 encoder + LLM). Inste -cc @abinash-sketch @keerthiseelan-planetread \ No newline at end of file +cc @abinash-sketch @keerthiseelan-planetread From c073d3c6fe23b82ad60df52ee9deb015fe8e8117 Mon Sep 17 00:00:00 2001 From: Uneeb808 <150836183+Uneeb808@users.noreply.github.com> Date: Tue, 12 May 2026 21:22:36 +0530 Subject: [PATCH 5/6] Update yamnet.py --- src/cc_detector/yamnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cc_detector/yamnet.py b/src/cc_detector/yamnet.py index 5f10e0f..49eaf30 100644 --- a/src/cc_detector/yamnet.py +++ b/src/cc_detector/yamnet.py @@ -50,7 +50,7 @@ YAMNET_URL = "https://tfhub.dev/google/yamnet/1" YAMNET_FRAME_HOP = 0.48 # seconds between frames -YAMNET_FRAME_WIN = 0.96 # window size +YAMNET_FRAME_WIN = 0.96 _yamnet_model = None _yamnet_classes = None From 40d9c9bf5f98afe167bd04250f40de27e920ce95 Mon Sep 17 00:00:00 2001 From: Uneeb808 <150836183+Uneeb808@users.noreply.github.com> Date: Tue, 12 May 2026 21:24:06 +0530 Subject: [PATCH 6/6] Update spectral.py --- src/cc_detector/spectral.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/cc_detector/spectral.py b/src/cc_detector/spectral.py index 5ccaa2f..7fa66fe 100644 --- a/src/cc_detector/spectral.py +++ b/src/cc_detector/spectral.py @@ -2,6 +2,5 @@ from __future__ import annotations import numpy as np - def rms(chunk: np.ndarray) -> float: return float(np.sqrt(np.mean(chunk.astype(np.float32) ** 2)))