
Commit ffd4a72

Revert "Remove face detection feature from V1 client (via synth). (#8666)"

This reverts commit a0e077c.

File tree: 6 files changed, +637 −273 lines changed

videointelligence/google/cloud/videointelligence_v1/gapic/enums.py

Lines changed: 2 additions & 0 deletions
@@ -28,6 +28,7 @@ class Feature(enum.IntEnum):
       LABEL_DETECTION (int): Label detection. Detect objects, such as dog or flower.
       SHOT_CHANGE_DETECTION (int): Shot change detection.
       EXPLICIT_CONTENT_DETECTION (int): Explicit content detection.
+      FACE_DETECTION (int): Human face detection and tracking.
       SPEECH_TRANSCRIPTION (int): Speech transcription.
       TEXT_DETECTION (int): OCR text detection and tracking.
       OBJECT_TRACKING (int): Object detection and tracking.
@@ -37,6 +38,7 @@ class Feature(enum.IntEnum):
     LABEL_DETECTION = 1
     SHOT_CHANGE_DETECTION = 2
     EXPLICIT_CONTENT_DETECTION = 3
+    FACE_DETECTION = 4
     SPEECH_TRANSCRIPTION = 6
     TEXT_DETECTION = 7
     OBJECT_TRACKING = 9
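
With FACE_DETECTION restored to the Feature enum, the v1 Python client can request it again. A minimal sketch, assuming the standard GAPIC surface of this package, default application credentials, and a placeholder gs:// URI:

# Minimal sketch: request the restored FACE_DETECTION feature.
# Assumes google-cloud-videointelligence (v1 GAPIC) is installed and
# application default credentials are configured; the URI is a placeholder.
from google.cloud import videointelligence_v1 as videointelligence

client = videointelligence.VideoIntelligenceServiceClient()
operation = client.annotate_video(
    input_uri="gs://your-bucket/your-video.mp4",
    features=[videointelligence.enums.Feature.FACE_DETECTION],
)

# AnnotateVideo is a long-running operation; block until it completes.
result = operation.result(timeout=600)
print(len(result.annotation_results[0].face_annotations), "face(s) found")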

videointelligence/google/cloud/videointelligence_v1/gapic/video_intelligence_service_client_config.py

Lines changed: 1 addition & 1 deletion
@@ -18,7 +18,7 @@
             },
             "methods": {
                 "AnnotateVideo": {
-                    "timeout_millis": 60000,
+                    "timeout_millis": 600000,
                     "retry_codes_name": "idempotent",
                     "retry_params_name": "default",
                 }
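
This restores the default AnnotateVideo timeout from 60 s to 600 s (600000 ms), better suited to a long-running video annotation call. A sketch of a per-call override, assuming the standard GAPIC `timeout` keyword (in seconds):

# Sketch: override the restored 600000 ms default on a single call.
from google.cloud import videointelligence_v1 as videointelligence

client = videointelligence.VideoIntelligenceServiceClient()
operation = client.annotate_video(
    input_uri="gs://your-bucket/your-video.mp4",  # placeholder URI
    features=[videointelligence.enums.Feature.LABEL_DETECTION],
    timeout=600.0,  # seconds; matches the restored default of 600000 ms
)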

videointelligence/google/cloud/videointelligence_v1/proto/video_intelligence.proto

Lines changed: 132 additions & 95 deletions
@@ -1,4 +1,4 @@
-// Copyright 2019 Google LLC.
+// Copyright 2018 Google LLC.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -22,7 +22,6 @@ import "google/longrunning/operations.proto";
 import "google/protobuf/duration.proto";
 import "google/protobuf/timestamp.proto";
 import "google/rpc/status.proto";
-import "google/api/client.proto";

 option csharp_namespace = "Google.Cloud.VideoIntelligence.V1";
 option go_package = "google.golang.org/genproto/googleapis/cloud/videointelligence/v1;videointelligence";
@@ -34,14 +33,12 @@ option ruby_package = "Google::Cloud::VideoIntelligence::V1";

 // Service that implements Google Cloud Video Intelligence API.
 service VideoIntelligenceService {
-  option (google.api.default_host) = "videointelligence.googleapis.com";
-  option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
-
   // Performs asynchronous video annotation. Progress and results can be
   // retrieved through the `google.longrunning.Operations` interface.
   // `Operation.metadata` contains `AnnotateVideoProgress` (progress).
   // `Operation.response` contains `AnnotateVideoResponse` (results).
-  rpc AnnotateVideo(AnnotateVideoRequest) returns (google.longrunning.Operation) {
+  rpc AnnotateVideo(AnnotateVideoRequest)
+      returns (google.longrunning.Operation) {
     option (google.api.http) = {
       post: "/v1/videos:annotate"
       body: "*"
@@ -55,10 +52,10 @@ message AnnotateVideoRequest {
   // [Google Cloud Storage](https://cloud.google.com/storage/) URIs are
   // supported, which must be specified in the following format:
   // `gs://bucket-id/object-id` (other URI formats return
-  // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
-  // [Request URIs](/storage/docs/reference-uris).
-  // A video URI may include wildcards in `object-id`, and thus identify
-  // multiple videos. Supported wildcards: '*' to match 0 or more characters;
+  // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For
+  // more information, see [Request URIs](/storage/docs/reference-uris). A video
+  // URI may include wildcards in `object-id`, and thus identify multiple
+  // videos. Supported wildcards: '*' to match 0 or more characters;
   // '?' to match 1 character. If unset, the input video should be embedded
   // in the request as `input_content`. If set, `input_content` should be unset.
   string input_uri = 1;
@@ -78,8 +75,8 @@ message AnnotateVideoRequest {
   // Currently, only [Google Cloud Storage](https://cloud.google.com/storage/)
   // URIs are supported, which must be specified in the following format:
   // `gs://bucket-id/object-id` (other URI formats return
-  // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
-  // [Request URIs](/storage/docs/reference-uris).
+  // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For
+  // more information, see [Request URIs](/storage/docs/reference-uris).
   string output_uri = 4;

   // Optional cloud region where annotation should take place. Supported cloud
@@ -104,6 +101,9 @@ message VideoContext {
   // Config for EXPLICIT_CONTENT_DETECTION.
   ExplicitContentDetectionConfig explicit_content_detection_config = 4;

+  // Config for FACE_DETECTION.
+  FaceDetectionConfig face_detection_config = 5;
+
   // Config for SPEECH_TRANSCRIPTION.
   SpeechTranscriptionConfig speech_transcription_config = 6;

@@ -114,66 +114,6 @@ message VideoContext {
   ObjectTrackingConfig object_tracking_config = 13;
 }

-// Video annotation feature.
-enum Feature {
-  // Unspecified.
-  FEATURE_UNSPECIFIED = 0;
-
-  // Label detection. Detect objects, such as dog or flower.
-  LABEL_DETECTION = 1;
-
-  // Shot change detection.
-  SHOT_CHANGE_DETECTION = 2;
-
-  // Explicit content detection.
-  EXPLICIT_CONTENT_DETECTION = 3;
-
-  // Speech transcription.
-  SPEECH_TRANSCRIPTION = 6;
-
-  // OCR text detection and tracking.
-  TEXT_DETECTION = 7;
-
-  // Object detection and tracking.
-  OBJECT_TRACKING = 9;
-}
-
-// Label detection mode.
-enum LabelDetectionMode {
-  // Unspecified.
-  LABEL_DETECTION_MODE_UNSPECIFIED = 0;
-
-  // Detect shot-level labels.
-  SHOT_MODE = 1;
-
-  // Detect frame-level labels.
-  FRAME_MODE = 2;
-
-  // Detect both shot-level and frame-level labels.
-  SHOT_AND_FRAME_MODE = 3;
-}
-
-// Bucketized representation of likelihood.
-enum Likelihood {
-  // Unspecified likelihood.
-  LIKELIHOOD_UNSPECIFIED = 0;
-
-  // Very unlikely.
-  VERY_UNLIKELY = 1;
-
-  // Unlikely.
-  UNLIKELY = 2;
-
-  // Possible.
-  POSSIBLE = 3;
-
-  // Likely.
-  LIKELY = 4;
-
-  // Very likely.
-  VERY_LIKELY = 5;
-}
-
 // Config for LABEL_DETECTION.
 message LabelDetectionConfig {
   // What labels should be detected with LABEL_DETECTION, in addition to
@@ -216,17 +156,28 @@ message ShotChangeDetectionConfig {
   string model = 1;
 }

-// Config for OBJECT_TRACKING.
-message ObjectTrackingConfig {
-  // Model to use for object tracking.
+// Config for EXPLICIT_CONTENT_DETECTION.
+message ExplicitContentDetectionConfig {
+  // Model to use for explicit content detection.
   // Supported values: "builtin/stable" (the default if unset) and
   // "builtin/latest".
   string model = 1;
 }

-// Config for EXPLICIT_CONTENT_DETECTION.
-message ExplicitContentDetectionConfig {
-  // Model to use for explicit content detection.
+// Config for FACE_DETECTION.
+message FaceDetectionConfig {
+  // Model to use for face detection.
+  // Supported values: "builtin/stable" (the default if unset) and
+  // "builtin/latest".
+  string model = 1;
+
+  // Whether bounding boxes be included in the face annotation output.
+  bool include_bounding_boxes = 2;
+}
+
+// Config for OBJECT_TRACKING.
+message ObjectTrackingConfig {
+  // Model to use for object tracking.
   // Supported values: "builtin/stable" (the default if unset) and
   // "builtin/latest".
   string model = 1;
@@ -344,24 +295,57 @@ message NormalizedBoundingBox {
   float bottom = 4;
 }

+// Video segment level annotation results for face detection.
+message FaceSegment {
+  // Video segment where a face was detected.
+  VideoSegment segment = 1;
+}
+
+// Video frame level annotation results for face detection.
+message FaceFrame {
+  // Normalized Bounding boxes in a frame.
+  // There can be more than one boxes if the same face is detected in multiple
+  // locations within the current frame.
+  repeated NormalizedBoundingBox normalized_bounding_boxes = 1;
+
+  // Time-offset, relative to the beginning of the video,
+  // corresponding to the video frame for this location.
+  google.protobuf.Duration time_offset = 2;
+}
+
+// Face annotation.
+message FaceAnnotation {
+  // Thumbnail of a representative face view (in JPEG format).
+  bytes thumbnail = 1;
+
+  // All video segments where a face was detected.
+  repeated FaceSegment segments = 2;
+
+  // All video frames where a face was detected.
+  repeated FaceFrame frames = 3;
+}
+
 // Annotation results for a single video.
 message VideoAnnotationResults {
   // Video file location in
   // [Google Cloud Storage](https://cloud.google.com/storage/).
   string input_uri = 1;

-  // Topical label annotations on video level or user specified segment level.
+  // Label annotations on video level or user specified segment level.
   // There is exactly one element for each unique label.
   repeated LabelAnnotation segment_label_annotations = 2;

-  // Topical label annotations on shot level.
+  // Label annotations on shot level.
   // There is exactly one element for each unique label.
   repeated LabelAnnotation shot_label_annotations = 3;

   // Label annotations on frame level.
   // There is exactly one element for each unique label.
   repeated LabelAnnotation frame_label_annotations = 4;

+  // Face annotations. There is exactly one element for each unique face.
+  repeated FaceAnnotation face_annotations = 5;
+
   // Shot annotations. Each shot is represented as a video segment.
   repeated VideoSegment shot_annotations = 6;

@@ -407,14 +391,6 @@ message VideoAnnotationProgress {

   // Time of the most recent update.
   google.protobuf.Timestamp update_time = 4;
-
-  // Specifies which feature is being tracked if the request contains more than
-  // one features.
-  Feature feature = 5;
-
-  // Specifies which segment is being tracked if the request contains more than
-  // one segments.
-  VideoSegment segment = 6;
 }

 // Video annotation progress. Included in the `metadata`
@@ -515,17 +491,15 @@ message SpeechRecognitionAlternative {
   // Transcript text representing the words that the user spoke.
   string transcript = 1;

-  // Output only. The confidence estimate between 0.0 and 1.0. A higher number
+  // The confidence estimate between 0.0 and 1.0. A higher number
   // indicates an estimated greater likelihood that the recognized words are
-  // correct. This field is set only for the top alternative.
-  // This field is not guaranteed to be accurate and users should not rely on it
-  // to be always provided.
+  // correct. This field is typically provided only for the top hypothesis, and
+  // only for `is_final=true` results. Clients should not rely on the
+  // `confidence` field as it is not guaranteed to be accurate or consistent.
   // The default of 0.0 is a sentinel value indicating `confidence` was not set.
   float confidence = 2;

-  // Output only. A list of word-specific information for each recognized word.
-  // Note: When `enable_speaker_diarization` is true, you will see all the words
-  // from the beginning of the audio.
+  // A list of word-specific information for each recognized word.
   repeated WordInfo words = 3;
 }

@@ -671,3 +645,66 @@ message ObjectTrackingAnnotation {
   // Streaming mode: it can only be one ObjectTrackingFrame message in frames.
   repeated ObjectTrackingFrame frames = 2;
 }
+
+// Video annotation feature.
+enum Feature {
+  // Unspecified.
+  FEATURE_UNSPECIFIED = 0;
+
+  // Label detection. Detect objects, such as dog or flower.
+  LABEL_DETECTION = 1;
+
+  // Shot change detection.
+  SHOT_CHANGE_DETECTION = 2;
+
+  // Explicit content detection.
+  EXPLICIT_CONTENT_DETECTION = 3;
+
+  // Human face detection and tracking.
+  FACE_DETECTION = 4;
+
+  // Speech transcription.
+  SPEECH_TRANSCRIPTION = 6;
+
+  // OCR text detection and tracking.
+  TEXT_DETECTION = 7;
+
+  // Object detection and tracking.
+  OBJECT_TRACKING = 9;
+}
+
+// Label detection mode.
+enum LabelDetectionMode {
+  // Unspecified.
+  LABEL_DETECTION_MODE_UNSPECIFIED = 0;
+
+  // Detect shot-level labels.
+  SHOT_MODE = 1;
+
+  // Detect frame-level labels.
+  FRAME_MODE = 2;
+
+  // Detect both shot-level and frame-level labels.
+  SHOT_AND_FRAME_MODE = 3;
+}
+
+// Bucketized representation of likelihood.
+enum Likelihood {
+  // Unspecified likelihood.
+  LIKELIHOOD_UNSPECIFIED = 0;
+
+  // Very unlikely.
+  VERY_UNLIKELY = 1;
+
+  // Unlikely.
+  UNLIKELY = 2;
+
+  // Possible.
+  POSSIBLE = 3;
+
+  // Likely.
+  LIKELY = 4;
+
+  // Very likely.
+  VERY_LIKELY = 5;
+}
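
Taken together, the restored messages let a caller tune face detection through VideoContext and walk the per-face results. A hedged sketch against the proto above: the type and field names come from the diff, while the client surface (the `types` and `enums` modules, flattened annotate_video kwargs) is the usual v1 GAPIC layout, and the input URI is a placeholder.

# Sketch: configure FACE_DETECTION via VideoContext, then read the results.
from google.cloud import videointelligence_v1 as videointelligence

client = videointelligence.VideoIntelligenceServiceClient()
face_config = videointelligence.types.FaceDetectionConfig(
    model="builtin/stable",        # the documented default if unset
    include_bounding_boxes=True,   # ask for per-frame bounding boxes
)
context = videointelligence.types.VideoContext(face_detection_config=face_config)

operation = client.annotate_video(
    input_uri="gs://your-bucket/your-video.mp4",  # placeholder URI
    features=[videointelligence.enums.Feature.FACE_DETECTION],
    video_context=context,
)
result = operation.result(timeout=600)

for annotation in result.annotation_results:
    for face in annotation.face_annotations:
        # Each FaceAnnotation carries a JPEG thumbnail plus segment- and
        # frame-level detections (FaceSegment / FaceFrame above).
        for segment in face.segments:
            s = segment.segment
            print("face from {}s to {}s".format(
                s.start_time_offset.seconds, s.end_time_offset.seconds))
        for frame in face.frames:
            for box in frame.normalized_bounding_boxes:
                print("  box: left={:.2f}, top={:.2f}".format(box.left, box.top))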
