-// Copyright 2019 Google LLC.
+// Copyright 2018 Google LLC.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -22,7 +22,6 @@ import "google/longrunning/operations.proto";
 import "google/protobuf/duration.proto";
 import "google/protobuf/timestamp.proto";
 import "google/rpc/status.proto";
-import "google/api/client.proto";

 option csharp_namespace = "Google.Cloud.VideoIntelligence.V1";
 option go_package = "google.golang.org/genproto/googleapis/cloud/videointelligence/v1;videointelligence";
@@ -34,14 +33,12 @@ option ruby_package = "Google::Cloud::VideoIntelligence::V1";

 // Service that implements Google Cloud Video Intelligence API.
 service VideoIntelligenceService {
-  option (google.api.default_host) = "videointelligence.googleapis.com";
-  option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
-
   // Performs asynchronous video annotation. Progress and results can be
   // retrieved through the `google.longrunning.Operations` interface.
   // `Operation.metadata` contains `AnnotateVideoProgress` (progress).
   // `Operation.response` contains `AnnotateVideoResponse` (results).
-  rpc AnnotateVideo(AnnotateVideoRequest) returns (google.longrunning.Operation) {
+  rpc AnnotateVideo(AnnotateVideoRequest)
+      returns (google.longrunning.Operation) {
     option (google.api.http) = {
       post: "/v1/videos:annotate"
       body: "*"
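
For orientation, a minimal sketch of driving this long-running RPC from the published Python client. The package, class, and enum names below come from google-cloud-videointelligence and are an assumption about the generated surface, not part of this proto file:

from google.cloud import videointelligence

client = videointelligence.VideoIntelligenceServiceClient()

# AnnotateVideo returns a google.longrunning.Operation; the client wraps it
# so that .result() polls until the AnnotateVideoResponse is available.
operation = client.annotate_video(
    request={
        "input_uri": "gs://bucket-id/object-id",  # hypothetical input video
        "features": [videointelligence.Feature.LABEL_DETECTION],
    }
)
response = operation.result(timeout=300)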
@@ -55,10 +52,10 @@ message AnnotateVideoRequest {
   // [Google Cloud Storage](https://cloud.google.com/storage/) URIs are
   // supported, which must be specified in the following format:
   // `gs://bucket-id/object-id` (other URI formats return
-  // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
-  // [Request URIs](/storage/docs/reference-uris).
-  // A video URI may include wildcards in `object-id`, and thus identify
-  // multiple videos. Supported wildcards: '*' to match 0 or more characters;
+  // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For
+  // more information, see [Request URIs](/storage/docs/reference-uris). A video
+  // URI may include wildcards in `object-id`, and thus identify multiple
+  // videos. Supported wildcards: '*' to match 0 or more characters;
   // '?' to match 1 character. If unset, the input video should be embedded
   // in the request as `input_content`. If set, `input_content` should be unset.
   string input_uri = 1;
@@ -78,8 +75,8 @@ message AnnotateVideoRequest {
   // Currently, only [Google Cloud Storage](https://cloud.google.com/storage/)
   // URIs are supported, which must be specified in the following format:
   // `gs://bucket-id/object-id` (other URI formats return
-  // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
-  // [Request URIs](/storage/docs/reference-uris).
+  // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For
+  // more information, see [Request URIs](/storage/docs/reference-uris).
   string output_uri = 4;

   // Optional cloud region where annotation should take place. Supported cloud
@@ -104,6 +101,9 @@ message VideoContext {
   // Config for EXPLICIT_CONTENT_DETECTION.
   ExplicitContentDetectionConfig explicit_content_detection_config = 4;

+  // Config for FACE_DETECTION.
+  FaceDetectionConfig face_detection_config = 5;
+
   // Config for SPEECH_TRANSCRIPTION.
   SpeechTranscriptionConfig speech_transcription_config = 6;

@@ -114,66 +114,6 @@ message VideoContext {
   ObjectTrackingConfig object_tracking_config = 13;
 }

-// Video annotation feature.
-enum Feature {
-  // Unspecified.
-  FEATURE_UNSPECIFIED = 0;
-
-  // Label detection. Detect objects, such as dog or flower.
-  LABEL_DETECTION = 1;
-
-  // Shot change detection.
-  SHOT_CHANGE_DETECTION = 2;
-
-  // Explicit content detection.
-  EXPLICIT_CONTENT_DETECTION = 3;
-
-  // Speech transcription.
-  SPEECH_TRANSCRIPTION = 6;
-
-  // OCR text detection and tracking.
-  TEXT_DETECTION = 7;
-
-  // Object detection and tracking.
-  OBJECT_TRACKING = 9;
-}
-
-// Label detection mode.
-enum LabelDetectionMode {
-  // Unspecified.
-  LABEL_DETECTION_MODE_UNSPECIFIED = 0;
-
-  // Detect shot-level labels.
-  SHOT_MODE = 1;
-
-  // Detect frame-level labels.
-  FRAME_MODE = 2;
-
-  // Detect both shot-level and frame-level labels.
-  SHOT_AND_FRAME_MODE = 3;
-}
-
-// Bucketized representation of likelihood.
-enum Likelihood {
-  // Unspecified likelihood.
-  LIKELIHOOD_UNSPECIFIED = 0;
-
-  // Very unlikely.
-  VERY_UNLIKELY = 1;
-
-  // Unlikely.
-  UNLIKELY = 2;
-
-  // Possible.
-  POSSIBLE = 3;
-
-  // Likely.
-  LIKELY = 4;
-
-  // Very likely.
-  VERY_LIKELY = 5;
-}
-
 // Config for LABEL_DETECTION.
 message LabelDetectionConfig {
   // What labels should be detected with LABEL_DETECTION, in addition to
@@ -216,17 +156,28 @@ message ShotChangeDetectionConfig {
   string model = 1;
 }

-// Config for OBJECT_TRACKING.
-message ObjectTrackingConfig {
-  // Model to use for object tracking.
+// Config for EXPLICIT_CONTENT_DETECTION.
+message ExplicitContentDetectionConfig {
+  // Model to use for explicit content detection.
   // Supported values: "builtin/stable" (the default if unset) and
   // "builtin/latest".
   string model = 1;
 }

-// Config for EXPLICIT_CONTENT_DETECTION.
-message ExplicitContentDetectionConfig {
-  // Model to use for explicit content detection.
+// Config for FACE_DETECTION.
+message FaceDetectionConfig {
+  // Model to use for face detection.
+  // Supported values: "builtin/stable" (the default if unset) and
+  // "builtin/latest".
+  string model = 1;
+
+  // Whether bounding boxes are included in the face annotation output.
+  bool include_bounding_boxes = 2;
+}
+
+// Config for OBJECT_TRACKING.
+message ObjectTrackingConfig {
+  // Model to use for object tracking.
   // Supported values: "builtin/stable" (the default if unset) and
   // "builtin/latest".
   string model = 1;
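
The re-added FaceDetectionConfig hangs off VideoContext (field 5 above). A hedged sketch of enabling it, again assuming the Python client's generated names rather than anything stated in this file:

from google.cloud import videointelligence

client = videointelligence.VideoIntelligenceServiceClient()

# include_bounding_boxes asks the service to emit per-frame boxes in the
# FaceFrame messages defined later in this file.
context = videointelligence.VideoContext(
    face_detection_config=videointelligence.FaceDetectionConfig(
        model="builtin/stable",  # the documented default when unset
        include_bounding_boxes=True,
    )
)
operation = client.annotate_video(
    request={
        "input_uri": "gs://bucket-id/object-id",  # hypothetical input video
        "features": [videointelligence.Feature.FACE_DETECTION],
        "video_context": context,
    }
)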
@@ -344,24 +295,57 @@ message NormalizedBoundingBox {
   float bottom = 4;
 }

+// Video segment level annotation results for face detection.
+message FaceSegment {
+  // Video segment where a face was detected.
+  VideoSegment segment = 1;
+}
+
+// Video frame level annotation results for face detection.
+message FaceFrame {
+  // Normalized bounding boxes in a frame.
+  // There can be more than one box if the same face is detected in multiple
+  // locations within the current frame.
+  repeated NormalizedBoundingBox normalized_bounding_boxes = 1;
+
+  // Time-offset, relative to the beginning of the video,
+  // corresponding to the video frame for this location.
+  google.protobuf.Duration time_offset = 2;
+}
+
+// Face annotation.
+message FaceAnnotation {
+  // Thumbnail of a representative face view (in JPEG format).
+  bytes thumbnail = 1;
+
+  // All video segments where a face was detected.
+  repeated FaceSegment segments = 2;
+
+  // All video frames where a face was detected.
+  repeated FaceFrame frames = 3;
+}
+
 // Annotation results for a single video.
 message VideoAnnotationResults {
   // Video file location in
   // [Google Cloud Storage](https://cloud.google.com/storage/).
   string input_uri = 1;

-  // Topical label annotations on video level or user specified segment level.
+  // Label annotations on video level or user specified segment level.
   // There is exactly one element for each unique label.
   repeated LabelAnnotation segment_label_annotations = 2;

-  // Topical label annotations on shot level.
+  // Label annotations on shot level.
   // There is exactly one element for each unique label.
   repeated LabelAnnotation shot_label_annotations = 3;

   // Label annotations on frame level.
   // There is exactly one element for each unique label.
   repeated LabelAnnotation frame_label_annotations = 4;

+  // Face annotations. There is exactly one element for each unique face.
+  repeated FaceAnnotation face_annotations = 5;
+
   // Shot annotations. Each shot is represented as a video segment.
   repeated VideoSegment shot_annotations = 6;

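
Continuing the sketch above, reading the restored face results out of VideoAnnotationResults might look like this; the field access mirrors the messages in this hunk, while the client object remains an assumption:

# One VideoAnnotationResults per input video.
result = operation.result(timeout=300).annotation_results[0]

for face in result.face_annotations:
    for segment in face.segments:
        print(segment.segment.start_time_offset, segment.segment.end_time_offset)
    for frame in face.frames:
        # A frame can carry several boxes when the same face is detected
        # in multiple locations within that frame.
        for box in frame.normalized_bounding_boxes:
            print(frame.time_offset, box.left, box.top, box.right, box.bottom)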
@@ -407,14 +391,6 @@ message VideoAnnotationProgress {

   // Time of the most recent update.
   google.protobuf.Timestamp update_time = 4;
-
-  // Specifies which feature is being tracked if the request contains more than
-  // one features.
-  Feature feature = 5;
-
-  // Specifies which segment is being tracked if the request contains more than
-  // one segments.
-  VideoSegment segment = 6;
 }

 // Video annotation progress. Included in the `metadata`
@@ -515,17 +491,15 @@ message SpeechRecognitionAlternative {
   // Transcript text representing the words that the user spoke.
   string transcript = 1;

-  // Output only. The confidence estimate between 0.0 and 1.0. A higher number
+  // The confidence estimate between 0.0 and 1.0. A higher number
   // indicates an estimated greater likelihood that the recognized words are
-  // correct. This field is set only for the top alternative.
-  // This field is not guaranteed to be accurate and users should not rely on it
-  // to be always provided.
+  // correct. This field is typically provided only for the top hypothesis, and
+  // only for `is_final=true` results. Clients should not rely on the
+  // `confidence` field as it is not guaranteed to be accurate or consistent.
   // The default of 0.0 is a sentinel value indicating `confidence` was not set.
   float confidence = 2;

-  // Output only. A list of word-specific information for each recognized word.
-  // Note: When `enable_speaker_diarization` is true, you will see all the words
-  // from the beginning of the audio.
+  // A list of word-specific information for each recognized word.
   repeated WordInfo words = 3;
 }

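
Given the sentinel semantics documented above, a client should treat confidence as advisory. A small sketch, reusing the result object from the earlier snippets:

for transcription in result.speech_transcriptions:
    for alternative in transcription.alternatives:
        print(alternative.transcript)
        # 0.0 is the documented "not set" sentinel, so only surface the
        # value when the service actually populated it.
        if alternative.confidence > 0.0:
            print(f"confidence: {alternative.confidence:.2f}")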
@@ -671,3 +645,66 @@ message ObjectTrackingAnnotation {
   // Streaming mode: it can only be one ObjectTrackingFrame message in frames.
   repeated ObjectTrackingFrame frames = 2;
 }
+
+// Video annotation feature.
+enum Feature {
+  // Unspecified.
+  FEATURE_UNSPECIFIED = 0;
+
+  // Label detection. Detect objects, such as dog or flower.
+  LABEL_DETECTION = 1;
+
+  // Shot change detection.
+  SHOT_CHANGE_DETECTION = 2;
+
+  // Explicit content detection.
+  EXPLICIT_CONTENT_DETECTION = 3;
+
+  // Human face detection and tracking.
+  FACE_DETECTION = 4;
+
+  // Speech transcription.
+  SPEECH_TRANSCRIPTION = 6;
+
+  // OCR text detection and tracking.
+  TEXT_DETECTION = 7;
+
+  // Object detection and tracking.
+  OBJECT_TRACKING = 9;
+}
+
+// Label detection mode.
+enum LabelDetectionMode {
+  // Unspecified.
+  LABEL_DETECTION_MODE_UNSPECIFIED = 0;
+
+  // Detect shot-level labels.
+  SHOT_MODE = 1;
+
+  // Detect frame-level labels.
+  FRAME_MODE = 2;
+
+  // Detect both shot-level and frame-level labels.
+  SHOT_AND_FRAME_MODE = 3;
+}
+
+// Bucketized representation of likelihood.
+enum Likelihood {
+  // Unspecified likelihood.
+  LIKELIHOOD_UNSPECIFIED = 0;
+
+  // Very unlikely.
+  VERY_UNLIKELY = 1;
+
+  // Unlikely.
+  UNLIKELY = 2;
+
+  // Possible.
+  POSSIBLE = 3;
+
+  // Likely.
+  LIKELY = 4;
+
+  // Very likely.
+  VERY_LIKELY = 5;
+}