@@ -28,7 +28,6 @@ class Feature(enum.IntEnum):
LABEL_DETECTION (int): Label detection. Detect objects, such as dog or flower.
SHOT_CHANGE_DETECTION (int): Shot change detection.
EXPLICIT_CONTENT_DETECTION (int): Explicit content detection.
FACE_DETECTION (int): Human face detection and tracking.
SPEECH_TRANSCRIPTION (int): Speech transcription.
TEXT_DETECTION (int): OCR text detection and tracking.
OBJECT_TRACKING (int): Object detection and tracking.
@@ -38,7 +37,6 @@ class Feature(enum.IntEnum):
LABEL_DETECTION = 1
SHOT_CHANGE_DETECTION = 2
EXPLICIT_CONTENT_DETECTION = 3
FACE_DETECTION = 4
SPEECH_TRANSCRIPTION = 6
TEXT_DETECTION = 7
OBJECT_TRACKING = 9
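A practical note for client code: with FACE_DETECTION dropped from the v1 surface, feature lists must be built from the remaining enum values. A minimal sketch, assuming the google-cloud-videointelligence Python client (bucket and object names are placeholders):

```python
# Sketch: building a v1 feature list now that Feature.FACE_DETECTION
# (value 4) is no longer part of this enum.
from google.cloud import videointelligence

client = videointelligence.VideoIntelligenceServiceClient()

features = [
    videointelligence.enums.Feature.LABEL_DETECTION,
    videointelligence.enums.Feature.SHOT_CHANGE_DETECTION,
]

operation = client.annotate_video(
    input_uri="gs://bucket-id/object-id",  # placeholder URI
    features=features,
)
```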
@@ -18,7 +18,7 @@
},
"methods": {
"AnnotateVideo": {
"timeout_millis": 600000,
"timeout_millis": 60000,
"retry_codes_name": "idempotent",
"retry_params_name": "default",
}
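A practical consequence of the timeout change above: the default per-RPC deadline for AnnotateVideo drops from 600 s to 60 s. Callers that need the old behavior can pass an explicit deadline; a sketch reusing the client from the previous example, assuming the generated method's `timeout` argument is the per-call deadline in seconds:

```python
# Sketch: overriding the new 60 s default RPC deadline. This bounds the
# AnnotateVideo call itself, not the long-running operation it starts.
operation = client.annotate_video(
    input_uri="gs://bucket-id/object-id",
    features=[videointelligence.enums.Feature.LABEL_DETECTION],
    timeout=600.0,  # seconds; restores the previous default
)
result = operation.result(timeout=900)  # waiting on the LRO is separate
```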
@@ -1,4 +1,4 @@
// Copyright 2018 Google LLC.
// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -22,6 +22,7 @@ import "google/longrunning/operations.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/api/client.proto";

option csharp_namespace = "Google.Cloud.VideoIntelligence.V1";
option go_package = "google.golang.org/genproto/googleapis/cloud/videointelligence/v1;videointelligence";
@@ -33,12 +34,14 @@ option ruby_package = "Google::Cloud::VideoIntelligence::V1";

// Service that implements Google Cloud Video Intelligence API.
service VideoIntelligenceService {
option (google.api.default_host) = "videointelligence.googleapis.com";
option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";

// Performs asynchronous video annotation. Progress and results can be
// retrieved through the `google.longrunning.Operations` interface.
// `Operation.metadata` contains `AnnotateVideoProgress` (progress).
// `Operation.response` contains `AnnotateVideoResponse` (results).
rpc AnnotateVideo(AnnotateVideoRequest)
returns (google.longrunning.Operation) {
rpc AnnotateVideo(AnnotateVideoRequest) returns (google.longrunning.Operation) {
option (google.api.http) = {
post: "/v1/videos:annotate"
body: "*"
@@ -52,10 +55,10 @@ message AnnotateVideoRequest {
// [Google Cloud Storage](https://cloud.google.com/storage/) URIs are
// supported, which must be specified in the following format:
// `gs://bucket-id/object-id` (other URI formats return
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For
// more information, see [Request URIs](/storage/docs/reference-uris). A video
// URI may include wildcards in `object-id`, and thus identify multiple
// videos. Supported wildcards: '*' to match 0 or more characters;
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
// [Request URIs](/storage/docs/reference-uris).
// A video URI may include wildcards in `object-id`, and thus identify
// multiple videos. Supported wildcards: '*' to match 0 or more characters;
// '?' to match 1 character. If unset, the input video should be embedded
// in the request as `input_content`. If set, `input_content` should be unset.
string input_uri = 1;
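The wildcard rules above mean one request can fan out over many videos; for example (hypothetical object names):

```python
# Sketch: '*' matches 0 or more characters in `object-id`, '?' exactly one,
# so this single request covers every matching video in the bucket.
operation = client.annotate_video(
    input_uri="gs://bucket-id/videos/clip-*.mp4",  # hypothetical objects
    features=[videointelligence.enums.Feature.LABEL_DETECTION],
)
```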
@@ -75,8 +78,8 @@ message AnnotateVideoRequest {
// Currently, only [Google Cloud Storage](https://cloud.google.com/storage/)
// URIs are supported, which must be specified in the following format:
// `gs://bucket-id/object-id` (other URI formats return
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For
// more information, see [Request URIs](/storage/docs/reference-uris).
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
// [Request URIs](/storage/docs/reference-uris).
string output_uri = 4;

// Optional cloud region where annotation should take place. Supported cloud
@@ -101,9 +104,6 @@ message VideoContext {
// Config for EXPLICIT_CONTENT_DETECTION.
ExplicitContentDetectionConfig explicit_content_detection_config = 4;

// Config for FACE_DETECTION.
FaceDetectionConfig face_detection_config = 5;

// Config for SPEECH_TRANSCRIPTION.
SpeechTranscriptionConfig speech_transcription_config = 6;

@@ -114,6 +114,66 @@ message VideoContext {
ObjectTrackingConfig object_tracking_config = 13;
}

// Video annotation feature.
enum Feature {
// Unspecified.
FEATURE_UNSPECIFIED = 0;

// Label detection. Detect objects, such as dog or flower.
LABEL_DETECTION = 1;

// Shot change detection.
SHOT_CHANGE_DETECTION = 2;

// Explicit content detection.
EXPLICIT_CONTENT_DETECTION = 3;

// Speech transcription.
SPEECH_TRANSCRIPTION = 6;

// OCR text detection and tracking.
TEXT_DETECTION = 7;

// Object detection and tracking.
OBJECT_TRACKING = 9;
}

// Label detection mode.
enum LabelDetectionMode {
// Unspecified.
LABEL_DETECTION_MODE_UNSPECIFIED = 0;

// Detect shot-level labels.
SHOT_MODE = 1;

// Detect frame-level labels.
FRAME_MODE = 2;

// Detect both shot-level and frame-level labels.
SHOT_AND_FRAME_MODE = 3;
}

// Bucketized representation of likelihood.
enum Likelihood {
// Unspecified likelihood.
LIKELIHOOD_UNSPECIFIED = 0;

// Very unlikely.
VERY_UNLIKELY = 1;

// Unlikely.
UNLIKELY = 2;

// Possible.
POSSIBLE = 3;

// Likely.
LIKELY = 4;

// Very likely.
VERY_LIKELY = 5;
}
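Likelihood is the bucketing used by explicit-content results, so consuming code compares against it; a sketch (field names from this proto; `response` assumed from an EXPLICIT_CONTENT_DETECTION request):

```python
# Sketch: filtering explicit-content frames by bucketized likelihood.
Likelihood = videointelligence.enums.Likelihood
explicit = response.annotation_results[0].explicit_annotation
for frame in explicit.frames:
    if frame.pornography_likelihood >= Likelihood.LIKELY:
        print(frame.time_offset, frame.pornography_likelihood)
```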

// Config for LABEL_DETECTION.
message LabelDetectionConfig {
// What labels should be detected with LABEL_DETECTION, in addition to
@@ -156,28 +216,17 @@ message ShotChangeDetectionConfig {
string model = 1;
}

// Config for EXPLICIT_CONTENT_DETECTION.
message ExplicitContentDetectionConfig {
// Model to use for explicit content detection.
// Supported values: "builtin/stable" (the default if unset) and
// "builtin/latest".
string model = 1;
}

// Config for FACE_DETECTION.
message FaceDetectionConfig {
// Model to use for face detection.
// Config for OBJECT_TRACKING.
message ObjectTrackingConfig {
// Model to use for object tracking.
// Supported values: "builtin/stable" (the default if unset) and
// "builtin/latest".
string model = 1;

// Whether bounding boxes should be included in the face annotation output.
bool include_bounding_boxes = 2;
}

// Config for OBJECT_TRACKING.
message ObjectTrackingConfig {
// Model to use for object tracking.
// Config for EXPLICIT_CONTENT_DETECTION.
message ExplicitContentDetectionConfig {
// Model to use for explicit content detection.
// Supported values: "builtin/stable" (the default if unset) and
// "builtin/latest".
string model = 1;
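Taken together, these per-feature config messages hang off VideoContext; wiring them up from Python looks roughly like this (the `types` module path is assumed from the generated client):

```python
# Sketch: passing per-feature configs through VideoContext.
from google.cloud.videointelligence_v1 import types

context = types.VideoContext(
    label_detection_config=types.LabelDetectionConfig(
        label_detection_mode=videointelligence.enums.LabelDetectionMode.SHOT_AND_FRAME_MODE,
    ),
    object_tracking_config=types.ObjectTrackingConfig(model="builtin/stable"),
)
operation = client.annotate_video(
    input_uri="gs://bucket-id/object-id",
    features=[
        videointelligence.enums.Feature.LABEL_DETECTION,
        videointelligence.enums.Feature.OBJECT_TRACKING,
    ],
    video_context=context,
)
```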
@@ -295,57 +344,24 @@ message NormalizedBoundingBox {
float bottom = 4;
}

// Video segment level annotation results for face detection.
message FaceSegment {
// Video segment where a face was detected.
VideoSegment segment = 1;
}

// Video frame level annotation results for face detection.
message FaceFrame {
// Normalized bounding boxes in a frame.
// There can be more than one box if the same face is detected in multiple
// locations within the current frame.
repeated NormalizedBoundingBox normalized_bounding_boxes = 1;

// Time-offset, relative to the beginning of the video,
// corresponding to the video frame for this location.
google.protobuf.Duration time_offset = 2;
}

// Face annotation.
message FaceAnnotation {
// Thumbnail of a representative face view (in JPEG format).
bytes thumbnail = 1;

// All video segments where a face was detected.
repeated FaceSegment segments = 2;

// All video frames where a face was detected.
repeated FaceFrame frames = 3;
}

// Annotation results for a single video.
message VideoAnnotationResults {
// Video file location in
// [Google Cloud Storage](https://cloud.google.com/storage/).
string input_uri = 1;

// Label annotations on video level or user specified segment level.
// Topical label annotations on video level or user specified segment level.
// There is exactly one element for each unique label.
repeated LabelAnnotation segment_label_annotations = 2;

// Label annotations on shot level.
// Topical label annotations on shot level.
// There is exactly one element for each unique label.
repeated LabelAnnotation shot_label_annotations = 3;

// Label annotations on frame level.
// There is exactly one element for each unique label.
repeated LabelAnnotation frame_label_annotations = 4;

// Face annotations. There is exactly one element for each unique face.
repeated FaceAnnotation face_annotations = 5;

// Shot annotations. Each shot is represented as a video segment.
repeated VideoSegment shot_annotations = 6;

@@ -391,6 +407,14 @@ message VideoAnnotationProgress {

// Time of the most recent update.
google.protobuf.Timestamp update_time = 4;

// Specifies which feature is being tracked if the request contains more than
// one feature.
Feature feature = 5;

// Specifies which segment is being tracked if the request contains more than
// one segment.
VideoSegment segment = 6;
}
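With the new `feature` and `segment` fields, a poller can now tell which feature each progress entry belongs to; a sketch (accessor names assumed from the generated client):

```python
# Sketch: polling per-feature progress via Operation.metadata
# (AnnotateVideoProgress.annotation_progress has one entry per feature).
import time

while not operation.done():
    for p in operation.metadata.annotation_progress:
        print(p.feature, p.progress_percent, p.segment)
    time.sleep(10)
```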

// Video annotation progress. Included in the `metadata`
@@ -491,15 +515,17 @@ message SpeechRecognitionAlternative {
// Transcript text representing the words that the user spoke.
string transcript = 1;

// The confidence estimate between 0.0 and 1.0. A higher number
// Output only. The confidence estimate between 0.0 and 1.0. A higher number
// indicates an estimated greater likelihood that the recognized words are
// correct. This field is typically provided only for the top hypothesis, and
// only for `is_final=true` results. Clients should not rely on the
// `confidence` field as it is not guaranteed to be accurate or consistent.
// correct. This field is set only for the top alternative.
// This field is not guaranteed to be accurate and users should not rely on it
// to always be provided.
// The default of 0.0 is a sentinel value indicating `confidence` was not set.
float confidence = 2;

// A list of word-specific information for each recognized word.
// Output only. A list of word-specific information for each recognized word.
// Note: When `enable_speaker_diarization` is true, you will see all the words
// from the beginning of the audio.
repeated WordInfo words = 3;
}
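Given the reworded contract (confidence set only for the top alternative, 0.0 meaning unset), consumers should treat the field as best-effort; a sketch with `response` assumed from a SPEECH_TRANSCRIPTION request:

```python
# Sketch: 0.0 is the sentinel for "confidence not set", so act only on a
# truthy value, and expect it only on the top alternative.
for transcription in response.annotation_results[0].speech_transcriptions:
    if not transcription.alternatives:
        continue
    top = transcription.alternatives[0]
    if top.confidence:  # 0.0 means unset
        print(top.transcript, top.confidence)
```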

@@ -645,66 +671,3 @@ message ObjectTrackingAnnotation {
// Streaming mode: there can be only one ObjectTrackingFrame message in frames.
repeated ObjectTrackingFrame frames = 2;
}

// Video annotation feature.
enum Feature {
// Unspecified.
FEATURE_UNSPECIFIED = 0;

// Label detection. Detect objects, such as dog or flower.
LABEL_DETECTION = 1;

// Shot change detection.
SHOT_CHANGE_DETECTION = 2;

// Explicit content detection.
EXPLICIT_CONTENT_DETECTION = 3;

// Human face detection and tracking.
FACE_DETECTION = 4;

// Speech transcription.
SPEECH_TRANSCRIPTION = 6;

// OCR text detection and tracking.
TEXT_DETECTION = 7;

// Object detection and tracking.
OBJECT_TRACKING = 9;
}

// Label detection mode.
enum LabelDetectionMode {
// Unspecified.
LABEL_DETECTION_MODE_UNSPECIFIED = 0;

// Detect shot-level labels.
SHOT_MODE = 1;

// Detect frame-level labels.
FRAME_MODE = 2;

// Detect both shot-level and frame-level labels.
SHOT_AND_FRAME_MODE = 3;
}

// Bucketized representation of likelihood.
enum Likelihood {
// Unspecified likelihood.
LIKELIHOOD_UNSPECIFIED = 0;

// Very unlikely.
VERY_UNLIKELY = 1;

// Unlikely.
UNLIKELY = 2;

// Possible.
POSSIBLE = 3;

// Likely.
LIKELY = 4;

// Very likely.
VERY_LIKELY = 5;
}
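Finally, reading batch object-tracking results out of the response, per the ObjectTrackingAnnotation hunk above (field names from the v1 proto; `response` as in the earlier sketches):

```python
# Sketch: each batch annotation is one track, with an entity, an overall
# confidence, and per-frame normalized bounding boxes.
for obj in response.annotation_results[0].object_annotations:
    print(obj.entity.description, obj.confidence)
    for frame in obj.frames:
        box = frame.normalized_bounding_box
        print(frame.time_offset, box.left, box.top, box.right, box.bottom)
```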