From bf38cf34e08fbeba761605fa7aa65d03450f3887 Mon Sep 17 00:00:00 2001 From: Thomas Schultz Date: Wed, 18 May 2016 13:43:11 -0400 Subject: [PATCH 1/2] Initial Vision usage docs for discussion --- docs/index.rst | 7 ++ docs/vision-usage.rst | 190 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 197 insertions(+) create mode 100644 docs/vision-usage.rst diff --git a/docs/index.rst b/docs/index.rst index e66c09e8d1e8..5b975fd17211 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -122,6 +122,13 @@ monitoring-timeseries monitoring-label +.. toctree:: + :maxdepth: 0 + :hidden: + :caption: Vision + + vision-usage + .. toctree:: :maxdepth: 0 :hidden: diff --git a/docs/vision-usage.rst b/docs/vision-usage.rst new file mode 100644 index 000000000000..569271223008 --- /dev/null +++ b/docs/vision-usage.rst @@ -0,0 +1,190 @@ +Using the Vision API +==================== + +Authentication and Configuration +-------------------------------- + +- For an overview of authentication in ``gcloud-python``, + see :doc:`gcloud-auth`. + +- In addition to any authentication configuration, you should also set the + :envvar:`GCLOUD_PROJECT` environment variable for the project you'd like + to interact with. If you are Google App Engine or Google Compute Engine + this will be detected automatically. + +- After configuring your environment, create a + :class:`Client ` + + .. doctest:: + + >>> from gcloud import vision + >>> client = vision.Client() + + or pass in ``credentials`` and ``project`` explicitly + + .. doctest:: + + >>> from gcloud import vision + >>> client = vision.Client(project='my-project', credentials=creds) + +Annotating an Image +------------------- + +Annotate a single image +~~~~~~~~~~~~~~~~~~~~~~~ + +.. doctest:: + + >>> from gcloud import vision + >>> client = vision.Client(project="my-project-name") + >>> with open('/tmp/car.jpg', 'r') as f: + ... 
client.annotate(f.read(), vision.FeatureTypes.LABEL_DETECTION, 3) + +Annotate multiple images +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. doctest:: + + >>> images = ( + ... ('./image.jpg', [ + ... vision.FeatureTypes.LABEL_DETECTION, + ... vision.FeatureTypes.LANDMARK_DETECTION]), + ... ('./image2.jpg', [ + ... vision.FeatureTypes.FACE_DETECTION, + ... vision.FeatureTypes.TEXT_DETECTION]),) + >>> annotated_images = [] + >>> for image, feature_types in images: + ... annotated_images.append( + ... vision_client.annotate( + ... image, + ... feature_types)) + +Failing annotations return no results for the feature type requested. + +.. doctest:: + + >>> from gcloud import vision + >>> client = vision.Client(project="my-project-name") + >>> with open('/tmp/car.jpg', 'r') as f: + ... results = client.annotate(f.read(), + ... vision.FeatureTypes.LOGO_DETECTION, 3) + >>> len(results.logos) # 0 + +Face Detection +~~~~~~~~~~~~~~ + +Annotating using the ``FACE_DETECTION`` feature type. + + +.. doctest:: + + >>> from gcloud import vision + >>> client = vision.Client(project="my-project-name") + >>> with open('/tmp/car.jpg', 'r') as f: + ... results = client.annotate(f.read(), + ... vision.FeatureTypes.FACE_DETECTION, 3) + >>> results.faces[0].landmarks[0].type # LEFT_EYE + >>> results.faces[0].landmarks[0].position.x # 1301.2404 + >>> results.faces[0].detection_confidence # 0.9863683 + >>> results.faces[0].joy_likelihood # 0.54453093 + + +Label Detection +~~~~~~~~~~~~~~~ + +Annotating using the ``LABEL_DETECTION`` feature type. + +.. doctest:: + + >>> from gcloud import vision + >>> client = vision.Client(project="my-project-name") + >>> with open('/tmp/car.jpg', 'r') as f: + ... results = client.annotate(f.read(), + ... 
vision.FeatureTypes.LABEL_DETECTION, 3) + >>> results.labels[0].description # automobile + >>> results.labels[0].score # 0.9794637 + >>> results.labels[1].description # vehicle + >>> results.labels[1].score # 0.9494648 + >>> results.labels[2].description # sports car + >>> results.labels[2].score # 0.8258028 + +Landmark Detection +~~~~~~~~~~~~~~~~~~ + +Annotating using the ``LANDMARK_DETECTION`` feature type. + +.. doctest:: + + >>> from gcloud import vision + >>> client = vision.Client(project="my-project-name") + >>> with open('/tmp/landmark.jpg', 'r') as f: + ... results = client.annotate(f.read(), + ... vision.FeatureTypes.LANDMARK_DETECTION, 3) + >>> results.landmarks[0].description # Sydney Opera House + >>> results.landmarks[0].locations[0].latitude # -33.857123 + >>> results.landmarks[0].locations[0].longitude # 151.213921 + >>> results.landmarks[0].bounding_poly.vertices[0].x = 78 + >>> results.landmarks[0].bounding_poly.vertices[0].y = 162 + +Logo Detection +~~~~~~~~~~~~~~ + +Annotating using the ``LOGO_DETECTION`` feature type. + +.. doctest:: + + >>> from gcloud import vision + >>> client = vision.Client(project="my-project-name") + >>> with open('/tmp/logo.jpg', 'r') as f: + ... results = client.annotate(f.read(), + ... vision.FeatureTypes.LOGO_DETECTION, 3) + >>> results.logos[0].description # Google + >>> results.logos[0].score # 0.9795432 + >>> results.logos[0].bounding_poly.vertices[0].x = 78 + >>> results.logos[0].bounding_poly.vertices[0].y = 162 + +Safe Search Detection +~~~~~~~~~~~~~~~~~~~~~ + +Annotating using the ``SAFE_SEARCH_DETECTION`` feature type. + +.. doctest:: + + >>> from gcloud import vision + >>> client = vision.Client(project="my-project-name") + >>> with open('/tmp/logo.jpg', 'r') as f: + ... results = client.annotate(f.read(), + ... 
vision.FeatureTypes.SAFE_SEARCH_DETECTION) + >>> results[0].safe.adult # VERY_UNLIKELY + >>> results[0].safe.medical # UNLIKELY + +Text Detection +~~~~~~~~~~~~~~ + +Annotating using the ``TEXT_DETECTION`` feature type. + +.. doctest:: + + >>> from gcloud import vision + >>> client = vision.Client(project="my-project-name") + >>> with open('/tmp/logo.jpg', 'r') as f: + ... results = client.annotate(f.read(), + ... vision.FeatureTypes.TEXT_DETECTION) + >>> results[0].locale # en + >>> results[0].description # the full text of the image. + +Image Properties +~~~~~~~~~~~~~~~~ + +Annotating using the ``IMAGE_PROPERTIES`` feature type. + +.. doctest:: + + >>> from gcloud import vision + >>> client = vision.Client(project="my-project-name") + >>> with open('/tmp/logo.jpg', 'r') as f: + ... results = client.annotate(f.read(), + ... vision.FeatureTypes.IMAGE_PROPERTIES) + >>> results[0].dominant_colors.colors[0].color.red # 244 + >>> results[0].dominant_colors.colors[0].score # 0.65519291 + >>> results[0].dominant_colors.colors[0].pixel_fraction # 0.758658 From 196e229ecdd60ab3ea2ce0c5bc634bb6a7015ddd Mon Sep 17 00:00:00 2001 From: Thomas Schultz Date: Mon, 23 May 2016 23:51:52 -0400 Subject: [PATCH 2/2] Update usage examples to batch. --- docs/vision-usage.rst | 237 +++++++++++++++++++++++++----------------- 1 file changed, 140 insertions(+), 97 deletions(-) diff --git a/docs/vision-usage.rst b/docs/vision-usage.rst index 569271223008..71d6ca61fc52 100644 --- a/docs/vision-usage.rst +++ b/docs/vision-usage.rst @@ -9,20 +9,23 @@ Authentication and Configuration - In addition to any authentication configuration, you should also set the :envvar:`GCLOUD_PROJECT` environment variable for the project you'd like - to interact with. If you are Google App Engine or Google Compute Engine + to interact with. If the GCLOUD_PROJECT environment variable is not present, + the project ID from JSON file credentials is used. 
+ + If you are using Google App Engine or Google Compute Engine this will be detected automatically. - After configuring your environment, create a :class:`Client ` - .. doctest:: +.. code-block:: python >>> from gcloud import vision >>> client = vision.Client() - or pass in ``credentials`` and ``project`` explicitly +or pass in ``credentials`` and ``project`` explicitly - .. doctest:: +.. code-block:: python >>> from gcloud import vision >>> client = vision.Client(project='my-project', credentials=creds) @@ -33,158 +36,198 @@ Annotating an Image Annotate a single image ~~~~~~~~~~~~~~~~~~~~~~~ -.. doctest:: +.. code-block:: python >>> from gcloud import vision - >>> client = vision.Client(project="my-project-name") - >>> with open('/tmp/car.jpg', 'r') as f: - ... client.annotate(f.read(), vision.FeatureTypes.LABEL_DETECTION, 3) + >>> client = vision.Client() + >>> image = client.image('./image.png') + >>> faces = image.detect_faces(limit=10) Annotate multiple images ~~~~~~~~~~~~~~~~~~~~~~~~ -.. doctest:: - - >>> images = ( - ... ('./image.jpg', [ - ... vision.FeatureTypes.LABEL_DETECTION, - ... vision.FeatureTypes.LANDMARK_DETECTION]), - ... ('./image2.jpg', [ - ... vision.FeatureTypes.FACE_DETECTION, - ... vision.FeatureTypes.TEXT_DETECTION]),) - >>> annotated_images = [] - >>> for image, feature_types in images: - ... annotated_images.append( - ... vision_client.annotate( - ... image, - ... feature_types)) +.. code-block:: python + + >>> first_image = client.image('./image.jpg') + >>> second_image = client.image('gs://my-storage-bucket/image2.jpg') + >>> with client.batch(): + ... labels = first_image.detect_labels() + ... faces = second_image.detect_faces(limit=10) + +or + +.. 
code-block:: python + + >>> images = [] + >>> images.append(client.image('./image.jpg')) + >>> images.append(client.image('gs://my-storage-bucket/image2.jpg')) + >>> faces = client.detect_faces_multi(images, limit=10) + +No results returned +~~~~~~~~~~~~~~~~~~~ Failing annotations return no results for the feature type requested. -.. doctest:: +.. code-block:: python >>> from gcloud import vision - >>> client = vision.Client(project="my-project-name") - >>> with open('/tmp/car.jpg', 'r') as f: - ... results = client.annotate(f.read(), - ... vision.FeatureTypes.LOGO_DETECTION, 3) - >>> len(results.logos) # 0 + >>> client = vision.Client() + >>> image = client.image('./image.jpg') + >>> logos = image.detect_logos(limit=10) + >>> logos + [] + + +Manual Detection +~~~~~~~~~~~~~~~~ + +You can call the detection method manually. + +.. code-block:: python + + >>> from gcloud import vision + >>> client = vision.Client() + >>> image = client.image('gs://my-test-bucket/image.jpg') + >>> faces = image.detect(type=vision.FACE_DETECTION, limit=10) Face Detection ~~~~~~~~~~~~~~ -Annotating using the ``FACE_DETECTION`` feature type. +Detecting a face or faces in an image. +For a list of the possible facial landmarks +see: https://cloud.google.com/vision/reference/rest/v1/images/annotate#type_1 -.. doctest:: +.. code-block:: python >>> from gcloud import vision - >>> client = vision.Client(project="my-project-name") - >>> with open('/tmp/car.jpg', 'r') as f: - ... results = client.annotate(f.read(), - ... 
vision.FeatureTypes.FACE_DETECTION, 3)
-   >>> results.faces[0].landmarks[0].type # LEFT_EYE
-   >>> results.faces[0].landmarks[0].position.x # 1301.2404
-   >>> results.faces[0].detection_confidence # 0.9863683
-   >>> results.faces[0].joy_likelihood # 0.54453093
+   >>> client = vision.Client()
+   >>> image = client.image('./image.jpg')
+   >>> faces = image.detect_faces(limit=10)
+   >>> faces[0].landmarks[0].type
+   'LEFT_EYE'
+   >>> faces[0].landmarks[0].position.x
+   1301.2404
+   >>> faces[0].detection_confidence
+   0.9863683
+   >>> faces[0].joy_likelihood
+   0.54453093
+   >>> faces[0].anger_likelihood
+   0.02545464
+
 
 Label Detection
 ~~~~~~~~~~~~~~~
 
-Annotating using the ``LABEL_DETECTION`` feature type.
+Image labels are a way to help categorize the contents of an image.
+If you have an image with a car, person and a dog in it, label detection will
+attempt to identify those objects.
 
-.. doctest::
+.. code-block:: python
 
    >>> from gcloud import vision
-   >>> client = vision.Client(project="my-project-name")
-   >>> with open('/tmp/car.jpg', 'r') as f:
-   ...     results = client.annotate(f.read(),
-   ...                               vision.FeatureTypes.LABEL_DETECTION, 3)
-   >>> results.labels[0].description # automobile
-   >>> results.labels[0].score # 0.9794637
-   >>> results.labels[1].description # vehicle
-   >>> results.labels[1].score # 0.9494648
-   >>> results.labels[2].description # sports car
-   >>> results.labels[2].score # 0.8258028
+   >>> client = vision.Client()
+   >>> image = client.image('./image.jpg')
+   >>> labels = image.detect_labels(limit=3)
+   >>> labels[0].description
+   'automobile'
+   >>> labels[0].score
+   0.9863683
+
 
 Landmark Detection
 ~~~~~~~~~~~~~~~~~~
 
-Annotating using the ``LANDMARK_DETECTION`` feature type.
+The API will attempt to detect landmarks such as Mount Rushmore and
+the Sydney Opera House. The API will also provide their known geographical
+locations if available.
 
-.. doctest::
+.. 
code-block:: python
 
    >>> from gcloud import vision
-   >>> client = vision.Client(project="my-project-name")
-   >>> with open('/tmp/landmark.jpg', 'r') as f:
-   ...     results = client.annotate(f.read(),
-   ...                               vision.FeatureTypes.LANDMARK_DETECTION, 3)
-   >>> results.landmarks[0].description # Sydney Opera House
-   >>> results.landmarks[0].locations[0].latitude # -33.857123
-   >>> results.landmarks[0].locations[0].longitude # 151.213921
-   >>> results.landmarks[0].bounding_poly.vertices[0].x = 78
-   >>> results.landmarks[0].bounding_poly.vertices[0].y = 162
+   >>> client = vision.Client()
+   >>> image = client.image('./image.jpg')
+   >>> landmarks = image.detect_landmarks()
+   >>> landmarks[0].description
+   'Sydney Opera House'
+   >>> landmarks[0].locations[0].latitude
+   -33.857123
+   >>> landmarks[0].locations[0].longitude
+   151.213921
+   >>> landmarks[0].bounding_poly.vertices[0].x
+   78
+   >>> landmarks[0].bounding_poly.vertices[0].y
+   162
 
 Logo Detection
 ~~~~~~~~~~~~~~
 
-Annotating using the ``LOGO_DETECTION`` feature type.
+Google Vision can also attempt to detect company and brand logos in images.
 
-.. doctest::
+.. code-block:: python
 
    >>> from gcloud import vision
-   >>> client = vision.Client(project="my-project-name")
-   >>> with open('/tmp/logo.jpg', 'r') as f:
-   ...     results = client.annotate(f.read(),
-   ...                               vision.FeatureTypes.LOGO_DETECTION, 3)
-   >>> results.logos[0].description # Google
-   >>> results.logos[0].score # 0.9795432
-   >>> results.logos[0].bounding_poly.vertices[0].x = 78
-   >>> results.logos[0].bounding_poly.vertices[0].y = 162
+   >>> client = vision.Client()
+   >>> image = client.image('./image.jpg')
+   >>> logos = image.detect_logos(limit=1)
+   >>> logos[0].description
+   'Google'
+   >>> logos[0].score
+   0.9795432
+   >>> logos[0].bounding_poly.vertices[0].x
+   78
+   >>> logos[0].bounding_poly.vertices[0].y
+   162
 
 Safe Search Detection
 ~~~~~~~~~~~~~~~~~~~~~
 
-Annotating using the ``SAFE_SEARCH_DETECTION`` feature type.
+Detecting safe search properties of an image.
 
-.. 
doctest::
+.. code-block:: python
 
    >>> from gcloud import vision
-   >>> client = vision.Client(project="my-project-name")
-   >>> with open('/tmp/logo.jpg', 'r') as f:
-   ...     results = client.annotate(f.read(),
-   ...                               vision.FeatureTypes.SAFE_SEARCH_DETECTION)
-   >>> results[0].safe.adult # VERY_UNLIKELY
-   >>> results[0].safe.medical # UNLIKELY
+   >>> client = vision.Client()
+   >>> image = client.image('./image.jpg')
+   >>> safe_search = image.detect_safe_search()
+   >>> safe_search.adult
+   'VERY_UNLIKELY'
+   >>> safe_search.medical
+   'UNLIKELY'
 
 Text Detection
 ~~~~~~~~~~~~~~
 
-Annotating using the ``TEXT_DETECTION`` feature type.
+Detecting text with OCR from an image.
 
-.. doctest::
+.. code-block:: python
 
    >>> from gcloud import vision
-   >>> client = vision.Client(project="my-project-name")
-   >>> with open('/tmp/logo.jpg', 'r') as f:
-   ...     results = client.annotate(f.read(),
-   ...                               vision.FeatureTypes.TEXT_DETECTION)
-   >>> results[0].locale # en
-   >>> results[0].description # the full text of the image.
+   >>> client = vision.Client()
+   >>> image = client.image('./image.jpg')
+   >>> text = image.detect_text()
+   >>> text.locale
+   'en'
+   >>> text.description
+   'the full text of the image.'
 
 Image Properties
 ~~~~~~~~~~~~~~~~
 
-Annotating using the ``IMAGE_PROPERTIES`` feature type.
+Detecting image color properties.
 
-.. doctest::
+.. code-block:: python
 
    >>> from gcloud import vision
-   >>> client = vision.Client(project="my-project-name")
-   >>> with open('/tmp/logo.jpg', 'r') as f:
-   ...     results = client.annotate(f.read(),
-   ...                               
vision.FeatureTypes.IMAGE_PROPERTIES) - >>> results[0].dominant_colors.colors[0].color.red # 244 - >>> results[0].dominant_colors.colors[0].score # 0.65519291 - >>> results[0].dominant_colors.colors[0].pixel_fraction # 0.758658 + >>> client = vision.Client() + >>> image = client.image('./image.jpg') + >>> colors = image.detect_properties() + >>> colors[0].red + 244 + >>> colors[0].blue + 134 + >>> colors[0].score + 0.65519291 + >>> colors[0].pixel_fraction + 0.758658