diff --git a/.gitignore b/.gitignore index cf6ba86..e5ed1fb 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ _site vendor .DS_Store _posts/.DS_Store +assets/notebooks/.ipynb_checkpoints/ diff --git a/_posts/2020-05-01-start.md b/_posts/2020-05-01-start.md index f86c1fb..c304016 100644 --- a/_posts/2020-05-01-start.md +++ b/_posts/2020-05-01-start.md @@ -2,6 +2,7 @@ title: Где начать? date: 2020-05-01 minutes: 7 +author: Rustem G --- Краткий пересказ [схемы Эндрю Ына](https://landing.ai/ai-transformation-playbook/) diff --git a/assets/notebooks/.ipynb_checkpoints/0.1-rg-animate-images-checkpoint.ipynb b/assets/notebooks/.ipynb_checkpoints/0.1-rg-animate-images-checkpoint.ipynb deleted file mode 100644 index 14a7dc9..0000000 --- a/assets/notebooks/.ipynb_checkpoints/0.1-rg-animate-images-checkpoint.ipynb +++ /dev/null @@ -1,114 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Create GIF animation from a series of images" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "from pprint import pprint\n", - "from PIL import Image" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [], - "source": [ - "folder = '/Users/rustem.galiullin/Downloads/en_clusters/'\n", - "match = 'Screen Shot'\n", - "duration = 2000\n", - "\n", - "output_file = f'../images/en_clusters.gif'" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [], - "source": [ - "files = list(Path(folder).glob(match + '*.png'))" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[PosixPath('/Users/rustem.galiullin/Downloads/en_clusters/Screen Shot 2020-06-01 at 22.07.34.png'),\n", - " PosixPath('/Users/rustem.galiullin/Downloads/en_clusters/Screen Shot 2020-06-01 at 22.07.09.png'),\n", - " PosixPath('/Users/rustem.galiullin/Downloads/en_clusters/Screen Shot 2020-06-01 at 22.07.24.png'),\n", - " PosixPath('/Users/rustem.galiullin/Downloads/en_clusters/Screen Shot 2020-06-01 at 22.07.42.png'),\n", - " PosixPath('/Users/rustem.galiullin/Downloads/en_clusters/Screen Shot 2020-06-01 at 22.08.14.png'),\n", - " PosixPath('/Users/rustem.galiullin/Downloads/en_clusters/Screen Shot 2020-06-01 at 22.07.53.png'),\n", - " PosixPath('/Users/rustem.galiullin/Downloads/en_clusters/Screen Shot 2020-06-01 at 22.08.02.png'),\n", - " PosixPath('/Users/rustem.galiullin/Downloads/en_clusters/Screen Shot 2020-06-01 at 22.08.27.png'),\n", - " PosixPath('/Users/rustem.galiullin/Downloads/en_clusters/Screen Shot 2020-06-01 at 22.08.08.png'),\n", - " PosixPath('/Users/rustem.galiullin/Downloads/en_clusters/Screen Shot 2020-06-01 at 22.07.01.png')]\n" - ] - } - ], - "source": [ - "pprint(files)" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [], - "source": [ - "frames = []\n", - "\n", - "for f in files:\n", - " img = Image.open(f)\n", - " frames.append(img)\n", - "\n", - "frames[0].save(output_file, format='GIF', append_images=frames[1:], save_all=True,\n", - " duration=duration, loop=0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "myblog", - "language": "python", - "name": "myblog" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/assets/notebooks/.ipynb_checkpoints/0.1-rg-face-clustering-checkpoint.ipynb b/assets/notebooks/.ipynb_checkpoints/0.1-rg-face-clustering-checkpoint.ipynb deleted file mode 100644 index 2fd6442..0000000 --- a/assets/notebooks/.ipynb_checkpoints/0.1-rg-face-clustering-checkpoint.ipynb +++ /dev/null @@ -1,6 +0,0 @@ -{ - "cells": [], - "metadata": {}, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/assets/notebooks/0.1-rg-face-clustering.ipynb b/assets/notebooks/0.1-rg-face-clustering.ipynb deleted file mode 100644 index 2166882..0000000 --- a/assets/notebooks/0.1-rg-face-clustering.ipynb +++ /dev/null @@ -1,44 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Count People on a Video\n", - "\n", - "**Steps**\n", - "1. Detect faces\n", - "2. Convert faces to vectors\n", - "3. Cluster faces" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "myblog", - "language": "python", - "name": "myblog" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/assets/notebooks/0.2-rg-count-post-chars.ipynb b/assets/notebooks/0.2-rg-count-post-chars.ipynb new file mode 100644 index 0000000..70d1c27 --- /dev/null +++ b/assets/notebooks/0.2-rg-count-post-chars.ipynb @@ -0,0 +1,132 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Count post chars" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "import json\n", + "import random\n", + "from bs4 import BeautifulSoup\n", + "import time" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [], + "source": [ + "posts = {\n", + " \"distinct-count\": \"https://rusteam.github.io/blog/2019/07/25/distinct-count\",\n", + " \"data-clustering\": \"https://rusteam.github.io/blog/2020/05/25/data-clustering\",\n", + " \"person-reid\": \"https://rusteam.github.io/blog/2019/06/11/person-reid\",\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [], + "source": [ + "class Article:\n", + " def __init__(self, url):\n", + " self.url = url\n", + " \n", + " def get_html(self):\n", + " contents = requests.get(url).content.decode('utf-8')\n", + " self.soup = BeautifulSoup(contents)\n", + " self.texts = self.soup.find_all(['h2','h2','h3','h4','h5','h6','p', 'th', 'td'])\n", + "\n", + " def count_chars(self):\n", + " n_chars = 0\n", + " for txt in self.texts:\n", + " n_chars += len(txt.text)\n", + " return n_chars\n", + " \n", + " def run(self):\n", + " self.get_html()\n", + " return self.count_chars()\n", + " \n", + " def show_random(self):\n", + " tag = random.choice(self.texts)\n", + " print('tag:', tag, end='\\n\\n')\n", + " print('text:', tag.text)" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "distinct-count 4474 characters\n", + "data-clustering 5286 characters\n", + "person-reid 4212 characters\n" + ] + } + ], + "source": [ + "for title,url in posts.items():\n", + " a = Article(url)\n", + " n_c = a.run()\n", + " print(title, n_c, 'characters')\n", + " time.sleep(0.5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "myblog", + "language": "python", + "name": "myblog" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}