Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ _site
vendor
.DS_Store
_posts/.DS_Store
assets/notebooks/.ipynb_checkpoints/
1 change: 1 addition & 0 deletions _posts/2020-05-01-start.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
title: Где начать?
date: 2020-05-01
minutes: 7
author: Rustem G
---

Краткий пересказ [схемы Эндрю Ына](https://landing.ai/ai-transformation-playbook/)
Expand Down

This file was deleted.

This file was deleted.

44 changes: 0 additions & 44 deletions assets/notebooks/0.1-rg-face-clustering.ipynb

This file was deleted.

132 changes: 132 additions & 0 deletions assets/notebooks/0.2-rg-count-post-chars.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Count post chars"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import json\n",
"import random\n",
"from bs4 import BeautifulSoup\n",
"import time"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [],
"source": [
"posts = {\n",
" \"distinct-count\": \"https://rusteam.github.io/blog/2019/07/25/distinct-count\",\n",
" \"data-clustering\": \"https://rusteam.github.io/blog/2020/05/25/data-clustering\",\n",
" \"person-reid\": \"https://rusteam.github.io/blog/2019/06/11/person-reid\",\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [],
"source": [
"class Article:\n",
" def __init__(self, url):\n",
" self.url = url\n",
" \n",
" def get_html(self):\n",
" contents = requests.get(url).content.decode('utf-8')\n",
" self.soup = BeautifulSoup(contents)\n",
" self.texts = self.soup.find_all(['h2','h2','h3','h4','h5','h6','p', 'th', 'td'])\n",
"\n",
" def count_chars(self):\n",
" n_chars = 0\n",
" for txt in self.texts:\n",
" n_chars += len(txt.text)\n",
" return n_chars\n",
" \n",
" def run(self):\n",
" self.get_html()\n",
" return self.count_chars()\n",
" \n",
" def show_random(self):\n",
" tag = random.choice(self.texts)\n",
" print('tag:', tag, end='\\n\\n')\n",
" print('text:', tag.text)"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"distinct-count 4474 characters\n",
"data-clustering 5286 characters\n",
"person-reid 4212 characters\n"
]
}
],
"source": [
"for title,url in posts.items():\n",
" a = Article(url)\n",
" n_c = a.run()\n",
" print(title, n_c, 'characters')\n",
" time.sleep(0.5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "myblog",
"language": "python",
"name": "myblog"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 4
}