From c91a618229958e564bce940c30ee38c7e523e66d Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Wed, 18 Nov 2020 10:58:02 -0700 Subject: [PATCH 01/32] Add API code #1 - Include issue_request - setup.py config and requirements for installation --- figstats/commons.py | 52 +++++++++++++++++++++++++++++++++++++++++++++ figstats/stats.py | 39 ++++++++++++++++++++++++++++++++++ requirements.txt | 1 + setup.py | 21 ++++++++++++++++++ 4 files changed, 113 insertions(+) create mode 100644 figstats/commons.py create mode 100644 figstats/stats.py create mode 100644 requirements.txt create mode 100644 setup.py diff --git a/figstats/commons.py b/figstats/commons.py new file mode 100644 index 0000000..9e0c51a --- /dev/null +++ b/figstats/commons.py @@ -0,0 +1,52 @@ +import json +import requests +from requests.exceptions import HTTPError + + +def issue_request(method, url, headers, data=None, binary=False, + params=None): + """Wrapper for HTTP request + + Parameters + ---------- + method : str + HTTP method. One of GET, PUT, POST or DELETE + + url : str + URL for the request + + headers: dict + HTTP header information + + data: dict + Figshare article data + + binary: bool + Whether data is binary or not + + params: dict + Additional information for URL GET request + + Returns + ------- + response_data: dict + JSON response for the request returned as python dict + """ + if data is not None and not binary: + data = json.dumps(data) + + response = requests.request(method, url, headers=headers, + data=data, params=params) + + try: + response.raise_for_status() + try: + response_data = json.loads(response.text) + except ValueError: + response_data = response.content + except HTTPError as error: + print('Caught an HTTPError: {}'.format(error)) + print('Body:\n', response.text) + raise + + return response_data diff --git a/figstats/stats.py b/figstats/stats.py new file mode 100644 index 0000000..4c3bf5a --- /dev/null +++ b/figstats/stats.py @@ -0,0 +1,39 @@ +from os.path import join + +# from ldcoolp.curation.api.figshare import FigshareInstituteAdmin + +from .commons import issue_request + +class Figshare: + """ + Purpose: + A Python interface to work with Figshare statistics endpoint + + """ + + def __init__(self, token='', institution=False, institute=''): + + self.baseurl = 'https://stats.figshare.com' + self.institution = institution + if not institute: + self.institute = institute + self.baseurl_institute = join(self.baseurl, self.institute) + self.token = token + self.headers = {'Content-Type': 'application/json'} + if self.token: + self.headers['Authorization'] = f'token {self.token}' + + def endpoint(self, link, institution=False): + if institution: + return join(self.baseurl_institute, link) + else: + return join(self.baseurl, link) + + def get_totals(self, item_id, item='article'): + total_dict = {} + for counter in ['views', 'downloads', 'shares']: + # Using non-institution one since that seems to give correct stats + url = self.endpoint(join('total', counter, item, str(item_id))) + result = issue_request('GET', url, headers=self.headers) + total_dict[counter] = result['totals'] + return total_dict diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..566083c --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +requests==2.22.0 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..e3d593e --- /dev/null +++ b/setup.py @@ -0,0 +1,21 @@ +from setuptools import setup + +with open("README.md", "r") as fh: + long_description = fh.read() + +with open("requirements.txt", "r") as fr: + requirements = fr.read().splitlines() + +setup( + name='figstats', + version='v0.0.1', + packages=['figstats'], + url='https://github.com/UAL-ODIS/figstats', + license='MIT License', + author='Chun Ly', + author_email='astro.chun@gmail.com', + description='Python tool to retrieve stats from Figshare API', + long_description=long_description, + long_description_content_type='text/markdown', + install_requires=requirements +) From a06513d5c379074569d25e98cbb67ef8fae6e9ea Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Wed, 18 Nov 2020 12:58:05 -0700 Subject: [PATCH 02/32] stats.Figshare: Add institution in get_totals #1 --- figstats/__init__.py | 0 figstats/stats.py | 9 ++++++--- 2 files changed, 6 insertions(+), 3 deletions(-) create mode 100644 figstats/__init__.py diff --git a/figstats/__init__.py b/figstats/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/figstats/stats.py b/figstats/stats.py index 4c3bf5a..1badbed 100644 --- a/figstats/stats.py +++ b/figstats/stats.py @@ -4,6 +4,7 @@ from .commons import issue_request + class Figshare: """ Purpose: @@ -15,7 +16,7 @@ def __init__(self, token='', institution=False, institute=''): self.baseurl = 'https://stats.figshare.com' self.institution = institution - if not institute: + if institute: self.institute = institute self.baseurl_institute = join(self.baseurl, self.institute) self.token = token @@ -29,11 +30,13 @@ def endpoint(self, link, institution=False): else: return join(self.baseurl, link) - def get_totals(self, item_id, item='article'): + def get_totals(self, item_id, item='article', institution=False): total_dict = {} for counter in ['views', 'downloads', 'shares']: # Using non-institution one since that seems to give correct stats - url = self.endpoint(join('total', counter, item, str(item_id))) + url = self.endpoint(join('total', counter, item, str(item_id)), + institution=institution) + print(url) result = issue_request('GET', url, headers=self.headers) total_dict[counter] = result['totals'] return total_dict From 293b0c4b8e53b7ae0273fe4374cff0ff58799965 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Wed, 2 Dec 2020 11:01:07 -0700 Subject: [PATCH 03/32] stats.Figshare: Add get_user_totals and get_timeline methods Note: get_timeline currently works with figshare datasets (not institutional ones) --- figstats/stats.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/figstats/stats.py b/figstats/stats.py index 1badbed..f6de284 100644 --- a/figstats/stats.py +++ b/figstats/stats.py @@ -36,7 +36,24 @@ def get_totals(self, item_id, item='article', institution=False): # Using non-institution one since that seems to give correct stats url = self.endpoint(join('total', counter, item, str(item_id)), institution=institution) - print(url) result = issue_request('GET', url, headers=self.headers) total_dict[counter] = result['totals'] return total_dict + + def get_user_totals(self, author_id): + # author_id is not the same as institution_user_id for institutional accounts + total_dict = self.get_totals(author_id, item='author', + institution=self.institution) + return total_dict + + def get_timeline(self, item_id, item='article', granularity='day', + institution=False): + total_dict = {} + for counter in ['views', 'downloads', 'shares']: + # Using non-institution one since that seems to give correct stats + urls = ['timeline', granularity, counter, item, str(item_id)] + url = self.endpoint(join(*urls), institution=institution) + print(url) + result = issue_request('GET', url, headers=self.headers) + total_dict[counter] = result['timeline'] + return total_dict From ef31020e8706390944011334e28afbe92c3c94e1 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Thu, 3 Dec 2020 10:02:16 -0700 Subject: [PATCH 04/32] A number of changes - Adjust attributes to distinguish basic cred and API token - Add documentation for get_totals, get_user_totals --- figstats/stats.py | 61 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 18 deletions(-) diff --git a/figstats/stats.py b/figstats/stats.py index f6de284..b759b2e 100644 --- a/figstats/stats.py +++ b/figstats/stats.py @@ -12,38 +12,64 @@ class Figshare: """ - def __init__(self, token='', institution=False, institute=''): + def __init__(self, api_token='', basic_token='', institution=False, institute=''): - self.baseurl = 'https://stats.figshare.com' + # For stats API + self.stats_baseurl = 'https://stats.figshare.com' self.institution = institution if institute: self.institute = institute - self.baseurl_institute = join(self.baseurl, self.institute) - self.token = token - self.headers = {'Content-Type': 'application/json'} - if self.token: - self.headers['Authorization'] = f'token {self.token}' + self.stats_baseurl_institute = join(self.stats_baseurl, self.institute) - def endpoint(self, link, institution=False): + # Base64 token + self.basic_headers = {'Content-Type': 'application/json'} + self.basic_token = basic_token + if self.basic_token: + self.basic_headers['Authorization'] = f'Basic {self.basic_token}' + + # API token + self.api_headers = {'Content-Type': 'application/json'} + self.api_token = api_token + if self.api_token: + self.api_headers['Authorization'] = f'token {self.api_token}' + + def stats_endpoint(self, link, institution=False): if institution: - return join(self.baseurl_institute, link) + return join(self.stats_baseurl_institute, link) else: - return join(self.baseurl, link) + return join(self.stats_baseurl, link) def get_totals(self, item_id, item='article', institution=False): + """ + Retrieve totals of views, downloads, and share for an "item" + Item can be 'article', 'author', 'collection', 'group' or 'project' + Note: This does not require authenticating credentials for institution accounts + + See: https://docs.figshare.com/#stats_totals + """ + + if item not in ['article', 'author', 'collection', 'group', 'project']: + raise ValueError("Incorrect item type") + total_dict = {} for counter in ['views', 'downloads', 'shares']: # Using non-institution one since that seems to give correct stats - url = self.endpoint(join('total', counter, item, str(item_id)), - institution=institution) - result = issue_request('GET', url, headers=self.headers) + url = self.stats_endpoint(join('total', counter, item, str(item_id)), + institution=institution) + result = issue_request('GET', url, headers=self.basic_headers) total_dict[counter] = result['totals'] return total_dict def get_user_totals(self, author_id): - # author_id is not the same as institution_user_id for institutional accounts + """ + Retrieve an author's total using get_totals() + + :param author_id: This is not the same as the institution_user_id for institutional accounts + :return: total_dict: dict containing total views, downloads, and shares + Note: This does not require authenticating credentials for institution accounts + """ total_dict = self.get_totals(author_id, item='author', - institution=self.institution) + institution=False) return total_dict def get_timeline(self, item_id, item='article', granularity='day', @@ -52,8 +78,7 @@ def get_timeline(self, item_id, item='article', granularity='day', for counter in ['views', 'downloads', 'shares']: # Using non-institution one since that seems to give correct stats urls = ['timeline', granularity, counter, item, str(item_id)] - url = self.endpoint(join(*urls), institution=institution) - print(url) - result = issue_request('GET', url, headers=self.headers) + url = self.stats_endpoint(join(*urls), institution=institution) + result = issue_request('GET', url, headers=self.basic_headers) total_dict[counter] = result['timeline'] return total_dict From 8e7b485ccdfe4cf94e1925fd64c88647a627579a Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Thu, 3 Dec 2020 10:35:14 -0700 Subject: [PATCH 05/32] stats.Figshare: Add get_figshare_id and retrieve_institution_users methods - Add Figshare API endpoint --- figstats/stats.py | 61 +++++++++++++++++++++++++++++++++++++++++++++-- requirements.txt | 1 + 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/figstats/stats.py b/figstats/stats.py index b759b2e..a76930a 100644 --- a/figstats/stats.py +++ b/figstats/stats.py @@ -1,6 +1,5 @@ from os.path import join - -# from ldcoolp.curation.api.figshare import FigshareInstituteAdmin +import pandas as pd from .commons import issue_request @@ -27,6 +26,11 @@ def __init__(self, api_token='', basic_token='', institution=False, institute='' if self.basic_token: self.basic_headers['Authorization'] = f'Basic {self.basic_token}' + # For Figshare API + self.main_baseurl = 'https://api.figshare.com/v2/account/' + if self.institution: + self.main_baseurl_institute = join(self.main_baseurl, "institution") + # API token self.api_headers = {'Content-Type': 'application/json'} self.api_token = api_token @@ -82,3 +86,56 @@ def get_timeline(self, item_id, item='article', granularity='day', result = issue_request('GET', url, headers=self.basic_headers) total_dict[counter] = result['timeline'] return total_dict + + def get_figshare_id(self, accounts_df): + """ + Retrieve Figshare account ID(s) + Note: This is not the institutional ID, but one associated with + the unique profile + + :param accounts_df: pandas DataFrame containing institution ID + :return: accounts_df: The input DataFrame with an additional column + """ + + endpoint = join(self.main_baseurl_institute, "users") + + author_id = [] + for institute_id in accounts_df['id']: + url = f"{endpoint}/{institute_id}" + response = issue_request('GET', url, self.api_headers) + author_id.append(response['id']) + accounts_df['author_id'] = author_id + return accounts_df + + def retrieve_institution_users(self, ignore_admin=False): + """ + Retrieve accounts within institutional instance + + This is based on LD-Cool-P get_account_list method of FigshareInstituteAdmin + It includes retrieving the default author_id + + It uses: + https://docs.figshare.com/#private_institution_accounts_list + https://docs.figshare.com/#private_account_institution_user + """ + url = join(self.main_baseurl_institute, "accounts") + + # Figshare API is limited to a maximum of 1000 per page + params = {'page': 1, 'page_size': 1000} + accounts = issue_request('GET', url, self.api_headers, params=params) + + accounts_df = pd.DataFrame(accounts) + accounts_df = accounts_df.drop(columns='institution_id') + + if ignore_admin: + print("Excluding administrative and test accounts") + + drop_index = list(accounts_df[accounts_df['email'] == + 'data-management@email.arizona.edu'].index) + drop_index += list(accounts_df[accounts_df['email'].str.contains('-test@email.arizona.edu')].index) + + accounts_df = accounts_df.drop(drop_index).reset_index(drop=True) + + accounts_df = self.get_figshare_id(accounts_df) + + return accounts_df diff --git a/requirements.txt b/requirements.txt index 566083c..5427184 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ requests==2.22.0 +pandas==1.0.2 From 7eacb85edf5d703406312e73078b0eef2e1ae668 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Thu, 3 Dec 2020 10:51:25 -0700 Subject: [PATCH 06/32] stats.Figshare: Add get_institution_totals --- figstats/stats.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/figstats/stats.py b/figstats/stats.py index a76930a..bfeef17 100644 --- a/figstats/stats.py +++ b/figstats/stats.py @@ -139,3 +139,22 @@ def retrieve_institution_users(self, ignore_admin=False): accounts_df = self.get_figshare_id(accounts_df) return accounts_df + + def get_institution_totals(self, df=None, by_method='author'): + """ + Retrieve total views, downloads, and shares by either authors or articles + """ + + if isinstance(df, type(None)): + if by_method == 'author': + df = self.retrieve_institution_users(ignore_admin=False) + if by_method == 'article': + print("Need to retrieve articles") + + total_dict = dict() + for author_id in df.loc[0:5, 'author_id']: + total_dict[str(author_id)] = self.get_user_totals(author_id) + + # Construct pandas DataFrame + total_df = pd.DataFrame.from_dict(total_dict, orient='index') + return total_df From 91747b50b732189ae5b9e87ea1e0d24089798b3c Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Thu, 3 Dec 2020 12:10:05 -0700 Subject: [PATCH 07/32] Loop over all authors, include author name --- figstats/stats.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/figstats/stats.py b/figstats/stats.py index bfeef17..84d6aed 100644 --- a/figstats/stats.py +++ b/figstats/stats.py @@ -152,8 +152,13 @@ def get_institution_totals(self, df=None, by_method='author'): print("Need to retrieve articles") total_dict = dict() - for author_id in df.loc[0:5, 'author_id']: - total_dict[str(author_id)] = self.get_user_totals(author_id) + for i in df.index: + print(f"{i+1} of {len(df.index)}") + record = df.loc[i] + first_name = record['first_name'] + last_name = record['last_name'] + author_id = record['author_id'] + total_dict[f"{first_name} {last_name} ({author_id})"] = self.get_user_totals(author_id) # Construct pandas DataFrame total_df = pd.DataFrame.from_dict(total_dict, orient='index') From bb680da0b05fa1828557ff94e636a7fd1bd07238 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Thu, 3 Dec 2020 13:20:34 -0700 Subject: [PATCH 08/32] stats.Figshare: Add retrieve_institution_articles method - Adjust get_institution_total to handle by_method='articles' --- figstats/stats.py | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/figstats/stats.py b/figstats/stats.py index 84d6aed..c320382 100644 --- a/figstats/stats.py +++ b/figstats/stats.py @@ -140,6 +140,22 @@ def retrieve_institution_users(self, ignore_admin=False): return accounts_df + def retrieve_institution_articles(self): + + url = join(self.main_baseurl_institute, "articles") + + # Figshare API is limited to a maximum of 1000 per page + params = {'page': 1, + 'page_size': 1000} + articles = issue_request('GET', url, self.api_headers, params=params) + + articles_df = pd.DataFrame(articles) + + # Only consider published dataset + articles_df = articles_df.loc[articles_df['published_date'].notnull()] + articles_df = articles_df.reset_index() + return articles_df + def get_institution_totals(self, df=None, by_method='author'): """ Retrieve total views, downloads, and shares by either authors or articles @@ -149,17 +165,21 @@ def get_institution_totals(self, df=None, by_method='author'): if by_method == 'author': df = self.retrieve_institution_users(ignore_admin=False) if by_method == 'article': - print("Need to retrieve articles") + df = self.retrieve_institution_articles() total_dict = dict() for i in df.index: print(f"{i+1} of {len(df.index)}") record = df.loc[i] - first_name = record['first_name'] - last_name = record['last_name'] - author_id = record['author_id'] - total_dict[f"{first_name} {last_name} ({author_id})"] = self.get_user_totals(author_id) - + if by_method == 'author': + first_name = record['first_name'] + last_name = record['last_name'] + author_id = record['author_id'] + total_dict[f"{first_name} {last_name} ({author_id})"] = self.get_user_totals(author_id) + if by_method == 'article': + total_dict[f"{record['id']}"] = self.get_totals(record['id'], + item='article', + institution=False) # Construct pandas DataFrame total_df = pd.DataFrame.from_dict(total_dict, orient='index') return total_df From 6560567a4e116589d29c02b669a25c0df1fe6781 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Fri, 4 Dec 2020 09:33:03 -0700 Subject: [PATCH 09/32] stats.Figshare: Sort timeline by date in get_timeline method --- figstats/stats.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/figstats/stats.py b/figstats/stats.py index c320382..8d17cdc 100644 --- a/figstats/stats.py +++ b/figstats/stats.py @@ -78,14 +78,18 @@ def get_user_totals(self, author_id): def get_timeline(self, item_id, item='article', granularity='day', institution=False): - total_dict = {} + timeline_dict = {} for counter in ['views', 'downloads', 'shares']: # Using non-institution one since that seems to give correct stats urls = ['timeline', granularity, counter, item, str(item_id)] url = self.stats_endpoint(join(*urls), institution=institution) result = issue_request('GET', url, headers=self.basic_headers) - total_dict[counter] = result['timeline'] - return total_dict + # Sort contents by date + result_sort = {} + for key in sorted(result['timeline']): + result_sort[key] = result['timeline'][key] + timeline_dict[counter] = result_sort + return timeline_dict def get_figshare_id(self, accounts_df): """ From a8dd2c153e5c3705b641fd3ad7f436d6dc421b89 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Fri, 4 Dec 2020 09:50:38 -0700 Subject: [PATCH 10/32] stats.Figshare: Add cumulative numbers for views, downloads, shares in get_timeline method --- figstats/stats.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/figstats/stats.py b/figstats/stats.py index 8d17cdc..45abf10 100644 --- a/figstats/stats.py +++ b/figstats/stats.py @@ -86,9 +86,14 @@ def get_timeline(self, item_id, item='article', granularity='day', result = issue_request('GET', url, headers=self.basic_headers) # Sort contents by date result_sort = {} + cum_dict = {} + count = 0 for key in sorted(result['timeline']): result_sort[key] = result['timeline'][key] + count += result['timeline'][key] + cum_dict[key] = count timeline_dict[counter] = result_sort + timeline_dict[f"{counter}-cum"] = cum_dict return timeline_dict def get_figshare_id(self, accounts_df): From e4fe93e7034dd96999bea1de5271f55faea71567 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Fri, 4 Dec 2020 10:01:42 -0700 Subject: [PATCH 11/32] stats.Figshare: Ensure timeline is the same for all records --- figstats/stats.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/figstats/stats.py b/figstats/stats.py index 45abf10..27c86d0 100644 --- a/figstats/stats.py +++ b/figstats/stats.py @@ -88,9 +88,15 @@ def get_timeline(self, item_id, item='article', granularity='day', result_sort = {} cum_dict = {} count = 0 - for key in sorted(result['timeline']): - result_sort[key] = result['timeline'][key] - count += result['timeline'][key] + # Use views record for timeline (most populated generally) + if counter == 'views': + save_date = sorted(result['timeline']) + for key in save_date: + try: + result_sort[key] = result['timeline'][key] + count += result['timeline'][key] + except KeyError: + pass cum_dict[key] = count timeline_dict[counter] = result_sort timeline_dict[f"{counter}-cum"] = cum_dict From f01e556b626fa5b581719eb7dfacf722f3ac63b2 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Tue, 8 Dec 2020 09:55:35 -0700 Subject: [PATCH 12/32] Minor fix to zero daily numbers if not in timeline --- figstats/stats.py | 2 +- requirements.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/figstats/stats.py b/figstats/stats.py index 27c86d0..220d6e9 100644 --- a/figstats/stats.py +++ b/figstats/stats.py @@ -96,7 +96,7 @@ def get_timeline(self, item_id, item='article', granularity='day', result_sort[key] = result['timeline'][key] count += result['timeline'][key] except KeyError: - pass + result_sort[key] = 0 cum_dict[key] = count timeline_dict[counter] = result_sort timeline_dict[f"{counter}-cum"] = cum_dict diff --git a/requirements.txt b/requirements.txt index 5427184..eb73778 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ requests==2.22.0 pandas==1.0.2 +matplotlib From a77df39d7dee1ef8881f8adeed5cbf7a7e6d8b91 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Tue, 8 Dec 2020 10:18:19 -0700 Subject: [PATCH 13/32] stats: Add retrieve_article_details method --- figstats/stats.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/figstats/stats.py b/figstats/stats.py index 220d6e9..56523e1 100644 --- a/figstats/stats.py +++ b/figstats/stats.py @@ -171,6 +171,13 @@ def retrieve_institution_articles(self): articles_df = articles_df.reset_index() return articles_df + def retrieve_article_details(self, article_id): + """Retrieve article details""" + url = join('https://api.figshare.com/v2/', f"articles/{article_id}") + + article_dict = issue_request('GET', url, self.basic_headers) + return article_dict + def get_institution_totals(self, df=None, by_method='author'): """ Retrieve total views, downloads, and shares by either authors or articles From 5cf4adb654dd50951cc830ab2d383a65f0aa4388 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Tue, 8 Dec 2020 10:23:35 -0700 Subject: [PATCH 14/32] Add visualization module --- figstats/visualization.py | 71 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 figstats/visualization.py diff --git a/figstats/visualization.py b/figstats/visualization.py new file mode 100644 index 0000000..9ea1d0b --- /dev/null +++ b/figstats/visualization.py @@ -0,0 +1,71 @@ +from datetime import datetime as dt +import matplotlib.pyplot as plt +import matplotlib.dates as m_dates + + +def matplotlib_date_format(date_list): + """Generate list of datetime objects""" + datetime_list = [dt.strptime(date, '%Y-%m-%d') for date in date_list] + + return datetime_list + + +def plot_timeline(timeline_dict, article_dict): + """ + Purpose: + Plot timeline showing views and downloads + + :param timeline_dict: dict containing daily and cumulative numbers. + From stats.Figshare.get_timeline + :param article_dict: dictionary of article details. + From stats.Figshare.retrieve_article_details + """ + datetime_list = matplotlib_date_format(list(timeline_dict['views'].keys())) + fig, [ax0, ax1] = plt.subplots(ncols=2, nrows=2, + gridspec_kw={'height_ratios': [3, 1]}) + + counters = ['views', 'downloads'] + + for ii, counter in zip(range(len(counters)), counters): + + # Bottom panels + y_bottom = timeline_dict[counter].values() + ax1[ii].bar(datetime_list, y_bottom) + locator = m_dates.AutoDateLocator(minticks=3, maxticks=7) + formatter = m_dates.ConciseDateFormatter(locator) + ax1[ii].xaxis.set_major_locator(locator) + ax1[ii].xaxis.set_major_formatter(formatter) + ax1[ii].set_ylabel(f"Daily {counter}") + ax1[ii].tick_params(axis='y', direction='in') + ax1[ii].tick_params(axis='x', direction='out') + ax1[ii].annotate(f'Maximum daily {counter}: {max(y_bottom)}', + (0.025, 0.95), xycoords='axes fraction', + va='top', ha='left') + + # Top panels + y_top = timeline_dict[counter+'-cum'].values() + ax0[ii].bar(datetime_list, y_top) + ax0[ii].xaxis.set_major_locator(locator) + ax0[ii].xaxis.set_major_formatter(formatter) + ax0[ii].set_xticklabels('') + ax0[ii].set_ylabel(f"Cumulative {counter}") + ax0[ii].tick_params(axis='both', direction='in') + ax0[ii].annotate(f'Total {counter}: {max(y_top)}', (0.025, 0.975), + xycoords='axes fraction', va='top', ha='left') + # ax0[ii].set_xlabel('Date') + + # Heading containing title, author, license, DOI + heading = f"Title: {article_dict['title']}\n" + author_list = [auth_dict['full_name'] for auth_dict in article_dict['authors']] + if len(author_list) > 3: + heading += f"Authors: {author_list[0]} et al.\n" + else: + heading += f"Authors: {' '.join(author_list)}\n" + heading += f"License: {article_dict['license']['name']} " + heading += f"DOI: https://doi.org/{article_dict['doi']}" + ax0[0].text(0.01, 1.15, heading, ha='left', va='top', + transform=ax0[0].transAxes) + + fig.set_size_inches(8, 6) + plt.subplots_adjust(left=0.09, bottom=0.1, top=0.90, right=0.985, + hspace=0.025) From 355c88f676c2600e73d76c398c9b590758548dc1 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Tue, 8 Dec 2020 10:39:33 -0700 Subject: [PATCH 15/32] Add option to save PDF file or return matplotlib fig instance --- figstats/visualization.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/figstats/visualization.py b/figstats/visualization.py index 9ea1d0b..6c3ffbe 100644 --- a/figstats/visualization.py +++ b/figstats/visualization.py @@ -10,7 +10,7 @@ def matplotlib_date_format(date_list): return datetime_list -def plot_timeline(timeline_dict, article_dict): +def plot_timeline(timeline_dict, article_dict, out_pdf=None, save=False): """ Purpose: Plot timeline showing views and downloads @@ -19,6 +19,10 @@ def plot_timeline(timeline_dict, article_dict): From stats.Figshare.get_timeline :param article_dict: dictionary of article details. From stats.Figshare.retrieve_article_details + :param out_pdf: Output filename. Default: timeline_.pdf + :param save: bool to save PDF file. Otherwise return matplotlib fig object + + :return fig: If save == False, fig is returned """ datetime_list = matplotlib_date_format(list(timeline_dict['views'].keys())) fig, [ax0, ax1] = plt.subplots(ncols=2, nrows=2, @@ -69,3 +73,10 @@ def plot_timeline(timeline_dict, article_dict): fig.set_size_inches(8, 6) plt.subplots_adjust(left=0.09, bottom=0.1, top=0.90, right=0.985, hspace=0.025) + + if save: + if isinstance(out_pdf, type(None)): + out_pdf = f"timeline_{article_dict['id']}.pdf" + fig.savefig(out_pdf) + else: + return fig From 4f1783edc9316f6d4f28d6d390e78209fad4efae Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Tue, 8 Dec 2020 10:56:58 -0700 Subject: [PATCH 16/32] visualization: Add plot_shares method --- figstats/visualization.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/figstats/visualization.py b/figstats/visualization.py index 6c3ffbe..1249747 100644 --- a/figstats/visualization.py +++ b/figstats/visualization.py @@ -10,6 +10,17 @@ def matplotlib_date_format(date_list): return datetime_list +def plot_shares(ax, timeline_dict): + shares_dict = timeline_dict['shares'] + non_zero = [key for key in shares_dict.keys() if shares_dict[key] > 0] + + if len(non_zero) > 0: + dates = matplotlib_date_format(non_zero) + for date, key in zip(dates, non_zero): + ax.axvline(x=date, color='red') + ax.text(date, 10, f"{shares_dict[key]}", color='red') + + def plot_timeline(timeline_dict, article_dict, out_pdf=None, save=False): """ Purpose: @@ -58,6 +69,8 @@ def plot_timeline(timeline_dict, article_dict, out_pdf=None, save=False): xycoords='axes fraction', va='top', ha='left') # ax0[ii].set_xlabel('Date') + plot_shares(ax0[ii], timeline_dict) + # Heading containing title, author, license, DOI heading = f"Title: {article_dict['title']}\n" author_list = [auth_dict['full_name'] for auth_dict in article_dict['authors']] From 229618e599a77137d58a3bc4ced1203ad698490a Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Tue, 8 Dec 2020 10:57:16 -0700 Subject: [PATCH 17/32] Add numpy to requirements --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index eb73778..ff48e81 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ requests==2.22.0 pandas==1.0.2 matplotlib +numpy From 1cd267ed999a01cf41e39b4a4a4496aa1ea16251 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Tue, 8 Dec 2020 13:27:28 -0700 Subject: [PATCH 18/32] Exclude testing scripts --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index b6e4761..2db35f0 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,5 @@ dmypy.json # Pyre type checker .pyre/ + +testing.py From 9347032f0b1ea8237d8c0e5899ed4156f39831f2 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Wed, 9 Dec 2020 13:50:28 -0700 Subject: [PATCH 19/32] Refactor to use counter_list --- figstats/stats.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/figstats/stats.py b/figstats/stats.py index 56523e1..b1f9d69 100644 --- a/figstats/stats.py +++ b/figstats/stats.py @@ -3,6 +3,8 @@ from .commons import issue_request +counter_list = ['views', 'downloads', 'shares'] + class Figshare: """ @@ -56,7 +58,7 @@ def get_totals(self, item_id, item='article', institution=False): raise ValueError("Incorrect item type") total_dict = {} - for counter in ['views', 'downloads', 'shares']: + for counter in counter_list: # Using non-institution one since that seems to give correct stats url = self.stats_endpoint(join('total', counter, item, str(item_id)), institution=institution) @@ -79,7 +81,7 @@ def get_user_totals(self, author_id): def get_timeline(self, item_id, item='article', granularity='day', institution=False): timeline_dict = {} - for counter in ['views', 'downloads', 'shares']: + for counter in counter_list: # Using non-institution one since that seems to give correct stats urls = ['timeline', granularity, counter, item, str(item_id)] url = self.stats_endpoint(join(*urls), institution=institution) From 897c21dd2008e5b40e7abdee0bf2ba7fbfde155e Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Wed, 9 Dec 2020 14:00:34 -0700 Subject: [PATCH 20/32] Change cumulative panels to from bar to line - Add text for total number of shares - Set axes lower value - Label daily shares at bottom --- figstats/visualization.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/figstats/visualization.py b/figstats/visualization.py index 1249747..7c92e46 100644 --- a/figstats/visualization.py +++ b/figstats/visualization.py @@ -18,7 +18,8 @@ def plot_shares(ax, timeline_dict): dates = matplotlib_date_format(non_zero) for date, key in zip(dates, non_zero): ax.axvline(x=date, color='red') - ax.text(date, 10, f"{shares_dict[key]}", color='red') + ax.text(date, 1, f"{shares_dict[key]}", color='red', + ha='right', va='bottom') def plot_timeline(timeline_dict, article_dict, out_pdf=None, save=False): @@ -56,10 +57,12 @@ def plot_timeline(timeline_dict, article_dict, out_pdf=None, save=False): ax1[ii].annotate(f'Maximum daily {counter}: {max(y_bottom)}', (0.025, 0.95), xycoords='axes fraction', va='top', ha='left') + ax1[ii].set_ylim(bottom=0) # Top panels y_top = timeline_dict[counter+'-cum'].values() - ax0[ii].bar(datetime_list, y_top) + ax0[ii].plot(datetime_list, y_top, linestyle='-', linewidth=2.0, + marker='') ax0[ii].xaxis.set_major_locator(locator) ax0[ii].xaxis.set_major_formatter(formatter) ax0[ii].set_xticklabels('') @@ -68,21 +71,26 @@ def plot_timeline(timeline_dict, article_dict, out_pdf=None, save=False): ax0[ii].annotate(f'Total {counter}: {max(y_top)}', (0.025, 0.975), xycoords='axes fraction', va='top', ha='left') # ax0[ii].set_xlabel('Date') + ax0[ii].set_ylim(bottom=0) plot_shares(ax0[ii], timeline_dict) # Heading containing title, author, license, DOI - heading = f"Title: {article_dict['title']}\n" + left_heading = f"Title: {article_dict['title']}\n" author_list = [auth_dict['full_name'] for auth_dict in article_dict['authors']] if len(author_list) > 3: - heading += f"Authors: {author_list[0]} et al.\n" + left_heading += f"Authors: {author_list[0]} et al.\n" else: - heading += f"Authors: {' '.join(author_list)}\n" - heading += f"License: {article_dict['license']['name']} " - heading += f"DOI: https://doi.org/{article_dict['doi']}" - ax0[0].text(0.01, 1.15, heading, ha='left', va='top', + left_heading += f"Authors: {' '.join(author_list)}\n" + left_heading += f"License: {article_dict['license']['name']} " + left_heading += f"DOI: https://doi.org/{article_dict['doi']}" + ax0[0].text(0.01, 1.15, left_heading, ha='left', va='top', transform=ax0[0].transAxes) + right_heading = f"Shares: {max(timeline_dict['shares-cum'].values())}" + ax0[1].text(1.0, 1.15, right_heading, ha='right', va='top', + transform=ax0[1].transAxes) + fig.set_size_inches(8, 6) plt.subplots_adjust(left=0.09, bottom=0.1, top=0.90, right=0.985, hspace=0.025) From 9ef520309561466a6938e67c70079a5356651fe9 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Wed, 9 Dec 2020 14:23:32 -0700 Subject: [PATCH 21/32] Baseline script for multi-timeline retrieval --- scripts/make_timeline_plots | 40 +++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100755 scripts/make_timeline_plots diff --git a/scripts/make_timeline_plots b/scripts/make_timeline_plots new file mode 100755 index 0000000..a96edc0 --- /dev/null +++ b/scripts/make_timeline_plots @@ -0,0 +1,40 @@ +#!/usr/bin/env python + +import argparse + +from figstats import stats, visualization +from matplotlib.backends.backend_pdf import PdfPages + + +if __name__ in '__main__': + parser = argparse.ArgumentParser(description='Command-line driver for figstats timeline plots.') + parser.add_argument('--api_token', required=True, help='Figshare API token') + parser.add_argument('--basic_token', required=True, help='Figshare base64 API stats token') + parser.add_argument('--institute', required=True, help='Name of institution') + args = parser.parse_args() + + fs = stats.Figshare(api_token=args.api_token, + basic_token=args.basic_token, + institution=True, + institute=args.institute) + + articles_df = fs.retrieve_institution_articles() + + out_pdf = f"{args.institute}_timeline_plots.pdf" + pp = PdfPages(out_pdf) + + for article_id in articles_df['id']: + print(f"Working on : {article_id}") + article_dict = fs.retrieve_article_details(article_id) + try: + timeline_dict = fs.get_timeline(article_id, item='article', institution=True) + + fig = visualization.plot_timeline(timeline_dict, article_dict, save=False) + + fig.savefig(pp, format='pdf', bbox_inches='tight') + fig.clear() + except TypeError: + print("TypeError") + + print(f"Writing : {out_pdf}") + pp.close() From 1943cfab6ddac62182cd57a0aebd20cff8821f02 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Thu, 10 Dec 2020 13:31:56 -0700 Subject: [PATCH 22/32] Fix case when timeline is not available (e.g., shares) --- figstats/stats.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/figstats/stats.py b/figstats/stats.py index b1f9d69..e7862a0 100644 --- a/figstats/stats.py +++ b/figstats/stats.py @@ -94,11 +94,15 @@ def get_timeline(self, item_id, item='article', granularity='day', if counter == 'views': save_date = sorted(result['timeline']) for key in save_date: - try: - result_sort[key] = result['timeline'][key] - count += result['timeline'][key] - except KeyError: + if isinstance(result['timeline'], type(None)): + # Handle when counter is not available (NoneType) result_sort[key] = 0 + else: + try: + result_sort[key] = result['timeline'][key] + count += result['timeline'][key] + except KeyError: + result_sort[key] = 0 cum_dict[key] = count timeline_dict[counter] = result_sort timeline_dict[f"{counter}-cum"] = cum_dict From 968b6b434b5e4aa0447e9cea0ebd181f220cd0c1 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Thu, 10 Dec 2020 13:54:17 -0700 Subject: [PATCH 23/32] Use textwrap to handle long title --- figstats/visualization.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/figstats/visualization.py b/figstats/visualization.py index 7c92e46..5556a7b 100644 --- a/figstats/visualization.py +++ b/figstats/visualization.py @@ -2,6 +2,10 @@ import matplotlib.pyplot as plt import matplotlib.dates as m_dates +from textwrap import wrap + +title_width = 80 + def matplotlib_date_format(date_list): """Generate list of datetime objects""" @@ -76,7 +80,13 @@ def plot_timeline(timeline_dict, article_dict, out_pdf=None, save=False): plot_shares(ax0[ii], timeline_dict) # Heading containing title, author, license, DOI - left_heading = f"Title: {article_dict['title']}\n" + + title_chunks = wrap(article_dict['title'], title_width) + for cc in range(len(title_chunks)): + if cc == 0: + left_heading = f"Title: {title_chunks[cc]}\n" + else: + left_heading += f" {title_chunks[cc]}\n" author_list = [auth_dict['full_name'] for auth_dict in article_dict['authors']] if len(author_list) > 3: left_heading += f"Authors: {author_list[0]} et al.\n" @@ -84,15 +94,15 @@ def plot_timeline(timeline_dict, article_dict, out_pdf=None, save=False): left_heading += f"Authors: {' '.join(author_list)}\n" left_heading += f"License: {article_dict['license']['name']} " left_heading += f"DOI: https://doi.org/{article_dict['doi']}" - ax0[0].text(0.01, 1.15, left_heading, ha='left', va='top', + ax0[0].text(0.01, 1.25, left_heading, ha='left', va='top', transform=ax0[0].transAxes) right_heading = f"Shares: {max(timeline_dict['shares-cum'].values())}" - ax0[1].text(1.0, 1.15, right_heading, ha='right', va='top', + ax0[1].text(1.0, 1.25, right_heading, ha='right', va='top', transform=ax0[1].transAxes) fig.set_size_inches(8, 6) - plt.subplots_adjust(left=0.09, bottom=0.1, top=0.90, right=0.985, + plt.subplots_adjust(left=0.09, bottom=0.08, top=0.85, right=0.985, hspace=0.025) if save: From 64a7a1d92bddafe5a333a0d93eda1374f0e8332c Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Thu, 10 Dec 2020 14:05:03 -0700 Subject: [PATCH 24/32] Reduce title_width to give room for shares in upper right --- figstats/visualization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/figstats/visualization.py b/figstats/visualization.py index 5556a7b..29dce89 100644 --- a/figstats/visualization.py +++ b/figstats/visualization.py @@ -4,7 +4,7 @@ from textwrap import wrap -title_width = 80 +title_width = 75 def matplotlib_date_format(date_list): From 2e8590a39b1c0a67ba5b23728c38352391351d60 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Wed, 3 Mar 2021 16:05:53 -0700 Subject: [PATCH 25/32] Add version info --- figstats/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/figstats/__init__.py b/figstats/__init__.py index e69de29..b8023d8 100644 --- a/figstats/__init__.py +++ b/figstats/__init__.py @@ -0,0 +1 @@ +__version__ = '0.0.1' From 0442e8ba063f959357264ca9f01c835bda0d3dc1 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Wed, 3 Mar 2021 16:30:48 -0700 Subject: [PATCH 26/32] Add HTTPException handling - important for private/restricted data --- scripts/make_timeline_plots | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/scripts/make_timeline_plots b/scripts/make_timeline_plots index a96edc0..985d83f 100755 --- a/scripts/make_timeline_plots +++ b/scripts/make_timeline_plots @@ -1,6 +1,7 @@ #!/usr/bin/env python import argparse +from requests.exceptions import HTTPError from figstats import stats, visualization from matplotlib.backends.backend_pdf import PdfPages @@ -25,16 +26,18 @@ if __name__ in '__main__': for article_id in articles_df['id']: print(f"Working on : {article_id}") - article_dict = fs.retrieve_article_details(article_id) try: - timeline_dict = fs.get_timeline(article_id, item='article', institution=True) - - fig = visualization.plot_timeline(timeline_dict, article_dict, save=False) - - fig.savefig(pp, format='pdf', bbox_inches='tight') - fig.clear() - except TypeError: - print("TypeError") + article_dict = fs.retrieve_article_details(article_id) + try: + timeline_dict = fs.get_timeline(article_id, item='article', institution=True) + + fig = visualization.plot_timeline(timeline_dict, article_dict, save=False) + fig.savefig(pp, format='pdf', bbox_inches='tight') + fig.clear() + except TypeError: + print("TypeError") + except HTTPError: + pass print(f"Writing : {out_pdf}") pp.close() From 326aff230f8d353d662e368160251c486b7c3090 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Tue, 9 Mar 2021 15:19:21 -0700 Subject: [PATCH 27/32] make_timeline_plots: Adjust for PEP8 width - Provide example for institution name --- scripts/make_timeline_plots | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/scripts/make_timeline_plots b/scripts/make_timeline_plots index 985d83f..6af9907 100755 --- a/scripts/make_timeline_plots +++ b/scripts/make_timeline_plots @@ -8,10 +8,14 @@ from matplotlib.backends.backend_pdf import PdfPages if __name__ in '__main__': - parser = argparse.ArgumentParser(description='Command-line driver for figstats timeline plots.') - parser.add_argument('--api_token', required=True, help='Figshare API token') - parser.add_argument('--basic_token', required=True, help='Figshare base64 API stats token') - parser.add_argument('--institute', required=True, help='Name of institution') + description = 'Command-line driver for figstats timeline plots.' + parser = argparse.ArgumentParser(description=description) + parser.add_argument('--api_token', required=True, + help='Figshare API token') + parser.add_argument('--basic_token', required=True, + help='Figshare base64 API stats token') + parser.add_argument('--institute', required=True, + help='Name of institution (e.g., "arizona")') args = parser.parse_args() fs = stats.Figshare(api_token=args.api_token, @@ -29,9 +33,11 @@ if __name__ in '__main__': try: article_dict = fs.retrieve_article_details(article_id) try: - timeline_dict = fs.get_timeline(article_id, item='article', institution=True) + timeline_dict = fs.get_timeline(article_id, item='article', + institution=True) - fig = visualization.plot_timeline(timeline_dict, article_dict, save=False) + fig = visualization.plot_timeline(timeline_dict, article_dict, + save=False) fig.savefig(pp, format='pdf', bbox_inches='tight') fig.clear() except TypeError: From cfcf0957f8e6f7f5e25c5c2df79411126991ff6d Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Tue, 9 Mar 2021 15:45:05 -0700 Subject: [PATCH 28/32] visualization: type hinting - Add docstrings for plot_shares - Revise docstrings for plot_timeline - Change out_pdf to string (default empty) --- figstats/visualization.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/figstats/visualization.py b/figstats/visualization.py index 29dce89..e751f4a 100644 --- a/figstats/visualization.py +++ b/figstats/visualization.py @@ -1,20 +1,24 @@ +from typing import Union from datetime import datetime as dt + import matplotlib.pyplot as plt import matplotlib.dates as m_dates +from matplotlib import figure, axes from textwrap import wrap title_width = 75 -def matplotlib_date_format(date_list): +def matplotlib_date_format(date_list: list) -> list: """Generate list of datetime objects""" datetime_list = [dt.strptime(date, '%Y-%m-%d') for date in date_list] return datetime_list -def plot_shares(ax, timeline_dict): +def plot_shares(ax: axes.Axes, timeline_dict: dict): + """Plot shares data""" shares_dict = timeline_dict['shares'] non_zero = [key for key in shares_dict.keys() if shares_dict[key] > 0] @@ -26,17 +30,18 @@ def plot_shares(ax, timeline_dict): ha='right', va='bottom') -def plot_timeline(timeline_dict, article_dict, out_pdf=None, save=False): +def plot_timeline(timeline_dict: dict, article_dict: dict, + out_pdf: str = '', save: bool = False) \ + -> Union[None, figure.Figure]: """ - Purpose: - Plot timeline showing views and downloads + Plot timeline showing views and downloads - :param timeline_dict: dict containing daily and cumulative numbers. - From stats.Figshare.get_timeline - :param article_dict: dictionary of article details. - From stats.Figshare.retrieve_article_details - :param out_pdf: Output filename. Default: timeline_.pdf - :param save: bool to save PDF file. Otherwise return matplotlib fig object + :param timeline_dict: Contains daily and cumulative numbers. + From ``stats.Figshare.get_timeline`` + :param article_dict: Contains articles details. + From ``stats.Figshare.retrieve_article_details`` + :param out_pdf: Output filename. Default: `timeline_.pdf` + :param save: Flag to save PDF file. Otherwise returns ``matplotlib`` fig object :return fig: If save == False, fig is returned """ @@ -106,7 +111,7 @@ def plot_timeline(timeline_dict, article_dict, out_pdf=None, save=False): hspace=0.025) if save: - if isinstance(out_pdf, type(None)): + if not out_pdf: out_pdf = f"timeline_{article_dict['id']}.pdf" fig.savefig(out_pdf) else: From 218a596d921d1a41d1b9acf5b34216281d4a49b0 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Tue, 9 Mar 2021 15:45:39 -0700 Subject: [PATCH 29/32] make_timeline_plots: Temporarily handle UnicodeEncodeError --- scripts/make_timeline_plots | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/make_timeline_plots b/scripts/make_timeline_plots index 6af9907..b90c848 100755 --- a/scripts/make_timeline_plots +++ b/scripts/make_timeline_plots @@ -24,7 +24,6 @@ if __name__ in '__main__': institute=args.institute) articles_df = fs.retrieve_institution_articles() - out_pdf = f"{args.institute}_timeline_plots.pdf" pp = PdfPages(out_pdf) @@ -42,7 +41,7 @@ if __name__ in '__main__': fig.clear() except TypeError: print("TypeError") - except HTTPError: + except (HTTPError, UnicodeEncodeError): pass print(f"Writing : {out_pdf}") From 973048b862d7015b81064330d0458eac3b6bfe9f Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Tue, 9 Mar 2021 15:47:48 -0700 Subject: [PATCH 30/32] Add PyCharm files to .gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 2db35f0..92a1198 100644 --- a/.gitignore +++ b/.gitignore @@ -129,3 +129,6 @@ dmypy.json .pyre/ testing.py + +# PyCharm +.idea/ From b06ce5db98b57494e01c0c1ecc211c8572a0eb1f Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Tue, 27 Jul 2021 08:57:46 -0700 Subject: [PATCH 31/32] Adjust argparse inputs with hyphen format --- scripts/make_timeline_plots | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/make_timeline_plots b/scripts/make_timeline_plots index b90c848..9ee3e4d 100755 --- a/scripts/make_timeline_plots +++ b/scripts/make_timeline_plots @@ -10,11 +10,11 @@ from matplotlib.backends.backend_pdf import PdfPages if __name__ in '__main__': description = 'Command-line driver for figstats timeline plots.' parser = argparse.ArgumentParser(description=description) - parser.add_argument('--api_token', required=True, + parser.add_argument('-a', '--api-token', required=True, help='Figshare API token') - parser.add_argument('--basic_token', required=True, + parser.add_argument('-b', '--basic-token', required=True, help='Figshare base64 API stats token') - parser.add_argument('--institute', required=True, + parser.add_argument('-i', '--institute', required=True, help='Name of institution (e.g., "arizona")') args = parser.parse_args() @@ -37,11 +37,13 @@ if __name__ in '__main__': fig = visualization.plot_timeline(timeline_dict, article_dict, save=False) + print(type(fig)) fig.savefig(pp, format='pdf', bbox_inches='tight') fig.clear() except TypeError: print("TypeError") except (HTTPError, UnicodeEncodeError): + print(f"Skipping: {article_id}") pass print(f"Writing : {out_pdf}") From d28199647629b4f5cc84820ecdac77f2ce8fbdf1 Mon Sep 17 00:00:00 2001 From: Chun Ly Date: Tue, 27 Jul 2021 09:22:13 -0700 Subject: [PATCH 32/32] Fix to ensure using a raw JSON response with no basic token --- figstats/stats.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/figstats/stats.py b/figstats/stats.py index e7862a0..0262586 100644 --- a/figstats/stats.py +++ b/figstats/stats.py @@ -23,7 +23,9 @@ def __init__(self, api_token='', basic_token='', institution=False, institute='' self.stats_baseurl_institute = join(self.stats_baseurl, self.institute) # Base64 token - self.basic_headers = {'Content-Type': 'application/json'} + self.basic_headers0 = {'Content-Type': 'application/json'} + + self.basic_headers = self.basic_headers0.copy() self.basic_token = basic_token if self.basic_token: self.basic_headers['Authorization'] = f'Basic {self.basic_token}' @@ -180,8 +182,7 @@ def retrieve_institution_articles(self): def retrieve_article_details(self, article_id): """Retrieve article details""" url = join('https://api.figshare.com/v2/', f"articles/{article_id}") - - article_dict = issue_request('GET', url, self.basic_headers) + article_dict = issue_request('GET', url, self.basic_headers0) return article_dict def get_institution_totals(self, df=None, by_method='author'):