From c91a618229958e564bce940c30ee38c7e523e66d Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Wed, 18 Nov 2020 10:58:02 -0700
Subject: [PATCH 01/32] Add API code #1

 - Include issue_request
 - setup.py config and requirements for installation
---
 figstats/commons.py | 52 +++++++++++++++++++++++++++++++++++++++++++++
 figstats/stats.py   | 39 ++++++++++++++++++++++++++++++++++
 requirements.txt    |  1 +
 setup.py            | 21 ++++++++++++++++++
 4 files changed, 113 insertions(+)
 create mode 100644 figstats/commons.py
 create mode 100644 figstats/stats.py
 create mode 100644 requirements.txt
 create mode 100644 setup.py

diff --git a/figstats/commons.py b/figstats/commons.py
new file mode 100644
index 0000000..9e0c51a
--- /dev/null
+++ b/figstats/commons.py
@@ -0,0 +1,52 @@
+import json
+import requests
+from requests.exceptions import HTTPError
+
+
+def issue_request(method, url, headers, data=None, binary=False,
+                  params=None):
+    """Wrapper for HTTP request
+
+    Parameters
+    ----------
+    method : str
+        HTTP method. One of GET, PUT, POST or DELETE
+
+    url : str
+        URL for the request
+
+    headers: dict
+        HTTP header information
+
+    data: dict
+        Figshare article data
+
+    binary: bool
+        Whether data is binary or not
+
+    params: dict
+        Additional information for URL GET request
+
+    Returns
+    -------
+    response_data: dict
+        JSON response for the request returned as python dict
+    """
+    if data is not None and not binary:
+        data = json.dumps(data)
+
+    response = requests.request(method, url, headers=headers,
+                                data=data, params=params)
+
+    try:
+        response.raise_for_status()
+        try:
+            response_data = json.loads(response.text)
+        except ValueError:
+            response_data = response.content
+    except HTTPError as error:
+        print('Caught an HTTPError: {}'.format(error))
+        print('Body:\n', response.text)
+        raise
+
+    return response_data
diff --git a/figstats/stats.py b/figstats/stats.py
new file mode 100644
index 0000000..4c3bf5a
--- /dev/null
+++ b/figstats/stats.py
@@ -0,0 +1,39 @@
+from os.path import join
+
+# from ldcoolp.curation.api.figshare import FigshareInstituteAdmin
+
+from .commons import issue_request
+
+class Figshare:
+    """
+    Purpose:
+      A Python interface to work with Figshare statistics endpoint
+
+    """
+
+    def __init__(self, token='', institution=False, institute=''):
+
+        self.baseurl = 'https://stats.figshare.com'
+        self.institution = institution
+        if not institute:
+            self.institute = institute
+            self.baseurl_institute = join(self.baseurl, self.institute)
+        self.token = token
+        self.headers = {'Content-Type': 'application/json'}
+        if self.token:
+            self.headers['Authorization'] = f'token {self.token}'
+
+    def endpoint(self, link, institution=False):
+        if institution:
+            return join(self.baseurl_institute, link)
+        else:
+            return join(self.baseurl, link)
+
+    def get_totals(self, item_id, item='article'):
+        total_dict = {}
+        for counter in ['views', 'downloads', 'shares']:
+            # Using non-institution one since that seems to give correct stats
+            url = self.endpoint(join('total', counter, item, str(item_id)))
+            result = issue_request('GET', url, headers=self.headers)
+            total_dict[counter] = result['totals']
+        return total_dict
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..566083c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+requests==2.22.0
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..e3d593e
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,21 @@
+from setuptools import setup
+
+with open("README.md", "r") as fh:
+    long_description = fh.read()
+
+with open("requirements.txt", "r") as fr:
+    requirements = fr.read().splitlines()
+
+setup(
+    name='figstats',
+    version='v0.0.1',
+    packages=['figstats'],
+    url='https://github.com/UAL-ODIS/figstats',
+    license='MIT License',
+    author='Chun Ly',
+    author_email='astro.chun@gmail.com',
+    description='Python tool to retrieve stats from Figshare API',
+    long_description=long_description,
+    long_description_content_type='text/markdown',
+    install_requires=requirements
+)

From a06513d5c379074569d25e98cbb67ef8fae6e9ea Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Wed, 18 Nov 2020 12:58:05 -0700
Subject: [PATCH 02/32] stats.Figshare: Add institution in get_totals #1

---
 figstats/__init__.py | 0
 figstats/stats.py    | 9 ++++++---
 2 files changed, 6 insertions(+), 3 deletions(-)
 create mode 100644 figstats/__init__.py

diff --git a/figstats/__init__.py b/figstats/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/figstats/stats.py b/figstats/stats.py
index 4c3bf5a..1badbed 100644
--- a/figstats/stats.py
+++ b/figstats/stats.py
@@ -4,6 +4,7 @@
 
 from .commons import issue_request
 
+
 class Figshare:
     """
     Purpose:
@@ -15,7 +16,7 @@ def __init__(self, token='', institution=False, institute=''):
 
         self.baseurl = 'https://stats.figshare.com'
         self.institution = institution
-        if not institute:
+        if institute:
             self.institute = institute
             self.baseurl_institute = join(self.baseurl, self.institute)
         self.token = token
@@ -29,11 +30,13 @@ def endpoint(self, link, institution=False):
         else:
             return join(self.baseurl, link)
 
-    def get_totals(self, item_id, item='article'):
+    def get_totals(self, item_id, item='article', institution=False):
         total_dict = {}
         for counter in ['views', 'downloads', 'shares']:
             # Using non-institution one since that seems to give correct stats
-            url = self.endpoint(join('total', counter, item, str(item_id)))
+            url = self.endpoint(join('total', counter, item, str(item_id)),
+                                institution=institution)
+            print(url)
             result = issue_request('GET', url, headers=self.headers)
             total_dict[counter] = result['totals']
         return total_dict

From 293b0c4b8e53b7ae0273fe4374cff0ff58799965 Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Wed, 2 Dec 2020 11:01:07 -0700
Subject: [PATCH 03/32] stats.Figshare: Add get_user_totals and get_timeline
 methods

Note: get_timeline currently works with figshare datasets (not institutional ones)
---
 figstats/stats.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/figstats/stats.py b/figstats/stats.py
index 1badbed..f6de284 100644
--- a/figstats/stats.py
+++ b/figstats/stats.py
@@ -36,7 +36,24 @@ def get_totals(self, item_id, item='article', institution=False):
             # Using non-institution one since that seems to give correct stats
             url = self.endpoint(join('total', counter, item, str(item_id)),
                                 institution=institution)
-            print(url)
             result = issue_request('GET', url, headers=self.headers)
             total_dict[counter] = result['totals']
         return total_dict
+
+    def get_user_totals(self, author_id):
+        # author_id is not the same as institution_user_id for institutional accounts
+        total_dict = self.get_totals(author_id, item='author',
+                                     institution=self.institution)
+        return total_dict
+
+    def get_timeline(self, item_id, item='article', granularity='day',
+                     institution=False):
+        total_dict = {}
+        for counter in ['views', 'downloads', 'shares']:
+            # Using non-institution one since that seems to give correct stats
+            urls = ['timeline', granularity, counter, item, str(item_id)]
+            url = self.endpoint(join(*urls), institution=institution)
+            print(url)
+            result = issue_request('GET', url, headers=self.headers)
+            total_dict[counter] = result['timeline']
+        return total_dict

From ef31020e8706390944011334e28afbe92c3c94e1 Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Thu, 3 Dec 2020 10:02:16 -0700
Subject: [PATCH 04/32] stats.Figshare: Split basic/API credentials, add docstrings

 - Adjust attributes to distinguish basic cred and API token
 - Add documentation for get_totals, get_user_totals
---
 figstats/stats.py | 61 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 43 insertions(+), 18 deletions(-)

diff --git a/figstats/stats.py b/figstats/stats.py
index f6de284..b759b2e 100644
--- a/figstats/stats.py
+++ b/figstats/stats.py
@@ -12,38 +12,64 @@ class Figshare:
 
     """
 
-    def __init__(self, token='', institution=False, institute=''):
+    def __init__(self, api_token='', basic_token='', institution=False, institute=''):
 
-        self.baseurl = 'https://stats.figshare.com'
+        # For stats API
+        self.stats_baseurl = 'https://stats.figshare.com'
         self.institution = institution
         if institute:
             self.institute = institute
-            self.baseurl_institute = join(self.baseurl, self.institute)
-        self.token = token
-        self.headers = {'Content-Type': 'application/json'}
-        if self.token:
-            self.headers['Authorization'] = f'token {self.token}'
+            self.stats_baseurl_institute = join(self.stats_baseurl, self.institute)
 
-    def endpoint(self, link, institution=False):
+        # Base64 token
+        self.basic_headers = {'Content-Type': 'application/json'}
+        self.basic_token = basic_token
+        if self.basic_token:
+            self.basic_headers['Authorization'] = f'Basic {self.basic_token}'
+
+        # API token
+        self.api_headers = {'Content-Type': 'application/json'}
+        self.api_token = api_token
+        if self.api_token:
+            self.api_headers['Authorization'] = f'token {self.api_token}'
+
+    def stats_endpoint(self, link, institution=False):
         if institution:
-            return join(self.baseurl_institute, link)
+            return join(self.stats_baseurl_institute, link)
         else:
-            return join(self.baseurl, link)
+            return join(self.stats_baseurl, link)
 
     def get_totals(self, item_id, item='article', institution=False):
+        """
+        Retrieve totals of views, downloads, and share for an "item"
+        Item can be 'article', 'author', 'collection', 'group' or 'project'
+        Note: This does not require authenticating credentials for institution accounts
+
+        See: https://docs.figshare.com/#stats_totals
+        """
+
+        if item not in ['article', 'author', 'collection', 'group', 'project']:
+            raise ValueError("Incorrect item type")
+
         total_dict = {}
         for counter in ['views', 'downloads', 'shares']:
             # Using non-institution one since that seems to give correct stats
-            url = self.endpoint(join('total', counter, item, str(item_id)),
-                                institution=institution)
-            result = issue_request('GET', url, headers=self.headers)
+            url = self.stats_endpoint(join('total', counter, item, str(item_id)),
+                                      institution=institution)
+            result = issue_request('GET', url, headers=self.basic_headers)
             total_dict[counter] = result['totals']
         return total_dict
 
     def get_user_totals(self, author_id):
-        # author_id is not the same as institution_user_id for institutional accounts
+        """
+        Retrieve an author's total using get_totals()
+
+        :param author_id: This is not the same as the institution_user_id for institutional accounts
+        :return: total_dict: dict containing total views, downloads, and shares
+        Note: This does not require authenticating credentials for institution accounts
+        """
         total_dict = self.get_totals(author_id, item='author',
-                                     institution=self.institution)
+                                     institution=False)
         return total_dict
 
     def get_timeline(self, item_id, item='article', granularity='day',
@@ -52,8 +78,7 @@ def get_timeline(self, item_id, item='article', granularity='day',
         for counter in ['views', 'downloads', 'shares']:
             # Using non-institution one since that seems to give correct stats
             urls = ['timeline', granularity, counter, item, str(item_id)]
-            url = self.endpoint(join(*urls), institution=institution)
-            print(url)
-            result = issue_request('GET', url, headers=self.headers)
+            url = self.stats_endpoint(join(*urls), institution=institution)
+            result = issue_request('GET', url, headers=self.basic_headers)
             total_dict[counter] = result['timeline']
         return total_dict

From 8e7b485ccdfe4cf94e1925fd64c88647a627579a Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Thu, 3 Dec 2020 10:35:14 -0700
Subject: [PATCH 05/32] stats.Figshare: Add get_figshare_id and
 retrieve_institution_users methods

 - Add Figshare API endpoint
---
 figstats/stats.py | 61 +++++++++++++++++++++++++++++++++++++++++++++--
 requirements.txt  |  1 +
 2 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/figstats/stats.py b/figstats/stats.py
index b759b2e..a76930a 100644
--- a/figstats/stats.py
+++ b/figstats/stats.py
@@ -1,6 +1,5 @@
 from os.path import join
-
-# from ldcoolp.curation.api.figshare import FigshareInstituteAdmin
+import pandas as pd
 
 from .commons import issue_request
 
@@ -27,6 +26,11 @@ def __init__(self, api_token='', basic_token='', institution=False, institute=''
         if self.basic_token:
             self.basic_headers['Authorization'] = f'Basic {self.basic_token}'
 
+        # For Figshare API
+        self.main_baseurl = 'https://api.figshare.com/v2/account/'
+        if self.institution:
+            self.main_baseurl_institute = join(self.main_baseurl, "institution")
+
         # API token
         self.api_headers = {'Content-Type': 'application/json'}
         self.api_token = api_token
@@ -82,3 +86,56 @@ def get_timeline(self, item_id, item='article', granularity='day',
             result = issue_request('GET', url, headers=self.basic_headers)
             total_dict[counter] = result['timeline']
         return total_dict
+
+    def get_figshare_id(self, accounts_df):
+        """
+        Retrieve Figshare account ID(s)
+        Note: This is not the institutional ID, but one associated with
+              the unique profile
+
+        :param accounts_df: pandas DataFrame containing institution ID
+        :return: accounts_df: The input DataFrame with an additional column
+        """
+
+        endpoint = join(self.main_baseurl_institute, "users")
+
+        author_id = []
+        for institute_id in accounts_df['id']:
+            url = f"{endpoint}/{institute_id}"
+            response = issue_request('GET', url, self.api_headers)
+            author_id.append(response['id'])
+        accounts_df['author_id'] = author_id
+        return accounts_df
+
+    def retrieve_institution_users(self, ignore_admin=False):
+        """
+        Retrieve accounts within institutional instance
+
+        This is based on LD-Cool-P get_account_list method of FigshareInstituteAdmin
+        It includes retrieving the default author_id
+
+        It uses:
+        https://docs.figshare.com/#private_institution_accounts_list
+        https://docs.figshare.com/#private_account_institution_user
+        """
+        url = join(self.main_baseurl_institute, "accounts")
+
+        # Figshare API is limited to a maximum of 1000 per page
+        params = {'page': 1, 'page_size': 1000}
+        accounts = issue_request('GET', url, self.api_headers, params=params)
+
+        accounts_df = pd.DataFrame(accounts)
+        accounts_df = accounts_df.drop(columns='institution_id')
+
+        if ignore_admin:
+            print("Excluding administrative and test accounts")
+
+            drop_index = list(accounts_df[accounts_df['email'] ==
+                                          'data-management@email.arizona.edu'].index)
+            drop_index += list(accounts_df[accounts_df['email'].str.contains('-test@email.arizona.edu')].index)
+
+            accounts_df = accounts_df.drop(drop_index).reset_index(drop=True)
+
+        accounts_df = self.get_figshare_id(accounts_df)
+
+        return accounts_df
diff --git a/requirements.txt b/requirements.txt
index 566083c..5427184 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,2 @@
 requests==2.22.0
+pandas==1.0.2

From 7eacb85edf5d703406312e73078b0eef2e1ae668 Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Thu, 3 Dec 2020 10:51:25 -0700
Subject: [PATCH 06/32] stats.Figshare: Add get_institution_totals

---
 figstats/stats.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/figstats/stats.py b/figstats/stats.py
index a76930a..bfeef17 100644
--- a/figstats/stats.py
+++ b/figstats/stats.py
@@ -139,3 +139,22 @@ def retrieve_institution_users(self, ignore_admin=False):
         accounts_df = self.get_figshare_id(accounts_df)
 
         return accounts_df
+
+    def get_institution_totals(self, df=None, by_method='author'):
+        """
+        Retrieve total views, downloads, and shares by either authors or articles
+        """
+
+        if isinstance(df, type(None)):
+            if by_method == 'author':
+                df = self.retrieve_institution_users(ignore_admin=False)
+            if by_method == 'article':
+                print("Need to retrieve articles")
+
+        total_dict = dict()
+        for author_id in df.loc[0:5, 'author_id']:
+            total_dict[str(author_id)] = self.get_user_totals(author_id)
+
+        # Construct pandas DataFrame
+        total_df = pd.DataFrame.from_dict(total_dict, orient='index')
+        return total_df

From 91747b50b732189ae5b9e87ea1e0d24089798b3c Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Thu, 3 Dec 2020 12:10:05 -0700
Subject: [PATCH 07/32] Loop over all authors, include author name

---
 figstats/stats.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/figstats/stats.py b/figstats/stats.py
index bfeef17..84d6aed 100644
--- a/figstats/stats.py
+++ b/figstats/stats.py
@@ -152,8 +152,13 @@ def get_institution_totals(self, df=None, by_method='author'):
                 print("Need to retrieve articles")
 
         total_dict = dict()
-        for author_id in df.loc[0:5, 'author_id']:
-            total_dict[str(author_id)] = self.get_user_totals(author_id)
+        for i in df.index:
+            print(f"{i+1} of {len(df.index)}")
+            record = df.loc[i]
+            first_name = record['first_name']
+            last_name = record['last_name']
+            author_id = record['author_id']
+            total_dict[f"{first_name} {last_name} ({author_id})"] = self.get_user_totals(author_id)
 
         # Construct pandas DataFrame
         total_df = pd.DataFrame.from_dict(total_dict, orient='index')

From bb680da0b05fa1828557ff94e636a7fd1bd07238 Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Thu, 3 Dec 2020 13:20:34 -0700
Subject: [PATCH 08/32] stats.Figshare: Add retrieve_institution_articles
 method

 - Adjust get_institution_totals to handle by_method='article'
---
 figstats/stats.py | 32 ++++++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/figstats/stats.py b/figstats/stats.py
index 84d6aed..c320382 100644
--- a/figstats/stats.py
+++ b/figstats/stats.py
@@ -140,6 +140,22 @@ def retrieve_institution_users(self, ignore_admin=False):
 
         return accounts_df
 
+    def retrieve_institution_articles(self):
+
+        url = join(self.main_baseurl_institute, "articles")
+
+        # Figshare API is limited to a maximum of 1000 per page
+        params = {'page': 1,
+                  'page_size': 1000}
+        articles = issue_request('GET', url, self.api_headers, params=params)
+
+        articles_df = pd.DataFrame(articles)
+
+        # Only consider published dataset
+        articles_df = articles_df.loc[articles_df['published_date'].notnull()]
+        articles_df = articles_df.reset_index()
+        return articles_df
+
     def get_institution_totals(self, df=None, by_method='author'):
         """
         Retrieve total views, downloads, and shares by either authors or articles
@@ -149,17 +165,21 @@ def get_institution_totals(self, df=None, by_method='author'):
             if by_method == 'author':
                 df = self.retrieve_institution_users(ignore_admin=False)
             if by_method == 'article':
-                print("Need to retrieve articles")
+                df = self.retrieve_institution_articles()
 
         total_dict = dict()
         for i in df.index:
             print(f"{i+1} of {len(df.index)}")
             record = df.loc[i]
-            first_name = record['first_name']
-            last_name = record['last_name']
-            author_id = record['author_id']
-            total_dict[f"{first_name} {last_name} ({author_id})"] = self.get_user_totals(author_id)
-
+            if by_method == 'author':
+                first_name = record['first_name']
+                last_name = record['last_name']
+                author_id = record['author_id']
+                total_dict[f"{first_name} {last_name} ({author_id})"] = self.get_user_totals(author_id)
+            if by_method == 'article':
+                total_dict[f"{record['id']}"] = self.get_totals(record['id'],
+                                                                item='article',
+                                                                institution=False)
         # Construct pandas DataFrame
         total_df = pd.DataFrame.from_dict(total_dict, orient='index')
         return total_df

From 6560567a4e116589d29c02b669a25c0df1fe6781 Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Fri, 4 Dec 2020 09:33:03 -0700
Subject: [PATCH 09/32] stats.Figshare: Sort timeline by date in get_timeline
 method

---
 figstats/stats.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/figstats/stats.py b/figstats/stats.py
index c320382..8d17cdc 100644
--- a/figstats/stats.py
+++ b/figstats/stats.py
@@ -78,14 +78,18 @@ def get_user_totals(self, author_id):
 
     def get_timeline(self, item_id, item='article', granularity='day',
                      institution=False):
-        total_dict = {}
+        timeline_dict = {}
         for counter in ['views', 'downloads', 'shares']:
             # Using non-institution one since that seems to give correct stats
             urls = ['timeline', granularity, counter, item, str(item_id)]
             url = self.stats_endpoint(join(*urls), institution=institution)
             result = issue_request('GET', url, headers=self.basic_headers)
-            total_dict[counter] = result['timeline']
-        return total_dict
+            # Sort contents by date
+            result_sort = {}
+            for key in sorted(result['timeline']):
+                result_sort[key] = result['timeline'][key]
+            timeline_dict[counter] = result_sort
+        return timeline_dict
 
     def get_figshare_id(self, accounts_df):
         """

From a8dd2c153e5c3705b641fd3ad7f436d6dc421b89 Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Fri, 4 Dec 2020 09:50:38 -0700
Subject: [PATCH 10/32] stats.Figshare: Add cumulative numbers for views,
 downloads, shares in get_timeline method

---
 figstats/stats.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/figstats/stats.py b/figstats/stats.py
index 8d17cdc..45abf10 100644
--- a/figstats/stats.py
+++ b/figstats/stats.py
@@ -86,9 +86,14 @@ def get_timeline(self, item_id, item='article', granularity='day',
             result = issue_request('GET', url, headers=self.basic_headers)
             # Sort contents by date
             result_sort = {}
+            cum_dict = {}
+            count = 0
             for key in sorted(result['timeline']):
                 result_sort[key] = result['timeline'][key]
+                count += result['timeline'][key]
+                cum_dict[key] = count
             timeline_dict[counter] = result_sort
+            timeline_dict[f"{counter}-cum"] = cum_dict
         return timeline_dict
 
     def get_figshare_id(self, accounts_df):

From e4fe93e7034dd96999bea1de5271f55faea71567 Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Fri, 4 Dec 2020 10:01:42 -0700
Subject: [PATCH 11/32] stats.Figshare: Ensure timeline is the same for all
 records

---
 figstats/stats.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/figstats/stats.py b/figstats/stats.py
index 45abf10..27c86d0 100644
--- a/figstats/stats.py
+++ b/figstats/stats.py
@@ -88,9 +88,15 @@ def get_timeline(self, item_id, item='article', granularity='day',
             result_sort = {}
             cum_dict = {}
             count = 0
-            for key in sorted(result['timeline']):
-                result_sort[key] = result['timeline'][key]
-                count += result['timeline'][key]
+            # Use views record for timeline (most populated generally)
+            if counter == 'views':
+                save_date = sorted(result['timeline'])
+            for key in save_date:
+                try:
+                    result_sort[key] = result['timeline'][key]
+                    count += result['timeline'][key]
+                except KeyError:
+                    pass
                 cum_dict[key] = count
             timeline_dict[counter] = result_sort
             timeline_dict[f"{counter}-cum"] = cum_dict

From f01e556b626fa5b581719eb7dfacf722f3ac63b2 Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Tue, 8 Dec 2020 09:55:35 -0700
Subject: [PATCH 12/32] Minor fix to zero daily numbers if not in timeline

---
 figstats/stats.py | 2 +-
 requirements.txt  | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/figstats/stats.py b/figstats/stats.py
index 27c86d0..220d6e9 100644
--- a/figstats/stats.py
+++ b/figstats/stats.py
@@ -96,7 +96,7 @@ def get_timeline(self, item_id, item='article', granularity='day',
                     result_sort[key] = result['timeline'][key]
                     count += result['timeline'][key]
                 except KeyError:
-                    pass
+                    result_sort[key] = 0
                 cum_dict[key] = count
             timeline_dict[counter] = result_sort
             timeline_dict[f"{counter}-cum"] = cum_dict
diff --git a/requirements.txt b/requirements.txt
index 5427184..eb73778 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
 requests==2.22.0
 pandas==1.0.2
+matplotlib

From a77df39d7dee1ef8881f8adeed5cbf7a7e6d8b91 Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Tue, 8 Dec 2020 10:18:19 -0700
Subject: [PATCH 13/32] stats: Add retrieve_article_details method

---
 figstats/stats.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/figstats/stats.py b/figstats/stats.py
index 220d6e9..56523e1 100644
--- a/figstats/stats.py
+++ b/figstats/stats.py
@@ -171,6 +171,13 @@ def retrieve_institution_articles(self):
         articles_df = articles_df.reset_index()
         return articles_df
 
+    def retrieve_article_details(self, article_id):
+        """Retrieve article details"""
+        url = join('https://api.figshare.com/v2/', f"articles/{article_id}")
+
+        article_dict = issue_request('GET', url, self.basic_headers)
+        return article_dict
+
     def get_institution_totals(self, df=None, by_method='author'):
         """
         Retrieve total views, downloads, and shares by either authors or articles

From 5cf4adb654dd50951cc830ab2d383a65f0aa4388 Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Tue, 8 Dec 2020 10:23:35 -0700
Subject: [PATCH 14/32] Add visualization module

---
 figstats/visualization.py | 71 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 figstats/visualization.py

diff --git a/figstats/visualization.py b/figstats/visualization.py
new file mode 100644
index 0000000..9ea1d0b
--- /dev/null
+++ b/figstats/visualization.py
@@ -0,0 +1,71 @@
+from datetime import datetime as dt
+import matplotlib.pyplot as plt
+import matplotlib.dates as m_dates
+
+
+def matplotlib_date_format(date_list):
+    """Generate list of datetime objects"""
+    datetime_list = [dt.strptime(date, '%Y-%m-%d') for date in date_list]
+
+    return datetime_list
+
+
+def plot_timeline(timeline_dict, article_dict):
+    """
+    Purpose:
+      Plot timeline showing views and downloads
+
+    :param timeline_dict: dict containing daily and cumulative numbers.
+           From stats.Figshare.get_timeline
+    :param article_dict: dictionary of article details.
+           From stats.Figshare.retrieve_article_details
+    """
+    datetime_list = matplotlib_date_format(list(timeline_dict['views'].keys()))
+    fig, [ax0, ax1] = plt.subplots(ncols=2, nrows=2,
+                                   gridspec_kw={'height_ratios': [3, 1]})
+
+    counters = ['views', 'downloads']
+
+    for ii, counter in zip(range(len(counters)), counters):
+
+        # Bottom panels
+        y_bottom = timeline_dict[counter].values()
+        ax1[ii].bar(datetime_list, y_bottom)
+        locator = m_dates.AutoDateLocator(minticks=3, maxticks=7)
+        formatter = m_dates.ConciseDateFormatter(locator)
+        ax1[ii].xaxis.set_major_locator(locator)
+        ax1[ii].xaxis.set_major_formatter(formatter)
+        ax1[ii].set_ylabel(f"Daily {counter}")
+        ax1[ii].tick_params(axis='y', direction='in')
+        ax1[ii].tick_params(axis='x', direction='out')
+        ax1[ii].annotate(f'Maximum daily {counter}: {max(y_bottom)}',
+                         (0.025, 0.95), xycoords='axes fraction',
+                         va='top', ha='left')
+
+        # Top panels
+        y_top = timeline_dict[counter+'-cum'].values()
+        ax0[ii].bar(datetime_list, y_top)
+        ax0[ii].xaxis.set_major_locator(locator)
+        ax0[ii].xaxis.set_major_formatter(formatter)
+        ax0[ii].set_xticklabels('')
+        ax0[ii].set_ylabel(f"Cumulative {counter}")
+        ax0[ii].tick_params(axis='both', direction='in')
+        ax0[ii].annotate(f'Total {counter}: {max(y_top)}', (0.025, 0.975),
+                         xycoords='axes fraction', va='top', ha='left')
+        # ax0[ii].set_xlabel('Date')
+
+    # Heading containing title, author, license, DOI
+    heading = f"Title: {article_dict['title']}\n"
+    author_list = [auth_dict['full_name'] for auth_dict in article_dict['authors']]
+    if len(author_list) > 3:
+        heading += f"Authors: {author_list[0]} et al.\n"
+    else:
+        heading += f"Authors: {' '.join(author_list)}\n"
+    heading += f"License: {article_dict['license']['name']}  "
+    heading += f"DOI: https://doi.org/{article_dict['doi']}"
+    ax0[0].text(0.01, 1.15, heading, ha='left', va='top',
+                transform=ax0[0].transAxes)
+
+    fig.set_size_inches(8, 6)
+    plt.subplots_adjust(left=0.09, bottom=0.1, top=0.90, right=0.985,
+                        hspace=0.025)

From 355c88f676c2600e73d76c398c9b590758548dc1 Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Tue, 8 Dec 2020 10:39:33 -0700
Subject: [PATCH 15/32] Add option to save PDF file or return matplotlib fig
 instance

---
 figstats/visualization.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/figstats/visualization.py b/figstats/visualization.py
index 9ea1d0b..6c3ffbe 100644
--- a/figstats/visualization.py
+++ b/figstats/visualization.py
@@ -10,7 +10,7 @@ def matplotlib_date_format(date_list):
     return datetime_list
 
 
-def plot_timeline(timeline_dict, article_dict):
+def plot_timeline(timeline_dict, article_dict, out_pdf=None, save=False):
     """
     Purpose:
       Plot timeline showing views and downloads
@@ -19,6 +19,10 @@ def plot_timeline(timeline_dict, article_dict):
            From stats.Figshare.get_timeline
     :param article_dict: dictionary of article details.
            From stats.Figshare.retrieve_article_details
+    :param out_pdf: Output filename. Default: timeline_<article_id>.pdf
+    :param save: bool to save PDF file. Otherwise return matplotlib fig object
+
+    :return fig: If save == False, fig is returned
     """
     datetime_list = matplotlib_date_format(list(timeline_dict['views'].keys()))
     fig, [ax0, ax1] = plt.subplots(ncols=2, nrows=2,
@@ -69,3 +73,10 @@ def plot_timeline(timeline_dict, article_dict):
     fig.set_size_inches(8, 6)
     plt.subplots_adjust(left=0.09, bottom=0.1, top=0.90, right=0.985,
                         hspace=0.025)
+
+    if save:
+        if isinstance(out_pdf, type(None)):
+            out_pdf = f"timeline_{article_dict['id']}.pdf"
+        fig.savefig(out_pdf)
+    else:
+        return fig

From 4f1783edc9316f6d4f28d6d390e78209fad4efae Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Tue, 8 Dec 2020 10:56:58 -0700
Subject: [PATCH 16/32] visualization: Add plot_shares method

---
 figstats/visualization.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/figstats/visualization.py b/figstats/visualization.py
index 6c3ffbe..1249747 100644
--- a/figstats/visualization.py
+++ b/figstats/visualization.py
@@ -10,6 +10,17 @@ def matplotlib_date_format(date_list):
     return datetime_list
 
 
+def plot_shares(ax, timeline_dict):
+    shares_dict = timeline_dict['shares']
+    non_zero = [key for key in shares_dict.keys() if shares_dict[key] > 0]
+
+    if len(non_zero) > 0:
+        dates = matplotlib_date_format(non_zero)
+        for date, key in zip(dates, non_zero):
+            ax.axvline(x=date, color='red')
+            ax.text(date, 10, f"{shares_dict[key]}", color='red')
+
+
 def plot_timeline(timeline_dict, article_dict, out_pdf=None, save=False):
     """
     Purpose:
@@ -58,6 +69,8 @@ def plot_timeline(timeline_dict, article_dict, out_pdf=None, save=False):
                          xycoords='axes fraction', va='top', ha='left')
         # ax0[ii].set_xlabel('Date')
 
+        plot_shares(ax0[ii], timeline_dict)
+
     # Heading containing title, author, license, DOI
     heading = f"Title: {article_dict['title']}\n"
     author_list = [auth_dict['full_name'] for auth_dict in article_dict['authors']]

From 229618e599a77137d58a3bc4ced1203ad698490a Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Tue, 8 Dec 2020 10:57:16 -0700
Subject: [PATCH 17/32] Add numpy to requirements

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index eb73778..ff48e81 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 requests==2.22.0
 pandas==1.0.2
 matplotlib
+numpy

From 1cd267ed999a01cf41e39b4a4a4496aa1ea16251 Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Tue, 8 Dec 2020 13:27:28 -0700
Subject: [PATCH 18/32] Exclude testing scripts

---
 .gitignore | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.gitignore b/.gitignore
index b6e4761..2db35f0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -127,3 +127,5 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+
+testing.py

From 9347032f0b1ea8237d8c0e5899ed4156f39831f2 Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Wed, 9 Dec 2020 13:50:28 -0700
Subject: [PATCH 19/32] Refactor to use counter_list

---
 figstats/stats.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/figstats/stats.py b/figstats/stats.py
index 56523e1..b1f9d69 100644
--- a/figstats/stats.py
+++ b/figstats/stats.py
@@ -3,6 +3,8 @@
 
 from .commons import issue_request
 
+counter_list = ['views', 'downloads', 'shares']
+
 
 class Figshare:
     """
@@ -56,7 +58,7 @@ def get_totals(self, item_id, item='article', institution=False):
             raise ValueError("Incorrect item type")
 
         total_dict = {}
-        for counter in ['views', 'downloads', 'shares']:
+        for counter in counter_list:
             # Using non-institution one since that seems to give correct stats
             url = self.stats_endpoint(join('total', counter, item, str(item_id)),
                                       institution=institution)
@@ -79,7 +81,7 @@ def get_user_totals(self, author_id):
     def get_timeline(self, item_id, item='article', granularity='day',
                      institution=False):
         timeline_dict = {}
-        for counter in ['views', 'downloads', 'shares']:
+        for counter in counter_list:
             # Using non-institution one since that seems to give correct stats
             urls = ['timeline', granularity, counter, item, str(item_id)]
             url = self.stats_endpoint(join(*urls), institution=institution)

From 897c21dd2008e5b40e7abdee0bf2ba7fbfde155e Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Wed, 9 Dec 2020 14:00:34 -0700
Subject: [PATCH 20/32] Change cumulative panels from bar to line

 - Add text for total number of shares
 - Set axes lower value
 - Label daily shares at bottom
---
 figstats/visualization.py | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/figstats/visualization.py b/figstats/visualization.py
index 1249747..7c92e46 100644
--- a/figstats/visualization.py
+++ b/figstats/visualization.py
@@ -18,7 +18,8 @@ def plot_shares(ax, timeline_dict):
         dates = matplotlib_date_format(non_zero)
         for date, key in zip(dates, non_zero):
             ax.axvline(x=date, color='red')
-            ax.text(date, 10, f"{shares_dict[key]}", color='red')
+            ax.text(date, 1, f"{shares_dict[key]}", color='red',
+                    ha='right', va='bottom')
 
 
 def plot_timeline(timeline_dict, article_dict, out_pdf=None, save=False):
@@ -56,10 +57,12 @@ def plot_timeline(timeline_dict, article_dict, out_pdf=None, save=False):
         ax1[ii].annotate(f'Maximum daily {counter}: {max(y_bottom)}',
                          (0.025, 0.95), xycoords='axes fraction',
                          va='top', ha='left')
+        ax1[ii].set_ylim(bottom=0)
 
         # Top panels
         y_top = timeline_dict[counter+'-cum'].values()
-        ax0[ii].bar(datetime_list, y_top)
+        ax0[ii].plot(datetime_list, y_top, linestyle='-', linewidth=2.0,
+                     marker='')
         ax0[ii].xaxis.set_major_locator(locator)
         ax0[ii].xaxis.set_major_formatter(formatter)
         ax0[ii].set_xticklabels('')
@@ -68,21 +71,26 @@ def plot_timeline(timeline_dict, article_dict, out_pdf=None, save=False):
         ax0[ii].annotate(f'Total {counter}: {max(y_top)}', (0.025, 0.975),
                          xycoords='axes fraction', va='top', ha='left')
         # ax0[ii].set_xlabel('Date')
+        ax0[ii].set_ylim(bottom=0)
 
         plot_shares(ax0[ii], timeline_dict)
 
     # Heading containing title, author, license, DOI
-    heading = f"Title: {article_dict['title']}\n"
+    left_heading = f"Title: {article_dict['title']}\n"
     author_list = [auth_dict['full_name'] for auth_dict in article_dict['authors']]
     if len(author_list) > 3:
-        heading += f"Authors: {author_list[0]} et al.\n"
+        left_heading += f"Authors: {author_list[0]} et al.\n"
     else:
-        heading += f"Authors: {' '.join(author_list)}\n"
-    heading += f"License: {article_dict['license']['name']}  "
-    heading += f"DOI: https://doi.org/{article_dict['doi']}"
-    ax0[0].text(0.01, 1.15, heading, ha='left', va='top',
+        left_heading += f"Authors: {' '.join(author_list)}\n"
+    left_heading += f"License: {article_dict['license']['name']}  "
+    left_heading += f"DOI: https://doi.org/{article_dict['doi']}"
+    ax0[0].text(0.01, 1.15, left_heading, ha='left', va='top',
                 transform=ax0[0].transAxes)
 
+    right_heading = f"Shares: {max(timeline_dict['shares-cum'].values())}"
+    ax0[1].text(1.0, 1.15, right_heading, ha='right', va='top',
+                transform=ax0[1].transAxes)
+
     fig.set_size_inches(8, 6)
     plt.subplots_adjust(left=0.09, bottom=0.1, top=0.90, right=0.985,
                         hspace=0.025)

From 9ef520309561466a6938e67c70079a5356651fe9 Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Wed, 9 Dec 2020 14:23:32 -0700
Subject: [PATCH 21/32] Baseline script for multi-timeline retrieval

---
 scripts/make_timeline_plots | 40 +++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100755 scripts/make_timeline_plots

diff --git a/scripts/make_timeline_plots b/scripts/make_timeline_plots
new file mode 100755
index 0000000..a96edc0
--- /dev/null
+++ b/scripts/make_timeline_plots
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+
+import argparse
+
+from figstats import stats, visualization
+from matplotlib.backends.backend_pdf import PdfPages
+
+
+if __name__ in '__main__':
+    parser = argparse.ArgumentParser(description='Command-line driver for figstats timeline plots.')
+    parser.add_argument('--api_token', required=True, help='Figshare API token')
+    parser.add_argument('--basic_token', required=True, help='Figshare base64 API stats token')
+    parser.add_argument('--institute', required=True, help='Name of institution')
+    args = parser.parse_args()
+
+    fs = stats.Figshare(api_token=args.api_token,
+                        basic_token=args.basic_token,
+                        institution=True,
+                        institute=args.institute)
+
+    articles_df = fs.retrieve_institution_articles()
+
+    out_pdf = f"{args.institute}_timeline_plots.pdf"
+    pp = PdfPages(out_pdf)
+
+    for article_id in articles_df['id']:
+        print(f"Working on : {article_id}")
+        article_dict = fs.retrieve_article_details(article_id)
+        try:
+            timeline_dict = fs.get_timeline(article_id, item='article', institution=True)
+
+            fig = visualization.plot_timeline(timeline_dict, article_dict, save=False)
+
+            fig.savefig(pp, format='pdf', bbox_inches='tight')
+            fig.clear()
+        except TypeError:
+            print("TypeError")
+
+    print(f"Writing : {out_pdf}")
+    pp.close()

From 1943cfab6ddac62182cd57a0aebd20cff8821f02 Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Thu, 10 Dec 2020 13:31:56 -0700
Subject: [PATCH 22/32] Fix case when timeline is not available (e.g., shares)

---
 figstats/stats.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/figstats/stats.py b/figstats/stats.py
index b1f9d69..e7862a0 100644
--- a/figstats/stats.py
+++ b/figstats/stats.py
@@ -94,11 +94,15 @@ def get_timeline(self, item_id, item='article', granularity='day',
             if counter == 'views':
                 save_date = sorted(result['timeline'])
             for key in save_date:
-                try:
-                    result_sort[key] = result['timeline'][key]
-                    count += result['timeline'][key]
-                except KeyError:
+                if isinstance(result['timeline'], type(None)):
+                    # Handle when counter is not available (NoneType)
                     result_sort[key] = 0
+                else:
+                    try:
+                        result_sort[key] = result['timeline'][key]
+                        count += result['timeline'][key]
+                    except KeyError:
+                        result_sort[key] = 0
                 cum_dict[key] = count
             timeline_dict[counter] = result_sort
             timeline_dict[f"{counter}-cum"] = cum_dict

From 968b6b434b5e4aa0447e9cea0ebd181f220cd0c1 Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Thu, 10 Dec 2020 13:54:17 -0700
Subject: [PATCH 23/32] Use textwrap to handle long title

---
 figstats/visualization.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/figstats/visualization.py b/figstats/visualization.py
index 7c92e46..5556a7b 100644
--- a/figstats/visualization.py
+++ b/figstats/visualization.py
@@ -2,6 +2,10 @@
 import matplotlib.pyplot as plt
 import matplotlib.dates as m_dates
 
+from textwrap import wrap
+
+title_width = 80
+
 
 def matplotlib_date_format(date_list):
     """Generate list of datetime objects"""
@@ -76,7 +80,13 @@ def plot_timeline(timeline_dict, article_dict, out_pdf=None, save=False):
         plot_shares(ax0[ii], timeline_dict)
 
     # Heading containing title, author, license, DOI
-    left_heading = f"Title: {article_dict['title']}\n"
+
+    title_chunks = wrap(article_dict['title'], title_width)
+    for cc in range(len(title_chunks)):
+        if cc == 0:
+            left_heading = f"Title: {title_chunks[cc]}\n"
+        else:
+            left_heading += f"         {title_chunks[cc]}\n"
     author_list = [auth_dict['full_name'] for auth_dict in article_dict['authors']]
     if len(author_list) > 3:
         left_heading += f"Authors: {author_list[0]} et al.\n"
@@ -84,15 +94,15 @@ def plot_timeline(timeline_dict, article_dict, out_pdf=None, save=False):
         left_heading += f"Authors: {' '.join(author_list)}\n"
     left_heading += f"License: {article_dict['license']['name']}  "
     left_heading += f"DOI: https://doi.org/{article_dict['doi']}"
-    ax0[0].text(0.01, 1.15, left_heading, ha='left', va='top',
+    ax0[0].text(0.01, 1.25, left_heading, ha='left', va='top',
                 transform=ax0[0].transAxes)
 
     right_heading = f"Shares: {max(timeline_dict['shares-cum'].values())}"
-    ax0[1].text(1.0, 1.15, right_heading, ha='right', va='top',
+    ax0[1].text(1.0, 1.25, right_heading, ha='right', va='top',
                 transform=ax0[1].transAxes)
 
     fig.set_size_inches(8, 6)
-    plt.subplots_adjust(left=0.09, bottom=0.1, top=0.90, right=0.985,
+    plt.subplots_adjust(left=0.09, bottom=0.08, top=0.85, right=0.985,
                         hspace=0.025)
 
     if save:

From 64a7a1d92bddafe5a333a0d93eda1374f0e8332c Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Thu, 10 Dec 2020 14:05:03 -0700
Subject: [PATCH 24/32] Reduce title_width to give room for shares in upper
 right

---
 figstats/visualization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/figstats/visualization.py b/figstats/visualization.py
index 5556a7b..29dce89 100644
--- a/figstats/visualization.py
+++ b/figstats/visualization.py
@@ -4,7 +4,7 @@
 
 from textwrap import wrap
 
-title_width = 80
+title_width = 75
 
 
 def matplotlib_date_format(date_list):

From 2e8590a39b1c0a67ba5b23728c38352391351d60 Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Wed, 3 Mar 2021 16:05:53 -0700
Subject: [PATCH 25/32] Add version info

---
 figstats/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/figstats/__init__.py b/figstats/__init__.py
index e69de29..b8023d8 100644
--- a/figstats/__init__.py
+++ b/figstats/__init__.py
@@ -0,0 +1 @@
+__version__ = '0.0.1'

From 0442e8ba063f959357264ca9f01c835bda0d3dc1 Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Wed, 3 Mar 2021 16:30:48 -0700
Subject: [PATCH 26/32] Add HTTPError handling

 - important for private/restricted data
---
 scripts/make_timeline_plots | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/scripts/make_timeline_plots b/scripts/make_timeline_plots
index a96edc0..985d83f 100755
--- a/scripts/make_timeline_plots
+++ b/scripts/make_timeline_plots
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 
 import argparse
+from requests.exceptions import HTTPError
 
 from figstats import stats, visualization
 from matplotlib.backends.backend_pdf import PdfPages
@@ -25,16 +26,18 @@ if __name__ in '__main__':
 
     for article_id in articles_df['id']:
         print(f"Working on : {article_id}")
-        article_dict = fs.retrieve_article_details(article_id)
         try:
-            timeline_dict = fs.get_timeline(article_id, item='article', institution=True)
-
-            fig = visualization.plot_timeline(timeline_dict, article_dict, save=False)
-
-            fig.savefig(pp, format='pdf', bbox_inches='tight')
-            fig.clear()
-        except TypeError:
-            print("TypeError")
+            article_dict = fs.retrieve_article_details(article_id)
+            try:
+                timeline_dict = fs.get_timeline(article_id, item='article', institution=True)
+
+                fig = visualization.plot_timeline(timeline_dict, article_dict, save=False)
+                fig.savefig(pp, format='pdf', bbox_inches='tight')
+                fig.clear()
+            except TypeError:
+                print("TypeError")
+        except HTTPError:
+            pass
 
     print(f"Writing : {out_pdf}")
     pp.close()

From 326aff230f8d353d662e368160251c486b7c3090 Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Tue, 9 Mar 2021 15:19:21 -0700
Subject: [PATCH 27/32] make_timeline_plots: Adjust for PEP8 width

 - Provide example for institution name
---
 scripts/make_timeline_plots | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/scripts/make_timeline_plots b/scripts/make_timeline_plots
index 985d83f..6af9907 100755
--- a/scripts/make_timeline_plots
+++ b/scripts/make_timeline_plots
@@ -8,10 +8,14 @@ from matplotlib.backends.backend_pdf import PdfPages
 
 
 if __name__ in '__main__':
-    parser = argparse.ArgumentParser(description='Command-line driver for figstats timeline plots.')
-    parser.add_argument('--api_token', required=True, help='Figshare API token')
-    parser.add_argument('--basic_token', required=True, help='Figshare base64 API stats token')
-    parser.add_argument('--institute', required=True, help='Name of institution')
+    description = 'Command-line driver for figstats timeline plots.'
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument('--api_token', required=True,
+                        help='Figshare API token')
+    parser.add_argument('--basic_token', required=True,
+                        help='Figshare base64 API stats token')
+    parser.add_argument('--institute', required=True,
+                        help='Name of institution (e.g., "arizona")')
     args = parser.parse_args()
 
     fs = stats.Figshare(api_token=args.api_token,
@@ -29,9 +33,11 @@ if __name__ in '__main__':
         try:
             article_dict = fs.retrieve_article_details(article_id)
             try:
-                timeline_dict = fs.get_timeline(article_id, item='article', institution=True)
+                timeline_dict = fs.get_timeline(article_id, item='article',
+                                                institution=True)
 
-                fig = visualization.plot_timeline(timeline_dict, article_dict, save=False)
+                fig = visualization.plot_timeline(timeline_dict, article_dict,
+                                                  save=False)
                 fig.savefig(pp, format='pdf', bbox_inches='tight')
                 fig.clear()
             except TypeError:

From cfcf0957f8e6f7f5e25c5c2df79411126991ff6d Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Tue, 9 Mar 2021 15:45:05 -0700
Subject: [PATCH 28/32] visualization: type hinting

 - Add docstrings for plot_shares
 - Revise docstrings for plot_timeline
 - Change out_pdf to string (default empty)
---
 figstats/visualization.py | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/figstats/visualization.py b/figstats/visualization.py
index 29dce89..e751f4a 100644
--- a/figstats/visualization.py
+++ b/figstats/visualization.py
@@ -1,20 +1,24 @@
+from typing import Union
 from datetime import datetime as dt
+
 import matplotlib.pyplot as plt
 import matplotlib.dates as m_dates
+from matplotlib import figure, axes
 
 from textwrap import wrap
 
 title_width = 75
 
 
-def matplotlib_date_format(date_list):
+def matplotlib_date_format(date_list: list) -> list:
     """Generate list of datetime objects"""
     datetime_list = [dt.strptime(date, '%Y-%m-%d') for date in date_list]
 
     return datetime_list
 
 
-def plot_shares(ax, timeline_dict):
+def plot_shares(ax: axes.Axes, timeline_dict: dict):
+    """Plot shares data"""
     shares_dict = timeline_dict['shares']
     non_zero = [key for key in shares_dict.keys() if shares_dict[key] > 0]
 
@@ -26,17 +30,18 @@ def plot_shares(ax, timeline_dict):
                     ha='right', va='bottom')
 
 
-def plot_timeline(timeline_dict, article_dict, out_pdf=None, save=False):
+def plot_timeline(timeline_dict: dict, article_dict: dict,
+                  out_pdf: str = '', save: bool = False) \
+        -> Union[None, figure.Figure]:
     """
-    Purpose:
-      Plot timeline showing views and downloads
+    Plot timeline showing views and downloads
 
-    :param timeline_dict: dict containing daily and cumulative numbers.
-           From stats.Figshare.get_timeline
-    :param article_dict: dictionary of article details.
-           From stats.Figshare.retrieve_article_details
-    :param out_pdf: Output filename. Default: timeline_<article_id>.pdf
-    :param save: bool to save PDF file. Otherwise return matplotlib fig object
+    :param timeline_dict: Contains daily and cumulative numbers.
+           From ``stats.Figshare.get_timeline``
+    :param article_dict: Contains article details.
+           From ``stats.Figshare.retrieve_article_details``
+    :param out_pdf: Output filename. Default: `timeline_<article_id>.pdf`
+    :param save: Flag to save PDF file. Otherwise returns ``matplotlib`` fig object
 
     :return fig: If save == False, fig is returned
     """
@@ -106,7 +111,7 @@ def plot_timeline(timeline_dict, article_dict, out_pdf=None, save=False):
                         hspace=0.025)
 
     if save:
-        if isinstance(out_pdf, type(None)):
+        if not out_pdf:
             out_pdf = f"timeline_{article_dict['id']}.pdf"
         fig.savefig(out_pdf)
     else:

From 218a596d921d1a41d1b9acf5b34216281d4a49b0 Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Tue, 9 Mar 2021 15:45:39 -0700
Subject: [PATCH 29/32] make_timeline_plots: Temporarily handle
 UnicodeEncodeError

---
 scripts/make_timeline_plots | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/scripts/make_timeline_plots b/scripts/make_timeline_plots
index 6af9907..b90c848 100755
--- a/scripts/make_timeline_plots
+++ b/scripts/make_timeline_plots
@@ -24,7 +24,6 @@ if __name__ in '__main__':
                         institute=args.institute)
 
     articles_df = fs.retrieve_institution_articles()
-
     out_pdf = f"{args.institute}_timeline_plots.pdf"
     pp = PdfPages(out_pdf)
 
@@ -42,7 +41,7 @@ if __name__ in '__main__':
                 fig.clear()
             except TypeError:
                 print("TypeError")
-        except HTTPError:
+        except (HTTPError, UnicodeEncodeError):
             pass
 
     print(f"Writing : {out_pdf}")

From 973048b862d7015b81064330d0458eac3b6bfe9f Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Tue, 9 Mar 2021 15:47:48 -0700
Subject: [PATCH 30/32] Add PyCharm files to .gitignore

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index 2db35f0..92a1198 100644
--- a/.gitignore
+++ b/.gitignore
@@ -129,3 +129,6 @@ dmypy.json
 .pyre/
 
 testing.py
+
+# PyCharm
+.idea/

From b06ce5db98b57494e01c0c1ecc211c8572a0eb1f Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Tue, 27 Jul 2021 08:57:46 -0700
Subject: [PATCH 31/32] Adjust argparse inputs with hyphen format

---
 scripts/make_timeline_plots | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/scripts/make_timeline_plots b/scripts/make_timeline_plots
index b90c848..9ee3e4d 100755
--- a/scripts/make_timeline_plots
+++ b/scripts/make_timeline_plots
@@ -10,11 +10,11 @@ from matplotlib.backends.backend_pdf import PdfPages
 if __name__ in '__main__':
     description = 'Command-line driver for figstats timeline plots.'
     parser = argparse.ArgumentParser(description=description)
-    parser.add_argument('--api_token', required=True,
+    parser.add_argument('-a', '--api-token', required=True,
                         help='Figshare API token')
-    parser.add_argument('--basic_token', required=True,
+    parser.add_argument('-b', '--basic-token', required=True,
                         help='Figshare base64 API stats token')
-    parser.add_argument('--institute', required=True,
+    parser.add_argument('-i', '--institute', required=True,
                         help='Name of institution (e.g., "arizona")')
     args = parser.parse_args()
 
@@ -37,11 +37,13 @@ if __name__ in '__main__':
 
                 fig = visualization.plot_timeline(timeline_dict, article_dict,
                                                   save=False)
+                print(type(fig))
                 fig.savefig(pp, format='pdf', bbox_inches='tight')
                 fig.clear()
             except TypeError:
                 print("TypeError")
         except (HTTPError, UnicodeEncodeError):
+            print(f"Skipping: {article_id}")
             pass
 
     print(f"Writing : {out_pdf}")

From d28199647629b4f5cc84820ecdac77f2ce8fbdf1 Mon Sep 17 00:00:00 2001
From: Chun Ly <astro.chun@gmail.com>
Date: Tue, 27 Jul 2021 09:22:13 -0700
Subject: [PATCH 32/32] Fix to ensure using a raw JSON response with no basic
 token

---
 figstats/stats.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/figstats/stats.py b/figstats/stats.py
index e7862a0..0262586 100644
--- a/figstats/stats.py
+++ b/figstats/stats.py
@@ -23,7 +23,9 @@ def __init__(self, api_token='', basic_token='', institution=False, institute=''
             self.stats_baseurl_institute = join(self.stats_baseurl, self.institute)
 
         # Base64 token
-        self.basic_headers = {'Content-Type': 'application/json'}
+        self.basic_headers0 = {'Content-Type': 'application/json'}
+
+        self.basic_headers = self.basic_headers0.copy()
         self.basic_token = basic_token
         if self.basic_token:
             self.basic_headers['Authorization'] = f'Basic {self.basic_token}'
@@ -180,8 +182,7 @@ def retrieve_institution_articles(self):
     def retrieve_article_details(self, article_id):
         """Retrieve article details"""
         url = join('https://api.figshare.com/v2/', f"articles/{article_id}")
-
-        article_dict = issue_request('GET', url, self.basic_headers)
+        article_dict = issue_request('GET', url, self.basic_headers0)
         return article_dict
 
     def get_institution_totals(self, df=None, by_method='author'):