class Color:
    """ANSI escape sequences and small helpers for styled terminal output."""

    PURPLE = '\033[95m'
    CYAN = '\033[96m'
    DARK_CYAN = '\033[36m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
    # Appended after styled text by every helper below.
    END = '\033[0m'

    @staticmethod
    def get_bold_string(string) -> str:
        """Return ``string`` surrounded by the BOLD and END sequences."""
        return ''.join((Color.BOLD, format(string), Color.END))

    @staticmethod
    def print_bold_string(string):
        """Print ``string`` wrapped in the BOLD and END sequences."""
        bold_text = Color.get_bold_string(string)
        print(bold_text)

    @staticmethod
    def get_colored_yes() -> str:
        """Return the green ``y: Yes`` prompt option."""
        return Color.GREEN + 'y: Yes' + Color.END

    @staticmethod
    def get_colored_no() -> str:
        """Return the red ``n: No`` prompt option."""
        return Color.RED + 'n: No' + Color.END

    @staticmethod
    def print_colored_note(note: str):
        """Print ``note`` in blue, prefixed with ``Note:: ``."""
        pieces = (Color.BLUE, 'Note:: ', format(note), Color.END)
        print(''.join(pieces))
= { - 'ada': 'ada', - 'bash': 'sh', - 'c': 'c', - 'clojure': 'clj', - 'coffeescript': 'coffee', - 'cpp': 'cpp', - 'cpp14': 'cpp', - 'csharp': 'cs', - 'd': 'd', - 'db2': 'sql', - 'elixir': 'ex', - 'erlang': 'erl', - 'fortran': 'for', - 'fsharp': 'fs', - 'go': 'go', - 'groovy': 'groovy', - 'haskell': 'hs', - 'java': 'java', - 'java8': 'java', - 'javascript': 'js', - 'julia': 'jl', - 'kotlin': 'kt', - 'lolcode': 'lol', - 'lua': 'lua', - 'mysql': 'sql', - 'objectivec': 'm', - 'ocaml': 'ml', - 'octave': 'oct', - 'oracle': 'sql', - 'pascal': 'pas', - 'perl': 'pl', - 'php': 'php', - 'pypy': 'py', - 'pypy3': 'py', - 'python': 'py', - 'python3': 'py', - 'racket': 'rkt', - 'r': 'r', - 'ruby': 'rb', - 'rust': 'rs', - 'sbcl': 'lisp', - 'scala': 'scala', - 'swift': 'swift', - 'smalltalk': 'st', - 'tcl': 'tcl', - 'tsql': 'sql', - 'visualbasic': 'vbs', - 'whitespace': 'hs', - } - - def __init__(self): - self.session = requests.Session() - self.total_submissions = 0 - - def login(self, username, password): - resp = self.session.get(self.login_url, auth=(username, password)) - self.cookies = self.session.cookies.get_dict() - self.headers = resp.request.headers - self.get_number_of_submissions() - return self.total_submissions != 0 - - def authenticate(self): - username = input('Hackerrank Username: ') - password = getpass.getpass('Hackerrank Password: ') - return self.login(username, password) - - def get_number_of_submissions(self): - if not self.total_submissions: - all_submissions_url = self.get_all_submissions_url(0, 0) - resp = self.session.get(all_submissions_url, headers=self.headers) - self.total_submissions = resp.json()['total'] - return self.total_submissions - - def get_all_submissions_url(self, offset, limit): - return self.submissions_url.format(offset, limit) - - def get_submission_url(self, challenge_slug, submission_id): - return self.challenge_url.format(challenge_slug, submission_id) - - def store_submission(self, file_name, code): - 
os.makedirs(os.path.dirname(file_name), exist_ok=True) - with open(file_name, 'w') as text_file: - text_file.write(code) - - def update_readme(self, readme_file_path, problem_readme_text): - header_length = self.readme_headers_len - with open(readme_file_path, 'r+') as text_file: - lines = text_file.readlines() - lines.append(problem_readme_text) - sortedlines = lines[:header_length] + sorted(lines[header_length:]) - text_file.seek(0) - text_file.writelines(sortedlines) - - def create_readme(self, track_name, track_url, file_name): - if track_name is not None: - os.makedirs(os.path.dirname(file_name), exist_ok=True) - text = self.new_readme_text.format(track_name, track_url) - with open(file_name, 'w') as text_file: - text_file.write(text) - - def get_file_path(self, folder_name, file_name_with_extension): - return os.path.join(self.base_folder_name, folder_name, file_name_with_extension) - - def get_readme_path(self, folder_name): - return os.path.join(self.base_folder_name, folder_name, 'README.md') - - def get_submissions(self, submissions): - headers = self.headers - - for submission in submissions: - id = submission['id'] - # challenge_id = submission['challenge_id'] - # contest_id = submission['contest_id'] - # hacker_id = submission['hacker_id'] - status = submission['status'] - # created_at = submission['created_at'] - language = submission['language'] - status_code = submission['status_code'] - # score = submission['score'] - challenge = submission['challenge'] - challenge_name = challenge['name'] - challenge_slug = challenge['slug'] - submission_url = self.get_submission_url(challenge_slug, id) - - if status == 'Accepted' or status_code == 2: - resp = self.session.get(submission_url, headers=headers) - data = resp.json()['model'] - code = data['code'] - track = data['track'] - - folder_name = 'Others' - file_extension = '.' 
+ language - file_name = challenge_slug - track_folder_name = 'Others' - track_url = '' - - if track: - track_folder_name = track['name'].strip().replace(' ', '') - track_url = self.domain_url.format(track['track_slug'], track['slug']) - parent_folder_name = track['track_name'].strip().replace(' ', '') - folder_name = os.path.join(parent_folder_name, track_folder_name) - - if self.make_language_folder: - folder_name = os.path.join(folder_name, language) - - if language in self.file_extensions: - if not self.prepend_language_in_extension: - file_extension = '' - file_extension += '.{}'.format(self.file_extensions[language]) - - if file_extension.endswith('.java'): - file_name = challenge_name.replace(' ','') - - file_path = self.get_file_path(folder_name, file_name + file_extension) - if not os.path.exists(file_path): - self.store_submission(file_path, code) - readme_file_path = self.get_readme_path(folder_name) - if not os.path.exists(readme_file_path): - self.create_readme(track_folder_name, track_url, readme_file_path) - problem_url = self.problem_url.format(challenge_slug) - readme_text = self.problem_readme_text.format(challenge_name, problem_url, language, file_name + file_extension) - self.update_readme( - readme_file_path, - readme_text, - ) - print('All Solutions Crawled') +from hsc import helper + + +class Crawler: + base_url = 'https://www.hackerrank.com/' + login_url = base_url + 'auth/login' + submissions_url = base_url + 'rest/contests/master/submissions/?offset={}&limit={}' + challenge_url = base_url + 'rest/contests/master/challenges/{}/submissions/{}' + domain_url = base_url + 'domains/{}/{}' + problem_url = base_url + 'challenges/{}/problem' + + new_readme_text = '## [{}]({})\n\n|Problem Name|Problem Link|Language|Solution Link|\n---|---|---|---\n' + readme_headers_len = len(new_readme_text.split('\n')) + problem_readme_text = '|{}|[Problem]({})|{}|[Solution](./{})|\n' + + base_folder_name = 'Hackerrank' + + # make a separate folder for different 
languages e.g Hackerrank/Regex/Introduction/python3/matching.py + make_language_folder = False + # prepend language in file extension e.g Hackerrank/Regex/Introduction/matching.python3.py + prepend_language_in_extension = False + + # file extensions + file_extensions = { + 'ada': 'ada', + 'bash': 'sh', + 'c': 'c', + 'clojure': 'clj', + 'coffeescript': 'coffee', + 'cpp': 'cpp', + 'cpp14': 'cpp', + 'csharp': 'cs', + 'd': 'd', + 'db2': 'sql', + 'elixir': 'ex', + 'erlang': 'erl', + 'fortran': 'for', + 'fsharp': 'fs', + 'go': 'go', + 'groovy': 'groovy', + 'haskell': 'hs', + 'java': 'java', + 'java8': 'java', + 'javascript': 'js', + 'julia': 'jl', + 'kotlin': 'kt', + 'lolcode': 'lol', + 'lua': 'lua', + 'mysql': 'sql', + 'objectivec': 'm', + 'ocaml': 'ml', + 'octave': 'oct', + 'oracle': 'sql', + 'pascal': 'pas', + 'perl': 'pl', + 'php': 'php', + 'pypy': 'py', + 'pypy3': 'py', + 'python': 'py', + 'python3': 'py', + 'racket': 'rkt', + 'r': 'r', + 'ruby': 'rb', + 'rust': 'rs', + 'sbcl': 'lisp', + 'scala': 'scala', + 'swift': 'swift', + 'smalltalk': 'st', + 'tcl': 'tcl', + 'tsql': 'sql', + 'visualbasic': 'vbs', + 'whitespace': 'hs', + } + + def __init__(self): + self.session = requests.Session() + self.total_submissions = 0 + self.update_progress = True + + def login(self, username, password): + resp = self.session.get(self.login_url, auth=(username, password)) + self.cookies = self.session.cookies.get_dict() + self.headers = resp.request.headers + self.get_number_of_submissions() + return self.total_submissions != 0 + + def authenticate(self): + username = input('Hackerrank Username: ') + password = getpass.getpass('Hackerrank Password: ') + return self.login(username, password) + + def get_number_of_submissions(self): + if not self.total_submissions: + all_submissions_url = self.get_all_submissions_url(0, 0) + resp = self.session.get(all_submissions_url, headers=self.headers) + self.total_submissions = resp.json()['total'] + return self.total_submissions + + def 
get_all_submissions_url(self, offset, limit): + return self.submissions_url.format(offset, limit) + + def get_submission_url(self, challenge_slug, submission_id): + return self.challenge_url.format(challenge_slug, submission_id) + + def store_submission(self, file_name, code): + os.makedirs(os.path.dirname(file_name), exist_ok=True) + with open(file_name, 'w') as text_file: + text_file.write(code) + + def update_readme(self, readme_file_path, problem_readme_text): + header_length = self.readme_headers_len + with open(readme_file_path, 'r+') as text_file: + lines = text_file.readlines() + lines.append(problem_readme_text) + sorted_lines = lines[:header_length] + sorted(lines[header_length:]) + text_file.seek(0) + text_file.writelines(sorted_lines) + + def create_readme(self, track_name, track_url, file_name): + if track_name is not None: + os.makedirs(os.path.dirname(file_name), exist_ok=True) + text = self.new_readme_text.format(track_name, track_url) + with open(file_name, 'w') as text_file: + text_file.write(text) + + def get_file_path(self, folder_name, file_name_with_extension): + return os.path.join(self.base_folder_name, folder_name, file_name_with_extension) + + def get_readme_path(self, folder_name): + return os.path.join(self.base_folder_name, folder_name, 'README.md') + + def get_submissions(self, submissions): + headers = self.headers + + submission_count = 1 + total_submissions = len(submissions) + for submission in submissions: + id = submission['id'] + # challenge_id = submission['challenge_id'] + # contest_id = submission['contest_id'] + # hacker_id = submission['hacker_id'] + status = submission['status'] + # created_at = submission['created_at'] + language = submission['language'] + status_code = submission['status_code'] + # score = submission['score'] + challenge = submission['challenge'] + challenge_name = challenge['name'] + challenge_slug = challenge['slug'] + submission_url = self.get_submission_url(challenge_slug, id) + + if status == 
'Accepted' or status_code == 2: + resp = self.session.get(submission_url, headers=headers) + data = resp.json()['model'] + code = data['code'] + track = data['track'] + + folder_name = 'Others' + file_extension = '.' + language + file_name = challenge_slug + track_folder_name = 'Others' + track_url = '' + + if track: + track_folder_name = track['name'].strip().replace(' ', '') + track_url = self.domain_url.format(track['track_slug'], track['slug']) + parent_folder_name = track['track_name'].strip().replace(' ', '') + folder_name = os.path.join(parent_folder_name, track_folder_name) + + if self.make_language_folder: + folder_name = os.path.join(folder_name, language) + + if language in self.file_extensions: + if not self.prepend_language_in_extension: + file_extension = '' + file_extension += '.{}'.format(self.file_extensions[language]) + + if file_extension.endswith('.java'): + file_name = challenge_name.replace(' ', '') + + file_path = self.get_file_path(folder_name, file_name + file_extension) + if not os.path.exists(file_path): + self.store_submission(file_path, code) + readme_file_path = self.get_readme_path(folder_name) + if not os.path.exists(readme_file_path): + self.create_readme(track_folder_name, track_url, readme_file_path) + problem_url = self.problem_url.format(challenge_slug) + readme_text = self.problem_readme_text.format(challenge_name, problem_url, language, + file_name + file_extension) + self.update_readme( + readme_file_path, + readme_text, + ) + if self.update_progress: + helper.update_progress(submission_count, total_submissions, challenge_name) + submission_count += 1 + print('All Solutions Crawled') + def main(): - offset = 0 - limit = 10 # you should change this + offset = 0 + limit = 10 # you should change this + + crawler = Crawler() - crawler = Crawler() + while not crawler.authenticate(): + print('Auth was unsuccessful') - while(not crawler.authenticate()): - print('Auth was unsuccessful') + limit = input('Enter limit needed to crawl: 
def update_progress(index, total, challenge_name):
    """Redraw the single-line crawl progress bar on standard output.

    Args:
        index: Position of the submission just handled.
        total: Number of submissions in the batch. Zero is treated as
            "finished" instead of raising ``ZeroDivisionError``.
        challenge_name: Text shown after the counters; it is blanked once
            the bar reaches 100%.

    The line starts with a carriage return so every call overwrites the
    previous bar. When the bar is complete (or the ratio is negative) the
    status text ends with a line break, so later output starts on a new line.
    """
    # Function-scope imports keep this block self-contained.
    import sys
    from hsc.color import Color

    bar_length = 30
    status = ""

    # An empty batch has nothing left to do, so it is drawn as complete.
    progress = index / total if total else 1.0
    if not isinstance(progress, (int, float)):
        # Reachable only when the arguments are numeric types whose quotient
        # is neither int nor float.
        progress = 0
        status = "error: progress var must be float\r\n"
    if progress < 0:
        progress = 0
        status = "Halt...\r\n"
    if progress >= 1:
        progress = 1
        status = "Done...\r\n"
        challenge_name = ""

    block = int(round(bar_length * progress))
    bar = "=" * block + "-" * (bar_length - block)
    # "\033[K" erases from the cursor to the end of the line, so the tail of
    # a longer challenge name from the previous call does not stay visible.
    text = "\r\033[KCrawling your hackerrank solutions: {0}[{1}]{2} {3}% [{4}/{5}] {6} {7}".format(
        Color.BLUE, bar, Color.END, int(progress * 100),
        index, total, challenge_name, status)
    sys.stdout.write(text)
    sys.stdout.flush()
Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - ], + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + ], entry_points={ 'console_scripts': [ 'hsc=hsc.crawler:main', ], - } + }, install_requires=['requests'] )