From 8d1c59e8fa441f00c274d85d773910cd2c924ce4 Mon Sep 17 00:00:00 2001
From: Samuel Vasco Gonzalez <65866495+samuVG@users.noreply.github.com>
Date: Sat, 9 Jul 2022 19:32:58 -0500
Subject: [PATCH] Homework 2
---
.../1152223665/homework_01/babynames.py | 75 +++++++++++++++++++
1 file changed, 75 insertions(+)
create mode 100644 EXTRAS/homeworks_to_submit/1152223665/homework_01/babynames.py
diff --git a/EXTRAS/homeworks_to_submit/1152223665/homework_01/babynames.py b/EXTRAS/homeworks_to_submit/1152223665/homework_01/babynames.py
new file mode 100644
index 0000000..52be52b
--- /dev/null
+++ b/EXTRAS/homeworks_to_submit/1152223665/homework_01/babynames.py
@@ -0,0 +1,75 @@
+import numpy as np
+import pandas as pd
+import codecs #Library for open HTML file
+import re
+import sys
+
+
+
+def extract_names(filename):
+    """Return [year, "Name rank", ...] parsed from a baby-names HTML file.
+
+    The year is first; the "name rank" strings follow in sorted order.
+    Raises IndexError when no "Popularity in YYYY" text is present.
+    """
+    # The context manager closes the file even if decoding fails.
+    with codecs.open(filename, "r", "utf-8") as handle:
+        text = handle.read()
+    year = re.findall(r"Popularity in (\d{4})", text)[0]
+    # NOTE(review): the tags in this pattern were lost in the submitted
+    # literal; rows are assumed to be <td>rank</td><td>name</td><td>name</td>.
+    cell = r"<td>([^<\s]+)</td>"
+    rows = re.findall(r"<td>(\d+)</td>" + cell + cell, text)
+    names = []
+    for rank, first, second in rows:
+        names.extend((first + " " + rank, second + " " + rank))
+    # Sort only the names so the year always stays at index 0.
+    return [year] + sorted(names)
+
+
+
+def main():
+    """Write a CSV of names per year and, optionally, per-file summaries.
+
+    Command line: [--summaryfile] file [file ...]
+
+    Each input file becomes one column of DataFrame_Babynames.csv, headed
+    by the year extract_names() found in it.  With --summaryfile, every
+    input additionally gets a "<file>.summary.txt" holding the year and
+    its "name rank" lines.  Exits with status 1 when no file is given.
+    """
+    # Make a list of command line arguments, omitting the [0] element
+    # which is the script itself.
+    args = sys.argv[1:]
+
+    # Notice the summary flag and remove it from args if it is present.
+    summary = bool(args) and args[0] == '--summaryfile'
+    if summary:
+        del args[0]
+
+    # Checked after the flag is removed, so a lone --summaryfile is also
+    # rejected with the usage message.
+    if not args:
+        print('usage: [--summaryfile] file [file ...]')
+        sys.exit(1)
+
+    # Parse each file exactly once; the result feeds both outputs.
+    parsed = [extract_names(path) for path in args]
+
+    # One Series per file: element 0 is the year (column label), the rest
+    # are the entries.  concat pads shorter columns with NaN, so files
+    # with different numbers of names no longer break the table.
+    columns = [pd.Series(d[1:], name=d[0], dtype=object) for d in parsed]
+    frame = pd.concat(columns, axis=1)
+    frame.to_csv("DataFrame_Babynames.csv")
+
+    # With the summary flag, save one text file per input file with the
+    # year followed by the names and their respective ranks.
+    if summary:
+        for path, data in zip(args, parsed):
+            # The context manager closes the file even if the write fails.
+            with open(path + ".summary.txt", 'w') as out:
+                out.write('\n'.join(data) + '\n')
+
+if __name__ == '__main__':  # run main() only when executed as a script
+ main()
\ No newline at end of file