Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions base_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,19 +71,22 @@ def tweet(self, text: str, article_id: str, url: str, image_path: str):
logging.info(f'Id to store: {tweet_id}')
self.data_provider.update_tweet_db(article_id, self.get_source(), tweet_id)

def handle_articles(self, data: Dict):
    """Start tracking a new article, or tweet the changes of a known one.

    ``data`` must carry at least the ``article_id`` and ``hash`` keys.
    An article that is not tracked yet is handed to the data provider for
    tracking. For a tracked article, the number of stored versions with
    this hash is looked up; any count other than 1 is treated as "changed"
    and triggers ``tweet_all_changes``.
    """
    article_id = data['article_id']

    # Guard clause: first sighting of this article, nothing to compare yet.
    if not self.data_provider.is_article_tracked(article_id, self.get_source()):
        self.data_provider.track_article(data)
        return

    version_count = self.data_provider.get_article_version_count(
        article_id, self.get_source(), data['hash'])
    if version_count != 1:  # Changed
        self.tweet_all_changes(data)

def tweet_change(self, previous_data: str, current_data: str, text_to_tweet: str, article_id: str, url: str):
    """Render a text diff between two versions and tweet it.

    The diff picture is produced by ``ImageDiffGenerator.generate_text_diff``
    and its path is passed to ``self.tweet`` together with the caption.

    Only the text-diff renderer is called here: the image-diff renderer
    takes image URLs, and its result was discarded anyway.
    """
    diff_image_path = ImageDiffGenerator.generate_text_diff(
        previous_data, current_data, text_to_tweet)
    self.tweet(text_to_tweet, article_id, url, diff_image_path)

def tweet_image_change(self, old_url: str, new_url: str, article_id: str, url: str):
    """Render an old-vs-new picture for an article image change and tweet it.

    The same fixed Hebrew caption is used both inside the rendered picture
    and as the tweet text.
    """
    caption = "שינוי בתמונה"
    diff_image_path = ImageDiffGenerator.generate_image_diff(old_url, new_url, caption)
    self.tweet(caption, article_id, url, diff_image_path)

def tweet_all_changes(self, data: Dict):
article_id = data['article_id']
url = data['url']
Expand All @@ -101,6 +104,11 @@ def tweet_all_changes(self, data: Dict):
if self.should_tweet(url, previous_version['abstract'], data['abstract']):
self.tweet_change(previous_version['abstract'], data['abstract'], "שינוי בתת כותרת", article_id, url)

if data["image"] and previous_version["image"] and data["image"] != previous_version["image"]:
# TODO: Add validator if image is up
save_to_db = True
self.tweet_image_change(previous_version["image"], data["image"], article_id, url)

if save_to_db:
self.data_provider.increase_article_version(data)

Expand All @@ -125,6 +133,6 @@ def loop_entries(self, entries):
logging.exception(f'Problem looping entry: {article}')
for article_dict in articles.values():
try:
self.store_data(article_dict)
self.handle_articles(article_dict)
except BaseException as e:
logging.exception(f'Problem looping entry: {article_dict}')
50 changes: 36 additions & 14 deletions css/styles.css
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,17 @@ p {
}

/* Removed text in a rendered diff: pink highlight, struck through, light weight. */
del {
    color: black;
    background-color: lightpink;
    font-weight: lighter;
    text-decoration: line-through;
}

/* Inserted text in a rendered diff: aquamarine highlight, bold, no underline. */
ins {
    color: black;
    background-color: aquamarine;
    font-weight: bold;
    text-decoration: none;
}

img {
Expand All @@ -43,14 +43,36 @@ img {
width: 30px;
}

/* Outer container of the image-diff template (the div with id "wrapper"). */
#wrapper {
padding-right: 10px;
}

/* Left-aligned, left-to-right footer text in small gray type. */
.alignleft {
    margin-right: 0em;
    font-size: 14px;
    text-align: left;
    direction: ltr;
    /* Keep the terminating semicolon: without it, a declaration added after
       this one is fused with it and the pair is dropped as invalid. */
    color: gray;
}

/* Floats an element to the right edge of its line. */
.alignright {
    float: right;
}

/* Horizontal flex line with vertically centred children; the image-diff
   template uses it for the old image, the arrow and the new image. */
.row {
display: flex;
flex-direction: row;
align-items: center;
width: 500px;
margin-top: 10px;
}

/* Fixed 175x100 box for elements carrying class "img" (a class selector,
   separate from the bare img element rule earlier in this stylesheet). */
.img {
width: 175px;
height: 100px;
}

/* Equal horizontal padding on both sides; the image-diff template applies
   it to the arrow between the two pictures. */
.padded {
padding-left: 50px;
padding-right: 50px;
}
12 changes: 6 additions & 6 deletions data_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

import dataset

class DataProvider():

class DataProvider:
def __init__(self):
self.db = dataset.connect('sqlite:///titles.db')
self.articles_table = self.db['rss_ids']
Expand All @@ -25,12 +25,12 @@ def track_article(self, data: dict):
data['version'] = 1
self.versions_table.insert(data)
logging.info(f"New article tracked: {data['url']}")

def get_article_version_count(self, artice_id: str, article_source: str, hash: str):
    """Count stored versions matching an article id, source and content hash.

    The id is matched through a column expression on the underlying table,
    while source and hash are passed as keyword filters to ``count``.

    NOTE(review): ``artice_id`` (sic) and ``hash`` (shadows the builtin) are
    kept as-is because they are part of the caller-visible signature.
    """
    versions = self.versions_table
    id_matches = versions.table.columns.article_id == artice_id
    return versions.count(id_matches, article_source=article_source, hash=hash)

def get_previous_article_version(self, article_id: str, article_source: str):
return self.db.query(f'SELECT * \
Expand All @@ -53,7 +53,7 @@ def update_tweet_db(self, article_id: str, article_source: str, tweet_id: str):
}
self.articles_table.update(article, ['article_id', 'article_source'])
logging.debug('Updated tweet ID in db')

def get_previous_tweet_id(self, article_id: str, article_source: str):
search = self.articles_table.find_one(article_id=article_id, article_source=article_source)
if search is None or 'tweet_id' not in search:
Expand Down
33 changes: 24 additions & 9 deletions image_diff_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,30 +10,45 @@


class ImageDiffGenerator:
html_template = None
text_diff_template = None
image_diff_template = None
driver = None
phantomjs_path = None

@staticmethod
def init():
    """Lazily load both HTML templates and start the PhantomJS driver.

    Does nothing once the templates and the driver are all in place.
    Everything is loaded into locals first and only published on the class
    at the end, so a failure part-way through (missing template file, unset
    environment variable, driver start-up error) leaves the class
    uninitialised and the next call retries, instead of leaving a
    half-initialised class behind.

    Raises:
        OSError: if ``text_template.html`` or ``image_template.html``
            cannot be opened.
        KeyError: if the ``PHANTOMJS_PATH`` environment variable is unset.
    """
    # NOTE(review): assumes the driver is meant to be created once, together
    # with the templates - confirm against the original indentation.
    already_initialised = (
        ImageDiffGenerator.text_diff_template is not None
        and ImageDiffGenerator.image_diff_template is not None
        and ImageDiffGenerator.driver is not None
    )
    if already_initialised:
        return

    with open("text_template.html", "r", encoding="utf-8") as html_file:
        text_template = html_file.read()

    with open("image_template.html", "r", encoding="utf-8") as html_file:
        image_template = html_file.read()

    phantomjs_path = os.environ['PHANTOMJS_PATH']
    driver = webdriver.PhantomJS(executable_path=phantomjs_path)

    # Publish only after every step succeeded.
    ImageDiffGenerator.text_diff_template = text_template
    ImageDiffGenerator.image_diff_template = image_template
    ImageDiffGenerator.phantomjs_path = phantomjs_path
    ImageDiffGenerator.driver = driver

@staticmethod
def generate_text_diff(old: str, new: str, text_to_tweet: str):
    """Render a picture of the textual diff between ``old`` and ``new``.

    Both inputs are passed through ``strip_html`` before diffing. The text
    template is filled with the caption and the diff markup, and the result
    is handed to ``generate_image`` together with the SHA-224 hex digest of
    the stripped new text. Returns whatever ``generate_image`` returns.
    """
    ImageDiffGenerator.init()

    old_text = strip_html(old)
    new_text = strip_html(new)
    digest = hashlib.sha224(new_text.encode('utf8')).hexdigest()
    diff_markup = html_diff(old_text, new_text)

    # Caption first, then the diff markup - same substitution order as before.
    page = ImageDiffGenerator.text_diff_template
    page = page.replace("text_to_tweet", text_to_tweet)
    page = page.replace("diff_html", diff_markup)
    return ImageDiffGenerator.generate_image(page, digest)

@staticmethod
def generate_image_diff(old: str, new: str, text_to_tweet: str):
    """Render an old-vs-new picture for two image URLs.

    Fills the image template with both URLs and the caption, then hands the
    page to ``generate_image`` together with the SHA-224 hex digest of the
    raw new URL. Returns whatever ``generate_image`` returns.

    Args:
        old: URL of the previous image.
        new: URL of the current image.
        text_to_tweet: caption inserted into the template as-is.
    """
    from html import escape  # function-scope import keeps the block self-contained

    ImageDiffGenerator.init()

    # The URLs come from an external feed and are substituted into quoted
    # src attributes of the template, so quotes, ampersands and angle
    # brackets are escaped to keep the markup intact. The browser decodes
    # the entities back to the original URL.
    page = (
        ImageDiffGenerator.image_diff_template
        .replace("old_img", escape(old, quote=True))
        .replace("new_img", escape(new, quote=True))
        .replace("text_to_tweet", text_to_tweet)
    )
    # The hash is taken from the raw (unescaped) URL so file names are unchanged.
    new_hash = hashlib.sha224(new.encode('utf8')).hexdigest()
    return ImageDiffGenerator.generate_image(page, new_hash)

@staticmethod
def generate_image(html, diff_hash):
with open('tmp.html', 'w', encoding="utf-8") as f:
f.write(html)

Expand All @@ -47,7 +62,7 @@ def generate_image_diff(old: str, new: str, text_to_tweet: str):
block_width = e.size['width']
end_width = start_width
total_height = start_height + block_height + end_height
total_width = 510 # Override because body width is set to 500
total_width = 520 # Override because body width is set to 500
timestamp = str(int(time.time()))
ImageDiffGenerator.driver.save_screenshot('./tmp.png')
img = Image.open('./tmp.png')
Expand All @@ -65,7 +80,7 @@ def generate_image_diff(old: str, new: str, text_to_tweet: str):
offset = (int((bg_w - total_width) / 2),
int((bg_h - total_height) / 2))
background.paste(img2, offset)
filename = timestamp + new_hash
filename = timestamp + diff_hash
saved_file_path = f'./output/{filename}.png'
background.save(saved_file_path)
return saved_file_path
31 changes: 31 additions & 0 deletions image_template.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
<!doctype html>
<!-- Page rendered to a screenshot for image-change tweets. The caption text
     and the src values of the two class="img" pictures are placeholder
     tokens that ImageDiffGenerator.generate_image_diff substitutes before
     rendering; keep those tokens out of comments, since every occurrence
     in this file is replaced. -->
<html lang="en">
<head>
<meta charset="utf-8">
<link rel="stylesheet" href="./css/styles.css">
</head>

<body style="width: 500px;">
<div id="wrapper">
<!-- Caption line -->
<div>
text_to_tweet:
</div>
<!-- Previous picture, arrow, current picture -->
<div class="row">
<img class="img" src="old_img" />

<img class="padded" src="img/arrow_back-24px.svg" alt="back-arrow" />
<img class="img" src="new_img"
/>
</div>
<!-- Footer: account handle on the left, Hebrew tagline floated right -->
<div>
<p class="alignleft">
<img src="img/twitter.png"/>
@ILNewsDiff
<span class="alignright">
כותרת בשינוי אדרת
</span>
</p>
</div>
</div>
</body>
</html>
1 change: 1 addition & 0 deletions img/arrow_back-24px.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
7 changes: 6 additions & 1 deletion parsers/haaretz_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,9 @@ def get_tweet_validators(self):
return [validators.content_validator]

def entry_to_dict(self, article):
    """Convert a feed entry to the standard article dict plus an ``image`` key.

    The base dict comes from ``parser_utils.standard_entry_to_dict``.
    ``image`` is the ``url`` of the entry's first ``media_content`` item,
    or ``None`` when the entry has no media content or that item has no
    ``url``.
    """
    article_dict = parser_utils.standard_entry_to_dict(article, self.get_source(), self.tz)

    # Entries without media may not expose ``media_content`` at all, so
    # look it up defensively instead of letting the attribute access raise
    # and the whole article be dropped by the caller's error handler.
    media_content = getattr(article, "media_content", None)
    # NOTE(review): assumes each media item is a mapping with an optional
    # "url" key - confirm against the feed library in use.
    article_dict["image"] = media_content[0].get("url") if media_content else None
    return article_dict
File renamed without changes.