Spaces:

tracinginsights
/

QuotesBot

Sleeping

App Files Files Community

tracinginsights committed on Dec 28, 2022

Commit

2eee6ac

1 Parent(s): 044109a

Update app.py

Browse files

Files changed (1) hide show

app.py +121 -2

app.py CHANGED Viewed

@@ -14,6 +14,17 @@ from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline
 import string
 URL = "https://www.formula1.com/content/fom-website/en/latest/all.xml"
@@ -22,6 +33,7 @@ def get_xml(url):
     # use urllib.parse to check for formula1.com website or other news
     xml = pd.read_xml(url,xpath='channel/item')
 # care taken to only consider results where there are more words not a single word quotes
@@ -127,6 +139,112 @@ def remove_punctuations(text):
     return modified_text
 def check_updates(every=300):
     while True:
@@ -138,10 +256,11 @@ def check_updates(every=300):
             # loops through new articles and gets the necessary text, quotes and speakers
             dfs_dict = get_text(new_articles_df)
         else:
             print('No New article is found')

 import string
+import textwrap
+import tweepy
+import gradio as gr
+def image_classifier(inp):
+    """Return fixed placeholder class scores; *inp* is not inspected."""
+    scores = {'cat': 0.3, 'dog': 0.7}
+    return scores
+# Gradio front end wired to the stub image_classifier above (image in, text out).
+demo = gr.Interface(fn=image_classifier, inputs="image", outputs="text", analytics_enabled=True)
+# NOTE(review): the auth pair is a literal ("username", "password") in source -
+# confirm it is a placeholder and move real credentials out of the file.
+# NOTE(review): launch() runs at import time, before the definitions that follow
+# in this file - confirm it returns so the rest of the module still executes.
+demo.launch(max_threads=1,auth=("username", "password"), show_api=False)
 URL = "https://www.formula1.com/content/fom-website/en/latest/all.xml"
     # use urllib.parse to check for formula1.com website or other news
     xml = pd.read_xml(url,xpath='channel/item')
+# Fetch the feed once at import time.
+# NOTE(review): presumably the baseline that check_updates compares later fetches against - confirm.
+previous_xml = get_xml(URL)
 # care taken to only consider results where there are more words not a single word quotes
     return modified_text
+def get_speaker_quotes(dfs_dict, question_answerer):
+    """Attribute each extracted quote to a speaker using a question-answering model.
+
+    Args:
+        dfs_dict: Mapping of article link -> dict holding the keys 'context',
+            'quotes' and 'speakers'.
+        question_answerer: Callable invoked as
+            ``question_answerer(question=..., context=...)``; its result must
+            expose an 'answer' entry.
+
+    Returns:
+        A list of dicts with the keys 'speaker', 'quote' and 'source', one per
+        quote whose predicted speaker is among that article's 'speakers'.
+    """
+    # The model card gives max_seq_len == 384:
+    # https://huggingface.co/deepset/roberta-base-squad2
+    # The quote embedded in the question is capped at that many characters.
+    max_question_quote_chars = 384
+    speaker_quote = []
+    for link in tqdm(dfs_dict):
+        article = dfs_dict[link]
+        context = article['context']
+        potential_speakers = article['speakers']
+        for full_quote in article['quotes']:
+            # Only the question text is shortened; the stored quote stays intact
+            # so downstream consumers never receive a quote cut mid-word.
+            question_quote = full_quote[:max_question_quote_chars]
+            speaker_dict = question_answerer(question=f"Who said '{question_quote}'?", context=context)
+            speaker = speaker_dict['answer']
+            if speaker:
+                speaker = remove_punctuations(speaker)
+            # Drop quotes the model could not pin on a known candidate speaker.
+            if not speaker or speaker not in potential_speakers:
+                continue
+            speaker_quote.append({'speaker': speaker, 'quote': full_quote, 'source': link})
+    return speaker_quote
+import os
+# Twitter API credentials come from the environment (e.g. Space secrets), never from source.
+# A missing variable yields None, so the module still imports and the failure
+# surfaces when the client is actually used.
+# NOTE(review): the values previously committed on these lines remain in the
+# repository history and must be revoked and regenerated.
+api_key = os.environ.get("TWITTER_API_KEY")
+secret_api_key = os.environ.get("TWITTER_API_SECRET_KEY")
+access_token = os.environ.get("TWITTER_ACCESS_TOKEN")
+secret_access_token = os.environ.get("TWITTER_ACCESS_TOKEN_SECRET")
+bearer_token = os.environ.get("TWITTER_BEARER_TOKEN")
+def post_to_twitter():
+    """Build and return a tweepy client from the module-level credentials.
+
+    Nothing is posted here; callers post through the returned client.
+    """
+    credentials = {
+        "bearer_token": bearer_token,
+        "consumer_key": api_key,
+        "consumer_secret": secret_api_key,
+        "access_token": access_token,
+        "access_token_secret": secret_access_token,
+    }
+    return tweepy.Client(wait_on_rate_limit=True, **credentials)
+def split_near_space(string, max_length):
+    """Wrap *string* into a list of lines at most *max_length* characters wide.
+
+    Uses textwrap's default options, so breaks fall on whitespace where
+    possible and a word longer than *max_length* is split.
+    """
+    wrapper = textwrap.TextWrapper(width=max_length)
+    return wrapper.wrap(string)
+def send_tweets(speaker_quote):
+    """Post each speaker/quote pair as a tweet, or a thread when it is too long.
+
+    Every quote is followed by a reply tweet naming its source. Posting is
+    best-effort: a failure on one pair is reported and the next pair is tried.
+
+    Args:
+        speaker_quote: Iterable of dicts with the keys 'speaker', 'quote'
+            and 'source'.
+    """
+    max_tweet_length = 280
+    # One client serves the whole batch; there is no need to rebuild it per pair.
+    api = post_to_twitter()
+    for pair in speaker_quote:
+        speaker = pair['speaker']
+        quote = pair['quote']
+        source = pair['source']
+        # 10 is for emojis and #f1 hashtag
+        if len(speaker) + len(quote) + 10 < max_tweet_length:
+            texts = [f"🗣️ | {speaker}: '{quote}'"]
+        else:
+            # The first tweet of a thread carries the most decoration:
+            # "🗣️ | " (5) + ": '" (3) + "...'" (4) = 12 characters by len().
+            # Reserving it for every chunk keeps each tweet within the limit.
+            # NOTE(review): Twitter's weighted count may differ for non-Latin text.
+            chunk_width = max_tweet_length - len(speaker) - 12
+            if chunk_width < 1:
+                print(f"Skipping quote from {source}: speaker name leaves no room for text")
+                continue
+            chunks = split_near_space(quote, chunk_width)
+            if not chunks:
+                continue
+            last_index = len(chunks) - 1
+            texts = []
+            for index, chunk in enumerate(chunks):
+                # The opening tweet always names the speaker, even if it is also the last.
+                prefix = f"🗣️ | {speaker}: '" if index == 0 else "'..."
+                suffix = "'" if index == last_index else "...'"
+                texts.append(f"{prefix}{chunk}{suffix}")
+        try:
+            reply_to = None
+            for text in texts:
+                posted = api.create_tweet(text=text, in_reply_to_tweet_id=reply_to)
+                reply_to = posted.data['id']
+            api.create_tweet(text=f"Source: {source}", in_reply_to_tweet_id=reply_to)
+        except Exception as exc:
+            # Best-effort per pair, but report the failure instead of hiding it.
+            print(f"Failed to tweet quote from {source}: {exc}")
 def check_updates(every=300):
     while True:
             # loops through new articles and gets the necessary text, quotes and speakers
             dfs_dict = get_text(new_articles_df)
+            speaker_quote = get_speaker_quotes(dfs_dict, question_answerer)
+            send_tweets(speaker_quote)
         else:
             print('No New article is found')
+# Start polling at import time with every=300.
+# NOTE(review): check_updates loops in `while True`, so presumably this call never returns - confirm.
+check_updates(300)