Skip to content

Commit 284f673

Browse files
committed
Replace pipfeed with news3k's summary option
1 parent 8861e35 commit 284f673

File tree

5 files changed

+61
-82
lines changed

5 files changed

+61
-82
lines changed

bestof.py

+14-32
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,14 @@
66
import random
77
import mimetypes
88
import tldr
9-
import pipfeed
109
import yt
1110
import news
1211
import deepseek
1312
import requests
1413
import urllib.parse
1514
import datetime
1615
import issues
16+
import shorten
1717
from urllib.parse import urlparse
1818
from pythorhead import Lemmy
1919
from pythorhead.types import SortType
@@ -61,20 +61,6 @@ def extract_desc(ci):
6161

6262
return "No description"
6363

64-
def shorten_text(text, deepseek_key):
    """Return *text* terminated by a blank line, condensed if it is too long.

    Text of at most 300 characters is returned unchanged (plus the trailing
    blank line). Longer text is handed to ``deepseek.shorten`` together with
    *deepseek_key* and the length limit; the result is tagged with the
    crayon/robot marker. If that call raises, the text is cut to the limit
    and ends in an ellipsis instead.
    """
    limit = 300

    # Nothing to do for text that already fits.
    if len(text) <= limit:
        return f'{text}\n\n'

    print('shorten...')
    try:
        condensed = deepseek.shorten(text, deepseek_key, limit)
        return f'{condensed} 🖍🤖\n\n'
    except Exception as e:
        print(f'deepseek raised exception: {e}')
        # Reserve three characters for the trailing "...".
        return f'{text[:limit - 3]}...\n\n'
77-
7864
def add_embed(p):
7965
if "embed_title" in p:
8066
t = f'*{p["embed_title"]}*\n\n'
@@ -350,7 +336,7 @@ def run(user, pw, instance, postcomm, cfg, post_title, images_only, nsfw_b, modu
350336
posttext = posttext + f"![]({p['post']['url']})\n\n"
351337
if images_only is not True:
352338
if 'body' in p['post']:
353-
posttext += shorten_text(p['post']['body'], rapidkey)
339+
posttext += shorten.shorten_text(p['post']['body'], rapidkey)
354340
elif "url" in p['post']:
355341
print(f"* {p['post']['name']} - {p['post']['url']}")
356342
if "url_content_type" in p['post']:
@@ -367,20 +353,16 @@ def run(user, pw, instance, postcomm, cfg, post_title, images_only, nsfw_b, modu
367353
if t is not None:
368354
posttext += t
369355
else:
370-
print('pipfeed...')
371-
t = pipfeed.extract(rapidkey, p['post']['url'])
356+
# use news3k to get the article image, title and summary
357+
print('news...')
358+
try:
359+
t = news.article(p['post']['url'], rapidkey)
360+
except Exception as e:
361+
print(f'failed to use news3k to get article: {e}')
362+
t = None
372363
if t is not None:
373364
posttext += t
374365
else:
375-
# use news3k to get an article image
376-
print('news...')
377-
try:
378-
t = news.article_image(p['post']['url'])
379-
except Exception as e:
380-
print(f'failed to use news3k to get article: {e}')
381-
t = None
382-
if t is not None:
383-
posttext += t
384366
# add title/desc from lemmy api
385367
print('lemmy fallback 1...')
386368
t = add_embed(p['post'])
@@ -389,12 +371,12 @@ def run(user, pw, instance, postcomm, cfg, post_title, images_only, nsfw_b, modu
389371
else:
390372
print('lemmy fallback 2...')
391373
if 'body' in p['post']:
392-
posttext += shorten_text(p['post']['body'], rapidkey)
374+
posttext += shorten.shorten_text(p['post']['body'], rapidkey)
393375
elif p['post']['url_content_type'][:6] == 'video/':
394376
# embed video url
395377
posttext = posttext + f"![]({p['post']['url']})\n\n"
396378
if 'body' in p['post']:
397-
posttext += shorten_text(p['post']['body'], rapidkey)
379+
posttext += shorten.shorten_text(p['post']['body'], rapidkey)
398380
else:
399381
'''no content type'''
400382
t = add_embed(p['post'])
@@ -406,18 +388,18 @@ def run(user, pw, instance, postcomm, cfg, post_title, images_only, nsfw_b, modu
406388
if t is not None:
407389
posttext += t
408390
if 'body' in p['post']:
409-
posttext += shorten_text(p['post']['body'], rapidkey)
391+
posttext += shorten.shorten_text(p['post']['body'], rapidkey)
410392
else:
411393
'''not url'''
412394
if 'body' in p['post']:
413-
posttext += shorten_text(p['post']['body'], rapidkey)
395+
posttext += shorten.shorten_text(p['post']['body'], rapidkey)
414396

415397
posttext = posttext + f"Posted by [{p['author']['name']}]({p['author']['actor_id']})\n\n"
416398

417399
posttext += "\n\n----\n\nThe main links are using lemmyverse.link which should redirect to the post on your own instance. If you have not used this before, you may need to go direct to https://lemmyverse.link/ and click on 'configure instance'. Some apps will open posts correctly when using the direct link."
418400

419401
if images_only is not True:
420-
posttext += "\n\n️🤖 indicates a summary generated using AI - 🖋️ TLDR This, 🖊️ Pipfeed, 🖍 Deepseek. It is possible that the summary does not accurately convey the meaning of the original article, refer to the source material if in any doubt."
402+
posttext += "\n\n️🤖 indicates a summary generated using AI - 🖋️ TLDR This, 🖊️ news3k, 🖍 Deepseek. It is possible that the summary does not accurately convey the meaning of the original article, refer to the source material if in any doubt."
421403

422404
print(posttext)
423405

news.py

+30
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#!/usr/bin/python3
22
from newspaper import Article
33
from newspaper import Config
4+
import shorten
5+
import nltk
46

57
def article_image(url):
68
config = Config()
@@ -20,3 +22,31 @@ def article_image(url):
2022
#print(article.text)
2123

2224
return None
25+
26+
27+
def article(url, rapidkey):
    """Build a markdown snippet (image, title, summary) for the article at *url*.

    The article is downloaded and parsed with newspaper3k, then run through
    its NLP step to obtain a summary.

    Args:
        url: address of the article to fetch.
        rapidkey: key forwarded to ``shorten.shorten_text`` when the article
            has no summary and its full text has to be shortened instead.

    Returns:
        The markdown text, or ``None`` if the download raised or nothing
        usable was extracted.

    Exceptions raised by ``parse()``, ``nltk.download()`` or ``nlp()`` are
    not caught here and propagate to the caller.
    """
    config = Config()
    config.browser_user_agent = "Mozilla/5.0"

    # The config must be handed to the Article; otherwise the custom user
    # agent set above is never used for the request.
    art = Article(url, config=config)
    try:
        art.download()
    except Exception as e:
        print(f'unable to download article, {e}')
        return None
    art.parse()
    # nlp() relies on NLTK's 'punkt' tokenizer data being available.
    nltk.download('punkt')
    art.nlp()

    # newspaper3k leaves these attributes as empty strings (not None) when
    # nothing was extracted, so test truthiness: an `is not None` check would
    # emit empty markup and never reach the full-text fallback.
    t = ''
    if art.top_image:
        t = f'![]({art.top_image})\n\n'
    if art.title:
        t += f'*{art.title}*\n\n'
    if art.summary:
        t += f'{art.summary} 🖊️️🤖\n\n'
    elif art.text:
        t += shorten.shorten_text(art.text, rapidkey)

    # Signal "nothing found" with None so the caller can try its fallbacks.
    return t or None

pipfeed.py

-49
This file was deleted.

requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@ aiohttp
66
pytube
77
newspaper3k
88
lxml_html_clean
9-
9+
nltk

shorten.py

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#!/usr/bin/python3
2+
import deepseek
3+
4+
def shorten_text(text, deepseek_key):
    """Return *text* followed by a blank line, shortened when over 300 chars.

    Args:
        text: the text to emit.
        deepseek_key: key passed through to ``deepseek.shorten``.

    Returns:
        * ``text`` plus a blank line when it is at most 300 characters;
        * otherwise the result of ``deepseek.shorten`` tagged with the
          crayon/robot marker;
        * or, if that call raises, the first 297 characters of ``text``
          followed by ``...``.
    """
    max_len = 300

    # Early exit: text within the limit is passed through as-is.
    if len(text) <= max_len:
        return f'{text}\n\n'

    print('shorten...')
    try:
        summary = deepseek.shorten(text, deepseek_key, max_len)
        return f'{summary} 🖍🤖\n\n'
    except Exception as e:
        print(f'deepseek raised exception: {e}')
        # Keep the fallback within max_len including the "..." suffix.
        return f'{text[:max_len - 3]}...\n\n'

0 commit comments

Comments
 (0)