- Below is a general recipe for experimenting with RSS feeds AND speech synthesizers.
- For the speech synthesis there are two very similar scripts: one works with ESPEAK (free, open source), the other works with Microsoft SAPI.
- In order to run these scripts you will need MYSQL installed. You will need a minimum level of understanding of how MYSQL works. You can easily translate the database piece to ODBC, and the rest to PowerShell or whatever. That’s your business, not mine.
- Once you’ve installed MYSQL and the server is running, create a database called “NEWS”: create database NEWS;
- After you’ve created the NEWS database, using the CLI (command line interface) as above, type command: use NEWS;
- Once you are in the NEWS database, copy and paste the entire script below into the CLI, or save it as a text file and load it from the CLI using the command: source rss.sql (assuming you stored the create table text below in that file)
- In the example I’m using the ROOT database user, why? – because IDGAF. But best practice is to create special database users with limited permissions. If you’ve installed your MYSQL database without granting permission to external (port) connections, then it’s not a concern.
- Running the aggregator might trigger a site, or even your own network, to block you. This behavior, which was innocuous 20 years ago, is now attacked and classified as aggressive network behavior. Just be careful.
- After you’ve run the aggregation script (and the script can be run by CRON or the Windows Task Scheduler daily or hourly if you like), then you can run one of the speech synthesis apps, which reads the headlines aloud.
- If you have a compatible shortwave radio, with upper and lower side band, and a LINUX computer running JS8 Call with appropriate libraries for CAT control? – then look into this and you can set up a headline service over shortwave: https://planetarystatusreport.com/?p=7432
Have fun getting your daily dose of the fucking news.
Create Table Script for RSS Database
-- Creates the RSS table used by the aggregator and the headline readers.
-- Run it inside the NEWS database (use NEWS;).
SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO";
SET AUTOCOMMIT = 0;
START TRANSACTION;
-- Session time zone is set to UTC for the statements below.
SET time_zone = "+00:00";
-- One row per fetched story.
CREATE TABLE `RSS` (
-- Row identifier; becomes the auto-increment primary key further down.
`ID` bigint(20) NOT NULL,
-- Short feed tag written by the aggregator (e.g. "CNN", "BBC").
`SOURCE` varchar(100) COLLATE utf8_unicode_ci NOT NULL,
-- Story URL; made unique further down so a story is stored only once.
`LINK` varchar(255) COLLATE utf8_unicode_ci NOT NULL,
-- Headline text.
`TITLE` varchar(400) COLLATE utf8_unicode_ci NOT NULL,
-- Publication timestamp taken from the feed entry.
`PUBLISHED` datetime NOT NULL,
-- Article body text; the aggregator truncates it to 5000 characters before insert.
`ARTICLE` text COLLATE utf8_unicode_ci NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
-- Keys: ID is the primary key, LINK must be unique.
ALTER TABLE `RSS`
ADD PRIMARY KEY (`ID`),
ADD UNIQUE KEY `unique_link` (`LINK`);
-- Let MySQL assign ID values automatically on insert.
ALTER TABLE `RSS`
MODIFY `ID` bigint(20) NOT NULL AUTO_INCREMENT;
COMMIT;
Python Script for Aggregating RSS Feeds and storing stories locally
from __future__ import print_function
import os
import feedparser
import os.path, time
import json
import math
import time
import urllib.parse as pr
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup as BS
from requests import get
from os.path import exists
from socket import socket, AF_INET, SOCK_STREAM
from decimal import Decimal
from datetime import datetime, date, timedelta
from anyascii import anyascii
import mysql.connector
# MySQL connection settings passed to mysql.connector.connect() by every
# database helper in this script.
# NOTE(review): root with a hard-coded password is only acceptable for a
# local experiment; use a limited-permission account otherwise.
usern = "root"
passw = "password"
dbn = "NEWS"
servern = "localhost"
portn = 3306
# NOTE(review): the four settings below are not referenced anywhere in the
# visible part of this script; presumably they belong to a reader/broadcast
# variant -- confirm before removing.
newsServiceM3 = "ZEROHEDGE"
retHeadlines = 4
newsMode = 3
bigSleep = 90
def GetArt(number):
    """Fetch one stored story by its primary key.

    Args:
        number: Value of the RSS.ID column to look up.

    Returns:
        The matching row as a tuple ``(ARTICLE, SOURCE, LINK)``.

    Raises:
        IndexError: If no row has the given ID.
    """
    # Connect with the MySQL Server
    cnx = mysql.connector.connect(user=usern, database=dbn, password=passw, host=servern, port=portn)
    try:
        cur = cnx.cursor(buffered=True)
        # Bind the ID as a query parameter instead of formatting it into
        # the SQL text, so the value can never alter the statement.
        cur.execute("select ARTICLE, SOURCE, LINK from RSS where ID = %s", (number,))
        retRes = cur.fetchall()
    finally:
        # Release the connection even when the query raises.
        cnx.close()
    return retRes[0]
def GetTopHourly(source):
    """Return the most recently published story for one feed.

    Args:
        source: Feed tag stored in the SOURCE column (e.g. "ZEROHEDGE").

    Returns:
        A list with at most one row
        ``(ID, TITLE, PUBLISHED, SOURCE, LOF)``, where LOF is the length of
        the stored article text. The list is empty when the feed has no rows.
    """
    # Connect with the MySQL Server
    cnx = mysql.connector.connect(user=usern, database=dbn, password=passw, host=servern, port=portn)
    qry = (
        "select ID, TITLE, PUBLISHED, SOURCE, length(ARTICLE) as LOF "
        "from RSS where SOURCE = %s order by PUBLISHED desc limit 1"
    )
    try:
        cur = cnx.cursor(buffered=True)
        # The source tag is bound as a parameter; a quote inside it can no
        # longer break (or inject into) the statement.
        cur.execute(qry, (source,))
        return cur.fetchall()
    finally:
        # Release the connection even when the query raises.
        cnx.close()
def GetTop(source, number):
    """Return the newest stories for one feed.

    Args:
        source: Feed tag stored in the SOURCE column (e.g. "CNN").
        number: Maximum number of rows to return; must be convertible to int.

    Returns:
        A list of rows ``(ID, TITLE, PUBLISHED, SOURCE, LOF)`` ordered from
        newest to oldest, where LOF is the length of the stored article text.

    Raises:
        ValueError: If ``number`` cannot be converted to an integer.
    """
    # LIMIT needs a numeric literal, so coerce before binding: a string
    # parameter would be sent quoted and rejected by the server.
    row_limit = int(number)
    # Connect with the MySQL Server
    cnx = mysql.connector.connect(user=usern, database=dbn, password=passw, host=servern, port=portn)
    qry = (
        "select ID, TITLE, PUBLISHED, SOURCE, length(ARTICLE) as LOF "
        "from RSS where SOURCE = %s order by PUBLISHED desc limit %s"
    )
    try:
        cur = cnx.cursor(buffered=True)
        # Both values are bound as parameters rather than formatted into
        # the SQL text.
        cur.execute(qry, (source, row_limit))
        return cur.fetchall()
    finally:
        # Release the connection even when the query raises.
        cnx.close()
def AlreadySaved(link):
    """Tell whether a story URL is already stored in the RSS table.

    Args:
        link: Story URL to look up in the LINK column.

    Returns:
        True if at least one row has this LINK, otherwise False.
    """
    # Connect with the MySQL Server
    cnx = mysql.connector.connect(user=usern, database=dbn, password=passw, host=servern, port=portn)
    try:
        cur = cnx.cursor(buffered=True)
        # Bind the URL as a parameter: URLs may legitimately contain "'",
        # which broke the previous string-concatenated statement.
        cur.execute("select ID from RSS where LINK = %s", (link,))
        rows = cur.fetchall()
    finally:
        # Release the connection even when the query raises.
        cnx.close()
    return len(rows) > 0
def SaveRSS(source, title, link, published, article):
    """Insert one story into the RSS table.

    Args:
        source: Feed tag for the SOURCE column.
        title: Headline text.
        link: Story URL (the table enforces uniqueness on it).
        published: Publication time as a string MySQL accepts for DATETIME.
        article: Article body text; transliterated to ASCII with anyascii
            and cut to 5000 characters before it is stored.

    Returns:
        None.
    """
    # NOTE(review): the insert below is parameterized, so doubling the
    # apostrophes is not needed for escaping and the doubled form ends up
    # in the stored text. It is kept on purpose because both reader
    # scripts strip "''" from TITLE when they read it back; change writer
    # and readers together.
    tit = title.replace("'", "''")
    art = str(anyascii(article)).replace("'", "''")
    # Cap the stored body at 5000 characters.
    art = art[:5000]
    qry = """
    INSERT INTO RSS
    (SOURCE,
    LINK,
    TITLE,
    PUBLISHED,
    ARTICLE)
    VALUES
    (%s,%s,%s,%s,%s)
    """
    val = (source, link, tit, published, art)
    cnx = mysql.connector.connect(user=usern, database=dbn, password=passw, host=servern, port=portn)
    try:
        cur = cnx.cursor()
        cur.execute(qry, val)
        cnx.commit()
    finally:
        # Release the connection even when the insert raises.
        cnx.close()
def _article_text(html):
    """Join the useful <p> paragraphs of an HTML page into one text chunk."""
    # Paragraphs that are site chrome rather than article text.
    boilerplate = (
        "No Search Results Found",
        "Terms of Service",
        "Advertise with us",
        "Media Inquiries",
    )
    kept = []
    for para in BS(html, 'html.parser').find_all("p"):
        txtval = para.get_text().strip()
        words = txtval.split()
        # Empty and one-word paragraphs carry no article text.
        if len(words) <= 1:
            continue
        # Four-word "posted N hours/days ago" style timestamps.
        if "posted" in txtval and ("hours" in txtval or "days" in txtval) and len(words) == 4:
            continue
        if txtval in boilerplate:
            continue
        kept.append(" " + txtval + "\n")
    return "".join(kept).strip()


def GrabRSS(RssURL, SourceName):
    """Download one RSS feed and store every story not yet in the database.

    For each feed entry the linked page is fetched, its paragraph text is
    extracted, and the result is written with SaveRSS(). Entries that lack
    the needed fields, are already stored, or cannot be downloaded are
    skipped so that one bad entry does not stop the rest of the feed.

    Args:
        RssURL: URL of the RSS/Atom feed to parse.
        SourceName: Feed tag stored in the SOURCE column for these stories.

    Returns:
        None.
    """
    # Local import: the file only imports `get` from requests.
    from requests import RequestException

    hdrs = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    NewsFeed = feedparser.parse(RssURL)
    for na in NewsFeed.entries:
        try:
            print(na.title)
            print(na.link)
            print(na.published)
            print(na.published_parsed)
        except Exception:
            # Best-effort skip of entries missing a field (or unprintable on
            # this console). Unlike a bare "except", Ctrl-C still works.
            continue
        if not na.published_parsed:
            # No usable publication time, so there is nothing to store in
            # the NOT NULL PUBLISHED column.
            continue
        if AlreadySaved(na.link):
            continue
        print("*************************")
        try:
            # A timeout keeps one stalled site from hanging the whole run.
            response = get(na.link, headers=hdrs, timeout=30)
        except RequestException as exc:
            print("Could not fetch %s: %s" % (na.link, exc))
            continue
        print(na.keys())
        article = _article_text(response.content)
        # Zero-padded "YYYY-MM-DD HH:MM:SS" built from the first six fields
        # of the parsed time tuple.
        ptms = "%04d-%02d-%02d %02d:%02d:%02d" % tuple(na.published_parsed[:6])
        SaveRSS(SourceName, na.title, na.link, ptms, article)
        print(article)
def debugHere():
    """Pause execution until the operator presses Enter."""
    prompt = "Press enter to continue ..."
    input(prompt)
def clearConsole():
    """Clear the terminal using the command that fits the running OS."""
    # Windows-family shells use "cls"; everything else gets "clear".
    on_windows = os.name in ('nt', 'dos')
    os.system('cls' if on_windows else 'clear')
def CycleFeeds():
    """Poll every enabled feed once, announcing each one on stdout first."""
    # (label shown in the progress line, feed URL, SOURCE tag stored in the DB)
    feeds = (
        ("Babylon Bee", "https://babylonbee.com/feed", "BB"),
        ("ONION", "https://www.theonion.com/rss", "ONION"),
        ("INFOWARS", "https://www.infowars.com/rss.xml", "INFOWARS"),
        ("ZEROHEDGE", "https://feeds.feedburner.com/zerohedge/feed", "ZEROHEDGE"),
        # YAHOO stays switched off:
        # ("YAHOO", "https://news.yahoo.com/rss/", "YAHOO"),
        ("CNN", "http://rss.cnn.com/rss/cnn_topstories.rss", "CNN"),
        ("BBC", "http://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml", "BBC"),
        ("NYT", "https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml", "NYT"),
    )
    for label, url, tag in feeds:
        print("Grabbing " + label + " ...")
        GrabRSS(url, tag)
# FEEDS (partial list -- the URLs actually polled are defined in CycleFeeds()):
# 1. INFOWARS: https://www.infowars.com/rss.xml
# 2. ZEROHEDGE: https://feeds.feedburner.com/zerohedge/feed
# 3. YAHOO: https://news.yahoo.com/rss/
# 4. CNN: http://rss.cnn.com/rss/cnn_topstories.rss
# Script entry: wait one second, then run a single aggregation pass.
time.sleep(1)
CycleFeeds()
Python Speech Synthesis Scripts
A: Windows – SAPI
# This script reads headlines from the RSS news feed database aloud
# through the Windows SAPI voice.
import win32com.client
# COM automation object for the Windows speech engine; the main loop below
# calls speaker.Speak() on it.
speaker = win32com.client.Dispatch("SAPI.SpVoice")
import os
import time
import mysql.connector
# MySQL connection settings passed to mysql.connector.connect() in GetSql().
# NOTE(review): root with a hard-coded password is only acceptable for a
# local experiment.
usern = "root"
passw = "password"
dbn = "NEWS"
servern = "localhost"
portn = 3306
def TOS(text):
    """Speak *text* with the ``espeak`` command-line synthesizer.

    Runs espeak at 130 words per minute with the ``en+m1`` voice. A missing
    espeak executable is reported on stdout rather than raised.

    Args:
        text: The phrase to speak.

    Returns:
        None.
    """
    # Local import: subprocess is not among this script's top-level imports.
    import subprocess

    try:
        # Pass an argument list with no shell: quotes or shell
        # metacharacters in a headline can neither break the command nor
        # be executed.
        subprocess.run(["espeak", "-s", "130", "-v", "en+m1", text], check=False)
    except FileNotFoundError:
        # os.system() only signalled a missing binary through its exit
        # status; stay best-effort instead of crashing the reader.
        print("espeak executable not found; cannot speak text")
def GetSql(qry, params=None):
    """Run one query against the NEWS database and return all rows.

    Args:
        qry: SQL text; may contain ``%s`` placeholders when ``params`` is
            given.
        params: Optional sequence of values bound to the placeholders.
            Omit it to run ``qry`` exactly as written.

    Returns:
        A list of row tuples.
    """
    # Connect with the MySQL Server
    cnx = mysql.connector.connect(user=usern, database=dbn, password=passw, host=servern, port=portn)
    try:
        cur = cnx.cursor(buffered=True)
        if params is None:
            cur.execute(qry)
        else:
            cur.execute(qry, params)
        return cur.fetchall()
    finally:
        # Release the connection even when the query raises.
        cnx.close()
#+-----------+--------------+------+-----+---------+----------------+
#| Field | Type | Null | Key | Default | Extra |
#+-----------+--------------+------+-----+---------+----------------+
#| ID | bigint(20) | NO | PRI | NULL | auto_increment |
#| SOURCE | varchar(100) | NO | | NULL | |
#| LINK | varchar(255) | NO | UNI | NULL | |
#| TITLE | varchar(400) | NO | | NULL | |
#| PUBLISHED | datetime | NO | | NULL | |
#| ARTICLE | text | NO | | NULL | |
#+-----------+--------------+------+-----+---------+----------------+
# Headlines published since the start of yesterday, newest first.
# "curdate() - interval 1 day" is real date arithmetic. The earlier
# "curdate()-1" made MySQL treat the date as the number YYYYMMDD and
# subtract 1 from it, which is not a valid date on the first day of a month.
qry1 = "select SOURCE, TITLE from RSS where PUBLISHED > curdate() - interval 1 day order by PUBLISHED desc;"
res = GetSql(qry1)
for src, title in res:
    # The aggregator stores every apostrophe doubled; turn it back into a
    # single one so the headline prints and is spoken as written.
    tit = title.replace("''", "'")
    print(src + ": " + tit)
    phrase = "From " + src + ", HEAD LINE, " + tit
    speaker.Speak(phrase)
    # Short pause between headlines.
    time.sleep(2)
B: Linux – ESPEAK
import os
import time
import mysql.connector
# MySQL connection settings passed to mysql.connector.connect() in GetSql().
# NOTE(review): root with a hard-coded password is only acceptable for a
# local experiment.
usern = "root"
passw = "password"
dbn = "NEWS"
servern = "localhost"
portn = 3306
def TOS(text):
    """Speak *text* with the ``espeak`` command-line synthesizer.

    Runs espeak at 130 words per minute with the ``en+m1`` voice. A missing
    espeak executable is reported on stdout rather than raised.

    Args:
        text: The phrase to speak.

    Returns:
        None.
    """
    # Local import: subprocess is not among this script's top-level imports.
    import subprocess

    try:
        # Pass an argument list with no shell: quotes or shell
        # metacharacters in a headline can neither break the command nor
        # be executed.
        subprocess.run(["espeak", "-s", "130", "-v", "en+m1", text], check=False)
    except FileNotFoundError:
        # os.system() only signalled a missing binary through its exit
        # status; stay best-effort instead of crashing the reader.
        print("espeak executable not found; cannot speak text")
def GetSql(qry, params=None):
    """Run one query against the NEWS database and return all rows.

    Args:
        qry: SQL text; may contain ``%s`` placeholders when ``params`` is
            given.
        params: Optional sequence of values bound to the placeholders.
            Omit it to run ``qry`` exactly as written.

    Returns:
        A list of row tuples.
    """
    # Connect with the MySQL Server
    cnx = mysql.connector.connect(user=usern, database=dbn, password=passw, host=servern, port=portn)
    try:
        cur = cnx.cursor(buffered=True)
        if params is None:
            cur.execute(qry)
        else:
            cur.execute(qry, params)
        return cur.fetchall()
    finally:
        # Release the connection even when the query raises.
        cnx.close()
#+-----------+--------------+------+-----+---------+----------------+
#| Field | Type | Null | Key | Default | Extra |
#+-----------+--------------+------+-----+---------+----------------+
#| ID | bigint(20) | NO | PRI | NULL | auto_increment |
#| SOURCE | varchar(100) | NO | | NULL | |
#| LINK | varchar(255) | NO | UNI | NULL | |
#| TITLE | varchar(400) | NO | | NULL | |
#| PUBLISHED | datetime | NO | | NULL | |
#| ARTICLE | text | NO | | NULL | |
#+-----------+--------------+------+-----+---------+----------------+
# Headlines published since the start of yesterday, newest first.
# "curdate() - interval 1 day" is real date arithmetic. The earlier
# "curdate()-1" made MySQL treat the date as the number YYYYMMDD and
# subtract 1 from it, which is not a valid date on the first day of a month.
qry1 = "select SOURCE, TITLE from RSS where PUBLISHED > curdate() - interval 1 day order by PUBLISHED desc;"
res = GetSql(qry1)
for src, title in res:
    # The aggregator stores every apostrophe doubled; they are dropped
    # entirely here so that no quote character reaches TOS().
    tit = title.replace("''", "")
    print(src + ": " + tit)
    phrase = "From " + src + ", HEAD LINE, " + tit
    TOS(phrase)
    # Short pause between headlines.
    time.sleep(0.5)