HEADLINE TRENDING: Part 1 (RSS) - PLANETARY STATUS REPORT!

This is a project I am just beginning. The concept is to align and link headline trend data with other longitudinal data sets, such as FRED, BLS, or market data.

This might be on hold for a few weeks, but it’s a start.

CURRENT MYSQL DB DEFINITION:

-- MySQL dump 10.13  Distrib 8.0.39, for Win64 (x86_64)
--
-- Host: localhost    Database: news
-- ------------------------------------------------------
-- Server version	8.0.39

-- Session preamble written by mysqldump: each current setting is saved in an
-- @OLD_* variable, then the session is switched to utf8mb4 names, the
-- '+00:00' time zone, and unique/foreign-key checks and SQL notes turned off.
-- The matching restore statements are at the end of the dump.
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
/*!50503 SET NAMES utf8mb4 */;
/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
/*!40103 SET TIME_ZONE='+00:00' */;
/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;

--
-- Table structure for table `headline_token`
--

DROP TABLE IF EXISTS `headline_token`;
/*!40101 SET @saved_cs_client     = @@character_set_client */;
/*!50503 SET character_set_client = utf8mb4 */;
-- One row per (headline, distinct word): `freq` is how many times `word`
-- occurs in the title of the `rss` row identified by `rss_id`, and
-- year/month/day hold that headline's publication date split into parts.
-- NOTE(review): no FOREIGN KEY ties `rss_id` to `rss`.`ID`; the relationship
-- is maintained only by the loading script.
CREATE TABLE `headline_token` (
  `headline_token_id` bigint NOT NULL AUTO_INCREMENT,
  `rss_id` bigint NOT NULL,
  `year` int NOT NULL,
  `month` int NOT NULL,
  `day` int NOT NULL,
  `word` varchar(200) DEFAULT NULL,
  `freq` int DEFAULT NULL,
  `created_on` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
  PRIMARY KEY (`headline_token_id`),
  -- Non-unique composite index covering lookups by headline, date and word.
  KEY `idx_headline_token` (`rss_id`,`year`,`month`,`day`,`word`)
) ENGINE=InnoDB AUTO_INCREMENT=64162 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
/*!40101 SET character_set_client = @saved_cs_client */;

--
-- Table structure for table `log`
--

DROP TABLE IF EXISTS `log`;
/*!40101 SET @saved_cs_client     = @@character_set_client */;
/*!50503 SET character_set_client = utf8mb4 */;
-- Run log: one row per step message written by the scripts; `log_time`
-- is filled in by its CURRENT_TIMESTAMP default when the row is inserted.
CREATE TABLE `log` (
  `log_id` bigint NOT NULL AUTO_INCREMENT,
  `step` varchar(100) NOT NULL,
  `log_time` timestamp NULL DEFAULT CURRENT_TIMESTAMP,
  PRIMARY KEY (`log_id`)
) ENGINE=InnoDB AUTO_INCREMENT=1248 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
/*!40101 SET character_set_client = @saved_cs_client */;

--
-- Table structure for table `rss`
--

DROP TABLE IF EXISTS `rss`;
/*!40101 SET @saved_cs_client     = @@character_set_client */;
/*!50503 SET character_set_client = utf8mb4 */;
-- One row per headline collected from an RSS feed: the feed label (SOURCE),
-- the article LINK, the headline TITLE and its PUBLISHED timestamp.
-- `unique_link` lets each LINK value appear only once.
-- NOTE(review): this table is utf8mb3 while `headline_token` and `log` are
-- utf8mb4 -- confirm the mismatch is intentional.
CREATE TABLE `rss` (
  `ID` bigint NOT NULL AUTO_INCREMENT,
  `SOURCE` varchar(100) CHARACTER SET utf8mb3 COLLATE utf8mb3_unicode_ci NOT NULL,
  `LINK` varchar(255) CHARACTER SET utf8mb3 COLLATE utf8mb3_unicode_ci NOT NULL,
  `TITLE` varchar(400) CHARACTER SET utf8mb3 COLLATE utf8mb3_unicode_ci NOT NULL,
  `PUBLISHED` datetime NOT NULL,
  PRIMARY KEY (`ID`),
  UNIQUE KEY `unique_link` (`LINK`)
) ENGINE=InnoDB AUTO_INCREMENT=5491 DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_unicode_ci;
/*!40101 SET character_set_client = @saved_cs_client */;
-- Restore the session settings that were saved in @OLD_* variables at the
-- top of the dump.
/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;

/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;

-- Dump completed on 2024-12-09  9:49:20

GRABBING HEADLINES FROM RSS FEEDS:

from __future__ import print_function

import os
import feedparser
import os.path, time
import json
import math
import time
import urllib.parse as pr
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup as BS
from requests import get
from os.path import exists
from socket import socket, AF_INET, SOCK_STREAM
from decimal import Decimal
from datetime import datetime, date, timedelta
from anyascii import anyascii
import mysql.connector

from unidecode import unidecode

# MySQL connection settings used by every database helper in this script.
# NOTE(review): the credentials are hard-coded; consider reading them from
# the environment or a config file before sharing or deploying the script.
usern = "root"
passw = "password"
dbn = "NEWS"
servern = "localhost"
portn = 3306

# Running count of rows handed to SaveRSS during this run; CycleFeeds writes
# it into the final log entry.
record_load = 0

def logrun(step):
	"""Append one row to the `log` table describing a pipeline step.

	Args:
		step: Free-text message stored in `log.step`.

	The message is sent as a bound parameter, so quote characters inside
	`step` can neither break the statement nor inject SQL.
	"""
	cnx = mysql.connector.connect(user=usern, database=dbn, password=passw, host=servern, port=portn)
	try:
		cur = cnx.cursor()
		cur.execute("INSERT INTO log (step) VALUES (%s)", (step,))
		cnx.commit()
	finally:
		# Release the connection even when the insert fails.
		cnx.close()

def AlreadySaved(link):
	"""Report whether a row with this LINK value already exists in `RSS`.

	Args:
		link: Link text compared against the `LINK` column.

	Returns:
		True when at least one matching row exists, otherwise False.
	"""
	cnx = mysql.connector.connect(user=usern, database=dbn, password=passw, host=servern, port=portn)
	try:
		cur = cnx.cursor(buffered=True)
		# Bound parameter: links come from external feeds and may contain quotes.
		cur.execute("select ID from RSS where LINK = %s", (link,))
		return len(cur.fetchall()) > 0
	finally:
		# Release the connection even when the lookup fails.
		cnx.close()

def SaveRSS(source, title, link, published):
	"""Insert one headline into `RSS` and count it in `record_load`.

	Args:
		source: Feed label stored in SOURCE.
		title: Headline text stored in TITLE.
		link: Article link stored in LINK.
		published: Publication timestamp text stored in PUBLISHED.

	The values are bound parameters, so the connector does the quoting.
	The title is therefore passed through unchanged: doubling apostrophes
	by hand here would store two quote characters in the table.
	"""
	global record_load
	qry = """
	INSERT INTO RSS
	(SOURCE,
	LINK,
	TITLE,
	PUBLISHED)
	VALUES
	(%s,%s,%s,%s)
	"""
	cnx = mysql.connector.connect(user=usern, database=dbn, password=passw, host=servern, port=portn)
	try:
		cur = cnx.cursor()
		cur.execute(qry, (source, link, title, published))
		cnx.commit()
	finally:
		# Release the connection even when the insert fails.
		cnx.close()
	# Count the record only once it has actually been committed.
	record_load += 1

def GrabRSS(RssURL, SourceName):
	"""Fetch one RSS feed and save every entry that is not stored yet.

	Args:
		RssURL: Address of the feed, handed to `feedparser.parse`.
		SourceName: Label stored with each saved headline and used in
			error log entries.

	Entries that lack a title, link or publication date (or whose date
	could not be parsed) are logged through `logrun` and skipped.
	"""
	NewsFeed = feedparser.parse(RssURL)
	for na in NewsFeed.entries:
		try:
			print(na.title)
			print(na.link)
			print(na.published)
			print(na.published_parsed)
		except (AttributeError, UnicodeError):
			# AttributeError: the entry lacks one of the fields.
			# UnicodeError: the console could not encode the text.
			logrun("ERROR: GrabRSS from " + SourceName)
			continue
		stamp = na.published_parsed
		if stamp is None:
			# The date string was present but could not be parsed.
			logrun("ERROR: GrabRSS from " + SourceName)
			continue
		# Links are stored stripped and upper-cased; the duplicate check
		# must use exactly the same form.
		link = na.link.strip().upper()
		if AlreadySaved(link):
			continue
		print("*************************")
		# First six fields: year, month, day, hour, minute, second.
		ptms = "%s-%s-%s %s:%s:%s" % tuple(stamp[:6])
		SaveRSS(SourceName, unidecode(na.title), link, ptms)

def debugHere():
	"""Pause the script until the operator presses Enter."""
	prompt = "Press enter to continue ..."
	input(prompt)

def clearConsole():
	"""Clear the terminal with the shell command for the current OS."""
	# Windows/DOS shells use `cls`; every other platform uses `clear`.
	on_windows = os.name in ('nt', 'dos')
	os.system('cls' if on_windows else 'clear')

def CycleFeeds():
	"""Poll every configured feed once, logging the start and end of the run.

	The closing log entry includes the module-level `record_load` count.
	"""
	# (feed URL, source label) pairs, listed in the order they are polled.
	feeds = (
		("https://www.theguardian.com/world/rss", "GUARDIAN"),
		("https://www.spiegel.de/international/index.rss", "DER SPIEGEL"),
		("https://www.independent.co.uk/news/world/rss", "INDEPENDENT"),
		("https://globalpressjournal.com/feed/", "Global Press Journal"),
		("https://worldnewsera.com/feed/", "World News Era"),
		("https://www.headlinesoftoday.com/feed", "Headlines of Today"),
		("https://warontherocks.com/feed/", "WAR ON THE ROCKS"),
		("https://www.vox.com/rss/world-politics/index.xml", "VOX"),
		("https://www.mirror.co.uk/news/world-news/?service=rss", "MIRROR"),
		("https://www.thesun.co.uk/news/worldnews/feed/", "SUN"),
		("https://www.rt.com/rss/news/", "RT"),
		("https://timesofindia.indiatimes.com/rssfeeds/296589292.cms", "Times of India"),
		("https://www.latimes.com/world/rss2.0.xml", "LA TIMES"),
		("https://feeds.skynews.com/feeds/rss/world.xml", "SKY NEWS"),
		("https://www.smh.com.au/rss/world.xml", "Sydney Herald"),
		("https://www.washingtontimes.com/rss/headlines/news/world", "WA TIMES"),
		("https://globalnews.ca/world/feed/", "Global News"),
		("https://www.france24.com/en/rss", "FRANCE24"),
		("https://www.cbsnews.com/latest/rss/world", "CBS"),
		("https://www.aljazeera.com/xml/rss/all.xml", "ALJAZEERA"),
		("https://moxie.foxnews.com/google-publisher/latest.xml", "FOX"),
		("https://www.cnbc.com/id/100727362/device/rss/rss.html", "CNBC"),
		("https://feeds.bbci.co.uk/news/world/rss.xml", "BBC WORLD"),
		("https://www.infowars.com/rss.xml", "INFOWARS"),
		("https://feeds.feedburner.com/zerohedge/feed", "ZEROHEDGE"),
		("https://news.yahoo.com/rss/", "YAHOO"),
		("http://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml", "BBC"),
		("https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml", "NYT"),
	)

	logrun("start: CycleFeeds")
	for feed_url, label in feeds:
		GrabRSS(feed_url, label)
	logrun("stop: CycleFeeds, records: " + str(record_load))

# Script entry point: poll every configured feed once.
CycleFeeds()

TOKENIZE HEADLINES:

#parse headlines

import re
import mysql.connector
from anyascii import anyascii 

#f = open("demofile2.txt", "a")
#f.write("Now the file has more content!")
#f.close()

#open and read the file after the appending:
#f = open("demofile2.txt", "r")
#print(f.read())

# MySQL connection settings used by every database helper in this script.
# NOTE(review): the credentials are hard-coded; consider reading them from
# the environment or a config file before sharing or deploying the script.
usern = "root"
passw = "password"
dbn = "NEWS"
servern = "localhost"
portn = 3306

def GetSql(qry):
	"""Run a query against the news database and return all of its rows.

	Args:
		qry: Complete SQL statement text; it is executed as given, so it
			must not contain untrusted input.

	Returns:
		The result of `cursor.fetchall()` for the statement.
	"""
	cnx = mysql.connector.connect(user=usern, database=dbn, password=passw, host=servern, port=portn)
	try:
		cur = cnx.cursor(buffered=True)
		cur.execute(qry)
		return cur.fetchall()
	finally:
		# Release the connection even when the query fails.
		cnx.close()

def SaveToks(rss_id, year, month, day, word, freq):
	"""Insert one word-frequency row into `headline_token`.

	Args:
		rss_id: ID of the headline the word came from.
		year: Year part of the headline's publication date.
		month: Month part of the headline's publication date.
		day: Day part of the headline's publication date.
		word: The token text.
		freq: Number of times the token occurs in the headline.
	"""
	qry = """
	INSERT INTO headline_token
	(rss_id,
	year,
	month,
	day,
	word,
	freq)
	VALUES
	(%s,%s,%s,%s,%s,%s)
	"""
	cnx = mysql.connector.connect(user=usern, database=dbn, password=passw, host=servern, port=portn)
	try:
		cur = cnx.cursor()
		cur.execute(qry, (rss_id, year, month, day, word, freq))
		cnx.commit()
	finally:
		# Release the connection even when the insert fails.
		cnx.close()

def _headline_word_counts(title):
	"""Split a headline into upper-cased tokens and count each one."""
	from collections import Counter  # local import keeps this block self-contained

	# Join digit groups first ("1,000" -> "1000") so numbers stay one token.
	text = re.sub(r"(?<=\d),(?=\d)", "", title)
	# Apostrophes are dropped outright; the listed separators become spaces.
	text = text.translate(str.maketrans(",:&+\"()|?;", " " * 10, "'"))
	text = text.replace("...", " ")
	# Counter keeps first-seen order, so words come out in headline order.
	return Counter(text.upper().split())


def allHeadlines():
	"""Tokenize every headline that has no rows in `headline_token` yet.

	For each such headline, every distinct word is written with its count
	to the tab-separated file "news_token.txt" (rewritten on each run) and
	stored through `SaveToks`. A running headline counter is printed as
	progress output.
	"""
	# Anti-join: keep only rss rows without any matching token row.
	headln = """
	select
	r.id,
	r.title,
	r.published
	from rss r
	left outer join headline_token ht
	on r.id = ht.rss_id
	where ht.rss_id is null;
	"""

	rset = GetSql(headln)

	# `with` closes the file even when a database insert fails part-way.
	with open("news_token.txt", "w") as f:
		f.write("ID\tYEAR\tMONTH\tDAY\tWORD\tFREQUENCY\n")

		for cntr, rec in enumerate(rset, 1):
			print(str(cntr))

			rss_id, tit, pub = rec[0], rec[1], rec[2]

			# The text form of `pub` starts with "YYYY-MM-DD"; keep the date.
			date_parts = str(pub).split()[0].split('-')
			yr = int(date_parts[0])
			mn = int(date_parts[1])
			dy = int(date_parts[2])

			for token, freq in _headline_word_counts(tit).items():
				word = anyascii(token)
				f.write("%s\t%s\t%s\t%s\t%s\t%s\n" % (rss_id, yr, mn, dy, word, freq))
				SaveToks(rss_id, yr, mn, dy, word, freq)

# Script entry point: tokenize every headline that has no token rows yet.
allHeadlines()

Check out the results:

-- Daily word totals across all headlines, excluding common filler words and
-- keeping only words whose total for the day is above 8.
select
    year,
    month,
    day,
    word,
    sum(freq) as frq
from headline_token
where word not in
('TO','IN','OF','THE','FOR','AND','ON',
'AFTER','AS','A','IS','WITH','AT','BY',
'HOW','BE','ABOUT','-','HIS','HER','MORE',
'IT','WILL','HE','HAS', 'SEE', 'THEY', 'SHE',
'LAST','GOES','NEXT', 'SAYS', 'FROM', 'WHAT', 
'MY', 'THEY', 'THEM', 'I', 'MOST', 'COULD', 
'WHERE', 'BUT', 'AN', 'WE', 'HAVE')
group by year, month, day, word
having sum(freq) > 8
order by year asc, month asc, day asc, frq desc;

Combined Script:

from __future__ import print_function

import os
import re
import feedparser
import os.path, time
import json
import math
import time
import urllib.parse as pr
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup as BS
from requests import get
from os.path import exists
from socket import socket, AF_INET, SOCK_STREAM
from decimal import Decimal
from datetime import datetime, date, timedelta
from anyascii import anyascii
import mysql.connector

from unidecode import unidecode

# MySQL connection settings used by every database helper in this script.
# NOTE(review): the credentials are hard-coded; consider reading them from
# the environment or a config file before sharing or deploying the script.
usern = "root"
passw = "password"
dbn = "NEWS"
servern = "localhost"
portn = 3306

# Running count of rows handed to SaveRSS during this run; CycleFeeds writes
# it into the final log entry.
record_load = 0

def GetSql(qry):
	"""Run a query against the news database and return all of its rows.

	Args:
		qry: Complete SQL statement text; it is executed as given, so it
			must not contain untrusted input.

	Returns:
		The result of `cursor.fetchall()` for the statement.
	"""
	cnx = mysql.connector.connect(user=usern, database=dbn, password=passw, host=servern, port=portn)
	try:
		cur = cnx.cursor(buffered=True)
		cur.execute(qry)
		return cur.fetchall()
	finally:
		# Release the connection even when the query fails.
		cnx.close()

def logrun(step):
	"""Append one row to the `log` table describing a pipeline step.

	Args:
		step: Free-text message stored in `log.step`.

	The message is sent as a bound parameter, so quote characters inside
	`step` can neither break the statement nor inject SQL.
	"""
	cnx = mysql.connector.connect(user=usern, database=dbn, password=passw, host=servern, port=portn)
	try:
		cur = cnx.cursor()
		cur.execute("INSERT INTO log (step) VALUES (%s)", (step,))
		cnx.commit()
	finally:
		# Release the connection even when the insert fails.
		cnx.close()

def AlreadySaved(link):
	"""Report whether a row with this LINK value already exists in `RSS`.

	Args:
		link: Link text compared against the `LINK` column.

	Returns:
		True when at least one matching row exists, otherwise False.
	"""
	cnx = mysql.connector.connect(user=usern, database=dbn, password=passw, host=servern, port=portn)
	try:
		cur = cnx.cursor(buffered=True)
		# Bound parameter: links come from external feeds and may contain quotes.
		cur.execute("select ID from RSS where LINK = %s", (link,))
		return len(cur.fetchall()) > 0
	finally:
		# Release the connection even when the lookup fails.
		cnx.close()

def SaveRSS(source, title, link, published):
	"""Insert one headline into `RSS` and count it in `record_load`.

	Args:
		source: Feed label stored in SOURCE.
		title: Headline text stored in TITLE.
		link: Article link stored in LINK.
		published: Publication timestamp text stored in PUBLISHED.

	The values are bound parameters, so the connector does the quoting.
	The title is therefore passed through unchanged: doubling apostrophes
	by hand here would store two quote characters in the table.
	"""
	global record_load
	qry = """
	INSERT INTO RSS
	(SOURCE,
	LINK,
	TITLE,
	PUBLISHED)
	VALUES
	(%s,%s,%s,%s)
	"""
	cnx = mysql.connector.connect(user=usern, database=dbn, password=passw, host=servern, port=portn)
	try:
		cur = cnx.cursor()
		cur.execute(qry, (source, link, title, published))
		cnx.commit()
	finally:
		# Release the connection even when the insert fails.
		cnx.close()
	# Count the record only once it has actually been committed.
	record_load += 1

def GrabRSS(RssURL, SourceName):
	"""Fetch one RSS feed and save every entry that is not stored yet.

	Args:
		RssURL: Address of the feed, handed to `feedparser.parse`.
		SourceName: Label stored with each saved headline and used in
			error log entries.

	Entries that lack a title, link or publication date (or whose date
	could not be parsed) are logged through `logrun` and skipped.
	"""
	NewsFeed = feedparser.parse(RssURL)
	for na in NewsFeed.entries:
		try:
			print(na.title)
			print(na.link)
			print(na.published)
			print(na.published_parsed)
		except (AttributeError, UnicodeError):
			# AttributeError: the entry lacks one of the fields.
			# UnicodeError: the console could not encode the text.
			logrun("ERROR: GrabRSS from " + SourceName)
			continue
		stamp = na.published_parsed
		if stamp is None:
			# The date string was present but could not be parsed.
			logrun("ERROR: GrabRSS from " + SourceName)
			continue
		# Links are stored stripped and upper-cased; the duplicate check
		# must use exactly the same form.
		link = na.link.strip().upper()
		if AlreadySaved(link):
			continue
		print("*************************")
		# First six fields: year, month, day, hour, minute, second.
		ptms = "%s-%s-%s %s:%s:%s" % tuple(stamp[:6])
		SaveRSS(SourceName, unidecode(na.title), link, ptms)

def debugHere():
	"""Pause the script until the operator presses Enter."""
	prompt = "Press enter to continue ..."
	input(prompt)

def clearConsole():
	"""Clear the terminal with the shell command for the current OS."""
	# Windows/DOS shells use `cls`; every other platform uses `clear`.
	on_windows = os.name in ('nt', 'dos')
	os.system('cls' if on_windows else 'clear')

def SaveToks(rss_id, year, month, day, word, freq):
	"""Insert one word-frequency row into `headline_token`.

	Args:
		rss_id: ID of the headline the word came from.
		year: Year part of the headline's publication date.
		month: Month part of the headline's publication date.
		day: Day part of the headline's publication date.
		word: The token text.
		freq: Number of times the token occurs in the headline.
	"""
	qry = """
	INSERT INTO headline_token
	(rss_id,
	year,
	month,
	day,
	word,
	freq)
	VALUES
	(%s,%s,%s,%s,%s,%s)
	"""
	cnx = mysql.connector.connect(user=usern, database=dbn, password=passw, host=servern, port=portn)
	try:
		cur = cnx.cursor()
		cur.execute(qry, (rss_id, year, month, day, word, freq))
		cnx.commit()
	finally:
		# Release the connection even when the insert fails.
		cnx.close()

def _headline_word_counts(title):
	"""Split a headline into upper-cased tokens and count each one."""
	from collections import Counter  # local import keeps this block self-contained

	# Join digit groups first ("1,000" -> "1000") so numbers stay one token.
	text = re.sub(r"(?<=\d),(?=\d)", "", title)
	# Apostrophes are dropped outright; the listed separators become spaces.
	text = text.translate(str.maketrans(",:&+\"()|?;", " " * 10, "'"))
	text = text.replace("...", " ")
	# Counter keeps first-seen order, so words come out in headline order.
	return Counter(text.upper().split())


def allHeadlines():
	"""Tokenize every headline that has no rows in `headline_token` yet.

	For each such headline, every distinct word is written with its count
	to the tab-separated file "news_token.txt" (rewritten on each run) and
	stored through `SaveToks`. A running headline counter is printed as
	progress output.
	"""
	# Anti-join: keep only rss rows without any matching token row.
	headln = """
	select
	r.id,
	r.title,
	r.published
	from rss r
	left outer join headline_token ht
	on r.id = ht.rss_id
	where ht.rss_id is null;
	"""

	rset = GetSql(headln)

	# `with` closes the file even when a database insert fails part-way.
	with open("news_token.txt", "w") as f:
		f.write("ID\tYEAR\tMONTH\tDAY\tWORD\tFREQUENCY\n")

		for cntr, rec in enumerate(rset, 1):
			print(str(cntr))

			rss_id, tit, pub = rec[0], rec[1], rec[2]

			# The text form of `pub` starts with "YYYY-MM-DD"; keep the date.
			date_parts = str(pub).split()[0].split('-')
			yr = int(date_parts[0])
			mn = int(date_parts[1])
			dy = int(date_parts[2])

			for token, freq in _headline_word_counts(tit).items():
				word = anyascii(token)
				f.write("%s\t%s\t%s\t%s\t%s\t%s\n" % (rss_id, yr, mn, dy, word, freq))
				SaveToks(rss_id, yr, mn, dy, word, freq)

def CycleFeeds():
	"""Poll every configured feed once, logging the start and end of the run.

	The closing log entry includes the module-level `record_load` count.
	"""
	# (feed URL, source label) pairs, listed in the order they are polled.
	feeds = (
		("https://www.theguardian.com/world/rss", "GUARDIAN"),
		("https://www.spiegel.de/international/index.rss", "DER SPIEGEL"),
		("https://www.independent.co.uk/news/world/rss", "INDEPENDENT"),
		("https://globalpressjournal.com/feed/", "Global Press Journal"),
		("https://worldnewsera.com/feed/", "World News Era"),
		("https://warontherocks.com/feed/", "WAR ON THE ROCKS"),
		("https://www.vox.com/rss/world-politics/index.xml", "VOX"),
		("https://www.mirror.co.uk/news/world-news/?service=rss", "MIRROR"),
		("https://www.thesun.co.uk/news/worldnews/feed/", "SUN"),
		("https://www.rt.com/rss/news/", "RT"),
		("https://timesofindia.indiatimes.com/rssfeeds/296589292.cms", "Times of India"),
		("https://www.latimes.com/world/rss2.0.xml", "LA TIMES"),
		("https://feeds.skynews.com/feeds/rss/world.xml", "SKY NEWS"),
		("https://www.smh.com.au/rss/world.xml", "Sydney Herald"),
		("https://www.washingtontimes.com/rss/headlines/news/world", "WA TIMES"),
		("https://globalnews.ca/world/feed/", "Global News"),
		("https://www.france24.com/en/rss", "FRANCE24"),
		("https://www.cbsnews.com/latest/rss/world", "CBS"),
		("https://www.aljazeera.com/xml/rss/all.xml", "ALJAZEERA"),
		("https://moxie.foxnews.com/google-publisher/latest.xml", "FOX"),
		("https://www.cnbc.com/id/100727362/device/rss/rss.html", "CNBC"),
		("https://feeds.bbci.co.uk/news/world/rss.xml", "BBC WORLD"),
		("https://www.infowars.com/rss.xml", "INFOWARS"),
		("https://feeds.feedburner.com/zerohedge/feed", "ZEROHEDGE"),
		("https://news.yahoo.com/rss/", "YAHOO"),
		("http://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml", "BBC"),
		("https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml", "NYT"),
	)

	logrun("start: CycleFeeds")
	for feed_url, label in feeds:
		GrabRSS(feed_url, label)
	logrun("stop: CycleFeeds, records: " + str(record_load))

def WriteOut(yr, mn, dy):
	"""Write one day's word totals to a tab-separated text file.

	Args:
		yr: Year to report on.
		mn: Month to report on.
		dy: Day to report on.

	The file is named "<yr>-<mn>-<dy>.txt" and is overwritten if it
	exists. Each line holds a word and its summed frequency for that day,
	highest total first; the filler words listed in the query are left out.

	Raises:
		ValueError, TypeError: if a date part cannot be converted to int.
	"""
	strSql = """

	select
	h.word,
	sum(h.freq) as frq

	from headline_token h 

	where h.year = <<YEAR>> and h.month = <<MONTH>> and h.day = <<DAY>> and 
	 not h.word in
	('TO','IN','OF','THE','FOR','AND','ON',
	'AFTER','AS','A','IS','WITH','AT','BY',
	'HOW','BE','ABOUT','-','HIS','HER','MORE',
	'IT','WILL','HE','HAS', 'SEE', 'THEY', 'SHE',
	'LAST','GOES','NEXT', 'SAYS', 'FROM', 'WHAT', 
	'MY', 'THEY', 'THEM', 'I', 'MOST', 'COULD', 
	'WHERE', 'BUT', 'AN', 'WE', 'HAVE', 'NEW')

	group by 
	h.word 

	order by sum(h.freq) desc;

	"""

	# The values are spliced into the SQL text, so force them to integers
	# first; anything that is not a number fails here instead of in the query.
	strSql = strSql.replace("<<YEAR>>", str(int(yr)))
	strSql = strSql.replace("<<MONTH>>", str(int(mn)))
	strSql = strSql.replace("<<DAY>>", str(int(dy)))

	rset = GetSql(strSql)

	fname = str(yr) + "-" + str(mn) + "-" + str(dy) + ".txt"

	# `with` closes the file even when a write fails.
	with open(fname, "w") as f:
		for rec in rset:
			f.write(str(rec[0]) + '\t' + str(rec[1]) + '\n')

# Pipeline: poll the feeds, tokenize the new headlines, then write one
# word-total file for every day that has token rows.
CycleFeeds()
allHeadlines()

ssql = """
select distinct year, month, day 
from headline_token 
order by year asc, month asc, day asc;
"""

rset2 = GetSql(ssql)

# `count` stays 0 when there are no days to report.
count = 0
for count, r in enumerate(rset2, 1):
	print(str(count))
	WriteOut(r[0], r[1], r[2])