mirror of
https://github.com/jimeh/kotaku-uk-rss.git
synced 2026-02-19 02:46:41 +00:00
199 lines · 4.1 KiB · Go
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"log"
|
|
"net/http"
|
|
"sort"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
"github.com/gorilla/feeds"
|
|
)
|
|
|
|
/*
|
|
Structs
|
|
*/
|
|
|
|
// Article holds various info about an article.
type Article struct {
	Title    string    // headline text; falls back to a truncated summary when absent
	Date     time.Time // publication time, parsed from an RFC 3339 datetime attribute
	Author   string    // author name scraped from the article metadata
	Summary  string    // plain-text teaser paragraph
	MediaURL string    // image URL taken from the figure's data-media992 attribute, if present
	URL      string    // absolute article URL (rootURL + Href)
	Href     string    // relative path the URL was built from; also used as the feed item ID
}
|
|
|
|
// Articles is a collection of Article structs.
type Articles []Article
|
|
|
|
// SectionsByArticleLength enables sorting section nodes by number of article
// child nodes. It implements sort.Interface.
type SectionsByArticleLength []*goquery.Selection

// Len returns the number of collected sections.
func (s SectionsByArticleLength) Len() int {
	return len(s)
}

// Less orders sections in DESCENDING order of <article> child count
// (note the `>`), so after sorting, index 0 holds the section with the
// most articles.
func (s SectionsByArticleLength) Less(i int, j int) bool {
	return s[i].Find("article").Length() > s[j].Find("article").Length()
}

// Swap exchanges two sections in place.
func (s SectionsByArticleLength) Swap(i, j int) {
	s[i], s[j] = s[j], s[i]
}
|
|
|
|
// RssCache is a string cache with mutex locks. The zero value is ready to
// use; Get returns the empty string until the first Set.
type RssCache struct {
	sync.RWMutex        // guards body
	body         string // most recently rendered RSS document
}
|
|
|
|
// Set allows setting the RssCache body.
|
|
func (r *RssCache) Set(value string) {
|
|
r.Lock()
|
|
defer r.Unlock()
|
|
r.body = value
|
|
}
|
|
|
|
// Get allows getting the RssCache body.
|
|
func (r *RssCache) Get() string {
|
|
r.RLock()
|
|
defer r.RUnlock()
|
|
return r.body
|
|
}
|
|
|
|
/*
|
|
Functions
|
|
*/
|
|
|
|
// fetchDocument downloads url and parses it into a goquery document.
//
// NOTE(review): goquery.NewDocument performs the HTTP GET itself
// (presumably with the default HTTP client, i.e. no timeout — confirm),
// and any error — including a transient network failure — hits log.Fatal,
// which terminates the entire process, killing the update loop AND the
// HTTP server. Consider returning the error to the caller instead; that
// would require changing this function's signature and its callers.
// NewDocument is also deprecated upstream in favor of
// NewDocumentFromReader with an explicit http.Get.
func fetchDocument(url string) *goquery.Document {
	doc, err := goquery.NewDocument(url)
	if err != nil {
		log.Fatal(err)
	}

	return doc
}
|
|
|
|
func extractArticleSection(doc *goquery.Document) *goquery.Selection {
|
|
var sections = SectionsByArticleLength{}
|
|
doc.Find("section").Each(func(i int, s *goquery.Selection) {
|
|
sections = append(sections, s)
|
|
})
|
|
|
|
sort.Sort(sections)
|
|
return sections[0]
|
|
}
|
|
|
|
func parseArticleSection(section *goquery.Selection) Articles {
|
|
result := Articles{}
|
|
|
|
section.Find("article").Each(func(i int, s *goquery.Selection) {
|
|
result = append(result, parseArticle(s))
|
|
})
|
|
|
|
return result
|
|
}
|
|
|
|
func parseArticle(s *goquery.Selection) Article {
|
|
href, _ := s.Find(".media__body h2 a").Attr("href")
|
|
if href == "" {
|
|
href, _ = s.Find("figure a").Attr("href")
|
|
}
|
|
url := rootURL + href
|
|
|
|
summary := s.Find(".media__body p").Text()
|
|
title := s.Find(".media__body h2").Text()
|
|
if title == "" {
|
|
title = truncateString(summary, 60) + "..."
|
|
}
|
|
|
|
mediaURL, _ := s.Find("figure").Attr("data-media992")
|
|
timeString, _ := s.Find(".meta__limited time").Attr("datetime")
|
|
parsedTime, _ := time.Parse(time.RFC3339, timeString)
|
|
|
|
return Article{
|
|
Title: title,
|
|
Date: parsedTime,
|
|
Author: s.Find(".meta__full a.is-author").Text(),
|
|
Summary: summary,
|
|
MediaURL: mediaURL,
|
|
URL: url,
|
|
Href: href,
|
|
}
|
|
}
|
|
|
|
// truncateString returns s shortened to at most l runes. Strings of l
// runes or fewer (and any string when l is negative or zero... l <= 0
// yields "") are handled safely.
//
// Bug fix: the previous implementation sliced by BYTE index, which could
// split a multi-byte UTF-8 character in half and produce invalid UTF-8
// for non-ASCII summaries; it also panicked on negative l. Truncation is
// now performed on rune boundaries.
func truncateString(s string, l int) string {
	if l <= 0 {
		return ""
	}
	runes := []rune(s)
	if len(runes) <= l {
		return s
	}
	return string(runes[:l])
}
|
|
|
|
func getArticlesFromURL(url string) Articles {
|
|
doc := fetchDocument(url)
|
|
section := extractArticleSection(doc)
|
|
return parseArticleSection(section)
|
|
}
|
|
|
|
func buildFeed(articles Articles) *feeds.Feed {
|
|
feed := &feeds.Feed{
|
|
Title: "Kotaku UK",
|
|
Link: &feeds.Link{Href: "http://www.kotaku.co.uk/"},
|
|
Description: "Kotaku UK is the UK version of Kotaku",
|
|
Created: time.Now(),
|
|
}
|
|
|
|
feed.Items = []*feeds.Item{}
|
|
|
|
for _, article := range articles {
|
|
feed.Items = append(feed.Items, buildFeedItem(article))
|
|
}
|
|
|
|
return feed
|
|
}
|
|
|
|
func buildFeedItem(article Article) *feeds.Item {
|
|
description := article.Summary
|
|
|
|
if article.MediaURL != "" {
|
|
description = fmt.Sprintf("<img href=\"%s\" /> %s",
|
|
article.MediaURL, description)
|
|
}
|
|
|
|
return &feeds.Item{
|
|
Id: article.Href,
|
|
Title: article.Title,
|
|
Link: &feeds.Link{Href: article.URL},
|
|
Description: description,
|
|
Author: &feeds.Author{Name: article.Author},
|
|
Created: article.Date,
|
|
}
|
|
}
|
|
|
|
func serveRss(w http.ResponseWriter, r *http.Request) {
|
|
fmt.Fprintln(w, rssCache.Get())
|
|
}
|
|
|
|
func updateRssLoop() {
|
|
for {
|
|
articles := Articles{}
|
|
for _, url := range pageUrls {
|
|
fmt.Printf("fetching and parsing articles from: %s\n", url)
|
|
articles = append(articles, getArticlesFromURL(url)...)
|
|
}
|
|
|
|
fmt.Printf("building feed cache... ")
|
|
feed, _ := buildFeed(articles).ToRss()
|
|
rssCache.Set(feed)
|
|
fmt.Println("done")
|
|
fmt.Println("taking a nap for 60 seconds ^_^")
|
|
time.Sleep(60 * time.Second)
|
|
}
|
|
}
|