137 lines
2.4 KiB
Go
137 lines
2.4 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"flag"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"net/http"
|
|
"os"
|
|
"os/user"
|
|
"path"
|
|
"path/filepath"
|
|
"time"
|
|
|
|
"github.com/gocolly/colly"
|
|
)
|
|
|
|
// Package-level configuration.
var (
	// store is the file that remembers the most recent download between
	// runs. It starts out '~'-relative; main passes it through fixpath.
	store = "~/.taldl.json"

	// output is the -output flag: the directory downloads are written to.
	output = flag.String("output", "epub", "output directory")

	// format is the -format flag: the file extension appended to each
	// text's URL to select what is downloaded.
	format = flag.String("format", "epub", "format to download")
)
|
|
|
|
func main() {
|
|
flag.Parse()
|
|
|
|
store = fixpath(store)
|
|
*output = fixpath(*output)
|
|
|
|
os.MkdirAll(*output, os.ModePerm)
|
|
|
|
oldMostRecent := ""
|
|
newMostRecent := ""
|
|
|
|
data, _ := ioutil.ReadFile(store)
|
|
_ = json.Unmarshal(data, &oldMostRecent)
|
|
|
|
done := false
|
|
|
|
c := colly.NewCollector()
|
|
|
|
c.OnHTML("div.amw-listing-item a[href]", func(e *colly.HTMLElement) {
|
|
url := e.Attr("href") + "." + *format
|
|
|
|
if newMostRecent == "" {
|
|
newMostRecent = url
|
|
}
|
|
|
|
if newMostRecent == oldMostRecent {
|
|
done = true
|
|
return
|
|
}
|
|
|
|
fmt.Println(url)
|
|
|
|
name := filenameForUrl(url)
|
|
path := *output + "/" + name
|
|
|
|
data, err := get(url, time.Second*3)
|
|
if err != nil {
|
|
fmt.Println(err)
|
|
return
|
|
}
|
|
|
|
err = ioutil.WriteFile(path, data, 0644)
|
|
if err != nil {
|
|
fmt.Println(err)
|
|
}
|
|
})
|
|
|
|
c.OnHTML("a[href] i.fa-chevron-right", func(e *colly.HTMLElement) {
|
|
next, _ := e.DOM.Parent().Attr("href")
|
|
|
|
if !done {
|
|
e.Request.Visit(next)
|
|
}
|
|
})
|
|
|
|
c.OnRequest(func(r *colly.Request) {
|
|
fmt.Println("Checking", r.URL)
|
|
})
|
|
|
|
c.Visit("https://theanarchistlibrary.org/latest")
|
|
|
|
data, _ = json.Marshal(newMostRecent)
|
|
_ = ioutil.WriteFile(store, data, 0644)
|
|
}
|
|
|
|
// fixpath turns a user-supplied path into an absolute, OS-native one.
//
// Forward slashes are converted to the platform separator, a leading '~' is
// replaced by the current user's home directory, and the result is made
// absolute relative to the working directory. An empty path therefore
// resolves to the working directory itself.
//
// If the current user cannot be determined, the error is printed to stderr
// and the process exits with status 10.
func fixpath(path string) string {
	path = filepath.FromSlash(path)

	// Check the length first: indexing an empty path would panic.
	if len(path) > 0 && path[0] == '~' {
		u, err := user.Current()
		if err != nil {
			fmt.Fprintln(os.Stderr, err.Error())
			os.Exit(10)
		}
		path = filepath.Join(u.HomeDir, path[1:])
	}

	abs, err := filepath.Abs(path)
	if err != nil {
		// Abs could not resolve the path (it needs the working directory for
		// relative paths). Fall back to the cleaned path instead of
		// returning an empty string.
		return filepath.Clean(path)
	}
	return abs
}
|
|
|
|
// filenameForUrl derives the local file name for a download: the last
// slash-separated element of url.
func filenameForUrl(url string) string {
	base := path.Base(url)
	return base
}
|
|
|
|
// get fetches url with an HTTP GET and returns the complete response body.
//
// The request is bound to a context that expires after timeout. Any status
// other than 200 OK is reported as an error, and no content is returned
// alongside a non-nil error.
func get(url string, timeout time.Duration) (content []byte, err error) {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	// Release the context's resources on every path, not only on a bad
	// status; otherwise they linger until the timeout fires.
	defer cancel()

	request, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, err
	}
	request = request.WithContext(ctx)

	response, err := http.DefaultClient.Do(request)
	if err != nil {
		return nil, err
	}
	defer response.Body.Close()

	if response.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("INVALID RESPONSE: %s", response.Status)
	}

	return ioutil.ReadAll(response.Body)
}
|
|
|
|
// fileExists reports whether path refers to an existing entry that is not a
// directory.
//
// Any failure to stat the path (not just "does not exist") yields false,
// since no file information is available in that case.
func fileExists(path string) bool {
	info, err := os.Stat(path)
	if err != nil {
		// info is nil on every error, so it must not be dereferenced.
		return false
	}
	return !info.IsDir()
}
|