This commit is contained in:
ron 2020-04-05 02:11:57 +02:00
parent 97abf4d0e4
commit 0bf9308dd7

18
main.go
View File

@ -16,22 +16,23 @@ import (
"github.com/gocolly/colly"
)
var format = flag.String("format", "epub", "format to download")
var store = "~/.taldl.json"
var output = flag.String("output", "epub", "output directory")
var store = flag.String("store", "~/.tal-scraper.json", "data store")
var format = flag.String("format", "epub", "format to download")
func main() {
flag.Parse()
store = fixpath(store)
*output = fixpath(*output)
*store = fixpath(*store)
os.MkdirAll(*output, os.ModePerm)
oldMostRecent := ""
newMostRecent := ""
data, _ := ioutil.ReadFile(*store)
data, _ := ioutil.ReadFile(store)
_ = json.Unmarshal(data, &oldMostRecent)
done := false
@ -55,9 +56,8 @@ func main() {
name := filenameForUrl(url)
path := *output + "/" + name
data, err := HTTPGet(url, time.Second*3)
data, err := get(url, time.Second*3)
if err != nil {
fmt.Println(url)
fmt.Println(err)
return
}
@ -83,7 +83,7 @@ func main() {
c.Visit("https://theanarchistlibrary.org/latest")
data, _ = json.Marshal(newMostRecent)
_ = ioutil.WriteFile(*store, data, 0644)
_ = ioutil.WriteFile(store, data, 0644)
}
func fixpath(path string) string {
@ -104,7 +104,7 @@ func filenameForUrl(url string) string {
return path.Base(url)
}
func HTTPGet(url string, timeout time.Duration) (content []byte, err error) {
func get(url string, timeout time.Duration) (content []byte, err error) {
request, err := http.NewRequest("GET", url, nil)
if err != nil {
return
@ -121,7 +121,7 @@ func HTTPGet(url string, timeout time.Duration) (content []byte, err error) {
if response.StatusCode != 200 {
cancel_func()
return nil, fmt.Errorf("INVALID RESPONSE; status: %s", response.Status)
return nil, fmt.Errorf("INVALID RESPONSE: %s", response.Status)
}
return ioutil.ReadAll(response.Body)