diff --git a/main.go b/main.go index fb66885..6e14687 100644 --- a/main.go +++ b/main.go @@ -22,13 +22,6 @@ import ( "github.com/schollz/progressbar/v3" ) -type Cache struct { - MostRecent string - Pages int -} - -var cacheFile = "~/.taldl.json" - var outputDir = flag.String("output", "~/TAL", "output directory") var formats = flag.String("formats", "zip,epub,pdf,a4.pdf,lt.pdf", "formats to download") var progress = flag.Bool("progress", true, "show progress bar") @@ -36,10 +29,17 @@ var verbose = flag.Bool("verbose", true, "verbose output") var workers = flag.Int("workers", 5, "amount of workers") var update = flag.Bool("update", false, "update all entries") +type Cache struct { + MostRecent string + Pages int +} + +var cacheFile = "~/.taldl.json" +var cache Cache var mostRecent string var done bool -var cache Cache var useFormat map[string]bool +var missing []string func main() { var bar *progressbar.ProgressBar @@ -95,7 +95,7 @@ func main() { if newPages > 0 { if *verbose { - fmt.Fprintf(os.Stderr, "Checking latest entries... \r\n") + fmt.Fprintf(os.Stderr, "Scanning latest entries... \r\n") } if *progress { @@ -129,7 +129,7 @@ func main() { cache.Pages = lastPage cache.MostRecent = mostRecent - numJobs := len(hrefs) + numJobs := len(hrefs) * len(useFormat) if numJobs == 0 { save(cache) @@ -137,7 +137,7 @@ func main() { } if *verbose { - fmt.Fprintf(os.Stderr, "Checking %d entries for updates...\r\n", numJobs) + fmt.Fprintf(os.Stderr, "Checking %d files for updates...\r\n", numJobs) } if *progress { @@ -154,8 +154,13 @@ func main() { for w := 1; w <= *workers; w++ { go checker(w, checkJobs, checkResults) } + for _, href := range hrefs { - checkJobs <- href + for ext, use := range useFormat { + if use { + checkJobs <- href + "." + ext + } + } } close(checkJobs) @@ -209,6 +214,13 @@ func main() { fmt.Fprintln(os.Stderr, buffer) } } + + if len(missing) > 0 { + fmt.Fprintln(os.Stderr, "Not found:") + for _, url := range missing { + fmt.Fprintln(os.Stderr, url) + } + } } func fixPath(path string) string { @@ -229,6 +241,25 @@ func filenameForUrl(url string) string { return path.Base(url) } +func dirnameForUrl(url string) string { + base := path.Base(url) + ext := filepath.Ext(base) + dirname := strings.TrimSuffix(base, ext) + subext := []string{".a4", ".lt"} + + switch ext { + case ".pdf": + for _, se := range subext { + if strings.HasSuffix(dirname, se) { + dirname = strings.TrimSuffix(dirname, se) + break + } + } + default: + } + return dirname +} + func check(url string, path string, timeout time.Duration) (modified bool, err error) { request, err := http.NewRequest("HEAD", url, nil) if err != nil { @@ -247,6 +278,7 @@ func check(url string, path string, timeout time.Duration) (modified bool, err e defer response.Body.Close() if response.StatusCode == 404 { + fmt.Fprintln(os.Stderr, url+" was removed?") return false, nil } @@ -300,6 +332,8 @@ func get(url string, timeout time.Duration) (content []byte, err error) { defer response.Body.Close() if response.StatusCode == 404 { + // fmt.Fprintln(os.Stderr, url+" NOT FOUND!") + missing = append(missing, url) return } @@ -327,10 +361,12 @@ func download(url string, path string) (err error) { continue } - err = ioutil.WriteFile(path, data, 0644) - if err != nil { - fmt.Fprintf(os.Stderr, "error: %s\n", err) - return err + if len(data) > 0 { + err = ioutil.WriteFile(path, data, 0644) + if err != nil { + fmt.Fprintf(os.Stderr, "error: %s\n", err) + return err + } } break } @@ -388,18 +424,48 @@ func unzip(src string, dest string) ([]string, error) { } func downloader(id int, jobs <-chan string, results chan<- string) { - for href := range jobs { result := "" - dir := filenameForUrl(href) + + dir := dirnameForUrl(href) + dest := filepath.Join(*outputDir, dir) os.MkdirAll(dest, 0700) - downloadFormat(href, "zip", dest) - downloadFormat(href, "epub", dest) - downloadFormat(href, "pdf", dest) - downloadFormat(href, "a4.pdf", dest) - downloadFormat(href, "lt.pdf", dest) + ext := filepath.Ext(href)[1:] + + if !useFormat[ext] { + return + } + + name := filenameForUrl(href) + + if ext == "zip" { + tmpDir, err := ioutil.TempDir(os.TempDir(), "taldl") + if err != nil { + fmt.Fprintf(os.Stderr, "error: %s\n", err) + continue + } + path := filepath.Join(tmpDir, name) + err = download(href, path) + if err != nil { + fmt.Fprintf(os.Stderr, "error: %s\n", err) + continue + } + _, err = unzip(path, *outputDir) + if err != nil { + fmt.Fprintf(os.Stderr, "error: %s\n", err) + continue + } + os.RemoveAll(tmpDir) + } else { + path := filepath.Join(dest, name) + err := download(href, path) + if err != nil { + fmt.Fprintf(os.Stderr, "error: %s\n", err) + continue + } + } if *verbose { results <- result @@ -445,10 +511,8 @@ func checker(id int, jobs <-chan string, results chan<- string) { var err error for href := range jobs { - ext := "muse" - url := href + "." + ext - dir := filenameForUrl(href) - name := filenameForUrl(url) + dir := dirnameForUrl(href) + name := filenameForUrl(href) path := filepath.Join(*outputDir, dir, name) if !fileExists(path) { @@ -457,7 +521,7 @@ func checker(id int, jobs <-chan string, results chan<- string) { } for { - modified, err = check(url, path, time.Second*30) + modified, err = check(href, path, time.Second*30) if err != nil { fmt.Fprintf(os.Stderr, "error: %s\n", err) time.Sleep(time.Second) @@ -480,39 +544,5 @@ func save(cache Cache) { } func downloadFormat(href string, ext string, dest string) error { - if !useFormat[ext] { - return nil - } - - url := href + "." + ext - name := filenameForUrl(url) - - if ext == "zip" { - tmpDir, err := ioutil.TempDir(os.TempDir(), "taldl") - if err != nil { - fmt.Fprintf(os.Stderr, "error: %s\n", err) - return err - } - path := filepath.Join(tmpDir, name) - err = download(url, path) - if err != nil { - fmt.Fprintf(os.Stderr, "error: %s\n", err) - return err - } - _, err = unzip(path, *outputDir) - if err != nil { - fmt.Fprintf(os.Stderr, "error: %s\n", err) - return err - } - os.RemoveAll(tmpDir) - } else { - path := filepath.Join(dest, name) - err := download(url, path) - if err != nil { - fmt.Fprintf(os.Stderr, "error: %s\n", err) - return err - } - } - return nil }