improved
This commit is contained in:
parent
b8de7eb2ca
commit
c8abf6fc61
146
main.go
146
main.go
@ -22,13 +22,6 @@ import (
|
||||
"github.com/schollz/progressbar/v3"
|
||||
)
|
||||
|
||||
// Cache is the crawl state that main fills in and hands to save.
// NOTE(review): presumably this is what gets written to cacheFile
// ("~/.taldl.json") — confirm against save.
type Cache struct {
|
||||
// MostRecent is assigned from the package-level mostRecent variable in main.
MostRecent string
|
||||
// Pages is assigned from lastPage in main.
Pages int
|
||||
}
|
||||
|
||||
var cacheFile = "~/.taldl.json"
|
||||
|
||||
var outputDir = flag.String("output", "~/TAL", "output directory")
|
||||
var formats = flag.String("formats", "zip,epub,pdf,a4.pdf,lt.pdf", "formats to download")
|
||||
var progress = flag.Bool("progress", true, "show progress bar")
|
||||
@ -36,10 +29,17 @@ var verbose = flag.Bool("verbose", true, "verbose output")
|
||||
var workers = flag.Int("workers", 5, "amount of workers")
|
||||
var update = flag.Bool("update", false, "update all entries")
|
||||
|
||||
// Cache is the crawl state that main fills in and hands to save.
// NOTE(review): presumably this is what gets written to cacheFile
// ("~/.taldl.json") — confirm against save.
type Cache struct {
|
||||
// MostRecent is assigned from the package-level mostRecent variable in main.
MostRecent string
|
||||
// Pages is assigned from lastPage in main.
Pages int
|
||||
}
|
||||
|
||||
var cacheFile = "~/.taldl.json"
|
||||
var cache Cache
|
||||
var mostRecent string
|
||||
var done bool
|
||||
var cache Cache
|
||||
var useFormat map[string]bool
|
||||
var missing []string
|
||||
|
||||
func main() {
|
||||
var bar *progressbar.ProgressBar
|
||||
@ -95,7 +95,7 @@ func main() {
|
||||
|
||||
if newPages > 0 {
|
||||
if *verbose {
|
||||
fmt.Fprintf(os.Stderr, "Checking latest entries... \r\n")
|
||||
fmt.Fprintf(os.Stderr, "Scanning latest entries... \r\n")
|
||||
}
|
||||
|
||||
if *progress {
|
||||
@ -129,7 +129,7 @@ func main() {
|
||||
cache.Pages = lastPage
|
||||
cache.MostRecent = mostRecent
|
||||
|
||||
numJobs := len(hrefs)
|
||||
numJobs := len(hrefs) * len(useFormat)
|
||||
|
||||
if numJobs == 0 {
|
||||
save(cache)
|
||||
@ -137,7 +137,7 @@ func main() {
|
||||
}
|
||||
|
||||
if *verbose {
|
||||
fmt.Fprintf(os.Stderr, "Checking %d entries for updates...\r\n", numJobs)
|
||||
fmt.Fprintf(os.Stderr, "Checking %d files for updates...\r\n", numJobs)
|
||||
}
|
||||
|
||||
if *progress {
|
||||
@ -154,8 +154,13 @@ func main() {
|
||||
for w := 1; w <= *workers; w++ {
|
||||
go checker(w, checkJobs, checkResults)
|
||||
}
|
||||
|
||||
for _, href := range hrefs {
|
||||
checkJobs <- href
|
||||
for ext, use := range useFormat {
|
||||
if use {
|
||||
checkJobs <- href + "." + ext
|
||||
}
|
||||
}
|
||||
}
|
||||
close(checkJobs)
|
||||
|
||||
@ -209,6 +214,13 @@ func main() {
|
||||
fmt.Fprintln(os.Stderr, buffer)
|
||||
}
|
||||
}
|
||||
|
||||
if len(missing) > 0 {
|
||||
fmt.Fprintln(os.Stderr, "Not found:")
|
||||
for _, url := range missing {
|
||||
fmt.Fprintln(os.Stderr, url)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func fixPath(path string) string {
|
||||
@ -229,6 +241,25 @@ func filenameForUrl(url string) string {
|
||||
return path.Base(url)
|
||||
}
|
||||
|
||||
// dirnameForUrl returns the directory name used for a download URL: the last
// path element of url with its file extension removed.
//
// PDF downloads may carry a paper-size marker in front of the extension
// ("name.a4.pdf", "name.lt.pdf"). For ".pdf" URLs one such marker is removed
// as well, so that all formats of a text resolve to the same directory:
//
//	https://host/library/some-text.epub   -> "some-text"
//	https://host/library/some-text.a4.pdf -> "some-text"
//
// For any other extension the marker is left untouched.
func dirnameForUrl(url string) string {
	base := path.Base(url)

	// URLs are always slash-separated, so the extension is taken with package
	// path rather than the OS-dependent path/filepath.
	ext := path.Ext(base)
	dirname := strings.TrimSuffix(base, ext)

	if ext != ".pdf" {
		return dirname
	}

	// Strip at most one paper-size marker; the first match wins.
	for _, variant := range []string{".a4", ".lt"} {
		if strings.HasSuffix(dirname, variant) {
			return strings.TrimSuffix(dirname, variant)
		}
	}
	return dirname
}
|
||||
|
||||
func check(url string, path string, timeout time.Duration) (modified bool, err error) {
|
||||
request, err := http.NewRequest("HEAD", url, nil)
|
||||
if err != nil {
|
||||
@ -247,6 +278,7 @@ func check(url string, path string, timeout time.Duration) (modified bool, err e
|
||||
defer response.Body.Close()
|
||||
|
||||
if response.StatusCode == 404 {
|
||||
fmt.Fprintln(os.Stderr, url+" was removed?")
|
||||
return false, nil
|
||||
}
|
||||
|
||||
@ -300,6 +332,8 @@ func get(url string, timeout time.Duration) (content []byte, err error) {
|
||||
defer response.Body.Close()
|
||||
|
||||
if response.StatusCode == 404 {
|
||||
// fmt.Fprintln(os.Stderr, url+" NOT FOUND!")
|
||||
missing = append(missing, url)
|
||||
return
|
||||
}
|
||||
|
||||
@ -327,11 +361,13 @@ func download(url string, path string) (err error) {
|
||||
continue
|
||||
}
|
||||
|
||||
if len(data) > 0 {
|
||||
err = ioutil.WriteFile(path, data, 0644)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
@ -388,18 +424,48 @@ func unzip(src string, dest string) ([]string, error) {
|
||||
}
|
||||
|
||||
func downloader(id int, jobs <-chan string, results chan<- string) {
|
||||
|
||||
for href := range jobs {
|
||||
result := ""
|
||||
dir := filenameForUrl(href)
|
||||
|
||||
dir := dirnameForUrl(href)
|
||||
|
||||
dest := filepath.Join(*outputDir, dir)
|
||||
os.MkdirAll(dest, 0700)
|
||||
|
||||
downloadFormat(href, "zip", dest)
|
||||
downloadFormat(href, "epub", dest)
|
||||
downloadFormat(href, "pdf", dest)
|
||||
downloadFormat(href, "a4.pdf", dest)
|
||||
downloadFormat(href, "lt.pdf", dest)
|
||||
ext := filepath.Ext(href)[1:]
|
||||
|
||||
if !useFormat[ext] {
|
||||
return
|
||||
}
|
||||
|
||||
name := filenameForUrl(href)
|
||||
|
||||
if ext == "zip" {
|
||||
tmpDir, err := ioutil.TempDir(os.TempDir(), "taldl")
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
||||
continue
|
||||
}
|
||||
path := filepath.Join(tmpDir, name)
|
||||
err = download(href, path)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
||||
continue
|
||||
}
|
||||
_, err = unzip(path, *outputDir)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
||||
continue
|
||||
}
|
||||
os.RemoveAll(tmpDir)
|
||||
} else {
|
||||
path := filepath.Join(dest, name)
|
||||
err := download(href, path)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if *verbose {
|
||||
results <- result
|
||||
@ -445,10 +511,8 @@ func checker(id int, jobs <-chan string, results chan<- string) {
|
||||
var err error
|
||||
|
||||
for href := range jobs {
|
||||
ext := "muse"
|
||||
url := href + "." + ext
|
||||
dir := filenameForUrl(href)
|
||||
name := filenameForUrl(url)
|
||||
dir := dirnameForUrl(href)
|
||||
name := filenameForUrl(href)
|
||||
path := filepath.Join(*outputDir, dir, name)
|
||||
|
||||
if !fileExists(path) {
|
||||
@ -457,7 +521,7 @@ func checker(id int, jobs <-chan string, results chan<- string) {
|
||||
}
|
||||
|
||||
for {
|
||||
modified, err = check(url, path, time.Second*30)
|
||||
modified, err = check(href, path, time.Second*30)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
||||
time.Sleep(time.Second)
|
||||
@ -480,39 +544,5 @@ func save(cache Cache) {
|
||||
}
|
||||
|
||||
func downloadFormat(href string, ext string, dest string) error {
|
||||
if !useFormat[ext] {
|
||||
return nil
|
||||
}
|
||||
|
||||
url := href + "." + ext
|
||||
name := filenameForUrl(url)
|
||||
|
||||
if ext == "zip" {
|
||||
tmpDir, err := ioutil.TempDir(os.TempDir(), "taldl")
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
||||
return err
|
||||
}
|
||||
path := filepath.Join(tmpDir, name)
|
||||
err = download(url, path)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
||||
return err
|
||||
}
|
||||
_, err = unzip(path, *outputDir)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
||||
return err
|
||||
}
|
||||
os.RemoveAll(tmpDir)
|
||||
} else {
|
||||
path := filepath.Join(dest, name)
|
||||
err := download(url, path)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error: %s\n", err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user