This commit is contained in:
ron 2021-06-29 05:42:37 +02:00
parent b8de7eb2ca
commit c8abf6fc61

146
main.go
View File

@ -22,13 +22,6 @@ import (
"github.com/schollz/progressbar/v3"
)
// Cache is the value main fills in and hands to save.
// NOTE(review): presumably serialized to cacheFile as JSON — confirm against save.
type Cache struct {
// MostRecent is assigned from the package-level mostRecent in main.
// NOTE(review): presumably identifies the newest entry seen — confirm.
MostRecent string
// Pages is assigned from lastPage in main.
Pages int
}
var cacheFile = "~/.taldl.json"
var outputDir = flag.String("output", "~/TAL", "output directory")
var formats = flag.String("formats", "zip,epub,pdf,a4.pdf,lt.pdf", "formats to download")
var progress = flag.Bool("progress", true, "show progress bar")
@ -36,10 +29,17 @@ var verbose = flag.Bool("verbose", true, "verbose output")
var workers = flag.Int("workers", 5, "amount of workers")
var update = flag.Bool("update", false, "update all entries")
// Cache is the value main fills in and hands to save.
// NOTE(review): presumably serialized to cacheFile as JSON — confirm against save.
type Cache struct {
// MostRecent is assigned from the package-level mostRecent in main.
// NOTE(review): presumably identifies the newest entry seen — confirm.
MostRecent string
// Pages is assigned from lastPage in main.
Pages int
}
var cacheFile = "~/.taldl.json"
var cache Cache
var mostRecent string
var done bool
var cache Cache
var useFormat map[string]bool
var missing []string
func main() {
var bar *progressbar.ProgressBar
@ -95,7 +95,7 @@ func main() {
if newPages > 0 {
if *verbose {
fmt.Fprintf(os.Stderr, "Checking latest entries... \r\n")
fmt.Fprintf(os.Stderr, "Scanning latest entries... \r\n")
}
if *progress {
@ -129,7 +129,7 @@ func main() {
cache.Pages = lastPage
cache.MostRecent = mostRecent
numJobs := len(hrefs)
numJobs := len(hrefs) * len(useFormat)
if numJobs == 0 {
save(cache)
@ -137,7 +137,7 @@ func main() {
}
if *verbose {
fmt.Fprintf(os.Stderr, "Checking %d entries for updates...\r\n", numJobs)
fmt.Fprintf(os.Stderr, "Checking %d files for updates...\r\n", numJobs)
}
if *progress {
@ -154,8 +154,13 @@ func main() {
for w := 1; w <= *workers; w++ {
go checker(w, checkJobs, checkResults)
}
for _, href := range hrefs {
checkJobs <- href
for ext, use := range useFormat {
if use {
checkJobs <- href + "." + ext
}
}
}
close(checkJobs)
@ -209,6 +214,13 @@ func main() {
fmt.Fprintln(os.Stderr, buffer)
}
}
if len(missing) > 0 {
fmt.Fprintln(os.Stderr, "Not found:")
for _, url := range missing {
fmt.Fprintln(os.Stderr, url)
}
}
}
func fixPath(path string) string {
@ -229,6 +241,25 @@ func filenameForUrl(url string) string {
return path.Base(url)
}
// dirnameForUrl returns the last path element of url with its final
// extension removed. For ".pdf" files one trailing ".a4" or ".lt" marker is
// removed as well, so "x.pdf", "x.a4.pdf" and "x.lt.pdf" all yield "x".
// Other extensions keep such a marker: "x.a4.epub" yields "x.a4".
func dirnameForUrl(url string) string {
	file := path.Base(url)
	suffix := filepath.Ext(file)
	// suffix is always a tail of file, so slicing it off equals trimming it.
	stem := file[:len(file)-len(suffix)]
	if suffix != ".pdf" {
		return stem
	}

	// Only the first matching marker is removed, checked in this order.
	for _, variant := range [...]string{".a4", ".lt"} {
		if trimmed := strings.TrimSuffix(stem, variant); trimmed != stem {
			return trimmed
		}
	}
	return stem
}
func check(url string, path string, timeout time.Duration) (modified bool, err error) {
request, err := http.NewRequest("HEAD", url, nil)
if err != nil {
@ -247,6 +278,7 @@ func check(url string, path string, timeout time.Duration) (modified bool, err e
defer response.Body.Close()
if response.StatusCode == 404 {
fmt.Fprintln(os.Stderr, url+" was removed?")
return false, nil
}
@ -300,6 +332,8 @@ func get(url string, timeout time.Duration) (content []byte, err error) {
defer response.Body.Close()
if response.StatusCode == 404 {
// fmt.Fprintln(os.Stderr, url+" NOT FOUND!")
missing = append(missing, url)
return
}
@ -327,11 +361,13 @@ func download(url string, path string) (err error) {
continue
}
if len(data) > 0 {
err = ioutil.WriteFile(path, data, 0644)
if err != nil {
fmt.Fprintf(os.Stderr, "error: %s\n", err)
return err
}
}
break
}
@ -388,18 +424,48 @@ func unzip(src string, dest string) ([]string, error) {
}
func downloader(id int, jobs <-chan string, results chan<- string) {
for href := range jobs {
result := ""
dir := filenameForUrl(href)
dir := dirnameForUrl(href)
dest := filepath.Join(*outputDir, dir)
os.MkdirAll(dest, 0700)
downloadFormat(href, "zip", dest)
downloadFormat(href, "epub", dest)
downloadFormat(href, "pdf", dest)
downloadFormat(href, "a4.pdf", dest)
downloadFormat(href, "lt.pdf", dest)
ext := filepath.Ext(href)[1:]
if !useFormat[ext] {
return
}
name := filenameForUrl(href)
if ext == "zip" {
tmpDir, err := ioutil.TempDir(os.TempDir(), "taldl")
if err != nil {
fmt.Fprintf(os.Stderr, "error: %s\n", err)
continue
}
path := filepath.Join(tmpDir, name)
err = download(href, path)
if err != nil {
fmt.Fprintf(os.Stderr, "error: %s\n", err)
continue
}
_, err = unzip(path, *outputDir)
if err != nil {
fmt.Fprintf(os.Stderr, "error: %s\n", err)
continue
}
os.RemoveAll(tmpDir)
} else {
path := filepath.Join(dest, name)
err := download(href, path)
if err != nil {
fmt.Fprintf(os.Stderr, "error: %s\n", err)
continue
}
}
if *verbose {
results <- result
@ -445,10 +511,8 @@ func checker(id int, jobs <-chan string, results chan<- string) {
var err error
for href := range jobs {
ext := "muse"
url := href + "." + ext
dir := filenameForUrl(href)
name := filenameForUrl(url)
dir := dirnameForUrl(href)
name := filenameForUrl(href)
path := filepath.Join(*outputDir, dir, name)
if !fileExists(path) {
@ -457,7 +521,7 @@ func checker(id int, jobs <-chan string, results chan<- string) {
}
for {
modified, err = check(url, path, time.Second*30)
modified, err = check(href, path, time.Second*30)
if err != nil {
fmt.Fprintf(os.Stderr, "error: %s\n", err)
time.Sleep(time.Second)
@ -480,39 +544,5 @@ func save(cache Cache) {
}
// downloadFormat fetches the ext variant of href (href + "." + ext).
//
// Formats not enabled in useFormat are skipped and nil is returned. A "zip"
// is downloaded into a fresh temporary directory and unpacked into
// *outputDir; every other format is written to dest under the name
// filenameForUrl gives the URL. Errors are printed to stderr and returned.
func downloadFormat(href string, ext string, dest string) error {
	if !useFormat[ext] {
		return nil
	}

	url := href + "." + ext
	name := filenameForUrl(url)

	if ext != "zip" {
		if err := download(url, filepath.Join(dest, name)); err != nil {
			fmt.Fprintf(os.Stderr, "error: %s\n", err)
			return err
		}
		return nil
	}

	tmpDir, err := ioutil.TempDir(os.TempDir(), "taldl")
	if err != nil {
		fmt.Fprintf(os.Stderr, "error: %s\n", err)
		return err
	}
	// Deferred so the temp directory is also removed when the download or
	// the extraction fails; previously those paths left it behind.
	defer os.RemoveAll(tmpDir)

	archive := filepath.Join(tmpDir, name)
	if err = download(url, archive); err != nil {
		fmt.Fprintf(os.Stderr, "error: %s\n", err)
		return err
	}
	if _, err = unzip(archive, *outputDir); err != nil {
		fmt.Fprintf(os.Stderr, "error: %s\n", err)
		return err
	}
	return nil
}